summaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r--python/fatcat_tools/api_auth.py1
-rw-r--r--python/fatcat_tools/cleanups/common.py3
-rw-r--r--python/fatcat_tools/cleanups/files.py2
-rw-r--r--python/fatcat_tools/fcid.py1
-rw-r--r--python/fatcat_tools/harvest/doi_registrars.py7
-rw-r--r--python/fatcat_tools/harvest/harvest_common.py11
-rw-r--r--python/fatcat_tools/harvest/oaipmh.py3
-rw-r--r--python/fatcat_tools/importers/arabesque.py2
-rw-r--r--python/fatcat_tools/importers/arxiv.py8
-rw-r--r--python/fatcat_tools/importers/chocula.py1
-rw-r--r--python/fatcat_tools/importers/common.py24
-rw-r--r--python/fatcat_tools/importers/crossref.py5
-rw-r--r--python/fatcat_tools/importers/datacite.py2
-rw-r--r--python/fatcat_tools/importers/dblp_container.py3
-rw-r--r--python/fatcat_tools/importers/dblp_release.py19
-rw-r--r--python/fatcat_tools/importers/doaj_article.py17
-rw-r--r--python/fatcat_tools/importers/file_meta.py1
-rw-r--r--python/fatcat_tools/importers/fileset_generic.py1
-rw-r--r--python/fatcat_tools/importers/grobid_metadata.py4
-rw-r--r--python/fatcat_tools/importers/ingest.py1
-rw-r--r--python/fatcat_tools/importers/jalc.py10
-rw-r--r--python/fatcat_tools/importers/journal_metadata.py1
-rw-r--r--python/fatcat_tools/importers/jstor.py9
-rw-r--r--python/fatcat_tools/importers/orcid.py3
-rw-r--r--python/fatcat_tools/kafka.py2
-rw-r--r--python/fatcat_tools/normal.py4
-rw-r--r--python/fatcat_tools/references.py10
-rw-r--r--python/fatcat_tools/reviewers/review_common.py4
-rw-r--r--python/fatcat_tools/transforms/access.py4
-rw-r--r--python/fatcat_tools/transforms/csl.py10
-rw-r--r--python/fatcat_tools/transforms/elasticsearch.py11
-rw-r--r--python/fatcat_tools/workers/changelog.py3
32 files changed, 116 insertions, 71 deletions
diff --git a/python/fatcat_tools/api_auth.py b/python/fatcat_tools/api_auth.py
index 13310120..bbf059c0 100644
--- a/python/fatcat_tools/api_auth.py
+++ b/python/fatcat_tools/api_auth.py
@@ -1,6 +1,7 @@
import os
import sys
+
import fatcat_openapi_client
diff --git a/python/fatcat_tools/cleanups/common.py b/python/fatcat_tools/cleanups/common.py
index 04e6ade4..d0fcc761 100644
--- a/python/fatcat_tools/cleanups/common.py
+++ b/python/fatcat_tools/cleanups/common.py
@@ -1,10 +1,11 @@
-import json
import copy
+import json
import subprocess
from collections import Counter
from fatcat_openapi_client import ApiClient, Editgroup
+
from fatcat_tools.transforms import entity_from_dict, entity_to_dict
diff --git a/python/fatcat_tools/cleanups/files.py b/python/fatcat_tools/cleanups/files.py
index 10dd45cc..0d275ba6 100644
--- a/python/fatcat_tools/cleanups/files.py
+++ b/python/fatcat_tools/cleanups/files.py
@@ -1,6 +1,6 @@
-from fatcat_openapi_client.rest import ApiException
from fatcat_openapi_client.models import FileEntity
+from fatcat_openapi_client.rest import ApiException
from .common import EntityCleaner
diff --git a/python/fatcat_tools/fcid.py b/python/fatcat_tools/fcid.py
index 4194ea63..0987d10d 100644
--- a/python/fatcat_tools/fcid.py
+++ b/python/fatcat_tools/fcid.py
@@ -2,6 +2,7 @@
import base64
import uuid
+
def fcid2uuid(s):
"""
Converts a fatcat identifier (base32 encoded string) to a uuid.UUID object
diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py
index 553f4e7a..d441d495 100644
--- a/python/fatcat_tools/harvest/doi_registrars.py
+++ b/python/fatcat_tools/harvest/doi_registrars.py
@@ -1,9 +1,10 @@
-import sys
import json
+import sys
import time
-from confluent_kafka import Producer, KafkaException
-from urllib.parse import urlparse, parse_qs
+from urllib.parse import parse_qs, urlparse
+
+from confluent_kafka import KafkaException, Producer
from .harvest_common import HarvestState, requests_retry_session
diff --git a/python/fatcat_tools/harvest/harvest_common.py b/python/fatcat_tools/harvest/harvest_common.py
index 5e7702d9..45c2b8ea 100644
--- a/python/fatcat_tools/harvest/harvest_common.py
+++ b/python/fatcat_tools/harvest/harvest_common.py
@@ -1,14 +1,15 @@
-import sys
-import json
import datetime
+import json
+import sys
+
import requests
+from confluent_kafka import Consumer, KafkaException, Producer, TopicPartition
from requests.adapters import HTTPAdapter
+
# unclear why pylint chokes on this import. Recent 'requests' and 'urllib3' are
# in Pipenv.lock, and there are no errors in QA
-from requests.packages.urllib3.util.retry import Retry # pylint: disable=import-error
-from confluent_kafka import Producer, Consumer, TopicPartition, KafkaException
-
+from requests.packages.urllib3.util.retry import Retry # pylint: disable=import-error
# Used for parsing ISO date format (YYYY-MM-DD)
DATE_FMT = "%Y-%m-%d"
diff --git a/python/fatcat_tools/harvest/oaipmh.py b/python/fatcat_tools/harvest/oaipmh.py
index c4e4a82a..0eb0343d 100644
--- a/python/fatcat_tools/harvest/oaipmh.py
+++ b/python/fatcat_tools/harvest/oaipmh.py
@@ -1,8 +1,9 @@
import sys
import time
+
import sickle
-from confluent_kafka import Producer, KafkaException
+from confluent_kafka import KafkaException, Producer
from .harvest_common import HarvestState
diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py
index c8f7c77c..2b0ff7ec 100644
--- a/python/fatcat_tools/importers/arabesque.py
+++ b/python/fatcat_tools/importers/arabesque.py
@@ -1,7 +1,7 @@
import fatcat_openapi_client
-from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS, b32_hex
+from .common import SANE_MAX_RELEASES, SANE_MAX_URLS, EntityImporter, b32_hex, make_rel_url
ARABESQUE_MATCH_WHERE_CLAUSE='WHERE hit = 1 AND identifier IS NOT NULL'
diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py
index 43325ebc..fc429fb0 100644
--- a/python/fatcat_tools/importers/arxiv.py
+++ b/python/fatcat_tools/importers/arxiv.py
@@ -1,16 +1,16 @@
+import datetime
+import json
import re
import sys
-import json
-import datetime
+
+import fatcat_openapi_client
from bs4 import BeautifulSoup
from pylatexenc.latex2text import LatexNodes2Text
-import fatcat_openapi_client
from .common import EntityImporter
from .crossref import lookup_license_slug
-
latex2text = LatexNodes2Text()
def latex_to_text(raw):
diff --git a/python/fatcat_tools/importers/chocula.py b/python/fatcat_tools/importers/chocula.py
index 5c9efe94..0b634e73 100644
--- a/python/fatcat_tools/importers/chocula.py
+++ b/python/fatcat_tools/importers/chocula.py
@@ -1,5 +1,6 @@
import fatcat_openapi_client
+
from .common import EntityImporter, clean
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index 9d22ce83..e33a2012 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -1,32 +1,32 @@
-import re
-import sys
import csv
+import datetime
import json
+import re
import sqlite3
-import datetime
import subprocess
-from collections import Counter
-from typing import Dict, Any, List, Optional, Tuple
-import lxml
+import sys
import xml.etree.ElementTree as ET
+from collections import Counter
+from typing import Any, Dict, List, Optional, Tuple
import elasticsearch
+import fatcat_openapi_client
+import fuzzycat.common
+import fuzzycat.verify
+import lxml
from bs4 import BeautifulSoup
from confluent_kafka import Consumer, KafkaException
-
-import fatcat_openapi_client
from fatcat_openapi_client import ReleaseEntity
from fatcat_openapi_client.rest import ApiException
from fuzzycat.matching import match_release_fuzzy
-import fuzzycat.common
-import fuzzycat.verify
# TODO: refactor so remove need for this (re-imports for backwards compatibility)
-from fatcat_tools.normal import (clean_str as clean, is_cjk, b32_hex, LANG_MAP_MARC) # noqa: F401
+from fatcat_tools.normal import LANG_MAP_MARC, b32_hex
+from fatcat_tools.normal import clean_str as clean # noqa: F401
+from fatcat_tools.normal import is_cjk
from fatcat_tools.transforms import entity_to_dict
-
DATE_FMT: str = "%Y-%m-%d"
SANE_MAX_RELEASES: int = 200
SANE_MAX_URLS: int = 100
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index 38c19a63..fd6936a4 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -1,14 +1,13 @@
-import sqlite3
import datetime
-from typing import Dict, Optional, Any
+import sqlite3
+from typing import Any, Dict, Optional
import fatcat_openapi_client
from fatcat_openapi_client import ReleaseEntity
from .common import EntityImporter, clean
-
# The docs/guide should be the canonical home for these mappings; update there
# first
# Can get a list of Crossref types (with counts) via API:
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 1593e6f8..a06c68a4 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -10,8 +10,8 @@ functions (parse_datacite_...), which may help testing.
import collections
import datetime
-import re
import json
+import re
import sqlite3
import sys
diff --git a/python/fatcat_tools/importers/dblp_container.py b/python/fatcat_tools/importers/dblp_container.py
index a9f993a8..3d280fb7 100644
--- a/python/fatcat_tools/importers/dblp_container.py
+++ b/python/fatcat_tools/importers/dblp_container.py
@@ -7,8 +7,9 @@ pre-scraped in to JSON from HTML pages.
import sys # noqa: F401
import fatcat_openapi_client
-from fatcat_tools.normal import clean_str
+
from fatcat_tools.importers.common import EntityImporter
+from fatcat_tools.normal import clean_str
class DblpContainerImporter(EntityImporter):
diff --git a/python/fatcat_tools/importers/dblp_release.py b/python/fatcat_tools/importers/dblp_release.py
index fa5cb842..6d028f2f 100644
--- a/python/fatcat_tools/importers/dblp_release.py
+++ b/python/fatcat_tools/importers/dblp_release.py
@@ -21,18 +21,25 @@ brittle/unreliable TSV lookup mechanism for prefix-to-container_id (as of
December 2020).
"""
-import sys # noqa: F401
+import datetime
import json
+import sys # noqa: F401
import warnings
-import datetime
-from typing import List, Optional, Any
+from typing import Any, List, Optional
import fatcat_openapi_client
-from fatcat_tools.normal import (clean_doi, clean_str, parse_month,
- clean_orcid, clean_hdl,
- clean_arxiv_id, clean_wikidata_qid, clean_isbn13)
from fatcat_tools.importers.common import EntityImporter
+from fatcat_tools.normal import (
+ clean_arxiv_id,
+ clean_doi,
+ clean_hdl,
+ clean_isbn13,
+ clean_orcid,
+ clean_str,
+ clean_wikidata_qid,
+ parse_month,
+)
from fatcat_tools.transforms import entity_to_dict
diff --git a/python/fatcat_tools/importers/doaj_article.py b/python/fatcat_tools/importers/doaj_article.py
index 833089ae..1831c4cd 100644
--- a/python/fatcat_tools/importers/doaj_article.py
+++ b/python/fatcat_tools/importers/doaj_article.py
@@ -4,15 +4,24 @@ Importer for DOAJ article-level metadata, schema v1.
DOAJ API schema and docs: https://doaj.org/api/v1/docs
"""
-import warnings
import datetime
+import warnings
from typing import List, Optional
import fatcat_openapi_client
-from fatcat_tools.normal import (clean_doi, clean_str, parse_month,
- clean_orcid, detect_text_lang, parse_lang_name, parse_country_name,
- clean_pmid, clean_pmcid)
+
from fatcat_tools.importers.common import EntityImporter
+from fatcat_tools.normal import (
+ clean_doi,
+ clean_orcid,
+ clean_pmcid,
+ clean_pmid,
+ clean_str,
+ detect_text_lang,
+ parse_country_name,
+ parse_lang_name,
+ parse_month,
+)
# Cutoff length for abstracts.
MAX_ABSTRACT_LENGTH = 2048
diff --git a/python/fatcat_tools/importers/file_meta.py b/python/fatcat_tools/importers/file_meta.py
index 3d9f5923..0951ed84 100644
--- a/python/fatcat_tools/importers/file_meta.py
+++ b/python/fatcat_tools/importers/file_meta.py
@@ -1,5 +1,6 @@
import fatcat_openapi_client
+
from .common import EntityImporter
diff --git a/python/fatcat_tools/importers/fileset_generic.py b/python/fatcat_tools/importers/fileset_generic.py
index 13352fb2..43c2a49c 100644
--- a/python/fatcat_tools/importers/fileset_generic.py
+++ b/python/fatcat_tools/importers/fileset_generic.py
@@ -2,6 +2,7 @@
import fatcat_openapi_client
from fatcat_tools import entity_from_dict
+
from .common import EntityImporter
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index a811c856..0f666652 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -1,8 +1,10 @@
#!/usr/bin/env python3
-import json
import base64
+import json
+
import fatcat_openapi_client
+
from .common import EntityImporter, clean, make_rel_url
MAX_ABSTRACT_BYTES=4096
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 4d4efc0a..f0943c1e 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -2,6 +2,7 @@
import datetime
import fatcat_openapi_client
+
from .common import EntityImporter, make_rel_url
diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py
index 12f5450f..0a983c5e 100644
--- a/python/fatcat_tools/importers/jalc.py
+++ b/python/fatcat_tools/importers/jalc.py
@@ -1,12 +1,14 @@
-import sys
-import sqlite3
import datetime
-from bs4 import BeautifulSoup
+import sqlite3
+import sys
import fatcat_openapi_client
+from bs4 import BeautifulSoup
+
from fatcat_tools.normal import clean_doi
-from .common import EntityImporter, clean, is_cjk, DATE_FMT
+
+from .common import DATE_FMT, EntityImporter, clean, is_cjk
def parse_jalc_persons(raw_persons):
diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py
index 9f3b429f..25d7b3b5 100644
--- a/python/fatcat_tools/importers/journal_metadata.py
+++ b/python/fatcat_tools/importers/journal_metadata.py
@@ -1,5 +1,6 @@
import fatcat_openapi_client
+
from .common import EntityImporter, clean
diff --git a/python/fatcat_tools/importers/jstor.py b/python/fatcat_tools/importers/jstor.py
index 5d35f5e2..d37424d6 100644
--- a/python/fatcat_tools/importers/jstor.py
+++ b/python/fatcat_tools/importers/jstor.py
@@ -1,12 +1,13 @@
-import sys
-import json
import datetime
+import json
+import sys
import warnings
-from bs4 import BeautifulSoup
import fatcat_openapi_client
-from .common import EntityImporter, clean, LANG_MAP_MARC
+from bs4 import BeautifulSoup
+
+from .common import LANG_MAP_MARC, EntityImporter, clean
from .crossref import CONTAINER_TYPE_MAP
# TODO: more entries?
diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py
index 4412a46d..3bdd23a1 100644
--- a/python/fatcat_tools/importers/orcid.py
+++ b/python/fatcat_tools/importers/orcid.py
@@ -1,8 +1,11 @@
import sys
+
import fatcat_openapi_client
+
from .common import EntityImporter, clean
+
def value_or_none(e):
if type(e) == dict:
e = e.get('value')
diff --git a/python/fatcat_tools/kafka.py b/python/fatcat_tools/kafka.py
index 228de134..32749db2 100644
--- a/python/fatcat_tools/kafka.py
+++ b/python/fatcat_tools/kafka.py
@@ -1,5 +1,5 @@
-from confluent_kafka import Producer, KafkaException
+from confluent_kafka import KafkaException, Producer
def kafka_fail_fast(err, msg):
diff --git a/python/fatcat_tools/normal.py b/python/fatcat_tools/normal.py
index 24c0bb0a..9b65e768 100644
--- a/python/fatcat_tools/normal.py
+++ b/python/fatcat_tools/normal.py
@@ -4,10 +4,10 @@ A bunch of helpers to parse and normalize strings: external identifiers,
free-form input, titles, etc.
"""
-import re
import base64
-from typing import Optional, Union
+import re
import unicodedata
+from typing import Optional, Union
import ftfy
import langdetect
diff --git a/python/fatcat_tools/references.py b/python/fatcat_tools/references.py
index 3a2709a4..8361b260 100644
--- a/python/fatcat_tools/references.py
+++ b/python/fatcat_tools/references.py
@@ -5,18 +5,18 @@ index of reference links between works in the main catalog.
See bulk citation and citation API proposals for design documentation.
"""
-import sys
-import datetime
import argparse
-from typing import Optional, List, Any, Dict, Union
+import datetime
+import sys
+from typing import Any, Dict, List, Optional, Union
-from pydantic import BaseModel, validator
import elasticsearch
from elasticsearch_dsl import Search
from fatcat_openapi_client import ReleaseEntity
+from pydantic import BaseModel, validator
from fatcat_tools import public_api
-from fatcat_tools.transforms.access import release_access_options, AccessOption
+from fatcat_tools.transforms.access import AccessOption, release_access_options
from fatcat_tools.transforms.entities import entity_to_dict
diff --git a/python/fatcat_tools/reviewers/review_common.py b/python/fatcat_tools/reviewers/review_common.py
index b4930c19..867d826d 100644
--- a/python/fatcat_tools/reviewers/review_common.py
+++ b/python/fatcat_tools/reviewers/review_common.py
@@ -1,9 +1,9 @@
-import time
import datetime
import subprocess
+import time
from collections import Counter
-from typing import Optional, List, Any
+from typing import Any, List, Optional
import fatcat_openapi_client
diff --git a/python/fatcat_tools/transforms/access.py b/python/fatcat_tools/transforms/access.py
index 39d4c6d3..ae9880e7 100644
--- a/python/fatcat_tools/transforms/access.py
+++ b/python/fatcat_tools/transforms/access.py
@@ -1,9 +1,9 @@
from enum import Enum
-from typing import Optional, List
+from typing import List, Optional
-from pydantic import BaseModel
from fatcat_openapi_client import ReleaseEntity
+from pydantic import BaseModel
class AccessType(str, Enum):
diff --git a/python/fatcat_tools/transforms/csl.py b/python/fatcat_tools/transforms/csl.py
index 0556f4fe..f8b26bce 100644
--- a/python/fatcat_tools/transforms/csl.py
+++ b/python/fatcat_tools/transforms/csl.py
@@ -1,9 +1,13 @@
import json
-from citeproc import CitationStylesStyle, CitationStylesBibliography
-from citeproc import Citation, CitationItem
-from citeproc import formatter
+from citeproc import (
+ Citation,
+ CitationItem,
+ CitationStylesBibliography,
+ CitationStylesStyle,
+ formatter,
+)
from citeproc.source.json import CiteProcJSON
from citeproc_styles import get_style_filepath
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index ec5891c3..1826d4eb 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -1,10 +1,15 @@
import datetime
-from typing import Dict, Any, Optional
+from typing import Any, Dict, Optional
import tldextract
-
-from fatcat_openapi_client import ReleaseEntity, ContainerEntity, EntityEdit, ChangelogEntry, FileEntity
+from fatcat_openapi_client import (
+ ChangelogEntry,
+ ContainerEntity,
+ EntityEdit,
+ FileEntity,
+ ReleaseEntity,
+)
def check_kbart(year: int, archive: dict) -> Optional[bool]:
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index 982ee3ea..a61e364c 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -1,7 +1,8 @@
import json
import time
-from confluent_kafka import Consumer, Producer, KafkaException
+
+from confluent_kafka import Consumer, KafkaException, Producer
from fatcat_tools.transforms import release_ingest_request, release_to_elasticsearch