aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
authorMartin Czygan <martin@archive.org>2020-07-06 18:53:24 +0000
committerMartin Czygan <martin@archive.org>2020-07-06 18:53:24 +0000
commit68cf95bd1d1588c0d3170b4032596756e07ae718 (patch)
tree3163a803dd6743c84c83a786b5aea7eda3bbca8e /python/fatcat_tools
parentbea909f997bcef51e2624b9eea42c8fbe7115aaa (diff)
parent8583c6866f2bb89e8dfe5f5e5893048c2fd854e7 (diff)
downloadfatcat-68cf95bd1d1588c0d3170b4032596756e07ae718.tar.gz
fatcat-68cf95bd1d1588c0d3170b4032596756e07ae718.zip
Merge branch 'bnewbold-lint' into 'master'
lint cleanups See merge request webgroup/fatcat!62
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r--python/fatcat_tools/api_auth.py4
-rw-r--r--python/fatcat_tools/cleanups/common.py1
-rw-r--r--python/fatcat_tools/cleanups/files.py2
-rw-r--r--python/fatcat_tools/harvest/doi_registrars.py7
-rw-r--r--python/fatcat_tools/harvest/harvest_common.py8
-rw-r--r--python/fatcat_tools/harvest/oaipmh.py8
-rw-r--r--python/fatcat_tools/harvest/pubmed.py2
-rw-r--r--python/fatcat_tools/importers/arabesque.py7
-rw-r--r--python/fatcat_tools/importers/arxiv.py9
-rwxr-xr-xpython/fatcat_tools/importers/cdl_dash_dat.py4
-rw-r--r--python/fatcat_tools/importers/chocula.py3
-rw-r--r--python/fatcat_tools/importers/common.py26
-rw-r--r--python/fatcat_tools/importers/crossref.py8
-rw-r--r--python/fatcat_tools/importers/datacite.py2
-rw-r--r--python/fatcat_tools/importers/grobid_metadata.py2
-rw-r--r--python/fatcat_tools/importers/ingest.py7
-rw-r--r--python/fatcat_tools/importers/jalc.py3
-rw-r--r--python/fatcat_tools/importers/journal_metadata.py3
-rw-r--r--python/fatcat_tools/importers/jstor.py2
-rw-r--r--python/fatcat_tools/importers/matched.py8
-rw-r--r--python/fatcat_tools/importers/orcid.py4
-rw-r--r--python/fatcat_tools/importers/pubmed.py6
-rw-r--r--python/fatcat_tools/importers/shadow.py5
-rw-r--r--python/fatcat_tools/kafka.py2
-rw-r--r--python/fatcat_tools/normal.py1
-rw-r--r--python/fatcat_tools/reviewers/review_common.py2
-rw-r--r--python/fatcat_tools/transforms/csl.py14
-rw-r--r--python/fatcat_tools/transforms/elasticsearch.py12
-rw-r--r--python/fatcat_tools/transforms/entities.py1
-rw-r--r--python/fatcat_tools/transforms/ingest.py1
-rw-r--r--python/fatcat_tools/workers/changelog.py1
-rw-r--r--python/fatcat_tools/workers/elasticsearch.py2
-rw-r--r--python/fatcat_tools/workers/worker_common.py9
33 files changed, 46 insertions, 130 deletions
diff --git a/python/fatcat_tools/api_auth.py b/python/fatcat_tools/api_auth.py
index add03399..13310120 100644
--- a/python/fatcat_tools/api_auth.py
+++ b/python/fatcat_tools/api_auth.py
@@ -1,7 +1,7 @@
-import os, sys
+import os
+import sys
import fatcat_openapi_client
-from fatcat_openapi_client.rest import ApiException
def public_api(host_uri):
diff --git a/python/fatcat_tools/cleanups/common.py b/python/fatcat_tools/cleanups/common.py
index 47607cf1..04e6ade4 100644
--- a/python/fatcat_tools/cleanups/common.py
+++ b/python/fatcat_tools/cleanups/common.py
@@ -5,7 +5,6 @@ import subprocess
from collections import Counter
from fatcat_openapi_client import ApiClient, Editgroup
-from fatcat_openapi_client.rest import ApiException
from fatcat_tools.transforms import entity_from_dict, entity_to_dict
diff --git a/python/fatcat_tools/cleanups/files.py b/python/fatcat_tools/cleanups/files.py
index ec7e9064..a40e4a28 100644
--- a/python/fatcat_tools/cleanups/files.py
+++ b/python/fatcat_tools/cleanups/files.py
@@ -1,7 +1,6 @@
from fatcat_openapi_client.rest import ApiException
from fatcat_openapi_client.models import FileEntity
-from fatcat_tools.transforms import entity_to_dict, entity_from_json
from .common import EntityCleaner
@@ -70,4 +69,3 @@ class FileCleaner(EntityCleaner):
self.api.update_file(self.get_editgroup_id(), entity.ident, entity)
return 1
-
diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py
index 37628f09..2554fe96 100644
--- a/python/fatcat_tools/harvest/doi_registrars.py
+++ b/python/fatcat_tools/harvest/doi_registrars.py
@@ -1,16 +1,10 @@
-import re
import sys
-import csv
import json
import time
-import itertools
-import datetime
-import requests
from confluent_kafka import Producer, KafkaException
from urllib.parse import urlparse, parse_qs
-from fatcat_tools.workers import most_recent_message
from .harvest_common import HarvestState, requests_retry_session
@@ -64,7 +58,6 @@ class HarvestCrossrefWorker:
to be careful how state is serialized back into kafka.
"""
-
def __init__(self, kafka_hosts, produce_topic, state_topic, contact_email,
api_host_url="https://api.crossref.org/works", start_date=None,
end_date=None):
diff --git a/python/fatcat_tools/harvest/harvest_common.py b/python/fatcat_tools/harvest/harvest_common.py
index 27ab8b4a..bdae3054 100644
--- a/python/fatcat_tools/harvest/harvest_common.py
+++ b/python/fatcat_tools/harvest/harvest_common.py
@@ -1,15 +1,13 @@
import sys
import json
-import time
import datetime
import requests
from requests.adapters import HTTPAdapter
# unclear why pylint chokes on this import. Recent 'requests' and 'urllib3' are
# in Pipenv.lock, and there are no errors in QA
from requests.packages.urllib3.util.retry import Retry # pylint: disable=import-error
-from confluent_kafka import Producer, Consumer, TopicPartition, KafkaException, \
- OFFSET_BEGINNING
+from confluent_kafka import Producer, Consumer, TopicPartition, KafkaException
# Used for parsing ISO date format (YYYY-MM-DD)
@@ -130,9 +128,11 @@ class HarvestState:
}).encode('utf-8')
if kafka_topic:
assert(kafka_config)
+
def fail_fast(err, msg):
if err:
raise KafkaException(err)
+
print("Committing status to Kafka: {}".format(kafka_topic), file=sys.stderr)
producer_conf = kafka_config.copy()
producer_conf.update({
@@ -159,9 +159,11 @@ class HarvestState:
return
print("Fetching state from kafka topic: {}".format(kafka_topic), file=sys.stderr)
+
def fail_fast(err, msg):
if err:
raise KafkaException(err)
+
conf = kafka_config.copy()
conf.update({
'group.id': 'dummy_init_group', # should never be committed
diff --git a/python/fatcat_tools/harvest/oaipmh.py b/python/fatcat_tools/harvest/oaipmh.py
index d30f9507..a7dc3d8c 100644
--- a/python/fatcat_tools/harvest/oaipmh.py
+++ b/python/fatcat_tools/harvest/oaipmh.py
@@ -1,16 +1,9 @@
-import re
import sys
-import csv
-import json
import time
-import itertools
-import datetime
-import requests
import sickle
from confluent_kafka import Producer, KafkaException
-from fatcat_tools.workers import most_recent_message
from .harvest_common import HarvestState
@@ -31,7 +24,6 @@ class HarvestOaiPmhWorker:
would want something similar operationally. Oh well!
"""
-
def __init__(self, kafka_hosts, produce_topic, state_topic,
start_date=None, end_date=None):
diff --git a/python/fatcat_tools/harvest/pubmed.py b/python/fatcat_tools/harvest/pubmed.py
index f6301b8d..802d31d8 100644
--- a/python/fatcat_tools/harvest/pubmed.py
+++ b/python/fatcat_tools/harvest/pubmed.py
@@ -19,7 +19,7 @@ import tempfile
import time
import xml.etree.ElementTree as ET
from ftplib import FTP
-from urllib.parse import urljoin, urlparse
+from urllib.parse import urlparse
import dateparser
from bs4 import BeautifulSoup
diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py
index c71b33e9..47a8c4da 100644
--- a/python/fatcat_tools/importers/arabesque.py
+++ b/python/fatcat_tools/importers/arabesque.py
@@ -1,10 +1,6 @@
-import sys
-import json
-import sqlite3
-import itertools
import fatcat_openapi_client
-from .common import EntityImporter, clean, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS, b32_hex
+from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS, b32_hex
ARABESQUE_MATCH_WHERE_CLAUSE='WHERE hit = 1 AND identifier IS NOT NULL'
@@ -186,4 +182,3 @@ class ArabesqueMatchImporter(EntityImporter):
description=self.editgroup_description,
extra=self.editgroup_extra),
entity_list=batch))
-
diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py
index 719592fc..43325ebc 100644
--- a/python/fatcat_tools/importers/arxiv.py
+++ b/python/fatcat_tools/importers/arxiv.py
@@ -7,7 +7,7 @@ from bs4 import BeautifulSoup
from pylatexenc.latex2text import LatexNodes2Text
import fatcat_openapi_client
-from .common import EntityImporter, clean
+from .common import EntityImporter
from .crossref import lookup_license_slug
@@ -97,7 +97,6 @@ class ArxivRawImporter(EntityImporter):
**kwargs)
self._test_override = False
-
def parse_record(self, record):
if not record:
@@ -188,7 +187,6 @@ class ArxivRawImporter(EntityImporter):
if lang == 'en':
lang = None
-
# extra:
# withdrawn_date
# translation_of
@@ -244,7 +242,7 @@ class ArxivRawImporter(EntityImporter):
For each version, do a lookup by full arxiv_id, and store work/release
id results.
-
+
If a version has a DOI, also do a doi lookup and store that result. If
there is an existing release with both matching, set that as the
existing work. If they don't match, use the full arxiv_id match and
@@ -345,6 +343,7 @@ class ArxivRawImporter(EntityImporter):
print(json.dumps(resp))
#sys.exit(-1)
-if __name__=='__main__':
+
+if __name__ == '__main__':
parser = ArxivRawImporter(None)
parser.parse_file(open(sys.argv[1]))
diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py
index 536c013b..36a2f9a6 100755
--- a/python/fatcat_tools/importers/cdl_dash_dat.py
+++ b/python/fatcat_tools/importers/cdl_dash_dat.py
@@ -82,7 +82,7 @@ def cdl_dash_release(meta, extra=None):
#print(abstracts)
if not abstracts:
abstracts = None
-
+
contribs = []
for creator in meta['creator']:
contribs.append(ReleaseContrib(
@@ -120,7 +120,7 @@ def make_release_fileset(dat_path):
with open(dat_path + "/cdl_dash_metadata.json", 'r') as fp:
meta_dict = json.loads(fp.read())
-
+
release = cdl_dash_release(meta_dict)
ark_id = release.extra['ark_id']
diff --git a/python/fatcat_tools/importers/chocula.py b/python/fatcat_tools/importers/chocula.py
index 375b6051..d5d1cce8 100644
--- a/python/fatcat_tools/importers/chocula.py
+++ b/python/fatcat_tools/importers/chocula.py
@@ -1,7 +1,4 @@
-import sys
-import json
-import itertools
import fatcat_openapi_client
from .common import EntityImporter, clean
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index eafc6546..c0578224 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -161,18 +161,18 @@ def is_cjk(s):
return False
def test_is_cjk():
- assert is_cjk(None) == False
- assert is_cjk('') == False
- assert is_cjk('blah') == False
- assert is_cjk('岡, 鹿, 梨, 阜, 埼') == True
- assert is_cjk('[岡, 鹿, 梨, 阜, 埼]') == True
- assert is_cjk('菊') == True
- assert is_cjk('岡, 鹿, 梨, 阜, 埼 with eng after') == True
- assert is_cjk('水道') == True
- assert is_cjk('オウ, イク') == True # kanji
- assert is_cjk('ひヒ') == True
- assert is_cjk('き゚ゅ') == True
- assert is_cjk('ㄴ, ㄹ, ㅁ, ㅂ, ㅅ') == True
+ assert is_cjk(None) is False
+ assert is_cjk('') is False
+ assert is_cjk('blah') is False
+ assert is_cjk('岡, 鹿, 梨, 阜, 埼') is True
+ assert is_cjk('[岡, 鹿, 梨, 阜, 埼]') is True
+ assert is_cjk('菊') is True
+ assert is_cjk('岡, 鹿, 梨, 阜, 埼 with eng after') is True
+ assert is_cjk('水道') is True
+ assert is_cjk('オウ, イク') is True # kanji
+ assert is_cjk('ひヒ') is True
+ assert is_cjk('き゚ゅ') is True
+ assert is_cjk('ㄴ, ㄹ, ㅁ, ㅂ, ㅅ') is True
DOMAIN_REL_MAP = {
"archive.org": "archive",
@@ -368,7 +368,7 @@ class EntityImporter:
if self._entity_queue:
self.insert_batch(self._entity_queue)
self.counts['insert'] += len(self._entity_queue)
- self._entity_queue = []
+ self._entity_queue = []
return self.counts
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index d26f089f..854e3d9f 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -1,10 +1,6 @@
-import sys
-import json
import sqlite3
import datetime
-import itertools
-import subprocess
import fatcat_openapi_client
from .common import EntityImporter, clean
@@ -425,7 +421,6 @@ class CrossrefImporter(EntityImporter):
release_year = raw_date[0]
release_date = None
-
original_title = None
if obj.get('original-title'):
original_title = clean(obj.get('original-title')[0], force_xml=True)
@@ -500,7 +495,7 @@ class CrossrefImporter(EntityImporter):
if existing:
self.counts['exists'] += 1
return False
-
+
return True
def insert_batch(self, batch):
@@ -509,4 +504,3 @@ class CrossrefImporter(EntityImporter):
description=self.editgroup_description,
extra=self.editgroup_extra),
entity_list=batch))
-
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 434a2941..08c85b30 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -10,7 +10,6 @@ functions (parse_datacite_...), which may help testing.
import collections
import datetime
-import hashlib
import re
import json
import sqlite3
@@ -292,7 +291,6 @@ class DataciteImporter(EntityImporter):
print('[{}] skipping non-ascii doi for now'.format(doi))
return None
-
creators = attributes.get('creators', []) or []
contributors = attributes.get('contributors', []) or [] # Much fewer than creators.
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index 2077eae4..5ec6cc3c 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -1,9 +1,7 @@
#!/usr/bin/env python3
-import sys
import json
import base64
-import datetime
import fatcat_openapi_client
from .common import EntityImporter, clean, make_rel_url
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 2b630e67..4b1d3702 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -1,10 +1,6 @@
-import sys
-import json
-import base64
-import itertools
import fatcat_openapi_client
-from .common import EntityImporter, clean, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS, b32_hex
+from .common import EntityImporter, make_rel_url
class IngestFileResultImporter(EntityImporter):
@@ -284,4 +280,3 @@ class SavePaperNowFileImporter(IngestFileResultImporter):
description=self.editgroup_description,
extra=self.editgroup_extra),
entity_list=batch))
-
diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py
index e30bb233..38aa00eb 100644
--- a/python/fatcat_tools/importers/jalc.py
+++ b/python/fatcat_tools/importers/jalc.py
@@ -1,10 +1,7 @@
import sys
-import json
import sqlite3
import datetime
-import itertools
-import subprocess
from bs4 import BeautifulSoup
import fatcat_openapi_client
diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py
index d439c80a..32782eac 100644
--- a/python/fatcat_tools/importers/journal_metadata.py
+++ b/python/fatcat_tools/importers/journal_metadata.py
@@ -1,7 +1,4 @@
-import sys
-import json
-import itertools
import fatcat_openapi_client
from .common import EntityImporter, clean
diff --git a/python/fatcat_tools/importers/jstor.py b/python/fatcat_tools/importers/jstor.py
index 96dbf947..5d35f5e2 100644
--- a/python/fatcat_tools/importers/jstor.py
+++ b/python/fatcat_tools/importers/jstor.py
@@ -183,7 +183,7 @@ class JstorImporter(EntityImporter):
# suspect jan 1st dates get set by JSTOR when actual
# date not known (citation needed), so drop them
release_date = None
-
+
volume = None
if article_meta.volume:
volume = article_meta.volume.string or None
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index 180d7ba3..d95c5847 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -1,12 +1,8 @@
-import sys
-import json
-import sqlite3
-import itertools
import fatcat_openapi_client
from fatcat_tools.normal import *
-from .common import EntityImporter, clean, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS
+from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS
class MatchedImporter(EntityImporter):
@@ -160,7 +156,6 @@ class MatchedImporter(EntityImporter):
self.counts['skip-update-inflight'] += 1
return False
-
# minimum viable "existing" URL cleanup to fix dupes and broken links:
# remove 'None' wayback URLs, and set archive.org rel 'archive'
existing.urls = [u for u in existing.urls if not ('://web.archive.org/web/None/' in u.url)]
@@ -207,4 +202,3 @@ class MatchedImporter(EntityImporter):
description=self.editgroup_description,
extra=self.editgroup_extra),
entity_list=batch))
-
diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py
index 554e052f..21feea9e 100644
--- a/python/fatcat_tools/importers/orcid.py
+++ b/python/fatcat_tools/importers/orcid.py
@@ -1,7 +1,5 @@
import sys
-import json
-import itertools
import fatcat_openapi_client
from .common import EntityImporter, clean
@@ -89,7 +87,7 @@ class OrcidImporter(EntityImporter):
if existing:
self.counts['exists'] += 1
return False
-
+
return True
def insert_batch(self, batch):
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index 3d3e3a8c..d8a6842c 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -1,11 +1,9 @@
import sys
import json
-import sqlite3
import datetime
import warnings
from bs4 import BeautifulSoup
-from bs4.element import NavigableString
import fatcat_openapi_client
from fatcat_tools.normal import *
@@ -314,7 +312,7 @@ class PubmedImporter(EntityImporter):
Importer for PubMed/MEDLINE XML metadata.
If lookup_refs is true, will do identifer-based lookups for all references.
-
+
TODO: MEDLINE doesn't include PMC/OA license; could include in importer?
"""
@@ -502,7 +500,7 @@ class PubmedImporter(EntityImporter):
ce_edit = self.create_container(ce)
container_id = ce_edit.ident
self._issnl_id_map[issnl] = container_id
-
+
ji = journal.JournalIssue
volume = None
if ji.find("Volume"):
diff --git a/python/fatcat_tools/importers/shadow.py b/python/fatcat_tools/importers/shadow.py
index 4cd22775..c04e9aa8 100644
--- a/python/fatcat_tools/importers/shadow.py
+++ b/python/fatcat_tools/importers/shadow.py
@@ -1,8 +1,4 @@
-import sys
-import json
-import sqlite3
-import itertools
import fatcat_openapi_client
from fatcat_tools.normal import *
@@ -192,4 +188,3 @@ class ShadowLibraryImporter(EntityImporter):
description=self.editgroup_description,
extra=self.editgroup_extra),
entity_list=batch))
-
diff --git a/python/fatcat_tools/kafka.py b/python/fatcat_tools/kafka.py
index 53b62a37..228de134 100644
--- a/python/fatcat_tools/kafka.py
+++ b/python/fatcat_tools/kafka.py
@@ -1,5 +1,5 @@
-from confluent_kafka import Consumer, Producer, KafkaException
+from confluent_kafka import Producer, KafkaException
def kafka_fail_fast(err, msg):
diff --git a/python/fatcat_tools/normal.py b/python/fatcat_tools/normal.py
index f962ff3c..e65af8d6 100644
--- a/python/fatcat_tools/normal.py
+++ b/python/fatcat_tools/normal.py
@@ -231,4 +231,3 @@ def test_clean_orcid():
assert clean_orcid("0123-4567-3456-6789 ") == "0123-4567-3456-6789"
assert clean_orcid("01234567-3456-6780") == None
assert clean_orcid("0x23-4567-3456-6780") == None
-
diff --git a/python/fatcat_tools/reviewers/review_common.py b/python/fatcat_tools/reviewers/review_common.py
index 994cec56..336a47f6 100644
--- a/python/fatcat_tools/reviewers/review_common.py
+++ b/python/fatcat_tools/reviewers/review_common.py
@@ -1,12 +1,10 @@
-import json
import time
import datetime
import subprocess
from collections import Counter
import fatcat_openapi_client
-from fatcat_openapi_client.rest import ApiException
"""
checks should return:
diff --git a/python/fatcat_tools/transforms/csl.py b/python/fatcat_tools/transforms/csl.py
index 832ad6aa..ba199efb 100644
--- a/python/fatcat_tools/transforms/csl.py
+++ b/python/fatcat_tools/transforms/csl.py
@@ -1,6 +1,5 @@
import json
-import collections
from citeproc import CitationStylesStyle, CitationStylesBibliography
from citeproc import Citation, CitationItem
@@ -8,8 +7,6 @@ from citeproc import formatter
from citeproc.source.json import CiteProcJSON
from citeproc_styles import get_style_filepath
-from fatcat_openapi_client import ApiClient
-
def contribs_by_role(contribs, role):
ret = [c.copy() for c in contribs if c['role'] == role]
@@ -214,14 +211,13 @@ def citeproc_csl(csl_json, style, html=False):
lines = bib.bibliography()[0]
if style == "bibtex":
out = ""
- for l in lines:
- if l.startswith(" @"):
+ for line in lines:
+ if line.startswith(" @"):
out += "@"
- elif l.startswith(" "):
- out += "\n " + l
+ elif line.startswith(" "):
+ out += "\n " + line
else:
- out += l
+ out += line
return ''.join(out)
else:
return ''.join(lines)
-
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index 1d35141b..8ec9c164 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -1,7 +1,5 @@
-import collections
import tldextract
-from fatcat_openapi_client import ApiClient
def check_kbart(year, archive):
@@ -14,11 +12,11 @@ def check_kbart(year, archive):
def test_check_kbart():
- assert check_kbart(1990, dict()) == None
- assert check_kbart(1990, dict(year_spans=[[2000, 2000]])) == False
- assert check_kbart(2000, dict(year_spans=[[2000, 2000]])) == True
- assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1990, 2000]])) == False
- assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1930, 2000]])) == True
+ assert check_kbart(1990, dict()) is None
+ assert check_kbart(1990, dict(year_spans=[[2000, 2000]])) is False
+ assert check_kbart(2000, dict(year_spans=[[2000, 2000]])) is True
+ assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1990, 2000]])) is False
+ assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1930, 2000]])) is True
def release_to_elasticsearch(entity, force_bool=True):
diff --git a/python/fatcat_tools/transforms/entities.py b/python/fatcat_tools/transforms/entities.py
index ae666413..53455e85 100644
--- a/python/fatcat_tools/transforms/entities.py
+++ b/python/fatcat_tools/transforms/entities.py
@@ -32,4 +32,3 @@ def entity_from_json(json_str, entity_type, api_client=None):
def entity_from_dict(obj, entity_type, api_client=None):
json_str = json.dumps(obj)
return entity_from_json(json_str, entity_type, api_client=api_client)
-
diff --git a/python/fatcat_tools/transforms/ingest.py b/python/fatcat_tools/transforms/ingest.py
index 22b5154e..2f4e2271 100644
--- a/python/fatcat_tools/transforms/ingest.py
+++ b/python/fatcat_tools/transforms/ingest.py
@@ -61,4 +61,3 @@ def release_ingest_request(release, ingest_request_source='fatcat', ingest_type=
ingest_request['link_source_id'] = link_source_id
return ingest_request
-
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index 3a49f86e..d5891ad1 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -354,4 +354,3 @@ class EntityUpdatesWorker(FatcatWorker):
producer.flush()
# TODO: publish updated 'work' entities to a topic
consumer.store_offsets(message=msg)
-
diff --git a/python/fatcat_tools/workers/elasticsearch.py b/python/fatcat_tools/workers/elasticsearch.py
index e58b3da1..61854c31 100644
--- a/python/fatcat_tools/workers/elasticsearch.py
+++ b/python/fatcat_tools/workers/elasticsearch.py
@@ -1,6 +1,5 @@
import json
-import time
import requests
from confluent_kafka import Consumer, KafkaException
@@ -138,7 +137,6 @@ class ElasticsearchReleaseWorker(FatcatWorker):
consumer.store_offsets(message=msg)
-
class ElasticsearchContainerWorker(ElasticsearchReleaseWorker):
def __init__(self, kafka_hosts, consume_topic, poll_interval=10.0, offset=None,
diff --git a/python/fatcat_tools/workers/worker_common.py b/python/fatcat_tools/workers/worker_common.py
index ef79f528..8c2936be 100644
--- a/python/fatcat_tools/workers/worker_common.py
+++ b/python/fatcat_tools/workers/worker_common.py
@@ -1,15 +1,6 @@
-import re
-import sys
-import csv
-import json
-import itertools
-from itertools import islice
from confluent_kafka import Consumer, KafkaException, TopicPartition
-import fatcat_openapi_client
-from fatcat_openapi_client.rest import ApiException
-
def most_recent_message(topic, kafka_config):
"""