diff options
Diffstat (limited to 'python')
96 files changed, 182 insertions, 340 deletions
| diff --git a/python/.flake8 b/python/.flake8 new file mode 100644 index 00000000..34f6131c --- /dev/null +++ b/python/.flake8 @@ -0,0 +1,13 @@ +[flake8] +# TODO: ANN for better annotation coverage +select = C,E,F,W +# The ignores starting with "E251" should be removed after using 'black' +ignore = F405,F403,W503,E231,E203,E501,E226,E711,E713,E265,ANN101,ANN204,ANN102,E251,E128,E302,E261,E241,E201,E202,E266,E124,E305,E225,W504,E123,E122,E125,E121,E129,E126,E712,W191,E101 +# TODO: should reduce max-complexity +max-complexity = 50 +exclude = .git,__pycache__,.venv +max-line-length = 120 +per-file-ignores = +    */__init__.py: F401 +    tests/*.py: F401,F811 +    tests/transform_csl.py: W291 diff --git a/python/Makefile b/python/Makefile index 182bc739..4c8ff45f 100644 --- a/python/Makefile +++ b/python/Makefile @@ -6,14 +6,34 @@ SHELL = /bin/bash  help: ## Print info about all commands  	@echo "Commands:"  	@echo -	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "    \033[01;32m%-20s\033[0m %s\n", $$1, $$2}' +	@grep -E '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "    \033[01;32m%-20s\033[0m %s\n", $$1, $$2}' + +.PHONY: dep +dep: ## Create local virtualenv using pipenv +	pipenv install --dev + +.PHONY: lint +lint: ## Run lints (eg, flake8, mypy) +	pipenv run flake8 *.py tests/ fatcat_web/ fatcat_tools/ --select=E9,F63,F7,F82 +	pipenv run flake8 *.py tests/ fatcat_web/ fatcat_tools/ --exit-zero + +.PHONY: mypy +mypy: ## Run mypy type checks (not part of regular lint yet) +	pipenv run mypy *.py fatcat_web/ fatcat_tools/ --ignore-missing-imports + +# Not ready for 'black' yet +#.PHONY: fmt +#fmt: ## Run code formating on all source code +#	pipenv run black *.py fatcat_web/ fatcat_tools/ tests/  .PHONY: test -test: ## Run all tests and lints -	curl --silent localhost:9411/v0/changelog > /dev/null || (echo "API not running locally, bailing early from tests" && exit 1) +test: lint ## Run all tests and lints +	@curl --silent localhost:9411/v0/changelog > /dev/null || (echo "API not running locally, bailing early from tests" && exit 1)  	pipenv run pytest -	pipenv run pylint -j 0 -E fatcat*.py fatcat_tools fatcat_web tests/*.py -	pipenv run flake8 tests/ fatcat_web/ fatcat_tools/ *.py --count --select=E9,F63,F7,F82 --show-source --statistics + +.PHONY: coverage +coverage: ## Run all tests with coverage +	pipenv run pytest --cov  .PHONY: test-cli  test-cli: ## Run CLI commands. WARNING: may mutate local database diff --git a/python/TODO b/python/TODO index fdb72849..52b2b8fe 100644 --- a/python/TODO +++ b/python/TODO @@ -1,4 +1,14 @@ +improve argparse usage +    change --host-url to --fatcat-api-url +    add 'help=' to all CLI sub-commands; improves --help output +    do ArgumentDefaultsHelpFormatter everywhere +        parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + +Try stubgen for type annotation:: +    stubgen -m fatcat_openapi_client -o stubs/ +    stubgen -p fatcat_openapi_client -o stubs/ +  - schema.org metadata for releases  additional tests diff --git a/python/fatcat_cleanup.py b/python/fatcat_cleanup.py index d8b2aea2..4e11139e 100755 --- a/python/fatcat_cleanup.py +++ b/python/fatcat_cleanup.py @@ -1,6 +1,8 @@  #!/usr/bin/env python3 -import os, sys, argparse +import os +import sys +import argparse  import raven  from fatcat_tools import authenticated_api diff --git a/python/fatcat_export.py b/python/fatcat_export.py index 5419e46c..763c217e 100755 --- a/python/fatcat_export.py +++ b/python/fatcat_export.py @@ -11,11 +11,7 @@ import sys  import json  import argparse -import fatcat_openapi_client -from fatcat_openapi_client.rest import ApiException -from fatcat_openapi_client import ReleaseEntity, ContainerEntity, ChangelogEntry -from fatcat_tools import uuid2fcid, entity_from_json, entity_to_dict, \ -    public_api +from fatcat_tools import uuid2fcid, entity_to_dict, public_api  def run_export_releases(args): diff --git a/python/fatcat_import.py b/python/fatcat_import.py index 331cf791..252ab3a5 100755 --- a/python/fatcat_import.py +++ b/python/fatcat_import.py @@ -1,6 +1,8 @@  #!/usr/bin/env python3 -import os, sys, argparse +import os +import sys +import argparse  import raven  from fatcat_tools import authenticated_api diff --git a/python/fatcat_review.py b/python/fatcat_review.py index 1d1db9a5..a10fc34b 100755 --- a/python/fatcat_review.py +++ b/python/fatcat_review.py @@ -2,11 +2,10 @@  import sys  import argparse -import datetime  import raven  from fatcat_tools import authenticated_api -from fatcat_tools.reviewers import DummyReviewBot, ReviewBot +from fatcat_tools.reviewers import DummyReviewBot  # Yep, a global. Gets DSN from `SENTRY_DSN` environment variable  sentry_client = raven.Client() diff --git a/python/fatcat_tools/api_auth.py b/python/fatcat_tools/api_auth.py index add03399..13310120 100644 --- a/python/fatcat_tools/api_auth.py +++ b/python/fatcat_tools/api_auth.py @@ -1,7 +1,7 @@ -import os, sys +import os +import sys  import fatcat_openapi_client -from fatcat_openapi_client.rest import ApiException  def public_api(host_uri): diff --git a/python/fatcat_tools/cleanups/common.py b/python/fatcat_tools/cleanups/common.py index 47607cf1..04e6ade4 100644 --- a/python/fatcat_tools/cleanups/common.py +++ b/python/fatcat_tools/cleanups/common.py @@ -5,7 +5,6 @@ import subprocess  from collections import Counter  from fatcat_openapi_client import ApiClient, Editgroup -from fatcat_openapi_client.rest import ApiException  from fatcat_tools.transforms import entity_from_dict, entity_to_dict diff --git a/python/fatcat_tools/cleanups/files.py b/python/fatcat_tools/cleanups/files.py index ec7e9064..a40e4a28 100644 --- a/python/fatcat_tools/cleanups/files.py +++ b/python/fatcat_tools/cleanups/files.py @@ -1,7 +1,6 @@  from fatcat_openapi_client.rest import ApiException  from fatcat_openapi_client.models import FileEntity -from fatcat_tools.transforms import entity_to_dict, entity_from_json  from .common import EntityCleaner @@ -70,4 +69,3 @@ class FileCleaner(EntityCleaner):          self.api.update_file(self.get_editgroup_id(), entity.ident, entity)          return 1 - diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py index 37628f09..2554fe96 100644 --- a/python/fatcat_tools/harvest/doi_registrars.py +++ b/python/fatcat_tools/harvest/doi_registrars.py @@ -1,16 +1,10 @@ -import re  import sys -import csv  import json  import time -import itertools -import datetime -import requests  from confluent_kafka import Producer, KafkaException  from urllib.parse import urlparse, parse_qs -from fatcat_tools.workers import most_recent_message  from .harvest_common import HarvestState, requests_retry_session @@ -64,7 +58,6 @@ class HarvestCrossrefWorker:      to be careful how state is serialized back into kafka.      """ -      def __init__(self, kafka_hosts, produce_topic, state_topic, contact_email,              api_host_url="https://api.crossref.org/works", start_date=None,              end_date=None): diff --git a/python/fatcat_tools/harvest/harvest_common.py b/python/fatcat_tools/harvest/harvest_common.py index 27ab8b4a..bdae3054 100644 --- a/python/fatcat_tools/harvest/harvest_common.py +++ b/python/fatcat_tools/harvest/harvest_common.py @@ -1,15 +1,13 @@  import sys  import json -import time  import datetime  import requests  from requests.adapters import HTTPAdapter  # unclear why pylint chokes on this import. Recent 'requests' and 'urllib3' are  # in Pipenv.lock, and there are no errors in QA  from requests.packages.urllib3.util.retry import Retry # pylint: disable=import-error -from confluent_kafka import Producer, Consumer, TopicPartition, KafkaException, \ -    OFFSET_BEGINNING +from confluent_kafka import Producer, Consumer, TopicPartition, KafkaException  # Used for parsing ISO date format (YYYY-MM-DD) @@ -130,9 +128,11 @@ class HarvestState:          }).encode('utf-8')          if kafka_topic:              assert(kafka_config) +              def fail_fast(err, msg):                  if err:                      raise KafkaException(err) +              print("Committing status to Kafka: {}".format(kafka_topic), file=sys.stderr)              producer_conf = kafka_config.copy()              producer_conf.update({ @@ -159,9 +159,11 @@ class HarvestState:              return          print("Fetching state from kafka topic: {}".format(kafka_topic), file=sys.stderr) +          def fail_fast(err, msg):              if err:                  raise KafkaException(err) +          conf = kafka_config.copy()          conf.update({              'group.id': 'dummy_init_group', # should never be committed diff --git a/python/fatcat_tools/harvest/oaipmh.py b/python/fatcat_tools/harvest/oaipmh.py index d30f9507..c4e4a82a 100644 --- a/python/fatcat_tools/harvest/oaipmh.py +++ b/python/fatcat_tools/harvest/oaipmh.py @@ -1,16 +1,9 @@ -import re  import sys -import csv -import json  import time -import itertools -import datetime -import requests  import sickle  from confluent_kafka import Producer, KafkaException -from fatcat_tools.workers import most_recent_message  from .harvest_common import HarvestState @@ -31,7 +24,6 @@ class HarvestOaiPmhWorker:      would want something similar operationally. Oh well!      """ -      def __init__(self, kafka_hosts, produce_topic, state_topic,              start_date=None, end_date=None): @@ -69,7 +61,7 @@ class HarvestOaiPmhWorker:          })          producer = Producer(producer_conf) -        api = sickle.Sickle(self.endpoint_url) +        api = sickle.Sickle(self.endpoint_url, max_retries=5, retry_status_codes=[503])          date_str = date.isoformat()          # this dict kwargs hack is to work around 'from' as a reserved python keyword          # recommended by sickle docs diff --git a/python/fatcat_tools/harvest/pubmed.py b/python/fatcat_tools/harvest/pubmed.py index f6301b8d..802d31d8 100644 --- a/python/fatcat_tools/harvest/pubmed.py +++ b/python/fatcat_tools/harvest/pubmed.py @@ -19,7 +19,7 @@ import tempfile  import time  import xml.etree.ElementTree as ET  from ftplib import FTP -from urllib.parse import urljoin, urlparse +from urllib.parse import urlparse  import dateparser  from bs4 import BeautifulSoup diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py index c71b33e9..47a8c4da 100644 --- a/python/fatcat_tools/importers/arabesque.py +++ b/python/fatcat_tools/importers/arabesque.py @@ -1,10 +1,6 @@ -import sys -import json -import sqlite3 -import itertools  import fatcat_openapi_client -from .common import EntityImporter, clean, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS, b32_hex +from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS, b32_hex  ARABESQUE_MATCH_WHERE_CLAUSE='WHERE hit = 1 AND identifier IS NOT NULL' @@ -186,4 +182,3 @@ class ArabesqueMatchImporter(EntityImporter):                  description=self.editgroup_description,                  extra=self.editgroup_extra),              entity_list=batch)) - diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py index 719592fc..43325ebc 100644 --- a/python/fatcat_tools/importers/arxiv.py +++ b/python/fatcat_tools/importers/arxiv.py @@ -7,7 +7,7 @@ from bs4 import BeautifulSoup  from pylatexenc.latex2text import LatexNodes2Text  import fatcat_openapi_client -from .common import EntityImporter, clean +from .common import EntityImporter  from .crossref import lookup_license_slug @@ -97,7 +97,6 @@ class ArxivRawImporter(EntityImporter):              **kwargs)          self._test_override = False -      def parse_record(self, record):          if not record: @@ -188,7 +187,6 @@ class ArxivRawImporter(EntityImporter):                  if lang == 'en':                      lang = None -          # extra:          #   withdrawn_date          #   translation_of @@ -244,7 +242,7 @@ class ArxivRawImporter(EntityImporter):          For each version, do a lookup by full arxiv_id, and store work/release          id results. -         +          If a version has a DOI, also do a doi lookup and store that result. If          there is an existing release with both matching, set that as the          existing work. If they don't match, use the full arxiv_id match and @@ -345,6 +343,7 @@ class ArxivRawImporter(EntityImporter):              print(json.dumps(resp))              #sys.exit(-1) -if __name__=='__main__': + +if __name__ == '__main__':      parser = ArxivRawImporter(None)      parser.parse_file(open(sys.argv[1])) diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py index 536c013b..36a2f9a6 100755 --- a/python/fatcat_tools/importers/cdl_dash_dat.py +++ b/python/fatcat_tools/importers/cdl_dash_dat.py @@ -82,7 +82,7 @@ def cdl_dash_release(meta, extra=None):              #print(abstracts)      if not abstracts:          abstracts = None -     +      contribs = []      for creator in meta['creator']:          contribs.append(ReleaseContrib( @@ -120,7 +120,7 @@ def make_release_fileset(dat_path):      with open(dat_path + "/cdl_dash_metadata.json", 'r') as fp:          meta_dict = json.loads(fp.read()) -     +      release = cdl_dash_release(meta_dict)      ark_id = release.extra['ark_id'] diff --git a/python/fatcat_tools/importers/chocula.py b/python/fatcat_tools/importers/chocula.py index 375b6051..d5d1cce8 100644 --- a/python/fatcat_tools/importers/chocula.py +++ b/python/fatcat_tools/importers/chocula.py @@ -1,7 +1,4 @@ -import sys -import json -import itertools  import fatcat_openapi_client  from .common import EntityImporter, clean diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index eafc6546..c0578224 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -161,18 +161,18 @@ def is_cjk(s):      return False  def test_is_cjk(): -    assert is_cjk(None) == False -    assert is_cjk('') == False -    assert is_cjk('blah') == False -    assert is_cjk('岡, 鹿, 梨, 阜, 埼') == True -    assert is_cjk('[岡, 鹿, 梨, 阜, 埼]') == True -    assert is_cjk('菊') == True -    assert is_cjk('岡, 鹿, 梨, 阜, 埼 with eng after') == True -    assert is_cjk('水道') == True -    assert is_cjk('オウ, イク') == True # kanji -    assert is_cjk('ひヒ') == True -    assert is_cjk('き゚ゅ') == True -    assert is_cjk('ㄴ, ㄹ, ㅁ, ㅂ, ㅅ') == True +    assert is_cjk(None) is False +    assert is_cjk('') is False +    assert is_cjk('blah') is False +    assert is_cjk('岡, 鹿, 梨, 阜, 埼') is True +    assert is_cjk('[岡, 鹿, 梨, 阜, 埼]') is True +    assert is_cjk('菊') is True +    assert is_cjk('岡, 鹿, 梨, 阜, 埼 with eng after') is True +    assert is_cjk('水道') is True +    assert is_cjk('オウ, イク') is True # kanji +    assert is_cjk('ひヒ') is True +    assert is_cjk('き゚ゅ') is True +    assert is_cjk('ㄴ, ㄹ, ㅁ, ㅂ, ㅅ') is True  DOMAIN_REL_MAP = {      "archive.org": "archive", @@ -368,7 +368,7 @@ class EntityImporter:          if self._entity_queue:              self.insert_batch(self._entity_queue)              self.counts['insert'] += len(self._entity_queue) -            self._entity_queue =  [] +            self._entity_queue = []          return self.counts diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index d26f089f..854e3d9f 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -1,10 +1,6 @@ -import sys -import json  import sqlite3  import datetime -import itertools -import subprocess  import fatcat_openapi_client  from .common import EntityImporter, clean @@ -425,7 +421,6 @@ class CrossrefImporter(EntityImporter):              release_year = raw_date[0]              release_date = None -          original_title = None          if obj.get('original-title'):              original_title = clean(obj.get('original-title')[0], force_xml=True) @@ -500,7 +495,7 @@ class CrossrefImporter(EntityImporter):          if existing:              self.counts['exists'] += 1              return False -         +          return True      def insert_batch(self, batch): @@ -509,4 +504,3 @@ class CrossrefImporter(EntityImporter):                  description=self.editgroup_description,                  extra=self.editgroup_extra),              entity_list=batch)) - diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 962d80c6..6aeb6a68 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -10,7 +10,6 @@ functions (parse_datacite_...), which may help testing.  import collections  import datetime -import hashlib  import re  import json  import sqlite3 @@ -765,7 +764,7 @@ class DataciteImporter(EntityImporter):              nameType = c.get('nameType', '') or ''              if nameType in ('', 'Personal'):                  creator_id = None -                for nid in c.get('nameIdentifiers', []): +                for nid in c.get('nameIdentifiers', []) or []:                      name_scheme = nid.get('nameIdentifierScheme', '') or ''                      if not name_scheme.lower() == "orcid":                          continue diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index 2077eae4..5ec6cc3c 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -1,9 +1,7 @@  #!/usr/bin/env python3 -import sys  import json  import base64 -import datetime  import fatcat_openapi_client  from .common import EntityImporter, clean, make_rel_url diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py index 2b630e67..4b1d3702 100644 --- a/python/fatcat_tools/importers/ingest.py +++ b/python/fatcat_tools/importers/ingest.py @@ -1,10 +1,6 @@ -import sys -import json -import base64 -import itertools  import fatcat_openapi_client -from .common import EntityImporter, clean, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS, b32_hex +from .common import EntityImporter, make_rel_url  class IngestFileResultImporter(EntityImporter): @@ -284,4 +280,3 @@ class SavePaperNowFileImporter(IngestFileResultImporter):                      description=self.editgroup_description,                      extra=self.editgroup_extra),                  entity_list=batch)) - diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py index e30bb233..38aa00eb 100644 --- a/python/fatcat_tools/importers/jalc.py +++ b/python/fatcat_tools/importers/jalc.py @@ -1,10 +1,7 @@  import sys -import json  import sqlite3  import datetime -import itertools -import subprocess  from bs4 import BeautifulSoup  import fatcat_openapi_client diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py index d439c80a..32782eac 100644 --- a/python/fatcat_tools/importers/journal_metadata.py +++ b/python/fatcat_tools/importers/journal_metadata.py @@ -1,7 +1,4 @@ -import sys -import json -import itertools  import fatcat_openapi_client  from .common import EntityImporter, clean diff --git a/python/fatcat_tools/importers/jstor.py b/python/fatcat_tools/importers/jstor.py index 96dbf947..5d35f5e2 100644 --- a/python/fatcat_tools/importers/jstor.py +++ b/python/fatcat_tools/importers/jstor.py @@ -183,7 +183,7 @@ class JstorImporter(EntityImporter):                      # suspect jan 1st dates get set by JSTOR when actual                      # date not known (citation needed), so drop them                      release_date = None -         +          volume = None          if article_meta.volume:              volume = article_meta.volume.string or None diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py index 180d7ba3..d95c5847 100644 --- a/python/fatcat_tools/importers/matched.py +++ b/python/fatcat_tools/importers/matched.py @@ -1,12 +1,8 @@ -import sys -import json -import sqlite3 -import itertools  import fatcat_openapi_client  from fatcat_tools.normal import * -from .common import EntityImporter, clean, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS +from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS  class MatchedImporter(EntityImporter): @@ -160,7 +156,6 @@ class MatchedImporter(EntityImporter):              self.counts['skip-update-inflight'] += 1              return False -          # minimum viable "existing" URL cleanup to fix dupes and broken links:          # remove 'None' wayback URLs, and set archive.org rel 'archive'          existing.urls = [u for u in existing.urls if not ('://web.archive.org/web/None/' in u.url)] @@ -207,4 +202,3 @@ class MatchedImporter(EntityImporter):                  description=self.editgroup_description,                  extra=self.editgroup_extra),              entity_list=batch)) - diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py index 554e052f..21feea9e 100644 --- a/python/fatcat_tools/importers/orcid.py +++ b/python/fatcat_tools/importers/orcid.py @@ -1,7 +1,5 @@  import sys -import json -import itertools  import fatcat_openapi_client  from .common import EntityImporter, clean @@ -89,7 +87,7 @@ class OrcidImporter(EntityImporter):          if existing:              self.counts['exists'] += 1              return False -         +          return True      def insert_batch(self, batch): diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py index 3d3e3a8c..d8a6842c 100644 --- a/python/fatcat_tools/importers/pubmed.py +++ b/python/fatcat_tools/importers/pubmed.py @@ -1,11 +1,9 @@  import sys  import json -import sqlite3  import datetime  import warnings  from bs4 import BeautifulSoup -from bs4.element import NavigableString  import fatcat_openapi_client  from fatcat_tools.normal import * @@ -314,7 +312,7 @@ class PubmedImporter(EntityImporter):      Importer for PubMed/MEDLINE XML metadata.      If lookup_refs is true, will do identifer-based lookups for all references. -     +      TODO: MEDLINE doesn't include PMC/OA license; could include in importer?      """ @@ -502,7 +500,7 @@ class PubmedImporter(EntityImporter):              ce_edit = self.create_container(ce)              container_id = ce_edit.ident              self._issnl_id_map[issnl] = container_id -        +          ji = journal.JournalIssue          volume = None          if ji.find("Volume"): diff --git a/python/fatcat_tools/importers/shadow.py b/python/fatcat_tools/importers/shadow.py index 4cd22775..c04e9aa8 100644 --- a/python/fatcat_tools/importers/shadow.py +++ b/python/fatcat_tools/importers/shadow.py @@ -1,8 +1,4 @@ -import sys -import json -import sqlite3 -import itertools  import fatcat_openapi_client  from fatcat_tools.normal import * @@ -192,4 +188,3 @@ class ShadowLibraryImporter(EntityImporter):                  description=self.editgroup_description,                  extra=self.editgroup_extra),              entity_list=batch)) - diff --git a/python/fatcat_tools/kafka.py b/python/fatcat_tools/kafka.py index 53b62a37..228de134 100644 --- a/python/fatcat_tools/kafka.py +++ b/python/fatcat_tools/kafka.py @@ -1,5 +1,5 @@ -from confluent_kafka import Consumer, Producer, KafkaException +from confluent_kafka import Producer, KafkaException  def kafka_fail_fast(err, msg): diff --git a/python/fatcat_tools/normal.py b/python/fatcat_tools/normal.py index f962ff3c..e65af8d6 100644 --- a/python/fatcat_tools/normal.py +++ b/python/fatcat_tools/normal.py @@ -231,4 +231,3 @@ def test_clean_orcid():      assert clean_orcid("0123-4567-3456-6789 ") == "0123-4567-3456-6789"      assert clean_orcid("01234567-3456-6780") == None      assert clean_orcid("0x23-4567-3456-6780") == None - diff --git a/python/fatcat_tools/reviewers/review_common.py b/python/fatcat_tools/reviewers/review_common.py index ecf7da8f..336a47f6 100644 --- a/python/fatcat_tools/reviewers/review_common.py +++ b/python/fatcat_tools/reviewers/review_common.py @@ -1,12 +1,10 @@ -import json  import time  import datetime  import subprocess  from collections import Counter  import fatcat_openapi_client -from fatcat_openapi_client.rest import ApiException  """  checks should return: @@ -132,11 +130,12 @@ class ReviewBot:                      status, result_counts[status])              for result in results:                  if result.status == status and result.check_type == "editgroup": -                    comment += "\n- {description}".format(result.description) +                    comment += "\n- {description}".format(description=result.description)                  if result.status == status and result.check_type != "editgroup": -                    comment += "\n- {check_type} [{rev}](/{release_type}/rev/{rev}): {description}".format( +                    comment += "\n- {check_type} [{rev}](/{entity_type}/rev/{rev}): {description}".format(                          check_type=result.check_type,                          rev=result.rev, +                        entity_type=result.check_type,                          description=result.description)          extra = self.extra.copy() diff --git a/python/fatcat_tools/transforms/csl.py b/python/fatcat_tools/transforms/csl.py index 832ad6aa..ba199efb 100644 --- a/python/fatcat_tools/transforms/csl.py +++ b/python/fatcat_tools/transforms/csl.py @@ -1,6 +1,5 @@  import json -import collections  from citeproc import CitationStylesStyle, CitationStylesBibliography  from citeproc import Citation, CitationItem @@ -8,8 +7,6 @@ from citeproc import formatter  from citeproc.source.json import CiteProcJSON  from citeproc_styles import get_style_filepath -from fatcat_openapi_client import ApiClient -  def contribs_by_role(contribs, role):      ret = [c.copy() for c in contribs if c['role'] == role] @@ -214,14 +211,13 @@ def citeproc_csl(csl_json, style, html=False):      lines = bib.bibliography()[0]      if style == "bibtex":          out = "" -        for l in lines: -            if l.startswith(" @"): +        for line in lines: +            if line.startswith(" @"):                  out += "@" -            elif l.startswith(" "): -                out += "\n " + l +            elif line.startswith(" "): +                out += "\n " + line              else: -                out += l +                out += line          return ''.join(out)      else:          return ''.join(lines) - diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index 1d35141b..8ec9c164 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -1,7 +1,5 @@ -import collections  import tldextract -from fatcat_openapi_client import ApiClient  def check_kbart(year, archive): @@ -14,11 +12,11 @@ def check_kbart(year, archive):  def test_check_kbart(): -    assert check_kbart(1990, dict()) == None -    assert check_kbart(1990, dict(year_spans=[[2000, 2000]])) == False -    assert check_kbart(2000, dict(year_spans=[[2000, 2000]])) == True -    assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1990, 2000]])) == False -    assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1930, 2000]])) == True +    assert check_kbart(1990, dict()) is None +    assert check_kbart(1990, dict(year_spans=[[2000, 2000]])) is False +    assert check_kbart(2000, dict(year_spans=[[2000, 2000]])) is True +    assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1990, 2000]])) is False +    assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1930, 2000]])) is True  def release_to_elasticsearch(entity, force_bool=True): diff --git a/python/fatcat_tools/transforms/entities.py b/python/fatcat_tools/transforms/entities.py index ae666413..53455e85 100644 --- a/python/fatcat_tools/transforms/entities.py +++ b/python/fatcat_tools/transforms/entities.py @@ -32,4 +32,3 @@ def entity_from_json(json_str, entity_type, api_client=None):  def entity_from_dict(obj, entity_type, api_client=None):      json_str = json.dumps(obj)      return entity_from_json(json_str, entity_type, api_client=api_client) - diff --git a/python/fatcat_tools/transforms/ingest.py b/python/fatcat_tools/transforms/ingest.py index 22b5154e..2f4e2271 100644 --- a/python/fatcat_tools/transforms/ingest.py +++ b/python/fatcat_tools/transforms/ingest.py @@ -61,4 +61,3 @@ def release_ingest_request(release, ingest_request_source='fatcat', ingest_type=          ingest_request['link_source_id'] = link_source_id      return ingest_request - diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py index 3a49f86e..d5891ad1 100644 --- a/python/fatcat_tools/workers/changelog.py +++ b/python/fatcat_tools/workers/changelog.py @@ -354,4 +354,3 @@ class EntityUpdatesWorker(FatcatWorker):              producer.flush()              # TODO: publish updated 'work' entities to a topic              consumer.store_offsets(message=msg) - diff --git a/python/fatcat_tools/workers/elasticsearch.py b/python/fatcat_tools/workers/elasticsearch.py index e58b3da1..61854c31 100644 --- a/python/fatcat_tools/workers/elasticsearch.py +++ b/python/fatcat_tools/workers/elasticsearch.py @@ -1,6 +1,5 @@  import json -import time  import requests  from confluent_kafka import Consumer, KafkaException @@ -138,7 +137,6 @@ class ElasticsearchReleaseWorker(FatcatWorker):                  consumer.store_offsets(message=msg) -  class ElasticsearchContainerWorker(ElasticsearchReleaseWorker):      def __init__(self, kafka_hosts, consume_topic, poll_interval=10.0, offset=None, diff --git a/python/fatcat_tools/workers/worker_common.py b/python/fatcat_tools/workers/worker_common.py index ef79f528..8c2936be 100644 --- a/python/fatcat_tools/workers/worker_common.py +++ b/python/fatcat_tools/workers/worker_common.py @@ -1,15 +1,6 @@ -import re -import sys -import csv -import json -import itertools -from itertools import islice  from confluent_kafka import Consumer, KafkaException, TopicPartition -import fatcat_openapi_client -from fatcat_openapi_client.rest import ApiException -  def most_recent_message(topic, kafka_config):      """ diff --git a/python/fatcat_transform.py b/python/fatcat_transform.py index 14595670..8e01c860 100755 --- a/python/fatcat_transform.py +++ b/python/fatcat_transform.py @@ -9,16 +9,8 @@ import sys  import json  import argparse -from citeproc import CitationStylesStyle, CitationStylesBibliography -from citeproc import Citation, CitationItem -from citeproc import formatter -from citeproc.source.json import CiteProcJSON -from citeproc_styles import get_style_filepath - -import fatcat_openapi_client -from fatcat_openapi_client.rest import ApiException  from fatcat_openapi_client import ReleaseEntity, ContainerEntity, FileEntity, ChangelogEntry -from fatcat_tools import uuid2fcid, entity_from_json, entity_to_dict, \ +from fatcat_tools import entity_from_json, \      release_to_elasticsearch, container_to_elasticsearch, \      file_to_elasticsearch, changelog_to_elasticsearch, public_api, \      release_to_csl, citeproc_csl diff --git a/python/fatcat_util.py b/python/fatcat_util.py index d6e76697..a45b2ba4 100755 --- a/python/fatcat_util.py +++ b/python/fatcat_util.py @@ -8,14 +8,9 @@ TODO:  """  import sys -import json  import argparse -import fatcat_openapi_client -from fatcat_openapi_client.rest import ApiException -from fatcat_openapi_client import ReleaseEntity, ContainerEntity, ChangelogEntry -from fatcat_tools import uuid2fcid, fcid2uuid, entity_from_json, \ -    entity_to_dict, public_api, authenticated_api +from fatcat_tools import uuid2fcid, fcid2uuid, authenticated_api  def run_uuid2fcid(args): diff --git a/python/fatcat_web/auth.py b/python/fatcat_web/auth.py index 8e26b7fe..ed9f2252 100644 --- a/python/fatcat_web/auth.py +++ b/python/fatcat_web/auth.py @@ -2,8 +2,7 @@  from collections import namedtuple  import requests  import pymacaroons -from flask import Flask, render_template, send_from_directory, request, \ -    url_for, abort, g, redirect, jsonify, session, flash +from flask import render_template, abort, redirect, session, flash  from flask_login import logout_user, login_user, UserMixin  from fatcat_web import login_manager, app, api, priv_api, Config  import fatcat_openapi_client @@ -141,8 +140,9 @@ def handle_wmoauth(username):      # pass off "as if" we did OAuth successfully      FakeOAuthRemote = namedtuple('FakeOAuthRemote', ['name', 'OAUTH_CONFIG'])      remote = FakeOAuthRemote(name='wikipedia', OAUTH_CONFIG={'api_base_url': "https://www.mediawiki.org/w"}) +    conservative_username = ''.join(filter(str.isalnum, username))      oauth_info = { -        'preferred_username': username, +        'preferred_username': conservative_username,          'iss': "https://www.mediawiki.org/w",          'sub': username,      } diff --git a/python/fatcat_web/editing_routes.py b/python/fatcat_web/editing_routes.py index 87223868..44000b1a 100644 --- a/python/fatcat_web/editing_routes.py +++ b/python/fatcat_web/editing_routes.py @@ -1,16 +1,11 @@ -import os -import json -from flask import Flask, render_template, send_from_directory, request, \ -    url_for, abort, g, redirect, jsonify, session, flash, Response +from flask import render_template, abort, redirect, session, flash  from flask_login import login_required  from fatcat_openapi_client import Editgroup  from fatcat_openapi_client.rest import ApiException  from fatcat_tools.transforms import * -from fatcat_web import app, api, auth_api, priv_api -from fatcat_web.auth import handle_token_login, handle_logout, load_user, handle_ia_xauth -from fatcat_web.cors import crossdomain +from fatcat_web import app, api, auth_api  from fatcat_web.search import *  from fatcat_web.forms import *  from fatcat_web.entity_helpers import * @@ -20,7 +15,7 @@ from fatcat_web.entity_helpers import *  def form_editgroup_get_or_create(api, edit_form):      """ -    This function expects a submitted, validated  +    This function expects a submitted, validated edit form      """      if edit_form.editgroup_id.data:          try: @@ -43,8 +38,10 @@ def form_editgroup_get_or_create(api, edit_form):              app.log.warning(ae)              abort(ae.status)          # set this session editgroup_id -        flash('Started new editgroup <a href="/editgroup/{}">{}</a>' \ -            .format(eg.editgroup_id, eg.editgroup_id)) +        flash('Started new editgroup <a href="/editgroup/{}">{}</a>'.format( +            eg.editgroup_id, +            eg.editgroup_id, +        ))      return eg  def generic_entity_edit(editgroup_id, entity_type, existing_ident, edit_template): @@ -68,7 +65,7 @@ def generic_entity_edit(editgroup_id, entity_type, existing_ident, edit_template      Helpers:      - get_editgroup_revision(editgroup, entity_type, ident) -> None or entity -     +      TODO: prev_rev interlock      """ @@ -214,7 +211,7 @@ def generic_edit_delete(editgroup_id, entity_type, edit_id):      # API on behalf of user      user_api = auth_api(session['api_token']) -     +      # do the deletion      try:          if entity_type == 'container': @@ -358,4 +355,3 @@ def work_editgroup_edit(editgroup_id, ident):  @app.route('/editgroup/<editgroup_id>/work/edit/<edit_id>/delete', methods=['POST'])  def work_edit_delete(editgroup_id, edit_id):      return abort(404) - diff --git a/python/fatcat_web/forms.py b/python/fatcat_web/forms.py index 377e35aa..15585bf6 100644 --- a/python/fatcat_web/forms.py +++ b/python/fatcat_web/forms.py @@ -8,7 +8,7 @@ from flask_wtf import FlaskForm  from wtforms import SelectField, DateField, StringField, IntegerField, \      HiddenField, FormField, FieldList, validators -from fatcat_openapi_client import ContainerEntity, CreatorEntity, FileEntity, \ +from fatcat_openapi_client import ContainerEntity, FileEntity, \      ReleaseEntity, ReleaseContrib, FileUrl, ReleaseExtIds  release_type_options = [ @@ -293,9 +293,9 @@ class FileUrlForm(FlaskForm):          default='web')  class FileEntityForm(EntityEditForm): +    # TODO: positive definite      size = IntegerField('Size (bytes)',          [validators.DataRequired()]) -        # TODO: positive definite      md5 = StringField("MD5",          [validators.Optional(True),           validators.Length(min=32, max=32)]) @@ -413,4 +413,3 @@ class SavePaperNowForm(FlaskForm):              ingest_request['link_source'] = 'arxiv'              ingest_request['link_source_id'] = release.ext_ids.arxiv          return ingest_request - diff --git a/python/fatcat_web/graphics.py b/python/fatcat_web/graphics.py index fea7eb5a..56852627 100644 --- a/python/fatcat_web/graphics.py +++ b/python/fatcat_web/graphics.py @@ -33,4 +33,3 @@ def ia_coverage_histogram(rows):      chart.add('via Fatcat', [y['available'] for y in years])      chart.add('Missing', [y['missing'] for y in years])      return chart - diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index 58f4b7e0..4684f799 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -2,12 +2,12 @@  import os  import sys  import json -from flask import Flask, render_template, make_response, send_from_directory, \ -    request, url_for, abort, g, redirect, jsonify, session, flash, Response +from flask import render_template, make_response, send_from_directory, \ +    request, url_for, abort, redirect, jsonify, session, flash, Response  from flask_login import login_required  from flask_wtf.csrf import CSRFError -from fatcat_openapi_client import Editgroup, EditgroupAnnotation +from fatcat_openapi_client import EditgroupAnnotation  from fatcat_openapi_client.rest import ApiException  from fatcat_tools.transforms import *  from fatcat_tools.normal import * @@ -1042,4 +1042,3 @@ def robots():      return send_from_directory(os.path.join(app.root_path, 'static'),                                 'robots.txt',                                 mimetype='text/plain') - diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py index c1246d22..4a87c735 100644 --- a/python/fatcat_web/search.py +++ b/python/fatcat_web/search.py @@ -66,7 +66,6 @@ def do_release_search(q, limit=30, fulltext_only=True, offset=0):      if len(q.split()) == 1 and q.startswith("10.") and q.count("/") >= 1:          q = 'doi:"{}"'.format(q) -      if fulltext_only:          q += " in_web:true" @@ -297,7 +296,7 @@ def get_elastic_container_random_releases(ident, limit=5):  def get_elastic_container_histogram(ident):      """ -    Fetches a stacked histogram of  +    Fetches a stacked histogram      Filters to the past 500 years (at most), or about 1000 values. diff --git a/python/fatcat_web/web_config.py b/python/fatcat_web/web_config.py index 0cb153d6..344f1c2a 100644 --- a/python/fatcat_web/web_config.py +++ b/python/fatcat_web/web_config.py @@ -83,4 +83,3 @@ class Config(object):              'fatcat_domain': FATCAT_DOMAIN,          },      } - diff --git a/python/fatcat_worker.py b/python/fatcat_worker.py index 03167a3a..19ac16cd 100755 --- a/python/fatcat_worker.py +++ b/python/fatcat_worker.py @@ -2,7 +2,6 @@  import sys  import argparse -import datetime  import raven  from fatcat_tools import public_api diff --git a/python/shell.py b/python/shell.py index c207a325..d53911b9 100644 --- a/python/shell.py +++ b/python/shell.py @@ -1,3 +1,4 @@ +# flake8: noqa  # bunch of libraries one might want  import uuid diff --git a/python/tests/api_annotations.py b/python/tests/api_annotations.py index e5566eef..0606b637 100644 --- a/python/tests/api_annotations.py +++ b/python/tests/api_annotations.py @@ -1,10 +1,5 @@ -import json -import pytest -from copy import copy -  from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException  from fixtures import * diff --git a/python/tests/api_containers.py b/python/tests/api_containers.py index 0850fab7..70dbcd7e 100644 --- a/python/tests/api_containers.py +++ b/python/tests/api_containers.py @@ -1,10 +1,5 @@ -import json -import pytest -from copy import copy -  from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException  from fixtures import * @@ -43,7 +38,7 @@ def test_container(api):      # get redirects (none)      assert api.get_container_redirects(c2.ident) == [] -     +      # delete      eg = quick_eg(api)      api.delete_container(eg.editgroup_id, c2.ident) @@ -59,4 +54,3 @@ def test_container_examples(api):      c2 = api.lookup_container(issnl=c1.issnl)      assert c1.ident == c2.ident - diff --git a/python/tests/api_creators.py b/python/tests/api_creators.py index 1ce6380a..b271e2b3 100644 --- a/python/tests/api_creators.py +++ b/python/tests/api_creators.py @@ -1,10 +1,5 @@ -import json -import pytest -from copy import copy -  from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException  from fixtures import * @@ -52,7 +47,7 @@ def test_creators(api):      assert c1.display_name == c3.display_name      assert c1.extra == c3.extra -     +      # delete      eg = quick_eg(api)      api.delete_creator(eg.editgroup_id, c2.ident) diff --git a/python/tests/api_editgroups.py b/python/tests/api_editgroups.py index d82c9233..142687c2 100644 --- a/python/tests/api_editgroups.py +++ b/python/tests/api_editgroups.py @@ -1,11 +1,8 @@ -import json  import pytest  import datetime -from copy import copy  from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException  from fixtures import * diff --git a/python/tests/api_editor.py b/python/tests/api_editor.py index 64bb2759..91881743 100644 --- a/python/tests/api_editor.py +++ b/python/tests/api_editor.py @@ -1,12 +1,5 @@ -import json -import pytest -import datetime -from copy import copy - -from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException -from fixtures import * +from fixtures import api  def test_editor_update(api): diff --git a/python/tests/api_entity_editing.py b/python/tests/api_entity_editing.py index d5377e18..fee4e34f 100644 --- a/python/tests/api_entity_editing.py +++ b/python/tests/api_entity_editing.py @@ -1,10 +1,7 @@ -import json  import pytest -from copy import copy  from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException  from fixtures import * @@ -180,4 +177,3 @@ def test_edit_delete_all(api_dummy_entities):      assert len(eg.edits.webcaptures) == 0      assert len(eg.edits.releases) == 0      assert len(eg.edits.works) == 0 - diff --git a/python/tests/api_files.py b/python/tests/api_files.py index 74865daa..65eda993 100644 --- a/python/tests/api_files.py +++ b/python/tests/api_files.py @@ -1,10 +1,5 @@ -import json -import pytest -from copy import copy -  from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException  from fixtures import * @@ -50,7 +45,7 @@ def test_file(api):      # get redirects (none)      assert api.get_file_redirects(f2.ident) == [] -     +      # delete      eg = quick_eg(api)      api.delete_file(eg.editgroup_id, f2.ident) diff --git a/python/tests/api_filesets.py b/python/tests/api_filesets.py index 7f3235cb..6d755744 100644 --- a/python/tests/api_filesets.py +++ b/python/tests/api_filesets.py @@ -1,10 +1,7 @@ -import json  import pytest -from copy import copy  from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException  from fixtures import * @@ -64,7 +61,7 @@ def test_fileset(api):      # get redirects (none)      assert api.get_fileset_redirects(fs2.ident) == [] -     +      # delete      eg = quick_eg(api)      api.delete_fileset(eg.editgroup_id, fs2.ident) @@ -100,4 +97,3 @@ def test_bad_fileset(api):      for b in bad_list:          with pytest.raises(fatcat_openapi_client.rest.ApiException):              api.create_fileset(eg.editgroup_id, b) - diff --git a/python/tests/api_misc.py b/python/tests/api_misc.py index 11f85fd6..4c9ac9a6 100644 --- a/python/tests/api_misc.py +++ b/python/tests/api_misc.py @@ -1,10 +1,5 @@ -import json -import pytest -from copy import copy -  from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException  from fixtures import * @@ -45,4 +40,3 @@ def test_unexpected_body(api):      )      f1.urls = [dict(url="http://thing", rel="repository", asdf="blue")]      api.create_file(eg.editgroup_id, f1) - diff --git a/python/tests/api_releases.py b/python/tests/api_releases.py index 2df08698..c4c05ea6 100644 --- a/python/tests/api_releases.py +++ b/python/tests/api_releases.py @@ -1,11 +1,8 @@ -import json  import pytest  import datetime -from copy import copy  from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException  from fixtures import * @@ -130,7 +127,7 @@ def test_release(api):      # get redirects (none)      assert api.get_release_redirects(r2.ident) == [] -     +      # delete      eg = quick_eg(api)      api.delete_release(eg.editgroup_id, r2.ident) @@ -210,4 +207,3 @@ def test_controlled_vocab(api):          api.create_release(eg.editgroup_id, r3)      r3.withdrawn_status = "spam"      api.create_release(eg.editgroup_id, r3) - diff --git a/python/tests/api_webcaptures.py b/python/tests/api_webcaptures.py index 1054b41f..85813218 100644 --- a/python/tests/api_webcaptures.py +++ b/python/tests/api_webcaptures.py @@ -1,11 +1,8 @@ -import json  import pytest  import datetime -from copy import copy  from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException  from fixtures import * @@ -84,7 +81,7 @@ def test_webcapture(api):      # get redirects (none)      assert api.get_webcapture_redirects(wc2.ident) == [] -     +      # delete      eg = quick_eg(api)      api.delete_webcapture(eg.editgroup_id, wc2.ident) diff --git a/python/tests/citation_efficiency.py b/python/tests/citation_efficiency.py index aefb7d15..f8807db6 100644 --- a/python/tests/citation_efficiency.py +++ b/python/tests/citation_efficiency.py @@ -1,10 +1,5 @@ -import json -import pytest -from copy import copy -  from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException  from fixtures import * @@ -110,4 +105,3 @@ def test_citation_encoding(api):      assert container == r1.refs[0].container_name      assert extra == r1.refs[0].extra      assert locator == r1.refs[0].locator - diff --git a/python/tests/clean_files.py b/python/tests/clean_files.py index 8a87f218..ce1102be 100644 --- a/python/tests/clean_files.py +++ b/python/tests/clean_files.py @@ -1,9 +1,10 @@  import copy  import pytest +  from fatcat_tools.cleanups import FileCleaner  from fatcat_openapi_client import * -from fixtures import api +from fixtures import *  @pytest.fixture(scope="function") diff --git a/python/tests/fixtures.py b/python/tests/fixtures.py index 78742114..44c7be63 100644 --- a/python/tests/fixtures.py +++ b/python/tests/fixtures.py @@ -1,8 +1,4 @@ -import os -import time -import json -import signal  import pytest  from dotenv import load_dotenv  import fatcat_web @@ -87,4 +83,3 @@ def test_get_changelog_entry(api):  def quick_eg(api_inst):      eg = api_inst.create_editgroup(fatcat_openapi_client.Editgroup())      return eg - diff --git a/python/tests/harvest_crossref.py b/python/tests/harvest_crossref.py index e902cda5..cad0f03b 100644 --- a/python/tests/harvest_crossref.py +++ b/python/tests/harvest_crossref.py @@ -1,6 +1,5 @@  import json -import pytest  import datetime  import responses  from fatcat_tools.harvest import * diff --git a/python/tests/harvest_datacite.py b/python/tests/harvest_datacite.py index 004d1fef..13c6042a 100644 --- a/python/tests/harvest_datacite.py +++ b/python/tests/harvest_datacite.py @@ -1,6 +1,5 @@  import json -import pytest  import datetime  import responses  from fatcat_tools.harvest import * diff --git a/python/tests/harvest_pubmed.py b/python/tests/harvest_pubmed.py index f8db46b6..58bc4226 100644 --- a/python/tests/harvest_pubmed.py +++ b/python/tests/harvest_pubmed.py @@ -2,14 +2,11 @@  Test pubmed FTP harvest.  """ -import datetime -import json  import os - +import datetime  import pytest  from fatcat_tools.harvest import * -from fatcat_tools.harvest.pubmed import generate_date_file_map  def test_pubmed_harvest_date(mocker): @@ -77,4 +74,3 @@ def test_pubmed_harvest_date_no_pmid(mocker):      # The file has not PMID, not importable.      with pytest.raises(ValueError):          harvester.fetch_date(datetime.datetime.strptime(test_date, '%Y-%m-%d')) - diff --git a/python/tests/harvest_state.py b/python/tests/harvest_state.py index 8b7deba6..cc624d97 100644 --- a/python/tests/harvest_state.py +++ b/python/tests/harvest_state.py @@ -1,6 +1,4 @@ -import json -import pytest  import datetime  from fatcat_tools.harvest import * diff --git a/python/tests/import_arabesque.py b/python/tests/import_arabesque.py index 9483eb45..20cde3a6 100644 --- a/python/tests/import_arabesque.py +++ b/python/tests/import_arabesque.py @@ -1,8 +1,9 @@  import json  import pytest +  from fatcat_tools.importers import ArabesqueMatchImporter, SqlitePusher, JsonLinePusher -from fixtures import api +from fixtures import *  @pytest.fixture(scope="function") diff --git a/python/tests/import_arxiv.py b/python/tests/import_arxiv.py index 1e649616..9306e67c 100644 --- a/python/tests/import_arxiv.py +++ b/python/tests/import_arxiv.py @@ -1,10 +1,10 @@ -import json, gzip  import pytest -from fatcat_tools.importers import ArxivRawImporter, Bs4XmlFilePusher -from fixtures import api  from bs4 import BeautifulSoup +from fatcat_tools.importers import ArxivRawImporter, Bs4XmlFilePusher +from fixtures import * +  @pytest.fixture(scope="function")  def arxiv_importer(api): diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py index afa2410f..65cd2c37 100644 --- a/python/tests/import_crossref.py +++ b/python/tests/import_crossref.py @@ -1,8 +1,10 @@ -import json, gzip +import json +import gzip  import pytest +  from fatcat_tools.importers import CrossrefImporter, JsonLinePusher -from fixtures import api +from fixtures import *  @pytest.fixture(scope="function") diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py index b01a11e6..b94b6bc5 100644 --- a/python/tests/import_datacite.py +++ b/python/tests/import_datacite.py @@ -2,10 +2,13 @@  Test datacite importer.  """ -import collections +import gzip +import json  import datetime +import collections +  import pytest -import gzip +  from fatcat_tools.importers import DataciteImporter, JsonLinePusher  from fatcat_tools.importers.datacite import (      find_original_language_title, diff --git a/python/tests/import_grobid_metadata.py b/python/tests/import_grobid_metadata.py index 51ab3faa..52284b89 100644 --- a/python/tests/import_grobid_metadata.py +++ b/python/tests/import_grobid_metadata.py @@ -3,8 +3,9 @@ import os  import json  import base64  import pytest +  from fatcat_tools.importers import GrobidMetadataImporter, LinePusher -from fixtures import api +from fixtures import *  """  WARNING: these tests are currently very fragile because they have database diff --git a/python/tests/import_ingest.py b/python/tests/import_ingest.py index 02486de6..ebe2923c 100644 --- a/python/tests/import_ingest.py +++ b/python/tests/import_ingest.py @@ -1,6 +1,7 @@  import json  import pytest +  from fatcat_tools.importers import IngestFileResultImporter, JsonLinePusher  from fixtures import * diff --git a/python/tests/import_jalc.py b/python/tests/import_jalc.py index f61ec849..ff757e51 100644 --- a/python/tests/import_jalc.py +++ b/python/tests/import_jalc.py @@ -1,10 +1,10 @@ -import json, gzip  import pytest -from fatcat_tools.importers import JalcImporter, Bs4XmlFilePusher, Bs4XmlLinesPusher -from fixtures import api  from bs4 import BeautifulSoup +from fatcat_tools.importers import JalcImporter, Bs4XmlFilePusher, Bs4XmlLinesPusher +from fixtures import * +  @pytest.fixture(scope="function")  def jalc_importer(api): diff --git a/python/tests/import_journal_metadata.py b/python/tests/import_journal_metadata.py index cfeee517..51b0a78a 100644 --- a/python/tests/import_journal_metadata.py +++ b/python/tests/import_journal_metadata.py @@ -1,7 +1,8 @@  import pytest +  from fatcat_tools.importers import JournalMetadataImporter, JsonLinePusher -from fixtures import api +from fixtures import *  @pytest.fixture(scope="function") diff --git a/python/tests/import_jstor.py b/python/tests/import_jstor.py index 019f0aae..8494ffb2 100644 --- a/python/tests/import_jstor.py +++ b/python/tests/import_jstor.py @@ -1,10 +1,10 @@ -import json, gzip  import pytest -from fatcat_tools.importers import JstorImporter, Bs4XmlFilePusher -from fixtures import api  from bs4 import BeautifulSoup +from fatcat_tools.importers import JstorImporter, Bs4XmlFilePusher +from fixtures import * +  @pytest.fixture(scope="function")  def jstor_importer(api): diff --git a/python/tests/import_matched.py b/python/tests/import_matched.py index 72ed068c..6b61c53c 100644 --- a/python/tests/import_matched.py +++ b/python/tests/import_matched.py @@ -1,8 +1,9 @@  import json  import pytest +  from fatcat_tools.importers import MatchedImporter, JsonLinePusher -from fixtures import api +from fixtures import *  @pytest.fixture(scope="function") diff --git a/python/tests/import_orcid.py b/python/tests/import_orcid.py index 57886b52..f78ccde7 100644 --- a/python/tests/import_orcid.py +++ b/python/tests/import_orcid.py @@ -1,8 +1,9 @@  import json  import pytest +  from fatcat_tools.importers import OrcidImporter, JsonLinePusher -from fixtures import api +from fixtures import *  @pytest.fixture(scope="function") diff --git a/python/tests/import_pubmed.py b/python/tests/import_pubmed.py index f57aa273..201f533c 100644 --- a/python/tests/import_pubmed.py +++ b/python/tests/import_pubmed.py @@ -1,10 +1,10 @@ -import json, gzip  import pytest -from fatcat_tools.importers import PubmedImporter, Bs4XmlLargeFilePusher -from fixtures import api  from bs4 import BeautifulSoup +from fatcat_tools.importers import PubmedImporter, Bs4XmlLargeFilePusher +from fixtures import * +  @pytest.fixture(scope="function")  def pubmed_importer(api): @@ -137,4 +137,3 @@ def test_pubmed_xml_parse_refs(pubmed_importer):          r1 = pubmed_importer.parse_record(soup.find_all("PubmedArticle")[0])      assert len(r1.refs) > 1 - diff --git a/python/tests/import_shadow.py b/python/tests/import_shadow.py index 70a918d2..40a1d589 100644 --- a/python/tests/import_shadow.py +++ b/python/tests/import_shadow.py @@ -1,8 +1,9 @@  import json  import pytest +  from fatcat_tools.importers import ShadowLibraryImporter, JsonLinePusher -from fixtures import api +from fixtures import *  @pytest.fixture(scope="function") @@ -58,4 +59,3 @@ def test_shadow_dict_parse(shadow_importer):                  assert u.url.startswith("https://web.archive.org/")                  assert "20180729135948" in u.url          assert len(f.release_ids) == 1 - diff --git a/python/tests/importer.py b/python/tests/importer.py index 9308ba84..a412b247 100644 --- a/python/tests/importer.py +++ b/python/tests/importer.py @@ -1,8 +1,6 @@ - -import pytest  from fatcat_tools.importers import CrossrefImporter, OrcidImporter -from fixtures import api +from fixtures import *  def test_issnl_mapping_lookup(api): @@ -32,4 +30,3 @@ def test_identifiers(api):      assert oi.is_orcid("0000-00x3-3118-659") == False      assert oi.is_orcid("0000-00033118-659") == False      assert oi.is_orcid("0000-0003-3118-659.") == False - diff --git a/python/tests/subentity_state.py b/python/tests/subentity_state.py index 614f88f1..e03fa99e 100644 --- a/python/tests/subentity_state.py +++ b/python/tests/subentity_state.py @@ -1,10 +1,5 @@ -import json -import pytest -from copy import copy -  from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException  from fixtures import *  """ @@ -221,4 +216,3 @@ def test_app_entity_states(api, app):      assert rv.status_code == 200      rv = app.get('/work/{}'.format(r2.work_id))      assert rv.status_code == 302 - diff --git a/python/tests/tools_api.py b/python/tests/tools_api.py index fd26b8ee..a4b5f2ea 100644 --- a/python/tests/tools_api.py +++ b/python/tests/tools_api.py @@ -1,6 +1,5 @@  import pytest -from fatcat_openapi_client import EditgroupAnnotation  from fatcat_openapi_client.rest import ApiException  from fatcat_tools import public_api, authenticated_api diff --git a/python/tests/transform_csl.py b/python/tests/transform_csl.py index 15c64ce5..6436f876 100644 --- a/python/tests/transform_csl.py +++ b/python/tests/transform_csl.py @@ -1,11 +1,11 @@  import json  import pytest +  from fatcat_tools import *  from fatcat_openapi_client import * - -from fixtures import api  from import_crossref import crossref_importer +from fixtures import *  def test_csl_crossref(crossref_importer):      with open('tests/files/crossref-works.single.json', 'r') as f: diff --git a/python/tests/transform_elasticsearch.py b/python/tests/transform_elasticsearch.py index a954fc4d..f791562c 100644 --- a/python/tests/transform_elasticsearch.py +++ b/python/tests/transform_elasticsearch.py @@ -1,13 +1,13 @@  import json -import pytest +  from fatcat_tools import *  from fatcat_openapi_client import * -from fixtures import api  from import_journal_metadata import journal_metadata_importer -  from import_crossref import crossref_importer  from import_matched import matched_importer +from fixtures import * +  def test_basic_elasticsearch_convert(crossref_importer):      with open('tests/files/crossref-works.single.json', 'r') as f: diff --git a/python/tests/transform_ingest.py b/python/tests/transform_ingest.py index 2d5652b8..c7044bc0 100644 --- a/python/tests/transform_ingest.py +++ b/python/tests/transform_ingest.py @@ -1,12 +1,12 @@  import json -import pytest +  from fatcat_tools.transforms import release_ingest_request  from fatcat_openapi_client import *  from fixtures import api -  from import_crossref import crossref_importer +  def test_basic_ingest_release(crossref_importer):      with open('tests/files/crossref-works.single.json', 'r') as f:          # not a single line @@ -54,4 +54,3 @@ def test_rich_ingest_release():      assert ir['base_url'] == 'https://doi.org/10.123/456'      assert ir['ext_ids']['doi'] == '10.123/456'      assert ir['ext_ids'].get('pmcid') is None - diff --git a/python/tests/web_auth.py b/python/tests/web_auth.py index 2c545b6b..643d806e 100644 --- a/python/tests/web_auth.py +++ b/python/tests/web_auth.py @@ -1,8 +1,5 @@ -import json -import pytest  import responses -from fatcat_openapi_client.rest import ApiException  from fixtures import * diff --git a/python/tests/web_citation_csl.py b/python/tests/web_citation_csl.py index e016b2d9..fb3ce58d 100644 --- a/python/tests/web_citation_csl.py +++ b/python/tests/web_citation_csl.py @@ -1,8 +1,6 @@  import json -import tempfile  import pytest -from fatcat_openapi_client.rest import ApiException  from fixtures import * diff --git a/python/tests/web_editgroup.py b/python/tests/web_editgroup.py index cbdd2176..20dc8d93 100644 --- a/python/tests/web_editgroup.py +++ b/python/tests/web_editgroup.py @@ -1,9 +1,7 @@ -import json -import pytest -from fatcat_openapi_client.rest import ApiException  from fixtures import * +  def test_editgroup_basics(app):      rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaae') @@ -59,4 +57,3 @@ def test_editgroup_annotations_login(app_admin):      assert rv.status_code == 200      assert b'Signup' not in rv.data      assert b'Add Comment' in rv.data - diff --git a/python/tests/web_editing.py b/python/tests/web_editing.py index 773a59dd..17f4f5ae 100644 --- a/python/tests/web_editing.py +++ b/python/tests/web_editing.py @@ -1,7 +1,4 @@ -import json -import pytest -from fatcat_openapi_client.rest import ApiException  from fixtures import * @@ -147,4 +144,3 @@ def test_web_edit_get(app_admin):      rv = app_admin.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/edit')      assert rv.status_code == 200      assert b'1549-1277' in rv.data - diff --git a/python/tests/web_editor.py b/python/tests/web_editor.py index 2614be96..58b21ddf 100644 --- a/python/tests/web_editor.py +++ b/python/tests/web_editor.py @@ -1,7 +1,4 @@ -import json -import pytest -from fatcat_openapi_client.rest import ApiException  from fixtures import * @@ -25,4 +22,3 @@ def test_change_username(app_admin):      assert rv.status_code == 200      rv = app_admin.get('/auth/account')      assert b'admin-tmp' not in rv.data - diff --git a/python/tests/web_entity_views.py b/python/tests/web_entity_views.py index a3f0f897..c1cbdc29 100644 --- a/python/tests/web_entity_views.py +++ b/python/tests/web_entity_views.py @@ -1,7 +1,4 @@ -import json -import pytest -from fatcat_openapi_client.rest import ApiException  from fixtures import *  from fatcat_web.forms import ReleaseEntityForm, FileEntityForm, ContainerEntityForm @@ -367,4 +364,3 @@ def test_web_work(app):      assert rv.status_code == 404      rv = app.get('/work/create')      assert rv.status_code == 404 - diff --git a/python/tests/web_routes.py b/python/tests/web_routes.py index 026776ee..0edf06d1 100644 --- a/python/tests/web_routes.py +++ b/python/tests/web_routes.py @@ -1,7 +1,4 @@ -import json -import pytest -from fatcat_openapi_client.rest import ApiException  from fixtures import * @@ -13,4 +10,3 @@ def test_static_routes(app):      assert app.get("/search").status_code == 302      assert app.get("/static/bogus/route").status_code == 404 - diff --git a/python/tests/web_search.py b/python/tests/web_search.py index 24b817dc..7647bcf5 100644 --- a/python/tests/web_search.py +++ b/python/tests/web_search.py @@ -1,8 +1,7 @@  import json -import pytest  import responses -from fatcat_openapi_client.rest import ApiException +  from fixtures import *  @responses.activate | 
