diff options
94 files changed, 152 insertions, 351 deletions
diff --git a/python/.flake8 b/python/.flake8 index 49935ced..34f6131c 100644 --- a/python/.flake8 +++ b/python/.flake8 @@ -1,7 +1,13 @@ [flake8] # TODO: ANN for better annotation coverage select = C,E,F,W -ignore = F405,F403,W503,E231,E203,E501,E226,E711,E713,E265,ANN101,ANN204,ANN102 -max-complexity = 20 +# The ignores starting with "E251" should be removed after using 'black' +ignore = F405,F403,W503,E231,E203,E501,E226,E711,E713,E265,ANN101,ANN204,ANN102,E251,E128,E302,E261,E241,E201,E202,E266,E124,E305,E225,W504,E123,E122,E125,E121,E129,E126,E712,W191,E101 +# TODO: should reduce max-complexity +max-complexity = 50 exclude = .git,__pycache__,.venv max-line-length = 120 +per-file-ignores = + */__init__.py: F401 + tests/*.py: F401,F811 + tests/transform_csl.py: W291 diff --git a/python/fatcat_cleanup.py b/python/fatcat_cleanup.py index d8b2aea2..4e11139e 100755 --- a/python/fatcat_cleanup.py +++ b/python/fatcat_cleanup.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 -import os, sys, argparse +import os +import sys +import argparse import raven from fatcat_tools import authenticated_api diff --git a/python/fatcat_export.py b/python/fatcat_export.py index 5419e46c..763c217e 100755 --- a/python/fatcat_export.py +++ b/python/fatcat_export.py @@ -11,11 +11,7 @@ import sys import json import argparse -import fatcat_openapi_client -from fatcat_openapi_client.rest import ApiException -from fatcat_openapi_client import ReleaseEntity, ContainerEntity, ChangelogEntry -from fatcat_tools import uuid2fcid, entity_from_json, entity_to_dict, \ - public_api +from fatcat_tools import uuid2fcid, entity_to_dict, public_api def run_export_releases(args): diff --git a/python/fatcat_import.py b/python/fatcat_import.py index 331cf791..252ab3a5 100755 --- a/python/fatcat_import.py +++ b/python/fatcat_import.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 -import os, sys, argparse +import os +import sys +import argparse import raven from fatcat_tools import authenticated_api diff --git a/python/fatcat_review.py b/python/fatcat_review.py index 1d1db9a5..a10fc34b 100755 --- a/python/fatcat_review.py +++ b/python/fatcat_review.py @@ -2,11 +2,10 @@ import sys import argparse -import datetime import raven from fatcat_tools import authenticated_api -from fatcat_tools.reviewers import DummyReviewBot, ReviewBot +from fatcat_tools.reviewers import DummyReviewBot # Yep, a global. Gets DSN from `SENTRY_DSN` environment variable sentry_client = raven.Client() diff --git a/python/fatcat_tools/api_auth.py b/python/fatcat_tools/api_auth.py index add03399..13310120 100644 --- a/python/fatcat_tools/api_auth.py +++ b/python/fatcat_tools/api_auth.py @@ -1,7 +1,7 @@ -import os, sys +import os +import sys import fatcat_openapi_client -from fatcat_openapi_client.rest import ApiException def public_api(host_uri): diff --git a/python/fatcat_tools/cleanups/common.py b/python/fatcat_tools/cleanups/common.py index 47607cf1..04e6ade4 100644 --- a/python/fatcat_tools/cleanups/common.py +++ b/python/fatcat_tools/cleanups/common.py @@ -5,7 +5,6 @@ import subprocess from collections import Counter from fatcat_openapi_client import ApiClient, Editgroup -from fatcat_openapi_client.rest import ApiException from fatcat_tools.transforms import entity_from_dict, entity_to_dict diff --git a/python/fatcat_tools/cleanups/files.py b/python/fatcat_tools/cleanups/files.py index ec7e9064..a40e4a28 100644 --- a/python/fatcat_tools/cleanups/files.py +++ b/python/fatcat_tools/cleanups/files.py @@ -1,7 +1,6 @@ from fatcat_openapi_client.rest import ApiException from fatcat_openapi_client.models import FileEntity -from fatcat_tools.transforms import entity_to_dict, entity_from_json from .common import EntityCleaner @@ -70,4 +69,3 @@ class FileCleaner(EntityCleaner): self.api.update_file(self.get_editgroup_id(), entity.ident, entity) return 1 - diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py index 37628f09..2554fe96 100644 --- a/python/fatcat_tools/harvest/doi_registrars.py +++ b/python/fatcat_tools/harvest/doi_registrars.py @@ -1,16 +1,10 @@ -import re import sys -import csv import json import time -import itertools -import datetime -import requests from confluent_kafka import Producer, KafkaException from urllib.parse import urlparse, parse_qs -from fatcat_tools.workers import most_recent_message from .harvest_common import HarvestState, requests_retry_session @@ -64,7 +58,6 @@ class HarvestCrossrefWorker: to be careful how state is serialized back into kafka. """ - def __init__(self, kafka_hosts, produce_topic, state_topic, contact_email, api_host_url="https://api.crossref.org/works", start_date=None, end_date=None): diff --git a/python/fatcat_tools/harvest/harvest_common.py b/python/fatcat_tools/harvest/harvest_common.py index 27ab8b4a..bdae3054 100644 --- a/python/fatcat_tools/harvest/harvest_common.py +++ b/python/fatcat_tools/harvest/harvest_common.py @@ -1,15 +1,13 @@ import sys import json -import time import datetime import requests from requests.adapters import HTTPAdapter # unclear why pylint chokes on this import. Recent 'requests' and 'urllib3' are # in Pipenv.lock, and there are no errors in QA from requests.packages.urllib3.util.retry import Retry # pylint: disable=import-error -from confluent_kafka import Producer, Consumer, TopicPartition, KafkaException, \ - OFFSET_BEGINNING +from confluent_kafka import Producer, Consumer, TopicPartition, KafkaException # Used for parsing ISO date format (YYYY-MM-DD) @@ -130,9 +128,11 @@ class HarvestState: }).encode('utf-8') if kafka_topic: assert(kafka_config) + def fail_fast(err, msg): if err: raise KafkaException(err) + print("Committing status to Kafka: {}".format(kafka_topic), file=sys.stderr) producer_conf = kafka_config.copy() producer_conf.update({ @@ -159,9 +159,11 @@ class HarvestState: return print("Fetching state from kafka topic: {}".format(kafka_topic), file=sys.stderr) + def fail_fast(err, msg): if err: raise KafkaException(err) + conf = kafka_config.copy() conf.update({ 'group.id': 'dummy_init_group', # should never be committed diff --git a/python/fatcat_tools/harvest/oaipmh.py b/python/fatcat_tools/harvest/oaipmh.py index d30f9507..a7dc3d8c 100644 --- a/python/fatcat_tools/harvest/oaipmh.py +++ b/python/fatcat_tools/harvest/oaipmh.py @@ -1,16 +1,9 @@ -import re import sys -import csv -import json import time -import itertools -import datetime -import requests import sickle from confluent_kafka import Producer, KafkaException -from fatcat_tools.workers import most_recent_message from .harvest_common import HarvestState @@ -31,7 +24,6 @@ class HarvestOaiPmhWorker: would want something similar operationally. Oh well! """ - def __init__(self, kafka_hosts, produce_topic, state_topic, start_date=None, end_date=None): diff --git a/python/fatcat_tools/harvest/pubmed.py b/python/fatcat_tools/harvest/pubmed.py index f6301b8d..802d31d8 100644 --- a/python/fatcat_tools/harvest/pubmed.py +++ b/python/fatcat_tools/harvest/pubmed.py @@ -19,7 +19,7 @@ import tempfile import time import xml.etree.ElementTree as ET from ftplib import FTP -from urllib.parse import urljoin, urlparse +from urllib.parse import urlparse import dateparser from bs4 import BeautifulSoup diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py index c71b33e9..47a8c4da 100644 --- a/python/fatcat_tools/importers/arabesque.py +++ b/python/fatcat_tools/importers/arabesque.py @@ -1,10 +1,6 @@ -import sys -import json -import sqlite3 -import itertools import fatcat_openapi_client -from .common import EntityImporter, clean, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS, b32_hex +from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS, b32_hex ARABESQUE_MATCH_WHERE_CLAUSE='WHERE hit = 1 AND identifier IS NOT NULL' @@ -186,4 +182,3 @@ class ArabesqueMatchImporter(EntityImporter): description=self.editgroup_description, extra=self.editgroup_extra), entity_list=batch)) - diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py index 719592fc..43325ebc 100644 --- a/python/fatcat_tools/importers/arxiv.py +++ b/python/fatcat_tools/importers/arxiv.py @@ -7,7 +7,7 @@ from bs4 import BeautifulSoup from pylatexenc.latex2text import LatexNodes2Text import fatcat_openapi_client -from .common import EntityImporter, clean +from .common import EntityImporter from .crossref import lookup_license_slug @@ -97,7 +97,6 @@ class ArxivRawImporter(EntityImporter): **kwargs) self._test_override = False - def parse_record(self, record): if not record: @@ -188,7 +187,6 @@ class ArxivRawImporter(EntityImporter): if lang == 'en': lang = None - # extra: # withdrawn_date # translation_of @@ -244,7 +242,7 @@ class ArxivRawImporter(EntityImporter): For each version, do a lookup by full arxiv_id, and store work/release id results. - + If a version has a DOI, also do a doi lookup and store that result. If there is an existing release with both matching, set that as the existing work. If they don't match, use the full arxiv_id match and @@ -345,6 +343,7 @@ class ArxivRawImporter(EntityImporter): print(json.dumps(resp)) #sys.exit(-1) -if __name__=='__main__': + +if __name__ == '__main__': parser = ArxivRawImporter(None) parser.parse_file(open(sys.argv[1])) diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py index 536c013b..36a2f9a6 100755 --- a/python/fatcat_tools/importers/cdl_dash_dat.py +++ b/python/fatcat_tools/importers/cdl_dash_dat.py @@ -82,7 +82,7 @@ def cdl_dash_release(meta, extra=None): #print(abstracts) if not abstracts: abstracts = None - + contribs = [] for creator in meta['creator']: contribs.append(ReleaseContrib( @@ -120,7 +120,7 @@ def make_release_fileset(dat_path): with open(dat_path + "/cdl_dash_metadata.json", 'r') as fp: meta_dict = json.loads(fp.read()) - + release = cdl_dash_release(meta_dict) ark_id = release.extra['ark_id'] diff --git a/python/fatcat_tools/importers/chocula.py b/python/fatcat_tools/importers/chocula.py index 375b6051..d5d1cce8 100644 --- a/python/fatcat_tools/importers/chocula.py +++ b/python/fatcat_tools/importers/chocula.py @@ -1,7 +1,4 @@ -import sys -import json -import itertools import fatcat_openapi_client from .common import EntityImporter, clean diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index eafc6546..c0578224 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -161,18 +161,18 @@ def is_cjk(s): return False def test_is_cjk(): - assert is_cjk(None) == False - assert is_cjk('') == False - assert is_cjk('blah') == False - assert is_cjk('岡, 鹿, 梨, 阜, 埼') == True - assert is_cjk('[岡, 鹿, 梨, 阜, 埼]') == True - assert is_cjk('菊') == True - assert is_cjk('岡, 鹿, 梨, 阜, 埼 with eng after') == True - assert is_cjk('水道') == True - assert is_cjk('オウ, イク') == True # kanji - assert is_cjk('ひヒ') == True - assert is_cjk('き゚ゅ') == True - assert is_cjk('ㄴ, ㄹ, ㅁ, ㅂ, ㅅ') == True + assert is_cjk(None) is False + assert is_cjk('') is False + assert is_cjk('blah') is False + assert is_cjk('岡, 鹿, 梨, 阜, 埼') is True + assert is_cjk('[岡, 鹿, 梨, 阜, 埼]') is True + assert is_cjk('菊') is True + assert is_cjk('岡, 鹿, 梨, 阜, 埼 with eng after') is True + assert is_cjk('水道') is True + assert is_cjk('オウ, イク') is True # kanji + assert is_cjk('ひヒ') is True + assert is_cjk('き゚ゅ') is True + assert is_cjk('ㄴ, ㄹ, ㅁ, ㅂ, ㅅ') is True DOMAIN_REL_MAP = { "archive.org": "archive", @@ -368,7 +368,7 @@ class EntityImporter: if self._entity_queue: self.insert_batch(self._entity_queue) self.counts['insert'] += len(self._entity_queue) - self._entity_queue = [] + self._entity_queue = [] return self.counts diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index d26f089f..854e3d9f 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -1,10 +1,6 @@ -import sys -import json import sqlite3 import datetime -import itertools -import subprocess import fatcat_openapi_client from .common import EntityImporter, clean @@ -425,7 +421,6 @@ class CrossrefImporter(EntityImporter): release_year = raw_date[0] release_date = None - original_title = None if obj.get('original-title'): original_title = clean(obj.get('original-title')[0], force_xml=True) @@ -500,7 +495,7 @@ class CrossrefImporter(EntityImporter): if existing: self.counts['exists'] += 1 return False - + return True def insert_batch(self, batch): @@ -509,4 +504,3 @@ class CrossrefImporter(EntityImporter): description=self.editgroup_description, extra=self.editgroup_extra), entity_list=batch)) - diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 434a2941..08c85b30 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -10,7 +10,6 @@ functions (parse_datacite_...), which may help testing. import collections import datetime -import hashlib import re import json import sqlite3 @@ -292,7 +291,6 @@ class DataciteImporter(EntityImporter): print('[{}] skipping non-ascii doi for now'.format(doi)) return None - creators = attributes.get('creators', []) or [] contributors = attributes.get('contributors', []) or [] # Much fewer than creators. diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index 2077eae4..5ec6cc3c 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -1,9 +1,7 @@ #!/usr/bin/env python3 -import sys import json import base64 -import datetime import fatcat_openapi_client from .common import EntityImporter, clean, make_rel_url diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py index 2b630e67..4b1d3702 100644 --- a/python/fatcat_tools/importers/ingest.py +++ b/python/fatcat_tools/importers/ingest.py @@ -1,10 +1,6 @@ -import sys -import json -import base64 -import itertools import fatcat_openapi_client -from .common import EntityImporter, clean, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS, b32_hex +from .common import EntityImporter, make_rel_url class IngestFileResultImporter(EntityImporter): @@ -284,4 +280,3 @@ class SavePaperNowFileImporter(IngestFileResultImporter): description=self.editgroup_description, extra=self.editgroup_extra), entity_list=batch)) - diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py index e30bb233..38aa00eb 100644 --- a/python/fatcat_tools/importers/jalc.py +++ b/python/fatcat_tools/importers/jalc.py @@ -1,10 +1,7 @@ import sys -import json import sqlite3 import datetime -import itertools -import subprocess from bs4 import BeautifulSoup import fatcat_openapi_client diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py index d439c80a..32782eac 100644 --- a/python/fatcat_tools/importers/journal_metadata.py +++ b/python/fatcat_tools/importers/journal_metadata.py @@ -1,7 +1,4 @@ -import sys -import json -import itertools import fatcat_openapi_client from .common import EntityImporter, clean diff --git a/python/fatcat_tools/importers/jstor.py b/python/fatcat_tools/importers/jstor.py index 96dbf947..5d35f5e2 100644 --- a/python/fatcat_tools/importers/jstor.py +++ b/python/fatcat_tools/importers/jstor.py @@ -183,7 +183,7 @@ class JstorImporter(EntityImporter): # suspect jan 1st dates get set by JSTOR when actual # date not known (citation needed), so drop them release_date = None - + volume = None if article_meta.volume: volume = article_meta.volume.string or None diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py index 180d7ba3..d95c5847 100644 --- a/python/fatcat_tools/importers/matched.py +++ b/python/fatcat_tools/importers/matched.py @@ -1,12 +1,8 @@ -import sys -import json -import sqlite3 -import itertools import fatcat_openapi_client from fatcat_tools.normal import * -from .common import EntityImporter, clean, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS +from .common import EntityImporter, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS class MatchedImporter(EntityImporter): @@ -160,7 +156,6 @@ class MatchedImporter(EntityImporter): self.counts['skip-update-inflight'] += 1 return False - # minimum viable "existing" URL cleanup to fix dupes and broken links: # remove 'None' wayback URLs, and set archive.org rel 'archive' existing.urls = [u for u in existing.urls if not ('://web.archive.org/web/None/' in u.url)] @@ -207,4 +202,3 @@ class MatchedImporter(EntityImporter): description=self.editgroup_description, extra=self.editgroup_extra), entity_list=batch)) - diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py index 554e052f..21feea9e 100644 --- a/python/fatcat_tools/importers/orcid.py +++ b/python/fatcat_tools/importers/orcid.py @@ -1,7 +1,5 @@ import sys -import json -import itertools import fatcat_openapi_client from .common import EntityImporter, clean @@ -89,7 +87,7 @@ class OrcidImporter(EntityImporter): if existing: self.counts['exists'] += 1 return False - + return True def insert_batch(self, batch): diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py index 3d3e3a8c..d8a6842c 100644 --- a/python/fatcat_tools/importers/pubmed.py +++ b/python/fatcat_tools/importers/pubmed.py @@ -1,11 +1,9 @@ import sys import json -import sqlite3 import datetime import warnings from bs4 import BeautifulSoup -from bs4.element import NavigableString import fatcat_openapi_client from fatcat_tools.normal import * @@ -314,7 +312,7 @@ class PubmedImporter(EntityImporter): Importer for PubMed/MEDLINE XML metadata. If lookup_refs is true, will do identifer-based lookups for all references. - + TODO: MEDLINE doesn't include PMC/OA license; could include in importer? """ @@ -502,7 +500,7 @@ class PubmedImporter(EntityImporter): ce_edit = self.create_container(ce) container_id = ce_edit.ident self._issnl_id_map[issnl] = container_id - + ji = journal.JournalIssue volume = None if ji.find("Volume"): diff --git a/python/fatcat_tools/importers/shadow.py b/python/fatcat_tools/importers/shadow.py index 4cd22775..c04e9aa8 100644 --- a/python/fatcat_tools/importers/shadow.py +++ b/python/fatcat_tools/importers/shadow.py @@ -1,8 +1,4 @@ -import sys -import json -import sqlite3 -import itertools import fatcat_openapi_client from fatcat_tools.normal import * @@ -192,4 +188,3 @@ class ShadowLibraryImporter(EntityImporter): description=self.editgroup_description, extra=self.editgroup_extra), entity_list=batch)) - diff --git a/python/fatcat_tools/kafka.py b/python/fatcat_tools/kafka.py index 53b62a37..228de134 100644 --- a/python/fatcat_tools/kafka.py +++ b/python/fatcat_tools/kafka.py @@ -1,5 +1,5 @@ -from confluent_kafka import Consumer, Producer, KafkaException +from confluent_kafka import Producer, KafkaException def kafka_fail_fast(err, msg): diff --git a/python/fatcat_tools/normal.py b/python/fatcat_tools/normal.py index f962ff3c..e65af8d6 100644 --- a/python/fatcat_tools/normal.py +++ b/python/fatcat_tools/normal.py @@ -231,4 +231,3 @@ def test_clean_orcid(): assert clean_orcid("0123-4567-3456-6789 ") == "0123-4567-3456-6789" assert clean_orcid("01234567-3456-6780") == None assert clean_orcid("0x23-4567-3456-6780") == None - diff --git a/python/fatcat_tools/reviewers/review_common.py b/python/fatcat_tools/reviewers/review_common.py index 994cec56..336a47f6 100644 --- a/python/fatcat_tools/reviewers/review_common.py +++ b/python/fatcat_tools/reviewers/review_common.py @@ -1,12 +1,10 @@ -import json import time import datetime import subprocess from collections import Counter import fatcat_openapi_client -from fatcat_openapi_client.rest import ApiException """ checks should return: diff --git a/python/fatcat_tools/transforms/csl.py b/python/fatcat_tools/transforms/csl.py index 832ad6aa..ba199efb 100644 --- a/python/fatcat_tools/transforms/csl.py +++ b/python/fatcat_tools/transforms/csl.py @@ -1,6 +1,5 @@ import json -import collections from citeproc import CitationStylesStyle, CitationStylesBibliography from citeproc import Citation, CitationItem @@ -8,8 +7,6 @@ from citeproc import formatter from citeproc.source.json import CiteProcJSON from citeproc_styles import get_style_filepath -from fatcat_openapi_client import ApiClient - def contribs_by_role(contribs, role): ret = [c.copy() for c in contribs if c['role'] == role] @@ -214,14 +211,13 @@ def citeproc_csl(csl_json, style, html=False): lines = bib.bibliography()[0] if style == "bibtex": out = "" - for l in lines: - if l.startswith(" @"): + for line in lines: + if line.startswith(" @"): out += "@" - elif l.startswith(" "): - out += "\n " + l + elif line.startswith(" "): + out += "\n " + line else: - out += l + out += line return ''.join(out) else: return ''.join(lines) - diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index 1d35141b..8ec9c164 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -1,7 +1,5 @@ -import collections import tldextract -from fatcat_openapi_client import ApiClient def check_kbart(year, archive): @@ -14,11 +12,11 @@ def check_kbart(year, archive): def test_check_kbart(): - assert check_kbart(1990, dict()) == None - assert check_kbart(1990, dict(year_spans=[[2000, 2000]])) == False - assert check_kbart(2000, dict(year_spans=[[2000, 2000]])) == True - assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1990, 2000]])) == False - assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1930, 2000]])) == True + assert check_kbart(1990, dict()) is None + assert check_kbart(1990, dict(year_spans=[[2000, 2000]])) is False + assert check_kbart(2000, dict(year_spans=[[2000, 2000]])) is True + assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1990, 2000]])) is False + assert check_kbart(1950, dict(year_spans=[[1900, 1920], [1930, 2000]])) is True def release_to_elasticsearch(entity, force_bool=True): diff --git a/python/fatcat_tools/transforms/entities.py b/python/fatcat_tools/transforms/entities.py index ae666413..53455e85 100644 --- a/python/fatcat_tools/transforms/entities.py +++ b/python/fatcat_tools/transforms/entities.py @@ -32,4 +32,3 @@ def entity_from_json(json_str, entity_type, api_client=None): def entity_from_dict(obj, entity_type, api_client=None): json_str = json.dumps(obj) return entity_from_json(json_str, entity_type, api_client=api_client) - diff --git a/python/fatcat_tools/transforms/ingest.py b/python/fatcat_tools/transforms/ingest.py index 22b5154e..2f4e2271 100644 --- a/python/fatcat_tools/transforms/ingest.py +++ b/python/fatcat_tools/transforms/ingest.py @@ -61,4 +61,3 @@ def release_ingest_request(release, ingest_request_source='fatcat', ingest_type= ingest_request['link_source_id'] = link_source_id return ingest_request - diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py index 3a49f86e..d5891ad1 100644 --- a/python/fatcat_tools/workers/changelog.py +++ b/python/fatcat_tools/workers/changelog.py @@ -354,4 +354,3 @@ class EntityUpdatesWorker(FatcatWorker): producer.flush() # TODO: publish updated 'work' entities to a topic consumer.store_offsets(message=msg) - diff --git a/python/fatcat_tools/workers/elasticsearch.py b/python/fatcat_tools/workers/elasticsearch.py index e58b3da1..61854c31 100644 --- a/python/fatcat_tools/workers/elasticsearch.py +++ b/python/fatcat_tools/workers/elasticsearch.py @@ -1,6 +1,5 @@ import json -import time import requests from confluent_kafka import Consumer, KafkaException @@ -138,7 +137,6 @@ class ElasticsearchReleaseWorker(FatcatWorker): consumer.store_offsets(message=msg) - class ElasticsearchContainerWorker(ElasticsearchReleaseWorker): def __init__(self, kafka_hosts, consume_topic, poll_interval=10.0, offset=None, diff --git a/python/fatcat_tools/workers/worker_common.py b/python/fatcat_tools/workers/worker_common.py index ef79f528..8c2936be 100644 --- a/python/fatcat_tools/workers/worker_common.py +++ b/python/fatcat_tools/workers/worker_common.py @@ -1,15 +1,6 @@ -import re -import sys -import csv -import json -import itertools -from itertools import islice from confluent_kafka import Consumer, KafkaException, TopicPartition -import fatcat_openapi_client -from fatcat_openapi_client.rest import ApiException - def most_recent_message(topic, kafka_config): """ diff --git a/python/fatcat_transform.py b/python/fatcat_transform.py index 14595670..8e01c860 100755 --- a/python/fatcat_transform.py +++ b/python/fatcat_transform.py @@ -9,16 +9,8 @@ import sys import json import argparse -from citeproc import CitationStylesStyle, CitationStylesBibliography -from citeproc import Citation, CitationItem -from citeproc import formatter -from citeproc.source.json import CiteProcJSON -from citeproc_styles import get_style_filepath - -import fatcat_openapi_client -from fatcat_openapi_client.rest import ApiException from fatcat_openapi_client import ReleaseEntity, ContainerEntity, FileEntity, ChangelogEntry -from fatcat_tools import uuid2fcid, entity_from_json, entity_to_dict, \ +from fatcat_tools import entity_from_json, \ release_to_elasticsearch, container_to_elasticsearch, \ file_to_elasticsearch, changelog_to_elasticsearch, public_api, \ release_to_csl, citeproc_csl diff --git a/python/fatcat_util.py b/python/fatcat_util.py index d6e76697..a45b2ba4 100755 --- a/python/fatcat_util.py +++ b/python/fatcat_util.py @@ -8,14 +8,9 @@ TODO: """ import sys -import json import argparse -import fatcat_openapi_client -from fatcat_openapi_client.rest import ApiException -from fatcat_openapi_client import ReleaseEntity, ContainerEntity, ChangelogEntry -from fatcat_tools import uuid2fcid, fcid2uuid, entity_from_json, \ - entity_to_dict, public_api, authenticated_api +from fatcat_tools import uuid2fcid, fcid2uuid, authenticated_api def run_uuid2fcid(args): diff --git a/python/fatcat_web/auth.py b/python/fatcat_web/auth.py index 8e26b7fe..268bc1c4 100644 --- a/python/fatcat_web/auth.py +++ b/python/fatcat_web/auth.py @@ -2,8 +2,7 @@ from collections import namedtuple import requests import pymacaroons -from flask import Flask, render_template, send_from_directory, request, \ - url_for, abort, g, redirect, jsonify, session, flash +from flask import render_template, abort, redirect, session, flash from flask_login import logout_user, login_user, UserMixin from fatcat_web import login_manager, app, api, priv_api, Config import fatcat_openapi_client diff --git a/python/fatcat_web/editing_routes.py b/python/fatcat_web/editing_routes.py index 87223868..44000b1a 100644 --- a/python/fatcat_web/editing_routes.py +++ b/python/fatcat_web/editing_routes.py @@ -1,16 +1,11 @@ -import os -import json -from flask import Flask, render_template, send_from_directory, request, \ - url_for, abort, g, redirect, jsonify, session, flash, Response +from flask import render_template, abort, redirect, session, flash from flask_login import login_required from fatcat_openapi_client import Editgroup from fatcat_openapi_client.rest import ApiException from fatcat_tools.transforms import * -from fatcat_web import app, api, auth_api, priv_api -from fatcat_web.auth import handle_token_login, handle_logout, load_user, handle_ia_xauth -from fatcat_web.cors import crossdomain +from fatcat_web import app, api, auth_api from fatcat_web.search import * from fatcat_web.forms import * from fatcat_web.entity_helpers import * @@ -20,7 +15,7 @@ from fatcat_web.entity_helpers import * def form_editgroup_get_or_create(api, edit_form): """ - This function expects a submitted, validated + This function expects a submitted, validated edit form """ if edit_form.editgroup_id.data: try: @@ -43,8 +38,10 @@ def form_editgroup_get_or_create(api, edit_form): app.log.warning(ae) abort(ae.status) # set this session editgroup_id - flash('Started new editgroup <a href="/editgroup/{}">{}</a>' \ - .format(eg.editgroup_id, eg.editgroup_id)) + flash('Started new editgroup <a href="/editgroup/{}">{}</a>'.format( + eg.editgroup_id, + eg.editgroup_id, + )) return eg def generic_entity_edit(editgroup_id, entity_type, existing_ident, edit_template): @@ -68,7 +65,7 @@ def generic_entity_edit(editgroup_id, entity_type, existing_ident, edit_template Helpers: - get_editgroup_revision(editgroup, entity_type, ident) -> None or entity - + TODO: prev_rev interlock """ @@ -214,7 +211,7 @@ def generic_edit_delete(editgroup_id, entity_type, edit_id): # API on behalf of user user_api = auth_api(session['api_token']) - + # do the deletion try: if entity_type == 'container': @@ -358,4 +355,3 @@ def work_editgroup_edit(editgroup_id, ident): @app.route('/editgroup/<editgroup_id>/work/edit/<edit_id>/delete', methods=['POST']) def work_edit_delete(editgroup_id, edit_id): return abort(404) - diff --git a/python/fatcat_web/forms.py b/python/fatcat_web/forms.py index 377e35aa..15585bf6 100644 --- a/python/fatcat_web/forms.py +++ b/python/fatcat_web/forms.py @@ -8,7 +8,7 @@ from flask_wtf import FlaskForm from wtforms import SelectField, DateField, StringField, IntegerField, \ HiddenField, FormField, FieldList, validators -from fatcat_openapi_client import ContainerEntity, CreatorEntity, FileEntity, \ +from fatcat_openapi_client import ContainerEntity, FileEntity, \ ReleaseEntity, ReleaseContrib, FileUrl, ReleaseExtIds release_type_options = [ @@ -293,9 +293,9 @@ class FileUrlForm(FlaskForm): default='web') class FileEntityForm(EntityEditForm): + # TODO: positive definite size = IntegerField('Size (bytes)', [validators.DataRequired()]) - # TODO: positive definite md5 = StringField("MD5", [validators.Optional(True), validators.Length(min=32, max=32)]) @@ -413,4 +413,3 @@ class SavePaperNowForm(FlaskForm): ingest_request['link_source'] = 'arxiv' ingest_request['link_source_id'] = release.ext_ids.arxiv return ingest_request - diff --git a/python/fatcat_web/graphics.py b/python/fatcat_web/graphics.py index fea7eb5a..56852627 100644 --- a/python/fatcat_web/graphics.py +++ b/python/fatcat_web/graphics.py @@ -33,4 +33,3 @@ def ia_coverage_histogram(rows): chart.add('via Fatcat', [y['available'] for y in years]) chart.add('Missing', [y['missing'] for y in years]) return chart - diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index 58f4b7e0..4684f799 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -2,12 +2,12 @@ import os import sys import json -from flask import Flask, render_template, make_response, send_from_directory, \ - request, url_for, abort, g, redirect, jsonify, session, flash, Response +from flask import render_template, make_response, send_from_directory, \ + request, url_for, abort, redirect, jsonify, session, flash, Response from flask_login import login_required from flask_wtf.csrf import CSRFError -from fatcat_openapi_client import Editgroup, EditgroupAnnotation +from fatcat_openapi_client import EditgroupAnnotation from fatcat_openapi_client.rest import ApiException from fatcat_tools.transforms import * from fatcat_tools.normal import * @@ -1042,4 +1042,3 @@ def robots(): return send_from_directory(os.path.join(app.root_path, 'static'), 'robots.txt', mimetype='text/plain') - diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py index c1246d22..4a87c735 100644 --- a/python/fatcat_web/search.py +++ b/python/fatcat_web/search.py @@ -66,7 +66,6 @@ def do_release_search(q, limit=30, fulltext_only=True, offset=0): if len(q.split()) == 1 and q.startswith("10.") and q.count("/") >= 1: q = 'doi:"{}"'.format(q) - if fulltext_only: q += " in_web:true" @@ -297,7 +296,7 @@ def get_elastic_container_random_releases(ident, limit=5): def get_elastic_container_histogram(ident): """ - Fetches a stacked histogram of + Fetches a stacked histogram Filters to the past 500 years (at most), or about 1000 values. diff --git a/python/fatcat_web/web_config.py b/python/fatcat_web/web_config.py index 0cb153d6..344f1c2a 100644 --- a/python/fatcat_web/web_config.py +++ b/python/fatcat_web/web_config.py @@ -83,4 +83,3 @@ class Config(object): 'fatcat_domain': FATCAT_DOMAIN, }, } - diff --git a/python/fatcat_worker.py b/python/fatcat_worker.py index 03167a3a..19ac16cd 100755 --- a/python/fatcat_worker.py +++ b/python/fatcat_worker.py @@ -2,7 +2,6 @@ import sys import argparse -import datetime import raven from fatcat_tools import public_api diff --git a/python/shell.py b/python/shell.py index c207a325..d53911b9 100644 --- a/python/shell.py +++ b/python/shell.py @@ -1,3 +1,4 @@ +# flake8: noqa # bunch of libraries one might want import uuid diff --git a/python/tests/api_annotations.py b/python/tests/api_annotations.py index e5566eef..0606b637 100644 --- a/python/tests/api_annotations.py +++ b/python/tests/api_annotations.py @@ -1,10 +1,5 @@ -import json -import pytest -from copy import copy - from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException from fixtures import * diff --git a/python/tests/api_containers.py b/python/tests/api_containers.py index 0850fab7..70dbcd7e 100644 --- a/python/tests/api_containers.py +++ b/python/tests/api_containers.py @@ -1,10 +1,5 @@ -import json -import pytest -from copy import copy - from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException from fixtures import * @@ -43,7 +38,7 @@ def test_container(api): # get redirects (none) assert api.get_container_redirects(c2.ident) == [] - + # delete eg = quick_eg(api) api.delete_container(eg.editgroup_id, c2.ident) @@ -59,4 +54,3 @@ def test_container_examples(api): c2 = api.lookup_container(issnl=c1.issnl) assert c1.ident == c2.ident - diff --git a/python/tests/api_creators.py b/python/tests/api_creators.py index 1ce6380a..b271e2b3 100644 --- a/python/tests/api_creators.py +++ b/python/tests/api_creators.py @@ -1,10 +1,5 @@ -import json -import pytest -from copy import copy - from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException from fixtures import * @@ -52,7 +47,7 @@ def test_creators(api): assert c1.display_name == c3.display_name assert c1.extra == c3.extra - + # delete eg = quick_eg(api) api.delete_creator(eg.editgroup_id, c2.ident) diff --git a/python/tests/api_editgroups.py b/python/tests/api_editgroups.py index d82c9233..142687c2 100644 --- a/python/tests/api_editgroups.py +++ b/python/tests/api_editgroups.py @@ -1,11 +1,8 @@ -import json import pytest import datetime -from copy import copy from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException from fixtures import * diff --git a/python/tests/api_editor.py b/python/tests/api_editor.py index 64bb2759..91881743 100644 --- a/python/tests/api_editor.py +++ b/python/tests/api_editor.py @@ -1,12 +1,5 @@ -import json -import pytest -import datetime -from copy import copy - -from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException -from fixtures import * +from fixtures import api def test_editor_update(api): diff --git a/python/tests/api_entity_editing.py b/python/tests/api_entity_editing.py index d5377e18..fee4e34f 100644 --- a/python/tests/api_entity_editing.py +++ b/python/tests/api_entity_editing.py @@ -1,10 +1,7 @@ -import json import pytest -from copy import copy from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException from fixtures import * @@ -180,4 +177,3 @@ def test_edit_delete_all(api_dummy_entities): assert len(eg.edits.webcaptures) == 0 assert len(eg.edits.releases) == 0 assert len(eg.edits.works) == 0 - diff --git a/python/tests/api_files.py b/python/tests/api_files.py index 74865daa..65eda993 100644 --- a/python/tests/api_files.py +++ b/python/tests/api_files.py @@ -1,10 +1,5 @@ -import json -import pytest -from copy import copy - from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException from fixtures import * @@ -50,7 +45,7 @@ def test_file(api): # get redirects (none) assert api.get_file_redirects(f2.ident) == [] - + # delete eg = quick_eg(api) api.delete_file(eg.editgroup_id, f2.ident) diff --git a/python/tests/api_filesets.py b/python/tests/api_filesets.py index 7f3235cb..6d755744 100644 --- a/python/tests/api_filesets.py +++ b/python/tests/api_filesets.py @@ -1,10 +1,7 @@ -import json import pytest -from copy import copy from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException from fixtures import * @@ -64,7 +61,7 @@ def test_fileset(api): # get redirects (none) assert api.get_fileset_redirects(fs2.ident) == [] - + # delete eg = quick_eg(api) api.delete_fileset(eg.editgroup_id, fs2.ident) @@ -100,4 +97,3 @@ def test_bad_fileset(api): for b in bad_list: with pytest.raises(fatcat_openapi_client.rest.ApiException): api.create_fileset(eg.editgroup_id, b) - diff --git a/python/tests/api_misc.py b/python/tests/api_misc.py index 11f85fd6..4c9ac9a6 100644 --- a/python/tests/api_misc.py +++ b/python/tests/api_misc.py @@ -1,10 +1,5 @@ -import json -import pytest -from copy import copy - from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException from fixtures import * @@ -45,4 +40,3 @@ def test_unexpected_body(api): ) f1.urls = [dict(url="http://thing", rel="repository", asdf="blue")] api.create_file(eg.editgroup_id, f1) - diff --git a/python/tests/api_releases.py b/python/tests/api_releases.py index 2df08698..c4c05ea6 100644 --- a/python/tests/api_releases.py +++ b/python/tests/api_releases.py @@ -1,11 +1,8 @@ -import json import pytest import datetime -from copy import copy from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException from fixtures import * @@ -130,7 +127,7 @@ def test_release(api): # get redirects (none) assert api.get_release_redirects(r2.ident) == [] - + # delete eg = quick_eg(api) api.delete_release(eg.editgroup_id, r2.ident) @@ -210,4 +207,3 @@ def test_controlled_vocab(api): api.create_release(eg.editgroup_id, r3) r3.withdrawn_status = "spam" api.create_release(eg.editgroup_id, r3) - diff --git a/python/tests/api_webcaptures.py b/python/tests/api_webcaptures.py index 1054b41f..85813218 100644 --- a/python/tests/api_webcaptures.py +++ b/python/tests/api_webcaptures.py @@ -1,11 +1,8 @@ -import json import pytest import datetime -from copy import copy from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException from fixtures import * @@ -84,7 +81,7 @@ def test_webcapture(api): # get redirects (none) assert api.get_webcapture_redirects(wc2.ident) == [] - + # delete eg = quick_eg(api) api.delete_webcapture(eg.editgroup_id, wc2.ident) diff --git a/python/tests/citation_efficiency.py b/python/tests/citation_efficiency.py index aefb7d15..f8807db6 100644 --- a/python/tests/citation_efficiency.py +++ b/python/tests/citation_efficiency.py @@ -1,10 +1,5 @@ -import json -import pytest -from copy import copy - from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException from fixtures import * @@ -110,4 +105,3 @@ def test_citation_encoding(api): assert container == r1.refs[0].container_name assert extra == r1.refs[0].extra assert locator == r1.refs[0].locator - diff --git a/python/tests/clean_files.py b/python/tests/clean_files.py index 8a87f218..ce1102be 100644 --- a/python/tests/clean_files.py +++ b/python/tests/clean_files.py @@ -1,9 +1,10 @@ import copy import pytest + from fatcat_tools.cleanups import FileCleaner from fatcat_openapi_client import * -from fixtures import api +from fixtures import * @pytest.fixture(scope="function") diff --git a/python/tests/fixtures.py b/python/tests/fixtures.py index 78742114..44c7be63 100644 --- a/python/tests/fixtures.py +++ b/python/tests/fixtures.py @@ -1,8 +1,4 @@ -import os -import time -import json -import signal import pytest from dotenv import load_dotenv import fatcat_web @@ -87,4 +83,3 @@ def test_get_changelog_entry(api): def quick_eg(api_inst): eg = api_inst.create_editgroup(fatcat_openapi_client.Editgroup()) return eg - diff --git a/python/tests/harvest_crossref.py b/python/tests/harvest_crossref.py index e902cda5..cad0f03b 100644 --- a/python/tests/harvest_crossref.py +++ b/python/tests/harvest_crossref.py @@ -1,6 +1,5 @@ import json -import pytest import datetime import responses from fatcat_tools.harvest import * diff --git a/python/tests/harvest_datacite.py b/python/tests/harvest_datacite.py index 004d1fef..13c6042a 100644 --- a/python/tests/harvest_datacite.py +++ b/python/tests/harvest_datacite.py @@ -1,6 +1,5 @@ import json -import pytest import datetime import responses from fatcat_tools.harvest import * diff --git a/python/tests/harvest_pubmed.py b/python/tests/harvest_pubmed.py index f8db46b6..58bc4226 100644 --- a/python/tests/harvest_pubmed.py +++ b/python/tests/harvest_pubmed.py @@ -2,14 +2,11 @@ Test pubmed FTP harvest. """ -import datetime -import json import os - +import datetime import pytest from fatcat_tools.harvest import * -from fatcat_tools.harvest.pubmed import generate_date_file_map def test_pubmed_harvest_date(mocker): @@ -77,4 +74,3 @@ def test_pubmed_harvest_date_no_pmid(mocker): # The file has not PMID, not importable. with pytest.raises(ValueError): harvester.fetch_date(datetime.datetime.strptime(test_date, '%Y-%m-%d')) - diff --git a/python/tests/harvest_state.py b/python/tests/harvest_state.py index 8b7deba6..cc624d97 100644 --- a/python/tests/harvest_state.py +++ b/python/tests/harvest_state.py @@ -1,6 +1,4 @@ -import json -import pytest import datetime from fatcat_tools.harvest import * diff --git a/python/tests/import_arabesque.py b/python/tests/import_arabesque.py index 9483eb45..20cde3a6 100644 --- a/python/tests/import_arabesque.py +++ b/python/tests/import_arabesque.py @@ -1,8 +1,9 @@ import json import pytest + from fatcat_tools.importers import ArabesqueMatchImporter, SqlitePusher, JsonLinePusher -from fixtures import api +from fixtures import * @pytest.fixture(scope="function") diff --git a/python/tests/import_arxiv.py b/python/tests/import_arxiv.py index 1e649616..9306e67c 100644 --- a/python/tests/import_arxiv.py +++ b/python/tests/import_arxiv.py @@ -1,10 +1,10 @@ -import json, gzip import pytest -from fatcat_tools.importers import ArxivRawImporter, Bs4XmlFilePusher -from fixtures import api from bs4 import BeautifulSoup +from fatcat_tools.importers import ArxivRawImporter, Bs4XmlFilePusher +from fixtures import * + @pytest.fixture(scope="function") def arxiv_importer(api): diff --git a/python/tests/import_crossref.py b/python/tests/import_crossref.py index afa2410f..65cd2c37 100644 --- a/python/tests/import_crossref.py +++ b/python/tests/import_crossref.py @@ -1,8 +1,10 @@ -import json, gzip +import json +import gzip import pytest + from fatcat_tools.importers import CrossrefImporter, JsonLinePusher -from fixtures import api +from fixtures import * @pytest.fixture(scope="function") diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py index 20c1eaf8..8fb2d079 100644 --- a/python/tests/import_datacite.py +++ b/python/tests/import_datacite.py @@ -2,15 +2,17 @@ Test datacite importer. """ -import collections +import gzip +import json import datetime +import collections + import pytest -import gzip + from fatcat_tools.importers import DataciteImporter, JsonLinePusher -from fatcat_tools.importers.datacite import find_original_language_title, parse_datacite_titles, parse_datacite_dates, clean_doi, index_form_to_display_name, lookup_license_slug +from fatcat_tools.importers.datacite import find_original_language_title, parse_datacite_titles, parse_datacite_dates, index_form_to_display_name, lookup_license_slug from fatcat_tools.transforms import entity_to_dict -from fixtures import api -import json +from fixtures import * @pytest.fixture(scope="function") @@ -295,7 +297,7 @@ def test_datacite_conversions(datacite_importer): re = datacite_importer.parse_record(json.load(f)) result = entity_to_dict(re) with open(dst, 'r') as f: - expected = json.loads(f.read()) + expected = json.loads(f.read()) assert result == expected, 'output mismatch in {}'.format(dst) @@ -340,20 +342,20 @@ def test_lookup_license_slug(): Case('http://onlinelibrary.wiley.com/termsAndConditions', 'WILEY'), Case('https://publikationen.bibliothek.kit.edu/kitopen-lizenz', 'KIT-OPEN'), Case('http://journals.sagepub.com/page/policies/text-and-data-mining-license', 'SAGE-TDM'), - Case('https://creativecommons.org/publicdomain/mark/1.0/deed.de', 'CC-PUBLICDOMAIN'), - Case('http://creativecommons.org/publicdomain/mark/1.0', 'CC-PUBLICDOMAIN'), - Case('https://creativecommons.org/publicdomain/mark/1.0', 'CC-PUBLICDOMAIN'), - Case('https://creativecommons.org/publicdomain/mark/1.0/', 'CC-PUBLICDOMAIN'), - Case('https://creativecommons.org/publicdomain/mark/1.0/deed.de', 'CC-PUBLICDOMAIN'), - Case('https://creativecommons.org/share-your-work/public-domain/cc0/', 'CC-0'), - Case('http://spdx.org/licenses/CC0-1.0.json', 'CC-0'), - Case('http://spdx.org/licenses/CC-BY-1.0.json', 'CC-BY'), - Case('http://spdx.org/licenses/CC-BY-4.0.json', 'CC-BY'), - Case('http://spdx.org/licenses/CC-BY-NC-4.0.json', 'CC-BY-NC'), - Case('http://spdx.org/licenses/CC-BY-SA-3.0.json', 'CC-BY-SA'), - Case('http://spdx.org/licenses/CC-BY-SA-4.0.json', 'CC-BY-SA'), - Case('http://spdx.org/licenses/MIT.json', 'MIT'), - Case('http://spdx.org/licenses/OGL-Canada-2.0.json', 'OGL-CANADA'), + Case('https://creativecommons.org/publicdomain/mark/1.0/deed.de', 'CC-PUBLICDOMAIN'), + Case('http://creativecommons.org/publicdomain/mark/1.0', 'CC-PUBLICDOMAIN'), + Case('https://creativecommons.org/publicdomain/mark/1.0', 'CC-PUBLICDOMAIN'), + Case('https://creativecommons.org/publicdomain/mark/1.0/', 'CC-PUBLICDOMAIN'), + Case('https://creativecommons.org/publicdomain/mark/1.0/deed.de', 'CC-PUBLICDOMAIN'), + Case('https://creativecommons.org/share-your-work/public-domain/cc0/', 'CC-0'), + Case('http://spdx.org/licenses/CC0-1.0.json', 'CC-0'), + Case('http://spdx.org/licenses/CC-BY-1.0.json', 'CC-BY'), + Case('http://spdx.org/licenses/CC-BY-4.0.json', 'CC-BY'), + Case('http://spdx.org/licenses/CC-BY-NC-4.0.json', 'CC-BY-NC'), + Case('http://spdx.org/licenses/CC-BY-SA-3.0.json', 'CC-BY-SA'), + Case('http://spdx.org/licenses/CC-BY-SA-4.0.json', 'CC-BY-SA'), + Case('http://spdx.org/licenses/MIT.json', 'MIT'), + Case('http://spdx.org/licenses/OGL-Canada-2.0.json', 'OGL-CANADA'), ] for c in cases: diff --git a/python/tests/import_grobid_metadata.py b/python/tests/import_grobid_metadata.py index 51ab3faa..52284b89 100644 --- a/python/tests/import_grobid_metadata.py +++ b/python/tests/import_grobid_metadata.py @@ -3,8 +3,9 @@ import os import json import base64 import pytest + from fatcat_tools.importers import GrobidMetadataImporter, LinePusher -from fixtures import api +from fixtures import * """ WARNING: these tests are currently very fragile because they have database diff --git a/python/tests/import_ingest.py b/python/tests/import_ingest.py index 02486de6..ebe2923c 100644 --- a/python/tests/import_ingest.py +++ b/python/tests/import_ingest.py @@ -1,6 +1,7 @@ import json import pytest + from fatcat_tools.importers import IngestFileResultImporter, JsonLinePusher from fixtures import * diff --git a/python/tests/import_jalc.py b/python/tests/import_jalc.py index f61ec849..ff757e51 100644 --- a/python/tests/import_jalc.py +++ b/python/tests/import_jalc.py @@ -1,10 +1,10 @@ -import json, gzip import pytest -from fatcat_tools.importers import JalcImporter, Bs4XmlFilePusher, Bs4XmlLinesPusher -from fixtures import api from bs4 import BeautifulSoup +from fatcat_tools.importers import JalcImporter, Bs4XmlFilePusher, Bs4XmlLinesPusher +from fixtures import * + @pytest.fixture(scope="function") def jalc_importer(api): diff --git a/python/tests/import_journal_metadata.py b/python/tests/import_journal_metadata.py index cfeee517..51b0a78a 100644 --- a/python/tests/import_journal_metadata.py +++ b/python/tests/import_journal_metadata.py @@ -1,7 +1,8 @@ import pytest + from fatcat_tools.importers import JournalMetadataImporter, JsonLinePusher -from fixtures import api +from fixtures import * @pytest.fixture(scope="function") diff --git a/python/tests/import_jstor.py b/python/tests/import_jstor.py index 019f0aae..8494ffb2 100644 --- a/python/tests/import_jstor.py +++ b/python/tests/import_jstor.py @@ -1,10 +1,10 @@ -import json, gzip import pytest -from fatcat_tools.importers import JstorImporter, Bs4XmlFilePusher -from fixtures import api from bs4 import BeautifulSoup +from fatcat_tools.importers import JstorImporter, Bs4XmlFilePusher +from fixtures import * + @pytest.fixture(scope="function") def jstor_importer(api): diff --git a/python/tests/import_matched.py b/python/tests/import_matched.py index 72ed068c..6b61c53c 100644 --- a/python/tests/import_matched.py +++ b/python/tests/import_matched.py @@ -1,8 +1,9 @@ import json import pytest + from fatcat_tools.importers import MatchedImporter, JsonLinePusher -from fixtures import api +from fixtures import * @pytest.fixture(scope="function") diff --git a/python/tests/import_orcid.py b/python/tests/import_orcid.py index 57886b52..f78ccde7 100644 --- a/python/tests/import_orcid.py +++ b/python/tests/import_orcid.py @@ -1,8 +1,9 @@ import json import pytest + from fatcat_tools.importers import OrcidImporter, JsonLinePusher -from fixtures import api +from fixtures import * @pytest.fixture(scope="function") diff --git a/python/tests/import_pubmed.py b/python/tests/import_pubmed.py index f57aa273..201f533c 100644 --- a/python/tests/import_pubmed.py +++ b/python/tests/import_pubmed.py @@ -1,10 +1,10 @@ -import json, gzip import pytest -from fatcat_tools.importers import PubmedImporter, Bs4XmlLargeFilePusher -from fixtures import api from bs4 import BeautifulSoup +from fatcat_tools.importers import PubmedImporter, Bs4XmlLargeFilePusher +from fixtures import * + @pytest.fixture(scope="function") def pubmed_importer(api): @@ -137,4 +137,3 @@ def test_pubmed_xml_parse_refs(pubmed_importer): r1 = pubmed_importer.parse_record(soup.find_all("PubmedArticle")[0]) assert len(r1.refs) > 1 - diff --git a/python/tests/import_shadow.py b/python/tests/import_shadow.py index 70a918d2..40a1d589 100644 --- a/python/tests/import_shadow.py +++ b/python/tests/import_shadow.py @@ -1,8 +1,9 @@ import json import pytest + from fatcat_tools.importers import ShadowLibraryImporter, JsonLinePusher -from fixtures import api +from fixtures import * @pytest.fixture(scope="function") @@ -58,4 +59,3 @@ def test_shadow_dict_parse(shadow_importer): assert u.url.startswith("https://web.archive.org/") assert "20180729135948" in u.url assert len(f.release_ids) == 1 - diff --git a/python/tests/importer.py b/python/tests/importer.py index 9308ba84..a412b247 100644 --- a/python/tests/importer.py +++ b/python/tests/importer.py @@ -1,8 +1,6 @@ - -import pytest from fatcat_tools.importers import CrossrefImporter, OrcidImporter -from fixtures import api +from fixtures import * def test_issnl_mapping_lookup(api): @@ -32,4 +30,3 @@ def test_identifiers(api): assert oi.is_orcid("0000-00x3-3118-659") == False assert oi.is_orcid("0000-00033118-659") == False assert oi.is_orcid("0000-0003-3118-659.") == False - diff --git a/python/tests/subentity_state.py b/python/tests/subentity_state.py index 614f88f1..e03fa99e 100644 --- a/python/tests/subentity_state.py +++ b/python/tests/subentity_state.py @@ -1,10 +1,5 @@ -import json -import pytest -from copy import copy - from fatcat_openapi_client import * -from fatcat_openapi_client.rest import ApiException from fixtures import * """ @@ -221,4 +216,3 @@ def test_app_entity_states(api, app): assert rv.status_code == 200 rv = app.get('/work/{}'.format(r2.work_id)) assert rv.status_code == 302 - diff --git a/python/tests/tools_api.py b/python/tests/tools_api.py index fd26b8ee..a4b5f2ea 100644 --- a/python/tests/tools_api.py +++ b/python/tests/tools_api.py @@ -1,6 +1,5 @@ import pytest -from fatcat_openapi_client import EditgroupAnnotation from fatcat_openapi_client.rest import ApiException from fatcat_tools import public_api, authenticated_api diff --git a/python/tests/transform_csl.py b/python/tests/transform_csl.py index 15c64ce5..6436f876 100644 --- a/python/tests/transform_csl.py +++ b/python/tests/transform_csl.py @@ -1,11 +1,11 @@ import json import pytest + from fatcat_tools import * from fatcat_openapi_client import * - -from fixtures import api from import_crossref import crossref_importer +from fixtures import * def test_csl_crossref(crossref_importer): with open('tests/files/crossref-works.single.json', 'r') as f: diff --git a/python/tests/transform_elasticsearch.py b/python/tests/transform_elasticsearch.py index a954fc4d..f791562c 100644 --- a/python/tests/transform_elasticsearch.py +++ b/python/tests/transform_elasticsearch.py @@ -1,13 +1,13 @@ import json -import pytest + from fatcat_tools import * from fatcat_openapi_client import * -from fixtures import api from import_journal_metadata import journal_metadata_importer - from import_crossref import crossref_importer from import_matched import matched_importer +from fixtures import * + def test_basic_elasticsearch_convert(crossref_importer): with open('tests/files/crossref-works.single.json', 'r') as f: diff --git a/python/tests/transform_ingest.py b/python/tests/transform_ingest.py index 2d5652b8..c7044bc0 100644 --- a/python/tests/transform_ingest.py +++ b/python/tests/transform_ingest.py @@ -1,12 +1,12 @@ import json -import pytest + from fatcat_tools.transforms import release_ingest_request from fatcat_openapi_client import * from fixtures import api - from import_crossref import crossref_importer + def test_basic_ingest_release(crossref_importer): with open('tests/files/crossref-works.single.json', 'r') as f: # not a single line @@ -54,4 +54,3 @@ def test_rich_ingest_release(): assert ir['base_url'] == 'https://doi.org/10.123/456' assert ir['ext_ids']['doi'] == '10.123/456' assert ir['ext_ids'].get('pmcid') is None - diff --git a/python/tests/web_auth.py b/python/tests/web_auth.py index 2c545b6b..643d806e 100644 --- a/python/tests/web_auth.py +++ b/python/tests/web_auth.py @@ -1,8 +1,5 @@ -import json -import pytest import responses -from fatcat_openapi_client.rest import ApiException from fixtures import * diff --git a/python/tests/web_citation_csl.py b/python/tests/web_citation_csl.py index e016b2d9..fb3ce58d 100644 --- a/python/tests/web_citation_csl.py +++ b/python/tests/web_citation_csl.py @@ -1,8 +1,6 @@ import json -import tempfile import pytest -from fatcat_openapi_client.rest import ApiException from fixtures import * diff --git a/python/tests/web_editgroup.py b/python/tests/web_editgroup.py index cbdd2176..20dc8d93 100644 --- a/python/tests/web_editgroup.py +++ b/python/tests/web_editgroup.py @@ -1,9 +1,7 @@ -import json -import pytest -from fatcat_openapi_client.rest import ApiException from fixtures import * + def test_editgroup_basics(app): rv = app.get('/editgroup/aaaaaaaaaaaabo53aaaaaaaaae') @@ -59,4 +57,3 @@ def test_editgroup_annotations_login(app_admin): assert rv.status_code == 200 assert b'Signup' not in rv.data assert b'Add Comment' in rv.data - diff --git a/python/tests/web_editing.py b/python/tests/web_editing.py index 773a59dd..17f4f5ae 100644 --- a/python/tests/web_editing.py +++ b/python/tests/web_editing.py @@ -1,7 +1,4 @@ -import json -import pytest -from fatcat_openapi_client.rest import ApiException from fixtures import * @@ -147,4 +144,3 @@ def test_web_edit_get(app_admin): rv = app_admin.get('/container/aaaaaaaaaaaaaeiraaaaaaaaam/edit') assert rv.status_code == 200 assert b'1549-1277' in rv.data - diff --git a/python/tests/web_editor.py b/python/tests/web_editor.py index 2614be96..58b21ddf 100644 --- a/python/tests/web_editor.py +++ b/python/tests/web_editor.py @@ -1,7 +1,4 @@ -import json -import pytest -from fatcat_openapi_client.rest import ApiException from fixtures import * @@ -25,4 +22,3 @@ def test_change_username(app_admin): assert rv.status_code == 200 rv = app_admin.get('/auth/account') assert b'admin-tmp' not in rv.data - diff --git a/python/tests/web_entity_views.py b/python/tests/web_entity_views.py index a3f0f897..c1cbdc29 100644 --- a/python/tests/web_entity_views.py +++ b/python/tests/web_entity_views.py @@ -1,7 +1,4 @@ -import json -import pytest -from fatcat_openapi_client.rest import ApiException from fixtures import * from fatcat_web.forms import ReleaseEntityForm, FileEntityForm, ContainerEntityForm @@ -367,4 +364,3 @@ def test_web_work(app): assert rv.status_code == 404 rv = app.get('/work/create') assert rv.status_code == 404 - diff --git a/python/tests/web_routes.py b/python/tests/web_routes.py index 026776ee..0edf06d1 100644 --- a/python/tests/web_routes.py +++ b/python/tests/web_routes.py @@ -1,7 +1,4 @@ -import json -import pytest -from fatcat_openapi_client.rest import ApiException from fixtures import * @@ -13,4 +10,3 @@ def test_static_routes(app): assert app.get("/search").status_code == 302 assert app.get("/static/bogus/route").status_code == 404 - diff --git a/python/tests/web_search.py b/python/tests/web_search.py index 24b817dc..7647bcf5 100644 --- a/python/tests/web_search.py +++ b/python/tests/web_search.py @@ -1,8 +1,7 @@ import json -import pytest import responses -from fatcat_openapi_client.rest import ApiException + from fixtures import * @responses.activate |