diff options
-rw-r--r-- | python/Pipfile | 12 | ||||
-rw-r--r-- | python/Pipfile.lock | 51 | ||||
-rwxr-xr-x | python/fatcat_harvest.py | 24 | ||||
-rw-r--r-- | python/fatcat_tools/importers/common.py | 2 | ||||
-rw-r--r-- | python/fatcat_tools/normal.py | 10 | ||||
-rw-r--r-- | python/fatcat_web/forms.py | 2 | ||||
-rw-r--r-- | python/pytest.ini | 2 |
7 files changed, 39 insertions, 64 deletions
diff --git a/python/Pipfile b/python/Pipfile index 1bcb55cd..eb706815 100644 --- a/python/Pipfile +++ b/python/Pipfile @@ -11,7 +11,7 @@ name = "pypi" pytest = ">=5,<6.0.0" pytest-pythonpath = "*" pytest-pylint = "*" -ipython = "<7.0.0" +ipython = "*" responses = ">=0.10" pytest-cov = "*" pylint = "*" @@ -55,12 +55,8 @@ pathlib2 = "*" pycountry = "*" tldextract = "*" -# this is only to lock to a python3.5-compatible version. needed by an -# importlib-metadata, under pytest -zipp = "<2.0.0" - [requires] -# Python 3.5 is the bundled (system) version of python for Ubuntu 16.04 -# (Xenial), currently the default on Internet Archive production VMs. But we -# install Python 3.7 using an external package repository. +# We install Python 3.7 using a PPA (deadsnakes) on Internet Archive cluster +# machines, which as of Spring 2020 are still running Ubuntu Xenial (16.04), +# which has a system default of Python 3.5. python_version = "3.7" diff --git a/python/Pipfile.lock b/python/Pipfile.lock index ba9b8972..4efd165d 100644 --- a/python/Pipfile.lock +++ b/python/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "dc6b4bafa0214cc4ef81cc0946767ae8d71c76da670e404083015bf0b3996e42" + "sha256": "d93f0b9522e1aa9a60030febce10f8941c6eafc638aeb1663e915721e94aff79" }, "pipfile-spec": 6, "requires": { @@ -128,6 +128,7 @@ "sha256:4ec0fc40eea26d21586e09eea9046efc3f24df78911b14decf36f613568bfa7e", "sha256:54b101bfe4b073712cd2d4523ecda56138eb3737f4825dcf3575bb9183334774", "sha256:57f7a2570587fef85ef0a97c11f05ef2e9e9929fd752351974b75e2c8c65e585", + "sha256:598ebb288d708d62d087b1736b51e98a6f5fa799db8da9f826197ee0744dfc70", "sha256:5bc72cafd9a857618b3ef1f851bc3c9e023182cd74ec3a711d0f335f31ae1803", "sha256:6e99142b37ea58274f42ddd40ecea46520d14cd40689e03ed2dfedb2863c127e", "sha256:74c68a1c6c63eaf4056e55b5162e9f56f26fbd872571660ab629195c63b92198", @@ -654,14 +655,6 @@ ], "index": "pypi", "version": "==2.2.1" - }, - "zipp": { - "hashes": [ - 
"sha256:c70410551488251b0fee67b460fb9a536af8d6f9f008ad10ac51f615b6a521b1", - "sha256:e0d9e63797e483a30d27e09fffd308c59a700d365ec34e93cc100844168bf921" - ], - "index": "pypi", - "version": "==1.2.0" } }, "develop": { @@ -760,11 +753,11 @@ }, "ipython": { "hashes": [ - "sha256:007dcd929c14631f83daff35df0147ea51d1af420da303fd078343878bd5fb62", - "sha256:b0f2ef9eada4a68ef63ee10b6dde4f35c840035c50fd24265f8052c98947d5a4" + "sha256:ca478e52ae1f88da0102360e57e528b92f3ae4316aabac80a2cd7f7ab2efb48a", + "sha256:eb8d075de37f678424527b5ef6ea23f7b80240ca031c2dd6de5879d687a65333" ], "index": "pypi", - "version": "==6.5.0" + "version": "==7.13.0" }, "ipython-genutils": { "hashes": [ @@ -782,10 +775,10 @@ }, "jedi": { "hashes": [ - "sha256:b4f4052551025c6b0b0b193b29a6ff7bdb74c52450631206c262aef9f7159ad2", - "sha256:d5c871cb9360b414f981e7072c52c33258d598305280fef91c6cae34739d65d5" + "sha256:cd60c93b71944d628ccac47df9a60fec53150de53d42dc10a7fc4b5ba6aae798", + "sha256:df40c97641cb943661d2db4c33c2e1ff75d491189423249e989bcea4464f3030" ], - "version": "==0.16.0" + "version": "==0.17.0" }, "lazy-object-proxy": { "hashes": [ @@ -841,14 +834,6 @@ ], "version": "==0.7.0" }, - "pathlib2": { - "hashes": [ - "sha256:0ec8205a157c80d7acc301c0b18fbd5d44fe655968f5d947b6ecef5290fc35db", - "sha256:6cd9a47b597b37cc57de1c05e56fb1a1c9cc9fab04fe78c29acd090418529868" - ], - "index": "pypi", - "version": "==2.3.5" - }, "pexpect": { "hashes": [ "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937", @@ -880,11 +865,10 @@ }, "prompt-toolkit": { "hashes": [ - "sha256:37925b37a4af1f6448c76b7606e0285f79f434ad246dda007a27411cca730c6d", - "sha256:dd4fca02c8069497ad931a2d09914c6b0d1b50151ce876bc15bde4c747090126", - "sha256:f7eec66105baf40eda9ab026cd8b2e251337eea8d111196695d82e0c5f0af852" + "sha256:563d1a4140b63ff9dd587bda9557cffb2fe73650205ab6f4383092fb882e7dc8", + "sha256:df7e9e63aea609b1da3a65641ceaf5bc7d05e0a04de5bd45d05dbeffbabf9e04" ], - "version": "==1.0.18" + "version": "==3.0.5" }, 
"psycopg2": { "hashes": [ @@ -995,12 +979,6 @@ "index": "pypi", "version": "==0.10.12" }, - "simplegeneric": { - "hashes": [ - "sha256:dc972e06094b9af5b855b3df4a646395e43d1c9d0d39ed345b7393560d0b9173" - ], - "version": "==0.8.1" - }, "six": { "hashes": [ "sha256:236bdbdce46e6e6a3d61a337c0f8b763ca1e8717c03b369e87a7ec7ce1319c0a", @@ -1064,11 +1042,10 @@ }, "zipp": { "hashes": [ - "sha256:c70410551488251b0fee67b460fb9a536af8d6f9f008ad10ac51f615b6a521b1", - "sha256:e0d9e63797e483a30d27e09fffd308c59a700d365ec34e93cc100844168bf921" + "sha256:aa36550ff0c0b7ef7fa639055d797116ee891440eac1a56f378e2d3179e0320b", + "sha256:c599e4d75c98f6798c509911d08a22e6c021d074469042177c8c86fb92eefd96" ], - "index": "pypi", - "version": "==1.2.0" + "version": "==3.1.0" } } } diff --git a/python/fatcat_harvest.py b/python/fatcat_harvest.py index a45b44f8..5f67c679 100755 --- a/python/fatcat_harvest.py +++ b/python/fatcat_harvest.py @@ -15,8 +15,8 @@ sentry_client = raven.Client() def run_crossref(args): worker = HarvestCrossrefWorker( kafka_hosts=args.kafka_hosts, - produce_topic="fatcat-{}.api-crossref".format(args.env), - state_topic="fatcat-{}.api-crossref-state".format(args.env), + produce_topic=f"fatcat-{args.env}.api-crossref", + state_topic=f"fatcat-{args.env}.api-crossref-state", contact_email=args.contact_email, start_date=args.start_date, end_date=args.end_date) @@ -25,8 +25,8 @@ def run_crossref(args): def run_datacite(args): worker = HarvestDataciteWorker( kafka_hosts=args.kafka_hosts, - produce_topic="fatcat-{}.api-datacite".format(args.env), - state_topic="fatcat-{}.api-datacite-state".format(args.env), + produce_topic=f"fatcat-{args.env}.api-datacite", + state_topic=f"fatcat-{args.env}.api-datacite-state", contact_email=args.contact_email, start_date=args.start_date, end_date=args.end_date) @@ -35,8 +35,8 @@ def run_datacite(args): def run_arxiv(args): worker = HarvestArxivWorker( kafka_hosts=args.kafka_hosts, - produce_topic="fatcat-{}.oaipmh-arxiv".format(args.env), - 
state_topic="fatcat-{}.oaipmh-arxiv-state".format(args.env), + produce_topic=f"fatcat-{args.env}.oaipmh-arxiv", + state_topic=f"fatcat-{args.env}.oaipmh-arxiv-state", start_date=args.start_date, end_date=args.end_date) worker.run(continuous=args.continuous) @@ -44,8 +44,8 @@ def run_arxiv(args): def run_pubmed(args): worker = PubmedFTPWorker( kafka_hosts=args.kafka_hosts, - produce_topic="fatcat-{}.ftp-pubmed".format(args.env), - state_topic="fatcat-{}.ftp-pubmed-state".format(args.env), + produce_topic=f"fatcat-{args.env}.ftp-pubmed", + state_topic=f"fatcat-{args.env}.ftp-pubmed-state", start_date=args.start_date, end_date=args.end_date) worker.run(continuous=args.continuous) @@ -53,8 +53,8 @@ def run_pubmed(args): def run_doaj_article(args): worker = HarvestDoajArticleWorker( kafka_hosts=args.kafka_hosts, - produce_topic="fatcat-{}.oaipmh-doaj-article".format(args.env), - state_topic="fatcat-{}.oaipmh-doaj-article-state".format(args.env), + produce_topic=f"fatcat-{args.env}.oaipmh-doaj-article", + state_topic=f"fatcat-{args.env}.oaipmh-doaj-article-state", start_date=args.start_date, end_date=args.end_date) worker.run(continuous=args.continuous) @@ -62,8 +62,8 @@ def run_doaj_article(args): def run_doaj_journal(args): worker = HarvestDoajJournalWorker( kafka_hosts=args.kafka_hosts, - produce_topic="fatcat-{}.oaipmh-doaj-journal".format(args.env), - state_topic="fatcat-{}.oaipmh-doaj-journal-state".format(args.env), + produce_topic=f"fatcat-{args.env}.oaipmh-doaj-journal", + state_topic=f"fatcat-{args.env}.oaipmh-doaj-journal-state", start_date=args.start_date, end_date=args.end_date) worker.run(continuous=args.continuous) diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index da611ecb..99c330a6 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -306,7 +306,7 @@ class EntityImporter: self._issnl_id_map = dict() self._orcid_id_map = dict() - self._orcid_regex = 
re.compile("^\\d{4}-\\d{4}-\\d{4}-\\d{3}[\\dX]$") + self._orcid_regex = re.compile(r"^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$") self._doi_id_map = dict() self._pmid_id_map = dict() diff --git a/python/fatcat_tools/normal.py b/python/fatcat_tools/normal.py index 7a2b5fd9..528a822e 100644 --- a/python/fatcat_tools/normal.py +++ b/python/fatcat_tools/normal.py @@ -6,7 +6,7 @@ free-form input, titles, etc. import re -DOI_REGEX = re.compile("^10.\d{3,6}/\S+$") +DOI_REGEX = re.compile(r"^10.\d{3,6}/\S+$") def clean_doi(raw): """ @@ -66,7 +66,7 @@ def test_clean_doi(): assert clean_doi("doi:10.1234/ asdf ") == None assert clean_doi("10.4149/gpb¬_2017042") == None # "logical negation" character -ARXIV_ID_REGEX = re.compile("^(\d{4}.\d{4,5}|[a-z\-]+(\.[A-Z]{2})?/\d{7})(v\d+)?$") +ARXIV_ID_REGEX = re.compile(r"^(\d{4}.\d{4,5}|[a-z\-]+(\.[A-Z]{2})?/\d{7})(v\d+)?$") def clean_arxiv_id(raw): """ @@ -175,7 +175,7 @@ def test_clean_sha256(): assert clean_sha256("6cc853f2ae75696b2e45f476c76b946b0fc2df7c52bb38287cb074aceb77bc7f") == "6cc853f2ae75696b2e45f476c76b946b0fc2df7c52bb38287cb074aceb77bc7f" assert clean_sha256("0fba3fba0e1937aa0297de3836b768b5dfb23d7b") == None -ISSN_REGEX = re.compile("^\d{4}-\d{3}[0-9X]$") +ISSN_REGEX = re.compile(r"^\d{4}-\d{3}[0-9X]$") def clean_issn(raw): if not raw: @@ -193,7 +193,7 @@ def test_clean_issn(): assert clean_issn("134-4567") == None assert clean_issn("123X-4567") == None -ISBN13_REGEX = re.compile("^97(?:8|9)-\d{1,5}-\d{1,7}-\d{1,6}-\d$") +ISBN13_REGEX = re.compile(r"^97(?:8|9)-\d{1,5}-\d{1,7}-\d{1,6}-\d$") def clean_isbn13(raw): if not raw: @@ -209,7 +209,7 @@ def test_clean_isbn13(): assert clean_isbn13("978-1-56619-909-4 ") == "978-1-56619-909-4" assert clean_isbn13("9781566199094") == None -ORCID_REGEX = re.compile("^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$") +ORCID_REGEX = re.compile(r"^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$") def clean_orcid(raw): if not raw: diff --git a/python/fatcat_web/forms.py b/python/fatcat_web/forms.py index dd322cff..377e35aa 100644 --- 
a/python/fatcat_web/forms.py +++ b/python/fatcat_web/forms.py @@ -91,7 +91,7 @@ class ReleaseEntityForm(EntityEditForm): [validators.Optional(True)]) #release_year doi = StringField('DOI', - [validators.Regexp('^10\..*\/.*', message="DOI must be valid"), + [validators.Regexp(r'^10\..*\/.*', message="DOI must be valid"), validators.Optional(True)]) wikidata_qid = StringField('Wikidata QID') isbn13 = StringField('ISBN-13') diff --git a/python/pytest.ini b/python/pytest.ini index 069102b1..ba1ac946 100644 --- a/python/pytest.ini +++ b/python/pytest.ini @@ -12,5 +12,7 @@ filterwarnings = ignore:.*common_exception_handling.*StopIteration:PendingDeprecationWarning ignore:passing extensions and flags as constants is deprecated:DeprecationWarning ignore:.*authlib.specs.*:authlib.deprecate.AuthlibDeprecationWarning + ignore:Using or importing the ABCs.*:DeprecationWarning + ignore:direct construction of PyLintItem.*:_pytest.warning_types.PytestDeprecationWarning log_level = INFO |