From 2c0a6c23eb4edf2941a1c106ce556ed505e778b1 Mon Sep 17 00:00:00 2001
From: Bryan Newbold <bnewbold@robocracy.org>
Date: Wed, 15 Apr 2020 21:19:22 -0700
Subject: pipenv: update deps for python 3.7

We had some pre-3.6 workarounds. Also seems like a reasonable time to
update all dependencies to most recent versions.
---
 python/Pipfile      | 12 ++++--------
 python/Pipfile.lock | 51 ++++++++++++++-------------------------------------
 2 files changed, 18 insertions(+), 45 deletions(-)

diff --git a/python/Pipfile b/python/Pipfile
index 1bcb55cd..eb706815 100644
--- a/python/Pipfile
+++ b/python/Pipfile
@@ -11,7 +11,7 @@ name = "pypi"
 pytest = ">=5,<6.0.0"
 pytest-pythonpath = "*"
 pytest-pylint = "*"
-ipython = "<7.0.0"
+ipython = "*"
 responses = ">=0.10"
 pytest-cov = "*"
 pylint = "*"
@@ -55,12 +55,8 @@ pathlib2 = "*"
 pycountry = "*"
 tldextract = "*"
 
-# this is only to lock to a python3.5-compatible version. needed by an
-# importlib-metadata, under pytest
-zipp = "<2.0.0"
-
 [requires]
-# Python 3.5 is the bundled (system) version of python for Ubuntu 16.04
-# (Xenial), currently the default on Internet Archive production VMs. But we
-# install Python 3.7 using an external package repository.
+# We install Python 3.7 using a PPA (deadsnakes) on Internet Archive cluster
+# machines, which as of Spring 2020 are still running Ubuntu Xenial (16.04),
+# which has a system default of Python 3.5.
 python_version = "3.7"
diff --git a/python/Pipfile.lock b/python/Pipfile.lock
index ba9b8972..4efd165d 100644
--- a/python/Pipfile.lock
+++ b/python/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "dc6b4bafa0214cc4ef81cc0946767ae8d71c76da670e404083015bf0b3996e42"
+            "sha256": "d93f0b9522e1aa9a60030febce10f8941c6eafc638aeb1663e915721e94aff79"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -128,6 +128,7 @@
                 "sha256:4ec0fc40eea26d21586e09eea9046efc3f24df78911b14decf36f613568bfa7e",
                 "sha256:54b101bfe4b073712cd2d4523ecda56138eb3737f4825dcf3575bb9183334774",
                 "sha256:57f7a2570587fef85ef0a97c11f05ef2e9e9929fd752351974b75e2c8c65e585",
+                "sha256:598ebb288d708d62d087b1736b51e98a6f5fa799db8da9f826197ee0744dfc70",
                 "sha256:5bc72cafd9a857618b3ef1f851bc3c9e023182cd74ec3a711d0f335f31ae1803",
                 "sha256:6e99142b37ea58274f42ddd40ecea46520d14cd40689e03ed2dfedb2863c127e",
                 "sha256:74c68a1c6c63eaf4056e55b5162e9f56f26fbd872571660ab629195c63b92198",
@@ -654,14 +655,6 @@
             ],
             "index": "pypi",
             "version": "==2.2.1"
-        },
-        "zipp": {
-            "hashes": [
-                "sha256:c70410551488251b0fee67b460fb9a536af8d6f9f008ad10ac51f615b6a521b1",
-                "sha256:e0d9e63797e483a30d27e09fffd308c59a700d365ec34e93cc100844168bf921"
-            ],
-            "index": "pypi",
-            "version": "==1.2.0"
         }
     },
     "develop": {
@@ -760,11 +753,11 @@
         },
         "ipython": {
             "hashes": [
-                "sha256:007dcd929c14631f83daff35df0147ea51d1af420da303fd078343878bd5fb62",
-                "sha256:b0f2ef9eada4a68ef63ee10b6dde4f35c840035c50fd24265f8052c98947d5a4"
+                "sha256:ca478e52ae1f88da0102360e57e528b92f3ae4316aabac80a2cd7f7ab2efb48a",
+                "sha256:eb8d075de37f678424527b5ef6ea23f7b80240ca031c2dd6de5879d687a65333"
             ],
             "index": "pypi",
-            "version": "==6.5.0"
+            "version": "==7.13.0"
         },
         "ipython-genutils": {
             "hashes": [
@@ -782,10 +775,10 @@
         },
         "jedi": {
             "hashes": [
-                "sha256:b4f4052551025c6b0b0b193b29a6ff7bdb74c52450631206c262aef9f7159ad2",
-                "sha256:d5c871cb9360b414f981e7072c52c33258d598305280fef91c6cae34739d65d5"
+                "sha256:cd60c93b71944d628ccac47df9a60fec53150de53d42dc10a7fc4b5ba6aae798",
+                "sha256:df40c97641cb943661d2db4c33c2e1ff75d491189423249e989bcea4464f3030"
             ],
-            "version": "==0.16.0"
+            "version": "==0.17.0"
         },
         "lazy-object-proxy": {
             "hashes": [
@@ -841,14 +834,6 @@
             ],
             "version": "==0.7.0"
         },
-        "pathlib2": {
-            "hashes": [
-                "sha256:0ec8205a157c80d7acc301c0b18fbd5d44fe655968f5d947b6ecef5290fc35db",
-                "sha256:6cd9a47b597b37cc57de1c05e56fb1a1c9cc9fab04fe78c29acd090418529868"
-            ],
-            "index": "pypi",
-            "version": "==2.3.5"
-        },
         "pexpect": {
             "hashes": [
                 "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937",
@@ -880,11 +865,10 @@
         },
         "prompt-toolkit": {
             "hashes": [
-                "sha256:37925b37a4af1f6448c76b7606e0285f79f434ad246dda007a27411cca730c6d",
-                "sha256:dd4fca02c8069497ad931a2d09914c6b0d1b50151ce876bc15bde4c747090126",
-                "sha256:f7eec66105baf40eda9ab026cd8b2e251337eea8d111196695d82e0c5f0af852"
+                "sha256:563d1a4140b63ff9dd587bda9557cffb2fe73650205ab6f4383092fb882e7dc8",
+                "sha256:df7e9e63aea609b1da3a65641ceaf5bc7d05e0a04de5bd45d05dbeffbabf9e04"
             ],
-            "version": "==1.0.18"
+            "version": "==3.0.5"
         },
         "psycopg2": {
             "hashes": [
@@ -995,12 +979,6 @@
             "index": "pypi",
             "version": "==0.10.12"
         },
-        "simplegeneric": {
-            "hashes": [
-                "sha256:dc972e06094b9af5b855b3df4a646395e43d1c9d0d39ed345b7393560d0b9173"
-            ],
-            "version": "==0.8.1"
-        },
         "six": {
             "hashes": [
                 "sha256:236bdbdce46e6e6a3d61a337c0f8b763ca1e8717c03b369e87a7ec7ce1319c0a",
@@ -1064,11 +1042,10 @@
         },
         "zipp": {
             "hashes": [
-                "sha256:c70410551488251b0fee67b460fb9a536af8d6f9f008ad10ac51f615b6a521b1",
-                "sha256:e0d9e63797e483a30d27e09fffd308c59a700d365ec34e93cc100844168bf921"
+                "sha256:aa36550ff0c0b7ef7fa639055d797116ee891440eac1a56f378e2d3179e0320b",
+                "sha256:c599e4d75c98f6798c509911d08a22e6c021d074469042177c8c86fb92eefd96"
             ],
-            "index": "pypi",
-            "version": "==1.2.0"
+            "version": "==3.1.0"
         }
     }
 }
-- 
cgit v1.2.3


From 116a26f072e8628cc4cfabb2e55c6661b6b94605 Mon Sep 17 00:00:00 2001
From: Bryan Newbold <bnewbold@robocracy.org>
Date: Wed, 15 Apr 2020 21:35:48 -0700
Subject: consistently use raw string prefix for regex

---
 python/fatcat_tools/importers/common.py |  2 +-
 python/fatcat_tools/normal.py           | 10 +++++-----
 python/fatcat_web/forms.py              |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index da611ecb..99c330a6 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -306,7 +306,7 @@ class EntityImporter:
 
         self._issnl_id_map = dict()
         self._orcid_id_map = dict()
-        self._orcid_regex = re.compile("^\\d{4}-\\d{4}-\\d{4}-\\d{3}[\\dX]$")
+        self._orcid_regex = re.compile(r"^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$")
         self._doi_id_map = dict()
         self._pmid_id_map = dict()
 
diff --git a/python/fatcat_tools/normal.py b/python/fatcat_tools/normal.py
index 7a2b5fd9..528a822e 100644
--- a/python/fatcat_tools/normal.py
+++ b/python/fatcat_tools/normal.py
@@ -6,7 +6,7 @@ free-form input, titles, etc.
 
 import re
 
-DOI_REGEX = re.compile("^10.\d{3,6}/\S+$")
+DOI_REGEX = re.compile(r"^10.\d{3,6}/\S+$")
 
 def clean_doi(raw):
     """
@@ -66,7 +66,7 @@ def test_clean_doi():
     assert clean_doi("doi:10.1234/ asdf ") == None
     assert clean_doi("10.4149/gpb¬_2017042") == None  # "logical negation" character
 
-ARXIV_ID_REGEX = re.compile("^(\d{4}.\d{4,5}|[a-z\-]+(\.[A-Z]{2})?/\d{7})(v\d+)?$")
+ARXIV_ID_REGEX = re.compile(r"^(\d{4}.\d{4,5}|[a-z\-]+(\.[A-Z]{2})?/\d{7})(v\d+)?$")
 
 def clean_arxiv_id(raw):
     """
@@ -175,7 +175,7 @@ def test_clean_sha256():
     assert clean_sha256("6cc853f2ae75696b2e45f476c76b946b0fc2df7c52bb38287cb074aceb77bc7f") == "6cc853f2ae75696b2e45f476c76b946b0fc2df7c52bb38287cb074aceb77bc7f"
     assert clean_sha256("0fba3fba0e1937aa0297de3836b768b5dfb23d7b") == None
 
-ISSN_REGEX = re.compile("^\d{4}-\d{3}[0-9X]$")
+ISSN_REGEX = re.compile(r"^\d{4}-\d{3}[0-9X]$")
 
 def clean_issn(raw):
     if not raw:
@@ -193,7 +193,7 @@ def test_clean_issn():
     assert clean_issn("134-4567") == None
     assert clean_issn("123X-4567") == None
 
-ISBN13_REGEX = re.compile("^97(?:8|9)-\d{1,5}-\d{1,7}-\d{1,6}-\d$")
+ISBN13_REGEX = re.compile(r"^97(?:8|9)-\d{1,5}-\d{1,7}-\d{1,6}-\d$")
 
 def clean_isbn13(raw):
     if not raw:
@@ -209,7 +209,7 @@ def test_clean_isbn13():
     assert clean_isbn13("978-1-56619-909-4 ") == "978-1-56619-909-4"
     assert clean_isbn13("9781566199094") == None
 
-ORCID_REGEX = re.compile("^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$")
+ORCID_REGEX = re.compile(r"^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$")
 
 def clean_orcid(raw):
     if not raw:
diff --git a/python/fatcat_web/forms.py b/python/fatcat_web/forms.py
index dd322cff..377e35aa 100644
--- a/python/fatcat_web/forms.py
+++ b/python/fatcat_web/forms.py
@@ -91,7 +91,7 @@ class ReleaseEntityForm(EntityEditForm):
         [validators.Optional(True)])
     #release_year
     doi = StringField('DOI',
-        [validators.Regexp('^10\..*\/.*', message="DOI must be valid"),
+        [validators.Regexp(r'^10\..*\/.*', message="DOI must be valid"),
          validators.Optional(True)])
     wikidata_qid = StringField('Wikidata QID')
     isbn13 = StringField('ISBN-13')
-- 
cgit v1.2.3


From 9dd0737277d7dec90832f2a4a11f31e1b864e4f0 Mon Sep 17 00:00:00 2001
From: Bryan Newbold <bnewbold@robocracy.org>
Date: Wed, 15 Apr 2020 21:36:10 -0700
Subject: pytest: ignore remaining deprecation warnings in 3rd party libraries

---
 python/pytest.ini | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/pytest.ini b/python/pytest.ini
index 069102b1..ba1ac946 100644
--- a/python/pytest.ini
+++ b/python/pytest.ini
@@ -12,5 +12,7 @@ filterwarnings =
     ignore:.*common_exception_handling.*StopIteration:PendingDeprecationWarning
     ignore:passing extensions and flags as constants is deprecated:DeprecationWarning
     ignore:.*authlib.specs.*:authlib.deprecate.AuthlibDeprecationWarning
+    ignore:Using or importing the ABCs.*:DeprecationWarning
+    ignore:direct construction of PyLintItem.*:_pytest.warning_types.PytestDeprecationWarning
 
 log_level = INFO
-- 
cgit v1.2.3


From bbe37c06a3d6c213823258f054d10eaf0814da8f Mon Sep 17 00:00:00 2001
From: Bryan Newbold <bnewbold@robocracy.org>
Date: Wed, 15 Apr 2020 21:41:30 -0700
Subject: example of starting to use format strings

---
 python/fatcat_harvest.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/python/fatcat_harvest.py b/python/fatcat_harvest.py
index a45b44f8..5f67c679 100755
--- a/python/fatcat_harvest.py
+++ b/python/fatcat_harvest.py
@@ -15,8 +15,8 @@ sentry_client = raven.Client()
 def run_crossref(args):
     worker = HarvestCrossrefWorker(
         kafka_hosts=args.kafka_hosts,
-        produce_topic="fatcat-{}.api-crossref".format(args.env),
-        state_topic="fatcat-{}.api-crossref-state".format(args.env),
+        produce_topic=f"fatcat-{args.env}.api-crossref",
+        state_topic=f"fatcat-{args.env}.api-crossref-state",
         contact_email=args.contact_email,
         start_date=args.start_date,
         end_date=args.end_date)
@@ -25,8 +25,8 @@ def run_crossref(args):
 def run_datacite(args):
     worker = HarvestDataciteWorker(
         kafka_hosts=args.kafka_hosts,
-        produce_topic="fatcat-{}.api-datacite".format(args.env),
-        state_topic="fatcat-{}.api-datacite-state".format(args.env),
+        produce_topic=f"fatcat-{args.env}.api-datacite",
+        state_topic=f"fatcat-{args.env}.api-datacite-state",
         contact_email=args.contact_email,
         start_date=args.start_date,
         end_date=args.end_date)
@@ -35,8 +35,8 @@ def run_datacite(args):
 def run_arxiv(args):
     worker = HarvestArxivWorker(
         kafka_hosts=args.kafka_hosts,
-        produce_topic="fatcat-{}.oaipmh-arxiv".format(args.env),
-        state_topic="fatcat-{}.oaipmh-arxiv-state".format(args.env),
+        produce_topic=f"fatcat-{args.env}.oaipmh-arxiv",
+        state_topic=f"fatcat-{args.env}.oaipmh-arxiv-state",
         start_date=args.start_date,
         end_date=args.end_date)
     worker.run(continuous=args.continuous)
@@ -44,8 +44,8 @@ def run_arxiv(args):
 def run_pubmed(args):
     worker = PubmedFTPWorker(
         kafka_hosts=args.kafka_hosts,
-        produce_topic="fatcat-{}.ftp-pubmed".format(args.env),
-        state_topic="fatcat-{}.ftp-pubmed-state".format(args.env),
+        produce_topic=f"fatcat-{args.env}.ftp-pubmed",
+        state_topic=f"fatcat-{args.env}.ftp-pubmed-state",
         start_date=args.start_date,
         end_date=args.end_date)
     worker.run(continuous=args.continuous)
@@ -53,8 +53,8 @@ def run_pubmed(args):
 def run_doaj_article(args):
     worker = HarvestDoajArticleWorker(
         kafka_hosts=args.kafka_hosts,
-        produce_topic="fatcat-{}.oaipmh-doaj-article".format(args.env),
-        state_topic="fatcat-{}.oaipmh-doaj-article-state".format(args.env),
+        produce_topic=f"fatcat-{args.env}.oaipmh-doaj-article",
+        state_topic=f"fatcat-{args.env}.oaipmh-doaj-article-state",
         start_date=args.start_date,
         end_date=args.end_date)
     worker.run(continuous=args.continuous)
@@ -62,8 +62,8 @@ def run_doaj_article(args):
 def run_doaj_journal(args):
     worker = HarvestDoajJournalWorker(
         kafka_hosts=args.kafka_hosts,
-        produce_topic="fatcat-{}.oaipmh-doaj-journal".format(args.env),
-        state_topic="fatcat-{}.oaipmh-doaj-journal-state".format(args.env),
+        produce_topic=f"fatcat-{args.env}.oaipmh-doaj-journal",
+        state_topic=f"fatcat-{args.env}.oaipmh-doaj-journal-state",
         start_date=args.start_date,
         end_date=args.end_date)
     worker.run(continuous=args.continuous)
-- 
cgit v1.2.3