43 files changed, 4020 insertions, 3194 deletions
diff --git a/python/fatcat_tools/__init__.py b/python/fatcat_tools/__init__.py
index ec38a17b..6f9ee7d8 100644
--- a/python/fatcat_tools/__init__.py
+++ b/python/fatcat_tools/__init__.py
@@ -1,4 +1,3 @@
-
 from .api_auth import authenticated_api, public_api
 from .fcid import fcid2uuid, uuid2fcid
 from .kafka import kafka_fail_fast, simple_kafka_producer
diff --git a/python/fatcat_tools/api_auth.py b/python/fatcat_tools/api_auth.py
index bbf059c0..d8f0c46d 100644
--- a/python/fatcat_tools/api_auth.py
+++ b/python/fatcat_tools/api_auth.py
@@ -1,4 +1,3 @@
-
 import os
 import sys
 
@@ -15,6 +14,7 @@ def public_api(host_uri):
     conf.host = host_uri
     return fatcat_openapi_client.DefaultApi(fatcat_openapi_client.ApiClient(conf))
 
+
 def authenticated_api(host_uri, token=None):
     """
     Note: if this helper is called, it's implied that an actual API connection
@@ -24,10 +24,11 @@ def authenticated_api(host_uri, token=None):
     conf = fatcat_openapi_client.Configuration()
     conf.host = host_uri
     if not token:
-        token = os.environ['FATCAT_API_AUTH_TOKEN']
+        token = os.environ["FATCAT_API_AUTH_TOKEN"]
     if not token:
         sys.stderr.write(
-            'This client requires a fatcat API token (eg, in env var FATCAT_API_AUTH_TOKEN)\n')
+            "This client requires a fatcat API token (eg, in env var FATCAT_API_AUTH_TOKEN)\n"
+        )
         sys.exit(-1)
 
     conf.api_key["Authorization"] = token
diff --git a/python/fatcat_tools/cleanups/__init__.py b/python/fatcat_tools/cleanups/__init__.py
index 587c7b9b..0aeec977 100644
--- a/python/fatcat_tools/cleanups/__init__.py
+++ b/python/fatcat_tools/cleanups/__init__.py
@@ -1,3 +1,2 @@
-
 from .common import EntityCleaner
 from .files import FileCleaner
diff --git a/python/fatcat_tools/cleanups/common.py b/python/fatcat_tools/cleanups/common.py
index d0fcc761..26ca7bd6 100644
--- a/python/fatcat_tools/cleanups/common.py
+++ b/python/fatcat_tools/cleanups/common.py
@@ -1,4 +1,3 @@
-
 import copy
 import json
 import subprocess
@@ -30,16 +29,19 @@ class EntityCleaner:
 
     def __init__(self, api, entity_type, **kwargs):
 
-        eg_extra = kwargs.get('editgroup_extra', dict())
-        eg_extra['git_rev'] = eg_extra.get('git_rev',
-            subprocess.check_output(["git", "describe", "--always"]).strip()).decode('utf-8')
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.EntityCleaner')
+        eg_extra = kwargs.get("editgroup_extra", dict())
+        eg_extra["git_rev"] = eg_extra.get(
+            "git_rev", subprocess.check_output(["git", "describe", "--always"]).strip()
+        ).decode("utf-8")
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.EntityCleaner")
 
         self.api = api
         self.entity_type = entity_type
-        self.dry_run_mode = kwargs.get('dry_run_mode', True)
-        self.edit_batch_size = kwargs.get('edit_batch_size', 50)
-        self.editgroup_description = kwargs.get('editgroup_description', "Generic Entity Cleaner Bot")
+        self.dry_run_mode = kwargs.get("dry_run_mode", True)
+        self.edit_batch_size = kwargs.get("edit_batch_size", 50)
+        self.editgroup_description = kwargs.get(
+            "editgroup_description", "Generic Entity Cleaner Bot"
+        )
         self.editgroup_extra = eg_extra
         self.reset()
         self.ac = ApiClient()
@@ -48,7 +50,7 @@ class EntityCleaner:
             print("Running in dry-run mode!")
 
     def reset(self):
-        self.counts = Counter({'lines': 0, 'cleaned': 0, 'updated': 0})
+        self.counts = Counter({"lines": 0, "cleaned": 0, "updated": 0})
         self._edit_count = 0
         self._editgroup_id = None
         self._entity_queue = []
@@ -63,23 +65,23 @@ class EntityCleaner:
 
         Returns nothing.
         """
-        self.counts['lines'] += 1
-        if (not record):
-            self.counts['skip-null'] += 1
+        self.counts["lines"] += 1
+        if not record:
+            self.counts["skip-null"] += 1
             return
 
         entity = entity_from_dict(record, self.entity_type, api_client=self.ac)
 
-        if entity.state != 'active':
-            self.counts['skip-inactive'] += 1
+        if entity.state != "active":
+            self.counts["skip-inactive"] += 1
             return
 
         cleaned = self.clean_entity(copy.deepcopy(entity))
         if entity == cleaned:
-            self.counts['skip-clean'] += 1
+            self.counts["skip-clean"] += 1
             return
         else:
-            self.counts['cleaned'] += 1
+            self.counts["cleaned"] += 1
 
         if self.dry_run_mode:
             entity_dict = entity_to_dict(entity, api_client=self.ac)
@@ -87,11 +89,13 @@ class EntityCleaner:
             return
 
         if entity.ident in self._idents_inflight:
-            raise ValueError("Entity already part of in-process update: {}".format(entity.ident))
+            raise ValueError(
+                "Entity already part of in-process update: {}".format(entity.ident)
+            )
 
         updated = self.try_update(cleaned)
         if updated:
-            self.counts['updated'] += updated
+            self.counts["updated"] += updated
             self._edit_count += updated
             self._idents_inflight.append(entity.ident)
 
@@ -132,9 +136,8 @@ class EntityCleaner:
 
         if not self._editgroup_id:
             eg = self.api.create_editgroup(
-                Editgroup(
-                    description=self.editgroup_description,
-                    extra=self.editgroup_extra))
+                Editgroup(description=self.editgroup_description, extra=self.editgroup_extra)
+            )
             self._editgroup_id = eg.editgroup_id
 
         return self._editgroup_id
diff --git a/python/fatcat_tools/cleanups/files.py b/python/fatcat_tools/cleanups/files.py
index 0d275ba6..d378a91f 100644
--- a/python/fatcat_tools/cleanups/files.py
+++ b/python/fatcat_tools/cleanups/files.py
@@ -1,4 +1,3 @@
-
 from fatcat_openapi_client.models import FileEntity
 from fatcat_openapi_client.rest import ApiException
 
@@ -12,14 +11,19 @@ class FileCleaner(EntityCleaner):
 
     def __init__(self, api, **kwargs):
 
-        eg_desc = kwargs.pop('editgroup_description', None) or "Automated cleanup of file entities (eg, remove bad URLs)"
-        eg_extra = kwargs.pop('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.FileCleaner')
-        super().__init__(api,
+        eg_desc = (
+            kwargs.pop("editgroup_description", None)
+            or "Automated cleanup of file entities (eg, remove bad URLs)"
+        )
+        eg_extra = kwargs.pop("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.FileCleaner")
+        super().__init__(
+            api,
             entity_type=FileEntity,
             editgroup_description=eg_desc,
             editgroup_extra=eg_extra,
-            **kwargs)
+            **kwargs
+        )
 
     def clean_entity(self, entity):
         """
@@ -27,24 +31,24 @@ class FileCleaner(EntityCleaner):
         """
 
         # URL has ://web.archive.org/web/None/ link => delete URL
-        entity.urls = [u for u in entity.urls if '://web.archive.org/web/None/' not in u.url]
+        entity.urls = [u for u in entity.urls if "://web.archive.org/web/None/" not in u.url]
 
         # URL has ://archive.org/ link with rel=repository => rel=archive
         for u in entity.urls:
-            if '://archive.org/' in u.url and u.rel == 'repository':
-                u.rel = 'archive'
+            if "://archive.org/" in u.url and u.rel == "repository":
+                u.rel = "archive"
 
         # URL has short wayback date ("2017") and another url with that as prefix => delete URL
         stub_wayback_urls = []
         full_wayback_urls = []
         for u in entity.urls:
-            if '://web.archive.org/web/' in u.url:
-                if len(u.url.split('/')[4]) <= 8:
+            if "://web.archive.org/web/" in u.url:
+                if len(u.url.split("/")[4]) <= 8:
                     stub_wayback_urls.append(u.url)
                 else:
-                    full_wayback_urls.append('/'.join(u.url.split('/')[5:]))
+                    full_wayback_urls.append("/".join(u.url.split("/")[5:]))
         for stub in stub_wayback_urls:
-            target = '/'.join(stub.split('/')[5:])
+            target = "/".join(stub.split("/")[5:])
             if target in full_wayback_urls:
                 entity.urls = [u for u in entity.urls if u.url != stub]
 
@@ -57,14 +61,14 @@ class FileCleaner(EntityCleaner):
         except ApiException as err:
             if err.status != 404:
                 raise err
-            self.counts['skip-not-found'] += 1
+            self.counts["skip-not-found"] += 1
             return 0
 
-        if existing.state != 'active':
-            self.counts['skip-existing-inactive'] += 1
+        if existing.state != "active":
+            self.counts["skip-existing-inactive"] += 1
             return 0
         if existing.revision != entity.revision:
-            self.counts['skip-revision'] += 1
+            self.counts["skip-revision"] += 1
             return 0
 
         self.api.update_file(self.get_editgroup_id(), entity.ident, entity)
diff --git a/python/fatcat_tools/fcid.py b/python/fatcat_tools/fcid.py
index 0987d10d..53891e5a 100644
--- a/python/fatcat_tools/fcid.py
+++ b/python/fatcat_tools/fcid.py
@@ -1,4 +1,3 @@
-
 import base64
 import uuid
 
@@ -7,18 +6,20 @@ def fcid2uuid(s):
     """
     Converts a fatcat identifier (base32 encoded string) to a uuid.UUID object
     """
-    s = s.split('_')[-1].upper().encode('utf-8')
+    s = s.split("_")[-1].upper().encode("utf-8")
     assert len(s) == 26
     raw = base64.b32decode(s + b"======")
     return str(uuid.UUID(bytes=raw)).lower()
 
+
 def uuid2fcid(s):
     """
     Converts a uuid.UUID object to a fatcat identifier (base32 encoded string)
     """
     raw = uuid.UUID(s).bytes
-    return base64.b32encode(raw)[:26].lower().decode('utf-8')
+    return base64.b32encode(raw)[:26].lower().decode("utf-8")
+
 
 def test_fcid():
-    test_uuid = '00000000-0000-0000-3333-000000000001'
+    test_uuid = "00000000-0000-0000-3333-000000000001"
     assert test_uuid == fcid2uuid(uuid2fcid(test_uuid))
diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py
index d441d495..dd48e256 100644
--- a/python/fatcat_tools/harvest/doi_registrars.py
+++ b/python/fatcat_tools/harvest/doi_registrars.py
@@ -1,4 +1,3 @@
-
 import json
 import sys
 import time
@@ -59,29 +58,35 @@ class HarvestCrossrefWorker:
     to be careful how state is serialized back into kafka.
     """
 
-    def __init__(self, kafka_hosts, produce_topic, state_topic, contact_email,
-            api_host_url="https://api.crossref.org/works", start_date=None,
-            end_date=None):
+    def __init__(
+        self,
+        kafka_hosts,
+        produce_topic,
+        state_topic,
+        contact_email,
+        api_host_url="https://api.crossref.org/works",
+        start_date=None,
+        end_date=None,
+    ):
 
         self.api_host_url = api_host_url
         self.produce_topic = produce_topic
         self.state_topic = state_topic
         self.contact_email = contact_email
         self.kafka_config = {
-            'bootstrap.servers': kafka_hosts,
-            'message.max.bytes': 20000000, # ~20 MBytes; broker is ~50 MBytes
+            "bootstrap.servers": kafka_hosts,
+            "message.max.bytes": 20000000,  # ~20 MBytes; broker is ~50 MBytes
         }
 
         self.state = HarvestState(start_date, end_date)
         self.state.initialize_from_kafka(self.state_topic, self.kafka_config)
 
-        self.loop_sleep = 60*60 # how long to wait, in seconds, between date checks
+        self.loop_sleep = 60 * 60  # how long to wait, in seconds, between date checks
         self.api_batch_size = 50
         self.name = "Crossref"
         self.producer = self._kafka_producer()
 
     def _kafka_producer(self):
-
         def fail_fast(err, msg):
             if err is not None:
                 print("Kafka producer delivery error: {}".format(err), file=sys.stderr)
@@ -92,46 +97,53 @@ class HarvestCrossrefWorker:
         self._kafka_fail_fast = fail_fast
 
         producer_conf = self.kafka_config.copy()
-        producer_conf.update({
-            'delivery.report.only.error': True,
-            'default.topic.config': {
-                'request.required.acks': -1, # all brokers must confirm
-            },
-        })
+        producer_conf.update(
+            {
+                "delivery.report.only.error": True,
+                "default.topic.config": {
+                    "request.required.acks": -1,  # all brokers must confirm
+                },
+            }
+        )
         return Producer(producer_conf)
 
     def params(self, date_str):
-        filter_param = 'from-update-date:{},until-update-date:{}'.format(
-            date_str, date_str)
+        filter_param = "from-update-date:{},until-update-date:{}".format(date_str, date_str)
         return {
-            'filter': filter_param,
-            'rows': self.api_batch_size,
-            'cursor': '*',
+            "filter": filter_param,
+            "rows": self.api_batch_size,
+            "cursor": "*",
         }
 
     def update_params(self, params, resp):
-        params['cursor'] = resp['message']['next-cursor']
+        params["cursor"] = resp["message"]["next-cursor"]
         return params
 
     def extract_key(self, obj):
-        return obj['DOI'].encode('utf-8')
+        return obj["DOI"].encode("utf-8")
 
     def fetch_date(self, date):
 
         date_str = date.isoformat()
         params = self.params(date_str)
         http_session = requests_retry_session()
-        http_session.headers.update({
-            'User-Agent': 'fatcat_tools/0.1.0 (https://fatcat.wiki; mailto:{}) python-requests'.format(
-                self.contact_email),
-        })
+        http_session.headers.update(
+            {
+                "User-Agent": "fatcat_tools/0.1.0 (https://fatcat.wiki; mailto:{}) python-requests".format(
+                    self.contact_email
+                ),
+            }
+        )
         count = 0
         while True:
             http_resp = http_session.get(self.api_host_url, params=params)
             if http_resp.status_code == 503:
                 # crude backoff; now redundant with session exponential
                 # backoff, but allows for longer backoff/downtime on remote end
-                print("got HTTP {}, pausing for 30 seconds".format(http_resp.status_code), file=sys.stderr)
+                print(
+                    "got HTTP {}, pausing for 30 seconds".format(http_resp.status_code),
+                    file=sys.stderr,
+                )
                 # keep kafka producer connection alive
                 self.producer.poll(0)
                 time.sleep(30.0)
@@ -143,19 +155,27 @@ class HarvestCrossrefWorker:
             except json.JSONDecodeError as exc:
                 # Datacite API returned HTTP 200, but JSON seemed unparseable.
                 # It might be a glitch, so we retry.
-                print("failed to decode body from {}: {}".format(http_resp.url, resp_body), file=sys.stderr)
+                print(
+                    "failed to decode body from {}: {}".format(http_resp.url, resp_body),
+                    file=sys.stderr,
+                )
                 raise exc
             items = self.extract_items(resp)
             count += len(items)
-            print("... got {} ({} of {}), HTTP fetch took {}".format(len(items), count,
-                self.extract_total(resp), http_resp.elapsed), file=sys.stderr)
-            #print(json.dumps(resp))
+            print(
+                "... got {} ({} of {}), HTTP fetch took {}".format(
+                    len(items), count, self.extract_total(resp), http_resp.elapsed
+                ),
+                file=sys.stderr,
+            )
+            # print(json.dumps(resp))
             for work in items:
                 self.producer.produce(
                     self.produce_topic,
-                    json.dumps(work).encode('utf-8'),
+                    json.dumps(work).encode("utf-8"),
                     key=self.extract_key(work),
-                    on_delivery=self._kafka_fail_fast)
+                    on_delivery=self._kafka_fail_fast,
+                )
             self.producer.poll(0)
             if len(items) < self.api_batch_size:
                 break
@@ -163,10 +183,10 @@ class HarvestCrossrefWorker:
         self.producer.flush()
 
     def extract_items(self, resp):
-        return resp['message']['items']
+        return resp["message"]["items"]
 
     def extract_total(self, resp):
-        return resp['message']['total-results']
+        return resp["message"]["total-results"]
 
     def run(self, continuous=False):
 
@@ -175,9 +195,9 @@ class HarvestCrossrefWorker:
             if current:
                 print("Fetching DOIs updated on {} (UTC)".format(current), file=sys.stderr)
                 self.fetch_date(current)
-                self.state.complete(current,
-                    kafka_topic=self.state_topic,
-                    kafka_config=self.kafka_config)
+                self.state.complete(
+                    current, kafka_topic=self.state_topic, kafka_config=self.kafka_config
+                )
                 continue
 
             if continuous:
@@ -200,16 +220,25 @@ class HarvestDataciteWorker(HarvestCrossrefWorker):
     could/should use this script for that, and dump to JSON?
     """
 
-    def __init__(self, kafka_hosts, produce_topic, state_topic, contact_email,
-            api_host_url="https://api.datacite.org/dois",
-            start_date=None, end_date=None):
-        super().__init__(kafka_hosts=kafka_hosts,
-                         produce_topic=produce_topic,
-                         state_topic=state_topic,
-                         api_host_url=api_host_url,
-                         contact_email=contact_email,
-                         start_date=start_date,
-                         end_date=end_date)
+    def __init__(
+        self,
+        kafka_hosts,
+        produce_topic,
+        state_topic,
+        contact_email,
+        api_host_url="https://api.datacite.org/dois",
+        start_date=None,
+        end_date=None,
+    ):
+        super().__init__(
+            kafka_hosts=kafka_hosts,
+            produce_topic=produce_topic,
+            state_topic=state_topic,
+            api_host_url=api_host_url,
+            contact_email=contact_email,
+            start_date=start_date,
+            end_date=end_date,
+        )
 
         # for datecite, it's "from-update-date"
         self.name = "Datacite"
@@ -219,19 +248,21 @@ class HarvestDataciteWorker(HarvestCrossrefWorker):
         Dates have to be supplied in 2018-10-27T22:36:30.000Z format.
         """
         return {
-            'query': 'updated:[{}T00:00:00.000Z TO {}T23:59:59.999Z]'.format(date_str, date_str),
-            'page[size]': self.api_batch_size,
-            'page[cursor]': 1,
+            "query": "updated:[{}T00:00:00.000Z TO {}T23:59:59.999Z]".format(
+                date_str, date_str
+            ),
+            "page[size]": self.api_batch_size,
+            "page[cursor]": 1,
         }
 
     def extract_items(self, resp):
-        return resp['data']
+        return resp["data"]
 
     def extract_total(self, resp):
-        return resp['meta']['total']
+        return resp["meta"]["total"]
 
     def extract_key(self, obj):
-        return obj['attributes']['doi'].encode('utf-8')
+        return obj["attributes"]["doi"].encode("utf-8")
 
     def update_params(self, params, resp):
         """
@@ -245,9 +276,9 @@ class HarvestDataciteWorker(HarvestCrossrefWorker):
             https://github.com/datacite/datacite/issues/897 (HTTP 400)
             https://github.com/datacite/datacite/issues/898 (HTTP 500)
         """
-        parsed = urlparse(resp['links']['next'])
-        page_cursor = parse_qs(parsed.query).get('page[cursor]')
+        parsed = urlparse(resp["links"]["next"])
+        page_cursor = parse_qs(parsed.query).get("page[cursor]")
         if not page_cursor:
-            raise ValueError('no page[cursor] in .links.next')
-        params['page[cursor]'] = page_cursor[0]
+            raise ValueError("no page[cursor] in .links.next")
+        params["page[cursor]"] = page_cursor[0]
         return params
diff --git a/python/fatcat_tools/harvest/harvest_common.py b/python/fatcat_tools/harvest/harvest_common.py
index 45c2b8ea..fda0dc62 100644
--- a/python/fatcat_tools/harvest/harvest_common.py
+++ b/python/fatcat_tools/harvest/harvest_common.py
@@ -1,4 +1,3 @@
-
 import datetime
 import json
 import sys
@@ -14,8 +13,10 @@ from requests.packages.urllib3.util.retry import Retry  # pylint: disable=import
 # Used for parsing ISO date format (YYYY-MM-DD)
 DATE_FMT = "%Y-%m-%d"
 
-def requests_retry_session(retries=10, backoff_factor=3,
-        status_forcelist=(500, 502, 504), session=None):
+
+def requests_retry_session(
+    retries=10, backoff_factor=3, status_forcelist=(500, 502, 504), session=None
+):
     """
     From: https://www.peterbe.com/plog/best-practice-with-retries-with-requests
     """
@@ -28,10 +29,11 @@ def requests_retry_session(retries=10, backoff_factor=3,
         status_forcelist=status_forcelist,
     )
     adapter = HTTPAdapter(max_retries=retry)
-    session.mount('http://', adapter)
-    session.mount('https://', adapter)
+    session.mount("http://", adapter)
+    session.mount("https://", adapter)
     return session
 
+
 class HarvestState:
     """
     First version of this works with full days (dates)
@@ -57,8 +59,9 @@ class HarvestState:
             self.enqueue_period(start_date, end_date, catchup_days)
 
     def __str__(self):
-        return '<HarvestState to_process={}, completed={}>'.format(
-            len(self.to_process), len(self.completed))
+        return "<HarvestState to_process={}, completed={}>".format(
+            len(self.to_process), len(self.completed)
+        )
 
     def enqueue_period(self, start_date=None, end_date=None, catchup_days=14):
         """
@@ -92,7 +95,9 @@ class HarvestState:
         """
         if continuous:
             # enqueue yesterday
-            self.enqueue_period(start_date=datetime.datetime.utcnow().date() - datetime.timedelta(days=1))
+            self.enqueue_period(
+                start_date=datetime.datetime.utcnow().date() - datetime.timedelta(days=1)
+            )
         if not self.to_process:
             return None
         return sorted(list(self.to_process))[0]
@@ -105,8 +110,8 @@ class HarvestState:
         state stored on disk or in Kafka.
         """
         state = json.loads(state_json)
-        if 'completed-date' in state:
-            date = datetime.datetime.strptime(state['completed-date'], DATE_FMT).date()
+        if "completed-date" in state:
+            date = datetime.datetime.strptime(state["completed-date"], DATE_FMT).date()
             self.complete(date)
 
     def complete(self, date, kafka_topic=None, kafka_config=None):
@@ -123,12 +128,14 @@ class HarvestState:
         except KeyError:
             pass
         self.completed.add(date)
-        state_json = json.dumps({
-            'in-progress-dates': [str(d) for d in self.to_process],
-            'completed-date': str(date),
-        }).encode('utf-8')
+        state_json = json.dumps(
+            {
+                "in-progress-dates": [str(d) for d in self.to_process],
+                "completed-date": str(date),
+            }
+        ).encode("utf-8")
         if kafka_topic:
-            assert(kafka_config)
+            assert kafka_config
 
             def fail_fast(err, msg):
                 if err:
@@ -136,17 +143,16 @@ class HarvestState:
 
             print("Committing status to Kafka: {}".format(kafka_topic), file=sys.stderr)
             producer_conf = kafka_config.copy()
-            producer_conf.update({
-                'delivery.report.only.error': True,
-                'default.topic.config': {
-                    'request.required.acks': -1, # all brokers must confirm
-                },
-            })
+            producer_conf.update(
+                {
+                    "delivery.report.only.error": True,
+                    "default.topic.config": {
+                        "request.required.acks": -1,  # all brokers must confirm
+                    },
+                }
+            )
             producer = Producer(producer_conf)
-            producer.produce(
-                kafka_topic,
-                state_json,
-                on_delivery=fail_fast)
+            producer.produce(kafka_topic, state_json, on_delivery=fail_fast)
             producer.flush()
         return state_json
 
@@ -166,22 +172,25 @@ class HarvestState:
                 raise KafkaException(err)
 
         conf = kafka_config.copy()
-        conf.update({
-            'group.id': 'dummy_init_group', # should never be committed
-            'enable.auto.commit': False,
-            'auto.offset.reset': 'earliest',
-            'session.timeout.ms': 10000,
-        })
+        conf.update(
+            {
+                "group.id": "dummy_init_group",  # should never be committed
+                "enable.auto.commit": False,
+                "auto.offset.reset": "earliest",
+                "session.timeout.ms": 10000,
+            }
+        )
         consumer = Consumer(conf)
 
         # this watermark fetch is mostly to ensure we are connected to broker and
         # fail fast if not, but we also confirm that we read to end below.
         hwm = consumer.get_watermark_offsets(
-            TopicPartition(kafka_topic, 0),
-            timeout=5.0,
-            cached=False)
+            TopicPartition(kafka_topic, 0), timeout=5.0, cached=False
+        )
         if not hwm:
-            raise Exception("Kafka consumer timeout, or topic {} doesn't exist".format(kafka_topic))
+            raise Exception(
+                "Kafka consumer timeout, or topic {} doesn't exist".format(kafka_topic)
+            )
 
         consumer.assign([TopicPartition(kafka_topic, 0, 0)])
         c = 0
@@ -191,8 +200,8 @@ class HarvestState:
                 break
             if msg.error():
                 raise KafkaException(msg.error())
-            #sys.stdout.write('.')
-            self.update(msg.value().decode('utf-8'))
+            # sys.stdout.write('.')
+            self.update(msg.value().decode("utf-8"))
             c += 1
         consumer.close()
 
diff --git a/python/fatcat_tools/harvest/oaipmh.py b/python/fatcat_tools/harvest/oaipmh.py
index 0eb0343d..40d1c853 100644
--- a/python/fatcat_tools/harvest/oaipmh.py
+++ b/python/fatcat_tools/harvest/oaipmh.py
@@ -1,4 +1,3 @@
-
 import sys
 import time
 
@@ -25,19 +24,18 @@ class HarvestOaiPmhWorker:
     would want something similar operationally. Oh well!
     """
 
-    def __init__(self, kafka_hosts, produce_topic, state_topic,
-            start_date=None, end_date=None):
+    def __init__(self, kafka_hosts, produce_topic, state_topic, start_date=None, end_date=None):
 
         self.produce_topic = produce_topic
         self.state_topic = state_topic
         self.kafka_config = {
-            'bootstrap.servers': kafka_hosts,
-            'message.max.bytes': 20000000, # ~20 MBytes; broker is ~50 MBytes
+            "bootstrap.servers": kafka_hosts,
+            "message.max.bytes": 20000000,  # ~20 MBytes; broker is ~50 MBytes
         }
 
-        self.loop_sleep = 60*60 # how long to wait, in seconds, between date checks
+        self.loop_sleep = 60 * 60  # how long to wait, in seconds, between date checks
 
-        self.endpoint_url = None # needs override
+        self.endpoint_url = None  # needs override
         self.metadata_prefix = None  # needs override
         self.name = "unnamed"
         self.state = HarvestState(start_date, end_date)
@@ -45,7 +43,6 @@ class HarvestOaiPmhWorker:
         print(self.state, file=sys.stderr)
 
     def fetch_date(self, date):
-
         def fail_fast(err, msg):
             if err is not None:
                 print("Kafka producer delivery error: {}".format(err), file=sys.stderr)
@@ -54,12 +51,14 @@ class HarvestOaiPmhWorker:
                 raise KafkaException(err)
 
         producer_conf = self.kafka_config.copy()
-        producer_conf.update({
-            'delivery.report.only.error': True,
-            'default.topic.config': {
-                'request.required.acks': -1, # all brokers must confirm
-            },
-        })
+        producer_conf.update(
+            {
+                "delivery.report.only.error": True,
+                "default.topic.config": {
+                    "request.required.acks": -1,  # all brokers must confirm
+                },
+            }
+        )
         producer = Producer(producer_conf)
 
         api = sickle.Sickle(self.endpoint_url, max_retries=5, retry_status_codes=[503])
@@ -67,13 +66,18 @@ class HarvestOaiPmhWorker:
         # this dict kwargs hack is to work around 'from' as a reserved python keyword
         # recommended by sickle docs
         try:
-            records = api.ListRecords(**{
-                'metadataPrefix': self.metadata_prefix,
-                'from': date_str,
-                'until': date_str,
-            })
+            records = api.ListRecords(
+                **{
+                    "metadataPrefix": self.metadata_prefix,
+                    "from": date_str,
+                    "until": date_str,
+                }
+            )
         except sickle.oaiexceptions.NoRecordsMatch:
-            print("WARN: no OAI-PMH records for this date: {} (UTC)".format(date_str), file=sys.stderr)
+            print(
+                "WARN: no OAI-PMH records for this date: {} (UTC)".format(date_str),
+                file=sys.stderr,
+            )
             return
 
         count = 0
@@ -83,9 +87,10 @@ class HarvestOaiPmhWorker:
                 print("... up to {}".format(count), file=sys.stderr)
             producer.produce(
                 self.produce_topic,
-                item.raw.encode('utf-8'),
-                key=item.header.identifier.encode('utf-8'),
-                on_delivery=fail_fast)
+                item.raw.encode("utf-8"),
+                key=item.header.identifier.encode("utf-8"),
+                on_delivery=fail_fast,
+            )
         producer.flush()
 
     def run(self, continuous=False):
@@ -95,9 +100,9 @@ class HarvestOaiPmhWorker:
             if current:
                 print("Fetching DOIs updated on {} (UTC)".format(current), file=sys.stderr)
                 self.fetch_date(current)
-                self.state.complete(current,
-                    kafka_topic=self.state_topic,
-                    kafka_config=self.kafka_config)
+                self.state.complete(
+                    current, kafka_topic=self.state_topic, kafka_config=self.kafka_config
+                )
                 continue
 
             if continuous:
diff --git a/python/fatcat_tools/harvest/pubmed.py b/python/fatcat_tools/harvest/pubmed.py
index ee55f4eb..0f33f334 100644
--- a/python/fatcat_tools/harvest/pubmed.py
+++ b/python/fatcat_tools/harvest/pubmed.py
@@ -60,14 +60,15 @@ class PubmedFTPWorker:
         <tr>
 
     """
+
     def __init__(self, kafka_hosts, produce_topic, state_topic, start_date=None, end_date=None):
-        self.name = 'Pubmed'
-        self.host = 'ftp.ncbi.nlm.nih.gov'
+        self.name = "Pubmed"
+        self.host = "ftp.ncbi.nlm.nih.gov"
         self.produce_topic = produce_topic
         self.state_topic = state_topic
         self.kafka_config = {
-            'bootstrap.servers': kafka_hosts,
-            'message.max.bytes': 20000000,  # ~20 MBytes; broker is ~50 MBytes
+            "bootstrap.servers": kafka_hosts,
+            "message.max.bytes": 20000000,  # ~20 MBytes; broker is ~50 MBytes
         }
         self.loop_sleep = 60 * 60  # how long to wait, in seconds, between date checks
         self.state = HarvestState(start_date, end_date)
@@ -86,12 +87,14 @@ class PubmedFTPWorker:
         self._kafka_fail_fast = fail_fast
 
         producer_conf = self.kafka_config.copy()
-        producer_conf.update({
-            'delivery.report.only.error': True,
-            'default.topic.config': {
-                'request.required.acks': -1,  # all brokers must confirm
-            },
-        })
+        producer_conf.update(
+            {
+                "delivery.report.only.error": True,
+                "default.topic.config": {
+                    "request.required.acks": -1,  # all brokers must confirm
+                },
+            }
+        )
         return Producer(producer_conf)
 
     def fetch_date(self, date):
@@ -105,24 +108,35 @@ class PubmedFTPWorker:
         if self.date_file_map is None:
             raise ValueError("cannot fetch date without date file mapping")
 
-        date_str = date.strftime('%Y-%m-%d')
+        date_str = date.strftime("%Y-%m-%d")
         paths = self.date_file_map.get(date_str)
         if paths is None:
-            print("WARN: no pubmed update for this date: {} (UTC), available dates were: {}".format(date_str, self.date_file_map), file=sys.stderr)
+            print(
+                "WARN: no pubmed update for this date: {} (UTC), available dates were: {}".format(
+                    date_str, self.date_file_map
+                ),
+                file=sys.stderr,
+            )
             return False
 
         count = 0
         for path in paths:
             # Fetch and decompress file.
             url = "ftp://{}{}".format(self.host, path)
-            filename = ftpretr(url, proxy_hostport="159.69.240.245:15201") # TODO: proxy obsolete, when networking issue is resolved
-            with tempfile.NamedTemporaryFile(prefix='fatcat-ftp-tmp-', delete=False) as decomp:
+            filename = ftpretr(
+                url, proxy_hostport="159.69.240.245:15201"
+            )  # TODO: proxy obsolete, when networking issue is resolved
+            with tempfile.NamedTemporaryFile(prefix="fatcat-ftp-tmp-", delete=False) as decomp:
                 try:
                     gzf = gzip.open(filename)
                     shutil.copyfileobj(gzf, decomp)
                 except zlib.error as exc:
-                    print('[skip] retrieving {} failed with {} (maybe empty, missing or broken gzip)'.format(
-                        url, exc), file=sys.stderr)
+                    print(
+                        "[skip] retrieving {} failed with {} (maybe empty, missing or broken gzip)".format(
+                            url, exc
+                        ),
+                        file=sys.stderr,
+                    )
                     continue
 
             # Here, blob is the unparsed XML; we peek into it to use PMID as
@@ -131,15 +145,17 @@ class PubmedFTPWorker:
             # WARNING: Parsing foreign XML exposes us at some
             # https://docs.python.org/3/library/xml.html#xml-vulnerabilities
             # here.
-            for blob in xmlstream(decomp.name, 'PubmedArticle', encoding='utf-8'):
-                soup = BeautifulSoup(blob, 'xml')
-                pmid = soup.find('PMID')
+            for blob in xmlstream(decomp.name, "PubmedArticle", encoding="utf-8"):
+                soup = BeautifulSoup(blob, "xml")
+                pmid = soup.find("PMID")
                 if pmid is None:
                     raise ValueError("no PMID found, please adjust identifier extraction")
                 count += 1
                 if count % 50 == 0:
                     print("... up to {}".format(count), file=sys.stderr)
-                self.producer.produce(self.produce_topic, blob, key=pmid.text, on_delivery=self._kafka_fail_fast)
+                self.producer.produce(
+                    self.produce_topic, blob, key=pmid.text, on_delivery=self._kafka_fail_fast
+                )
 
             self.producer.flush()
             os.remove(filename)
@@ -151,13 +167,17 @@ class PubmedFTPWorker:
         while True:
             self.date_file_map = generate_date_file_map(host=self.host)
             if len(self.date_file_map) == 0:
-                raise ValueError("map from dates to files should not be empty, maybe the HTML changed?")
+                raise ValueError(
+                    "map from dates to files should not be empty, maybe the HTML changed?"
+                )
 
             current = self.state.next_span(continuous)
             if current:
                 print("Fetching citations updated on {} (UTC)".format(current), file=sys.stderr)
                 self.fetch_date(current)
-                self.state.complete(current, kafka_topic=self.state_topic, kafka_config=self.kafka_config)
+                self.state.complete(
+                    current, kafka_topic=self.state_topic, kafka_config=self.kafka_config
+                )
                 continue
 
             if continuous:
@@ -168,7 +188,7 @@ class PubmedFTPWorker:
         print("{} FTP ingest caught up".format(self.name))
 
 
-def generate_date_file_map(host='ftp.ncbi.nlm.nih.gov'):
+def generate_date_file_map(host="ftp.ncbi.nlm.nih.gov"):
     """
     Generate a DefaultDict[string, set] mapping dates to absolute filepaths on
     the server (mostly we have one file, but sometimes more).
@@ -176,14 +196,14 @@ def generate_date_file_map(host='ftp.ncbi.nlm.nih.gov'):
     Example: {"2020-01-02": set(["/pubmed/updatefiles/pubmed20n1016.xml.gz"]), ...}
     """
     mapping = collections.defaultdict(set)
-    pattern = re.compile(r'Filename: ([^ ]*.xml) -- Created: ([^<]*)')
+    pattern = re.compile(r"Filename: ([^ ]*.xml) -- Created: ([^<]*)")
     ftp = ftplib.FTP(host)
     ftp.login()
-    filenames = ftp.nlst('/pubmed/updatefiles')
+    filenames = ftp.nlst("/pubmed/updatefiles")
     retries, retry_delay = 10, 60
 
     for name in filenames:
-        if not name.endswith('.html'):
+        if not name.endswith(".html"):
             continue
         sio = io.StringIO()
         for i in range(retries):
@@ -201,10 +221,14 @@ def generate_date_file_map(host='ftp.ncbi.nlm.nih.gov'):
                     ftp = ftplib.FTP(host)
                     ftp.login()
                     sio.truncate(0)
-                ftp.retrlines('RETR {}'.format(name), sio.write)
+                ftp.retrlines("RETR {}".format(name), sio.write)
             except (EOFError, ftplib.error_temp, socket.gaierror, BrokenPipeError) as exc:
-                print("ftp retr on {} failed with {} ({}) ({} retries left)".format(
-                    name, exc, type(exc), retries - (i + 1)), file=sys.stderr)
+                print(
+                    "ftp retr on {} failed with {} ({}) ({} retries left)".format(
+                        name, exc, type(exc), retries - (i + 1)
+                    ),
+                    file=sys.stderr,
+                )
                 if i + 1 == retries:
                     raise
                 else:
@@ -214,16 +238,24 @@ def generate_date_file_map(host='ftp.ncbi.nlm.nih.gov'):
         contents = sio.getvalue()
         match = pattern.search(contents)
         if match is None:
-            print('pattern miss in {} on: {}, may need to adjust pattern: {}'.format(name, contents, pattern), file=sys.stderr)
+            print(
+                "pattern miss in {} on: {}, may need to adjust pattern: {}".format(
+                    name, contents, pattern
+                ),
+                file=sys.stderr,
+            )
             continue
-        filename, filedate = match.groups()  # ('pubmed20n1017.xml', 'Tue Dec 17 15:23:32 EST 2019')
+        (
+            filename,
+            filedate,
+        ) = match.groups()  # ('pubmed20n1017.xml', 'Tue Dec 17 15:23:32 EST 2019')
         date = dateparser.parse(filedate)
-        fullpath = '/pubmed/updatefiles/{}.gz'.format(filename)
-        date_str = date.strftime('%Y-%m-%d')
+        fullpath = "/pubmed/updatefiles/{}.gz".format(filename)
+        date_str = date.strftime("%Y-%m-%d")
         mapping[date_str].add(fullpath)
-        print('added entry for {}: {}'.format(date_str, fullpath), file=sys.stderr)
+        print("added entry for {}: {}".format(date_str, fullpath), file=sys.stderr)
 
-    print('generated date-file mapping for {} dates'.format(len(mapping)), file=sys.stderr)
+    print("generated date-file mapping for {} dates".format(len(mapping)), file=sys.stderr)
     return mapping
 
 
@@ -241,20 +273,29 @@ def ftpretr(url, max_retries=10, retry_delay=1, proxy_hostport=None):
     when we encountered EOFError while talking to the FTP server. Retry delay in seconds.
     """
     if proxy_hostport is not None:
-        return ftpretr_via_http_proxy(url, proxy_hostport, max_retries=max_retries, retry_delay=retry_delay)
+        return ftpretr_via_http_proxy(
+            url, proxy_hostport, max_retries=max_retries, retry_delay=retry_delay
+        )
     parsed = urlparse(url)
     server, path = parsed.netloc, parsed.path
     for i in range(max_retries):
         try:
             ftp = ftplib.FTP(server)
             ftp.login()
-            with tempfile.NamedTemporaryFile(prefix='fatcat-ftp-tmp-', delete=False) as f:
-                print('retrieving {} from {} to {} ...'.format(path, server, f.name), file=sys.stderr)
-                ftp.retrbinary('RETR %s' % path, f.write)
+            with tempfile.NamedTemporaryFile(prefix="fatcat-ftp-tmp-", delete=False) as f:
+                print(
+                    "retrieving {} from {} to {} ...".format(path, server, f.name),
+                    file=sys.stderr,
+                )
+                ftp.retrbinary("RETR %s" % path, f.write)
             ftp.close()
         except EOFError as exc:
-            print("ftp retrbinary on {} failed with {} ({}) ({} retries left)".format(
-                path, exc, type(exc), max_retries - (i + 1)), file=sys.stderr)
+            print(
+                "ftp retrbinary on {} failed with {} ({}) ({} retries left)".format(
+                    path, exc, type(exc), max_retries - (i + 1)
+                ),
+                file=sys.stderr,
+            )
             if i + 1 == max_retries:
                 raise
             else:
@@ -263,7 +304,9 @@ def ftpretr(url, max_retries=10, retry_delay=1, proxy_hostport=None):
             return f.name
 
 
-def ftpretr_via_http_proxy(url, proxy_hostport="ftp.ncbi.nlm.nih.gov", max_retries=10, retry_delay=1):
+def ftpretr_via_http_proxy(
+    url, proxy_hostport="ftp.ncbi.nlm.nih.gov", max_retries=10, retry_delay=1
+):
     """
     Fetch file from FTP via external HTTP proxy, e.g. ftp.host.com:/a/b/c would
     be retrievable via proxy.com/a/b/c; (in 09/2021 we used
@@ -276,19 +319,23 @@ def ftpretr_via_http_proxy(url, proxy_hostport="ftp.ncbi.nlm.nih.gov", max_retri
         try:
             url = "http://{}{}".format(proxy_hostport, path)
             print("retrieving file via proxy (ftpup) from {}".format(url), file=sys.stderr)
-            with tempfile.NamedTemporaryFile(prefix='fatcat-ftp-tmp-', delete=False) as f:
+            with tempfile.NamedTemporaryFile(prefix="fatcat-ftp-tmp-", delete=False) as f:
                 cmd = ["wget", "-c", url, "-O", f.name]
-                result = subprocess.run(cmd)
+                subprocess.run(cmd, check=True)
                 return f.name
         except (subprocess.CalledProcessError, OSError, ValueError) as exc:
-            print("ftp fetch {} failed with {} ({}) ({} retries left)".format(
-                url, exc, type(exc), max_retries - (i + 1)), file=sys.stderr)
+            print(
+                "ftp fetch {} failed with {} ({}) ({} retries left)".format(
+                    url, exc, type(exc), max_retries - (i + 1)
+                ),
+                file=sys.stderr,
+            )
             if i + 1 == max_retries:
                 raise
             time.sleep(retry_delay)
 
 
-def xmlstream(filename, tag, encoding='utf-8'):
+def xmlstream(filename, tag, encoding="utf-8"):
     """
     Note: This might move into a generic place in the future.
 
@@ -300,23 +347,29 @@ def xmlstream(filename, tag, encoding='utf-8'):
 
     Known vulnerabilities: https://docs.python.org/3/library/xml.html#xml-vulnerabilities
     """
+
     def strip_ns(tag):
-        if '}' not in tag:
+        if "}" not in tag:
             return tag
-        return tag.split('}')[1]
+        return tag.split("}")[1]
 
     # https://stackoverflow.com/a/13261805, http://effbot.org/elementtree/iterparse.htm
-    context = iter(ET.iterparse(filename, events=(
-        'start',
-        'end',
-    )))
+    context = iter(
+        ET.iterparse(
+            filename,
+            events=(
+                "start",
+                "end",
+            ),
+        )
+    )
     try:
         _, root = next(context)
     except StopIteration:
         return
 
     for event, elem in context:
-        if not strip_ns(elem.tag) == tag or event == 'start':
+        if not strip_ns(elem.tag) == tag or event == "start":
             continue
 
         yield ET.tostring(elem, encoding=encoding)
diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py
index 2b0ff7ec..ae4f9049 100644
--- a/python/fatcat_tools/importers/arabesque.py
+++ b/python/fatcat_tools/importers/arabesque.py
@@ -1,9 +1,9 @@
-
 import fatcat_openapi_client
 
 from .common import SANE_MAX_RELEASES, SANE_MAX_URLS, EntityImporter, b32_hex, make_rel_url
 
-ARABESQUE_MATCH_WHERE_CLAUSE='WHERE hit = 1 AND identifier IS NOT NULL'
+ARABESQUE_MATCH_WHERE_CLAUSE = "WHERE hit = 1 AND identifier IS NOT NULL"
+
 
 class ArabesqueMatchImporter(EntityImporter):
     """
@@ -38,17 +38,17 @@ class ArabesqueMatchImporter(EntityImporter):
 
     def __init__(self, api, extid_type, require_grobid=True, **kwargs):
 
-        eg_desc = kwargs.get('editgroup_description', None) or "Match web crawl files to releases based on identifier/URL seedlist"
-        eg_extra = kwargs.get('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.ArabesqueMatchImporter')
-        if kwargs.get('crawl_id'):
-            eg_extra['crawl_id'] = kwargs.get('crawl_id')
-        kwargs['do_updates'] = kwargs.get("do_updates", False)
-        super().__init__(api,
-            editgroup_description=eg_desc,
-            editgroup_extra=eg_extra,
-            **kwargs)
-        assert extid_type in ('doi', 'pmcid', 'pmid')
+        eg_desc = (
+            kwargs.get("editgroup_description", None)
+            or "Match web crawl files to releases based on identifier/URL seedlist"
+        )
+        eg_extra = kwargs.get("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.ArabesqueMatchImporter")
+        if kwargs.get("crawl_id"):
+            eg_extra["crawl_id"] = kwargs.get("crawl_id")
+        kwargs["do_updates"] = kwargs.get("do_updates", False)
+        super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
+        assert extid_type in ("doi", "pmcid", "pmid")
         self.extid_type = extid_type
         self.default_link_rel = kwargs.get("default_link_rel", "web")
         assert self.default_link_rel
@@ -60,33 +60,35 @@ class ArabesqueMatchImporter(EntityImporter):
             print("NOT checking GROBID status column")
 
     def want(self, row):
-        if self.require_grobid and not row['postproc_status'] == "200":
+        if self.require_grobid and not row["postproc_status"] == "200":
             return False
-        if (bool(row['hit']) is True
-                and row['final_sha1']
-                and row['final_timestamp']
-                and row['final_timestamp'] != "-"
-                and len(row['final_timestamp']) == 14
-                and row['final_mimetype']
-                and bool(row['hit']) is True
-                and row['identifier']):
+        if (
+            bool(row["hit"]) is True
+            and row["final_sha1"]
+            and row["final_timestamp"]
+            and row["final_timestamp"] != "-"
+            and len(row["final_timestamp"]) == 14
+            and row["final_mimetype"]
+            and bool(row["hit"]) is True
+            and row["identifier"]
+        ):
             return True
         else:
             return False
 
     def parse_record(self, row):
 
-        extid = row['identifier'].strip()
+        extid = row["identifier"].strip()
 
         # check/cleanup DOI
-        if self.extid_type == 'doi':
+        if self.extid_type == "doi":
             extid = extid.lower()
-            extid.replace('http://doi.org/', '')
-            extid.replace('https://doi.org/', '')
-            if extid.startswith('doi:'):
+            extid = extid.replace("http://doi.org/", "")
+            extid = extid.replace("https://doi.org/", "")
+            if extid.startswith("doi:"):
                 extid = extid[4:]
-            if not extid.startswith('10.'):
-                self.counts['skip-extid-invalid']
+            if not extid.startswith("10."):
+                self.counts["skip-extid-invalid"] += 1
                 return None
 
         # lookup extid
@@ -95,35 +97,35 @@ class ArabesqueMatchImporter(EntityImporter):
         except fatcat_openapi_client.rest.ApiException as err:
             if err.status == 404:
                 # bail on 404 (release not in DB)
-                self.counts['skip-extid-not-found'] += 1
+                self.counts["skip-extid-not-found"] += 1
                 return None
             elif err.status == 400:
-                self.counts['skip-extid-invalid'] += 1
+                self.counts["skip-extid-invalid"] += 1
                 return None
             else:
                 raise err
 
-        url = make_rel_url(row['final_url'], self.default_link_rel)
+        url = make_rel_url(row["final_url"], self.default_link_rel)
         if not url:
-            self.counts['skip-url'] += 1
+            self.counts["skip-url"] += 1
             return None
-        if not row['final_timestamp']:
-            self.counts['skip-missing-timestamp'] += 1
+        if not row["final_timestamp"]:
+            self.counts["skip-missing-timestamp"] += 1
             return None
         wayback = "https://web.archive.org/web/{}/{}".format(
-            row['final_timestamp'],
-            row['final_url'])
+            row["final_timestamp"], row["final_url"]
+        )
         urls = [url, ("webarchive", wayback)]
 
         urls = [fatcat_openapi_client.FileUrl(rel=rel, url=url) for (rel, url) in urls]
 
         if len(urls) > SANE_MAX_URLS:
-            self.counts['skip-too-many-url'] += 1
+            self.counts["skip-too-many-url"] += 1
             return None
 
         fe = fatcat_openapi_client.FileEntity(
-            sha1=b32_hex(row['final_sha1']),
-            mimetype=row['final_mimetype'] or self.default_mimetype,
+            sha1=b32_hex(row["final_sha1"]),
+            mimetype=row["final_mimetype"] or self.default_mimetype,
             release_ids=[re.ident],
             urls=urls,
         )
@@ -143,15 +145,15 @@ class ArabesqueMatchImporter(EntityImporter):
 
         if (fe.release_ids[0] in existing.release_ids) and existing.urls:
             # TODO: could still, in theory update with the new URL?
-            self.counts['exists'] += 1
+            self.counts["exists"] += 1
             return False
 
         if not self.do_updates:
-            self.counts['skip-update-disabled'] += 1
+            self.counts["skip-update-disabled"] += 1
             return False
 
         if existing.ident in [e.ident for e in self._edits_inflight]:
-            self.counts['skip-update-inflight'] += 1
+            self.counts["skip-update-inflight"] += 1
             return False
 
         # TODO: this code path never gets hit because of the check above
@@ -159,28 +161,33 @@ class ArabesqueMatchImporter(EntityImporter):
             existing_urls = set([u.url for u in existing.urls])
             new_urls = set([u.url for u in fe.urls])
             if existing_urls.issuperset(new_urls):
-                self.counts['skip-update-nothing-new'] += 1
+                self.counts["skip-update-nothing-new"] += 1
                 return False
 
         # merge the existing into this one and update
         existing.urls = list(set([(u.rel, u.url) for u in fe.urls + existing.urls]))
-        existing.urls = [fatcat_openapi_client.FileUrl(rel=rel, url=url) for (rel, url) in existing.urls]
+        existing.urls = [
+            fatcat_openapi_client.FileUrl(rel=rel, url=url) for (rel, url) in existing.urls
+        ]
         if len(existing.urls) > SANE_MAX_URLS:
-            self.counts['skip-update-too-many-url'] += 1
+            self.counts["skip-update-too-many-url"] += 1
             return None
         existing.release_ids = list(set(fe.release_ids + existing.release_ids))
         if len(existing.release_ids) > SANE_MAX_RELEASES:
-            self.counts['skip-update-too-many-url'] += 1
+            self.counts["skip-update-too-many-releases"] += 1
             return None
         existing.mimetype = existing.mimetype or fe.mimetype
         edit = self.api.update_file(self.get_editgroup_id(), existing.ident, existing)
         self._edits_inflight.append(edit)
-        self.counts['update'] += 1
+        self.counts["update"] += 1
         return False
 
     def insert_batch(self, batch):
-        self.api.create_file_auto_batch(fatcat_openapi_client.FileAutoBatch(
-            editgroup=fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra),
-            entity_list=batch))
+        self.api.create_file_auto_batch(
+            fatcat_openapi_client.FileAutoBatch(
+                editgroup=fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                ),
+                entity_list=batch,
+            )
+        )
diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py
index fc429fb0..7a689ed2 100644
--- a/python/fatcat_tools/importers/arxiv.py
+++ b/python/fatcat_tools/importers/arxiv.py
@@ -1,4 +1,3 @@
-
 import datetime
 import json
 import re
@@ -13,6 +12,7 @@ from .crossref import lookup_license_slug
 
 latex2text = LatexNodes2Text()
 
+
 def latex_to_text(raw):
     try:
         return latex2text.latex_to_text(raw).strip()
@@ -21,13 +21,14 @@ def latex_to_text(raw):
     except IndexError:
         return raw.strip()
 
+
 def parse_arxiv_authors(raw):
     if not raw:
         return []
-    raw = raw.replace('*', '')
-    if '(' in raw:
-        raw = re.sub(r'\(.*\)', '', raw)
-    authors = raw.split(', ')
+    raw = raw.replace("*", "")
+    if "(" in raw:
+        raw = re.sub(r"\(.*\)", "", raw)
+    authors = raw.split(", ")
     if authors:
         last = authors[-1].split(" and ")
         if len(last) == 2:
@@ -39,9 +40,12 @@ def parse_arxiv_authors(raw):
     authors = [a for a in authors if a]
     return authors
 
+
 def test_parse_arxiv_authors():
 
-    assert parse_arxiv_authors("Raphael Chetrite, Shamik Gupta, Izaak Neri and \\'Edgar Rold\\'an") == [
+    assert parse_arxiv_authors(
+        "Raphael Chetrite, Shamik Gupta, Izaak Neri and \\'Edgar Rold\\'an"
+    ) == [
         "Raphael Chetrite",
         "Shamik Gupta",
         "Izaak Neri",
@@ -63,7 +67,9 @@ def test_parse_arxiv_authors():
         "Raphael Chetrite Shamik Gupta",
     ]
 
-    assert parse_arxiv_authors("B. P. Lanyon, T. J. Weinhold, N. K. Langford, M. Barbieri, D. F. V.  James*, A. Gilchrist, and A. G. White (University of Queensland, *University of Toronto)") == [
+    assert parse_arxiv_authors(
+        "B. P. Lanyon, T. J. Weinhold, N. K. Langford, M. Barbieri, D. F. V.  James*, A. Gilchrist, and A. G. White (University of Queensland, *University of Toronto)"
+    ) == [
         "B. P. Lanyon",
         "T. J. Weinhold",
         "N. K. Langford",
@@ -84,17 +90,21 @@ class ArxivRawImporter(EntityImporter):
 
     def __init__(self, api, **kwargs):
 
-        eg_desc = kwargs.get('editgroup_description',
-            "Automated import of arxiv metadata via arXivRaw OAI-PMH feed")
-        eg_extra = kwargs.get('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.ArxivRawImporter')
+        eg_desc = kwargs.get(
+            "editgroup_description",
+            "Automated import of arxiv metadata via arXivRaw OAI-PMH feed",
+        )
+        eg_extra = kwargs.get("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.ArxivRawImporter")
         # lower batch size, because multiple versions per entry (guessing 2-3 on average?)
-        batch_size = kwargs.get('edit_batch_size', 50)
-        super().__init__(api,
+        batch_size = kwargs.get("edit_batch_size", 50)
+        super().__init__(
+            api,
             editgroup_description=eg_desc,
             editgroup_extra=eg_extra,
             batch_size=batch_size,
-            **kwargs)
+            **kwargs
+        )
         self._test_override = False
 
     def parse_record(self, record):
@@ -114,53 +124,56 @@ class ArxivRawImporter(EntityImporter):
         doi = None
         if metadata.doi and metadata.doi.string:
             doi = metadata.doi.string.lower().split()[0].strip()
-            if not (doi.startswith('10.') and '/' in doi and doi.split('/')[1]):
+            if not (doi.startswith("10.") and "/" in doi and doi.split("/")[1]):
                 sys.stderr.write("BOGUS DOI: {}\n".format(doi))
                 doi = None
-        title = latex_to_text(metadata.title.get_text().replace('\n', ' '))
-        authors = parse_arxiv_authors(metadata.authors.get_text().replace('\n', ' '))
-        contribs = [fatcat_openapi_client.ReleaseContrib(index=i, raw_name=a, role='author') for i, a in enumerate(authors)]
-
-        lang = "en"     # the vast majority in english
+        title = latex_to_text(metadata.title.get_text().replace("\n", " "))
+        authors = parse_arxiv_authors(metadata.authors.get_text().replace("\n", " "))
+        contribs = [
+            fatcat_openapi_client.ReleaseContrib(index=i, raw_name=a, role="author")
+            for i, a in enumerate(authors)
+        ]
+
+        lang = "en"  # the vast majority in english
         if metadata.comments and metadata.comments.get_text():
-            comments = metadata.comments.get_text().replace('\n', ' ').strip()
-            extra_arxiv['comments'] = comments
-            if 'in french' in comments.lower():
-                lang = 'fr'
-            elif 'in spanish' in comments.lower():
-                lang = 'es'
-            elif 'in portuguese' in comments.lower():
-                lang = 'pt'
-            elif 'in hindi' in comments.lower():
-                lang = 'hi'
-            elif 'in japanese' in comments.lower():
-                lang = 'ja'
-            elif 'in german' in comments.lower():
-                lang = 'de'
-            elif 'simplified chinese' in comments.lower():
-                lang = 'zh'
-            elif 'in russian' in comments.lower():
-                lang = 'ru'
+            comments = metadata.comments.get_text().replace("\n", " ").strip()
+            extra_arxiv["comments"] = comments
+            if "in french" in comments.lower():
+                lang = "fr"
+            elif "in spanish" in comments.lower():
+                lang = "es"
+            elif "in portuguese" in comments.lower():
+                lang = "pt"
+            elif "in hindi" in comments.lower():
+                lang = "hi"
+            elif "in japanese" in comments.lower():
+                lang = "ja"
+            elif "in german" in comments.lower():
+                lang = "de"
+            elif "simplified chinese" in comments.lower():
+                lang = "zh"
+            elif "in russian" in comments.lower():
+                lang = "ru"
             # more languages?
 
         number = None
-        if metadata.find('journal-ref') and metadata.find('journal-ref').get_text():
-            journal_ref = metadata.find('journal-ref').get_text().replace('\n', ' ').strip()
-            extra_arxiv['journal_ref'] = journal_ref
+        if metadata.find("journal-ref") and metadata.find("journal-ref").get_text():
+            journal_ref = metadata.find("journal-ref").get_text().replace("\n", " ").strip()
+            extra_arxiv["journal_ref"] = journal_ref
             if "conf." in journal_ref.lower() or "proc." in journal_ref.lower():
                 release_type = "paper-conference"
-        if metadata.find('report-no') and metadata.find('report-no').string:
-            number = metadata.find('report-no').string.strip()
+        if metadata.find("report-no") and metadata.find("report-no").string:
+            number = metadata.find("report-no").string.strip()
             # at least some people plop extra metadata in here. hrmf!
-            if 'ISSN ' in number or 'ISBN ' in number or len(number.split()) > 2:
-                extra_arxiv['report-no'] = number
+            if "ISSN " in number or "ISBN " in number or len(number.split()) > 2:
+                extra_arxiv["report-no"] = number
                 number = None
             else:
                 release_type = "report"
-        if metadata.find('acm-class') and metadata.find('acm-class').string:
-            extra_arxiv['acm_class'] = metadata.find('acm-class').string.strip()
+        if metadata.find("acm-class") and metadata.find("acm-class").string:
+            extra_arxiv["acm_class"] = metadata.find("acm-class").string.strip()
         if metadata.categories and metadata.categories.get_text():
-            extra_arxiv['categories'] = metadata.categories.get_text().split()
+            extra_arxiv["categories"] = metadata.categories.get_text().split()
         license_slug = None
         if metadata.license and metadata.license.get_text():
             license_slug = lookup_license_slug(metadata.license.get_text())
@@ -170,21 +183,29 @@ class ArxivRawImporter(EntityImporter):
             abstracts = []
             abst = metadata.abstract.get_text().strip()
             orig = None
-            if '-----' in abst:
-                both = abst.split('-----')
+            if "-----" in abst:
+                both = abst.split("-----")
                 abst = both[0].strip()
                 orig = both[1].strip()
-            if '$' in abst or '{' in abst:
+            if "$" in abst or "{" in abst:
                 mime = "application/x-latex"
                 abst_plain = latex_to_text(abst)
-                abstracts.append(fatcat_openapi_client.ReleaseAbstract(content=abst_plain, mimetype="text/plain", lang="en"))
+                abstracts.append(
+                    fatcat_openapi_client.ReleaseAbstract(
+                        content=abst_plain, mimetype="text/plain", lang="en"
+                    )
+                )
             else:
                 mime = "text/plain"
-            abstracts.append(fatcat_openapi_client.ReleaseAbstract(content=abst, mimetype=mime, lang="en"))
+            abstracts.append(
+                fatcat_openapi_client.ReleaseAbstract(content=abst, mimetype=mime, lang="en")
+            )
             if orig:
-                abstracts.append(fatcat_openapi_client.ReleaseAbstract(content=orig, mimetype=mime))
+                abstracts.append(
+                    fatcat_openapi_client.ReleaseAbstract(content=orig, mimetype=mime)
+                )
                 # indicates that fulltext probably isn't english either
-                if lang == 'en':
+                if lang == "en":
                     lang = None
 
         # extra:
@@ -195,39 +216,43 @@ class ArxivRawImporter(EntityImporter):
         #   container_name
         #   group-title
         #   arxiv: comments, categories, etc
-        extra_arxiv['base_id'] = base_id
-        extra['superceded'] = True
-        extra['arxiv'] = extra_arxiv
+        extra_arxiv["base_id"] = base_id
+        extra["superceded"] = True
+        extra["arxiv"] = extra_arxiv
 
         versions = []
-        for version in metadata.find_all('version'):
-            arxiv_id = base_id + version['version']
+        for version in metadata.find_all("version"):
+            arxiv_id = base_id + version["version"]
             release_date = version.date.string.strip()
-            release_date = datetime.datetime.strptime(release_date, "%a, %d %b %Y %H:%M:%S %Z").date()
+            release_date = datetime.datetime.strptime(
+                release_date, "%a, %d %b %Y %H:%M:%S %Z"
+            ).date()
             # TODO: source_type?
-            versions.append(fatcat_openapi_client.ReleaseEntity(
-                work_id=None,
-                title=title,
-                #original_title
-                version=version['version'],
-                release_type=release_type,
-                release_stage='submitted',
-                release_date=release_date.isoformat(),
-                release_year=release_date.year,
-                ext_ids=fatcat_openapi_client.ReleaseExtIds(
-                    arxiv=arxiv_id,
-                ),
-                number=number,
-                language=lang,
-                license_slug=license_slug,
-                abstracts=abstracts,
-                contribs=contribs,
-                extra=extra.copy(),
-            ))
+            versions.append(
+                fatcat_openapi_client.ReleaseEntity(
+                    work_id=None,
+                    title=title,
+                    # original_title
+                    version=version["version"],
+                    release_type=release_type,
+                    release_stage="submitted",
+                    release_date=release_date.isoformat(),
+                    release_year=release_date.year,
+                    ext_ids=fatcat_openapi_client.ReleaseExtIds(
+                        arxiv=arxiv_id,
+                    ),
+                    number=number,
+                    language=lang,
+                    license_slug=license_slug,
+                    abstracts=abstracts,
+                    contribs=contribs,
+                    extra=extra.copy(),
+                )
+            )
         # TODO: assert that versions are actually in order?
         assert versions
 
-        versions[-1].extra.pop('superceded')
+        versions[-1].extra.pop("superceded")
 
         # only apply DOI to most recent version (HACK)
         if doi:
@@ -306,7 +331,7 @@ class ArxivRawImporter(EntityImporter):
         for v in versions:
             if v._existing_work_id:
                 if not v._updated:
-                    self.counts['exists'] += 1
+                    self.counts["exists"] += 1
                 continue
             if not any_work_id and last_edit:
                 # fetch the last inserted release from this group
@@ -315,7 +340,7 @@ class ArxivRawImporter(EntityImporter):
                 any_work_id = r.work_id
             v.work_id = any_work_id
             last_edit = self.api.create_release(self.get_editgroup_id(), v)
-            self.counts['insert'] += 1
+            self.counts["insert"] += 1
 
         return False
 
@@ -323,12 +348,15 @@ class ArxivRawImporter(EntityImporter):
         # there is no batch/bezerk mode for arxiv importer, except for testing
         if self._test_override:
             for batch in batch_batch:
-                self.api.create_release_auto_batch(fatcat_openapi_client.ReleaseAutoBatch(
-                    editgroup=fatcat_openapi_client.Editgroup(
-                        description=self.editgroup_description,
-                        extra=self.editgroup_extra),
-                    entity_list=batch))
-                self.counts['insert'] += len(batch) - 1
+                self.api.create_release_auto_batch(
+                    fatcat_openapi_client.ReleaseAutoBatch(
+                        editgroup=fatcat_openapi_client.Editgroup(
+                            description=self.editgroup_description, extra=self.editgroup_extra
+                        ),
+                        entity_list=batch,
+                    )
+                )
+                self.counts["insert"] += len(batch) - 1
         else:
             raise NotImplementedError()
 
@@ -341,9 +369,9 @@ class ArxivRawImporter(EntityImporter):
         for article in soup.find_all("record"):
             resp = self.parse_record(article)
             print(json.dumps(resp))
-            #sys.exit(-1)
+            # sys.exit(-1)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     parser = ArxivRawImporter(None)
     parser.parse_file(open(sys.argv[1]))
diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py
index 0340f6a3..e9de42fc 100755
--- a/python/fatcat_tools/importers/cdl_dash_dat.py
+++ b/python/fatcat_tools/importers/cdl_dash_dat.py
@@ -34,15 +34,15 @@ def single_file(prefix, path):
         hashlib.sha1(),
         hashlib.sha256(),
     ]
-    with open(full, 'rb') as fp:
+    with open(full, "rb") as fp:
         while True:
-            data = fp.read(2**20)
+            data = fp.read(2 ** 20)
             if not data:
                 break
             for h in hashes:
                 h.update(data)
     mime = magic.Magic(mime=True).from_file(full)
-    if mime == 'application/octet-stream':
+    if mime == "application/octet-stream":
         # magic apparently isn't that great; try using filename as well
         guess = mimetypes.guess_type(full)[0]
         if guess:
@@ -54,9 +54,11 @@ def single_file(prefix, path):
         md5=hashes[0].hexdigest(),
         sha1=hashes[1].hexdigest(),
         sha256=hashes[2].hexdigest(),
-        extra=dict(mimetype=mime))
+        extra=dict(mimetype=mime),
+    )
     return fsf
 
+
 def make_manifest(base_dir):
     manifest = []
     for root, dirs, files in os.walk(base_dir):
@@ -70,47 +72,49 @@ def cdl_dash_release(meta, extra=None):
     if not extra:
         extra = dict()
 
-    assert meta['identifier']['type'] == 'DOI'
-    doi = meta['identifier']['value'].lower()
-    assert doi.startswith('10.')
+    assert meta["identifier"]["type"] == "DOI"
+    doi = meta["identifier"]["value"].lower()
+    assert doi.startswith("10.")
 
     ark_id = None
-    for extid in meta.get('alternativeIdentifiers', []):
-        if extid['value'].startswith('ark:'):
-            ark_id = extid['value']
+    for extid in meta.get("alternativeIdentifiers", []):
+        if extid["value"].startswith("ark:"):
+            ark_id = extid["value"]
     assert ark_id
 
-    license_slug = lookup_license_slug(meta['rights']['uri'])
+    license_slug = lookup_license_slug(meta["rights"]["uri"])
 
     abstracts = []
-    for desc in meta['descriptions']:
-        if desc['type'] == "abstract":
-            abstracts.append(ReleaseAbstract(
-                mimetype="text/html",
-                content=clean(desc['value'])))
-            #print(abstracts)
+    for desc in meta["descriptions"]:
+        if desc["type"] == "abstract":
+            abstracts.append(
+                ReleaseAbstract(mimetype="text/html", content=clean(desc["value"]))
+            )
+            # print(abstracts)
     if not abstracts:
         abstracts = None
 
     contribs = []
-    for creator in meta['creator']:
-        contribs.append(ReleaseContrib(
-            given_name=creator['given'],
-            surname=creator['family'],
-            # sorry everybody
-            raw_name="{} {}".format(creator['given'], creator['family']),
-            raw_affiliation=creator.get('affiliation'),
-            role="author", # presumably, for these datasets?
-        ))
+    for creator in meta["creator"]:
+        contribs.append(
+            ReleaseContrib(
+                given_name=creator["given"],
+                surname=creator["family"],
+                # sorry everybody
+                raw_name="{} {}".format(creator["given"], creator["family"]),
+                raw_affiliation=creator.get("affiliation"),
+                role="author",  # presumably, for these datasets?
+            )
+        )
 
     r = ReleaseEntity(
         ext_ids=ReleaseExtIds(
             doi=doi,
             ark=ark_id,
         ),
-        title=clean(meta['title'], force_xml=True),
-        publisher=clean(meta['publisher']),
-        release_year=int(meta['publicationYear']),
+        title=clean(meta["title"], force_xml=True),
+        publisher=clean(meta["publisher"]),
+        release_year=int(meta["publicationYear"]),
         release_type="dataset",
         license_slug=license_slug,
         contribs=contribs,
@@ -119,66 +123,66 @@ def cdl_dash_release(meta, extra=None):
     )
     return r
 
+
 def make_release_fileset(dat_path):
 
-    if dat_path.endswith('/'):
+    if dat_path.endswith("/"):
         dat_path = dat_path[:-1]
     dat_discovery = dat_path
     extra = dict()
     assert len(dat_discovery) == 64
 
-    with open(dat_path + "/cdl_dash_metadata.json", 'r') as fp:
+    with open(dat_path + "/cdl_dash_metadata.json", "r") as fp:
         meta_dict = json.loads(fp.read())
 
     release = cdl_dash_release(meta_dict)
-    ark_id = release.extra['ark_id']
+    ark_id = release.extra["ark_id"]
 
     dash_version = None
     # really crude XML parse-out
-    with open(dat_path + "/stash-wrapper.xml", 'r') as fp:
+    with open(dat_path + "/stash-wrapper.xml", "r") as fp:
         for line in fp:
             line = line.strip()
             if line.startswith("<st:version_number>"):
-                dash_version = int(line[19:].split('<')[0])
+                dash_version = int(line[19:].split("<")[0])
     assert dash_version is not None
-    extra['cdl_dash'] = dict(version=dash_version)
-    release.extra['cdl_dash'] = dict(version=dash_version)
+    extra["cdl_dash"] = dict(version=dash_version)
+    release.extra["cdl_dash"] = dict(version=dash_version)
 
     manifest = make_manifest(dat_path + "/files/")
 
     bundle_url = dict(
         url="https://merritt.cdlib.org/u/{}/{}".format(
-            urllib.parse.quote(ark_id, safe=''),
-            dash_version),
-        rel="repo-bundle")
+            urllib.parse.quote(ark_id, safe=""), dash_version
+        ),
+        rel="repo-bundle",
+    )
     repo_url = dict(
         url="https://merritt.cdlib.org/d/{}/{}/".format(
-            urllib.parse.quote(ark_id, safe=''),
-            dash_version),
-        rel="repo")
-    dat_url = dict(
-        url="dat://{}/files/".format(dat_discovery),
-        rel="dweb")
+            urllib.parse.quote(ark_id, safe=""), dash_version
+        ),
+        rel="repo",
+    )
+    dat_url = dict(url="dat://{}/files/".format(dat_discovery), rel="dweb")
     fs = FilesetEntity(
-        urls=[bundle_url, repo_url, dat_url],
-        release_ids=None,
-        manifest=manifest,
-        extra=extra)
+        urls=[bundle_url, repo_url, dat_url], release_ids=None, manifest=manifest, extra=extra
+    )
     return (release, fs)
 
+
 def auto_cdl_dash_dat(api, dat_path, release_id=None, editgroup_id=None):
 
-    git_rev = subprocess.check_output(
-        ["git", "describe", "--always"]).strip().decode('utf-8')
+    git_rev = subprocess.check_output(["git", "describe", "--always"]).strip().decode("utf-8")
 
     (release, fileset) = make_release_fileset(dat_path)
 
     if not editgroup_id:
-        eg = api.create_editgroup(Editgroup(
-            description="One-off import of dataset(s) from CDL/DASH repository (via IA, Dat dweb pilot project)",
-            extra=dict(
-                git_rev=git_rev,
-                agent="fatcat_tools.auto_cdl_dash_dat")))
+        eg = api.create_editgroup(
+            Editgroup(
+                description="One-off import of dataset(s) from CDL/DASH repository (via IA, Dat dweb pilot project)",
+                extra=dict(git_rev=git_rev, agent="fatcat_tools.auto_cdl_dash_dat"),
+            )
+        )
         editgroup_id = eg.editgroup_id
 
     if not release_id and release.ext_ids.doi:
@@ -201,6 +205,7 @@ def auto_cdl_dash_dat(api, dat_path, release_id=None, editgroup_id=None):
     fileset = api.get_fileset(edit.ident)
     return (editgroup_id, release, fileset)
 
-if __name__=='__main__':
+
+if __name__ == "__main__":
     # pass this a discovery key that has been cloned to the local directory
     print(make_release_fileset(sys.argv[1]))
diff --git a/python/fatcat_tools/importers/chocula.py b/python/fatcat_tools/importers/chocula.py
index 0b634e73..8d2a89b6 100644
--- a/python/fatcat_tools/importers/chocula.py
+++ b/python/fatcat_tools/importers/chocula.py
@@ -1,4 +1,3 @@
-
 import fatcat_openapi_client
 
 from .common import EntityImporter, clean
@@ -15,20 +14,19 @@ class ChoculaImporter(EntityImporter):
 
     def __init__(self, api, **kwargs):
 
-        eg_desc = kwargs.get('editgroup_description',
-            "Automated import of container-level metadata from Chocula tool.")
-        eg_extra = kwargs.get('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.ChoculaImporter')
-        super().__init__(api,
-            editgroup_description=eg_desc,
-            editgroup_extra=eg_extra,
-            **kwargs)
+        eg_desc = kwargs.get(
+            "editgroup_description",
+            "Automated import of container-level metadata from Chocula tool.",
+        )
+        eg_extra = kwargs.get("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.ChoculaImporter")
+        super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
     def want(self, raw_record):
-        if not raw_record.get('ident') and not raw_record.get('_known_issnl'):
-            self.counts['skip-unknown-new-issnl'] += 1
+        if not raw_record.get("ident") and not raw_record.get("_known_issnl"):
+            self.counts["skip-unknown-new-issnl"] += 1
             return False
-        if raw_record.get('issnl') and raw_record.get('name'):
+        if raw_record.get("issnl") and raw_record.get("name"):
             return True
         return False
 
@@ -39,42 +37,55 @@ class ChoculaImporter(EntityImporter):
         returns a ContainerEntity (or None if invalid or couldn't parse)
         """
 
-        name = clean(row.get('name'))
+        name = clean(row.get("name"))
         if not name:
             # Name is required (by schema)
             return None
 
         name = name.strip()
 
-        if name.endswith(',  Proceedings of the'):
-            name = "Proceedings of the " + name.split(',')[0]
+        if name.endswith(",  Proceedings of the"):
+            name = "Proceedings of the " + name.split(",")[0]
 
-        if name.endswith('.'):
+        if name.endswith("."):
             name = name[:-1]
 
         extra = dict()
-        for k in ('urls', 'webarchive_urls', 'country',
-                  'sherpa_romeo', 'ezb', 'szczepanski', 'doaj', 'languages',
-                  'ia', 'scielo', 'kbart', 'publisher_type', 'platform'):
-            if row['extra'].get(k):
-                extra[k] = row['extra'][k]
+        for k in (
+            "urls",
+            "webarchive_urls",
+            "country",
+            "sherpa_romeo",
+            "ezb",
+            "szczepanski",
+            "doaj",
+            "languages",
+            "ia",
+            "scielo",
+            "kbart",
+            "publisher_type",
+            "platform",
+        ):
+            if row["extra"].get(k):
+                extra[k] = row["extra"][k]
 
         container_type = None
-        if 'proceedings' in name.lower():
-            container_type = 'proceedings'
-        elif 'journal ' in name.lower():
-            container_type = 'journal'
+        if "proceedings" in name.lower():
+            container_type = "proceedings"
+        elif "journal " in name.lower():
+            container_type = "journal"
 
         ce = fatcat_openapi_client.ContainerEntity(
-            issnl=row['issnl'],
-            issnp=row['extra'].get('issnp'),
-            issne=row['extra'].get('issne'),
-            ident=row['ident'],
+            issnl=row["issnl"],
+            issnp=row["extra"].get("issnp"),
+            issne=row["extra"].get("issne"),
+            ident=row["ident"],
             name=name,
             container_type=container_type,
-            publisher=clean(row.get('publisher')),
-            wikidata_qid=row.get('wikidata_qid'),
-            extra=extra)
+            publisher=clean(row.get("publisher")),
+            wikidata_qid=row.get("wikidata_qid"),
+            extra=extra,
+        )
         return ce
 
     def try_update(self, ce):
@@ -86,12 +97,12 @@ class ChoculaImporter(EntityImporter):
             except fatcat_openapi_client.rest.ApiException as err:
                 if err.status != 404:
                     raise err
-                self.counts['exists'] += 1
-                self.counts['exists-not-found'] += 1
+                self.counts["exists"] += 1
+                self.counts["exists-not-found"] += 1
                 return False
-            if existing.state != 'active':
-                self.counts['exists'] += 1
-                self.counts['exists-inactive'] += 1
+            if existing.state != "active":
+                self.counts["exists"] += 1
+                self.counts["exists-inactive"] += 1
                 return False
 
         if not existing:
@@ -102,8 +113,8 @@ class ChoculaImporter(EntityImporter):
                 if err.status != 404:
                     raise err
             if existing:
-                self.counts['exists'] += 1
-                self.counts['exists-by-issnl'] += 1
+                self.counts["exists"] += 1
+                self.counts["exists-by-issnl"] += 1
                 return False
             # doesn't exist, always create
             return True
@@ -111,18 +122,22 @@ class ChoculaImporter(EntityImporter):
         # decide whether to update
         do_update = False
         if not self.do_updates:
-            self.counts['exists'] += 1
+            self.counts["exists"] += 1
             return False
         if not existing.extra:
             existing.extra = dict()
-        if ce.extra.get('urls') and set(ce.extra.get('urls', [])) != set(existing.extra.get('urls', [])):
+        if ce.extra.get("urls") and set(ce.extra.get("urls", [])) != set(
+            existing.extra.get("urls", [])
+        ):
             do_update = True
-        if ce.extra.get('webarchive_urls') and set(ce.extra.get('webarchive_urls', [])) != set(existing.extra.get('webarchive_urls', [])):
+        if ce.extra.get("webarchive_urls") and set(ce.extra.get("webarchive_urls", [])) != set(
+            existing.extra.get("webarchive_urls", [])
+        ):
             do_update = True
-        for k in ('ezb', 'szczepanski', 'publisher_type', 'platform'):
+        for k in ("ezb", "szczepanski", "publisher_type", "platform"):
             if ce.extra.get(k) and not existing.extra.get(k):
                 do_update = True
-        for k in ('kbart', 'ia', 'doaj'):
+        for k in ("kbart", "ia", "doaj"):
             # always update these fields if not equal (chocula override)
             if ce.extra.get(k) and ce.extra[k] != existing.extra.get(k):
                 do_update = True
@@ -137,41 +152,53 @@ class ChoculaImporter(EntityImporter):
             existing.container_type = existing.container_type or ce.container_type
             existing.issne = existing.issne or ce.issne
             existing.issnp = existing.issnp or ce.issnp
-            for k in ('urls', 'webarchive_urls'):
+            for k in ("urls", "webarchive_urls"):
                 # be conservative about URL updates; don't clobber existing URL lists
                 # may want to make this behavior more sophisticated in the
                 # future, or at least a config flag
                 if ce.extra.get(k) and not existing.extra.get(k):
                     existing.extra[k] = ce.extra.get(k, [])
-            for k in ('sherpa_romeo', 'ezb', 'szczepanski', 'doaj', 'ia',
-                      'scielo', 'kbart', 'publisher_type', 'platform'):
+            for k in (
+                "sherpa_romeo",
+                "ezb",
+                "szczepanski",
+                "doaj",
+                "ia",
+                "scielo",
+                "kbart",
+                "publisher_type",
+                "platform",
+            ):
                 # always update (chocula over-rides)
                 if ce.extra.get(k):
                     existing.extra[k] = ce.extra[k]
-            for k in ('country',):
+            for k in ("country",):
                 # only include if not set (don't clobber human edits)
                 if ce.extra.get(k) and not existing.extra.get(k):
                     existing.extra[k] = ce.extra[k]
-            if ce.extra.get('languages'):
-                if not existing.extra.get('languages'):
-                    existing.extra['languages'] = ce.extra['languages']
-                elif not ce.extra['languages'][0] in existing.extra['languages']:
-                    existing.extra['languages'].append(ce.extra['languages'][0])
+            if ce.extra.get("languages"):
+                if not existing.extra.get("languages"):
+                    existing.extra["languages"] = ce.extra["languages"]
+                elif not ce.extra["languages"][0] in existing.extra["languages"]:
+                    existing.extra["languages"].append(ce.extra["languages"][0])
 
             self.api.update_container(self.get_editgroup_id(), existing.ident, existing)
-            self.counts['update'] += 1
+            self.counts["update"] += 1
             return False
         else:
-            self.counts['exists'] += 1
-            self.counts['exists-skip-update'] += 1
+            self.counts["exists"] += 1
+            self.counts["exists-skip-update"] += 1
             return False
 
         # if we got this far, it's a bug
         raise NotImplementedError
 
     def insert_batch(self, batch):
-        self.api.create_container_auto_batch(fatcat_openapi_client.ContainerAutoBatch(
-            editgroup=fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra),
-            entity_list=batch))
+        self.api.create_container_auto_batch(
+            fatcat_openapi_client.ContainerAutoBatch(
+                editgroup=fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                ),
+                entity_list=batch,
+            )
+        )
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index e33a2012..2639c85a 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -1,4 +1,3 @@
-
 import csv
 import datetime
 import json
@@ -34,7 +33,6 @@ SANE_MAX_URLS: int = 100
 DOMAIN_REL_MAP: Dict[str, str] = {
     "archive.org": "archive",
     # LOCKSS, Portico, DuraSpace, etc would also be "archive"
-
     "arxiv.org": "repository",
     "babel.hathitrust.org": "repository",
     "cds.cern.ch": "repository",
@@ -53,7 +51,6 @@ DOMAIN_REL_MAP: Dict[str, str] = {
     "zenodo.org": "repository",
     "www.biorxiv.org": "repository",
     "www.medrxiv.org": "repository",
-
     "citeseerx.ist.psu.edu": "aggregator",
     "publisher-connector.core.ac.uk": "aggregator",
     "core.ac.uk": "aggregator",
@@ -62,7 +59,6 @@ DOMAIN_REL_MAP: Dict[str, str] = {
     "pdfs.semanticscholar.org": "aggregator",
     "semanticscholar.org": "aggregator",
     "www.semanticscholar.org": "aggregator",
-
     "academic.oup.com": "publisher",
     "cdn.elifesciences.org": "publisher",
     "cell.com": "publisher",
@@ -86,15 +82,14 @@ DOMAIN_REL_MAP: Dict[str, str] = {
     "ehp.niehs.nih.gov": "publisher",
     "journals.tsu.ru": "publisher",
     "www.cogentoa.com": "publisher",
-
     "www.researchgate.net": "academicsocial",
     "academia.edu": "academicsocial",
-
     "wayback.archive-it.org": "webarchive",
     "web.archive.org": "webarchive",
     "archive.is": "webarchive",
 }
 
+
 def make_rel_url(raw_url: str, default_link_rel: str = "web"):
     # this is where we map specific domains to rel types, and also filter out
     # bad domains, invalid URLs, etc
@@ -105,12 +100,17 @@ def make_rel_url(raw_url: str, default_link_rel: str = "web"):
             break
     return (rel, raw_url)
 
+
 def test_make_rel_url():
     assert make_rel_url("http://example.com/thing.pdf")[0] == "web"
     assert make_rel_url("http://example.com/thing.pdf", default_link_rel="jeans")[0] == "jeans"
-    assert make_rel_url("https://web.archive.org/web/*/http://example.com/thing.pdf")[0] == "webarchive"
+    assert (
+        make_rel_url("https://web.archive.org/web/*/http://example.com/thing.pdf")[0]
+        == "webarchive"
+    )
     assert make_rel_url("http://cell.com/thing.pdf")[0] == "publisher"
 
+
 class EntityImporter:
     """
     Base class for fatcat entity importers.
@@ -147,23 +147,26 @@ class EntityImporter:
 
     def __init__(self, api, **kwargs):
 
-        eg_extra = kwargs.get('editgroup_extra', dict())
-        eg_extra['git_rev'] = eg_extra.get('git_rev',
-            subprocess.check_output(["git", "describe", "--always"]).strip()).decode('utf-8')
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.EntityImporter')
+        eg_extra = kwargs.get("editgroup_extra", dict())
+        eg_extra["git_rev"] = eg_extra.get(
+            "git_rev", subprocess.check_output(["git", "describe", "--always"]).strip()
+        ).decode("utf-8")
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.EntityImporter")
 
         self.api = api
-        self.do_updates = bool(kwargs.get('do_updates', True))
-        self.do_fuzzy_match: bool = kwargs.get('do_fuzzy_match', True)
-        self.bezerk_mode: bool = kwargs.get('bezerk_mode', False)
-        self.submit_mode: bool = kwargs.get('submit_mode', False)
-        self.edit_batch_size: int = kwargs.get('edit_batch_size', 100)
-        self.editgroup_description: Optional[str] = kwargs.get('editgroup_description')
+        self.do_updates = bool(kwargs.get("do_updates", True))
+        self.do_fuzzy_match: bool = kwargs.get("do_fuzzy_match", True)
+        self.bezerk_mode: bool = kwargs.get("bezerk_mode", False)
+        self.submit_mode: bool = kwargs.get("submit_mode", False)
+        self.edit_batch_size: int = kwargs.get("edit_batch_size", 100)
+        self.editgroup_description: Optional[str] = kwargs.get("editgroup_description")
         self.editgroup_extra: Optional[Any] = eg_extra
 
-        self.es_client = kwargs.get('es_client')
+        self.es_client = kwargs.get("es_client")
         if not self.es_client:
-            self.es_client = elasticsearch.Elasticsearch("https://search.fatcat.wiki", timeout=120)
+            self.es_client = elasticsearch.Elasticsearch(
+                "https://search.fatcat.wiki", timeout=120
+            )
 
         self._issnl_id_map: Dict[str, Any] = dict()
         self._orcid_id_map: Dict[str, Any] = dict()
@@ -174,7 +177,7 @@ class EntityImporter:
         self.reset()
 
     def reset(self) -> None:
-        self.counts = Counter({'total': 0, 'skip': 0, 'insert': 0, 'update': 0, 'exists': 0})
+        self.counts = Counter({"total": 0, "skip": 0, "insert": 0, "update": 0, "exists": 0})
         self._edit_count: int = 0
         self._editgroup_id: Optional[str] = None
         self._entity_queue: List[Any] = []
@@ -184,13 +187,13 @@ class EntityImporter:
         """
         Returns nothing.
         """
-        self.counts['total'] += 1
+        self.counts["total"] += 1
         if (not raw_record) or (not self.want(raw_record)):
-            self.counts['skip'] += 1
+            self.counts["skip"] += 1
             return
         entity = self.parse_record(raw_record)
         if not entity:
-            self.counts['skip'] += 1
+            self.counts["skip"] += 1
             return
         if self.bezerk_mode:
             self.push_entity(entity)
@@ -230,7 +233,7 @@ class EntityImporter:
 
         if self._entity_queue:
             self.insert_batch(self._entity_queue)
-            self.counts['insert'] += len(self._entity_queue)
+            self.counts["insert"] += len(self._entity_queue)
             self._entity_queue = []
 
         return self.counts
@@ -248,8 +251,9 @@ class EntityImporter:
         if not self._editgroup_id:
             eg = self.api.create_editgroup(
                 fatcat_openapi_client.Editgroup(
-                    description=self.editgroup_description,
-                    extra=self.editgroup_extra))
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                )
+            )
             self._editgroup_id = eg.editgroup_id
 
         self._edit_count += edits
@@ -257,30 +261,30 @@ class EntityImporter:
 
     def create_container(self, entity):
         eg_id = self.get_editgroup_id()
-        self.counts['inserted.container'] += 1
+        self.counts["inserted.container"] += 1
         return self.api.create_container(eg_id, entity)
 
     def create_release(self, entity):
         eg_id = self.get_editgroup_id()
-        self.counts['inserted.release'] += 1
+        self.counts["inserted.release"] += 1
         return self.api.create_release(eg_id, entity)
 
     def create_file(self, entity):
         eg_id = self.get_editgroup_id()
-        self.counts['inserted.file'] += 1
+        self.counts["inserted.file"] += 1
         return self.api.create_file(eg_id, entity)
 
     def updated(self):
         """
         Implementations should call this from try_update() if the update was successful
         """
-        self.counts['update'] += 1
+        self.counts["update"] += 1
 
     def push_entity(self, entity):
         self._entity_queue.append(entity)
         if len(self._entity_queue) >= self.edit_batch_size:
             self.insert_batch(self._entity_queue)
-            self.counts['insert'] += len(self._entity_queue)
+            self.counts["insert"] += len(self._entity_queue)
             self._entity_queue = []
 
     def want(self, raw_record: Any) -> bool:
@@ -324,7 +328,7 @@ class EntityImporter:
             # If anything other than a 404 (not found), something is wrong
             if ae.status != 404:
                 raise ae
-        self._orcid_id_map[orcid] = creator_id # might be None
+        self._orcid_id_map[orcid] = creator_id  # might be None
         return creator_id
 
     def is_doi(self, doi: str) -> bool:
@@ -347,7 +351,7 @@ class EntityImporter:
             # If anything other than a 404 (not found), something is wrong
             if ae.status != 404:
                 raise ae
-        self._doi_id_map[doi] = release_id # might be None
+        self._doi_id_map[doi] = release_id  # might be None
         return release_id
 
     def lookup_pmid(self, pmid: str):
@@ -364,11 +368,11 @@ class EntityImporter:
             # If anything other than a 404 (not found), something is wrong
             if ae.status != 404:
                 raise ae
-        self._pmid_id_map[pmid] = release_id # might be None
+        self._pmid_id_map[pmid] = release_id  # might be None
         return release_id
 
     def is_issnl(self, issnl: str) -> bool:
-        return len(issnl) == 9 and issnl[4] == '-'
+        return len(issnl) == 9 and issnl[4] == "-"
 
     def lookup_issnl(self, issnl: str):
         """Caches calls to the ISSN-L lookup API endpoint in a local dict"""
@@ -382,7 +386,7 @@ class EntityImporter:
             # If anything other than a 404 (not found), something is wrong
             if ae.status != 404:
                 raise ae
-        self._issnl_id_map[issnl] = container_id # might be None
+        self._issnl_id_map[issnl] = container_id  # might be None
         return container_id
 
     def read_issn_map_file(self, issn_map_file):
@@ -417,26 +421,26 @@ class EntityImporter:
         # update old/deprecated 'rel' on URLs
         for i in range(len(existing.urls)):
             u = existing.urls[i]
-            if u.rel == 'repository' and '://archive.org/download/' in u.url:
-                existing.urls[i].rel = 'archive'
-            if u.rel == 'social':
-                u.rel = 'academicsocial'
+            if u.rel == "repository" and "://archive.org/download/" in u.url:
+                existing.urls[i].rel = "archive"
+            if u.rel == "social":
+                u.rel = "academicsocial"
 
         # remove URLs which are near-duplicates
         redundant_urls = []
         all_urls = [u.url for u in existing.urls]
-        all_wayback_urls = [u.url for u in existing.urls if '://web.archive.org/web/' in u.url]
+        all_wayback_urls = [u.url for u in existing.urls if "://web.archive.org/web/" in u.url]
         for url in all_urls:
             # https/http redundancy
-            if url.startswith('http://') and url.replace('http://', 'https://', 1) in all_urls:
+            if url.startswith("http://") and url.replace("http://", "https://", 1) in all_urls:
                 redundant_urls.append(url)
                 continue
             # default HTTP port included and not included
-            if ':80/' in url and url.replace(':80', '', 1) in all_urls:
+            if ":80/" in url and url.replace(":80", "", 1) in all_urls:
                 redundant_urls.append(url)
                 continue
             # partial and complete wayback timestamps
-            if '://web.archive.org/web/2017/' in url:
+            if "://web.archive.org/web/2017/" in url:
                 original_url = "/".join(url.split("/")[5:])
                 assert len(original_url) > 5
                 for wb_url in all_wayback_urls:
@@ -452,7 +456,9 @@ class EntityImporter:
     def generic_fileset_cleanups(existing):
         return existing
 
-    def match_existing_release_fuzzy(self, release: ReleaseEntity) -> Optional[Tuple[str, str, ReleaseEntity]]:
+    def match_existing_release_fuzzy(
+        self, release: ReleaseEntity
+    ) -> Optional[Tuple[str, str, ReleaseEntity]]:
         """
         This helper function uses fuzzycat (and elasticsearch) to look for
         existing release entities with similar metadata.
@@ -488,7 +494,15 @@ class EntityImporter:
             return None
 
         release_dict = entity_to_dict(release, api_client=self.api.api_client)
-        verified = [(fuzzycat.verify.verify(release_dict, entity_to_dict(c, api_client=self.api.api_client)), c) for c in candidates]
+        verified = [
+            (
+                fuzzycat.verify.verify(
+                    release_dict, entity_to_dict(c, api_client=self.api.api_client)
+                ),
+                c,
+            )
+            for c in candidates
+        ]
 
         # chose the "closest" match
         closest = sorted(verified, key=lambda v: STATUS_SORT[v[0].status])[0]
@@ -522,7 +536,6 @@ class RecordPusher:
 
 
 class JsonLinePusher(RecordPusher):
-
     def __init__(self, importer, json_file, **kwargs):
         self.importer = importer
         self.json_file = json_file
@@ -539,10 +552,9 @@ class JsonLinePusher(RecordPusher):
 
 
 class CsvPusher(RecordPusher):
-
     def __init__(self, importer, csv_file, **kwargs):
         self.importer = importer
-        self.reader = csv.DictReader(csv_file, delimiter=kwargs.get('delimiter', ','))
+        self.reader = csv.DictReader(csv_file, delimiter=kwargs.get("delimiter", ","))
 
     def run(self):
         for line in self.reader:
@@ -555,7 +567,6 @@ class CsvPusher(RecordPusher):
 
 
 class LinePusher(RecordPusher):
-
     def __init__(self, importer, text_file, **kwargs):
         self.importer = importer
         self.text_file = text_file
@@ -571,17 +582,15 @@ class LinePusher(RecordPusher):
 
 
 class SqlitePusher(RecordPusher):
-
     def __init__(self, importer, db_file, table_name, where_clause="", **kwargs):
         self.importer = importer
-        self.db = sqlite3.connect(db_file, isolation_level='EXCLUSIVE')
+        self.db = sqlite3.connect(db_file, isolation_level="EXCLUSIVE")
         self.db.row_factory = sqlite3.Row
         self.table_name = table_name
         self.where_clause = where_clause
 
     def run(self):
-        cur = self.db.execute("SELECT * FROM {} {};".format(
-            self.table_name, self.where_clause))
+        cur = self.db.execute("SELECT * FROM {} {};".format(self.table_name, self.where_clause))
         for row in cur:
             self.importer.push_record(row)
         counts = self.importer.finish()
@@ -590,7 +599,6 @@ class SqlitePusher(RecordPusher):
 
 
 class Bs4XmlLinesPusher(RecordPusher):
-
     def __init__(self, importer, xml_file, prefix_filter=None, **kwargs):
         self.importer = importer
         self.xml_file = xml_file
@@ -611,7 +619,6 @@ class Bs4XmlLinesPusher(RecordPusher):
 
 
 class Bs4XmlFilePusher(RecordPusher):
-
     def __init__(self, importer, xml_file, record_tag, **kwargs):
         self.importer = importer
         self.xml_file = xml_file
@@ -684,7 +691,6 @@ class Bs4XmlLargeFilePusher(RecordPusher):
 
 
 class Bs4XmlFileListPusher(RecordPusher):
-
     def __init__(self, importer, list_file, record_tag, **kwargs):
         self.importer = importer
         self.list_file = list_file
@@ -695,7 +701,7 @@ class Bs4XmlFileListPusher(RecordPusher):
             xml_path = xml_path.strip()
             if not xml_path or xml_path.startswith("#"):
                 continue
-            with open(xml_path, 'r') as xml_file:
+            with open(xml_path, "r") as xml_file:
                 soup = BeautifulSoup(xml_file, "xml")
                 for record in soup.find_all(self.record_tag):
                     self.importer.push_record(record)
@@ -705,10 +711,12 @@ class Bs4XmlFileListPusher(RecordPusher):
         print(counts)
         return counts
 
+
 class KafkaBs4XmlPusher(RecordPusher):
     """
     Fetch XML for an article from Kafka, parse via Bs4.
     """
+
     def __init__(self, importer, kafka_hosts, kafka_env, topic_suffix, group, **kwargs):
         self.importer = importer
         self.consumer = make_kafka_consumer(
@@ -716,10 +724,10 @@ class KafkaBs4XmlPusher(RecordPusher):
             kafka_env,
             topic_suffix,
             group,
-            kafka_namespace=kwargs.get('kafka_namespace', 'fatcat')
+            kafka_namespace=kwargs.get("kafka_namespace", "fatcat"),
         )
-        self.poll_interval = kwargs.get('poll_interval', 5.0)
-        self.consume_batch_size = kwargs.get('consume_batch_size', 25)
+        self.poll_interval = kwargs.get("poll_interval", 5.0)
+        self.consume_batch_size = kwargs.get("consume_batch_size", 25)
 
     def run(self):
         count = 0
@@ -735,16 +743,19 @@ class KafkaBs4XmlPusher(RecordPusher):
             # outstanding editgroups every 5 minutes, but there is still that
             # window when editgroups might be hanging (unsubmitted).
             batch = self.consumer.consume(
-                num_messages=self.consume_batch_size,
-                timeout=self.poll_interval)
-            print("... got {} kafka messages ({}sec poll interval) {}".format(
-                len(batch), self.poll_interval, self.importer.counts))
+                num_messages=self.consume_batch_size, timeout=self.poll_interval
+            )
+            print(
+                "... got {} kafka messages ({}sec poll interval) {}".format(
+                    len(batch), self.poll_interval, self.importer.counts
+                )
+            )
             if not batch:
                 if datetime.datetime.now() - last_push > datetime.timedelta(minutes=5):
                     # it has been some time, so flush any current editgroup
                     self.importer.finish()
                     last_push = datetime.datetime.now()
-                    #print("Flushed any partial import batch: {}".format(self.importer.counts))
+                    # print("Flushed any partial import batch: {}".format(self.importer.counts))
                 continue
             # first check errors on entire batch...
             for msg in batch:
@@ -752,7 +763,7 @@ class KafkaBs4XmlPusher(RecordPusher):
                     raise KafkaException(msg.error())
             # ... then process
             for msg in batch:
-                soup = BeautifulSoup(msg.value().decode('utf-8'), "xml")
+                soup = BeautifulSoup(msg.value().decode("utf-8"), "xml")
                 self.importer.push_record(soup)
                 soup.decompose()
                 count += 1
@@ -771,8 +782,8 @@ class KafkaBs4XmlPusher(RecordPusher):
         self.consumer.close()
         return counts
 
-class KafkaJsonPusher(RecordPusher):
 
+class KafkaJsonPusher(RecordPusher):
     def __init__(self, importer, kafka_hosts, kafka_env, topic_suffix, group, **kwargs):
         self.importer = importer
         self.consumer = make_kafka_consumer(
@@ -780,11 +791,11 @@ class KafkaJsonPusher(RecordPusher):
             kafka_env,
             topic_suffix,
             group,
-            kafka_namespace=kwargs.get('kafka_namespace', 'fatcat')
+            kafka_namespace=kwargs.get("kafka_namespace", "fatcat"),
         )
-        self.poll_interval = kwargs.get('poll_interval', 5.0)
-        self.consume_batch_size = kwargs.get('consume_batch_size', 100)
-        self.force_flush = kwargs.get('force_flush', False)
+        self.poll_interval = kwargs.get("poll_interval", 5.0)
+        self.consume_batch_size = kwargs.get("consume_batch_size", 100)
+        self.force_flush = kwargs.get("force_flush", False)
 
     def run(self):
         count = 0
@@ -801,10 +812,13 @@ class KafkaJsonPusher(RecordPusher):
             # outstanding editgroups every 5 minutes, but there is still that
             # window when editgroups might be hanging (unsubmitted).
             batch = self.consumer.consume(
-                num_messages=self.consume_batch_size,
-                timeout=self.poll_interval)
-            print("... got {} kafka messages ({}sec poll interval) {}".format(
-                len(batch), self.poll_interval, self.importer.counts))
+                num_messages=self.consume_batch_size, timeout=self.poll_interval
+            )
+            print(
+                "... got {} kafka messages ({}sec poll interval) {}".format(
+                    len(batch), self.poll_interval, self.importer.counts
+                )
+            )
             if self.force_flush:
                 # this flushing happens even if there have been 'push' events
                 # more recently. it is intended for, eg, importers off the
@@ -821,7 +835,7 @@ class KafkaJsonPusher(RecordPusher):
                     self.importer.finish()
                     last_push = datetime.datetime.now()
                     last_force_flush = datetime.datetime.now()
-                    #print("Flushed any partial import batch: {}".format(self.importer.counts))
+                    # print("Flushed any partial import batch: {}".format(self.importer.counts))
                 continue
             # first check errors on entire batch...
             for msg in batch:
@@ -829,7 +843,7 @@ class KafkaJsonPusher(RecordPusher):
                     raise KafkaException(msg.error())
             # ... then process
             for msg in batch:
-                record = json.loads(msg.value().decode('utf-8'))
+                record = json.loads(msg.value().decode("utf-8"))
                 self.importer.push_record(record)
                 count += 1
                 if count % 500 == 0:
@@ -864,25 +878,25 @@ def make_kafka_consumer(hosts, env, topic_suffix, group, kafka_namespace="fatcat
                 print("Bailing out...")
                 # TODO: should it be sys.exit(-1)?
                 raise KafkaException(p.error)
-        #print("Kafka consumer commit successful")
+        # print("Kafka consumer commit successful")
         pass
 
     # previously, using pykafka
-    #auto_commit_enable=True,
-    #auto_commit_interval_ms=30000, # 30 seconds
+    # auto_commit_enable=True,
+    # auto_commit_interval_ms=30000, # 30 seconds
     conf = {
-        'bootstrap.servers': hosts,
-        'group.id': group,
-        'on_commit': fail_fast,
+        "bootstrap.servers": hosts,
+        "group.id": group,
+        "on_commit": fail_fast,
         # messages don't have offset marked as stored until pushed to
         # elastic, but we do auto-commit stored offsets to broker
-        'enable.auto.offset.store': False,
-        'enable.auto.commit': True,
+        "enable.auto.offset.store": False,
+        "enable.auto.commit": True,
         # user code timeout; if no poll after this long, assume user code
         # hung and rebalance (default: 5min)
-        'max.poll.interval.ms': 120000,
-        'default.topic.config': {
-            'auto.offset.reset': 'latest',
+        "max.poll.interval.ms": 120000,
+        "default.topic.config": {
+            "auto.offset.reset": "latest",
         },
     }
 
@@ -890,13 +904,13 @@ def make_kafka_consumer(hosts, env, topic_suffix, group, kafka_namespace="fatcat
         for p in partitions:
             if p.error:
                 raise KafkaException(p.error)
-        print("Kafka partitions rebalanced: {} / {}".format(
-            consumer, partitions))
+        print("Kafka partitions rebalanced: {} / {}".format(consumer, partitions))
 
     consumer = Consumer(conf)
     # NOTE: it's actually important that topic_name *not* be bytes (UTF-8
     # encoded)
-    consumer.subscribe([topic_name],
+    consumer.subscribe(
+        [topic_name],
         on_assign=on_rebalance,
         on_revoke=on_rebalance,
     )
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index fd6936a4..606d4bb1 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -1,4 +1,3 @@
-
 import datetime
 import sqlite3
 from typing import Any, Dict, Optional
@@ -13,30 +12,30 @@ from .common import EntityImporter, clean
 # Can get a list of Crossref types (with counts) via API:
 # https://api.crossref.org/works?rows=0&facet=type-name:*
 CROSSREF_TYPE_MAP: Dict[str, Optional[str]] = {
-    'book': 'book',
-    'book-chapter': 'chapter',
-    'book-part': 'chapter',
-    'book-section': 'chapter',
-    'component': 'component',
-    'dataset': 'dataset',
-    'dissertation': 'thesis',
-    'edited-book': 'book',
-    'journal-article': 'article-journal',
-    'monograph': 'book',
-    'other': None,
-    'peer-review': 'peer_review',
-    'posted-content': 'post',
-    'proceedings-article': 'paper-conference',
-    'reference-book': 'book',
-    'reference-entry': 'entry',
-    'report': 'report',
-    'standard': 'standard',
+    "book": "book",
+    "book-chapter": "chapter",
+    "book-part": "chapter",
+    "book-section": "chapter",
+    "component": "component",
+    "dataset": "dataset",
+    "dissertation": "thesis",
+    "edited-book": "book",
+    "journal-article": "article-journal",
+    "monograph": "book",
+    "other": None,
+    "peer-review": "peer_review",
+    "posted-content": "post",
+    "proceedings-article": "paper-conference",
+    "reference-book": "book",
+    "reference-entry": "entry",
+    "report": "report",
+    "standard": "standard",
 }
 
 CONTAINER_TYPE_MAP: Dict[str, str] = {
-    'article-journal': 'journal',
-    'paper-conference': 'conference',
-    'book': 'book-series',
+    "article-journal": "journal",
+    "paper-conference": "conference",
+    "book": "book-series",
 }
 
 # These are based, informally, on sorting the most popular licenses found in
@@ -90,29 +89,41 @@ LICENSE_SLUG_MAP: Dict[str, str] = {
     "//arxiv.org/licenses/nonexclusive-distrib/1.0/": "ARXIV-1.0",
 }
 
+
 def lookup_license_slug(raw: str) -> Optional[str]:
     if not raw:
         return None
-    raw = raw.strip().replace('http://', '//').replace('https://', '//')
-    if 'creativecommons.org' in raw.lower():
+    raw = raw.strip().replace("http://", "//").replace("https://", "//")
+    if "creativecommons.org" in raw.lower():
         raw = raw.lower()
-        raw = raw.replace('/legalcode', '/').replace('/uk', '')
-        if not raw.endswith('/'):
-            raw = raw + '/'
+        raw = raw.replace("/legalcode", "/").replace("/uk", "")
+        if not raw.endswith("/"):
+            raw = raw + "/"
     return LICENSE_SLUG_MAP.get(raw)
 
+
 def test_lookup_license_slug():
 
     assert lookup_license_slug("https://creativecommons.org/licenses/by-nc/3.0/") == "CC-BY-NC"
-    assert lookup_license_slug("http://creativecommons.org/licenses/by/2.0/uk/legalcode") == "CC-BY"
-    assert lookup_license_slug("https://creativecommons.org/publicdomain/zero/1.0/legalcode") == "CC-0"
+    assert (
+        lookup_license_slug("http://creativecommons.org/licenses/by/2.0/uk/legalcode")
+        == "CC-BY"
+    )
+    assert (
+        lookup_license_slug("https://creativecommons.org/publicdomain/zero/1.0/legalcode")
+        == "CC-0"
+    )
     assert lookup_license_slug("http://creativecommons.org/licenses/by/4.0") == "CC-BY"
-    assert lookup_license_slug("https://creativecommons.org/licenses/by-nc-sa/4.0/") == "CC-BY-NC-SA"
+    assert (
+        lookup_license_slug("https://creativecommons.org/licenses/by-nc-sa/4.0/")
+        == "CC-BY-NC-SA"
+    )
     assert lookup_license_slug("https://www.ametsoc.org/PUBSReuseLicenses") == "AMETSOC"
     assert lookup_license_slug("https://www.amec.org/PUBSReuseLicenses") is None
     assert lookup_license_slug("") is None
     assert lookup_license_slug(None) is None
 
+
 class CrossrefImporter(EntityImporter):
     """
     Importer for Crossref metadata.
@@ -124,18 +135,22 @@ class CrossrefImporter(EntityImporter):
 
     def __init__(self, api, issn_map_file, **kwargs):
 
-        eg_desc: Optional[str] = kwargs.get('editgroup_description',
-            "Automated import of Crossref DOI metadata, harvested from REST API")
-        eg_extra: Optional[dict] = kwargs.get('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.CrossrefImporter')
-        super().__init__(api,
+        eg_desc: Optional[str] = kwargs.get(
+            "editgroup_description",
+            "Automated import of Crossref DOI metadata, harvested from REST API",
+        )
+        eg_extra: Optional[dict] = kwargs.get("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.CrossrefImporter")
+        super().__init__(
+            api,
             issn_map_file=issn_map_file,
             editgroup_description=eg_desc,
             editgroup_extra=eg_extra,
-            **kwargs)
+            **kwargs
+        )
 
-        self.create_containers: bool = kwargs.get('create_containers', True)
-        extid_map_file = kwargs.get('extid_map_file')
+        self.create_containers: bool = kwargs.get("create_containers", True)
+        extid_map_file = kwargs.get("extid_map_file")
         self.extid_map_db: Optional[Any] = None
         if extid_map_file:
             db_uri = "file:{}?mode=ro".format(extid_map_file)
@@ -148,12 +163,27 @@ class CrossrefImporter(EntityImporter):
 
     def lookup_ext_ids(self, doi: str) -> Optional[Any]:
         if self.extid_map_db is None:
-            return dict(core_id=None, pmid=None, pmcid=None, wikidata_qid=None, arxiv_id=None, jstor_id=None)
-        row = self.extid_map_db.execute("SELECT core, pmid, pmcid, wikidata FROM ids WHERE doi=? LIMIT 1",
-            [doi.lower()]).fetchone()
+            return dict(
+                core_id=None,
+                pmid=None,
+                pmcid=None,
+                wikidata_qid=None,
+                arxiv_id=None,
+                jstor_id=None,
+            )
+        row = self.extid_map_db.execute(
+            "SELECT core, pmid, pmcid, wikidata FROM ids WHERE doi=? LIMIT 1", [doi.lower()]
+        ).fetchone()
         if row is None:
-            return dict(core_id=None, pmid=None, pmcid=None, wikidata_qid=None, arxiv_id=None, jstor_id=None)
-        row = [str(cell or '') or None for cell in row]
+            return dict(
+                core_id=None,
+                pmid=None,
+                pmcid=None,
+                wikidata_qid=None,
+                arxiv_id=None,
+                jstor_id=None,
+            )
+        row = [str(cell or "") or None for cell in row]
         return dict(
             core_id=row[0],
             pmid=row[1],
@@ -173,17 +203,17 @@ class CrossrefImporter(EntityImporter):
         return CONTAINER_TYPE_MAP.get(crossref_type)
 
     def want(self, obj: Dict[str, Any]) -> bool:
-        if not obj.get('title'):
-            self.counts['skip-blank-title'] += 1
+        if not obj.get("title"):
+            self.counts["skip-blank-title"] += 1
             return False
 
         # these are pre-registered DOIs before the actual record is ready
         # title is a list of titles
-        titles = obj.get('title')
+        titles = obj.get("title")
         if titles is not None and titles[0].strip().lower() in [
-                "OUP accepted manuscript".lower(),
-            ]:
-            self.counts['skip-stub-title'] += 1
+            "OUP accepted manuscript".lower(),
+        ]:
+            self.counts["skip-stub-title"] += 1
             return False
 
         # do most of these checks in-line below
@@ -197,86 +227,105 @@ class CrossrefImporter(EntityImporter):
 
         # Ways to be out of scope (provisionally)
         # journal-issue and journal-volume map to None, but allowed for now
-        if obj.get('type') in (None, 'journal', 'proceedings',
-                'standard-series', 'report-series', 'book-series', 'book-set',
-                'book-track', 'proceedings-series'):
-            self.counts['skip-release-type'] += 1
+        if obj.get("type") in (
+            None,
+            "journal",
+            "proceedings",
+            "standard-series",
+            "report-series",
+            "book-series",
+            "book-set",
+            "book-track",
+            "proceedings-series",
+        ):
+            self.counts["skip-release-type"] += 1
             return None
 
         # Do require the 'title' keys to exist, as release entities do
-        if ('title' not in obj) or (not obj['title']):
-            self.counts['skip-blank-title'] += 1
+        if ("title" not in obj) or (not obj["title"]):
+            self.counts["skip-blank-title"] += 1
             return None
 
-        release_type = self.map_release_type(obj['type'])
+        release_type = self.map_release_type(obj["type"])
 
         # contribs
         def do_contribs(obj_list, ctype):
             contribs = []
             for i, am in enumerate(obj_list):
                 creator_id = None
-                if 'ORCID' in am.keys():
-                    creator_id = self.lookup_orcid(am['ORCID'].split('/')[-1])
+                if "ORCID" in am.keys():
+                    creator_id = self.lookup_orcid(am["ORCID"].split("/")[-1])
                 # Sorry humans :(
-                if am.get('given') and am.get('family'):
-                    raw_name = "{} {}".format(am['given'], am['family'])
-                elif am.get('family'):
-                    raw_name = am['family']
+                if am.get("given") and am.get("family"):
+                    raw_name = "{} {}".format(am["given"], am["family"])
+                elif am.get("family"):
+                    raw_name = am["family"]
                 else:
                     # TODO: can end up empty
-                    raw_name = am.get('name') or am.get('given')
+                    raw_name = am.get("name") or am.get("given")
                 extra = dict()
                 if ctype == "author":
                     index = i
                 else:
                     index = None
                 raw_affiliation = None
-                if am.get('affiliation'):
-                    if len(am.get('affiliation')) > 0:
-                        raw_affiliation = am.get('affiliation')[0]['name']
-                    if len(am.get('affiliation')) > 1:
+                if am.get("affiliation"):
+                    if len(am.get("affiliation")) > 0:
+                        raw_affiliation = am.get("affiliation")[0]["name"]
+                    if len(am.get("affiliation")) > 1:
                         # note: affiliation => more_affiliations
-                        extra['more_affiliations'] = [clean(a['name']) for a in am.get('affiliation')[1:]]
-                if am.get('sequence') and am.get('sequence') != "additional":
-                    extra['seq'] = clean(am.get('sequence'))
+                        extra["more_affiliations"] = [
+                            clean(a["name"]) for a in am.get("affiliation")[1:]
+                        ]
+                if am.get("sequence") and am.get("sequence") != "additional":
+                    extra["seq"] = clean(am.get("sequence"))
                 if not extra:
                     extra = None
                 assert ctype in ("author", "editor", "translator")
                 raw_name = clean(raw_name)
-                contribs.append(fatcat_openapi_client.ReleaseContrib(
-                    creator_id=creator_id,
-                    index=index,
-                    raw_name=raw_name,
-                    given_name=clean(am.get('given')),
-                    surname=clean(am.get('family')),
-                    raw_affiliation=clean(raw_affiliation),
-                    role=ctype,
-                    extra=extra))
+                contribs.append(
+                    fatcat_openapi_client.ReleaseContrib(
+                        creator_id=creator_id,
+                        index=index,
+                        raw_name=raw_name,
+                        given_name=clean(am.get("given")),
+                        surname=clean(am.get("family")),
+                        raw_affiliation=clean(raw_affiliation),
+                        role=ctype,
+                        extra=extra,
+                    )
+                )
             return contribs
-        contribs = do_contribs(obj.get('author', []), "author")
-        contribs.extend(do_contribs(obj.get('editor', []), "editor"))
-        contribs.extend(do_contribs(obj.get('translator', []), "translator"))
+
+        contribs = do_contribs(obj.get("author", []), "author")
+        contribs.extend(do_contribs(obj.get("editor", []), "editor"))
+        contribs.extend(do_contribs(obj.get("translator", []), "translator"))
 
         # container
-        issn = obj.get('ISSN', [None])[0]
+        issn = obj.get("ISSN", [None])[0]
         issnl = self.issn2issnl(issn)
         container_id = None
         if issnl:
             container_id = self.lookup_issnl(issnl)
-        publisher = clean(obj.get('publisher'))
+        publisher = clean(obj.get("publisher"))
 
-        container_name = obj.get('container-title')
+        container_name = obj.get("container-title")
         if container_name:
             container_name = clean(container_name[0], force_xml=True)
         if not container_name:
             container_name = None
-        if (container_id is None and self.create_containers and (issnl is not None)
-                and container_name):
+        if (
+            container_id is None
+            and self.create_containers
+            and (issnl is not None)
+            and container_name
+        ):
             ce = fatcat_openapi_client.ContainerEntity(
                 issnl=issnl,
                 publisher=publisher,
                 container_type=self.map_container_type(release_type),
-                name=container_name)
+                name=container_name,
+            )
             ce_edit = self.create_container(ce)
             container_id = ce_edit.ident
             self._issnl_id_map[issnl] = container_id
@@ -284,21 +333,21 @@ class CrossrefImporter(EntityImporter):
         # license slug
         license_slug = None
         license_extra = []
-        for lic in obj.get('license', []):
-            if lic['content-version'] not in ('vor', 'unspecified'):
+        for lic in obj.get("license", []):
+            if lic["content-version"] not in ("vor", "unspecified"):
                 continue
-            slug = lookup_license_slug(lic['URL'])
+            slug = lookup_license_slug(lic["URL"])
             if slug:
                 license_slug = slug
-            if 'start' in lic:
-                lic['start'] = lic['start']['date-time']
+            if "start" in lic:
+                lic["start"] = lic["start"]["date-time"]
             license_extra.append(lic)
 
         # references
         refs = []
-        for i, rm in enumerate(obj.get('reference', [])):
+        for i, rm in enumerate(obj.get("reference", [])):
             try:
-                year: Optional[int] = int(rm.get('year'))
+                year: Optional[int] = int(rm.get("year"))
                 # TODO: will need to update/config in the future!
                 # NOTE: are there crossref works with year < 100?
                 if year is not None:
@@ -307,56 +356,78 @@ class CrossrefImporter(EntityImporter):
             except (TypeError, ValueError):
                 year = None
             ref_extra: Dict[str, Any] = dict()
-            key = rm.get('key')
-            if key and key.startswith(obj['DOI'].upper()):
-                key = key.replace(obj['DOI'].upper() + "-", '')
-                key = key.replace(obj['DOI'].upper(), '')
-            ref_container_name = rm.get('volume-title')
+            key = rm.get("key")
+            if key and key.startswith(obj["DOI"].upper()):
+                key = key.replace(obj["DOI"].upper() + "-", "")
+                key = key.replace(obj["DOI"].upper(), "")
+            ref_container_name = rm.get("volume-title")
             if not ref_container_name:
-                ref_container_name = rm.get('journal-title')
-            elif rm.get('journal-title'):
-                ref_extra['journal-title'] = rm['journal-title']
-            if rm.get('DOI'):
-                ref_extra['doi'] = rm.get('DOI').lower()
-            author = clean(rm.get('author'))
+                ref_container_name = rm.get("journal-title")
+            elif rm.get("journal-title"):
+                ref_extra["journal-title"] = rm["journal-title"]
+            if rm.get("DOI"):
+                ref_extra["doi"] = rm.get("DOI").lower()
+            author = clean(rm.get("author"))
             if author:
-                ref_extra['authors'] = [author]
-            for k in ('editor', 'edition', 'authority', 'version', 'genre',
-                    'url', 'event', 'issue', 'volume', 'date', 'accessed_date',
-                    'issued', 'page', 'medium', 'collection_title', 'chapter_number',
-                    'unstructured', 'series-title', 'volume-title'):
+                ref_extra["authors"] = [author]
+            for k in (
+                "editor",
+                "edition",
+                "authority",
+                "version",
+                "genre",
+                "url",
+                "event",
+                "issue",
+                "volume",
+                "date",
+                "accessed_date",
+                "issued",
+                "page",
+                "medium",
+                "collection_title",
+                "chapter_number",
+                "unstructured",
+                "series-title",
+                "volume-title",
+            ):
                 if clean(rm.get(k)):
                     ref_extra[k] = clean(rm[k])
             if not ref_extra:
                 ref_extra = None
-            refs.append(fatcat_openapi_client.ReleaseRef(
-                index=i,
-                # doing lookups would be a second import pass
-                target_release_id=None,
-                key=key,
-                year=year,
-                container_name=clean(ref_container_name),
-                title=clean(rm.get('article-title')),
-                locator=clean(rm.get('first-page')),
-                # TODO: just dump JSON somewhere here?
-                extra=ref_extra))
+            refs.append(
+                fatcat_openapi_client.ReleaseRef(
+                    index=i,
+                    # doing lookups would be a second import pass
+                    target_release_id=None,
+                    key=key,
+                    year=year,
+                    container_name=clean(ref_container_name),
+                    title=clean(rm.get("article-title")),
+                    locator=clean(rm.get("first-page")),
+                    # TODO: just dump JSON somewhere here?
+                    extra=ref_extra,
+                )
+            )
 
         # abstracts
         abstracts = []
-        abstract = clean(obj.get('abstract'))
+        abstract = clean(obj.get("abstract"))
         if abstract and len(abstract) > 10:
-            abstracts.append(fatcat_openapi_client.ReleaseAbstract(
-                mimetype="application/xml+jats",
-                content=abstract))
+            abstracts.append(
+                fatcat_openapi_client.ReleaseAbstract(
+                    mimetype="application/xml+jats", content=abstract
+                )
+            )
 
         # extra fields
         extra = dict()
         extra_crossref = dict()
         # top-level extra keys
         if not container_id:
-            if obj.get('container-title'):
-                extra['container_name'] = container_name
-        for key in ('group-title'):
+            if obj.get("container-title"):
+                extra["container_name"] = container_name
+        for key in "group-title":
             val = obj.get(key)
             if val:
                 if type(val) == list:
@@ -368,7 +439,7 @@ class CrossrefImporter(EntityImporter):
                 else:
                     extra[key] = val
         # crossref-nested extra keys
-        for key in ('subject', 'type', 'alternative-id', 'archive', 'funder'):
+        for key in ("subject", "type", "alternative-id", "archive", "funder"):
             val = obj.get(key)
             if val:
                 if type(val) == str:
@@ -376,46 +447,51 @@ class CrossrefImporter(EntityImporter):
                 else:
                     extra_crossref[key] = val
         if license_extra:
-            extra_crossref['license'] = license_extra
+            extra_crossref["license"] = license_extra
 
-        if len(obj['title']) > 1:
-            aliases = [clean(t) for t in obj['title'][1:]]
+        if len(obj["title"]) > 1:
+            aliases = [clean(t) for t in obj["title"][1:]]
             aliases = [t for t in aliases if t]
             if aliases:
-                extra['aliases'] = aliases
+                extra["aliases"] = aliases
 
         # ISBN
         isbn13 = None
-        for raw in obj.get('ISBN', []):
+        for raw in obj.get("ISBN", []):
             # TODO: convert if not ISBN-13 format
             if len(raw) == 17:
                 isbn13 = raw
                 break
 
         # release status
-        if obj['type'] in ('journal-article', 'conference-proceeding', 'book',
-                'dissertation', 'book-chapter'):
+        if obj["type"] in (
+            "journal-article",
+            "conference-proceeding",
+            "book",
+            "dissertation",
+            "book-chapter",
+        ):
             release_stage = "published"
         else:
             # unknown
             release_stage = None
 
         # external identifiers
-        extids: Dict[str, Any] = self.lookup_ext_ids(doi=obj['DOI'].lower())
+        extids: Dict[str, Any] = self.lookup_ext_ids(doi=obj["DOI"].lower())
 
         # filter out unreasonably huge releases
         if len(abstracts) > 100:
-            self.counts['skip-huge-abstracts'] += 1
+            self.counts["skip-huge-abstracts"] += 1
             return None
         if len(contribs) > 2000:
-            self.counts['skip-huge-contribs'] += 1
+            self.counts["skip-huge-contribs"] += 1
             return None
         if len(refs) > 5000:
-            self.counts['skip-huge-refs'] += 1
+            self.counts["skip-huge-refs"] += 1
             return None
 
         # release date parsing is amazingly complex
-        raw_date = obj['issued']['date-parts'][0]
+        raw_date = obj["issued"]["date-parts"][0]
         if not raw_date or not raw_date[0]:
             # got some NoneType, even though at least year is supposed to be set
             release_year = None
@@ -429,28 +505,28 @@ class CrossrefImporter(EntityImporter):
             release_date = None
 
         original_title: Optional[str] = None
-        if obj.get('original-title'):
-            ot = obj.get('original-title')
+        if obj.get("original-title"):
+            ot = obj.get("original-title")
             if ot is not None:
                 original_title = clean(ot[0], force_xml=True)
 
         title: Optional[str] = None
-        if obj.get('title'):
-            title = clean(obj.get('title')[0], force_xml=True)
+        if obj.get("title"):
+            title = clean(obj.get("title")[0], force_xml=True)
             if not title or len(title) <= 1:
                 # title can't be just a single character
-                self.counts['skip-blank-title'] += 1
+                self.counts["skip-blank-title"] += 1
                 return None
 
         subtitle = None
-        if obj.get('subtitle'):
-            subtitle = clean(obj.get('subtitle')[0], force_xml=True)
+        if obj.get("subtitle"):
+            subtitle = clean(obj.get("subtitle")[0], force_xml=True)
             if not subtitle or len(subtitle) <= 1:
                 # subtitle can't be just a single character
                 subtitle = None
 
         if extra_crossref:
-            extra['crossref'] = extra_crossref
+            extra["crossref"] = extra_crossref
         if not extra:
             extra = None
 
@@ -466,19 +542,19 @@ class CrossrefImporter(EntityImporter):
             release_year=release_year,
             publisher=publisher,
             ext_ids=fatcat_openapi_client.ReleaseExtIds(
-                doi=obj['DOI'].lower(),
-                pmid=extids['pmid'],
-                pmcid=extids['pmcid'],
-                wikidata_qid=extids['wikidata_qid'],
+                doi=obj["DOI"].lower(),
+                pmid=extids["pmid"],
+                pmcid=extids["pmcid"],
+                wikidata_qid=extids["wikidata_qid"],
                 isbn13=isbn13,
-                core=extids['core_id'],
-                arxiv=extids['arxiv_id'],
-                jstor=extids['jstor_id'],
+                core=extids["core_id"],
+                arxiv=extids["arxiv_id"],
+                jstor=extids["jstor_id"],
             ),
-            volume=clean(obj.get('volume')),
-            issue=clean(obj.get('issue')),
-            pages=clean(obj.get('page')),
-            language=clean(obj.get('language')),
+            volume=clean(obj.get("volume")),
+            issue=clean(obj.get("issue")),
+            pages=clean(obj.get("page")),
+            language=clean(obj.get("language")),
             license_slug=license_slug,
             extra=extra,
             abstracts=abstracts,
@@ -500,14 +576,17 @@ class CrossrefImporter(EntityImporter):
         # eventually we'll want to support "updates", but for now just skip if
         # entity already exists
         if existing:
-            self.counts['exists'] += 1
+            self.counts["exists"] += 1
             return False
 
         return True
 
     def insert_batch(self, batch):
-        self.api.create_release_auto_batch(fatcat_openapi_client.ReleaseAutoBatch(
-            editgroup=fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra),
-            entity_list=batch))
+        self.api.create_release_auto_batch(
+            fatcat_openapi_client.ReleaseAutoBatch(
+                editgroup=fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                ),
+                entity_list=batch,
+            )
+        )
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index a06c68a4..4c174b0b 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -30,126 +30,130 @@ MAX_ABSTRACT_LENGTH = 2048
 
 # https://guide.fatcat.wiki/entity_container.html#container_type-vocabulary
 CONTAINER_TYPE_MAP = {
-    'Journal': 'journal',
-    'Series': 'journal',
-    'Book Series': 'book-series',
+    "Journal": "journal",
+    "Series": "journal",
+    "Book Series": "book-series",
 }
 
 # The docs/guide should be the canonical home for these mappings; update there
 # first.  Map various datacite type types to CSL-ish types. None means TODO or
 # remove.
 DATACITE_TYPE_MAP = {
-    'ris': {
-        'THES': 'thesis',
-        'SOUND': 'song', # 99.9% maps to citeproc song, so use that (exception: report)
-        'CHAP': 'chapter',
-        'FIGURE': 'figure',
-        'RPRT': 'report',
-        'JOUR': 'article-journal',
-        'MPCT': 'motion_picture',
-        'GEN': 'article-journal', # GEN consist of 99% article and report, post-weblog, misc - and one dataset
-        'BOOK': 'book',
-        'DATA': 'dataset',
-        'COMP': 'software',
+    "ris": {
+        "THES": "thesis",
+        "SOUND": "song",  # 99.9% maps to citeproc song, so use that (exception: report)
+        "CHAP": "chapter",
+        "FIGURE": "figure",
+        "RPRT": "report",
+        "JOUR": "article-journal",
+        "MPCT": "motion_picture",
+        "GEN": "article-journal",  # GEN consist of 99% article and report, post-weblog, misc - and one dataset
+        "BOOK": "book",
+        "DATA": "dataset",
+        "COMP": "software",
     },
-    'schemaOrg': {
-        'Dataset': 'dataset',
-        'Book': 'book',
-        'ScholarlyArticle': 'article-journal',
-        'ImageObject': 'graphic',
-        'Collection': None,
-        'MediaObject': None,
-        'Event': None,
-        'SoftwareSourceCode': 'software',
-        'Chapter': 'chapter',
-        'CreativeWork': None, # Seems to be a catch-all resourceType, from PGRFA Material, Pamphlet, to music score.
-        'PublicationIssue': 'article',
-        'AudioObject': None,
-        'Thesis': 'thesis',
+    "schemaOrg": {
+        "Dataset": "dataset",
+        "Book": "book",
+        "ScholarlyArticle": "article-journal",
+        "ImageObject": "graphic",
+        "Collection": None,
+        "MediaObject": None,
+        "Event": None,
+        "SoftwareSourceCode": "software",
+        "Chapter": "chapter",
+        "CreativeWork": None,  # Seems to be a catch-all resourceType, from PGRFA Material, Pamphlet, to music score.
+        "PublicationIssue": "article",
+        "AudioObject": None,
+        "Thesis": "thesis",
     },
-    'citeproc': {
-        'article': 'article',
-        'article-journal': 'article-journal',
-        'article-magazine': 'article-magazine',
-        'article-newspaper': 'article-newspaper',
-        'bill': 'bill',
-        'book': 'book',
-        'broadcast': 'broadcast',
-        'chapter': 'chapter',
-        'dataset': 'dataset',
-        'entry-dictionary': 'entry-dictionary',
-        'entry-encyclopedia': 'entry-encyclopedia',
-        'entry': 'entry',
-        'figure': 'figure',
-        'graphic': 'graphic',
-        'interview': 'interview',
-        'legal_case': 'legal_case',
-        'legislation': 'legislation',
-        'manuscript': 'manuscript',
-        'map': 'map',
-        'motion_picture': 'motion_picture',
-        'musical_score': 'musical_score',
-        'pamphlet': 'pamphlet',
-        'paper-conference': 'paper-conference',
-        'patent': 'patent',
-        'personal_communication': 'personal_communication',
-        'post': 'post',
-        'post-weblog': 'post-weblog',
-        'report': 'report',
-        'review-book': 'review-book',
-        'review': 'review',
-        'song': 'song',
-        'speech': 'speech',
-        'thesis': 'thesis',
-        'treaty': 'treaty',
-        'webpage': 'webpage',
+    "citeproc": {
+        "article": "article",
+        "article-journal": "article-journal",
+        "article-magazine": "article-magazine",
+        "article-newspaper": "article-newspaper",
+        "bill": "bill",
+        "book": "book",
+        "broadcast": "broadcast",
+        "chapter": "chapter",
+        "dataset": "dataset",
+        "entry-dictionary": "entry-dictionary",
+        "entry-encyclopedia": "entry-encyclopedia",
+        "entry": "entry",
+        "figure": "figure",
+        "graphic": "graphic",
+        "interview": "interview",
+        "legal_case": "legal_case",
+        "legislation": "legislation",
+        "manuscript": "manuscript",
+        "map": "map",
+        "motion_picture": "motion_picture",
+        "musical_score": "musical_score",
+        "pamphlet": "pamphlet",
+        "paper-conference": "paper-conference",
+        "patent": "patent",
+        "personal_communication": "personal_communication",
+        "post": "post",
+        "post-weblog": "post-weblog",
+        "report": "report",
+        "review-book": "review-book",
+        "review": "review",
+        "song": "song",
+        "speech": "speech",
+        "thesis": "thesis",
+        "treaty": "treaty",
+        "webpage": "webpage",
     },  # https://docs.citationstyles.org/en/master/specification.html#appendix-iii-types
-    'bibtex': {
-        'phdthesis': 'thesis',
-        'inbook': 'chapter',
-        'misc': None,
-        'article': 'article-journal',
-        'book': 'book',
+    "bibtex": {
+        "phdthesis": "thesis",
+        "inbook": "chapter",
+        "misc": None,
+        "article": "article-journal",
+        "book": "book",
     },
-    'resourceTypeGeneral': {
-        'Image': 'graphic',
-        'Dataset': 'dataset',
-        'PhysicalObject': None,
-        'Collection': None,
-        'Text': None, # "Greyliterature, labnotes, accompanyingmaterials"
-        'Sound': None,
-        'InteractiveResource': None,
-        'Event': None,
-        'Software': 'software',
-        'Other': None,
-        'Workflow': None,
-        'Audiovisual': None,
-    } # https://schema.datacite.org/meta/kernel-4.0/doc/DataCite-MetadataKernel_v4.0.pdf#page=32
+    "resourceTypeGeneral": {
+        "Image": "graphic",
+        "Dataset": "dataset",
+        "PhysicalObject": None,
+        "Collection": None,
+        "Text": None,  # "Greyliterature, labnotes, accompanyingmaterials"
+        "Sound": None,
+        "InteractiveResource": None,
+        "Event": None,
+        "Software": "software",
+        "Other": None,
+        "Workflow": None,
+        "Audiovisual": None,
+    },  # https://schema.datacite.org/meta/kernel-4.0/doc/DataCite-MetadataKernel_v4.0.pdf#page=32
 }
 
 # DATACITE_UNKNOWN_MARKERS via https://support.datacite.org/docs/schema-values-unknown-information-v43.
 DATACITE_UNKNOWN_MARKERS = (
-    '(:unac)',  # temporarily inaccessible
-    '(:unal)',  # unallowed, suppressed intentionally
-    '(:unap)',  # not applicable, makes no sense
-    '(:unas)',  # value unassigned (e.g., Untitled)
-    '(:unav)',  # value unavailable, possibly unknown
-    '(:unkn)',  # known to be unknown (e.g., Anonymous, Inconnue)
-    '(:none)',  # never had a value, never will
-    '(:null)',  # explicitly and meaningfully empty
-    '(:tba)',  # to be assigned or announced later
-    '(:etal)',  # too numerous to list (et alia)
+    "(:unac)",  # temporarily inaccessible
+    "(:unal)",  # unallowed, suppressed intentionally
+    "(:unap)",  # not applicable, makes no sense
+    "(:unas)",  # value unassigned (e.g., Untitled)
+    "(:unav)",  # value unavailable, possibly unknown
+    "(:unkn)",  # known to be unknown (e.g., Anonymous, Inconnue)
+    "(:none)",  # never had a value, never will
+    "(:null)",  # explicitly and meaningfully empty
+    "(:tba)",  # to be assigned or announced later
+    "(:etal)",  # too numerous to list (et alia)
 )
 
 # UNKNOWN_MARKERS joins official datacite markers with a generic tokens marking
 # unknown values.
-UNKNOWN_MARKERS = set(DATACITE_UNKNOWN_MARKERS).union(set((
-    'NA',
-    'NN',
-    'n.a.',
-    '[s.n.]',
-    'Unknown',
-)))
+UNKNOWN_MARKERS = set(DATACITE_UNKNOWN_MARKERS).union(
+    set(
+        (
+            "NA",
+            "NN",
+            "n.a.",
+            "[s.n.]",
+            "Unknown",
+        )
+    )
+)
 
 # UNKNOWN_MARKERS_LOWER are lowercase version of UNKNOWN blocklist.
 UNKNOWN_MARKERS_LOWER = set((v.lower() for v in UNKNOWN_MARKERS))
@@ -157,8 +161,20 @@ UNKNOWN_MARKERS_LOWER = set((v.lower() for v in UNKNOWN_MARKERS))
 # Any "min" number of "tokens" will signal "spam", https://fatcat.wiki/release/rzcpjwukobd4pj36ipla22cnoi
 DATACITE_TITLE_SPAM_WORDGROUPS = [
     {
-        "tokens": ('full', 'movies', 'movie', 'watch', 'streaming', 'online',
-                   'free', 'hd', 'download', 'english', 'subtitle', 'bluray'),
+        "tokens": (
+            "full",
+            "movies",
+            "movie",
+            "watch",
+            "streaming",
+            "online",
+            "free",
+            "hd",
+            "download",
+            "english",
+            "subtitle",
+            "bluray",
+        ),
         "min": 4,
     }
 ]
@@ -205,28 +221,25 @@ class DataciteImporter(EntityImporter):
     """
     Importer for datacite records.
     """
-    def __init__(self,
-                 api,
-                 issn_map_file,
-                 debug=False,
-                 insert_log_file=None,
-                 **kwargs):
+
+    def __init__(self, api, issn_map_file, debug=False, insert_log_file=None, **kwargs):
 
         eg_desc = kwargs.get(
-            'editgroup_description',
-            "Automated import of Datacite DOI metadata, harvested from REST API"
+            "editgroup_description",
+            "Automated import of Datacite DOI metadata, harvested from REST API",
         )
-        eg_extra = kwargs.get('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent',
-                                         'fatcat_tools.DataciteImporter')
-        super().__init__(api,
-                         issn_map_file=issn_map_file,
-                         editgroup_description=eg_desc,
-                         editgroup_extra=eg_extra,
-                         **kwargs)
-
-        self.create_containers = kwargs.get('create_containers', True)
-        extid_map_file = kwargs.get('extid_map_file')
+        eg_extra = kwargs.get("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.DataciteImporter")
+        super().__init__(
+            api,
+            issn_map_file=issn_map_file,
+            editgroup_description=eg_desc,
+            editgroup_extra=eg_extra,
+            **kwargs
+        )
+
+        self.create_containers = kwargs.get("create_containers", True)
+        extid_map_file = kwargs.get("extid_map_file")
         self.extid_map_db = None
         if extid_map_file:
             db_uri = "file:{}?mode=ro".format(extid_map_file)
@@ -240,30 +253,34 @@ class DataciteImporter(EntityImporter):
         self.insert_log_file = insert_log_file
         self.this_year = datetime.datetime.now().year
 
-        print('datacite with debug={}'.format(self.debug), file=sys.stderr)
+        print("datacite with debug={}".format(self.debug), file=sys.stderr)
 
     def lookup_ext_ids(self, doi):
         """
         Return dictionary of identifiers referring to the same things as the given DOI.
         """
         if self.extid_map_db is None:
-            return dict(core_id=None,
-                        pmid=None,
-                        pmcid=None,
-                        wikidata_qid=None,
-                        arxiv_id=None,
-                        jstor_id=None)
+            return dict(
+                core_id=None,
+                pmid=None,
+                pmcid=None,
+                wikidata_qid=None,
+                arxiv_id=None,
+                jstor_id=None,
+            )
         row = self.extid_map_db.execute(
-            "SELECT core, pmid, pmcid, wikidata FROM ids WHERE doi=? LIMIT 1",
-            [doi.lower()]).fetchone()
+            "SELECT core, pmid, pmcid, wikidata FROM ids WHERE doi=? LIMIT 1", [doi.lower()]
+        ).fetchone()
         if row is None:
-            return dict(core_id=None,
-                        pmid=None,
-                        pmcid=None,
-                        wikidata_qid=None,
-                        arxiv_id=None,
-                        jstor_id=None)
-        row = [str(cell or '') or None for cell in row]
+            return dict(
+                core_id=None,
+                pmid=None,
+                pmcid=None,
+                wikidata_qid=None,
+                arxiv_id=None,
+                jstor_id=None,
+            )
+        row = [str(cell or "") or None for cell in row]
         return dict(
             core_id=row[0],
             pmid=row[1],
@@ -280,22 +297,22 @@ class DataciteImporter(EntityImporter):
         """
         if not obj or not isinstance(obj, dict):
             return None
-        if 'attributes' not in obj:
+        if "attributes" not in obj:
             return None
 
-        attributes = obj['attributes']
-        doi = clean_doi(attributes.get('doi', '').lower())
+        attributes = obj["attributes"]
+        doi = clean_doi(attributes.get("doi", "").lower())
 
         if not doi:
-            print('skipping record without a DOI', file=sys.stderr)
+            print("skipping record without a DOI", file=sys.stderr)
             return
 
         if not str.isascii(doi):
-            print('[{}] skipping non-ascii doi for now'.format(doi))
+            print("[{}] skipping non-ascii doi for now".format(doi))
             return None
 
-        creators = attributes.get('creators', []) or []
-        contributors = attributes.get('contributors', []) or []  # Much fewer than creators.
+        creators = attributes.get("creators", []) or []
+        contributors = attributes.get("contributors", []) or []  # Much fewer than creators.
 
         contribs = self.parse_datacite_creators(creators, doi=doi)
 
@@ -323,7 +340,9 @@ class DataciteImporter(EntityImporter):
         # Related: https://guide.fatcat.wiki/entity_release.html -- role
         # (string, of a set): the type of contribution, from a controlled
         # vocabulary. TODO: vocabulary needs review.
-        contribs_extra_contributors = self.parse_datacite_creators(contributors, set_index=False, doi=doi)
+        contribs_extra_contributors = self.parse_datacite_creators(
+            contributors, set_index=False, doi=doi
+        )
 
         # Unfortunately, creators and contributors might overlap, refs GH59.
         for cc in contribs_extra_contributors:
@@ -333,17 +352,16 @@ class DataciteImporter(EntityImporter):
 
         # Title, may come with "attributes.titles[].titleType", like
         # "AlternativeTitle", "Other", "Subtitle", "TranslatedTitle"
-        titles = attributes.get('titles', []) or []
-        title, original_language_title, subtitle = parse_datacite_titles(
-            titles)
+        titles = attributes.get("titles", []) or []
+        title, original_language_title, subtitle = parse_datacite_titles(titles)
 
         if title is None:
-            print('[{}] skipping record w/o title: {}'.format(doi, obj), file=sys.stderr)
+            print("[{}] skipping record w/o title: {}".format(doi, obj), file=sys.stderr)
             return False
 
         title = clean(title)
         if not title:
-            print('[{}] skipping record w/o title: {}'.format(doi, obj), file=sys.stderr)
+            print("[{}] skipping record w/o title: {}".format(doi, obj), file=sys.stderr)
             return False
 
         # check for blocklisted "spam", e.g. "FULL MOVIE"
@@ -367,10 +385,13 @@ class DataciteImporter(EntityImporter):
         # "Collected", "Copyrighted", "Created", "Issued", "Submitted",
         # "Updated", "Valid".
         release_date, release_month, release_year = parse_datacite_dates(
-            attributes.get('dates', []))
+            attributes.get("dates", [])
+        )
 
         # block bogus far-future years/dates
-        if release_year is not None and (release_year > (self.this_year + 5) or release_year < 1000):
+        if release_year is not None and (
+            release_year > (self.this_year + 5) or release_year < 1000
+        ):
             release_date = None
             release_month = None
             release_year = None
@@ -378,26 +399,30 @@ class DataciteImporter(EntityImporter):
         # Some records do not use the "dates" field (e.g. micropub), but:
         # "attributes.published" or "attributes.publicationYear"
         if not any((release_date, release_month, release_year)):
-            release_date, release_month, release_year = parse_single_date(attributes.get('publicationYear'))
+            release_date, release_month, release_year = parse_single_date(
+                attributes.get("publicationYear")
+            )
             if not any((release_date, release_month, release_year)):
-                release_date, release_month, release_year = parse_single_date(attributes.get('published'))
+                release_date, release_month, release_year = parse_single_date(
+                    attributes.get("published")
+                )
 
         if not any((release_date, release_month, release_year)):
-            print('[{}] record w/o date: {}'.format(doi, obj), file=sys.stderr)
+            print("[{}] record w/o date: {}".format(doi, obj), file=sys.stderr)
 
         # Start with clear stages, e.g. published. TODO(martin): we could
         # probably infer a bit more from the relations, e.g.
         # "IsPreviousVersionOf" or "IsNewVersionOf".
-        release_stage = 'published'
+        release_stage = "published"
 
         # TODO(martin): If 'state' is not 'findable' or 'isActive' is not true,
         # we might want something else than 'published'. See also:
         # https://support.datacite.org/docs/doi-states.
 
         # Publisher. A few NA values. A few bogus values.
-        publisher = attributes.get('publisher')
+        publisher = attributes.get("publisher")
 
-        if publisher in UNKNOWN_MARKERS | set(('Unpublished', 'Unknown')):
+        if publisher in UNKNOWN_MARKERS | set(("Unpublished", "Unknown")):
             publisher = None
             release_stage = None
         if publisher is not None and len(publisher) > 80:
@@ -416,24 +441,26 @@ class DataciteImporter(EntityImporter):
         container_id = None
         container_name = None
 
-        container = attributes.get('container', {}) or {}
-        if container.get('type') in CONTAINER_TYPE_MAP.keys():
-            container_type = CONTAINER_TYPE_MAP.get(container['type'])
-            if container.get('identifier') and container.get(
-                    'identifierType') == 'ISSN':
-                issn = container.get('identifier')
+        container = attributes.get("container", {}) or {}
+        if container.get("type") in CONTAINER_TYPE_MAP.keys():
+            container_type = CONTAINER_TYPE_MAP.get(container["type"])
+            if container.get("identifier") and container.get("identifierType") == "ISSN":
+                issn = container.get("identifier")
                 if len(issn) == 8:
                     issn = issn[:4] + "-" + issn[4:]
                 issnl = self.issn2issnl(issn)
                 if issnl is not None:
                     container_id = self.lookup_issnl(issnl)
 
-                    if container_id is None and container.get('title'):
-                        container_name = container.get('title')
+                    if container_id is None and container.get("title"):
+                        container_name = container.get("title")
                         if isinstance(container_name, list):
                             if len(container_name) > 0:
-                                print('[{}] too many container titles: {}'.format(doi,
-                                    len(container_name)))
+                                print(
+                                    "[{}] too many container titles: {}".format(
+                                        doi, len(container_name)
+                                    )
+                                )
                                 container_name = container_name[0]
                         assert isinstance(container_name, str)
                         ce = fatcat_openapi_client.ContainerEntity(
@@ -447,21 +474,24 @@ class DataciteImporter(EntityImporter):
                 else:
                     # TODO(martin): factor this out into a testable function.
                     # TODO(martin): "container_name": "№1(1) (2018)" / 10.26087/inasan.2018.1.1.013
-                    container_name = container.get('title')
+                    container_name = container.get("title")
                     if isinstance(container_name, list):
                         if len(container_name) > 0:
-                            print('[{}] too many container titles: {}'.format(doi,
-                                len(container_name)))
+                            print(
+                                "[{}] too many container titles: {}".format(
+                                    doi, len(container_name)
+                                )
+                            )
                             container_name = container_name[0]
 
         # Exception: https://www.micropublication.org/, see: !MR24.
         if container_id is None and container_name is None:
-            if publisher and publisher.lower().startswith('micropublication'):
+            if publisher and publisher.lower().startswith("micropublication"):
                 container_name = publisher
 
         # Volume and issue.
-        volume = container.get('volume')
-        issue = container.get('issue')
+        volume = container.get("volume")
+        issue = container.get("issue")
 
         if volume:
             volume = clean(volume)
@@ -472,13 +502,13 @@ class DataciteImporter(EntityImporter):
         # Pages.
         pages = None
 
-        first_page = container.get('firstPage')
-        last_page = container.get('lastPage')
+        first_page = container.get("firstPage")
+        last_page = container.get("lastPage")
 
         if first_page and last_page:
             try:
                 _ = int(first_page) < int(last_page)
-                pages = '{}-{}'.format(first_page, last_page)
+                pages = "{}-{}".format(first_page, last_page)
             except ValueError as err:  # noqa: F841
                 # TODO(martin): This is more debug than info.
                 # print('[{}] {}'.format(doi, err), file=sys.stderr)
@@ -491,8 +521,8 @@ class DataciteImporter(EntityImporter):
         license_slug = None
         license_extra = []
 
-        for lic in attributes.get('rightsList', []):
-            slug = lookup_license_slug(lic.get('rightsUri'))
+        for lic in attributes.get("rightsList", []):
+            slug = lookup_license_slug(lic.get("rightsUri"))
             if slug:
                 license_slug = slug
             license_extra.append(lic)
@@ -506,7 +536,7 @@ class DataciteImporter(EntityImporter):
         # library solves it for you." -- TODO(martin): We need more of these.
         language = None
 
-        value = attributes.get('language', '') or ''
+        value = attributes.get("language", "") or ""
         try:
             language = pycountry.languages.lookup(value).alpha_2
         except (LookupError, AttributeError) as err:  # noqa: F841
@@ -520,22 +550,22 @@ class DataciteImporter(EntityImporter):
         # "Other" fields might contain references or related articles (with
         # DOI). TODO(martin): maybe try to parse out some of those refs.
         abstracts = []
-        descs = attributes.get('descriptions', []) or []
+        descs = attributes.get("descriptions", []) or []
         for desc in descs:
-            if not desc.get('descriptionType') == 'Abstract':
+            if not desc.get("descriptionType") == "Abstract":
                 continue
 
             # Description maybe a string, int or list.
-            text = desc.get('description', '')
+            text = desc.get("description", "")
             if not text:
                 continue
             if isinstance(text, int):
-                text = '{}'.format(text)
+                text = "{}".format(text)
             if isinstance(text, list):
                 try:
                     text = "\n".join(text)
                 except TypeError:
-                    continue # Bail out, if it is not a list of strings.
+                    continue  # Bail out, if it is not a list of strings.
 
             # Limit length.
             if len(text) < 10:
@@ -548,7 +578,10 @@ class DataciteImporter(EntityImporter):
             try:
                 lang = langdetect.detect(text)
             except (langdetect.lang_detect_exception.LangDetectException, TypeError) as err:
-                print('[{}] language detection failed with {} on {}'.format(doi, err, text), file=sys.stderr)
+                print(
+                    "[{}] language detection failed with {} on {}".format(doi, err, text),
+                    file=sys.stderr,
+                )
             abstract_text = clean(text)
             if not abstract_text:
                 continue
@@ -557,7 +590,8 @@ class DataciteImporter(EntityImporter):
                     mimetype="text/plain",
                     content=abstract_text,
                     lang=lang,
-                ))
+                )
+            )
 
         # References and relations. Datacite include many relation types in
         # "attributes.relatedIdentifiers[].relationType", e.g.
@@ -570,67 +604,76 @@ class DataciteImporter(EntityImporter):
         # For the moment, we only care about References.
         refs, ref_index = [], 0
 
-        relIds = attributes.get('relatedIdentifiers', []) or []
+        relIds = attributes.get("relatedIdentifiers", []) or []
         for rel in relIds:
-            if not rel.get('relationType', '') in ('References', 'Cites'):
+            if not rel.get("relationType", "") in ("References", "Cites"):
                 continue
             ref_extra = dict()
-            if rel.get('relatedIdentifierType', '') == 'DOI':
-                ref_extra['doi'] = rel.get('relatedIdentifier')
+            if rel.get("relatedIdentifierType", "") == "DOI":
+                ref_extra["doi"] = rel.get("relatedIdentifier")
             if not ref_extra:
                 ref_extra = None
             refs.append(
                 fatcat_openapi_client.ReleaseRef(
                     index=ref_index,
                     extra=ref_extra,
-                ))
+                )
+            )
             ref_index += 1
 
         # More specific release_type via 'Reviews' relationsship.
         for rel in relIds:
-            if rel.get('relatedIdentifierType', '') != 'Reviews':
+            if rel.get("relationType", "") != "Reviews":  # a relationType, not an id type
                 continue
-            release_type = 'review'
+            release_type = "review"
 
         # Extra information.
         extra_datacite = dict()
 
         if license_extra:
-            extra_datacite['license'] = license_extra
-        if attributes.get('subjects'):
-            extra_datacite['subjects'] = attributes['subjects']
+            extra_datacite["license"] = license_extra
+        if attributes.get("subjects"):
+            extra_datacite["subjects"] = attributes["subjects"]
 
         # Include version information.
-        metadata_version = attributes.get('metadataVersion') or ''
+        metadata_version = attributes.get("metadataVersion") or ""
 
         if metadata_version:
-            extra_datacite['metadataVersion'] = metadata_version
+            extra_datacite["metadataVersion"] = metadata_version
 
         # Include resource types.
-        types = attributes.get('types', {}) or {}
-        resource_type = types.get('resourceType', '') or ''
-        resource_type_general = types.get('resourceTypeGeneral', '') or ''
+        types = attributes.get("types", {}) or {}
+        resource_type = types.get("resourceType", "") or ""
+        resource_type_general = types.get("resourceTypeGeneral", "") or ""
 
         if resource_type and resource_type.lower() not in UNKNOWN_MARKERS_LOWER:
-            extra_datacite['resourceType'] = resource_type
+            extra_datacite["resourceType"] = resource_type
         if resource_type_general and resource_type_general.lower() not in UNKNOWN_MARKERS_LOWER:
-            extra_datacite['resourceTypeGeneral'] = resource_type_general
+            extra_datacite["resourceTypeGeneral"] = resource_type_general
 
         # Include certain relations from relatedIdentifiers. Keeping the
         # original structure of data here, which is a list of dicts, with
         # relation type, identifier and identifier type (mostly).
         relations = []
         for rel in relIds:
-            if rel.get('relationType') in ('IsPartOf', 'Reviews', 'Continues',
-                                           'IsVariantFormOf', 'IsSupplementTo',
-                                           'HasVersion', 'IsMetadataFor',
-                                           'IsNewVersionOf', 'IsIdenticalTo',
-                                           'IsVersionOf', 'IsDerivedFrom',
-                                           'IsSourceOf'):
+            if rel.get("relationType") in (
+                "IsPartOf",
+                "Reviews",
+                "Continues",
+                "IsVariantFormOf",
+                "IsSupplementTo",
+                "HasVersion",
+                "IsMetadataFor",
+                "IsNewVersionOf",
+                "IsIdenticalTo",
+                "IsVersionOf",
+                "IsDerivedFrom",
+                "IsSourceOf",
+            ):
                 relations.append(rel)
 
         if relations:
-            extra_datacite['relations'] = relations
+            extra_datacite["relations"] = relations
 
         extra = dict()
 
@@ -640,18 +683,18 @@ class DataciteImporter(EntityImporter):
         # Edition", "20191024", "v2.0.0", "v0.9.3", "10149", "2.0", null,
         # "v0.1.1", "3.0", "1.0", "3", "v1.12.2", "20191018", "v0.3.1", "v1.0",
         # "10161", "10010691", "10780", # "Presentación"
-        version = attributes.get('version') or None
+        version = attributes.get("version") or None
 
         # top-level extra keys
         if not container_id and container_name:
-            extra['container_name'] = container_name
+            extra["container_name"] = container_name
 
         # Always include datacite key, even if value is empty (dict).
-        extra['datacite'] = extra_datacite
+        extra["datacite"] = extra_datacite
 
         # Preparation for a schema update.
         if release_month:
-            extra['release_month'] = release_month
+            extra["release_month"] = release_month
 
         extids = self.lookup_ext_ids(doi=doi)
 
@@ -669,12 +712,12 @@ class DataciteImporter(EntityImporter):
             publisher=publisher,
             ext_ids=fatcat_openapi_client.ReleaseExtIds(
                 doi=doi,
-                pmid=extids['pmid'],
-                pmcid=extids['pmcid'],
-                wikidata_qid=extids['wikidata_qid'],
-                core=extids['core_id'],
-                arxiv=extids['arxiv_id'],
-                jstor=extids['jstor_id'],
+                pmid=extids["pmid"],
+                pmcid=extids["pmcid"],
+                wikidata_qid=extids["wikidata_qid"],
+                core=extids["core_id"],
+                arxiv=extids["arxiv_id"],
+                jstor=extids["jstor_id"],
             ),
             contribs=contribs,
             volume=volume,
@@ -702,19 +745,19 @@ class DataciteImporter(EntityImporter):
         """
 
         release_type = None
-        if not attributes.get('types'):
+        if not attributes.get("types"):
             return None
-        types = attributes['types']
+        types = attributes["types"]
 
-        for typeType in ('citeproc', 'ris', 'schemaOrg', 'bibtex', 'resourceTypeGeneral'):
+        for typeType in ("citeproc", "ris", "schemaOrg", "bibtex", "resourceTypeGeneral"):
             value = types.get(typeType)
             release_type = DATACITE_TYPE_MAP.get(typeType, {}).get(value)
             if release_type is not None:
                 break
 
         # special case: figshare "collections" which group other entities
-        if doi.startswith('10.6084/') or doi.startswith('10.25384'):
-            if types.get('resourceType') == "Collection":
+        if doi.startswith("10.6084/") or doi.startswith("10.25384"):
+            if types.get("resourceType") == "Collection":
                 release_type = "stub"
 
         if release_type is None:
@@ -736,35 +779,41 @@ class DataciteImporter(EntityImporter):
         # publishes highly interesting datasets, but titles are mostly the same
         # ("GBIF Occurrence Download" or "Occurrence Download"); set
         # release_type to "stub" (CSL/FC).
-        if re.title == 'GBIF Occurrence Download' and re.ext_ids.doi.startswith('10.15468/dl.'):
-            re.release_type = 'stub'
+        if re.title == "GBIF Occurrence Download" and re.ext_ids.doi.startswith("10.15468/dl."):
+            re.release_type = "stub"
 
         # release_type exception: lots of "Experimental Crystal Structure Determination"
         # publisher: "Cambridge Crystallographic Data Centre"
-        if re.ext_ids.doi.startswith('10.5517/'):
-            re.release_type = 'entry'
+        if re.ext_ids.doi.startswith("10.5517/"):
+            re.release_type = "entry"
 
         # Supplement files, e.g. "Additional file 1: ASE constructs in questionnaire."
-        if re.title.lower().startswith('additional file') and re.release_type in ('article', 'article-journal'):
-            re.release_type = 'component'
+        if re.title.lower().startswith("additional file") and re.release_type in (
+            "article",
+            "article-journal",
+        ):
+            re.release_type = "component"
 
         # figshare
-        if re.ext_ids.doi.startswith('10.6084/') or re.ext_ids.doi.startswith('10.25384'):
+        if re.ext_ids.doi.startswith("10.6084/") or re.ext_ids.doi.startswith("10.25384"):
             # set version if DOI ends with versioned suffix
-            doi_suffix = re.ext_ids.doi.split('.')[-1]
-            if doi_suffix and doi_suffix.startswith('v') and doi_suffix[1:].isdigit():
+            doi_suffix = re.ext_ids.doi.split(".")[-1]
+            if doi_suffix and doi_suffix.startswith("v") and doi_suffix[1:].isdigit():
                 re.version = doi_suffix
             # "Figure 123 from " -> component
             # "Table S1. ;Figure S1;Figure S2. ;Figure S3. ;Figure S4. from Use of organic exudates from two polar diatoms by bacterial isolates from the Arctic ocean"
-            if " from " in re.title and re.release_type not in ('stub', 'graphic'):
+            if " from " in re.title and re.release_type not in ("stub", "graphic"):
                 if re.title.startswith("Figure "):
                     re.release_type = "component"
                 elif re.title.startswith("Table "):
                     re.release_type = "component"
 
         # figshare.com
-        if re.ext_ids.doi.startswith('10.6084/m9.figshare.') and re.extra.get('container_name') is None:
-            re.extra['container_name'] = "figshare.com"
+        if (
+            re.ext_ids.doi.startswith("10.6084/m9.figshare.")
+            and re.extra.get("container_name") is None
+        ):
+            re.extra["container_name"] = "figshare.com"
 
         return re
 
@@ -788,26 +837,28 @@ class DataciteImporter(EntityImporter):
         # eventually we'll want to support "updates", but for now just skip if
         # entity already exists
         if existing:
-            self.counts['exists'] += 1
+            self.counts["exists"] += 1
             return False
 
         return True
 
     def insert_batch(self, batch):
-        print('inserting batch ({})'.format(len(batch)), file=sys.stderr)
+        print("inserting batch ({})".format(len(batch)), file=sys.stderr)
         if self.insert_log_file:
-            with open(self.insert_log_file, 'a') as f:
+            with open(self.insert_log_file, "a") as f:
                 for doc in batch:
                     json.dump(entity_to_dict(doc, api_client=None), f)
-                    f.write('\n')
+                    f.write("\n")
         self.api.create_release_auto_batch(
             fatcat_openapi_client.ReleaseAutoBatch(
                 editgroup=fatcat_openapi_client.Editgroup(
-                    description=self.editgroup_description,
-                    extra=self.editgroup_extra),
-                entity_list=batch))
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                ),
+                entity_list=batch,
+            )
+        )
 
-    def parse_datacite_creators(self, creators, role='author', set_index=True, doi=None):
+    def parse_datacite_creators(self, creators, role="author", set_index=True, doi=None):
         """
         Parses a list of creators into a list of ReleaseContrib objects. Set
         set_index to False, if the index contrib field should be left blank.
@@ -820,48 +871,53 @@ class DataciteImporter(EntityImporter):
         contribs = []
 
         # Names, that should be ignored right away.
-        name_blocklist = set(('Occdownload Gbif.Org',))
+        name_blocklist = set(("Occdownload Gbif.Org",))
 
         i = 0
         for c in creators:
             if not set_index:
                 i = None
-            nameType = c.get('nameType', '') or ''
-            if nameType in ('', 'Personal'):
+            nameType = c.get("nameType", "") or ""
+            if nameType in ("", "Personal"):
                 creator_id = None
-                for nid in c.get('nameIdentifiers', []) or []:
+                for nid in c.get("nameIdentifiers", []) or []:
                     if not isinstance(nid, dict):
                         # see: fatcat-workers/issues/44035/
-                        print('unexpected nameIdentifiers, expected list of dicts, got: {}'.format(nid), file=sys.stderr)
+                        print(
+                            "unexpected nameIdentifiers, expected list of dicts, got: {}".format(
+                                nid
+                            ),
+                            file=sys.stderr,
+                        )
                         continue
-                    name_scheme = nid.get('nameIdentifierScheme', '') or ''
+                    name_scheme = nid.get("nameIdentifierScheme", "") or ""
                     if not name_scheme.lower() == "orcid":
                         continue
-                    orcid = nid.get('nameIdentifier') or ''
-                    orcid = orcid.replace('https://orcid.org/', '')
+                    orcid = nid.get("nameIdentifier") or ""
+                    orcid = orcid.replace("https://orcid.org/", "")
                     if not orcid:
                         continue
                     creator_id = self.lookup_orcid(orcid)
                     # TODO(martin): If creator_id is None, should we create creators?
 
                 # If there are multiple affiliation strings, use the first one.
-                affiliations = c.get('affiliation', []) or []
+                affiliations = c.get("affiliation", []) or []
                 raw_affiliation = None
                 if len(affiliations) == 0:
                     raw_affiliation = None
                 else:
                     raw_affiliation = clean(affiliations[0])
 
-                name = c.get('name')
-                given_name = c.get('givenName')
-                surname = c.get('familyName')
+                name = c.get("name")
+                given_name = c.get("givenName")
+                surname = c.get("familyName")
 
                 if name:
                     name = clean(name)
                 if not any((name, given_name, surname)):
                     continue
                 if not name:
-                    name = "{} {}".format(given_name or '', surname or '').strip()
+                    name = "{} {}".format(given_name or "", surname or "").strip()
                 if name in name_blocklist:
                     continue
                 if name.lower() in UNKNOWN_MARKERS_LOWER:
@@ -881,7 +937,7 @@ class DataciteImporter(EntityImporter):
                 if not name:
                     continue
 
-                if raw_affiliation == '':
+                if raw_affiliation == "":
                     continue
 
                 extra = None
@@ -891,39 +947,38 @@ class DataciteImporter(EntityImporter):
                 # "RelatedPerson", "ProjectLeader", "Editor", "Other",
                 # "ProjectMember", "Funder", "RightsHolder", "DataCollector",
                 # "Supervisor", "Producer", "HostingInstitution", "ResearchGroup"
-                contributorType = c.get('contributorType', '') or ''
+                contributorType = c.get("contributorType", "") or ""
 
                 if contributorType:
-                    extra = {'type': contributorType}
+                    extra = {"type": contributorType}
 
                 rc = fatcat_openapi_client.ReleaseContrib(
-                        creator_id=creator_id,
-                        index=i,
-                        raw_name=name,
-                        given_name=given_name,
-                        surname=surname,
-                        role=role,
-                        raw_affiliation=raw_affiliation,
-                        extra=extra,
-                    )
+                    creator_id=creator_id,
+                    index=i,
+                    raw_name=name,
+                    given_name=given_name,
+                    surname=surname,
+                    role=role,
+                    raw_affiliation=raw_affiliation,
+                    extra=extra,
+                )
                 # Filter out duplicates early.
                 if not contributor_list_contains_contributor(contribs, rc):
                     contribs.append(rc)
                     if i is not None:
                         i += 1
-            elif nameType == 'Organizational':
-                name = c.get('name', '') or ''
+            elif nameType == "Organizational":
+                name = c.get("name", "") or ""
                 if name in UNKNOWN_MARKERS:
                     continue
                 if len(name) < 3:
                     continue
-                extra = {'organization': name}
-                contribs.append(fatcat_openapi_client.ReleaseContrib(
-                    index=i, extra=extra))
+                extra = {"organization": name}
+                contribs.append(fatcat_openapi_client.ReleaseContrib(index=i, extra=extra))
                 if i is not None:
                     i += 1
             else:
-                print('[{}] unknown name type: {}'.format(doi, nameType), file=sys.stderr)
+                print("[{}] unknown name type: {}".format(doi, nameType), file=sys.stderr)
 
         return contribs
 
@@ -935,8 +990,8 @@ def contributor_list_contains_contributor(contributor_list, contributor):
     for cc in contributor_list:
         if cc.raw_name != contributor.raw_name:
             continue
-        cc_role = cc.role or 'author'
-        contributor_role = contributor.role or 'author'
+        cc_role = cc.role or "author"
+        contributor_role = contributor.role or "author"
         if cc_role != contributor_role:
             continue
         return True
@@ -952,91 +1007,97 @@ def lookup_license_slug(raw):
     if not raw:
         return None
 
-    if 'creativecommons.org/publicdomain/zero' in raw:
-        return 'CC-0'
-    if raw.lower().endswith('/cc0'):
-        return 'CC-0'
+    if "creativecommons.org/publicdomain/zero" in raw:
+        return "CC-0"
+    if raw.lower().endswith("/cc0"):
+        return "CC-0"
 
-    if 'creativecommons' in raw:
+    if "creativecommons" in raw:
         # https://creativecommons.org/publicdomain/mark/1.0/deed.de
-        if 'creativecommons.org/publicdomain' in raw:
-            return 'CC-PUBLICDOMAIN'
-        if 'creativecommons.org/share-your-work/public-domain/cc0' in raw:
-            return 'CC-0'
+        if "creativecommons.org/publicdomain" in raw:
+            return "CC-PUBLICDOMAIN"
+        if "creativecommons.org/share-your-work/public-domain/cc0" in raw:
+            return "CC-0"
         # https://creativecommons.org/licenses/by/4.0/deed.es_ES
         raw = raw.lower()
-        match = re.search(r'creativecommons.org/licen[sc]es/(?P<name>[a-z-]+)', raw, re.IGNORECASE)
+        match = re.search(
+            r"creativecommons.org/licen[sc]es/(?P<name>[a-z-]+)", raw, re.IGNORECASE
+        )
         if not match:
-            print('missed potential license: {}'.format(raw), file=sys.stderr)
+            print("missed potential license: {}".format(raw), file=sys.stderr)
             return None
-        name = match.groupdict().get('name')
+        name = match.groupdict().get("name")
         if not name:
             return None
-        if not name.startswith('cc'):
-            name = 'cc-{}'.format(name)
+        if not name.startswith("cc"):
+            name = "cc-{}".format(name)
         return name.upper()
 
-    if 'opensource.org' in raw:
+    if "opensource.org" in raw:
         # https://opensource.org/licenses/alphabetical, e.g. opensource.org/licenses/EUPL-1.2
-        match = re.search(r'opensource.org/licenses/(?P<name>[^/]+)', raw, re.IGNORECASE)
+        match = re.search(r"opensource.org/licenses/(?P<name>[^/]+)", raw, re.IGNORECASE)
         if not match:
-            print('missed potential license: {}'.format(raw), file=sys.stderr)
+            print("missed potential license: {}".format(raw), file=sys.stderr)
             return None
-        name = match.groupdict().get('name')
+        name = match.groupdict().get("name")
         if not name:
             return None
         if len(name) > 11:
             return None
         return name.upper()
 
-    if 'gnu.org' in raw:
+    if "gnu.org" in raw:
         # http://www.gnu.org/copyleft/gpl, https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html
-        match = re.search(r'/(?P<name>fdl(-[0-9.]*[0-9]+)?|gpl(-[0-9.]*[0-9]+)?|lgpl(-[0-9.]*[0-9]+)|aglp(-[0-9.]*[0-9]+)?)', raw, re.IGNORECASE)
+        match = re.search(
+            r"/(?P<name>fdl(-[0-9.]*[0-9]+)?|gpl(-[0-9.]*[0-9]+)?|lgpl(-[0-9.]*[0-9]+)?|agpl(-[0-9.]*[0-9]+)?)",
+            raw,
+            re.IGNORECASE,
+        )
         if not match:
-            print('missed potential license: {}'.format(raw), file=sys.stderr)
+            print("missed potential license: {}".format(raw), file=sys.stderr)
             return None
-        name = match.groupdict().get('name')
+        name = match.groupdict().get("name")
         if not name:
             return None
         if len(name) > 8:
             return None
         return name.upper()
 
-    if 'spdx.org' in raw:
-        if 'spdx.org/licenses/CC0' in raw:
-            return 'CC-0'
+    if "spdx.org" in raw:
+        if "spdx.org/licenses/CC0" in raw:
+            return "CC-0"
         # https://spdx.org/licenses/CC-BY-NC-ND-4.0.html
-        match = re.search(r'spdx.org/licenses/(?P<name>[a-z0-9-]+)', raw, re.IGNORECASE)
+        match = re.search(r"spdx.org/licenses/(?P<name>[a-z0-9-]+)", raw, re.IGNORECASE)
         if not match:
-            print('missed potential license: {}'.format(raw), file=sys.stderr)
+            print("missed potential license: {}".format(raw), file=sys.stderr)
             return None
-        name = match.groupdict().get('name')
+        name = match.groupdict().get("name")
         if not name:
             return None
         if len(name) > 36:
             return None
         # cleanup version and extensions
-        name = re.sub('(-[0-9])?[.]?[0-9]?(.json|.html)?', '', name.lower())
+        name = re.sub("(-[0-9])?[.]?[0-9]?(.json|.html)?", "", name.lower())
         return name.upper()
 
-    if 'rightsstatements.org' in raw:
+    if "rightsstatements.org" in raw:
         # http://rightsstatements.org/vocab/InC/1.0/
-        match = re.search(r'rightsstatements.org/(vocab|page)/(?P<name>[^/]*)', raw)
+        match = re.search(r"rightsstatements.org/(vocab|page)/(?P<name>[^/]*)", raw)
         if not match:
-            print('missed potential license: {}'.format(raw), file=sys.stderr)
+            print("missed potential license: {}".format(raw), file=sys.stderr)
             return None
-        name = match.groupdict().get('name')
+        name = match.groupdict().get("name")
         if not name:
             return None
         if len(name) > 9:
             return None
-        return 'RS-{}'.format(name.upper())
+        return "RS-{}".format(name.upper())
 
     # Fallback to mapped values.
     raw = raw.lower()
-    raw = raw.strip().replace('http://', '//').replace('https://', '//')
-    if not raw.endswith('/'):
-        raw = raw + '/'
+    raw = raw.strip().replace("http://", "//").replace("https://", "//")
+    if not raw.endswith("/"):
+        raw = raw + "/"
     return LICENSE_SLUG_MAP.get(raw)
 
 
@@ -1046,23 +1107,21 @@ def find_original_language_title(item, min_length=4, max_questionmarks=3):
 
     Example input: {'title': 'Some title', 'original_language_title': 'Some title'}
     """
-    if 'original_language_title' not in item:
+    if "original_language_title" not in item:
         return None
-    title = item.get('title')
+    title = item.get("title")
     if not title:
         return None
-    original_language_title = item.get('original_language_title')
-    if isinstance(original_language_title,
-                  str) and title != original_language_title:
+    original_language_title = item.get("original_language_title")
+    if isinstance(original_language_title, str) and title != original_language_title:
         if len(original_language_title) < min_length:
             return None
-        if original_language_title.count('?') > max_questionmarks:
+        if original_language_title.count("?") > max_questionmarks:
             return None
         return original_language_title
     if isinstance(original_language_title, dict):
-        content = original_language_title.get('__content__', '') or ''
-        if content and content != title and not content.count(
-                '?') > max_questionmarks:
+        content = original_language_title.get("__content__", "") or ""
+        if content and content != title and not content.count("?") > max_questionmarks:
             return content
     return None
 
@@ -1082,23 +1141,23 @@ def parse_datacite_titles(titles):
         return title, original_language_title, subtitle
     elif len(titles) == 1:
         original_language_title = find_original_language_title(titles[0])
-        title = titles[0].get('title', '') or ''
+        title = titles[0].get("title", "") or ""
         title = title.strip()
         if not title:
             title = None
         return title, original_language_title, subtitle
     else:
         for entry in titles:
-            if not title and ('titleType' not in entry
-                              or not entry.get('titleType')):
-                title = (entry.get('title') or '').strip()
-            if not subtitle and entry.get('titleType') == 'Subtitle':
-                subtitle = entry.get('title', '').strip()
+            if not title and ("titleType" not in entry or not entry.get("titleType")):
+                title = (entry.get("title") or "").strip()
+            if not subtitle and entry.get("titleType") == "Subtitle":
+                subtitle = (entry.get("title") or "").strip()
             if not original_language_title:
                 original_language_title = find_original_language_title(entry)
 
     return title, original_language_title, subtitle
 
+
 def parse_single_date(value):
     """
     Given a single string containing a date in arbitrary format, try to return
@@ -1113,11 +1172,11 @@ def parse_single_date(value):
         # Results in a dict with keys: date_obj, period, locale.
         parse_result = parser.get_date_data(value)
         # A datetime object, later we need a date, only.
-        result = parse_result['date_obj']
+        result = parse_result["date_obj"]
         if result is not None:
-            if parse_result['period'] == 'year':
+            if parse_result["period"] == "year":
                 return None, None, result.year
-            elif parse_result['period'] == 'month':
+            elif parse_result["period"] == "month":
                 return None, result.month, result.year
             else:
                 return result.date(), result.month, result.year
@@ -1126,6 +1185,7 @@ def parse_single_date(value):
 
     return None, None, None
 
+
 def parse_datacite_dates(dates):
     """
     Given a list of date fields (under .dates), return tuple, (release_date,
@@ -1137,37 +1197,37 @@ def parse_datacite_dates(dates):
         return release_date, release_month, release_year
 
     if not isinstance(dates, list):
-        raise ValueError('expected a list of date items')
+        raise ValueError("expected a list of date items")
 
     # Observed values: "Available", "Submitted", "Valid", "Issued", "Accepted",
     # "Collected", "Updated", "Copyrighted", "Created"
     # Ignored for now: "Collected", "Issued"
     date_type_prio = (
-        'Valid',
-        'Available',
-        'Accepted',
-        'Submitted',
-        'Copyrighted',
-        'Created',
-        'Updated',
+        "Valid",
+        "Available",
+        "Accepted",
+        "Submitted",
+        "Copyrighted",
+        "Created",
+        "Updated",
     )
 
     # We need to note the granularity, since a string like "2019" would be
     # parsed into "2019-01-01", even though the month is unknown. Use 3
     # granularity types: 'y', 'm', 'd'.
-    Pattern = collections.namedtuple('Pattern', 'layout granularity')
+    Pattern = collections.namedtuple("Pattern", "layout granularity")
 
     # Before using (expensive) dateparser, try a few common patterns.
     common_patterns = (
-        Pattern('%Y-%m-%d', 'd'),
-        Pattern('%Y-%m', 'm'),
-        Pattern('%Y-%m-%dT%H:%M:%SZ', 'd'),
-        Pattern('%Y-%m-%dT%H:%M:%S', 'd'),
-        Pattern('%Y', 'y'),
+        Pattern("%Y-%m-%d", "d"),
+        Pattern("%Y-%m", "m"),
+        Pattern("%Y-%m-%dT%H:%M:%SZ", "d"),
+        Pattern("%Y-%m-%dT%H:%M:%S", "d"),
+        Pattern("%Y", "y"),
     )
 
     def parse_item(item):
-        result, value, year_only = None, str(item.get('date', '')) or '', False
+        result, value, year_only = None, str(item.get("date", "")) or "", False
         release_date, release_month, release_year = None, None, None
 
         for layout, granularity in common_patterns:
@@ -1176,22 +1236,22 @@ def parse_datacite_dates(dates):
             except ValueError:
                 continue
             else:
-                if granularity == 'y':
+                if granularity == "y":
                     year_only = True
                 break
 
         if result is None:
-            print('fallback for {}'.format(value), file=sys.stderr)
+            print("fallback for {}".format(value), file=sys.stderr)
             release_date, release_month, release_year = parse_single_date(value)
 
         if result is None:
             # Unparsable date.
             return release_date, release_month, release_year
 
-        if granularity != 'y':
+        if granularity != "y":
             release_date = result.date()
         release_year = result.year
-        if granularity in ('m', 'd'):
+        if granularity in ("m", "d"):
             release_month = result.month
 
         return release_date, release_month, release_year
@@ -1200,7 +1260,7 @@ def parse_datacite_dates(dates):
 
     for prio in date_type_prio:
         for item in dates:
-            if not item.get('dateType') == prio:
+            if not item.get("dateType") == prio:
                 continue
 
             release_date, release_month, release_year = parse_item(item)
@@ -1224,45 +1284,49 @@ def parse_datacite_dates(dates):
 
     return release_date, release_month, release_year
 
+
 def index_form_to_display_name(s):
     """
     Try to convert an index form name, like 'Razis, Panos A' into display_name,
     e.g. 'Panos A Razis'.
     """
-    if ',' not in s:
+    if "," not in s:
         return s
-    skip_on_chars = ['(', ')', '*']
+    skip_on_chars = ["(", ")", "*"]
     for char in skip_on_chars:
         if char in s:
             return s
-    if s.count(',') > 1:
+    if s.count(",") > 1:
         # "Dr. Hina, Dr. Muhammad Usman Shahid, Dr. Muhammad Zeeshan Khan"
         return s
 
     # Not names, but sprinkled in fields where authors live.
-    stopwords = [s.lower() for s in (
-        'Archive',
-        'Collection',
-        'Coordinator',
-        'Department',
-        'Germany',
-        'International',
-        'National',
-        'Netherlands',
-        'Office',
-        'Organisation',
-        'Organization',
-        'Service',
-        'Services',
-        'United States',
-        'University',
-        'Verein',
-        'Volkshochschule',
-    )]
+    stopwords = [
+        s.lower()
+        for s in (
+            "Archive",
+            "Collection",
+            "Coordinator",
+            "Department",
+            "Germany",
+            "International",
+            "National",
+            "Netherlands",
+            "Office",
+            "Organisation",
+            "Organization",
+            "Service",
+            "Services",
+            "United States",
+            "University",
+            "Verein",
+            "Volkshochschule",
+        )
+    ]
     lower = s.lower()
     for stop in stopwords:
         if stop in lower:
             return s
 
-    a, b = s.split(',')
-    return '{} {}'.format(b.strip(), a.strip())
+    a, b = s.split(",")
+    return "{} {}".format(b.strip(), a.strip())
diff --git a/python/fatcat_tools/importers/dblp_container.py b/python/fatcat_tools/importers/dblp_container.py
index 3d280fb7..603a6271 100644
--- a/python/fatcat_tools/importers/dblp_container.py
+++ b/python/fatcat_tools/importers/dblp_container.py
@@ -1,4 +1,3 @@
-
 """
 Importer for DBLP container-level (journal/conference/series) metadata,
 pre-scraped in to JSON from HTML pages.
@@ -13,17 +12,17 @@ from fatcat_tools.normal import clean_str
 
 
 class DblpContainerImporter(EntityImporter):
+    def __init__(
+        self, api, issn_map_file, dblp_container_map_file, dblp_container_map_output, **kwargs
+    ):
 
-    def __init__(self, api, issn_map_file, dblp_container_map_file, dblp_container_map_output, **kwargs):
-
-        eg_desc = kwargs.get('editgroup_description',
-            "Automated import of container-level metadata scraped from dblp HTML")
-        eg_extra = kwargs.get('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.DblpContainerImporter')
-        super().__init__(api,
-            editgroup_description=eg_desc,
-            editgroup_extra=eg_extra,
-            **kwargs)
+        eg_desc = kwargs.get(
+            "editgroup_description",
+            "Automated import of container-level metadata scraped from dblp HTML",
+        )
+        eg_extra = kwargs.get("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.DblpContainerImporter")
+        super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
         self.dblp_container_map_output = dblp_container_map_output
         self.read_dblp_container_map_file(dblp_container_map_file)
@@ -40,7 +39,10 @@ class DblpContainerImporter(EntityImporter):
             assert len(container_id) == 26
             self._dblp_container_map[prefix] = container_id
             print("\t".join([prefix, container_id]), file=self.dblp_container_map_output)
-        print("Got {} existing dblp container mappings.".format(len(self._dblp_container_map)), file=sys.stderr)
+        print(
+            "Got {} existing dblp container mappings.".format(len(self._dblp_container_map)),
+            file=sys.stderr,
+        )
 
     def lookup_dblp_prefix(self, prefix):
         if not prefix:
@@ -57,48 +59,48 @@ class DblpContainerImporter(EntityImporter):
         returns a ContainerEntity (or None if invalid or couldn't parse)
         """
 
-        dblp_prefix = row.get('key') or row.get('dblp_prefix')
+        dblp_prefix = row.get("key") or row.get("dblp_prefix")
         assert dblp_prefix
-        assert row['title']
+        assert row["title"]
 
         container_type = None
-        if dblp_prefix.startswith('conf/'):
+        if dblp_prefix.startswith("conf/"):
             container_type = "conference-series"
-        elif dblp_prefix.startswith('journals/'):
+        elif dblp_prefix.startswith("journals/"):
             container_type = "journal"
-        elif dblp_prefix.startswith('series/'):
+        elif dblp_prefix.startswith("series/"):
             container_type = "book-series"
 
         issnl = None
-        for issn in row.get('issns', []):
+        for issn in row.get("issns", []):
             issnl = self.issn2issnl(issn)
             if issnl:
                 break
 
         extra = {
-            'dblp': {
-                'prefix': dblp_prefix,
+            "dblp": {
+                "prefix": dblp_prefix,
             },
         }
 
-        if row.get('homepage_url'):
-            extra['urls'] = [row['homepage_url']]
+        if row.get("homepage_url"):
+            extra["urls"] = [row["homepage_url"]]
 
-        if row.get('acronym'):
-            extra['acronym'] = row['acronym']
+        if row.get("acronym"):
+            extra["acronym"] = row["acronym"]
 
         ce = fatcat_openapi_client.ContainerEntity(
-            name=clean_str(row['title']),
+            name=clean_str(row["title"]),
             container_type=container_type,
             issnl=issnl,
-            wikidata_qid=row.get('wikidata_qid'),
+            wikidata_qid=row.get("wikidata_qid"),
             extra=extra,
         )
         return ce
 
     def try_update(self, ce):
 
-        dblp_prefix = ce.extra['dblp']['prefix']
+        dblp_prefix = ce.extra["dblp"]["prefix"]
         existing = None
         existing_container_id = self.lookup_dblp_prefix(dblp_prefix)
         if existing_container_id:
@@ -123,8 +125,11 @@ class DblpContainerImporter(EntityImporter):
             return True
 
         if existing:
-            self.counts['exists'] += 1
-            print("\t".join([ce.extra['dblp']['prefix'], existing.ident]), file=self.dblp_container_map_output)
+            self.counts["exists"] += 1
+            print(
+                "\t".join([ce.extra["dblp"]["prefix"], existing.ident]),
+                file=self.dblp_container_map_output,
+            )
             return False
 
         # shouldn't get here
@@ -135,11 +140,17 @@ class DblpContainerImporter(EntityImporter):
         Because we want to print a prefix/container_id match for each row, we
         require a special batch insert method
         """
-        eg = self.api.create_container_auto_batch(fatcat_openapi_client.ContainerAutoBatch(
-            editgroup=fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra),
-            entity_list=batch))
+        eg = self.api.create_container_auto_batch(
+            fatcat_openapi_client.ContainerAutoBatch(
+                editgroup=fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                ),
+                entity_list=batch,
+            )
+        )
         for c_edit in eg.edits.containers:
             c = self.api.get_container(c_edit.ident)
-            print("\t".join([c.extra['dblp']['prefix'], c.ident]), file=self.dblp_container_map_output)
+            print(
+                "\t".join([c.extra["dblp"]["prefix"], c.ident]),
+                file=self.dblp_container_map_output,
+            )
diff --git a/python/fatcat_tools/importers/dblp_release.py b/python/fatcat_tools/importers/dblp_release.py
index 6d028f2f..5baa6cd6 100644
--- a/python/fatcat_tools/importers/dblp_release.py
+++ b/python/fatcat_tools/importers/dblp_release.py
@@ -1,4 +1,3 @@
-
 """
 Importer for DBLP release-level (article/paper/etc) XML metadata.
 
@@ -44,25 +43,16 @@ from fatcat_tools.transforms import entity_to_dict
 
 
 class DblpReleaseImporter(EntityImporter):
-
-    def __init__(self,
-                 api,
-                 dblp_container_map_file=None,
-                 **kwargs):
+    def __init__(self, api, dblp_container_map_file=None, **kwargs):
 
         eg_desc = kwargs.get(
-            'editgroup_description',
-            "Automated import of dblp metadata via XML records"
+            "editgroup_description", "Automated import of dblp metadata via XML records"
         )
-        eg_extra = kwargs.get('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent',
-                                         'fatcat_tools.DblpReleaseImporter')
+        eg_extra = kwargs.get("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.DblpReleaseImporter")
         # ensure default is to not do updates with this worker (override super() default)
-        kwargs['do_updates'] = kwargs.get("do_updates", False)
-        super().__init__(api,
-                         editgroup_description=eg_desc,
-                         editgroup_extra=eg_extra,
-                         **kwargs)
+        kwargs["do_updates"] = kwargs.get("do_updates", False)
+        super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
         self.dump_json_mode = kwargs.get("dump_json_mode", False)
         self.this_year = datetime.datetime.now().year
@@ -76,13 +66,16 @@ class DblpReleaseImporter(EntityImporter):
         "phdthesis",
         "mastersthesis",
         "www",
-        #"data",  # no instances in 2020-11 dump
+        # "data",  # no instances in 2020-11 dump
     ]
 
     def read_dblp_container_map_file(self, dblp_container_map_file) -> None:
         self._dblp_container_map = dict()
         if not dblp_container_map_file:
-            print("Not loading a dblp prefix container map file; entities will fail to import", file=sys.stderr)
+            print(
+                "Not loading a dblp prefix container map file; entities will fail to import",
+                file=sys.stderr,
+            )
             return
         print("Loading dblp prefix container map file...", file=sys.stderr)
         for line in dblp_container_map_file:
@@ -92,7 +85,10 @@ class DblpReleaseImporter(EntityImporter):
             container_id = container_id.strip()
             assert len(container_id) == 26
             self._dblp_container_map[prefix] = container_id
-        print("Got {} dblp container mappings.".format(len(self._dblp_container_map)), file=sys.stderr)
+        print(
+            "Got {} dblp container mappings.".format(len(self._dblp_container_map)),
+            file=sys.stderr,
+        )
 
     def lookup_dblp_prefix(self, prefix):
         if not prefix:
@@ -101,13 +97,13 @@ class DblpReleaseImporter(EntityImporter):
 
     def want(self, xml_elem):
         if xml_elem.name not in self.ELEMENT_TYPES:
-            self.counts['skip-type'] += 1
+            self.counts["skip-type"] += 1
             return False
-        if not xml_elem.get('key'):
-            self.counts['skip-no-key'] += 1
+        if not xml_elem.get("key"):
+            self.counts["skip-no-key"] += 1
             return False
-        if xml_elem['key'].startswith('homepage/'):
-            self.counts['skip-type-homepage'] += 1
+        if xml_elem["key"].startswith("homepage/"):
+            self.counts["skip-type-homepage"] += 1
             return False
         return True
 
@@ -127,88 +123,88 @@ class DblpReleaseImporter(EntityImporter):
         - isbn
         """
 
-        dblp_key = xml_elem.get('key')
+        dblp_key = xml_elem.get("key")
         if not dblp_key:
-            self.counts['skip-empty-key'] += 1
+            self.counts["skip-empty-key"] += 1
             return False
-        dblp_key_type = dblp_key.split('/')[0]
+        dblp_key_type = dblp_key.split("/")[0]
 
         # dblp_prefix may be used for container lookup
         dblp_prefix = None
-        if dblp_key_type in ('journals', 'conf'):
-            dblp_prefix = '/'.join(dblp_key.split('/')[:2])
-        elif dblp_key_type in ('series', 'reference', 'tr', 'books'):
-            dblp_prefix = '/'.join(dblp_key.split('/')[:-1])
+        if dblp_key_type in ("journals", "conf"):
+            dblp_prefix = "/".join(dblp_key.split("/")[:2])
+        elif dblp_key_type in ("series", "reference", "tr", "books"):
+            dblp_prefix = "/".join(dblp_key.split("/")[:-1])
 
-        publtype = xml_elem.get('publtype') or None
+        publtype = xml_elem.get("publtype") or None
 
         dblp_type = xml_elem.name
         if dblp_type not in self.ELEMENT_TYPES:
-            self.counts[f'skip-dblp-type:{dblp_type}'] += 1
+            self.counts[f"skip-dblp-type:{dblp_type}"] += 1
 
-        if dblp_key_type in ('homepages', 'persons', 'dblpnote'):
-            self.counts['skip-key-type'] += 1
+        if dblp_key_type in ("homepages", "persons", "dblpnote"):
+            self.counts["skip-key-type"] += 1
             return False
 
-        if dblp_key.startswith('journals/corr/'):
-            self.counts['skip-arxiv-corr'] += 1
+        if dblp_key.startswith("journals/corr/"):
+            self.counts["skip-arxiv-corr"] += 1
             return False
 
         title = clean_str(" ".join(xml_elem.title.stripped_strings), force_xml=True)
         if not title:
-            self.counts['skip-title'] += 1
+            self.counts["skip-title"] += 1
             return False
-        if title.endswith('.'):
+        if title.endswith("."):
             title = title[:-1]
 
         release_type = None
-        release_stage = 'published'
+        release_stage = "published"
         withdrawn_status = None
 
         # primary releae_type detection: type of XML element, then prefix of key for granularity
-        if dblp_type == 'article':
-            release_type = 'article'
-            if dblp_key_type == 'journals' and publtype != 'informal':
-                release_type = 'article-journal'
-            elif dblp_key_type == 'tr':
-                release_type = 'report'
+        if dblp_type == "article":
+            release_type = "article"
+            if dblp_key_type == "journals" and publtype != "informal":
+                release_type = "article-journal"
+            elif dblp_key_type == "tr":
+                release_type = "report"
             elif title.startswith("Review:"):
-                release_type = 'review'
-        elif dblp_type == 'inproceedings':
-            release_type = 'paper-conference'
-        elif dblp_type == 'book':
-            release_type = 'book'
-        elif dblp_type == 'incollection':
+                release_type = "review"
+        elif dblp_type == "inproceedings":
+            release_type = "paper-conference"
+        elif dblp_type == "book":
+            release_type = "book"
+        elif dblp_type == "incollection":
             # XXX: part vs. chapter?
-            release_type = 'chapter'
-        elif dblp_type == 'data':
-            release_type = 'dataset'
-        elif dblp_type in ('mastersthesis', 'phdthesis'):
-            release_type = 'thesis'
+            release_type = "chapter"
+        elif dblp_type == "data":
+            release_type = "dataset"
+        elif dblp_type in ("mastersthesis", "phdthesis"):
+            release_type = "thesis"
 
         # overrides/extensions of the above
-        if publtype == 'informal':
+        if publtype == "informal":
             # for conferences, seems to indicate peer-review status
             # for journals, seems to indicate things like book reviews; split out above
             pass
-        elif publtype == 'encyclopedia':
-            release_type = 'entry-encyclopedia'
-        elif publtype == 'edited':
+        elif publtype == "encyclopedia":
+            release_type = "entry-encyclopedia"
+        elif publtype == "edited":
             # XXX: article?
-            release_type = 'editorial'
-        elif publtype == 'data':
-            release_type = 'dataset'
-        elif publtype == 'data':
-            release_type = 'dataset'
-        elif publtype == 'software':
-            release_type = 'software'
-        elif publtype == 'widthdrawn':
-            withdrawn_status = 'widthdrawn'
-        elif publtype == 'survey':
+            release_type = "editorial"
+        elif publtype == "data":
+            release_type = "dataset"
+        elif publtype == "data":
+            release_type = "dataset"
+        elif publtype == "software":
+            release_type = "software"
+        elif publtype == "widthdrawn":
+            withdrawn_status = "widthdrawn"
+        elif publtype == "survey":
             # XXX: flag as a review/survey article?
             pass
 
-        #print((release_type, dblp_type, dblp_key_type, publtype), file=sys.stderr)
+        # print((release_type, dblp_type, dblp_key_type, publtype), file=sys.stderr)
 
         container_name = None
         booktitle = clean_str(xml_elem.booktitle and xml_elem.booktitle.text)
@@ -236,7 +232,9 @@ class DblpReleaseImporter(EntityImporter):
         part_of_key = clean_str(xml_elem.crossref and xml_elem.crossref.text)
 
         # block bogus far-future years/dates
-        if release_year is not None and (release_year > (self.this_year + 5) or release_year < 1000):
+        if release_year is not None and (
+            release_year > (self.this_year + 5) or release_year < 1000
+        ):
             release_month = None
             release_year = None
 
@@ -245,39 +243,39 @@ class DblpReleaseImporter(EntityImporter):
         if isbn:
             ext_ids.isbn13 = isbn
         if ext_ids.doi:
-            self.counts['has-doi'] += 1
+            self.counts["has-doi"] += 1
 
         # dblp-specific extra
         dblp_extra = dict(type=dblp_type)
         note = clean_str(xml_elem.note and xml_elem.note.text)
-        if note and 'base-search.net' not in note:
-            dblp_extra['note'] = note
+        if note and "base-search.net" not in note:
+            dblp_extra["note"] = note
         if part_of_key:
-            dblp_extra['part_of_key'] = part_of_key
+            dblp_extra["part_of_key"] = part_of_key
 
         # generic extra
         extra = dict()
         if not container_id and container_name:
-            extra['container_name'] = container_name
+            extra["container_name"] = container_name
 
-        if series and (dblp_key_type == 'series' or dblp_type == 'book'):
-            extra['series-title'] = series
+        if series and (dblp_key_type == "series" or dblp_type == "book"):
+            extra["series-title"] = series
         elif series:
-            dblp_extra['series'] = series
+            dblp_extra["series"] = series
 
-        if booktitle and dblp_key_type == 'series':
-            extra['container-title'] = booktitle
-        elif booktitle and dblp_key_type == 'conf':
-            extra['event'] = booktitle
+        if booktitle and dblp_key_type == "series":
+            extra["container-title"] = booktitle
+        elif booktitle and dblp_key_type == "conf":
+            extra["event"] = booktitle
         elif booktitle:
-            dblp_extra['booktitle'] = booktitle
+            dblp_extra["booktitle"] = booktitle
 
         if release_year and release_month:
             # TODO: release_month schema migration
-            extra['release_month'] = release_month
+            extra["release_month"] = release_month
 
         if dblp_extra:
-            extra['dblp'] = dblp_extra
+            extra["dblp"] = dblp_extra
         if not extra:
             extra = None
 
@@ -289,7 +287,7 @@ class DblpReleaseImporter(EntityImporter):
             withdrawn_status=withdrawn_status,
             title=title,
             release_year=release_year,
-            #release_date,
+            # release_date,
             publisher=publisher,
             ext_ids=ext_ids,
             contribs=contribs,
@@ -302,8 +300,8 @@ class DblpReleaseImporter(EntityImporter):
 
         if self.dump_json_mode:
             re_dict = entity_to_dict(re, api_client=self.api.api_client)
-            re_dict['_dblp_ee_urls'] = self.dblp_ext_urls(xml_elem)
-            re_dict['_dblp_prefix'] = dblp_prefix
+            re_dict["_dblp_ee_urls"] = self.dblp_ext_urls(xml_elem)
+            re_dict["_dblp_prefix"] = dblp_prefix
             print(json.dumps(re_dict, sort_keys=True))
             return False
 
@@ -341,11 +339,11 @@ class DblpReleaseImporter(EntityImporter):
 
         # then try other ext_id lookups
         if not existing:
-            for extid_type in ('doi', 'wikidata_qid', 'isbn13', 'arxiv'):
+            for extid_type in ("doi", "wikidata_qid", "isbn13", "arxiv"):
                 extid_val = getattr(re.ext_ids, extid_type)
                 if not extid_val:
                     continue
-                #print(f"  lookup release type: {extid_type} val: {extid_val}")
+                # print(f"  lookup release type: {extid_type} val: {extid_val}")
                 try:
                     existing = self.api.lookup_release(**{extid_type: extid_val})
                 except fatcat_openapi_client.rest.ApiException as err:
@@ -373,12 +371,14 @@ class DblpReleaseImporter(EntityImporter):
             return True
 
         if not self.do_updates or existing.ext_ids.dblp:
-            self.counts['exists'] += 1
+            self.counts["exists"] += 1
             return False
 
         # logic for whether to do update or skip
-        if (existing.container_id and existing.release_type and existing.release_stage) or existing.ext_ids.arxiv:
-            self.counts['skip-update'] += 1
+        if (
+            existing.container_id and existing.release_type and existing.release_stage
+        ) or existing.ext_ids.arxiv:
+            self.counts["skip-update"] += 1
             return False
 
         # fields to copy over for update
@@ -390,20 +390,20 @@ class DblpReleaseImporter(EntityImporter):
         existing.release_stage = existing.release_stage or re.release_stage
         existing.withdrawn_status = existing.withdrawn_status or re.withdrawn_status
         existing.container_id = existing.container_id or re.container_id
-        existing.extra['dblp'] = re.extra['dblp']
+        existing.extra["dblp"] = re.extra["dblp"]
         existing.volume = existing.volume or re.volume
         existing.issue = existing.issue or re.issue
         existing.pages = existing.pages or re.pages
 
         try:
             self.api.update_release(self.get_editgroup_id(), existing.ident, existing)
-            self.counts['update'] += 1
+            self.counts["update"] += 1
         except fatcat_openapi_client.rest.ApiException as err:
             # there is a code path where we try to update the same release
             # twice in a row; if that happens, just skip
             # NOTE: API behavior might change in the future?
             if "release_edit_editgroup_id_ident_id_key" in err.body:
-                self.counts['skip-update-conflict'] += 1
+                self.counts["skip-update-conflict"] += 1
                 return False
             else:
                 raise err
@@ -411,11 +411,14 @@ class DblpReleaseImporter(EntityImporter):
         return False
 
     def insert_batch(self, batch):
-        self.api.create_release_auto_batch(fatcat_openapi_client.ReleaseAutoBatch(
-            editgroup=fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra),
-            entity_list=batch))
+        self.api.create_release_auto_batch(
+            fatcat_openapi_client.ReleaseAutoBatch(
+                editgroup=fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                ),
+                entity_list=batch,
+            )
+        )
 
     def dblp_contribs(self, authors: List[dict]) -> List[fatcat_openapi_client.ReleaseContrib]:
         """
@@ -428,14 +431,14 @@ class DblpReleaseImporter(EntityImporter):
         """
         contribs = []
         index = 0
-        for elem in authors.find_all('author'):
+        for elem in authors.find_all("author"):
             contrib = self.dblp_contrib_single(elem)
             contrib.role = "author"
             contrib.index = index
             contribs.append(contrib)
             index += 1
 
-        for elem in authors.find_all('editor'):
+        for elem in authors.find_all("editor"):
             contrib = self.dblp_contrib_single(elem)
             contrib.role = "editor"
             contribs.append(contrib)
@@ -459,10 +462,10 @@ class DblpReleaseImporter(EntityImporter):
 
         # remove number in author name, if present
         if raw_name.split()[-1].isdigit():
-            raw_name = ' '.join(raw_name.split()[:-1])
+            raw_name = " ".join(raw_name.split()[:-1])
 
-        if elem.get('orcid'):
-            orcid = clean_orcid(elem['orcid'])
+        if elem.get("orcid"):
+            orcid = clean_orcid(elem["orcid"])
             if orcid:
                 creator_id = self.lookup_orcid(orcid)
                 if not creator_id:
@@ -491,22 +494,26 @@ class DblpReleaseImporter(EntityImporter):
         wikidata_qid: Optional[str] = None
         arxiv_id: Optional[str] = None
         hdl: Optional[str] = None
-        for ee in xml_elem.find_all('ee'):
+        for ee in xml_elem.find_all("ee"):
             url = ee.text
             # convert DOI-like domains, which mostly have DOIs anyways
-            if '://doi.acm.org/' in url:
-                url = url.replace('://doi.acm.org/', '://doi.org/')
-            elif '://doi.ieeecomputersociety.org/' in url:
-                url = url.replace('://doi.ieeecomputersociety.org/', '://doi.org/')
+            if "://doi.acm.org/" in url:
+                url = url.replace("://doi.acm.org/", "://doi.org/")
+            elif "://doi.ieeecomputersociety.org/" in url:
+                url = url.replace("://doi.ieeecomputersociety.org/", "://doi.org/")
 
-            if 'doi.org/10.' in url and not doi:
+            if "doi.org/10." in url and not doi:
                 doi = clean_doi(url)
-            elif 'wikidata.org/entity/Q' in url and not wikidata_qid:
+            elif "wikidata.org/entity/Q" in url and not wikidata_qid:
                 wikidata_qid = clean_wikidata_qid(url)
-            elif '://arxiv.org/abs/' in url and not arxiv_id:
-                arxiv_id = url.replace('http://', '').replace('https://', '').replace('arxiv.org/abs/', '')
+            elif "://arxiv.org/abs/" in url and not arxiv_id:
+                arxiv_id = (
+                    url.replace("http://", "")
+                    .replace("https://", "")
+                    .replace("arxiv.org/abs/", "")
+                )
                 arxiv_id = clean_arxiv_id(arxiv_id)
-            elif '://hdl.handle.net' in url and not hdl:
+            elif "://hdl.handle.net" in url and not hdl:
                 hdl = clean_hdl(url)
 
         return fatcat_openapi_client.ReleaseExtIds(
@@ -525,14 +532,14 @@ class DblpReleaseImporter(EntityImporter):
         sandcrawler ingest requests.
         """
         EXTID_PATTERNS = [
-            '://doi.acm.org/',
-            '://doi.ieeecomputersociety.org/',
-            'doi.org/10.',
-            'wikidata.org/entity/Q',
-            '://arxiv.org/abs/',
+            "://doi.acm.org/",
+            "://doi.ieeecomputersociety.org/",
+            "doi.org/10.",
+            "wikidata.org/entity/Q",
+            "://arxiv.org/abs/",
         ]
         urls = []
-        for ee in xml_elem.find_all('ee'):
+        for ee in xml_elem.find_all("ee"):
             url = ee.text
             skip = False
             for pattern in EXTID_PATTERNS:
diff --git a/python/fatcat_tools/importers/doaj_article.py b/python/fatcat_tools/importers/doaj_article.py
index 1831c4cd..cd063337 100644
--- a/python/fatcat_tools/importers/doaj_article.py
+++ b/python/fatcat_tools/importers/doaj_article.py
@@ -28,26 +28,23 @@ MAX_ABSTRACT_LENGTH = 2048
 
 
 class DoajArticleImporter(EntityImporter):
-
-    def __init__(self,
-                 api,
-                 issn_map_file,
-                 **kwargs):
+    def __init__(self, api, issn_map_file, **kwargs):
 
         eg_desc = kwargs.get(
-            'editgroup_description',
-            "Automated import of DOAJ article metadata, harvested from REST API or bulk dumps"
+            "editgroup_description",
+            "Automated import of DOAJ article metadata, harvested from REST API or bulk dumps",
         )
-        eg_extra = kwargs.get('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent',
-                                         'fatcat_tools.DoajArticleImporter')
+        eg_extra = kwargs.get("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.DoajArticleImporter")
         # ensure default is to not do updates with this worker (override super() default)
-        kwargs['do_updates'] = kwargs.get("do_updates", False)
-        super().__init__(api,
-                         issn_map_file=issn_map_file,
-                         editgroup_description=eg_desc,
-                         editgroup_extra=eg_extra,
-                         **kwargs)
+        kwargs["do_updates"] = kwargs.get("do_updates", False)
+        super().__init__(
+            api,
+            issn_map_file=issn_map_file,
+            editgroup_description=eg_desc,
+            editgroup_extra=eg_extra,
+            **kwargs,
+        )
 
         self.this_year = datetime.datetime.now().year
         self.read_issn_map_file(issn_map_file)
@@ -82,21 +79,21 @@ class DoajArticleImporter(EntityImporter):
         }
         """
 
-        if not obj or not isinstance(obj, dict) or 'bibjson' not in obj:
-            self.counts['skip-empty'] += 1
+        if not obj or not isinstance(obj, dict) or "bibjson" not in obj:
+            self.counts["skip-empty"] += 1
             return None
 
-        bibjson = obj['bibjson']
+        bibjson = obj["bibjson"]
 
-        title = clean_str(bibjson.get('title'), force_xml=True)
+        title = clean_str(bibjson.get("title"), force_xml=True)
         if not title:
-            self.counts['skip-title'] += 1
+            self.counts["skip-title"] += 1
             return False
 
-        container_name = clean_str(bibjson['journal']['title'])
+        container_name = clean_str(bibjson["journal"]["title"])
         container_id = None
         # NOTE: 'issns' not documented in API schema
-        for issn in bibjson['journal']['issns']:
+        for issn in bibjson["journal"]["issns"]:
             issnl = self.issn2issnl(issn)
             if issnl:
                 container_id = self.lookup_issnl(self.issn2issnl(issn))
@@ -105,75 +102,83 @@ class DoajArticleImporter(EntityImporter):
                 container_name = None
                 break
 
-        volume = clean_str(bibjson['journal'].get('volume'))
+        volume = clean_str(bibjson["journal"].get("volume"))
         # NOTE: this schema seems to use "number" as "issue number"
-        issue = clean_str(bibjson['journal'].get('number'))
-        publisher = clean_str(bibjson['journal'].get('publisher'))
+        issue = clean_str(bibjson["journal"].get("number"))
+        publisher = clean_str(bibjson["journal"].get("publisher"))
 
         try:
-            release_year = int(bibjson.get('year'))
+            release_year = int(bibjson.get("year"))
         except (TypeError, ValueError):
             release_year = None
-        release_month = parse_month(clean_str(bibjson.get('month')))
+        release_month = parse_month(clean_str(bibjson.get("month")))
 
         # block bogus far-future years/dates
-        if release_year is not None and (release_year > (self.this_year + 5) or release_year < 1000):
+        if release_year is not None and (
+            release_year > (self.this_year + 5) or release_year < 1000
+        ):
             release_month = None
             release_year = None
 
-        license_slug = self.doaj_license_slug(bibjson['journal'].get('license'))
-        country = parse_country_name(bibjson['journal'].get('country'))
+        license_slug = self.doaj_license_slug(bibjson["journal"].get("license"))
+        country = parse_country_name(bibjson["journal"].get("country"))
         language = None
-        for raw in bibjson['journal'].get('language') or []:
+        for raw in bibjson["journal"].get("language") or []:
             language = parse_lang_name(raw)
             if language:
                 break
 
         # pages
         # NOTE: error in API docs? seems like start_page not under 'journal' object
-        start_page = clean_str(bibjson['journal'].get('start_page')) or clean_str(bibjson.get('start_page'))
-        end_page = clean_str(bibjson['journal'].get('end_page')) or clean_str(bibjson.get('end_page'))
+        start_page = clean_str(bibjson["journal"].get("start_page")) or clean_str(
+            bibjson.get("start_page")
+        )
+        end_page = clean_str(bibjson["journal"].get("end_page")) or clean_str(
+            bibjson.get("end_page")
+        )
         pages: Optional[str] = None
         if start_page and end_page:
             pages = f"{start_page}-{end_page}"
         elif start_page:
             pages = start_page
 
-        doaj_article_id = obj['id'].lower()
-        ext_ids = self.doaj_ext_ids(bibjson['identifier'], doaj_article_id)
+        doaj_article_id = obj["id"].lower()
+        ext_ids = self.doaj_ext_ids(bibjson["identifier"], doaj_article_id)
         abstracts = self.doaj_abstracts(bibjson)
-        contribs = self.doaj_contribs(bibjson.get('author') or [])
+        contribs = self.doaj_contribs(bibjson.get("author") or [])
 
         # DOAJ-specific extra
         doaj_extra = dict()
-        if bibjson.get('subject'):
-            doaj_extra['subject'] = bibjson.get('subject')
-        if bibjson.get('keywords'):
-            doaj_extra['keywords'] = [k for k in [clean_str(s) for s in bibjson.get('keywords')] if k]
+        if bibjson.get("subject"):
+            doaj_extra["subject"] = bibjson.get("subject")
+        if bibjson.get("keywords"):
+            doaj_extra["keywords"] = [
+                k for k in [clean_str(s) for s in bibjson.get("keywords")] if k
+            ]
 
         # generic extra
         extra = dict()
         if country:
-            extra['country'] = country
+            extra["country"] = country
         if not container_id and container_name:
-            extra['container_name'] = container_name
+            extra["container_name"] = container_name
         if release_year and release_month:
             # TODO: schema migration
-            extra['release_month'] = release_month
+            extra["release_month"] = release_month
 
         if doaj_extra:
-            extra['doaj'] = doaj_extra
+            extra["doaj"] = doaj_extra
         if not extra:
             extra = None
 
         re = fatcat_openapi_client.ReleaseEntity(
             work_id=None,
             container_id=container_id,
-            release_type='article-journal',
-            release_stage='published',
+            release_type="article-journal",
+            release_stage="published",
             title=title,
             release_year=release_year,
-            #release_date,
+            # release_date,
             publisher=publisher,
             ext_ids=ext_ids,
             contribs=contribs,
@@ -208,11 +213,11 @@ class DoajArticleImporter(EntityImporter):
 
         # then try other ext_id lookups
         if not existing:
-            for extid_type in ('doi', 'pmid', 'pmcid'):
+            for extid_type in ("doi", "pmid", "pmcid"):
                 extid_val = getattr(re.ext_ids, extid_type)
                 if not extid_val:
                     continue
-                #print(f"  lookup release type: {extid_type} val: {extid_val}")
+                # print(f"  lookup release type: {extid_type} val: {extid_val}")
                 try:
                     existing = self.api.lookup_release(**{extid_type: extid_val})
                 except fatcat_openapi_client.rest.ApiException as err:
@@ -241,7 +246,7 @@ class DoajArticleImporter(EntityImporter):
 
         # other logic could go here about skipping updates
         if not self.do_updates or existing.ext_ids.doaj:
-            self.counts['exists'] += 1
+            self.counts["exists"] += 1
             return False
 
         # fields to copy over for update
@@ -250,7 +255,7 @@ class DoajArticleImporter(EntityImporter):
         existing.release_stage = existing.release_stage or re.release_stage
         existing.container_id = existing.container_id or re.container_id
         existing.abstracts = existing.abstracts or re.abstracts
-        existing.extra['doaj'] = re.extra['doaj']
+        existing.extra["doaj"] = re.extra["doaj"]
         existing.volume = existing.volume or re.volume
         existing.issue = existing.issue or re.issue
         existing.pages = existing.pages or re.pages
@@ -258,13 +263,13 @@ class DoajArticleImporter(EntityImporter):
 
         try:
             self.api.update_release(self.get_editgroup_id(), existing.ident, existing)
-            self.counts['update'] += 1
+            self.counts["update"] += 1
         except fatcat_openapi_client.rest.ApiException as err:
             # there is a code path where we try to update the same release
             # twice in a row; if that happens, just skip
             # NOTE: API behavior might change in the future?
             if "release_edit_editgroup_id_ident_id_key" in err.body:
-                self.counts['skip-update-conflict'] += 1
+                self.counts["skip-update-conflict"] += 1
                 return False
             else:
                 raise err
@@ -272,14 +277,17 @@ class DoajArticleImporter(EntityImporter):
         return False
 
     def insert_batch(self, batch):
-        self.api.create_release_auto_batch(fatcat_openapi_client.ReleaseAutoBatch(
-            editgroup=fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra),
-            entity_list=batch))
+        self.api.create_release_auto_batch(
+            fatcat_openapi_client.ReleaseAutoBatch(
+                editgroup=fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                ),
+                entity_list=batch,
+            )
+        )
 
     def doaj_abstracts(self, bibjson: dict) -> List[fatcat_openapi_client.ReleaseAbstract]:
-        text = clean_str(bibjson.get('abstract'))
+        text = clean_str(bibjson.get("abstract"))
         if not text or len(text) < 10:
             return []
         if len(text) > MAX_ABSTRACT_LENGTH:
@@ -293,7 +301,9 @@ class DoajArticleImporter(EntityImporter):
             lang=lang,
         )
 
-        return [abstract,]
+        return [
+            abstract,
+        ]
 
     def doaj_contribs(self, authors: List[dict]) -> List[fatcat_openapi_client.ReleaseContrib]:
         """
@@ -306,23 +316,27 @@ class DoajArticleImporter(EntityImporter):
         contribs = []
         index = 0
         for author in authors:
-            if not author.get('name'):
+            if not author.get("name"):
                 continue
             creator_id = None
-            orcid = clean_orcid(author.get('orcid_id'))
+            orcid = clean_orcid(author.get("orcid_id"))
             if orcid:
                 creator_id = self.lookup_orcid(orcid)
-            contribs.append(fatcat_openapi_client.ReleaseContrib(
-                raw_name=author.get('name'),
-                role='author',
-                index=index,
-                creator_id=creator_id,
-                raw_affiliation=clean_str(author.get('affiliation')),
-            ))
+            contribs.append(
+                fatcat_openapi_client.ReleaseContrib(
+                    raw_name=author.get("name"),
+                    role="author",
+                    index=index,
+                    creator_id=creator_id,
+                    raw_affiliation=clean_str(author.get("affiliation")),
+                )
+            )
             index += 1
         return contribs
 
-    def doaj_ext_ids(self, identifiers: List[dict], doaj_article_id: str) -> fatcat_openapi_client.ReleaseExtIds:
+    def doaj_ext_ids(
+        self, identifiers: List[dict], doaj_article_id: str
+    ) -> fatcat_openapi_client.ReleaseExtIds:
         """
         bibjson.identifier {
             id (string),
@@ -336,14 +350,14 @@ class DoajArticleImporter(EntityImporter):
         pmid: Optional[str] = None
         pmcid: Optional[str] = None
         for id_obj in identifiers:
-            if not id_obj.get('id'):
+            if not id_obj.get("id"):
                 continue
-            if id_obj['type'].lower() == 'doi':
-                doi = clean_doi(id_obj['id'])
-            elif id_obj['type'].lower() == 'pmid':
-                pmid = clean_pmid(id_obj['id'])
-            elif id_obj['type'].lower() == 'pmcid':
-                pmcid = clean_pmcid(id_obj['id'])
+            if id_obj["type"].lower() == "doi":
+                doi = clean_doi(id_obj["id"])
+            elif id_obj["type"].lower() == "pmid":
+                pmid = clean_pmid(id_obj["id"])
+            elif id_obj["type"].lower() == "pmcid":
+                pmcid = clean_pmcid(id_obj["id"])
 
         return fatcat_openapi_client.ReleaseExtIds(
             doaj=doaj_article_id,
@@ -365,10 +379,10 @@ class DoajArticleImporter(EntityImporter):
         if not license_list:
             return None
         for license in license_list:
-            if not license.get('open_access'):
+            if not license.get("open_access"):
                 continue
-            slug = license.get('type')
-            if slug.startswith('CC '):
-                slug = slug.replace('CC ', 'cc-').lower()
+            slug = license.get("type")
+            if slug.startswith("CC "):
+                slug = slug.replace("CC ", "cc-").lower()
                 return slug
         return None
diff --git a/python/fatcat_tools/importers/file_meta.py b/python/fatcat_tools/importers/file_meta.py
index 0951ed84..26584ff3 100644
--- a/python/fatcat_tools/importers/file_meta.py
+++ b/python/fatcat_tools/importers/file_meta.py
@@ -1,4 +1,3 @@
-
 import fatcat_openapi_client
 
 from .common import EntityImporter
@@ -17,19 +16,16 @@ class FileMetaImporter(EntityImporter):
 
     def __init__(self, api, require_grobid=True, **kwargs):
 
-        eg_desc = kwargs.pop('editgroup_description', None) or "File metadata updates"
-        eg_extra = kwargs.pop('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.FileMetaImporter')
-        kwargs['do_updates'] = kwargs.get("do_updates", True)
-        super().__init__(api,
-            editgroup_description=eg_desc,
-            editgroup_extra=eg_extra,
-            **kwargs)
+        eg_desc = kwargs.pop("editgroup_description", None) or "File metadata updates"
+        eg_extra = kwargs.pop("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.FileMetaImporter")
+        kwargs["do_updates"] = kwargs.get("do_updates", True)
+        super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
     def want(self, row):
-        for k in ('sha1hex', 'sha256hex', 'md5hex', 'size_bytes', 'mimetype'):
+        for k in ("sha1hex", "sha256hex", "md5hex", "size_bytes", "mimetype"):
             if not row.get(k):
-                self.counts['skip-missing-field'] += 1
+                self.counts["skip-missing-field"] += 1
                 return False
         return True
 
@@ -40,11 +36,11 @@ class FileMetaImporter(EntityImporter):
 
         file_meta = row
         fe = fatcat_openapi_client.FileEntity(
-            md5=file_meta['md5hex'],
-            sha1=file_meta['sha1hex'],
-            sha256=file_meta['sha256hex'],
-            size=file_meta['size_bytes'],
-            mimetype=file_meta['mimetype'],
+            md5=file_meta["md5hex"],
+            sha1=file_meta["sha1hex"],
+            sha256=file_meta["sha256hex"],
+            size=file_meta["size_bytes"],
+            mimetype=file_meta["mimetype"],
         )
         return fe
 
@@ -59,11 +55,11 @@ class FileMetaImporter(EntityImporter):
                 raise err
 
         if not existing:
-            self.counts['skip-no-match'] += 1
+            self.counts["skip-no-match"] += 1
             return False
 
-        if (existing.md5 and existing.sha256 and existing.size and existing.mimetype):
-            self.counts['skip-existing-complete'] += 1
+        if existing.md5 and existing.sha256 and existing.size and existing.mimetype:
+            self.counts["skip-existing-complete"] += 1
             return False
 
         existing.md5 = existing.md5 or fe.md5
@@ -75,5 +71,5 @@ class FileMetaImporter(EntityImporter):
         existing = self.generic_file_cleanups(existing)
 
         self.api.update_file(self.get_editgroup_id(), existing.ident, existing)
-        self.counts['update'] += 1
+        self.counts["update"] += 1
         return False
diff --git a/python/fatcat_tools/importers/fileset_generic.py b/python/fatcat_tools/importers/fileset_generic.py
index 43c2a49c..dd8f5600 100644
--- a/python/fatcat_tools/importers/fileset_generic.py
+++ b/python/fatcat_tools/importers/fileset_generic.py
@@ -1,4 +1,3 @@
-
 import fatcat_openapi_client
 
 from fatcat_tools import entity_from_dict
@@ -20,34 +19,31 @@ class FilesetImporter(EntityImporter):
 
     def __init__(self, api, **kwargs):
 
-        eg_desc = kwargs.pop('editgroup_description', None) or "Generic Fileset entity import"
-        eg_extra = kwargs.pop('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.FilesetImporter')
-        kwargs['do_updates'] = bool(kwargs.get("do_updates", False))
+        eg_desc = kwargs.pop("editgroup_description", None) or "Generic Fileset entity import"
+        eg_extra = kwargs.pop("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.FilesetImporter")
+        kwargs["do_updates"] = bool(kwargs.get("do_updates", False))
         self.skip_release_fileset_check = bool(kwargs.get("skip_release_fileset_check", False))
-        super().__init__(api,
-            editgroup_description=eg_desc,
-            editgroup_extra=eg_extra,
-            **kwargs)
+        super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
         # bezerk mode doesn't make sense for this importer
         assert self.bezerk_mode is False
 
     def want(self, row):
-        if not row.get('release_ids'):
-            self.counts['skip-no-release-ids'] += 1
+        if not row.get("release_ids"):
+            self.counts["skip-no-release-ids"] += 1
             return False
-        if not row.get('urls'):
-            self.counts['skip-no-urls'] += 1
+        if not row.get("urls"):
+            self.counts["skip-no-urls"] += 1
             return False
-        if not row.get('manifest'):
-            self.counts['skip-no-files'] += 1
+        if not row.get("manifest"):
+            self.counts["skip-no-files"] += 1
             return False
 
-        for f in row.get('manifest'):
-            for k in ('sha1', 'md5'):
+        for f in row.get("manifest"):
+            for k in ("sha1", "md5"):
                 if not f.get(k):
-                    self.counts['skip-missing-file-field'] += 1
+                    self.counts["skip-missing-file-field"] += 1
                     return False
         return True
 
@@ -66,19 +62,24 @@ class FilesetImporter(EntityImporter):
         if not self.skip_release_fileset_check:
             for release_id in fse.release_ids:
                 # don't catch 404, that would be an error
-                release = self.api.get_release(release_id, expand='filesets', hide='abstracts,refs')
-                assert release.state == 'active'
+                release = self.api.get_release(
+                    release_id, expand="filesets", hide="abstracts,refs"
+                )
+                assert release.state == "active"
                 if release.filesets:
-                    self.counts['exists'] += 1
-                    self.counts['exists-via-release-filesets'] += 1
+                    self.counts["exists"] += 1
+                    self.counts["exists-via-release-filesets"] += 1
                     return False
 
         # do the insert
         return True
 
     def insert_batch(self, batch):
-        self.api.create_fileset_auto_batch(fatcat_openapi_client.FilesetAutoBatch(
-            editgroup=fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra),
-            entity_list=batch))
+        self.api.create_fileset_auto_batch(
+            fatcat_openapi_client.FilesetAutoBatch(
+                editgroup=fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                ),
+                entity_list=batch,
+            )
+        )
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index 0f666652..f7bb5357 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -7,7 +7,7 @@ import fatcat_openapi_client
 
 from .common import EntityImporter, clean, make_rel_url
 
-MAX_ABSTRACT_BYTES=4096
+MAX_ABSTRACT_BYTES = 4096
 
 
 class GrobidMetadataImporter(EntityImporter):
@@ -24,14 +24,13 @@ class GrobidMetadataImporter(EntityImporter):
 
     def __init__(self, api, **kwargs):
 
-        eg_desc = kwargs.get('editgroup_description',
-            "Import of release and file metadata, as extracted from PDFs by GROBID.")
-        eg_extra = kwargs.get('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.GrobidMetadataImporter')
-        super().__init__(api,
-            editgroup_description=eg_desc,
-            editgroup_extra=eg_extra,
-            **kwargs)
+        eg_desc = kwargs.get(
+            "editgroup_description",
+            "Import of release and file metadata, as extracted from PDFs by GROBID.",
+        )
+        eg_extra = kwargs.get("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.GrobidMetadataImporter")
+        super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
         self.default_link_rel = kwargs.get("default_link_rel", "web")
         self.longtail_oa = kwargs.get("longtail_oa", False)
 
@@ -40,7 +39,7 @@ class GrobidMetadataImporter(EntityImporter):
 
     def parse_record(self, row):
 
-        fields = row.split('\t')
+        fields = row.split("\t")
         sha1_key = fields[0]
         cdx = json.loads(fields[1])
         mimetype = fields[2]
@@ -65,8 +64,8 @@ class GrobidMetadataImporter(EntityImporter):
         # TODO: this is where we should check if the file actually has
         # release_ids and/or URLs associated with it
         if existing and not self.bezerk_mode:
-            self.counts['exists'] += 1
-            self.counts['skip'] -= 1
+            self.counts["exists"] += 1
+            self.counts["skip"] -= 1
             return None
 
         release_edit = self.create_release(re)
@@ -75,75 +74,81 @@ class GrobidMetadataImporter(EntityImporter):
 
     def parse_grobid_json(self, obj):
 
-        if not obj.get('title'):
+        if not obj.get("title"):
             return None
 
         extra_grobid = dict()
 
-        abstract = obj.get('abstract')
+        abstract = obj.get("abstract")
         if abstract and len(abstract) < MAX_ABSTRACT_BYTES and len(abstract) > 10:
             abobj = fatcat_openapi_client.ReleaseAbstract(
-                mimetype="text/plain",
-                content=clean(obj.get('abstract')))
+                mimetype="text/plain", content=clean(obj.get("abstract"))
+            )
             abstracts = [abobj]
         else:
             abstracts = None
 
         contribs = []
-        for i, a in enumerate(obj.get('authors', [])):
-            contribs.append(fatcat_openapi_client.ReleaseContrib(
-                index=i,
-                raw_name=clean(a['name']),
-                given_name=clean(a.get('given_name')),
-                surname=clean(a.get('surname')),
-                role="author",
-                extra=None))
+        for i, a in enumerate(obj.get("authors", [])):
+            contribs.append(
+                fatcat_openapi_client.ReleaseContrib(
+                    index=i,
+                    raw_name=clean(a["name"]),
+                    given_name=clean(a.get("given_name")),
+                    surname=clean(a.get("surname")),
+                    role="author",
+                    extra=None,
+                )
+            )
 
         refs = []
-        for raw in obj.get('citations', []):
+        for raw in obj.get("citations", []):
             cite_extra = dict()
             year = None
-            if raw.get('date'):
+            if raw.get("date"):
                 try:
-                    year = int(raw['date'].strip()[:4])
+                    year = int(raw["date"].strip()[:4])
                 except (IndexError, ValueError):
                     pass
-            for key in ('volume', 'url', 'issue', 'publisher'):
+            for key in ("volume", "url", "issue", "publisher"):
                 if raw.get(key):
                     cite_extra[key] = clean(raw[key])
-            if raw.get('authors'):
-                cite_extra['authors'] = [clean(a['name']) for a in raw['authors']]
+            if raw.get("authors"):
+                cite_extra["authors"] = [clean(a["name"]) for a in raw["authors"]]
 
             if not cite_extra:
                 cite_extra = None
-            refs.append(fatcat_openapi_client.ReleaseRef(
-                key=clean(raw.get('id')),
-                year=year,
-                title=clean(raw['title']),
-                extra=cite_extra))
+            refs.append(
+                fatcat_openapi_client.ReleaseRef(
+                    key=clean(raw.get("id")),
+                    year=year,
+                    title=clean(raw["title"]),
+                    extra=cite_extra,
+                )
+            )
 
         release_date = None
         release_year = None
-        if obj.get('date'):
+        if obj.get("date"):
             # only returns year, ever?
-            release_year = int(obj['date'][:4])
+            release_year = int(obj["date"][:4])
 
         extra = dict()
-        if obj.get('doi'):
-            extra['doi'] = obj['doi']
-        if obj['journal'] and obj['journal'].get('name'):
-            extra['container_name'] = clean(obj['journal']['name'])
+        if obj.get("doi"):
+            extra["doi"] = obj["doi"]
+        if obj["journal"] and obj["journal"].get("name"):
+            extra["container_name"] = clean(obj["journal"]["name"])
 
         # TODO: ISSN/eISSN handling? or just journal name lookup?
 
         if extra_grobid:
-            extra['grobid'] = extra_grobid
+            extra["grobid"] = extra_grobid
         if self.longtail_oa:
-            extra['longtail_oa'] = True
+            extra["longtail_oa"] = True
         if not extra:
             extra = None
 
-        title = clean(obj['title'], force_xml=True)
+        title = clean(obj["title"], force_xml=True)
         if not title or len(title) < 2:
             return None
 
@@ -154,17 +159,22 @@ class GrobidMetadataImporter(EntityImporter):
             release_year=release_year,
             contribs=contribs,
             refs=refs,
-            publisher=clean(obj['journal'].get('publisher')),
-            volume=clean(obj['journal'].get('volume')),
-            issue=clean(obj['journal'].get('issue')),
+            publisher=clean(obj["journal"].get("publisher")),
+            volume=clean(obj["journal"].get("volume")),
+            issue=clean(obj["journal"].get("issue")),
             abstracts=abstracts,
             ext_ids=fatcat_openapi_client.ReleaseExtIds(),
-            extra=extra)
+            extra=extra,
+        )
         return re
 
     def parse_file_metadata(self, sha1_key, cdx, mimetype, file_size):
 
-        sha1 = base64.b16encode(base64.b32decode(sha1_key.replace('sha1:', ''))).decode('ascii').lower()
+        sha1 = (
+            base64.b16encode(base64.b32decode(sha1_key.replace("sha1:", "")))
+            .decode("ascii")
+            .lower()
+        )
 
         fe = fatcat_openapi_client.FileEntity(
             sha1=sha1,
@@ -175,16 +185,15 @@ class GrobidMetadataImporter(EntityImporter):
         )
 
         # parse URLs and CDX
-        original = cdx['url']
-        assert len(cdx['dt']) >= 8
-        wayback = "https://web.archive.org/web/{}/{}".format(
-            cdx['dt'],
-            original)
-        fe.urls.append(
-            fatcat_openapi_client.FileUrl(url=wayback, rel="webarchive"))
+        original = cdx["url"]
+        assert len(cdx["dt"]) >= 8
+        wayback = "https://web.archive.org/web/{}/{}".format(cdx["dt"], original)
+        fe.urls.append(fatcat_openapi_client.FileUrl(url=wayback, rel="webarchive"))
         original_url = make_rel_url(original, default_link_rel=self.default_link_rel)
         if original_url is not None:
-            fe.urls.append(fatcat_openapi_client.FileUrl(rel=original_url[0], url=original_url[1]))
+            fe.urls.append(
+                fatcat_openapi_client.FileUrl(rel=original_url[0], url=original_url[1])
+            )
 
         return fe
 
@@ -193,8 +202,11 @@ class GrobidMetadataImporter(EntityImporter):
         return True
 
     def insert_batch(self, batch):
-        self.api.create_file_auto_batch(fatcat_openapi_client.FileAutoBatch(
-            editgroup=fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra),
-            entity_list=batch))
+        self.api.create_file_auto_batch(
+            fatcat_openapi_client.FileAutoBatch(
+                editgroup=fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                ),
+                entity_list=batch,
+            )
+        )
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index f0943c1e..e0a6c3f5 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -1,4 +1,3 @@
-
 import datetime
 
 import fatcat_openapi_client
@@ -7,17 +6,16 @@ from .common import EntityImporter, make_rel_url
 
 
 class IngestFileResultImporter(EntityImporter):
-
     def __init__(self, api, require_grobid=True, **kwargs):
 
-        eg_desc = kwargs.pop('editgroup_description', None) or "Files crawled from web using sandcrawler ingest tool"
-        eg_extra = kwargs.pop('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.IngestFileResultImporter')
-        kwargs['do_updates'] = kwargs.get("do_updates", False)
-        super().__init__(api,
-            editgroup_description=eg_desc,
-            editgroup_extra=eg_extra,
-            **kwargs)
+        eg_desc = (
+            kwargs.pop("editgroup_description", None)
+            or "Files crawled from web using sandcrawler ingest tool"
+        )
+        eg_extra = kwargs.pop("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.IngestFileResultImporter")
+        kwargs["do_updates"] = kwargs.get("do_updates", False)
+        super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
         self.use_glutton_match = False
         self.default_link_rel = kwargs.get("default_link_rel", "web")
         assert self.default_link_rel
@@ -27,20 +25,20 @@ class IngestFileResultImporter(EntityImporter):
         else:
             print("NOT checking GROBID success")
         self.ingest_request_source_allowlist = [
-            'fatcat-changelog',
-            'fatcat-ingest-container',
-            'fatcat-ingest',
-            'arabesque',
+            "fatcat-changelog",
+            "fatcat-ingest-container",
+            "fatcat-ingest",
+            "arabesque",
             #'mag-corpus',
             #'mag',
-            'unpaywall-corpus',
-            'unpaywall',
+            "unpaywall-corpus",
+            "unpaywall",
             #'s2-corpus',
             #'s2',
-            'doaj',
-            'dblp',
+            "doaj",
+            "dblp",
         ]
-        if kwargs.get('skip_source_allowlist', False):
+        if kwargs.get("skip_source_allowlist", False):
             self.ingest_request_source_allowlist = []
 
     def want_file(self, row) -> bool:
@@ -48,28 +46,32 @@ class IngestFileResultImporter(EntityImporter):
         File-specific part of want(). Generic across general ingest and save-paper-now.
         """
 
-        if not row.get('file_meta'):
-            self.counts['skip-file-meta'] += 1
+        if not row.get("file_meta"):
+            self.counts["skip-file-meta"] += 1
             return False
 
         # type-specific filters
-        if row['request'].get('ingest_type') == 'pdf':
-            if self.require_grobid and row.get('grobid', {}).get('status_code') != 200:
-                self.counts['skip-grobid'] += 1
+        if row["request"].get("ingest_type") == "pdf":
+            if self.require_grobid and row.get("grobid", {}).get("status_code") != 200:
+                self.counts["skip-grobid"] += 1
                 return False
-            if row['file_meta'].get('mimetype') not in ("application/pdf",):
-                self.counts['skip-mimetype'] += 1
+            if row["file_meta"].get("mimetype") not in ("application/pdf",):
+                self.counts["skip-mimetype"] += 1
                 return False
-        elif row['request'].get('ingest_type') == 'xml':
-            if row['file_meta'].get('mimetype') not in ("application/xml",
-                    "application/jats+xml", "application/tei+xml", "text/xml"):
-                self.counts['skip-mimetype'] += 1
+        elif row["request"].get("ingest_type") == "xml":
+            if row["file_meta"].get("mimetype") not in (
+                "application/xml",
+                "application/jats+xml",
+                "application/tei+xml",
+                "text/xml",
+            ):
+                self.counts["skip-mimetype"] += 1
                 return False
-        elif row['request'].get('ingest_type') in ['component', 'src', 'dataset-file']:
+        elif row["request"].get("ingest_type") in ["component", "src", "dataset-file"]:
             # we rely on sandcrawler for these checks
             pass
         else:
-            self.counts['skip-ingest-type'] += 1
+            self.counts["skip-ingest-type"] += 1
             return False
 
         return True
@@ -79,24 +81,36 @@ class IngestFileResultImporter(EntityImporter):
         Sandcrawler ingest-specific part of want(). Generic across file and
         webcapture ingest.
         """
-        if row.get('hit') is not True:
-            self.counts['skip-hit'] += 1
+        if row.get("hit") is not True:
+            self.counts["skip-hit"] += 1
             return False
-        source = row['request'].get('ingest_request_source')
+        source = row["request"].get("ingest_request_source")
         if not source:
-            self.counts['skip-ingest_request_source'] += 1
+            self.counts["skip-ingest_request_source"] += 1
             return False
-        if self.ingest_request_source_allowlist and source not in self.ingest_request_source_allowlist:
-            self.counts['skip-ingest_request_source'] += 1
+        if (
+            self.ingest_request_source_allowlist
+            and source not in self.ingest_request_source_allowlist
+        ):
+            self.counts["skip-ingest_request_source"] += 1
             return False
 
-        if row['request'].get('link_source') not in ('arxiv', 'pmc', 'unpaywall', 'doi', 'mag', 's2', 'doaj', 'dblp'):
-            self.counts['skip-link-source'] += 1
+        if row["request"].get("link_source") not in (
+            "arxiv",
+            "pmc",
+            "unpaywall",
+            "doi",
+            "mag",
+            "s2",
+            "doaj",
+            "dblp",
+        ):
+            self.counts["skip-link-source"] += 1
             return False
 
-        if source.startswith('savepapernow'):
+        if source.startswith("savepapernow"):
             # never process async savepapernow requests
-            self.counts['skip-savepapernow'] += 1
+            self.counts["skip-savepapernow"] += 1
             return False
 
         return True
@@ -125,19 +139,19 @@ class IngestFileResultImporter(EntityImporter):
 
     def parse_ingest_release_ident(self, row):
 
-        request = row['request']
-        fatcat = request.get('fatcat')
+        request = row["request"]
+        fatcat = request.get("fatcat")
 
         release_ident = None
-        if fatcat and fatcat.get('release_ident'):
-            release_ident = fatcat.get('release_ident')
-        elif request.get('ext_ids'):
+        if fatcat and fatcat.get("release_ident"):
+            release_ident = fatcat.get("release_ident")
+        elif request.get("ext_ids"):
             # if no fatcat ident, try extids
-            for extid_type in ('doi', 'pmid', 'pmcid', 'arxiv', 'doaj', 'dblp'):
-                extid = request['ext_ids'].get(extid_type)
+            for extid_type in ("doi", "pmid", "pmcid", "arxiv", "doaj", "dblp"):
+                extid = request["ext_ids"].get(extid_type)
                 if not extid:
                     continue
-                if extid_type == 'doi':
+                if extid_type == "doi":
                     extid = extid.lower()
                 try:
                     release = self.api.lookup_release(**{extid_type: extid})
@@ -145,66 +159,69 @@ class IngestFileResultImporter(EntityImporter):
                     if err.status == 404:
                         continue
                     elif err.status == 400:
-                        self.counts['warn-extid-invalid'] += 1
+                        self.counts["warn-extid-invalid"] += 1
                         continue
                     raise err
                 # verify release_stage
-                if request.get('release_stage') and release.release_stage:
-                    if request['release_stage'] != release.release_stage:
-                        self.counts['skip-release-stage'] += 1
+                if request.get("release_stage") and release.release_stage:
+                    if request["release_stage"] != release.release_stage:
+                        self.counts["skip-release-stage"] += 1
                         return None
                 release_ident = release.ident
                 break
 
-        if self.use_glutton_match and not release_ident and row.get('grobid'):
+        if self.use_glutton_match and not release_ident and row.get("grobid"):
             # try biblio-glutton extracted hit
-            if row['grobid'].get('fatcat_release'):
-                release_ident = row['grobid']['fatcat_release'].split('_')[-1]
-                self.counts['glutton-match'] += 1
+            if row["grobid"].get("fatcat_release"):
+                release_ident = row["grobid"]["fatcat_release"].split("_")[-1]
+                self.counts["glutton-match"] += 1
 
         return release_ident
 
     def parse_terminal(self, row):
-        terminal = row.get('terminal')
+        terminal = row.get("terminal")
         if not terminal:
             # support old cdx-only ingest results
-            cdx = row.get('cdx')
+            cdx = row.get("cdx")
             if not cdx:
                 return None
             else:
                 terminal = {
-                    'terminal_url': cdx['url'],
-                    'terminal_dt': cdx['datetime'],
-                    'terminal_status_code': cdx.get('status_code') or cdx.get('http_status'),
+                    "terminal_url": cdx["url"],
+                    "terminal_dt": cdx["datetime"],
+                    "terminal_status_code": cdx.get("status_code") or cdx.get("http_status"),
                 }
 
         # work around old schema
-        if 'terminal_url' not in terminal:
-            terminal['terminal_url'] = terminal['url']
-        if 'terminal_dt' not in terminal:
-            terminal['terminal_dt'] = terminal['dt']
+        if "terminal_url" not in terminal:
+            terminal["terminal_url"] = terminal["url"]
+        if "terminal_dt" not in terminal:
+            terminal["terminal_dt"] = terminal["dt"]
 
         # convert CDX-style digits to ISO-style timestamp
-        assert len(terminal['terminal_dt']) == 14
-        terminal['terminal_timestamp'] = datetime.datetime.strptime(terminal['terminal_dt'], "%Y%m%d%H%M%S").isoformat() + "Z"
+        assert len(terminal["terminal_dt"]) == 14
+        terminal["terminal_timestamp"] = (
+            datetime.datetime.strptime(terminal["terminal_dt"], "%Y%m%d%H%M%S").isoformat()
+            + "Z"
+        )
         return terminal
 
     def parse_urls(self, row, terminal):
 
-        request = row['request']
+        request = row["request"]
 
         default_rel = self.default_link_rel
-        if request.get('link_source') == 'doi':
-            default_rel = 'publisher'
-        default_rel = request.get('rel', default_rel)
-        url = make_rel_url(terminal['terminal_url'], default_rel)
+        if request.get("link_source") == "doi":
+            default_rel = "publisher"
+        default_rel = request.get("rel", default_rel)
+        url = make_rel_url(terminal["terminal_url"], default_rel)
 
         if not url:
-            self.counts['skip-url'] += 1
+            self.counts["skip-url"] += 1
             return None
         wayback = "https://web.archive.org/web/{}/{}".format(
-            terminal['terminal_dt'],
-            terminal['terminal_url'])
+            terminal["terminal_dt"], terminal["terminal_url"]
+        )
         urls = [url, ("webarchive", wayback)]
 
         urls = [fatcat_openapi_client.FileUrl(rel=rel, url=url) for (rel, url) in urls]
@@ -212,38 +229,38 @@ class IngestFileResultImporter(EntityImporter):
 
     def parse_edit_extra(self, row):
 
-        request = row['request']
+        request = row["request"]
         edit_extra = dict()
 
-        if request.get('edit_extra'):
-            edit_extra = request['edit_extra']
+        if request.get("edit_extra"):
+            edit_extra = request["edit_extra"]
 
-        if request.get('ingest_request_source'):
-            edit_extra['ingest_request_source'] = request['ingest_request_source']
-        if request.get('link_source') and request.get('link_source_id'):
-            edit_extra['link_source'] = request['link_source']
-            edit_extra['link_source_id'] = request['link_source_id']
-            if edit_extra['link_source'] == 'doi':
-                edit_extra['link_source_id'] = edit_extra['link_source_id'].lower()
+        if request.get("ingest_request_source"):
+            edit_extra["ingest_request_source"] = request["ingest_request_source"]
+        if request.get("link_source") and request.get("link_source_id"):
+            edit_extra["link_source"] = request["link_source"]
+            edit_extra["link_source_id"] = request["link_source_id"]
+            if edit_extra["link_source"] == "doi":
+                edit_extra["link_source_id"] = edit_extra["link_source_id"].lower()
 
         # GROBID metadata, for SPN requests (when there might not be 'success')
-        if request.get('ingest_type') == 'pdf':
-            if row.get('grobid') and row['grobid'].get('status') != 'success':
-                edit_extra['grobid_status_code'] = row['grobid']['status_code']
-                edit_extra['grobid_version'] = row['grobid'].get('grobid_version')
+        if request.get("ingest_type") == "pdf":
+            if row.get("grobid") and row["grobid"].get("status") != "success":
+                edit_extra["grobid_status_code"] = row["grobid"]["status_code"]
+                edit_extra["grobid_version"] = row["grobid"].get("grobid_version")
 
         return edit_extra
 
     def parse_record(self, row):
 
-        request = row['request']
-        file_meta = row['file_meta']
+        request = row["request"]
+        file_meta = row["file_meta"]
 
         # double check that want() filtered request correctly (eg, old requests)
-        if request.get('ingest_type') not in ('pdf', 'xml'):
-            self.counts['skip-ingest-type'] += 1
+        if request.get("ingest_type") not in ("pdf", "xml"):
+            self.counts["skip-ingest-type"] += 1
             return None
-        assert (request['ingest_type'], file_meta['mimetype']) in [
+        assert (request["ingest_type"], file_meta["mimetype"]) in [
             ("pdf", "application/pdf"),
             ("xml", "application/xml"),
             ("xml", "application/jats+xml"),
@@ -255,23 +272,23 @@ class IngestFileResultImporter(EntityImporter):
         release_ident = self.parse_ingest_release_ident(row)
 
         if not release_ident:
-            self.counts['skip-release-not-found'] += 1
+            self.counts["skip-release-not-found"] += 1
             return None
 
         terminal = self.parse_terminal(row)
         if not terminal:
             # TODO: support archive.org hits?
-            self.counts['skip-no-terminal'] += 1
+            self.counts["skip-no-terminal"] += 1
             return None
 
         urls = self.parse_urls(row, terminal)
 
         fe = fatcat_openapi_client.FileEntity(
-            md5=file_meta['md5hex'],
-            sha1=file_meta['sha1hex'],
-            sha256=file_meta['sha256hex'],
-            size=file_meta['size_bytes'],
-            mimetype=file_meta['mimetype'],
+            md5=file_meta["md5hex"],
+            sha1=file_meta["sha1hex"],
+            sha256=file_meta["sha256hex"],
+            size=file_meta["size_bytes"],
+            mimetype=file_meta["mimetype"],
             release_ids=[release_ident],
             urls=urls,
         )
@@ -293,7 +310,7 @@ class IngestFileResultImporter(EntityImporter):
         # check for existing edits-in-progress with same file hash
         for other in self._entity_queue:
             if other.sha1 == fe.sha1:
-                self.counts['skip-in-queue'] += 1
+                self.counts["skip-in-queue"] += 1
                 return False
 
         if not existing:
@@ -302,31 +319,36 @@ class IngestFileResultImporter(EntityImporter):
         # NOTE: the following checks all assume there is an existing item
         if (fe.release_ids[0] in existing.release_ids) and existing.urls:
             # TODO: could still, in theory update with the new URL?
-            self.counts['exists'] += 1
+            self.counts["exists"] += 1
             return False
 
         if not self.do_updates:
-            self.counts['skip-update-disabled'] += 1
+            self.counts["skip-update-disabled"] += 1
             return False
 
         # TODO: for now, never update
-        self.counts['skip-update-disabled'] += 1
+        self.counts["skip-update-disabled"] += 1
         return False
 
     def insert_batch(self, batch):
         if self.submit_mode:
-            eg = self.api.create_editgroup(fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra))
+            eg = self.api.create_editgroup(
+                fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                )
+            )
             for fe in batch:
                 self.api.create_file(eg.editgroup_id, fe)
             self.api.update_editgroup(eg.editgroup_id, eg, submit=True)
         else:
-            self.api.create_file_auto_batch(fatcat_openapi_client.FileAutoBatch(
-                editgroup=fatcat_openapi_client.Editgroup(
-                    description=self.editgroup_description,
-                    extra=self.editgroup_extra),
-                entity_list=batch))
+            self.api.create_file_auto_batch(
+                fatcat_openapi_client.FileAutoBatch(
+                    editgroup=fatcat_openapi_client.Editgroup(
+                        description=self.editgroup_description, extra=self.editgroup_extra
+                    ),
+                    entity_list=batch,
+                )
+            )
 
 
 class SavePaperNowFileImporter(IngestFileResultImporter):
@@ -338,29 +360,29 @@ class SavePaperNowFileImporter(IngestFileResultImporter):
 
     def __init__(self, api, submit_mode=True, **kwargs):
 
-        eg_desc = kwargs.pop('editgroup_description', None) or "Files crawled after a public 'Save Paper Now' request"
-        eg_extra = kwargs.pop('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.SavePaperNowFileImporter')
-        kwargs['submit_mode'] = submit_mode
-        kwargs['require_grobid'] = False
-        kwargs['do_updates'] = False
-        super().__init__(api,
-            editgroup_description=eg_desc,
-            editgroup_extra=eg_extra,
-            **kwargs)
+        eg_desc = (
+            kwargs.pop("editgroup_description", None)
+            or "Files crawled after a public 'Save Paper Now' request"
+        )
+        eg_extra = kwargs.pop("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.SavePaperNowFileImporter")
+        kwargs["submit_mode"] = submit_mode
+        kwargs["require_grobid"] = False
+        kwargs["do_updates"] = False
+        super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
     def want(self, row):
 
-        source = row['request'].get('ingest_request_source')
+        source = row["request"].get("ingest_request_source")
         if not source:
-            self.counts['skip-ingest_request_source'] += 1
+            self.counts["skip-ingest_request_source"] += 1
             return False
-        if not source.startswith('savepapernow'):
-            self.counts['skip-not-savepapernow'] += 1
+        if not source.startswith("savepapernow"):
+            self.counts["skip-not-savepapernow"] += 1
             return False
 
-        if row.get('hit') is not True:
-            self.counts['skip-hit'] += 1
+        if row.get("hit") is not True:
+            self.counts["skip-hit"] += 1
             return False
 
         if not self.want_file(row):
@@ -377,14 +399,14 @@ class IngestWebResultImporter(IngestFileResultImporter):
 
     def __init__(self, api, **kwargs):
 
-        eg_desc = kwargs.pop('editgroup_description', None) or "Webcaptures crawled from web using sandcrawler ingest tool"
-        eg_extra = kwargs.pop('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.IngestWebResultImporter')
-        kwargs['do_updates'] = False
-        super().__init__(api,
-            editgroup_description=eg_desc,
-            editgroup_extra=eg_extra,
-            **kwargs)
+        eg_desc = (
+            kwargs.pop("editgroup_description", None)
+            or "Webcaptures crawled from web using sandcrawler ingest tool"
+        )
+        eg_extra = kwargs.pop("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.IngestWebResultImporter")
+        kwargs["do_updates"] = False
+        super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
     def want(self, row):
 
@@ -392,91 +414,95 @@ class IngestWebResultImporter(IngestFileResultImporter):
             return False
 
         # webcapture-specific filters
-        if row['request'].get('ingest_type') != 'html':
-            self.counts['skip-ingest-type'] += 1
+        if row["request"].get("ingest_type") != "html":
+            self.counts["skip-ingest-type"] += 1
             return False
-        if not row.get('file_meta'):
-            self.counts['skip-file-meta'] += 1
+        if not row.get("file_meta"):
+            self.counts["skip-file-meta"] += 1
             return False
-        if row['file_meta'].get('mimetype') not in ("text/html", "application/xhtml+xml"):
-            self.counts['skip-mimetype'] += 1
+        if row["file_meta"].get("mimetype") not in ("text/html", "application/xhtml+xml"):
+            self.counts["skip-mimetype"] += 1
             return False
 
         return True
 
     def parse_record(self, row):
 
-        request = row['request']
-        file_meta = row['file_meta']
+        request = row["request"]
+        file_meta = row["file_meta"]
 
         # double check that want() filtered request correctly (eg, old requests)
-        if request.get('ingest_type') != "html":
-            self.counts['skip-ingest-type'] += 1
+        if request.get("ingest_type") != "html":
+            self.counts["skip-ingest-type"] += 1
             return None
-        if file_meta['mimetype'] not in ("text/html", "application/xhtml+xml"):
-            self.counts['skip-mimetype'] += 1
+        if file_meta["mimetype"] not in ("text/html", "application/xhtml+xml"):
+            self.counts["skip-mimetype"] += 1
             return None
 
         # identify release by fatcat ident, or extid lookup
         release_ident = self.parse_ingest_release_ident(row)
 
         if not release_ident:
-            self.counts['skip-release-not-found'] += 1
+            self.counts["skip-release-not-found"] += 1
             return None
 
         terminal = self.parse_terminal(row)
         if not terminal:
             # TODO: support archive.org hits?
-            self.counts['skip-no-terminal'] += 1
+            self.counts["skip-no-terminal"] += 1
             return None
 
         urls = self.parse_urls(row, terminal)
-        archive_urls = [u for u in urls if u.rel == 'webarchive']
+        archive_urls = [u for u in urls if u.rel == "webarchive"]
 
-        if terminal['terminal_status_code'] != 200:
-            self.counts['skip-terminal-status-code'] += 1
+        if terminal["terminal_status_code"] != 200:
+            self.counts["skip-terminal-status-code"] += 1
             return None
 
-        terminal_cdx = row['cdx']
-        if 'revisit_cdx' in row:
-            terminal_cdx = row['revisit_cdx']
-        assert terminal_cdx['surt']
-        if terminal_cdx['url'] != terminal['terminal_url']:
-            self.counts['skip-terminal-url-mismatch'] += 1
+        terminal_cdx = row["cdx"]
+        if "revisit_cdx" in row:
+            terminal_cdx = row["revisit_cdx"]
+        assert terminal_cdx["surt"]
+        if terminal_cdx["url"] != terminal["terminal_url"]:
+            self.counts["skip-terminal-url-mismatch"] += 1
             return None
 
         wc_cdx = []
         # primary resource first
-        wc_cdx.append(fatcat_openapi_client.WebcaptureCdxLine(
-            surt=terminal_cdx['surt'],
-            timestamp=terminal['terminal_timestamp'],
-            url=terminal['terminal_url'],
-            mimetype=file_meta['mimetype'],
-            status_code=terminal['terminal_status_code'],
-            sha1=file_meta['sha1hex'],
-            sha256=file_meta['sha256hex'],
-            size=file_meta['size_bytes'],
-        ))
-
-        for resource in row.get('html_resources', []):
-            timestamp = resource['timestamp']
+        wc_cdx.append(
+            fatcat_openapi_client.WebcaptureCdxLine(
+                surt=terminal_cdx["surt"],
+                timestamp=terminal["terminal_timestamp"],
+                url=terminal["terminal_url"],
+                mimetype=file_meta["mimetype"],
+                status_code=terminal["terminal_status_code"],
+                sha1=file_meta["sha1hex"],
+                sha256=file_meta["sha256hex"],
+                size=file_meta["size_bytes"],
+            )
+        )
+
+        for resource in row.get("html_resources", []):
+            timestamp = resource["timestamp"]
             if "+" not in timestamp and "Z" not in timestamp:
                 timestamp += "Z"
-            wc_cdx.append(fatcat_openapi_client.WebcaptureCdxLine(
-                surt=resource['surt'],
-                timestamp=timestamp,
-                url=resource['url'],
-                mimetype=resource.get('mimetype'),
-                size=resource.get('size'),
-                sha1=resource.get('sha1hex'),
-                sha256=resource.get('sha256hex'),
-            ))
+            wc_cdx.append(
+                fatcat_openapi_client.WebcaptureCdxLine(
+                    surt=resource["surt"],
+                    timestamp=timestamp,
+                    url=resource["url"],
+                    mimetype=resource.get("mimetype"),
+                    size=resource.get("size"),
+                    sha1=resource.get("sha1hex"),
+                    sha256=resource.get("sha256hex"),
+                )
+            )
 
         wc = fatcat_openapi_client.WebcaptureEntity(
             cdx=wc_cdx,
             archive_urls=archive_urls,
-            original_url=terminal['terminal_url'],
-            timestamp=terminal['terminal_timestamp'],
+            original_url=terminal["terminal_url"],
+            timestamp=terminal["terminal_timestamp"],
             release_ids=[release_ident],
         )
 
@@ -491,11 +517,11 @@ class IngestWebResultImporter(IngestFileResultImporter):
         # check for existing edits-in-progress with same URL
         for other in self._entity_queue:
             if other.original_url == wc.original_url:
-                self.counts['skip-in-queue'] += 1
+                self.counts["skip-in-queue"] += 1
                 return False
 
         # lookup sha1, or create new entity (TODO: API doesn't support this yet)
-        #existing = None
+        # existing = None
 
         # TODO: currently only allow one release per webcapture
         release = self.api.get_release(wc.release_ids[0], expand="webcaptures")
@@ -504,9 +530,9 @@ class IngestWebResultImporter(IngestFileResultImporter):
             for other in release.webcaptures:
                 if wc.original_url == other.original_url:
                     # TODO: compare very similar timestamps of same time (different formats)
-                    self.counts['exists'] += 1
+                    self.counts["exists"] += 1
                     return False
-            self.counts['skip-release-has-webcapture'] += 1
+            self.counts["skip-release-has-webcapture"] += 1
             return False
 
         # Ok, if we got here then no existing web capture for (first) release,
@@ -515,18 +541,24 @@ class IngestWebResultImporter(IngestFileResultImporter):
 
     def insert_batch(self, batch):
         if self.submit_mode:
-            eg = self.api.create_editgroup(fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra))
+            eg = self.api.create_editgroup(
+                fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                )
+            )
             for fe in batch:
                 self.api.create_webcapture(eg.editgroup_id, fe)
             self.api.update_editgroup(eg.editgroup_id, eg, submit=True)
         else:
-            self.api.create_webcapture_auto_batch(fatcat_openapi_client.WebcaptureAutoBatch(
-                editgroup=fatcat_openapi_client.Editgroup(
-                    description=self.editgroup_description,
-                    extra=self.editgroup_extra),
-                entity_list=batch))
+            self.api.create_webcapture_auto_batch(
+                fatcat_openapi_client.WebcaptureAutoBatch(
+                    editgroup=fatcat_openapi_client.Editgroup(
+                        description=self.editgroup_description, extra=self.editgroup_extra
+                    ),
+                    entity_list=batch,
+                )
+            )
+
 
 class SavePaperNowWebImporter(IngestWebResultImporter):
     """
@@ -535,15 +567,15 @@ class SavePaperNowWebImporter(IngestWebResultImporter):
 
     def __init__(self, api, submit_mode=True, **kwargs):
 
-        eg_desc = kwargs.pop('editgroup_description', None) or "Webcaptures crawled after a public 'Save Paper Now' request"
-        eg_extra = kwargs.pop('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.SavePaperNowWebImporter')
-        kwargs['submit_mode'] = submit_mode
-        kwargs['do_updates'] = False
-        super().__init__(api,
-            editgroup_description=eg_desc,
-            editgroup_extra=eg_extra,
-            **kwargs)
+        eg_desc = (
+            kwargs.pop("editgroup_description", None)
+            or "Webcaptures crawled after a public 'Save Paper Now' request"
+        )
+        eg_extra = kwargs.pop("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.SavePaperNowWebImporter")
+        kwargs["submit_mode"] = submit_mode
+        kwargs["do_updates"] = False
+        super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
     def want(self, row):
         """
@@ -553,27 +585,27 @@ class SavePaperNowWebImporter(IngestWebResultImporter):
         path, which means allowing hit=false.
         """
 
-        source = row['request'].get('ingest_request_source')
+        source = row["request"].get("ingest_request_source")
         if not source:
-            self.counts['skip-ingest_request_source'] += 1
+            self.counts["skip-ingest_request_source"] += 1
             return False
-        if not source.startswith('savepapernow'):
-            self.counts['skip-not-savepapernow'] += 1
+        if not source.startswith("savepapernow"):
+            self.counts["skip-not-savepapernow"] += 1
             return False
 
         # webcapture-specific filters
-        if row['request'].get('ingest_type') != 'html':
-            self.counts['skip-ingest-type'] += 1
+        if row["request"].get("ingest_type") != "html":
+            self.counts["skip-ingest-type"] += 1
             return False
-        if not row.get('file_meta'):
-            self.counts['skip-file-meta'] += 1
+        if not row.get("file_meta"):
+            self.counts["skip-file-meta"] += 1
             return False
-        if row['file_meta'].get('mimetype') not in ("text/html", "application/xhtml+xml"):
-            self.counts['skip-mimetype'] += 1
+        if row["file_meta"].get("mimetype") not in ("text/html", "application/xhtml+xml"):
+            self.counts["skip-mimetype"] += 1
             return False
 
-        if row.get('status') not in ['success', 'unknown-scope']:
-            self.counts['skip-hit'] += 1
+        if row.get("status") not in ["success", "unknown-scope"]:
+            self.counts["skip-hit"] += 1
             return False
 
         return True
@@ -587,28 +619,28 @@ class IngestFilesetResultImporter(IngestFileResultImporter):
 
     def __init__(self, api, **kwargs):
 
-        eg_desc = kwargs.pop('editgroup_description', None) or "Filesets crawled from web using sandcrawler ingest tool"
-        eg_extra = kwargs.pop('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.IngestFilesetResultImporter')
-        kwargs['do_updates'] = False
-        super().__init__(api,
-            editgroup_description=eg_desc,
-            editgroup_extra=eg_extra,
-            **kwargs)
+        eg_desc = (
+            kwargs.pop("editgroup_description", None)
+            or "Filesets crawled from web using sandcrawler ingest tool"
+        )
+        eg_extra = kwargs.pop("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.IngestFilesetResultImporter")
+        kwargs["do_updates"] = False
+        super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
         self.max_file_count = 300
 
     def want_fileset(self, row):
 
-        if not row.get('manifest') or len(row.get('manifest')) == 0:
-            self.counts['skip-empty-manifest'] += 1
+        if not row.get("manifest") or len(row.get("manifest")) == 0:
+            self.counts["skip-empty-manifest"] += 1
             return False
 
-        if len(row.get('manifest')) == 1:
-            self.counts['skip-single-file'] += 1
+        if len(row.get("manifest")) == 1:
+            self.counts["skip-single-file"] += 1
             return False
 
-        if len(row.get('manifest')) > self.max_file_count:
-            self.counts['skip-too-many-files'] += 1
+        if len(row.get("manifest")) > self.max_file_count:
+            self.counts["skip-too-many-files"] += 1
             return False
 
         return True
@@ -619,8 +651,10 @@ class IngestFilesetResultImporter(IngestFileResultImporter):
             return False
 
         # fileset-specific filters
-        if row['request'].get('ingest_type') not in ['dataset',]:
-            self.counts['skip-ingest-type'] += 1
+        if row["request"].get("ingest_type") not in [
+            "dataset",
+        ]:
+            self.counts["skip-ingest-type"] += 1
             return False
 
         if not self.want_fileset(row):
@@ -629,102 +663,118 @@ class IngestFilesetResultImporter(IngestFileResultImporter):
         return True
 
     def parse_fileset_urls(self, row):
-        if not row.get('strategy'):
+        if not row.get("strategy"):
             return []
-        strategy = row['strategy']
+        strategy = row["strategy"]
         urls = []
-        if strategy == 'archiveorg-fileset' and row.get('archiveorg_item_name'):
-            urls.append(fatcat_openapi_client.FilesetUrl(
-                url=f"https://archive.org/download/{row['archiveorg_item_name']}/",
-                rel="archive-base",
-            ))
-        if row['strategy'].startswith('web-') and row.get('platform_base_url'):
-            urls.append(fatcat_openapi_client.FilesetUrl(
-                url=f"https://web.archive.org/web/{row['web_base_url_dt']}/{row['web_base_url']}",
-                rel="webarchive-base",
-            ))
+        if strategy == "archiveorg-fileset" and row.get("archiveorg_item_name"):
+            urls.append(
+                fatcat_openapi_client.FilesetUrl(
+                    url=f"https://archive.org/download/{row['archiveorg_item_name']}/",
+                    rel="archive-base",
+                )
+            )
+        if row["strategy"].startswith("web-") and row.get("platform_base_url"):
+            urls.append(
+                fatcat_openapi_client.FilesetUrl(
+                    url=f"https://web.archive.org/web/{row['web_base_url_dt']}/{row['web_base_url']}",
+                    rel="webarchive-base",
+                )
+            )
         # TODO: repository-base
         # TODO: web-base
 
-        if row['strategy'] == 'archiveorg-fileset-bundle' and row.get('archiveorg_item_name'):
-            urls.append(fatcat_openapi_client.FilesetUrl(
-                url=f"https://archive.org/download/{row['archiveorg_item_name']}/{row['archiveorg_bundle_path']}",
-                rel="archive-bundle",
-            ))
+        if row["strategy"] == "archiveorg-fileset-bundle" and row.get("archiveorg_item_name"):
+            urls.append(
+                fatcat_openapi_client.FilesetUrl(
+                    url=f"https://archive.org/download/{row['archiveorg_item_name']}/{row['archiveorg_bundle_path']}",
+                    rel="archive-bundle",
+                )
+            )
 
-        if row['strategy'] == 'web-fileset-bundle' and row.get('platform_bundle_url'):
-            urls.append(fatcat_openapi_client.FilesetUrl(
-                url=f"https://web.archive.org/web/{row['web_bundle_url_dt']}/{row['web_bundle_url']}",
-                rel="webarchive-bundle",
-            ))
+        if row["strategy"] == "web-fileset-bundle" and row.get("platform_bundle_url"):
+            urls.append(
+                fatcat_openapi_client.FilesetUrl(
+                    url=f"https://web.archive.org/web/{row['web_bundle_url_dt']}/{row['web_bundle_url']}",
+                    rel="webarchive-bundle",
+                )
+            )
 
         # add any additional / platform URLs here
-        if row.get('platform_bundle_url'):
-            urls.append(fatcat_openapi_client.FilesetUrl(
-                url=row['platform_bundle_url'],
-                rel="repository-bundle",
-            ))
-        if row.get('platform_base_url'):
-            urls.append(fatcat_openapi_client.FilesetUrl(
-                url=row['platform_bundle_url'],
-                rel="repository-base",
-            ))
+        if row.get("platform_bundle_url"):
+            urls.append(
+                fatcat_openapi_client.FilesetUrl(
+                    url=row["platform_bundle_url"],
+                    rel="repository-bundle",
+                )
+            )
+        if row.get("platform_base_url"):
+            urls.append(
+                fatcat_openapi_client.FilesetUrl(
+                    url=row["platform_bundle_url"],
+                    rel="repository-base",
+                )
+            )
         return urls
 
     def parse_record(self, row):
 
-        request = row['request']
+        request = row["request"]
 
         # double check that want() filtered request correctly
-        if request.get('ingest_type') not in ["dataset",]:
-            self.counts['skip-ingest-type'] += 1
+        if request.get("ingest_type") not in [
+            "dataset",
+        ]:
+            self.counts["skip-ingest-type"] += 1
             return None
 
         # identify release by fatcat ident, or extid lookup
         release_ident = self.parse_ingest_release_ident(row)
 
         if not release_ident:
-            self.counts['skip-release-not-found'] += 1
+            self.counts["skip-release-not-found"] += 1
             return None
 
         entity_extra = dict()
         edit_extra = self.parse_edit_extra(row)
-        edit_extra['ingest_strategy'] = row['ingest_strategy']
-        if row.get('platform'):
-            edit_extra['platform'] = row['platform']
-        if row.get('platform_id'):
-            edit_extra['platform_id'] = row['platform_id']
+        edit_extra["ingest_strategy"] = row["ingest_strategy"]
+        if row.get("platform"):
+            edit_extra["platform"] = row["platform"]
+        if row.get("platform_id"):
+            edit_extra["platform_id"] = row["platform_id"]
 
         entity_urls = self.parse_fileset_urls(row)
         if not entity_urls:
-            self.counts['skip-no-access-url'] += 1
+            self.counts["skip-no-access-url"] += 1
             return None
 
-        assert row['file_count'] == len(row['manifest'])
-        if row['file_count'] > self.max_file_count:
-            self.counts['skip-too-many-manifest-files'] += 1
+        assert row["file_count"] == len(row["manifest"])
+        if row["file_count"] > self.max_file_count:
+            self.counts["skip-too-many-manifest-files"] += 1
             return None
 
         manifest = []
-        for ingest_file in row['manifest']:
+        for ingest_file in row["manifest"]:
             fsf = fatcat_openapi_client.FilesetFile(
-                path=ingest_file['path'],
-                size=ingest_file['size'],
-                md5=ingest_file['md5'],
-                sha1=ingest_file['sha1'],
-                sha256=ingest_file.get('sha256'),
+                path=ingest_file["path"],
+                size=ingest_file["size"],
+                md5=ingest_file["md5"],
+                sha1=ingest_file["sha1"],
+                sha256=ingest_file.get("sha256"),
                 extra=dict(
-                    mimetype=ingest_file['mimetype'],
+                    mimetype=ingest_file["mimetype"],
                 ),
             )
             if not (fsf.md5 and fsf.sha1 and fsf.path and fsf.size):
-                self.counts['skip-partial-file-info'] += 1
+                self.counts["skip-partial-file-info"] += 1
                 return None
-            if ingest_file.get('platform_url'):
+            if ingest_file.get("platform_url"):
                 # XXX: should we include this?
-                fsf.extra['original_url'] = ingest_file['platform_url']
-            if ingest_file.get('terminal_url') and ingest_file.get('terminal_dt'):
-                fsf.extra['wayback_url'] = f"https://web.archive.org/web/{ingest_file['terminal_dt']}/{ingest_file['terminal_url']}"
+                fsf.extra["original_url"] = ingest_file["platform_url"]
+            if ingest_file.get("terminal_url") and ingest_file.get("terminal_dt"):
+                fsf.extra[
+                    "wayback_url"
+                ] = f"https://web.archive.org/web/{ingest_file['terminal_dt']}/{ingest_file['terminal_url']}"
             manifest.append(fsf)
 
         fe = fatcat_openapi_client.FilesetEntity(
@@ -745,11 +795,11 @@ class IngestFilesetResultImporter(IngestFileResultImporter):
         for other in self._entity_queue:
             # XXX: how to duplicate check?
             if other.original_url == wc.original_url:
-                self.counts['skip-in-queue'] += 1
+                self.counts["skip-in-queue"] += 1
                 return False
 
         # lookup sha1, or create new entity (TODO: API doesn't support this yet)
-        #existing = None
+        # existing = None
 
         # NOTE: in lieu of existing checks (by lookup), only allow one fileset per release
         release = self.api.get_release(wc.release_ids[0], expand="filesets")
@@ -759,27 +809,32 @@ class IngestFilesetResultImporter(IngestFileResultImporter):
             for other in release.filesets:
                 if wc.original_url == other.original_url:
                     # TODO: compare very similar timestamps of same time (different formats)
-                    self.counts['exists'] += 1
+                    self.counts["exists"] += 1
                     return False
-            self.counts['skip-release-has-fileset'] += 1
+            self.counts["skip-release-has-fileset"] += 1
             return False
 
         return True
 
     def insert_batch(self, batch):
         if self.submit_mode:
-            eg = self.api.create_editgroup(fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra))
+            eg = self.api.create_editgroup(
+                fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                )
+            )
             for fe in batch:
                 self.api.create_fileset(eg.editgroup_id, fe)
             self.api.update_editgroup(eg.editgroup_id, eg, submit=True)
         else:
-            self.api.create_fileset_auto_batch(fatcat_openapi_client.FilesetAutoBatch(
-                editgroup=fatcat_openapi_client.Editgroup(
-                    description=self.editgroup_description,
-                    extra=self.editgroup_extra),
-                entity_list=batch))
+            self.api.create_fileset_auto_batch(
+                fatcat_openapi_client.FilesetAutoBatch(
+                    editgroup=fatcat_openapi_client.Editgroup(
+                        description=self.editgroup_description, extra=self.editgroup_extra
+                    ),
+                    entity_list=batch,
+                )
+            )
 
 
 class SavePaperNowFilesetImporter(IngestFilesetResultImporter):
@@ -789,28 +844,28 @@ class SavePaperNowFilesetImporter(IngestFilesetResultImporter):
 
     def __init__(self, api, submit_mode=True, **kwargs):
 
-        eg_desc = kwargs.pop('editgroup_description', None) or "Fileset crawled after a public 'Save Paper Now' request"
-        eg_extra = kwargs.pop('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.SavePaperNowFilesetImporter')
-        kwargs['submit_mode'] = submit_mode
-        kwargs['do_updates'] = False
-        super().__init__(api,
-            editgroup_description=eg_desc,
-            editgroup_extra=eg_extra,
-            **kwargs)
+        eg_desc = (
+            kwargs.pop("editgroup_description", None)
+            or "Fileset crawled after a public 'Save Paper Now' request"
+        )
+        eg_extra = kwargs.pop("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.SavePaperNowFilesetImporter")
+        kwargs["submit_mode"] = submit_mode
+        kwargs["do_updates"] = False
+        super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
     def want(self, row):
 
-        source = row['request'].get('ingest_request_source')
+        source = row["request"].get("ingest_request_source")
         if not source:
-            self.counts['skip-ingest_request_source'] += 1
+            self.counts["skip-ingest_request_source"] += 1
             return False
-        if not source.startswith('savepapernow'):
-            self.counts['skip-not-savepapernow'] += 1
+        if not source.startswith("savepapernow"):
+            self.counts["skip-not-savepapernow"] += 1
             return False
 
-        if row.get('hit') is not True:
-            self.counts['skip-hit'] += 1
+        if row.get("hit") is not True:
+            self.counts["skip-hit"] += 1
             return False
 
         if not self.want_fileset(row):
diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py
index 0a983c5e..8e3af416 100644
--- a/python/fatcat_tools/importers/jalc.py
+++ b/python/fatcat_tools/importers/jalc.py
@@ -1,4 +1,3 @@
-
 import datetime
 import sqlite3
 import sys
@@ -33,26 +32,24 @@ def parse_jalc_persons(raw_persons):
 
     # first parse out into language-agnostic dics
     for raw in raw_persons:
-        name = raw.find('name') or None
+        name = raw.find("name") or None
         if name:
-            name = clean(name.get_text().replace('\n', ' '))
-        surname = raw.find('familyName') or None
+            name = clean(name.get_text().replace("\n", " "))
+        surname = raw.find("familyName") or None
         if surname:
-            surname = clean(surname.get_text().replace('\n', ' '))
-        given_name = raw.find('givenName') or None
+            surname = clean(surname.get_text().replace("\n", " "))
+        given_name = raw.find("givenName") or None
         if given_name:
-            given_name = clean(given_name.get_text().replace('\n', ' '))
-        lang = 'en'
+            given_name = clean(given_name.get_text().replace("\n", " "))
+        lang = "en"
         if is_cjk(name):
-            lang = 'ja'
-        if lang == 'en' and surname and given_name:
+            lang = "ja"
+        if lang == "en" and surname and given_name:
             # english names order is flipped
             name = "{} {}".format(given_name, surname)
         rc = fatcat_openapi_client.ReleaseContrib(
-            raw_name=name,
-            surname=surname,
-            given_name=given_name,
-            role="author")
+            raw_name=name, surname=surname, given_name=given_name, role="author"
+        )
         # add an extra hint field; won't end up in serialized object
         rc._lang = lang
         persons.append(rc)
@@ -60,12 +57,12 @@ def parse_jalc_persons(raw_persons):
     if not persons:
         return []
 
-    if all([p._lang == 'en' for p in persons]) or all([p._lang == 'ja' for p in persons]):
+    if all([p._lang == "en" for p in persons]) or all([p._lang == "ja" for p in persons]):
         # all english names, or all japanese names
         return persons
 
     # for debugging
-    #if len([1 for p in persons if p._lang == 'en']) != len([1 for p in persons if p._lang == 'ja']):
+    # if len([1 for p in persons if p._lang == 'en']) != len([1 for p in persons if p._lang == 'ja']):
     #    print("INTERESTING: {}".format(persons[0]))
 
     start_lang = persons[0]._lang
@@ -74,10 +71,10 @@ def parse_jalc_persons(raw_persons):
         if p._lang == start_lang:
             contribs.append(p)
         else:
-            if p._lang == 'en' and contribs[-1]._lang == 'ja':
+            if p._lang == "en" and contribs[-1]._lang == "ja":
                 eng = p
                 jpn = contribs[-1]
-            elif p._lang == 'ja' and contribs[-1]._lang == 'en':
+            elif p._lang == "ja" and contribs[-1]._lang == "en":
                 eng = contribs[-1]
                 jpn = p
             else:
@@ -85,11 +82,11 @@ def parse_jalc_persons(raw_persons):
                 contribs.append(p)
                 continue
             eng.extra = {
-                'original_name': {
-                    'lang': jpn._lang,
-                    'raw_name': jpn.raw_name,
-                    'given_name': jpn.given_name,
-                    'surname': jpn.surname,
+                "original_name": {
+                    "lang": jpn._lang,
+                    "raw_name": jpn.raw_name,
+                    "given_name": jpn.given_name,
+                    "surname": jpn.surname,
                 },
             }
             contribs[-1] = eng
@@ -105,18 +102,19 @@ class JalcImporter(EntityImporter):
 
     def __init__(self, api, issn_map_file, **kwargs):
 
-        eg_desc = kwargs.get('editgroup_description',
-            "Automated import of JALC DOI metadata")
-        eg_extra = kwargs.get('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.JalcImporter')
-        super().__init__(api,
+        eg_desc = kwargs.get("editgroup_description", "Automated import of JALC DOI metadata")
+        eg_extra = kwargs.get("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.JalcImporter")
+        super().__init__(
+            api,
             issn_map_file=issn_map_file,
             editgroup_description=eg_desc,
             editgroup_extra=eg_extra,
-            **kwargs)
+            **kwargs
+        )
 
-        self.create_containers = kwargs.get('create_containers', True)
-        extid_map_file = kwargs.get('extid_map_file')
+        self.create_containers = kwargs.get("create_containers", True)
+        extid_map_file = kwargs.get("extid_map_file")
         self.extid_map_db = None
         if extid_map_file:
             db_uri = "file:{}?mode=ro".format(extid_map_file)
@@ -129,12 +127,27 @@ class JalcImporter(EntityImporter):
 
     def lookup_ext_ids(self, doi):
         if self.extid_map_db is None:
-            return dict(core_id=None, pmid=None, pmcid=None, wikidata_qid=None, arxiv_id=None, jstor_id=None)
-        row = self.extid_map_db.execute("SELECT core, pmid, pmcid, wikidata FROM ids WHERE doi=? LIMIT 1",
-            [doi.lower()]).fetchone()
+            return dict(
+                core_id=None,
+                pmid=None,
+                pmcid=None,
+                wikidata_qid=None,
+                arxiv_id=None,
+                jstor_id=None,
+            )
+        row = self.extid_map_db.execute(
+            "SELECT core, pmid, pmcid, wikidata FROM ids WHERE doi=? LIMIT 1", [doi.lower()]
+        ).fetchone()
         if row is None:
-            return dict(core_id=None, pmid=None, pmcid=None, wikidata_qid=None, arxiv_id=None, jstor_id=None)
-        row = [str(cell or '') or None for cell in row]
+            return dict(
+                core_id=None,
+                pmid=None,
+                pmcid=None,
+                wikidata_qid=None,
+                arxiv_id=None,
+                jstor_id=None,
+            )
+        row = [str(cell or "") or None for cell in row]
         return dict(
             core_id=row[0],
             pmid=row[1],
@@ -163,27 +176,27 @@ class JalcImporter(EntityImporter):
         titles = record.find_all("title")
         if not titles:
             return None
-        title = titles[0].get_text().replace('\n', ' ').strip()
+        title = titles[0].get_text().replace("\n", " ").strip()
         original_title = None
-        if title.endswith('.'):
+        if title.endswith("."):
             title = title[:-1]
         if len(titles) > 1:
-            original_title = titles[1].get_text().replace('\n', ' ').strip()
-            if original_title.endswith('.'):
+            original_title = titles[1].get_text().replace("\n", " ").strip()
+            if original_title.endswith("."):
                 original_title = original_title[:-1]
 
         doi = None
         if record.doi:
             doi = clean_doi(record.doi.string.strip().lower())
-            if doi.startswith('http://dx.doi.org/'):
-                doi = doi.replace('http://dx.doi.org/', '')
-            elif doi.startswith('https://dx.doi.org/'):
-                doi = doi.replace('https://dx.doi.org/', '')
-            elif doi.startswith('http://doi.org/'):
-                doi = doi.replace('http://doi.org/', '')
-            elif doi.startswith('https://doi.org/'):
-                doi = doi.replace('https://doi.org/', '')
-            if not (doi.startswith('10.') and '/' in doi):
+            if doi.startswith("http://dx.doi.org/"):
+                doi = doi.replace("http://dx.doi.org/", "")
+            elif doi.startswith("https://dx.doi.org/"):
+                doi = doi.replace("https://dx.doi.org/", "")
+            elif doi.startswith("http://doi.org/"):
+                doi = doi.replace("http://doi.org/", "")
+            elif doi.startswith("https://doi.org/"):
+                doi = doi.replace("https://doi.org/", "")
+            if not (doi.startswith("10.") and "/" in doi):
                 sys.stderr.write("bogus JALC DOI: {}\n".format(doi))
                 doi = None
         if not doi:
@@ -202,7 +215,9 @@ class JalcImporter(EntityImporter):
         if date:
             date = date.string
             if len(date) == 10:
-                release_date = datetime.datetime.strptime(date['completed-date'], DATE_FMT).date()
+                release_date = datetime.datetime.strptime(
+                    date["completed-date"], DATE_FMT
+                ).date()
                 release_year = release_date.year
                 release_date = release_date.isoformat()
             elif len(date) == 4 and date.isdigit():
@@ -214,7 +229,7 @@ class JalcImporter(EntityImporter):
             if record.endingPage and record.endingPage.string.strip():
                 pages = "{}-{}".format(pages, record.endingPage.string.strip())
         # double check to prevent "-" as pages
-        if pages and pages.strip() == '-':
+        if pages and pages.strip() == "-":
             pages = None
 
         volume = None
@@ -242,9 +257,13 @@ class JalcImporter(EntityImporter):
         container_extra = dict()
 
         if record.publicationName:
-            pubs = [p.get_text().replace('\n', ' ').strip() for p in record.find_all("publicationName") if p.get_text()]
+            pubs = [
+                p.get_text().replace("\n", " ").strip()
+                for p in record.find_all("publicationName")
+                if p.get_text()
+            ]
             pubs = [clean(p) for p in pubs if p]
-            assert(pubs)
+            assert pubs
             if len(pubs) > 1 and pubs[0] == pubs[1]:
                 pubs = [pubs[0]]
             if len(pubs) > 1 and is_cjk(pubs[0]):
@@ -252,10 +271,14 @@ class JalcImporter(EntityImporter):
                 pubs = [pubs[1], pubs[0]]
             container_name = clean(pubs[0])
             if len(pubs) > 1:
-                container_extra['original_name'] = clean(pubs[1])
+                container_extra["original_name"] = clean(pubs[1])
 
         if record.publisher:
-            pubs = [p.get_text().replace('\n', ' ').strip() for p in record.find_all("publisher") if p.get_text()]
+            pubs = [
+                p.get_text().replace("\n", " ").strip()
+                for p in record.find_all("publisher")
+                if p.get_text()
+            ]
             pubs = [p for p in pubs if p]
             if len(pubs) > 1 and pubs[0] == pubs[1]:
                 pubs = [pubs[0]]
@@ -265,20 +288,25 @@ class JalcImporter(EntityImporter):
             if pubs:
                 publisher = clean(pubs[0])
                 if len(pubs) > 1:
-                    container_extra['publisher_aliases'] = pubs[1:]
-
-        if (container_id is None and self.create_containers and (issnl is not None)
-                and container_name):
+                    container_extra["publisher_aliases"] = pubs[1:]
+
+        if (
+            container_id is None
+            and self.create_containers
+            and (issnl is not None)
+            and container_name
+        ):
             # name, type, publisher, issnl
             # extra: issnp, issne, original_name, languages, country
-            container_extra['country'] = 'jp'
-            container_extra['languages'] = ['ja']
+            container_extra["country"] = "jp"
+            container_extra["languages"] = ["ja"]
             ce = fatcat_openapi_client.ContainerEntity(
                 name=container_name,
-                container_type='journal',
+                container_type="journal",
                 publisher=publisher,
                 issnl=issnl,
-                extra=(container_extra or None))
+                extra=(container_extra or None),
+            )
             ce_edit = self.create_container(ce)
             container_id = ce_edit.ident
             # short-cut future imports in same batch
@@ -301,7 +329,7 @@ class JalcImporter(EntityImporter):
         #   group-title
         # always put at least an empty dict here to indicate the DOI registrar
         # (informally)
-        extra['jalc'] = extra_jalc
+        extra["jalc"] = extra_jalc
 
         title = clean(title)
         if not title:
@@ -312,24 +340,24 @@ class JalcImporter(EntityImporter):
             title=title,
             original_title=clean(original_title),
             release_type=release_type,
-            release_stage='published',
+            release_stage="published",
             release_date=release_date,
             release_year=release_year,
             ext_ids=fatcat_openapi_client.ReleaseExtIds(
                 doi=doi,
-                pmid=extids['pmid'],
-                pmcid=extids['pmcid'],
-                wikidata_qid=extids['wikidata_qid'],
-                core=extids['core_id'],
-                arxiv=extids['arxiv_id'],
-                jstor=extids['jstor_id'],
+                pmid=extids["pmid"],
+                pmcid=extids["pmcid"],
+                wikidata_qid=extids["wikidata_qid"],
+                core=extids["core_id"],
+                arxiv=extids["arxiv_id"],
+                jstor=extids["jstor_id"],
             ),
             volume=volume,
             issue=issue,
             pages=pages,
             publisher=publisher,
             language=lang,
-            #license_slug
+            # license_slug
             container_id=container_id,
             contribs=contribs,
             extra=extra,
@@ -351,17 +379,20 @@ class JalcImporter(EntityImporter):
         # eventually we'll want to support "updates", but for now just skip if
         # entity already exists
         if existing:
-            self.counts['exists'] += 1
+            self.counts["exists"] += 1
             return False
 
         return True
 
     def insert_batch(self, batch):
-        self.api.create_release_auto_batch(fatcat_openapi_client.ReleaseAutoBatch(
-            editgroup=fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra),
-            entity_list=batch))
+        self.api.create_release_auto_batch(
+            fatcat_openapi_client.ReleaseAutoBatch(
+                editgroup=fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                ),
+                entity_list=batch,
+            )
+        )
 
     def parse_file(self, handle):
         """
@@ -374,11 +405,11 @@ class JalcImporter(EntityImporter):
         # 2. iterate over articles, call parse_article on each
         for record in soup.find_all("Description"):
             resp = self.parse_record(record)
-            #print(json.dumps(resp))
+            # print(json.dumps(resp))
             print(resp)
-            #sys.exit(-1)
+            # sys.exit(-1)
 
 
-if __name__=='__main__':
+if __name__ == "__main__":
     parser = JalcImporter(None, None)
     parser.parse_file(open(sys.argv[1]))
diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py
index 25d7b3b5..6d1fefa3 100644
--- a/python/fatcat_tools/importers/journal_metadata.py
+++ b/python/fatcat_tools/importers/journal_metadata.py
@@ -1,4 +1,3 @@
-
 import fatcat_openapi_client
 
 from .common import EntityImporter, clean
@@ -11,18 +10,20 @@ def or_none(s):
         return None
     return s
 
+
 def truthy(s):
     if s is None:
         return None
     s = s.lower()
 
-    if s in ('true', 't', 'yes', 'y', '1'):
+    if s in ("true", "t", "yes", "y", "1"):
         return True
-    elif s in ('false', 'f', 'no', 'n', '0'):
+    elif s in ("false", "f", "no", "n", "0"):
         return False
     else:
         return None
 
+
 class JournalMetadataImporter(EntityImporter):
     """
     Imports journal metadata ("containers") by ISSN, currently from a custom
@@ -33,17 +34,16 @@ class JournalMetadataImporter(EntityImporter):
 
     def __init__(self, api, **kwargs):
 
-        eg_desc = kwargs.get('editgroup_description',
-            "Automated import of container-level metadata, by ISSN. Metadata from Internet Archive munging.")
-        eg_extra = kwargs.get('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.JournalMetadataImporter')
-        super().__init__(api,
-            editgroup_description=eg_desc,
-            editgroup_extra=eg_extra,
-            **kwargs)
+        eg_desc = kwargs.get(
+            "editgroup_description",
+            "Automated import of container-level metadata, by ISSN. Metadata from Internet Archive munging.",
+        )
+        eg_extra = kwargs.get("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.JournalMetadataImporter")
+        super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
     def want(self, raw_record):
-        if raw_record.get('issnl') and raw_record.get('name'):
+        if raw_record.get("issnl") and raw_record.get("name"):
             return True
         return False
 
@@ -54,52 +54,68 @@ class JournalMetadataImporter(EntityImporter):
         returns a ContainerEntity (or None if invalid or couldn't parse)
         """
 
-        if not row.get('name'):
+        if not row.get("name"):
             # Name is required (by schema)
             return None
 
         extra = dict()
-        for key in ('issne', 'issnp', 'languages', 'country', 'urls', 'abbrev',
-            'coden', 'aliases', 'original_name', 'first_year', 'last_year',
-            'platform', 'default_license', 'road', 'mimetypes',
-            'sherpa_romeo', 'kbart'):
+        for key in (
+            "issne",
+            "issnp",
+            "languages",
+            "country",
+            "urls",
+            "abbrev",
+            "coden",
+            "aliases",
+            "original_name",
+            "first_year",
+            "last_year",
+            "platform",
+            "default_license",
+            "road",
+            "mimetypes",
+            "sherpa_romeo",
+            "kbart",
+        ):
             if row.get(key):
                 extra[key] = row[key]
         # TODO: not including for now: norwegian, dois/crossref, ia
 
         extra_doaj = dict()
-        if row.get('doaj'):
-            if row['doaj'].get('as_of'):
-                extra_doaj['as_of'] = row['doaj']['as_of']
-            if row['doaj'].get('works'):
-                extra_doaj['works'] = row['doaj']['works']
+        if row.get("doaj"):
+            if row["doaj"].get("as_of"):
+                extra_doaj["as_of"] = row["doaj"]["as_of"]
+            if row["doaj"].get("works"):
+                extra_doaj["works"] = row["doaj"]["works"]
         if extra_doaj:
-            extra['doaj'] = extra_doaj
+            extra["doaj"] = extra_doaj
 
         extra_ia = dict()
         # TODO: would like an ia.longtail_ia flag
-        if row.get('sim'):
+        if row.get("sim"):
             # NB: None case of the .get() here is blech, but othrwise
             # extra['ia'].get('sim') would be false-y, breaking 'any_ia_sim' later on
-            extra_ia['sim'] = {
-                'year_spans': row['sim'].get('year_spans'),
+            extra_ia["sim"] = {
+                "year_spans": row["sim"].get("year_spans"),
             }
         if extra_ia:
-            extra['ia'] = extra_ia
+            extra["ia"] = extra_ia
 
-        name = clean(row.get('name'))
+        name = clean(row.get("name"))
         if not name:
             return None
 
         ce = fatcat_openapi_client.ContainerEntity(
-            issnl=row['issnl'],
-            issne=row.get('issne'),
-            issnp=row.get('issnp'),
-            container_type=None, # TODO
+            issnl=row["issnl"],
+            issne=row.get("issne"),
+            issnp=row.get("issnp"),
+            container_type=None,  # TODO
             name=name,
-            publisher=clean(row.get('publisher')),
-            wikidata_qid=None, # TODO
-            extra=extra)
+            publisher=clean(row.get("publisher")),
+            wikidata_qid=None,  # TODO
+            extra=extra,
+        )
         return ce
 
     def try_update(self, ce):
@@ -118,23 +134,26 @@ class JournalMetadataImporter(EntityImporter):
         # for now, only update KBART, and only if there is new content
         if not existing.extra:
             existing.extra = dict()
-        if ce.extra.get('kbart') and (existing.extra.get('kbart') != ce.extra['kbart']):
-            if not existing.extra.get('kbart'):
-                existing.extra['kbart'] = {}
-            existing.extra['kbart'].update(ce.extra['kbart'])
+        if ce.extra.get("kbart") and (existing.extra.get("kbart") != ce.extra["kbart"]):
+            if not existing.extra.get("kbart"):
+                existing.extra["kbart"] = {}
+            existing.extra["kbart"].update(ce.extra["kbart"])
             self.api.update_container(self.get_editgroup_id(), existing.ident, existing)
-            self.counts['update'] += 1
+            self.counts["update"] += 1
             return False
         else:
-            self.counts['exists'] += 1
+            self.counts["exists"] += 1
             return False
 
         # if we got this far, it's a bug
         raise NotImplementedError
 
     def insert_batch(self, batch):
-        self.api.create_container_auto_batch(fatcat_openapi_client.ContainerAutoBatch(
-            editgroup=fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra),
-            entity_list=batch))
+        self.api.create_container_auto_batch(
+            fatcat_openapi_client.ContainerAutoBatch(
+                editgroup=fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                ),
+                entity_list=batch,
+            )
+        )
diff --git a/python/fatcat_tools/importers/jstor.py b/python/fatcat_tools/importers/jstor.py
index d37424d6..8c7bfad4 100644
--- a/python/fatcat_tools/importers/jstor.py
+++ b/python/fatcat_tools/importers/jstor.py
@@ -1,4 +1,3 @@
-
 import datetime
 import json
 import sys
@@ -12,10 +11,10 @@ from .crossref import CONTAINER_TYPE_MAP
 
 # TODO: more entries?
 JSTOR_CONTRIB_MAP = {
-    'author': 'author',
-    'editor': 'editor',
-    'translator': 'translator',
-    'illustrator': 'illustrator',
+    "author": "author",
+    "editor": "editor",
+    "translator": "translator",
+    "illustrator": "illustrator",
 }
 
 JSTOR_TYPE_MAP = {
@@ -26,6 +25,7 @@ JSTOR_TYPE_MAP = {
     "research-article": "article-journal",
 }
 
+
 class JstorImporter(EntityImporter):
     """
     Importer for JSTOR bulk XML metadata (eg, from their Early Journals
@@ -34,17 +34,18 @@ class JstorImporter(EntityImporter):
 
     def __init__(self, api, issn_map_file, **kwargs):
 
-        eg_desc = kwargs.get('editgroup_description',
-            "Automated import of JSTOR XML metadata")
-        eg_extra = kwargs.get('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.JstorImporter')
-        super().__init__(api,
+        eg_desc = kwargs.pop("editgroup_description", "Automated import of JSTOR XML metadata")
+        eg_extra = kwargs.pop("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.JstorImporter")
+        super().__init__(
+            api,
             issn_map_file=issn_map_file,
             editgroup_description=eg_desc,
             editgroup_extra=eg_extra,
-            **kwargs)
+            **kwargs
+        )
 
-        self.create_containers = kwargs.get('create_containers', True)
+        self.create_containers = kwargs.get("create_containers", True)
 
         self.read_issn_map_file(issn_map_file)
 
@@ -62,20 +63,22 @@ class JstorImporter(EntityImporter):
         extra = dict()
         extra_jstor = dict()
 
-        release_type = JSTOR_TYPE_MAP.get(article['article-type'])
+        release_type = JSTOR_TYPE_MAP.get(article["article-type"])
         title = article_meta.find("article-title")
         if title and title.get_text():
-            title = title.get_text().replace('\n', ' ').strip()
+            title = title.get_text().replace("\n", " ").strip()
         elif title and not title.get_text():
             title = None
 
-        if not title and release_type.startswith('review') and article_meta.product.source:
-            title = "Review: {}".format(article_meta.product.source.replace('\n', ' ').get_text())
+        if not title and release_type.startswith("review") and article_meta.product.source:
+            title = "Review: {}".format(
+                article_meta.product.source.get_text().replace("\n", " ")
+            )
 
         if not title:
             return None
 
-        if title.endswith('.'):
+        if title.endswith("."):
             title = title[:-1]
 
         if "[Abstract]" in title:
@@ -93,12 +96,12 @@ class JstorImporter(EntityImporter):
             title = title[1:-1]
 
         # JSTOR journal-id
-        journal_ids = [j.string for j in journal_meta.find_all('journal-id')]
+        journal_ids = [j.string for j in journal_meta.find_all("journal-id")]
         if journal_ids:
-            extra_jstor['journal_ids'] = journal_ids
+            extra_jstor["journal_ids"] = journal_ids
 
-        journal_title = journal_meta.find("journal-title").get_text().replace('\n', ' ')
-        publisher = journal_meta.find("publisher-name").get_text().replace('\n', ' ')
+        journal_title = journal_meta.find("journal-title").get_text().replace("\n", " ")
+        publisher = journal_meta.find("publisher-name").get_text().replace("\n", " ")
         issn = journal_meta.find("issn")
         if issn:
             issn = issn.string
@@ -113,13 +116,18 @@ class JstorImporter(EntityImporter):
             container_id = self.lookup_issnl(issnl)
 
         # create container if it doesn't exist
-        if (container_id is None and self.create_containers and (issnl is not None)
-                and journal_title):
+        if (
+            container_id is None
+            and self.create_containers
+            and (issnl is not None)
+            and journal_title
+        ):
             ce = fatcat_openapi_client.ContainerEntity(
                 issnl=issnl,
                 publisher=publisher,
                 container_type=self.map_container_type(release_type),
-                name=clean(journal_title, force_xml=True))
+                name=clean(journal_title, force_xml=True),
+            )
             ce_edit = self.create_container(ce)
             container_id = ce_edit.ident
             self._issnl_id_map[issnl] = container_id
@@ -132,8 +140,8 @@ class JstorImporter(EntityImporter):
         if jstor_id:
             jstor_id = jstor_id.string.strip()
         if not jstor_id and doi:
-            assert doi.startswith('10.2307/')
-            jstor_id = doi.replace('10.2307/', '')
+            assert doi.startswith("10.2307/")
+            jstor_id = doi.replace("10.2307/", "")
         assert jstor_id and int(jstor_id)
 
         contribs = []
@@ -142,13 +150,13 @@ class JstorImporter(EntityImporter):
             for c in cgroup.find_all("contrib"):
                 given = c.find("given-names")
                 if given:
-                    given = clean(given.get_text().replace('\n', ' '))
+                    given = clean(given.get_text().replace("\n", " "))
                 surname = c.find("surname")
                 if surname:
-                    surname = clean(surname.get_text().replace('\n', ' '))
+                    surname = clean(surname.get_text().replace("\n", " "))
                 raw_name = c.find("string-name")
                 if raw_name:
-                    raw_name = clean(raw_name.get_text().replace('\n', ' '))
+                    raw_name = clean(raw_name.get_text().replace("\n", " "))
 
                 if not raw_name:
                     if given and surname:
@@ -156,15 +164,17 @@ class JstorImporter(EntityImporter):
                     elif surname:
                         raw_name = surname
 
-                role = JSTOR_CONTRIB_MAP.get(c.get('contrib-type', 'author'))
-                if not role and c.get('contrib-type'):
-                    sys.stderr.write("NOT IN JSTOR_CONTRIB_MAP: {}\n".format(c['contrib-type']))
-                contribs.append(fatcat_openapi_client.ReleaseContrib(
-                    role=role,
-                    raw_name=raw_name,
-                    given_name=given,
-                    surname=surname,
-                ))
+                role = JSTOR_CONTRIB_MAP.get(c.get("contrib-type", "author"))
+                if not role and c.get("contrib-type"):
+                    sys.stderr.write("NOT IN JSTOR_CONTRIB_MAP: {}\n".format(c["contrib-type"]))
+                contribs.append(
+                    fatcat_openapi_client.ReleaseContrib(
+                        role=role,
+                        raw_name=raw_name,
+                        given_name=given,
+                        surname=surname,
+                    )
+                )
 
         for i, contrib in enumerate(contribs):
             if contrib.raw_name != "et al.":
@@ -172,14 +182,13 @@ class JstorImporter(EntityImporter):
 
         release_year = None
         release_date = None
-        pub_date = article_meta.find('pub-date')
+        pub_date = article_meta.find("pub-date")
         if pub_date and pub_date.year:
             release_year = int(pub_date.year.string)
             if pub_date.month and pub_date.day:
                 release_date = datetime.date(
-                    release_year,
-                    int(pub_date.month.string),
-                    int(pub_date.day.string))
+                    release_year, int(pub_date.month.string), int(pub_date.day.string)
+                )
                 if release_date.day == 1 and release_date.month == 1:
                     # suspect jan 1st dates get set by JSTOR when actual
                     # date not known (citation needed), so drop them
@@ -208,10 +217,10 @@ class JstorImporter(EntityImporter):
                 warnings.warn("MISSING MARC LANG: {}".format(cm.find("meta-value").string))
 
         # JSTOR issue-id
-        if article_meta.find('issue-id'):
-            issue_id = clean(article_meta.find('issue-id').string)
+        if article_meta.find("issue-id"):
+            issue_id = clean(article_meta.find("issue-id").string)
             if issue_id:
-                extra_jstor['issue_id'] = issue_id
+                extra_jstor["issue_id"] = issue_id
 
         # everything in JSTOR is published
         release_stage = "published"
@@ -225,14 +234,14 @@ class JstorImporter(EntityImporter):
         #   group-title
         #   pubmed: retraction refs
         if extra_jstor:
-            extra['jstor'] = extra_jstor
+            extra["jstor"] = extra_jstor
         if not extra:
             extra = None
 
         re = fatcat_openapi_client.ReleaseEntity(
-            #work_id
+            # work_id
             title=title,
-            #original_title
+            # original_title
             release_type=release_type,
             release_stage=release_stage,
             release_date=release_date,
@@ -246,21 +255,16 @@ class JstorImporter(EntityImporter):
             pages=pages,
             publisher=publisher,
             language=language,
-            #license_slug
-
+            # license_slug
             # content, mimetype, lang
-            #abstracts=abstracts,
-
+            # abstracts=abstracts,
             contribs=contribs,
-
             # key, year, container_name, title, locator
             # extra: volume, authors, issue, publisher, identifiers
-            #refs=refs,
-
+            # refs=refs,
             #   name, type, publisher, issnl
             #   extra: issnp, issne, original_name, languages, country
             container_id=container_id,
-
             extra=extra,
         )
         return re
@@ -289,12 +293,12 @@ class JstorImporter(EntityImporter):
 
         if existing and existing.ext_ids.jstor:
             # don't update if it already has JSTOR ID
-            self.counts['exists'] += 1
+            self.counts["exists"] += 1
             return False
         elif existing:
             # but do update if only DOI was set
             existing.ext_ids.jstor = re.ext_ids.jstor
-            existing.extra['jstor'] = re.extra['jstor']
+            existing.extra["jstor"] = re.extra["jstor"]
             # better release_type detection, and some other fields
             # TODO: don't do this over-writing in the future? assuming here
             # this is a one-time batch import over/extending bootstrap crossref
@@ -304,17 +308,20 @@ class JstorImporter(EntityImporter):
             existing.contribs = re.contribs
             existing.language = re.language
             self.api.update_release(self.get_editgroup_id(), existing.ident, existing)
-            self.counts['update'] += 1
+            self.counts["update"] += 1
             return False
 
         return True
 
     def insert_batch(self, batch):
-        self.api.create_release_auto_batch(fatcat_openapi_client.ReleaseAutoBatch(
-            editgroup=fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra),
-            entity_list=batch))
+        self.api.create_release_auto_batch(
+            fatcat_openapi_client.ReleaseAutoBatch(
+                editgroup=fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                ),
+                entity_list=batch,
+            )
+        )
 
     def parse_file(self, handle):
 
@@ -325,8 +332,9 @@ class JstorImporter(EntityImporter):
         for article in soup.find_all("article"):
             resp = self.parse_record(article)
             print(json.dumps(resp))
-            #sys.exit(-1)
+            # sys.exit(-1)
+
 
-if __name__=='__main__':
+if __name__ == "__main__":
     parser = JstorImporter(None, None)
     parser.parse_file(open(sys.argv[1]))
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index 09807276..7c2a6a87 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -1,4 +1,3 @@
-
 import fatcat_openapi_client
 
 from fatcat_tools.normal import clean_doi
@@ -32,13 +31,13 @@ class MatchedImporter(EntityImporter):
 
     def __init__(self, api, **kwargs):
 
-        eg_desc = kwargs.pop('editgroup_description', None) or "Import of large-scale file-to-release match results. Source of metadata varies."
-        eg_extra = kwargs.pop('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.MatchedImporter')
-        super().__init__(api,
-            editgroup_description=eg_desc,
-            editgroup_extra=eg_extra,
-            **kwargs)
+        eg_desc = (
+            kwargs.pop("editgroup_description", None)
+            or "Import of large-scale file-to-release match results. Source of metadata varies."
+        )
+        eg_extra = kwargs.pop("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.MatchedImporter")
+        super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
         self.default_link_rel = kwargs.get("default_link_rel", "web")
         self.default_mimetype = kwargs.get("default_mimetype", None)
 
@@ -46,14 +45,14 @@ class MatchedImporter(EntityImporter):
         return True
 
     def parse_record(self, obj):
-        dois = [d.lower() for d in obj.get('dois', [])]
+        dois = [d.lower() for d in obj.get("dois", [])]
 
         # lookup dois
         re_list = set()
         for doi in dois:
             doi = clean_doi(doi)
             if not doi:
-                self.counts['skip-bad-doi'] += 1
+                self.counts["skip-bad-doi"] += 1
                 return None
             try:
                 re = self.api.lookup_release(doi=doi)
@@ -62,13 +61,22 @@ class MatchedImporter(EntityImporter):
                     raise err
                 re = None
             if re is None:
-                #print("DOI not found: {}".format(doi))
+                # print("DOI not found: {}".format(doi))
                 pass
             else:
                 re_list.add(re.ident)
 
         # look up other external ids
-        for extid_type in ('arxiv', 'pmid', 'pmcid', 'jstor', 'wikidata_qid', 'core', 'isbn13', 'ark'):
+        for extid_type in (
+            "arxiv",
+            "pmid",
+            "pmcid",
+            "jstor",
+            "wikidata_qid",
+            "core",
+            "isbn13",
+            "ark",
+        ):
             extid = obj.get(extid_type)
             if extid:
                 try:
@@ -84,49 +92,47 @@ class MatchedImporter(EntityImporter):
 
         release_ids = list(re_list)
         if len(release_ids) == 0:
-            self.counts['skip-no-releases'] += 1
+            self.counts["skip-no-releases"] += 1
             return None
         if len(release_ids) > SANE_MAX_RELEASES:
-            self.counts['skip-too-many-releases'] += 1
+            self.counts["skip-too-many-releases"] += 1
             return None
 
         # parse URLs and CDX
         urls = set()
-        for url in obj.get('urls', []):
+        for url in obj.get("urls", []):
             url = make_rel_url(url, default_link_rel=self.default_link_rel)
             if url is not None:
                 urls.add(url)
-        for cdx in obj.get('cdx', []):
-            original = cdx['url']
-            if cdx.get('dt'):
-                wayback = "https://web.archive.org/web/{}/{}".format(
-                    cdx['dt'],
-                    original)
+        for cdx in obj.get("cdx", []):
+            original = cdx["url"]
+            if cdx.get("dt"):
+                wayback = "https://web.archive.org/web/{}/{}".format(cdx["dt"], original)
                 urls.add(("webarchive", wayback))
             url = make_rel_url(original, default_link_rel=self.default_link_rel)
             if url is not None:
                 urls.add(url)
         urls = [fatcat_openapi_client.FileUrl(rel=rel, url=url) for (rel, url) in urls]
         if len(urls) == 0:
-            self.counts['skip-no-urls'] += 1
+            self.counts["skip-no-urls"] += 1
             return None
         if len(urls) > SANE_MAX_URLS:
-            self.counts['skip-too-many-urls'] += 1
+            self.counts["skip-too-many-urls"] += 1
             return None
 
-        size = obj.get('size')
+        size = obj.get("size")
         if size:
             size = int(size)
 
-        mimetype = obj.get('mimetype', self.default_mimetype)
+        mimetype = obj.get("mimetype", self.default_mimetype)
         if not mimetype and urls:
-            if urls[0].url.endswith('.pdf'):
-                mimetype = 'application/pdf'
+            if urls[0].url.endswith(".pdf"):
+                mimetype = "application/pdf"
 
         fe = fatcat_openapi_client.FileEntity(
-            md5=obj.get('md5'),
-            sha1=obj['sha1'],
-            sha256=obj.get('sha256'),
+            md5=obj.get("md5"),
+            sha1=obj["sha1"],
+            sha256=obj.get("sha256"),
             size=size,
             mimetype=mimetype,
             release_ids=release_ids,
@@ -149,28 +155,30 @@ class MatchedImporter(EntityImporter):
         combined_release_ids = list(set(fe.release_ids + existing.release_ids))
         if set(combined_release_ids) == set(existing.release_ids) and len(existing.urls) > 0:
             # no new release matches *and* there are already existing URLs
-            self.counts['exists'] += 1
+            self.counts["exists"] += 1
             return False
 
         # check for edit conflicts
         if existing.ident in [e.ident for e in self._edits_inflight]:
-            self.counts['skip-update-inflight'] += 1
+            self.counts["skip-update-inflight"] += 1
             return False
 
         # minimum viable "existing" URL cleanup to fix dupes and broken links:
         # remove 'None' wayback URLs, and set archive.org rel 'archive'
-        existing.urls = [u for u in existing.urls if not ('://web.archive.org/web/None/' in u.url)]
+        existing.urls = [
+            u for u in existing.urls if not ("://web.archive.org/web/None/" in u.url)
+        ]
         for i in range(len(existing.urls)):
             u = existing.urls[i]
-            if u.rel == 'repository' and '://archive.org/download/' in u.url:
-                existing.urls[i].rel = 'archive'
+            if u.rel == "repository" and "://archive.org/download/" in u.url:
+                existing.urls[i].rel = "archive"
 
         # special case: if importing *new* from archive.org arxiv collections,
         # blow away any existing release_id mappings; this is a direct arxiv_id
         # map. This *should* be safe to run in all matched imports.
         is_arxiv = False
         for u in fe.urls:
-            if 'archive.org/download/arxiv' in u.url.lower():
+            if "archive.org/download/arxiv" in u.url.lower():
                 is_arxiv = True
                 break
         if is_arxiv and fe.release_ids:
@@ -178,14 +186,16 @@ class MatchedImporter(EntityImporter):
 
         # merge the existing into this one and update
         existing.urls = list(set([(u.rel, u.url) for u in fe.urls + existing.urls]))
-        existing.urls = [fatcat_openapi_client.FileUrl(rel=rel, url=url) for (rel, url) in existing.urls]
+        existing.urls = [
+            fatcat_openapi_client.FileUrl(rel=rel, url=url) for (rel, url) in existing.urls
+        ]
 
         if len(existing.urls) > SANE_MAX_URLS:
-            self.counts['skip-update-too-many-url'] += 1
+            self.counts["skip-update-too-many-url"] += 1
             return None
         existing.release_ids = list(set(fe.release_ids + existing.release_ids))
         if len(existing.release_ids) > SANE_MAX_RELEASES:
-            self.counts['skip-update-too-many-releases'] += 1
+            self.counts["skip-update-too-many-releases"] += 1
             return None
         existing.mimetype = existing.mimetype or fe.mimetype
         existing.size = existing.size or fe.size
@@ -194,12 +204,15 @@ class MatchedImporter(EntityImporter):
         existing.sha256 = existing.sha256 or fe.sha256
         edit = self.api.update_file(self.get_editgroup_id(), existing.ident, existing)
         self._edits_inflight.append(edit)
-        self.counts['update'] += 1
+        self.counts["update"] += 1
         return False
 
     def insert_batch(self, batch):
-        self.api.create_file_auto_batch(fatcat_openapi_client.FileAutoBatch(
-            editgroup=fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra),
-            entity_list=batch))
+        self.api.create_file_auto_batch(
+            fatcat_openapi_client.FileAutoBatch(
+                editgroup=fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                ),
+                entity_list=batch,
+            )
+        )
diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py
index 3bdd23a1..b514e6e5 100644
--- a/python/fatcat_tools/importers/orcid.py
+++ b/python/fatcat_tools/importers/orcid.py
@@ -1,4 +1,3 @@
-
 import sys
 
 import fatcat_openapi_client
@@ -8,7 +7,7 @@ from .common import EntityImporter, clean
 
 def value_or_none(e):
     if type(e) == dict:
-        e = e.get('value')
+        e = e.get("value")
     if type(e) == str and len(e) == 0:
         e = None
     # TODO: this is probably bogus; patched in desperation; remove?
@@ -21,18 +20,17 @@ def value_or_none(e):
             return None
     return e
 
-class OrcidImporter(EntityImporter):
 
+class OrcidImporter(EntityImporter):
     def __init__(self, api, **kwargs):
 
-        eg_desc = kwargs.get('editgroup_description',
-            "Automated import of ORCID metadata, from official bulk releases.")
-        eg_extra = kwargs.get('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.OrcidImporter')
-        super().__init__(api,
-            editgroup_description=eg_desc,
-            editgroup_extra=eg_extra,
-            **kwargs)
+        eg_desc = kwargs.pop(
+            "editgroup_description",
+            "Automated import of ORCID metadata, from official bulk releases.",
+        )
+        eg_extra = kwargs.pop("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.OrcidImporter")
+        super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
     def want(self, raw_record):
         return True
@@ -43,16 +41,16 @@ class OrcidImporter(EntityImporter):
         returns a CreatorEntity
         """
 
-        if 'person' not in obj:
+        if "person" not in obj:
             return False
 
-        name = obj['person']['name']
+        name = obj["person"]["name"]
         if not name:
             return None
         extra = None
-        given = value_or_none(name.get('given-names'))
-        sur = value_or_none(name.get('family-name'))
-        display = value_or_none(name.get('credit-name'))
+        given = value_or_none(name.get("given-names"))
+        sur = value_or_none(name.get("family-name"))
+        display = value_or_none(name.get("credit-name"))
         if display is None:
             # TODO: sorry human beings
             if given and sur:
@@ -61,7 +59,7 @@ class OrcidImporter(EntityImporter):
                 display = sur
             elif given:
                 display = given
-        orcid = obj['orcid-identifier']['path']
+        orcid = obj["orcid-identifier"]["path"]
         if not self.is_orcid(orcid):
             sys.stderr.write("Bad ORCID: {}\n".format(orcid))
             return None
@@ -74,7 +72,8 @@ class OrcidImporter(EntityImporter):
             given_name=clean(given),
             surname=clean(sur),
             display_name=display,
-            extra=extra)
+            extra=extra,
+        )
         return ce
 
     def try_update(self, raw_record):
@@ -88,14 +87,17 @@ class OrcidImporter(EntityImporter):
         # eventually we'll want to support "updates", but for now just skip if
         # entity already exists
         if existing:
-            self.counts['exists'] += 1
+            self.counts["exists"] += 1
             return False
 
         return True
 
     def insert_batch(self, batch):
-        self.api.create_creator_auto_batch(fatcat_openapi_client.CreatorAutoBatch(
-            editgroup=fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra),
-            entity_list=batch))
+        self.api.create_creator_auto_batch(
+            fatcat_openapi_client.CreatorAutoBatch(
+                editgroup=fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                ),
+                entity_list=batch,
+            )
+        )
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index 00ad54d0..cfdafcf7 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -1,4 +1,3 @@
-
 import datetime
 import json
 import sys
@@ -13,42 +12,42 @@ from .common import LANG_MAP_MARC, EntityImporter, clean
 
 # from: https://www.ncbi.nlm.nih.gov/books/NBK3827/table/pubmedhelp.T.publication_types/?report=objectonly
 PUBMED_RELEASE_TYPE_MAP = {
-    #Adaptive Clinical Trial
+    # Adaptive Clinical Trial
     "Address": "speech",
     "Autobiography": "book",
-    #Bibliography
+    # Bibliography
     "Biography": "book",
-    #Case Reports
+    # Case Reports
     "Classical Article": "article-journal",
-    #Clinical Conference
-    #Clinical Study
-    #Clinical Trial
-    #Clinical Trial, Phase I
-    #Clinical Trial, Phase II
-    #Clinical Trial, Phase III
-    #Clinical Trial, Phase IV
-    #Clinical Trial Protocol
-    #Clinical Trial, Veterinary
-    #Collected Works
-    #Comparative Study
-    #Congress
-    #Consensus Development Conference
-    #Consensus Development Conference, NIH
-    #Controlled Clinical Trial
+    # Clinical Conference
+    # Clinical Study
+    # Clinical Trial
+    # Clinical Trial, Phase I
+    # Clinical Trial, Phase II
+    # Clinical Trial, Phase III
+    # Clinical Trial, Phase IV
+    # Clinical Trial Protocol
+    # Clinical Trial, Veterinary
+    # Collected Works
+    # Comparative Study
+    # Congress
+    # Consensus Development Conference
+    # Consensus Development Conference, NIH
+    # Controlled Clinical Trial
     "Dataset": "dataset",
-    #Dictionary
-    #Directory
-    #Duplicate Publication
+    # Dictionary
+    # Directory
+    # Duplicate Publication
     "Editorial": "editorial",
-    #English Abstract   # doesn't indicate that this is abstract-only
-    #Equivalence Trial
-    #Evaluation Studies
-    #Expression of Concern
-    #Festschrift
-    #Government Document
-    #Guideline
+    # English Abstract   # doesn't indicate that this is abstract-only
+    # Equivalence Trial
+    # Evaluation Studies
+    # Expression of Concern
+    # Festschrift
+    # Government Document
+    # Guideline
     "Historical Article": "article-journal",
-    #Interactive Tutorial
+    # Interactive Tutorial
     "Interview": "interview",
     "Introductory Journal Article": "article-journal",
     "Journal Article": "article-journal",
@@ -56,53 +55,65 @@ PUBMED_RELEASE_TYPE_MAP = {
     "Legal Case": "legal_case",
     "Legislation": "legislation",
     "Letter": "letter",
-    #Meta-Analysis
-    #Multicenter Study
-    #News
+    # Meta-Analysis
+    # Multicenter Study
+    # News
     "Newspaper Article": "article-newspaper",
-    #Observational Study
-    #Observational Study, Veterinary
-    #Overall
-    #Patient Education Handout
-    #Periodical Index
-    #Personal Narrative
-    #Portrait
-    #Practice Guideline
-    #Pragmatic Clinical Trial
-    #Publication Components
-    #Publication Formats
-    #Publication Type Category
-    #Randomized Controlled Trial
-    #Research Support, American Recovery and Reinvestment Act
-    #Research Support, N.I.H., Extramural
-    #Research Support, N.I.H., Intramural
-    #Research Support, Non-U.S. Gov't Research Support, U.S. Gov't, Non-P.H.S.
-    #Research Support, U.S. Gov't, P.H.S.
-    #Review     # in the "literature review" sense, not "product review"
-    #Scientific Integrity Review
-    #Study Characteristics
-    #Support of Research
-    #Systematic Review
+    # Observational Study
+    # Observational Study, Veterinary
+    # Overall
+    # Patient Education Handout
+    # Periodical Index
+    # Personal Narrative
+    # Portrait
+    # Practice Guideline
+    # Pragmatic Clinical Trial
+    # Publication Components
+    # Publication Formats
+    # Publication Type Category
+    # Randomized Controlled Trial
+    # Research Support, American Recovery and Reinvestment Act
+    # Research Support, N.I.H., Extramural
+    # Research Support, N.I.H., Intramural
+    # Research Support, Non-U.S. Gov't Research Support, U.S. Gov't, Non-P.H.S.
+    # Research Support, U.S. Gov't, P.H.S.
+    # Review     # in the "literature review" sense, not "product review"
+    # Scientific Integrity Review
+    # Study Characteristics
+    # Support of Research
+    # Systematic Review
     "Technical Report": "report",
-    #Twin Study
-    #Validation Studies
-    #Video-Audio Media
-    #Webcasts
+    # Twin Study
+    # Validation Studies
+    # Video-Audio Media
+    # Webcasts
 }
 
 MONTH_ABBR_MAP = {
-    "Jan":  1, "01":  1,
-    "Feb":  2, "02":  2,
-    "Mar":  3, "03":  3,
-    "Apr":  4, "04":  4,
-    "May":  5, "05":  5,
-    "Jun":  6, "06":  6,
-    "Jul":  7, "07":  7,
-    "Aug":  8, "08":  8,
-    "Sep":  9, "09":  9,
-    "Oct": 10, "10": 10,
-    "Nov": 11, "11": 11,
-    "Dec": 12, "12": 12,
+    "Jan": 1,
+    "01": 1,
+    "Feb": 2,
+    "02": 2,
+    "Mar": 3,
+    "03": 3,
+    "Apr": 4,
+    "04": 4,
+    "May": 5,
+    "05": 5,
+    "Jun": 6,
+    "06": 6,
+    "Jul": 7,
+    "07": 7,
+    "Aug": 8,
+    "08": 8,
+    "Sep": 9,
+    "09": 9,
+    "Oct": 10,
+    "10": 10,
+    "Nov": 11,
+    "11": 11,
+    "Dec": 12,
+    "12": 12,
 }
 
 # From: https://www.ncbi.nlm.nih.gov/books/NBK7249/
@@ -295,11 +306,10 @@ COUNTRY_NAME_MAP = {
     "United Kingdom": "gb",
     "United States": "us",
     "Uruguay": "uy",
-
     # Additions from running over large files
     "Bosnia and Herzegovina": "ba",
-    #"International"
-    "China (Republic : 1949- )": "tw", # pretty sure this is tw not cn
+    # "International"
+    "China (Republic : 1949- )": "tw",  # pretty sure this is tw not cn
     "Russia (Federation)": "ru",
     "Scotland": "gb",
     "England": "gb",
@@ -320,18 +330,21 @@ class PubmedImporter(EntityImporter):
 
     def __init__(self, api, issn_map_file, lookup_refs=True, **kwargs):
 
-        eg_desc = kwargs.get('editgroup_description',
-            "Automated import of PubMed/MEDLINE XML metadata")
-        eg_extra = kwargs.get('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.PubmedImporter')
-        super().__init__(api,
+        eg_desc = kwargs.get(
+            "editgroup_description", "Automated import of PubMed/MEDLINE XML metadata"
+        )
+        eg_extra = kwargs.get("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.PubmedImporter")
+        super().__init__(
+            api,
             issn_map_file=issn_map_file,
             editgroup_description=eg_desc,
             editgroup_extra=eg_extra,
-            **kwargs)
+            **kwargs
+        )
 
         self.lookup_refs = lookup_refs
-        self.create_containers = kwargs.get('create_containers', True)
+        self.create_containers = kwargs.get("create_containers", True)
         self.read_issn_map_file(issn_map_file)
 
     def want(self, obj):
@@ -365,15 +378,15 @@ class PubmedImporter(EntityImporter):
                 release_type = PUBMED_RELEASE_TYPE_MAP[pub_type.string]
                 break
         if pub_types:
-            extra_pubmed['pub_types'] = pub_types
+            extra_pubmed["pub_types"] = pub_types
         if medline.Article.PublicationTypeList.find(string="Retraction of Publication"):
             release_type = "retraction"
             retraction_of = medline.find("CommentsCorrections", RefType="RetractionOf")
             if retraction_of:
                 if retraction_of.RefSource:
-                    extra_pubmed['retraction_of_raw'] = retraction_of.RefSource.string
+                    extra_pubmed["retraction_of_raw"] = retraction_of.RefSource.string
                 if retraction_of.PMID:
-                    extra_pubmed['retraction_of_pmid'] = retraction_of.PMID.string
+                    extra_pubmed["retraction_of_pmid"] = retraction_of.PMID.string
 
         # everything in medline is published
         release_stage = "published"
@@ -388,18 +401,18 @@ class PubmedImporter(EntityImporter):
         elif medline.find("CommentsCorrections", RefType="ExpressionOfConcernIn"):
             withdrawn_status = "concern"
 
-        pages = medline.find('MedlinePgn')
+        pages = medline.find("MedlinePgn")
         if pages:
             pages = pages.string
 
-        title = medline.Article.ArticleTitle.get_text() # always present
+        title = medline.Article.ArticleTitle.get_text()  # always present
         if title:
-            title = title.replace('\n', ' ')
-            if title.endswith('.'):
+            title = title.replace("\n", " ")
+            if title.endswith("."):
                 title = title[:-1]
             # this hides some "special" titles, but the vast majority are
             # translations; translations don't always include the original_title
-            if title.startswith('[') and title.endswith(']'):
+            if title.startswith("[") and title.endswith("]"):
                 title = title[1:-1]
         else:
             # will filter out later
@@ -408,8 +421,8 @@ class PubmedImporter(EntityImporter):
         original_title = medline.Article.find("VernacularTitle", recurse=False)
         if original_title:
             original_title = original_title.get_text() or None
-            original_title = original_title.replace('\n', ' ')
-            if original_title and original_title.endswith('.'):
+            original_title = original_title.replace("\n", " ")
+            if original_title and original_title.endswith("."):
                 original_title = original_title[:-1]
 
         if original_title and not title:
@@ -428,7 +441,9 @@ class PubmedImporter(EntityImporter):
             else:
                 language = LANG_MAP_MARC.get(language)
                 if not language and not (medline.Article.Language.get_text() in LANG_MAP_MARC):
-                    warnings.warn("MISSING MARC LANG: {}".format(medline.Article.Language.string))
+                    warnings.warn(
+                        "MISSING MARC LANG: {}".format(medline.Article.Language.string)
+                    )
 
         ### Journal/Issue Metadata
         # MedlineJournalInfo is always present
@@ -441,9 +456,9 @@ class PubmedImporter(EntityImporter):
             country_name = mji.Country.string.strip()
             country_code = COUNTRY_NAME_MAP.get(country_name)
             if country_code:
-                container_extra['country'] = country_code
+                container_extra["country"] = country_code
             elif country_name:
-                container_extra['country_name'] = country_name
+                container_extra["country_name"] = country_name
         if mji.find("ISSNLinking"):
             issnl = mji.ISSNLinking.string
 
@@ -462,7 +477,7 @@ class PubmedImporter(EntityImporter):
         if issnl:
             container_id = self.lookup_issnl(issnl)
 
-        pub_date = medline.Article.find('ArticleDate')
+        pub_date = medline.Article.find("ArticleDate")
         if not pub_date:
             pub_date = journal.PubDate
         if not pub_date:
@@ -476,7 +491,8 @@ class PubmedImporter(EntityImporter):
                     release_date = datetime.date(
                         release_year,
                         MONTH_ABBR_MAP[pub_date.Month.string],
-                        int(pub_date.Day.string))
+                        int(pub_date.Day.string),
+                    )
                     release_date = release_date.isoformat()
                 except ValueError as ve:
                     print("bad date, skipping: {}".format(ve), file=sys.stderr)
@@ -486,25 +502,35 @@ class PubmedImporter(EntityImporter):
             if len(medline_date) >= 4 and medline_date[:4].isdigit():
                 release_year = int(medline_date[:4])
                 if release_year < 1300 or release_year > 2040:
-                    print("bad medline year, skipping: {}".format(release_year), file=sys.stderr)
+                    print(
+                        "bad medline year, skipping: {}".format(release_year), file=sys.stderr
+                    )
                     release_year = None
             else:
-                print("unparsable medline date, skipping: {}".format(medline_date), file=sys.stderr)
+                print(
+                    "unparsable medline date, skipping: {}".format(medline_date),
+                    file=sys.stderr,
+                )
 
         if journal.find("Title"):
             container_name = journal.Title.get_text()
 
-        if (container_id is None and self.create_containers and (issnl is not None)
-                and container_name):
+        if (
+            container_id is None
+            and self.create_containers
+            and (issnl is not None)
+            and container_name
+        ):
             # name, type, publisher, issnl
             # extra: original_name, languages, country
             ce = fatcat_openapi_client.ContainerEntity(
                 name=container_name,
-                container_type='journal',
-                #NOTE: publisher not included
+                container_type="journal",
+                # NOTE: publisher not included
                 issnl=issnl,
                 issnp=issnp,
-                extra=(container_extra or None))
+                extra=(container_extra or None),
+            )
             ce_edit = self.create_container(ce)
             container_id = ce_edit.ident
             self._issnl_id_map[issnl] = container_id
@@ -521,8 +547,10 @@ class PubmedImporter(EntityImporter):
         # "All abstracts are in English"
         abstracts = []
         primary_abstract = medline.find("Abstract")
-        if primary_abstract and primary_abstract.AbstractText.get('NlmCategory'):
-            joined = "\n".join([m.get_text() for m in primary_abstract.find_all("AbstractText")])
+        if primary_abstract and primary_abstract.AbstractText.get("NlmCategory"):
+            joined = "\n".join(
+                [m.get_text() for m in primary_abstract.find_all("AbstractText")]
+            )
             abst = fatcat_openapi_client.ReleaseAbstract(
                 content=joined,
                 mimetype="text/plain",
@@ -539,7 +567,7 @@ class PubmedImporter(EntityImporter):
                 )
                 if abst.content:
                     abstracts.append(abst)
-                if abstract.find('math'):
+                if abstract.find("math"):
                     abst = fatcat_openapi_client.ReleaseAbstract(
                         # strip the <AbstractText> tags
                         content=str(abstract)[14:-15],
@@ -551,8 +579,8 @@ class PubmedImporter(EntityImporter):
         other_abstracts = medline.find_all("OtherAbstract")
         for other in other_abstracts:
             lang = "en"
-            if other.get('Language'):
-                lang = LANG_MAP_MARC.get(other['Language'])
+            if other.get("Language"):
+                lang = LANG_MAP_MARC.get(other["Language"])
             abst = fatcat_openapi_client.ReleaseAbstract(
                 content=other.AbstractText.get_text().strip(),
                 mimetype="text/plain",
@@ -572,15 +600,15 @@ class PubmedImporter(EntityImporter):
                 surname = None
                 raw_name = None
                 if author.ForeName:
-                    given_name = author.ForeName.get_text().replace('\n', ' ')
+                    given_name = author.ForeName.get_text().replace("\n", " ")
                 if author.LastName:
-                    surname = author.LastName.get_text().replace('\n', ' ')
+                    surname = author.LastName.get_text().replace("\n", " ")
                 if given_name and surname:
                     raw_name = "{} {}".format(given_name, surname)
                 elif surname:
                     raw_name = surname
                 if not raw_name and author.CollectiveName and author.CollectiveName.get_text():
-                    raw_name = author.CollectiveName.get_text().replace('\n', ' ')
+                    raw_name = author.CollectiveName.get_text().replace("\n", " ")
                 contrib_extra = dict()
                 orcid = author.find("Identifier", Source="ORCID")
                 if orcid:
@@ -590,7 +618,7 @@ class PubmedImporter(EntityImporter):
                         orcid = orcid.replace("http://orcid.org/", "")
                     elif orcid.startswith("https://orcid.org/"):
                         orcid = orcid.replace("https://orcid.org/", "")
-                    elif '-' not in orcid:
+                    elif "-" not in orcid:
                         orcid = "{}-{}-{}-{}".format(
                             orcid[0:4],
                             orcid[4:8],
@@ -598,27 +626,31 @@ class PubmedImporter(EntityImporter):
                             orcid[12:16],
                         )
                     creator_id = self.lookup_orcid(orcid)
-                    contrib_extra['orcid'] = orcid
+                    contrib_extra["orcid"] = orcid
                 affiliations = author.find_all("Affiliation")
                 raw_affiliation = None
                 if affiliations:
-                    raw_affiliation = affiliations[0].get_text().replace('\n', ' ')
+                    raw_affiliation = affiliations[0].get_text().replace("\n", " ")
                     if len(affiliations) > 1:
-                        contrib_extra['more_affiliations'] = [ra.get_text().replace('\n', ' ') for ra in affiliations[1:]]
+                        contrib_extra["more_affiliations"] = [
+                            ra.get_text().replace("\n", " ") for ra in affiliations[1:]
+                        ]
                 if author.find("EqualContrib"):
                     # TODO: schema for this?
-                    contrib_extra['equal'] = True
-                contribs.append(fatcat_openapi_client.ReleaseContrib(
-                    raw_name=raw_name,
-                    given_name=given_name,
-                    surname=surname,
-                    role="author",
-                    raw_affiliation=raw_affiliation,
-                    creator_id=creator_id,
-                    extra=contrib_extra,
-                ))
-
-            if medline.AuthorList['CompleteYN'] == 'N':
+                    contrib_extra["equal"] = True
+                contribs.append(
+                    fatcat_openapi_client.ReleaseContrib(
+                        raw_name=raw_name,
+                        given_name=given_name,
+                        surname=surname,
+                        role="author",
+                        raw_affiliation=raw_affiliation,
+                        creator_id=creator_id,
+                        extra=contrib_extra,
+                    )
+                )
+
+            if medline.AuthorList["CompleteYN"] == "N":
                 contribs.append(fatcat_openapi_client.ReleaseContrib(raw_name="et al."))
 
         for i, contrib in enumerate(contribs):
@@ -633,7 +665,7 @@ class PubmedImporter(EntityImporter):
             # note that Reference always exists within a ReferenceList, but
             # that there may be multiple ReferenceList (eg, sometimes one per
             # Reference)
-            for ref in pubmed.find_all('Reference'):
+            for ref in pubmed.find_all("Reference"):
                 ref_extra = dict()
                 ref_doi = ref.find("ArticleId", IdType="doi")
                 if ref_doi:
@@ -643,22 +675,24 @@ class PubmedImporter(EntityImporter):
                     ref_pmid = clean_pmid(ref_pmid.string)
                 ref_release_id = None
                 if ref_doi:
-                    ref_extra['doi'] = ref_doi
+                    ref_extra["doi"] = ref_doi
                     if self.lookup_refs:
                         ref_release_id = self.lookup_doi(ref_doi)
                 if ref_pmid:
-                    ref_extra['pmid'] = ref_pmid
+                    ref_extra["pmid"] = ref_pmid
                     if self.lookup_refs:
                         ref_release_id = self.lookup_pmid(ref_pmid)
                 ref_raw = ref.Citation
                 if ref_raw:
-                    ref_extra['unstructured'] = ref_raw.get_text()
+                    ref_extra["unstructured"] = ref_raw.get_text()
                 if not ref_extra:
                     ref_extra = None
-                refs.append(fatcat_openapi_client.ReleaseRef(
-                    target_release_id=ref_release_id,
-                    extra=ref_extra,
-                ))
+                refs.append(
+                    fatcat_openapi_client.ReleaseRef(
+                        target_release_id=ref_release_id,
+                        extra=ref_extra,
+                    )
+                )
         if not refs:
             refs = None
 
@@ -669,7 +703,7 @@ class PubmedImporter(EntityImporter):
         #   group-title
         #   pubmed: retraction refs
         if extra_pubmed:
-            extra['pubmed'] = extra_pubmed
+            extra["pubmed"] = extra_pubmed
         if not extra:
             extra = None
 
@@ -690,14 +724,14 @@ class PubmedImporter(EntityImporter):
                 doi=doi,
                 pmid=pmid,
                 pmcid=pmcid,
-                #isbn13     # never in Article
+                # isbn13     # never in Article
             ),
             volume=volume,
             issue=issue,
             pages=pages,
-            #publisher  # not included?
+            # publisher  # not included?
             language=language,
-            #license_slug   # not in MEDLINE
+            # license_slug   # not in MEDLINE
             abstracts=abstracts,
             contribs=contribs,
             refs=refs,
@@ -725,21 +759,22 @@ class PubmedImporter(EntityImporter):
                     raise err
             if existing and existing.ext_ids.pmid and existing.ext_ids.pmid != re.ext_ids.pmid:
                 warn_str = "PMID/DOI mismatch: release {}, pmid {} != {}".format(
-                    existing.ident, existing.ext_ids.pmid, re.ext_ids.pmid)
+                    existing.ident, existing.ext_ids.pmid, re.ext_ids.pmid
+                )
                 warnings.warn(warn_str)
-                self.counts['warn-pmid-doi-mismatch'] += 1
+                self.counts["warn-pmid-doi-mismatch"] += 1
                 # don't clobber DOI, but do group together
                 re.ext_ids.doi = None
                 re.work_id = existing.work_id
 
         if existing and not self.do_updates:
-            self.counts['exists'] += 1
+            self.counts["exists"] += 1
             return False
 
         if existing and existing.ext_ids.pmid and (existing.refs or not re.refs):
             # TODO: any other reasons to do an update?
             # don't update if it already has PMID
-            self.counts['exists'] += 1
+            self.counts["exists"] += 1
             return False
         elif existing:
             # but do update if only DOI was set
@@ -750,12 +785,12 @@ class PubmedImporter(EntityImporter):
             existing.container_id = existing.container_id or re.container_id
             existing.refs = existing.refs or re.refs
             existing.abstracts = existing.abstracts or re.abstracts
-            existing.extra['pubmed'] = re.extra['pubmed']
+            existing.extra["pubmed"] = re.extra["pubmed"]
 
             # fix stub titles
             if existing.title in [
-                    "OUP accepted manuscript",
-                ]:
+                "OUP accepted manuscript",
+            ]:
                 existing.title = re.title
 
             existing.original_title = existing.original_title or re.original_title
@@ -770,8 +805,8 @@ class PubmedImporter(EntityImporter):
             existing.language = existing.language or re.language
 
             # update subtitle in-place first
-            if not existing.subtitle and existing.extra.get('subtitle'):
-                subtitle = existing.extra.pop('subtitle')
+            if not existing.subtitle and existing.extra.get("subtitle"):
+                subtitle = existing.extra.pop("subtitle")
                 if type(subtitle) == list:
                     subtitle = subtitle[0]
                 if subtitle:
@@ -781,13 +816,13 @@ class PubmedImporter(EntityImporter):
 
             try:
                 self.api.update_release(self.get_editgroup_id(), existing.ident, existing)
-                self.counts['update'] += 1
+                self.counts["update"] += 1
             except fatcat_openapi_client.rest.ApiException as err:
                 # there is a code path where we try to update the same release
                 # twice in a row; if that happens, just skip
                 # NOTE: API behavior might change in the future?
                 if "release_edit_editgroup_id_ident_id_key" in err.body:
-                    self.counts['skip-update-conflict'] += 1
+                    self.counts["skip-update-conflict"] += 1
                     return False
                 else:
                     raise err
@@ -797,11 +832,14 @@ class PubmedImporter(EntityImporter):
         return True
 
     def insert_batch(self, batch):
-        self.api.create_release_auto_batch(fatcat_openapi_client.ReleaseAutoBatch(
-            editgroup=fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra),
-            entity_list=batch))
+        self.api.create_release_auto_batch(
+            fatcat_openapi_client.ReleaseAutoBatch(
+                editgroup=fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                ),
+                entity_list=batch,
+            )
+        )
 
     def parse_file(self, handle):
 
@@ -812,8 +850,9 @@ class PubmedImporter(EntityImporter):
         for article in soup.find_all("PubmedArticle"):
             resp = self.parse_record(article)
             print(json.dumps(resp))
-            #sys.exit(-1)
+            # sys.exit(-1)
+
 
-if __name__=='__main__':
+if __name__ == "__main__":
     parser = PubmedImporter(None, None)
     parser.parse_file(open(sys.argv[1]))
diff --git a/python/fatcat_tools/importers/shadow.py b/python/fatcat_tools/importers/shadow.py
index 77205cee..78eeec7a 100644
--- a/python/fatcat_tools/importers/shadow.py
+++ b/python/fatcat_tools/importers/shadow.py
@@ -1,4 +1,3 @@
-
 import fatcat_openapi_client
 
 from fatcat_tools.normal import clean_doi, clean_isbn13, clean_pmid
@@ -30,25 +29,25 @@ class ShadowLibraryImporter(EntityImporter):
 
     def __init__(self, api, **kwargs):
 
-        eg_desc = kwargs.pop('editgroup_description', None) or "Import of 'Shadow Library' file/release matches"
-        eg_extra = kwargs.pop('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.ShadowLibraryImporter')
-        super().__init__(api,
-            editgroup_description=eg_desc,
-            editgroup_extra=eg_extra,
-            **kwargs)
+        eg_desc = (
+            kwargs.pop("editgroup_description", None)
+            or "Import of 'Shadow Library' file/release matches"
+        )
+        eg_extra = kwargs.pop("editgroup_extra", dict())
+        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.ShadowLibraryImporter")
+        super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
         self.default_link_rel = kwargs.get("default_link_rel", "web")
 
     def want(self, raw_record):
         """
         Only want to import records with complete file-level metadata
         """
-        fm = raw_record['file_meta']
-        if not (fm['mimetype'] and fm['md5hex'] and fm['sha256hex'] and fm['size_bytes']):
-            self.counts['skip-file-meta-incomplete'] += 1
+        fm = raw_record["file_meta"]
+        if not (fm["mimetype"] and fm["md5hex"] and fm["sha256hex"] and fm["size_bytes"]):
+            self.counts["skip-file-meta-incomplete"] += 1
             return False
-        if fm['mimetype'] != 'application/pdf':
-            self.counts['skip-not-pdf'] += 1
+        if fm["mimetype"] != "application/pdf":
+            self.counts["skip-not-pdf"] += 1
             return False
         return True
 
@@ -57,23 +56,23 @@ class ShadowLibraryImporter(EntityImporter):
         We do the release lookup in this method. Try DOI, then PMID, last ISBN13.
         """
 
-        shadow_corpus = obj['shadow']['shadow_corpus']
+        shadow_corpus = obj["shadow"]["shadow_corpus"]
         assert shadow_corpus == shadow_corpus.strip().lower()
-        doi = clean_doi(obj['shadow'].get('doi'))
-        pmid = clean_pmid(obj['shadow'].get('pmid'))
-        isbn13 = clean_isbn13(obj['shadow'].get('isbn13'))
-        shadow_id = obj['shadow'].get('shadow_id').strip()
+        doi = clean_doi(obj["shadow"].get("doi"))
+        pmid = clean_pmid(obj["shadow"].get("pmid"))
+        isbn13 = clean_isbn13(obj["shadow"].get("isbn13"))
+        shadow_id = obj["shadow"].get("shadow_id").strip()
         assert shadow_id
 
-        extra = { '{}_id'.format(shadow_corpus): shadow_id }
-        for (ext_type, ext_id) in [('doi', doi), ('pmid', pmid), ('isbn13', isbn13)]:
+        extra = {"{}_id".format(shadow_corpus): shadow_id}
+        for (ext_type, ext_id) in [("doi", doi), ("pmid", pmid), ("isbn13", isbn13)]:
             if not ext_id:
                 continue
-            extra['{}_{}'.format(shadow_corpus, ext_type)] = ext_id
+            extra["{}_{}".format(shadow_corpus, ext_type)] = ext_id
 
         # lookup release via several idents
         re = None
-        for (ext_type, ext_id) in [('doi', doi), ('pmid', pmid), ('isbn13', isbn13)]:
+        for (ext_type, ext_id) in [("doi", doi), ("pmid", pmid), ("isbn13", isbn13)]:
             if not ext_id:
                 continue
             try:
@@ -86,29 +85,31 @@ class ShadowLibraryImporter(EntityImporter):
                 break
 
         if not re:
-            self.counts['skip-release-not-found'] += 1
+            self.counts["skip-release-not-found"] += 1
             return None
 
-        release_ids = [re.ident,]
+        release_ids = [
+            re.ident,
+        ]
 
         # parse single CDX into URLs (if exists)
         urls = []
-        if obj.get('cdx'):
-            url = make_rel_url(obj['cdx']['url'], default_link_rel=self.default_link_rel)
+        if obj.get("cdx"):
+            url = make_rel_url(obj["cdx"]["url"], default_link_rel=self.default_link_rel)
             if url is not None:
                 urls.append(url)
             wayback = "https://web.archive.org/web/{}/{}".format(
-                obj['cdx']['datetime'],
-                obj['cdx']['url'])
+                obj["cdx"]["datetime"], obj["cdx"]["url"]
+            )
             urls.append(("webarchive", wayback))
         urls = [fatcat_openapi_client.FileUrl(rel=rel, url=url) for (rel, url) in urls]
 
         fe = fatcat_openapi_client.FileEntity(
-            md5=obj['file_meta']['md5hex'],
-            sha1=obj['file_meta']['sha1hex'],
-            sha256=obj['file_meta']['sha256hex'],
-            size=int(obj['file_meta']['size_bytes']),
-            mimetype=obj['file_meta']['mimetype'] or None,
+            md5=obj["file_meta"]["md5hex"],
+            sha1=obj["file_meta"]["sha1hex"],
+            sha256=obj["file_meta"]["sha256hex"],
+            size=int(obj["file_meta"]["size_bytes"]),
+            mimetype=obj["file_meta"]["mimetype"] or None,
             release_ids=release_ids,
             urls=urls,
             extra=dict(shadows=extra),
@@ -130,45 +131,50 @@ class ShadowLibraryImporter(EntityImporter):
         if not existing.extra:
             existing.extra = {}
 
-        if existing.extra.get('shadows') and list(fe.extra['shadows'].keys())[0] in existing.extra['shadows']:
+        if (
+            existing.extra.get("shadows")
+            and list(fe.extra["shadows"].keys())[0] in existing.extra["shadows"]
+        ):
             # already imported from this shadow library; skip
-            self.counts['exists'] += 1
+            self.counts["exists"] += 1
             return False
 
         # check for edit conflicts
         if existing.ident in [e.ident for e in self._edits_inflight]:
-            self.counts['skip-update-inflight'] += 1
+            self.counts["skip-update-inflight"] += 1
             return False
         if fe.sha1 in [e.sha1 for e in self._edits_inflight]:
             raise Exception("Inflight insert; shouldn't happen")
 
         # minimum viable "existing" URL cleanup to fix dupes and broken links:
         # remove 'None' wayback URLs, and set archive.org rel 'archive'
-        existing.urls = [u for u in existing.urls if not ('://web.archive.org/web/None/' in u.url)]
+        existing.urls = [
+            u for u in existing.urls if not ("://web.archive.org/web/None/" in u.url)
+        ]
         for i in range(len(existing.urls)):
             u = existing.urls[i]
-            if u.rel == 'repository' and '://archive.org/download/' in u.url:
-                existing.urls[i].rel = 'archive'
-            if u.rel == 'social':
-                u.rel = 'academicsocial'
+            if u.rel == "repository" and "://archive.org/download/" in u.url:
+                existing.urls[i].rel = "archive"
+            if u.rel == "social":
+                u.rel = "academicsocial"
 
         # merge the existing into this one and update
         merged_urls = {}
         for u in fe.urls + existing.urls:
             merged_urls[u.url] = u
         existing.urls = list(merged_urls.values())
-        if not existing.extra.get('shadows'):
-            existing.extra['shadows'] = fe.extra['shadows']
+        if not existing.extra.get("shadows"):
+            existing.extra["shadows"] = fe.extra["shadows"]
         else:
-            existing.extra['shadows'].update(fe.extra['shadows'])
+            existing.extra["shadows"].update(fe.extra["shadows"])
 
         # do these "plus ones" because we really want to do these updates when possible
         if len(existing.urls) > SANE_MAX_URLS + 1:
-            self.counts['skip-update-too-many-url'] += 1
+            self.counts["skip-update-too-many-url"] += 1
             return None
         existing.release_ids = list(set(fe.release_ids + existing.release_ids))
         if len(existing.release_ids) > SANE_MAX_RELEASES + 1:
-            self.counts['skip-update-too-many-releases'] += 1
+            self.counts["skip-update-too-many-releases"] += 1
             return None
         existing.mimetype = existing.mimetype or fe.mimetype
         existing.size = existing.size or fe.size
@@ -180,12 +186,15 @@ class ShadowLibraryImporter(EntityImporter):
         # group-level de-dupe
         edit.sha1 = existing.sha1
         self._edits_inflight.append(edit)
-        self.counts['update'] += 1
+        self.counts["update"] += 1
         return False
 
     def insert_batch(self, batch):
-        self.api.create_file_auto_batch(fatcat_openapi_client.FileAutoBatch(
-            editgroup=fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra),
-            entity_list=batch))
+        self.api.create_file_auto_batch(
+            fatcat_openapi_client.FileAutoBatch(
+                editgroup=fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description, extra=self.editgroup_extra
+                ),
+                entity_list=batch,
+            )
+        )
diff --git a/python/fatcat_tools/importers/wayback_static.py b/python/fatcat_tools/importers/wayback_static.py
index 196f86ff..22fefad3 100755
--- a/python/fatcat_tools/importers/wayback_static.py
+++ b/python/fatcat_tools/importers/wayback_static.py
@@ -33,22 +33,23 @@ REQ_SESSION = requests.Session()
 def parse_wbm_url(url):
     """Takes a wayback machine URL, and returns a tuple:
 
-        (timestamp, datetime, original_url)
+    (timestamp, datetime, original_url)
     """
-    chunks = url.split('/')
+    chunks = url.split("/")
     assert len(chunks) >= 6
-    assert chunks[2] == 'web.archive.org'
-    assert chunks[3] == 'web'
-    return (chunks[4],
-            parse_wbm_timestamp(chunks[4]),
-            '/'.join(chunks[5:]))
+    assert chunks[2] == "web.archive.org"
+    assert chunks[3] == "web"
+    return (chunks[4], parse_wbm_timestamp(chunks[4]), "/".join(chunks[5:]))
+
 
 def test_parse_wbm_url():
     u = "http://web.archive.org/web/20010712114837/http://www.dlib.org/dlib/june01/reich/06reich.html"
     assert parse_wbm_url(u) == (
         "20010712114837",
         datetime.datetime(2001, 7, 12, 11, 48, 37),
-        "http://www.dlib.org/dlib/june01/reich/06reich.html")
+        "http://www.dlib.org/dlib/june01/reich/06reich.html",
+    )
+
 
 def parse_wbm_timestamp(timestamp):
     """
@@ -56,7 +57,7 @@ def parse_wbm_timestamp(timestamp):
     python datetime object (UTC)
     """
     # strip any "im_" or "id_" suffix
-    if timestamp.endswith('_'):
+    if timestamp.endswith("_"):
         timestamp = timestamp[:-3]
     # inflexible; require the full second-precision timestamp
     assert len(timestamp) == 14
@@ -66,11 +67,13 @@ def parse_wbm_timestamp(timestamp):
         day=int(timestamp[6:8]),
         hour=int(timestamp[8:10]),
         minute=int(timestamp[10:12]),
-        second=int(timestamp[12:14]))
+        second=int(timestamp[12:14]),
+    )
+
 
 def test_parse_wbm_timestamp():
-    assert parse_wbm_timestamp("20010712114837") == \
-        datetime.datetime(2001, 7, 12, 11, 48, 37)
+    assert parse_wbm_timestamp("20010712114837") == datetime.datetime(2001, 7, 12, 11, 48, 37)
+
 
 def fetch_wbm(url):
     resp = REQ_SESSION.get(url)
@@ -78,31 +81,35 @@ def fetch_wbm(url):
     assert resp.content
     return resp.content
 
+
 def lookup_cdx(embed_url, verify_hashes=True, cdx_output=None):
     sys.stderr.write(embed_url + "\n")
-    assert embed_url.startswith('/web/')
-    embed_url = embed_url.split('/')
+    assert embed_url.startswith("/web/")
+    embed_url = embed_url.split("/")
     timestamp = embed_url[2]
-    if timestamp.endswith('_'):
+    if timestamp.endswith("_"):
         timestamp = timestamp[:-3]
-    url = '/'.join(embed_url[3:])
-    #print((timestamp, url))
-    resp = REQ_SESSION.get(CDX_API_BASE, params=dict(
-        url=url,
-        closest=timestamp,
-        sort="closest",
-        resolveRevisits="true",
-        matchType="exact",
-        limit=1,
-    ))
+    url = "/".join(embed_url[3:])
+    # print((timestamp, url))
+    resp = REQ_SESSION.get(
+        CDX_API_BASE,
+        params=dict(
+            url=url,
+            closest=timestamp,
+            sort="closest",
+            resolveRevisits="true",
+            matchType="exact",
+            limit=1,
+        ),
+    )
     resp.raise_for_status()
-    #print(resp.url)
+    # print(resp.url)
     if resp.content:
-        hit = resp.content.decode('utf-8').split('\n')[0]
+        hit = resp.content.decode("utf-8").split("\n")[0]
         if cdx_output:
             cdx_output.write(hit + "\n")
-        cdx = hit.split(' ')
-        cdx = [x if (x and x != '-') else None for x in cdx]
+        cdx = hit.split(" ")
+        cdx = [x if (x and x != "-") else None for x in cdx]
         webcapture_cdx = WebcaptureCdxLine(
             surt=cdx[0],
             timestamp=parse_wbm_timestamp(cdx[1]).isoformat() + "Z",
@@ -113,9 +120,9 @@ def lookup_cdx(embed_url, verify_hashes=True, cdx_output=None):
             sha256=None,
         )
         if verify_hashes:
-            resp = REQ_SESSION.get(GWB_URL_BASE + "/{}id_/{}".format(
-                cdx[1], # raw timestamp
-                webcapture_cdx.url))
+            resp = REQ_SESSION.get(
+                GWB_URL_BASE + "/{}id_/{}".format(cdx[1], webcapture_cdx.url)  # raw timestamp
+            )
             resp.raise_for_status()
             assert webcapture_cdx.sha1 == hashlib.sha1(resp.content).digest().hex()
             webcapture_cdx.sha256 = hashlib.sha256(resp.content).digest().hex()
@@ -124,47 +131,50 @@ def lookup_cdx(embed_url, verify_hashes=True, cdx_output=None):
     else:
         return None
 
+
 def wayback_url_to_relative(url):
     """
     Wayback URLs can be relative or absolute in rewritten documents. This
     function converts any form of rewritten URL to a relative (to
     web.archive.org) one, or returns None if it isn't a rewritten URL at all.
     """
-    if url.startswith('https://web.archive.org/'):
+    if url.startswith("https://web.archive.org/"):
         url = url[23:]
-    elif url.startswith('http://web.archive.org/'):
+    elif url.startswith("http://web.archive.org/"):
         url = url[22:]
 
-    if url.startswith('/web/'):
+    if url.startswith("/web/"):
         return url
     else:
         return None
 
+
 def extract_embeds(soup):
 
     embeds = set()
 
     # <link href="">
-    for tag in soup.find_all('link', href=True):
-        if tag['rel'] not in ('stylesheet',):
+    for tag in soup.find_all("link", href=True):
+        if tag["rel"] not in ("stylesheet",):
             continue
-        url = wayback_url_to_relative(tag['href'])
+        url = wayback_url_to_relative(tag["href"])
         if url:
             embeds.add(url)
     # <img src="">
-    for tag in soup.find_all('img', src=True):
-        url = wayback_url_to_relative(tag['src'])
+    for tag in soup.find_all("img", src=True):
+        url = wayback_url_to_relative(tag["src"])
         if url:
             embeds.add(url)
 
     # <script src="">
-    for tag in soup.find_all('script', src=True):
-        url = wayback_url_to_relative(tag['src'])
+    for tag in soup.find_all("script", src=True):
+        url = wayback_url_to_relative(tag["src"])
         if url:
             embeds.add(url)
 
     return list(embeds)
 
+
 def static_wayback_webcapture(wayback_url, cdx_output=None):
     """
     Given a complete wayback machine capture URL, like:
@@ -177,36 +187,40 @@ def static_wayback_webcapture(wayback_url, cdx_output=None):
 
     wbm_html = fetch_wbm(wayback_url)
     raw_timestamp, timestamp, original_url = parse_wbm_url(wayback_url)
-    #with open(rewritten_path, 'r') as fp:
+    # with open(rewritten_path, 'r') as fp:
     #    soup = BeautifulSoup(fp, "lxml")
     soup = BeautifulSoup(wbm_html, "lxml")
     embeds = extract_embeds(soup)
-    cdx_obj = lookup_cdx("/web/{}/{}".format(raw_timestamp, original_url),
-        cdx_output=cdx_output)
+    cdx_obj = lookup_cdx(
+        "/web/{}/{}".format(raw_timestamp, original_url), cdx_output=cdx_output
+    )
     cdx_list = [cdx_obj]
     for url in embeds:
         cdx_obj = lookup_cdx(url, cdx_output=cdx_output)
         cdx_list.append(cdx_obj)
-    archive_urls = [WebcaptureUrl(
-        rel="wayback",
-        url="https://web.archive.org/web/",
-    )]
+    archive_urls = [
+        WebcaptureUrl(
+            rel="wayback",
+            url="https://web.archive.org/web/",
+        )
+    ]
     wc = WebcaptureEntity(
         cdx=cdx_list,
         timestamp=timestamp.isoformat() + "Z",
         original_url=original_url,
         archive_urls=archive_urls,
-        release_ids=None)
+        release_ids=None,
+    )
     return wc
 
+
 def auto_wayback_static(api, release_id, wayback_url, editgroup_id=None):
     """
     Returns a tuple: (editgroup_id, edit). If failed, both are None
     """
 
     raw_timestamp, timestamp, original_url = parse_wbm_url(wayback_url)
-    git_rev = subprocess.check_output(
-        ["git", "describe", "--always"]).strip().decode('utf-8')
+    git_rev = subprocess.check_output(["git", "describe", "--always"]).strip().decode("utf-8")
 
     release = api.get_release(release_id, expand="webcaptures")
 
@@ -214,37 +228,44 @@ def auto_wayback_static(api, release_id, wayback_url, editgroup_id=None):
     for wc in release.webcaptures:
         if wc.original_url == original_url and wc.timestamp.date() == timestamp.date():
             # skipping: already existed
-            print("release {} already had webcapture {} {}".format(
-                release_id, raw_timestamp, original_url))
+            print(
+                "release {} already had webcapture {} {}".format(
+                    release_id, raw_timestamp, original_url
+                )
+            )
             return (None, None)
 
     wc = static_wayback_webcapture(wayback_url)
     assert len(wc.cdx) >= 1
     wc.release_ids = [release_id]
     if not editgroup_id:
-        eg = api.create_editgroup(Editgroup(
-            description="One-off import of static web content from wayback machine",
-            extra=dict(
-                git_rev=git_rev,
-                agent="fatcat_tools.auto_wayback_static")))
+        eg = api.create_editgroup(
+            Editgroup(
+                description="One-off import of static web content from wayback machine",
+                extra=dict(git_rev=git_rev, agent="fatcat_tools.auto_wayback_static"),
+            )
+        )
         editgroup_id = eg.editgroup_id
     edit = api.create_webcapture(eg.editgroup_id, wc)
     return (editgroup_id, edit)
 
+
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--verbose',
-        action='store_true',
-        help="verbose output")
-    parser.add_argument('wayback_url',
-        type=str,
-        help="URL of wayback capture to extract from")
-    parser.add_argument('--json-output',
-        type=argparse.FileType('w'), default=sys.stdout,
-        help="where to write out webcapture entity (as JSON)")
-    parser.add_argument('--cdx-output',
-        type=argparse.FileType('w'), default=None,
-        help="(optional) file to write out CDX stub")
+    parser.add_argument("--verbose", action="store_true", help="verbose output")
+    parser.add_argument("wayback_url", type=str, help="URL of wayback capture to extract from")
+    parser.add_argument(
+        "--json-output",
+        type=argparse.FileType("w"),
+        default=sys.stdout,
+        help="where to write out webcapture entity (as JSON)",
+    )
+    parser.add_argument(
+        "--cdx-output",
+        type=argparse.FileType("w"),
+        default=None,
+        help="(optional) file to write out CDX stub",
+    )
 
     args = parser.parse_args()
 
@@ -254,5 +275,6 @@ def main():
     wc_dict = api_client.sanitize_for_serialization(wc)
     print(json.dumps(wc_dict))
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     main()
diff --git a/python/fatcat_tools/kafka.py b/python/fatcat_tools/kafka.py
index 32749db2..2a4451ad 100644
--- a/python/fatcat_tools/kafka.py
+++ b/python/fatcat_tools/kafka.py
@@ -1,4 +1,3 @@
-
 from confluent_kafka import KafkaException, Producer
 
 
@@ -9,14 +8,15 @@ def kafka_fail_fast(err, msg):
         # TODO: should it be sys.exit(-1)?
         raise KafkaException(err)
 
+
 def simple_kafka_producer(kafka_hosts):
 
     kafka_config = {
-        'bootstrap.servers': kafka_hosts,
-        'message.max.bytes': 20000000, # ~20 MBytes; broker-side max is ~50 MBytes
-        'delivery.report.only.error': True,
-        'default.topic.config': {
-            'request.required.acks': -1,
+        "bootstrap.servers": kafka_hosts,
+        "message.max.bytes": 20000000,  # ~20 MBytes; broker-side max is ~50 MBytes
+        "delivery.report.only.error": True,
+        "default.topic.config": {
+            "request.required.acks": -1,
         },
     }
     return Producer(kafka_config)
diff --git a/python/fatcat_tools/normal.py b/python/fatcat_tools/normal.py
index 9b65e768..12c58829 100644
--- a/python/fatcat_tools/normal.py
+++ b/python/fatcat_tools/normal.py
@@ -1,4 +1,3 @@
-
 """
 A bunch of helpers to parse and normalize strings: external identifiers,
 free-form input, titles, etc.
@@ -32,7 +31,7 @@ def clean_doi(raw: str) -> Optional[str]:
     if not raw:
         return None
     raw = raw.strip().lower()
-    if '\u2013' in raw:
+    if "\u2013" in raw:
         # Do not attempt to normalize "en dash" and since FC does not allow
         # unicode in DOI, treat this as invalid.
         return None
@@ -54,7 +53,7 @@ def clean_doi(raw: str) -> Optional[str]:
     # fatcatd uses same REGEX, but Rust regex rejects these characters, while
     # python doesn't. DOIs are syntaxtually valid, but very likely to be typos;
     # for now filter them out.
-    for c in ('¬', ):
+    for c in ("¬",):
         if c in raw:
             return None
 
@@ -70,6 +69,7 @@ def clean_doi(raw: str) -> Optional[str]:
         return None
     return raw
 
+
 def test_clean_doi():
     assert clean_doi("10.1234/asdf ") == "10.1234/asdf"
     assert clean_doi("10.1037//0002-9432.72.1.50") == "10.1037/0002-9432.72.1.50"
@@ -81,7 +81,9 @@ def test_clean_doi():
     assert clean_doi("doi:10.1234/asdf ") == "10.1234/asdf"
     assert clean_doi("doi:10.1234/ asdf ") is None
     assert clean_doi("10.4149/gpb¬_2017042") is None  # "logical negation" character
-    assert clean_doi("10.6002/ect.2020.häyry") is None  # this example via pubmed (pmid:32519616)
+    assert (
+        clean_doi("10.6002/ect.2020.häyry") is None
+    )  # this example via pubmed (pmid:32519616)
     assert clean_doi("10.30466/vrf.2019.98547.2350\u200e") is None
     assert clean_doi("10.12016/j.issn.2096⁃1456.2017.06.014") is None
     assert clean_doi("10.4025/diálogos.v17i2.36030") is None
@@ -92,6 +94,7 @@ def test_clean_doi():
 
 ARXIV_ID_REGEX = re.compile(r"^(\d{4}.\d{4,5}|[a-z\-]+(\.[A-Z]{2})?/\d{7})(v\d+)?$")
 
+
 def clean_arxiv_id(raw: str) -> Optional[str]:
     """
     Removes any:
@@ -113,6 +116,7 @@ def clean_arxiv_id(raw: str) -> Optional[str]:
         return None
     return raw
 
+
 def test_clean_arxiv_id():
     assert clean_arxiv_id("0806.2878v1") == "0806.2878v1"
     assert clean_arxiv_id("0806.2878") == "0806.2878"
@@ -141,16 +145,18 @@ def test_clean_arxiv_id():
     assert clean_arxiv_id("0806.v1") is None
     assert clean_arxiv_id("08062878v1") is None
 
+
 def clean_wikidata_qid(raw):
     if not raw:
         return None
     raw = raw.strip()
     if len(raw.split()) != 1 or len(raw) < 2:
         return None
-    if raw[0] == 'Q' and raw[1] != '0' and raw[1:].isdigit():
+    if raw[0] == "Q" and raw[1] != "0" and raw[1:].isdigit():
         return raw
     return None
 
+
 def test_clean_wikidata_qid():
     assert clean_wikidata_qid("Q1234") == "Q1234"
     assert clean_wikidata_qid("Q1") == "Q1"
@@ -163,6 +169,7 @@ def test_clean_wikidata_qid():
     assert clean_wikidata_qid("qfba3") is None
     assert clean_wikidata_qid("") is None
 
+
 def clean_pmid(raw: str) -> Optional[str]:
     if not raw:
         return None
@@ -173,6 +180,7 @@ def clean_pmid(raw: str) -> Optional[str]:
         return raw
     return None
 
+
 def test_clean_pmid():
     assert clean_pmid("1234") == "1234"
     assert clean_pmid("1234 ") == "1234"
@@ -180,6 +188,7 @@ def test_clean_pmid():
     assert clean_pmid("qfba3") is None
     assert clean_pmid("") is None
 
+
 def clean_pmcid(raw: str) -> Optional[str]:
     if not raw:
         return None
@@ -190,6 +199,7 @@ def clean_pmcid(raw: str) -> Optional[str]:
         return raw
     return None
 
+
 def clean_sha1(raw: str) -> Optional[str]:
     if not raw:
         return None
@@ -203,13 +213,21 @@ def clean_sha1(raw: str) -> Optional[str]:
             return None
     return raw
 
+
 def test_clean_sha1():
-    assert clean_sha1("0fba3fba0e1937aa0297de3836b768b5dfb23d7b") == "0fba3fba0e1937aa0297de3836b768b5dfb23d7b"
-    assert clean_sha1("0fba3fba0e1937aa0297de3836b768b5dfb23d7b ") == "0fba3fba0e1937aa0297de3836b768b5dfb23d7b"
+    assert (
+        clean_sha1("0fba3fba0e1937aa0297de3836b768b5dfb23d7b")
+        == "0fba3fba0e1937aa0297de3836b768b5dfb23d7b"
+    )
+    assert (
+        clean_sha1("0fba3fba0e1937aa0297de3836b768b5dfb23d7b ")
+        == "0fba3fba0e1937aa0297de3836b768b5dfb23d7b"
+    )
     assert clean_sha1("fba3fba0e1937aa0297de3836b768b5dfb23d7b") is None
     assert clean_sha1("qfba3fba0e1937aa0297de3836b768b5dfb23d7b") is None
     assert clean_sha1("0fba3fb a0e1937aa0297de3836b768b5dfb23d7b") is None
 
+
 def clean_sha256(raw: str) -> Optional[str]:
     raw = raw.strip().lower()
     if len(raw.split()) != 1:
@@ -221,12 +239,18 @@ def clean_sha256(raw: str) -> Optional[str]:
             return None
     return raw
 
+
 def test_clean_sha256():
-    assert clean_sha256("6cc853f2ae75696b2e45f476c76b946b0fc2df7c52bb38287cb074aceb77bc7f") == "6cc853f2ae75696b2e45f476c76b946b0fc2df7c52bb38287cb074aceb77bc7f"
+    assert (
+        clean_sha256("6cc853f2ae75696b2e45f476c76b946b0fc2df7c52bb38287cb074aceb77bc7f")
+        == "6cc853f2ae75696b2e45f476c76b946b0fc2df7c52bb38287cb074aceb77bc7f"
+    )
     assert clean_sha256("0fba3fba0e1937aa0297de3836b768b5dfb23d7b") is None
 
+
 ISSN_REGEX = re.compile(r"^\d{4}-\d{3}[0-9X]$")
 
+
 def clean_issn(raw: str) -> Optional[str]:
     if not raw:
         return None
@@ -237,14 +261,17 @@ def clean_issn(raw: str) -> Optional[str]:
         return None
     return raw
 
+
 def test_clean_issn():
     assert clean_issn("1234-4567") == "1234-4567"
     assert clean_issn("1234-456X") == "1234-456X"
     assert clean_issn("134-4567") is None
     assert clean_issn("123X-4567") is None
 
+
 ISBN13_REGEX = re.compile(r"^97(?:8|9)-\d{1,5}-\d{1,7}-\d{1,6}-\d$")
 
+
 def clean_isbn13(raw: str) -> Optional[str]:
     if not raw:
         return None
@@ -253,14 +280,17 @@ def clean_isbn13(raw: str) -> Optional[str]:
         return None
     return raw
 
+
 def test_clean_isbn13():
     assert clean_isbn13("978-1-56619-909-4") == "978-1-56619-909-4"
     assert clean_isbn13("978-1-4028-9462-6") == "978-1-4028-9462-6"
     assert clean_isbn13("978-1-56619-909-4 ") == "978-1-56619-909-4"
     assert clean_isbn13("9781566199094") is None
 
+
 ORCID_REGEX = re.compile(r"^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$")
 
+
 def clean_orcid(raw: str) -> Optional[str]:
     if not raw:
         return None
@@ -269,6 +299,7 @@ def clean_orcid(raw: str) -> Optional[str]:
         return None
     return raw
 
+
 def test_clean_orcid():
     assert clean_orcid("0123-4567-3456-6789") == "0123-4567-3456-6789"
     assert clean_orcid("0123-4567-3456-678X") == "0123-4567-3456-678X"
@@ -279,6 +310,7 @@ def test_clean_orcid():
 
 HDL_REGEX = re.compile(r"^\d+(\.\d+)*/\S+$")
 
+
 def clean_hdl(raw):
     if not raw:
         return None
@@ -293,14 +325,17 @@ def clean_hdl(raw):
         raw = raw[15:]
     if not HDL_REGEX.fullmatch(raw):
         return None
-    if raw.startswith('10.'):
+    if raw.startswith("10."):
         return None
     return raw
 
+
 def test_clean_hdl():
     assert clean_hdl("20.500.23456/ABC/DUMMY") == "20.500.23456/abc/dummy"
     assert clean_hdl("hdl:20.500.23456/ABC/DUMMY") == "20.500.23456/abc/dummy"
-    assert clean_hdl("https://hdl.handle.net/20.500.23456/ABC/DUMMY") == "20.500.23456/abc/dummy"
+    assert (
+        clean_hdl("https://hdl.handle.net/20.500.23456/ABC/DUMMY") == "20.500.23456/abc/dummy"
+    )
     assert clean_hdl("http://hdl.handle.net/20.500.23456/ABC/DUMMY") == "20.500.23456/abc/dummy"
     assert clean_hdl("21.1234/aksjdfh") == "21.1234/aksjdfh"
     assert clean_hdl("2381/12775") == "2381/12775"
@@ -326,7 +361,7 @@ def clean_str(thing: Optional[str], force_xml: bool = False) -> Optional[str]:
     """
     if not thing:
         return None
-    unescape_html: Union[str, bool] = 'auto'
+    unescape_html: Union[str, bool] = "auto"
     if force_xml:
         unescape_html = True
     fixed = ftfy.fix_text(thing, unescape_html=unescape_html).strip()
@@ -335,15 +370,17 @@ def clean_str(thing: Optional[str], force_xml: bool = False) -> Optional[str]:
         return None
     return fixed
 
+
 def test_clean_str():
 
     assert clean_str(None) is None
-    assert clean_str('') is None
-    assert clean_str('1') is None
-    assert clean_str('123') == '123'
-    assert clean_str('a&amp;b') == 'a&b'
-    assert clean_str('<b>a&amp;b</b>') == '<b>a&amp;b</b>'
-    assert clean_str('<b>a&amp;b</b>', force_xml=True) == '<b>a&b</b>'
+    assert clean_str("") is None
+    assert clean_str("1") is None
+    assert clean_str("123") == "123"
+    assert clean_str("a&amp;b") == "a&b"
+    assert clean_str("<b>a&amp;b</b>") == "<b>a&amp;b</b>"
+    assert clean_str("<b>a&amp;b</b>", force_xml=True) == "<b>a&b</b>"
+
 
 def b32_hex(s):
     s = s.strip().split()[0].lower()
@@ -351,7 +388,8 @@ def b32_hex(s):
         s = s[5:]
     if len(s) != 32:
         return s
-    return base64.b16encode(base64.b32decode(s.upper())).lower().decode('utf-8')
+    return base64.b16encode(base64.b32decode(s.upper())).lower().decode("utf-8")
+
 
 def is_cjk(s):
     if not s:
@@ -359,38 +397,53 @@ def is_cjk(s):
     for c in s:
         if c.isalpha():
             lang_prefix = unicodedata.name(c).split()[0]
-            return lang_prefix in ('CJK', 'HIRAGANA', 'KATAKANA', 'HANGUL')
+            return lang_prefix in ("CJK", "HIRAGANA", "KATAKANA", "HANGUL")
     return False
 
+
 def test_is_cjk():
     assert is_cjk(None) is False
-    assert is_cjk('') is False
-    assert is_cjk('blah') is False
-    assert is_cjk('岡, 鹿, 梨, 阜, 埼') is True
-    assert is_cjk('[岡, 鹿, 梨, 阜, 埼]') is True
-    assert is_cjk('菊') is True
-    assert is_cjk('岡, 鹿, 梨, 阜, 埼 with eng after') is True
-    assert is_cjk('水道') is True
-    assert is_cjk('オウ, イク') is True # kanji
-    assert is_cjk('ひヒ') is True
-    assert is_cjk('き゚ゅ') is True
-    assert is_cjk('ㄴ, ㄹ, ㅁ, ㅂ, ㅅ') is True
+    assert is_cjk("") is False
+    assert is_cjk("blah") is False
+    assert is_cjk("岡, 鹿, 梨, 阜, 埼") is True
+    assert is_cjk("[岡, 鹿, 梨, 阜, 埼]") is True
+    assert is_cjk("菊") is True
+    assert is_cjk("岡, 鹿, 梨, 阜, 埼 with eng after") is True
+    assert is_cjk("水道") is True
+    assert is_cjk("オウ, イク") is True  # kanji
+    assert is_cjk("ひヒ") is True
+    assert is_cjk("き゚ゅ") is True
+    assert is_cjk("ㄴ, ㄹ, ㅁ, ㅂ, ㅅ") is True
+
 
 MONTH_MAP = {
-    "jan":  1, "january":   1,
-    "feb":  2, "febuary":   2,
-    "mar":  3, "march":     3,
-    "apr":  4, "april":     4,
-    "may":  5, "may":       5,
-    "jun":  6, "june":      6,
-    "jul":  7, "july":      7,
-    "aug":  8, "august":    8,
-    "sep":  9, "september": 9,
-    "oct": 10, "october":   10,
-    "nov": 11, "nov":       11,
-    "dec": 12, "december":  12,
+    "jan": 1,
+    "january": 1,
+    "feb": 2,
+    "febuary": 2,
+    "mar": 3,
+    "march": 3,
+    "apr": 4,
+    "april": 4,
+    "may": 5,
+    "may": 5,
+    "jun": 6,
+    "june": 6,
+    "jul": 7,
+    "july": 7,
+    "aug": 8,
+    "august": 8,
+    "sep": 9,
+    "september": 9,
+    "oct": 10,
+    "october": 10,
+    "nov": 11,
+    "nov": 11,
+    "dec": 12,
+    "december": 12,
 }
 
+
 def parse_month(raw: Optional[str]) -> Optional[int]:
     """
     Parses a string into a month number (1 to 12)
@@ -408,6 +461,7 @@ def parse_month(raw: Optional[str]) -> Optional[int]:
         return MONTH_MAP[raw]
     return None
 
+
 def test_parse_month() -> None:
 
     assert parse_month(None) is None
@@ -417,6 +471,7 @@ def test_parse_month() -> None:
     assert parse_month("jan") == 1
     assert parse_month("September") == 9
 
+
 def detect_text_lang(raw: str) -> Optional[str]:
     """
     Tries to determine language of, eg, an abstract.
@@ -427,13 +482,14 @@ def detect_text_lang(raw: str) -> Optional[str]:
         return None
     try:
         lang = langdetect.detect(raw)
-        lang = lang.split('-')[0]
+        lang = lang.split("-")[0]
         assert len(lang) == 2
         return lang
     except (langdetect.lang_detect_exception.LangDetectException, TypeError):
         return None
     return None
 
+
 def test_detect_text_lang() -> None:
     assert detect_text_lang("") is None
     EN_SAMPLE = "this is a string of English text for testing"
@@ -444,6 +500,7 @@ def test_detect_text_lang() -> None:
     # XXX: why does this detect as `ko` sometimes?
     assert detect_text_lang(ZH_SAMPLE) in ("zh", "ko")
 
+
 def parse_lang_name(raw: Optional[str]) -> Optional[str]:
     """
     Parses a language name and returns a 2-char ISO 631 language code.
@@ -456,13 +513,14 @@ def parse_lang_name(raw: Optional[str]) -> Optional[str]:
             return None
         return lang.alpha_2.lower()
     except LookupError:
-        #print(f"  unknown language: '{raw}', file=sys.stderr)
+        # print(f"  unknown language: '{raw}', file=sys.stderr)
         return None
     except AttributeError:
-        #print(f"  partial language metadata: '{lang}', file=sys.stderr)
+        # print(f"  partial language metadata: '{lang}', file=sys.stderr)
         return None
     return None
 
+
 def test_parse_lang_name() -> None:
 
     assert parse_lang_name(None) is None
@@ -544,86 +602,85 @@ def test_parse_country_name():
     assert parse_country_name("Russia") == "ru"
     assert parse_country_name("Japan") == "jp"
 
+
 # These are very close, but maybe not exactly 1-to-1 with 639-2? Some mix of
 # 2/T and 2/B?
 # PubMed/MEDLINE and JSTOR use these MARC codes
 # https://www.loc.gov/marc/languages/language_name.html
 LANG_MAP_MARC = {
-    'afr': 'af',
-    'alb': 'sq',
-    'amh': 'am',
-    'ara': 'ar',
-    'arm': 'hy',
-    'aze': 'az',
-    'ben': 'bn',
-    'bos': 'bs',
-    'bul': 'bg',
-    'cat': 'ca',
-    'chi': 'zh',
-    'cze': 'cs',
-    'dan': 'da',
-    'dut': 'nl',
-    'eng': 'en',
-    'epo': 'eo',
-    'est': 'et',
-    'fin': 'fi',
-    'fre': 'fr',
-    'geo': 'ka',
-    'ger': 'de',
-    'gla': 'gd',
-    'gre': 'el',
-    'heb': 'he',
-    'hin': 'hi',
-    'hrv': 'hr',
-    'hun': 'hu',
-    'ice': 'is',
-    'ind': 'id',
-    'ita': 'it',
-    'jpn': 'ja',
-    'kin': 'rw',
-    'kor': 'ko',
-    'lat': 'la',
-    'lav': 'lv',
-    'lit': 'lt',
-    'mac': 'mk',
-    'mal': 'ml',
-    'mao': 'mi',
-    'may': 'ms',
-    'nor': 'no',
-    'per': 'fa',
-    'per': 'fa',
-    'pol': 'pl',
-    'por': 'pt',
-    'pus': 'ps',
-    'rum': 'ro',
-    'rus': 'ru',
-    'san': 'sa',
-    'slo': 'sk',
-    'slv': 'sl',
-    'spa': 'es',
-    'srp': 'sr',
-    'swe': 'sv',
-    'tha': 'th',
-    'tur': 'tr',
-    'ukr': 'uk',
-    'urd': 'ur',
-    'vie': 'vi',
-    'wel': 'cy',
-
-# additions
-    'gle': 'ga', # "Irish" (Gaelic)
-    'jav': 'jv', # Javanese
-    'welsh': 'cy', # Welsh
-    'oci': 'oc', # Occitan
-
-# Don't have ISO 639-1 codes
-    'grc': 'el', # Ancient Greek; map to modern greek
-    'map': None, # Austronesian (collection)
-    'syr': None, # Syriac, Modern
-    'gem': None, # Old Saxon
-    'non': None, # Old Norse
-    'emg': None, # Eastern Meohang
-    'neg': None, # Negidal
-    'mul': None, # Multiple languages
-    'und': None, # Undetermined
+    "afr": "af",
+    "alb": "sq",
+    "amh": "am",
+    "ara": "ar",
+    "arm": "hy",
+    "aze": "az",
+    "ben": "bn",
+    "bos": "bs",
+    "bul": "bg",
+    "cat": "ca",
+    "chi": "zh",
+    "cze": "cs",
+    "dan": "da",
+    "dut": "nl",
+    "eng": "en",
+    "epo": "eo",
+    "est": "et",
+    "fin": "fi",
+    "fre": "fr",
+    "geo": "ka",
+    "ger": "de",
+    "gla": "gd",
+    "gre": "el",
+    "heb": "he",
+    "hin": "hi",
+    "hrv": "hr",
+    "hun": "hu",
+    "ice": "is",
+    "ind": "id",
+    "ita": "it",
+    "jpn": "ja",
+    "kin": "rw",
+    "kor": "ko",
+    "lat": "la",
+    "lav": "lv",
+    "lit": "lt",
+    "mac": "mk",
+    "mal": "ml",
+    "mao": "mi",
+    "may": "ms",
+    "nor": "no",
+    "per": "fa",
+    "per": "fa",
+    "pol": "pl",
+    "por": "pt",
+    "pus": "ps",
+    "rum": "ro",
+    "rus": "ru",
+    "san": "sa",
+    "slo": "sk",
+    "slv": "sl",
+    "spa": "es",
+    "srp": "sr",
+    "swe": "sv",
+    "tha": "th",
+    "tur": "tr",
+    "ukr": "uk",
+    "urd": "ur",
+    "vie": "vi",
+    "wel": "cy",
+    # additions
+    "gle": "ga",  # "Irish" (Gaelic)
+    "jav": "jv",  # Javanese
+    "welsh": "cy",  # Welsh
+    "oci": "oc",  # Occitan
+    # Don't have ISO 639-1 codes
+    "grc": "el",  # Ancient Greek; map to modern greek
+    "map": None,  # Austronesian (collection)
+    "syr": None,  # Syriac, Modern
+    "gem": None,  # Old Saxon
+    "non": None,  # Old Norse
+    "emg": None,  # Eastern Meohang
+    "neg": None,  # Negidal
+    "mul": None,  # Multiple languages
+    "und": None,  # Undetermined
 }
diff --git a/python/fatcat_tools/references.py b/python/fatcat_tools/references.py
index 8361b260..6fd9ca49 100644
--- a/python/fatcat_tools/references.py
+++ b/python/fatcat_tools/references.py
@@ -22,6 +22,7 @@ from fatcat_tools.transforms.entities import entity_to_dict
 
 class BiblioRef(BaseModel):
     """bibliographic reference"""
+
     # ("release", source_release_ident, ref_index)
     # ("wikipedia", source_wikipedia_article, ref_index)
     _key: Optional[str]
@@ -37,7 +38,7 @@ class BiblioRef(BaseModel):
 
     # context of the reference itself
     # 1-indexed, not 0-indexed
-    ref_index: Optional[int] # TODO: actually optional?
+    ref_index: Optional[int]  # TODO: actually optional?
     # eg, "Lee86", "BIB23"
     ref_key: Optional[str]
     # eg, page number
@@ -74,16 +75,20 @@ class BiblioRef(BaseModel):
         # work-arounds for bad/weird ref_key
         if self.ref_key:
             self.ref_key = self.ref_key.strip()
-            if self.ref_key[0] in ['/', '_']:
+            if self.ref_key[0] in ["/", "_"]:
                 self.ref_key = self.ref_key[1:]
-            if self.ref_key.startswith("10.") and 'SICI' in self.ref_key and '-' in self.ref_key:
-                self.ref_key = self.ref_key.split('-')[-1]
-            if self.ref_key.startswith("10.") and '_' in self.ref_key:
-                self.ref_key = self.ref_key.split('_')[-1]
+            if (
+                self.ref_key.startswith("10.")
+                and "SICI" in self.ref_key
+                and "-" in self.ref_key
+            ):
+                self.ref_key = self.ref_key.split("-")[-1]
+            if self.ref_key.startswith("10.") and "_" in self.ref_key:
+                self.ref_key = self.ref_key.split("_")[-1]
             if len(self.ref_key) > 10 and "#" in self.ref_key:
-                self.ref_key = self.ref_key.split('#')[-1]
+                self.ref_key = self.ref_key.split("#")[-1]
             if len(self.ref_key) > 10 and "_" in self.ref_key:
-                self.ref_key = self.ref_key.split('_')[-1]
+                self.ref_key = self.ref_key.split("_")[-1]
         if not self.ref_key and self.ref_index is not None:
             self.ref_key = str(self.ref_index)
         return self
@@ -98,7 +103,7 @@ class EnrichedBiblioRef(BaseModel):
     # TODO: openlibrary work?
     access: List[AccessOption]
 
-    @validator('release')
+    @validator("release")
     @classmethod
     def check_release(cls, v):
         if v is not None and not isinstance(v, ReleaseEntity):
@@ -119,7 +124,7 @@ class RefHits(BaseModel):
     limit: int
     query_time_ms: int
     query_wall_time_ms: int
-    result_refs: List[Union[BiblioRef,EnrichedBiblioRef]]
+    result_refs: List[Union[BiblioRef, EnrichedBiblioRef]]
 
     class Config:
         json_encoders = {
@@ -145,22 +150,22 @@ def _execute_ref_query(search: Any, limit: int, offset: Optional[int] = None) ->
     except elasticsearch.exceptions.RequestError as e_raw:
         # this is a "user" error
         e: Any = e_raw
-        #logging.warn("elasticsearch 400: " + str(e.info))
+        # logging.warn("elasticsearch 400: " + str(e.info))
         if e.info.get("error", {}).get("root_cause", {}):
             raise ValueError(str(e.info["error"]["root_cause"][0].get("reason"))) from e
         else:
             raise ValueError(str(e.info)) from e
     except elasticsearch.exceptions.TransportError as e:
         # all other errors
-        #logging.warn(f"elasticsearch non-200 status code: {e.info}")
+        # logging.warn(f"elasticsearch non-200 status code: {e.info}")
         raise IOError(str(e.info)) from e
     query_delta = datetime.datetime.now() - query_start
 
     result_refs = []
     for h in resp.hits:
         # might be a list because of consolidation
-        if isinstance(h._d_.get('source_work_ident'), list):
-            h._d_['source_work_ident'] = h._d_['source_work_ident'][0]
+        if isinstance(h._d_.get("source_work_ident"), list):
+            h._d_["source_work_ident"] = h._d_["source_work_ident"][0]
         result_refs.append(BiblioRef.parse_obj(h._d_).hacks())
 
     return RefHits(
@@ -224,7 +229,10 @@ def get_inbound_refs(
         search = search.extra(
             collapse={
                 "field": "source_work_ident",
-                "inner_hits": {"name": "source_more", "size": 0,},
+                "inner_hits": {
+                    "name": "source_more",
+                    "size": 0,
+                },
             }
         )
 
@@ -281,61 +289,87 @@ def count_inbound_refs(
 
 
 # run fatcat API fetches for each ref and return "enriched" refs
-def enrich_inbound_refs(refs: List[BiblioRef], fatcat_api_client: Any, hide: Optional[str] = "refs", expand: Optional[str] = "container,files,webcaptures,filesets") -> List[EnrichedBiblioRef]:
+def enrich_inbound_refs(
+    refs: List[BiblioRef],
+    fatcat_api_client: Any,
+    hide: Optional[str] = "refs",
+    expand: Optional[str] = "container,files,webcaptures,filesets",
+) -> List[EnrichedBiblioRef]:
     enriched = []
     for ref in refs:
         release = None
         access = []
         if ref.source_release_ident:
-            release = fatcat_api_client.get_release(ref.source_release_ident, hide=hide, expand=expand)
+            release = fatcat_api_client.get_release(
+                ref.source_release_ident, hide=hide, expand=expand
+            )
             access = release_access_options(release)
         if ref.source_wikipedia_article:
-            wiki_lang = ref.source_wikipedia_article.split(':')[0]
-            wiki_article = ':'.join(ref.source_wikipedia_article.split(':')[1:]).replace(' ', '_')
-            access.append(AccessOption(
-                access_type="wikipedia",
-                access_url=f"https://{wiki_lang}.wikipedia.org/wiki/{wiki_article}",
-                mimetype=None,
-                size_bytes=None,
-                thumbnail_url=None
-            ))
-        enriched.append(EnrichedBiblioRef(
-            ref=ref,
-            access=access,
-            release=release,
-        ))
+            wiki_lang = ref.source_wikipedia_article.split(":")[0]
+            wiki_article = ":".join(ref.source_wikipedia_article.split(":")[1:]).replace(
+                " ", "_"
+            )
+            access.append(
+                AccessOption(
+                    access_type="wikipedia",
+                    access_url=f"https://{wiki_lang}.wikipedia.org/wiki/{wiki_article}",
+                    mimetype=None,
+                    size_bytes=None,
+                    thumbnail_url=None,
+                )
+            )
+        enriched.append(
+            EnrichedBiblioRef(
+                ref=ref,
+                access=access,
+                release=release,
+            )
+        )
     return enriched
 
 
-def enrich_outbound_refs(refs: List[BiblioRef], fatcat_api_client: Any, hide: Optional[str] = "refs", expand: Optional[str] = "container,files,webcaptures,filesets") -> List[EnrichedBiblioRef]:
+def enrich_outbound_refs(
+    refs: List[BiblioRef],
+    fatcat_api_client: Any,
+    hide: Optional[str] = "refs",
+    expand: Optional[str] = "container,files,webcaptures,filesets",
+) -> List[EnrichedBiblioRef]:
     enriched = []
     for ref in refs:
         release = None
         access = []
         if ref.target_release_ident:
-            release = fatcat_api_client.get_release(ref.target_release_ident, hide=hide, expand=expand)
+            release = fatcat_api_client.get_release(
+                ref.target_release_ident, hide=hide, expand=expand
+            )
             access = release_access_options(release)
         if ref.target_openlibrary_work:
-            access.append(AccessOption(
-                access_type="openlibrary",
-                access_url=f"https://openlibrary.org/works/{ref.target_openlibrary_work}",
-                mimetype=None,
-                size_bytes=None,
-                thumbnail_url=None
-            ))
-        if ref.target_url and '://web.archive.org/' in ref.target_url:
-            access.append(AccessOption(
-                access_type="wayback",
-                access_url=ref.target_url,
-                mimetype=None,
-                size_bytes=None,
-                thumbnail_url=None
-            ))
-        enriched.append(EnrichedBiblioRef(
-            ref=ref,
-            access=access,
-            release=release,
-        ))
+            access.append(
+                AccessOption(
+                    access_type="openlibrary",
+                    access_url=f"https://openlibrary.org/works/{ref.target_openlibrary_work}",
+                    mimetype=None,
+                    size_bytes=None,
+                    thumbnail_url=None,
+                )
+            )
+        if ref.target_url and "://web.archive.org/" in ref.target_url:
+            access.append(
+                AccessOption(
+                    access_type="wayback",
+                    access_url=ref.target_url,
+                    mimetype=None,
+                    size_bytes=None,
+                    thumbnail_url=None,
+                )
+            )
+        enriched.append(
+            EnrichedBiblioRef(
+                ref=ref,
+                access=access,
+                release=release,
+            )
+        )
     return enriched
 
 
@@ -346,21 +380,29 @@ def run_ref_query(args) -> None:
     release_ident = None
     work_ident = None
     if args.ident.startswith("release_"):
-        release_ident = args.ident.split('_')[1]
+        release_ident = args.ident.split("_")[1]
     elif args.ident.startswith("work_"):
-        work_ident = args.ident.split('_')[1]
+        work_ident = args.ident.split("_")[1]
     else:
         release_ident = args.ident
 
     print("## Outbound References")
-    hits = get_outbound_refs(release_ident=release_ident, work_ident=work_ident, es_client=args.es_client)
-    print(f"Total: {hits.count_total}  Time: {hits.query_wall_time_ms}ms; {hits.query_time_ms}ms")
+    hits = get_outbound_refs(
+        release_ident=release_ident, work_ident=work_ident, es_client=args.es_client
+    )
+    print(
+        f"Total: {hits.count_total}  Time: {hits.query_wall_time_ms}ms; {hits.query_time_ms}ms"
+    )
 
     if args.enrich == "fatcat":
-        enriched = enrich_outbound_refs(hits.result_refs, hide='refs,abstracts', fatcat_api_client=args.fatcat_api_client)
+        enriched = enrich_outbound_refs(
+            hits.result_refs, hide="refs,abstracts", fatcat_api_client=args.fatcat_api_client
+        )
         for ref in enriched:
             if ref.release:
-                print(f"{ref.ref.ref_index or '-'}\trelease_{ref.release.ident}\t{ref.ref.match_provenance}/{ref.ref.match_status}\t{ref.release.release_year or '-'}\t{ref.release.title}\t{ref.release.ext_ids.pmid or ref.release.ext_ids.doi or '-'}")
+                print(
+                    f"{ref.ref.ref_index or '-'}\trelease_{ref.release.ident}\t{ref.ref.match_provenance}/{ref.ref.match_status}\t{ref.release.release_year or '-'}\t{ref.release.title}\t{ref.release.ext_ids.pmid or ref.release.ext_ids.doi or '-'}"
+                )
             else:
                 print(f"{ref.ref.ref_index or '-'}\trelease_{ref.target_release_ident}")
     else:
@@ -369,21 +411,30 @@ def run_ref_query(args) -> None:
 
     print()
     print("## Inbound References")
-    hits = get_inbound_refs(release_ident=release_ident, work_ident=work_ident, es_client=args.es_client)
+    hits = get_inbound_refs(
+        release_ident=release_ident, work_ident=work_ident, es_client=args.es_client
+    )
 
-    print(f"Total: {hits.count_total}  Time: {hits.query_wall_time_ms}ms; {hits.query_time_ms}ms")
+    print(
+        f"Total: {hits.count_total}  Time: {hits.query_wall_time_ms}ms; {hits.query_time_ms}ms"
+    )
 
     if args.enrich == "fatcat":
-        enriched = enrich_inbound_refs(hits.result_refs, hide='refs,abstracts', fatcat_api_client=args.fatcat_api_client)
+        enriched = enrich_inbound_refs(
+            hits.result_refs, hide="refs,abstracts", fatcat_api_client=args.fatcat_api_client
+        )
         for ref in enriched:
             if ref.release:
-                print(f"release_{ref.release.ident}\t{ref.ref.match_provenance}/{ref.ref.match_status}\t{ref.release.release_year or '-'}\t{ref.release.title}\t{ref.release.ext_ids.pmid or ref.release.ext_ids.doi or '-'}")
+                print(
+                    f"release_{ref.release.ident}\t{ref.ref.match_provenance}/{ref.ref.match_status}\t{ref.release.release_year or '-'}\t{ref.release.title}\t{ref.release.ext_ids.pmid or ref.release.ext_ids.doi or '-'}"
+                )
             else:
                 print(f"release_{ref.target_release_ident}")
     else:
         for ref in hits.result_refs:
             print(f"work_{ref.source_work_ident}\trelease_{ref.source_release_ident}")
 
+
 def main() -> None:
     """
     Run this utility like:
@@ -395,9 +446,7 @@ def main() -> None:
         python -m fatcat_tools.references query release_pfrind3kh5hqhgqkueulk2tply
     """
 
-    parser = argparse.ArgumentParser(
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter
-    )
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     subparsers = parser.add_subparsers()
 
     parser.add_argument("--fatcat-api-base", default="https://api.fatcat.wiki/v0")
@@ -425,5 +474,6 @@ def main() -> None:
     else:
         raise NotImplementedError(args.func)
 
+
 if __name__ == "__main__":
     main()
diff --git a/python/fatcat_tools/reviewers/review_common.py b/python/fatcat_tools/reviewers/review_common.py
index 867d826d..59ff1c4e 100644
--- a/python/fatcat_tools/reviewers/review_common.py
+++ b/python/fatcat_tools/reviewers/review_common.py
@@ -1,4 +1,3 @@
-
 import datetime
 import subprocess
 import time
@@ -34,8 +33,8 @@ class CheckResult:
         self.status = status
         self.check_type = check_type
         self.description = description
-        self.ident = kwargs.get('ident')
-        self.rev = kwargs.get('rev')
+        self.ident = kwargs.get("ident")
+        self.rev = kwargs.get("rev")
 
     def __repr__(self):
         return str(self.__dict__)
@@ -72,17 +71,17 @@ class EditCheck:
 
 
 class ReviewBot:
-
     def __init__(self, api, verbose=False, **kwargs):
 
         self.api = api
         self.checks = []
         self.verbose = verbose
-        self.extra = kwargs.get('extra', dict())
-        self.extra['git_rev'] = self.extra.get('git_rev',
-            subprocess.check_output(["git", "describe", "--always"]).strip()).decode('utf-8')
-        self.extra['agent'] = self.extra.get('agent', 'fatcat_tools.ReviewBot')
-        self.poll_interval = kwargs.get('poll_interval', 10.0)
+        self.extra = kwargs.get("extra", dict())
+        self.extra["git_rev"] = self.extra.get(
+            "git_rev", subprocess.check_output(["git", "describe", "--always"]).strip()
+        ).decode("utf-8")
+        self.extra["agent"] = self.extra.get("agent", "fatcat_tools.ReviewBot")
+        self.poll_interval = kwargs.get("poll_interval", 10.0)
 
     def run_single(self, editgroup_id, annotate=True):
         eg = self.api.get_editgroup(editgroup_id)
@@ -96,7 +95,9 @@ class ReviewBot:
             since = datetime.datetime.utcnow()
         while True:
             # XXX: better isoformat conversion?
-            eg_list = self.api.get_editgroups_reviewable(since=since.isoformat()[:19] + "Z", limit=100)
+            eg_list = self.api.get_editgroups_reviewable(
+                since=since.isoformat()[:19] + "Z", limit=100
+            )
             if not eg_list:
                 print("Sleeping {} seconds...".format(self.poll_interval))
                 time.sleep(self.poll_interval)
@@ -104,8 +105,11 @@ class ReviewBot:
             for eg in eg_list:
                 # TODO: fetch annotations to ensure we haven't already annotated
                 annotation = self.review_editgroup(eg)
-                print("Reviewed {} disposition:{}".format(
-                    eg.editgroup_id, annotation.extra['disposition']))
+                print(
+                    "Reviewed {} disposition:{}".format(
+                        eg.editgroup_id, annotation.extra["disposition"]
+                    )
+                )
                 self.api.create_editgroup_annotation(eg.editgroup_id, annotation)
                 since = eg.submitted
             # to prevent busy loops (TODO: needs review/rethink; multiple
@@ -125,10 +129,9 @@ class ReviewBot:
         else:
             raise ValueError
 
-        for (status, title) in (('fail', 'Failed check'), ('warning', 'Warnings')):
+        for (status, title) in (("fail", "Failed check"), ("warning", "Warnings")):
             if result_counts[status] > 0:
-                comment += "\n\n### {} ({}):\n".format(
-                    status, result_counts[status])
+                comment += "\n\n### {} ({}):\n".format(status, result_counts[status])
             for result in results:
                 if result.status == status and result.check_type == "editgroup":
                     comment += "\n- {description}".format(description=result.description)
@@ -137,15 +140,18 @@ class ReviewBot:
                         check_type=result.check_type,
                         rev=result.rev,
                         entity_type=result.check_type,
-                        description=result.description)
+                        description=result.description,
+                    )
 
         extra = self.extra.copy()
-        extra.update({
-            "disposition": disposition,
-            "submit_timestamp": editgroup.submitted.isoformat(),
-            "checks": [check.name for check in self.checks],
-            "result_counts": dict(result_counts),
-        })
+        extra.update(
+            {
+                "disposition": disposition,
+                "submit_timestamp": editgroup.submitted.isoformat(),
+                "checks": [check.name for check in self.checks],
+                "result_counts": dict(result_counts),
+            }
+        )
         annotation = fatcat_openapi_client.EditgroupAnnotation(
             comment_markdown=comment,
             editgroup_id=editgroup.editgroup_id,
@@ -156,7 +162,7 @@ class ReviewBot:
     def result_counts(self, results):
         counts = Counter()
         for result in results:
-            counts['total'] += 1
+            counts["total"] += 1
             counts[result.status] += 1
         return counts
 
@@ -217,13 +223,18 @@ class DummyCheck(EditCheck):
     name = "DummyCheck"
 
     def check_editgroup(self, editgroup):
-        return CheckResult("pass", "editgroup",
+        return CheckResult(
+            "pass",
+            "editgroup",
             "every edit is precious, thanks [editor {editor_id}](/editor/{editor_id})!".format(
-                editor_id=editgroup.editor_id))
+                editor_id=editgroup.editor_id
+            ),
+        )
 
     def check_work(self, entity, edit):
         return CheckResult("pass", "work", "this work edit is beautiful")
 
+
 class DummyReviewBot(ReviewBot):
     """
     This bot reviews everything and always passes.
diff --git a/python/fatcat_tools/transforms/access.py b/python/fatcat_tools/transforms/access.py
index ae9880e7..34212a6a 100644
--- a/python/fatcat_tools/transforms/access.py
+++ b/python/fatcat_tools/transforms/access.py
@@ -1,4 +1,3 @@
-
 from enum import Enum
 from typing import List, Optional
 
@@ -16,6 +15,7 @@ class AccessType(str, Enum):
     openlibrary = "openlibrary"
     wikipedia = "wikipedia"
 
+
 class AccessOption(BaseModel):
 
     access_type: AccessType
@@ -40,27 +40,31 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
     option found
     """
     options = []
-    for f in (release.files or []):
+    for f in release.files or []:
         thumbnail_url = None
-        if f.mimetype == 'application/pdf' and f.sha1 and f.urls:
+        if f.mimetype == "application/pdf" and f.sha1 and f.urls:
             # NOTE: scholar.archive.org does an actual database check before
             # generating these URLs, but we skip that for speed
             thumbnail_url = f"https://blobs.fatcat.wiki/thumbnail/pdf/{f.sha1[0:2]}/{f.sha1[2:4]}/{f.sha1}.180px.jpg"
-        for u in (f.urls or []):
-            if '://web.archive.org/' in u.url:
-                return [AccessOption(
-                    access_type="wayback",
-                    access_url=u.url,
-                    mimetype=f.mimetype,
-                    size_bytes=f.size,
-                    thumbnail_url=thumbnail_url,
-                )]
-            elif '://archive.org/' in u.url:
-                return [AccessOption(
-                    access_type="ia_file",
-                    access_url=u.url,
-                    mimetype=f.mimetype,
-                    size_bytes=f.size,
-                    thumbnail_url=thumbnail_url,
-                )]
+        for u in f.urls or []:
+            if "://web.archive.org/" in u.url:
+                return [
+                    AccessOption(
+                        access_type="wayback",
+                        access_url=u.url,
+                        mimetype=f.mimetype,
+                        size_bytes=f.size,
+                        thumbnail_url=thumbnail_url,
+                    )
+                ]
+            elif "://archive.org/" in u.url:
+                return [
+                    AccessOption(
+                        access_type="ia_file",
+                        access_url=u.url,
+                        mimetype=f.mimetype,
+                        size_bytes=f.size,
+                        thumbnail_url=thumbnail_url,
+                    )
+                ]
     return options
diff --git a/python/fatcat_tools/transforms/csl.py b/python/fatcat_tools/transforms/csl.py
index f8b26bce..2b39068a 100644
--- a/python/fatcat_tools/transforms/csl.py
+++ b/python/fatcat_tools/transforms/csl.py
@@ -1,4 +1,3 @@
-
 import json
 
 from citeproc import (
@@ -13,10 +12,10 @@ from citeproc_styles import get_style_filepath
 
 
 def contribs_by_role(contribs, role):
-    ret = [c.copy() for c in contribs if c['role'] == role]
-    [c.pop('role') for c in ret]
+    ret = [c.copy() for c in contribs if c["role"] == role]
+    [c.pop("role") for c in ret]
     # TODO: some note to self here
-    [c.pop('literal') for c in ret if 'literal' in c]
+    [c.pop("literal") for c in ret if "literal" in c]
     if not ret:
         return None
     else:
@@ -33,26 +32,30 @@ def release_to_csl(entity):
     Follows, but not enforced by: https://github.com/citation-style-language/schema/blob/master/csl-data.json
     """
     contribs = []
-    for contrib in (entity.contribs or []):
+    for contrib in entity.contribs or []:
         if contrib.creator:
             # Default to "local" (publication-specific) metadata; fall back to
             # creator-level
-            family = contrib.creator.surname or contrib.surname or (contrib.raw_name and contrib.raw_name.split()[-1])
+            family = (
+                contrib.creator.surname
+                or contrib.surname
+                or (contrib.raw_name and contrib.raw_name.split()[-1])
+            )
             if not family:
                 # CSL requires some surname (family name)
                 continue
             c = dict(
                 family=family,
                 given=contrib.creator.given_name or contrib.given_name,
-                #dropping-particle
-                #non-dropping-particle
-                #suffix
-                #comma-suffix
-                #static-ordering
+                # dropping-particle
+                # non-dropping-particle
+                # suffix
+                # comma-suffix
+                # static-ordering
                 literal=contrib.creator.display_name or contrib.raw_name,
-                #parse-names,
+                # parse-names,
                 # role must be defined; default to author
-                role=contrib.role or 'author',
+                role=contrib.role or "author",
             )
         else:
             family = contrib.surname or (contrib.raw_name and contrib.raw_name.split()[-1])
@@ -64,7 +67,7 @@ def release_to_csl(entity):
                 given=contrib.given_name,
                 literal=contrib.raw_name,
                 # role must be defined; default to author
-                role=contrib.role or 'author',
+                role=contrib.role or "author",
             )
         for k in list(c.keys()):
             if not c[k]:
@@ -78,93 +81,108 @@ def release_to_csl(entity):
 
     issued_date = None
     if entity.release_date:
-        issued_date = {"date-parts": [[
-            entity.release_date.year,
-            entity.release_date.month,
-            entity.release_date.day,
-        ]]}
+        issued_date = {
+            "date-parts": [
+                [
+                    entity.release_date.year,
+                    entity.release_date.month,
+                    entity.release_date.day,
+                ]
+            ]
+        }
     elif entity.release_year:
         issued_date = {"date-parts": [[entity.release_year]]}
 
     csl = dict(
-        #id,
-        #categories
-        type=entity.release_type or "article", # can't be blank
+        # id,
+        # categories
+        type=entity.release_type or "article",  # can't be blank
         language=entity.language,
-        #journalAbbreviation
-        #shortTitle
+        # journalAbbreviation
+        # shortTitle
         ## see below for all contrib roles
-        #accessed
-        #container
-        #event-date
+        # accessed
+        # container
+        # event-date
         issued=issued_date,
-        #original-date
-        #submitted
+        # original-date
+        # submitted
         abstract=abstract,
-        #annote
-        #archive
-        #archive_location
-        #archive-place
-        #authority
-        #call-number
-        #chapter-number
-        #citation-number
-        #citation-label
-        #collection-number
-        #collection-title
+        # annote
+        # archive
+        # archive_location
+        # archive-place
+        # authority
+        # call-number
+        # chapter-number
+        # citation-number
+        # citation-label
+        # collection-number
+        # collection-title
         container_title=entity.container and entity.container.name,
-        #container-title-short
-        #dimensions
+        # container-title-short
+        # dimensions
         DOI=entity.ext_ids.doi,
-        #edition
-        #event
-        #event-place
-        #first-reference-note-number
-        #genre
+        # edition
+        # event
+        # event-place
+        # first-reference-note-number
+        # genre
         ISBN=entity.ext_ids.isbn13,
         ISSN=entity.container and entity.container.issnl,
         issue=entity.issue,
-        #jurisdiction
-        #keyword
-        #locator
-        #medium
-        #note
-        #number
-        #number-of-pages
-        #number-of-volumes
-        #original-publisher
-        #original-publisher-place
-        #original-title
+        # jurisdiction
+        # keyword
+        # locator
+        # medium
+        # note
+        # number
+        # number-of-pages
+        # number-of-volumes
+        # original-publisher
+        # original-publisher-place
+        # original-title
         # TODO: page=entity.pages,
-        page_first=entity.pages and entity.pages.split('-')[0],
+        page_first=entity.pages and entity.pages.split("-")[0],
         PMCID=entity.ext_ids.pmcid,
         PMID=entity.ext_ids.pmid,
         publisher=(entity.container and entity.container.publisher) or entity.publisher,
-        #publisher-place
-        #references
-        #reviewed-title
-        #scale
-        #section
-        #source
-        #status
+        # publisher-place
+        # references
+        # reviewed-title
+        # scale
+        # section
+        # source
+        # status
         title=entity.title,
-        #title-short
-        #URL
-        #version
+        # title-short
+        # URL
+        # version
         volume=entity.volume,
-        #year-suffix
+        # year-suffix
     )
-    for role in ['author', 'collection-editor', 'composer', 'container-author',
-            'director', 'editor', 'editorial-director', 'interviewer',
-            'illustrator', 'original-author', 'recipient', 'reviewed-author',
-            'translator']:
+    for role in [
+        "author",
+        "collection-editor",
+        "composer",
+        "container-author",
+        "director",
+        "editor",
+        "editorial-director",
+        "interviewer",
+        "illustrator",
+        "original-author",
+        "recipient",
+        "reviewed-author",
+        "translator",
+    ]:
         cbr = contribs_by_role(contribs, role)
         if cbr:
             csl[role] = cbr
     # underline-to-dash
-    csl['container-title'] = csl.pop('container_title')
-    csl['page-first'] = csl.pop('page_first')
-    empty_keys = [k for k,v in csl.items() if not v]
+    csl["container-title"] = csl.pop("container_title")
+    csl["page-first"] = csl.pop("page_first")
+    empty_keys = [k for k, v in csl.items() if not v]
     for k in empty_keys:
         csl.pop(k)
     return csl
@@ -184,10 +202,11 @@ def refs_to_csl(entity):
                 title=ref.title,
                 issued=issued_date,
             )
-        csl['id'] = ref.key or ref.index, # zero- or one-indexed?
+        csl["id"] = (ref.key or ref.index,)  # zero- or one-indexed?
         ret.append(csl)
     return ret
 
+
 def citeproc_csl(csl_json, style, html=False):
     """
     Renders a release entity to a styled citation.
@@ -200,8 +219,8 @@ def citeproc_csl(csl_json, style, html=False):
     Returns a string; if the html flag is set, and the style isn't 'csl-json'
     or 'bibtex', it will be HTML. Otherwise plain text.
     """
-    if not csl_json.get('id'):
-        csl_json['id'] = "unknown"
+    if not csl_json.get("id"):
+        csl_json["id"] = "unknown"
     if style == "csl-json":
         return json.dumps(csl_json)
     bib_src = CiteProcJSON([csl_json])
@@ -211,7 +230,7 @@ def citeproc_csl(csl_json, style, html=False):
     style_path = get_style_filepath(style)
     bib_style = CitationStylesStyle(style_path, validate=False)
     bib = CitationStylesBibliography(bib_style, bib_src, form)
-    bib.register(Citation([CitationItem(csl_json['id'])]))
+    bib.register(Citation([CitationItem(csl_json["id"])]))
     lines = bib.bibliography()[0]
     if style == "bibtex":
         out = ""
@@ -222,6 +241,6 @@ def citeproc_csl(csl_json, style, html=False):
                 out += "\n " + line
             else:
                 out += line
-        return ''.join(out)
+        return "".join(out)
     else:
-        return ''.join(lines)
+        return "".join(lines)
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index 1826d4eb..e39e9ea4 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -1,4 +1,3 @@
-
 import datetime
 from typing import Any, Dict, Optional
 
@@ -13,13 +12,14 @@ from fatcat_openapi_client import (
 
 
 def check_kbart(year: int, archive: dict) -> Optional[bool]:
-    if not archive or not archive.get('year_spans'):
+    if not archive or not archive.get("year_spans"):
         return None
-    for span in archive['year_spans']:
+    for span in archive["year_spans"]:
         if year >= span[0] and year <= span[1]:
             return True
     return False
 
+
 def test_check_kbart() -> None:
 
     assert check_kbart(1990, dict()) is None
@@ -40,87 +40,89 @@ def release_to_elasticsearch(entity: ReleaseEntity, force_bool: bool = True) ->
     Raises exception on error (never returns None)
     """
 
-    if entity.state in ('redirect', 'deleted'):
+    if entity.state in ("redirect", "deleted"):
         return dict(
-            ident = entity.ident,
-            state = entity.state,
+            ident=entity.ident,
+            state=entity.state,
         )
-    elif entity.state != 'active':
+    elif entity.state != "active":
         raise ValueError("Unhandled entity state: {}".format(entity.state))
 
     # First, the easy ones (direct copy)
     release = entity
     t: Dict[str, Any] = dict(
-        doc_index_ts=datetime.datetime.utcnow().isoformat()+"Z",
-        ident = release.ident,
-        state = release.state,
-        revision = release.revision,
-        work_id = release.work_id,
-        title = release.title,
-        subtitle = release.subtitle,
-        original_title = release.original_title,
-        release_type = release.release_type,
-        release_stage = release.release_stage,
-        withdrawn_status = release.withdrawn_status,
-        language = release.language,
-        volume = release.volume,
-        issue = release.issue,
-        pages = release.pages,
-        number = release.number,
-        license = release.license_slug,
-        version = release.version,
-        doi = release.ext_ids.doi,
-        pmid = release.ext_ids.pmid,
-        pmcid = release.ext_ids.pmcid,
-        isbn13 = release.ext_ids.isbn13,
-        wikidata_qid = release.ext_ids.wikidata_qid,
-        core_id = release.ext_ids.core,
-        arxiv_id = release.ext_ids.arxiv,
-        jstor_id = release.ext_ids.jstor,
-        ark_id = release.ext_ids.ark,
-        mag_id = release.ext_ids.mag,
-        dblp_id = release.ext_ids.dblp,
-        doaj_id = release.ext_ids.doaj,
-        hdl = release.ext_ids.hdl,
-        tags = [],
+        doc_index_ts=datetime.datetime.utcnow().isoformat() + "Z",
+        ident=release.ident,
+        state=release.state,
+        revision=release.revision,
+        work_id=release.work_id,
+        title=release.title,
+        subtitle=release.subtitle,
+        original_title=release.original_title,
+        release_type=release.release_type,
+        release_stage=release.release_stage,
+        withdrawn_status=release.withdrawn_status,
+        language=release.language,
+        volume=release.volume,
+        issue=release.issue,
+        pages=release.pages,
+        number=release.number,
+        license=release.license_slug,
+        version=release.version,
+        doi=release.ext_ids.doi,
+        pmid=release.ext_ids.pmid,
+        pmcid=release.ext_ids.pmcid,
+        isbn13=release.ext_ids.isbn13,
+        wikidata_qid=release.ext_ids.wikidata_qid,
+        core_id=release.ext_ids.core,
+        arxiv_id=release.ext_ids.arxiv,
+        jstor_id=release.ext_ids.jstor,
+        ark_id=release.ext_ids.ark,
+        mag_id=release.ext_ids.mag,
+        dblp_id=release.ext_ids.dblp,
+        doaj_id=release.ext_ids.doaj,
+        hdl=release.ext_ids.hdl,
+        tags=[],
     )
 
-    t.update(dict(
-        is_oa = None,
-        is_longtail_oa = None,
-        is_preserved = None,
-        in_web = False,
-        in_dweb = False,
-        in_ia = False,
-        in_ia_sim = False,
-        in_kbart = None,
-        in_jstor = False,
-        in_doaj= bool(release.ext_ids.doaj),
-        in_shadows = False,
-    ))
+    t.update(
+        dict(
+            is_oa=None,
+            is_longtail_oa=None,
+            is_preserved=None,
+            in_web=False,
+            in_dweb=False,
+            in_ia=False,
+            in_ia_sim=False,
+            in_kbart=None,
+            in_jstor=False,
+            in_doaj=bool(release.ext_ids.doaj),
+            in_shadows=False,
+        )
+    )
 
     release_year = release.release_year
     if release.release_date:
         # .isoformat() results in, eg, '2010-10-22' (YYYY-MM-DD)
-        t['release_date'] = release.release_date.isoformat()
+        t["release_date"] = release.release_date.isoformat()
         if not release_year:
             release_year = release.release_date.year
     if release_year:
-        t['release_year'] = release_year
+        t["release_year"] = release_year
 
-    t['any_abstract'] = len(release.abstracts or []) > 0
-    t['ref_count'] = len(release.refs or [])
+    t["any_abstract"] = len(release.abstracts or []) > 0
+    t["ref_count"] = len(release.refs or [])
     ref_release_ids = []
-    for r in (release.refs or []):
+    for r in release.refs or []:
         if r.target_release_id:
             ref_release_ids.append(r.target_release_id)
-    t['ref_release_ids'] = ref_release_ids
-    t['ref_linked_count'] = len(ref_release_ids)
-    t['contrib_count'] = len(release.contribs or [])
+    t["ref_release_ids"] = ref_release_ids
+    t["ref_linked_count"] = len(ref_release_ids)
+    t["contrib_count"] = len(release.contribs or [])
     contrib_names = []
     contrib_affiliations = []
     creator_ids = []
-    for c in (release.contribs or []):
+    for c in release.contribs or []:
         if c.creator and c.creator.display_name:
             contrib_names.append(c.creator.display_name)
         elif c.raw_name:
@@ -132,193 +134,218 @@ def release_to_elasticsearch(entity: ReleaseEntity, force_bool: bool = True) ->
             creator_ids.append(c.creator_id)
         if c.raw_affiliation:
             contrib_affiliations.append(c.raw_affiliation)
-    t['contrib_names'] = contrib_names
-    t['creator_ids'] = creator_ids
-    t['affiliations'] = contrib_affiliations
+    t["contrib_names"] = contrib_names
+    t["creator_ids"] = creator_ids
+    t["affiliations"] = contrib_affiliations
 
     # TODO: mapping... probably by lookup?
-    t['affiliation_rors'] = None
+    t["affiliation_rors"] = None
 
     if release.container:
         t.update(_rte_container_helper(release.container, release_year))
 
     # fall back to release-level container metadata if container not linked or
     # missing context
-    if not t.get('publisher'):
-        t['publisher'] = release.publisher
-    if not t.get('container_name') and release.extra:
-        t['container_name'] = release.extra.get('container_name')
+    if not t.get("publisher"):
+        t["publisher"] = release.publisher
+    if not t.get("container_name") and release.extra:
+        t["container_name"] = release.extra.get("container_name")
 
-    if release.ext_ids.jstor or (release.ext_ids.doi and release.ext_ids.doi.startswith('10.2307/')):
-        t['in_jstor'] = True
+    if release.ext_ids.jstor or (
+        release.ext_ids.doi and release.ext_ids.doi.startswith("10.2307/")
+    ):
+        t["in_jstor"] = True
 
     # transform file/fileset/webcapture related fields
     t.update(_rte_content_helper(release))
 
     if release.ext_ids.doaj:
-        t['is_oa'] = True
+        t["is_oa"] = True
 
     if release.license_slug:
         # TODO: more/better checks here, particularly strict *not* OA licenses
         if release.license_slug.startswith("CC-"):
-            t['is_oa'] = True
+            t["is_oa"] = True
         if release.license_slug.startswith("ARXIV-"):
-            t['is_oa'] = True
+            t["is_oa"] = True
 
-    t['is_work_alias'] = None
+    t["is_work_alias"] = None
     extra = release.extra or dict()
     if extra:
-        if extra.get('is_oa'):
+        if extra.get("is_oa"):
             # NOTE: not actually setting this anywhere... but could
-            t['is_oa'] = True
-        if extra.get('is_work_alias') is not None:
-            t['is_work_alias'] = bool(extra.get('is_work_alias'))
-        if extra.get('longtail_oa'):
+            t["is_oa"] = True
+        if extra.get("is_work_alias") is not None:
+            t["is_work_alias"] = bool(extra.get("is_work_alias"))
+        if extra.get("longtail_oa"):
             # sometimes set by GROBID/matcher
-            t['is_oa'] = True
-            t['is_longtail_oa'] = True
-        if not t.get('container_name'):
-            t['container_name'] = extra.get('container_name')
-        if extra.get('crossref'):
-            if extra['crossref'].get('archive'):
+            t["is_oa"] = True
+            t["is_longtail_oa"] = True
+        if not t.get("container_name"):
+            t["container_name"] = extra.get("container_name")
+        if extra.get("crossref"):
+            if extra["crossref"].get("archive"):
                 # all crossref archives are KBART, I believe
-                t['in_kbart'] = True
+                t["in_kbart"] = True
         # backwards compatible subtitle fetching
-        if not t['subtitle'] and extra.get('subtitle'):
-            if type(extra['subtitle']) == list:
-                t['subtitle'] = extra['subtitle'][0]
+        if not t["subtitle"] and extra.get("subtitle"):
+            if type(extra["subtitle"]) == list:
+                t["subtitle"] = extra["subtitle"][0]
             else:
-                t['subtitle'] = extra['subtitle']
+                t["subtitle"] = extra["subtitle"]
 
-    t['first_page'] = None
+    t["first_page"] = None
     if release.pages:
-        first = release.pages.split('-')[0]
-        first = first.replace('p', '')
+        first = release.pages.split("-")[0]
+        first = first.replace("p", "")
         if first.isdigit():
-            t['first_page'] = first
+            t["first_page"] = first
         # TODO: non-numerical first pages
 
-    t['ia_microfilm_url'] = None
-    if t['in_ia_sim']:
+    t["ia_microfilm_url"] = None
+    if t["in_ia_sim"]:
         # TODO: determine URL somehow? I think this is in flux. Will probably
         # need extra metadata in the container extra field.
         # special case as a demo for now.
-        if release.container_id == "hl5g6d5msjcl7hlbyyvcsbhc2u" \
-                and release.release_year in (2011, 2013) \
-                and release.issue \
-                and release.issue.isdigit() \
-                and t['first_page']:
-            t['ia_microfilm_url'] = "https://archive.org/details/sim_bjog_{}-{:02d}/page/n{}".format(
+        if (
+            release.container_id == "hl5g6d5msjcl7hlbyyvcsbhc2u"
+            and release.release_year in (2011, 2013)
+            and release.issue
+            and release.issue.isdigit()
+            and t["first_page"]
+        ):
+            t[
+                "ia_microfilm_url"
+            ] = "https://archive.org/details/sim_bjog_{}-{:02d}/page/n{}".format(
                 release.release_year,
                 int(release.issue) - 1,
-                t['first_page'],
+                t["first_page"],
             )
 
-    t['doi_registrar'] = None
-    if extra and t['doi']:
-        for k in ('crossref', 'datacite', 'jalc'):
+    t["doi_registrar"] = None
+    if extra and t["doi"]:
+        for k in ("crossref", "datacite", "jalc"):
             if k in extra:
-                t['doi_registrar'] = k
-        if 'doi_registrar' not in t:
-            t['doi_registrar'] = 'crossref'
+                t["doi_registrar"] = k
+        if "doi_registrar" not in t:
+            t["doi_registrar"] = "crossref"
 
-    if t['doi']:
-        t['doi_prefix'] = t['doi'].split('/')[0]
+    if t["doi"]:
+        t["doi_prefix"] = t["doi"].split("/")[0]
 
-    if t['is_longtail_oa']:
-        t['is_oa'] = True
+    if t["is_longtail_oa"]:
+        t["is_oa"] = True
 
     # optionally coerce all flags from Optional[bool] to bool
     if force_bool:
-        for k in ('is_oa', 'is_longtail_oa', 'in_kbart', 'in_ia_sim',
-                  'in_jstor', 'in_web', 'in_dweb', 'in_shadows',
-                  'is_work_alias'):
+        for k in (
+            "is_oa",
+            "is_longtail_oa",
+            "in_kbart",
+            "in_ia_sim",
+            "in_jstor",
+            "in_web",
+            "in_dweb",
+            "in_shadows",
+            "is_work_alias",
+        ):
             t[k] = bool(t[k])
 
-    t['in_ia'] = bool(t['in_ia'])
-    t['is_preserved'] = bool(
-        t['is_preserved']
-        or t['in_ia']
-        or t['in_kbart']
-        or t['in_jstor']
-        or t.get('pmcid')
-        or t.get('arxiv_id')
+    t["in_ia"] = bool(t["in_ia"])
+    t["is_preserved"] = bool(
+        t["is_preserved"]
+        or t["in_ia"]
+        or t["in_kbart"]
+        or t["in_jstor"]
+        or t.get("pmcid")
+        or t.get("arxiv_id")
     )
 
-    if t['in_ia']:
-        t['preservation'] = 'bright'
-    elif t['is_preserved']:
-        t['preservation'] = 'dark'
-    elif t['in_shadows']:
-        t['preservation'] = 'shadows_only'
+    if t["in_ia"]:
+        t["preservation"] = "bright"
+    elif t["is_preserved"]:
+        t["preservation"] = "dark"
+    elif t["in_shadows"]:
+        t["preservation"] = "shadows_only"
     else:
-        t['preservation'] = 'none'
+        t["preservation"] = "none"
 
     return t
 
+
 def _rte_container_helper(container: ContainerEntity, release_year: Optional[int]) -> dict:
     """
     Container metadata sub-section of release_to_elasticsearch()
     """
     this_year = datetime.date.today().year
     t = dict()
-    t['publisher'] = container.publisher
-    t['container_name'] = container.name
+    t["publisher"] = container.publisher
+    t["container_name"] = container.name
     # this is container.ident, not release.container_id, because there may
     # be a redirect involved
-    t['container_id'] = container.ident
-    t['container_issnl'] = container.issnl
+    t["container_id"] = container.ident
+    t["container_issnl"] = container.issnl
     issns = [container.issnl, container.issne, container.issnp]
     issns = list(set([i for i in issns if i]))
-    t['container_issns'] = issns
-    t['container_type'] = container.container_type
-    t['container_publication_status'] = container.publication_status
+    t["container_issns"] = issns
+    t["container_type"] = container.container_type
+    t["container_publication_status"] = container.publication_status
     if container.extra:
         c_extra = container.extra
-        if c_extra.get('kbart') and release_year:
-            if check_kbart(release_year, c_extra['kbart'].get('jstor')):
-                t['in_jstor'] = True
-            if t.get('in_kbart') or t.get('in_jstor'):
-                t['in_kbart'] = True
-            for archive in ('portico', 'lockss', 'clockss', 'pkp_pln',
-                            'hathitrust', 'scholarsportal', 'cariniana'):
-                t['in_kbart'] = t.get('in_kbart') or check_kbart(release_year, c_extra['kbart'].get(archive))
+        if c_extra.get("kbart") and release_year:
+            if check_kbart(release_year, c_extra["kbart"].get("jstor")):
+                t["in_jstor"] = True
+            if t.get("in_kbart") or t.get("in_jstor"):
+                t["in_kbart"] = True
+            for archive in (
+                "portico",
+                "lockss",
+                "clockss",
+                "pkp_pln",
+                "hathitrust",
+                "scholarsportal",
+                "cariniana",
+            ):
+                t["in_kbart"] = t.get("in_kbart") or check_kbart(
+                    release_year, c_extra["kbart"].get(archive)
+                )
                 # recent KBART coverage is often not updated for the
                 # current year. So for current-year publications, consider
                 # coverage from *last* year to also be included in the
                 # Keeper
-                if not t.get('in_kbart') and release_year == this_year:
-                    t['in_kbart'] = check_kbart(this_year - 1, c_extra['kbart'].get(archive))
-
-        if c_extra.get('ia'):
-            if c_extra['ia'].get('sim') and release_year:
-                t['in_ia_sim'] = check_kbart(release_year, c_extra['ia']['sim'])
-            if c_extra['ia'].get('longtail_oa'):
-                t['is_longtail_oa'] = True
-        if c_extra.get('sherpa_romeo'):
-            if c_extra['sherpa_romeo'].get('color') == 'white':
-                t['is_oa'] = False
-        if c_extra.get('default_license') and c_extra.get('default_license').startswith('CC-'):
-            t['is_oa'] = True
-        if c_extra.get('doaj'):
-            if c_extra['doaj'].get('as_of'):
-                t['is_oa'] = True
-                t['in_doaj'] = True
-        if c_extra.get('road'):
-            if c_extra['road'].get('as_of'):
-                t['is_oa'] = True
-        if c_extra.get('szczepanski'):
-            if c_extra['szczepanski'].get('as_of'):
-                t['is_oa'] = True
-        if c_extra.get('country'):
-            t['country_code'] = c_extra['country']
-            t['country_code_upper'] = c_extra['country'].upper()
-        if c_extra.get('publisher_type'):
-            t['publisher_type'] = c_extra['publisher_type']
-        if c_extra.get('discipline'):
-            t['discipline'] = c_extra['discipline']
+                if not t.get("in_kbart") and release_year == this_year:
+                    t["in_kbart"] = check_kbart(this_year - 1, c_extra["kbart"].get(archive))
+
+        if c_extra.get("ia"):
+            if c_extra["ia"].get("sim") and release_year:
+                t["in_ia_sim"] = check_kbart(release_year, c_extra["ia"]["sim"])
+            if c_extra["ia"].get("longtail_oa"):
+                t["is_longtail_oa"] = True
+        if c_extra.get("sherpa_romeo"):
+            if c_extra["sherpa_romeo"].get("color") == "white":
+                t["is_oa"] = False
+        if c_extra.get("default_license") and c_extra.get("default_license").startswith("CC-"):
+            t["is_oa"] = True
+        if c_extra.get("doaj"):
+            if c_extra["doaj"].get("as_of"):
+                t["is_oa"] = True
+                t["in_doaj"] = True
+        if c_extra.get("road"):
+            if c_extra["road"].get("as_of"):
+                t["is_oa"] = True
+        if c_extra.get("szczepanski"):
+            if c_extra["szczepanski"].get("as_of"):
+                t["is_oa"] = True
+        if c_extra.get("country"):
+            t["country_code"] = c_extra["country"]
+            t["country_code_upper"] = c_extra["country"].upper()
+        if c_extra.get("publisher_type"):
+            t["publisher_type"] = c_extra["publisher_type"]
+        if c_extra.get("discipline"):
+            t["discipline"] = c_extra["discipline"]
     return t
 
+
 def _rte_content_helper(release: ReleaseEntity) -> dict:
     """
     File/FileSet/WebCapture sub-section of release_to_elasticsearch()
@@ -329,9 +356,9 @@ def _rte_content_helper(release: ReleaseEntity) -> dict:
     - any other URL
     """
     t = dict(
-        file_count = len(release.files or []),
-        fileset_count = len(release.filesets or []),
-        webcapture_count = len(release.webcaptures or []),
+        file_count=len(release.files or []),
+        fileset_count=len(release.filesets or []),
+        webcapture_count=len(release.webcaptures or []),
     )
 
     any_pdf_url = None
@@ -340,38 +367,42 @@ def _rte_content_helper(release: ReleaseEntity) -> dict:
     ia_pdf_url = None
 
     for f in release.files or []:
-        if f.extra and f.extra.get('shadows'):
-            t['in_shadows'] = True
-        is_pdf = 'pdf' in (f.mimetype or '')
-        for release_url in (f.urls or []):
+        if f.extra and f.extra.get("shadows"):
+            t["in_shadows"] = True
+        is_pdf = "pdf" in (f.mimetype or "")
+        for release_url in f.urls or []:
             # first generic flags
             t.update(_rte_url_helper(release_url))
 
             # then PDF specific stuff (for generating "best URL" fields)
-            if not f.mimetype and 'pdf' in release_url.url.lower():
+            if not f.mimetype and "pdf" in release_url.url.lower():
                 is_pdf = True
             if is_pdf:
                 any_pdf_url = release_url.url
-                if release_url.rel in ('webarchive', 'repository', 'repo'):
+                if release_url.rel in ("webarchive", "repository", "repo"):
                     good_pdf_url = release_url.url
-                if '//web.archive.org/' in release_url.url or '//archive.org/' in release_url.url:
+                if (
+                    "//web.archive.org/" in release_url.url
+                    or "//archive.org/" in release_url.url
+                ):
                     best_pdf_url = release_url.url
                     ia_pdf_url = release_url.url
 
     # here is where we bake-in PDF url priority; IA-specific
-    t['best_pdf_url'] = best_pdf_url or good_pdf_url or any_pdf_url
-    t['ia_pdf_url'] = ia_pdf_url
+    t["best_pdf_url"] = best_pdf_url or good_pdf_url or any_pdf_url
+    t["ia_pdf_url"] = ia_pdf_url
 
     for fs in release.filesets or []:
-        for url_obj in (fs.urls or []):
+        for url_obj in fs.urls or []:
             t.update(_rte_url_helper(url_obj))
 
     for wc in release.webcaptures or []:
-        for url_obj in (wc.archive_urls or []):
+        for url_obj in wc.archive_urls or []:
             t.update(_rte_url_helper(url_obj))
 
     return t
 
+
 def _rte_url_helper(url_obj) -> dict:
     """
     Takes a location URL ('url' and 'rel' keys) and returns generic preservation status.
@@ -382,17 +413,17 @@ def _rte_url_helper(url_obj) -> dict:
     these will be iteratively update() into the overal object.
     """
     t = dict()
-    if url_obj.rel in ('webarchive', 'repository', 'archive', 'repo'):
-        t['is_preserved'] = True
-    if '//web.archive.org/' in url_obj.url or '//archive.org/' in url_obj.url:
-        t['in_ia'] = True
-    if url_obj.url.lower().startswith('http') or url_obj.url.lower().startswith('ftp'):
-        t['in_web'] = True
-    if url_obj.rel in ('dweb', 'p2p', 'ipfs', 'dat', 'torrent'):
+    if url_obj.rel in ("webarchive", "repository", "archive", "repo"):
+        t["is_preserved"] = True
+    if "//web.archive.org/" in url_obj.url or "//archive.org/" in url_obj.url:
+        t["in_ia"] = True
+    if url_obj.url.lower().startswith("http") or url_obj.url.lower().startswith("ftp"):
+        t["in_web"] = True
+    if url_obj.rel in ("dweb", "p2p", "ipfs", "dat", "torrent"):
         # not sure what rel will be for this stuff
-        t['in_dweb'] = True
-    if '//www.jstor.org/' in url_obj.url:
-        t['in_jstor'] = True
+        t["in_dweb"] = True
+    if "//www.jstor.org/" in url_obj.url:
+        t["in_jstor"] = True
     return t
 
 
@@ -404,50 +435,59 @@ def container_to_elasticsearch(entity, force_bool=True, stats=None):
     Raises exception on error (never returns None)
     """
 
-    if entity.state in ('redirect', 'deleted'):
+    if entity.state in ("redirect", "deleted"):
         return dict(
-            ident = entity.ident,
-            state = entity.state,
+            ident=entity.ident,
+            state=entity.state,
         )
-    elif entity.state != 'active':
+    elif entity.state != "active":
         raise ValueError("Unhandled entity state: {}".format(entity.state))
 
     # First, the easy ones (direct copy)
     t = dict(
-        doc_index_ts=datetime.datetime.utcnow().isoformat()+"Z",
-        ident = entity.ident,
-        state = entity.state,
-        revision = entity.revision,
-
-        name = entity.name,
-        publisher = entity.publisher,
-        container_type = entity.container_type,
-        publication_status= entity.publication_status,
-        issnl = entity.issnl,
-        issne = entity.issne,
-        issnp = entity.issnp,
-        wikidata_qid = entity.wikidata_qid,
+        doc_index_ts=datetime.datetime.utcnow().isoformat() + "Z",
+        ident=entity.ident,
+        state=entity.state,
+        revision=entity.revision,
+        name=entity.name,
+        publisher=entity.publisher,
+        container_type=entity.container_type,
+        publication_status=entity.publication_status,
+        issnl=entity.issnl,
+        issne=entity.issne,
+        issnp=entity.issnp,
+        wikidata_qid=entity.wikidata_qid,
     )
 
     if not entity.extra:
         entity.extra = dict()
-    for key in ('country', 'languages', 'mimetypes', 'original_name',
-                'first_year', 'last_year', 'aliases', 'abbrev', 'region',
-                'discipline', 'publisher_type'):
+    for key in (
+        "country",
+        "languages",
+        "mimetypes",
+        "original_name",
+        "first_year",
+        "last_year",
+        "aliases",
+        "abbrev",
+        "region",
+        "discipline",
+        "publisher_type",
+    ):
         if entity.extra.get(key):
             t[key] = entity.extra[key]
 
-    if entity.extra.get('dblp') and entity.extra['dblp'].get('prefix'):
-        t['dblp_prefix'] = entity.extra['dblp']['prefix']
+    if entity.extra.get("dblp") and entity.extra["dblp"].get("prefix"):
+        t["dblp_prefix"] = entity.extra["dblp"]["prefix"]
 
-    if 'country' in t:
-        t['country_code'] = t.pop('country')
+    if "country" in t:
+        t["country_code"] = t.pop("country")
 
-    t['issns'] = [entity.issnl, entity.issne, entity.issnp]
-    for key in ('issnp', 'issne'):
+    t["issns"] = [entity.issnl, entity.issne, entity.issnp]
+    for key in ("issnp", "issne"):
         if entity.extra.get(key):
-            t['issns'].append(entity.extra[key])
-    t['issns'] = list(set([i for i in t['issns'] if i]))
+            t["issns"].append(entity.extra[key])
+    t["issns"] = list(set([i for i in t["issns"] if i]))
 
     in_doaj = None
     in_road = None
@@ -459,72 +499,72 @@ def container_to_elasticsearch(entity, force_bool=True, stats=None):
     keepers = []
 
     extra = entity.extra
-    if extra.get('doaj'):
-        if extra['doaj'].get('as_of'):
+    if extra.get("doaj"):
+        if extra["doaj"].get("as_of"):
             in_doaj = True
-    if extra.get('road'):
-        if extra['road'].get('as_of'):
+    if extra.get("road"):
+        if extra["road"].get("as_of"):
             in_road = True
-    if extra.get('szczepanski'):
-        if extra['szczepanski'].get('as_of'):
+    if extra.get("szczepanski"):
+        if extra["szczepanski"].get("as_of"):
             is_oa = True
-    if extra.get('default_license'):
-        if extra['default_license'].startswith('CC-'):
+    if extra.get("default_license"):
+        if extra["default_license"].startswith("CC-"):
             is_oa = True
-    t['sherpa_romeo_color'] = None
-    if extra.get('sherpa_romeo'):
-        t['sherpa_romeo_color'] = extra['sherpa_romeo'].get('color')
-        if extra['sherpa_romeo'].get('color') == 'white':
+    t["sherpa_romeo_color"] = None
+    if extra.get("sherpa_romeo"):
+        t["sherpa_romeo_color"] = extra["sherpa_romeo"].get("color")
+        if extra["sherpa_romeo"].get("color") == "white":
             is_oa = False
-    if extra.get('kbart'):
+    if extra.get("kbart"):
         any_kbart = True
-        if extra['kbart'].get('jstor'):
+        if extra["kbart"].get("jstor"):
             any_jstor = True
-        for k, v in extra['kbart'].items():
+        for k, v in extra["kbart"].items():
             if v and isinstance(v, dict):
                 keepers.append(k)
-    if extra.get('ia'):
-        if extra['ia'].get('sim'):
+    if extra.get("ia"):
+        if extra["ia"].get("sim"):
             any_ia_sim = True
-        if extra['ia'].get('longtail_oa'):
+        if extra["ia"].get("longtail_oa"):
             is_longtail_oa = True
-    t['is_superceded'] = bool(extra.get('superceded'))
+    t["is_superceded"] = bool(extra.get("superceded"))
 
-    t['keepers'] = keepers
-    t['in_doaj'] = bool(in_doaj)
-    t['in_road'] = bool(in_road)
-    t['any_kbart'] = bool(any_kbart)
+    t["keepers"] = keepers
+    t["in_doaj"] = bool(in_doaj)
+    t["in_road"] = bool(in_road)
+    t["any_kbart"] = bool(any_kbart)
     if force_bool:
-        t['is_oa'] = bool(in_doaj or in_road or is_oa)
-        t['is_longtail_oa'] = bool(is_longtail_oa)
-        t['any_jstor'] = bool(any_jstor)
-        t['any_ia_sim'] = bool(any_ia_sim)
+        t["is_oa"] = bool(in_doaj or in_road or is_oa)
+        t["is_longtail_oa"] = bool(is_longtail_oa)
+        t["any_jstor"] = bool(any_jstor)
+        t["any_ia_sim"] = bool(any_ia_sim)
     else:
-        t['is_oa'] = in_doaj or in_road or is_oa
-        t['is_longtail_oa'] = is_longtail_oa
-        t['any_jstor'] = any_jstor
-        t['any_ia_sim'] = any_ia_sim
+        t["is_oa"] = in_doaj or in_road or is_oa
+        t["is_longtail_oa"] = is_longtail_oa
+        t["any_jstor"] = any_jstor
+        t["any_ia_sim"] = any_ia_sim
 
     # mix in stats, if provided
     if stats:
-        t['releases_total'] = stats['total']
-        t['preservation_bright'] = stats['preservation']['bright']
-        t['preservation_dark'] = stats['preservation']['dark']
-        t['preservation_shadows_only'] = stats['preservation']['shadows_only']
-        t['preservation_none'] = stats['preservation']['none']
+        t["releases_total"] = stats["total"]
+        t["preservation_bright"] = stats["preservation"]["bright"]
+        t["preservation_dark"] = stats["preservation"]["dark"]
+        t["preservation_shadows_only"] = stats["preservation"]["shadows_only"]
+        t["preservation_none"] = stats["preservation"]["none"]
     return t
 
 
 def _type_of_edit(edit: EntityEdit) -> str:
     if edit.revision is None and edit.redirect_ident is None:
-        return 'delete'
+        return "delete"
     elif edit.redirect_ident:
         # redirect
-        return 'update'
+        return "update"
     elif edit.prev_revision is None and edit.redirect_ident is None and edit.revision:
-        return 'create'
+        return "create"
     else:
-        return 'update'
+        return "update"
 
 
 def changelog_to_elasticsearch(entity: ChangelogEntry) -> Dict[str, Any]:
@@ -536,7 +576,7 @@ def changelog_to_elasticsearch(entity: ChangelogEntry) -> Dict[str, Any]:
 
     editgroup = entity.editgroup
     t = dict(
-        doc_index_ts=datetime.datetime.utcnow().isoformat()+"Z",
+        doc_index_ts=datetime.datetime.utcnow().isoformat() + "Z",
         index=entity.index,
         editgroup_id=entity.editgroup_id,
         timestamp=entity.timestamp.isoformat(),
@@ -547,8 +587,8 @@ def changelog_to_elasticsearch(entity: ChangelogEntry) -> Dict[str, Any]:
     )
 
     extra = editgroup.extra or dict()
-    if extra.get('agent'):
-        t['agent'] = extra['agent']
+    if extra.get("agent"):
+        t["agent"] = extra["agent"]
 
     containers = [_type_of_edit(e) for e in editgroup.edits.containers]
     creators = [_type_of_edit(e) for e in editgroup.edits.creators]
@@ -558,27 +598,27 @@ def changelog_to_elasticsearch(entity: ChangelogEntry) -> Dict[str, Any]:
     releases = [_type_of_edit(e) for e in editgroup.edits.releases]
     works = [_type_of_edit(e) for e in editgroup.edits.works]
 
-    t['containers'] = len(containers)
-    t['new_containers'] = len([e for e in containers if e == 'create'])
-    t['creators'] = len(creators)
-    t['new_creators'] = len([e for e in creators if e == 'create'])
-    t['files'] = len(files)
-    t['new_files'] = len([e for e in files if e == 'create'])
-    t['filesets'] = len(filesets)
-    t['new_filesets'] = len([e for e in filesets if e == 'create'])
-    t['webcaptures'] = len(webcaptures)
-    t['new_webcaptures'] = len([e for e in webcaptures if e == 'create'])
-    t['releases'] = len(releases)
-    t['new_releases'] = len([e for e in releases if e == 'create'])
-    t['works'] = len(works)
-    t['new_works'] = len([e for e in works if e == 'create'])
+    t["containers"] = len(containers)
+    t["new_containers"] = len([e for e in containers if e == "create"])
+    t["creators"] = len(creators)
+    t["new_creators"] = len([e for e in creators if e == "create"])
+    t["files"] = len(files)
+    t["new_files"] = len([e for e in files if e == "create"])
+    t["filesets"] = len(filesets)
+    t["new_filesets"] = len([e for e in filesets if e == "create"])
+    t["webcaptures"] = len(webcaptures)
+    t["new_webcaptures"] = len([e for e in webcaptures if e == "create"])
+    t["releases"] = len(releases)
+    t["new_releases"] = len([e for e in releases if e == "create"])
+    t["works"] = len(works)
+    t["new_works"] = len([e for e in works if e == "create"])
 
     all_edits = containers + creators + files + filesets + webcaptures + releases + works
 
-    t['created'] = len([e for e in all_edits if e == 'create'])
-    t['updated'] = len([e for e in all_edits if e == 'update'])
-    t['deleted'] = len([e for e in all_edits if e == 'delete'])
-    t['total'] = len(all_edits)
+    t["created"] = len([e for e in all_edits if e == "create"])
+    t["updated"] = len([e for e in all_edits if e == "update"])
+    t["deleted"] = len([e for e in all_edits if e == "delete"])
+    t["total"] = len(all_edits)
     return t
 
 
@@ -590,47 +630,47 @@ def file_to_elasticsearch(entity: FileEntity) -> Dict[str, Any]:
     Raises exception on error (never returns None)
     """
 
-    if entity.state in ('redirect', 'deleted'):
+    if entity.state in ("redirect", "deleted"):
         return dict(
-            ident = entity.ident,
-            state = entity.state,
+            ident=entity.ident,
+            state=entity.state,
         )
-    elif entity.state != 'active':
+    elif entity.state != "active":
         raise ValueError("Unhandled entity state: {}".format(entity.state))
 
     # First, the easy ones (direct copy)
     t = dict(
-        doc_index_ts=datetime.datetime.utcnow().isoformat()+"Z",
-        ident = entity.ident,
-        state = entity.state,
-        revision = entity.revision,
-        release_ids = entity.release_ids,
-        release_count = len(entity.release_ids),
-        mimetype = entity.mimetype,
-        size_bytes = entity.size,
-        sha1 = entity.sha1,
-        sha256 = entity.sha256,
-        md5 = entity.md5,
+        doc_index_ts=datetime.datetime.utcnow().isoformat() + "Z",
+        ident=entity.ident,
+        state=entity.state,
+        revision=entity.revision,
+        release_ids=entity.release_ids,
+        release_count=len(entity.release_ids),
+        mimetype=entity.mimetype,
+        size_bytes=entity.size,
+        sha1=entity.sha1,
+        sha256=entity.sha256,
+        md5=entity.md5,
     )
 
     parsed_urls = [tldextract.extract(u.url) for u in entity.urls]
-    t['hosts'] = list(set(['.'.join([seg for seg in pu if seg]) for pu in parsed_urls]))
-    t['domains'] = list(set([pu.registered_domain for pu in parsed_urls]))
-    t['rels'] = list(set([u.rel for u in entity.urls]))
+    t["hosts"] = list(set([".".join([seg for seg in pu if seg]) for pu in parsed_urls]))
+    t["domains"] = list(set([pu.registered_domain for pu in parsed_urls]))
+    t["rels"] = list(set([u.rel for u in entity.urls]))
 
-    t['in_ia'] = bool('archive.org' in t['domains'])
-    t['in_ia_petabox'] = bool('archive.org' in t['hosts'])
+    t["in_ia"] = bool("archive.org" in t["domains"])
+    t["in_ia_petabox"] = bool("archive.org" in t["hosts"])
 
     any_url = None
     good_url = None
     best_url = None
-    for release_url in (entity.urls or []):
+    for release_url in entity.urls or []:
         any_url = release_url.url
-        if release_url.rel in ('webarchive', 'repository'):
+        if release_url.rel in ("webarchive", "repository"):
             good_url = release_url.url
-        if '//web.archive.org/' in release_url.url or '//archive.org/' in release_url.url:
+        if "//web.archive.org/" in release_url.url or "//archive.org/" in release_url.url:
             best_url = release_url.url
     # here is where we bake-in priority; IA-specific
-    t['best_url'] = best_url or good_url or any_url
+    t["best_url"] = best_url or good_url or any_url
 
     return t
diff --git a/python/fatcat_tools/transforms/ingest.py b/python/fatcat_tools/transforms/ingest.py
index 9101a4ec..30b5b190 100644
--- a/python/fatcat_tools/transforms/ingest.py
+++ b/python/fatcat_tools/transforms/ingest.py
@@ -1,4 +1,3 @@
-
 INGEST_TYPE_CONTAINER_MAP = {
     # Optica
     "twtpsm6ytje3nhuqfu3pa7ca7u": "html",
@@ -14,7 +13,8 @@ INGEST_TYPE_CONTAINER_MAP = {
     "lovwr7ladjagzkhmoaszg7efqu": "html",
 }
 
-def release_ingest_request(release, ingest_request_source='fatcat', ingest_type=None):
+
+def release_ingest_request(release, ingest_request_source="fatcat", ingest_type=None):
     """
     Takes a full release entity object and returns an ingest request (as dict),
     or None if it seems like this release shouldn't be ingested.
@@ -27,27 +27,35 @@ def release_ingest_request(release, ingest_request_source='fatcat', ingest_type=
     calling code should check the returned type field.
     """
 
-    if release.state != 'active':
+    if release.state != "active":
         return None
 
     if (not ingest_type) and release.container_id:
         ingest_type = INGEST_TYPE_CONTAINER_MAP.get(release.container_id)
 
     if not ingest_type:
-        if release.release_type == 'stub':
+        if release.release_type == "stub":
             return None
-        elif release.release_type in ['component', 'graphic']:
-            ingest_type = 'component'
-        elif release.release_type == 'dataset':
-            ingest_type = 'dataset'
-        elif release.release_type == 'software':
-            ingest_type = 'software'
-        elif release.release_type == 'post-weblog':
-            ingest_type = 'html'
-        elif release.release_type in ['article-journal', 'article', 'chapter', 'paper-conference', 'book', 'report', 'thesis']:
-            ingest_type = 'pdf'
+        elif release.release_type in ["component", "graphic"]:
+            ingest_type = "component"
+        elif release.release_type == "dataset":
+            ingest_type = "dataset"
+        elif release.release_type == "software":
+            ingest_type = "software"
+        elif release.release_type == "post-weblog":
+            ingest_type = "html"
+        elif release.release_type in [
+            "article-journal",
+            "article",
+            "chapter",
+            "paper-conference",
+            "book",
+            "report",
+            "thesis",
+        ]:
+            ingest_type = "pdf"
         else:
-            ingest_type = 'pdf'
+            ingest_type = "pdf"
 
     # generate a URL where we expect to find fulltext
     url = None
@@ -59,8 +67,10 @@ def release_ingest_request(release, ingest_request_source='fatcat', ingest_type=
         link_source_id = release.ext_ids.arxiv
     elif release.ext_ids.pmcid and ingest_type == "pdf":
         # TODO: how to tell if an author manuscript in PMC vs. published?
-        #url = "https://www.ncbi.nlm.nih.gov/pmc/articles/{}/pdf/".format(release.ext_ids.pmcid)
-        url = "http://europepmc.org/backend/ptpmcrender.fcgi?accid={}&blobtype=pdf".format(release.ext_ids.pmcid)
+        # url = "https://www.ncbi.nlm.nih.gov/pmc/articles/{}/pdf/".format(release.ext_ids.pmcid)
+        url = "http://europepmc.org/backend/ptpmcrender.fcgi?accid={}&blobtype=pdf".format(
+            release.ext_ids.pmcid
+        )
         link_source = "pmc"
         link_source_id = release.ext_ids.pmcid
     elif release.ext_ids.doi:
@@ -75,19 +85,19 @@ def release_ingest_request(release, ingest_request_source='fatcat', ingest_type=
     ext_ids = dict([(k, v) for (k, v) in ext_ids.items() if v])
 
     ingest_request = {
-        'ingest_type': ingest_type,
-        'ingest_request_source': ingest_request_source,
-        'base_url': url,
-        'release_stage': release.release_stage,
-        'fatcat': {
-            'release_ident': release.ident,
-            'work_ident': release.work_id,
+        "ingest_type": ingest_type,
+        "ingest_request_source": ingest_request_source,
+        "base_url": url,
+        "release_stage": release.release_stage,
+        "fatcat": {
+            "release_ident": release.ident,
+            "work_ident": release.work_id,
         },
-        'ext_ids': ext_ids,
+        "ext_ids": ext_ids,
     }
 
     if link_source and link_source_id:
-        ingest_request['link_source'] = link_source
-        ingest_request['link_source_id'] = link_source_id
+        ingest_request["link_source"] = link_source
+        ingest_request["link_source_id"] = link_source_id
 
     return ingest_request
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index a61e364c..1e4cb41d 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -1,4 +1,3 @@
-
 import json
 import time
 
@@ -16,11 +15,9 @@ class ChangelogWorker(FatcatWorker):
     """
 
     def __init__(self, api, kafka_hosts, produce_topic, poll_interval=10.0, offset=None):
-        super().__init__(kafka_hosts=kafka_hosts,
-                         produce_topic=produce_topic,
-                         api=api)
+        super().__init__(kafka_hosts=kafka_hosts, produce_topic=produce_topic, api=api)
         self.poll_interval = poll_interval
-        self.offset = offset    # the fatcat changelog offset, not the kafka offset
+        self.offset = offset  # the fatcat changelog offset, not the kafka offset
 
     def run(self):
 
@@ -31,7 +28,7 @@ class ChangelogWorker(FatcatWorker):
             print("Checking for most recent changelog offset...")
             msg = most_recent_message(self.produce_topic, self.kafka_config)
             if msg:
-                self.offset = json.loads(msg.decode('utf-8'))['index']
+                self.offset = json.loads(msg.decode("utf-8"))["index"]
             else:
                 self.offset = 0
             print("Most recent changelog index in Kafka seems to be {}".format(self.offset))
@@ -44,28 +41,29 @@ class ChangelogWorker(FatcatWorker):
                 raise KafkaException(err)
 
         producer_conf = self.kafka_config.copy()
-        producer_conf.update({
-            'delivery.report.only.error': True,
-            'default.topic.config': {
-                'request.required.acks': -1, # all brokers must confirm
-            },
-        })
+        producer_conf.update(
+            {
+                "delivery.report.only.error": True,
+                "default.topic.config": {
+                    "request.required.acks": -1,  # all brokers must confirm
+                },
+            }
+        )
         producer = Producer(producer_conf)
 
         while True:
             latest = int(self.api.get_changelog(limit=1)[0].index)
             if latest > self.offset:
-                print("Fetching changelogs from {} through {}".format(
-                    self.offset+1, latest))
-            for i in range(self.offset+1, latest+1):
+                print("Fetching changelogs from {} through {}".format(self.offset + 1, latest))
+            for i in range(self.offset + 1, latest + 1):
                 cle = self.api.get_changelog_entry(i)
                 obj = self.api.api_client.sanitize_for_serialization(cle)
                 producer.produce(
                     self.produce_topic,
-                    json.dumps(obj).encode('utf-8'),
+                    json.dumps(obj).encode("utf-8"),
                     key=str(i),
                     on_delivery=fail_fast,
-                    #NOTE timestamp could be timestamp=cle.timestamp (?)
+                    # NOTE timestamp could be timestamp=cle.timestamp (?)
                 )
                 self.offset = i
             producer.flush()
@@ -79,12 +77,19 @@ class EntityUpdatesWorker(FatcatWorker):
     from API) to update topics.
     """
 
-    def __init__(self, api, kafka_hosts, consume_topic, release_topic,
-            file_topic, container_topic, ingest_file_request_topic,
-            work_ident_topic, poll_interval=5.0):
-        super().__init__(kafka_hosts=kafka_hosts,
-                         consume_topic=consume_topic,
-                         api=api)
+    def __init__(
+        self,
+        api,
+        kafka_hosts,
+        consume_topic,
+        release_topic,
+        file_topic,
+        container_topic,
+        ingest_file_request_topic,
+        work_ident_topic,
+        poll_interval=5.0,
+    ):
+        super().__init__(kafka_hosts=kafka_hosts, consume_topic=consume_topic, api=api)
         self.release_topic = release_topic
         self.file_topic = file_topic
         self.container_topic = container_topic
@@ -150,7 +155,7 @@ class EntityUpdatesWorker(FatcatWorker):
             # Transactions of the Japan Society of Mechanical Engineers
             "10.1299/kikai",
             # protocols.io
-            "10.17504/"
+            "10.17504/",
         ]
 
     def want_live_ingest(self, release, ingest_request):
@@ -163,40 +168,40 @@ class EntityUpdatesWorker(FatcatWorker):
         ingest crawling (via wayback SPN).
         """
 
-        link_source = ingest_request.get('ingest_request')
-        ingest_type = ingest_request.get('ingest_type')
-        doi = ingest_request.get('ext_ids', {}).get('doi')
+        link_source = ingest_request.get("ingest_request")
+        ingest_type = ingest_request.get("ingest_type")
+        doi = ingest_request.get("ext_ids", {}).get("doi")
         es = release_to_elasticsearch(release)
 
         is_document = release.release_type in (
-            'article',
-            'article-journal',
-            'article-newspaper',
-            'book',
-            'chapter',
-            'editorial',
-            'interview',
-            'legal_case',
-            'legislation',
-            'letter',
-            'manuscript',
-            'paper-conference',
-            'patent',
-            'peer_review',
-            'post',
-            'report',
-            'retraction',
-            'review',
-            'review-book',
-            'thesis',
+            "article",
+            "article-journal",
+            "article-newspaper",
+            "book",
+            "chapter",
+            "editorial",
+            "interview",
+            "legal_case",
+            "legislation",
+            "letter",
+            "manuscript",
+            "paper-conference",
+            "patent",
+            "peer_review",
+            "post",
+            "report",
+            "retraction",
+            "review",
+            "review-book",
+            "thesis",
         )
         is_not_pdf = release.release_type in (
-            'component',
-            'dataset',
-            'figure',
-            'graphic',
-            'software',
-            'stub',
+            "component",
+            "dataset",
+            "figure",
+            "graphic",
+            "software",
+            "stub",
         )
 
         # accept list sets a default "crawl it" despite OA metadata for
@@ -207,19 +212,23 @@ class EntityUpdatesWorker(FatcatWorker):
                 if doi.startswith(prefix):
                     in_acceptlist = True
 
-        if self.ingest_oa_only and link_source not in ('arxiv', 'pmc'):
+        if self.ingest_oa_only and link_source not in ("arxiv", "pmc"):
 
             # most datacite documents are in IRs and should be crawled
             is_datacite_doc = False
-            if release.extra and ('datacite' in release.extra) and is_document:
+            if release.extra and ("datacite" in release.extra) and is_document:
                 is_datacite_doc = True
-            if not (es['is_oa'] or in_acceptlist or is_datacite_doc):
+            if not (es["is_oa"] or in_acceptlist or is_datacite_doc):
                 return False
 
         # big publishers *generally* have accurate OA metadata, use
         # preservation networks, and block our crawlers. So unless OA, or
         # explicitly on accept list, or not preserved, skip crawling
-        if es.get('publisher_type') == 'big5' and es.get('is_preserved') and not (es['is_oa'] or in_acceptlist):
+        if (
+            es.get("publisher_type") == "big5"
+            and es.get("is_preserved")
+            and not (es["is_oa"] or in_acceptlist)
+        ):
             return False
 
         # if ingest_type is pdf but release_type is almost certainly not a PDF,
@@ -233,23 +242,24 @@ class EntityUpdatesWorker(FatcatWorker):
                     return False
 
         # figshare
-        if doi and (doi.startswith('10.6084/') or doi.startswith('10.25384/')):
+        if doi and (doi.startswith("10.6084/") or doi.startswith("10.25384/")):
             # don't crawl "most recent version" (aka "group") DOIs
             if not release.version:
                 return False
 
         # zenodo
-        if doi and doi.startswith('10.5281/'):
+        if doi and doi.startswith("10.5281/"):
             # if this is a "grouping" DOI of multiple "version" DOIs, do not crawl (will crawl the versioned DOIs)
-            if release.extra and release.extra.get('relations'):
-                for rel in release.extra['relations']:
-                    if (rel.get('relationType') == 'HasVersion' and rel.get('relatedIdentifier', '').startswith('10.5281/')):
+            if release.extra and release.extra.get("relations"):
+                for rel in release.extra["relations"]:
+                    if rel.get("relationType") == "HasVersion" and rel.get(
+                        "relatedIdentifier", ""
+                    ).startswith("10.5281/"):
                         return False
 
         return True
 
     def run(self):
-
         def fail_fast(err, msg):
             if err is not None:
                 print("Kafka producer delivery error: {}".format(err))
@@ -278,36 +288,40 @@ class EntityUpdatesWorker(FatcatWorker):
             for p in partitions:
                 if p.error:
                     raise KafkaException(p.error)
-            print("Kafka partitions rebalanced: {} / {}".format(
-                consumer, partitions))
+            print("Kafka partitions rebalanced: {} / {}".format(consumer, partitions))
 
         consumer_conf = self.kafka_config.copy()
-        consumer_conf.update({
-            'group.id': self.consumer_group,
-            'on_commit': fail_fast,
-            # messages don't have offset marked as stored until pushed to
-            # elastic, but we do auto-commit stored offsets to broker
-            'enable.auto.commit': True,
-            'enable.auto.offset.store': False,
-            # user code timeout; if no poll after this long, assume user code
-            # hung and rebalance (default: 5min)
-            'max.poll.interval.ms': 180000,
-            'default.topic.config': {
-                'auto.offset.reset': 'latest',
-            },
-        })
+        consumer_conf.update(
+            {
+                "group.id": self.consumer_group,
+                "on_commit": fail_fast,
+                # messages don't have offset marked as stored until pushed to
+                # elastic, but we do auto-commit stored offsets to broker
+                "enable.auto.commit": True,
+                "enable.auto.offset.store": False,
+                # user code timeout; if no poll after this long, assume user code
+                # hung and rebalance (default: 5min)
+                "max.poll.interval.ms": 180000,
+                "default.topic.config": {
+                    "auto.offset.reset": "latest",
+                },
+            }
+        )
         consumer = Consumer(consumer_conf)
 
         producer_conf = self.kafka_config.copy()
-        producer_conf.update({
-            'delivery.report.only.error': True,
-            'default.topic.config': {
-                'request.required.acks': -1, # all brokers must confirm
-            },
-        })
+        producer_conf.update(
+            {
+                "delivery.report.only.error": True,
+                "default.topic.config": {
+                    "request.required.acks": -1,  # all brokers must confirm
+                },
+            }
+        )
         producer = Producer(producer_conf)
 
-        consumer.subscribe([self.consume_topic],
+        consumer.subscribe(
+            [self.consume_topic],
             on_assign=on_rebalance,
             on_revoke=on_rebalance,
         )
@@ -316,14 +330,16 @@ class EntityUpdatesWorker(FatcatWorker):
         while True:
             msg = consumer.poll(self.poll_interval)
             if not msg:
-                print("nothing new from kafka (poll_interval: {} sec)".format(self.poll_interval))
+                print(
+                    "nothing new from kafka (poll_interval: {} sec)".format(self.poll_interval)
+                )
                 continue
             if msg.error():
                 raise KafkaException(msg.error())
 
-            cle = json.loads(msg.value().decode('utf-8'))
-            #print(cle)
-            print("processing changelog index {}".format(cle['index']))
+            cle = json.loads(msg.value().decode("utf-8"))
+            # print(cle)
+            print("processing changelog index {}".format(cle["index"]))
             release_ids = []
             new_release_ids = []
             file_ids = []
@@ -331,27 +347,27 @@ class EntityUpdatesWorker(FatcatWorker):
             webcapture_ids = []
             container_ids = []
             work_ids = []
-            release_edits = cle['editgroup']['edits']['releases']
+            release_edits = cle["editgroup"]["edits"]["releases"]
             for re in release_edits:
-                release_ids.append(re['ident'])
+                release_ids.append(re["ident"])
                 # filter to direct release edits which are not updates
-                if not re.get('prev_revision') and not re.get('redirect_ident'):
-                    new_release_ids.append(re['ident'])
-            file_edits = cle['editgroup']['edits']['files']
+                if not re.get("prev_revision") and not re.get("redirect_ident"):
+                    new_release_ids.append(re["ident"])
+            file_edits = cle["editgroup"]["edits"]["files"]
             for e in file_edits:
-                file_ids.append(e['ident'])
-            fileset_edits = cle['editgroup']['edits']['filesets']
+                file_ids.append(e["ident"])
+            fileset_edits = cle["editgroup"]["edits"]["filesets"]
             for e in fileset_edits:
-                fileset_ids.append(e['ident'])
-            webcapture_edits = cle['editgroup']['edits']['webcaptures']
+                fileset_ids.append(e["ident"])
+            webcapture_edits = cle["editgroup"]["edits"]["webcaptures"]
             for e in webcapture_edits:
-                webcapture_ids.append(e['ident'])
-            container_edits = cle['editgroup']['edits']['containers']
+                webcapture_ids.append(e["ident"])
+            container_edits = cle["editgroup"]["edits"]["containers"]
             for e in container_edits:
-                container_ids.append(e['ident'])
-            work_edits = cle['editgroup']['edits']['works']
+                container_ids.append(e["ident"])
+            work_edits = cle["editgroup"]["edits"]["works"]
             for e in work_edits:
-                work_ids.append(e['ident'])
+                work_ids.append(e["ident"])
 
             # TODO: do these fetches in parallel using a thread pool?
             for ident in set(file_ids):
@@ -363,8 +379,8 @@ class EntityUpdatesWorker(FatcatWorker):
                 file_dict = self.api.api_client.sanitize_for_serialization(file_entity)
                 producer.produce(
                     self.file_topic,
-                    json.dumps(file_dict).encode('utf-8'),
-                    key=ident.encode('utf-8'),
+                    json.dumps(file_dict).encode("utf-8"),
+                    key=ident.encode("utf-8"),
                     on_delivery=fail_fast,
                 )
 
@@ -385,30 +401,34 @@ class EntityUpdatesWorker(FatcatWorker):
                 container_dict = self.api.api_client.sanitize_for_serialization(container)
                 producer.produce(
                     self.container_topic,
-                    json.dumps(container_dict).encode('utf-8'),
-                    key=ident.encode('utf-8'),
+                    json.dumps(container_dict).encode("utf-8"),
+                    key=ident.encode("utf-8"),
                     on_delivery=fail_fast,
                 )
 
             for ident in set(release_ids):
-                release = self.api.get_release(ident, expand="files,filesets,webcaptures,container")
+                release = self.api.get_release(
+                    ident, expand="files,filesets,webcaptures,container"
+                )
                 if release.work_id:
                     work_ids.append(release.work_id)
                 release_dict = self.api.api_client.sanitize_for_serialization(release)
                 producer.produce(
                     self.release_topic,
-                    json.dumps(release_dict).encode('utf-8'),
-                    key=ident.encode('utf-8'),
+                    json.dumps(release_dict).encode("utf-8"),
+                    key=ident.encode("utf-8"),
                     on_delivery=fail_fast,
                 )
                 # for ingest requests, filter to "new" active releases with no matched files
                 if release.ident in new_release_ids:
-                    ir = release_ingest_request(release, ingest_request_source='fatcat-changelog')
+                    ir = release_ingest_request(
+                        release, ingest_request_source="fatcat-changelog"
+                    )
                     if ir and not release.files and self.want_live_ingest(release, ir):
                         producer.produce(
                             self.ingest_file_request_topic,
-                            json.dumps(ir).encode('utf-8'),
-                            #key=None,
+                            json.dumps(ir).encode("utf-8"),
+                            # key=None,
                             on_delivery=fail_fast,
                         )
 
@@ -420,13 +440,13 @@ class EntityUpdatesWorker(FatcatWorker):
                     key=key,
                     type="fatcat_work",
                     work_ident=ident,
-                    updated=cle['timestamp'],
-                    fatcat_changelog_index=cle['index'],
+                    updated=cle["timestamp"],
+                    fatcat_changelog_index=cle["index"],
                 )
                 producer.produce(
                     self.work_ident_topic,
-                    json.dumps(work_ident_dict).encode('utf-8'),
-                    key=key.encode('utf-8'),
+                    json.dumps(work_ident_dict).encode("utf-8"),
+                    key=key.encode("utf-8"),
                     on_delivery=fail_fast,
                 )
 
diff --git a/python/fatcat_tools/workers/elasticsearch.py b/python/fatcat_tools/workers/elasticsearch.py
index f411073d..0d75f964 100644
--- a/python/fatcat_tools/workers/elasticsearch.py
+++ b/python/fatcat_tools/workers/elasticsearch.py
@@ -1,4 +1,3 @@
-
 import json
 import sys
 
@@ -26,12 +25,20 @@ class ElasticsearchReleaseWorker(FatcatWorker):
     Uses a consumer group to manage offset.
     """
 
-    def __init__(self, kafka_hosts, consume_topic, poll_interval=10.0, offset=None,
-            elasticsearch_backend="http://localhost:9200", elasticsearch_index="fatcat",
-            elasticsearch_release_index="fatcat_releases",
-            batch_size=200, api_host="https://api.fatcat.wiki/v0", query_stats=False):
-        super().__init__(kafka_hosts=kafka_hosts,
-                         consume_topic=consume_topic)
+    def __init__(
+        self,
+        kafka_hosts,
+        consume_topic,
+        poll_interval=10.0,
+        offset=None,
+        elasticsearch_backend="http://localhost:9200",
+        elasticsearch_index="fatcat",
+        elasticsearch_release_index="fatcat_releases",
+        batch_size=200,
+        api_host="https://api.fatcat.wiki/v0",
+        query_stats=False,
+    ):
+        super().__init__(kafka_hosts=kafka_hosts, consume_topic=consume_topic)
         self.consumer_group = "elasticsearch-updates3"
         self.batch_size = batch_size
         self.poll_interval = poll_interval
@@ -63,45 +70,53 @@ class ElasticsearchReleaseWorker(FatcatWorker):
                     print("Bailing out...", file=sys.stderr)
                     # TODO: should it be sys.exit(-1)?
                     raise KafkaException(p.error)
-            #print("Kafka consumer commit successful")
+            # print("Kafka consumer commit successful")
             pass
 
         def on_rebalance(consumer, partitions):
             for p in partitions:
                 if p.error:
                     raise KafkaException(p.error)
-            print("Kafka partitions rebalanced: {} / {}".format(
-                consumer, partitions), file=sys.stderr)
+            print(
+                "Kafka partitions rebalanced: {} / {}".format(consumer, partitions),
+                file=sys.stderr,
+            )
 
         consumer_conf = self.kafka_config.copy()
-        consumer_conf.update({
-            'group.id': self.consumer_group,
-            'on_commit': fail_fast,
-            # messages don't have offset marked as stored until pushed to
-            # elastic, but we do auto-commit stored offsets to broker
-            'enable.auto.commit': True,
-            'enable.auto.offset.store': False,
-            # user code timeout; if no poll after this long, assume user code
-            # hung and rebalance (default: 5min)
-            'max.poll.interval.ms': 60000,
-            'default.topic.config': {
-                'auto.offset.reset': 'latest',
-            },
-        })
+        consumer_conf.update(
+            {
+                "group.id": self.consumer_group,
+                "on_commit": fail_fast,
+                # messages don't have offset marked as stored until pushed to
+                # elastic, but we do auto-commit stored offsets to broker
+                "enable.auto.commit": True,
+                "enable.auto.offset.store": False,
+                # user code timeout; if no poll after this long, assume user code
+                # hung and rebalance (default: 5min)
+                "max.poll.interval.ms": 60000,
+                "default.topic.config": {
+                    "auto.offset.reset": "latest",
+                },
+            }
+        )
         consumer = Consumer(consumer_conf)
-        consumer.subscribe([self.consume_topic],
+        consumer.subscribe(
+            [self.consume_topic],
             on_assign=on_rebalance,
             on_revoke=on_rebalance,
         )
 
         while True:
-            batch = consumer.consume(
-                num_messages=self.batch_size,
-                timeout=self.poll_interval)
+            batch = consumer.consume(num_messages=self.batch_size, timeout=self.poll_interval)
             if not batch:
                 if not consumer.assignment():
                     print("... no Kafka consumer partitions assigned yet", file=sys.stderr)
-                print("... nothing new from kafka, try again (interval: {}".format(self.poll_interval), file=sys.stderr)
+                print(
+                    "... nothing new from kafka, try again (interval: {}".format(
+                        self.poll_interval
+                    ),
+                    file=sys.stderr,
+                )
                 continue
             print("... got {} kafka messages".format(len(batch)), file=sys.stderr)
             # first check errors on entire batch...
@@ -111,19 +126,24 @@ class ElasticsearchReleaseWorker(FatcatWorker):
             # ... then process
             bulk_actions = []
             for msg in batch:
-                json_str = msg.value().decode('utf-8')
+                json_str = msg.value().decode("utf-8")
                 entity = entity_from_json(json_str, self.entity_type, api_client=ac)
                 assert isinstance(entity, self.entity_type)
                 if self.entity_type == ChangelogEntry:
                     key = entity.index
                     # might need to fetch from API
-                    if not (entity.editgroup and entity.editgroup.editor): # pylint: disable=no-member # (TODO)
+                    if not (
+                        entity.editgroup and entity.editgroup.editor
+                    ):  # pylint: disable=no-member # (TODO)
                         entity = api.get_changelog_entry(entity.index)
                 else:
                     key = entity.ident  # pylint: disable=no-member # (TODO)
 
-                if self.entity_type != ChangelogEntry and entity.state == 'wip':
-                    print(f"WARNING: skipping state=wip entity: {self.entity_type.__name__} {entity.ident}", file=sys.stderr)
+                if self.entity_type != ChangelogEntry and entity.state == "wip":
+                    print(
+                        f"WARNING: skipping state=wip entity: {self.entity_type.__name__} {entity.ident}",
+                        file=sys.stderr,
+                    )
                     continue
 
                 if self.entity_type == ContainerEntity and self.query_stats:
@@ -138,9 +158,15 @@ class ElasticsearchReleaseWorker(FatcatWorker):
                     doc_dict = self.transform_func(entity)
 
                 # TODO: handle deletions from index
-                bulk_actions.append(json.dumps({
-                    "index": { "_id": key, },
-                }))
+                bulk_actions.append(
+                    json.dumps(
+                        {
+                            "index": {
+                                "_id": key,
+                            },
+                        }
+                    )
+                )
                 bulk_actions.append(json.dumps(doc_dict))
 
             # if only WIP entities, then skip
@@ -149,15 +175,22 @@ class ElasticsearchReleaseWorker(FatcatWorker):
                     consumer.store_offsets(message=msg)
                 continue
 
-            print("Upserting, eg, {} (of {} {} in elasticsearch)".format(key, len(batch), self.entity_type.__name__), file=sys.stderr)
+            print(
+                "Upserting, eg, {} (of {} {} in elasticsearch)".format(
+                    key, len(batch), self.entity_type.__name__
+                ),
+                file=sys.stderr,
+            )
             elasticsearch_endpoint = "{}/{}/_bulk".format(
-                self.elasticsearch_backend,
-                self.elasticsearch_index)
-            resp = requests.post(elasticsearch_endpoint,
+                self.elasticsearch_backend, self.elasticsearch_index
+            )
+            resp = requests.post(
+                elasticsearch_endpoint,
                 headers={"Content-Type": "application/x-ndjson"},
-                data="\n".join(bulk_actions) + "\n")
+                data="\n".join(bulk_actions) + "\n",
+            )
             resp.raise_for_status()
-            if resp.json()['errors']:
+            if resp.json()["errors"]:
                 desc = "Elasticsearch errors from post to {}:".format(elasticsearch_endpoint)
                 print(desc, file=sys.stderr)
                 print(resp.content, file=sys.stderr)
@@ -169,20 +202,29 @@ class ElasticsearchReleaseWorker(FatcatWorker):
 
 
 class ElasticsearchContainerWorker(ElasticsearchReleaseWorker):
-
-    def __init__(self, kafka_hosts, consume_topic, poll_interval=10.0, offset=None,
-            query_stats=False, elasticsearch_release_index="fatcat_release",
-            elasticsearch_backend="http://localhost:9200", elasticsearch_index="fatcat",
-            batch_size=200):
-        super().__init__(kafka_hosts=kafka_hosts,
-                         consume_topic=consume_topic,
-                         poll_interval=poll_interval,
-                         offset=offset,
-                         elasticsearch_backend=elasticsearch_backend,
-                         elasticsearch_index=elasticsearch_index,
-                         elasticsearch_release_index=elasticsearch_release_index,
-                         query_stats=query_stats,
-                         batch_size=batch_size)
+    def __init__(
+        self,
+        kafka_hosts,
+        consume_topic,
+        poll_interval=10.0,
+        offset=None,
+        query_stats=False,
+        elasticsearch_release_index="fatcat_release",
+        elasticsearch_backend="http://localhost:9200",
+        elasticsearch_index="fatcat",
+        batch_size=200,
+    ):
+        super().__init__(
+            kafka_hosts=kafka_hosts,
+            consume_topic=consume_topic,
+            poll_interval=poll_interval,
+            offset=offset,
+            elasticsearch_backend=elasticsearch_backend,
+            elasticsearch_index=elasticsearch_index,
+            elasticsearch_release_index=elasticsearch_release_index,
+            query_stats=query_stats,
+            batch_size=batch_size,
+        )
         # previous group got corrupted (by pykafka library?)
         self.consumer_group = "elasticsearch-updates3"
         self.entity_type = ContainerEntity
@@ -196,11 +238,18 @@ class ElasticsearchChangelogWorker(ElasticsearchReleaseWorker):
     Note: Very early versions of changelog entries did not contain details
     about the editor or extra fields.
     """
-    def __init__(self, kafka_hosts, consume_topic, poll_interval=10.0, offset=None,
-            elasticsearch_backend="http://localhost:9200", elasticsearch_index="fatcat_changelog",
-            batch_size=200):
-        super().__init__(kafka_hosts=kafka_hosts,
-                         consume_topic=consume_topic)
+
+    def __init__(
+        self,
+        kafka_hosts,
+        consume_topic,
+        poll_interval=10.0,
+        offset=None,
+        elasticsearch_backend="http://localhost:9200",
+        elasticsearch_index="fatcat_changelog",
+        batch_size=200,
+    ):
+        super().__init__(kafka_hosts=kafka_hosts, consume_topic=consume_topic)
         self.consumer_group = "elasticsearch-updates3"
         self.batch_size = batch_size
         self.poll_interval = poll_interval
diff --git a/python/fatcat_tools/workers/worker_common.py b/python/fatcat_tools/workers/worker_common.py
index 8c2936be..baec44f4 100644
--- a/python/fatcat_tools/workers/worker_common.py
+++ b/python/fatcat_tools/workers/worker_common.py
@@ -1,4 +1,3 @@
-
 from confluent_kafka import Consumer, KafkaException, TopicPartition
 
 
@@ -13,22 +12,21 @@ def most_recent_message(topic, kafka_config):
     print("Fetching most Kafka message from {}".format(topic))
 
     conf = kafka_config.copy()
-    conf.update({
-        'group.id': 'worker-init-last-msg', # should never commit
-        'delivery.report.only.error': True,
-        'enable.auto.commit': False,
-        'default.topic.config': {
-            'request.required.acks': -1,
-            'auto.offset.reset': 'latest',
-        },
-    })
+    conf.update(
+        {
+            "group.id": "worker-init-last-msg",  # should never commit
+            "delivery.report.only.error": True,
+            "enable.auto.commit": False,
+            "default.topic.config": {
+                "request.required.acks": -1,
+                "auto.offset.reset": "latest",
+            },
+        }
+    )
 
     consumer = Consumer(conf)
 
-    hwm = consumer.get_watermark_offsets(
-        TopicPartition(topic, 0),
-        timeout=5.0,
-        cached=False)
+    hwm = consumer.get_watermark_offsets(TopicPartition(topic, 0), timeout=5.0, cached=False)
     if not hwm:
         raise Exception("Kafka consumer timeout, or topic {} doesn't exist".format(topic))
     print("High watermarks: {}".format(hwm))
@@ -37,7 +35,7 @@ def most_recent_message(topic, kafka_config):
         print("topic is new; not 'most recent message'")
         return None
 
-    consumer.assign([TopicPartition(topic, 0, hwm[1]-1)])
+    consumer.assign([TopicPartition(topic, 0, hwm[1] - 1)])
     msg = consumer.poll(2.0)
     consumer.close()
     if not msg:
@@ -56,8 +54,8 @@ class FatcatWorker:
         if api:
             self.api = api
         self.kafka_config = {
-            'bootstrap.servers': kafka_hosts,
-            'message.max.bytes': 20000000, # ~20 MBytes; broker-side max is ~50 MBytes
+            "bootstrap.servers": kafka_hosts,
+            "message.max.bytes": 20000000,  # ~20 MBytes; broker-side max is ~50 MBytes
         }
         self.produce_topic = produce_topic
         self.consume_topic = consume_topic