typing: initial annotations on importers

This commit only adds the type annotations; it does not yet fix the code so that type checking passes.
author: Bryan Newbold <bnewbold@robocracy.org> 2021-11-03 12:57:32 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2021-11-03 16:46:07 -0700
commit: caf1cb316ed18820f3239a285ef14bf45ef963a2 (patch)
tree: 2d3713773dac769878154f61c2eb9f7804f1a60c /python/fatcat_tools/importers
parent: 10a2374051568edf3d872988e730328d899a0fdd (diff)
download: fatcat-caf1cb316ed18820f3239a285ef14bf45ef963a2.tar.gz
fatcat-caf1cb316ed18820f3239a285ef14bf45ef963a2.zip
22 files changed, 443 insertions, 274 deletions
diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py
index ae4f9049..2fb7be55 100644
--- a/python/fatcat_tools/importers/arabesque.py
+++ b/python/fatcat_tools/importers/arabesque.py
@@ -1,4 +1,7 @@
+from typing import Any, Dict, List, Optional
+
 import fatcat_openapi_client
+from fatcat_openapi_client import ApiClient, FileEntity
 
 from .common import SANE_MAX_RELEASES, SANE_MAX_URLS, EntityImporter, b32_hex, make_rel_url
 
@@ -36,7 +39,9 @@ class ArabesqueMatchImporter(EntityImporter):
     - a mode to insert bare files even if identifier not known?
     """
 
-    def __init__(self, api, extid_type, require_grobid=True, **kwargs):
+    def __init__(
+        self, api: ApiClient, extid_type: str, require_grobid: bool = True, **kwargs
+    ) -> None:
 
         eg_desc = (
             kwargs.get("editgroup_description", None)
@@ -59,7 +64,7 @@ class ArabesqueMatchImporter(EntityImporter):
         else:
             print("NOT checking GROBID status column")
 
-    def want(self, row):
+    def want(self, row: Any) -> bool:
         if self.require_grobid and not row["postproc_status"] == "200":
             return False
         if (
@@ -76,7 +81,7 @@ class ArabesqueMatchImporter(EntityImporter):
         else:
             return False
 
-    def parse_record(self, row):
+    def parse_record(self, row: Dict[str, Any]) -> Optional[FileEntity]:
 
         extid = row["identifier"].strip()
 
@@ -131,7 +136,7 @@ class ArabesqueMatchImporter(EntityImporter):
         )
         return fe
 
-    def try_update(self, fe):
+    def try_update(self, fe: FileEntity) -> bool:
         # lookup sha1, or create new entity
         existing = None
         try:
@@ -182,7 +187,7 @@ class ArabesqueMatchImporter(EntityImporter):
         self.counts["update"] += 1
         return False
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[FileEntity]) -> None:
         self.api.create_file_auto_batch(
             fatcat_openapi_client.FileAutoBatch(
                 editgroup=fatcat_openapi_client.Editgroup(
diff --git a/python/fatcat_tools/importers/arxiv.py b/python/fatcat_tools/importers/arxiv.py
index 0957db2c..1d50dd9a 100644
--- a/python/fatcat_tools/importers/arxiv.py
+++ b/python/fatcat_tools/importers/arxiv.py
@@ -2,9 +2,11 @@ import datetime
 import json
 import re
 import sys
+from typing import Any, Dict, List, Optional
 
 import fatcat_openapi_client
 from bs4 import BeautifulSoup
+from fatcat_openapi_client import ApiClient, ReleaseEntity
 from pylatexenc.latex2text import LatexNodes2Text
 
 from .common import EntityImporter
@@ -13,7 +15,7 @@ from .crossref import lookup_license_slug
 latex2text = LatexNodes2Text()
 
 
-def latex_to_text(raw):
+def latex_to_text(raw: str) -> str:
     try:
         return latex2text.latex_to_text(raw).strip()
     except AttributeError:
@@ -22,7 +24,7 @@ def latex_to_text(raw):
         return raw.strip()
 
 
-def parse_arxiv_authors(raw):
+def parse_arxiv_authors(raw: str) -> List[str]:
     if not raw:
         return []
     raw = raw.replace("*", "")
@@ -41,7 +43,7 @@ def parse_arxiv_authors(raw):
     return authors
 
 
-def test_parse_arxiv_authors():
+def test_parse_arxiv_authors() -> None:
 
     assert parse_arxiv_authors(
         "Raphael Chetrite, Shamik Gupta, Izaak Neri and \\'Edgar Rold\\'an"
@@ -88,7 +90,7 @@ class ArxivRawImporter(EntityImporter):
           the "most recent" version; can be a simple sort?
     """
 
-    def __init__(self, api, **kwargs):
+    def __init__(self, api: ApiClient, **kwargs) -> None:
 
         eg_desc = kwargs.get(
             "editgroup_description",
@@ -107,15 +109,17 @@ class ArxivRawImporter(EntityImporter):
         )
         self._test_override = False
 
-    def parse_record(self, record):
+    # TODO: record is really a beautiful soup element, but setting to 'Any' to
+    # make initial type annotations simple
+    def parse_record(self, record: Any) -> Optional[List[ReleaseEntity]]:
 
         if not record:
             return None
         metadata = record.arXivRaw
         if not metadata:
             return None
-        extra = dict()
-        extra_arxiv = dict()
+        extra: Dict[str, Any] = dict()
+        extra_arxiv: Dict[str, Any] = dict()
 
         # don't know!
         release_type = "article"
@@ -134,7 +138,7 @@ class ArxivRawImporter(EntityImporter):
             for i, a in enumerate(authors)
         ]
 
-        lang = "en"  # the vast majority in english
+        lang: Optional[str] = "en"  # the vast majority in english
         if metadata.comments and metadata.comments.get_text():
             comments = metadata.comments.get_text().replace("\n", " ").strip()
             extra_arxiv["comments"] = comments
@@ -229,7 +233,7 @@ class ArxivRawImporter(EntityImporter):
             ).date()
             # TODO: source_type?
             versions.append(
-                fatcat_openapi_client.ReleaseEntity(
+                ReleaseEntity(
                     work_id=None,
                     title=title,
                     # original_title
@@ -261,7 +265,7 @@ class ArxivRawImporter(EntityImporter):
                 versions[-1].release_stage = "accepted"
         return versions
 
-    def try_update(self, versions):
+    def try_update(self, versions: List[ReleaseEntity]) -> bool:
         """
         This is pretty complex! There is no batch/bezerk mode for arxiv importer.
 
@@ -344,7 +348,7 @@ class ArxivRawImporter(EntityImporter):
 
         return False
 
-    def insert_batch(self, batch_batch):
+    def insert_batch(self, batch_batch: List[ReleaseEntity]) -> None:
         # there is no batch/bezerk mode for arxiv importer, except for testing
         if self._test_override:
             for batch in batch_batch:
@@ -360,7 +364,7 @@ class ArxivRawImporter(EntityImporter):
         else:
             raise NotImplementedError()
 
-    def parse_file(self, handle):
+    def parse_file(self, handle: Any) -> None:
 
         # 1. open with beautiful soup
         soup = BeautifulSoup(handle, "xml")
diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py
index e9de42fc..b88117e0 100755
--- a/python/fatcat_tools/importers/cdl_dash_dat.py
+++ b/python/fatcat_tools/importers/cdl_dash_dat.py
@@ -7,10 +7,13 @@ import os
 import subprocess
 import sys
 import urllib
+import urllib.parse
+from typing import Any, Dict, List, Optional, Tuple
 
 import fatcat_openapi_client
 import magic
 from fatcat_openapi_client import (
+    ApiClient,
     Editgroup,
     FilesetEntity,
     FilesetFile,
@@ -24,7 +27,7 @@ from .common import clean
 from .crossref import lookup_license_slug
 
 
-def single_file(prefix, path):
+def single_file(prefix: str, path: str) -> FilesetFile:
 
     full = prefix + path
     size_bytes = os.stat(full).st_size
@@ -59,7 +62,7 @@ def single_file(prefix, path):
     return fsf
 
 
-def make_manifest(base_dir):
+def make_manifest(base_dir: str) -> List[FilesetFile]:
     manifest = []
     for root, dirs, files in os.walk(base_dir):
         for f in files:
@@ -67,7 +70,9 @@ def make_manifest(base_dir):
     return manifest
 
 
-def cdl_dash_release(meta, extra=None):
+def cdl_dash_release(
+    meta: Dict[str, Any], extra: Optional[Dict[str, Any]] = None
+) -> ReleaseEntity:
 
     if not extra:
         extra = dict()
@@ -124,7 +129,7 @@ def cdl_dash_release(meta, extra=None):
     return r
 
 
-def make_release_fileset(dat_path):
+def make_release_fileset(dat_path: str) -> Tuple[ReleaseEntity, FilesetEntity]:
 
     if dat_path.endswith("/"):
         dat_path = dat_path[:-1]
@@ -170,7 +175,12 @@ def make_release_fileset(dat_path):
     return (release, fs)
 
 
-def auto_cdl_dash_dat(api, dat_path, release_id=None, editgroup_id=None):
+def auto_cdl_dash_dat(
+    api: ApiClient,
+    dat_path: str,
+    release_id: Optional[str] = None,
+    editgroup_id: Optional[str] = None,
+) -> Tuple[Optional[str], Optional[ReleaseEntity], Optional[FilesetEntity]]:
 
     git_rev = subprocess.check_output(["git", "describe", "--always"]).strip().decode("utf-8")
 
diff --git a/python/fatcat_tools/importers/chocula.py b/python/fatcat_tools/importers/chocula.py
index 8d2a89b6..842c7853 100644
--- a/python/fatcat_tools/importers/chocula.py
+++ b/python/fatcat_tools/importers/chocula.py
@@ -1,4 +1,7 @@
+from typing import Any, Dict, List, Optional
+
 import fatcat_openapi_client
+from fatcat_openapi_client import ApiClient, ContainerEntity
 
 from .common import EntityImporter, clean
 
@@ -12,7 +15,7 @@ class ChoculaImporter(EntityImporter):
     See guide for details on the many 'extra' fields used here.
     """
 
-    def __init__(self, api, **kwargs):
+    def __init__(self, api: ApiClient, **kwargs) -> None:
 
         eg_desc = kwargs.get(
             "editgroup_description",
@@ -22,7 +25,7 @@ class ChoculaImporter(EntityImporter):
         eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.ChoculaImporter")
         super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
-    def want(self, raw_record):
+    def want(self, raw_record: Any) -> bool:
         if not raw_record.get("ident") and not raw_record.get("_known_issnl"):
             self.counts["skip-unknown-new-issnl"] += 1
             return False
@@ -30,7 +33,7 @@ class ChoculaImporter(EntityImporter):
             return True
         return False
 
-    def parse_record(self, row):
+    def parse_record(self, row: Dict[str, Any]) -> Optional[ContainerEntity]:
         """
         row is a python dict (parsed from JSON).
 
@@ -75,7 +78,7 @@ class ChoculaImporter(EntityImporter):
         elif "journal " in name.lower():
             container_type = "journal"
 
-        ce = fatcat_openapi_client.ContainerEntity(
+        ce = ContainerEntity(
             issnl=row["issnl"],
             issnp=row["extra"].get("issnp"),
             issne=row["extra"].get("issne"),
@@ -88,7 +91,7 @@ class ChoculaImporter(EntityImporter):
         )
         return ce
 
-    def try_update(self, ce):
+    def try_update(self, ce: ContainerEntity) -> bool:
 
         existing = None
         if ce.ident:
@@ -193,7 +196,7 @@ class ChoculaImporter(EntityImporter):
         # if we got this far, it's a bug
         raise NotImplementedError
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[ContainerEntity]) -> None:
         self.api.create_container_auto_batch(
             fatcat_openapi_client.ContainerAutoBatch(
                 editgroup=fatcat_openapi_client.Editgroup(
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index 0b68e5fe..fd472d11 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -7,7 +7,7 @@ import subprocess
 import sys
 import xml.etree.ElementTree as ET
 from collections import Counter
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Sequence, Tuple
 
 import elasticsearch
 import fatcat_openapi_client
@@ -16,7 +16,14 @@ import fuzzycat.verify
 import lxml
 from bs4 import BeautifulSoup
 from confluent_kafka import Consumer, KafkaException
-from fatcat_openapi_client import ReleaseEntity
+from fatcat_openapi_client import (
+    ApiClient,
+    ContainerEntity,
+    EntityEdit,
+    FileEntity,
+    FilesetEntity,
+    ReleaseEntity,
+)
 from fatcat_openapi_client.rest import ApiException
 from fuzzycat.matching import match_release_fuzzy
 
@@ -90,7 +97,7 @@ DOMAIN_REL_MAP: Dict[str, str] = {
 }
 
 
-def make_rel_url(raw_url: str, default_link_rel: str = "web"):
+def make_rel_url(raw_url: str, default_link_rel: str = "web") -> Tuple[str, str]:
     # this is where we map specific domains to rel types, and also filter out
     # bad domains, invalid URLs, etc
     rel = default_link_rel
@@ -101,7 +108,7 @@ def make_rel_url(raw_url: str, default_link_rel: str = "web"):
     return (rel, raw_url)
 
 
-def test_make_rel_url():
+def test_make_rel_url() -> None:
     assert make_rel_url("http://example.com/thing.pdf")[0] == "web"
     assert make_rel_url("http://example.com/thing.pdf", default_link_rel="jeans")[0] == "jeans"
     assert (
@@ -145,7 +152,7 @@ class EntityImporter:
             implementors must write insert_batch appropriately
     """
 
-    def __init__(self, api, **kwargs):
+    def __init__(self, api: ApiClient, **kwargs) -> None:
 
         eg_extra = kwargs.get("editgroup_extra", dict())
         eg_extra["git_rev"] = eg_extra.get(
@@ -212,7 +219,7 @@ class EntityImporter:
         # implementations should fill this in
         raise NotImplementedError
 
-    def finish(self):
+    def finish(self) -> Counter:
         """
         Gets called as cleanup at the end of imports, but can also be called at
         any time to "snip off" current editgroup progress. In other words, safe
@@ -238,7 +245,7 @@ class EntityImporter:
 
         return self.counts
 
-    def get_editgroup_id(self, edits=1):
+    def get_editgroup_id(self, edits: int = 1) -> str:
         if self._edit_count >= self.edit_batch_size:
             if self.submit_mode:
                 self.api.submit_editgroup(self._editgroup_id)
@@ -257,30 +264,31 @@ class EntityImporter:
             self._editgroup_id = eg.editgroup_id
 
         self._edit_count += edits
+        assert self._editgroup_id
         return self._editgroup_id
 
-    def create_container(self, entity):
+    def create_container(self, entity: ContainerEntity) -> EntityEdit:
         eg_id = self.get_editgroup_id()
         self.counts["inserted.container"] += 1
         return self.api.create_container(eg_id, entity)
 
-    def create_release(self, entity):
+    def create_release(self, entity: ReleaseEntity) -> EntityEdit:
         eg_id = self.get_editgroup_id()
         self.counts["inserted.release"] += 1
         return self.api.create_release(eg_id, entity)
 
-    def create_file(self, entity):
+    def create_file(self, entity: FileEntity) -> EntityEdit:
         eg_id = self.get_editgroup_id()
         self.counts["inserted.file"] += 1
         return self.api.create_file(eg_id, entity)
 
-    def updated(self):
+    def updated(self) -> None:
         """
         Implementations should call this from try_update() if the update was successful
         """
         self.counts["update"] += 1
 
-    def push_entity(self, entity):
+    def push_entity(self, entity: Any) -> None:
         self._entity_queue.append(entity)
         if len(self._entity_queue) >= self.edit_batch_size:
             self.insert_batch(self._entity_queue)
@@ -294,7 +302,7 @@ class EntityImporter:
         """
         return True
 
-    def try_update(self, raw_record):
+    def try_update(self, raw_record: Any) -> Optional[bool]:
         """
         Passed the output of parse_record(). Should try to find an existing
         entity and update it (PUT), decide we should do nothing (based on the
@@ -307,15 +315,17 @@ class EntityImporter:
         """
         raise NotImplementedError
 
-    def insert_batch(self, raw_records: List[Any]):
+    def insert_batch(self, raw_records: List[Any]) -> None:
         raise NotImplementedError
 
     def is_orcid(self, orcid: str) -> bool:
         # TODO: replace with clean_orcid() from fatcat_tools.normal
         return self._orcid_regex.match(orcid) is not None
 
-    def lookup_orcid(self, orcid: str):
-        """Caches calls to the Orcid lookup API endpoint in a local dict"""
+    def lookup_orcid(self, orcid: str) -> Optional[str]:
+        """Caches calls to the Orcid lookup API endpoint in a local dict.
+
+        Returns a creator fatcat ident if found, else None"""
         if not self.is_orcid(orcid):
             return None
         if orcid in self._orcid_id_map:
@@ -335,7 +345,7 @@ class EntityImporter:
         # TODO: replace with clean_doi() from fatcat_tools.normal
         return doi.startswith("10.") and doi.count("/") >= 1
 
-    def lookup_doi(self, doi: str):
+    def lookup_doi(self, doi: str) -> Optional[str]:
         """Caches calls to the doi lookup API endpoint in a local dict
 
         For identifier lookups only (not full object fetches)"""
@@ -354,7 +364,7 @@ class EntityImporter:
         self._doi_id_map[doi] = release_id  # might be None
         return release_id
 
-    def lookup_pmid(self, pmid: str):
+    def lookup_pmid(self, pmid: str) -> Optional[str]:
         """Caches calls to the pmid lookup API endpoint in a local dict
 
         For identifier lookups only (not full object fetches)"""
@@ -374,7 +384,7 @@ class EntityImporter:
     def is_issnl(self, issnl: str) -> bool:
         return len(issnl) == 9 and issnl[4] == "-"
 
-    def lookup_issnl(self, issnl: str):
+    def lookup_issnl(self, issnl: str) -> Optional[str]:
         """Caches calls to the ISSN-L lookup API endpoint in a local dict"""
         if issnl in self._issnl_id_map:
             return self._issnl_id_map[issnl]
@@ -389,7 +399,7 @@ class EntityImporter:
         self._issnl_id_map[issnl] = container_id  # might be None
         return container_id
 
-    def read_issn_map_file(self, issn_map_file):
+    def read_issn_map_file(self, issn_map_file: Sequence) -> None:
         print("Loading ISSN map file...", file=sys.stderr)
         self._issn_issnl_map = dict()
         for line in issn_map_file:
@@ -407,7 +417,7 @@ class EntityImporter:
         return self._issn_issnl_map.get(issn)
 
     @staticmethod
-    def generic_file_cleanups(existing):
+    def generic_file_cleanups(existing: FileEntity) -> FileEntity:
         """
         Conservative cleanup of existing file entities.
 
@@ -453,7 +463,7 @@ class EntityImporter:
         return existing
 
     @staticmethod
-    def generic_fileset_cleanups(existing):
+    def generic_fileset_cleanups(existing: FilesetEntity) -> FilesetEntity:
         return existing
 
     def match_existing_release_fuzzy(
@@ -520,10 +530,10 @@ class RecordPusher:
     wraps an importer and pushes records in to it.
     """
 
-    def __init__(self, importer, **kwargs):
+    def __init__(self, importer: EntityImporter, **kwargs) -> None:
         self.importer = importer
 
-    def run(self):
+    def run(self) -> Counter:
         """
         This will look something like:
 
@@ -536,11 +546,11 @@ class RecordPusher:
 
 
 class JsonLinePusher(RecordPusher):
-    def __init__(self, importer, json_file, **kwargs):
+    def __init__(self, importer: EntityImporter, json_file: Sequence, **kwargs) -> None:
         self.importer = importer
         self.json_file = json_file
 
-    def run(self):
+    def run(self) -> Counter:
         for line in self.json_file:
             if not line:
                 continue
@@ -552,11 +562,11 @@ class JsonLinePusher(RecordPusher):
 
 
 class CsvPusher(RecordPusher):
-    def __init__(self, importer, csv_file, **kwargs):
+    def __init__(self, importer: EntityImporter, csv_file: Any, **kwargs) -> None:
         self.importer = importer
         self.reader = csv.DictReader(csv_file, delimiter=kwargs.get("delimiter", ","))
 
-    def run(self):
+    def run(self) -> Counter:
         for line in self.reader:
             if not line:
                 continue
@@ -567,11 +577,11 @@ class CsvPusher(RecordPusher):
 
 
 class LinePusher(RecordPusher):
-    def __init__(self, importer, text_file, **kwargs):
+    def __init__(self, importer: EntityImporter, text_file: Sequence, **kwargs) -> None:
         self.importer = importer
         self.text_file = text_file
 
-    def run(self):
+    def run(self) -> Counter:
         for line in self.text_file:
             if not line:
                 continue
@@ -582,14 +592,21 @@ class LinePusher(RecordPusher):
 
 
 class SqlitePusher(RecordPusher):
-    def __init__(self, importer, db_file, table_name, where_clause="", **kwargs):
+    def __init__(
+        self,
+        importer: EntityImporter,
+        db_file: str,
+        table_name: str,
+        where_clause: str = "",
+        **kwargs
+    ) -> None:
         self.importer = importer
         self.db = sqlite3.connect(db_file, isolation_level="EXCLUSIVE")
         self.db.row_factory = sqlite3.Row
         self.table_name = table_name
         self.where_clause = where_clause
 
-    def run(self):
+    def run(self) -> Counter:
         cur = self.db.execute("SELECT * FROM {} {};".format(self.table_name, self.where_clause))
         for row in cur:
             self.importer.push_record(row)
@@ -599,12 +616,18 @@ class SqlitePusher(RecordPusher):
 
 
 class Bs4XmlLinesPusher(RecordPusher):
-    def __init__(self, importer, xml_file, prefix_filter=None, **kwargs):
+    def __init__(
+        self,
+        importer: EntityImporter,
+        xml_file: Sequence,
+        prefix_filter: Optional[str] = None,
+        **kwargs
+    ) -> None:
         self.importer = importer
         self.xml_file = xml_file
         self.prefix_filter = prefix_filter
 
-    def run(self):
+    def run(self) -> Counter:
         for line in self.xml_file:
             if not line:
                 continue
@@ -619,12 +642,14 @@ class Bs4XmlLinesPusher(RecordPusher):
 
 
 class Bs4XmlFilePusher(RecordPusher):
-    def __init__(self, importer, xml_file, record_tag, **kwargs):
+    def __init__(
+        self, importer: EntityImporter, xml_file: Any, record_tag: str, **kwargs
+    ) -> None:
         self.importer = importer
         self.xml_file = xml_file
         self.record_tag = record_tag
 
-    def run(self):
+    def run(self) -> Counter:
         soup = BeautifulSoup(self.xml_file, "xml")
         for record in soup.find_all(self.record_tag):
             self.importer.push_record(record)
@@ -654,13 +679,20 @@ class Bs4XmlLargeFilePusher(RecordPusher):
     by inner container/release API lookup caches.
     """
 
-    def __init__(self, importer, xml_file, record_tags, use_lxml=False, **kwargs):
+    def __init__(
+        self,
+        importer: EntityImporter,
+        xml_file: Any,
+        record_tags: List[str],
+        use_lxml: bool = False,
+        **kwargs
+    ) -> None:
         self.importer = importer
         self.xml_file = xml_file
         self.record_tags = record_tags
         self.use_lxml = use_lxml
 
-    def run(self):
+    def run(self) -> Counter:
         if self.use_lxml:
             elem_iter = lxml.etree.iterparse(self.xml_file, ["start", "end"], load_dtd=True)
         else:
@@ -691,12 +723,14 @@ class Bs4XmlLargeFilePusher(RecordPusher):
 
 
 class Bs4XmlFileListPusher(RecordPusher):
-    def __init__(self, importer, list_file, record_tag, **kwargs):
+    def __init__(
+        self, importer: EntityImporter, list_file: Sequence, record_tag: str, **kwargs
+    ) -> None:
         self.importer = importer
         self.list_file = list_file
         self.record_tag = record_tag
 
-    def run(self):
+    def run(self) -> Counter:
         for xml_path in self.list_file:
             xml_path = xml_path.strip()
             if not xml_path or xml_path.startswith("#"):
@@ -717,7 +751,15 @@ class KafkaBs4XmlPusher(RecordPusher):
     Fetch XML for an article from Kafka, parse via Bs4.
     """
 
-    def __init__(self, importer, kafka_hosts, kafka_env, topic_suffix, group, **kwargs):
+    def __init__(
+        self,
+        importer: EntityImporter,
+        kafka_hosts: str,
+        kafka_env: str,
+        topic_suffix: str,
+        group: str,
+        **kwargs
+    ) -> None:
         self.importer = importer
         self.consumer = make_kafka_consumer(
             kafka_hosts,
@@ -729,7 +771,7 @@ class KafkaBs4XmlPusher(RecordPusher):
         self.poll_interval = kwargs.get("poll_interval", 5.0)
         self.consume_batch_size = kwargs.get("consume_batch_size", 25)
 
-    def run(self):
+    def run(self) -> Counter:
         count = 0
         last_push = datetime.datetime.now()
         while True:
@@ -784,7 +826,15 @@ class KafkaBs4XmlPusher(RecordPusher):
 
 
 class KafkaJsonPusher(RecordPusher):
-    def __init__(self, importer, kafka_hosts, kafka_env, topic_suffix, group, **kwargs):
+    def __init__(
+        self,
+        importer: EntityImporter,
+        kafka_hosts: str,
+        kafka_env: str,
+        topic_suffix: str,
+        group: str,
+        **kwargs
+    ) -> None:
         self.importer = importer
         self.consumer = make_kafka_consumer(
             kafka_hosts,
@@ -797,7 +847,7 @@ class KafkaJsonPusher(RecordPusher):
         self.consume_batch_size = kwargs.get("consume_batch_size", 100)
         self.force_flush = kwargs.get("force_flush", False)
 
-    def run(self):
+    def run(self) -> Counter:
         count = 0
         last_push = datetime.datetime.now()
         last_force_flush = datetime.datetime.now()
@@ -862,10 +912,12 @@ class KafkaJsonPusher(RecordPusher):
         return counts
 
 
-def make_kafka_consumer(hosts, env, topic_suffix, group, kafka_namespace="fatcat"):
+def make_kafka_consumer(
+    hosts: str, env: str, topic_suffix: str, group: str, kafka_namespace: str = "fatcat"
+) -> Consumer:
     topic_name = "{}-{}.{}".format(kafka_namespace, env, topic_suffix)
 
-    def fail_fast(err, partitions):
+    def fail_fast(err: Any, partitions: List[Any]) -> None:
         if err is not None:
             print("Kafka consumer commit error: {}".format(err))
             print("Bailing out...")
@@ -900,7 +952,7 @@ def make_kafka_consumer(hosts, env, topic_suffix, group, kafka_namespace="fatcat
         },
     }
 
-    def on_rebalance(consumer, partitions):
+    def on_rebalance(consumer: Consumer, partitions: List[Any]) -> None:
         for p in partitions:
             if p.error:
                 raise KafkaException(p.error)
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index d0017002..689989d2 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -1,9 +1,9 @@
 import datetime
 import sqlite3
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional, Sequence
 
 import fatcat_openapi_client
-from fatcat_openapi_client import ReleaseEntity
+from fatcat_openapi_client import ApiClient, ReleaseContrib, ReleaseEntity
 
 from .common import EntityImporter, clean
 
@@ -90,7 +90,7 @@ LICENSE_SLUG_MAP: Dict[str, str] = {
 }
 
 
-def lookup_license_slug(raw: str) -> Optional[str]:
+def lookup_license_slug(raw: Optional[str]) -> Optional[str]:
     if not raw:
         return None
     raw = raw.strip().replace("http://", "//").replace("https://", "//")
@@ -102,7 +102,7 @@ def lookup_license_slug(raw: str) -> Optional[str]:
     return LICENSE_SLUG_MAP.get(raw)
 
 
-def test_lookup_license_slug():
+def test_lookup_license_slug() -> None:
 
     assert lookup_license_slug("https://creativecommons.org/licenses/by-nc/3.0/") == "CC-BY-NC"
     assert (
@@ -133,13 +133,13 @@ class CrossrefImporter(EntityImporter):
     See https://github.com/CrossRef/rest-api-doc for JSON schema notes
     """
 
-    def __init__(self, api, issn_map_file, **kwargs):
+    def __init__(self, api: ApiClient, issn_map_file: Sequence, **kwargs) -> None:
 
         eg_desc: Optional[str] = kwargs.get(
             "editgroup_description",
             "Automated import of Crossref DOI metadata, harvested from REST API",
         )
-        eg_extra: Optional[dict] = kwargs.get("editgroup_extra", dict())
+        eg_extra: Dict[str, Any] = kwargs.get("editgroup_extra", dict())
         eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.CrossrefImporter")
         super().__init__(
             api,
@@ -249,7 +249,7 @@ class CrossrefImporter(EntityImporter):
         release_type = self.map_release_type(obj["type"])
 
         # contribs
-        def do_contribs(obj_list, ctype):
+        def do_contribs(obj_list: List[Dict[str, Any]], ctype: str) -> List[ReleaseContrib]:
             contribs = []
             for i, am in enumerate(obj_list):
                 creator_id = None
@@ -257,15 +257,15 @@ class CrossrefImporter(EntityImporter):
                     creator_id = self.lookup_orcid(am["ORCID"].split("/")[-1])
                 # Sorry humans :(
                 if am.get("given") and am.get("family"):
-                    raw_name = "{} {}".format(am["given"], am["family"])
+                    raw_name: Optional[str] = "{} {}".format(am["given"], am["family"])
                 elif am.get("family"):
                     raw_name = am["family"]
                 else:
                     # TODO: can end up empty
                     raw_name = am.get("name") or am.get("given")
-                extra = dict()
+                extra: Dict[str, Any] = dict()
                 if ctype == "author":
-                    index = i
+                    index: Optional[int] = i
                 else:
                     index = None
                 raw_affiliation = None
@@ -284,7 +284,7 @@ class CrossrefImporter(EntityImporter):
                 assert ctype in ("author", "editor", "translator")
                 raw_name = clean(raw_name)
                 contribs.append(
-                    fatcat_openapi_client.ReleaseContrib(
+                    ReleaseContrib(
                         creator_id=creator_id,
                         index=index,
                         raw_name=raw_name,
@@ -559,7 +559,7 @@ class CrossrefImporter(EntityImporter):
         )
         return re
 
-    def try_update(self, re):
+    def try_update(self, re: ReleaseEntity) -> bool:
 
         # lookup existing DOI (don't need to try other ext idents for crossref)
         existing = None
@@ -577,7 +577,7 @@ class CrossrefImporter(EntityImporter):
 
         return True
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[ReleaseEntity]) -> None:
         self.api.create_release_auto_batch(
             fatcat_openapi_client.ReleaseAutoBatch(
                 editgroup=fatcat_openapi_client.Editgroup(
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 4c174b0b..7cc5fa20 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -14,11 +14,13 @@ import json
 import re
 import sqlite3
 import sys
+from typing import Any, Dict, List, Optional, Sequence, Set, Tuple
 
 import dateparser
 import fatcat_openapi_client
 import langdetect
 import pycountry
+from fatcat_openapi_client import ApiClient, ReleaseContrib, ReleaseEntity
 
 from fatcat_tools.normal import clean_doi
 from fatcat_tools.transforms import entity_to_dict
@@ -29,7 +31,7 @@ from .common import EntityImporter, clean
 MAX_ABSTRACT_LENGTH = 2048
 
 # https://guide.fatcat.wiki/entity_container.html#container_type-vocabulary
-CONTAINER_TYPE_MAP = {
+CONTAINER_TYPE_MAP: Dict[str, str] = {
     "Journal": "journal",
     "Series": "journal",
     "Book Series": "book-series",
@@ -38,7 +40,7 @@ CONTAINER_TYPE_MAP = {
 # The docs/guide should be the canonical home for these mappings; update there
 # first.  Map various datacite type types to CSL-ish types. None means TODO or
 # remove.
-DATACITE_TYPE_MAP = {
+DATACITE_TYPE_MAP: Dict[str, Dict[str, Optional[str]]] = {
     "ris": {
         "THES": "thesis",
         "SOUND": "song",  # 99.9% maps to citeproc song, so use that (exception: report)
@@ -128,7 +130,7 @@ DATACITE_TYPE_MAP = {
 }
 
 # DATACITE_UNKNOWN_MARKERS via https://support.datacite.org/docs/schema-values-unknown-information-v43.
-DATACITE_UNKNOWN_MARKERS = (
+DATACITE_UNKNOWN_MARKERS: List[str] = [
     "(:unac)",  # temporarily inaccessible
     "(:unal)",  # unallowed, suppressed intentionally
     "(:unap)",  # not applicable, makes no sense
@@ -139,11 +141,11 @@ DATACITE_UNKNOWN_MARKERS = (
     "(:null)",  # explicitly and meaningfully empty
     "(:tba)",  # to be assigned or announced later
     "(:etal)",  # too numerous to list (et alia)
-)
+]
 
 # UNKNOWN_MARKERS joins official datacite markers with a generic tokens marking
 # unknown values.
-UNKNOWN_MARKERS = set(DATACITE_UNKNOWN_MARKERS).union(
+UNKNOWN_MARKERS: Set[str] = set(DATACITE_UNKNOWN_MARKERS).union(
     set(
         (
             "NA",
@@ -159,7 +161,7 @@ UNKNOWN_MARKERS = set(DATACITE_UNKNOWN_MARKERS).union(
 UNKNOWN_MARKERS_LOWER = set((v.lower() for v in UNKNOWN_MARKERS))
 
 # Any "min" number of "tokens" will signal "spam", https://fatcat.wiki/release/rzcpjwukobd4pj36ipla22cnoi
-DATACITE_TITLE_SPAM_WORDGROUPS = [
+DATACITE_TITLE_SPAM_WORDGROUPS: List[Dict[str, Any]] = [
     {
         "tokens": (
             "full",
@@ -180,7 +182,7 @@ DATACITE_TITLE_SPAM_WORDGROUPS = [
 ]
 
 # TODO(martin): merge this with other maps and lookup functions, eventually.
-LICENSE_SLUG_MAP = {
+LICENSE_SLUG_MAP: Dict[str, str] = {
     "//archaeologydataservice.ac.uk/advice/termsofuseandaccess.xhtml/": "ADS-UK",
     "//archaeologydataservice.ac.uk/advice/termsofuseandaccess/": "ADS-UK",
     "//arxiv.org/licenses/nonexclusive-distrib/1.0/": "ARXIV-1.0",
@@ -222,7 +224,14 @@ class DataciteImporter(EntityImporter):
     Importer for datacite records.
     """
 
-    def __init__(self, api, issn_map_file, debug=False, insert_log_file=None, **kwargs):
+    def __init__(
+        self,
+        api: ApiClient,
+        issn_map_file: Sequence,
+        debug: bool = False,
+        insert_log_file: Optional[str] = None,
+        **kwargs
+    ) -> None:
 
         eg_desc = kwargs.get(
             "editgroup_description",
@@ -255,7 +264,7 @@ class DataciteImporter(EntityImporter):
 
         print("datacite with debug={}".format(self.debug), file=sys.stderr)
 
-    def lookup_ext_ids(self, doi):
+    def lookup_ext_ids(self, doi: str) -> Dict[str, Any]:
         """
         Return dictionary of identifiers referring to the same things as the given DOI.
         """
@@ -291,7 +300,7 @@ class DataciteImporter(EntityImporter):
             jstor_id=None,
         )
 
-    def parse_record(self, obj):
+    def parse_record(self, obj: Dict[str, Any]) -> Optional[ReleaseEntity]:
         """
         Mapping datacite JSON to ReleaseEntity.
         """
@@ -413,7 +422,7 @@ class DataciteImporter(EntityImporter):
         # Start with clear stages, e.g. published. TODO(martin): we could
         # probably infer a bit more from the relations, e.g.
         # "IsPreviousVersionOf" or "IsNewVersionOf".
-        release_stage = "published"
+        release_stage: Optional[str] = "published"
 
         # TODO(martin): If 'state' is not 'findable' or 'isActive' is not true,
         # we might want something else than 'published'. See also:
@@ -628,7 +637,7 @@ class DataciteImporter(EntityImporter):
             release_type = "review"
 
         # Extra information.
-        extra_datacite = dict()
+        extra_datacite: Dict[str, Any] = dict()
 
         if license_extra:
             extra_datacite["license"] = license_extra
@@ -675,7 +684,7 @@ class DataciteImporter(EntityImporter):
         if relations:
             extra_datacite["relations"] = relations
 
-        extra = dict()
+        extra: Dict[str, Any] = dict()
 
         # "1.0.0", "v1.305.2019", "Final", "v1.0.0", "v0.3.0", "1", "0.19.0",
         # "3.1", "v1.1", "{version}", "4.0", "10329", "11672", "11555",
@@ -734,7 +743,7 @@ class DataciteImporter(EntityImporter):
         return re
 
     @staticmethod
-    def datacite_release_type(doi, attributes):
+    def datacite_release_type(doi: str, attributes: Dict[str, Any]) -> Optional[str]:
         """
         Release type. Try to determine the release type from a variety of types
         supplied in datacite. The "attributes.types.resourceType" is
@@ -766,7 +775,7 @@ class DataciteImporter(EntityImporter):
         return release_type
 
     @staticmethod
-    def biblio_hacks(re):
+    def biblio_hacks(re: ReleaseEntity) -> ReleaseEntity:
         """
         This function handles known special cases. For example,
         publisher-specific or platform-specific workarounds.
@@ -817,7 +826,7 @@ class DataciteImporter(EntityImporter):
 
         return re
 
-    def try_update(self, re):
+    def try_update(self, re: ReleaseEntity) -> bool:
         """
         When debug is true, write the RE to stdout, not to the database. Might
         hide schema mismatch bugs.
@@ -842,7 +851,7 @@ class DataciteImporter(EntityImporter):
 
         return True
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[ReleaseEntity]) -> None:
         print("inserting batch ({})".format(len(batch)), file=sys.stderr)
         if self.insert_log_file:
             with open(self.insert_log_file, "a") as f:
@@ -858,7 +867,13 @@ class DataciteImporter(EntityImporter):
             )
         )
 
-    def parse_datacite_creators(self, creators, role="author", set_index=True, doi=None):
+    def parse_datacite_creators(
+        self,
+        creators: List[Dict[str, Any]],
+        role: str = "author",
+        set_index: bool = True,
+        doi: Optional[str] = None,
+    ) -> List[ReleaseContrib]:
         """
         Parses a list of creators into a list of ReleaseContrib objects. Set
         set_index to False, if the index contrib field should be left blank.
@@ -868,12 +883,12 @@ class DataciteImporter(EntityImporter):
         # "attributes.creators[].nameIdentifiers[].nameIdentifierScheme":
         # ["LCNA", "GND", "email", "NAF", "OSF", "RRID", "ORCID",
         # "SCOPUS", "NRCPID", "schema.org", "GRID", "MGDS", "VIAF", "JACoW-ID"].
-        contribs = []
+        contribs: List[ReleaseContrib] = []
 
         # Names, that should be ignored right away.
         name_blocklist = set(("Occdownload Gbif.Org",))
 
-        i = 0
+        i: Optional[int] = 0
         for c in creators:
             if not set_index:
                 i = None
@@ -983,7 +998,9 @@ class DataciteImporter(EntityImporter):
         return contribs
 
 
-def contributor_list_contains_contributor(contributor_list, contributor):
+def contributor_list_contains_contributor(
+    contributor_list: List[ReleaseContrib], contributor: ReleaseContrib
+) -> bool:
     """
     Given a list of contributors, determine, whether contrib is in that list.
     """
@@ -998,7 +1015,7 @@ def contributor_list_contains_contributor(contributor_list, contributor):
     return False
 
 
-def lookup_license_slug(raw):
+def lookup_license_slug(raw: Optional[str]) -> Optional[str]:
     """
     Resolve a variety of strings into a some pseudo-canonical form, e.g.
     CC-BY-ND, CC-0, MIT and so on.
@@ -1101,7 +1118,9 @@ def lookup_license_slug(raw):
     return LICENSE_SLUG_MAP.get(raw)
 
 
-def find_original_language_title(item, min_length=4, max_questionmarks=3):
+def find_original_language_title(
+    item: Dict[str, Any], min_length: int = 4, max_questionmarks: int = 3
+) -> Optional[str]:
     """
     Perform a few checks before returning a potential original language title.
 
@@ -1126,7 +1145,9 @@ def find_original_language_title(item, min_length=4, max_questionmarks=3):
     return None
 
 
-def parse_datacite_titles(titles):
+def parse_datacite_titles(
+    titles: List[Dict[str, Any]]
+) -> Tuple[Optional[str], Optional[str], Optional[str]]:
     """
     Given a list of title items from datacite, return 3-tuple (title,
     original_language_title, subtitle).
@@ -1158,7 +1179,9 @@ def parse_datacite_titles(titles):
     return title, original_language_title, subtitle
 
 
-def parse_single_date(value):
+def parse_single_date(
+    value: Optional[str],
+) -> Tuple[Optional[datetime.date], Optional[int], Optional[int]]:
     """
     Given a single string containing a date in arbitrary format, try to return
     tuple (date: datetime.date, month: int, year: int).
@@ -1186,10 +1209,12 @@ def parse_single_date(value):
     return None, None, None
 
 
-def parse_datacite_dates(dates):
+def parse_datacite_dates(
+    dates: List[Dict[str, Any]],
+) -> Tuple[Optional[datetime.date], Optional[int], Optional[int]]:
     """
     Given a list of date fields (under .dates), return tuple, (release_date,
-    release_year).
+    release_month, release_year).
     """
     release_date, release_month, release_year = None, None, None
 
@@ -1226,9 +1251,13 @@ def parse_datacite_dates(dates):
         Pattern("%Y", "y"),
     )
 
-    def parse_item(item):
+    def parse_item(
+        item: Dict[str, Any]
+    ) -> Tuple[Optional[datetime.date], Optional[int], Optional[int]]:
         result, value, year_only = None, str(item.get("date", "")) or "", False
-        release_date, release_month, release_year = None, None, None
+        release_date: Optional[datetime.date] = None
+        release_month: Optional[int] = None
+        release_year: Optional[int] = None
 
         for layout, granularity in common_patterns:
             try:
@@ -1285,7 +1314,7 @@ def parse_datacite_dates(dates):
     return release_date, release_month, release_year
 
 
-def index_form_to_display_name(s):
+def index_form_to_display_name(s: str) -> str:
     """
     Try to convert an index form name, like 'Razis, Panos A' into display_name,
     e.g. 'Panos A Razis'.
diff --git a/python/fatcat_tools/importers/dblp_container.py b/python/fatcat_tools/importers/dblp_container.py
index 603a6271..36fe5f00 100644
--- a/python/fatcat_tools/importers/dblp_container.py
+++ b/python/fatcat_tools/importers/dblp_container.py
@@ -4,8 +4,10 @@ pre-scraped in to JSON from HTML pages.
 """
 
 import sys  # noqa: F401
+from typing import Any, Dict, List, Optional, Sequence
 
 import fatcat_openapi_client
+from fatcat_openapi_client import ApiClient, ContainerEntity
 
 from fatcat_tools.importers.common import EntityImporter
 from fatcat_tools.normal import clean_str
@@ -13,8 +15,13 @@ from fatcat_tools.normal import clean_str
 
 class DblpContainerImporter(EntityImporter):
     def __init__(
-        self, api, issn_map_file, dblp_container_map_file, dblp_container_map_output, **kwargs
-    ):
+        self,
+        api: ApiClient,
+        issn_map_file: Sequence,
+        dblp_container_map_file: Sequence,
+        dblp_container_map_output: Any,
+        **kwargs
+    ) -> None:
 
         eg_desc = kwargs.get(
             "editgroup_description",
@@ -29,7 +36,7 @@ class DblpContainerImporter(EntityImporter):
         self.read_issn_map_file(issn_map_file)
         print("\t".join(["dblp_prefix", "container_id"]), file=self.dblp_container_map_output)
 
-    def read_dblp_container_map_file(self, dblp_container_map_file) -> None:
+    def read_dblp_container_map_file(self, dblp_container_map_file: Sequence) -> None:
         self._dblp_container_map = dict()
         print("Loading existing dblp prefix container map file...", file=sys.stderr)
         for line in dblp_container_map_file:
@@ -44,15 +51,15 @@ class DblpContainerImporter(EntityImporter):
             file=sys.stderr,
         )
 
-    def lookup_dblp_prefix(self, prefix):
+    def lookup_dblp_prefix(self, prefix: str) -> Optional[str]:
         if not prefix:
             return None
         return self._dblp_container_map.get(prefix)
 
-    def want(self, raw_record):
+    def want(self, raw_record: Any) -> bool:
         return True
 
-    def parse_record(self, row):
+    def parse_record(self, row: Dict[str, Any]) -> Optional[ContainerEntity]:
         """
         row is a python dict (parsed from JSON).
 
@@ -77,7 +84,7 @@ class DblpContainerImporter(EntityImporter):
             if issnl:
                 break
 
-        extra = {
+        extra: Dict[str, Any] = {
             "dblp": {
                 "prefix": dblp_prefix,
             },
@@ -98,7 +105,7 @@ class DblpContainerImporter(EntityImporter):
         )
         return ce
 
-    def try_update(self, ce):
+    def try_update(self, ce: ContainerEntity) -> bool:
 
         dblp_prefix = ce.extra["dblp"]["prefix"]
         existing = None
@@ -135,7 +142,7 @@ class DblpContainerImporter(EntityImporter):
         # shouldn't get here
         raise NotImplementedError()
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[ContainerEntity]) -> None:
         """
         Because we want to print a prefix/container_id match for each row, we
         require a special batch insert method
diff --git a/python/fatcat_tools/importers/dblp_release.py b/python/fatcat_tools/importers/dblp_release.py
index e73e5f33..cb56432a 100644
--- a/python/fatcat_tools/importers/dblp_release.py
+++ b/python/fatcat_tools/importers/dblp_release.py
@@ -24,10 +24,11 @@ import datetime
 import json
 import sys  # noqa: F401
 import warnings
-from typing import Any, List, Optional
+from typing import Any, Dict, List, Optional, Sequence
 
 import bs4
 import fatcat_openapi_client
+from fatcat_openapi_client import ApiClient, ReleaseEntity
 
 from fatcat_tools.importers.common import EntityImporter
 from fatcat_tools.normal import (
@@ -44,7 +45,9 @@ from fatcat_tools.transforms import entity_to_dict
 
 
 class DblpReleaseImporter(EntityImporter):
-    def __init__(self, api, dblp_container_map_file=None, **kwargs):
+    def __init__(
+        self, api: ApiClient, dblp_container_map_file: Optional[Sequence] = None, **kwargs
+    ) -> None:
 
         eg_desc = kwargs.get(
             "editgroup_description", "Automated import of dblp metadata via XML records"
@@ -70,7 +73,7 @@ class DblpReleaseImporter(EntityImporter):
         # "data",  # no instances in 2020-11 dump
     ]
 
-    def read_dblp_container_map_file(self, dblp_container_map_file) -> None:
+    def read_dblp_container_map_file(self, dblp_container_map_file: Optional[Sequence]) -> None:
         self._dblp_container_map = dict()
         if not dblp_container_map_file:
             print(
@@ -91,12 +94,12 @@ class DblpReleaseImporter(EntityImporter):
             file=sys.stderr,
         )
 
-    def lookup_dblp_prefix(self, prefix):
+    def lookup_dblp_prefix(self, prefix: Optional[str]) -> Optional[str]:
         if not prefix:
             return None
         return self._dblp_container_map.get(prefix)
 
-    def want(self, xml_elem):
+    def want(self, xml_elem: Any) -> bool:
         if xml_elem.name not in self.ELEMENT_TYPES:
             self.counts["skip-type"] += 1
             return False
@@ -108,7 +111,8 @@ class DblpReleaseImporter(EntityImporter):
             return False
         return True
 
-    def parse_record(self, xml_elem):
+    # TODO: xml_elem could be typed instead of 'Any' for better type checking
+    def parse_record(self, xml_elem: Any) -> Optional[ReleaseEntity]:
         """
         - title
             => may contain <i>, <sub>, <sup>, <tt>
@@ -255,7 +259,7 @@ class DblpReleaseImporter(EntityImporter):
             dblp_extra["part_of_key"] = part_of_key
 
         # generic extra
-        extra = dict()
+        extra: Dict[str, Any] = dict()
         if not container_id and container_name:
             extra["container_name"] = container_name
 
@@ -312,14 +316,14 @@ class DblpReleaseImporter(EntityImporter):
         return re
 
     @staticmethod
-    def biblio_hacks(re):
+    def biblio_hacks(re: ReleaseEntity) -> ReleaseEntity:
         """
         This function handles known special cases. For example,
         publisher-specific or platform-specific workarounds.
         """
         return re
 
-    def try_update(self, re):
+    def try_update(self, re: ReleaseEntity) -> bool:
 
         # lookup existing release by dblp article id
         existing = None
@@ -411,7 +415,7 @@ class DblpReleaseImporter(EntityImporter):
 
         return False
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[ReleaseEntity]) -> None:
         self.api.create_release_auto_batch(
             fatcat_openapi_client.ReleaseAutoBatch(
                 editgroup=fatcat_openapi_client.Editgroup(
diff --git a/python/fatcat_tools/importers/doaj_article.py b/python/fatcat_tools/importers/doaj_article.py
index 56045ea7..9ff4f3fb 100644
--- a/python/fatcat_tools/importers/doaj_article.py
+++ b/python/fatcat_tools/importers/doaj_article.py
@@ -6,9 +6,10 @@ DOAJ API schema and docs: https://doaj.org/api/v1/docs
 
 import datetime
 import warnings
-from typing import List, Optional
+from typing import Any, Dict, List, Optional, Sequence
 
 import fatcat_openapi_client
+from fatcat_openapi_client import ApiClient, ReleaseEntity
 
 from fatcat_tools.importers.common import EntityImporter
 from fatcat_tools.normal import (
@@ -28,7 +29,7 @@ MAX_ABSTRACT_LENGTH = 2048
 
 
 class DoajArticleImporter(EntityImporter):
-    def __init__(self, api, issn_map_file, **kwargs):
+    def __init__(self, api: ApiClient, issn_map_file: Sequence, **kwargs) -> None:
 
         eg_desc = kwargs.get(
             "editgroup_description",
@@ -49,10 +50,10 @@ class DoajArticleImporter(EntityImporter):
         self.this_year = datetime.datetime.now().year
         self.read_issn_map_file(issn_map_file)
 
-    def want(self, obj):
+    def want(self, raw_record: Dict[str, Any]) -> bool:
         return True
 
-    def parse_record(self, obj):
+    def parse_record(self, obj: Dict[str, Any]) -> Optional[ReleaseEntity]:
         """
         bibjson {
             abstract (string, optional),
@@ -108,7 +109,7 @@ class DoajArticleImporter(EntityImporter):
         publisher = clean_str(bibjson["journal"].get("publisher"))
 
         try:
-            release_year = int(bibjson.get("year"))
+            release_year: Optional[int] = int(bibjson.get("year"))
         except (TypeError, ValueError):
             release_year = None
         release_month = parse_month(clean_str(bibjson.get("month")))
@@ -148,7 +149,7 @@ class DoajArticleImporter(EntityImporter):
         contribs = self.doaj_contribs(bibjson.get("author") or [])
 
         # DOAJ-specific extra
-        doaj_extra = dict()
+        doaj_extra: Dict[str, Any] = dict()
         if bibjson.get("subject"):
             doaj_extra["subject"] = bibjson.get("subject")
         if bibjson.get("keywords"):
@@ -157,7 +158,7 @@ class DoajArticleImporter(EntityImporter):
             ]
 
         # generic extra
-        extra = dict()
+        extra: Dict[str, Any] = dict()
         if country:
             extra["country"] = country
         if not container_id and container_name:
@@ -194,14 +195,14 @@ class DoajArticleImporter(EntityImporter):
         return re
 
     @staticmethod
-    def biblio_hacks(re):
+    def biblio_hacks(re: ReleaseEntity) -> ReleaseEntity:
         """
         This function handles known special cases. For example,
         publisher-specific or platform-specific workarounds.
         """
         return re
 
-    def try_update(self, re):
+    def try_update(self, re: ReleaseEntity) -> bool:
 
         # lookup existing release by DOAJ article id
         existing = None
@@ -276,7 +277,7 @@ class DoajArticleImporter(EntityImporter):
 
         return False
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[ReleaseEntity]) -> None:
         self.api.create_release_auto_batch(
             fatcat_openapi_client.ReleaseAutoBatch(
                 editgroup=fatcat_openapi_client.Editgroup(
diff --git a/python/fatcat_tools/importers/file_meta.py b/python/fatcat_tools/importers/file_meta.py
index 26584ff3..892c1dcd 100644
--- a/python/fatcat_tools/importers/file_meta.py
+++ b/python/fatcat_tools/importers/file_meta.py
@@ -1,4 +1,7 @@
+from typing import Any, Dict
+
 import fatcat_openapi_client
+from fatcat_openapi_client import ApiClient, FileEntity
 
 from .common import EntityImporter
 
@@ -14,7 +17,7 @@ class FileMetaImporter(EntityImporter):
     imported which were missing file size, mimetype, md5, and/or sha256.
     """
 
-    def __init__(self, api, require_grobid=True, **kwargs):
+    def __init__(self, api: ApiClient, require_grobid: bool = True, **kwargs) -> None:
 
         eg_desc = kwargs.pop("editgroup_description", None) or "File metadata updates"
         eg_extra = kwargs.pop("editgroup_extra", dict())
@@ -22,14 +25,14 @@ class FileMetaImporter(EntityImporter):
         kwargs["do_updates"] = kwargs.get("do_updates", True)
         super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
-    def want(self, row):
+    def want(self, row: Any) -> bool:
         for k in ("sha1hex", "sha256hex", "md5hex", "size_bytes", "mimetype"):
             if not row.get(k):
                 self.counts["skip-missing-field"] += 1
                 return False
         return True
 
-    def parse_record(self, row):
+    def parse_record(self, row: Dict[str, Any]) -> FileEntity:
 
         # bezerk mode doesn't make sense for this importer
         assert self.bezerk_mode is False
@@ -44,7 +47,7 @@ class FileMetaImporter(EntityImporter):
         )
         return fe
 
-    def try_update(self, fe):
+    def try_update(self, fe: FileEntity) -> bool:
 
         # lookup sha1, or create new entity
         existing = None
diff --git a/python/fatcat_tools/importers/fileset_generic.py b/python/fatcat_tools/importers/fileset_generic.py
index dd8f5600..2207b938 100644
--- a/python/fatcat_tools/importers/fileset_generic.py
+++ b/python/fatcat_tools/importers/fileset_generic.py
@@ -1,4 +1,7 @@
+from typing import Any, Dict, List, Optional
+
 import fatcat_openapi_client
+from fatcat_openapi_client import ApiClient, FilesetEntity
 
 from fatcat_tools import entity_from_dict
 
@@ -17,7 +20,7 @@ class FilesetImporter(EntityImporter):
     Currently only creates (insert), no updates.
     """
 
-    def __init__(self, api, **kwargs):
+    def __init__(self, api: ApiClient, **kwargs) -> None:
 
         eg_desc = kwargs.pop("editgroup_description", None) or "Generic Fileset entity import"
         eg_extra = kwargs.pop("editgroup_extra", dict())
@@ -29,7 +32,7 @@ class FilesetImporter(EntityImporter):
         # bezerk mode doesn't make sense for this importer
         assert self.bezerk_mode is False
 
-    def want(self, row):
+    def want(self, row: Dict[str, Any]) -> bool:
         if not row.get("release_ids"):
             self.counts["skip-no-release-ids"] += 1
             return False
@@ -47,7 +50,7 @@ class FilesetImporter(EntityImporter):
                     return False
         return True
 
-    def parse_record(self, row):
+    def parse_record(self, row: Dict[str, Any]) -> Optional[FilesetEntity]:
 
         fse = entity_from_dict(
             row,
@@ -57,7 +60,7 @@ class FilesetImporter(EntityImporter):
         fse = self.generic_fileset_cleanups(fse)
         return fse
 
-    def try_update(self, fse):
+    def try_update(self, fse: FilesetEntity) -> bool:
 
         if not self.skip_release_fileset_check:
             for release_id in fse.release_ids:
@@ -74,7 +77,7 @@ class FilesetImporter(EntityImporter):
         # do the insert
         return True
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[FilesetEntity]) -> None:
         self.api.create_fileset_auto_batch(
             fatcat_openapi_client.FilesetAutoBatch(
                 editgroup=fatcat_openapi_client.Editgroup(
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index f7bb5357..830c9bbb 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -2,8 +2,10 @@
 
 import base64
 import json
+from typing import Any, Dict, List, Optional
 
 import fatcat_openapi_client
+from fatcat_openapi_client import ApiClient, FileEntity, ReleaseEntity
 
 from .common import EntityImporter, clean, make_rel_url
 
@@ -22,7 +24,7 @@ class GrobidMetadataImporter(EntityImporter):
     TODO: relaxing 'None' constraint on parse_record() might make this refactor-able.
     """
 
-    def __init__(self, api, **kwargs):
+    def __init__(self, api: ApiClient, **kwargs) -> None:
 
         eg_desc = kwargs.get(
             "editgroup_description",
@@ -34,10 +36,10 @@ class GrobidMetadataImporter(EntityImporter):
         self.default_link_rel = kwargs.get("default_link_rel", "web")
         self.longtail_oa = kwargs.get("longtail_oa", False)
 
-    def want(self, raw_record):
+    def want(self, raw_record: Any) -> bool:
         return True
 
-    def parse_record(self, row):
+    def parse_record(self, row: str) -> Optional[FileEntity]:
 
         fields = row.split("\t")
         sha1_key = fields[0]
@@ -72,12 +74,12 @@ class GrobidMetadataImporter(EntityImporter):
         fe.release_ids.append(release_edit.ident)
         return fe
 
-    def parse_grobid_json(self, obj):
+    def parse_grobid_json(self, obj: Dict[str, Any]) -> Optional[ReleaseEntity]:
 
         if not obj.get("title"):
             return None
 
-        extra_grobid = dict()
+        extra_grobid: Dict[str, Any] = dict()
 
         abstract = obj.get("abstract")
         if abstract and len(abstract) < MAX_ABSTRACT_BYTES and len(abstract) > 10:
@@ -103,7 +105,7 @@ class GrobidMetadataImporter(EntityImporter):
 
         refs = []
         for raw in obj.get("citations", []):
-            cite_extra = dict()
+            cite_extra: Dict[str, Any] = dict()
             year = None
             if raw.get("date"):
                 try:
@@ -162,13 +164,15 @@ class GrobidMetadataImporter(EntityImporter):
             publisher=clean(obj["journal"].get("publisher")),
             volume=clean(obj["journal"].get("volume")),
             issue=clean(obj["journal"].get("issue")),
-            abstracts=abstracts,
+            abstracts=abstracts or None,
             ext_ids=fatcat_openapi_client.ReleaseExtIds(),
-            extra=extra,
+            extra=extra or None,
         )
         return re
 
-    def parse_file_metadata(self, sha1_key, cdx, mimetype, file_size):
+    def parse_file_metadata(
+        self, sha1_key: str, cdx: Dict[str, Any], mimetype: str, file_size: int
+    ) -> FileEntity:
 
         sha1 = (
             base64.b16encode(base64.b32decode(sha1_key.replace("sha1:", "")))
@@ -197,11 +201,11 @@ class GrobidMetadataImporter(EntityImporter):
 
         return fe
 
-    def try_update(self, entity):
+    def try_update(self, re: FileEntity) -> bool:
         # did the exists check in 'parse_record()', because we needed to create a release
         return True
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[FileEntity]) -> None:
         self.api.create_file_auto_batch(
             fatcat_openapi_client.FileAutoBatch(
                 editgroup=fatcat_openapi_client.Editgroup(
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index e0a6c3f5..e13ce4bd 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -1,12 +1,23 @@
 import datetime
+from typing import Any, Dict, List, Optional
 
 import fatcat_openapi_client
+from fatcat_openapi_client import (
+    ApiClient,
+    FileEntity,
+    FilesetEntity,
+    FilesetUrl,
+    FileUrl,
+    WebcaptureEntity,
+)
 
 from .common import EntityImporter, make_rel_url
 
 
 class IngestFileResultImporter(EntityImporter):
-    def __init__(self, api, require_grobid=True, **kwargs):
+    def __init__(
+        self, api: fatcat_openapi_client.ApiClient, require_grobid: bool = True, **kwargs
+    ) -> None:
 
         eg_desc = (
             kwargs.pop("editgroup_description", None)
@@ -41,7 +52,7 @@ class IngestFileResultImporter(EntityImporter):
         if kwargs.get("skip_source_allowlist", False):
             self.ingest_request_source_allowlist = []
 
-    def want_file(self, row) -> bool:
+    def want_file(self, row: Dict[str, Any]) -> bool:
         """
         File-specific part of want(). Generic across general ingest and save-paper-now.
         """
@@ -76,7 +87,7 @@ class IngestFileResultImporter(EntityImporter):
 
         return True
 
-    def want_ingest(self, row) -> bool:
+    def want_ingest(self, row: Dict[str, Any]) -> bool:
         """
         Sandcrawler ingest-specific part of want(). Generic across file and
         webcapture ingest.
@@ -115,7 +126,7 @@ class IngestFileResultImporter(EntityImporter):
 
         return True
 
-    def want(self, row):
+    def want(self, row: Dict[str, Any]) -> bool:
         """
         Overall logic here probably needs work (TODO):
 
@@ -137,7 +148,7 @@ class IngestFileResultImporter(EntityImporter):
 
         return True
 
-    def parse_ingest_release_ident(self, row):
+    def parse_ingest_release_ident(self, row: Dict[str, Any]) -> Optional[str]:
 
         request = row["request"]
         fatcat = request.get("fatcat")
@@ -178,7 +189,7 @@ class IngestFileResultImporter(EntityImporter):
 
         return release_ident
 
-    def parse_terminal(self, row):
+    def parse_terminal(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
         terminal = row.get("terminal")
         if not terminal:
             # support old cdx-only ingest results
@@ -206,7 +217,7 @@ class IngestFileResultImporter(EntityImporter):
         )
         return terminal
 
-    def parse_urls(self, row, terminal):
+    def parse_urls(self, row: Dict[str, Any], terminal: Dict[str, Any]) -> List[FileUrl]:
 
         request = row["request"]
 
@@ -224,10 +235,10 @@ class IngestFileResultImporter(EntityImporter):
         )
         urls = [url, ("webarchive", wayback)]
 
-        urls = [fatcat_openapi_client.FileUrl(rel=rel, url=url) for (rel, url) in urls]
+        urls = [FileUrl(rel=rel, url=url) for (rel, url) in urls]
         return urls
 
-    def parse_edit_extra(self, row):
+    def parse_edit_extra(self, row: Dict[str, Any]) -> Dict[str, Any]:
 
         request = row["request"]
         edit_extra = dict()
@@ -251,7 +262,7 @@ class IngestFileResultImporter(EntityImporter):
 
         return edit_extra
 
-    def parse_record(self, row):
+    def parse_record(self, row: Dict[str, Any]) -> FileEntity:
 
         request = row["request"]
         file_meta = row["file_meta"]
@@ -283,7 +294,7 @@ class IngestFileResultImporter(EntityImporter):
 
         urls = self.parse_urls(row, terminal)
 
-        fe = fatcat_openapi_client.FileEntity(
+        fe = FileEntity(
             md5=file_meta["md5hex"],
             sha1=file_meta["sha1hex"],
             sha256=file_meta["sha256hex"],
@@ -298,7 +309,7 @@ class IngestFileResultImporter(EntityImporter):
             fe.edit_extra = edit_extra
         return fe
 
-    def try_update(self, fe):
+    def try_update(self, fe: FileEntity) -> bool:
         # lookup sha1, or create new entity
         existing = None
         try:
@@ -330,7 +341,7 @@ class IngestFileResultImporter(EntityImporter):
         self.counts["skip-update-disabled"] += 1
         return False
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[FileEntity]) -> None:
         if self.submit_mode:
             eg = self.api.create_editgroup(
                 fatcat_openapi_client.Editgroup(
@@ -358,7 +369,7 @@ class SavePaperNowFileImporter(IngestFileResultImporter):
     them for further human review (as opposed to accepting by default).
     """
 
-    def __init__(self, api, submit_mode=True, **kwargs):
+    def __init__(self, api: ApiClient, submit_mode: bool = True, **kwargs) -> None:
 
         eg_desc = (
             kwargs.pop("editgroup_description", None)
@@ -371,7 +382,7 @@ class SavePaperNowFileImporter(IngestFileResultImporter):
         kwargs["do_updates"] = False
         super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
-    def want(self, row):
+    def want(self, row: Dict[str, Any]) -> bool:
 
         source = row["request"].get("ingest_request_source")
         if not source:
@@ -397,7 +408,7 @@ class IngestWebResultImporter(IngestFileResultImporter):
     into webcapture objects.
     """
 
-    def __init__(self, api, **kwargs):
+    def __init__(self, api: ApiClient, **kwargs) -> None:
 
         eg_desc = (
             kwargs.pop("editgroup_description", None)
@@ -408,7 +419,7 @@ class IngestWebResultImporter(IngestFileResultImporter):
         kwargs["do_updates"] = False
         super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
-    def want(self, row):
+    def want(self, row: Dict[str, Any]) -> bool:
 
         if not self.want_ingest(row):
             return False
@@ -426,7 +437,7 @@ class IngestWebResultImporter(IngestFileResultImporter):
 
         return True
 
-    def parse_record(self, row):
+    def parse_record(self, row: Dict[str, Any]) -> Optional[WebcaptureEntity]:
 
         request = row["request"]
         file_meta = row["file_meta"]
@@ -512,7 +523,7 @@ class IngestWebResultImporter(IngestFileResultImporter):
             wc.edit_extra = edit_extra
         return wc
 
-    def try_update(self, wc):
+    def try_update(self, wc: WebcaptureEntity) -> bool:
 
         # check for existing edits-in-progress with same URL
         for other in self._entity_queue:
@@ -539,7 +550,7 @@ class IngestWebResultImporter(IngestFileResultImporter):
         # so go ahead and insert!
         return True
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[WebcaptureEntity]) -> None:
         if self.submit_mode:
             eg = self.api.create_editgroup(
                 fatcat_openapi_client.Editgroup(
@@ -565,7 +576,7 @@ class SavePaperNowWebImporter(IngestWebResultImporter):
     Like SavePaperNowFileImporter, but for webcapture (HTML) ingest.
     """
 
-    def __init__(self, api, submit_mode=True, **kwargs):
+    def __init__(self, api: ApiClient, submit_mode: bool = True, **kwargs) -> None:
 
         eg_desc = (
             kwargs.pop("editgroup_description", None)
@@ -577,7 +588,7 @@ class SavePaperNowWebImporter(IngestWebResultImporter):
         kwargs["do_updates"] = False
         super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
-    def want(self, row):
+    def want(self, row: Dict[str, Any]) -> bool:
         """
         Relatively custom want() here, a synthesis of other filters.
 
@@ -617,7 +628,7 @@ class IngestFilesetResultImporter(IngestFileResultImporter):
     results into fileset objects.
     """
 
-    def __init__(self, api, **kwargs):
+    def __init__(self, api: ApiClient, **kwargs) -> None:
 
         eg_desc = (
             kwargs.pop("editgroup_description", None)
@@ -629,7 +640,7 @@ class IngestFilesetResultImporter(IngestFileResultImporter):
         super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
         self.max_file_count = 300
 
-    def want_fileset(self, row):
+    def want_fileset(self, row: Dict[str, Any]) -> bool:
 
         if not row.get("manifest") or len(row.get("manifest")) == 0:
             self.counts["skip-empty-manifest"] += 1
@@ -645,7 +656,7 @@ class IngestFilesetResultImporter(IngestFileResultImporter):
 
         return True
 
-    def want(self, row):
+    def want(self, row: Dict[str, Any]) -> bool:
 
         if not self.want_ingest(row):
             return False
@@ -662,7 +673,7 @@ class IngestFilesetResultImporter(IngestFileResultImporter):
 
         return True
 
-    def parse_fileset_urls(self, row):
+    def parse_fileset_urls(self, row: Dict[str, Any]) -> List[FilesetUrl]:
         if not row.get("strategy"):
             return []
         strategy = row["strategy"]
@@ -717,7 +728,7 @@ class IngestFilesetResultImporter(IngestFileResultImporter):
             )
         return urls
 
-    def parse_record(self, row):
+    def parse_record(self, row: Dict[str, Any]) -> FilesetEntity:
 
         request = row["request"]
 
@@ -735,7 +746,7 @@ class IngestFilesetResultImporter(IngestFileResultImporter):
             self.counts["skip-release-not-found"] += 1
             return None
 
-        entity_extra = dict()
+        entity_extra: Dict[str, Any] = dict()
         edit_extra = self.parse_edit_extra(row)
         edit_extra["ingest_strategy"] = row["ingest_strategy"]
         if row.get("platform"):
@@ -789,12 +800,12 @@ class IngestFilesetResultImporter(IngestFileResultImporter):
             fe.edit_extra = edit_extra
         return fe
 
-    def try_update(self, wc):
+    def try_update(self, fse: FilesetEntity) -> bool:
 
         # check for existing edits-in-progress with same URL
         for other in self._entity_queue:
             # XXX: how to duplicate check?
-            if other.original_url == wc.original_url:
+            if other.original_url == fse.original_url:
                 self.counts["skip-in-queue"] += 1
                 return False
 
@@ -802,12 +813,12 @@ class IngestFilesetResultImporter(IngestFileResultImporter):
         # existing = None
 
         # NOTE: in lieu of existing checks (by lookup), only allow one fileset per release
-        release = self.api.get_release(wc.release_ids[0], expand="filesets")
+        release = self.api.get_release(fse.release_ids[0], expand="filesets")
         if release.filesets:
             # XXX: how to duplicate check filesets?
             # check if this is an existing match, or just a similar hit
             for other in release.filesets:
-                if wc.original_url == other.original_url:
+                if fse.original_url == other.original_url:
                     # TODO: compare very similar timestamps of same time (different formats)
                     self.counts["exists"] += 1
                     return False
@@ -816,7 +827,7 @@ class IngestFilesetResultImporter(IngestFileResultImporter):
 
         return True
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[FilesetEntity]) -> None:
         if self.submit_mode:
             eg = self.api.create_editgroup(
                 fatcat_openapi_client.Editgroup(
@@ -842,7 +853,7 @@ class SavePaperNowFilesetImporter(IngestFilesetResultImporter):
     Like SavePaperNowFileImporter, but for fileset/dataset ingest.
     """
 
-    def __init__(self, api, submit_mode=True, **kwargs):
+    def __init__(self, api: ApiClient, submit_mode: bool = True, **kwargs) -> None:
 
         eg_desc = (
             kwargs.pop("editgroup_description", None)
@@ -854,7 +865,7 @@ class SavePaperNowFilesetImporter(IngestFilesetResultImporter):
         kwargs["do_updates"] = False
         super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
-    def want(self, row):
+    def want(self, row: Dict[str, Any]) -> bool:
 
         source = row["request"].get("ingest_request_source")
         if not source:
diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py
index a7e06e6a..f540c264 100644
--- a/python/fatcat_tools/importers/jalc.py
+++ b/python/fatcat_tools/importers/jalc.py
@@ -1,16 +1,19 @@
 import datetime
 import sqlite3
 import sys
+from typing import Any, Dict, List, Optional, Sequence
 
 import fatcat_openapi_client
 from bs4 import BeautifulSoup
+from fatcat_openapi_client import ApiClient, ReleaseContrib, ReleaseEntity
 
 from fatcat_tools.normal import clean_doi
 
 from .common import DATE_FMT, EntityImporter, clean, is_cjk
 
 
-def parse_jalc_persons(raw_persons):
+# TODO: should be List[Tag] not List[Any] for full type annotations
+def parse_jalc_persons(raw_persons: List[Any]) -> List[ReleaseContrib]:
     """
     For the most part, JALC DC names are in either japanese or english. The
     two common patterns are a list alternating between the two (in which case
@@ -47,7 +50,7 @@ def parse_jalc_persons(raw_persons):
         if lang == "en" and surname and given_name:
             # english names order is flipped
             name = "{} {}".format(given_name, surname)
-        rc = fatcat_openapi_client.ReleaseContrib(
+        rc = ReleaseContrib(
             raw_name=name, surname=surname, given_name=given_name, role="author"
         )
         # add an extra hint field; won't end up in serialized object
@@ -100,7 +103,7 @@ class JalcImporter(EntityImporter):
     NOTE: some JALC DOIs seem to get cross-registered with Crossref
     """
 
-    def __init__(self, api, issn_map_file, **kwargs):
+    def __init__(self, api: ApiClient, issn_map_file: Sequence, **kwargs) -> None:
 
         eg_desc = kwargs.get("editgroup_description", "Automated import of JALC DOI metadata")
         eg_extra = kwargs.get("editgroup_extra", dict())
@@ -125,7 +128,7 @@ class JalcImporter(EntityImporter):
 
         self.read_issn_map_file(issn_map_file)
 
-    def lookup_ext_ids(self, doi):
+    def lookup_ext_ids(self, doi: str) -> Dict[str, Any]:
         if self.extid_map_db is None:
             return dict(
                 core_id=None,
@@ -158,10 +161,12 @@ class JalcImporter(EntityImporter):
             jstor_id=None,
         )
 
-    def want(self, obj):
+    def want(self, raw_record: Any) -> bool:
         return True
 
-    def parse_record(self, record):
+    # TODO: mypy annotations partially skipped on this function ('Any' instead of 'Tag')
+    # for now because XML parsing annotations are large and complex
+    def parse_record(self, record: Any) -> Optional[ReleaseEntity]:
         """
         record is a beautiful soup object
         returns a ReleaseEntity, or None
@@ -170,8 +175,8 @@ class JalcImporter(EntityImporter):
         fields.
         """
 
-        extra = dict()
-        extra_jalc = dict()
+        extra: Dict[str, Any] = dict()
+        extra_jalc: Dict[str, Any] = dict()
 
         titles = record.find_all("title")
         if not titles:
@@ -254,7 +259,7 @@ class JalcImporter(EntityImporter):
 
         publisher = None
         container_name = None
-        container_extra = dict()
+        container_extra: Dict[str, Any] = dict()
 
         if record.publicationName:
             pubs = [
@@ -335,7 +340,7 @@ class JalcImporter(EntityImporter):
         if not title:
             return None
 
-        re = fatcat_openapi_client.ReleaseEntity(
+        re = ReleaseEntity(
             work_id=None,
             title=title,
             original_title=clean(original_title),
@@ -364,7 +369,7 @@ class JalcImporter(EntityImporter):
         )
         return re
 
-    def try_update(self, re):
+    def try_update(self, re: ReleaseEntity) -> bool:
 
         # lookup existing DOI
         existing = None
@@ -384,7 +389,7 @@ class JalcImporter(EntityImporter):
 
         return True
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[ReleaseEntity]) -> None:
         self.api.create_release_auto_batch(
             fatcat_openapi_client.ReleaseAutoBatch(
                 editgroup=fatcat_openapi_client.Editgroup(
@@ -394,7 +399,7 @@ class JalcImporter(EntityImporter):
             )
         )
 
-    def parse_file(self, handle):
+    def parse_file(self, handle: Any) -> None:
         """
         Helper for testing; can run this file stand-alone instead of using a pusher
         """
@@ -408,4 +413,3 @@ class JalcImporter(EntityImporter):
             # print(json.dumps(resp))
             print(resp)
             # sys.exit(-1)
-
diff --git a/python/fatcat_tools/importers/journal_metadata.py b/python/fatcat_tools/importers/journal_metadata.py
index 6d1fefa3..a45e49f3 100644
--- a/python/fatcat_tools/importers/journal_metadata.py
+++ b/python/fatcat_tools/importers/journal_metadata.py
@@ -1,9 +1,12 @@
+from typing import Any, Dict, List, Optional
+
 import fatcat_openapi_client
+from fatcat_openapi_client import ApiClient, ContainerEntity
 
 from .common import EntityImporter, clean
 
 
-def or_none(s):
+def or_none(s: Optional[str]) -> Optional[str]:
     if s is None:
         return None
     if len(s) == 0:
@@ -11,7 +14,7 @@ def or_none(s):
     return s
 
 
-def truthy(s):
+def truthy(s: Optional[str]) -> Optional[bool]:
     if s is None:
         return None
     s = s.lower()
@@ -32,7 +35,7 @@ class JournalMetadataImporter(EntityImporter):
     See guide for details on the many 'extra' fields used here.
     """
 
-    def __init__(self, api, **kwargs):
+    def __init__(self, api: ApiClient, **kwargs) -> None:
 
         eg_desc = kwargs.get(
             "editgroup_description",
@@ -42,12 +45,12 @@ class JournalMetadataImporter(EntityImporter):
         eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.JournalMetadataImporter")
         super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
-    def want(self, raw_record):
+    def want(self, raw_record: Any) -> bool:
         if raw_record.get("issnl") and raw_record.get("name"):
             return True
         return False
 
-    def parse_record(self, row):
+    def parse_record(self, row: Dict[str, Any]) -> Optional[ContainerEntity]:
         """
         row is a python dict (parsed from JSON).
 
@@ -106,7 +109,7 @@ class JournalMetadataImporter(EntityImporter):
         if not name:
             return None
 
-        ce = fatcat_openapi_client.ContainerEntity(
+        ce = ContainerEntity(
             issnl=row["issnl"],
             issne=row.get("issne"),
             issnp=row.get("issnp"),
@@ -118,7 +121,7 @@ class JournalMetadataImporter(EntityImporter):
         )
         return ce
 
-    def try_update(self, ce):
+    def try_update(self, ce: ContainerEntity) -> bool:
 
         existing = None
         try:
@@ -148,7 +151,7 @@ class JournalMetadataImporter(EntityImporter):
         # if we got this far, it's a bug
         raise NotImplementedError
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[ContainerEntity]) -> None:
         self.api.create_container_auto_batch(
             fatcat_openapi_client.ContainerAutoBatch(
                 editgroup=fatcat_openapi_client.Editgroup(
diff --git a/python/fatcat_tools/importers/jstor.py b/python/fatcat_tools/importers/jstor.py
index 287fb308..0a6eec65 100644
--- a/python/fatcat_tools/importers/jstor.py
+++ b/python/fatcat_tools/importers/jstor.py
@@ -2,9 +2,11 @@ import datetime
 import json
 import sys
 import warnings
+from typing import Any, Dict, List, Optional, Sequence
 
 import fatcat_openapi_client
 from bs4 import BeautifulSoup
+from fatcat_openapi_client import ApiClient, ReleaseEntity
 
 from .common import LANG_MAP_MARC, EntityImporter, clean
 from .crossref import CONTAINER_TYPE_MAP
@@ -32,7 +34,7 @@ class JstorImporter(EntityImporter):
     Collection)
     """
 
-    def __init__(self, api, issn_map_file, **kwargs):
+    def __init__(self, api: ApiClient, issn_map_file: Sequence, **kwargs) -> None:
 
         eg_desc = kwargs.get("editgroup_description", "Automated import of JSTOR XML metadata")
         eg_extra = kwargs.get("editgroup_extra", dict())
@@ -49,19 +51,22 @@ class JstorImporter(EntityImporter):
 
         self.read_issn_map_file(issn_map_file)
 
-    def map_container_type(self, crossref_type):
+    def map_container_type(self, crossref_type: Optional[str]) -> Optional[str]:
         return CONTAINER_TYPE_MAP.get(crossref_type)
 
-    def want(self, obj):
+    def want(self, raw_record: Any) -> bool:
         return True
 
-    def parse_record(self, article):
+    # TODO: mypy annotations partially skipped on this function ('Any' instead of
+    # 'BeautifulSoup') for now because XML parsing annotations are large and
+    # complex
+    def parse_record(self, article: Any) -> Optional[ReleaseEntity]:
 
         journal_meta = article.front.find("journal-meta")
         article_meta = article.front.find("article-meta")
 
-        extra = dict()
-        extra_jstor = dict()
+        extra: Dict[str, Any] = dict()
+        extra_jstor: Dict[str, Any] = dict()
 
         release_type = JSTOR_TYPE_MAP.get(article["article-type"])
         title = article_meta.find("article-title")
@@ -269,7 +274,7 @@ class JstorImporter(EntityImporter):
         )
         return re
 
-    def try_update(self, re):
+    def try_update(self, re: ReleaseEntity) -> bool:
 
         # first, lookup existing by JSTOR id (which much be defined)
         existing = None
@@ -313,7 +318,7 @@ class JstorImporter(EntityImporter):
 
         return True
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[ReleaseEntity]) -> None:
         self.api.create_release_auto_batch(
             fatcat_openapi_client.ReleaseAutoBatch(
                 editgroup=fatcat_openapi_client.Editgroup(
@@ -323,7 +328,7 @@ class JstorImporter(EntityImporter):
             )
         )
 
-    def parse_file(self, handle):
+    def parse_file(self, handle: Any) -> None:
 
         # 1. open with beautiful soup
         soup = BeautifulSoup(handle, "xml")
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index 7c2a6a87..9c80dd72 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -1,4 +1,7 @@
+from typing import Any, Dict, List, Optional
+
 import fatcat_openapi_client
+from fatcat_openapi_client import ApiClient, FileEntity
 
 from fatcat_tools.normal import clean_doi
 
@@ -29,7 +32,7 @@ class MatchedImporter(EntityImporter):
     - core_id, wikidata_id, pmcid, pmid: not as lists
     """
 
-    def __init__(self, api, **kwargs):
+    def __init__(self, api: ApiClient, **kwargs) -> None:
 
         eg_desc = (
             kwargs.pop("editgroup_description", None)
@@ -41,10 +44,10 @@ class MatchedImporter(EntityImporter):
         self.default_link_rel = kwargs.get("default_link_rel", "web")
         self.default_mimetype = kwargs.get("default_mimetype", None)
 
-    def want(self, raw_record):
+    def want(self, raw_record: Any) -> bool:
         return True
 
-    def parse_record(self, obj):
+    def parse_record(self, obj: Dict[str, Any]) -> Optional[FileEntity]:
         dois = [d.lower() for d in obj.get("dois", [])]
 
         # lookup dois
@@ -129,7 +132,7 @@ class MatchedImporter(EntityImporter):
             if urls[0].url.endswith(".pdf"):
                 mimetype = "application/pdf"
 
-        fe = fatcat_openapi_client.FileEntity(
+        fe = FileEntity(
             md5=obj.get("md5"),
             sha1=obj["sha1"],
             sha256=obj.get("sha256"),
@@ -140,7 +143,7 @@ class MatchedImporter(EntityImporter):
         )
         return fe
 
-    def try_update(self, fe):
+    def try_update(self, fe: FileEntity) -> bool:
         # lookup sha1, or create new entity
         existing = None
         try:
@@ -207,7 +210,7 @@ class MatchedImporter(EntityImporter):
         self.counts["update"] += 1
         return False
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[FileEntity]) -> None:
         self.api.create_file_auto_batch(
             fatcat_openapi_client.FileAutoBatch(
                 editgroup=fatcat_openapi_client.Editgroup(
diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py
index b514e6e5..430cdd0f 100644
--- a/python/fatcat_tools/importers/orcid.py
+++ b/python/fatcat_tools/importers/orcid.py
@@ -1,11 +1,13 @@
 import sys
+from typing import Any, Dict, List, Optional
 
 import fatcat_openapi_client
+from fatcat_openapi_client import ApiClient, CreatorEntity
 
 from .common import EntityImporter, clean
 
 
-def value_or_none(e):
+def value_or_none(e: Any) -> Any:
     if type(e) == dict:
         e = e.get("value")
     if type(e) == str and len(e) == 0:
@@ -22,7 +24,7 @@ def value_or_none(e):
 
 
 class OrcidImporter(EntityImporter):
-    def __init__(self, api, **kwargs):
+    def __init__(self, api: ApiClient, **kwargs) -> None:
 
         eg_desc = kwargs.get(
             "editgroup_description",
@@ -32,10 +34,10 @@ class OrcidImporter(EntityImporter):
         eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.OrcidImporter")
         super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
 
-    def want(self, raw_record):
+    def want(self, raw_record: Any) -> bool:
         return True
 
-    def parse_record(self, obj):
+    def parse_record(self, obj: Dict[str, Any]) -> Optional[CreatorEntity]:
         """
         obj is a python dict (parsed from json).
         returns a CreatorEntity
@@ -67,7 +69,7 @@ class OrcidImporter(EntityImporter):
         if not display:
             # must have *some* name
             return None
-        ce = fatcat_openapi_client.CreatorEntity(
+        ce = CreatorEntity(
             orcid=orcid,
             given_name=clean(given),
             surname=clean(sur),
@@ -76,10 +78,10 @@ class OrcidImporter(EntityImporter):
         )
         return ce
 
-    def try_update(self, raw_record):
+    def try_update(self, ce: CreatorEntity) -> bool:
         existing = None
         try:
-            existing = self.api.lookup_creator(orcid=raw_record.orcid)
+            existing = self.api.lookup_creator(orcid=ce.orcid)
         except fatcat_openapi_client.rest.ApiException as err:
             if err.status != 404:
                 raise err
@@ -92,7 +94,7 @@ class OrcidImporter(EntityImporter):
 
         return True
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[CreatorEntity]) -> None:
         self.api.create_creator_auto_batch(
             fatcat_openapi_client.CreatorAutoBatch(
                 editgroup=fatcat_openapi_client.Editgroup(
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index 97433445..41268925 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -2,9 +2,11 @@ import datetime
 import json
 import sys
 import warnings
+from typing import Any, Dict, List, Optional, Sequence
 
 import fatcat_openapi_client
 from bs4 import BeautifulSoup
+from fatcat_openapi_client import ApiClient, ReleaseEntity
 
 from fatcat_tools.normal import clean_doi, clean_issn, clean_pmcid, clean_pmid
 
@@ -328,7 +330,9 @@ class PubmedImporter(EntityImporter):
     TODO: MEDLINE doesn't include PMC/OA license; could include in importer?
     """
 
-    def __init__(self, api, issn_map_file, lookup_refs=True, **kwargs):
+    def __init__(
+        self, api: ApiClient, issn_map_file: Sequence, lookup_refs: bool = True, **kwargs
+    ):
 
         eg_desc = kwargs.get(
             "editgroup_description", "Automated import of PubMed/MEDLINE XML metadata"
@@ -347,10 +351,13 @@ class PubmedImporter(EntityImporter):
         self.create_containers = kwargs.get("create_containers", True)
         self.read_issn_map_file(issn_map_file)
 
-    def want(self, obj):
+    def want(self, raw_record: BeautifulSoup) -> bool:
         return True
 
-    def parse_record(self, a):
+    # TODO: mypy annotations partially skipped on this function ('Any' instead of
+    # 'BeautifulSoup') for now because XML parsing annotations are large and
+    # complex
+    def parse_record(self, a: Any) -> ReleaseEntity:
 
         medline = a.MedlineCitation
         # PubmedData isn't required by DTD, but seems to always be present
@@ -482,8 +489,8 @@ class PubmedImporter(EntityImporter):
             pub_date = journal.PubDate
         if not pub_date:
             pub_date = journal.JournalIssue.PubDate
-        release_date = None
-        release_year = None
+        release_date: Optional[str] = None
+        release_year: Optional[int] = None
         if pub_date.Year:
             release_year = int(pub_date.Year.string)
             if pub_date.find("Day") and pub_date.find("Month"):
@@ -578,7 +585,7 @@ class PubmedImporter(EntityImporter):
                         abstracts.append(abst)
         other_abstracts = medline.find_all("OtherAbstract")
         for other in other_abstracts:
-            lang = "en"
+            lang: Optional[str] = "en"
             if other.get("Language"):
                 lang = LANG_MAP_MARC.get(other["Language"])
             abst = fatcat_openapi_client.ReleaseAbstract(
@@ -666,7 +673,7 @@ class PubmedImporter(EntityImporter):
             # that there may be multiple ReferenceList (eg, sometimes one per
             # Reference)
             for ref in pubmed.find_all("Reference"):
-                ref_extra = dict()
+                ref_extra: Dict[str, Any] = dict()
                 ref_doi = ref.find("ArticleId", IdType="doi")
                 if ref_doi:
                     ref_doi = clean_doi(ref_doi.string)
@@ -740,7 +747,7 @@ class PubmedImporter(EntityImporter):
         )
         return re
 
-    def try_update(self, re):
+    def try_update(self, re: ReleaseEntity) -> bool:
 
         # first, lookup existing by PMID (which must be defined)
         existing = None
@@ -831,7 +838,7 @@ class PubmedImporter(EntityImporter):
 
         return True
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[ReleaseEntity]) -> None:
         self.api.create_release_auto_batch(
             fatcat_openapi_client.ReleaseAutoBatch(
                 editgroup=fatcat_openapi_client.Editgroup(
@@ -841,7 +848,7 @@ class PubmedImporter(EntityImporter):
             )
         )
 
-    def parse_file(self, handle):
+    def parse_file(self, handle: Any) -> None:
 
         # 1. open with beautiful soup
         soup = BeautifulSoup(handle, "xml")
diff --git a/python/fatcat_tools/importers/shadow.py b/python/fatcat_tools/importers/shadow.py
index 78eeec7a..520258cb 100644
--- a/python/fatcat_tools/importers/shadow.py
+++ b/python/fatcat_tools/importers/shadow.py
@@ -1,4 +1,7 @@
+from typing import Any, Dict, List, Optional
+
 import fatcat_openapi_client
+from fatcat_openapi_client import ApiClient, FileEntity
 
 from fatcat_tools.normal import clean_doi, clean_isbn13, clean_pmid
 
@@ -27,7 +30,7 @@ class ShadowLibraryImporter(EntityImporter):
         - datetime
     """
 
-    def __init__(self, api, **kwargs):
+    def __init__(self, api: ApiClient, **kwargs) -> None:
 
         eg_desc = (
             kwargs.pop("editgroup_description", None)
@@ -38,7 +41,7 @@ class ShadowLibraryImporter(EntityImporter):
         super().__init__(api, editgroup_description=eg_desc, editgroup_extra=eg_extra, **kwargs)
         self.default_link_rel = kwargs.get("default_link_rel", "web")
 
-    def want(self, raw_record):
+    def want(self, raw_record: Any) -> bool:
         """
         Only want to import records with complete file-level metadata
         """
@@ -51,7 +54,7 @@ class ShadowLibraryImporter(EntityImporter):
             return False
         return True
 
-    def parse_record(self, obj):
+    def parse_record(self, obj: Dict[str, Any]) -> Optional[FileEntity]:
         """
         We do the release lookup in this method. Try DOI, then PMID, last ISBN13.
         """
@@ -104,7 +107,7 @@ class ShadowLibraryImporter(EntityImporter):
             urls.append(("webarchive", wayback))
         urls = [fatcat_openapi_client.FileUrl(rel=rel, url=url) for (rel, url) in urls]
 
-        fe = fatcat_openapi_client.FileEntity(
+        fe = FileEntity(
             md5=obj["file_meta"]["md5hex"],
             sha1=obj["file_meta"]["sha1hex"],
             sha256=obj["file_meta"]["sha256hex"],
@@ -116,7 +119,7 @@ class ShadowLibraryImporter(EntityImporter):
         )
         return fe
 
-    def try_update(self, fe):
+    def try_update(self, fe: FileEntity) -> Optional[bool]:
         # lookup sha1, or create new entity
         existing = None
         try:
@@ -189,7 +192,7 @@ class ShadowLibraryImporter(EntityImporter):
         self.counts["update"] += 1
         return False
 
-    def insert_batch(self, batch):
+    def insert_batch(self, batch: List[FileEntity]) -> None:
         self.api.create_file_auto_batch(
             fatcat_openapi_client.FileAutoBatch(
                 editgroup=fatcat_openapi_client.Editgroup(
diff --git a/python/fatcat_tools/importers/wayback_static.py b/python/fatcat_tools/importers/wayback_static.py
index 22fefad3..f9ee29c9 100755
--- a/python/fatcat_tools/importers/wayback_static.py
+++ b/python/fatcat_tools/importers/wayback_static.py
@@ -12,12 +12,14 @@ import hashlib
 import json
 import subprocess
 import sys
+from typing import Any, Dict, List, Optional, Tuple
 
 import requests
 from bs4 import BeautifulSoup
 from fatcat_openapi_client import (
     ApiClient,
     Editgroup,
+    EntityEdit,
     WebcaptureCdxLine,
     WebcaptureEntity,
     WebcaptureUrl,
@@ -30,7 +32,7 @@ GWB_URL_BASE = "https://web.archive.org/web"
 REQ_SESSION = requests.Session()
 
 
-def parse_wbm_url(url):
+def parse_wbm_url(url: str) -> Tuple[str, datetime.datetime, str]:
     """Takes a wayback machine URL, and returns a tuple:
 
     (timestamp, datetime, original_url)
@@ -42,7 +44,7 @@ def parse_wbm_url(url):
     return (chunks[4], parse_wbm_timestamp(chunks[4]), "/".join(chunks[5:]))
 
 
-def test_parse_wbm_url():
+def test_parse_wbm_url() -> None:
     u = "http://web.archive.org/web/20010712114837/http://www.dlib.org/dlib/june01/reich/06reich.html"
     assert parse_wbm_url(u) == (
         "20010712114837",
@@ -51,7 +53,7 @@ def test_parse_wbm_url():
     )
 
 
-def parse_wbm_timestamp(timestamp):
+def parse_wbm_timestamp(timestamp: str) -> datetime.datetime:
     """
     Takes a complete WBM timestamp string (like "20020327115625") and returns a
     python datetime object (UTC)
@@ -71,18 +73,20 @@ def parse_wbm_timestamp(timestamp):
     )
 
 
-def test_parse_wbm_timestamp():
+def test_parse_wbm_timestamp() -> None:
     assert parse_wbm_timestamp("20010712114837") == datetime.datetime(2001, 7, 12, 11, 48, 37)
 
 
-def fetch_wbm(url):
+def fetch_wbm(url: str) -> bytes:
     resp = REQ_SESSION.get(url)
     resp.raise_for_status()
     assert resp.content
     return resp.content
 
 
-def lookup_cdx(embed_url, verify_hashes=True, cdx_output=None):
+def lookup_cdx(
+    embed_url: str, verify_hashes: bool = True, cdx_output: Any = None
+) -> Optional[WebcaptureCdxLine]:
     sys.stderr.write(embed_url + "\n")
     assert embed_url.startswith("/web/")
     embed_url = embed_url.split("/")
@@ -132,7 +136,7 @@ def lookup_cdx(embed_url, verify_hashes=True, cdx_output=None):
         return None
 
 
-def wayback_url_to_relative(url):
+def wayback_url_to_relative(url: str) -> Optional[str]:
     """
     Wayback URLs can be relative or absolute in rewritten documents. This
     function converts any form of rewritten URL to a relative (to
@@ -149,7 +153,7 @@ def wayback_url_to_relative(url):
         return None
 
 
-def extract_embeds(soup):
+def extract_embeds(soup: BeautifulSoup) -> List[str]:
 
     embeds = set()
 
@@ -175,7 +179,7 @@ def extract_embeds(soup):
     return list(embeds)
 
 
-def static_wayback_webcapture(wayback_url, cdx_output=None):
+def static_wayback_webcapture(wayback_url: str, cdx_output: Any = None) -> WebcaptureEntity:
     """
     Given a complete wayback machine capture URL, like:
 
@@ -214,7 +218,9 @@ def static_wayback_webcapture(wayback_url, cdx_output=None):
     return wc
 
 
-def auto_wayback_static(api, release_id, wayback_url, editgroup_id=None):
+def auto_wayback_static(
+    api: ApiClient, release_id: str, wayback_url: str, editgroup_id: Optional[str] = None
+) -> Tuple[Optional[str], Optional[EntityEdit]]:
     """
     Returns a tuple: (editgroup_id, edit). If failed, both are None
     """
@@ -250,7 +256,7 @@ def auto_wayback_static(api, release_id, wayback_url, editgroup_id=None):
     return (editgroup_id, edit)
 
 
-def main():
+def main() -> None:
     parser = argparse.ArgumentParser()
     parser.add_argument("--verbose", action="store_true", help="verbose output")
     parser.add_argument("wayback_url", type=str, help="URL of wayback capture to extract from")
author	Bryan Newbold <bnewbold@robocracy.org>	2021-11-03 12:57:32 -0700
committer	Bryan Newbold <bnewbold@robocracy.org>	2021-11-03 16:46:07 -0700
commit	caf1cb316ed18820f3239a285ef14bf45ef963a2 (patch)
tree	2d3713773dac769878154f61c2eb9f7804f1a60c /python/fatcat_tools/importers
parent	10a2374051568edf3d872988e730328d899a0fdd (diff)
download	fatcat-caf1cb316ed18820f3239a285ef14bf45ef963a2.tar.gz fatcat-caf1cb316ed18820f3239a285ef14bf45ef963a2.zip