more progress on type annotations and linting

author: Bryan Newbold <bnewbold@archive.org> 2021-10-26 16:59:32 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-10-26 16:59:32 -0700
commit: 4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8 (patch)
tree: a15635b11ca66d5cdbbc1c3f6eaa73fd5fe35801 /python/ia_pdf_match.py
parent: f08bbeb7981fd692ffc9277d15d282883a408051 (diff)
download: sandcrawler-4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8.tar.gz
sandcrawler-4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8.zip
1 file changed, 5 insertions, 4 deletions
diff --git a/python/ia_pdf_match.py b/python/ia_pdf_match.py
index c3d9c16..ac17003 100755
--- a/python/ia_pdf_match.py
+++ b/python/ia_pdf_match.py
@@ -23,9 +23,10 @@ When invoking import matched, be sure to:
 
 import json
 import sys
+from typing import Any, Dict, Optional
 
 
-def parse(obj):
+def parse(obj: dict) -> Optional[Dict[str, Any]]:
     if obj['metadata']['identifier'].endswith('-test') or obj['metadata'].get('test'):
         print('skip: test item', file=sys.stderr)
         return None
@@ -42,7 +43,7 @@ def parse(obj):
         extid = extid.replace('http://arxiv.org/abs/', '')
         #print(extid)
         assert '/' in extid or '.' in extid
-        if not 'v' in extid or not extid[-1].isdigit():
+        if 'v' not in extid or not extid[-1].isdigit():
             print('skip: non-versioned arxiv_id', file=sys.stderr)
             return None
     elif obj['metadata']['identifier'].startswith('paper-doi-10_'):
@@ -97,13 +98,13 @@ def parse(obj):
     return match
 
 
-def run():
+def run() -> None:
     for line in sys.stdin:
         if not line:
             continue
         obj = json.loads(line)
         match = parse(obj)
-        if match:
+        if match is not None:
             print(json.dumps(match, sort_keys=True))
author	Bryan Newbold <bnewbold@archive.org>	2021-10-26 16:59:32 -0700
committer	Bryan Newbold <bnewbold@archive.org>	2021-10-26 16:59:32 -0700
commit	4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8 (patch)
tree	a15635b11ca66d5cdbbc1c3f6eaa73fd5fe35801 /python/ia_pdf_match.py
parent	f08bbeb7981fd692ffc9277d15d282883a408051 (diff)
download	sandcrawler-4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8.tar.gz sandcrawler-4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8.zip