about | summary | refs | log | tree | commit | diff | stats
path: root/python/ia_pdf_match.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-10-26 16:59:32 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-10-26 16:59:32 -0700
commit: 4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8 (patch)
tree: a15635b11ca66d5cdbbc1c3f6eaa73fd5fe35801 /python/ia_pdf_match.py
parent: f08bbeb7981fd692ffc9277d15d282883a408051 (diff)
download: sandcrawler-4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8.tar.gz
sandcrawler-4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8.zip
more progress on type annotations and linting
Diffstat (limited to 'python/ia_pdf_match.py')
-rwxr-xr-x python/ia_pdf_match.py 9
1 file changed, 5 insertions, 4 deletions
diff --git a/python/ia_pdf_match.py b/python/ia_pdf_match.py
index c3d9c16..ac17003 100755
--- a/python/ia_pdf_match.py
+++ b/python/ia_pdf_match.py
@@ -23,9 +23,10 @@ When invoking import matched, be sure to:
import json
import sys
+from typing import Any, Dict, Optional
-def parse(obj):
+def parse(obj: dict) -> Optional[Dict[str, Any]]:
if obj['metadata']['identifier'].endswith('-test') or obj['metadata'].get('test'):
print('skip: test item', file=sys.stderr)
return None
@@ -42,7 +43,7 @@ def parse(obj):
extid = extid.replace('http://arxiv.org/abs/', '')
#print(extid)
assert '/' in extid or '.' in extid
- if not 'v' in extid or not extid[-1].isdigit():
+ if 'v' not in extid or not extid[-1].isdigit():
print('skip: non-versioned arxiv_id', file=sys.stderr)
return None
elif obj['metadata']['identifier'].startswith('paper-doi-10_'):
@@ -97,13 +98,13 @@ def parse(obj):
return match
-def run():
+def run() -> None:
for line in sys.stdin:
if not line:
continue
obj = json.loads(line)
match = parse(obj)
- if match:
+ if match is not None:
print(json.dumps(match, sort_keys=True))