diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 16:59:32 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 16:59:32 -0700 |
commit | 4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8 (patch) | |
tree | a15635b11ca66d5cdbbc1c3f6eaa73fd5fe35801 /python/ia_pdf_match.py | |
parent | f08bbeb7981fd692ffc9277d15d282883a408051 (diff) | |
download | sandcrawler-4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8.tar.gz sandcrawler-4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8.zip |
more progress on type annotations and linting
Diffstat (limited to 'python/ia_pdf_match.py')
-rwxr-xr-x | python/ia_pdf_match.py | 9 |
1 files changed, 5 insertions, 4 deletions
```diff
diff --git a/python/ia_pdf_match.py b/python/ia_pdf_match.py
index c3d9c16..ac17003 100755
--- a/python/ia_pdf_match.py
+++ b/python/ia_pdf_match.py
@@ -23,9 +23,10 @@ When invoking import matched, be sure to:
 
 import json
 import sys
+from typing import Any, Dict, Optional
 
 
-def parse(obj):
+def parse(obj: dict) -> Optional[Dict[str, Any]]:
     if obj['metadata']['identifier'].endswith('-test') or obj['metadata'].get('test'):
         print('skip: test item', file=sys.stderr)
         return None
@@ -42,7 +43,7 @@ def parse(obj):
         extid = extid.replace('http://arxiv.org/abs/', '')
         #print(extid)
         assert '/' in extid or '.' in extid
-        if not 'v' in extid or not extid[-1].isdigit():
+        if 'v' not in extid or not extid[-1].isdigit():
             print('skip: non-versioned arxiv_id', file=sys.stderr)
             return None
     elif obj['metadata']['identifier'].startswith('paper-doi-10_'):
@@ -97,13 +98,13 @@ def parse(obj):
     return match
 
 
-def run():
+def run() -> None:
     for line in sys.stdin:
         if not line:
             continue
         obj = json.loads(line)
         match = parse(obj)
-        if match:
+        if match is not None:
             print(json.dumps(match, sort_keys=True))
 
 
```