aboutsummaryrefslogtreecommitdiffstats
path: root/python/ia_pdf_match.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2019-12-18 14:54:09 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2019-12-18 15:00:27 -0800
commit: cbe1af25bdfefd5a6bd9da6014440eaf88ee8e26 (patch)
tree: 1c5eec4e34afb8c43975850a1d36ff1841bc0fcf /python/ia_pdf_match.py
parent: 1a45fe3f3ef3122355f49470fbbcd6dc82c959b2 (diff)
download: sandcrawler-cbe1af25bdfefd5a6bd9da6014440eaf88ee8e26.tar.gz
sandcrawler-cbe1af25bdfefd5a6bd9da6014440eaf88ee8e26.zip
refactor: use print(..., file=sys.stderr)
Should use logging soon, but this seems more idiomatic in the meanwhile.
Diffstat (limited to 'python/ia_pdf_match.py')
-rwxr-xr-x python/ia_pdf_match.py 10
1 files changed, 5 insertions, 5 deletions
diff --git a/python/ia_pdf_match.py b/python/ia_pdf_match.py
index c5a5e11..20c65bb 100755
--- a/python/ia_pdf_match.py
+++ b/python/ia_pdf_match.py
@@ -27,7 +27,7 @@ import json
def parse(obj):
if obj['metadata']['identifier'].endswith('-test') or obj['metadata'].get('test'):
- sys.stderr.write('skip: test item\n')
+ print('skip: test item', file=sys.stderr)
return None
extid_type = None
@@ -36,14 +36,14 @@ def parse(obj):
extid_type = 'arxiv'
extid = obj['metadata'].get('source')
if not extid:
- sys.stderr.write('skip: no source\n')
+ print('skip: no source', file=sys.stderr)
return None
assert extid.startswith('http://arxiv.org/abs/')
extid = extid.replace('http://arxiv.org/abs/', '')
#print(extid)
assert '/' in extid or '.' in extid
if not 'v' in extid or not extid[-1].isdigit():
- sys.stderr.write('skip: non-versioned arxiv_id\n')
+ print('skip: non-versioned arxiv_id', file=sys.stderr)
return None
elif obj['metadata']['identifier'].startswith('paper-doi-10_'):
extid_type = 'doi'
@@ -67,9 +67,9 @@ def parse(obj):
pdf_file = f
break
if not pdf_file:
- sys.stderr.write('skip: no PDF found: {}\n'.format(obj['metadata']['identifier']))
+ print('skip: no PDF found: {}'.format(obj['metadata']['identifier']), file=sys.stderr)
#for f in obj['files']:
- # sys.stderr.write(f['format'] + "\n")
+ # print(f['format'], file=sys.stderr)
return None
assert pdf_file['name'].endswith('.pdf')