about | summary | refs | log | tree | commit | diff | stats
path: root/python/scripts/unpaywall2ingestrequest.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/scripts/unpaywall2ingestrequest.py')
-rwxr-xr-x python/scripts/unpaywall2ingestrequest.py 19
1 files changed, 11 insertions, 8 deletions
diff --git a/python/scripts/unpaywall2ingestrequest.py b/python/scripts/unpaywall2ingestrequest.py
index 590b429..b79f316 100755
--- a/python/scripts/unpaywall2ingestrequest.py
+++ b/python/scripts/unpaywall2ingestrequest.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python3
-
"""
Transform an unpaywall dump (JSON) into ingest requests.
"""
@@ -26,17 +25,19 @@ DOMAIN_BLOCKLIST = [
]
RELEASE_STAGE_MAP = {
- 'draftVersion': 'draft',
+ 'draftVersion': 'draft',
'submittedVersion': 'submitted',
- 'acceptedVersion': 'accepted',
+ 'acceptedVersion': 'accepted',
'publishedVersion': 'published',
- 'updatedVersion': 'updated',
+ 'updatedVersion': 'updated',
}
+
def canon(s):
parsed = urlcanon.parse_url(s)
return str(urlcanon.whatwg(parsed))
+
def transform(obj):
"""
Transforms from a single unpaywall object to zero or more ingest requests.
@@ -86,6 +87,7 @@ def transform(obj):
return requests
+
def run(args):
for l in args.json_file:
if not l.strip():
@@ -96,17 +98,18 @@ def run(args):
for r in requests:
print("{}".format(json.dumps(r, sort_keys=True)))
+
def main():
- parser = argparse.ArgumentParser(
- formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('json_file',
- help="unpaywall dump file to use",
- type=argparse.FileType('r'))
+ help="unpaywall dump file to use",
+ type=argparse.FileType('r'))
subparsers = parser.add_subparsers()
args = parser.parse_args()
run(args)
+
if __name__ == '__main__':
main()