diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 12:54:37 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 12:54:37 -0700 |
commit | 05bd7cbcc62588e431c5efd533189e246b2a997e (patch) | |
tree | abcc707a451e77ea1e8c5ac9a5925b97a4bd139a /python/scripts/unpaywall2ingestrequest.py | |
parent | f3f424e42f2f4f383103cf80b30a00cfa6cfc179 (diff) | |
download | sandcrawler-05bd7cbcc62588e431c5efd533189e246b2a997e.tar.gz sandcrawler-05bd7cbcc62588e431c5efd533189e246b2a997e.zip |
make fmt
Diffstat (limited to 'python/scripts/unpaywall2ingestrequest.py')
-rwxr-xr-x | python/scripts/unpaywall2ingestrequest.py | 19 |
1 files changed, 11 insertions, 8 deletions
diff --git a/python/scripts/unpaywall2ingestrequest.py b/python/scripts/unpaywall2ingestrequest.py
index 590b429..b79f316 100755
--- a/python/scripts/unpaywall2ingestrequest.py
+++ b/python/scripts/unpaywall2ingestrequest.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-
 """
 Transform an unpaywall dump (JSON) into ingest requests.
 """
@@ -26,17 +25,19 @@ DOMAIN_BLOCKLIST = [
 ]
 
 RELEASE_STAGE_MAP = {
-    'draftVersion':     'draft',
+    'draftVersion': 'draft',
     'submittedVersion': 'submitted',
-    'acceptedVersion':  'accepted',
+    'acceptedVersion': 'accepted',
     'publishedVersion': 'published',
-    'updatedVersion':   'updated',
+    'updatedVersion': 'updated',
 }
 
+
 def canon(s):
     parsed = urlcanon.parse_url(s)
     return str(urlcanon.whatwg(parsed))
 
+
 def transform(obj):
     """
     Transforms from a single unpaywall object to zero or more ingest requests.
@@ -86,6 +87,7 @@ def transform(obj):
 
     return requests
 
+
 def run(args):
     for l in args.json_file:
         if not l.strip():
@@ -96,17 +98,18 @@ def run(args):
         for r in requests:
             print("{}".format(json.dumps(r, sort_keys=True)))
 
+
 def main():
-    parser = argparse.ArgumentParser(
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('json_file',
-        help="unpaywall dump file to use",
-        type=argparse.FileType('r'))
+                        help="unpaywall dump file to use",
+                        type=argparse.FileType('r'))
     subparsers = parser.add_subparsers()
 
     args = parser.parse_args()
 
     run(args)
 
+
 if __name__ == '__main__':
     main()