diff options
-rwxr-xr-x | python/scripts/ingestrequest_row2json.py | 48 |
1 files changed, 48 insertions, 0 deletions
#!/usr/bin/python3

"""
This script is used to turn ingest request postgres rows (in JSON export
format) back into regular ingest request JSON.

The only difference is the name and location of some optional keys.
"""

import sys
import json
import argparse


def transform(row):
    """
    Convert one exported database row (dict) into an ingest request (dict).

    The 'created' key is dropped. The nested 'request' object, if present
    and non-empty, is removed and selectively unpacked:

    - 'ext_ids' and 'edit_extra' are copied to the top level unchanged
    - 'release_ident' is re-wrapped as {'fatcat': {'release_ident': ...}}

    Any other keys inside 'request' are discarded; all other top-level keys
    pass through untouched.

    The input dict is not modified; a new dict is returned.
    """
    # Shallow copy so the caller's row is left intact.
    req = dict(row)
    req.pop('created', None)
    # 'request' may be missing or null in the export; treat both as empty.
    extra = req.pop('request', None) or {}
    for key in ('ext_ids', 'edit_extra'):
        if key in extra:
            req[key] = extra[key]
    if 'release_ident' in extra:
        req['fatcat'] = dict(release_ident=extra['release_ident'])
    return req


def run(args):
    """
    Read one JSON object per line from args.json_file, transform each, and
    print the result to stdout as a single line of key-sorted JSON.

    Blank (whitespace-only) lines are skipped.
    """
    for line in args.json_file:
        if not line.strip():
            continue
        req = transform(json.loads(line))
        print(json.dumps(req, sort_keys=True))


def main():
    """
    Command-line entry point: parse arguments and process the input file.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('json_file',
        help="ingest request rows (postgres JSON export, one per line) to use",
        type=argparse.FileType('r'))

    args = parser.parse_args()

    run(args)


if __name__ == '__main__':
    main()