aboutsummaryrefslogtreecommitdiffstats
path: root/python/scripts/ingestrequest_row2json.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/scripts/ingestrequest_row2json.py')
-rwxr-xr-xpython/scripts/ingestrequest_row2json.py36
1 files changed, 22 insertions, 14 deletions
diff --git a/python/scripts/ingestrequest_row2json.py b/python/scripts/ingestrequest_row2json.py
index 494ec7a..8a353ca 100755
--- a/python/scripts/ingestrequest_row2json.py
+++ b/python/scripts/ingestrequest_row2json.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python3
-
"""
This script is used to turn ingest request postgres rows (in JSON export
format) back in to regular ingest request JSON.
@@ -7,24 +6,25 @@ format) back in to regular ingest request JSON.
The only difference is the name and location of some optional keys.
"""
-import sys
-import json
import argparse
+import json
+import sys
def transform(row):
"""
dict-to-dict
"""
- row.pop('created', None)
- extra = row.pop('request', None) or {}
- for k in ('ext_ids', 'edit_extra'):
+ row.pop("created", None)
+ extra = row.pop("request", None) or {}
+ for k in ("ext_ids", "edit_extra"):
if k in extra:
row[k] = extra[k]
- if 'release_ident' in extra:
- row['fatcat'] = dict(release_ident=extra['release_ident'])
+ if "release_ident" in extra:
+ row["fatcat"] = dict(release_ident=extra["release_ident"])
return row
+
def run(args):
for l in args.json_file:
if not l.strip():
@@ -33,19 +33,27 @@ def run(args):
req = transform(json.loads(l))
except:
print(l, file=sys.stderr)
+ if args.force_recrawl:
+ req["force_recrawl"] = True
print(json.dumps(req, sort_keys=True))
+
def main():
- parser = argparse.ArgumentParser(
- formatter_class=argparse.ArgumentDefaultsHelpFormatter)
- parser.add_argument('json_file',
- help="arabesque output file to use",
- type=argparse.FileType('r'))
+ parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument(
+ "json_file", help="SQL output JSON file to process", type=argparse.FileType("r")
+ )
+ parser.add_argument(
+ "--force-recrawl",
+ action="store_true",
+ help="whether to add recrawl (SPNv2) flag to request",
+ )
subparsers = parser.add_subparsers()
args = parser.parse_args()
run(args)
-if __name__ == '__main__':
+
+if __name__ == "__main__":
main()