diff options
author | Bryan Newbold <bnewbold@archive.org> | 2022-07-15 13:07:10 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2022-07-15 13:07:10 -0700 |
commit | ce386a7a1f38c2c6289ce9d0bde269b4b1349154 (patch) | |
tree | 44a1350b65e5c8fd4cff5cf888c6cec0b6692672 | |
parent | 7d8d9a8bcc827b136b4dc148f6a2c8c4dd2bbd8b (diff) | |
download | sandcrawler-ce386a7a1f38c2c6289ce9d0bde269b4b1349154.tar.gz sandcrawler-ce386a7a1f38c2c6289ce9d0bde269b4b1349154.zip |
row2json script: add flag to enable recrawling
-rwxr-xr-x | python/scripts/ingestrequest_row2json.py | 9 |
1 files changed, 8 insertions, 1 deletions
```diff
diff --git a/python/scripts/ingestrequest_row2json.py b/python/scripts/ingestrequest_row2json.py
index d52e793..0a457cd 100755
--- a/python/scripts/ingestrequest_row2json.py
+++ b/python/scripts/ingestrequest_row2json.py
@@ -33,13 +33,20 @@ def run(args):
             req = transform(json.loads(l))
         except:
             print(l, file=sys.stderr)
+        if args.force_recrawl:
+            req["force_recrawl"] = True
         print(json.dumps(req, sort_keys=True))
 
 
 def main():
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument(
-        "json_file", help="arabesque output file to use", type=argparse.FileType("r")
+        "json_file", help="SQL output JSON file to process", type=argparse.FileType("r")
+    )
+    parser.add_argument(
+        "force_recrawl",
+        action="store_true",
+        help="whether to add recrawl (SPNv2) flag to request",
     )
     subparsers = parser.add_subparsers()
 
```