about | summary | refs | log | tree | commit | diff | stats
path: root/python/scripts
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2022-07-15 13:07:10 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2022-07-15 13:07:10 -0700
commit: ce386a7a1f38c2c6289ce9d0bde269b4b1349154 (patch)
tree: 44a1350b65e5c8fd4cff5cf888c6cec0b6692672 /python/scripts
parent: 7d8d9a8bcc827b136b4dc148f6a2c8c4dd2bbd8b (diff)
download: sandcrawler-ce386a7a1f38c2c6289ce9d0bde269b4b1349154.tar.gz
sandcrawler-ce386a7a1f38c2c6289ce9d0bde269b4b1349154.zip
row2json script: add flag to enable recrawling
Diffstat (limited to 'python/scripts')
-rwxr-xr-x python/scripts/ingestrequest_row2json.py 9
1 files changed, 8 insertions, 1 deletions
diff --git a/python/scripts/ingestrequest_row2json.py b/python/scripts/ingestrequest_row2json.py
index d52e793..0a457cd 100755
--- a/python/scripts/ingestrequest_row2json.py
+++ b/python/scripts/ingestrequest_row2json.py
@@ -33,13 +33,20 @@ def run(args):
req = transform(json.loads(l))
except:
print(l, file=sys.stderr)
+ if args.force_recrawl:
+ req["force_recrawl"] = True
print(json.dumps(req, sort_keys=True))
def main():
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
- "json_file", help="arabesque output file to use", type=argparse.FileType("r")
+ "json_file", help="SQL output JSON file to process", type=argparse.FileType("r")
+ )
+ parser.add_argument(
+ "force_recrawl",
+ action="store_true",
+ help="whether to add recrawl (SPNv2) flag to request",
)
subparsers = parser.add_subparsers()