| author | Bryan Newbold <bnewbold@archive.org> | 2019-12-18 14:53:33 -0800 |
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2019-12-18 14:53:35 -0800 |
| commit | 1a45fe3f3ef3122355f49470fbbcd6dc82c959b2 (patch) | |
| tree | 9f9e60c2814c91ad2b394c95b5ff5c9c9345d72d | |
| parent | 0ea8bfb3485bc4db5cb7a17397d6c37f407a65b4 (diff) | |
refactor: sort keys in JSON output
This makes the output a lot more readable when debugging by tailing Kafka topics.
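For context, `sort_keys=True` is a standard-library `json.dumps` option that emits object keys in alphabetical order at every nesting level. A minimal sketch of the effect (the record below is hypothetical, not taken from the repo):

```python
import json

# Hypothetical ingest-style record; dict insertion order is arbitrary here.
record = {"status": "success", "hit": True, "request": {"ingest_type": "pdf"}}

# Without sort_keys, key order follows insertion order and can vary by producer.
print(json.dumps(record))
# {"status": "success", "hit": true, "request": {"ingest_type": "pdf"}}

# With sort_keys=True, keys come out alphabetically at every nesting level.
print(json.dumps(record, sort_keys=True))
# {"hit": true, "request": {"ingest_type": "pdf"}, "status": "success"}
```

With stable key ordering, records tailed from a Kafka topic line up field-for-field, and two serializations of the same object are byte-identical, which also helps when diffing output.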
-rwxr-xr-x | python/grobid2json.py | 3
-rwxr-xr-x | python/ia_pdf_match.py | 2
-rwxr-xr-x | python/ingest_file.py | 6
-rwxr-xr-x | python/kafka_grobid.py | 2
4 files changed, 7 insertions, 6 deletions
```diff
diff --git a/python/grobid2json.py b/python/grobid2json.py
index 977c772..0d85e5e 100755
--- a/python/grobid2json.py
+++ b/python/grobid2json.py
@@ -177,7 +177,8 @@ def main(): # pragma no cover
         content = open(filename, 'r')
         print(json.dumps(
             teixml2json(content,
-                encumbered=(not args.no_encumbered))))
+                encumbered=(not args.no_encumbered)),
+            sort_keys=True))
 
 if __name__=='__main__': # pragma no cover
     main()
diff --git a/python/ia_pdf_match.py b/python/ia_pdf_match.py
index bc814de..c5a5e11 100755
--- a/python/ia_pdf_match.py
+++ b/python/ia_pdf_match.py
@@ -102,7 +102,7 @@ def run():
         obj = json.loads(line)
         match = parse(obj)
         if match:
-            print(json.dumps(match))
+            print(json.dumps(match, sort_keys=True))
 
 if __name__ == '__main__':
     run()
diff --git a/python/ingest_file.py b/python/ingest_file.py
index 1980e3d..460b576 100755
--- a/python/ingest_file.py
+++ b/python/ingest_file.py
@@ -16,7 +16,7 @@ def run_single_ingest(args):
     )
     ingester = IngestFileWorker()
     result = ingester.process(request)
-    print(json.dumps(result))
+    print(json.dumps(result, sort_keys=True))
     return result
 
 def run_requests(args):
@@ -25,7 +25,7 @@ def run_requests(args):
     for l in args.json_file:
         request = json.loads(l.strip())
         result = ingester.process(request)
-        print(json.dumps(result))
+        print(json.dumps(result, sort_keys=True))
 
 def run_api(args):
     port = 8083
@@ -67,7 +67,7 @@ def main():
     args = parser.parse_args()
 
     if not args.__dict__.get("func"):
-        sys.stderr.write("tell me what to do!\n")
+        print("tell me what to do!", file=sys.stderr)
         sys.exit(-1)
 
     args.func(args)
diff --git a/python/kafka_grobid.py b/python/kafka_grobid.py
index dd6ab63..05e48bd 100755
--- a/python/kafka_grobid.py
+++ b/python/kafka_grobid.py
@@ -282,7 +282,7 @@ class KafkaGrobidWorker:
                     grobid_output.get('key'),
                     status))
                 sys.stdout.flush()
-                producer.produce(json.dumps(grobid_output).encode('utf-8'))
+                producer.produce(json.dumps(grobid_output, sort_keys=True).encode('utf-8'))
                 sequential_failures = 0
             else:
                 sys.stderr.write("failed to extract: {}\n".format(status))
```