refactor: sort keys in JSON output

This makes the JSON output much more readable when debugging by tailing Kafka topics.
author: Bryan Newbold <bnewbold@archive.org> 2019-12-18 14:53:33 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2019-12-18 14:53:35 -0800
commit: 1a45fe3f3ef3122355f49470fbbcd6dc82c959b2 (patch)
tree: 9f9e60c2814c91ad2b394c95b5ff5c9c9345d72d /python
parent: 0ea8bfb3485bc4db5cb7a17397d6c37f407a65b4 (diff)
download: sandcrawler-1a45fe3f3ef3122355f49470fbbcd6dc82c959b2.tar.gz
sandcrawler-1a45fe3f3ef3122355f49470fbbcd6dc82c959b2.zip
4 files changed, 7 insertions, 6 deletions
diff --git a/python/grobid2json.py b/python/grobid2json.py
index 977c772..0d85e5e 100755
--- a/python/grobid2json.py
+++ b/python/grobid2json.py
@@ -177,7 +177,8 @@ def main():   # pragma no cover
         content = open(filename, 'r')
         print(json.dumps(
             teixml2json(content,
-               encumbered=(not args.no_encumbered))))
+               encumbered=(not args.no_encumbered)),
+            sort_keys=True))
 
 if __name__=='__main__':   # pragma no cover
     main()
diff --git a/python/ia_pdf_match.py b/python/ia_pdf_match.py
index bc814de..c5a5e11 100755
--- a/python/ia_pdf_match.py
+++ b/python/ia_pdf_match.py
@@ -102,7 +102,7 @@ def run():
         obj = json.loads(line)
         match = parse(obj)
         if match:
-            print(json.dumps(match))
+            print(json.dumps(match, sort_keys=True))
 
 if __name__ == '__main__':
     run()
diff --git a/python/ingest_file.py b/python/ingest_file.py
index 1980e3d..460b576 100755
--- a/python/ingest_file.py
+++ b/python/ingest_file.py
@@ -16,7 +16,7 @@ def run_single_ingest(args):
     )
     ingester = IngestFileWorker()
     result = ingester.process(request)
-    print(json.dumps(result))
+    print(json.dumps(result, sort_keys=True))
     return result
 
 def run_requests(args):
@@ -25,7 +25,7 @@ def run_requests(args):
     for l in args.json_file:
         request = json.loads(l.strip())
         result = ingester.process(request)
-        print(json.dumps(result))
+        print(json.dumps(result, sort_keys=True))
 
 def run_api(args):
     port = 8083
@@ -67,7 +67,7 @@ def main():
 
     args = parser.parse_args()
     if not args.__dict__.get("func"):
-        sys.stderr.write("tell me what to do!\n")
+        print("tell me what to do!", file=sys.stderr)
         sys.exit(-1)
 
     args.func(args)
diff --git a/python/kafka_grobid.py b/python/kafka_grobid.py
index dd6ab63..05e48bd 100755
--- a/python/kafka_grobid.py
+++ b/python/kafka_grobid.py
@@ -282,7 +282,7 @@ class KafkaGrobidWorker:
                         grobid_output.get('key'),
                         status))
                     sys.stdout.flush()
-                    producer.produce(json.dumps(grobid_output).encode('utf-8'))
+                    producer.produce(json.dumps(grobid_output, sort_keys=True).encode('utf-8'))
                     sequential_failures = 0
                 else:
                     sys.stderr.write("failed to extract: {}\n".format(status))
author	Bryan Newbold <bnewbold@archive.org>	2019-12-18 14:53:33 -0800
committer	Bryan Newbold <bnewbold@archive.org>	2019-12-18 14:53:35 -0800
commit	1a45fe3f3ef3122355f49470fbbcd6dc82c959b2 (patch)
tree	9f9e60c2814c91ad2b394c95b5ff5c9c9345d72d /python
parent	0ea8bfb3485bc4db5cb7a17397d6c37f407a65b4 (diff)
download	sandcrawler-1a45fe3f3ef3122355f49470fbbcd6dc82c959b2.tar.gz sandcrawler-1a45fe3f3ef3122355f49470fbbcd6dc82c959b2.zip