about | summary | refs | log | tree | commit | diff | stats
path: root/python
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2019-12-18 14:53:33 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2019-12-18 14:53:35 -0800
commit: 1a45fe3f3ef3122355f49470fbbcd6dc82c959b2 (patch)
tree: 9f9e60c2814c91ad2b394c95b5ff5c9c9345d72d /python
parent: 0ea8bfb3485bc4db5cb7a17397d6c37f407a65b4 (diff)
download: sandcrawler-1a45fe3f3ef3122355f49470fbbcd6dc82c959b2.tar.gz
sandcrawler-1a45fe3f3ef3122355f49470fbbcd6dc82c959b2.zip
refactor: sort keys in JSON output
This makes debugging by tailing Kafka topics a lot more readable
Diffstat (limited to 'python')
-rwxr-xr-x python/grobid2json.py 3
-rwxr-xr-x python/ia_pdf_match.py 2
-rwxr-xr-x python/ingest_file.py 6
-rwxr-xr-x python/kafka_grobid.py 2
4 files changed, 7 insertions, 6 deletions
diff --git a/python/grobid2json.py b/python/grobid2json.py
index 977c772..0d85e5e 100755
--- a/python/grobid2json.py
+++ b/python/grobid2json.py
@@ -177,7 +177,8 @@ def main(): # pragma no cover
content = open(filename, 'r')
print(json.dumps(
teixml2json(content,
- encumbered=(not args.no_encumbered))))
+ encumbered=(not args.no_encumbered)),
+ sort_keys=True))
if __name__=='__main__': # pragma no cover
main()
diff --git a/python/ia_pdf_match.py b/python/ia_pdf_match.py
index bc814de..c5a5e11 100755
--- a/python/ia_pdf_match.py
+++ b/python/ia_pdf_match.py
@@ -102,7 +102,7 @@ def run():
obj = json.loads(line)
match = parse(obj)
if match:
- print(json.dumps(match))
+ print(json.dumps(match, sort_keys=True))
if __name__ == '__main__':
run()
diff --git a/python/ingest_file.py b/python/ingest_file.py
index 1980e3d..460b576 100755
--- a/python/ingest_file.py
+++ b/python/ingest_file.py
@@ -16,7 +16,7 @@ def run_single_ingest(args):
)
ingester = IngestFileWorker()
result = ingester.process(request)
- print(json.dumps(result))
+ print(json.dumps(result, sort_keys=True))
return result
def run_requests(args):
@@ -25,7 +25,7 @@ def run_requests(args):
for l in args.json_file:
request = json.loads(l.strip())
result = ingester.process(request)
- print(json.dumps(result))
+ print(json.dumps(result, sort_keys=True))
def run_api(args):
port = 8083
@@ -67,7 +67,7 @@ def main():
args = parser.parse_args()
if not args.__dict__.get("func"):
- sys.stderr.write("tell me what to do!\n")
+ print("tell me what to do!", file=sys.stderr)
sys.exit(-1)
args.func(args)
diff --git a/python/kafka_grobid.py b/python/kafka_grobid.py
index dd6ab63..05e48bd 100755
--- a/python/kafka_grobid.py
+++ b/python/kafka_grobid.py
@@ -282,7 +282,7 @@ class KafkaGrobidWorker:
grobid_output.get('key'),
status))
sys.stdout.flush()
- producer.produce(json.dumps(grobid_output).encode('utf-8'))
+ producer.produce(json.dumps(grobid_output, sort_keys=True).encode('utf-8'))
sequential_failures = 0
else:
sys.stderr.write("failed to extract: {}\n".format(status))