json: exclude None in output, and sort keys

These are both size/performance enhancements. Excluding 'None' values will reduce document sizes on disk and over the network, particularly for intermediate objects. Sorting by key should improve compression ratios across multiple documents, both on disk (gzip) and in Elasticsearch itself; see: https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-disk-usage.html#_put_fields_in_the_same_order_in_documents
author: Bryan Newbold <bnewbold@archive.org> 2020-07-27 15:54:55 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-07-27 15:54:57 -0700
commit: 6443fd5756779e2cca986e73e5008a073ef96452 (patch)
tree: deaac6ff17a9dbafcd2142c1b2a4679691403634
parent: 0234f6188b6f10dad96e2ea8dc67d2674c26c02d (diff)
download: fatcat-scholar-6443fd5756779e2cca986e73e5008a073ef96452.tar.gz
fatcat-scholar-6443fd5756779e2cca986e73e5008a073ef96452.zip
3 files changed, 4 insertions, 4 deletions
diff --git a/fatcat_scholar/sim_pipeline.py b/fatcat_scholar/sim_pipeline.py
index f99471f..95e5cad 100644
--- a/fatcat_scholar/sim_pipeline.py
+++ b/fatcat_scholar/sim_pipeline.py
@@ -149,7 +149,7 @@ class SimPipeline:
                         issue_item_metadata=full_issue["issue_item_metadata"],
                     ),
                 )
-                print(bundle.json())
+                print(bundle.json(exclude_none=True, sort_keys=True))
                 count += 1
                 if limit is not None and count >= limit:
                     break
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index f6e2c46..7afbce5 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -364,7 +364,7 @@ def run_transform(infile: Sequence) -> None:
         es_doc = transform_heavy(heavy)
         if not es_doc:
             continue
-        print(es_doc.json())
+        print(es_doc.json(exclude_none=True, sort_keys=True))
 
 
 def main() -> None:
diff --git a/fatcat_scholar/work_pipeline.py b/fatcat_scholar/work_pipeline.py
index accbc1d..b2ceaf8 100644
--- a/fatcat_scholar/work_pipeline.py
+++ b/fatcat_scholar/work_pipeline.py
@@ -355,7 +355,7 @@ class WorkPipeline:
                 continue
             if batch:
                 ib = self.process_release_list(batch)
-                print(ib.json())
+                print(ib.json(exclude_none=True, sort_keys=True))
                 batch_work_id = None
             batch = [
                 release,
@@ -364,7 +364,7 @@ class WorkPipeline:
 
         if batch:
             ib = self.process_release_list(batch)
-            print(ib.json())
+            print(ib.json(exclude_none=True, sort_keys=True))
 
 
 def main() -> None:
author	Bryan Newbold <bnewbold@archive.org>	2020-07-27 15:54:55 -0700
committer	Bryan Newbold <bnewbold@archive.org>	2020-07-27 15:54:57 -0700
commit	6443fd5756779e2cca986e73e5008a073ef96452 (patch)
tree	deaac6ff17a9dbafcd2142c1b2a4679691403634
parent	0234f6188b6f10dad96e2ea8dc67d2674c26c02d (diff)
download	fatcat-scholar-6443fd5756779e2cca986e73e5008a073ef96452.tar.gz fatcat-scholar-6443fd5756779e2cca986e73e5008a073ef96452.zip