4 files changed, 19 insertions, 9 deletions
diff --git a/covid19_tool.py b/covid19_tool.py
index 1cd0e48..b9bea44 100755
--- a/covid19_tool.py
+++ b/covid19_tool.py
@@ -9,7 +9,7 @@ Licensed the same as code under fatcat_covid19/
 import sys
 import argparse
 
-from fatcat_covid19.webface import app
+from fatcat_covid19.enrich import enrich_fatcat_file
 from fatcat_covid19.derivatives import enrich_derivatives_file
 from fatcat_covid19.transform import transform_es_file
 
@@ -40,12 +40,15 @@ def main():
 
     sub_enrich_fatcat = subparsers.add_parser('enrich-fatcat',
         help="lookup fatcat releases from JSON metadata")
+    sub_enrich_fatcat.set_defaults(
+        action='enrich-fatcat',
+    )
     sub_enrich_fatcat.add_argument('json_file',
         help="input JSON rows file (eg, CORD-19 parsed JSON)",
         type=argparse.FileType('r'))
     sub_enrich_fatcat.add_argument('--json-output',
         help="file to write to",
-        type=argparse.FileType('r'),
+        type=argparse.FileType('w'),
         default=sys.stdout)
 
     sub_enrich_derivatives = subparsers.add_parser('enrich-derivatives',
@@ -58,7 +61,7 @@ def main():
         type=argparse.FileType('r'))
     sub_enrich_derivatives.add_argument('--json-output',
         help="file to write ",
-        type=argparse.FileType('r'),
+        type=argparse.FileType('w'),
         default=sys.stdout)
     sub_enrich_derivatives.add_argument('--base-dir',
         help="directory to look for files (in 'pdf' subdirectory)",
@@ -66,20 +69,25 @@ def main():
 
     sub_transform_es = subparsers.add_parser('transform-es',
         help="transform fulltext JSON to elasticsearch schema JSON")
+    sub_transform_es.set_defaults(
+        action='transform-es',
+    )
     sub_transform_es.add_argument('json_file',
         help="input JSON rows file (fulltext)",
         type=argparse.FileType('r'))
     sub_transform_es.add_argument('--json-output',
         help="file to write to",
-        type=argparse.FileType('r'),
+        type=argparse.FileType('w'),
         default=sys.stdout)
 
     args = parser.parse_args()
 
     if args.action == 'webface':
+        # import app lazily, only for this action; importing it at module level turns on sentry exception reporting for every command
+        from fatcat_covid19.webface import app
         app.run(debug=args.debug, host=args.host, port=args.port)
     elif args.action == 'enrich-fatcat':
-        transform_es_file(args.json_file, args.json_output)
+        enrich_fatcat_file(args.json_file, args.json_output)
     elif args.action == 'enrich-derivatives':
         enrich_derivatives_file(args.json_file, args.json_output,
             args.base_dir)
diff --git a/fatcat_covid19/derivatives.py b/fatcat_covid19/derivatives.py
index 5ade0ef..c9339e8 100644
--- a/fatcat_covid19/derivatives.py
+++ b/fatcat_covid19/derivatives.py
@@ -126,7 +126,7 @@ def enrich_derivatives_file(json_input, json_output, base_dir):
     """
     for l in json_input:
         l = json.loads(l)
-        result = do_line(l, base_dir)
+        result = enrich_derivatives_row(l, base_dir)
         if result:
             print(json.dumps(result, sort_keys=True), file=json_output)
 
diff --git a/fatcat_covid19/enrich.py b/fatcat_covid19/enrich.py
index 458c83d..e7d6da2 100644
--- a/fatcat_covid19/enrich.py
+++ b/fatcat_covid19/enrich.py
@@ -51,7 +51,7 @@ def enrich_fatcat_row(row, api_session):
     if fatcat_release:
         row['fatcat_release'] = fatcat_release
         row['release_id'] = fatcat_release['ident']
-    print(json.dumps(row, sort_keys=True))
+    return row
 
 
 def enrich_fatcat_file(json_input, json_output):
@@ -65,3 +65,4 @@ def enrich_fatcat_file(json_input, json_output):
         result = enrich_fatcat_row(l, api_session)
         if result:
             print(json.dumps(result, sort_keys=True), file=json_output)
+
diff --git a/fatcat_covid19/transform.py b/fatcat_covid19/transform.py
index c31c9f4..16774ab 100644
--- a/fatcat_covid19/transform.py
+++ b/fatcat_covid19/transform.py
@@ -68,7 +68,8 @@ def fulltext_to_elasticsearch(row, force_bool=True):
     if release.get('abstracts'):
         for a in release['abstracts']:
             abstracts.append(a['content'])
-            abstract_langs.append(a['lang'])
+            if a.get('lang'):
+                abstract_langs.append(a['lang'])
 
     contrib_names = []
     contrib_affiliations = []
@@ -199,6 +200,6 @@ def transform_es_file(json_input, json_output):
     """
     for l in json_input:
         l = json.loads(l)
-        result = fulltext_to_elasticsearch(l, args)
+        result = fulltext_to_elasticsearch(l)
         if result:
             print(json.dumps(result, sort_keys=True), file=json_output)