aboutsummaryrefslogtreecommitdiffstats
path: root/fatcat_covid19
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-04-03 18:13:00 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-04-03 18:13:00 -0700
commit 6c0e50dc8fdf6f09ff34b3a78ffe2241769b8d4f (patch)
tree c7b0bf1a220ef909ec5a8955a5dd84f5af09515a /fatcat_covid19
parent 3599ebce9941fabe06640d1f664cb4949eddd74d (diff)
download fatcat-covid19-6c0e50dc8fdf6f09ff34b3a78ffe2241769b8d4f.tar.gz
fatcat-covid19-6c0e50dc8fdf6f09ff34b3a78ffe2241769b8d4f.zip
fixes from prod
Diffstat (limited to 'fatcat_covid19')
-rw-r--r-- fatcat_covid19/derivatives.py 2
-rw-r--r-- fatcat_covid19/enrich.py 3
-rw-r--r-- fatcat_covid19/transform.py 5
3 files changed, 6 insertions, 4 deletions
diff --git a/fatcat_covid19/derivatives.py b/fatcat_covid19/derivatives.py
index 5ade0ef..c9339e8 100644
--- a/fatcat_covid19/derivatives.py
+++ b/fatcat_covid19/derivatives.py
@@ -126,7 +126,7 @@ def enrich_derivatives_file(json_input, json_output, base_dir):
"""
for l in json_input:
l = json.loads(l)
- result = do_line(l, base_dir)
+ result = enrich_derivatives_row(l, base_dir)
if result:
print(json.dumps(result, sort_keys=True), file=json_output)
diff --git a/fatcat_covid19/enrich.py b/fatcat_covid19/enrich.py
index 458c83d..e7d6da2 100644
--- a/fatcat_covid19/enrich.py
+++ b/fatcat_covid19/enrich.py
@@ -51,7 +51,7 @@ def enrich_fatcat_row(row, api_session):
if fatcat_release:
row['fatcat_release'] = fatcat_release
row['release_id'] = fatcat_release['ident']
- print(json.dumps(row, sort_keys=True))
+ return row
def enrich_fatcat_file(json_input, json_output):
@@ -65,3 +65,4 @@ def enrich_fatcat_file(json_input, json_output):
result = enrich_fatcat_row(l, api_session)
if result:
print(json.dumps(result, sort_keys=True), file=json_output)
+
diff --git a/fatcat_covid19/transform.py b/fatcat_covid19/transform.py
index c31c9f4..16774ab 100644
--- a/fatcat_covid19/transform.py
+++ b/fatcat_covid19/transform.py
@@ -68,7 +68,8 @@ def fulltext_to_elasticsearch(row, force_bool=True):
if release.get('abstracts'):
for a in release['abstracts']:
abstracts.append(a['content'])
- abstract_langs.append(a['lang'])
+ if a.get('lang'):
+ abstract_langs.append(a['lang'])
contrib_names = []
contrib_affiliations = []
@@ -199,6 +200,6 @@ def transform_es_file(json_input, json_output):
"""
for l in json_input:
l = json.loads(l)
- result = fulltext_to_elasticsearch(l, args)
+ result = fulltext_to_elasticsearch(l)
if result:
print(json.dumps(result, sort_keys=True), file=json_output)