aboutsummaryrefslogtreecommitdiffstats
path: root/fatcat_covid19
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-04-08 15:59:59 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-04-08 15:59:59 -0700
commit: 858147b071103c505bff643e35b503c623f20284 (patch)
tree: acfc5cdba1315cf99ae31aac0cf969623e81f1e5 /fatcat_covid19
parent: 1552f5dcd8b0abe89f53182c7a495a7b8dc008fc (diff)
downloadfatcat-covid19-858147b071103c505bff643e35b503c623f20284.tar.gz
fatcat-covid19-858147b071103c505bff643e35b503c623f20284.zip
refactor parse_cord19_csv.py into tool
Diffstat (limited to 'fatcat_covid19')
-rw-r--r-- fatcat_covid19/parse.py 21
1 files changed, 21 insertions, 0 deletions
diff --git a/fatcat_covid19/parse.py b/fatcat_covid19/parse.py
new file mode 100644
index 0000000..ce2bf26
--- /dev/null
+++ b/fatcat_covid19/parse.py
@@ -0,0 +1,21 @@
+
+import sys
+import csv
+import json
+
+
def parse_cord19_file(csv_path, json_output):
    """
    Transform the CORD-19 CSV file into JSON lines, renaming a couple of the
    column keys.

    Each CSV row is printed to `json_output` as a single line of JSON of the
    form `{"cord19_paper": {...}}`, with keys sorted. Two columns are renamed:

    - 'Microsoft Academic Paper ID' becomes 'mag_id'
    - 'WHO #Covidence' becomes 'who_covidence_id', with any '#' characters
      removed from the value

    csv_path: path of the CSV file to read
    json_output: writable text file object that receives the output lines

    Raises KeyError if a row lacks either of the renamed columns (eg, the
    column is missing from the CSV header).
    """

    # newline='' is what the csv module asks for, so that newlines embedded in
    # quoted fields are handled by the parser. The encoding is explicit so
    # that parsing does not depend on the platform locale; the input is
    # assumed to be UTF-8.
    with open(csv_path, newline='', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            row = dict(row)
            row['mag_id'] = row.pop('Microsoft Academic Paper ID')
            who_id = row.pop('WHO #Covidence')
            # DictReader fills in None for fields missing from a short row;
            # pass that through as JSON null (same as mag_id) instead of
            # crashing on None.replace()
            if who_id is not None:
                who_id = who_id.replace('#', '')
            row['who_covidence_id'] = who_id
            obj = dict(cord19_paper=row)
            print(json.dumps(obj, sort_keys=True), file=json_output)
+