# fatcat_covid19/parse.py
#
# Introduced by commit 858147b071103c505bff643e35b503c623f20284
# ("refactor parse_cord19_csv.py into tool").

import sys
import csv
import json


def parse_cord19_file(csv_path, json_output):
    """
    Trivial helper to transform the CORD-19 CSV file to JSON, and rename a
    couple of the column keys.

    Every CSV row is written to `json_output` as a single line holding one
    JSON object of the form {"cord19_paper": {<column>: <value>, ...}}, with
    keys sorted.

    Renames applied, when the source column is present in the row:

    - 'Microsoft Academic Paper ID' becomes 'mag_id'
    - 'WHO #Covidence' becomes 'who_covidence_id', with any '#' characters
      removed from the value

    Rows that do not carry one of those columns are passed through without
    that rename instead of raising KeyError.

    Args:
        csv_path: path of the CSV file to read; the first line is used as
            the header row.
        json_output: writable text file object that receives the JSON lines.
    """

    # Explicit encoding so decoding does not depend on the platform locale.
    # NOTE(review): assumes the CSV is UTF-8 encoded -- confirm against the
    # actual data dumps.
    with open(csv_path, newline='', encoding='utf-8') as csvfile:
        for row in csv.DictReader(csvfile):
            row = dict(row)

            if 'Microsoft Academic Paper ID' in row:
                row['mag_id'] = row.pop('Microsoft Academic Paper ID')

            if 'WHO #Covidence' in row:
                covidence_id = row.pop('WHO #Covidence')
                # DictReader fills fields missing from a short row with None,
                # which has no .replace(); keep it as None (JSON null).
                if covidence_id is not None:
                    covidence_id = covidence_id.replace('#', '')
                row['who_covidence_id'] = covidence_id

            obj = dict(cord19_paper=row)
            print(json.dumps(obj, sort_keys=True), file=json_output)