diff options
Diffstat (limited to 'fatcat_covid19')
-rw-r--r-- | fatcat_covid19/parse.py | 21 |
1 files changed, 21 insertions, 0 deletions
# fatcat_covid19/parse.py

import sys
import csv
import json


def parse_cord19_file(csv_path, json_output):
    """
    Trivial helper to transform the CORD-19 CSV file to JSON, and rename a
    couple of the column keys.

    Each CSV row is written to `json_output` as a single line of JSON of the
    form ``{"cord19_paper": {...row...}}``, with keys sorted.

    Column renames (applied only when the legacy column is present, so a CSV
    lacking one of these headers passes through instead of raising KeyError):

      - 'Microsoft Academic Paper ID' -> 'mag_id'
      - 'WHO #Covidence' -> 'who_covidence_id', with '#' characters removed

    Args:
        csv_path: path of the CSV file to read; it is decoded as UTF-8.
        json_output: writable text file-like object receiving the JSON lines.
    """

    # Explicit encoding so decoding does not depend on the platform locale;
    # newline='' is what the csv module expects for correct quoted-newline
    # handling.
    with open(csv_path, newline='', encoding='utf-8') as csvfile:
        for row in csv.DictReader(csvfile):
            # Copy to a plain dict before mutating the keys.
            row = dict(row)
            if 'Microsoft Academic Paper ID' in row:
                row['mag_id'] = row.pop('Microsoft Academic Paper ID')
            if 'WHO #Covidence' in row:
                covidence_id = row.pop('WHO #Covidence')
                # DictReader fills cells missing from a short row with None,
                # which has no .replace(); keep it as None (JSON null).
                if covidence_id is not None:
                    covidence_id = covidence_id.replace('#', '')
                row['who_covidence_id'] = covidence_id
            obj = {'cord19_paper': row}
            print(json.dumps(obj, sort_keys=True), file=json_output)