diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-04-08 15:59:59 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-04-08 15:59:59 -0700 |
commit | 858147b071103c505bff643e35b503c623f20284 (patch) | |
tree | acfc5cdba1315cf99ae31aac0cf969623e81f1e5 /fatcat_covid19 | |
parent | 1552f5dcd8b0abe89f53182c7a495a7b8dc008fc (diff) | |
download | fatcat-covid19-858147b071103c505bff643e35b503c623f20284.tar.gz fatcat-covid19-858147b071103c505bff643e35b503c623f20284.zip |
refactor parse_cord19_csv.py into tool
Diffstat (limited to 'fatcat_covid19')
-rw-r--r-- | fatcat_covid19/parse.py | 21 |
1 files changed, 21 insertions, 0 deletions
import sys
import csv
import json


def parse_cord19_file(csv_path, json_output):
    """
    Transform the CORD-19 CSV file into JSON lines, renaming a couple of the
    column keys along the way.

    Every CSV row is written to `json_output` as one line of JSON of the form
    {"cord19_paper": <row>}, with keys sorted. Two columns are renamed:

    - 'Microsoft Academic Paper ID' becomes 'mag_id'
    - 'WHO #Covidence' becomes 'who_covidence_id', with any '#' characters
      removed from the value

    csv_path: path of the CSV file to read
    json_output: writable text file object that receives the JSON lines

    Raises KeyError if either of the renamed columns is missing from the CSV
    header.
    """

    # newline='' lets the csv module do its own newline handling, so that
    # line breaks inside quoted fields are parsed correctly
    with open(csv_path, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            row = dict(row)
            row['mag_id'] = row.pop('Microsoft Academic Paper ID')
            covidence_id = row.pop('WHO #Covidence')
            # DictReader fills the fields missing from a short row with None;
            # pass that through as JSON null instead of crashing on .replace()
            if covidence_id is not None:
                covidence_id = covidence_id.replace('#', '')
            row['who_covidence_id'] = covidence_id
            obj = dict(cord19_paper=row)
            print(json.dumps(obj, sort_keys=True), file=json_output)