diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-05-29 11:46:33 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-05-29 11:46:33 -0700 |
commit | 0b5d6523df0176b4ac76703b6fb7bf5d4aa85d29 (patch) | |
tree | 75568dd8112ef590116567f76141ed77cab6d101 | |
parent | 67634b72cfa012ecc18d26048d96fe4610c974ba (diff) | |
download | fatcat-covid19-0b5d6523df0176b4ac76703b6fb7bf5d4aa85d29.tar.gz fatcat-covid19-0b5d6523df0176b4ac76703b6fb7bf5d4aa85d29.zip |
metadata parse: new column titles
-rw-r--r-- | fatcat_covid19/parse.py | 6 |
1 files changed, 4 insertions, 2 deletions
diff --git a/fatcat_covid19/parse.py b/fatcat_covid19/parse.py
index ce2bf26..aea3d25 100644
--- a/fatcat_covid19/parse.py
+++ b/fatcat_covid19/parse.py
@@ -14,8 +14,10 @@ def parse_cord19_file(csv_path, json_output):
         reader = csv.DictReader(csvfile)
         for row in reader:
             row = dict(row)
-            row['mag_id'] = row.pop('Microsoft Academic Paper ID')
-            row['who_covidence_id'] = row.pop('WHO #Covidence').replace('#', '')
+            # Previously had to rename these columns
+            #row['mag_id'] = row.pop('Microsoft Academic Paper ID')
+            #row['who_covidence_id'] = row.pop('WHO #Covidence').replace('#', '')
+            row['who_covidence_id'] = row['who_covidence_id'].replace('#', '')
             obj = dict(cord19_paper=row)
             print(json.dumps(obj, sort_keys=True), file=json_output)