aboutsummaryrefslogtreecommitdiffstats
path: root/bin/parse_cord19_csv.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-04-01 15:27:25 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-04-01 15:27:25 -0700
commit: 485c8b8432d839bb3cc0bd67152adda4bbf0df20 (patch)
tree: be8b0b11c3a9ec722366945bcd880245222cc6c7 /bin/parse_cord19_csv.py
parent: 7f30c43cd4955d596ebc51a56aea7996b1005e47 (diff)
download: fatcat-covid19-485c8b8432d839bb3cc0bd67152adda4bbf0df20.tar.gz
download: fatcat-covid19-485c8b8432d839bb3cc0bd67152adda4bbf0df20.zip
move scripts/ to bin/
Diffstat (limited to 'bin/parse_cord19_csv.py')
-rwxr-xr-x bin/parse_cord19_csv.py 15
1 files changed, 15 insertions, 0 deletions
diff --git a/bin/parse_cord19_csv.py b/bin/parse_cord19_csv.py
new file mode 100755
index 0000000..536e5d3
--- /dev/null
+++ b/bin/parse_cord19_csv.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+# Read the CSV file named by the first command-line argument and print each data row to stdout as one JSON object per line.
+import sys
+import csv
+import json
+
+CSVFILE = sys.argv[1]  # path of the input CSV; IndexError if no argument was passed
+
+with open(CSVFILE, newline='') as csvfile:  # newline='' leaves line-ending handling to the csv module
+ reader = csv.DictReader(csvfile)  # no fieldnames given, so the first CSV row supplies the dict keys
+ for row in reader:
+ row = dict(row)  # copy the row into a plain dict before editing its keys
+ row['mag_id'] = row.pop('Microsoft Academic Paper ID')  # rename the column; KeyError if the header lacks it
+ row['who_covidence_id'] = row.pop('WHO #Covidence').replace('#', '')  # rename the column and drop every '#' from its value
+ print(json.dumps(row, sort_keys=True))  # emit the row as a single line of JSON with keys in sorted order