aboutsummaryrefslogtreecommitdiffstats
path: root/bin/parse_cord19_csv.py
blob: dbc6cc593588a94a203e9d57c6ab4b1be0198496 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!/usr/bin/env python3

"""
Trivial helper to transform the CORD-19 CSV file to JSON, and rename a couple
of the column keys.
"""

import sys
import csv
import json

def transform_row(row):
    """Return one CSV row wrapped as a ``cord19_paper`` JSON-ready object.

    The input mapping is copied, not mutated. Two columns are renamed:
    ``Microsoft Academic Paper ID`` becomes ``mag_id`` and ``WHO #Covidence``
    becomes ``who_covidence_id`` (with every ``#`` character removed from the
    value). All other columns are passed through unchanged.

    Raises:
        KeyError: if either of the renamed columns is absent from ``row``.
    """
    record = dict(row)
    record['mag_id'] = record.pop('Microsoft Academic Paper ID')
    who_id = record.pop('WHO #Covidence')
    # csv.DictReader fills the missing cells of a short row with None; pass
    # that through (JSON null) instead of crashing on None.replace().
    if who_id is not None:
        who_id = who_id.replace('#', '')
    record['who_covidence_id'] = who_id
    return dict(cord19_paper=record)


def main(argv=None):
    """Print one JSON object per line for each row of the CSV at ``argv[1]``.

    ``argv`` defaults to ``sys.argv``; arguments after the CSV path are
    ignored. Exits with a usage message on stderr when no path is given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else 'parse_cord19_csv.py'
        sys.exit('usage: {} CSVFILE'.format(prog))
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale; newline='' is what the csv module requires.
    with open(argv[1], newline='', encoding='utf-8') as csvfile:
        for row in csv.DictReader(csvfile):
            print(json.dumps(transform_row(row), sort_keys=True))


if __name__ == '__main__':
    main()