diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-11-13 16:46:09 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-11-13 16:46:09 -0800 |
commit | 47e0b699b2a350b0081a64ebbcaba991c53cfb52 (patch) | |
tree | e7967828a6c4284d1bd189ac973c5f0041965179 /notes/ingest/es_csv_to_json.py | |
parent | 885bff50bbe57322ad32f4fbfab8d846e54671f2 (diff) | |
download | sandcrawler-47e0b699b2a350b0081a64ebbcaba991c53cfb52.tar.gz sandcrawler-47e0b699b2a350b0081a64ebbcaba991c53cfb52.zip |
ingest/backfill notes
Diffstat (limited to 'notes/ingest/es_csv_to_json.py')
-rwxr-xr-x | notes/ingest/es_csv_to_json.py | 37 |
1 files changed, 37 insertions, 0 deletions
diff --git a/notes/ingest/es_csv_to_json.py b/notes/ingest/es_csv_to_json.py
new file mode 100755
index 0000000..4cd1811
--- /dev/null
+++ b/notes/ingest/es_csv_to_json.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+
+"""
+    input like:
+
+        doi,ident,"release_stage"
+        "10.7554/elife.38904",mxj534diw5gatc26rkif3io5xm,published
+        "10.7554/elife.41855",kag74qc6dfex7ftpfkf7iaus44,published
+        "10.7554/elife.41156",ienee5vxcbbbfhs2q54h4455hu,published
+        "10.7554/elife.43230",52rpllol2rcndjqs3xfwcldeka,published
+        "10.7554/elife.42591",fpz642gihrc3jd2vibg6gnjrxm,published
+
+    output like:
+
+    {
+        "base_url": "https://doi.org/10.7554/elife.38904",
+        "ext_ids": {
+            "doi": "10.7554/elife.38904"
+        },
+        "fatcat_release": "mxj534diw5gatc26rkif3io5xm",
+        "release_stage": "published"
+    }
+"""
+
+import csv, sys, json
+
+reader = csv.DictReader(sys.stdin)
+for row in reader:
+    d = {
+        "base_url": "https://doi.org/{}".format(row['doi']),
+        "ext_ids": {
+            "doi": row['doi'],
+        },
+        "fatcat_release": row['ident'],
+        "release_stage": row['release_stage'],
+    }
+    print(json.dumps(d))