aboutsummaryrefslogtreecommitdiffstats
path: root/fatcat_covid19
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-04-03 15:13:43 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-04-03 15:13:43 -0700
commit 328892b0e571395fbbf8a22ca8a4216c6cf71074 (patch)
tree 77a7b3565d1c876eb2035192c9bc125d573caab8 /fatcat_covid19
parent 7bf8ae73b8b5dfca4d17f353cdbec669e69bbbec (diff)
downloadfatcat-covid19-328892b0e571395fbbf8a22ca8a4216c6cf71074.tar.gz
fatcat-covid19-328892b0e571395fbbf8a22ca8a4216c6cf71074.zip
refactor enrich into fatcat_covid19
Diffstat (limited to 'fatcat_covid19')
-rw-r--r-- fatcat_covid19/enrich.py 67
1 files changed, 67 insertions, 0 deletions
diff --git a/fatcat_covid19/enrich.py b/fatcat_covid19/enrich.py
new file mode 100644
index 0000000..245a357
--- /dev/null
+++ b/fatcat_covid19/enrich.py
@@ -0,0 +1,67 @@
+
+import sys
+import json
+import datetime
+
+from fatcat_covid19.common import requests_retry_session
+
+
def enrich_fatcat_row(row, api_session):
    """
    Attach a fatcat release to a single CORD-19 metadata row.

    Reads the identifiers under ``row['cord19_paper']`` and queries the
    fatcat release lookup endpoint, trying PMCID first, then DOI, then
    PubMed ID, stopping at the first lookup that yields a release.

    Args:
        row: dict for one paper; modified in place. On a successful lookup
            the keys ``fatcat_release`` (the decoded lookup response) and
            ``release_id`` (its ``ident`` field) are added.
        api_session: object with a requests-style ``get(url, params=...)``
            method whose response exposes ``status_code`` and ``json()``.

    Returns:
        The same ``row`` object, enriched when a release was found and
        otherwise unchanged. The row is always returned (never printed) so
        the caller decides where output goes.
    """
    cord19_paper = row.get('cord19_paper')
    if not cord19_paper:
        return row

    # Normalize empty strings and other falsy values to None.
    pubmed_id = cord19_paper.get('pubmed_id') or None
    pmcid = cord19_paper.get('pmcid') or None
    doi = cord19_paper.get('doi') or None

    # Patch one specific DOI that shows up without its leading "1".
    if doi == '0.1126/science.abb7331':
        doi = '10.1126/science.abb7331'

    # Lookup order matters: the first identifier that resolves wins.
    lookups = (
        ('pmcid', pmcid),
        ('doi', doi),
        ('pmid', pubmed_id),
    )

    fatcat_release = None
    for param, value in lookups:
        if not value:
            continue
        resp = api_session.get(
            'https://api.fatcat.wiki/v0/release/lookup',
            params={
                param: value,
                'expand': 'container,files,filesets,webcaptures',
                'hide': 'abstracts,references',
            },
        )
        if resp.status_code == 200:
            fatcat_release = resp.json()
        # Any non-200 status (or an empty body) falls through to the next
        # identifier.
        if fatcat_release:
            break

    if fatcat_release:
        row['fatcat_release'] = fatcat_release
        row['release_id'] = fatcat_release['ident']
    return row
+
+
def enrich_fatcat_file(json_input, json_output):
    """
    Enrich a JSON-transformed CORD-19 *metadata* file with fatcat metadata.

    Each item yielded by ``json_input`` is parsed as a JSON document and
    passed through ``enrich_fatcat_row``, sharing one retrying HTTP session
    for all rows. Every truthy result is written to ``json_output`` as a
    single line of JSON with sorted keys.
    """
    session = requests_retry_session()
    for line in json_input:
        enriched = enrich_fatcat_row(json.loads(line), session)
        if not enriched:
            continue
        print(json.dumps(enriched, sort_keys=True), file=json_output)