1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
import sys
import json
import datetime
from fatcat_covid19.common import requests_retry_session
# Endpoint and fixed query options shared by every external-identifier lookup.
_FATCAT_LOOKUP_URL = 'https://api.fatcat.wiki/v0/release/lookup'
_FATCAT_LOOKUP_OPTIONS = {
    'expand': 'container,files,filesets,webcaptures',
    'hide': 'references',
}


def _lookup_fatcat_release(api_session, id_type, id_value):
    """Look up a release by one external identifier; return None unless HTTP 200."""
    params = {id_type: id_value}
    params.update(_FATCAT_LOOKUP_OPTIONS)
    resp = api_session.get(_FATCAT_LOOKUP_URL, params=params)
    if resp.status_code == 200:
        return resp.json()
    return None


def enrich_fatcat_row(row, api_session):
    """
    Attach the matching fatcat release (if any) to a single CORD-19 row.

    The row's ``cord19_paper`` dict is inspected for external identifiers,
    which are tried against the fatcat release lookup endpoint in this
    order: ``pmcid``, ``doi``, ``pubmed_id`` (sent as ``pmid``). The first
    lookup that answers HTTP 200 with a non-empty body wins; missing or
    empty identifiers are skipped without issuing a request.

    Args:
        row: dict for one paper; mutated in place.
        api_session: object exposing ``get(url, params=...)`` whose result
            has ``status_code`` and ``json()``.

    Returns:
        The same ``row`` object. When a release was found it gains the keys
        ``fatcat_release`` (the decoded lookup response) and ``release_id``
        (that response's ``ident``). Rows without a ``cord19_paper`` entry
        are returned untouched.
    """
    cord19_paper = row.get('cord19_paper')
    if not cord19_paper:
        return row

    doi = cord19_paper.get('doi') or None
    # One specific DOI arrives without its leading "1"; patch it so the
    # lookup is made with the well-formed value.
    if doi == '0.1126/science.abb7331':
        doi = '10.1126/science.abb7331'

    # (lookup parameter name, value) pairs, in priority order.
    candidates = (
        ('pmcid', cord19_paper.get('pmcid') or None),
        ('doi', doi),
        ('pmid', cord19_paper.get('pubmed_id') or None),
    )

    fatcat_release = None
    for id_type, id_value in candidates:
        if not id_value:
            continue
        fatcat_release = _lookup_fatcat_release(api_session, id_type, id_value)
        if fatcat_release:
            break

    if fatcat_release:
        row['fatcat_release'] = fatcat_release
        row['release_id'] = fatcat_release['ident']
    return row
def enrich_fatcat_file(json_input, json_output):
    """
    Takes a JSON-transformed CORD-19 *metadata* file and enriches it with
    fatcat metadata.

    Each line of ``json_input`` is parsed as one JSON document, passed
    through ``enrich_fatcat_row`` with a single shared API session, and
    every truthy result is printed to ``json_output`` as one line of JSON
    with sorted keys. Blank lines in the input are skipped.

    Args:
        json_input: iterable of lines, each holding one JSON document.
        json_output: writable file-like object receiving the output lines.
    """
    api_session = requests_retry_session()
    for line in json_input:
        # json.loads() raises on an empty document; skip blank lines rather
        # than aborting the whole run on one of them.
        if not line.strip():
            continue
        row = json.loads(line)
        result = enrich_fatcat_row(row, api_session)
        if result:
            print(json.dumps(result, sort_keys=True), file=json_output)
|