1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
|
from typing import Optional
# Maps a DOI prefix (the part before the first "/") to a guess at the domain
# the DOI resolves to. Every entry has a "domain" key. Entries may also carry:
#   "publisher"      -- substring that doi_link_domain() requires to be present
#                       in the lowercased publisher name of the record
#   "container_name" -- substring that doi_link_domain() requires to be present
#                       in the lowercased container (journal) name of the record
# The extra keys exist because one prefix can cover content hosted on several
# sites; when the substring check fails, no domain is guessed at all.
DOI_PREFIX_MAP = {
    # simple entries (mostly OA and platforms)
    "10.2307": {"domain": "jstor.org"},
    "10.11501": {"domain": "ndl.go.jp"},
    "10.6084": {"domain": "figshare.com"},
    "10.5281": {"domain": "zenodo.org"},
    "10.1590": {"domain": "scielo.br"},
    "10.1371": {"domain": "plos.org"},
    "10.1155": {"domain": "hindawi.com"},
    # eLife is served from the .org domain
    "10.7554": {"domain": "elifesciences.org"},
    "10.1145": {"domain": "acm.org"},
    # more complex publisher mappings (verify journal/publisher)
    "10.1016": {"domain": "elsevier.com", "publisher": "elsevier"},
    "10.1007": {"domain": "springer.com", "publisher": "springer"},
    "10.1186": {"domain": "springer.com", "publisher": "springer"},
    "10.1002": {"domain": "wiley.com", "publisher": "wiley"},
    # IEEE (including IEEE Xplore) is served from the .org domain
    "10.1109": {"domain": "ieee.org", "publisher": "ieee"},
    "10.1080": {"domain": "tandfonline.com", "publisher": "informa"},
    "10.1093": {"domain": "oup.com", "publisher": "oxford"},
    # NOTE(review): 10.1111 and 10.1042 are presumably listed under SAGE for
    # journals that moved publishers; the publisher check keeps this safe.
    "10.1111": {"domain": "sagepub.com", "publisher": "sage"},
    "10.1042": {"domain": "sagepub.com", "publisher": "sage"},
    "10.1177": {"domain": "sagepub.com", "publisher": "sage"},
    "10.1021": {"domain": "acs.org", "publisher": "acs"},
    "10.1017": {"domain": "cambridge.org", "publisher": "cambridge"},
    # "10.1097": {"domain": "lww.org", "publisher": "wolters"},
    "10.1515": {"domain": "degruyter.com", "publisher": "gruyter"},
    "10.1038": {"domain": "nature.com", "container_name": "nature"},
    "10.1163": {"domain": "brill.com", "publisher": "brill"},
    "10.3390": {"domain": "mdpi.com", "publisher": "mdpi"},
    "10.1128": {"domain": "asm.org", "publisher": "microbiology"},
    "10.1103": {"domain": "aps.org", "publisher": "physical"},
    "10.3389": {"domain": "frontiersin.org", "publisher": "frontiers"},
    # BMJ journals are served from the .com domain
    "10.1136": {"domain": "bmj.com", "publisher": "bmj"},
    "10.1088": {"domain": "iop.org", "publisher": "iop"},
    "10.1086": {"domain": "iop.org", "publisher": "iop"},
    "10.1142": {"domain": "worldscientific.com", "publisher": "world"},
    "10.1126": {"domain": "sciencemag.org", "container_name": "science"},
    "10.1162": {"domain": "mitpressjournals.org", "publisher": "mit"},
    "10.1045": {"domain": "dlib.org", "container_name": "d-lib"},
    "10.17723": {"domain": "archivists.org", "publisher": "archiv"},
    "10.2139": {"domain": "ssrn.com", "container_name": "social science"},
}
def doi_link_domain(
    doi_prefix: str, container_name: Optional[str], publisher: Optional[str]
) -> Optional[str]:
    """
    Guess which domain name a DOI will resolve to. Used for display only.

    A few prefixes are special-cased on the container (journal) name first;
    everything else is looked up in DOI_PREFIX_MAP. A map entry that carries
    a "publisher" and/or "container_name" hint only matches when that hint is
    a substring of the corresponding lowercased argument.

    Args:
        doi_prefix: the DOI prefix, e.g. "10.1016".
        container_name: container (journal) name of the record, if known.
        publisher: publisher name of the record, if known.

    Returns:
        The guessed domain, or None when no confident guess can be made.

    helpful: https://gist.github.com/TomDemeranville/8699224

    TODO: JSTOR, biorxiv, medrxiv, zenodo, figshare, dryad, etc
    """
    container_lower = container_name.lower() if container_name else ""

    # manual cases first
    if doi_prefix == "10.1101" and container_name:
        # prefix shared by the two preprint servers; anything else under it
        # gets no guess at all
        if "biorxiv" in container_lower:
            return "biorxiv.org"
        if "medrxiv" in container_lower:
            return "medrxiv.org"
        return None
    if doi_prefix == "10.1016" and "lancet" in container_lower:
        # Lancet titles are registered under the Elsevier prefix but hosted
        # on their own site. This branch previously re-tested "10.1101" and
        # could never be reached. Other 10.1016 records fall through.
        return "thelancet.com"

    # then the map
    meta = DOI_PREFIX_MAP.get(doi_prefix)
    if not meta:
        return None

    publisher_hint = meta.get("publisher")
    if publisher_hint and (not publisher or publisher_hint not in publisher.lower()):
        return None

    container_hint = meta.get("container_name")
    if container_hint and container_hint not in container_lower:
        return None

    return meta.get("domain")
|