aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2021-01-21 19:25:09 -0800
committerBryan Newbold <bnewbold@archive.org>2021-01-21 19:36:21 -0800
commit05cd1bb7bdcefe8ec596e572baafb9da9a8838b2 (patch)
tree5c49a54bf594e5d0008264984e12e1d1a881c1fe
parent5096b0b7407aadfb97093615696f951829dc3506 (diff)
downloadfatcat-scholar-05cd1bb7bdcefe8ec596e572baafb9da9a8838b2.tar.gz
fatcat-scholar-05cd1bb7bdcefe8ec596e572baafb9da9a8838b2.zip
refactor DOI domain lookup into python code; expand table
-rw-r--r--fatcat_scholar/biblio_hacks.py82
-rw-r--r--fatcat_scholar/schema.py14
-rw-r--r--fatcat_scholar/templates/search_macros.html34
3 files changed, 101 insertions, 29 deletions
diff --git a/fatcat_scholar/biblio_hacks.py b/fatcat_scholar/biblio_hacks.py
new file mode 100644
index 0000000..935d1ff
--- /dev/null
+++ b/fatcat_scholar/biblio_hacks.py
@@ -0,0 +1,82 @@
+from typing import Optional
+
+DOI_PREFIX_MAP = {
+ # simple entries (mostly OA and platforms)
+ "10.2307": {"domain": "jstor.org"},
+ "10.11501": {"domain": "ndl.go.jp"},
+ "10.6084": {"domain": "figshare.com"},
+ "10.5281": {"domain": "zenodo.org"},
+ "10.1590": {"domain": "scielo.br"},
+ "10.1371": {"domain": "plos.org"},
+ "10.1155": {"domain": "hindawi.com"},
+ "10.7554": {"domain": "elifesciences.com"},
+ "10.1145": {"domain": "acm.org"},
+ # more complex publisher mappings (verify journal/publisher)
+ "10.1016": {"domain": "elsevier.com", "publisher": "elsevier"},
+ "10.1007": {"domain": "springer.com", "publisher": "springer"},
+ "10.1186": {"domain": "springer.com", "publisher": "springer"},
+ "10.1002": {"domain": "wiley.com", "publisher": "wiley"},
+ "10.1109": {"domain": "ieee.com", "publisher": "ieee"},
+ "10.1080": {"domain": "tandfonline.com", "publisher": "informa"},
+ "10.1093": {"domain": "oup.com", "publisher": "oxford"},
+ "10.1111": {"domain": "sagepub.com", "publisher": "sage"},
+ "10.1042": {"domain": "sagepub.com", "publisher": "sage"},
+ "10.1177": {"domain": "sagepub.com", "publisher": "sage"},
+ "10.1021": {"domain": "acs.org", "publisher": "acs"},
+ "10.1017": {"domain": "cambridge.org", "publisher": "cambridge"},
+ # "10.1097": {"domain": "lww.org", "publisher": "wolters"},
+ "10.1515": {"domain": "degruyter.com", "publisher": "gruyter"},
+ "10.1038": {"domain": "nature.com", "container_name": "nature"},
+ "10.1163": {"domain": "brill.com", "publisher": "brill"},
+ "10.3390": {"domain": "mdpi.com", "publisher": "mdpi"},
+ "10.1128": {"domain": "asm.org", "publisher": "microbiology"},
+ "10.1103": {"domain": "aps.org", "publisher": "physical"},
+ "10.3389": {"domain": "frontiersin.org", "publisher": "frontiers"},
+ "10.1136": {"domain": "bmj.org", "publisher": "bmj"},
+ "10.1088": {"domain": "iop.org", "publisher": "iop"},
+ "10.1086": {"domain": "iop.org", "publisher": "iop"},
+ "10.1142": {"domain": "worldscientific.com", "publisher": "world"},
+ "10.1126": {"domain": "sciencemag.org", "container_name": "science"},
+ "10.1162": {"domain": "mitpressjournals.org", "publisher": "mit"},
+ "10.1045": {"domain": "dlib.org", "container_name": "d-lib"},
+ "10.17723": {"domain": "archivists.org", "publisher": "archiv"},
+ "10.2139": {"domain": "ssrn.com", "container_name": "social science"},
+}
+
+
+def doi_link_domain(
+ doi_prefix: str, container_name: Optional[str], publisher: Optional[str]
+) -> Optional[str]:
+ """
+ Takes a DOI prefix and a publisher name, and tries to guess which domain
+ name the DOI will resolve to. This is used for display only.
+
+ helpful: https://gist.github.com/TomDemeranville/8699224
+
+ TODO: JSTOR, biorxiv, medrxiv, zenodo, figshare, dryad, etc
+ """
+
+ # manual cases first
+ if doi_prefix == "10.1101" and container_name:
+ if "biorxiv" in container_name.lower():
+ return "biorxiv.org"
+ elif "medrxiv" in container_name.lower():
+ return "medrxiv.org"
+ else:
+ return None
+ elif doi_prefix == "10.1101" and container_name:
+ if "lancet" in container_name.lower():
+ return "thelancet.com"
+
+ # then the map
+ meta = DOI_PREFIX_MAP.get(doi_prefix)
+ if not meta:
+ return None
+
+ if meta.get("publisher"):
+ if not publisher or meta["publisher"] not in publisher.lower():
+ return None
+ if meta.get("container_name"):
+ if not container_name or meta["container_name"] not in container_name.lower():
+ return None
+ return meta.get("domain")
diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index d3a91a7..480b8fa 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -19,6 +19,7 @@ from pydantic import BaseModel
from fatcat_openapi_client import ReleaseEntity, ReleaseContrib
from fatcat_scholar.api_entities import entity_to_dict
+from fatcat_scholar.biblio_hacks import doi_link_domain
class DocType(str, Enum):
@@ -104,6 +105,19 @@ class ScholarBiblio(BaseModel):
contrib_names: List[str]
affiliations: List[str]
+ def doi_link_domain(self, default: str = "doi.org") -> str:
+ if not self.doi_prefix:
+ return default
+ domain = doi_link_domain(
+ self.doi_prefix,
+ container_name=self.container_name,
+ publisher=self.publisher,
+ )
+ if domain:
+ return domain
+ else:
+ return default
+
def citation_str(self, style: str) -> Optional[str]:
"""
Tries to format this biblio metadata as a citation string. If it fails,
diff --git a/fatcat_scholar/templates/search_macros.html b/fatcat_scholar/templates/search_macros.html
index f580cee..25ba7f5 100644
--- a/fatcat_scholar/templates/search_macros.html
+++ b/fatcat_scholar/templates/search_macros.html
@@ -99,40 +99,16 @@
</a>
{% endmacro %}
-{% macro doi_access_button(biblio, is_oa=False) %}
- {% if biblio.doi %}
- {% set publisher = "" or (biblio.publisher and biblio.publisher.lower()) %}
- {% set container_name = "" or (biblio.container_name and biblio.container_name.lower()) %}
- <a target="_blank" rel="external noopener noreferrer" href="https://doi.org/{{ biblio.doi }}">
+{% macro doi_access_button(paper, is_oa=False) %}
+ {% if paper.biblio and paper.biblio.doi %}
+ <a target="_blank" rel="external noopener noreferrer" href="https://doi.org/{{ paper.biblio.doi }}">
<button class="ui left aligned compact blue labeled icon button serp-button">
{% if is_oa %}
<i class="unlock alternate icon" style="background-color: #fb971f;"></i>
{% else %}
<i class="external alternate icon"></i>
{% endif %}
- {# TODO: detect prefix? JSTOR, biorxiv, medrxiv, zenodo, figshare, dryad, etc #}
- {# helpful: https://gist.github.com/TomDemeranville/8699224 #}
- {% if biblio.doi_prefix == "10.6084" %}
- figshare.com
- {% elif biblio.doi_prefix == "10.5281" %}
- zenodo.org
- {% elif biblio.doi_prefix == "10.1371" %}
- plos.org
- {% elif biblio.doi_prefix == "10.1101" and "biorxiv" in container_name %}
- biorxiv.org
- {% elif biblio.doi_prefix == "10.1101" and "medrxiv" in container_name %}
- medrxiv.org
- {% elif biblio.doi_prefix == "10.1016" and "elsevier" in publisher %}
- elsevier.com
- {% elif biblio.doi_prefix in ["10.1186", "10.1007"] and "springer" in publisher %}
- springer.com
- {% elif biblio.doi_prefix in ["10.1042", "10.1111", "10.1177"] and "sage" in publisher %}
- sagepub.com
- {% elif biblio.doi_prefix in ["10.1080"] and "taylor" in publisher %}
- tandfonline.com
- {% else %}
- Publisher / doi.org
- {% endif %}
+ {{ paper._obj.biblio.doi_link_domain("Publisher / doi.org") }}
</button>
</a>
{% endif %}
@@ -453,7 +429,7 @@
{% endif %}
{# publisher / repository #}
- {{ doi_access_button(paper.biblio, is_oa=("oa" in paper.tags)) }}
+ {{ doi_access_button(paper, is_oa=("oa" in paper.tags)) }}
{# trusted platform fulltext links #}
{{ platform_access_button(paper.biblio) }}