diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-07-09 13:26:35 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-07-09 13:26:35 +0200 |
commit | 002764b5b1f8f27bd8ae42d33b2a6f42a2a4b7a1 (patch) | |
tree | 38b7aa860202812f951ee5dc86da3a79200258ff /fuzzycat/grobid_unstructured.py | |
parent | f9ef1c989b4f85c81ac5f24b08f0d636636e7a4b (diff) | |
parent | e05f4c4973fc3573d3707d4d90779fad094ced6f (diff) | |
download | fuzzycat-002764b5b1f8f27bd8ae42d33b2a6f42a2a4b7a1.tar.gz fuzzycat-002764b5b1f8f27bd8ae42d33b2a6f42a2a4b7a1.zip |
Merge branch 'master' of git.archive.org:webgroup/fuzzycat
* 'master' of git.archive.org:webgroup/fuzzycat:
simplify README for general audience; move some content to notes
sandcrawler slugify: lower-case greek ambiguity (OCR)
DOI clean/normalize helper; and use in verification etc
verify: page count parsing and comparison improvements
Diffstat (limited to 'fuzzycat/grobid_unstructured.py')
-rw-r--r-- | fuzzycat/grobid_unstructured.py | 3 |
1 file changed, 2 insertions, 1 deletion
```diff
diff --git a/fuzzycat/grobid_unstructured.py b/fuzzycat/grobid_unstructured.py
index 79c39d3..5462ae1 100644
--- a/fuzzycat/grobid_unstructured.py
+++ b/fuzzycat/grobid_unstructured.py
@@ -18,6 +18,7 @@
 from fatcat_openapi_client import ReleaseContrib, ReleaseEntity, ReleaseExtIds
 from fuzzycat.config import settings
 from fuzzycat.grobid2json import biblio_info
+from fuzzycat.utils import clean_doi
 
 GROBID_API_BASE = settings.get("GROBID_API_BASE", "https://grobid.qa.fatcat.wiki")
 
@@ -89,7 +90,7 @@ def grobid_ref_to_release(ref: dict) -> ReleaseEntity:
         issue=ref.get("issue"),
         pages=ref.get("pages"),
         ext_ids=ReleaseExtIds(
-            doi=ref.get("doi"),
+            doi=clean_doi(ref.get("doi")),
             pmid=ref.get("pmid"),
             pmcid=ref.get("pmcid"),
             arxiv=ref.get("arxiv_id"),
```