diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-27 15:33:29 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-27 18:25:58 -0700 |
commit | 33211915773a0c77d064c55c1b02ceed6f455feb (patch) | |
tree | 1828505db917686e7223d41e97c6446223f2da32 /fatcat_scholar/query_citation.py | |
parent | 6c103e4dc48e7e0c0f6cdedc18b0afe33babf1ac (diff) | |
download | fatcat-scholar-33211915773a0c77d064c55c1b02ceed6f455feb.tar.gz fatcat-scholar-33211915773a0c77d064c55c1b02ceed6f455feb.zip |
replace grobid2json with grobid_tei_xml
This first iteration uses the .to_legacy_dict() helpers for backwards
compatibility.
Diffstat (limited to 'fatcat_scholar/query_citation.py')
-rw-r--r-- | fatcat_scholar/query_citation.py | 13 |
1 file changed, 5 insertions, 8 deletions
diff --git a/fatcat_scholar/query_citation.py b/fatcat_scholar/query_citation.py index 6cc9086..dea4f02 100644 --- a/fatcat_scholar/query_citation.py +++ b/fatcat_scholar/query_citation.py @@ -10,9 +10,7 @@ timeout and try/except! In the future, perhaps should be async so it can run in parallel with "regular" query? """ -import io import sys -import xml.etree.ElementTree as ET from typing import Any, Optional, Tuple import fuzzycat.common @@ -20,9 +18,9 @@ import fuzzycat.verify import requests from fatcat_openapi_client import ReleaseContrib, ReleaseEntity, ReleaseExtIds from fuzzycat.matching import match_release_fuzzy +from grobid_tei_xml import parse_citations_xml from fatcat_scholar.api_entities import entity_to_dict -from fatcat_scholar.grobid2json import biblio_info def grobid_process_citation( @@ -47,11 +45,10 @@ def grobid_process_citation( def transform_grobid(raw_xml: str) -> Optional[dict]: - # first, remove any xmlns stuff - raw_xml = raw_xml.replace('xmlns="http://www.tei-c.org/ns/1.0"', "") - tree = ET.parse(io.StringIO(raw_xml)) - root = tree.getroot() - ref = biblio_info(root, ns="") + ref_list = parse_citations_xml(raw_xml) + if not ref_list: + return None + ref = ref_list[0] if not any(ref.values()): return None return ref |