about | summary | refs | log | tree | commit | diff | stats
path: root/fatcat_scholar/query_citation.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-10-27 15:33:29 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-10-27 18:25:58 -0700
commit: 33211915773a0c77d064c55c1b02ceed6f455feb (patch)
tree: 1828505db917686e7223d41e97c6446223f2da32 /fatcat_scholar/query_citation.py
parent: 6c103e4dc48e7e0c0f6cdedc18b0afe33babf1ac (diff)
download: fatcat-scholar-33211915773a0c77d064c55c1b02ceed6f455feb.tar.gz
fatcat-scholar-33211915773a0c77d064c55c1b02ceed6f455feb.zip
replace grobid2json with grobid_tei_xml
This first iteration uses the .to_legacy_dict() helpers for backwards compatibility
Diffstat (limited to 'fatcat_scholar/query_citation.py')
-rw-r--r-- fatcat_scholar/query_citation.py 13
1 files changed, 5 insertions, 8 deletions
diff --git a/fatcat_scholar/query_citation.py b/fatcat_scholar/query_citation.py
index 6cc9086..dea4f02 100644
--- a/fatcat_scholar/query_citation.py
+++ b/fatcat_scholar/query_citation.py
@@ -10,9 +10,7 @@ timeout and try/except! In the future, perhaps should be async so it can run in
parallel with "regular" query?
"""
-import io
import sys
-import xml.etree.ElementTree as ET
from typing import Any, Optional, Tuple
import fuzzycat.common
@@ -20,9 +18,9 @@ import fuzzycat.verify
import requests
from fatcat_openapi_client import ReleaseContrib, ReleaseEntity, ReleaseExtIds
from fuzzycat.matching import match_release_fuzzy
+from grobid_tei_xml import parse_citations_xml
from fatcat_scholar.api_entities import entity_to_dict
-from fatcat_scholar.grobid2json import biblio_info
def grobid_process_citation(
@@ -47,11 +45,10 @@ def grobid_process_citation(
def transform_grobid(raw_xml: str) -> Optional[dict]:
- # first, remove any xmlns stuff
- raw_xml = raw_xml.replace('xmlns="http://www.tei-c.org/ns/1.0"', "")
- tree = ET.parse(io.StringIO(raw_xml))
- root = tree.getroot()
- ref = biblio_info(root, ns="")
+ ref_list = parse_citations_xml(raw_xml)
+ if not ref_list:
+ return None
+ ref = ref_list[0]
if not any(ref.values()):
return None
return ref