summary | refs | log | tree | commit | diff | stats
path: root/fatcat_scholar
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-10-28 13:55:50 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-10-28 13:55:50 -0700
commit: 48fd26b1cf5dbb1e404a099a748c7b9c4a9375c3 (patch)
tree: 80691672c50682a38a87a7dc108689fd131e62d2 /fatcat_scholar
parent: 2079756616cb2556257fb5f861628188a917f0cf (diff)
download: fatcat-scholar-48fd26b1cf5dbb1e404a099a748c7b9c4a9375c3.tar.gz
          fatcat-scholar-48fd26b1cf5dbb1e404a099a748c7b9c4a9375c3.zip
grobid: switch to more ergonomic parse_citation_xml()
Diffstat (limited to 'fatcat_scholar')
-rw-r--r-- fatcat_scholar/query_citation.py | 16
1 files changed, 4 insertions, 12 deletions
diff --git a/fatcat_scholar/query_citation.py b/fatcat_scholar/query_citation.py
index 0233acc..83717e8 100644
--- a/fatcat_scholar/query_citation.py
+++ b/fatcat_scholar/query_citation.py
@@ -11,14 +11,14 @@ parallel with "regular" query?
"""
import sys
-from typing import Any, List, Optional, Tuple
+from typing import Any, Optional, Tuple
import fuzzycat.common
import fuzzycat.verify
import requests
from fatcat_openapi_client import ReleaseContrib, ReleaseEntity, ReleaseExtIds
from fuzzycat.matching import match_release_fuzzy
-from grobid_tei_xml import GrobidBiblio, parse_citations_xml
+from grobid_tei_xml import GrobidBiblio, parse_citation_xml
from fatcat_scholar.api_entities import entity_to_dict
@@ -44,14 +44,6 @@ def grobid_process_citation(
return grobid_response.text
-def transform_grobid(raw_xml: str) -> Optional[GrobidBiblio]:
- ref_list: List[GrobidBiblio] = parse_citations_xml(raw_xml)
- # check for unmatched or empty references
- if not ref_list or not ref_list[0].to_dict():
- return None
- return ref_list[0]
-
-
def ref_to_release(ref: GrobidBiblio) -> ReleaseEntity:
contribs = []
for author in ref.authors or []:
@@ -152,7 +144,7 @@ def try_fuzzy_match(
resp = grobid_process_citation(citation, grobid_host=grobid_host, timeout=3.0)
if not resp:
return None
- ref = transform_grobid(resp)
+ ref = parse_citation_xml(resp)
if not ref:
return None
release = ref_to_release(ref)
@@ -180,7 +172,7 @@ if __name__ == "__main__":
print(resp)
if not resp:
sys.exit(0)
- ref = transform_grobid(resp)
+ ref = parse_citation_xml(resp)
print(ref)
if not ref:
sys.exit(0)