From 48fd26b1cf5dbb1e404a099a748c7b9c4a9375c3 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 28 Oct 2021 13:55:50 -0700
Subject: grobid: switch to more ergonomic parse_citation_xml()

---
 fatcat_scholar/query_citation.py | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/fatcat_scholar/query_citation.py b/fatcat_scholar/query_citation.py
index 0233acc..83717e8 100644
--- a/fatcat_scholar/query_citation.py
+++ b/fatcat_scholar/query_citation.py
@@ -11,14 +11,14 @@ parallel with "regular" query?
 """
 
 import sys
-from typing import Any, List, Optional, Tuple
+from typing import Any, Optional, Tuple
 
 import fuzzycat.common
 import fuzzycat.verify
 import requests
 from fatcat_openapi_client import ReleaseContrib, ReleaseEntity, ReleaseExtIds
 from fuzzycat.matching import match_release_fuzzy
-from grobid_tei_xml import GrobidBiblio, parse_citations_xml
+from grobid_tei_xml import GrobidBiblio, parse_citation_xml
 
 from fatcat_scholar.api_entities import entity_to_dict
 
@@ -44,14 +44,6 @@ def grobid_process_citation(
     return grobid_response.text
 
 
-def transform_grobid(raw_xml: str) -> Optional[GrobidBiblio]:
-    ref_list: List[GrobidBiblio] = parse_citations_xml(raw_xml)
-    # check for unmatched or empty references
-    if not ref_list or not ref_list[0].to_dict():
-        return None
-    return ref_list[0]
-
-
 def ref_to_release(ref: GrobidBiblio) -> ReleaseEntity:
     contribs = []
     for author in ref.authors or []:
@@ -152,7 +144,7 @@ def try_fuzzy_match(
     resp = grobid_process_citation(citation, grobid_host=grobid_host, timeout=3.0)
     if not resp:
         return None
-    ref = transform_grobid(resp)
+    ref = parse_citation_xml(resp)
     if not ref:
         return None
     release = ref_to_release(ref)
@@ -180,7 +172,7 @@ if __name__ == "__main__":
     print(resp)
     if not resp:
         sys.exit(0)
-    ref = transform_grobid(resp)
+    ref = parse_citation_xml(resp)
     print(ref)
     if not ref:
         sys.exit(0)
-- 
cgit v1.2.3