From 774ac98c2ca0a1c66c3283d466245cc487d602d3 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Sun, 13 Sep 2020 23:37:09 -0700 Subject: refs transform: both GROBID and fatcat refs --- fatcat_scholar/transform.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'fatcat_scholar') diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py index b4b5c8d..50b6810 100644 --- a/fatcat_scholar/transform.py +++ b/fatcat_scholar/transform.py @@ -536,6 +536,10 @@ def refs_from_release_refs(release: ReleaseEntity) -> Sequence[RefStructured]: def refs_from_heavy(heavy: IntermediateBundle) -> Sequence[RefStructured]: + """ + Current behavior is to return *both* fatcat refs and GROBID refs if + available. + """ if heavy.doc_type != DocType.work: return [] @@ -546,19 +550,22 @@ def refs_from_heavy(heavy: IntermediateBundle) -> Sequence[RefStructured]: r for r in heavy.releases if r.ident == heavy.biblio_release_ident ][0] + refs: List[RefStructured] = [] + if primary_release.refs: # TODO: what about other releases? - return refs_from_release_refs(primary_release) - elif heavy.grobid_fulltext: + refs.extend(refs_from_release_refs(primary_release)) + + if heavy.grobid_fulltext: fulltext_release = [ r for r in heavy.releases if r.ident == heavy.grobid_fulltext["release_ident"] ][0] tei_dict = teixml2json(heavy.grobid_fulltext["tei_xml"]) - return refs_from_grobid(fulltext_release, tei_dict) - else: - return [] + refs.extend(refs_from_grobid(fulltext_release, tei_dict)) + + return refs def run_transform(infile: Sequence) -> None: -- cgit v1.2.3