aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-09-13 23:37:09 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-09-13 23:37:09 -0700
commit 774ac98c2ca0a1c66c3283d466245cc487d602d3 (patch)
tree 93add99bf9d4467ebb59797e99eb8abb4a62ec6d
parent 2e659b6bad1ab429d36fcd8cb1a686eab81e6d89 (diff)
download fatcat-scholar-774ac98c2ca0a1c66c3283d466245cc487d602d3.tar.gz
fatcat-scholar-774ac98c2ca0a1c66c3283d466245cc487d602d3.zip
refs transform: both GROBID and fatcat refs
-rw-r--r-- fatcat_scholar/transform.py 17
1 files changed, 12 insertions, 5 deletions
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index b4b5c8d..50b6810 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -536,6 +536,10 @@ def refs_from_release_refs(release: ReleaseEntity) -> Sequence[RefStructured]:
def refs_from_heavy(heavy: IntermediateBundle) -> Sequence[RefStructured]:
+ """
+ Current behavior is to return *both* fatcat refs and GROBID refs if
+ available.
+ """
if heavy.doc_type != DocType.work:
return []
@@ -546,19 +550,22 @@ def refs_from_heavy(heavy: IntermediateBundle) -> Sequence[RefStructured]:
r for r in heavy.releases if r.ident == heavy.biblio_release_ident
][0]
+ refs: List[RefStructured] = []
+
if primary_release.refs:
# TODO: what about other releases?
- return refs_from_release_refs(primary_release)
- elif heavy.grobid_fulltext:
+ refs.extend(refs_from_release_refs(primary_release))
+
+ if heavy.grobid_fulltext:
fulltext_release = [
r
for r in heavy.releases
if r.ident == heavy.grobid_fulltext["release_ident"]
][0]
tei_dict = teixml2json(heavy.grobid_fulltext["tei_xml"])
- return refs_from_grobid(fulltext_release, tei_dict)
- else:
- return []
+ refs.extend(refs_from_grobid(fulltext_release, tei_dict))
+
+ return refs
def run_transform(infile: Sequence) -> None: