From d52cb39476aad977ffe8b73b16e831f78d3ab8fe Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 14 Sep 2020 00:16:22 -0700 Subject: refs and grobid2json bugfixes from testing --- fatcat_scholar/transform.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fatcat_scholar/transform.py') diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py index 7ff30fe..af794e6 100644 --- a/fatcat_scholar/transform.py +++ b/fatcat_scholar/transform.py @@ -452,10 +452,17 @@ def refs_from_grobid(release: ReleaseEntity, tei_dict: dict) -> Sequence[RefStru for ref in tei_dict.get("citations") or []: ref_date = ref.get("date") or None ref_year: Optional[int] = None - if ref_date and len(ref_date) > 4 and ref_date[:4].isdigit(): + if ref_date and len(ref_date) >= 4 and ref_date[:4].isdigit(): ref_year = int(ref_date[:4]) - authors = ref.get("authors") or [] - authors = [a for a in authors if type(a) == str] + ref_authors = ref.get("authors") or [] + authors: List[str] = [] + for a in ref_authors: + if isinstance(a, str): + authors.append(a) + elif isinstance(a, dict): + if a.get("name"): + assert isinstance(a["name"], str) + authors.append(a["name"]) output.append( RefStructured( biblio=RefBiblio( -- cgit v1.2.3