about | summary | refs | log | tree | commit | diff | stats
path: root/fatcat_scholar/transform.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-09-14 00:16:22 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-09-14 00:16:22 -0700
commit: d52cb39476aad977ffe8b73b16e831f78d3ab8fe (patch)
tree: 36902f6a0a4ff8b9d20ff4342320178d7c192396 /fatcat_scholar/transform.py
parent: 71ce30e7547871cb6fe02fa4237af735bd6b9c49 (diff)
download: fatcat-scholar-d52cb39476aad977ffe8b73b16e831f78d3ab8fe.tar.gz
fatcat-scholar-d52cb39476aad977ffe8b73b16e831f78d3ab8fe.zip
refs and grobid2json bugfixes from testing
Diffstat (limited to 'fatcat_scholar/transform.py')
-rw-r--r-- fatcat_scholar/transform.py 13
1 files changed, 10 insertions, 3 deletions
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index 7ff30fe..af794e6 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -452,10 +452,17 @@ def refs_from_grobid(release: ReleaseEntity, tei_dict: dict) -> Sequence[RefStru
for ref in tei_dict.get("citations") or []:
ref_date = ref.get("date") or None
ref_year: Optional[int] = None
- if ref_date and len(ref_date) > 4 and ref_date[:4].isdigit():
+ if ref_date and len(ref_date) >= 4 and ref_date[:4].isdigit():
ref_year = int(ref_date[:4])
- authors = ref.get("authors") or []
- authors = [a for a in authors if type(a) == str]
+ ref_authors = ref.get("authors") or []
+ authors: List[str] = []
+ for a in ref_authors:
+ if isinstance(a, str):
+ authors.append(a)
+ elif isinstance(a, dict):
+ if a.get("name"):
+ assert isinstance(a["name"], str)
+ authors.append(a["name"])
output.append(
RefStructured(
biblio=RefBiblio(