From d7c04087572915c93df753eb38db986921b357f6 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 5 Jan 2022 09:06:51 -0800 Subject: refs transform: handle rare missing ref 'id' This impacted one single DOI in the most recent dump/transform --- fatcat_scholar/transform.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py index 027fb47..ae4a181 100644 --- a/fatcat_scholar/transform.py +++ b/fatcat_scholar/transform.py @@ -799,7 +799,13 @@ def refs_from_crossref( for ref in crossref.get("grobid_refs") or []: # TODO: some kind of check whether we should include this, based on # source date or similar? - grobid_refs[ref["id"]] = ref + if ref.get("id"): + grobid_refs[ref["id"]] = ref + else: + print( + f"WARN: missing grobid ref for doi:{release.ext_ids.doi}", + file=sys.stderr, + ) output = [] for i, ref in enumerate(record.get("reference", [])): if ref.get("unstructured") and ref["key"] in grobid_refs: -- cgit v1.2.3