aboutsummaryrefslogtreecommitdiffstats
path: root/fatcat_scholar/transform.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2022-01-05 09:06:51 -0800
committer Bryan Newbold <bnewbold@archive.org> 2022-01-05 09:06:53 -0800
commit d7c04087572915c93df753eb38db986921b357f6 (patch)
tree f57451f50cac8c05efd77b674bbb45a956194600 /fatcat_scholar/transform.py
parent cb94a1809af6733615769012271a55a5ec542402 (diff)
download fatcat-scholar-d7c04087572915c93df753eb38db986921b357f6.tar.gz
fatcat-scholar-d7c04087572915c93df753eb38db986921b357f6.zip
refs transform: handle rare missing ref 'id'
This impacted one single DOI in the most recent dump/transform
Diffstat (limited to 'fatcat_scholar/transform.py')
-rw-r--r-- fatcat_scholar/transform.py 8
1 files changed, 7 insertions, 1 deletions
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index 027fb47..ae4a181 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -799,7 +799,13 @@ def refs_from_crossref(
for ref in crossref.get("grobid_refs") or []:
# TODO: some kind of check whether we should include this, based on
# source date or similar?
- grobid_refs[ref["id"]] = ref
+ if ref.get("id"):
+ grobid_refs[ref["id"]] = ref
+ else:
+ print(
+ f"WARN: missing grobid ref for doi:{release.ext_ids.doi}",
+ file=sys.stderr,
+ )
output = []
for i, ref in enumerate(record.get("reference", [])):
if ref.get("unstructured") and ref["key"] in grobid_refs: