diff options
author | Bryan Newbold <bnewbold@archive.org> | 2022-01-05 09:06:51 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2022-01-05 09:06:53 -0800 |
commit | d7c04087572915c93df753eb38db986921b357f6 (patch) | |
tree | f57451f50cac8c05efd77b674bbb45a956194600 /fatcat_scholar | |
parent | cb94a1809af6733615769012271a55a5ec542402 (diff) | |
download | fatcat-scholar-d7c04087572915c93df753eb38db986921b357f6.tar.gz fatcat-scholar-d7c04087572915c93df753eb38db986921b357f6.zip |
refs transform: handle rare missing ref 'id'
This impacted only a single DOI in the most recent dump/transform.
Diffstat (limited to 'fatcat_scholar')
-rw-r--r-- | fatcat_scholar/transform.py | 8 |
1 files changed, 7 insertions, 1 deletions
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py index 027fb47..ae4a181 100644 --- a/fatcat_scholar/transform.py +++ b/fatcat_scholar/transform.py @@ -799,7 +799,13 @@ def refs_from_crossref( for ref in crossref.get("grobid_refs") or []: # TODO: some kind of check whether we should include this, based on # source date or similar? - grobid_refs[ref["id"]] = ref + if ref.get("id"): + grobid_refs[ref["id"]] = ref + else: + print( + f"WARN: missing grobid ref for doi:{release.ext_ids.doi}", + file=sys.stderr, + ) output = [] for i, ref in enumerate(record.get("reference", [])): if ref.get("unstructured") and ref["key"] in grobid_refs: |