From c1c620640a821589882d50ecbb31b1ff743ec26d Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 19 Mar 2020 23:36:25 -0700 Subject: crossref: skip stub OUP title It seems like OUP pre-registers DOIs with this place-holder title, then updates the Crossref metadata when the paper is actually published. We should wait until the real title is available before creating an entity. --- python/fatcat_tools/importers/crossref.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index d8abf3eb..bd070ef1 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -163,6 +163,14 @@ class CrossrefImporter(EntityImporter): self.counts['skip-blank-title'] += 1 return False + # these are pre-registered DOIs before the actual record is ready + # title is a list of titles + if obj.get('title')[0].strip().lower() in [ + "OUP accepted manuscript".lower(), + ]: + self.counts['skip-stub-title'] += 1 + return False + # do most of these checks in-line below return True -- cgit v1.2.3