summaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2020-03-19 23:36:25 -0700
committerBryan Newbold <bnewbold@robocracy.org>2020-03-19 23:46:45 -0700
commitc1c620640a821589882d50ecbb31b1ff743ec26d (patch)
tree1cf0473b989db5f67ac588fa82dc0baad3b34bbc /python
parentb694e74bf72b498301e31459dedfcc1f56400c21 (diff)
downloadfatcat-c1c620640a821589882d50ecbb31b1ff743ec26d.tar.gz
fatcat-c1c620640a821589882d50ecbb31b1ff743ec26d.zip
crossref: skip stub OUP title
It seems like OUP pre-registers DOIs with this placeholder title, then updates the Crossref metadata when the paper is actually published. We should wait until the real title is available before creating an entity.
Diffstat (limited to 'python')
-rw-r--r--python/fatcat_tools/importers/crossref.py8
1 files changed, 8 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index d8abf3eb..bd070ef1 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -163,6 +163,14 @@ class CrossrefImporter(EntityImporter):
self.counts['skip-blank-title'] += 1
return False
+ # these are pre-registered DOIs before the actual record is ready
+ # title is a list of titles
+ if obj.get('title')[0].strip().lower() in [
+ "OUP accepted manuscript".lower(),
+ ]:
+ self.counts['skip-stub-title'] += 1
+ return False
+
# do most of these checks in-line below
return True