summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2018-06-25 10:40:20 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2018-06-25 10:40:20 -0700
commit: c2146fe7b5d299dab2692d2346d0037a944ac799 (patch)
tree: 4da08d779872d1ed392bd7cd38945f403668ca1d
parent: c7687e259cd003b3737a8bd9dd1ae51bf1f15a1e (diff)
download: fatcat-c2146fe7b5d299dab2692d2346d0037a944ac799.tar.gz
download: fatcat-c2146fe7b5d299dab2692d2346d0037a944ac799.zip
slightly more robust crossref import
-rw-r--r-- python/fatcat/crossref_importer.py 9
1 files changed, 9 insertions, 0 deletions
diff --git a/python/fatcat/crossref_importer.py b/python/fatcat/crossref_importer.py
index 06c162f0..18dd2498 100644
--- a/python/fatcat/crossref_importer.py
+++ b/python/fatcat/crossref_importer.py
@@ -22,6 +22,11 @@ class FatcatCrossrefImporter(FatcatImporter):
if (not 'author' in obj) or (not 'title' in obj):
return None
+ # Other ways to be out of scope (provisionally)
+ if ((not 'type' in obj) or (not 'container-title' in obj) or
+ len(obj['container-title']) < 1):
+ return None
+
# contribs
contribs = []
for i, am in enumerate(obj['author']):
@@ -62,6 +67,10 @@ class FatcatCrossrefImporter(FatcatImporter):
for i, rm in enumerate(obj.get('reference', [])):
try:
year = int(rm.get('year'))
+ if year > 2025 or year < 1000:
+ # NOTE: will need to update/config in the future!
+ # NOTE: are there crossref works with year < 1000?
+ return None
except:
year = None
refs.append(fatcat_client.ReleaseRef(