aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2018-09-23 21:24:59 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2018-09-23 21:24:59 -0700
commit 50051beb7635711f13346ade947ed00e55ee832c (patch)
tree b9d8b688ca9e63762fe9906b24b435be3e37cde8
parent ac8dbbfbf7e0c90f10acafa16f20d1a254fd913e (diff)
download fatcat-50051beb7635711f13346ade947ed00e55ee832c.tar.gz
fatcat-50051beb7635711f13346ade947ed00e55ee832c.zip
filter out huge (> 2500 row) release fields
-rw-r--r-- python/fatcat/crossref_importer.py 5
1 files changed, 5 insertions, 0 deletions
diff --git a/python/fatcat/crossref_importer.py b/python/fatcat/crossref_importer.py
index 1adc6d70..122ecdf0 100644
--- a/python/fatcat/crossref_importer.py
+++ b/python/fatcat/crossref_importer.py
@@ -189,6 +189,11 @@ class FatcatCrossrefImporter(FatcatImporter):
# external identifiers
extids = self.lookup_ext_ids(doi=obj['DOI'].lower())
+ # TODO: temporary workaround: skip huge releases (> 2500 contribs or
+ # refs) for now; import them later, once the bug in fatcatd is fixed
+ if max(len(contribs), len(refs)) > 2500:
+ return None
+
re = fatcat_client.ReleaseEntity(
work_id=None,
title=obj['title'][0],