From 50051beb7635711f13346ade947ed00e55ee832c Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Sun, 23 Sep 2018 21:24:59 -0700 Subject: filter out huge (> 2500 row) release fields --- python/fatcat/crossref_importer.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'python') diff --git a/python/fatcat/crossref_importer.py b/python/fatcat/crossref_importer.py index 1adc6d70..122ecdf0 100644 --- a/python/fatcat/crossref_importer.py +++ b/python/fatcat/crossref_importer.py @@ -189,6 +189,11 @@ class FatcatCrossrefImporter(FatcatImporter): # external identifiers extids = self.lookup_ext_ids(doi=obj['DOI'].lower()) + # TODO: filter out huge releases; we'll get them later (and fix bug in + # fatcatd) + if max(len(contribs), len(refs)) > 2500: + return None + re = fatcat_client.ReleaseEntity( work_id=None, title=obj['title'][0], -- cgit v1.2.3