diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-09-23 21:24:59 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-09-23 21:24:59 -0700 |
commit | 50051beb7635711f13346ade947ed00e55ee832c (patch) | |
tree | b9d8b688ca9e63762fe9906b24b435be3e37cde8 | |
parent | ac8dbbfbf7e0c90f10acafa16f20d1a254fd913e (diff) | |
download | fatcat-50051beb7635711f13346ade947ed00e55ee832c.tar.gz fatcat-50051beb7635711f13346ade947ed00e55ee832c.zip |
filter out huge (> 2500 row) release fields
-rw-r--r-- | python/fatcat/crossref_importer.py | 5 |
1 files changed, 5 insertions, 0 deletions
```diff
diff --git a/python/fatcat/crossref_importer.py b/python/fatcat/crossref_importer.py
index 1adc6d70..122ecdf0 100644
--- a/python/fatcat/crossref_importer.py
+++ b/python/fatcat/crossref_importer.py
@@ -189,6 +189,11 @@ class FatcatCrossrefImporter(FatcatImporter):
         # external identifiers
         extids = self.lookup_ext_ids(doi=obj['DOI'].lower())
 
+        # TODO: filter out huge releases; we'll get them later (and fix bug in
+        # fatcatd)
+        if max(len(contribs), len(refs)) > 2500:
+            return None
+
         re = fatcat_client.ReleaseEntity(
             work_id=None,
             title=obj['title'][0],
```