From 09c584774b242374625d481ae043e8162d94ab52 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 23 Apr 2019 18:38:32 -0700 Subject: add limits to match importers --- python/fatcat_tools/importers/arabesque.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'python/fatcat_tools/importers/arabesque.py') diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py index 4353795a..7838a7ff 100644 --- a/python/fatcat_tools/importers/arabesque.py +++ b/python/fatcat_tools/importers/arabesque.py @@ -5,7 +5,7 @@ import base64 import sqlite3 import itertools import fatcat_client -from .common import EntityImporter, clean, make_rel_url +from .common import EntityImporter, clean, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS def b32_hex(s): @@ -130,6 +130,10 @@ class ArabesqueMatchImporter(EntityImporter): urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in urls] + if len(urls) > SANE_MAX_URLS: + self.counts['skip-too-many-url'] += 1 + return None + fe = fatcat_client.FileEntity( sha1=b32_hex(row['final_sha1']), mimetype=row['final_mimetype'], @@ -174,7 +178,13 @@ class ArabesqueMatchImporter(EntityImporter): # merge the existing into this one and update existing.urls = list(set([(u.rel, u.url) for u in fe.urls + existing.urls])) existing.urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in existing.urls] + if len(existing.urls) > SANE_MAX_URLS: + self.counts['skip-update-too-many-url'] += 1 + return None existing.release_ids = list(set(fe.release_ids + existing.release_ids)) + if len(existing.release_ids) > SANE_MAX_RELEASES: + self.counts['skip-update-too-many-url'] += 1 + return None existing.mimetype = existing.mimetype or fe.mimetype edit = self.api.update_file(existing.ident, existing, editgroup_id=self.get_editgroup_id()) self._edits_inflight.append(edit) -- cgit v1.2.3