From 3bb922f3f7e0ffd20cbf8eb22a59abefd35bf678 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 18 Apr 2019 15:47:20 -0700 Subject: arabesque importer does fewer updates --- python/fatcat_tools/importers/arabesque.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'python') diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py index 3e951b73..e143ad99 100644 --- a/python/fatcat_tools/importers/arabesque.py +++ b/python/fatcat_tools/importers/arabesque.py @@ -145,9 +145,16 @@ class ArabesqueMatchImporter(EntityImporter): return False if not self.do_updates: - self.counts['update-disabled'] += 1 + self.counts['skip-update-disabled'] += 1 return False + if set(fe.release_ids) == set(existing.release_ids): + existing_urls = set([u.url for u in existing.urls]) + new_urls = set([u.url for u in fe.urls]) + if existing_urls.issuperset(new_urls): + self.counts['skip-update-nothing-new'] += 1 + return False + # merge the existing into this one and update existing.urls = list(set([(u.rel, u.url) for u in fe.urls + existing.urls])) existing.urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in existing.urls] -- cgit v1.2.3