diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-04-18 15:47:20 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-04-18 15:47:20 -0700 |
commit | 3bb922f3f7e0ffd20cbf8eb22a59abefd35bf678 (patch) | |
tree | 79af4919bec72cf2254be22e04d44bfaebab979f | |
parent | b74f0df223e8eab5aec50cbbeaab2980eb8a5bf4 (diff) | |
download | fatcat-3bb922f3f7e0ffd20cbf8eb22a59abefd35bf678.tar.gz fatcat-3bb922f3f7e0ffd20cbf8eb22a59abefd35bf678.zip |
arabesque importer does fewer updates
-rw-r--r-- | python/fatcat_tools/importers/arabesque.py | 9 |
1 file changed, 8 insertions, 1 deletion
```diff
diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py
index 3e951b73..e143ad99 100644
--- a/python/fatcat_tools/importers/arabesque.py
+++ b/python/fatcat_tools/importers/arabesque.py
@@ -145,9 +145,16 @@ class ArabesqueMatchImporter(EntityImporter):
             return False
 
         if not self.do_updates:
-            self.counts['update-disabled'] += 1
+            self.counts['skip-update-disabled'] += 1
             return False
 
+        if set(fe.release_ids) == set(existing.release_ids):
+            existing_urls = set([u.url for u in existing.urls])
+            new_urls = set([u.url for u in fe.urls])
+            if existing_urls.issuperset(new_urls):
+                self.counts['skip-update-nothing-new'] += 1
+                return False
+
         # merge the existing into this one and update
         existing.urls = list(set([(u.rel, u.url) for u in fe.urls + existing.urls]))
         existing.urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in existing.urls]
```