summary refs log tree commit diff stats
path: root/python/fatcat_tools/importers/matched.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools/importers/matched.py')
-rw-r--r-- python/fatcat_tools/importers/matched.py 14
1 files changed, 7 insertions, 7 deletions
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index 9c80dd72..70290d81 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -102,20 +102,20 @@ class MatchedImporter(EntityImporter):
return None
# parse URLs and CDX
- urls = set()
+ urls_set = set()
for url in obj.get("urls", []):
url = make_rel_url(url, default_link_rel=self.default_link_rel)
if url is not None:
- urls.add(url)
+ urls_set.add(url)
for cdx in obj.get("cdx", []):
original = cdx["url"]
if cdx.get("dt"):
wayback = "https://web.archive.org/web/{}/{}".format(cdx["dt"], original)
- urls.add(("webarchive", wayback))
+ urls_set.add(("webarchive", wayback))
url = make_rel_url(original, default_link_rel=self.default_link_rel)
if url is not None:
- urls.add(url)
- urls = [fatcat_openapi_client.FileUrl(rel=rel, url=url) for (rel, url) in urls]
+ urls_set.add(url)
+ urls = [fatcat_openapi_client.FileUrl(rel=rel, url=url) for (rel, url) in urls_set]
if len(urls) == 0:
self.counts["skip-no-urls"] += 1
return None
@@ -195,11 +195,11 @@ class MatchedImporter(EntityImporter):
if len(existing.urls) > SANE_MAX_URLS:
self.counts["skip-update-too-many-url"] += 1
- return None
+ return False
existing.release_ids = list(set(fe.release_ids + existing.release_ids))
if len(existing.release_ids) > SANE_MAX_RELEASES:
self.counts["skip-update-too-many-releases"] += 1
- return None
+ return False
existing.mimetype = existing.mimetype or fe.mimetype
existing.size = existing.size or fe.size
existing.md5 = existing.md5 or fe.md5