diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-08-21 16:08:20 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-08-21 16:08:20 -0700 |
commit | cdecb18701587277ba75756b2401279770421ba3 (patch) | |
tree | 0589f0d72efd577825a741a716e636694cc6ba27 /python/fatcat_tools | |
parent | cc7ebbc9afa540cff04989db1edb0913f0d46a54 (diff) | |
download | fatcat-cdecb18701587277ba75756b2401279770421ba3.tar.gz fatcat-cdecb18701587277ba75756b2401279770421ba3.zip |
fixes and test coverage for file_meta importer
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/importers/file_meta.py | 15 |
1 files changed, 10 insertions, 5 deletions
diff --git a/python/fatcat_tools/importers/file_meta.py b/python/fatcat_tools/importers/file_meta.py
index c3728570..1e9d2ab7 100644
--- a/python/fatcat_tools/importers/file_meta.py
+++ b/python/fatcat_tools/importers/file_meta.py
@@ -28,11 +28,15 @@ class FileMetaImporter(EntityImporter):
     def want(self, row):
         for k in ('sha1hex', 'sha256hex', 'md5hex', 'size_bytes', 'mimetype'):
             if not row.get(k):
+                self.counts['skip-missing-field'] += 1
                 return False
         return True
 
     def parse_record(self, row):
 
+        # bezerk mode doesn't make sense for this importer
+        assert self.bezerk_mode == False
+
         file_meta = row
         fe = fatcat_openapi_client.FileEntity(
             md5=file_meta['md5hex'],
@@ -44,6 +48,7 @@ class FileMetaImporter(EntityImporter):
         return fe
 
     def try_update(self, fe):
+
         # lookup sha1, or create new entity
         existing = None
         try:
@@ -53,18 +58,18 @@ class FileMetaImporter(EntityImporter):
                 raise err
 
         if not existing:
-            self.counts['skip-no-match']
+            self.counts['skip-no-match'] += 1
             return False
 
-        if (existing.md5 and existing.sha256 and existing.size_bytes and existing.mimetype):
-            self.counts['skip-existing-complete']
+        if (existing.md5 and existing.sha256 and existing.size and existing.mimetype):
+            self.counts['skip-existing-complete'] += 1
             return False
 
         existing.md5 = existing.md5 or fe.md5
         existing.sha256 = existing.sha256 or fe.sha256
-        existing.file_bytes = existing.file_bytes or fe.file_bytes
+        existing.size = existing.size or fe.size
         existing.mimetype = existing.mimetype or fe.mimetype
 
-        self.api.update_container(self.get_editgroup_id(), existing.ident, existing)
+        self.api.update_file(self.get_editgroup_id(), existing.ident, existing)
         self.counts['update'] += 1
         return False