summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2020-08-21 16:08:20 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2020-08-21 16:08:20 -0700
commit: cdecb18701587277ba75756b2401279770421ba3 (patch)
tree: 0589f0d72efd577825a741a716e636694cc6ba27 /python/fatcat_tools
parent: cc7ebbc9afa540cff04989db1edb0913f0d46a54 (diff)
download: fatcat-cdecb18701587277ba75756b2401279770421ba3.tar.gz
fatcat-cdecb18701587277ba75756b2401279770421ba3.zip
fixes and test coverage for file_meta importer
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/importers/file_meta.py | 15
1 files changed, 10 insertions, 5 deletions
diff --git a/python/fatcat_tools/importers/file_meta.py b/python/fatcat_tools/importers/file_meta.py
index c3728570..1e9d2ab7 100644
--- a/python/fatcat_tools/importers/file_meta.py
+++ b/python/fatcat_tools/importers/file_meta.py
@@ -28,11 +28,15 @@ class FileMetaImporter(EntityImporter):
def want(self, row):
for k in ('sha1hex', 'sha256hex', 'md5hex', 'size_bytes', 'mimetype'):
if not row.get(k):
+ self.counts['skip-missing-field'] += 1
return False
return True
def parse_record(self, row):
+ # bezerk mode doesn't make sense for this importer
+ assert self.bezerk_mode == False
+
file_meta = row
fe = fatcat_openapi_client.FileEntity(
md5=file_meta['md5hex'],
@@ -44,6 +48,7 @@ class FileMetaImporter(EntityImporter):
return fe
def try_update(self, fe):
+
# lookup sha1, or create new entity
existing = None
try:
@@ -53,18 +58,18 @@ class FileMetaImporter(EntityImporter):
raise err
if not existing:
- self.counts['skip-no-match']
+ self.counts['skip-no-match'] += 1
return False
- if (existing.md5 and existing.sha256 and existing.size_bytes and existing.mimetype):
- self.counts['skip-existing-complete']
+ if (existing.md5 and existing.sha256 and existing.size and existing.mimetype):
+ self.counts['skip-existing-complete'] += 1
return False
existing.md5 = existing.md5 or fe.md5
existing.sha256 = existing.sha256 or fe.sha256
- existing.file_bytes = existing.file_bytes or fe.file_bytes
+ existing.size = existing.size or fe.size
existing.mimetype = existing.mimetype or fe.mimetype
- self.api.update_container(self.get_editgroup_id(), existing.ident, existing)
+ self.api.update_file(self.get_editgroup_id(), existing.ident, existing)
self.counts['update'] += 1
return False