From 689da76d1c759d6368d760b4a1fa942e16095a40 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Wed, 15 Jan 2020 14:13:34 -0800
Subject: ingest: improve tests, support old ingest results

---
 python/fatcat_tools/importers/ingest.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'python/fatcat_tools')

diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 3d391bd8..82a33aaa 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -112,9 +112,18 @@ class IngestFileResultImporter(EntityImporter):
 
         terminal = row.get('terminal')
         if not terminal:
-            # TODO: support archive.org hits?
-            self.counts['skip-no-terminal'] += 1
-            return None
+            # support old cdx-only ingest results
+            cdx = row.get('cdx')
+            if not cdx:
+                # TODO: support archive.org hits?
+                self.counts['skip-no-terminal'] += 1
+                return None
+            else:
+                terminal = {
+                    'terminal_url': cdx['url'],
+                    'terminal_dt': cdx['datetime'],
+                    'terminal_status_code': cdx.get('status_code') or cdx.get('http_status'),
+                }
 
         # work around old schema
         if not 'terminal_url' in terminal:
-- 
cgit v1.2.3