diff options
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/importers/ingest.py | 15 |
1 files changed, 12 insertions, 3 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 3d391bd8..82a33aaa 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -112,9 +112,18 @@ class IngestFileResultImporter(EntityImporter):
 
         terminal = row.get('terminal')
         if not terminal:
-            # TODO: support archive.org hits?
-            self.counts['skip-no-terminal'] += 1
-            return None
+            # support old cdx-only ingest results
+            cdx = row.get('cdx')
+            if not cdx:
+                # TODO: support archive.org hits?
+                self.counts['skip-no-terminal'] += 1
+                return None
+            else:
+                terminal = {
+                    'terminal_url': cdx['url'],
+                    'terminal_dt': cdx['datetime'],
+                    'terminal_status_code': cdx.get('status_code') or cdx.get('http_status'),
+                }
 
         # work around old schema
         if not 'terminal_url' in terminal: