diff options
Diffstat (limited to 'python/fatcat_tools')
| -rw-r--r-- | python/fatcat_tools/importers/ingest.py | 15 | 
1 file changed, 12 insertions, 3 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 3d391bd8..82a33aaa 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -112,9 +112,18 @@ class IngestFileResultImporter(EntityImporter):
 
         terminal = row.get('terminal')
         if not terminal:
-            # TODO: support archive.org hits?
-            self.counts['skip-no-terminal'] += 1
-            return None
+            # support old cdx-only ingest results
+            cdx = row.get('cdx')
+            if not cdx:
+                # TODO: support archive.org hits?
+                self.counts['skip-no-terminal'] += 1
+                return None
+            else:
+                terminal = {
+                    'terminal_url': cdx['url'],
+                    'terminal_dt': cdx['datetime'],
+                    'terminal_status_code': cdx.get('status_code') or cdx.get('http_status'),
+                }
 
         # work around old schema
         if not 'terminal_url' in terminal:
