about summary refs log tree commit diff stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2020-01-15 14:13:34 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2020-01-15 14:13:34 -0800
commit 689da76d1c759d6368d760b4a1fa942e16095a40 (patch)
tree ca78cd6841875b3c7d55d046b3c7a206e604b60f /python/fatcat_tools
parent a02d51650bb5a3165ec89e822f43ff98807d01c3 (diff)
download fatcat-689da76d1c759d6368d760b4a1fa942e16095a40.tar.gz
fatcat-689da76d1c759d6368d760b4a1fa942e16095a40.zip
ingest: improve tests, support old ingest results
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/importers/ingest.py 15
1 files changed, 12 insertions, 3 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 3d391bd8..82a33aaa 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -112,9 +112,18 @@ class IngestFileResultImporter(EntityImporter):
terminal = row.get('terminal')
if not terminal:
- # TODO: support archive.org hits?
- self.counts['skip-no-terminal'] += 1
- return None
+ # support old cdx-only ingest results
+ cdx = row.get('cdx')
+ if not cdx:
+ # TODO: support archive.org hits?
+ self.counts['skip-no-terminal'] += 1
+ return None
+ else:
+ terminal = {
+ 'terminal_url': cdx['url'],
+ 'terminal_dt': cdx['datetime'],
+ 'terminal_status_code': cdx.get('status_code') or cdx.get('http_status'),
+ }
# work around old schema
if not 'terminal_url' in terminal: