summary refs log tree commit diff stats
path: root/python/fatcat_tools/importers/ingest.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools/importers/ingest.py')
-rw-r--r-- python/fatcat_tools/importers/ingest.py 6
1 files changed, 4 insertions, 2 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index cd3d53f6..04ff8db6 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -86,7 +86,7 @@ class IngestFileResultImporter(EntityImporter):
self.counts['skip-ingest_request_source'] += 1
return False
- if row['request'].get('link_source') not in ('arxiv', 'pmc', 'unpaywall', 'doi', 'mag', 's2', 'doaj'):
+ if row['request'].get('link_source') not in ('arxiv', 'pmc', 'unpaywall', 'doi', 'mag', 's2', 'doaj', 'dblp'):
self.counts['skip-link-source'] += 1
return False
@@ -437,7 +437,9 @@ class IngestWebResultImporter(IngestFileResultImporter):
if 'revisit_cdx' in row:
terminal_cdx = row['revisit_cdx']
assert terminal_cdx['surt']
- assert terminal_cdx['url'] == terminal['terminal_url']
+ if terminal_cdx['url'] != terminal['terminal_url']:
+ self.counts['skip-terminal-url-mismatch'] += 1
+ return None
wc_cdx = []
# primary resource first