summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- python/fatcat_tools/importers/ingest.py 17
1 files changed, 10 insertions, 7 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index 12e14e52..bb410b63 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -46,18 +46,21 @@ class IngestFileResultImporter(EntityImporter):
The current logic is intentionally conservative as a first step.
"""
- if self.require_grobid and not row.get('grobid', {}).get('status_code') == 200:
- self.counts['skip-grobid'] += 1
+ if row.get('hit') != True:
+ self.counts['skip-hit'] += 1
return False
- if self.ingest_request_source_whitelist and row.get('ingest_request_source') not in self.ingest_request_source_whitelist:
+ if self.ingest_request_source_whitelist and row['request'].get('ingest_request_source') not in self.ingest_request_source_whitelist:
self.counts['skip-ingest_request_source'] += 1
return False
- if row.get('hit') == True and row.get('file_meta'):
- return True
- else:
- self.counts['skip-hit'] += 1
+ if not row.get('file_meta'):
+ self.counts['skip-file-meta'] += 1
+ return False
+ if self.require_grobid and row.get('grobid', {}).get('status_code') != 200:
+ self.counts['skip-grobid'] += 1
return False
+ return True
+
def parse_record(self, row):
request = row['request']