diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/fatcat_tools/importers/ingest.py | 17 | 
1 files changed, 10 insertions, 7 deletions
| diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py index 12e14e52..bb410b63 100644 --- a/python/fatcat_tools/importers/ingest.py +++ b/python/fatcat_tools/importers/ingest.py @@ -46,18 +46,21 @@ class IngestFileResultImporter(EntityImporter):          The current logic is intentionally conservative as a first step.          """ -        if self.require_grobid and not row.get('grobid', {}).get('status_code') == 200: -            self.counts['skip-grobid'] += 1 +        if row.get('hit') != True: +            self.counts['skip-hit'] += 1              return False -        if self.ingest_request_source_whitelist and row.get('ingest_request_source') not in self.ingest_request_source_whitelist: +        if self.ingest_request_source_whitelist and row['request'].get('ingest_request_source') not in self.ingest_request_source_whitelist:              self.counts['skip-ingest_request_source'] += 1              return False -        if row.get('hit') == True and row.get('file_meta'): -            return True -        else: -            self.counts['skip-hit'] += 1 +        if not row.get('file_meta'): +            self.counts['skip-file-meta'] += 1 +            return False +        if self.require_grobid and row.get('grobid', {}).get('status_code') != 200: +            self.counts['skip-grobid'] += 1              return False +        return True +      def parse_record(self, row):          request = row['request'] | 
