From 4412e40237b97a75483bc37231dc497a06e5ef9f Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 24 Dec 2019 15:55:39 -0800
Subject: allow arabesque backfill ingests for some source types

---
 python/fatcat_tools/importers/ingest.py | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'python/fatcat_tools')

diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index ca741eb2..33c40eff 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -29,6 +29,7 @@ class IngestFileResultImporter(EntityImporter):
         self.ingest_request_source_whitelist = [
             'fatcat-changelog',
             'fatcat-ingest-container',
+            'arabesque',
         ]
         if kwargs.get('skip_source_whitelist', False):
             self.ingest_request_source_whitelist = []
@@ -55,6 +56,10 @@ class IngestFileResultImporter(EntityImporter):
         if self.ingest_request_source_whitelist and source not in self.ingest_request_source_whitelist:
             self.counts['skip-ingest_request_source'] += 1
             return False
+        if source.startswith('arabesque'):
+            if row['request'].get('link_source') not in ('arxiv', 'pmc'):
+                self.counts['skip-arabesque-source'] += 1
+                return False
         if source.startswith('savepapernow'):
             # never process async savepapernow requests
             self.counts['skip-savepapernow'] += 1
-- 
cgit v1.2.3