about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-12-24 15:55:39 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2019-12-24 15:55:39 -0800
commit 4412e40237b97a75483bc37231dc497a06e5ef9f (patch)
tree f920b6cafd91982d9435fb101e7c58655c3ccf60
parent 7222131f172ef26eebf964e8b17b024d7ccebb24 (diff)
download fatcat-4412e40237b97a75483bc37231dc497a06e5ef9f.tar.gz
fatcat-4412e40237b97a75483bc37231dc497a06e5ef9f.zip
allow arabesque backfill ingests for some source types
-rw-r--r-- python/fatcat_tools/importers/ingest.py 5
1 files changed, 5 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index ca741eb2..33c40eff 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -29,6 +29,7 @@ class IngestFileResultImporter(EntityImporter):
self.ingest_request_source_whitelist = [
'fatcat-changelog',
'fatcat-ingest-container',
+ 'arabesque',
]
if kwargs.get('skip_source_whitelist', False):
self.ingest_request_source_whitelist = []
@@ -55,6 +56,10 @@ class IngestFileResultImporter(EntityImporter):
if self.ingest_request_source_whitelist and source not in self.ingest_request_source_whitelist:
self.counts['skip-ingest_request_source'] += 1
return False
+ if source.startswith('arabesque'):
+ if row['request'].get('link_source') not in ('arxiv', 'pmc'):
+ self.counts['skip-arabesque-source'] += 1
+ return False
if source.startswith('savepapernow'):
# never process async savepapernow requests
self.counts['skip-savepapernow'] += 1