diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2019-12-24 15:55:39 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-12-24 15:55:39 -0800 | 
| commit | 4412e40237b97a75483bc37231dc497a06e5ef9f (patch) | |
| tree | f920b6cafd91982d9435fb101e7c58655c3ccf60 /python/fatcat_tools | |
| parent | 7222131f172ef26eebf964e8b17b024d7ccebb24 (diff) | |
| download | fatcat-4412e40237b97a75483bc37231dc497a06e5ef9f.tar.gz fatcat-4412e40237b97a75483bc37231dc497a06e5ef9f.zip | |
allow arabesque backfill ingests for some source types
Diffstat (limited to 'python/fatcat_tools')
| -rw-r--r-- | python/fatcat_tools/importers/ingest.py | 5 | 
1 files changed, 5 insertions, 0 deletions
```diff
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index ca741eb2..33c40eff 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -29,6 +29,7 @@ class IngestFileResultImporter(EntityImporter):
         self.ingest_request_source_whitelist = [
             'fatcat-changelog',
             'fatcat-ingest-container',
+            'arabesque',
         ]
         if kwargs.get('skip_source_whitelist', False):
             self.ingest_request_source_whitelist = []
@@ -55,6 +56,10 @@ class IngestFileResultImporter(EntityImporter):
         if self.ingest_request_source_whitelist and source not in self.ingest_request_source_whitelist:
             self.counts['skip-ingest_request_source'] += 1
             return False
+        if source.startswith('arabesque'):
+            if row['reqeust'].get('link_source') not in ('arxiv', 'pmc'):
+                self.counts['skip-arabesque-source'] += 1
+                return False
         if source.startswith('savepapernow'):
             # never process async savepapernow requests
             self.counts['skip-savepapernow'] += 1
```
