summaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/importers
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-12-03 16:38:08 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2019-12-03 16:38:10 -0800
commit f1bf46cac6c59964c285f03bf93c2856eeb231ef (patch)
tree 79810683c76b973c13e283b1c15b0c3ef0192bb2 /python/fatcat_tools/importers
parent 0d9d71038b8a77baaeb7e9118d5b191b60eed7cc (diff)
download fatcat-f1bf46cac6c59964c285f03bf93c2856eeb231ef.tar.gz
fatcat-f1bf46cac6c59964c285f03bf93c2856eeb231ef.zip
tweaks to file ingest importer
- allow overriding source filter whitelist (common case for CLI use)
- fix editgroup description env variable pass-through
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- python/fatcat_tools/importers/ingest.py 7
1 file changed, 4 insertions, 3 deletions
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index bb410b63..7dad13ce 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -11,9 +11,9 @@ class IngestFileResultImporter(EntityImporter):
def __init__(self, api, require_grobid=True, **kwargs):
- eg_desc = kwargs.get('editgroup_description',
+ eg_desc = kwargs.pop('editgroup_description',
"Files crawled from web using sandcrawler ingest tool")
- eg_extra = kwargs.get('editgroup_extra', dict())
+ eg_extra = kwargs.pop('editgroup_extra', dict())
eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.IngestFileResultImporter')
super().__init__(api,
editgroup_description=eg_desc,
@@ -29,7 +29,8 @@ class IngestFileResultImporter(EntityImporter):
else:
print("NOT checking GROBID success")
self.ingest_request_source_whitelist = ['fatcat-changelog']
- #self.ingest_request_source_whitelist = []
+ if kwargs.get('skip_source_whitelist', False):
+ self.ingest_request_source_whitelist = []
def want(self, row):
"""