diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2019-12-03 16:38:08 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-12-03 16:38:10 -0800 | 
| commit | f1bf46cac6c59964c285f03bf93c2856eeb231ef (patch) | |
| tree | 79810683c76b973c13e283b1c15b0c3ef0192bb2 /python/fatcat_tools/importers | |
| parent | 0d9d71038b8a77baaeb7e9118d5b191b60eed7cc (diff) | |
| download | fatcat-f1bf46cac6c59964c285f03bf93c2856eeb231ef.tar.gz fatcat-f1bf46cac6c59964c285f03bf93c2856eeb231ef.zip  | |
tweaks to file ingest importer
- allow overriding source filter whitelist (common case for CLI use)
- fix editgroup description env variable pass-through
Diffstat (limited to 'python/fatcat_tools/importers')
| -rw-r--r-- | python/fatcat_tools/importers/ingest.py | 7 | 
1 file changed, 4 insertions, 3 deletions
```diff
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index bb410b63..7dad13ce 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -11,9 +11,9 @@ class IngestFileResultImporter(EntityImporter):
 
     def __init__(self, api, require_grobid=True, **kwargs):
 
-        eg_desc = kwargs.get('editgroup_description',
+        eg_desc = kwargs.pop('editgroup_description',
             "Files crawled from web using sandcrawler ingest tool")
-        eg_extra = kwargs.get('editgroup_extra', dict())
+        eg_extra = kwargs.pop('editgroup_extra', dict())
         eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.IngestFileResultImporter')
         super().__init__(api,
             editgroup_description=eg_desc,
@@ -29,7 +29,8 @@ class IngestFileResultImporter(EntityImporter):
         else:
             print("NOT checking GROBID success")
         self.ingest_request_source_whitelist = ['fatcat-changelog']
-        #self.ingest_request_source_whitelist = []
+        if kwargs.get('skip_source_whitelist', False):
+            self.ingest_request_source_whitelist = []
 
     def want(self, row):
         """
```
