diff options
Diffstat (limited to 'python')
| -rwxr-xr-x | python/fatcat_import.py | 6 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/ingest.py | 7 | 
2 files changed, 10 insertions, 3 deletions
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index aa789dd9..656fe87d 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -77,6 +77,7 @@ def run_arabesque_match(args):
             args.json_file):
         print("Supply one of --sqlite-file or --json-file")
     ami = ArabesqueMatchImporter(args.api,
+        editgroup_description=args.editgroup_description_override,
         do_updates=args.do_updates,
         require_grobid=(not args.no_require_grobid),
         extid_type=args.extid_type,
@@ -91,6 +92,8 @@ def run_arabesque_match(args):
 
 def run_ingest_file(args):
     ifri = IngestFileResultImporter(args.api,
+        editgroup_description=args.editgroup_description_override,
+        skip_source_whitelist=args.skip_source_whitelist,
         do_updates=args.do_updates,
         default_link_rel=args.default_link_rel,
         require_grobid=(not args.no_require_grobid),
@@ -333,6 +336,9 @@ def main():
     sub_ingest_file.add_argument('json_file',
         help="ingest_file JSON file to import from",
         default=sys.stdin, type=argparse.FileType('r'))
+    sub_ingest_file.add_argument('--skip-source-whitelist',
+        action='store_true',
+        help="don't filter import based on request source whitelist")
     sub_ingest_file.add_argument('--kafka-mode',
         action='store_true',
         help="consume from kafka topic (not stdin)")
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index bb410b63..7dad13ce 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -11,9 +11,9 @@ class IngestFileResultImporter(EntityImporter):
 
     def __init__(self, api, require_grobid=True, **kwargs):
 
-        eg_desc = kwargs.get('editgroup_description',
+        eg_desc = kwargs.pop('editgroup_description',
             "Files crawled from web using sandcrawler ingest tool")
-        eg_extra = kwargs.get('editgroup_extra', dict())
+        eg_extra = kwargs.pop('editgroup_extra', dict())
         eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.IngestFileResultImporter')
         super().__init__(api,
             editgroup_description=eg_desc,
@@ -29,7 +29,8 @@ class IngestFileResultImporter(EntityImporter):
         else:
             print("NOT checking GROBID success")
         self.ingest_request_source_whitelist = ['fatcat-changelog']
-        #self.ingest_request_source_whitelist = []
+        if kwargs.get('skip_source_whitelist', False):
+            self.ingest_request_source_whitelist = []
 
     def want(self, row):
         """
