diff options
Diffstat (limited to 'python/fatcat_tools/importers')
| -rw-r--r-- | python/fatcat_tools/importers/arabesque.py | 4 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/matched.py | 9 | 
2 files changed, 9 insertions, 4 deletions
diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py
index ea38ec2f..e9376d96 100644
--- a/python/fatcat_tools/importers/arabesque.py
+++ b/python/fatcat_tools/importers/arabesque.py
@@ -66,7 +66,7 @@ class ArabesqueMatchImporter(EntityImporter):
         self.extid_type = extid_type
         self.default_link_rel = kwargs.get("default_link_rel", "web")
         assert self.default_link_rel
-        self.default_mime = kwargs.get("default_mime", None)
+        self.default_mimetype = kwargs.get("default_mimetype", None)
         self.do_updates = kwargs.get("do_updates", False)
         self.require_grobid = require_grobid
         if self.require_grobid:
@@ -136,7 +136,7 @@ class ArabesqueMatchImporter(EntityImporter):
 
         fe = fatcat_client.FileEntity(
             sha1=b32_hex(row['final_sha1']),
-            mimetype=row['final_mimetype'],
+            mimetype=row['final_mimetype'] or self.default_mimetype,
             release_ids=[re.ident],
             urls=urls,
         )
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index 4d78fdc9..04ce4573 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -42,7 +42,7 @@ class MatchedImporter(EntityImporter):
             editgroup_extra=eg_extra,
             **kwargs)
         self.default_link_rel = kwargs.get("default_link_rel", "web")
-        self.default_mime = kwargs.get("default_mime", None)
+        self.default_mimetype = kwargs.get("default_mimetype", None)
 
     def want(self, raw_record):
         return True
@@ -100,12 +100,17 @@ class MatchedImporter(EntityImporter):
         if size:
             size = int(size)
 
+        mimetype = obj.get('mimetype', self.default_mimetype)
+        if not mimetype and urls:
+            if urls[0].url.endswith('.pdf'):
+                mimetype = 'application/pdf'
+
         fe = fatcat_client.FileEntity(
             md5=obj.get('md5'),
             sha1=obj['sha1'],
             sha256=obj.get('sha256'),
             size=size,
-            mimetype=obj.get('mimetype'),
+            mimetype=mimetype,
             release_ids=release_ids,
             urls=urls,
         )
