diff options
Diffstat (limited to 'python')
| -rwxr-xr-x | python/fatcat_import.py | 4 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/matched.py | 29 | ||||
| -rw-r--r-- | python/fatcat_web/templates/creator_view.html | 2 | 
3 files changed, 31 insertions, 4 deletions
| diff --git a/python/fatcat_import.py b/python/fatcat_import.py index d76f706f..8595d16b 100755 --- a/python/fatcat_import.py +++ b/python/fatcat_import.py @@ -62,6 +62,7 @@ def run_matched(args):      fmi = MatchedImporter(args.api,          edit_batch_size=args.batch_size,          editgroup_description=args.editgroup_description_override, +        default_link_rel=args.default_link_rel,          default_mimetype=args.default_mimetype)      JsonLinePusher(fmi, args.json_file).run() @@ -267,6 +268,9 @@ def main():      sub_matched.add_argument('--bezerk-mode',          action='store_true',          help="don't lookup existing files, just insert (clobbers; only for fast bootstrap)") +    sub_matched.add_argument('--default-link-rel', +        default="web", +        help="default URL rel for matches (eg, 'publisher', 'web')")      sub_arabesque_match = subparsers.add_parser('arabesque')      sub_arabesque_match.set_defaults( diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py index 04ce4573..3ef617d3 100644 --- a/python/fatcat_tools/importers/matched.py +++ b/python/fatcat_tools/importers/matched.py @@ -4,6 +4,8 @@ import json  import sqlite3  import itertools  import fatcat_client + +from fatcat_tools.normal import *  from .common import EntityImporter, clean, make_rel_url, SANE_MAX_RELEASES, SANE_MAX_URLS @@ -53,6 +55,10 @@ class MatchedImporter(EntityImporter):          # lookup dois          re_list = set()          for doi in dois: +            doi = clean_doi(doi) +            if not doi: +                self.counts['skip-bad-doi'] += 1 +                return None              try:                  re = self.api.lookup_release(doi=doi)              except fatcat_client.rest.ApiException as err: @@ -64,12 +70,28 @@ class MatchedImporter(EntityImporter):                  pass              else:                  re_list.add(re.ident) + +        # look up other external ids +        for extid_type in ('arxiv', 'pmid', 'pmcid', 'jstor', 'wikidata_qid', 'core', 'isbn13', 'ark'): +            extid = obj.get(extid_type) +            if extid: +                try: +                    re = self.api.lookup_release(**{extid_type: extid}) +                except fatcat_client.rest.ApiException as err: +                    if err.status != 404: +                        raise err +                    re = None +                if re is None: +                    pass +                else: +                    re_list.add(re.ident) +          release_ids = list(re_list)          if len(release_ids) == 0: -            self.counts['skip-no-doi'] += 1 +            self.counts['skip-no-releases'] += 1              return None          if len(release_ids) > SANE_MAX_RELEASES: -            self.counts['skip-too-many-dois'] += 1 +            self.counts['skip-too-many-releases'] += 1              return None          # parse URLs and CDX @@ -142,11 +164,12 @@ class MatchedImporter(EntityImporter):              return None          existing.release_ids = list(set(fe.release_ids + existing.release_ids))          if len(existing.release_ids) > SANE_MAX_RELEASES: -            self.counts['skip-update-too-many-url'] += 1 +            self.counts['skip-update-too-many-releases'] += 1              return None          existing.mimetype = existing.mimetype or fe.mimetype          existing.size = existing.size or fe.size          existing.md5 = existing.md5 or fe.md5 +        existing.sha1 = existing.sha1 or fe.sha1          existing.sha256 = existing.sha256 or fe.sha256          self.api.update_file(self.get_editgroup_id(), existing.ident, existing)          self.counts['update'] += 1 diff --git a/python/fatcat_web/templates/creator_view.html b/python/fatcat_web/templates/creator_view.html index 1b550316..874a0b92 100644 --- a/python/fatcat_web/templates/creator_view.html +++ b/python/fatcat_web/templates/creator_view.html @@ -25,7 +25,7 @@  <br>  <h3>Releases</h3> -{% if creator._releases != [] %} +{% if creator._releases != [] and creator._releases != None %}    <p>This creator has contributed to:    {{ entity_macros.release_list(creator._releases) }}  {% else %} | 
