diff options
Diffstat (limited to 'python/fatcat_tools')
| -rw-r--r-- | python/fatcat_tools/importers/crossref.py | 6 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 8 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/matched.py | 8 | 
3 files changed, 14 insertions, 8 deletions
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index 75132901..00c719f1 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -111,7 +111,7 @@ class CrossrefImporter(EntityImporter):
         return CROSSREF_TYPE_MAP.get(crossref_type)
 
     def map_container_type(self, crossref_type):
-        return CONTAINER_TYPE_MAP.get(release_type)
+        return CONTAINER_TYPE_MAP.get(crossref_type)
 
     def want(self, obj):
         if not obj.get('title'):
@@ -238,7 +238,7 @@ class CrossrefImporter(EntityImporter):
             if rm.get('DOI'):
                 extra['doi'] = rm.get('DOI').lower()
             # TODO: what fields here? CSL citation stuff
-            for k in ('authors', 'editor', 'edition', 'authority', 'version',
+            for k in ('author', 'editor', 'edition', 'authority', 'version',
                     'genre', 'url', 'event', 'issue', 'volume', 'date',
                     'accessed_date', 'issued', 'page', 'medium',
                     'collection_title', 'chapter_number'):
@@ -253,7 +253,7 @@ class CrossrefImporter(EntityImporter):
                 # doing lookups would be a second import pass
                 target_release_id=None,
                 key=key,
-                year=clean(year),
+                year=year,
                 container_name=clean(container_name),
                 title=clean(rm.get('title')),
                 locator=clean(rm.get('first-page')),
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index 468b0ede..9d95fe0b 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -32,6 +32,7 @@ class GrobidMetadataImporter(EntityImporter):
             editgroup_description=eg_desc,
             editgroup_extra=eg_extra)
         self.default_link_rel = kwargs.get("default_link_rel", "web")
+        self.longtail_oa = kwargs.get("longtail_oa", False)
 
     def want(self, raw_record):
         return True
@@ -130,12 +131,13 @@ class GrobidMetadataImporter(EntityImporter):
         if obj.get('doi'):
             extra['doi'] = obj['doi']
         if obj['journal'] and obj['journal'].get('name'):
-            extra['container_name'] = obj['journal']['name']
-
-        extra['is_longtail_oa'] = True
+            extra['container_name'] = clean(obj['journal']['name'])
 
         # TODO: ISSN/eISSN handling? or just journal name lookup?
 
+        if self.longtail_oa:
+            extra['longtail_oa'] = True
+
         if extra:
             extra = dict(grobid=extra)
         else:
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index 123e3530..2ec6c95d 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -120,6 +120,9 @@ class MatchedImporter(EntityImporter):
             if err.status != 404:
                 raise err
 
+        if not existing:
+            return True
+
         fe.release_ids = list(set(fe.release_ids + existing.release_ids))
         if set(fe.release_ids) == set(existing.release_ids) and len(existing.urls) > 0:
             # no new release matches *and* there are already existing URLs
@@ -127,13 +130,14 @@ class MatchedImporter(EntityImporter):
             return False
 
         # merge the existing into this one and update
-        existing.urls = list(set(fe.urls + existing.urls))
+        existing.urls = list(set([(u.rel, u.url) for u in fe.urls + existing.urls]))
+        existing.urls = [fatcat_client.FileEntityUrls(rel=rel, url=url) for (rel, url) in existing.urls]
         existing.release_ids = list(set(fe.release_ids + existing.release_ids))
         existing.mimetype = existing.mimetype or fe.mimetype
         existing.size = existing.size or fe.size
         existing.md5 = existing.md5 or fe.md5
         existing.sha256 = existing.sha256 or fe.sha256
-        self.api.update_file(existing.ident, existing)
+        self.api.update_file(existing.ident, existing, editgroup_id=self._get_editgroup())
         self.counts['update'] += 1
         return False
 
