diff options
Diffstat (limited to 'python/fatcat_tools')
| -rwxr-xr-x | python/fatcat_tools/importers/cdl_dash_dat.py | 2 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/crossref.py | 16 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 2 | ||||
| -rwxr-xr-x | python/fatcat_tools/importers/wayback_static.py | 1 | 
4 files changed, 18 insertions, 3 deletions
diff --git a/python/fatcat_tools/importers/cdl_dash_dat.py b/python/fatcat_tools/importers/cdl_dash_dat.py index e1252b6d..c1ea075d 100755 --- a/python/fatcat_tools/importers/cdl_dash_dat.py +++ b/python/fatcat_tools/importers/cdl_dash_dat.py @@ -70,7 +70,6 @@ def cdl_dash_release(meta, extra=None):          if extid['value'].startswith('ark:'):              ark_id = extid['value']      assert ark_id -    extra['ark_id'] = ark_id      license_slug = lookup_license_slug(meta['rights']['uri']) @@ -98,6 +97,7 @@ def cdl_dash_release(meta, extra=None):      r = ReleaseEntity(          ext_ids=ReleaseEntityExtIds(              doi=doi, +            ark=ark_id,          ),          title=clean(meta['title'], force_xml=True),          publisher=clean(meta['publisher']), diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 999ce13f..c875010c 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -218,6 +218,8 @@ class CrossrefImporter(EntityImporter):                      creator_id=creator_id,                      index=index,                      raw_name=raw_name, +                    given_name=clean(am.get('given')), +                    surname=clean(am.get('family')),                      raw_affiliation=clean(raw_affiliation),                      role=ctype,                      extra=extra)) @@ -318,13 +320,15 @@ class CrossrefImporter(EntityImporter):          if not container_id:              if obj.get('container-title'):                  extra['container_name'] = clean(obj['container-title'][0]) -        for key in ('group-title', 'subtitle'): +        for key in ('group-title'):              val = obj.get(key)              if val:                  if type(val) == list:                      val = val[0]                  if type(val) == str: -                    extra[key] = clean(val) +                    val = clean(val) +                    if val: +                        extra[key] = clean(val)                  else:                      extra[key] = val          # crossref-nested extra keys @@ -397,6 +401,13 @@ class CrossrefImporter(EntityImporter):                  # title can't be just a single character                  return None +        subtitle = None +        if obj.get('subtitle'): +            subtitle = clean(obj.get('subtitle')[0], force_xml=True) +            if not subtitle or len(subtitle) <= 1: +                # subtitle can't be just a single character +                return None +          if extra_crossref:              extra['crossref'] = extra_crossref          if not extra: @@ -406,6 +417,7 @@ class CrossrefImporter(EntityImporter):              work_id=None,              container_id=container_id,              title=title, +            subtitle=subtitle,              original_title=original_title,              release_type=release_type,              release_stage=release_stage, diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index ba91d183..9e99bc0a 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -94,6 +94,8 @@ class GrobidMetadataImporter(EntityImporter):              contribs.append(fatcat_client.ReleaseContrib(                  index=i,                  raw_name=clean(a['name']), +                given_name=clean(a.get('given_name')), +                surname=clean(a.get('surname')),                  role="author",                  extra=None)) diff --git a/python/fatcat_tools/importers/wayback_static.py b/python/fatcat_tools/importers/wayback_static.py index 114920f7..c4f4f21e 100755 --- a/python/fatcat_tools/importers/wayback_static.py +++ b/python/fatcat_tools/importers/wayback_static.py @@ -120,6 +120,7 @@ def lookup_cdx(embed_url, verify_hashes=True, cdx_output=None):              resp.raise_for_status()              assert webcapture_cdx.sha1 == hashlib.sha1(resp.content).digest().hex()              webcapture_cdx.sha256 = hashlib.sha256(resp.content).digest().hex() +            webcapture_cdx.size_bytes = len(resp.content)          return webcapture_cdx      else:          return None  | 
