diff options
Diffstat (limited to 'python/fatcat_tools/importers')
| -rw-r--r-- | python/fatcat_tools/importers/common.py | 1 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/crossref.py | 14 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/datacite.py | 22 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 2 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/jalc.py | 2 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/pubmed.py | 2 | 
6 files changed, 18 insertions, 25 deletions
| diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index c0578224..c692a38d 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -692,7 +692,6 @@ class Bs4XmlLargeFilePusher(RecordPusher):      def run(self):          elem_iter = ET.iterparse(self.xml_file, ["start", "end"]) -        i = 0          root = None          for (event, element) in elem_iter:              if not root and event == "start": diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 854e3d9f..71f08952 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -278,15 +278,15 @@ class CrossrefImporter(EntityImporter):          # license slug          license_slug = None          license_extra = [] -        for l in obj.get('license', []): -            if l['content-version'] not in ('vor', 'unspecified'): +        for lic in obj.get('license', []): +            if lic['content-version'] not in ('vor', 'unspecified'):                  continue -            slug = lookup_license_slug(l['URL']) +            slug = lookup_license_slug(lic['URL'])              if slug:                  license_slug = slug -            if 'start' in l: -                l['start'] = l['start']['date-time'] -            license_extra.append(l) +            if 'start' in lic: +                lic['start'] = lic['start']['date-time'] +            license_extra.append(lic)          # references          refs = [] @@ -297,7 +297,7 @@ class CrossrefImporter(EntityImporter):                  # NOTE: are there crossref works with year < 100?                  if year > 2025 or year < 100:                      year = None -            except: +            except (TypeError, ValueError):                  year = None              ref_extra = dict()              key = rm.get('key') diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index ebb29feb..f93362d6 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -191,12 +191,6 @@ LICENSE_SLUG_MAP = {      "//spdx.org/licenses/OGL-Canada-2.0.json": "OGL-Canada",  } -# TODO(martin): drop this after 3.7 upgrade -try: -    isascii = str.isascii # new in 3.7, https://docs.python.org/3/library/stdtypes.html#str.isascii -except AttributeError: -    isascii = lambda s: len(s) == len(s.encode()) -  class DataciteImporter(EntityImporter):      """ @@ -287,7 +281,7 @@ class DataciteImporter(EntityImporter):              print('skipping record without a DOI', file=sys.stderr)              return -        if not isascii(doi): +        if not str.isascii(doi):              print('[{}] skipping non-ascii doi for now'.format(doi))              return None @@ -466,7 +460,7 @@ class DataciteImporter(EntityImporter):              try:                  _ = int(first_page) < int(last_page)                  pages = '{}-{}'.format(first_page, last_page) -            except ValueError as err: +            except ValueError as err:  # noqa: F841                  # TODO(martin): This is more debug than info.                  # print('[{}] {}'.format(doi, err), file=sys.stderr)                  pass @@ -478,11 +472,11 @@ class DataciteImporter(EntityImporter):          license_slug = None          license_extra = [] -        for l in attributes.get('rightsList', []): -            slug = lookup_license_slug(l.get('rightsUri')) +        for lic in attributes.get('rightsList', []): +            slug = lookup_license_slug(lic.get('rightsUri'))              if slug:                  license_slug = slug -            license_extra.append(l) +            license_extra.append(lic)          # Release type. Try to determine the release type from a variety of          # types supplied in datacite. The "attributes.types.resourceType" is @@ -524,7 +518,7 @@ class DataciteImporter(EntityImporter):          value = attributes.get('language', '') or ''          try:              language = pycountry.languages.lookup(value).alpha_2 -        except (LookupError, AttributeError) as err: +        except (LookupError, AttributeError) as err:  # noqa: F841              pass              # TODO(martin): Print this on debug level, only.              # print('[{}] language lookup miss for {}: {}'.format(doi, value, err), file=sys.stderr) @@ -549,7 +543,7 @@ class DataciteImporter(EntityImporter):              if isinstance(text, list):                  try:                      text = "\n".join(text) -                except TypeError as err: +                except TypeError:                      continue # Bail out, if it is not a list of strings.              # Limit length. @@ -760,7 +754,7 @@ class DataciteImporter(EntityImporter):          i = 0          for c in creators:              if not set_index: -               i = None +                i = None              nameType = c.get('nameType', '') or ''              if nameType in ('', 'Personal'):                  creator_id = None diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index 5ec6cc3c..a811c856 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -104,7 +104,7 @@ class GrobidMetadataImporter(EntityImporter):              if raw.get('date'):                  try:                      year = int(raw['date'].strip()[:4]) -                except: +                except (IndexError, ValueError):                      pass              for key in ('volume', 'url', 'issue', 'publisher'):                  if raw.get(key): diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py index 38aa00eb..9bf2621c 100644 --- a/python/fatcat_tools/importers/jalc.py +++ b/python/fatcat_tools/importers/jalc.py @@ -308,7 +308,7 @@ class JalcImporter(EntityImporter):              work_id=None,              title=title,              original_title=clean(original_title), -            release_type="article-journal", +            release_type=release_type,              release_stage='published',              release_date=release_date,              release_year=release_year, diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py index d8a6842c..0ff55c05 100644 --- a/python/fatcat_tools/importers/pubmed.py +++ b/python/fatcat_tools/importers/pubmed.py @@ -450,7 +450,7 @@ class PubmedImporter(EntityImporter):          if issnp:              container_extra['issnp'] = issnp.string          if not issnl: -            issnll = self.issn2issnl(issnp) +            issnl = self.issn2issnl(issnp)          if issnl:              container_id = self.lookup_issnl(issnl) | 
