aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/importers
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- python/fatcat_tools/importers/common.py 1
-rw-r--r-- python/fatcat_tools/importers/crossref.py 14
-rw-r--r-- python/fatcat_tools/importers/datacite.py 22
-rw-r--r-- python/fatcat_tools/importers/grobid_metadata.py 2
-rw-r--r-- python/fatcat_tools/importers/jalc.py 2
-rw-r--r-- python/fatcat_tools/importers/pubmed.py 2
6 files changed, 18 insertions, 25 deletions
diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py
index c0578224..c692a38d 100644
--- a/python/fatcat_tools/importers/common.py
+++ b/python/fatcat_tools/importers/common.py
@@ -692,7 +692,6 @@ class Bs4XmlLargeFilePusher(RecordPusher):
def run(self):
elem_iter = ET.iterparse(self.xml_file, ["start", "end"])
- i = 0
root = None
for (event, element) in elem_iter:
if not root and event == "start":
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index 854e3d9f..71f08952 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -278,15 +278,15 @@ class CrossrefImporter(EntityImporter):
# license slug
license_slug = None
license_extra = []
- for l in obj.get('license', []):
- if l['content-version'] not in ('vor', 'unspecified'):
+ for lic in obj.get('license', []):
+ if lic['content-version'] not in ('vor', 'unspecified'):
continue
- slug = lookup_license_slug(l['URL'])
+ slug = lookup_license_slug(lic['URL'])
if slug:
license_slug = slug
- if 'start' in l:
- l['start'] = l['start']['date-time']
- license_extra.append(l)
+ if 'start' in lic:
+ lic['start'] = lic['start']['date-time']
+ license_extra.append(lic)
# references
refs = []
@@ -297,7 +297,7 @@ class CrossrefImporter(EntityImporter):
# NOTE: are there crossref works with year < 100?
if year > 2025 or year < 100:
year = None
- except:
+ except (TypeError, ValueError):
year = None
ref_extra = dict()
key = rm.get('key')
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index ebb29feb..f93362d6 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -191,12 +191,6 @@ LICENSE_SLUG_MAP = {
"//spdx.org/licenses/OGL-Canada-2.0.json": "OGL-Canada",
}
-# TODO(martin): drop this after 3.7 upgrade
-try:
- isascii = str.isascii # new in 3.7, https://docs.python.org/3/library/stdtypes.html#str.isascii
-except AttributeError:
- isascii = lambda s: len(s) == len(s.encode())
-
class DataciteImporter(EntityImporter):
"""
@@ -287,7 +281,7 @@ class DataciteImporter(EntityImporter):
print('skipping record without a DOI', file=sys.stderr)
return
- if not isascii(doi):
+ if not str.isascii(doi):
print('[{}] skipping non-ascii doi for now'.format(doi))
return None
@@ -466,7 +460,7 @@ class DataciteImporter(EntityImporter):
try:
_ = int(first_page) < int(last_page)
pages = '{}-{}'.format(first_page, last_page)
- except ValueError as err:
+ except ValueError as err: # noqa: F841
# TODO(martin): This is more debug than info.
# print('[{}] {}'.format(doi, err), file=sys.stderr)
pass
@@ -478,11 +472,11 @@ class DataciteImporter(EntityImporter):
license_slug = None
license_extra = []
- for l in attributes.get('rightsList', []):
- slug = lookup_license_slug(l.get('rightsUri'))
+ for lic in attributes.get('rightsList', []):
+ slug = lookup_license_slug(lic.get('rightsUri'))
if slug:
license_slug = slug
- license_extra.append(l)
+ license_extra.append(lic)
# Release type. Try to determine the release type from a variety of
# types supplied in datacite. The "attributes.types.resourceType" is
@@ -524,7 +518,7 @@ class DataciteImporter(EntityImporter):
value = attributes.get('language', '') or ''
try:
language = pycountry.languages.lookup(value).alpha_2
- except (LookupError, AttributeError) as err:
+ except (LookupError, AttributeError) as err: # noqa: F841
pass
# TODO(martin): Print this on debug level, only.
# print('[{}] language lookup miss for {}: {}'.format(doi, value, err), file=sys.stderr)
@@ -549,7 +543,7 @@ class DataciteImporter(EntityImporter):
if isinstance(text, list):
try:
text = "\n".join(text)
- except TypeError as err:
+ except TypeError:
continue # Bail out, if it is not a list of strings.
# Limit length.
@@ -760,7 +754,7 @@ class DataciteImporter(EntityImporter):
i = 0
for c in creators:
if not set_index:
- i = None
+ i = None
nameType = c.get('nameType', '') or ''
if nameType in ('', 'Personal'):
creator_id = None
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index 5ec6cc3c..a811c856 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -104,7 +104,7 @@ class GrobidMetadataImporter(EntityImporter):
if raw.get('date'):
try:
year = int(raw['date'].strip()[:4])
- except:
+ except (IndexError, ValueError):
pass
for key in ('volume', 'url', 'issue', 'publisher'):
if raw.get(key):
diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py
index 38aa00eb..9bf2621c 100644
--- a/python/fatcat_tools/importers/jalc.py
+++ b/python/fatcat_tools/importers/jalc.py
@@ -308,7 +308,7 @@ class JalcImporter(EntityImporter):
work_id=None,
title=title,
original_title=clean(original_title),
- release_type="article-journal",
+ release_type=release_type,
release_stage='published',
release_date=release_date,
release_year=release_year,
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index d8a6842c..0ff55c05 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -450,7 +450,7 @@ class PubmedImporter(EntityImporter):
if issnp:
container_extra['issnp'] = issnp.string
if not issnl:
- issnll = self.issn2issnl(issnp)
+ issnl = self.issn2issnl(issnp)
if issnl:
container_id = self.lookup_issnl(issnl)