From 5c39fa4b9a2c43d94d1eef68ac24f1d0099d219a Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 23 Jul 2020 15:28:40 -0700 Subject: simple lint (flake8) fixes in tests The pytest fixture syntax interacts weirdly with flake8 tests, so ignore the "redefinition" and "unused variable" errors more carefully for .py files under ./tests/ --- python/.flake8 | 2 +- python/tests/api_editgroups.py | 1 - python/tests/api_entity_state.py | 2 +- python/tests/api_releases.py | 2 +- python/tests/import_ingest.py | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) (limited to 'python') diff --git a/python/.flake8 b/python/.flake8 index 34f6131c..bb1baa71 100644 --- a/python/.flake8 +++ b/python/.flake8 @@ -10,4 +10,4 @@ max-line-length = 120 per-file-ignores = */__init__.py: F401 tests/*.py: F401,F811 - tests/transform_csl.py: W291 + tests/transform_csl.py: F401,F811,W291 diff --git a/python/tests/api_editgroups.py b/python/tests/api_editgroups.py index 142687c2..b4420b29 100644 --- a/python/tests/api_editgroups.py +++ b/python/tests/api_editgroups.py @@ -98,7 +98,6 @@ def test_editgroup_auto_batch(api): def test_batch_params(api): - eg = quick_eg(api) c1 = CreatorEntity(display_name="test auto_batch") c2 = CreatorEntity(display_name="test another auto_batch") diff --git a/python/tests/api_entity_state.py b/python/tests/api_entity_state.py index d5ba6301..4b49083e 100644 --- a/python/tests/api_entity_state.py +++ b/python/tests/api_entity_state.py @@ -359,7 +359,7 @@ def test_self_redirect(api): c1_redirect = CreatorEntity(redirect=c1.ident) eg = quick_eg(api) with pytest.raises(fatcat_openapi_client.rest.ApiException): - merge_edit = api.update_creator(eg.editgroup_id, c1.ident, c1_redirect) + api.update_creator(eg.editgroup_id, c1.ident, c1_redirect) def test_wip_redirect(api): diff --git a/python/tests/api_releases.py b/python/tests/api_releases.py index c4c05ea6..9c70f655 100644 --- a/python/tests/api_releases.py +++ b/python/tests/api_releases.py @@ -177,7 +177,7 @@ 
def test_empty_fields(api): title="something", contribs=[ReleaseContrib(raw_name="somebody")], ext_ids=ReleaseExtIds()) - r1edit = api.create_release(eg.editgroup_id, r1) + api.create_release(eg.editgroup_id, r1) with pytest.raises(fatcat_openapi_client.rest.ApiException): r2 = ReleaseEntity(title="", ext_ids=ReleaseExtIds()) diff --git a/python/tests/import_ingest.py b/python/tests/import_ingest.py index ebe2923c..4a46232a 100644 --- a/python/tests/import_ingest.py +++ b/python/tests/import_ingest.py @@ -63,7 +63,7 @@ def test_ingest_importer_stage(ingest_importer, api): eg = quick_eg(api) r1 = api.lookup_release(doi="10.123/abc") r1.release_stage = row['release_stage'] - c1 = api.update_release(eg.editgroup_id, r1.ident, r1) + api.update_release(eg.editgroup_id, r1.ident, r1) api.accept_editgroup(eg.editgroup_id) # set ingest request stage -- cgit v1.2.3 From 1b28ba68e3b25a0cded2e88b88134d8d332f8d96 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 23 Jul 2020 15:29:48 -0700 Subject: fix actual typo in tests (caught by lint) --- python/tests/api_annotations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'python') diff --git a/python/tests/api_annotations.py b/python/tests/api_annotations.py index 0606b637..79acaa4b 100644 --- a/python/tests/api_annotations.py +++ b/python/tests/api_annotations.py @@ -30,5 +30,5 @@ def test_annotations(api): if thing.annotation_id == a[0].annotation_id: found = thing break - assert thing - assert thing.extra['thing'] == "thang" + assert found + assert found.extra['thing'] == "thang" -- cgit v1.2.3 From 578ce63bd17df11d3ca7cd1083feebe4b92e457c Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 23 Jul 2020 15:31:03 -0700 Subject: simple lint (flake8) fixes over python codebase These should not have any behavior changes, though a number of exception catches are now more specific (bare `except:` clauses narrowed to named exception types), so there may be long-tail exceptions that now propagate from these statements instead of being silently caught.
--- python/fatcat_tools/importers/common.py | 1 - python/fatcat_tools/importers/crossref.py | 14 +++++++------- python/fatcat_tools/importers/datacite.py | 14 +++++++------- python/fatcat_tools/importers/grobid_metadata.py | 2 +- python/fatcat_tools/importers/jalc.py | 2 +- python/fatcat_web/__init__.py | 2 +- python/fatcat_web/routes.py | 2 +- 7 files changed, 18 insertions(+), 19 deletions(-) (limited to 'python') diff --git a/python/fatcat_tools/importers/common.py b/python/fatcat_tools/importers/common.py index c0578224..c692a38d 100644 --- a/python/fatcat_tools/importers/common.py +++ b/python/fatcat_tools/importers/common.py @@ -692,7 +692,6 @@ class Bs4XmlLargeFilePusher(RecordPusher): def run(self): elem_iter = ET.iterparse(self.xml_file, ["start", "end"]) - i = 0 root = None for (event, element) in elem_iter: if not root and event == "start": diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 854e3d9f..71f08952 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -278,15 +278,15 @@ class CrossrefImporter(EntityImporter): # license slug license_slug = None license_extra = [] - for l in obj.get('license', []): - if l['content-version'] not in ('vor', 'unspecified'): + for lic in obj.get('license', []): + if lic['content-version'] not in ('vor', 'unspecified'): continue - slug = lookup_license_slug(l['URL']) + slug = lookup_license_slug(lic['URL']) if slug: license_slug = slug - if 'start' in l: - l['start'] = l['start']['date-time'] - license_extra.append(l) + if 'start' in lic: + lic['start'] = lic['start']['date-time'] + license_extra.append(lic) # references refs = [] @@ -297,7 +297,7 @@ class CrossrefImporter(EntityImporter): # NOTE: are there crossref works with year < 100? 
if year > 2025 or year < 100: year = None - except: + except (TypeError, ValueError): year = None ref_extra = dict() key = rm.get('key') diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index ebb29feb..50d73798 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -466,7 +466,7 @@ class DataciteImporter(EntityImporter): try: _ = int(first_page) < int(last_page) pages = '{}-{}'.format(first_page, last_page) - except ValueError as err: + except ValueError as err: # noqa: F841 # TODO(martin): This is more debug than info. # print('[{}] {}'.format(doi, err), file=sys.stderr) pass @@ -478,11 +478,11 @@ class DataciteImporter(EntityImporter): license_slug = None license_extra = [] - for l in attributes.get('rightsList', []): - slug = lookup_license_slug(l.get('rightsUri')) + for lic in attributes.get('rightsList', []): + slug = lookup_license_slug(lic.get('rightsUri')) if slug: license_slug = slug - license_extra.append(l) + license_extra.append(lic) # Release type. Try to determine the release type from a variety of # types supplied in datacite. The "attributes.types.resourceType" is @@ -524,7 +524,7 @@ class DataciteImporter(EntityImporter): value = attributes.get('language', '') or '' try: language = pycountry.languages.lookup(value).alpha_2 - except (LookupError, AttributeError) as err: + except (LookupError, AttributeError) as err: # noqa: F841 pass # TODO(martin): Print this on debug level, only. # print('[{}] language lookup miss for {}: {}'.format(doi, value, err), file=sys.stderr) @@ -549,7 +549,7 @@ class DataciteImporter(EntityImporter): if isinstance(text, list): try: text = "\n".join(text) - except TypeError as err: + except TypeError: continue # Bail out, if it is not a list of strings. # Limit length. 
@@ -760,7 +760,7 @@ class DataciteImporter(EntityImporter): i = 0 for c in creators: if not set_index: - i = None + i = None nameType = c.get('nameType', '') or '' if nameType in ('', 'Personal'): creator_id = None diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index 5ec6cc3c..a811c856 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -104,7 +104,7 @@ class GrobidMetadataImporter(EntityImporter): if raw.get('date'): try: year = int(raw['date'].strip()[:4]) - except: + except (IndexError, ValueError): pass for key in ('volume', 'url', 'issue', 'publisher'): if raw.get(key): diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py index 38aa00eb..9bf2621c 100644 --- a/python/fatcat_tools/importers/jalc.py +++ b/python/fatcat_tools/importers/jalc.py @@ -308,7 +308,7 @@ class JalcImporter(EntityImporter): work_id=None, title=title, original_title=clean(original_title), - release_type="article-journal", + release_type=release_type, release_stage='published', release_date=release_date, release_year=release_year, diff --git a/python/fatcat_web/__init__.py b/python/fatcat_web/__init__.py index 56a2e020..562ffeb2 100644 --- a/python/fatcat_web/__init__.py +++ b/python/fatcat_web/__init__.py @@ -71,7 +71,7 @@ mwoauth = MWOAuth( mwoauth.handshaker.user_agent = "fatcat.wiki;python_web_interface" app.register_blueprint(mwoauth.bp, url_prefix='/auth/wikipedia') -from fatcat_web import routes, editing_routes, auth, cors, forms +from fatcat_web import routes, editing_routes, auth, cors, forms # noqa: E402 # TODO: blocking on ORCID support in loginpass if Config.ORCID_CLIENT_ID: diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index 4684f799..2489ac03 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -935,7 +935,7 @@ def create_auth_token(): try: duration_seconds = 
int(duration_seconds) assert duration_seconds >= 1 - except: + except (ValueError, AssertionError): flash("duration_seconds must be a positive non-zero integer") abort(400) -- cgit v1.2.3 From 06d4f13307b1c2591f7f27f56d97d9b463b0e14e Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 23 Jul 2020 15:32:18 -0700 Subject: remove isascii() work around definition in importers/datacite.py We are python3.7 now, so this isn't needed. --- python/fatcat_tools/importers/datacite.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'python') diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 50d73798..f93362d6 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -191,12 +191,6 @@ LICENSE_SLUG_MAP = { "//spdx.org/licenses/OGL-Canada-2.0.json": "OGL-Canada", } -# TODO(martin): drop this after 3.7 upgrade -try: - isascii = str.isascii # new in 3.7, https://docs.python.org/3/library/stdtypes.html#str.isascii -except AttributeError: - isascii = lambda s: len(s) == len(s.encode()) - class DataciteImporter(EntityImporter): """ @@ -287,7 +281,7 @@ class DataciteImporter(EntityImporter): print('skipping record without a DOI', file=sys.stderr) return - if not isascii(doi): + if not str.isascii(doi): print('[{}] skipping non-ascii doi for now'.format(doi)) return None -- cgit v1.2.3 From fc93f35996cab23984d9a45f1a411b8776e437bf Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 23 Jul 2020 15:32:41 -0700 Subject: fix issnl typo in pubmed Oh no! This bug may actually have had significant negative impact on metadata in fatcat, in terms of missing container_id associations with pubmed entities. There are about 500k release entities with a PMID but no container_id. Of those, 89k have at least a container_name. Unclear how many would have matched to ISSN-L and thus to a container. 
--- python/fatcat_tools/importers/pubmed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'python') diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py index d8a6842c..0ff55c05 100644 --- a/python/fatcat_tools/importers/pubmed.py +++ b/python/fatcat_tools/importers/pubmed.py @@ -450,7 +450,7 @@ class PubmedImporter(EntityImporter): if issnp: container_extra['issnp'] = issnp.string if not issnl: - issnll = self.issn2issnl(issnp) + issnl = self.issn2issnl(issnp) if issnl: container_id = self.lookup_issnl(issnl) -- cgit v1.2.3