diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-02 19:51:48 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-02 19:51:51 -0700 |
commit | 4c77bdb8d92523935454f1c406c954913f923c01 (patch) | |
tree | 2b2a1221cc78683afb9f18a87ccfd10ef0afbc64 /python/fatcat_tools/importers | |
parent | 3da07382d682a0c474ddc79f748a50ad2cc758cd (diff) | |
download | fatcat-4c77bdb8d92523935454f1c406c954913f923c01.tar.gz fatcat-4c77bdb8d92523935454f1c406c954913f923c01.zip |
lint: resolve existing mypy type errors
Adds annotations and re-workes dataflow to satisfy existing mypy issues,
without adding any additional type annotations to, eg, function
signatures.
There will probably be many more type errors when annotations are all
added.
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- | python/fatcat_tools/importers/crossref.py | 26 | ||||
-rw-r--r-- | python/fatcat_tools/importers/dblp_release.py | 21 | ||||
-rw-r--r-- | python/fatcat_tools/importers/doaj_article.py | 2 |
3 files changed, 27 insertions, 22 deletions
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 606d4bb1..d0017002 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -393,8 +393,6 @@ class CrossrefImporter(EntityImporter): ): if clean(rm.get(k)): ref_extra[k] = clean(rm[k]) - if not ref_extra: - ref_extra = None refs.append( fatcat_openapi_client.ReleaseRef( index=i, @@ -406,7 +404,7 @@ class CrossrefImporter(EntityImporter): title=clean(rm.get("article-title")), locator=clean(rm.get("first-page")), # TODO: just dump JSON somewhere here? - extra=ref_extra, + extra=ref_extra or None, ) ) @@ -421,8 +419,8 @@ class CrossrefImporter(EntityImporter): ) # extra fields - extra = dict() - extra_crossref = dict() + extra: Dict[str, Any] = dict() + extra_crossref: Dict[str, Any] = dict() # top-level extra keys if not container_id: if obj.get("container-title"): @@ -471,13 +469,13 @@ class CrossrefImporter(EntityImporter): "dissertation", "book-chapter", ): - release_stage = "published" + release_stage: Optional[str] = "published" else: # unknown release_stage = None # external identifiers - extids: Dict[str, Any] = self.lookup_ext_ids(doi=obj["DOI"].lower()) + extids: Dict[str, Any] = self.lookup_ext_ids(doi=obj["DOI"].lower()) or {} # filter out unreasonably huge releases if len(abstracts) > 100: @@ -512,7 +510,7 @@ class CrossrefImporter(EntityImporter): title: Optional[str] = None if obj.get("title"): - title = clean(obj.get("title")[0], force_xml=True) + title = clean(obj["title"][0], force_xml=True) if not title or len(title) <= 1: # title can't be just a single character self.counts["skip-blank-title"] += 1 @@ -520,15 +518,13 @@ class CrossrefImporter(EntityImporter): subtitle = None if obj.get("subtitle"): - subtitle = clean(obj.get("subtitle")[0], force_xml=True) + subtitle = clean(obj["subtitle"][0], force_xml=True) if not subtitle or len(subtitle) <= 1: # subtitle can't be just a single character subtitle = None if extra_crossref: extra["crossref"] = extra_crossref - if not extra: - extra = None re = ReleaseEntity( work_id=None, @@ -556,10 +552,10 @@ class CrossrefImporter(EntityImporter): pages=clean(obj.get("page")), language=clean(obj.get("language")), license_slug=license_slug, - extra=extra, - abstracts=abstracts, - contribs=contribs, - refs=refs, + extra=extra or None, + abstracts=abstracts or None, + contribs=contribs or None, + refs=refs or None, ) return re diff --git a/python/fatcat_tools/importers/dblp_release.py b/python/fatcat_tools/importers/dblp_release.py index 5baa6cd6..e73e5f33 100644 --- a/python/fatcat_tools/importers/dblp_release.py +++ b/python/fatcat_tools/importers/dblp_release.py @@ -26,6 +26,7 @@ import sys # noqa: F401 import warnings from typing import Any, List, Optional +import bs4 import fatcat_openapi_client from fatcat_tools.importers.common import EntityImporter @@ -420,7 +421,9 @@ class DblpReleaseImporter(EntityImporter): ) ) - def dblp_contribs(self, authors: List[dict]) -> List[fatcat_openapi_client.ReleaseContrib]: + def dblp_contribs( + self, elem: bs4.element.Tag + ) -> List[fatcat_openapi_client.ReleaseContrib]: """ - author (multiple; each a single string) => may have HTML entities @@ -431,21 +434,23 @@ class DblpReleaseImporter(EntityImporter): """ contribs = [] index = 0 - for elem in authors.find_all("author"): + for elem in elem.find_all("author"): contrib = self.dblp_contrib_single(elem) contrib.role = "author" contrib.index = index contribs.append(contrib) index += 1 - for elem in authors.find_all("editor"): + for elem in elem.find_all("editor"): contrib = self.dblp_contrib_single(elem) contrib.role = "editor" contribs.append(contrib) return contribs - def dblp_contrib_single(self, elem: Any) -> fatcat_openapi_client.ReleaseContrib: + def dblp_contrib_single( + self, elem: bs4.element.Tag + ) -> fatcat_openapi_client.ReleaseContrib: """ In the future, might try to implement creator key-ificiation and lookup here. @@ -461,11 +466,15 @@ class DblpReleaseImporter(EntityImporter): raw_name = clean_str(elem.text) # remove number in author name, if present - if raw_name.split()[-1].isdigit(): + if raw_name and raw_name.split()[-1].isdigit(): raw_name = " ".join(raw_name.split()[:-1]) if elem.get("orcid"): - orcid = clean_orcid(elem["orcid"]) + orcid_val = elem["orcid"] + if isinstance(orcid_val, list): + orcid = clean_orcid(orcid_val[0]) + else: + orcid = clean_orcid(orcid_val) if orcid: creator_id = self.lookup_orcid(orcid) if not creator_id: diff --git a/python/fatcat_tools/importers/doaj_article.py b/python/fatcat_tools/importers/doaj_article.py index cd063337..56045ea7 100644 --- a/python/fatcat_tools/importers/doaj_article.py +++ b/python/fatcat_tools/importers/doaj_article.py @@ -382,7 +382,7 @@ class DoajArticleImporter(EntityImporter): if not license.get("open_access"): continue slug = license.get("type") - if slug.startswith("CC "): + if slug and slug.startswith("CC "): slug = slug.replace("CC ", "cc-").lower() return slug return None |