diff options
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- | python/fatcat_tools/importers/arabesque.py | 4 | ||||
-rw-r--r-- | python/fatcat_tools/importers/crossref.py | 3 | ||||
-rw-r--r-- | python/fatcat_tools/importers/datacite.py | 4 | ||||
-rw-r--r-- | python/fatcat_tools/importers/dblp_release.py | 4 | ||||
-rw-r--r-- | python/fatcat_tools/importers/doaj_article.py | 2 | ||||
-rw-r--r-- | python/fatcat_tools/importers/file_meta.py | 2 | ||||
-rw-r--r-- | python/fatcat_tools/importers/fileset_generic.py | 2 | ||||
-rw-r--r-- | python/fatcat_tools/importers/ingest.py | 12 | ||||
-rw-r--r-- | python/fatcat_tools/importers/matched.py | 4 | ||||
-rw-r--r-- | python/fatcat_tools/importers/orcid.py | 2 | ||||
-rw-r--r-- | python/fatcat_tools/importers/pubmed.py | 2 | ||||
-rw-r--r-- | python/fatcat_tools/importers/shadow.py | 2 |
12 files changed, 21 insertions, 22 deletions
diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py index 79fb10d3..ccf35446 100644 --- a/python/fatcat_tools/importers/arabesque.py +++ b/python/fatcat_tools/importers/arabesque.py @@ -62,13 +62,13 @@ class ArabesqueMatchImporter(EntityImporter): def want(self, row): if self.require_grobid and not row['postproc_status'] == "200": return False - if (row['hit'] == True + if (row['hit'] is True and row['final_sha1'] and row['final_timestamp'] and row['final_timestamp'] != "-" and len(row['final_timestamp']) == 14 and row['final_mimetype'] - and row['hit'] == True + and row['hit'] is True and row['identifier']): return True else: diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index bd72a781..38c19a63 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -205,7 +205,7 @@ class CrossrefImporter(EntityImporter): return None # Do require the 'title' keys to exist, as release entities do - if (not 'title' in obj) or (not obj['title']): + if ('title' not in obj) or (not obj['title']): self.counts['skip-blank-title'] += 1 return None @@ -429,7 +429,6 @@ class CrossrefImporter(EntityImporter): release_year = raw_date[0] release_date = None - original_title: Optional[str] = None if obj.get('original-title'): ot = obj.get('original-title') diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index eb49596f..1593e6f8 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -319,7 +319,7 @@ class DataciteImporter(EntityImporter): # 17871 | translator # 10870584 | # (4 rows) - # + # # Related: https://guide.fatcat.wiki/entity_release.html -- role # (string, of a set): the type of contribution, from a controlled # vocabulary. TODO: vocabulary needs review. @@ -1046,7 +1046,7 @@ def find_original_language_title(item, min_length=4, max_questionmarks=3): Example input: {'title': 'Some title', 'original_language_title': 'Some title'} """ - if not 'original_language_title' in item: + if 'original_language_title' not in item: return None title = item.get('title') if not title: diff --git a/python/fatcat_tools/importers/dblp_release.py b/python/fatcat_tools/importers/dblp_release.py index 670f190b..fa5cb842 100644 --- a/python/fatcat_tools/importers/dblp_release.py +++ b/python/fatcat_tools/importers/dblp_release.py @@ -93,7 +93,7 @@ class DblpReleaseImporter(EntityImporter): return self._dblp_container_map.get(prefix) def want(self, xml_elem): - if not xml_elem.name in self.ELEMENT_TYPES: + if xml_elem.name not in self.ELEMENT_TYPES: self.counts['skip-type'] += 1 return False if not xml_elem.get('key'): @@ -243,7 +243,7 @@ class DblpReleaseImporter(EntityImporter): # dblp-specific extra dblp_extra = dict(type=dblp_type) note = clean_str(xml_elem.note and xml_elem.note.text) - if note and not 'base-search.net' in note: + if note and 'base-search.net' not in note: dblp_extra['note'] = note if part_of_key: dblp_extra['part_of_key'] = part_of_key diff --git a/python/fatcat_tools/importers/doaj_article.py b/python/fatcat_tools/importers/doaj_article.py index 191a65d8..833089ae 100644 --- a/python/fatcat_tools/importers/doaj_article.py +++ b/python/fatcat_tools/importers/doaj_article.py @@ -73,7 +73,7 @@ class DoajArticleImporter(EntityImporter): } """ - if not obj or not isinstance(obj, dict) or not 'bibjson' in obj: + if not obj or not isinstance(obj, dict) or 'bibjson' not in obj: self.counts['skip-empty'] += 1 return None diff --git a/python/fatcat_tools/importers/file_meta.py b/python/fatcat_tools/importers/file_meta.py index 9f4b9e06..3d9f5923 100644 --- a/python/fatcat_tools/importers/file_meta.py +++ b/python/fatcat_tools/importers/file_meta.py @@ -35,7 +35,7 @@ class FileMetaImporter(EntityImporter): def parse_record(self, row): # bezerk mode doesn't make sense for this importer - assert self.bezerk_mode == False + assert self.bezerk_mode is False file_meta = row fe = fatcat_openapi_client.FileEntity( diff --git a/python/fatcat_tools/importers/fileset_generic.py b/python/fatcat_tools/importers/fileset_generic.py index f0ad5460..13352fb2 100644 --- a/python/fatcat_tools/importers/fileset_generic.py +++ b/python/fatcat_tools/importers/fileset_generic.py @@ -30,7 +30,7 @@ class FilesetImporter(EntityImporter): **kwargs) # bezerk mode doesn't make sense for this importer - assert self.bezerk_mode == False + assert self.bezerk_mode is False def want(self, row): if not row.get('release_ids'): diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py index cb663330..4d4efc0a 100644 --- a/python/fatcat_tools/importers/ingest.py +++ b/python/fatcat_tools/importers/ingest.py @@ -78,7 +78,7 @@ class IngestFileResultImporter(EntityImporter): Sandcrawler ingest-specific part of want(). Generic across file and webcapture ingest. """ - if row.get('hit') != True: + if row.get('hit') is not True: self.counts['skip-hit'] += 1 return False source = row['request'].get('ingest_request_source') @@ -178,9 +178,9 @@ class IngestFileResultImporter(EntityImporter): } # work around old schema - if not 'terminal_url' in terminal: + if 'terminal_url' not in terminal: terminal['terminal_url'] = terminal['url'] - if not 'terminal_dt' in terminal: + if 'terminal_dt' not in terminal: terminal['terminal_dt'] = terminal['dt'] # convert CDX-style digits to ISO-style timestamp @@ -358,7 +358,7 @@ class SavePaperNowFileImporter(IngestFileResultImporter): self.counts['skip-not-savepapernow'] += 1 return False - if row.get('hit') != True: + if row.get('hit') is not True: self.counts['skip-hit'] += 1 return False @@ -459,7 +459,7 @@ class IngestWebResultImporter(IngestFileResultImporter): for resource in row.get('html_resources', []): timestamp = resource['timestamp'] - if not "+" in timestamp and not "Z" in timestamp: + if "+" not in timestamp and "Z" not in timestamp: timestamp += "Z" wc_cdx.append(fatcat_openapi_client.WebcaptureCdxLine( surt=resource['surt'], @@ -808,7 +808,7 @@ class SavePaperNowFilesetImporter(IngestFilesetResultImporter): self.counts['skip-not-savepapernow'] += 1 return False - if row.get('hit') != True: + if row.get('hit') is not True: self.counts['skip-hit'] += 1 return False diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py index e0e4fc3c..09807276 100644 --- a/python/fatcat_tools/importers/matched.py +++ b/python/fatcat_tools/importers/matched.py @@ -94,7 +94,7 @@ class MatchedImporter(EntityImporter): urls = set() for url in obj.get('urls', []): url = make_rel_url(url, default_link_rel=self.default_link_rel) - if url != None: + if url is not None: urls.add(url) for cdx in obj.get('cdx', []): original = cdx['url'] @@ -104,7 +104,7 @@ class MatchedImporter(EntityImporter): original) urls.add(("webarchive", wayback)) url = make_rel_url(original, default_link_rel=self.default_link_rel) - if url != None: + if url is not None: urls.add(url) urls = [fatcat_openapi_client.FileUrl(rel=rel, url=url) for (rel, url) in urls] if len(urls) == 0: diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py index 21feea9e..4412a46d 100644 --- a/python/fatcat_tools/importers/orcid.py +++ b/python/fatcat_tools/importers/orcid.py @@ -40,7 +40,7 @@ class OrcidImporter(EntityImporter): returns a CreatorEntity """ - if not 'person' in obj: + if 'person' not in obj: return False name = obj['person']['name'] diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py index c9907c5e..00ad54d0 100644 --- a/python/fatcat_tools/importers/pubmed.py +++ b/python/fatcat_tools/importers/pubmed.py @@ -590,7 +590,7 @@ class PubmedImporter(EntityImporter): orcid = orcid.replace("http://orcid.org/", "") elif orcid.startswith("https://orcid.org/"): orcid = orcid.replace("https://orcid.org/", "") - elif not '-' in orcid: + elif '-' not in orcid: orcid = "{}-{}-{}-{}".format( orcid[0:4], orcid[4:8], diff --git a/python/fatcat_tools/importers/shadow.py b/python/fatcat_tools/importers/shadow.py index fa9b4d10..77205cee 100644 --- a/python/fatcat_tools/importers/shadow.py +++ b/python/fatcat_tools/importers/shadow.py @@ -95,7 +95,7 @@ class ShadowLibraryImporter(EntityImporter): urls = [] if obj.get('cdx'): url = make_rel_url(obj['cdx']['url'], default_link_rel=self.default_link_rel) - if url != None: + if url is not None: urls.append(url) wayback = "https://web.archive.org/web/{}/{}".format( obj['cdx']['datetime'], |