about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools/importers
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- python/fatcat_tools/importers/arabesque.py | 4
-rw-r--r-- python/fatcat_tools/importers/crossref.py | 3
-rw-r--r-- python/fatcat_tools/importers/datacite.py | 4
-rw-r--r-- python/fatcat_tools/importers/dblp_release.py | 4
-rw-r--r-- python/fatcat_tools/importers/doaj_article.py | 2
-rw-r--r-- python/fatcat_tools/importers/file_meta.py | 2
-rw-r--r-- python/fatcat_tools/importers/fileset_generic.py | 2
-rw-r--r-- python/fatcat_tools/importers/ingest.py | 12
-rw-r--r-- python/fatcat_tools/importers/matched.py | 4
-rw-r--r-- python/fatcat_tools/importers/orcid.py | 2
-rw-r--r-- python/fatcat_tools/importers/pubmed.py | 2
-rw-r--r-- python/fatcat_tools/importers/shadow.py | 2
12 files changed, 21 insertions, 22 deletions
diff --git a/python/fatcat_tools/importers/arabesque.py b/python/fatcat_tools/importers/arabesque.py
index 79fb10d3..ccf35446 100644
--- a/python/fatcat_tools/importers/arabesque.py
+++ b/python/fatcat_tools/importers/arabesque.py
@@ -62,13 +62,13 @@ class ArabesqueMatchImporter(EntityImporter):
def want(self, row):
if self.require_grobid and not row['postproc_status'] == "200":
return False
- if (row['hit'] == True
+ if (row['hit'] is True
and row['final_sha1']
and row['final_timestamp']
and row['final_timestamp'] != "-"
and len(row['final_timestamp']) == 14
and row['final_mimetype']
- and row['hit'] == True
+ and row['hit'] is True
and row['identifier']):
return True
else:
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index bd72a781..38c19a63 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -205,7 +205,7 @@ class CrossrefImporter(EntityImporter):
return None
# Do require the 'title' keys to exist, as release entities do
- if (not 'title' in obj) or (not obj['title']):
+ if ('title' not in obj) or (not obj['title']):
self.counts['skip-blank-title'] += 1
return None
@@ -429,7 +429,6 @@ class CrossrefImporter(EntityImporter):
release_year = raw_date[0]
release_date = None
-
original_title: Optional[str] = None
if obj.get('original-title'):
ot = obj.get('original-title')
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index eb49596f..1593e6f8 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -319,7 +319,7 @@ class DataciteImporter(EntityImporter):
# 17871 | translator
# 10870584 |
# (4 rows)
- #
+ #
# Related: https://guide.fatcat.wiki/entity_release.html -- role
# (string, of a set): the type of contribution, from a controlled
# vocabulary. TODO: vocabulary needs review.
@@ -1046,7 +1046,7 @@ def find_original_language_title(item, min_length=4, max_questionmarks=3):
Example input: {'title': 'Some title', 'original_language_title': 'Some title'}
"""
- if not 'original_language_title' in item:
+ if 'original_language_title' not in item:
return None
title = item.get('title')
if not title:
diff --git a/python/fatcat_tools/importers/dblp_release.py b/python/fatcat_tools/importers/dblp_release.py
index 670f190b..fa5cb842 100644
--- a/python/fatcat_tools/importers/dblp_release.py
+++ b/python/fatcat_tools/importers/dblp_release.py
@@ -93,7 +93,7 @@ class DblpReleaseImporter(EntityImporter):
return self._dblp_container_map.get(prefix)
def want(self, xml_elem):
- if not xml_elem.name in self.ELEMENT_TYPES:
+ if xml_elem.name not in self.ELEMENT_TYPES:
self.counts['skip-type'] += 1
return False
if not xml_elem.get('key'):
@@ -243,7 +243,7 @@ class DblpReleaseImporter(EntityImporter):
# dblp-specific extra
dblp_extra = dict(type=dblp_type)
note = clean_str(xml_elem.note and xml_elem.note.text)
- if note and not 'base-search.net' in note:
+ if note and 'base-search.net' not in note:
dblp_extra['note'] = note
if part_of_key:
dblp_extra['part_of_key'] = part_of_key
diff --git a/python/fatcat_tools/importers/doaj_article.py b/python/fatcat_tools/importers/doaj_article.py
index 191a65d8..833089ae 100644
--- a/python/fatcat_tools/importers/doaj_article.py
+++ b/python/fatcat_tools/importers/doaj_article.py
@@ -73,7 +73,7 @@ class DoajArticleImporter(EntityImporter):
}
"""
- if not obj or not isinstance(obj, dict) or not 'bibjson' in obj:
+ if not obj or not isinstance(obj, dict) or 'bibjson' not in obj:
self.counts['skip-empty'] += 1
return None
diff --git a/python/fatcat_tools/importers/file_meta.py b/python/fatcat_tools/importers/file_meta.py
index 9f4b9e06..3d9f5923 100644
--- a/python/fatcat_tools/importers/file_meta.py
+++ b/python/fatcat_tools/importers/file_meta.py
@@ -35,7 +35,7 @@ class FileMetaImporter(EntityImporter):
def parse_record(self, row):
# bezerk mode doesn't make sense for this importer
- assert self.bezerk_mode == False
+ assert self.bezerk_mode is False
file_meta = row
fe = fatcat_openapi_client.FileEntity(
diff --git a/python/fatcat_tools/importers/fileset_generic.py b/python/fatcat_tools/importers/fileset_generic.py
index f0ad5460..13352fb2 100644
--- a/python/fatcat_tools/importers/fileset_generic.py
+++ b/python/fatcat_tools/importers/fileset_generic.py
@@ -30,7 +30,7 @@ class FilesetImporter(EntityImporter):
**kwargs)
# bezerk mode doesn't make sense for this importer
- assert self.bezerk_mode == False
+ assert self.bezerk_mode is False
def want(self, row):
if not row.get('release_ids'):
diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py
index cb663330..4d4efc0a 100644
--- a/python/fatcat_tools/importers/ingest.py
+++ b/python/fatcat_tools/importers/ingest.py
@@ -78,7 +78,7 @@ class IngestFileResultImporter(EntityImporter):
Sandcrawler ingest-specific part of want(). Generic across file and
webcapture ingest.
"""
- if row.get('hit') != True:
+ if row.get('hit') is not True:
self.counts['skip-hit'] += 1
return False
source = row['request'].get('ingest_request_source')
@@ -178,9 +178,9 @@ class IngestFileResultImporter(EntityImporter):
}
# work around old schema
- if not 'terminal_url' in terminal:
+ if 'terminal_url' not in terminal:
terminal['terminal_url'] = terminal['url']
- if not 'terminal_dt' in terminal:
+ if 'terminal_dt' not in terminal:
terminal['terminal_dt'] = terminal['dt']
# convert CDX-style digits to ISO-style timestamp
@@ -358,7 +358,7 @@ class SavePaperNowFileImporter(IngestFileResultImporter):
self.counts['skip-not-savepapernow'] += 1
return False
- if row.get('hit') != True:
+ if row.get('hit') is not True:
self.counts['skip-hit'] += 1
return False
@@ -459,7 +459,7 @@ class IngestWebResultImporter(IngestFileResultImporter):
for resource in row.get('html_resources', []):
timestamp = resource['timestamp']
- if not "+" in timestamp and not "Z" in timestamp:
+ if "+" not in timestamp and "Z" not in timestamp:
timestamp += "Z"
wc_cdx.append(fatcat_openapi_client.WebcaptureCdxLine(
surt=resource['surt'],
@@ -808,7 +808,7 @@ class SavePaperNowFilesetImporter(IngestFilesetResultImporter):
self.counts['skip-not-savepapernow'] += 1
return False
- if row.get('hit') != True:
+ if row.get('hit') is not True:
self.counts['skip-hit'] += 1
return False
diff --git a/python/fatcat_tools/importers/matched.py b/python/fatcat_tools/importers/matched.py
index e0e4fc3c..09807276 100644
--- a/python/fatcat_tools/importers/matched.py
+++ b/python/fatcat_tools/importers/matched.py
@@ -94,7 +94,7 @@ class MatchedImporter(EntityImporter):
urls = set()
for url in obj.get('urls', []):
url = make_rel_url(url, default_link_rel=self.default_link_rel)
- if url != None:
+ if url is not None:
urls.add(url)
for cdx in obj.get('cdx', []):
original = cdx['url']
@@ -104,7 +104,7 @@ class MatchedImporter(EntityImporter):
original)
urls.add(("webarchive", wayback))
url = make_rel_url(original, default_link_rel=self.default_link_rel)
- if url != None:
+ if url is not None:
urls.add(url)
urls = [fatcat_openapi_client.FileUrl(rel=rel, url=url) for (rel, url) in urls]
if len(urls) == 0:
diff --git a/python/fatcat_tools/importers/orcid.py b/python/fatcat_tools/importers/orcid.py
index 21feea9e..4412a46d 100644
--- a/python/fatcat_tools/importers/orcid.py
+++ b/python/fatcat_tools/importers/orcid.py
@@ -40,7 +40,7 @@ class OrcidImporter(EntityImporter):
returns a CreatorEntity
"""
- if not 'person' in obj:
+ if 'person' not in obj:
return False
name = obj['person']['name']
diff --git a/python/fatcat_tools/importers/pubmed.py b/python/fatcat_tools/importers/pubmed.py
index c9907c5e..00ad54d0 100644
--- a/python/fatcat_tools/importers/pubmed.py
+++ b/python/fatcat_tools/importers/pubmed.py
@@ -590,7 +590,7 @@ class PubmedImporter(EntityImporter):
orcid = orcid.replace("http://orcid.org/", "")
elif orcid.startswith("https://orcid.org/"):
orcid = orcid.replace("https://orcid.org/", "")
- elif not '-' in orcid:
+ elif '-' not in orcid:
orcid = "{}-{}-{}-{}".format(
orcid[0:4],
orcid[4:8],
diff --git a/python/fatcat_tools/importers/shadow.py b/python/fatcat_tools/importers/shadow.py
index fa9b4d10..77205cee 100644
--- a/python/fatcat_tools/importers/shadow.py
+++ b/python/fatcat_tools/importers/shadow.py
@@ -95,7 +95,7 @@ class ShadowLibraryImporter(EntityImporter):
urls = []
if obj.get('cdx'):
url = make_rel_url(obj['cdx']['url'], default_link_rel=self.default_link_rel)
- if url != None:
+ if url is not None:
urls.append(url)
wayback = "https://web.archive.org/web/{}/{}".format(
obj['cdx']['datetime'],