aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/misc.py
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2021-10-26 18:12:23 -0700
committerBryan Newbold <bnewbold@archive.org>2021-10-26 18:12:23 -0700
commit485dd2cfd120c52bbc5cc7745e44176d1003b40d (patch)
tree966bf78a4bd3cc1f6c94efb8fc3054a8a441dab0 /python/sandcrawler/misc.py
parent7087e7f65d8b81e29af44a43c1067bb2ec618c4e (diff)
downloadsandcrawler-485dd2cfd120c52bbc5cc7745e44176d1003b40d.tar.gz
sandcrawler-485dd2cfd120c52bbc5cc7745e44176d1003b40d.zip
lint collection membership (last lint for now)
Diffstat (limited to 'python/sandcrawler/misc.py')
-rw-r--r--python/sandcrawler/misc.py4
1 files changed, 2 insertions, 2 deletions
diff --git a/python/sandcrawler/misc.py b/python/sandcrawler/misc.py
index 83a4626..1c779ce 100644
--- a/python/sandcrawler/misc.py
+++ b/python/sandcrawler/misc.py
@@ -58,7 +58,7 @@ def gen_file_metadata(blob: bytes, allow_empty: bool = False) -> dict:
# crude checks for XHTML or JATS XML, using only first 1 kB of file
if b"<htm" in blob[:1024] and b'xmlns="http://www.w3.org/1999/xhtml"' in blob[:1024]:
mimetype = "application/xhtml+xml"
- elif b"<article " in blob[:1024] and not b"<html" in blob[:1024]:
+ elif b"<article " in blob[:1024] and b"<html" not in blob[:1024]:
mimetype = "application/jats+xml"
hashes = [
hashlib.sha1(),
@@ -88,7 +88,7 @@ def gen_file_metadata_path(path: str, allow_empty: bool = False) -> dict:
# crude checks for XHTML or JATS XML, using only first 1 kB of file
if b"<htm" in blob[:1024] and b'xmlns="http://www.w3.org/1999/xhtml"' in blob[:1024]:
mimetype = "application/xhtml+xml"
- elif b"<article " in blob[:1024] and not b"<html" in blob[:1024]:
+ elif b"<article " in blob[:1024] and b"<html" not in blob[:1024]:
mimetype = "application/jats+xml"
hashes = [
hashlib.sha1(),