about summary refs log tree commit diff stats
path: root/python
diff options
context:
space:
mode:
Diffstat (limited to 'python')
-rw-r--r-- python/sandcrawler/ingest_file.py 2
-rw-r--r-- python/sandcrawler/ingest_fileset.py 2
2 files changed, 2 insertions, 2 deletions
diff --git a/python/sandcrawler/ingest_file.py b/python/sandcrawler/ingest_file.py
index b3f2a8e..49c7ddf 100644
--- a/python/sandcrawler/ingest_file.py
+++ b/python/sandcrawler/ingest_file.py
@@ -682,7 +682,7 @@ class IngestFileWorker(SandcrawlerWorker):
html_doc = HTMLParser(resource.body)
html_biblio = html_extract_biblio(resource.terminal_url, html_doc)
if html_biblio:
- if not 'html_biblio' in result or html_biblio.title:
+ if 'html_biblio' not in result and html_biblio.title:
result['html_biblio'] = json.loads(
html_biblio.json(exclude_none=True))
#print(f" setting html_biblio: {result['html_biblio']}", file=sys.stderr)
diff --git a/python/sandcrawler/ingest_fileset.py b/python/sandcrawler/ingest_fileset.py
index bf06a39..d88fb46 100644
--- a/python/sandcrawler/ingest_fileset.py
+++ b/python/sandcrawler/ingest_fileset.py
@@ -177,7 +177,7 @@ class IngestFilesetWorker(IngestFileWorker):
html_doc = HTMLParser(resource.body)
html_biblio = html_extract_biblio(resource.terminal_url, html_doc)
if html_biblio:
- if not 'html_biblio' in result or html_biblio.title:
+ if 'html_biblio' not in result and html_biblio.title:
result['html_biblio'] = json.loads(
html_biblio.json(exclude_none=True))
#print(f" setting html_biblio: {result['html_biblio']}", file=sys.stderr)