From 41fae4c294e2ba43370b4a4193c0f6107201dbf0 Mon Sep 17 00:00:00 2001
From: Bryan Newbold <bnewbold@archive.org>
Date: Tue, 26 Oct 2021 18:13:38 -0700
Subject: bugfix: setting html_biblio on ingest results

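This was caught during lint cleanup. The guard previously read
`not 'html_biblio' in result or html_biblio.title`; it now reads
`'html_biblio' not in result and html_biblio.title`, so html_biblio is
only set when the result does not already have one and the extracted
biblio has a title.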
---
 python/sandcrawler/ingest_fileset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'python/sandcrawler/ingest_fileset.py')

diff --git a/python/sandcrawler/ingest_fileset.py b/python/sandcrawler/ingest_fileset.py
index bf06a39..d88fb46 100644
--- a/python/sandcrawler/ingest_fileset.py
+++ b/python/sandcrawler/ingest_fileset.py
@@ -177,7 +177,7 @@ class IngestFilesetWorker(IngestFileWorker):
                     html_doc = HTMLParser(resource.body)
                     html_biblio = html_extract_biblio(resource.terminal_url, html_doc)
                     if html_biblio:
-                        if not 'html_biblio' in result or html_biblio.title:
+                        if 'html_biblio' not in result and html_biblio.title:
                             result['html_biblio'] = json.loads(
                                 html_biblio.json(exclude_none=True))
                             #print(f"  setting html_biblio: {result['html_biblio']}", file=sys.stderr)
-- 
cgit v1.2.3