about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-11-30 17:04:41 -0800
committer Bryan Newbold <bnewbold@archive.org> 2021-11-30 17:04:41 -0800
commit a53dac1a9787abc9b25008b96505b450eee010ac (patch)
tree fdc239b37256ef659fe2e6a834f595eff17f49a3
parent c535cbf365ccf13781dc5abf086ed94017c760fa (diff)
download chocula-a53dac1a9787abc9b25008b96505b450eee010ac.tar.gz
chocula-a53dac1a9787abc9b25008b96505b450eee010ac.zip
in fatcat exports, skip 'UNKNOWN_TITLE'
-rw-r--r-- chocula/database.py 5
1 files changed, 5 insertions, 0 deletions
diff --git a/chocula/database.py b/chocula/database.py
index f3ae536..db14700 100644
--- a/chocula/database.py
+++ b/chocula/database.py
@@ -696,6 +696,10 @@ class ChoculaDatabase:
counts["empty-name"] += 1
continue
+ if "UNKNOWN_TITLE" in row["name"]:
+ counts["empty-name"] += 1
+ continue
+
if len(name) <= 2:
counts["short-name"] += 1
continue
@@ -774,6 +778,7 @@ class ChoculaDatabase:
"nla.gov.au",
"purl.nla.gov.au",
"www.bibliothek.uni-regensburg.de",
+ "zdb.uni-bielefeld.de",
):
# individual books or google searches, not journal/conference homepages
# LOC scanned newspapers