From 70894830001d2698d341f085fc3f2c6409171342 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Tue, 17 Nov 2020 03:12:54 +0100 Subject: update blacklist --- fuzzycat/verify.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py index 704b034..180a6ac 100644 --- a/fuzzycat/verify.py +++ b/fuzzycat/verify.py @@ -64,18 +64,6 @@ get_key_values = operator.itemgetter("k", "v") # There titles appear too often, so ignore them for now. TITLE_BLACKLIST = set([ - "actualités professionnelles", - "association notes", - "addenda", - "beyond the flyleaf", - "schlussbemerkung", - "editors/ editorial board", - "conference report", - "editorial board and publication information", - "front & back matter", - "abstract withdrawn", - "briefs", - "proceedings of societies", "", ":{unav)", "[others]", @@ -91,15 +79,20 @@ TITLE_BLACKLIST = set([ "about this journal", "about this title", "abréviations", + "abstract withdrawn", "abstracts of papers from other journals", "abstracts of papers to appear in future issues", "abstracts", + "background", "acknowledgement of reviewers", + "acknowledgement to reviewers", "acknowledgements to reviewers", "acknowledgements", "acknowledgment of reviewers", "acknowledgments", + "actualités professionnelles", "actualités", + "addenda", "agradecimento", "agradecimientos", "all pdfs of this category", @@ -113,6 +106,7 @@ TITLE_BLACKLIST = set([ "appendix d.", "around the world", "arthrobacter sp.", + "association notes", "aufgaben", "ausgewählte literatur", "author index", @@ -120,17 +114,20 @@ TITLE_BLACKLIST = set([ "back matter", "backmatter", "bericht", + "beyond the flyleaf", "bibliography", "book review", "book reviews", "books received", "bookseller's catalogue", + "briefs", "bureau of investigation", "calendar", "canto", "canto", "conclusion", "conclusions", + "conference report", "contents", "contributors", "copyright", @@ -140,12 +137,15 @@ TITLE_BLACKLIST = set([ "cover", "dedication", "discussion", + "editorial board and publication information", "editorial board", "editorial", + "editors/ editorial board", "educators personally", "einleitung", "erratum", "foreword", + "front & back matter", "front cover", "front matter", "frontmatter", @@ -176,11 +176,13 @@ TITLE_BLACKLIST = set([ "occurrence download", "oup accepted manuscript", "parliamentary intelligence", + "people and places", "petitions.xlsx", "positions available", "preface", "preliminary material", "preservation image", + "proceedings of societies", "production", "references", "regulations", @@ -188,6 +190,7 @@ TITLE_BLACKLIST = set([ "research items", "reviews of books", "reviews", + "schlussbemerkung", "short notices", "streptomyces sp.", "subject index", -- cgit v1.2.3