diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-11-17 03:33:45 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-11-17 03:33:45 +0100 |
commit | 49b5c05ae829b8d43736c74d00f93b73d344269a (patch) | |
tree | bca5c8120a061fdb66197a4145d9b7d77ec6efa5 | |
parent | 86d8226840d4db95959db303938a142fa2759aa2 (diff) | |
download | fuzzycat-49b5c05ae829b8d43736c74d00f93b73d344269a.tar.gz fuzzycat-49b5c05ae829b8d43736c74d00f93b73d344269a.zip |
update blacklist
-rw-r--r-- | fuzzycat/verify.py | 73 |
1 files changed, 65 insertions, 8 deletions
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py index 79f7080..4e12dba 100644 --- a/fuzzycat/verify.py +++ b/fuzzycat/verify.py @@ -63,19 +63,13 @@ get_key_values = operator.itemgetter("k", "v") # There titles appear too often, so ignore them for now. TITLE_BLACKLIST = set([ - "communications to the editor", - "eingesandte schriften", - "medical diary." - "aacci corporate members", - "a personal note", - "new members", - "special announcement", - "norway", "", ":{unav)", "[others]", "[s.n.]", "a correction", + "a personal note", + "aacci corporate members", "abbildung", "abbildungsnachweis", "abbreviations and acronyms", @@ -108,8 +102,10 @@ TITLE_BLACKLIST = set([ "announcements", "annual meeting", "annual report", + "appendix c", "appendix d", "appendix d.", + "archaeology", "around the world", "arthrobacter sp.", "association notes", @@ -123,35 +119,49 @@ TITLE_BLACKLIST = set([ "bericht", "beyond the flyleaf", "bibliography", + "book received", "book review", "book reviews", "books received", "bookseller's catalogue", "boundary creek times", + "briefer notices", "briefs", + "bulletin board", "bureau of investigation", "calendar", "canto", "canto", + "communications to the editor", + "company news", "conclusion", "conclusions", "conference report", + "conferences", "contents", + "continuing education", "contributors", "copyright", "correction", "correspondence", "corrigendum", "cover", + "daily building record", + "data_sheet_1.zip", "dedication", + "diary of events", "discussion", "editorial board and publication information", "editorial board", + "editorial committee", "editorial", "editors/ editorial board", + "educational intelligence", "educators personally", + "eingesandte schriften", "einleitung", "erratum", + "events calendar", "foreword", "front & back matter", "front cover", @@ -159,59 +169,106 @@ TITLE_BLACKLIST = set([ "frontmatter", "fundraising", "gbif occurrence download", + "geleitwort", + "general information", "general medical council", + "general", + "geographical notes", + "i. einleitung", "in this issue", "index des auteurs", "index des noms de personnes", "index", + "industrial literature", + "industry news", + "industry", "inhalt", "inhalt-impressum", "inhalt.impressum", "interlude", "introduction", + "issue highlights", "issue information", + "journal scan", "kongresskalender" + "les auteurs", "letter to the editor", "letters to the editor", "list of delegates", + "list of figures and tables", "map projections", "masthead", "medical annotations", "medical annotations.", + "medical diary of the week.", + "medical diary." + "medical diary.", "methotrexate", + "mexico", "miscellany", + "moyie leader", + "nachwort", + "new books, etc.", + "new members", "news section", "news", + "nivolumab", + "norway", "not available", "note of appreciation / note de reconnaissance", + "notes for contributors", "notes", + "notice to contributors", + "nova et vetera", + "obituary notices", "occurrence download", + "official photograph taken on the british western front in france", "oup accepted manuscript", + "outside front cover", "parliamentary intelligence", + "patent report", + "people and events", "people and places", "petitions.xlsx", "positions available", "preface", "preliminary material", + "preparations and appliances", "preservation image", "proceedings of societies", "production", + "recent books", + "recto", "references", "regulations", "reply", "research items", + "reviews and notices", "reviews of books", "reviews", "schlussbemerkung", "short notices", + "society news", + "special announcement", + "st. bartholomew's hospital", "streptomyces sp.", "subject index", + "subscription page", "table of contents", "taxonomic abstract for the species.", "thank you", "the applause data release 2", + "the new westminster news", + "titelseiten", + "title page/editorial board", "transactions", + "transcript", + "unidentified", + "united kingdom", "veranstaltungen", + "veranstaltungskalender", + "verein deutscher chemiker", + "vorrede", "奥付", "投稿規定", "目次", |