diff options
| -rw-r--r-- | fuzzycat/verify.py | 73 | 
1 files changed, 65 insertions, 8 deletions
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py index 79f7080..4e12dba 100644 --- a/fuzzycat/verify.py +++ b/fuzzycat/verify.py @@ -63,19 +63,13 @@ get_key_values = operator.itemgetter("k", "v")  # There titles appear too often, so ignore them for now.  TITLE_BLACKLIST = set([ -    "communications to the editor", -    "eingesandte schriften", -    "medical diary." -    "aacci corporate members", -    "a personal note", -    "new members", -    "special announcement", -    "norway",      "",      ":{unav)",      "[others]",      "[s.n.]",      "a correction", +    "a personal note", +    "aacci corporate members",      "abbildung",      "abbildungsnachweis",      "abbreviations and acronyms", @@ -108,8 +102,10 @@ TITLE_BLACKLIST = set([      "announcements",      "annual meeting",      "annual report", +    "appendix c",      "appendix d",      "appendix d.", +    "archaeology",      "around the world",      "arthrobacter sp.",      "association notes", @@ -123,35 +119,49 @@ TITLE_BLACKLIST = set([      "bericht",      "beyond the flyleaf",      "bibliography", +    "book received",      "book review",      "book reviews",      "books received",      "bookseller's catalogue",      "boundary creek times", +    "briefer notices",      "briefs", +    "bulletin board",      "bureau of investigation",      "calendar",      "canto",      "canto", +    "communications to the editor", +    "company news",      "conclusion",      "conclusions",      "conference report", +    "conferences",      "contents", +    "continuing education",      "contributors",      "copyright",      "correction",      "correspondence",      "corrigendum",      "cover", +    "daily building record", +    "data_sheet_1.zip",      "dedication", +    "diary of events",      "discussion",      "editorial board and publication information",      "editorial board", +    "editorial committee",      "editorial",      "editors/ editorial board", +    "educational intelligence",      "educators personally", +    "eingesandte schriften",      "einleitung",      "erratum", +    "events calendar",      "foreword",      "front & back matter",      "front cover", @@ -159,59 +169,106 @@ TITLE_BLACKLIST = set([      "frontmatter",      "fundraising",      "gbif occurrence download", +    "geleitwort", +    "general information",      "general medical council", +    "general", +    "geographical notes", +    "i. einleitung",      "in this issue",      "index des auteurs",      "index des noms de personnes",      "index", +    "industrial literature", +    "industry news", +    "industry",      "inhalt",      "inhalt-impressum",      "inhalt.impressum",      "interlude",      "introduction", +    "issue highlights",      "issue information", +    "journal scan",      "kongresskalender" +    "les auteurs",      "letter to the editor",      "letters to the editor",      "list of delegates", +    "list of figures and tables",      "map projections",      "masthead",      "medical annotations",      "medical annotations.", +    "medical diary of the week.", +    "medical diary." +    "medical diary.",      "methotrexate", +    "mexico",      "miscellany", +    "moyie leader", +    "nachwort", +    "new books, etc.", +    "new members",      "news section",      "news", +    "nivolumab", +    "norway",      "not available",      "note of appreciation / note de reconnaissance", +    "notes for contributors",      "notes", +    "notice to contributors", +    "nova et vetera", +    "obituary notices",      "occurrence download", +    "official photograph taken on the british western front in france",      "oup accepted manuscript", +    "outside front cover",      "parliamentary intelligence", +    "patent report", +    "people and events",      "people and places",      "petitions.xlsx",      "positions available",      "preface",      "preliminary material", +    "preparations and appliances",      "preservation image",      "proceedings of societies",      "production", +    "recent books", +    "recto",      "references",      "regulations",      "reply",      "research items", +    "reviews and notices",      "reviews of books",      "reviews",      "schlussbemerkung",      "short notices", +    "society news", +    "special announcement", +    "st. bartholomew's hospital",      "streptomyces sp.",      "subject index", +    "subscription page",      "table of contents",      "taxonomic abstract for the species.",      "thank you",      "the applause data release 2", +    "the new westminster news", +    "titelseiten", +    "title page/editorial board",      "transactions", +    "transcript", +    "unidentified", +    "united kingdom",      "veranstaltungen", +    "veranstaltungskalender", +    "verein deutscher chemiker", +    "vorrede",      "奥付",      "投稿規定",      "目次",  | 
