diff options
-rw-r--r-- | fuzzycat/cleanups.py | 17 | ||||
-rw-r--r-- | fuzzycat/journals.py | 30 |
2 files changed, 47 insertions, 0 deletions
diff --git a/fuzzycat/cleanups.py b/fuzzycat/cleanups.py new file mode 100644 index 0000000..d806e51 --- /dev/null +++ b/fuzzycat/cleanups.py @@ -0,0 +1,17 @@ + +""" +Various shared cleanup approaches. +""" + +from fuzzycat.utils import StringPipeline, normalize_whitespace, normalize_ampersand + + +# These transformations should not affect the name of a journal. +basic = StringPipeline([ + str.lower, + normalize_whitespace, + normalize_ampersand, + lambda v: v.rstrip("."), +]) + + diff --git a/fuzzycat/journals.py b/fuzzycat/journals.py new file mode 100644 index 0000000..8c61f3a --- /dev/null +++ b/fuzzycat/journals.py @@ -0,0 +1,30 @@ +# coding: utf-8 + +""" +Journal name matching. Includes names from ISSN database and abbreviations. +""" + +import shelve + +class JournalLookup: + """ + Allows looking up journals using a database of real journal names. + + >>> lookup = JournalLookup() + >>> lookup["Philosophica"] + {'1857-9272', '2232-299X', '2232-3007', '2232-3015'} + + """ + def __init__(self, namedb='namedb'): + self.db = shelve.open(namedb) + + def __getitem__(self, v): + return self.db[v] + + def get(self, v, cleanup_pipeline=None): + if not cleanup_pipeline: + return self.db[v] + return self.db[cleanup_pipeline(v)] + + def close(self): + self.db.close() |