diff options
| author | Martin Czygan <martin.czygan@gmail.com> | 2020-08-15 18:18:25 +0200 | 
|---|---|---|
| committer | Martin Czygan <martin.czygan@gmail.com> | 2020-08-15 18:18:25 +0200 | 
| commit | 9b416db2393988ae5bf097f754e885848ee31636 (patch) | |
| tree | 11f4cbccd131d2d8f8c8b71fcfc3b93fb871181e /fuzzycat | |
| parent | 2f948cfbb484241178fa7e8c7abd8b0c40a9db24 (diff) | |
| download | fuzzycat-9b416db2393988ae5bf097f754e885848ee31636.tar.gz fuzzycat-9b416db2393988ae5bf097f754e885848ee31636.zip  | |
separate cleanups
Diffstat (limited to 'fuzzycat')
| -rw-r--r-- | fuzzycat/cleanups.py | 17 | ||||
| -rw-r--r-- | fuzzycat/journals.py | 30 | 
2 files changed, 47 insertions, 0 deletions
```diff
diff --git a/fuzzycat/cleanups.py b/fuzzycat/cleanups.py
new file mode 100644
index 0000000..d806e51
--- /dev/null
+++ b/fuzzycat/cleanups.py
@@ -0,0 +1,17 @@
+
+"""
+Various shared cleanup approaches.
+"""
+
+from fuzzycat.utils import StringPipeline, normalize_whitespace, normalize_ampersand
+
+
+# These transformations should not affect the name or a journal.
+basic = StringPipeline([
+    str.lower,
+    normalize_whitespace,
+    normalize_ampersand,
+    lambda v: v.rstrip("."),
+])
+
+
diff --git a/fuzzycat/journals.py b/fuzzycat/journals.py
new file mode 100644
index 0000000..8c61f3a
--- /dev/null
+++ b/fuzzycat/journals.py
@@ -0,0 +1,30 @@
+# coding: utf-8
+
+"""
+Journal name matching. Includes names from issn database and abbreviations.
+"""
+
+import shelve
+
+class JournalLookup:
+    """
+    Lookup allows to lookup journals, using a database of real journal names.
+
+        >>> lookup = JournalLookup()
+        >>> lookup["Philosophica"]
+        {'1857-9272', '2232-299X', '2232-3007', '2232-3015'}
+
+    """
+    def __init__(self, namedb='namedb'):
+        self.db = shelve.open(namedb)
+
+    def __getitem__(self, v):
+        return self.db[v]
+
+    def get(self, v, cleanup_pipeline=None):
+        if not cleanup_pipeline:
+            return self.db[v]
+        return self.db[cleanup_pipeline(v)]
+
+    def close(self):
+        self.db.close()
```
