diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-08-12 15:02:36 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-08-12 15:02:36 +0200 |
commit | cf9e283239794247a849b1ad788fa49e664db96e (patch) | |
tree | ea0e39e8112f394cfda0ca8ab79f3df77d4a55e3 /fuzzycat/utils.py | |
parent | 5a307829670888fedd696e6220c84feed1fe6b64 (diff) | |
download | fuzzycat-cf9e283239794247a849b1ad788fa49e664db96e.tar.gz fuzzycat-cf9e283239794247a849b1ad788fa49e664db96e.zip |
issn: generate a name to issn mapping
This allows us to make suggestions about potentially ambiguous titles.
Maybe suggest a minimal length.
Ultimately, there are only about 2M journal titles. If an arbitrary
string must match a journal title (not a generic container title), then
we can use a combination of direct lookup, plus some extra processing
based on this dataset.
Diffstat (limited to 'fuzzycat/utils.py')
-rw-r--r-- | fuzzycat/utils.py | 16 |
1 files changed, 16 insertions, 0 deletions
diff --git a/fuzzycat/utils.py b/fuzzycat/utils.py
index 3a4be99..97125ce 100644
--- a/fuzzycat/utils.py
+++ b/fuzzycat/utils.py
@@ -2,6 +2,7 @@
 
 import collections
 import itertools
+import json
 import re
 import string
 from typing import Any, Callable, DefaultDict, Dict, List, Optional, Sequence
@@ -10,6 +11,21 @@
 A couple of utilities, may be split up into separate modules.
 """
 
+class SetEncoder(json.JSONEncoder):
+    """
+    Helper to encode python sets into JSON lists.
+    So you can write something like this:
+    json.dumps({"things": set([1, 2, 3])}, cls=SetEncoder)
+    """
+    def default(self, obj):
+        """
+        Decorate call to standard implementation.
+        """
+        if isinstance(obj, set):
+            return list(obj)
+        return json.JSONEncoder.default(self, obj)
+
+
 class StringPipeline:
     """
     Minimalistic grouping of functions applied on an input string to produce