diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-12-14 20:31:54 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-12-14 20:31:54 +0100 |
commit | 54a5fc7f3fa6893a83143a1755aa6b4497efa33c (patch) | |
tree | cdce9081d3e4410e757c7a95d57bd8509e7cf57c /fuzzycat/utils.py | |
parent | d3891ff1242627464e7e0eee68ab07a61c0678d4 (diff) | |
download | fuzzycat-54a5fc7f3fa6893a83143a1755aa6b4497efa33c.tar.gz fuzzycat-54a5fc7f3fa6893a83143a1755aa6b4497efa33c.zip |
verify: move out some code to utils
Diffstat (limited to 'fuzzycat/utils.py')
-rw-r--r-- | fuzzycat/utils.py | 24 |
1 files changed, 22 insertions, 2 deletions
diff --git a/fuzzycat/utils.py b/fuzzycat/utils.py index 7f08fa3..682f912 100644 --- a/fuzzycat/utils.py +++ b/fuzzycat/utils.py @@ -1,3 +1,4 @@ +import collections import io import itertools import re @@ -10,15 +11,34 @@ printable_no_punct = string.digits + string.ascii_letters + string.whitespace # More correct: https://www.johndcook.com/blog/2016/02/04/regular-expression-to-match-a-chemical-element/ CHEM_FORMULA = re.compile(r"([A-Z]{1,2}[0-9]{1,2})+") +ParsedPages = collections.namedtuple("ParsedPages", "start end count") + def parse_page_string(s): """ Parse typical page strings, e.g. 150-180. """ - raise NotImplementedError() + if not s: + raise ValueError('page parsing: empty string') + if s.isnumeric(): + return ParsedPages(start=int(s), end=int(s), count=1) + page_pattern = re.compile("([0-9]{1,})-([0-9]{1,})") + match = page_pattern.match(s) + if not match: + raise ValueError('cannot parse page pattern from {}'.format(s)) + start, end = match.groups() + if len(end) == 1 and start and start[-1] < end: + # 261-5, odd, but happens + end = start[:-1] + end + a, b = int(start), int(end) + if a > b: + raise ValueError('invalid page range: {}'.format(s)) + count = b - a + 1 + return ParsedPages(start=a, end=b, count=count) def dict_key_exists(doc, path): """ - Return true, if key at a given path exists. XXX: probably already in glom. + Return true, if key in a dictionary at a given path exists. XXX: probably + already in glom. """ try: _ = glom(doc, path) |