diff options
-rw-r--r-- | chocula/util.py | 73 |
1 files changed, 39 insertions, 34 deletions
diff --git a/chocula/util.py b/chocula/util.py index bff93ec..a52e1ec 100644 --- a/chocula/util.py +++ b/chocula/util.py @@ -123,27 +123,29 @@ OTHER_PUBLISHERS = [ def parse_lang(s: str) -> Optional[str]: if not s or s in ("Not applicable", "Multiple languages", "Unknown"): return None - s = s.strip().split(',')[0].split()[0] + s = s.strip().split(",")[0].split()[0] try: lang = pycountry.languages.lookup(s) - if lang.alpha_3 in ('mul', 'mis'): + if lang.alpha_3 in ("mul", "mis"): return None return lang.alpha_2.lower() except LookupError: - #print(f"unknown lang: {s}", file=sys.stderr) + # print(f"unknown lang: {s}", file=sys.stderr) return None except AttributeError: print(f"partial lang for s={s}: {lang}", file=sys.stderr) return None + def test_parse_lang(): - assert parse_lang('') is None - assert parse_lang('asdf blah') is None - assert parse_lang('en') == 'en' - assert parse_lang('EN') == 'en' - assert parse_lang('ENG') == 'en' - assert parse_lang('English') == 'en' - assert parse_lang('Portuguese') == 'pt' + assert parse_lang("") is None + assert parse_lang("asdf blah") is None + assert parse_lang("en") == "en" + assert parse_lang("EN") == "en" + assert parse_lang("ENG") == "en" + assert parse_lang("English") == "en" + assert parse_lang("Portuguese") == "pt" + def parse_country(s: str) -> Optional[str]: if not s or s in ("Unknown"): @@ -151,11 +153,11 @@ def parse_country(s: str) -> Optional[str]: s = s.strip() if s.lower() in ("usa", "new york (state)", "washington (state)"): - return 'us' + return "us" if s.lower() in ("russia (federation)", "russia"): - return 'ru' + return "ru" if s == "Québec (Province)": - s = 'Canada' + s = "Canada" if s == "China (Republic : 1949- )": return "tw" if s == "Brunei": @@ -163,16 +165,16 @@ def parse_country(s: str) -> Optional[str]: if s.startswith("Congo "): s = "Congo" if s.lower() == "iran": - return 'ir' + return "ir" if s.lower() == "bermuda islands": - return 'bm' + return "bm" if s.lower() == "burma": - s = 'myanmar' + s = "myanmar" if s.lower() in ("korea (south)", "south korea"): - return 'kr' + return "kr" if s.lower() in ("england", "scotland", "wales"): - return 'uk' - s = s.replace(' (Republic)', '').replace(" (Federation)", '') + return "uk" + s = s.replace(" (Republic)", "").replace(" (Federation)", "") try: country = pycountry.countries.lookup(s) @@ -186,24 +188,25 @@ def parse_country(s: str) -> Optional[str]: except LookupError: sub = None - s = s.replace(' (State)', '').replace(" (Province)", '') + s = s.replace(" (State)", "").replace(" (Province)", "") if sub: return sub.country_code.lower() else: - #print(f"unknown country: {s}", file=sys.stderr) + # print(f"unknown country: {s}", file=sys.stderr) return None + def test_parse_country(): - assert parse_country('') is None - assert parse_country('asdf blah') is None - assert parse_country('us') == 'us' - assert parse_country('USA') == 'us' - assert parse_country('United States of America') == 'us' - assert parse_country('united States') == 'us' - assert parse_country('Massachusetts') == 'us' - assert parse_country('Russia') == 'ru' - assert parse_country('Japan') == 'jp' + assert parse_country("") is None + assert parse_country("asdf blah") is None + assert parse_country("us") == "us" + assert parse_country("USA") == "us" + assert parse_country("United States of America") == "us" + assert parse_country("united States") == "us" + assert parse_country("Massachusetts") == "us" + assert parse_country("Russia") == "ru" + assert parse_country("Japan") == "jp" def parse_mimetypes(val: str) -> Optional[List[str]]: @@ -219,11 +222,13 @@ def parse_mimetypes(val: str) -> Optional[List[str]]: return None return [mimetype] + def test_parse_mimetypes(): - assert parse_mimetypes('') is None - assert parse_mimetypes('asdf blah') is None - assert parse_mimetypes('application/pdf') == ['application/pdf'] - assert parse_mimetypes('PDF') == ['application/pdf'] + assert parse_mimetypes("") is None + assert parse_mimetypes("asdf blah") is None + assert parse_mimetypes("application/pdf") == ["application/pdf"] + assert parse_mimetypes("PDF") == ["application/pdf"] + def gaps_to_spans(first, last, gaps): if not gaps: |