From 2ad4ad111c67dd5c3c9f91a3b2cd56325a7022d6 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 23 Jun 2020 18:33:01 -0700 Subject: improve lang parsing --- chocula/directories/awol.py | 6 ++++-- chocula/directories/scielo.py | 6 ++++-- chocula/directories/sim.py | 2 +- chocula/directories/vanished_disapeared.py | 2 +- chocula/directories/vanished_inactive.py | 2 +- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/chocula/directories/awol.py b/chocula/directories/awol.py index f780c6e..2642f0e 100644 --- a/chocula/directories/awol.py +++ b/chocula/directories/awol.py @@ -1,7 +1,7 @@ from typing import Iterable, Optional import json -from chocula.util import clean_str, clean_issn +from chocula.util import clean_str, clean_issn, parse_lang from chocula.common import DirectoryLoader from chocula.database import DirectoryInfo, HomepageUrl @@ -67,7 +67,9 @@ class AwolLoader(DirectoryLoader): issne=clean_issn(issn_info.get("electronic", "")), issnp=clean_issn(issn_info.get("print", "")), name=clean_str(record.get("title")), - langs=list(filter(lambda s: len(s) == 2, record["languages"])), + langs=[ + lang for lang in [parse_lang(s) for s in record["languages"]] if lang + ], ) if record["url"]: homepage = HomepageUrl.from_url(record["url"]) diff --git a/chocula/directories/scielo.py b/chocula/directories/scielo.py index 0ed8fde..728ce02 100644 --- a/chocula/directories/scielo.py +++ b/chocula/directories/scielo.py @@ -1,7 +1,7 @@ from typing import Iterable, Optional import json -from chocula.util import clean_str, clean_issn +from chocula.util import clean_str, clean_issn, parse_lang from chocula.common import DirectoryLoader from chocula.database import DirectoryInfo, HomepageUrl @@ -35,7 +35,9 @@ class ScieloLoader(DirectoryLoader): publisher=clean_str((record.get("publisher_name") or [""])[0]), abbrev=clean_str(record["abbreviated_iso_title"]), platform="scielo", - langs=list(filter(lambda s: len(s) == 2, record["languages"])), + langs=[ + lang for lang in [parse_lang(s) for s in record["languages"]] if lang + ], country=country, extra=extra, ) diff --git a/chocula/directories/sim.py b/chocula/directories/sim.py index 5dde9cc..fedcc29 100644 --- a/chocula/directories/sim.py +++ b/chocula/directories/sim.py @@ -67,7 +67,7 @@ class SimLoader(DirectoryLoader): publisher=clean_str(row["Publisher"]), raw_issn=row["ISSN"][:9], custom_id=row.get("NA Pub Cat ID").strip() or None, - langs=[parse_lang(row["Pub Language"])], + langs=[lang for lang in [parse_lang(row["Pub Language"])] if lang], extra=extra, ) return info diff --git a/chocula/directories/vanished_disapeared.py b/chocula/directories/vanished_disapeared.py index a5e4c38..c9d2bf9 100644 --- a/chocula/directories/vanished_disapeared.py +++ b/chocula/directories/vanished_disapeared.py @@ -51,7 +51,7 @@ class VanishedDisapearedLoader(DirectoryLoader): issne=clean_issn(record["E-ISSN"]), name=clean_str(record["Journal Name"]), publisher=clean_str(record["Publisher"]), - langs=[parse_lang(record["Language(s)"])], + langs=[lang for lang in [parse_lang(record["Language(s)"])] if lang], country=parse_country(record["Country"]), ) homepage = HomepageUrl.from_url(record["Internet Archive Link"]) diff --git a/chocula/directories/vanished_inactive.py b/chocula/directories/vanished_inactive.py index 7996084..253940c 100644 --- a/chocula/directories/vanished_inactive.py +++ b/chocula/directories/vanished_inactive.py @@ -44,7 +44,7 @@ class VanishedInactiveLoader(DirectoryLoader): issne=clean_issn(record["EISSN"]), name=clean_str(record["Title"]), publisher=clean_str(record["Publisher"]), - langs=[parse_lang(record["Language"])], + langs=[lang for lang in [parse_lang(record["Language"])] if lang], country=parse_country(record["Country"]), ) return info -- cgit v1.2.3