diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-23 18:33:01 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-23 18:33:01 -0700 |
commit | 2ad4ad111c67dd5c3c9f91a3b2cd56325a7022d6 (patch) | |
tree | 80ddc8983c3a37f1d5fc42c087f86aaa2145ac2c | |
parent | 1c768554d1ccaa270b267940aeca2e9bed885bf2 (diff) | |
download | chocula-2ad4ad111c67dd5c3c9f91a3b2cd56325a7022d6.tar.gz chocula-2ad4ad111c67dd5c3c9f91a3b2cd56325a7022d6.zip |
improve lang parsing
-rw-r--r-- | chocula/directories/awol.py | 6 | ||||
-rw-r--r-- | chocula/directories/scielo.py | 6 | ||||
-rw-r--r-- | chocula/directories/sim.py | 2 | ||||
-rw-r--r-- | chocula/directories/vanished_disapeared.py | 2 | ||||
-rw-r--r-- | chocula/directories/vanished_inactive.py | 2 |
5 files changed, 11 insertions, 7 deletions
```diff
diff --git a/chocula/directories/awol.py b/chocula/directories/awol.py
index f780c6e..2642f0e 100644
--- a/chocula/directories/awol.py
+++ b/chocula/directories/awol.py
@@ -1,7 +1,7 @@
 from typing import Iterable, Optional
 import json
 
-from chocula.util import clean_str, clean_issn
+from chocula.util import clean_str, clean_issn, parse_lang
 from chocula.common import DirectoryLoader
 from chocula.database import DirectoryInfo, HomepageUrl
 
@@ -67,7 +67,9 @@ class AwolLoader(DirectoryLoader):
             issne=clean_issn(issn_info.get("electronic", "")),
             issnp=clean_issn(issn_info.get("print", "")),
             name=clean_str(record.get("title")),
-            langs=list(filter(lambda s: len(s) == 2, record["languages"])),
+            langs=[
+                lang for lang in [parse_lang(s) for s in record["languages"]] if lang
+            ],
         )
         if record["url"]:
             homepage = HomepageUrl.from_url(record["url"])
diff --git a/chocula/directories/scielo.py b/chocula/directories/scielo.py
index 0ed8fde..728ce02 100644
--- a/chocula/directories/scielo.py
+++ b/chocula/directories/scielo.py
@@ -1,7 +1,7 @@
 from typing import Iterable, Optional
 import json
 
-from chocula.util import clean_str, clean_issn
+from chocula.util import clean_str, clean_issn, parse_lang
 from chocula.common import DirectoryLoader
 from chocula.database import DirectoryInfo, HomepageUrl
 
@@ -35,7 +35,9 @@ class ScieloLoader(DirectoryLoader):
             publisher=clean_str((record.get("publisher_name") or [""])[0]),
             abbrev=clean_str(record["abbreviated_iso_title"]),
             platform="scielo",
-            langs=list(filter(lambda s: len(s) == 2, record["languages"])),
+            langs=[
+                lang for lang in [parse_lang(s) for s in record["languages"]] if lang
+            ],
             country=country,
             extra=extra,
         )
diff --git a/chocula/directories/sim.py b/chocula/directories/sim.py
index 5dde9cc..fedcc29 100644
--- a/chocula/directories/sim.py
+++ b/chocula/directories/sim.py
@@ -67,7 +67,7 @@ class SimLoader(DirectoryLoader):
             publisher=clean_str(row["Publisher"]),
             raw_issn=row["ISSN"][:9],
             custom_id=row.get("NA Pub Cat ID").strip() or None,
-            langs=[parse_lang(row["Pub Language"])],
+            langs=[lang for lang in [parse_lang(row["Pub Language"])] if lang],
             extra=extra,
         )
         return info
diff --git a/chocula/directories/vanished_disapeared.py b/chocula/directories/vanished_disapeared.py
index a5e4c38..c9d2bf9 100644
--- a/chocula/directories/vanished_disapeared.py
+++ b/chocula/directories/vanished_disapeared.py
@@ -51,7 +51,7 @@ class VanishedDisapearedLoader(DirectoryLoader):
             issne=clean_issn(record["E-ISSN"]),
             name=clean_str(record["Journal Name"]),
             publisher=clean_str(record["Publisher"]),
-            langs=[parse_lang(record["Language(s)"])],
+            langs=[lang for lang in [parse_lang(record["Language(s)"])] if lang],
             country=parse_country(record["Country"]),
         )
         homepage = HomepageUrl.from_url(record["Internet Archive Link"])
diff --git a/chocula/directories/vanished_inactive.py b/chocula/directories/vanished_inactive.py
index 7996084..253940c 100644
--- a/chocula/directories/vanished_inactive.py
+++ b/chocula/directories/vanished_inactive.py
@@ -44,7 +44,7 @@ class VanishedInactiveLoader(DirectoryLoader):
             issne=clean_issn(record["EISSN"]),
             name=clean_str(record["Title"]),
             publisher=clean_str(record["Publisher"]),
-            langs=[parse_lang(record["Language"])],
+            langs=[lang for lang in [parse_lang(record["Language"])] if lang],
             country=parse_country(record["Country"]),
         )
         return info
```