about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-06-23 18:33:01 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-06-23 18:33:01 -0700
commit 2ad4ad111c67dd5c3c9f91a3b2cd56325a7022d6 (patch)
tree 80ddc8983c3a37f1d5fc42c087f86aaa2145ac2c
parent 1c768554d1ccaa270b267940aeca2e9bed885bf2 (diff)
download chocula-2ad4ad111c67dd5c3c9f91a3b2cd56325a7022d6.tar.gz
chocula-2ad4ad111c67dd5c3c9f91a3b2cd56325a7022d6.zip
improve lang parsing
-rw-r--r-- chocula/directories/awol.py 6
-rw-r--r-- chocula/directories/scielo.py 6
-rw-r--r-- chocula/directories/sim.py 2
-rw-r--r-- chocula/directories/vanished_disapeared.py 2
-rw-r--r-- chocula/directories/vanished_inactive.py 2
5 files changed, 11 insertions, 7 deletions
diff --git a/chocula/directories/awol.py b/chocula/directories/awol.py
index f780c6e..2642f0e 100644
--- a/chocula/directories/awol.py
+++ b/chocula/directories/awol.py
@@ -1,7 +1,7 @@
from typing import Iterable, Optional
import json
-from chocula.util import clean_str, clean_issn
+from chocula.util import clean_str, clean_issn, parse_lang
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo, HomepageUrl
@@ -67,7 +67,9 @@ class AwolLoader(DirectoryLoader):
issne=clean_issn(issn_info.get("electronic", "")),
issnp=clean_issn(issn_info.get("print", "")),
name=clean_str(record.get("title")),
- langs=list(filter(lambda s: len(s) == 2, record["languages"])),
+ langs=[
+ lang for lang in [parse_lang(s) for s in record["languages"]] if lang
+ ],
)
if record["url"]:
homepage = HomepageUrl.from_url(record["url"])
diff --git a/chocula/directories/scielo.py b/chocula/directories/scielo.py
index 0ed8fde..728ce02 100644
--- a/chocula/directories/scielo.py
+++ b/chocula/directories/scielo.py
@@ -1,7 +1,7 @@
from typing import Iterable, Optional
import json
-from chocula.util import clean_str, clean_issn
+from chocula.util import clean_str, clean_issn, parse_lang
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo, HomepageUrl
@@ -35,7 +35,9 @@ class ScieloLoader(DirectoryLoader):
publisher=clean_str((record.get("publisher_name") or [""])[0]),
abbrev=clean_str(record["abbreviated_iso_title"]),
platform="scielo",
- langs=list(filter(lambda s: len(s) == 2, record["languages"])),
+ langs=[
+ lang for lang in [parse_lang(s) for s in record["languages"]] if lang
+ ],
country=country,
extra=extra,
)
diff --git a/chocula/directories/sim.py b/chocula/directories/sim.py
index 5dde9cc..fedcc29 100644
--- a/chocula/directories/sim.py
+++ b/chocula/directories/sim.py
@@ -67,7 +67,7 @@ class SimLoader(DirectoryLoader):
publisher=clean_str(row["Publisher"]),
raw_issn=row["ISSN"][:9],
custom_id=row.get("NA Pub Cat ID").strip() or None,
- langs=[parse_lang(row["Pub Language"])],
+ langs=[lang for lang in [parse_lang(row["Pub Language"])] if lang],
extra=extra,
)
return info
diff --git a/chocula/directories/vanished_disapeared.py b/chocula/directories/vanished_disapeared.py
index a5e4c38..c9d2bf9 100644
--- a/chocula/directories/vanished_disapeared.py
+++ b/chocula/directories/vanished_disapeared.py
@@ -51,7 +51,7 @@ class VanishedDisapearedLoader(DirectoryLoader):
issne=clean_issn(record["E-ISSN"]),
name=clean_str(record["Journal Name"]),
publisher=clean_str(record["Publisher"]),
- langs=[parse_lang(record["Language(s)"])],
+ langs=[lang for lang in [parse_lang(record["Language(s)"])] if lang],
country=parse_country(record["Country"]),
)
homepage = HomepageUrl.from_url(record["Internet Archive Link"])
diff --git a/chocula/directories/vanished_inactive.py b/chocula/directories/vanished_inactive.py
index 7996084..253940c 100644
--- a/chocula/directories/vanished_inactive.py
+++ b/chocula/directories/vanished_inactive.py
@@ -44,7 +44,7 @@ class VanishedInactiveLoader(DirectoryLoader):
issne=clean_issn(record["EISSN"]),
name=clean_str(record["Title"]),
publisher=clean_str(record["Publisher"]),
- langs=[parse_lang(record["Language"])],
+ langs=[lang for lang in [parse_lang(record["Language"])] if lang],
country=parse_country(record["Country"]),
)
return info