diff options
Diffstat (limited to 'chocula/directories/vanished_inactive.py')
-rw-r--r-- | chocula/directories/vanished_inactive.py | 32 |
1 files changed, 13 insertions, 19 deletions
diff --git a/chocula/directories/vanished_inactive.py b/chocula/directories/vanished_inactive.py
index 253940c..8b23525 100644
--- a/chocula/directories/vanished_inactive.py
+++ b/chocula/directories/vanished_inactive.py
@@ -1,9 +1,9 @@
 import csv
 from typing import Iterable, Optional
 
-from chocula.util import clean_str, clean_issn, parse_lang, parse_country
+from chocula.util import clean_str, clean_issn
 from chocula.common import DirectoryLoader
-from chocula.database import DirectoryInfo
+from chocula.database import DirectoryInfo, HomepageUrl
 
 
 class VanishedInactiveLoader(DirectoryLoader):
@@ -13,38 +13,32 @@ class VanishedInactiveLoader(DirectoryLoader):
 
     CSV headers:
 
-        - Source
         - Title
-        - Identifier
-        - Publisher
-        - Comment
-        - Language
+        - URL
         - ISSN
         - EISSN
-        - Keyword
-        - Start Year
-        - End Year
-        - Added on date
-        - Subjects
-        - Country
-        - Publication fee
-        - Further Information
     """
 
     source_slug = "vanished_inactive"
 
     def open_file(self) -> Iterable:
-        return csv.DictReader(open(self.config.vanished_inactive.filepath))
+        return csv.DictReader(open(self.config.vanished_inactive.filepath), delimiter=";")
 
     def parse_record(self, record) -> Optional[DirectoryInfo]:
+        # HACK
+        record["Title"] = record["\ufeffTitle"]
+        if not record["Title"]:
+            return None
+
         info = DirectoryInfo(
             directory_slug=self.source_slug,
             raw_issn=clean_issn(record["ISSN"]),
             issne=clean_issn(record["EISSN"]),
             name=clean_str(record["Title"]),
-            publisher=clean_str(record["Publisher"]),
-            langs=[lang for lang in [parse_lang(record["Language"])] if lang],
-            country=parse_country(record["Country"]),
         )
+
+        homepage = HomepageUrl.from_url(record["URL"])
+        if homepage:
+            info.homepage_urls.append(homepage)
         return info