From 2dec107e37280b55dddae74cd0328f2d5c7979b6 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Sun, 13 Sep 2020 22:12:03 -0700 Subject: update vanished journal importer for 2020-09-03 dataset --- chocula/directories/vanished_disapeared.py | 16 +++++---------- chocula/directories/vanished_inactive.py | 32 ++++++++++++------------------ 2 files changed, 18 insertions(+), 30 deletions(-) (limited to 'chocula') diff --git a/chocula/directories/vanished_disapeared.py b/chocula/directories/vanished_disapeared.py index c9d2bf9..de25434 100644 --- a/chocula/directories/vanished_disapeared.py +++ b/chocula/directories/vanished_disapeared.py @@ -19,32 +19,29 @@ class VanishedDisapearedLoader(DirectoryLoader): - E-ISSN - URL - Publisher - - blank + - - Language(s) - Country - society_affiliation - other_sci_affiliation - - Discipline - Discipline Group - Start Year - End Year - Last Year Online - Actively Publishing - Internet Archive Link - - Verified - - Comments - - The Keepers (archived) - - Archive Link - - Mikael (1 = agree with Lisa) """ source_slug = "vanished_disapeared" def open_file(self) -> Iterable: - return csv.DictReader(open(self.config.vanished_disapeared.filepath)) + return csv.DictReader(open(self.config.vanished_disapeared.filepath), delimiter=";") def parse_record(self, record) -> Optional[DirectoryInfo]: + if not record["Journal Name"]: + return None + info = DirectoryInfo( directory_slug=self.source_slug, raw_issn=clean_issn(record["ISSN"]), @@ -55,9 +52,6 @@ class VanishedDisapearedLoader(DirectoryLoader): country=parse_country(record["Country"]), ) homepage = HomepageUrl.from_url(record["Internet Archive Link"]) - if homepage: - info.homepage_urls.append(homepage) - homepage = HomepageUrl.from_url(record["Archive Link"]) if homepage: info.homepage_urls.append(homepage) return info diff --git a/chocula/directories/vanished_inactive.py b/chocula/directories/vanished_inactive.py index 253940c..8b23525 100644 --- a/chocula/directories/vanished_inactive.py +++ b/chocula/directories/vanished_inactive.py @@ -1,9 +1,9 @@ import csv from typing import Iterable, Optional -from chocula.util import clean_str, clean_issn, parse_lang, parse_country +from chocula.util import clean_str, clean_issn from chocula.common import DirectoryLoader -from chocula.database import DirectoryInfo +from chocula.database import DirectoryInfo, HomepageUrl class VanishedInactiveLoader(DirectoryLoader): @@ -13,38 +13,32 @@ class VanishedInactiveLoader(DirectoryLoader): CSV headers: - - Source - Title - - Identifier - - Publisher - - Comment - - Language + - URL - ISSN - EISSN - - Keyword - - Start Year - - End Year - - Added on date - - Subjects - - Country - - Publication fee - - Further Information """ source_slug = "vanished_inactive" def open_file(self) -> Iterable: - return csv.DictReader(open(self.config.vanished_inactive.filepath)) + return csv.DictReader(open(self.config.vanished_inactive.filepath), delimiter=";") def parse_record(self, record) -> Optional[DirectoryInfo]: + # HACK + record["Title"] = record["\ufeffTitle"] + if not record["Title"]: + return None + info = DirectoryInfo( directory_slug=self.source_slug, raw_issn=clean_issn(record["ISSN"]), issne=clean_issn(record["EISSN"]), name=clean_str(record["Title"]), - publisher=clean_str(record["Publisher"]), - langs=[lang for lang in [parse_lang(record["Language"])] if lang], - country=parse_country(record["Country"]), ) + + homepage = HomepageUrl.from_url(record["URL"]) + if homepage: + info.homepage_urls.append(homepage) return info -- cgit v1.2.3