from typing import Iterable, Optional
import csv

from chocula.util import (
    clean_str,
    parse_mimetypes,
    parse_country,
    parse_lang,
    PLATFORM_MAP,
)
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo, HomepageUrl


class DoajLoader(DirectoryLoader):
    """
    Directory loader that reads journal rows from the DOAJ CSV file
    configured at ``config.doaj.filepath`` and converts each row into a
    ``DirectoryInfo`` record.

    CSV Columns:

        - Journal title
        - Journal URL
        - Alternative title
        - Journal ISSN (print version)
        - Journal EISSN (online version)
        - Publisher
        - Society or institution
        - "Platform, host or aggregator"
        - Country of publisher
        - Journal article processing charges (APCs)
        - APC information URL
        - APC amount
        - Currency
        - Journal article submission fee
        - Submission fee URL
        - Submission fee amount
        - Submission fee currency
        - Number of articles publish in the last calendar year
        - Number of articles information URL
        - Journal waiver policy (for developing country authors etc)
        - Waiver policy information URL
        - Digital archiving policy or program(s)
        - Archiving: national library
        - Archiving: other
        - Archiving infomation URL
        - Journal full-text crawl permission
        - Permanent article identifiers
        - Journal provides download statistics
        - Download statistics information URL
        - First calendar year journal provided online Open Access content
        - Full text formats
        - Keywords
        - Full text language
        - URL for the Editorial Board page
        - Review process
        - Review process information URL
        - URL for journal's aims & scope
        - URL for journal's instructions for authors
        - Journal plagiarism screening policy
        - Plagiarism information URL
        - Average number of weeks between submission and publication
        - URL for journal's Open Access statement
        - Machine-readable CC licensing information embedded or displayed in articles
        - URL to an example page with embedded licensing information
        - Journal license
        - License attributes
        - URL for license terms
        - Does this journal allow unrestricted reuse in compliance with BOAI?
        - Deposit policy directory
        - Author holds copyright without restrictions
        - Copyright information URL
        - Author holds publishing rights without restrictions
        - Publishing rights information URL
        - DOAJ Seal
        - Tick: Accepted after March 2014
        - Added on Date
        - Subjects
    """

    source_slug = "doaj"

    def open_file(self) -> Iterable:
        """
        Yield one dict per CSV row, keyed by the column headers.

        Implemented as a generator so the underlying file handle is closed
        as soon as iteration finishes (or the generator is discarded),
        instead of being left open for the garbage collector.
        """
        # newline="" lets the csv module do its own newline handling, which
        # is required for quoted fields that contain embedded line breaks.
        with open(self.config.doaj.filepath, newline="") as csv_file:
            yield from csv.DictReader(csv_file)

    def parse_record(self, row) -> Optional[DirectoryInfo]:
        """
        Convert a single CSV row (a mapping of column name to string) into
        a ``DirectoryInfo``.

        Raises ``KeyError`` if an expected column is missing from ``row``,
        or if the "DOAJ Seal" / "Journal full-text crawl permission" cells
        hold a non-empty value other than yes/no ("Yes"/"No" exactly, for
        crawl permission).
        """
        # TODO: Subjects, Permanent article identifiers, work_level stuff

        info = DirectoryInfo(
            directory_slug=self.source_slug,
            issnp=row["Journal ISSN (print version)"],
            issne=row["Journal EISSN (online version)"],
            name=clean_str(row["Journal title"]),
            publisher=clean_str(row["Publisher"]),
            platform=PLATFORM_MAP.get(row["Platform, host or aggregator"]),
            country=parse_country(row["Country of publisher"]),
        )

        lang = parse_lang(row["Full text language"])
        if lang:
            info.langs.append(lang)

        info.extra["mimetypes"] = parse_mimetypes(row["Full text formats"])
        info.extra["as_of"] = self.config.snapshot.date

        seal = row["DOAJ Seal"]
        if seal:
            # Case-insensitive yes/no; any other value raises KeyError.
            info.extra["seal"] = {"no": False, "yes": True}[seal.lower()]

        archive_policy = row["Digital archiving policy or program(s)"]
        if archive_policy:
            # Comma-separated list; blank entries are dropped.
            info.extra["archive"] = [
                a.strip() for a in archive_policy.split(",") if a.strip()
            ]
        elif row["Archiving: national library"]:
            # Only consulted when no explicit archiving policy is listed.
            info.extra["archive"] = ["national-library"]

        crawl_permission = row["Journal full-text crawl permission"]
        if crawl_permission:
            # Case-sensitive Yes/No; any other value raises KeyError.
            info.extra["crawl_permission"] = dict(Yes=True, No=False)[crawl_permission]

        default_license = row["Journal license"]
        if default_license and default_license.startswith("CC"):
            # Rewrite e.g. "CC BY" as "CC-BY".
            info.extra["default_license"] = default_license.replace("CC ", "CC-").strip()

        url = row["Journal URL"]
        if url:
            homepage = HomepageUrl.from_url(url)
            # from_url's result is checked for truthiness before being kept.
            if homepage:
                info.homepage_urls.append(homepage)

        return info