From 6862b14ba632c05b49c06541587d72cedd3ce737 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 23 Apr 2021 10:26:28 -0700 Subject: doaj: updates for new file format; removed some fields/metadata --- chocula/directories/doaj.py | 98 ++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 55 deletions(-) (limited to 'chocula') diff --git a/chocula/directories/doaj.py b/chocula/directories/doaj.py index e9c94c9..f9531b1 100644 --- a/chocula/directories/doaj.py +++ b/chocula/directories/doaj.py @@ -3,10 +3,8 @@ import csv from chocula.util import ( clean_str, - parse_mimetypes, parse_country, parse_lang, - PLATFORM_MAP, ) from chocula.common import DirectoryLoader from chocula.database import DirectoryInfo, HomepageUrl @@ -18,62 +16,57 @@ class DoajLoader(DirectoryLoader): - Journal title - Journal URL + - URL in DOAJ - Alternative title - Journal ISSN (print version) - Journal EISSN (online version) + - Keywords + - Languages in which the journal accepts manuscripts - Publisher - - Society or institution - - "Platform - - host or aggregator" - Country of publisher - - Journal article processing charges (APCs) - - APC information URL - - APC amount - - Currency - - Journal article submission fee - - Submission fee URL - - Submission fee amount - - Submission fee currency - - Number of articles publish in the last calendar year - - Number of articles information URL - - Journal waiver policy (for developing country authors etc) - - Waiver policy information URL - - Digital archiving policy or program(s) - - Archiving: national library - - Archiving: other - - Archiving infomation URL - - Journal full-text crawl permission - - Permanent article identifiers - - Journal provides download statistics - - Download statistics information URL - - First calendar year journal provided online Open Access content - - Full text formats - - Keywords - - Full text language - - URL for the Editorial Board page - - Review process - - Review process information URL - - URL for journal's aims & scope - - URL for journal's instructions for authors - - Journal plagiarism screening policy - - Plagiarism information URL - - Average number of weeks between submission and publication - - URL for journal's Open Access statement - - Machine-readable CC licensing information embedded or displayed in articles - - URL to an example page with embedded licensing information + - Society or institution + - Country of society or institution - Journal license - License attributes - URL for license terms - - Does this journal allow unrestricted reuse in compliance with BOAI? - - Deposit policy directory + - Machine-readable CC licensing information embedded or displayed in articles + - URL to an example page with embedded licensing information - Author holds copyright without restrictions - Copyright information URL - - Author holds publishing rights without restrictions - - Publishing rights information URL + - Review process + - Review process information URL + - Journal plagiarism screening policy + - Plagiarism information URL + - URL for journal's aims & scope + - URL for the Editorial Board page + - URL for journal's instructions for authors + - Average number of weeks between article submission and publication + - APC + - APC information URL + - APC amount + - Journal waiver policy (for developing country authors etc) + - Waiver policy information URL + - Has other fees + - Other submission fees information URL + - Preservation Services + - Preservation Service: national library + - Preservation information URL + - Deposit policy directory + - URL for deposit policy + - Persistent article identifiers + - Article metadata includes ORCIDs + - Journal complies with I4OC standards for open citations + - Does this journal allow unrestricted reuse in compliance with BOAI? + - URL for journal's Open Access statement + - Continues + - Continued By + - LCC Codes + - Subjects - DOAJ Seal - - Tick: Accepted after March 2014 - Added on Date - - Subjects + - Last updated Date + - Number of Article Records + - Most Recent Article Added """ source_slug = "doaj" @@ -90,31 +83,26 @@ class DoajLoader(DirectoryLoader): issne=row["Journal EISSN (online version)"], name=clean_str(row["Journal title"]), publisher=clean_str(row["Publisher"]), - platform=PLATFORM_MAP.get(row["Platform, host or aggregator"]), country=parse_country(row["Country of publisher"]), ) - lang = parse_lang(row["Full text language"]) + lang = parse_lang(row["Languages in which the journal accepts manuscripts"]) if lang: info.langs.append(lang) - info.extra["mimetypes"] = parse_mimetypes(row["Full text formats"]) info.extra["as_of"] = self.config.snapshot.date if row["DOAJ Seal"]: info.extra["seal"] = {"no": False, "yes": True}[row["DOAJ Seal"].lower()] - if row["Digital archiving policy or program(s)"]: + if row["Preservation Services"]: info.extra["archive"] = [ a.strip() - for a in row["Digital archiving policy or program(s)"].split(",") + for a in row["Preservation Services"].split(",") if a.strip() ] - elif row["Archiving: national library"]: + elif row["Preservation Service: national library"]: info.extra["archive"] = ["national-library"] - crawl_permission = row["Journal full-text crawl permission"] - if crawl_permission: - info.extra["crawl_permission"] = dict(Yes=True, No=False)[crawl_permission] default_license = row["Journal license"] if default_license and default_license.startswith("CC"): info.extra["default_license"] = default_license.replace( -- cgit v1.2.3