about | summary | refs | log | tree | commit | diff | stats
path: root/chocula
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org>, 2021-04-23 10:26:28 -0700
committer: Bryan Newbold <bnewbold@archive.org>, 2021-04-23 10:26:28 -0700
commit: 6862b14ba632c05b49c06541587d72cedd3ce737 (patch)
tree: b2af515f2cede20c176f6779e4fa85ae7d8940d4 /chocula
parent: 1d004d91519c52a2987a351d38da6322b6b6619f (diff)
download: chocula-6862b14ba632c05b49c06541587d72cedd3ce737.tar.gz
          chocula-6862b14ba632c05b49c06541587d72cedd3ce737.zip
doaj: updates for new file format; removed some fields/metadata
Diffstat (limited to 'chocula')
-rw-r--r-- chocula/directories/doaj.py | 98
1 file changed, 43 insertions, 55 deletions
diff --git a/chocula/directories/doaj.py b/chocula/directories/doaj.py
index e9c94c9..f9531b1 100644
--- a/chocula/directories/doaj.py
+++ b/chocula/directories/doaj.py
@@ -3,10 +3,8 @@ import csv
from chocula.util import (
clean_str,
- parse_mimetypes,
parse_country,
parse_lang,
- PLATFORM_MAP,
)
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo, HomepageUrl
@@ -18,62 +16,57 @@ class DoajLoader(DirectoryLoader):
- Journal title
- Journal URL
+ - URL in DOAJ
- Alternative title
- Journal ISSN (print version)
- Journal EISSN (online version)
+ - Keywords
+ - Languages in which the journal accepts manuscripts
- Publisher
- - Society or institution
- - "Platform
- - host or aggregator"
- Country of publisher
- - Journal article processing charges (APCs)
- - APC information URL
- - APC amount
- - Currency
- - Journal article submission fee
- - Submission fee URL
- - Submission fee amount
- - Submission fee currency
- - Number of articles publish in the last calendar year
- - Number of articles information URL
- - Journal waiver policy (for developing country authors etc)
- - Waiver policy information URL
- - Digital archiving policy or program(s)
- - Archiving: national library
- - Archiving: other
- - Archiving infomation URL
- - Journal full-text crawl permission
- - Permanent article identifiers
- - Journal provides download statistics
- - Download statistics information URL
- - First calendar year journal provided online Open Access content
- - Full text formats
- - Keywords
- - Full text language
- - URL for the Editorial Board page
- - Review process
- - Review process information URL
- - URL for journal's aims & scope
- - URL for journal's instructions for authors
- - Journal plagiarism screening policy
- - Plagiarism information URL
- - Average number of weeks between submission and publication
- - URL for journal's Open Access statement
- - Machine-readable CC licensing information embedded or displayed in articles
- - URL to an example page with embedded licensing information
+ - Society or institution
+ - Country of society or institution
- Journal license
- License attributes
- URL for license terms
- - Does this journal allow unrestricted reuse in compliance with BOAI?
- - Deposit policy directory
+ - Machine-readable CC licensing information embedded or displayed in articles
+ - URL to an example page with embedded licensing information
- Author holds copyright without restrictions
- Copyright information URL
- - Author holds publishing rights without restrictions
- - Publishing rights information URL
+ - Review process
+ - Review process information URL
+ - Journal plagiarism screening policy
+ - Plagiarism information URL
+ - URL for journal's aims & scope
+ - URL for the Editorial Board page
+ - URL for journal's instructions for authors
+ - Average number of weeks between article submission and publication
+ - APC
+ - APC information URL
+ - APC amount
+ - Journal waiver policy (for developing country authors etc)
+ - Waiver policy information URL
+ - Has other fees
+ - Other submission fees information URL
+ - Preservation Services
+ - Preservation Service: national library
+ - Preservation information URL
+ - Deposit policy directory
+ - URL for deposit policy
+ - Persistent article identifiers
+ - Article metadata includes ORCIDs
+ - Journal complies with I4OC standards for open citations
+ - Does this journal allow unrestricted reuse in compliance with BOAI?
+ - URL for journal's Open Access statement
+ - Continues
+ - Continued By
+ - LCC Codes
+ - Subjects
- DOAJ Seal
- - Tick: Accepted after March 2014
- Added on Date
- - Subjects
+ - Last updated Date
+ - Number of Article Records
+ - Most Recent Article Added
"""
source_slug = "doaj"
@@ -90,31 +83,26 @@ class DoajLoader(DirectoryLoader):
issne=row["Journal EISSN (online version)"],
name=clean_str(row["Journal title"]),
publisher=clean_str(row["Publisher"]),
- platform=PLATFORM_MAP.get(row["Platform, host or aggregator"]),
country=parse_country(row["Country of publisher"]),
)
- lang = parse_lang(row["Full text language"])
+ lang = parse_lang(row["Languages in which the journal accepts manuscripts"])
if lang:
info.langs.append(lang)
- info.extra["mimetypes"] = parse_mimetypes(row["Full text formats"])
info.extra["as_of"] = self.config.snapshot.date
if row["DOAJ Seal"]:
info.extra["seal"] = {"no": False, "yes": True}[row["DOAJ Seal"].lower()]
- if row["Digital archiving policy or program(s)"]:
+ if row["Preservation Services"]:
info.extra["archive"] = [
a.strip()
- for a in row["Digital archiving policy or program(s)"].split(",")
+ for a in row["Preservation Services"].split(",")
if a.strip()
]
- elif row["Archiving: national library"]:
+ elif row["Preservation Service: national library"]:
info.extra["archive"] = ["national-library"]
- crawl_permission = row["Journal full-text crawl permission"]
- if crawl_permission:
- info.extra["crawl_permission"] = dict(Yes=True, No=False)[crawl_permission]
default_license = row["Journal license"]
if default_license and default_license.startswith("CC"):
info.extra["default_license"] = default_license.replace(