1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
from typing import Iterable, Optional, Dict, Any
import csv
from chocula.util import (
clean_str,
parse_mimetypes,
parse_country,
parse_lang,
PLATFORM_MAP,
)
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo, HomepageUrl
class DoajLoader(DirectoryLoader):
    """
    Directory loader for the DOAJ journal CSV file.

    Rows are read with ``csv.DictReader`` from ``config.doaj.filepath`` and
    each row is converted into a ``DirectoryInfo`` by ``parse_record``.

    The column names below are listed as they appear in the CSV header
    (presumably verbatim from the DOAJ export, odd spellings included); the
    ones used by ``parse_record`` are looked up by these exact strings, so do
    not "correct" them here.

    CSV Columns:

    - Journal title
    - Journal URL
    - Alternative title
    - Journal ISSN (print version)
    - Journal EISSN (online version)
    - Publisher
    - Society or institution
    - Platform, host or aggregator
    - Country of publisher
    - Journal article processing charges (APCs)
    - APC information URL
    - APC amount
    - Currency
    - Journal article submission fee
    - Submission fee URL
    - Submission fee amount
    - Submission fee currency
    - Number of articles publish in the last calendar year
    - Number of articles information URL
    - Journal waiver policy (for developing country authors etc)
    - Waiver policy information URL
    - Digital archiving policy or program(s)
    - Archiving: national library
    - Archiving: other
    - Archiving infomation URL
    - Journal full-text crawl permission
    - Permanent article identifiers
    - Journal provides download statistics
    - Download statistics information URL
    - First calendar year journal provided online Open Access content
    - Full text formats
    - Keywords
    - Full text language
    - URL for the Editorial Board page
    - Review process
    - Review process information URL
    - URL for journal's aims & scope
    - URL for journal's instructions for authors
    - Journal plagiarism screening policy
    - Plagiarism information URL
    - Average number of weeks between submission and publication
    - URL for journal's Open Access statement
    - Machine-readable CC licensing information embedded or displayed in articles
    - URL to an example page with embedded licensing information
    - Journal license
    - License attributes
    - URL for license terms
    - Does this journal allow unrestricted reuse in compliance with BOAI?
    - Deposit policy directory
    - Author holds copyright without restrictions
    - Copyright information URL
    - Author holds publishing rights without restrictions
    - Publishing rights information URL
    - DOAJ Seal
    - Tick: Accepted after March 2014
    - Added on Date
    - Subjects
    """

    source_slug = "doaj"

    # Lower-cased spellings accepted in the CSV's yes/no columns. Any other
    # non-empty value raises KeyError so unexpected data is noticed.
    _YES_NO = {"yes": True, "no": False}

    def open_file(self) -> Iterable:
        """
        Yield one dict per CSV row, keyed by the header column names.

        Implemented as a generator so the file handle is owned by a ``with``
        block and closed once iteration finishes (or the generator is
        discarded), instead of being left open for the life of the process.
        The file is opened lazily, on the first iteration.
        """
        # newline="" lets the csv module handle line endings itself, which
        # keeps newlines embedded in quoted fields intact.
        with open(self.config.doaj.filepath, newline="") as csv_file:
            yield from csv.DictReader(csv_file)

    def parse_record(self, row) -> Optional[DirectoryInfo]:
        """
        Convert one CSV row (a dict keyed by column name) to a DirectoryInfo.

        Besides the core fields passed to the constructor, this appends the
        parsed full-text language and homepage URL when present, and collects
        secondary metadata (mimetypes, snapshot date, DOAJ seal, archiving
        services, crawl permission, default CC license) into an ``extra``
        dict attached to the record.

        Raises:
            KeyError: if an expected column is missing from ``row``, or if a
                non-empty "DOAJ Seal" / "Journal full-text crawl permission"
                value is something other than yes/no (case-insensitive).
        """
        # TODO: Subjects, Permanent article identifiers, work_level stuff
        info = DirectoryInfo(
            directory_slug=self.source_slug,
            issnp=row["Journal ISSN (print version)"],
            issne=row["Journal EISSN (online version)"],
            name=clean_str(row["Journal title"]),
            publisher=clean_str(row["Publisher"]),
            platform=PLATFORM_MAP.get(row["Platform, host or aggregator"]),
            country=parse_country(row["Country of publisher"]),
        )

        lang = parse_lang(row["Full text language"])
        if lang:
            info.langs.append(lang)

        extra: Dict[str, Any] = dict(doaj=dict())
        extra["mimetypes"] = parse_mimetypes(row["Full text formats"])
        extra["doaj"]["as_of"] = self.config.snapshot.date

        seal = row["DOAJ Seal"]
        if seal:
            extra["doaj"]["seal"] = self._YES_NO[seal.lower()]

        archiving = row["Digital archiving policy or program(s)"]
        if archiving:
            # Comma-separated list of services; drop empty fragments.
            extra["archive"] = [a.strip() for a in archiving.split(",") if a.strip()]
        elif row["Archiving: national library"]:
            extra["archive"] = ["national-library"]

        crawl_permission = row["Journal full-text crawl permission"]
        if crawl_permission:
            # Same case-insensitive yes/no handling as the seal column.
            extra["crawl-permission"] = self._YES_NO[crawl_permission.lower()]

        default_license = row["Journal license"]
        if default_license and default_license.startswith("CC"):
            # Only CC licenses are recorded; "CC BY" becomes "CC-BY".
            extra["default_license"] = default_license.replace("CC ", "CC-").strip()

        # Previously `extra` was built and then dropped, so none of the above
        # ever reached the returned record.
        # NOTE(review): assumes DirectoryInfo exposes an `extra` attribute
        # that accepts this dict -- confirm against chocula.database.
        info.extra = extra

        url = row["Journal URL"]
        if url:
            homepage = HomepageUrl.from_url(url)
            # from_url may return a falsy value; only keep a usable homepage.
            if homepage:
                info.homepage_urls.append(homepage)
        return info
|