1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
from typing import Iterable, Optional
import csv
from chocula.util import (
clean_str,
parse_country,
parse_lang,
)
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo, HomepageUrl
class DoajLoader(DirectoryLoader):
    """
    Directory loader for the DOAJ journal metadata CSV dump.

    Rows are read from the file at ``config.doaj.filepath`` and are keyed by
    the CSV header names listed below. ``parse_record`` currently consumes
    only a subset of these columns.

    CSV Columns:

    - Journal title
    - Journal URL
    - URL in DOAJ
    - Alternative title
    - Journal ISSN (print version)
    - Journal EISSN (online version)
    - Keywords
    - Languages in which the journal accepts manuscripts
    - Publisher
    - Country of publisher
    - Society or institution
    - Country of society or institution
    - Journal license
    - License attributes
    - URL for license terms
    - Machine-readable CC licensing information embedded or displayed in articles
    - URL to an example page with embedded licensing information
    - Author holds copyright without restrictions
    - Copyright information URL
    - Review process
    - Review process information URL
    - Journal plagiarism screening policy
    - Plagiarism information URL
    - URL for journal's aims & scope
    - URL for the Editorial Board page
    - URL for journal's instructions for authors
    - Average number of weeks between article submission and publication
    - APC
    - APC information URL
    - APC amount
    - Journal waiver policy (for developing country authors etc)
    - Waiver policy information URL
    - Has other fees
    - Other submission fees information URL
    - Preservation Services
    - Preservation Service: national library
    - Preservation information URL
    - Deposit policy directory
    - URL for deposit policy
    - Persistent article identifiers
    - Article metadata includes ORCIDs
    - Journal complies with I4OC standards for open citations
    - Does this journal allow unrestricted reuse in compliance with BOAI?
    - URL for journal's Open Access statement
    - Continues
    - Continued By
    - LCC Codes
    - Subjects
    - DOAJ Seal
    - Added on Date
    - Last updated Date
    - Number of Article Records
    - Most Recent Article Added
    """

    source_slug = "doaj"

    def open_file(self) -> Iterable:
        """
        Yield each row of the DOAJ CSV as a dict keyed by column header.

        This is a generator: the file is opened when iteration starts and is
        closed as soon as iteration finishes (or the generator is closed),
        so the file handle is no longer leaked.
        """
        # newline="" is what the csv module asks for, so that quoted fields
        # containing line breaks are parsed correctly.
        # NOTE(review): encoding is pinned so parsing does not depend on the
        # platform locale; assumes the DOAJ dump is UTF-8 -- confirm.
        with open(
            self.config.doaj.filepath, newline="", encoding="utf-8"
        ) as csv_file:
            yield from csv.DictReader(csv_file)

    def parse_record(self, row) -> Optional[DirectoryInfo]:
        """
        Convert one DOAJ CSV row into a DirectoryInfo.

        Args:
            row: mapping from CSV column header to cell text, as produced by
                ``open_file``.

        Returns:
            The populated DirectoryInfo. This implementation always returns
            a record; it never returns None.

        Raises:
            KeyError: if an expected column is missing from ``row``, or if
                "DOAJ Seal" is non-empty and is neither "yes" nor "no"
                (compared case-insensitively).
        """
        # TODO: Subjects, Permanent article identifiers, work_level stuff
        info = DirectoryInfo(
            directory_slug=self.source_slug,
            issnp=row["Journal ISSN (print version)"],
            issne=row["Journal EISSN (online version)"],
            name=clean_str(row["Journal title"]),
            publisher=clean_str(row["Publisher"]),
            country=parse_country(row["Country of publisher"]),
        )

        # The whole cell is handed to parse_lang as a single value.
        # NOTE(review): if this column can list several languages, only what
        # parse_lang extracts from the full string is recorded -- confirm.
        lang = parse_lang(row["Languages in which the journal accepts manuscripts"])
        if lang:
            info.langs.append(lang)

        info.extra["as_of"] = self.config.snapshot.date

        seal = row["DOAJ Seal"]
        if seal:
            # Strict lookup on purpose: an unexpected value raises KeyError
            # instead of being silently coerced to a boolean.
            info.extra["seal"] = {"no": False, "yes": True}[seal.lower()]

        preservation = row["Preservation Services"]
        if preservation:
            # Comma-separated list; drop surrounding whitespace and empties.
            info.extra["archive"] = [
                a.strip() for a in preservation.split(",") if a.strip()
            ]
        elif row["Preservation Service: national library"]:
            # Only used as a fallback when no named services are listed.
            info.extra["archive"] = ["national-library"]

        default_license = row["Journal license"]
        if default_license and default_license.startswith("CC"):
            # Only values starting with "CC" are recorded; "CC " is rewritten
            # to "CC-" (e.g. "CC BY" -> "CC-BY").
            info.extra["default_license"] = default_license.replace(
                "CC ", "CC-"
            ).strip()

        url = row["Journal URL"]
        if url:
            homepage = HomepageUrl.from_url(url)
            # from_url's result is checked for truthiness before being kept.
            if homepage:
                info.homepage_urls.append(homepage)

        return info
|