1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
from typing import Iterable, Optional, Dict, Any
import csv
from chocula.util import clean_str, parse_mimetypes, parse_country, parse_lang, PLATFORM_MAP
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo, HomepageUrl
class DoajLoader(DirectoryLoader):
"""
CSV Columns:
- Journal title
- Journal URL
- Alternative title
- Journal ISSN (print version)
- Journal EISSN (online version)
- Publisher
- Society or institution
- "Platform
- host or aggregator"
- Country of publisher
- Journal article processing charges (APCs)
- APC information URL
- APC amount
- Currency
- Journal article submission fee
- Submission fee URL
- Submission fee amount
- Submission fee currency
- Number of articles publish in the last calendar year
- Number of articles information URL
- Journal waiver policy (for developing country authors etc)
- Waiver policy information URL
- Digital archiving policy or program(s)
- Archiving: national library
- Archiving: other
- Archiving infomation URL
- Journal full-text crawl permission
- Permanent article identifiers
- Journal provides download statistics
- Download statistics information URL
- First calendar year journal provided online Open Access content
- Full text formats
- Keywords
- Full text language
- URL for the Editorial Board page
- Review process
- Review process information URL
- URL for journal's aims & scope
- URL for journal's instructions for authors
- Journal plagiarism screening policy
- Plagiarism information URL
- Average number of weeks between submission and publication
- URL for journal's Open Access statement
- Machine-readable CC licensing information embedded or displayed in articles
- URL to an example page with embedded licensing information
- Journal license
- License attributes
- URL for license terms
- Does this journal allow unrestricted reuse in compliance with BOAI?
- Deposit policy directory
- Author holds copyright without restrictions
- Copyright information URL
- Author holds publishing rights without restrictions
- Publishing rights information URL
- DOAJ Seal
- Tick: Accepted after March 2014
- Added on Date
- Subjects
"""
source_slug = "doaj"
def open_file(self) -> Iterable:
return csv.DictReader(open(self.config.DOAJ_FILE))
def parse_record(self, row) -> Optional[DirectoryInfo]:
# TODO: Subjects, Permanent article identifiers, work_level stuff
info = DirectoryInfo(
directory_slug=self.source_slug,
issnp=row['Journal ISSN (print version)'],
issne=row['Journal EISSN (online version)'],
name=clean_str(row['Journal title']),
publisher=clean_str(row['Publisher']),
platform=PLATFORM_MAP.get(row['Platform, host or aggregator']),
country=parse_country(row['Country of publisher']),
)
lang = parse_lang(row['Full text language'])
if lang:
info.langs.append(lang)
extra: Dict[str, Any] = dict(doaj=dict())
extra['mimetypes'] = parse_mimetypes(row['Full text formats'])
extra['doaj']['as_of'] = self.config.DOAJ_DATE
if row['DOAJ Seal']:
extra['doaj']['seal'] = {"no": False, "yes": True}[row['DOAJ Seal'].lower()]
if row['Digital archiving policy or program(s)']:
extra['archive'] = [a.strip() for a in row['Digital archiving policy or program(s)'].split(',') if a.strip()]
elif row['Archiving: national library']:
extra['archive'] = ['national-library']
crawl_permission = row['Journal full-text crawl permission']
if crawl_permission:
extra['crawl-permission'] = dict(Yes=True, No=False)[crawl_permission]
default_license = row['Journal license']
if default_license and default_license.startswith('CC'):
extra['default_license'] = default_license.replace('CC ', 'CC-').strip()
url = row['Journal URL']
if url:
homepage = HomepageUrl.from_url(row['Journal URL'])
if homepage:
info.homepage_urls.append(homepage)
return info
|