aboutsummaryrefslogtreecommitdiffstats
path: root/chocula/directories/doaj.py
diff options
context:
space:
mode:
Diffstat (limited to 'chocula/directories/doaj.py')
-rw-r--r--chocula/directories/doaj.py120
1 files changed, 120 insertions, 0 deletions
diff --git a/chocula/directories/doaj.py b/chocula/directories/doaj.py
new file mode 100644
index 0000000..5d1aa21
--- /dev/null
+++ b/chocula/directories/doaj.py
@@ -0,0 +1,120 @@
+
+from typing import Iterable, Optional, Dict, Any
+import csv
+
+from chocula.util import clean_str, parse_mimetypes, parse_country, parse_lang, PLATFORM_MAP
+from chocula.common import DirectoryLoader
+from chocula.database import DirectoryInfo, HomepageUrl
+
+
+class DoajLoader(DirectoryLoader):
+ """
+ CSV Columns:
+
+ - Journal title
+ - Journal URL
+ - Alternative title
+ - Journal ISSN (print version)
+ - Journal EISSN (online version)
+ - Publisher
+ - Society or institution
+ - "Platform
+ - host or aggregator"
+ - Country of publisher
+ - Journal article processing charges (APCs)
+ - APC information URL
+ - APC amount
+ - Currency
+ - Journal article submission fee
+ - Submission fee URL
+ - Submission fee amount
+ - Submission fee currency
+ - Number of articles publish in the last calendar year
+ - Number of articles information URL
+ - Journal waiver policy (for developing country authors etc)
+ - Waiver policy information URL
+ - Digital archiving policy or program(s)
+ - Archiving: national library
+ - Archiving: other
+ - Archiving infomation URL
+ - Journal full-text crawl permission
+ - Permanent article identifiers
+ - Journal provides download statistics
+ - Download statistics information URL
+ - First calendar year journal provided online Open Access content
+ - Full text formats
+ - Keywords
+ - Full text language
+ - URL for the Editorial Board page
+ - Review process
+ - Review process information URL
+ - URL for journal's aims & scope
+ - URL for journal's instructions for authors
+ - Journal plagiarism screening policy
+ - Plagiarism information URL
+ - Average number of weeks between submission and publication
+ - URL for journal's Open Access statement
+ - Machine-readable CC licensing information embedded or displayed in articles
+ - URL to an example page with embedded licensing information
+ - Journal license
+ - License attributes
+ - URL for license terms
+ - Does this journal allow unrestricted reuse in compliance with BOAI?
+ - Deposit policy directory
+ - Author holds copyright without restrictions
+ - Copyright information URL
+ - Author holds publishing rights without restrictions
+ - Publishing rights information URL
+ - DOAJ Seal
+ - Tick: Accepted after March 2014
+ - Added on Date
+ - Subjects
+ """
+
+ source_slug = "doaj"
+
+ def open_file(self) -> Iterable:
+ return csv.DictReader(open(self.config.DOAJ_FILE))
+
+ def parse_record(self, row) -> Optional[DirectoryInfo]:
+ # TODO: Subjects, Permanent article identifiers, work_level stuff
+
+ info = DirectoryInfo(
+ directory_slug=self.source_slug,
+ issnp=row['Journal ISSN (print version)'],
+ issne=row['Journal EISSN (online version)'],
+ name=clean_str(row['Journal title']),
+ publisher=clean_str(row['Publisher']),
+ platform=PLATFORM_MAP.get(row['Platform, host or aggregator']),
+ country=parse_country(row['Country of publisher']),
+ )
+
+ lang = parse_lang(row['Full text language'])
+ if lang:
+ info.langs.append(lang)
+
+ extra: Dict[str, Any] = dict(doaj=dict())
+ extra['mimetypes'] = parse_mimetypes(row['Full text formats'])
+ extra['doaj']['as_of'] = self.config.DOAJ_DATE
+ if row['DOAJ Seal']:
+ extra['doaj']['seal'] = {"no": False, "yes": True}[row['DOAJ Seal'].lower()]
+
+ if row['Digital archiving policy or program(s)']:
+ extra['archive'] = [a.strip() for a in row['Digital archiving policy or program(s)'].split(',') if a.strip()]
+ elif row['Archiving: national library']:
+ extra['archive'] = ['national-library']
+
+ crawl_permission = row['Journal full-text crawl permission']
+ if crawl_permission:
+ extra['crawl-permission'] = dict(Yes=True, No=False)[crawl_permission]
+ default_license = row['Journal license']
+ if default_license and default_license.startswith('CC'):
+ extra['default_license'] = default_license.replace('CC ', 'CC-').strip()
+
+ url = row['Journal URL']
+ if url:
+ homepage = HomepageUrl.from_url(row['Journal URL'])
+ if homepage:
+ info.homepage_urls.append(homepage)
+ return info
+