diff options
Diffstat (limited to 'chocula/directories/doaj.py')
-rw-r--r-- | chocula/directories/doaj.py | 120 |
1 files changed, 120 insertions, 0 deletions
# Source: chocula/directories/doaj.py (added as a new file in diff 0000000..5d1aa21)

from typing import Iterable, Optional, Dict, Any
import csv

from chocula.util import clean_str, parse_mimetypes, parse_country, parse_lang, PLATFORM_MAP
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo, HomepageUrl


# Accepted spellings (after strip + lowercase) of the yes/no CSV columns.
_YES_NO = {'yes': True, 'no': False}


def _parse_yes_no(value: Optional[str]) -> Optional[bool]:
    """Map a 'Yes'/'No' CSV cell to a bool; return None if empty or unrecognized."""
    if not value:
        return None
    return _YES_NO.get(value.strip().lower())


class DoajLoader(DirectoryLoader):
    """
    Loads journal records from a DOAJ CSV export.

    CSV Columns:

    - Journal title
    - Journal URL
    - Alternative title
    - Journal ISSN (print version)
    - Journal EISSN (online version)
    - Publisher
    - Society or institution
    - Platform, host or aggregator
    - Country of publisher
    - Journal article processing charges (APCs)
    - APC information URL
    - APC amount
    - Currency
    - Journal article submission fee
    - Submission fee URL
    - Submission fee amount
    - Submission fee currency
    - Number of articles publish in the last calendar year
    - Number of articles information URL
    - Journal waiver policy (for developing country authors etc)
    - Waiver policy information URL
    - Digital archiving policy or program(s)
    - Archiving: national library
    - Archiving: other
    - Archiving infomation URL
    - Journal full-text crawl permission
    - Permanent article identifiers
    - Journal provides download statistics
    - Download statistics information URL
    - First calendar year journal provided online Open Access content
    - Full text formats
    - Keywords
    - Full text language
    - URL for the Editorial Board page
    - Review process
    - Review process information URL
    - URL for journal's aims & scope
    - URL for journal's instructions for authors
    - Journal plagiarism screening policy
    - Plagiarism information URL
    - Average number of weeks between submission and publication
    - URL for journal's Open Access statement
    - Machine-readable CC licensing information embedded or displayed in articles
    - URL to an example page with embedded licensing information
    - Journal license
    - License attributes
    - URL for license terms
    - Does this journal allow unrestricted reuse in compliance with BOAI?
    - Deposit policy directory
    - Author holds copyright without restrictions
    - Copyright information URL
    - Author holds publishing rights without restrictions
    - Publishing rights information URL
    - DOAJ Seal
    - Tick: Accepted after March 2014
    - Added on Date
    - Subjects
    """

    source_slug = "doaj"

    def open_file(self) -> Iterable:
        """
        Yield one dict per row of the CSV file at ``self.config.DOAJ_FILE``.

        Implemented as a generator so the file handle is closed once the rows
        are exhausted (or the generator is discarded) instead of being leaked.
        The file is therefore opened on first iteration, not at call time.
        """
        # newline='' lets the csv module handle line endings itself, which is
        # what its documentation requires for quoted fields spanning lines.
        with open(self.config.DOAJ_FILE, newline='') as csv_file:
            yield from csv.DictReader(csv_file)

    def parse_record(self, row) -> Optional[DirectoryInfo]:
        """
        Convert one DOAJ CSV row (a column-name -> value mapping) into a
        DirectoryInfo carrying ISSNs, name, publisher, platform, country,
        language, homepage URL and the DOAJ-specific ``extra`` metadata.
        """
        # TODO: Subjects, Permanent article identifiers, work_level stuff

        info = DirectoryInfo(
            directory_slug=self.source_slug,
            issnp=row['Journal ISSN (print version)'],
            issne=row['Journal EISSN (online version)'],
            name=clean_str(row['Journal title']),
            publisher=clean_str(row['Publisher']),
            platform=PLATFORM_MAP.get(row['Platform, host or aggregator']),
            country=parse_country(row['Country of publisher']),
        )

        lang = parse_lang(row['Full text language'])
        if lang:
            info.langs.append(lang)

        extra: Dict[str, Any] = dict(doaj=dict())
        extra['mimetypes'] = parse_mimetypes(row['Full text formats'])
        extra['doaj']['as_of'] = self.config.DOAJ_DATE

        # Unrecognized yes/no spellings are skipped rather than raising
        # KeyError, so a single odd row cannot abort the whole load.
        seal = _parse_yes_no(row['DOAJ Seal'])
        if seal is not None:
            extra['doaj']['seal'] = seal

        archiving = row['Digital archiving policy or program(s)']
        if archiving:
            extra['archive'] = [a.strip() for a in archiving.split(',') if a.strip()]
        elif row['Archiving: national library']:
            extra['archive'] = ['national-library']

        crawl_permission = _parse_yes_no(row['Journal full-text crawl permission'])
        if crawl_permission is not None:
            extra['crawl-permission'] = crawl_permission

        default_license = row['Journal license']
        if default_license and default_license.startswith('CC'):
            extra['default_license'] = default_license.replace('CC ', 'CC-').strip()

        url = row['Journal URL']
        if url:
            homepage = HomepageUrl.from_url(url)
            if homepage:
                info.homepage_urls.append(homepage)

        # The original built `extra` and then dropped it on the floor.
        # NOTE(review): assumes DirectoryInfo exposes a writable `extra`
        # attribute -- confirm against chocula.database before merging.
        info.extra = extra
        return info