# Provenance
# ----------
# The code below was recovered from a cgit patch view whose line breaks had
# been collapsed. The original patch metadata is kept here for reference; the
# Python that follows is a re-formatted and revised rendering of the two files
# the patch created, so the line counts in the diffstat describe the original
# commit, not this text.
#
#   From 57db2db336c08031324e44b2d2880fbd4b6893c9 Mon Sep 17 00:00:00 2001
#   From: Bryan Newbold
#   Date: Mon, 1 Jun 2020 17:01:20 -0700
#   Subject: 'everything' at least partially working
#   ---
#    chocula/directories/__init__.py | 19 +++++++++++
#    chocula/directories/sim.py      | 71 +++++++++++++++++++++++++++++++++++++++++
#    2 files changed, 90 insertions(+)
#    create mode 100644 chocula/directories/__init__.py
#    create mode 100644 chocula/directories/sim.py
#   (limited to 'chocula/directories')
#   -- cgit v1.2.3


# ---------------------------------------------------------------------------
# chocula/directories/__init__.py  (new file, mode 100644, index 4bed696)
# ---------------------------------------------------------------------------

from chocula.directories.crossref import CrossrefLoader
from chocula.directories.doaj import DoajLoader
from chocula.directories.entrez import EntrezLoader
from chocula.directories.ezb import EzbLoader
from chocula.directories.gold_oa import GoldOALoader
from chocula.directories.norwegian import NorwegianLoader
from chocula.directories.openapc import OpenAPCLoader
from chocula.directories.road import RoadLoader
from chocula.directories.sherpa_romeo import SherpaRomeoLoader
from chocula.directories.sim import SimLoader
from chocula.directories.szczepanski import SzczepanskiLoader
from chocula.directories.wikidata import WikidataLoader

# Every directory loader class exported by this package, in the order given
# by the original commit (SimLoader is listed last).
ALL_CHOCULA_DIR_CLASSES = [
    CrossrefLoader, DoajLoader, EntrezLoader, EzbLoader, GoldOALoader,
    NorwegianLoader, OpenAPCLoader, RoadLoader, SherpaRomeoLoader,
    SzczepanskiLoader, WikidataLoader, SimLoader,
]


# ---------------------------------------------------------------------------
# chocula/directories/sim.py  (new file, mode 100644, index c0c02df)
# ---------------------------------------------------------------------------

from typing import Iterable, Optional, Dict, Any
import csv

from chocula.util import clean_str, parse_mimetypes, parse_country, parse_lang, PLATFORM_MAP, gaps_to_spans
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo, HomepageUrl


def _int_or_none(value: Optional[str]) -> Optional[int]:
    """Parse a CSV cell as an int, treating a blank or missing cell as None.

    A cell that contains non-numeric text still raises ValueError, so corrupt
    data is not silently dropped.
    """
    text = (value or "").strip()
    return int(text) if text else None


class SimLoader(DirectoryLoader):
    """Directory loader for the "sim" source, read from a CSV file.

    The file location is taken from ``self.config.sim.filepath``; each CSV row
    is converted to a ``DirectoryInfo`` by :meth:`parse_record`.
    """

    source_slug = "sim"

    def open_file(self) -> Iterable:
        """Return a ``csv.DictReader`` over the configured CSV file.

        The reader is lazy, so the underlying file handle is intentionally left
        open for the caller to iterate; it is released when the reader is
        garbage collected.

        NOTE(review): the file is opened in default text mode on purpose.
        ``parse_record`` looks up header names that contain a bare newline
        character, so newline translation must stay as it is unless those keys
        are changed at the same time.
        """
        return csv.DictReader(open(self.config.sim.filepath))

    def parse_record(self, row) -> Optional[DirectoryInfo]:
        """Convert one CSV row (a dict keyed by column header) to DirectoryInfo.

        Column headers, as listed by the original author:

            NA Pub Cat ID
            Title
            Publisher
            ISSN
            Impact Rank
            Total Cities
            Journal Impact Factor
            Eigenfact or Score
            First Volume
            Last Volume
            NA Gaps
            Scholarly / Peer-<newline>Reviewed
            Peer-<newline>Reviewed
            Pub Type
            Pub Language
            Subjects

        The two peer-review headers contain an embedded newline character.

        Columns read here: "NA Pub Cat ID", "Title", "Publisher", "ISSN",
        "First Volume", "Last Volume", "NA Gaps", both peer-review columns,
        "Pub Type" and "Pub Language". Cells that are blank or ``None``
        (``csv.DictReader`` fills short rows with ``None``) are tolerated for
        the year, gap, ISSN and ID columns.

        Raises:
            KeyError: if a required column header is absent from the row.
            ValueError: if a year or gap cell holds non-numeric text.
        """
        # TODO: 'Pub Type'

        extra: Dict[str, Any] = {}

        first_year = _int_or_none(row["First Volume"])
        if first_year is not None:
            extra["first_year"] = first_year

        last_year = _int_or_none(row["Last Volume"])
        if last_year is not None:
            extra["last_year"] = last_year

        # "NA Gaps" is a ';'-separated list of integers; empty pieces are skipped.
        gaps = [int(g) for g in (row["NA Gaps"] or "").split(";") if g.strip()]
        if gaps:
            extra["gaps"] = gaps
        # Spans are computed whenever both end points are known, passing the
        # (possibly empty) gap list through to gaps_to_spans.
        if first_year and last_year:
            extra["year_spans"] = gaps_to_spans(first_year, last_year, gaps)

        extra["scholarly_peer_reviewed"] = row["Scholarly / Peer-\nReviewed"]
        extra["peer_reviewed"] = row["Peer-\nReviewed"]
        extra["pub_type"] = clean_str(row["Pub Type"])

        # NOTE(review): assumes parse_lang returns a falsy value (e.g. None)
        # for an unrecognised language -- confirm against chocula.util. A
        # falsy result is left out so that langs never becomes [None].
        lang = parse_lang(row["Pub Language"])

        info = DirectoryInfo(
            directory_slug=self.source_slug,
            name=clean_str(row["Title"]),
            publisher=clean_str(row["Publisher"]),
            # Only the first nine characters of the ISSN cell are kept.
            raw_issn=(row["ISSN"] or "")[:9],
            # A missing or blank ID column becomes None instead of raising
            # AttributeError on None.strip().
            custom_id=(row.get("NA Pub Cat ID") or "").strip() or None,
            langs=[lang] if lang else [],
            extra=extra,
        )
        return info