From a52e5f39346e28bdb2eb58f17a298177659dbbfe Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 8 Oct 2020 17:36:07 -0700 Subject: database support for scholarsportal and cariniana preservation holdings --- chocula/common.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ chocula/database.py | 2 ++ chocula/kbart.py | 20 +++++++++++++++++++- 3 files changed, 72 insertions(+), 1 deletion(-) (limited to 'chocula') diff --git a/chocula/common.py b/chocula/common.py index edd48a3..94c4c7f 100644 --- a/chocula/common.py +++ b/chocula/common.py @@ -148,6 +148,9 @@ class KbartLoader: else: new_spans = [[record.start_year, record.end_year]] record.year_spans = merge_spans(old_spans, new_spans) + elif record.year_spans: + old_spans = existing.year_spans or [] + record.year_spans = merge_spans(old_spans, record.year_spans) kbart_dict[record.issnl] = record counts["unique-issnl"] = len(kbart_dict) @@ -218,6 +221,54 @@ class OnixCsvLoader(KbartLoader): return record +class CarinianaCsvLoader(KbartLoader): + """ + Similar to the KBART loader class, but for custom CSV files instead of + KBART formatted TSV. 
+ + CSV columns: + - Region + - Knowledge Area + - Publisher + - Title + - ISSN + - eISSN + - Preserved Volumes + - Preserved Years + - In Progress Volumes + - In Progress Years + + TODO: volumes + """ + + def open_file(self) -> Iterable: + return csv.DictReader(open(self.file_path(), "r")) + + def parse_record(self, row: dict, issn_db: IssnDatabase) -> Optional[KbartRecord]: + + raw_issn = clean_issn(row["ISSN"]) + issne = clean_issn(row["eISSN"]) + issnl = issn_db.issn2issnl(raw_issn or issne or "") + # convert list of years to a set of year spans + years = [int(y.strip()) for y in row["Preserved Years"].split(";") if y.strip()] + year_spans = merge_spans([], [[y, y] for y in years]) + record = KbartRecord( + issnl=issnl, + issne=issne, + issnp=None, + embargo=None, + title=clean_str(row["Title"]), + publisher=clean_str(row["Publisher"]), + url=None, + start_year=None, + end_year=None, + start_volume=None, + end_volume=None, + year_spans=year_spans, + ) + return record + + class HathifilesLoader(KbartLoader): """ Similar to the KBART loader class, but for Hathifiles bulk format. 
diff --git a/chocula/database.py b/chocula/database.py index 3e8db57..d4fbbed 100644 --- a/chocula/database.py +++ b/chocula/database.py @@ -837,6 +837,8 @@ class ChoculaDatabase: "jstor", "pkp_pln", "hathitrust", + "scholarsportal", + "cariniana", ): extra["kbart"] = extra.get("kbart", {}) extra["kbart"][drow["slug"]] = dict(year_spans=dextra["year_spans"]) diff --git a/chocula/kbart.py b/chocula/kbart.py index 3944430..23c5533 100644 --- a/chocula/kbart.py +++ b/chocula/kbart.py @@ -1,4 +1,4 @@ -from chocula.common import KbartLoader, OnixCsvLoader, HathifilesLoader +from chocula.common import KbartLoader, OnixCsvLoader, CarinianaCsvLoader, HathifilesLoader class ClockssKbartLoader(KbartLoader): @@ -41,6 +41,22 @@ class PkpPlnOnixLoader(OnixCsvLoader): return self.config.pkp_pln.filepath +class CarinianaLoader(CarinianaCsvLoader): + + source_slug = "cariniana" + + def file_path(self) -> str: + return self.config.cariniana.filepath + + +class ScholarsportalOnixLoader(OnixCsvLoader): + + source_slug = "scholarsportal" + + def file_path(self) -> str: + return self.config.scholarsportal.filepath + + class HathitrustLoader(HathifilesLoader): source_slug = "hathitrust" @@ -55,5 +71,7 @@ ALL_CHOCULA_KBART_CLASSES = [ PorticoKbartLoader, JstorKbartLoader, PkpPlnOnixLoader, + CarinianaLoader, + ScholarsportalOnixLoader, HathitrustLoader, ] -- cgit v1.2.3