diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-10-08 17:36:07 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-10-08 17:36:07 -0700 |
commit | a52e5f39346e28bdb2eb58f17a298177659dbbfe (patch) | |
tree | 551ea758efd05d90fbece22897222610dd054871 /chocula | |
parent | 886c23d2e8ae3c0e05d8300e42e5934fae5a0496 (diff) | |
download | chocula-a52e5f39346e28bdb2eb58f17a298177659dbbfe.tar.gz chocula-a52e5f39346e28bdb2eb58f17a298177659dbbfe.zip |
database support for scholarsportal and cariniana preservation holdings
Diffstat (limited to 'chocula')
-rw-r--r-- | chocula/common.py | 51 | ||||
-rw-r--r-- | chocula/database.py | 2 | ||||
-rw-r--r-- | chocula/kbart.py | 20 |
3 files changed, 72 insertions, 1 deletions
diff --git a/chocula/common.py b/chocula/common.py
index edd48a3..94c4c7f 100644
--- a/chocula/common.py
+++ b/chocula/common.py
@@ -148,6 +148,9 @@ class KbartLoader:
                     else:
                         new_spans = [[record.start_year, record.end_year]]
                     record.year_spans = merge_spans(old_spans, new_spans)
+                elif record.year_spans:
+                    old_spans = existing.year_spans or []
+                    record.year_spans = merge_spans(old_spans, record.year_spans)
             kbart_dict[record.issnl] = record
 
         counts["unique-issnl"] = len(kbart_dict)
@@ -218,6 +221,54 @@ class OnixCsvLoader(KbartLoader):
         return record
 
 
+class CarinianaCsvLoader(KbartLoader):
+    """
+    Similar to the KBART loader class, but for custom CSV files instead of
+    KBART formatted TSV.
+
+    CSV columns:
+    - Region
+    - Knowledge Area
+    - Publisher
+    - Title
+    - ISSN
+    - eISSN
+    - Preserved Volumes
+    - Preserved Years
+    - In Progress Volumes
+    - In Progress Years
+
+    TODO: volumes
+    """
+
+    def open_file(self) -> Iterable:
+        return csv.DictReader(open(self.file_path(), "r", newline=""))  # newline="" per csv docs; handle stays open for the lazy reader
+
+    def parse_record(self, row: dict, issn_db: IssnDatabase) -> Optional[KbartRecord]:
+
+        raw_issn = clean_issn(row["ISSN"])
+        issne = clean_issn(row["eISSN"])  # was row["ISSN"], which made the fallback below a no-op
+        issnl = issn_db.issn2issnl(raw_issn or issne or "")
+        # convert ";"-separated list of years to a set of year spans, skipping blank entries
+        years = [int(y.strip()) for y in row["Preserved Years"].split(";") if y.strip()]
+        year_spans = merge_spans([], [[y, y] for y in years])
+        record = KbartRecord(
+            issnl=issnl,
+            issne=issne,
+            issnp=None,
+            embargo=None,
+            title=clean_str(row["Title"]),
+            publisher=clean_str(row["Publisher"]),
+            url=None,
+            start_year=None,
+            end_year=None,
+            start_volume=None,
+            end_volume=None,
+            year_spans=year_spans,
+        )
+        return record
+
+
 class HathifilesLoader(KbartLoader):
     """
     Similar to the KBART loader class, but for Hathifiles bulk format.
diff --git a/chocula/database.py b/chocula/database.py
index 3e8db57..d4fbbed 100644
--- a/chocula/database.py
+++ b/chocula/database.py
@@ -837,6 +837,8 @@ class ChoculaDatabase:
                 "jstor",
                 "pkp_pln",
                 "hathitrust",
+                "scholarsportal",
+                "cariniana",
             ):
                 extra["kbart"] = extra.get("kbart", {})
                 extra["kbart"][drow["slug"]] = dict(year_spans=dextra["year_spans"])
diff --git a/chocula/kbart.py b/chocula/kbart.py
index 3944430..23c5533 100644
--- a/chocula/kbart.py
+++ b/chocula/kbart.py
@@ -1,4 +1,4 @@
-from chocula.common import KbartLoader, OnixCsvLoader, HathifilesLoader
+from chocula.common import KbartLoader, OnixCsvLoader, CarinianaCsvLoader, HathifilesLoader
 
 
 class ClockssKbartLoader(KbartLoader):
@@ -41,6 +41,22 @@ class PkpPlnOnixLoader(OnixCsvLoader):
         return self.config.pkp_pln.filepath
 
 
+class CarinianaLoader(CarinianaCsvLoader):
+
+    source_slug = "cariniana"  # same string as the "cariniana" entry in the chocula/database.py slug tuple
+
+    def file_path(self) -> str:
+        return self.config.cariniana.filepath  # input path is read from the cariniana config entry
+
+
+class ScholarsportalOnixLoader(OnixCsvLoader):
+
+    source_slug = "scholarsportal"  # same string as the "scholarsportal" entry in the chocula/database.py slug tuple
+
+    def file_path(self) -> str:
+        return self.config.scholarsportal.filepath  # input path is read from the scholarsportal config entry
+
+
 class HathitrustLoader(HathifilesLoader):
 
     source_slug = "hathitrust"
@@ -55,5 +71,7 @@ ALL_CHOCULA_KBART_CLASSES = [
     PorticoKbartLoader,
     JstorKbartLoader,
     PkpPlnOnixLoader,
+    CarinianaLoader,
+    ScholarsportalOnixLoader,
     HathitrustLoader,
 ] |