aboutsummaryrefslogtreecommitdiffstats
path: root/chocula
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-10-08 17:36:07 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-10-08 17:36:07 -0700
commit: a52e5f39346e28bdb2eb58f17a298177659dbbfe (patch)
tree: 551ea758efd05d90fbece22897222610dd054871 /chocula
parent: 886c23d2e8ae3c0e05d8300e42e5934fae5a0496 (diff)
download: chocula-a52e5f39346e28bdb2eb58f17a298177659dbbfe.tar.gz
chocula-a52e5f39346e28bdb2eb58f17a298177659dbbfe.zip
database support for scholarsportal and cariniana preservation holdings
Diffstat (limited to 'chocula')
-rw-r--r-- chocula/common.py 51
-rw-r--r-- chocula/database.py 2
-rw-r--r-- chocula/kbart.py 20
3 files changed, 72 insertions, 1 deletions
diff --git a/chocula/common.py b/chocula/common.py
index edd48a3..94c4c7f 100644
--- a/chocula/common.py
+++ b/chocula/common.py
@@ -148,6 +148,9 @@ class KbartLoader:
else:
new_spans = [[record.start_year, record.end_year]]
record.year_spans = merge_spans(old_spans, new_spans)
+ elif record.year_spans:
+ old_spans = existing.year_spans or []
+ record.year_spans = merge_spans(old_spans, record.year_spans)
kbart_dict[record.issnl] = record
counts["unique-issnl"] = len(kbart_dict)
@@ -218,6 +221,54 @@ class OnixCsvLoader(KbartLoader):
return record
+class CarinianaCsvLoader(KbartLoader):
+    """
+    Similar to the KBART loader class, but for custom CSV files instead of
+    KBART formatted TSV.
+
+    CSV columns:
+    - Region
+    - Knowledge Area
+    - Publisher
+    - Title
+    - ISSN
+    - eISSN
+    - Preserved Volumes
+    - Preserved Years
+    - In Progress Volumes
+    - In Progress Years
+
+    TODO: volumes
+    """
+
+    def open_file(self) -> Iterable:
+        """Return an iterable of CSV rows as dicts keyed by column header."""
+        # newline="" lets the csv module do its own newline handling, so
+        # quoted fields containing line breaks are parsed correctly.
+        return csv.DictReader(open(self.file_path(), "r", newline=""))
+
+    def parse_record(self, row: dict, issn_db: IssnDatabase) -> Optional[KbartRecord]:
+        """
+        Convert one CSV row into a KbartRecord.
+
+        The ISSN-L is looked up from the "ISSN" column, falling back to the
+        "eISSN" column. "Preserved Years" is a semicolon-separated list of
+        years, which is folded into year spans via merge_spans().
+        """
+
+        raw_issn = clean_issn(row["ISSN"])
+        # the electronic ISSN has its own column; reading "ISSN" twice made
+        # the fallback below a no-op
+        issne = clean_issn(row["eISSN"])
+        issnl = issn_db.issn2issnl(raw_issn or issne or "")
+        # convert list of years to a set of year spans; skip empty or
+        # whitespace-only tokens (eg, from a trailing "; ")
+        years = [
+            int(y.strip()) for y in row["Preserved Years"].split(";") if y.strip()
+        ]
+        year_spans = merge_spans([], [[y, y] for y in years])
+        record = KbartRecord(
+            issnl=issnl,
+            issne=issne,
+            issnp=None,
+            embargo=None,
+            title=clean_str(row["Title"]),
+            publisher=clean_str(row["Publisher"]),
+            url=None,
+            start_year=None,
+            end_year=None,
+            start_volume=None,
+            end_volume=None,
+            year_spans=year_spans,
+        )
+        return record
+
+
class HathifilesLoader(KbartLoader):
"""
Similar to the KBART loader class, but for Hathifiles bulk format.
diff --git a/chocula/database.py b/chocula/database.py
index 3e8db57..d4fbbed 100644
--- a/chocula/database.py
+++ b/chocula/database.py
@@ -837,6 +837,8 @@ class ChoculaDatabase:
"jstor",
"pkp_pln",
"hathitrust",
+ "scholarsportal",
+ "cariniana",
):
extra["kbart"] = extra.get("kbart", {})
extra["kbart"][drow["slug"]] = dict(year_spans=dextra["year_spans"])
diff --git a/chocula/kbart.py b/chocula/kbart.py
index 3944430..23c5533 100644
--- a/chocula/kbart.py
+++ b/chocula/kbart.py
@@ -1,4 +1,4 @@
-from chocula.common import KbartLoader, OnixCsvLoader, HathifilesLoader
+from chocula.common import KbartLoader, OnixCsvLoader, CarinianaCsvLoader, HathifilesLoader
class ClockssKbartLoader(KbartLoader):
@@ -41,6 +41,22 @@ class PkpPlnOnixLoader(OnixCsvLoader):
return self.config.pkp_pln.filepath
+class CarinianaLoader(CarinianaCsvLoader):
+    """CSV loader registered under the "cariniana" source slug."""
+
+    source_slug = "cariniana"
+
+    def file_path(self) -> str:
+        """Return the input path taken from ``config.cariniana.filepath``."""
+        source_config = self.config.cariniana
+        return source_config.filepath
+
+
+class ScholarsportalOnixLoader(OnixCsvLoader):
+    """ONIX CSV loader registered under the "scholarsportal" source slug."""
+
+    source_slug = "scholarsportal"
+
+    def file_path(self) -> str:
+        """Return the input path taken from ``config.scholarsportal.filepath``."""
+        source_config = self.config.scholarsportal
+        return source_config.filepath
+
+
class HathitrustLoader(HathifilesLoader):
source_slug = "hathitrust"
@@ -55,5 +71,7 @@ ALL_CHOCULA_KBART_CLASSES = [
PorticoKbartLoader,
JstorKbartLoader,
PkpPlnOnixLoader,
+ CarinianaLoader,
+ ScholarsportalOnixLoader,
HathitrustLoader,
]