about | summary | refs | log | tree | commit | diff | stats
path: root/chocula
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-08-31 20:03:37 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-08-31 20:03:37 -0700
commit: 4cafb8dee8dac0816907088878a86e8ff1c7b320 (patch)
tree: 75a979cb7b31e8356cec8a167b5bd59c9c1e3794 /chocula
parent: 44548721f42eb71ee08cd8667cf129cddb2d3341 (diff)
download: chocula-4cafb8dee8dac0816907088878a86e8ff1c7b320.tar.gz
download: chocula-4cafb8dee8dac0816907088878a86e8ff1c7b320.zip
add support for PKP PLN (KBART-like)
Diffstat (limited to 'chocula')
-rw-r--r-- chocula/__main__.py 1
-rw-r--r-- chocula/common.py 46
-rw-r--r-- chocula/kbart.py 11
3 files changed, 57 insertions, 1 deletions
diff --git a/chocula/__main__.py b/chocula/__main__.py
index 4eb1982..ebd4120 100644
--- a/chocula/__main__.py
+++ b/chocula/__main__.py
@@ -40,6 +40,7 @@ Commands:
clockss
lockss
portico
+ pkp_pln
See TODO.md for more work-in-progress
"""
diff --git a/chocula/common.py b/chocula/common.py
index a87a736..763997b 100644
--- a/chocula/common.py
+++ b/chocula/common.py
@@ -170,3 +170,49 @@ class KbartLoader:
cur.close()
db.db.commit()
return counts
+
+
+class OnixCsvLoader(KbartLoader):
+ """
+ Similar to the KBART loader class, but for ONIX CSV files instead of KBART
+ formatted TSV.
+
+ CSV columns:
+ - ISSN
+ - Title
+ - Publisher
+ - Url
+ - Vol
+ - No
+ - Published
+ - Deposited
+ """
+
+ def open_file(self) -> Iterable:
+ f = open(self.file_path(), "r")
+ # skip first line of PKP PLN Onix file, which is a "generated date" header
+ if self.source_slug == "pkp_pln":
+ next(f)
+ return csv.DictReader(f)
+
+ def parse_record(self, row: dict, issn_db: IssnDatabase) -> Optional[KbartRecord]:
+
+ raw_issn = clean_issn(row["ISSN"])
+ issnl = issn_db.issn2issnl(raw_issn or "")
+ start_year = int(row["Published"][:4])
+ start_volume = clean_str(row["Vol"])
+ record = KbartRecord(
+ issnl=issnl,
+ issne=None,
+ issnp=None,
+ embargo=None,
+ title=clean_str(row["Title"]),
+ publisher=clean_str(row["Publisher"]),
+ url=HomepageUrl.from_url(row["Url"]),
+ start_year=start_year,
+ end_year=start_year,
+ start_volume=start_volume,
+ end_volume=start_volume,
+ year_spans=[],
+ )
+ return record
diff --git a/chocula/kbart.py b/chocula/kbart.py
index e74e19f..5fd0acc 100644
--- a/chocula/kbart.py
+++ b/chocula/kbart.py
@@ -1,4 +1,4 @@
-from chocula.common import KbartLoader
+from chocula.common import KbartLoader, OnixCsvLoader
class ClockssKbartLoader(KbartLoader):
@@ -33,9 +33,18 @@ class JstorKbartLoader(KbartLoader):
return self.config.jstor.filepath
+class PkpPlnOnixLoader(OnixCsvLoader):
+
+ source_slug = "pkp_pln"
+
+ def file_path(self) -> str:
+ return self.config.pkp_pln.filepath
+
+
ALL_CHOCULA_KBART_CLASSES = [
ClockssKbartLoader,
LockssKbartLoader,
PorticoKbartLoader,
JstorKbartLoader,
+ PkpPlnOnixLoader,
]