# Recovered from a whitespace-flattened git patch against chocula/common.py
# (commit 4cafb8dee8dac0816907088878a86e8ff1c7b320, "add support for PKP PLN
# (KBART-like)"). The patch appends this class directly after KbartLoader.
class OnixCsvLoader(KbartLoader):
    """
    Similar to the KBART loader class, but for ONIX CSV files instead of
    KBART-formatted TSV.

    CSV columns:
    - ISSN
    - Title
    - Publisher
    - Url
    - Vol
    - No
    - Published
    - Deposited
    """

    def open_file(self) -> Iterable:
        """
        Yield one dict per CSV data row, keyed by the CSV header names.

        This is a generator: the file is opened on first iteration and is
        closed as soon as iteration finishes or the generator is discarded,
        so the handle is never left dangling.
        """
        # newline="" is what the csv module asks for, so that quoted fields
        # containing line breaks are parsed correctly.
        with open(self.file_path(), "r", newline="") as f:
            # skip first line of PKP PLN Onix file, which is a "generated
            # date" header
            if self.source_slug == "pkp_pln":
                # The default keeps an empty file from raising StopIteration
                # (which would surface as RuntimeError inside a generator).
                next(f, None)
            yield from csv.DictReader(f)

    def parse_record(self, row: dict, issn_db: IssnDatabase) -> Optional[KbartRecord]:
        """
        Build a KbartRecord from a single CSV row.

        The row carries one publication date and one volume, so the start
        and end year (and start and end volume) are set to the same value.
        The ISSN-L is looked up from the row's "ISSN" column via `issn_db`.

        Returns None when the "Published" value does not begin with a
        parseable year (blank, malformed, or missing because the row was
        short), instead of aborting the whole load with an exception.
        NOTE(review): assumes the caller treats None as "skip this row" --
        confirm against KbartLoader.
        """
        try:
            # Only the leading four characters (the year) are used.
            # DictReader fills missing trailing columns with None, hence the
            # `or ""` guard.
            start_year = int((row["Published"] or "")[:4])
        except ValueError:
            return None

        raw_issn = clean_issn(row["ISSN"])
        # issn2issnl() is always given a string, even when the ISSN did not
        # survive cleaning.
        issnl = issn_db.issn2issnl(raw_issn or "")
        start_volume = clean_str(row["Vol"])

        return KbartRecord(
            issnl=issnl,
            issne=None,
            issnp=None,
            embargo=None,
            title=clean_str(row["Title"]),
            publisher=clean_str(row["Publisher"]),
            url=HomepageUrl.from_url(row["Url"]),
            start_year=start_year,
            end_year=start_year,
            start_volume=start_volume,
            end_volume=start_volume,
            year_spans=[],
        )