diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-08-31 20:03:37 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-08-31 20:03:37 -0700 |
commit | 4cafb8dee8dac0816907088878a86e8ff1c7b320 (patch) | |
tree | 75a979cb7b31e8356cec8a167b5bd59c9c1e3794 /chocula/common.py | |
parent | 44548721f42eb71ee08cd8667cf129cddb2d3341 (diff) | |
download | chocula-4cafb8dee8dac0816907088878a86e8ff1c7b320.tar.gz chocula-4cafb8dee8dac0816907088878a86e8ff1c7b320.zip |
add support for PKP PLN (KBART-like)
Diffstat (limited to 'chocula/common.py')
-rw-r--r-- | chocula/common.py | 46 |
1 files changed, 46 insertions, 0 deletions
# Added to chocula/common.py by the diff hunk
# "@@ -170,3 +170,49 @@ class KbartLoader:" (index a87a736..763997b).
# The hunk's unchanged context lines, which precede this class, were:
#     cur.close()
#     db.db.commit()
#     return counts


class OnixCsvLoader(KbartLoader):
    """
    Similar to the KBART loader class, but for ONIX CSV files instead of KBART
    formatted TSV.

    CSV columns:
    - ISSN
    - Title
    - Publisher
    - Url
    - Vol
    - No
    - Published
    - Deposited
    """

    def open_file(self) -> Iterable:
        """
        Iterate over the rows of the CSV file at ``self.file_path()``.

        Yields one dict per data row (as produced by ``csv.DictReader``).

        This is a generator so that the file handle is reliably closed once
        iteration finishes or the iterator is discarded. As a consequence the
        file is opened on first iteration, not when this method is called.
        """
        # newline="" is what the csv module requires so that newlines embedded
        # in quoted fields are parsed correctly.
        with open(self.file_path(), "r", newline="") as f:
            # skip first line of PKP PLN Onix file, which is a "generated
            # date" header. The default avoids StopIteration on an empty file.
            if self.source_slug == "pkp_pln":
                next(f, None)
            yield from csv.DictReader(f)

    def parse_record(self, row: dict, issn_db: IssnDatabase) -> Optional[KbartRecord]:
        """
        Convert a single CSV row into a KbartRecord.

        The row describes a single point in time, so the start and end
        year/volume of the record are both set from the same "Published" and
        "Vol" values.

        Returns None when the "Published" column does not begin with a
        four-digit year (blank, missing value, or malformed), so that one bad
        row is skipped instead of aborting the whole load.

        Raises KeyError if an expected column is absent from the row.
        """
        raw_issn = clean_issn(row["ISSN"])
        # NOTE(review): presumably issn2issnl() returns None for an unknown or
        # empty ISSN -- confirm against IssnDatabase.
        issnl = issn_db.issn2issnl(raw_issn or "")

        # csv.DictReader fills short rows with None, hence the `or ""`.
        try:
            start_year = int((row["Published"] or "")[:4])
        except ValueError:
            return None

        start_volume = clean_str(row["Vol"])
        return KbartRecord(
            issnl=issnl,
            issne=None,
            issnp=None,
            embargo=None,
            title=clean_str(row["Title"]),
            publisher=clean_str(row["Publisher"]),
            url=HomepageUrl.from_url(row["Url"]),
            start_year=start_year,
            end_year=start_year,
            start_volume=start_volume,
            end_volume=start_volume,
            year_spans=[],
        )