diff options
Diffstat (limited to 'chocula/common.py')
-rw-r--r-- | chocula/common.py | 60 |
1 files changed, 30 insertions, 30 deletions
diff --git a/chocula/common.py b/chocula/common.py index a5b3739..455649a 100644 --- a/chocula/common.py +++ b/chocula/common.py @@ -1,4 +1,3 @@ - import sys import csv import datetime @@ -17,7 +16,8 @@ from chocula.database import DirectoryInfo, IssnDatabase, HomepageUrl csv.field_size_limit(1310720) THIS_YEAR = datetime.date.today().year -class DirectoryLoader(): + +class DirectoryLoader: source_slug: str = "GENERIC" @@ -35,7 +35,7 @@ class DirectoryLoader(): counts: Counter = Counter() cur = db.db.cursor() for record in self.open_file(): - counts['total'] += 1 + counts["total"] += 1 info = self.parse_record(record) if info: status = db.insert_directory(info, cur=cur) @@ -44,6 +44,7 @@ class DirectoryLoader(): db.db.commit() return counts + @dataclass class KbartRecord: issnl: Optional[str] @@ -60,7 +61,7 @@ class KbartRecord: year_spans: List[Any] -class KbartLoader(): +class KbartLoader: source_slug: str = "GENERIC" @@ -68,19 +69,19 @@ class KbartLoader(): self.config = config def file_path(self) -> str: - #return self.config.TEMPLATE.filepath) + # return self.config.TEMPLATE.filepath) raise NotImplementedError() def open_file(self) -> Iterable: - raw_file = open(self.file_path(), 'rb').read().decode(errors='replace') + raw_file = open(self.file_path(), "rb").read().decode(errors="replace") fixed_file = ftfy.fix_text(raw_file) - reader = csv.DictReader(fixed_file.split('\n'), delimiter='\t') + reader = csv.DictReader(fixed_file.split("\n"), delimiter="\t") return reader def parse_record(self, row: dict, issn_db: IssnDatabase) -> Optional[KbartRecord]: - issne: Optional[str] = clean_issn(row['online_identifier'] or "") - issnp: Optional[str] = clean_issn(row['print_identifier'] or "") + issne: Optional[str] = clean_issn(row["online_identifier"] or "") + issnp: Optional[str] = clean_issn(row["print_identifier"] or "") issnl: Optional[str] = None if issne: issnl = issn_db.issn2issnl(issne) @@ -88,31 +89,31 @@ class KbartLoader(): issnl = issn_db.issn2issnl(issnp) start_year: Optional[int] = None end_year: Optional[int] = None - if row['date_first_issue_online']: - start_year = int(row['date_first_issue_online'][:4]) - if row['date_last_issue_online']: - end_year = int(row['date_last_issue_online'][:4]) - end_volume = row['num_last_vol_online'] + if row["date_first_issue_online"]: + start_year = int(row["date_first_issue_online"][:4]) + if row["date_last_issue_online"]: + end_year = int(row["date_last_issue_online"][:4]) + end_volume = row["num_last_vol_online"] # hack to handle open-ended preservation - if end_year is None and end_volume and '(present)' in end_volume: + if end_year is None and end_volume and "(present)" in end_volume: end_year = THIS_YEAR record = KbartRecord( issnl=issnl, issnp=issnp, issne=issne, - title=clean_str(row['publication_title']), - publisher=clean_str(row['publisher_name']), - url=HomepageUrl.from_url(row['title_url']), - embargo=clean_str(row['embargo_info']), + title=clean_str(row["publication_title"]), + publisher=clean_str(row["publisher_name"]), + url=HomepageUrl.from_url(row["title_url"]), + embargo=clean_str(row["embargo_info"]), start_year=start_year, end_year=end_year, - start_volume=clean_str(row['num_first_vol_online']), - end_volume=clean_str(row['num_last_vol_online']), + start_volume=clean_str(row["num_first_vol_online"]), + end_volume=clean_str(row["num_last_vol_online"]), year_spans=[], ) - if record.start_volume == 'null': + if record.start_volume == "null": record.start_volume = None - if record.end_volume == 'null': + if record.end_volume == "null": record.end_volume = None return record @@ -126,18 +127,18 @@ class KbartLoader(): counts: Counter = Counter() kbart_dict: Dict[str, KbartRecord] = dict() for row in self.open_file(): - counts['total'] += 1 + counts["total"] += 1 record = self.parse_record(row, db.issn_db) if record is None: - counts['skip-parse'] += 1 + counts["skip-parse"] += 1 continue elif not record.issnl: - counts['skip-issnl'] += 1 + counts["skip-issnl"] += 1 continue elif record.start_year is None or record.end_year is None: - counts['partial-missing-years'] += 1 - counts['parsed'] += 1 + counts["partial-missing-years"] += 1 + counts["parsed"] += 1 existing = kbart_dict.get(record.issnl, record) if record.start_year and record.end_year: @@ -149,7 +150,7 @@ class KbartLoader(): record.year_spans = merge_spans(old_spans, new_spans) kbart_dict[record.issnl] = record - counts['unique-issnl'] = len(kbart_dict) + counts["unique-issnl"] = len(kbart_dict) cur = db.db.cursor() for issnl, record in kbart_dict.items(): info = DirectoryInfo( @@ -169,4 +170,3 @@ class KbartLoader(): cur.close() db.db.commit() return counts - |