Diffstat (limited to 'chocula/common.py')
-rw-r--r--  chocula/common.py  60
1 file changed, 30 insertions, 30 deletions
diff --git a/chocula/common.py b/chocula/common.py
index a5b3739..455649a 100644
--- a/chocula/common.py
+++ b/chocula/common.py
@@ -1,4 +1,3 @@
-
import sys
import csv
import datetime
@@ -17,7 +16,8 @@ from chocula.database import DirectoryInfo, IssnDatabase, HomepageUrl
csv.field_size_limit(1310720)
THIS_YEAR = datetime.date.today().year
-class DirectoryLoader():
+
+class DirectoryLoader:
source_slug: str = "GENERIC"
@@ -35,7 +35,7 @@ class DirectoryLoader():
counts: Counter = Counter()
cur = db.db.cursor()
for record in self.open_file():
- counts['total'] += 1
+ counts["total"] += 1
info = self.parse_record(record)
if info:
status = db.insert_directory(info, cur=cur)
@@ -44,6 +44,7 @@ class DirectoryLoader():
db.db.commit()
return counts
+
@dataclass
class KbartRecord:
issnl: Optional[str]
@@ -60,7 +61,7 @@ class KbartRecord:
year_spans: List[Any]
-class KbartLoader():
+class KbartLoader:
source_slug: str = "GENERIC"
@@ -68,19 +69,19 @@ class KbartLoader():
self.config = config
def file_path(self) -> str:
- #return self.config.TEMPLATE.filepath)
+ # return self.config.TEMPLATE.filepath)
raise NotImplementedError()
def open_file(self) -> Iterable:
- raw_file = open(self.file_path(), 'rb').read().decode(errors='replace')
+ raw_file = open(self.file_path(), "rb").read().decode(errors="replace")
fixed_file = ftfy.fix_text(raw_file)
- reader = csv.DictReader(fixed_file.split('\n'), delimiter='\t')
+ reader = csv.DictReader(fixed_file.split("\n"), delimiter="\t")
return reader
def parse_record(self, row: dict, issn_db: IssnDatabase) -> Optional[KbartRecord]:
- issne: Optional[str] = clean_issn(row['online_identifier'] or "")
- issnp: Optional[str] = clean_issn(row['print_identifier'] or "")
+ issne: Optional[str] = clean_issn(row["online_identifier"] or "")
+ issnp: Optional[str] = clean_issn(row["print_identifier"] or "")
issnl: Optional[str] = None
if issne:
issnl = issn_db.issn2issnl(issne)
@@ -88,31 +89,31 @@ class KbartLoader():
issnl = issn_db.issn2issnl(issnp)
start_year: Optional[int] = None
end_year: Optional[int] = None
- if row['date_first_issue_online']:
- start_year = int(row['date_first_issue_online'][:4])
- if row['date_last_issue_online']:
- end_year = int(row['date_last_issue_online'][:4])
- end_volume = row['num_last_vol_online']
+ if row["date_first_issue_online"]:
+ start_year = int(row["date_first_issue_online"][:4])
+ if row["date_last_issue_online"]:
+ end_year = int(row["date_last_issue_online"][:4])
+ end_volume = row["num_last_vol_online"]
# hack to handle open-ended preservation
- if end_year is None and end_volume and '(present)' in end_volume:
+ if end_year is None and end_volume and "(present)" in end_volume:
end_year = THIS_YEAR
record = KbartRecord(
issnl=issnl,
issnp=issnp,
issne=issne,
- title=clean_str(row['publication_title']),
- publisher=clean_str(row['publisher_name']),
- url=HomepageUrl.from_url(row['title_url']),
- embargo=clean_str(row['embargo_info']),
+ title=clean_str(row["publication_title"]),
+ publisher=clean_str(row["publisher_name"]),
+ url=HomepageUrl.from_url(row["title_url"]),
+ embargo=clean_str(row["embargo_info"]),
start_year=start_year,
end_year=end_year,
- start_volume=clean_str(row['num_first_vol_online']),
- end_volume=clean_str(row['num_last_vol_online']),
+ start_volume=clean_str(row["num_first_vol_online"]),
+ end_volume=clean_str(row["num_last_vol_online"]),
year_spans=[],
)
- if record.start_volume == 'null':
+ if record.start_volume == "null":
record.start_volume = None
- if record.end_volume == 'null':
+ if record.end_volume == "null":
record.end_volume = None
return record
@@ -126,18 +127,18 @@ class KbartLoader():
counts: Counter = Counter()
kbart_dict: Dict[str, KbartRecord] = dict()
for row in self.open_file():
- counts['total'] += 1
+ counts["total"] += 1
record = self.parse_record(row, db.issn_db)
if record is None:
- counts['skip-parse'] += 1
+ counts["skip-parse"] += 1
continue
elif not record.issnl:
- counts['skip-issnl'] += 1
+ counts["skip-issnl"] += 1
continue
elif record.start_year is None or record.end_year is None:
- counts['partial-missing-years'] += 1
- counts['parsed'] += 1
+ counts["partial-missing-years"] += 1
+ counts["parsed"] += 1
existing = kbart_dict.get(record.issnl, record)
if record.start_year and record.end_year:
@@ -149,7 +150,7 @@ class KbartLoader():
record.year_spans = merge_spans(old_spans, new_spans)
kbart_dict[record.issnl] = record
- counts['unique-issnl'] = len(kbart_dict)
+ counts["unique-issnl"] = len(kbart_dict)
cur = db.db.cursor()
for issnl, record in kbart_dict.items():
info = DirectoryInfo(
@@ -169,4 +170,3 @@ class KbartLoader():
cur.close()
db.db.commit()
return counts
-
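
For orientation, here is a minimal sketch (not part of this commit) of how the KbartLoader base class touched above is meant to be used: a concrete subclass supplies a source_slug and a file_path() pointing at a KBART TSV file, while the base class provides the ftfy cleanup, ISSN-L lookup, "(present)" end-date handling, and per-ISSN-L year-span merging seen in the hunks. The names ExampleKbartLoader and config.example_kbart.filepath are hypothetical, and the top-level loading method is written generically because its name does not appear in these hunks.

    from chocula.common import KbartLoader


    class ExampleKbartLoader(KbartLoader):
        # Hypothetical slug; real subclasses name their preservation source.
        source_slug = "example-kbart"

        def file_path(self) -> str:
            # Hypothetical config attribute pointing at a KBART TSV export.
            return self.config.example_kbart.filepath


    # Rough usage, assuming a chocula config object `config` and the database
    # wrapper `db` seen in the hunks (the method that iterates open_file() and
    # returns a Counter is written as load() here; its real name is outside
    # this diff):
    #
    #   loader = ExampleKbartLoader(config)
    #   counts = loader.load(db)
    #   print(counts["total"], counts["unique-issnl"])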