1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
|
from typing import Iterable, Optional, Dict, Any
import csv
from chocula.util import (
clean_str,
parse_lang,
gaps_to_spans,
)
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo
class SimLoader(DirectoryLoader):
    """Directory loader for the "sim" source.

    Reads a CSV file (path taken from ``self.config.sim.filepath``) and turns
    each row into a ``DirectoryInfo`` tagged with ``directory_slug="sim"``.
    """

    source_slug = "sim"

    def open_file(self) -> Iterable:
        """Return a ``csv.DictReader`` over the configured CSV file.

        The file handle is not closed here; the returned reader pulls rows
        from it lazily, so the handle must stay open while the caller
        iterates.
        """
        # NOTE(review): the csv docs recommend opening with newline="".
        # That is deliberately not done here because parse_record() looks up
        # headers containing a literal "\n"; changing newline handling could
        # alter those keys for CRLF files -- confirm before changing.
        return csv.DictReader(open(self.config.sim.filepath))

    @staticmethod
    def _parse_year(cell: Optional[str]) -> Optional[int]:
        """Parse a year cell; return None for a missing or blank cell."""
        text = (cell or "").strip()
        return int(text) if text else None

    def parse_record(self, row) -> Optional[DirectoryInfo]:
        r"""Convert one CSV row into a ``DirectoryInfo``.

        Columns listed for the input file (only some are read here):

            NA Pub Cat ID
            Title
            Publisher
            ISSN
            Impact Rank
            Total Cities
            Journal Impact Factor
            Eigenfact or Score
            First Volume
            Last Volume
            NA Gaps
            "Scholarly / Peer-\nReviewed"
            "Peer-\nReviewed"
            Pub Type
            Pub Language
            Subjects

        Args:
            row: mapping of column header to cell text for a single record.

        Returns:
            A ``DirectoryInfo`` whose ``extra`` dict carries the year range,
            gap years, derived year spans and the peer-review / publication
            type columns.

        Raises:
            KeyError: if a column accessed with ``row[...]`` is absent.
            ValueError: if a year or gap cell is not an integer.
        """
        # TODO: 'Pub Type'
        extra: Dict[str, Any] = {}

        # Blank / whitespace-only year cells are treated as "unknown" (None)
        # and are not recorded in `extra`.
        first_year = self._parse_year(row["First Volume"])
        if first_year is not None:
            extra["first_year"] = first_year

        last_year = self._parse_year(row["Last Volume"])
        if last_year is not None:
            extra["last_year"] = last_year

        # "NA Gaps" is a ';'-separated list of years; empty tokens are skipped.
        gaps = [int(g) for g in row["NA Gaps"].split(";") if g.strip()]
        if gaps:
            extra["gaps"] = gaps

        # Spans are only computed when both ends of the range are known
        # (truthiness check kept as in the original, so a 0 year is skipped).
        if first_year and last_year:
            extra["year_spans"] = gaps_to_spans(first_year, last_year, gaps)

        # These two headers contain an embedded newline in the source file.
        extra["scholarly_peer_reviewed"] = row["Scholarly / Peer-\nReviewed"]
        extra["peer_reviewed"] = row["Peer-\nReviewed"]
        extra["pub_type"] = clean_str(row["Pub Type"])

        # A missing or None ID must not crash on .strip(); fall back to "".
        custom_id = (row.get("NA Pub Cat ID") or "").strip() or None

        # parse_lang() may yield a falsy value; keep only a usable language.
        lang = parse_lang(row["Pub Language"])
        langs = [lang] if lang else []

        return DirectoryInfo(
            directory_slug=self.source_slug,
            name=clean_str(row["Title"]),
            publisher=clean_str(row["Publisher"]),
            # Only the first 9 characters of the ISSN cell are passed on.
            raw_issn=row["ISSN"][:9],
            custom_id=custom_id,
            langs=langs,
            extra=extra,
        )
|