aboutsummaryrefslogtreecommitdiffstats
path: root/chocula/directories/road.py
blob: bc550fdcc38373921e6d6a9c01d20a0e351a2bf6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from typing import Iterable, Optional
import csv

from chocula.util import clean_str
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo, HomepageUrl


class RoadLoader(DirectoryLoader):
    """
    Directory loader for the "road" source, read from a tab-delimited file.

    The column names are supplied explicitly to the CSV reader, so the first
    line of the file is treated as data, not as a header.

    CSV Columns:

    - ISSN
    - ISSN-L
    - Short Title
    - Title
    - Publisher
    - URL1
    - URL2
    - Region
    - Lang1
    - Lang2
    """

    source_slug = "road"

    def open_file(self) -> Iterable:
        """
        Yield one dict per row of the file at `self.config.road.filepath`.

        Each dict is keyed by the column names listed in the class docstring.
        This is a generator so the underlying file handle is closed once
        iteration finishes (or the generator is closed/garbage-collected),
        instead of being leaked. As a consequence the file is opened on first
        iteration, not when this method is called.
        """
        # newline="" lets the csv module do its own line-ending handling, as
        # recommended by the csv documentation.
        with open(self.config.road.filepath, newline="") as tsv_file:
            yield from csv.DictReader(
                tsv_file,
                delimiter="\t",
                fieldnames=(
                    "ISSN",
                    "ISSN-L",
                    "Short Title",
                    "Title",
                    "Publisher",
                    "URL1",
                    "URL2",
                    "Region",
                    "Lang1",
                    "Lang2",
                ),
            )

    def parse_record(self, row) -> Optional[DirectoryInfo]:
        """
        Build a `DirectoryInfo` from a single row dict produced by `open_file`.

        Uses the "ISSN-L" column as the raw ISSN and "Short Title" as the
        name; empty "Lang1"/"Lang2" and "URL1"/"URL2" values are skipped.
        """
        info = DirectoryInfo(
            directory_slug=self.source_slug,
            raw_issn=row["ISSN-L"],
            name=clean_str(row["Short Title"]),
            publisher=clean_str(row["Publisher"]),
            langs=[lang for lang in (row["Lang1"], row["Lang2"]) if lang],
        )

        # TODO: region mapping: "Europe and North America"
        # TODO: lang mapping: already alpha-3

        # homepages: only keep URLs that HomepageUrl.from_url accepts
        # (it returns a falsy value otherwise)
        for url in (row["URL1"], row["URL2"]):
            if not url:
                continue
            homepage = HomepageUrl.from_url(url)
            if homepage:
                info.homepage_urls.append(homepage)

        return info