# chocula/directories/norwegian.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81

from typing import Iterable, Optional
import csv

from chocula.util import clean_str, parse_lang, parse_country
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo, HomepageUrl


class NorwegianLoader(DirectoryLoader):
    """
    Directory loader for the "norwegian" source.

    Reads a semicolon-delimited, ISO-8859-1 encoded CSV file whose path is
    taken from ``self.config.norwegian.filepath`` and converts each row into
    a ``DirectoryInfo`` record.

    CSV Columns (2020 file):

        NSD tidsskrift_id
        Original title
        International title
        Print ISSN
        Online ISSN
        Open Access
        NPI Academic Discipline
        NPI Scientific Field
        Level 2020
        Level 2019
        Level 2018
        Level 2017
        Level 2016
        Level 2015
        Level 2014
        Level 2013
        Level 2012
        Level 2011
        Level 2010
        Level 2009
        Level 2008
        Level 2007
        Level 2006
        Level 2005
        Level 2004
        itar_id
        NSD forlag_id
        Publishing Company
        Publisher
        Country of publication
        Language
        Conference Proceedings
        Established
        Ceased
        URL

    """

    source_slug = "norwegian"

    def open_file(self) -> Iterable:
        """
        Yield one dict per CSV row, keyed by the column names in the header.

        Implemented as a generator so the underlying file handle is closed
        once iteration finishes (or the generator is discarded), instead of
        being left open for the lifetime of the process.
        """
        # newline="" lets the csv module do its own line-ending handling, as
        # recommended by the csv documentation.
        with open(
            self.config.norwegian.filepath, encoding="ISO-8859-1", newline=""
        ) as csv_file:
            yield from csv.DictReader(csv_file, delimiter=";")

    def parse_record(self, row) -> Optional[DirectoryInfo]:
        """
        Build a ``DirectoryInfo`` from a single CSV row.

        ``row`` is a mapping from column name to string value, as produced
        by ``open_file``. Raises ``KeyError`` if an expected column is
        missing and ``ValueError`` if "Level 2019" is non-empty but not an
        integer.
        """
        info = DirectoryInfo(
            directory_slug=self.source_slug,
            issnp=row["Print ISSN"],
            issne=row["Online ISSN"],
            custom_id=clean_str(row["NSD tidsskrift_id"]),
            publisher=clean_str(row["Publisher"]),
            country=parse_country(row["Country of publication"]),
            name=clean_str(row.get("International title")),
            # Single "Language" column; keep it only if parse_lang returned
            # something truthy, otherwise langs is an empty list.
            langs=[lang for lang in [parse_lang(row["Language"])] if lang],
        )

        info.extra["as_of"] = self.config.norwegian.date
        # NOTE(review): the documented 2020 file also has a "Level 2020"
        # column; confirm that reading the 2019 level is still intended.
        if row["Level 2019"]:
            info.extra["level"] = int(row["Level 2019"])

        # Only record the original title when it differs from the
        # international one used as the name.
        if row["Original title"] != row["International title"]:
            info.original_name = clean_str(row["Original title"])

        # Append the homepage only when from_url returns a truthy value.
        url = HomepageUrl.from_url(row["URL"])
        if url:
            info.homepage_urls.append(url)

        return info