aboutsummaryrefslogtreecommitdiffstats
path: root/chocula/directories/mag.py
blob: 2b7b8e5718226cc0f2e6dfbadc166ad3e6d44acf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from typing import Iterable, Optional
import csv

from chocula.util import clean_str, clean_issn
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo, HomepageUrl


class MagLoader(DirectoryLoader):
    """
    Loader for the "mag" directory source.

    Reads a header-less, tab-separated journals file (path taken from
    ``self.config.mag.filepath``) and turns each row into a DirectoryInfo.

    TSV Columns (from schema docs):

        1    JournalId    long    PRIMARY KEY
        2    Rank    uint    See FAQ
        3    NormalizedName    string
        4    DisplayName    string
        5    Issn    string
        6    Publisher    string
        7    Webpage    string
        8    PaperCount    long
        9    PaperFamilyCount    long    See FAQ
        10   CitationCount    long
        11   CreatedDate    DateTime

    """

    source_slug = "mag"

    def open_file(self) -> Iterable:
        """
        Lazily yield one dict per TSV row, keyed by the column names above.

        This is a generator so that the file handle lives inside a ``with``
        block and is closed as soon as iteration finishes or the generator
        is discarded. As a consequence the file is opened on the first
        iteration step rather than at the moment this method is called.
        """
        # newline="" leaves line-ending handling to the csv module, as the
        # csv documentation recommends for files handed to a reader.
        with open(self.config.mag.filepath, "r", newline="") as tsv_file:
            # Column names are supplied explicitly, so the first line of the
            # file is treated as data rather than as a header row.
            yield from csv.DictReader(
                tsv_file,
                delimiter="\t",
                fieldnames=[
                    "JournalId",
                    "Rank",
                    "NormalizedName",
                    "DisplayName",
                    "Issn",
                    "Publisher",
                    "Webpage",
                    "PaperCount",
                    "PaperFamilyCount",
                    "CitationCount",
                    "CreatedDate",
                ],
            )

    def parse_record(self, record) -> Optional[DirectoryInfo]:
        """
        Build a DirectoryInfo from one row dict produced by open_file().

        The ISSN, journal id, display name and publisher are copied over
        (the text fields going through clean_issn / clean_str), and the
        webpage is attached as a homepage URL when HomepageUrl.from_url
        returns a truthy value for it.
        """
        info = DirectoryInfo(
            directory_slug=self.source_slug,
            # DictReader fills columns missing from a short row with None;
            # fall back to "" so clean_issn always receives a string, the
            # same way the Webpage column is guarded below.
            raw_issn=clean_issn(record["Issn"] or ""),
            custom_id=record["JournalId"],
            name=clean_str(record["DisplayName"]),
            publisher=clean_str(record["Publisher"]),
        )
        homepage = HomepageUrl.from_url(record["Webpage"] or "")
        if homepage:
            info.homepage_urls.append(homepage)

        return info