aboutsummaryrefslogtreecommitdiffstats
path: root/chocula/directories/mag.py
blob: dbbc324f8475851c7fcf12148f4f7f73178bd23c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from typing import Iterable, Optional
import csv

from chocula.util import clean_str, clean_issn
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo, HomepageUrl


class MagLoader(DirectoryLoader):
    """
    Loads journal records from the "mag" directory TSV file (presumably the
    Microsoft Academic Graph "Journals" table, judging by the schema below).

    The file is read without a header row; column names are supplied
    explicitly in the order given here.

    TSV Columns (from schema docs):

        1   JournalId           long        PRIMARY KEY
        2   Rank                uint        See FAQ
        3   NormalizedName      string
        4   DisplayName         string
        5   Issn                string
        6   Publisher           string
        7   Webpage             string
        8   PaperCount          long
        9   PaperFamilyCount    long        See FAQ
        10  CitationCount       long
        11  CreatedDate         DateTime

    """

    source_slug = "mag"

    # Column names, in file order, used as the DictReader field names.
    _FIELDNAMES = (
        "JournalId",
        "Rank",
        "NormalizedName",
        "DisplayName",
        "Issn",
        "Publisher",
        "Webpage",
        "PaperCount",
        "PaperFamilyCount",
        "CitationCount",
        "CreatedDate",
    )

    def open_file(self) -> Iterable:
        """
        Yield one dict per row of the TSV file at `self.config.mag.filepath`.

        Implemented as a generator so the underlying file handle is closed
        once iteration finishes (or the generator is discarded), instead of
        being left open for the lifetime of the process. The file is
        therefore opened on first iteration, not when this method is called.
        """
        # newline="" is what the csv module requires for correct handling of
        # line endings. The explicit encoding avoids depending on the
        # platform locale (assumes the dump is UTF-8 -- TODO confirm).
        with open(
            self.config.mag.filepath, "r", newline="", encoding="utf-8"
        ) as tsv_file:
            yield from csv.DictReader(
                tsv_file,
                delimiter="\t",
                fieldnames=list(self._FIELDNAMES),
            )

    def parse_record(self, record) -> Optional[DirectoryInfo]:
        """
        Convert one row dict (as produced by `open_file`) into a DirectoryInfo.

        The JournalId column becomes the custom id, DisplayName the name, and
        Publisher the publisher. A homepage URL is attached only when
        `HomepageUrl.from_url` returns a truthy value for the Webpage column.

        This implementation always returns a DirectoryInfo, never None.
        """
        info = DirectoryInfo(
            directory_slug=self.source_slug,
            # csv.DictReader fills missing trailing columns with None on
            # short rows; coalesce to "" before cleaning, mirroring the
            # handling of Webpage below.
            raw_issn=clean_issn(record["Issn"] or ""),
            custom_id=record["JournalId"],
            name=clean_str(record["DisplayName"]),
            publisher=clean_str(record["Publisher"]),
        )
        homepage = HomepageUrl.from_url(record["Webpage"] or "")
        if homepage:
            info.homepage_urls.append(homepage)

        return info