blob: 4b058b30105ea105ec619a9583baf37a52a74058 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
import csv
from typing import Iterable, Optional
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo, HomepageUrl
class ManualHomepageLoader(DirectoryLoader):
    """
    Directory loader for a manually curated, tab-separated list of journal
    homepage URLs.

    The input path is read from ``self.config.manual_homepages.filepath``.
    """

    source_slug = "manual_homepages"

    def open_file(self) -> Iterable:
        """
        Yield one dict per data row of the tab-separated homepage file.

        Implemented as a generator so the file handle is closed
        deterministically once iteration finishes (or the generator is
        discarded), instead of leaking an open file object. As a consequence
        the file is opened on first iteration, not when this method is called.
        """
        # newline="" is what the csv module asks for so that newlines embedded
        # in quoted fields are handled correctly; the explicit encoding avoids
        # depending on the process locale.
        # NOTE(review): assumes the TSV is UTF-8 encoded -- confirm.
        with open(
            self.config.manual_homepages.filepath,
            newline="",
            encoding="utf-8",
        ) as tsv_file:
            yield from csv.DictReader(tsv_file, delimiter="\t")

    def parse_record(self, record) -> Optional[DirectoryInfo]:
        """
        Convert one row of the homepage file into a DirectoryInfo.

        Most of this metadata comes from chocula/fatcat; we are only interested
        in the homepage URLs.

        The "corrected titles" have been manually entered into fatcat directly.

        CSV columns:
        - issnl
        - issnp
        - issne
        - name
        - corrected title
        - publisher
        - country
        - lang
        - release_count
        - Homepage URL
        - Inactive

        Only the "issnl" and "Homepage URL" columns are read here.

        Returns None when the row has no usable homepage URL: the field is
        missing, is the placeholder "unknown" (case-insensitive), is shorter
        than 4 characters, or is rejected by HomepageUrl.from_url().
        """
        info = DirectoryInfo(
            directory_slug=self.source_slug,
            issnl=record["issnl"],
        )

        url = record["Homepage URL"]
        # csv.DictReader fills missing trailing fields with None, so the None
        # check is required before calling any str method.
        if url is None or url.lower() == "unknown" or len(url) < 4:
            return None

        homepage = HomepageUrl.from_url(url)
        # A row without a parsed homepage carries nothing of interest, so never
        # return a DirectoryInfo with an empty homepage_urls list.
        if not homepage:
            return None

        info.homepage_urls.append(homepage)
        return info
|