aboutsummaryrefslogtreecommitdiffstats
path: root/chocula
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-06-23 11:39:10 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-06-23 11:41:49 -0700
commit 9f53eb4c4fd4030965fe004184c803b41fa49b04 (patch)
tree 70532f2f32efdcb5326f7844560e6b3e8a5441f1 /chocula
parent cd7fce808e60c09d184b7ec7e72570494a87d399 (diff)
download chocula-9f53eb4c4fd4030965fe004184c803b41fa49b04.tar.gz
chocula-9f53eb4c4fd4030965fe004184c803b41fa49b04.zip
new manual homepage source
Diffstat (limited to 'chocula')
-rw-r--r-- chocula/directories/__init__.py 2
-rw-r--r-- chocula/directories/manual_homepages.py 47
2 files changed, 49 insertions, 0 deletions
diff --git a/chocula/directories/__init__.py b/chocula/directories/__init__.py
index 90e6f26..6f32c8e 100644
--- a/chocula/directories/__init__.py
+++ b/chocula/directories/__init__.py
@@ -11,6 +11,7 @@ from chocula.directories.sim import SimLoader
from chocula.directories.scielo import ScieloLoader
from chocula.directories.szczepanski import SzczepanskiLoader
from chocula.directories.wikidata import WikidataLoader
+from chocula.directories.manual_homepages import ManualHomepageLoader
ALL_CHOCULA_DIR_CLASSES = [
CrossrefLoader,
@@ -26,4 +27,5 @@ ALL_CHOCULA_DIR_CLASSES = [
WikidataLoader,
SimLoader,
ScieloLoader,
+ ManualHomepageLoader,
]
diff --git a/chocula/directories/manual_homepages.py b/chocula/directories/manual_homepages.py
new file mode 100644
index 0000000..3f84794
--- /dev/null
+++ b/chocula/directories/manual_homepages.py
@@ -0,0 +1,47 @@
+import csv
+from typing import Iterable, Optional
+
+from chocula.common import DirectoryLoader
+from chocula.database import DirectoryInfo, HomepageUrl
+
+
+class ManualHomepageLoader(DirectoryLoader):
+    """
+    Directory loader for a manually curated, tab-separated list of journal
+    homepage URLs.
+
+    The file path is read from `self.config.manual_homepages.filepath`.
+    """
+
+    source_slug = "manual_homepages"
+
+    def open_file(self) -> Iterable:
+        """
+        Yields one dict per data row of the TSV file, keyed by column header.
+
+        Written as a generator so that the file handle is closed once the
+        rows have been consumed, rather than being left open.
+        """
+        # newline="" lets the csv module do its own line-ending handling, as
+        # its documentation recommends for files passed to a reader.
+        with open(self.config.manual_homepages.filepath, newline="") as tsv_file:
+            yield from csv.DictReader(tsv_file, delimiter="\t")
+
+    def parse_record(self, record) -> Optional[DirectoryInfo]:
+        """
+        Most of this metadata comes from chocula/fatcat; we are only interested
+        in the homepage URLs.
+
+        The "corrected titles" have been manually entered into fatcat directly.
+
+        CSV columns:
+        - issnl
+        - issnp
+        - issne
+        - name
+        - corrected title
+        - publisher
+        - country
+        - lang
+        - release_count
+        - Homepage URL
+        - Inactive
+
+        Returns a DirectoryInfo carrying exactly one homepage URL, or None when
+        the row has no usable URL (missing, "unknown", shorter than 4
+        characters, or rejected by HomepageUrl.from_url).
+        """
+
+        issnl = record["issnl"]
+        # csv.DictReader fills columns missing from a short row with None, so
+        # normalize to a string before stripping stray whitespace.
+        url = (record["Homepage URL"] or "").strip()
+        if len(url) < 4 or url.lower() == "unknown":
+            return None
+
+        homepage = HomepageUrl.from_url(url)
+        if not homepage:
+            # The URL is the only thing this source contributes; without one
+            # there is nothing worth recording for this row.
+            return None
+
+        info = DirectoryInfo(directory_slug=self.source_slug, issnl=issnl)
+        info.homepage_urls.append(homepage)
+        return info