aboutsummaryrefslogtreecommitdiffstats
path: root/chocula
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2020-06-23 17:24:25 -0700
committerBryan Newbold <bnewbold@archive.org>2020-06-23 17:24:25 -0700
commit49608bfdd473eeae993086c98c572f735073936e (patch)
tree48d0e1379e8d85cf66b826c8e679fbf462910d4f /chocula
parent6f9bd607be3fbf2d77368ba2a0a5b35589b8cc60 (diff)
downloadchocula-49608bfdd473eeae993086c98c572f735073936e.tar.gz
chocula-49608bfdd473eeae993086c98c572f735073936e.zip
ZDB homepage (FIZE) scrape importer
Diffstat (limited to 'chocula')
-rw-r--r--chocula/directories/zdb_fize.py34
1 files changed, 34 insertions, 0 deletions
diff --git a/chocula/directories/zdb_fize.py b/chocula/directories/zdb_fize.py
new file mode 100644
index 0000000..a40139a
--- /dev/null
+++ b/chocula/directories/zdb_fize.py
@@ -0,0 +1,34 @@
+import json
+from typing import Iterable, Optional
+
+from chocula.common import DirectoryLoader
+from chocula.database import DirectoryInfo, HomepageUrl
+
+
+class ZdbFizeLoader(DirectoryLoader):
+    """
+    Directory loader for URL metadata scraped from the ZDB "FIZE" interface.
+
+    Each record consists of just an ISSN / URL pair, serialized as one JSON
+    object with "issn" and "homepage" keys.
+
+    Only the homepage is of interest: records without a usable homepage URL
+    are skipped.
+    """
+
+    source_slug = "zdb_fize"
+
+    def open_file(self) -> Iterable:
+        """
+        Open the file named by `self.config.zdb_fize.filepath` for reading.
+
+        The handle is returned still open; this method does not close it, so
+        closing is left to whoever consumes the iterable.
+        """
+        # JSON text is UTF-8 by specification; decode explicitly instead of
+        # depending on the platform's locale default.
+        return open(self.config.zdb_fize.filepath, "r", encoding="utf-8")
+
+    def parse_record(self, record) -> Optional[DirectoryInfo]:
+        """
+        Convert one raw record (a string holding a JSON object) into a
+        DirectoryInfo carrying a single homepage URL.
+
+        Returns None for blank records, for records with no "homepage" value,
+        and for homepage values for which HomepageUrl.from_url() returns
+        something falsy. Invalid JSON (json.JSONDecodeError) and a missing
+        "issn" key (KeyError) still raise.
+        """
+        if not record.strip():
+            return None
+        # Keep the decoded object under its own name rather than rebinding the
+        # `record` parameter from text to a mapping.
+        row = json.loads(record)
+
+        # A record with no homepage value has nothing to contribute; skip it
+        # the same way as a homepage that fails to parse, instead of raising
+        # KeyError.
+        # NOTE(review): assumes HomepageUrl.from_url() would also reject an
+        # empty string -- confirm against chocula.database.
+        raw_url = row.get("homepage")
+        if not raw_url:
+            return None
+        homepage = HomepageUrl.from_url(raw_url)
+        if not homepage:
+            return None
+
+        # Build the DirectoryInfo only once the record is known to be kept.
+        info = DirectoryInfo(directory_slug=self.source_slug, issnl=row["issn"])
+        info.homepage_urls.append(homepage)
+        return info