diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-23 17:24:25 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-23 17:24:25 -0700 |
commit | 49608bfdd473eeae993086c98c572f735073936e (patch) | |
tree | 48d0e1379e8d85cf66b826c8e679fbf462910d4f /chocula | |
parent | 6f9bd607be3fbf2d77368ba2a0a5b35589b8cc60 (diff) | |
download | chocula-49608bfdd473eeae993086c98c572f735073936e.tar.gz chocula-49608bfdd473eeae993086c98c572f735073936e.zip |
ZDB homepage (FIZE) scrape importer
Diffstat (limited to 'chocula')
-rw-r--r-- | chocula/directories/zdb_fize.py | 34 |
1 files changed, 34 insertions, 0 deletions
diff --git a/chocula/directories/zdb_fize.py b/chocula/directories/zdb_fize.py
new file mode 100644
index 0000000..a40139a
--- /dev/null
+++ b/chocula/directories/zdb_fize.py
@@ -0,0 +1,34 @@
+import json
+from typing import Iterable, Optional
+
+from chocula.common import DirectoryLoader
+from chocula.database import DirectoryInfo, HomepageUrl
+
+
+class ZdbFizeLoader(DirectoryLoader):
+    """
+    URL metadata scraped from ZDB "FIZE" interface. Consists of just ISSN / URL
+    pair.
+
+    Only interested in the homepage.
+    """
+
+    source_slug = "zdb_fize"
+
+    def open_file(self) -> Iterable:
+        return open(self.config.zdb_fize.filepath, "r")
+
+    def parse_record(self, record) -> Optional[DirectoryInfo]:
+
+        if not record.strip():
+            return None
+        record = json.loads(record)
+
+        info = DirectoryInfo(directory_slug=self.source_slug, issnl=record["issn"])
+
+        homepage = HomepageUrl.from_url(record["homepage"])
+        if homepage:
+            info.homepage_urls.append(homepage)
+        else:
+            return None
+        return info