From 49608bfdd473eeae993086c98c572f735073936e Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 23 Jun 2020 17:24:25 -0700
Subject: ZDB homepage (FIZE) scrape importer

---
 chocula/directories/zdb_fize.py              | 46 ++++++++++++++++++++++++++++
 tests/files/zdb_fize_homepage_available.json | 25 +++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 chocula/directories/zdb_fize.py
 create mode 100644 tests/files/zdb_fize_homepage_available.json

diff --git a/chocula/directories/zdb_fize.py b/chocula/directories/zdb_fize.py
new file mode 100644
index 0000000..a40139a
--- /dev/null
+++ b/chocula/directories/zdb_fize.py
@@ -0,0 +1,46 @@
+import json
+from typing import Iterable, Optional
+
+from chocula.common import DirectoryLoader
+from chocula.database import DirectoryInfo, HomepageUrl
+
+
+class ZdbFizeLoader(DirectoryLoader):
+    """
+    URL metadata scraped from ZDB "FIZE" interface. Consists of just ISSN / URL
+    pair.
+
+    Only interested in the homepage.
+    """
+
+    source_slug = "zdb_fize"
+
+    def open_file(self) -> Iterable:
+        """Open the configured dump (one JSON object per line) for reading."""
+        # JSON text is UTF-8; do not depend on the platform's locale default.
+        return open(self.config.zdb_fize.filepath, "r", encoding="utf-8")
+
+    def parse_record(self, record) -> Optional[DirectoryInfo]:
+        """
+        Parse one line of the dump into a DirectoryInfo.
+
+        Returns None (record is skipped) for blank lines, for records without
+        an "issn" or "homepage" value, and when HomepageUrl.from_url() returns
+        a falsy value for the URL.
+        """
+        if not record.strip():
+            return None
+        row = json.loads(record)
+
+        issn = row.get("issn")
+        url = row.get("homepage")
+        if not issn or not url:
+            return None
+
+        homepage = HomepageUrl.from_url(url)
+        if not homepage:
+            return None
+
+        info = DirectoryInfo(directory_slug=self.source_slug, issnl=issn)
+        info.homepage_urls.append(homepage)
+        return info
diff --git a/tests/files/zdb_fize_homepage_available.json b/tests/files/zdb_fize_homepage_available.json
new file mode 100644
index 0000000..3799c52
--- /dev/null
+++ b/tests/files/zdb_fize_homepage_available.json
@@ -0,0 +1,25 @@
+{"issn":"2229-4937","homepage":"https://search.ebscohost.com/direct.asp?db=aph&jid=HE1T&scope=site"}
+{"issn":"0882-1666","homepage":"https://onlinelibrary.wiley.com/loi/1520684x"}
+{"issn":"2141-7482","homepage":"http://www.e3journals.org/journal.php?jid=1"}
+{"issn":"0195-8208","homepage":"https://search.ebscohost.com/direct.asp?db=aph&jid=TFM&scope=site"}
+{"issn":"2548-3218","homepage":"https://jurnal.ugm.ac.id/sasdayajournal/index"}
+{"issn":"1045-2699","homepage":"https://onlinelibrary.wiley.com/loi/15227111"}
+{"issn":"0021-4396","homepage":"https://www.jstage.jst.go.jp/browse/imono/-char/ja"}
+{"issn":"1040-2861","homepage":"https://search.ebscohost.com/direct.asp?db=aph&jid=MHC&scope=site"}
+{"issn":"0083-4041","homepage":"https://heinonline.org/HOL/Index?index=journals/qland&collection=journals"}
+{"issn":"0187-3180","homepage":"http://www.redalyc.org/revista.oa?id=883"}
+{"issn":"1938-1972","homepage":"http://www.tandfonline.com/toc/kder20/current#.VR0Ji2PLbDw"}
+{"issn":"2043-1155","homepage":"http://gateway.proquest.com/openurl?url_ver=Z39.88-2004&res_dat=xri:pqm&rft_val_fmt=info:ofi/fmt:kev:mtx:journal&genre=journal&req_dat=xri:pqil:&svc_dat=xri:pqil:context=title&rft_dat=xri:pqd:PMID=7755"}
+{"issn":"2360-8560","homepage":"http://www.academicjournals.org/JPTAF/"}
+{"issn":"0473-1425","homepage":"http://orchidee.de/e-paper/taxonomische-mitteilungen/"}
+{"issn":"2413-0974","homepage":"http://maplants.elpub.ru/jour"}
+{"issn":"1594-2848","homepage":"http://www.ppgedizioni.it/pages/riviste_trends.aspx"}
+{"issn":"1813-176X","homepage":"http://www.medwelljournals.com/journalhome.php?jid=1813-176x"}
+{"issn":"0287-0762","homepage":"https://www.jstage.jst.go.jp/browse/jsvc1968"}
+{"issn":"0274-8096","homepage":"https://search.ebscohost.com/direct.asp?db=bth&jid=8MI&scope=site"}
+{"issn":"2366-3987","homepage":"https://onlinelibrary.wiley.com/journal/23663987"}
+{"issn":"1868-940X","homepage":"http://www.doabooks.org/doab?func=advancedSearch&uiLanguage=en&fromWeb=1&first=1&query1=Personalmanagement+und+Organisation&field1=all&bool1=AND&query2=&field2=all&pubYear=allYears&fromYear=&toYear"}
+{"issn":"2236-5192","homepage":"http://www2.marilia.unesp.br/revistas/index.php/educacaoemrevista/index"}
+{"issn":"1751-8490","homepage":"http://gateway.proquest.com/openurl?url_ver=Z39.88-2004&res_dat=xri:pqm&rft_val_fmt=info:ofi/fmt:kev:mtx:journal&genre=journal&req_dat=xri:pqil:&svc_dat=xri:pqil:context=title&rft_dat=xri:pqd:PMID=1820382"}
+{"issn":"2447-2115","homepage":"http://ggaging.com/previous-numbers"}
+{"issn":"2572-3618","homepage":"http://www.tandfonline.com/toc/rcle20/current"}
-- 
cgit v1.2.3