aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-06-23 17:24:25 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-06-23 17:24:25 -0700
commit 49608bfdd473eeae993086c98c572f735073936e (patch)
tree 48d0e1379e8d85cf66b826c8e679fbf462910d4f
parent 6f9bd607be3fbf2d77368ba2a0a5b35589b8cc60 (diff)
download chocula-49608bfdd473eeae993086c98c572f735073936e.tar.gz
chocula-49608bfdd473eeae993086c98c572f735073936e.zip
ZDB homepage (FIZE) scrape importer
-rw-r--r-- chocula/directories/zdb_fize.py 34
-rw-r--r-- tests/files/zdb_fize_homepage_available.json 25
2 files changed, 59 insertions, 0 deletions
diff --git a/chocula/directories/zdb_fize.py b/chocula/directories/zdb_fize.py
new file mode 100644
index 0000000..a40139a
--- /dev/null
+++ b/chocula/directories/zdb_fize.py
@@ -0,0 +1,34 @@
+import json
+from typing import Iterable, Optional
+
+from chocula.common import DirectoryLoader
+from chocula.database import DirectoryInfo, HomepageUrl
+
+
+class ZdbFizeLoader(DirectoryLoader):
+    """
+    Loader for homepage URLs scraped from the ZDB "FIZE" interface.
+
+    Each record is one line of JSON holding an "issn" / "homepage" pair.
+
+    Only interested in the homepage.
+    """
+
+    source_slug = "zdb_fize"
+
+    def open_file(self) -> Iterable:
+        """
+        Open the configured ZDB FIZE dump in text mode and return the handle.
+
+        The handle is returned still open; this method does not close it.
+        """
+        # JSON is UTF-8 by specification, so do not rely on the locale default.
+        return open(self.config.zdb_fize.filepath, "r", encoding="utf-8")
+
+    def parse_record(self, record) -> Optional[DirectoryInfo]:
+        """
+        Convert one line of the dump into a DirectoryInfo.
+
+        Returns None for a blank line, for a row with no "issn" or no
+        "homepage" value, and when HomepageUrl.from_url() yields nothing
+        usable. Malformed JSON still raises json.JSONDecodeError.
+        """
+        if not record.strip():
+            return None
+        row = json.loads(record)
+
+        issn = row.get("issn")
+        raw_url = row.get("homepage")
+        if not issn or not raw_url:
+            # Incomplete row: skip it instead of aborting the load on KeyError.
+            return None
+
+        # The scraped URLs carry HTML-escaped ampersands ("&amp;") in their
+        # query strings; decode them so the stored URL is the real one. Only
+        # "&amp;" is rewritten: a general entity decoder could mangle raw
+        # query parameters such as "&section=".
+        homepage = HomepageUrl.from_url(raw_url.replace("&amp;", "&"))
+        if not homepage:
+            return None
+
+        info = DirectoryInfo(directory_slug=self.source_slug, issnl=issn)
+        info.homepage_urls.append(homepage)
+        return info
diff --git a/tests/files/zdb_fize_homepage_available.json b/tests/files/zdb_fize_homepage_available.json
new file mode 100644
index 0000000..3799c52
--- /dev/null
+++ b/tests/files/zdb_fize_homepage_available.json
@@ -0,0 +1,25 @@
+{"issn":"2229-4937","homepage":"https://search.ebscohost.com/direct.asp?db=aph&amp;jid=HE1T&amp;scope=site"}
+{"issn":"0882-1666","homepage":"https://onlinelibrary.wiley.com/loi/1520684x"}
+{"issn":"2141-7482","homepage":"http://www.e3journals.org/journal.php?jid=1"}
+{"issn":"0195-8208","homepage":"https://search.ebscohost.com/direct.asp?db=aph&amp;jid=TFM&amp;scope=site"}
+{"issn":"2548-3218","homepage":"https://jurnal.ugm.ac.id/sasdayajournal/index"}
+{"issn":"1045-2699","homepage":"https://onlinelibrary.wiley.com/loi/15227111"}
+{"issn":"0021-4396","homepage":"https://www.jstage.jst.go.jp/browse/imono/-char/ja"}
+{"issn":"1040-2861","homepage":"https://search.ebscohost.com/direct.asp?db=aph&amp;jid=MHC&amp;scope=site"}
+{"issn":"0083-4041","homepage":"https://heinonline.org/HOL/Index?index=journals/qland&amp;collection=journals"}
+{"issn":"0187-3180","homepage":"http://www.redalyc.org/revista.oa?id=883"}
+{"issn":"1938-1972","homepage":"http://www.tandfonline.com/toc/kder20/current#.VR0Ji2PLbDw"}
+{"issn":"2043-1155","homepage":"http://gateway.proquest.com/openurl?url_ver=Z39.88-2004&amp;res_dat=xri:pqm&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:journal&amp;genre=journal&amp;req_dat=xri:pqil:&amp;svc_dat=xri:pqil:context=title&amp;rft_dat=xri:pqd:PMID=7755"}
+{"issn":"2360-8560","homepage":"http://www.academicjournals.org/JPTAF/"}
+{"issn":"0473-1425","homepage":"http://orchidee.de/e-paper/taxonomische-mitteilungen/"}
+{"issn":"2413-0974","homepage":"http://maplants.elpub.ru/jour"}
+{"issn":"1594-2848","homepage":"http://www.ppgedizioni.it/pages/riviste_trends.aspx"}
+{"issn":"1813-176X","homepage":"http://www.medwelljournals.com/journalhome.php?jid=1813-176x"}
+{"issn":"0287-0762","homepage":"https://www.jstage.jst.go.jp/browse/jsvc1968"}
+{"issn":"0274-8096","homepage":"https://search.ebscohost.com/direct.asp?db=bth&amp;jid=8MI&amp;scope=site"}
+{"issn":"2366-3987","homepage":"https://onlinelibrary.wiley.com/journal/23663987"}
+{"issn":"1868-940X","homepage":"http://www.doabooks.org/doab?func=advancedSearch&amp;uiLanguage=en&amp;fromWeb=1&amp;first=1&amp;query1=Personalmanagement+und+Organisation&amp;field1=all&amp;bool1=AND&amp;query2=&amp;field2=all&amp;pubYear=allYears&amp;fromYear=&amp;toYear"}
+{"issn":"2236-5192","homepage":"http://www2.marilia.unesp.br/revistas/index.php/educacaoemrevista/index"}
+{"issn":"1751-8490","homepage":"http://gateway.proquest.com/openurl?url_ver=Z39.88-2004&amp;res_dat=xri:pqm&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:journal&amp;genre=journal&amp;req_dat=xri:pqil:&amp;svc_dat=xri:pqil:context=title&amp;rft_dat=xri:pqd:PMID=1820382"}
+{"issn":"2447-2115","homepage":"http://ggaging.com/previous-numbers"}
+{"issn":"2572-3618","homepage":"http://www.tandfonline.com/toc/rcle20/current"}