aboutsummaryrefslogtreecommitdiffstats
path: root/chocula/directories/issn_meta.py
diff options
context:
space:
mode:
Diffstat (limited to 'chocula/directories/issn_meta.py')
-rw-r--r--chocula/directories/issn_meta.py61
1 files changed, 61 insertions, 0 deletions
diff --git a/chocula/directories/issn_meta.py b/chocula/directories/issn_meta.py
new file mode 100644
index 0000000..e6abb3c
--- /dev/null
+++ b/chocula/directories/issn_meta.py
@@ -0,0 +1,61 @@
+from typing import Iterable, Optional
+import json
+
+from chocula.util import clean_str, clean_issn, parse_country
+from chocula.common import DirectoryLoader
+from chocula.database import DirectoryInfo, HomepageUrl
+
+
class IssnMetaLoader(DirectoryLoader):
    """
    Directory loader for metadata scraped from portal.issn.org.

    This is JSON-LD (-ish) scraped from portal.issn.org, filtered down to only
    journals already in the corpus, or matching a couple other criteria.

    Metadata we expect to get:

    - high quality English title
    - URLs
    - country

    TODO: non-English alternative titles
    """

    source_slug = "issn_meta"

    def open_file(self) -> Iterable:
        """
        Open the dump at ``self.config.issn_meta.filepath`` for text reading.

        The handle is returned open; closing it is left to the caller.
        """
        # JSON is UTF-8; don't let the platform locale pick the decoder.
        return open(self.config.issn_meta.filepath, "r", encoding="utf-8")

    def parse_record(self, row) -> Optional[DirectoryInfo]:
        """
        Build a DirectoryInfo from one serialized record.

        ``row`` is a JSON document whose top level is an array of metadata
        elements. Country, ISSN-L, title, print/electronic ISSN and homepage
        URLs are picked out of whichever elements carry them; elements that
        lack the expected keys are skipped instead of aborting the record.

        Returns None for a blank row. Raises ``json.JSONDecodeError`` if a
        non-blank row is not valid JSON.
        """
        if not row or not row.strip():
            # e.g. a trailing empty line in the dump
            return None

        elements = json.loads(row)

        info = DirectoryInfo(directory_slug=self.source_slug,)
        # format is an array of metadata elements
        for el in elements:
            if not isinstance(el, dict):
                continue

            el_id = el.get("@id")
            if not isinstance(el_id, str):
                el_id = ""
            # Only vocabulary entries whose last path segment is a two-letter
            # code are used for the country.
            if (
                "value" in el
                and el_id.startswith("http://id.loc.gov/vocabulary/countries")
                and len(el_id.rsplit("/", 1)[-1]) == 2
            ):
                info.country = parse_country(el["value"])

            if "@type" not in el:
                continue

            if (
                el["@type"] == "http://id.loc.gov/ontologies/bibframe/IssnL"
                and "value" in el
            ):
                info.issnl = clean_issn(el["value"])

            # NOTE(review): the remaining fields are read only from the
            # element that carries "mainTitle" -- confirm this nesting
            # against the upstream file.
            if "mainTitle" not in el:
                continue

            # Like "url" below, a JSON-LD value may be a scalar or a list;
            # take the first title when several are given.
            title = el["mainTitle"]
            if isinstance(title, list):
                title = title[0] if title else None
            if title is not None:
                info.name = clean_str(title)

            issn = el.get("issn")
            medium = el.get("format")
            if issn is not None:
                if medium == "vocabularies/medium#Print":
                    info.issnp = clean_issn(issn)
                elif medium == "vocabularies/medium#Electronic":
                    info.issne = clean_issn(issn)

            # "url" may be absent, null, a single string, or a list
            urls = el.get("url") or []
            if isinstance(urls, str):
                urls = [urls]
            for url in urls:
                homepage = HomepageUrl.from_url(url)
                if homepage:
                    info.homepage_urls.append(homepage)

        return info