aboutsummaryrefslogtreecommitdiffstats
path: root/chocula
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-06-23 17:22:51 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-06-23 17:22:51 -0700
commit a4f9030d1aa49e18e699fcf37d336fa2f03f804c (patch)
tree 990fac79953c07aefa3d313f52b77f2c6ee5f965 /chocula
parent ee74b524b55ec6a8cb8120d890a07071174638d7 (diff)
download chocula-a4f9030d1aa49e18e699fcf37d336fa2f03f804c.tar.gz
chocula-a4f9030d1aa49e18e699fcf37d336fa2f03f804c.zip
AWOL directory importer
Diffstat (limited to 'chocula')
-rw-r--r-- chocula/directories/awol.py 76
1 files changed, 76 insertions, 0 deletions
diff --git a/chocula/directories/awol.py b/chocula/directories/awol.py
new file mode 100644
index 0000000..f780c6e
--- /dev/null
+++ b/chocula/directories/awol.py
@@ -0,0 +1,76 @@
+from typing import Iterable, Optional
+import json
+
+from chocula.util import clean_str, clean_issn
+from chocula.common import DirectoryLoader
+from chocula.database import DirectoryInfo, HomepageUrl
+
+
+class AwolLoader(DirectoryLoader):
+    """
+    AWOL: Ancient World Online index
+
+    Each input line is decoded as one JSON object by parse_record().
+
+    JSON keys:
+
+        "authors", "contributors", "description", "domain", "editors",
+        "end_date", "extent", "form", "frequency", "identifiers",
+        "is_part_of", "issuance", "issue", "issued_dates", "keywords",
+        "languages", "places", "provenance", "publishers",
+        "related_resources", "resource_key", "responsibility",
+        "start_date", "subordinate_resources", "title",
+        "title_alternates", "title_extended", "type", "url",
+        "url_alternates", "volume", "year", "zenon_id", "zotero_id"
+    """
+
+    source_slug = "awol"
+
+    def open_file(self) -> Iterable:
+        """Open the file at ``self.config.awol.filepath`` for reading."""
+        # JSON text is UTF-8; don't depend on the locale's default encoding.
+        return open(self.config.awol.filepath, encoding="utf-8")
+
+    def parse_record(self, line) -> Optional[DirectoryInfo]:
+        """
+        Build a DirectoryInfo from one JSON-encoded line.
+
+        Fields read: ``identifiers.issn`` ("generic", "electronic" and
+        "print"; each a single value or a list, of which the first entry
+        is used), "title", "languages" (only two-character entries are
+        kept) and "url". Keys that are missing or null are treated as
+        empty instead of raising.
+
+        Returns the populated DirectoryInfo; this method never returns None.
+
+        Raises whatever json.loads() raises when the line is not valid JSON.
+        """
+        record = json.loads(line)
+
+        # `or {}` also covers keys that are present but null
+        issn_info = (record.get("identifiers") or {}).get("issn") or {}
+        issns = {}
+        for key in ("generic", "electronic", "print"):
+            value = issn_info.get(key)
+            # sometimes is a list; an empty list counts as "no ISSN"
+            if isinstance(value, list):
+                value = value[0] if value else ""
+            issns[key] = value or ""
+
+        info = DirectoryInfo(
+            directory_slug=self.source_slug,
+            raw_issn=clean_issn(issns["generic"]),
+            issne=clean_issn(issns["electronic"]),
+            issnp=clean_issn(issns["print"]),
+            name=clean_str(record.get("title")),
+            langs=[s for s in (record.get("languages") or []) if len(s) == 2],
+        )
+
+        # only attach a homepage when the URL is present and from_url()
+        # returned something truthy
+        url = record.get("url")
+        if url:
+            homepage = HomepageUrl.from_url(url)
+            if homepage:
+                info.homepage_urls.append(homepage)
+        return info