import json
from typing import Iterable, Optional

from chocula.util import clean_str, clean_issn, parse_country
from chocula.common import DirectoryLoader
from chocula.database import DirectoryInfo, HomepageUrl


class IssnMetaLoader(DirectoryLoader):
    """
    This is JSON-LD (-ish) scraped from portal.issn.org, filtered down to
    only journals already in the corpus or matching a couple of other
    criteria.

    Metadata we expect to get:

    - high-quality English title
    - URLs
    - country

    TODO: non-English alternative titles
    """

    source_slug = "issn_meta"

    def open_file(self) -> Iterable:
        return open(self.config.issn_meta.filepath, "r")

    def parse_record(self, row) -> Optional[DirectoryInfo]:
        # each line of the input file is a standalone JSON document
        row = json.loads(row)
        info = DirectoryInfo(directory_slug=self.source_slug)

        # the record is an array of metadata elements
        for el in row:
            # country is indicated by a two-letter id.loc.gov country code
            if (
                "value" in el
                and el.get("@id", "").startswith(
                    "http://id.loc.gov/vocabulary/countries"
                )
                and len(el["@id"].split("/")[-1]) == 2
            ):
                info.country = parse_country(el["value"])
            if "@type" not in el:
                continue
            if el["@type"] == "http://id.loc.gov/ontologies/bibframe/IssnL":
                info.issnl = clean_issn(el["value"])
            if "mainTitle" in el:
                info.name = clean_str(el["mainTitle"])
            # print vs. electronic ISSN is distinguished by the medium format
            if el.get("format") == "vocabularies/medium#Print":
                info.issnp = clean_issn(el["issn"])
            elif el.get("format") == "vocabularies/medium#Electronic":
                info.issne = clean_issn(el["issn"])
            # "url" may be either a single string or a list of strings
            urls = el.get("url", [])
            if isinstance(urls, str):
                urls = [urls]
            for url in urls:
                homepage = HomepageUrl.from_url(url)
                if homepage:
                    info.homepage_urls.append(homepage)

        return info
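

# Illustrative sketch only: roughly what a single parsed row might look like,
# inferred from the field names handled in parse_record() above. The element
# shapes, identifiers, and values below are hypothetical examples, not real
# portal.issn.org records.
#
# example_row = [
#     {"@id": "http://id.loc.gov/vocabulary/countries/gb", "value": "United Kingdom"},
#     {"@type": "http://id.loc.gov/ontologies/bibframe/IssnL", "value": "1234-5678"},
#     {"@type": "...", "mainTitle": "Example Journal", "url": "https://example.com/"},
#     {"@type": "...", "format": "vocabularies/medium#Electronic", "issn": "2345-6789"},
# ]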