import datetime
import json
import sys
import warnings
from typing import Any, Dict, List, Optional, Sequence

import fatcat_openapi_client
from bs4 import BeautifulSoup
from fatcat_openapi_client import ApiClient, ReleaseEntity

from .common import LANG_MAP_MARC, EntityImporter, clean
from .crossref import CONTAINER_TYPE_MAP

# TODO: more entries?
JSTOR_CONTRIB_MAP = {
    "author": "author",
    "editor": "editor",
    "translator": "translator",
    "illustrator": "illustrator",
}

JSTOR_TYPE_MAP = {
    "book-review": "review-book",
    "editorial": "editorial",
    "misc": "stub",
    "news": "article",
    "research-article": "article-journal",
}


class JstorImporter(EntityImporter):
    """
    Importer for JSTOR bulk XML metadata (eg, from their Early Journals
    Collection)
    """

    def __init__(self, api: ApiClient, issn_map_file: Sequence, **kwargs) -> None:

        eg_desc = kwargs.get("editgroup_description", "Automated import of JSTOR XML metadata")
        eg_extra = kwargs.get("editgroup_extra", dict())
        eg_extra["agent"] = eg_extra.get("agent", "fatcat_tools.JstorImporter")
        super().__init__(
            api,
            issn_map_file=issn_map_file,
            editgroup_description=eg_desc,
            editgroup_extra=eg_extra,
            **kwargs
        )

        self.create_containers = kwargs.get("create_containers", True)

        self.read_issn_map_file(issn_map_file)

    def map_container_type(self, crossref_type: Optional[str]) -> Optional[str]:
        if not crossref_type:
            return None
        return CONTAINER_TYPE_MAP.get(crossref_type)

    def want(self, raw_record: Any) -> bool:
        return True

    # TODO: mypy annotations partially skipped on this function ('Any' instead of
    # 'BeautifulSoup') for now because XML parsing annotations are large and
    # complex
    def parse_record(self, article: Any) -> Optional[ReleaseEntity]:

        journal_meta = article.front.find("journal-meta")
        article_meta = article.front.find("article-meta")

        extra: Dict[str, Any] = dict()
        extra_jstor: Dict[str, Any] = dict()

        release_type = JSTOR_TYPE_MAP.get(article["article-type"])
        title = article_meta.find("article-title")
        if title and title.get_text():
            title = title.get_text().replace("\n", " ").strip()
        elif title and not title.get_text():
            title = None

        if (
            not title
            and release_type
            and release_type.startswith("review")
            and article_meta.product.source
        ):
            title = "Review: {}".format(
                article_meta.product.source.replace("\n", " ").get_text()
            )

        if not title:
            return None

        if title.endswith("."):
            title = title[:-1]

        if "[Abstract]" in title:
            # TODO: strip the "[Abstract]" bit?
            release_type = "abstract"
        elif "[Editorial" in title:
            release_type = "editorial"
        elif "[Letter" in title:
            release_type = "letter"
        elif "[Poem" in title or "[Photograph" in title:
            release_type = None

        if title.startswith("[") and title.endswith("]"):
            # strip brackets if that is all that is there (eg, translation or non-english)
            title = title[1:-1]

        # JSTOR journal-id
        journal_ids = [j.string for j in journal_meta.find_all("journal-id")]
        if journal_ids:
            extra_jstor["journal_ids"] = journal_ids

        journal_title = journal_meta.find("journal-title").get_text().replace("\n", " ")
        publisher = journal_meta.find("publisher-name").get_text().replace("\n", " ")
        issn = journal_meta.find("issn")
        if issn:
            issn = issn.string
            if len(issn) == 8:
                issn = "{}-{}".format(issn[0:4], issn[4:8])
            else:
                assert len(issn) == 9

        issnl = self.issn2issnl(issn)
        container_id = None
        if issnl:
            container_id = self.lookup_issnl(issnl)

        # create container if it doesn't exist
        if (
            container_id is None
            and self.create_containers
            and (issnl is not None)
            and journal_title
        ):
            ce = fatcat_openapi_client.ContainerEntity(
                issnl=issnl,
                publisher=publisher,
                container_type=self.map_container_type(release_type),
                name=clean(journal_title, force_xml=True),
            )
            ce_edit = self.create_container(ce)
            container_id = ce_edit.ident
            self._issnl_id_map[issnl] = container_id

        doi = article_meta.find("article-id", {"pub-id-type": "doi"})
        if doi:
            doi = doi.string.lower().strip()

        jstor_id = article_meta.find("article-id", {"pub-id-type": "jstor"})
        if jstor_id:
            jstor_id = jstor_id.string.strip()
        if not jstor_id and doi:
            assert doi.startswith("10.2307/")
            jstor_id = doi.replace("10.2307/", "")
        assert jstor_id and int(jstor_id)

        contribs = []
        cgroup = article_meta.find("contrib-group")
        if cgroup:
            for c in cgroup.find_all("contrib"):
                given = c.find("given-names")
                if given:
                    given = clean(given.get_text().replace("\n", " "))
                surname = c.find("surname")
                if surname:
                    surname = clean(surname.get_text().replace("\n", " "))
                raw_name = c.find("string-name")
                if raw_name:
                    raw_name = clean(raw_name.get_text().replace("\n", " "))

                if not raw_name:
                    if given and surname:
                        raw_name = "{} {}".format(given, surname)
                    elif surname:
                        raw_name = surname

                role = JSTOR_CONTRIB_MAP.get(c.get("contrib-type", "author"))
                if not role and c.get("contrib-type"):
                    sys.stderr.write("NOT IN JSTOR_CONTRIB_MAP: {}\n".format(c["contrib-type"]))
                contribs.append(
                    fatcat_openapi_client.ReleaseContrib(
                        role=role,
                        raw_name=raw_name,
                        given_name=given,
                        surname=surname,
                    )
                )

        for i, contrib in enumerate(contribs):
            if contrib.raw_name != "et al.":
                contrib.index = i

        release_year = None
        release_date = None
        pub_date = article_meta.find("pub-date")
        if pub_date and pub_date.year:
            release_year = int(pub_date.year.string)
            if pub_date.month and pub_date.day:
                release_date = datetime.date(
                    release_year, int(pub_date.month.string), int(pub_date.day.string)
                )
                if release_date.day == 1 and release_date.month == 1:
                    # suspect jan 1st dates get set by JSTOR when actual
                    # date not known (citation needed), so drop them
                    release_date = None

        volume = None
        if article_meta.volume:
            volume = article_meta.volume.string or None

        issue = None
        if article_meta.issue:
            issue = article_meta.issue.string or None

        pages = None
        if article_meta.find("page-range"):
            pages = article_meta.find("page-range").string
        elif article_meta.fpage:
            pages = article_meta.fpage.string

        language = None
        cm = article_meta.find("custom-meta")
        if cm.find("meta-name").string == "lang":
            language = cm.find("meta-value").string.split()[0]
            language = LANG_MAP_MARC.get(language)
            if not language:
                warnings.warn("MISSING MARC LANG: {}".format(cm.find("meta-value").string))

        # JSTOR issue-id
        if article_meta.find("issue-id"):
            issue_id = clean(article_meta.find("issue-id").string)
            if issue_id:
                extra_jstor["issue_id"] = issue_id

        # everything in JSTOR is published
        release_stage = "published"

        # extra:
        #   withdrawn_date
        #   translation_of
        #   subtitle
        #   aliases
        #   container_name
        #   group-title
        #   pubmed: retraction refs
        if extra_jstor:
            extra["jstor"] = extra_jstor

        re = fatcat_openapi_client.ReleaseEntity(
            # work_id
            title=title,
            # original_title
            release_type=release_type,
            release_stage=release_stage,
            release_date=release_date,
            release_year=release_year,
            ext_ids=fatcat_openapi_client.ReleaseExtIds(
                doi=doi,
                jstor=jstor_id,
            ),
            volume=volume,
            issue=issue,
            pages=pages,
            publisher=publisher,
            language=language,
            # license_slug
            # content, mimetype, lang
            # abstracts=abstracts,
            contribs=contribs,
            # key, year, container_name, title, locator
            # extra: volume, authors, issue, publisher, identifiers
            # refs=refs,
            #   name, type, publisher, issnl
            #   extra: issnp, issne, original_name, languages, country
            container_id=container_id,
            extra=extra or None,
        )
        return re

    def try_update(self, re: ReleaseEntity) -> bool:

        # first, lookup existing by JSTOR id (which much be defined)
        existing = None
        try:
            existing = self.api.lookup_release(jstor=re.ext_ids.jstor)
        except fatcat_openapi_client.rest.ApiException as err:
            if err.status != 404:
                raise err

        # then try DOI lookup if there is one (try JSTOR prefix+jstor_id if
        # there isn't a DOI set)
        if not existing:
            doi = re.ext_ids.doi
            if not doi:
                doi = "10.2307/{}".format(re.ext_ids.jstor)
            try:
                existing = self.api.lookup_release(doi=doi)
            except fatcat_openapi_client.rest.ApiException as err:
                if err.status != 404:
                    raise err

        if existing and existing.ext_ids.jstor:
            # don't update if it already has JSTOR ID
            self.counts["exists"] += 1
            return False
        elif existing:
            # but do update if only DOI was set
            existing.ext_ids.jstor = re.ext_ids.jstor
            existing.extra["jstor"] = re.extra["jstor"]
            # better release_type detection, and some other fields
            # TODO: don't do this over-writing in the future? assuming here
            # this is a one-time batch import over/extending bootstrap crossref
            # metadata
            existing.release_type = re.release_type
            existing.publisher = re.publisher
            existing.contribs = re.contribs
            existing.language = re.language
            self.api.update_release(self.get_editgroup_id(), existing.ident, existing)
            self.counts["update"] += 1
            return False

        return True

    def insert_batch(self, batch: List[ReleaseEntity]) -> None:
        self.api.create_release_auto_batch(
            fatcat_openapi_client.ReleaseAutoBatch(
                editgroup=fatcat_openapi_client.Editgroup(
                    description=self.editgroup_description, extra=self.editgroup_extra
                ),
                entity_list=batch,
            )
        )

    def parse_file(self, handle: Any) -> None:

        # 1. open with beautiful soup
        soup = BeautifulSoup(handle, "xml")

        # 2. iterate over articles, call parse_article on each
        for article in soup.find_all("article"):
            resp = self.parse_record(article)
            print(json.dumps(resp))
            # sys.exit(-1)