""" This file contains helpers to fuzzy match a raw citation string: - try to parse it with GROBID into structured form - transform the GROBID XML response to a simple dict/struct - run fuzzycat lookup Note that this chain hits several external services, and should be wrapped in a timeout and try/except! In the future, perhaps should be async so it can run in parallel with "regular" query? """ import io import sys import xml.etree.ElementTree as ET from typing import Any, Optional, Tuple import fuzzycat.common import fuzzycat.verify import requests from fatcat_openapi_client import ReleaseContrib, ReleaseEntity, ReleaseExtIds from fuzzycat.matching import match_release_fuzzy from fatcat_scholar.api_entities import entity_to_dict from fatcat_scholar.grobid2json import biblio_info def grobid_process_citation( raw: str, grobid_host: str = "https://grobid.qa.fatcat.wiki", timeout: float = 10.0 ) -> Optional[str]: try: grobid_response = requests.post( grobid_host + "/api/processCitation", data={"citations": raw, "consolidateCitations": 0,}, timeout=timeout, ) except requests.Timeout: print("GROBID request (HTTP POST) timeout", file=sys.stderr) return None if grobid_response.status_code != 200: print(f"GROBID request (HTTP POST) failed: {grobid_response}", file=sys.stderr) return None return grobid_response.text def transform_grobid(raw_xml: str) -> Optional[dict]: # first, remove any xmlns stuff raw_xml = raw_xml.replace('xmlns="http://www.tei-c.org/ns/1.0"', "") tree = ET.parse(io.StringIO(raw_xml)) root = tree.getroot() ref = biblio_info(root, ns="") if not any(ref.values()): return None return ref def ref_to_release(ref: dict) -> ReleaseEntity: contribs = [] for author in ref.get("authors") or []: contribs.append( ReleaseContrib( raw_name=author.get("name"), given_name=author.get("given_name"), surname=author.get("surname"), ) ) release = ReleaseEntity( title=ref.get("title"), contribs=contribs, volume=ref.get("volume"), issue=ref.get("issue"), pages=ref.get("pages"), ext_ids=ReleaseExtIds( doi=ref.get("doi"), pmid=ref.get("pmid"), pmcid=ref.get("pmcid"), arxiv=ref.get("arxiv_id"), ), ) if ref.get("journal"): release.extra = {"container_name": ref.get("journal")} if ref.get("date"): if len(ref["date"]) == 4 and ref["date"].isdigit(): release.release_year = int(ref["date"]) return release def fuzzy_match( release: ReleaseEntity, es_client: Any, api_client: Any, timeout: float = 10.0 ) -> Optional[Tuple[str, str, ReleaseEntity]]: """ This helper function uses fuzzycat (and elasticsearch) to look for existing release entities with similar metadata. Returns None if there was no match of any kind, or a single tuple (status: str, reason: str, existing: ReleaseEntity) if there was a match. Status string is one of the fuzzycat.common.Status, with "strongest match" in this sorted order: - EXACT - STRONG - WEAK - AMBIGUOUS Eg, if there is any EXACT match that is always returned; an AMBIGIOUS result is only returned if all the candidate matches were ambiguous. TODO: actually do something with timeout """ # this map used to establish priority order of verified matches STATUS_SORT = { fuzzycat.common.Status.TODO: 0, fuzzycat.common.Status.EXACT: 10, fuzzycat.common.Status.STRONG: 20, fuzzycat.common.Status.WEAK: 30, fuzzycat.common.Status.AMBIGUOUS: 40, fuzzycat.common.Status.DIFFERENT: 60, } # TODO: the size here is a first guess; what should it really be? 
    candidates = match_release_fuzzy(release, size=10, es=es_client)
    if not candidates:
        return None

    release_dict = entity_to_dict(release, api_client=api_client.api_client)
    verified = [
        (
            fuzzycat.verify.verify(
                release_dict, entity_to_dict(c, api_client=api_client.api_client)
            ),
            c,
        )
        for c in candidates
    ]

    # choose the "closest" match
    closest = sorted(verified, key=lambda v: STATUS_SORT[v[0].status])[0]

    if closest[0].status == fuzzycat.common.Status.DIFFERENT:
        return None
    elif closest[0].status == fuzzycat.common.Status.TODO:
        raise NotImplementedError("fuzzycat verify hit a Status.TODO")
    else:
        return (closest[0].status.name, closest[0].reason.value, closest[1])


def try_fuzzy_match(
    citation: str, grobid_host: str, es_client: Any, fatcat_api_client: Any
) -> Optional[str]:
    """
    All-in-one helper method
    """
    resp = grobid_process_citation(citation, grobid_host=grobid_host, timeout=3.0)
    if not resp:
        return None
    ref = transform_grobid(resp)
    if not ref:
        return None
    release = ref_to_release(ref)
    matches = fuzzy_match(
        release, es_client=es_client, api_client=fatcat_api_client, timeout=3.0
    )
    if not matches or matches[0] not in ("EXACT", "STRONG", "WEAK"):
        return None
    return f"work_{matches[2].work_id}"


if __name__ == "__main__":
    """
    Demo showing how to integrate the above functions together.
    """
    import os

    import elasticsearch
    import fatcat_openapi_client

    citation = sys.argv[1]
    print("Sending to GROBID...")
    resp = grobid_process_citation(citation)
    print(resp)
    if not resp:
        sys.exit(0)
    ref = transform_grobid(resp)
    print(ref)
    if not ref:
        sys.exit(0)
    release = ref_to_release(ref)
    print(release)

    es_backend = os.environ.get(
        "ELASTICSEARCH_FATCAT_BASE", "https://search.fatcat.wiki"
    )
    es_client = elasticsearch.Elasticsearch(es_backend)
    api_conf = fatcat_openapi_client.Configuration()
    api_conf.host = os.environ.get("FATCAT_API_HOST", "https://api.fatcat.wiki/v0")
    api_client = fatcat_openapi_client.DefaultApi(
        fatcat_openapi_client.ApiClient(api_conf)
    )

    matches = fuzzy_match(release, es_client=es_client, api_client=api_client)
    print(matches)
    if not matches or matches[0] not in ("EXACT", "STRONG", "WEAK"):
        sys.exit(0)
    print(matches[2].work_id)