package skate

import (
	"bytes"
	"crypto/sha1"
	"encoding/gob"
	"fmt"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"time"

	"git.archive.org/martin/cgraph/skate/isbn"
	"git.archive.org/martin/cgraph/skate/set"
)

var (
	// Various ISBN patterns tailored towards dirty values, e.g. with
	// artifacts from OCR, etc.
	isbn10Regex = regexp.MustCompile(`[O0-9xX -]{10,18}`)
	isbn13Regex = regexp.MustCompile(`9[O0-9xX -]{12,20}`)
	// Related to CSL processing. Months run from 01 to 12, days from 01 to 31.
	yearMonthPat    = regexp.MustCompile(`[12][0-9]{3,3}-(0?[1-9]|1[0-2])`)
	yearMonthDayPat = regexp.MustCompile(`[12][0-9]{3,3}-(0?[1-9]|1[0-2])-([0]?[1-9]|[12][0-9]|[3][01])`)
	// openLibraryDateLayouts, e.g. as found in Open Library Editions,
	// .publish_date.
	openLibraryDateLayouts = []string{
		"1 Jan 2006",
		"2006",
		"2006-02",
		"2006-02-01",
		"Jan 2, 2006",
		"Jan 2006",
	}
	rune16pool = sync.Pool{
		New: func() interface{} {
			return make([]rune, 0, 16)
		},
	}
	setPool = sync.Pool{
		New: func() interface{} {
			return set.New()
		},
	}
)

// Ref is a reference document (derived from the "heavy intermediate schema");
// it can be very partial.
type Ref struct {
	Biblio       Biblio `json:"biblio"`
	Index        int64  `json:"index,omitempty"`
	Key          string `json:"key,omitempty"`
	Locator      string `json:"locator,omitempty"`
	RefSource    string `json:"ref_source,omitempty"`
	ReleaseYear  int    `json:"release_year,omitempty"`
	ReleaseIdent string `json:"release_ident,omitempty"`
	ReleaseStage string `json:"release_stage,omitempty"`
	WorkIdent    string `json:"work_ident,omitempty"`
}

// Biblio contains bibliographic information for a Ref.
type Biblio struct {
	ArxivId         string   `json:"arxiv_id,omitempty"`
	ContainerName   string   `json:"container_name,omitempty"`
	ContribRawNames []string `json:"contrib_raw_names,omitempty"`
	DOI             string   `json:"doi,omitempty"`
	Issue           string   `json:"issue,omitempty"`
	PMCID           string   `json:"pmcid,omitempty"`
	PMID            string   `json:"pmid,omitempty"`
	Pages           string   `json:"pages,omitempty"`
	Publisher       string   `json:"publisher,omitempty"`
	Title           string   `json:"title,omitempty"`
	Unstructured    string   `json:"unstructured,omitempty"`
	Url             string   `json:"url,omitempty"`
	Volume          string   `json:"volume,omitempty"`
	Year            int      `json:"year,omitempty"`
	// Any field we may require as part of our processing should go into an
	// extra section.
	Extra struct {
		ISBN []string `json:"isbn"`
	} `json:"extra"`
}
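// The next function is an illustrative sketch, not part of the original code:
// it shows how a partial Ref might be turned into a Release via RefToRelease
// (defined below). All field values are made up for illustration; the exact
// ISBN normalization depends on the isbn helper package.
func exampleRefToRelease() {
	ref := &Ref{
		ReleaseIdent: "hypothetical-release-ident",
		ReleaseYear:  2011,
		Biblio: Biblio{
			Title:           "A Mathematical Theory of Communication",
			ContainerName:   "The Bell System Technical Journal",
			ContribRawNames: []string{"C. Shannon"},
			Unstructured:    "Artech House, ISBN: 978-1-60807-201-9, 2011.",
		},
	}
	release, err := RefToRelease(ref)
	if err != nil {
		return
	}
	// Since the unstructured string mentions "isbn", ParseIsbn is applied and
	// release.ExtIDs.ISBN should hold the normalized ISBN-13 form.
	fmt.Println(release.Title, release.ReleaseYearValue, release.ExtIDs.ISBN)
}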
// RefToRelease converts a ref to a release. We want this e.g. for
// release-to-release fuzzy verification, when one of the docs is a ref.
func RefToRelease(ref *Ref) (*Release, error) {
	var (
		release  Release
		b        = ref.Biblio
		contribs = make([]struct {
			Index   int    `json:"index,omitempty"`
			RawName string `json:"raw_name,omitempty"`
			Role    string `json:"role,omitempty"`
		}, len(b.ContribRawNames))
	)
	release.Ident = ref.ReleaseIdent
	release.WorkID = ref.WorkIdent
	release.ExtIDs.Arxiv = b.ArxivId
	release.ExtIDs.DOI = SanitizeDOI(b.DOI)
	release.ExtIDs.PMID = b.PMID
	release.ExtIDs.PMCID = b.PMCID
	release.Title = b.Title
	release.Publisher = b.Publisher
	release.ContainerName = b.ContainerName
	release.Volume = b.Volume
	release.Issue = b.Issue
	release.Pages = b.Pages
	// Skip some accidental zero or bogus years.
	if ref.ReleaseYear > 1000 {
		release.ReleaseYearValue = fmt.Sprintf("%d", ref.ReleaseYear)
	}
	for i, name := range b.ContribRawNames {
		contribs[i].Index = i
		contribs[i].RawName = name
	}
	release.Contribs = contribs
	if strings.Contains(strings.ToLower(ref.Biblio.Unstructured), "isbn") {
		release.ExtIDs.ISBN = ParseIsbn(ref.Biblio.Unstructured)
	}
	// Extra info stashed into extra.skate.ref.
	release.Extra.Skate.Ref.Index = ref.Index
	release.Extra.Skate.Ref.Key = ref.Key
	release.Extra.Skate.Ref.Locator = ref.Locator
	release.Extra.Skate.Ref.Source = ref.RefSource
	return &release, nil
}

// ReleaseToUnstructured tries to render a sensible string, e.g. for frontend
// display of unmatched and other relations. Some examples:
// https://guides.lib.uw.edu/c.php?g=341448&p=4076094 - no specific style, just
// try to be readable.
func ReleaseToUnstructured(r *Release) string {
	var (
		sb    strings.Builder
		names = make([]string, len(r.Contribs))
	)
	for i := 0; i < len(r.Contribs); i++ {
		names[i] = r.Contribs[i].RawName
	}
	fmt.Fprintf(&sb, "%s", strings.Join(names, ", "))
	if r.Title != "" {
		if sb.Len() > 0 {
			fmt.Fprintf(&sb, ". ")
		}
		fmt.Fprintf(&sb, `%s`, r.Title)
	}
	if len(r.Subtitle()) > 0 {
		fmt.Fprintf(&sb, ": %s", strings.Join(r.Subtitle(), " "))
	}
	if r.ContainerName != "" {
		if sb.Len() > 0 {
			fmt.Fprintf(&sb, ". ")
		}
		fmt.Fprintf(&sb, `%s`, r.ContainerName)
	}
	if r.Volume != "" {
		if sb.Len() > 0 {
			fmt.Fprintf(&sb, ", ")
		}
		fmt.Fprintf(&sb, `vol. %s`, r.Volume)
	}
	if r.Issue != "" {
		if sb.Len() > 0 {
			fmt.Fprintf(&sb, ", ")
		}
		fmt.Fprintf(&sb, `no. %s`, r.Issue)
	}
	if r.ReleaseYear() > 0 {
		if sb.Len() > 0 {
			fmt.Fprintf(&sb, ", ")
		}
		fmt.Fprintf(&sb, `%s`, r.ReleaseYearString())
	}
	if r.Pages != "" {
		if sb.Len() > 0 {
			fmt.Fprintf(&sb, ", ")
		}
		fmt.Fprintf(&sb, `pp. %s`, r.Pages)
	}
	if r.Publisher != "" {
		if sb.Len() > 0 {
			fmt.Fprintf(&sb, ", ")
		}
		fmt.Fprintf(&sb, `%s`, r.Publisher)
	}
	switch {
	case r.ExtIDs.DOI != "":
		if sb.Len() > 0 {
			fmt.Fprintf(&sb, ", ")
		}
		fmt.Fprintf(&sb, `%s`, r.ExtIDs.DOI)
	case len(r.ExtIDs.ISBN) > 0:
		if sb.Len() > 0 {
			fmt.Fprintf(&sb, ", ")
		}
		if isbn13, err := isbn.To13(r.ExtIDs.ISBN[0]); err == nil {
			fmt.Fprintf(&sb, `%s`, isbn13)
		} else {
			fmt.Fprintf(&sb, `%s`, r.ExtIDs.ISBN[0])
		}
	}
	return sb.String()
}

// ParseIsbn tries to find and validate ISBNs in a string. Returns a list of
// unique, unsorted, validated ISBN-13 values, e.g. 9780123838520.
func ParseIsbn(s string) []string {
	// Some example inputs:
	//
	// ISBN: 10: 0137822693, pp: 373
	// Robotec, E. (1996). Scorbot ER VII, User's Manual, Eshed Robotec,
	// ISBN9652910333. Shannon, C. (1948). A Mathematical Theory of
	// Communication. The Bell System Technical Journal. July; October,
	// Vol. 27, pp. 379-423; 623-656.
	// Artech House, ISBN: 978-1-60807-201-9, 2011.
	// ...
	var (
		candidates10 = isbn10Regex.FindAllString(s, -1)
		candidates13 = isbn13Regex.FindAllString(s, -1)
		u            []rune
		z            string
		err          error
	)
	valid := setPool.Get().(set.Set)
	valid.Clear()
	defer setPool.Put(valid)
	for _, v := range append(candidates10, candidates13...) {
		u = rune16pool.Get().([]rune)
		u = u[:0]
		for _, c := range v {
			if c == 'O' {
				c = '0'
			}
			if c >= '0' && c <= '9' || c == 'X' || c == 'x' {
				u = append(u, c)
			}
		}
		z = string(u)
		rune16pool.Put(u)
		if !isbn.Validate(z) {
			continue
		}
		if len(z) < 12 {
			if z, err = isbn.To13(z); err != nil {
				continue
			}
		}
		valid.Add(z)
	}
	return valid.Slice()
}
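// Illustrative sketches, not part of the original code. The first shows
// ParseIsbn on one of the noisy example inputs listed above; the expected
// output assumes isbn.Validate and isbn.To13 behave as their names suggest.
func exampleParseIsbn() {
	fmt.Println(ParseIsbn("Artech House, ISBN: 978-1-60807-201-9, 2011."))
	// Expected, roughly: [9781608072019]
}

// The second sketch shows how ReleaseToUnstructured joins whatever fields are
// present; with the values below the output reads roughly:
//
//	C. Shannon. A Mathematical Theory of Communication. The Bell System
//	Technical Journal, vol. 27, 1948
func exampleReleaseToUnstructured() {
	var r Release
	r.Title = "A Mathematical Theory of Communication"
	r.ContainerName = "The Bell System Technical Journal"
	r.Volume = "27"
	r.ReleaseYearValue = 1948
	r.Contribs = append(r.Contribs, struct {
		Index   int    `json:"index,omitempty"`
		RawName string `json:"raw_name,omitempty"`
		Role    string `json:"role,omitempty"`
	}{RawName: "C. Shannon"})
	fmt.Println(ReleaseToUnstructured(&r))
}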
// Release document. Note that we may see different types for some fields
// (e.g. string, int, etc.). The mitigation for now is to make the field an
// interface{}, name the field "...Value" and to add a method with the field
// name, doing the type assertion. Example: ReleaseYearValue interface{},
// ReleaseYear() int, etc.
//
// The Extra field gets a "skate" section for conversion related values.
type Release struct {
	ContainerID   string `json:"container_id,omitempty"`
	ContainerName string `json:"container_name,omitempty"` // when not resolved
	Container     struct {
		ContainerType string `json:"container_type"`
		Ident         string `json:"ident"`
		Issnl         string `json:"issnl"`
		Name          string `json:"name"`
		Publisher     string `json:"publisher"`
		Revision      string `json:"revision"`
		State         string `json:"state"`
		WikidataQid   string `json:"wikidata_qid"`
	} `json:"container"`
	Contribs []struct {
		Index   int    `json:"index,omitempty"`
		RawName string `json:"raw_name,omitempty"`
		Role    string `json:"role,omitempty"`
	} `json:"contribs,omitempty"`
	ExtIDs struct {
		Arxiv       string   `json:"arxiv,omitempty"`
		Core        string   `json:"core,omitempty"`
		DOI         string   `json:"doi,omitempty"`
		ISBN        []string `json:"isbn,omitempty"` // should be isbn13
		Jstor       string   `json:"jstor,omitempty"`
		OLID        string   `json:"olid,omitempty"`
		PMCID       string   `json:"pmcid,omitempty"`
		PMID        string   `json:"pmid,omitempty"`
		WikidataQID string   `json:"wikidata_qid,omitempty"`
	} `json:"ext_ids,omitempty"`
	Ident     string `json:"ident,omitempty"`
	Publisher string `json:"publisher,omitempty"`
	Refs      []struct {
		ContainerName string `json:"container_name,omitempty"`
		Extra         struct {
			DOI     string   `json:"doi,omitempty"`
			Authors []string `json:"authors,omitempty"`
			Key     string   `json:"key,omitempty"`
			Year    string   `json:"year,omitempty"`
			Locator string   `json:"locator,omitempty"`
			Volume  string   `json:"volume,omitempty"`
		} `json:"extra"`
		Index   int64  `json:"index,omitempty"`
		Key     string `json:"key,omitempty"`
		Locator string `json:"locator,omitempty"`
		Year    int64  `json:"year,omitempty"`
	} `json:"refs,omitempty"`
	ReleaseDate      string      `json:"release_date,omitempty"`
	ReleaseYearValue interface{} `json:"release_year,omitempty"` // might be int or str
	ReleaseStage     string      `json:"release_stage,omitempty"`
	ReleaseType      string      `json:"release_type,omitempty"`
	Issue            string      `json:"issue,omitempty"`
	Volume           string      `json:"volume,omitempty"`
	Pages            string      `json:"pages,omitempty"`
	Title            string      `json:"title,omitempty"`
	WorkID           string      `json:"work_id,omitempty"`
	Extra            struct {
		ContainerName string      `json:"container_name,omitempty"`
		SubtitleValue interface{} `json:"subtitle,omitempty"` // []str or str
		Crossref      struct {
			Type string `json:"type,omitempty"`
		} `json:"crossref,omitempty"`
		DataCite struct {
			MetadataVersion int                `json:"metadataVersion,omitempty"`
			Relations       []DataCiteRelation `json:"relations,omitempty"`
		} `json:"datacite,omitempty"`
		// Anything in the "Skate" substruct should be considered private to
		// skate, nothing to depend upon outside this pipeline.
		Skate struct {
			// Mark as converted from "ref", "rg" or other schemas.
			Status string `json:"status,omitempty"`
			// Carry the ref index and key around.
			Ref struct {
				Index   int64  `json:"index,omitempty"`
				Key     string `json:"key,omitempty"`
				Locator string `json:"locator,omitempty"`
				Source  string `json:"source,omitempty"`
			} `json:"ref,omitempty"`
			ResearchGate struct {
				URL string `json:"url,omitempty"`
			} `json:"rg,omitempty"`
			// At the point where we have a release that was a ref and only a
			// partial container name, we can record any discovered container
			// name here.
			ResolvedContainerName string `json:"resolved_container_name"`
		} `json:"skate,omitempty"`
		OpenLibrary struct {
			HasFulltext   bool     `json:"has_fulltext,omitempty"`
			WorkID        string   `json:"work,omitempty"`
			SourceRecords []string `json:"source_records,omitempty"`
		} `json:"ol,omitempty"`
	} `json:"extra,omitempty"`
}
// Subtitle returns a slice of subtitle strings.
func (r *Release) Subtitle() (result []string) {
	switch v := r.Extra.SubtitleValue.(type) {
	case []interface{}:
		for _, e := range v {
			result = append(result, fmt.Sprintf("%v", e))
		}
		return result
	case []string:
		return v
	case string:
		return []string{v}
	}
	return []string{}
}

// ReleaseYearString returns the release year as a string.
func (r *Release) ReleaseYearString() string {
	return fmt.Sprintf("%d", r.ReleaseYear())
}

// ReleaseYear returns the year as int, without further validity checks.
func (r *Release) ReleaseYear() int {
	switch v := r.ReleaseYearValue.(type) {
	case int:
		return v
	case float64:
		return int(v)
	case string:
		w, err := strconv.Atoi(v)
		if err != nil {
			return 0
		}
		return w
	default:
		return 0
	}
}

// DataCiteRelation as it appears in the release extra field.
type DataCiteRelation struct {
	RelatedIdentifierType  string      `json:"relatedIdentifierType,omitempty"`
	RelatedIdentifierValue interface{} `json:"relatedIdentifier,omitempty"`
}

// RelatedIdentifier returns the identifier as a string.
func (r *DataCiteRelation) RelatedIdentifier() string {
	switch v := r.RelatedIdentifierValue.(type) {
	case string:
		return v
	default:
		return fmt.Sprintf("%v", v)
	}
}

// SitemapEntry in a basic JSON style, e.g. for
// https://archive.org/details/rg_sitemap_2021_02_23.
type SitemapEntry struct {
	Lastmod string `json:"lastmod,omitempty"`
	Title   string `json:"title,omitempty"`
	URL     string `json:"url,omitempty"`
}

// BiblioRef is a prototype for citation graph elasticsearch indexing,
// https://is.gd/yicTom.
type BiblioRef struct {
	Key                    string `json:"_id,omitempty"`        // TODO: rename this to ID or something like that
	IndexedTs              string `json:"indexed_ts,omitempty"` // https://www.elastic.co/guide/en/elasticsearch/reference/current/date.html
	SourceReleaseIdent     string `json:"source_release_ident,omitempty"`
	SourceWorkIdent        string `json:"source_work_ident,omitempty"`
	SourceWikipediaArticle string `json:"source_wikipedia_article,omitempty"`
	SourceReleaseStage     string `json:"source_release_stage,omitempty"`
	SourceYear             string `json:"source_year,omitempty"` // TODO: should this be source_release_year
	RefIndex               int64  `json:"ref_index,omitempty"`   // 1-based
	RefKey                 string `json:"ref_key,omitempty"`
	RefLocator             string `json:"ref_locator,omitempty"`
	TargetReleaseIdent     string `json:"target_release_ident,omitempty"`
	TargetWorkIdent        string `json:"target_work_ident,omitempty"`
	TargetOpenLibraryWork  string `json:"target_openlibrary_work,omitempty"`
	TargetURLSurt          string `json:"target_url_surt,omitempty"`
	TargetURL              string `json:"target_url,omitempty"`
	MatchProvenance        string `json:"match_provenance,omitempty"`
	MatchStatus            string `json:"match_status,omitempty"`
	MatchReason            string `json:"match_reason,omitempty"`
	TargetUnstructured     string `json:"target_unstructured,omitempty"`
	TargetCSL              *CSL   `json:"target_csl,omitempty"`
	SourceDOI              string `json:"source_doi,omitempty"`
	TargetDOI              string `json:"target_doi,omitempty"`
}
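// Illustrative sketch, not part of the original code: JSON decoding into
// interface{} fields yields float64 for numbers and []interface{} for arrays;
// the accessors above normalize these cases.
func exampleReleaseYearAccessor() {
	var r Release
	r.ReleaseYearValue = float64(2001)                  // e.g. what encoding/json would produce
	fmt.Println(r.ReleaseYear(), r.ReleaseYearString()) // 2001 2001
	r.ReleaseYearValue = "1998"
	fmt.Println(r.ReleaseYear()) // 1998
	r.Extra.SubtitleValue = []interface{}{"A", "B"}
	fmt.Println(r.Subtitle()) // [A B]
}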
// CSL is a subset of the Citation Style Language schema; see
// https://github.com/citation-style-language/schema and
// https://navneethg.github.io/jsonschemaviewer/.
type CSL struct {
	Accessed            *CSLDate    `json:"accessed,omitempty"`
	Author              []CSLAuthor `json:"author,omitempty"`
	CollectionTitle     string      `json:"collection-title,omitempty"`
	ContainerTitle      string      `json:"container-title,omitempty"`
	ContainerTitleShort string      `json:"container-title-short,omitempty"`
	DOI                 string      `json:"DOI,omitempty"`
	ID                  string      `json:"id,omitempty"`
	ISBN                string      `json:"ISBN,omitempty"`
	ISSN                string      `json:"ISSN,omitempty"`
	Issue               string      `json:"issue,omitempty"`
	Issued              *CSLDate    `json:"issued,omitempty"`
	JournalAbbreviation string      `json:"journalAbbreviation,omitempty"`
	Language            string      `json:"language,omitempty"`
	NumberOfPages       string      `json:"number-of-pages,omitempty"`
	OriginalTitle       string      `json:"original-title,omitempty"`
	PMCID               string      `json:"PMCID,omitempty"`
	PMID                string      `json:"PMID,omitempty"`
	Page                string      `json:"page,omitempty"`
	PageFirst           string      `json:"page-first,omitempty"`
	Publisher           string      `json:"publisher,omitempty"`
	Source              string      `json:"source,omitempty"`
	Status              string      `json:"status,omitempty"`
	Title               string      `json:"title,omitempty"`
	TitleShort          string      `json:"title-short,omitempty"`
	Type                string      `json:"type,omitempty"`
	URL                 string      `json:"URL,omitempty"`
	Volume              string      `json:"volume,omitempty"`
	VolumeTitle         string      `json:"volume-title,omitempty"`
	VolumeTitleShort    string      `json:"volume-title-short,omitempty"`
	YearSuffix          string      `json:"year-suffix,omitempty"`
}

// A date field is a complex field that expresses a date or a range of dates.
// An example date field in CSL is issued, which identifies the date an item
// was issued or published. Date fields can be expressed in two different
// formats. The first format is an array format (note the double-nesting of
// the array). To express a date range in this format, the ending date would
// be set as a second array. The second date format is a raw string. The
// recommended encoding is a string that represents the date in a numeric
// year-month-day format.
// https://docs.citationstyles.org/en/stable/specification.html#date-part, in
// a json schema: https://git.io/J411z
//
// Raw might be removed in the future:
// https://discourse.citationstyles.org/t/raw-dates-vs-date-parts/1533/12
type CSLDate struct {
	Raw   string  `json:"raw,omitempty"`
	Parts [][]int `json:"date-parts,omitempty"`
}

func (c *CSLDate) String() string {
	if c.Raw != "" {
		return c.Raw
	}
	// The "date-parts" field has quite some spec around it, also some open
	// issues (e.g. the schema allows for strings and numbers, which might
	// lead to issues like: https://github.com/zotero/zotero/issues/1603).
	var renderField = func(v interface{}, padding int) string {
		switch padding {
		case 2:
			switch w := v.(type) {
			case string:
				return w
			case int:
				return fmt.Sprintf("%02d", v)
			default:
				return fmt.Sprintf("%v", v)
			}
		default:
			switch w := v.(type) {
			case string:
				return w
			case int:
				return fmt.Sprintf("%d", v)
			default:
				return fmt.Sprintf("%v", v)
			}
		}
	}
	switch {
	case len(c.Parts) == 1:
		switch len(c.Parts[0]) {
		case 0:
			return ""
		case 1:
			return renderField(c.Parts[0][0], 4)
		case 2:
			v := fmt.Sprintf("%s-%s",
				renderField(c.Parts[0][0], 4),
				renderField(c.Parts[0][1], 2),
			)
			// Assume year, month for the moment.
			if yearMonthPat.MatchString(v) {
				return v
			}
		case 3:
			v := fmt.Sprintf("%s-%s-%s",
				renderField(c.Parts[0][0], 4),
				renderField(c.Parts[0][1], 2),
				renderField(c.Parts[0][2], 2))
			// Assume year, month, day for the moment.
			if yearMonthDayPat.MatchString(v) {
				return v
			}
		}
	}
	return ""
}
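// Illustrative sketch, not part of the original code: the two supported
// CSLDate encodings and how String renders them.
func exampleCSLDateString() {
	raw := CSLDate{Raw: "2020-07-14"}
	fmt.Println(raw.String()) // 2020-07-14 (raw strings pass through unchanged)
	parts := CSLDate{Parts: [][]int{{2020, 7, 14}}}
	fmt.Println(parts.String()) // 2020-07-14 (month and day are zero-padded)
}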
// Some personal names are represented by a single field (e.g. mononyms such
// as “Prince” or “Plato”). In such cases, the name can be delivered as a lone
// family element. Institutional names may be delivered in the same way, but
// it is preferred to set them instead as a literal element.
//
// We include RawName for holding an unparsed name, which is not the same as
// the literal element.
type CSLAuthor struct {
	Family  string `json:"family,omitempty"`
	Given   string `json:"given,omitempty"`
	Literal string `json:"literal,omitempty"`
	RawName string `json:"raw_name,omitempty"`
}

// Reset zeroes all fields, so a value can be reused.
func (b *BiblioRef) Reset() {
	b.Key = ""
	b.IndexedTs = ""
	b.SourceReleaseIdent = ""
	b.SourceWorkIdent = ""
	b.SourceWikipediaArticle = ""
	b.SourceReleaseStage = ""
	b.SourceYear = ""
	b.RefIndex = 0
	b.RefKey = ""
	b.RefLocator = ""
	b.TargetReleaseIdent = ""
	b.TargetWorkIdent = ""
	b.TargetOpenLibraryWork = ""
	b.TargetURLSurt = ""
	b.TargetURL = ""
	b.MatchProvenance = ""
	b.MatchStatus = ""
	b.MatchReason = ""
	b.TargetUnstructured = ""
	b.TargetCSL = nil
}

// LinkHash returns a string that is the same if source and target are equal,
// and different otherwise. This can be used to detect duplicate links. It
// should always return a non-empty string, but returns an empty string if
// encoding fails.
func (b *BiblioRef) LinkHash() string {
	switch {
	case b.SourceReleaseIdent != "" && b.TargetReleaseIdent != "":
		return fmt.Sprintf("fc:%s--fc:%s", b.SourceReleaseIdent, b.TargetReleaseIdent)
	case b.SourceReleaseIdent != "" && b.TargetOpenLibraryWork != "":
		return fmt.Sprintf("fc:%s--ol:%s", b.SourceReleaseIdent, b.TargetOpenLibraryWork)
	case b.SourceReleaseIdent != "" && b.TargetURL != "":
		return fmt.Sprintf("fc:%s--wb:%s", b.SourceReleaseIdent, b.TargetURL)
	case b.SourceReleaseIdent != "" && b.TargetURLSurt != "":
		return fmt.Sprintf("fc:%s--wb:%s", b.SourceReleaseIdent, b.TargetURLSurt)
	default:
		var (
			buf bytes.Buffer
			enc = gob.NewEncoder(&buf)
			h   = sha1.New()
		)
		if err := enc.Encode(b); err != nil {
			return ""
		}
		if _, err := buf.WriteTo(h); err != nil {
			return ""
		}
		return fmt.Sprintf("%x", h.Sum(nil))
	}
}

// MinimalCitations is a variant from
// archive.org/details/wikipedia_citations_2020-07-14. Part of the naming was
// already inconsistent in the source data.
type MinimalCitations struct {
	IDList         string `json:"ID_list"`
	PageTitle      string `json:"page_title"`
	Title          string `json:"Title"`
	TypeOfCitation string `json:"type_of_citation"`
	// We may have other languages in the future. If this is not set, "en"
	// might be used as the default.
	Language string `json:"lang"`
}

// IDList contains commonly used identifiers from wikipedia citations.
type IDList struct {
	ISBN  string `json:"isbn,omitempty"`
	DOI   string `json:"doi,omitempty"`
	PMID  string `json:"pmid,omitempty"`
	ISSN  string `json:"issn,omitempty"`
	JSTOR string `json:"jstor,omitempty"`
	PMC   string `json:"pmc,omitempty"`
	ARXIV string `json:"arxiv,omitempty"`
	OL    string `json:"ol,omitempty"`
}

// IsZero returns true if none of the identifiers is set.
func (l *IDList) IsZero() bool {
	return *l == IDList{}
}
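// Illustrative sketch, not part of the original code: LinkHash prefers
// readable keys built from identifier pairs and only falls back to a gob plus
// sha1 digest when no such pair is available (assuming gob can encode the
// remaining fields).
func exampleLinkHash() {
	var b BiblioRef
	b.SourceReleaseIdent = "source-ident" // hypothetical ident
	b.TargetReleaseIdent = "target-ident" // hypothetical ident
	fmt.Println(b.LinkHash()) // fc:source-ident--fc:target-ident
	b.TargetReleaseIdent = ""
	b.TargetUnstructured = "Some unmatched reference"
	fmt.Println(b.LinkHash()) // expected: a 40 character hex sha1 digest
}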
// ParseIDList parses out the identifiers from a citation document. The IDList
// values look something like this: "{BIBCODE=1992ApJ...399L..31C,
// DOI=10.1086/186599}".
func (c *MinimalCitations) ParseIDList() (result IDList) {
	if len(c.IDList) < 3 {
		return result
	}
	var (
		s     = c.IDList[1 : len(c.IDList)-1] // get rid of "{" and "}"
		parts = strings.Split(s, ",")
		pair  []string
	)
	for _, part := range parts {
		pair = strings.Split(part, "=")
		if len(pair) != 2 {
			continue
		}
		pair[0] = strings.TrimSpace(pair[0])
		pair[1] = strings.TrimSpace(pair[1])
		switch pair[0] {
		case "ISBN":
			result.ISBN = pair[1]
		case "DOI":
			result.DOI = SanitizeDOI(pair[1])
		case "PMID":
			result.PMID = pair[1]
		case "ISSN":
			result.ISSN = pair[1]
		case "PMC":
			result.PMC = pair[1]
		case "JSTOR":
			result.JSTOR = pair[1]
		case "ARXIV":
			result.ARXIV = pair[1]
		case "OL":
			result.OL = pair[1]
		default:
			continue
		}
	}
	return result
}

// OpenLibrarySolrDoc is a document from the Open Library data dump (solr).
// Note: we most likely only need OL editions.
type OpenLibrarySolrDoc struct {
	AuthorFacet      []string `json:"author_facet"`
	AuthorKey        []string `json:"author_key"`
	AuthorName       []string `json:"author_name"`
	CoverEditionKey  string   `json:"cover_edition_key"`
	CoverI           int64    `json:"cover_i"`
	EbookCountI      int64    `json:"ebook_count_i"`
	EditionCount     int64    `json:"edition_count"`
	EditionKey       []string `json:"edition_key"`
	FirstPublishYear int64    `json:"first_publish_year"`
	HasFulltext      bool     `json:"has_fulltext"`
	IdGoodreads      []string `json:"id_goodreads"`
	IdLibrarything   []string `json:"id_librarything"`
	Isbn             []string `json:"isbn"`
	Key              string   `json:"key"`
	Language         []string `json:"language"`
	LastModifiedI    int64    `json:"last_modified_i"`
	PublishDate      []string `json:"publish_date"`
	PublishYear      []int64  `json:"publish_year"`
	Publisher        []string `json:"publisher"`
	PublisherFacet   []string `json:"publisher_facet"`
	Seed             []string `json:"seed"`
	Text             []string `json:"text"`
	Title            string   `json:"title"`
	TitleSuggest     []string `json:"title_suggest"`
	Type             string   `json:"type"`
	Version          int64    `json:"_version_"`
}

// OpenLibrarySolrDocToRelease converts OL solr data into a release.
func OpenLibrarySolrDocToRelease(w *OpenLibrarySolrDoc) (*Release, error) {
	var (
		release  Release
		contribs = make([]struct {
			Index   int    `json:"index,omitempty"`
			RawName string `json:"raw_name,omitempty"`
			Role    string `json:"role,omitempty"`
		}, len(w.AuthorName))
		s = set.New()
	)
	for i, author := range w.AuthorName {
		contribs[i].Index = i
		contribs[i].RawName = author
	}
	release.Contribs = contribs
	release.Title = w.Title
	if len(w.PublishYear) > 0 {
		// Convert to int, so ReleaseYear() can handle the value.
		release.ReleaseYearValue = int(w.FirstPublishYear)
	}
	for _, v := range w.Isbn {
		switch {
		case len(v) < 13:
			if w, err := isbn.To13(v); err == nil {
				s.Add(w)
			}
		default:
			s.Add(v)
		}
	}
	if len(w.Publisher) > 0 {
		release.Publisher = w.Publisher[0]
	}
	release.ExtIDs.ISBN = s.Slice()
	release.ExtIDs.OLID = strings.Replace(w.Key, "/works/", "", 1)
	release.Extra.OpenLibrary.HasFulltext = w.HasFulltext
	return &release, nil
}
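// Illustrative sketch, not part of the original code: ParseIDList on the
// example value from the doc comment above. BIBCODE is not a known key and is
// skipped; the DOI goes through SanitizeDOI, which is assumed to leave an
// already clean DOI untouched.
func exampleParseIDList() {
	c := MinimalCitations{IDList: "{BIBCODE=1992ApJ...399L..31C, DOI=10.1086/186599}"}
	ids := c.ParseIDList()
	fmt.Println(ids.DOI)      // 10.1086/186599
	fmt.Println(ids.IsZero()) // false
}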
// OpenLibraryEdition document, see: https://openlibrary.org/developers/dumps.
type OpenLibraryEdition struct {
	Authors []struct {
		Key string `json:"key"`
	} `json:"authors"`
	ByStatement string  `json:"by_statement"`
	Covers      []int64 `json:"covers"`
	Created     struct {
		Type  string `json:"type"`
		Value string `json:"value"`
	} `json:"created"`
	DeweyDecimalClass []string `json:"dewey_decimal_class"`
	Genres            []string `json:"genres"`
	Identifiers       struct {
		Goodreads    []string `json:"goodreads"`
		Librarything []string `json:"librarything"`
	} `json:"identifiers"`
	Isbn10    []string `json:"isbn_10"`
	Isbn13    []string `json:"isbn_13"`
	Key       string   `json:"key"`
	Languages []struct {
		Key string `json:"key"`
	} `json:"languages"`
	LastModified struct {
		Type  string `json:"type"`
		Value string `json:"value"`
	} `json:"last_modified"`
	LatestRevision    int64    `json:"latest_revision"`
	LcClassifications []string `json:"lc_classifications"`
	Lccn              []string `json:"lccn"`
	// Example values:
	//
	// null
	// {
	//   "type": "/type/text",
	//   "value": "Includes bibliographical references (p. 137-143)."
	// }
	// null
	// "Includes bibliographical references (p. 203-205) and index."
	// null
	Notes          interface{} `json:"notes"`
	NumberOfPages  int64       `json:"number_of_pages"`
	Ocaid          string      `json:"ocaid"`
	Pagination     string      `json:"pagination"`
	PublishCountry string      `json:"publish_country"`
	PublishDate    string      `json:"publish_date"`
	PublishPlaces  []string    `json:"publish_places"`
	Publishers     []string    `json:"publishers"`
	Revision       int64       `json:"revision"`
	SourceRecords  []string    `json:"source_records"`
	SubjectPlace   []string    `json:"subject_place"`
	Subjects       []string    `json:"subjects"`
	Subtitle       string      `json:"subtitle"`
	Title          string      `json:"title"`
	Type           struct {
		Key string `json:"key"`
	} `json:"type"`
	Works []struct {
		Key string `json:"key"`
	} `json:"works"`
}

// Isbns returns all found ISBN: unique, sorted, non-normalized.
func (v OpenLibraryEdition) Isbns() []string {
	s := set.New()
	for _, w := range v.Isbn10 {
		s.Add(w)
	}
	for _, w := range v.Isbn13 {
		s.Add(w)
	}
	return s.Sorted()
}
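// Illustrative sketch, not part of the original code: Isbns merges isbn_10
// and isbn_13 values into one sorted, de-duplicated list without normalizing
// them to ISBN-13 (assuming set.Sorted returns lexically sorted values).
func exampleEditionIsbns() {
	var e OpenLibraryEdition
	e.Isbn10 = []string{"0137822693"}
	e.Isbn13 = []string{"9780137822690", "9780137822690"}
	fmt.Println(e.Isbns()) // [0137822693 9780137822690]
}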
// "source_records": [ // "amazon:0531203093", // "ia:multiplebirths0000land", // "marc:marc_loc_2016/BooksAll.2016.part25.utf8:104915596:921" // ] release.Extra.OpenLibrary.SourceRecords = v.SourceRecords for _, l := range openLibraryDateLayouts { t, err := time.Parse(l, v.PublishDate) if err != nil { continue } release.ReleaseYearValue = t.Year() break } return &release, nil } // ShortenOpenLibraryIdentifier shortens an id like "/books/OL10899962M" to // "OL10899962M" or returns the same string, if no simplifications can be made. // TODO: add test. func ShortenOpenLibraryIdentifier(s string) string { fields := strings.Split(s, "/") for _, f := range fields { if strings.HasPrefix(f, "OL") { return f } } return s }