#!/usr/bin/env python3

import datetime
import json
import sys

# Size limit for abstracts: parse_grobid_json() only keeps an abstract whose
# len() is strictly below this value; longer ones are dropped entirely.
# NOTE(review): the check applies len() to the abstract value as-is, which
# counts characters rather than bytes if it is a str -- confirm intent.
MAX_ABSTRACT_BYTES = 4096


def _parse_citation(raw):
    """Convert one citation dict into a reference dict.

    The result always has "key" (the citation's "id", possibly None) and
    "extra"; "title" and "year" are added only when present and usable.
    "extra" is ``{"grobid": {...}}`` holding volume/url/issue/publisher and
    author names when any of them exist, otherwise None.
    """
    ref = dict(key=raw.get("id"))
    if raw.get("title"):
        ref["title"] = raw["title"].strip()
    if raw.get("date"):
        # Best effort: keep a leading four-digit year, silently skip the
        # field when the date does not start with one.
        try:
            ref["year"] = int(str(raw["date"]).strip()[:4])
        except ValueError:
            pass

    # Kept in a separate name so it can never leak into the release-level
    # "extra" dict built by the caller.
    ref_extra = {
        key: raw[key].strip()
        for key in ("volume", "url", "issue", "publisher")
        if raw.get(key)
    }
    if raw.get("authors"):
        ref_extra["authors"] = [a["name"] for a in raw["authors"]]
    ref["extra"] = dict(grobid=ref_extra) if ref_extra else None
    return ref


def parse_grobid_json(obj):
    """Build a release metadata dict from one decoded JSON object.

    Args:
        obj: dict with an optional "title", "abstract", "authors",
            "citations", "date", "doi" and "journal" (itself a dict with
            optional "name", "publisher", "volume", "issue").
            Presumably GROBID output -- verify against the producer.

    Returns:
        None when "title" is missing or empty; otherwise a dict with keys
        title, contribs, refs, publisher, volume, issue, abstracts,
        release_type, release_date and extra.
    """
    if not obj.get("title"):
        return None

    # Abstracts at or above the size limit are dropped rather than truncated.
    abstracts = None
    abstract = obj.get("abstract")
    if abstract and len(abstract) < MAX_ABSTRACT_BYTES:
        abstracts = [
            dict(mimetype="text/plain", language=None, content=abstract.strip())
        ]

    contribs = [dict(raw_name=a, role="author") for a in obj.get("authors") or []]
    refs = [_parse_citation(raw) for raw in obj.get("citations") or []]

    release_date = None
    if obj.get("date"):
        # TODO: only returns year, ever? how to handle?
        # Accept both an int year and a date string; only the leading
        # four-digit year is used, pinned to January 1st.
        try:
            year = int(str(obj["date"]).strip()[:4])
            release_date = datetime.datetime(year=year, month=1, day=1)
        except ValueError:
            release_date = None

    # A record without journal information must not abort the whole parse.
    journal = obj.get("journal") or {}

    extra = dict()
    if obj.get("doi"):
        extra["doi"] = obj["doi"].lower()
    if journal.get("name"):
        extra["container_name"] = journal["name"]
    extra["is_longtail_oa"] = True

    # TODO: ISSN/eISSN handling? or just journal name lookup?

    return dict(
        title=obj["title"].strip(),
        contribs=contribs,
        refs=refs,
        publisher=journal.get("publisher"),
        volume=journal.get("volume"),
        issue=journal.get("issue"),
        abstracts=abstracts,
        release_type="journal-article",
        release_date=release_date,
        # Never empty: is_longtail_oa is always set above.
        extra=dict(grobid=extra),
    )


def run():
    """Read one JSON object per line from stdin and print each parsed record.

    Blank or whitespace-only lines are skipped instead of aborting the run
    with a JSON decode error. Records for which parse_grobid_json() returns
    a falsy value (e.g. no title) produce no output.
    """
    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue
        out = parse_grobid_json(json.loads(line))
        if out:
            print(out)


# Process stdin only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    run()