diff options
Diffstat (limited to 'python')
| -rwxr-xr-x | python/fatcat_export.py | 61 | ||||
| -rw-r--r-- | python/fatcat_tools/__init__.py | 2 | ||||
| -rw-r--r-- | python/fatcat_tools/transforms.py | 165 | 
3 files changed, 226 insertions, 2 deletions
def run_citeproc_releases(args):
    """
    Convert release entities (one JSON document per input line) to citations.

    Reads lines from ``args.json_input`` and writes to ``args.json_output``.
    Blank input lines are skipped.

    When ``args.style`` is "csl-json", each release is written as a single
    CSL-JSON object per line. For any other style the release is rendered
    through citeproc using that style; ``args.html`` selects the HTML
    formatter instead of the plain-text one.
    """
    render_citations = args.style != "csl-json"
    bib_style = None
    form = None
    if render_citations:
        # Formatter and parsed style do not depend on the input line, so
        # resolve them once instead of re-parsing the style per release.
        form = formatter.html if args.html else formatter.plain
        style_path = get_style_filepath(args.style)
        bib_style = CitationStylesStyle(style_path, validate=False)

    for line in args.json_input:
        line = line.strip()
        if not line:
            continue
        entity = entity_from_json(line, ReleaseEntity, api_client=args.api.api_client)
        csl_json = release_to_csl(entity)
        # XXX: this id doubles as the citation key registered below
        csl_json['id'] = "release:" + (entity.ident or "unknown")
        if not render_citations:
            args.json_output.write(json.dumps(csl_json) + "\n")
            continue

        bib_src = CiteProcJSON([csl_json])
        bib = CitationStylesBibliography(bib_style, bib_src, form)
        bib.register(Citation([CitationItem(csl_json['id'])]))
        # Only one item was registered, so only the first entry is used.
        fragments = bib.bibliography()[0]
        if args.style == "bibtex":
            # Re-flow the rendered pieces: a piece starting with " @" opens
            # an entry on a fresh line, other pieces with a leading space
            # are put on their own indented line.
            for fragment in fragments:
                if fragment.startswith(" @"):
                    args.json_output.write("\n@")
                elif fragment.startswith(" "):
                    args.json_output.write("\n  " + fragment)
                else:
                    args.json_output.write(fragment)
        else:
            args.json_output.write(''.join(fragments) + "\n")
        # Record separator goes to the configured output stream; a bare
        # print() would land on process stdout even when writing to a file.
        args.json_output.write("\n")
def contribs_by_role(contribs, role):
    """
    Select the contributor dicts with the given role, shaped as CSL names.

    contribs: list of dicts as built by release_to_csl(); keys with falsy
        values have already been removed there, so any key may be absent.
    role: CSL role name to filter on (eg, "author").

    Returns a list of copies (the input dicts are not modified) with the
    'role' key removed, or None if no contributor matches. 'literal' is
    dropped when any other name part is present, and kept only as a
    fallback so that an entry never ends up as an empty dict.
    """
    ret = []
    for contrib in contribs:
        # .get(): 'role' is missing when the release had no role set
        if contrib.get('role') != role:
            continue
        name = contrib.copy()
        name.pop('role', None)
        # XXX: structured name parts and 'literal' are not emitted together
        if any(k != 'literal' for k in name):
            name.pop('literal', None)
        if name:
            ret.append(name)
    return ret or None


def release_to_csl(entity):
    """
    Returns a python dict which can be json.dumps() to get a CSL-JSON (aka,
    citeproc-JSON, aka Citation Style Language JSON)

    This function will likely become an API method/endpoint

    Follows, but not enforced by: https://github.com/citation-style-language/schema/blob/master/csl-data.json

    entity: release entity; contribs, abstracts, container, pages and the
        other optional attributes read here may all be None.

    Keys with empty values are left out of the result. No 'id' is set;
    callers needing one add it themselves.
    """
    contribs = []
    for contrib in (entity.contribs or []):
        if contrib.creator:
            # TODO: should we actually be pulling creator metadata? or just
            # using release-local raw metadata?
            c = dict(
                family=contrib.creator.surname,
                given=contrib.creator.given_name,
                literal=contrib.raw_name, # or display_name?
                role=contrib.role,
            )
        else:
            # XXX: possible inclusion of full name metadata in release_contrib
            # Last whitespace-separated token is taken as the family name;
            # raw_name may be missing or blank.
            name_parts = (contrib.raw_name or '').split()
            c = dict(
                family=name_parts[-1] if name_parts else None,
                literal=contrib.raw_name,
                role=contrib.role,
            )
        c = {k: v for k, v in c.items() if v}
        if c:
            contribs.append(c)

    abstract = None
    if entity.abstracts:
        abstract = entity.abstracts[0].content

    issued_date = None
    if entity.release_date:
        issued_date = {"date-parts": [[
            entity.release_date.year,
            entity.release_date.month,
            entity.release_date.day,
        ]]}
    elif entity.release_year:
        issued_date = {"date-parts": [[entity.release_year]]}

    container = entity.container
    # Only fields with an available release/container source are mapped;
    # the remaining fields of the CSL schema are not populated.
    csl = {
        'type': entity.release_type or "article", # XXX: can't be blank
        'language': entity.language,
        'issued': issued_date,
        'abstract': abstract,
        'DOI': entity.doi,
        'ISBN': entity.isbn13,
        'ISSN': container and container.issnl,
        'issue': entity.issue,
        'PMCID': entity.pmcid,
        'PMID': entity.pmid,
        'publisher': (container and container.publisher) or entity.publisher,
        'title': entity.title,
        'volume': entity.volume,
    }
    for role in ['author', 'collection-editor', 'composer', 'container-author',
            'director', 'editor', 'editorial-director', 'interviewer',
            'illustrator', 'original-author', 'recipient', 'reviewed-author',
            'translator']:
        cbr = contribs_by_role(contribs, role)
        if cbr:
            csl[role] = cbr
    csl['container-title'] = container and container.name
    # XXX: page=entity.pages
    # pages may be None; only the part before the first dash is used
    csl['page-first'] = entity.pages and entity.pages.split('-')[0]
    return {k: v for k, v in csl.items() if v}


def refs_to_csl(entity):
    """
    Convert the references of a release into a list of minimal CSL dicts.

    entity: release entity; entity.refs may be None.

    Each returned dict has 'title', 'issued' (a CSL date object, or None
    when the reference has no year) and 'id' (the reference key, falling
    back to its index).
    """
    ret = []
    for ref in (entity.refs or []):
        # TODO: when ref.release_id is set, fetch full entity from API and
        # convert with release_to_csl
        issued_date = None
        if ref.year:
            issued_date = {"date-parts": [[ref.year]]}
        csl = dict(
            title=ref.title,
            issued=issued_date,
        )
        csl['id'] = ref.key or ref.index # zero- or one-indexed?
        ret.append(csl)
    return ret
