aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2019-03-11 15:31:25 -0700
committerBryan Newbold <bnewbold@robocracy.org>2019-03-11 16:05:45 -0700
commit180328d25549ef5517d4870f1069c790cabf1c71 (patch)
treea937115f39dddb917c93081487dc4cc40a472061 /python
parent318ba16a98d77b87c76bb353e87aeffbe79b7176 (diff)
downloadfatcat-180328d25549ef5517d4870f1069c790cabf1c71.tar.gz
fatcat-180328d25549ef5517d4870f1069c790cabf1c71.zip
basic demo CSL/citeproc transform code
Needs tests
Diffstat (limited to 'python')
-rwxr-xr-xpython/fatcat_export.py61
-rw-r--r--python/fatcat_tools/__init__.py2
-rw-r--r--python/fatcat_tools/transforms.py165
3 files changed, 226 insertions, 2 deletions
diff --git a/python/fatcat_export.py b/python/fatcat_export.py
index 33e23202..a9d46142 100755
--- a/python/fatcat_export.py
+++ b/python/fatcat_export.py
@@ -10,12 +10,19 @@ without permission by an third party.
import sys
import json
import argparse
+
+from citeproc import CitationStylesStyle, CitationStylesBibliography
+from citeproc import Citation, CitationItem
+from citeproc import formatter
+from citeproc.source.json import CiteProcJSON
+from citeproc_styles import get_style_filepath
+
import fatcat_client
from fatcat_client.rest import ApiException
from fatcat_client import ReleaseEntity, ContainerEntity, ChangelogEntry
from fatcat_tools import uuid2fcid, entity_from_json, entity_to_dict, \
release_to_elasticsearch, container_to_elasticsearch, \
- changelog_to_elasticsearch, public_api
+ changelog_to_elasticsearch, public_api, release_to_csl
def run_export_releases(args):
@@ -52,6 +59,43 @@ def run_transform_changelogs(args):
args.json_output.write(
json.dumps(changelog_to_elasticsearch(entity)) + '\n')
def run_citeproc_releases(args):
    """
    Reads release entities (one JSON object per line) from args.json_input and
    writes one citation per release to args.json_output.

    When args.style is "csl-json", the CSL-JSON dict is dumped as a single
    JSON line per release. Any other style name is resolved with
    get_style_filepath() and rendered through citeproc; the HTML formatter is
    used when args.html is set, the plain-text formatter otherwise.

    Blank input lines are skipped.
    """
    for line in args.json_input:
        line = line.strip()
        if not line:
            continue
        entity = entity_from_json(line, ReleaseEntity, api_client=args.api.api_client)
        csl_json = release_to_csl(entity)
        # XXX: the id is synthesized here from the release ident; it is also
        # the key used below to register the citation
        csl_json['id'] = "release:" + (entity.ident or "unknown")
        if args.style == "csl-json":
            args.json_output.write(json.dumps(csl_json) + "\n")
            continue
        bib_src = CiteProcJSON([csl_json])
        form = formatter.html if args.html else formatter.plain
        style_path = get_style_filepath(args.style)
        bib_style = CitationStylesStyle(style_path, validate=False)
        bib = CitationStylesBibliography(bib_style, bib_src, form)
        bib.register(Citation([CitationItem(csl_json['id'])]))
        # only one item was registered, so the bibliography has one entry
        entry = bib.bibliography()[0]
        if args.style == "bibtex":
            # re-flow the rendered pieces: the entry header and each
            # space-prefixed piece start on their own line
            for chunk in entry:
                if chunk.startswith(" @"):
                    args.json_output.write("\n@")
                elif chunk.startswith(" "):
                    args.json_output.write("\n " + chunk)
                else:
                    args.json_output.write(chunk)
        else:
            args.json_output.write(''.join(entry) + "\n")
        # Terminate the entry on the output stream itself. This used to be a
        # bare print(), which always went to stdout even when json_output was
        # a file; output is unchanged when json_output is stdout.
        args.json_output.write("\n")
+
def run_export_changelog(args):
end = args.end
if end is None:
@@ -109,6 +153,21 @@ def main():
help="where to send output",
default=sys.stdout, type=argparse.FileType('w'))
+ sub_citeproc_releases = subparsers.add_parser('citeproc-releases')
+ sub_citeproc_releases.set_defaults(func=run_citeproc_releases)
+ sub_citeproc_releases.add_argument('json_input',
+ help="JSON-per-line of release entities",
+ default=sys.stdin, type=argparse.FileType('r'))
+ sub_citeproc_releases.add_argument('json_output',
+ help="where to send output",
+ default=sys.stdout, type=argparse.FileType('w'))
+ sub_citeproc_releases.add_argument('--style',
+ help="citation style to output",
+ default='csl-json')
+ sub_citeproc_releases.add_argument('--html',
+ action='store_true',
+ help="output HTML, not plain text")
+
sub_changelog = subparsers.add_parser('changelog')
sub_changelog.set_defaults(func=run_export_changelog)
sub_changelog.add_argument('--start',
diff --git a/python/fatcat_tools/__init__.py b/python/fatcat_tools/__init__.py
index 64c45062..c72ccd47 100644
--- a/python/fatcat_tools/__init__.py
+++ b/python/fatcat_tools/__init__.py
@@ -3,4 +3,4 @@ from .api_auth import authenticated_api, public_api
from .fcid import fcid2uuid, uuid2fcid
from .transforms import entity_to_dict, entity_from_json, \
release_to_elasticsearch, container_to_elasticsearch, \
- changelog_to_elasticsearch
+ changelog_to_elasticsearch, release_to_csl
diff --git a/python/fatcat_tools/transforms.py b/python/fatcat_tools/transforms.py
index 0e5afd6a..f49b5ac9 100644
--- a/python/fatcat_tools/transforms.py
+++ b/python/fatcat_tools/transforms.py
@@ -319,6 +319,171 @@ def container_to_elasticsearch(entity, force_bool=True):
return t
def contribs_by_role(contribs, role):
    """
    Selects the contributors holding a given role, as CSL name dicts.

    contribs: list of dicts as built by release_to_csl(). The 'role' and
        'literal' keys may be absent, because empty values are stripped
        before this function is called.
    role: role name to select (eg, "author", "editor")

    Returns a list of copies of the matching dicts with the 'role' and
    'literal' keys removed, or None if no contributor has that role. The
    input dicts are not modified.
    """
    ret = []
    for contrib in contribs:
        # .get(): contributors without a role have no 'role' key at all
        if contrib.get('role') != role:
            continue
        name = dict(contrib)
        name.pop('role', None)
        # XXX: 'literal' is always dropped for now; it may already be missing
        # (contributor without a raw name), hence the pop() default
        name.pop('literal', None)
        ret.append(name)
    return ret or None


def release_to_csl(entity):
    """
    Returns a python dict which can be json.dumps() to get a CSL-JSON (aka,
    citeproc-JSON, aka Citation Style Language JSON)

    This function will likely become an API method/endpoint

    Follows, but not enforced by: https://github.com/citation-style-language/schema/blob/master/csl-data.json

    Fields with no value are left out of the result entirely. The 'id' field
    is not set here; callers need to add it themselves.
    """
    contribs = []
    for contrib in (entity.contribs or []):
        if contrib.creator:
            # TODO: should we actually be pulling creator metadata? or just
            # using release-local raw metadata?
            c = dict(
                family=contrib.creator.surname,
                given=contrib.creator.given_name,
                literal=contrib.raw_name,  # or display_name?
                role=contrib.role,
            )
        else:
            # XXX: possible inclusion of full name metadata in release_contrib
            # Until then, guess the family name as the last word of the raw
            # name; raw_name may be missing or blank, so don't index blindly.
            name_parts = (contrib.raw_name or '').split()
            c = dict(
                family=name_parts[-1] if name_parts else None,
                literal=contrib.raw_name,
                role=contrib.role,
            )
        # drop empty values so they do not show up as nulls in the JSON
        contribs.append({k: v for k, v in c.items() if v})

    abstract = None
    if entity.abstracts:
        abstract = entity.abstracts[0].content

    issued_date = None
    if entity.release_date:
        issued_date = {"date-parts": [[
            entity.release_date.year,
            entity.release_date.month,
            entity.release_date.day,
        ]]}
    elif entity.release_year:
        issued_date = {"date-parts": [[entity.release_year]]}

    # pages is optional; only the part before the first dash is used
    page_first = entity.pages.split('-')[0] if entity.pages else None

    container = entity.container

    # Only the CSL variables we have data for are mapped; see the schema
    # linked above for the full list (eg, URL, edition, genre, keyword, note).
    csl = {
        'type': entity.release_type or "article",  # XXX: can't be blank
        'language': entity.language,
        'issued': issued_date,
        'abstract': abstract,
        'container-title': container and container.name,
        'DOI': entity.doi,
        'ISBN': entity.isbn13,
        'ISSN': container and container.issnl,
        'issue': entity.issue,
        # XXX: 'page': entity.pages,
        'page-first': page_first,
        'PMCID': entity.pmcid,
        'PMID': entity.pmid,
        'publisher': (container and container.publisher) or entity.publisher,
        'title': entity.title,
        'volume': entity.volume,
    }
    for role in ['author', 'collection-editor', 'composer', 'container-author',
                 'director', 'editor', 'editorial-director', 'interviewer',
                 'illustrator', 'original-author', 'recipient', 'reviewed-author',
                 'translator']:
        cbr = contribs_by_role(contribs, role)
        if cbr:
            csl[role] = cbr
    return {k: v for k, v in csl.items() if v}
+
+
def refs_to_csl(entity):
    """
    Converts the references of a release entity into a list of minimal
    CSL-JSON dicts, one per reference and in the same order.

    Each dict has 'title', 'issued' (a CSL date of the form
    {"date-parts": [[year]]}, or None when the reference has no year) and
    'id' (the reference key if set, otherwise its index).

    Returns an empty list when the entity has no references.
    """
    ret = []
    for ref in (entity.refs or []):
        # TODO: when ref.release_id is set, fetch the full entity from the API
        # and convert it with release_to_csl instead of using raw ref metadata
        issued_date = None
        if ref.year:
            # same date shape as release_to_csl() emits
            issued_date = {"date-parts": [[ref.year]]}
        csl = dict(
            title=ref.title,
            issued=issued_date,
        )
        # no trailing comma here: the id must be a scalar, not a 1-tuple
        csl['id'] = ref.key or ref.index  # zero- or one-indexed?
        ret.append(csl)
    return ret
+
+
def changelog_to_elasticsearch(entity):
editgroup = entity.editgroup