#!/usr/bin/env python3
"""
License: MIT
Author: Bryan Newbold
Date: July 2017

See README.md and LICENSE.
"""

from __future__ import print_function

import re
import json
import sys
import os
import difflib
import argparse
import subprocess
import logging as log

import requests

DEFAULT_HEADER = (
    "This page was generated automatically from Markdown using the "
    "'divergence' tool. Edits will need to be merged manually."
)

# Locations searched, in order, for the pandoc helper files.
HELPER_SEARCH_PREFIXES = (
    './',
    '/usr/local/lib/divergence/',
    '/usr/lib/divergence/',
)


class DivergenceError(AssertionError):
    """Raised when Confluence or pandoc returns an unexpected result.

    Subclasses AssertionError because these checks used to be bare ``assert``
    statements: code catching AssertionError keeps working, but the checks
    are no longer stripped when Python runs with ``-O``.
    """


class DivergenceProgram:
    """Converts Markdown files with pandoc and pushes them to Confluence."""

    def __init__(self, user, password, url, space,
                 force_update=False,
                 include_toc=False,
                 header=None,
                 no_header=False):
        """
        Sets up an authenticated requests session and locates the pandoc
        helper files. Exits the process (status -1) if a helper is missing.
        """
        self.api = requests.Session()
        self.api.auth = (user, password)
        self.api.headers.update({'Content-Type': 'application/json'})
        self.base_url = url
        self.default_space = space
        self.force_update = force_update
        self.include_toc = include_toc
        self.header = header        # from command-line arg
        self.no_header = no_header  # from command-line arg

        self.pandoc_helper_path = self._find_helper('pandoc_confluence.lua')
        self.pandoc_meta_path = self._find_helper('meta-json.template')

    def _find_helper(self, filename):
        """
        Returns the first existing path for `filename` among
        HELPER_SEARCH_PREFIXES; logs an error and exits if none exists.
        """
        for prefix in HELPER_SEARCH_PREFIXES:
            candidate = prefix + filename
            if os.path.exists(candidate):
                return candidate
        log.error("Could not find pandoc helper (%s), bailing", filename)
        sys.exit(-1)

    def _check_response(self, resp):
        """
        Debug-logs the response and raises DivergenceError unless the HTTP
        status is exactly 200.
        """
        log.debug(resp)
        log.debug(resp.content)
        if resp.status_code != 200:
            raise DivergenceError(
                "Unexpected HTTP status %s from %s"
                % (resp.status_code, resp.url))

    def _run_pandoc(self, *args):
        """
        Runs pandoc with the given arguments and returns its stdout decoded
        as UTF-8. Raises DivergenceError on a non-zero exit status.
        """
        proc = subprocess.run(["pandoc"] + list(args), stdout=subprocess.PIPE)
        if proc.returncode != 0:
            raise DivergenceError(
                "pandoc exited with status %d" % proc.returncode)
        return proc.stdout.decode('UTF-8')

    def get_page(self, title, space_key=None, page_id=None):
        """
        Returns None if not found, otherwise a dict with id, version, space,
        and body (in both storage and editor formats).

        Looks the page up by title within the space unless `page_id` is
        given, in which case the page is fetched directly by id.
        """
        if space_key is None:
            space_key = self.default_space
        expand = "body.storage,body.editor,version,space"
        if not page_id:
            resp = self.api.get(
                self.base_url + "/rest/api/content",
                params={"spaceKey": space_key,
                        "title": title,
                        "expand": expand,
                        "type": "page"})
        else:
            resp = self.api.get(
                self.base_url + "/rest/api/content/%d" % int(page_id),
                params={"expand": expand,
                        "type": "page"})
        self._check_response(resp)
        respj = resp.json()
        if not page_id:
            if respj['size'] == 0:
                # Only reachable with a falsy-but-not-None id (e.g. 0 or "").
                if page_id is not None:
                    raise DivergenceError("Couldn't fetch given page id")
                return None
            if respj['size'] != 1:
                raise DivergenceError("Expect single result for title lookup")
            page = respj['results'][0]
            if page['space']['key'].upper() != space_key.upper():
                raise DivergenceError("Expect spaces to match")
        else:
            # We did a fetch by page_id directly
            page = respj
        return {"id": int(page['id']),
                "version": int(page['version']['number']),
                "space": page['space']['key'],
                "body": page['body']['storage']['value'],
                "body_editor": page['body']['editor']['value']}

    def get_conversion(self, body):
        """
        Uses the REST API to convert from storage to 'editor' format.
        """
        resp = self.api.post(
            self.base_url + "/rest/api/contentbody/convert/editor",
            json={"representation": "storage",
                  "value": body})
        self._check_response(resp)
        return resp.json()['value']

    def create_page(self, title, body, space_key=None):
        """
        Creates a new page with the given title and storage-format body in
        `space_key` (or the default space).
        """
        if space_key is None:
            space_key = self.default_space
        resp = self.api.post(
            self.base_url + "/rest/api/content",
            json={"space": {"key": space_key},
                  "type": "page",
                  "title": title,
                  "body": {
                      "storage": {
                          "representation": "storage",
                          "value": body}}})
        self._check_response(resp)

    def update_page(self, title, body, page_id, prev_version):
        """
        Replaces the title and storage-format body of page `page_id`,
        submitting version number `prev_version` + 1.
        """
        resp = self.api.put(
            self.base_url + "/rest/api/content/%d" % page_id,
            json={"type": "page",
                  "title": title,
                  "version": {"number": prev_version + 1},
                  "body": {
                      "storage": {
                          "representation": "storage",
                          "value": body}}})
        self._check_response(resp)

    def title_from_path(self, path):
        """
        Derives a page title from a file path: the file name without its
        directory or final extension, with underscores replaced by spaces.

        Splitting on the first '.' (the previous approach) produced an empty
        title for paths such as './notes.md'.
        """
        stem = os.path.splitext(os.path.basename(path))[0]
        # TODO: only alphanum and spaces?
        return stem.replace('_', ' ')

    def convert(self, f, header=None):
        """
        Runs pandoc on file `f` with the Confluence writer script and returns
        the resulting body, optionally prefixed with a table-of-contents
        block and/or the given header text.
        """
        body = self._run_pandoc("-t", self.pandoc_helper_path, f)
        # NOTE(review): the two literals below look like they may have lost
        # surrounding Confluence macro markup (the bare "1 3" resembles TOC
        # level parameters) -- confirm against the upstream source.
        if self.include_toc:
            body = """ 1 3 """ + body
        if header:
            body = "\n\n" + header + "\n\n\n" + body
        return body

    def metadata(self, f):
        """
        Returns the metadata of file `f` as a dict, produced by rendering the
        file through the pandoc JSON metadata template.
        """
        return json.loads(
            self._run_pandoc("--template", self.pandoc_meta_path, f))

    def strip_tags(self, text):
        """
        THIS IS NOT A SANITIZER, just a naive way to strip (most?) HTML tags.
        """
        return re.sub(r'<[^<]+?>', '', text)

    def run(self, files):
        """
        Converts each file and creates or updates the matching Confluence
        page, printing one status line ("created", "updated" or "no change")
        per file.
        """
        for f in files:
            meta = self.metadata(f)
            log.debug(meta)
            title = meta.get('confluence-page-title', self.title_from_path(f))
            space_key = meta.get('confluence-space-key', self.default_space)
            page_id = meta.get('confluence-page-id')
            if self.no_header:
                # --no-header trumps all
                header = False
            else:
                header = (
                    self.header or                    # command-line value gets priority
                    meta.get('disclaimer-header') or  # fall back to per-file
                    DEFAULT_HEADER
                )
            log.debug(title)
            body = self.convert(f, header)
            prev = self.get_page(title, space_key=space_key, page_id=page_id)
            log.debug(prev)
            if prev is None:
                self.create_page(title, body, space_key=space_key)
                print(f + ": created")
                continue

            # Compare tag-stripped 'editor' renderings of the old and new
            # bodies to decide whether anything changed.
            prev_body = self.strip_tags(prev['body_editor'])
            this_body = self.strip_tags(self.get_conversion(body))
            if prev_body != this_body or self.force_update:
                # Show a diff in verbose mode
                diff = ''.join(difflib.unified_diff(
                    prev_body.splitlines(keepends=True),
                    this_body.splitlines(keepends=True),
                    fromfile='old',
                    tofile='new'))
                log.info("Diff of %s changes:\n%s", f, diff)
                self.update_page(title, body, prev['id'], prev['version'])
                print(f + ": updated")
            else:
                print(f + ": no change")


def main():
    """
    Command-line entry point: parses arguments, reads credentials from the
    environment, checks that pandoc is runnable, and processes the files.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""
Simple Markdown-to-Confluence uploader, using pandoc and the Confluence REST API.

required environment variables:
    CONFLUENCE_USER
    CONFLUENCE_PASSWORD
    CONFLUENCE_URL
""")
    parser.add_argument("-v", "--verbose",
        action="count",
        default=0,
        help="Show more debugging statements (can be repeated)")
    parser.add_argument("-s", "--space-key",
        default=None,
        help='Confluence Space Key (usually like "PROJ" or "~username")')
    parser.add_argument("-f", "--force",
        action='store_true',
        help='Forces an update even if we think nothing has changed')
    parser.add_argument("--header",
        action='store',
        help='Specify header to insert into the confluence document')
    parser.add_argument("--no-header",
        action='store_true',
        help='Disables inserting disclaimer headers into the confluence document')
    parser.add_argument("--toc",
        action='store_true',
        help='Inserts table-of-contents into the confluence document')
    parser.add_argument("FILE", nargs='+')

    args = parser.parse_args()

    if args.verbose > 1:
        level = log.DEBUG
    elif args.verbose > 0:
        level = log.INFO
    else:
        level = log.WARN
    log.basicConfig(format="%(levelname)s: %(message)s", level=level)

    try:
        user = os.environ['CONFLUENCE_USER']
        password = os.environ['CONFLUENCE_PASSWORD']
        url = os.environ['CONFLUENCE_URL']
    except KeyError:
        parser.exit(-1, "Need to pass environment variable configs (see --help)\n")

    log.info("User: " + user)
    log.info("URL: " + url)

    if url.endswith('/'):
        url = url[:-1]

    if args.space_key is None:
        args.space_key = "~" + user
        log.warning("Defaulting to home space: %s" % args.space_key)

    if args.header and args.no_header:
        parser.exit(-1, "Pick one of --header and --no-header.\n")

    try:
        subprocess.check_output(['pandoc', '--version'])
    except (OSError, subprocess.CalledProcessError):
        # OSError: pandoc binary not found or not executable;
        # CalledProcessError: it ran but exited non-zero.
        parser.exit(-1, "This script depends on 'pandoc', which doesn't "
                        "seem to be installed.\n")

    dp = DivergenceProgram(user, password, url, args.space_key,
        force_update=args.force,
        include_toc=args.toc,
        header=args.header,
        no_header=args.no_header)  # previously never forwarded
    dp.run(args.FILE)


if __name__ == '__main__':
    main()