diff options
| author | bnewbold <bnewbold@robocracy.org> | 2016-05-05 17:02:48 -0400 | 
|---|---|---|
| committer | bnewbold <bnewbold@robocracy.org> | 2016-05-05 17:02:48 -0400 | 
| commit | 1c87cf9b7dfee65b6ea22e5336a0a1de168140dd (patch) | |
| tree | 5df4c522251bccc27eef3c898bdf5e59219bbf25 | |
| parent | 19aba9065649a9b41fe82783a48cb056304bd847 (diff) | |
| download | bnewnet-1c87cf9b7dfee65b6ea22e5336a0a1de168140dd.tar.gz bnewnet-1c87cf9b7dfee65b6ea22e5336a0a1de168140dd.zip | |
add sitemap and headerid plugins
| -rw-r--r-- | pelicanconf.py | 6 | ||||
| -rw-r--r-- | plugins/headerid/README.rst | 16 | ||||
| -rw-r--r-- | plugins/headerid/__init__.py | 1 | ||||
| -rw-r--r-- | plugins/headerid/headerid.py | 31 | ||||
| -rw-r--r-- | plugins/sitemap/Readme.rst | 74 | ||||
| -rw-r--r-- | plugins/sitemap/__init__.py | 1 | ||||
| -rw-r--r-- | plugins/sitemap/sitemap.py | 268 | 
7 files changed, 397 insertions, 0 deletions
| diff --git a/pelicanconf.py b/pelicanconf.py index ab333c7..71046bb 100644 --- a/pelicanconf.py +++ b/pelicanconf.py @@ -20,12 +20,18 @@ PLUGIN_PATHS = ['plugins']  THEME = "theme" +PLUGINS = ['sitemap', 'headerid'] + +SITEMAP = {'format': 'xml'} +  ARTICLE_URL = '{date:%Y}/{slug}/'  ARTICLE_SAVE_AS = '{date:%Y}/{slug}/index.html'  PAGE_URL = '{slug}/'  PAGE_SAVE_AS = '{slug}/index.html'  YEAR_ARCHIVE_SAVE_AS = '{date:%Y}/index.html' +MD_EXTENSIONS = ["codehilite(css_class=highlight)", "extra", "toc"] +  TIMEZONE = 'UTC'  DEFAULT_LANG = u'en' diff --git a/plugins/headerid/README.rst b/plugins/headerid/README.rst new file mode 100644 index 0000000..7bfa402 --- /dev/null +++ b/plugins/headerid/README.rst @@ -0,0 +1,16 @@ +Pelican ``headerid`` plugin +=========================== + +This plugin adds an anchor to each heading so you can deep-link to headers. +It is intended for formats such as reStructuredText that do not natively +generate these anchors. + +The ``HEADERID_LINK_CHAR`` config can be set to use a different char from ``*`` +for anchor text. + +For Markdown, this plugin is less relevant since the Python-Markdown library +includes a Table of Contents extension that will generate link anchors. +To enable the ``toc`` extension, add a line similar to the following example +to your Pelican settings file:: + +    MD_EXTENSIONS = ["codehilite(css_class=highlight)", "extra", "toc"] diff --git a/plugins/headerid/__init__.py b/plugins/headerid/__init__.py new file mode 100644 index 0000000..423261c --- /dev/null +++ b/plugins/headerid/__init__.py @@ -0,0 +1 @@ +from headerid import *
\ No newline at end of file diff --git a/plugins/headerid/headerid.py b/plugins/headerid/headerid.py new file mode 100644 index 0000000..ee9d265 --- /dev/null +++ b/plugins/headerid/headerid.py @@ -0,0 +1,31 @@ +from pelican import readers +from pelican.readers import PelicanHTMLTranslator +from pelican import signals +from docutils import nodes + +LINK_CHAR = '*' + + +def init_headerid(sender): +    global LINK_CHAR +    char = sender.settings.get('HEADERID_LINK_CHAR') +    if char: +        LINK_CHAR = char + +def register(): +    signals.initialized.connect(init_headerid) + + +    class HeaderIDPatchedPelicanHTMLTranslator(PelicanHTMLTranslator): +        def depart_title(self, node): +            close_tag = self.context[-1] +            parent = node.parent +            if isinstance(parent, nodes.section) and parent.hasattr('ids') and parent['ids']: +                anchor_name = parent['ids'][0] +                # add permalink anchor +                if close_tag.startswith('</h'): +                    self.body.append( +                        '<a class="headerlink" href="#%s" title="Permalink to this headline">%s</a>' % +                        (anchor_name, LINK_CHAR)) +            PelicanHTMLTranslator.depart_title(self, node) +    readers.PelicanHTMLTranslator = HeaderIDPatchedPelicanHTMLTranslator diff --git a/plugins/sitemap/Readme.rst b/plugins/sitemap/Readme.rst new file mode 100644 index 0000000..719c38b --- /dev/null +++ b/plugins/sitemap/Readme.rst @@ -0,0 +1,74 @@ +Sitemap +------- + +This plugin generates plain-text or XML sitemaps. You can use the ``SITEMAP`` +variable in your settings file to configure the behavior of the plugin. 
+ +The ``SITEMAP`` variable must be a Python dictionary and can contain these keys: + +- ``format``, which sets the output format of the plugin (``xml`` or ``txt``) + +- ``priorities``, which is a dictionary with three keys: + +  - ``articles``, the priority for the URLs of the articles and their +    translations + +  - ``pages``, the priority for the URLs of the static pages + +  - ``indexes``, the priority for the URLs of the index pages, such as tags, +     author pages, category indexes and archives + +  All the values of this dictionary must be decimal numbers between ``0`` and ``1``. + +- ``changefreqs``, which is a dictionary with three keys: + +  - ``articles``, the update frequency of the articles + +  - ``pages``, the update frequency of the pages + +  - ``indexes``, the update frequency of the index pages + +  Valid frequency values are ``always``, ``hourly``, ``daily``, ``weekly``, ``monthly``, +  ``yearly`` and ``never``. + +You can exclude URLs from the XML sitemap via regular expressions, which are matched against the start of each URL. +For example, to exclude all URLs starting with ``tag/`` or ``category/`` you can +use the following ``SITEMAP`` setting. + +.. code-block:: python + +    SITEMAP = { +        'exclude': ['tag/', 'category/'] +    } + +If a key is missing or a value is incorrect, it will be replaced with the +default value. + +The sitemap is saved in ``<output_path>/sitemap.<format>``. + +.. note:: +   ``priorities`` and ``changefreqs`` are information for search engines. +   They are only used in the XML sitemaps. +   For more information: <http://www.sitemaps.org/protocol.html#xmlTagDefinitions> + +**Example** + +Here is an example configuration (these are also the default settings): + +.. 
code-block:: python + +    PLUGINS=['pelican.plugins.sitemap',] + +    SITEMAP = { +        'format': 'xml', +        'priorities': { +            'articles': 0.5, +            'indexes': 0.5, +            'pages': 0.5 +        }, +        'changefreqs': { +            'articles': 'monthly', +            'indexes': 'daily', +            'pages': 'monthly' +        } +    } diff --git a/plugins/sitemap/__init__.py b/plugins/sitemap/__init__.py new file mode 100644 index 0000000..6523d3a --- /dev/null +++ b/plugins/sitemap/__init__.py @@ -0,0 +1 @@ +from .sitemap import *
\ No newline at end of file diff --git a/plugins/sitemap/sitemap.py b/plugins/sitemap/sitemap.py new file mode 100644 index 0000000..8ce492a --- /dev/null +++ b/plugins/sitemap/sitemap.py @@ -0,0 +1,268 @@ +# -*- coding: utf-8 -*- +''' +Sitemap +------- + +The sitemap plugin generates plain-text or XML sitemaps. +''' + +from __future__ import unicode_literals + +import re +import collections +import os.path + +from datetime import datetime +from logging import warning, info +from codecs import open +from pytz import timezone + +from pelican import signals, contents +from pelican.utils import get_date + +TXT_HEADER = """{0}/index.html +{0}/archives.html +{0}/tags.html +{0}/categories.html +""" + +XML_HEADER = """<?xml version="1.0" encoding="utf-8"?> +<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" +xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" +xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> +""" + +XML_URL = """ +<url> +<loc>{0}/{1}</loc> +<lastmod>{2}</lastmod> +<changefreq>{3}</changefreq> +<priority>{4}</priority> +</url> +""" + +XML_FOOTER = """ +</urlset> +""" + + +def format_date(date): +    if date.tzinfo: +        tz = date.strftime('%z') +        tz = tz[:-2] + ':' + tz[-2:] +    else: +        tz = "-00:00" +    return date.strftime("%Y-%m-%dT%H:%M:%S") + tz + +class SitemapGenerator(object): + +    def __init__(self, context, settings, path, theme, output_path, *null): + +        self.output_path = output_path +        self.context = context +        self.now = datetime.now() +        self.siteurl = settings.get('SITEURL') + + +        self.default_timezone = settings.get('TIMEZONE', 'UTC') +        self.timezone = getattr(self, 'timezone', self.default_timezone) +        self.timezone = timezone(self.timezone) + +        self.format = 'xml' + +        self.changefreqs = { +            'articles': 'monthly', +            'indexes': 'daily', +            
'pages': 'monthly' +        } + +        self.priorities = { +            'articles': 0.5, +            'indexes': 0.5, +            'pages': 0.5 +        } + +        self.sitemapExclude = [] + +        config = settings.get('SITEMAP', {}) + +        if not isinstance(config, dict): +            warning("sitemap plugin: the SITEMAP setting must be a dict") +        else: +            fmt = config.get('format') +            pris = config.get('priorities') +            chfreqs = config.get('changefreqs') +            self.sitemapExclude = config.get('exclude', []) + +            if fmt not in ('xml', 'txt'): +                warning("sitemap plugin: SITEMAP['format'] must be `txt' or `xml'") +                warning("sitemap plugin: Setting SITEMAP['format'] on `xml'") +            elif fmt == 'txt': +                self.format = fmt +                return + +            valid_keys = ('articles', 'indexes', 'pages') +            valid_chfreqs = ('always', 'hourly', 'daily', 'weekly', 'monthly', +                    'yearly', 'never') + +            if isinstance(pris, dict): +                # We use items for Py3k compat. .iteritems() otherwise +                for k, v in pris.items(): +                    if k in valid_keys and not isinstance(v, (int, float)): +                        default = self.priorities[k] +                        warning("sitemap plugin: priorities must be numbers") +                        warning("sitemap plugin: setting SITEMAP['priorities']" +                                "['{0}'] on {1}".format(k, default)) +                        pris[k] = default +                self.priorities.update(pris) +            elif pris is not None: +                warning("sitemap plugin: SITEMAP['priorities'] must be a dict") +                warning("sitemap plugin: using the default values") + +            if isinstance(chfreqs, dict): +                # .items() for py3k compat. 
+                for k, v in chfreqs.items(): +                    if k in valid_keys and v not in valid_chfreqs: +                        default = self.changefreqs[k] +                        warning("sitemap plugin: invalid changefreq `{0}'".format(v)) +                        warning("sitemap plugin: setting SITEMAP['changefreqs']" +                                "['{0}'] on '{1}'".format(k, default)) +                        chfreqs[k] = default +                self.changefreqs.update(chfreqs) +            elif chfreqs is not None: +                warning("sitemap plugin: SITEMAP['changefreqs'] must be a dict") +                warning("sitemap plugin: using the default values") + +    def write_url(self, page, fd): + +        if getattr(page, 'status', 'published') != 'published': +            return + +        # We can disable categories/authors/etc by using False instead of '' +        if not page.save_as: +            return + +        page_path = os.path.join(self.output_path, page.save_as) +        if not os.path.exists(page_path): +            return + +        lastdate = getattr(page, 'date', self.now) +        try: +            lastdate = self.get_date_modified(page, lastdate) +        except ValueError: +            warning("sitemap plugin: " + page.save_as + " has invalid modification date,") +            warning("sitemap plugin: using date value as lastmod.") +        lastmod = format_date(lastdate) + +        if isinstance(page, contents.Article): +            pri = self.priorities['articles'] +            chfreq = self.changefreqs['articles'] +        elif isinstance(page, contents.Page): +            pri = self.priorities['pages'] +            chfreq = self.changefreqs['pages'] +        else: +            pri = self.priorities['indexes'] +            chfreq = self.changefreqs['indexes'] + +        pageurl = '' if page.url == 'index.html' else page.url + +        #Exclude URLs from the sitemap: +        if self.format == 'xml': +            
flag = False +            for regstr in self.sitemapExclude: +                if re.match(regstr, pageurl): +                    flag = True +                    break +            if not flag: +                fd.write(XML_URL.format(self.siteurl, pageurl, lastmod, chfreq, pri)) +        else: +            fd.write(self.siteurl + '/' + pageurl + '\n') + +    def get_date_modified(self, page, default): +        if hasattr(page, 'modified'): +            if isinstance(page.modified, datetime): +                return page.modified +            return get_date(page.modified) +        else: +            return default + +    def set_url_wrappers_modification_date(self, wrappers): +        for (wrapper, articles) in wrappers: +            lastmod = datetime.min.replace(tzinfo=self.timezone) +            for article in articles: +                lastmod = max(lastmod, article.date.replace(tzinfo=self.timezone)) +                try: +                    modified = self.get_date_modified(article, datetime.min).replace(tzinfo=self.timezone) +                    lastmod = max(lastmod, modified) +                except ValueError: +                    # Supressed: user will be notified. 
+                    pass +            setattr(wrapper, 'modified', str(lastmod)) + +    def generate_output(self, writer): +        path = os.path.join(self.output_path, 'sitemap.{0}'.format(self.format)) + +        pages = self.context['pages'] + self.context['articles'] \ +                + [ c for (c, a) in self.context['categories']] \ +                + [ t for (t, a) in self.context['tags']] \ +                + [ a for (a, b) in self.context['authors']] + +        self.set_url_wrappers_modification_date(self.context['categories']) +        self.set_url_wrappers_modification_date(self.context['tags']) +        self.set_url_wrappers_modification_date(self.context['authors']) + +        for article in self.context['articles']: +            pages += article.translations + +        info('writing {0}'.format(path)) + +        with open(path, 'w', encoding='utf-8') as fd: + +            if self.format == 'xml': +                fd.write(XML_HEADER) +            else: +                fd.write(TXT_HEADER.format(self.siteurl)) + +            FakePage = collections.namedtuple('FakePage', +                                              ['status', +                                               'date', +                                               'url', +                                               'save_as']) + +            for standard_page_url in ['index.html', +                                      'archives.html', +                                      'tags.html', +                                      'categories.html']: +                fake = FakePage(status='published', +                                date=self.now, +                                url=standard_page_url, +                                save_as=standard_page_url) +                self.write_url(fake, fd) + +            # add template pages +            # We use items for Py3k compat. 
.iteritems() otherwise +            for path, template_page_url in self.context['TEMPLATE_PAGES'].items(): + +                # don't add duplicate entry for index page +                if template_page_url == 'index.html': +                    continue + +                fake = FakePage(status='published', +                                date=self.now, +                                url=template_page_url, +                                save_as=template_page_url) +                self.write_url(fake, fd) + +            for page in pages: +                self.write_url(page, fd) + +            if self.format == 'xml': +                fd.write(XML_FOOTER) + + +def get_generators(generators): +    return SitemapGenerator + + +def register(): +    signals.get_generators.connect(get_generators) | 
