# pdnew/markdown.py

# builtins
import re
import collections

# third party
import markupsafe
import peewee

import markdown_it
import mdit_py_plugins.footnote
import mdit_py_plugins.anchors
import mdit_py_plugins.tasklists
import pygments
import pygments.util
import pygments.lexers
import pygments.formatters

# internals
from application import app

import rendering

def code_highlight(code, name, attrs):

    """
    Highlight callback for markdown_it fenced code blocks, backed by pygments.

    code: raw (unescaped) content of the fence
    name: language name given after the opening fence, may be empty
    attrs: remainder of the fence info string, unused here

    Returns the HTML produced by pygments, or '' if no language was given
    or pygments has no lexer for it. markdown_it treats a falsy return
    value as "not highlighted" and HTML-escapes the code itself, whereas
    returning the raw code would put it into the page unescaped.
    """

    if not name:

        # plain fence without a language: nothing to look up, nothing to warn about
        return ''

    try:

        lexer = pygments.lexers.get_lexer_by_name(name)

    except pygments.util.ClassNotFound:

        app.logger.warning(f"Markdown code highlighting could not find lexer for language '{name}'.")
        return '' # let markdown_it escape and wrap the code

    return pygments.highlight(code, lexer, pygments.formatters.HtmlFormatter())

def renderable_plugin(md):

    """
    markdown_it plugin adding the #[class/handle] renderable syntax.

    Registers the inline parsing rule (placed before the 'image' rule)
    and the matching render rule on the given parser instance.
    """

    rule_name = 'renderable_inline'

    # parsing side: recognize the Renderable token
    md.inline.ruler.before('image', rule_name, renderable_inline_rule)

    # output side: turn the token into HTML
    md.add_render_rule(rule_name, renderable_inline_render)

def renderable_inline_rule(state, silent):

    """
    Inline rule recognizing renderable references.

    Syntax: #[classkey/handle] or #[classkey/handle:mode]

    classkey: Name of the class of which to load an instance
    handle: parameter for the classes .load function, i.e. id or name
    mode: rendering mode passed to the instances .render function,
          defaults to 'inline' when omitted

    The definition must not be empty, must not contain spaces, must
    contain exactly one '/' and at most one ':' after it.

    state: markdown_it inline state, read via .src, .pos and .posMax
    silent: if true, only validate and skip the element without pushing a token

    Returns True if a renderable was recognized at state.pos, in which
    case state.pos has been moved behind its closing bracket (in silent
    mode too, the parser relies on that to skip over the element).
    Returns False and leaves state untouched otherwise.
    """

    src = state.src
    pos = state.pos
    pos_max = state.posMax

    # both opening characters have to lie inside the range being parsed
    if pos + 2 > pos_max or not src.startswith('#[', pos):
        return False

    renderable_def_start = pos + 2

    # only look for the closing bracket within the range being parsed
    renderable_def_end = src.find(']', renderable_def_start, pos_max)

    if renderable_def_end == -1:
        return False # reached end of string without finding closing bracket

    renderable_def = src[renderable_def_start:renderable_def_end]

    if not renderable_def:
        return False # empty definition is invalid

    if ' ' in renderable_def:
        return False # spaces not allowed in renderable definitions

    renderable_class_key, slash, rest = renderable_def.partition('/')

    if not slash or '/' in rest:
        return False # only exactly one slash allowed, no more, no less

    renderable_handle, colon, renderable_mode = rest.partition(':')

    if ':' in renderable_mode:
        return False # more than one ':', invalid

    if not colon:
        renderable_mode = 'inline' # no ':', use default mode

    if not silent:

        # silent means "validate only": no tokens may be added to state

        token = state.push('renderable_inline', 'tag-whatever', 0) # nesting 0: single token, not an open/close pair
        token.attrs = {
            'renderable_def': renderable_def, # included for debugging, not actual use
            'renderable_class_key' : renderable_class_key,
            'renderable_handle': renderable_handle,
            'renderable_mode': renderable_mode,
        }

    # push parser forward to after the renderable_inline element. the
    # tokenizer loop that called this rule continues from there, so
    # there is no need to re-enter the tokenizer from within the rule.
    state.pos = renderable_def_end + 1

    return True

def renderable_inline_render(self, tokens, idx, options, env):

    """
    Render rule for 'renderable_inline' tokens.

    Looks up the class registered under the token's class key, loads an
    instance via the class' .load(handle) and returns the result of its
    .render(mode) as a plain string.

    If the class key is unknown or loading fails, an error <div> is
    returned instead (load failures are logged). Exceptions raised by
    .render itself are not caught here.
    """

    token = tokens[idx]

    class_key = token.attrs['renderable_class_key']
    handle = token.attrs['renderable_handle']
    mode = token.attrs['renderable_mode']

    # class_key and handle are copied verbatim from the markdown source,
    # so they must be escaped before being embedded in error markup
    class_key_html = str(markupsafe.escape(class_key))
    handle_html = str(markupsafe.escape(handle))

    known_classes = rendering.Renderable.__class_descendants_lowercase__

    if class_key not in known_classes: # unknown class

        return f'<div class="rendering-inline error">Inline rendering for unknown class \'{class_key_html}\'.</div>'

    cls = known_classes[class_key]

    try:

        instance = cls.load(handle)

    except Exception as e:

        # deliberately broad: a broken reference must not take down the whole page
        app.logger.error(f"Error when loading renderable in renderable_inline markdown rule: { str(e) }")
        return f'<div class="rendering-inline error">Inline rendering for unknown {class_key_html} \'{handle_html}\'.</div>'

    # str() because otherwise string concat operations result in outer HTML
    # from other markdown elements being escaped. Markup is applied centrally
    # in MarkdownString.render.
    #
    # NOTE: the output is returned as is, without wrapping it in
    # '</p></section>…<section><p>' to break it out of the enclosing section.
    return str(instance.render(mode))

# matches inline content consisting of nothing but one #[class/handle] renderable
_RENDERABLE_ONLY_RE = re.compile(r'^#\[.+/.+\]\s*$')

def _is_renderable_only(token):

    # whether token is an inline token whose whole content is a single renderable
    return (
        token.type == 'inline'
        and isinstance(token.content, str)
        and _RENDERABLE_ONLY_RE.match(token.content) is not None
    )

def section_core_rule(state):

    """
    Core rule inserting section_open/section_close tokens into state.tokens.

    - Content is wrapped in sections; <h2> headings are kept outside of
      them: the current section is closed before an <h2> opens and a new
      one is opened after it closes.
    - If the document starts with a heading (of any level), the first
      section is opened only after that heading has been closed, so the
      section never starts inside the heading.
    - A paragraph consisting only of a renderable closes the current
      section right before the renderable. A new section is opened after
      it, unless the paragraph directly following is a renderable too or
      fewer than three tokens follow.

    Sections are never nested. state.tokens is replaced by the new list.
    """

    source_tokens = state.tokens
    tokens = []

    in_section = False # whether a section is currently open
    open_next = False # whether to open a new section in the next loop iteration
    leading_heading = False # inside a heading that is the very first element

    for idx, token in enumerate(source_tokens):

        if open_next:

            open_next = False

            if not in_section:
                tokens.append(markdown_it.token.Token('section_open', 'section', 1))
                in_section = True

        if idx == 0: # for the first element in the document

            if token.type == 'heading_open':

                # keep the heading outside, open once it has been closed
                leading_heading = True

            else:

                tokens.append(markdown_it.token.Token('section_open', 'section', 1))
                in_section = True

        elif token.type == 'heading_open' and token.tag == 'h2':

            if in_section: # never emit a close without a matching open

                tokens.append(markdown_it.token.Token('section_close', 'section', -1))
                in_section = False

        elif token.type == 'heading_close' and (token.tag == 'h2' or leading_heading):

            leading_heading = False
            open_next = True

        elif _is_renderable_only(token): # if current token is an inline renderable

            # check for directly following inline renderable, +3 to skip
            # the paragraph_close/paragraph_open tokens of the
            # </p><p>#[foo/bar] structure
            if len(source_tokens) >= idx + 4:

                open_next = not _is_renderable_only(source_tokens[idx + 3])

            else:

                open_next = False

            if in_section:

                # close the section to break out the inline renderable
                # and allow it to take the full width. opening a new
                # section afterwards handled through open_next set earlier.
                tokens.append(markdown_it.token.Token('section_close', 'section', -1))
                in_section = False

        tokens.append(token)

    if in_section: # close the section still open at the end of the document
        tokens.append(markdown_it.token.Token('section_close', 'section', -1))

    state.tokens = tokens

def section_open(self, tokens, idx, options, env):

    # render rule for 'section_open' tokens, output does not depend on the token
    opening_tag = '<section>'

    return opening_tag

def section_close(self, tokens, idx, options, env):

    # render rule for 'section_close' tokens, output does not depend on the token
    closing_tag = '</section>'

    return closing_tag

def section_plugin(md):

    """
    markdown_it plugin that splits output into <section>s, based on <h2> placements.

    Appends the section core rule to the parser's core chain and
    registers the render rules for the tokens it inserts.
    """

    md.core.ruler.push("section_core", section_core_rule)

    renderers = (
        ("section_open", section_open),
        ("section_close", section_close),
    )

    for token_type, renderer in renderers:
        md.add_render_rule(token_type, renderer)

# parser with the full feature set: commonmark plus tables, strikethrough,
# footnotes, heading anchors, task lists, pygments highlighting and the
# renderable/section plugins defined in this module
md_unsafe = (
    markdown_it.MarkdownIt('commonmark', {'highlight': code_highlight})
    .enable(['table', 'strikethrough'])
    .use(mdit_py_plugins.footnote.footnote_plugin)
    .use(mdit_py_plugins.anchors.anchors_plugin, permalink=True, min_level=2, max_level=6)
    .use(mdit_py_plugins.tasklists.tasklists_plugin, label=True)
    .use(renderable_plugin)
    .use(section_plugin)
)

# restricted parser: markdown profile with nothing active,
# with only a handful of rules switched on
md_safe = markdown_it.MarkdownIt('zero').enable([
    'emphasis',
    'strikethrough',
    'list',
    'blockquote',
    'backticks',
    'fence',
])

class MarkdownString(collections.UserString):

    """
    String of markdown source that knows how to render itself to HTML.

    Besides being a string, the class implements the descriptor protocol
    (__get__/__set__/__set_name__): used as a class attribute, values
    assigned on instances of the owning class are converted to this type
    and stored in the instance's __dict__ under the attribute's name.
    """

    # parser used by render, subclasses can swap in another one
    md = md_unsafe

    def __init__(self, seq=None):

        self._name = None # attribute name when used as descriptor, see __set_name__
        super().__init__('' if seq is None else seq)

    def __get__(self, instance, cls):

        if instance is None: # accessed on the class: hand out the descriptor itself
            return self

        # NOTE(review): raises KeyError if nothing was assigned on this instance yet
        return instance.__dict__[self._name]

    def __set__(self, instance, value):

        instance.__dict__[self._name] = type(self)(value)

    def __set_name__(self, owner, name):

        self._name = name

    def __add__(self, other):

        return type(self)(str(self) + str(other))

    def __radd__(self, other):

        # reflected operand: other is on the left hand side, so it goes first
        return type(self)(str(other) + str(self))

    def __iadd__(self, other):

        # returns a new object instead of modifying self in place
        return self.__add__(other)

    def render(self, mode=None, format=None):

        """
        Render the markdown source and return the HTML as markupsafe.Markup.

        format is here for prototype compatibility but is ignored.
        mode functions differently. 'inline' will use markdown_it's `renderInline`,
        any other value uses `render`.
        """

        if mode == 'inline':
            return markupsafe.Markup(self.md.renderInline(str(self)))

        return markupsafe.Markup(self.md.render(str(self)))

class MarkdownFieldAccessor(peewee.FieldAccessor):

    """
    Field accessor converting assigned values to MarkdownString.
    """

    def __set__(self, instance, value):

        # None and anything that already is a MarkdownString
        # (subclasses included) is stored unchanged
        keep_as_is = value is None or isinstance(value, MarkdownString)

        super().__set__(instance, value if keep_as_is else MarkdownString(value))

class MarkdownCharField(peewee.CharField):

    """
    peewee CharField using MarkdownFieldAccessor as its accessor.
    """

    accessor_class = MarkdownFieldAccessor

class MarkdownTextField(peewee.TextField):

    """
    peewee TextField using MarkdownFieldAccessor as its accessor.
    """

    accessor_class = MarkdownFieldAccessor

class SafeMarkdownString(MarkdownString):

    """
    MarkdownString rendered with the md_safe parser instead of md_unsafe.
    """

    md = md_safe

class SafeMarkdownFieldAccessor(peewee.FieldAccessor):

    """
    Field accessor converting assigned values to SafeMarkdownString.
    """

    def __set__(self, instance, value):

        # None and anything that already is a SafeMarkdownString is stored
        # unchanged, everything else (plain MarkdownStrings included) is converted
        keep_as_is = value is None or isinstance(value, SafeMarkdownString)

        super().__set__(instance, value if keep_as_is else SafeMarkdownString(value))

class SafeMarkdownCharField(peewee.CharField):

    """
    peewee CharField using SafeMarkdownFieldAccessor as its accessor.
    """

    accessor_class = SafeMarkdownFieldAccessor

class SafeMarkdownTextField(peewee.TextField):

    """
    peewee TextField using SafeMarkdownFieldAccessor as its accessor.
    """

    accessor_class = SafeMarkdownFieldAccessor