pdnew/markdown.py

360 lines
11 KiB
Python
Raw Permalink Normal View History

# builtins
import re
import collections
# third party
import markupsafe
import peewee
2024-05-07 03:11:48 +00:00
import markdown_it
import mdit_py_plugins.footnote
import mdit_py_plugins.anchors
import mdit_py_plugins.tasklists
import pygments
import pygments.util
import pygments.lexers
import pygments.formatters
# internals
2024-04-14 15:11:25 +00:00
from application import app
import rendering
2024-05-07 03:11:48 +00:00
def code_highlight(code, name, attrs):
    """
    Highlight callback for markdown-it code fences, backed by Pygments.

    code: raw content of the code block
    name: language name given on the fence, may be empty
    attrs: further fence attributes; unused

    Returns HTML. If `name` is empty or no Pygments lexer is known for it,
    the code is returned HTML-escaped but without highlighting.
    """
    if not name:
        # fence without a language: nothing to look up, nothing to warn about
        return str(markupsafe.escape(code))
    try:
        lexer = pygments.lexers.get_lexer_by_name(name)
    except pygments.util.ClassNotFound:
        app.logger.warning(f"Markdown code highlighting could not find lexer for language '{name}'.")
        # The return value of a highlight callback is placed into the page
        # as-is, so the fallback has to be escaped here. Returning the raw
        # code would let markup inside a fence reach the browser unescaped.
        return str(markupsafe.escape(code))
    formatter = pygments.formatters.HtmlFormatter()
    return pygments.highlight(code, lexer, formatter)
def renderable_plugin(md):
    """
    markdown-it plugin that wires up inline renderables.

    md: the MarkdownIt instance to extend
    """
    rule_name = 'renderable_inline'
    # parsing side: register the inline rule ahead of the built-in 'image' rule
    md.inline.ruler.before('image', rule_name, renderable_inline_rule)
    # output side: render the tokens produced by that rule
    md.add_render_rule(rule_name, renderable_inline_render)
def renderable_inline_rule(state, silent):
    """
    Inline rule that recognizes references to renderables.

    Syntax: #[classkey/handle] or #[classkey/handle:mode]
    classkey: Name of the class of which to load an instance
    handle: parameter for the classes .load function, i.e. id or name
    mode: rendering mode passed to the instances .render function,
          'inline' if omitted

    Spaces are not allowed anywhere in the definition, and exactly one '/'
    and at most one ':' (after the '/') are accepted.

    state: inline parser state. src, pos and posMax are read; on a match
           pos is moved behind the closing bracket.
    silent: if true, no token is pushed; the match is only validated and
            state.pos advanced.

    Returns True if a renderable was recognized at state.pos, else False.
    """
    src = state.src
    marker_pos = state.pos
    pos_max = state.posMax

    # startswith() with an offset cannot run past the end of the string,
    # unlike indexing src[pos + 1] when '#' is the very last character.
    if marker_pos + 2 > pos_max or not src.startswith('#[', marker_pos):
        return False

    def_start = marker_pos + 2
    # only search within the bounds handed to us, never at or beyond posMax
    def_end = src.find(']', def_start, pos_max)
    if def_end == -1:
        return False  # reached end of text without finding closing bracket

    renderable_def = src[def_start:def_end]
    if not renderable_def:
        return False  # empty definition is invalid
    if ' ' in renderable_def:
        return False  # spaces not allowed in renderable definitions

    renderable_class_key, slash, rest = renderable_def.partition('/')
    if not slash or '/' in rest:
        return False  # exactly one slash allowed, no more, no less

    renderable_handle, colon, renderable_mode = rest.partition(':')
    if ':' in renderable_mode:
        return False  # more than one ':', invalid
    if not colon:
        renderable_mode = 'inline'  # no ':', use default mode

    if not silent:
        # nesting 0: a single token, neither opening nor closing anything
        token = state.push('renderable_inline', 'tag-whatever', 0)
        token.attrs = {
            'renderable_def': renderable_def,  # included for debugging, not actual use
            'renderable_class_key': renderable_class_key,
            'renderable_handle': renderable_handle,
            'renderable_mode': renderable_mode,
        }

    # Push the parser forward to after the renderable. The calling tokenizer
    # loop carries on from the new position by itself, so the remainder of
    # the text must not be tokenized recursively from within this rule.
    state.pos = def_end + 1
    return True
def renderable_inline_render(self, tokens, idx, options, env):
    """
    Render rule for 'renderable_inline' tokens.

    Looks up the class registered under the token's class key, loads the
    instance identified by the handle and returns its rendering in the
    requested mode. An unknown class key or a failing load yields an error
    <div> instead of raising.

    tokens, idx: token list and index of the token to render
    self, options, env: unused, part of the render rule signature

    Returns the HTML as a plain str.
    """
    attrs = tokens[idx].attrs
    class_key = attrs['renderable_class_key']
    handle = attrs['renderable_handle']
    mode = attrs['renderable_mode']

    # Class key and handle are taken verbatim from the markdown source, so
    # they must be escaped before being echoed back in the error markup.
    class_key_html = str(markupsafe.escape(class_key))
    handle_html = str(markupsafe.escape(handle))

    known_classes = rendering.Renderable.__class_descendants_lowercase__
    if class_key not in known_classes:  # unknown class
        return f'<div class="rendering-inline error">Inline rendering for unknown class \'{class_key_html}\'.</div>'

    cls = known_classes[class_key]
    try:
        instance = cls.load(handle)
    except Exception as e:
        # deliberately broad: whatever goes wrong while loading must not
        # take down rendering of the surrounding document
        app.logger.error(f"Error when loading renderable in renderable_inline markdown rule: { str(e) }")
        return f'<div class="rendering-inline error">Inline rendering for unknown {class_key_html} \'{handle_html}\'.</div>'

    # str() because otherwise string concat operations result in outer HTML
    # from other markdown elements being escaped. Markup is applied centrally
    # in MarkdownString.render.
    #
    # Breaking the renderable out of the surrounding <section> is not done
    # here; section_core_rule closes and reopens sections around it.
    return str(instance.render(mode))
def section_core_rule(state):
    """
    Core rule that splits the token stream into <section>s.

    - A section is opened at the start of the document. If the document
      starts with a heading, the section is opened after that heading.
    - Every <h2> closes the current section; a new one is opened after the
      heading has been closed.
    - A paragraph consisting only of an inline renderable (#[class/handle])
      closes the current section so the renderable sits outside of it. A new
      section is opened right after the renderable, unless the next
      paragraph is a renderable as well or the document ends.
    - Sections still open at the end of the document are closed.

    state: core parser state; state.tokens is replaced by the new token list.
    """
    regexp_renderable = re.compile(r'^#\[.+/.+\]\s*$')

    def open_token():
        return markdown_it.token.Token('section_open', 'section', 1)

    def close_token():
        return markdown_it.token.Token('section_close', 'section', -1)

    def is_renderable(candidate):
        # an inline token whose whole content is one renderable definition
        return (
            candidate.type == 'inline'
            and isinstance(candidate.content, str)
            and regexp_renderable.match(candidate.content) is not None
        )

    source_tokens = state.tokens
    tokens = []
    nesting = 0  # number of currently open sections
    open_next = False  # whether to open a new section in the next loop iteration
    open_after_first_heading = False  # document starts with a non-h2 heading

    for idx, token in enumerate(source_tokens):
        if open_next:
            open_next = False
            tokens.append(open_token())
            nesting += 1

        if idx == 0:  # for the first element in the document
            if token.type != 'heading_open':
                tokens.append(open_token())
                nesting += 1
            elif token.tag != 'h2':
                # Wait for the heading to be closed before opening the
                # section; opening it on the very next token would place
                # <section> inside the heading element. A leading h2 needs
                # no flag, its heading_close is handled below anyway.
                open_after_first_heading = True
        elif token.type == 'heading_open' and token.tag == 'h2':
            if nesting > 0:  # never emit a </section> without an open section
                tokens.append(close_token())
                nesting -= 1
        elif token.type == 'heading_close' and (token.tag == 'h2' or open_after_first_heading):
            open_after_first_heading = False
            open_next = True
        elif is_renderable(token):
            # check for directly following inline renderable;
            # +3 to skip the paragraph_close/paragraph_open tokens of the
            # </p><p>#[foo/bar] structure, if there are enough tokens left
            follower_idx = idx + 3
            followed_by_renderable = (
                follower_idx < len(source_tokens)
                and is_renderable(source_tokens[follower_idx])
            )
            # reopen a section afterwards only if more tokens follow and the
            # next paragraph is not a renderable itself
            open_next = follower_idx < len(source_tokens) and not followed_by_renderable
            if nesting > 0:  # if we are *inside* a <section>
                # close the section to break out the inline renderable
                # and allow it to take the full width
                tokens.append(close_token())
                nesting -= 1

        tokens.append(token)

    # close any remaining nesting levels
    tokens.extend(close_token() for _ in range(nesting))
    state.tokens = tokens
def section_open(self, tokens, idx, options, env):
    """
    Render rule for 'section_open' tokens; all arguments are ignored.
    """
    opening_tag = "<section>"
    return opening_tag
def section_close(self, tokens, idx, options, env):
    """
    Render rule for 'section_close' tokens; all arguments are ignored.
    """
    closing_tag = "</section>"
    return closing_tag
def section_plugin(md):
    """
    markdown-it plugin that splits output into <section>s, based on <h2> placements.

    md: the MarkdownIt instance to extend
    """
    # token stream rewriting, appended to the end of the core rule chain
    md.core.ruler.push("section_core", section_core_rule)
    # renderers for the two token types the core rule inserts
    renderers = (
        ("section_open", section_open),
        ("section_close", section_close),
    )
    for token_type, renderer in renderers:
        md.add_render_rule(token_type, renderer)
# Full-featured parser: CommonMark preset with Pygments highlighting, tables,
# strikethrough, footnotes, heading anchors, task lists, inline renderables
# and <section> splitting.
md_unsafe = (
    markdown_it.MarkdownIt('commonmark', {'highlight': code_highlight})
    .enable(['table', 'strikethrough'])
    .use(mdit_py_plugins.footnote.footnote_plugin)
    .use(mdit_py_plugins.anchors.anchors_plugin, permalink=True, min_level=2, max_level=6)
    .use(mdit_py_plugins.tasklists.tasklists_plugin, label=True)
    .use(renderable_plugin)
    .use(section_plugin)
)
2024-05-07 03:11:48 +00:00
# Restricted parser: starts from the 'zero' preset (markdown profile with
# nothing active) and switches on a small set of rules only.
md_safe = markdown_it.MarkdownIt('zero').enable([
    'emphasis',
    'strikethrough',
    'list',
    'blockquote',
    'backticks',
    'fence',
])
class MarkdownString(collections.UserString):
    """
    String holding markdown source that can render itself to HTML.

    Concatenation in either direction yields a new instance of the same
    class. An instance assigned as a class attribute additionally acts as a
    data descriptor: values are stored in the owning instance's __dict__
    under the attribute name and converted to this class on assignment.
    """

    # parser used by render(); subclasses may swap in a different one
    md = md_unsafe

    def __init__(self, seq=None):
        # attribute name on the owning class, filled in by __set_name__
        self._name = None
        super().__init__('' if seq is None else seq)

    def __get__(self, instance, cls):
        if instance is None:
            # accessed on the class itself: hand out the descriptor
            return self
        # NOTE(review): raises KeyError (not AttributeError) if the attribute
        # was never assigned on this instance - confirm this is intended.
        return instance.__dict__[self._name]

    def __set__(self, instance, value):
        instance.__dict__[self._name] = type(self)(value)

    def __set_name__(self, owner, name):
        self._name = name

    def __add__(self, other):
        return type(self)(str(self) + str(other))

    def __radd__(self, other):
        # reflected operand: `other` is the LEFT side and has to come first
        return type(self)(str(other) + str(self))

    def __iadd__(self, other):
        # no in-place mutation; `+=` rebinds the name to a new instance
        return self + other

    def render(self, mode=None, format=None):
        """
        Render the markdown source to HTML, returned as markupsafe.Markup.

        format is here for prototype compatibility but is ignored.
        mode functions differently. 'inline' will use markdown_it's `renderInline`.
        """
        source = str(self)
        if mode == 'inline':
            return markupsafe.Markup(self.md.renderInline(source))
        return markupsafe.Markup(self.md.render(source))
class MarkdownFieldAccessor(peewee.FieldAccessor):
    """
    Field accessor that wraps assigned values in MarkdownString.

    None and values that already are MarkdownString instances are passed
    through unchanged.
    """

    def __set__(self, instance, value):
        if value is None or isinstance(value, MarkdownString):
            wrapped = value
        else:
            wrapped = MarkdownString(value)
        super().__set__(instance, wrapped)
class MarkdownCharField(peewee.CharField):
    """
    CharField that uses MarkdownFieldAccessor as its accessor class.
    """

    accessor_class = MarkdownFieldAccessor
class MarkdownTextField(peewee.TextField):
    """
    TextField that uses MarkdownFieldAccessor as its accessor class.
    """

    accessor_class = MarkdownFieldAccessor
class SafeMarkdownString(MarkdownString):
    """
    MarkdownString variant that renders with the md_safe parser.
    """

    md = md_safe
class SafeMarkdownFieldAccessor(peewee.FieldAccessor):
    """
    Field accessor that wraps assigned values in SafeMarkdownString.

    None and values that already are SafeMarkdownString instances are passed
    through unchanged.
    """

    def __set__(self, instance, value):
        if value is None or isinstance(value, SafeMarkdownString):
            wrapped = value
        else:
            wrapped = SafeMarkdownString(value)
        super().__set__(instance, wrapped)
class SafeMarkdownCharField(peewee.CharField):
    """
    CharField that uses SafeMarkdownFieldAccessor as its accessor class.
    """

    accessor_class = SafeMarkdownFieldAccessor
class SafeMarkdownTextField(peewee.TextField):
    """
    TextField that uses SafeMarkdownFieldAccessor as its accessor class.
    """

    accessor_class = SafeMarkdownFieldAccessor