mirror of
https://github.com/postgres/pgweb.git
synced 2025-08-10 00:42:06 +00:00

* Get rid of the django_markwhat dependency, and implement our own classes to get more control. In passing also remove django-markdown, because we never used that. * Instead of trying to clean markdown with regexps, use the bleach library (NEW DEPENDENCY) with special whitelisting of allowed tags based off standard markdown. This means that one can input links or formatting in HTML if one prefers, as long as it renders to the same subset of tags that markdown allows. * Replace javascript based client side preview with an actual call to a preview URL that renders the exact result using the same function, since the use of showdown on the client was increasingly starting to differ from the server, and since that cannot be kept secure the same way. Rewrite the client side javascript to work better with the now longer interval between updates of the preview. Long in planning, but never got around to it. Suggestion to use bleach for escaping from David Fetter.
55 lines
1.6 KiB
Python
55 lines
1.6 KiB
Python
import markdown
|
|
from bleach.sanitizer import Cleaner
|
|
from bleach.html5lib_shim import Filter
|
|
|
|
|
|
# Tags and attributes generated by markdown (anything that's not
|
|
# generated by markdown is clearly manually added html)
|
|
# This list is from the bleach-allowlist module, but adding a dependency
|
|
# on it just to get two arrays seems silly.
|
|
|
|
_markdown_tags = [
|
|
"h1", "h2", "h3", "h4", "h5", "h6",
|
|
"b", "i", "strong", "em", "tt",
|
|
"p", "br",
|
|
"span", "div", "blockquote", "code", "pre", "hr",
|
|
"ul", "ol", "li", "dd", "dt",
|
|
# "img", # img is optional in our markdown validation
|
|
"a",
|
|
"sub", "sup",
|
|
]
|
|
|
|
_markdown_attrs = {
|
|
"*": ["id"],
|
|
"img": ["src", "alt", "title"],
|
|
"a": ["href", "alt", "title"],
|
|
}
|
|
|
|
|
|
# Prevent relative links, by simply removing any href attribute that does not have
|
|
# a : in it.
|
|
class RelativeLinkFilter(Filter):
    """Token-stream filter that drops ``href`` attributes lacking a colon.

    An href value without a ``:`` is treated as a relative link. Only the
    attribute is removed; the tag carrying it is passed through untouched,
    as is every other token.
    """

    def __iter__(self):
        # Attributes are keyed by (namespace, name); None means the
        # attribute has no namespace.
        href_key = (None, 'href')

        for tok in Filter.__iter__(self):
            if tok['type'] in ('StartTag', 'EmptyTag'):
                attrs = tok['data']
                # Tags without any attributes have nothing to inspect.
                if attrs and href_key in attrs and ':' not in attrs[href_key]:
                    # Relative link!
                    del attrs[href_key]
            yield tok
|
|
|
|
|
|
def pgmarkdown(value, allow_images=False, allow_relative_links=False):
    """Render markdown text to HTML and sanitise the result.

    The input is converted with ``markdown.markdown`` and the resulting
    HTML is passed through a bleach ``Cleaner`` restricted to the tags in
    ``_markdown_tags`` and the attributes in ``_markdown_attrs``.

    Parameters:
        value: Markdown source text to render.
        allow_images: When true, the ``img`` tag is additionally allowed
            for this call only.
        allow_relative_links: When false (the default), the cleaner is
            given ``RelativeLinkFilter``, which removes ``href``
            attributes that do not contain a ``:``.

    Returns:
        The cleaned output of ``Cleaner.clean`` for the rendered HTML.
    """
    # Work on a per-call copy: appending to the module-level list itself
    # would permanently enable "img" for every later call (and add a
    # duplicate entry each time), regardless of allow_images.
    tags = list(_markdown_tags)
    if allow_images:
        tags.append('img')

    filters = []
    if not allow_relative_links:
        filters.append(RelativeLinkFilter)

    cleaner = Cleaner(tags=tags, attributes=_markdown_attrs, filters=filters)

    return cleaner.clean(markdown.markdown(value))
|