Skip to content
Snippets Groups Projects
Commit 4eed6050 authored by Brian Moe's avatar Brian Moe
Browse files

Initial Revision

parent f2792128
No related branches found
No related tags found
No related merge requests found
from django import template
from django.utils.html import conditional_escape
from django.utils.safestring import mark_safe
import html5lib
# Template library instance; the @register.filter decorator used further down
# attaches this module's filters to it.
register = template.Library()
# Yoinked from django_wysiwyg
#
def sanitize_html5lib(input):
    """Sanitize an HTML fragment with html5lib and return it re-serialized.

    The fragment is parsed into a DOM tree using html5lib's ``HTMLSanitizer``
    as the tokenizer, then walked and serialized back into a single string
    with ``omit_optional_tags=False``.

    NOTE(review): relies on the ``html5lib.sanitizer`` and
    ``html5lib.serializer.htmlserializer`` module layout -- confirm these
    exist in the html5lib version this project pins.

    >>> sanitize_html5lib("foobar<p>adf<i></p>abc</i>")
    u'foobar<p>adf<i></i></p><i>abc</i>'
    >>> sanitize_html5lib('foobar<p style="color:red; remove:me; background-image: url(http://example.com/test.php?query_string=bad);">adf<script>alert("Uhoh!")</script><i></p>abc</i>')
    u'foobar<p style="color: red;">adf&lt;script&gt;alert("Uhoh!")&lt;/script&gt;<i></i></p><i>abc</i>'
    """
    from html5lib import treebuilders, treewalkers, serializer, sanitizer

    # Parse the fragment into a DOM tree, sanitizing at the tokenizer stage.
    parser = html5lib.HTMLParser(
        tokenizer=sanitizer.HTMLSanitizer,
        tree=treebuilders.getTreeBuilder("dom"),
    )
    fragment = parser.parseFragment(input)

    # Walk the DOM tree to obtain the token stream the serializer consumes.
    dom_walker = treewalkers.getTreeWalker("dom")
    token_stream = dom_walker(fragment)

    html_serializer = serializer.htmlserializer.HTMLSerializer(
        omit_optional_tags=False
    )
    pieces = html_serializer.serialize(token_stream)
    return "".join(pieces)
@register.filter
def sanitize(text):
    """Template filter: sanitize ``text`` as HTML and mark the result safe.

    The value is passed through ``sanitize_html5lib`` and the returned string
    is wrapped with ``mark_safe``.
    """
    return mark_safe(sanitize_html5lib(text))
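# XXX needs_autoescape is left disabled because enabling it broke things.
# NOTE(review): per the Django docs, needs_autoescape = True makes Django pass
# an extra ``autoescape`` keyword argument to the filter; sanitize(text) does
# not accept one, which is presumably the cause of the breakage -- confirm.
#sanitize.needs_autoescape = True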
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment