from django import template
from BeautifulSoup import BeautifulSoup, Comment
import re
# Template library instance on which this module's filters are registered.
register = template.Library()
def sanitize(value, allowed_tags):
    """Strip HTML from ``value`` except for allowed tags and attributes.

    ``allowed_tags`` should be in the form 'tag1:attr1:attr2 tag2:attr1 tag3',
    where tags are allowed HTML tags, and attrs are the allowed attributes
    for that tag.

    Processing steps:

    * HTML comments are removed.
    * Tags that are not allowed are dropped while their children are kept,
      except ``script``, ``style`` and ``textarea``, which are removed
      together with their content.
    * On allowed tags, attributes that are not listed are dropped and any
      (possibly obfuscated) occurrence of "javascript" is removed from the
      remaining attribute values.

    Returns the rendered markup decoded from UTF-8.
    """
    # Matches "javascript" with optional whitespace / hex entities between
    # the letters; case-insensitive so "JavaScript:" is caught as well.
    js_regex = re.compile(
        r'[\s]*(&#x.{1,7})?'.join('javascript'), re.IGNORECASE)

    def strip_js(val):
        # Substitute until the value is stable: a single pass would turn
        # "javajavascriptscript:" back into "javascript:".
        cleaned = js_regex.sub('', val)
        while cleaned != val:
            val = cleaned
            cleaned = js_regex.sub('', val)
        return cleaned

    # Parse the "tag:attr:attr ..." specification into {tag: [attrs]}.
    allowed = {}
    for spec in allowed_tags.split():
        parts = spec.split(':')
        allowed[parts[0]] = parts[1:]

    # The body of a <script> survives parsing as plain text, so merely hiding
    # the tag would re-emit any markup inside it (e.g. a nested <script>).
    # <style> and <textarea> are handled the same way as a precaution.
    raw_text_tags = ('script', 'style', 'textarea')

    soup = BeautifulSoup(value)

    for comment in soup.findAll(text=lambda text: isinstance(text, Comment)):
        comment.extract()

    for tag in soup.findAll(True):
        if tag.name in allowed:
            permitted = allowed[tag.name]
            tag.attrs = [(attr, strip_js(val)) for attr, val in tag.attrs
                         if attr in permitted]
        elif tag.name in raw_text_tags:
            # Remove the element and everything inside it.
            tag.extract()
        else:
            # Drop the tag itself but keep rendering its children.
            tag.hidden = True

    return soup.renderContents().decode('utf8')
# Register sanitize as a template filter on the library.
register.filter(sanitize)
Comments
This script does not protect against XSS attacks.
Try the following string:
<script><script type="text/javascript">alert("ok");<</script>/script>
It results in:
<script type="text/javascript">alert("ok");</script>#
I added a new filter that protects better against XSS attacks. No configuration needed: http://djangosnippets.org/snippets/2444/
#