# encoding: utf-8
#
# Copyright (c) 2009 Thomas Kongevold Adamcik
#
# Snippet is released under the MIT License. So feel free to use it in other
# projects as long as the notice remains intact :)
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# See http://www.djangosnippets.org/snippets/1312/

'''
HTML Validation Middleware
==========================

Simple development middleware to ensure that responses validate as HTML.

Dependencies:
-------------

- tidy (http://utidylib.berlios.de/)

Installation:
-------------

Assuming this file has been placed in your PYTHON_PATH (e.g.
djangovalidation/middleware.py), simply add the following to your middleware
settings:

    'djangovalidation.middleware.HTMLValidationMiddleware',

Remember that the order of your middleware settings does matter, this
middleware should be placed before eg. GzipMiddleware, djangologging and any
other middlewares that modify the response's content.

Operation:
----------

Validation only kicks in under the following conditions:

- DEBUG == True
- HTML_VALIDATION_ENABLE == True (default)
- REMOTE_ADDR in INTERNAL_IPS
- 'html' in Content-Type
- 'disable-validation' not in GET
- request.is_ajax() == False
- type(response) == HttpResponse
- request.path doesn't match HTML_VALIDATION_URL_IGNORE

To bypass the check any uri can be appended with ?disable-validation

Settings:
---------

- HTML_VALIDATION_ENABLE     - Turns middleware on/off. Default: True
- HTML_VALIDATION_ENCODING   - Default: 'utf8'
- HTML_VALIDATION_DOCTYPE    - Default: 'strict'
- HTML_VALIDATION_IGNORE     - Default: ['trimming empty <option>',
                               '<table> lacks "summary" attribute']
- HTML_VALIDATION_URL_IGNORE - List of regular expressions to check
                               request.path against when deciding if we should
                               process the request. Default: [],
- HTML_VALIDATION_XHTML      - Default: True
- HTML_VALIDATION_OPTIONS    - Options that get passed to tidy, overrides
                               previous settings. Default: based on above
                               settings

For more information about settings use the source and consult tidy's
documentation.

History
-------

December 19, 2009:
 - Fix empty HTML_VALIDATION_URL_IGNORE. Thanks .iqqmuT

July 12, 2009:
 - Ignore ajax request
 - Add HTML_VALIDATION_URL_IGNORE settings

February 6, 2009:
 - Initial release
'''

import re

import tidy

from django.conf import settings
from django.core.exceptions import MiddlewareNotUsed
from django.http import HttpResponse, HttpResponseServerError
from django.template import Context, Template


class HTMLValidationMiddleware(object):
    '''
    Checks that the response is valid HTML with proper Unicode.

    In the event of a failed check we show a simple page listing the HTML
    source and which errors need to be fixed.
    '''

    # Validation errors to ignore. Can be overridden with the
    # HTML_VALIDATION_IGNORE setting.
    ignore = [
        'trimming empty <option>',
        '<table> lacks "summary" attribute',
    ]

    # Options for tidy. Can be overridden with the HTML_VALIDATION_OPTIONS
    # setting. Note that these settings are read once, at class definition.
    options = {
        'doctype': getattr(settings, 'HTML_VALIDATION_DOCTYPE', 'strict'),
        'output_xhtml': getattr(settings, 'HTML_VALIDATION_XHTML', True),
        'input_encoding': getattr(settings, 'HTML_VALIDATION_ENCODING', 'utf8'),
    }

    def __init__(self):
        '''
        Read the configuration from the Django settings.

        Raises MiddlewareNotUsed when DEBUG is off or when
        HTML_VALIDATION_ENABLE is set to a false value.
        '''
        if not settings.DEBUG or not getattr(settings, 'HTML_VALIDATION_ENABLE', True):
            raise MiddlewareNotUsed

        self.options = getattr(settings, 'HTML_VALIDATION_OPTIONS', self.options)
        self.ignore = set(getattr(settings, 'HTML_VALIDATION_IGNORE', self.ignore))
        self.ignore_regexp = self._build_ignore_regexp(
            getattr(settings, 'HTML_VALIDATION_URL_IGNORE', []))
        self.template = Template(self.HTML_VALIDATION_TEMPLATE.strip())

    def process_response(self, request, response):
        '''
        Return ``response`` untouched when it is exempt from validation or
        validates cleanly; otherwise return an HttpResponseServerError that
        lists the validation errors next to the offending source lines.
        '''
        if not self._should_validate(request, response):
            return response

        errors = self._validate(response)

        if not errors:
            return response

        # The request is handed over so the error page title can show its path.
        context = self._get_context(response, errors, request)

        return HttpResponseServerError(self.template.render(context))

    def _build_ignore_regexp(self, urls):
        '''
        Combine the patterns in ``urls`` into one compiled alternation.

        Returns None when ``urls`` is empty, so that callers can tell
        "nothing is ignored" apart from a pattern matching every path.
        '''
        if not urls:
            return None

        urls = [r'(%s)' % url for url in urls]

        return re.compile(r'(%s)' % r'|'.join(urls))

    def _should_validate(self, request, response):
        '''
        Return True when every condition for validating ``response`` holds.
        '''
        # .get() keeps a missing Content-Type header or REMOTE_ADDR entry from
        # raising KeyError; a missing value simply means "do not validate".
        # The exact type comparison is deliberate: subclasses of HttpResponse
        # are skipped.
        return ('html' in response.get('Content-Type', '') and
            'disable-validation' not in request.GET and
            not request.is_ajax() and
            (not self.ignore_regexp or not self.ignore_regexp.search(request.path)) and
            request.META.get('REMOTE_ADDR') in settings.INTERNAL_IPS and
            type(response) is HttpResponse)

    def _validate(self, response):
        '''
        Run tidy over the response content and return the errors that are not
        on the ignore list.
        '''
        errors = tidy.parseString(response.content, **self.options).errors

        return self._filter_errors(errors)

    def _filter_errors(self, errors):
        '''
        Return a list of the errors whose message is not in ``self.ignore``.
        '''
        # A real list (rather than filter()) keeps the emptiness check in
        # process_response meaningful where filter() returns a lazy object.
        return [e for e in errors if e.message not in self.ignore]

    def _get_context(self, response, errors, request=None):
        '''
        Build the template context for the error page.

        ``lines`` pairs every source line with the error message reported for
        it, or False when the line has none. If several errors share a line
        only the last message is kept here; the summary table still lists all
        of them. ``request`` is optional and is only used by the page title.
        '''
        error_dict = dict((e.line, e.message) for e in errors)

        # tidy reports 1-based line numbers, enumerate() counts from 0.
        lines = [(line, error_dict.get(i + 1, False))
                 for i, line in enumerate(response.content.split('\n'))]

        return Context({'errors': errors, 'lines': lines, 'request': request})

    HTML_VALIDATION_TEMPLATE = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
  <meta http-equiv="content-type" content="text/html; charset=utf-8">
  <title>HTML validation error at {{ request.path_info|escape }}</title>
  <meta name="robots" content="NONE,NOARCHIVE">
  <style type="text/css">
    html * { padding: 0; margin: 0; }
    body * { padding: 10px 20px; }
    body * * { padding: 0; }
    body { font: small sans-serif; background: #eee; }
    body>div { border-bottom: 1px solid #ddd; }
    h1 { font-weight: normal; margin-bottom: 0.4em; }
    table { border: none; border-collapse: collapse; width: 100%; }
    td, th { vertical-align: top; padding: 2px 3px; }
    th { width: 6em; text-align: right; color: #666; padding-right: 0.5em; }
    #info { background: #f6f6f6; }
    #info th { width: 3em; }
    #summary { background: #ffc; }
    #explanation { background: #eee; border-bottom: 0px none; }
    .meta { margin: 1em 0; }
    .error { background: #FEE }
  </style>
</head>
<body>
  <div id="summary">
    <h1>HTML validation error</h1>
    <p>
      Your HTML did not validate. If this page contains user content that
      might be the problem. Please fix the following:
    </p>
    <table class="meta">
      {% for error in errors %}
        <tr>
          <th>Line: <a href="#line{{ error.line }}">{{ error.line }}</a></th>
          <td>{{ error.message|escape }}</td>
        </tr>
      {% endfor %}
    </table>
    <p>
      If you want to bypass this warning, click <a href="?disable-validation">
      here</a>. Please note that this warning will persist until you fix the
      problems mentioned above.
    </p>
  </div>
  <div id="info">
    <table>
      {% for line,error in lines %}
        <tr{% if error %} class="error"{% endif %}>
          <th id="line{{ forloop.counter }}">
            {{ forloop.counter|stringformat:"03d" }}
          </th>
          <td{% if error %} title="{{ error }}"{% endif %}>
            <pre>{{ line }}</pre>
          </td>
        </tr>
      {% endfor %}
    </table>
  </div>

  <div id="explanation">
    <p>
      You're seeing this error because you have not set
      <code>HTML_VALIDATION_ENABLE = False</code> in your Django settings file.
      Change that to <code>False</code>, and Django will stop validating your
      HTML.
    </p>
  </div>
</body>
</html>"""