"""
XHTML Degrader Middleware.
When sending contents with the XHTML media type, application/xhtml+xml, this
module checks to ensure that the user agent (web browser) is capable of
rendering it. If not, it changes the media type to text/html and makes the
contents more "HTML-friendly" (as per the XHTML 1.0 HTML Compatibility
Guidelines).
To use this middleware you need to add a reference to it in your settings.py
file, e.g.:
MIDDLEWARE_CLASSES = (
...
'YOURPATH.xhtmldegrader.middleware.XhtmlDegraderMiddleware',
)
(If you use GZipMiddleware, you should ensure that it appears in the list
before XhtmlDegraderMiddleware, to allow the XHTML Degrader to act first.)
"""
import re
_MEDIA_TYPE_RE = re.compile(r'application\/xhtml\+xml')
_EMPTY_TAG_END_RE = re.compile(r'(?<=\S)\/\>')
_PROCESSING_INSTRUCTION_RE = re.compile(r'\<\?.*\?\>')
def _supports_xhtml(request):
"""Examines an HTTP request header to determine whether the user agent
supports the XHTML media type (application/xhtml+xml). Returns True or
False."""
if '/xhtml+xml' in request.META.get('HTTP_ACCEPT', '').lower():
# User agent claims to support the XHTML media type.
return True
else:
# No reference to XHTML support.
return False
class XhtmlDegraderMiddleware(object):
"""Django middleware that "degrades" any contents sent as XHTML if the
requesting browser doesn't support the XHTML media type. Degrading involves
switching the content type to text/html, removing XML processing
instructions, etc.
If the HTTP response is already set to text/html, or set to any media type
other than application/xhtml+xml, this middleware will have no effect.
"""
def process_response(self, request, response):
# Check if this is XHTML, and check if the user agent supports XHTML.
if response['Content-Type'].split(';')[0] != 'application/xhtml+xml' \
or _supports_xhtml(request):
# The content is fine, simply return it.
return response
# If the response has already been compressed we can't modify it
# further, so just return it. (N.B. if you use GZipMiddleware, you
# should ensure that it's listed in MIDDLEWARE_CLASSES before
# XhtmlDegraderMiddleware, to allow the XHTML Degrader to act first.)
if response.has_header('Content-Encoding'):
# Already compressed, so we can't do anything useful with it.
return response
# The content is XHTML, and the user agent doesn't support it.
# Fix the media type:
response['Content-Type'] = _MEDIA_TYPE_RE.sub('text/html',
response['Content-Type'], 1)
if 'charset' not in response['Content-Type']:
response['Content-Type'] += '; charset=utf-8'
# Modify the response contents as required:
# Remove any XML processing instructions:
response.content = _PROCESSING_INSTRUCTION_RE.sub('',
response.content)
# Ensure there's a space before the trailing '/>' of empty elements:
response.content = _EMPTY_TAG_END_RE.sub(' />',
response.content)
# Lose any excess whitespace:
response.content = response.content.strip()
if not response.content.startswith('<!DOCTYPE'):
# Add a DOCTYPE, so that the user agent isn't in "quirks" mode.
response.content = '<!DOCTYPE html>\n' + response.content
return response
Comments
instead of
response.content[0:9] != '<!DOCTYPE':
I would suggest
not response.content.startswith('<!DOCTYPE')
This is more speed & memory efficient.
#
Great snippet, thanks.
One small suggestion, maybe you could also replace:
meta http-equiv="Content-type" content="application/xhtml+xml... etc, in the head section. To do so, I added:at the beginning:
_MEDIA_TYPE_HEAD_RE = re.compile(r'="application\/xhtml\+xml')and around line 70 add:
response.content = _MEDIA_TYPE_HEAD_RE.sub('="text/html', response.content, 1)#
I've made another little adjustment. I noticed that django-admin has several problems when served as application/xhtml+xml, so I adjusted this middleware so that anything served fro mthe /admin url is automatically degrade d too:
def process_response(self, request, response):
if not request.META['PATH_INFO'].startswith('/admin/'):
#
Looks like there is a bit too much escaping going on in the regular expressions:
_MEDIA_TYPE_RE = re.compile(r'application\/xhtml\+xml')
Can simply be:
_MEDIA_TYPE_RE = re.compile(r'application/xhtml\+xml')
and
_PROCESSING_INSTRUCTION_RE = re.compile(r'\<\?.*\?\>')
Can simple be:
_PROCESSING_INSTRUCTION_RE = re.compile(r'<\?.+\?>')
The following regex didn't make much sense at all, I don't know how the = fit in there, aswell as some of the other parts, here is the original:
_EMPTY_TAG_END_RE = re.compile(r'(?<=\S)\/\>')
Anyway, I rewrote it and this one works:
_EMPTY_TAG_END_RE = re.compile(r'(<.+)/>')
Also replace line 75 with this, or it wont work:
response.content = _EMPTY_TAG_END_RE.sub(r'\1>', response.content)
What this does is replace:
\<input type="button" /> with \<input type="button" > and \<input type="button"/> with \<input type="button">
I cannot get rid of the space at the end in the first example, not without adding extra python code anyway, and since the extra space does nothing bad, I just left it at that.
#
Sorry, I meant a * not a +:
_PROCESSING_INSTRUCTION_RE = re.compile(r'<\?.*\?>')
#