import re

try:  # Python 3
    from urllib.parse import urlsplit, urlunsplit
except ImportError:  # Python 2
    from urlparse import urlsplit, urlunsplit

# DNS suffix appended to a host name to route the request through Coral.
_coral_suffix = '.nyud.net'

# Matches an opening <a ...> tag with a double-quoted href attribute and
# splits it into three named groups: everything up to the opening quote
# ('prefix'), the URL itself ('url'), and the closing quote plus the rest
# of the tag ('suffix').  Negated character classes keep a match inside a
# single tag, so several anchors on one line are handled independently.
# NOTE(review): the pattern was reconstructed from the group names the
# replacer reads; confirm against the intended original.
_regex = (
    r'(?P<prefix><a\b[^>]*?\shref=")'
    r'(?P<url>[^"]*)'
    r'(?P<suffix>"[^>]*>)'
)
_anchor_regex = re.compile(_regex, re.IGNORECASE)


def _coralize_netloc(netloc):
    """Return *netloc* with the Coral suffix appended to its host part.

    Any ``user:password@`` prefix and ``:port`` suffix are preserved.  The
    value is returned unchanged when the host is empty, is a bracketed
    IPv6 literal (which cannot take a DNS suffix), or already ends with
    the Coral suffix.
    """
    userinfo, at, hostport = netloc.rpartition('@')
    if hostport.startswith('['):
        return netloc
    host, colon, port = hostport.partition(':')
    if not host or host.lower().endswith(_coral_suffix):
        return netloc
    return userinfo + at + host + _coral_suffix + colon + port


def _rewrite_anchor(match):
    """Replacement callback for ``_anchor_regex.sub()``.

    Returns the matched anchor tag with its URL pointed at Coral, or the
    tag verbatim when the URL has no network location (a local reference),
    cannot be parsed, or needs no change.
    """
    try:
        parts = urlsplit(match.group('url'))
    except ValueError:
        # Malformed URL (e.g. an unbalanced IPv6 bracket): leave it alone
        # rather than failing the whole response.
        return match.group()

    if not parts.netloc:
        return match.group()

    netloc = _coralize_netloc(parts.netloc)
    if netloc == parts.netloc:
        return match.group()

    # urlunsplit() accepts any 5-item sequence, so swap the netloc in a
    # mutable copy of the split result.
    pieces = list(parts)
    pieces[1] = netloc
    return match.group('prefix') + urlunsplit(pieces) + match.group('suffix')


class CoralCDNMiddleware(object):
    """
    This middleware rewrites anchor tags contained in the response content
    so that the pages are fetched through the Coral Content Distribution
    Network [http://coralcdn.org/].

    Only anchors whose href is double-quoted and contains a network
    location are rewritten; relative/local links are left untouched.
    """

    def process_response(self, request, response):
        """Rewrite anchor URLs in ``response.content`` and return *response*.

        ``request`` is accepted for the middleware hook signature and is
        not used.  ``response`` must expose a readable and writable
        ``content`` attribute.  Text content is rewritten directly.  Byte
        content (Python 3) is decoded with ``response.charset`` when that
        attribute is present (UTF-8 otherwise), rewritten and re-encoded;
        if it cannot be decoded the response is returned unmodified.
        """
        content = response.content

        # On Python 2 ``bytes is str``, so this branch only runs for real
        # byte strings on Python 3, where a text pattern cannot be applied
        # to bytes directly.
        if isinstance(content, bytes) and not isinstance(content, str):
            charset = getattr(response, 'charset', None) or 'utf-8'
            try:
                text = content.decode(charset)
            except (UnicodeDecodeError, LookupError):
                # Not decodable text (e.g. binary payload): nothing to do.
                return response
            rewritten = _anchor_regex.sub(_rewrite_anchor, text)
            response.content = rewritten.encode(charset)
        else:
            response.content = _anchor_regex.sub(_rewrite_anchor, content)

        return response