import re
from urlparse import urlsplit, urlunsplit

# DNS suffix appended to a host name to route the request through Coral.
_coral_suffix = '.nyud.net'

# Matches one opening <a ...> tag that carries a double-quoted href.
#   prefix: everything from '<a' up to and including 'href="'
#   url:    the attribute value (no double quotes allowed inside)
#   suffix: the closing quote and the remainder of the tag up to '>'
# Every piece is bounded by [^>] / [^"] so that a match can never run past
# the end of its own tag; this keeps several anchors on the same line
# independent and avoids picking up href attributes of other elements.
_regex = (r'(?P<prefix><a(?:\s[^>]*?)?\shref=")'
          r'(?P<url>[^"]*)'
          r'(?P<suffix>"[^>]*>)')
_anchor_regex = re.compile(_regex)


def _coralize_netloc(netloc):
    """
    Return `netloc` with the Coral suffix appended to its host name.

    Any 'user:password@' prefix and ':port' suffix are preserved. The
    value is returned unchanged when the host is a bracketed IPv6
    literal, is empty, or already ends with the Coral suffix.
    """
    # Split off the userinfo first; it may itself contain a colon.
    userinfo, at, hostport = netloc.rpartition('@')
    if hostport.startswith('['):
        # Bracketed IPv6 literal: its colons are not port separators and
        # there is no host name to extend.
        return netloc

    host, colon, port = hostport.partition(':')
    if not host or host.lower().endswith(_coral_suffix):
        # Nothing to extend, or already routed through Coral.
        return netloc

    return userinfo + at + host + _coral_suffix + colon + port


def _rewrite_anchor(match):
    """
    Replacement callback for `_anchor_regex`.

    Returns the matched tag with the host of its href extended by the
    Coral suffix, or the matched text unchanged when the URL has no
    network location (a local reference), cannot be parsed, or needs no
    rewriting.
    """
    try:
        parts = urlsplit(match.group('url'))
    except ValueError:
        # Malformed URL (e.g. unbalanced IPv6 brackets); leave it alone
        # rather than failing the whole response.
        return match.group()

    if not parts.netloc:
        return match.group()

    netloc = _coralize_netloc(parts.netloc)
    if netloc == parts.netloc:
        return match.group()

    url = urlunsplit((parts.scheme, netloc, parts.path,
                      parts.query, parts.fragment))
    return match.group('prefix') + url + match.group('suffix')


class CoralCDNMiddleware(object):
    """
    This middleware rewrites anchor tags contained in the response
    content so that the pages are fetched through the Coral Content
    Distribution Network [http://coralcdn.org/].
    """
    def process_response(self, request, response):
        """
        Rewrite the href of every absolute anchor in `response.content`.

        `request` is not used. The same `response` object is returned,
        with its `content` attribute replaced by the rewritten markup.
        Responses flagged as streaming are returned untouched because
        they expose no `content` to rewrite.
        """
        if getattr(response, 'streaming', False):
            return response

        # Find all anchor tags in the response content and rewrite
        # them.
        response.content = _anchor_regex.sub(_rewrite_anchor,
                                             response.content)
        return response
