import re


def autop(text):
    """Convert line breaks into <p> and <br /> in an intelligent fashion.

    Adapted from Drupal's ``_filter_autop``.

    The text is split at opening and closing <pre>, <script> and <style>
    tags. Whatever lies between a matched pair of those tags is copied to
    the output untouched so that code is not mangled; everything else is
    wrapped in paragraphs, single newlines become ``<br />`` and bare
    ampersands are escaped as ``&amp;``.

    NOTE(review): the HTML markup inside the string/regex literals of this
    function had been stripped from the file; the literals below were
    reconstructed from the surviving fragments and the upstream Drupal
    algorithm. Confirm against the original port if it is available.

    Args:
        text: The markup/plain-text string to process.

    Returns:
        The processed string.
    """
    # All block level tags
    block = ('(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd'
             '|dt|ul|ol|li|pre|select|form|blockquote|address|p|h[1-6])')
    any_block_tag = '(</?' + block + '[^>]*>)'

    # Split at <pre>, <script>, <style> tags.
    # We don't apply any processing to the contents of these tags to avoid
    # messing up code. We look for matched pairs and allow basic nesting.
    # For example:
    # "processed <pre> ignored <script> ignored </script> ignored </pre> processed"
    chunks = re.split(r'(?i)(</?(?:pre|script|style)[^>]*>)', text)
    # Note: because the pattern has one capturing group, re.split() returns
    # alternating literals and delimiters, beginning and ending with a
    # literal (empty strings are inserted as required). Odd indexes are
    # therefore always tags.
    ignore = False
    ignoretag = ''
    pieces = []
    for i, chunk in enumerate(chunks):
        if i % 2:
            # Opening or closing tag?
            is_open = chunk[1] != '/'
            # Tag name = everything after "<" or "</" up to the first
            # whitespace or ">". Lower-cased because the split above is
            # case-insensitive, so "<PRE>" must be closable by "</pre>".
            name_start = 1 if is_open else 2
            tag = re.split(r'[\s>]', chunk[name_start:], maxsplit=1)[0].lower()
            if not ignore:
                if is_open:
                    ignore = True
                    ignoretag = tag
            # Only allow a matching tag to close it.
            elif not is_open and ignoretag == tag:
                ignore = False
                ignoretag = ''
        elif not ignore:
            # Just to make things a little easier, pad the end.
            chunk = chunk.rstrip('\n') + '\n\n'
            chunk = re.sub(r'<br />\s*<br />', '\n\n', chunk)
            # Space things out a little.
            chunk = re.sub('(<' + block + '[^>]*>)', r'\n\1', chunk)
            chunk = re.sub('(</' + block + '>)', r'\1\n\n', chunk)
            # Take care of duplicates.
            chunk = re.sub(r'\n\n+', '\n\n', chunk)
            # Make paragraphs, including one at the end.
            chunk = re.sub(r'(?s)\n?(.+?)(?:\n\s*\n|\Z)', r'<p>\1</p>\n', chunk)
            # Under certain strange conditions it could create a P of
            # entirely whitespace.
            chunk = re.sub(r'<p>\s*</p>\n', '', chunk)
            # Problem with nested lists.
            chunk = re.sub(r'<p>(<li.+?)</p>', r'\1', chunk)
            # Move the paragraph inside a blockquote it was wrapped around.
            chunk = re.sub(r'(?i)<p><blockquote([^>]*)>',
                           r'<blockquote\1><p>', chunk)
            chunk = chunk.replace('</blockquote></p>', '</p></blockquote>')
            # Block level tags must not be wrapped in paragraphs.
            chunk = re.sub(r'<p>\s*' + any_block_tag, r'\1', chunk)
            chunk = re.sub(any_block_tag + r'\s*</p>', r'\1', chunk)
            # Make line breaks.
            chunk = re.sub(r'(?<!<br />)\s*\n', '<br />\n', chunk)
            # ...but not directly after or before block level tags.
            chunk = re.sub(any_block_tag + r'\s*<br />', r'\1', chunk)
            chunk = re.sub(r'<br />(\s*</?(?:p|li|div|th|pre|td|ul|ol)>)',
                           r'\1', chunk)
            # Escape ampersands that do not look like the start of an entity.
            chunk = re.sub(r'&([^#])(?![A-Za-z0-9]{1,8};)', r'&amp;\1', chunk)
        pieces.append(chunk)
    return ''.join(pieces)