import re def autop(text): ''' Convert line breaks into
<p> and <br /> in an intelligent fashion.
Adapted from Drupal.
'''
# All block level tags
block = '(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|blockquote|address|p|h[1-6])'
# Split at <pre>, <script>, <style> tags.
# We don't apply any processing to the contents of these tags to avoid messing
# up code. We look for matched pairs and allow basic nesting. For example:
# "processed <pre> ignored <script> ignored </script> ignored </pre> processed"
chunks = re.split(r'(?i)(?(?:pre|script|style)[^>]*>)', text)
# Note: PHP ensures the array consists of alternating delimiters and literals
# and begins and ends with a literal (inserting NULL as required).
# Also true for Python, which will insert empty strings as required.
ignore = False
ignoretag = ''
output = ''
for i, chunk in enumerate(chunks):
if i % 2:
# Opening or closing tag?
open = (chunk[1] != '/')
tag = chunk[2 - open:].split('[ >]', 2)
if not ignore:
if open:
ignore = True
ignoretag = tag
# Only allow a matching tag to close it.
elif not open and ignoretag == tag:
ignore = False
ignoretag = ''
elif not ignore:
chunk = re.sub(r'\n*$', '', chunk) + "\n\n" # just to make things a little easier, pad the end
chunk = re.sub(r'
\s*
', r"\n\n", chunk)
chunk = re.sub(r'(<' + block + '[^>]*>)', r"\n\1", chunk) # Space things out a little
chunk = re.sub(r'(' + block + '>)', r"\1\n\n", chunk) # Space things out a little
chunk = re.sub(r"\n\n+", r"\n\n", chunk) # take care of duplicates
chunk = re.sub(r'(?s)\n?(.+?)(?:\n\s*\n|\Z)', r"\1
\n", chunk) # make paragraphs, including one at the end
chunk = re.sub(r'\s*
\n', r'', chunk) # under certain strange conditions it could create a P of entirely whitespace
chunk = re.sub(r"(
]*)>', r"') chunk = re.sub(r'', r'", chunk) chunk = chunk.replace('
\s*(?' + block + '[^>]*>)', r"\1", chunk) chunk = re.sub(r'(?' + block + '[^>]*>)\s*
', r"\1", chunk) chunk = re.sub(r'(?)\s*\n', r"