import re
import cgi
# One alternative per kind of token that plaintext2html() rewrites.  The
# alternatives are tried in order at each position:
#   htmlchars - a single character that is special in HTML
#   space     - a run of spaces/tabs at the very start of a line (indentation)
#   lineend   - any of the three newline conventions
#   protocal  - an http:// or ftp:// URL that starts the text or follows
#               whitespace.  (The misspelt group name is kept so existing
#               users of this pattern keep working.)
# The URL branch uses a lookbehind and \S* so that it consumes neither the
# whitespace before the URL nor the whitespace after it; that whitespace is
# then handled by the other branches or left untouched.
re_string = re.compile(
    r'(?P<htmlchars>[<&>])'
    r'|(?P<space>^[ \t]+)'
    r'|(?P<lineend>\r\n|\r|\n)'
    r'|(?P<protocal>(?<!\S)(?:http|ftp)://\S*)',
    re.M | re.I)

# Replacement table for _escape_html().  '&' must be handled first, otherwise
# the ampersands of the entities produced for the other characters would be
# escaped a second time.
_HTML_ESCAPES = (
    ('&', '&amp;'),
    ('<', '&lt;'),
    ('>', '&gt;'),
    ('"', '&quot;'),
    ("'", '&#x27;'),
)


def _escape_html(value):
    """Return value with &, <, >, and both quote characters as entities."""
    for char, entity in _HTML_ESCAPES:
        value = value.replace(char, entity)
    return value


def plaintext2html(text, tabstop=4):
    """Convert plain text into an HTML fragment.

    The following rewrites are applied; everything else is copied unchanged:

    * ``<``, ``&`` and ``>`` become the corresponding HTML entities.
    * Every line ending (``\\r\\n``, ``\\r`` or ``\\n``) becomes ``<br>``.
    * Indentation at the start of a line becomes ``&nbsp;`` entities, one per
      space and ``tabstop`` per tab.
    * ``http://`` and ``ftp://`` URLs (matched case-insensitively) that start
      the text or follow whitespace become ``<a href="...">`` links.  The URL
      is HTML-escaped, quotes included, before it is placed in the attribute
      and in the link text.

    Args:
        text: The plain-text string to convert.
        tabstop: Number of ``&nbsp;`` entities emitted for each leading tab.

    Returns:
        The converted HTML string.
    """
    def do_sub(m):
        special = m.group('htmlchars')
        if special:
            return _escape_html(special)
        if m.group('lineend'):
            return '<br>'
        indent = m.group('space')
        if indent:
            # Expand tabs to spaces first, then make every space
            # non-breaking so the browser does not collapse the indentation.
            return indent.replace('\t', ' ' * tabstop).replace(' ', '&nbsp;')
        # Only the URL alternative is left.  Escaping matters here: the URL
        # is arbitrary non-whitespace text and may contain '"' or '&'.
        url = _escape_html(m.group('protocal'))
        return '<a href="%s">%s</a>' % (url, url)

    return re_string.sub(do_sub, text)
Comments
How about adding a regular expression to automatically convert http://djangobook.com to [HTML_REMOVED]http://djangobook.com[HTML_REMOVED]?
With this, your script could be a minimal textarea filter, which would be quite handy for lots of websites.
#
Sorry, the tags were stripped from my previous comment. The converted link should read:
<a href="http://djangobook.com">http://djangobook.com</a>
#
You have a semicolon at the end of line 17. That seems like a syntax error.
#