Much of this is stolen from the base truncate_html_words filter. The difference is that this filter takes a number of characters as its argument and truncates at the nearest word boundary below that count, rather than taking a number of words.
@register.filter
def truncatehtml_at_word(s, chars):
    """
    Truncate a string at the last word boundary that fits within ``chars``
    counted characters, closing any HTML tags left open by the cut.

    Only the characters of "words" (runs matched by ``re_words`` group 1:
    a word character followed by word characters or hyphens) are counted.
    Whitespace, punctuation, HTML tags, comments and character entities are
    not counted.

    ``s`` is the text (optionally containing HTML) to truncate. ``chars`` is
    the character budget; anything ``int()`` accepts. ``int()`` raises
    ``ValueError``/``TypeError`` for values it cannot convert.

    Returns:
    * ``''`` when the budget is zero or negative;
    * ``s`` unchanged when no word has to be dropped (trailing punctuation,
      whitespace and markup are then preserved as-is);
    * otherwise the kept prefix, followed by closing tags for every element
      still open at the cut, followed by ``' …'``.
    """
    length = int(chars)
    if length <= 0:
        return ''
    re_words = truncatehtml_at_word.re_words
    re_tag = truncatehtml_at_word.re_tag
    html4_singlets = truncatehtml_at_word.html4_singlets

    # Count non-HTML characters and keep note of open tags.
    open_tags = []      # innermost element first
    count = 0           # counted word characters consumed so far
    pos = 0             # scan position in s
    truncate_at = 0     # end of the last word/tag that is kept
    truncated = False   # True once we know at least one word is dropped
    while count < length:
        m = re_words.search(s, pos)
        if not m:
            # No more words in the string.
            break
        pos = m.end(0)
        word = m.group(1)
        if word:
            # It's an actual non-HTML word. If adding this word would exceed
            # our length threshold, then we're done.
            count += len(word)
            if count > length:
                truncated = True
                break
            # Otherwise, update our truncation point to include the word.
            truncate_at = pos
            continue
        # Not a word: either a tag/comment or a character entity.
        tag = re_tag.match(m.group(0))
        if not tag:
            # Entity (or malformed markup): uncounted, and it does not move
            # the truncation point on its own.
            continue
        closing_tag, tagname, self_closing = tag.groups()
        tagname = tagname.lower()  # Element names are always case-insensitive
        if tagname.startswith(('!', '?')):
            # Comments, doctypes and processing instructions never take a
            # closing tag, so they must not be tracked as open elements.
            pass
        elif self_closing or tagname in html4_singlets:
            pass
        elif closing_tag:
            # Check for match in open tags list
            try:
                i = open_tags.index(tagname)
            except ValueError:
                pass
            else:
                # SGML: An end tag closes, back to the matching start tag, all
                # unclosed intervening start tags with omitted end tags
                open_tags = open_tags[i + 1:]
        else:
            # Add it to the start of the open tags list
            open_tags.insert(0, tagname)
        truncate_at = pos

    if not truncated and count >= length:
        # The budget was used up exactly. That only counts as a truncation
        # if another word follows; trailing markup/punctuation alone doesn't.
        truncated = any(m.group(1) for m in re_words.finditer(s, pos))

    # Don't bother closing tags if we didn't need to truncate.
    if not truncated:
        return s

    closers = ''.join('</%s>' % name for name in open_tags)
    return s[:truncate_at] + closers + ' …'


# Tokenizer: character entities and tags match without a capture group (so
# they are never counted); group 1 captures a countable word.
truncatehtml_at_word.re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
# Groups: (1) '/' of a closing tag, (2) tag name, (3) the '/' of a
# self-closing tag. The name stops at any whitespace or at a trailing '/',
# so '<br/>' and '<br />' are both recognised as self-closing.
truncatehtml_at_word.re_tag = re.compile(
    r'<(/)?(\S+?)(?:(\s*/)|\s.*?)?>', re.S)
# <p> is included here despite not being a true singlet to avoid adding
# incorrect closing tags to something like "para 1 <p> para 2 <p>".
truncatehtml_at_word.html4_singlets = ('br', 'col', 'link', 'base', 'img',
                                       'param', 'area', 'hr', 'input', 'p')
|
More like this
- Template tag - list punctuation for a list of items by shapiromatron 8 months, 4 weeks ago
- JSONRequestMiddleware adds a .json() method to your HttpRequests by cdcarter 9 months ago
- Serializer factory with Django Rest Framework by julio 1 year, 3 months ago
- Image compression before saving the new model / work with JPG, PNG by Schleidens 1 year, 4 months ago
- Help text hyperlinks by sa2812 1 year, 5 months ago
Comments
Please login first before commenting.