filter for extracting a number of paragraphs from any HTML code

Author:: rafadev
Posted:: June 10, 2011
Language:: Python
Version:: 1.2
Score:: 1 (after 1 ratings)

Download
Raw

With inspiration from: Unethical Blogger

This code parses any provided HTML content and extracts the specified number of paragraphs, keeping all the content and tags inside them.

Example: Template variable "content" contains:

<a href="#">some text</a>
<p><strong>Testing</strong>testing testing this is a tester's life</p>
<div>I wont see the world</div>
<p>Another paragraph</p>

Place this code in a template tag module that your templates can load (inside a templatetags folder of your app, e.g. myapp/templatetags/myutils.py). Then, in a template:

{% load myutils %}
{{ content|paragraphs:"1"}}

Would return:

<p><strong>Testing</strong>testing testing this is a tester's life</p>

Whereas

{% load myutils %}
{{ content|paragraphs:"2"}}

Returns:

<p><strong>Testing</strong>testing testing this is a tester's life</p>
<p>Another paragraph</p>

class _ParagraphParser(HTMLParser):
    """Collect the markup of the first ``limit`` ``<p>`` elements fed to it.

    Everything found between a counted ``<p>`` and its ``</p>`` (nested tags,
    text, entity and character references) is re-serialised into
    ``self.stack``; everything outside those paragraphs is discarded.
    """

    def __init__(self, limit):
        HTMLParser.__init__(self)
        # Ask the parser to report entity/character references through
        # handle_entityref()/handle_charref() instead of decoding them into
        # handle_data(); otherwise "&lt;" would be written back as a raw "<".
        # Parser versions that do not know this attribute simply ignore it.
        self.convert_charrefs = False
        self.limit = limit    # maximum number of paragraphs to keep
        self.stack = []       # output fragments, joined by render()
        self.in_p = False     # True while inside a paragraph being kept
        self.p_count = 0      # paragraphs kept so far

    def handle_starttag(self, tag, attrs):
        if tag == 'p':
            # A new paragraph is kept only while the quota is not exhausted.
            self.in_p = self.p_count < self.limit
            if self.in_p:
                self.p_count += 1

        if self.in_p:
            self.stack.append(u"<%s%s>" % (tag, self._format_attrs(attrs)))

    def handle_endtag(self, tag):
        if self.in_p:
            self.stack.append(u"</%s>" % (tag))
            if tag == 'p':
                self.in_p = False

    def handle_startendtag(self, tag, attrs):
        if self.in_p:
            self.stack.append(u"<%s%s/>" % (tag, self._format_attrs(attrs)))

    def handle_data(self, data):
        if self.in_p:
            self.stack.append(data)

    def handle_entityref(self, name):
        # Re-emit named references (e.g. "&amp;") unchanged.
        if self.in_p:
            self.stack.append(u"&%s;" % name)

    def handle_charref(self, name):
        # Re-emit numeric references (e.g. "&#62;" / "&#x3e;") unchanged.
        if self.in_p:
            self.stack.append(u"&#%s;" % name)

    @staticmethod
    def _escape_attr_value(value):
        """Escape ``value`` for use inside a double-quoted attribute."""
        # "&" must be replaced first so the other replacements are not
        # escaped a second time.
        return (value.replace('&', '&amp;')
                     .replace('<', '&lt;')
                     .replace('>', '&gt;')
                     .replace('"', '&quot;'))

    def _format_attrs(self, attrs):
        """Serialise ``attrs`` (a list of ``(name, value)`` pairs).

        Returns an empty string when there are no attributes, otherwise the
        attributes separated by spaces and preceded by a single space.
        """
        parts = []
        for name, value in attrs:
            if value is None:
                # Valueless attribute such as ``<input disabled>``.
                parts.append(name)
            else:
                # The parser hands over unescaped values, so escape again.
                parts.append('%s="%s"' % (name, self._escape_attr_value(value)))
        if not parts:
            return u""
        return u" %s" % (' '.join(parts))

    def render(self):
        """Return all collected fragments as one string."""
        return u"".join(self.stack)


@register.filter
def paragraphs(var, arg):
    """Return the first ``arg`` paragraphs found in the HTML string ``var``.

    Tags nested inside the selected ``<p>`` elements are kept; content outside
    of them is dropped.

    ``arg`` may be anything ``int()`` accepts (e.g. ``"2"``). If it cannot be
    converted, or if the parser raises ``HTMLParseError``, ``var`` is returned
    unchanged.
    """
    try:
        limit = int(arg)
    except (TypeError, ValueError):
        # Template filters should not blow up on a bad argument.
        return var

    parser = _ParagraphParser(limit)

    try:
        parser.feed(var)
        # Flush whatever the parser is still buffering (e.g. trailing text).
        parser.close()
    # NOTE(review): HTMLParseError must be imported at module level; it was
    # removed from the standard library in Python 3.5 -- confirm against the
    # Python version this project targets.
    except HTMLParseError:
        return var

    return parser.render()

# Set the `is_safe` flag on the filter function; the intent is that the HTML
# this filter returns is not escaped by the template engine.
# NOTE(review): presumably `is_safe` only preserves the "safe" status of input
# that was already marked safe, rather than marking the output safe by itself,
# and newer Django releases expect `@register.filter(is_safe=True)` instead of
# this function attribute -- confirm against the project's Django version.
paragraphs.is_safe = True

Comments

Please login first before commenting.

filter for extracting a number of paragraphs from any HTML code

More like this

Comments