Filter for extracting a given number of paragraphs from any HTML code

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
@register.filter
def paragraphs(var, arg):
    """Return the first ``arg`` ``<p>`` elements found in the HTML ``var``.

    Markup nested inside the kept paragraphs (tags, attributes, entity and
    character references) is re-emitted; everything outside a kept
    paragraph, as well as comments and declarations, is dropped.

    Args:
        var: The HTML text to extract paragraphs from.
        arg: How many paragraphs to keep; anything ``int()`` accepts.

    Returns:
        The concatenated markup of the kept paragraphs.  ``var`` is returned
        unchanged when ``arg`` is not a valid integer or when the parser
        raises ``HTMLParseError``.
    """
    try:
        limit = int(arg)
    except (TypeError, ValueError):
        # A bad filter argument should not blow up template rendering;
        # fall back to the untouched input, like the parse-error path below.
        return var

    class ParagraphParser(HTMLParser):
        """Collects the markup of the first ``limit`` paragraphs."""

        def __init__(self, limit):
            try:
                # Keep entity/character references as separate events so
                # they can be re-emitted verbatim instead of being decoded
                # into raw (unescaped) text.
                HTMLParser.__init__(self, convert_charrefs=False)
            except TypeError:
                # Older parsers do not know ``convert_charrefs`` and already
                # report references through handle_entityref/handle_charref.
                HTMLParser.__init__(self)
            self.limit = limit
            self.pieces = []
            self.in_p = False
            self.p_count = 0

        def handle_starttag(self, tag, attrs):
            if tag == 'p':
                if self.p_count < self.limit:
                    self.in_p = True
                    self.p_count += 1
                else:
                    # Quota reached: stop collecting from here on.
                    self.in_p = False

            if self.in_p:
                self.pieces.append(
                    u"<%s%s>" % (tag, self._render_attrs(attrs)))

        def handle_endtag(self, tag):
            if self.in_p:
                self.pieces.append(u"</%s>" % (tag))
                if tag == 'p':
                    self.in_p = False

        def handle_startendtag(self, tag, attrs):
            if self.in_p:
                self.pieces.append(
                    u"<%s%s/>" % (tag, self._render_attrs(attrs)))

        def handle_data(self, data):
            if self.in_p:
                self.pieces.append(data)

        def handle_entityref(self, name):
            # Re-emit named references such as ``&amp;`` untouched.
            if self.in_p:
                self.pieces.append(u"&%s;" % name)

        def handle_charref(self, name):
            # Re-emit numeric references such as ``&#62;`` / ``&#x3E;``.
            if self.in_p:
                self.pieces.append(u"&#%s;" % name)

        def _render_attrs(self, attrs):
            """Serialize ``attrs`` (a list of ``(name, value)`` pairs)."""
            rendered = []
            for name, value in attrs:
                if value is None:
                    # Valueless attribute, e.g. ``<input disabled>``.
                    rendered.append(u" %s" % name)
                    continue
                # The parser hands over unescaped values; escape them again
                # so quotes and ampersands cannot break the markup.
                value = (value.replace(u"&", u"&amp;")
                              .replace(u'"', u"&quot;")
                              .replace(u"<", u"&lt;")
                              .replace(u">", u"&gt;"))
                rendered.append(u' %s="%s"' % (name, value))
            return u"".join(rendered)

        def render(self):
            return u"".join(self.pieces)

    parser = ParagraphParser(limit)

    try:
        parser.feed(var)
    except HTMLParseError:
        return var

    return parser.render()

# Set the ``is_safe`` flag on the filter function because its output is HTML
# markup that is meant to reach the page unescaped.
# NOTE(review): in Django this flag presumably only keeps already-safe input
# safe rather than marking arbitrary output as safe -- confirm against the
# Django version in use.
paragraphs.is_safe = True

More like this

  1. Another pygments for ReST by limodou 7 years, 1 month ago
  2. Page numbers with ... like in Digg by Ciantic 5 years ago
  3. Template tag to sort a list of links by pytechd 6 years, 8 months ago
  4. Filter for adding quote marks by olau 6 years, 5 months ago
  5. Paginator TemplateTag by trbs 6 years ago

Comments

(Forgotten your password?)