- August 8, 2007
- pygments beautifulsoup markdown
- 1 (after 1 ratings)
A variation on a theme, inspired by snippet 39 and snippet 119. The
intent is to provide a more generic and simple mechanism for combining
Markdown with Pygments. Common scenarios could include blogging or commenting. Snippet 119 seemed too specific and perhaps not as
efficient, needing to process the HTML twice to accomplish its ends. The one snag in the implementation is the need to use a tag other than
`<code>` as a wrapper (`<pre>` is used instead). See the comments for details.
You will need the BeautifulSoup module installed.
from datetime import datetime

from django.db import models


class Blog(models.Model):
    '''Bare bones blogging model.

    The author edits ``markdown``; ``blog`` holds the HTML rendered from it
    and is regenerated on every save so views can output it directly.
    '''
    title = models.CharField(maxlength=255)
    slug = models.SlugField(maxlength=255, prepopulate_from=('title',))
    pub_date = models.DateTimeField()
    # the cooked view, cached for quick retrieval
    blog = models.TextField()
    # the raw markdown-encoded text, saved for subsequent edits
    markdown = models.TextField()

    def save(self, *args, **kwargs):
        '''Render the markdown source and persist the entry.

        ``pub_date`` is set to the current time only when the instance has
        no ``id`` yet and no ``pub_date`` was supplied, so an explicitly
        chosen date and the date of an existing entry are left alone.

        Positional and keyword arguments are forwarded unchanged to the
        parent ``save()``, so callers that pass save options keep working.
        '''
        if not self.id and not self.pub_date:
            self.pub_date = datetime.now()
        # Re-render on every save so the cached HTML never drifts from
        # the markdown source.
        self.blog = pygmented_markdown(self.markdown)
        super(Blog, self).save(*args, **kwargs)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
from pygments.lexers import LEXERS, get_lexer_by_name
from pygments import highlight
from pygments.formatters import HtmlFormatter
from BeautifulSoup import BeautifulSoup
from markdown import markdown

# The set of known lexer aliases (the short names accepted by
# get_lexer_by_name, e.g. ``python``, ``css``, ``html``).
#
# Each LEXERS value is a tuple of
# (module, display name, aliases, filename patterns, mimetypes); only the
# aliases (index 2) are valid values for the ``class`` attribute.  Simply
# concatenating the entries would leave the aliases nested inside inner
# tuples, where a plain ``in`` test can never find them.
_lexer_names = frozenset(
    alias
    for entry in LEXERS.values()
    for alias in entry[2]
)

# default formatter
_formatter = HtmlFormatter(cssclass='source')


def pygmented_markdown(raw):
    '''
    Render raw markdown text to HTML, highlighting code with Pygments.

    The markdown output is parsed with BeautifulSoup, and every construct
    of the following form is replaced with Pygments-highlighted markup.
    E.g.::

        <pre class="???">
            ...
        </pre>

    Where ``???`` is the alias of a supported pygments lexer, e.g.:
    ``python``, ``css``, ``html``.  ``<pre>`` tags without a ``class``
    attribute, or whose class is not a known lexer alias, are left
    untouched.

    Returns the resulting document as a unicode string.

    Note: Semantically, it would make more sense to wrap the code in a
    ``<code>...</code>`` tag; however, my tests using markdown.py - as well
    as markdown.pl from John Gruber - have shown that the inner HTML of the
    ``<code>`` tag is not immune to translation.
    '''
    soup = BeautifulSoup(markdown(raw))
    # findAll returns a list, so replacing tags while looping is safe.
    for tag in soup.findAll('pre'):
        lexer_name = tag.get('class')
        if lexer_name and lexer_name in _lexer_names:
            # renderContents() yields an encoded byte string; the lexer's
            # ``encoding`` option tells Pygments how to decode it.
            lexer = get_lexer_by_name(lexer_name, stripnl=True,
                                      encoding='UTF-8')
            tag.replaceWith(highlight(tag.renderContents(), lexer,
                                      _formatter))
    return unicode(soup)