#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Build PDF books from the Django documentation reST sources.

For every book listed in ``books`` the script:

1. runs ``rst2html`` on each ``<chapter>.txt`` file in the current
   directory, producing ``<chapter>.html``;
2. post-processes the HTML with Spoon (heading levels, links, images);
3. concatenates the chapters and runs ``htmldoc`` to produce
   ``<book>.pdf``.

Finally all books are combined into ``complete.pdf``.  Remote images are
downloaded once into ``CACHE_PATH``.

The script targets Python 2 (it relies on ``urllib.urlretrieve``) and runs
its whole pipeline at module level when executed.
"""

import sys
import os
import re
import urllib

from PIL import Image
from spoon import Spoon

# Directory (relative to the working directory) where downloaded images
# are cached between runs.
CACHE_PATH = os.path.join(os.getcwd(), "djangobook-image-cache")

# External commands; both strings are passed to the shell as-is.
rst2html_cmd = "rst2html"
htmldoc_cmd = "htmldoc"

# (book name, [chapter file names without the ".txt" extension]) pairs.
books = [
    ("essential", [
        "overview",
        "install",
        "tutorial01",
        "tutorial02",
        "tutorial03",
        "tutorial04",
        "faq",
        "documentation"
    ]),
    ("deployment", [
        "modpython",
        "fastcgi"
    ]),
    ("reference", [
        "django-admin",
        "model-api",
        "db-api",
        "transactions",
        "templates",
        "templates_python",
        "newforms",
        "forms",
        "modelforms",
        "testing",
        "sessions",
        "cache",
        "i18n",
        "middleware",
        "settings",
        "url_dispatch",
        "request_response",
        "generic_views",
        "authentication",
        "shortcuts",
        "unicode",
    ]),
    ("contrib", [
        "add_ons",
        "contenttypes",
        "csrf",
        "databrowse",
        "flatpages",
        "form_preview",
        "redirects",
        "sites",
        "sitemaps",
        "syndication_feeds",
        "webdesign"
    ]),
    ("solutions", [
        "apache_auth",
        "static_files",
        "email",
        "legacy_databases",
        "outputting_pdf",
        "outputting_csv",
    ]),
    ("etc", [
        "design_philosophies",
        "contributing",
        "admin_css",
        "api_stability",
    ])
]

# Wrapper for the body of a book; expects "title" and "html".
book_template = """\
<html>
<head><title>Django Developer Documentation: %(title)s</title>
<body>
%(html)s
</body>
</html>
"""

# Title page handed to htmldoc via --titlefile; expects "title" and "logo".
title_template = """
<html>
<head><title>Django Developer Documentation: %(title)s</title>
<body>
<!-- ugly html - but looks good as pdf -->
<br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/>
<br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/>
<img src="%(logo)s" width="100%%"><br/><br/>
<table width="100%%" cellpadding="10">
<tr bgcolor="#092e20" ><td><font color="#ffffff" size="+4" face="Arial,Helvetica,Sans Serif">
Django Developer Documentation
</font></td></tr>
<tr><td><b><font size="+5">%(title)s</font></b></td></tr>
</table>
</body>
</html>
"""

# One chapter inside a book; expects "filename", "chapter_title", "chapter".
chapter_template = """\
<h1><a name="%(filename)s"></a>%(chapter_title)s</h1>
%(chapter)s
"""

if not os.path.exists(CACHE_PATH):
    os.makedirs(CACHE_PATH)


def _run_shell(cmd):
    """Run *cmd* through the shell and return its exit status.

    A non-zero status is reported on stderr instead of being silently
    ignored; the build still continues (best effort).
    """
    status = os.system(cmd)
    if status != 0:
        sys.stderr.write(
            "* warning: command exited with status %s: %s\n" % (status, cmd))
    return status


def load_image(url):
    """Download *url* into the image cache and return the local file path.

    The cache file name is the URL with every non-word character replaced
    by ``_``.  If that file already exists nothing is downloaded.

    For URLs ending in ``t.png`` (thumbnails) the same URL with the
    trailing ``t`` removed is tried first; if that download raises, the
    original URL is fetched instead.
    """
    filename = os.path.join(CACHE_PATH, re.sub(r"\W", "_", url))
    if os.path.exists(filename):
        return filename

    # Progress message without a newline; "done" completes the line.
    sys.stdout.write("* loading %s... " % url)
    sys.stdout.flush()

    # Thumbnail?  Prefer the full-size image: strip "t.png", re-add ".png".
    if url.endswith("t.png"):
        try:
            urllib.urlretrieve(url[:-5] + ".png", filename)
        except Exception:
            # Best effort only: fall through to the thumbnail itself.
            # (Not a bare except, so Ctrl-C still aborts the script.)
            pass
        else:
            print("done")
            return filename

    urllib.urlretrieve(url, filename)
    print("done")
    return filename


def make_pdf(bookname, html):
    """Write the HTML files for one book and convert them with htmldoc.

    Creates ``<bookname>-title.html`` and ``<bookname>.html`` in the
    current directory, then runs htmldoc to produce ``<bookname>.pdf``.

    bookname -- base name of the output files; capitalized for the title.
    html     -- concatenated chapter HTML placed inside ``book_template``.
    """
    logo = load_image(
        "http://media.djangoproject.com/img/logos/django-logo-positive.png")
    title = bookname.capitalize()
    title_html = "%s-title.html" % bookname
    book_html = "%s.html" % bookname

    # Close the files explicitly so everything is flushed to disk before
    # htmldoc reads them.
    with open(title_html, "w") as title_file:
        title_file.write(title_template % {"title": title, "logo": logo})
    with open(book_html, "w") as book_file:
        book_file.write(book_template % {"title": title, "html": html})

    _run_shell(
        htmldoc_cmd + " --numbered -f %s.pdf --titlefile %s %s"
        % (bookname, title_html, book_html))


def _demote_headings(spoon):
    """Move every h1..h5 element one heading level down (h1 -> h2, ...)."""
    # Walk from h5 down to h1 so an element that was just changed is not
    # matched again by a later pass.
    # NOTE(review): assumes assigning tag["name"] renames the element in
    # Spoon -- confirm against the Spoon API.
    for level in range(5, 0, -1):
        for tag in spoon._["h%s" % level]:
            tag["name"] = "h%s" % (level + 1)


def _rewrite_links(spoon, files):
    """Rewrite relative ``../<name>/`` links.

    Links to a chapter of the current book (``name in files``) become
    in-document anchors; all other such links point at the online
    documentation.  Links of any other form are left untouched.
    """
    for link in spoon._.a:
        href = link["@href"]
        if href.startswith("../") and href.endswith("/"):
            name = href[3:-1]
            if name in files:
                link["@href"] = "#%s" % name
            else:
                link["@href"] = (
                    "http://www.djangoproject.com/documentation/%s" % name)


def _fix_images(spoon):
    """Localize remote images and adjust oversized or mislabeled ones."""
    for img in spoon._.img:
        src = img["@src"]
        if src.startswith("http"):
            img["@src"] = src = load_image(src)
        # NOTE(review): the checks below run for every image, not only
        # the downloaded ones -- confirm this matches the intended flow.
        try:
            im = Image.open(src)
            if im.size[0] > 400:
                # Wide images are scaled to the page width.
                img["@width"] = "100%"
            if im.format == "PSD":
                # yes there are photoshop files called .png on the site :(
                sys.stdout.write("* converting photoshop file... ")
                sys.stdout.flush()
                im.save(src, "png")
                print("done")
        except Exception as e:
            # Image problems must not abort the build; report and go on.
            print(e)


def _chapter_title(filename):
    """Derive a display title from a chapter file name.

    Underscores become spaces, the name is split into runs of digits and
    runs of letters (separators are kept), and every non-empty piece is
    capitalized and joined with single spaces, e.g. ``tutorial01`` ->
    ``Tutorial 01``.
    """
    pieces = re.split(r"(\d+|[A-Za-z]+)", filename.replace("_", " "))
    return " ".join([piece.capitalize() for piece in pieces if piece])


def _render_chapter(filename, files):
    """Convert ``<filename>.txt`` to HTML and return the chapter markup.

    files -- chapter names of the book being built, used to decide which
             links can stay inside the document.
    """
    target = "%s.html" % filename
    _run_shell(rst2html_cmd + " %s.txt > %s" % (filename, target))
    with open(target) as source:
        spoon = Spoon(source.read())

    _demote_headings(spoon)
    _rewrite_links(spoon, files)
    _fix_images(spoon)

    return chapter_template % {
        "filename": filename,
        "chapter_title": _chapter_title(filename),
        "chapter": spoon._.body.inner_html(),
    }


# Build one PDF per book, then a single PDF containing every book.
complete_html = ""
for bookname, files in books:
    html = ""
    for filename in files:
        print("%s %s" % (bookname, filename))
        html += _render_chapter(filename, files)
    complete_html += html
    make_pdf(bookname, html)

make_pdf("complete", complete_html)