#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import os
import re
import urllib
from PIL import Image
from spoon import Spoon
# Directory, under the current working directory, where load_image() stores
# the images it downloads.
CACHE_PATH = os.path.join(os.getcwd(), "djangobook-image-cache")
# Names of the external command-line tools that are run through os.system().
rst2html_cmd = "rst2html"
htmldoc_cmd = "htmldoc"
# Ordered (book name, chapter names) pairs.  Each chapter name is the stem of
# a "<name>.txt" source file; each book name becomes the stem of one PDF.
books = [
    (
        "essential",
        [
            "overview",
            "install",
            "tutorial01",
            "tutorial02",
            "tutorial03",
            "tutorial04",
            "faq",
            "documentation",
        ],
    ),
    (
        "deployment",
        [
            "modpython",
            "fastcgi",
        ],
    ),
    (
        "reference",
        [
            "django-admin",
            "model-api",
            "db-api",
            "transactions",
            "templates",
            "templates_python",
            "newforms",
            "forms",
            "modelforms",
            "testing",
            "sessions",
            "cache",
            "i18n",
            "middleware",
            "settings",
            "url_dispatch",
            "request_response",
            "generic_views",
            "authentication",
            "shortcuts",
            "unicode",
        ],
    ),
    (
        "contrib",
        [
            "add_ons",
            "contenttypes",
            "csrf",
            "databrowse",
            "flatpages",
            "form_preview",
            "redirects",
            "sites",
            "sitemaps",
            "syndication_feeds",
            "webdesign",
        ],
    ),
    (
        "solutions",
        [
            "apache_auth",
            "static_files",
            "email",
            "legacy_databases",
            "outputting_pdf",
            "outputting_csv",
        ],
    ),
    (
        "etc",
        [
            "design_philosophies",
            "contributing",
            "admin_css",
            "api_stability",
        ],
    ),
]
# Body of the "<bookname>.html" file written by make_pdf(); filled in with
# %-formatting from that function's local variables.
book_template = """\
Django Developer Documentation: %(title)s
%(html)s
"""
# Body of the "<bookname>-title.html" file written by make_pdf() and handed
# to htmldoc via --titlefile; filled in the same way as book_template.
title_template = """
Django Developer Documentation: %(title)s
Django Developer Documentation
|
%(title)s |
"""
# Wrapper for one chapter; filled in by the main loop with %-formatting from
# the module-level variables.
chapter_template = """\
%(chapter_title)s
%(chapter)s
"""
# Make sure the image cache directory exists.  The creation is attempted
# first and an "already there" failure tolerated afterwards, so there is no
# gap between checking for the directory and creating it.
try:
    os.makedirs(CACHE_PATH)
except OSError:
    # Only an existing directory is acceptable; anything else (a plain file
    # at that path, a permission problem, ...) is a real error.
    if not os.path.isdir(CACHE_PATH):
        raise
def _download(url, filename):
    """Fetch *url* into *filename*, deleting any partial file on failure."""
    complete = False
    try:
        urllib.urlretrieve(url, filename)
        complete = True
    finally:
        # A half-written file would otherwise be mistaken for a valid cache
        # entry by load_image() on the next run.  Using "finally" also covers
        # an interrupted download (Ctrl-C).
        if not complete and os.path.exists(filename):
            os.remove(filename)


def load_image(url):
    """Return the local cache path for *url*, downloading it on first use.

    The cache file lives in CACHE_PATH and is named after the URL with every
    non-word character replaced by "_".  A file that already exists is
    returned without any network access.

    For a URL ending in "t.png" (presumably a thumbnail), the same URL with
    that suffix replaced by ".png" is tried first; if that download raises,
    the URL is fetched as given.  Errors from the final download attempt
    propagate to the caller.  Progress is reported on stdout.
    """
    filename = os.path.join(CACHE_PATH, re.sub(r"\W", "_", url))
    if os.path.exists(filename):
        return filename

    sys.stdout.write("* loading %s..." % url)
    sys.stdout.flush()

    fetched = False
    # Thumbnail?
    if url.endswith("t.png"):
        try:
            _download(url[:-5] + ".png", filename)
            fetched = True
        except Exception:
            # Deliberate best effort: fall back to the URL as given.  Only
            # ordinary errors are caught so Ctrl-C still aborts the run.
            pass
    if not fetched:
        _download(url, filename)

    # The leading space separates "done" from the progress message above.
    sys.stdout.write(" done\n")
    return filename
def _write_text(path, text):
    """Write *text* to *path*, closing the file even if the write fails."""
    handle = open(path, "w")
    try:
        handle.write(text)
    finally:
        handle.close()


def make_pdf(bookname, html):
    """Write the HTML files for one book and run htmldoc to build its PDF.

    Creates "<bookname>-title.html" from title_template and
    "<bookname>.html" from book_template in the current directory, then
    invokes htmldoc_cmd through the shell to produce "<bookname>.pdf".

    bookname -- stem used for every generated file; its capitalized form is
                the title.
    html     -- HTML inserted into book_template.

    Returns None.  A non-zero exit status from htmldoc is reported on stderr
    but does not raise.
    """
    logo = load_image("http://media.djangoproject.com/img/logos/django-logo-positive.png")
    title = bookname.capitalize()
    title_html = "%s-title.html" % bookname

    # Every local name stays available to the templates and to the command
    # line, exactly as a vars() lookup would provide.
    context = {
        "bookname": bookname,
        "html": html,
        "logo": logo,
        "title": title,
        "title_html": title_html,
    }

    _write_text(title_html, title_template % context)
    _write_text("%s.html" % bookname, book_template % context)

    cmd = htmldoc_cmd + " --numbered -f %(bookname)s.pdf --titlefile %(title_html)s %(bookname)s.html"
    status = os.system(cmd % context)
    if status != 0:
        sys.stderr.write(
            "* %s exited with status %s for %s\n" % (htmldoc_cmd, status, bookname))
def _read_text(path):
    """Return the full contents of *path*, closing the file afterwards."""
    handle = open(path)
    try:
        return handle.read()
    finally:
        handle.close()


# Build one PDF per book, plus a "complete" PDF holding every book.
complete_html = ""
for bookname, files in books:
    html = ""
    for filename in files:
        sys.stdout.write("%s %s\n" % (bookname, filename))
        target = "%s.html" % filename

        # Render "<filename>.txt" to "<filename>.html" through the shell.
        status = os.system("%s %s.txt > %s" % (rst2html_cmd, filename, target))
        if status != 0:
            sys.stderr.write(
                "* %s exited with status %s for %s.txt\n"
                % (rst2html_cmd, status, filename))
        spoon = Spoon(_read_text(target))

        # Change headings: push h5..h1 down one level.  The deepest level is
        # handled first, presumably so that a heading renamed in one pass is
        # not picked up again by the next.
        for hi in range(5, 0, -1):
            for tag in spoon._["h%s" % hi]:
                tag["name"] = "h%s" % (hi + 1)

        # Rewrite relative "../<name>/" links: chapters of the current book
        # become in-document anchors, everything else points at the website.
        for link in spoon._.a:
            href = link["@href"]
            if href.startswith("../") and href.endswith("/"):
                href = href[3:-1]
                if href in files:
                    href = "#%s" % href
                else:
                    href = "http://www.djangoproject.com/documentation/%s" % href
                link["@href"] = href

        # Fix embedded images
        for img in spoon._.img:
            src = img["@src"]
            if src.startswith("http"):
                # Point the tag at the locally cached copy.
                img["@src"] = src = load_image(src)
            try:
                im = Image.open(src)
                width = im.size[0]
                if width > 400:
                    img["@width"] = "100%"
                if im.format == "PSD":
                    # yes there are photoshop files called .png on the site :(
                    sys.stdout.write("* converting photoshop file...")
                    sys.stdout.flush()
                    im.save(src, "png")
                    # Leading space separates "done" from the message above.
                    sys.stdout.write(" done\n")
            except Exception:
                # Best effort: an image that cannot be inspected is reported
                # and left as it is rather than aborting the whole build.
                sys.stdout.write("%s\n" % (sys.exc_info()[1],))

        # Derive the chapter title from the file name: split it into runs of
        # digits, runs of letters and whatever lies between, then capitalize
        # each piece.
        chapter_title = " ".join([
            s.capitalize()
            for s in re.split(r"(\d+|[A-Za-z]+)", filename.replace("_", " "))
            if s
        ])
        chapter = spoon._.body.inner_html()
        # vars() is the module namespace here, so the template may refer to
        # any module-level name (chapter_title, chapter, filename, ...).
        html += chapter_template % vars()

    complete_html += html
    make_pdf(bookname, html)

make_pdf("complete", complete_html)