Login

Create nice looking PDFs from developer documentation

Author:
henning
Posted:
December 18, 2007
Language:
Python
Version:
.96
Score:
6 (after 6 ratings)

You need htmldoc, rst2html, the Python Imaging Library, BeautifulSoup and spoon. The Debian/Ubuntu packages are called htmldoc, python-docutils, python-imaging and python-beautifulsoup. You can get spoon.py from http://beautifulspoon.googlecode.com/svn/trunk/spoon.py

To create the PDF files, you have to call the script from django_src/docs.

Here is an example output: http://henning.cco-ev.de/django/essential.pdf

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import os
import re
import urllib
from PIL import Image
from spoon import Spoon


# Directory, inside the current working directory, where load_image() stores
# downloaded images so they are fetched only once.
CACHE_PATH = os.path.join(os.getcwd(), "djangobook-image-cache")


# Names of the external programs run through os.system(): rst2html converts
# each chapter's .txt file to HTML, htmldoc turns the assembled HTML into PDF.
rst2html_cmd = "rst2html"
htmldoc_cmd = "htmldoc"


# One (book name, chapter list) pair per generated PDF.  Each chapter is the
# base name of a "<chapter>.txt" file in the current directory; chapters are
# rendered in the order given here.
books = [
    ("essential", [
        "overview",
        "install",
        "tutorial01",
        "tutorial02",
        "tutorial03",
        "tutorial04",
        "faq",
        "documentation",
    ]),
    ("deployment", [
        "modpython",
        "fastcgi",
    ]),
    ("reference", [
        "django-admin",
        "model-api",
        "db-api",
        "transactions",
        "templates",
        "templates_python",
        "newforms",
        "forms",
        "modelforms",
        "testing",
        "sessions",
        "cache",
        "i18n",
        "middleware",
        "settings",
        "url_dispatch",
        "request_response",
        "generic_views",
        "authentication",
        "shortcuts",
        "unicode",
    ]),
    ("contrib", [
        "add_ons",
        "contenttypes",
        "csrf",
        "databrowse",
        "flatpages",
        "form_preview",
        "redirects",
        "sites",
        "sitemaps",
        "syndication_feeds",
        "webdesign",
    ]),
    ("solutions", [
        "apache_auth",
        "static_files",
        "email",
        "legacy_databases",
        "outputting_pdf",
        "outputting_csv",
    ]),
    ("etc", [
        "design_philosophies",
        "contributing",
        "admin_css",
        "api_stability",
    ]),
]

# HTML page wrapped around the chapters of one book.
# Placeholders: %(title)s (book title) and %(html)s (concatenated chapters).
book_template = """\
<html>
<head><title>Django Developer Documentation: %(title)s</title></head>
<body>
%(html)s
</body>
</html>
"""


# Cover page handed to htmldoc via --titlefile.
# Placeholders: %(title)s (book title) and %(logo)s (path of the logo image).
# Literal percent signs are doubled because the template is %-formatted.
title_template = """
<html>
<head><title>Django Developer Documentation: %(title)s</title></head>
<body>
<!-- ugly html - but looks good as pdf -->
<br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/>
<br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/><br/>

<img src="%(logo)s" width="100%%"><br/><br/>
<table width="100%%" cellpadding="10">
<tr bgcolor="#092e20" ><td><font color="#ffffff" size="+4" face="Arial,Helvetica,Sans Serif">
Django Developer Documentation
</font></td></tr>
<tr><td><b><font size="+5">%(title)s</font></b></td></tr>
</table>


</body>
</html>
"""



# One chapter inside a book: an <h1> heading carrying a named anchor, followed
# by the chapter body.
# Placeholders: %(filename)s (anchor name), %(chapter_title)s, %(chapter)s.
chapter_template = """\
<h1><a name="%(filename)s"></a>%(chapter_title)s</h1>
%(chapter)s
"""



# Make sure the image cache directory exists.  Creating it unconditionally and
# tolerating "already there" avoids the race of checking first and creating
# afterwards; any other failure (e.g. the path is a regular file, or missing
# permissions) is reported immediately instead of surfacing later.
try:
    os.makedirs(CACHE_PATH)
except OSError:
    if not os.path.isdir(CACHE_PATH):
        raise

    
def load_image(url):
    filename = os.path.join(
       CACHE_PATH, 
       re.sub("\W", "_", url))
    if not os.path.exists(filename):
        print "* loading %s..." % url,
        sys.stdout.flush()
        # Thumbnail?
        if url.endswith("t.png"):
            try:
                urllib.urlretrieve(url[:-5] + ".png", filename)
                print "done"
                return filename
            except:
                pass
        urllib.urlretrieve(url, filename)
        print "done"
    return filename




def _write_file(path, text):
    """Write *text* to *path*, closing the file even if the write fails."""
    out = open(path, "w")
    try:
        out.write(text)
    finally:
        out.close()


def make_pdf(bookname, html):
    """Render *html* as <bookname>.pdf by running htmldoc.

    Two intermediate files are written to the current directory first:
    <bookname>-title.html (the cover page built from title_template) and
    <bookname>.html (the body built from book_template).

    bookname -- base name of all generated files; its capitalized form is
                used as the book title.
    html     -- HTML fragment inserted into the <body> of the book page.
    """
    logo = load_image("http://media.djangoproject.com/img/logos/django-logo-positive.png")
    title = bookname.capitalize()
    title_html = "%s-title.html" % bookname
    book_html = "%s.html" % bookname
    # Both files must be closed (and therefore flushed) before htmldoc reads
    # them; do not rely on garbage collection to do that.
    _write_file(title_html, title_template % {"logo": logo, "title": title})
    _write_file(book_html, book_template % {"title": title, "html": html})
    cmd = "%s --numbered -f %s.pdf --titlefile %s %s" % (
        htmldoc_cmd, bookname, title_html, book_html)
    os.system(cmd)



complete_html = ""           
for bookname, files in books:
    html = ""
    for filename in files:
        print bookname, filename
        target = "%s.html" % filename
        os.system(rst2html_cmd + " %(filename)s.txt > %(target)s" % vars())
        spoon = Spoon(open(target).read())
        # Change headings
        for hi in range(5, 0, -1):
            for i, tag in enumerate(spoon._["h%s" % hi]):
                tag["name"] = "h%s" % (hi + 1)
        for link in spoon._.a:
            href = link["@href"]
            if href.startswith("../") and href.endswith("/"):
                href = href[3:-1]
                if href in files:
                    href = "#%s" % href
                else:
                    href = "http://www.djangoproject.com/documentation/%s" % href
                link["@href"] = href
        # Fix embedded images
        for img in spoon._.img:
            src = img["@src"]
            if src.startswith("http"):
                img["@src"] = src = load_image(src)
            try:
                im = Image.open(src)
                width = im.size[0]
                if width > 400:
                    img["@width"] = "100%"
                if im.format == "PSD":
                    # yes there are photoshop files called .png on the site :(
                    print "* converting photoshop file...",
                    sys.stdout.flush()
                    im.save(src, "png")
                    print "done"
            except Exception, e:
                print e
        chapter_title = " ".join([
                s.capitalize() for s in re.split("(\d+|[A-Za-z]+)", filename.replace("_", " ")) if s
                ])
        chapter = spoon._.body.inner_html()
        html += chapter_template % vars()
    complete_html += html
    make_pdf(bookname, html)


make_pdf("complete", complete_html)

More like this

  1. Template tag - list punctuation for a list of items by shapiromatron 10 months, 2 weeks ago
  2. JSONRequestMiddleware adds a .json() method to your HttpRequests by cdcarter 10 months, 3 weeks ago
  3. Serializer factory with Django Rest Framework by julio 1 year, 5 months ago
  4. Image compression before saving the new model / work with JPG, PNG by Schleidens 1 year, 6 months ago
  5. Help text hyperlinks by sa2812 1 year, 6 months ago

Comments

ant21 (on December 20, 2007):

It rocks! The pdfs it created are really nice. Thank you henning.

#

Please login first before commenting.