Django profiler

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Copyright (c) 2007, Dima Dogadaylo (www.mysoftparade.com)
# See also: http://www.mysoftparade.com/blog/django-profile-sql-performance/

import os
from os import path
import sys
from datetime import datetime
import re
import urlparse


# Key under which profile() stores the requested path in every result row;
# it is also the heading of the first report column.
_PATH_COLUMN = 'Path'

def profile(operations):
    """Run every profiler in *operations* against every discovered URL.

    operations -- iterable of callables.  Each one is called with a URL and
                  must return a dict of measurements; it must carry the
                  attributes ``name`` (report title) and ``sort_key`` (dict
                  key to sort the rows by) and may carry ``reverse``
                  (descending sort, defaults to False).

    URLs come from get_urls(options.depth).  One table per profiler is
    printed through print_report().  Nothing is returned.
    """
    import operator
    debug('profile', options)
    urls = get_urls(options.depth)
    print('Found %d urls.' % len(urls))
    info('urls', urls)
    if not urls:
        # Without this guard ``results[0]`` below raises IndexError as soon
        # as URL discovery comes back empty.
        return

    for op in operations:
        results = []
        for url in urls:
            res = op(url)
            res[_PATH_COLUMN] = url
            results.append(res)
        results.sort(key=operator.itemgetter(op.sort_key),
                     reverse=getattr(op, 'reverse', False))
        # Column order: the path first, then the profiler's own keys in
        # the order the first result dict yields them.
        keys = [key for key in results[0] if key != _PATH_COLUMN]
        keys.insert(0, _PATH_COLUMN)
        # build report table: heading row followed by one row per url
        report = [keys] + [[line[key] for key in keys] for line in results]
        print_report(op.name, report)

def check_status_codes(depth=3, ignore_codes=(200,)):
    """Request every page found by get_urls(depth) and report the failures.

    A page counts as failed when its status code is not in *ignore_codes*.
    Returns a dict mapping each failed url to ``{'code': status_code}``;
    entries with status code 0 additionally carry the response content
    under ``'error'``.
    """
    failures = {}
    for url in get_urls(depth):
        response = _internal_request(url)
        code = response.status_code
        if code in ignore_codes:
            continue
        entry = {'code': code}
        if code == 0:
            entry['error'] = response.content
        failures[url] = entry
    return failures
               
def props(**kwargs):
    """Decorator factory that attaches *kwargs* as attributes of a function.

    The decorated function is replaced by a thin wrapper that forwards all
    arguments and returns the original result unchanged.  Every keyword
    given to props() is set as an attribute on that wrapper (profile()
    reads ``name``, ``sort_key`` and ``reverse`` this way).
    """
    import functools

    def wrapper(func):
        # functools.wraps keeps __name__/__doc__ of the wrapped function so
        # that introspection and doctest still see the original.
        @functools.wraps(func)
        def executor(*call_args, **call_kwargs):
            return func(*call_args, **call_kwargs)
        # ``kwargs`` here is the props() keyword dict, not the call's.
        for key, value in kwargs.items():
            setattr(executor, key, value)
        return executor
    return wrapper
        
@props(name='SQL queries usage', sort_key='SQL', reverse=True)
def profile_sql(url):
    """Find SQL queriers usage for each page"""
    if options.verbosity:
        print "profile_sql", url,
    from django.conf import settings
    old_debug = settings.DEBUG
    settings.DEBUG = True
    from django.db import connection
    connection.queries = []
    responce =_internal_request(url)
    if options.verbosity:
        print "%d SQL queries, status code: %s " %\
              (len(connection.queries), responce.status_code)
    if options.verbosity > 1:
        for query in connection.queries:
            print query['sql'], query['time']
    settings.DEBUG = old_debug
    return {'SQL': len(connection.queries), 'Status': responce.status_code}

@props(name='Page size', sort_key='Size, b', reverse=True)
def profile_size(url):
    """Find size of each page."""
    if options.verbosity:
        print "profile_size", url,
    responce =_internal_request(url)
    size = len(responce.content)
    img = link = 0
    if responce.status_code == 200:
        img = len(re.findall(r'<img.*?>', responce.content))
        link = len(re.findall(r'<link.*?>', responce.content))
    if options.verbosity:
        print ", size: %sKb, status code: %s " %\
              (size, responce.status_code)
    return {'Size, b': size, 'Status': responce.status_code,
            '<img>': img, '<link>': link}

def print_report(name, report):
    """Print report"""
    row_total = len(report)    
    col_total = len(report[0])
    #calculate max length of each column
    format = [reduce(max, [len(str(report[row][col])) for row in xrange(row_total)])\
              for col in xrange(col_total)]
    total_width = reduce(lambda x,y: x+y, format) + col_total - 1
    format = " ".join(["%%%ds" % width for width in format])
    print "*"*total_width
    print name
    print "*"*total_width
    for row in report:
        print format % tuple(row)
    print
       
def get_urls(depth=3, apps=None):
    """Collect the site paths to profile and return them as a list.

    depth -- how far discovery goes:
             always : '/' plus the paths listed in options.read_urls (if set)
             > 0    : one sample url per model, via get_model_urls()
             > 1    : the valid parent urls of everything found so far
             > 2    : links found inside the pages; one crawl round per
                      depth level above 2, stopping early when a round
                      finds nothing new
    apps  -- optional list of app labels handed to get_model_urls()

    Unless options.all_urls is set, urls that resolve to the same view are
    collapsed by remove_dublicated_views().
    """
    urls = set(['/'])
    if options.read_urls:
        urls.update(get_predefined_pages(options.read_urls))
        info("%s: current len of urls=%s" % (options.read_urls, len(urls)))
        debug("\n---predefined urls", urls)
    if depth > 0:
        debug("\n---urls before get_model_urls()", urls)
        # Forward the app filter; it used to be accepted but ignored.
        urls.update(get_model_urls(apps or []))
    if depth > 1:
        urls.update(get_base_urls(urls))
    new_urls = urls
    while depth > 2 and new_urls:
        # only pages not seen before are crawled in the next round
        new_urls = get_urls_from_content(new_urls) - urls
        urls.update(new_urls)
        depth -= 1
        info("depth", depth, "new_urls", len(new_urls))
    if not options.all_urls:
        urls = remove_dublicated_views(urls)
    return list(urls)

def get_model_urls(apps = []):
    import operator
    from django.db.models import get_app, get_apps, get_models
    debug("get_model_urls", apps)
    # convert app labels to app modules
    apps = [get_app(app_label) for app_label in apps] or get_apps()
    # all models of all profiled apps
    classes = reduce(operator.add, [get_models(app) for app in apps])
    debug("all models:\n", classes)
    # remove classes without get_absolute_url()
    classes = [cls for cls in classes\
               if hasattr(cls, 'get_absolute_url') and\
               hasattr(cls.get_absolute_url, '__call__')]
    debug("models with get_absolute_url():\n", classes)
    urls = []
    for cls in classes:
        try:
            if cls._default_manager.count():
                url = cls._default_manager.filter()[0].get_absolute_url()
                debug(cls, " -> ", url)
                if url:
                    urls += [url]
        except Exception, e:
            error("Can't obtain url for %s: %s" % (cls, e))
    debug("get_model_urls(): ", urls)
    return urls

def is_valid_url(url):
    """Return True when Django's resolve() accepts *url*, False otherwise.

    Any ordinary exception raised by resolve() counts as "not valid".
    KeyboardInterrupt and SystemExit are no longer swallowed.
    """
    from django.core.urlresolvers import resolve
    try:
        resolve(url)
    except Exception:
        return False
    return True
    
def get_base_urls(urls):
    """Return the parent urls of *urls* that resolve to a view.

    The parent of a url is obtained by replacing its last path segment
    (with an optional trailing slash) by a single '/'.  Candidates are
    de-duplicated and only those accepted by is_valid_url() are returned,
    as a list.
    """
    debug("get_base_urls(): ", urls)
    parents = set()
    for url in urls:
        parents.add(re.sub(r"/[-\w\?=&%]+/?$", r"/", url))
    debug('base_urls', parents)
    valid_parents = [candidate for candidate in parents
                     if is_valid_url(candidate)]
    debug('valid base_urls', valid_parents)
    return valid_parents

def iter_page_urls(page, url):
    """ Parse page and generate embedded urls.

    page -- HTML text to scan for <a ... href="..."> tags
    url  -- path of the page itself; relative hrefs are joined onto it

    Only links that point into the site are generated.  Skipped are: empty
    hrefs, anchors, protocol-relative links (//host/...) and every href that
    carries a scheme (http:, ftp:, mailto:, javascript:, ...).

    >>> lines = ('<a href="/abs/url/"> <a name="name">',
    ... '<A class="klass" href="href"> <a\\nhref="new_line"> <a hRef="caSe">',
    ... '<a href="http://ext"> <a href="#anchor">  <a href=""></a>')
    >>> page = "\\n".join(lines)
    >>> [u for u in iter_page_urls(page, '/dir/')]
    ['/abs/url/', '/dir/href', '/dir/new_line', '/dir/caSe']
    """
    # "<a" must be followed by whitespace so that <abbr>, <area> etc. do not
    # match; [^>]* (unlike .*?) lets the tag continue on following lines.
    anchor_re = r'<a\s[^>]*?href="(?P<href>[^"]*?)"[^>]*>'
    # RFC 3986 scheme prefix, e.g. "http:", "mailto:", "javascript:"
    scheme_re = r'[a-z][a-z0-9+.\-]*:'
    for match in re.finditer(anchor_re, page, re.I):
        href = match.group('href')
        # bypass empty strings, anchors and protocol-relative urls
        if not href or href.startswith('#') or href.startswith('//'):
            continue
        # bypass external urls and other non-page schemes
        if re.match(scheme_re, href, re.I):
            continue
        if not href.startswith('/'):
            href = urlparse.urljoin(url, href)
        yield href
    
def get_urls_from_content(urls):
    """Return the set of links found inside the pages at *urls*.

    Every url is requested through _internal_request(); only responses with
    status code 200 are scanned, using iter_page_urls().
    """
    debug('\nget_urls_from_content', urls)
    found = set()
    for page_url in urls:
        response = _internal_request(page_url)
        if response.status_code != 200:
            continue
        found.update(iter_page_urls(response.content, page_url))
    debug('hrefs', found)
    return found

def remove_dublicated_views(hrefs):
    """Keep only the first url for every distinct resolved view.

    Each href is resolved with Django's resolve(); hrefs that raise
    Resolver404 or resolve to a falsy value are dropped.  Two hrefs count as
    the same view when they share the view callable and the same remaining
    arguments, where "remaining" means: positional arguments that do not
    occur in the href, and keyword arguments whose value is not a string
    occurring in the href.  Returns the kept hrefs as a list.
    """
    from django.core.urlresolvers import resolve, Resolver404

    seen_signatures = []
    kept = []
    for href in hrefs:
        try:
            match = resolve(href)
        except Resolver404:
            continue
        if not match:
            continue
        view = match[0]
        # resolve() doesn't return the url mapping name, which is a problem
        # when generic views are used; dropping every argument that was
        # taken from the url itself leaves only what tells "really"
        # different generic views apart
        args = [arg for arg in list(match[1]) if arg not in href]
        kwargs = {}
        for key, value in match[2].items():
            if not isinstance(value, basestring) or value not in href:
                kwargs[key] = value
        signature = (view, args, kwargs)
        if signature in seen_signatures:
            continue
        seen_signatures.append(signature)
        kept.append(href)
    return kept

def get_predefined_pages(fname):
    if os.path.exists(fname) and os.path.isfile(fname):
        f = None
        try:
            try:
                f = open(fname, 'rb')
                return [line.strip() for line in f]
            except Exception, e:
                sys.stderr.write("get_predefined_pages %s: %s" % (path, e))
        finally:
            close_file(f)
    return []
    
def _internal_request(url):
    """Request page with internal Django client."""
    from django.test.client import Client
    # many code assume request.META['REMOTE_ADDR'] and etc
    client = Client(REMOTE_ADDR="127.0.0.1", HTTP_HOST="localhost")
    try:
        resp = client.get(url)
    except Exception, e:
        error('url=%s error=%s' % (url, e))
        resp = type('object', (), {'status_code':0, 'content': str(e)})

    if resp.status_code in (500,) and options.save_errors:
        save_page(resp.content, url, options.save_errors)
    return resp


def save_page(page, url, dir):
    """Write *page* to the file that url2path() derives from *url* and *dir*.

    The directory of the target file is created first when it does not
    exist yet.
    """
    target = url2path(url, dir)
    parent = os.path.dirname(target)
    if not os.path.exists(parent):
        os.makedirs(parent)
    write_file(target, page)

def url2path(url, dir):
    """Map *url* to a flat file name inside the directory *dir*.

    Only the path component of the url is used.  An empty path or one ending
    in '/' gets 'index.html' appended, and every '/' is replaced by '_' so
    that the result is a single file directly below *dir*.
    """
    path = urlparse.urlsplit(url)[2]
    if path.endswith('/') or not path:
        path = path + 'index.html'
    flat_name = path.replace('/', '_')
    return os.path.join(dir, flat_name)

def error(*args):
    """Print all arguments, separated by spaces, as one line on stderr."""
    for arg in args:
        # trailing comma: stay on the same line
        print >>sys.stderr,  arg,
    # terminate the line
    print >>sys.stderr

def info(*args):
    """Print all arguments on one line when options.verbosity is above 0."""
    if options.verbosity > 0:
        for arg in args:
            # trailing comma: stay on the same line
            print arg,
        # terminate the line
        print
        
def debug(*args):
    """Print all arguments on one line when options.verbosity is above 1."""
    if options.verbosity > 1:
        for arg in args:
            # trailing comma: stay on the same line
            print arg,
        # terminate the line
        print

def write_file(path, content, mode = "wb"):
    """Write content to file and retunr True is writing was sucessfull."""
    f = None
    try:
        try:
            f = open(path, mode)
            f.write(content)
        except Exception, e:
            sys.stderr.write("write_file %s: %s" % (path, e))
            return False
    finally:
        close_file(f)
    return True
            
def close_file(f):
    """Close file *f* and return True unless closing raised an exception.

    ``None`` (no file was opened) is accepted and counts as success.  Any
    other object is closed, including file-like objects that happen to
    evaluate as false.  A failure is reported on stderr and signalled by
    returning False.
    """
    if f is None:
        return True
    try:
        f.close()
    except Exception:
        # sys.exc_info() instead of a name in the except clause keeps this
        # helper valid on every Python version.
        sys.stderr.write("close_file %s: %s" % (f, sys.exc_info()[1]))
        return False
    return True

# Profilers selectable by name on the command line; without positional
# arguments execute_from_command_line() runs all of them.
PROFILERS = {
    'sql': profile_sql,
    'size': profile_size,
}

# Option defaults, used both for the optparse parser and for the fallback
# ``options`` object below.
_default_options = {'verbosity': 0, 
                    'read_urls': './profile-pages.txt',
                    'depth': 4, 'all_urls': False,
                    'test': False, 'save_errors': None}
# will be redefined if run from command line
# (until then: a class whose attributes are the defaults above, so that
# ``options.<name>`` also works when the module is imported)
options = type('DefaultOptions', (), _default_options)

# Usage line handed to optparse.
# NOTE(review): positional arguments are looked up in PROFILERS ('sql',
# 'size'), not treated as app names -- confirm and update the text.
_usage  =  """%prog [options] [app_name ...]"""

def execute_from_command_line(argv):
    from optparse import OptionParser
    parser = OptionParser(version='0.1', usage = _usage)
    parser.set_defaults(**_default_options)
    parser.add_option('--settings',
        help='Python path to settings module, e.g. "myproject.settings.main". If this isn\'t provided, the DJANGO_SETTINGS_MODULE environment variable will be used.')
    parser.add_option('--pythonpath',
        help='Lets you manually add a directory the Python path, e.g. "/home/djangoprojects/myproject".')
    parser.add_option('--verbosity', action='store', dest='verbosity',
        type='choice', choices=['0', '1', '2'],
        help='Verbosity level; 0=minimal output, 1=normal output, 2=all output')
    parser.add_option('-a', '--all', action='store_true',\
                      dest='all_urls',
                      help='Profile all found urls without view checking.')
    parser.add_option('--test', action='store_true',\
                      dest='test', help='Run django-profile doctests.')
    parser.add_option('--check', action='store_true',\
                      dest='check', help='Check status codes and report broken pages.')
    parser.add_option('--depth', action='store', dest='depth',\
                      help='Logical url searching depth; 0,1,2,3')
    parser.add_option('--read_urls',
                      help='Optional file with paths of pages to profile. Default ./profile-pages.txt')
    parser.add_option('--save_errors',
                      help='Directory to save pages with 500 status code and other errors. No default.')
    global options
    options, args = parser.parse_args(argv[1:])

    if options.test:
        import doctest
        doctest.testmod()
        return
    
    try:
        profilers = args and [PROFILERS[ind] for ind in args] or PROFILERS.values()
    except Exception, e:
        parser.error("Invalid indicators==%s:%s" % (args, e))
    options.verbosity = int(options.verbosity)
    options.depth = int(options.depth)
    if options.settings:
        os.environ['DJANGO_SETTINGS_MODULE'] = options.settings
    if options.pythonpath:
        sys.path.insert(0, options.pythonpath)
    if not options.settings and not options.pythonpath:
        # behave like a manage.py
        try:
            import settings # Assumed to be in the same directory.
        except ImportError:
            import sys
            sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r.\nYou'll have to run django-profile.py, passing it your settings module.\n(If the file settings.py does indeed exist, it's causing an ImportError somehow.)\n" % __file__)
            sys.exit(1)
        from django.core.management import setup_environ
        setup_environ(settings)
        
    if options.check:
        failed = check_status_codes()
        for url, reason in failed.items():
            print url, '->', reason
        return
    profile(profilers)
        
# Script entry point: runs only when the file is executed directly and
# hands the complete argument vector to the command-line handler.
if __name__ == '__main__':
    execute_from_command_line(sys.argv)

More like this

  1. Profiling middleware using cProfile by sgb 5 years, 11 months ago
  2. Profiling Middlware by udfalkso 7 years ago
  3. Debug Page Load Time Stats Middleware by udfalkso 6 years, 8 months ago
  4. Middleware to move tags <script> to the bottom by marinho 3 years, 9 months ago
  5. Profiling Middleware w/sorting by petrilli 4 years, 10 months ago

Comments

(Forgotten your password?)