Login

Custom collectstatic that uses etag and md5 digests to determine whether files on S3 have changed

Author:
millarm
Posted:
February 4, 2013
Language:
Python
Version:
1.4
Score:
4 (after 4 ratings)

For use with S3 BotoStorage

STATICFILES_STORAGE ="storages.backends.s3boto.S3BotoStorage"

and

AWS_PRELOAD_METADATA = True

A custom management command that compares the local file's MD5 sum with the ETag stored on S3 and skips copying the file when the two match.

This makes running collectstatic MUCH faster if you use git for source control: git updates file timestamps, so the default timestamp comparison treats unchanged files as modified and uploads them again.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from __future__ import with_statement

import os
import sys
from optparse import make_option

from django.core.files.storage import FileSystemStorage
from django.core.management.base import CommandError, NoArgsCommand
from django.utils.encoding import smart_str, smart_unicode
from django.utils.datastructures import SortedDict

from django.contrib.staticfiles import finders, storage

from django.contrib.staticfiles.management.commands import collectstatic
import hashlib

class Command(collectstatic.Command):
    """
    ``collectstatic`` variant that skips files whose content already
    matches what the target storage reports.

    Before falling back to the stock timestamp comparison, the MD5 digest
    of the source file is compared with the ETag found in the target
    storage's ``entries`` mapping (read through ``self.storage._wrapped``).
    NOTE(review): the page describes this as intended for S3BotoStorage
    with ``AWS_PRELOAD_METADATA = True``; other backends are expected to
    fail the lookup and use the timestamp path -- confirm for your backend.
    """

    # Number of bytes read per iteration while hashing a source file, so
    # large assets are never loaded into memory in one piece.
    md5_chunk_size = 64 * 1024

    def _source_md5(self, path, source_storage):
        """
        Returns the hexadecimal MD5 digest of ``path`` in ``source_storage``.

        The file is read in chunks and always closed, even if reading fails.
        """
        digest = hashlib.md5()
        source_file = source_storage.open(path)
        try:
            while True:
                chunk = source_file.read(self.md5_chunk_size)
                if not chunk:
                    break
                digest.update(chunk)
        finally:
            source_file.close()
        return digest.hexdigest()

    def _matches_remote_etag(self, path, prefixed_path, source_storage):
        """
        Returns True only if the target's ETag equals the source file's MD5.

        This is a best-effort check: any failure (storage without
        ``_wrapped``/``entries``, no entry for the file, unreadable source
        file, ...) yields False so the caller falls back to timestamps.
        """
        try:
            entry = self.storage._wrapped.entries.get(prefixed_path)
            if entry is None:
                # Nothing known about the target file; cannot compare.
                return False
            # The stored ETag is wrapped in double quotes, hence the format.
            expected_etag = '"%s"' % self._source_md5(path, source_storage)
            return entry.etag == expected_etag
        except Exception:
            # Deliberately broad (but no longer catching KeyboardInterrupt /
            # SystemExit): the hash check is only an optimisation.
            self.log(u"Could not compare MD5 SUM for '%s', "
                     u"falling back to timestamps" % path, level=3)
            return False

    def delete_file(self, path, prefixed_path, source_storage):
        """
        Checks if the target file should be deleted if it already exists

        Returns False when the existing target file can be kept (the copy
        should be skipped) and True when the caller should go on and copy
        the file; in the latter case an existing target file has been
        deleted, unless this is a dry run.
        """
        if self.storage.exists(prefixed_path):
            # attempt the S3 hash first
            if self._matches_remote_etag(path, prefixed_path, source_storage):
                # Record the skip like the timestamp branch below does, so
                # the file shows up in the "unmodified" summary.
                if prefixed_path not in self.unmodified_files:
                    self.unmodified_files.append(prefixed_path)
                self.log(u"Skipping '%s' (not modified based on MD5 SUM)" % path)
                return False
            try:
                # When was the target file modified last time?
                target_last_modified = \
                    self.storage.modified_time(prefixed_path)
            except (OSError, NotImplementedError, AttributeError):
                # The storage doesn't support ``modified_time`` or failed
                pass
            else:
                try:
                    # When was the source file modified last time?
                    source_last_modified = source_storage.modified_time(path)
                except (OSError, NotImplementedError, AttributeError):
                    pass
                else:
                    # The full path of the target file
                    if self.local:
                        full_path = self.storage.path(prefixed_path)
                    else:
                        full_path = None
                    # Skip the file if the source file is younger
                    if target_last_modified >= source_last_modified:
                        # ...unless the target's link/regular-file kind does
                        # not match the requested symlink mode.
                        if not ((self.symlink and full_path
                                 and not os.path.islink(full_path)) or
                                (not self.symlink and full_path
                                 and os.path.islink(full_path))):
                            if prefixed_path not in self.unmodified_files:
                                self.unmodified_files.append(prefixed_path)
                            self.log(u"Skipping '%s' (not modified)" % path)
                            return False
            # Then delete the existing file if really needed
            if self.dry_run:
                self.log(u"Pretending to delete '%s'" % path)
            else:
                self.log(u"Deleting '%s'" % path)
                self.storage.delete(prefixed_path)
        return True

More like this

  1. Template tag - list punctuation for a list of items by shapiromatron 1 year ago
  2. JSONRequestMiddleware adds a .json() method to your HttpRequests by cdcarter 1 year ago
  3. Serializer factory with Django Rest Framework by julio 1 year, 7 months ago
  4. Image compression before saving the new model / work with JPG, PNG by Schleidens 1 year, 8 months ago
  5. Help text hyperlinks by sa2812 1 year, 8 months ago

Comments

antonagestam (on May 14, 2013):

Why all the unnecessary imports?

#

antonagestam (on May 14, 2013):

I made this a Github repo: https://github.com/antonagestam/collectfast

#

Please login first before commenting.