Custom collectstatic command that uses ETags and MD5 digests to determine whether files on S3 have changed

from __future__ import with_statement

import os
import sys
from optparse import make_option

from django.core.files.storage import FileSystemStorage
from django.core.management.base import CommandError, NoArgsCommand
from django.utils.encoding import smart_str, smart_unicode
from django.utils.datastructures import SortedDict

from django.contrib.staticfiles import finders, storage

from django.contrib.staticfiles.management.commands import collectstatic
import hashlib

class Command(collectstatic.Command):

    def delete_file(self, path, prefixed_path, source_storage):
        """
        Checks if the target file should be deleted if it already exists
        """
        if self.storage.exists(prefixed_path):
            try:
                # Try the S3 ETag first: for non-multipart uploads it is the
                # quoted MD5 hex digest of the object's contents.
                etag = self.storage._wrapped.entries.get(prefixed_path).etag
                local_md5 = hashlib.md5(source_storage.open(path).read()).hexdigest()
                if etag == '"%s"' % local_md5:
                    self.log(u"Skipping '%s' (not modified based on MD5 SUM)" % path)
                    return False
            except Exception:
                # The backend has no preloaded S3 entries for this path (or is
                # not an S3 storage at all); fall back to timestamps below.
                pass
            try:
                # When was the target file modified last time?
                target_last_modified = \
                    self.storage.modified_time(prefixed_path)
            except (OSError, NotImplementedError, AttributeError):
                # The storage doesn't support ``modified_time`` or failed
                pass
            else:
                try:
                    # When was the source file modified last time?
                    source_last_modified = source_storage.modified_time(path)
                except (OSError, NotImplementedError, AttributeError):
                    pass
                else:
                    # The full path of the target file
                    if self.local:
                        full_path = self.storage.path(prefixed_path)
                    else:
                        full_path = None
                    # Skip the file if the target file is not older than the source
                    if target_last_modified >= source_last_modified:
                        if not ((self.symlink and full_path
                                 and not os.path.islink(full_path)) or
                                (not self.symlink and full_path
                                 and os.path.islink(full_path))):
                            if prefixed_path not in self.unmodified_files:
                                self.unmodified_files.append(prefixed_path)
                            self.log(u"Skipping '%s' (not modified)" % path)
                            return False
            # Then delete the existing file if really needed
            if self.dry_run:
                self.log(u"Pretending to delete '%s'" % path)
            else:
                self.log(u"Deleting '%s'" % path)
                self.storage.delete(prefixed_path)
        return True
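
For illustration, here is a minimal standalone sketch of the same comparison using boto directly (the library underneath the S3 storage backend) instead of the django-storages wrapper. The bucket, key and file names are placeholders; note that for multipart uploads the ETag is not an MD5 digest, so a mismatch there only means the check is inconclusive.

import hashlib

import boto


def s3_object_unchanged(bucket_name, key_name, local_path):
    """Return True if the S3 object's ETag matches the local file's MD5 digest."""
    conn = boto.connect_s3()  # credentials taken from the environment/boto config
    bucket = conn.get_bucket(bucket_name)
    key = bucket.get_key(key_name)
    if key is None:
        return False  # not uploaded yet, so treat it as changed
    with open(local_path, 'rb') as f:
        local_md5 = hashlib.md5(f.read()).hexdigest()
    # For single-part uploads S3 stores the MD5 hex digest, wrapped in quotes,
    # as the object's ETag.
    return key.etag == '"%s"' % local_md5

The command above reads the same ETag from the storage backend's preloaded entries dict rather than issuing a fresh request per file, which is why it stays fast (assuming the backend is configured to preload its bucket metadata).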

More like this

  1. Amazon S3 Enabled FileField and ImageField (with Boto) by natebeacham 4 years ago
  2. S3 static media uploader by phlex 5 years, 4 months ago
  3. Amazon S3 browser-based upload form by simon 4 years, 3 months ago
  4. Sorl Thumbnail + Amazon S3 by skoczen 4 years, 10 months ago
  5. Securely Signed S3 Links With Expiration by pjs 5 years, 5 months ago

Comments

antonagestam (on May 14, 2013):

Why all the unnecessary imports?

antonagestam (on May 14, 2013):

I made this a Github repo: https://github.com/antonagestam/collectfast
