Login

Using descriptors for lazy attribute caching

Author:
djypsy
Posted:
August 5, 2007
Language:
Python
Version:
.96
Score:
0 (after 0 ratings)

Python's descriptor protocol can seem a bit esoteric at first; however, it can be invaluable in handling everyday idioms and patterns - something that the Django framework authors have taken advantage of on numerous occasions (e.g., the auth middleware).

One such idiom I see and use often and would like to generalize is the attribute-existence check-or-set routine illustrated here:

def get_foo(self):
    '''Return ``self._foo``, creating it with ``init_foo()`` on first use.'''
    # Fast path: the attribute already exists, so hand it straight back.
    if hasattr(self, '_foo'):
        return self._foo

    self._foo = init_foo()
    return self._foo

Rather than coding this up multiple times (either for a given class or across many unrelated classes), I would prefer to delegate this repetitive work to a descriptor and remain DRY. The means to this end is implemented as a variation on the Python property construct, and is intentionally oversimplified (I leave the details of the heavy lifting up to the reader).

The basic premise shown in the source here is simply straightforward Python; a quick and dirty example of how it could be utilized within a Django context is shown here:

from django.db import models
from cacheprop import CacheProperty2

# (stored code, display label) pairs passed as ``choices`` to Artifact.type.
# NOTE(review): ``_`` is not imported in this snippet; presumably a gettext
# translation alias -- confirm before reuse.
ARTIFACT_TYPES = (
    ('F', _('File')),
    ('D', _('Directory')),
    ('A', _('Alias')),
)

class Artifact(models.Model):
    '''Example model exposing a read-only, cached ``file_metadata``
    attribute built with CacheProperty2.'''
    # model fields
    name = models.CharField(maxlength=64)
    type = models.CharField(maxlength=1, choices=ARTIFACT_TYPES)

    # The lambda is the initializer handed to CacheProperty2 (imported from
    # the cacheprop module); it receives the model instance and fetches the
    # FileMetadata whose artifact id matches this instance's id.
    file_metadata = CacheProperty2(
        lambda self: self.filemetadata_set.get(artifact__id=self.id)
    )

class FileMetadata(models.Model):
    '''Example model attached to an Artifact through a foreign key.'''
    # presumably the file's size in bytes -- inferred from the name only
    byte_size = models.IntegerField()
    # Declared with unique=True, so the link to Artifact is one-to-one in
    # effect.
    artifact = models.ForeignKey(Artifact, unique=True)
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#!/usr/local/bin/python
'''
Sample test code:

>>> f = Foo()         
>>> print f.x
1
>>> print f.y
2
>>> print f.z
2007-08-05 00:00:00
>>> print vars(f)
{'_z': datetime.datetime(2007, 8, 5, 0, 0), '_y': 2, '_x': 1}
>>> 
>>> b = Bar()
>>> print b.x
1
>>> print b.y
2
>>> print b.z
2007-08-05 00:00:00
>>> print vars(b)
{'_z': datetime.datetime(2007, 8, 5, 0, 0), '_y': 2, '_x': 1}
>>> 
>>> z = Baz()
>>> print z.x
1
>>> print z.y
2
>>> print z.z
2007-08-05 00:00:00
>>> print vars(z)
{}
'''

import weakref
from datetime import datetime


#------------------------------------------------------------------
def simple_curry(f, *args, **kws):
    '''Pre-bundle ``f`` with known arguments.

    Returns a callable taking a single ``self`` argument; calling it
    invokes ``f`` with that object first, followed by the positional and
    keyword arguments captured here.
    '''
    return lambda self: f(self, *args, **kws)

#==================================================================
class CacheProperty(object):
    '''Read-only lazy attribute that caches its value on the instance.

    On first access the value is produced from ``initializer`` and stored
    in the instance's own namespace under ``cache_var_name``; subsequent
    accesses return that stored attribute.
    '''

    #--------------------------------------------------------------
    def __init__(self, cache_var_name, initializer):
        '''Remember where to cache the value and how to build it.

        cache_var_name -- name of the instance attribute used as storage.
        initializer    -- a callable taking the instance and returning
                          the value, or a non-callable used as the value
                          as-is.
        '''
        self.initializer = initializer
        self.var = cache_var_name

    #--------------------------------------------------------------
    def __set__(self, *args):
        '''Reject assignment; always raises AttributeError.'''
        # The call form of ``raise`` is valid on Python 2 and Python 3
        # alike; ``raise Exc, msg`` is a SyntaxError on Python 3.
        raise AttributeError(u'Read-only attribute')

    #--------------------------------------------------------------
    def __get__(self, instance, cls):
        '''Return the cached value, computing and storing it if missing.

        Raises AttributeError when accessed on the class instead of on
        an instance.
        '''
        if instance is None:
            raise AttributeError(
                u'%s must be accessed via instance' % (self.var,)
            )

        if not hasattr(instance, self.var):
            # First access: build the value once and park it on the
            # instance so later lookups skip the initializer.
            if callable(self.initializer):
                value = self.initializer(instance)
            else:
                value = self.initializer
            setattr(instance, self.var, value)

        return getattr(instance, self.var)


#==================================================================
class CacheProperty2(object):
    '''Read-only lazy attribute whose cached values live in the descriptor.

    Nothing is written to the instance's namespace.  Values are cached
    per instance: one descriptor object serves every instance of the
    owning class, so a single shared slot would hand the first
    instance's value to all the others.
    '''

    #--------------------------------------------------------------
    def __init__(self, initializer):
        '''Store the value source.

        initializer -- a callable taking the instance and returning the
                       value, or a non-callable used as the value as-is.
        '''
        self.initializer = initializer
        # Maps id(instance) -> (reference to instance, cached value).
        # Keying on id() avoids requiring instances to be hashable.
        self.cache = {}

    #--------------------------------------------------------------
    def __set__(self, *args):
        '''Reject assignment; always raises AttributeError.'''
        # The call form of ``raise`` is valid on Python 2 and Python 3
        # alike; ``raise Exc, msg`` is a SyntaxError on Python 3.
        raise AttributeError(u'Read-only attribute')

    #--------------------------------------------------------------
    def __get__(self, instance, cls):
        '''Return the value cached for ``instance``, computing it once.

        Raises AttributeError when accessed on the class instead of on
        an instance.
        '''
        if instance is None:
            raise AttributeError(u'Instance level access only')

        cache = self.cache
        key = id(instance)
        entry = cache.get(key)
        # ids may be reused once an object is gone, so confirm that the
        # entry really belongs to this very object before trusting it.
        if entry is not None and entry[0]() is instance:
            return entry[1]

        if callable(self.initializer):
            value = self.initializer(instance)
        else:
            value = self.initializer

        def evict(ref):
            # Drop the entry when its instance is collected, unless the
            # slot has already been taken over by a newer object.
            current = cache.get(key)
            if current is not None and current[0] is ref:
                del cache[key]

        try:
            owner = weakref.ref(instance, evict)
        except TypeError:
            # Instance cannot be weakly referenced (e.g. __slots__
            # without __weakref__).  Fall back to a strong reference:
            # the entry then lives as long as the descriptor does.
            owner = lambda: instance

        cache[key] = (owner, value)
        return value

        
#==================================================================
class Foo(object):
    '''Baseline example: lazy attribute caching spelled out by hand,
    with one getter and one property per cached attribute.'''

    #--------------------------------------------------------------
    def expensive_function(self, arg):
        # stand-in for a costly computation; simply echoes its argument
        return arg

    #--------------------------------------------------------------
    def _get_x(self):
        # Return the stored value if present, otherwise compute it once.
        try:
            return self._x
        except AttributeError:
            self._x = self.expensive_function(1)
            return self._x

    #--------------------------------------------------------------
    def _get_y(self):
        try:
            return self._y
        except AttributeError:
            self._y = self.expensive_function(2)
            return self._y

    #--------------------------------------------------------------
    def _get_z(self):
        try:
            return self._z
        except AttributeError:
            self._z = datetime(2007, 8, 5)
            return self._z

    #--------------------------------------------------------------
    x = property(_get_x)
    y = property(_get_y)
    z = property(_get_z)

    
#==================================================================
class Bar(object):
    '''Variant "A" of attribute caching: each cached value is kept in
    the instance's own namespace under the name given to CacheProperty.'''

    #--------------------------------------------------------------
    def expensive_function(self, arg):
        # stand-in for a costly computation; simply echoes its argument
        return arg

    #--------------------------------------------------------------
    x = CacheProperty(
        cache_var_name='_x',
        initializer=simple_curry(expensive_function, 1),
    )
    y = CacheProperty(
        cache_var_name='_y',
        initializer=simple_curry(expensive_function, 2),
    )
    z = CacheProperty(
        cache_var_name='_z',
        initializer=lambda self: datetime(2007, 8, 5),
    )


#==================================================================
class Baz(object):
    '''Variant "B" of attribute caching: the cached data is held by the
    descriptor rather than in the instance's namespace.'''

    #--------------------------------------------------------------
    def expensive_function(self, arg):
        # stand-in for a costly computation; simply echoes its argument
        return arg

    #--------------------------------------------------------------
    x = CacheProperty2(initializer=simple_curry(expensive_function, 1))
    y = CacheProperty2(initializer=simple_curry(expensive_function, 2))
    z = CacheProperty2(initializer=lambda self: datetime(2007, 8, 5))


#------------------------------------------------------------------
def test():
    '''Run doctest over the ``__main__`` module (doctest.testmod's
    default when called without arguments).'''
    # NOTE: doctest does not seem to work in 2.3
    from doctest import testmod
    testmod()


###################################################################
if __name__ == '__main__':
    test()

More like this

  1. Template tag - list punctuation for a list of items by shapiromatron 11 months, 2 weeks ago
  2. JSONRequestMiddleware adds a .json() method to your HttpRequests by cdcarter 11 months, 3 weeks ago
  3. Serializer factory with Django Rest Framework by julio 1 year, 6 months ago
  4. Image compression before saving the new model / work with JPG, PNG by Schleidens 1 year, 7 months ago
  5. Help text hyperlinks by sa2812 1 year, 7 months ago

Comments

Please login first before commenting.