dumpdata/loaddata with MySQL and ForeignKeys (Revision 2)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
#!/usr/bin/env python
# Author:  <greencm@gmail.com>
#
# Purpose: Given a set of classes, sort them such that ones that have
#          ForeignKey relationships with later keys are show up after
#          the classes they depend on
#
# Created: 12/27/07
#
# Modified: 3/20/08
#
# Graham King added the abilility to walk other ManyToMany
# relationships as well as handling fixtures such as content types

from django.core.management import setup_environ
import settings
setup_environ(settings)
import sys
from django.db import models

from django.core.management.base import CommandError
from django.core import serializers


# Original topological sort code written by Ofer Faigon
# (www.bitformation.com) and used with permission
def foreign_key_sort(items):
    """Perform topological sort.
       items is a list of django classes
       Returns a list of the items in one of the possible orders, or None
       if partial_order contains a loop.
    """

    def add_node(graph, node):
        """Add a node to the graph if not already exists."""
        if not graph.has_key(node):
            graph[node] = [0] # 0 = number of arcs coming into this node.

    def add_arc(graph, fromnode, tonode):
        """Add an arc to a graph. Can create multiple arcs.
           The end nodes must already exist."""
        graph[fromnode].append(tonode)
        # Update the count of incoming arcs in tonode.

        graph[tonode][0] = graph[tonode][0] + 1
    # step 1 - create a directed graph with an arc a->b for each input
    # pair (a,b).
    # The graph is represented by a dictionary. The dictionary contains
    # a pair item:list for each node in the graph. /item/ is the value
    # of the node. /list/'s 1st item is the count of incoming arcs, and
    # the rest are the destinations of the outgoing arcs. For example:
    #           {'a':[0,'b','c'], 'b':[1], 'c':[1]}
    # represents the graph:   c <-- a --> b
    # The graph may contain loops and multiple arcs.
    # Note that our representation does not contain reference loops to
    # cause GC problems even when the represented graph contains loops,
    # because we keep the node names rather than references to the nodes.
    graph = {}

    # iterate once for the nodes
    for v in items:
        add_node(graph, v)

    # iterate again to pull out the dependency information
    for a in items:

        rel_lst = related_field_filter(a._meta.fields)    # Add foreign keys
        rel_lst.extend( a._meta.many_to_many )            # Add many to many
        
        for b in rel_lst:
            # print "adding arc %s <- %s" % (b.rel.to, a)
            add_arc(graph, b.rel.to, a)
        

    # Step 2 - find all roots (nodes with zero incoming arcs).
    roots = [node for (node,nodeinfo) in graph.items() if nodeinfo[0] == 0]

    # step 3 - repeatedly emit a root and remove it from the graph. Removing
    # a node may convert some of the node's direct children into roots.
    # Whenever that happens, we append the new roots to the list of
    # current roots.
    sorted = []
    while len(roots) != 0:
        # If len(roots) is always 1 when we get here, it means that
        # the input describes a complete ordering and there is only
        # one possible output.
        # When len(roots) > 1, we can choose any root to send to the
        # output; this freedom represents the multiple complete orderings
        # that satisfy the input restrictions. We arbitrarily take one of
        # the roots using pop(). Note that for the algorithm to be efficient,
        # this operation must be done in O(1) time.

        root = roots.pop()
        sorted.append(root)
        for child in graph[root][1:]:
            graph[child][0] = graph[child][0] - 1
            if graph[child][0] == 0:
                roots.append(child)
        del graph[root]

    if len(graph.items()) != 0:
        # There is a loop in the input.
        raise CircularReferenceException, "Circular Dependency Detected in Input"
    
    return sorted


# Problem: 
#

class CircularReferenceException(Exception):
    pass


def isclass(obj):
    if str(obj).find("<class") == 0:
        return True
    return False
    
def find_classes(module):
    classes = []

    for k,obj in module.__dict__.iteritems():
        if isclass(obj):
            # print k, "is a class!"
            classes.append(obj)

    return classes

def model_filter(lst):
    """ Given a list of classes, Filter out everything that's not an instance of models.Model """
    return filter(lambda x: issubclass(x, models.Model), lst)

def related_field_filter(lst):
    """ given a list of fields, return the ones that are related """

    ret = []
    for f in lst:
        s = str(f)
        if s.find('django.db.models.fields.related') >= 0:
            ret.append(f)

    return ret

def main():
    from optparse import OptionParser, make_option
    from django.db.models import get_app, get_apps, get_models
    
    option_list = (make_option('--format', default='json', dest='format',
                               help='Specifies the output serialization format for fixtures.'),
                   make_option('--indent', default=None, dest='indent', type='int',
                               help='Specifies the indent level to use when pretty-printing output'),
                   make_option('--traceback', default=False, dest='traceback', type='int',
                               help='Specifies the indent level to use when pretty-printing output'),

                   )
     
    help = 'Output the contents of the database as a fixture of the given format.'
    args = '[appname ...]'

    parser = OptionParser(option_list=option_list)

    (options,app_labels) = parser.parse_args()
    

    if len(app_labels) == 0:
        app_list = get_apps()
    else:
        app_list = [get_app(app_label) for app_label in app_labels]

    # Check that the serialization format exists; this is a shortcut to
    # avoid collating all the objects and _then_ failing.
    if options.format not in serializers.get_public_serializer_formats():
        raise CommandError("Unknown serialization format: %s" % options.format)

    try:
        serializers.get_serializer(options.format)
    except KeyError:
        raise CommandError("Unknown serialization format: %s" % options.format)

    objects = []
    models = []
    for app in app_list:
        models.extend( get_models(app) )

    models = foreign_key_sort(models)

    for model in models:
        objects.extend(model._default_manager.all())
    
    try:
        print serializers.serialize(options.format, objects, indent=options.indent)
    except Exception, e:
        if options.traceback:
            raise
        raise CommandError("Unable to serialize database: %s" % e)

if __name__=='__main__':
    main()

More like this

  1. dumpdata/loaddata with MySQL and ForeignKeys by cmgreen 6 years, 3 months ago
  2. dumpdata/loaddata with MySQL and ForeignKeys, as django command by brondsem 5 years ago
  3. Serializing booleans correctly when doing dumpdata from a MySQL database using Django 0.96 by chrj 5 years, 9 months ago
  4. Command to dump data as a python script by willhardy 5 years, 10 months ago
  5. Remove old fields on dumpdata generated json by facundo_olano 2 years, 6 months ago

Comments

prz (on December 3, 2012):

fails if 'self' is referenced on a model.

otherwise simple & nice

#

(Forgotten your password?)