# better_yaml.py

"""
Customized YAML serializer, with more condensed and readable output.
Rather than producing a flat list of objects with the same three attributes:

    - fields: {...}
      model: modelname
      pk: 123

This serializer nests the data, grouping by model name, then indexing by
primary key. For example, instead of this output, as produced by the default
YAML serializer:

    - fields: {name: blue}
      model: app.firstmodel
      pk: 3
    - fields: {name: red}
      model: app.firstmodel
      pk: 1
    - fields: {name: green}
      model: app.firstmodel
      pk: 2
    - fields: {name: crumbly}
      model: app.secondmodel
      pk: 2
    - fields: {name: squishy}
      model: app.secondmodel
      pk: 1

You'll get this output:

    app.firstmodel:
      1: {name: red}
      2: {name: green}
      3: {name: blue}
    app.secondmodel:
      1: {name: squishy}
      2: {name: crumbly}

To use this customized serializer and deserializer, save this file
somewhere in your Django project, then add this to your settings.py:

    SERIALIZATION_MODULES = {
        'yaml': 'path.to.better_yaml',
    }

Note that this serializer is NOT compatible with the default Django
YAML serializer; this one uses nested dictionaries, while the default
one uses a flat list of object dicts.

Requires PyYAML (http://pyyaml.org/), of course.
"""

from StringIO import StringIO
import yaml

from django.core.serializers.pyyaml import Serializer as YamlSerializer
from django.core.serializers.python import Deserializer as PythonDeserializer
from django.utils.encoding import smart_unicode

class Serializer(YamlSerializer):
    """
    YAML serializer that collects objects into a two-level mapping:
    model name -> primary key -> field dict.
    """

    def start_serialization(self):
        # Objects accumulate in a dict keyed by model name, rather than
        # in a flat list.
        self.objects = {}
        self._current = None

    def end_object(self, obj):
        """
        File the field dict built for ``obj`` (held in ``self._current``)
        under its model name and primary key, then reset ``self._current``.
        """
        model_name = smart_unicode(obj._meta)
        pk_value = obj._get_pk_val()

        # Create the per-model mapping the first time a model is seen.
        per_model = self.objects.setdefault(model_name, {})
        per_model[pk_value] = self._current

        self._current = None


def Deserializer(stream_or_string, **options):
    """
    Deserialize a stream or string of YAML data,
    as written by the Serializer above.

    ``stream_or_string`` is either a string of YAML or a file-like object
    to read YAML from. The expected document is a mapping of model name to
    a mapping of primary key to field dict. Any ``options`` are passed
    through unchanged to PythonDeserializer.

    This is a generator: it yields each object produced by
    PythonDeserializer, and nothing is parsed until it is iterated.
    An empty document, or a model entry with no objects under it,
    contributes no objects rather than raising.
    """
    if isinstance(stream_or_string, basestring):
        stream = StringIO(stream_or_string)
    else:
        stream = stream_or_string

    # NOTE(security): yaml.load without a restricted Loader can construct
    # arbitrary Python objects. Only feed this trusted fixture data, or
    # consider switching to yaml.safe_load if untrusted input is possible.
    #
    # An empty (or comment-only) document loads as None; treat that as
    # "no objects" instead of failing on None.iteritems().
    nested = yaml.load(stream) or {}

    # Reconstruct the flat object list as PythonDeserializer expects
    # NOTE: This could choke on large data sets, since it
    # constructs the flattened data list in memory
    data = []
    for model, objects in nested.iteritems():
        # A model key with nothing under it ("app.model:") loads as None.
        # Add the model name back into each object dict
        for pk, fields in (objects or {}).iteritems():
            data.append({'model': model, 'pk': pk, 'fields': fields})

    # Deserialize the flattened data
    for obj in PythonDeserializer(data, **options):
        yield obj