"""
Improved YAML serializer by rspeer@mit.edu. Uses a stream of documents so that
it doesn't have to keep all database entries in memory.

Requires PyYaml (http://pyyaml.org/), but that's checked for in __init__.

To use it, add a line like this to your settings.py::
  
  SERIALIZATION_MODULES = {
      'yaml': 'path.to.import.this.module'
  }
"""

from StringIO import StringIO
import yaml
from django.utils.encoding import smart_unicode

try:
    import decimal
except ImportError:
    from django.utils import _decimal as decimal # Python 2.3 fallback

from django.db import models
from django.core.serializers.python import Serializer as PythonSerializer
from django.core.serializers.python import Deserializer as PythonDeserializer

class DjangoSafeDumper(yaml.SafeDumper):
    def represent_decimal(self, data):
        return self.represent_scalar('tag:yaml.org,2002:str', str(data))

DjangoSafeDumper.add_representer(decimal.Decimal, DjangoSafeDumper.represent_decimal)

class Serializer(PythonSerializer):
    """
    Convert a queryset to YAML.
    """
    
    internal_use_only = False
    
    def handle_field(self, obj, field):
        # A nasty special case: base YAML doesn't support serialization of time
        # types (as opposed to dates or datetimes, which it does support). Since
        # we want to use the "safe" serializer for better interoperability, we
        # need to do something with those pesky times. Converting 'em to strings
        # isn't perfect, but it's better than a "!!python/time" type which would
        # halt deserialization under any other language.
        if isinstance(field, models.TimeField) and getattr(obj, field.name) is not None:
            self._current[field.name] = str(getattr(obj, field.name))
        else:
            super(Serializer, self).handle_field(obj, field)
    
    def end_object(self, obj):
        the_object = {
            "model"  : smart_unicode(obj._meta),
            "pk"     : smart_unicode(obj._get_pk_val(), strings_only=True),
            "fields" : self._current
        }
        self._current = None
        dumpstr = yaml.dump(the_object, Dumper=DjangoSafeDumper,
        explicit_start=True, **self.options)
        self.stream.write(dumpstr)

    def start_serialization(self):
        self.options.pop('stream', None)
        self.options.pop('fields', None)
        PythonSerializer.start_serialization(self)

    def end_serialization(self):
        self.stream.close()

    def getvalue(self):
        return self.stream.getvalue()

def Deserializer(stream_or_string, **options):
    """
    Deserialize a stream or string of YAML data.
    """
    if isinstance(stream_or_string, basestring):
        stream = StringIO(stream_or_string)
    else:
        stream = stream_or_string
    for obj in PythonDeserializer(yaml.load_all(stream)):
        yield obj