I needed the ability to serialize and deserialize my database, which contains millions of objects. The existing XML serializer encountered spurious parse errors; the JSON serializer failed to handle UTF-8 even when it was asked to; and both the JSON and YAML serializers tried to keep all the representations in memory simultaneously.
This custom serializer is the only one that has done the job. It uses YAML's "stream of documents" model so that it can successfully serialize and deserialize large databases.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | """
Improved YAML serializer by [email protected]. Uses a stream of documents so that
it doesn't have to keep all database entries in memory.
Requires PyYaml (http://pyyaml.org/), but that's checked for in __init__.
To use it, add a line like this to your settings.py::
SERIALIZATION_MODULES = {
'yaml': 'path.to.import.this.module'
}
"""
from StringIO import StringIO
import yaml
from django.utils.encoding import smart_unicode
try:
import decimal
except ImportError:
from django.utils import _decimal as decimal # Python 2.3 fallback
from django.db import models
from django.core.serializers.python import Serializer as PythonSerializer
from django.core.serializers.python import Deserializer as PythonDeserializer
class DjangoSafeDumper(yaml.SafeDumper):
def represent_decimal(self, data):
return self.represent_scalar('tag:yaml.org,2002:str', str(data))
DjangoSafeDumper.add_representer(decimal.Decimal, DjangoSafeDumper.represent_decimal)
class Serializer(PythonSerializer):
"""
Convert a queryset to YAML.
"""
internal_use_only = False
def handle_field(self, obj, field):
# A nasty special case: base YAML doesn't support serialization of time
# types (as opposed to dates or datetimes, which it does support). Since
# we want to use the "safe" serializer for better interoperability, we
# need to do something with those pesky times. Converting 'em to strings
# isn't perfect, but it's better than a "!!python/time" type which would
# halt deserialization under any other language.
if isinstance(field, models.TimeField) and getattr(obj, field.name) is not None:
self._current[field.name] = str(getattr(obj, field.name))
else:
super(Serializer, self).handle_field(obj, field)
def end_object(self, obj):
the_object = {
"model" : smart_unicode(obj._meta),
"pk" : smart_unicode(obj._get_pk_val(), strings_only=True),
"fields" : self._current
}
self._current = None
dumpstr = yaml.dump(the_object, Dumper=DjangoSafeDumper,
explicit_start=True, **self.options)
self.stream.write(dumpstr)
def start_serialization(self):
self.options.pop('stream', None)
self.options.pop('fields', None)
PythonSerializer.start_serialization(self)
def end_serialization(self):
self.stream.close()
def getvalue(self):
return self.stream.getvalue()
def Deserializer(stream_or_string, **options):
"""
Deserialize a stream or string of YAML data.
"""
if isinstance(stream_or_string, basestring):
stream = StringIO(stream_or_string)
else:
stream = stream_or_string
for obj in PythonDeserializer(yaml.load_all(stream)):
yield obj
|
More like this
- Template tag - list punctuation for a list of items by shapiromatron 11 months, 2 weeks ago
- JSONRequestMiddleware adds a .json() method to your HttpRequests by cdcarter 11 months, 2 weeks ago
- Serializer factory with Django Rest Framework by julio 1 year, 6 months ago
- Image compression before saving the new model / work with JPG, PNG by Schleidens 1 year, 7 months ago
- Help text hyperlinks by sa2812 1 year, 7 months ago
Comments
Please login first before commenting.