Login

XML tabledump to model

Author:
fivethreeo
Posted:
March 2, 2007
Language:
Python
Version:
Pre .96
Tags:
xml
Score:
2 (after 2 ratings)

An easy way to move table data into a Django model using an XML dump exported from phpMyAdmin.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from news.models import Article
from django.template.defaultfilters import slugify
from elementtree import ElementTree
import datetime, time

# Some of the code below comes from ASPN (ActiveState Programmer Network).

class XmlListConfig(list):
    '''
    List built from a sequence of XML elements.

    Each element yielded by iterating ``aList`` contributes at most one item:

    * an element that has child elements becomes an ``XmlDictConfig`` when it
      has exactly one child or its first two children carry different tags,
      and a nested ``XmlListConfig`` when its first two children share a tag;
    * an element without children contributes its text with surrounding
      whitespace stripped; it is skipped when the text is missing or blank.

    Attributes of childless elements are ignored here.
    '''
    def __init__(self, aList):
        list.__init__(self)
        for element in aList:
            # Count children explicitly: truth-testing an Element is
            # deprecated in ElementTree and must not be relied upon.
            child_count = len(element)
            if child_count:
                # treat like dict - only the first two tags are compared
                if child_count == 1 or element[0].tag != element[1].tag:
                    self.append(XmlDictConfig(element))
                # treat like list
                else:
                    self.append(XmlListConfig(element))
            elif element.text:
                text = element.text.strip()
                if text:
                    self.append(text)


class XmlDictConfig(dict):
    '''
    Dictionary built from an XML element.

    The attributes of ``parent_element`` become keys of the dict, and every
    child element is stored under its tag name:

    * a child with children of its own becomes a nested ``XmlDictConfig``
      (or a one-key dict holding an ``XmlListConfig`` when its first two
      children share a tag), merged with the child's attributes;
    * a childless child with attributes becomes a plain dict of those
      attributes (its text is dropped);
    * any other child maps to its text, which is ``None`` for an empty tag.

    Children that share a tag overwrite each other; the last one wins.

    Example usage:

    >>> tree = ElementTree.parse('your_file.xml')
    >>> root = tree.getroot()
    >>> xmldict = XmlDictConfig(root)

    Or, if you want to use an XML string:

    >>> root = ElementTree.XML(xml_string)
    >>> xmldict = XmlDictConfig(root)

    And then use xmldict for what it is... a dict.
    '''
    def __init__(self, parent_element):
        dict.__init__(self)
        # items() is a list of (name, value) pairs; empty means no-op
        self.update(parent_element.items())
        for element in parent_element:
            # Count children explicitly: truth-testing an Element is
            # deprecated in ElementTree and must not be relied upon.
            child_count = len(element)
            if child_count:
                # treat like dict - we assume that if the first two tags
                # in a series are different, then they are all different.
                if child_count == 1 or element[0].tag != element[1].tag:
                    aDict = XmlDictConfig(element)
                # treat like list - we assume that if the first two tags
                # in a series are the same, then the rest are the same.
                else:
                    # here, we put the list in dictionary; the key is the
                    # tag name the list elements all share in common, and
                    # the value is the list itself
                    aDict = {element[0].tag: XmlListConfig(element)}
                # if the tag has attributes, add those to the dict
                aDict.update(element.items())
                self[element.tag] = aDict
            # this assumes that if you've got an attribute in a tag,
            # you won't be having any text. This may or may not be a
            # good idea -- time will tell. It works for the way we are
            # currently doing XML configuration files...
            elif element.items():
                self[element.tag] = dict(element.items())
            # finally, if there are no child tags and no attributes, extract
            # the text
            else:
                self[element.tag] = element.text

def fixdate(value):
    '''
    Convert an SQL date string in ``YYYY-MM-DD`` form to a ``datetime.date``.

    Raises ``ValueError`` (from ``time.strptime``) when ``value`` does not
    match that format.
    '''
    parsed = time.strptime(value, '%Y-%m-%d')
    # the first three fields of the parsed time are year, month, day
    year, month, day = parsed[:3]
    return datetime.date(year, month, day)

# Load the XML dump. Every child of the root element is treated as one row,
# and each row is expected to contain <heading>, <date>, <summary> and
# <content> child elements, e.g.:
#
# <database>
#     <table>
#         <heading>...</heading>
#         ...
#     </table>
#     <table>
#         <heading>...</heading>
#         ...
tree = ElementTree.parse('news.xml')
root = tree.getroot()

# Build the list of row dicts in a single pass (no repeated list copying).
xmllist = [XmlDictConfig(row) for row in root]

for row in xmllist:
    # instantiate model; the slug is cut to 50 characters
    slug = slugify(row['heading'])[:50]
    date = fixdate(row['date'])
    n = Article(
        slug=slug,
        heading=row['heading'],
        dato=date,
        # empty tags parse to None, so fall back to a single space
        summary=row['summary'] or ' ',
        tekst=row['content'] or ' ',
    )
    # save model
    n.save()

More like this

  1. In-memory XML-RPC server based on URL by diverman 5 years, 1 month ago
  2. Syndication Feed for JSON by cato 2 years, 7 months ago
  3. An XML-RPC Decorator by blinks 8 years, 4 months ago
  4. An alternative model serializer for django models by isaact 6 years, 9 months ago
  5. Improved Accept middleware with webkit workaround by raven_nevermore 4 years, 8 months ago

Comments

Please login first before commenting.