Michael Williams wrote:
Hi All,
I'm looking for a quality Python XML implementation. All of the DOM
and SAX implementations I've come across so far are rather
convoluted. Are there any quality implementations that will (after
parsing the XML) return an object that is accessible by name? Such as
the following:
xml = """
<book>
<title>MyBook</title>
<author>the author</author>
</book>
"""
And after parsing the XML allow me to access it as so:
book.title
I need it to somehow convert my XML into an intuitively referenceable
object. Any ideas? I could even do it myself if I knew the
mechanism by which Python classes do this (create variables on the fly).
Thanks in advance!
Michael
Here's an approach using ElementTree that worked for me. It's not generic
and it's a bit brittle (e.g. it won't handle missing nodes), but
maybe it would do for a simple, flat schema or for a prototype?
All the best
Gerard
(TOY CODE - NOT TESTED MUCH)
from elementtree import ElementTree
class ElementWrapper(object):
    """Thin wrapper that holds one ElementTree element in ``self.element``.

    The wrapped element can be loaded from / dumped to a string through the
    ``xml`` property, or read from / written to a file with ``parse`` and
    ``write``.
    """

    def __init__(self, element=None):
        """Wrap *element*; it may be None until ``xml`` or ``parse`` sets it."""
        self.element = element

    def __tostring(self):
        """Return the wrapped element serialised by ``ElementTree.tostring``."""
        return ElementTree.tostring(self.element)

    def __fromstring(self, xml):
        """Replace the wrapped element with the root parsed from *xml*."""
        self.element = ElementTree.fromstring(xml)

    # Reading serialises the current element; assigning parses the given
    # string and replaces the current element.
    xml = property(__tostring, __fromstring)

    def __str__(self):
        """Return the serialised element as a native string."""
        text = self.xml
        # Some ElementTree versions return bytes from tostring(); __str__
        # must return str, so decode in that case.  The default serialisation
        # is ASCII with character references, so 'us-ascii' is sufficient.
        if not isinstance(text, str):
            text = text.decode('us-ascii')
        return text

    def parse(self, infile):
        """Load *infile* (file name or file object) and wrap its root."""
        self.element = ElementTree.parse(infile).getroot()

    def write(self, outfile):
        """Write the wrapped element as an XML document to *outfile*."""
        ElementTree.ElementTree(self.element).write(outfile)
###########
from elementtree.ElementTree import Element
from elementwrapper import ElementWrapper
# XML namespace URI used to qualify the tag of every element built below
# (tags are written as '{namespace}LocalName').
xmlns = 'http://schemas/email/0.1'
class MailDocument(ElementWrapper):
    """Attribute-style access to a simple, namespaced ``Mail`` XML document.

    The root element holds seven children in a fixed order -- Date, From,
    Subject, To, Cc, Body, Attachments -- and every accessor locates its
    child by position.  A document loaded afterwards through the inherited
    ``xml`` property or ``parse`` must therefore use the same child order;
    missing nodes are not handled.
    """

    # Child positions under the root element, matching the order in which
    # __build_element appends them.
    _DATE, _FROM, _SUBJECT, _TO, _CC, _BODY, _ATTACHMENTS = range(7)

    # ------------------------------------------------------------------
    # Initialise
    # ------------------------------------------------------------------
    def __init__(self):
        """Create an empty mail document with every field set to ''."""
        super(MailDocument, self).__init__()
        self.__build_element()
        self.__set_uid()
        self.__set_date()
        self.__set_subject()
        self.set_from_header()
        self.__set_body()

    def __build_element(self):
        """Build the empty root element and its seven fixed children."""
        root = Element('{%s}Mail' % xmlns)
        # Order matters: it must agree with the _DATE.._ATTACHMENTS indices.
        for tag in ('Date', 'From', 'Subject', 'To', 'Cc', 'Body',
                    'Attachments'):
            root.append(Element('{%s}%s' % (xmlns, tag)))
        self.element = root

    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------
    def __get_uid(self):
        return self.element.get('id')

    def __set_uid(self, value=''):
        self.element.set('id', value)

    def __get_date(self):
        return self.element[self._DATE].text

    def __set_date(self, value=''):
        self.element[self._DATE].text = value

    def __get_from(self):
        sender = self.element[self._FROM]
        return sender.get('address'), sender.get('name')

    def __get_subject(self):
        return self.element[self._SUBJECT].text

    def __set_subject(self, value=''):
        self.element[self._SUBJECT].text = value

    def __get_body(self):
        return self.element[self._BODY].text

    def __set_body(self, value=''):
        self.element[self._BODY].text = value

    uid = property(__get_uid, __set_uid)
    # Read-only (address, name) tuple; change it with set_from_header().
    From = property(__get_from)
    subject = property(__get_subject, __set_subject)
    date = property(__get_date, __set_date)
    body = property(__get_body, __set_body)

    def set_from_header(self, address='', name=''):
        """Set the sender's address and display name on the From element."""
        sender = self.element[self._FROM]
        sender.set('address', address)
        sender.set('name', name)

    # ------------------------------------------------------------------
    # Lists
    # ------------------------------------------------------------------
    def __add_mailto(self, element, address='', name=''):
        """Append a ``mailto`` child carrying *address* and *name*."""
        element.append(
            Element('{%s}mailto' % xmlns, address=address, name=name))

    def __get_mailtos(self, element):
        """Return an (address, name) tuple for each child of *element*."""
        return [(item.get('address'), item.get('name')) for item in element]

    def add_to_header(self, address='', name=''):
        """Append a recipient to the To list."""
        self.__add_mailto(self.element[self._TO], address, name)

    def remove_to_header(self, index):
        """Remove the To recipient at position *index*."""
        recipients = self.element[self._TO]
        recipients.remove(recipients[index])

    def add_cc_header(self, address='', name=''):
        """Append a recipient to the Cc list."""
        self.__add_mailto(self.element[self._CC], address, name)

    def remove_cc_header(self, index):
        """Remove the Cc recipient at position *index*."""
        recipients = self.element[self._CC]
        recipients.remove(recipients[index])

    def add_attachment(self, filename='', fileuri='', filetype=''):
        """Append a ``Uri`` element describing one attachment.

        *filename* becomes the element text; *fileuri* and *filetype* are
        stored in the ``value`` and ``type`` attributes.
        """
        elem = Element('{%s}Uri' % xmlns, value=fileuri, type=filetype)
        elem.text = filename
        self.element[self._ATTACHMENTS].append(elem)

    def remove_attachment(self, index):
        """Remove the attachment at position *index*."""
        attachments = self.element[self._ATTACHMENTS]
        attachments.remove(attachments[index])

    def get_to_headers(self):
        """Return the To recipients as a list of (address, name) tuples."""
        return self.__get_mailtos(self.element[self._TO])

    def get_cc_headers(self):
        """Return the Cc recipients as a list of (address, name) tuples."""
        return self.__get_mailtos(self.element[self._CC])

    def get_attachments(self):
        """Return the attachments as (filename, uri, type) tuples."""
        # Reads self.element (not a name-mangled self.__element) and builds
        # a single list, so the method no longer raises on every call.
        return [(item.text, item.get('value'), item.get('type'))
                for item in self.element[self._ATTACHMENTS]]
# Sample mail document used by the demo under ``__main__``.  The child
# elements appear in the order MailDocument expects (Date, From, Subject,
# To, Cc, Body, Attachments).  The e-mail addresses are placeholders: the
# ``address='...'`` attributes must be well-formed or the XML will not parse.
xml_test = '''
<mail:Mail xmlns:mail="http://schemas/email/0.1">
<mail:Date>10/10/05</mail:Date>
<mail:From address='mr.jones@org.org' name='Mr. Jones'/>
<mail:Subject>just a note</mail:Subject>
<mail:To>
<mail:mailto address='mrs.jones@org.org' name='Mrs Jones' />
<mail:mailto address='alan.nother@org.org' name='Alan Nother' />
</mail:To>
<mail:Cc></mail:Cc>
<mail:Body>hi there,
just a note to say hi there!</mail:Body>
<mail:Attachments></mail:Attachments>
</mail:Mail>
'''
if __name__ == '__main__':
    # Small demo: load the sample document, read a few fields, then edit the
    # From header and the To list.  Single-argument print(...) is used so the
    # output is the same whether print is a statement or a function.

    def _show_to_headers(document):
        """Print the To recipients of *document*, one tuple per line."""
        print('To:')
        for hdr in document.get_to_headers():
            print(hdr)

    mail = MailDocument()
    mail.xml = xml_test
    # Alternative: load from a file instead of the inline sample.
    #mail.parse('test/data/test.xml')

    print('From: ' + mail.From[0])
    print('Subject: ' + mail.subject)

    mail.set_from_header('n**@new.com')
    print('From: ' + mail.From[0])

    mail.add_to_header('aaa.bbb@ccc', 'aaaaaa')
    mail.add_to_header('fff.ggg@hhh', 'ffffff')
    _show_to_headers(mail)

    mail.remove_to_header(1)
    _show_to_headers(mail)

    # Alternative: persist the edited document.
    #mail.write('test_copy.xml')