Michael Williams wrote:
Hi All,
I'm looking for a quality Python XML implementation. All of the DOM
and SAX implementations I've come across so far are rather
convoluted. Are there any quality implementations that will (after
parsing the XML) return an object that is accessible by name? Such as
the following:
xml = """
<book>
<title>MyBook </title>
<author>the author</author>
</book>
"""
And, after parsing the XML, allow me to access it like so:
book.title
I need it to somehow convert my XML into an intuitively referenceable
object. Any ideas? I could even do it myself if I knew the
mechanism by which Python classes do this (create variables on the fly).
Thanks in advance!
Michael
Here's an approach using ElementTree that worked for me. It's not generic
and it's a bit brittle (e.g. it won't handle missing nodes), but it might
do for a simple, flat schema or for a prototype.
All the best
Gerard
(TOY CODE - NOT TESTED MUCH)
from elementtree import ElementTree
class ElementWrapper(object):
    """Thin convenience wrapper around a single ElementTree element.

    The wrapped element lives in ``self.element``.  The ``xml`` property
    serialises it on read and replaces it with a freshly parsed element on
    write; ``parse`` and ``write`` do the same against files.
    """

    def __init__(self, element=None):
        """Wrap *element* (an ElementTree element, or None for "empty")."""
        self.element = element

    def __tostring(self):
        """Return the wrapped element serialised by ``ElementTree.tostring``."""
        return ElementTree.tostring(self.element)

    def __fromstring(self, xml):
        """Parse the string *xml* and wrap its root element."""
        self.element = ElementTree.fromstring(xml)

    # Reading ``xml`` serialises the element; assigning to it re-parses.
    xml = property(__tostring, __fromstring)

    def __str__(self):
        """Return the serialised element as a native string."""
        serialised = self.xml
        # ElementTree.tostring() yields bytes on Python 3, but __str__ must
        # return str there; on Python 2 the value already is a str.
        if not isinstance(serialised, str):
            serialised = serialised.decode("utf-8")
        return serialised

    def parse(self, infile):
        """Load *infile* (file name or file object) and wrap its root element."""
        self.element = ElementTree.parse(infile).getroot()

    def write(self, outfile):
        """Write the wrapped element as an XML document to *outfile*."""
        ElementTree.ElementTree(self.element).write(outfile)
###########
from elementtree.Ele mentTree import Element
from elementwrapper import ElementWrapper
# XML namespace URI; interpolated into the tag names built below as '{uri}Tag'.
xmlns = 'http://schemas/email/0.1'
class MailDocument(ElementWrapper):
    """Attribute-style access to a simple, flat e-mail XML document.

    The root ``Mail`` element carries an ``id`` attribute and seven children
    which are addressed *by position*, not by tag name.  Any document loaded
    through ``xml`` or ``parse`` must therefore list its children in the
    order Date, From, Subject, To, Cc, Body, Attachments; a missing or
    reordered child makes the accessors read the wrong node or raise
    IndexError.
    """

    # Positions of the child elements below the root element.
    _DATE, _FROM, _SUBJECT, _TO, _CC, _BODY, _ATTACHMENTS = range(7)

    ########################################################
    # Initialise
    #
    def __init__(self):
        """Build an empty mail skeleton with blank id, date, subject, sender and body."""
        self.__build_element()
        self.__set_uid()
        self.__set_date()
        self.__set_subject()
        self.set_from_header()
        self.__set_body()

    def __build_element(self):
        """Create the namespaced root element with its seven empty children."""
        root = Element('{%s}Mail' % xmlns)
        # Order matters: it must match the position constants above.
        for tag in ('Date', 'From', 'Subject', 'To', 'Cc', 'Body',
                    'Attachments'):
            root.append(Element('{%s}%s' % (xmlns, tag)))
        self.element = root
    #
    # End Initialise
    ########################################################

    ########################################################
    # Properties
    #
    def __get_uid(self):
        return self.element.get('id')

    def __set_uid(self, uid=''):
        self.element.set('id', uid)

    def __get_date(self):
        return self.element[self._DATE].text

    def __set_date(self, value=''):
        self.element[self._DATE].text = value

    def __get_from(self):
        sender = self.element[self._FROM]
        return sender.get('address'), sender.get('name')

    def __get_subject(self):
        return self.element[self._SUBJECT].text

    def __set_subject(self, value=''):
        self.element[self._SUBJECT].text = value

    def __get_body(self):
        return self.element[self._BODY].text

    def __set_body(self, value=''):
        self.element[self._BODY].text = value

    # ``id`` attribute of the root element.
    uid = property(__get_uid, __set_uid)
    # Read-only (address, name) tuple; change it with set_from_header().
    From = property(__get_from)
    subject = property(__get_subject, __set_subject)
    date = property(__get_date, __set_date)
    body = property(__get_body, __set_body)

    def set_from_header(self, address='', name=''):
        """Set the sender's ``address`` and ``name`` attributes."""
        sender = self.element[self._FROM]
        sender.set('address', address)
        sender.set('name', name)
    #
    # End Properties
    ########################################################

    ########################################################
    # Lists
    #
    def __add_mailto(self, element, address='', name=''):
        """Append a ``mailto`` child carrying *address* and *name* to *element*."""
        element.append(
            Element('{%s}mailto' % xmlns, address=address, name=name))

    def add_to_header(self, address='', name=''):
        """Append a recipient to the To list."""
        self.__add_mailto(self.element[self._TO], address, name)

    def remove_to_header(self, index):
        """Remove the To recipient at position *index*."""
        recipients = self.element[self._TO]
        recipients.remove(recipients[index])

    def add_cc_header(self, address='', name=''):
        """Append a recipient to the Cc list."""
        self.__add_mailto(self.element[self._CC], address, name)

    def remove_cc_header(self, index):
        """Remove the Cc recipient at position *index*."""
        recipients = self.element[self._CC]
        recipients.remove(recipients[index])

    def add_attachment(self, filename='', fileuri='', filetype=''):
        """Append a ``Uri`` child: *filename* as text, URI and type as attributes."""
        elem = Element('{%s}Uri' % xmlns, value=fileuri, type=filetype)
        elem.text = filename
        self.element[self._ATTACHMENTS].append(elem)

    def remove_attachment(self, index):
        """Remove the attachment at position *index*."""
        attachments = self.element[self._ATTACHMENTS]
        attachments.remove(attachments[index])

    def get_to_headers(self):
        """Return the To recipients as a list of (address, name) tuples."""
        return [(item.get('address'), item.get('name'))
                for item in self.element[self._TO]]

    def get_cc_headers(self):
        """Return the Cc recipients as a list of (address, name) tuples."""
        return [(item.get('address'), item.get('name'))
                for item in self.element[self._CC]]

    def get_attachments(self):
        """Return the attachments as a list of (filename, uri, type) tuples."""
        # The original read the non-existent ``self.__element`` and appended
        # to an undefined ``hdrs`` list, so it could never succeed.
        return [(item.text, item.get('value'), item.get('type'))
                for item in self.element[self._ATTACHMENTS]]
    #
    # End Lists
    ########################################################
# Sample document for the demo below.  Child order must be Date, From,
# Subject, To, Cc, Body, Attachments because MailDocument indexes children by
# position.
# NOTE(review): the e-mail addresses in the posted sample were redacted by the
# list archive; the ones used here are placeholders, not the originals.
xml_test = '''
<mail:Mail xmlns:mail="http://schemas/email/0.1">
<mail:Date>10/10/05</mail:Date>
<mail:From address='mr.jones@org.org' name='Mr. Jones'/>
<mail:Subject>just a note</mail:Subject>
<mail:To>
<mail:mailto address='mrs.jones@org.org' name='Mrs Jones' />
<mail:mailto address='alan.nother@org.org' name='Alan Nother' />
</mail:To>
<mail:Cc></mail:Cc>
<mail:Body>hi there,
just a note to say hi there!</mail:Body>
<mail:Attachments></mail:Attachments>
</mail:Mail>
'''

if __name__ == '__main__':
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    mail = MailDocument()
    mail.xml = xml_test
    #mail.parse('test/data/test.xml')
    print('From: ' + mail.From[0])
    print('Subject: ' + mail.subject)
    mail.set_from_header('new@new.com')
    print('From: ' + mail.From[0])
    mail.add_to_header('aaa.bbb@ccc', 'aaaaaa')
    mail.add_to_header('fff.ggg@hhh', 'ffffff')
    print('To:')
    for hdr in mail.get_to_headers():
        print(hdr)
    # Drop the second recipient, then show the list again.
    mail.remove_to_header(1)
    print('To:')
    for hdr in mail.get_to_headers():
        print(hdr)
    #mail.write('test_copy.xml')