Here's a (surprise!) pyparsing solution. -- Paul

(Get pyparsing at http://pyparsing.sourceforge.net.)

# Parse rows of an HTML timesheet table with pyparsing, extracting the date,
# the day of the week, an optional absence name, and every clock time found
# in the remaining cells.
from pyparsing import (
    Combine,
    Literal,
    OneOrMore,
    Optional,
    Suppress,
    Word,
    alphanums,
    makeHTMLTags,
    nums,
    oneOf,
)

# Sample rows: one ordinary working day and one day booked as holiday.
data = [
    """<td>04/01/2006</td><td>Wednesday</td><td> </td><td>09:14</td><td>12:44</td><td>12:50</td><td>17:58</td><td> </td><td> </td><td> </td><td> </td><td>08:14</td>""",
    """<td>03/01/2006</td><td>Tuesday</td><td>Annual_Holiday</td><td> </td><td> </td><td> </td><td> </td><td> </td><td> </td><td> </td><td> </td><td>08:00</td>""",
]

# The <td>/</td> tags only delimit cells, so keep them out of the results.
startTD, endTD = makeHTMLTags("TD")
startTD = startTD.suppress()
endTD = endTD.suppress()

# The day names must stay in ONE string literal; wrapping it across lines
# leaves an unterminated string.
dayOfWeek = oneOf("Sunday Monday Tuesday Wednesday Thursday Friday Saturday")

# Filler for an empty cell.  By default pyparsing skips leading whitespace
# before matching each token, so Literal(" ") can never match a plain space.
# Accept the "&nbsp;" entity, a raw non-breaking space character, or nothing
# at all (a cell holding only skippable whitespace).
nbsp = Optional(Literal("&nbsp;") | Literal(u"\xa0"))

time = Combine(Word(nums, exact=2) + ":" + Word(nums, exact=2))
date = Combine(
    Word(nums, exact=2) + "/" + Word(nums, exact=2) + "/" + Word(nums, exact=4)
)

# One table row.  Because nbsp may match the empty string, the real content
# alternative has to be tried first in each "content or filler" cell.
entry = (
    startTD + date.setResultsName("date") + endTD
    + startTD + dayOfWeek.setResultsName("dayOfWeek") + endTD
    + startTD
    + (Word(alphanums + "_").setResultsName("name") | Suppress(nbsp))
    + endTD
    # Despite the results name, "dates" collects the clock times of the row.
    + OneOrMore(startTD + (time | Suppress(nbsp)) + endTD).setResultsName("dates")
)

# Single-argument print(...) calls behave the same on Python 2 and Python 3.
for d in data:
    res = entry.parseString(d)
    print(res.date)
    print(res.dayOfWeek)
    print(res.name)
    print(res.dates)
    print("")

Returns:

04/01/2006

Wednesday

['09:14', '12:44', '12:50', '17:58', '08:14']

03/01/2006

Tuesday

Annual_Holiday

['08:00']