On May 28, 3:20 am, globalrev <skanem...@yahoo.se> wrote:
tried all kinds of combos to get this to work.
http://docs.python.org/lib/module-HTMLParser.html
from HTMLParser import HTMLParser
class MyHTMLParser(HTMLParser):
    """HTMLParser subclass that prints a line for every start and end tag."""

    def handle_starttag(self, tag, attrs):
        """Print the name of an opening tag; ``attrs`` is accepted but unused."""
        # Parenthesised so the line is valid both as a Python 2 print
        # statement and as a Python 3 print() call.
        print("Encountered the beginning of a %s tag" % tag)

    def handle_endtag(self, tag):
        """Print the name of a closing tag."""
        print("Encountered the end of a %s tag" % tag)
from HTMLParser import HTMLParser
import urllib
import myhtmlparser
# Reviewer notes on the quoted attempt; the code itself is left exactly as posted.
# NOTE(review): an HTMLParser instance is passed to the constructor here, but
# MyHTMLParser defines no __init__ that takes one -- presumably this is
# rejected; confirm against HTMLParser.__init__.
x = MyHTMLParser(HTMLParser())
# NOTE(review): the URL string literal below is split across two lines
# (it looks like line wrapping from the post); it must be rejoined to parse.
site = urllib.urlopen("http://docs.python.org/lib/module-
HTMLParser.html")
# NOTE(review): handle_starttag is defined above as (self, tag, attrs) but is
# called with no arguments, and nothing fetched from `site` is ever handed to
# the parser -- `row` is unused and feed() is never called.
for row in site:
* * print x.handle_starttag()
This works fine for me:
from HTMLParser import HTMLParser
class MyHTMLParser(HTMLParser):
    """HTMLParser subclass that prints a line for every start and end tag."""

    def handle_starttag(self, tag, attrs):
        """Print the name of an opening tag; ``attrs`` is accepted but unused."""
        # Parenthesised so the line is valid both as a Python 2 print
        # statement and as a Python 3 print() call.
        print("Encountered the beginning of a %s tag" % tag)

    def handle_endtag(self, tag):
        """Print the name of a closing tag."""
        print("Encountered the end of a %s tag" % tag)
#from HTMLParser import HTMLParser
import urllib
#import mythmlparser
# Fetch the page and push the whole document through the parser. The tag
# handlers are triggered by feed(), so they are never called by hand here.
# (The URL literal is kept on one line; a plain string cannot span lines.)
site = urllib.urlopen("http://docs.python.org/lib/module-HTMLParser.html")
try:
    # Instantiate the subclass directly -- not MyHTMLParser(HTMLParser()).
    x = MyHTMLParser()
    x.feed(site.read())
    x.close()  # tell the parser the input is complete
    # The former "for row in site: print x.handle_starttag()" loop is gone:
    # it ran after site.read() and called the handler without its required
    # tag/attrs arguments.
finally:
    # Release the connection even if fetching or parsing raised.
    site.close()
You should also read this:
http://www.diveintopython.org/html_processing/extracting_data.html
for example