By using this site, you agree to our updated Privacy Policy and our Terms of Use. Manage your Cookies Settings.
464,751 Members | 1,073 Online
Bytes IT Community
+ Ask a Question
Need help? Post your question and get tips & solutions from a community of 464,751 IT Pros & Developers. It's quick & easy.

make a simple search function for homepage

P: n/a
I want to add a simple search function to my homepage. It needs to
search through all the html files of my homepage (about 300 pages), and
highlight the search words.

I ran some tests with HTMLParser; it works, but it is slow. So, my question is:
how can I improve its speed?

from HTMLParser import HTMLParser

class HightLightParser(HTMLParser):
    """Copy an HTML document to ``outfile`` with the search words highlighted.

    Every piece of markup the parser reports is written back out; only text
    nodes are altered, by wrapping each occurrence of a search word in
    ``<font color=red>...</font>``.

    Parameters:
        outfile: any object with a ``write(text)`` method; receives the output.
        words: iterable of literal, case-sensitive strings to highlight.
            Empty strings are ignored.
    """

    def __init__(self, outfile, words):
        import re  # function-scope import: the file itself only imports HTMLParser

        self.outfile = outfile
        self.words = words
        self.found = False  # kept for backward compatibility; never set by this class
        # One compiled alternation finds every word in a single pass over the
        # text, so markup inserted for one word can never be re-matched by
        # another word.  Longest words go first so that "then" wins over "the".
        literals = sorted([w for w in words if w], key=len, reverse=True)
        if literals:
            self._pattern = re.compile(
                "|".join([re.escape(w) for w in literals]))
        else:
            self._pattern = None
        HTMLParser.__init__(self)
        # Python 3's html.parser decodes character references into handle_data
        # by default; switch that off so they reach handle_entityref /
        # handle_charref and are copied through verbatim.  On Python 2 this is
        # just an unused attribute.
        self.convert_charrefs = False

    def _wrap(self, match):
        """Return the highlighted replacement for one regex match."""
        return "<font color=red>%s</font>" % match.group(0)

    def handle_starttag(self, tag, attrs):
        self.outfile.write(self.get_starttag_text())

    def handle_startendtag(self, tag, attrs):
        # The inherited version calls handle_starttag and then handle_endtag,
        # which would turn "<br/>" into "<br/></br>".
        self.outfile.write(self.get_starttag_text())

    def handle_endtag(self, tag):
        self.outfile.write("</%s>" % tag)

    def handle_data(self, data):
        if self._pattern is not None:
            data = self._pattern.sub(self._wrap, data)  # highlight
        self.outfile.write(data)

    # The handlers below pass non-text constructs through unchanged; without
    # them the inherited no-op handlers would drop them from the output.

    def handle_entityref(self, name):
        self.outfile.write("&%s;" % name)

    def handle_charref(self, name):
        self.outfile.write("&#%s;" % name)

    def handle_comment(self, data):
        self.outfile.write("<!--%s-->" % data)

    def handle_decl(self, decl):
        self.outfile.write("<!%s>" % decl)

    def handle_pi(self, data):
        self.outfile.write("<?%s>" % data)

    def unknown_decl(self, data):
        self.outfile.write("<![%s]>" % data)

class SearchParser(HTMLParser):
    """Detect whether any search word occurs in the text of an HTML document.

    After feeding a document, ``found`` is True if at least one of ``words``
    appeared (case-sensitively) inside a text node; tag names and attributes
    are not searched.

    Parameters:
        words: iterable of literal strings to look for.  Empty strings are
            ignored, since they would match any text.
    """

    def __init__(self, words):
        self.words = words
        self.found = False
        HTMLParser.__init__(self)

    def handle_data(self, data):
        if self.found:
            return  # already matched; no need to scan later text nodes
        for word in self.words:
            if word and word in data:  # search
                self.found = True
                break
# Driver: search input.htm for the words and, if any is present, write a
# highlighted copy of the page to output.htm.
words = ["the"]
x = SearchParser(words)
infile = open("input.htm")
try:
    data = infile.read()
finally:
    infile.close()  # release the handle even if the read fails
x.feed(data)
x.close()  # process any text the parser is still buffering
if x.found:
    outfile = open("output.htm", "w")
    try:
        y = HightLightParser(outfile, words)
        y.feed(data)
        y.close()  # process any text the parser is still buffering
    finally:
        outfile.close()  # guarantees the highlighted page is flushed to disk

Oct 31 '06 #1
Share this Question
Share on Google+
1 Reply

P: n/a
HYRY wrote:
I want to add a simple search function to my homepage. It needs to
search through all the html files of my homepage (about 300 pages), and
highlight the search words.

I ran some tests with HTMLParser; it works, but it is slow. So, my question is:
how can I improve its speed?

from HTMLParser import HTMLParser

class HightLightParser(HTMLParser):
    """Copy an HTML document to ``outfile`` with the search words highlighted.

    Every piece of markup the parser reports is written back out; only text
    nodes are altered, by wrapping each occurrence of a search word in
    ``<font color=red>...</font>``.

    Parameters:
        outfile: any object with a ``write(text)`` method; receives the output.
        words: iterable of literal, case-sensitive strings to highlight.
            Empty strings are ignored.
    """

    def __init__(self, outfile, words):
        import re  # function-scope import: the file itself only imports HTMLParser

        self.outfile = outfile
        self.words = words
        self.found = False  # kept for backward compatibility; never set by this class
        # One compiled alternation finds every word in a single pass over the
        # text, so markup inserted for one word can never be re-matched by
        # another word.  Longest words go first so that "then" wins over "the".
        literals = sorted([w for w in words if w], key=len, reverse=True)
        if literals:
            self._pattern = re.compile(
                "|".join([re.escape(w) for w in literals]))
        else:
            self._pattern = None
        HTMLParser.__init__(self)
        # Python 3's html.parser decodes character references into handle_data
        # by default; switch that off so they reach handle_entityref /
        # handle_charref and are copied through verbatim.  On Python 2 this is
        # just an unused attribute.
        self.convert_charrefs = False

    def _wrap(self, match):
        """Return the highlighted replacement for one regex match."""
        return "<font color=red>%s</font>" % match.group(0)

    def handle_starttag(self, tag, attrs):
        self.outfile.write(self.get_starttag_text())

    def handle_startendtag(self, tag, attrs):
        # The inherited version calls handle_starttag and then handle_endtag,
        # which would turn "<br/>" into "<br/></br>".
        self.outfile.write(self.get_starttag_text())

    def handle_endtag(self, tag):
        self.outfile.write("</%s>" % tag)

    def handle_data(self, data):
        if self._pattern is not None:
            data = self._pattern.sub(self._wrap, data)  # highlight
        self.outfile.write(data)

    # The handlers below pass non-text constructs through unchanged; without
    # them the inherited no-op handlers would drop them from the output.

    def handle_entityref(self, name):
        self.outfile.write("&%s;" % name)

    def handle_charref(self, name):
        self.outfile.write("&#%s;" % name)

    def handle_comment(self, data):
        self.outfile.write("<!--%s-->" % data)

    def handle_decl(self, decl):
        self.outfile.write("<!%s>" % decl)

    def handle_pi(self, data):
        self.outfile.write("<?%s>" % data)

    def unknown_decl(self, data):
        self.outfile.write("<![%s]>" % data)

class SearchParser(HTMLParser):
    """Detect whether any search word occurs in the text of an HTML document.

    After feeding a document, ``found`` is True if at least one of ``words``
    appeared (case-sensitively) inside a text node; tag names and attributes
    are not searched.

    Parameters:
        words: iterable of literal strings to look for.  Empty strings are
            ignored, since they would match any text.
    """

    def __init__(self, words):
        self.words = words
        self.found = False
        HTMLParser.__init__(self)

    def handle_data(self, data):
        if self.found:
            return  # already matched; no need to scan later text nodes
        for word in self.words:
            if word and word in data:  # search
                self.found = True
                break
# Driver: search input.htm for the words and, if any is present, write a
# highlighted copy of the page to output.htm.
words = ["the"]
x = SearchParser(words)
infile = open("input.htm")
try:
    data = infile.read()
finally:
    infile.close()  # release the handle even if the read fails
x.feed(data)
x.close()  # process any text the parser is still buffering
if x.found:
    outfile = open("output.htm", "w")
    try:
        y = HightLightParser(outfile, words)
        y.feed(data)
        y.close()  # process any text the parser is still buffering
    finally:
        outfile.close()  # guarantees the highlighted page is flushed to disk
google "google".

Seriously though, you may want to index your pages first. Maybe
check out Divmod Xapwrap (http://divmod.org/trac/wiki/DivmodXapwrap).

James
Oct 31 '06 #2

This discussion thread is closed

Replies have been disabled for this discussion.