I want to search for a particular text on an html page ---> Build Complete
Further execution should be only if the build is successful which is denoted by the text 'Build Complete' on the webpage.
Expand|Select|Wrap|Line Numbers
- URL = "http://11.12.13.27:8080/cruisecontrol"
- from urllib2 import urlopen
- from HTMLParser import HTMLParser
- import re
- # Fetching links using HTMLParser
- def get_links(url):
- parser = MyHTMLParser()
- parser.feed(urlopen(url).read())
- parser.close()
- return parser.links
- # Build url for Deploy page
- def get_deploy_url():
- url = URL + "/buildresults/Poker-TTM_%s_nightly_build" % branch
- print url
- check_re = re.compile(r"/Build Complete/")
- print check_re
- if check_re.search(url):
- print "hello"
- for link in get_links(url):
- if link["href"].startswith("Deploy"):
- return "%s/%s" % (URL, link["href"])
- print link["href"]
- # Build url for Destination page
- def get_destination_url():
- url = get_deploy_url()
- print url
- destination_re = re.compile(r"%s" % destination)
- for link in get_links(url):
- if destination_re.search(link["href"]):
- return "http://11.12.13.27:8080/cruisecontrol/" + link["href"]
- # Parsing HTML pages
- class MyHTMLParser(HTMLParser):
- def __init__(self, *args, **kwd):
- HTMLParser.__init__(self, *args, **kwd)
- self.links = []
- def handle_starttag(self, tag, attrs):
- if tag == "a":
- attrs = dict(attrs)
- if "href" in attrs:
- self.links.append(dict(attrs))
- def handle_endtag(self, tag):
- pass
- if __name__ == "__main__":
- # Read the branch name and the test destination to deploy on
- lines = [x.split(':') for x in open("branch_dest.txt")]
- print lines
- branch = "%s" % lines[0][1].strip()
- print branch
- destination = "%s" % lines[1][1].strip()
- print destination
- final_url = get_destination_url()
- if final_url is None:
- print "Could not find a destination to deploy"
- else:
- print final_url
Expand|Select|Wrap|Line Numbers
- Traceback (most recent call last):
- File "C:\deploy_input.py", line 61, in <module>
- final_url = get_destination_url()
- File "C:\deploy_input.py", line 33, in get_destination_url
- for link in get_links(url):
- File "C:\deploy_input.py", line 11, in get_links
- parser.feed(urlopen(url).read())
- File "C:\Python26\lib\urllib2.py", line 126, in urlopen
- return _opener.open(url, data, timeout)
- File "C:\Python26\lib\urllib2.py", line 382, in open
- req.timeout = timeout
- AttributeError: 'NoneType' object has no attribute 'timeout'