Here's the test Python script, ugly as it is!
Lodge It
New
All
About
?
Paste #83093
Paste Details
posted on 2008-08-23 @ 15:22
reply to this paste
download paste
compare with paste
select different colorscheme
Autumn Borland Bw Colorful Default Emacs Friendly Fruity Manni Murphy
Native Pastie Perldoc Trac Vs
toggle line numbers
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/python
#
# test.py
#
# scrapes/extracts the basic data for the college
#
#
# the app gets/stores
# name
# url
# address (street/city/state)
# phone
#
################################################## ####################3
#test python script
import re
import libxml2dom
import urllib
import urllib2
import sys, string
from mechanize import Browser
import mechanize
#import tidy
import os.path
import cookielib
from libxml2dom import Node
from libxml2dom import NodeList
import subprocess
import MySQLdb
#import mysql_config
import time
########################
#
# Parse pricegrabber.com
########################
# Short aliases for the urllib2 entry points.
urlopen = urllib2.urlopen
##cj = urllib2.cookielib.LWPCookieJar()
Request = urllib2.Request

# Two mechanize browsers are created; only `br` is used in the code below.
br = Browser()
br2 = Browser()

user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
# Sample form values and request headers; not referenced further below.
values1 = {
    'name': 'Michael Foord',
    'location': 'Northampton',
    'language': 'Python',
}
headers = {'User-Agent': user_agent}

# Start page to scrape.  Alternative that was previously assigned and then
# immediately overwritten:
#url = "http://schedule.berkeley.edu/"
url = "http://schedule.psu.edu/"

#=======================================
if __name__ == "__main__":
    # main app: fetch the start page, parse it as HTML and print the
    # `action` attribute of the form named 'cos_search1'.
    txdata = None  # not used below

    #----------------------------
    # configure the browser before fetching the start page
    #br.set_cookiejar(cj)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    br.addheaders = [('User-Agent', 'Firefox')]

    murl = url
    # Single-argument print(...) yields the same output as the original
    # Python 2 print statements.
    print("url = %s" % murl)
    br.open(murl)
    #cj.save(COOKIEFILE) # resave cookies
    res = br.response()  # this is a copy of response
    s = res.read()

    # s contains HTML not XML text
    d = libxml2dom.parseString(s, html=1)

    # Other queries that were tried earlier:
    #tn1 = "//div[@id='main_content']/form[1]/input[position()=1]/@name"
    #q = "//img/parent::*/attribute::href"
    q = "//form[@name='cos_search1']/@action"
    t1 = d.xpath(q)
    print("href =  %s" % (t1,))

    # Guard against an empty result: indexing t1[0] on an empty node list
    # would otherwise die with a bare IndexError.
    if not t1:
        sys.exit("no nodes matched xpath %r" % q)

    attr = t1[0]
    print("hnode = %s" % attr.nodeValue)
    print("htest = %s" % attr.textContent)
    print("htesttt = %s" % attr.toString())
    sys.exit()
thanks!!
-----Original Message-----
From: py*****************************************@python .org
[mailto:py***************************************** @python.org]On Behalf
Of Fredrik Lundh
Sent: Saturday, August 23, 2008 5:58 AM
To: py*********@python.org
Subject: Re: xpath questions...
bruce wrote:
Regarding the xpath question I've posed, some have said that it shouldn't be
here on the mailing list. Given that I'm writing the test scripts/apps in/using
python, using the python libs, where else should it be posted?
I mean, I could post the entire sample script so you can see that it's
python, but I simplified the issue.

There was zero Python content left after the simplification. Maybe you
should at least mention what library you're using to "play around with
xpath and the html dom"?
</F>
--
http://mail.python.org/mailman/listinfo/python-list