469,641 Members | 1,182 Online
Bytes | Developer Community
New Post

Home Posts Topics Members FAQ

Post your question to a community of 469,641 developers. It's quick & easy.

Debugging ScreenScrape Code

Hi All,
I have a very small screen scrape application, that has a small
problem. when I run the app and I have fiddler(an http tool to view
what is being sent by the requests/responses,
http://www.fiddlertool.com) the app works, and I am able to login to
the (intranet)website. If do not run the app while fiddler is running,
it does not work(the app returns html of the login page, instead of the
target page).

Here is the code, thanks in advance

Note it maybe easier to copy and paste this code into notepad to
view....

/*
* User: Mccollid
* Date: 10/3/2005
* Time: 11:25 AM
*
*/
using System;
using System.Drawing;
using System.Windows.Forms;
using System.Net;
using System.IO;
using System.Text;
using System.Web;

namespace ScreenScraper
{
/// <summary>
/// Description of MainForm.
/// </summary>
public class MainForm : System.Windows.Forms.Form
{
private System.Windows.Forms.Button button1;
private System.Windows.Forms.TextBox textBox1;
private string LOGIN_URL;
private string USERNAME;
private string PASSWORD;
private string SECRET_PAGE_URL;
private string COOKIEHOLDER;

public MainForm()
{
//
// The InitializeComponent() call is required for Windows Forms
designer support.
//

InitializeComponent();

//
// TODO: Add constructor code after the InitializeComponent() call.
//
}

[STAThread]
public static void Main(string[] args)
{
Application.Run(new MainForm());
}
void Button1Click(object sender, System.EventArgs e)
{
this.textBox1.Text="Connecting...";
this.LOGIN_URL="http://loginpage"; this.SECRET_PAGE_URL
="http://targetpage";
this.USERNAME ="UserName";
this.PASSWORD ="Password";

HttpWebRequest webrequest=WebRequest.Create(LOGIN_URL) as
HttpWebRequest;
StreamReader responseReader=new
StreamReader(webrequest.GetResponse().GetResponseS tream());

string responseData = responseReader.ReadToEnd();
//this.textBox1.Text=responseData;

//extract PathInfo value and build our post data
string pathInfo=ExtractPathInfo(responseData);
MessageBox.Show(pathInfo,pathInfo);
//string
postData=String.Format("pathInfo={0}&username={1}& password={2}&Login=Login",
pathInfo, USERNAME, PASSWORD);
string
postData=String.Format("username={1}&password={2}& pathInfo={0}",
pathInfo, USERNAME, PASSWORD);

this.textBox1.Text=postData;

//have a cookie container ready to receive the forms auth cookie
CookieContainer cookies=new CookieContainer();

//now post to the login form
webrequest=WebRequest.Create(LOGIN_URL) as HttpWebRequest;
webrequest.Method="Post";
webrequest.Credentials = CredentialCache.DefaultCredentials;
webrequest.UserAgent="User-Agent: Mozilla/4.0 (compatible; MSIE 6.0;
Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 1.0.3705)";
webrequest.Accept="Accept: image/gif, image/x-xbitmap, image/jpeg,
image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel,
application/vnd.ms-powerpoint, application/msword, */*";
webrequest.ContentType="application/x-www-form-urlencoded";

webrequest.AllowAutoRedirect=true;
webrequest.CookieContainer=cookies;

webrequest.Referer="http://unatime.merck.com/unatime/action/home";


//write the form values into the request message
StreamWriter requestWriter = new
StreamWriter(webrequest.GetRequestStream());
requestWriter.Write(postData);
requestWriter.Close();
//we don't need the contents of the response, just the cookie

try
{
webrequest.GetResponse().Close();
}
catch (WebException ee)
{
// MessageBox.Show(ee.Message);
// this.textBox1.Text=ee.Message;
}

//webrequest.GetResponse().Close();

//now we can send out cookie along with a request for the protected
page
webrequest = WebRequest.Create(SECRET_PAGE_URL) as HttpWebRequest;
webrequest.Method="Post";
webrequest.Credentials = CredentialCache.DefaultCredentials;
webrequest.UserAgent="User-Agent: Mozilla/4.0 (compatible; MSIE 6.0;
Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 1.0.3705)";
webrequest.Accept="Accept: image/gif, image/x-xbitmap, image/jpeg,
image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel,
application/vnd.ms-powerpoint, application/msword, */*";
webrequest.ContentType="application/x-www-form-urlencoded";
webrequest.AllowAutoRedirect=true;
webrequest.CookieContainer=cookies;
webrequest.Referer="http://unatime.merck.com/unatime/action/home";
responseReader=new
StreamReader(webrequest.GetResponse().GetResponseS tream());

//StreamReader readStream = new StreamReader
(webrequest.GetResponse().GetResponseStream(), Encoding.UTF8);

//and read the response
responseData = responseReader.ReadToEnd();
responseReader.Close();

//Response.Write(responseData);
this.textBox1.Text=responseData;

}
private string ExtractPathInfo(string s)
{
string viewStateNameDelimiter="pathInfo";
string valueDelimiter="value=\"";

int viewStateNamePosition=s.IndexOf(viewStateNameDelim iter);
int viewStateValuePosition=s.IndexOf(valueDelimiter,
viewStateNamePosition);

int viewStateStartPosition=viewStateValuePosition +
valueDelimiter.Length;
//int viewStateEndPosition=s.IndexOf("\"", viewStateStartPosition);
int viewStateEndPosition=s.IndexOf("\"", viewStateStartPosition);
return
HttpUtility.UrlEncodeUnicode(s.Substring(viewState StartPosition,
viewStateEndPosition-viewStateStartPosition));

}

}
}

Nov 17 '05 #1
6 1679
You are creating cookie container, which creates an empty cookie container.
You are assigning it to each web request. However, when you get your web
response, you aren't saving the cookies into the cookie container.
Therefore, on every call, you are failing to catch the cookies. It works
with the utility because that utility is catching the cookies for you.

--
--- Nick Malik [Microsoft]
MCSD, CFPS, Certified Scrummaster
http://blogs.msdn.com/nickmalik

Disclaimer: Opinions expressed in this forum are my own, and not
representative of my employer.
I do not answer questions on behalf of my employer. I'm just a
programmer helping programmers.
--
"Dan McCollick" <mc*********@hotmail.com> wrote in message
news:11**********************@g44g2000cwa.googlegr oups.com...
Hi All,
I have a very small screen scrape application, that has a small
problem. when I run the app and I have fiddler(an http tool to view
what is being sent by the requests/responses,
http://www.fiddlertool.com) the app works, and I am able to login to
the (intranet)website. If do not run the app while fiddler is running,
it does not work(the app returns html of the login page, instead of the
target page).

Here is the code, thanks in advance

Note it maybe easier to copy and paste this code into notepad to
view....

/*
* User: Mccollid
* Date: 10/3/2005
* Time: 11:25 AM
*
*/
using System;
using System.Drawing;
using System.Windows.Forms;
using System.Net;
using System.IO;
using System.Text;
using System.Web;

namespace ScreenScraper
{
/// <summary>
/// Description of MainForm.
/// </summary>
public class MainForm : System.Windows.Forms.Form
{
private System.Windows.Forms.Button button1;
private System.Windows.Forms.TextBox textBox1;
private string LOGIN_URL;
private string USERNAME;
private string PASSWORD;
private string SECRET_PAGE_URL;
private string COOKIEHOLDER;

public MainForm()
{
//
// The InitializeComponent() call is required for Windows Forms
designer support.
//

InitializeComponent();

//
// TODO: Add constructor code after the InitializeComponent() call.
//
}

[STAThread]
public static void Main(string[] args)
{
Application.Run(new MainForm());
}
void Button1Click(object sender, System.EventArgs e)
{
this.textBox1.Text="Connecting...";
this.LOGIN_URL="http://loginpage"; this.SECRET_PAGE_URL
="http://targetpage";
this.USERNAME ="UserName";
this.PASSWORD ="Password";

HttpWebRequest webrequest=WebRequest.Create(LOGIN_URL) as
HttpWebRequest;
StreamReader responseReader=new
StreamReader(webrequest.GetResponse().GetResponseS tream());

string responseData = responseReader.ReadToEnd();
//this.textBox1.Text=responseData;

//extract PathInfo value and build our post data
string pathInfo=ExtractPathInfo(responseData);
MessageBox.Show(pathInfo,pathInfo);
//string
postData=String.Format("pathInfo={0}&username={1}& password={2}&Login=Login",
pathInfo, USERNAME, PASSWORD);
string
postData=String.Format("username={1}&password={2}& pathInfo={0}",
pathInfo, USERNAME, PASSWORD);

this.textBox1.Text=postData;

//have a cookie container ready to receive the forms auth cookie
CookieContainer cookies=new CookieContainer();

//now post to the login form
webrequest=WebRequest.Create(LOGIN_URL) as HttpWebRequest;
webrequest.Method="Post";
webrequest.Credentials = CredentialCache.DefaultCredentials;
webrequest.UserAgent="User-Agent: Mozilla/4.0 (compatible; MSIE 6.0;
Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 1.0.3705)";
webrequest.Accept="Accept: image/gif, image/x-xbitmap, image/jpeg,
image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel,
application/vnd.ms-powerpoint, application/msword, */*";
webrequest.ContentType="application/x-www-form-urlencoded";

webrequest.AllowAutoRedirect=true;
webrequest.CookieContainer=cookies;

webrequest.Referer="http://unatime.merck.com/unatime/action/home";


//write the form values into the request message
StreamWriter requestWriter = new
StreamWriter(webrequest.GetRequestStream());
requestWriter.Write(postData);
requestWriter.Close();
//we don't need the contents of the response, just the cookie

try
{
webrequest.GetResponse().Close();
}
catch (WebException ee)
{
// MessageBox.Show(ee.Message);
// this.textBox1.Text=ee.Message;
}

//webrequest.GetResponse().Close();

//now we can send out cookie along with a request for the protected
page
webrequest = WebRequest.Create(SECRET_PAGE_URL) as HttpWebRequest;
webrequest.Method="Post";
webrequest.Credentials = CredentialCache.DefaultCredentials;
webrequest.UserAgent="User-Agent: Mozilla/4.0 (compatible; MSIE 6.0;
Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 1.0.3705)";
webrequest.Accept="Accept: image/gif, image/x-xbitmap, image/jpeg,
image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel,
application/vnd.ms-powerpoint, application/msword, */*";
webrequest.ContentType="application/x-www-form-urlencoded";
webrequest.AllowAutoRedirect=true;
webrequest.CookieContainer=cookies;
webrequest.Referer="http://unatime.merck.com/unatime/action/home";
responseReader=new
StreamReader(webrequest.GetResponse().GetResponseS tream());

//StreamReader readStream = new StreamReader
(webrequest.GetResponse().GetResponseStream(), Encoding.UTF8);

//and read the response
responseData = responseReader.ReadToEnd();
responseReader.Close();

//Response.Write(responseData);
this.textBox1.Text=responseData;

}
private string ExtractPathInfo(string s)
{
string viewStateNameDelimiter="pathInfo";
string valueDelimiter="value=\"";

int viewStateNamePosition=s.IndexOf(viewStateNameDelim iter);
int viewStateValuePosition=s.IndexOf(valueDelimiter,
viewStateNamePosition);

int viewStateStartPosition=viewStateValuePosition +
valueDelimiter.Length;
//int viewStateEndPosition=s.IndexOf("\"", viewStateStartPosition);
int viewStateEndPosition=s.IndexOf("\"", viewStateStartPosition);
return
HttpUtility.UrlEncodeUnicode(s.Substring(viewState StartPosition,
viewStateEndPosition-viewStateStartPosition));

}

}
}

Nov 17 '05 #2
When I try to add cookies my request times out?? I have indicated
where I think the problem lies(about half way down)...but I truely do
not understand.

HttpWebRequest wrequest= (HttpWebRequest) WebRequest.Create(LOGIN_URL)
as HttpWebRequest;

StreamReader responseReader = new
StreamReader(wrequest.GetResponse().GetResponseStr eam());

string responseData =responseReader.ReadToEnd().ToString();
responseReader.Close();

string pathInfo = ExtractPathInfo(responseData);
string postData=String.Format("username={1}&password={2}& pathInfo={0}",
pathInfo, USERNAME, PASSWORD);

wrequest=(HttpWebRequest) WebRequest.Create(LOGIN_URL) as
HttpWebRequest;
wrequest.Method="Post";
wrequest.ContentType = "application/x-www-form-urlencoded";
wrequest.CookieContainer= new CookieContainer();

MessageBox.Show("Cookie Section");
//cookies collected
//WHEN EXECUTING THIS CODE APP TIMES OUT
HttpWebResponse wresponse = (HttpWebResponse) wrequest.GetResponse();
wresponse.Cookies =
wrequest.CookieContainer.GetCookies(wrequest.Reque stUri);
this.textBox1.Text+=wresponse.StatusDescription;

StreamWriter rwriter= new StreamWriter(wrequest.GetRequestStream());
rwriter.Write(postData);
rwriter.Close();
MessageBox.Show("Target");
wrequest = (HttpWebRequest) WebRequest.Create(SECRET_PAGE_URL) as
HttpWebRequest;

//wrequest.CookieContainer= new CookieContainer();
responseReader = new
StreamReader(wrequest.GetResponse().GetResponseStr eam());

responseData=responseReader.ReadToEnd();
responseReader.Close();

this.textBox1.Text +=responseData.ToString();

this.textBox1.Text+="Done";

Nov 17 '05 #3
I am having trouble getting the HttpWebRequest.method ="Post"; When I
have this in the code, and run the app through fiddler, it is still
showing that a get was sent? am I calling this wrong?(it is
wrequest.method="post"; in my code).

Thanks
Dan

Nov 17 '05 #4
Also, when I check fiddler, it is saying that there are three cookies
being returned. Yet if i do a
MessageBox.Show(wresponse.getcookies(wrequest.requ est.uri).count.toString());
i only return 2? I can't figure out why this code works
sometimes...and then doesn't work, then works...then doesn't work...

Nov 17 '05 #5
Hi Dan,

First suggestion: use a different variable for the first request from the
second one.

Second suggestion: create the cookie container into a seperate variable
(like you were doing in the first snippet you posted)

Third suggestion: copy the cookies OUT of the first response into the cookie
container. Then assign these cookies IN to the second request.

You are discarding the cookies every time. The only reason it works at all
is by accident, because you've been using the same variable. However,
memory cookies won't transfer, only persistent cookies will, and your site
expects the memory cookie to hold the login token (pretty normal behavior).

--
--- Nick Malik [Microsoft]
MCSD, CFPS, Certified Scrummaster
http://blogs.msdn.com/nickmalik

Disclaimer: Opinions expressed in this forum are my own, and not
representative of my employer.
I do not answer questions on behalf of my employer. I'm just a
programmer helping programmers.
--
"Dan McCollick" <mc*********@hotmail.com> wrote in message
news:11**********************@g49g2000cwa.googlegr oups.com...
When I try to add cookies my request times out?? I have indicated
where I think the problem lies(about half way down)...but I truely do
not understand.

HttpWebRequest wrequest= (HttpWebRequest) WebRequest.Create(LOGIN_URL)
as HttpWebRequest;

StreamReader responseReader = new
StreamReader(wrequest.GetResponse().GetResponseStr eam());

string responseData =responseReader.ReadToEnd().ToString();
responseReader.Close();

string pathInfo = ExtractPathInfo(responseData);
string postData=String.Format("username={1}&password={2}& pathInfo={0}",
pathInfo, USERNAME, PASSWORD);

wrequest=(HttpWebRequest) WebRequest.Create(LOGIN_URL) as
HttpWebRequest;
wrequest.Method="Post";
wrequest.ContentType = "application/x-www-form-urlencoded";
wrequest.CookieContainer= new CookieContainer();

MessageBox.Show("Cookie Section");
//cookies collected
//WHEN EXECUTING THIS CODE APP TIMES OUT
HttpWebResponse wresponse = (HttpWebResponse) wrequest.GetResponse();
wresponse.Cookies =
wrequest.CookieContainer.GetCookies(wrequest.Reque stUri);
this.textBox1.Text+=wresponse.StatusDescription;

StreamWriter rwriter= new StreamWriter(wrequest.GetRequestStream());
rwriter.Write(postData);
rwriter.Close();
MessageBox.Show("Target");
wrequest = (HttpWebRequest) WebRequest.Create(SECRET_PAGE_URL) as
HttpWebRequest;

//wrequest.CookieContainer= new CookieContainer();
responseReader = new
StreamReader(wrequest.GetResponse().GetResponseStr eam());

responseData=responseReader.ReadToEnd();
responseReader.Close();

this.textBox1.Text +=responseData.ToString();

this.textBox1.Text+="Done";

Nov 17 '05 #6
Thank you so much. Works great now.

Nov 17 '05 #7

This discussion thread is closed

Replies have been disabled for this discussion.

Similar topics

3 posts views Thread by R Millman | last post: by
5 posts views Thread by Velvet | last post: by
8 posts views Thread by razael1 | last post: by
By using this site, you agree to our Privacy Policy and Terms of Use.