This is a web service I used to scrape telephone directory information from
anywho.com. It's pretty rough code, but you should get the general idea.
/// <summary>
/// Requests the anywho.com reverse-lookup page for a phone number and returns
/// the listing text scraped out of the returned HTML.
/// </summary>
/// <param name="strNumber">
/// Phone number whose first three characters are used as the area code
/// (<c>npa</c>) and whose next seven characters are used as the local number.
/// </param>
/// <param name="counter">Incremented by one when a non-empty listing is returned.</param>
/// <returns>
/// The cleaned-up listing text followed by two newlines, or <c>null</c> when the
/// number is too short, the request fails, or no listing is found in the page.
/// </returns>
[WebMethod]
public string PhoneLookup(string strNumber, ref int counter)
{
    // Area code (3) + local number (7) are sliced by position below; anything
    // shorter used to fail inside Substring and fall into the catch -> null.
    if (strNumber == null || strNumber.Length < 10)
    {
        return null;
    }

    string strResult;
    try
    {
        // Build the lookup URL from the two positional slices of the number.
        Uri myUri = new Uri(
            "http://www.anywho.com/qry/wp_rl?npa=" + strNumber.Substring(0, 3) +
            "&telephone=" + strNumber.Substring(3, 7) +
            "&btnsubmit.x=36&btnsubmit.y=9");

        // Create the request and set the User-Agent header on it.
        HttpWebRequest myHttpWebRequest = (HttpWebRequest)WebRequest.Create(myUri);
        myHttpWebRequest.UserAgent =
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Q312461; .NET CLR 1.0.3705)";

        // 'using' releases the response and reader even when reading throws;
        // the original closed them manually (the reader twice) and leaked on error.
        string pageContent;
        using (HttpWebResponse res = (HttpWebResponse)myHttpWebRequest.GetResponse())
        using (StreamReader sr = new StreamReader(res.GetResponseStream(), System.Text.Encoding.UTF8))
        {
            pageContent = sr.ReadToEnd();
        }

        strResult = ExtractListing(pageContent);
    }
    catch (Exception)
    {
        // Deliberate best-effort: callers treat null as "no record", so any
        // request or parsing failure is reported as null rather than thrown.
        return null;
    }

    // Agent is declared outside this method; the assignment is kept exactly as in
    // the original, which only reached it when no exception occurred.
    Agent.SelectedIndex = -1;

    if (strResult.Trim() == string.Empty)
    {
        return null;
    }

    counter++;
    return strResult + "\n\n";
}

/// <summary>
/// Cuts the listing fragment out of the result page and turns the query-string
/// style markup into plain, newline-separated text.
/// </summary>
/// <param name="pageContent">Full text of the downloaded result page.</param>
/// <returns>The cleaned fragment, or an empty string when either marker is missing.</returns>
private static string ExtractListing(string pageContent)
{
    const string startMarker = "bin/amap.cgi?";
    // NOTE(review): the page may actually contain "Maps &amp; Directions"; the
    // literal is kept as posted - confirm against a live response.
    const string endMarker = "Maps & Directions";
    // Number of characters kept from the start of the end marker onwards.
    const int endMarkerSpan = 17;

    // Test the raw IndexOf result: the original added 10 first, so its
    // "!= -1" check could never detect a missing start marker.
    int markerPos = pageContent.IndexOf(startMarker);
    if (markerPos == -1)
    {
        return string.Empty;
    }

    // Skipping 10 characters leaves "gi?" in front of the first field, which
    // the "gi?lastname=" replacement below relies on.
    int startpos = markerPos + 10;

    // Search from startpos so the end marker can never precede the start,
    // which previously produced a negative Substring length.
    int endpos = pageContent.IndexOf(endMarker, startpos);
    if (endpos == -1)
    {
        return string.Empty;
    }

    string searchtext = pageContent.Substring(startpos, endpos - startpos + endMarkerSpan);

    // Order matters: "=" and "&" are blanked before the bare field names are
    // matched, so e.g. "&npa=...&telephone=" has become "npatelephone" by then.
    searchtext = searchtext.Replace(">Maps & Direct", " ");
    searchtext = searchtext.Replace("gi?lastname=", " ");
    searchtext = searchtext.Replace("firstname=", "\n");
    searchtext = searchtext.Replace("+", " ");
    searchtext = searchtext.Replace("=", " ");
    searchtext = searchtext.Replace("&", " ");
    searchtext = searchtext.Replace("\\", " ");
    searchtext = searchtext.Replace("\"", " ");
    searchtext = searchtext.Replace("city", "\n");
    searchtext = searchtext.Replace("state", "\n");
    searchtext = searchtext.Replace("zip", "\n");
    searchtext = searchtext.Replace("country", "\n");
    searchtext = searchtext.Replace("npatelephone", "\n");
    searchtext = searchtext.Replace("streetaddress", "");
    return searchtext;
}
--
Regards,
Alvin Bruney
[ASP.NET MVP
http://mvp.support.microsoft.com/default.aspx]
Got tidbits? Get it here...
http://tinyurl.com/27cok
"_BNC" <_B**@nospam.ne t> wrote in message
news:89******** *************** *********@4ax.c om...
(Re extracting just visible text from HTML)
On Mon, 22 Nov 2004 22:20:26 -0500, "Alvin Bruney [MVP]" <vapor at
steaming post office> wrote:
Well, it's not that difficult to concoct a good regular expression to do
this.
If you post a request for help in the C# newsgroup, I'm sure you can get
someone
like Chris R to write you one. I steal stuff off of regexlib and modify it
for my devious purposes. I'm no good at writing that stuff from scratch.
I confess to cross-posting this to the C# group in the hope that someone would
notice it. I'm sure I've seen something like this at one time, too. But
it would involve breaking down HTML tables, translating entities like
'&nbsp;' and all that. I'm sure it's been done, but it sounds like a lot of
wheel-reinventing if I have to do it myself.
I have the webrequest code if you want it, by the way.
That would be nice! Thanks.
BNC