Hi all,
I have the following code I created to do multiple websites data crawling
using Asynchronous Thread calling it works fine however I'm confused when it
comes to make the calling thread stops or sleep until all other threads
within the threadpool have all finished their jobs.
what happens at the moment is that every other thread "should" just
concatenate string data to public string variable when it finishes, but
however the code will just go and execute the rest of the calling method
without all thread being completed.
I would be grateful if you could help me out with a solution on how to force
the code to wait for all methods to finish, here is the code, thank you:
//The calling method is in a different class, different file, it calls
ScanSites() method below passing an array of multiple web addresses
namespace Crawler
{
class Bot
{
public static string res = "";
public Bot()
{
}
public void ScanSites(string str)
{
// for each URL in the collection...
for (x = 0; x < URLs.Length- 1; x++)
{
WebRequest request = HttpWebRequest.Create(URLs[x]);
// RequestState is a custom class to pass info
RequestState state = new RequestState(request, URLs[x]);
IAsyncResult result = request.BeginGetResponse(new
AsyncCallback(UpdateItem), state);
ThreadPool.RegisterWaitForSingleObject(result.Asyn cWaitHandle, new
WaitOrTimerCallback(ScanTimeoutCallback), state, (30 * 1000), true);
}
}
private void UpdateItem(IAsyncResult result)
{
// grab the custom state object
RequestState state = (RequestState)result.AsyncState;
WebRequest request = (WebRequest)state.Request;
// get the Response
HttpWebResponse response =
(HttpWebResponse)request.EndGetResponse(result);
StringBuilder sb = new StringBuilder(4096);
byte[] buf = new byte[2048];
int count;
while ((count = response.GetResponseStream().Read(buf, 0, buf.Length - 8)) >
0)
{
sb.Append(Encoding.UTF8.GetString(buf, 0, count));
}
StreamWriter sw = new StreamWriter("C:\\Rep\\" + state.URL);
sw.Write(sb);
sw.Close();
//here is the "res" variable that will hold the URL address concatenated |
and followed by another URL retrieved from another thread
res += state.URL+ "|";
}
private static void ScanTimeoutCallback(object state, bool timedOut)
{
if (timedOut)
{
RequestState reqState = (RequestState)state;
if (reqState != null)
reqState.Request.Abort();
}
}
}
class RequestState
{
public WebRequest Request; // holds the request
public string URL;
// public object Data; // store any data in this
public RequestState(WebRequest request, string url)
{
this.Request = request;
this.URL= docid;
// this.Data = data;
}
}