By using this site, you agree to our updated Privacy Policy and our Terms of Use. Manage your Cookies Settings.
438,468 Members | 1,859 Online
Bytes IT Community
+ Ask a Question
Need help? Post your question and get tips & solutions from a community of 438,468 IT Pros & Developers. It's quick & easy.

crawler crawler....help needed

P: 2
well i got this code from java.sun.com and tried modiifying it in all the possible ways,but to no good.. stil its not workin..pleas help me out and try postin good workinw web cralwer if u have.. need help asap...


import java.applet.Applet;
import java.text.*;
import java.awt.*;
import java.awt.List;
import java.awt.event.*;
import java.util.*;


import java.net.*;
import java.io.*;

public class WebCrawler extends Applet implements ActionListener, Runnable {
public static final String SEARCH = "Search";
public static final String STOP = "Stop";
public static final String DISALLOW = "Disallow:";
public static final int SEARCH_LIMIT = 50;

Panel panelMain;
List listMatches;
Label labelStatus;

Vector vectorToSearch;
Vector vectorSearched;
Vector vectorMatches;

Thread searchThread;

TextField textURL;
Choice choiceType;

public void init() {

panelMain = new Panel();
panelMain.setLayout(new BorderLayout(5, 5));

Panel panelEntry = new Panel();
panelEntry.setLayout(new BorderLayout(5, 5));

Panel panelURL = new Panel();
panelURL.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
Label labelURL = new Label("Starting URL: ", Label.RIGHT);
panelURL.add(labelURL);
textURL = new TextField("", 40);
panelURL.add(textURL);
panelEntry.add("North", panelURL);

Panel panelType = new Panel();
panelType.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
Label labelType = new Label("Content type: ", Label.RIGHT);
panelType.add(labelType);
choiceType = new Choice();
choiceType.addItem("text/html");
choiceType.addItem("audio/basic");
choiceType.addItem("audio/au");
choiceType.addItem("audio/aiff");
choiceType.addItem("audio/wav");
choiceType.addItem("video/mpeg");
choiceType.addItem("video/x-avi");
panelType.add(choiceType);
panelEntry.add("South", panelType);

panelMain.add("North", panelEntry);

Panel panelListButtons = new Panel();
panelListButtons.setLayout(new BorderLayout(5, 5));

Panel panelList = new Panel();
panelList.setLayout(new BorderLayout(5, 5));
Label labelResults = new Label("Search results");
panelList.add("North", labelResults);
Panel panelListCurrent = new Panel();
panelListCurrent.setLayout(new BorderLayout(5, 5));
listMatches = new List(10);
panelListCurrent.add("North", listMatches);
labelStatus = new Label("");
panelListCurrent.add("South", labelStatus);
panelList.add("South", panelListCurrent);

panelListButtons.add("North", panelList);

Panel panelButtons = new Panel();
Button buttonSearch = new Button(SEARCH);
buttonSearch.addActionListener(this);
panelButtons.add(buttonSearch);
Button buttonStop = new Button(STOP);
buttonStop.addActionListener(this);
panelButtons.add(buttonStop);

panelListButtons.add("South", panelButtons);

panelMain.add("South", panelListButtons);

add(panelMain);
setVisible(true);

repaint();

vectorToSearch = new Vector();
vectorSearched = new Vector();
vectorMatches = new Vector();

URLConnection.setDefaultAllowUserInteraction(false );
}

public void start() {
}

public void stop() {
if (searchThread != null) {
setStatus("stopping...");
searchThread = null;
}
}

public void destroy() {
}


public void paint(Graphics g) {
g.drawRect(0, 0, getSize().width - 1, getSize().height - 1);

panelMain.paint(g);
panelMain.paintComponents(g);
}

public void run() {
String strURL = textURL.getText();
String strTargetType = choiceType.getSelectedItem();
int numberSearched = 0;
int numberFound = 0;

if (strURL.length() == 0) {
setStatus("ERROR: must enter a starting URL");
return;
}

vectorToSearch.removeAllElements();
vectorSearched.removeAllElements();
vectorMatches.removeAllElements();
listMatches.removeAll();

vectorToSearch.addElement(strURL);

while ((vectorToSearch.size() > 0)
&& (Thread.currentThread() == searchThread)) {
strURL = (String) vectorToSearch.elementAt(0);

setStatus("searching " + strURL);

URL url;
try {
url = new URL(strURL);
} catch (MalformedURLException e) {
setStatus("ERROR: invalid URL " + strURL);
break;
}

vectorToSearch.removeElementAt(0);
vectorSearched.addElement(strURL);

if (url.getProtocol().compareTo("http") != 0)
break;


try {
URLConnection urlConnection = url.openConnection();

urlConnection.setAllowUserInteraction(false);

InputStream urlStream = url.openStream();
String type
= urlConnection.guessContentTypeFromStream(urlStream );
if (type == null)
break;
if (type.compareTo("text/html") != 0)
break;

byte b[] = new byte[1000];
int numRead = urlStream.read(b);
String content = new String(b, 0, numRead);
while (numRead != -1) {
if (Thread.currentThread() != searchThread)
break;
numRead = urlStream.read(b);
if (numRead != -1) {
String newContent = new String(b, 0, numRead);
content += newContent;
}
}
urlStream.close();

if (Thread.currentThread() != searchThread)
break;

String lowerCaseContent = content.toLowerCase();

int index = 0;
while ((index = lowerCaseContent.indexOf("<a", index)) != -1)
{
if ((index = lowerCaseContent.indexOf("href", index)) == -1)
break;
if ((index = lowerCaseContent.indexOf("=", index)) == -1)
break;

if (Thread.currentThread() != searchThread)
break;

index++;
String remaining = content.substring(index);

StringTokenizer st
= new StringTokenizer(remaining, "\t\n\r\">#");
String strLink = st.nextToken();

URL urlLink;
try {
urlLink = new URL(url, strLink);
strLink = urlLink.toString();
} catch (MalformedURLException e) {
setStatus("ERROR: bad URL " + strLink);
continue;
}

if (urlLink.getProtocol().compareTo("http") != 0)
break;

if (Thread.currentThread() != searchThread)
break;

try {
URLConnection urlLinkConnection
= urlLink.openConnection();
urlLinkConnection.setAllowUserInteraction(false);
InputStream linkStream = urlLink.openStream();
String strType
= urlLinkConnection.guessContentTypeFromStream(linkS tream);
linkStream.close();

if (strType == null)
break;
if (strType.compareTo("text/html") == 0) {
if ((!vectorSearched.contains(strLink))
&& (!vectorToSearch.contains(strLink))) {

}
}

if (strType.compareTo(strTargetType) == 0) {
if (vectorMatches.contains(strLink) == false) {
listMatches.add(strLink);
vectorMatches.addElement(strLink);
numberFound++;
if (numberFound >= SEARCH_LIMIT)
break;
}
}
} catch (IOException e) {
setStatus("ERROR: couldn't open URL " + strLink);
continue;
}
}
} catch (IOException e) {
setStatus("ERROR: couldn't open URL " + strURL);
break;
}

numberSearched++;
if (numberSearched >= SEARCH_LIMIT)
break;
}

if (numberSearched >= SEARCH_LIMIT || numberFound >= SEARCH_LIMIT)
setStatus("reached search limit of " + SEARCH_LIMIT);
else
setStatus("done");
searchThread = null;

}

void setStatus(String status) {
labelStatus.setText(status);
}

public void actionPerformed(ActionEvent event) {
String command = event.getActionCommand();

if (command.compareTo(SEARCH) == 0) {
setStatus("searching...");

if (searchThread == null) {
searchThread = new Thread(this);
}
searchThread.start();
}
else if (command.compareTo(STOP) == 0) {
stop();
}
}
public static void main (String argv[])
{
Frame f = new Frame("My Crawler");
WebCrawler applet = new WebCrawler();
f.add("Center", applet);

/* Behind a firewall set your proxy and port here!
*/
String prox = "192.168.16.230";
Properties props= new Properties(System.getProperties());
props.put("http.proxySet","true");
props.put("http.proxyHost", "prox");
props.put("http.proxyPort", "8080");


Properties newprops = new Properties(props);
System.setProperties(newprops);



applet.init();
applet.start();
f.pack();
f.show();
}

}
Jan 5 '07 #1
Share this Question
Share on Google+
3 Replies


P: 2
well i got this code from java.sun.com and tried modiifying it in all the possible ways,but to no good.. stil its not workin..pleas help me out and try postin good workinw web cralwer if u have.. need help asap...


import java.applet.Applet;
import java.text.*;
import java.awt.*;
import java.awt.List;
import java.awt.event.*;
import java.util.*;


import java.net.*;
import java.io.*;

public class WebCrawler extends Applet implements ActionListener, Runnable {
public static final String SEARCH = "Search";
public static final String STOP = "Stop";
public static final String DISALLOW = "Disallow:";
public static final int SEARCH_LIMIT = 50;

Panel panelMain;
List listMatches;
Label labelStatus;

Vector vectorToSearch;
Vector vectorSearched;
Vector vectorMatches;

Thread searchThread;

TextField textURL;
Choice choiceType;

public void init() {

panelMain = new Panel();
panelMain.setLayout(new BorderLayout(5, 5));

Panel panelEntry = new Panel();
panelEntry.setLayout(new BorderLayout(5, 5));

Panel panelURL = new Panel();
panelURL.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
Label labelURL = new Label("Starting URL: ", Label.RIGHT);
panelURL.add(labelURL);
textURL = new TextField("", 40);
panelURL.add(textURL);
panelEntry.add("North", panelURL);

Panel panelType = new Panel();
panelType.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
Label labelType = new Label("Content type: ", Label.RIGHT);
panelType.add(labelType);
choiceType = new Choice();
choiceType.addItem("text/html");
choiceType.addItem("audio/basic");
choiceType.addItem("audio/au");
choiceType.addItem("audio/aiff");
choiceType.addItem("audio/wav");
choiceType.addItem("video/mpeg");
choiceType.addItem("video/x-avi");
panelType.add(choiceType);
panelEntry.add("South", panelType);

panelMain.add("North", panelEntry);

Panel panelListButtons = new Panel();
panelListButtons.setLayout(new BorderLayout(5, 5));

Panel panelList = new Panel();
panelList.setLayout(new BorderLayout(5, 5));
Label labelResults = new Label("Search results");
panelList.add("North", labelResults);
Panel panelListCurrent = new Panel();
panelListCurrent.setLayout(new BorderLayout(5, 5));
listMatches = new List(10);
panelListCurrent.add("North", listMatches);
labelStatus = new Label("");
panelListCurrent.add("South", labelStatus);
panelList.add("South", panelListCurrent);

panelListButtons.add("North", panelList);

Panel panelButtons = new Panel();
Button buttonSearch = new Button(SEARCH);
buttonSearch.addActionListener(this);
panelButtons.add(buttonSearch);
Button buttonStop = new Button(STOP);
buttonStop.addActionListener(this);
panelButtons.add(buttonStop);

panelListButtons.add("South", panelButtons);

panelMain.add("South", panelListButtons);

add(panelMain);
setVisible(true);

repaint();

vectorToSearch = new Vector();
vectorSearched = new Vector();
vectorMatches = new Vector();

URLConnection.setDefaultAllowUserInteraction(false );
}

public void start() {
}

public void stop() {
if (searchThread != null) {
setStatus("stopping...");
searchThread = null;
}
}

public void destroy() {
}


public void paint(Graphics g) {
g.drawRect(0, 0, getSize().width - 1, getSize().height - 1);

panelMain.paint(g);
panelMain.paintComponents(g);
}

public void run() {
String strURL = textURL.getText();
String strTargetType = choiceType.getSelectedItem();
int numberSearched = 0;
int numberFound = 0;

if (strURL.length() == 0) {
setStatus("ERROR: must enter a starting URL");
return;
}

vectorToSearch.removeAllElements();
vectorSearched.removeAllElements();
vectorMatches.removeAllElements();
listMatches.removeAll();

vectorToSearch.addElement(strURL);

while ((vectorToSearch.size() > 0)
&& (Thread.currentThread() == searchThread)) {
strURL = (String) vectorToSearch.elementAt(0);

setStatus("searching " + strURL);

URL url;
try {
url = new URL(strURL);
} catch (MalformedURLException e) {
setStatus("ERROR: invalid URL " + strURL);
break;
}

vectorToSearch.removeElementAt(0);
vectorSearched.addElement(strURL);

if (url.getProtocol().compareTo("http") != 0)
break;


try {
URLConnection urlConnection = url.openConnection();

urlConnection.setAllowUserInteraction(false);

InputStream urlStream = url.openStream();
String type
= urlConnection.guessContentTypeFromStream(urlStream );
if (type == null)
break;
if (type.compareTo("text/html") != 0)
break;

byte b[] = new byte[1000];
int numRead = urlStream.read(b);
String content = new String(b, 0, numRead);
while (numRead != -1) {
if (Thread.currentThread() != searchThread)
break;
numRead = urlStream.read(b);
if (numRead != -1) {
String newContent = new String(b, 0, numRead);
content += newContent;
}
}
urlStream.close();

if (Thread.currentThread() != searchThread)
break;

String lowerCaseContent = content.toLowerCase();

int index = 0;
while ((index = lowerCaseContent.indexOf("<a", index)) != -1)
{
if ((index = lowerCaseContent.indexOf("href", index)) == -1)
break;
if ((index = lowerCaseContent.indexOf("=", index)) == -1)
break;

if (Thread.currentThread() != searchThread)
break;

index++;
String remaining = content.substring(index);

StringTokenizer st
= new StringTokenizer(remaining, "\t\n\r\">#");
String strLink = st.nextToken();

URL urlLink;
try {
urlLink = new URL(url, strLink);
strLink = urlLink.toString();
} catch (MalformedURLException e) {
setStatus("ERROR: bad URL " + strLink);
continue;
}

if (urlLink.getProtocol().compareTo("http") != 0)
break;

if (Thread.currentThread() != searchThread)
break;

try {
URLConnection urlLinkConnection
= urlLink.openConnection();
urlLinkConnection.setAllowUserInteraction(false);
InputStream linkStream = urlLink.openStream();
String strType
= urlLinkConnection.guessContentTypeFromStream(linkS tream);
linkStream.close();

if (strType == null)
break;
if (strType.compareTo("text/html") == 0) {
if ((!vectorSearched.contains(strLink))
&& (!vectorToSearch.contains(strLink))) {

}
}

if (strType.compareTo(strTargetType) == 0) {
if (vectorMatches.contains(strLink) == false) {
listMatches.add(strLink);
vectorMatches.addElement(strLink);
numberFound++;
if (numberFound >= SEARCH_LIMIT)
break;
}
}
} catch (IOException e) {
setStatus("ERROR: couldn't open URL " + strLink);
continue;
}
}
} catch (IOException e) {
setStatus("ERROR: couldn't open URL " + strURL);
break;
}

numberSearched++;
if (numberSearched >= SEARCH_LIMIT)
break;
}

if (numberSearched >= SEARCH_LIMIT || numberFound >= SEARCH_LIMIT)
setStatus("reached search limit of " + SEARCH_LIMIT);
else
setStatus("done");
searchThread = null;

}

void setStatus(String status) {
labelStatus.setText(status);
}

public void actionPerformed(ActionEvent event) {
String command = event.getActionCommand();

if (command.compareTo(SEARCH) == 0) {
setStatus("searching...");

if (searchThread == null) {
searchThread = new Thread(this);
}
searchThread.start();
}
else if (command.compareTo(STOP) == 0) {
stop();
}
}
public static void main (String argv[])
{
Frame f = new Frame("My Crawler");
WebCrawler applet = new WebCrawler();
f.add("Center", applet);

/* Behind a firewall set your proxy and port here!
*/
String prox = "192.168.16.230";
Properties props= new Properties(System.getProperties());
props.put("http.proxySet","true");
props.put("http.proxyHost", "prox");
props.put("http.proxyPort", "8080");


Properties newprops = new Properties(props);
System.setProperties(newprops);



applet.init();
applet.start();
f.pack();
f.show();
}

}
Jan 5 '07 #2

10K+
P: 13,264
Expand|Select|Wrap|Line Numbers
  1.  
  2.  
  3. import java.applet.Applet;
  4. import java.text.*;
  5. import java.awt.*;
  6. import java.awt.List;
  7. import java.awt.event.*;
  8. import java.util.*;
  9.  
  10.  
  11. import java.net.*;
  12. import java.io.*; 
  13.  
  14. public class WebCrawler extends Applet implements ActionListener, Runnable {
  15. public static final String SEARCH = "Search";
  16. public static final String STOP = "Stop";
  17. public static final String DISALLOW = "Disallow:";
  18. public static final int SEARCH_LIMIT = 50;
  19.  
  20. Panel panelMain;
  21. List listMatches;
  22. Label labelStatus;
  23.  
  24. Vector vectorToSearch;
  25. Vector vectorSearched;
  26. Vector vectorMatches;
  27.  
  28. Thread searchThread;
  29.  
  30. TextField textURL;
  31. Choice choiceType;
  32.  
  33. public void init() {
  34.  
  35. panelMain = new Panel();
  36. panelMain.setLayout(new BorderLayout(5, 5));
  37.  
  38. Panel panelEntry = new Panel();
  39. panelEntry.setLayout(new BorderLayout(5, 5));
  40.  
  41. Panel panelURL = new Panel();
  42. panelURL.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
  43. Label labelURL = new Label("Starting URL: ", Label.RIGHT);
  44. panelURL.add(labelURL);
  45. textURL = new TextField("", 40);
  46. panelURL.add(textURL);
  47. panelEntry.add("North", panelURL);
  48.  
  49. Panel panelType = new Panel();
  50. panelType.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
  51. Label labelType = new Label("Content type: ", Label.RIGHT);
  52. panelType.add(labelType);
  53. choiceType = new Choice();
  54. choiceType.addItem("text/html");
  55. choiceType.addItem("audio/basic");
  56. choiceType.addItem("audio/au");
  57. choiceType.addItem("audio/aiff");
  58. choiceType.addItem("audio/wav");
  59. choiceType.addItem("video/mpeg");
  60. choiceType.addItem("video/x-avi");
  61. panelType.add(choiceType);
  62. panelEntry.add("South", panelType);
  63.  
  64. panelMain.add("North", panelEntry);
  65.  
  66. Panel panelListButtons = new Panel();
  67. panelListButtons.setLayout(new BorderLayout(5, 5));
  68.  
  69. Panel panelList = new Panel();
  70. panelList.setLayout(new BorderLayout(5, 5));
  71. Label labelResults = new Label("Search results");
  72. panelList.add("North", labelResults);
  73. Panel panelListCurrent = new Panel();
  74. panelListCurrent.setLayout(new BorderLayout(5, 5));
  75. listMatches = new List(10);
  76. panelListCurrent.add("North", listMatches);
  77. labelStatus = new Label("");
  78. panelListCurrent.add("South", labelStatus);
  79. panelList.add("South", panelListCurrent);
  80.  
  81. panelListButtons.add("North", panelList);
  82.  
  83. Panel panelButtons = new Panel();
  84. Button buttonSearch = new Button(SEARCH);
  85. buttonSearch.addActionListener(this);
  86. panelButtons.add(buttonSearch);
  87. Button buttonStop = new Button(STOP);
  88. buttonStop.addActionListener(this);
  89. panelButtons.add(buttonStop);
  90.  
  91. panelListButtons.add("South", panelButtons);
  92.  
  93. panelMain.add("South", panelListButtons);
  94.  
  95. add(panelMain);
  96. setVisible(true);
  97.  
  98. repaint();
  99.  
  100. vectorToSearch = new Vector();
  101. vectorSearched = new Vector();
  102. vectorMatches = new Vector();
  103.  
  104. URLConnection.setDefaultAllowUserInteraction(false );
  105. }
  106.  
  107. public void start() {
  108. }
  109.  
  110. public void stop() {
  111. if (searchThread != null) {
  112. setStatus("stopping...");
  113. searchThread = null;
  114. }
  115. }
  116.  
  117. public void destroy() {
  118. }
  119.  
  120.  
  121. public void paint(Graphics g) {
  122. g.drawRect(0, 0, getSize().width - 1, getSize().height - 1);
  123.  
  124. panelMain.paint(g);
  125. panelMain.paintComponents(g);
  126. }
  127.  
  128. public void run() {
  129. String strURL = textURL.getText();
  130. String strTargetType = choiceType.getSelectedItem();
  131. int numberSearched = 0;
  132. int numberFound = 0;
  133.  
  134. if (strURL.length() == 0) {
  135. setStatus("ERROR: must enter a starting URL");
  136. return;
  137. }
  138.  
  139. vectorToSearch.removeAllElements();
  140. vectorSearched.removeAllElements();
  141. vectorMatches.removeAllElements();
  142. listMatches.removeAll();
  143.  
  144. vectorToSearch.addElement(strURL);
  145.  
  146. while ((vectorToSearch.size() > 0)
  147. && (Thread.currentThread() == searchThread)) {
  148. strURL = (String) vectorToSearch.elementAt(0);
  149.  
  150. setStatus("searching " + strURL);
  151.  
  152. URL url;
  153. try {
  154. url = new URL(strURL);
  155. } catch (MalformedURLException e) {
  156. setStatus("ERROR: invalid URL " + strURL);
  157. break;
  158. }
  159.  
  160. vectorToSearch.removeElementAt(0);
  161. vectorSearched.addElement(strURL);
  162.  
  163. if (url.getProtocol().compareTo("http") != 0)
  164. break;
  165.  
  166.  
  167. try {
  168. URLConnection urlConnection = url.openConnection();
  169.  
  170. urlConnection.setAllowUserInteraction(false);
  171.  
  172. InputStream urlStream = url.openStream();
  173. String type
  174. = urlConnection.guessContentTypeFromStream(urlStream );
  175. if (type == null)
  176. break;
  177. if (type.compareTo("text/html") != 0)
  178. break;
  179.  
  180. byte b[] = new byte[1000];
  181. int numRead = urlStream.read(b);
  182. String content = new String(b, 0, numRead);
  183. while (numRead != -1) {
  184. if (Thread.currentThread() != searchThread)
  185. break;
  186. numRead = urlStream.read(b);
  187. if (numRead != -1) {
  188. String newContent = new String(b, 0, numRead);
  189. content += newContent;
  190. }
  191. }
  192. urlStream.close();
  193.  
  194. if (Thread.currentThread() != searchThread)
  195. break;
  196.  
  197. String lowerCaseContent = content.toLowerCase();
  198.  
  199. int index = 0;
  200. while ((index = lowerCaseContent.indexOf("<a", index)) != -1)
  201. {
  202. if ((index = lowerCaseContent.indexOf("href", index)) == -1)
  203. break;
  204. if ((index = lowerCaseContent.indexOf("=", index)) == -1)
  205. break;
  206.  
  207. if (Thread.currentThread() != searchThread)
  208. break;
  209.  
  210. index++;
  211. String remaining = content.substring(index);
  212.  
  213. StringTokenizer st
  214. = new StringTokenizer(remaining, "\t\n\r\">#");
  215. String strLink = st.nextToken();
  216.  
  217. URL urlLink;
  218. try {
  219. urlLink = new URL(url, strLink);
  220. strLink = urlLink.toString();
  221. } catch (MalformedURLException e) {
  222. setStatus("ERROR: bad URL " + strLink);
  223. continue;
  224. }
  225.  
  226. if (urlLink.getProtocol().compareTo("http") != 0)
  227. break;
  228.  
  229. if (Thread.currentThread() != searchThread)
  230. break;
  231.  
  232. try {
  233. URLConnection urlLinkConnection
  234. = urlLink.openConnection();
  235. urlLinkConnection.setAllowUserInteraction(false);
  236. InputStream linkStream = urlLink.openStream();
  237. String strType
  238. = urlLinkConnection.guessContentTypeFromStream(linkStream);
  239. linkStream.close();
  240.  
  241. if (strType == null)
  242. break;
  243. if (strType.compareTo("text/html") == 0) {
  244. if ((!vectorSearched.contains(strLink))
  245. && (!vectorToSearch.contains(strLink))) {
  246.  
  247. }
  248. }
  249.  
  250. if (strType.compareTo(strTargetType) == 0) {
  251. if (vectorMatches.contains(strLink) == false) {
  252. listMatches.add(strLink);
  253. vectorMatches.addElement(strLink);
  254. numberFound++;
  255. if (numberFound >= SEARCH_LIMIT)
  256. break;
  257. }
  258. }
  259. } catch (IOException e) {
  260. setStatus("ERROR: couldn't open URL " + strLink);
  261. continue;
  262. }
  263. }
  264. } catch (IOException e) {
  265. setStatus("ERROR: couldn't open URL " + strURL);
  266. break;
  267. }
  268.  
  269. numberSearched++;
  270. if (numberSearched >= SEARCH_LIMIT)
  271. break;
  272. }
  273.  
  274. if (numberSearched >= SEARCH_LIMIT || numberFound >= SEARCH_LIMIT)
  275. setStatus("reached search limit of " + SEARCH_LIMIT);
  276. else
  277. setStatus("done");
  278. searchThread = null;
  279.  
  280. }
  281.  
  282. void setStatus(String status) {
  283. labelStatus.setText(status);
  284. }
  285.  
  286. public void actionPerformed(ActionEvent event) {
  287. String command = event.getActionCommand();
  288.  
  289. if (command.compareTo(SEARCH) == 0) {
  290. setStatus("searching...");
  291.  
  292. if (searchThread == null) {
  293. searchThread = new Thread(this);
  294. }
  295. searchThread.start();
  296. }
  297. else if (command.compareTo(STOP) == 0) {
  298. stop();
  299. }
  300. }
  301. public static void main (String argv[])
  302. {
  303. Frame f = new Frame("My Crawler");
  304. WebCrawler applet = new WebCrawler();
  305. f.add("Center", applet);
  306.  
  307. /* Behind a firewall set your proxy and port here!
  308. */
  309. String prox = "192.168.16.230";
  310. Properties props= new Properties(System.getProperties());
  311. props.put("http.proxySet","true");
  312. props.put("http.proxyHost", "prox");
  313. props.put("http.proxyPort", "8080");
  314.  
  315.  
  316. Properties newprops = new Properties(props);
  317. System.setProperties(newprops);
  318.  
  319.  
  320.  
  321. applet.init();
  322. applet.start();
  323. f.pack();
  324. f.show();
  325. }
  326.  
  327. }
  328.  
  329.  
  330.  
Try this. I just removed one space that was giving an error and it worked for me. If it does not work post the error that you get.
Jan 5 '07 #3

10K+
P: 13,264
well i got this code from java.sun.com and tried modiifying it in all the possible ways,but to no good.. stil its not workin..pleas help me out and try postin good workinw web cralwer if u have.. need help asap...


import java.applet.Applet;
import java.text.*;
import java.awt.*;
import java.awt.List;
import java.awt.event.*;
import java.util.*;


import java.net.*;
import java.io.*;

public class WebCrawler extends Applet implements ActionListener, Runnable {
public static final String SEARCH = "Search";
public static final String STOP = "Stop";
public static final String DISALLOW = "Disallow:";
public static final int SEARCH_LIMIT = 50;

Panel panelMain;
List listMatches;
Label labelStatus;

Vector vectorToSearch;
Vector vectorSearched;
Vector vectorMatches;

Thread searchThread;

TextField textURL;
Choice choiceType;

public void init() {

panelMain = new Panel();
panelMain.setLayout(new BorderLayout(5, 5));

Panel panelEntry = new Panel();
panelEntry.setLayout(new BorderLayout(5, 5));

Panel panelURL = new Panel();
panelURL.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
Label labelURL = new Label("Starting URL: ", Label.RIGHT);
panelURL.add(labelURL);
textURL = new TextField("", 40);
panelURL.add(textURL);
panelEntry.add("North", panelURL);

Panel panelType = new Panel();
panelType.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
Label labelType = new Label("Content type: ", Label.RIGHT);
panelType.add(labelType);
choiceType = new Choice();
choiceType.addItem("text/html");
choiceType.addItem("audio/basic");
choiceType.addItem("audio/au");
choiceType.addItem("audio/aiff");
choiceType.addItem("audio/wav");
choiceType.addItem("video/mpeg");
choiceType.addItem("video/x-avi");
panelType.add(choiceType);
panelEntry.add("South", panelType);

panelMain.add("North", panelEntry);

Panel panelListButtons = new Panel();
panelListButtons.setLayout(new BorderLayout(5, 5));

Panel panelList = new Panel();
panelList.setLayout(new BorderLayout(5, 5));
Label labelResults = new Label("Search results");
panelList.add("North", labelResults);
Panel panelListCurrent = new Panel();
panelListCurrent.setLayout(new BorderLayout(5, 5));
listMatches = new List(10);
panelListCurrent.add("North", listMatches);
labelStatus = new Label("");
panelListCurrent.add("South", labelStatus);
panelList.add("South", panelListCurrent);

panelListButtons.add("North", panelList);

Panel panelButtons = new Panel();
Button buttonSearch = new Button(SEARCH);
buttonSearch.addActionListener(this);
panelButtons.add(buttonSearch);
Button buttonStop = new Button(STOP);
buttonStop.addActionListener(this);
panelButtons.add(buttonStop);

panelListButtons.add("South", panelButtons);

panelMain.add("South", panelListButtons);

add(panelMain);
setVisible(true);

repaint();

vectorToSearch = new Vector();
vectorSearched = new Vector();
vectorMatches = new Vector();

URLConnection.setDefaultAllowUserInteraction(false );
}

public void start() {
}

public void stop() {
if (searchThread != null) {
setStatus("stopping...");
searchThread = null;
}
}

public void destroy() {
}


public void paint(Graphics g) {
g.drawRect(0, 0, getSize().width - 1, getSize().height - 1);

panelMain.paint(g);
panelMain.paintComponents(g);
}

public void run() {
String strURL = textURL.getText();
String strTargetType = choiceType.getSelectedItem();
int numberSearched = 0;
int numberFound = 0;

if (strURL.length() == 0) {
setStatus("ERROR: must enter a starting URL");
return;
}

vectorToSearch.removeAllElements();
vectorSearched.removeAllElements();
vectorMatches.removeAllElements();
listMatches.removeAll();

vectorToSearch.addElement(strURL);

while ((vectorToSearch.size() > 0)
&& (Thread.currentThread() == searchThread)) {
strURL = (String) vectorToSearch.elementAt(0);

setStatus("searching " + strURL);

URL url;
try {
url = new URL(strURL);
} catch (MalformedURLException e) {
setStatus("ERROR: invalid URL " + strURL);
break;
}

vectorToSearch.removeElementAt(0);
vectorSearched.addElement(strURL);

if (url.getProtocol().compareTo("http") != 0)
break;


try {
URLConnection urlConnection = url.openConnection();

urlConnection.setAllowUserInteraction(false);

InputStream urlStream = url.openStream();
String type
= urlConnection.guessContentTypeFromStream(urlStream );
if (type == null)
break;
if (type.compareTo("text/html") != 0)
break;

byte b[] = new byte[1000];
int numRead = urlStream.read(b);
String content = new String(b, 0, numRead);
while (numRead != -1) {
if (Thread.currentThread() != searchThread)
break;
numRead = urlStream.read(b);
if (numRead != -1) {
String newContent = new String(b, 0, numRead);
content += newContent;
}
}
urlStream.close();

if (Thread.currentThread() != searchThread)
break;

String lowerCaseContent = content.toLowerCase();

int index = 0;
while ((index = lowerCaseContent.indexOf("<a", index)) != -1)
{
if ((index = lowerCaseContent.indexOf("href", index)) == -1)
break;
if ((index = lowerCaseContent.indexOf("=", index)) == -1)
break;

if (Thread.currentThread() != searchThread)
break;

index++;
String remaining = content.substring(index);

StringTokenizer st
= new StringTokenizer(remaining, "\t\n\r\">#");
String strLink = st.nextToken();

URL urlLink;
try {
urlLink = new URL(url, strLink);
strLink = urlLink.toString();
} catch (MalformedURLException e) {
setStatus("ERROR: bad URL " + strLink);
continue;
}

if (urlLink.getProtocol().compareTo("http") != 0)
break;

if (Thread.currentThread() != searchThread)
break;

try {
URLConnection urlLinkConnection
= urlLink.openConnection();
urlLinkConnection.setAllowUserInteraction(false);
InputStream linkStream = urlLink.openStream();
String strType
= urlLinkConnection.guessContentTypeFromStream(linkS tream);
linkStream.close();

if (strType == null)
break;
if (strType.compareTo("text/html") == 0) {
if ((!vectorSearched.contains(strLink))
&& (!vectorToSearch.contains(strLink))) {

}
}

if (strType.compareTo(strTargetType) == 0) {
if (vectorMatches.contains(strLink) == false) {
listMatches.add(strLink);
vectorMatches.addElement(strLink);
numberFound++;
if (numberFound >= SEARCH_LIMIT)
break;
}
}
} catch (IOException e) {
setStatus("ERROR: couldn't open URL " + strLink);
continue;
}
}
} catch (IOException e) {
setStatus("ERROR: couldn't open URL " + strURL);
break;
}

numberSearched++;
if (numberSearched >= SEARCH_LIMIT)
break;
}

if (numberSearched >= SEARCH_LIMIT || numberFound >= SEARCH_LIMIT)
setStatus("reached search limit of " + SEARCH_LIMIT);
else
setStatus("done");
searchThread = null;

}

void setStatus(String status) {
labelStatus.setText(status);
}

public void actionPerformed(ActionEvent event) {
String command = event.getActionCommand();

if (command.compareTo(SEARCH) == 0) {
setStatus("searching...");

if (searchThread == null) {
searchThread = new Thread(this);
}
searchThread.start();
}
else if (command.compareTo(STOP) == 0) {
stop();
}
}
public static void main (String argv[])
{
Frame f = new Frame("My Crawler");
WebCrawler applet = new WebCrawler();
f.add("Center", applet);

/* Behind a firewall set your proxy and port here!
*/
String prox = "192.168.16.230";
Properties props= new Properties(System.getProperties());
props.put("http.proxySet","true");
props.put("http.proxyHost", "prox");
props.put("http.proxyPort", "8080");


Properties newprops = new Properties(props);
System.setProperties(newprops);



applet.init();
applet.start();
f.pack();
f.show();
}

}
Please do not double post
Jan 5 '07 #4

Post your reply

Sign in to post your reply or Sign up for a free account.