well i got this code from java.sun.com and tried modiifying it in all the possible ways,but to no good.. stil its not workin..pleas help me out and try postin good workinw web cralwer if u have.. need help asap...
import java.applet.Applet;
import java.text.*;
import java.awt.*;
import java.awt.List;
import java.awt.event.*;
import java.util.*;
import java.net.*;
import java.io.*;
public class WebCrawler extends Applet implements ActionListener, Runnable {
public static final String SEARCH = "Search";
public static final String STOP = "Stop";
public static final String DISALLOW = "Disallow:";
public static final int SEARCH_LIMIT = 50;
Panel panelMain;
List listMatches;
Label labelStatus;
Vector vectorToSearch;
Vector vectorSearched;
Vector vectorMatches;
Thread searchThread;
TextField textURL;
Choice choiceType;
public void init() {
panelMain = new Panel();
panelMain.setLayout(new BorderLayout(5, 5));
Panel panelEntry = new Panel();
panelEntry.setLayout(new BorderLayout(5, 5));
Panel panelURL = new Panel();
panelURL.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
Label labelURL = new Label("Starting URL: ", Label.RIGHT);
panelURL.add(labelURL);
textURL = new TextField("", 40);
panelURL.add(textURL);
panelEntry.add("North", panelURL);
Panel panelType = new Panel();
panelType.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
Label labelType = new Label("Content type: ", Label.RIGHT);
panelType.add(labelType);
choiceType = new Choice();
choiceType.addItem("text/html");
choiceType.addItem("audio/basic");
choiceType.addItem("audio/au");
choiceType.addItem("audio/aiff");
choiceType.addItem("audio/wav");
choiceType.addItem("video/mpeg");
choiceType.addItem("video/x-avi");
panelType.add(choiceType);
panelEntry.add("South", panelType);
panelMain.add("North", panelEntry);
Panel panelListButtons = new Panel();
panelListButtons.setLayout(new BorderLayout(5, 5));
Panel panelList = new Panel();
panelList.setLayout(new BorderLayout(5, 5));
Label labelResults = new Label("Search results");
panelList.add("North", labelResults);
Panel panelListCurrent = new Panel();
panelListCurrent.setLayout(new BorderLayout(5, 5));
listMatches = new List(10);
panelListCurrent.add("North", listMatches);
labelStatus = new Label("");
panelListCurrent.add("South", labelStatus);
panelList.add("South", panelListCurrent);
panelListButtons.add("North", panelList);
Panel panelButtons = new Panel();
Button buttonSearch = new Button(SEARCH);
buttonSearch.addActionListener(this);
panelButtons.add(buttonSearch);
Button buttonStop = new Button(STOP);
buttonStop.addActionListener(this);
panelButtons.add(buttonStop);
panelListButtons.add("South", panelButtons);
panelMain.add("South", panelListButtons);
add(panelMain);
setVisible(true);
repaint();
vectorToSearch = new Vector();
vectorSearched = new Vector();
vectorMatches = new Vector();
URLConnection.setDefaultAllowUserInteraction(false );
}
public void start() {
}
public void stop() {
if (searchThread != null) {
setStatus("stopping...");
searchThread = null;
}
}
public void destroy() {
}
public void paint(Graphics g) {
g.drawRect(0, 0, getSize().width - 1, getSize().height - 1);
panelMain.paint(g);
panelMain.paintComponents(g);
}
public void run() {
String strURL = textURL.getText();
String strTargetType = choiceType.getSelectedItem();
int numberSearched = 0;
int numberFound = 0;
if (strURL.length() == 0) {
setStatus("ERROR: must enter a starting URL");
return;
}
vectorToSearch.removeAllElements();
vectorSearched.removeAllElements();
vectorMatches.removeAllElements();
listMatches.removeAll();
vectorToSearch.addElement(strURL);
while ((vectorToSearch.size() > 0)
&& (Thread.currentThread() == searchThread)) {
strURL = (String) vectorToSearch.elementAt(0);
setStatus("searching " + strURL);
URL url;
try {
url = new URL(strURL);
} catch (MalformedURLException e) {
setStatus("ERROR: invalid URL " + strURL);
break;
}
vectorToSearch.removeElementAt(0);
vectorSearched.addElement(strURL);
if (url.getProtocol().compareTo("http") != 0)
break;
try {
URLConnection urlConnection = url.openConnection();
urlConnection.setAllowUserInteraction(false);
InputStream urlStream = url.openStream();
String type
= urlConnection.guessContentTypeFromStream(urlStream );
if (type == null)
break;
if (type.compareTo("text/html") != 0)
break;
byte b[] = new byte[1000];
int numRead = urlStream.read(b);
String content = new String(b, 0, numRead);
while (numRead != -1) {
if (Thread.currentThread() != searchThread)
break;
numRead = urlStream.read(b);
if (numRead != -1) {
String newContent = new String(b, 0, numRead);
content += newContent;
}
}
urlStream.close();
if (Thread.currentThread() != searchThread)
break;
String lowerCaseContent = content.toLowerCase();
int index = 0;
while ((index = lowerCaseContent.indexOf("<a", index)) != -1)
{
if ((index = lowerCaseContent.indexOf("href", index)) == -1)
break;
if ((index = lowerCaseContent.indexOf("=", index)) == -1)
break;
if (Thread.currentThread() != searchThread)
break;
index++;
String remaining = content.substring(index);
StringTokenizer st
= new StringTokenizer(remaining, "\t\n\r\">#");
String strLink = st.nextToken();
URL urlLink;
try {
urlLink = new URL(url, strLink);
strLink = urlLink.toString();
} catch (MalformedURLException e) {
setStatus("ERROR: bad URL " + strLink);
continue;
}
if (urlLink.getProtocol().compareTo("http") != 0)
break;
if (Thread.currentThread() != searchThread)
break;
try {
URLConnection urlLinkConnection
= urlLink.openConnection();
urlLinkConnection.setAllowUserInteraction(false);
InputStream linkStream = urlLink.openStream();
String strType
= urlLinkConnection.guessContentTypeFromStream(linkS tream);
linkStream.close();
if (strType == null)
break;
if (strType.compareTo("text/html") == 0) {
if ((!vectorSearched.contains(strLink))
&& (!vectorToSearch.contains(strLink))) {
}
}
if (strType.compareTo(strTargetType) == 0) {
if (vectorMatches.contains(strLink) == false) {
listMatches.add(strLink);
vectorMatches.addElement(strLink);
numberFound++;
if (numberFound >= SEARCH_LIMIT)
break;
}
}
} catch (IOException e) {
setStatus("ERROR: couldn't open URL " + strLink);
continue;
}
}
} catch (IOException e) {
setStatus("ERROR: couldn't open URL " + strURL);
break;
}
numberSearched++;
if (numberSearched >= SEARCH_LIMIT)
break;
}
if (numberSearched >= SEARCH_LIMIT || numberFound >= SEARCH_LIMIT)
setStatus("reached search limit of " + SEARCH_LIMIT);
else
setStatus("done");
searchThread = null;
}
void setStatus(String status) {
labelStatus.setText(status);
}
public void actionPerformed(ActionEvent event) {
String command = event.getActionCommand();
if (command.compareTo(SEARCH) == 0) {
setStatus("searching...");
if (searchThread == null) {
searchThread = new Thread(this);
}
searchThread.start();
}
else if (command.compareTo(STOP) == 0) {
stop();
}
}
public static void main (String argv[])
{
Frame f = new Frame("My Crawler");
WebCrawler applet = new WebCrawler();
f.add("Center", applet);
/* Behind a firewall set your proxy and port here!
*/
String prox = "192.168.16.230";
Properties props= new Properties(System.getProperties());
props.put("http.proxySet","true");
props.put("http.proxyHost", "prox");
props.put("http.proxyPort", "8080");
Properties newprops = new Properties(props);
System.setProperties(newprops);
applet.init();
applet.start();
f.pack();
f.show();
}
}
3 1865
well i got this code from java.sun.com and tried modiifying it in all the possible ways,but to no good.. stil its not workin..pleas help me out and try postin good workinw web cralwer if u have.. need help asap...
import java.applet.Applet;
import java.text.*;
import java.awt.*;
import java.awt.List;
import java.awt.event.*;
import java.util.*;
import java.net.*;
import java.io.*;
public class WebCrawler extends Applet implements ActionListener, Runnable {
public static final String SEARCH = "Search";
public static final String STOP = "Stop";
public static final String DISALLOW = "Disallow:";
public static final int SEARCH_LIMIT = 50;
Panel panelMain;
List listMatches;
Label labelStatus;
Vector vectorToSearch;
Vector vectorSearched;
Vector vectorMatches;
Thread searchThread;
TextField textURL;
Choice choiceType;
public void init() {
panelMain = new Panel();
panelMain.setLayout(new BorderLayout(5, 5));
Panel panelEntry = new Panel();
panelEntry.setLayout(new BorderLayout(5, 5));
Panel panelURL = new Panel();
panelURL.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
Label labelURL = new Label("Starting URL: ", Label.RIGHT);
panelURL.add(labelURL);
textURL = new TextField("", 40);
panelURL.add(textURL);
panelEntry.add("North", panelURL);
Panel panelType = new Panel();
panelType.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
Label labelType = new Label("Content type: ", Label.RIGHT);
panelType.add(labelType);
choiceType = new Choice();
choiceType.addItem("text/html");
choiceType.addItem("audio/basic");
choiceType.addItem("audio/au");
choiceType.addItem("audio/aiff");
choiceType.addItem("audio/wav");
choiceType.addItem("video/mpeg");
choiceType.addItem("video/x-avi");
panelType.add(choiceType);
panelEntry.add("South", panelType);
panelMain.add("North", panelEntry);
Panel panelListButtons = new Panel();
panelListButtons.setLayout(new BorderLayout(5, 5));
Panel panelList = new Panel();
panelList.setLayout(new BorderLayout(5, 5));
Label labelResults = new Label("Search results");
panelList.add("North", labelResults);
Panel panelListCurrent = new Panel();
panelListCurrent.setLayout(new BorderLayout(5, 5));
listMatches = new List(10);
panelListCurrent.add("North", listMatches);
labelStatus = new Label("");
panelListCurrent.add("South", labelStatus);
panelList.add("South", panelListCurrent);
panelListButtons.add("North", panelList);
Panel panelButtons = new Panel();
Button buttonSearch = new Button(SEARCH);
buttonSearch.addActionListener(this);
panelButtons.add(buttonSearch);
Button buttonStop = new Button(STOP);
buttonStop.addActionListener(this);
panelButtons.add(buttonStop);
panelListButtons.add("South", panelButtons);
panelMain.add("South", panelListButtons);
add(panelMain);
setVisible(true);
repaint();
vectorToSearch = new Vector();
vectorSearched = new Vector();
vectorMatches = new Vector();
URLConnection.setDefaultAllowUserInteraction(false );
}
public void start() {
}
public void stop() {
if (searchThread != null) {
setStatus("stopping...");
searchThread = null;
}
}
public void destroy() {
}
public void paint(Graphics g) {
g.drawRect(0, 0, getSize().width - 1, getSize().height - 1);
panelMain.paint(g);
panelMain.paintComponents(g);
}
public void run() {
String strURL = textURL.getText();
String strTargetType = choiceType.getSelectedItem();
int numberSearched = 0;
int numberFound = 0;
if (strURL.length() == 0) {
setStatus("ERROR: must enter a starting URL");
return;
}
vectorToSearch.removeAllElements();
vectorSearched.removeAllElements();
vectorMatches.removeAllElements();
listMatches.removeAll();
vectorToSearch.addElement(strURL);
while ((vectorToSearch.size() > 0)
&& (Thread.currentThread() == searchThread)) {
strURL = (String) vectorToSearch.elementAt(0);
setStatus("searching " + strURL);
URL url;
try {
url = new URL(strURL);
} catch (MalformedURLException e) {
setStatus("ERROR: invalid URL " + strURL);
break;
}
vectorToSearch.removeElementAt(0);
vectorSearched.addElement(strURL);
if (url.getProtocol().compareTo("http") != 0)
break;
try {
URLConnection urlConnection = url.openConnection();
urlConnection.setAllowUserInteraction(false);
InputStream urlStream = url.openStream();
String type
= urlConnection.guessContentTypeFromStream(urlStream );
if (type == null)
break;
if (type.compareTo("text/html") != 0)
break;
byte b[] = new byte[1000];
int numRead = urlStream.read(b);
String content = new String(b, 0, numRead);
while (numRead != -1) {
if (Thread.currentThread() != searchThread)
break;
numRead = urlStream.read(b);
if (numRead != -1) {
String newContent = new String(b, 0, numRead);
content += newContent;
}
}
urlStream.close();
if (Thread.currentThread() != searchThread)
break;
String lowerCaseContent = content.toLowerCase();
int index = 0;
while ((index = lowerCaseContent.indexOf("<a", index)) != -1)
{
if ((index = lowerCaseContent.indexOf("href", index)) == -1)
break;
if ((index = lowerCaseContent.indexOf("=", index)) == -1)
break;
if (Thread.currentThread() != searchThread)
break;
index++;
String remaining = content.substring(index);
StringTokenizer st
= new StringTokenizer(remaining, "\t\n\r\">#");
String strLink = st.nextToken();
URL urlLink;
try {
urlLink = new URL(url, strLink);
strLink = urlLink.toString();
} catch (MalformedURLException e) {
setStatus("ERROR: bad URL " + strLink);
continue;
}
if (urlLink.getProtocol().compareTo("http") != 0)
break;
if (Thread.currentThread() != searchThread)
break;
try {
URLConnection urlLinkConnection
= urlLink.openConnection();
urlLinkConnection.setAllowUserInteraction(false);
InputStream linkStream = urlLink.openStream();
String strType
= urlLinkConnection.guessContentTypeFromStream(linkS tream);
linkStream.close();
if (strType == null)
break;
if (strType.compareTo("text/html") == 0) {
if ((!vectorSearched.contains(strLink))
&& (!vectorToSearch.contains(strLink))) {
}
}
if (strType.compareTo(strTargetType) == 0) {
if (vectorMatches.contains(strLink) == false) {
listMatches.add(strLink);
vectorMatches.addElement(strLink);
numberFound++;
if (numberFound >= SEARCH_LIMIT)
break;
}
}
} catch (IOException e) {
setStatus("ERROR: couldn't open URL " + strLink);
continue;
}
}
} catch (IOException e) {
setStatus("ERROR: couldn't open URL " + strURL);
break;
}
numberSearched++;
if (numberSearched >= SEARCH_LIMIT)
break;
}
if (numberSearched >= SEARCH_LIMIT || numberFound >= SEARCH_LIMIT)
setStatus("reached search limit of " + SEARCH_LIMIT);
else
setStatus("done");
searchThread = null;
}
void setStatus(String status) {
labelStatus.setText(status);
}
public void actionPerformed(ActionEvent event) {
String command = event.getActionCommand();
if (command.compareTo(SEARCH) == 0) {
setStatus("searching...");
if (searchThread == null) {
searchThread = new Thread(this);
}
searchThread.start();
}
else if (command.compareTo(STOP) == 0) {
stop();
}
}
public static void main (String argv[])
{
Frame f = new Frame("My Crawler");
WebCrawler applet = new WebCrawler();
f.add("Center", applet);
/* Behind a firewall set your proxy and port here!
*/
String prox = "192.168.16.230";
Properties props= new Properties(System.getProperties());
props.put("http.proxySet","true");
props.put("http.proxyHost", "prox");
props.put("http.proxyPort", "8080");
Properties newprops = new Properties(props);
System.setProperties(newprops);
applet.init();
applet.start();
f.pack();
f.show();
}
}
-
-
-
import java.applet.Applet;
-
import java.text.*;
-
import java.awt.*;
-
import java.awt.List;
-
import java.awt.event.*;
-
import java.util.*;
-
-
-
import java.net.*;
-
import java.io.*;
-
-
public class WebCrawler extends Applet implements ActionListener, Runnable {
-
public static final String SEARCH = "Search";
-
public static final String STOP = "Stop";
-
public static final String DISALLOW = "Disallow:";
-
public static final int SEARCH_LIMIT = 50;
-
-
Panel panelMain;
-
List listMatches;
-
Label labelStatus;
-
-
Vector vectorToSearch;
-
Vector vectorSearched;
-
Vector vectorMatches;
-
-
Thread searchThread;
-
-
TextField textURL;
-
Choice choiceType;
-
-
public void init() {
-
-
panelMain = new Panel();
-
panelMain.setLayout(new BorderLayout(5, 5));
-
-
Panel panelEntry = new Panel();
-
panelEntry.setLayout(new BorderLayout(5, 5));
-
-
Panel panelURL = new Panel();
-
panelURL.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
-
Label labelURL = new Label("Starting URL: ", Label.RIGHT);
-
panelURL.add(labelURL);
-
textURL = new TextField("", 40);
-
panelURL.add(textURL);
-
panelEntry.add("North", panelURL);
-
-
Panel panelType = new Panel();
-
panelType.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
-
Label labelType = new Label("Content type: ", Label.RIGHT);
-
panelType.add(labelType);
-
choiceType = new Choice();
-
choiceType.addItem("text/html");
-
choiceType.addItem("audio/basic");
-
choiceType.addItem("audio/au");
-
choiceType.addItem("audio/aiff");
-
choiceType.addItem("audio/wav");
-
choiceType.addItem("video/mpeg");
-
choiceType.addItem("video/x-avi");
-
panelType.add(choiceType);
-
panelEntry.add("South", panelType);
-
-
panelMain.add("North", panelEntry);
-
-
Panel panelListButtons = new Panel();
-
panelListButtons.setLayout(new BorderLayout(5, 5));
-
-
Panel panelList = new Panel();
-
panelList.setLayout(new BorderLayout(5, 5));
-
Label labelResults = new Label("Search results");
-
panelList.add("North", labelResults);
-
Panel panelListCurrent = new Panel();
-
panelListCurrent.setLayout(new BorderLayout(5, 5));
-
listMatches = new List(10);
-
panelListCurrent.add("North", listMatches);
-
labelStatus = new Label("");
-
panelListCurrent.add("South", labelStatus);
-
panelList.add("South", panelListCurrent);
-
-
panelListButtons.add("North", panelList);
-
-
Panel panelButtons = new Panel();
-
Button buttonSearch = new Button(SEARCH);
-
buttonSearch.addActionListener(this);
-
panelButtons.add(buttonSearch);
-
Button buttonStop = new Button(STOP);
-
buttonStop.addActionListener(this);
-
panelButtons.add(buttonStop);
-
-
panelListButtons.add("South", panelButtons);
-
-
panelMain.add("South", panelListButtons);
-
-
add(panelMain);
-
setVisible(true);
-
-
repaint();
-
-
vectorToSearch = new Vector();
-
vectorSearched = new Vector();
-
vectorMatches = new Vector();
-
-
URLConnection.setDefaultAllowUserInteraction(false );
-
}
-
-
public void start() {
-
}
-
-
public void stop() {
-
if (searchThread != null) {
-
setStatus("stopping...");
-
searchThread = null;
-
}
-
}
-
-
public void destroy() {
-
}
-
-
-
public void paint(Graphics g) {
-
g.drawRect(0, 0, getSize().width - 1, getSize().height - 1);
-
-
panelMain.paint(g);
-
panelMain.paintComponents(g);
-
}
-
-
public void run() {
-
String strURL = textURL.getText();
-
String strTargetType = choiceType.getSelectedItem();
-
int numberSearched = 0;
-
int numberFound = 0;
-
-
if (strURL.length() == 0) {
-
setStatus("ERROR: must enter a starting URL");
-
return;
-
}
-
-
vectorToSearch.removeAllElements();
-
vectorSearched.removeAllElements();
-
vectorMatches.removeAllElements();
-
listMatches.removeAll();
-
-
vectorToSearch.addElement(strURL);
-
-
while ((vectorToSearch.size() > 0)
-
&& (Thread.currentThread() == searchThread)) {
-
strURL = (String) vectorToSearch.elementAt(0);
-
-
setStatus("searching " + strURL);
-
-
URL url;
-
try {
-
url = new URL(strURL);
-
} catch (MalformedURLException e) {
-
setStatus("ERROR: invalid URL " + strURL);
-
break;
-
}
-
-
vectorToSearch.removeElementAt(0);
-
vectorSearched.addElement(strURL);
-
-
if (url.getProtocol().compareTo("http") != 0)
-
break;
-
-
-
try {
-
URLConnection urlConnection = url.openConnection();
-
-
urlConnection.setAllowUserInteraction(false);
-
-
InputStream urlStream = url.openStream();
-
String type
-
= urlConnection.guessContentTypeFromStream(urlStream );
-
if (type == null)
-
break;
-
if (type.compareTo("text/html") != 0)
-
break;
-
-
byte b[] = new byte[1000];
-
int numRead = urlStream.read(b);
-
String content = new String(b, 0, numRead);
-
while (numRead != -1) {
-
if (Thread.currentThread() != searchThread)
-
break;
-
numRead = urlStream.read(b);
-
if (numRead != -1) {
-
String newContent = new String(b, 0, numRead);
-
content += newContent;
-
}
-
}
-
urlStream.close();
-
-
if (Thread.currentThread() != searchThread)
-
break;
-
-
String lowerCaseContent = content.toLowerCase();
-
-
int index = 0;
-
while ((index = lowerCaseContent.indexOf("<a", index)) != -1)
-
{
-
if ((index = lowerCaseContent.indexOf("href", index)) == -1)
-
break;
-
if ((index = lowerCaseContent.indexOf("=", index)) == -1)
-
break;
-
-
if (Thread.currentThread() != searchThread)
-
break;
-
-
index++;
-
String remaining = content.substring(index);
-
-
StringTokenizer st
-
= new StringTokenizer(remaining, "\t\n\r\">#");
-
String strLink = st.nextToken();
-
-
URL urlLink;
-
try {
-
urlLink = new URL(url, strLink);
-
strLink = urlLink.toString();
-
} catch (MalformedURLException e) {
-
setStatus("ERROR: bad URL " + strLink);
-
continue;
-
}
-
-
if (urlLink.getProtocol().compareTo("http") != 0)
-
break;
-
-
if (Thread.currentThread() != searchThread)
-
break;
-
-
try {
-
URLConnection urlLinkConnection
-
= urlLink.openConnection();
-
urlLinkConnection.setAllowUserInteraction(false);
-
InputStream linkStream = urlLink.openStream();
-
String strType
-
= urlLinkConnection.guessContentTypeFromStream(linkStream);
-
linkStream.close();
-
-
if (strType == null)
-
break;
-
if (strType.compareTo("text/html") == 0) {
-
if ((!vectorSearched.contains(strLink))
-
&& (!vectorToSearch.contains(strLink))) {
-
-
}
-
}
-
-
if (strType.compareTo(strTargetType) == 0) {
-
if (vectorMatches.contains(strLink) == false) {
-
listMatches.add(strLink);
-
vectorMatches.addElement(strLink);
-
numberFound++;
-
if (numberFound >= SEARCH_LIMIT)
-
break;
-
}
-
}
-
} catch (IOException e) {
-
setStatus("ERROR: couldn't open URL " + strLink);
-
continue;
-
}
-
}
-
} catch (IOException e) {
-
setStatus("ERROR: couldn't open URL " + strURL);
-
break;
-
}
-
-
numberSearched++;
-
if (numberSearched >= SEARCH_LIMIT)
-
break;
-
}
-
-
if (numberSearched >= SEARCH_LIMIT || numberFound >= SEARCH_LIMIT)
-
setStatus("reached search limit of " + SEARCH_LIMIT);
-
else
-
setStatus("done");
-
searchThread = null;
-
-
}
-
-
void setStatus(String status) {
-
labelStatus.setText(status);
-
}
-
-
public void actionPerformed(ActionEvent event) {
-
String command = event.getActionCommand();
-
-
if (command.compareTo(SEARCH) == 0) {
-
setStatus("searching...");
-
-
if (searchThread == null) {
-
searchThread = new Thread(this);
-
}
-
searchThread.start();
-
}
-
else if (command.compareTo(STOP) == 0) {
-
stop();
-
}
-
}
-
public static void main (String argv[])
-
{
-
Frame f = new Frame("My Crawler");
-
WebCrawler applet = new WebCrawler();
-
f.add("Center", applet);
-
-
/* Behind a firewall set your proxy and port here!
-
*/
-
String prox = "192.168.16.230";
-
Properties props= new Properties(System.getProperties());
-
props.put("http.proxySet","true");
-
props.put("http.proxyHost", "prox");
-
props.put("http.proxyPort", "8080");
-
-
-
Properties newprops = new Properties(props);
-
System.setProperties(newprops);
-
-
-
-
applet.init();
-
applet.start();
-
f.pack();
-
f.show();
-
}
-
-
}
-
-
-
Try this. I just removed one space that was giving an error and it worked for me. If it does not work post the error that you get.
well i got this code from java.sun.com and tried modiifying it in all the possible ways,but to no good.. stil its not workin..pleas help me out and try postin good workinw web cralwer if u have.. need help asap...
import java.applet.Applet;
import java.text.*;
import java.awt.*;
import java.awt.List;
import java.awt.event.*;
import java.util.*;
import java.net.*;
import java.io.*;
public class WebCrawler extends Applet implements ActionListener, Runnable {
public static final String SEARCH = "Search";
public static final String STOP = "Stop";
public static final String DISALLOW = "Disallow:";
public static final int SEARCH_LIMIT = 50;
Panel panelMain;
List listMatches;
Label labelStatus;
Vector vectorToSearch;
Vector vectorSearched;
Vector vectorMatches;
Thread searchThread;
TextField textURL;
Choice choiceType;
public void init() {
panelMain = new Panel();
panelMain.setLayout(new BorderLayout(5, 5));
Panel panelEntry = new Panel();
panelEntry.setLayout(new BorderLayout(5, 5));
Panel panelURL = new Panel();
panelURL.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
Label labelURL = new Label("Starting URL: ", Label.RIGHT);
panelURL.add(labelURL);
textURL = new TextField("", 40);
panelURL.add(textURL);
panelEntry.add("North", panelURL);
Panel panelType = new Panel();
panelType.setLayout(new FlowLayout(FlowLayout.LEFT, 5, 5));
Label labelType = new Label("Content type: ", Label.RIGHT);
panelType.add(labelType);
choiceType = new Choice();
choiceType.addItem("text/html");
choiceType.addItem("audio/basic");
choiceType.addItem("audio/au");
choiceType.addItem("audio/aiff");
choiceType.addItem("audio/wav");
choiceType.addItem("video/mpeg");
choiceType.addItem("video/x-avi");
panelType.add(choiceType);
panelEntry.add("South", panelType);
panelMain.add("North", panelEntry);
Panel panelListButtons = new Panel();
panelListButtons.setLayout(new BorderLayout(5, 5));
Panel panelList = new Panel();
panelList.setLayout(new BorderLayout(5, 5));
Label labelResults = new Label("Search results");
panelList.add("North", labelResults);
Panel panelListCurrent = new Panel();
panelListCurrent.setLayout(new BorderLayout(5, 5));
listMatches = new List(10);
panelListCurrent.add("North", listMatches);
labelStatus = new Label("");
panelListCurrent.add("South", labelStatus);
panelList.add("South", panelListCurrent);
panelListButtons.add("North", panelList);
Panel panelButtons = new Panel();
Button buttonSearch = new Button(SEARCH);
buttonSearch.addActionListener(this);
panelButtons.add(buttonSearch);
Button buttonStop = new Button(STOP);
buttonStop.addActionListener(this);
panelButtons.add(buttonStop);
panelListButtons.add("South", panelButtons);
panelMain.add("South", panelListButtons);
add(panelMain);
setVisible(true);
repaint();
vectorToSearch = new Vector();
vectorSearched = new Vector();
vectorMatches = new Vector();
URLConnection.setDefaultAllowUserInteraction(false );
}
public void start() {
}
public void stop() {
if (searchThread != null) {
setStatus("stopping...");
searchThread = null;
}
}
public void destroy() {
}
public void paint(Graphics g) {
g.drawRect(0, 0, getSize().width - 1, getSize().height - 1);
panelMain.paint(g);
panelMain.paintComponents(g);
}
public void run() {
String strURL = textURL.getText();
String strTargetType = choiceType.getSelectedItem();
int numberSearched = 0;
int numberFound = 0;
if (strURL.length() == 0) {
setStatus("ERROR: must enter a starting URL");
return;
}
vectorToSearch.removeAllElements();
vectorSearched.removeAllElements();
vectorMatches.removeAllElements();
listMatches.removeAll();
vectorToSearch.addElement(strURL);
while ((vectorToSearch.size() > 0)
&& (Thread.currentThread() == searchThread)) {
strURL = (String) vectorToSearch.elementAt(0);
setStatus("searching " + strURL);
URL url;
try {
url = new URL(strURL);
} catch (MalformedURLException e) {
setStatus("ERROR: invalid URL " + strURL);
break;
}
vectorToSearch.removeElementAt(0);
vectorSearched.addElement(strURL);
if (url.getProtocol().compareTo("http") != 0)
break;
try {
URLConnection urlConnection = url.openConnection();
urlConnection.setAllowUserInteraction(false);
InputStream urlStream = url.openStream();
String type
= urlConnection.guessContentTypeFromStream(urlStream );
if (type == null)
break;
if (type.compareTo("text/html") != 0)
break;
byte b[] = new byte[1000];
int numRead = urlStream.read(b);
String content = new String(b, 0, numRead);
while (numRead != -1) {
if (Thread.currentThread() != searchThread)
break;
numRead = urlStream.read(b);
if (numRead != -1) {
String newContent = new String(b, 0, numRead);
content += newContent;
}
}
urlStream.close();
if (Thread.currentThread() != searchThread)
break;
String lowerCaseContent = content.toLowerCase();
int index = 0;
while ((index = lowerCaseContent.indexOf("<a", index)) != -1)
{
if ((index = lowerCaseContent.indexOf("href", index)) == -1)
break;
if ((index = lowerCaseContent.indexOf("=", index)) == -1)
break;
if (Thread.currentThread() != searchThread)
break;
index++;
String remaining = content.substring(index);
StringTokenizer st
= new StringTokenizer(remaining, "\t\n\r\">#");
String strLink = st.nextToken();
URL urlLink;
try {
urlLink = new URL(url, strLink);
strLink = urlLink.toString();
} catch (MalformedURLException e) {
setStatus("ERROR: bad URL " + strLink);
continue;
}
if (urlLink.getProtocol().compareTo("http") != 0)
break;
if (Thread.currentThread() != searchThread)
break;
try {
URLConnection urlLinkConnection
= urlLink.openConnection();
urlLinkConnection.setAllowUserInteraction(false);
InputStream linkStream = urlLink.openStream();
String strType
= urlLinkConnection.guessContentTypeFromStream(linkS tream);
linkStream.close();
if (strType == null)
break;
if (strType.compareTo("text/html") == 0) {
if ((!vectorSearched.contains(strLink))
&& (!vectorToSearch.contains(strLink))) {
}
}
if (strType.compareTo(strTargetType) == 0) {
if (vectorMatches.contains(strLink) == false) {
listMatches.add(strLink);
vectorMatches.addElement(strLink);
numberFound++;
if (numberFound >= SEARCH_LIMIT)
break;
}
}
} catch (IOException e) {
setStatus("ERROR: couldn't open URL " + strLink);
continue;
}
}
} catch (IOException e) {
setStatus("ERROR: couldn't open URL " + strURL);
break;
}
numberSearched++;
if (numberSearched >= SEARCH_LIMIT)
break;
}
if (numberSearched >= SEARCH_LIMIT || numberFound >= SEARCH_LIMIT)
setStatus("reached search limit of " + SEARCH_LIMIT);
else
setStatus("done");
searchThread = null;
}
void setStatus(String status) {
labelStatus.setText(status);
}
public void actionPerformed(ActionEvent event) {
String command = event.getActionCommand();
if (command.compareTo(SEARCH) == 0) {
setStatus("searching...");
if (searchThread == null) {
searchThread = new Thread(this);
}
searchThread.start();
}
else if (command.compareTo(STOP) == 0) {
stop();
}
}
public static void main (String argv[])
{
Frame f = new Frame("My Crawler");
WebCrawler applet = new WebCrawler();
f.add("Center", applet);
/* Behind a firewall set your proxy and port here!
*/
String prox = "192.168.16.230";
Properties props= new Properties(System.getProperties());
props.put("http.proxySet","true");
props.put("http.proxyHost", "prox");
props.put("http.proxyPort", "8080");
Properties newprops = new Properties(props);
System.setProperties(newprops);
applet.init();
applet.start();
f.pack();
f.show();
}
}
Please do not double post
Sign in to post your reply or Sign up for a free account.
Similar topics
by: Metropolis |
last post by:
Hello All,
I am currently trying to teach a web crawler how to identify blogs,
that is I am trying to determine a fairly inclusive set of criteria
that will help my crawler to identify them.
...
|
by: Benjamin Lefevre |
last post by:
I am currently developping a web crawler, mainly crawling mobile page (wml,
mobile xhtml) but not only (also html/xml/...), and I ask myself which speed
I can reach.
This crawler is developped in...
|
by: Steve Ocsic |
last post by:
Hi,
I've coded a basic crawler where by you enter the URL and it will then
crawl the said URL. What I would like to do now is to take it one
step further and do the following:
1. pick up the...
|
by: Nicolas |
last post by:
I need HELP!!!!!
The crawler (Google or other) don't index my web site unless the web site is
currently visited
If there is nobody visiting those .aspx page therefor activating the aspnet
no...
|
by: Bill |
last post by:
Has anyone used/tested Request.Browser.Crawler ? Is it reliable, or are there false
positives/negatives?
Thanks!
|
by: abhinav |
last post by:
Hi guys.I have to implement a topical crawler as a part of my
project.What language should i implement
C or Python?Python though has fast development cycle but my concern is
speed also.I want to...
|
by: Petrosa |
last post by:
Hey all,
I have a project that i need to make a web crawler to find links in a website, and then represent the site's structure in a 3D tree. I have found an example at...
|
by: kishorealla |
last post by:
Hello
I need to create a web bot/crawler/spider that would go into different web sites and collect data for us and store in a database. The crawler needs to 'READ' the options on a website (either...
|
by: sonich |
last post by:
I need simple web crawler,
I found Ruya, but it's seems not currently maintained.
Does anybody know good web crawler on python or with python interface?
|
by: Naresh1 |
last post by:
What is WebLogic Admin Training?
WebLogic Admin Training is a specialized program designed to equip individuals with the skills and knowledge required to effectively administer and manage Oracle...
|
by: WisdomUfot |
last post by:
It's an interesting question you've got about how Gmail hides the HTTP referrer when a link in an email is clicked. While I don't have the specific technical details, Gmail likely implements measures...
|
by: Oralloy |
last post by:
Hello Folks,
I am trying to hook up a CPU which I designed using SystemC to I/O pins on an FPGA.
My problem (spelled failure) is with the synthesis of my design into a bitstream, not the C++...
|
by: BLUEPANDA |
last post by:
At BluePanda Dev, we're passionate about building high-quality software and sharing our knowledge with the community. That's why we've created a SaaS starter kit that's not only easy to use but also...
|
by: Ricardo de Mila |
last post by:
Dear people, good afternoon...
I have a form in msAccess with lots of controls and a specific routine must be triggered if the mouse_down event happens in any control.
Than I need to discover what...
|
by: Johno34 |
last post by:
I have this click event on my form. It speaks to a Datasheet Subform
Private Sub Command260_Click()
Dim r As DAO.Recordset
Set r = Form_frmABCD.Form.RecordsetClone
r.MoveFirst
Do
If...
|
by: ezappsrUS |
last post by:
Hi,
I wonder if someone knows where I am going wrong below. I have a continuous form and two labels where only one would be visible depending on the checkbox being checked or not. Below is the...
|
by: jack2019x |
last post by:
hello, Is there code or static lib for hook swapchain present?
I wanna hook dxgi swapchain present for dx11 and dx9.
|
by: F22F35 |
last post by:
I am a newbie to Access (most programming for that matter). I need help in creating an Access database that keeps the history of each user in a database. For example, a user might have lesson 1 sent...
| |