I have 2 code like below to grab a news website for my site. However,
when I click some links (such as : http://wwww.vnexpress.net/xxx/xxxx )
inside the site which I want to grab, it has some errors. Can any body
help me ??
Demo : news.thuthao.info
The real website : vnexpress.net
---------------------------index.php-------------------------------
<?php
function grabData($source_to_grab, $delimiter_start, $delimiter_stop,
$str_to_replace='', $str_replace='', $extra_data='') {
$fd = ""; $start_pos = $end_pos = 0;
$source_to_grab = fopen($source_to_grab, "r");
while(true) {
if($end_pos > $start_pos) {
$result = substr($fd, $start_pos, $end_pos-$start_pos);
$result .= $delimiter_stop;
break;
}//10
$data = fread($source_to_grab, 8192);
if(strlen($data) == 0) break;
$fd .= $data;
if(!$start_pos) $start_pos = strpos($fd, $delimiter_start);
if($start_pos) $end_pos = strpos(substr($fd, $start_pos),
$delimiter_stop) + $start_pos;
}
fclose($source_to_grab);
return str_replace($str_to_replace, $str_replace, $extra_data.$result);
}//19
$url = "http://vnexpress.net/Vietnam/Home/";
$delimiter_start = '<table width="100%" cellspacing=0 cellpadding=0
border=0><tr bgcolor="#CCCCCC">';
$delimiter_stop = '<td width=210 valign=top><A
href="/Vietnam/Home/buuthiep.gif" class=Normal></A>';
$web = grabData($url, $delimiter_start, $delimiter_stop, 'img src="/',
'img src="http://vnexpress.net/', '');
$web = str_replace('href="',
'href="http://www.thuthao.info/news/chitiet.php?url=', $web);
$header = '<html><head><meta http-equiv="Content-Type"
content="text/html; charset=UTF-8"><link rel="stylesheet"
href="Default.css" type="text/css"><title>NGUYEN HUYNH THU THAO
NEWS</title></head><body topmargin=3 leftmargin=0 marginheight=3
marginwidth=0>';
$footer = '</tr></table></body></html>';
$full = $header.$web.$footer;
echo '<div align=center><a href="http://news.thuthao.info">Trang
nhất</a> - <a href="http://www.thuthao.info">Trang chủ</a> - <a
href="http://forum.thuthao.info">Diá»…n Ä‘Ã*n</a></div>';
echo '<tr> </tr>';
echo $full;
------------------------------------------------------------------
--------------------------chitiet.php------------------------------
<?php
function grabData($source_to_grab, $delimiter_start, $delimiter_stop,
$str_to_replace='', $str_replace='', $extra_data='') {
$fd = ""; $start_pos = $end_pos = 0;
$source_to_grab = fopen($source_to_grab, "r");
while(true) {
if($end_pos > $start_pos) {
$result = substr($fd, $start_pos, $end_pos-$start_pos);
$result .= $delimiter_stop;
break;
}//10
$data = fread($source_to_grab, 8192);
if(strlen($data) == 0) break;
$fd .= $data;
if(!$start_pos) $start_pos = strpos($fd, $delimiter_start);
if($start_pos) $end_pos = strpos(substr($fd, $start_pos),
$delimiter_stop) + $start_pos;
}
fclose($source_to_grab);
return str_replace($str_to_replace, $str_replace, $extra_data.$result);
}//19
$url = 'http://vnexpress.net'.$url;
$begin1 = '<table id="CContainer" border=0 cellpadding=0 cellspacing=0
width="100%">';
$begin2 = '<table width="100%" cellspacing=0 cellpadding=0 border=0>';
$delimiter_stop = '</ul>';
$web = grabData($url, $begin1, $delimiter_stop, '', '', '');
if (strlen($web) == 0) $web = grabData($url, $begin2 , $delimiter_stop,
'', '', '');
$web = str_replace('src="','src="'.$url.'/',$web);
$web =
str_replace('src="'.$url.'//','src="http://vnexpress.net/',$web);
$web = str_replace('href="',
'href="http://www.thuthao.info/news/chitiet.php?url=', $web);
$web =
str_replace('href="www.thuthao.info/news/chitiet.php?url=javascript:history.go(-1)',
'href="javascript:history.go(-1)', $web);
$header = '<html><head><meta http-equiv="Content-Type"
content="text/html; charset=UTF-8"><link rel="stylesheet"
href="Default.css" type="text/css"><title>NGUYEN HUYNH THU THAO -
NEWS</title></head><body topmargin=3 leftmargin=0 marginheight=3
marginwidth=0>';
$footer = '</td></tr><tr><td align="center"
nowrap></td></tr></table></body></html>';
$full = $header.$web.$footer;
echo '<div align=center><a href="http://news.thuthao.info">Trang
nhất</a> - <a href="http://www.thuthao.info">Trang chủ</a> - <a
href="http://forum.thuthao.info">Diá»…n Ä‘Ã*n</a></div>';
echo '<tr> </tr>';
echo $full;
--------------------------------------------------------------------