Hello,
I'm trying to scrape daily titles from http://www.doopes.com/?cat=35444&lang=1&num=5&mode=0&from=YYYY-MM-DD&to=YYYY-MM-DD&exc=&inc=&opt=0
But I'm getting lost using preg_match. Can someone help me with this script?
Thanks in advance!
[php]
<?php
/**
 * Scrapes today's listing page from doopes.com and republishes it as an
 * RSS 2.0 feed.
 *
 * This first half downloads the page, collects the candidate content blocks
 * into $matches, and opens the feed (HTTP header, XML prolog, channel
 * metadata). Nothing may be output before header() is called, so this file
 * must start directly with the PHP open tag.
 */

$today = date("Y-m-d");

// Listing URL restricted to today's date (from == to).
// The separator is passed explicitly so a non-default arg_separator.output
// setting cannot turn the "&" into "&amp;".
$url = "http://www.doopes.com/?" . http_build_query([
    'cat'  => 35444,
    'lang' => 1,
    'num'  => 5,
    'mode' => 0,
    'from' => $today,
    'to'   => $today,
    'exc'  => '',
    'inc'  => '',
    'opt'  => 0,
], '', '&');

// Download the page.
$ch = curl_init();
$timeout = 5; // seconds allowed for establishing the connection
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
$data = curl_exec($ch);
if ($data === false) {
    // Best effort: log the failure and fall through with an empty page so
    // the client still receives a well-formed (empty) feed.
    error_log("Feed source request failed: " . curl_error($ch));
    $data = "";
}
curl_close($ch);

// Get content items. The class [^`] excludes only the backtick, so the lazy
// group also spans newlines between <tbody> and the next </table>.
if (preg_match_all("/<tbody>([^`]*?)<\/table>/", $data, $matches) === false) {
    // PCRE error (e.g. backtrack limit): keep the shape the item loop expects.
    $matches = [[], []];
}

// Begin feed
header("Content-Type: text/xml; charset=ISO-8859-1");
echo "<?xml version=\"1.0\" encoding=\"ISO-8859-1\" ?>\n";
?>
<rss version="2.0"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:admin="http://webns.net/mvcb/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<channel>
<title>Latest Scene Releases</title>
<description>Scene releases of <?php echo $today;?> provided by http://flx-tech.net</description>
<link>http://www.flx-tech.net</link>
<language>en-us</language>
<?php
/**
 * Emits one RSS <item> per scraped content block in $matches[0], then closes
 * the channel and the feed.
 */

// Site the item links are resolved against. Relative hrefs scraped from the
// listing belong to the scraped site.
// NOTE(review): assumes the listing lives at the site root — confirm.
$sourceBase = "http://www.doopes.com/";

// Returns the trimmed first capture group of $pattern in $subject, or ''
// when the pattern does not match (avoids reading a missing index).
$firstCapture = function ($pattern, $subject) {
    return preg_match($pattern, $subject, $m) === 1 ? trim($m[1]) : '';
};

// Turns a scraped HTML fragment into XML-safe plain text: tags are removed,
// HTML entities are decoded (so "&amp;" is not double-escaped and "&nbsp;"
// does not end up as an undefined XML entity), then the XML special
// characters are escaped. The charset matches the feed's declared encoding.
$xmlText = function ($html) {
    $plain = html_entity_decode(strip_tags($html), ENT_QUOTES, 'ISO-8859-1');
    return htmlspecialchars(trim($plain), ENT_QUOTES | ENT_XML1, 'ISO-8859-1');
};

$blocks = isset($matches[0]) ? $matches[0] : [];

// Loop through each content item
foreach ($blocks as $block) {
    // Title: contents of the first table cell.
    $title = $firstCapture('/<td>([^`]*?)<\/td/', $block);

    // Link target: first anchor href. Kept in $href so the page URL held in
    // $url is not overwritten.
    $href = $firstCapture('/<a href="([^`]*?)">/', $block);
    if (preg_match('/^https?:\/\//i', $href) === 1) {
        $link = $href;                            // already absolute
    } else {
        $link = $sourceBase . ltrim($href, '/');  // resolve against the site
    }

    // Body text: from the first <p> up to the byline.
    $text = $firstCapture('/<p>([^`]*?)<span class="byline">/', $block);

    // Author: the byline without its "By " prefix.
    $author = $firstCapture('/<span class="byline">By ([^`]*?)<\/span>/', $block);

    // A literal "]]>" would end the CDATA section early; split it across two
    // adjacent sections instead.
    $cdata = str_replace(']]>', ']]]]><![CDATA[>', $text);

    // Echo RSS XML
    echo "\t\t<item>\n";
    echo "\t\t\t<title>" . $xmlText($title) . "</title>\n";
    echo "\t\t\t<link>" . $xmlText($link) . "</link>\n";
    echo "\t\t\t<description>" . $xmlText($text) . "</description>\n";
    echo "\t\t\t<content:encoded><![CDATA[ \n";
    echo $cdata . "\n";
    echo " ]]></content:encoded>\n";
    echo "\t\t\t<dc:creator>" . $xmlText($author) . "</dc:creator>\n";
    echo "\t\t</item>\n";
}
?>
</channel>
</rss>
[/php]