簡單來說,SRSS v0.1 和蕭易玄兄的 Super RSS 服務差不多,都是把網頁內容轉成 RSS(不過要自己 host)。
Licence : GPL (改得好記住拿出和大家分享)
Usage:
http://your-site.com/srss.php?
rss_url=http://taiwan.cnet.com/news/archives/
&rss_start_string=OTHER%20STORY%20HIGHLIGHTS
&rss_end_string=OTHER%20STORY%20HIGHLIGHTS
Todos:
Frontend
Remove <img> tags nested inside <a> elements
Better charset detection
Bug Report:
Post here or email me.
代碼:
<?php
/* SRSS v0.1
*
* CopyRight @2004 Dick Tang (freecityhk@hotmail.com)
* Licence : GPL
*
*/
/**
 * Return the current Unix timestamp as a float, including the sub-second part.
 *
 * microtime() is called without arguments, so it yields a string of two
 * space-separated numbers; both are converted to float and added together.
 *
 * @return float
 */
function microtime_float()
{
    $parts = explode(" ", microtime());
    $fraction = (float)$parts[0];
    $seconds = (float)$parts[1];

    return $fraction + $seconds;
}
/**
 * Return a request parameter as a string.
 *
 * @param string $name parameter name looked up in $_REQUEST
 * @return string the value, or '' when it is missing or not a string (e.g. rss_url[]=x)
 */
function srss_request_string($name)
{
    if (isset($_REQUEST[$name]) && is_string($_REQUEST[$name])) {
        return $_REQUEST[$name];
    }
    return '';
}

/**
 * Abort the request with an HTTP error status and a plain-text message.
 *
 * @param string $status  status code and reason phrase, e.g. '400 Bad Request'
 * @param string $message human-readable explanation
 */
function srss_fail($status, $message)
{
    header('HTTP/1.0 ' . $status);
    header('Content-type: text/plain; charset=utf-8');
    echo 'SRSS error: ' . $message . "\n";
    exit;
}

/**
 * Escape text for use as XML element content or an attribute value.
 *
 * The replacement is deliberately byte-wise: the five escaped characters are
 * all below 0x40, so they never appear as the second byte of a Big5, GBK or
 * Shift_JIS character, and text in the source page's own encoding survives.
 *
 * @param string $text raw text
 * @return string XML-safe text
 */
function srss_xml_escape($text)
{
    return str_replace(
        array('&', '<', '>', '"', "'"),
        array('&amp;', '&lt;', '&gt;', '&quot;', '&#39;'),
        $text
    );
}

/**
 * Decode the basic HTML entities (&lt; &gt; &quot; &#39; &#039; &amp;).
 *
 * '&amp;' is decoded last so that '&amp;lt;' becomes '&lt;' and not '<'.
 *
 * @param string $text HTML text
 * @return string text with those entities replaced by their characters
 */
function srss_decode_basic_entities($text)
{
    return str_replace(
        array('&lt;', '&gt;', '&quot;', '&#39;', '&#039;', '&amp;'),
        array('<', '>', '"', "'", "'", '&'),
        $text
    );
}

/**
 * Return the part of $haystack between the first $start marker and the next $end marker.
 *
 * Markers are matched literally (they are never treated as regular expressions).
 * An empty $start means "from the beginning", an empty $end means "to the end".
 *
 * @param string $haystack text to cut from
 * @param string $start    literal start marker
 * @param string $end      literal end marker
 * @return string the enclosed text, or '' when a non-empty marker is not found
 */
function srss_slice_between($haystack, $start, $end)
{
    $from = 0;
    if ($start !== '') {
        $pos = strpos($haystack, $start);
        if ($pos === false) {
            return '';
        }
        $from = $pos + strlen($start);
    }
    if ($end === '') {
        return (string)substr($haystack, $from);
    }
    $to = strpos($haystack, $end, $from);
    if ($to === false) {
        return '';
    }
    return (string)substr($haystack, $from, $to - $from);
}

/**
 * Extract the href value from the attribute part of an <a> tag.
 *
 * Handles double-quoted, single-quoted and unquoted values.
 *
 * @param string $attributes everything between '<a ' and '>'
 * @return string the entity-decoded, trimmed href, or '' when there is none
 */
function srss_extract_href($attributes)
{
    $pattern = '~(?:^|\s)href\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s>]+))~i';
    if (!preg_match($pattern, $attributes, $m)) {
        return '';
    }
    // Only one of the three alternatives can have captured a value.
    for ($i = 1; $i <= 3; $i++) {
        if (isset($m[$i]) && $m[$i] !== '') {
            return trim(srss_decode_basic_entities($m[$i]));
        }
    }
    return '';
}

/**
 * Turn an href found on the page into an absolute URL.
 *
 * @param string $href     href as written in the page
 * @param string $scheme   scheme of the page URL ('http' or 'https')
 * @param string $site_url scheme://host[:port] of the page, no trailing slash
 * @param string $base_url directory URL of the page, with trailing slash
 * @return string absolute URL
 */
function srss_absolute_link($href, $scheme, $site_url, $base_url)
{
    if (preg_match('~^[a-z][a-z0-9+.-]*:~i', $href)) {
        return $href; // already carries its own scheme
    }
    if (substr($href, 0, 2) === '//') {
        return $scheme . ':' . $href; // protocol-relative
    }
    if (substr($href, 0, 1) === '/') {
        return $site_url . $href; // root-relative
    }
    return $base_url . $href; // relative to the page's directory
}

// ---- read and validate the request ----------------------------------------
$rss_url = trim(srss_request_string('rss_url'));
$rss_start_string = srss_request_string('rss_start_string');
$rss_end_string = srss_request_string('rss_end_string');

$rss_url_array = parse_url($rss_url);
if (!is_array($rss_url_array)) {
    $rss_url_array = array();
}
$rss_scheme = isset($rss_url_array['scheme']) ? strtolower($rss_url_array['scheme']) : '';
$rss_host = isset($rss_url_array['host']) ? $rss_url_array['host'] : '';

// rss_url comes straight from the request and is handed to fopen(): accept
// only http/https so local files and other stream wrappers cannot be read.
// NOTE(review): hosts on the internal network are still reachable; add an
// allow-list here if this script is exposed publicly.
if (($rss_scheme !== 'http' && $rss_scheme !== 'https') || $rss_host === '') {
    srss_fail('400 Bad Request', 'rss_url must be an absolute http:// or https:// URL.');
}

$rss_path = isset($rss_url_array['path']) ? $rss_url_array['path'] : '';
if ($rss_path === '' || substr($rss_path, 0, 1) !== '/') {
    $rss_path = '/' . $rss_path;
}
$rss_site_url = $rss_scheme . '://' . $rss_host;
if (isset($rss_url_array['port'])) {
    $rss_site_url .= ':' . $rss_url_array['port'];
}
// Everything up to and including the last '/' is the directory that relative
// links resolve against ("/news/archives/" stays as is, "/a/b.html" gives "/a/").
$rss_base_url = $rss_site_url . substr($rss_path, 0, strrpos($rss_path, '/') + 1);

$rss_file_content = '';
$time = array();

// ---- fetch the page ---------------------------------------------------------
$time_start = microtime_float();
$fp = fopen($rss_url, 'r');
if (!$fp) {
    srss_fail('502 Bad Gateway', 'could not fetch rss_url.');
}
while (!feof($fp)) {
    $rss_line = fgets($fp, 4096);
    if ($rss_line === false) {
        break; // read error or timeout: stop instead of looping forever
    }
    $rss_file_content .= $rss_line;
}
fclose($fp);
$time_end = microtime_float();
$time['getfile'] = $time_end - $time_start;

// ---- title and charset ------------------------------------------------------
$time_start = microtime_float();
// Line breaks are removed so the patterns below can match across lines.
$rss_file_content = str_replace(array("\r", "\n"), '', $rss_file_content);

$rss_title = '';
if (preg_match('~<title[^>]*>(.*?)</title\s*>~i', $rss_file_content, $out)) {
    $rss_title = trim(srss_decode_basic_entities(strip_tags($out[1])));
}

// The charset is echoed into the XML declaration and the Content-type header,
// so only a syntactically valid encoding name is accepted.
$rss_charset = 'UTF-8';
if (preg_match('~<meta[^>]+charset\s*=\s*["\']?([A-Za-z][A-Za-z0-9._-]*)~i', $rss_file_content, $out)) {
    $rss_charset = $out[1];
}
$time_end = microtime_float();
$time['preproccess'] = $time_end - $time_start;

// ---- cut out the region between the two markers -----------------------------
$time_start = microtime_float();
$rss_sp_range = srss_slice_between($rss_file_content, $rss_start_string, $rss_end_string);
$time_end = microtime_float();
$time['intosp'] = $time_end - $time_start;

// ---- find the anchors --------------------------------------------------------
$time_start = microtime_float();
$rss_anchors = array();
// Group 1 is the attribute string, group 2 the anchor's inner HTML.
if (!preg_match_all('~<a\s([^>]*)>(.*?)</a\s*>~is', $rss_sp_range, $rss_anchors, PREG_SET_ORDER)) {
    $rss_anchors = array();
}
$time_end = microtime_float();
$time['matchall'] = $time_end - $time_start;

// ---- build the item list -----------------------------------------------------
$time_start = microtime_float();
$items = array();
foreach ($rss_anchors as $rss_anchor) {
    $rss_href = srss_extract_href($rss_anchor[1]);
    if ($rss_href === '' || substr($rss_href, 0, 1) === '#') {
        continue; // named anchors and in-page jumps are not feed items
    }
    $rss_link = srss_absolute_link($rss_href, $rss_scheme, $rss_site_url, $rss_base_url);
    if (!preg_match('~^https?://~i', $rss_link)) {
        continue; // mailto:, javascript: and similar links
    }
    $rss_item_title = trim(srss_decode_basic_entities(strip_tags($rss_anchor[2])));
    if ($rss_item_title === '') {
        $rss_item_title = $rss_link; // image-only link: fall back to the URL
    }
    $items[] = array(
        'title' => $rss_item_title,
        'link' => $rss_link,
        'description' => $rss_anchor[2]
    );
}
$time_end = microtime_float();
$time['getlink'] = $time_end - $time_start;

// ---- emit the feed -----------------------------------------------------------
$time_start = microtime_float();
header('Content-type: text/xml; charset=' . $rss_charset);
echo '<?xml version="1.0" encoding="' . $rss_charset . '"?>' . "\n";
echo '<?xml-stylesheet href="rss.css" type="text/css"?>' . "\n";
echo '<rss version="2.0">' . "\n";
echo '<channel>' . "\n";
echo '<title>' . srss_xml_escape('SRSS - ' . $rss_title) . '</title>' . "\n";
echo '<link>' . srss_xml_escape($rss_url) . '</link>' . "\n";
echo '<description>Generated by SRSS 0.1</description>' . "\n";

// RSS 2.0 requires RFC 822 dates; 'r' produces that format. The page gives no
// per-item dates, so every item carries the time the feed was generated.
$rss_pub_date = date('r');
foreach ($items as $item) {
    echo '<item>' . "\n";
    echo '<title>' . srss_xml_escape($item['title']) . '</title>' . "\n";
    echo '<link>' . srss_xml_escape($item['link']) . '</link>' . "\n";
    echo '<guid>' . srss_xml_escape($item['link']) . '</guid>' . "\n";
    echo '<pubDate>' . $rss_pub_date . '</pubDate>' . "\n";
    // The description keeps the anchor's HTML, entity-encoded as RSS expects.
    echo '<description>' . srss_xml_escape($item['description']) . '</description>' . "\n";
    echo '</item>' . "\n";
}
echo '</channel>' . "\n";
echo '</rss>' . "\n";
$time_end = microtime_float();
$time['output'] = $time_end - $time_start;

// Timing of each phase, appended as an XML comment for debugging.
echo '<!--' . "\n";
echo 'debug info.' . "\n";
print_r($time);
echo '-->';
?>