<?php
/*
Author: Alexey G. Piyanin (e-mail: drdrzlo at mail dot ru)
Date: Jun 7 2006
Title: Get wikipedia page content
*/
// Full open tag is used because the short form "<?" only works when the
// short_open_tag ini setting is enabled.
// require_once: the script cannot work without the parser class, so fail
// immediately if the file is missing and never load it twice.
require_once('SAXParser.php');
/**
 * Character-data handler registered with the parser via initFunc().
 *
 * Only text seen while the global $isComment flag is set is examined.
 * The first comment whose trimmed text is "start content" switches
 * $startContent on; after that, every comment whose trimmed text is
 * "end content" stores the current $commentPos into $endContent.
 *
 * @param string $str Text chunk handed over by the parser.
 */
function character($str){
    global $isComment,$startContent,$endContent,$commentPos;

    // Text outside of a comment never carries a marker.
    if (!$isComment) {
        return;
    }

    $marker = trim($str);

    if ($startContent) {
        // Already inside the content area: watch for the closing marker.
        if ($marker === 'end content') {
            $endContent = $commentPos;
        }
        return;
    }

    // Still looking for the opening marker.
    if ($marker === 'start content') {
        $startContent = true;
    }
}
/**
 * Comment handler registered with the parser via initFunc().
 *
 * Stores the latest comment state and position in globals so that
 * character() can consult them, and fixes $beginContent once: on the
 * first call with a falsy $start after $startContent has been set.
 *
 * @param mixed $start Stored as-is into $isComment; presumably truthy when
 *                     a comment opens and falsy when it closes (confirm
 *                     against SAXParser.php).
 * @param int   $pos   Position reported by the parser for this event.
 */
function comment($start,$pos){
    global $isComment,$startContent,$commentPos,$beginContent;

    // Remember the most recent comment event.
    $isComment = $start;
    $commentPos = $pos;

    // The content begins right after the comment that carried the start
    // marker. The +3 presumably skips the closing "-->" (TODO confirm).
    $needsBegin = ($beginContent == 0);
    if (!$start && $startContent && $needsBegin) {
        $beginContent = $pos + 3;
    }
}
// Page whose content is extracted and displayed.
$URL = 'http://en.wikipedia.org/wiki/Kalimpong';
#---
// State shared with the comment()/character() callbacks through globals.
$isComment = false;    // last $start value passed to comment()
$commentPos = 0;       // last $pos value passed to comment()
$startContent = false; // set once the "start content" comment was seen
#---
// Byte offsets into $content delimiting the extracted content.
$beginContent = 0;
$endContent = 0;
#---
$parser = new HTML_SAXParser();
// Only the character and comment handlers are registered; the first two
// slots are left empty (presumably the tag handlers - confirm against
// SAXParser.php).
$parser->initFunc('','','character','comment');
#---
// ATTENTION: loading a URL this way requires allow_url_fopen; replace with a
// proper HTTP client if that is not available.
// file_get_contents() returns false on failure; fall back to an empty string
// so the page still renders instead of passing false on to the parser.
$content = file_get_contents($URL);
if ($content === false) {
    $content = '';
}
?>
<html>
<body>
<center>Source page:<br><iframe src="<?=$URL?>" width="600" height="400" ></iframe><br><br></center>
Content:<br>
<?php
// Run the parser over the downloaded page; the callbacks passed to
// initFunc() fill in $beginContent / $endContent as a side effect.
$parser->parseString($content);
//----
// Print only when both markers were found in the expected order. Without
// this guard a missing end marker leaves $endContent at 0, the length
// becomes negative, and substr() prints an arbitrary slice of the page.
if ($endContent > $beginContent) {
    echo substr($content,$beginContent,$endContent-$beginContent);
}
?>
</body></html>
|