<?
/*
Author: Alexey G. Piyanin (e-mail: drdrzlo at mail dot ru)
Date: Jun 7 2006
Title: Get page part
*/
include('SAXParser.php');
/**
 * Start-tag callback: maintains the open-element path and records news links.
 *
 * The global $stack holds the currently open tags, innermost first. Tags
 * listed in the global $t are never pushed (presumably because they are
 * void elements that have no closing tag - confirm against the parser).
 *
 * Once $isBeginNews is set, an <a> found at the fixed news-link path stores
 * its href in $news[$currentNewsIndex]['href'] (an empty string when the tag
 * carries no href attribute).
 *
 * @param string $tag        Tag name as delivered by the parser.
 * @param array  $attributes Attribute name => value map for the tag.
 * @param int    $readSize   Not used by this callback.
 * @return int|null -1 when a new table opens at the news-table path after at
 *                  least one news item has been counted (presumably a "stop
 *                  parsing" signal - confirm in SAXParser.php); otherwise null.
 */
function begin($tag,$attributes,$readSize){
    global $stack,$t,$isBeginNews,$news,$currentNewsIndex;

    // Strict comparison: tag names are strings, no type juggling wanted.
    if (!in_array($tag,$t,true)) array_unshift($stack,$tag);

    if (!$isBeginNews) return;

    $path = join('/',$stack);
    if ($tag=='a' && $path=='a/font/td/tr/table/td/tr/table/td/tr/table/font/center/body/html'){
        // Anchors without an href would otherwise raise an undefined-index notice.
        $news[$currentNewsIndex]['href'] = isset($attributes['href']) ? $attributes['href'] : '';
    }elseif($currentNewsIndex>0 && $tag=='table' && $path=='table/font/td/tr/table/td/tr/table/td/tr/table/font/center/body/html'){
        return -1;
    }
}
/**
 * End-tag callback: advances the news counter and unwinds the element path.
 *
 * When the news section is active and the closing tag is the <a> at the fixed
 * news-link path, $currentNewsIndex is incremented so the next link gets its
 * own slot in $news.
 *
 * The global $stack (innermost tag first) is then unwound up to and including
 * the nearest open $tag, which also drops any unclosed elements nested inside
 * it. A closing tag that is not open at all is ignored, so a stray end tag
 * cannot wipe out the whole ancestor path.
 *
 * @param string $tag      Name of the tag being closed.
 * @param int    $readSize Not used by this callback.
 * @return void
 */
function endTag($tag,$readSize){
    global $stack,$isBeginNews,$currentNewsIndex;

    if ($isBeginNews && $tag=='a' && join('/',$stack)=='a/font/td/tr/table/td/tr/table/td/tr/table/font/center/body/html'){
        $currentNewsIndex++;
    }

    // Position of the innermost matching open tag, or false if it was never opened.
    $depth = array_search($tag,$stack,true);
    if ($depth!==false){
        array_splice($stack,0,$depth+1);
    }
}
/**
 * Text callback: detects the "In the News" heading and collects link captions.
 *
 * Before the section has started, text equal (case-insensitively) to
 * "in the news" at the fixed heading path sets $isBeginNews. Afterwards, text
 * found at the fixed news-link path is stored in
 * $news[$currentNewsIndex]['text'].
 *
 * Text is appended rather than overwritten: tags that are never pushed onto
 * the path stack (for example <br> or <img>) can split one caption into
 * several calls under the same path, and each piece must be kept.
 *
 * @param string $str Text chunk delivered by the parser.
 * @return void
 */
function character($str){
    global $stack,$isBeginNews,$news,$currentNewsIndex;

    $path = join('/',$stack);

    if (!$isBeginNews){
        // begin "In the News" part
        if ($path=='font/a/b/td/tr/table/td/tr/table/td/tr/table/font/center/body/html' && strtolower($str)=='in the news') $isBeginNews = true;
        return;
    }

    if ($path=='a/font/td/tr/table/td/tr/table/td/tr/table/font/center/body/html'){
        if (isset($news[$currentNewsIndex]['text'])){
            $news[$currentNewsIndex]['text'] .= $str;
        }else{
            $news[$currentNewsIndex]['text'] = $str;
        }
    }
}
// Page that is both shown in the iframe and parsed for its news links.
$URL = 'http://yahoo.com';

// Tag names that begin() never pushes onto the element path stack.
$t = array(
    'br',
    'meta',
    'img',
    'spacer',
    'input',
    'base',
    'hr',
    'link',
);

// State shared with the callbacks through globals.
$stack            = array();  // currently open tags, innermost first
$news             = array();  // collected items: 'href' and 'text' per index
$currentNewsIndex = 0;        // slot in $news that the next link fills
$isBeginNews      = false;    // set once the "In the News" heading is seen

// Hand the three callback names to the parser (presumably start-tag,
// end-tag and text handlers, in that order - confirm in SAXParser.php).
$parser = new HTML_SAXParser();
$parser->initFunc('begin','endTag','character');?>
<html>
<body>
<center>Source page:<br><iframe src="<?=$URL?>" width="600" height="400" ></iframe><br><br></center>
News list (part "In the News"):<br>
<?php
// Run the parser; the callbacks fill $news with one entry per collected link.
$parser->parse($URL);
foreach ($news as $row) {
    // Either key may be missing (e.g. an anchor with no text or no href).
    $href = isset($row['href']) ? $row['href'] : '';
    $text = isset($row['text']) ? $row['text'] : '';

    // Only plain http(s) URLs are used as-is; everything else is treated as
    // relative to the source page, exactly as before.
    if (!preg_match('#^https?://#i', $href)) {
        $href = $URL.'/'.$href;
    }

    // Both values come from a remote page, so escape them for HTML output.
    // ISO-8859-1 makes the call byte-transparent (it never rejects input as
    // invalid UTF-8), and double_encode=false leaves entities that the parser
    // may have passed through undecoded untouched.
    $hrefHtml = htmlspecialchars($href, ENT_QUOTES, 'ISO-8859-1', false);
    $textHtml = htmlspecialchars($text, ENT_QUOTES, 'ISO-8859-1', false);
?>
<a href="<?=$hrefHtml?>" target="_blank"><?=$textHtml?></a><br>
<?php } ?>
</body></html>
|