Login   Register  
PHP Classes
elePHPant
Icontem

File: example5.php

Recommend this page to a friend!
Stumble It! Bookmark in del.icio.us
  Classes of Alexey G. Piyanin  >  HTML SAX Parser  >  example5.php  >  Download  
File: example5.php
Role: Example script
Content type: text/plain
Description: Example #5 (get page part - news on yahoo.com)
Class: HTML SAX Parser
Parse HTML documents using regular expressions
Author: By Alexey G. Piyanin
Last change: Fix some errors in parsing
Date: 2006-06-18 12:28
Size: 2,053 bytes
 

Contents

Class file image Download
<?
/*
Author: Alexey G. Piyanin (e-mail: drdrzlo at mail dot ru)
Date:   Jun 7 2006
Title:  Get page part
*/
include('SAXParser.php');

/**
 * Start-tag callback registered with the SAX parser.
 *
 * Pushes the tag onto the front of the global $stack (unless it is listed in
 * the global $t) so that join('/',$stack) yields the path from the current
 * element up to the root. Once the "In the News" section has begun
 * ($isBeginNews), it records the href of every link found at the news-link
 * path into $news[$currentNewsIndex].
 *
 * @param string $tag        Tag name as reported by the parser.
 * @param array  $attributes Tag attributes indexed by name; only 'href' is read.
 * @param mixed  $readSize   Not used by this callback.
 * @return int|null -1 when a new table opens at the news level after at least
 *                  one item has been collected (presumably this tells the
 *                  parser to stop - confirm against SAXParser.php); otherwise
 *                  nothing is returned.
 */
function begin($tag,$attributes,$readSize){
  global $stack,$t,$isBeginNews,$news,$currentNewsIndex;

  // Path shared by the news links and by the table that follows the news list.
  $newsLevel = 'font/td/tr/table/td/tr/table/td/tr/table/font/center/body/html';

  // Tags listed in $t are never pushed, so they do not take part in the path.
  if (!in_array($tag,$t,true)) array_unshift($stack,$tag);

  if (!$isBeginNews) return;

  $path = join('/',$stack);
  if ($tag=='a' && $path=='a/'.$newsLevel){
    // A link without an href attribute simply leaves 'href' unset.
    if (isset($attributes['href'])){
      $news[$currentNewsIndex]['href'] = $attributes['href'];
    }
  }elseif($currentNewsIndex>0 && $tag=='table' && $path=='table/'.$newsLevel){
    // The next table after the collected items marks the end of the news part.
    return -1;
  }
}

/**
 * End-tag callback registered with the SAX parser.
 *
 * When a news link closes (inside the "In the News" part) the global
 * $currentNewsIndex is advanced to the next item. Afterwards the global
 * $stack is unwound up to and including the nearest entry equal to $tag.
 *
 * A closing tag that has no matching entry on the stack (a stray end tag, or
 * a tag that begin() never pushed) is ignored; previously it emptied the
 * whole stack and broke every later path comparison.
 *
 * @param string $tag      Tag name as reported by the parser.
 * @param mixed  $readSize Not used by this callback.
 * @return void
 */
function endTag($tag,$readSize){
  global $stack,$isBeginNews,$news,$currentNewsIndex;

  if ($isBeginNews && $tag=='a' && join('/',$stack)=='a/font/td/tr/table/td/tr/table/td/tr/table/font/center/body/html'){
    $currentNewsIndex++;
  }

  // Position of the innermost open element with this name (index 0 is the innermost).
  $depth = array_search($tag,$stack,true);
  if ($depth===false) return; // nothing to close

  // Drop the unclosed inner elements together with the tag itself.
  array_splice($stack,0,$depth+1);
}

/**
 * Character-data callback registered with the SAX parser.
 *
 * Before the news part has started it watches for the heading text
 * "in the news" (compared case-insensitively) at the heading path and then
 * sets the global $isBeginNews. Once the news part has started, text found
 * at the news-link path is stored as the 'text' of the current item in $news.
 *
 * @param string $str Text chunk as reported by the parser.
 * @return void
 */
function character($str){
  global $stack,$isBeginNews,$news,$currentNewsIndex;

  $path = join('/',$stack);

  if (!$isBeginNews){
    if ($path=='font/a/b/td/tr/table/td/tr/table/td/tr/table/font/center/body/html' && strtolower($str)=='in the news'){
      $isBeginNews = true; // begin "In the News" part
    }
  }elseif ($path=='a/font/td/tr/table/td/tr/table/td/tr/table/font/center/body/html'){
    // A later text chunk at the same link overwrites the earlier one.
    $news[$currentNewsIndex]['text'] = $str;
  }
}

// Tags that begin() does not push onto $stack, so they never appear in a path.
$t = array('br','meta','img','spacer','input','base','hr','link',);
// Open elements, innermost first; shared by the three parser callbacks.
$stack = array();
// Page that is both shown in the iframe and parsed for its news list.
$URL = 'http://yahoo.com';

// Becomes true once the "In the News" heading has been seen by character().
$isBeginNews = false;

// Index in $news of the item currently being filled in.
$currentNewsIndex = 0;
// Collected items; each entry may hold 'href' and 'text'.
$news = array();
$parser = new HTML_SAXParser();
// Register the start-tag, end-tag and character-data callbacks by name.
$parser->initFunc('begin','endTag','character');?>
<html>
<body>
<center>Source page:<br><iframe src="<?=$URL?>" width="600" height="400" ></iframe><br><br></center>
News list (part "In the News"):<br>
<?php
// Parse the page; the registered callbacks fill $news while it runs.
$parser->parse($URL);
foreach ($news as $row) {
  // An item may lack either key (e.g. a link without href or without text).
  $rowHref = isset($row['href']) ? $row['href'] : '';
  $rowText = isset($row['text']) ? $row['text'] : '';
  // The values come from a remote page, so escape them before echoing.
  // ISO-8859-1 is used because it accepts every byte sequence and only the
  // HTML special characters are rewritten; double_encode=false keeps
  // entities that are already present in the scraped text intact.
  // NOTE(review): links are always prefixed with $URL, which assumes the
  // scraped hrefs are relative - confirm against the parsed page.
  $linkHref = htmlspecialchars($URL.'/'.$rowHref, ENT_QUOTES, 'ISO-8859-1', false);
  $linkText = htmlspecialchars($rowText, ENT_QUOTES, 'ISO-8859-1', false);
?>
<a href="<?=$linkHref?>" target="_blank"><?=$linkText?></a><br>
<?php } ?>
</body></html>