Login   Register  
PHP Classes
elePHPant
Icontem

File: example6.php

Recommend this page to a friend!
Stumble It! Bookmark in del.icio.us
  Classes of Alexey G. Piyanin  >  HTML SAX Parser  >  example6.php  >  Download  
File: example6.php
Role: Example script
Content type: text/plain
Description: Example #6 (get wikipedia page content)
Class: HTML SAX Parser
Parse HTML documents using regular expressions
Author: By Alexey G. Piyanin
Last change: change description
Date: 2006-06-18 12:30
Size: 1,308 bytes
 

Contents

Class file image Download
<?php
/*
Author: Alexey G. Piyanin (e-mail: drdrzlo at mail dot ru)
Date:   Jun 7 2006
Title:  Get wikipedia page content
*/
// The full "<?php" tag is used because the short "<?" form only works when
// the short_open_tag ini setting is enabled.
// require_once stops immediately with a clear error if the parser class file
// is missing, instead of failing later at "new HTML_SAXParser()".
require_once 'SAXParser.php';

/**
 * Character-data callback registered with the parser (see initFunc()).
 *
 * Looks for the two marker texts "start content" and "end content" while the
 * parser is inside a comment (as tracked by comment() through $isComment):
 *   - the first "start content" marker sets $startContent to true;
 *   - after that, every "end content" marker stores the current $commentPos
 *     in $endContent.
 *
 * Text seen outside a comment is ignored.
 *
 * @param string $str Text chunk handed over by the parser.
 * @return void
 */
function character($str)
{
    global $isComment, $startContent, $endContent, $commentPos;

    // Markers only count when they appear inside a comment.
    if (!$isComment) {
        return;
    }

    $text = trim($str);

    if (!$startContent) {
        if ($text == 'start content') {
            $startContent = true;
        }
    } elseif ($text == 'end content') {
        // $commentPos is the offset most recently reported to comment().
        $endContent = $commentPos;
        // NOTE(review): the original carried a commented-out "return(-1);"
        // here, presumably to make the parser stop early - confirm against
        // SAXParser.php before enabling it.
    }
}

/**
 * Comment callback registered with the parser (see initFunc()).
 *
 * Records whether the parser is currently inside a comment and at which
 * offset the last comment event happened. The first time a comment closes
 * after the "start content" marker was seen, it also fixes $beginContent.
 *
 * @param bool $start Truthy when a comment opens, falsy when it closes.
 * @param int  $pos   Offset reported by the parser for this event.
 * @return void
 */
function comment($start, $pos)
{
    global $isComment, $startContent, $commentPos, $beginContent;

    // Set the content start only once: on the first comment end after the
    // start marker. The +3 presumably skips the three characters of the
    // closing "-->" - confirm against how SAXParser.php reports $pos.
    if ($startContent && !$start && $beginContent == 0) {
        $beginContent = $pos + 3;
    }

    // State read by character() for the text that follows.
    $isComment  = $start;
    $commentPos = $pos;
}

// Page whose content is extracted.
$URL = 'http://en.wikipedia.org/wiki/Kalimpong';
#---
// State shared with the character() and comment() callbacks.
$isComment    = false;
$commentPos   = 0;
$startContent = false;
#---
// Offsets into $content delimiting the extracted fragment.
$beginContent = 0;
$endContent   = 0;
#---
$parser = new HTML_SAXParser();
// Registers character() and comment() by name; the first two handler slots
// are left empty (presumably the tag handlers - confirm in SAXParser.php).
$parser->initFunc('','','character','comment');
#---
// ATTENTION!!! replace for correct loading content
$content = file_get_contents($URL);
if ($content === false) {
    // Fetch failed: fall back to an empty document so the parser and the
    // later substr() still receive a string.
    $content = '';
}
?>
<html>
<body>
<center>Source page:<br><iframe src="<?= htmlspecialchars($URL, ENT_QUOTES) ?>" width="600" height="400" ></iframe><br><br></center>
Content:<br>
<?php
// Run the parser; the character()/comment() callbacks fill in
// $beginContent and $endContent as a side effect.
$parser->parseString($content);
//----
// Print the fragment only when both markers were found in order. Without
// this guard a missing end marker gives substr() a negative length, which
// prints an arbitrary slice of the page.
if ($endContent > $beginContent) {
    echo substr($content, $beginContent, $endContent - $beginContent);
}
?>
</body></html>