Login   Register  
PHP Classes
elePHPant
Icontem

File: spiderExamp.php

Recommend this page to a friend!
Stumble It! Bookmark in del.icio.us
  Classes of greg jackson  >  Spider Class  >  spiderExamp.php  >  Download  
File: spiderExamp.php
Role: Example script
Content type: text/plain
Description: Example usage - NOTE: BE KIND TO THE BBC AND DO NOT RUN THIS WITHOUT CHANGING THE PARAMETERS
Class: Spider Class
Crawl a site following and retrieving linked pages
Author: By greg jackson
Last change: Removed links to unnecessary scripts
Date: 2005-11-01 02:41
Size: 1,297 bytes
 

Contents

Class file image Download
<?php

// Load the spider implementation (presumably where spiderScraper is defined).
require_once "spiderClass.php";

// Run the example crawl with its default parameters.
// The defaults target www.bbc.co.uk - change them before running this script.
getSport();

// Nothing else to do once the crawl report has been printed.
exit();

/**
 * Example crawl: start at the BBC home page, follow links that match a
 * per-depth list of regular expressions, and print a short report for each
 * of up to 50 fetched pages.
 *
 * For every page fetched without error the sequence number, depth, elapsed
 * seconds and URL are echoed; the second element of the fetched-page array
 * (presumably the page body - confirm against spiderClass.php) is echoed as
 * well when it matches $strDetail1.
 *
 * @param string $strSport   PCRE pattern used to pick links at the second level.
 * @param string $strDetail1 PCRE pattern used to pick links at the deeper levels
 *                           and to decide whether a fetched page is printed.
 * @param string $strDetail2 Additional PCRE pattern accepted at the second and
 *                           third levels.
 *
 * @return void
 */
function getSport($strSport = "/football/", $strDetail1 = "/middlesbrough|boro\b/", $strDetail2 = "/prem/") {
    $strStartURL = "http://www.bbc.co.uk";

    // One list of link patterns per crawl level.
    // NOTE(review): the numeric keys were lost from the published listing;
    // 1..4 is assumed to line up with intCrawlDepth = 4 - confirm against
    // how spiderClass.php indexes arrLinksRegex (it may be 0-based).
    $arrLinksRegex = array(
        1 => array("/sport/"),
        2 => array($strSport, $strDetail1, $strDetail2),
        3 => array($strDetail1, $strDetail2),
        4 => array($strDetail1),
    );

    $objSportSpider = new spiderScraper;
    $objSportSpider->spiderStart($strStartURL);
    $objSportSpider->arrLinksRegex = $arrLinksRegex;
    $objSportSpider->intCrawlDepth = 4;

    // Hard cap of 50 page requests so the example stays polite to the target site.
    for ($i = 1; $i <= 50; $i++) {
        // Remember the running total so the per-page time can be reported.
        $timePrev = $objSportSpider->timeLapsed;
        $arrFetchedPage = $objSportSpider->spiderNextPage();

        if ($arrFetchedPage["error"] > 0) {
            echo "<br>Error: ".$arrFetchedPage["errortext"];
        } else {
            echo $i.": Depth: ".$objSportSpider->intCurrentDepth." -Seq: ".$objSportSpider->intCurrentSequence." ".($objSportSpider->timeLapsed - $timePrev)."secs - ";
            echo " URL: ".$arrFetchedPage[0]."<br><hr>";
            echo "<br>";

            // Only dump the fetched content when it mentions the detail pattern.
            if (array_key_exists(1, $arrFetchedPage) && isset($arrFetchedPage[1])) {
                if (preg_match($strDetail1, $arrFetchedPage[1]) > 0) {
                    echo $arrFetchedPage[1]."<br><hr>";
                }
            }
        }
    }

    echo "total time: ".$objSportSpider->timeLapsed." secs<br>";
} // end function
?>