<?php
require_once("spiderClass.php");
// Run the crawl using getSport()'s default patterns (see its signature below).
// The call precedes the definition; PHP permits this for functions declared
// unconditionally at the top level of the file.
getSport();
// Stop the script here so that nothing after this point is executed or output.
exit;
/**
 * Crawl outward from the BBC home page and print pages matching the filters.
 *
 * A spiderScraper (loaded from spiderClass.php) is started on the BBC home
 * page, handed a set of per-level link patterns, and then asked for up to 50
 * pages. A progress line is echoed for every fetched page; the second element
 * of the result is echoed as well when it matches $strDetail1. Output is HTML
 * written directly with echo; nothing is returned.
 *
 * @param string $strSport   PCRE pattern used in the level-2 link filter.
 * @param string $strDetail1 PCRE pattern used in the level-2, 3 and 4 link
 *                           filters and to decide whether a fetched page's
 *                           second element is printed.
 * @param string $strDetail2 PCRE pattern used in the level-2 and 3 link filters.
 */
function getSport ($strSport="/football/", $strDetail1="/middlesbrough|boro\b/", $strDetail2="/prem/"){
    $strStartURL = "http://www.bbc.co.uk";

    // Link patterns keyed by level (presumably crawl depth - confirm against
    // spiderScraper). The original listed key 3 twice, so the
    // ($strDetail1, $strDetail2) entry was silently overwritten; with a crawl
    // depth of 4 the last entry is given its own key, 4.
    $arrLinksRegex = array(
        1 => array("/sport/"),
        2 => array($strSport, $strDetail1, $strDetail2),
        3 => array($strDetail1, $strDetail2),
        4 => array($strDetail1),
    );

    $objSportSpider = new spiderScraper;
    $objSportSpider -> spiderStart($strStartURL);
    $objSportSpider -> arrLinksRegex = $arrLinksRegex;
    $objSportSpider -> intCrawlDepth = 4;

    for ($i = 1; $i <= 50; $i++) {
        // Remember the running total so the time for this page alone can be shown.
        $timePrev = $objSportSpider->timeLapsed;
        $arrFetchedPage = $objSportSpider -> spiderNextPage();

        // Only treat the result as a failure when an error code is actually
        // present and positive; a missing "error" key is not an error.
        if (isset($arrFetchedPage["error"]) && $arrFetchedPage["error"] > 0) {
            $strErrorText = isset($arrFetchedPage["errortext"]) ? $arrFetchedPage["errortext"] : "";
            // The text originates from a remote fetch, so escape it for HTML.
            echo "<br>Error: ".htmlspecialchars((string) $strErrorText, ENT_QUOTES);
            continue;
        }

        echo $i.": Depth: ".$objSportSpider->intCurrentDepth." -Seq: ".$objSportSpider->intCurrentSequence." ".($objSportSpider->timeLapsed - $timePrev)."secs - ";
        // Element 0 is printed as the page URL; escape it because it comes
        // from links found on remote pages.
        echo " URL: ".htmlspecialchars((string) $arrFetchedPage[0], ENT_QUOTES)."<br><hr>";
        echo "<br>";

        // isset() alone covers both "key exists" and "value is not null".
        if (isset($arrFetchedPage[1]) && preg_match($strDetail1, $arrFetchedPage[1]) > 0) {
            // NOTE(review): element 1 is echoed unescaped, as in the original.
            // It is presumably fetched page markup meant to render as HTML -
            // confirm, since it is untrusted remote content.
            echo $arrFetchedPage[1]."<br><hr>";
        }
    }

    echo "total time: ".$objSportSpider->timeLapsed." secs<br>";
} // end function
?>
|