<?
// by Diego Garrido de Almeida
// garridez@gmail.com
/**
 * Base class for running a site-restricted Google search and scraping the
 * returned result page.
 *
 * The search URL is built from SEARCH_PATTERN using the configured domain,
 * the query and a page-derived offset. performSearch() downloads that URL,
 * parses it as HTML and collects the result rows.
 *
 * NOTE: performSearch() calls $this->stripFile($row), which is not defined in
 * this class. Concrete subclasses are expected to provide it; from its use
 * here it must return an object exposing a "name" property.
 */
abstract class Surf
{
    /** URL template; #DOMAIN#, #QUERY# and #OFFSET# are substituted by getSearchUrl(). */
    const SEARCH_PATTERN = 'http://www.google.com.br/search?hl=pt-BR&q=site:#DOMAIN#+#QUERY#&start=#OFFSET#';

    /** @var string Raw search terms as given to setQuery(). */
    private $query;

    /** @var string|null Domain used for the "site:" restriction. */
    private $domain;

    /**
     * @param string $query Search terms.
     */
    public function __construct($query)
    {
        $this->setQuery($query);
    }

    /**
     * Returns the query ready to be embedded in a URL.
     *
     * Literal "+" characters are turned into spaces before urlencode() runs,
     * so "a+b" and "a b" both come out as "a+b".
     *
     * @return string URL-encoded query.
     */
    public function getQuery()
    {
        return urlencode(str_replace('+', ' ', $this->query));
    }

    /**
     * @param string $query Search terms.
     */
    public function setQuery($query)
    {
        $this->query = $query;
    }

    /**
     * @return string|null The domain as stored by setDomain() (null if never set).
     */
    public function getDomain()
    {
        return $this->domain;
    }

    /**
     * @param string $domain Domain for the "site:" restriction. It is inserted
     *                       into the URL as-is, without encoding.
     */
    public function setDomain($domain)
    {
        $this->domain = $domain;
    }

    /**
     * Builds the search URL for a given result page.
     *
     * @param int $page 1-based page number; each page advances the offset by 10.
     *
     * @return string SEARCH_PATTERN with all placeholders filled in.
     */
    public function getSearchUrl($page)
    {
        // Pages below 1 would produce a negative "start" value; clamp to the first page.
        $offset = max(0, ($page - 1) * 10);

        // strtr() replaces every placeholder in a single pass, so text inserted
        // for one placeholder is never re-scanned for another.
        return strtr(self::SEARCH_PATTERN, array(
            '#DOMAIN#' => (string) $this->getDomain(),
            '#QUERY#'  => $this->getQuery(),
            '#OFFSET#' => (string) $offset,
        ));
    }

    /**
     * Finds the <div> whose class attribute is exactly "s" below a result row.
     *
     * When several descendants match, the last one in document order is
     * returned (the loop does not stop at the first hit).
     *
     * @param DOMElement $row Result row element.
     *
     * @return DOMElement|null The matching div, or null when there is none.
     */
    public function getField($row)
    {
        $field = null;
        foreach ($row->getElementsByTagName('div') as $line) {
            if ($line->getAttribute('class') == 's') {
                $field = $line;
            }
        }

        return $field;
    }

    /**
     * Downloads one result page and extracts its entries.
     *
     * Rows are the <li class="g"> elements inside the element with id "ires".
     * Each row is passed to stripFile(); entries whose "name" is empty after
     * trimming are dropped.
     *
     * @param int $page 1-based page number to fetch.
     *
     * @return stdClass|false Object with "results" (array of stripFile()
     *                        return values), "totalPages" and "page"; false
     *                        when the page could not be fetched or contains
     *                        no "ires" container.
     */
    public function performSearch($page = 1)
    {
        $html = file_get_contents($this->getSearchUrl($page));
        // A failed or empty download cannot contain results; bail out before
        // handing an empty string to the HTML parser.
        if ($html === false || trim($html) === '') {
            return false;
        }

        $doc = new DOMDocument();
        // Real-world markup triggers many libxml warnings; collect them
        // internally instead of silencing the whole call with "@".
        $previous = libxml_use_internal_errors(true);
        $doc->loadHTML($html);
        if (!$previous) {
            // Only discard the buffer if the caller was not collecting errors itself.
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous);

        $container = $doc->getElementById('ires');
        if (!$container) {
            return false;
        }

        $results = array();
        foreach ($container->getElementsByTagName('li') as $row) {
            if ($row->getAttribute('class') != 'g') {
                continue;
            }
            // Parse each row once and reuse the parsed entry.
            $entry = $this->stripFile($row);
            if (isset($entry->name) && trim((string) $entry->name) !== '') {
                $results[] = $entry;
            }
        }

        // The page count is read from the second-to-last <td> of the element
        // with id "nav" (presumably the last numbered link before "next" -
        // verify against the current markup). Without a usable navigation
        // block, fall back to the requested page instead of failing.
        $totalPages = $page;
        $navigation = $doc->getElementById('nav');
        if ($navigation) {
            $cells = $navigation->getElementsByTagName('td');
            if ($cells->length >= 2) {
                $totalPages = $cells->item($cells->length - 2)->nodeValue;
            }
        }

        $search = new stdClass();
        $search->results = $results;
        $search->totalPages = $totalPages;
        $search->page = $page;

        return $search;
    }
}
?>
|