<?php
/* Author: Rizwan Abbas rizwan@zeropoint.it
* Organization: zeropoint.it
* This script crawls listing data from OLX only.
* Currently it targets the cars category and all of its HTML elements.
*
*/
// Let the crawl run as long as it needs: a max_execution_time of 0 removes PHP's time limit.
ini_set('max_execution_time', 0);
// Report every notice, warning and error while the script runs.
error_reporting(E_ALL);
// NOTE(review): presumably provides file_get_html() and the find() API used by crawl() - confirm.
include_once('simple_html_dom.php');//open source class
// NOTE(review): presumably defines getDetails(), which crawl() calls for each ad - confirm.
include_once("scraper.php");
/**
 * Crawl one search-result page and collect a record for every listed ad.
 *
 * For each offer cell found on the page, the ad's detail URL is handed to
 * getDetails(); the returned record is then enriched with the city,
 * category and manufacturer read from the listing's breadcrumb.
 *
 * @param string $searchURL URL of the search-result page to fetch.
 * @param mixed  $p         Not used by this function; kept so existing
 *                          callers that pass a second argument keep working.
 *
 * @return array List of ad records. Empty when the page cannot be loaded
 *               or contains no usable offers.
 */
function crawl($searchURL,$p)
{
    $collection = array();

    // file_get_html() yields a non-object (false) when the page cannot be
    // fetched or parsed; bail out instead of calling find() on it.
    $html = file_get_html($searchURL);
    if (!is_object($html)) {
        return $collection;
    }

    $list = $html->find('td[class=offer onclick]');
    if (!is_array($list)) {
        $list = array();
    }

    foreach ($list as $ad) {
        // An offer cell without a details link cannot be crawled further.
        $link = $ad->find('a[class=marginright5 link linkWithHash detailsLink]', 0);
        $url = is_object($link) ? $link->href : '';
        // Loose comparison on purpose: null/false (missing href) count as empty.
        if ($url == '') {
            continue;
        }

        echo "\n\n $url \n\n";

        $data = getDetails($url);
        // Skip ads for which no details came back (or not in array form).
        if (!is_array($data) || count($data) === 0) {
            continue;
        }

        // The breadcrumb holds "Category » Manufacturer" followed by a
        // <span> that carries the city.
        $cityNode = $ad->find('small[class=breadcrumb small] >span', 0);
        $city = is_object($cityNode) ? trim($cityNode->innertext) : '';

        $crumbNode = $ad->find('small[class=breadcrumb small]', 0);
        $crumbHtml = is_object($crumbNode) ? trim($crumbNode->innertext) : '';
        $crumbParts = explode('<span>', $crumbHtml);
        $category = explode(' » ', $crumbParts[0]);

        $categoryName = trim($category[0]);
        // The manufacturer part is absent when the breadcrumb has no separator.
        $manufacturer = isset($category[1]) ? trim($category[1]) : '';

        $data['attributes']['City'] = $city;
        $data['attributes']['Category'] = $categoryName;
        $data['carinfo']['Category'] = $categoryName;
        $data['carinfo']['Model'] = 'not mentioned';
        $data['carinfo']['City'] = $city;
        $data['attributes']['Manufacturer'] = $manufacturer;

        $collection[] = $data;
    }

    // Release the parsed DOM: simple_html_dom keeps circular references that
    // are only freed by an explicit clear().
    $html->clear();
    unset($html);

    return $collection;
}//crawl
// Driver: crawl the first $pages result pages of one OLX category and dump
// what crawl() returns for each page, followed by the start/end times.
// NOTE(review): $categories is not read in this part of the script; it is
// kept in case included code reads it as a global - confirm before removing.
$categories = array('cars','motorcycles','scooters','bicycles','commercial-vehicles','parts-accessories','other-vehicles');
$portal = "olx";
$pages= 2;
$category ="dvd-cat-238";
// 24-hour clock ('H'): the 12-hour 'h' format without am/pm is ambiguous.
$start = date('H:i:s',time());
for($i=1;$i<=$pages;$i++){
    // The first page has no page parameter; later pages use ?page=N.
    if($i==1){
        $searchURL = "http://olx.com/$category/";
    }else{
        $searchURL = "http://olx.com/$category/?page=".$i;
    }
    $collection = crawl($searchURL,$portal);
    print"<pre>";
    print_r($collection);
    // Close the block opened above so each page's dump is a well-formed <pre>.
    print"</pre>";
}
$end = date('H:i:s',time());
// Put the two timestamps on separate lines so they do not run together.
print"Started At:".$start."\n";
print"Ended At:".$end."\n";
?>
|