<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Scraper demo</title>
</head>
<body>
<?php
include 'scraper.php';
/*
* Examples
*/
/* prefix this line with // (so that it starts with ///*) to enable this example section
// Example 1
// Scan a single page and grab data from it.
// The whole section is wrapped in one block comment, so it stays disabled
// until the opening line above is turned into a line comment.
try
{
$scrap = new Scraper();
// URL of the page to scan (here a relative path to a local test file)
$scrap->setBaseUrl('test.html');
// data targets: first argument is the name given to the value,
// second argument is the selector that locates it
// (presumably CSS-style selectors; confirm against scraper.php)
$scrap->addDataTarget('title', '#product h1');
$scrap->addDataTarget('category', '#product #category');
$scrap->addDataTarget('description', '#product #description');
$scrap->addDataTarget('price', '#product #price');
// run the scan and collect the result
$data = $scrap->process();
// dump the raw result structure for inspection
var_dump($data);
}
catch (Exception $e)
{
// any exception raised inside the try block ends up here; only its message is printed
echo $e->getMessage();
}
//*/
///* remove the leading // from this line to disable this example section
// Example 2
// Scan a base URL over a range of ids and print name, price and image for
// every result row.
//
// NOTE: use only line comments inside this section. A block-comment
// terminator anywhere in here would break the enable/disable toggle above.
try
{
    $scrap = new Scraper();
    // base URL containing the placeholder token ##TOKEN##
    $scrap->setBaseUrl('http://szukaj.pl.mobile.eu/pojazdy/details.html?id=##TOKEN##');
    // id range to scan for that token
    // (presumably every id in the range is substituted for the token and the
    // bounds are inclusive; confirm against scraper.php)
    $scrap->addRangeScanRule(151598039, 151598042, '##TOKEN##');
    // data targets: result key => selector locating the value on the page
    $scrap->addDataTarget('name', '.headline .margin h1');
    $scrap->addDataTarget('price', '#buyerpricegross');
    $scrap->addDataTarget('image', '#imageWrapper #thumbnailoverlay a');
    // run the scan
    $data = $scrap->process();
    // output one entry per scanned page
    foreach ($data as $row)
    {
        // A target that matched nothing may be absent from the row; fall back
        // to an empty string instead of emitting an undefined-index notice.
        $name  = isset($row['name'])  ? $row['name']  : '';
        $price = isset($row['price']) ? $row['price'] : '';
        $image = isset($row['image']) ? $row['image'] : '';

        // Name and price come from a third-party page, so they are escaped
        // before being written into this document. double_encode is off so
        // that entities already present in the scraped value (e.g. &euro;)
        // are not encoded a second time.
        echo "<strong>Name:</strong> " . htmlspecialchars((string) $name, ENT_QUOTES, 'UTF-8', false) . "<br />";
        echo "<strong>Price:</strong> " . htmlspecialchars((string) $price, ENT_QUOTES, 'UTF-8', false) . "<br />";

        // NOTE(review): the image value is printed unescaped because it is
        // presumably the scraped link/thumbnail markup that should render as
        // HTML. It is still untrusted remote content; sanitise it before
        // using this pattern outside a local demo.
        echo $image . "<br />";
        echo "<br /> <hr /><br />";
    }
    // for debugging, dump the raw result instead: var_dump($data);
}
catch (Exception $e)
{
    // The message may contain remote or user-supplied text (e.g. the URL),
    // so escape it before printing it into the page.
    echo htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8');
}
//*/
/* prefix this line with // (so that it starts with ///*) to enable this example section
// Example 3
// Scan an explicit list of URLs.
// The whole section is wrapped in one block comment, so it stays disabled
// until the opening line above is turned into a line comment.
try
{
$scrap = new Scraper();
// URLs to scan
$myUrls = array(
'http://wlasnorecznie.boo.pl/wlasnorecznie/content/ciasteczka',
'http://wlasnorecznie.boo.pl/wlasnorecznie/content/ziarenka-kawy',
'http://wlasnorecznie.boo.pl/wlasnorecznie/content/pacman-3d'
);
// hand the URL list to the scraper
$scrap->addListScanRule($myUrls);
// data targets: first argument is the name given to the value,
// second argument is the selector that locates it.
// The price selector matches on the Polish label "Cena:" ("Price:");
// the :contains() syntax is presumably handled by the scraper's selector
// engine (confirm against scraper.php).
$scrap->addDataTarget('title', '#content .ogloszenie_item h1');
$scrap->addDataTarget('image', '#content .ogloszenie_thumb a');
$scrap->addDataTarget('price', '#content .ogloszenie_item:contains(\'Cena:\')');
// run the scan and collect the result
$data = $scrap->process();
// dump the raw result structure for inspection
var_dump($data);
}
catch (Exception $e)
{
// any exception raised inside the try block ends up here; only its message is printed
echo $e->getMessage();
}
//*/
?>
</body></html>