PHP Classes

File: index.php

Recommend this page to a friend!
  Classes of Jacek Lukasiewicz   Web scraper   index.php   Download  
File: index.php
Role: Example script
Content type: text/plain
Description: Example usage
Class: Web scraper
Extract information from Web site pages
Author: By Jacek Lukasiewicz
Last change:
Date: 12 years ago
Size: 2,818 bytes
 

Contents

Class file image Download
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
    <head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Scraper demo</title>
</head>
<body>
<?php

include 'scraper.php';

/*
 * Examples
 */


/* prefix this line with // to enable this example section
 // Example 1
 // Scan a single page and grab data from it.
 // Disabled by default: everything down to the closing marker below sits
 // inside this block comment until the line above is prefixed with //.
try
{
    $scrap = new Scraper();
   
    // Set the URL to scan (here the literal 'test.html').
    $scrap->setBaseUrl('test.html');
   
    // Register the data targets: a result field name paired with a
    // CSS-style selector string for where that value is on the page.
    $scrap->addDataTarget('title', '#product h1');
    $scrap->addDataTarget('category', '#product #category');
    $scrap->addDataTarget('description', '#product #description');
    $scrap->addDataTarget('price', '#product #price');
   
    // Run the scan and keep whatever process() returns.
    $data = $scrap->process();
   
    // Dump the raw result for inspection.
    var_dump($data);

}
catch (Exception $e)
{
    // Print the message carried by the exception.
    echo $e->getMessage();
}
//*/


///* comment this line with // to enable example section
// Example 2
// Scan a base URL over a range of numeric ids and print the values found.
//
// This section is currently enabled: the line above is a // line comment, so
// the block comment it would otherwise open never starts. Keep to // comments
// inside this section; a block-comment terminator here would break the toggle
// formed by the first and last lines.

try
{
    $scrap = new Scraper();

    // Base URL containing the ##TOKEN## placeholder named in the range rule below.
    $scrap->setBaseUrl('http://szukaj.pl.mobile.eu/pojazdy/details.html?id=##TOKEN##');

    // Scan range for the token (presumably each id is substituted for
    // ##TOKEN## in turn; the exact bounds handling is defined by Scraper).
    $scrap->addRangeScanRule(151598039, 151598042, '##TOKEN##');

    // Data targets: a result field name paired with a CSS-style selector
    // string for where that value is on the page.
    $scrap->addDataTarget('name', '.headline .margin h1');
    $scrap->addDataTarget('price', '#buyerpricegross');
    $scrap->addDataTarget('image', '#imageWrapper #thumbnailoverlay a');

    // Run the scan.
    $data = $scrap->process();

    // Print one entry per result row, separated by a horizontal rule.
    // NOTE(review): the scraped values are echoed exactly as Scraper returns
    // them. They come from a remote page, so if process() yields plain text
    // rather than markup they should be passed through htmlspecialchars()
    // too - confirm against scraper.php before changing.
    foreach ($data as $row)
    {
        echo "<strong>Name:</strong> " . $row['name'] . "<br />";
        echo "<strong>Price:</strong> " . $row['price'] . "<br />";
        echo $row['image'] . "<br />";
        echo "<br /> <hr /><br />";
    }

    // Uncomment to inspect the raw result structure instead.
    //var_dump($data);
}
catch (Exception $e)
{
    // The message is plain text written into an HTML page, so escape it:
    // characters such as <, & or quotes would otherwise break the markup.
    echo htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8');
}

//*/




/* prefix this line with // to enable this example section
 // Example 3
 // Scan an explicit list of URLs.
 // Disabled by default: everything down to the closing marker below sits
 // inside this block comment until the line above is prefixed with //.
try
{
    $scrap = new Scraper();
   
    // URLs to scan.
    $myUrls = array(
        'http://wlasnorecznie.boo.pl/wlasnorecznie/content/ciasteczka',
        'http://wlasnorecznie.boo.pl/wlasnorecznie/content/ziarenka-kawy',
        'http://wlasnorecznie.boo.pl/wlasnorecznie/content/pacman-3d'
    );
   
    // Hand the URL list to the scraper.
    $scrap->addListScanRule($myUrls);
   
    // Register the data targets: a result field name paired with a
    // CSS-style selector string. The quotes around Cena: are backslash-escaped
    // because the selector itself is a single-quoted PHP string.
    $scrap->addDataTarget('title', '#content .ogloszenie_item h1');
    $scrap->addDataTarget('image', '#content .ogloszenie_thumb a');
    $scrap->addDataTarget('price', '#content .ogloszenie_item:contains(\'Cena:\')');
   
    // Run the scan and keep whatever process() returns.
    $data = $scrap->process();
   
    // Dump the raw result for inspection.
    var_dump($data);
}
catch (Exception $e)
{
    // Print the message carried by the exception.
    echo $e->getMessage();
}
//*/


?>
</body></html>