<?php
// ==============================================================================
// File name : adv_wdt.php
// Version : 2011-05-07
// Author : Tomasz Malewski - varsovie@o2.pl
// License : BSD without greetings.
// Briefing : A website watchdog that scans web pages which offer no RSS feed and detects when they are updated.
/**
 * Website watchdog: remembers the length and MD5 checksum of watched pages in
 * a MySQL table and reports when a page differs from the remembered state.
 *
 * The table named in sql_connect() is read and written through these columns:
 * URL, active, changed, checksum, length, margin, create_DT, update_DT, mailto.
 *
 * Database access uses the mysqli extension (the legacy mysql_* API was
 * removed in PHP 7.0), so $con holds a mysqli object after sql_connect().
 * As in the original implementation, a failing query terminates the script
 * with the database error message.
 */
class adv_wdt
{
    /**
     * Margin written to the table by sql_addURL(). sql_diffURL() multiplies the
     * stored margin by the stored length to obtain the tolerated deviation, so
     * the value acts as a fraction of the page length (0 = any length change counts).
     *
     * @var int|float
     */
    public $margin = 0;

    /** @var string Address written to the `mailto` column by sql_addURL(). */
    public $mailto = '';

    /** @var bool When true, sql_diffURL() stores the new length/checksum after a detected change. */
    public $diffupdate = true;

    /** @var bool When true, sql_diffURL() also compares MD5 checksums, not only lengths. */
    public $diffchecksum = false;

    /** @var int|bool When truthy, sql_diffURL() echoes a message for every detected change. */
    public $debug = 0;

    /** @var array 'length' and 'checksum' of the body seen by the most recent curl() call. */
    public $curl = array();

    /** @var mysqli|null Connection opened by sql_connect(). */
    public $con = null;

    /** @var string Database name given to sql_connect(). */
    public $sqldatabase = '';

    /** @var string Table name given to sql_connect(). */
    public $sqltable = '';

    /**
     * Set all options to their defaults.
     */
    public function __construct()
    {
        $this->margin = 0;
        $this->mailto = '';
        $this->diffupdate = true;
        $this->diffchecksum = false;
        $this->debug = 0;
    }

    /**
     * Legacy PHP4-style constructor name, kept so existing explicit calls
     * (for example parent::adv_wdt()) keep working. PHP 8 no longer treats a
     * method named after the class as a constructor, hence __construct() above.
     */
    public function adv_wdt()
    {
        $this->__construct();
    }

    /**
     * Fetch a URL and return the response body, following redirects manually.
     *
     * CURLOPT_FOLLOWLOCATION is deliberately not used because it is refused
     * when open_basedir is active; instead up to five requests are made and
     * the next hop is taken from CURLINFO_REDIRECT_URL, which cURL derives from
     * the Location header (case-insensitively, resolving relative targets).
     *
     * Side effect: $this->curl['length'] and $this->curl['checksum'] describe
     * the returned body; after any failure they describe the empty string.
     *
     * @param string $url Address to fetch.
     *
     * @return string|false|null Body on success, false when the transfer failed
     *                           or a redirect had no target, null (after echoing
     *                           " *** Timeout") when the redirect limit was hit.
     */
    public function curl($url)
    {
        $limit = 5; // maximum number of requests, i.e. hops in a redirect chain
        $redirectCodes = array(301, 302, 303, 307, 308);

        for ($redirect = 0; $redirect < $limit; $redirect++) {
            $ch = curl_init($url);
            if ($ch === false) {
                return $this->rememberFetch(false);
            }
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            // Headers are kept out of the returned data so that one request
            // yields the plain body; the redirect target is read via curl_getinfo().
            curl_setopt($ch, CURLOPT_HEADER, false);

            $body = curl_exec($ch);
            $status = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
            $next = curl_getinfo($ch, CURLINFO_REDIRECT_URL);
            $this->closeHandle($ch);

            if ($body !== false && in_array($status, $redirectCodes, true)) {
                if (!is_string($next) || $next === '') {
                    // Redirect status without a usable target: nothing to follow.
                    return $this->rememberFetch(false);
                }
                $url = $next;
                continue;
            }

            return $this->rememberFetch($body);
        }

        echo " *** Timeout";
        $this->rememberFetch(false);

        return null;
    }

    /**
     * Open the database connection and verify that the watchdog table exists.
     *
     * @param string     $sqlhost  Server host name.
     * @param string     $sqluser  User name.
     * @param string     $sqlpass  Password.
     * @param string     $sqldb    Database to select.
     * @param string     $sqltable Table holding the watched URLs.
     * @param int|string $sqlport  TCP port, 3306 by default.
     *
     * @return bool True when connected and the table exists; otherwise an error
     *              line is echoed and false is returned.
     */
    public function sql_connect($sqlhost, $sqluser, $sqlpass, $sqldb, $sqltable, $sqlport = 3306)
    {
        $this->sqldatabase = $sqldb; // needed for the database-qualified table reference
        $this->sqltable = $sqltable;

        try {
            $link = mysqli_connect($sqlhost, $sqluser, $sqlpass, $sqldb, (int) $sqlport);
        } catch (mysqli_sql_exception $e) {
            // mysqli throws instead of returning false when exception reporting is on (default since PHP 8.1).
            $link = false;
        }
        if (!$link) {
            $this->con = null;
            echo "ADV_WDT: *** Error: Cannot connect to SQL server: " . mysqli_connect_error() . "<br>";
            return false;
        }
        $this->con = $link;

        try {
            $result = $link->query("SHOW TABLES LIKE " . $this->quote($sqltable));
        } catch (mysqli_sql_exception $e) {
            $result = false;
        }
        $exists = false;
        if ($result instanceof mysqli_result) {
            $exists = $result->num_rows > 0;
            $result->free();
        }
        if (!$exists) {
            echo "ADV_WDT: *** Error: Temporary SQL table doesn't exist, check manual for adv_wdt php class to create one.<br>";
            return false;
        }

        return true;
    }

    /**
     * Delete every row of the watchdog table.
     */
    public function sql_flush()
    {
        $this->query('DELETE FROM ' . $this->tableRef());
    }

    /**
     * Fetch a URL and insert it as an active, unchanged entry together with its
     * current checksum and length, the configured margin and the mailto address.
     *
     * A failed fetch is stored like an empty page (length 0).
     *
     * @param string $url Address to start watching.
     */
    public function sql_addURL($url)
    {
        $html = (string) $this->curl($url);

        // Render the margin without locale-dependent decimal separators.
        $margin = is_numeric($this->margin) ? $this->margin + 0 : 0;
        $marginSql = is_int($margin) ? (string) $margin : sprintf('%F', $margin);

        $this->query(
            'insert into ' . $this->tableRef()
            . ' (`URL`,`active`,`changed`,checksum,length,margin,create_DT,`mailto`)'
            . ' values (' . $this->quote($url) . ",'1','0'," . $this->quote(md5($html))
            . ",'" . strlen($html) . "','" . $marginSql . "',NOW()," . $this->quote($this->mailto) . ')'
        );
    }

    /**
     * Remove one entry for the given URL.
     *
     * @param string $url Address to stop watching.
     */
    public function sql_delURL($url)
    {
        $this->query(
            'DELETE FROM ' . $this->tableRef() . ' WHERE `URL` = ' . $this->quote($url) . ' limit 1'
        );
    }

    /**
     * Re-fetch an active URL and compare it with the stored state.
     *
     * The page counts as changed when its length leaves the band
     * stored length +/- round(stored margin * stored length), or, with
     * $diffchecksum enabled, when its MD5 checksum differs. The `changed` flag
     * and `update_DT` are written either way; with $diffupdate enabled a
     * detected change also stores the new length and checksum.
     *
     * A failed fetch is compared like an empty page (length 0).
     *
     * @param string $url Watched address.
     *
     * @return bool|null True if changed, false if not, null when the URL has no
     *                   active entry in the table.
     */
    public function sql_diffURL($url)
    {
        $table = $this->tableRef();
        $urlSql = $this->quote($url);

        $result = $this->query(
            'select * from ' . $table . ' where `URL`=' . $urlSql . ' and active=1 limit 1'
        );
        $row = $result->fetch_assoc();
        $result->free();
        if (!$row) {
            return null;
        }

        $newhtml = (string) $this->curl($url);
        $newLength = strlen($newhtml);
        $newChecksum = md5($newhtml);

        $oldLength = (int) $row['length'];
        $tolerance = round((float) $row['margin'] * $oldLength);
        $marginDown = $oldLength - $tolerance;
        $marginTop = $oldLength + $tolerance;

        $changed = $newLength < $marginDown
            || $newLength > $marginTop
            || ($this->diffchecksum && $newChecksum != $row['checksum']);

        if (!$changed) {
            $this->query(
                'update ' . $table . ' set changed=0,update_DT=NOW() where `URL`=' . $urlSql . ' limit 1'
            );
            return false;
        }

        if ($this->debug) {
            echo ' *** page has changed ' . $row['margin'] . ' len: ' . $oldLength . '->' . $newLength . '<br>';
        }
        // E-mail notification to $row['mailto'] is intentionally not sent here
        // (it was disabled in the original code to avoid mail spam during development).

        $set = 'changed=1,update_DT=NOW()';
        if ($this->diffupdate) {
            $set .= ",length='" . $newLength . "',checksum=" . $this->quote($newChecksum);
        }
        $this->query('update ' . $table . ' set ' . $set . ' where `URL`=' . $urlSql . ' limit 1');

        return true;
    }

    /**
     * Run sql_diffURL() for every active URL in the table (intended for cron use).
     */
    public function sql_diffURLcron()
    {
        $result = $this->query('select URL from ' . $this->tableRef() . ' where active=1');

        // Collect the list first so the result set is released before the
        // per-URL queries run.
        $urls = array();
        while ($row = $result->fetch_assoc()) {
            $urls[] = $row['URL'];
        }
        $result->free();

        foreach ($urls as $watched) {
            $this->sql_diffURL($watched);
        }
    }

    /**
     * Mark a URL as active and reset its changed flag.
     *
     * @param string $url Watched address.
     */
    public function sql_activateURL($url)
    {
        $this->query(
            'update ' . $this->tableRef() . ' set active=1,changed=0,update_DT=NOW() where `URL`='
            . $this->quote($url) . ' limit 1'
        );
    }

    /**
     * Mark a URL as inactive so the diff functions skip it.
     *
     * @param string $url Watched address.
     */
    public function sql_deactivateURL($url)
    {
        $this->query(
            'update ' . $this->tableRef() . ' set active=0,update_DT=NOW() where `URL`='
            . $this->quote($url) . ' limit 1'
        );
    }

    /**
     * Record length and checksum of a fetched body in $this->curl and pass the
     * body through unchanged. A failed fetch (false) is recorded as the empty string.
     */
    private function rememberFetch($body)
    {
        $content = ($body === false) ? '' : $body;
        $this->curl['length'] = strlen($content);
        $this->curl['checksum'] = md5($content);

        return $body;
    }

    /**
     * Release a cURL handle. From PHP 8.0 on handles are objects freed
     * automatically and curl_close() does nothing, so it is only called on older versions.
     */
    private function closeHandle($ch)
    {
        if (PHP_VERSION_ID < 80000) {
            curl_close($ch);
        }
    }

    /**
     * Return the open connection, or terminate the script when sql_connect() has not succeeded.
     */
    private function db()
    {
        if (!($this->con instanceof mysqli)) {
            die('ADV_WDT: *** Error: no SQL connection, call sql_connect() first.');
        }

        return $this->con;
    }

    /**
     * Run a query and return its result; terminates the script with the
     * database error message when the query fails.
     */
    private function query($sql)
    {
        $db = $this->db();
        try {
            $result = $db->query($sql);
        } catch (mysqli_sql_exception $e) {
            die($e->getMessage());
        }
        if ($result === false) {
            die($db->error);
        }

        return $result;
    }

    /**
     * Turn a value into an escaped, single-quoted SQL string literal.
     */
    private function quote($value)
    {
        return "'" . $this->db()->real_escape_string((string) $value) . "'";
    }

    /**
     * Backtick-quote an identifier, doubling any backtick it contains.
     */
    private function identifier($name)
    {
        return '`' . str_replace('`', '``', (string) $name) . '`';
    }

    /**
     * Database-qualified, quoted reference to the watchdog table.
     */
    private function tableRef()
    {
        return $this->identifier($this->sqldatabase) . '.' . $this->identifier($this->sqltable);
    }
} // class adv_wdt
?>
|