<?php
/**
* Apache access_log file parser.
*
* @author Rolands Kusiņš
* @license GPL
*
*/
class ApacheAccessLogParser{
    /**
     * Regex fragments substituted for the placeholders used in a line format.
     * Each fragment captures into a named group that parseLine() copies into $data.
     * Note: '%v' (referer) and '%i' (agent) are this parser's own placeholders.
     */
    private $patterns = array(
        '%h' => '(?P<ip>\S+)',// IP address of client
        '%l' => '(?P<identity>\S+)',// Identity of user determined by identd
        '%u' => '(?P<username>\S+)',// User name determined by HTTP authentication
        // Time the server finished processing request (17/Jan/2014:04:12:06 +0000).
        // The UTC offset may be negative (e.g. -0500), so both signs are accepted.
        '%t' => '(?P<datetime>\S+:\d+:\d+:\d+ [+-]\S+)',
        '%r' => '(?P<request>(\s*\S+\s*)|(\s*\S+\s*\S+\s*)|(\s*\S+\s*\S+\s*\S+\s*))',// Request from client ("GET / HTTP/1.1")
        '%s' => '(?P<statuscode>\S+)',// HTTP status code sent from server to client (200, 400, 403, etc)
        '%b' => '(?P<size>\S+)',// Size of response sent to client in bytes
        '%v' => '(?P<referer>\S+)',// Referer, page that sent to this URL
        '%i' => '(?P<agent>.*?)',// User agent identification string
    );
    // Default line format, used for files that do not supply their own 'format'.
    // The format is itself a regex fragment, which is why '[' and ']' are escaped.
    private $format = "%h %l %u \[%t\] \"%r\" %s %b";
    // Optional logger object; when set, write($message, $level) is called on it
    public $log = null;
    // Regular expressions (with delimiters) matched against the request string
    public $suspiciousPatterns = array();
    // Fields captured from the most recently parsed line
    private $data = array();
    // Format the cached line regex below was built from
    private $compiledFormat = null;
    // Cached line regex for $compiledFormat
    private $compiledPattern = null;

    /**
     * Check Apache access log file for new entries and match against patterns
     *
     * Reads from the stored offset to the end of the file, parses every line and
     * counts the ones whose request matches one of $suspiciousPatterns.
     *
     * @param array $apacheAccessLogFile with information about Apache access log file
     *        ('path' is required, 'offset' and 'format' are optional); 'offset' is updated
     * @param array $ipInfo with information about suspicious IP addresses, keyed by IP;
     *        'count' and 'lastactivity' are updated for matching lines
     * @param boolean $updateHostData will be updated to true if $ipInfo is updated
     * @param boolean $updateOffsets will be updated to true if the offset has changed
     * @return integer with suspicious activity pattern match count
     */
    public function parseFile(&$apacheAccessLogFile, &$ipInfo, &$updateHostData, &$updateOffsets){
        $newMatchCount = 0;
        $path = $apacheAccessLogFile['path'];
        $offset = isset($apacheAccessLogFile['offset']) ? (int)$apacheAccessLogFile['offset'] : 0;

        // filesize() results are cached by PHP; without this a long running process
        // would keep seeing the old size and never notice that the file was truncated
        clearstatcache(true, $path);
        $fileSize = is_file($path) ? filesize($path) : false;

        // Reset offset if file size has reduced (truncated) or the file is gone
        if($offset > 0 && ($fileSize === false || $fileSize < $offset)){
            $offset = 0;
            $apacheAccessLogFile['offset'] = 0;
            // The stored offset is no longer valid, so it has to be persisted again
            $updateOffsets = true;
        }

        // Open apache access log file for reading
        $f = @fopen($path, "r");
        if(!$f){
            return $newMatchCount;
        }

        // The format applies to this file only; it must not leak into files parsed later
        $format = isset($apacheAccessLogFile['format']) ? $apacheAccessLogFile['format'] : $this->format;

        // Seek to last position we know
        fseek($f, $offset);

        // Read whole lines until fgets() reports end of file or a read error.
        // No length limit is given, so long lines are not split into several pieces.
        while(($line = fgets($f)) !== false){
            $line = trim($line);
            if($this->parseLine($line, $format)){
                if($this->matchSuspiciousPatterns()){
                    // We found new match against pattern
                    $newMatchCount++;
                    // Host data only needs saving when an IP entry was actually changed
                    if($this->recordMatch($ipInfo)){
                        $updateHostData = true;
                    }
                }
            } else{
                // Report filename and line that could not be parsed; this may be noisy,
                // but it helps during development and testing
                $this->logError("Unable to parse line! ".$path.": ".$line);
            }
            // Sleep for 10 microseconds so that we leave some CPU time to other processes
            usleep(10);
        }

        // Remember how far we have read
        $currentOffset = ftell($f);
        if($currentOffset !== false && $currentOffset != $offset){
            $apacheAccessLogFile['offset'] = $currentOffset;
            // Because offset has changed, we need to update file data
            $updateOffsets = true;
        }
        fclose($f);

        return $newMatchCount;
    }

    /**
     * Update the statistics of the IP address of the current line
     *
     * @param array $ipInfo with information about suspicious IP addresses
     * @return boolean true if $ipInfo was changed, false if the line has no IP address
     */
    private function recordMatch(&$ipInfo){
        // A custom format without %h gives us nothing to attribute the match to
        if(!isset($this->data['ip'])){
            return false;
        }
        $ip = $this->data['ip'];

        // Init count for ip if it is first time we see it
        if(!isset($ipInfo[$ip])){
            $ipInfo[$ip] = array();
        }
        if(!isset($ipInfo[$ip]['count'])){
            $ipInfo[$ip]['count'] = 0;
        }
        // Increase pattern match count
        $ipInfo[$ip]['count']++;

        // Try parsing time of request and keep the most recent one
        if(isset($this->data['datetime'])){
            $time = strtotime($this->data['datetime']);
            if($time !== false && (!isset($ipInfo[$ip]['lastactivity']) || $ipInfo[$ip]['lastactivity'] < $time)){
                $ipInfo[$ip]['lastactivity'] = $time;
            }
        }
        return true;
    }

    /**
     * Write an error message to the log object, if one has been assigned
     *
     * @param string $message
     * @return void
     */
    private function logError($message){
        // $log defaults to null, calling write() on it would be a fatal error
        if(is_object($this->log)){
            $this->log->write($message, "error");
        }
    }

    /**
     * Build (or reuse) the regular expression for a whole line
     *
     * @param string $format line format containing placeholders from $patterns
     * @return string regular expression including delimiters
     */
    private function getLinePattern($format){
        // The format is the same for every line of a file, so build the regex only once
        if($this->compiledPattern === null || $this->compiledFormat !== $format){
            // Replace format identifiers with regexp patterns to create pattern for whole line
            $pattern = str_replace(array_keys($this->patterns), $this->patterns, $format);
            // Escape quotes in pattern
            $pattern = str_replace("\"", "\\\"", $pattern);
            $this->compiledPattern = "/^".$pattern."/";
            $this->compiledFormat = $format;
        }
        return $this->compiledPattern;
    }

    /**
     * Parse single line
     *
     * On success the captured fields are available in $this->data.
     *
     * @param string $line
     * @param string|null $format line format, the default format is used when null
     * @return boolean true if at least one field was captured
     */
    private function parseLine($line, $format = null){
        // Init data
        $this->data = array();
        if($format === null){
            $format = $this->format;
        }

        // Perform a match on line with format
        $matches = array();
        if(preg_match($this->getLinePattern($format), $line, $matches) !== 1){
            return false;
        }

        // Copy the named groups we know about, numeric groups are ignored
        $fields = array('ip', 'identity', 'username', 'datetime', 'request', 'statuscode', 'size', 'referer', 'agent');
        foreach($fields as $field){
            if(isset($matches[$field])){
                $this->data[$field] = $matches[$field];
            }
        }
        return count($this->data) > 0;
    }

    /**
     * Match patterns against request to find suspicious activities
     *
     * @return boolean true if the request of the current line matches any pattern
     */
    private function matchSuspiciousPatterns(){
        // Without a request there is nothing to match against
        if(!isset($this->data['request'])){
            return false;
        }
        foreach($this->suspiciousPatterns as $pattern){
            if(preg_match($pattern, $this->data['request'])){
                return true;
            }
        }
        return false;
    }
}
?>
|