<?php
/**
* pfpFileTree.inc.php
*
* tools for manipulating file trees
* @author Colin Mckinnon
*
* NB manipulating large trees of files can quickly fill up available memory - and
* this class makes deleting lots of files in a single go very easy - please be
* careful
*/
class pfpFileTree
{
    // ------------------- properties --------------
    /**
     * Canonical path this object refers to, always ending in '/'.
     * False when the path given to the constructor was not a directory.
     * @var mixed - string, or false
     * @access protected (declared public for backward compatibility)
     */
    public $baseDir;
    /**
     * Number of files affected by the last operation.
     * @var integer
     * @access public - read only
     */
    public $opsCompleted = 0;
    /**
     * Config setting.
     * Normally a directory that cannot be opened stops the recursion;
     * setting this to true makes the recursion press on with the
     * directories it is able to read.
     * @var bool
     * @access public
     */
    public $ignoreNoDescend;
    /**
     * Config setting - whether name matching and name sorting are
     * case sensitive.
     * @var bool
     * @access public
     */
    public $caseSensitive;
    /**
     * Config setting - ratio of used memory to the memory limit above
     * which processing stops.
     * @var float
     * @access public
     */
    public $maxMemRatio;
    /**
     * Config setting - number of directory entries read between
     * memory checks.
     * @var integer
     * @access public
     */
    public $memFreq;
    /**
     * The file set: key is the path relative to baseDir, value is an
     * array of attributes. Kept at object level because call_user_func
     * does not support pass-by-reference.
     * @var array
     */
    public $data;
    /**
     * Populated when something unplanned happened.
     * @var string
     * @access public
     */
    public $error = '';
    /**
     * User callbacks - these add additional attributes to files.
     * Populated by addCallback.
     * @var array
     * @access protected (declared public for backward compatibility)
     */
    public $userFn;
    /**
     * Sort specification currently used by sortWorker; set by sortFiles.
     * @var array
     */
    public $sortAttr = array();

    // -------------------- methods ------------------------
    /**
     * Constructor
     *
     * @param string baseDir - directory to consider as tree root
     */
    public function __construct($baseDir)
    {
        $this->ignoreNoDescend = false;
        // NB: this substring test also matches "Darwin" and "CYGWIN";
        // kept as-is so existing defaults do not change.
        $this->caseSensitive = (stristr(PHP_OS, 'WIN') === false);
        $this->data = array();
        $this->userFn = array();
        $this->sortAttr = array();
        $this->opsCompleted = 0;
        $this->error = '';
        $this->maxMemRatio = 0.9;
        $this->memFreq = 50;
        // last, so that an error reported by fixUpRoot is preserved
        $this->baseDir = $this->fixUpRoot($baseDir);
    }

    /**
     * Legacy (PHP 4 style) constructor name, kept so existing explicit
     * calls keep working. Must stay declared after __construct.
     *
     * @param string baseDir - directory to consider as tree root
     */
    public function pfpFileTree($baseDir)
    {
        $this->__construct($baseDir);
    }

    /**
     * Clear the data stored for the current fileTree.
     * opsCompleted is set to the number of entries discarded.
     *
     * @access public
     */
    public function clear()
    {
        clearstatcache();
        $this->opsCompleted = count($this->data);
        $this->data = array();
        $this->error = '';
    }

    /**
     * Utility method: resolve a directory to its canonical path with a
     * trailing '/'.
     *
     * @access protected
     * @param string treeRoot - path to base directory
     * @return mixed - false if failed (error is set), string otherwise
     */
    public function fixUpRoot($treeRoot)
    {
        // false/null must be rejected explicitly: realpath() of an empty
        // path resolves to the current working directory.
        $resolved = false;
        if ($treeRoot !== false && $treeRoot !== null) {
            $resolved = realpath((string)$treeRoot);
        }
        if ($resolved === false || !is_dir($resolved)) {
            // report the path we were given - $resolved is false here
            $this->error = "'" . (string)$treeRoot . "' is not a directory";
            return false;
        }
        if (substr($resolved, -1) !== '/') {
            $resolved .= '/';
        }
        return $resolved;
    }

    /**
     * Register a callback used to decorate each file read with
     * additional attributes. It is called with 1 argument (the
     * canonical path) and should return an array with the attribute(s)
     * as key(s) and the corresponding values.
     *
     * @access public
     * @param mixed $cb - either a function name or an array($obj, 'method')
     */
    public function addCallback($cb)
    {
        $this->userFn[] = $cb;
    }

    /**
     * Remove every registration of a callback added with addCallback.
     *
     * @access public
     * @param mixed $cb - the callback to remove
     */
    public function delCallBack($cb)
    {
        foreach ($this->userFn as $idx => $registered) {
            if ($registered === $cb) {
                // no break - there may be duplicates
                unset($this->userFn[$idx]);
            }
        }
    }

    /**
     * Read the tree below baseDir into $this->data.
     * See readTreeStat for the attributes collected.
     *
     * The filter array has attributes as key names and conditions as
     * values (see filterAttr), e.g.
     *   array('name'=>'*.png', 'size'=>'>4096', 'w'=>false)
     * keeps only entries matching *.png, larger than 4k, which are not
     * writeable.
     *
     * @access public
     * @param array filter - optional conditions to apply to files
     * @return mixed - array (key is path relative to baseDir, value is
     *                 array of attributes) or false on failure
     */
    public function readTree($filter = array())
    {
        $this->opsCompleted = 0;
        $this->error = '';
        if (!is_string($this->baseDir)) {
            $this->error = 'No valid base directory';
            return false;
        }
        $ok = $this->runRecursion(array($this, 'readTreeWorker'), $this->baseDir, $filter);
        return ($ok === true) ? $this->data : false;
    }

    /**
     * Recursion callback for readTree: stat one path and keep it only
     * if it passes the filter.
     *
     * @access protected
     * @param string $f - canonical file path
     * @param array $filter
     * @return bool - always true
     */
    public function readTreeWorker($f, $filter)
    {
        $effPath = substr($f, strlen($this->baseDir));
        // the attributes must be stored first: the filter reads them from $this->data
        $this->data[$effPath] = $this->readTreeStat($f);
        if ($this->filterFile($effPath, $filter)) {
            $this->opsCompleted++;
        } else {
            unset($this->data[$effPath]);
        }
        return true;
    }

    /**
     * Collect the attributes of one path.
     *
     * Attributes are:
     * 'exists': bool
     * 'size': in bytes
     * 'mtime': timestamp when file last modified
     * 'type': f/d/o (file/directory/other)
     * 'w': bool - is the file writeable
     * 'r': bool - is the file readable
     * plus whatever the registered user callbacks return.
     *
     * @access protected
     * @param string $f - canonical file path
     * @return array of attributes
     */
    public function readTreeStat($f)
    {
        $out = array(
            'exists' => file_exists($f),
            'size'   => filesize($f),
            'mtime'  => filemtime($f),
            'type'   => $this->fileType($f),
            'w'      => is_writable($f) ? true : false,
            'r'      => is_readable($f) ? true : false,
        );
        foreach ($this->userFn as $callback) {
            $extra = call_user_func($callback, $f);
            // a callback that returns nothing useful must not break the scan
            if (is_array($extra)) {
                $out = array_merge($out, $extra);
            }
        }
        return $out;
    }

    /**
     * Classify a path as 'd' (directory), 'f' (file or link) or
     * 'o' (anything else).
     *
     * @access protected
     * @param string $path
     * @return string
     */
    protected function fileType($path)
    {
        if (is_dir($path)) {
            return 'd';
        }
        if (is_file($path) || is_link($path)) {
            return 'f';
        }
        return 'o';
    }

    /**
     * Utility function to print the files and attributes measured.
     */
    public function ls()
    {
        print "ls output:\n";
        foreach ($this->data as $effPath => $attrs) {
            print "$effPath";
            foreach ($attrs as $attr => $val) {
                print ",$attr=>$val";
            }
            print "\n";
        }
    }

    /**
     * This does the recursion on behalf of the other public methods.
     * The contents of a directory are visited before the directory
     * itself. The recursion stops as soon as the callback or the
     * memory check returns false.
     *
     * @param mixed $callback - called with (canonical path, $arg)
     * @param string $dir - directory to scan, with trailing '/'
     * @param mixed $arg - passed through to the callback
     * @return bool - false if the recursion was stopped
     */
    public function runRecursion($callback, $dir, $arg)
    {
        $dh = opendir($dir);
        if (!$dh) {
            if ($this->ignoreNoDescend) {
                return true;
            }
            $this->error = 'unable to read dir ' . $dir;
            return false;
        }
        $ok = true;
        // strict comparison: an entry named "0" is falsy but valid
        while (false !== ($f = readdir($dh))) {
            if (!$this->checkMem()) {
                $ok = false;
                break;
            }
            if ($f === '.' || $f === '..') {
                continue;
            }
            $path = $dir . $f;
            if (is_dir($path) && !$this->runRecursion($callback, $path . '/', $arg)) {
                $ok = false;
                break;
            }
            if (!call_user_func($callback, $path, $arg)) {
                $ok = false;
                break;
            }
        }
        // single exit point so the handle is released on every path
        closedir($dh);
        return $ok;
    }

    /**
     * Delete every file and directory currently listed in $this->data.
     *
     * Entries are not removed from $this->data; their 'exists'
     * attribute is set to false. To find the current state of the
     * files afterwards:
     *   $this->clear();
     *   $this->readTree()
     *
     * @access public
     * @return bool - true if every listed file/directory was removed
     */
    public function delFiles()
    {
        $this->opsCompleted = 0;
        $this->error = '';
        if (!$this->canDelFiles()) {
            $this->error = "Permissions do not allow forall files to be deleted";
            return false;
        }
        // descending name order: the contents of a dir come before the dir
        $this->sortFiles(array('name' => '-'));
        $allRemoved = true;
        foreach (array_keys($this->data) as $effPath) {
            if (!$this->data[$effPath]['exists']) {
                continue;
            }
            $file = $this->baseDir . $effPath;
            if (is_link($file) || is_file($file)) {
                $removed = @unlink($file);
            } elseif (is_dir($file)) {
                $removed = @rmdir($file);
            } else {
                // not bothered about other file types
                continue;
            }
            if ($removed) {
                $this->opsCompleted++;
                $this->data[$effPath]['exists'] = false;
            } else {
                $allRemoved = false;
                $this->error = "Failed to delete $file";
            }
        }
        return $allRemoved;
    }

    /**
     * Apply a callback to every entry in the current data set.
     *
     * The callback is called with three arguments: the canonical file
     * path, the attribute array stored for that file, and $arg.
     * The loop ends when the callback returns false.
     * call_user_func() cannot pass by reference, so a callback that
     * needs to modify the data set must be given a reference to this
     * pfpFileTree instance within $arg.
     *
     * @param mixed $callback - string (function name) or array (object, method)
     * @param mixed $arg - static data passed to callback - changes are lost
     * @return bool - false if a callback returned false, true otherwise
     *                (note: a boolean, not the current data array)
     */
    public function applyCallback($callback, $arg = false)
    {
        $this->opsCompleted = 0;
        $this->error = '';
        foreach ($this->data as $effPath => $attrs) {
            $this->opsCompleted++;
            if (!call_user_func($callback, $this->baseDir . $effPath, $attrs, $arg)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Compare the files below baseDir with the files at the same
     * relative path below another root. Files that only exist in the
     * other tree are ignored. The result is stored in the 'cmp'
     * attribute of each entry:
     *  0 if same in both trees
     *  1 if this is different and newer,
     * -1 if this is different and older
     *  2 if not exists in other
     *  3 if type mismatch (file/dir)
     *
     * @param string $otherTree
     * @return mixed - the data array, or false on failure
     */
    public function compareTree($otherTree)
    {
        $otherTree = $this->fixUpRoot($otherTree);
        if ($otherTree === false) {
            return false; // error already set by fixUpRoot
        }
        if (!is_string($this->baseDir)) {
            $this->error = 'No valid base directory';
            return false;
        }
        if ($otherTree === $this->baseDir) {
            $this->error = "Other tree is same as current";
            return false;
        }
        $this->opsCompleted = 0;
        $this->error = '';
        $ok = $this->runRecursion(array($this, 'compareTreeWorker'), $this->baseDir, $otherTree);
        return ($ok === true) ? $this->data : false;
    }

    /**
     * Recursion callback for compareTree: set the 'cmp' attribute for
     * one path.
     *
     * @access protected
     * @param string $file - canonical path in this tree
     * @param string $otherRoot - canonical root of the other tree
     * @return bool - always true, so one mismatch does not stop the comparison
     */
    public function compareTreeWorker($file, $otherRoot)
    {
        $effPath = substr($file, strlen($this->baseDir));
        $otherFile = $otherRoot . $effPath;
        $this->opsCompleted++;
        if (!file_exists($otherFile)) {
            $this->data[$effPath]['cmp'] = 2;
            return true;
        }
        $otherType = $this->fileType($otherFile);
        // fall back to the filesystem when readTree has not recorded this path
        $thisType = isset($this->data[$effPath]['type'])
            ? $this->data[$effPath]['type']
            : $this->fileType($file);
        if ($otherType != $thisType) {
            $this->data[$effPath]['cmp'] = 3;
            return true;
        }
        if ($otherType == 'd') {
            // only the names need to match for dirs
            $this->data[$effPath]['cmp'] = 0;
            return true;
        }
        // only hash when the sizes agree - different sizes cannot be equal
        if (filesize($file) == filesize($otherFile)) {
            $this->data[$effPath]['md5'] = md5_file($file);
            if ($this->data[$effPath]['md5'] == md5_file($otherFile)) {
                $this->data[$effPath]['cmp'] = 0;
                return true;
            }
        }
        $this->data[$effPath]['cmp'] = (filemtime($file) > filemtime($otherFile)) ? 1 : -1;
        return true;
    }

    /**
     * Copy the current fileset to another (existing) location.
     *
     * @access public
     * @param string $otherTree - root of location to write current tree to
     * @return mixed - false on failure (see error), the data array otherwise
     */
    public function writeTo($otherTree)
    {
        $this->opsCompleted = 0;
        $this->error = '';
        $otherTree = $this->fixUpRoot($otherTree);
        if ($otherTree === false) {
            return false; // error already set by fixUpRoot
        }
        if (!$this->canWriteTo($otherTree)) {
            return false;
        }
        foreach ($this->data as $effPath => $attrs) {
            if (!$attrs['exists']) {
                continue;
            }
            if (!$this->writeToWorker($effPath, $attrs, $otherTree)) {
                // this should never happen?
                return false;
            }
            $this->opsCompleted++;
        }
        return $this->data;
    }

    /**
     * Copy one entry below another root, creating directories as needed.
     *
     * @access protected
     * @param string $effPath - path relative to baseDir
     * @param array $attrs - attributes of the entry ('type' is used)
     * @param string $other - canonical destination root, with trailing '/'
     * @return bool - false on failure (error is set)
     */
    public function writeToWorker($effPath, $attrs, $other)
    {
        if ($attrs['type'] == 'o') {
            $this->error = "Can only process files and directories";
            return false;
        }
        $target = $other . $effPath;
        $dir = ($attrs['type'] == 'd') ? $target : dirname($target);
        // mode 0777: mkdir applies the process umask itself
        if (!is_dir($dir) && !mkdir($dir, 0777, true)) {
            // this should never happen
            $this->error = "Failed to create directory $dir";
            return false;
        }
        if ($attrs['type'] == 'f' && !copy($this->baseDir . $effPath, $target)) {
            // this should not happen
            $this->error = "Copy failed for $effPath";
            return false;
        }
        return true;
    }

    /**
     * Test if the tree can be written to another location. Sets the
     * 'can_w' attribute on every entry.
     *
     * @access public
     * @param string $otherTree - root of location to write current tree to
     * @return bool - false if one or more files not writeable
     */
    public function canWriteTo($otherTree)
    {
        $otherTree = $this->fixUpRoot($otherTree);
        if ($otherTree === false) {
            return false;
        }
        if ($otherTree === $this->baseDir) {
            $this->error = "other directory is same as current";
            return false;
        }
        $writable = true;
        // no early exit: every entry gets its 'can_w' attribute
        foreach (array_keys($this->data) as $effPath) {
            $this->canWriteToWorker($effPath, $otherTree);
            if ($this->data[$effPath]['can_w'] === false) {
                $writable = false;
            }
        }
        if (!$writable && $this->error === '') {
            $this->error = "Cannot write to $otherTree";
        }
        return $writable;
    }

    /**
     * Set the 'can_w' attribute for one entry.
     *
     * @access protected
     * @param string $effPath - path relative to baseDir
     * @param string $otherRoot - canonical destination root, with trailing '/'
     * @return bool - false for entries of type 'o', true otherwise
     */
    public function canWriteToWorker($effPath, $otherRoot)
    {
        if (!$this->data[$effPath]['exists']) {
            // we treat it as if we can write
            $this->data[$effPath]['can_w'] = true;
            return true;
        }
        if ($this->data[$effPath]['type'] == 'o') {
            // not going to try to deal with devices, sockets
            $this->error = "Can only process files and directories";
            $this->data[$effPath]['can_w'] = false;
            return false;
        }
        // walk up to the nearest existing ancestor, no higher than the root
        $try = $otherRoot . $effPath;
        while (strlen($try) + 1 > strlen($otherRoot) && !file_exists($try)) {
            $try = dirname($try);
        }
        $this->data[$effPath]['can_w'] = is_writable($try);
        return true;
    }

    /**
     * Check if all the files currently referenced in $data can be
     * deleted. Sets the 'can_d' attribute on every entry.
     *
     * @access public
     * @return bool - true if files can be deleted
     */
    public function canDelFiles()
    {
        $deletable = true;
        // no early exit: every entry gets its 'can_d' attribute
        foreach (array_keys($this->data) as $effPath) {
            $this->canDelWorker($this->baseDir . $effPath);
            if ($this->data[$effPath]['can_d'] === false) {
                $deletable = false;
            }
        }
        return $deletable;
    }

    /**
     * Set the 'can_d' attribute for one entry: deletable when already
     * gone or when its parent directory is writeable.
     *
     * @access protected
     * @param string $file - canonical path
     * @return bool - always true
     */
    public function canDelWorker($file)
    {
        $effPath = substr($file, strlen($this->baseDir));
        if (!$this->data[$effPath]['exists']) {
            // already deleted
            $this->data[$effPath]['can_d'] = true;
        } else {
            $this->data[$effPath]['can_d'] = is_writable(dirname($file));
        }
        return true;
    }

    /**
     * Filter the current data set in place; entries that do not match
     * every condition are removed. opsCompleted is set to the number of
     * entries removed. See filterAttr for the condition syntax, e.g.
     *   filter(array('name'=>'*.png', 'size'=>'>4096', 'w'=>false));
     * strips out everything except entries matching *.png, larger than
     * 4k, which are not writeable.
     *
     * @access public
     * @param array $attr - set of attributes to match
     * @return array - data after applying filter
     */
    public function filter($attr)
    {
        $initcount = count($this->data);
        foreach (array_keys($this->data) as $effPath) {
            if (!$this->filterFile($effPath, $attr)) {
                unset($this->data[$effPath]);
            }
        }
        $this->opsCompleted = $initcount - count($this->data);
        return $this->data;
    }

    /**
     * Like filter - but this leaves the current dataset intact
     * and returns a new (filtered) instance of this class.
     *
     * @access public
     * @param array $attr
     * @return object - new instance of this class
     */
    public function subset($attr)
    {
        $result = clone $this;
        $result->filter($attr);
        return $result;
    }

    /**
     * Test one entry against every condition of a filter.
     *
     * @access protected
     * @param string $file - path relative to baseDir (key in $this->data)
     * @param array $filter
     * @return bool - true if selected by filter
     */
    public function filterFile($file, $filter)
    {
        foreach ($filter as $key => $wanted) {
            if (!$this->filterAttr($file, $key, $wanted)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Test one attribute of one entry.
     *
     * 'name' is matched against the relative path with fnmatch().
     * For other attributes the value may be prefixed by
     *   + or >  to require a value greater than the one passed
     *   - or <  to require a value less than the one passed
     *   !       to require a value different from the one passed
     * otherwise the values must be equal; the strings 'true' and
     * 'false' stand for the booleans.
     *
     * @access protected
     * @param string $path - the current path being checked
     * @param string $key - the attribute to check
     * @param mixed $val - the value to compare against
     * @return bool - true if the data for the path matches the key/val
     *                false if it does not match or the attribute is not populated
     */
    public function filterAttr($path, $key, $val)
    {
        if ($key === 'name') {
            // FNM_CASEFOLD requests a caseless match
            $flags = $this->caseSensitive ? 0 : FNM_CASEFOLD;
            return fnmatch($val, (string)$path, $flags);
        }
        if (!isset($this->data[$path]) || !is_array($this->data[$path])
            || !array_key_exists($key, $this->data[$path])) {
            return false;
        }
        $actual = $this->data[$path][$key];
        // booleans carry no prefix
        $prefix = is_bool($val) ? '' : substr((string)$val, 0, 1);
        switch ($prefix) {
            case '>':
            case '+':
                return !($actual <= (integer)substr((string)$val, 1));
            case '<':
            case '-':
                return !($actual >= (integer)substr((string)$val, 1));
            case '!':
                return !($actual == substr((string)$val, 1));
            default:
                if ($val === 'true') {
                    $val = true;
                }
                if ($val === 'false') {
                    $val = false;
                }
                return !($actual != $val);
        }
    }

    /**
     * Sort the data set.
     *
     * The order of entries in $attr determines their priority, e.g.
     *   sortFiles(array('size'=>'+','mtime'=>'-'));
     * small files appear first (ascending); if 2 files have the same
     * size, the newest appears first (descending). The pseudo
     * attribute 'name' sorts on the relative path.
     *
     * @access public
     * @param array $attr - keys are attributes to sort on, a value of '-' sorts descending
     * @return mixed - the sorted data array, or false if the sort failed
     */
    public function sortFiles($attr)
    {
        $this->sortAttr = $attr;
        if (count($attr)) {
            if (!uksort($this->data, array($this, 'sortWorker'))) {
                $this->error = "Sort failed for unknown reason";
                return false;
            }
        }
        return $this->data;
    }

    /**
     * Comparison callback for uksort: compares two keys of $this->data
     * according to $this->sortAttr.
     *
     * @access protected
     * @param mixed $a - key (relative path)
     * @param mixed $b - key (relative path)
     * @return integer - negative, zero or positive
     */
    public function sortWorker($a, $b)
    {
        foreach ($this->sortAttr as $key => $mode) {
            $direction = ($mode == '-') ? -1 : 1;
            if ($key === 'name') {
                // we are sorting using the keys; numeric-looking keys arrive as integers
                $cmp = $this->caseSensitive
                    ? strcmp((string)$a, (string)$b)
                    : strcasecmp((string)$a, (string)$b);
                if ($cmp) {
                    return ($cmp < 0 ? -1 : 1) * $direction;
                }
                continue;
            }
            // sorting using attributes
            $left = isset($this->data[$a][$key]) ? $this->data[$a][$key] : null;
            $right = isset($this->data[$b][$key]) ? $this->data[$b][$key] : null;
            if ($left === $right) {
                // same - try sorting by next attribute
                continue;
            }
            return ($left > $right) ? $direction : -$direction;
        }
        return 0;
    }

    /**
     * Uses the maxMemRatio and memFreq variables to assess current
     * memory usage - to prevent ugly errors when it is full.
     *
     * memFreq determines the number of calls between full memory checks.
     * maxMemRatio is the ratio of used memory to the memory limit above
     * which processing should stop.
     * The call counter and the parsed limit are static, so they are
     * shared by all instances.
     *
     * @access public
     * @return bool - false when memory usage exceeds maxMemRatio
     */
    public function checkMem()
    {
        static $iterations = 0;
        static $mlimit = 0;
        if (!$mlimit) {
            $raw = trim((string)ini_get('memory_limit'));
            $mlimit = (integer)$raw;
            // the unit suffix may be given in either case
            switch (strtoupper(substr($raw, -1))) {
                case 'G':
                    $mlimit *= 1024; // no break deliberate
                case 'M':
                    $mlimit *= 1024; // no break deliberate
                case 'K':
                    $mlimit *= 1024;
                    break;
            }
            if ($mlimit <= 0) {
                // -1 means no limit: assume 4GB
                $mlimit = 4294967296;
            }
        }
        if ($this->memFreq > $iterations) {
            $iterations++;
            return true;
        }
        $iterations = 0;
        $usedRatio = memory_get_usage() / $mlimit;
        if ($this->maxMemRatio < $usedRatio) {
            $this->error = 'Memory usage at ' . round($usedRatio * 100, 1) . '% of limit';
            return false;
        }
        return true;
    }
} // end class pfpFileTree
|