<?php
/**
* \mainpage Linear least squares fitting package
*
* \section intro Introduction
* This package will be use to perform linear least squares fitting (<a href="http://en.wikipedia.org/wiki/Linear_regression">See linear regression</a>).
* Other types of linear fitting are polynomial fitting and functional linear fitting.
*
* \section usage Usage
*
* The way this package can be use is described in several steps setting data, set degree in case of polynomial fitting or add the user defined functions,
* add more data (optional), perform fitting, get confidence intervals, goodness of fitting and interpolate value sets.
* After setting new data, degree or functions new fittings can be performed using new loaded data
*
* \subsection setdata Setting data
*
* Using the method SetData you can set new data to perform the Least squares fitting. Setting new data implies previous fitting is removed and everything is initialized
*
* \subsection setdegree Setting polynome degree
*
* In case of PolyFit class is used (polynome fitting) the degree of the polynome will be fit has to be set. For this purpose Setdegree method will be used
*
* \subsection setfunction Setting user defined functions
*
* In case of FunctionFit class is used (linear functional fitting) the array of functions will be fit has to be set. For this purpose SetFunctions method will be used
*
* \subsection adddata Adding more data
*
* In case a new fitting is going to be pewrform with extra data without losing previously set data the method AddData will be used
*
* \subsection fit Fitting
*
* Once all the necesary information has been provided the fitting will be performed using Fit method. This method will return an array with the coefficients for each variable in case
* of LinearFit class is used, for each power in case of PolyFit, for each function in case of FunctionFit
*
* \subsection conf Confidence interval for each coefficient
*
* Least squares regression has a <a href="http://stattrek.com/regression/slope-confidence-interval.aspx">related error for coefficient estimation</a>. You can get this error using ConfInterval method
*
* \subsection goodness Goodness of fitting
*
* The <a href="http://www.graphpad.com/guides/prism/6/curve-fitting/index.htm?r2_ameasureofgoodness_of_fitoflinearregression.htm">goodness of fitting</a> can be got with the r2 method
*
* \subsection getval get estimated values
*
* Once the fitting ids performed set of values can be calculated using the previously calculated coefficients using the GetValue method.
* \section license License
* Copyright (C) 2013 José Gómez López
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
**/
/**
* LinearFit
*
* @author José Gómez López <jose.gomez.lopez@gmail.com>
*
* Multivariate linear least squares fitting class
*
* This class perform multivariate linear squares fitting
* in the form y=a0+a1*x1+ ... +an*xn
* each ai are coefficients to calculte
* each xi are the independent variables
**/
class LinearFit
{
private $X;
private $Y;
private $Coeffs;
private $Conf;
private $Size;
private $Vars;
private $nY;
/**
* Initialize data
*
* Initialize internal data and prepare for next calculation
**/
protected function init()
{
$this->X=array();
$this->Y=array();
$this->Coeffs=array();
$this->Conf=array();
$this->Size=0;
$this->Vars=0;
$this->nY=0;
} // End of init()
/**
* Constructor
*
* Create a new object
**/
public function __construct ()
{
$this->init();
} // End of function __construct
/**
* Add New data
*
* Add new data to the fitting process
* @param array double $adX vector with values for each variable
* @param (array) double $dY value corresponding to $adX
**/
public function AddData ($adX, $dY)
{
if ($this->Size>0 && $this->Vars>0 && $this->Vars != count($adX))
{
throw new Exception('# of variables of stored data and input vector is different');
}
else
{
$this->Size++;
$this->Vars=count($adX);
if (is_array($dY))
{
/*
* Several fittings will be performed at the same time
*/
if ($this->nY==0)
{
$this->nY=count($dY);
}
if (count($dY) != $this->nY )
{
throw new Exception('Number of Y elements is wrong');
}
}
else
{
/*
* Only a fitting will be performed
*/
if ($this->nY==0)
{
$this->nY=1;
}
if ($this->nY != 1)
{
throw new Exception('Y must be an array');
}
}
/*
* Provided set is added
*/
$this->Y[]=$dY;
$this->X[]=$adX;
} // End of if
} // End of AddData
/**
* Set New data
*
* Set new data destroying previous data
* @param array double $adX matrix with a set of X values
* @param array double $adY vector of values corresponding to each row of $adX
**/
public function SetData($adX, $adY)
{
if (count($adX) != count($adY))
{
throw new Exception('The size of both arrays should be the same');
}
else
{
if (is_array($adX[0]))
{
$this->Size=count($adX);
$this->Vars=count($adX[0]);
$this->Y=$adY;
$this->X=$adX;
$this->Coeffs=array();
$this->Conf=array();
if ( is_array($adY[0]) )
{
$this->nY=count($adY[0]);
}
else
{
$this->nY=1;
}
}
} // End of if
} // End of setData
/**
* Two-tailed standard normal probality
*
* Returns the two-tailed standard normal probability
* corresponding to dZ
* @param double $dZ Value of normal distribution
*
* @return double two-tailed standard normal probability
**/
private function Nnorm_p($dZ)
{
$dA1 = 0.0000053830;
$dA2 = 0.0000488906;
$dA3 = 0.0000380036;
$dA4 = 0.0032776263;
$dA5 = 0.0211410061;
$dA6 = 0.0498673470;
$dZ = abs($dZ);
$dP = ((((($dA1*$dZ+$dA2)*$dZ+$dA3)*$dZ+$dA4)*$dZ+$dA5)*$dZ+$dA6)*$dZ+1;
$dP = pow($dP, -16);
return $dP;
} //End of Nnorm_p
/**
* Normal distribution value given a half-middle type probability
*
* Normal distribution value given a half-middle type probability
* @param double $dP Probability
*
* @return double Normal distribution value
**/
private function Nnorm_z($dP)
{
$dA0= 2.5066282;
$dA1=-18.6150006;
$dA2= 41.3911977;
$dA3=-25.4410605;
$dB1= -8.4735109;
$dB2= 23.0833674;
$dB3=-21.0622410;
$dB4= 3.1308291;
$dC0= -2.7871893;
$dC1= -2.2979648;
$dC2= 4.8501413;
$dC3= 2.3212128;
$dD1= 3.5438892;
$dD2= 1.6370678;
if ($dP>0.42)
{
$dR=sqrt(-log(0.5-$dP));
$dZ=((($dC3*$dR+$dC2)*$dR+$dC1)*$dR+$dC0)/(($dD2*$dR+$dD1)*$dR+1);
}
else {
$dR=$dP*$dP;
$dZ=$dP*((($dA3*$dR+$dA2)*$dR+$dA1)*$dR+$dA0)/(((($dB4*$dR+$dB3)*$dR+$dB2)*$dR+$dB1)*$dR+1);
} /* End of if */
return $dZ;
} // End of Nnorm_z
/**
* t-Student's value
*
* Hill's aprox. inverse t-Student's distribution:
* Comm. of A.C.M. Vol. 13 No.10 1970 pg 620.
* Calculates t-Student's distribution for the given
* degrees of freedom and two-tail probability.
* @param $dP Probability
* @param $iFd Degrees of freedom
*
* @return double aproximate t-Stuxdent's distribution value
**/
private function Hills_inv_t ($dP, $iFd)
{
if ($iFd == 1)
{
$dT = cos($dP*M_PI/2)/sin($dP*M_PI/2);
}
else if ($iFd == 2)
{
$dT = sqrt(2/($dP*(2 - $dP)) - 2);
}
else
{
$dA = 1/($iFd - 0.5);
$dB = 48/($dA*$dA);
$dC = ((20700*$dA/$dB - 98)*$dA - 16)*$dA + 96.36;
$dD = ((94.5/($dB + $dC) - 3)/$dB + 1)*sqrt($dA*M_PI*0.5)*$iFd;
$dX = $dD*$dP;
$dY = pow($dX, 2/$iFd);
if ($dY > 0.05 + $dA)
{
$dX = $this->Norm_z(0.5*(1 - $dP));
$dY = $dX*$dX;
if ($iFd < 5) $dC = $dC + 0.3*($iFd - 4.5)*($dX + 0.6);
$dC = (((0.05*$dD*$dX - 5)*$dX - 7)*$dX - 2)*$dX + $dB + $dC;
$dY = (((((0.4*$dY + 6.3)*$dY + 36)*$dY + 94.5)/$dC - $dY - 3)/$dB + 1)*$dX;
$dY = $dA*$dY*$dY;
if ($dY > 0.002)
{
$dY = exp($dY) - 1;
}
else $dY = 0.5*$dY*$dY + $dY;
$dT = sqrt($iFd*$dY);
} /* End of if */
else
{
$dY = ((1/((($iFd + 6)/($iFd*$dY) - 0.089*$dD - 0.822)*($iFd + 2)*3) + 0.5/($iFd + 4))*$dY - 1)*($iFd + 1)/($iFd + 2) + 1/$dY;
$dT = sqrt($iFd*$dY);
} /* End of if */
} /* End of if */
return $dT;
}
/**
* t-Student's distribution
*
* Returns an accurate t-Student's distribution to tolerance
* 0.0001 for the given degrees of freedom and two-tail
* probability.
* @param $dP Probability
* @param $iFd Degrees of freedom
*
* @return double acurrate t-Stuxdent's distribution value
**/
private function T ($dProb, $iDeg)
{
$dDiff = 1;
$dP0 = 1-$dProb;
$dP1 = $dP0;
while (abs($dDiff) > .0001)
{
$dT = $this->Hills_inv_t($dP1, $iDeg); /* initial rough value */
$dDiff = $this->T_p($dT,$iDeg) - $dP0; /* compare result with forward fn */
$dP1 -= $dDiff; /* small adjustment to p1 */
} /* End of while */
return $dT;
}
/**
* Two-tail probability
*
* Calculates two-tail probability given a t-Student's
* distribution value and degrees of freedom
* @param $dT t-Student's distribution value
* @param $iFd Degrees of freedom
*
* @return double two tailed probality
**/
private function T_p ($dT, $iFd)
{
$dAbst = abs($dT);
$dTsq = $dT*$dT;
if ($iFd== 1)
{
$dP = 1 - 2*atan($dAbst)/M_PI;
}
else if ($iFd== 2)
{
$dP = 1 - $dAbst/sqrt($dTsq + 2);
}
else if ($iFd== 3)
{
$dP = 1 - 2*(atan($dAbst/sqrt(3)) + $dAbst*sqrt(3)/($dTsq + 3))/M_PI;
}
else if ($iFd== 4)
{
$dP = 1 - $dAbst*(1 + 2/($dTsq + 4))/sqrt($dTsq + 4);
}
else {
$dZ = T_z($dAbst, $iFd);
if ($iFd>4)
{
$dP = $this->Norm_p($dZ);
}
else
{
$dP = $this->Norm_p($dZ);
} /* End of if */
} /* End of if */
return $dP;
}
/**
* Normal distribution value from t-Student's
*
* Converts a t-Student's distribution value to Normal distribution
* value given the degrees of freedom at the two-tail probability level.
* @param $dT t-Student's distribution value
* @param $iFd Degrees of freedom
*
* @return double Normal distribution value from t-Student's value
**/
private function T_z ($dT, $iFd)
{
$dA9 = $iFd - 0.5;
$dB9 = 48*$dA9*$dA9;
$dT9 = $dT*$dT/$iFd;
if ($dT9 >= 0.04)
{
$dZ8 = $dA9*log(1+$dT9);
}
else
{
$dZ8 = $dA9*(((1 - $dT9*0.75)*$dT9/3 - 0.5)*$dT9 + 1)*$dT9;
} /* End of if */
$dP7 = ((0.4*$dZ8 + 3.3)*$dZ8 + 24)*$dZ8 + 85.5;
$dB7 = 0.8*pow($dZ8, 2) + 100 + $dB9;
$dZ = (1 + (-$dP7/$dB7 + $dZ8 + 3)/$dB9)*sqrt($dZ8);
return $dZ;
}
/**
* Solve linear system
*
* Solve a linear system using Gauss-Jordan method
* @param array double input/output $adX
* @paran array double input/output $adY output the solution
*
**/
private function SolveLinear(&$adX, &$adY)
{
$iMax=0;
$dMax=0;
$dFactor=0;
$adTemp=array();
$iDim=count($adX);
if ( !is_array($adX) || !is_array($adY))
{
throw new Exception('Both inputs should be matrix');
} // End of if
if ( !is_array($adX[0]) || !is_array($adY[0]))
{
throw new Exception('Both inputs should be matrix');
} // End of if
if ( $iDim != count($adX[0]))
{
throw new Exception('X matrix should be squared');
} // End of if
if ( $iDim != count($adY))
{
throw new Exception('Both input should have the same number of rows');
} // End of if
$iSol=count($adY[0]);
for ($i=0; $i<$iDim; $i++)
{
/*************************************
* Search maximum value in the column
*************************************/
for ($j=$i, $dMax=0; $j<$iDim; $j++)
{
if ($adX[$j][$i]*$adX[$j][$i]>$dMax*$dMax)
{
$dMax=$adX[$j][$i];
$iMax=$j;
} /* End of if */
} /* End of for */
if ( $dMax*$dMax>0 )
{
/************************************************
* Put the maximum value Row in current position
************************************************/
$adTemp=$adX[$i];
$adX[$i]=$adX[$iMax];
$adX[$iMax]=$adTemp;
$adTemp=$adY[$i];
$adY[$i]=$adY[$iMax];
$adY[$iMax]=$adTemp;
/*************************************************
* Set the diagonal element to 1
*************************************************/
for ($j=0; $j<$iDim; $j++)
{
$adX[$i][$j]/=$dMax;
} /* End of for */
for ($j=0; $j<$iSol; $j++)
{
$adY[$i][$j]/=$dMax;
} /* End of for */
/****************************************************
* Set 0 to the non diagonal elements in the column
****************************************************/
for ($j=0; $j<$iDim; $j++)
{
if ($i!=$j)
{
$dFactor=$adX[$j][$i];
for ($k=$i; $k<$iDim; $k++)
{
$adX[$j][$k]-=$dFactor*$adX[$i][$k];
} /* End of for */
for ($k=0; $k<$iSol; $k++)
{
$adY[$j][$k]-=$dFactor*$adY[$i][$k];
} /* End of for */
} /* End of if */
} /* End of for */
}
else
{
/*
* Singular matrix
*/
throw new Exception('Input matrix is singular');
}
} /* End of for */
} // End of SolveLinear
/**
* Build the lesat squares matrix
*
* Build the Least Squares matrix using the X vectors
*
* @return array double Least squares matrix
**/
private function LSMatrix()
{
$adX=array();
/*
* Matrix initialization
*/
for ($i=0; $i<=$this->Vars; $i++)
{
for ($j=0; $j<=$this->Vars; $j++)
{
$adX[$i][$j]=0;
}
}
/*
* Filling the first row and column
* This matrix is symetric
*/
$adX[0][0]=$this->Size;
for ($i=0; $i<$this->Vars; $i++)
{
for ($j=0; $j<$this->Size; $j++)
{
$adX[$i+1][0]+=$this->X[$j][$i];
}
$adX[0][$i+1]=$adX[$i+1][0];
}
/*
* rest of the matrix filling
*/
for ($i=0; $i<$this->Vars; $i++)
{
for ($j=$i; $j<$this->Vars; $j++)
{
for ($k=0; $k<$this->Size; $k++)
{
$adX[$i+1][$j+1]+=$this->X[$k][$i]*$this->X[$k][$j];
}
$adX[$j+1][$i+1]=$adX[$i+1][$j+1];
}
}
return $adX;
}
/**
* Perform linear fitting
*
* perform linnear fitting process with the suplied data
* @return array double Coefficient matrix
**/
public function Fit()
{
if ($this->Size == 0)
{
throw new Exception('There is no loaded data');
}
$adX=array();
$adY=array();
/*
* Initialize result vector set
*/
for ($i=0; $i<=$this->Vars; $i++)
{
for ($j=0; $j<$this->nY; $j++)
{
$adY[$i][$j]=0;
}
}
/*
* Filling the results vector set
* from the second elemet
*/
for ($i=0; $i<$this->Vars; $i++)
{
for ($j=0; $j<$this->Size; $j++)
{
for ($k=0; $k<$this->nY; $k++)
{
$dY=($this->nY>1)?$this->Y[$j][$k]: $this->Y[$j];
$adY[$i+1][$k]+=$this->X[$j][$i]*$dY;
}
}
}
/*
* Filling the first element of
* the results vector set
*/
for ($j=0; $j<$this->Size; $j++)
{
for ($k=0; $k<$this->nY; $k++)
{
$dY=($this->nY>1)?$this->Y[$j][$k]: $this->Y[$j];
$adY[0][$k]+=$dY;
}
}
/*
* Build van der Monde matrix
* (least squares matrix)
*/
$adX=$this->LSMatrix();
/*
* solves the iLeast squares system system
*/
$this->SolveLinear($adX, $adY);
if ($this->nY == 1)
{
for ($i=0; $i<count($adY); $i++)
{
$adT[$i]=$adY[$i][0];
}
$adY=$adT;
}
$this->Coeffs=$adY;
return $adY;
} // End of Fit
/**
* Get the confidence interval band for the parameters
*
* Get the values of an input using the calculated coefficients
* @param array double $adX input to get the values
*
* @return array double Error estimation for coefficients
**/
public function ConfInterval($dProb)
{
if (count($this->Coeffs)==0)
{
throw new Exception('Fitting is not performed yet');
}
if (count($this->Conf)==0)
{
for ($i=0; $i<=$this->Vars; $i++)
{
for ($j=0; $j<=$this->Vars; $j++)
{
$adI[$i][$j]=0;
}
$adI[$i][$i]=1;
}
$adX=$this->LSMatrix();
$this->SolveLinear($adX, $adI);
$adX=$this->X;
$adY=self::GetValues($adX);
$dT=$this->T($dProb, $this->Size-$this->Vars-1);
if ($this->nY == 1)
{
for ($i=0, $dS=0; $i<$this->Size; $i++)
{
$dS+=($adY[$i]-$this->Y[$i])*($adY[$i]-$this->Y[$i]);
}
$dS/=$this->Size-$this->Vars-1;
for ($i=0; $i<=$this->Vars; $i++)
{
$this->Conf[$i]=$dT*sqrt($adI[$i][$i]*$dS);
}
}
else
{
for ($ii=0; $ii<$this->nY; $ii++)
{
for ($i=0, $adSi[$ii]=0; $i<$this->Size; $i++)
{
$adSi[$ii]+=($adY[$i][$ii]-$this->Y[$i][$ii])*($adY[$i][$ii]-$this->Y[$i][$ii]);
}
$adSi[$ii]/=$this->Size-$this->Vars-1;
}
for ($i=0; $i<=$this->Vars; $i++)
{
for ($j=0; $j<$this->nY; $j++)
{
$this->Conf[$i][$j]=$dT*sqrt($adI[$i][$i]*$adSi[$j]);
}
}
}
}
return $this->Conf;
}
/**
* Get the goodness of regression
*
* Get the goodness of regression value.
* It will be between 0 and 1. As the closest to 1
* the best will be the fitting
* @param array double $adX input to get the values
*
* @return double goodness of regression
**/
public function r2()
{
if (count($this->Coeffs)==0)
{
throw new Exception('Fitting is not performed yet');
}
$adX=$this->X;
$adY=self::GetValues($adX);
if ($this->nY == 1)
{
for ($i=0, $dYm=0; $i<$this->Size; $i++)
{
$dYm+=$adY[$i];
}
$dYm/=$this->Size;
for ($i=0, $dS=0, $dStot=0; $i<$this->Size; $i++)
{
$dS+=($adY[$i]-$this->Y[$i])*($adY[$i]-$this->Y[$i]);
$dStot+=($dYm-$this->Y[$i])*($dYm-$this->Y[$i]);
}
return (1-$dS/$dStot);
}
else
{
for ($ii=0; $ii<$this->nY; $ii++)
{
for ($i=0, $adYm[$ii]=0; $i<$this->Size; $i++)
{
$adYm[$ii]+=$adY[$i][$ii];
}
$adYm[$ii]/=$this->Size;
for ($i=0, $adS[$ii]=0, $adStot[$ii]=0; $i<$this->Size; $i++)
{
$adS[$ii]+=($adY[$i][$ii]-$this->Y[$i][$ii])*($adY[$i][$ii]-$this->Y[$i][$ii]);
$adStot[$ii]+=($adYm[$ii]-$this->Y[$i][$ii])*($adYm[$ii]-$this->Y[$i][$ii]);
}
}
$adR2=array();
for ($j=0; $j<$this->nY; $j++)
{
$adR2[]=(1-$adS[$j]/$adStot[$j]);
}
return $adR2;
}
}
/**
* Get values from an input
*
* Get the values of an input using the calculated coefficients
* @param array double $adX input to get the values
*
* @return (array) double value / value set for the provided
* vector / vector set using the calculated coefficients
**/
public function GetValues($adX)
{
if (count($this->Coeffs)==0)
{
throw new Exception('Fitting is not performed yet');
}
if (is_array($adX[0]))
{
if (count($adX[0])!=$this->Vars)
{
throw new Exception('Number of columns should the same as variables');
}
else
{
for ($i=0; $i<count($adX); $i++)
{
if ($this->nY==1)
{
$adTmp[$i]=$this->Coeffs[0];
for ($j=0; $j<$this->Vars; $j++)
{
$adTmp[$i]+=$this->Coeffs[$j+1]*$adX[$i][$j];
}
}
else
{
for ($k=0; $k<$this->nY; $k++)
{
$adTmp[$i][$k]=$this->Coeffs[0][$k];
for ($j=0; $j<$this->Vars; $j++)
{
$adTmp[$i][$k]+=$this->Coeffs[$j+1][$k]*$adX[$i][$j];
}
}
}
}
}
}
else
{
if ($this->nY==1)
{
$adTmp=$this->Coeffs[0];
for ($j=0; $j<$this->Vars; $j++)
{
$adTmp+=$this->Coeffs[$j+1]*$adX[$j];
}
}
else
{
for ($k=0; $k<$this->nY; $k++)
{
$adTmp[$k]=$this->Coeffs[0][$k];
for ($j=0; $j<$this->Vars; $j++)
{
$adTmp[$k]+=($this->Coeffs[$j+1][$k])*$adX[$j];
}
}
}
}
return $adTmp;
}
} // End of class LinearFit
?>
|