Login   Register  
PHP Classes
elePHPant
Icontem

File: gnix.php

Recommend this page to a friend!
Stumble It! Stumble It! Bookmark in del.icio.us Bookmark in del.icio.us
  Classes of Cesar D. Rodas  >  Guaranix Full Text  >  gnix.php  >  Download  
File: gnix.php
Role: Class source
Content type: text/plain
Description: The main class of this project
Class: Guaranix Full Text
Index text documents for full text searching
Author: By
Last change: Change the Guaranix License
Date: 2006-09-06 14:55
Size: 10,400 bytes
 

Contents

Class file image Download
<?php

/*
 * Backend selectors accepted by the Gnix constructor, plus tokenizer settings.
 *
 * The third argument keeps these constants case-insensitive, as originally
 * declared, because legacy code may reference them in any letter case.
 * NOTE(review): case-insensitive constants are deprecated since PHP 7.3 and
 * the flag is ignored on PHP 8; remove it only once every caller uses the
 * upper-case names.
 */
define('MYSQL', 'mysql' , true);
define('SQLITE', 'sqlite' , true);
/* Not referenced in this file; presumably consumed by sqlite.php - confirm. */
define('SQLITE_TIMEOUT',1000,true);
/* Characters that split a text into words when it is tokenized for indexing. */
define('SEPARADORES'," ,|-:;\n\r\t!?<>(){}[]#\\/&.$%='@\"",true);

/* Directory of this file; every dependency is loaded relative to it. */
$gnixpath = dirname(__FILE__);

include $gnixpath . "/libtextcat/saddorlibtextcat.php";
include $gnixpath . "/stemmer/english.php";
include $gnixpath . "/stemmer/spanish.php";
include $gnixpath . "/mysql.php";
include $gnixpath . "/sqlite.php";
include $gnixpath . "/tokenizer.php";

/*
 * Process-wide lookup caches shared by every Gnix instance (accessed through
 * "global" inside the class) to avoid repeating database round trips.
 */
$WordCache = array();     /* word => (language id => word id) */
$StemmingCache = array(); /* word => stemmed word */
$LangCache = array();     /* language name => ft_lang id */
$TipoCache = array();     /* field title => ft_tipos id */


/**
 * Guaranix full-text engine: indexes text documents into the ft_* tables and
 * answers word / phrase queries against them.
 *
 * The class talks to the database exclusively through the backend wrapper in
 * $db (gnix_sqlite or gnix_mysql), detects the language of each text with
 * SaddorLibTextCat and stems words with the matching stemmer when one exists.
 */
class Gnix
{
    var $db; /* database handler */
    var $textcat; /* libtextcat handler */
    var $stemmer; /* stemmer handler, NULL when the language has no stemmer */
    var $lang; /* name of the language detected by GetLang() */
    var $langId; /* ft_lang id matching $lang, set by RegisterLang() */
    var $id; /* last insert id reported by the backend for the ft_docs row */
    var $docid; /* document id (ft_docs.docid) of the document being indexed */
    var $wordlist = array();

    var $main_writed; /* auxiliary flag: true once the placeholder ft_docs row has been filled */

    /**
     * Opens the database and prepares the language detector.
     *
     * Declared as __construct so that "new Gnix(...)" still runs it on PHP 8,
     * where a method named after the class is no longer a constructor.
     *
     * @param array  $param connection settings: 'db' is mandatory, 'host',
     *                      'user' and 'pass' are optional (NULL when absent)
     * @param string $db    backend selector, SQLITE (default) or MYSQL
     */
    function __construct($param, $db = SQLITE)
    {
        if (!isset($param['db']))
            die("There is missing the database name in the param ");

        if ($db == SQLITE)
            $this->db = new gnix_sqlite;
        else if ($db == MYSQL)
            $this->db = new gnix_mysql;
        else
            die("Unsupported database backend");

        /* Only 'db' is mandatory; missing keys are passed as NULL without notices. */
        $host = isset($param['host']) ? $param['host'] : NULL;
        $user = isset($param['user']) ? $param['user'] : NULL;
        $pass = isset($param['pass']) ? $param['pass'] : NULL;

        $this->db->open($param['db'], $host, $user, $pass);
        $this->textcat = new SaddorLibTextCat("libtextcat");
    }

    /**
     * Legacy PHP 4 style constructor, kept so existing code that calls
     * Gnix() / parent::Gnix() explicitly keeps working.
     */
    function Gnix($param, $db = SQLITE)
    {
        $this->__construct($param, $db);
    }

    /**
     * Escapes a value for use inside a single-quoted SQL string literal.
     *
     * MySQL keeps the addslashes() escaping this class always used; any other
     * backend gets standard SQL quote doubling, because backslash is not an
     * escape character there.
     *
     * @param  mixed  $value value to embed
     * @return string escaped text (without the surrounding quotes)
     */
    function EscapeSql($value)
    {
        if ($this->db->version == "MySQL")
            return addslashes($value);
        return str_replace("'", "''", $value);
    }


    /*
        Install
    */
    /**
     * Creates the schema by executing the file "db.<backend version>",
     * resolved against the current working directory.
     */
    function Install()
    {
        $schemaFile = "db.".$this->db->version;
        $content = file_get_contents($schemaFile);
        if ($content === false)
        {
            trigger_error("Unable to read the schema file ".$schemaFile, E_USER_WARNING);
            return;
        }
        $this->db->exec($content);
    }


    /*
        Index Function
    */
    /**
     * Indexes one document.
     *
     * @param string|array $texto   document text, or an array of
     *                              field title => text (see IndexArray)
     * @param int|float    $ranking base weight given to the document's words
     */
    function Index($texto,$ranking = 1)
    {
        if (is_array($texto))
        {
            $this->IndexArray($texto,$ranking);
            return;
        }
        $this->main_writed = false;
        $this->RegisterText();
        $this->GetLang($texto);
        $this->RegisterLang($this->lang);
        /* The field title must be given explicitly, otherwise $ranking would
           land in the $titulo parameter of IndexFullText(). */
        $this->IndexFullText($texto,'text',$ranking);
    }

    /**
     * Indexes one document made of several named fields; every field is
     * language-detected and indexed separately under the same document id.
     *
     * @param array     $texto   field title => field text
     * @param int|float $ranking base weight given to the document's words
     */
    function IndexArray($texto,$ranking = 1)
    {
        if (!is_array($texto))
        {
            $this->Index($texto,$ranking);
            return;
        }
        $this->main_writed = false;
        $this->RegisterText();
        foreach ($texto as $clave => $valor)
        {
            $this->GetLang($valor);
            $this->RegisterLang($this->lang);
            $this->IndexFullText($valor,$clave,$ranking);
        }
    }

    /**
     * Reserves the next document id by inserting a placeholder row in
     * ft_docs, and stores it in $docid (and the backend insert id in $id).
     */
    function RegisterText()
    {
        $this->db->query("select max(docid) as total from ft_docs");
        $result = $this->db->getvalue();
        $next = (isset($result['total']) ? $result['total'] : 0) + 1;
        $this->db->query("insert into ft_docs(docid) values('".$next."')");
        $this->id = $this->db->lastinsert();
        $this->docid = $next;
    }

    /**
     * Returns the id of a word, inserting it into ft_word when it is new.
     * New words receive the id chosen by RegisterStemmed() for their stem.
     *
     * @param  string $word word to register
     * @return mixed  word id
     */
    function RegisterWord($word)
    {
        global $WordCache;

        /* The cache is written as [$word][$langId], so it must be read the same way. */
        if (isset($WordCache[$word][$this->langId]))
            return $WordCache[$word][$this->langId];

        $this->db->query("select id from ft_word where word = '".$this->EscapeSql($word)."'");
        if ($this->db->count() == 0)
        {
            $stemmed = $this->Stemmer($word);
            $wordId = $this->RegisterStemmed($stemmed);
            $this->db->query("insert into ft_word(id,word,stemmed,lang) values('".$wordId."','".$this->EscapeSql($word)."','".$this->EscapeSql($stemmed)."','".$this->langId."')");
            /* The id column was written explicitly, so that value (not the
               backend's last insert id) is what later lookups will return. */
            $WordCache[$word][$this->langId] = $wordId;
            return $wordId;
        }

        $result = $this->db->getvalue();
        $WordCache[$word][$this->langId] = $result[0];
        return $result[0];
    }

    /**
     * Picks the id to use for a stemmed word: the id already used by that
     * stem in the current language, or max(id) + 1 when the stem is new.
     *
     * @param  string $word stemmed word
     * @return mixed  id to store in ft_word.id
     */
    function RegisterStemmed($word)
    {
        $this->db->query("select id from ft_word where stemmed = '".$this->EscapeSql($word)."' and lang = '".$this->langId."' limit 1");
        if ($this->db->count() == 0)
        {
            $this->db->query("select max(id) from ft_word");
            $result = $this->db->getvalue();
            return $result[0]+1;
        }
        $result = $this->db->getvalue();
        return $result[0];
    }

    /**
     * Resolves a language name to its ft_lang id (inserting it when new) and
     * stores the id in $langId.
     *
     * @param  string $lang language name
     * @return mixed  language id
     */
    function RegisterLang($lang)
    {
        global $LangCache;
        if (isset($LangCache[$lang]))
        {
            /* A cache hit must also refresh $langId, otherwise the id of the
               previously indexed language would be reused. */
            $this->langId = $LangCache[$lang];
            return $this->langId;
        }

        $this->db->query("select id from ft_lang where lang = '".$this->EscapeSql($lang)."'");
        if ($this->db->count() == 0)
        {
            $this->db->query("insert into ft_lang(lang) values('".$this->EscapeSql($lang)."')");
            $this->langId = $this->db->lastinsert();
        }
        else
        {
            $result = $this->db->getvalue();
            $this->langId = $result[0];
        }
        $LangCache[$lang] = $this->langId;
        return $this->langId;
    }

    /**
     * Returns the ft_tipos id of a field title, inserting it when new.
     *
     * @param  string $word field title
     * @return mixed  field type id
     */
    function RegisterTipo($word)
    {
        global $TipoCache;
        if (isset($TipoCache[$word]))
            return $TipoCache[$word];

        $this->db->query("select id from ft_tipos where titulo = '".$this->EscapeSql($word)."'");
        if ($this->db->count() == 0)
        {
            $this->db->query("insert into ft_tipos(titulo) values('".$this->EscapeSql($word)."')");
            $TipoCache[$word] = $this->db->lastinsert();
            return $TipoCache[$word];
        }
        $result = $this->db->getvalue();
        $TipoCache[$word] = $result[0];
        return $result[0];
    }

    /**
     * Stores one field of the current document and indexes its words.
     *
     * The first field fills the placeholder row created by RegisterText();
     * further fields add new ft_docs rows with the same docid. Each word
     * occurrence is written to ft_index with ranking / (position + 1).
     * Prints the elapsed time as HTML.
     *
     * @param string    $text    field text
     * @param string    $titulo  field title
     * @param int|float $ranking base weight
     */
    function IndexFullText($text, $titulo = 'text', $ranking = 1)
    {
        $tipo = $this->RegisterTipo($titulo);
        $contenido = $this->EscapeSql($text);

        if ($this->main_writed == false)
        {
            $this->db->query("update ft_docs set contenido = '".$contenido."',tipo = '".$tipo."' where docid = ".$this->docid);
            $this->main_writed = true;
        }
        else
            $this->db->query("insert into ft_docs(docid,tipo,contenido) values('".$this->docid."','".$tipo."','".$contenido."')");

        /* NOTE(review): strtolower() is byte-wise; accented letters are only
           folded when the locale/encoding allows it. */
        $text = strtolower($text);
        $t = microtime_float();
        $index = $this->InvertedIndex($text);

        /* Nothing to insert for a text without indexable words; sending the
           bare statement prefix would be invalid SQL. */
        if (count($index) > 0)
        {
            $prefix = "insert into ft_index(docid,wordid,posicion,ranking,lang) values";
            $rows = array();
            foreach ($index as $pos => $wordid)
            {
                $rank = $ranking / ($pos+1);
                $rows[] = "('".$this->docid."','".$wordid."','".$pos."','".$rank."','".$this->langId."')";
            }

            /* MySQL takes one multi-row insert; other backends get one
               statement per row, separated by semicolons. */
            if ($this->db->version == "MySQL")
                $sql = $prefix.implode(",",$rows);
            else
                $sql = $prefix.implode(";".$prefix,$rows).";";

            $this->db->insert($sql);
            unset($sql);
        }

        $t = microtime_float() - $t;
        print "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Done in <strong>".$t."</strong> seconds<br>";
    }

    /**
     * Splits a text on SEPARADORES and registers every word longer than one
     * character.
     *
     * @param  string $text text to tokenize
     * @return array  word ids in order of appearance (position => word id)
     */
    function InvertedIndex($text)
    {
        $wordIds = array();
        /* strtok() returns false once the text is exhausted. */
        for ($word = strtok($text, SEPARADORES); $word !== false; $word = strtok(SEPARADORES))
        {
            if (strlen($word) > 1)
                $wordIds[] = $this->RegisterWord($word);
        }
        return $wordIds;
    }

    /*
        Lang Tools
    */
    /**
     * Detects the language of a text, stores it in $lang and selects the
     * matching stemmer (NULL, with a printed notice, when none is available).
     *
     * @param string $text text to analyse
     */
    function GetLang($text)
    {
        $lang = "unknown";
        $this->textcat->WhatLang($text);

        /* The first key of the detector's ranking is taken as the language. */
        $candidates = $this->textcat->ranking;
        if (is_array($candidates) && count($candidates) > 0)
        {
            reset($candidates);
            $lang = key($candidates);
        }

        $this->stemmer = NULL;
        if ($lang == "english")
            $this->stemmer = new EnglishStemmer;
        elseif ($lang == "spanish")
            $this->stemmer = new SpanishStemmer;
        else
            print "The stemmer for lang ".$lang." is not supported<br>";

        $this->lang = $lang;
    }

    /**
     * Returns the stem of a word using the current stemmer, or the word
     * itself when no stemmer is selected. Results are cached.
     *
     * NOTE(review): the cache is keyed by word only, so a stem computed for
     * one language is reused for every other language - confirm whether that
     * is intended before changing the cache layout.
     *
     * @param  string $word word to stem
     * @return string stemmed word
     */
    function Stemmer($word)
    {
        global $StemmingCache;

        if (!isset($StemmingCache[$word]))
            $StemmingCache[$word] = $this->stemmer == NULL ? $word : $this->stemmer->Stem($word);

        return $StemmingCache[$word];
    }


    /*
        Search Function
    */
    /**
     * Runs a query and returns one page (20 documents) of results.
     *
     * @param  string $arg       raw query string
     * @param  int    $page_star offset of the first document to return
     * @return array  'Total' => number of matching index rows, plus one
     *                entry per document: docid => (field title => content)
     */
    function Search($arg,$page_star = 0)
    {
        $return = array();

        $token = new Tokenizer;
        $token->parser(stripslashes($arg));
        $sql = $this->BuildSQL();

        $this->db->query(str_replace("[options]","count(*)",$sql));
        $result = $this->db->getvalue();
        $return['Total'] = isset($result[0]) ? $result[0] : 0;
        if ($return['Total'] == 0)
            return $return;

        /* Columns are qualified with t0 because BuildSQL() may join ft_index
           to itself, which makes bare column names ambiguous. */
        $offset = (int) $page_star;
        $this->db->query(str_replace("[options]","t0.*",$sql)." group by t0.docid order by t0.ranking limit ".$offset.",20");

        $id = array(); /* document ids, ordered by rank */
        while ($row = $this->db->getvalue())
            $id[] = $row[0];
        if (count($id) == 0)
            return $return;

        $this->db->query("select ft_docs.docid,ft_tipos.titulo, ft_docs.contenido from ft_docs inner join ft_tipos on (ft_docs.tipo = ft_tipos.id) where docid IN (".implode(",",array_map('intval',$id)).")");

        $tmpresults = array();
        while ($row = $this->db->getvalue())
            $tmpresults[$row[0]][$row[1]] = $row[2];

        /* Re-emit the documents in rank order. */
        foreach ($id as $docid)
        {
            if (!isset($tmpresults[$docid]))
                continue;
            foreach ($tmpresults[$docid] as $tipo => $contenido)
                $return[$docid][$tipo] = $contenido;
        }

        return $return;
    }

    /**
     * Builds the SQL template for the query held in the globals $TreeArray
     * (terms and operators) and $boolean (operator list), both expected to be
     * filled by the Tokenizer.
     *
     * Every word gets its own ft_index alias (t0, t1, ...) joined on docid;
     * the words of a phrase are additionally chained by consecutive
     * positions. The result still contains the "[options]" placeholder for
     * the select list.
     *
     * @return string SQL template
     */
    function BuildSQL()
    {
        global $TreeArray;
        global $boolean;

        $tree = is_array($TreeArray) ? $TreeArray : array();
        $operators = is_array($boolean) ? $boolean : array();
        $total = count($tree);

        $i = 0; /* next free ft_index alias */
        $sql = "select [options] from [table] where";

        for ($e = 0; $e < $total; $e++)
        {
            if (!isset($tree[$e]) || $tree[$e] == "")
                continue;
            /* in_array() also recognises the operator stored at key 0, which
               a truthiness test on array_search() would miss. */
            if (in_array($tree[$e], $operators))
                continue;

            $term = $tree[$e++];
            if (strchr($term, " ") === false)
                $sql .= " (t{$i}.wordid = '".$this->Word2Id($term)."') ";
            else
            {
                $conditions = array();
                $words = explode(" ", $term);
                $lastIndex = count($words) - 1;
                foreach ($words as $n => $w)
                {
                    $conditions[] = "t{$i}.wordid = '".$this->Word2Id($w)."'";
                    /* Chain positions only between words of the phrase; a
                       constraint after the last word would demand an extra,
                       unrelated word following the phrase. */
                    if ($n < $lastIndex)
                    {
                        $next = $i + 1;
                        $conditions[] = "t{$i}.posicion + 1 = t{$next}.posicion";
                        $i++;
                    }
                }
                $sql .= " (".implode(" and ", $conditions).") ";
            }

            /* $e now points at the operator following the term, if any. */
            if (isset($tree[$e]))
            {
                if ($tree[$e] == "NOT")
                    $sql .= " and ".$tree[$e];
                else
                    $sql .= " ".$tree[$e];
            }
            $i++;
        }

        /* A query without terms must match nothing instead of ending in a bare "where". */
        if ($i == 0)
            $sql .= " 1 = 0 ";

        $table = "ft_index as t0 ";
        for ($e = 1; $e < $i; $e++)
            $table .= " inner join ft_index as t{$e} on (t0.docid = t{$e}.docid) ";

        return str_replace("[table]", $table, $sql);
    }

    /**
     * Looks up the id of a word.
     *
     * @param  string $word word to look up
     * @return mixed  word id, or -1 when the word is not in ft_word
     */
    function Word2Id($word)
    {
        $this->db->query("select id from ft_word where word = '".$this->EscapeSql($word)."'");
        $result = $this->db->getvalue();
        return isset($result[0]) ? $result[0] : -1;
    }

}



/**
 * Current Unix timestamp with microsecond resolution, as a float.
 *
 * microtime() without arguments returns the string "msec sec"; both parts
 * are converted and added together.
 *
 * @return float seconds since the Unix epoch, including the fraction
 */
function microtime_float()
{
   $parts = explode(" ", microtime());
   $fraction = (float) $parts[0];
   $seconds = (float) $parts[1];
   return ($fraction + $seconds);
}

?>