<?php
/*
  Gnix bootstrap: constants, driver/helper includes and the process-wide
  caches used by the Gnix class below.

  The full "<?php" open tag is used so the file is parsed regardless of the
  short_open_tag ini setting.
*/

/*
  Backend identifiers accepted by the Gnix constructor.
  NOTE: the third argument (case-insensitive constant) is kept because code
  may reference these constants in a different case. It is deprecated since
  PHP 7.3 and ignored by PHP 8.
*/
define('MYSQL', 'mysql' , true);
define('SQLITE', 'sqlite' , true);
/* Not referenced in this file; presumably consumed by the sqlite driver. */
define('SQLITE_TIMEOUT',1000,true);
/* Delimiter characters used to split a text into words. */
define('SEPARADORES'," ,|-:;\n\r\t!?<>(){}[]#\\/&.$%='@\"",true);

/* All includes are resolved relative to this file, not to the cwd. */
$gnixpath = dirname(__FILE__);
include "{$gnixpath}/libtextcat/saddorlibtextcat.php";
include "{$gnixpath}/stemmer/english.php";
include "{$gnixpath}/stemmer/spanish.php";
include "{$gnixpath}/mysql.php";
include "{$gnixpath}/sqlite.php";
include "{$gnixpath}/tokenizer.php";

/* Process-wide lookup caches, accessed through "global" by the Gnix class. */
$WordCache = array();
$StemmingCache = array();
$LangCache = array();
$TipoCache = array();
/*
  Gnix: a small full-text indexer / searcher on top of a SQL backend.

  Indexing splits a text into words, detects its language, stems every word
  and stores one ft_index row per word position. Searching turns a tokenized
  query into a self-join over ft_index.

  NOTE(review): most values (words, language names, section titles) are
  interpolated into SQL without escaping; only the document content goes
  through addslashes(). The driver classes are not visible here, so no
  escaping call could be added safely - confirm what the drivers offer and
  escape/parameterize at that level.
*/
class Gnix
{
    var $db; /* database handler */
    var $textcat; /* libtextcat handler*/
    var $stemmer; /* stemmer handler (NULL when the language has none) */
    var $lang; /* name of the language detected by GetLang() */
    var $langId; /* ft_lang id of $lang, set by RegisterLang() */
    var $id; /* last insert id of the ft_docs row created by RegisterText() */
    var $docid; /* document id*/
    var $wordlist = array();
    var $main_writed; /* true once the first section filled the ft_docs row */

    /*
      PHP4-style constructor, kept so existing callers and subclasses that
      invoke Gnix() directly keep working.

      $param  array with 'db' (required) and optional 'host', 'user', 'pass'
      $db     backend identifier: SQLITE (default) or MYSQL
    */
    function Gnix($param, $db = SQLITE)
    {
        if (!isset($param['db']))
            die("There is missing the database name in the param ");

        if ($db == SQLITE)
            $this->db = new gnix_sqlite;
        else if ($db == MYSQL)
            $this->db = new gnix_mysql;
        else
            /* Fail here instead of on a method call over an unset handler. */
            die("Unsupported database backend: ".$db);

        /* Missing connection settings are passed as NULL, without notices. */
        $host = isset($param['host']) ? $param['host'] : NULL;
        $user = isset($param['user']) ? $param['user'] : NULL;
        $pass = isset($param['pass']) ? $param['pass'] : NULL;
        $this->db->open($param['db'], $host, $user, $pass);
        $this->textcat = new SaddorLibTextCat("libtextcat");
    }

    /*
      PHP 8 no longer treats a method named after the class as a constructor,
      so delegate explicitly. Declared after Gnix() on purpose: PHP 5 raises
      an E_STRICT notice when the old-style constructor follows __construct().
    */
    function __construct($param, $db = SQLITE)
    {
        $this->Gnix($param, $db);
    }

    /*
      Install

      Executes the schema file "db.<driver version>" (resolved against the
      current working directory). Returns true on success, false when the
      file cannot be read.
    */
    function Install()
    {
        $file = "db.".$this->db->version;
        $content = is_readable($file) ? file_get_contents($file) : false;
        if ($content === false)
        {
            trigger_error("Gnix::Install: cannot read schema file ".$file, E_USER_WARNING);
            return false;
        }
        $this->db->exec($content);
        return true;
    }

    /*
      Index Function

      Indexes one document. A string is stored as a single section titled
      'text'; an array is handed over to IndexArray().

      $texto    string, or array of title => text
      $ranking  base weight; each word gets $ranking / (position + 1)
    */
    function Index($texto,$ranking = 1)
    {
        if (is_array($texto))
        {
            $this->IndexArray($texto,$ranking);
            return;
        }
        $this->main_writed = false;
        $this->RegisterText();
        $this->GetLang($texto);
        $this->RegisterLang($this->lang);
        /* The title must be passed explicitly: the second parameter of
           IndexFullText() is the section title, not the ranking. */
        $this->IndexFullText($texto,'text',$ranking);
    }

    /*
      Indexes a document made of several sections (title => text). The
      language is detected separately for every section.
    */
    function IndexArray($texto,$ranking = 1)
    {
        if (!is_array($texto))
        {
            $this->Index($texto,$ranking);
            return;
        }
        $this->main_writed = false;
        $this->RegisterText();
        foreach ($texto as $clave => $valor)
        {
            $this->GetLang($valor);
            $this->RegisterLang($this->lang);
            $this->IndexFullText($valor,$clave,$ranking);
        }
    }

    /*
      Creates the ft_docs row of a new document and stores its ids in
      $this->id / $this->docid. The docid is max(docid) + 1.
      NOTE(review): select-then-insert is not atomic; two concurrent
      indexers could get the same docid.
    */
    function RegisterText()
    {
        $this->db->query("select max(docid) as total from ft_docs");
        $result = $this->db->getvalue();
        $result['total']++;
        $this->db->query("insert into ft_docs(docid) values('".$result['total']."')");
        $this->id = $this->db->lastinsert();
        $this->docid = $result['total'];
    }

    /*
      Returns the ft_word id of $word, inserting the word when it is new.
      Results are cached in $WordCache[word][langId].
    */
    function RegisterWord($word)
    {
        global $WordCache;
        /* Look the cache up with the same key order it is written with. */
        if (isset($WordCache[$word][$this->langId]))
            return $WordCache[$word][$this->langId];

        $this->db->query("select id from ft_word where word = '{$word}'");
        if ($this->db->count() == 0)
        {
            $stemmed = $this->Stemmer($word);
            /* Words sharing a stem share an id. Return the id that was
               actually written: it is the value a later
               "select id from ft_word" (see Word2Id) returns, which the
               driver's last-insert id is not guaranteed to be. */
            $wordid = $this->RegisterStemmed($stemmed);
            $this->db->query("insert into ft_word(id,word,stemmed,lang) values('".$wordid."','".$word."','".$stemmed."','".$this->langId."')");
            $WordCache[$word][$this->langId] = $wordid;
            return $wordid;
        }
        $result = $this->db->getvalue();
        $WordCache[$word][$this->langId] = $result[0];
        return $result[0];
    }

    /*
      Returns the id to use for a stemmed word in the current language: the
      id of an existing word with the same stem, or max(id) + 1.
    */
    function RegisterStemmed($word)
    {
        $this->db->query("select id from ft_word where stemmed = '{$word}' and lang = '".$this->langId."' limit 1");
        if ($this->db->count() == 0)
        {
            $this->db->query("select max(id) from ft_word");
            $result = $this->db->getvalue();
            return $result[0]+1;
        }
        $result = $this->db->getvalue();
        return $result[0];
    }

    /*
      Resolves (inserting if needed) the ft_lang id of $lang, stores it in
      $this->langId and returns it. Cached in $LangCache.
    */
    function RegisterLang($lang)
    {
        global $LangCache;
        if (isset($LangCache[$lang]))
        {
            /* A cache hit must still switch the current language id,
               otherwise the id of the previous text would be reused. */
            $this->langId = $LangCache[$lang];
            return $this->langId;
        }
        $this->db->query("select id from ft_lang where lang = '{$lang}'");
        if ($this->db->count() == 0)
        {
            $this->db->query("insert into ft_lang(lang) values('".$lang."')");
            $this->langId = $this->db->lastinsert();
        }
        else
        {
            $result = $this->db->getvalue();
            $this->langId = $result[0];
        }
        $LangCache[$lang] = $this->langId;
        return $this->langId;
    }

    /*
      Returns the ft_tipos id of a section title, inserting it when new.
      Cached in $TipoCache.
    */
    function RegisterTipo($word)
    {
        global $TipoCache;
        if (isset($TipoCache[$word]))
            return $TipoCache[$word];
        $this->db->query("select id from ft_tipos where titulo = '{$word}'");
        if ($this->db->count() == 0)
        {
            $this->db->query("insert into ft_tipos(titulo) values('".$word."')");
            $TipoCache[$word] = $this->db->lastinsert();
            return $TipoCache[$word];
        }
        $result = $this->db->getvalue();
        $TipoCache[$word] = $result[0];
        return $result[0];
    }

    /*
      Stores one section of the current document and its inverted index.

      $text     section content
      $titulo   section title (ft_tipos.titulo)
      $ranking  base weight; the word at position p is stored with
                $ranking / (p + 1)

      Prints the elapsed time as an HTML fragment.
    */
    function IndexFullText($text, $titulo = 'text', $ranking = 1)
    {
        $tipo = $this->RegisterTipo($titulo);
        if ( $this->main_writed == false)
        {
            /* The first section fills the row created by RegisterText(). */
            $this->db->query("update ft_docs set contenido = '".addslashes($text)."',tipo = '".$tipo."' where docid = ".$this->docid);
            $this->main_writed = true;
        }
        else
            $this->db->query("insert into ft_docs(docid,tipo,contenido) values('{$this->docid}','{$tipo}','".addslashes($text)."')");

        $text = strtolower($text);
        $t = microtime_float();
        $index = $this->InvertedIndex($text);

        $rows = array();
        foreach ($index as $pos => $wordid)
        {
            $rank = $ranking / ($pos+1);
            $rows[] = "('".$this->docid."','{$wordid}','{$pos}','{$rank}','".$this->langId."')";
        }

        /* A text without indexable words yields no rows; sending the bare
           "insert ... values" prefix would be invalid SQL, so skip it. */
        if (count($rows) > 0)
        {
            $head = "insert into ft_index(docid,wordid,posicion,ranking,lang) values";
            if ($this->db->version == "MySQL")
                /* One multi-row statement. */
                $sql = $head.implode(",",$rows);
            else
                /* One statement per row, separated by ';'. */
                $sql = $head.implode(";".$head,$rows).";";
            $this->db->insert($sql);
            unset($sql);
        }

        $t = microtime_float() - $t;
        print " Done in <strong>".$t."</strong> seconds<br>";
    }

    /*
      Splits $text on the SEPARADORES characters and returns the list of
      word ids in order of appearance. Words of a single character are
      skipped, so positions count indexed words only.
    */
    function InvertedIndex($text)
    {
        $Word = array();
        /* Use the constant under its declared name; the lowercase spelling
           only resolves while case-insensitive constants are supported. */
        $word = strtok($text,SEPARADORES);
        while ($word !== false)
        {
            if (strlen($word) > 1)
            {
                $Word[] = $this->RegisterWord($word);
            }
            $word = strtok(SEPARADORES);
        }
        return $Word;
    }

    /*
      Lang Tools
    */

    /*
      Detects the language of $text, stores its name in $this->lang and
      selects the matching stemmer (NULL when none is available). The
      language is the first key of the textcat ranking, or "unknown".
    */
    function GetLang($text)
    {
        $lang = "unknown";
        $this->textcat->WhatLang($text);
        $langy = $this->textcat->ranking;
        if (is_array($langy))
        {
            foreach ($langy as $langx => $points)
            {
                $lang = $langx;
                break;
            }
        }
        $this->stemmer = NULL;
        if ($lang == "english")
            $this->stemmer = new EnglishStemmer;
        elseif ($lang == "spanish")
            $this->stemmer = new SpanishStemmer;
        else
            print "The stemmer for lang ".$lang." is not supported<br>";
        $this->lang = $lang;
    }

    /*
      Returns the stem of $word using the current stemmer, or the word
      itself when there is none. Cached in $StemmingCache, keyed by word.
      NOTE(review): the cache key does not include the language.
    */
    function Stemmer($word)
    {
        global $StemmingCache;
        if (!isset($StemmingCache[$word]))
        {
            $stemmedword = $this->stemmer == NULL ? $word : $this->stemmer->Stem($word);
            $StemmingCache[$word] = $stemmedword;
        }
        else
        {
            $stemmedword = $StemmingCache[$word] ;
        }
        return $stemmedword;
    }

    /*
      Search Function

      Runs the query $arg and returns an array with:
        'Total'  => number of matches reported by the count query
        <docid>  => array(title => content), in ranking order

      $page_star  offset of the first result; pages hold 20 documents
    */
    function Search($arg,$page_star = 0)
    {
        $return = array();
        $token = new Tokenizer;
        $token->parser(stripslashes($arg));
        $sql = $this->BuildSQL();

        $this->db->query(str_replace("[options]","count(*)",$sql));
        $result = $this->db->getvalue();
        $return['Total'] = isset($result[0]) ? $result[0] : 0;
        if ($return['Total'] == 0)
            return $return;

        /* The offset goes into the SQL text, so force it to an integer. */
        $offset = (int)$page_star;
        $this->db->query(str_replace("[options]","t0.*",$sql)." group by docid order by ranking limit {$offset},20");
        $id = array();
        while ($row = $this->db->getvalue())
            $id[] = $row[0]; /*Ids order by rank*/
        if (count($id) == 0)
            return $return; /* nothing on this page; avoid "IN ()" */

        $sql = "select ft_docs.docid,ft_tipos.titulo, ft_docs.contenido from ft_docs inner join ft_tipos on (ft_docs.tipo = ft_tipos.id) where docid IN (".implode(",",$id).")";
        $this->db->query($sql);
        $tmpresults = array();
        while ($row = $this->db->getvalue())
            $tmpresults[$row[0]][$row[1]] = $row[2];

        /* Sort by the rank: emit documents in the order of $id. */
        foreach ($id as $docid)
        {
            if (!isset($tmpresults[$docid]))
                continue;
            foreach ($tmpresults[$docid] as $tipo => $contenido)
            {
                $return[$docid][$tipo] = $contenido;
            }
        }
        unset($tmpresults);
        return $return;
    }

    /*
      Builds the search statement from the globals $TreeArray (query terms
      alternating with operators) and $boolean (list of operators). The
      result still contains the "[options]" placeholder for the select list.

      Every term uses its own ft_index alias (t0, t1, ...); a term holding
      spaces is a phrase whose words must sit at consecutive positions.
      NOTE(review): the last word of a phrase is also constrained against
      the following alias, so a phrase seems to require one more indexed
      word after it - confirm whether that is intended.
    */
    function BuildSQL()
    {
        global $TreeArray;
        global $boolean;
        $tree = is_array($TreeArray) ? $TreeArray : array();
        $operators = is_array($boolean) ? $boolean : array();
        $i=0;
        $sql = "select [options] from [table] where";
        for($e=0; $e < count($tree); $e++)
        {
            if ($tree[$e] == "")
                continue;
            /* in_array() instead of a truthiness test on array_search(),
               which misses the operator stored at index 0. */
            if (in_array($tree[$e],$operators))
                continue;

            $term = $tree[$e++]; /* $e now points at the operator, if any */
            if (strchr($term," ") === false)
                $sql.= " (t{$i}.wordid = '".$this->Word2Id($term)."') ";
            else
            {
                $word = explode(" ",$term);
                $sql.="(";
                foreach($word as $w)
                {
                    $sql.= " t{$i}.wordid = '".$this->Word2Id($w)."' and";
                    $x = $i+1;
                    $sql .= " t{$i}.posicion + 1 = t{$x}.posicion and";
                    $i++;
                }
                $sql = substr($sql,0,strlen($sql) - 3); /* drop trailing "and" */
                $sql .= ")";
            }

            /* The last term has no operator after it. */
            $operator = isset($tree[$e]) ? $tree[$e] : "";
            if ($operator == "NOT")
                $sql.=" and ".$operator;
            else
                $sql.=" ".$operator;
            $i++;
        }
        $table = "ft_index as t0 ";
        for($e = 1; $e < $i; $e++)
            $table.=" inner join ft_index as t{$e} on (t0.docid = t{$e}.docid) ";
        $sql = str_replace("[table]",$table,$sql);
        return $sql;
    }

    /*
      Returns the ft_word id of $word, or -1 when the word is not indexed.
    */
    function Word2Id($word)
    {
        $sql = "select id from ft_word where word = '{$word}'";
        $this->db->query($sql);
        $result = $this->db->getvalue();
        return isset($result[0]) ? $result[0] : -1;
    }
}
/*
  Returns the current Unix timestamp as a float with sub-second precision,
  built from the "fraction seconds" string that microtime() produces.
*/
function microtime_float()
{
    $parts = explode(" ", microtime());
    $fraction = (float)$parts[0];
    $seconds = (float)$parts[1];
    return ($fraction + $seconds);
}
?>