PHP Classes

File: gnix.php

Recommend this page to a friend!
  Classes of Cesar D. Rodas   Guaranix Full Text   gnix.php   Download  
File: gnix.php
Role: Class source
Content type: text/plain
Description: The main class of this project
Class: Guaranix Full Text
Index text documents for full text searching
Author: By Cesar D. Rodas
Last change: Change the Guaranix License
Date: 18 years ago
Size: 10,400 bytes
 

Contents

Class file image Download
<?php
/*
 * Guaranix full text indexer: main class.
 *
 * Texts are split into words, every word is stemmed and registered in
 * ft_word, and one ft_index row is written per word occurrence.  The
 * database is reached through the gnix_sqlite / gnix_mysql wrappers that
 * are included below.
 */

/* Supported database back-ends (second argument of the Gnix constructor). */
define('MYSQL', 'mysql');
define('SQLITE', 'sqlite');
define('SQLITE_TIMEOUT', 1000);
/* Every character that separates two words while tokenizing a text. */
define('SEPARADORES', " ,|-:;\n\r\t!?<>(){}[]#\\/&.$%='@\"");

$gnixpath = dirname(__FILE__);
include "{$gnixpath}/libtextcat/saddorlibtextcat.php";
include "{$gnixpath}/stemmer/english.php";
include "{$gnixpath}/stemmer/spanish.php";
include "{$gnixpath}/mysql.php";
include "{$gnixpath}/sqlite.php";
include "{$gnixpath}/tokenizer.php";

/* Per-request caches shared by every Gnix instance. */
$WordCache     = array(); /* [language id][word] => word id */
$StemmingCache = array(); /* [language][word]    => stemmed word */
$LangCache     = array(); /* [language]          => language id */
$TipoCache     = array(); /* [field title]       => field type id */

class Gnix
{
    var $db;                  /* database handler */
    var $textcat;             /* libtextcat handler */
    var $stemmer;             /* stemmer handler, NULL when the language has none */
    var $lang;                /* language detected for the current text */
    var $langId;              /* ft_lang id of $lang */
    var $id;                  /* value of lastinsert() for the ft_docs row */
    var $docid;               /* document id */
    var $wordlist = array();
    var $main_writed;         /* true once the first ft_docs row of a document is filled */

    /**
     * Opens the database and creates the language detector.
     *
     * @param array  $param Connection settings: 'db' is mandatory,
     *                      'host', 'user' and 'pass' are optional.
     * @param string $db    Back-end to use: SQLITE (default) or MYSQL.
     */
    function __construct($param, $db = SQLITE)
    {
        if (!isset($param['db'])) {
            die("There is missing the database name in the param ");
        }

        if ($db === SQLITE) {
            $this->db = new gnix_sqlite;
        } elseif ($db === MYSQL) {
            $this->db = new gnix_mysql;
        } else {
            /* Without a handler the next call would be a fatal error anyway. */
            die("Unsupported database type: " . $db);
        }

        /* Optional keys are passed as NULL instead of raising notices. */
        $this->db->open(
            $param['db'],
            isset($param['host']) ? $param['host'] : null,
            isset($param['user']) ? $param['user'] : null,
            isset($param['pass']) ? $param['pass'] : null
        );
        $this->textcat = new SaddorLibTextCat("libtextcat");
    }

    /**
     * Old-style constructor name, kept for callers that invoke it directly
     * and for engines that still treat it as the constructor.
     */
    function Gnix($param, $db = SQLITE)
    {
        $this->__construct($param, $db);
    }

    /* Install */

    /**
     * Executes the schema file "db.<back-end version>" against the database.
     * The file name is relative to the current working directory.
     *
     * @return bool false (with a warning) when the file cannot be read.
     */
    function Install()
    {
        $schema  = "db." . $this->db->version;
        $content = is_readable($schema) ? file_get_contents($schema) : false;
        if ($content === false) {
            trigger_error("Gnix: cannot read the schema file " . $schema, E_USER_WARNING);
            return false;
        }
        $this->db->exec($content);
        return true;
    }

    /* Index Function */

    /**
     * Indexes one document.
     *
     * @param string|array $texto   Text to index; an array is forwarded to
     *                              IndexArray().
     * @param int|float    $ranking Base ranking given to the document.
     */
    function Index($texto, $ranking = 1)
    {
        if (is_array($texto)) {
            $this->IndexArray($texto, $ranking);
            return;
        }
        $this->main_writed = false;
        $this->RegisterText();
        $this->GetLang($texto);
        $this->RegisterLang($this->lang);
        /* The second argument is the field title, not the ranking. */
        $this->IndexFullText($texto, 'text', $ranking);
    }

    /**
     * Indexes one document made of several named fields.
     *
     * @param array     $texto   field title => field text; a non-array is
     *                           forwarded to Index().
     * @param int|float $ranking Base ranking given to every field.
     */
    function IndexArray($texto, $ranking = 1)
    {
        if (!is_array($texto)) {
            $this->Index($texto, $ranking);
            return;
        }
        $this->main_writed = false;
        $this->RegisterText();
        foreach ($texto as $clave => $valor) {
            /* The language is detected per field. */
            $this->GetLang($valor);
            $this->RegisterLang($this->lang);
            $this->IndexFullText($valor, $clave, $ranking);
        }
    }

    /**
     * Reserves the next document id (max(docid) + 1) by inserting an empty
     * ft_docs row, and stores it in $this->docid.
     */
    function RegisterText()
    {
        $this->db->query("select max(docid) as total from ft_docs");
        $result = $this->db->getvalue();
        $next   = (isset($result['total']) ? (int) $result['total'] : 0) + 1;

        $this->db->query("insert into ft_docs(docid) values('" . $next . "')");
        $this->id    = $this->db->lastinsert();
        $this->docid = $next;
    }

    /**
     * Returns the id of a word, inserting it into ft_word when it is new.
     *
     * @param string $word A token produced by InvertedIndex(); it cannot
     *                     contain quotes because they are separators.
     * @return mixed The value of the ft_word.id column for the word.
     */
    function RegisterWord($word)
    {
        global $WordCache;

        /* The cache is read and written with the same key order. */
        if (isset($WordCache[$this->langId][$word])) {
            return $WordCache[$this->langId][$word];
        }

        $this->db->query("select id from ft_word where word = '{$word}'");
        if ($this->db->count() == 0) {
            $stemmed = $this->Stemmer($word);
            /* Return the id that was written, so this path and the lookup
               path below agree on what a word id is. */
            $id = $this->RegisterStemmed($stemmed);
            $this->db->query("insert into ft_word(id,word,stemmed,lang) values('" . $id . "','" . $word . "','" . $stemmed . "','" . $this->langId . "')");
        } else {
            $result = $this->db->getvalue();
            $id     = $result[0];
        }

        $WordCache[$this->langId][$word] = $id;
        return $id;
    }

    /**
     * Returns the id shared by the words that have the given stem in the
     * current language, or max(id) + 1 when the stem is not known yet.
     *
     * @param string $word Stemmed word.
     */
    function RegisterStemmed($word)
    {
        $this->db->query("select id from ft_word where stemmed = '{$word}' and lang = '" . $this->langId . "' limit 1");
        if ($this->db->count() == 0) {
            $this->db->query("select max(id) from ft_word");
            $result = $this->db->getvalue();
            return $result[0] + 1;
        }
        $result = $this->db->getvalue();
        return $result[0];
    }

    /**
     * Makes $this->langId the ft_lang id of a language, inserting the
     * language when it is new.
     *
     * @param string $lang Language name as returned by GetLang().
     * @return mixed The language id.
     */
    function RegisterLang($lang)
    {
        global $LangCache;

        if (isset($LangCache[$lang])) {
            /* Callers read $this->langId, so it is refreshed on a cache hit
               too; otherwise the id of the previous text would be kept. */
            $this->langId = $LangCache[$lang];
            return $this->langId;
        }

        $this->db->query("select id from ft_lang where lang = '{$lang}'");
        if ($this->db->count() == 0) {
            $this->db->query("insert into ft_lang(lang) values('" . $lang . "')");
            $this->langId = $this->db->lastinsert();
        } else {
            $result       = $this->db->getvalue();
            $this->langId = $result[0];
        }

        $LangCache[$lang] = $this->langId;
        return $this->langId;
    }

    /**
     * Returns the ft_tipos id of a field title, inserting it when it is new.
     *
     * @param string $word Field title (for IndexArray() it is an array key).
     */
    function RegisterTipo($word)
    {
        global $TipoCache;

        if (isset($TipoCache[$word])) {
            return $TipoCache[$word];
        }

        /* Escaped the same way this file escapes document contents.
           NOTE(review): prefer the escaping routine of the database
           wrapper if it offers one. */
        $titulo = addslashes($word);

        $this->db->query("select id from ft_tipos where titulo = '{$titulo}'");
        if ($this->db->count() == 0) {
            $this->db->query("insert into ft_tipos(titulo) values('" . $titulo . "')");
            $TipoCache[$word] = $this->db->lastinsert();
            return $TipoCache[$word];
        }

        $result           = $this->db->getvalue();
        $TipoCache[$word] = $result[0];
        return $result[0];
    }

    /**
     * Stores one field of the current document and writes its index rows.
     * Prints the elapsed time as HTML.
     *
     * @param string    $text    Field text.
     * @param string    $titulo  Field title.
     * @param int|float $ranking Base ranking; a word at position p gets
     *                           ranking / (p + 1).
     */
    function IndexFullText($text, $titulo = 'text', $ranking = 1)
    {
        $tipo      = $this->RegisterTipo($titulo);
        $contenido = addslashes($text);

        if ($this->main_writed == false) {
            /* The first field fills the row created by RegisterText(). */
            $this->db->query("update ft_docs set contenido = '" . $contenido . "',tipo = '" . $tipo . "' where docid = " . $this->docid);
            $this->main_writed = true;
        } else {
            $this->db->query("insert into ft_docs(docid,tipo,contenido) values('{$this->docid}','{$tipo}','" . $contenido . "')");
        }

        $text  = strtolower($text);
        $t     = microtime_float();
        $index = $this->InvertedIndex($text);

        /* Nothing is sent when the text has no indexable word; the
           statement built from zero rows would not be valid SQL. */
        if (count($index) > 0) {
            $head = "insert into ft_index(docid,wordid,posicion,ranking,lang) values";
            $rows = array();
            foreach ($index as $pos => $wordid) {
                $rank   = $ranking / ($pos + 1);
                $rows[] = "('" . $this->docid . "','{$wordid}','{$pos}','{$rank}','" . $this->langId . "')";
            }

            if ($this->db->version == "MySQL") {
                /* One multi-row statement. */
                $sql = $head . implode(",", $rows);
            } else {
                /* One statement per row, each ended by ';'. */
                $sql = $head . implode(";" . $head, $rows) . ";";
            }
            $this->db->insert($sql);
            unset($sql);
        }

        $t = microtime_float() - $t;
        print "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Done in <strong>" . $t . "</strong> seconds<br>";
    }

    /**
     * Splits a text on SEPARADORES and returns the word id of every token
     * longer than one character, in order of appearance.
     *
     * @param string $text Lower-cased text.
     * @return array position => word id
     */
    function InvertedIndex($text)
    {
        $Word = array();
        /* strtok() returns false when there are no more tokens. */
        for ($word = strtok($text, SEPARADORES); $word !== false; $word = strtok(SEPARADORES)) {
            if (strlen($word) > 1) {
                $Word[] = $this->RegisterWord($word);
            }
        }
        return $Word;
    }

    /* Lang Tools */

    /**
     * Detects the language of a text, stores it in $this->lang and selects
     * the matching stemmer ($this->stemmer is NULL when there is none, and
     * a message is printed).
     *
     * @param string $text
     */
    function GetLang($text)
    {
        $lang = "unknown";
        $this->textcat->WhatLang($text);

        /* The first key of the ranking is taken as the language. */
        $langy = $this->textcat->ranking;
        if (is_array($langy) && count($langy) > 0) {
            reset($langy);
            $lang = key($langy);
        }

        $this->stemmer = NULL;
        if ($lang == "english") {
            $this->stemmer = new EnglishStemmer;
        } elseif ($lang == "spanish") {
            $this->stemmer = new SpanishStemmer;
        } else {
            print "The stemmer for lang " . $lang . " is not supported<br>";
        }
        $this->lang = $lang;
    }

    /**
     * Returns the stem of a word in the current language; the word itself
     * when no stemmer is selected.
     *
     * @param string $word
     * @return string
     */
    function Stemmer($word)
    {
        global $StemmingCache;

        /* Cached per language: the same word can stem differently. */
        if (!isset($StemmingCache[$this->lang][$word])) {
            $StemmingCache[$this->lang][$word] = is_object($this->stemmer)
                ? $this->stemmer->Stem($word)
                : $word;
        }
        return $StemmingCache[$this->lang][$word];
    }

    /* Search Function */

    /**
     * Runs a query and returns one page (20 documents) of results.
     *
     * @param string $arg       Query text, handed to the Tokenizer.
     * @param int    $page_star Offset of the first document of the page.
     * @return array 'Total' => number of matches, followed by
     *               docid => array(field title => field text).
     */
    function Search($arg, $page_star = 0)
    {
        $return = array();
        $token  = new Tokenizer;
        $token->parser(stripslashes($arg));
        $sql = $this->BuildSQL();

        $this->db->query(str_replace("[options]", "count(*)", $sql));
        $result          = $this->db->getvalue();
        $return['Total'] = isset($result[0]) ? $result[0] : 0;
        if ($return['Total'] == 0) {
            return $return;
        }

        /* The offset goes into the statement, so it is forced to an
           integer.  Columns are qualified because they are ambiguous as
           soon as ft_index is joined with itself. */
        $page_star = (int) $page_star;
        $this->db->query(str_replace("[options]", "t0.*", $sql) . " group by t0.docid order by t0.ranking limit {$page_star},20");

        $id = array();
        while ($row = $this->db->getvalue()) {
            $id[] = $row[0]; /* Ids order by rank */
        }
        if (count($id) == 0) {
            return $return;
        }

        $this->db->query("select ft_docs.docid,ft_tipos.titulo, ft_docs.contenido from ft_docs inner join ft_tipos on (ft_docs.tipo = ft_tipos.id) where docid IN (" . implode(",", $id) . ")");
        $tmpresults = array();
        while ($row = $this->db->getvalue()) {
            $tmpresults[$row[0]][$row[1]] = $row[2];
        }

        /* Sort by the rank: follow the order of $id. */
        foreach ($id as $docid) {
            if (!isset($tmpresults[$docid])) {
                continue;
            }
            foreach ($tmpresults[$docid] as $tipo => $contenido) {
                $return[$docid][$tipo] = $contenido;
            }
        }
        unset($tmpresults);
        return $return;
    }

    /**
     * Builds the query template from the globals $TreeArray (terms and
     * operators) and $boolean (operator names); presumably both are filled
     * by the Tokenizer, which is not part of this file.
     *
     * @return string SQL with an "[options]" placeholder for the column
     *                list.
     */
    function BuildSQL()
    {
        global $TreeArray;
        global $boolean;

        $tree = is_array($TreeArray) ? $TreeArray : array();
        $ops  = is_array($boolean) ? $boolean : array();

        $i     = 0; /* next free ft_index alias (t0, t1, ...) */
        $sql   = "select [options] from [table] where";
        $total = count($tree);

        for ($e = 0; $e < $total; $e++) {
            $term = isset($tree[$e]) ? $tree[$e] : "";
            if ($term == "") {
                continue;
            }
            /* in_array(): array_search() returns 0, which is falsy, for a
               match on the first operator. */
            if (in_array($term, $ops)) {
                continue;
            }
            $e++; /* step onto the operator that follows the term */

            if (strchr($term, " ") === false) {
                $sql .= " (t{$i}.wordid = '" . $this->Word2Id($term) . "') ";
                $i++;
            } else {
                /* Phrase: one alias per word, each word right before the
                   next one.  The last word has no adjacency condition. */
                $words = explode(" ", $term);
                $last  = count($words) - 1;
                $conds = array();
                foreach ($words as $k => $w) {
                    $conds[] = "t{$i}.wordid = '" . $this->Word2Id($w) . "'";
                    if ($k < $last) {
                        $x       = $i + 1;
                        $conds[] = "t{$i}.posicion + 1 = t{$x}.posicion";
                    }
                    $i++;
                }
                $sql .= " (" . implode(" and ", $conds) . ")";
            }

            /* The operator is absent after the last term. */
            $op = isset($tree[$e]) ? $tree[$e] : "";
            if ($op == "NOT") {
                $sql .= " and " . $op;
            } else {
                $sql .= " " . $op;
            }
        }

        /* One join per alias used above. */
        $table = "ft_index as t0 ";
        for ($e = 1; $e < $i; $e++) {
            $table .= " inner join ft_index as t{$e} on (t0.docid = t{$e}.docid) ";
        }
        return str_replace("[table]", $table, $sql);
    }

    /**
     * Looks a word up in ft_word.
     *
     * @param string $word Search term.
     * @return mixed The word id, or -1 when the word is not indexed.
     */
    function Word2Id($word)
    {
        /* Search terms come from the caller, so they are escaped the same
           way this file escapes document contents.
           NOTE(review): prefer the escaping routine of the database
           wrapper if it offers one. */
        $sql = "select id from ft_word where word = '" . addslashes($word) . "'";
        $this->db->query($sql);
        $result = $this->db->getvalue();
        return isset($result[0]) ? $result[0] : -1;
    }
}

/* The name is a common one; do not redeclare it if it already exists. */
if (!function_exists('microtime_float')) {
    /**
     * Current Unix timestamp with microseconds, as a float.
     */
    function microtime_float()
    {
        return microtime(true);
    }
}
?>