<?
// Created By: Joe Stump <joestump98@yahoo.com>
// Created On: 2000-12-09
// Notes:
// This was created because I seem to be asked for this feature
// on a lot of the projects I work on so it only made sense to
// create some reusable code.
//
// This class takes care of just about everything. Just make sure
// you write your table defs write in the config file and it will
// create the necessary tables, make the dictionaries, build the
// indexes and, finally, make SQL queries for you on the fly.
//
// $query_tables is a list of tables that you (or the user) wants to
// search - ie. $query_tables = array('news','links','articles'); and
// $query_sql is a few queries based on those tables. So $query_sql
// would return:
//
// $query_sql = array(news => 'SELECT ...',
// links => 'SELECT ...',
// articles => 'SELECT ...');
//
// Then you just run those (adding on any LIMIT's you might want) and
// PRESTO! you have search results. I've scaled this implementation to
// roughly 2M rows with search times in the .02 to .07 second range for
// results LIMIT'd to 10 - so don't worry about scalability.
//
// As usual, this is released under the GNU License found at:
// http://www.gnu.org
class Search
{
// Variables for indexing
var $tables = array(); // Table definitions
var $dictionary = array(); // Unique words in Table(s)
var $index = array(); // Index of words for each record
var $return_code = array(); // See reference for return codes
var $stop_words = array(); // See Search() for a list of stop words
// Variables for searching
var $query = ''; // The user's query
var $query_sql = array(); // The SQL built based on that query
var $query_tables = array(); // The tables you wish to search
function return_tables()
{
return $this->tables;
}
function return_dictionary()
{
return $this->dictionary;
}
function return_index()
{
return $this->index;
}
function return_code()
{
return $this->return_code;
}
function return_stop_words()
{
return $this->stop_words;
}
function return_query()
{
return $this->query;
}
function return_query_sql()
{
return $this->query_sql;
}
function return_query_tables()
{
return $this->query_tables;
}
function set_tables($new_tables)
{
$this->tables = $new_tables;
}
function set_dictionary($new_dictionary)
{
$this->dictionary = $new_dictionary;
}
function set_index($new_index)
{
$this->index = $new_index;
}
function set_code($new_code)
{
$this->return_code = $new_code;
}
function set_stop_words($new_stop_words)
{
$this->stop_words = $new_stop_words;
}
function set_query($new_query)
{
$this->query = $new_query;
}
function set_query_sql($new_sql)
{
$this->query_sql = $new_sql;
}
function set_query_tables($new_tables)
{
$this->query_tables = $new_tables;
}
function Search($tables)
{
$stop_words = array('a','about','above','across','after','against',
'all','almost','also','among','an','and','anyhow',
'apart','as','at','around','author','be','below',
'could','do','down','due','each','either','enought',
'etc','even','ever','except','from','front','further',
'given','get','go','had','have','he','her','him','here',
'herself','himself','hi','how','i','ie','if','in','inc',
'indoor','inside','instead','into','inward','is','it',
'kind','kg','km','last','latter','less','let','like',
'little','made','many','maybe','me','meantime',
'meanwhile','might','more','most','mr','ms','much',
'must','my','myself','name','need','now','nowhere',
'nope','obtain','of','off','often','ok','onto','or',
'own','per','perform','perhaps','pelnti','possible',
'present','provide','quite','rather','really','relate',
'report','require','result','round','said','shalt',
'she','should','shown','sinc','slept','slew','slung',
'slunk','so','some','somebody','somehow','sometime',
'somewhat','spake','spat','spoke','spoken','sprang',
'sprung','stage','still','such','test','than','that',
'the','thee','their','them','themselves','then',
'there','these','those','thou','though','through','thru',
'thy','too','toward','type','under','unless','until',
'up','upon','us','various','very','vs','want','we',
'week','well','were','what','when','where','what',
'which','why','while','who','whoa','whole','whom','whose',
'why','will','wilt','with','within','without','worst',
'wow','yet','yipp','you','your','yourself');
$this->Setup();
$this->set_tables($tables);
$this->set_stop_words($stop_words);
} // Search()
function Setup()
{
if(!file_exists('./db.obj'))
{
echo 'Fatal Error: "db.obj" Not Found!'."\n";
exit;
}
else
{
$db = new DB();
$db->set_sql("SHOW TABLES");
if(!$db->select_query())
{
echo 'Fatal Error: Unable to access SQL server!'."\n";
exit;
}
}
} // Setup()
function SetUpTables()
{
$tables = $this->return_tables();
while(list(,$table) = each($tables))
{
$dictionary = $table['table_name'].'_dictionary';
$index = $table['table_name'].'_index';
$key_def = $table['key_def'];
$db1 = new DB();
$db1->set_sql("DESC $dictionary");
if(!$db1->select_query())
{
$sql = "CREATE TABLE ${dictionary}(
wordID INT(11) UNSIGNED NOT NULL DEFAULT '0' AUTO_INCREMENT,
word char(15) NOT NULL,
PRIMARY KEY (wordID),
UNIQUE ID (wordID),
KEY (word),
UNIQUE WD (word))";
$db2 = new DB();
$db2->set_sql($sql);
$db2->alter_query();
unset($db2);
}
$db3 = new DB();
$db3->set_sql("DESC $index");
if(!$db3->select_query())
{
$sql = "CREATE TABLE ${index}(
wordID INT(11) UNSIGNED NOT NULL,
docID ${key_def},
hits TINYINT(3) UNSIGNED NOT NULL,
KEY (wordID),
KEY (docID),
KEY (hits))";
$db4 = new DB();
$db4->set_sql($sql);
$db4->alter_query();
unset($db4);
}
unset($db1);
unset($db3);
}
} // SetUpTables()
function BuildDictionary()
{
$tables = $this->return_tables();
while(list(,$table) = each($tables))
{
extract($table);
$db0 = new DB();
$db0->set_sql("DELETE FROM ${table_name}_dictionary");
$db0->alter_query();
$sql = "SELECT ".implode(',',$fields)." FROM $table_name";
$db = new DB();
$db->set_sql($sql);
if($db->select_query())
{
while(list(,$row) = each($db->rows))
{
$string = '';
for($i = 0 ; $i < sizeof($fields) ; ++$i)
{
$string .= ' '.$this->FormatString($row[$fields[$i]]);
}
$words = explode(' ',$string);
for($i = 0 ; $i < sizeof($words) ; ++$i)
{
$insert_word = $words[$i];
$sql = "INSERT INTO ${table_name}_dictionary (word) VALUES ('$insert_word')";
$db1 = new DB();
$db1->set_sql($sql);
$db1->alter_query();
unset($db1);
}
unset($words);
}
}
$db2 = new DB();
$db2->set_sql("DELETE FROM ${table_name}_dictionary WHERE word=''");
$db2->alter_query();
unset($db);
unset($db2);
}
} // BuildDictionary()
function CreateIndex()
{
$tables = $this->return_tables();
while(list(,$table) = each($tables))
{
extract($table);
$db0 = new DB();
$db0->set_sql("DELETE FROM ${table_name}_index");
$db0->alter_query();
$sql = "SELECT ${table_key},".implode(',',$fields)." FROM $table_name";
$db = new DB();
$db->set_sql($sql);
if($db->select_query())
{
while(list(,$row) = each($db->rows))
{
$string = '';
$key = $row[$table_key];
for($i = 0 ; $i < sizeof($fields) ; ++$i)
{
$string .= $this->FormatString(strip_tags(stripslashes($row[$fields[$i]])));
}
$words = explode(' ',$string);
for($i = 0 ; $i < sizeof($words) ; ++$i)
{
$index[$words[$i]] += 1;
}
while(list($word,$hits) = each($index))
{
$db1 = new DB();
$db1->set_sql("SELECT wordID FROM ${table_name}_dictionary WHERE word='$word'");
if($db1->select_query())
{
$wordID = $db1->rows[0]['wordID'];
$sql = "INSERT INTO ${table_name}_index (wordID,docID,hits)
VALUES ('$wordID','$key','$hits')";
$db2 = new DB();
$db2->set_sql($sql);
$db2->alter_query();
}
unset($db1);
unset($db2);
}
unset($index);
}
}
unset($db);
}
} // CreateIndex()
function GetWordIDs($table)
{
$strings = explode(' ',$this->return_query());
for($i = 0 ; $i <= 3 ; ++$i)
{
$word = $this->FormatString($strings[$i]);
if(!in_array($word,$this->stop_words))
{
$sql = "SELECT wordID FROM ${table}_dictionary WHERE word='$word'";
$db = new DB();
$db->set_sql($sql);
if($db->select_query())
{
$ret[] = $db->rows[0]['wordID'];
}
unset($db);
}
}
return $ret;
} // GetWordIDs()
function BuildSQL($search_tables)
{
$hits = array();
$dict = array();
$indx = array();
$ands = array();
$tables = $this->return_tables();
while(list(,$table) = each($tables))
{
extract($table);
if(in_array($table_name,$search_tables))
{
$wordIDs = $this->GetWordIDs($table_name);
for($i = 0; $i < sizeof($wordIDs) ; ++$i)
{
$hits[] = "I".$i.".hits";
$dict[] = " ${table_name}_dictionary as D".$i." ";
$indx[] = " ${table_name}_index as I".$i." ";
$ands[] = "C.".$table_key."=I".$i.".docID && I".$i.".wordID=D".$i.".wordID && D".$i.".wordID=".$wordIDs[$i]." ";
}
if(is_array($hits) && is_array($dict) && is_array($indx) && is_array($ands))
{
$sql = "SELECT C.*,";
$sql .= "(".implode('+',$hits).") as hits ";
$sql .= "from ".$table_name." as C,".implode(',',$dict).",".implode(',',$indx);
$sql .= " WHERE ".implode(' && ',$ands);
$sql .= " ORDER BY hits DESC";
}
$ret[$table_name] = $sql;
}
unset($sql);
unset($hits);
unset($indx);
unset($dict);
unset($ands);
unset($wordIDs);
}
$this->set_query_sql($ret);
} // BuildSQL()
function FormatString($string)
{
$string = strtolower(strip_tags($string));
$string = ereg_replace("[0-9,.!_/\:#$)(*&^%-+=?;]",' ',$string);
return $string;
} // FormatString()
} // EOF
?>
|