<?php
/***************************************************************************
* lib_dictionary.php (ver. 1.1.1)
* Copyright (c) 2007 David Frendin (david.frendin@gmail.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version. See the GNU General Public License
* for more details.
*
***************************************************************************/
/***************************************************************************
* Description:
* This library checks whether a word is misspelled and makes 3 (or fewer/more) suggestions
* as to which word the user meant.
*
* This spell lexicon uses MySQL to store/load dictionary data and similar_text / metaphone
* to determine whether a dictionary word might be a suitable correction.
*
* The lib_dictionary library does _not_ require pspell or aspell, or any external
* applications or dictionaries.
*
* Credits to:
* Myself for writing it, Reza Saleh (zaalion@yahoo.com) for inspiration and english wordlist,
* Oxymoron (php portalen) for optimization and you for reading it!
*
***************************************************************************/
//
// dictclass
// Spell checker backed by a database word table (DICTIONARY_TABLE).
// All database access goes through the global $db object, which must provide
// sql_query(), sql_numrows() and sql_fetchrowset().
//
class dictclass
{
    // true when the last load_dictionary() call found candidate rows
    public $is_loaded = false;

    // candidate rows loaded by load_dictionary(); each row is expected to carry a 'word' key
    public $dictionary = array();

    //
    // spell_phrase
    // splits the phrase on spaces and wraps every word that is not in the dictionary in a
    // highlighting <span>. Only A-Z, a-z and 0-9 are considered part of a word, which makes
    // the check incompatible with non-english languages.
    // # $phrase: the text to check
    // # $debug_data: when true, the elapsed time is appended to the returned string
    // # returns: the phrase with unknown words highlighted
    //
    public function spell_phrase($phrase, $debug_data = false)
    {
        if ($debug_data)
            $started = microtime(true);

        $misspelled = array(); // lower-cased word => bool, so each word is looked up only once
        $tokens = explode(" ", $phrase);
        foreach ($tokens as $idx => $token)
        {
            // tags have to go first: once "<" and ">" are filtered out strip_tags() finds nothing
            $word = (string) preg_replace('/[^A-Za-z0-9]/', '', strip_tags($token));
            if ($word === '')
                continue; // empty token or punctuation only - nothing to look up

            $key = strtolower($word);
            if (!isset($misspelled[$key]))
                $misspelled[$key] = ($this->does_word_exist($word) === -1);

            if ($misspelled[$key])
            {
                // replace inside this token only, so repeated words are not wrapped twice and
                // markup inserted for earlier tokens is never touched
                $tokens[$idx] = str_replace($word, "<span style=\"color: #aa0000; font-weight: bold;\">$word</span>", $token);
            }
        }
        $phrase = implode(" ", $tokens);

        if ($debug_data)
        {
            $elapsed = microtime(true) - $started;
            // microtime(true) is in seconds, so milliseconds are seconds * 1000
            $phrase = $phrase . " <b>" . sprintf('%.1f', $elapsed * 1000) . "ms</b> (" . sprintf('%.2f', $elapsed) . "s)";
        }
        return $phrase;
    }

    //
    // checkhighest
    // scores every row in $this->dictionary against the word with similar_text().
    // # $word: the (misspelled) word to find corrections for
    // # $max: highest index to fill, i.e. at most $max + 1 suggestions are returned
    // # returns: -1 on empty dictionary, otherwise an array sorted best match first, where
    //            each entry has 'word' and 'p' (similarity in percent). Rows with no
    //            similarity at all are left out, so the array may be empty.
    //
    public function checkhighest($word, $max = 2)
    {
        if (empty($this->dictionary))
            return -1;

        $limit = $max + 1;
        $match = array();
        foreach ($this->dictionary as $lookup)
        {
            $p = 0;
            similar_text($lookup['word'], $word, $p);
            if ($p <= 0)
                continue;

            // walk up from the end to the slot that keeps $match sorted descending;
            // the strict ">" keeps earlier rows ahead of later rows with the same score
            $pos = count($match);
            while ($pos > 0 && $p > $match[$pos - 1]['p'])
                $pos--;

            if ($pos >= $limit)
                continue; // not good enough to make the list

            array_splice($match, $pos, 0, array(array('word' => $lookup['word'], 'p' => $p)));
            if (count($match) > $limit)
                array_pop($match); // drop the entry pushed off the end
        }
        return $match;
    }

    //
    // load_dictionary
    // does a search for possible correct words based on word length and metaphone, and
    // stores the result in $this->dictionary / $this->is_loaded.
    // # $word: the word to find candidates for
    // # $size: how far the candidate length may differ from the word length
    // # returns: false if not found any, or array containing all found rows
    //
    public function load_dictionary($word, $size=1)
    {
        global $db;

        $mphone = metaphone($word);
        // for longer keys drop the first and last character before matching
        if (strlen($mphone) > 3)
            $mphone = substr($mphone, 1, strlen($mphone)-2);
        $mphone = $this->escape_sql($mphone);

        $size = (int) $size;
        $min = max(1, strlen($word) - $size - 1);
        $max = strlen($word) + $size;

        $sql = "SELECT * FROM " . DICTIONARY_TABLE . " WHERE len BETWEEN {$min} AND {$max} AND (metaphone LIKE '{$mphone}%' OR metaphoner LIKE concat(reverse('{$mphone}'), '%'))";
        $result = $db->sql_query($sql) or die("error");

        if ($db->sql_numrows($result) == 0)
        {
            $this->is_loaded = false;
            // forget the rows of a previous lookup so checkhighest() cannot use stale data
            $this->dictionary = array();
            return false;
        }

        $this->is_loaded = true;
        $this->dictionary = $db->sql_fetchrowset($result);
        return $this->dictionary;
    }

    //
    // does_word_exist
    // connects to db to determine if the word exists in the dictionary (if so, the word is
    // correctly spelled). The word is lower-cased and escaped here, so pass it in raw.
    // # returns: -1 on false, or a single-dimensional array from the db record
    //
    public function does_word_exist($word)
    {
        global $db;

        $sql = "SELECT * FROM " . DICTIONARY_TABLE . " WHERE ( word = '" . $this->escape_sql(strtolower($word)) . "' )";
        $result = $db->sql_query($sql) or die("sql could not connect [does_word_exist]");
        if ($db->sql_numrows($result) == 0)
            return -1;

        $rows = $db->sql_fetchrowset($result);
        return $rows[0];
    }

    //
    // add_from_file
    // add new words from a file (one word per line); words already in the dictionary are
    // skipped. Progress is echoed as HTML.
    // # returns: false if the file could not be opened, otherwise true
    //
    public function add_from_file($path)
    {
        global $db;

        $spc = array("\r\n", "\n", "\r");
        set_time_limit(400); //it takes time to process large wordlists

        $handle = @fopen($path, "r");
        if (!$handle)
            return false;

        // fgets() returns false at end of file, so no empty trailing "word" is processed
        while (($line = fgets($handle, 4096)) !== false)
        {
            $line = strtolower(trim(str_replace($spc, "", $line)));
            if ($line === '')
                continue; // blank line

            if ($this->does_word_exist($line) !== -1)
                continue; // already known

            $mphone = metaphone($line);
            // load_dictionary() matches the metaphoner column against the reversed key,
            // so the reversed metaphone is what has to be stored there
            $sql = "INSERT INTO " . DICTIONARY_TABLE . " (word, metaphone, len, metaphoner) VALUES ('"
                . $this->escape_sql($line) . "', '"
                . $this->escape_sql($mphone) . "', "
                . strlen($line) . ", '"
                . $this->escape_sql(strrev($mphone)) . "')";
            echo "adding: " . htmlspecialchars($line) . "<br>";
            $db->sql_query($sql) or die("could not connect to db [add_from_file]");
        }
        fclose($handle);
        echo "added wordlist";
        return true;
    }

    //
    // escape_sql
    // escapes a value for use inside a quoted SQL string literal. Uses $db->sql_escape()
    // when the db object offers it, then mysql_real_escape_string() when that extension is
    // available, and addslashes() as a last resort.
    // # returns: the escaped string
    //
    private function escape_sql($value)
    {
        global $db;

        $value = (string) $value;
        if (is_object($db) && method_exists($db, 'sql_escape'))
            return $db->sql_escape($value);

        if (function_exists('mysql_real_escape_string'))
        {
            $escaped = mysql_real_escape_string($value);
            if ($escaped !== false)
                return $escaped;
        }
        return addslashes($value);
    }
}
?>
|