Login   Register  
PHP Classes
elePHPant
Icontem

File: Sql_Lexer.class.php

Recommend this page to a friend!
Stumble It! Stumble It! Bookmark in del.icio.us Bookmark in del.icio.us
  Classes of Tom Schaefer  >  SQL Parse and Compile  >  Sql_Lexer.class.php  >  Download  
File: Sql_Lexer.class.php
Role: Class source
Content type: text/plain
Description: tokenizes a sql string
Class: SQL Parse and Compile
Parse and compose SQL queries programatically
Author: By
Last change: update
Date: 2009-02-06 00:36
Size: 19,535 bytes
 

Contents

Class file image Download
<?php

/**
 * PHP ctype compatibility functions. See the PHP ctype module for more
 * information on usage.
 *
 * @author John Millaway
 * @author Brent Cook
 * @author Thomas Schaefer
 * 
 * Note: These functions expect an integer argument, like the C versions
 * To use with a PHP character, use ord($c). These functions do not support
 * string arguments like their PHP extension counterparts
 */
if (!extension_loaded('ctype')) {
    function ctype_alnum($c) {
        static $ctype__;
        return ($ctype__[$c] & 7); // (1 | 2 | 4)
    }
    function ctype_alpha($c) {
        static $ctype__;
        return ($ctype__[$c] & 3); // (1 | 2)
    }
    function ctype_cntrl($c) {
        static $ctype__;
        return ($ctype__[$c] & 40);
    }
    function ctype_digit($c) {
        static $ctype__;
        return ($ctype__[$c] & 4);
    }
    function ctype_graph($c) {
        static $ctype__;
        return ($ctype__[$c] & 27); // (20 | 1 | 2 | 4)
    }
    function ctype_lower($c) {
        static $ctype__;
        return ($ctype__[$c] & 2);
    }
    function ctype_print($c) {
        static $ctype__;
        return ($ctype__[$c] & 227); // (20 | 1 | 2 | 4 | 200)
    }
    function ctype_punct($c) {
        static $ctype__;
        return ($ctype__[$c] & 20);
    }
    function ctype_space($c) {
        static $ctype__;
        return ($ctype__[$c] & 10);
    }
    function ctype_upper($c) {
        static $ctype__;
        return ($ctype__[$c] & 1);
    }
    function ctype_xdigit($c) {
        static $ctype__;
        return ($ctype__[$c] & 104); // (100 | 4));
    }
    $ctype__ = array(
    	  32,32,32,32,32,32,32,32,32,40,40,40,40,40,32,32,32,32,32,32,32,32,32,
          32,32,32,32,32,32,32,32,32,-120,16,16,16,16,16,16,16,16,16,16,16,16,
          16,16,16,4,4,4,4,4,4,4,4,4,4,16,16,16,16,16,16,16,65,65,65,65,65,65,
          1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,16,16,16,16,16,16,66,66,66,
          66,66,66,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,16,16,16,16,32,0,0,
          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
}

// {{{ token definitions
// variables: 'ident', 'sys_var'
// values:    'real_val', 'text_val', 'int_val', null
// }}}

final class Sql_Lexer
{
	// array of valid tokens for the lexer to recognize
	// format is 'token literal'=>TOKEN_VALUE
	public $symbols = array();

	// {{{ instance variables
	public $tokPtr = 0;
	public $tokStart = 0;
	public $tokLen = 0;
	public $tokText = '';
	public $lineNo = 0;
	public $lineBegin = 0;
	public $string = '';
	public $stringLen = 0;

	// Will not be altered by skip()
	public $tokAbsStart = 0;
	public $skipText = '';

	// Provide lookahead capability.
	public $lookahead = 3;
	// Specify how many tokens to save in tokenStack, so the
	// token stream can be pushed back.
	public $tokenStack = array();
	public $stackPtr = 0;
	// }}}

	// {{{ incidental functions
	public function __construct($string = '', $lookahead=0)
	{
		$this->string = $string;
		$this->stringLen = strlen($string);
		$this->lookahead = $lookahead;
	}

	public function get() {
		++$this->tokPtr;
		++$this->tokLen;
		return ($this->tokPtr <= $this->stringLen) ? $this->string{$this->tokPtr - 1} : null;
	}

	public function unget() {
		--$this->tokPtr;
		--$this->tokLen;
	}

	public function skip() {
		++$this->tokStart;
		return ($this->tokPtr != $this->stringLen) ? $this->string{$this->tokPtr++} : '';
	}

	public function revert() {
		$this->tokPtr = $this->tokStart;
		$this->tokLen = 0;
	}

	public function isCompop($c) {
		return (($c == '<') || ($c == '>') || ($c == '=') || ($c == '!'));
	}
	// }}}

	// {{{ pushBack()
	/*
	* Push back a token, so the very next call to lex() will return that token.
	* Calls to this function will be ignored if there is no lookahead specified
	* to the constructor, or the pushBack() function has already been called the
	* maximum number of token's that can be looked ahead.
	*/
	public function pushBack()
	{
		if($this->lookahead>0 && count($this->tokenStack)>0 && $this->stackPtr>0) {
			$this->stackPtr--;
		}
	}
	// }}}
	
	// {{{ lex()
	public function lex()
	{
		if($this->lookahead>0) {
			// The stackPtr, should always be the same as the count of
			// elements in the tokenStack.  The stackPtr, can be thought
			// of as pointing to the next token to be added.  If however
			// a pushBack() call is made, the stackPtr, will be less than the
			// count, to indicate that we should take that token from the
			// stack, instead of calling nextToken for a new token.
			if ($this->stackPtr<count($this->tokenStack)) {

				$this->tokText = $this->tokenStack[$this->stackPtr]['tokText'];
				$this->skipText = $this->tokenStack[$this->stackPtr]['skipText'];
				$token = $this->tokenStack[$this->stackPtr]['token'];

				// We have read the token, so now iterate again.
				$this->stackPtr++;
				return $token;

			} else {

				// If $tokenStack is full (equal to lookahead), pop the oldest
				// element off, to make room for the new one.

				if ($this->stackPtr == $this->lookahead) {
					// For some reason array_shift and
					// array_pop screw up the indexing, so we do it manually.
					for($i=0; $i<(count($this->tokenStack)-1); $i++) {
						$this->tokenStack[$i] = $this->tokenStack[$i+1];
					}

					// Indicate that we should put the element in
					// at the stackPtr position.
					$this->stackPtr--;
				}

				$token = $this->nextToken();
				$this->tokenStack[$this->stackPtr] =
				array('token'=>$token,
                      'tokText'=>$this->tokText,
                      'skipText'=>$this->skipText);
				$this->stackPtr++;
				return $token;
			}
		}
		else
		{
			return $this->nextToken();
		}
	}
	// }}}

	public function getToken() {
		return array("type"=>$this->token,"value"=>$this->tokText,"line"=>$this->lineNo,"charno"=>$this->tokAbsStart);	
	}
	
    public function hasNextToken()
    {
    	$tokPtr = $this->tokPtr;
    	$tokStart = $this->tokStart;
    	$tokText = $this->tokText;
    	$tokLen = $this->tokLen;
    	$tokAbsStart = $this->tokAbsStart;
    	
    	$this->lex();    	
    	
    	$tok = ($this->tokText!="*end of input*")?true:false;
    	
    	if($tok){
	    	$this->tokPtr = $tokPtr;
	    	$this->tokStart = $tokStart;
	    	$this->tokText = $tokText;
	    	$this->tokAbsStart = $tokAbsStart;
	    	$this->tokStack=null;
    	} 
    	return $tok;
    }
    
	/**
	* nextTokenIs
	* @param string $token
	* @param bool $return
	* @return mixed
	*/
    public function nextTokenIs($token , $return=false)
    {
    	$tokPtr = $this->tokPtr;
    	$tokStart = $this->tokStart;
    	$tokText = $this->tokText;
    	$tokLen = $this->tokLen;
    	$tokAbsStart = $this->tokAbsStart;
    	
    	$nextToken = $this->nextToken();    	
    	$tok = ($nextToken==$token)?true:false;

		if($return and $tok) {
    	    return $nextToken;
    	}
    	
    	if($tok) {
	    	$this->tokPtr = $tokPtr;
	    	$this->tokStart = $tokStart;
	    	$this->tokText = $tokText;
	    	$this->tokAbsStart = $tokAbsStart;
	    	$this->tokStack=null;
    	} 
    	return $tok;
    }

    public function lookaheadToken()
    {
    	$tokPtr = $this->tokPtr;
    	$tokStart = $this->tokStart;
    	$tokText = $this->tokText;
    	$tokLen = $this->tokLen;
    	$tokAbsStart = $this->tokAbsStart;
    	
    	$nextToken = $this->nextToken();    	
    	$this->tokPtr = $tokPtr;
    	$this->tokStart = $tokStart;
    	$this->tokText = $tokText;
    	$this->tokAbsStart = $tokAbsStart;
    	$this->tokStack=null;
    	return $nextToken;
    }

    public function tokenIsNot($token)
    {
    	$tokens = is_array($token) ? $token : array($token);
    	$check=true;
    	foreach($tokens as $tok) {
    		if(Sql_Object::token()==$tok){
    			$check=false;
    		}
    	}
    	return $check;
    }

    public function tokenIs($token)
    {
    	$tokens = is_array($token) ? $token : array($token);
    	$check=false;
    	foreach($tokens as $tok) {
    		if(Sql_Object::token()==$tok){
    			$check=true;
    		}
    	}
    	return $check;
    }

	/**
	* nextTextIs
	* @param string $token
	* @param bool $return
	* @return mixed
	*/
    public function nextTextIs($token , $return=false)
    {
    	$tokPtr = $this->tokPtr;
    	$tokStart = $this->tokStart;
    	$tokText = $this->tokText;
    	$tokLen = $this->tokLen;
    	$tokAbsStart = $this->tokAbsStart;
    	
    	$this->nextToken();
    	    	
    	$tok = ($this->tokText==$token)?true:false;    	
    	if($return and $tok) {
    	    return $this->tokText;
    	}
    	
    	if($tok){
	    	$this->tokPtr = $tokPtr;
	    	$this->tokStart = $tokStart;
	    	$this->tokText = $tokText;
	    	$this->tokAbsStart = $tokAbsStart;
	    	$this->tokStack=null;
    	} 
    	return $tok;
    }

	// {{{ nextToken()
	public function nextToken()
	{
		if ($this->string == '') return;
		$state = 0;
		$this->tokAbsStart = $this->tokStart;

		while (true){
			//echo "State: $state, Char: $c\n";
			switch($state) {
				// {{{ State 0 : Start of token
				case 0:
					$this->tokPtr = $this->tokStart;
					$this->tokText = '';
					$this->tokLen = 0;
					$c = $this->get();

					if (is_null($c)) { // End Of Input
						$state = 1000;
						break;
					}

					while (($c == ' ') || ($c == "\t")
					|| ($c == "\n") || ($c == "\r")) {
						if ($c == "\n" || $c == "\r") {
							// Handle MAC/Unix/Windows line endings.
							if($c == "\r") {
								$c = $this->skip();

								// If not DOS newline
								if($c != "\n")
								$this->unget();
							}
							++$this->lineNo;
							$this->lineBegin = $this->tokPtr;
						}

						$c = $this->skip();
						$this->tokLen = 1;
					}

					// Escape quotes and backslashes
					if ($c == '\\') {
						$t = $this->get();
						if ($t == '\'' || $t == '\\' || $t == '"') {
							$this->tokText = $t;
							$this->tokStart = $this->tokPtr;
							return $this->tokText;
						} else {
							$this->unget();

							// Unknown token.  Revert to single char
							$state = 999;
							break;
						}
					}

					if (($c == '\'') || ($c == '"')) { // text string
						$quote = $c;
						$state = 12;
						break;
					}

					if ($c == '_') { // system variable
						$state = 18;
						break;
					}

					if (ctype_alpha(ord($c)) || ($c == '`')) { // keyword or ident
						$state = 1;
						break;
					}

					if (ctype_digit(ord($c))) { // real or int number
						$state = 5;
						break;
					}

					if ($c == '.') {
						$t = $this->get();
						if ($t == '.') { // ellipsis
							if ($this->get() == '.') {
								$this->tokText = '...';
								$this->tokStart = $this->tokPtr;
								return $this->tokText;
							} else {
								$state = 999;
								break;
							}
						} else if (ctype_digit(ord($t))) { // real number
							$this->unget();
							$state = 7;
							break;
						} else { // period
							$this->unget();
						}
					}

					if ($c == '#') { // Comments
						$state = 14;
						break;
					}
					if ($c == '-') {
						$t = $this->get();
						if ($t == '-') {
							$state = 14;
							break;
						} elseif ($t == ' ') {
							$state = 15;
							break;
						} elseif (is_numeric( $t )) {
							$state = 15;
							break;
						} elseif (ord($t)==32 ) {
							$state = 16;
							break;
						} else { // negative number
							$this->unget();
							$state = 5;
							break;
						}
					}

					if ($c == '+') {
						$t = $this->get();
						if ($t == '+') {
							$state = 14;
							break;
						} elseif ($t == ' ') {
							$state = 15;
							break;
						} elseif (is_numeric( $t )) {
							$state = 15;
							break;
						}
					}

					if ($this->isCompop($c)) { // comparison operator
						$state = 10;
						break;
					}
					// Unknown token.  Revert to single char
					$state = 999;
					break;
					// }}}

					// {{{ State 1 : Incomplete keyword or ident
				case 1:
					$c = $this->get();
					if (ctype_alnum(ord($c)) || ($c == '_') || ($c == '.') || ($c == '`')) {
						$state = 1;
						break;
					}
					$state = 2;
					break;
					// }}}

					/* {{{ State 2 : Complete keyword or ident */
				case 2:
					$this->unget();
					$this->tokText = substr($this->string, $this->tokStart,
					$this->tokLen);

					$testToken = strtolower($this->tokText);
					if (isset($this->symbols[$testToken])) {

						$this->skipText = substr($this->string, $this->tokAbsStart,
						$this->tokStart-$this->tokAbsStart);
						$this->tokStart = $this->tokPtr;
						return $testToken;
					} else {
						$this->skipText = substr($this->string, $this->tokAbsStart,
						$this->tokStart-$this->tokAbsStart);
						$this->tokStart = $this->tokPtr;
						return 'ident';
					}
					break;
					// }}}

					// {{{ State 5: Incomplete real or int number
				case 5:
					$c = $this->get();
					if (ctype_digit(ord($c))) {
						$state = 5;
						break;
					} else if ($c == '.') {
						$t = $this->get();
						if($t == '.') { // ellipsis
							$this->unget();
						} else { // real number
							$state = 7;
							break;
						}
					} else if(ctype_alpha(ord($c))) { // number must end with non-alpha character
						$state = 999;
						break;
					} else {
						// complete number
						$state = 6;
						break;
					}
					// }}}

					// {{{ State 6: Complete integer number
				case 6:
					$this->unget();
					$this->tokText = intval(substr($this->string, $this->tokStart,
					$this->tokLen));
					$this->skipText = substr($this->string, $this->tokAbsStart,
					$this->tokStart-$this->tokAbsStart);
					$this->tokStart = $this->tokPtr;
					return 'int_val';
					break;
					// }}}

					// {{{ State 7: Incomplete real number
				case 7:
					$c = $this->get();

					/* Analogy Start */
					if ($c == 'e' || $c == 'E') {
						$state = 15;
						break;
					}
					/* Analogy End   */

					if (ctype_digit(ord($c))) {
						$state = 7;
						break;
					}
					$state = 8;
					break;
					// }}}

					// {{{ State 8: Complete real number */
				case 8:
					$this->unget();
					$this->tokText = floatval(substr($this->string, $this->tokStart,
					$this->tokLen));
					$this->skipText = substr($this->string, $this->tokAbsStart,
					$this->tokStart-$this->tokAbsStart);
					$this->tokStart = $this->tokPtr;
					return 'real_val';
					// }}}

					// {{{ State 10: Incomplete comparison operator
				case 10:
					$c = $this->get();
					if ($this->isCompop($c))
					{
						$state = 10;
						break;
					}
					$state = 11;
					break;
					// }}}

					// {{{ State 11: Complete comparison operator
				case 11:
					$this->unget();
					$this->tokText = substr($this->string, $this->tokStart,
					$this->tokLen);
					if($this->tokText) {
						$this->skipText = substr($this->string, $this->tokAbsStart,
						$this->tokStart-$this->tokAbsStart);
						$this->tokStart = $this->tokPtr;
						return $this->tokText;
					}
					$state = 999;
					break;
					// }}}

					// {{{ State 12: Incomplete text string
				case 12:
					$bail = false;
					while (!$bail) {
						switch ($this->get()) {
							case '':
								$this->tokText = null;
								$bail = true;
								break;
							case "\\":
								if (!$this->get()) {
									$this->tokText = null;
									$bail = true;
								}
								//$bail = true;
								break;
							case $quote:
								$this->tokText = stripslashes(substr($this->string,
								($this->tokStart+1), ($this->tokLen-2)));
								$bail = true;
								break;
						}
					}
					if (!is_null($this->tokText)) {
						$state = 13;
						break;
					}
					$state = 999;
					break;
					// }}}

					// {{{ State 13: Complete text string
							case 13:
								$this->skipText = substr($this->string, $this->tokAbsStart,
								$this->tokStart-$this->tokAbsStart);
								$this->tokStart = $this->tokPtr;
								return 'text_val';
								break;
								// }}}

								// {{{ State 14: Comment
							case 14:
								$c = $this->skip();
								if ($c == "\n" || $c == "\r" || $c == "") {
									// Handle MAC/Unix/Windows line endings.
									if ($c == "\r") {
										$c = $this->skip();
										// If not DOS newline
										if ($c != "\n") {
											$this->unget();
										}
									}

									if ($c != "") {
										++$this->lineNo;
										$this->lineBegin = $this->tokPtr;
									}

									// We need to skip all the text.
									$this->tokStart = $this->tokPtr;
									$state = 0;
								} else {
									$state = 14;
								}
								break;
								// }}}

								// {{{ State 15: Exponent Sign in Scientific Notation
							case 15:
								$c = $this->get();
								if($c == '-' || $c == '+' || $c == '/'  || $c == '*') {
									$state = 16;
									break;
								}
								$state = 999;
								break;
								// }}}

								// {{{ state 16: Exponent Value-first digit in Scientific Notation
							case 16:
								$c = $this->get();
								if (ctype_digit(ord($c))) {
									$state = 17;
									break;
								}
								$state = 999;  // if no digit, then token is unknown
								break;
								// }}}

								// {{{ State 17: Exponent Value in Scientific Notation
							case 17:
								$c = $this->get();
								if (ctype_digit(ord($c))) {
									$state = 17;
									break;
								}
								$state = 8;  // At least 1 exponent digit was required
								break;
								// }}}

								// {{{ State 18 : Incomplete System Variable
							case 18:
								$c = $this->get();
								if (ctype_alnum(ord($c)) || $c == '_') {
									$state = 18;
									break;
								}
								$state = 19;
								break;
								// }}}

								// {{{ State 19: Complete Sys Var
							case 19:
								$this->unget();
								$this->tokText = substr($this->string, $this->tokStart,
								$this->tokLen);
								$this->skipText = substr($this->string, $this->tokAbsStart,
								$this->tokStart-$this->tokAbsStart);
								$this->tokStart = $this->tokPtr;
								return 'sys_var';
								// }}}

								// {{{ State 999 : Unknown token.  Revert to single char
							case 999:
								$this->revert();
								$this->tokText = $this->get();
								$this->skipText = substr($this->string, $this->tokAbsStart,
								$this->tokStart-$this->tokAbsStart);
								$this->tokStart = $this->tokPtr;
								return $this->tokText;
								// }}}

								// {{{ State 1000 : End Of Input
							case 1000:
								$this->tokText = "*end of input*";
								$this->skipText = substr($this->string, $this->tokAbsStart,
								$this->tokStart-$this->tokAbsStart);
								$this->tokStart = $this->tokPtr;
								return null;
								// }}}
			}
		}
	}
	// }}}
}