<?php namespace eMacros;
use eMacros\Exception\ParseException;
/**
 * Recursive-descent parser that turns program text into a tree of
 * GenericList, Literal and Symbol instances.
 *
 * Grammar recognised by parseExpression(), tried in this order:
 *  - lists delimited by the bracket pairs declared in PARENTHESES
 *  - line comments introduced by COMMENT_PREFIX
 *  - float literals, integer literals (hex / octal / decimal), quoted strings
 *  - symbols
 */
class Parser
{
    /**
     * Validated symbols, indexed by symbol name.
     * Each distinct name maps to a single shared Symbol instance.
     * @var array
     */
    public static $map = array();

    /*
     * SYMBOL REGEX
     */

    /**
     * List delimiters, stored as consecutive opening/closing character pairs.
     * @var string
     */
    const PARENTHESES = '()';

    /**
     * Character that starts a line comment.
     * @var string
     */
    const COMMENT_PREFIX = ';';

    /**
     * Characters skipped between expressions.
     * @var string
     */
    const WHITESPACES = " \t\n\r\f\v\0";

    /**
     * Float validation regex
     * @var string
     */
    const REAL_PATTERN = '{^[+-]?((\d+|(\d*\.\d+|\d+\.\d*))e[+-]?\d+|\d*\.\d+|\d+\.\d*)}i';

    /**
     * Integer validation regex.
     * Groups: 1 = sign, 2 = whole number, 3 = hex digits, 4 = octal digits.
     * @var string
     */
    const INTEGER_PATTERN = '/^([+-]?)(0x([0-9a-f]+)|0([0-7]+)|[1-9]\d*|0)/i';

    /**
     * String validation regex (single or double quoted, backslash escapes allowed)
     * @var string
     */
    const STRING_PATTERN = '/^"([^"\\\\]|\\\\.)*"|^\'([^\'\\\\]|\\\\.)*\'/';

    /**
     * Escape string replacement regex.
     * Groups: 1 = escape body, 2 = octal digits, 3 = hex digits.
     * @var string
     */
    const STRING_ESCAPE_PATTERN = '/\\\\(([0-7]{1,3})|x([0-9A-Fa-f]{1,2})|.)/';

    /**
     * Symbol validation regex
     * @var string
     */
    const SYMBOL_PATTERN = '{^[^\s\d(){}\[\]"\';][^\s\'"(){}\[\];]*}';

    /**
     * Parses a program.
     *
     * Whitespace between top-level expressions is skipped and comments are
     * discarded, so the result only contains actual forms.
     *
     * @param string $program
     * @throws ParseException carrying the whole program and the absolute offset of the error
     * @return array list of parsed top-level forms, in source order
     */
    public static function parse($program)
    {
        $i = 0;
        $len = strlen($program);
        $forms = array();
        $offset = 0;

        while ($i < $len) {
            // skip whitespace between expressions
            if (strpos(self::WHITESPACES, $program[$i]) !== false) {
                ++$i;
                continue;
            }

            try {
                $form = self::parseExpression(substr($program, $i), $offset);
            } catch (ParseException $e) {
                // the inner offset is relative to the substring; make it absolute
                throw new ParseException($program, $e->offset + $i);
            }

            // comments are reported as null and dropped
            if (!is_null($form)) {
                $forms[] = $form;
            }

            $i += $offset;
        }

        return $forms;
    }

    /**
     * Parses an inner expression.
     *
     * Reads exactly one expression from the beginning of $form and stores the
     * number of bytes consumed in $offset.
     *
     * @param string $form   text starting at the expression to parse
     * @param int    $offset receives the length of the parsed expression
     * @throws ParseException when no expression can be read or a list is not closed
     * @return mixed a GenericList, Literal or Symbol; null when a comment was read
     */
    public static function parseExpression($form, &$offset)
    {
        // opening delimiter => closing delimiter, built once from PARENTHESES
        static $parentheses = null;

        if (is_null($parentheses)) {
            $pairs = self::PARENTHESES;
            $parentheses = array();

            for ($p = 0, $total = strlen($pairs); $p < $total; $p += 2) {
                $parentheses[$pairs[$p]] = $pairs[$p + 1];
            }
        }

        // list
        if (isset($form[0], $parentheses[$form[0]])) {
            $end = $parentheses[$form[0]];
            $values = array();
            $i = 1;
            $len = strlen($form);

            while ($i < $len && $form[$i] !== $end) {
                if (strpos(self::WHITESPACES, $form[$i]) !== false) {
                    ++$i;
                    continue;
                }

                try {
                    $value = self::parseExpression(substr($form, $i), $_offset);
                } catch (ParseException $e) {
                    throw new ParseException($form, $i + $e->offset);
                }

                // a comment inside a list yields null: drop it, exactly as parse()
                // does for top-level comments, instead of storing it as an element
                if (!is_null($value)) {
                    $values[] = $value;
                }

                $i += $_offset;
            }

            if (isset($form[$i]) && $form[$i] === $end) {
                $offset = $i + 1;
                return new GenericList($values);
            }

            // input ended before the closing delimiter was found
            throw new ParseException($form, $i);
        }

        // comment: consume everything up to (not including) the next newline
        if (isset($form[0]) && $form[0] == self::COMMENT_PREFIX) {
            // strcspn() measures the line without touching strtok()'s shared state
            $offset = strcspn($form, "\n");
            return null;
        }

        // float
        if (preg_match(self::REAL_PATTERN, $form, $matches)) {
            $offset = strlen($matches[0]);
            return new Literal((float) $matches[0]);
        }

        // integer (hexadecimal, octal or decimal)
        if (preg_match(self::INTEGER_PATTERN, $form, $matches)) {
            $offset = strlen($matches[0]);
            $sign = $matches[1] == '-' ? -1 : 1;

            // explicit checks instead of empty(): the digit string "0" is a valid
            // capture but empty("0") is true, which made 0x0 skip hexdec()
            if (isset($matches[3]) && $matches[3] !== '') {
                $value = hexdec($matches[3]);
            } elseif (isset($matches[4]) && $matches[4] !== '') {
                $value = octdec($matches[4]);
            } else {
                $value = $matches[2];
            }

            return new Literal($sign * $value);
        }

        // string
        if (preg_match(self::STRING_PATTERN, $form, $matches)) {
            $parsed = $matches[0];
            $offset = strlen($parsed);

            // strip the surrounding quotes, then resolve escape sequences
            $unescaped = preg_replace_callback(
                self::STRING_ESCAPE_PATTERN,
                array(__CLASS__, 'unescapeString'),
                substr($parsed, 1, -1)
            );

            return new Literal($unescaped);
        }

        // symbol
        if (preg_match(self::SYMBOL_PATTERN, $form, $matches)) {
            $symbol = $matches[0];
            $offset = strlen($symbol);

            // store validated symbols in order to reduce checks
            if (!array_key_exists($symbol, self::$map)) {
                self::$map[$symbol] = new Symbol($symbol);
            }

            return self::$map[$symbol];
        }

        throw new ParseException($form, 0);
    }

    /**
     * Replaces special characters in a string.
     *
     * Callback for STRING_ESCAPE_PATTERN: octal (\NNN) and hexadecimal (\xHH)
     * escapes become the corresponding byte, n/r/t/v/f become their control
     * character, and any other escaped character is returned unchanged.
     *
     * @param array $matches regex match (1 = escape body, 2 = octal digits, 3 = hex digits)
     * @return string
     */
    protected static function unescapeString($matches)
    {
        static $map = array('n' => "\n", 'r' => "\r", 't' => "\t", 'v' => "\v", 'f' => "\f");

        // explicit checks instead of empty(): "0" is a valid digit capture, but
        // empty("0") is true, which turned \0 into "0" and \x0 into "x0"
        if (isset($matches[2]) && $matches[2] !== '') {
            return chr(octdec($matches[2]));
        }

        if (isset($matches[3]) && $matches[3] !== '') {
            return chr(hexdec($matches[3]));
        }

        if (isset($map[$matches[1]])) {
            return $map[$matches[1]];
        }

        return $matches[1];
    }

    /**
     * Flushes validated symbols table
     */
    public static function flush()
    {
        self::$map = array();
    }
}
?>
|