Login   Register  
PHP Classes
elePHPant
Icontem

File: compactor.php

Recommend this page to a friend!
Stumble It! Stumble It! Bookmark in del.icio.us Bookmark in del.icio.us
  Classes of Oliver Lillie  >  HTML Compactor  >  compactor.php  >  Download  
File: compactor.php
Role: Class source
Content type: text/plain
Description: Class Source
Class: HTML Compactor
Reduce HTML document size by removing white-spaces
Author: By
Last change: Updated to v0.6.0
Added deflate/gzip compression
Date: 2008-04-30 06:38
Size: 18,667 bytes
 

Contents

Class file image Download
<?php

	/**
	 * @author Oliver Lillie (aka buggedcom) <publicmail@buggedcom.co.uk>
	 *
	 * @license BSD
	 * @copyright Copyright (c) 2008 Oliver Lillie <http://www.buggedcom.co.uk>
	 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
	 * files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
	 * modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
	 * is furnished to do so, subject to the following conditions:  The above copyright notice and this permission notice shall be
	 * included in all copies or substantial portions of the Software.
	 *
	 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
	 * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
	 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
	 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
	 *
	 * @name Compactor
	 * @version 0.6.0
	 * @abstract This class can be used in speeding up delivery of webpages front the server to the client browser, by compacting
	 * the whitespace. There are multiple options for compacting, including both horizontal and vertical whitespace removal and
	 * css/javascript compacting also. The class can also compact the output of a php script using automatic output buffering. 
	 *
	 * @example compressor.example1.php Compacts HTML using the default options.
	 * @example compressor.example2.php Compacts remote HTML with custom javascript compression.
	 * @example
	 * <?php
	 * 		// this example will automatically compact any buffered output from the script
	 * 		$compactor = new Compactor(array(
	 * 			'use_buffer'			=> true,
	 * 			'buffer_echo'			=> true,
	 * 			'compact_on_shutdown'	=> true
	 * 		));
	 * ?>
	 *
	 * @note The functions to provide deflate functionality are partially lifted from 
	 * minify http://code.google.com/p/minify/
	 */

	class Compactor
	{

		/**
		 * Holds the options array
		 * @access private
		 * @var array
		 */
		private $_options 			= array(
// 			line_break; string; The type of line break used in the HTML that you are processing.
// 			ie, \r, \r\n, \n or PHP_EOL
			'line_break' 						=> PHP_EOL,
// 			preserved_tags; array; An array of html tags whose innerHTML contents format require preserving.
			'preserved_tags'					=> array('textarea', 'pre', 'script', 'style', 'code'),
// 			preserved_boundry; string; The holding block that is used to replace the contents of the preserved tags
// 			while the compacting is taking place.
			'preserved_boundry'					=> '@@PRESERVEDTAG@@',
// 			use_buffer; boolean; You can optionally use output buffering instead of providing the html for compacting.
			'use_buffer' 						=> false,
// 			buffer_echo; boolean; If after processing you want to output the content set this to true, otherwise it
// 			will be up to you to echo out the compacted html.
			'buffer_echo'						=> true,
// 			compact_on_shutdown; boolean; If 'use_buffer' is enabled and this option is also enalbed it will create a 
// 			register_shutdown_function that will process the buffered output at the end of the script without any hassle.
			'compact_on_shutdown'				=> false,
// 			strip_comments; boolean; This will strip html comments from the html. NOTE, if the below option 'keep_conditional_comments'
// 			is not set to true then conditional Internet Explorer comments will also be stripped.
			'strip_comments' 					=> true,
// 			keep_conditional_comments; boolean; Only applies if the baove option 'strip_comments' is set to true.
// 			Only if the client browser is Internet Explorer then the conditional comments are kept.
			'keep_conditional_comments'			=> true,
// 			conditional_boundries; array; The holding block boudries that are used to replace the opening and
// 			closing tags of the conditional comments.
			'conditional_boundries'				=> array('@@IECOND-OPEN@@', '@@IECOND-CLOSE@@'),
// 			compress_horizontal; boolean; Removes horizontal whitespace of the HTML, ie left to right whitespace (spaces and tabs).
			'compress_horizontal'				=> true,
// 			compress_vertical; boolean; Removes vertical whitespace of the HTML, ie line breaks.		
			'compress_vertical'					=> true,
// 			compress_scripts; boolean; Compresses content from script tags using a simple algorythm. Removes javascript comments,
// 			and horizontal and vertical whitespace. Note as only a simple algorythm is used there are limitations to the script 
// 			and you may want to use a more complex script like 'minify' http://code.google.com/p/minify/ or 'jsmin'
// 			http://code.google.com/p/jsmin-php/ See test3.php for an example.
			'compress_scripts'					=> true,
// 			script_compression_callback; boolean; The name of a callback for custom js compression. See test3.php for an example.	
			'script_compression_callback' 		=> false,
// 			script_compression_callback_args; array; Any additional args for the callback. The javascript will be put to the
// 			front of the array.
			'script_compression_callback_args' 	=> array(),
// 			compress_css; boolean; Compresses CSS style tags.		
			'compress_css'						=> true,
// 			deflates the output using a deflate/gzip method
			'deflate'							=> false
		);
		
		/**
		 * Holds the preserved blocks so multiple scans of the html don't have to be made.
		 * @access private
		 * @var mixed 
		 */
		private $_preserved_blocks  = false;

		/**
		 * Holds which method of delate should be used.
		 * @access private
		 * @var mixed 
		 */
		private $_deflate_encoding = null;

		/**
		 * Constructor
		 */
		function __construct($options=array())
		{
			$this->setOption($options);
			if($this->_options['compact_on_shutdown'])
			{
				$this->setOption(array(
					'use_buffer' => true,
					'buffer_echo' => true
				));
			}
			if($this->_options['use_buffer'])
			{
				ob_start();
			}
			if($this->_options['compact_on_shutdown'])
			{
				register_shutdown_function(array(&$this, 'squeeze'));
			}
		}
		
		/**
		 * Sets an option in the option array();
		 * 
		 * @access public
		 * @param mixed $varname Can take the form of an array of options to set a string of an option name.
		 * @param mixed $varvalue The value of the option you are setting.
		 **/
		public function setOption($varname, $varvalue=null)
		{
			$keys = array_keys($this->_options);
			if(gettype($varname) == 'array')
			{
				foreach($varname as $name=>$value)
				{
					if(in_array($name, $keys))
					{
						$this->_options[$name] = $value;
					}
				}
			}
			else
			{
				if(in_array($varname, $keys))
				{
					$this->_options[$varname] = $varvalue;
				}
			}
		}
		
		/**
		 * Compresses the html, either that is supplied to the function or if the use_buffer
		 * option is enabled then the buffer is grabbed for compression.
		 * 
		 * @access public
		 * @param string $html HTML string required for compression, however if the use_buffer option
		 * 		is enabled the param can be left out because it will be ignored anyway.
		 * @return string
		 */
		public function squeeze($html=null)
		{
			if($this->_options['use_buffer'])
			{
				$html = ob_get_clean();
			}
// 			unify the line breaks so we have clean html to work with
			$html = $this->_unifyLineBreaks($html);
// 			compress any script tags if required
			if($this->_options['compress_scripts'] || $this->_options['compress_css'])
			{
				$html = $this->_compressScriptAndStyleTags($html);
			}
// 			make the compressions
			if($this->_options['strip_comments'])
			{
				$html = $this->_stripHTMLComments($html);
			}
			if($this->_options['compress_horizontal'])
			{
				$html = $this->_compressHorizontally($html);
			}
			if($this->_options['compress_vertical'])
			{
				$html = $this->_compressVertically($html);
			}
// 			replace the preserved blocks with their original content
			$html = $this->_reinstatePreservedBlocks($html);
// 			deflate
			if($this->_options['deflate'])
			{
				$html = $this->_deflate($html);
			}
// 			if the html i to be echoed out then do the echo
			if($this->_options['buffer_echo'])
			{
				header('Content-Length: '.strlen($html));
				if($this->_options['deflate'])
				{
					$this->outputDeflateHeaders($html);
				}
				echo $html;
			}
			return $html;
		}
		
		/**
		 * Strips HTML Comments from the buffer whilst making a check to see if
		 * Inernet Explorer conditional comments should be stripped or not.
		 *
		 * @access private
		 * @param string $html The HTML string for comment removal.
		 * @return string
		 */
		private function _stripHTMLComments($html)
		{
			$keep_conditionals = false;
// 			only process if the Internet Explorer conditional statements are to be kept
			if($this->_options['keep_conditional_comments'])
			{
// 				check that the opening browser is internet explorer
				$msie = '/msie\s(.*).*(win)/i';
			    $keep_conditionals = (isset($_SERVER['HTTP_USER_AGENT']) && preg_match($msie, $_SERVER['HTTP_USER_AGENT']));
// 			    $keep_doctype = false;
// 			    if(strpos($html, '<!DOCTYPE'))
// 			    {
// 					$html = str_replace('<!DOCTYPE', '--**@@DOCTYPE@@**--', $html);
// 			   	 	$keep_doctype = true;
// 			    }
// 			    ie conditionals are to be kept so substitute
				if($keep_conditionals)
				{
					$html = str_replace(array('<!--[if', '<![endif]-->'), $this->_options['conditional_boundries'], $html);
				}
			}			
// 		    remove comments
		    $html = preg_replace('/<!--(.|\s)*?-->/', '', $html);
// 		    $html = preg_replace ('@<![\s\S]*?--[ \t\n\r]*>@', '', $html);
// 		   	re sub-in the conditionals if required.
			if($keep_conditionals)
			{
				$html = str_replace($this->_options['conditional_boundries'], array('<!--[if', '<![endif]-->'), $html);
			}
// 		    if($keep_doctype)
// 		    {
// 				$html = str_replace('--**@@DOCTYPE@@**--', '<!DOCTYPE', $html);
// 		    }
// 			return the buffer
			return $html;
		}
		
		/**
		 * Finds html blocks to preserve the formatting for.
		 * 
		 * @access private
		 * @param string $html
		 * @return string
		 */
		private function _extractPreservedBlocks($html)
		{
			if($this->_preserved_blocks !== false)
			{
				return $html;
			}
 			$tag_string = implode('|', $this->_options['preserved_tags']);
// 			get the textarea matches
			preg_match_all("!<(".$tag_string.")[^>]*>.*?</(".$tag_string.")>!is", $html, $preserved_area_match);
			$this->_preserved_blocks = $preserved_area_match[0];
// 			replace the textareas inerds with markers
			return preg_replace("!<(".$tag_string.")[^>]*>.*?</(".$tag_string.")>!is", $this->_options['preserved_boundry'], $html);
		}
		
		/**
		 * Replaces any preservations made with the original content.
		 * 
		 * @access private
		 * @param string $html
		 * @return string
		 */
		private function _reinstatePreservedBlocks($html)
		{
			if($this->_preserved_blocks === false)
			{
				return $html;
			}
			foreach($this->_preserved_blocks as $curr_block)
			{
				$html = preg_replace("!".$this->_options['preserved_boundry']."!", $curr_block, $html, 1);
			}
			return $html;
		}
		
		/**
		 * Compresses white space horizontally (ie spaces, tabs etc) whilst preserving
		 * textarea and pre content.
		 *
		 * @access private
		 * @param string $html
		 * @return string
		 */
		private function _compressHorizontally($html)
		{
			$html = $this->_extractPreservedBlocks($html);
// 			remove the white space
			$html = preg_replace('/((?<!\?>)'.$this->_options['line_break'].')[\s]+/m', '\1', $html);
// 			Remove extra spaces
			return preg_replace('/\t+/', '', $html);
		}

		/**
		 * Compresses white space vertically (ie line breaks) whilst preserving
		 * textarea and pre content.
		 *
		 * @access private
		 * @param string $html
		 * @param mixed $textarea_blocks false if no textarea blocks have already been taken out, otherwise an array.
		 * @return unknown
		 */
		private function _compressVertically($html)
		{
			$html = $this->_extractPreservedBlocks($html);
// 			remove the line breaks
			return str_replace($this->_options['line_break'], '', $html);
		}
		
		/**
		 * Converts line breaks from the different platforms onto the one type.
		 *
		 * @access private
		 * @param string $html HTML string
		 * @param string $break The format of the line break you want to unify to. ie \r\n or \n
		 * @return string
		 */
		private function _unifyLineBreaks($html)
		{
		    return preg_replace ("/\015\012|\015|\012/", $this->_options['line_break'], $html);
		}
		
		/**
		 * Compresses white space vertically (ie line breaks) whilst preserving
		 * textarea and pre content. This uses the classes '_simpleCodeCompress' to compress
		 * the javascript, however it would be advisable to use another library such as 
		 * 'minify' http://code.google.com/p/minify/ because this function has certain 
		 * limitations with comments and other regex expressions. You can set another function 
		 * callback using the 'compress_js_callback' option.
		 *
		 * @access private
		 * @param string $html
		 * @return string
		 */
		private function _compressScriptAndStyleTags($html)
		{
			$compress_scripts = $this->_options['compress_scripts'];
			$compress_css = $this->_options['compress_css'];
			$use_script_callback = $this->_options['script_compression_callback'] != false;
// 			pregmatch all the script tags
			$scripts = preg_match_all("!(<(style|script)[^>]*>(?:\\s*<\\!--)?)(.*?)((?://-->\\s*)?</(style|script)>)!is", $html, $scriptparts);
// 			collect and compress the parts
			$compressed = array();
			$parts = array();
			for($i=0; $i<count($scriptparts[0]); $i++)
			{
				$code = trim($scriptparts[3][$i]);
				$not_empty = !empty($code);
				$is_script = ($compress_scripts && $scriptparts[2][$i] == 'script');
				if($not_empty && ($is_script || ($compress_css && $scriptparts[2][$i] == 'style')))
				{
					if($is_script && $use_script_callback)
					{
						$callback_args = $this->_options['script_compression_callback_args'];
						if(gettype($callback_args) !== 'array')
						{
							$callback_args = array($callback_args);
						}
						array_unshift($callback_args, $code);
						$minified = call_user_func_array($this->_options['script_compression_callback'], $callback_args);
					}
					else
					{
						$minified = $this->_simpleCodeCompress($code);
					}
					array_push($parts, $scriptparts[0][$i]);
					array_push($compressed, trim($scriptparts[1][$i]).$minified.trim($scriptparts[4][$i]));
				}
			}
// 			do the replacements and return
			return str_replace($parts, $compressed, $html);
		}
		
		/**
		 * Use simple preg_replace to compresses code (ie javascript and css) whitespace.
		 * It would be advisable to use another library such as 'minify' http://code.google.com/p/minify/
		 * because this function has certain limitations with comments and other regex expressions.
		 * You can set another function callback using the 'compress_js_callback' option.
		 *
		 * @access private
		 * @param string $code Code string
		 * @return string
		 **/
		private function _simpleCodeCompress($code)
		{
// 			Remove multiline comment
			$code = preg_replace('/\/\*(?!-)[\x00-\xff]*?\*\//', '', $code);
// 			Remove single line comment
// 			$code = preg_replace('/[^:]\/\/.*/', '', $code);
			$code = preg_replace('/\\/\\/[^\\n\\r]*[\\n\\r]/', '', $code);
			$code = preg_replace('/\\/\\*[^*]*\\*+([^\\/][^*]*\\*+)*\\//', '', $code);
// 			Remove extra spaces
			$code = preg_replace('/\s+/', ' ', $code);
// 			Remove spaces that can be removed
			return preg_replace('/\s?([\{\};\=\(\)\/\+\*-])\s?/', "\\1", $code);
		}
		
	    /**
	     * Determine the client's best encoding method from the HTTP Accept-Encoding 
	     * header.
	     * @access private
	     * @return array two values, 1st is the actual encoding method, 2nd is the
	     * alias of that method to use in the Content-Encoding header (some browsers
	     * call gzip "x-gzip" etc.)
	     */
		private function _getAcceptedEncoding()
		{
			if (! isset($_SERVER['HTTP_ACCEPT_ENCODING']) || $this->_isBrokenInternetExplorer())
			{
				return array('', '');
			}
			if (preg_match('@(?:^|,)\s*((?:x-)?gzip)\s*(?:$|,|;\s*q=(?:0\.|1))@', $_SERVER['HTTP_ACCEPT_ENCODING'], $matches))
			{
				return array('gzip', $matches[1]);
			}
			if (preg_match('@(?:^|,)\s*deflate\s*(?:$|,|;\s*q=(?:0\.|1))@', $_SERVER['HTTP_ACCEPT_ENCODING']))
			{
				return array('deflate', 'deflate');
			}
			if (preg_match('@(?:^|,)\s*((?:x-)?compress)\s*(?:$|,|;\s*q=(?:0\.|1))@', $_SERVER['HTTP_ACCEPT_ENCODING'], $matches))
			{
				return array('compress', $matches[1]);
			}
			return array('', '');
		}
				
	    /**
	     * Determines if the client is a buggy version of Internet Explorer
	     * @access private
	     * @return boolean
	     */
		private function _isBrokenInternetExplorer()
		{
			if (strstr($_SERVER['HTTP_USER_AGENT'], 'Opera') || !preg_match('/^Mozilla\/4\.0 \(compatible; MSIE ([0-9]\.[0-9])/i', $_SERVER['HTTP_USER_AGENT'], $matches))
			{
				return false;
			}
			$version = floatval($matches[1]);
			return $version < 6 || ($version == 6 && !strstr($_SERVER['HTTP_USER_AGENT'], 'SV1'));
		}
		
	    /**
	     * Determines if the client is a buggy version of Internet Explorer
	     * @access private
	     * @param string $html The html to deflate.
	     * @return string Returns html on encoding failure or compressed data on success
	     */
		private function _deflate($html)
		{
			$encoding = $this->_getAcceptedEncoding();
			$this->_deflate_encoding = $encoding[1];
			$deflate_level = is_bool($this->_options['deflate']) ? 6 : $this->_options['deflate'];
			if (empty($encoding[0]) || !$deflate_level || !extension_loaded('zlib'))
			{
				return $html;
			}
			if ($encoding[0] === 'gzip')
			{
				$encoded = gzencode($html, $deflate_level);
			}
			else if ($encoding[0] === 'deflate')
			{
				$encoded = gzdeflate($html, $deflate_level);
			} 
			else
			{
				$encoded = gzcompress($html, $deflate_level);
			}
			return $encoded === false ? $html : $encoded;
		}
		
		/**
		 * Outputs the deflated headers necessary for sending
		 * delflated content.
		 * @access public
		 * @param string $html The html/delfated data that has been encoded.
		 */
		public function outputDeflateHeaders($html)
		{
			header('Content-Length: '.strlen($html));
			header('Content-Encoding: '.$this->_deflate_encoding);
			header('Vary: Accept-Encoding');
		}

		
	}