<?php
/**
* Transform the awful HTML of Flash into standardized HTML.
*/
class Flash2HTML {

	/** XML parser instance; only set while html() is running. */
	private $parser;

	/** Stack of the entities whose start tag was processed and whose end tag is still pending. */
	private $nodes = array();

	/**
	 * Tag configuration keyed by source tag name. Each entry is an array with the
	 * keys "tag", "attributes", "closed", "start_tag_handler", "data_tag_handler"
	 * and "end_tag_handler".
	 */
	private $entities = array();

	/** Output buffer filled by the parser callbacks. */
	private $content = NULL;

	/** Optional user callback that replaces the default character data handling. */
	private $dataHandler = NULL;

	/** Placeholder prefix that stands in for special characters during one html() run. */
	private $mask = NULL;

	/** When TRUE, tags are dropped and only the text (one line per paragraph) is returned. */
	public $plainText = FALSE;

	/** When TRUE, everything following "mailto:" is obfuscated with hexadecimal character references. */
	public $protectEmail = FALSE;

	/**
	 * Legacy PHP 4 style constructor name, kept so that existing explicit calls
	 * (for instance parent::Flash2HTML() in a subclass) continue to work.
	 *
	 * PHP 8 no longer treats a method named after the class as a constructor, so
	 * the real initialisation lives in __construct(). This method is declared
	 * first on purpose: PHP 5 raises an E_STRICT notice for the opposite order.
	 *
	 * @deprecated Use "new Flash2HTML()".
	 */
	public function Flash2HTML() {
		$this->__construct();
	}

	/**
	 * Registers the default tag transformations and handlers.
	 */
	public function __construct() {
		$this->setTagTransformation('B', 'strong');
		$this->setTagTransformation('I', 'em');
		$this->setTagTransformation('LI', 'li');
		$this->setStartTagHandler('A', array($this, "a_startTagHandler"));
		$this->setStartTagHandler('FONT', array($this, "font_startTagHandler"));
		$this->setStartTagHandler('IMG', array($this, "img_startTagHandler"));
		$this->setStartTagHandler("P", array($this, "p_startTagHandler"));
		$this->setEndTagHandler("P", array($this, "p_endTagHandler"));
		$this->setStartTagHandler('TEXTFORMAT', array($this, "textformat_startTagHandler"));
		$this->setStartTagHandler('U', array($this, "u_startTagHandler"));
	}

	/**
	 * Shortcut for html(), so that the object can be called like a function.
	 *
	 * @param String $data
	 * @param Array $properties
	 * @return String
	 */
	public function __invoke($data, $properties = array()) {
		return $this->html($data, $properties);
	}

	/**
	 * Forgets a tag: its start and end tags are no longer emitted, while the text
	 * it contains is still emitted.
	 *
	 * @param String $tag Source tag name as registered (the parser reports names in upper case)
	 */
	public function ignoreTag($tag) {
		unset($this->entities[$tag]);
	}

	/**
	 * Maps a source tag onto an output tag. Any handler previously registered for
	 * the source tag is reset.
	 *
	 * $to_tag is either a bare tag name ("strong") or a tag with attributes
	 * ('<span class="x">', '<br class="x" />'). Any other value is silently ignored.
	 *
	 * @param String $from_tag Source tag name; the parser reports names in upper case
	 * @param String $to_tag
	 */
	public function setTagTransformation($from_tag, $to_tag) {
		if (preg_match('/^\w+$/', $to_tag, $matches)) {
			$this->entities[$from_tag] = $this->createEntity($matches[0], NULL, FALSE);
		} elseif (preg_match('/^<(\w+)\s+(.*)(\/?)>$/U', $to_tag, $matches)) {
			$this->entities[$from_tag] = $this->createEntity($matches[1], $matches[2], $matches[3] == "/");
		}
	}

	/**
	 * Registers the callback producing the output of a start tag. It is called
	 * with ($parser, $tag, $attrs) and returns the markup to emit.
	 *
	 * @param String $tag
	 * @param Function $start_tag_handler
	 */
	public function setStartTagHandler($tag, $start_tag_handler) {
		$this->ensureEntity($tag);
		$this->entities[$tag]["start_tag_handler"] = $start_tag_handler;
	}

	/**
	 * Registers a callback that replaces the default character data handling. It
	 * is called with ($parser, $cdata), where $cdata is the raw decoded text, and
	 * its return value is emitted as is.
	 *
	 * @param Function $data_handler
	 */
	public function setDataHandler($data_handler) {
		$this->dataHandler = $data_handler;
	}

	/**
	 * Registers the callback producing the output of an end tag. It is called
	 * with ($parser, $tag) and returns the markup to emit.
	 *
	 * @param String $tag
	 * @param Function $end_tag_handler
	 */
	public function setEndTagHandler($tag, $end_tag_handler) {
		$this->ensureEntity($tag);
		$this->entities[$tag]["end_tag_handler"] = $end_tag_handler;
	}

	/**
	 * Removes the start tag handler of a tag. Unknown tags are left alone:
	 * creating a half-initialised entry for them would make the parser emit
	 * broken markup.
	 *
	 * @param String $tag
	 */
	public function removeStartTagHandler($tag) {
		if (isset($this->entities[$tag])) {
			$this->entities[$tag]["start_tag_handler"] = NULL;
		}
	}

	/**
	 * Removes the data tag handler of a tag; unknown tags are left alone.
	 *
	 * @param String $tag
	 */
	public function removeDataTagHandler($tag) {
		if (isset($this->entities[$tag])) {
			$this->entities[$tag]["data_tag_handler"] = NULL;
		}
	}

	/**
	 * Removes the end tag handler of a tag; unknown tags are left alone.
	 *
	 * @param String $tag
	 */
	public function removeEndTagHandler($tag) {
		if (isset($this->entities[$tag])) {
			$this->entities[$tag]["end_tag_handler"] = NULL;
		}
	}

	/**
	 * Builds a complete entity record without any handler.
	 */
	private function createEntity($tag, $attributes, $closed) {
		return array(
			"tag" => $tag,
			"attributes" => $attributes,
			"closed" => $closed,
			"start_tag_handler" => NULL,
			"data_tag_handler" => NULL,
			"end_tag_handler" => NULL,
		);
	}

	/**
	 * Makes sure a complete entity record exists for $tag, defaulting to the
	 * lower-cased tag name as output tag.
	 */
	private function ensureEntity($tag) {
		if (array_key_exists($tag, $this->entities)) {
			return;
		}
		$this->setTagTransformation($tag, strtolower($tag));
		if (!array_key_exists($tag, $this->entities)) {
			// setTagTransformation() rejects names such as "my-tag"; register them directly.
			$this->entities[$tag] = $this->createEntity(strtolower($tag), NULL, FALSE);
		}
	}

	/**
	 * Paragraphs have no opening markup, they are only terminated by a line break.
	 */
	private function p_startTagHandler($parser, $tag, $attrs) {
		return NULL;
	}

	private function p_endTagHandler($parser, $tag) {
		return "<br />\n";
	}

	/**
	 * Converts Flash markup into HTML.
	 *
	 * The result is pure ASCII: every non-ASCII character is written as a
	 * hexadecimal character reference (for example "&#x00e9;").
	 *
	 * @param String $data Flash markup, UTF-8 encoded (invalid UTF-8 yields an empty result)
	 * @param Array $properties Property values (e.g. plainText, protectEmail) applied to this call only
	 * @return String
	 */
	public function html($data, $properties = array()) {
		// Apply the per-call overrides; unknown names are skipped instead of
		// silently creating dynamic properties.
		$original_properties = array();
		foreach ($properties as $key => $value) {
			if (!property_exists($this, $key)) {
				continue;
			}
			$original_properties[$key] = $this->$key;
			$this->$key = $value;
		}

		// Hide non-ASCII and control characters behind an ASCII placeholder so that
		// they travel through the parser untouched; they come back as character
		// references at the end.
		$mask = uniqid("unicode_");
		$this->mask = $mask;
		$masked = preg_replace_callback('/[\x00-\x08\x0B\x0C\x0E-\x1F]|[^\x00-\x7F]/u', array($this, "maskCharacterCallback"), (string) $data);
		$data = $masked === NULL ? '' : $masked;

		// Start from a clean state, even if a previous run stopped on malformed markup.
		$this->content = '';
		$this->nodes = array();

		$this->parser = xml_parser_create('UTF-8');
		xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, 1);
		xml_set_element_handler($this->parser, array($this, "startTagHandler"), array($this, "endTagHandler"));
		xml_set_character_data_handler($this->parser, array($this, "dataHandler"));
		// Best effort: on malformed markup whatever was converted so far is kept.
		xml_parse($this->parser, $this->prepare($data), TRUE);
		if (PHP_VERSION_ID < 80000) {
			// Parsers are objects as of PHP 8 and are released automatically.
			xml_parser_free($this->parser);
		}
		$this->parser = NULL;

		$ret = $this->content;
		$this->content = NULL;
		$this->nodes = array();

		// Characters the parser decoded from numeric references (an en dash, a
		// curly quote...) are still raw UTF-8 at this point: encode them too.
		$encoded = preg_replace_callback('/[^\x00-\x7F]/u', array($this, "encodeCharacterCallback"), $ret);
		if ($encoded !== NULL) {
			$ret = $encoded;
		}

		if ($this->protectEmail) {
			$ret = preg_replace_callback("/mailto:(.*)\"/U", array($this, "protectEmailCallback"), $ret);
		}

		// restore original properties
		foreach ($original_properties as $key => $value) {
			$this->$key = $value;
		}

		$ret = preg_replace_callback("/$mask([0-9a-f]{6})/", array($this, "unmaskCharacterCallback"), $ret);
		$this->mask = NULL;

		// Drop the trailing line break, alone or just before the closing div.
		$ret = preg_replace(array("/<br \/>\n$/", "/<br \/>\n<\/div>$/"), array('', "</div>"), $ret);
		// Drop empty spans.
		$ret = preg_replace("/<span[^>]*><\/span>/U", '', $ret);
		// Links are underlined anyway: unwrap the underline span inside them.
		$ret = preg_replace("/<a([^>]*)><span style=\"text-decoration: underline; \">(.*)<\/span><\/a>/U", "<a\$1>\$2</a>", $ret);
		// Wrap list items into a list and lay it out over several lines.
		$ret = preg_replace("/<li>(.*)<\/li>/", "<ul><li>$1</li>\n</ul>\n", $ret);
		$ret = preg_replace("/<li>/", "\n\t<li>", $ret);
		$ret = preg_replace("/<br \/>\n<ul>/", "<ul>", $ret);
		$ret = preg_replace("/<\/ul>\n<br \/>/", "</ul>", $ret);
		return $ret;
	}

	/**
	 * Turns the Flash markup into a well-formed XML document.
	 *
	 * The fixes are applied in plain text mode as well: the parser needs
	 * well-formed input whatever the output format is.
	 */
	private function prepare($data) {
		$data = preg_replace_callback('/HREF="(.*)"/Ui', array($this, "fixHREFCallback"), $data);
		// Flash leaves IMG open; tags that are already self-closed stay as they are.
		$data = preg_replace('/<IMG\b([^>]*?)\s*\/?>/i', '<IMG$1 />', $data);
		return "<root>$data</root>";
	}

	/**
	 * Parser callback: emits the markup of a start tag and remembers the entity
	 * for the matching end tag. Unknown tags are dropped.
	 */
	private function startTagHandler($parser, $tag, $attrs) {
		if ($this->plainText) {
			return;
		}
		if (!array_key_exists($tag, $this->entities)) {
			return;
		}
		$entity = $this->entities[$tag];
		if ($entity["start_tag_handler"] != NULL) {
			$str = (string) call_user_func($entity["start_tag_handler"], $parser, $tag, $attrs);
			if (preg_match('/^<(\w+)/', $str, $matches)) {
				// The end tag has to match what the handler actually opened.
				$entity["tag"] = $matches[1];
				$entity["closed"] = strpos($str, "/>") !== FALSE;
			} elseif ($str === '' && $entity["end_tag_handler"] == NULL) {
				// Nothing was opened, so there is nothing to close.
				$entity["closed"] = TRUE;
			}
			$this->content .= $str;
		} else {
			$this->content .= "<$entity[tag]";
			if ((string) $entity["attributes"] !== '') {
				$this->content .= " $entity[attributes]";
			}
			$this->content .= $entity["closed"] ? " />" : ">";
		}
		array_push($this->nodes, $entity);
	}

	/**
	 * Parser callback: emits character data.
	 *
	 * The parser hands over decoded text ("&amp;" arrives as "&"), so it is
	 * escaped again. Runs of whitespace become non-breaking spaces because HTML
	 * would otherwise collapse them.
	 */
	private function dataHandler($parser, $cdata) {
		if ($this->plainText) {
			$this->content .= $cdata;
			return;
		}
		if ($this->dataHandler != NULL) {
			$this->content .= call_user_func($this->dataHandler, $parser, $cdata);
			return;
		}
		$str = htmlspecialchars($cdata, ENT_NOQUOTES, 'UTF-8');
		$this->content .= preg_replace_callback("/\s{2,}/", array($this, "replaceSpacesCallback"), $str);
	}

	/**
	 * Parser callback: emits the end tag of the entity opened last.
	 */
	private function endTagHandler($parser, $tag) {
		if (!array_key_exists($tag, $this->entities)) {
			return;
		}
		if ($this->plainText) {
			if ($tag == "P") {
				$this->content .= "\n";
			}
			return;
		}
		$entity = array_pop($this->nodes);
		if ($entity === NULL || $entity["closed"]) {
			return;
		}
		if ($entity["end_tag_handler"] != NULL) {
			$this->content .= call_user_func($entity["end_tag_handler"], $parser, $tag);
		} else {
			$this->content .= "</$entity[tag]>";
		}
	}

	private function a_startTagHandler($parser, $tag, $attrs) {
		$ret = '<a href="' . $this->escapeAttribute(isset($attrs["HREF"]) ? $attrs["HREF"] : '') . '"';
		if (isset($attrs["TARGET"]) && strlen($attrs["TARGET"]) > 0) {
			$ret .= ' target="' . $this->escapeAttribute($attrs["TARGET"]) . '"';
		}
		return $ret . '>';
	}

	/**
	 * FONT becomes a span carrying the equivalent CSS declarations.
	 */
	private function font_startTagHandler($parser, $tag, $attrs) {
		$declarations = array(
			"FACE" => "font-family: '%s'; ",
			"SIZE" => "font-size: %spx; ",
			"COLOR" => "color: %s; ",
			"LEADING" => "line-height: %spx; ",
			"LETTERSPACING" => "letter-spacing: %spx; ",
		);
		$ret = '<span style="';
		foreach ($declarations as $attribute => $format) {
			if (array_key_exists($attribute, $attrs)) {
				$ret .= sprintf($format, $this->escapeAttribute($attrs[$attribute]));
			}
		}
		return $ret . '">';
	}

	/**
	 * Images float to the right when ALIGN is "right", to the left otherwise.
	 */
	private function img_startTagHandler($parser, $tag, $attrs) {
		$floats_right = isset($attrs["ALIGN"]) && $attrs["ALIGN"] === "right";
		$style = $floats_right ? "float: right; margin-left: 10px; " : "float: left; margin-right: 10px; ";
		$src = isset($attrs["SRC"]) ? $this->escapeAttribute($attrs["SRC"]) : '';
		return "<img style=\"$style\" src=\"$src\" alt=\"\" />";
	}

	/**
	 * TEXTFORMAT becomes a div, but only when it carries layout information.
	 */
	private function textformat_startTagHandler($parser, $tag, $attrs) {
		$has_indent = array_key_exists("BLOCKINDENT", $attrs);
		$has_leading = array_key_exists("LEADING", $attrs);
		if (!$has_indent && !$has_leading) {
			return NULL;
		}
		$ret = '<div style="';
		if ($has_indent) {
			$ret .= "margin-left: " . $this->escapeAttribute($attrs["BLOCKINDENT"]) . "px; ";
		}
		if ($has_leading) {
			$ret .= "margin-bottom: " . $this->escapeAttribute($attrs["LEADING"]) . "px; ";
		}
		return $ret . '">';
	}

	private function u_startTagHandler($parser, $tag, $attrs) {
		return '<span style="text-decoration: underline; ">';
	}

	/**
	 * Escapes a value for use inside a quoted HTML attribute.
	 */
	private function escapeAttribute($value) {
		return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
	}

	/**
	 * Returns the Unicode code point of a single UTF-8 encoded character.
	 */
	private function codePoint($char) {
		$length = strlen($char);
		$code = ord($char[0]);
		if ($length === 1) {
			return $code;
		}
		// The lead byte starts with one marker bit per byte of the sequence, plus a zero.
		$code &= 0xFF >> ($length + 1);
		for ($i = 1; $i < $length; $i++) {
			$code = ($code << 6) | (ord($char[$i]) & 0x3F);
		}
		return $code;
	}

	/**
	 * Replaces a character by the placeholder followed by its code point on six hexadecimal digits.
	 */
	private function maskCharacterCallback($matches) {
		return $this->mask . sprintf('%06x', $this->codePoint($matches[0]));
	}

	/**
	 * Turns a placeholder into a character reference (at least four hexadecimal digits).
	 */
	private function unmaskCharacterCallback($matches) {
		return sprintf('&#x%04x;', hexdec($matches[1]));
	}

	/**
	 * Turns a raw character into a character reference (at least four hexadecimal digits).
	 */
	private function encodeCharacterCallback($matches) {
		return sprintf('&#x%04x;', $this->codePoint($matches[0]));
	}

	/**
	 * Replaces each character of a whitespace run by a non-breaking space.
	 */
	private function replaceSpacesCallback($matches) {
		return str_repeat("&nbsp;", strlen($matches[0]));
	}

	/**
	 * Rewrites what follows "mailto:" as hexadecimal character references, byte by byte.
	 */
	private function protectEmailCallback($matches) {
		$ret = '';
		$str = $matches[1];
		$len = strlen($str);
		for ($i = 0; $i < $len; $i++) {
			$ret .= "&#x" . strtoupper(dechex(ord($str[$i]))) . ";";
		}
		return "mailto:$ret\"";
	}

	/**
	 * Makes an HREF value safe for the XML parser: Flash writes raw ampersands in
	 * URLs. References the parser understands are kept, so that a value that is
	 * already escaped is not escaped twice.
	 */
	private function fixHREFCallback($matches) {
		$href = preg_replace('/&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);)/', '&amp;', $matches[1]);
		$href = str_replace(array('<', '>'), array('&lt;', '&gt;'), $href);
		return 'HREF="' . $href . '"';
	}
}
?>
|