<?php
/**
* Class XMLDebugger version 1.1
* @abstract Simple XML debugger that reads the document line by line until it finds a mistake.
* @example http://xml.bubaweb.com/php_xml_debugger.html
* @author Thales Jacobi @link http://www.thalesjacobi.com
* @version 1.0
* @todo Update all class debugging lines to print XML elements
* Hosted by: Bubaweb.com @link http://xml.bubaweb.com/
* Created at 08/SEP/09
*/
/**
 * Heuristic, text-based checker for XML documents.
 *
 * The class lower-cases the XML text, splits it into tag-sized chunks with a
 * regular expression and inspects every chunk with plain string functions
 * (it does not use a real XML parser). The outcome is returned by do_debug()
 * as an XML report whose root element is <debbug_result>.
 */
class XMLDebugger
{
    /** XML text inspected by do_debug(); filled in by load_xml(). */
    public $xml='';
    /** Lower-cased source type; the value 'url' makes load_xml() fetch $source with cURL. */
    public $param='';
    /** Either the URL to download or the XML text itself, depending on $param. */
    public $source='';
    /** When true, do_debug() echoes its intermediate arrays and adds <debug> nodes to the report. */
    public $error_check=false;

    /**
     * Construct
     * If $auto_load_xml is set to false, function load_xml() must be called manually.
     *
     * When $param or $source is empty nothing is stored; a later do_debug()
     * call then returns the standard class error report because $xml is empty.
     *
     * @param mixed $param source type; 'url' (any letter case) downloads $source, anything else treats $source as XML text
     * @param mixed $source URL or XML text
     * @param mixed $auto_load_xml call load_xml() right away when true
     */
    public function __construct($param='',$source='',$auto_load_xml=true)
    {
        if(!$param || !$source)return;
        $this->param=strtolower($param);
        $this->source=$source;
        if($auto_load_xml)$this->load_xml();
    }

    /**
     * Legacy PHP 4-style constructor name, kept so explicit calls keep working.
     *
     * PHP 8 no longer treats a method named after the class as the constructor,
     * so the real work lives in __construct() and this method only delegates.
     *
     * @param mixed $param
     * @param mixed $source
     * @param mixed $auto_load_xml
     * @return string|null the class error report when $param or $source is empty, otherwise nothing
     */
    public function XMLDebugger($param='',$source='',$auto_load_xml=true)
    {
        $this->__construct($param,$source,$auto_load_xml);
        if(!$param || !$source)return $this->output_class_error();
    }

    /**
     * Class debug
     * Switches the verbose debugging output of do_debug() on or off.
     *
     * @param mixed $do_or_not
     */
    public function do_error_checking($do_or_not=false){ $this->error_check=$do_or_not; }

    /**
     * Standard class error message output
     *
     * @return string XML report holding a single <system_message> element
     */
    public function output_class_error()
    {
        $dom = new DomDocument('1.0');
        $debbug_result = $dom->appendChild($dom->createElement('debbug_result'));
        $system_message = $debbug_result->appendChild($dom->createElement('system_message'));
        $system_message->appendChild($dom->createTextNode("No parameter indentified. Refer to <a href='http://xml.bubaweb.com/php_xml_debbuger.html'>http://xml.bubaweb.com/php_xml_debbuger.html</a> for more information."));
        $dom->formatOutput = true;
        return $dom->saveXML();
    }

    /**
     * Load XML from the URL stored in $this->source. It can be a news feed (any type of) or any other XML document.
     *
     * The cURL result is stored in $this->xml as returned; curl_exec() yields
     * false on failure, which do_debug() treats the same as an empty document.
     */
    public function load_xml_from_url()
    {
        # NOTE(review): $this->source is handed to cURL unvalidated - confirm callers only pass trusted URLs
        $curl= curl_init($this->source);
        curl_setopt ($curl, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt ($curl, CURLOPT_USERAGENT, 'PHP XML Debugger v1.0 (+http://xml.bubaweb.com/php_xml_debbuger.html)');
        $this->xml=curl_exec ($curl);
    }

    /**
     * Set the XML variable: download it when $param is 'url', otherwise use $source as the XML text.
     */
    public function load_xml()
    {
        if($this->param=='url')$this->load_xml_from_url();
        else $this->xml=$this->source;
    }

    /**
     * Execute XML Debugger
     *
     * Works on a lower-cased copy of $this->xml. Every problem found becomes a
     * child element of <debbug_result>; when none is found a <success> element
     * is added, and warnings are appended as <warning_*> elements. With
     * $error_check enabled the intermediate arrays are echoed as well.
     *
     * @return string XML report (or the class error report when $this->xml is empty)
     */
    public function do_debug()
    {
        $dom = new DomDocument('1.0');
        $debbug_result = $dom->appendChild($dom->createElement('debbug_result'));
        # Class validation
        if(!$this->xml) return $this->output_class_error();
        # Load XML
        $xml=trim(strtolower($this->xml));
        $xml=str_replace(array('"/>',"'/>"),array('" />',"' />"),$xml);
        # The pattern sits in a double-quoted string, so "\n" is a real newline that the x modifier
        # ignores: the text before the first "<" becomes item 0 and the first tag becomes item 1
        $xml_exploded=preg_split("/\>\n?\s?\</x",$xml);
        # @ DEBUG
        if($this->error_check)echo "<pre>".print_r($xml_exploded,true)."</pre>";
        # Vars (all initialised up front so no branch reads an undefined variable)
        $doc_tag_correct=false;
        $error=false;
        $DOCTYPE_found=false;
        $CDATA_skip=false;
        $doc_tag=null;
        $xml_array=array();
        $tags=array();
        $warning=false;
        $warning_msgs=array();
        # Check first elements and build main array
        if(isset($xml_exploded[1]) && strstr($xml_exploded[1],"xml") && strstr($xml_exploded[1],"version"))
        {
            foreach ($xml_exploded as $arr_key=>$line)
            {
                if(
                    in_array($arr_key,array(0,1)) ||
                    substr($line,0,4)=='?xml' || # ignore XSLT declarations
                    substr($line,0,3)=='!--' # ignore comments
                ) continue;
                if($line){ $xml_array[]=$this->dublecheck_xml_line($line); }# build array with what's important
            }
            unset($xml_exploded);
        }
        else
        {
            # preg_match() always leaves an array in its third argument, so the return value
            # (number of matches) is what tells whether the declaration exists
            if(!preg_match("/\<\?xml(.*)\?\>/i",$xml,$matches))
            {
                $xml_message = $debbug_result->appendChild($dom->createElement('first_line_missing'));
                $xml_message->appendChild($dom->createTextNode("First XML declaration wasn't found"));
                $error=true;
            }
        }
        # Read XML (skipped entirely when no line was collected above)
        if(!empty($xml_array))
        {
            # @ DEBUG
            if($this->error_check)echo "<pre>".print_r($xml_array,true)."</pre>";
            foreach ($xml_array as $key=>$line)
            {
                $line=trim($line);
                if($this->error_check)echo "\n".$line."\n<br />";
                # -- Dealing with CDATA <version>1.1</version>
                if(substr($line,0,8)=='![cdata[')$CDATA_skip=true;
                if(substr($line,-2)==']]' || substr($line,-3)==']]>'){ $CDATA_skip=false; continue; }
                if($CDATA_skip)continue;
                # -- Fetching DOCTYPE. Not interpreting it! <version>1.2</version>
                if(substr($line,0,8)=='!doctype')
                {
                    if(!$DOCTYPE_found)$DOCTYPE_found=true;
                    else
                    {
                        $xml_message = $debbug_result->appendChild($dom->createElement('too_many_doctypes'));
                        $xml_message->appendChild($dom->createTextNode("Two DOCTYPES found, should be only one don't you think?"));
                        $error=true;
                    }
                    continue; # stop reading this line 'cause it won't be well formatted
                }
                # -- Dealing with 'normal' tags
                # Get 'normal' open tag
                if(substr($line,0,1)!='/')
                {
                    # Get tag content
                    $tag=explode(">",$line);
                    # Get attributes
                    $tag[0]=trim($tag[0]);# Trim introduced <version>1.1</version>
                    $single_quote_check_attrs=explode("' ",$tag[0]);# Accept spaces in attribute values introduced <version>1.2</version>
                    $double_quote_check_attrs=explode('" ',$tag[0]);
                    if(sizeof($single_quote_check_attrs)==sizeof($double_quote_check_attrs))$attrs=explode(" ",$tag[0]);
                    elseif(sizeof($single_quote_check_attrs)>sizeof($double_quote_check_attrs))
                    {
                        foreach ($single_quote_check_attrs as $sqk=>$sqv)
                        {
                            # explode() swallowed the closing quote; give it back to the pieces that lost it
                            if($this->attribute_valid($sqv))# Double validate values <version>1.3</version>
                            {
                                $single_quote_check_attrs[$sqk]=$sqv."'";
                            }
                        }
                        $attrs=$single_quote_check_attrs;
                    }
                    elseif(sizeof($single_quote_check_attrs)<sizeof($double_quote_check_attrs))
                    {
                        foreach ($double_quote_check_attrs as $sqk=>$sqv)
                        {
                            if($this->attribute_valid($sqv))
                            {
                                $double_quote_check_attrs[$sqk]=$sqv.'"';
                            }
                        }
                        $attrs=$double_quote_check_attrs;
                    }
                    # if it's the first tag, it's the doc tag
                    if($key==0)
                    {
                        $doc_tag=$this->dublecheck_xml_tag($attrs[0]);#Get the tag name if attributes pass through the previous checks <version>1.3</version>
                    }
                    $current_tag_name=$this->dublecheck_xml_tag($attrs[0]);
                    # -- Register open tag
                    $tags[$current_tag_name]['opened']=true;
                    # Treat attr
                    if(is_array($attrs))
                    {
                        unset($attrs[0]);
                        foreach ($attrs as $attr)
                        {
                            $attr_name='';$attr_val='';$single_quote=0;$double_quote=0;
                            $attr=trim($attr);
                            # Attributes treatment
                            if(substr_count($attr,"=")>=2)
                            {
                                # value itself contains '=': glue the remaining pieces back onto the value
                                $tmp_attr=explode("=",$attr);
                                foreach($tmp_attr as $k=>$v){ if($k!=0&&$k!=1){ $tmp_attr[1].=$v; }}
                                $attr_name=$tmp_attr[0];
                                $attr_val=$tmp_attr[1];
                                unset($tmp_attr);
                            }
                            # pad to two items so a piece without '=' (e.g. the '/' of a self-closing tag) yields an empty value
                            else list($attr_name,$attr_val)=array_pad(explode("=",$attr),2,'');
                            $attr_name=trim($attr_name);
                            $attr_val=trim($attr_val);
                            # Single tags closing
                            if($attr_name=='/')
                            {
                                $tags[$current_tag_name]['closed']=true;
                                if($doc_tag==$current_tag_name)$doc_tag_correct=true;
                                continue;
                            }
                            if(!$attr_name)
                            {
                                $xml_message = $debbug_result->appendChild($dom->createElement('attribute_name_missing'));
                                $xml_message->appendChild($dom->createTextNode("Every <u>attribute</u> must have a <strong>name</strong>"));
                                $error=true;
                            }
                            if(!$attr_val)
                            {
                                $xml_message = $debbug_result->appendChild($dom->createElement('attribute_value_missing'));
                                $xml_message->appendChild($dom->createTextNode("Every <u>attribute</u> must have a <strong>value</strong> (tag <".utf8_encode($current_tag_name).">, attribute ".utf8_encode($attr_name).")"));
                                $error=true;
                            }
                            # -- Register attr.
                            if($attr_name && $attr_val)
                            {
                                # count how many quote characters of each kind the value contains
                                foreach (count_chars($attr_val, 1) as $i => $val)
                                {
                                    # @ DEBUG
                                    if($this->error_check)
                                    {
                                        $system_message = $debbug_result->appendChild($dom->createElement('debug'));
                                        $system_message->appendChild($dom->createTextNode("DEBUG: $i = ".chr($i)." <small>(line ".__LINE__.")</small>"));
                                    }
                                    # Quote marks
                                    if(chr($i)=="'")$single_quote=$val;
                                    elseif(chr($i)=='"')$double_quote=$val;
                                }
                                # @ DEBUG
                                if($this->error_check)
                                {
                                    $system_message = $debbug_result->appendChild($dom->createElement('debug'));
                                    $system_message->appendChild($dom->createTextNode("DEBUG: single_quote: ".utf8_encode($single_quote)." double_quote: ".utf8_encode($double_quote)));
                                }
                                # Check quotes around attribute values
                                if(($single_quote==2 && $double_quote!=0) || ($double_quote==2 && $single_quote!=0))
                                {
                                    $xml_message = $debbug_result->appendChild($dom->createElement('quote_mark_problem'));
                                    $xml_message->appendChild($dom->createTextNode("Too many quotation marks on the <u>attribute's values</u> (tag ".utf8_encode($current_tag_name).", attribute ".utf8_encode($attr_name).")"));
                                    $error=true;
                                }
                                elseif(($single_quote==1 && $double_quote!=0) || ($double_quote==1 && $single_quote!=0))
                                {
                                    $xml_message = $debbug_result->appendChild($dom->createElement('quote_mark_problem'));
                                    $xml_message->appendChild($dom->createTextNode("Quote marks to open and close must be the same on the <u>attribute's values</u> (tag ".utf8_encode($current_tag_name).", attribute ".utf8_encode($attr_name).")"));
                                    $error=true;
                                }
                                elseif($single_quote!=2 && $double_quote!=2)
                                {
                                    $xml_message = $debbug_result->appendChild($dom->createElement('quote_mark_problem'));
                                    $xml_message->appendChild($dom->createTextNode("Quote marks must open and close the <u>attribute's values</u> (tag ".utf8_encode($current_tag_name).", attribute ".utf8_encode($attr_name).")"));
                                    $error=true;
                                }
                            }
                        }
                    }
                }
                else
                {
                    # If not all content of the XML is encapsulated by a tag, this part of the code will simply strip that out
                    # introduced <version>1.2</version>
                    $line=substr($line,0,strpos($line,">"));
                    # register closing tag
                    $end_tag=str_replace(array("/",">"),array("",""),$line);
                    $tags[$end_tag]['closed']=true;
                    if($doc_tag==$end_tag)$doc_tag_correct=true;
                }
            }
            if(!$doc_tag_correct)
            {
                $xml_message = $debbug_result->appendChild($dom->createElement('document_tag_missing'));
                $xml_message->appendChild($dom->createTextNode("<strong>Document main tag is missing</strong>."));
                $error=true;
            }
            if(!$DOCTYPE_found)
            {
                $warning_msgs['doctype_not_found']="DOCTYPE tag wasn't found near the top of the XML. It isn't compulsory, but you might face problems with web services that aren't flexible!";
                $warning=true;
            }
            # every tag name seen must have been both opened and closed at least once
            foreach ($tags as $tag_name=>$explore)
            {
                if(!array_key_exists("opened",$tags[$tag_name]))
                {
                    $xml_message = $debbug_result->appendChild($dom->createElement('tag_problem'));
                    $xml_message->appendChild($dom->createTextNode("Missing open tag <".utf8_encode($tag_name).">."));
                    $error=true;
                }
                if(!array_key_exists("closed",$tags[$tag_name]))
                {
                    $xml_message = $debbug_result->appendChild($dom->createElement('tag_problem'));
                    $xml_message->appendChild($dom->createTextNode("Missing closing tag </".utf8_encode($tag_name).">."));
                    $error=true;
                }
            }
        }
        if(!$error)
        {
            $xml_message = $debbug_result->appendChild($dom->createElement('success'));
            $xml_message->appendChild($dom->createTextNode("<strong>XML structure is correctly formatted.</strong> Congratulations!<br />"));
        }
        if($warning)
        {
            foreach ($warning_msgs as $k=>$v)
            {
                $xml_message = $debbug_result->appendChild($dom->createElement('warning_'.$k));
                $xml_message->appendChild($dom->createTextNode($v));
            }
        }
        $dom->formatOutput = true;
        return $dom->saveXML();
    }

    /**
     * Tell whether a piece produced by splitting on a quote-plus-space lost its closing quote.
     *
     * @param string $sqv piece of a tag
     * @return bool true when $sqv does not end with a quote character and is not the lone '/'
     */
    public function attribute_valid($sqv)
    {
        if(substr($sqv,-1)!="'" &&
            substr($sqv,-1)!='"' &&
            !in_array($sqv,array('/'))) return true;
        return false;
    }

    /**
     * Extract the tag name from the first piece of a tag.
     *
     * Anything between the first and the last quote character is removed
     * before the text is cut at the first space.
     *
     * @param string $attr_0 first piece of the tag
     * @return string text up to the first space
     */
    public function dublecheck_xml_tag($attr_0)
    {
        $tag_fetch=preg_replace('/[\'|\"].*[\'|\"]/','',$attr_0);
        $tmp_tag_array=explode(' ',$tag_fetch);
        return $tmp_tag_array[0];
    }

    /**
     * Make sure a chunk contains a '>' by appending one when it has none.
     *
     * @param string $line chunk of the split document
     * @return string the chunk, with '>' appended when it had none
     */
    public function dublecheck_xml_line($line)
    {
        if(strpos($line,'>')===false)return $line.'>';
        else return $line;
    }
}
?>
|