/**
 * Encode text so that it only contains entity references that are valid in XML.
 *
 * The text is first run through htmlentities() (ENT_COMPAT, no double
 * encoding), which may produce HTML-only named entities such as &trade;.
 * XML predefines only &quot; &amp; &apos; &lt; and &gt;, so the ampersand of
 * every other named entity is escaped again: &trade; becomes &amp;trade;.
 * Numeric references (&#45;) are not matched and are left untouched.
 *
 * Example: 'AT&T &trade; "x"' yields 'AT&amp;T &amp;trade; &quot;x&quot;'.
 *
 * @param string $text    Text to encode.
 * @param string $charset Character set of $text, passed to htmlentities().
 *
 * @return string|null The encoded text; null if preg_replace() reports an error.
 */
function xml_entities($text, $charset = 'Windows-1252') {
    // Encode HTML special characters; existing entities are kept as they are
    // (double_encode = false) and handled by the regex below.
    $text = htmlentities($text, ENT_COMPAT, $charset, false);

    // Names of the five entities predefined by XML; these must stay untouched.
    $xml_entity_names = array('quot', 'amp', 'apos', 'lt', 'gt');

    // Match "&name;" unless name is one of the XML entities. A single negative
    // lookahead is used on purpose: a group opened with "(?" directly followed
    // by "(?!...)" is read by PCRE as a conditional subpattern, which fails to
    // exclude the first entity in the list.
    $pattern = '/&(?!(?:' . implode('|', $xml_entity_names) . ');)([a-zA-Z0-9]+;)/';

    // Escape the leading ampersand: &something_not_xml; -> &amp;something_not_xml;
    $replacement = '&amp;${1}';

    return preg_replace($pattern, $replacement, $text);
}
/**
 * Decode entity-encoded text back to plain text in the given charset.
 *
 * html_entity_decode() is applied twice so that a doubly-escaped HTML entity
 * is fully resolved: &amp;trade; becomes &trade; on the first pass and the
 * trademark character on the second.
 *
 * @param string $text    Entity-encoded text.
 * @param string $charset Target character set, passed to html_entity_decode().
 *
 * @return string The decoded text.
 */
function xml_entity_decode($text, $charset = 'Windows-1252') {
    // First pass: &amp;name; -> &name; (and plain entities -> characters).
    $text = html_entity_decode($text, ENT_COMPAT, $charset);
    // Second pass: resolve the entities uncovered by the first pass.
    $text = html_entity_decode($text, ENT_COMPAT, $charset);

    return $text;
}