PHP Classes

File: src/voku/helper/data/utf8_fix.php

Recommend this page to a friend!
  Classes of Lars Moelleken   Portable UTF-8   src/voku/helper/data/utf8_fix.php   Download  
File: src/voku/helper/data/utf8_fix.php
Role: Auxiliary script
Content type: text/plain
Description: Auxiliary script
Class: Portable UTF-8
Manipulate UTF-8 text strings in pure PHP
Author: By Lars Moelleken
Last change: [*]: update the changelog + comment
Date: 2 years ago
Size: 4,128 bytes
 

Contents

Class file image Download
<?php

// code source: https://github.com/devgeniem/wp-sanitize-accented-uploads/blob/master/plugin.php#L152
// table source: http://www.i18nqa.com/debug/utf8-debug.html

/*
 * Lookup table for repairing "mojibake": UTF-8 text whose bytes were wrongly
 * decoded as Windows-1252 and then encoded to UTF-8 a second time.
 *
 * Key   = the broken sequence as it appears in the damaged text.
 * Value = the character that was originally intended.
 *
 * Every entry is written with \x escapes (raw UTF-8 bytes) so the table
 * cannot itself be damaged when this file is transcoded; literal glyphs in a
 * repair table are exactly the characters most likely to get corrupted.
 * The trailing comment on each line names the repaired character.
 *
 * Entry order is significant for sequential replacement: longer broken
 * sequences come first so that shorter ones cannot match inside them.
 *
 * @return array<string, string>
 */
return [
    // 3 char errors: U+20xx / U+21xx punctuation (UTF-8 lead byte E2, shown as "â")
    "\xc3\xa2\xe2\x82\xac\xc5\xa1" => "\xe2\x80\x9a", // ‚ SINGLE LOW-9 QUOTATION MARK
    "\xc3\xa2\xe2\x82\xac\xc5\xbe" => "\xe2\x80\x9e", // „ DOUBLE LOW-9 QUOTATION MARK
    "\xc3\xa2\xe2\x82\xac\xc2\xa6" => "\xe2\x80\xa6", // … HORIZONTAL ELLIPSIS
    "\xc3\xa2\xe2\x82\xac\xc2\xa1" => "\xe2\x80\xa1", // ‡ DOUBLE DAGGER
    "\xc3\xa2\xe2\x82\xac\xc2\xb0" => "\xe2\x80\xb0", // ‰ PER MILLE SIGN
    "\xc3\xa2\xe2\x82\xac\xc2\xb9" => "\xe2\x80\xb9", // ‹ SINGLE LEFT-POINTING ANGLE QUOTE
    "\xc3\xa2\xe2\x82\xac\xcb\x9c" => "\xe2\x80\x98", // ‘ LEFT SINGLE QUOTATION MARK
    "\xc3\xa2\xe2\x82\xac\xe2\x84\xa2" => "\xe2\x80\x99", // ’ RIGHT SINGLE QUOTATION MARK
    "\xc3\xa2\xe2\x82\xac\xc5\x93" => "\xe2\x80\x9c", // “ LEFT DOUBLE QUOTATION MARK
    "\xc3\xa2\xe2\x82\xac\xc2\xa2" => "\xe2\x80\xa2", // • BULLET
    "\xc3\xa2\xe2\x82\xac\xe2\x80\x9c" => "\xe2\x80\x93", // – EN DASH
    "\xc3\xa2\xe2\x82\xac\xe2\x80\x9d" => "\xe2\x80\x94", // — EM DASH
    "\xc3\xa2\xe2\x80\x9e\xc2\xa2" => "\xe2\x84\xa2", // ™ TRADE MARK SIGN
    "\xc3\xa2\xe2\x82\xac\xc2\xba" => "\xe2\x80\xba", // › SINGLE RIGHT-POINTING ANGLE QUOTE
    "\xc3\xa2\xe2\x80\x9a\xc2\xac" => "\xe2\x82\xac", // € EURO SIGN

    // 2 char errors: C1 control characters that are really Windows-1252 bytes 0x80-0x98
    "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
    "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
    "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
    "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
    "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
    "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
    "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
    "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
    "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
    "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
    "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
    "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
    "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
    "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
    "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
    "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
    "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
    "\xc2\x95" => "\xe2\x80\xa2", // BULLET
    "\xc2\x96" => "\xe2\x80\x93", // EN DASH
    "\xc2\x97" => "\xe2\x80\x94", // EM DASH
    "\xc2\x98" => "\xcb\x9c", // SMALL TILDE

    // 2 char errors: a lead byte shown as "Ã", "Å", "Æ" or "Ë" followed by a
    // Windows-1252 glyph from the 0x80-0x9F range
    "\xc3\x83\xe2\x80\x9a" => "\xc3\x82", // Â
    "\xc3\x86\xe2\x80\x99" => "\xc6\x92", // ƒ
    "\xc3\x83\xc6\x92" => "\xc3\x83", // Ã
    "\xc3\x83\xe2\x80\x9e" => "\xc3\x84", // Ä
    "\xc3\x83\xe2\x80\xa6" => "\xc3\x85", // Å
    // A second mapping for the key "\xc3\xa2\xe2\x82\xac" was disabled here in
    // the original as a duplicate key (presumably DAGGER, whose broken form
    // differs from that key only by a trailing no-break space). The key is
    // used further down for RIGHT DOUBLE QUOTATION MARK.
    "\xc3\x83\xe2\x80\xa0" => "\xc3\x86", // Æ
    "\xc3\x83\xe2\x80\xa1" => "\xc3\x87", // Ç
    "\xc3\x8b\xe2\x80\xa0" => "\xcb\x86", // ˆ MODIFIER LETTER CIRCUMFLEX ACCENT
    "\xc3\x83\xcb\x86" => "\xc3\x88", // È
    "\xc3\x83\xe2\x80\xb0" => "\xc3\x89", // É
    "\xc3\x83\xc5\xa0" => "\xc3\x8a", // Ê
    "\xc3\x83\xe2\x80\xb9" => "\xc3\x8b", // Ë
    "\xc3\x85\xe2\x80\x99" => "\xc5\x92", // Œ
    "\xc3\x83\xc5\x92" => "\xc3\x8c", // Ì
    "\xc3\x85\xc2\xbd" => "\xc5\xbd", // Ž
    "\xc3\x83\xc5\xbd" => "\xc3\x8e", // Î
    "\xc3\x83\xe2\x80\x98" => "\xc3\x91", // Ñ
    "\xc3\x83\xe2\x80\x99" => "\xc3\x92", // Ò
    "\xc3\x83\xe2\x80\x9c" => "\xc3\x93", // Ó
    "\xc3\xa2\xe2\x82\xac" => "\xe2\x80\x9d", // ” RIGHT DOUBLE QUOTATION MARK (third byte 0x9D has no Windows-1252 glyph)
    "\xc3\x83\xe2\x80\x9d" => "\xc3\x94", // Ô
    "\xc3\x83\xe2\x80\xa2" => "\xc3\x95", // Õ
    "\xc3\x83\xe2\x80\x93" => "\xc3\x96", // Ö
    "\xc3\x83\xe2\x80\x94" => "\xc3\x97", // ×
    "\xc3\x8b\xc5\x93" => "\xcb\x9c", // ˜ SMALL TILDE
    "\xc3\x83\xcb\x9c" => "\xc3\x98", // Ø
    "\xc3\x83\xe2\x84\xa2" => "\xc3\x99", // Ù
    "\xc3\x85\xc2\xa1" => "\xc5\xa1", // š
    "\xc3\x83\xc5\xa1" => "\xc3\x9a", // Ú
    "\xc3\x83\xe2\x80\xba" => "\xc3\x9b", // Û
    "\xc3\x85\xe2\x80\x9c" => "\xc5\x93", // œ
    "\xc3\x83\xc5\x93" => "\xc3\x9c", // Ü
    "\xc3\x85\xc2\xbe" => "\xc5\xbe", // ž
    "\xc3\x83\xc5\xbe" => "\xc3\x9e", // Þ
    "\xc3\x85\xc2\xb8" => "\xc5\xb8", // Ÿ
    "\xc3\x83\xc5\xb8" => "\xc3\x9f", // ß

    // 2 char errors: "Â" + X restores X (U+00A1-U+00BF), and "Ã" + X restores
    // the letter 0x40 code points above X (U+00E1-U+00FF). The soft-hyphen row
    // (U+00AD / í) is deliberately absent, as in the original table.
    "\xc3\x82\xc2\xa1" => "\xc2\xa1", // ¡
    "\xc3\x83\xc2\xa1" => "\xc3\xa1", // á
    "\xc3\x82\xc2\xa2" => "\xc2\xa2", // ¢
    "\xc3\x83\xc2\xa2" => "\xc3\xa2", // â
    "\xc3\x82\xc2\xa3" => "\xc2\xa3", // £
    "\xc3\x83\xc2\xa3" => "\xc3\xa3", // ã
    "\xc3\x82\xc2\xa4" => "\xc2\xa4", // ¤
    "\xc3\x83\xc2\xa4" => "\xc3\xa4", // ä
    "\xc3\x82\xc2\xa5" => "\xc2\xa5", // ¥
    "\xc3\x83\xc2\xa5" => "\xc3\xa5", // å
    "\xc3\x82\xc2\xa6" => "\xc2\xa6", // ¦
    "\xc3\x83\xc2\xa6" => "\xc3\xa6", // æ
    "\xc3\x82\xc2\xa7" => "\xc2\xa7", // §
    "\xc3\x83\xc2\xa7" => "\xc3\xa7", // ç
    "\xc3\x82\xc2\xa8" => "\xc2\xa8", // ¨
    "\xc3\x83\xc2\xa8" => "\xc3\xa8", // è
    "\xc3\x82\xc2\xa9" => "\xc2\xa9", // ©
    "\xc3\x83\xc2\xa9" => "\xc3\xa9", // é
    "\xc3\x82\xc2\xaa" => "\xc2\xaa", // ª
    "\xc3\x83\xc2\xaa" => "\xc3\xaa", // ê
    "\xc3\x82\xc2\xab" => "\xc2\xab", // «
    "\xc3\x83\xc2\xab" => "\xc3\xab", // ë
    "\xc3\x82\xc2\xac" => "\xc2\xac", // ¬
    "\xc3\x83\xc2\xac" => "\xc3\xac", // ì
    "\xc3\x82\xc2\xae" => "\xc2\xae", // ®
    "\xc3\x83\xc2\xae" => "\xc3\xae", // î
    "\xc3\x82\xc2\xaf" => "\xc2\xaf", // ¯
    "\xc3\x83\xc2\xaf" => "\xc3\xaf", // ï
    "\xc3\x82\xc2\xb0" => "\xc2\xb0", // °
    "\xc3\x83\xc2\xb0" => "\xc3\xb0", // ð
    "\xc3\x82\xc2\xb1" => "\xc2\xb1", // ±
    "\xc3\x83\xc2\xb1" => "\xc3\xb1", // ñ
    "\xc3\x82\xc2\xb2" => "\xc2\xb2", // ²
    "\xc3\x83\xc2\xb2" => "\xc3\xb2", // ò
    "\xc3\x82\xc2\xb3" => "\xc2\xb3", // ³
    "\xc3\x83\xc2\xb3" => "\xc3\xb3", // ó
    "\xc3\x82\xc2\xb4" => "\xc2\xb4", // ´
    "\xc3\x83\xc2\xb4" => "\xc3\xb4", // ô
    "\xc3\x82\xc2\xb5" => "\xc2\xb5", // µ
    "\xc3\x83\xc2\xb5" => "\xc3\xb5", // õ
    "\xc3\x82\xc2\xb6" => "\xc2\xb6", // ¶
    "\xc3\x83\xc2\xb6" => "\xc3\xb6", // ö
    "\xc3\x82\xc2\xb7" => "\xc2\xb7", // ·
    "\xc3\x83\xc2\xb7" => "\xc3\xb7", // ÷
    "\xc3\x82\xc2\xb8" => "\xc2\xb8", // ¸
    "\xc3\x83\xc2\xb8" => "\xc3\xb8", // ø
    "\xc3\x82\xc2\xb9" => "\xc2\xb9", // ¹
    "\xc3\x83\xc2\xb9" => "\xc3\xb9", // ù
    "\xc3\x82\xc2\xba" => "\xc2\xba", // º
    "\xc3\x83\xc2\xba" => "\xc3\xba", // ú
    "\xc3\x82\xc2\xbb" => "\xc2\xbb", // »
    "\xc3\x83\xc2\xbb" => "\xc3\xbb", // û
    "\xc3\x82\xc2\xbc" => "\xc2\xbc", // ¼
    "\xc3\x83\xc2\xbc" => "\xc3\xbc", // ü
    "\xc3\x82\xc2\xbd" => "\xc2\xbd", // ½
    "\xc3\x83\xc2\xbd" => "\xc3\xbd", // ý
    "\xc3\x82\xc2\xbe" => "\xc2\xbe", // ¾
    "\xc3\x83\xc2\xbe" => "\xc3\xbe", // þ
    "\xc3\x82\xc2\xbf" => "\xc2\xbf", // ¿
    "\xc3\x83\xc2\xbf" => "\xc3\xbf", // ÿ
    "\xc3\x83\xe2\x82\xac" => "\xc3\x80", // À

    // 1 char errors are intentionally NOT mapped (false positives): a bare
    // "Ã" ("\xc3\x83") is ambiguous between Á, Í, Ï, Ð, Ý, à and í, and a
    // bare "Å" ("\xc3\x85") is a legitimate letter in its own right
    // (it would otherwise stand for a single character, presumably Š).
];