<?php
// Maps alien Unicode characters such as special spaces, letters with ligatures to their ascii string equivalent
/**
 * Lookup table from Unicode code points to plain-ASCII replacement strings.
 *
 * Keys are integer code points; values are the ASCII text to substitute for
 * that character. A value may be empty (the character is dropped) or longer
 * than one character (ligatures, Roman numerals, ellipsis).
 *
 * Entries are kept in ascending code point order, grouped by Unicode block.
 */
$unicode_to_ansi = array(
    // C1 control code points seen in some Polish documents. They line up with
    // Windows-1250/1252 bytes that were decoded as Latin-1 (presumably the
    // origin of these characters - confirm against the offending documents).
    0x0084 => '"',      // cp125x 0x84: double low-9 quotation mark
    0x0085 => '...',    // cp125x 0x85: horizontal ellipsis
    0x0092 => "'",      // cp125x 0x92: right single quotation mark
    0x0094 => '"',      // cp125x 0x94: right double quotation mark
    0x0096 => '-',      // cp125x 0x96: en dash

    // Latin-1 Supplement
    0x00A0 => ' ',      // No-break space
    0x00AB => '"',      // Left-pointing double angle quotation mark
    0x00AD => '',       // Soft hyphen (invisible unless a line breaks there)
    0x00BB => '"',      // Right-pointing double angle quotation mark
    0x00C6 => 'AE',     // Capital AE ligature
    0x00E6 => 'ae',     // Small ae ligature

    // Latin Extended-A
    0x0152 => 'OE',     // Capital OE ligature
    0x0153 => 'oe',     // Small oe ligature

    // Ogham
    0x1680 => ' ',      // Ogham space mark

    // Phonetic Extensions
    0x1D6B => 'ue',     // Small letter ue

    // General Punctuation: spaces
    0x2000 => ' ',      // En quad
    0x2001 => ' ',      // Em quad
    0x2002 => ' ',      // En space
    0x2003 => ' ',      // Em space
    0x2004 => ' ',      // Three-per-em space
    0x2005 => ' ',      // Four-per-em space
    0x2006 => ' ',      // Six-per-em space
    0x2007 => ' ',      // Figure space
    0x2008 => ' ',      // Punctuation space
    0x2009 => ' ',      // Thin space
    0x200A => ' ',      // Hair space
    0x200B => ' ',      // Zero-width space (marks a word boundary, so kept as a space)
    0x200C => '',       // Zero-width non-joiner (occurs inside words, so dropped)
    0x200D => '',       // Zero-width joiner

    // General Punctuation: hyphens and dashes
    0x2010 => '-',      // Hyphen
    0x2011 => '-',      // Non-breaking hyphen
    0x2012 => '-',      // Figure dash (same width as digits)
    0x2013 => '-',      // En dash (ranges of values)
    0x2014 => ' - ',    // Em dash (break in the flow of a sentence)
    0x2015 => '- ',     // Horizontal bar (introduces quoted text)

    // General Punctuation: quotation marks
    0x2018 => "'",      // Left single quotation mark
    0x2019 => "'",      // Right single quotation mark
    0x201A => "'",      // Single low-9 quotation mark
    0x201B => "'",      // Single high-reversed-9 quotation mark
    0x201C => '"',      // Left double quotation mark
    0x201D => '"',      // Right double quotation mark
    0x201E => '"',      // Double low-9 quotation mark

    // General Punctuation: miscellaneous
    0x2026 => '...',    // Horizontal ellipsis
    0x2028 => "\n",     // Line separator
    0x2029 => "\n",     // Paragraph separator
    0x202F => ' ',      // Narrow no-break space
    0x2039 => "'",      // Single left-pointing angle quotation mark
    0x203A => "'",      // Single right-pointing angle quotation mark
    0x2053 => '~',      // Swung dash
    0x205F => ' ',      // Medium mathematical space
    0x2060 => '',       // Word joiner

    // Superscripts and Subscripts
    0x207B => '-',      // Superscript minus
    0x208B => '-',      // Subscript minus

    // Number Forms: uppercase Roman numerals
    0x2160 => 'I',
    0x2161 => 'II',
    0x2162 => 'III',
    0x2163 => 'IV',
    0x2164 => 'V',
    0x2165 => 'VI',
    0x2166 => 'VII',
    0x2167 => 'VIII',
    0x2168 => 'IX',
    0x2169 => 'X',
    0x216A => 'XI',
    0x216B => 'XII',
    0x216C => 'L',
    0x216D => 'C',
    0x216E => 'D',
    0x216F => 'M',

    // Number Forms: lowercase Roman numerals
    0x2170 => 'i',
    0x2171 => 'ii',
    0x2172 => 'iii',
    0x2173 => 'iv',
    0x2174 => 'v',
    0x2175 => 'vi',
    0x2176 => 'vii',
    0x2177 => 'viii',
    0x2178 => 'ix',
    0x2179 => 'x',
    0x217A => 'xi',
    0x217B => 'xii',
    0x217C => 'l',
    0x217D => 'c',
    0x217E => 'd',
    0x217F => 'm',

    // Mathematical Operators
    0x2212 => '-',      // Minus sign (arithmetic operator)

    // Dingbats
    0x2758 => '|',      // Light vertical bar
    0x2759 => '|',      // Medium vertical bar

    // Supplemental Punctuation
    0x2E3A => '-',      // Two-em dash
    0x2E3B => '-',      // Three-em dash

    // CJK Symbols and Punctuation
    0x3000 => ' ',      // Ideographic space
    0x301D => '"',      // Reversed double prime quotation mark
    0x301E => '"',      // Double prime quotation mark
    0x301F => '"',      // Low double prime quotation mark

    // Latin Extended-D ligatures
    0xA728 => 'TZ',     // Capital TZ
    0xA729 => 'tz',     // Small tz
    0xA732 => 'AA',     // Capital AA
    0xA733 => 'aa',     // Small aa
    0xA734 => 'AO',     // Capital AO
    0xA735 => 'ao',     // Small ao
    0xA736 => 'AU',     // Capital AU
    0xA737 => 'au',     // Small au
    0xA738 => 'AV',     // Capital AV
    0xA739 => 'av',     // Small av
    0xA73A => 'AV',     // Capital AV with horizontal bar
    0xA73B => 'av',     // Small av with horizontal bar
    0xA73C => 'AY',     // Capital AY
    0xA73D => 'ay',     // Small ay
    0xA74E => 'OO',     // Capital OO
    0xA74F => 'oo',     // Small oo
    0xA760 => 'VY',     // Capital VY
    0xA761 => 'vy',     // Small vy

    // Alphabetic Presentation Forms: Latin ligatures
    0xFB00 => 'ff',
    0xFB01 => 'fi',
    0xFB02 => 'fl',
    0xFB03 => 'ffi',
    0xFB04 => 'ffl',
    0xFB05 => 'st',     // Long s + t: looks like "ft" but decomposes to "st"
    0xFB06 => 'st',

    // CJK Compatibility Forms / Small Form Variants
    0xFE31 => '|',      // Vertical em dash
    0xFE32 => '|',      // Vertical en dash
    0xFE58 => '-',      // Small em dash
    0xFE63 => '-',      // Small hyphen-minus

    // Byte order mark
    0xFEFF => '',       // Zero-width no-break space / BOM (dropped, like the word joiner)

    // Halfwidth and Fullwidth Forms
    0xFF02 => '"',      // Fullwidth quotation mark
    0xFF07 => "'",      // Fullwidth apostrophe
    0xFF08 => '(',      // Fullwidth left parenthesis
    0xFF09 => ')',      // Fullwidth right parenthesis
    0xFF0D => '-',      // Fullwidth hyphen-minus
);