-<?php \r
+<?php\r
/**\r
* @author "Sebastián Grignoli" <grignoli@framework2.com.ar>\r
* @package Encoding\r
*/\r
\r
class Encoding {\r
- \r
+\r
protected static $win1252ToUtf8 = array(\r
128 => "\xe2\x82\xac",\r
\r
158 => "\xc5\xbe",\r
159 => "\xc5\xb8"\r
);\r
- \r
+\r
protected static $brokenUtf8ToUtf8 = array(\r
"\xc2\x80" => "\xe2\x82\xac",\r
- \r
+\r
"\xc2\x82" => "\xe2\x80\x9a",\r
"\xc2\x83" => "\xc6\x92",\r
"\xc2\x84" => "\xe2\x80\x9e",\r
"\xc2\x8a" => "\xc5\xa0",\r
"\xc2\x8b" => "\xe2\x80\xb9",\r
"\xc2\x8c" => "\xc5\x92",\r
- \r
+\r
"\xc2\x8e" => "\xc5\xbd",\r
- \r
- \r
+\r
+\r
"\xc2\x91" => "\xe2\x80\x98",\r
"\xc2\x92" => "\xe2\x80\x99",\r
"\xc2\x93" => "\xe2\x80\x9c",\r
"\xc2\x9a" => "\xc5\xa1",\r
"\xc2\x9b" => "\xe2\x80\xba",\r
"\xc2\x9c" => "\xc5\x93",\r
- \r
+\r
"\xc2\x9e" => "\xc5\xbe",\r
"\xc2\x9f" => "\xc5\xb8"\r
);\r
- \r
+\r
protected static $utf8ToWin1252 = array(\r
"\xe2\x82\xac" => "\x80",\r
- \r
+\r
"\xe2\x80\x9a" => "\x82",\r
"\xc6\x92" => "\x83",\r
"\xe2\x80\x9e" => "\x84",\r
"\xc5\xa0" => "\x8a",\r
"\xe2\x80\xb9" => "\x8b",\r
"\xc5\x92" => "\x8c",\r
- \r
+\r
"\xc5\xbd" => "\x8e",\r
- \r
- \r
+\r
+\r
"\xe2\x80\x98" => "\x91",\r
"\xe2\x80\x99" => "\x92",\r
"\xe2\x80\x9c" => "\x93",\r
"\xc5\xa1" => "\x9a",\r
"\xe2\x80\xba" => "\x9b",\r
"\xc5\x93" => "\x9c",\r
- \r
+\r
"\xc5\xbe" => "\x9e",\r
"\xc5\xb8" => "\x9f"\r
);\r
* Function Encoding::toUTF8\r
*\r
* This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.\r
- * \r
+ *\r
* It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1.\r
*\r
* It may fail to convert characters to UTF-8 if they fall into one of these scenarios:\r
* are followed by any of these: ("group B")\r
* ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿\r
* For example: %ABREPRESENT%C9%BB. «REPRESENTÉ»\r
- * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB) \r
+ * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)\r
* is also a valid Unicode character, and will be left unchanged.\r
*\r
* 2) when any of these: àáâãäåæçèéêëìíîï are followed by TWO chars from group B,\r
}\r
return $text;\r
} elseif(is_string($text)) {\r
- \r
+\r
$max = strlen($text);\r
$buf = "";\r
for($i = 0; $i < $max; $i++){\r
$text = self::toUTF8(utf8_decode(str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), $text)));\r
return $text;\r
}\r
- \r
+\r
// Repairs UTF-8 text in which Windows-1252 bytes 0x80-0x9F were encoded as if they were ISO-8859-1.\r
// Every key of self::$brokenUtf8ToUtf8 found in $text is replaced by its mapped value\r
// in a single str_replace() call, and the result of that call is returned.\r
static function UTF8FixWin1252Chars($text){\r
- // If you received an UTF-8 string that was converted from Windows-1252 as it was ISO8859-1 \r
+ // If you received a UTF-8 string that was converted from Windows-1252 as if it were ISO-8859-1\r
// (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.\r
// See: http://en.wikipedia.org/wiki/Windows-1252\r
- \r
+\r
return str_replace(array_keys(self::$brokenUtf8ToUtf8), array_values(self::$brokenUtf8ToUtf8), $text);\r
}\r
- \r
+\r
static function removeBOM($str=""){\r
if(substr($str, 0,3) == pack("CCC",0xef,0xbb,0xbf)) {\r
$str=substr($str, 3);\r