/**
 * Helpers for repairing and converting text between Windows-1252 / ISO-8859-1
 * and UTF-8.
 *
 * @package Encoding
 * @version 1.1
 * @link http://www.framework2.com.ar/dzone/forceUTF8-es/
 * @example http://www.framework2.com.ar/dzone/forceUTF8-es/
 */
class Encoding {

  /**
   * Windows-1252 byte value (0x80-0x9F range) => UTF-8 encoding of the
   * character that byte represents. Byte values without an entry have no
   * special Windows-1252 meaning and are converted like ISO-8859-1 bytes.
   */
  protected static $win1252ToUtf8 = array(
        128 => "\xe2\x82\xac",

        130 => "\xe2\x80\x9a",
        131 => "\xc6\x92",
        132 => "\xe2\x80\x9e",
        133 => "\xe2\x80\xa6",
        134 => "\xe2\x80\xa0",
        135 => "\xe2\x80\xa1",
        136 => "\xcb\x86",
        137 => "\xe2\x80\xb0",
        138 => "\xc5\xa0",
        139 => "\xe2\x80\xb9",
        140 => "\xc5\x92",

        142 => "\xc5\xbd",


        145 => "\xe2\x80\x98",
        146 => "\xe2\x80\x99",
        147 => "\xe2\x80\x9c",
        148 => "\xe2\x80\x9d",
        149 => "\xe2\x80\xa2",
        150 => "\xe2\x80\x93",
        151 => "\xe2\x80\x94",
        152 => "\xcb\x9c",
        153 => "\xe2\x84\xa2",
        154 => "\xc5\xa1",
        155 => "\xe2\x80\xba",
        156 => "\xc5\x93",

        158 => "\xc5\xbe",
        159 => "\xc5\xb8"
  );

  /**
   * UTF-8 sequence produced when a Windows-1252 byte was converted as if it
   * were ISO-8859-1 (U+0080-U+009F) => UTF-8 encoding of the intended character.
   */
  protected static $brokenUtf8ToUtf8 = array(
        "\xc2\x80" => "\xe2\x82\xac",

        "\xc2\x82" => "\xe2\x80\x9a",
        "\xc2\x83" => "\xc6\x92",
        "\xc2\x84" => "\xe2\x80\x9e",
        "\xc2\x85" => "\xe2\x80\xa6",
        "\xc2\x86" => "\xe2\x80\xa0",
        "\xc2\x87" => "\xe2\x80\xa1",
        "\xc2\x88" => "\xcb\x86",
        "\xc2\x89" => "\xe2\x80\xb0",
        "\xc2\x8a" => "\xc5\xa0",
        "\xc2\x8b" => "\xe2\x80\xb9",
        "\xc2\x8c" => "\xc5\x92",

        "\xc2\x8e" => "\xc5\xbd",


        "\xc2\x91" => "\xe2\x80\x98",
        "\xc2\x92" => "\xe2\x80\x99",
        "\xc2\x93" => "\xe2\x80\x9c",
        "\xc2\x94" => "\xe2\x80\x9d",
        "\xc2\x95" => "\xe2\x80\xa2",
        "\xc2\x96" => "\xe2\x80\x93",
        "\xc2\x97" => "\xe2\x80\x94",
        "\xc2\x98" => "\xcb\x9c",
        "\xc2\x99" => "\xe2\x84\xa2",
        "\xc2\x9a" => "\xc5\xa1",
        "\xc2\x9b" => "\xe2\x80\xba",
        "\xc2\x9c" => "\xc5\x93",

        "\xc2\x9e" => "\xc5\xbe",
        "\xc2\x9f" => "\xc5\xb8"
  );

  /**
   * UTF-8 encoding of a character => the single Windows-1252 byte for it
   * (inverse of $win1252ToUtf8).
   */
  protected static $utf8ToWin1252 = array(
       "\xe2\x82\xac" => "\x80",

       "\xe2\x80\x9a" => "\x82",
       "\xc6\x92"     => "\x83",
       "\xe2\x80\x9e" => "\x84",
       "\xe2\x80\xa6" => "\x85",
       "\xe2\x80\xa0" => "\x86",
       "\xe2\x80\xa1" => "\x87",
       "\xcb\x86"     => "\x88",
       "\xe2\x80\xb0" => "\x89",
       "\xc5\xa0"     => "\x8a",
       "\xe2\x80\xb9" => "\x8b",
       "\xc5\x92"     => "\x8c",

       "\xc5\xbd"     => "\x8e",


       "\xe2\x80\x98" => "\x91",
       "\xe2\x80\x99" => "\x92",
       "\xe2\x80\x9c" => "\x93",
       "\xe2\x80\x9d" => "\x94",
       "\xe2\x80\xa2" => "\x95",
       "\xe2\x80\x93" => "\x96",
       "\xe2\x80\x94" => "\x97",
       "\xcb\x9c"     => "\x98",
       "\xe2\x84\xa2" => "\x99",
       "\xc5\xa1"     => "\x9a",
       "\xe2\x80\xba" => "\x9b",
       "\xc5\x93"     => "\x9c",

       "\xc5\xbe"     => "\x9e",
       "\xc5\xb8"     => "\x9f"
  );

  /**
   * Converts a string to UTF-8, leaving byte sequences that already look like
   * UTF-8 untouched.
   *
   * Every byte that is not part of a UTF-8-shaped sequence is assumed to be
   * Windows-1252 or ISO-8859-1 and is re-encoded.
   *
   * Because detection works on byte shape only, a conversion is skipped when
   * single-byte text happens to look like UTF-8:
   *
   * 1) a byte in 0xC0-0xDF (e.g. "À".."ß" in Latin-1) followed by one byte in
   *    0x80-0xBF. Example: %ABREPRESENT%C9%BB, i.e. Latin-1 «REPRESENTÉ»: the
   *    leading "«" (%AB) is converted, but "É»" (%C9%BB) is also a well-formed
   *    2-byte UTF-8 sequence and is left unchanged;
   * 2) a byte in 0xE0-0xEF followed by TWO bytes in 0x80-0xBF;
   * 3) a byte in 0xF0-0xF7 followed by THREE bytes in 0x80-0xBF.
   *
   * @param mixed $text A string, or an array whose values are converted
   *                    recursively. Any other value is returned unchanged.
   * @return mixed The UTF-8 encoded string (or array of converted values).
   */
  public static function toUTF8($text){
    if(is_array($text)) {
      foreach($text as $key => $value) {
        $text[$key] = self::toUTF8($value);
      }
      return $text;
    }

    if(!is_string($text)) {
      return $text;
    }

    $length = strlen($text);
    $buffer = "";

    for($i = 0; $i < $length; $i++){
      $char = $text[$i];
      $byte = ord($char);

      if($byte < 0x80) { // plain ASCII, it doesn't need conversion
        $buffer .= $char;
        continue;
      }

      if($byte < 0xC0) { // stray 0x80-0xBF byte: cannot start a UTF-8 sequence
        $buffer .= isset(self::$win1252ToUtf8[$byte])
          ? self::$win1252ToUtf8[$byte]       // Windows-1252 special case
          : self::singleByteToUtf8($byte);
        continue;
      }

      // Possible lead byte: keep the whole sequence only when every expected
      // continuation byte is present, otherwise treat the byte as Latin-1.
      $size = self::sequenceLength($byte);
      if($size > 0 && self::hasContinuationBytes($text, $i + 1, $size - 1)) {
        $buffer .= substr($text, $i, $size);
        $i += $size - 1; // the loop's own $i++ steps past the last byte
      } else {
        $buffer .= self::singleByteToUtf8($byte);
      }
    }

    return $buffer;
  }

  /**
   * Converts a string (UTF-8, Windows-1252, ISO-8859-1 or a mix) to
   * Windows-1252.
   *
   * The input is first normalised with toUTF8(). Characters that have no
   * Windows-1252 representation are replaced with "?".
   *
   * @param mixed $text A string, or an array whose values are converted
   *                    recursively. Any other value is returned unchanged.
   * @return mixed The Windows-1252 encoded string (or array of converted values).
   */
  public static function toWin1252($text) {
    if(is_array($text)) {
      foreach($text as $key => $value) {
        $text[$key] = self::toWin1252($value);
      }
      return $text;
    }

    if(!is_string($text)) {
      return $text;
    }

    return self::utf8ToWin1252Bytes(self::toUTF8($text));
  }

  /**
   * Alias of toWin1252().
   *
   * @param mixed $text See toWin1252().
   * @return mixed See toWin1252().
   */
  public static function toISO8859($text) {
    return self::toWin1252($text);
  }

  /**
   * Alias of toWin1252().
   *
   * @param mixed $text See toWin1252().
   * @return mixed See toWin1252().
   */
  public static function toLatin1($text) {
    return self::toWin1252($text);
  }

  /**
   * Repairs UTF-8 text that was encoded to UTF-8 more than once
   * (e.g. "FÃ©dÃ©ration" becomes "Fédération").
   *
   * One layer of encoding is peeled off and the result re-normalised with
   * toUTF8(), repeatedly, until the text stops changing.
   *
   * Characters that cannot be represented in Windows-1252 are replaced with
   * "?", so this is only suitable for text whose characters all exist in
   * Windows-1252.
   *
   * @param mixed $text A string, or an array whose values are fixed
   *                    recursively. Any other value is returned unchanged.
   * @return mixed The repaired UTF-8 string (or array of repaired values).
   */
  public static function fixUTF8($text){
    if(is_array($text)) {
      foreach($text as $key => $value) {
        $text[$key] = self::fixUTF8($value);
      }
      return $text;
    }

    if(!is_string($text)) {
      return $text;
    }

    do {
      $previous = $text;
      $text = self::toUTF8(self::utf8ToWin1252Bytes($previous));
    } while($previous !== $text);

    return $text;
  }

  /**
   * Fixes a UTF-8 string that was converted from Windows-1252 as if it were
   * ISO-8859-1 (i.e. ignoring the Windows-1252 characters from 80 to 9F).
   *
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * @param string|array $text Text to fix; passed to str_replace() as the subject.
   * @return string|array The text with the mis-converted sequences replaced.
   */
  public static function UTF8FixWin1252Chars($text){
    return str_replace(array_keys(self::$brokenUtf8ToUtf8), array_values(self::$brokenUtf8ToUtf8), $text);
  }

  /**
   * Strips a leading UTF-8 byte order mark (EF BB BF), if there is one.
   *
   * @param string $str
   * @return string The input without its leading BOM.
   */
  public static function removeBOM($str=""){
    if(substr($str, 0, 3) === "\xef\xbb\xbf") {
      // Cast because substr() returns false on PHP < 8 when nothing follows the BOM.
      return (string) substr($str, 3);
    }
    return $str;
  }

  /**
   * Number of bytes in the UTF-8 sequence announced by a lead byte.
   *
   * @param int $leadByte Byte value (0-255).
   * @return int 2, 3 or 4; 0 when the byte is outside 0xC0-0xF7.
   */
  private static function sequenceLength($leadByte) {
    if($leadByte >= 0xC0 && $leadByte <= 0xDF) {
      return 2;
    }
    if($leadByte >= 0xE0 && $leadByte <= 0xEF) {
      return 3;
    }
    if($leadByte >= 0xF0 && $leadByte <= 0xF7) {
      return 4;
    }
    return 0;
  }

  /**
   * Tells whether $count bytes starting at offset $start all exist and are
   * UTF-8 continuation bytes (0x80-0xBF).
   *
   * @param string $text
   * @param int $start Offset of the first byte to check.
   * @param int $count Number of bytes to check.
   * @return bool
   */
  private static function hasContinuationBytes($text, $start, $count) {
    $end = $start + $count;
    if($end > strlen($text)) {
      return false; // sequence is cut off by the end of the string
    }
    for($k = $start; $k < $end; $k++) {
      $byte = ord($text[$k]);
      if($byte < 0x80 || $byte > 0xBF) {
        return false;
      }
    }
    return true;
  }

  /**
   * Encodes one ISO-8859-1 byte (0x80-0xFF) as its 2-byte UTF-8 sequence.
   *
   * @param int $byte Byte value.
   * @return string Two-byte UTF-8 sequence.
   */
  private static function singleByteToUtf8($byte) {
    return chr(0xC0 | ($byte >> 6)) . chr(0x80 | ($byte & 0x3F));
  }

  /**
   * Decodes UTF-8 text to Windows-1252 bytes.
   *
   * Characters listed in $utf8ToWin1252 become their Windows-1252 byte,
   * U+0080-U+00FF become the byte of the same value, and any other
   * UTF-8-shaped sequence becomes "?". ASCII and bytes that do not form a
   * sequence are copied through unchanged, so text that is already single-byte
   * is not destroyed.
   *
   * @param string $text
   * @return string
   */
  private static function utf8ToWin1252Bytes($text) {
    $length = strlen($text);
    $out = "";
    $i = 0;

    while($i < $length) {
      $byte = ord($text[$i]);
      $size = self::sequenceLength($byte);

      // ASCII, stray continuation bytes, the overlong leads 0xC0/0xC1 and
      // truncated sequences are copied through one byte at a time.
      if($size === 0 || $byte < 0xC2 || !self::hasContinuationBytes($text, $i + 1, $size - 1)) {
        $out .= $text[$i];
        $i++;
        continue;
      }

      $sequence = substr($text, $i, $size);
      if(isset(self::$utf8ToWin1252[$sequence])) {
        $out .= self::$utf8ToWin1252[$sequence];
      } elseif($byte <= 0xC3) { // U+0080-U+00FF: same value as the Latin-1 byte
        $out .= chr((($byte & 0x03) << 6) | (ord($text[$i + 1]) & 0x3F));
      } else { // no Windows-1252 equivalent
        $out .= "?";
      }
      $i += $size;
    }

    return $out;
  }
}