diff options
Diffstat (limited to 'inc/Encoding.php')
-rwxr-xr-x | inc/Encoding.php | 42 |
1 files changed, 21 insertions, 21 deletions
diff --git a/inc/Encoding.php b/inc/Encoding.php index ac107af9..577763b4 100755 --- a/inc/Encoding.php +++ b/inc/Encoding.php | |||
@@ -1,4 +1,4 @@ | |||
1 | <?php | 1 | <?php |
2 | /** | 2 | /** |
3 | * @author "Sebastián Grignoli" <grignoli@framework2.com.ar> | 3 | * @author "Sebastián Grignoli" <grignoli@framework2.com.ar> |
4 | * @package Encoding | 4 | * @package Encoding |
@@ -8,7 +8,7 @@ | |||
8 | */ | 8 | */ |
9 | 9 | ||
10 | class Encoding { | 10 | class Encoding { |
11 | 11 | ||
12 | protected static $win1252ToUtf8 = array( | 12 | protected static $win1252ToUtf8 = array( |
13 | 128 => "\xe2\x82\xac", | 13 | 128 => "\xe2\x82\xac", |
14 | 14 | ||
@@ -43,10 +43,10 @@ class Encoding { | |||
43 | 158 => "\xc5\xbe", | 43 | 158 => "\xc5\xbe", |
44 | 159 => "\xc5\xb8" | 44 | 159 => "\xc5\xb8" |
45 | ); | 45 | ); |
46 | 46 | ||
47 | protected static $brokenUtf8ToUtf8 = array( | 47 | protected static $brokenUtf8ToUtf8 = array( |
48 | "\xc2\x80" => "\xe2\x82\xac", | 48 | "\xc2\x80" => "\xe2\x82\xac", |
49 | 49 | ||
50 | "\xc2\x82" => "\xe2\x80\x9a", | 50 | "\xc2\x82" => "\xe2\x80\x9a", |
51 | "\xc2\x83" => "\xc6\x92", | 51 | "\xc2\x83" => "\xc6\x92", |
52 | "\xc2\x84" => "\xe2\x80\x9e", | 52 | "\xc2\x84" => "\xe2\x80\x9e", |
@@ -58,10 +58,10 @@ class Encoding { | |||
58 | "\xc2\x8a" => "\xc5\xa0", | 58 | "\xc2\x8a" => "\xc5\xa0", |
59 | "\xc2\x8b" => "\xe2\x80\xb9", | 59 | "\xc2\x8b" => "\xe2\x80\xb9", |
60 | "\xc2\x8c" => "\xc5\x92", | 60 | "\xc2\x8c" => "\xc5\x92", |
61 | 61 | ||
62 | "\xc2\x8e" => "\xc5\xbd", | 62 | "\xc2\x8e" => "\xc5\xbd", |
63 | 63 | ||
64 | 64 | ||
65 | "\xc2\x91" => "\xe2\x80\x98", | 65 | "\xc2\x91" => "\xe2\x80\x98", |
66 | "\xc2\x92" => "\xe2\x80\x99", | 66 | "\xc2\x92" => "\xe2\x80\x99", |
67 | "\xc2\x93" => "\xe2\x80\x9c", | 67 | "\xc2\x93" => "\xe2\x80\x9c", |
@@ -74,14 +74,14 @@ class Encoding { | |||
74 | "\xc2\x9a" => "\xc5\xa1", | 74 | "\xc2\x9a" => "\xc5\xa1", |
75 | "\xc2\x9b" => "\xe2\x80\xba", | 75 | "\xc2\x9b" => "\xe2\x80\xba", |
76 | "\xc2\x9c" => "\xc5\x93", | 76 | "\xc2\x9c" => "\xc5\x93", |
77 | 77 | ||
78 | "\xc2\x9e" => "\xc5\xbe", | 78 | "\xc2\x9e" => "\xc5\xbe", |
79 | "\xc2\x9f" => "\xc5\xb8" | 79 | "\xc2\x9f" => "\xc5\xb8" |
80 | ); | 80 | ); |
81 | 81 | ||
82 | protected static $utf8ToWin1252 = array( | 82 | protected static $utf8ToWin1252 = array( |
83 | "\xe2\x82\xac" => "\x80", | 83 | "\xe2\x82\xac" => "\x80", |
84 | 84 | ||
85 | "\xe2\x80\x9a" => "\x82", | 85 | "\xe2\x80\x9a" => "\x82", |
86 | "\xc6\x92" => "\x83", | 86 | "\xc6\x92" => "\x83", |
87 | "\xe2\x80\x9e" => "\x84", | 87 | "\xe2\x80\x9e" => "\x84", |
@@ -93,10 +93,10 @@ class Encoding { | |||
93 | "\xc5\xa0" => "\x8a", | 93 | "\xc5\xa0" => "\x8a", |
94 | "\xe2\x80\xb9" => "\x8b", | 94 | "\xe2\x80\xb9" => "\x8b", |
95 | "\xc5\x92" => "\x8c", | 95 | "\xc5\x92" => "\x8c", |
96 | 96 | ||
97 | "\xc5\xbd" => "\x8e", | 97 | "\xc5\xbd" => "\x8e", |
98 | 98 | ||
99 | 99 | ||
100 | "\xe2\x80\x98" => "\x91", | 100 | "\xe2\x80\x98" => "\x91", |
101 | "\xe2\x80\x99" => "\x92", | 101 | "\xe2\x80\x99" => "\x92", |
102 | "\xe2\x80\x9c" => "\x93", | 102 | "\xe2\x80\x9c" => "\x93", |
@@ -109,7 +109,7 @@ class Encoding { | |||
109 | "\xc5\xa1" => "\x9a", | 109 | "\xc5\xa1" => "\x9a", |
110 | "\xe2\x80\xba" => "\x9b", | 110 | "\xe2\x80\xba" => "\x9b", |
111 | "\xc5\x93" => "\x9c", | 111 | "\xc5\x93" => "\x9c", |
112 | 112 | ||
113 | "\xc5\xbe" => "\x9e", | 113 | "\xc5\xbe" => "\x9e", |
114 | "\xc5\xb8" => "\x9f" | 114 | "\xc5\xb8" => "\x9f" |
115 | ); | 115 | ); |
@@ -119,7 +119,7 @@ class Encoding { | |||
119 | * Function Encoding::toUTF8 | 119 | * Function Encoding::toUTF8 |
120 | * | 120 | * |
121 | * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8. | 121 | * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8. |
122 | * | 122 | * |
123 | * It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1. | 123 | * It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1. |
124 | * | 124 | * |
125 | * It may fail to convert characters to UTF-8 if they fall into one of these scenarios: | 125 | * It may fail to convert characters to UTF-8 if they fall into one of these scenarios: |
@@ -128,7 +128,7 @@ class Encoding { | |||
128 | * are followed by any of these: ("group B") | 128 | * are followed by any of these: ("group B") |
129 | * ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶•¸¹º»¼½¾¿ | 129 | * ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶•¸¹º»¼½¾¿ |
130 | * For example: %ABREPRESENT%C9%BB. «REPRESENTÉ» | 130 | * For example: %ABREPRESENT%C9%BB. «REPRESENTÉ» |
131 | * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB) | 131 | * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB) |
132 | * is also a valid unicode character, and will be left unchanged. | 132 | * is also a valid unicode character, and will be left unchanged. |
133 | * | 133 | * |
134 | * 2) when any of these: àáâãäåæçèéêëìíîï are followed by TWO chars from group B, | 134 | * 2) when any of these: àáâãäåæçèéêëìíîï are followed by TWO chars from group B, |
@@ -148,7 +148,7 @@ class Encoding { | |||
148 | } | 148 | } |
149 | return $text; | 149 | return $text; |
150 | } elseif(is_string($text)) { | 150 | } elseif(is_string($text)) { |
151 | 151 | ||
152 | $max = strlen($text); | 152 | $max = strlen($text); |
153 | $buf = ""; | 153 | $buf = ""; |
154 | for($i = 0; $i < $max; $i++){ | 154 | for($i = 0; $i < $max; $i++){ |
@@ -244,15 +244,15 @@ class Encoding { | |||
244 | $text = self::toUTF8(utf8_decode(str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), $text))); | 244 | $text = self::toUTF8(utf8_decode(str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), $text))); |
245 | return $text; | 245 | return $text; |
246 | } | 246 | } |
247 | 247 | ||
248 | static function UTF8FixWin1252Chars($text){ | 248 | static function UTF8FixWin1252Chars($text){ |
249 | // If you received an UTF-8 string that was converted from Windows-1252 as it was ISO8859-1 | 249 | // If you received an UTF-8 string that was converted from Windows-1252 as it was ISO8859-1 |
250 | // (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it. | 250 | // (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it. |
251 | // See: http://en.wikipedia.org/wiki/Windows-1252 | 251 | // See: http://en.wikipedia.org/wiki/Windows-1252 |
252 | 252 | ||
253 | return str_replace(array_keys(self::$brokenUtf8ToUtf8), array_values(self::$brokenUtf8ToUtf8), $text); | 253 | return str_replace(array_keys(self::$brokenUtf8ToUtf8), array_values(self::$brokenUtf8ToUtf8), $text); |
254 | } | 254 | } |
255 | 255 | ||
256 | static function removeBOM($str=""){ | 256 | static function removeBOM($str=""){ |
257 | if(substr($str, 0,3) == pack("CCC",0xef,0xbb,0xbf)) { | 257 | if(substr($str, 0,3) == pack("CCC",0xef,0xbb,0xbf)) { |
258 | $str=substr($str, 3); | 258 | $str=substr($str, 3); |