blob: de896f8784099e633bd29549e333115995ccc2f9 (
plain) (
tree)
|
|
<?php
/**
* Description of CharacterEntities
*
* @author Sander
*/
class CharacterEntities {
public static function convert($str){
//Assume the encoding is UTF-8 -> output is UTF-8
return $str;
//return utf8_encode($str);
//Convert to CP1252
list($from, $to) = CharacterEntities::generateTables();
return str_replace($from, $to, $str);
}
private static function generateTables(){
$from = array();
$to = array();
for($i = 0; $i < 256; $i++){
$from[$i] = $to[$i] = chr($i);
}
$from[0x80] = "€";
$from[0x82] = "‚";
$from[0x83] = "ƒ";
$from[0x84] = "„";
$from[0x85] = "…";
$from[0x86] = "†";
$from[0x87] = "‡";
$from[0x88] = "ˆ";
$from[0x89] = "‰";
$from[0x8A] = "Š";
$from[0x8B] = "‹";
$from[0x8C] = "Œ";
$from[0x8E] = "Ž";
$from[0x91] = "‘";
$from[0x92] = "’";
$from[0x93] = "“";
$from[0x94] = "”";
$from[0x95] = "•";
$from[0x96] = "–";
$from[0x97] = "—";
$from[0x98] = "˜";
$from[0x99] = "™";
$from[0x9A] = "š";
$from[0x9B] = "›";
$from[0x9C] = "œ";
$from[0x9E] = "ž";
$from[0x9F] = "Ÿ";
$from[0xA1] = "¡";
$from[0xA2] = "¢";
$from[0xA3] = "£";
$from[0xA4] = "¤";
$from[0xA5] = "¥";
$from[0xA6] = "¦";
$from[0xA7] = "§";
$from[0xA8] = "¨";
$from[0xA9] = "©";
$from[0xAA] = "ª";
$from[0xAB] = "«";
$from[0xAC] = "¬";
$from[0xAE] = "®";
$from[0xAF] = "¯";
$from[0xB0] = "°";
$from[0xB1] = "±";
$from[0xB2] = "²";
$from[0xB3] = "³";
$from[0xB4] = "´";
$from[0xB5] = "µ";
$from[0xB6] = "¶";
$from[0xB7] = "·";
$from[0xB8] = "¸";
$from[0xB9] = "¹";
$from[0xBA] = "º";
$from[0xBB] = "»";
$from[0xBC] = "¼";
$from[0xBD] = "½";
$from[0xBE] = "¾";
$from[0xBF] = "¿";
$from[0xC0] = "À";
$from[0xC1] = "Á";
$from[0xC2] = "Â";
$from[0xC3] = "Ã";
$from[0xC4] = "Ä";
$from[0xC5] = "Å";
$from[0xC6] = "Æ";
$from[0xC7] = "Ç";
$from[0xC8] = "È";
$from[0xC9] = "É";
$from[0xCA] = "Ê";
$from[0xCB] = "Ë";
$from[0xCC] = "Ì";
$from[0xCD] = "Í";
$from[0xCE] = "Î";
$from[0xCF] = "Ï";
$from[0xD0] = "Ð";
$from[0xD1] = "Ñ";
$from[0xD2] = "Ò";
$from[0xD3] = "Ó";
$from[0xD4] = "Ô";
$from[0xD5] = "Õ";
$from[0xD6] = "Ö";
$from[0xD7] = "×";
$from[0xD8] = "Ø";
$from[0xD9] = "Ù";
$from[0xDA] = "Ú";
$from[0xDB] = "Û";
$from[0xDC] = "Ü";
$from[0xDD] = "Ý";
$from[0xDE] = "Þ";
$from[0xDF] = "ß";
$from[0xE0] = "à";
$from[0xE1] = "á";
$from[0xE2] = "â";
$from[0xE3] = "ã";
$from[0xE4] = "ä";
$from[0xE5] = "å";
$from[0xE6] = "æ";
$from[0xE7] = "ç";
$from[0xE8] = "è";
$from[0xE9] = "é";
$from[0xEA] = "ê";
$from[0xEB] = "ë";
$from[0xEC] = "ì";
$from[0xED] = "í";
$from[0xEE] = "î";
$from[0xEF] = "ï";
$from[0xF0] = "ð";
$from[0xF1] = "ñ";
$from[0xF2] = "ò";
$from[0xF3] = "ó";
$from[0xF4] = "ô";
$from[0xF5] = "õ";
$from[0xF6] = "ö";
$from[0xF7] = "÷";
$from[0xF8] = "ø";
$from[0xF9] = "ù";
$from[0xFA] = "ú";
$from[0xFB] = "û";
$from[0xFC] = "ü";
$from[0xFD] = "ý";
$from[0xFE] = "þ";
$from[0xFF] = "ÿ";
return array($from, $to);
}
/*
00 = U+0000 : NULL
01 = U+0001 : START OF HEADING
02 = U+0002 : START OF TEXT
03 = U+0003 : END OF TEXT
04 = U+0004 : END OF TRANSMISSION
05 = U+0005 : ENQUIRY
06 = U+0006 : ACKNOWLEDGE
07 = U+0007 : BELL
08 = U+0008 : BACKSPACE
09 = U+0009 : HORIZONTAL TABULATION
0A = U+000A : LINE FEED
0B = U+000B : VERTICAL TABULATION
0C = U+000C : FORM FEED
0D = U+000D : CARRIAGE RETURN
0E = U+000E : SHIFT OUT
0F = U+000F : SHIFT IN
10 = U+0010 : DATA LINK ESCAPE
11 = U+0011 : DEVICE CONTROL ONE
12 = U+0012 : DEVICE CONTROL TWO
13 = U+0013 : DEVICE CONTROL THREE
14 = U+0014 : DEVICE CONTROL FOUR
15 = U+0015 : NEGATIVE ACKNOWLEDGE
16 = U+0016 : SYNCHRONOUS IDLE
17 = U+0017 : END OF TRANSMISSION BLOCK
18 = U+0018 : CANCEL
19 = U+0019 : END OF MEDIUM
1A = U+001A : SUBSTITUTE
1B = U+001B : ESCAPE
1C = U+001C : FILE SEPARATOR
1D = U+001D : GROUP SEPARATOR
1E = U+001E : RECORD SEPARATOR
1F = U+001F : UNIT SEPARATOR
20 = U+0020 : SPACE
21 = U+0021 : EXCLAMATION MARK
22 = U+0022 : QUOTATION MARK
23 = U+0023 : NUMBER SIGN
24 = U+0024 : DOLLAR SIGN
25 = U+0025 : PERCENT SIGN
26 = U+0026 : AMPERSAND
27 = U+0027 : APOSTROPHE
28 = U+0028 : LEFT PARENTHESIS
29 = U+0029 : RIGHT PARENTHESIS
2A = U+002A : ASTERISK
2B = U+002B : PLUS SIGN
2C = U+002C : COMMA
2D = U+002D : HYPHEN-MINUS
2E = U+002E : FULL STOP
2F = U+002F : SOLIDUS
30 = U+0030 : DIGIT ZERO
31 = U+0031 : DIGIT ONE
32 = U+0032 : DIGIT TWO
33 = U+0033 : DIGIT THREE
34 = U+0034 : DIGIT FOUR
35 = U+0035 : DIGIT FIVE
36 = U+0036 : DIGIT SIX
37 = U+0037 : DIGIT SEVEN
38 = U+0038 : DIGIT EIGHT
39 = U+0039 : DIGIT NINE
3A = U+003A : COLON
3B = U+003B : SEMICOLON
3C = U+003C : LESS-THAN SIGN
3D = U+003D : EQUALS SIGN
3E = U+003E : GREATER-THAN SIGN
3F = U+003F : QUESTION MARK
40 = U+0040 : COMMERCIAL AT
41 = U+0041 : LATIN CAPITAL LETTER A
42 = U+0042 : LATIN CAPITAL LETTER B
43 = U+0043 : LATIN CAPITAL LETTER C
44 = U+0044 : LATIN CAPITAL LETTER D
45 = U+0045 : LATIN CAPITAL LETTER E
46 = U+0046 : LATIN CAPITAL LETTER F
47 = U+0047 : LATIN CAPITAL LETTER G
48 = U+0048 : LATIN CAPITAL LETTER H
49 = U+0049 : LATIN CAPITAL LETTER I
4A = U+004A : LATIN CAPITAL LETTER J
4B = U+004B : LATIN CAPITAL LETTER K
4C = U+004C : LATIN CAPITAL LETTER L
4D = U+004D : LATIN CAPITAL LETTER M
4E = U+004E : LATIN CAPITAL LETTER N
4F = U+004F : LATIN CAPITAL LETTER O
50 = U+0050 : LATIN CAPITAL LETTER P
51 = U+0051 : LATIN CAPITAL LETTER Q
52 = U+0052 : LATIN CAPITAL LETTER R
53 = U+0053 : LATIN CAPITAL LETTER S
54 = U+0054 : LATIN CAPITAL LETTER T
55 = U+0055 : LATIN CAPITAL LETTER U
56 = U+0056 : LATIN CAPITAL LETTER V
57 = U+0057 : LATIN CAPITAL LETTER W
58 = U+0058 : LATIN CAPITAL LETTER X
59 = U+0059 : LATIN CAPITAL LETTER Y
5A = U+005A : LATIN CAPITAL LETTER Z
5B = U+005B : LEFT SQUARE BRACKET
5C = U+005C : REVERSE SOLIDUS
5D = U+005D : RIGHT SQUARE BRACKET
5E = U+005E : CIRCUMFLEX ACCENT
5F = U+005F : LOW LINE
60 = U+0060 : GRAVE ACCENT
61 = U+0061 : LATIN SMALL LETTER A
62 = U+0062 : LATIN SMALL LETTER B
63 = U+0063 : LATIN SMALL LETTER C
64 = U+0064 : LATIN SMALL LETTER D
65 = U+0065 : LATIN SMALL LETTER E
66 = U+0066 : LATIN SMALL LETTER F
67 = U+0067 : LATIN SMALL LETTER G
68 = U+0068 : LATIN SMALL LETTER H
69 = U+0069 : LATIN SMALL LETTER I
6A = U+006A : LATIN SMALL LETTER J
6B = U+006B : LATIN SMALL LETTER K
6C = U+006C : LATIN SMALL LETTER L
6D = U+006D : LATIN SMALL LETTER M
6E = U+006E : LATIN SMALL LETTER N
6F = U+006F : LATIN SMALL LETTER O
70 = U+0070 : LATIN SMALL LETTER P
71 = U+0071 : LATIN SMALL LETTER Q
72 = U+0072 : LATIN SMALL LETTER R
73 = U+0073 : LATIN SMALL LETTER S
74 = U+0074 : LATIN SMALL LETTER T
75 = U+0075 : LATIN SMALL LETTER U
76 = U+0076 : LATIN SMALL LETTER V
77 = U+0077 : LATIN SMALL LETTER W
78 = U+0078 : LATIN SMALL LETTER X
79 = U+0079 : LATIN SMALL LETTER Y
7A = U+007A : LATIN SMALL LETTER Z
7B = U+007B : LEFT CURLY BRACKET
7C = U+007C : VERTICAL LINE
7D = U+007D : RIGHT CURLY BRACKET
7E = U+007E : TILDE
7F = U+007F : DELETE
80 = U+20AC : EURO SIGN
82 = U+201A : SINGLE LOW-9 QUOTATION MARK
83 = U+0192 : LATIN SMALL LETTER F WITH HOOK
84 = U+201E : DOUBLE LOW-9 QUOTATION MARK
85 = U+2026 : HORIZONTAL ELLIPSIS
86 = U+2020 : DAGGER
87 = U+2021 : DOUBLE DAGGER
88 = U+02C6 : MODIFIER LETTER CIRCUMFLEX ACCENT
89 = U+2030 : PER MILLE SIGN
8A = U+0160 : LATIN CAPITAL LETTER S WITH CARON
8B = U+2039 : SINGLE LEFT-POINTING ANGLE QUOTATION MARK
8C = U+0152 : LATIN CAPITAL LIGATURE OE
8E = U+017D : LATIN CAPITAL LETTER Z WITH CARON
91 = U+2018 : LEFT SINGLE QUOTATION MARK
92 = U+2019 : RIGHT SINGLE QUOTATION MARK
93 = U+201C : LEFT DOUBLE QUOTATION MARK
94 = U+201D : RIGHT DOUBLE QUOTATION MARK
95 = U+2022 : BULLET
96 = U+2013 : EN DASH
97 = U+2014 : EM DASH
98 = U+02DC : SMALL TILDE
99 = U+2122 : TRADE MARK SIGN
9A = U+0161 : LATIN SMALL LETTER S WITH CARON
9B = U+203A : SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
9C = U+0153 : LATIN SMALL LIGATURE OE
9E = U+017E : LATIN SMALL LETTER Z WITH CARON
9F = U+0178 : LATIN CAPITAL LETTER Y WITH DIAERESIS
A0 = U+00A0 : NO-BREAK SPACE
A1 = U+00A1 : INVERTED EXCLAMATION MARK
A2 = U+00A2 : CENT SIGN
A3 = U+00A3 : POUND SIGN
A4 = U+00A4 : CURRENCY SIGN
A5 = U+00A5 : YEN SIGN
A6 = U+00A6 : BROKEN BAR
A7 = U+00A7 : SECTION SIGN
A8 = U+00A8 : DIAERESIS
A9 = U+00A9 : COPYRIGHT SIGN
AA = U+00AA : FEMININE ORDINAL INDICATOR
AB = U+00AB : LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
AC = U+00AC : NOT SIGN
AD = U+00AD : SOFT HYPHEN
AE = U+00AE : REGISTERED SIGN
AF = U+00AF : MACRON
B0 = U+00B0 : DEGREE SIGN
B1 = U+00B1 : PLUS-MINUS SIGN
B2 = U+00B2 : SUPERSCRIPT TWO
B3 = U+00B3 : SUPERSCRIPT THREE
B4 = U+00B4 : ACUTE ACCENT
B5 = U+00B5 : MICRO SIGN
B6 = U+00B6 : PILCROW SIGN
B7 = U+00B7 : MIDDLE DOT
B8 = U+00B8 : CEDILLA
B9 = U+00B9 : SUPERSCRIPT ONE
BA = U+00BA : MASCULINE ORDINAL INDICATOR
BB = U+00BB : RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
BC = U+00BC : VULGAR FRACTION ONE QUARTER
BD = U+00BD : VULGAR FRACTION ONE HALF
BE = U+00BE : VULGAR FRACTION THREE QUARTERS
BF = U+00BF : INVERTED QUESTION MARK
C0 = U+00C0 : LATIN CAPITAL LETTER A WITH GRAVE
C1 = U+00C1 : LATIN CAPITAL LETTER A WITH ACUTE
C2 = U+00C2 : LATIN CAPITAL LETTER A WITH CIRCUMFLEX
C3 = U+00C3 : LATIN CAPITAL LETTER A WITH TILDE
C4 = U+00C4 : LATIN CAPITAL LETTER A WITH DIAERESIS
C5 = U+00C5 : LATIN CAPITAL LETTER A WITH RING ABOVE
C6 = U+00C6 : LATIN CAPITAL LETTER AE
C7 = U+00C7 : LATIN CAPITAL LETTER C WITH CEDILLA
C8 = U+00C8 : LATIN CAPITAL LETTER E WITH GRAVE
C9 = U+00C9 : LATIN CAPITAL LETTER E WITH ACUTE
CA = U+00CA : LATIN CAPITAL LETTER E WITH CIRCUMFLEX
CB = U+00CB : LATIN CAPITAL LETTER E WITH DIAERESIS
CC = U+00CC : LATIN CAPITAL LETTER I WITH GRAVE
CD = U+00CD : LATIN CAPITAL LETTER I WITH ACUTE
CE = U+00CE : LATIN CAPITAL LETTER I WITH CIRCUMFLEX
CF = U+00CF : LATIN CAPITAL LETTER I WITH DIAERESIS
D0 = U+00D0 : LATIN CAPITAL LETTER ETH
D1 = U+00D1 : LATIN CAPITAL LETTER N WITH TILDE
D2 = U+00D2 : LATIN CAPITAL LETTER O WITH GRAVE
D3 = U+00D3 : LATIN CAPITAL LETTER O WITH ACUTE
D4 = U+00D4 : LATIN CAPITAL LETTER O WITH CIRCUMFLEX
D5 = U+00D5 : LATIN CAPITAL LETTER O WITH TILDE
D6 = U+00D6 : LATIN CAPITAL LETTER O WITH DIAERESIS
D7 = U+00D7 : MULTIPLICATION SIGN
D8 = U+00D8 : LATIN CAPITAL LETTER O WITH STROKE
D9 = U+00D9 : LATIN CAPITAL LETTER U WITH GRAVE
DA = U+00DA : LATIN CAPITAL LETTER U WITH ACUTE
DB = U+00DB : LATIN CAPITAL LETTER U WITH CIRCUMFLEX
DC = U+00DC : LATIN CAPITAL LETTER U WITH DIAERESIS
DD = U+00DD : LATIN CAPITAL LETTER Y WITH ACUTE
DE = U+00DE : LATIN CAPITAL LETTER THORN
DF = U+00DF : LATIN SMALL LETTER SHARP S
E0 = U+00E0 : LATIN SMALL LETTER A WITH GRAVE
E1 = U+00E1 : LATIN SMALL LETTER A WITH ACUTE
E2 = U+00E2 : LATIN SMALL LETTER A WITH CIRCUMFLEX
E3 = U+00E3 : LATIN SMALL LETTER A WITH TILDE
E4 = U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS
E5 = U+00E5 : LATIN SMALL LETTER A WITH RING ABOVE
E6 = U+00E6 : LATIN SMALL LETTER AE
E7 = U+00E7 : LATIN SMALL LETTER C WITH CEDILLA
E8 = U+00E8 : LATIN SMALL LETTER E WITH GRAVE
E9 = U+00E9 : LATIN SMALL LETTER E WITH ACUTE
EA = U+00EA : LATIN SMALL LETTER E WITH CIRCUMFLEX
EB = U+00EB : LATIN SMALL LETTER E WITH DIAERESIS
EC = U+00EC : LATIN SMALL LETTER I WITH GRAVE
ED = U+00ED : LATIN SMALL LETTER I WITH ACUTE
EE = U+00EE : LATIN SMALL LETTER I WITH CIRCUMFLEX
EF = U+00EF : LATIN SMALL LETTER I WITH DIAERESIS
F0 = U+00F0 : LATIN SMALL LETTER ETH
F1 = U+00F1 : LATIN SMALL LETTER N WITH TILDE
F2 = U+00F2 : LATIN SMALL LETTER O WITH GRAVE
F3 = U+00F3 : LATIN SMALL LETTER O WITH ACUTE
F4 = U+00F4 : LATIN SMALL LETTER O WITH CIRCUMFLEX
F5 = U+00F5 : LATIN SMALL LETTER O WITH TILDE
F6 = U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS
F7 = U+00F7 : DIVISION SIGN
F8 = U+00F8 : LATIN SMALL LETTER O WITH STROKE
F9 = U+00F9 : LATIN SMALL LETTER U WITH GRAVE
FA = U+00FA : LATIN SMALL LETTER U WITH ACUTE
FB = U+00FB : LATIN SMALL LETTER U WITH CIRCUMFLEX
FC = U+00FC : LATIN SMALL LETTER U WITH DIAERESIS
FD = U+00FD : LATIN SMALL LETTER Y WITH ACUTE
FE = U+00FE : LATIN SMALL LETTER THORN
FF = U+00FF : LATIN SMALL LETTER Y WITH DIAERESIS
*
*/
}
?>
|