aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/3rdparty/libraries/MOBIClass/CharacterEntities.php
diff options
context:
space:
mode:
authorThomas Citharel <tcit@tcit.fr>2014-12-22 14:33:01 +0100
committerThomas Citharel <tcit@tcit.fr>2014-12-22 14:33:01 +0100
commitf3917b1ee4bba812e82da57dd181f50d62926db4 (patch)
treecb4b04931e97e4a69500bf9d7aa5c21985cab59e /inc/3rdparty/libraries/MOBIClass/CharacterEntities.php
parent311baf86befde0557faea614ca4d13bb2bd2cc66 (diff)
parent9254b6cf460edec3a59e9ccc19873481a1d19c90 (diff)
downloadwallabag-f3917b1ee4bba812e82da57dd181f50d62926db4.tar.gz
wallabag-f3917b1ee4bba812e82da57dd181f50d62926db4.tar.zst
wallabag-f3917b1ee4bba812e82da57dd181f50d62926db4.zip
Merge branch 'dev' into randomarticle
Diffstat (limited to 'inc/3rdparty/libraries/MOBIClass/CharacterEntities.php')
-rw-r--r--inc/3rdparty/libraries/MOBIClass/CharacterEntities.php413
1 files changed, 413 insertions, 0 deletions
diff --git a/inc/3rdparty/libraries/MOBIClass/CharacterEntities.php b/inc/3rdparty/libraries/MOBIClass/CharacterEntities.php
new file mode 100644
index 00000000..de896f87
--- /dev/null
+++ b/inc/3rdparty/libraries/MOBIClass/CharacterEntities.php
@@ -0,0 +1,413 @@
1<?php
2
3/**
4 * Description of CharacterEntities
5 *
6 * @author Sander
7 */
class CharacterEntities {
    /**
     * Prepare a string for output.
     *
     * The input is assumed to be UTF-8 and the output is UTF-8 as well, so
     * the string is handed back untouched. The CP1252 conversion that used
     * to follow the return statement could never execute and has been
     * removed; the lookup tables it relied on are still available from
     * generateTables().
     *
     * @param string $str Text to convert (assumed UTF-8).
     * @return string The same text, unmodified.
     */
    public static function convert($str){
        return $str;
    }

    /**
     * Build the parallel search/replace tables for a CP1252 conversion.
     *
     * Both tables have the integer keys 0..255 in ascending order.
     * $to[$i] is always the single byte chr($i). $from[$i] is the same
     * single byte unless an override is listed below, in which case it is
     * the literal character written in this source file (presumably UTF-8
     * encoded - this depends on the encoding the file is saved in).
     *
     * Not called by convert() at present.
     *
     * NOTE(review): 0xA0 (NO-BREAK SPACE) and 0xAD (SOFT HYPHEN) appear in
     * the reference table at the end of this class but have no override
     * here, so they keep their single-byte identity mapping. Confirm
     * whether that is intentional before putting these tables to use.
     *
     * NOTE(review): when both tables are passed to str_replace(), the
     * pairs are applied one after another, so bytes produced by an earlier
     * pair can be matched again by a later pair. strtr() with a single map
     * does not rescan replaced text.
     *
     * @return array Two-element array: array($from, $to).
     */
    private static function generateTables(){
        // Identity table: every byte value maps to itself.
        $to = array();
        for($code = 0; $code < 256; $code++){
            $to[$code] = chr($code);
        }

        // Search strings start out identical to the replacement bytes.
        $from = $to;

        // Byte value => literal character that should be searched for.
        $overrides = array(
            0x80 => "€",
            0x82 => "‚",
            0x83 => "ƒ",
            0x84 => "„",
            0x85 => "…",
            0x86 => "†",
            0x87 => "‡",
            0x88 => "ˆ",
            0x89 => "‰",
            0x8A => "Š",
            0x8B => "‹",
            0x8C => "Œ",
            0x8E => "Ž",

            0x91 => "‘",
            0x92 => "’",
            0x93 => "“",
            0x94 => "”",
            0x95 => "•",
            0x96 => "–",
            0x97 => "—",
            0x98 => "˜",
            0x99 => "™",
            0x9A => "š",
            0x9B => "›",
            0x9C => "œ",
            0x9E => "ž",
            0x9F => "Ÿ",

            0xA1 => "¡",
            0xA2 => "¢",
            0xA3 => "£",
            0xA4 => "¤",
            0xA5 => "¥",
            0xA6 => "¦",
            0xA7 => "§",
            0xA8 => "¨",
            0xA9 => "©",
            0xAA => "ª",
            0xAB => "«",
            0xAC => "¬",
            0xAE => "®",
            0xAF => "¯",

            0xB0 => "°",
            0xB1 => "±",
            0xB2 => "²",
            0xB3 => "³",
            0xB4 => "´",
            0xB5 => "µ",
            0xB6 => "¶",
            0xB7 => "·",
            0xB8 => "¸",
            0xB9 => "¹",
            0xBA => "º",
            0xBB => "»",
            0xBC => "¼",
            0xBD => "½",
            0xBE => "¾",
            0xBF => "¿",

            0xC0 => "À",
            0xC1 => "Á",
            0xC2 => "Â",
            0xC3 => "Ã",
            0xC4 => "Ä",
            0xC5 => "Å",
            0xC6 => "Æ",
            0xC7 => "Ç",
            0xC8 => "È",
            0xC9 => "É",
            0xCA => "Ê",
            0xCB => "Ë",
            0xCC => "Ì",
            0xCD => "Í",
            0xCE => "Î",
            0xCF => "Ï",

            0xD0 => "Ð",
            0xD1 => "Ñ",
            0xD2 => "Ò",
            0xD3 => "Ó",
            0xD4 => "Ô",
            0xD5 => "Õ",
            0xD6 => "Ö",
            0xD7 => "×",
            0xD8 => "Ø",
            0xD9 => "Ù",
            0xDA => "Ú",
            0xDB => "Û",
            0xDC => "Ü",
            0xDD => "Ý",
            0xDE => "Þ",
            0xDF => "ß",

            0xE0 => "à",
            0xE1 => "á",
            0xE2 => "â",
            0xE3 => "ã",
            0xE4 => "ä",
            0xE5 => "å",
            0xE6 => "æ",
            0xE7 => "ç",
            0xE8 => "è",
            0xE9 => "é",
            0xEA => "ê",
            0xEB => "ë",
            0xEC => "ì",
            0xED => "í",
            0xEE => "î",
            0xEF => "ï",

            0xF0 => "ð",
            0xF1 => "ñ",
            0xF2 => "ò",
            0xF3 => "ó",
            0xF4 => "ô",
            0xF5 => "õ",
            0xF6 => "ö",
            0xF7 => "÷",
            0xF8 => "ø",
            0xF9 => "ù",
            0xFA => "ú",
            0xFB => "û",
            0xFC => "ü",
            0xFD => "ý",
            0xFE => "þ",
            0xFF => "ÿ",
        );

        // Overwrite in place so the 0..255 key order of $from is preserved.
        foreach($overrides as $code => $character){
            $from[$code] = $character;
        }

        return array($from, $to);
    }

    /*
     * CP1252 byte value = Unicode code point : character name
     *
     * 00 = U+0000 : NULL
     * 01 = U+0001 : START OF HEADING
     * 02 = U+0002 : START OF TEXT
     * 03 = U+0003 : END OF TEXT
     * 04 = U+0004 : END OF TRANSMISSION
     * 05 = U+0005 : ENQUIRY
     * 06 = U+0006 : ACKNOWLEDGE
     * 07 = U+0007 : BELL
     * 08 = U+0008 : BACKSPACE
     * 09 = U+0009 : HORIZONTAL TABULATION
     * 0A = U+000A : LINE FEED
     * 0B = U+000B : VERTICAL TABULATION
     * 0C = U+000C : FORM FEED
     * 0D = U+000D : CARRIAGE RETURN
     * 0E = U+000E : SHIFT OUT
     * 0F = U+000F : SHIFT IN
     * 10 = U+0010 : DATA LINK ESCAPE
     * 11 = U+0011 : DEVICE CONTROL ONE
     * 12 = U+0012 : DEVICE CONTROL TWO
     * 13 = U+0013 : DEVICE CONTROL THREE
     * 14 = U+0014 : DEVICE CONTROL FOUR
     * 15 = U+0015 : NEGATIVE ACKNOWLEDGE
     * 16 = U+0016 : SYNCHRONOUS IDLE
     * 17 = U+0017 : END OF TRANSMISSION BLOCK
     * 18 = U+0018 : CANCEL
     * 19 = U+0019 : END OF MEDIUM
     * 1A = U+001A : SUBSTITUTE
     * 1B = U+001B : ESCAPE
     * 1C = U+001C : FILE SEPARATOR
     * 1D = U+001D : GROUP SEPARATOR
     * 1E = U+001E : RECORD SEPARATOR
     * 1F = U+001F : UNIT SEPARATOR
     * 20 = U+0020 : SPACE
     * 21 = U+0021 : EXCLAMATION MARK
     * 22 = U+0022 : QUOTATION MARK
     * 23 = U+0023 : NUMBER SIGN
     * 24 = U+0024 : DOLLAR SIGN
     * 25 = U+0025 : PERCENT SIGN
     * 26 = U+0026 : AMPERSAND
     * 27 = U+0027 : APOSTROPHE
     * 28 = U+0028 : LEFT PARENTHESIS
     * 29 = U+0029 : RIGHT PARENTHESIS
     * 2A = U+002A : ASTERISK
     * 2B = U+002B : PLUS SIGN
     * 2C = U+002C : COMMA
     * 2D = U+002D : HYPHEN-MINUS
     * 2E = U+002E : FULL STOP
     * 2F = U+002F : SOLIDUS
     * 30 = U+0030 : DIGIT ZERO
     * 31 = U+0031 : DIGIT ONE
     * 32 = U+0032 : DIGIT TWO
     * 33 = U+0033 : DIGIT THREE
     * 34 = U+0034 : DIGIT FOUR
     * 35 = U+0035 : DIGIT FIVE
     * 36 = U+0036 : DIGIT SIX
     * 37 = U+0037 : DIGIT SEVEN
     * 38 = U+0038 : DIGIT EIGHT
     * 39 = U+0039 : DIGIT NINE
     * 3A = U+003A : COLON
     * 3B = U+003B : SEMICOLON
     * 3C = U+003C : LESS-THAN SIGN
     * 3D = U+003D : EQUALS SIGN
     * 3E = U+003E : GREATER-THAN SIGN
     * 3F = U+003F : QUESTION MARK
     * 40 = U+0040 : COMMERCIAL AT
     * 41 = U+0041 : LATIN CAPITAL LETTER A
     * 42 = U+0042 : LATIN CAPITAL LETTER B
     * 43 = U+0043 : LATIN CAPITAL LETTER C
     * 44 = U+0044 : LATIN CAPITAL LETTER D
     * 45 = U+0045 : LATIN CAPITAL LETTER E
     * 46 = U+0046 : LATIN CAPITAL LETTER F
     * 47 = U+0047 : LATIN CAPITAL LETTER G
     * 48 = U+0048 : LATIN CAPITAL LETTER H
     * 49 = U+0049 : LATIN CAPITAL LETTER I
     * 4A = U+004A : LATIN CAPITAL LETTER J
     * 4B = U+004B : LATIN CAPITAL LETTER K
     * 4C = U+004C : LATIN CAPITAL LETTER L
     * 4D = U+004D : LATIN CAPITAL LETTER M
     * 4E = U+004E : LATIN CAPITAL LETTER N
     * 4F = U+004F : LATIN CAPITAL LETTER O
     * 50 = U+0050 : LATIN CAPITAL LETTER P
     * 51 = U+0051 : LATIN CAPITAL LETTER Q
     * 52 = U+0052 : LATIN CAPITAL LETTER R
     * 53 = U+0053 : LATIN CAPITAL LETTER S
     * 54 = U+0054 : LATIN CAPITAL LETTER T
     * 55 = U+0055 : LATIN CAPITAL LETTER U
     * 56 = U+0056 : LATIN CAPITAL LETTER V
     * 57 = U+0057 : LATIN CAPITAL LETTER W
     * 58 = U+0058 : LATIN CAPITAL LETTER X
     * 59 = U+0059 : LATIN CAPITAL LETTER Y
     * 5A = U+005A : LATIN CAPITAL LETTER Z
     * 5B = U+005B : LEFT SQUARE BRACKET
     * 5C = U+005C : REVERSE SOLIDUS
     * 5D = U+005D : RIGHT SQUARE BRACKET
     * 5E = U+005E : CIRCUMFLEX ACCENT
     * 5F = U+005F : LOW LINE
     * 60 = U+0060 : GRAVE ACCENT
     * 61 = U+0061 : LATIN SMALL LETTER A
     * 62 = U+0062 : LATIN SMALL LETTER B
     * 63 = U+0063 : LATIN SMALL LETTER C
     * 64 = U+0064 : LATIN SMALL LETTER D
     * 65 = U+0065 : LATIN SMALL LETTER E
     * 66 = U+0066 : LATIN SMALL LETTER F
     * 67 = U+0067 : LATIN SMALL LETTER G
     * 68 = U+0068 : LATIN SMALL LETTER H
     * 69 = U+0069 : LATIN SMALL LETTER I
     * 6A = U+006A : LATIN SMALL LETTER J
     * 6B = U+006B : LATIN SMALL LETTER K
     * 6C = U+006C : LATIN SMALL LETTER L
     * 6D = U+006D : LATIN SMALL LETTER M
     * 6E = U+006E : LATIN SMALL LETTER N
     * 6F = U+006F : LATIN SMALL LETTER O
     * 70 = U+0070 : LATIN SMALL LETTER P
     * 71 = U+0071 : LATIN SMALL LETTER Q
     * 72 = U+0072 : LATIN SMALL LETTER R
     * 73 = U+0073 : LATIN SMALL LETTER S
     * 74 = U+0074 : LATIN SMALL LETTER T
     * 75 = U+0075 : LATIN SMALL LETTER U
     * 76 = U+0076 : LATIN SMALL LETTER V
     * 77 = U+0077 : LATIN SMALL LETTER W
     * 78 = U+0078 : LATIN SMALL LETTER X
     * 79 = U+0079 : LATIN SMALL LETTER Y
     * 7A = U+007A : LATIN SMALL LETTER Z
     * 7B = U+007B : LEFT CURLY BRACKET
     * 7C = U+007C : VERTICAL LINE
     * 7D = U+007D : RIGHT CURLY BRACKET
     * 7E = U+007E : TILDE
     * 7F = U+007F : DELETE
     * 80 = U+20AC : EURO SIGN
     * 82 = U+201A : SINGLE LOW-9 QUOTATION MARK
     * 83 = U+0192 : LATIN SMALL LETTER F WITH HOOK
     * 84 = U+201E : DOUBLE LOW-9 QUOTATION MARK
     * 85 = U+2026 : HORIZONTAL ELLIPSIS
     * 86 = U+2020 : DAGGER
     * 87 = U+2021 : DOUBLE DAGGER
     * 88 = U+02C6 : MODIFIER LETTER CIRCUMFLEX ACCENT
     * 89 = U+2030 : PER MILLE SIGN
     * 8A = U+0160 : LATIN CAPITAL LETTER S WITH CARON
     * 8B = U+2039 : SINGLE LEFT-POINTING ANGLE QUOTATION MARK
     * 8C = U+0152 : LATIN CAPITAL LIGATURE OE
     * 8E = U+017D : LATIN CAPITAL LETTER Z WITH CARON
     * 91 = U+2018 : LEFT SINGLE QUOTATION MARK
     * 92 = U+2019 : RIGHT SINGLE QUOTATION MARK
     * 93 = U+201C : LEFT DOUBLE QUOTATION MARK
     * 94 = U+201D : RIGHT DOUBLE QUOTATION MARK
     * 95 = U+2022 : BULLET
     * 96 = U+2013 : EN DASH
     * 97 = U+2014 : EM DASH
     * 98 = U+02DC : SMALL TILDE
     * 99 = U+2122 : TRADE MARK SIGN
     * 9A = U+0161 : LATIN SMALL LETTER S WITH CARON
     * 9B = U+203A : SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
     * 9C = U+0153 : LATIN SMALL LIGATURE OE
     * 9E = U+017E : LATIN SMALL LETTER Z WITH CARON
     * 9F = U+0178 : LATIN CAPITAL LETTER Y WITH DIAERESIS
     * A0 = U+00A0 : NO-BREAK SPACE
     * A1 = U+00A1 : INVERTED EXCLAMATION MARK
     * A2 = U+00A2 : CENT SIGN
     * A3 = U+00A3 : POUND SIGN
     * A4 = U+00A4 : CURRENCY SIGN
     * A5 = U+00A5 : YEN SIGN
     * A6 = U+00A6 : BROKEN BAR
     * A7 = U+00A7 : SECTION SIGN
     * A8 = U+00A8 : DIAERESIS
     * A9 = U+00A9 : COPYRIGHT SIGN
     * AA = U+00AA : FEMININE ORDINAL INDICATOR
     * AB = U+00AB : LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
     * AC = U+00AC : NOT SIGN
     * AD = U+00AD : SOFT HYPHEN
     * AE = U+00AE : REGISTERED SIGN
     * AF = U+00AF : MACRON
     * B0 = U+00B0 : DEGREE SIGN
     * B1 = U+00B1 : PLUS-MINUS SIGN
     * B2 = U+00B2 : SUPERSCRIPT TWO
     * B3 = U+00B3 : SUPERSCRIPT THREE
     * B4 = U+00B4 : ACUTE ACCENT
     * B5 = U+00B5 : MICRO SIGN
     * B6 = U+00B6 : PILCROW SIGN
     * B7 = U+00B7 : MIDDLE DOT
     * B8 = U+00B8 : CEDILLA
     * B9 = U+00B9 : SUPERSCRIPT ONE
     * BA = U+00BA : MASCULINE ORDINAL INDICATOR
     * BB = U+00BB : RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
     * BC = U+00BC : VULGAR FRACTION ONE QUARTER
     * BD = U+00BD : VULGAR FRACTION ONE HALF
     * BE = U+00BE : VULGAR FRACTION THREE QUARTERS
     * BF = U+00BF : INVERTED QUESTION MARK
     * C0 = U+00C0 : LATIN CAPITAL LETTER A WITH GRAVE
     * C1 = U+00C1 : LATIN CAPITAL LETTER A WITH ACUTE
     * C2 = U+00C2 : LATIN CAPITAL LETTER A WITH CIRCUMFLEX
     * C3 = U+00C3 : LATIN CAPITAL LETTER A WITH TILDE
     * C4 = U+00C4 : LATIN CAPITAL LETTER A WITH DIAERESIS
     * C5 = U+00C5 : LATIN CAPITAL LETTER A WITH RING ABOVE
     * C6 = U+00C6 : LATIN CAPITAL LETTER AE
     * C7 = U+00C7 : LATIN CAPITAL LETTER C WITH CEDILLA
     * C8 = U+00C8 : LATIN CAPITAL LETTER E WITH GRAVE
     * C9 = U+00C9 : LATIN CAPITAL LETTER E WITH ACUTE
     * CA = U+00CA : LATIN CAPITAL LETTER E WITH CIRCUMFLEX
     * CB = U+00CB : LATIN CAPITAL LETTER E WITH DIAERESIS
     * CC = U+00CC : LATIN CAPITAL LETTER I WITH GRAVE
     * CD = U+00CD : LATIN CAPITAL LETTER I WITH ACUTE
     * CE = U+00CE : LATIN CAPITAL LETTER I WITH CIRCUMFLEX
     * CF = U+00CF : LATIN CAPITAL LETTER I WITH DIAERESIS
     * D0 = U+00D0 : LATIN CAPITAL LETTER ETH
     * D1 = U+00D1 : LATIN CAPITAL LETTER N WITH TILDE
     * D2 = U+00D2 : LATIN CAPITAL LETTER O WITH GRAVE
     * D3 = U+00D3 : LATIN CAPITAL LETTER O WITH ACUTE
     * D4 = U+00D4 : LATIN CAPITAL LETTER O WITH CIRCUMFLEX
     * D5 = U+00D5 : LATIN CAPITAL LETTER O WITH TILDE
     * D6 = U+00D6 : LATIN CAPITAL LETTER O WITH DIAERESIS
     * D7 = U+00D7 : MULTIPLICATION SIGN
     * D8 = U+00D8 : LATIN CAPITAL LETTER O WITH STROKE
     * D9 = U+00D9 : LATIN CAPITAL LETTER U WITH GRAVE
     * DA = U+00DA : LATIN CAPITAL LETTER U WITH ACUTE
     * DB = U+00DB : LATIN CAPITAL LETTER U WITH CIRCUMFLEX
     * DC = U+00DC : LATIN CAPITAL LETTER U WITH DIAERESIS
     * DD = U+00DD : LATIN CAPITAL LETTER Y WITH ACUTE
     * DE = U+00DE : LATIN CAPITAL LETTER THORN
     * DF = U+00DF : LATIN SMALL LETTER SHARP S
     * E0 = U+00E0 : LATIN SMALL LETTER A WITH GRAVE
     * E1 = U+00E1 : LATIN SMALL LETTER A WITH ACUTE
     * E2 = U+00E2 : LATIN SMALL LETTER A WITH CIRCUMFLEX
     * E3 = U+00E3 : LATIN SMALL LETTER A WITH TILDE
     * E4 = U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS
     * E5 = U+00E5 : LATIN SMALL LETTER A WITH RING ABOVE
     * E6 = U+00E6 : LATIN SMALL LETTER AE
     * E7 = U+00E7 : LATIN SMALL LETTER C WITH CEDILLA
     * E8 = U+00E8 : LATIN SMALL LETTER E WITH GRAVE
     * E9 = U+00E9 : LATIN SMALL LETTER E WITH ACUTE
     * EA = U+00EA : LATIN SMALL LETTER E WITH CIRCUMFLEX
     * EB = U+00EB : LATIN SMALL LETTER E WITH DIAERESIS
     * EC = U+00EC : LATIN SMALL LETTER I WITH GRAVE
     * ED = U+00ED : LATIN SMALL LETTER I WITH ACUTE
     * EE = U+00EE : LATIN SMALL LETTER I WITH CIRCUMFLEX
     * EF = U+00EF : LATIN SMALL LETTER I WITH DIAERESIS
     * F0 = U+00F0 : LATIN SMALL LETTER ETH
     * F1 = U+00F1 : LATIN SMALL LETTER N WITH TILDE
     * F2 = U+00F2 : LATIN SMALL LETTER O WITH GRAVE
     * F3 = U+00F3 : LATIN SMALL LETTER O WITH ACUTE
     * F4 = U+00F4 : LATIN SMALL LETTER O WITH CIRCUMFLEX
     * F5 = U+00F5 : LATIN SMALL LETTER O WITH TILDE
     * F6 = U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS
     * F7 = U+00F7 : DIVISION SIGN
     * F8 = U+00F8 : LATIN SMALL LETTER O WITH STROKE
     * F9 = U+00F9 : LATIN SMALL LETTER U WITH GRAVE
     * FA = U+00FA : LATIN SMALL LETTER U WITH ACUTE
     * FB = U+00FB : LATIN SMALL LETTER U WITH CIRCUMFLEX
     * FC = U+00FC : LATIN SMALL LETTER U WITH DIAERESIS
     * FD = U+00FD : LATIN SMALL LETTER Y WITH ACUTE
     * FE = U+00FE : LATIN SMALL LETTER THORN
     * FF = U+00FF : LATIN SMALL LETTER Y WITH DIAERESIS
     */
}
413?>