diff options
author | tcit <tcit@tcit.fr> | 2014-07-24 21:56:04 +0200 |
---|---|---|
committer | tcit <tcit@tcit.fr> | 2014-07-24 21:56:04 +0200 |
commit | fb9df0c269f36703909b8b259abbdbed29881ecd (patch) | |
tree | 03069262fe6a7bd5891f5649d31fc057b3ca8541 /inc/3rdparty/libraries/MOBIClass | |
parent | c70bfefc68fcc96b1ce57845e5b2942a596239ec (diff) | |
download | wallabag-fb9df0c269f36703909b8b259abbdbed29881ecd.tar.gz wallabag-fb9df0c269f36703909b8b259abbdbed29881ecd.tar.zst wallabag-fb9df0c269f36703909b8b259abbdbed29881ecd.zip |
use directly MOBIClass
Diffstat (limited to 'inc/3rdparty/libraries/MOBIClass')
30 files changed, 4023 insertions, 0 deletions
diff --git a/inc/3rdparty/libraries/MOBIClass/CharacterEntities.php b/inc/3rdparty/libraries/MOBIClass/CharacterEntities.php new file mode 100644 index 00000000..de896f87 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/CharacterEntities.php | |||
@@ -0,0 +1,413 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Description of CharacterEntities | ||
5 | * | ||
6 | * @author Sander | ||
7 | */ | ||
8 | class CharacterEntities { | ||
/**
 * Prepare text for inclusion in the generated book.
 *
 * The input is assumed to be UTF-8 and the output is emitted as UTF-8,
 * so the string is handed back unchanged. The CP1252 lookup tables built
 * by generateTables() are not applied here; the statements that used them
 * sat after an unconditional return and could never run, so they were
 * removed.
 *
 * @param string $str Text to convert
 * @return string The same text, untouched
 */
public static function convert($str){
    return $str;
}
17 | |||
/**
 * Build the two parallel 256-entry lookup tables used for a
 * str_replace()-style character conversion.
 *
 * $to[$i] is always the single byte chr($i). $from[$i] starts out as the
 * same byte and is then overridden, for the codes listed below, with the
 * character literal written in this source file.
 *
 * @return array array($from, $to), both indexed 0..255 in ascending order
 */
private static function generateTables(){
    // Identity baseline: every code maps to its own single byte.
    $to = array();
    foreach(range(0, 255) as $code){
        $to[$code] = chr($code);
    }

    // Codes whose "from" side is a character literal instead of the raw byte.
    // Codes missing from this map (e.g. 0x81, 0x8D, 0x8F, 0x90, 0x9D, 0xA0,
    // 0xAD) keep the identity mapping.
    $literals = array(
        0x80 => "€", 0x82 => "‚", 0x83 => "ƒ", 0x84 => "„",
        0x85 => "…", 0x86 => "†", 0x87 => "‡", 0x88 => "ˆ",
        0x89 => "‰", 0x8A => "Š", 0x8B => "‹", 0x8C => "Œ",
        0x8E => "Ž",

        0x91 => "‘", 0x92 => "’", 0x93 => "“", 0x94 => "”",
        0x95 => "•", 0x96 => "–", 0x97 => "—", 0x98 => "˜",
        0x99 => "™", 0x9A => "š", 0x9B => "›", 0x9C => "œ",
        0x9E => "ž", 0x9F => "Ÿ",

        0xA1 => "¡", 0xA2 => "¢", 0xA3 => "£", 0xA4 => "¤",
        0xA5 => "¥", 0xA6 => "¦", 0xA7 => "§", 0xA8 => "¨",
        0xA9 => "©", 0xAA => "ª", 0xAB => "«", 0xAC => "¬",
        0xAE => "®", 0xAF => "¯",

        0xB0 => "°", 0xB1 => "±", 0xB2 => "²", 0xB3 => "³",
        0xB4 => "´", 0xB5 => "µ", 0xB6 => "¶", 0xB7 => "·",
        0xB8 => "¸", 0xB9 => "¹", 0xBA => "º", 0xBB => "»",
        0xBC => "¼", 0xBD => "½", 0xBE => "¾", 0xBF => "¿",

        0xC0 => "À", 0xC1 => "Á", 0xC2 => "Â", 0xC3 => "Ã",
        0xC4 => "Ä", 0xC5 => "Å", 0xC6 => "Æ", 0xC7 => "Ç",
        0xC8 => "È", 0xC9 => "É", 0xCA => "Ê", 0xCB => "Ë",
        0xCC => "Ì", 0xCD => "Í", 0xCE => "Î", 0xCF => "Ï",

        0xD0 => "Ð", 0xD1 => "Ñ", 0xD2 => "Ò", 0xD3 => "Ó",
        0xD4 => "Ô", 0xD5 => "Õ", 0xD6 => "Ö", 0xD7 => "×",
        0xD8 => "Ø", 0xD9 => "Ù", 0xDA => "Ú", 0xDB => "Û",
        0xDC => "Ü", 0xDD => "Ý", 0xDE => "Þ", 0xDF => "ß",

        0xE0 => "à", 0xE1 => "á", 0xE2 => "â", 0xE3 => "ã",
        0xE4 => "ä", 0xE5 => "å", 0xE6 => "æ", 0xE7 => "ç",
        0xE8 => "è", 0xE9 => "é", 0xEA => "ê", 0xEB => "ë",
        0xEC => "ì", 0xED => "í", 0xEE => "î", 0xEF => "ï",

        0xF0 => "ð", 0xF1 => "ñ", 0xF2 => "ò", 0xF3 => "ó",
        0xF4 => "ô", 0xF5 => "õ", 0xF6 => "ö", 0xF7 => "÷",
        0xF8 => "ø", 0xF9 => "ù", 0xFA => "ú", 0xFB => "û",
        0xFC => "ü", 0xFD => "ý", 0xFE => "þ", 0xFF => "ÿ"
    );

    // Start from the identity table so key order stays 0..255, then patch
    // in the literals (assigning to an existing key keeps its position).
    $from = $to;
    foreach($literals as $code => $literal){
        $from[$code] = $literal;
    }

    return array($from, $to);
}
158 | /* | ||
159 | 00 = U+0000 : NULL | ||
160 | 01 = U+0001 : START OF HEADING | ||
161 | 02 = U+0002 : START OF TEXT | ||
162 | 03 = U+0003 : END OF TEXT | ||
163 | 04 = U+0004 : END OF TRANSMISSION | ||
164 | 05 = U+0005 : ENQUIRY | ||
165 | 06 = U+0006 : ACKNOWLEDGE | ||
166 | 07 = U+0007 : BELL | ||
167 | 08 = U+0008 : BACKSPACE | ||
168 | 09 = U+0009 : HORIZONTAL TABULATION | ||
169 | 0A = U+000A : LINE FEED | ||
170 | 0B = U+000B : VERTICAL TABULATION | ||
171 | 0C = U+000C : FORM FEED | ||
172 | 0D = U+000D : CARRIAGE RETURN | ||
173 | 0E = U+000E : SHIFT OUT | ||
174 | 0F = U+000F : SHIFT IN | ||
175 | 10 = U+0010 : DATA LINK ESCAPE | ||
176 | 11 = U+0011 : DEVICE CONTROL ONE | ||
177 | 12 = U+0012 : DEVICE CONTROL TWO | ||
178 | 13 = U+0013 : DEVICE CONTROL THREE | ||
179 | 14 = U+0014 : DEVICE CONTROL FOUR | ||
180 | 15 = U+0015 : NEGATIVE ACKNOWLEDGE | ||
181 | 16 = U+0016 : SYNCHRONOUS IDLE | ||
182 | 17 = U+0017 : END OF TRANSMISSION BLOCK | ||
183 | 18 = U+0018 : CANCEL | ||
184 | 19 = U+0019 : END OF MEDIUM | ||
185 | 1A = U+001A : SUBSTITUTE | ||
186 | 1B = U+001B : ESCAPE | ||
187 | 1C = U+001C : FILE SEPARATOR | ||
188 | 1D = U+001D : GROUP SEPARATOR | ||
189 | 1E = U+001E : RECORD SEPARATOR | ||
190 | 1F = U+001F : UNIT SEPARATOR | ||
191 | 20 = U+0020 : SPACE | ||
192 | 21 = U+0021 : EXCLAMATION MARK | ||
193 | 22 = U+0022 : QUOTATION MARK | ||
194 | 23 = U+0023 : NUMBER SIGN | ||
195 | 24 = U+0024 : DOLLAR SIGN | ||
196 | 25 = U+0025 : PERCENT SIGN | ||
197 | 26 = U+0026 : AMPERSAND | ||
198 | 27 = U+0027 : APOSTROPHE | ||
199 | 28 = U+0028 : LEFT PARENTHESIS | ||
200 | 29 = U+0029 : RIGHT PARENTHESIS | ||
201 | 2A = U+002A : ASTERISK | ||
202 | 2B = U+002B : PLUS SIGN | ||
203 | 2C = U+002C : COMMA | ||
204 | 2D = U+002D : HYPHEN-MINUS | ||
205 | 2E = U+002E : FULL STOP | ||
206 | 2F = U+002F : SOLIDUS | ||
207 | 30 = U+0030 : DIGIT ZERO | ||
208 | 31 = U+0031 : DIGIT ONE | ||
209 | 32 = U+0032 : DIGIT TWO | ||
210 | 33 = U+0033 : DIGIT THREE | ||
211 | 34 = U+0034 : DIGIT FOUR | ||
212 | 35 = U+0035 : DIGIT FIVE | ||
213 | 36 = U+0036 : DIGIT SIX | ||
214 | 37 = U+0037 : DIGIT SEVEN | ||
215 | 38 = U+0038 : DIGIT EIGHT | ||
216 | 39 = U+0039 : DIGIT NINE | ||
217 | 3A = U+003A : COLON | ||
218 | 3B = U+003B : SEMICOLON | ||
219 | 3C = U+003C : LESS-THAN SIGN | ||
220 | 3D = U+003D : EQUALS SIGN | ||
221 | 3E = U+003E : GREATER-THAN SIGN | ||
222 | 3F = U+003F : QUESTION MARK | ||
223 | 40 = U+0040 : COMMERCIAL AT | ||
224 | 41 = U+0041 : LATIN CAPITAL LETTER A | ||
225 | 42 = U+0042 : LATIN CAPITAL LETTER B | ||
226 | 43 = U+0043 : LATIN CAPITAL LETTER C | ||
227 | 44 = U+0044 : LATIN CAPITAL LETTER D | ||
228 | 45 = U+0045 : LATIN CAPITAL LETTER E | ||
229 | 46 = U+0046 : LATIN CAPITAL LETTER F | ||
230 | 47 = U+0047 : LATIN CAPITAL LETTER G | ||
231 | 48 = U+0048 : LATIN CAPITAL LETTER H | ||
232 | 49 = U+0049 : LATIN CAPITAL LETTER I | ||
233 | 4A = U+004A : LATIN CAPITAL LETTER J | ||
234 | 4B = U+004B : LATIN CAPITAL LETTER K | ||
235 | 4C = U+004C : LATIN CAPITAL LETTER L | ||
236 | 4D = U+004D : LATIN CAPITAL LETTER M | ||
237 | 4E = U+004E : LATIN CAPITAL LETTER N | ||
238 | 4F = U+004F : LATIN CAPITAL LETTER O | ||
239 | 50 = U+0050 : LATIN CAPITAL LETTER P | ||
240 | 51 = U+0051 : LATIN CAPITAL LETTER Q | ||
241 | 52 = U+0052 : LATIN CAPITAL LETTER R | ||
242 | 53 = U+0053 : LATIN CAPITAL LETTER S | ||
243 | 54 = U+0054 : LATIN CAPITAL LETTER T | ||
244 | 55 = U+0055 : LATIN CAPITAL LETTER U | ||
245 | 56 = U+0056 : LATIN CAPITAL LETTER V | ||
246 | 57 = U+0057 : LATIN CAPITAL LETTER W | ||
247 | 58 = U+0058 : LATIN CAPITAL LETTER X | ||
248 | 59 = U+0059 : LATIN CAPITAL LETTER Y | ||
249 | 5A = U+005A : LATIN CAPITAL LETTER Z | ||
250 | 5B = U+005B : LEFT SQUARE BRACKET | ||
251 | 5C = U+005C : REVERSE SOLIDUS | ||
252 | 5D = U+005D : RIGHT SQUARE BRACKET | ||
253 | 5E = U+005E : CIRCUMFLEX ACCENT | ||
254 | 5F = U+005F : LOW LINE | ||
255 | 60 = U+0060 : GRAVE ACCENT | ||
256 | 61 = U+0061 : LATIN SMALL LETTER A | ||
257 | 62 = U+0062 : LATIN SMALL LETTER B | ||
258 | 63 = U+0063 : LATIN SMALL LETTER C | ||
259 | 64 = U+0064 : LATIN SMALL LETTER D | ||
260 | 65 = U+0065 : LATIN SMALL LETTER E | ||
261 | 66 = U+0066 : LATIN SMALL LETTER F | ||
262 | 67 = U+0067 : LATIN SMALL LETTER G | ||
263 | 68 = U+0068 : LATIN SMALL LETTER H | ||
264 | 69 = U+0069 : LATIN SMALL LETTER I | ||
265 | 6A = U+006A : LATIN SMALL LETTER J | ||
266 | 6B = U+006B : LATIN SMALL LETTER K | ||
267 | 6C = U+006C : LATIN SMALL LETTER L | ||
268 | 6D = U+006D : LATIN SMALL LETTER M | ||
269 | 6E = U+006E : LATIN SMALL LETTER N | ||
270 | 6F = U+006F : LATIN SMALL LETTER O | ||
271 | 70 = U+0070 : LATIN SMALL LETTER P | ||
272 | 71 = U+0071 : LATIN SMALL LETTER Q | ||
273 | 72 = U+0072 : LATIN SMALL LETTER R | ||
274 | 73 = U+0073 : LATIN SMALL LETTER S | ||
275 | 74 = U+0074 : LATIN SMALL LETTER T | ||
276 | 75 = U+0075 : LATIN SMALL LETTER U | ||
277 | 76 = U+0076 : LATIN SMALL LETTER V | ||
278 | 77 = U+0077 : LATIN SMALL LETTER W | ||
279 | 78 = U+0078 : LATIN SMALL LETTER X | ||
280 | 79 = U+0079 : LATIN SMALL LETTER Y | ||
281 | 7A = U+007A : LATIN SMALL LETTER Z | ||
282 | 7B = U+007B : LEFT CURLY BRACKET | ||
283 | 7C = U+007C : VERTICAL LINE | ||
284 | 7D = U+007D : RIGHT CURLY BRACKET | ||
285 | 7E = U+007E : TILDE | ||
286 | 7F = U+007F : DELETE | ||
287 | 80 = U+20AC : EURO SIGN | ||
288 | 82 = U+201A : SINGLE LOW-9 QUOTATION MARK | ||
289 | 83 = U+0192 : LATIN SMALL LETTER F WITH HOOK | ||
290 | 84 = U+201E : DOUBLE LOW-9 QUOTATION MARK | ||
291 | 85 = U+2026 : HORIZONTAL ELLIPSIS | ||
292 | 86 = U+2020 : DAGGER | ||
293 | 87 = U+2021 : DOUBLE DAGGER | ||
294 | 88 = U+02C6 : MODIFIER LETTER CIRCUMFLEX ACCENT | ||
295 | 89 = U+2030 : PER MILLE SIGN | ||
296 | 8A = U+0160 : LATIN CAPITAL LETTER S WITH CARON | ||
297 | 8B = U+2039 : SINGLE LEFT-POINTING ANGLE QUOTATION MARK | ||
298 | 8C = U+0152 : LATIN CAPITAL LIGATURE OE | ||
299 | 8E = U+017D : LATIN CAPITAL LETTER Z WITH CARON | ||
300 | 91 = U+2018 : LEFT SINGLE QUOTATION MARK | ||
301 | 92 = U+2019 : RIGHT SINGLE QUOTATION MARK | ||
302 | 93 = U+201C : LEFT DOUBLE QUOTATION MARK | ||
303 | 94 = U+201D : RIGHT DOUBLE QUOTATION MARK | ||
304 | 95 = U+2022 : BULLET | ||
305 | 96 = U+2013 : EN DASH | ||
306 | 97 = U+2014 : EM DASH | ||
307 | 98 = U+02DC : SMALL TILDE | ||
308 | 99 = U+2122 : TRADE MARK SIGN | ||
309 | 9A = U+0161 : LATIN SMALL LETTER S WITH CARON | ||
310 | 9B = U+203A : SINGLE RIGHT-POINTING ANGLE QUOTATION MARK | ||
311 | 9C = U+0153 : LATIN SMALL LIGATURE OE | ||
312 | 9E = U+017E : LATIN SMALL LETTER Z WITH CARON | ||
313 | 9F = U+0178 : LATIN CAPITAL LETTER Y WITH DIAERESIS | ||
314 | A0 = U+00A0 : NO-BREAK SPACE | ||
315 | A1 = U+00A1 : INVERTED EXCLAMATION MARK | ||
316 | A2 = U+00A2 : CENT SIGN | ||
317 | A3 = U+00A3 : POUND SIGN | ||
318 | A4 = U+00A4 : CURRENCY SIGN | ||
319 | A5 = U+00A5 : YEN SIGN | ||
320 | A6 = U+00A6 : BROKEN BAR | ||
321 | A7 = U+00A7 : SECTION SIGN | ||
322 | A8 = U+00A8 : DIAERESIS | ||
323 | A9 = U+00A9 : COPYRIGHT SIGN | ||
324 | AA = U+00AA : FEMININE ORDINAL INDICATOR | ||
325 | AB = U+00AB : LEFT-POINTING DOUBLE ANGLE QUOTATION MARK | ||
326 | AC = U+00AC : NOT SIGN | ||
327 | AD = U+00AD : SOFT HYPHEN | ||
328 | AE = U+00AE : REGISTERED SIGN | ||
329 | AF = U+00AF : MACRON | ||
330 | B0 = U+00B0 : DEGREE SIGN | ||
331 | B1 = U+00B1 : PLUS-MINUS SIGN | ||
332 | B2 = U+00B2 : SUPERSCRIPT TWO | ||
333 | B3 = U+00B3 : SUPERSCRIPT THREE | ||
334 | B4 = U+00B4 : ACUTE ACCENT | ||
335 | B5 = U+00B5 : MICRO SIGN | ||
336 | B6 = U+00B6 : PILCROW SIGN | ||
337 | B7 = U+00B7 : MIDDLE DOT | ||
338 | B8 = U+00B8 : CEDILLA | ||
339 | B9 = U+00B9 : SUPERSCRIPT ONE | ||
340 | BA = U+00BA : MASCULINE ORDINAL INDICATOR | ||
341 | BB = U+00BB : RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK | ||
342 | BC = U+00BC : VULGAR FRACTION ONE QUARTER | ||
343 | BD = U+00BD : VULGAR FRACTION ONE HALF | ||
344 | BE = U+00BE : VULGAR FRACTION THREE QUARTERS | ||
345 | BF = U+00BF : INVERTED QUESTION MARK | ||
346 | C0 = U+00C0 : LATIN CAPITAL LETTER A WITH GRAVE | ||
347 | C1 = U+00C1 : LATIN CAPITAL LETTER A WITH ACUTE | ||
348 | C2 = U+00C2 : LATIN CAPITAL LETTER A WITH CIRCUMFLEX | ||
349 | C3 = U+00C3 : LATIN CAPITAL LETTER A WITH TILDE | ||
350 | C4 = U+00C4 : LATIN CAPITAL LETTER A WITH DIAERESIS | ||
351 | C5 = U+00C5 : LATIN CAPITAL LETTER A WITH RING ABOVE | ||
352 | C6 = U+00C6 : LATIN CAPITAL LETTER AE | ||
353 | C7 = U+00C7 : LATIN CAPITAL LETTER C WITH CEDILLA | ||
354 | C8 = U+00C8 : LATIN CAPITAL LETTER E WITH GRAVE | ||
355 | C9 = U+00C9 : LATIN CAPITAL LETTER E WITH ACUTE | ||
356 | CA = U+00CA : LATIN CAPITAL LETTER E WITH CIRCUMFLEX | ||
357 | CB = U+00CB : LATIN CAPITAL LETTER E WITH DIAERESIS | ||
358 | CC = U+00CC : LATIN CAPITAL LETTER I WITH GRAVE | ||
359 | CD = U+00CD : LATIN CAPITAL LETTER I WITH ACUTE | ||
360 | CE = U+00CE : LATIN CAPITAL LETTER I WITH CIRCUMFLEX | ||
361 | CF = U+00CF : LATIN CAPITAL LETTER I WITH DIAERESIS | ||
362 | D0 = U+00D0 : LATIN CAPITAL LETTER ETH | ||
363 | D1 = U+00D1 : LATIN CAPITAL LETTER N WITH TILDE | ||
364 | D2 = U+00D2 : LATIN CAPITAL LETTER O WITH GRAVE | ||
365 | D3 = U+00D3 : LATIN CAPITAL LETTER O WITH ACUTE | ||
366 | D4 = U+00D4 : LATIN CAPITAL LETTER O WITH CIRCUMFLEX | ||
367 | D5 = U+00D5 : LATIN CAPITAL LETTER O WITH TILDE | ||
368 | D6 = U+00D6 : LATIN CAPITAL LETTER O WITH DIAERESIS | ||
369 | D7 = U+00D7 : MULTIPLICATION SIGN | ||
370 | D8 = U+00D8 : LATIN CAPITAL LETTER O WITH STROKE | ||
371 | D9 = U+00D9 : LATIN CAPITAL LETTER U WITH GRAVE | ||
372 | DA = U+00DA : LATIN CAPITAL LETTER U WITH ACUTE | ||
373 | DB = U+00DB : LATIN CAPITAL LETTER U WITH CIRCUMFLEX | ||
374 | DC = U+00DC : LATIN CAPITAL LETTER U WITH DIAERESIS | ||
375 | DD = U+00DD : LATIN CAPITAL LETTER Y WITH ACUTE | ||
376 | DE = U+00DE : LATIN CAPITAL LETTER THORN | ||
377 | DF = U+00DF : LATIN SMALL LETTER SHARP S | ||
378 | E0 = U+00E0 : LATIN SMALL LETTER A WITH GRAVE | ||
379 | E1 = U+00E1 : LATIN SMALL LETTER A WITH ACUTE | ||
380 | E2 = U+00E2 : LATIN SMALL LETTER A WITH CIRCUMFLEX | ||
381 | E3 = U+00E3 : LATIN SMALL LETTER A WITH TILDE | ||
382 | E4 = U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS | ||
383 | E5 = U+00E5 : LATIN SMALL LETTER A WITH RING ABOVE | ||
384 | E6 = U+00E6 : LATIN SMALL LETTER AE | ||
385 | E7 = U+00E7 : LATIN SMALL LETTER C WITH CEDILLA | ||
386 | E8 = U+00E8 : LATIN SMALL LETTER E WITH GRAVE | ||
387 | E9 = U+00E9 : LATIN SMALL LETTER E WITH ACUTE | ||
388 | EA = U+00EA : LATIN SMALL LETTER E WITH CIRCUMFLEX | ||
389 | EB = U+00EB : LATIN SMALL LETTER E WITH DIAERESIS | ||
390 | EC = U+00EC : LATIN SMALL LETTER I WITH GRAVE | ||
391 | ED = U+00ED : LATIN SMALL LETTER I WITH ACUTE | ||
392 | EE = U+00EE : LATIN SMALL LETTER I WITH CIRCUMFLEX | ||
393 | EF = U+00EF : LATIN SMALL LETTER I WITH DIAERESIS | ||
394 | F0 = U+00F0 : LATIN SMALL LETTER ETH | ||
395 | F1 = U+00F1 : LATIN SMALL LETTER N WITH TILDE | ||
396 | F2 = U+00F2 : LATIN SMALL LETTER O WITH GRAVE | ||
397 | F3 = U+00F3 : LATIN SMALL LETTER O WITH ACUTE | ||
398 | F4 = U+00F4 : LATIN SMALL LETTER O WITH CIRCUMFLEX | ||
399 | F5 = U+00F5 : LATIN SMALL LETTER O WITH TILDE | ||
400 | F6 = U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS | ||
401 | F7 = U+00F7 : DIVISION SIGN | ||
402 | F8 = U+00F8 : LATIN SMALL LETTER O WITH STROKE | ||
403 | F9 = U+00F9 : LATIN SMALL LETTER U WITH GRAVE | ||
404 | FA = U+00FA : LATIN SMALL LETTER U WITH ACUTE | ||
405 | FB = U+00FB : LATIN SMALL LETTER U WITH CIRCUMFLEX | ||
406 | FC = U+00FC : LATIN SMALL LETTER U WITH DIAERESIS | ||
407 | FD = U+00FD : LATIN SMALL LETTER Y WITH ACUTE | ||
408 | FE = U+00FE : LATIN SMALL LETTER THORN | ||
409 | FF = U+00FF : LATIN SMALL LETTER Y WITH DIAERESIS | ||
410 | * | ||
411 | */ | ||
412 | } | ||
413 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/ContentProvider.php b/inc/3rdparty/libraries/MOBIClass/ContentProvider.php new file mode 100644 index 00000000..dcf9c4de --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/ContentProvider.php | |||
@@ -0,0 +1,22 @@ | |||
1 | <?php | ||
2 | |||
/**
 * Contract for anything that can supply the raw material of a MOBI book:
 * its text, its images and its metadata.
 */
abstract class ContentProvider
{
    /**
     * Text that will be integrated into the MOBI file.
     *
     * @return string
     */
    abstract public function getTextData();

    /**
     * Images as an array of jpeg data, where array entry 0 corresponds to
     * image record 0.
     *
     * @return array
     */
    abstract public function getImages();

    /**
     * Metadata as a hashtable (for example, title or author).
     *
     * @return array
     */
    abstract public function getMetaData();
}
21 | |||
22 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/EXTHHelper.php b/inc/3rdparty/libraries/MOBIClass/EXTHHelper.php new file mode 100644 index 00000000..275142bf --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/EXTHHelper.php | |||
@@ -0,0 +1,132 @@ | |||
1 | <?php | ||
//Reference: http://wiki.mobileread.com/wiki/MOBI

/**
 * Helpers for EXTH header records: mapping between numeric record types and
 * their human-readable names, and formatting of record values.
 */
class EXTHHelper{
    /**
     * Translate a numeric EXTH record type into its name.
     *
     * @param int $type Record type
     * @return string|int The name, or $type itself when the type is unknown
     */
    public static function typeToText($type){
        if(isset(self::$types[$type])){
            return self::$types[$type];
        }
        return $type;
    }

    /**
     * Translate a record name into its numeric EXTH type.
     *
     * The lookup is case-insensitive. Previously the input was lower-cased
     * but compared against a table containing mixed-case keys, so names
     * such as "Creator Software" could never be resolved.
     *
     * @param string $text Record name
     * @return int|false The type, or false when the name is unknown
     */
    public static function textToType($text){
        $lookup = self::flippedTypes();
        $key = strtolower($text);
        if(isset($lookup[$key])){
            return $lookup[$key];
        }
        return false;
    }

    /**
     * Encode an integer as a big-endian byte string.
     *
     * @param int $n    Number to encode
     * @param int $size Number of bytes to emit
     * @return string   $size bytes, most significant byte first
     */
    public static function convert($n, $size){
        $mask = 0xFF;
        $out = "";
        for($i = 0; $i < $size; $i++){
            // Isolate byte $i (counted from the least significant end) and
            // prepend it, so the most significant byte ends up first.
            $out = chr(($n & $mask) >> (8*$i)).$out;
            $mask = $mask << 8;
        }
        return $out;
    }

    /**
     * Return a displayable form of a record value: types 100..199 are
     * returned verbatim, everything else as a hex dump.
     *
     * @param int    $type  Record type
     * @param string $value Raw record value
     * @return string
     */
    public static function getRightRepresentation($type, $value){
        if($type >= 100 && $type < 200){
            return $value;
        }
        return self::toHex($value);
    }

    /**
     * Render a byte string as space-separated, two-digit lowercase hex.
     *
     * @param string $value Raw bytes
     * @return string e.g. "01 ff"; empty string for empty input
     */
    public static function toHex($value){
        $pairs = array();
        for($i = 0, $len = strlen($value); $i < $len; $i++){
            $pairs[] = sprintf("%02x", ord($value[$i]));
        }
        return implode(" ", $pairs);
    }

    /**
     * Lazily build the name => type map from $types, keyed by lower-cased
     * name so it matches what textToType() looks up.
     *
     * @return array
     */
    private static function flippedTypes(){
        if(self::$flippedTypes === null){
            self::$flippedTypes = array();
            foreach(self::$types as $id => $label){
                self::$flippedTypes[strtolower($label)] = $id;
            }
        }
        return self::$flippedTypes;
    }

    /** @var array EXTH record type => name */
    private static $types = array(
        1 => "drm server id",
        2 => "drm commerce id",
        3 => "drm ebookbase book id",
        100 => "author",
        101 => "publisher",
        102 => "imprint",
        103 => "description",
        104 => "isbn",
        105 => "subject",
        106 => "publishingdate",
        107 => "review",
        108 => "contributor",
        109 => "rights",
        110 => "subjectcode",
        111 => "type",
        112 => "source",
        113 => "asin",
        114 => "versionnumber",
        115 => "sample",
        116 => "startreading",
        118 => "retail price",
        119 => "retail price currency",
        201 => "coveroffset",
        202 => "thumboffset",
        203 => "hasfakecover",
        204 => "Creator Software",
        205 => "Creator Major Version",
        206 => "Creator Minor Version",
        207 => "Creator Build Number",
        208 => "watermark",
        209 => "tamper proof keys",
        300 => "fontsignature",
        401 => "clippinglimit",
        402 => "publisherlimit",
        403 => "403",
        404 => "ttsflag",
        501 => "cdetype",
        502 => "lastupdatetime",
        503 => "updatedtitle"
    );

    /** @var array|null Lower-cased name => type, filled on first use */
    private static $flippedTypes = null;
}
diff --git a/inc/3rdparty/libraries/MOBIClass/FileByte.php b/inc/3rdparty/libraries/MOBIClass/FileByte.php new file mode 100644 index 00000000..05fc7d04 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/FileByte.php | |||
@@ -0,0 +1,41 @@ | |||
1 | <?php | ||
2 | |||
/**
 * A single byte to be stored in a file.
 *
 * @author Sander
 */
class FileByte extends FileObject {
    /** @var int Stored value, always masked to the range 0..255 */
    private $data;

    /**
     * Make a byte to be stored in a file
     * @param int $n Initial value; only the lowest 8 bits are kept
     */
    public function __construct($n = 0){
        parent::__construct(1);
        $this->set($n);
    }

    /**
     * @return int The stored byte value
     */
    public function get(){
        return $this->data;
    }

    /**
     * @param mixed $value Converted with intval() and masked to 8 bits
     */
    public function set($value){
        $this->data = intval($value) & 0xFF;
    }

    /**
     * @return string The value as a one-byte string
     */
    public function serialize() {
        return $this->byteToString($this->data);
    }

    /**
     * Load the value from its serialized form.
     *
     * The previous body called a bare __construct(...), which PHP resolves
     * as an undefined global function and aborts with a fatal error.
     * Re-initialising through set() has the effect the constructor call
     * was meant to have.
     *
     * @param string $data Serialized bytes
     */
    public function unserialize($data) {
        $this->set($this->toInt($data));
    }

    /**
     * @return string Debug representation with the value shown in hex
     */
    public function __toString(){
        return "FileByte: {".$this->byteAsString($this->data)."}";
    }
}
41 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/FileDate.php b/inc/3rdparty/libraries/MOBIClass/FileDate.php new file mode 100644 index 00000000..2284eba2 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/FileDate.php | |||
@@ -0,0 +1,40 @@ | |||
1 | <?php | ||
2 | |||
/**
 * A 4-byte date value to be stored in a file.
 *
 * @author Sander
 */
class FileDate extends FileObject {
    /** @var int Stored timestamp value */
    private $data;

    /**
     * Make a date (stored as an integer) to be written to a file
     * @param int $n Initial value
     */
    public function __construct($n = 0){
        parent::__construct(4);
        $this->set($n);
    }

    /**
     * @return int The stored value
     */
    public function get(){
        return $this->data;
    }

    /**
     * @param mixed $value Converted with intval()
     */
    public function set($value){
        $this->data = intval($value);
    }

    /**
     * @return string The value as a four-byte string
     */
    public function serialize() {
        return $this->intToString($this->data);
    }

    /**
     * Load the value from its serialized form.
     *
     * The previous body called a bare __construct(...), which PHP resolves
     * as an undefined global function and aborts with a fatal error.
     * Re-initialising through set() has the effect the constructor call
     * was meant to have.
     *
     * @param string $data Serialized bytes
     */
    public function unserialize($data) {
        $this->set($this->toInt($data));
    }

    /**
     * @return string Debug representation with the value formatted by date("r")
     */
    public function __toString(){
        // NOTE(review): 94694400 seconds is subtracted before formatting;
        // the epoch this offset is meant to compensate for is not visible
        // here - confirm against the code that produces the stored value.
        return "FileDate: {".(date("r", $this->data-94694400))."}";
    }
}
40 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/FileElement.php b/inc/3rdparty/libraries/MOBIClass/FileElement.php new file mode 100644 index 00000000..552d04a8 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/FileElement.php | |||
@@ -0,0 +1,89 @@ | |||
1 | <?php | ||
2 | |||
/**
 * An ordered, keyed collection of serializable parts that is written to a
 * file as the concatenation of its parts.
 *
 * @author Sander
 */
class FileElement {
    /**
     * Parts of this element, keyed by name, in serialization order.
     * Each part is expected to offer getByteLength() and serialize().
     * @var array
     */
    public $elements;

    /**
     * Make an element to be stored in a file
     * @param array $elements Initial parts, keyed by name
     */
    public function __construct($elements = array()){
        $this->elements = $elements;
    }

    /**
     * @return int Total size in bytes (same as getLength())
     */
    public function getByteLength(){
        return $this->getLength();
    }

    /**
     * @return int Sum of the byte lengths of all parts
     */
    public function getLength(){
        $total = 0;
        foreach($this->elements as $val){
            $total += $val->getByteLength();
        }
        return $total;
    }

    /**
     * Byte offset at which the part named $name starts.
     *
     * Keys are compared as strings. The earlier loose == comparison could
     * match unrelated keys (on PHP < 8 any non-numeric name equals integer
     * key 0), yielding a wrong offset.
     *
     * @param string|int $name Key of the part
     * @return int Offset of the part, or the total length when no part
     *             has that key
     */
    public function offsetToEntry($name){
        $pos = 0;
        foreach($this->elements as $key=>$value){
            if((string)$name === (string)$key){
                break;
            }
            $pos += $value->getByteLength();
        }
        return $pos;
    }

    /**
     * @param string $key
     * @return bool Whether a (non-null) part is stored under $key
     */
    public function exists($key){
        return isset($this->elements[$key]);
    }

    /**
     * @param string $key
     * @return FileObject
     */
    public function get($key){
        return $this->elements[$key];
    }

    /**
     * Store a part under $key, replacing any existing one.
     * @param string $key
     * @param FileObject $value
     */
    public function set($key, $value){
        $this->elements[$key] = $value;
    }

    /**
     * Same behavior as set().
     * @param string $key
     * @param FileObject $value
     */
    public function add($key, $value){
        $this->set($key, $value);
    }

    /**
     * @return string Concatenated serialization of all parts, in order
     */
    public function serialize() {
        $result = "";
        foreach($this->elements as $val){
            $result .= $val->serialize();
        }
        return $result;
    }

    /**
     * Not implemented; does nothing.
     * @param string $data
     */
    public function unserialize($data) {
        //TODO: If reading is needed -> way more complex
    }

    /**
     * @return string Multi-line debug listing of all parts
     */
    public function __toString(){
        $output = "FileElement (".$this->getByteLength()." bytes): {\n";
        foreach($this->elements as $key=>$value){
            $output .= "\t".$key.": ".$value."\n";
        }
        $output .= "}";
        return $output;
    }
}
89 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/FileInt.php b/inc/3rdparty/libraries/MOBIClass/FileInt.php new file mode 100644 index 00000000..ebe86f86 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/FileInt.php | |||
@@ -0,0 +1,40 @@ | |||
1 | <?php | ||
2 | |||
/**
 * A 4-byte integer to be stored in a file.
 *
 * @author Sander
 */
class FileInt extends FileObject {
    /** @var int Stored value */
    private $data;

    /**
     * Make an integer to be stored in a file
     * @param int $n Initial value
     */
    public function __construct($n = 0){
        parent::__construct(4);
        $this->set($n);
    }

    /**
     * @return int The stored value
     */
    public function get(){
        return $this->data;
    }

    /**
     * @param mixed $value Converted with intval()
     */
    public function set($value){
        $this->data = intval($value);
    }

    /**
     * @return string The value as a four-byte string
     */
    public function serialize() {
        return $this->intToString($this->data);
    }

    /**
     * Load the value from its serialized form.
     *
     * The previous body called a bare __construct(...), which PHP resolves
     * as an undefined global function and aborts with a fatal error.
     * Re-initialising through set() has the effect the constructor call
     * was meant to have.
     *
     * @param string $data Serialized bytes
     */
    public function unserialize($data) {
        $this->set($this->toInt($data));
    }

    /**
     * @return string Debug representation with the value shown in hex
     */
    public function __toString(){
        return "FileInt: {".$this->intAsString($this->data)."}";
    }
}
40 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/FileObject.php b/inc/3rdparty/libraries/MOBIClass/FileObject.php new file mode 100644 index 00000000..0df17df1 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/FileObject.php | |||
@@ -0,0 +1,168 @@ | |||
1 | <?php | ||
2 | |||
/**
 * Base class for values that can be written to a file: it tracks an
 * optional fixed byte length and offers integer <-> byte-string helpers.
 *
 * @author Sander
 */
abstract class FileObject {
    /** @var int Fixed size in bytes; a negative value means "not fixed" */
    private $byteLength = -1;

    /**
     * @param int $byteLength Fixed size in bytes, or a negative value when
     *                        the size is reported by getLength() instead
     */
    public function __construct($byteLength = -1){
        $this->byteLength = $byteLength;
    }

    /**
     * Size of the object in bytes.
     * @return int The fixed length if one was given, else getLength()
     */
    public function getByteLength(){
        return $this->byteLength < 0 ? $this->getLength() : $this->byteLength;
    }

    /**
     * Size of a variable-length object; this base version always throws.
     * @throws Exception
     */
    public function getLength(){
        throw new Exception("Sub-class needs to implement this if it doesn't have a fixed length");
    }

    /**
     * Read up to the first 4 bytes of a string as an integer, first byte
     * most significant.
     * @param string $string Input string
     * @return int Output integer
     */
    public function toInt($string){
        $count = min(4, strlen($string));
        $value = 0;
        for($pos = 0; $pos < $count; $pos++){
            // Make room for the next byte, then append it at the low end.
            $value = ($value << 8) | ord($string[$pos]);
        }
        return $value;
    }

    /**
     * Raw 1-byte encoding of a number
     * @param int $int
     * @return string
     */
    public function byteToString($int){
        return $this->toString($int, 1);
    }

    /**
     * Hex text of the lowest byte of a number
     * @param int $int
     * @return string
     */
    public function byteAsString($int){
        return $this->asString($int, 1);
    }

    /**
     * Raw 2-byte encoding of a number
     * @param int $int
     * @return string
     */
    public function shortToString($int){
        return $this->toString($int, 2);
    }

    /**
     * Hex text of the lowest 2 bytes of a number
     * @param int $int
     * @return string
     */
    public function shortAsString($int){
        return $this->asString($int, 2);
    }

    /**
     * Raw 3-byte encoding of a number
     * @param int $int
     * @return string
     */
    public function triToString($int){
        return $this->toString($int, 3);
    }

    /**
     * Hex text of the lowest 3 bytes of a number
     * @param int $int
     * @return string
     */
    public function triAsString($int){
        return $this->asString($int, 3);
    }

    /**
     * Raw 4-byte encoding of a number
     * @param int $int
     * @return string
     */
    public function intToString($int){
        return $this->toString($int, 4);
    }

    /**
     * Hex text of the lowest 4 bytes of a number
     * @param int $int
     * @return string
     */
    public function intAsString($int){
        return $this->asString($int, 4);
    }

    /**
     * Encode the lowest $size bytes of a number as a raw byte string,
     * most significant byte first.
     * @param int $int Number that should be converted
     * @param int $size Number of bytes to convert
     * @return string Output string
     */
    private function toString($int, $size){
        $bytes = "";
        for($shift = 0; $shift < $size * 8; $shift += 8){
            $bytes = chr(($int >> $shift) & 0xFF).$bytes;
        }
        return $bytes;
    }

    /**
     * Render the lowest $size bytes of a number as space-separated,
     * two-digit lowercase hex, most significant byte first.
     * @param int $int Number that should be converted
     * @param int $size Number of bytes to convert
     * @return string Output string
     */
    private function asString($int, $size){
        $pairs = array();
        for($shift = 0; $shift < $size * 8; $shift += 8){
            array_unshift($pairs, sprintf("%02x", ($int >> $shift) & 0xFF));
        }
        return implode(" ", $pairs);
    }

    /**
     * Get the value
     * @return mixed Value to get
     */
    abstract public function get();

    /**
     * Set the value
     * @param mixed $value Value to set
     */
    abstract public function set($value);

    /**
     * Serialize the object
     * @return string String representation
     */
    abstract public function serialize();

    /**
     * Unserialize the object
     * @param string $data String representation
     */
    abstract public function unserialize($data);
}
168 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/FileRecord.php b/inc/3rdparty/libraries/MOBIClass/FileRecord.php new file mode 100644 index 00000000..494a72e4 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/FileRecord.php | |||
@@ -0,0 +1,46 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Description of FileRecord | ||
5 | * | ||
6 | * @author Sander | ||
7 | */ | ||
class FileRecord extends FileObject {
    /**
     * Wrapped record; length and serialization are delegated to it.
     * @var Record
     */
    private $record;

    /**
     * Make a record to be stored in a file
     * @param Record $record
     */
    public function __construct($record){
        $this->record = $record;
    }

    /**
     * Get the number of bytes this element occupies in the file.
     * @return int Same value as getLength()
     */
    public function getByteLength(){
        return $this->getLength();
    }

    /**
     * Get the length reported by the wrapped record.
     * @return int
     */
    public function getLength(){
        return $this->record->getLength();
    }

    /**
     * Get the wrapped record.
     * @return Record
     */
    public function get(){
        return $this->record;
    }

    /**
     * Replace the wrapped record.
     * @param Record $record
     */
    public function set($record){
        $this->record = $record;
    }

    /**
     * Serialize the wrapped record.
     * @return string String representation
     */
    public function serialize() {
        return $this->record->serialize();
    }

    /**
     * Load the wrapped record from its string representation.
     *
     * The previous code called a bare `__construct(...)`, which PHP resolves
     * as an undefined global function and aborts with a fatal error. The
     * work is delegated to the wrapped record instead, mirroring serialize().
     * NOTE(review): assumes Record::unserialize() updates the record in
     * place; its return value is not used - confirm against Record.php.
     * @param string $data String representation
     */
    public function unserialize($data) {
        $this->record->unserialize($data);
    }
}
46 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/FileShort.php b/inc/3rdparty/libraries/MOBIClass/FileShort.php new file mode 100644 index 00000000..9921ea82 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/FileShort.php | |||
@@ -0,0 +1,41 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Description of FileShort | ||
5 | * | ||
6 | * @author Sander | ||
7 | */ | ||
class FileShort extends FileObject {
    /**
     * Stored value, always masked to 16 bits by set().
     * @var int
     */
    private $data;

    /**
     * Make a short to be stored in a file
     * @param int $n Initial value (only the low 16 bits are kept)
     */
    public function __construct($n = 0){
        parent::__construct(2);
        $this->set($n);
    }

    /**
     * Get the value
     * @return int Value in the range 0..0xFFFF
     */
    public function get(){
        return $this->data;
    }

    /**
     * Set the value
     * @param mixed $value Converted with intval() and masked to 16 bits
     */
    public function set($value){
        $this->data = intval($value) & 0xFFFF;
    }

    /**
     * Serialize the value using the inherited shortToString() helper.
     * @return string String representation
     */
    public function serialize() {
        return $this->shortToString($this->data);
    }

    /**
     * Load the value from its string representation.
     *
     * The previous code called a bare `__construct(...)`, which PHP resolves
     * as an undefined global function and aborts with a fatal error. Going
     * through set() stores the decoded value and keeps the 16-bit masking.
     * @param string $data String representation, decoded with the inherited toInt()
     */
    public function unserialize($data) {
        $this->set($this->toInt($data));
    }

    /**
     * Debug representation, built with the inherited shortAsString() helper.
     * @return string
     */
    public function __toString(){
        return "FileShort: {".$this->shortAsString($this->data)."}";
    }
}
41 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/FileString.php b/inc/3rdparty/libraries/MOBIClass/FileString.php new file mode 100644 index 00000000..16e906a6 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/FileString.php | |||
@@ -0,0 +1,83 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Description of FileString | ||
5 | * | ||
6 | * @author Sander | ||
7 | */ | ||
class FileString extends FileObject {
    /**
     * Fixed output length in bytes, or -1 when the length follows the data.
     * @var int
     */
    private $forcedLength;

    /**
     * String contents.
     * @var string
     */
    private $data;

    /**
     * Make a string to be stored in a file
     * @param string|int $first Optional, if it is a string, it will be the contents,
     * if it is a number, it will set the forced length.
     * @param int $second Optional, will set the forced length. Can only be used when the
     * first argument is contents.
     */
    public function __construct($first = null, $second = null){
        $this->forcedLength = -1;
        $this->data = "";

        // The comparisons are intentionally left loose to keep the existing
        // behavior: a forced length of 0 is treated as "not given".
        if($second != null){
            $this->data = $first;
            $this->forcedLength = $second;
        }else if($first != null){
            if(is_string($first)){
                $this->data = $first;
            }else{
                $this->forcedLength = $first;
            }
        }
    }

    /**
     * Get the number of bytes this element occupies in the file.
     * @return int Same value as getLength()
     */
    public function getByteLength(){
        return $this->getLength();
    }

    /**
     * Get the serialized length: the forced length when one is set,
     * otherwise the byte length of the contents.
     * @return int
     */
    public function getLength(){
        if($this->forcedLength >= 0){
            return $this->forcedLength;
        }
        return strlen($this->data);
    }

    /**
     * Get the contents
     * @return string
     */
    public function get(){
        return $this->data;
    }

    /**
     * Set the contents
     * @param string $value
     */
    public function set($value){
        $this->data = $value;
    }

    /**
     * Serialize the contents. With a forced length the result is padded on
     * the right with NUL bytes or truncated so that it is exactly that long.
     * @return string String representation
     */
    public function serialize() {
        $output = $this->data;
        $curLength = strlen($output);

        if($this->forcedLength >= 0){
            if($this->forcedLength > $curLength){
                return str_pad($output, $this->forcedLength, "\0", STR_PAD_RIGHT);
            }elseif($this->forcedLength == $curLength){
                return $output;
            }else{
                return substr($output, 0, $this->forcedLength);
            }
        }
        return $output;
    }

    /**
     * Load the contents from their string representation.
     *
     * The previous code called a bare `__construct(...)`, which PHP resolves
     * as an undefined global function and aborts with a fatal error. The
     * data is stored through set(); a forced length configured at
     * construction time is kept, so serialize() still emits a field of the
     * same width.
     * @param string $data String representation
     */
    public function unserialize($data) {
        $this->set($data);
    }

    /**
     * Debug representation: the forced length (when set) followed by the
     * serialized contents with NUL bytes shown as spaces.
     * @return string
     */
    public function __toString(){
        $out = "FileString";
        if($this->forcedLength >= 0){
            $out .= " ".$this->forcedLength;
        }
        $out .= ": {\"".str_replace(array(" ", "\0"), " ", $this->serialize())."\"}";
        return $out;
    }
}
83 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/FileTri.php b/inc/3rdparty/libraries/MOBIClass/FileTri.php new file mode 100644 index 00000000..6cacc0b0 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/FileTri.php | |||
@@ -0,0 +1,41 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Description of FileTri | ||
5 | * | ||
6 | * @author Sander | ||
7 | */ | ||
class FileTri extends FileObject {
    /**
     * Stored value, always masked to 24 bits by set().
     * @var int
     */
    private $data;

    /**
     * Make a tri-byte to be stored in a file
     * @param int $n Initial value (only the low 24 bits are kept)
     */
    public function __construct($n = 0){
        parent::__construct(3);
        $this->set($n);
    }

    /**
     * Get the value
     * @return int Value in the range 0..0xFFFFFF
     */
    public function get(){
        return $this->data;
    }

    /**
     * Set the value
     * @param mixed $value Converted with intval() and masked to 24 bits
     */
    public function set($value){
        $this->data = intval($value) & 0xFFFFFF;
    }

    /**
     * Serialize the value using the inherited triToString() helper.
     * @return string String representation
     */
    public function serialize() {
        return $this->triToString($this->data);
    }

    /**
     * Load the value from its string representation.
     *
     * The previous code called a bare `__construct(...)`, which PHP resolves
     * as an undefined global function and aborts with a fatal error. Going
     * through set() stores the decoded value and keeps the 24-bit masking.
     * @param string $data String representation, decoded with the inherited toInt()
     */
    public function unserialize($data) {
        $this->set($this->toInt($data));
    }

    /**
     * Debug representation, built with the inherited triAsString() helper.
     * @return string
     */
    public function __toString(){
        return "FileTri: {".$this->triAsString($this->data)."}";
    }
}
41 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/Http.php b/inc/3rdparty/libraries/MOBIClass/Http.php new file mode 100644 index 00000000..9e5852e3 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/Http.php | |||
@@ -0,0 +1,171 @@ | |||
1 | <?php | ||
/**
 * Minimal plain-HTTP client built on fsockopen().
 *
 * Failures are reported as a human-readable string in the return value
 * (for example "Error 111: Connection refused\n"), not as exceptions.
 */
class Http{
    /**
     * When true, final results are stored under ./cache/<host>/ and reused.
     * @var bool
     */
    private static $cache = false;

    /**
     * Perform a GET request for a URL.
     * @param string $url Absolute URL; the query string is sent along with the path
     * @return string Response body, or an error message
     */
    public static function Request($url){
        $target = self::targetFromUrl($url, null, 80, "/");
        if($target === false){
            return "Error 0: Invalid URL\n";
        }
        list($host, $port, $path) = $target;

        return self::FullRequest("GET", $host, $port, $path);
    }

    /**
     * Perform an HTTP request, following up to $depth redirects.
     *
     * $verb no longer declares a default: it is followed by the required
     * $ip, so the default could never be used and PHP 8 reports such a
     * declaration as deprecated. Call sites are unaffected.
     *
     * @return string Response (see $req_hdr / $res_hdr), or an error message
     */
    public static function FullRequest(
        $verb,                     /* HTTP Request Method (GET and POST supported) */
        $ip,                       /* Target IP/Hostname */
        $port = 80,                /* Target TCP port */
        $uri = '/',                /* Target URI */
        $getdata = array(),        /* HTTP GET Data ie. array('var1' => 'val1', 'var2' => 'val2') */
        $postdata = array(),       /* HTTP POST Data ie. array('var1' => 'val1', 'var2' => 'val2') */
        $cookie = array(),         /* HTTP Cookie Data ie. array('var1' => 'val1', 'var2' => 'val2') */
        $custom_headers = array(), /* Custom HTTP headers ie. array('Referer' => 'http://localhost/') */
        $timeout = 1000,           /* Socket timeout in milliseconds */
        $req_hdr = false,          /* Include HTTP request headers */
        $res_hdr = false,          /* Include HTTP response headers */
        $depth = 4                 /* Depth of the iteration left (to avoid redirection loops) */
    )
    {
        if(self::$cache){
            $cacheFile = self::cachePath($ip, $uri);

            if(is_file($cacheFile)){
                // The cache holds the final, already decoded result, so it
                // must not be run through the chunk decoder a second time.
                return file_get_contents($cacheFile);
            }
        }

        $verb = strtoupper($verb);

        $getPairs = array();
        foreach ($getdata as $k => $v)
            $getPairs[] = urlencode($k) .'='. urlencode($v);

        $postPairs = array();
        foreach ($postdata as $k => $v)
            $postPairs[] = urlencode($k) .'='. urlencode($v);

        $cookiePairs = array();
        foreach ($cookie as $k => $v)
            $cookiePairs[] = urlencode($k) .'='. urlencode($v);

        $getdata_str = '';
        if (count($getPairs) > 0) {
            // Extend an existing query string instead of starting a second one
            $getdata_str = (strpos($uri, '?') === false ? '?' : '&') . implode('&', $getPairs);
        }
        $postdata_str = implode('&', $postPairs);
        $cookie_str = implode('; ', $cookiePairs);

        $crlf = "\r\n";
        $req = $verb .' '. $uri . $getdata_str .' HTTP/1.1' . $crlf;
        $req .= 'Host: '. $ip . $crlf;
        $req .= 'User-Agent: Mozilla/5.0 Firefox/3.6.12' . $crlf;
        $req .= 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' . $crlf;
        $req .= 'Accept-Language: en-us,en;q=0.5' . $crlf;
        $req .= 'Accept-Encoding: deflate' . $crlf;
        $req .= 'Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7' . $crlf;
        // The whole response is read until the stream ends, so ask the server
        // to close the connection instead of keeping it alive until the timeout.
        $req .= 'Connection: close' . $crlf;

        foreach ($custom_headers as $k => $v)
            $req .= $k .': '. $v . $crlf;

        if ($cookie_str !== '')
            $req .= 'Cookie: '. $cookie_str . $crlf;

        if ($verb == 'POST' && $postdata_str !== '')
        {
            $req .= 'Content-Type: application/x-www-form-urlencoded' . $crlf;
            $req .= 'Content-Length: '. strlen($postdata_str) . $crlf . $crlf;
            $req .= $postdata_str;
        }
        else $req .= $crlf;

        if (($fp = @fsockopen($ip, $port, $errno, $errstr)) == false)
            return "Error $errno: $errstr\n";

        stream_set_timeout($fp, 0, $timeout * 1000);

        fputs($fp, $req);
        $response = stream_get_contents($fp);
        fclose($fp);
        if($response === false){
            $response = '';
        }

        // Split the response itself; the echoed request (when $req_hdr is set)
        // is kept out of the way so it cannot be mistaken for the response header.
        $headerSplit = strpos($response, "\r\n\r\n");
        if($headerSplit === false){
            $header = $response;
            $body = '';
        }else{
            $header = substr($response, 0, $headerSplit);
            $body = (string)substr($response, $headerSplit + 4);
        }

        $redirectURL = self::CheckForRedirect($header);

        if($redirectURL !== false){
            if($depth > 0){
                // Relative Location values are resolved against the current target
                $target = self::targetFromUrl($redirectURL, $ip, $port, $uri);
                if($target === false){
                    return "Error 0: Invalid redirect URL\n";
                }
                list($nextHost, $nextPort, $nextPath) = $target;

                return self::FullRequest($verb, $nextHost, $nextPort, $nextPath, $getdata, $postdata, $cookie, $custom_headers, $timeout, $req_hdr, $res_hdr, $depth-1);
            }else{
                return "Redirect loop, stopping...";
            }
        }

        $truncated = false;
        foreach(explode("\r\n", $header) as $line){
            // The status line has no colon, and values may contain colons themselves
            $pieces = explode(":", $line, 2);
            if(count($pieces) < 2){
                continue;
            }
            $name = strtolower(trim($pieces[0]));
            $value = strtolower(trim($pieces[1]));

            if($name == "transfer-encoding" && $value == "chunked"){
                $truncated = true;
            }
        }

        if($truncated){
            $body = self::resolveTruncated($body);
        }

        $ret = $req_hdr ? $req : '';
        if ($res_hdr)
            $ret .= $header . "\r\n\r\n";
        $ret .= $body;

        if(self::$cache){
            if(!is_dir("cache")){
                mkdir("cache");
            }
            if(!is_dir("cache/".$ip)){
                mkdir("cache/".$ip);
            }
            $cacheFile = self::cachePath($ip, $uri);
            if(!is_file($cacheFile)){
                file_put_contents($cacheFile, $ret);
            }
        }

        return $ret;
    }

    /**
     * Build the cache file name for a host/URI pair.
     * @param string $ip Host name or address
     * @param string $uri Request URI
     * @return string Relative path below ./cache
     */
    private static function cachePath($ip, $uri){
        return "cache/".$ip."/".str_replace("/", "...", $uri);
    }

    /**
     * Turn a URL (or a relative reference) into a connection target.
     * @param string $url URL or relative reference
     * @param string|null $fallbackHost Host to use when $url has none; null means "host required"
     * @param int $fallbackPort Port to use together with $fallbackHost
     * @param string $baseUri URI that relative paths are resolved against
     * @return array|false array($host, $port, $pathWithQuery), or false when no target can be derived
     */
    private static function targetFromUrl($url, $fallbackHost, $fallbackPort, $baseUri){
        $parts = parse_url($url);
        if($parts === false){
            return false;
        }

        $path = isset($parts["path"]) ? $parts["path"] : "/";

        if(isset($parts["host"])){
            $host = $parts["host"];
            $port = isset($parts["port"]) ? $parts["port"] : 80;
        }else{
            if($fallbackHost === null){
                return false;
            }
            $host = $fallbackHost;
            $port = $fallbackPort;

            if(substr($path, 0, 1) !== "/"){
                // Path-relative reference: resolve against the directory of the base URI
                $queryStart = strpos($baseUri, "?");
                $basePath = $queryStart === false ? $baseUri : substr($baseUri, 0, $queryStart);
                $lastSlash = strrpos($basePath, "/");
                $path = ($lastSlash === false ? "/" : substr($basePath, 0, $lastSlash + 1)).$path;
            }
        }

        if(isset($parts["query"])){
            $path .= "?".$parts["query"];
        }

        return array($host, $port, $path);
    }

    /**
     * Decode a body sent with "Transfer-Encoding: chunked".
     *
     * Decoding stops at the terminating zero-size chunk or at the first
     * malformed size line; trailers are ignored.
     * @param string $data Chunked body
     * @return string Decoded body
     */
    private static function resolveTruncated($data){
        $pos = 0;
        $end = strlen($data);
        $out = "";

        while($pos < $end){
            $lineEnd = strpos($data, "\r\n", $pos);
            if($lineEnd === false){
                break;
            }

            // Drop optional chunk extensions ("1a;name=value")
            $sizeField = substr($data, $pos, $lineEnd - $pos);
            $extension = strpos($sizeField, ";");
            if($extension !== false){
                $sizeField = substr($sizeField, 0, $extension);
            }
            $sizeField = trim($sizeField);

            if(!preg_match('/^[0-9a-fA-F]+$/', $sizeField)){
                break;
            }
            $size = (int)hexdec($sizeField);
            if($size === 0){
                break;
            }

            $out .= substr($data, $lineEnd + 2, $size);
            $pos = $lineEnd + 2 + $size;

            // Every chunk is followed by a CRLF that is not part of the data
            if(substr($data, $pos, 2) === "\r\n"){
                $pos += 2;
            }
        }

        return $out;
    }

    /**
     * Look for a redirect in a response header block.
     * @param string $header Status line and header lines, separated by CRLF
     * @return string|false Target of the Location header for a 3xx status, false otherwise
     */
    private static function CheckForRedirect($header){
        $lineEnd = strpos($header, "\r\n");
        $firstLine = $lineEnd === false ? $header : substr($header, 0, $lineEnd);

        $statusParts = explode(" ", $firstLine, 3);
        if(count($statusParts) < 2 || substr($statusParts[1], 0, 1) != "3"){
            return false;
        }

        // Anchored per line so that e.g. "Content-Location" does not match, and
        // independent of whether Location is the last header of the block.
        if(preg_match('/^Location:[ \t]*(.*)$/mi', $header, $match)){
            $location = trim($match[1]);

            if(strlen($location) > 0){
                return $location;
            }
        }
        return false;
    }
}
171 | ?> \ No newline at end of file | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/ImageHandler.php b/inc/3rdparty/libraries/MOBIClass/ImageHandler.php new file mode 100644 index 00000000..bcb48e9f --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/ImageHandler.php | |||
@@ -0,0 +1,40 @@ | |||
1 | <?php | ||
2 | |||
class ImageHandler {
    /**
     * Download an image
     * @param string $url Url to the image
     * @return false|string False if failed, else the data of the image (converted to grayscale jpeg)
     */
    public static function DownloadImage($url){
        $data = Http::Request($url);
        // Http::Request reports failures as text; such data simply fails to decode here
        $imgFile = @imagecreatefromstring($data);

        if($imgFile !== false){
            $result = self::CreateImage($imgFile);
            imagedestroy($imgFile);
            return $result;
        }
        return false;
    }

    /**
     * Create an image
     *
     * Note that the grayscale filter is applied to $img itself.
     * @param resource $img An image, for example one created with imagecreatetruecolor
     * @return false|string False if failed, else the data of the image (converted to grayscale jpeg)
     */
    public static function CreateImage($img){
        // Remember the buffer depth so a failure cannot leave our buffer open
        $bufferLevel = ob_get_level();
        try{
            imagefilter($img, IMG_FILTER_GRAYSCALE);

            // imagejpeg() writes to the output stream, so capture it in a buffer
            ob_start();
            $written = imagejpeg($img);
            $image = ob_get_clean();

            // Previously an encoding failure came back as "" instead of the
            // documented false.
            if($written === false || $image === false || $image === ""){
                return false;
            }

            return $image;
        }catch(Exception $e){
            while(ob_get_level() > $bufferLevel){
                ob_end_clean();
            }
            return false;
        }
    }
}
40 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/MOBI.php b/inc/3rdparty/libraries/MOBIClass/MOBI.php new file mode 100644 index 00000000..17e718c1 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/MOBI.php | |||
@@ -0,0 +1,192 @@ | |||
1 | <?php | ||
2 | require_once(dirname(__FILE__)."/readability/Readability.php"); | ||
3 | require_once(dirname(__FILE__).'/CharacterEntities.php'); | ||
4 | require_once(dirname(__FILE__).'/constants.php'); | ||
5 | require_once(dirname(__FILE__).'/ContentProvider.php'); | ||
6 | require_once(dirname(__FILE__).'/MultipleFileHandler.php'); | ||
7 | require_once(dirname(__FILE__)."/downloaders/FanFictionNet.php"); | ||
8 | require_once(dirname(__FILE__).'/EXTHHelper.php'); | ||
9 | require_once(dirname(__FILE__).'/FileObject.php'); | ||
10 | require_once(dirname(__FILE__).'/FileByte.php'); | ||
11 | require_once(dirname(__FILE__).'/FileDate.php'); | ||
12 | require_once(dirname(__FILE__).'/FileElement.php'); | ||
13 | require_once(dirname(__FILE__).'/FileInt.php'); | ||
14 | require_once(dirname(__FILE__).'/FileRecord.php'); | ||
15 | require_once(dirname(__FILE__).'/FileShort.php'); | ||
16 | require_once(dirname(__FILE__).'/FileString.php'); | ||
17 | require_once(dirname(__FILE__).'/FileTri.php'); | ||
18 | require_once(dirname(__FILE__).'/Http.php'); | ||
19 | require_once(dirname(__FILE__).'/http_build_url.php'); | ||
20 | require_once(dirname(__FILE__).'/ImageHandler.php'); | ||
21 | require_once(dirname(__FILE__).'/MOBIFile.php'); | ||
22 | require_once(dirname(__FILE__).'/OnlineArticle.php'); | ||
23 | require_once(dirname(__FILE__).'/PalmRecord.php'); | ||
24 | require_once(dirname(__FILE__).'/Prc.php'); | ||
25 | require_once(dirname(__FILE__).'/PreprocessedArticle.php'); | ||
26 | require_once(dirname(__FILE__).'/RecognizeURL.php'); | ||
27 | require_once(dirname(__FILE__).'/Record.php'); | ||
28 | require_once(dirname(__FILE__).'/RecordFactory.php'); | ||
29 | require_once(dirname(__FILE__).'/Settings.php'); | ||
30 | |||
31 | /** | ||
32 | * Description of MOBI. | ||
33 | * | ||
34 | * Usage: | ||
35 | * include("MOBIClass/MOBI.php"); | ||
36 | * | ||
37 | * $mobi = new MOBI(); | ||
38 | * | ||
39 | * //Then use one of the following ways to prepare information (it should be in the form of valid html) | ||
40 | * $mobi->setContentProvider(new OnlineArticle($url)); //Load URL, the result will be cleaned using a Readability port | ||
41 | * $mobi->setFileSource($file); //Load a local file without any extra changes | ||
42 | * $mobi->setData($data); //Load data | ||
43 | * | ||
44 | * //If you want, you can set some optional settings (see Settings.php for all recognized settings) | ||
45 | * $options = array( | ||
46 | * "title"=>"Insert title here", | ||
47 | * "author"=>"Author" | ||
48 | * ); | ||
49 | * $mobi->setOptions($options); | ||
50 | * | ||
51 | * //Then there are two ways to output it: | ||
52 | * $mobi->save($file); //Save the file locally | ||
53 | * $mobi->download($name); //Let the client download the file, make sure the page | ||
54 | * //that calls it doesn't output anything, otherwise it might | ||
55 | * //conflict with the download. $name contains the file name, | ||
56 | * //usually something like "title.mobi" (where the title should | ||
57 | * //be cleaned so as not to contain illegal characters). | ||
58 | * | ||
59 | * | ||
60 | * @author Sander Kromwijk | ||
61 | */ | ||
class MOBI {
    private $source = false;
    private $images = array();
    private $optional = array();
    private $imgCounter = 0;
    private $debug = false;
    private $prc = false;

    public function __construct(){

    }

    /**
     * Get the title from the options
     * @return string|false The "title" option, false when it is not set
     */
    public function getTitle(){
        return isset($this->optional["title"]) ? $this->optional["title"] : false;
    }

    /**
     * Use a content provider as source: its metadata become the options,
     * then its images and finally its text data are taken over.
     * @param ContentProvider $content Content Provider to use
     */
    public function setContentProvider($content){
        $metaData = $content->getMetaData();
        $this->setOptions($metaData);

        $imageRecords = $content->getImages();
        $this->setImages($imageRecords);

        $textData = $content->getTextData();
        $this->setData($textData);
    }

    /**
     * Use the contents of a local file as source
     * @param string $file Path to the file
     */
    public function setFileSource($file){
        $contents = file_get_contents($file);
        $this->setData($contents);
    }

    /**
     * Set the data to use. The data is passed through
     * CharacterEntities::convert() and any prepared file is discarded.
     * @param string $data Data to put in the file
     */
    public function setData($data){
        $this->source = CharacterEntities::convert($data);
        $this->prc = false;
    }

    /**
     * Set the images to use and discard any prepared file
     * @param array $data Image records to put in the file
     */
    public function setImages($data){
        $this->images = $data;
        $this->prc = false;
    }

    /**
     * Set options (title, author, ...) and discard any prepared file
     * @param array $options Options to set
     */
    public function setOptions($options){
        $this->optional = $options;
        $this->prc = false;
    }

    /**
     * Prepare the prc file. The result is kept until the data, images or
     * options change.
     * @return Prc The file that can be used to be saved/downloaded
     * @throws Exception When no data has been set
     */
    private function preparePRC(){
        if($this->source === false){
            throw new Exception("No data set");
        }
        if($this->prc === false){
            $text = $this->source;
            $textLength = strlen($text);

            $settings = new Settings($this->optional);
            $factory = new RecordFactory($settings);

            $textRecords = $factory->createRecords($text);
            $header = new PalmRecord($settings, $textRecords, sizeof($textRecords), $textLength, sizeof($this->images));

            // Layout: header, text records, image records, FLIS, FCIS, EOF
            $records = $textRecords;
            array_unshift($records, $header);
            $records = array_merge($records, $this->images);
            $records[] = $factory->createFLISRecord();
            $records[] = $factory->createFCISRecord($textLength);
            $records[] = $factory->createEOFRecord();

            $this->prc = new Prc($settings, $records);
        }
        return $this->prc;
    }

    /**
     * Save the file locally
     * @param string $filename Path to save the file
     */
    public function save($filename){
        $this->preparePRC()->save($filename);
    }

    /**
     * Let the client download the file. Warning! No data should be
     * outputted before or after.
     * @param string $name Name used for download, usually "title.mobi"
     */
    public function download($name){
        $payload = $this->preparePRC()->serialize();
        $payloadLength = strlen($payload);

        //In debug mode, don't start the download
        if($this->debug){
            return;
        }

        $headers = array(
            "Content-Type: application/x-mobipocket-ebook",
            "Content-Disposition: attachment; filename=\"".$name."\"",
            "Content-Transfer-Encoding: binary",
            "Accept-Ranges: bytes",
            "Cache-control: private",
            'Pragma: private',
            "Expires: Mon, 26 Jul 1997 05:00:00 GMT",
            "Content-Length: ".$payloadLength
        );
        foreach($headers as $line){
            header($line);
        }

        echo $payload;
    }

}
192 | ?> \ No newline at end of file | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/MOBIFile.php b/inc/3rdparty/libraries/MOBIClass/MOBIFile.php new file mode 100644 index 00000000..349227ae --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/MOBIFile.php | |||
@@ -0,0 +1,157 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * This is the way MOBI files should be created if you want all features (TOC, images). | ||
4 | * | ||
5 | * File modified by Dawson for use in eBook Creator | ||
6 | * Added pagebreaks and a setting to remove table of contents. | ||
7 | */ | ||
8 | |||
class MOBIFile extends ContentProvider {
    const PARAGRAPH = 0;
    const H2 = 1;
    const H3 = 2;
    const IMAGE = 3;
    const PAGEBREAK = 4;

    /** @var array Metadata; "toc" switches the table of contents on or off */
    private $settings = array("title" => "Unknown Title", "toc" => true);
    /** @var array List of array(type, data) pairs in document order */
    private $parts = array();
    /** @var array Image file records, in the order they were appended */
    private $images = array();

    /**
     * Get the text data (the "html" code)
     * @return string
     */
    public function getTextData(){
        $prefix = "<html><head><guide><reference title='CONTENT' type='toc' filepos=0000000000 /></guide></head><body>";

        $title = "<h1>".$this->settings["title"]."</h1>";

        list($text, $entries) = $this->generateText();

        // Must exist even when the table of contents is switched off;
        // it used to be undefined in that case.
        $toc = "";
        if($this->settings["toc"]) {
            // Positions are padded to a fixed width, so the length of the TOC does
            // not depend on the base offset: the first pass only measures it.
            $toc = $this->generateTOC($entries); //Generate TOC to get the right length
            $toc = $this->generateTOC($entries, strlen($prefix)+strlen($toc)+strlen($title)); //Generate the real TOC
        }

        $suffix = "</body></html>";

        return $prefix.$toc.$title.$text.$suffix;
    }

    /**
     * Generate the body's text and the chapter entries
     * @return array($string, $entries) $string is the html data, $entries
     * contains the level, the title and the position of the titles.
     */
    public function generateText(){
        $str = "";
        $entries = array();

        for($i = 0, $len = sizeof($this->parts); $i < $len; $i++){
            list($type, $data) = $this->parts[$i];
            $id = "title_".$i;
            switch($type){
                case self::PARAGRAPH:
                    $str .= "<p>".$data."</p>";
                    break;
                case self::PAGEBREAK:
                    $str .= '<mbp:pagebreak/>';
                    break;
                case self::H2:
                    // The position is the byte offset of the heading within the body text
                    $entries[] = array("level" => 2, "position" => strlen($str), "title" => $data, "id" => $id);
                    $str .= "<h2 id='" . $id . "'>".$data."</h2>";
                    break;
                case self::H3:
                    $entries[] = array("level" => 3, "position" => strlen($str), "title" => $data, "id" => $id);
                    $str .= "<h3 id='" . $id . "'>".$data."</h3>";
                    break;
                case self::IMAGE:
                    // $data is the zero-based image index; the recindex written is one higher
                    $str .= "<img recindex=".str_pad($data+1, 10, "0", STR_PAD_LEFT)." />";
                    break;
            }
        }
        return array($str, $entries);
    }

    /**
     * Generate a TOC
     * @param array $entries The entries array generated by generateText
     * @param int $base The zero position, added to every entry position
     * @return string
     */
    public function generateTOC($entries, $base = 0){
        $toc = "<h2>Contents</h2>";
        $toc .= "<blockquote><table summary='Table of Contents'><col/><tbody>";
        for($i = 0, $len = sizeof($entries); $i < $len; $i++){
            $entry = $entries[$i];
            $pos = str_pad($entry["position"]+$base, 10, "0", STR_PAD_LEFT);
            $toc .= "<tr><td><a href='#".$entry["id"]."' filepos='".$pos."'>".$entry["title"]."</a></td></tr>";
        }
        // The closing markup used to contain a </b> without a matching <b>
        return $toc."</tbody></table></blockquote><mbp:pagebreak/>";
    }

    /**
     * Get the file records of the images
     * @return array
     */
    public function getImages(){
        return $this->images;
    }

    /**
     * Get the metadata
     * @return array
     */
    public function getMetaData(){
        return $this->settings;
    }

    /**
     * Change the file's settings. For example set("author", "John Doe") or set("title", "The adventures of John Doe").
     * @param string $key Key of the setting to insert.
     * @param mixed $value Value of the setting.
     */
    public function set($key, $value){
        $this->settings[$key] = $value;
    }

    /**
     * Get one of the file's settings.
     * @param string $key Key of the setting.
     * @return mixed The value, or null when the setting does not exist
     */
    public function get($key){
        return isset($this->settings[$key]) ? $this->settings[$key] : null;
    }

    /**
     * Append a paragraph of text to the file.
     * @param string $text The text to insert.
     */
    public function appendParagraph($text){
        $this->parts[] = array(self::PARAGRAPH, $text);
    }

    /**
     * Append a chapter title (H2)
     * @param string $title The title to insert.
     */
    public function appendChapterTitle($title){
        $this->parts[] = array(self::H2, $title);
    }

    /**
     * Append a section title (H3)
     * @param string $title The title to insert.
     */
    public function appendSectionTitle($title){
        $this->parts[] = array(self::H3, $title);
    }

    /**
     * Append a page break.
     */
    public function appendPageBreak() {
        $this->parts[] = array(self::PAGEBREAK, null);
    }

    /**
     * Append an image.
     * @param resource $img An image file (for example, created by `imagecreate`)
     */
    public function appendImage($img){
        $imgIndex = sizeof($this->images);
        $this->images[] = new FileRecord(new Record(ImageHandler::CreateImage($img)));
        $this->parts[] = array(self::IMAGE, $imgIndex);
    }
}
diff --git a/inc/3rdparty/libraries/MOBIClass/MultipleFileHandler.php b/inc/3rdparty/libraries/MOBIClass/MultipleFileHandler.php new file mode 100644 index 00000000..e9792816 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/MultipleFileHandler.php | |||
@@ -0,0 +1,136 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Description of MultipleFileHandler | ||
5 | * | ||
6 | * @author Sander | ||
7 | */ | ||
abstract class MultipleFileHandler extends ContentProvider {
    /**
     * Page contents, in the order they were added
     * @var array
     */
    private $files = array();
    /**
     * Binary image data, in the order it was added
     * @var array
     */
    private $images = array();
    /**
     * Metadata as key/value pairs
     * @var array
     */
    private $metadata = array();
    /**
     * One array("title", "pos") entry per page
     * @var array
     */
    private $toc = array();

    /**
     * Add a page to the file
     * @param string $contents Contents of the chapter/page
     * @param string $title Optional, title of the chapter/page. When given, an h2 is
     * put before the contents and a page break after them
     */
    public function addPage($contents, $title = ""){
        if($title != ""){
            //TODO: Add in TOC (and add a way of generating it
            $contents = "<h2>".$title."</h2>".$contents."<mbp:pagebreak>";
        }

        $entryCount = sizeof($this->toc);
        if($entryCount > 0){
            // Pages are joined with a single newline, hence the +1
            $previousEntry = $this->toc[$entryCount - 1];
            $previousPage = $this->files[sizeof($this->files) - 1];
            $offset = $previousEntry["pos"] + strlen($previousPage) + 1;
        }else{
            $offset = 0;
        }

        $this->files[] = $contents;
        $this->toc[] = array("title"=>$title, "pos"=>$offset);
    }

    /**
     * Add an image to the file
     * @param string $imageContents Data string containing the binary data of the image
     * @return int The reference of the image
     */
    public function addImage($imageContents){
        $reference = sizeof($this->images);
        $this->images[] = $imageContents;
        return $reference;
    }

    /**
     * Download an image and add it to the file
     * @param string $url Url to the image
     * @return int The reference of the image, false if the image couldn't be downloaded
     */
    public function addImageFromUrl($url){
        $downloaded = ImageHandler::DownloadImage($url);

        if($downloaded !== false){
            return $this->addImage($downloaded);
        }
        return false;
    }

    /**
     * Set the metadata
     * @param string $key Key
     * @param string $value Value
     */
    public function setMetadata($key, $value){
        $this->metadata[$key] = $value;
    }

    /**
     * Get the text data to be integrated in the MOBI file: all pages joined
     * by newlines, followed by the table of contents.
     * @return string
     */
    public function getTextData(){
        $pages = implode("\n", $this->files);
        $placeholderHead = "<html><head><guide><reference title='CONTENT' type='toc' filepos=0000000000 /></guide></head><body>";

        // The table of contents starts right after the pages
        $tocOffset = strlen($placeholderHead.$pages);
        $tableOfContents = $this->generateTOC(strlen($placeholderHead));

        $realHead = "<html><head><guide><reference title='CONTENT' type='toc' filepos=".$this->forceLength($tocOffset, 10)." /></guide></head><body>";
        return $realHead.$pages.$tableOfContents."</body></html>";
    }

    /**
     * Left-pad a value with zeros
     * @param mixed $n Value to pad (converted to a string)
     * @param int $l Minimal length of the result
     * @return string
     */
    public function forceLength($n, $l){
        return str_pad($n."", $l, "0", STR_PAD_LEFT);
    }

    /**
     * Generate the table of contents
     * @param int $base Offset added to the position of every entry
     * @return string
     */
    public function generateTOC($base = 0){
        $rows = "";
        foreach($this->toc as $index => $entry){
            $position = $entry["pos"]+$base;
            $rows .= "<tr><td>".($index+1).".</td><td><a filepos=".$position.">".$entry["title"]."</a></td></tr>";
        }

        return "<h2>Contents</h2>"
            ."<blockquote><table summary='Table of Contents'><b><col/><col/><tbody>"
            .$rows
            ."</tbody></b></table></blockquote>";
    }

    /**
     * Get the images (an array containing the jpeg data). Array entry 0 will
     * correspond to image record 0.
     * @return array
     */
    public function getImages(){
        return $this->images;
    }

    /**
     * Get the metadata in the form of a hashtable (for example, title or author).
     * @return array
     */
    public function getMetaData(){
        return $this->metadata;
    }

}
136 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/OnlineArticle.php b/inc/3rdparty/libraries/MOBIClass/OnlineArticle.php new file mode 100644 index 00000000..ec3182fe --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/OnlineArticle.php | |||
@@ -0,0 +1,116 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Description of OnlineArticle | ||
5 | * | ||
6 | * @author Sander | ||
7 | */ | ||
class OnlineArticle extends ContentProvider {
	/**
	 * HTML of the extracted article
	 * @var string
	 */
	private $text;
	/**
	 * Downloaded image records, keyed by record index
	 * @var array
	 */
	private $images = array();
	/**
	 * Metadata of the article ("title", "author")
	 * @var array
	 */
	private $metadata = array();
	/**
	 * Record index that will be given to the next downloaded image
	 * @var int
	 */
	private $imgCounter = 0;

	/**
	 * Download a web page, extract the readable article with Readability and
	 * download the images it references.
	 *
	 * @param string $url Address of the page; "http://" is prepended when no
	 *                    http/https scheme is present
	 * @throws Exception When the extracted article cannot be parsed as HTML
	 */
	public function __construct($url) {
		if (!preg_match('!^https?://!i', $url)) $url = 'http://'.$url;

		$data = Http::Request($url);
		$html = mb_convert_encoding($data, "UTF-8", "UTF-8,ISO-8859-1,ASCII");
		$r = new Readability($html, $url);
		$r->init();
		if(!isset($this->metadata["title"])){
			$this->metadata["title"] = CharacterEntities::convert(strip_tags($r->getTitle()->innerHTML));
		}
		if(!isset($this->metadata["author"])){
			// The host name doubles as the author. parse_url() may return false or
			// omit "host" for malformed URLs; in that case leave the author unset.
			$parts = parse_url($url);
			if(is_array($parts) && isset($parts["host"])){
				$this->metadata["author"] = $parts["host"];
			}
		}

		// Wrap the fragment in a full document that declares UTF-8, so that
		// DOMDocument does not fall back to its default encoding.
		$article = $r->getContent()->innerHTML;
		if(substr($article, 0, 5) == "<body"){
			$article = "<html><head><meta http-equiv='Content-Type' content='text/html; charset=UTF-8'/></head>".$article."</html>";
		}else{
			$article = "<html><head><meta http-equiv='Content-Type' content='text/html; charset=UTF-8'/></head><body>".$article."</body></html>";
		}
		$doc = new DOMDocument();
		// Parser warnings about sloppy markup are expected and suppressed; a hard
		// failure is reported as an exception instead of terminating the script.
		if(!@$doc->loadHTML($article)){
			throw new Exception("Unable to parse the article extracted from ".$url);
		}
		$doc->normalizeDocument();

		$this->images = $this->handleImages($doc, $url);
		$this->text = $doc->saveHTML();
	}

	/**
	 * Get the text data to be integrated in the MOBI file
	 * @return string
	 */
	public function getTextData(){
		return $this->text;
	}

	/**
	 * Get the images (an array containing the jpeg data). Array entry 0 will
	 * correspond to image record 0.
	 * @return array
	 */
	public function getImages(){
		return $this->images;
	}

	/**
	 * Get the metadata in the form of a hashtable (for example, title or author).
	 * @return array
	 */
	public function getMetaData(){
		return $this->metadata;
	}

	/**
	 * Download every image referenced by the document and point the <img>
	 * elements at the resulting records.
	 *
	 * Each <img> gets its "src" emptied and, when the download succeeded, a
	 * "recindex" attribute holding the record index. An address that occurs
	 * several times is downloaded only once.
	 *
	 * @param DOMDocument $dom Document whose <img> elements are rewritten in place
	 * @param string $url Address of the page, used to resolve relative image URLs
	 * @return array Image records keyed by record index
	 */
	private function handleImages($dom, $url){
		$images = array();

		// Maps an absolute image URL to the record index it was stored under
		$savedImages = array();

		$imgElements = $dom->getElementsByTagName('img');
		foreach($imgElements as $img) {
			$src = $img->getAttribute("src");

			// A leading "/" means the path is relative to the site root
			$is_root = (substr($src, 0, 1) == "/");

			$parsed = parse_url($src);

			if(!isset($parsed["host"])){
				if($is_root){
					$src = http_build_url($url, $parsed, HTTP_URL_REPLACE);
				}else{
					$src = http_build_url($url, $parsed, HTTP_URL_JOIN_PATH);
				}
			}
			$img->setAttribute("src", "");
			if(isset($savedImages[$src])){
				$img->setAttribute("recindex", $savedImages[$src]);
			}else{
				$image = ImageHandler::DownloadImage($src);

				if($image !== false){
					$images[$this->imgCounter] = new FileRecord(new Record($image));

					$img->setAttribute("recindex", $this->imgCounter);
					$savedImages[$src] = $this->imgCounter;
					$this->imgCounter++;
				}
			}
		}

		return $images;
	}
}
116 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/PalmRecord.php b/inc/3rdparty/libraries/MOBIClass/PalmRecord.php new file mode 100644 index 00000000..d0de8dfe --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/PalmRecord.php | |||
@@ -0,0 +1,136 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * A Record of a PDB file | ||
4 | * | ||
5 | * @author Sander | ||
6 | */ | ||
class PalmRecord extends FileObject {
	/**
	 * The fields of this record, in serialization order: the PalmDOC header
	 * fields, the MOBI header, the EXTH header and finally the full name.
	 * @var FileElement
	 */
	private $elements;

	/**
	 * Build the record from the settings and the sizes of the book.
	 *
	 * @param Settings $settings Every setting whose name equals a field name is
	 *                           copied into that field; settings that
	 *                           EXTHHelper::textToType() recognizes become EXTH
	 *                           records; "title" becomes the full name
	 * @param mixed $records Not used by this constructor
	 * @param int $textRecords Number of text records
	 * @param int $textLength Length of the text
	 * @param int $images Number of image records
	 */
	public function __construct($settings, $records, $textRecords, $textLength, $images){
		$layout = array(
			"compression"=>new FileShort(),
			"unused"=>new FileShort(),
			"textLength"=>new FileInt(),
			"recordCount"=>new FileShort(),
			"recordSize"=>new FileShort(),
			"encryptionType"=>new FileShort(),
			"unused2"=>new FileShort(),
			//MOBI Header
			"mobiIdentifier"=>new FileString("MOBI", 4),
			"mobiHeaderLength"=>new FileInt(),
			"mobiType"=>new FileInt(),
			"textEncoding"=>new FileInt(),
			"uniqueID"=>new FileInt(),
			"fileVersion"=>new FileInt(),
			"reserved"=>new FileString(40),
			"firstNonBookIndex"=>new FileInt(),
			"fullNameOffset"=>new FileInt(),
			"fullNameLength"=>new FileInt(),
			"locale"=>new FileInt(),
			"inputLanguage"=>new FileInt(),
			"outputLanguage"=>new FileInt(),
			"minimumVersion"=>new FileInt(),
			"firstImageIndex"=>new FileInt(),
			"huffmanRecordOffset"=>new FileInt(),
			"huffmanRecordCount"=>new FileInt(),
			"unused3"=>new FileString(8),
			"exthFlags"=>new FileInt(0x40),
			"unknown"=>new FileString(32),
			"drmOffset"=>new FileInt(0xFFFFFFFF),
			"drmCount"=>new FileShort(0xFFFFFFFF),
			"drmSize"=>new FileShort(),
			"drmFlags"=>new FileInt(),
			"mobiFiller"=>new FileString(72),
			//EXTH Header
			"exthIdentifier"=>new FileString("EXTH", 4),
			"exthHeaderLength"=>new FileInt(),
			"exthRecordCount"=>new FileInt(),
			"exthRecords"=>new FileElement(),
			"exthPadding"=>new FileString(),
			"fullName"=>new FileString()
		);
		$this->elements = new FileElement($layout);
		$fields = $this->elements;

		// Copy every setting that has a field of the same name
		foreach($settings->values as $key => $unused){
			if(!$fields->exists($key)){
				continue;
			}
			$fields->get($key)->set($settings->get($key));
		}

		// Turn the settings known to EXTHHelper into EXTH records. Each record
		// is a type, a length (8 + data length) and the data itself.
		$exth = new FileElement();
		$exthCount = 0;
		$exthBytes = 0;
		foreach($settings->values as $key => $value){
			$exthType = EXTHHelper::textToType($key);
			if($exthType === false){
				continue;
			}
			$recordLength = 8+strlen($value);
			$exth->add("type".$exthCount, new FileInt($exthType));
			$exth->add("length".$exthCount, new FileInt($recordLength));
			$exth->add("data".$exthCount, new FileString($value));
			$exthBytes += $recordLength;
			$exthCount++;
		}

		if($images > 0){
			$fields->get("firstImageIndex")->set($textRecords+1);
		}
		$fields->get("firstNonBookIndex")->set($textRecords+2+$images);
		$fields->get("reserved")->set(str_repeat(chr(255), 40));
		$fields->get("exthRecordCount")->set($exthCount);
		$fields->set("exthRecords", $exth);

		// Pad the EXTH records to a multiple of four bytes
		$padding = (4-($exthBytes%4))%4;
		$fields->get("exthPadding")->set(str_repeat("\0", $padding));
		$fields->get("exthHeaderLength")->set(12+$exthBytes+$padding);

		$fields->get("recordCount")->set($textRecords);

		// The offset can only be computed now that everything in front of the
		// full name has been filled in
		$title = $settings->get("title");
		$fields->get("fullNameOffset")->set($fields->offsetToEntry("fullName"));
		$fields->get("fullNameLength")->set(strlen($title));
		$fields->get("fullName")->set($title);
		$fields->get("textLength")->set($textLength);
	}

	/**
	 * Get the size of the serialized record
	 * @return int Byte length of all fields together
	 */
	public function getByteLength(){
		return $this->getLength();
	}

	/**
	 * Get the size of the serialized record
	 * @return int Byte length of all fields together
	 */
	public function getLength(){
		return $this->elements->getByteLength();
	}

	/**
	 * Get the value of this object, which is the record itself
	 * @return PalmRecord
	 */
	public function get(){
		return $this;
	}

	/**
	 * Replacing the contents of the record is not supported
	 * @param mixed $elements Ignored
	 * @throws Exception Always
	 */
	public function set($elements){
		throw new Exception("Unallowed set");
	}

	/**
	 * Serialize all fields of the record
	 * @return string
	 */
	public function serialize() {
		return $this->elements->serialize();
	}

	/**
	 * Load the fields of the record from serialized data
	 * @param string $data
	 */
	public function unserialize($data) {
		$this->elements->unserialize($data);
	}

	/**
	 * Describe the record and its fields
	 * @return string
	 */
	public function __toString(){
		return "PalmDoc Record (".$this->getByteLength()." bytes):\n".$this->elements;
	}
}
136 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/Prc.php b/inc/3rdparty/libraries/MOBIClass/Prc.php new file mode 100644 index 00000000..c40b5805 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/Prc.php | |||
@@ -0,0 +1,97 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Description of Prc | ||
5 | * | ||
6 | * @author Sander | ||
7 | */ | ||
class Prc extends FileElement {
	/**
	 * Build the file structure: the header fields, one record-list entry per
	 * record and the records themselves.
	 *
	 * @param Settings $settings Every setting whose name equals a header field
	 *                           name is copied into that field
	 * @param array $records The records to store, in file order
	 */
	public function __construct($settings, $records){
		parent::__construct(array(
			"title"=>new FileString(32),
			"attributes"=>new FileShort(),
			"version"=>new FileShort(),
			"creationTime"=>new FileDate(),
			"modificationTime"=>new FileDate(),
			"backupTime"=>new FileDate(),
			"modificationNumber"=>new FileInt(),
			"appInfoID"=>new FileInt(),
			"sortInfoID"=>new FileInt(),
			"prcType"=>new FileString(4),
			"creator"=>new FileString(4),
			"uniqueIDSeed"=>new FileInt(),
			"nextRecordListID"=>new FileInt(),
			"numberRecords"=>new FileShort(),
			"recordList"=>new FileElement(),
			"filler"=>new FileShort(),
			"records"=>new FileElement()
		));

		//Set values from the info block
		foreach($this->elements as $name => $val){
			if($settings->exists($name)){
				$this->get($name)->set($settings->get($name));
			}
		}

		$this->get("numberRecords")->set(sizeof($records));

		$i = 0;
		foreach($records as $record){
			// The offset is filled in by updateOffsets() once all records are known
			$offset = new FileInt();
			$attr = new FileByte();
			$uniqueID = new FileTri($i);

			$this->elements["recordList"]->add("Rec".$i, new FileElement(array(
				"offset"=>$offset,
				"attribute"=>$attr,
				"uniqueID"=>$uniqueID
			)));

			$this->elements["records"]->add("Rec".$i, $record);
			$i++;
		}

		$this->updateOffsets($records);
	}

	/**
	 * Get the size of the serialized file.
	 *
	 * This used to be a debugging stub that always threw Exception("Test"),
	 * which made __toString() unusable. It now reports the length computed
	 * by FileElement.
	 *
	 * @return int Byte length of the complete file
	 */
	public function getByteLength(){
		return parent::getByteLength();
	}

	/**
	 * Store, for every record, its offset from the start of the file in the
	 * matching record-list entry.
	 *
	 * @param array $records The records that were passed to the constructor
	 */
	public function updateOffsets($records){
		$base = $this->offsetToEntry("records");

		$i = 0;

		foreach($records as $record){
			$el = $this->elements["recordList"]->get("Rec".$i);

			$local = $this->elements["records"]->offsetToEntry("Rec".$i);

			$el->get("offset")->set($base+$local);

			$i++;
		}
	}

	/**
	 * Write the serialized file to disk.
	 *
	 * The data is serialized before the target is opened, so a failure while
	 * serializing does not truncate an existing file.
	 *
	 * @param string $file Path of the file to write
	 * @throws Exception When the file cannot be opened or written completely
	 */
	public function save($file){
		$data = $this->serialize();

		// "b": the content is binary and must never be newline-translated
		$handle = fopen($file, "wb");
		if($handle === false){
			throw new Exception("Unable to open ".$file." for writing");
		}
		$written = fwrite($handle, $data);
		fclose($handle);

		if($written === false || $written < strlen($data)){
			throw new Exception("Unable to write ".$file);
		}
	}

	/**
	 * Send the serialized file to the output
	 */
	public function output(){
		echo $this->serialize();
	}

	/**
	 * Describe the file and all of its fields
	 * @return string
	 */
	public function __toString(){
		$output = "Prc (".$this->getByteLength()." bytes): {\n";
		foreach($this->elements as $key=>$value){
			$output .= "\t".$key.": ".$value."\n";
		}
		$output .= "}";
		return $output;
	}
}
97 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/PreprocessedArticle.php b/inc/3rdparty/libraries/MOBIClass/PreprocessedArticle.php new file mode 100644 index 00000000..2e992404 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/PreprocessedArticle.php | |||
@@ -0,0 +1,89 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Description of OnlineArticle | ||
5 | * | ||
6 | * @author Sander | ||
7 | */ | ||
class PreprocessedArticle extends ContentProvider {
	/**
	 * Text of the article
	 * @var string
	 */
	private $text;
	/**
	 * Image records, keyed by record index
	 * @var array
	 */
	private $images;
	/**
	 * Metadata of the article
	 * @var array
	 */
	private $metadata = array();
	/**
	 * Record index that will be given to the next image
	 * @var int
	 */
	private $imgCounter = 0;

	/**
	 * Create an article from already prepared text.
	 *
	 * @param string $textData Text of the article
	 * @param array $imageLinks Addresses of the images to download, in record order
	 * @param array $metadata Metadata of the article (for example title or author)
	 */
	public function __construct($textData, $imageLinks, $metadata) {
		$this->text = $textData;
		$this->metadata = $metadata;

		$this->images = $this->downloadImages($imageLinks);
	}

	/**
	 * Create a Preprocessed article from a json string
	 * @param string $json JSON data. Should be of the following format:
	 * {"text": "TEXT", "images": ["imageURL1", "imageURL2"], "metadata": {"key": "value"}}
	 *
	 * Note: Any image tags should have the recindex attribute set to the appropriate index (the
	 * same index as the image in the array)
	 * @return PreprocessedArticle The generated preprocessed article
	 * @throws InvalidArgumentException When $json does not decode to a JSON object or array
	 */
	static public function CreateFromJson($json){
		// Decode to associative arrays: without the second argument json_decode()
		// returns stdClass objects, which cannot be indexed with ["text"].
		$data = json_decode($json, true);
		if(!is_array($data)){
			throw new InvalidArgumentException("Invalid JSON supplied for the preprocessed article");
		}

		$text = isset($data["text"]) ? $data["text"] : "";
		$images = (isset($data["images"]) && is_array($data["images"])) ? $data["images"] : array();
		$metadata = (isset($data["metadata"]) && is_array($data["metadata"])) ? $data["metadata"] : array();

		return new PreprocessedArticle($text, $images, $metadata);
	}

	/**
	 * Get the text data to be integrated in the MOBI file
	 * @return string
	 */
	public function getTextData(){
		return $this->text;
	}

	/**
	 * Get the images (an array containing the jpeg data). Array entry 0 will
	 * correspond to image record 0.
	 * @return array
	 */
	public function getImages(){
		return $this->images;
	}

	/**
	 * Get the metadata in the form of a hashtable (for example, title or author).
	 * @return array
	 */
	public function getMetaData(){
		return $this->metadata;
	}

	/**
	 * Download the given images and convert each one to a grayscale JPEG record.
	 *
	 * An image that cannot be downloaded or decoded is replaced by a 1x1
	 * placeholder, so that the record indexes stay aligned with $links.
	 *
	 * @param array $links Addresses of the images
	 * @return array Image records keyed by record index
	 */
	private function downloadImages($links){
		$images = array();
		foreach($links as $link) {
			// Invalid image data makes imagecreatefromstring() emit a warning;
			// that case is handled below, so the warning is suppressed.
			$imgFile = @imagecreatefromstring(Http::Request($link));

			if($imgFile === false){
				$imgFile = @imagecreate(1, 1);
				if($imgFile !== false){
					// The first color allocated for a palette image becomes its background
					imagecolorallocate($imgFile, 255, 255, 255);
				}
			}
			if($imgFile !== false){
				@imagefilter($imgFile, IMG_FILTER_GRAYSCALE);

				// imagejpeg() writes to the output, so capture it in a buffer
				ob_start();
				@imagejpeg($imgFile);
				$image = ob_get_clean();

				$images[$this->imgCounter] = new FileRecord(new Record($image));
				imagedestroy($imgFile);

				$this->imgCounter++;
			}
		}

		return $images;
	}
}
89 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/RecognizeURL.php b/inc/3rdparty/libraries/MOBIClass/RecognizeURL.php new file mode 100644 index 00000000..6319ed57 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/RecognizeURL.php | |||
@@ -0,0 +1,16 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Description of RecognizeURL | ||
5 | * | ||
6 | * @author Sander | ||
7 | */ | ||
class RecognizeURL {
	/**
	 * Find the content handler that is able to download the given URL.
	 *
	 * @param string $url Address to recognize
	 * @return FanFictionNet|null A handler constructed for the URL, or null
	 *                            when no handler matches it
	 */
	public static function GetContentHandler($url){
		if(!FanFictionNet::Matches($url)){
			return null;
		}
		return new FanFictionNet($url);
	}
}
16 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/Record.php b/inc/3rdparty/libraries/MOBIClass/Record.php new file mode 100644 index 00000000..3cb39582 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/Record.php | |||
@@ -0,0 +1,96 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * A Record of a PDB file | ||
4 | * | ||
5 | * @author Sander | ||
6 | */ | ||
class Record extends FileObject {
	/**
	 * Data in the record
	 * @var string
	 */
	private $data;
	/**
	 * Length of the record
	 * @var int
	 */
	private $length;

	/**
	 * Create a record
	 * @param string $data Data contained in the record
	 * @param int $length Length of the record (if set to -1,
	 * the length of $data will be taken)
	 */
	public function __construct($data = "", $length = -1){
		$this->data = $data;
		if($length >= 0){
			$this->length = $length;
		}else{
			$this->length = strlen($data);
		}
	}

	/**
	 * Compress the record. Only NO_COMPRESSION is supported, which leaves
	 * the data untouched.
	 *
	 * @param int $compression_method NO_COMPRESSION, PALMDOC_COMPRESSION or HUFF
	 * @throws Exception For the methods that are not implemented and for
	 *                   unknown methods
	 */
	public function compress($compression_method){
		switch($compression_method){
			case NO_COMPRESSION:
				//Finished!
				break;
			case PALMDOC_COMPRESSION:
			case HUFF:
				throw new Exception("Not implemented yet");
			default:
				throw new Exception("Invalid argument");
		}
	}

	/**
	 * Get the length of the record
	 * @return int Length of the data
	 */
	public function getByteLength(){
		return $this->getLength();
	}

	/**
	 * Get the length of the record
	 * @return int Length of the data
	 */
	public function getLength(){
		return $this->length;
	}

	/**
	 * Get the data contained in the record
	 * @return string Data contained in the record
	 */
	public function get(){
		return $this->data;
	}

	/**
	 * Set the data contained in the record. The stored length is not
	 * changed by this method.
	 * @param string $value Data contained in the record
	 */
	public function set($value){
		$this->data = $value;
	}

	/**
	 * Serialize the record
	 * @return string The data of the record
	 */
	public function serialize(){
		return $this->data;
	}

	/**
	 * Load the record from serialized data; the length becomes the length
	 * of $data.
	 * @param string $data Data contained in the record
	 */
	public function unserialize($data){
		// Must be called on $this: a bare __construct() is a call to an
		// undefined global function and ends in a fatal error.
		$this->__construct($data);
	}

	/**
	 * Describe the record, showing at most the first 100 bytes of its data
	 * @return string
	 */
	public function __toString() {
		$toShow = $this->data;
		if(strlen($this->data) > 103){
			$toShow = substr($this->data, 0, 100)."...";
		}
		$out = "Record: {\n";
		$out .= "\t".htmlspecialchars($toShow)."\n";
		$out .= "}";
		return $out;
	}
}
96 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/RecordFactory.php b/inc/3rdparty/libraries/MOBIClass/RecordFactory.php new file mode 100644 index 00000000..12806fe3 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/RecordFactory.php | |||
@@ -0,0 +1,115 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Helper class to help with creating records from a | ||
5 | * long data stream | ||
6 | * | ||
7 | * @author Sander | ||
8 | */ | ||
class RecordFactory {
	/**
	 * Settings for the record factory
	 * @var Settings
	 */
	private $settings;

	/**
	 * Create the helper class
	 * @param Settings $settings The Settings to be used for the records
	 */
	public function __construct($settings){
		$this->settings = $settings;
	}

	/**
	 * Create records from a data string.
	 *
	 * The data is cut after every "recordSize" characters (not bytes), so a
	 * multi-byte character is never split over two records.
	 *
	 * @param string $data
	 * @return array(Record)
	 */
	public function createRecords($data){
		$records = array();
		$size = $this->settings->get("recordSize");
		$compression = $this->settings->get("compression");

		$dataEntries = mb_str_split($data, $size);

		for($i = 0, $len = sizeof($dataEntries); $i < $len; $i++){
			$records[$i] = new Record($dataEntries[$i]);
			$records[$i]->compress($compression);
		}

		return $records;
	}

	/**
	 * Create the end-of-file record: the four bytes E9 8E 0D 0A.
	 *
	 * The value used to be passed as an integer, which a Record would
	 * serialize as its decimal digits instead of as four bytes.
	 *
	 * @return Record
	 */
	public function createEOFRecord(){
		return new Record("\xe9\x8e\x0d\x0a");
	}

	/**
	 * Create the FCIS record: the "FCIS" identifier followed by fixed
	 * values and the text length.
	 *
	 * @param int $textLength Length of the text
	 * @return Record
	 */
	public function createFCISRecord($textLength){
		$r = "FCIS";
		$r .= $this->asString(20, 4);
		$r .= $this->asString(16, 4);
		$r .= $this->asString(1, 4);
		$r .= $this->asString(0, 4);
		$r .= $this->asString($textLength, 4);
		$r .= $this->asString(0, 4);
		$r .= $this->asString(32, 4);
		$r .= $this->asString(8, 4);
		$r .= $this->asString(1, 2);
		$r .= $this->asString(1, 2);
		$r .= $this->asString(0, 4);
		return new Record($r);
	}

	/**
	 * Create the FLIS record: the "FLIS" identifier followed by fixed values.
	 *
	 * @return Record
	 */
	public function createFLISRecord(){
		$r = "FLIS";
		$r .= $this->asString(8, 4);
		$r .= $this->asString(65, 2);
		$r .= $this->asString(0, 2);
		$r .= $this->asString(0, 4);
		$r .= $this->asString(-1, 4);
		$r .= $this->asString(1, 2);
		$r .= $this->asString(3, 2);
		$r .= $this->asString(3, 4);
		$r .= $this->asString(1, 4);
		$r .= $this->asString(-1, 4);
		return new Record($r);
	}

	/**
	 * Encode an integer as a big-endian byte string.
	 *
	 * This used to return a human-readable hex dump ("00 00 00 08"), which
	 * put text instead of the binary values into the FCIS and FLIS records.
	 * Negative values are encoded in two's complement, so -1 becomes
	 * $size bytes of 0xFF.
	 *
	 * @param int $int Value to encode
	 * @param int $size Number of bytes to produce
	 * @return string Exactly $size bytes, most significant byte first
	 */
	private function asString($int, $size){
		$out = "";
		for($i = 0; $i < $size; $i++){
			// Peel off the lowest byte and put it in front of what we have
			$out = chr($int & 0xFF).$out;
			$int = $int >> 8;
		}
		return $out;
	}

	/**
	 * Describe the factory and the settings it uses
	 * @return string
	 */
	public function __toString() {
		$out = "Record Factory: {\n";
		$out .= "\tRecord Size: ".$this->settings->get("recordSize")."\n";
		$out .= "\tCompression: ".$this->settings->get("compression")."\n";
		$out .= "}";
		return $out;
	}
}
// PHP 7.4 and later ship a native mb_str_split(); declaring the function
// again would stop the script with "Cannot redeclare mb_str_split()".
if(!function_exists('mb_str_split')){
	/**
	 * Split a UTF-8 string into pieces of a given number of characters.
	 *
	 * As a side effect the mbstring internal and regex encodings are
	 * switched to UTF-8.
	 *
	 * @param string $string The string to split
	 * @param int $split_length Number of characters per piece; values below
	 *                          1 are treated as 1
	 * @return array The pieces, in order; the last one may be shorter
	 */
	function mb_str_split($string, $split_length = 1){
		mb_internal_encoding('UTF-8');
		mb_regex_encoding('UTF-8');

		$split_length = ($split_length <= 0) ? 1 : $split_length;

		$mb_strlen = mb_strlen($string, 'utf-8');

		$array = array();

		for($i = 0; $i < $mb_strlen; $i += $split_length){
			$array[] = mb_substr($string, $i, $split_length);
		}

		return $array;
	}
}
115 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/Settings.php b/inc/3rdparty/libraries/MOBIClass/Settings.php new file mode 100644 index 00000000..ddcf2054 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/Settings.php | |||
@@ -0,0 +1,97 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Description of Settings | ||
5 | * | ||
6 | * @author Sander | ||
7 | */ | ||
class Settings {
	/**
	 * All settings, keyed by their name
	 * @var array
	 */
	public $values;

	/**
	 * Create the settings: a fixed set of defaults, overridden and extended
	 * by the additional settings.
	 *
	 * @param array $additionalSettings Key/value pairs that replace a default
	 *                                  with the same key or add a new setting
	 */
	public function __construct($additionalSettings = array()) {
		// Changing most of these defaults results in an invalid file
		$defaults = array(
			"attributes"=>0,
			"version"=>0,
			"creationTime"=>time()+94694400,
			"modificationTime"=>time()+94694400,
			"backupTime"=>0,
			"modificationNumber"=>0,
			"appInfoID"=>0,
			"sortInfoID"=>0,
			"prcType"=>"BOOK",
			"creator"=>"MOBI",
			"uniqueIDSeed"=>rand(),
			"nextRecordListID"=>0,
			"recordAttributes"=>0,
			"compression"=>NO_COMPRESSION,
			"recordSize"=>RECORD_SIZE,
			"encryptionType"=>NO_ENCRYPTION,
			"mobiIdentifier"=>"MOBI",
			"mobiHeaderLength"=>0xe8,
			"mobiType"=>MOBIPOCKET_BOOK,
			"textEncoding"=>UTF8,
			"uniqueID"=>rand(),
			"fileVersion"=>6,
			"locale"=>0x09,
			"inputLanguage"=>0,
			"outputLanguage"=>0,
			"minimumVersion"=>6,
			"huffmanRecordOffset"=>0,
			"huffmanRecordCount"=>0,
			"exthFlags"=>0x40,
			"drmOffset"=>0xFFFFFFFF,
			"drmCount"=>0,
			"drmSize"=>0,
			"drmFlags"=>0,
			"extraDataFlags"=>0,
			"exthIdentifier"=>"EXTH",
			// The remaining defaults are safe to change
			"title"=>"Unknown title",
			"author"=>"Unknown author",
			"subject"=>"Unknown subject"
		);
		$this->values = $defaults;

		foreach($additionalSettings as $name=>$setting){
			$this->values[$name] = $setting;
		}
	}

	/**
	 * Look up a setting
	 * @param string $key Name of the setting
	 * @return mixed Value stored under that name
	 */
	public function get($key){
		$settings = $this->values;
		return $settings[$key];
	}

	/**
	 * Tell whether a setting is present (and not null)
	 * @param string $key Name of the setting
	 * @return bool True if the setting exists
	 */
	public function exists($key){
		$present = isset($this->values[$key]);
		return $present;
	}

	/**
	 * List all settings, one "name: value" pair per line
	 * @return string
	 */
	public function __toString() {
		$lines = array("Settings: {");
		foreach($this->values as $name=>$setting){
			$lines[] = "\t".$name.": ".$setting;
		}
		$lines[] = "}";
		return implode("\n", $lines);
	}
}
97 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/constants.php b/inc/3rdparty/libraries/MOBIClass/constants.php new file mode 100644 index 00000000..bd363118 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/constants.php | |||
@@ -0,0 +1,11 @@ | |||
1 | <?php | ||
// Values for the "compression" setting
define('NO_COMPRESSION', 1);
define('PALMDOC_COMPRESSION', 2);
define('HUFF', 17480);

// Default for the "recordSize" setting
define('RECORD_SIZE', 4096);

// Value for the "encryptionType" setting
define('NO_ENCRYPTION', 0);

// Value for the "mobiType" setting
define('MOBIPOCKET_BOOK', 2);

// Text encodings; UTF8 is the default for the "textEncoding" setting
define('CP1252', 1252);
define('UTF8', 65001);
diff --git a/inc/3rdparty/libraries/MOBIClass/downloaders/FanFictionNet.php b/inc/3rdparty/libraries/MOBIClass/downloaders/FanFictionNet.php new file mode 100644 index 00000000..65d5a466 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/downloaders/FanFictionNet.php | |||
@@ -0,0 +1,125 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Description of FanFictionNet | ||
5 | * | ||
6 | * @author Sander | ||
7 | */ | ||
class FanFictionNet extends MultipleFileHandler {
	/**
	 * Start of every story address; the story id follows directly after it
	 * @var string
	 */
	private static $prefix = "http://www.fanfiction.net/s/";
	/**
	 * Whether author and title have been read already
	 * @var bool
	 */
	private $downloadedMetadata = false;
	/**
	 * Id of the story
	 * @var int
	 */
	private $id = 0;
	/**
	 * Number of chapters; negative until the first chapter has been downloaded
	 * @var int
	 */
	private $chapterCount = -1;

	/**
	 * Download all chapters of a story.
	 *
	 * @param string $url Address of the story (any chapter)
	 * @throws Exception When the address contains no story id, a chapter
	 *                   cannot be downloaded or parsed, or the story has
	 *                   more than four chapters
	 */
	public function __construct($url) {
		// Matches() accepts the prefix anywhere in the URL, so locate it
		// instead of assuming that it starts at offset 0.
		$start = strpos($url, self::$prefix);
		$ending = ($start === false) ? "" : substr($url, $start + strlen(self::$prefix));
		// intval() reads the leading digits, so ".../s/123" is understood as
		// well as ".../s/123/1/Title".
		$this->id = intval($ending);
		if($this->id <= 0){
			throw new Exception("No story id found in ".$url);
		}

		// chapterCount is unknown (-1) until the first chapter is loaded, so the
		// bound is evaluated again on every iteration.
		for($i = 1; $i <= max(1, $this->chapterCount); $i++){
			$this->addChapter($i);
		}
	}

	/**
	 * Download one chapter and add it as a page.
	 *
	 * The first call also reads the metadata and the number of chapters.
	 *
	 * @param int $n Number of the chapter, starting at 1
	 * @throws Exception See the constructor
	 */
	private function addChapter($n){
		$doc = new DOMDocument();
		$file = Http::Request(self::$prefix.$this->id."/".$n."/");
		// Parser warnings about sloppy markup are expected and suppressed
		if(!is_string($file) || $file === "" || !@$doc->loadHTML($file)){
			throw new Exception("Unable to load chapter ".$n." of story ".$this->id);
		}

		if(!$this->downloadedMetadata){
			$this->loadMetadata($doc);
			$this->downloadedMetadata = true;
		}
		if($this->chapterCount < 0){
			$this->chapterCount = $this->getNumberChapters($doc);

			if($this->chapterCount > 4){
				throw new Exception("Too many files to download, don't use php for this!");
			}
		}

		$textEl = $doc->getElementById("storytext");
		if($textEl === null){
			throw new Exception("Error: no story text found in chapter ".$n." of story ".$this->id);
		}
		// Clear the presentational attributes of all horizontal rules
		$horizontalRulebars = $doc->getElementsByTagName('hr');
		foreach($horizontalRulebars as $hr) {
			$hr->setAttribute("size", "");
			$hr->setAttribute("noshade", "");
		}
		$text = $this->innerHtml($textEl);

		// The chapter title is the text of the option "<n>. <title>" in the
		// chapter selection box; it stays empty when there is no such option.
		$title = "";
		$selects = $doc->getElementsByTagName('select');
		foreach($selects as $select) {
			if($select->hasAttribute("name") && $select->getAttribute("name") == "chapter"){
				$options = $select->getElementsByTagName("option");

				$test = $n.". ";
				foreach($options as $option){
					$val = $option->nodeValue;
					if(substr($val, 0, strlen($test)) == $test){
						$title = substr($val, strlen($test));
						break;
					}
				}
				break;
			}
		}
		$this->addPage($text, $title);
	}

	/**
	 * Count the chapters listed in the chapter selection box.
	 *
	 * @param DOMDocument $doc A chapter page
	 * @return int Number of options in the box, or 1 when the page has no
	 *             chapter selection box
	 */
	private function getNumberChapters($doc){
		$selects = $doc->getElementsByTagName('select');
		foreach($selects as $select) {
			if($select->hasAttribute("name") && $select->getAttribute("name") == "chapter"){
				return $select->getElementsByTagName("option")->length;
			}
		}
		return 1;
	}

	/**
	 * Read author and title from a chapter page.
	 *
	 * The author is the text of the last link to "/u/..." on the page. The
	 * title is the text of the first <b> element, if there is one.
	 *
	 * @param DOMDocument $doc A chapter page
	 */
	private function loadMetadata($doc){
		//Author
		$links = $doc->getElementsByTagName('a');
		foreach($links as $link) {
			if($link->hasAttribute("href") && substr($link->getAttribute("href"), 0, 3) == "/u/"){
				$this->setMetadata("author", $link->nodeValue);
			}
		}

		//TODO: Find a more reliable way to extract the title
		$bold = $doc->getElementsByTagName("b")->item(0);
		if($bold !== null){
			$this->setMetadata("title", $bold->nodeValue);
		}
	}

	/**
	 * Get the HTML of everything inside a node
	 *
	 * @param DOMNode $node
	 * @return string HTML of the children of the node
	 */
	private function innerHtml($node){
		$doc = new DOMDocument();
		foreach ($node->childNodes as $child){
			$doc->appendChild($doc->importNode($child, true));
		}

		return $doc->saveHTML();
	}

	/**
	 * Tell whether this handler can download the given address
	 *
	 * @param string $url
	 * @return bool True when the address contains the story prefix
	 */
	public static function Matches($url){
		//TODO: Implement with regex
		return strpos($url, self::$prefix) !== false;
	}
}
125 | ?> | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/http_build_url.php b/inc/3rdparty/libraries/MOBIClass/http_build_url.php new file mode 100644 index 00000000..b475edb0 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/http_build_url.php | |||
@@ -0,0 +1,94 @@ | |||
1 | <?php | ||
if (!is_callable("http_build_url")) {
    define('HTTP_URL_REPLACE', 1);          // Replace every part of the first URL when there's one of the second URL
    define('HTTP_URL_JOIN_PATH', 2);        // Join relative paths
    define('HTTP_URL_JOIN_QUERY', 4);       // Join query strings
    define('HTTP_URL_STRIP_USER', 8);       // Strip any user authentication information
    define('HTTP_URL_STRIP_PASS', 16);      // Strip any password authentication information
    define('HTTP_URL_STRIP_AUTH', 32);      // Strip any authentication information
    define('HTTP_URL_STRIP_PORT', 64);      // Strip explicit port numbers
    define('HTTP_URL_STRIP_PATH', 128);     // Strip complete path
    define('HTTP_URL_STRIP_QUERY', 256);    // Strip query string
    define('HTTP_URL_STRIP_FRAGMENT', 512); // Strip any fragments (#identifier)
    define('HTTP_URL_STRIP_ALL', 1024);     // Strip anything but scheme and host

    /**
     * Build a URL.
     *
     * The parts of the second URL are merged into the first according to
     * the flags argument. Scheme and host from $parts always replace the
     * originals and are never stripped.
     *
     * @param mixed $url     (Part(s) of) a URL, as a string or as an associative
     *                       array like parse_url() returns
     * @param mixed $parts   Same as the first argument
     * @param int   $flags   A bitmask of binary or'ed HTTP_URL constants;
     *                       HTTP_URL_REPLACE is the default
     * @param array $new_url If set, it is filled with the parts of the composed
     *                       URL like parse_url() would return
     * @return string The composed URL
     */
    function http_build_url($url, $parts = array (), $flags = HTTP_URL_REPLACE, &$new_url = false) {
        $keys = array (
            'user',
            'pass',
            'port',
            'path',
            'query',
            'fragment'
        );

        // HTTP_URL_STRIP_ALL becomes all the HTTP_URL_STRIP_Xs
        if ($flags & HTTP_URL_STRIP_ALL) {
            $flags |= HTTP_URL_STRIP_USER;
            $flags |= HTTP_URL_STRIP_PASS;
            $flags |= HTTP_URL_STRIP_PORT;
            $flags |= HTTP_URL_STRIP_PATH;
            $flags |= HTTP_URL_STRIP_QUERY;
            $flags |= HTTP_URL_STRIP_FRAGMENT;
        }
        // HTTP_URL_STRIP_AUTH becomes HTTP_URL_STRIP_USER and HTTP_URL_STRIP_PASS
        else if ($flags & HTTP_URL_STRIP_AUTH) {
            $flags |= HTTP_URL_STRIP_USER;
            $flags |= HTTP_URL_STRIP_PASS;
        }

        // Parse the original URL. An already-parsed array is used as is, and
        // a URL that parse_url() rejects (it returns false) is treated as empty.
        $parse_url = is_array($url) ? $url : parse_url($url);
        if (!is_array($parse_url)) {
            $parse_url = array ();
        }

        // The second argument may be given as a URL string as well.
        if (!is_array($parts)) {
            $parts = (is_string($parts) && $parts !== '') ? parse_url($parts) : array ();
            if (!is_array($parts)) {
                $parts = array ();
            }
        }

        // Scheme and Host are always replaced
        if (isset($parts['scheme']))
            $parse_url['scheme'] = $parts['scheme'];

        if (isset($parts['host']))
            $parse_url['host'] = $parts['host'];

        // (If applicable) Replace the original URL with its new parts
        if ($flags & HTTP_URL_REPLACE) {
            foreach ($keys as $key) {
                if (isset($parts[$key]))
                    $parse_url[$key] = $parts[$key];
            }
        } else {
            // Join the original URL path with the new path
            if (isset($parts['path']) && ($flags & HTTP_URL_JOIN_PATH)) {
                if (isset($parse_url['path'])) {
                    // Drop only the final segment of the original path. Removing
                    // every occurrence of that segment's text would also eat
                    // identical text in parent directories ("/a/a" -> "/").
                    $directory = $parse_url['path'];
                    $lastSegment = basename($directory);
                    if ($lastSegment !== '') {
                        $position = strrpos($directory, $lastSegment);
                        if ($position !== false) {
                            $directory = substr_replace($directory, '', $position, strlen($lastSegment));
                        }
                    }
                    $parse_url['path'] = rtrim($directory, '/') . '/' . ltrim($parts['path'], '/');
                } else {
                    $parse_url['path'] = $parts['path'];
                }
            }

            // Join the original query string with the new query string
            if (isset($parts['query']) && ($flags & HTTP_URL_JOIN_QUERY)) {
                if (isset($parse_url['query']))
                    $parse_url['query'] .= '&' . $parts['query'];
                else
                    $parse_url['query'] = $parts['query'];
            }
        }

        // Strips all the applicable sections of the URL
        // Note: Scheme and Host are never stripped
        foreach ($keys as $key) {
            if ($flags & (int)constant('HTTP_URL_STRIP_' . strtoupper($key)))
                unset($parse_url[$key]);
        }

        $new_url = $parse_url;

        // Reassemble: scheme://user:pass@host:port/path?query#fragment
        $result = '';
        if (isset($parse_url['scheme'])) {
            $result .= $parse_url['scheme'] . '://';
        }
        if (isset($parse_url['user'])) {
            $result .= $parse_url['user'];
            if (isset($parse_url['pass'])) {
                $result .= ':' . $parse_url['pass'];
            }
            $result .= '@';
        }
        if (isset($parse_url['host'])) {
            $result .= $parse_url['host'];
        }
        if (isset($parse_url['port'])) {
            $result .= ':' . $parse_url['port'];
        }
        if (isset($parse_url['path'])) {
            $result .= $parse_url['path'];
        }
        if (isset($parse_url['query'])) {
            $result .= '?' . $parse_url['query'];
        }
        if (isset($parse_url['fragment'])) {
            $result .= '#' . $parse_url['fragment'];
        }

        return $result;
    }
}
diff --git a/inc/3rdparty/libraries/MOBIClass/readability/JSLikeHTMLElement.php b/inc/3rdparty/libraries/MOBIClass/readability/JSLikeHTMLElement.php new file mode 100644 index 00000000..1a8ec88c --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/readability/JSLikeHTMLElement.php | |||
@@ -0,0 +1,110 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * JavaScript-like HTML DOM Element | ||
4 | * | ||
5 | * This class extends PHP's DOMElement to allow | ||
6 | * users to get and set the innerHTML property of | ||
7 | * HTML elements in the same way it's done in | ||
8 | * JavaScript. | ||
9 | * | ||
10 | * Example usage: | ||
11 | * @code | ||
12 | * require_once 'JSLikeHTMLElement.php'; | ||
13 | * header('Content-Type: text/plain'); | ||
14 | * $doc = new DOMDocument(); | ||
15 | * $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement'); | ||
16 | * $doc->loadHTML('<div><p>Para 1</p><p>Para 2</p></div>'); | ||
17 | * $elem = $doc->getElementsByTagName('div')->item(0); | ||
18 | * | ||
19 | * // print innerHTML | ||
20 | * echo $elem->innerHTML; // prints '<p>Para 1</p><p>Para 2</p>' | ||
21 | * echo "\n\n"; | ||
22 | * | ||
23 | * // set innerHTML | ||
24 | * $elem->innerHTML = '<a href="http://fivefilters.org">FiveFilters.org</a>'; | ||
25 | * echo $elem->innerHTML; // prints '<a href="http://fivefilters.org">FiveFilters.org</a>' | ||
26 | * echo "\n\n"; | ||
27 | * | ||
28 | * // print document (with our changes) | ||
29 | * echo $doc->saveXML(); | ||
30 | * @endcode | ||
31 | * | ||
32 | * @author Keyvan Minoukadeh - http://www.keyvan.net - keyvan@keyvan.net | ||
33 | * @see http://fivefilters.org (the project this was written for) | ||
34 | */ | ||
class JSLikeHTMLElement extends DOMElement
{
    /**
     * Magic setter providing a JavaScript-style writable innerHTML:
     * @code
     * $div->innerHTML = '<h2>Chapter 2</h2><p>The story begins...</p>';
     * @endcode
     *
     * The element is emptied first. The new markup is then parsed as XML;
     * if that fails it is parsed again as HTML. If both attempts fail the
     * element simply stays empty. Any other property name raises an
     * E_USER_NOTICE.
     */
    public function __set($name, $value) {
        if ($name != 'innerHTML') {
            $trace = debug_backtrace();
            trigger_error('Undefined property via __set(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE);
            return;
        }

        // first, empty the element (back to front)
        while ($this->lastChild !== null) {
            $this->removeChild($this->lastChild);
        }

        // nothing to insert
        if ($value == '') {
            return;
        }

        // appendXML() expects well-formed markup (XHTML)
        $fragment = $this->ownerDocument->createDocumentFragment();
        $parsedAsXml = @$fragment->appendXML($value); // @ to suppress PHP warnings
        if ($parsedAsXml) {
            if ($fragment->hasChildNodes()) {
                $this->appendChild($fragment);
            }
            return;
        }

        // $value is probably ill-formed: let the HTML parser have a go.
        // Wrapping it in <htmlfragment> generates a warning, but so does bad
        // HTML (and by this point, bad HTML is what we've got). The wrapper
        // (with the warning suppressed) lets us pick our nodes back out from
        // inside the <html><body> tags the parser adds around a fragment.
        // Note: despite the warning, if loadHTML succeeds it will return true.
        $scratch = new DOMDocument();
        $encoded = mb_convert_encoding($value, 'HTML-ENTITIES', 'UTF-8');
        $parsedAsHtml = @$scratch->loadHTML('<htmlfragment>'.$encoded.'</htmlfragment>');
        if (!$parsedAsHtml) {
            // oh well, we tried, we really did. :(
            // this element is now empty
            return;
        }

        $wrapper = $scratch->getElementsByTagName('htmlfragment')->item(0);
        foreach ($wrapper->childNodes as $child) {
            $this->appendChild($this->ownerDocument->importNode($child, true));
        }
    }

    /**
     * Magic getter providing a JavaScript-style readable innerHTML:
     * @code
     * $string = $div->innerHTML;
     * @endcode
     *
     * Returns the concatenated saveXML() output of every child node. Any
     * other property name raises an E_USER_NOTICE and yields null.
     */
    public function __get($name)
    {
        if ($name != 'innerHTML') {
            $trace = debug_backtrace();
            trigger_error('Undefined property via __get(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE);
            return null;
        }

        $pieces = array();
        foreach ($this->childNodes as $child) {
            $pieces[] = $this->ownerDocument->saveXML($child);
        }
        return implode('', $pieces);
    }

    /**
     * String form of the element: its tag name in square brackets.
     */
    public function __toString()
    {
        return sprintf('[%s]', $this->tagName);
    }
}
110 | ?> \ No newline at end of file | ||
diff --git a/inc/3rdparty/libraries/MOBIClass/readability/Readability.php b/inc/3rdparty/libraries/MOBIClass/readability/Readability.php new file mode 100644 index 00000000..91554243 --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/readability/Readability.php | |||
@@ -0,0 +1,1069 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * Arc90's Readability ported to PHP for FiveFilters.org | ||
4 | * Based on readability.js version 1.7.1 (without multi-page support) | ||
5 | * ------------------------------------------------------ | ||
6 | * Original URL: http://lab.arc90.com/experiments/readability/js/readability.js | ||
7 | * Arc90's project URL: http://lab.arc90.com/experiments/readability/ | ||
8 | * JS Source: http://code.google.com/p/arc90labs-readability | ||
9 | * Ported by: Keyvan Minoukadeh, http://www.keyvan.net | ||
10 | * More information: http://fivefilters.org/content-only/ | ||
11 | * License: Apache License, Version 2.0 | ||
12 | * Requires: PHP5 | ||
13 | * Date: 2010-10-29 | ||
14 | * | ||
15 | * Differences between the PHP port and the original | ||
16 | * ------------------------------------------------------ | ||
17 | * Arc90's Readability is designed to run in the browser. It works on the DOM | ||
18 | * tree (the parsed HTML) after the page's CSS styles have been applied and | ||
19 | * Javascript code executed. This PHP port does not run inside a browser. | ||
20 | * We use PHP's ability to parse HTML to build our DOM tree, but we cannot | ||
21 | * rely on CSS or Javascript support. As such, the results will not always | ||
22 | * match Arc90's Readability. (For example, if a web page contains CSS style | ||
23 | * rules or Javascript code which hide certain HTML elements from display, | ||
24 | * Arc90's Readability will dismiss those from consideration but our PHP port, | ||
25 | * unable to understand CSS or Javascript, will not know any better.) | ||
26 | * | ||
27 | * Another significant difference is that the aim of Arc90's Readability is | ||
28 | * to re-present the main content block of a given web page so users can | ||
29 | * read it more easily in their browsers. Correct identification, clean up, | ||
30 | * and separation of the content block is only a part of this process. | ||
31 | * This PHP port is only concerned with this part, it does not include code | ||
32 | * that relates to presentation in the browser - Arc90 already do | ||
33 | * that extremely well, and for PDF output there's FiveFilters.org's | ||
34 | * PDF Newspaper: http://fivefilters.org/pdf-newspaper/. | ||
35 | * | ||
36 | * Finally, this class contains methods that might be useful for developers | ||
37 | * working on HTML document fragments. So without deviating too much from | ||
38 | * the original code (which I don't want to do because it makes debugging | ||
39 | * and updating more difficult), I've tried to make it a little more | ||
40 | * developer friendly. You should be able to use the methods here on | ||
41 | * existing DOMElement objects without passing an entire HTML document to | ||
42 | * be parsed. | ||
43 | */ | ||
44 | |||
45 | // This class allows us to do JavaScript-like assignments to innerHTML | ||
46 | require_once(dirname(__FILE__).'/JSLikeHTMLElement.php'); | ||
47 | |||
48 | // Alternative usage (for testing only!) | ||
49 | // uncomment the lines below and call Readability.php in your browser | ||
50 | // passing it the URL of the page you'd like content from, e.g.: | ||
51 | // Readability.php?url=http://medialens.org/alerts/09/090615_the_guardian_climate.php | ||
52 | |||
53 | /* | ||
54 | if (!isset($_GET['url']) || $_GET['url'] == '') { | ||
55 | die('Please pass a URL to the script. E.g. Readability.php?url=bla.com/story.html'); | ||
56 | } | ||
57 | $url = $_GET['url']; | ||
58 | if (!preg_match('!^https?://!i', $url)) $url = 'http://'.$url; | ||
59 | $html = file_get_contents($url); | ||
60 | $r = new Readability($html, $url); | ||
61 | $r->init(); | ||
62 | echo $r->articleContent->innerHTML; | ||
63 | */ | ||
64 | |||
65 | class Readability | ||
66 | { | ||
67 | public $version = '1.7.1-without-multi-page'; | ||
68 | public $convertLinksToFootnotes = false; | ||
69 | public $revertForcedParagraphElements = true; | ||
70 | public $articleTitle; | ||
71 | public $articleContent; | ||
72 | public $dom; | ||
73 | public $url = null; // optional - URL where HTML was retrieved | ||
74 | public $debug = false; | ||
75 | protected $body = null; // | ||
76 | protected $bodyCache = null; // Cache the body HTML in case we need to re-use it later | ||
77 | protected $flags = 7; // 1 | 2 | 4; // Start with all flags set. | ||
78 | protected $success = false; // indicates whether we were able to extract or not | ||
79 | |||
80 | /** | ||
81 | * All of the regular expressions in use within readability. | ||
82 | * Defined up here so we don't instantiate them repeatedly in loops. | ||
83 | **/ | ||
84 | public $regexps = array( | ||
85 | 'unlikelyCandidates' => '/combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter/i', | ||
86 | 'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i', | ||
87 | 'positive' => '/article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i', | ||
88 | 'negative' => '/combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/i', | ||
89 | 'divToPElements' => '/<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i', | ||
90 | 'replaceBrs' => '/(<br[^>]*>[ \n\r\t]*){2,}/i', | ||
91 | 'replaceFonts' => '/<(\/?)font[^>]*>/i', | ||
92 | // 'trimRe' => '/^\s+|\s+$/g', // PHP has trim() | ||
93 | 'normalize' => '/\s{2,}/', | ||
94 | 'killBreaks' => '/(<br\s*\/?>(\s| ?)*){1,}/', | ||
95 | 'video' => '/http:\/\/(www\.)?(youtube|vimeo)\.com/i', | ||
96 | 'skipFootnoteLink' => '/^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i' | ||
97 | ); | ||
98 | |||
99 | /* constants */ | ||
100 | const FLAG_STRIP_UNLIKELYS = 1; | ||
101 | const FLAG_WEIGHT_CLASSES = 2; | ||
102 | const FLAG_CLEAN_CONDITIONALLY = 4; | ||
103 | |||
104 | /** | ||
105 | * Create instance of Readability | ||
106 | * @param string UTF-8 encoded string | ||
107 | * @param string (optional) URL associated with HTML (used for footnotes) | ||
108 | */ | ||
function __construct($html, $url=null)
{
    // The two textual clean-ups below run on the raw string because this is
    // the only point where we still have it - before it is parsed into a DOM.
    /* Turn all double br's into p's */
    /* Note, this is pretty costly as far as processing goes. Maybe optimize later. */
    $withParagraphs = preg_replace($this->regexps['replaceBrs'], '</p><p>', $html);
    /* Turn font tags into spans */
    $withSpans = preg_replace($this->regexps['replaceFonts'], '<$1span>', $withParagraphs);
    $encoded = mb_convert_encoding($withSpans, 'HTML-ENTITIES', "UTF-8");

    $document = new DOMDocument();
    $this->dom = $document;
    $document->preserveWhiteSpace = false;
    // Elements become JSLikeHTMLElement instances, which gives them innerHTML.
    $document->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
    @$document->loadHTML($encoded);

    $this->url = $url;
}
122 | |||
123 | /** | ||
124 | * Get article title element | ||
125 | * @return DOMElement | ||
126 | */ | ||
public function getTitle() {
    // Holds whatever init() stored; not set until init() has run.
    $title = $this->articleTitle;
    return $title;
}
130 | |||
131 | /** | ||
132 | * Get article content element | ||
133 | * @return DOMElement | ||
134 | */ | ||
public function getContent() {
    // Holds whatever init() stored; not set until init() has run.
    $content = $this->articleContent;
    return $content;
}
138 | |||
139 | /** | ||
140 | * Runs readability. | ||
141 | * | ||
142 | * Workflow: | ||
143 | * 1. Prep the document by removing script tags, css, etc. | ||
144 | * 2. Build readability's DOM tree. | ||
145 | * 3. Grab the article content from the current dom tree. | ||
146 | * 4. Replace the current DOM tree with the new one. | ||
147 | * 5. Read peacefully. | ||
148 | * | ||
149 | * @return boolean true if we found content, false otherwise | ||
150 | **/ | ||
public function init()
{
    $this->removeScripts($this->dom);

    // Optimistic default; flipped below when no article could be grabbed.
    $this->success = true;

    // Remember the document's first <body> (and its markup) unless we
    // already hold one.
    $bodies = $this->dom->getElementsByTagName('body');
    if ($bodies->length > 0) {
        $firstBody = $bodies->item(0);
        if ($this->bodyCache == null) {
            $this->bodyCache = $firstBody->innerHTML;
        }
        if ($this->body == null) {
            $this->body = $firstBody;
        }
    }

    $this->prepDocument();

    /* Build readability's DOM tree */
    $overlay = $this->dom->createElement('div');
    $innerDiv = $this->dom->createElement('div');
    $title = $this->getArticleTitle();
    $content = $this->grabArticle();

    if (!$content) {
        // Nothing usable was found: report failure and substitute a placeholder.
        $this->success = false;
        $content = $this->dom->createElement('div');
        $content->setAttribute('id', 'readability-content');
        $content->innerHTML = '<p>Sorry, Readability was unable to parse this page for content.</p>';
    }

    $overlay->setAttribute('id', 'readOverlay');
    $innerDiv->setAttribute('id', 'readInner');

    /* Glue the structure of our document together. */
    $innerDiv->appendChild($title);
    $innerDiv->appendChild($content);
    $overlay->appendChild($innerDiv);

    /* Clear the old HTML, insert the new content. */
    $body = $this->body;
    $body->innerHTML = '';
    $body->appendChild($overlay);
    $body->removeAttribute('style');

    $this->postProcessContent($content);

    // Set title and content instance variables
    $this->articleTitle = $title;
    $this->articleContent = $content;

    return $this->success;
}
209 | |||
210 | /** | ||
211 | * Debug | ||
212 | */ | ||
protected function dbg($msg) {
    // Silent unless the public $debug switch is on.
    if (!$this->debug) {
        return;
    }
    echo '* ' . $msg . '<br />' . "\n";
}
216 | |||
217 | /** | ||
218 | * Run any post-process modifications to article content as necessary. | ||
219 | * | ||
220 | * @param DOMElement | ||
221 | * @return void | ||
222 | */ | ||
public function postProcessContent($articleContent) {
    // Footnote conversion is opt-in ...
    if (!$this->convertLinksToFootnotes) {
        return;
    }
    // ... and is skipped when the source URL matches wikipedia.org.
    if (preg_match('/wikipedia\.org/', @$this->url)) {
        return;
    }
    $this->addFootnotes($articleContent);
}
228 | |||
229 | /** | ||
230 | * Get the article title as an H1. | ||
231 | * | ||
232 | * @return DOMElement | ||
233 | */ | ||
protected function getArticleTitle() {
    // Start from the text of the document's first <title> element.
    $origTitle = '';
    try {
        $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0));
    } catch(Exception $e) {}
    $curTitle = $origTitle;

    // Word counts below are "number of spaces + 1", i.e. the size of the
    // array explode(' ', ...) would produce.
    if (preg_match('/ [\|\-] /', $curTitle)) {
        // "Something | Site" or "Something - Site": keep what precedes the
        // last separator, unless that leaves fewer than three words - then
        // keep what follows the first separator instead.
        $curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle);
        if (substr_count($curTitle, ' ') < 2) {
            $curTitle = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $origTitle);
        }
    } else if (strpos($curTitle, ': ') !== false) {
        // "Site: Something": keep what follows the last colon, unless that
        // leaves fewer than three words - then keep what follows the first.
        $curTitle = preg_replace('/.*:(.*)/i', '$1', $origTitle);
        if (substr_count($curTitle, ' ') < 2) {
            $curTitle = preg_replace('/[^:]*[:](.*)/i','$1', $origTitle);
        }
    } else {
        // Suspiciously long or short title: use the page's <h1> when there
        // is exactly one.
        $length = strlen($curTitle);
        if ($length > 150 || $length < 15) {
            $hOnes = $this->dom->getElementsByTagName('h1');
            if ($hOnes->length == 1) {
                $curTitle = $this->getInnerText($hOnes->item(0));
            }
        }
    }

    $curTitle = trim($curTitle);

    // Four words or fewer: fall back to the untouched <title> text.
    if (substr_count($curTitle, ' ') < 4) {
        $curTitle = $origTitle;
    }

    $heading = $this->dom->createElement('h1');
    $heading->innerHTML = $curTitle;

    return $heading;
}
278 | |||
279 | /** | ||
280 | * Prepare the HTML document for readability to scrape it. | ||
281 | * This includes things like stripping javascript, CSS, and handling terrible markup. | ||
282 | * | ||
283 | * @return void | ||
284 | **/ | ||
protected function prepDocument() {
    $root = $this->dom->documentElement;
    if ($root == null) {
        throw new Exception("No document element");
    }

    /**
     * In some cases a body element can't be found (if the HTML is totally hosed for example)
     * so we create a new body node and append it to the document.
     */
    if ($this->body == null) {
        $replacement = $this->dom->createElement('body');
        $this->body = $replacement;
        $root->appendChild($replacement);
    }

    $this->body->setAttribute('id', 'readabilityBody');

    /* Remove all style tags */
    // Snapshot the matches first, then detach each one from its parent.
    $styleTags = array();
    foreach ($this->dom->getElementsByTagName('style') as $styleTag) {
        $styleTags[] = $styleTag;
    }
    foreach ($styleTags as $styleTag) {
        $styleTag->parentNode->removeChild($styleTag);
    }

    // The JS original also turns double <br>s into <p>s and <font> into
    // <span> at this point. The PHP port does that in the constructor, as
    // that's when we have raw HTML - before parsing it into a DOM tree.
}
314 | |||
315 | /** | ||
316 | * For easier reading, convert this document to have footnotes at the bottom rather than inline links. | ||
317 | * @see http://www.roughtype.com/archives/2010/05/experiments_in.php | ||
318 | * | ||
319 | * @return void | ||
320 | **/ | ||
public function addFootnotes($articleContent) {
    // Builds <div id="readability-footnotes"><h3>References</h3><ol>...</ol></div>
    // and appends it to $articleContent when at least one link was footnoted.
    $footnotesWrapper = $this->dom->createElement('div');
    $footnotesWrapper->setAttribute('id', 'readability-footnotes');
    $footnotesWrapper->innerHTML = '<h3>References</h3>';

    $articleFootnotes = $this->dom->createElement('ol');
    $articleFootnotes->setAttribute('id', 'readability-footnotes-list');
    $footnotesWrapper->appendChild($articleFootnotes);

    $articleLinks = $articleContent->getElementsByTagName('a');

    $linkCount = 0;
    // The length is re-read on every pass. The reference links inserted
    // below carry the 'readability-DoNotFootnote' class, so they are
    // skipped if the list picks them up.
    for ($i = 0; $i < $articleLinks->length; $i++)
    {
        $articleLink = $articleLinks->item($i);
        $linkText = $this->getInnerText($articleLink);

        // Decide whether to skip before building any throwaway nodes.
        if ((strpos($articleLink->getAttribute('class'), 'readability-DoNotFootnote') !== false) || preg_match($this->regexps['skipFootnoteLink'], $linkText)) {
            continue;
        }

        $footnoteLink = $articleLink->cloneNode(true);
        $refLink = $this->dom->createElement('a');
        $footnote = $this->dom->createElement('li');
        // Host shown next to the footnote: the link's own, else the page's.
        $linkDomain = @parse_url($footnoteLink->getAttribute('href'), PHP_URL_HOST);
        if (!$linkDomain && isset($this->url)) $linkDomain = @parse_url($this->url, PHP_URL_HOST);

        $linkCount++;

        /** Add a superscript reference after the article link */
        $refLink->setAttribute('href', '#readabilityFootnoteLink-' . $linkCount);
        $refLink->innerHTML = '<small><sup>[' . $linkCount . ']</sup></small>';
        $refLink->setAttribute('class', 'readability-DoNotFootnote');
        $refLink->setAttribute('style', 'color: inherit;');

        // Compare node identity explicitly; a loose == between two objects
        // compares their state rather than asking "is this the same node?".
        $parent = $articleLink->parentNode;
        if ($parent->lastChild->isSameNode($articleLink)) {
            $parent->appendChild($refLink);
        } else {
            $parent->insertBefore($refLink, $articleLink->nextSibling);
        }

        $articleLink->setAttribute('style', 'color: inherit; text-decoration: none;');
        $articleLink->setAttribute('name', 'readabilityLink-' . $linkCount);

        $footnote->innerHTML = '<small><sup><a href="#readabilityLink-' . $linkCount . '" title="Jump to Link in Article">^</a></sup></small> ';

        // Footnote text: the link's title attribute when present, else its text.
        $footnoteLink->innerHTML = ($footnoteLink->getAttribute('title') != '' ? $footnoteLink->getAttribute('title') : $linkText);
        $footnoteLink->setAttribute('name', 'readabilityFootnoteLink-' . $linkCount);

        $footnote->appendChild($footnoteLink);
        if ($linkDomain) $footnote->innerHTML = $footnote->innerHTML . '<small> (' . $linkDomain . ')</small>';

        $articleFootnotes->appendChild($footnote);
    }

    if ($linkCount > 0) {
        $articleContent->appendChild($footnotesWrapper);
    }
}
381 | |||
382 | /** | ||
383 | * Reverts P elements with class 'readability-styled' | ||
384 | * to text nodes - which is what they were before. | ||
385 | * | ||
386 | * @param DOMElement | ||
387 | * @return void | ||
388 | */ | ||
function revertReadabilityStyledElements($articleContent) {
    $document = $articleContent->ownerDocument;
    $finder = new DOMXPath($document);
    $styled = $finder->query('.//p[@class="readability-styled"]', $articleContent);

    // Walk the matches last-to-first, swapping each <p> for a plain text
    // node that carries the same text content.
    $index = $styled->length;
    while ($index-- > 0) {
        $paragraph = $styled->item($index);
        $plainText = $document->createTextNode($paragraph->textContent);
        $paragraph->parentNode->replaceChild($plainText, $paragraph);
    }
}
401 | |||
402 | /** | ||
403 | * Prepare the article node for display. Clean out any inline styles, | ||
404 | * iframes, forms, strip extraneous <p> tags, etc. | ||
405 | * | ||
406 | * @param DOMElement | ||
407 | * @return void | ||
408 | */ | ||
function prepArticle($articleContent) {
    $this->cleanStyles($articleContent);
    $this->killBreaks($articleContent);
    if ($this->revertForcedParagraphElements) {
        $this->revertReadabilityStyledElements($articleContent);
    }

    /* Clean out junk from the article content */
    $this->cleanConditionally($articleContent, 'form');
    foreach (array('object', 'h1') as $tag) {
        $this->clean($articleContent, $tag);
    }

    /**
     * If there is only one h2, they are probably using it
     * as a header and not a subheader, so remove it since we already have a header.
     ***/
    $h2Count = $articleContent->getElementsByTagName('h2')->length;
    if ($h2Count == 1) {
        $this->clean($articleContent, 'h2');
    }
    $this->clean($articleContent, 'iframe');

    $this->cleanHeaders($articleContent);

    /* Do these last as the previous stuff may have removed junk that will affect these */
    foreach (array('table', 'ul', 'div') as $tag) {
        $this->cleanConditionally($articleContent, $tag);
    }

    /* Remove extra paragraphs: no img/embed/object inside and no text */
    $paragraphs = $articleContent->getElementsByTagName('p');
    $index = $paragraphs->length;
    while ($index-- > 0) {
        $paragraph = $paragraphs->item($index);
        $imgCount = $paragraph->getElementsByTagName('img')->length;
        $embedCount = $paragraph->getElementsByTagName('embed')->length;
        $objectCount = $paragraph->getElementsByTagName('object')->length;

        if ($imgCount > 0 || $embedCount > 0 || $objectCount > 0) {
            continue;
        }
        if ($this->getInnerText($paragraph, false) == '') {
            $paragraph->parentNode->removeChild($paragraph);
        }
    }

    // Drop any <br> that sits directly in front of a <p>.
    try {
        $markup = $articleContent->innerHTML;
        $articleContent->innerHTML = preg_replace('/<br[^>]*>\s*<p/i', '<p', $markup);
    }
    catch (Exception $e) {
        $this->dbg("Cleaning innerHTML of breaks failed. This is an IE strict-block-elements bug. Ignoring.: " . $e);
    }
}
459 | |||
460 | /** | ||
461 | * Initialize a node with the readability object. Also checks the | ||
462 | * className/id for special names to add to its score. | ||
463 | * | ||
464 | * @param Element | ||
465 | * @return void | ||
466 | **/ | ||
protected function initializeNode($node) {
    // Starting score per tag name; tags not listed start at 0.
    $baseScores = array(
        'DIV'        => 5,

        'PRE'        => 3,
        'TD'         => 3,
        'BLOCKQUOTE' => 3,

        'ADDRESS'    => -3,
        'OL'         => -3,
        'UL'         => -3,
        'DL'         => -3,
        'DD'         => -3,
        'DT'         => -3,
        'LI'         => -3,
        'FORM'       => -3,

        'H1'         => -5,
        'H2'         => -5,
        'H3'         => -5,
        'H4'         => -5,
        'H5'         => -5,
        'H6'         => -5,
        'TH'         => -5
    );

    // The score lives on the node itself, in a "readability" attribute.
    $scoreAttr = $this->dom->createAttribute('readability');
    $scoreAttr->value = 0; // this is our contentScore
    $node->setAttributeNode($scoreAttr);

    $tag = strtoupper($node->tagName); // unsure if strtoupper is needed, but using it just in case
    if (isset($baseScores[$tag])) {
        $scoreAttr->value += $baseScores[$tag];
    }

    // Finally add the class/id based weight.
    $scoreAttr->value += $this->getClassWeight($node);
}
506 | |||
/**
 * grabArticle - Using a variety of metrics (content score, class name, element types), find the
 * content that is most likely to be the stuff a user wants to read, then return it wrapped in a div.
 *
 * Stages:
 *  1. Prep: remove nodes whose class/id look unlikely (only while FLAG_STRIP_UNLIKELYS is active),
 *     turn DIVs whose markup does not match the 'divToPElements' pattern into P elements, and wrap
 *     loose text nodes of the remaining DIVs in styled P elements.
 *  2. Score every P/TD/PRE and credit its parent (full score) and grandparent (half score).
 *  3. Scale each candidate by its link density and keep the highest scoring one.
 *  4. Gather the top candidate and related siblings into a new div with id "readability-content".
 *  5. Run prepArticle() on the result; if it holds fewer than 250 characters of text, restore the
 *     cached body and retry with one more flag switched off.
 *
 * @param DOMNode|null $page node to search in; $this->dom is used when this is empty
 * @return DOMElement|false the article container, or false when the result is still too short
 *                          after all three flags have been removed
 */
protected function grabArticle($page=null) {
    $stripUnlikelyCandidates = $this->flagIsActive(self::FLAG_STRIP_UNLIKELYS);
    if (!$page) $page = $this->dom;
    $allElements = $page->getElementsByTagName('*');

    /**
     * First, node prepping. Trash nodes that look cruddy (like ones with the class name "comment", etc), and turn divs
     * into P tags where they have been used inappropriately (as in, where they contain no other block level elements.)
     *
     * $allElements is a live list: whenever a node is removed or replaced the index is stepped
     * back so the element that slides into the current position is examined as well.
     **/
    $node = null;
    $nodesToScore = array();
    for ($nodeIndex = 0; ($node = $allElements->item($nodeIndex)); $nodeIndex++) {
        $tagName = strtoupper($node->tagName);

        /* Remove unlikely candidates */
        if ($stripUnlikelyCandidates) {
            $unlikelyMatchString = $node->getAttribute('class') . $node->getAttribute('id');
            if (
                preg_match($this->regexps['unlikelyCandidates'], $unlikelyMatchString) &&
                !preg_match($this->regexps['okMaybeItsACandidate'], $unlikelyMatchString) &&
                $tagName != 'BODY'
            )
            {
                $this->dbg('Removing unlikely candidate - ' . $unlikelyMatchString);
                $node->parentNode->removeChild($node);
                $nodeIndex--;
                continue;
            }
        }

        if ($tagName == 'P' || $tagName == 'TD' || $tagName == 'PRE') {
            $nodesToScore[] = $node;
        }

        /* Turn all divs that don't have children block level elements into p's */
        if ($tagName == 'DIV') {
            if (!preg_match($this->regexps['divToPElements'], $node->innerHTML)) {
                $newNode = $this->dom->createElement('p');
                try {
                    $newNode->innerHTML = $node->innerHTML;
                    $node->parentNode->replaceChild($newNode, $node);
                    // Stepping back makes the next iteration land on $newNode, which is then
                    // queued for scoring by the P/TD/PRE check above. The replaced DIV is
                    // detached (no parent) and could never be scored, so it is not queued.
                    $nodeIndex--;
                }
                catch(Exception $e) {
                    $this->dbg('Could not alter div to p, reverting back to div.: ' . $e);
                }
            }
            else
            {
                /* EXPERIMENTAL */
                // TODO: change these p elements back to text nodes after processing
                for ($i = 0, $il = $node->childNodes->length; $i < $il; $i++) {
                    $childNode = $node->childNodes->item($i);
                    if ($childNode->nodeType == 3) { // XML_TEXT_NODE
                        $p = $this->dom->createElement('p');
                        // Copy the text as text. Assigning the decoded nodeValue to innerHTML
                        // would re-parse it as markup, so content such as "&lt;b&gt;" in the
                        // source could turn into real elements.
                        $p->appendChild($this->dom->createTextNode($childNode->nodeValue));
                        $p->setAttribute('style', 'display: inline;');
                        $p->setAttribute('class', 'readability-styled');
                        $childNode->parentNode->replaceChild($p, $childNode);
                    }
                }
            }
        }
    }

    /**
     * Loop through all paragraphs, and assign a score to them based on how content-y they look.
     * Then add their score to their parent node.
     *
     * A score is determined by things like number of commas, class names, etc. Maybe eventually link density.
     **/
    $candidates = array();
    for ($pt=0; $pt < count($nodesToScore); $pt++) {
        $parentNode = $nodesToScore[$pt]->parentNode;
        // Only element grandparents can carry a score attribute.
        $grandParentNode = !$parentNode ? null : (($parentNode->parentNode instanceof DOMElement) ? $parentNode->parentNode : null);
        $innerText = $this->getInnerText($nodesToScore[$pt]);

        if (!$parentNode || !isset($parentNode->tagName)) {
            continue;
        }

        /* If this paragraph is less than 25 characters, don't even count it. */
        if(strlen($innerText) < 25) {
            continue;
        }

        /* Initialize readability data for the parent. */
        if (!$parentNode->hasAttribute('readability'))
        {
            $this->initializeNode($parentNode);
            $candidates[] = $parentNode;
        }

        /* Initialize readability data for the grandparent. */
        if ($grandParentNode && !$grandParentNode->hasAttribute('readability') && isset($grandParentNode->tagName))
        {
            $this->initializeNode($grandParentNode);
            $candidates[] = $grandParentNode;
        }

        $contentScore = 0;

        /* Add a point for the paragraph itself as a base. */
        $contentScore++;

        /* Add points for any commas within this paragraph (number of comma-separated pieces) */
        $contentScore += count(explode(',', $innerText));

        /* For every 100 characters in this paragraph, add another point. Up to 3 points. */
        $contentScore += min(floor(strlen($innerText) / 100), 3);

        /* Add the score to the parent. The grandparent gets half. */
        $parentNode->getAttributeNode('readability')->value += $contentScore;

        if ($grandParentNode) {
            $grandParentNode->getAttributeNode('readability')->value += $contentScore/2;
        }
    }

    /**
     * After we've calculated scores, loop through all of the possible candidate nodes we found
     * and find the one with the highest score.
     **/
    $topCandidate = null;
    for ($c=0, $cl=count($candidates); $c < $cl; $c++)
    {
        /**
         * Scale the final candidates score based on link density. Good content should have a
         * relatively small link density (5% or less) and be mostly unaffected by this operation.
         **/
        $readability = $candidates[$c]->getAttributeNode('readability');
        $readability->value = $readability->value * (1-$this->getLinkDensity($candidates[$c]));

        $this->dbg('Candidate: ' . $candidates[$c]->tagName . ' (' . $candidates[$c]->getAttribute('class') . ':' . $candidates[$c]->getAttribute('id') . ') with score ' . $readability->value);

        // Scores are fractional (half scores, link-density scaling), so compare them as floats;
        // truncating the current best to an int let a lower score displace a higher one.
        if (!$topCandidate || (float)$readability->value > (float)$topCandidate->getAttribute('readability')) {
            $topCandidate = $candidates[$c];
        }
    }

    /**
     * If we still have no top candidate, just use the body as a last resort.
     * We also have to copy the body node so it is something we can modify.
     *
     * NOTE(review): when $page is a DOMDocument this assigns innerHTML on the document itself
     * and appends a second top-level element - confirm that path behaves as intended.
     **/
    if ($topCandidate === null || strtoupper($topCandidate->tagName) == 'BODY')
    {
        $topCandidate = $this->dom->createElement('div');
        $topCandidate->innerHTML = ($page instanceof DOMDocument) ? $page->saveXML($page->documentElement) : $page->innerHTML;
        $page->innerHTML = '';
        $page->appendChild($topCandidate);
        $this->initializeNode($topCandidate);
    }

    /**
     * Now that we have the top candidate, look through its siblings for content that might also be related.
     * Things like preambles, content split by ads that we removed, etc.
     **/
    $articleContent = $this->dom->createElement('div');
    $articleContent->setAttribute('id', 'readability-content');
    $topScore = (float)$topCandidate->getAttribute('readability');
    $siblingScoreThreshold = max(10, $topScore * 0.2);
    $siblingNodes = $topCandidate->parentNode->childNodes;

    for ($s=0, $sl=$siblingNodes->length; $s < $sl; $s++)
    {
        $siblingNode = $siblingNodes->item($s);
        $append = false;

        $this->dbg('Looking at sibling node: ' . $siblingNode->nodeName . (($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability')) ? (' with score ' . $siblingNode->getAttribute('readability')) : ''));

        if ($siblingNode === $topCandidate)
        {
            $append = true;
        }

        $contentBonus = 0;
        /* Give a bonus if sibling nodes and top candidates have the exact same classname */
        if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->getAttribute('class') == $topCandidate->getAttribute('class') && $topCandidate->getAttribute('class') != '') {
            $contentBonus += $topScore * 0.2;
        }

        if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability') && (((float)$siblingNode->getAttribute('readability')) + $contentBonus) >= $siblingScoreThreshold)
        {
            $append = true;
        }

        if (strtoupper($siblingNode->nodeName) == 'P') {
            $linkDensity = $this->getLinkDensity($siblingNode);
            $nodeContent = $this->getInnerText($siblingNode);
            $nodeLength = strlen($nodeContent);

            if ($nodeLength > 80 && $linkDensity < 0.25)
            {
                $append = true;
            }
            // Loose comparison: a zero density matches whether it comes back as int or float.
            else if ($nodeLength < 80 && $linkDensity == 0 && preg_match('/\.( |$)/', $nodeContent))
            {
                $append = true;
            }
        }

        if ($append)
        {
            $this->dbg('Appending node: ' . $siblingNode->nodeName);

            $nodeToAppend = null;
            $sibNodeName = strtoupper($siblingNode->nodeName);
            if ($sibNodeName != 'DIV' && $sibNodeName != 'P') {
                /* We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident. */

                $this->dbg('Altering siblingNode of ' . $sibNodeName . ' to div.');
                $nodeToAppend = $this->dom->createElement('div');
                try {
                    $nodeToAppend->setAttribute('id', $siblingNode->getAttribute('id'));
                    $nodeToAppend->innerHTML = $siblingNode->innerHTML;
                }
                catch(Exception $e)
                {
                    $this->dbg('Could not alter siblingNode to div, reverting back to original.');
                    $nodeToAppend = $siblingNode;
                    // The sibling itself is about to be moved out of the live list.
                    $s--;
                    $sl--;
                }
            } else {
                $nodeToAppend = $siblingNode;
                // The sibling itself is about to be moved out of the live list.
                $s--;
                $sl--;
            }

            /* To ensure a node does not interfere with readability styles, remove its classnames */
            $nodeToAppend->removeAttribute('class');

            /* Append sibling and subtract from our list because it removes the node when you append to another node */
            $articleContent->appendChild($nodeToAppend);
        }
    }

    /**
     * So we have all of the content that we need. Now we clean it up for presentation.
     **/
    $this->prepArticle($articleContent);

    /**
     * Now that we've gone through the full algorithm, check to see if we got any meaningful content.
     * If we didn't, we may need to re-run grabArticle with different flags set. This gives us a higher
     * likelihood of finding the content, and the sieve approach gives us a higher likelihood of
     * finding the -right- content.
     **/
    if (strlen($this->getInnerText($articleContent, false)) < 250)
    {
        $this->body->innerHTML = $this->bodyCache;

        if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS)) {
            $this->removeFlag(self::FLAG_STRIP_UNLIKELYS);
            return $this->grabArticle($this->body);
        }
        else if ($this->flagIsActive(self::FLAG_WEIGHT_CLASSES)) {
            $this->removeFlag(self::FLAG_WEIGHT_CLASSES);
            return $this->grabArticle($this->body);
        }
        else if ($this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
            $this->removeFlag(self::FLAG_CLEAN_CONDITIONALLY);
            return $this->grabArticle($this->body);
        }
        else {
            return false;
        }
    }
    return $articleContent;
}
794 | |||
/**
 * Strip every <script> element found beneath the given node.
 *
 * @param DOMDocument|DOMElement $doc node whose script descendants are removed
 * @return void
 */
public function removeScripts($doc) {
    $scripts = $doc->getElementsByTagName('script');

    // Work from the last match back to the first so removals never shift
    // the positions that are still to be visited.
    $remaining = $scripts->length;
    while ($remaining-- > 0) {
        $script = $scripts->item($remaining);
        $script->parentNode->removeChild($script);
    }
}
808 | |||
/**
 * Get the trimmed text content of a node.
 *
 * When $normalizeSpaces is true, every match of the 'normalize' pattern in
 * $this->regexps is additionally replaced by a single space.
 *
 * @param DOMNode $e node to read the text from
 * @param boolean $normalizeSpaces (default: true)
 * @return string empty string when the node has no text content
 **/
public function getInnerText($e, $normalizeSpaces=true) {
    $hasText = isset($e->textContent) && $e->textContent != '';
    if (!$hasText) {
        return '';
    }

    $text = trim($e->textContent);

    return $normalizeSpaces
        ? preg_replace($this->regexps['normalize'], ' ', $text)
        : $text;
}
832 | |||
/**
 * Count how often the string $s occurs in the inner text of node $e.
 *
 * @param DOMElement $e node whose text is searched
 * @param string $s what to count. Default is ","
 * @return integer number of occurrences
 **/
public function getCharCount($e, $s=',') {
    $text = $this->getInnerText($e);

    return substr_count($text, $s);
}
843 | |||
/**
 * Remove the style attribute from $e and from every element beneath it.
 *
 * @param DOMNode $e root of the subtree to clean (an element or a document)
 * @return void
 */
public function cleanStyles($e) {
    // getElementsByTagName('*') only yields descendants, so the root element
    // has to be handled on its own. Documents have no attributes to strip.
    if ($e instanceof DOMElement) {
        $e->removeAttribute('style');
    }

    $elems = $e->getElementsByTagName('*');
    foreach ($elems as $elem) {
        $elem->removeAttribute('style');
    }
}
856 | |||
/**
 * Get the share of a node's text that sits inside links.
 *
 * The result is the summed text length of all <a> descendants divided by the
 * text length of the node itself (both measured with strlen on getInnerText()).
 *
 * @param DOMElement $e node to measure
 * @return number ratio of link text to total text; 0 when the node has no text
 */
public function getLinkDensity($e) {
    $totalLength = strlen($this->getInnerText($e));

    $linkedLength = 0;
    foreach ($e->getElementsByTagName('a') as $anchor) {
        $linkedLength += strlen($this->getInnerText($anchor));
    }

    return ($totalLength > 0) ? $linkedLength / $totalLength : 0;
}
878 | |||
/**
 * Get the weight an element earns from its class and id attributes.
 *
 * Each non-empty attribute loses 25 points when it matches the 'negative'
 * pattern and gains 25 points when it matches the 'positive' pattern.
 * The weight is always 0 while FLAG_WEIGHT_CLASSES is inactive.
 *
 * @param DOMElement $e element to inspect
 * @return integer combined weight of the class and id attributes
 */
public function getClassWeight($e) {
    if (!$this->flagIsActive(self::FLAG_WEIGHT_CLASSES)) {
        return 0;
    }

    $weight = 0;

    /* The class name and the ID are judged by exactly the same rules */
    foreach (array('class', 'id') as $attrName) {
        if (!$e->hasAttribute($attrName)) {
            continue;
        }

        $attrValue = $e->getAttribute($attrName);
        if ($attrValue == '') {
            continue;
        }

        if (preg_match($this->regexps['negative'], $attrValue)) {
            $weight -= 25;
        }
        if (preg_match($this->regexps['positive'], $attrValue)) {
            $weight += 25;
        }
    }

    return $weight;
}
916 | |||
/**
 * Remove extraneous break tags from a node.
 *
 * Every match of the 'killBreaks' pattern in the node's markup is replaced
 * by a single '<br />' and the result is written back to the node.
 *
 * @param DOMElement $node node whose markup is rewritten
 * @return void
 */
public function killBreaks($node) {
    $node->innerHTML = preg_replace($this->regexps['killBreaks'], '<br />', $node->innerHTML);
}
928 | |||
/**
 * Remove all descendants of $e that have the tag name $tag.
 *
 * For 'object' and 'embed' an element is kept when any of its attribute
 * values, or its inner markup, matches the 'video' pattern.
 *
 * @param DOMElement $e node to clean
 * @param string $tag tag name of the elements to remove
 * @return void
 */
public function clean($e, $tag) {
    $targetList = $e->getElementsByTagName($tag);
    $isEmbed = ($tag == 'object' || $tag == 'embed');

    // Backwards, so that removals do not disturb the positions still to visit.
    for ($y = $targetList->length - 1; $y >= 0; $y--) {
        $target = $targetList->item($y);

        if ($isEmbed) {
            /* Join all attribute values into one string so a single match covers them */
            $attributeValues = '';
            foreach ($target->attributes as $attribute) {
                $attributeValues .= $attribute->value . '|';
            }

            /* Keep the element if its attributes, or failing that its contents, look like a video */
            if (
                preg_match($this->regexps['video'], $attributeValues) ||
                preg_match($this->regexps['video'], $target->innerHTML)
            ) {
                continue;
            }
        }

        $target->parentNode->removeChild($target);
    }
}
962 | |||
/**
 * Remove descendants of $e with tag name $tag when they look fishy.
 *
 * An element goes when its class weight plus stored readability score is
 * negative. Otherwise, if it holds fewer than 10 commas, it goes when any of
 * the heuristics below (image/list/input counts, text length, link density,
 * video embeds) fires. Does nothing while FLAG_CLEAN_CONDITIONALLY is inactive.
 *
 * @param DOMElement $e node to clean
 * @param string $tag tag name of the elements to examine
 * @return void
 */
public function cleanConditionally($e, $tag) {
    if (!$this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
        return;
    }

    $tagsList = $e->getElementsByTagName($tag);

    /**
     * Traverse backwards so we can remove nodes at the same time without affecting the traversal.
     *
     * TODO: Consider taking into account original contentScore here.
     */
    for ($i = $tagsList->length - 1; $i >= 0; $i--) {
        $current = $tagsList->item($i);
        $hasScore = $current->hasAttribute('readability');

        $weight = $this->getClassWeight($current);
        $contentScore = $hasScore ? (int)$current->getAttribute('readability') : 0;

        $this->dbg('Cleaning Conditionally ' . $current->tagName . ' (' . $current->getAttribute('class') . ':' . $current->getAttribute('id') . ')' . ($hasScore ? (' with score ' . $current->getAttribute('readability')) : ''));

        if ($weight + $contentScore < 0) {
            $current->parentNode->removeChild($current);
            continue;
        }

        /* Plenty of commas: treat it as real content and leave it alone */
        if ($this->getCharCount($current, ',') >= 10) {
            continue;
        }

        /**
         * Not very many commas: remove the element if the number of non-paragraph
         * elements is more than paragraphs, or on other ominous signs.
         **/
        $p = $current->getElementsByTagName('p')->length;
        $img = $current->getElementsByTagName('img')->length;
        $li = $current->getElementsByTagName('li')->length-100;
        $input = $current->getElementsByTagName('input')->length;

        /* Only embeds whose src matches the video pattern are counted */
        $embedCount = 0;
        foreach ($current->getElementsByTagName('embed') as $embed) {
            if (preg_match($this->regexps['video'], $embed->getAttribute('src'))) {
                $embedCount++;
            }
        }

        $linkDensity = $this->getLinkDensity($current);
        $contentLength = strlen($this->getInnerText($current));

        $toRemove =
            ($img > $p)
            || ($li > $p && $tag != 'ul' && $tag != 'ol')
            || ($input > floor($p/3))
            || ($contentLength < 25 && ($img === 0 || $img > 2))
            || ($weight < 25 && $linkDensity > 0.2)
            || ($weight >= 25 && $linkDensity > 0.5)
            || (($embedCount == 1 && $contentLength < 75) || $embedCount > 1);

        if ($toRemove) {
            $current->parentNode->removeChild($current);
        }
    }
}
1039 | |||
/**
 * Clean out spurious headers from an element.
 *
 * Only h1 and h2 elements are examined; one is removed when its class weight
 * is negative or its link density is above 0.33.
 *
 * @param DOMElement $e node to clean
 * @return void
 */
public function cleanHeaders($e) {
    foreach (array('h1', 'h2') as $headerTag) {
        $headers = $e->getElementsByTagName($headerTag);

        // Backwards, so that removals do not disturb the positions still to visit.
        for ($i = $headers->length - 1; $i >= 0; $i--) {
            $header = $headers->item($i);
            $isSpurious = $this->getClassWeight($header) < 0 || $this->getLinkDensity($header) > 0.33;

            if ($isSpurious) {
                $header->parentNode->removeChild($header);
            }
        }
    }
}
1056 | |||
/**
 * Tell whether the given flag bit(s) are set in $this->flags.
 *
 * @param integer $flag bit mask to test
 * @return boolean true when the masked value is greater than zero
 */
public function flagIsActive($flag) {
    $masked = $this->flags & $flag;

    return $masked > 0;
}
1060 | |||
/**
 * Switch on the given flag bit(s) in $this->flags.
 *
 * @param integer $flag bit mask to set
 * @return void
 */
public function addFlag($flag) {
    $this->flags |= $flag;
}
1064 | |||
/**
 * Switch off the given flag bit(s) in $this->flags.
 *
 * @param integer $flag bit mask to clear
 * @return void
 */
public function removeFlag($flag) {
    $this->flags &= ~$flag;
}
1068 | } | ||
1069 | ?> \ No newline at end of file | ||