diff options
author | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2013-12-23 02:28:56 -0800 |
---|---|---|
committer | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2013-12-23 02:28:56 -0800 |
commit | 60fc4f4b1ab37fbfe9021f3fa1395d66a4424ed2 (patch) | |
tree | 515c4b9c8286ae363f77722c91acb878151dc386 /inc | |
parent | cbfd5a1019f47fadefd8490dae9f039ae894298d (diff) | |
parent | da5fc42f615eeb45a702604970f94967507fb432 (diff) | |
download | wallabag-60fc4f4b1ab37fbfe9021f3fa1395d66a4424ed2.tar.gz wallabag-60fc4f4b1ab37fbfe9021f3fa1395d66a4424ed2.tar.zst wallabag-60fc4f4b1ab37fbfe9021f3fa1395d66a4424ed2.zip |
Merge pull request #363 from inthepoche/dev1.3.0
poche 1.3.0
Diffstat (limited to 'inc')
860 files changed, 31143 insertions, 7836 deletions
diff --git a/inc/3rdparty/Encoding.php b/inc/3rdparty/Encoding.php deleted file mode 100644 index 577763b4..00000000 --- a/inc/3rdparty/Encoding.php +++ /dev/null | |||
@@ -1,262 +0,0 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * @author "Sebastián Grignoli" <grignoli@framework2.com.ar> | ||
4 | * @package Encoding | ||
5 | * @version 1.1 | ||
6 | * @link http://www.framework2.com.ar/dzone/forceUTF8-es/ | ||
7 | * @example http://www.framework2.com.ar/dzone/forceUTF8-es/ | ||
8 | */ | ||
9 | |||
class Encoding {

    /**
     * Windows-1252 byte values in the 0x80-0x9F range (as integers) mapped to
     * the UTF-8 encoding of the character they stand for. Values with no entry
     * here (129, 141, 143, 144, 157) fall back to the generic two-byte
     * conversion in toUTF8().
     */
    protected static $win1252ToUtf8 = array(
        128 => "\xe2\x82\xac",

        130 => "\xe2\x80\x9a",
        131 => "\xc6\x92",
        132 => "\xe2\x80\x9e",
        133 => "\xe2\x80\xa6",
        134 => "\xe2\x80\xa0",
        135 => "\xe2\x80\xa1",
        136 => "\xcb\x86",
        137 => "\xe2\x80\xb0",
        138 => "\xc5\xa0",
        139 => "\xe2\x80\xb9",
        140 => "\xc5\x92",

        142 => "\xc5\xbd",


        145 => "\xe2\x80\x98",
        146 => "\xe2\x80\x99",
        147 => "\xe2\x80\x9c",
        148 => "\xe2\x80\x9d",
        149 => "\xe2\x80\xa2",
        150 => "\xe2\x80\x93",
        151 => "\xe2\x80\x94",
        152 => "\xcb\x9c",
        153 => "\xe2\x84\xa2",
        154 => "\xc5\xa1",
        155 => "\xe2\x80\xba",
        156 => "\xc5\x93",

        158 => "\xc5\xbe",
        159 => "\xc5\xb8"
    );

    /**
     * Two-byte sequences "\xc2\x80".."\xc2\x9f" mapped to the UTF-8 sequences
     * used for the same byte values in $win1252ToUtf8.
     * Used by UTF8FixWin1252Chars().
     */
    protected static $brokenUtf8ToUtf8 = array(
        "\xc2\x80" => "\xe2\x82\xac",

        "\xc2\x82" => "\xe2\x80\x9a",
        "\xc2\x83" => "\xc6\x92",
        "\xc2\x84" => "\xe2\x80\x9e",
        "\xc2\x85" => "\xe2\x80\xa6",
        "\xc2\x86" => "\xe2\x80\xa0",
        "\xc2\x87" => "\xe2\x80\xa1",
        "\xc2\x88" => "\xcb\x86",
        "\xc2\x89" => "\xe2\x80\xb0",
        "\xc2\x8a" => "\xc5\xa0",
        "\xc2\x8b" => "\xe2\x80\xb9",
        "\xc2\x8c" => "\xc5\x92",

        "\xc2\x8e" => "\xc5\xbd",


        "\xc2\x91" => "\xe2\x80\x98",
        "\xc2\x92" => "\xe2\x80\x99",
        "\xc2\x93" => "\xe2\x80\x9c",
        "\xc2\x94" => "\xe2\x80\x9d",
        "\xc2\x95" => "\xe2\x80\xa2",
        "\xc2\x96" => "\xe2\x80\x93",
        "\xc2\x97" => "\xe2\x80\x94",
        "\xc2\x98" => "\xcb\x9c",
        "\xc2\x99" => "\xe2\x84\xa2",
        "\xc2\x9a" => "\xc5\xa1",
        "\xc2\x9b" => "\xe2\x80\xba",
        "\xc2\x9c" => "\xc5\x93",

        "\xc2\x9e" => "\xc5\xbe",
        "\xc2\x9f" => "\xc5\xb8"
    );

    /**
     * Reverse of $win1252ToUtf8: UTF-8 sequence => single Windows-1252 byte.
     */
    protected static $utf8ToWin1252 = array(
        "\xe2\x82\xac" => "\x80",

        "\xe2\x80\x9a" => "\x82",
        "\xc6\x92"     => "\x83",
        "\xe2\x80\x9e" => "\x84",
        "\xe2\x80\xa6" => "\x85",
        "\xe2\x80\xa0" => "\x86",
        "\xe2\x80\xa1" => "\x87",
        "\xcb\x86"     => "\x88",
        "\xe2\x80\xb0" => "\x89",
        "\xc5\xa0"     => "\x8a",
        "\xe2\x80\xb9" => "\x8b",
        "\xc5\x92"     => "\x8c",

        "\xc5\xbd"     => "\x8e",


        "\xe2\x80\x98" => "\x91",
        "\xe2\x80\x99" => "\x92",
        "\xe2\x80\x9c" => "\x93",
        "\xe2\x80\x9d" => "\x94",
        "\xe2\x80\xa2" => "\x95",
        "\xe2\x80\x93" => "\x96",
        "\xe2\x80\x94" => "\x97",
        "\xcb\x9c"     => "\x98",
        "\xe2\x84\xa2" => "\x99",
        "\xc5\xa1"     => "\x9a",
        "\xe2\x80\xba" => "\x9b",
        "\xc5\x93"     => "\x9c",

        "\xc5\xbe"     => "\x9e",
        "\xc5\xb8"     => "\x9f"
    );

    /**
     * Leave well-formed-looking UTF-8 sequences alone and convert every other
     * high byte to UTF-8, treating it as Windows-1252 / ISO 8859-1.
     *
     * Detection is purely structural: a lead byte (0xC0-0xDF, 0xE0-0xEF,
     * 0xF0-0xF7) followed by the right number of continuation bytes
     * (0x80-0xBF) is assumed to be UTF-8 already. Consequently a Latin-1 byte
     * in a lead-byte range that happens to be followed by Latin-1 bytes in the
     * continuation range is NOT converted. Example: in the Latin-1 bytes
     * %ABREPRESENT%C9%BB ("«REPRESENTÉ»") the %AB is converted, but %C9%BB
     * looks like a two-byte UTF-8 sequence and is left unchanged.
     *
     * Arrays are processed recursively (keys preserved); values that are
     * neither arrays nor strings are returned untouched.
     *
     * @param mixed $text String, array of strings, or any other value.
     * @return mixed The same structure with strings UTF-8 encoded.
     */
    public static function toUTF8($text){
        if(is_array($text)) {
            foreach($text as $k => $v) {
                $text[$k] = self::toUTF8($v);
            }
            return $text;
        }
        if(!is_string($text)) {
            return $text;
        }

        $max = strlen($text);
        $buf = "";
        for($i = 0; $i < $max; $i++){
            $c1 = $text[$i];
            $o1 = ord($c1);

            if($o1 < 0x80) {
                // Plain ASCII never needs conversion.
                $buf .= $c1;
                continue;
            }

            if($o1 < 0xC0) {
                // Stray byte in 0x80-0xBF: Windows-1252 special case if known,
                // otherwise generic Latin-1 conversion.
                $buf .= isset(self::$win1252ToUtf8[$o1])
                    ? self::$win1252ToUtf8[$o1]
                    : self::byteToUtf8($o1);
                continue;
            }

            // Lead-byte range: how long would the UTF-8 sequence be?
            if($o1 <= 0xDF) {
                $seqLen = 2;
            } elseif($o1 <= 0xEF) {
                $seqLen = 3;
            } elseif($o1 <= 0xF7) {
                $seqLen = 4;
            } else {
                $seqLen = 0; // 0xF8-0xFF can never start a sequence
            }

            if($seqLen > 0 && self::hasContinuationBytes($text, $i + 1, $seqLen - 1)) {
                // Almost certainly UTF-8 already: copy the WHOLE sequence and
                // skip past all of its continuation bytes.
                $buf .= substr($text, $i, $seqLen);
                $i += $seqLen - 1;
            } else {
                $buf .= self::byteToUtf8($o1);
            }
        }
        return $buf;
    }

    /**
     * Encode a single byte value (0x80-0xFF) as the two-byte UTF-8 sequence
     * for the code point with the same number.
     *
     * @param int $byte Byte value.
     * @return string Two-byte UTF-8 sequence.
     */
    protected static function byteToUtf8($byte) {
        return chr(0xC0 | ($byte >> 6)) . chr(0x80 | ($byte & 0x3F));
    }

    /**
     * Tell whether $count bytes starting at $start all exist and all have the
     * continuation-byte form 10xxxxxx.
     *
     * @param string $text  Byte string.
     * @param int    $start Offset of the first byte to check.
     * @param int    $count Number of bytes to check.
     * @return bool
     */
    protected static function hasContinuationBytes($text, $start, $count) {
        $end = $start + $count;
        if($end > strlen($text)) {
            return false;
        }
        for($j = $start; $j < $end; $j++) {
            if((ord($text[$j]) & 0xC0) !== 0x80) {
                return false;
            }
        }
        return true;
    }

    /**
     * Replace the UTF-8 sequences listed in $utf8ToWin1252 by their single
     * Windows-1252 byte, then run utf8_decode() on the result.
     *
     * NOTE(review): utf8_decode() is deprecated as of PHP 8.2; kept here so
     * the output stays identical to the previous implementation.
     *
     * @param string $text
     * @return string
     */
    protected static function utf8ToWin1252Bytes($text) {
        return utf8_decode(str_replace(
            array_keys(self::$utf8ToWin1252),
            array_values(self::$utf8ToWin1252),
            $text
        ));
    }

    /**
     * Convert a string (or array of strings, recursively) to Windows-1252 by
     * first normalising it with toUTF8(). Other values are returned as-is.
     *
     * @param mixed $text
     * @return mixed
     */
    public static function toWin1252($text) {
        if(is_array($text)) {
            foreach($text as $k => $v) {
                $text[$k] = self::toWin1252($v);
            }
            return $text;
        }
        if(is_string($text)) {
            return self::utf8ToWin1252Bytes(self::toUTF8($text));
        }
        return $text;
    }

    /** Alias of toWin1252(). */
    public static function toISO8859($text) {
        return self::toWin1252($text);
    }

    /** Alias of toWin1252(). */
    public static function toLatin1($text) {
        return self::toWin1252($text);
    }

    /**
     * Repeatedly decode and re-encode $text until the result stops changing.
     * Arrays are processed recursively.
     *
     * @param mixed $text
     * @return mixed
     */
    public static function fixUTF8($text){
        if(is_array($text)) {
            foreach($text as $k => $v) {
                $text[$k] = self::fixUTF8($v);
            }
            return $text;
        }

        // Strict comparison: stop only when a pass is byte-for-byte a no-op.
        do {
            $last = $text;
            $text = self::toUTF8(self::utf8ToWin1252Bytes($text));
        } while($last !== $text);

        return $text;
    }

    /**
     * Fix a UTF-8 string that was converted from Windows-1252 as if it were
     * ISO 8859-1 (i.e. bytes 0x80-0x9F were not given their Windows-1252
     * meaning). See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string|array $text
     * @return string|array
     */
    public static function UTF8FixWin1252Chars($text){
        return str_replace(array_keys(self::$brokenUtf8ToUtf8), array_values(self::$brokenUtf8ToUtf8), $text);
    }

    /**
     * Strip a leading UTF-8 byte order mark (EF BB BF), if present.
     *
     * @param string $str
     * @return string
     */
    public static function removeBOM($str=""){
        if(substr($str, 0, 3) === "\xef\xbb\xbf") {
            $str = substr($str, 3);
        }
        return $str;
    }
}
diff --git a/inc/3rdparty/Readability.php b/inc/3rdparty/Readability.php deleted file mode 100644 index 7605871c..00000000 --- a/inc/3rdparty/Readability.php +++ /dev/null | |||
@@ -1,1138 +0,0 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * Arc90's Readability ported to PHP for FiveFilters.org | ||
4 | * Based on readability.js version 1.7.1 (without multi-page support) | ||
5 | * Updated to allow HTML5 parsing with html5lib | ||
6 | * Updated with lightClean mode to preserve more images and youtube/vimeo/viddler embeds | ||
7 | * ------------------------------------------------------ | ||
8 | * Original URL: http://lab.arc90.com/experiments/readability/js/readability.js | ||
9 | * Arc90's project URL: http://lab.arc90.com/experiments/readability/ | ||
10 | * JS Source: http://code.google.com/p/arc90labs-readability | ||
11 | * Ported by: Keyvan Minoukadeh, http://www.keyvan.net | ||
12 | * More information: http://fivefilters.org/content-only/ | ||
13 | * License: Apache License, Version 2.0 | ||
14 | * Requires: PHP5 | ||
15 | * Date: 2012-09-19 | ||
16 | * | ||
17 | * Differences between the PHP port and the original | ||
18 | * ------------------------------------------------------ | ||
19 | * Arc90's Readability is designed to run in the browser. It works on the DOM | ||
20 | * tree (the parsed HTML) after the page's CSS styles have been applied and | ||
21 | * Javascript code executed. This PHP port does not run inside a browser. | ||
22 | * We use PHP's ability to parse HTML to build our DOM tree, but we cannot | ||
23 | * rely on CSS or Javascript support. As such, the results will not always | ||
24 | * match Arc90's Readability. (For example, if a web page contains CSS style | ||
25 | * rules or Javascript code which hide certain HTML elements from display, | ||
26 | * Arc90's Readability will dismiss those from consideration but our PHP port, | ||
27 | * unable to understand CSS or Javascript, will not know any better.) | ||
28 | * | ||
29 | * Another significant difference is that the aim of Arc90's Readability is | ||
30 | * to re-present the main content block of a given web page so users can | ||
31 | * read it more easily in their browsers. Correct identification, clean up, | ||
32 | * and separation of the content block is only a part of this process. | ||
33 | * This PHP port is only concerned with this part, it does not include code | ||
34 | * that relates to presentation in the browser - Arc90 already do | ||
35 | * that extremely well, and for PDF output there's FiveFilters.org's | ||
36 | * PDF Newspaper: http://fivefilters.org/pdf-newspaper/. | ||
37 | * | ||
38 | * Finally, this class contains methods that might be useful for developers | ||
39 | * working on HTML document fragments. So without deviating too much from | ||
40 | * the original code (which I don't want to do because it makes debugging | ||
41 | * and updating more difficult), I've tried to make it a little more | ||
42 | * developer friendly. You should be able to use the methods here on | ||
43 | * existing DOMElement objects without passing an entire HTML document to | ||
44 | * be parsed. | ||
45 | */ | ||
46 | |||
47 | // This class allows us to do JavaScript-like assignments to innerHTML | ||
48 | require_once(dirname(__FILE__).'/JSLikeHTMLElement.php'); | ||
49 | |||
50 | // Alternative usage (for testing only!) | ||
51 | // uncomment the lines below and call Readability.php in your browser | ||
52 | // passing it the URL of the page you'd like content from, e.g.: | ||
53 | // Readability.php?url=http://medialens.org/alerts/09/090615_the_guardian_climate.php | ||
54 | |||
55 | /* | ||
56 | if (!isset($_GET['url']) || $_GET['url'] == '') { | ||
57 | die('Please pass a URL to the script. E.g. Readability.php?url=bla.com/story.html'); | ||
58 | } | ||
59 | $url = $_GET['url']; | ||
60 | if (!preg_match('!^https?://!i', $url)) $url = 'http://'.$url; | ||
61 | $html = file_get_contents($url); | ||
62 | $r = new Readability($html, $url); | ||
63 | $r->init(); | ||
64 | echo $r->articleContent->innerHTML; | ||
65 | */ | ||
66 | |||
67 | class Readability | ||
68 | { | ||
69 | public $version = '1.7.1-without-multi-page'; | ||
70 | public $convertLinksToFootnotes = false; | ||
71 | public $revertForcedParagraphElements = true; | ||
72 | public $articleTitle; | ||
73 | public $articleContent; | ||
74 | public $dom; | ||
75 | public $url = null; // optional - URL where HTML was retrieved | ||
76 | public $debug = false; | ||
77 | public $lightClean = true; // preserves more content (experimental) added 2012-09-19 | ||
78 | protected $body = null; // | ||
79 | protected $bodyCache = null; // Cache the body HTML in case we need to re-use it later | ||
80 | protected $flags = 7; // 1 | 2 | 4; // Start with all flags set. | ||
81 | protected $success = false; // indicates whether we were able to extract or not | ||
82 | |||
83 | /** | ||
84 | * All of the regular expressions in use within readability. | ||
85 | * Defined up here so we don't instantiate them repeatedly in loops. | ||
86 | **/ | ||
87 | public $regexps = array( | ||
88 | 'unlikelyCandidates' => '/combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup/i', | ||
89 | 'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i', | ||
90 | 'positive' => '/article|body|content|entry|hentry|main|page|attachment|pagination|post|text|blog|story/i', | ||
91 | 'negative' => '/combx|comment|com-|contact|foot|footer|_nav|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/i', | ||
92 | 'divToPElements' => '/<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i', | ||
93 | 'replaceBrs' => '/(<br[^>]*>[ \n\r\t]*){2,}/i', | ||
94 | 'replaceFonts' => '/<(\/?)font[^>]*>/i', | ||
95 | // 'trimRe' => '/^\s+|\s+$/g', // PHP has trim() | ||
96 | 'normalize' => '/\s{2,}/', | ||
97 | 'killBreaks' => '/(<br\s*\/?>(\s| ?)*){1,}/', | ||
98 | 'video' => '!//(player\.|www\.)?(youtube|vimeo|viddler)\.com!i', | ||
99 | 'skipFootnoteLink' => '/^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i' | ||
100 | ); | ||
101 | |||
102 | /* constants */ | ||
103 | const FLAG_STRIP_UNLIKELYS = 1; | ||
104 | const FLAG_WEIGHT_CLASSES = 2; | ||
105 | const FLAG_CLEAN_CONDITIONALLY = 4; | ||
106 | |||
107 | /** | ||
108 | * Create instance of Readability | ||
109 | * @param string UTF-8 encoded string | ||
110 | * @param string (optional) URL associated with HTML (used for footnotes) | ||
111 | * @param string which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib') | ||
112 | */ | ||
function __construct($html, $url=null, $parser='libxml')
{
    $this->url = $url;

    // These rewrites are done on the raw markup, before it is parsed:
    // runs of two or more <br> become a paragraph break, <font> becomes <span>.
    $markup = preg_replace($this->regexps['replaceBrs'], '</p><p>', $html);
    $markup = preg_replace($this->regexps['replaceFonts'], '<$1span>', $markup);
    $markup = mb_convert_encoding($markup, 'HTML-ENTITIES', "UTF-8");
    if (trim($markup) == '') {
        $markup = '<html></html>';
    }

    // html5lib is used only when requested AND it yields a truthy document;
    // in every other case fall back to libxml's HTML parser.
    $parsedByHtml5lib = ($parser == 'html5lib') && ($this->dom = HTML5_Parser::parse($markup));
    if (!$parsedByHtml5lib) {
        $this->dom = new DOMDocument();
        $this->dom->preserveWhiteSpace = false;
        @$this->dom->loadHTML($markup);
    }

    $this->dom->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
}
130 | |||
131 | /** | ||
132 | * Get article title element | ||
133 | * @return DOMElement | ||
134 | */ | ||
public function getTitle()
{
    // Value assigned at the end of init().
    $title = $this->articleTitle;
    return $title;
}
138 | |||
139 | /** | ||
140 | * Get article content element | ||
141 | * @return DOMElement | ||
142 | */ | ||
public function getContent()
{
    // Value assigned at the end of init().
    $content = $this->articleContent;
    return $content;
}
146 | |||
147 | /** | ||
148 | * Runs readability. | ||
149 | * | ||
150 | * Workflow: | ||
151 | * 1. Prep the document by removing script tags, css, etc. | ||
152 | * 2. Build readability's DOM tree. | ||
153 | * 3. Grab the article content from the current dom tree. | ||
154 | * 4. Replace the current DOM tree with the new one. | ||
155 | * 5. Read peacefully. | ||
156 | * | ||
157 | * @return boolean true if we found content, false otherwise | ||
158 | **/ | ||
public function init()
{
    // Without a root element there is nothing to process.
    if (!isset($this->dom->documentElement)) {
        return false;
    }
    $this->removeScripts($this->dom);

    // Optimistic default; reset below when no article content is found.
    $this->success = true;

    // Remember the first <body> element and its markup, unless already set.
    $bodies = $this->dom->getElementsByTagName('body');
    if ($bodies->length > 0) {
        if ($this->bodyCache == null) {
            $this->bodyCache = $bodies->item(0)->innerHTML;
        }
        if ($this->body == null) {
            $this->body = $bodies->item(0);
        }
    }

    $this->prepDocument();

    /* Build readability's DOM tree */
    $overlay = $this->dom->createElement('div');
    $innerDiv = $this->dom->createElement('div');
    $titleElem = $this->getArticleTitle();
    $contentElem = $this->grabArticle();

    if (!$contentElem) {
        // Extraction failed: report it and substitute a placeholder message.
        $this->success = false;
        $contentElem = $this->dom->createElement('div');
        $contentElem->setAttribute('id', 'readability-content');
        $contentElem->innerHTML = '<p>Sorry, Readability was unable to parse this page for content.</p>';
    }

    $overlay->setAttribute('id', 'readOverlay');
    $innerDiv->setAttribute('id', 'readInner');

    /* overlay > innerDiv > (title, content) */
    $innerDiv->appendChild($titleElem);
    $innerDiv->appendChild($contentElem);
    $overlay->appendChild($innerDiv);

    /* Replace the body's previous content with the new structure. */
    $this->body->innerHTML = '';
    $this->body->appendChild($overlay);
    $this->body->removeAttribute('style');

    $this->postProcessContent($contentElem);

    // Publish the results on the instance.
    $this->articleTitle = $titleElem;
    $this->articleContent = $contentElem;

    return $this->success;
}
219 | |||
220 | /** | ||
221 | * Debug | ||
222 | */ | ||
protected function dbg($msg)
{
    // Silent unless the public $debug switch is on.
    if (!$this->debug) {
        return;
    }
    echo '* ' . $msg . "\n";
}
226 | |||
227 | /** | ||
228 | * Run any post-process modifications to article content as necessary. | ||
229 | * | ||
230 | * @param DOMElement | ||
231 | * @return void | ||
232 | */ | ||
public function postProcessContent($articleContent)
{
    // Footnote conversion is opt-in ...
    if (!$this->convertLinksToFootnotes) {
        return;
    }
    // ... and is skipped when the source URL matches wikipedia.org.
    if (preg_match('/wikipedia\.org/', @$this->url)) {
        return;
    }
    $this->addFootnotes($articleContent);
}
238 | |||
239 | /** | ||
240 | * Get the article title as an H1. | ||
241 | * | ||
242 | * @return DOMElement | ||
243 | */ | ||
protected function getArticleTitle() {
    $curTitle = '';
    $origTitle = '';

    // Best effort: if reading the <title> text throws, continue with ''.
    try {
        $curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0));
    } catch(Exception $e) {}

    if (preg_match('/ [\|\-] /', $curTitle))
    {
        // "Headline | Site" or "Headline - Site": keep what precedes the last
        // separator, or what follows the first one if that leaves < 3 words.
        $curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle);

        if (count(explode(' ', $curTitle)) < 3) {
            $curTitle = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $origTitle);
        }
    }
    else if (strpos($curTitle, ': ') !== false)
    {
        // "Site: Headline": keep what follows the last colon, or what follows
        // the first colon if that leaves < 3 words.
        $curTitle = preg_replace('/.*:(.*)/i', '$1', $origTitle);

        if (count(explode(' ', $curTitle)) < 3) {
            $curTitle = preg_replace('/[^:]*[:](.*)/i','$1', $origTitle);
        }
    }
    else
    {
        // Measure in characters, not bytes: DOM text is UTF-8, so strlen()
        // would count a non-ASCII title as up to four times its real length.
        $titleLength = mb_strlen($curTitle, 'UTF-8');
        if ($titleLength > 150 || $titleLength < 15)
        {
            // Implausible <title>: use the page's <h1> if there is exactly one.
            $hOnes = $this->dom->getElementsByTagName('h1');
            if ($hOnes->length == 1)
            {
                $curTitle = $this->getInnerText($hOnes->item(0));
            }
        }
    }

    $curTitle = trim($curTitle);

    // A result of four words or fewer is discarded in favour of the full
    // original <title> text.
    if (count(explode(' ', $curTitle)) <= 4) {
        $curTitle = $origTitle;
    }

    $articleTitle = $this->dom->createElement('h1');
    $articleTitle->innerHTML = $curTitle;

    return $articleTitle;
}
288 | |||
289 | /** | ||
290 | * Prepare the HTML document for readability to scrape it. | ||
291 | * This includes things like stripping javascript, CSS, and handling terrible markup. | ||
292 | * | ||
293 | * @return void | ||
294 | **/ | ||
protected function prepDocument()
{
    // A body element may be missing entirely (badly broken HTML); in that
    // case create one and attach it to the document root.
    if ($this->body == null) {
        $newBody = $this->dom->createElement('body');
        $this->body = $newBody;
        $this->dom->documentElement->appendChild($this->body);
    }
    $this->body->setAttribute('id', 'readabilityBody');

    // Drop every <style> element, last to first.
    $styleElems = $this->dom->getElementsByTagName('style');
    foreach (array_reverse(iterator_to_array($styleElems)) as $styleElem) {
        $styleElem->parentNode->removeChild($styleElem);
    }

    // The JS original also rewrites double <br>s and <font> tags at this
    // point; the PHP port does that in the constructor, on the raw HTML,
    // before it is parsed into a DOM tree.
}
320 | |||
321 | /** | ||
322 | * For easier reading, convert this document to have footnotes at the bottom rather than inline links. | ||
323 | * @see http://www.roughtype.com/archives/2010/05/experiments_in.php | ||
324 | * | ||
325 | * @return void | ||
326 | **/ | ||
public function addFootnotes($articleContent) {
    $footnotesWrapper = $this->dom->createElement('div');
    $footnotesWrapper->setAttribute('id', 'readability-footnotes');
    $footnotesWrapper->innerHTML = '<h3>References</h3>';

    $articleFootnotes = $this->dom->createElement('ol');
    $articleFootnotes->setAttribute('id', 'readability-footnotes-list');
    $footnotesWrapper->appendChild($articleFootnotes);

    $articleLinks = $articleContent->getElementsByTagName('a');

    $linkCount = 0;
    // The length is re-read on every pass because reference links (also <a>
    // elements) are inserted into the article while iterating; they carry the
    // 'readability-DoNotFootnote' class so the skip check below ignores them.
    for ($i = 0; $i < $articleLinks->length; $i++)
    {
        $articleLink = $articleLinks->item($i);
        $linkText = $this->getInnerText($articleLink);

        if ((strpos($articleLink->getAttribute('class'), 'readability-DoNotFootnote') !== false) || preg_match($this->regexps['skipFootnoteLink'], $linkText)) {
            continue;
        }

        // Only allocate the footnote nodes once the link is known to qualify.
        $footnoteLink = $articleLink->cloneNode(true);
        $refLink = $this->dom->createElement('a');
        $footnote = $this->dom->createElement('li');
        $linkDomain = @parse_url($footnoteLink->getAttribute('href'), PHP_URL_HOST);
        if (!$linkDomain && isset($this->url)) $linkDomain = @parse_url($this->url, PHP_URL_HOST);

        $linkCount++;

        /** Add a superscript reference after the article link */
        $refLink->setAttribute('href', '#readabilityFootnoteLink-' . $linkCount);
        $refLink->innerHTML = '<small><sup>[' . $linkCount . ']</sup></small>';
        $refLink->setAttribute('class', 'readability-DoNotFootnote');
        $refLink->setAttribute('style', 'color: inherit;');

        // insertBefore() appends when the reference node is null, so this one
        // call covers both "link is the last child" and "link has a sibling".
        $articleLink->parentNode->insertBefore($refLink, $articleLink->nextSibling);

        $articleLink->setAttribute('style', 'color: inherit; text-decoration: none;');
        $articleLink->setAttribute('name', 'readabilityLink-' . $linkCount);

        $footnote->innerHTML = '<small><sup><a href="#readabilityLink-' . $linkCount . '" title="Jump to Link in Article">^</a></sup></small> ';

        // The label is plain text (title attribute or link text) taken from
        // an arbitrary page; escape it so '&', '<' etc. are not interpreted
        // as markup when assigned through innerHTML.
        $label = ($footnoteLink->getAttribute('title') != '' ? $footnoteLink->getAttribute('title') : $linkText);
        $footnoteLink->innerHTML = htmlspecialchars($label, ENT_QUOTES, 'UTF-8');
        $footnoteLink->setAttribute('name', 'readabilityFootnoteLink-' . $linkCount);

        $footnote->appendChild($footnoteLink);
        if ($linkDomain) $footnote->innerHTML = $footnote->innerHTML . '<small> (' . htmlspecialchars($linkDomain, ENT_QUOTES, 'UTF-8') . ')</small>';

        $articleFootnotes->appendChild($footnote);
    }

    // Only attach the "References" section if at least one footnote was made.
    if ($linkCount > 0) {
        $articleContent->appendChild($footnotesWrapper);
    }
}
387 | |||
388 | /** | ||
389 | * Reverts P elements with class 'readability-styled' | ||
390 | * to text nodes - which is what they were before. | ||
391 | * | ||
392 | * @param DOMElement | ||
393 | * @return void | ||
394 | */ | ||
function revertReadabilityStyledElements($articleContent)
{
    $doc = $articleContent->ownerDocument;
    $xpath = new DOMXPath($doc);
    $styled = $xpath->query('.//p[@class="readability-styled"]', $articleContent);

    // Walk the matches last to first, swapping each <p> for a text node
    // holding its text content.
    foreach (array_reverse(iterator_to_array($styled)) as $para) {
        $replacement = $doc->createTextNode($para->textContent);
        $para->parentNode->replaceChild($replacement, $para);
    }
}
407 | |||
408 | /** | ||
409 | * Prepare the article node for display. Clean out any inline styles, | ||
410 | * iframes, forms, strip extraneous <p> tags, etc. | ||
411 | * | ||
412 | * @param DOMElement | ||
413 | * @return void | ||
414 | */ | ||
function prepArticle($articleContent)
{
    $this->cleanStyles($articleContent);
    $this->killBreaks($articleContent);
    if ($this->revertForcedParagraphElements) {
        $this->revertReadabilityStyledElements($articleContent);
    }

    /* Clean out junk from the article content */
    $this->cleanConditionally($articleContent, 'form');
    foreach (array('object', 'h1') as $tagName) {
        $this->clean($articleContent, $tagName);
    }

    // A lone <h2> is probably the page header rather than a sub-heading, and
    // we already have a header - drop it (unless lightClean is on).
    if (!$this->lightClean) {
        $h2Count = $articleContent->getElementsByTagName('h2')->length;
        if ($h2Count == 1) {
            $this->clean($articleContent, 'h2');
        }
    }
    $this->clean($articleContent, 'iframe');

    $this->cleanHeaders($articleContent);

    /* These go last: the clean-up above may change what they decide. */
    foreach (array('table', 'ul', 'div') as $tagName) {
        $this->cleanConditionally($articleContent, $tagName);
    }

    /* Remove paragraphs that hold neither media elements nor text. */
    $paragraphs = $articleContent->getElementsByTagName('p');
    $mediaTags = array('img', 'embed', 'object', 'iframe');
    for ($idx = $paragraphs->length - 1; $idx >= 0; $idx--) {
        $para = $paragraphs->item($idx);

        $hasMedia = false;
        foreach ($mediaTags as $mediaTag) {
            if ($para->getElementsByTagName($mediaTag)->length !== 0) {
                $hasMedia = true;
                break;
            }
        }

        if (!$hasMedia && $this->getInnerText($para, false) == '') {
            $para->parentNode->removeChild($para);
        }
    }

    // Strip any <br> that directly precedes a <p>.
    try {
        $html = $articleContent->innerHTML;
        $articleContent->innerHTML = preg_replace('/<br[^>]*>\s*<p/i', '<p', $html);
    } catch (Exception $e) {
        $this->dbg("Cleaning innerHTML of breaks failed. This is an IE strict-block-elements bug. Ignoring.: " . $e);
    }
}
466 | |||
/**
 * Give a node its "readability" attribute, which holds the node's content score.
 *
 * The score starts at zero, is adjusted by a fixed amount that depends on the
 * tag name, and finally by the class/id weight reported by getClassWeight().
 *
 * @param DOMElement $node element to initialise
 * @return void
 **/
protected function initializeNode($node) {
    // Starting adjustment per tag; tags that are not listed get no adjustment.
    $tagAdjustments = array(
        'DIV'        => 5,

        'PRE'        => 3,
        'TD'         => 3,
        'BLOCKQUOTE' => 3,

        'ADDRESS'    => -3,
        'OL'         => -3,
        'UL'         => -3,
        'DL'         => -3,
        'DD'         => -3,
        'DT'         => -3,
        'LI'         => -3,
        'FORM'       => -3,

        'H1'         => -5,
        'H2'         => -5,
        'H3'         => -5,
        'H4'         => -5,
        'H5'         => -5,
        'H6'         => -5,
        'TH'         => -5,
    );

    $scoreAttr = $this->dom->createAttribute('readability');
    $scoreAttr->value = 0; // this is our contentScore
    $node->setAttributeNode($scoreAttr);

    // Upper-case the tag so the lookup does not depend on how the parser reports it.
    $tag = strtoupper($node->tagName);
    if (isset($tagAdjustments[$tag])) {
        $scoreAttr->value += $tagAdjustments[$tag];
    }

    $scoreAttr->value += $this->getClassWeight($node);
}
513 | |||
/***
 * grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is
 * most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
 *
 * Steps, in order:
 *   1. Walk every element: drop unlikely candidates (when FLAG_STRIP_UNLIKELYS is active), queue
 *      P/TD/PRE nodes for scoring, turn block-free DIVs into P, and wrap loose text inside other DIVs in P.
 *   2. Score each queued node and add the score to its parent (full) and grandparent (half).
 *   3. Scale every candidate by its link density and keep the highest scorer.
 *   4. Collect the top candidate and qualifying siblings into a new div#readability-content.
 *   5. Run prepArticle() on the result; if fewer than 250 characters remain, restore the cached body and
 *      retry with one more flag removed.
 *
 * @param DOMNode|null $page node to search in; $this->dom is used when empty
 * @return DOMElement the div holding the extracted article
 **/
protected function grabArticle($page=null) {
    $stripUnlikelyCandidates = $this->flagIsActive(self::FLAG_STRIP_UNLIKELYS);
    if (!$page) $page = $this->dom;
    $allElements = $page->getElementsByTagName('*');

    /**
     * First, node prepping. Trash nodes that look cruddy (like ones with the class name "comment", etc), and turn divs
     * into P tags where they have been used inappropriately (as in, where they contain no other block level elements.)
     *
     * Note: Assignment from index for performance. See http://www.peachpit.com/articles/article.aspx?p=31567&seqNum=5
     * TODO: Shouldn't this be a reverse traversal?
     **/
    $node = null;
    $nodesToScore = array();
    for ($nodeIndex = 0; ($node = $allElements->item($nodeIndex)); $nodeIndex++) {
        $tagName = strtoupper($node->tagName);

        /* Remove unlikely candidates */
        if ($stripUnlikelyCandidates) {
            $unlikelyMatchString = $node->getAttribute('class') . $node->getAttribute('id');
            if (
                preg_match($this->regexps['unlikelyCandidates'], $unlikelyMatchString) &&
                !preg_match($this->regexps['okMaybeItsACandidate'], $unlikelyMatchString) &&
                $tagName != 'BODY'
            )
            {
                $this->dbg('Removing unlikely candidate - ' . $unlikelyMatchString);
                $node->parentNode->removeChild($node);
                // Step back so the element that now occupies this index is not skipped.
                $nodeIndex--;
                continue;
            }
        }

        if ($tagName == 'P' || $tagName == 'TD' || $tagName == 'PRE') {
            $nodesToScore[] = $node;
        }

        /* Turn all divs that don't have children block level elements into p's */
        if ($tagName == 'DIV') {
            if (!preg_match($this->regexps['divToPElements'], $node->innerHTML)) {
                $newNode = $this->dom->createElement('p');
                try {
                    $newNode->innerHTML = $node->innerHTML;
                    $node->parentNode->replaceChild($newNode, $node);
                    // Step back so the replacement <p> is visited on the next pass and queued by the
                    // P/TD/PRE check above. The detached <div> is deliberately NOT queued: it has no
                    // parent any more, so the scoring loop would only skip it.
                    $nodeIndex--;
                }
                catch(Exception $e) {
                    $this->dbg('Could not alter div to p, reverting back to div.: ' . $e);
                }
            }
            else
            {
                /* EXPERIMENTAL */
                // TODO: change these p elements back to text nodes after processing
                for ($i = 0, $il = $node->childNodes->length; $i < $il; $i++) {
                    $childNode = $node->childNodes->item($i);
                    if ($childNode->nodeType == 3) { // XML_TEXT_NODE
                        // Wrap the loose text in an inline <p> carrying the same content.
                        $p = $this->dom->createElement('p');
                        $p->innerHTML = $childNode->nodeValue;
                        $p->setAttribute('style', 'display: inline;');
                        $p->setAttribute('class', 'readability-styled');
                        $childNode->parentNode->replaceChild($p, $childNode);
                    }
                }
            }
        }
    }

    /**
     * Loop through all paragraphs, and assign a score to them based on how content-y they look.
     * Then add their score to their parent node.
     *
     * A score is determined by things like number of commas, class names, etc. Maybe eventually link density.
     **/
    $candidates = array();
    foreach ($nodesToScore as $nodeToScore) {
        $parentNode = $nodeToScore->parentNode;
        // Only an element grandparent can carry a score attribute.
        $grandParentNode = !$parentNode ? null : (($parentNode->parentNode instanceof DOMElement) ? $parentNode->parentNode : null);
        $innerText = $this->getInnerText($nodeToScore);

        if (!$parentNode || !isset($parentNode->tagName)) {
            continue;
        }

        /* If this paragraph is less than 25 characters, don't even count it. */
        if (strlen($innerText) < 25) {
            continue;
        }

        /* Initialize readability data for the parent. */
        if (!$parentNode->hasAttribute('readability'))
        {
            $this->initializeNode($parentNode);
            $candidates[] = $parentNode;
        }

        /* Initialize readability data for the grandparent. */
        if ($grandParentNode && !$grandParentNode->hasAttribute('readability') && isset($grandParentNode->tagName))
        {
            $this->initializeNode($grandParentNode);
            $candidates[] = $grandParentNode;
        }

        $contentScore = 0;

        /* Add a point for the paragraph itself as a base. */
        $contentScore++;

        /* Add one point per comma-separated segment of this paragraph (i.e. number of commas + 1). */
        $contentScore += count(explode(',', $innerText));

        /* For every 100 characters in this paragraph, add another point. Up to 3 points. */
        $contentScore += min(floor(strlen($innerText) / 100), 3);

        /* Add the score to the parent. The grandparent gets half. */
        $parentNode->getAttributeNode('readability')->value += $contentScore;

        if ($grandParentNode) {
            $grandParentNode->getAttributeNode('readability')->value += $contentScore/2;
        }
    }

    /**
     * After we've calculated scores, loop through all of the possible candidate nodes we found
     * and find the one with the highest score.
     **/
    $topCandidate = null;
    for ($c=0, $cl=count($candidates); $c < $cl; $c++)
    {
        /**
         * Scale the final candidates score based on link density. Good content should have a
         * relatively small link density (5% or less) and be mostly unaffected by this operation.
         **/
        $readability = $candidates[$c]->getAttributeNode('readability');
        $readability->value = $readability->value * (1-$this->getLinkDensity($candidates[$c]));

        $this->dbg('Candidate: ' . $candidates[$c]->tagName . ' (' . $candidates[$c]->getAttribute('class') . ':' . $candidates[$c]->getAttribute('id') . ') with score ' . $readability->value);

        // Compare as floats: scores are fractional after the link-density scaling, and truncating the
        // current leader to an integer would let a later, lower-scoring candidate displace it.
        if (!$topCandidate || (float)$readability->value > (float)$topCandidate->getAttribute('readability')) {
            $topCandidate = $candidates[$c];
        }
    }

    /**
     * If we still have no top candidate, just use the body as a last resort.
     * We also have to copy the body node so it is something we can modify.
     **/
    if ($topCandidate === null || strtoupper($topCandidate->tagName) == 'BODY')
    {
        $topCandidate = $this->dom->createElement('div');
        if ($page instanceof DOMDocument) {
            if (!isset($page->documentElement)) {
                // we don't have a body either? what a mess! :)
            } else {
                $topCandidate->innerHTML = $page->documentElement->innerHTML;
                $page->documentElement->innerHTML = '';
                $page->documentElement->appendChild($topCandidate);
            }
        } else {
            $topCandidate->innerHTML = $page->innerHTML;
            $page->innerHTML = '';
            $page->appendChild($topCandidate);
        }
        $this->initializeNode($topCandidate);
    }

    /**
     * Now that we have the top candidate, look through its siblings for content that might also be related.
     * Things like preambles, content split by ads that we removed, etc.
     **/
    $articleContent = $this->dom->createElement('div');
    $articleContent->setAttribute('id', 'readability-content');
    $siblingScoreThreshold = max(10, ((int)$topCandidate->getAttribute('readability')) * 0.2);

    // The fallback div above stays detached when the document has no root element, so the parent may be missing.
    $topCandidateParent = $topCandidate->parentNode;
    $siblingNodes = $topCandidateParent ? $topCandidateParent->childNodes : null;
    if (!isset($siblingNodes)) {
        // Stand-in with zero length so the sibling loop below is simply skipped.
        $siblingNodes = new stdClass;
        $siblingNodes->length = 0;
    }

    for ($s=0, $sl=$siblingNodes->length; $s < $sl; $s++)
    {
        $siblingNode = $siblingNodes->item($s);
        $append = false;

        $this->dbg('Looking at sibling node: ' . $siblingNode->nodeName . (($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability')) ? (' with score ' . $siblingNode->getAttribute('readability')) : ''));

        /* The top candidate itself is always kept. */
        if ($siblingNode === $topCandidate)
        {
            $append = true;
        }

        $contentBonus = 0;
        /* Give a bonus if sibling nodes and top candidates have the exact same classname */
        if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->getAttribute('class') == $topCandidate->getAttribute('class') && $topCandidate->getAttribute('class') != '') {
            $contentBonus += ((int)$topCandidate->getAttribute('readability')) * 0.2;
        }

        if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability') && (((int)$siblingNode->getAttribute('readability')) + $contentBonus) >= $siblingScoreThreshold)
        {
            $append = true;
        }

        if (strtoupper($siblingNode->nodeName) == 'P') {
            $linkDensity = $this->getLinkDensity($siblingNode);
            $nodeContent = $this->getInnerText($siblingNode);
            $nodeLength = strlen($nodeContent);

            if ($nodeLength > 80 && $linkDensity < 0.25)
            {
                $append = true;
            }
            else if ($nodeLength < 80 && $linkDensity === 0 && preg_match('/\.( |$)/', $nodeContent))
            {
                $append = true;
            }
        }

        if ($append)
        {
            $this->dbg('Appending node: ' . $siblingNode->nodeName);

            $nodeToAppend = null;
            $sibNodeName = strtoupper($siblingNode->nodeName);
            if ($sibNodeName != 'DIV' && $sibNodeName != 'P') {
                /* We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident. */

                $this->dbg('Altering siblingNode of ' . $sibNodeName . ' to div.');
                $nodeToAppend = $this->dom->createElement('div');
                try {
                    $nodeToAppend->setAttribute('id', $siblingNode->getAttribute('id'));
                    $nodeToAppend->innerHTML = $siblingNode->innerHTML;
                }
                catch(Exception $e)
                {
                    $this->dbg('Could not alter siblingNode to div, reverting back to original.');
                    $nodeToAppend = $siblingNode;
                    $s--;
                    $sl--;
                }
            } else {
                $nodeToAppend = $siblingNode;
                $s--;
                $sl--;
            }

            /* To ensure a node does not interfere with readability styles, remove its classnames */
            $nodeToAppend->removeAttribute('class');

            /* Append sibling and subtract from our list because it removes the node when you append to another node */
            $articleContent->appendChild($nodeToAppend);
        }
    }

    /**
     * So we have all of the content that we need. Now we clean it up for presentation.
     **/
    $this->prepArticle($articleContent);

    /**
     * Now that we've gone through the full algorithm, check to see if we got any meaningful content.
     * If we didn't, we may need to re-run grabArticle with different flags set. This gives us a higher
     * likelihood of finding the content, and the sieve approach gives us a higher likelihood of
     * finding the -right- content.
     **/
    if (strlen($this->getInnerText($articleContent, false)) < 250)
    {
        // TODO: find out why element disappears sometimes, e.g. for this URL http://www.businessinsider.com/6-hedge-fund-etfs-for-average-investors-2011-7
        // in the meantime, we check and create an empty element if it's not there.
        if (!isset($this->body->childNodes)) $this->body = $this->dom->createElement('body');
        $this->body->innerHTML = $this->bodyCache;

        if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS)) {
            $this->removeFlag(self::FLAG_STRIP_UNLIKELYS);
            return $this->grabArticle($this->body);
        }
        else if ($this->flagIsActive(self::FLAG_WEIGHT_CLASSES)) {
            $this->removeFlag(self::FLAG_WEIGHT_CLASSES);
            return $this->grabArticle($this->body);
        }
        else if ($this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
            $this->removeFlag(self::FLAG_CLEAN_CONDITIONALLY);
            return $this->grabArticle($this->body);
        }
        else {
            # the "return false" below was commented out by Nicolas Lœuillet 8/8/13 due to some urls not parsed;
            # with it disabled, the short result is returned as-is once every flag has been tried.
            // return false;
        }
    }
    return $articleContent;
}
819 | |||
/**
 * Strip every <script> element found under the given document or element.
 *
 * @param DOMDocument|DOMElement $doc node whose descendant scripts are removed
 * @return void
 */
public function removeScripts($doc) {
    // Copy the matches into a plain array first so removals cannot disturb the iteration.
    $found = array();
    foreach ($doc->getElementsByTagName('script') as $script) {
        $found[] = $script;
    }

    foreach ($found as $script) {
        $script->parentNode->removeChild($script);
    }
}
833 | |||
/**
 * Return the trimmed text content of a node.
 * When $normalizeSpaces is true, whatever matches the "normalize" pattern
 * is additionally replaced with a single space.
 *
 * @param DOMElement $e
 * @param boolean $normalizeSpaces (default: true)
 * @return string empty string when the node has no text content
 **/
public function getInnerText($e, $normalizeSpaces=true) {
    // Nothing to extract from a node without (or with empty) text content.
    if (!isset($e->textContent) || $e->textContent == '') {
        return '';
    }

    $trimmed = trim($e->textContent);

    return $normalizeSpaces
        ? preg_replace($this->regexps['normalize'], ' ', $trimmed)
        : $trimmed;
}
857 | |||
/**
 * Count how many times the string $s occurs in the inner text of node $e.
 *
 * @param DOMElement $e
 * @param string $s what to count. Default is ","
 * @return number (integer)
 **/
public function getCharCount($e, $s=',') {
    $text = $this->getInnerText($e);

    return substr_count($text, $s);
}
868 | |||
/**
 * Remove the style attribute on $e and on every element under it.
 *
 * @param DOMElement $e element (or document) to clean; non-objects are ignored
 * @return void
 */
public function cleanStyles($e) {
    if (!is_object($e)) return;

    // getElementsByTagName('*') only yields descendants, so the root element
    // has to be handled separately (a DOMDocument has no attributes to strip).
    if ($e instanceof DOMElement) {
        $e->removeAttribute('style');
    }

    foreach ($e->getElementsByTagName('*') as $elem) {
        $elem->removeAttribute('style');
    }
}
882 | |||
/**
 * Compute the link density of a node: the length of the text found inside
 * its <a> descendants divided by the length of all of its text.
 *
 * @param DOMElement $e
 * @return number (float) 0 when the node has no text at all
 */
public function getLinkDensity($e) {
    $anchors = $e->getElementsByTagName('a');
    $totalLength = strlen($this->getInnerText($e));

    $anchorLength = 0;
    foreach ($anchors as $anchor) {
        $anchorLength += strlen($this->getInnerText($anchor));
    }

    // Guard against dividing by zero for nodes without text.
    return ($totalLength > 0) ? $anchorLength / $totalLength : 0;
}
904 | |||
/**
 * Weigh an element by its class and id attributes.
 * Each non-empty attribute loses 25 points when it matches the "negative"
 * pattern and gains 25 points when it matches the "positive" pattern.
 * Always 0 while FLAG_WEIGHT_CLASSES is not active.
 *
 * @param DOMElement $e
 * @return number (Integer)
 */
public function getClassWeight($e) {
    if (!$this->flagIsActive(self::FLAG_WEIGHT_CLASSES)) {
        return 0;
    }

    $weight = 0;

    /* The class name and the ID are judged by exactly the same rules */
    foreach (array('class', 'id') as $attrName) {
        if (!$e->hasAttribute($attrName)) {
            continue;
        }
        $attrValue = $e->getAttribute($attrName);
        if ($attrValue == '') {
            continue;
        }

        if (preg_match($this->regexps['negative'], $attrValue)) {
            $weight -= 25;
        }
        if (preg_match($this->regexps['positive'], $attrValue)) {
            $weight += 25;
        }
    }

    return $weight;
}
942 | |||
/**
 * Collapse whatever the "killBreaks" pattern matches in a node's markup
 * into a single <br />.
 *
 * @param DOMElement $node
 * @return void
 */
public function killBreaks($node) {
    $node->innerHTML = preg_replace($this->regexps['killBreaks'], '<br />', $node->innerHTML);
}
954 | |||
/**
 * Remove every descendant of $e with the given tag name.
 * iframe/object/embed elements are kept when one of their attribute values,
 * or their inner markup, matches the "video" pattern.
 *
 * Updated 2012-09-18 to preserve youtube/vimeo iframes
 *
 * @param DOMElement $e
 * @param string $tag
 * @return void
 */
public function clean($e, $tag) {
    $targetList = $e->getElementsByTagName($tag);
    $isEmbed = in_array($tag, array('iframe', 'object', 'embed'));

    /* Walk backwards so removals do not shift the items still to be visited. */
    for ($y = $targetList->length - 1; $y >= 0; $y--) {
        $target = $targetList->item($y);

        if ($isEmbed) {
            /* Gather all attribute values into one string to test them in a single match. */
            $attributeValues = '';
            foreach ($target->attributes as $attribute) {
                $attributeValues .= $attribute->value . '|';
            }

            /* Keep the element when its attributes, or failing that its content, look like a video. */
            if (preg_match($this->regexps['video'], $attributeValues)
                || preg_match($this->regexps['video'], $target->innerHTML)) {
                continue;
            }
        }

        $target->parentNode->removeChild($target);
    }
}
990 | |||
/**
 * Remove descendants of $e with the given tag name when they look "fishy".
 * An element is dropped outright when its class weight plus content score is
 * negative. Otherwise, if it holds fewer than 10 commas, it is judged on its
 * counts of paragraphs, images, list items, inputs, links and video embeds,
 * its link density and its text length. The thresholds differ between the
 * light and the standard clean.
 *
 * Does nothing unless FLAG_CLEAN_CONDITIONALLY is active.
 *
 * @param DOMElement $e
 * @param string $tag
 * @return void
 */
public function cleanConditionally($e, $tag) {
    if (!$this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
        return;
    }

    $tagsList = $e->getElementsByTagName($tag);

    /**
     * Traverse backwards so nodes can be removed without affecting the traversal.
     *
     * TODO: Consider taking into account original contentScore here.
     */
    for ($i = $tagsList->length - 1; $i >= 0; $i--) {
        $el = $tagsList->item($i);
        $weight = $this->getClassWeight($el);
        $hasScore = $el->hasAttribute('readability');
        $contentScore = $hasScore ? (int)$el->getAttribute('readability') : 0;

        $this->dbg('Cleaning Conditionally ' . $el->tagName . ' (' . $el->getAttribute('class') . ':' . $el->getAttribute('id') . ')' . ($hasScore ? (' with score ' . $el->getAttribute('readability')) : ''));

        /* A negative overall score is enough to drop the element. */
        if ($weight + $contentScore < 0) {
            $el->parentNode->removeChild($el);
            continue;
        }

        /* Plenty of commas: treat it as real content and leave it alone. */
        if ($this->getCharCount($el, ',') >= 10) {
            continue;
        }

        /* Gather counts for other typical elements embedded within. */
        $p     = $el->getElementsByTagName('p')->length;
        $img   = $el->getElementsByTagName('img')->length;
        $li    = $el->getElementsByTagName('li')->length-100;
        $input = $el->getElementsByTagName('input')->length;
        $a     = $el->getElementsByTagName('a')->length;

        /* Only embeds/iframes whose src matches the video pattern are counted. */
        $embedCount = 0;
        foreach (array('embed', 'iframe') as $embedTag) {
            foreach ($el->getElementsByTagName($embedTag) as $embed) {
                if (preg_match($this->regexps['video'], $embed->getAttribute('src'))) {
                    $embedCount++;
                }
            }
        }

        $linkDensity = $this->getLinkDensity($el);
        $contentLength = strlen($this->getInnerText($el));

        /* Debug message of the first rule that fires; null means the element stays. */
        $reason = null;

        if ($this->lightClean) {
            $this->dbg('Light clean...');
            if ( ($img > $p) && ($img > 4) ) {
                $reason = ' more than 4 images and more image elements than paragraph elements';
            } else if ($li > $p && $tag != 'ul' && $tag != 'ol') {
                $reason = ' too many <li> elements, and parent is not <ul> or <ol>';
            } else if ( $input > floor($p/3) ) {
                $reason = ' too many <input> elements';
            } else if ($contentLength < 25 && ($embedCount === 0 && ($img === 0 || $img > 2))) {
                $reason = ' content length less than 25 chars, 0 embeds and either 0 images or more than 2 images';
            } else if ($weight < 25 && $linkDensity > 0.2) {
                $reason = ' weight smaller than 25 and link density above 0.2';
            } else if ($a > 2 && ($weight >= 25 && $linkDensity > 0.5)) {
                $reason = ' more than 2 links and weight above 25 but link density greater than 0.5';
            } else if ($embedCount > 3) {
                $reason = ' more than 3 embeds';
            }
        } else {
            $this->dbg('Standard clean...');
            if ( $img > $p ) {
                $reason = ' more image elements than paragraph elements';
            } else if ($li > $p && $tag != 'ul' && $tag != 'ol') {
                $reason = ' too many <li> elements, and parent is not <ul> or <ol>';
            } else if ( $input > floor($p/3) ) {
                $reason = ' too many <input> elements';
            } else if ($contentLength < 25 && ($img === 0 || $img > 2) ) {
                $reason = ' content length less than 25 chars and 0 images, or more than 2 images';
            } else if ($weight < 25 && $linkDensity > 0.2) {
                $reason = ' weight smaller than 25 and link density above 0.2';
            } else if ($weight >= 25 && $linkDensity > 0.5) {
                $reason = ' weight above 25 but link density greater than 0.5';
            } else if (($embedCount == 1 && $contentLength < 75) || $embedCount > 1) {
                $reason = ' 1 embed and content length smaller than 75 chars, or more than one embed';
            }
        }

        if ($reason !== null) {
            $this->dbg($reason);
            $el->parentNode->removeChild($el);
        }
    }
}
1109 | |||
/**
 * Drop spurious h1 and h2 headers from an element: those with a negative
 * class weight or a link density above 0.33.
 *
 * @param DOMElement $e
 * @return void
 */
public function cleanHeaders($e) {
    foreach (array('h1', 'h2') as $headerTag) {
        $headers = $e->getElementsByTagName($headerTag);

        /* Walk backwards so removals do not shift the headers still to be checked. */
        for ($i = $headers->length - 1; $i >= 0; $i--) {
            $header = $headers->item($i);
            $isSpurious = $this->getClassWeight($header) < 0 || $this->getLinkDensity($header) > 0.33;
            if ($isSpurious) {
                $header->parentNode->removeChild($header);
            }
        }
    }
}
1126 | |||
/**
 * Tell whether the given flag bit(s) are set in $this->flags.
 *
 * @param int $flag
 * @return boolean
 */
public function flagIsActive($flag) {
    $matching = $this->flags & $flag;

    return $matching > 0;
}
1130 | |||
/**
 * Switch the given flag bit(s) on in $this->flags.
 *
 * @param int $flag
 * @return void
 */
public function addFlag($flag) {
    $this->flags |= $flag;
}
1134 | |||
/**
 * Switch the given flag bit(s) off in $this->flags.
 *
 * @param int $flag
 * @return void
 */
public function removeFlag($flag) {
    $this->flags &= ~$flag;
}
1138 | } \ No newline at end of file | ||
diff --git a/inc/3rdparty/Session.class.php b/inc/3rdparty/Session.class.php index df913a06..32624559 100644 --- a/inc/3rdparty/Session.class.php +++ b/inc/3rdparty/Session.class.php | |||
@@ -32,6 +32,8 @@ class Session | |||
32 | // If the user does not access any page within this time, | 32 | // If the user does not access any page within this time, |
33 | // his/her session is considered expired (3600 sec. = 1 hour) | 33 | // his/her session is considered expired (3600 sec. = 1 hour) |
34 | public static $inactivityTimeout = 3600; | 34 | public static $inactivityTimeout = 3600; |
35 | // Extra timeout for long sessions (if enabled) (82800 sec. = 23 hours) | ||
36 | public static $longSessionTimeout = 82800; | ||
35 | // If you get disconnected often or if your IP address changes often. | 37 | // If you get disconnected often or if your IP address changes often. |
36 | // Let you disable session cookie hijacking protection | 38 | // Let you disable session cookie hijacking protection |
37 | public static $disableSessionProtection = false; | 39 | public static $disableSessionProtection = false; |
@@ -106,6 +108,7 @@ class Session | |||
106 | $password, | 108 | $password, |
107 | $loginTest, | 109 | $loginTest, |
108 | $passwordTest, | 110 | $passwordTest, |
111 | $longlastingsession, | ||
109 | $pValues = array()) | 112 | $pValues = array()) |
110 | { | 113 | { |
111 | self::banInit(); | 114 | self::banInit(); |
@@ -118,7 +121,11 @@ class Session | |||
118 | $_SESSION['username'] = $login; | 121 | $_SESSION['username'] = $login; |
119 | // Set session expiration. | 122 | // Set session expiration. |
120 | $_SESSION['expires_on'] = time() + self::$inactivityTimeout; | 123 | $_SESSION['expires_on'] = time() + self::$inactivityTimeout; |
121 | 124 | if ($longlastingsession) { | |
125 | $_SESSION['longlastingsession'] = self::$longSessionTimeout; | ||
126 | $_SESSION['expires_on'] += $_SESSION['longlastingsession']; | ||
127 | } | ||
128 | |||
122 | foreach ($pValues as $key => $value) { | 129 | foreach ($pValues as $key => $value) { |
123 | $_SESSION[$key] = $value; | 130 | $_SESSION[$key] = $value; |
124 | } | 131 | } |
@@ -136,7 +143,7 @@ class Session | |||
136 | */ | 143 | */ |
137 | public static function logout() | 144 | public static function logout() |
138 | { | 145 | { |
139 | unset($_SESSION['uid'],$_SESSION['ip'],$_SESSION['expires_on'],$_SESSION['tokens'], $_SESSION['login'], $_SESSION['pass'], $_SESSION['poche_user']); | 146 | unset($_SESSION['uid'],$_SESSION['ip'],$_SESSION['expires_on'],$_SESSION['tokens'], $_SESSION['login'], $_SESSION['pass'], $_SESSION['longlastingsession'], $_SESSION['poche_user']); |
140 | } | 147 | } |
141 | 148 | ||
142 | /** | 149 | /** |
diff --git a/inc/3rdparty/config.php b/inc/3rdparty/config.php new file mode 100644 index 00000000..61fc0d0e --- /dev/null +++ b/inc/3rdparty/config.php | |||
@@ -0,0 +1,405 @@ | |||
1 | <?php | ||
2 | /* Full-Text RSS config */ | ||
3 | |||
4 | // ......IMPORTANT...................................... | ||
5 | // ..................................................... | ||
6 | // Please do not change this file (config.php) directly. | ||
7 | // Save a copy as custom_config.php and make your | ||
8 | // changes to that instead. It will automatically | ||
9 | // override anything in config.php. Because config.php | ||
10 | // always gets loaded anyway, you can simply specify | ||
11 | // options you'd like to override in custom_config.php. | ||
12 | // ..................................................... | ||
13 | |||
14 | // Create config object | ||
15 | if (!isset($options)) $options = new stdClass(); | ||
16 | |||
17 | // Enable service | ||
18 | // ---------------------- | ||
19 | // Set this to false if you want to disable the service. | ||
20 | // If set to false, no feed is produced and users will | ||
21 | // be told that the service is disabled. | ||
22 | $options->enabled = true; | ||
23 | |||
24 | // Debug mode | ||
25 | // ---------------------- | ||
26 | // Enable or disable debugging. When enabled debugging works by passing | ||
27 | // &debug to the makefulltextfeed.php querystring. | ||
28 | // Valid values: | ||
29 | // true or 'user' (default) - let user decide | ||
30 | // 'admin' - debug works only for logged in admin users | ||
31 | // false - disabled | ||
32 | $options->debug = true; | ||
33 | |||
34 | // Default entries (without access key) | ||
35 | // ---------------------- | ||
36 | // The number of feed items to process when no API key is supplied | ||
37 | // and no &max=x value is supplied in the querystring. | ||
38 | $options->default_entries = 5; | ||
39 | |||
40 | // Max entries (without access key) | ||
41 | // ---------------------- | ||
42 | // The maximum number of feed items to process when no access key is supplied. | ||
43 | // This limits the user-supplied &max=x value. For example, if the user | ||
44 | // asks for 20 items to be processed (&max=20), if max_entries is set to | ||
45 | // 10, only 10 will be processed. | ||
46 | $options->max_entries = 10; | ||
47 | |||
48 | // Rewrite relative URLs | ||
49 | // ---------------------- | ||
50 | // With this enabled relative URLs found in the extracted content | ||
51 | // block are automatically rewritten as absolute URLs. | ||
52 | $options->rewrite_relative_urls = true; | ||
53 | |||
54 | // Exclude items if extraction fails | ||
55 | // --------------------------------- | ||
56 | // Excludes items from the resulting feed | ||
57 | // if we cannot extract any content from the | ||
58 | // item URL. | ||
59 | // Possible values... | ||
60 | // Enable: true | ||
61 | // Disable: false (default) | ||
62 | // User decides: 'user' (this option will appear on the form) | ||
63 | $options->exclude_items_on_fail = 'user'; | ||
64 | |||
65 | // Enable multi-page support | ||
66 | // ------------------------- | ||
67 | // If enabled, we will try to follow next page links on multi-page articles. | ||
68 | // Currently this only happens for sites where next_page_link has been defined | ||
69 | // in a site config file. | ||
70 | $options->multipage = true; | ||
71 | |||
72 | // Enable caching | ||
73 | // ---------------------- | ||
74 | // Enable this if you'd like to cache results | ||
75 | // for 10 minutes. Cache files are written to disk (in cache/ subfolders | ||
76 | // - which must be writable). | ||
77 | // Initially it's best to keep this disabled to make sure everything works | ||
78 | // as expected. If you have APC enabled, please also see smart_cache in the | ||
79 | // advanced section. | ||
80 | $options->caching = false; | ||
81 | |||
82 | // Cache directory | ||
83 | // ---------------------- | ||
84 | // Only used if caching is true | ||
85 | $options->cache_dir = dirname(__FILE__).'/cache'; | ||
86 | |||
87 | // Message to prepend (without access key) | ||
88 | // ---------------------- | ||
89 | // HTML to insert at the beginning of each feed item when no access key is supplied. | ||
90 | // Substitution tags: | ||
91 | // {url} - Feed item URL | ||
92 | // {effective-url} - Feed item URL after we've followed all redirects | ||
93 | $options->message_to_prepend = ''; | ||
94 | |||
95 | // Message to append (without access key) | ||
96 | // ---------------------- | ||
97 | // HTML to insert at the end of each feed item when no access key is supplied. | ||
98 | // Substitution tags: | ||
99 | // {url} - Feed item URL | ||
100 | // {effective-url} - Feed item URL after we've followed all redirects | ||
101 | $options->message_to_append = ''; | ||
102 | |||
103 | // Error message when content extraction fails (without access key) | ||
104 | // ---------------------- | ||
105 | $options->error_message = '[unable to retrieve full-text content]'; | ||
106 | |||
107 | // Keep enclosure in feed items | ||
108 | // If enabled, we will try to preserve enclosures if present. | ||
109 | // ---------------------- | ||
110 | $options->keep_enclosures = true; | ||
111 | |||
112 | // Detect language | ||
113 | // --------------- | ||
114 | // Should we try and find/guess the language of the article being processed? | ||
115 | // Values will be placed inside the <dc:language> element inside each <item> element | ||
116 | // Possible values: | ||
117 | // * Ignore language: 0 | ||
118 | // * Use article/feed metadata (e.g. HTML lang attribute): 1 (default) | ||
119 | // * As above, but guess if not present: 2 | ||
120 | // * Always guess: 3 | ||
121 | // * User decides: 'user' (value of 0-3 can be passed in querystring: e.g. &l=2) | ||
122 | $options->detect_language = 1; | ||
123 | |||
124 | // Registration key | ||
125 | // --------------- | ||
126 | // The registration key is optional. It is not required to use Full-Text RSS, | ||
127 | // and does not affect the normal operation of Full-Text RSS. It is currently | ||
128 | // only used on admin pages which help you update site patterns with the | ||
129 | // latest version offered by FiveFilters.org. For these admin-related | ||
130 | // tasks to complete, we will require a valid registration key. | ||
131 | // If you would like one, you can purchase the latest version of Full-Text RSS | ||
132 | // at http://fivefilters.org/content-only/ | ||
133 | // Your registration key will automatically be sent in the confirmation email. | ||
134 | // Once you have it, simply copy and paste it here. | ||
135 | $options->registration_key = ''; | ||
136 | |||
137 | ///////////////////////////////////////////////// | ||
138 | /// RESTRICT ACCESS ///////////////////////////// | ||
139 | ///////////////////////////////////////////////// | ||
140 | |||
141 | // Admin credentials | ||
142 | // ---------------------- | ||
143 | // Certain pages/actions, e.g. updating site patterns with our online tool, will require admin credentials. | ||
144 | // To use these pages, enter a password here and you'll be prompted for it when you try to access those pages. | ||
145 | // If no password or username is set, pages requiring admin privileges will be inaccessible. | ||
146 | // The default username is 'admin'. | ||
147 | // If overriding with an environment variable, separate username and password with a colon, e.g.: | ||
148 | // ftr_admin_credentials: admin:my-secret-password | ||
149 | // Example: $options->admin_credentials = array('username'=>'admin', 'password'=>'my-secret-password'); | ||
150 | $options->admin_credentials = array('username'=>'admin', 'password'=>'admin'); | ||
151 | |||
152 | // URLs to allow | ||
153 | // ---------------------- | ||
154 | // List of URLs (or parts of a URL) which the service will accept. | ||
155 | // If the list is empty, all URLs (except those specified in the blocked list below) | ||
156 | // will be permitted. | ||
157 | // Empty: array(); | ||
158 | // Non-empty example: array('example.com', 'anothersite.org'); | ||
159 | $options->allowed_urls = array(); | ||
160 | |||
161 | // URLs to block | ||
162 | // ---------------------- | ||
163 | // List of URLs (or parts of a URL) which the service will not accept. | ||
164 | // Note: this list is ignored if allowed_urls is not empty | ||
165 | $options->blocked_urls = array(); | ||
166 | |||
167 | // Key holder(s) only? | ||
168 | // ---------------------- | ||
169 | // Set this to true if you want to restrict access only to | ||
170 | // those with a key (see below to specify key(s)). | ||
171 | // If set to true, no feed is produced unless a valid | ||
172 | // key is provided. | ||
173 | $options->key_required = false; | ||
174 | |||
175 | // Favour item titles in feed | ||
176 | // ---------------------- | ||
177 | // By default, when processing feeds, we assume item titles in the feed | ||
178 | // have not been truncated. So after processing web pages, the extracted titles | ||
179 | // are not used in the generated feed. If you prefer to have extracted titles in | ||
180 | // the feed you can either set this to false, in which case we will always favour | ||
181 | // extracted titles. Alternatively, if set to 'user' (default) we'll use the | ||
182 | // extracted title if you pass '&use_extracted_title' in the querystring. | ||
183 | // Possible values: | ||
184 | // * Favour feed titles: true | ||
185 | // * Favour extracted titles: false | ||
186 | // * Favour feed titles with user override: 'user' (default) | ||
187 | // Note: this has no effect when the input URL is to a web page - in these cases | ||
188 | // we always use the extracted title in the generated feed. | ||
189 | $options->favour_feed_titles = 'user'; | ||
190 | |||
191 | // Access keys (password protected access) | ||
192 | // ------------------------------------ | ||
193 | // NOTE: You do not need an API key from fivefilters.org to run your own | ||
194 | // copy of the code. This is here if you'd like to restrict access to | ||
195 | // _your_ copy. | ||
196 | // Keys let you group users - those with a key and those without - and | ||
197 | // restrict access to the service for those without a key. | ||
198 | // If you want everyone to access the service in the same way, you can | ||
199 | // leave the array below empty and ignore the access key options further down. | ||
200 | // The options further down let you control how the service should behave | ||
201 | // in each mode. | ||
202 | // Note: Explicitly including the index number (1 and 2 in the examples below) | ||
203 | // is highly recommended (when generating feeds, we encode the key and | ||
204 | // refer to it by index number and hash). | ||
205 | $options->api_keys = array(); | ||
206 | // Example: | ||
207 | // $options->api_keys[1] = 'secret-key-1'; | ||
208 | // $options->api_keys[2] = 'secret-key-2'; | ||
209 | |||
210 | // Default entries (with access key) | ||
211 | // ---------------------- | ||
212 | // The number of feed items to process when a valid access key is supplied. | ||
213 | $options->default_entries_with_key = 5; | ||
214 | |||
215 | // Max entries (with access key) | ||
216 | // ---------------------- | ||
217 | // The maximum number of feed items to process when a valid access key is supplied. | ||
218 | $options->max_entries_with_key = 10; | ||
219 | |||
220 | ///////////////////////////////////////////////// | ||
221 | /// ADVANCED OPTIONS //////////////////////////// | ||
222 | ///////////////////////////////////////////////// | ||
223 | |||
224 | // Enable XSS filter? | ||
225 | // ---------------------- | ||
226 | // We have not enabled this by default because we assume the majority of | ||
227 | // our users do not display the HTML retrieved by Full-Text RSS | ||
228 | // in a web page without further processing. If you subscribe to our generated | ||
229 | // feeds in your news reader application, it should, if it's good software, already | ||
230 | // filter the resulting HTML for XSS attacks, making it redundant for | ||
231 | // Full-Text RSS to do the same. Similarly with frameworks/CMS which display | ||
232 | // feed content - the content should be treated like any other user-submitted content. | ||
233 | // | ||
234 | // If you are writing an application yourself which is processing feeds generated by | ||
235 | // Full-Text RSS, you can either filter the HTML yourself to remove potential XSS attacks | ||
236 | // or enable this option. This might be useful if you are processing our generated | ||
237 | // feeds with JavaScript on the client side - although there's client side xss | ||
238 | // filtering available too, e.g. https://code.google.com/p/google-caja/wiki/JsHtmlSanitizer | ||
239 | // | ||
240 | // If enabled, we'll pass retrieved HTML content through htmLawed with | ||
241 | // safe flag on and style attributes denied, see | ||
242 | // http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/htmLawed_README.htm#s3.6 | ||
243 | // Note: if enabled this will also remove certain elements you may want to preserve, such as iframes. | ||
244 | // | ||
245 | // Valid values: | ||
246 | // true - enabled, all content will be filtered | ||
247 | // 'user' (default) - user must pass &xss in makefulltextfeed.php querystring to enable | ||
248 | // false - disabled | ||
249 | $options->xss_filter = 'user'; | ||
250 | |||
251 | // Allowed parsers | ||
252 | // ---------------------- | ||
253 | // Full-Text RSS attempts to use PHP's libxml extension to process HTML. | ||
254 | // While fast, on some sites it may not always produce good results. | ||
255 | // For these sites, you can specify an alternative HTML parser: | ||
256 | // parser: html5lib | ||
257 | // The html5lib parser is bundled with Full-Text RSS. | ||
258 | // see http://code.google.com/p/html5lib/ | ||
259 | // | ||
260 | // To disable HTML parsing with html5lib, you can remove it from this list. | ||
261 | // By default we allow both: libxml and html5lib. | ||
262 | $options->allowed_parsers = array('libxml', 'html5lib'); | ||
263 | //$options->allowed_parsers = array('libxml'); //disable html5lib - forcing libxml in all cases | ||
264 | |||
265 | // Enable Cross-Origin Resource Sharing (CORS) | ||
266 | // ---------------------- | ||
267 | // If enabled we'll send the following HTTP header | ||
268 | // Access-Control-Allow-Origin: * | ||
269 | // see http://en.wikipedia.org/wiki/Cross-origin_resource_sharing | ||
270 | $options->cors = false; | ||
271 | |||
272 | // Use APC user cache? | ||
273 | // ---------------------- | ||
274 | // If enabled we will store site config files (when requested | ||
275 | // for the first time) in APC's user cache. Keys prefixed with 'sc.' | ||
276 | // This improves performance by reducing disk access. | ||
277 | // Note: this has no effect if APC is unavailable on your server. | ||
278 | $options->apc = true; | ||
279 | |||
280 | // Smart cache (experimental) | ||
281 | // ---------------------- | ||
282 | // With this option enabled we will not cache to disk immediately. | ||
283 | // We will store the cache key in APC and if it's requested again | ||
284 | // we will cache results to disk. Keys prefixed with 'cache.' | ||
285 | // This improves performance by reducing disk access. | ||
286 | // Note: this has no effect if APC is disabled or unavailable on your server, | ||
287 | // or if you have caching disabled. | ||
288 | $options->smart_cache = true; | ||
289 | |||
290 | // Fingerprints | ||
291 | // ---------------------- | ||
292 | // key is fingerprint (fragment to find in HTML) | ||
293 | // value is host name to use for site config lookup if fingerprint matches | ||
294 | $options->fingerprints = array( | ||
295 | // Posterous | ||
296 | '<meta name="generator" content="Posterous"' => array('hostname'=>'fingerprint.posterous.com', 'head'=>true), | ||
297 | // Blogger | ||
298 | '<meta content=\'blogger\' name=\'generator\'' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true), | ||
299 | '<meta name="generator" content="Blogger"' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true), | ||
300 | // WordPress (hosted) | ||
301 | // '<meta name="generator" content="WordPress.com"' => array('hostname'=>'fingerprint.wordpress.com', 'head'=>true), | ||
302 | // WordPress (self-hosted and hosted) | ||
303 | '<meta name="generator" content="WordPress' => array('hostname'=>'fingerprint.wordpress.com', 'head'=>true) | ||
304 | ); | ||
305 | |||
306 | // User Agent strings - mapping domain names | ||
307 | // ---------------------- | ||
308 | // e.g. $options->user_agents = array('example.org' => 'PHP/5.2'); | ||
309 | $options->user_agents = array( 'lifehacker.com' => 'PHP/5.2', | ||
310 | 'gawker.com' => 'PHP/5.2', | ||
311 | 'deadspin.com' => 'PHP/5.2', | ||
312 | 'kotaku.com' => 'PHP/5.2', | ||
313 | 'jezebel.com' => 'PHP/5.2', | ||
314 | 'io9.com' => 'PHP/5.2', | ||
315 | 'jalopnik.com' => 'PHP/5.2', | ||
316 | 'gizmodo.com' => 'PHP/5.2', | ||
317 | '.wikipedia.org' => 'Mozilla/5.2', | ||
318 | '.fok.nl' => 'Googlebot/2.1', | ||
319 | 'getpocket.com' => 'PHP/5.2' | ||
320 | ); | ||
321 | |||
322 | // URL Rewriting | ||
323 | // ---------------------- | ||
324 | // Currently allows simple string replace of URLs. | ||
325 | // Useful for rewriting certain URLs to point to a single page | ||
326 | // or HTML view. Although using the single_page_link site config | ||
327 | // instruction is the preferred way to do this, sometimes, as | ||
328 | // with Google Docs URLs, it's not possible. | ||
329 | // Note: this might move to the site config file at some point. | ||
330 | $options->rewrite_url = array( | ||
331 | // Rewrite public Google Docs URLs to point to HTML view: | ||
332 | // if a URL contains docs.google.com, replace /Doc? with /View? | ||
333 | 'docs.google.com' => array('/Doc?' => '/View?'), | ||
334 | 'tnr.com' => array('tnr.com/article/' => 'tnr.com/print/article/'), | ||
335 | '.m.wikipedia.org' => array('.m.wikipedia.org' => '.wikipedia.org'), | ||
336 | 'm.vanityfair.com' => array('m.vanityfair.com' => 'www.vanityfair.com') | ||
337 | ); | ||
338 | |||
339 | // Content-Type exceptions | ||
340 | // ----------------------- | ||
341 | // Here you can define different actions based | ||
342 | // on the Content-Type header returned by server. | ||
343 | // MIME type as key, action as value. | ||
344 | // Valid actions: | ||
345 | // * 'exclude' - exclude this item from the result | ||
346 | // * 'link' - create HTML link to the item | ||
347 | $options->content_type_exc = array( | ||
348 | 'application/pdf' => array('action'=>'link', 'name'=>'PDF'), | ||
349 | 'image' => array('action'=>'link', 'name'=>'Image'), | ||
350 | 'audio' => array('action'=>'link', 'name'=>'Audio'), | ||
351 | 'video' => array('action'=>'link', 'name'=>'Video') | ||
352 | ); | ||
353 | |||
354 | // Cache directory level | ||
355 | // ---------------------- | ||
356 | // Spread cache files over different directories (only used if caching is enabled). | ||
357 | // Used to prevent large number of files in one directory. | ||
358 | // This corresponds to Zend_Cache's hashed_directory_level | ||
359 | // see http://framework.zend.com/manual/en/zend.cache.backends.html | ||
360 | // It's best not to change this if you're unsure. | ||
361 | $options->cache_directory_level = 0; | ||
362 | |||
363 | // Cache cleanup | ||
364 | // ------------- | ||
365 | // 0 = script will not clean cache (rename cachecleanup.php and use it for scheduled (e.g. cron) cache cleanup) | ||
366 | // 1 = clean cache every time the script runs (not recommended) | ||
367 | // 100 = clean cache roughly once every 100 script runs | ||
368 | // x = clean cache roughly once every x script runs | ||
369 | // ...you get the idea :) | ||
370 | $options->cache_cleanup = 100; | ||
371 | |||
372 | ///////////////////////////////////////////////// | ||
373 | /// DO NOT CHANGE ANYTHING BELOW THIS /////////// | ||
374 | ///////////////////////////////////////////////// | ||
375 | |||
376 | if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.1'); | ||
377 | |||
378 | if (basename(__FILE__) == 'config.php') { | ||
379 | if (file_exists(dirname(__FILE__).'/custom_config.php')) { | ||
380 | require_once dirname(__FILE__).'/custom_config.php'; | ||
381 | } | ||
382 | |||
383 | // check for environment variables - often used on cloud platforms | ||
384 | // environment variables should be prefixed with 'ftr_', e.g. | ||
385 | // ftr_max_entries: 1 | ||
386 | // will set the max_entries value to 1. | ||
387 | foreach ($options as $_key=>&$_val) { | ||
388 | $_key = "ftr_$_key"; | ||
389 | if (($_env = getenv($_key)) !== false) { | ||
390 | if (is_array($_val)) { | ||
391 | if ($_key === 'ftr_admin_credentials') { | ||
392 | $_val = array_combine(array('username', 'password'), array_map('trim', explode(':', $_env, 2))); | ||
393 | if ($_val === false) $_val = array('username'=>'admin', 'password'=>''); | ||
394 | } | ||
395 | } elseif ($_env === 'true' || $_env === 'false') { | ||
396 | $_val = ($_env === 'true'); | ||
397 | } elseif (is_numeric($_env)) { | ||
398 | $_val = (int)$_env; | ||
399 | } else { // string | ||
400 | $_val = $_env; | ||
401 | } | ||
402 | } | ||
403 | } | ||
404 | unset($_key, $_val, $_env); | ||
405 | } \ No newline at end of file | ||
diff --git a/inc/3rdparty/content-extractor/SiteConfig.php b/inc/3rdparty/content-extractor/SiteConfig.php deleted file mode 100644 index 089e10c6..00000000 --- a/inc/3rdparty/content-extractor/SiteConfig.php +++ /dev/null | |||
@@ -1,184 +0,0 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * Site Config | ||
4 | * | ||
5 | * Each instance of this class should hold extraction patterns and other directives | ||
6 | * for a website. See ContentExtractor class to see how it's used. | ||
7 | * | ||
8 | * @version 0.6 | ||
9 | * @date 2011-10-30 | ||
10 | * @author Keyvan Minoukadeh | ||
11 | * @copyright 2011 Keyvan Minoukadeh | ||
12 | * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3 | ||
13 | */ | ||
14 | |||
15 | class SiteConfig | ||
16 | { | ||
17 | // Use first matching element as title (0 or more xpath expressions) | ||
18 | public $title = array(); | ||
19 | |||
20 | // Use first matching element as body (0 or more xpath expressions) | ||
21 | public $body = array(); | ||
22 | |||
23 | // Use first matching element as author (0 or more xpath expressions) | ||
24 | public $author = array(); | ||
25 | |||
26 | // Use first matching element as date (0 or more xpath expressions) | ||
27 | public $date = array(); | ||
28 | |||
29 | // Strip elements matching these xpath expressions (0 or more) | ||
30 | public $strip = array(); | ||
31 | |||
32 | // Strip elements which contain these strings (0 or more) in the id or class attribute | ||
33 | public $strip_id_or_class = array(); | ||
34 | |||
35 | // Strip images which contain these strings (0 or more) in the src attribute | ||
36 | public $strip_image_src = array(); | ||
37 | |||
38 | // Additional HTTP headers to send | ||
39 | // NOT YET USED | ||
40 | public $http_header = array(); | ||
41 | |||
42 | // Process HTML with tidy before creating DOM | ||
43 | public $tidy = true; | ||
44 | |||
45 | // Autodetect title/body if xpath expressions fail to produce results. | ||
46 | // Note that this applies to title and body separately, ie. | ||
47 | // * if we get a body match but no title match, this option will determine whether we autodetect title | ||
48 | // * if neither match, this determines whether we autodetect title and body. | ||
49 | // Also note that this only applies when there is at least one xpath expression in title or body, ie. | ||
50 | // * if title and body are both empty (no xpath expressions), this option has no effect (both title and body will be auto-detected) | ||
51 | // * if there's an xpath expression for title and none for body, body will be auto-detected and this option will determine whether we auto-detect title if the xpath expression for it fails to produce results. | ||
52 | // Usage scenario: you want to extract something specific from a set of URLs, e.g. a table, and if the table is not found, you want to ignore the entry completely. Auto-detection is unlikely to succeed here, so you construct your patterns and set this option to false. Another scenario may be a site where auto-detection has proven to fail (or worse, picked up the wrong content). | ||
53 | public $autodetect_on_failure = true; | ||
54 | |||
55 | // Clean up content block - attempt to remove elements that appear to be superfluous | ||
56 | public $prune = true; | ||
57 | |||
58 | // Test URL - if present, can be used to test the config above | ||
59 | public $test_url = null; | ||
60 | |||
61 | // Single-page link - should identify a link element or URL pointing to the page holding the entire article | ||
62 | // This is useful for sites which split their articles across multiple pages. Links to such pages tend to | ||
63 | // display the first page with links to the other pages at the bottom. Often there is also a link to a page | ||
64 | // which displays the entire article on one page (e.g. 'print view'). | ||
65 | // This should be an XPath expression identifying the link to that page. If present and we find a match, | ||
66 | // we will retrieve that page and the rest of the options in this config will be applied to the new page. | ||
67 | public $single_page_link = array(); | ||
68 | |||
69 | // Single-page link in feed? - same as above, but patterns applied to item description HTML taken from feed | ||
70 | public $single_page_link_in_feed = array(); | ||
71 | |||
72 | // TODO: which parser to use for turning raw HTML into a DOMDocument | ||
73 | public $parser = 'libxml'; | ||
74 | |||
75 | // String replacement to be made on HTML before processing begins | ||
76 | public $replace_string = array(); | ||
77 | |||
78 | // the options below cannot be set in the config files which this class represents | ||
79 | |||
80 | public static $debug = false; | ||
81 | protected static $config_path; | ||
82 | protected static $config_path_fallback; | ||
83 | protected static $config_cache = array(); | ||
84 | const HOSTNAME_REGEX = '/^(([a-zA-Z0-9-]*[a-zA-Z0-9])\.)*([A-Za-z0-9-]*[A-Za-z0-9])$/'; | ||
85 | |||
86 | protected static function debug($msg) { | ||
87 | if (self::$debug) { | ||
88 | $mem = round(memory_get_usage()/1024, 2); | ||
89 | $memPeak = round(memory_get_peak_usage()/1024, 2); | ||
90 | echo '* ',$msg; | ||
91 | echo ' - mem used: ',$mem," (peak: $memPeak)\n"; | ||
92 | ob_flush(); | ||
93 | flush(); | ||
94 | } | ||
95 | } | ||
96 | |||
97 | public static function set_config_path($path, $fallback=null) { | ||
98 | self::$config_path = $path; | ||
99 | self::$config_path_fallback = $fallback; | ||
100 | } | ||
101 | |||
102 | public static function add_to_cache($host, SiteConfig $config) { | ||
103 | $host = strtolower($host); | ||
104 | self::$config_cache[$host] = $config; | ||
105 | } | ||
106 | |||
107 | // returns SiteConfig instance if an appropriate one is found, false otherwise | ||
108 | public static function build($host) { | ||
109 | $host = strtolower($host); | ||
110 | if (substr($host, 0, 4) == 'www.') $host = substr($host, 4); | ||
111 | if (!$host || (strlen($host) > 200) || !preg_match(self::HOSTNAME_REGEX, $host)) return false; | ||
112 | // check for site configuration | ||
113 | $try = array($host); | ||
114 | $split = explode('.', $host); | ||
115 | if (count($split) > 1) { | ||
116 | array_shift($split); | ||
117 | $try[] = '.'.implode('.', $split); | ||
118 | } | ||
119 | foreach ($try as $h) { | ||
120 | if (array_key_exists($h, self::$config_cache)) { | ||
121 | self::debug("... cached ($h)"); | ||
122 | return self::$config_cache[$h]; | ||
123 | } elseif (file_exists(self::$config_path."/$h.txt")) { | ||
124 | self::debug("... from file ($h)"); | ||
125 | $file = self::$config_path."/$h.txt"; | ||
126 | break; | ||
127 | } | ||
128 | } | ||
129 | if (!isset($file)) { | ||
130 | if (isset(self::$config_path_fallback)) { | ||
131 | self::debug("... trying fallback ($host)"); | ||
132 | foreach ($try as $h) { | ||
133 | if (file_exists(self::$config_path_fallback."/$h.txt")) { | ||
134 | self::debug("... from fallback file ($h)"); | ||
135 | $file = self::$config_path_fallback."/$h.txt"; | ||
136 | break; | ||
137 | } | ||
138 | } | ||
139 | if (!isset($file)) { | ||
140 | self::debug("... no match in fallback directory"); | ||
141 | return false; | ||
142 | } | ||
143 | } else { | ||
144 | self::debug("... no match ($host)"); | ||
145 | return false; | ||
146 | } | ||
147 | } | ||
148 | $config_file = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); | ||
149 | if (!$config_file || !is_array($config_file)) return false; | ||
150 | $config = new SiteConfig(); | ||
151 | foreach ($config_file as $line) { | ||
152 | $line = trim($line); | ||
153 | |||
154 | // skip comments, empty lines | ||
155 | if ($line == '' || $line[0] == '#') continue; | ||
156 | |||
157 | // get command | ||
158 | $command = explode(':', $line, 2); | ||
159 | // if there's no colon ':', skip this line | ||
160 | if (count($command) != 2) continue; | ||
161 | $val = trim($command[1]); | ||
162 | $command = trim($command[0]); | ||
163 | if ($command == '' || $val == '') continue; | ||
164 | |||
165 | // check for commands where we accept multiple statements | ||
166 | if (in_array($command, array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'http_header'))) { | ||
167 | array_push($config->$command, $val); | ||
168 | // check for single statement commands that evaluate to true or false | ||
169 | } elseif (in_array($command, array('tidy', 'prune', 'autodetect_on_failure'))) { | ||
170 | $config->$command = ($val == 'yes'); | ||
171 | // check for single statement commands stored as strings | ||
172 | } elseif (in_array($command, array('test_url', 'parser'))) { | ||
173 | $config->$command = $val; | ||
174 | } elseif ((substr($command, -1) == ')') && preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match)) { | ||
175 | if (in_array($match[1], array('replace_string'))) { | ||
176 | $command = $match[1]; | ||
177 | array_push($config->$command, array($match[2], $val)); | ||
178 | } | ||
179 | } | ||
180 | } | ||
181 | return $config; | ||
182 | } | ||
183 | } | ||
184 | ?> \ No newline at end of file | ||
diff --git a/inc/3rdparty/feedwriter/DummySingleItemFeed.php b/inc/3rdparty/feedwriter/DummySingleItemFeed.php deleted file mode 100644 index 5f2f871e..00000000 --- a/inc/3rdparty/feedwriter/DummySingleItemFeed.php +++ /dev/null | |||
@@ -1,24 +0,0 @@ | |||
1 | <?php | ||
2 | // create single item dummy feed object | ||
3 | class DummySingleItemFeed { | ||
4 | public $item; | ||
5 | function __construct($url) { $this->item = new DummySingleItem($url); } | ||
6 | public function get_title() { return ''; } | ||
7 | public function get_description() { return 'Content extracted from '.$this->item->url; } | ||
8 | public function get_link() { return $this->item->url; } | ||
9 | public function get_language() { return false; } | ||
10 | public function get_image_url() { return false; } | ||
11 | public function get_items($start=0, $max=1) { return array(0=>$this->item); } | ||
12 | } | ||
13 | class DummySingleItem { | ||
14 | public $url; | ||
15 | function __construct($url) { $this->url = $url; } | ||
16 | public function get_permalink() { return $this->url; } | ||
17 | public function get_title() { return ''; } | ||
18 | public function get_date($format='') { return false; } | ||
19 | public function get_author($key=0) { return null; } | ||
20 | public function get_authors() { return null; } | ||
21 | public function get_description() { return ''; } | ||
22 | public function get_enclosure($key=0, $prefer=null) { return null; } | ||
23 | public function get_enclosures() { return null; } | ||
24 | } \ No newline at end of file | ||
diff --git a/inc/3rdparty/libraries/Zend/Cache.php b/inc/3rdparty/libraries/Zend/Cache.php new file mode 100644 index 00000000..d28cb559 --- /dev/null +++ b/inc/3rdparty/libraries/Zend/Cache.php | |||
@@ -0,0 +1,250 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * Zend Framework | ||
4 | * | ||
5 | * LICENSE | ||
6 | * | ||
7 | * This source file is subject to the new BSD license that is bundled | ||
8 | * with this package in the file LICENSE.txt. | ||
9 | * It is also available through the world-wide-web at this URL: | ||
10 | * http://framework.zend.com/license/new-bsd | ||
11 | * If you did not receive a copy of the license and are unable to | ||
12 | * obtain it through the world-wide-web, please send an email | ||
13 | * to license@zend.com so we can send you a copy immediately. | ||
14 | * | ||
15 | * @category Zend | ||
16 | * @package Zend_Cache | ||
17 | * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com) | ||
18 | * @license http://framework.zend.com/license/new-bsd New BSD License | ||
19 | * @version $Id: Cache.php 24656 2012-02-26 06:02:53Z adamlundrigan $ | ||
20 | */ | ||
21 | |||
22 | |||
23 | /** | ||
24 | * @package Zend_Cache | ||
25 | * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com) | ||
26 | * @license http://framework.zend.com/license/new-bsd New BSD License | ||
27 | */ | ||
abstract class Zend_Cache
{

    /**
     * Standard frontends
     *
     * @var array
     */
    public static $standardFrontends = array('Core', 'Output', 'Class', 'File', 'Function', 'Page');

    /**
     * Standard backends
     *
     * @var array
     */
    public static $standardBackends = array('File', 'Sqlite', 'Memcached', 'Libmemcached', 'Apc', 'ZendPlatform',
                                            'Xcache', 'TwoLevels', 'WinCache', 'ZendServer_Disk', 'ZendServer_ShMem');

    /**
     * Standard backends which implement the ExtendedInterface
     *
     * @var array
     */
    public static $standardExtendedBackends = array('File', 'Apc', 'TwoLevels', 'Memcached', 'Libmemcached', 'Sqlite', 'WinCache');

    /**
     * Only for backward compatibility (may be removed in next major release)
     *
     * @var array
     * @deprecated
     */
    public static $availableFrontends = array('Core', 'Output', 'Class', 'File', 'Function', 'Page');

    /**
     * Only for backward compatibility (may be removed in next major release)
     *
     * @var array
     * @deprecated
     */
    public static $availableBackends = array('File', 'Sqlite', 'Memcached', 'Libmemcached', 'Apc', 'ZendPlatform', 'Xcache', 'WinCache', 'TwoLevels');

    /**
     * Consts for clean() method
     */
    const CLEANING_MODE_ALL              = 'all';
    const CLEANING_MODE_OLD              = 'old';
    const CLEANING_MODE_MATCHING_TAG     = 'matchingTag';
    const CLEANING_MODE_NOT_MATCHING_TAG = 'notMatchingTag';
    const CLEANING_MODE_MATCHING_ANY_TAG = 'matchingAnyTag';

    /**
     * Factory: builds (or accepts) a backend and a frontend, wires the backend
     * into the frontend with setBackend() and returns the frontend.
     *
     * @param mixed   $frontend             frontend name (string) or Zend_Cache_Frontend_ object
     * @param mixed   $backend              backend name (string) or Zend_Cache_Backend_ object
     * @param array   $frontendOptions      associative array of options for the corresponding frontend constructor
     * @param array   $backendOptions       associative array of options for the corresponding backend constructor
     * @param boolean $customFrontendNaming if true, the frontend argument is used as a complete class name ; if false, the frontend argument is used as the end of "Zend_Cache_Frontend_[...]" class name
     * @param boolean $customBackendNaming  if true, the backend argument is used as a complete class name ; if false, the backend argument is used as the end of "Zend_Cache_Backend_[...]" class name
     * @param boolean $autoload             if true, there will be no require_once for backend and frontend (useful only for custom backends/frontends)
     * @throws Zend_Cache_Exception
     * @return Zend_Cache_Core|Zend_Cache_Frontend
     */
    public static function factory($frontend, $backend, $frontendOptions = array(), $backendOptions = array(), $customFrontendNaming = false, $customBackendNaming = false, $autoload = false)
    {
        if (is_string($backend)) {
            $backendObject = self::_makeBackend($backend, $backendOptions, $customBackendNaming, $autoload);
        } elseif ($backend instanceof Zend_Cache_Backend_Interface) {
            // instanceof is false for non-objects, so no separate is_object() test is needed
            $backendObject = $backend;
        } else {
            self::throwException('backend must be a backend name (string) or an object which implements Zend_Cache_Backend_Interface');
        }

        if (is_string($frontend)) {
            $frontendObject = self::_makeFrontend($frontend, $frontendOptions, $customFrontendNaming, $autoload);
        } elseif (is_object($frontend)) {
            $frontendObject = $frontend;
        } else {
            self::throwException('frontend must be a frontend name (string) or an object');
        }

        $frontendObject->setBackend($backendObject);
        return $frontendObject;
    }

    /**
     * Backend Constructor
     *
     * @param string  $backend             backend name, or complete class name when $customBackendNaming is true
     * @param array   $backendOptions      options passed to the backend constructor
     * @param boolean $customBackendNaming if true, $backend is used as-is instead of being normalized
     * @param boolean $autoload            if true, no file is required for a custom backend
     * @throws Zend_Cache_Exception
     * @return Zend_Cache_Backend
     */
    public static function _makeBackend($backend, $backendOptions, $customBackendNaming = false, $autoload = false)
    {
        if (!$customBackendNaming) {
            $backend = self::_normalizeName($backend);
        }
        // strict comparison: only an exact string match selects a bundled backend
        if (in_array($backend, self::$standardBackends, true)) {
            // we use a standard backend
            $backendClass = 'Zend_Cache_Backend_' . $backend;
            // security controls are explicit
            self::_requireBundledClass($backendClass);
        } else {
            // we use a custom backend
            if (!preg_match('~^[\w\\\\]+$~D', $backend)) {
                self::throwException("Invalid backend name [$backend]");
            }
            // we use this boolean to avoid an API break
            $backendClass = $customBackendNaming ? $backend : 'Zend_Cache_Backend_' . $backend;
            if (!$autoload) {
                self::_requireFromIncludePath($backendClass);
            }
        }
        return new $backendClass($backendOptions);
    }

    /**
     * Frontend Constructor
     *
     * @param string  $frontend             frontend name, or complete class name when $customFrontendNaming is true
     * @param array   $frontendOptions      options passed to the frontend constructor
     * @param boolean $customFrontendNaming if true, $frontend is used as-is instead of being normalized
     * @param boolean $autoload             if true, no file is required for a custom frontend
     * @throws Zend_Cache_Exception
     * @return Zend_Cache_Core|Zend_Cache_Frontend
     */
    public static function _makeFrontend($frontend, $frontendOptions = array(), $customFrontendNaming = false, $autoload = false)
    {
        if (!$customFrontendNaming) {
            $frontend = self::_normalizeName($frontend);
        }
        // strict comparison: only an exact string match selects a bundled frontend
        if (in_array($frontend, self::$standardFrontends, true)) {
            // we use a standard frontend
            // For perfs reasons, with frontend == 'Core', we can interact with the Core itself
            $frontendClass = 'Zend_Cache_' . ($frontend != 'Core' ? 'Frontend_' : '') . $frontend;
            // security controls are explicit
            self::_requireBundledClass($frontendClass);
        } else {
            // we use a custom frontend
            if (!preg_match('~^[\w\\\\]+$~D', $frontend)) {
                self::throwException("Invalid frontend name [$frontend]");
            }
            // we use this boolean to avoid an API break
            $frontendClass = $customFrontendNaming ? $frontend : 'Zend_Cache_Frontend_' . $frontend;
            if (!$autoload) {
                self::_requireFromIncludePath($frontendClass);
            }
        }
        return new $frontendClass($frontendOptions);
    }

    /**
     * Throw an exception
     *
     * Note : for perf reasons, the "load" of Zend/Cache/Exception is dynamic.
     * The file is only required when the class is not defined yet; the copy
     * sitting next to this file is preferred, with the include_path lookup
     * kept as a fallback.
     *
     * @param  string    $msg Message for the exception
     * @param  Exception $e   Optional previous exception
     * @throws Zend_Cache_Exception
     */
    public static function throwException($msg, Exception $e = null)
    {
        if (!class_exists('Zend_Cache_Exception', false)) {
            $bundled = dirname(__FILE__) . DIRECTORY_SEPARATOR . 'Cache' . DIRECTORY_SEPARATOR . 'Exception.php';
            require_once is_file($bundled) ? $bundled : 'Zend/Cache/Exception.php';
        }
        throw new Zend_Cache_Exception($msg, 0, $e);
    }

    /**
     * Normalize frontend and backend names to allow multiple words TitleCased
     *
     * '-', '_' and '.' act as word separators; a leading "ZendServer" is
     * rewritten to the "ZendServer_" prefix used by the standard backend names.
     *
     * @param  string $name  Name to normalize
     * @return string
     */
    protected static function _normalizeName($name)
    {
        $name = ucfirst(strtolower($name));
        $name = str_replace(array('-', '_', '.'), ' ', $name);
        $name = ucwords($name);
        $name = str_replace(' ', '', $name);
        if (stripos($name, 'ZendServer') === 0) {
            $name = 'ZendServer_' . substr($name, strlen('ZendServer'));
        }

        return $name;
    }

    /**
     * Require the file of a bundled class, resolved relative to the parent
     * directory of this file (underscores in the class name become directory
     * separators).
     *
     * @param  string $className
     * @return void
     */
    private static function _requireBundledClass($className)
    {
        $libraryRoot = realpath(dirname(__FILE__) . '/..');
        require_once $libraryRoot . DIRECTORY_SEPARATOR . str_replace('_', DIRECTORY_SEPARATOR, $className) . '.php';
    }

    /**
     * Require the file of a custom class through the include_path.
     *
     * @param  string $className
     * @throws Zend_Cache_Exception if the file cannot be opened through the include_path
     * @return void
     */
    private static function _requireFromIncludePath($className)
    {
        $file = str_replace('_', DIRECTORY_SEPARATOR, $className) . '.php';
        if (!self::_isReadable($file)) {
            self::throwException("file $file not found in include_path");
        }
        require_once $file;
    }

    /**
     * Returns TRUE if the $filename is readable, or FALSE otherwise.
     * This function uses the PHP include_path, where PHP's is_readable()
     * does not.
     *
     * Note : this method comes from Zend_Loader (see #ZF-2891 for details)
     *
     * @param string  $filename
     * @return boolean
     */
    private static function _isReadable($filename)
    {
        // third argument makes fopen() search the include_path
        if (!$fh = @fopen($filename, 'r', true)) {
            return false;
        }
        @fclose($fh);
        return true;
    }

}
diff --git a/inc/3rdparty/libraries/Zend/Cache/Backend.php b/inc/3rdparty/libraries/Zend/Cache/Backend.php new file mode 100644 index 00000000..803fd446 --- /dev/null +++ b/inc/3rdparty/libraries/Zend/Cache/Backend.php | |||
@@ -0,0 +1,290 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * Zend Framework | ||
4 | * | ||
5 | * LICENSE | ||
6 | * | ||
7 | * This source file is subject to the new BSD license that is bundled | ||
8 | * with this package in the file LICENSE.txt. | ||
9 | * It is also available through the world-wide-web at this URL: | ||
10 | * http://framework.zend.com/license/new-bsd | ||
11 | * If you did not receive a copy of the license and are unable to | ||
12 | * obtain it through the world-wide-web, please send an email | ||
13 | * to license@zend.com so we can send you a copy immediately. | ||
14 | * | ||
15 | * @category Zend | ||
16 | * @package Zend_Cache | ||
17 | * @subpackage Zend_Cache_Backend | ||
18 | * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com) | ||
19 | * @license http://framework.zend.com/license/new-bsd New BSD License | ||
20 | * @version $Id: Backend.php 24989 2012-06-21 07:24:13Z mabe $ | ||
21 | */ | ||
22 | |||
23 | |||
24 | /** | ||
25 | * @package Zend_Cache | ||
26 | * @subpackage Zend_Cache_Backend | ||
27 | * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com) | ||
28 | * @license http://framework.zend.com/license/new-bsd New BSD License | ||
29 | */ | ||
class Zend_Cache_Backend
{
    /**
     * Frontend or Core directives
     *
     * =====> (int) lifetime :
     * - Cache lifetime (in seconds)
     * - If null, the cache is valid forever
     *
     * =====> (int) logging :
     * - if set to true, logging is activated through Zend_Log
     *
     * @var array directives
     */
    protected $_directives = array(
        'lifetime' => 3600,
        'logging'  => false,
        'logger'   => null
    );

    /**
     * Available options
     *
     * @var array available options
     */
    protected $_options = array();

    /**
     * Constructor
     *
     * Every entry of $options is passed to setOption().
     *
     * @param  array $options Associative array of options
     * @throws Zend_Cache_Exception
     * @return void
     */
    public function __construct(array $options = array())
    {
        // foreach instead of each(): each() no longer exists as of PHP 8.0
        foreach ($options as $name => $value) {
            $this->setOption($name, $value);
        }
    }

    /**
     * Set the frontend directives
     *
     * Names are lower-cased; names that are not keys of $_directives are
     * silently ignored.
     *
     * @param  array $directives Assoc of directives
     * @throws Zend_Cache_Exception
     * @return void
     */
    public function setDirectives($directives)
    {
        if (!is_array($directives)) {
            Zend_Cache::throwException('Directives parameter must be an array');
        }
        // foreach instead of each(): each() no longer exists as of PHP 8.0
        foreach ($directives as $name => $value) {
            if (!is_string($name)) {
                Zend_Cache::throwException("Incorrect option name : $name");
            }
            $name = strtolower($name);
            if (array_key_exists($name, $this->_directives)) {
                $this->_directives[$name] = $value;
            }
        }

        $this->_loggerSanity();
    }

    /**
     * Set an option
     *
     * The name is lower-cased; a name that is not a key of $_options is
     * silently ignored.
     *
     * @param  string $name
     * @param  mixed  $value
     * @throws Zend_Cache_Exception
     * @return void
     */
    public function setOption($name, $value)
    {
        if (!is_string($name)) {
            Zend_Cache::throwException("Incorrect option name : $name");
        }
        $name = strtolower($name);
        if (array_key_exists($name, $this->_options)) {
            $this->_options[$name] = $value;
        }
    }

    /**
     * Returns an option
     *
     * Options are looked up first, then directives.
     *
     * @param string $name the option (or directive) name to return
     * @throws Zend_Cache_Exception if the name is neither an option nor a directive
     * @return mixed
     */
    public function getOption($name)
    {
        $name = strtolower($name);

        if (array_key_exists($name, $this->_options)) {
            return $this->_options[$name];
        }

        if (array_key_exists($name, $this->_directives)) {
            return $this->_directives[$name];
        }

        Zend_Cache::throwException("Incorrect option name : {$name}");
    }

    /**
     * Get the life time
     *
     * if $specificLifetime is not false, the given specific life time is used
     * else, the global lifetime is used
     *
     * @param  int $specificLifetime
     * @return int Cache life time
     */
    public function getLifetime($specificLifetime)
    {
        if ($specificLifetime === false) {
            return $this->_directives['lifetime'];
        }
        return $specificLifetime;
    }

    /**
     * Return true if the automatic cleaning is available for the backend
     *
     * DEPRECATED : use getCapabilities() instead
     *
     * @deprecated
     * @return boolean
     */
    public function isAutomaticCleaningAvailable()
    {
        return true;
    }

    /**
     * Determine system TMP directory and detect if we have read and write access
     *
     * Candidates are tried in this order: environment/server variables,
     * the upload_tmp_dir ini setting, sys_get_temp_dir(), the directory in
     * which tempnam() creates a file, then the literal '/tmp' and '\temp'.
     *
     * inspired from Zend_File_Transfer_Adapter_Abstract
     *
     * @return string
     * @throws Zend_Cache_Exception if unable to determine directory
     */
    public function getTmpDir()
    {
        foreach (array($_ENV, $_SERVER) as $tab) {
            foreach (array('TMPDIR', 'TEMP', 'TMP', 'windir', 'SystemRoot') as $key) {
                if (isset($tab[$key]) && is_string($tab[$key])) {
                    if (($key == 'windir') || ($key == 'SystemRoot')) {
                        // these variables name a parent directory; its "temp" child is the candidate
                        $dir = realpath($tab[$key] . '\\temp');
                    } else {
                        $dir = realpath($tab[$key]);
                    }
                    if ($this->_isGoodTmpDir($dir)) {
                        return $dir;
                    }
                }
            }
        }
        $upload = ini_get('upload_tmp_dir');
        if ($upload) {
            $dir = realpath($upload);
            if ($this->_isGoodTmpDir($dir)) {
                return $dir;
            }
        }
        if (function_exists('sys_get_temp_dir')) {
            $dir = sys_get_temp_dir();
            if ($this->_isGoodTmpDir($dir)) {
                return $dir;
            }
        }
        // Attempt to detect by creating a temporary file
        $tempFile = tempnam(md5(uniqid(rand(), TRUE)), '');
        if ($tempFile) {
            $dir = realpath(dirname($tempFile));
            unlink($tempFile);
            if ($this->_isGoodTmpDir($dir)) {
                return $dir;
            }
        }
        if ($this->_isGoodTmpDir('/tmp')) {
            return '/tmp';
        }
        if ($this->_isGoodTmpDir('\\temp')) {
            return '\\temp';
        }
        Zend_Cache::throwException('Could not determine temp directory, please specify a cache_dir manually');
    }

    /**
     * Verify if the given temporary directory is readable and writable
     *
     * @param string $dir temporary directory
     * @return boolean true if the directory is ok
     */
    protected function _isGoodTmpDir($dir)
    {
        return is_readable($dir) && is_writable($dir);
    }

    /**
     * Make sure if we enable logging that the Zend_Log class
     * is available.
     * Create a default log object if none is set.
     *
     * @throws Zend_Cache_Exception if a logger is set but is not a Zend_Log
     * @return void
     */
    protected function _loggerSanity()
    {
        if (!isset($this->_directives['logging']) || !$this->_directives['logging']) {
            return;
        }

        if (isset($this->_directives['logger'])) {
            if ($this->_directives['logger'] instanceof Zend_Log) {
                return;
            }
            Zend_Cache::throwException('Logger object is not an instance of Zend_Log class.');
        }

        // Create a default logger to the standard output stream
        require_once 'Zend/Log.php';
        require_once 'Zend/Log/Writer/Stream.php';
        require_once 'Zend/Log/Filter/Priority.php';
        $logger = new Zend_Log(new Zend_Log_Writer_Stream('php://output'));
        $logger->addFilter(new Zend_Log_Filter_Priority(Zend_Log::WARN, '<='));
        $this->_directives['logger'] = $logger;
    }

    /**
     * Log a message, by default at the WARN (4) priority.
     *
     * Does nothing when the 'logging' directive is off.
     *
     * @param  string $message
     * @param  int    $priority
     * @throws Zend_Cache_Exception if logging is on but no valid Zend_Log logger is set
     * @return void
     */
    protected function _log($message, $priority = 4)
    {
        if (!$this->_directives['logging']) {
            return;
        }

        if (!isset($this->_directives['logger'])) {
            Zend_Cache::throwException('Logging is enabled but logger is not set.');
        }
        $logger = $this->_directives['logger'];
        if (!$logger instanceof Zend_Log) {
            Zend_Cache::throwException('Logger object is not an instance of Zend_Log class.');
        }
        $logger->log($message, $priority);
    }
}
diff --git a/inc/3rdparty/libraries/Zend/Cache/Backend/ExtendedInterface.php b/inc/3rdparty/libraries/Zend/Cache/Backend/ExtendedInterface.php new file mode 100644 index 00000000..c192baaf --- /dev/null +++ b/inc/3rdparty/libraries/Zend/Cache/Backend/ExtendedInterface.php | |||
@@ -0,0 +1,127 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * Zend Framework | ||
4 | * | ||
5 | * LICENSE | ||
6 | * | ||
7 | * This source file is subject to the new BSD license that is bundled | ||
8 | * with this package in the file LICENSE.txt. | ||
9 | * It is also available through the world-wide-web at this URL: | ||
10 | * http://framework.zend.com/license/new-bsd | ||
11 | * If you did not receive a copy of the license and are unable to | ||
12 | * obtain it through the world-wide-web, please send an email | ||
13 | * to license@zend.com so we can send you a copy immediately. | ||
14 | * | ||
15 | * @category Zend | ||
16 | * @package Zend_Cache | ||
17 | * @subpackage Zend_Cache_Backend | ||
18 | * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com) | ||
19 | * @license http://framework.zend.com/license/new-bsd New BSD License | ||
20 | * @version $Id: ExtendedInterface.php 24593 2012-01-05 20:35:02Z matthew $ | ||
21 | */ | ||
22 | |||
23 | /** | ||
24 | * @see Zend_Cache_Backend_Interface | ||
25 | */ | ||
26 | //require_once 'Zend/Cache/Backend/Interface.php'; | ||
27 | require_once dirname(__FILE__).'/Interface.php'; | ||
28 | |||
29 | /** | ||
30 | * @package Zend_Cache | ||
31 | * @subpackage Zend_Cache_Backend | ||
32 | * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com) | ||
33 | * @license http://framework.zend.com/license/new-bsd New BSD License | ||
34 | */ | ||
35 | interface Zend_Cache_Backend_ExtendedInterface extends Zend_Cache_Backend_Interface | ||
36 | { | ||
37 | |||
38 | /** | ||
39 | * Return an array of stored cache ids | ||
40 | * | ||
41 | * @return array array of stored cache ids (string) | ||
42 | */ | ||
43 | public function getIds(); | ||
44 | |||
45 | /** | ||
46 | * Return an array of stored tags | ||
47 | * | ||
48 | * @return array array of stored tags (string) | ||
49 | */ | ||
50 | public function getTags(); | ||
51 | |||
52 | /** | ||
53 | * Return an array of stored cache ids which match given tags | ||
54 | * | ||
55 | * In case of multiple tags, a logical AND is made between tags | ||
56 | * | ||
57 | * @param array $tags array of tags | ||
58 | * @return array array of matching cache ids (string) | ||
59 | */ | ||
60 | public function getIdsMatchingTags($tags = array()); | ||
61 | |||
62 | /** | ||
63 | * Return an array of stored cache ids which don't match given tags | ||
64 | * | ||
65 | * In case of multiple tags, a logical OR is made between tags | ||
66 | * | ||
67 | * @param array $tags array of tags | ||
68 | * @return array array of not matching cache ids (string) | ||
69 | */ | ||
70 | public function getIdsNotMatchingTags($tags = array()); | ||
71 | |||
72 | /** | ||
73 | * Return an array of stored cache ids which match any given tags | ||
74 | * | ||
75 | * In case of multiple tags, a logical OR is made between tags | ||
76 | * | ||
77 | * @param array $tags array of tags | ||
78 | * @return array array of any matching cache ids (string) | ||
79 | */ | ||
80 | public function getIdsMatchingAnyTags($tags = array()); | ||
81 | |||
82 | /** | ||
83 | * Return the filling percentage of the backend storage | ||
84 | * | ||
85 | * @return int integer between 0 and 100 | ||
86 | */ | ||
87 | public function getFillingPercentage(); | ||
88 | |||
89 | /** | ||
90 | * Return an array of metadatas for the given cache id | ||
91 | * | ||
92 | * The array must include these keys : | ||
93 | * - expire : the expire timestamp | ||
94 | * - tags : a string array of tags | ||
95 | * - mtime : timestamp of last modification time | ||
96 | * | ||
97 | * @param string $id cache id | ||
98 | * @return array|false array of metadatas (false if the cache id is not found) | ||
99 | */ | ||
100 | public function getMetadatas($id); | ||
101 | |||
102 | /** | ||
103 | * Give (if possible) an extra lifetime to the given cache id | ||
104 | * | ||
105 | * @param string $id cache id | ||
106 | * @param int $extraLifetime | ||
107 | * @return boolean true if ok | ||
108 | */ | ||
109 | public function touch($id, $extraLifetime); | ||
110 | |||
111 | /** | ||
112 | * Return an associative array of capabilities (booleans) of the backend | ||
113 | * | ||
114 | * The array must include these keys : | ||
115 | * - automatic_cleaning (is automatic cleaning necessary) | ||
116 | * - tags (are tags supported) | ||
117 | * - expired_read (is it possible to read expired cache records | ||
118 | * (for doNotTestCacheValidity option for example)) | ||
119 | * - priority (does the backend deal with priority when saving) | ||
120 | * - infinite_lifetime (can infinite lifetime work with this backend) | ||
121 | * - get_list (is it possible to get the list of cache ids and the complete list of tags) | ||
122 | * | ||
123 | * @return array associative array of capabilities | ||
124 | */ | ||
125 | public function getCapabilities(); | ||
126 | |||
127 | } | ||
diff --git a/inc/3rdparty/libraries/Zend/Cache/Backend/File.php b/inc/3rdparty/libraries/Zend/Cache/Backend/File.php new file mode 100644 index 00000000..5affbcb3 --- /dev/null +++ b/inc/3rdparty/libraries/Zend/Cache/Backend/File.php | |||
@@ -0,0 +1,1034 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * Zend Framework | ||
4 | * | ||
5 | * LICENSE | ||
6 | * | ||
7 | * This source file is subject to the new BSD license that is bundled | ||
8 | * with this package in the file LICENSE.txt. | ||
9 | * It is also available through the world-wide-web at this URL: | ||
10 | * http://framework.zend.com/license/new-bsd | ||
11 | * If you did not receive a copy of the license and are unable to | ||
12 | * obtain it through the world-wide-web, please send an email | ||
13 | * to license@zend.com so we can send you a copy immediately. | ||
14 | * | ||
15 | * @category Zend | ||
16 | * @package Zend_Cache | ||
17 | * @subpackage Zend_Cache_Backend | ||
18 | * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com) | ||
19 | * @license http://framework.zend.com/license/new-bsd New BSD License | ||
20 | * @version $Id: File.php 24844 2012-05-31 19:01:36Z rob $ | ||
21 | */ | ||
22 | |||
23 | /** | ||
24 | * @see Zend_Cache_Backend_Interface | ||
25 | */ | ||
26 | //require_once 'Zend/Cache/Backend/ExtendedInterface.php'; | ||
27 | require_once dirname(__FILE__).'/ExtendedInterface.php'; | ||
28 | |||
29 | /** | ||
30 | * @see Zend_Cache_Backend | ||
31 | */ | ||
32 | //require_once 'Zend/Cache/Backend.php'; | ||
33 | require_once realpath(dirname(__FILE__).'/..').DIRECTORY_SEPARATOR.'Backend.php'; | ||
34 | |||
35 | |||
36 | |||
37 | /** | ||
38 | * @package Zend_Cache | ||
39 | * @subpackage Zend_Cache_Backend | ||
40 | * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com) | ||
41 | * @license http://framework.zend.com/license/new-bsd New BSD License | ||
42 | */ | ||
43 | class Zend_Cache_Backend_File extends Zend_Cache_Backend implements Zend_Cache_Backend_ExtendedInterface | ||
44 | { | ||
45 | /** | ||
46 | * Available options | ||
47 | * | ||
48 | * =====> (string) cache_dir : | ||
49 | * - Directory where to put the cache files | ||
50 | * | ||
51 | * =====> (boolean) file_locking : | ||
52 | * - Enable / disable file_locking | ||
53 | * - Can avoid cache corruption under bad circumstances but it doesn't work on multithread | ||
54 | * webservers and on NFS filesystems for example | ||
55 | * | ||
56 | * =====> (boolean) read_control : | ||
57 | * - Enable / disable read control | ||
58 | * - If enabled, a control key is embedded in cache file and this key is compared with the one | ||
59 | * calculated after the reading. | ||
60 | * | ||
61 | * =====> (string) read_control_type : | ||
62 | * - Type of read control (only if read control is enabled). Available values are : | ||
63 | * 'md5' for a md5 hash control (best but slowest) | ||
64 | * 'crc32' for a crc32 hash control (lightly less safe but faster, better choice) | ||
65 | * 'adler32' for an adler32 hash control (excellent choice too, faster than crc32) | ||
66 | * 'strlen' for a length only test (fastest) | ||
67 | * | ||
68 | * =====> (int) hashed_directory_level : | ||
69 | * - Hashed directory level | ||
70 | * - Set the hashed directory structure level. 0 means "no hashed directory | ||
71 | * structure", 1 means "one level of directory", 2 means "two levels"... | ||
72 | * This option can speed up the cache only when you have many thousands of | ||
73 | * cache file. Only specific benchs can help you to choose the perfect value | ||
74 | * for you. Maybe, 1 or 2 is a good start. | ||
75 | * | ||
76 | * =====> (int) hashed_directory_umask : | ||
77 | * - deprecated | ||
78 | * - Permissions for hashed directory structure | ||
79 | * | ||
80 | * =====> (int) hashed_directory_perm : | ||
81 | * - Permissions for hashed directory structure | ||
82 | * | ||
83 | * =====> (string) file_name_prefix : | ||
84 | * - prefix for cache files | ||
85 | * - be really careful with this option because a too generic value in a system cache dir | ||
86 | * (like /tmp) can cause disasters when cleaning the cache | ||
87 | * | ||
88 | * =====> (int) cache_file_umask : | ||
89 | * - deprecated | ||
90 | * - Permissions for cache files | ||
91 | * | ||
92 | * =====> (int) cache_file_perm : | ||
93 | * - Permissions for cache files | ||
94 | * | ||
95 | * =====> (int) metadatas_array_max_size : | ||
96 | * - max size for the metadatas array (don't change this value unless you | ||
97 | * know what you are doing) | ||
98 | * | ||
99 | * @var array available options | ||
100 | */ | ||
101 | protected $_options = array( | ||
102 | 'cache_dir' => null, | ||
103 | 'file_locking' => true, | ||
104 | 'read_control' => true, | ||
105 | 'read_control_type' => 'crc32', | ||
106 | 'hashed_directory_level' => 0, | ||
107 | 'hashed_directory_perm' => 0700, | ||
108 | 'file_name_prefix' => 'zend_cache', | ||
109 | 'cache_file_perm' => 0600, | ||
110 | 'metadatas_array_max_size' => 100 | ||
111 | ); | ||
112 | |||
113 | /** | ||
114 | * Array of metadatas (each item is an associative array) | ||
115 | * | ||
116 | * @var array | ||
117 | */ | ||
118 | protected $_metadatasArray = array(); | ||
119 | |||
120 | |||
/**
 * Constructor
 *
 * Applies the options through the parent constructor, resolves the cache
 * directory (falling back to the detected temp directory when 'cache_dir'
 * is null), validates 'file_name_prefix' and 'metadatas_array_max_size',
 * then normalizes the two permission options.
 *
 * The deprecated 'hashed_directory_umask' / 'cache_file_umask' options are
 * still honoured: when the matching '*_perm' option is not given, the umask
 * value is used as the permission. Permissions given as strings are read as
 * octal.
 *
 * @param  array $options associative array of options
 * @throws Zend_Cache_Exception
 * @return void
 */
public function __construct(array $options = array())
{
    parent::__construct($options);
    if ($this->_options['cache_dir'] !== null) { // particular case for this option
        $this->setCacheDir($this->_options['cache_dir']);
    } else {
        $this->setCacheDir(self::getTmpDir() . DIRECTORY_SEPARATOR, false);
    }
    if (isset($this->_options['file_name_prefix'])) { // particular case for this option
        if (!preg_match('~^[a-zA-Z0-9_]+$~D', $this->_options['file_name_prefix'])) {
            Zend_Cache::throwException('Invalid file_name_prefix : must use only [a-zA-Z0-9_]');
        }
    }
    if ($this->_options['metadatas_array_max_size'] < 10) {
        Zend_Cache::throwException('Invalid metadatas_array_max_size, must be > 10');
    }

    $permissionOptions = array(
        'hashed_directory_perm' => 'hashed_directory_umask',
        'cache_file_perm'       => 'cache_file_umask',
    );
    foreach ($permissionOptions as $permName => $umaskName) {
        if (isset($options[$umaskName])) {
            // See #ZF-12047
            trigger_error("'$umaskName' is deprecated -> please use '$permName' instead", E_USER_NOTICE);
            if (!isset($options[$permName])) {
                $options[$permName] = $options[$umaskName];
            }
        }
        if (isset($options[$permName])) {
            // Read the value from $options, not $this->_options: the parent
            // constructor never stores the deprecated umask key, so the stored
            // value may still be the integer default at this point.
            $perm = $options[$permName];
            // See #ZF-4422 : string permissions are octal notation
            $this->_options[$permName] = is_string($perm) ? octdec($perm) : $perm;
        }
    }
}
169 | |||
/**
 * Set the cache_dir option (particular case of the setOption() method)
 *
 * The given path must be an existing, writable directory.
 *
 * @param  string  $value             Directory to use for the cache files
 * @param  boolean $trailingSeparator If true, the path is resolved with realpath() and
 *                                    normalized to end with exactly one DIRECTORY_SEPARATOR
 * @throws Zend_Cache_Exception
 * @return void
 */
public function setCacheDir($value, $trailingSeparator = true)
{
    if (is_dir($value) === false) {
        $message = sprintf('cache_dir "%s" must be a directory', $value);
        Zend_Cache::throwException($message);
    }
    if (is_writable($value) === false) {
        $message = sprintf('cache_dir "%s" is not writable', $value);
        Zend_Cache::throwException($message);
    }
    // strip any trailing slash / backslash before appending the separator
    $this->_options['cache_dir'] = $trailingSeparator
        ? rtrim(realpath($value), '\\/') . DIRECTORY_SEPARATOR
        : $value;
}
192 | |||
/**
 * Test if a cache is available for the given id and (if yes) return it (false else)
 *
 * When the 'read_control' option is enabled, the hash of the data read from
 * disk is compared with the hash stored in the metadatas; on mismatch the
 * record is removed and false is returned.
 *
 * @param  string  $id                     cache id
 * @param  boolean $doNotTestCacheValidity if set to true, the cache validity won't be tested
 * @return string|false cached datas
 */
public function load($id, $doNotTestCacheValidity = false)
{
    if (!($this->_test($id, $doNotTestCacheValidity))) {
        // The cache is not hit !
        return false;
    }
    $metadatas = $this->_getMetadatas($id);
    $file = $this->_file($id);
    $data = $this->_fileGetContents($file);
    if ($this->_options['read_control']) {
        $hashData = $this->_hash($data, $this->_options['read_control_type']);
        $hashControl = $metadatas['hash'];
        // Strict comparison: with "!=" two different numeric-looking hash
        // strings (e.g. "0e12..." and "0e34...") would be considered equal.
        if ($hashData !== $hashControl) {
            // Problem detected by the read control !
            $this->_log('Zend_Cache_Backend_File::load() / read_control : stored hash and computed hash do not match');
            $this->remove($id);
            return false;
        }
    }
    return $data;
}
221 | |||
/**
 * Test if a cache is available or not (for the given id)
 *
 * The PHP stat cache is cleared first, then the validity of the record is checked.
 *
 * @param  string $id cache id
 * @return mixed false (a cache is not available) or "last modified" timestamp (int) of the available cache record
 */
public function test($id)
{
    clearstatcache();
    $lastModified = $this->_test($id, false);
    return $lastModified;
}
233 | |||
/**
 * Save some string datas into a cache record
 *
 * Note : $data is always "string" (serialization is done by the
 * core not by the backend)
 *
 * The metadatas (hash, mtime, expire, tags) are written first, then the data
 * file. If the data file cannot be written, the metadatas written just
 * before are dropped so that they do not describe a record that was never
 * stored.
 *
 * @param  string $data             Datas to cache
 * @param  string $id               Cache id
 * @param  array  $tags             Array of strings, the cache record will be tagged by each string entry
 * @param  int    $specificLifetime If != false, set a specific lifetime for this cache record (null => infinite lifetime)
 * @return boolean true if no problem
 */
public function save($data, $id, $tags = array(), $specificLifetime = false)
{
    clearstatcache();
    $file = $this->_file($id);
    $path = $this->_path($id);
    if ($this->_options['hashed_directory_level'] > 0) {
        if (!is_writable($path)) {
            // maybe, we just have to build the directory structure
            $this->_recursiveMkdirAndChmod($id);
        }
        if (!is_writable($path)) {
            return false;
        }
    }
    if ($this->_options['read_control']) {
        $hash = $this->_hash($data, $this->_options['read_control_type']);
    } else {
        $hash = '';
    }
    $metadatas = array(
        'hash' => $hash,
        'mtime' => time(),
        'expire' => $this->_expireTime($this->getLifetime($specificLifetime)),
        'tags' => $tags
    );
    $res = $this->_setMetadatas($id, $metadatas);
    if (!$res) {
        $this->_log('Zend_Cache_Backend_File::save() / error on saving metadata');
        return false;
    }
    $res = $this->_filePutContents($file, $data);
    if (!$res) {
        // roll back the metadatas : they would advertise a valid record
        // although its data could not be written
        $this->_log('Zend_Cache_Backend_File::save() / error on saving data');
        $this->_delMetadatas($id);
    }
    return $res;
}
279 | |||
/**
 * Remove a cache record (its data file and its metadatas)
 *
 * Both removals are always attempted, even if the first one fails.
 *
 * @param  string $id cache id
 * @return boolean true only if both the data file and the metadatas were removed
 */
public function remove($id)
{
    $dataRemoved = $this->_remove($this->_file($id));
    $metadatasRemoved = $this->_delMetadatas($id);
    return $metadatasRemoved && $dataRemoved;
}
293 | |||
/**
 * Clean some cache records
 *
 * Available modes are :
 *
 * Zend_Cache::CLEANING_MODE_ALL (default)    => remove all cache entries ($tags is not used)
 * Zend_Cache::CLEANING_MODE_OLD              => remove too old cache entries ($tags is not used)
 * Zend_Cache::CLEANING_MODE_MATCHING_TAG     => remove cache entries matching all given tags
 * Zend_Cache::CLEANING_MODE_NOT_MATCHING_TAG => remove cache entries matching none of the given tags
 * Zend_Cache::CLEANING_MODE_MATCHING_ANY_TAG => remove cache entries matching any given tags
 *
 * The work is delegated to the protected _clean() method, starting from the
 * configured cache directory.
 *
 * @param  string $mode clean mode
 * @param  array  $tags array of tags
 * @return boolean true if no problem
 */
public function clean($mode = Zend_Cache::CLEANING_MODE_ALL, $tags = array())
{
    // The recursive part is hidden in the protected method
    clearstatcache();
    $cacheDir = $this->_options['cache_dir'];
    return $this->_clean($cacheDir, $mode, $tags);
}
318 | |||
/**
 * Return an array of stored cache ids
 *
 * @return array array of stored cache ids (string)
 */
public function getIds()
{
    $cacheDir = $this->_options['cache_dir'];
    return $this->_get($cacheDir, 'ids', array());
}
328 | |||
/**
 * Return an array of stored tags
 *
 * @return array array of stored tags (string)
 */
public function getTags()
{
    $cacheDir = $this->_options['cache_dir'];
    return $this->_get($cacheDir, 'tags', array());
}
338 | |||
/**
 * Return an array of stored cache ids which match given tags
 *
 * In case of multiple tags, a logical AND is made between tags
 * (a record must carry every given tag)
 *
 * @param  array $tags array of tags
 * @return array array of matching cache ids (string)
 */
public function getIdsMatchingTags($tags = array())
{
    $cacheDir = $this->_options['cache_dir'];
    return $this->_get($cacheDir, 'matching', $tags);
}
351 | |||
/**
 * Return an array of stored cache ids which don't match given tags
 *
 * In case of multiple tags, a logical OR is made between tags
 * (a record is returned only if it carries none of the given tags)
 *
 * @param  array $tags array of tags
 * @return array array of not matching cache ids (string)
 */
public function getIdsNotMatchingTags($tags = array())
{
    $cacheDir = $this->_options['cache_dir'];
    return $this->_get($cacheDir, 'notMatching', $tags);
}
364 | |||
/**
 * Return an array of stored cache ids which match any given tags
 *
 * In case of multiple tags, a logical OR is made between tags
 * (a record is returned as soon as it carries one of the given tags)
 *
 * @param  array $tags array of tags
 * @return array array of any matching cache ids (string)
 */
public function getIdsMatchingAnyTags($tags = array())
{
    $cacheDir = $this->_options['cache_dir'];
    return $this->_get($cacheDir, 'matchingAny', $tags);
}
377 | |||
/**
 * Return the filling percentage of the backend storage
 *
 * The percentage is the used part (total - free) of the disk holding the
 * cache directory, as reported by disk_total_space() / disk_free_space().
 *
 * @throws Zend_Cache_Exception if the total disk space can't be read
 * @return int integer between 0 and 100
 */
public function getFillingPercentage()
{
    $free = disk_free_space($this->_options['cache_dir']);
    $total = disk_total_space($this->_options['cache_dir']);
    if ($total == 0) {
        Zend_Cache::throwException('can\'t get disk_total_space');
    } else {
        if ($free >= $total) {
            // nothing is used : the storage is empty, not full
            return 0;
        }
        return ((int) (100. * ($total - $free) / $total));
    }
}
397 | |||
/**
 * Return an array of metadatas for the given cache id
 *
 * The returned array includes these keys :
 * - expire : the expire timestamp
 * - tags : a string array of tags
 * - mtime : timestamp of last modification time
 *
 * @param  string $id cache id
 * @return array|false array of metadatas (false if the cache id is not found or is expired)
 */
public function getMetadatas($id)
{
    $stored = $this->_getMetadatas($id);
    if (!$stored || time() > $stored['expire']) {
        return false;
    }
    // only the public part of the record is exposed (the hash is left out)
    $public = array();
    foreach (array('expire', 'tags', 'mtime') as $key) {
        $public[$key] = $stored[$key];
    }
    return $public;
}
424 | |||
/**
 * Give (if possible) an extra lifetime to the given cache id
 *
 * Nothing is done for an unknown or already expired record. Otherwise the
 * expire timestamp is pushed back by $extraLifetime and mtime is set to now.
 *
 * @param  string $id            cache id
 * @param  int    $extraLifetime number added to the stored expire timestamp
 * @return boolean true if ok
 */
public function touch($id, $extraLifetime)
{
    $current = $this->_getMetadatas($id);
    if (!$current || time() > $current['expire']) {
        return false;
    }
    $updated = array(
        'hash' => $current['hash'],
        'mtime' => time(),
        'expire' => $current['expire'] + $extraLifetime,
        'tags' => $current['tags']
    );
    return $this->_setMetadatas($id, $updated) ? true : false;
}
453 | |||
/**
 * Return an associative array of capabilities (booleans) of the backend
 *
 * The array includes these keys :
 * - automatic_cleaning (is automating cleaning necessary)
 * - tags (are tags supported)
 * - expired_read (is it possible to read expired cache records
 *                 (for doNotTestCacheValidity option for example))
 * - priority does the backend deal with priority when saving
 * - infinite_lifetime (is infinite lifetime can work with this backend)
 * - get_list (is it possible to get the list of cache ids and the complete list of tags)
 *
 * @return array associative of with capabilities
 */
public function getCapabilities()
{
    $capabilities = array();
    $capabilities['automatic_cleaning'] = true;
    $capabilities['tags'] = true;
    $capabilities['expired_read'] = true;
    $capabilities['priority'] = false;
    $capabilities['infinite_lifetime'] = true;
    $capabilities['get_list'] = true;
    return $capabilities;
}
479 | |||
/**
 * PUBLIC METHOD FOR UNIT TESTING ONLY !
 *
 * Force a cache record to expire by setting its expire timestamp to 1
 * (nothing is done if the record has no metadatas)
 *
 * @param string $id cache id
 */
public function ___expire($id)
{
    $record = $this->_getMetadatas($id);
    if (!$record) {
        return;
    }
    $record['expire'] = 1;
    $this->_setMetadatas($id, $record);
}
495 | |||
/**
 * Get a metadatas record
 *
 * The in-memory array is looked up first; on a miss the record is loaded
 * from disk and kept in memory (without being written back to disk).
 *
 * @param  string $id Cache id
 * @return array|false Associative array of metadatas
 */
protected function _getMetadatas($id)
{
    if (isset($this->_metadatasArray[$id])) {
        return $this->_metadatasArray[$id];
    }
    $loaded = $this->_loadMetadatas($id);
    if (!$loaded) {
        return false;
    }
    // third argument false : memory only, no write to the metadatas file
    $this->_setMetadatas($id, $loaded, false);
    return $loaded;
}
515 | |||
/**
 * Set a metadatas record
 *
 * When the in-memory array has reached 'metadatas_array_max_size' entries,
 * its oldest tenth is dropped before the new record is stored.
 *
 * @param  string  $id        Cache id
 * @param  array   $metadatas Associative array of metadatas
 * @param  boolean $save      optional pass false to disable saving to file
 * @return boolean True if no problem
 */
protected function _setMetadatas($id, $metadatas, $save = true)
{
    if (count($this->_metadatasArray) >= $this->_options['metadatas_array_max_size']) {
        $n = (int) ($this->_options['metadatas_array_max_size'] / 10);
        // preserve_keys must be true : an id made only of digits is stored
        // under an integer key, and array_slice() would otherwise renumber
        // it, attaching the metadatas to a wrong cache id
        $this->_metadatasArray = array_slice($this->_metadatasArray, $n, null, true);
    }
    if ($save) {
        $result = $this->_saveMetadatas($id, $metadatas);
        if (!$result) {
            return false;
        }
    }
    $this->_metadatasArray[$id] = $metadatas;
    return true;
}
539 | |||
/**
 * Drop a metadata record (from memory and from disk)
 *
 * @param  string $id Cache id
 * @return boolean True if the metadatas file was removed
 */
protected function _delMetadatas($id)
{
    $inMemory = isset($this->_metadatasArray[$id]);
    if ($inMemory) {
        unset($this->_metadatasArray[$id]);
    }
    return $this->_remove($this->_metadatasFile($id));
}
554 | |||
/**
 * Clear the metadatas array
 *
 * Only the in-memory array is emptied; the metadatas files on disk are
 * not touched by this method.
 *
 * @return void
 */
protected function _cleanMetadatas()
{
    $this->_metadatasArray = array();
}
564 | |||
/**
 * Load metadatas from disk
 *
 * @param  string $id Cache id
 * @return array|false Metadatas associative array (false if the metadatas file
 *                     can't be read, is empty or can't be unserialized)
 */
protected function _loadMetadatas($id)
{
    $serialized = $this->_fileGetContents($this->_metadatasFile($id));
    if (!$serialized) {
        return false;
    }
    // errors are silenced : unserialize() returns false on a corrupted content
    return @unserialize($serialized);
}
581 | |||
/**
 * Save metadatas to disk (serialized, in the metadatas file of the given id)
 *
 * @param  string $id        Cache id
 * @param  array  $metadatas Associative array
 * @return boolean True if no problem
 */
protected function _saveMetadatas($id, $metadatas)
{
    $written = $this->_filePutContents($this->_metadatasFile($id), serialize($metadatas));
    return $written ? true : false;
}
598 | |||
/**
 * Make and return a file name (with path) for metadatas
 *
 * The metadatas file lives in the same directory as the data file of the id;
 * its name is built from the id prefixed with 'internal-metadatas---'.
 *
 * @param  string $id Cache id
 * @return string Metadatas file name (with path)
 */
protected function _metadatasFile($id)
{
    return $this->_path($id) . $this->_idToFileName('internal-metadatas---' . $id);
}
611 | |||
/**
 * Check if the given filename is a metadatas one
 *
 * A metadatas file is recognized by the 'internal-metadatas---' marker
 * (21 characters) found at the beginning of the id part of its name.
 *
 * @param  string $fileName File name
 * @return boolean True if it's a metadatas one
 */
protected function _isMetadatasFile($fileName)
{
    $marker = substr($this->_fileNameToId($fileName), 0, 21);
    return ($marker == 'internal-metadatas---');
}
627 | |||
/**
 * Remove a file
 *
 * If the file can't be removed (because of locks or any problem), the
 * failure is logged and false is returned.
 *
 * @param  string $file Complete file path
 * @return boolean True if ok (false if $file is not a regular file or can't be removed)
 */
protected function _remove($file)
{
    if (!is_file($file)) {
        return false;
    }
    $deleted = @unlink($file);
    if ($deleted) {
        return true;
    }
    # we can't remove the file (because of locks or any problem)
    $this->_log("Zend_Cache_Backend_File::_remove() : we can't remove $file");
    return false;
}
649 | |||
/**
 * Clean some cache records (protected method used for recursive stuff)
 *
 * Available modes are :
 * Zend_Cache::CLEANING_MODE_ALL (default)    => remove all cache entries ($tags is not used)
 * Zend_Cache::CLEANING_MODE_OLD              => remove too old cache entries ($tags is not used)
 * Zend_Cache::CLEANING_MODE_MATCHING_TAG     => remove cache entries matching all given tags
 * Zend_Cache::CLEANING_MODE_NOT_MATCHING_TAG => remove cache entries matching none of the given tags
 * Zend_Cache::CLEANING_MODE_MATCHING_ANY_TAG => remove cache entries matching any given tags
 *
 * Every entry of $dir whose name starts with "<file_name_prefix>--" is
 * visited : regular files are handled according to $mode, and directories
 * are cleaned recursively when 'hashed_directory_level' is greater than 0.
 *
 * @param  string $dir  Directory to clean (the entry names are appended to it as is)
 * @param  string $mode Clean mode
 * @param  array  $tags Array of tags
 * @throws Zend_Cache_Exception
 * @return boolean True if no problem (false if $dir is not a directory or if a removal failed)
 */
protected function _clean($dir, $mode = Zend_Cache::CLEANING_MODE_ALL, $tags = array())
{
    if (!is_dir($dir)) {
        return false;
    }
    $result = true;
    $prefix = $this->_options['file_name_prefix'];
    // "<prefix>--*" matches the cache files ("<prefix>---<id>") as well as
    // the hashed directories ("<prefix>--<hash part>")
    $glob = @glob($dir . $prefix . '--*');
    if ($glob === false) {
        // On some systems it is impossible to distinguish between empty match and an error.
        return true;
    }
    foreach ($glob as $file) {
        if (is_file($file)) {
            $fileName = basename($file);
            if ($this->_isMetadatasFile($fileName)) {
                // in CLEANING_MODE_ALL, we drop anything, even remainings old metadatas files
                if ($mode != Zend_Cache::CLEANING_MODE_ALL) {
                    continue;
                }
            }
            $id = $this->_fileNameToId($fileName);
            $metadatas = $this->_getMetadatas($id);
            if ($metadatas === FALSE) {
                // no metadatas : handle the file as an expired record without any tag
                $metadatas = array('expire' => 1, 'tags' => array());
            }
            switch ($mode) {
                case Zend_Cache::CLEANING_MODE_ALL:
                    $res = $this->remove($id);
                    if (!$res) {
                        // in this case only, we accept a problem with the metadatas file drop
                        $res = $this->_remove($file);
                    }
                    $result = $result && $res;
                    break;
                case Zend_Cache::CLEANING_MODE_OLD:
                    if (time() > $metadatas['expire']) {
                        $result = $this->remove($id) && $result;
                    }
                    break;
                case Zend_Cache::CLEANING_MODE_MATCHING_TAG:
                    // the record must carry every given tag
                    $matching = true;
                    foreach ($tags as $tag) {
                        if (!in_array($tag, $metadatas['tags'])) {
                            $matching = false;
                            break;
                        }
                    }
                    if ($matching) {
                        $result = $this->remove($id) && $result;
                    }
                    break;
                case Zend_Cache::CLEANING_MODE_NOT_MATCHING_TAG:
                    // the record must carry none of the given tags
                    $matching = false;
                    foreach ($tags as $tag) {
                        if (in_array($tag, $metadatas['tags'])) {
                            $matching = true;
                            break;
                        }
                    }
                    if (!$matching) {
                        $result = $this->remove($id) && $result;
                    }
                    break;
                case Zend_Cache::CLEANING_MODE_MATCHING_ANY_TAG:
                    // one given tag carried by the record is enough
                    $matching = false;
                    foreach ($tags as $tag) {
                        if (in_array($tag, $metadatas['tags'])) {
                            $matching = true;
                            break;
                        }
                    }
                    if ($matching) {
                        $result = $this->remove($id) && $result;
                    }
                    break;
                default:
                    Zend_Cache::throwException('Invalid mode for clean() method');
                    break;
            }
        }
        if ((is_dir($file)) and ($this->_options['hashed_directory_level']>0)) {
            // Recursive call
            $result = $this->_clean($file . DIRECTORY_SEPARATOR, $mode, $tags) && $result;
            if ($mode == Zend_Cache::CLEANING_MODE_ALL) {
                // we try to drop the structure too
                // (errors are silenced : rmdir() fails if the directory is not empty)
                @rmdir($file);
            }
        }
    }
    return $result;
}
761 | |||
/**
 * Collect cache ids or tags from a cache directory (protected method used for recursive stuff)
 *
 * Available modes are :
 * 'ids'         => ids of all the records
 * 'tags'        => tags of all the records
 * 'matching'    => ids of the records carrying every given tag
 * 'notMatching' => ids of the records carrying none of the given tags
 * 'matchingAny' => ids of the records carrying at least one of the given tags
 *
 * Files without metadatas and expired records are skipped. Directories are
 * visited recursively when 'hashed_directory_level' is greater than 0.
 *
 * @param  string $dir  Directory to scan (the entry names are appended to it as is)
 * @param  string $mode One of the modes listed above
 * @param  array  $tags Array of tags (used by the three matching modes)
 * @throws Zend_Cache_Exception
 * @return array|false Array of ids or tags without duplicates (false if $dir is not a directory)
 */
protected function _get($dir, $mode, $tags = array())
{
    if (!is_dir($dir)) {
        return false;
    }
    $result = array();
    $prefix = $this->_options['file_name_prefix'];
    // "<prefix>--*" matches the cache files ("<prefix>---<id>") as well as
    // the hashed directories ("<prefix>--<hash part>")
    $glob = @glob($dir . $prefix . '--*');
    if ($glob === false) {
        // On some systems it is impossible to distinguish between empty match and an error.
        return array();
    }
    foreach ($glob as $file) {
        if (is_file($file)) {
            $fileName = basename($file);
            $id = $this->_fileNameToId($fileName);
            $metadatas = $this->_getMetadatas($id);
            if ($metadatas === FALSE) {
                // no metadatas found for this file name : not listed
                continue;
            }
            if (time() > $metadatas['expire']) {
                // expired records are not listed
                continue;
            }
            switch ($mode) {
                case 'ids':
                    $result[] = $id;
                    break;
                case 'tags':
                    $result = array_unique(array_merge($result, $metadatas['tags']));
                    break;
                case 'matching':
                    // the record must carry every given tag
                    $matching = true;
                    foreach ($tags as $tag) {
                        if (!in_array($tag, $metadatas['tags'])) {
                            $matching = false;
                            break;
                        }
                    }
                    if ($matching) {
                        $result[] = $id;
                    }
                    break;
                case 'notMatching':
                    // the record must carry none of the given tags
                    $matching = false;
                    foreach ($tags as $tag) {
                        if (in_array($tag, $metadatas['tags'])) {
                            $matching = true;
                            break;
                        }
                    }
                    if (!$matching) {
                        $result[] = $id;
                    }
                    break;
                case 'matchingAny':
                    // one given tag carried by the record is enough
                    $matching = false;
                    foreach ($tags as $tag) {
                        if (in_array($tag, $metadatas['tags'])) {
                            $matching = true;
                            break;
                        }
                    }
                    if ($matching) {
                        $result[] = $id;
                    }
                    break;
                default:
                    Zend_Cache::throwException('Invalid mode for _get() method');
                    break;
            }
        }
        if ((is_dir($file)) and ($this->_options['hashed_directory_level']>0)) {
            // Recursive call
            $recursiveRs = $this->_get($file . DIRECTORY_SEPARATOR, $mode, $tags);
            if ($recursiveRs === false) {
                $this->_log('Zend_Cache_Backend_File::_get() / recursive call : can\'t list entries of "'.$file.'"');
            } else {
                $result = array_unique(array_merge($result, $recursiveRs));
            }
        }
    }
    return array_unique($result);
}
845 | |||
/**
 * Compute & return the expire time
 *
 * @param  int|null $lifetime Lifetime added to the current time (null => 9999999999 is returned)
 * @return int expire time (unix timestamp)
 */
protected function _expireTime($lifetime)
{
    return ($lifetime === null) ? 9999999999 : time() + $lifetime;
}
858 | |||
/**
 * Make a control key with the string containing datas
 *
 * @param  string $data        Data
 * @param  string $controlType Type of control 'md5', 'crc32', 'strlen' or 'adler32'
 * @throws Zend_Cache_Exception
 * @return string Control key
 */
protected function _hash($data, $controlType)
{
    // loose comparisons on purpose : same matching rules as a switch statement
    if ($controlType == 'md5') {
        return md5($data);
    } elseif ($controlType == 'crc32') {
        return crc32($data);
    } elseif ($controlType == 'strlen') {
        return strlen($data);
    } elseif ($controlType == 'adler32') {
        return hash('adler32', $data);
    } else {
        Zend_Cache::throwException("Incorrect hash function : $controlType");
    }
}
882 | |||
/**
 * Transform a cache id into a file name and return it
 *
 * The file name is "<file_name_prefix>---<id>".
 *
 * @param  string $id Cache id
 * @return string File name
 */
protected function _idToFileName($id)
{
    return $this->_options['file_name_prefix'] . '---' . $id;
}
895 | |||
/**
 * Make and return a file name (with path)
 *
 * @param  string $id Cache id
 * @return string File name (with path)
 */
protected function _file($id)
{
    return $this->_path($id) . $this->_idToFileName($id);
}
908 | |||
/**
 * Return the complete directory path of a filename (including hashedDirectoryStructure)
 *
 * With 'hashed_directory_level' greater than 0, one directory per level is
 * appended to the cache directory; the directory of level n is named
 * "<file_name_prefix>--" followed by the first n characters of the adler32
 * hash of the id.
 *
 * @param  string  $id    Cache id
 * @param  boolean $parts if true, returns array of directory parts instead of single string
 * @return string|array Complete directory path, or the array of the successive
 *                      directory paths (one per level) when $parts is true
 */
protected function _path($id, $parts = false)
{
    $directories = array();
    $current = $this->_options['cache_dir'];
    $prefix = $this->_options['file_name_prefix'];
    if ($this->_options['hashed_directory_level']>0) {
        $digest = hash('adler32', $id);
        $level = 0;
        while ($level < $this->_options['hashed_directory_level']) {
            $level++;
            $current .= $prefix . '--' . substr($digest, 0, $level) . DIRECTORY_SEPARATOR;
            $directories[] = $current;
        }
    }
    return $parts ? $directories : $current;
}
934 | |||
/**
 * Make the directory structure for the given id
 *
 * Each missing directory returned by _path() is created, then chmod'ed
 * with the 'hashed_directory_perm' option. Errors are silenced.
 *
 * @param  string $id cache id
 * @return boolean true
 */
protected function _recursiveMkdirAndChmod($id)
{
    if ($this->_options['hashed_directory_level'] > 0) {
        $perm = $this->_options['hashed_directory_perm'];
        foreach ($this->_path($id, true) as $directory) {
            if (is_dir($directory)) {
                continue;
            }
            @mkdir($directory, $perm);
            @chmod($directory, $perm); // see #ZF-320 (this line is required in some configurations)
        }
    }
    return true;
}
955 | |||
/**
 * Test if the given cache id is available (and still valid as a cache record)
 *
 * @param  string  $id                     Cache id
 * @param  boolean $doNotTestCacheValidity If set to true, the cache validity won't be tested
 * @return boolean|mixed false (a cache is not available) or "last modified" timestamp (int) of the available cache record
 */
protected function _test($id, $doNotTestCacheValidity)
{
    $record = $this->_getMetadatas($id);
    if (!$record) {
        return false;
    }
    // the expire timestamp is only looked at when the validity has to be tested
    $usable = $doNotTestCacheValidity || (time() <= $record['expire']);
    return $usable ? $record['mtime'] : false;
}
974 | |||
/**
 * Return the file content of the given file
 *
 * When the 'file_locking' option is enabled, a shared lock is requested
 * while the file is read.
 *
 * @param  string $file File complete path
 * @return string File content (or false if problem)
 */
protected function _fileGetContents($file)
{
    if (!is_file($file)) {
        return false;
    }
    $handle = @fopen($file, 'rb');
    if (!$handle) {
        return false;
    }
    if ($this->_options['file_locking']) {
        @flock($handle, LOCK_SH);
    }
    $content = stream_get_contents($handle);
    if ($this->_options['file_locking']) {
        @flock($handle, LOCK_UN);
    }
    @fclose($handle);
    return $content;
}
996 | |||
/**
 * Put the given string into the given file
 *
 * The file is opened in append mode (so that it is not emptied before the
 * lock is requested), truncated, then written. When the 'file_locking'
 * option is enabled, an exclusive lock is requested before truncating.
 * The 'cache_file_perm' permissions are applied to the file in every case.
 *
 * @param  string $file   File complete path
 * @param  string $string String to put in file
 * @return boolean true if no problem
 */
protected function _filePutContents($file, $string)
{
    $written = false;
    $handle = @fopen($file, 'ab+');
    if ($handle) {
        if ($this->_options['file_locking']) {
            @flock($handle, LOCK_EX);
        }
        fseek($handle, 0);
        ftruncate($handle, 0);
        $written = (@fwrite($handle, $string) !== FALSE);
        @fclose($handle);
    }
    @chmod($file, $this->_options['cache_file_perm']);
    return $written;
}
1021 | |||
/**
 * Transform a file name into cache id and return it
 *
 * The leading "<file_name_prefix>---" is stripped from the file name; a
 * file name which doesn't start with it is returned unchanged.
 *
 * @param  string $fileName File name
 * @return string Cache id
 */
protected function _fileNameToId($fileName)
{
    // the prefix is quoted so that it is always matched literally, whatever
    // characters it contains ('~' is the delimiter of the pattern below)
    $prefix = preg_quote($this->_options['file_name_prefix'], '~');
    return preg_replace('~^' . $prefix . '---(.*)$~', '$1', $fileName);
}
1033 | |||
1034 | } | ||
diff --git a/inc/3rdparty/libraries/Zend/Cache/Backend/Interface.php b/inc/3rdparty/libraries/Zend/Cache/Backend/Interface.php new file mode 100644 index 00000000..3f44e2e1 --- /dev/null +++ b/inc/3rdparty/libraries/Zend/Cache/Backend/Interface.php | |||
@@ -0,0 +1,99 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * Zend Framework | ||
4 | * | ||
5 | * LICENSE | ||
6 | * | ||
7 | * This source file is subject to the new BSD license that is bundled | ||
8 | * with this package in the file LICENSE.txt. | ||
9 | * It is also available through the world-wide-web at this URL: | ||
10 | * http://framework.zend.com/license/new-bsd | ||
11 | * If you did not receive a copy of the license and are unable to | ||
12 | * obtain it through the world-wide-web, please send an email | ||
13 | * to license@zend.com so we can send you a copy immediately. | ||
14 | * | ||
15 | * @category Zend | ||
16 | * @package Zend_Cache | ||
17 | * @subpackage Zend_Cache_Backend | ||
18 | * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com) | ||
19 | * @license http://framework.zend.com/license/new-bsd New BSD License | ||
20 | * @version $Id: Interface.php 24593 2012-01-05 20:35:02Z matthew $ | ||
21 | */ | ||
22 | |||
23 | |||
24 | /** | ||
25 | * @package Zend_Cache | ||
26 | * @subpackage Zend_Cache_Backend | ||
27 | * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com) | ||
28 | * @license http://framework.zend.com/license/new-bsd New BSD License | ||
29 | */ | ||
30 | interface Zend_Cache_Backend_Interface | ||
31 | { | ||
32 | /** | ||
33 | * Set the frontend directives | ||
34 | * | ||
35 | * @param array $directives Associative array of directives (name => value) | ||
36 | */ | ||
37 | public function setDirectives($directives); | ||
38 | |||
39 | /** | ||
40 | * Test if a cache is available for the given id and, if so, return it (false otherwise) | ||
41 | * | ||
42 | * Note: the return value is always a string (unserialization is done by the core, not by the backend) | ||
43 | * | ||
44 | * @param string $id Cache id | ||
45 | * @param boolean $doNotTestCacheValidity If set to true, the cache validity won't be tested | ||
46 | * @return string|false Cached data, or false when no cache is available | ||
47 | */ | ||
48 | public function load($id, $doNotTestCacheValidity = false); | ||
49 | |||
50 | /** | ||
51 | * Test if a cache is available or not (for the given id) | ||
52 | * | ||
53 | * @param string $id Cache id | ||
54 | * @return int|false "Last modified" timestamp of the available cache record, or false if no cache is available | ||
55 | */ | ||
56 | public function test($id); | ||
57 | |||
58 | /** | ||
59 | * Save some string data into a cache record | ||
60 | * | ||
61 | * Note: $data is always a string (serialization is done by the | ||
62 | * core, not by the backend) | ||
63 | * | ||
64 | * @param string $data Data to cache | ||
65 | * @param string $id Cache id | ||
66 | * @param array $tags Array of strings, the cache record will be tagged by each string entry | ||
67 | * @param int|false|null $specificLifetime If != false, set a specific lifetime for this cache record (null => infinite lifetime) | ||
68 | * @return boolean True if no problem | ||
69 | */ | ||
70 | public function save($data, $id, $tags = array(), $specificLifetime = false); | ||
71 | |||
72 | /** | ||
73 | * Remove a cache record | ||
74 | * | ||
75 | * @param string $id Cache id | ||
76 | * @return boolean True if no problem | ||
77 | */ | ||
78 | public function remove($id); | ||
79 | |||
80 | /** | ||
81 | * Clean some cache records | ||
82 | * | ||
83 | * Available modes are: | ||
84 | * Zend_Cache::CLEANING_MODE_ALL (default) => remove all cache entries ($tags is not used) | ||
85 | * Zend_Cache::CLEANING_MODE_OLD => remove too old cache entries ($tags is not used) | ||
86 | * Zend_Cache::CLEANING_MODE_MATCHING_TAG => remove cache entries matching all given tags | ||
87 | * ($tags can be an array of strings or a single string) | ||
88 | * Zend_Cache::CLEANING_MODE_NOT_MATCHING_TAG => remove cache entries not {matching one of the given tags} | ||
89 | * ($tags can be an array of strings or a single string) | ||
90 | * Zend_Cache::CLEANING_MODE_MATCHING_ANY_TAG => remove cache entries matching any given tags | ||
91 | * ($tags can be an array of strings or a single string) | ||
92 | * | ||
93 | * @param string $mode Cleaning mode (one of the Zend_Cache::CLEANING_MODE_* constants listed above) | ||
94 | * @param array $tags Array of tags | ||
95 | * @return boolean True if no problem | ||
96 | */ | ||
97 | public function clean($mode = Zend_Cache::CLEANING_MODE_ALL, $tags = array()); | ||
98 | |||
99 | } | ||
diff --git a/inc/3rdparty/libraries/Zend/Cache/Core.php b/inc/3rdparty/libraries/Zend/Cache/Core.php new file mode 100644 index 00000000..e3588636 --- /dev/null +++ b/inc/3rdparty/libraries/Zend/Cache/Core.php | |||
@@ -0,0 +1,765 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * Zend Framework | ||
4 | * | ||
5 | * LICENSE | ||
6 | * | ||
7 | * This source file is subject to the new BSD license that is bundled | ||
8 | * with this package in the file LICENSE.txt. | ||
9 | * It is also available through the world-wide-web at this URL: | ||
10 | * http://framework.zend.com/license/new-bsd | ||
11 | * If you did not receive a copy of the license and are unable to | ||
12 | * obtain it through the world-wide-web, please send an email | ||
13 | * to license@zend.com so we can send you a copy immediately. | ||
14 | * | ||
15 | * @category Zend | ||
16 | * @package Zend_Cache | ||
17 | * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com) | ||
18 | * @license http://framework.zend.com/license/new-bsd New BSD License | ||
19 | * @version $Id: Core.php 24989 2012-06-21 07:24:13Z mabe $ | ||
20 | */ | ||
21 | |||
22 | |||
23 | /** | ||
24 | * @package Zend_Cache | ||
25 | * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com) | ||
26 | * @license http://framework.zend.com/license/new-bsd New BSD License | ||
27 | */ | ||
28 | class Zend_Cache_Core | ||
29 | { | ||
30 | /** | ||
31 | * Messages | ||
32 | */ | ||
33 | const BACKEND_NOT_SUPPORTS_TAG = 'tags are not supported by the current backend'; | ||
34 | const BACKEND_NOT_IMPLEMENTS_EXTENDED_IF = 'Current backend doesn\'t implement the Zend_Cache_Backend_ExtendedInterface, so this method is not available'; | ||
35 | |||
36 | /** | ||
37 | * Backend Object | ||
38 | * | ||
39 | * @var Zend_Cache_Backend_Interface $_backend | ||
40 | */ | ||
41 | protected $_backend = null; | ||
42 | |||
43 | /** | ||
44 | * Available options | ||
45 | * | ||
46 | * ====> (boolean) write_control : | ||
47 | * - Enable / disable write control (the cache is read just after writing to detect corrupt entries) | ||
48 | * - Enable write control will lightly slow the cache writing but not the cache reading | ||
49 | * Write control can detect some corrupt cache files but maybe it's not a perfect control | ||
50 | * | ||
51 | * ====> (boolean) caching : | ||
52 | * - Enable / disable caching | ||
53 | * (can be very useful for the debug of cached scripts) | ||
54 | * | ||
55 | * =====> (string) cache_id_prefix : | ||
56 | * - prefix for cache ids (namespace) | ||
57 | * | ||
58 | * ====> (boolean) automatic_serialization : | ||
59 | * - Enable / disable automatic serialization | ||
60 | * - It can be used to save directly datas which aren't strings (but it's slower) | ||
61 | * | ||
62 | * ====> (int) automatic_cleaning_factor : | ||
63 | * - Disable / Tune the automatic cleaning process | ||
64 | * - The automatic cleaning process destroy too old (for the given life time) | ||
65 | * cache files when a new cache file is written : | ||
66 | * 0 => no automatic cache cleaning | ||
67 | * 1 => systematic cache cleaning | ||
68 | * x (integer) > 1 => automatic cleaning randomly 1 times on x cache write | ||
69 | * | ||
70 | * ====> (int) lifetime : | ||
71 | * - Cache lifetime (in seconds) | ||
72 | * - If null, the cache is valid forever. | ||
73 | * | ||
74 | * ====> (boolean) logging : | ||
75 | * - If set to true, logging is activated (but the system is slower) | ||
76 | * | ||
77 | * ====> (boolean) ignore_user_abort | ||
78 | * - If set to true, the core will set the ignore_user_abort PHP flag inside the | ||
79 | * save() method to avoid cache corruptions in some cases (default false) | ||
80 | * | ||
81 | * @var array $_options available options | ||
82 | */ | ||
83 | protected $_options = array( | ||
84 | 'write_control' => true, | ||
85 | 'caching' => true, | ||
86 | 'cache_id_prefix' => null, | ||
87 | 'automatic_serialization' => false, | ||
88 | 'automatic_cleaning_factor' => 10, | ||
89 | 'lifetime' => 3600, | ||
90 | 'logging' => false, | ||
91 | 'logger' => null, | ||
92 | 'ignore_user_abort' => false | ||
93 | ); | ||
94 | |||
95 | /** | ||
96 | * Array of options which have to be transferred to the backend | ||
97 | * | ||
98 | * @var array $_directivesList | ||
99 | */ | ||
100 | protected static $_directivesList = array('lifetime', 'logging', 'logger'); | ||
101 | |||
102 | /** | ||
103 | * Not used by the core, just a sort of hint to get a common setOption() method (for the core and for frontends) | ||
104 | * | ||
105 | * @var array $_specificOptions | ||
106 | */ | ||
107 | protected $_specificOptions = array(); | ||
108 | |||
109 | /** | ||
110 | * Last used cache id | ||
111 | * | ||
112 | * @var string $_lastId | ||
113 | */ | ||
114 | private $_lastId = null; | ||
115 | |||
116 | /** | ||
117 | * True if the backend implements Zend_Cache_Backend_ExtendedInterface | ||
118 | * | ||
119 | * @var boolean $_extendedBackend | ||
120 | */ | ||
121 | protected $_extendedBackend = false; | ||
122 | |||
123 | /** | ||
124 | * Array of capabilities of the backend (only if it implements Zend_Cache_Backend_ExtendedInterface) | ||
125 | * | ||
126 | * @var array | ||
127 | */ | ||
128 | protected $_backendCapabilities = array(); | ||
129 | |||
/**
 * Constructor
 *
 * Applies every given option through setOption(), then runs the logger
 * sanity check so that a logger exists when logging is enabled.
 *
 * @param  array|Zend_Config $options Associative array of options or Zend_Config instance
 * @throws Zend_Cache_Exception
 * @return void
 */
public function __construct($options = array())
{
    if ($options instanceof Zend_Config) {
        $options = $options->toArray();
    }
    if (!is_array($options)) {
        Zend_Cache::throwException("Options passed were not an array"
        . " or Zend_Config instance.");
    }
    // foreach replaces the former list()/each() loop: each() was
    // deprecated in PHP 7.2 and removed in PHP 8.0.
    foreach ($options as $name => $value) {
        $this->setOption($name, $value);
    }
    $this->_loggerSanity();
}
151 | |||
/**
 * Set options using an instance of type Zend_Config
 *
 * Every entry of the configuration is passed to setOption().
 *
 * @param  Zend_Config $config
 * @return Zend_Cache_Core This instance (fluent interface)
 */
public function setConfig(Zend_Config $config)
{
    // foreach replaces the former list()/each() loop: each() was
    // deprecated in PHP 7.2 and removed in PHP 8.0.
    foreach ($config->toArray() as $name => $value) {
        $this->setOption($name, $value);
    }
    return $this;
}
166 | |||
/**
 * Set the backend
 *
 * Forwards the options listed in $_directivesList to the backend and
 * records whether the backend implements
 * Zend_Cache_Backend_ExtendedInterface (and, if so, its capabilities).
 *
 * @param  Zend_Cache_Backend $backendObject
 * @throws Zend_Cache_Exception
 * @return void
 */
public function setBackend(Zend_Cache_Backend $backendObject)
{
    $this->_backend = $backendObject;
    // some options (listed in $_directivesList) have to be given
    // to the backend too (even if they are not "backend specific")
    $directives = array();
    foreach (self::$_directivesList as $directive) {
        $directives[$directive] = $this->_options[$directive];
    }
    $this->_backend->setDirectives($directives);

    // Always recompute both values so that nothing is left over from a
    // previously set backend (e.g. when a non-extended backend replaces
    // an extended one).
    $this->_extendedBackend = ($this->_backend instanceof Zend_Cache_Backend_ExtendedInterface);
    $this->_backendCapabilities = $this->_extendedBackend
        ? $this->_backend->getCapabilities()
        : array();
}
190 | |||
/**
 * Return the backend currently attached to this core
 *
 * @return Zend_Cache_Backend|null Backend object (null as long as no backend has been set)
 */
public function getBackend()
{
    $backend = $this->_backend;
    return $backend;
}
200 | |||
/**
 * Public frontend to set an option
 *
 * The name is lower-cased first. Core options are delegated to
 * _setOption(); frontend specific options are stored directly. A name
 * matching neither list is silently ignored.
 *
 * @param  string $name  Name of the option
 * @param  mixed  $value Value of the option
 * @throws Zend_Cache_Exception
 * @return void
 */
public function setOption($name, $value)
{
    if (!is_string($name)) {
        Zend_Cache::throwException("Incorrect option name!");
    }
    $key = strtolower($name);
    if (array_key_exists($key, $this->_options)) {
        // Core option
        $this->_setOption($key, $value);
    } elseif (array_key_exists($key, $this->_specificOptions)) {
        // Option specific to this frontend
        $this->_specificOptions[$key] = $value;
    }
}
228 | |||
/**
 * Public frontend to get an option value
 *
 * Core options take precedence over frontend specific options.
 *
 * @param  string $name Name of the option (case-insensitive)
 * @throws Zend_Cache_Exception
 * @return mixed option value
 */
public function getOption($name)
{
    $name = strtolower($name);

    // Look into the core options first, then into the frontend specific ones.
    foreach (array($this->_options, $this->_specificOptions) as $pool) {
        if (array_key_exists($name, $pool)) {
            return $pool[$name];
        }
    }

    Zend_Cache::throwException("Incorrect option name : $name");
}
252 | |||
/**
 * Store a core option
 *
 * An empty 'lifetime' value is normalised to null.
 *
 * @param  string $name  Name of the option
 * @param  mixed  $value Value of the option
 * @throws Zend_Cache_Exception
 * @return void
 */
private function _setOption($name, $value)
{
    $isCoreOption = is_string($name) && array_key_exists($name, $this->_options);
    if (!$isCoreOption) {
        Zend_Cache::throwException("Incorrect option name : $name");
    }
    $this->_options[$name] = ($name == 'lifetime' && empty($value)) ? null : $value;
}
271 | |||
/**
 * Force a new lifetime
 *
 * The value is stored in the core options and also pushed to the
 * backend as a 'lifetime' directive.
 *
 * @param  int $newLifetime New lifetime (in seconds)
 * @return void
 */
public function setLifetime($newLifetime)
{
    $directive = array('lifetime' => $newLifetime);
    $this->_options['lifetime'] = $directive['lifetime'];
    $this->_backend->setDirectives($directive);
}
287 | |||
/**
 * Load the cache record stored under the given id
 *
 * Returns false when caching is disabled or when the backend has no
 * record. The (prefixed) id is remembered as the last used id.
 *
 * @param  string  $id                     Cache id
 * @param  boolean $doNotTestCacheValidity If set to true, the cache validity won't be tested
 * @param  boolean $doNotUnserialize       Do not unserialize (even if automatic_serialization is true) => for internal use
 * @return mixed|false Cached data
 */
public function load($id, $doNotTestCacheValidity = false, $doNotUnserialize = false)
{
    if (!$this->_options['caching']) {
        return false;
    }
    $id = $this->_id($id); // cache id may need prefix
    $this->_lastId = $id;
    self::_validateIdOrTag($id);

    $this->_log("Zend_Cache_Core: load item '{$id}'", 7);
    $data = $this->_backend->load($id, $doNotTestCacheValidity);

    $mustUnserialize = (!$doNotUnserialize) && $this->_options['automatic_serialization'];
    if ($data === false || !$mustUnserialize) {
        // either no cache available, or the raw string is what we return
        return $data;
    }
    return unserialize($data);
}
317 | |||
/**
 * Ask the backend whether a cache record exists for the given id
 *
 * The (prefixed) id is remembered as the last used id.
 *
 * @param  string $id Cache id
 * @return int|false Last modified time of cache entry if it is available, false otherwise
 *                   (always false when caching is disabled)
 */
public function test($id)
{
    if ($this->_options['caching']) {
        $id = $this->_id($id); // cache id may need prefix
        self::_validateIdOrTag($id);
        $this->_lastId = $id;

        $this->_log("Zend_Cache_Core: test item '{$id}'", 7);
        return $this->_backend->test($id);
    }
    return false;
}
336 | |||
/**
 * Save some data in a cache
 *
 * Steps: validate id and tags, serialize if automatic_serialization is
 * on, randomly trigger the cleaning of old entries, write through the
 * backend and, when write_control is on, read the record back to check
 * it. A record that fails to save or fails the check is removed.
 *
 * @param  mixed  $data             Data to put in cache (can be another type than string if automatic_serialization is on)
 * @param  string $id               Cache id (if not set, the last cache id will be used)
 * @param  array  $tags             Cache tags
 * @param  int    $specificLifetime If != false, set a specific lifetime for this cache record (null => infinite lifetime)
 * @param  int    $priority         integer between 0 (very low priority) and 10 (maximum priority) used by some particular backends
 * @throws Zend_Cache_Exception
 * @return boolean True if no problem (always true when caching is disabled)
 */
public function save($data, $id = null, $tags = array(), $specificLifetime = false, $priority = 8)
{
    if (!$this->_options['caching']) {
        return true;
    }
    if ($id === null) {
        $id = $this->_lastId;
    } else {
        $id = $this->_id($id);
    }
    self::_validateIdOrTag($id);
    self::_validateTagsArray($tags);
    if ($this->_options['automatic_serialization']) {
        // we need to serialize the data before storing it
        $data = serialize($data);
    } else {
        if (!is_string($data)) {
            Zend_Cache::throwException("Datas must be string or set automatic_serialization = true");
        }
    }

    // automatic cleaning
    if ($this->_options['automatic_cleaning_factor'] > 0) {
        $rand = rand(1, $this->_options['automatic_cleaning_factor']);
        if ($rand==1) {
            // new way || deprecated way
            if ($this->_extendedBackend || method_exists($this->_backend, 'isAutomaticCleaningAvailable')) {
                $this->_log("Zend_Cache_Core::save(): automatic cleaning running", 7);
                $this->clean(Zend_Cache::CLEANING_MODE_OLD);
            } else {
                $this->_log("Zend_Cache_Core::save(): automatic cleaning is not available/necessary with current backend", 4);
            }
        }
    }

    $this->_log("Zend_Cache_Core: save item '{$id}'", 7);
    $abort = null; // previous ignore_user_abort setting, restored after the write
    if ($this->_options['ignore_user_abort']) {
        $abort = ignore_user_abort(true);
    }
    if (($this->_extendedBackend) && ($this->_backendCapabilities['priority'])) {
        $result = $this->_backend->save($data, $id, $tags, $specificLifetime, $priority);
    } else {
        $result = $this->_backend->save($data, $id, $tags, $specificLifetime);
    }
    if ($this->_options['ignore_user_abort']) {
        ignore_user_abort($abort);
    }

    if (!$result) {
        // maybe the cache is corrupted, so we remove it !
        $this->_log("Zend_Cache_Core::save(): failed to save item '{$id}' -> removing it", 4);
        $this->_backend->remove($id);
        return false;
    }

    if ($this->_options['write_control']) {
        $data2 = $this->_backend->load($id, true);
        // Strict comparison: with a loose one a failed read-back (false)
        // equals "" or "0", and numeric-looking strings such as "1e3" and
        // "1000" compare equal, so a corrupt record could go undetected.
        if ($data !== $data2) {
            $this->_log("Zend_Cache_Core::save(): write control of item '{$id}' failed -> removing it", 4);
            $this->_backend->remove($id);
            return false;
        }
    }

    return true;
}
414 | |||
/**
 * Remove the cache record stored under the given id
 *
 * @param  string $id Cache id to remove
 * @return boolean True if ok (always true when caching is disabled)
 */
public function remove($id)
{
    if ($this->_options['caching']) {
        $id = $this->_id($id); // cache id may need prefix
        self::_validateIdOrTag($id);

        $this->_log("Zend_Cache_Core: remove item '{$id}'", 7);
        return $this->_backend->remove($id);
    }
    return true;
}
432 | |||
/**
 * Clean cache entries
 *
 * Available modes are :
 * 'all' (default)  => remove all cache entries ($tags is not used)
 * 'old'            => remove too old cache entries ($tags is not used)
 * 'matchingTag'    => remove cache entries matching all given tags
 * 'notMatchingTag' => remove cache entries not matching one of the given tags
 * 'matchingAnyTag' => remove cache entries matching any given tags
 *
 * $tags must be an array: it is checked by _validateTagsArray(), which
 * rejects any other type (a single tag has to be wrapped in an array).
 *
 * @param  string $mode One of the Zend_Cache::CLEANING_MODE_* values
 * @param  array  $tags Array of tags
 * @throws Zend_Cache_Exception
 * @return boolean True if ok (always true when caching is disabled)
 */
public function clean($mode = 'all', $tags = array())
{
    if (!$this->_options['caching']) {
        return true;
    }
    $validModes = array(
        Zend_Cache::CLEANING_MODE_ALL,
        Zend_Cache::CLEANING_MODE_OLD,
        Zend_Cache::CLEANING_MODE_MATCHING_TAG,
        Zend_Cache::CLEANING_MODE_NOT_MATCHING_TAG,
        Zend_Cache::CLEANING_MODE_MATCHING_ANY_TAG,
    );
    // Strict check: a loose in_array() accepts e.g. true (and 0 before
    // PHP 8) as a "valid" mode and hands it to the backend.
    if (!in_array($mode, $validModes, true)) {
        Zend_Cache::throwException('Invalid cleaning mode');
    }
    self::_validateTagsArray($tags);

    return $this->_backend->clean($mode, $tags);
}
467 | |||
/**
 * Return an array of stored cache ids which match given tags
 *
 * In case of multiple tags, a logical AND is made between tags.
 * The configured cache_id_prefix is stripped from the returned ids.
 *
 * @param  array $tags array of tags
 * @return array array of matching cache ids (string)
 */
public function getIdsMatchingTags($tags = array())
{
    if (!$this->_extendedBackend) {
        Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
    }
    if (!($this->_backendCapabilities['tags'])) {
        Zend_Cache::throwException(self::BACKEND_NOT_SUPPORTS_TAG);
    }

    $ids = $this->_backend->getIdsMatchingTags($tags);

    // we need to remove cache_id_prefix from ids (see #ZF-6178, #ZF-7600)
    $prefix = isset($this->_options['cache_id_prefix']) ? $this->_options['cache_id_prefix'] : '';
    if ($prefix !== '') {
        $prefixLen = strlen($prefix);
        foreach ($ids as $key => $id) {
            if (strpos($id, $prefix) === 0) {
                $ids[$key] = substr($id, $prefixLen);
            }
        }
    }

    return $ids;
}
500 | |||
/**
 * Return an array of stored cache ids which don't match given tags
 *
 * In case of multiple tags, a logical OR is made between tags.
 * The configured cache_id_prefix is stripped from the returned ids.
 *
 * @param  array $tags array of tags
 * @return array array of not matching cache ids (string)
 */
public function getIdsNotMatchingTags($tags = array())
{
    if (!$this->_extendedBackend) {
        Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
    }
    if (!($this->_backendCapabilities['tags'])) {
        Zend_Cache::throwException(self::BACKEND_NOT_SUPPORTS_TAG);
    }

    $ids = $this->_backend->getIdsNotMatchingTags($tags);

    // we need to remove cache_id_prefix from ids (see #ZF-6178, #ZF-7600)
    $prefix = isset($this->_options['cache_id_prefix']) ? $this->_options['cache_id_prefix'] : '';
    if ($prefix !== '') {
        $prefixLen = strlen($prefix);
        foreach ($ids as $key => $id) {
            if (strpos($id, $prefix) === 0) {
                $ids[$key] = substr($id, $prefixLen);
            }
        }
    }

    return $ids;
}
533 | |||
/**
 * Return an array of stored cache ids which match any given tags
 *
 * In case of multiple tags, a logical OR is made between tags.
 * The configured cache_id_prefix is stripped from the returned ids.
 *
 * @param  array $tags array of tags
 * @return array array of matching any cache ids (string)
 */
public function getIdsMatchingAnyTags($tags = array())
{
    if (!$this->_extendedBackend) {
        Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
    }
    if (!($this->_backendCapabilities['tags'])) {
        Zend_Cache::throwException(self::BACKEND_NOT_SUPPORTS_TAG);
    }

    $ids = $this->_backend->getIdsMatchingAnyTags($tags);

    // we need to remove cache_id_prefix from ids (see #ZF-6178, #ZF-7600)
    $prefix = isset($this->_options['cache_id_prefix']) ? $this->_options['cache_id_prefix'] : '';
    if ($prefix !== '') {
        $prefixLen = strlen($prefix);
        foreach ($ids as $key => $id) {
            if (strpos($id, $prefix) === 0) {
                $ids[$key] = substr($id, $prefixLen);
            }
        }
    }

    return $ids;
}
566 | |||
/**
 * Return an array of stored cache ids
 *
 * The configured cache_id_prefix is stripped from the returned ids.
 *
 * @return array array of stored cache ids (string)
 */
public function getIds()
{
    if (!$this->_extendedBackend) {
        Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
    }

    $ids = $this->_backend->getIds();

    // we need to remove cache_id_prefix from ids (see #ZF-6178, #ZF-7600)
    $prefix = isset($this->_options['cache_id_prefix']) ? $this->_options['cache_id_prefix'] : '';
    if ($prefix !== '') {
        $prefixLen = strlen($prefix);
        foreach ($ids as $key => $id) {
            if (strpos($id, $prefix) === 0) {
                $ids[$key] = substr($id, $prefixLen);
            }
        }
    }

    return $ids;
}
592 | } | ||
593 | |||
/**
 * Return the tags known to the backend
 *
 * Requires an extended backend whose capabilities report tag support.
 *
 * @return array array of stored tags (string)
 */
public function getTags()
{
    if (!$this->_extendedBackend) {
        Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
    }
    $tagsSupported = $this->_backendCapabilities['tags'];
    if (!$tagsSupported) {
        Zend_Cache::throwException(self::BACKEND_NOT_SUPPORTS_TAG);
    }
    $storedTags = $this->_backend->getTags();
    return $storedTags;
}
609 | |||
/**
 * Return the filling percentage reported by the backend storage
 *
 * Only available with a backend implementing the extended interface.
 *
 * @return int integer between 0 and 100
 */
public function getFillingPercentage()
{
    if (!$this->_extendedBackend) {
        Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
    }
    $percentage = $this->_backend->getFillingPercentage();
    return $percentage;
}
622 | |||
/**
 * Return the metadata array of the given cache id
 *
 * The array will include these keys :
 * - expire : the expire timestamp
 * - tags : a string array of tags
 * - mtime : timestamp of last modification time
 *
 * @param  string $id cache id (the cache_id_prefix is added automatically)
 * @return array array of metadatas (false if the cache id is not found)
 */
public function getMetadatas($id)
{
    if (!$this->_extendedBackend) {
        Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
    }
    $prefixedId = $this->_id($id); // cache id may need prefix
    return $this->_backend->getMetadatas($prefixedId);
}
642 | |||
/**
 * Ask the backend to give an extra lifetime to the given cache id
 *
 * Only available with a backend implementing the extended interface.
 *
 * @param  string $id            cache id (the cache_id_prefix is added automatically)
 * @param  int    $extraLifetime
 * @return boolean true if ok
 */
public function touch($id, $extraLifetime)
{
    if (!$this->_extendedBackend) {
        Zend_Cache::throwException(self::BACKEND_NOT_IMPLEMENTS_EXTENDED_IF);
    }
    $id = $this->_id($id); // cache id may need prefix

    $this->_log("Zend_Cache_Core: touch item '{$id}'", 7);
    $touched = $this->_backend->touch($id, $extraLifetime);
    return $touched;
}
660 | |||
/**
 * Validate a cache id or a tag (security, reliable filenames, reserved prefixes...)
 *
 * Three rules are enforced, in this order: the value must be a string,
 * it must not start with "internal-", and it may only contain
 * [a-zA-Z0-9_]. A violation is reported through Zend_Cache::throwException().
 *
 * @param  string $string Cache id or tag
 * @throws Zend_Cache_Exception
 * @return void
 */
protected static function _validateIdOrTag($string)
{
    if (!is_string($string)) {
        Zend_Cache::throwException('Invalid id or tag : must be a string');
    }
    // reserved namespace check on the first 9 characters
    if (strncmp($string, 'internal-', 9) === 0) {
        Zend_Cache::throwException('"internal-*" ids or tags are reserved');
    }
    $hasOnlyAllowedChars = preg_match('~^[a-zA-Z0-9_]+$~D', $string);
    if (!$hasOnlyAllowedChars) {
        Zend_Cache::throwException("Invalid id or tag '$string' : must use only [a-zA-Z0-9_]");
    }
}
682 | |||
/**
 * Validate a tags array (security, reliable filenames, reserved prefixes...)
 *
 * The argument must be an array; each entry is then checked with
 * _validateIdOrTag(). A violation is reported through
 * Zend_Cache::throwException().
 *
 * @param  array $tags Array of tags
 * @throws Zend_Cache_Exception
 * @return void
 */
protected static function _validateTagsArray($tags)
{
    if (!is_array($tags)) {
        Zend_Cache::throwException('Invalid tags array : must be an array');
    }
    // $tags is a by-value copy, so iterating it needs no pointer reset afterwards.
    foreach ($tags as $oneTag) {
        self::_validateIdOrTag($oneTag);
    }
}
702 | |||
/**
 * Make sure a usable logger exists when logging is enabled
 *
 * Nothing happens when logging is off or when the 'logger' option
 * already holds a Zend_Log instance. Otherwise a default Zend_Log
 * writing to php://output is created, given a Zend_Log_Filter_Priority
 * filter built with (Zend_Log::WARN, '<='), and stored as the 'logger'
 * option.
 *
 * @throws Zend_Cache_Exception
 * @return void
 */
protected function _loggerSanity()
{
    if (empty($this->_options['logging'])) {
        return;
    }

    $current = isset($this->_options['logger']) ? $this->_options['logger'] : null;
    if ($current instanceof Zend_Log) {
        return;
    }

    // Create a default logger to the standard output stream
    require_once 'Zend/Log.php';
    require_once 'Zend/Log/Writer/Stream.php';
    require_once 'Zend/Log/Filter/Priority.php';
    $writer = new Zend_Log_Writer_Stream('php://output');
    $defaultLogger = new Zend_Log($writer);
    $defaultLogger->addFilter(new Zend_Log_Filter_Priority(Zend_Log::WARN, '<='));
    $this->_options['logger'] = $defaultLogger;
}
729 | |||
/**
 * Log a message through the configured logger
 *
 * Does nothing when the 'logging' option is off.
 *
 * @param  string $message  Message to log
 * @param  int    $priority Log priority, defaults to 4 (WARN)
 * @throws Zend_Cache_Exception If logging is enabled but no Zend_Log logger is set
 * @return void
 */
protected function _log($message, $priority = 4)
{
    if (!$this->_options['logging']) {
        return;
    }
    // Both conditions must hold. The former "||" accepted any set value,
    // so a logger that is not a Zend_Log instance slipped through and
    // failed later on the ->log() call.
    if (!(isset($this->_options['logger']) && $this->_options['logger'] instanceof Zend_Log)) {
        Zend_Cache::throwException('Logging is enabled but logger is not set');
    }
    $logger = $this->_options['logger'];
    $logger->log($message, $priority);
}
748 | |||
/**
 * Build the cache id actually handed to the backend
 *
 * When the 'cache_id_prefix' option is set and the id is not null, the
 * prefix is prepended; otherwise the id is returned untouched.
 *
 * @param  string $id Cache id
 * @return string Cache id (with or without prefix)
 */
protected function _id($id)
{
    if ($id === null || !isset($this->_options['cache_id_prefix'])) {
        return $id; // nothing to prepend
    }
    return $this->_options['cache_id_prefix'] . $id;
}
764 | |||
765 | } | ||
diff --git a/inc/3rdparty/libraries/Zend/Cache/Exception.php b/inc/3rdparty/libraries/Zend/Cache/Exception.php new file mode 100644 index 00000000..44884515 --- /dev/null +++ b/inc/3rdparty/libraries/Zend/Cache/Exception.php | |||
@@ -0,0 +1,32 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * Zend Framework | ||
4 | * | ||
5 | * LICENSE | ||
6 | * | ||
7 | * This source file is subject to the new BSD license that is bundled | ||
8 | * with this package in the file LICENSE.txt. | ||
9 | * It is also available through the world-wide-web at this URL: | ||
10 | * http://framework.zend.com/license/new-bsd | ||
11 | * If you did not receive a copy of the license and are unable to | ||
12 | * obtain it through the world-wide-web, please send an email | ||
13 | * to license@zend.com so we can send you a copy immediately. | ||
14 | * | ||
15 | * @category Zend | ||
16 | * @package Zend_Cache | ||
17 | * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com) | ||
18 | * @license http://framework.zend.com/license/new-bsd New BSD License | ||
19 | * @version $Id: Exception.php 24593 2012-01-05 20:35:02Z matthew $ | ||
20 | */ | ||
21 | |||
22 | /** | ||
23 | * @see Zend_Exception | ||
24 | */ | ||
25 | require_once 'Zend/Exception.php'; | ||
26 | |||
/**
 * Exception type for the Zend_Cache package.
 *
 * Declares no members of its own; everything is inherited from Zend_Exception.
 *
 * @package    Zend_Cache
 * @copyright  Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Cache_Exception extends Zend_Exception
{
}
diff --git a/inc/3rdparty/libraries/Zend/Exception.php b/inc/3rdparty/libraries/Zend/Exception.php new file mode 100644 index 00000000..92b2e460 --- /dev/null +++ b/inc/3rdparty/libraries/Zend/Exception.php | |||
@@ -0,0 +1,96 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * Zend Framework | ||
4 | * | ||
5 | * LICENSE | ||
6 | * | ||
7 | * This source file is subject to the new BSD license that is bundled | ||
8 | * with this package in the file LICENSE.txt. | ||
9 | * It is also available through the world-wide-web at this URL: | ||
10 | * http://framework.zend.com/license/new-bsd | ||
11 | * If you did not receive a copy of the license and are unable to | ||
12 | * obtain it through the world-wide-web, please send an email | ||
13 | * to license@zend.com so we can send you a copy immediately. | ||
14 | * | ||
15 | * @category Zend | ||
16 | * @package Zend | ||
17 | * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com) | ||
18 | * @license http://framework.zend.com/license/new-bsd New BSD License | ||
19 | * @version $Id: Exception.php 24593 2012-01-05 20:35:02Z matthew $ | ||
20 | */ | ||
21 | |||
/**
 * Base exception class that makes getPrevious() usable on PHP < 5.3.0.
 *
 * On PHP >= 5.3.0 the previous exception is handed straight to the native
 * Exception constructor. On older versions it is kept in a private property
 * and exposed through __call() so that getPrevious() still works.
 *
 * @category   Zend
 * @package    Zend
 * @copyright  Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Exception extends Exception
{
    /**
     * Previous exception; only assigned by the constructor on PHP < 5.3.0.
     *
     * @var null|Exception
     */
    private $_previous = null;

    /**
     * Construct the exception
     *
     * @param  string    $msg      Exception message
     * @param  int       $code     Exception code (cast to int before use)
     * @param  Exception $previous Exception that led to this one, if any
     * @return void
     */
    public function __construct($msg = '', $code = 0, Exception $previous = null)
    {
        $numericCode = (int) $code;

        if (!version_compare(PHP_VERSION, '5.3.0', '<')) {
            // Native exception chaining is available.
            parent::__construct($msg, $numericCode, $previous);
            return;
        }

        // Older PHP: the parent constructor takes no third argument, so the
        // previous exception is remembered here instead.
        parent::__construct($msg, $numericCode);
        $this->_previous = $previous;
    }

    /**
     * Overloading
     *
     * Answers getPrevious() (matched case-insensitively) when no real method
     * of that name exists, i.e. on PHP < 5.3.0. Any other unknown method
     * call yields null.
     *
     * @param  string $method Name of the method that was called
     * @param  array  $args   Call arguments (unused)
     * @return mixed
     */
    public function __call($method, array $args)
    {
        $isGetPrevious = ('getprevious' == strtolower($method));

        return $isGetPrevious ? $this->_getPrevious() : null;
    }

    /**
     * String representation of the exception
     *
     * On PHP < 5.3.0 a stored previous exception is rendered first, followed
     * by "Next" and this exception's own representation.
     *
     * @return string
     */
    public function __toString()
    {
        $chained = null;
        if (version_compare(PHP_VERSION, '5.3.0', '<')) {
            $chained = $this->getPrevious();
        }

        if (null === $chained) {
            return parent::__toString();
        }

        return $chained->__toString() . "\n\nNext " . parent::__toString();
    }

    /**
     * Returns the previous exception stored by this class
     *
     * @return Exception|null
     */
    protected function _getPrevious()
    {
        $stored = $this->_previous;

        return $stored;
    }
}
diff --git a/inc/3rdparty/content-extractor/ContentExtractor.php b/inc/3rdparty/libraries/content-extractor/ContentExtractor.php index 26878392..ddd33bb5 100644 --- a/inc/3rdparty/content-extractor/ContentExtractor.php +++ b/inc/3rdparty/libraries/content-extractor/ContentExtractor.php | |||
@@ -1,612 +1,728 @@ | |||
1 | <?php | 1 | <?php |
2 | /** | 2 | /** |
3 | * Content Extractor | 3 | * Content Extractor |
4 | * | 4 | * |
5 | * Uses patterns specified in site config files and auto detection (hNews/PHP Readability) | 5 | * Uses patterns specified in site config files and auto detection (hNews/PHP Readability) |
6 | * to extract content from HTML files. | 6 | * to extract content from HTML files. |
7 | * | 7 | * |
8 | * @version 0.8 | 8 | * @version 1.0 |
9 | * @date 2012-02-21 | 9 | * @date 2013-02-05 |
10 | * @author Keyvan Minoukadeh | 10 | * @author Keyvan Minoukadeh |
11 | * @copyright 2011 Keyvan Minoukadeh | 11 | * @copyright 2013 Keyvan Minoukadeh |
12 | * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3 | 12 | * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3 |
13 | */ | 13 | */ |
14 | 14 | ||
15 | class ContentExtractor | 15 | class ContentExtractor |
16 | { | 16 | { |
17 | protected static $tidy_config = array( | 17 | protected static $tidy_config = array( |
18 | 'clean' => true, | 18 | 'clean' => true, |
19 | 'output-xhtml' => true, | 19 | 'output-xhtml' => true, |
20 | 'logical-emphasis' => true, | 20 | 'logical-emphasis' => true, |
21 | 'show-body-only' => false, | 21 | 'show-body-only' => false, |
22 | 'new-blocklevel-tags' => 'article, aside, footer, header, hgroup, menu, nav, section, details, datagrid', | 22 | 'new-blocklevel-tags' => 'article, aside, footer, header, hgroup, menu, nav, section, details, datagrid', |
23 | 'new-inline-tags' => 'mark, time, meter, progress, data', | 23 | 'new-inline-tags' => 'mark, time, meter, progress, data', |
24 | 'wrap' => 0, | 24 | 'wrap' => 0, |
25 | 'drop-empty-paras' => true, | 25 | 'drop-empty-paras' => true, |
26 | 'drop-proprietary-attributes' => false, | 26 | 'drop-proprietary-attributes' => false, |
27 | 'enclose-text' => true, | 27 | 'enclose-text' => true, |
28 | 'enclose-block-text' => true, | 28 | 'enclose-block-text' => true, |
29 | 'merge-divs' => true, | 29 | 'merge-divs' => true, |
30 | 'merge-spans' => true, | 30 | 'merge-spans' => true, |
31 | 'char-encoding' => 'utf8', | 31 | 'char-encoding' => 'utf8', |
32 | 'hide-comments' => true | 32 | 'hide-comments' => true |
33 | ); | 33 | ); |
34 | protected $html; | 34 | protected $html; |
35 | protected $config; | 35 | protected $config; |
36 | protected $title; | 36 | protected $title; |
37 | protected $author = array(); | 37 | protected $author = array(); |
38 | protected $language; | 38 | protected $language; |
39 | protected $date; | 39 | protected $date; |
40 | protected $body; | 40 | protected $body; |
41 | protected $success = false; | 41 | protected $success = false; |
42 | public $fingerprints = array(); | 42 | protected $nextPageUrl; |
43 | public $readability; | 43 | public $allowedParsers = array('libxml', 'html5lib'); |
44 | public $debug = false; | 44 | public $fingerprints = array(); |
45 | 45 | public $readability; | |
46 | function __construct($path, $fallback=null) { | 46 | public $debug = false; |
47 | SiteConfig::set_config_path($path, $fallback); | 47 | public $debugVerbose = false; |
48 | } | 48 | |
49 | 49 | function __construct($path, $fallback=null) { | |
50 | protected function debug($msg) { | 50 | SiteConfig::set_config_path($path, $fallback); |
51 | if ($this->debug) { | 51 | } |
52 | $mem = round(memory_get_usage()/1024, 2); | 52 | |
53 | $memPeak = round(memory_get_peak_usage()/1024, 2); | 53 | protected function debug($msg) { |
54 | echo '* ',$msg; | 54 | if ($this->debug) { |
55 | echo ' - mem used: ',$mem," (peak: $memPeak)\n"; | 55 | $mem = round(memory_get_usage()/1024, 2); |
56 | ob_flush(); | 56 | $memPeak = round(memory_get_peak_usage()/1024, 2); |
57 | flush(); | 57 | echo '* ',$msg; |
58 | } | 58 | if ($this->debugVerbose) echo ' - mem used: ',$mem," (peak: $memPeak)"; |
59 | } | 59 | echo "\n"; |
60 | 60 | ob_flush(); | |
61 | public function reset() { | 61 | flush(); |
62 | $this->html = null; | 62 | } |
63 | $this->readability = null; | 63 | } |
64 | $this->config = null; | 64 | |
65 | $this->title = null; | 65 | public function reset() { |
66 | $this->body = null; | 66 | $this->html = null; |
67 | $this->author = array(); | 67 | $this->readability = null; |
68 | $this->language = null; | 68 | $this->config = null; |
69 | $this->date = null; | 69 | $this->title = null; |
70 | $this->success = false; | 70 | $this->body = null; |
71 | } | 71 | $this->author = array(); |
72 | 72 | $this->language = null; | |
73 | public function findHostUsingFingerprints($html) { | 73 | $this->date = null; |
74 | $this->debug('Checking fingerprints...'); | 74 | $this->nextPageUrl = null; |
75 | $head = substr($html, 0, 8000); | 75 | $this->success = false; |
76 | foreach ($this->fingerprints as $_fp => $_fphost) { | 76 | } |
77 | $lookin = 'html'; | 77 | |
78 | if (is_array($_fphost)) { | 78 | public function findHostUsingFingerprints($html) { |
79 | if (isset($_fphost['head']) && $_fphost['head']) { | 79 | $this->debug('Checking fingerprints...'); |
80 | $lookin = 'head'; | 80 | $head = substr($html, 0, 8000); |
81 | } | 81 | foreach ($this->fingerprints as $_fp => $_fphost) { |
82 | $_fphost = $_fphost['hostname']; | 82 | $lookin = 'html'; |
83 | } | 83 | if (is_array($_fphost)) { |
84 | if (strpos($$lookin, $_fp) !== false) { | 84 | if (isset($_fphost['head']) && $_fphost['head']) { |
85 | $this->debug("Found match: $_fphost"); | 85 | $lookin = 'head'; |
86 | return $_fphost; | 86 | } |
87 | } | 87 | $_fphost = $_fphost['hostname']; |
88 | } | 88 | } |
89 | return false; | 89 | if (strpos($$lookin, $_fp) !== false) { |
90 | } | 90 | $this->debug("Found match: $_fphost"); |
91 | 91 | return $_fphost; | |
92 | // returns true on success, false on failure | 92 | } |
93 | // $smart_tidy indicates that if tidy is used and no results are produced, we will | 93 | } |
94 | // try again without it. Tidy helps us deal with PHP's patchy HTML parsing most of the time | 94 | $this->debug('No fingerprint matches'); |
95 | // but it has problems of its own which we try to avoid with this option. | 95 | return false; |
96 | public function process($html, $url, $smart_tidy=true) { | 96 | } |
97 | $this->reset(); | 97 | |
98 | // extract host name | 98 | // returns SiteConfig instance (joined in order: exact match, wildcard, fingerprint, global, default) |
99 | $host = @parse_url($url, PHP_URL_HOST); | 99 | public function buildSiteConfig($url, $html='', $add_to_cache=true) { |
100 | if (!($this->config = SiteConfig::build($host))) { | 100 | // extract host name |
101 | // no match, check HTML for fingerprints | 101 | $host = @parse_url($url, PHP_URL_HOST); |
102 | if (!empty($this->fingerprints) && ($_fphost = $this->findHostUsingFingerprints($html))) { | 102 | $host = strtolower($host); |
103 | $this->config = SiteConfig::build($_fphost); | 103 | if (substr($host, 0, 4) == 'www.') $host = substr($host, 4); |
104 | } | 104 | // is merged version already cached? |
105 | unset($_fphost); | 105 | if (SiteConfig::is_cached("$host.merged")) { |
106 | if (!$this->config) { | 106 | $this->debug("Returning cached and merged site config for $host"); |
107 | // no match, so use defaults | 107 | return SiteConfig::build("$host.merged"); |
108 | $this->config = new SiteConfig(); | 108 | } |
109 | } | 109 | // let's build from site_config/custom/ and standard/ |
110 | } | 110 | $config = SiteConfig::build($host); |
111 | // store copy of config in our static cache array in case we need to process another URL | 111 | if ($add_to_cache && $config && !SiteConfig::is_cached("$host")) { |
112 | SiteConfig::add_to_cache($host, $this->config); | 112 | SiteConfig::add_to_cache($host, $config); |
113 | 113 | } | |
114 | // do string replacements | 114 | // if no match, use defaults |
115 | foreach ($this->config->replace_string as $_repl) { | 115 | if (!$config) $config = new SiteConfig(); |
116 | $html = str_replace($_repl[0], $_repl[1], $html); | 116 | // load fingerprint config? |
117 | } | 117 | if ($config->autodetect_on_failure()) { |
118 | unset($_repl); | 118 | // check HTML for fingerprints |
119 | 119 | if (!empty($this->fingerprints) && ($_fphost = $this->findHostUsingFingerprints($html))) { | |
120 | // use tidy (if it exists)? | 120 | if ($config_fingerprint = SiteConfig::build($_fphost)) { |
121 | // This fixes problems with some sites which would otherwise | 121 | $this->debug("Appending site config settings from $_fphost (fingerprint match)"); |
122 | // trouble DOMDocument's HTML parsing. (Although sometimes it | 122 | $config->append($config_fingerprint); |
123 | // makes matters worse, which is why you can override it in site config files.) | 123 | if ($add_to_cache && !SiteConfig::is_cached($_fphost)) { |
124 | $tidied = false; | 124 | //$config_fingerprint->cache_in_apc = true; |
125 | if ($this->config->tidy && function_exists('tidy_parse_string') && $smart_tidy) { | 125 | SiteConfig::add_to_cache($_fphost, $config_fingerprint); |
126 | $this->debug('Using Tidy'); | 126 | } |
127 | $tidy = tidy_parse_string($html, self::$tidy_config, 'UTF8'); | 127 | } |
128 | if (tidy_clean_repair($tidy)) { | 128 | } |
129 | $original_html = $html; | 129 | } |
130 | $tidied = true; | 130 | // load global config? |
131 | // $html = $tidy->value; | 131 | if ($config->autodetect_on_failure()) { |
132 | } | 132 | if ($config_global = SiteConfig::build('global', true)) { |
133 | $body = $tidy->body(); | 133 | $this->debug('Appending site config settings from global.txt'); |
134 | if (preg_replace('/\s+/', '', $body->value) !== "<body></body>") { | 134 | $config->append($config_global); |
135 | $html = $tidy->value; | 135 | if ($add_to_cache && !SiteConfig::is_cached('global')) { |
136 | } | 136 | //$config_global->cache_in_apc = true; |
137 | unset($tidy); | 137 | SiteConfig::add_to_cache('global', $config_global); |
138 | } | 138 | } |
139 | 139 | } | |
140 | // load and parse html | 140 | } |
141 | $this->readability = new PocheReadability($html, $url); | 141 | // store copy of merged config |
142 | 142 | if ($add_to_cache) { | |
143 | // we use xpath to find elements in the given HTML document | 143 | // do not store in APC if wildcard match |
144 | // see http://en.wikipedia.org/wiki/XPath_1.0 | 144 | $use_apc = ($host == $config->cache_key); |
145 | $xpath = new DOMXPath($this->readability->dom); | 145 | $config->cache_key = null; |
146 | 146 | SiteConfig::add_to_cache("$host.merged", $config, $use_apc); | |
147 | // try to get title | 147 | } |
148 | foreach ($this->config->title as $pattern) { | 148 | return $config; |
149 | $elems = @$xpath->evaluate($pattern, $this->readability->dom); | 149 | } |
150 | if (is_string($elems)) { | 150 | |
151 | $this->debug('Title expression evaluated as string'); | 151 | // returns true on success, false on failure |
152 | $this->title = trim($elems); | 152 | // $smart_tidy indicates that if tidy is used and no results are produced, we will |
153 | break; | 153 | // try again without it. Tidy helps us deal with PHP's patchy HTML parsing most of the time |
154 | } elseif ($elems instanceof DOMNodeList && $elems->length > 0) { | 154 | // but it has problems of its own which we try to avoid with this option. |
155 | $this->debug('Title matched'); | 155 | public function process($html, $url, $smart_tidy=true) { |
156 | $this->title = $elems->item(0)->textContent; | 156 | $this->reset(); |
157 | // remove title from document | 157 | $this->config = $this->buildSiteConfig($url, $html); |
158 | try { | 158 | |
159 | $elems->item(0)->parentNode->removeChild($elems->item(0)); | 159 | // do string replacements |
160 | } catch (DOMException $e) { | 160 | if (!empty($this->config->find_string)) { |
161 | // do nothing | 161 | if (count($this->config->find_string) == count($this->config->replace_string)) { |
162 | } | 162 | $html = str_replace($this->config->find_string, $this->config->replace_string, $html, $_count); |
163 | break; | 163 | $this->debug("Strings replaced: $_count (find_string and/or replace_string)"); |
164 | } | 164 | } else { |
165 | } | 165 | $this->debug('Skipped string replacement - incorrect number of find-replace strings in site config'); |
166 | 166 | } | |
167 | // try to get author (if it hasn't already been set) | 167 | unset($_count); |
168 | if (empty($this->author)) { | 168 | } |
169 | foreach ($this->config->author as $pattern) { | 169 | |
170 | $elems = @$xpath->evaluate($pattern, $this->readability->dom); | 170 | // use tidy (if it exists)? |
171 | if (is_string($elems)) { | 171 | // This fixes problems with some sites which would otherwise |
172 | $this->debug('Author expression evaluated as string'); | 172 | // trouble DOMDocument's HTML parsing. (Although sometimes it |
173 | if (trim($elems) != '') { | 173 | // makes matters worse, which is why you can override it in site config files.) |
174 | $this->author[] = trim($elems); | 174 | $tidied = false; |
175 | break; | 175 | if ($this->config->tidy() && function_exists('tidy_parse_string') && $smart_tidy) { |
176 | } | 176 | $this->debug('Using Tidy'); |
177 | } elseif ($elems instanceof DOMNodeList && $elems->length > 0) { | 177 | $tidy = tidy_parse_string($html, self::$tidy_config, 'UTF8'); |
178 | foreach ($elems as $elem) { | 178 | if (tidy_clean_repair($tidy)) { |
179 | if (!isset($elem->parentNode)) continue; | 179 | $original_html = $html; |
180 | $this->author[] = trim($elem->textContent); | 180 | $tidied = true; |
181 | } | 181 | $html = $tidy->value; |
182 | if (!empty($this->author)) break; | 182 | } |
183 | } | 183 | unset($tidy); |
184 | } | 184 | } |
185 | } | 185 | |
186 | 186 | // load and parse html | |
187 | // try to get language | 187 | $_parser = $this->config->parser(); |
188 | $_lang_xpath = array('//html[@lang]/@lang', '//meta[@name="DC.language"]/@content'); | 188 | if (!in_array($_parser, $this->allowedParsers)) { |
189 | foreach ($_lang_xpath as $pattern) { | 189 | $this->debug("HTML parser $_parser not listed, using libxml instead"); |
190 | $elems = @$xpath->evaluate($pattern, $this->readability->dom); | 190 | $_parser = 'libxml'; |
191 | if (is_string($elems)) { | 191 | } |
192 | if (trim($elems) != '') { | 192 | $this->debug("Attempting to parse HTML with $_parser"); |
193 | $this->language = trim($elems); | 193 | $this->readability = new Readability($html, $url, $_parser); |
194 | break; | 194 | |
195 | } | 195 | // we use xpath to find elements in the given HTML document |
196 | } elseif ($elems instanceof DOMNodeList && $elems->length > 0) { | 196 | // see http://en.wikipedia.org/wiki/XPath_1.0 |
197 | foreach ($elems as $elem) { | 197 | $xpath = new DOMXPath($this->readability->dom); |
198 | if (!isset($elem->parentNode)) continue; | 198 | |
199 | $this->language = trim($elem->textContent); | 199 | // try to get next page link |
200 | } | 200 | foreach ($this->config->next_page_link as $pattern) { |
201 | if ($this->language) break; | 201 | $elems = @$xpath->evaluate($pattern, $this->readability->dom); |
202 | } | 202 | if (is_string($elems)) { |
203 | } | 203 | $this->nextPageUrl = trim($elems); |
204 | 204 | break; | |
205 | // try to get date | 205 | } elseif ($elems instanceof DOMNodeList && $elems->length > 0) { |
206 | foreach ($this->config->date as $pattern) { | 206 | foreach ($elems as $item) { |
207 | $elems = @$xpath->evaluate($pattern, $this->readability->dom); | 207 | if ($item instanceof DOMElement && $item->hasAttribute('href')) { |
208 | if (is_string($elems)) { | 208 | $this->nextPageUrl = $item->getAttribute('href'); |
209 | $this->debug('Date expression evaluated as string'); | 209 | break 2; |
210 | $this->date = strtotime(trim($elems, "; \t\n\r\0\x0B")); | 210 | } elseif ($item instanceof DOMAttr && $item->value) { |
211 | } elseif ($elems instanceof DOMNodeList && $elems->length > 0) { | 211 | $this->nextPageUrl = $item->value; |
212 | $this->debug('Date matched'); | 212 | break 2; |
213 | $this->date = $elems->item(0)->textContent; | 213 | } |
214 | $this->date = strtotime(trim($this->date, "; \t\n\r\0\x0B")); | 214 | } |
215 | // remove date from document | 215 | } |
216 | // $elems->item(0)->parentNode->removeChild($elems->item(0)); | 216 | } |
217 | } | 217 | |
218 | if (!$this->date) { | 218 | // try to get title |
219 | $this->date = null; | 219 | foreach ($this->config->title as $pattern) { |
220 | } else { | 220 | // $this->debug("Trying $pattern"); |
221 | break; | 221 | $elems = @$xpath->evaluate($pattern, $this->readability->dom); |
222 | } | 222 | if (is_string($elems)) { |
223 | } | 223 | $this->title = trim($elems); |
224 | 224 | $this->debug('Title expression evaluated as string: '.$this->title); | |
225 | // strip elements (using xpath expressions) | 225 | $this->debug("...XPath match: $pattern"); |
226 | foreach ($this->config->strip as $pattern) { | 226 | break; |
227 | $elems = @$xpath->query($pattern, $this->readability->dom); | 227 | } elseif ($elems instanceof DOMNodeList && $elems->length > 0) { |
228 | // check for matches | 228 | $this->title = $elems->item(0)->textContent; |
229 | if ($elems && $elems->length > 0) { | 229 | $this->debug('Title matched: '.$this->title); |
230 | $this->debug('Stripping '.$elems->length.' elements (strip)'); | 230 | $this->debug("...XPath match: $pattern"); |
231 | for ($i=$elems->length-1; $i >= 0; $i--) { | 231 | // remove title from document |
232 | $elems->item($i)->parentNode->removeChild($elems->item($i)); | 232 | try { |
233 | } | 233 | $elems->item(0)->parentNode->removeChild($elems->item(0)); |
234 | } | 234 | } catch (DOMException $e) { |
235 | } | 235 | // do nothing |
236 | 236 | } | |
237 | // strip elements (using id and class attribute values) | 237 | break; |
238 | foreach ($this->config->strip_id_or_class as $string) { | 238 | } |
239 | $string = strtr($string, array("'"=>'', '"'=>'')); | 239 | } |
240 | $elems = @$xpath->query("//*[contains(@class, '$string') or contains(@id, '$string')]", $this->readability->dom); | 240 | |
241 | // check for matches | 241 | // try to get author (if it hasn't already been set) |
242 | if ($elems && $elems->length > 0) { | 242 | if (empty($this->author)) { |
243 | $this->debug('Stripping '.$elems->length.' elements (strip_id_or_class)'); | 243 | foreach ($this->config->author as $pattern) { |
244 | for ($i=$elems->length-1; $i >= 0; $i--) { | 244 | $elems = @$xpath->evaluate($pattern, $this->readability->dom); |
245 | $elems->item($i)->parentNode->removeChild($elems->item($i)); | 245 | if (is_string($elems)) { |
246 | } | 246 | if (trim($elems) != '') { |
247 | } | 247 | $this->author[] = trim($elems); |
248 | } | 248 | $this->debug('Author expression evaluated as string: '.trim($elems)); |
249 | 249 | $this->debug("...XPath match: $pattern"); | |
250 | // strip images (using src attribute values) | 250 | break; |
251 | foreach ($this->config->strip_image_src as $string) { | 251 | } |
252 | $string = strtr($string, array("'"=>'', '"'=>'')); | 252 | } elseif ($elems instanceof DOMNodeList && $elems->length > 0) { |
253 | $elems = @$xpath->query("//img[contains(@src, '$string')]", $this->readability->dom); | 253 | foreach ($elems as $elem) { |
254 | // check for matches | 254 | if (!isset($elem->parentNode)) continue; |
255 | if ($elems && $elems->length > 0) { | 255 | $this->author[] = trim($elem->textContent); |
256 | $this->debug('Stripping '.$elems->length.' image elements'); | 256 | $this->debug('Author matched: '.trim($elem->textContent)); |
257 | for ($i=$elems->length-1; $i >= 0; $i--) { | 257 | } |
258 | $elems->item($i)->parentNode->removeChild($elems->item($i)); | 258 | if (!empty($this->author)) { |
259 | } | 259 | $this->debug("...XPath match: $pattern"); |
260 | } | 260 | break; |
261 | } | 261 | } |
262 | // strip elements using Readability.com and Instapaper.com ignore class names | 262 | } |
263 | // .entry-unrelated and .instapaper_ignore | 263 | } |
264 | // See https://www.readability.com/publishers/guidelines/#view-plainGuidelines | 264 | } |
265 | // and http://blog.instapaper.com/post/730281947 | 265 | |
266 | $elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' entry-unrelated ') or contains(concat(' ',normalize-space(@class),' '),' instapaper_ignore ')]", $this->readability->dom); | 266 | // try to get language |
267 | // check for matches | 267 | $_lang_xpath = array('//html[@lang]/@lang', '//meta[@name="DC.language"]/@content'); |
268 | if ($elems && $elems->length > 0) { | 268 | foreach ($_lang_xpath as $pattern) { |
269 | $this->debug('Stripping '.$elems->length.' .entry-unrelated,.instapaper_ignore elements'); | 269 | $elems = @$xpath->evaluate($pattern, $this->readability->dom); |
270 | for ($i=$elems->length-1; $i >= 0; $i--) { | 270 | if (is_string($elems)) { |
271 | $elems->item($i)->parentNode->removeChild($elems->item($i)); | 271 | if (trim($elems) != '') { |
272 | } | 272 | $this->language = trim($elems); |
273 | } | 273 | $this->debug('Language matched: '.$this->language); |
274 | 274 | break; | |
275 | // strip elements that contain style="display: none;" | 275 | } |
276 | $elems = @$xpath->query("//*[contains(@style,'display:none')]", $this->readability->dom); | 276 | } elseif ($elems instanceof DOMNodeList && $elems->length > 0) { |
277 | // check for matches | 277 | foreach ($elems as $elem) { |
278 | if ($elems && $elems->length > 0) { | 278 | if (!isset($elem->parentNode)) continue; |
279 | $this->debug('Stripping '.$elems->length.' elements with inline display:none style'); | 279 | $this->language = trim($elem->textContent); |
280 | for ($i=$elems->length-1; $i >= 0; $i--) { | 280 | $this->debug('Language matched: '.$this->language); |
281 | $elems->item($i)->parentNode->removeChild($elems->item($i)); | 281 | } |
282 | } | 282 | if ($this->language) break; |
283 | } | 283 | } |
284 | 284 | } | |
285 | // try to get body | 285 | |
286 | foreach ($this->config->body as $pattern) { | 286 | // try to get date |
287 | $elems = @$xpath->query($pattern, $this->readability->dom); | 287 | foreach ($this->config->date as $pattern) { |
288 | // check for matches | 288 | $elems = @$xpath->evaluate($pattern, $this->readability->dom); |
289 | if ($elems && $elems->length > 0) { | 289 | if (is_string($elems)) { |
290 | $this->debug('Body matched'); | 290 | $this->date = strtotime(trim($elems, "; \t\n\r\0\x0B")); |
291 | if ($elems->length == 1) { | 291 | } elseif ($elems instanceof DOMNodeList && $elems->length > 0) { |
292 | $this->body = $elems->item(0); | 292 | $this->date = $elems->item(0)->textContent; |
293 | // prune (clean up elements that may not be content) | 293 | $this->date = strtotime(trim($this->date, "; \t\n\r\0\x0B")); |
294 | if ($this->config->prune) { | 294 | // remove date from document |
295 | $this->debug('Pruning content'); | 295 | // $elems->item(0)->parentNode->removeChild($elems->item(0)); |
296 | $this->readability->prepArticle($this->body); | 296 | } |
297 | } | 297 | if (!$this->date) { |
298 | break; | 298 | $this->date = null; |
299 | } else { | 299 | } else { |
300 | $this->body = $this->readability->dom->createElement('div'); | 300 | $this->debug('Date matched: '.date('Y-m-d H:i:s', $this->date)); |
301 | $this->debug($elems->length.' body elems found'); | 301 | $this->debug("...XPath match: $pattern"); |
302 | foreach ($elems as $elem) { | 302 | break; |
303 | if (!isset($elem->parentNode)) continue; | 303 | } |
304 | $isDescendant = false; | 304 | } |
305 | foreach ($this->body->childNodes as $parent) { | 305 | |
306 | if ($this->isDescendant($parent, $elem)) { | 306 | // strip elements (using xpath expressions) |
307 | $isDescendant = true; | 307 | foreach ($this->config->strip as $pattern) { |
308 | break; | 308 | $elems = @$xpath->query($pattern, $this->readability->dom); |
309 | } | 309 | // check for matches |
310 | } | 310 | if ($elems && $elems->length > 0) { |
311 | if ($isDescendant) { | 311 | $this->debug('Stripping '.$elems->length.' elements (strip)'); |
312 | $this->debug('Element is child of another body element, skipping.'); | 312 | for ($i=$elems->length-1; $i >= 0; $i--) { |
313 | } else { | 313 | $elems->item($i)->parentNode->removeChild($elems->item($i)); |
314 | // prune (clean up elements that may not be content) | 314 | } |
315 | if ($this->config->prune) { | 315 | } |
316 | $this->debug('Pruning content'); | 316 | } |
317 | $this->readability->prepArticle($elem); | 317 | |
318 | } | 318 | // strip elements (using id and class attribute values) |
319 | $this->debug('Element added to body'); | 319 | foreach ($this->config->strip_id_or_class as $string) { |
320 | $this->body->appendChild($elem); | 320 | $string = strtr($string, array("'"=>'', '"'=>'')); |
321 | } | 321 | $elems = @$xpath->query("//*[contains(@class, '$string') or contains(@id, '$string')]", $this->readability->dom); |
322 | } | 322 | // check for matches |
323 | } | 323 | if ($elems && $elems->length > 0) { |
324 | } | 324 | $this->debug('Stripping '.$elems->length.' elements (strip_id_or_class)'); |
325 | } | 325 | for ($i=$elems->length-1; $i >= 0; $i--) { |
326 | 326 | $elems->item($i)->parentNode->removeChild($elems->item($i)); | |
327 | // auto detect? | 327 | } |
328 | $detect_title = $detect_body = $detect_author = $detect_date = false; | 328 | } |
329 | // detect title? | 329 | } |
330 | if (!isset($this->title)) { | 330 | |
331 | if (empty($this->config->title) || $this->config->autodetect_on_failure) { | 331 | // strip images (using src attribute values) |
332 | $detect_title = true; | 332 | foreach ($this->config->strip_image_src as $string) { |
333 | } | 333 | $string = strtr($string, array("'"=>'', '"'=>'')); |
334 | } | 334 | $elems = @$xpath->query("//img[contains(@src, '$string')]", $this->readability->dom); |
335 | // detect body? | 335 | // check for matches |
336 | if (!isset($this->body)) { | 336 | if ($elems && $elems->length > 0) { |
337 | if (empty($this->config->body) || $this->config->autodetect_on_failure) { | 337 | $this->debug('Stripping '.$elems->length.' image elements'); |
338 | $detect_body = true; | 338 | for ($i=$elems->length-1; $i >= 0; $i--) { |
339 | } | 339 | $elems->item($i)->parentNode->removeChild($elems->item($i)); |
340 | } | 340 | } |
341 | // detect author? | 341 | } |
342 | if (empty($this->author)) { | 342 | } |
343 | if (empty($this->config->author) || $this->config->autodetect_on_failure) { | 343 | // strip elements using Readability.com and Instapaper.com ignore class names |
344 | $detect_author = true; | 344 | // .entry-unrelated and .instapaper_ignore |
345 | } | 345 | // See https://www.readability.com/publishers/guidelines/#view-plainGuidelines |
346 | } | 346 | // and http://blog.instapaper.com/post/730281947 |
347 | // detect date? | 347 | $elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' entry-unrelated ') or contains(concat(' ',normalize-space(@class),' '),' instapaper_ignore ')]", $this->readability->dom); |
348 | if (!isset($this->date)) { | 348 | // check for matches |
349 | if (empty($this->config->date) || $this->config->autodetect_on_failure) { | 349 | if ($elems && $elems->length > 0) { |
350 | $detect_date = true; | 350 | $this->debug('Stripping '.$elems->length.' .entry-unrelated,.instapaper_ignore elements'); |
351 | } | 351 | for ($i=$elems->length-1; $i >= 0; $i--) { |
352 | } | 352 | $elems->item($i)->parentNode->removeChild($elems->item($i)); |
353 | 353 | } | |
354 | // check for hNews | 354 | } |
355 | if ($detect_title || $detect_body) { | 355 | |
356 | // check for hentry | 356 | // strip elements that contain style="display: none;" |
357 | $elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' hentry ')]", $this->readability->dom); | 357 | $elems = @$xpath->query("//*[contains(@style,'display:none')]", $this->readability->dom); |
358 | if ($elems && $elems->length > 0) { | 358 | // check for matches |
359 | $this->debug('hNews: found hentry'); | 359 | if ($elems && $elems->length > 0) { |
360 | $hentry = $elems->item(0); | 360 | $this->debug('Stripping '.$elems->length.' elements with inline display:none style'); |
361 | 361 | for ($i=$elems->length-1; $i >= 0; $i--) { | |
362 | if ($detect_title) { | 362 | $elems->item($i)->parentNode->removeChild($elems->item($i)); |
363 | // check for entry-title | 363 | } |
364 | $elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' entry-title ')]", $hentry); | 364 | } |
365 | if ($elems && $elems->length > 0) { | 365 | |
366 | $this->debug('hNews: found entry-title'); | 366 | // try to get body |
367 | $this->title = $elems->item(0)->textContent; | 367 | foreach ($this->config->body as $pattern) { |
368 | // remove title from document | 368 | $elems = @$xpath->query($pattern, $this->readability->dom); |
369 | $elems->item(0)->parentNode->removeChild($elems->item(0)); | 369 | // check for matches |
370 | $detect_title = false; | 370 | if ($elems && $elems->length > 0) { |
371 | } | 371 | $this->debug('Body matched'); |
372 | } | 372 | $this->debug("...XPath match: $pattern"); |
373 | 373 | if ($elems->length == 1) { | |
374 | if ($detect_date) { | 374 | $this->body = $elems->item(0); |
375 | // check for time element with pubdate attribute | 375 | // prune (clean up elements that may not be content) |
376 | $elems = @$xpath->query(".//time[@pubdate] | .//abbr[contains(concat(' ',normalize-space(@class),' '),' published ')]", $hentry); | 376 | if ($this->config->prune()) { |
377 | if ($elems && $elems->length > 0) { | 377 | $this->debug('...pruning content'); |
378 | $this->debug('hNews: found publication date'); | 378 | $this->readability->prepArticle($this->body); |
379 | $this->date = strtotime(trim($elems->item(0)->textContent)); | 379 | } |
380 | // remove date from document | 380 | break; |
381 | //$elems->item(0)->parentNode->removeChild($elems->item(0)); | 381 | } else { |
382 | if ($this->date) { | 382 | $this->body = $this->readability->dom->createElement('div'); |
383 | $detect_date = false; | 383 | $this->debug($elems->length.' body elems found'); |
384 | } else { | 384 | foreach ($elems as $elem) { |
385 | $this->date = null; | 385 | if (!isset($elem->parentNode)) continue; |
386 | } | 386 | $isDescendant = false; |
387 | } | 387 | foreach ($this->body->childNodes as $parent) { |
388 | } | 388 | if ($this->isDescendant($parent, $elem)) { |
389 | 389 | $isDescendant = true; | |
390 | if ($detect_author) { | 390 | break; |
391 | // check for time element with pubdate attribute | 391 | } |
392 | $elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' vcard ') and (contains(concat(' ',normalize-space(@class),' '),' author ') or contains(concat(' ',normalize-space(@class),' '),' byline '))]", $hentry); | 392 | } |
393 | if ($elems && $elems->length > 0) { | 393 | if ($isDescendant) { |
394 | $this->debug('hNews: found author'); | 394 | $this->debug('...element is child of another body element, skipping.'); |
395 | $author = $elems->item(0); | 395 | } else { |
396 | $fn = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' fn ')]", $author); | 396 | // prune (clean up elements that may not be content) |
397 | if ($fn && $fn->length > 0) { | 397 | if ($this->config->prune()) { |
398 | foreach ($fn as $_fn) { | 398 | $this->debug('Pruning content'); |
399 | if (trim($_fn->textContent) != '') { | 399 | $this->readability->prepArticle($elem); |
400 | $this->author[] = trim($_fn->textContent); | 400 | } |
401 | } | 401 | $this->debug('...element added to body'); |
402 | } | 402 | $this->body->appendChild($elem); |
403 | } else { | 403 | } |
404 | if (trim($author->textContent) != '') { | 404 | } |
405 | $this->author[] = trim($author->textContent); | 405 | if ($this->body->hasChildNodes()) break; |
406 | } | 406 | } |
407 | } | 407 | } |
408 | $detect_author = empty($this->author); | 408 | } |
409 | } | 409 | |
410 | } | 410 | // auto detect? |
411 | 411 | $detect_title = $detect_body = $detect_author = $detect_date = false; | |
412 | // check for entry-content. | 412 | // detect title? |
413 | // according to hAtom spec, if there are multiple elements marked entry-content, | 413 | if (!isset($this->title)) { |
414 | // we include all of these in the order they appear - see http://microformats.org/wiki/hatom#Entry_Content | 414 | if (empty($this->config->title) || $this->config->autodetect_on_failure()) { |
415 | if ($detect_body) { | 415 | $detect_title = true; |
416 | $elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' entry-content ')]", $hentry); | 416 | } |
417 | if ($elems && $elems->length > 0) { | 417 | } |
418 | $this->debug('hNews: found entry-content'); | 418 | // detect body? |
419 | if ($elems->length == 1) { | 419 | if (!isset($this->body)) { |
420 | // what if it's empty? (some sites misuse hNews - place their content outside an empty entry-content element) | 420 | if (empty($this->config->body) || $this->config->autodetect_on_failure()) { |
421 | $e = $elems->item(0); | 421 | $detect_body = true; |
422 | if (($e->tagName == 'img') || (trim($e->textContent) != '')) { | 422 | } |
423 | $this->body = $elems->item(0); | 423 | } |
424 | // prune (clean up elements that may not be content) | 424 | // detect author? |
425 | if ($this->config->prune) { | 425 | if (empty($this->author)) { |
426 | $this->debug('Pruning content'); | 426 | if (empty($this->config->author) || $this->config->autodetect_on_failure()) { |
427 | $this->readability->prepArticle($this->body); | 427 | $detect_author = true; |
428 | } | 428 | } |
429 | $detect_body = false; | 429 | } |
430 | } else { | 430 | // detect date? |
431 | $this->debug('hNews: skipping entry-content - appears not to contain content'); | 431 | if (!isset($this->date)) { |
432 | } | 432 | if (empty($this->config->date) || $this->config->autodetect_on_failure()) { |
433 | unset($e); | 433 | $detect_date = true; |
434 | } else { | 434 | } |
435 | $this->body = $this->readability->dom->createElement('div'); | 435 | } |
436 | $this->debug($elems->length.' entry-content elems found'); | 436 | |
437 | foreach ($elems as $elem) { | 437 | // check for hNews |
438 | if (!isset($elem->parentNode)) continue; | 438 | if ($detect_title || $detect_body) { |
439 | $isDescendant = false; | 439 | // check for hentry |
440 | foreach ($this->body->childNodes as $parent) { | 440 | $elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' hentry ')]", $this->readability->dom); |
441 | if ($this->isDescendant($parent, $elem)) { | 441 | if ($elems && $elems->length > 0) { |
442 | $isDescendant = true; | 442 | $this->debug('hNews: found hentry'); |
443 | break; | 443 | $hentry = $elems->item(0); |
444 | } | 444 | |
445 | } | 445 | if ($detect_title) { |
446 | if ($isDescendant) { | 446 | // check for entry-title |
447 | $this->debug('Element is child of another body element, skipping.'); | 447 | $elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' entry-title ')]", $hentry); |
448 | } else { | 448 | if ($elems && $elems->length > 0) { |
449 | // prune (clean up elements that may not be content) | 449 | $this->title = $elems->item(0)->textContent; |
450 | if ($this->config->prune) { | 450 | $this->debug('hNews: found entry-title: '.$this->title); |
451 | $this->debug('Pruning content'); | 451 | // remove title from document |
452 | $this->readability->prepArticle($elem); | 452 | $elems->item(0)->parentNode->removeChild($elems->item(0)); |
453 | } | 453 | $detect_title = false; |
454 | $this->debug('Element added to body'); | 454 | } |
455 | $this->body->appendChild($elem); | 455 | } |
456 | } | 456 | |
457 | } | 457 | if ($detect_date) { |
458 | $detect_body = false; | 458 | // check for time element with pubdate attribute |
459 | } | 459 | $elems = @$xpath->query(".//time[@pubdate] | .//abbr[contains(concat(' ',normalize-space(@class),' '),' published ')]", $hentry); |
460 | } | 460 | if ($elems && $elems->length > 0) { |
461 | } | 461 | $this->date = strtotime(trim($elems->item(0)->textContent)); |
462 | } | 462 | // remove date from document |
463 | } | 463 | //$elems->item(0)->parentNode->removeChild($elems->item(0)); |
464 | 464 | if ($this->date) { | |
465 | // check for elements marked with instapaper_title | 465 | $this->debug('hNews: found publication date: '.date('Y-m-d H:i:s', $this->date)); |
466 | if ($detect_title) { | 466 | $detect_date = false; |
467 | // check for instapaper_title | 467 | } else { |
468 | $elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' instapaper_title ')]", $this->readability->dom); | 468 | $this->date = null; |
469 | if ($elems && $elems->length > 0) { | 469 | } |
470 | $this->debug('title found (.instapaper_title)'); | 470 | } |
471 | $this->title = $elems->item(0)->textContent; | 471 | } |
472 | // remove title from document | 472 | |
473 | $elems->item(0)->parentNode->removeChild($elems->item(0)); | 473 | if ($detect_author) { |
474 | $detect_title = false; | 474 | // check for time element with pubdate attribute |
475 | } | 475 | $elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' vcard ') and (contains(concat(' ',normalize-space(@class),' '),' author ') or contains(concat(' ',normalize-space(@class),' '),' byline '))]", $hentry); |
476 | } | 476 | if ($elems && $elems->length > 0) { |
477 | // check for elements marked with instapaper_body | 477 | $author = $elems->item(0); |
478 | if ($detect_body) { | 478 | $fn = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' fn ')]", $author); |
479 | $elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' instapaper_body ')]", $this->readability->dom); | 479 | if ($fn && $fn->length > 0) { |
480 | if ($elems && $elems->length > 0) { | 480 | foreach ($fn as $_fn) { |
481 | $this->debug('body found (.instapaper_body)'); | 481 | if (trim($_fn->textContent) != '') { |
482 | $this->body = $elems->item(0); | 482 | $this->author[] = trim($_fn->textContent); |
483 | // prune (clean up elements that may not be content) | 483 | $this->debug('hNews: found author: '.trim($_fn->textContent)); |
484 | if ($this->config->prune) { | 484 | } |
485 | $this->debug('Pruning content'); | 485 | } |
486 | $this->readability->prepArticle($this->body); | 486 | } else { |
487 | } | 487 | if (trim($author->textContent) != '') { |
488 | $detect_body = false; | 488 | $this->author[] = trim($author->textContent); |
489 | } | 489 | $this->debug('hNews: found author: '.trim($author->textContent)); |
490 | } | 490 | } |
491 | 491 | } | |
492 | // Find author in rel="author" marked element | 492 | $detect_author = empty($this->author); |
493 | // We only use this if there's exactly one. | 493 | } |
494 | // If there's more than one, it could indicate more than | 494 | } |
495 | // one author, but it could also indicate that we're processing | 495 | |
496 | // a page listing different articles with different authors. | 496 | // check for entry-content. |
497 | if ($detect_author) { | 497 | // according to hAtom spec, if there are multiple elements marked entry-content, |
498 | $elems = @$xpath->query("//a[contains(concat(' ',normalize-space(@rel),' '),' author ')]", $this->readability->dom); | 498 | // we include all of these in the order they appear - see http://microformats.org/wiki/hatom#Entry_Content |
499 | if ($elems && $elems->length == 1) { | 499 | if ($detect_body) { |
500 | $this->debug('Author found (rel="author")'); | 500 | $elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' entry-content ')]", $hentry); |
501 | $author = trim($elems->item(0)->textContent); | 501 | if ($elems && $elems->length > 0) { |
502 | if ($author != '') { | 502 | $this->debug('hNews: found entry-content'); |
503 | $this->author[] = $author; | 503 | if ($elems->length == 1) { |
504 | $detect_author = false; | 504 | // what if it's empty? (some sites misuse hNews - place their content outside an empty entry-content element) |
505 | } | 505 | $e = $elems->item(0); |
506 | } | 506 | if (($e->tagName == 'img') || (trim($e->textContent) != '')) { |
507 | } | 507 | $this->body = $elems->item(0); |
508 | 508 | // prune (clean up elements that may not be content) | |
509 | // Find date in pubdate marked time element | 509 | if ($this->config->prune()) { |
510 | // For the same reason given above, we only use this | 510 | $this->debug('Pruning content'); |
511 | // if there's exactly one element. | 511 | $this->readability->prepArticle($this->body); |
512 | if ($detect_date) { | 512 | } |
513 | $elems = @$xpath->query("//time[@pubdate]", $this->readability->dom); | 513 | $detect_body = false; |
514 | if ($elems && $elems->length == 1) { | 514 | } else { |
515 | $this->debug('Date found (pubdate marked time element)'); | 515 | $this->debug('hNews: skipping entry-content - appears not to contain content'); |
516 | $this->date = strtotime(trim($elems->item(0)->textContent)); | 516 | } |
517 | // remove date from document | 517 | unset($e); |
518 | //$elems->item(0)->parentNode->removeChild($elems->item(0)); | 518 | } else { |
519 | if ($this->date) { | 519 | $this->body = $this->readability->dom->createElement('div'); |
520 | $detect_date = false; | 520 | $this->debug($elems->length.' entry-content elems found'); |
521 | } else { | 521 | foreach ($elems as $elem) { |
522 | $this->date = null; | 522 | if (!isset($elem->parentNode)) continue; |
523 | } | 523 | $isDescendant = false; |
524 | } | 524 | foreach ($this->body->childNodes as $parent) { |
525 | } | 525 | if ($this->isDescendant($parent, $elem)) { |
526 | 526 | $isDescendant = true; | |
527 | // still missing title or body, so we detect using Readability | 527 | break; |
528 | if ($detect_title || $detect_body) { | 528 | } |
529 | $this->debug('Using Readability'); | 529 | } |
530 | // clone body if we're only using Readability for title (otherwise it may interfere with body element) | 530 | if ($isDescendant) { |
531 | if (isset($this->body)) $this->body = $this->body->cloneNode(true); | 531 | $this->debug('Element is child of another body element, skipping.'); |
532 | $success = $this->readability->init(); | 532 | } else { |
533 | } | 533 | // prune (clean up elements that may not be content) |
534 | if ($detect_title) { | 534 | if ($this->config->prune()) { |
535 | $this->debug('Detecting title'); | 535 | $this->debug('Pruning content'); |
536 | $this->title = $this->readability->getTitle()->textContent; | 536 | $this->readability->prepArticle($elem); |
537 | } | 537 | } |
538 | if ($detect_body && $success) { | 538 | $this->debug('Element added to body'); |
539 | $this->debug('Detecting body'); | 539 | $this->body->appendChild($elem); |
540 | $this->body = $this->readability->getContent(); | 540 | } |
541 | if ($this->body->childNodes->length == 1 && $this->body->firstChild->nodeType === XML_ELEMENT_NODE) { | 541 | } |
542 | $this->body = $this->body->firstChild; | 542 | $detect_body = false; |
543 | } | 543 | } |
544 | // prune (clean up elements that may not be content) | 544 | } |
545 | if ($this->config->prune) { | 545 | } |
546 | $this->debug('Pruning content'); | 546 | } |
547 | $this->readability->prepArticle($this->body); | 547 | } |
548 | } | 548 | |
549 | } | 549 | // check for elements marked with instapaper_title |
550 | if (isset($this->body)) { | 550 | if ($detect_title) { |
551 | // remove scripts | 551 | // check for instapaper_title |
552 | $this->readability->removeScripts($this->body); | 552 | $elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' instapaper_title ')]", $this->readability->dom); |
553 | // remove any h1-h6 elements that appear as first thing in the body | 553 | if ($elems && $elems->length > 0) { |
554 | // and which match our title | 554 | $this->title = $elems->item(0)->textContent; |
555 | if (isset($this->title) && ($this->title != '')) { | 555 | $this->debug('Title found (.instapaper_title): '.$this->title); |
556 | $firstChild = $this->body->firstChild; | 556 | // remove title from document |
557 | while ($firstChild->nodeType && ($firstChild->nodeType !== XML_ELEMENT_NODE)) { | 557 | $elems->item(0)->parentNode->removeChild($elems->item(0)); |
558 | $firstChild = $firstChild->nextSibling; | 558 | $detect_title = false; |
559 | } | 559 | } |
560 | if (($firstChild->nodeType === XML_ELEMENT_NODE) | 560 | } |
561 | && in_array(strtolower($firstChild->tagName), array('h1', 'h2', 'h3', 'h4', 'h5', 'h6')) | 561 | // check for elements marked with instapaper_body |
562 | && (strtolower(trim($firstChild->textContent)) == strtolower(trim($this->title)))) { | 562 | if ($detect_body) { |
563 | $this->body->removeChild($firstChild); | 563 | $elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' instapaper_body ')]", $this->readability->dom); |
564 | } | 564 | if ($elems && $elems->length > 0) { |
565 | } | 565 | $this->debug('body found (.instapaper_body)'); |
566 | $this->success = true; | 566 | $this->body = $elems->item(0); |
567 | } | 567 | // prune (clean up elements that may not be content) |
568 | 568 | if ($this->config->prune()) { | |
569 | // if we've had no success and we've used tidy, there's a chance | 569 | $this->debug('Pruning content'); |
570 | // that tidy has messed up. So let's try again without tidy... | 570 | $this->readability->prepArticle($this->body); |
571 | if (!$this->success && $tidied && $smart_tidy) { | 571 | } |
572 | $this->debug('Trying again without tidy'); | 572 | $detect_body = false; |
573 | $this->process($original_html, $url, false); | 573 | } |
574 | } | 574 | } |
575 | 575 | ||
576 | return $this->success; | 576 | // Find author in rel="author" marked element |
577 | } | 577 | // We only use this if there's exactly one. |
578 | 578 | // If there's more than one, it could indicate more than | |
579 | private function isDescendant(DOMElement $parent, DOMElement $child) { | 579 | // one author, but it could also indicate that we're processing |
580 | $node = $child->parentNode; | 580 | // a page listing different articles with different authors. |
581 | while ($node != null) { | 581 | if ($detect_author) { |
582 | if ($node->isSameNode($parent)) return true; | 582 | $elems = @$xpath->query("//a[contains(concat(' ',normalize-space(@rel),' '),' author ')]", $this->readability->dom); |
583 | $node = $node->parentNode; | 583 | if ($elems && $elems->length == 1) { |
584 | } | 584 | $author = trim($elems->item(0)->textContent); |
585 | return false; | 585 | if ($author != '') { |
586 | } | 586 | $this->debug("Author found (rel=\"author\"): $author"); |
587 | 587 | $this->author[] = $author; | |
588 | public function getContent() { | 588 | $detect_author = false; |
589 | return $this->body; | 589 | } |
590 | } | 590 | } |
591 | 591 | } | |
592 | public function getTitle() { | 592 | |
593 | return $this->title; | 593 | // Find date in pubdate marked time element |
594 | } | 594 | // For the same reason given above, we only use this |
595 | 595 | // if there's exactly one element. | |
596 | public function getAuthors() { | 596 | if ($detect_date) { |
597 | return $this->author; | 597 | $elems = @$xpath->query("//time[@pubdate]", $this->readability->dom); |
598 | } | 598 | if ($elems && $elems->length == 1) { |
599 | 599 | $this->date = strtotime(trim($elems->item(0)->textContent)); | |
600 | public function getLanguage() { | 600 | // remove date from document |
601 | return $this->language; | 601 | //$elems->item(0)->parentNode->removeChild($elems->item(0)); |
602 | } | 602 | if ($this->date) { |
603 | 603 | $this->debug('Date found (pubdate marked time element): '.date('Y-m-d H:i:s', $this->date)); | |
604 | public function getDate() { | 604 | $detect_date = false; |
605 | return $this->date; | 605 | } else { |
606 | } | 606 | $this->date = null; |
607 | 607 | } | |
608 | public function getSiteConfig() { | 608 | } |
609 | return $this->config; | 609 | } |
610 | } | 610 | |
611 | } | 611 | // still missing title or body, so we detect using Readability |
612 | if ($detect_title || $detect_body) { | ||
613 | $this->debug('Using Readability'); | ||
614 | // clone body if we're only using Readability for title (otherwise it may interfere with body element) | ||
615 | if (isset($this->body)) $this->body = $this->body->cloneNode(true); | ||
616 | $success = $this->readability->init(); | ||
617 | } | ||
618 | if ($detect_title) { | ||
619 | $this->debug('Detecting title'); | ||
620 | $this->title = $this->readability->getTitle()->textContent; | ||
621 | } | ||
622 | if ($detect_body && $success) { | ||
623 | $this->debug('Detecting body'); | ||
624 | $this->body = $this->readability->getContent(); | ||
625 | if ($this->body->childNodes->length == 1 && $this->body->firstChild->nodeType === XML_ELEMENT_NODE) { | ||
626 | $this->body = $this->body->firstChild; | ||
627 | } | ||
628 | // prune (clean up elements that may not be content) | ||
629 | if ($this->config->prune()) { | ||
630 | $this->debug('Pruning content'); | ||
631 | $this->readability->prepArticle($this->body); | ||
632 | } | ||
633 | } | ||
634 | if (isset($this->body)) { | ||
635 | // remove scripts | ||
636 | $this->readability->removeScripts($this->body); | ||
637 | // remove any h1-h6 elements that appear as first thing in the body | ||
638 | // and which match our title | ||
639 | if (isset($this->title) && ($this->title != '')) { | ||
640 | $firstChild = $this->body->firstChild; | ||
641 | while ($firstChild->nodeType && ($firstChild->nodeType !== XML_ELEMENT_NODE)) { | ||
642 | $firstChild = $firstChild->nextSibling; | ||
643 | } | ||
644 | if (($firstChild->nodeType === XML_ELEMENT_NODE) | ||
645 | && in_array(strtolower($firstChild->tagName), array('h1', 'h2', 'h3', 'h4', 'h5', 'h6')) | ||
646 | && (strtolower(trim($firstChild->textContent)) == strtolower(trim($this->title)))) { | ||
647 | $this->body->removeChild($firstChild); | ||
648 | } | ||
649 | } | ||
650 | // prevent self-closing iframes | ||
651 | $elems = $this->body->getElementsByTagName('iframe'); | ||
652 | for ($i = $elems->length-1; $i >= 0; $i--) { | ||
653 | $e = $elems->item($i); | ||
654 | if (!$e->hasChildNodes()) { | ||
655 | $e->appendChild($this->body->ownerDocument->createTextNode('[embedded content]')); | ||
656 | } | ||
657 | } | ||
658 | // remove image lazy loading - WordPress plugin http://wordpress.org/extend/plugins/lazy-load/ | ||
659 | // the plugin replaces the src attribute to point to a 1x1 gif and puts the original src | ||
660 | // inside the data-lazy-src attribute. It also places the original image inside a noscript element | ||
661 | // next to the amended one. | ||
662 | $elems = @$xpath->query("//img[@data-lazy-src]", $this->body); | ||
663 | for ($i = $elems->length-1; $i >= 0; $i--) { | ||
664 | $e = $elems->item($i); | ||
665 | // let's see if we can grab image from noscript | ||
666 | if ($e->nextSibling !== null && $e->nextSibling->nodeName === 'noscript') { | ||
667 | $_new_elem = $e->ownerDocument->createDocumentFragment(); | ||
668 | @$_new_elem->appendXML($e->nextSibling->innerHTML); | ||
669 | $e->nextSibling->parentNode->replaceChild($_new_elem, $e->nextSibling); | ||
670 | $e->parentNode->removeChild($e); | ||
671 | } else { | ||
672 | // Use data-lazy-src as src value | ||
673 | $e->setAttribute('src', $e->getAttribute('data-lazy-src')); | ||
674 | $e->removeAttribute('data-lazy-src'); | ||
675 | } | ||
676 | } | ||
677 | |||
678 | $this->success = true; | ||
679 | } | ||
680 | |||
681 | // if we've had no success and we've used tidy, there's a chance | ||
682 | // that tidy has messed up. So let's try again without tidy... | ||
683 | if (!$this->success && $tidied && $smart_tidy) { | ||
684 | $this->debug('Trying again without tidy'); | ||
685 | $this->process($original_html, $url, false); | ||
686 | } | ||
687 | |||
688 | return $this->success; | ||
689 | } | ||
690 | |||
/**
 * Report whether $child sits somewhere beneath $parent in the DOM tree.
 *
 * Walks upward from $child's parent and compares every ancestor with
 * $parent using isSameNode(). A node is never reported as its own descendant.
 *
 * Both parameters accept any DOMNode: process() feeds entries of
 * $this->body->childNodes into this method, and those are whatever nodes the
 * body XPath expressions matched, which need not be elements.
 *
 * @param DOMNode $parent candidate ancestor
 * @param DOMNode $child  node whose ancestry is inspected
 * @return bool true when $parent is an ancestor of $child, false otherwise
 */
private function isDescendant(DOMNode $parent, DOMNode $child) {
	// Climb the ancestor chain until it runs out (the document's parentNode is null).
	for ($node = $child->parentNode; $node !== null; $node = $node->parentNode) {
		if ($node->isSameNode($parent)) return true;
	}
	return false;
}
699 | |||
/**
 * Accessor for the extracted article body.
 *
 * @return mixed the value held in $this->body; process() assigns a DOM node
 *               to it when a body is found
 */
public function getContent() {
	return $this->body;
}
703 | |||
/**
 * Accessor for the extracted title.
 *
 * @return mixed the value held in $this->title; process() fills it from the
 *               text content of the matched title element
 */
public function getTitle() {
	return $this->title;
}
707 | |||
/**
 * Accessor for the collected author names.
 *
 * @return mixed the value held in $this->author; process() appends trimmed
 *               author strings to it as array entries
 */
public function getAuthors() {
	return $this->author;
}
711 | |||
/**
 * Accessor for the language property.
 *
 * NOTE(review): where $this->language gets assigned is not visible in this
 * part of the file - presumably a language code detected from the page; confirm.
 *
 * @return mixed the value held in $this->language
 */
public function getLanguage() {
	return $this->language;
}
715 | |||
/**
 * Accessor for the extracted publication date.
 *
 * @return mixed the value held in $this->date; process() stores the result of
 *               strtotime() there, or null when the date text could not be parsed
 */
public function getDate() {
	return $this->date;
}
719 | |||
/**
 * Accessor for the site configuration consulted during extraction.
 *
 * @return mixed the value held in $this->config (the object whose body/title
 *               patterns and prune() setting process() reads)
 */
public function getSiteConfig() {
	return $this->config;
}
723 | |||
/**
 * Accessor for the next-page URL property.
 *
 * NOTE(review): the assignment of $this->nextPageUrl is not visible in this
 * part of the file - presumably the link matched by the site config's
 * next_page_link patterns; confirm.
 *
 * @return mixed the value held in $this->nextPageUrl
 */
public function getNextPageUrl() {
	return $this->nextPageUrl;
}
727 | } | ||
612 | ?> \ No newline at end of file | 728 | ?> \ No newline at end of file |
diff --git a/inc/3rdparty/libraries/content-extractor/SiteConfig.php b/inc/3rdparty/libraries/content-extractor/SiteConfig.php new file mode 100644 index 00000000..c5e300d7 --- /dev/null +++ b/inc/3rdparty/libraries/content-extractor/SiteConfig.php | |||
@@ -0,0 +1,338 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * Site Config | ||
4 | * | ||
5 | * Each instance of this class should hold extraction patterns and other directives | ||
6 | * for a website. See ContentExtractor class to see how it's used. | ||
7 | * | ||
8 | * @version 0.7 | ||
9 | * @date 2012-08-27 | ||
10 | * @author Keyvan Minoukadeh | ||
11 | * @copyright 2012 Keyvan Minoukadeh | ||
12 | * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3 | ||
13 | */ | ||
14 | |||
15 | class SiteConfig | ||
16 | { | ||
17 | // Use first matching element as title (0 or more xpath expressions) | ||
18 | public $title = array(); | ||
19 | |||
20 | // Use first matching element as body (0 or more xpath expressions) | ||
21 | public $body = array(); | ||
22 | |||
23 | // Use first matching element as author (0 or more xpath expressions) | ||
24 | public $author = array(); | ||
25 | |||
26 | // Use first matching element as date (0 or more xpath expressions) | ||
27 | public $date = array(); | ||
28 | |||
29 | // Strip elements matching these xpath expressions (0 or more) | ||
30 | public $strip = array(); | ||
31 | |||
32 | // Strip elements which contain these strings (0 or more) in the id or class attribute | ||
33 | public $strip_id_or_class = array(); | ||
34 | |||
35 | // Strip images which contain these strings (0 or more) in the src attribute | ||
36 | public $strip_image_src = array(); | ||
37 | |||
38 | // Additional HTTP headers to send | ||
39 | // NOT YET USED | ||
40 | public $http_header = array(); | ||
41 | |||
42 | // Process HTML with tidy before creating DOM (bool or null if undeclared) | ||
43 | public $tidy = null; | ||
44 | |||
45 | protected $default_tidy = true; // used if undeclared | ||
46 | |||
47 | // Autodetect title/body if xpath expressions fail to produce results. | ||
48 | // Note that this applies to title and body separately, ie. | ||
49 | // * if we get a body match but no title match, this option will determine whether we autodetect title | ||
50 | // * if neither match, this determines whether we autodetect title and body. | ||
51 | // Also note that this only applies when there is at least one xpath expression in title or body, ie. | ||
52 | // * if title and body are both empty (no xpath expressions), this option has no effect (both title and body will be auto-detected) | ||
53 | // * if there's an xpath expression for title and none for body, body will be auto-detected and this option will determine whether we auto-detect title if the xpath expression for it fails to produce results. | ||
54 | // Usage scenario: you want to extract something specific from a set of URLs, e.g. a table, and if the table is not found, you want to ignore the entry completely. Auto-detection is unlikely to succeed here, so you construct your patterns and set this option to false. Another scenario may be a site where auto-detection has proven to fail (or worse, picked up the wrong content). | ||
55 | // bool or null if undeclared | ||
56 | public $autodetect_on_failure = null; | ||
57 | protected $default_autodetect_on_failure = true; // used if undeclared | ||
58 | |||
59 | // Clean up content block - attempt to remove elements that appear to be superfluous | ||
60 | // bool or null if undeclared | ||
61 | public $prune = null; | ||
62 | protected $default_prune = true; // used if undeclared | ||
63 | |||
64 | // Test URL - if present, can be used to test the config above | ||
65 | public $test_url = array(); | ||
66 | |||
67 | // Single-page link - should identify a link element or URL pointing to the page holding the entire article | ||
68 | // This is useful for sites which split their articles across multiple pages. Links to such pages tend to | ||
69 | // display the first page with links to the other pages at the bottom. Often there is also a link to a page | ||
70 | // which displays the entire article on one page (e.g. 'print view'). | ||
71 | // This should be an XPath expression identifying the link to that page. If present and we find a match, | ||
72 | // we will retrieve that page and the rest of the options in this config will be applied to the new page. | ||
73 | public $single_page_link = array(); | ||
74 | |||
75 | public $next_page_link = array(); | ||
76 | |||
77 | // Single-page link in feed? - same as above, but patterns applied to item description HTML taken from feed | ||
78 | public $single_page_link_in_feed = array(); | ||
79 | |||
80 | // Which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib') | ||
81 | // string or null if undeclared | ||
82 | public $parser = null; | ||
83 | protected $default_parser = 'libxml'; // used if undeclared | ||
84 | |||
85 | // Strings to search for in HTML before processing begins (used with $replace_string) | ||
86 | public $find_string = array(); | ||
87 | // Strings to replace those found in $find_string before HTML processing begins | ||
88 | public $replace_string = array(); | ||
89 | |||
90 | // the options below cannot be set in the config files which this class represents | ||
91 | |||
92 | //public $cache_in_apc = false; // used to decide if we should cache in apc or not | ||
93 | public $cache_key = null; | ||
94 | public static $debug = false; | ||
95 | protected static $apc = false; | ||
96 | protected static $config_path; | ||
97 | protected static $config_path_fallback; | ||
98 | protected static $config_cache = array(); | ||
99 | const HOSTNAME_REGEX = '/^(([a-zA-Z0-9-]*[a-zA-Z0-9])\.)*([A-Za-z0-9-]*[A-Za-z0-9])$/'; | ||
100 | |||
/**
 * Print a debug message when self::$debug is switched on.
 *
 * The message is echoed as "* <msg>" followed by a newline and pushed out
 * right away, so progress can be followed while processing is still running.
 * Does nothing when self::$debug is false.
 *
 * @param string $msg text to print
 * @return void
 */
protected static function debug($msg) {
	if (!self::$debug) return;
	echo '* ', $msg, "\n";
	// ob_flush() raises a notice when there is no active output buffer,
	// so only call it when a buffer actually exists.
	if (ob_get_level() > 0) {
		ob_flush();
	}
	flush();
}
112 | |||
113 | // enable APC caching of certain site config files? | ||
114 | // If enabled the following site config files will be | ||
115 | // cached in APC cache (when requested for first time): | ||
116 | // * anything in site_config/custom/ and its corresponding file in site_config/standard/ | ||
117 | // * the site config files associated with HTML fingerprints | ||
118 | // * the global site config file | ||
119 | // returns true if enabled, false otherwise | ||
/**
 * Switch APC caching of site configs on or off.
 *
 * @param bool $apc desired state
 * @return bool false when apc_add() does not exist (the stored flag is then
 *              left untouched), otherwise the value of $apc
 */
public static function use_apc($apc=true) {
    if (function_exists('apc_add')) {
        self::$apc = $apc;
        return $apc;
    }
    // APC is unavailable: only mention it when the caller actually asked for APC
    if ($apc) {
        self::debug('APC will not be used (function apc_add does not exist)');
    }
    return false;
}
128 | |||
129 | // return bool or null | ||
/**
 * Read the 'tidy' setting.
 *
 * @param bool $use_default when true and 'tidy' is unset/null, return
 *                          $this->default_tidy instead (declared outside this excerpt)
 * @return bool|null
 */
public function tidy($use_default=true) {
    if ($use_default && !isset($this->tidy)) {
        return $this->default_tidy;
    }
    return $this->tidy;
}
134 | |||
135 | // return bool or null | ||
/**
 * Read the 'prune' setting.
 *
 * @param bool $use_default when true and 'prune' is unset/null, return
 *                          $this->default_prune instead (declared outside this excerpt)
 * @return bool|null
 */
public function prune($use_default=true) {
    if ($use_default && !isset($this->prune)) {
        return $this->default_prune;
    }
    return $this->prune;
}
140 | |||
141 | // return string or null | ||
/**
 * Read the 'parser' setting.
 *
 * @param bool $use_default when true and no parser was declared, return
 *                          $this->default_parser instead
 * @return string|null
 */
public function parser($use_default=true) {
    if ($use_default && !isset($this->parser)) {
        return $this->default_parser;
    }
    return $this->parser;
}
146 | |||
147 | // return bool or null | ||
/**
 * Read the 'autodetect_on_failure' setting.
 *
 * @param bool $use_default when true and the setting is unset/null, return
 *                          $this->default_autodetect_on_failure instead
 *                          (declared outside this excerpt)
 * @return bool|null
 */
public function autodetect_on_failure($use_default=true) {
    if ($use_default && !isset($this->autodetect_on_failure)) {
        return $this->default_autodetect_on_failure;
    }
    return $this->autodetect_on_failure;
}
152 | |||
/**
 * Set the folders build() searches for "<host>.txt" site config files.
 *
 * @param string      $path     primary folder
 * @param string|null $fallback secondary folder, or null for none
 * @return void
 */
public static function set_config_path($path, $fallback=null) {
    self::$config_path_fallback = $fallback;
    self::$config_path = $path;
}
157 | |||
/**
 * Store a site config in the per-request cache and, optionally, in APC.
 *
 * @param string     $key     host name; lower-cased and stripped of a leading "www."
 * @param SiteConfig $config  config to store; a non-empty $config->cache_key replaces $key
 * @param bool       $use_apc also store in APC (only when APC was enabled via use_apc())
 * @return void
 */
public static function add_to_cache($key, SiteConfig $config, $use_apc=true) {
    $key = strtolower($key);
    if (strpos($key, 'www.') === 0) {
        $key = substr($key, 4);
    }
    // a config that carries its own cache key is always stored under that key
    $key = $config->cache_key ? $config->cache_key : $key;
    self::$config_cache[$key] = $config;
    $store_in_apc = self::$apc && $use_apc;
    if ($store_in_apc) {
        self::debug("Adding site config to APC cache with key sc.$key");
        apc_add("sc.$key", $config);
    }
    self::debug("Cached site config with key $key");
}
169 | |||
/**
 * Tell whether a site config is cached for the given key, either in the
 * per-request cache or (when APC is enabled) in APC under "sc.<key>".
 *
 * @param string $key host name; lower-cased and stripped of a leading "www."
 * @return bool
 */
public static function is_cached($key) {
    $key = strtolower($key);
    if (strpos($key, 'www.') === 0) {
        $key = substr($key, 4);
    }
    if (array_key_exists($key, self::$config_cache)) {
        return true;
    }
    return self::$apc && (bool)apc_fetch("sc.$key");
}
180 | |||
/**
 * Merge another site config into this one.
 *
 * List-valued commands are concatenated (this config's entries first).
 * Single-valued commands are copied from $newconfig only where this config
 * has no value (null) yet. 'test_url' is not merged.
 *
 * @param SiteConfig $newconfig config whose values are appended
 * @return void
 */
public function append(SiteConfig $newconfig) {
    // commands where we accept multiple statements: merge and drop duplicates
    foreach (array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header') as $var) {
        $this->$var = array_unique(array_merge($this->$var, $newconfig->$var));
    }
    // find_string[N] pairs with replace_string[N]. De-duplicating either list
    // on its own (e.g. two different find strings both replaced by the same
    // text) would shift the pairing, so these two are merged without array_unique.
    foreach (array('find_string', 'replace_string') as $var) {
        $this->$var = array_merge($this->$var, $newconfig->$var);
    }
    // single statement commands: we do not overwrite existing non null values
    foreach (array('tidy', 'prune', 'parser', 'autodetect_on_failure') as $var) {
        if ($this->$var === null) $this->$var = $newconfig->$var;
    }
}
194 | |||
195 | // returns SiteConfig instance if an appropriate one is found, false otherwise | ||
196 | // if $exact_host_match is true, we will not look for wildcard config matches | ||
197 | // by default if host is 'test.example.org' we will look for and load '.example.org.txt' if it exists | ||
/**
 * Find and load the site config for a host name.
 *
 * Lookup order for each candidate name (the host itself, then - unless
 * $exact_host_match is set - the wildcard name formed by replacing the first
 * label with a leading dot): per-request cache, APC (if enabled), then
 * "<name>.txt" in the primary folder. A cache hit is returned immediately.
 * If a primary file is found and it allows autodetect_on_failure, the
 * secondary folder is searched too and its config is appended to the primary.
 *
 * @param string $host             host name; a leading "www." is ignored
 * @param bool   $exact_host_match when true, do not try the wildcard name
 * @return SiteConfig|false false when the host is invalid, no file matches,
 *                          or the only matching file cannot be read / is empty
 */
public static function build($host, $exact_host_match=false) {
    $host = strtolower($host);
    if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
    // reject empty, overlong or syntactically invalid host names before they are used in a file path
    if (!$host || (strlen($host) > 200) || !preg_match(self::HOSTNAME_REGEX, ltrim($host, '.'))) return false;
    // check for site configuration
    $try = array($host);
    // should we look for wildcard matches
    if (!$exact_host_match) {
        $split = explode('.', $host);
        if (count($split) > 1) {
            // drop the first label: 'test.example.org' -> '.example.org'
            array_shift($split);
            $try[] = '.'.implode('.', $split);
        }
    }

    // look for site config file in primary folder
    self::debug(". looking for site config for $host in primary folder");
    foreach ($try as $h) {
        if (array_key_exists($h, self::$config_cache)) {
            self::debug("... site config for $h already loaded in this request");
            return self::$config_cache[$h];
        } elseif (self::$apc && ($sconfig = apc_fetch("sc.$h"))) {
            self::debug("... site config for $h in APC cache");
            return $sconfig;
        } elseif (file_exists(self::$config_path."/$h.txt")) {
            self::debug("... found site config ($h.txt)");
            $file_primary = self::$config_path."/$h.txt";
            $matched_name = $h;
            break;
        }
    }

    // if we found site config, process it
    if (isset($file_primary)) {
        $config_lines = file($file_primary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        // unreadable or empty primary file: give up without consulting the secondary folder
        if (!$config_lines || !is_array($config_lines)) return false;
        $config = self::build_from_array($config_lines);
        // if APC caching is available and enabled, mark this for cache
        //$config->cache_in_apc = true;
        $config->cache_key = $matched_name;

        // if autodetect on failure is off (on by default) we do not need to look
        // in secondary folder
        if (!$config->autodetect_on_failure()) {
            self::debug('... autodetect on failure is disabled (no other site config files will be loaded)');
            return $config;
        }
    }

    // look for site config file in secondary folder
    if (isset(self::$config_path_fallback)) {
        self::debug(". looking for site config for $host in secondary folder");
        foreach ($try as $h) {
            if (file_exists(self::$config_path_fallback."/$h.txt")) {
                self::debug("... found site config in secondary folder ($h.txt)");
                $file_secondary = self::$config_path_fallback."/$h.txt";
                $matched_name = $h;
                break;
            }
        }
        if (!isset($file_secondary)) {
            self::debug("... no site config match in secondary folder");
        }
    }

    // return false if no config file found
    if (!isset($file_primary) && !isset($file_secondary)) {
        self::debug("... no site config match for $host");
        return false;
    }

    // return primary config if secondary not found
    if (!isset($file_secondary) && isset($config)) {
        return $config;
    }

    // process secondary config file
    $config_lines = file($file_secondary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if (!$config_lines || !is_array($config_lines)) {
        // failed to process secondary
        if (isset($config)) {
            // return primary config
            return $config;
        } else {
            return false;
        }
    }

    // merge with primary and return
    if (isset($config)) {
        self::debug('. merging config files');
        // primary values take precedence; cache_key stays that of the primary match
        $config->append(self::build_from_array($config_lines));
        return $config;
    } else {
        // return just secondary
        $config = self::build_from_array($config_lines);
        // if APC caching is available and enabled, mark this for cache
        //$config->cache_in_apc = true;
        $config->cache_key = $matched_name;
        return $config;
    }
}
300 | |||
/**
 * Build a SiteConfig from the lines of a site config file.
 *
 * Each useful line has the form "command: value". Blank lines, lines starting
 * with '#', lines without a colon and lines with an empty command or value
 * are ignored, as are unknown commands.
 *
 * @param array $lines raw config lines
 * @return SiteConfig
 */
public static function build_from_array(array $lines) {
    // commands that may appear several times; each value is appended to a list
    $list_commands = array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'test_url', 'find_string', 'replace_string');
    // single statement commands that evaluate to true ('yes') or false (anything else)
    $flag_commands = array('tidy', 'prune', 'autodetect_on_failure');

    $config = new SiteConfig();
    foreach ($lines as $raw) {
        $raw = trim($raw);

        // skip comments, empty lines
        if ($raw == '' || $raw[0] == '#') continue;

        // split at the first colon; no colon means this is not a command line
        $colon = strpos($raw, ':');
        if ($colon === false) continue;
        $command = trim(substr($raw, 0, $colon));
        $val = trim(substr($raw, $colon + 1));
        if ($command == '' || $val == '') continue;

        if (in_array($command, $list_commands)) {
            array_push($config->$command, $val);
            continue;
        }
        if (in_array($command, $flag_commands)) {
            $config->$command = ($val == 'yes');
            continue;
        }
        // single statement command stored as a string
        if ($command == 'parser') {
            $config->$command = $val;
            continue;
        }

        // remaining form: replace_string(find): replace
        if (substr($command, -1) != ')') continue;
        if (!preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match)) continue;
        if ($match[1] == 'replace_string') {
            // keep find_string and replace_string aligned index by index
            array_push($config->find_string, $match[2]);
            array_push($config->replace_string, $val);
        }
    }
    return $config;
}
337 | } | ||
338 | ?> \ No newline at end of file | ||
diff --git a/inc/3rdparty/feedwriter/FeedItem.php b/inc/3rdparty/libraries/feedwriter/FeedItem.php index 71e6e98c..9373deeb 100644 --- a/inc/3rdparty/feedwriter/FeedItem.php +++ b/inc/3rdparty/libraries/feedwriter/FeedItem.php | |||
@@ -1,167 +1,185 @@ | |||
1 | <?php | 1 | <?php |
2 | /** | 2 | /** |
3 | * Univarsel Feed Writer | 3 | * Univarsel Feed Writer |
4 | * | 4 | * |
5 | * FeedItem class - Used as feed element in FeedWriter class | 5 | * FeedItem class - Used as feed element in FeedWriter class |
6 | * | 6 | * |
7 | * @package UnivarselFeedWriter | 7 | * @package UnivarselFeedWriter |
8 | * @author Anis uddin Ahmad <anisniit@gmail.com> | 8 | * @author Anis uddin Ahmad <anisniit@gmail.com> |
9 | * @link http://www.ajaxray.com/projects/rss | 9 | * @link http://www.ajaxray.com/projects/rss |
10 | */ | 10 | */ |
11 | class FeedItem | 11 | class FeedItem |
12 | { | 12 | { |
13 | private $elements = array(); //Collection of feed elements | 13 | private $elements = array(); //Collection of feed elements |
14 | private $version; | 14 | private $version; |
15 | 15 | ||
16 | /** | 16 | /** |
17 | * Constructor | 17 | * Constructor |
18 | * | 18 | * |
19 | * @param contant (RSS1/RSS2/ATOM) RSS2 is default. | 19 | * @param contant (RSS1/RSS2/ATOM) RSS2 is default. |
20 | */ | 20 | */ |
21 | function __construct($version = RSS2) | 21 | function __construct($version = RSS2) |
22 | { | 22 | { |
23 | $this->version = $version; | 23 | $this->version = $version; |
24 | } | 24 | } |
25 | 25 | ||
26 | /** | 26 | /** |
27 | * Add an element to elements array | 27 | * Set element (overwrites existing elements with $elementName) |
28 | * | 28 | * |
29 | * @access public | 29 | * @access public |
30 | * @param srting The tag name of an element | 30 | * @param srting The tag name of an element |
31 | * @param srting The content of tag | 31 | * @param srting The content of tag |
32 | * @param array Attributes(if any) in 'attrName' => 'attrValue' format | 32 | * @param array Attributes(if any) in 'attrName' => 'attrValue' format |
33 | * @return void | 33 | * @return void |
34 | */ | 34 | */ |
35 | public function addElement($elementName, $content, $attributes = null) | 35 | public function setElement($elementName, $content, $attributes = null) |
36 | { | 36 | { |
37 | $this->elements[$elementName]['name'] = $elementName; | 37 | if (isset($this->elements[$elementName])) { |
38 | $this->elements[$elementName]['content'] = $content; | 38 | unset($this->elements[$elementName]); |
39 | $this->elements[$elementName]['attributes'] = $attributes; | 39 | } |
40 | } | 40 | $this->addElement($elementName, $content, $attributes); |
41 | 41 | } | |
42 | /** | 42 | |
43 | * Set multiple feed elements from an array. | 43 | /** |
44 | * Elements which have attributes cannot be added by this method | 44 | * Add an element to elements array |
45 | * | 45 | * |
46 | * @access public | 46 | * @access public |
47 | * @param array array of elements in 'tagName' => 'tagContent' format. | 47 | * @param srting The tag name of an element |
48 | * @return void | 48 | * @param srting The content of tag |
49 | */ | 49 | * @param array Attributes(if any) in 'attrName' => 'attrValue' format |
50 | public function addElementArray($elementArray) | 50 | * @return void |
51 | { | 51 | */ |
52 | if(! is_array($elementArray)) return; | 52 | public function addElement($elementName, $content, $attributes = null) |
53 | foreach ($elementArray as $elementName => $content) | 53 | { |
54 | { | 54 | $i = 0; |
55 | $this->addElement($elementName, $content); | 55 | if (isset($this->elements[$elementName])) { |
56 | } | 56 | $i = count($this->elements[$elementName]); |
57 | } | 57 | } else { |
58 | 58 | $this->elements[$elementName] = array(); | |
59 | /** | 59 | } |
60 | * Return the collection of elements in this feed item | 60 | $this->elements[$elementName][$i]['name'] = $elementName; |
61 | * | 61 | $this->elements[$elementName][$i]['content'] = $content; |
62 | * @access public | 62 | $this->elements[$elementName][$i]['attributes'] = $attributes; |
63 | * @return array | 63 | } |
64 | */ | 64 | |
65 | public function getElements() | 65 | /** |
66 | { | 66 | * Set multiple feed elements from an array. |
67 | return $this->elements; | 67 | * Elements which have attributes cannot be added by this method |
68 | } | 68 | * |
69 | 69 | * @access public | |
70 | // Wrapper functions ------------------------------------------------------ | 70 | * @param array array of elements in 'tagName' => 'tagContent' format. |
71 | 71 | * @return void | |
72 | /** | 72 | */ |
73 | * Set the 'dscription' element of feed item | 73 | public function addElementArray($elementArray) |
74 | * | 74 | { |
75 | * @access public | 75 | if(! is_array($elementArray)) return; |
76 | * @param string The content of 'description' element | 76 | foreach ($elementArray as $elementName => $content) |
77 | * @return void | 77 | { |
78 | */ | 78 | $this->addElement($elementName, $content); |
79 | public function setDescription($description) | 79 | } |
80 | { | 80 | } |
81 | $tag = ($this->version == ATOM)? 'summary' : 'description'; | 81 | |
82 | $this->addElement($tag, $description); | 82 | /** |
83 | } | 83 | * Return the collection of elements in this feed item |
84 | 84 | * | |
85 | /** | 85 | * @access public |
86 | * @desc Set the 'title' element of feed item | 86 | * @return array |
87 | * @access public | 87 | */ |
88 | * @param string The content of 'title' element | 88 | public function getElements() |
89 | * @return void | 89 | { |
90 | */ | 90 | return $this->elements; |
91 | public function setTitle($title) | 91 | } |
92 | { | 92 | |
93 | $this->addElement('title', $title); | 93 | // Wrapper functions ------------------------------------------------------ |
94 | } | 94 | |
95 | 95 | /** | |
96 | /** | 96 | * Set the 'dscription' element of feed item |
97 | * Set the 'date' element of feed item | 97 | * |
98 | * | 98 | * @access public |
99 | * @access public | 99 | * @param string The content of 'description' element |
100 | * @param string The content of 'date' element | 100 | * @return void |
101 | * @return void | 101 | */ |
102 | */ | 102 | public function setDescription($description) |
103 | public function setDate($date) | 103 | { |
104 | { | 104 | $tag = 'description'; |
105 | if(! is_numeric($date)) | 105 | $this->setElement($tag, $description); |
106 | { | 106 | } |
107 | $date = strtotime($date); | 107 | |
108 | } | 108 | /** |
109 | 109 | * @desc Set the 'title' element of feed item | |
110 | if($this->version == ATOM) | 110 | * @access public |
111 | { | 111 | * @param string The content of 'title' element |
112 | $tag = 'updated'; | 112 | * @return void |
113 | $value = date(DATE_ATOM, $date); | 113 | */ |
114 | } | 114 | public function setTitle($title) |
115 | elseif($this->version == RSS2) | 115 | { |
116 | { | 116 | $this->setElement('title', $title); |
117 | $tag = 'pubDate'; | 117 | } |
118 | $value = date(DATE_RSS, $date); | 118 | |
119 | } | 119 | /** |
120 | else | 120 | * Set the 'date' element of feed item |
121 | { | 121 | * |
122 | $tag = 'dc:date'; | 122 | * @access public |
123 | $value = date("Y-m-d", $date); | 123 | * @param string The content of 'date' element |
124 | } | 124 | * @return void |
125 | 125 | */ | |
126 | $this->addElement($tag, $value); | 126 | public function setDate($date) |
127 | } | 127 | { |
128 | 128 | if(! is_numeric($date)) | |
129 | /** | 129 | { |
130 | * Set the 'link' element of feed item | 130 | $date = strtotime($date); |
131 | * | 131 | } |
132 | * @access public | 132 | |
133 | * @param string The content of 'link' element | 133 | if($this->version == RSS2) |
134 | * @return void | 134 | { |
135 | */ | 135 | $tag = 'pubDate'; |
136 | public function setLink($link) | 136 | $value = date(DATE_RSS, $date); |
137 | { | 137 | } |
138 | if($this->version == RSS2 || $this->version == RSS1) | 138 | else |
139 | { | 139 | { |
140 | $this->addElement('link', $link); | 140 | $tag = 'dc:date'; |
141 | } | 141 | $value = date("Y-m-d", $date); |
142 | else | 142 | } |
143 | { | 143 | |
144 | $this->addElement('link','',array('href'=>$link)); | 144 | $this->setElement($tag, $value); |
145 | $this->addElement('id', FeedWriter::uuid($link,'urn:uuid:')); | 145 | } |
146 | } | 146 | |
147 | 147 | /** | |
148 | } | 148 | * Set the 'link' element of feed item |
149 | 149 | * | |
150 | /** | 150 | * @access public |
151 | * Set the 'encloser' element of feed item | 151 | * @param string The content of 'link' element |
152 | * For RSS 2.0 only | 152 | * @return void |
153 | * | 153 | */ |
154 | * @access public | 154 | public function setLink($link) |
155 | * @param string The url attribute of encloser tag | 155 | { |
156 | * @param string The length attribute of encloser tag | 156 | if($this->version == RSS2 || $this->version == RSS1) |
157 | * @param string The type attribute of encloser tag | 157 | { |
158 | * @return void | 158 | $this->setElement('link', $link); |
159 | */ | 159 | } |
160 | public function setEncloser($url, $length, $type) | 160 | else |
161 | { | 161 | { |
162 | $attributes = array('url'=>$url, 'length'=>$length, 'type'=>$type); | 162 | $this->setElement('link','',array('href'=>$link)); |
163 | $this->addElement('enclosure','',$attributes); | 163 | $this->setElement('id', FeedWriter::uuid($link,'urn:uuid:')); |
164 | } | 164 | } |
165 | 165 | ||
166 | } // end of class FeedItem | 166 | } |
167 | ?> | 167 | |
168 | /** | ||
169 | * Set the 'encloser' element of feed item | ||
170 | * For RSS 2.0 only | ||
171 | * | ||
172 | * @access public | ||
173 | * @param string The url attribute of encloser tag | ||
174 | * @param string The length attribute of encloser tag | ||
175 | * @param string The type attribute of encloser tag | ||
176 | * @return void | ||
177 | */ | ||
178 | public function setEncloser($url, $length, $type) | ||
179 | { | ||
180 | $attributes = array('url'=>$url, 'length'=>$length, 'type'=>$type); | ||
181 | $this->setElement('enclosure','',$attributes); | ||
182 | } | ||
183 | |||
184 | } // end of class FeedItem | ||
185 | ?> | ||
diff --git a/inc/3rdparty/feedwriter/FeedWriter.php b/inc/3rdparty/libraries/feedwriter/FeedWriter.php index d5d6648a..adb2526c 100644 --- a/inc/3rdparty/feedwriter/FeedWriter.php +++ b/inc/3rdparty/libraries/feedwriter/FeedWriter.php | |||
@@ -1,434 +1,441 @@ | |||
1 | <?php | 1 | <?php |
2 | define('RSS2', 1, true); | 2 | define('RSS2', 1, true); |
3 | define('JSON', 2, true); | 3 | define('JSON', 2, true); |
4 | define('ATOM', 3, true); | 4 | define('JSONP', 3, true); |
5 | 5 | ||
6 | /** | 6 | /** |
7 | * Univarsel Feed Writer class | 7 | * Univarsel Feed Writer class |
8 | * | 8 | * |
9 | * Genarate RSS2 or JSON (original: RSS 1.0, RSS2.0 and ATOM Feed) | 9 | * Genarate RSS2 or JSON (original: RSS 1.0, RSS2.0 and ATOM Feed) |
10 | * | 10 | * |
11 | * Modified for FiveFilters.org's Full-Text RSS project | 11 | * Modified for FiveFilters.org's Full-Text RSS project |
12 | * to allow for inclusion of hubs, JSON output. | 12 | * to allow for inclusion of hubs, JSON output. |
13 | * Stripped RSS1 and ATOM support. | 13 | * Stripped RSS1 and ATOM support. |
14 | * | 14 | * |
15 | * @package UnivarselFeedWriter | 15 | * @package UnivarselFeedWriter |
16 | * @author Anis uddin Ahmad <anisniit@gmail.com> | 16 | * @author Anis uddin Ahmad <anisniit@gmail.com> |
17 | * @link http://www.ajaxray.com/projects/rss | 17 | * @link http://www.ajaxray.com/projects/rss |
18 | */ | 18 | */ |
19 | class FeedWriter | 19 | class FeedWriter |
20 | { | 20 | { |
21 | private $self = null; // self URL - http://feed2.w3.org/docs/warning/MissingAtomSelfLink.html | 21 | private $self = null; // self URL - http://feed2.w3.org/docs/warning/MissingAtomSelfLink.html |
22 | private $hubs = array(); // PubSubHubbub hubs | 22 | private $hubs = array(); // PubSubHubbub hubs |
23 | private $channels = array(); // Collection of channel elements | 23 | private $channels = array(); // Collection of channel elements |
24 | private $items = array(); // Collection of items as object of FeedItem class. | 24 | private $items = array(); // Collection of items as object of FeedItem class. |
25 | private $data = array(); // Store some other version wise data | 25 | private $data = array(); // Store some other version wise data |
26 | private $CDATAEncoding = array(); // The tag names which have to encoded as CDATA | 26 | private $CDATAEncoding = array(); // The tag names which have to encoded as CDATA |
27 | private $xsl = null; // stylesheet to render RSS (used by Chrome) | 27 | private $xsl = null; // stylesheet to render RSS (used by Chrome) |
28 | private $json = null; // JSON object | 28 | private $json = null; // JSON object |
29 | 29 | ||
30 | private $version = null; | 30 | private $version = null; |
31 | 31 | ||
32 | /** | 32 | /** |
33 | * Constructor | 33 | * Constructor |
34 | * | 34 | * |
35 | * @param constant the version constant (RSS2 or JSON). | 35 | * @param constant the version constant (RSS2 or JSON). |
36 | */ | 36 | */ |
37 | function __construct($version = RSS2) | 37 | function __construct($version = RSS2) |
38 | { | 38 | { |
39 | $this->version = $version; | 39 | $this->version = $version; |
40 | 40 | ||
41 | // Setting default value for assential channel elements | 41 | // Setting default value for assential channel elements |
42 | $this->channels['title'] = $version . ' Feed'; | 42 | $this->channels['title'] = $version . ' Feed'; |
43 | $this->channels['link'] = 'http://www.ajaxray.com/blog'; | 43 | $this->channels['link'] = 'http://www.ajaxray.com/blog'; |
44 | 44 | ||
45 | //Tag names to encode in CDATA | 45 | //Tag names to encode in CDATA |
46 | $this->CDATAEncoding = array('description', 'content:encoded', 'content', 'subtitle', 'summary'); | 46 | $this->CDATAEncoding = array('description', 'content:encoded', 'content', 'subtitle', 'summary'); |
47 | } | 47 | } |
48 | 48 | ||
49 | public function setFormat($format) { | 49 | public function setFormat($format) { |
50 | $this->version = $format; | 50 | $this->version = $format; |
51 | } | 51 | } |
52 | 52 | ||
53 | // Start # public functions --------------------------------------------- | 53 | // Start # public functions --------------------------------------------- |
54 | 54 | ||
55 | /** | 55 | /** |
56 | * Set a channel element | 56 | * Set a channel element |
57 | * @access public | 57 | * @access public |
58 | * @param srting name of the channel tag | 58 | * @param srting name of the channel tag |
59 | * @param string content of the channel tag | 59 | * @param string content of the channel tag |
60 | * @return void | 60 | * @return void |
61 | */ | 61 | */ |
62 | public function setChannelElement($elementName, $content) | 62 | public function setChannelElement($elementName, $content) |
63 | { | 63 | { |
64 | $this->channels[$elementName] = $content ; | 64 | $this->channels[$elementName] = $content ; |
65 | } | 65 | } |
66 | 66 | ||
67 | /** | 67 | /** |
68 | * Set multiple channel elements from an array. Array elements | 68 | * Set multiple channel elements from an array. Array elements |
69 | * should be 'channelName' => 'channelContent' format. | 69 | * should be 'channelName' => 'channelContent' format. |
70 | * | 70 | * |
71 | * @access public | 71 | * @access public |
72 | * @param array array of channels | 72 | * @param array array of channels |
73 | * @return void | 73 | * @return void |
74 | */ | 74 | */ |
75 | public function setChannelElementsFromArray($elementArray) | 75 | public function setChannelElementsFromArray($elementArray) |
76 | { | 76 | { |
77 | if(! is_array($elementArray)) return; | 77 | if(! is_array($elementArray)) return; |
78 | foreach ($elementArray as $elementName => $content) | 78 | foreach ($elementArray as $elementName => $content) |
79 | { | 79 | { |
80 | $this->setChannelElement($elementName, $content); | 80 | $this->setChannelElement($elementName, $content); |
81 | } | 81 | } |
82 | } | 82 | } |
83 | 83 | ||
84 | /** | 84 | /** |
85 | * Genarate the actual RSS/JSON file | 85 | * Genarate the actual RSS/JSON file |
86 | * | 86 | * |
87 | * @access public | 87 | * @access public |
88 | * @return void | 88 | * @return void |
89 | */ | 89 | */ |
90 | public function genarateFeed() | 90 | public function genarateFeed() |
91 | { | 91 | { |
92 | if ($this->version == RSS2) { | 92 | if ($this->version == RSS2) { |
93 | header('Content-type: text/xml; charset=UTF-8'); | 93 | header('Content-type: text/xml; charset=UTF-8'); |
94 | } elseif ($this->version == JSON) { | 94 | // this line prevents Chrome 20 from prompting download |
95 | header('Content-type: application/json; charset=UTF-8'); | 95 | // used by Google: https://news.google.com/news/feeds?ned=us&topic=b&output=rss |
96 | $this->json = new stdClass(); | 96 | header('X-content-type-options: nosniff'); |
97 | } | 97 | } elseif ($this->version == JSON) { |
98 | $this->printHead(); | 98 | header('Content-type: application/json; charset=UTF-8'); |
99 | $this->printChannels(); | 99 | $this->json = new stdClass(); |
100 | $this->printItems(); | 100 | } elseif ($this->version == JSONP) { |
101 | $this->printTale(); | 101 | header('Content-type: application/javascript; charset=UTF-8'); |
102 | if ($this->version == JSON) { | 102 | $this->json = new stdClass(); |
103 | echo json_encode($this->json); | 103 | } |
104 | } | 104 | $this->printHead(); |
105 | } | 105 | $this->printChannels(); |
106 | 106 | $this->printItems(); | |
107 | /** | 107 | $this->printTale(); |
108 | * Create a new FeedItem. | 108 | if ($this->version == JSON || $this->version == JSONP) { |
109 | * | 109 | echo json_encode($this->json); |
110 | * @access public | 110 | } |
111 | * @return object instance of FeedItem class | 111 | } |
112 | */ | 112 | |
113 | public function createNewItem() | 113 | /** |
114 | { | 114 | * Create a new FeedItem. |
115 | $Item = new FeedItem($this->version); | 115 | * |
116 | return $Item; | 116 | * @access public |
117 | } | 117 | * @return object instance of FeedItem class |
118 | 118 | */ | |
119 | /** | 119 | public function createNewItem() |
120 | * Add a FeedItem to the main class | 120 | { |
121 | * | 121 | $Item = new FeedItem($this->version); |
122 | * @access public | 122 | return $Item; |
123 | * @param object instance of FeedItem class | 123 | } |
124 | * @return void | 124 | |
125 | */ | 125 | /** |
126 | public function addItem($feedItem) | 126 | * Add a FeedItem to the main class |
127 | { | 127 | * |
128 | $this->items[] = $feedItem; | 128 | * @access public |
129 | } | 129 | * @param object instance of FeedItem class |
130 | 130 | * @return void | |
131 | // Wrapper functions ------------------------------------------------------------------- | 131 | */ |
132 | 132 | public function addItem($feedItem) | |
133 | /** | 133 | { |
134 | * Set the 'title' channel element | 134 | $this->items[] = $feedItem; |
135 | * | 135 | } |
136 | * @access public | 136 | |
137 | * @param srting value of 'title' channel tag | 137 | // Wrapper functions ------------------------------------------------------------------- |
138 | * @return void | 138 | |
139 | */ | 139 | /** |
140 | public function setTitle($title) | 140 | * Set the 'title' channel element |
141 | { | 141 | * |
142 | $this->setChannelElement('title', $title); | 142 | * @access public |
143 | } | 143 | * @param srting value of 'title' channel tag |
144 | 144 | * @return void | |
145 | /** | 145 | */ |
146 | * Add a hub to the channel element | 146 | public function setTitle($title) |
147 | * | 147 | { |
148 | * @access public | 148 | $this->setChannelElement('title', $title); |
149 | * @param string URL | 149 | } |
150 | * @return void | 150 | |
151 | */ | 151 | /** |
152 | public function addHub($hub) | 152 | * Add a hub to the channel element |
153 | { | 153 | * |
154 | $this->hubs[] = $hub; | 154 | * @access public |
155 | } | 155 | * @param string URL |
156 | 156 | * @return void | |
157 | /** | 157 | */ |
158 | * Set XSL URL | 158 | public function addHub($hub) |
159 | * | 159 | { |
160 | * @access public | 160 | $this->hubs[] = $hub; |
161 | * @param string URL | 161 | } |
162 | * @return void | 162 | |
163 | */ | 163 | /** |
164 | public function setXsl($xsl) | 164 | * Set XSL URL |
165 | { | 165 | * |
166 | $this->xsl = $xsl; | 166 | * @access public |
167 | } | 167 | * @param string URL |
168 | 168 | * @return void | |
169 | /** | 169 | */ |
170 | * Set self URL | 170 | public function setXsl($xsl) |
171 | * | 171 | { |
172 | * @access public | 172 | $this->xsl = $xsl; |
173 | * @param string URL | 173 | } |
174 | * @return void | 174 | |
175 | */ | 175 | /** |
176 | public function setSelf($self) | 176 | * Set self URL |
177 | { | 177 | * |
178 | $this->self = $self; | 178 | * @access public |
179 | } | 179 | * @param string URL |
180 | 180 | * @return void | |
181 | /** | 181 | */ |
182 | * Set the 'description' channel element | 182 | public function setSelf($self) |
183 | * | 183 | { |
184 | * @access public | 184 | $this->self = $self; |
185 | * @param srting value of 'description' channel tag | 185 | } |
186 | * @return void | 186 | |
187 | */ | 187 | /** |
188 | public function setDescription($desciption) | 188 | * Set the 'description' channel element |
189 | { | 189 | * |
190 | $tag = ($this->version == ATOM)? 'subtitle' : 'description'; | 190 | * @access public |
191 | $this->setChannelElement($tag, $desciption); | 191 | * @param srting value of 'description' channel tag |
192 | } | 192 | * @return void |
193 | 193 | */ | |
194 | /** | 194 | public function setDescription($desciption) |
195 | * Set the 'link' channel element | 195 | { |
196 | * | 196 | $tag = ($this->version == ATOM)? 'subtitle' : 'description'; |
197 | * @access public | 197 | $this->setChannelElement($tag, $desciption); |
198 | * @param srting value of 'link' channel tag | 198 | } |
199 | * @return void | 199 | |
200 | */ | 200 | /** |
201 | public function setLink($link) | 201 | * Set the 'link' channel element |
202 | { | 202 | * |
203 | $this->setChannelElement('link', $link); | 203 | * @access public |
204 | } | 204 | * @param srting value of 'link' channel tag |
205 | 205 | * @return void | |
206 | /** | 206 | */ |
207 | * Set the 'image' channel element | 207 | public function setLink($link) |
208 | * | 208 | { |
209 | * @access public | 209 | $this->setChannelElement('link', $link); |
210 | * @param srting title of image | 210 | } |
211 | * @param srting link url of the imahe | 211 | |
212 | * @param srting path url of the image | 212 | /** |
213 | * @return void | 213 | * Set the 'image' channel element |
214 | */ | 214 | * |
215 | public function setImage($title, $link, $url) | 215 | * @access public |
216 | { | 216 | * @param srting title of image |
217 | $this->setChannelElement('image', array('title'=>$title, 'link'=>$link, 'url'=>$url)); | 217 | * @param srting link url of the imahe |
218 | } | 218 | * @param srting path url of the image |
219 | 219 | * @return void | |
220 | // End # public functions ---------------------------------------------- | 220 | */ |
221 | 221 | public function setImage($title, $link, $url) | |
222 | // Start # private functions ---------------------------------------------- | 222 | { |
223 | 223 | $this->setChannelElement('image', array('title'=>$title, 'link'=>$link, 'url'=>$url)); | |
224 | /** | 224 | } |
225 | * Prints the xml and rss namespace | 225 | |
226 | * | 226 | // End # public functions ---------------------------------------------- |
227 | * @access private | 227 | |
228 | * @return void | 228 | // Start # private functions ---------------------------------------------- |
229 | */ | 229 | |
230 | private function printHead() | 230 | /** |
231 | { | 231 | * Prints the xml and rss namespace |
232 | if ($this->version == RSS2) | 232 | * |
233 | { | 233 | * @access private |
234 | $out = '<?xml version="1.0" encoding="utf-8"?>'."\n"; | 234 | * @return void |
235 | if ($this->xsl) $out .= '<?xml-stylesheet type="text/xsl" href="'.htmlspecialchars($this->xsl).'"?>' . PHP_EOL; | 235 | */ |
236 | $out .= '<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/">' . PHP_EOL; | 236 | private function printHead() |
237 | echo $out; | 237 | { |
238 | } | 238 | if ($this->version == RSS2) |
239 | elseif ($this->version == JSON) | 239 | { |
240 | { | 240 | $out = '<?xml version="1.0" encoding="utf-8"?>'."\n"; |
241 | $this->json->rss = array('@attributes' => array('version' => '2.0')); | 241 | if ($this->xsl) $out .= '<?xml-stylesheet type="text/xsl" href="'.htmlspecialchars($this->xsl).'"?>' . PHP_EOL; |
242 | } | 242 | $out .= '<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/">' . PHP_EOL; |
243 | } | 243 | echo $out; |
244 | 244 | } | |
245 | /** | 245 | elseif ($this->version == JSON || $this->version == JSONP) |
246 | * Closes the open tags at the end of file | 246 | { |
247 | * | 247 | $this->json->rss = array('@attributes' => array('version' => '2.0')); |
248 | * @access private | 248 | } |
249 | * @return void | 249 | } |
250 | */ | 250 | |
251 | private function printTale() | 251 | /** |
252 | { | 252 | * Closes the open tags at the end of file |
253 | if ($this->version == RSS2) | 253 | * |
254 | { | 254 | * @access private |
255 | echo '</channel>',PHP_EOL,'</rss>'; | 255 | * @return void |
256 | } | 256 | */ |
257 | // do nothing for JSON | 257 | private function printTale() |
258 | } | 258 | { |
259 | 259 | if ($this->version == RSS2) | |
260 | /** | 260 | { |
261 | * Creates a single node as xml format | 261 | echo '</channel>',PHP_EOL,'</rss>'; |
262 | * | 262 | } |
263 | * @access private | 263 | // do nothing for JSON |
264 | * @param string name of the tag | 264 | } |
265 | * @param mixed tag value as string or array of nested tags in 'tagName' => 'tagValue' format | 265 | |
266 | * @param array Attributes(if any) in 'attrName' => 'attrValue' format | 266 | /** |
267 | * @return string formatted xml tag | 267 | * Creates a single node as xml format |
268 | */ | 268 | * |
269 | private function makeNode($tagName, $tagContent, $attributes = null) | 269 | * @access private |
270 | { | 270 | * @param string name of the tag |
271 | if ($this->version == RSS2) | 271 | * @param mixed tag value as string or array of nested tags in 'tagName' => 'tagValue' format |
272 | { | 272 | * @param array Attributes(if any) in 'attrName' => 'attrValue' format |
273 | $nodeText = ''; | 273 | * @return string formatted xml tag |
274 | $attrText = ''; | 274 | */ |
275 | if (is_array($attributes)) | 275 | private function makeNode($tagName, $tagContent, $attributes = null) |
276 | { | 276 | { |
277 | foreach ($attributes as $key => $value) | 277 | if ($this->version == RSS2) |
278 | { | 278 | { |
279 | $attrText .= " $key=\"$value\" "; | 279 | $nodeText = ''; |
280 | } | 280 | $attrText = ''; |
281 | } | 281 | if (is_array($attributes)) |
282 | $nodeText .= "<{$tagName}{$attrText}>"; | 282 | { |
283 | if (is_array($tagContent)) | 283 | foreach ($attributes as $key => $value) |
284 | { | 284 | { |
285 | foreach ($tagContent as $key => $value) | 285 | $attrText .= " $key=\"$value\" "; |
286 | { | 286 | } |
287 | $nodeText .= $this->makeNode($key, $value); | 287 | } |
288 | } | 288 | $nodeText .= "<{$tagName}{$attrText}>"; |
289 | } | 289 | if (is_array($tagContent)) |
290 | else | 290 | { |
291 | { | 291 | foreach ($tagContent as $key => $value) |
292 | //$nodeText .= (in_array($tagName, $this->CDATAEncoding))? $tagContent : htmlentities($tagContent); | 292 | { |
293 | $nodeText .= htmlspecialchars($tagContent); | 293 | $nodeText .= $this->makeNode($key, $value); |
294 | } | 294 | } |
295 | //$nodeText .= (in_array($tagName, $this->CDATAEncoding))? "]]></$tagName>" : "</$tagName>"; | 295 | } |
296 | $nodeText .= "</$tagName>"; | 296 | else |
297 | return $nodeText . PHP_EOL; | 297 | { |
298 | } | 298 | //$nodeText .= (in_array($tagName, $this->CDATAEncoding))? $tagContent : htmlentities($tagContent); |
299 | elseif ($this->version == JSON) | 299 | $nodeText .= htmlspecialchars($tagContent); |
300 | { | 300 | } |
301 | $tagName = (string)$tagName; | 301 | //$nodeText .= (in_array($tagName, $this->CDATAEncoding))? "]]></$tagName>" : "</$tagName>"; |
302 | $tagName = strtr($tagName, ':', '_'); | 302 | $nodeText .= "</$tagName>"; |
303 | $node = null; | 303 | return $nodeText . PHP_EOL; |
304 | if (!$tagContent && is_array($attributes) && count($attributes)) | 304 | } |
305 | { | 305 | elseif ($this->version == JSON || $this->version == JSONP) |
306 | $node = array('@attributes' => $this->json_keys($attributes)); | 306 | { |
307 | } else { | 307 | $tagName = (string)$tagName; |
308 | if (is_array($tagContent)) { | 308 | $tagName = strtr($tagName, ':', '_'); |
309 | $node = $this->json_keys($tagContent); | 309 | $node = null; |
310 | } else { | 310 | if (!$tagContent && is_array($attributes) && count($attributes)) |
311 | $node = $tagContent; | 311 | { |
312 | } | 312 | $node = array('@attributes' => $this->json_keys($attributes)); |
313 | } | 313 | } else { |
314 | return $node; | 314 | if (is_array($tagContent)) { |
315 | } | 315 | $node = $this->json_keys($tagContent); |
316 | return ''; // should not get here | 316 | } else { |
317 | } | 317 | $node = $tagContent; |
318 | 318 | } | |
319 | private function json_keys(array $array) { | 319 | } |
320 | $new = array(); | 320 | return $node; |
321 | foreach ($array as $key => $val) { | 321 | } |
322 | if (is_string($key)) $key = strtr($key, ':', '_'); | 322 | return ''; // should not get here |
323 | if (is_array($val)) { | 323 | } |
324 | $new[$key] = $this->json_keys($val); | 324 | |
325 | } else { | 325 | private function json_keys(array $array) { |
326 | $new[$key] = $val; | 326 | $new = array(); |
327 | } | 327 | foreach ($array as $key => $val) { |
328 | } | 328 | if (is_string($key)) $key = strtr($key, ':', '_'); |
329 | return $new; | 329 | if (is_array($val)) { |
330 | } | 330 | $new[$key] = $this->json_keys($val); |
331 | 331 | } else { | |
332 | /** | 332 | $new[$key] = $val; |
333 | * @desc Print channels | 333 | } |
334 | * @access private | 334 | } |
335 | * @return void | 335 | return $new; |
336 | */ | 336 | } |
337 | private function printChannels() | 337 | |
338 | { | 338 | /** |
339 | //Start channel tag | 339 | * @desc Print channels |
340 | switch ($this->version) | 340 | * @access private |
341 | { | 341 | * @return void |
342 | case RSS2: | 342 | */ |
343 | echo '<channel>' . PHP_EOL; | 343 | private function printChannels() |
344 | // add hubs | 344 | { |
345 | foreach ($this->hubs as $hub) { | 345 | //Start channel tag |
346 | //echo $this->makeNode('link', '', array('rel'=>'hub', 'href'=>$hub, 'xmlns'=>'http://www.w3.org/2005/Atom')); | 346 | if ($this->version == RSS2) { |
347 | echo '<link rel="hub" href="'.htmlspecialchars($hub).'" xmlns="http://www.w3.org/2005/Atom" />' . PHP_EOL; | 347 | echo '<channel>' . PHP_EOL; |
348 | } | 348 | // add hubs |
349 | // add self | 349 | foreach ($this->hubs as $hub) { |
350 | if (isset($this->self)) { | 350 | //echo $this->makeNode('link', '', array('rel'=>'hub', 'href'=>$hub, 'xmlns'=>'http://www.w3.org/2005/Atom')); |
351 | //echo $this->makeNode('link', '', array('rel'=>'self', 'href'=>$this->self, 'xmlns'=>'http://www.w3.org/2005/Atom')); | 351 | echo '<link rel="hub" href="'.htmlspecialchars($hub).'" xmlns="http://www.w3.org/2005/Atom" />' . PHP_EOL; |
352 | echo '<link rel="self" href="'.htmlspecialchars($this->self).'" xmlns="http://www.w3.org/2005/Atom" />' . PHP_EOL; | 352 | } |
353 | } | 353 | // add self |
354 | //Print Items of channel | 354 | if (isset($this->self)) { |
355 | foreach ($this->channels as $key => $value) | 355 | //echo $this->makeNode('link', '', array('rel'=>'self', 'href'=>$this->self, 'xmlns'=>'http://www.w3.org/2005/Atom')); |
356 | { | 356 | echo '<link rel="self" href="'.htmlspecialchars($this->self).'" xmlns="http://www.w3.org/2005/Atom" />' . PHP_EOL; |
357 | echo $this->makeNode($key, $value); | 357 | } |
358 | } | 358 | //Print Items of channel |
359 | break; | 359 | foreach ($this->channels as $key => $value) |
360 | case JSON: | 360 | { |
361 | $this->json->rss['channel'] = (object)$this->json_keys($this->channels); | 361 | echo $this->makeNode($key, $value); |
362 | break; | 362 | } |
363 | } | 363 | } elseif ($this->version == JSON || $this->version == JSONP) { |
364 | } | 364 | $this->json->rss['channel'] = (object)$this->json_keys($this->channels); |
365 | 365 | } | |
366 | /** | 366 | } |
367 | * Prints formatted feed items | 367 | |
368 | * | 368 | /** |
369 | * @access private | 369 | * Prints formatted feed items |
370 | * @return void | 370 | * |
371 | */ | 371 | * @access private |
372 | private function printItems() | 372 | * @return void |
373 | { | 373 | */ |
374 | foreach ($this->items as $item) | 374 | private function printItems() |
375 | { | 375 | { |
376 | $thisItems = $item->getElements(); | 376 | foreach ($this->items as $item) { |
377 | 377 | $itemElements = $item->getElements(); | |
378 | echo $this->startItem(); | 378 | |
379 | 379 | echo $this->startItem(); | |
380 | if ($this->version == JSON) { | 380 | |
381 | $json_item = array(); | 381 | if ($this->version == JSON || $this->version == JSONP) { |
382 | } | 382 | $json_item = array(); |
383 | 383 | } | |
384 | foreach ($thisItems as $feedItem ) | 384 | |
385 | { | 385 | foreach ($itemElements as $thisElement) { |
386 | if ($this->version == RSS2) { | 386 | foreach ($thisElement as $instance) { |
387 | echo $this->makeNode($feedItem['name'], $feedItem['content'], $feedItem['attributes']); | 387 | if ($this->version == RSS2) { |
388 | } elseif ($this->version == JSON) { | 388 | echo $this->makeNode($instance['name'], $instance['content'], $instance['attributes']); |
389 | $json_item[strtr($feedItem['name'], ':', '_')] = $this->makeNode($feedItem['name'], $feedItem['content'], $feedItem['attributes']); | 389 | } elseif ($this->version == JSON || $this->version == JSONP) { |
390 | } | 390 | $_json_node = $this->makeNode($instance['name'], $instance['content'], $instance['attributes']); |
391 | } | 391 | if (count($thisElement) > 1) { |
392 | echo $this->endItem(); | 392 | $json_item[strtr($instance['name'], ':', '_')][] = $_json_node; |
393 | if ($this->version == JSON) { | 393 | } else { |
394 | if (count($this->items) > 1) { | 394 | $json_item[strtr($instance['name'], ':', '_')] = $_json_node; |
395 | $this->json->rss['channel']->item[] = $json_item; | 395 | } |
396 | } else { | 396 | } |
397 | $this->json->rss['channel']->item = $json_item; | 397 | } |
398 | } | 398 | } |
399 | } | 399 | echo $this->endItem(); |
400 | } | 400 | if ($this->version == JSON || $this->version == JSONP) { |
401 | } | 401 | if (count($this->items) > 1) { |
402 | 402 | $this->json->rss['channel']->item[] = $json_item; | |
403 | /** | 403 | } else { |
404 | * Make the starting tag of channels | 404 | $this->json->rss['channel']->item = $json_item; |
405 | * | 405 | } |
406 | * @access private | 406 | } |
407 | * @return void | 407 | } |
408 | */ | 408 | } |
409 | private function startItem() | 409 | |
410 | { | 410 | /** |
411 | if ($this->version == RSS2) | 411 | * Make the starting tag of channels |
412 | { | 412 | * |
413 | echo '<item>' . PHP_EOL; | 413 | * @access private |
414 | } | 414 | * @return void |
415 | // nothing for JSON | 415 | */ |
416 | } | 416 | private function startItem() |
417 | 417 | { | |
418 | /** | 418 | if ($this->version == RSS2) |
419 | * Closes feed item tag | 419 | { |
420 | * | 420 | echo '<item>' . PHP_EOL; |
421 | * @access private | 421 | } |
422 | * @return void | 422 | // nothing for JSON |
423 | */ | 423 | } |
424 | private function endItem() | 424 | |
425 | { | 425 | /** |
426 | if ($this->version == RSS2) | 426 | * Closes feed item tag |
427 | { | 427 | * |
428 | echo '</item>' . PHP_EOL; | 428 | * @access private |
429 | } | 429 | * @return void |
430 | // nothing for JSON | 430 | */ |
431 | } | 431 | private function endItem() |
432 | 432 | { | |
433 | // End # private functions ---------------------------------------------- | 433 | if ($this->version == RSS2) |
434 | { | ||
435 | echo '</item>' . PHP_EOL; | ||
436 | } | ||
437 | // nothing for JSON | ||
438 | } | ||
439 | |||
440 | // End # private functions ---------------------------------------------- | ||
434 | } \ No newline at end of file | 441 | } \ No newline at end of file |
diff --git a/inc/3rdparty/libraries/htmLawed/htmLawed.php b/inc/3rdparty/libraries/htmLawed/htmLawed.php new file mode 100644 index 00000000..9a62aca5 --- /dev/null +++ b/inc/3rdparty/libraries/htmLawed/htmLawed.php | |||
@@ -0,0 +1,728 @@ | |||
1 | <?php | ||
2 | |||
3 | /* | ||
4 | htmLawed 1.1.14, 8 August 2012 | ||
5 | OOP code, 8 August 2012 | ||
6 | Copyright Santosh Patnaik | ||
7 | Dual LGPL v3 and GPL v2+ license | ||
8 | A PHP Labware internal utility; www.bioinformatics.org/phplabware/internal_utilities/htmLawed | ||
9 | |||
10 | See htmLawed_README.txt/htm | ||
11 | */ | ||
12 | |||
13 | class htmLawed{ | ||
14 | // begin class | ||
15 | public static function hl($t, $C=1, $S=array()){ | ||
16 | $C = is_array($C) ? $C : array(); | ||
17 | if(!empty($C['valid_xhtml'])){ | ||
18 | $C['elements'] = empty($C['elements']) ? '*-center-dir-font-isindex-menu-s-strike-u' : $C['elements']; | ||
19 | $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 2; | ||
20 | $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 2; | ||
21 | } | ||
22 | // config eles | ||
23 | $e = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'applet'=>1, 'area'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'blockquote'=>1, 'br'=>1, 'button'=>1, 'caption'=>1, 'center'=>1, 'cite'=>1, 'code'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'del'=>1, 'dfn'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'dt'=>1, 'em'=>1, 'embed'=>1, 'fieldset'=>1, 'font'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'isindex'=>1, 'kbd'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'object'=>1, 'ol'=>1, 'optgroup'=>1, 'option'=>1, 'p'=>1, 'param'=>1, 'pre'=>1, 'q'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'table'=>1, 'tbody'=>1, 'td'=>1, 'textarea'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1, 'tt'=>1, 'u'=>1, 'ul'=>1, 'var'=>1); // 86/deprecated+embed+ruby | ||
24 | if(!empty($C['safe'])){ | ||
25 | unset($e['applet'], $e['embed'], $e['iframe'], $e['object'], $e['script']); | ||
26 | } | ||
27 | $x = !empty($C['elements']) ? str_replace(array("\n", "\r", "\t", ' '), '', $C['elements']) : '*'; | ||
28 | if($x == '-*'){$e = array();} | ||
29 | elseif(strpos($x, '*') === false){$e = array_flip(explode(',', $x));} | ||
30 | else{ | ||
31 | if(isset($x[1])){ | ||
32 | preg_match_all('`(?:^|-|\+)[^\-+]+?(?=-|\+|$)`', $x, $m, PREG_SET_ORDER); | ||
33 | for($i=count($m); --$i>=0;){$m[$i] = $m[$i][0];} | ||
34 | foreach($m as $v){ | ||
35 | if($v[0] == '+'){$e[substr($v, 1)] = 1;} | ||
36 | if($v[0] == '-' && isset($e[($v = substr($v, 1))]) && !in_array('+'. $v, $m)){unset($e[$v]);} | ||
37 | } | ||
38 | } | ||
39 | } | ||
40 | $C['elements'] =& $e; | ||
41 | // config attrs | ||
42 | $x = !empty($C['deny_attribute']) ? str_replace(array("\n", "\r", "\t", ' '), '', $C['deny_attribute']) : ''; | ||
43 | $x = array_flip((isset($x[0]) && $x[0] == '*') ? explode('-', $x) : explode(',', $x. (!empty($C['safe']) ? ',on*' : ''))); | ||
44 | if(isset($x['on*'])){ | ||
45 | unset($x['on*']); | ||
46 | $x += array('onblur'=>1, 'onchange'=>1, 'onclick'=>1, 'ondblclick'=>1, 'onfocus'=>1, 'onkeydown'=>1, 'onkeypress'=>1, 'onkeyup'=>1, 'onmousedown'=>1, 'onmousemove'=>1, 'onmouseout'=>1, 'onmouseover'=>1, 'onmouseup'=>1, 'onreset'=>1, 'onselect'=>1, 'onsubmit'=>1); | ||
47 | } | ||
48 | $C['deny_attribute'] = $x; | ||
49 | // config URL | ||
50 | $x = (isset($C['schemes'][2]) && strpos($C['schemes'], ':')) ? strtolower($C['schemes']) : 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https'; | ||
51 | $C['schemes'] = array(); | ||
52 | foreach(explode(';', str_replace(array(' ', "\t", "\r", "\n"), '', $x)) as $v){ | ||
53 | $x = $x2 = null; list($x, $x2) = explode(':', $v, 2); | ||
54 | if($x2){$C['schemes'][$x] = array_flip(explode(',', $x2));} | ||
55 | } | ||
56 | if(!isset($C['schemes']['*'])){$C['schemes']['*'] = array('file'=>1, 'http'=>1, 'https'=>1,);} | ||
57 | if(!empty($C['safe']) && empty($C['schemes']['style'])){$C['schemes']['style'] = array('!'=>1);} | ||
58 | $C['abs_url'] = isset($C['abs_url']) ? $C['abs_url'] : 0; | ||
59 | if(!isset($C['base_url']) or !preg_match('`^[a-zA-Z\d.+\-]+://[^/]+/(.+?/)?$`', $C['base_url'])){ | ||
60 | $C['base_url'] = $C['abs_url'] = 0; | ||
61 | } | ||
62 | // config rest | ||
63 | $C['and_mark'] = empty($C['and_mark']) ? 0 : 1; | ||
64 | $C['anti_link_spam'] = (isset($C['anti_link_spam']) && is_array($C['anti_link_spam']) && count($C['anti_link_spam']) == 2 && (empty($C['anti_link_spam'][0]) or htmLawed::hl_regex($C['anti_link_spam'][0])) && (empty($C['anti_link_spam'][1]) or htmLawed::hl_regex($C['anti_link_spam'][1]))) ? $C['anti_link_spam'] : 0; | ||
65 | $C['anti_mail_spam'] = isset($C['anti_mail_spam']) ? $C['anti_mail_spam'] : 0; | ||
66 | $C['balance'] = isset($C['balance']) ? (bool)$C['balance'] : 1; | ||
67 | $C['cdata'] = isset($C['cdata']) ? $C['cdata'] : (empty($C['safe']) ? 3 : 0); | ||
68 | $C['clean_ms_char'] = empty($C['clean_ms_char']) ? 0 : $C['clean_ms_char']; | ||
69 | $C['comment'] = isset($C['comment']) ? $C['comment'] : (empty($C['safe']) ? 3 : 0); | ||
70 | $C['css_expression'] = empty($C['css_expression']) ? 0 : 1; | ||
71 | $C['direct_list_nest'] = empty($C['direct_list_nest']) ? 0 : 1; | ||
72 | $C['hexdec_entity'] = isset($C['hexdec_entity']) ? $C['hexdec_entity'] : 1; | ||
73 | $C['hook'] = (!empty($C['hook']) && function_exists($C['hook'])) ? $C['hook'] : 0; | ||
74 | $C['hook_tag'] = (!empty($C['hook_tag']) && function_exists($C['hook_tag'])) ? $C['hook_tag'] : 0; | ||
75 | $C['keep_bad'] = isset($C['keep_bad']) ? $C['keep_bad'] : 6; | ||
76 | $C['lc_std_val'] = isset($C['lc_std_val']) ? (bool)$C['lc_std_val'] : 1; | ||
77 | $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 1; | ||
78 | $C['named_entity'] = isset($C['named_entity']) ? (bool)$C['named_entity'] : 1; | ||
79 | $C['no_deprecated_attr'] = isset($C['no_deprecated_attr']) ? $C['no_deprecated_attr'] : 1; | ||
80 | $C['parent'] = isset($C['parent'][0]) ? strtolower($C['parent']) : 'body'; | ||
81 | $C['show_setting'] = !empty($C['show_setting']) ? $C['show_setting'] : 0; | ||
82 | $C['style_pass'] = empty($C['style_pass']) ? 0 : 1; | ||
83 | $C['tidy'] = empty($C['tidy']) ? 0 : $C['tidy']; | ||
84 | $C['unique_ids'] = isset($C['unique_ids']) ? $C['unique_ids'] : 1; | ||
85 | $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 0; | ||
86 | |||
87 | if(isset($GLOBALS['C'])){$reC = $GLOBALS['C'];} | ||
88 | $GLOBALS['C'] = $C; | ||
89 | $S = is_array($S) ? $S : htmLawed::hl_spec($S); | ||
90 | if(isset($GLOBALS['S'])){$reS = $GLOBALS['S'];} | ||
91 | $GLOBALS['S'] = $S; | ||
92 | |||
93 | $t = preg_replace('`[\x00-\x08\x0b-\x0c\x0e-\x1f]`', '', $t); | ||
94 | if($C['clean_ms_char']){ | ||
95 | $x = array("\x7f"=>'', "\x80"=>'€', "\x81"=>'', "\x83"=>'ƒ', "\x85"=>'…', "\x86"=>'†', "\x87"=>'‡', "\x88"=>'ˆ', "\x89"=>'‰', "\x8a"=>'Š', "\x8b"=>'‹', "\x8c"=>'Œ', "\x8d"=>'', "\x8e"=>'Ž', "\x8f"=>'', "\x90"=>'', "\x95"=>'•', "\x96"=>'–', "\x97"=>'—', "\x98"=>'˜', "\x99"=>'™', "\x9a"=>'š', "\x9b"=>'›', "\x9c"=>'œ', "\x9d"=>'', "\x9e"=>'ž', "\x9f"=>'Ÿ'); | ||
96 | $x = $x + ($C['clean_ms_char'] == 1 ? array("\x82"=>'‚', "\x84"=>'„', "\x91"=>'‘', "\x92"=>'’', "\x93"=>'“', "\x94"=>'”') : array("\x82"=>'\'', "\x84"=>'"', "\x91"=>'\'', "\x92"=>'\'', "\x93"=>'"', "\x94"=>'"')); | ||
97 | $t = strtr($t, $x); | ||
98 | } | ||
99 | if($C['cdata'] or $C['comment']){$t = preg_replace_callback('`<!(?:(?:--.*?--)|(?:\[CDATA\[.*?\]\]))>`sm', 'htmLawed::hl_cmtcd', $t);} | ||
100 | $t = preg_replace_callback('`&([A-Za-z][A-Za-z0-9]{1,30}|#(?:[0-9]{1,8}|[Xx][0-9A-Fa-f]{1,7}));`', 'htmLawed::hl_ent', str_replace('&', '&', $t)); | ||
101 | if($C['unique_ids'] && !isset($GLOBALS['hl_Ids'])){$GLOBALS['hl_Ids'] = array();} | ||
102 | if($C['hook']){$t = $C['hook']($t, $C, $S);} | ||
103 | if($C['show_setting'] && preg_match('`^[a-z][a-z0-9_]*$`i', $C['show_setting'])){ | ||
104 | $GLOBALS[$C['show_setting']] = array('config'=>$C, 'spec'=>$S, 'time'=>microtime()); | ||
105 | } | ||
106 | // main | ||
107 | $t = preg_replace_callback('`<(?:(?:\s|$)|(?:[^>]*(?:>|$)))|>`m', 'htmLawed::hl_tag', $t); | ||
108 | $t = $C['balance'] ? htmLawed::hl_bal($t, $C['keep_bad'], $C['parent']) : $t; | ||
109 | $t = (($C['cdata'] or $C['comment']) && strpos($t, "\x01") !== false) ? str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05"), array('', '', '&', '<', '>'), $t) : $t; | ||
110 | $t = $C['tidy'] ? htmLawed::hl_tidy($t, $C['tidy'], $C['parent']) : $t; | ||
111 | unset($C, $e); | ||
112 | if(isset($reC)){$GLOBALS['C'] = $reC;} | ||
113 | if(isset($reS)){$GLOBALS['S'] = $reS;} | ||
114 | return $t; | ||
115 | // eof | ||
116 | } | ||
117 | |||
118 | public static function hl_attrval($t, $p){ | ||
119 | // check attr val against $S | ||
120 | $o = 1; $l = strlen($t); | ||
121 | foreach($p as $k=>$v){ | ||
122 | switch($k){ | ||
123 | case 'maxlen':if($l > $v){$o = 0;} | ||
124 | break; case 'minlen': if($l < $v){$o = 0;} | ||
125 | break; case 'maxval': if((float)($t) > $v){$o = 0;} | ||
126 | break; case 'minval': if((float)($t) < $v){$o = 0;} | ||
127 | break; case 'match': if(!preg_match($v, $t)){$o = 0;} | ||
128 | break; case 'nomatch': if(preg_match($v, $t)){$o = 0;} | ||
129 | break; case 'oneof': | ||
130 | $m = 0; | ||
131 | foreach(explode('|', $v) as $n){if($t == $n){$m = 1; break;}} | ||
132 | $o = $m; | ||
133 | break; case 'noneof': | ||
134 | $m = 1; | ||
135 | foreach(explode('|', $v) as $n){if($t == $n){$m = 0; break;}} | ||
136 | $o = $m; | ||
137 | break; default: | ||
138 | break; | ||
139 | } | ||
140 | if(!$o){break;} | ||
141 | } | ||
142 | return ($o ? $t : (isset($p['default']) ? $p['default'] : 0)); | ||
143 | // eof | ||
144 | } | ||
145 | |||
146 | public static function hl_bal($t, $do=1, $in='div'){ | ||
147 | // balance tags | ||
148 | // by content | ||
149 | $cB = array('blockquote'=>1, 'form'=>1, 'map'=>1, 'noscript'=>1); // Block | ||
150 | $cE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty | ||
151 | $cF = array('button'=>1, 'del'=>1, 'div'=>1, 'dd'=>1, 'fieldset'=>1, 'iframe'=>1, 'ins'=>1, 'li'=>1, 'noscript'=>1, 'object'=>1, 'td'=>1, 'th'=>1); // Flow; later context-wise dynamic move of ins & del to $cI | ||
152 | $cI = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'caption'=>1, 'cite'=>1, 'code'=>1, 'dfn'=>1, 'dt'=>1, 'em'=>1, 'font'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'i'=>1, 'kbd'=>1, 'label'=>1, 'legend'=>1, 'p'=>1, 'pre'=>1, 'q'=>1, 'rb'=>1, 'rt'=>1, 's'=>1, 'samp'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'tt'=>1, 'u'=>1, 'var'=>1); // Inline | ||
153 | $cN = array('a'=>array('a'=>1), 'button'=>array('a'=>1, 'button'=>1, 'fieldset'=>1, 'form'=>1, 'iframe'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'fieldset'=>array('fieldset'=>1), 'form'=>array('form'=>1), 'label'=>array('label'=>1), 'noscript'=>array('script'=>1), 'pre'=>array('big'=>1, 'font'=>1, 'img'=>1, 'object'=>1, 'script'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1), 'rb'=>array('ruby'=>1), 'rt'=>array('ruby'=>1)); // Illegal | ||
154 | $cN2 = array_keys($cN); | ||
155 | $cR = array('blockquote'=>1, 'dir'=>1, 'dl'=>1, 'form'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1); | ||
156 | $cS = array('colgroup'=>array('col'=>1), 'dir'=>array('li'=>1), 'dl'=>array('dd'=>1, 'dt'=>1), 'menu'=>array('li'=>1), 'ol'=>array('li'=>1), 'optgroup'=>array('option'=>1), 'option'=>array('#pcdata'=>1), 'rbc'=>array('rb'=>1), 'rp'=>array('#pcdata'=>1), 'rtc'=>array('rt'=>1), 'ruby'=>array('rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1), 'select'=>array('optgroup'=>1, 'option'=>1), 'script'=>array('#pcdata'=>1), 'table'=>array('caption'=>1, 'col'=>1, 'colgroup'=>1, 'tfoot'=>1, 'tbody'=>1, 'tr'=>1, 'thead'=>1), 'tbody'=>array('tr'=>1), 'tfoot'=>array('tr'=>1), 'textarea'=>array('#pcdata'=>1), 'thead'=>array('tr'=>1), 'tr'=>array('td'=>1, 'th'=>1), 'ul'=>array('li'=>1)); // Specific - immediate parent-child | ||
157 | if($GLOBALS['C']['direct_list_nest']){$cS['ol'] = $cS['ul'] += array('ol'=>1, 'ul'=>1);} | ||
158 | $cO = array('address'=>array('p'=>1), 'applet'=>array('param'=>1), 'blockquote'=>array('script'=>1), 'fieldset'=>array('legend'=>1, '#pcdata'=>1), 'form'=>array('script'=>1), 'map'=>array('area'=>1), 'object'=>array('param'=>1, 'embed'=>1)); // Other | ||
159 | $cT = array('colgroup'=>1, 'dd'=>1, 'dt'=>1, 'li'=>1, 'option'=>1, 'p'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1); // Omitable closing | ||
160 | // block/inline type; ins & del both type; #pcdata: text | ||
161 | $eB = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'del'=>1, 'dir'=>1, 'dl'=>1, 'div'=>1, 'fieldset'=>1, 'form'=>1, 'ins'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'isindex'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'table'=>1, 'ul'=>1); | ||
162 | $eI = array('#pcdata'=>1, 'a'=>1, 'abbr'=>1, 'acronym'=>1, 'applet'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'br'=>1, 'button'=>1, 'cite'=>1, 'code'=>1, 'del'=>1, 'dfn'=>1, 'em'=>1, 'embed'=>1, 'font'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'kbd'=>1, 'label'=>1, 'map'=>1, 'object'=>1, 'q'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'select'=>1, 'script'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1, 'tt'=>1, 'u'=>1, 'var'=>1); | ||
163 | $eN = array('a'=>1, 'big'=>1, 'button'=>1, 'fieldset'=>1, 'font'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'label'=>1, 'object'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1); // Exclude from specific ele; $cN values | ||
164 | $eO = array('area'=>1, 'caption'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'dt'=>1, 'legend'=>1, 'li'=>1, 'optgroup'=>1, 'option'=>1, 'param'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'script'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'thead'=>1, 'th'=>1, 'tr'=>1); // Missing in $eB & $eI | ||
165 | $eF = $eB + $eI; | ||
166 | |||
167 | // $in sets allowed child | ||
168 | $in = ((isset($eF[$in]) && $in != '#pcdata') or isset($eO[$in])) ? $in : 'div'; | ||
169 | if(isset($cE[$in])){ | ||
170 | return (!$do ? '' : str_replace(array('<', '>'), array('<', '>'), $t)); | ||
171 | } | ||
172 | if(isset($cS[$in])){$inOk = $cS[$in];} | ||
173 | elseif(isset($cI[$in])){$inOk = $eI; $cI['del'] = 1; $cI['ins'] = 1;} | ||
174 | elseif(isset($cF[$in])){$inOk = $eF; unset($cI['del'], $cI['ins']);} | ||
175 | elseif(isset($cB[$in])){$inOk = $eB; unset($cI['del'], $cI['ins']);} | ||
176 | if(isset($cO[$in])){$inOk = $inOk + $cO[$in];} | ||
177 | if(isset($cN[$in])){$inOk = array_diff_assoc($inOk, $cN[$in]);} | ||
178 | |||
179 | $t = explode('<', $t); | ||
180 | $ok = $q = array(); // $q seq list of open non-empty ele | ||
181 | ob_start(); | ||
182 | |||
183 | for($i=-1, $ci=count($t); ++$i<$ci;){ | ||
184 | // allowed $ok in parent $p | ||
185 | if($ql = count($q)){ | ||
186 | $p = array_pop($q); | ||
187 | $q[] = $p; | ||
188 | if(isset($cS[$p])){$ok = $cS[$p];} | ||
189 | elseif(isset($cI[$p])){$ok = $eI; $cI['del'] = 1; $cI['ins'] = 1;} | ||
190 | elseif(isset($cF[$p])){$ok = $eF; unset($cI['del'], $cI['ins']);} | ||
191 | elseif(isset($cB[$p])){$ok = $eB; unset($cI['del'], $cI['ins']);} | ||
192 | if(isset($cO[$p])){$ok = $ok + $cO[$p];} | ||
193 | if(isset($cN[$p])){$ok = array_diff_assoc($ok, $cN[$p]);} | ||
194 | }else{$ok = $inOk; unset($cI['del'], $cI['ins']);} | ||
195 | // bad tags, & ele content | ||
196 | if(isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))){ | ||
197 | echo '<', $s, $e, $a, '>'; | ||
198 | } | ||
199 | if(isset($x[0])){ | ||
200 | if(strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))){ | ||
201 | echo '<div>', $x, '</div>'; | ||
202 | } | ||
203 | elseif($do < 3 or isset($ok['#pcdata'])){echo $x;} | ||
204 | elseif(strpos($x, "\x02\x04")){ | ||
205 | foreach(preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v){ | ||
206 | echo (substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : '')); | ||
207 | } | ||
208 | }elseif($do > 4){echo preg_replace('`\S`', '', $x);} | ||
209 | } | ||
210 | // get markup | ||
211 | if(!preg_match('`^(/?)([a-z1-6]+)([^>]*)>(.*)`sm', $t[$i], $r)){$x = $t[$i]; continue;} | ||
212 | $s = null; $e = null; $a = null; $x = null; list($all, $s, $e, $a, $x) = $r; | ||
213 | // close tag | ||
214 | if($s){ | ||
215 | if(isset($cE[$e]) or !in_array($e, $q)){continue;} // Empty/unopen | ||
216 | if($p == $e){array_pop($q); echo '</', $e, '>'; unset($e); continue;} // Last open | ||
217 | $add = ''; // Nesting - close open tags that need to be | ||
218 | for($j=-1, $cj=count($q); ++$j<$cj;){ | ||
219 | if(($d = array_pop($q)) == $e){break;} | ||
220 | else{$add .= "</{$d}>";} | ||
221 | } | ||
222 | echo $add, '</', $e, '>'; unset($e); continue; | ||
223 | } | ||
224 | // open tag | ||
225 | // $cB ele needs $eB ele as child | ||
226 | if(isset($cB[$e]) && strlen(trim($x))){ | ||
227 | $t[$i] = "{$e}{$a}>"; | ||
228 | array_splice($t, $i+1, 0, 'div>'. $x); unset($e, $x); ++$ci; --$i; continue; | ||
229 | } | ||
230 | if((($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql)) && !isset($eB[$e]) && !isset($ok[$e])){ | ||
231 | array_splice($t, $i, 0, 'div>'); unset($e, $x); ++$ci; --$i; continue; | ||
232 | } | ||
233 | // if no open ele, $in = parent; mostly immediate parent-child relation should hold | ||
234 | if(!$ql or !isset($eN[$e]) or !array_intersect($q, $cN2)){ | ||
235 | if(!isset($ok[$e])){ | ||
236 | if($ql && isset($cT[$p])){echo '</', array_pop($q), '>'; unset($e, $x); --$i;} | ||
237 | continue; | ||
238 | } | ||
239 | if(!isset($cE[$e])){$q[] = $e;} | ||
240 | echo '<', $e, $a, '>'; unset($e); continue; | ||
241 | } | ||
242 | // specific parent-child | ||
243 | if(isset($cS[$p][$e])){ | ||
244 | if(!isset($cE[$e])){$q[] = $e;} | ||
245 | echo '<', $e, $a, '>'; unset($e); continue; | ||
246 | } | ||
247 | // nesting | ||
248 | $add = ''; | ||
249 | $q2 = array(); | ||
250 | for($k=-1, $kc=count($q); ++$k<$kc;){ | ||
251 | $d = $q[$k]; | ||
252 | $ok2 = array(); | ||
253 | if(isset($cS[$d])){$q2[] = $d; continue;} | ||
254 | $ok2 = isset($cI[$d]) ? $eI : $eF; | ||
255 | if(isset($cO[$d])){$ok2 = $ok2 + $cO[$d];} | ||
256 | if(isset($cN[$d])){$ok2 = array_diff_assoc($ok2, $cN[$d]);} | ||
257 | if(!isset($ok2[$e])){ | ||
258 | if(!$k && !isset($inOk[$e])){continue 2;} | ||
259 | $add = "</{$d}>"; | ||
260 | for(;++$k<$kc;){$add = "</{$q[$k]}>{$add}";} | ||
261 | break; | ||
262 | } | ||
263 | else{$q2[] = $d;} | ||
264 | } | ||
265 | $q = $q2; | ||
266 | if(!isset($cE[$e])){$q[] = $e;} | ||
267 | echo $add, '<', $e, $a, '>'; unset($e); continue; | ||
268 | } | ||
269 | |||
270 | // end | ||
271 | if($ql = count($q)){ | ||
272 | $p = array_pop($q); | ||
273 | $q[] = $p; | ||
274 | if(isset($cS[$p])){$ok = $cS[$p];} | ||
275 | elseif(isset($cI[$p])){$ok = $eI; $cI['del'] = 1; $cI['ins'] = 1;} | ||
276 | elseif(isset($cF[$p])){$ok = $eF; unset($cI['del'], $cI['ins']);} | ||
277 | elseif(isset($cB[$p])){$ok = $eB; unset($cI['del'], $cI['ins']);} | ||
278 | if(isset($cO[$p])){$ok = $ok + $cO[$p];} | ||
279 | if(isset($cN[$p])){$ok = array_diff_assoc($ok, $cN[$p]);} | ||
280 | }else{$ok = $inOk; unset($cI['del'], $cI['ins']);} | ||
281 | if(isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))){ | ||
282 | echo '<', $s, $e, $a, '>'; | ||
283 | } | ||
284 | if(isset($x[0])){ | ||
285 | if(strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))){ | ||
286 | echo '<div>', $x, '</div>'; | ||
287 | } | ||
288 | elseif($do < 3 or isset($ok['#pcdata'])){echo $x;} | ||
289 | elseif(strpos($x, "\x02\x04")){ | ||
290 | foreach(preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v){ | ||
291 | echo (substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : '')); | ||
292 | } | ||
293 | }elseif($do > 4){echo preg_replace('`\S`', '', $x);} | ||
294 | } | ||
295 | while(!empty($q) && ($e = array_pop($q))){echo '</', $e, '>';} | ||
296 | $o = ob_get_contents(); | ||
297 | ob_end_clean(); | ||
298 | return $o; | ||
299 | // eof | ||
300 | } | ||
301 | |||
/**
 * Comment / CDATA section handler (regex callback).
 *
 * Reads the global config $C: the 'comment' or 'cdata' entry selects the
 * handling of the matched section:
 *   - falsy: the section is returned untouched;
 *   - 1:     the section is removed (empty string returned);
 *   - 2:     '&', '<' and '>' inside the section are first converted to entities;
 *   - other: the content is kept as is.
 * In the last two cases the section is returned wrapped in \x01/\x02 marker
 * bytes, with '&', '<', '>' swapped for \x03, \x04, \x05.
 *
 * @param array $t Match array; only $t[0] (the whole section) is used.
 * @return string Replacement text for the matched section.
 */
public static function hl_cmtcd($t){
    global $C;
    $t = $t[0];
    // '<!--' has '-' at offset 3; otherwise the match is treated as a CDATA section
    $n = ($t[3] == '-') ? 'comment' : 'cdata';
    if(!($v = $C[$n])){return $t;}
    if($v == 1){return '';}
    if($n == 'comment'){
        // Drop '<!--' and '-->', collapse runs of '-' and make sure the text ends with a space
        $t = preg_replace('`--+`', '-', substr($t, 4, -3));
        if(substr($t, -1) != ' '){$t .= ' ';}
    }
    else{$t = substr($t, 1, -1);}
    if($v == 2){
        // Entity-escape the content. The replacement list must hold the entity
        // forms; replacing each character with itself would be a no-op.
        $t = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $t);
    }
    $wrapped = ($n == 'comment') ? "\x01\x02\x04!--$t--\x05\x02\x01" : "\x01\x01\x04$t\x05\x01\x01";
    return str_replace(array('&', '<', '>'), array("\x03", "\x04", "\x05"), $wrapped);
}
316 | |||
/**
 * Entity handler (regex callback).
 *
 * Normalises one character entity according to the global config $C:
 *   - 'and_mark'      : when truthy the leading '&' is emitted as \x06;
 *   - 'named_entity'  : when falsy, known named entities become numeric ones;
 *   - 'hexdec_entity' : controls decimal vs. hexadecimal numeric output.
 * Unknown names and disallowed code points get their ampersand neutralised
 * by emitting 'amp;' right after the leading marker.
 *
 * @param array $t Match array; $t[1] is the entity text without '&' and ';'.
 * @return string The rewritten entity.
 */
public static function hl_ent($t){
    global $C;
    // Names that are always kept in named form
    static $U = array('quot'=>1,'amp'=>1,'lt'=>1,'gt'=>1);
    // Other known names mapped to their decimal code points
    static $N = array('fnof'=>'402', 'Alpha'=>'913', 'Beta'=>'914', 'Gamma'=>'915', 'Delta'=>'916', 'Epsilon'=>'917', 'Zeta'=>'918', 'Eta'=>'919', 'Theta'=>'920', 'Iota'=>'921', 'Kappa'=>'922', 'Lambda'=>'923', 'Mu'=>'924', 'Nu'=>'925', 'Xi'=>'926', 'Omicron'=>'927', 'Pi'=>'928', 'Rho'=>'929', 'Sigma'=>'931', 'Tau'=>'932', 'Upsilon'=>'933', 'Phi'=>'934', 'Chi'=>'935', 'Psi'=>'936', 'Omega'=>'937', 'alpha'=>'945', 'beta'=>'946', 'gamma'=>'947', 'delta'=>'948', 'epsilon'=>'949', 'zeta'=>'950', 'eta'=>'951', 'theta'=>'952', 'iota'=>'953', 'kappa'=>'954', 'lambda'=>'955', 'mu'=>'956', 'nu'=>'957', 'xi'=>'958', 'omicron'=>'959', 'pi'=>'960', 'rho'=>'961', 'sigmaf'=>'962', 'sigma'=>'963', 'tau'=>'964', 'upsilon'=>'965', 'phi'=>'966', 'chi'=>'967', 'psi'=>'968', 'omega'=>'969', 'thetasym'=>'977', 'upsih'=>'978', 'piv'=>'982', 'bull'=>'8226', 'hellip'=>'8230', 'prime'=>'8242', 'Prime'=>'8243', 'oline'=>'8254', 'frasl'=>'8260', 'weierp'=>'8472', 'image'=>'8465', 'real'=>'8476', 'trade'=>'8482', 'alefsym'=>'8501', 'larr'=>'8592', 'uarr'=>'8593', 'rarr'=>'8594', 'darr'=>'8595', 'harr'=>'8596', 'crarr'=>'8629', 'lArr'=>'8656', 'uArr'=>'8657', 'rArr'=>'8658', 'dArr'=>'8659', 'hArr'=>'8660', 'forall'=>'8704', 'part'=>'8706', 'exist'=>'8707', 'empty'=>'8709', 'nabla'=>'8711', 'isin'=>'8712', 'notin'=>'8713', 'ni'=>'8715', 'prod'=>'8719', 'sum'=>'8721', 'minus'=>'8722', 'lowast'=>'8727', 'radic'=>'8730', 'prop'=>'8733', 'infin'=>'8734', 'ang'=>'8736', 'and'=>'8743', 'or'=>'8744', 'cap'=>'8745', 'cup'=>'8746', 'int'=>'8747', 'there4'=>'8756', 'sim'=>'8764', 'cong'=>'8773', 'asymp'=>'8776', 'ne'=>'8800', 'equiv'=>'8801', 'le'=>'8804', 'ge'=>'8805', 'sub'=>'8834', 'sup'=>'8835', 'nsub'=>'8836', 'sube'=>'8838', 'supe'=>'8839', 'oplus'=>'8853', 'otimes'=>'8855', 'perp'=>'8869', 'sdot'=>'8901', 'lceil'=>'8968', 'rceil'=>'8969', 'lfloor'=>'8970', 'rfloor'=>'8971', 'lang'=>'9001', 'rang'=>'9002', 'loz'=>'9674', 'spades'=>'9824', 'clubs'=>'9827', 'hearts'=>'9829', 'diams'=>'9830', 'apos'=>'39', 'OElig'=>'338', 'oelig'=>'339', 'Scaron'=>'352', 'scaron'=>'353', 'Yuml'=>'376', 'circ'=>'710', 'tilde'=>'732', 'ensp'=>'8194', 'emsp'=>'8195', 'thinsp'=>'8201', 'zwnj'=>'8204', 'zwj'=>'8205', 'lrm'=>'8206', 'rlm'=>'8207', 'ndash'=>'8211', 'mdash'=>'8212', 'lsquo'=>'8216', 'rsquo'=>'8217', 'sbquo'=>'8218', 'ldquo'=>'8220', 'rdquo'=>'8221', 'bdquo'=>'8222', 'dagger'=>'8224', 'Dagger'=>'8225', 'permil'=>'8240', 'lsaquo'=>'8249', 'rsaquo'=>'8250', 'euro'=>'8364', 'nbsp'=>'160', 'iexcl'=>'161', 'cent'=>'162', 'pound'=>'163', 'curren'=>'164', 'yen'=>'165', 'brvbar'=>'166', 'sect'=>'167', 'uml'=>'168', 'copy'=>'169', 'ordf'=>'170', 'laquo'=>'171', 'not'=>'172', 'shy'=>'173', 'reg'=>'174', 'macr'=>'175', 'deg'=>'176', 'plusmn'=>'177', 'sup2'=>'178', 'sup3'=>'179', 'acute'=>'180', 'micro'=>'181', 'para'=>'182', 'middot'=>'183', 'cedil'=>'184', 'sup1'=>'185', 'ordm'=>'186', 'raquo'=>'187', 'frac14'=>'188', 'frac12'=>'189', 'frac34'=>'190', 'iquest'=>'191', 'Agrave'=>'192', 'Aacute'=>'193', 'Acirc'=>'194', 'Atilde'=>'195', 'Auml'=>'196', 'Aring'=>'197', 'AElig'=>'198', 'Ccedil'=>'199', 'Egrave'=>'200', 'Eacute'=>'201', 'Ecirc'=>'202', 'Euml'=>'203', 'Igrave'=>'204', 'Iacute'=>'205', 'Icirc'=>'206', 'Iuml'=>'207', 'ETH'=>'208', 'Ntilde'=>'209', 'Ograve'=>'210', 'Oacute'=>'211', 'Ocirc'=>'212', 'Otilde'=>'213', 'Ouml'=>'214', 'times'=>'215', 'Oslash'=>'216', 'Ugrave'=>'217', 'Uacute'=>'218', 'Ucirc'=>'219', 'Uuml'=>'220', 'Yacute'=>'221', 'THORN'=>'222', 'szlig'=>'223', 'agrave'=>'224', 'aacute'=>'225', 'acirc'=>'226', 'atilde'=>'227', 'auml'=>'228', 'aring'=>'229', 'aelig'=>'230', 'ccedil'=>'231', 'egrave'=>'232', 'eacute'=>'233', 'ecirc'=>'234', 'euml'=>'235', 'igrave'=>'236', 'iacute'=>'237', 'icirc'=>'238', 'iuml'=>'239', 'eth'=>'240', 'ntilde'=>'241', 'ograve'=>'242', 'oacute'=>'243', 'ocirc'=>'244', 'otilde'=>'245', 'ouml'=>'246', 'divide'=>'247', 'oslash'=>'248', 'ugrave'=>'249', 'uacute'=>'250', 'ucirc'=>'251', 'uuml'=>'252', 'yacute'=>'253', 'thorn'=>'254', 'yuml'=>'255');

    $name = $t[1];
    $lead = $C['and_mark'] ? "\x06" : '&';

    // Named entity
    if($name[0] != '#'){
        if(isset($U[$name])){
            $body = $name;
        }elseif(!isset($N[$name])){
            // Unknown name: neutralise the ampersand
            $body = 'amp;'. $name;
        }elseif($C['named_entity']){
            $body = $name;
        }else{
            $code = $C['hexdec_entity'] > 1 ? 'x'. dechex($N[$name]) : $N[$name];
            $body = '#'. $code;
        }
        return $lead. $body. ';';
    }

    // Numeric entity: strip '#', then read as decimal or (after one more char) hexadecimal
    $digits = substr($name, 1);
    $n = ctype_digit($digits) ? intval($digits) : hexdec(substr($digits, 1));
    $disallowed = $n < 9
        || ($n > 13 && $n < 32)
        || $n == 11
        || $n == 12
        || ($n > 126 && $n < 160 && $n != 133)
        || ($n > 55295 && ($n < 57344 || ($n > 64975 && $n < 64992) || $n == 65534 || $n == 65535 || $n > 1114111));
    if($disallowed){
        return $lead. 'amp;#'. $digits. ';';
    }
    $asDecimal = (ctype_digit($digits) && $C['hexdec_entity'] < 2) || !$C['hexdec_entity'];
    return $lead. '#'. ($asDecimal ? $n : 'x'. dechex($n)). ';';
}
332 | |||
/**
 * Check the URL scheme (protocol) of an attribute value.
 *
 * Two calling modes are supported:
 *   - direct:   $p is the URL and $c the attribute name;
 *   - callback: $c is omitted and $p is a match array whose items 1 and 3
 *               are kept around the URL found in item 2; the 'style' scheme
 *               list is then used.
 * The scheme list is read from the global config $C['schemes'], falling back
 * to the '*' list. URLs with a scheme missing from the list get the prefix
 * 'denied:'. Depending on $C['abs_url'] and $C['base_url'] the URL may also
 * be made relative (-1) or absolute (other truthy values).
 *
 * @param string|array $p URL, or match array in callback mode.
 * @param string|null  $c Attribute name.
 * @return string The checked (and possibly rewritten) value.
 */
public static function hl_prot($p, $c=null){
    global $C;
    static $denied = 'denied:';

    $before = '';
    $after = '';
    $url = $p;
    $attr = $c;
    if($attr == null){
        // Callback mode: keep the text surrounding the URL
        $attr = 'style';
        $before = $p[1];
        $after = $p[3];
        $url = trim($p[2]);
    }
    $allowed = isset($C['schemes'][$attr]) ? $C['schemes'][$attr] : $C['schemes']['*'];

    // '!' in the list denies everything
    if(isset($allowed['!']) && substr($url, 0, 7) != $denied){
        $url = $denied. $url;
    }
    // Everything allowed, or fragment/query/param only, or already denied
    if(isset($allowed['*']) || !strcspn($url, '#?;') || substr($url, 0, 7) == $denied){
        return $before. $url. $after;
    }
    // Scheme present but not in the list
    $hasScheme = preg_match('`^([a-z\d\-+.&#; ]+?)(:|&#(58|x3a);|%3a|\\\\0{0,4}3a).`i', $url, $m);
    if($hasScheme && !isset($allowed[strtolower($m[1])])){
        return $before. $denied. $url. $after;
    }
    if(!$C['abs_url']){
        return $before. $url. $after;
    }

    if($C['abs_url'] == -1 && strpos($url, $C['base_url']) === 0){
        // Make the URL relative
        $url = substr($url, strlen($C['base_url']));
    }elseif(empty($m[1])){
        // Make the URL absolute
        if(substr($url, 0, 2) == '//'){
            $url = substr($C['base_url'], 0, strpos($C['base_url'], ':')+1). $url;
        }elseif($url[0] == '/'){
            $url = preg_replace('`(^.+?://[^/]+)(.*)`', '$1', $C['base_url']). $url;
        }elseif(strcspn($url, './')){
            $url = $C['base_url']. $url;
        }else{
            // Starts with '.' : resolve './' and '../' against the base path
            preg_match('`^([a-zA-Z\d\-+.]+://[^/]+)(.*)`', $C['base_url'], $m);
            $url = preg_replace('`(?<=/)\./`', '', $m[2]. $url);
            $parentSegment = '`(?<=/)([^/]{3,}|[^/.]+?|\.[^/.]|[^/.]\.)/\.\./`';
            while(preg_match($parentSegment, $url)){
                $url = preg_replace($parentSegment, '', $url);
            }
            $url = $m[1]. $url;
        }
    }
    return $before. $url. $after;
}
365 | |||
/**
 * Check that a string is a usable PCRE pattern.
 *
 * The pattern is test-compiled by matching it against an empty string.
 * Warnings raised by an invalid pattern are swallowed by a temporary error
 * handler, and validity is decided from preg_match()'s return value
 * (false on failure). This avoids the 'track_errors' ini setting and
 * $php_errormsg, which no longer exist in current PHP versions, and leaves
 * the 'display_errors' setting untouched.
 *
 * @param string $p Pattern, including delimiters and modifiers.
 * @return int 1 when the pattern is valid, 0 when it is empty or invalid.
 */
public static function hl_regex($p){
    if(empty($p)){return 0;}
    // Returning true marks the warning as handled so nothing is displayed or logged
    set_error_handler(function(){return true;});
    $r = preg_match($p, '');
    restore_error_handler();
    return $r === false ? 0 : 1;
}
381 | |||
/**
 * Parse a $spec string into the rule array.
 *
 * The string is a ';'-separated list of "elements=attribute-rules" items:
 * elements and attribute rules are ','-separated, an attribute prefixed with
 * '-' is denied, and an attribute may carry '/'-separated "key=value"
 * restrictions in parentheses. Double-quoted values may contain the
 * separator characters; a back-tick escapes a double quote inside them.
 *
 * @param string $t Spec string.
 * @return array Map element => (attribute => 1 | restriction map); denied
 *               attributes are listed under the key 'n'.
 */
public static function hl_spec($t){
    $s = array();
    // Protect separator characters found inside double-quoted values by swapping
    // them for control bytes, and drop the quotes. A callback is used because the
    // /e (eval) pattern modifier is not available in current PHP versions.
    $t = preg_replace_callback(
        '/"(?>(`.|[^"])*)"/sm',
        function($q){
            return substr(str_replace(array(";", "|", "~", " ", ",", "/", "(", ")", '`"'), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08", "\""), $q[0]), 1, -1);
        },
        trim($t)
    );
    // Remaining white-space carries no meaning
    $t = str_replace(array("\t", "\r", "\n", ' '), '', $t);
    for($i = count(($t = explode(';', $t))); --$i>=0;){
        $w = $t[$i];
        if(empty($w) or ($e = strpos($w, '=')) === false or !strlen(($a = substr($w, $e+1)))){continue;}
        $y = $n = array(); // $y: allowed attributes/rules, $n: denied attributes
        foreach(explode(',', $a) as $v){
            if(!preg_match('`^([a-z:\-\*]+)(?:\((.*?)\))?`i', $v, $m)){continue;}
            if(($x = strtolower($m[1])) == '-*'){$n['*'] = 1; continue;}
            if($x[0] == '-'){$n[substr($x, 1)] = 1; continue;}
            if(!isset($m[2])){$y[$x] = 1; continue;}
            foreach(explode('/', $m[2]) as $rule){
                // Rule keys are at least 5 characters long; anything else only marks the attribute
                if(empty($rule) or ($p = strpos($rule, '=')) == 0 or $p < 5){$y[$x] = 1; continue;}
                // A preceding malformed rule may have left a scalar here
                if(!isset($y[$x]) or !is_array($y[$x])){$y[$x] = array();}
                // Restore the protected characters in the rule value
                $y[$x][strtolower(substr($rule, 0, $p))] = str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08"), array(";", "|", "~", " ", ",", "/", "(", ")"), substr($rule, $p+1));
            }
            // Discard regex rules that do not compile
            if(isset($y[$x]['match']) && !htmLawed::hl_regex($y[$x]['match'])){unset($y[$x]['match']);}
            if(isset($y[$x]['nomatch']) && !htmLawed::hl_regex($y[$x]['nomatch'])){unset($y[$x]['nomatch']);}
        }
        if(!count($y) && !count($n)){continue;}
        foreach(explode(',', substr($w, 0, $e)) as $v){
            if(!strlen(($v = strtolower($v)))){continue;}
            if(count($y)){$s[$v] = $y;}
            if(count($n)){$s[$v]['n'] = $n;}
        }
    }
    return $s;
}
412 | |||
/**
 * Tag / attribute handler (regex callback).
 *
 * Receives one '<...>' candidate (or a stray '< ' / '>') and returns its
 * cleaned form, driven by the global config $C and the global spec $S:
 *   - stray or malformed angle brackets are entity-escaped;
 *   - elements missing from $C['elements'] are escaped or dropped depending
 *     on $C['keep_bad'];
 *   - deprecated elements may be transformed ($C['make_tag_strict']);
 *   - attributes are parsed, filtered against the per-element tables,
 *     $C['deny_attribute'] and $S, URL-bearing values are scheme-checked,
 *     required attributes are added and deprecated ones may be converted to
 *     CSS ($C['no_deprecated_attr']);
 *   - $C['unique_ids'] and $C['xml:lang'] post-process 'id' and 'lang'.
 * When $C['hook_tag'] is set, that callable produces the final tag text.
 *
 * @param array $t Match array; only $t[0] is used.
 * @return string Replacement text for the match.
 */
public static function hl_tag($t){
    global $C;
    $t = $t[0];
    // invalid < >
    if($t == '< '){return '&lt; ';}
    if($t == '>'){return '&gt;';}
    if(!preg_match('`^<(/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?>$`m', $t, $m)){
        return str_replace(array('<', '>'), array('&lt;', '&gt;'), $t);
    }elseif(!isset($C['elements'][($e = strtolower($m[2]))])){
        return (($C['keep_bad']%2) ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $t) : '');
    }
    // attr string
    $a = str_replace(array("\n", "\r", "\t"), ' ', trim($m[3]));
    // tag transform
    static $eD = array('applet'=>1, 'center'=>1, 'dir'=>1, 'embed'=>1, 'font'=>1, 'isindex'=>1, 'menu'=>1, 's'=>1, 'strike'=>1, 'u'=>1); // Deprecated
    if($C['make_tag_strict'] && isset($eD[$e])){
        // hl_tag2 takes $e and $a by reference and may rewrite both
        $trt = htmLawed::hl_tag2($e, $a, $C['make_tag_strict']);
        if(!$e){return (($C['keep_bad']%2) ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $t) : '');}
    }
    // close tag
    static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty ele
    if(!empty($m[1])){
        return (!isset($eE[$e]) ? (empty($C['hook_tag']) ? "</$e>" : $C['hook_tag']($e)) : (($C['keep_bad'])%2 ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $t) : ''));
    }

    // open tag & attr
    static $aN = array('abbr'=>array('td'=>1, 'th'=>1), 'accept-charset'=>array('form'=>1), 'accept'=>array('form'=>1, 'input'=>1), 'accesskey'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'legend'=>1, 'textarea'=>1), 'action'=>array('form'=>1), 'align'=>array('caption'=>1, 'embed'=>1, 'applet'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'legend'=>1, 'table'=>1, 'hr'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'p'=>1, 'col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'alt'=>array('applet'=>1, 'area'=>1, 'img'=>1, 'input'=>1), 'archive'=>array('applet'=>1, 'object'=>1), 'axis'=>array('td'=>1, 'th'=>1), 'bgcolor'=>array('embed'=>1, 'table'=>1, 'tr'=>1, 'td'=>1, 'th'=>1), 'border'=>array('table'=>1, 'img'=>1, 'object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'cellpadding'=>array('table'=>1), 'cellspacing'=>array('table'=>1), 'char'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charoff'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charset'=>array('a'=>1, 'script'=>1), 'checked'=>array('input'=>1), 'cite'=>array('blockquote'=>1, 'q'=>1, 'del'=>1, 'ins'=>1), 'classid'=>array('object'=>1), 'clear'=>array('br'=>1), 'code'=>array('applet'=>1), 'codebase'=>array('object'=>1, 'applet'=>1), 'codetype'=>array('object'=>1), 'color'=>array('font'=>1), 'cols'=>array('textarea'=>1), 'colspan'=>array('td'=>1, 'th'=>1), 'compact'=>array('dir'=>1, 'dl'=>1, 'menu'=>1, 'ol'=>1, 'ul'=>1), 'coords'=>array('area'=>1, 'a'=>1), 'data'=>array('object'=>1), 'datetime'=>array('del'=>1, 'ins'=>1), 'declare'=>array('object'=>1), 'defer'=>array('script'=>1), 'dir'=>array('bdo'=>1), 'disabled'=>array('button'=>1, 'input'=>1, 'optgroup'=>1, 'option'=>1, 'select'=>1, 'textarea'=>1), 'enctype'=>array('form'=>1), 'face'=>array('font'=>1), 'for'=>array('label'=>1), 'frame'=>array('table'=>1), 'frameborder'=>array('iframe'=>1), 'headers'=>array('td'=>1, 'th'=>1), 'height'=>array('embed'=>1, 'iframe'=>1, 'td'=>1, 'th'=>1, 'img'=>1, 'object'=>1, 'applet'=>1), 'href'=>array('a'=>1, 'area'=>1), 'hreflang'=>array('a'=>1), 'hspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'ismap'=>array('img'=>1, 'input'=>1), 'label'=>array('option'=>1, 'optgroup'=>1), 'language'=>array('script'=>1), 'longdesc'=>array('img'=>1, 'iframe'=>1), 'marginheight'=>array('iframe'=>1), 'marginwidth'=>array('iframe'=>1), 'maxlength'=>array('input'=>1), 'method'=>array('form'=>1), 'model'=>array('embed'=>1), 'multiple'=>array('select'=>1), 'name'=>array('button'=>1, 'embed'=>1, 'textarea'=>1, 'applet'=>1, 'select'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'a'=>1, 'input'=>1, 'object'=>1, 'map'=>1, 'param'=>1), 'nohref'=>array('area'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'object'=>array('applet'=>1), 'onblur'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onchange'=>array('input'=>1, 'select'=>1, 'textarea'=>1), 'onfocus'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onreset'=>array('form'=>1), 'onselect'=>array('input'=>1, 'textarea'=>1), 'onsubmit'=>array('form'=>1), 'pluginspage'=>array('embed'=>1), 'pluginurl'=>array('embed'=>1), 'prompt'=>array('isindex'=>1), 'readonly'=>array('textarea'=>1, 'input'=>1), 'rel'=>array('a'=>1), 'rev'=>array('a'=>1), 'rows'=>array('textarea'=>1), 'rowspan'=>array('td'=>1, 'th'=>1), 'rules'=>array('table'=>1), 'scope'=>array('td'=>1, 'th'=>1), 'scrolling'=>array('iframe'=>1), 'selected'=>array('option'=>1), 'shape'=>array('area'=>1, 'a'=>1), 'size'=>array('hr'=>1, 'font'=>1, 'input'=>1, 'select'=>1), 'span'=>array('col'=>1, 'colgroup'=>1), 'src'=>array('embed'=>1, 'script'=>1, 'input'=>1, 'iframe'=>1, 'img'=>1), 'standby'=>array('object'=>1), 'start'=>array('ol'=>1), 'summary'=>array('table'=>1), 'tabindex'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'object'=>1, 'select'=>1, 'textarea'=>1), 'target'=>array('a'=>1, 'area'=>1, 'form'=>1), 'type'=>array('a'=>1, 'embed'=>1, 'object'=>1, 'param'=>1, 'script'=>1, 'input'=>1, 'li'=>1, 'ol'=>1, 'ul'=>1, 'button'=>1), 'usemap'=>array('img'=>1, 'input'=>1, 'object'=>1), 'valign'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'value'=>array('input'=>1, 'option'=>1, 'param'=>1, 'button'=>1, 'li'=>1), 'valuetype'=>array('param'=>1), 'vspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'width'=>array('embed'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'object'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'applet'=>1, 'col'=>1, 'colgroup'=>1, 'pre'=>1), 'wmode'=>array('embed'=>1), 'xml:space'=>array('pre'=>1, 'script'=>1, 'style'=>1)); // Ele-specific
    static $aNE = array('checked'=>1, 'compact'=>1, 'declare'=>1, 'defer'=>1, 'disabled'=>1, 'ismap'=>1, 'multiple'=>1, 'nohref'=>1, 'noresize'=>1, 'noshade'=>1, 'nowrap'=>1, 'readonly'=>1, 'selected'=>1); // Empty
    static $aNP = array('action'=>1, 'cite'=>1, 'classid'=>1, 'codebase'=>1, 'data'=>1, 'href'=>1, 'longdesc'=>1, 'model'=>1, 'pluginspage'=>1, 'pluginurl'=>1, 'usemap'=>1); // Need scheme check; excludes style, on* & src
    static $aNU = array('class'=>array('param'=>1, 'script'=>1), 'dir'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'id'=>array('script'=>1), 'lang'=>array('applet'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'xml:lang'=>array('applet'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'onclick'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'ondblclick'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeydown'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeypress'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeyup'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmousedown'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmousemove'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseout'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseover'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseup'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'style'=>array('param'=>1, 'script'=>1), 'title'=>array('param'=>1, 'script'=>1)); // Univ & exceptions

    if($C['lc_std_val']){
        // predef attr vals for $eAL & $aNE ele
        static $aNL = array('all'=>1, 'baseline'=>1, 'bottom'=>1, 'button'=>1, 'center'=>1, 'char'=>1, 'checkbox'=>1, 'circle'=>1, 'col'=>1, 'colgroup'=>1, 'cols'=>1, 'data'=>1, 'default'=>1, 'file'=>1, 'get'=>1, 'groups'=>1, 'hidden'=>1, 'image'=>1, 'justify'=>1, 'left'=>1, 'ltr'=>1, 'middle'=>1, 'none'=>1, 'object'=>1, 'password'=>1, 'poly'=>1, 'post'=>1, 'preserve'=>1, 'radio'=>1, 'rect'=>1, 'ref'=>1, 'reset'=>1, 'right'=>1, 'row'=>1, 'rowgroup'=>1, 'rows'=>1, 'rtl'=>1, 'submit'=>1, 'text'=>1, 'top'=>1);
        static $eAL = array('a'=>1, 'area'=>1, 'bdo'=>1, 'button'=>1, 'col'=>1, 'form'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'optgroup'=>1, 'option'=>1, 'param'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1, 'xml:space'=>1);
        $lcase = isset($eAL[$e]) ? 1 : 0;
    }

    $depTr = 0;
    if($C['no_deprecated_attr']){
        // dep attr:applicable ele
        static $aND = array('align'=>array('caption'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'legend'=>1, 'object'=>1, 'p'=>1, 'table'=>1), 'bgcolor'=>array('table'=>1, 'td'=>1, 'th'=>1, 'tr'=>1), 'border'=>array('img'=>1, 'object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'clear'=>array('br'=>1), 'compact'=>array('dl'=>1, 'ol'=>1, 'ul'=>1), 'height'=>array('td'=>1, 'th'=>1), 'hspace'=>array('img'=>1, 'object'=>1), 'language'=>array('script'=>1), 'name'=>array('a'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'map'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'size'=>array('hr'=>1), 'start'=>array('ol'=>1), 'type'=>array('li'=>1, 'ol'=>1, 'ul'=>1), 'value'=>array('li'=>1), 'vspace'=>array('img'=>1, 'object'=>1), 'width'=>array('hr'=>1, 'pre'=>1, 'td'=>1, 'th'=>1));
        static $eAD = array('a'=>1, 'br'=>1, 'caption'=>1, 'div'=>1, 'dl'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'legend'=>1, 'li'=>1, 'map'=>1, 'object'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'script'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'tr'=>1, 'ul'=>1);
        $depTr = isset($eAD[$e]) ? 1 : 0;
    }

    // attr name-vals
    if(strpos($a, "\x01") !== false){$a = preg_replace('`\x01[^\x01]*\x01`', '', $a);} // No comment/CDATA sec
    // Small state machine: $mode 0 expects a name, 1 an '=' (or nothing), 2 a value
    $mode = 0; $a = trim($a, ' /'); $aA = array();
    while(strlen($a)){
        $w = 0; // Set to 1 when the current step consumed input
        switch($mode){
            case 0: // Name
                if(preg_match('`^[a-zA-Z][\-a-zA-Z:]+`', $a, $m)){
                    $nm = strtolower($m[0]);
                    $w = $mode = 1; $a = ltrim(substr_replace($a, '', 0, strlen($m[0])));
                }
            break; case 1:
                if($a[0] == '='){ // =
                    $w = 1; $mode = 2; $a = ltrim($a, '= ');
                }else{ // No val
                    $w = 1; $mode = 0; $a = ltrim($a);
                    $aA[$nm] = '';
                }
            break; case 2: // Val
                if(preg_match('`^((?:"[^"]*")|(?:\'[^\']*\')|(?:\s*[^\s"\']+))(.*)`', $a, $m)){
                    $a = ltrim($m[2]); $m = $m[1]; $w = 1; $mode = 0;
                    $aA[$nm] = trim(($m[0] == '"' or $m[0] == '\'') ? substr($m, 1, -1) : $m);
                }
            break;
        }
        if($w == 0){ // Parse errs, deal with space, " & '
            $a = preg_replace('`^(?:"[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*`', '', $a);
            $mode = 0;
        }
    }
    if($mode == 1){$aA[$nm] = '';}

    // clean attrs
    global $S;
    $rl = isset($S[$e]) ? $S[$e] : array();
    $a = array(); $nfr = 0;
    foreach($aA as $k=>$v){
        if(((isset($C['deny_attribute']['*']) ? isset($C['deny_attribute'][$k]) : !isset($C['deny_attribute'][$k])) && (isset($aN[$k][$e]) or (isset($aNU[$k]) && !isset($aNU[$k][$e]))) && !isset($rl['n'][$k]) && !isset($rl['n']['*'])) or isset($rl[$k])){
            if(isset($aNE[$k])){$v = $k;}
            // For button/input only 'type' is lower-cased, so free-text values such as
            // value="Submit" are left alone. ('!= button or != input' was always true.)
            elseif(!empty($lcase) && (($e != 'button' && $e != 'input') or $k == 'type')){
                $v = (isset($aNL[($v2 = strtolower($v))])) ? $v2 : $v;
            }
            if($k == 'style' && !$C['style_pass']){
                if(false !== strpos($v, '&#')){
                    // Decode numeric entities that could hide 'expression', 'url(' and similar
                    static $sC = array('&#x20;'=>' ', '&#32;'=>' ', '&#x45;'=>'e', '&#69;'=>'e', '&#x65;'=>'e', '&#101;'=>'e', '&#x58;'=>'x', '&#88;'=>'x', '&#x78;'=>'x', '&#120;'=>'x', '&#x50;'=>'p', '&#80;'=>'p', '&#x70;'=>'p', '&#112;'=>'p', '&#x53;'=>'s', '&#83;'=>'s', '&#x73;'=>'s', '&#115;'=>'s', '&#x49;'=>'i', '&#73;'=>'i', '&#x69;'=>'i', '&#105;'=>'i', '&#x4f;'=>'o', '&#79;'=>'o', '&#x6f;'=>'o', '&#111;'=>'o', '&#x4e;'=>'n', '&#78;'=>'n', '&#x6e;'=>'n', '&#110;'=>'n', '&#x55;'=>'u', '&#85;'=>'u', '&#x75;'=>'u', '&#117;'=>'u', '&#x52;'=>'r', '&#82;'=>'r', '&#x72;'=>'r', '&#114;'=>'r', '&#x4c;'=>'l', '&#76;'=>'l', '&#x6c;'=>'l', '&#108;'=>'l', '&#x28;'=>'(', '&#40;'=>'(', '&#x29;'=>')', '&#41;'=>')', '&#x3a;'=>':', '&#58;'=>':', '&#x22;'=>'"', '&#34;'=>'"', '&#x27;'=>"'", '&#39;'=>"'", '&#x2f;'=>'/', '&#47;'=>'/', '&#x2a;'=>'*', '&#42;'=>'*', '&#x5c;'=>'\\', '&#92;'=>'\\');
                    $v = strtr($v, $sC);
                }
                $v = preg_replace_callback('`(url(?:\()(?: )*(?:\'|"|&(?:quot|apos);)?)(.+?)((?:\'|"|&(?:quot|apos);)?(?: )*(?:\)))`iS', 'htmLawed::hl_prot', $v);
                $v = !$C['css_expression'] ? preg_replace('`expression`i', ' ', preg_replace('`\\\\\S|(/|(%2f))(\*|(%2a))`i', ' ', $v)) : $v;
            }elseif(isset($aNP[$k]) or strpos($k, 'src') !== false or $k[0] == 'o'){
                // Soft hyphens (raw byte or entity) could be used to split a scheme name
                $v = str_replace("\xad", ' ', (strpos($v, '&') !== false ? str_replace(array('&#xad;', '&#173;', '&shy;'), ' ', $v) : $v));
                $v = htmLawed::hl_prot($v, $k);
                if($k == 'href'){ // X-spam
                    if($C['anti_mail_spam'] && strpos($v, 'mailto:') === 0){
                        $v = str_replace('@', htmlspecialchars($C['anti_mail_spam']), $v);
                    }elseif($C['anti_link_spam']){
                        $r1 = $C['anti_link_spam'][1];
                        if(!empty($r1) && preg_match($r1, $v)){continue;}
                        $r0 = $C['anti_link_spam'][0];
                        if(!empty($r0) && preg_match($r0, $v)){
                            if(isset($a['rel'])){
                                if(!preg_match('`\bnofollow\b`i', $a['rel'])){$a['rel'] .= ' nofollow';}
                            }elseif(isset($aA['rel'])){
                                // 'rel' not processed yet; remember to append once it is
                                if(!preg_match('`\bnofollow\b`i', $aA['rel'])){$nfr = 1;}
                            }else{$a['rel'] = 'nofollow';}
                        }
                    }
                }
            }
            if(isset($rl[$k]) && is_array($rl[$k]) && ($v = htmLawed::hl_attrval($v, $rl[$k])) === 0){continue;}
            // Values are written out inside double quotes, so escape any that remain
            $a[$k] = str_replace('"', '&quot;', $v);
        }
    }
    if($nfr){$a['rel'] = isset($a['rel']) ? $a['rel']. ' nofollow' : 'nofollow';}

    // rqd attr
    static $eAR = array('area'=>array('alt'=>'area'), 'bdo'=>array('dir'=>'ltr'), 'form'=>array('action'=>''), 'img'=>array('src'=>'', 'alt'=>'image'), 'map'=>array('name'=>''), 'optgroup'=>array('label'=>''), 'param'=>array('name'=>''), 'script'=>array('type'=>'text/javascript'), 'textarea'=>array('rows'=>'10', 'cols'=>'50'));
    if(isset($eAR[$e])){
        foreach($eAR[$e] as $k=>$v){
            // An empty default means "use the attribute name as its value"
            if(!isset($a[$k])){$a[$k] = isset($v[0]) ? $v : $k;}
        }
    }

    // depr attrs
    if($depTr){
        $c = array(); // CSS declarations replacing removed attributes
        foreach($a as $k=>$v){
            if($k == 'style' or !isset($aND[$k][$e])){continue;}
            if($k == 'align'){
                unset($a['align']);
                if($e == 'img' && ($v == 'left' or $v == 'right')){$c[] = 'float: '. $v;}
                elseif(($e == 'div' or $e == 'table') && $v == 'center'){$c[] = 'margin: auto';}
                else{$c[] = 'text-align: '. $v;}
            }elseif($k == 'bgcolor'){
                unset($a['bgcolor']);
                $c[] = 'background-color: '. $v;
            }elseif($k == 'border'){
                unset($a['border']); $c[] = "border: {$v}px";
            }elseif($k == 'bordercolor'){
                unset($a['bordercolor']); $c[] = 'border-color: '. $v;
            }elseif($k == 'clear'){
                unset($a['clear']); $c[] = 'clear: '. ($v != 'all' ? $v : 'both');
            }elseif($k == 'compact'){
                unset($a['compact']); $c[] = 'font-size: 85%';
            }elseif($k == 'height' or $k == 'width'){
                unset($a[$k]); $c[] = $k. ': '. ($v[0] != '*' ? $v. (ctype_digit($v) ? 'px' : '') : 'auto');
            }elseif($k == 'hspace'){
                unset($a['hspace']); $c[] = "margin-left: {$v}px; margin-right: {$v}px";
            }elseif($k == 'language' && !isset($a['type'])){
                unset($a['language']);
                $a['type'] = 'text/'. strtolower($v);
            }elseif($k == 'name'){
                if($C['no_deprecated_attr'] == 2 or ($e != 'a' && $e != 'map')){unset($a['name']);}
                if(!isset($a['id']) && preg_match('`[a-zA-Z][a-zA-Z\d.:_\-]*`', $v)){$a['id'] = $v;}
            }elseif($k == 'noshade'){
                unset($a['noshade']); $c[] = 'border-style: none; border: 0; background-color: gray; color: gray';
            }elseif($k == 'nowrap'){
                unset($a['nowrap']); $c[] = 'white-space: nowrap';
            }elseif($k == 'size'){
                unset($a['size']); $c[] = 'size: '. $v. 'px';
            }elseif($k == 'start' or $k == 'value'){
                unset($a[$k]);
            }elseif($k == 'type'){
                unset($a['type']);
                static $ol_type = array('i'=>'lower-roman', 'I'=>'upper-roman', 'a'=>'lower-latin', 'A'=>'upper-latin', '1'=>'decimal');
                $c[] = 'list-style-type: '. (isset($ol_type[$v]) ? $ol_type[$v] : 'decimal');
            }elseif($k == 'vspace'){
                unset($a['vspace']); $c[] = "margin-top: {$v}px; margin-bottom: {$v}px";
            }
        }
        if(count($c)){
            $c = implode('; ', $c);
            $a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;'). '; '. $c. ';': $c. ';';
        }
    }
    // unique ID
    if($C['unique_ids'] && isset($a['id'])){
        if(!preg_match('`^[A-Za-z][A-Za-z0-9_\-.:]*$`', ($id = $a['id'])) or (isset($GLOBALS['hl_Ids'][$id]) && $C['unique_ids'] == 1)){unset($a['id']);
        }else{
            // Any other setting is used as a prefix, repeated until the id is unused
            while(isset($GLOBALS['hl_Ids'][$id])){$id = $C['unique_ids']. $id;}
            $GLOBALS['hl_Ids'][($a['id'] = $id)] = 1;
        }
    }
    // xml:lang
    if($C['xml:lang'] && isset($a['lang'])){
        $a['xml:lang'] = isset($a['xml:lang']) ? $a['xml:lang'] : $a['lang'];
        if($C['xml:lang'] == 2){unset($a['lang']);}
    }
    // for transformed tag
    if(!empty($trt)){
        $a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;'). '; '. $trt : $trt;
    }
    // return with empty ele /
    if(empty($C['hook_tag'])){
        $aA = '';
        foreach($a as $k=>$v){$aA .= " {$k}=\"{$v}\"";}
        return "<{$e}{$aA}". (isset($eE[$e]) ? ' /' : ''). '>';
    }
    else{return $C['hook_tag']($e, $a);}
}
621 | |||
/**
 * Transform a deprecated presentational element into its replacement.
 *
 * The element name in $e is rewritten in place (e.g. 'center' becomes
 * 'div') and the CSS declarations that reproduce the old presentation are
 * returned so the caller can merge them into a style attribute.
 *
 * @param string $e Element name; overwritten with the replacement name, or
 *                  with 0 when $t is 2 and no transformation is known.
 * @param string $a Raw attribute string of the tag (only read, for 'font').
 * @param int    $t When 2, elements with no known transformation yield 0
 *                  instead of an empty string.
 * @return string|int CSS declarations, '' when none are needed, or 0 (see $t).
 */
public static function hl_tag2(&$e, &$a, $t=1){
    // font "size" attribute values mapped to CSS font-size values
    static $fs = array('0'=>'xx-small', '1'=>'xx-small', '2'=>'small', '3'=>'medium', '4'=>'large', '5'=>'x-large', '6'=>'xx-large', '7'=>'300%', '-1'=>'smaller', '-2'=>'60%', '+1'=>'larger', '+2'=>'150%', '+3'=>'200%', '+4'=>'300%');

    switch($e){
        case 'center':
            $e = 'div';
            return 'text-align: center;';
        case 'dir':
        case 'menu':
            $e = 'ul';
            return '';
        case 's':
        case 'strike':
            $e = 'span';
            return 'text-decoration: line-through;';
        case 'u':
            $e = 'span';
            return 'text-decoration: underline;';
        case 'font':
            $css = '';
            $hit = array();
            // face: quoted value first, then bare value
            $hasFace = preg_match('`face\s*=\s*(\'|")([^=]+?)\\1`i', $a, $hit);
            if(!$hasFace){
                $hasFace = preg_match('`face\s*=(\s*)(\S+)`i', $a, $hit);
            }
            if($hasFace){
                $family = str_replace('"', '\'', trim($hit[2]));
                $css .= ' font-family: '. $family. ';';
            }
            if(preg_match('`color\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $hit)){
                $css .= ' color: '. trim($hit[2]). ';';
            }
            if(preg_match('`size\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $hit)){
                $size = trim($hit[2]);
                // sizes outside the table are silently dropped
                if(isset($fs[$size])){
                    $css .= ' font-size: '. $fs[$size]. ';';
                }
            }
            $e = 'span';
            return ltrim($css);
    }

    // no transformation known for this element
    if($t == 2){
        $e = 0;
        return 0;
    }
    return '';
}
646 | |||
/**
 * Compact or pretty-print ("tidy") an HTML fragment.
 *
 * The content of CDATA sections, comments and pre/script/textarea elements
 * is masked with control characters before any whitespace is touched and
 * unmasked again on return, so it passes through unchanged.
 *
 * @param string     $t HTML to tidy.
 * @param string|int $w Mode. -1 only compacts whitespace. Otherwise a spec
 *                      string: 't' selects tab indentation (default is
 *                      space), the first digit is the number of indent
 *                      characters per level (default 1 tab or 2 spaces), a
 *                      digit directly after 't' or 's' is the starting
 *                      indent level, 'r' selects "\r" line endings and
 *                      'r' together with 'n' selects "\r\n".
 * @param string     $p Name of the parent element of the fragment; for
 *                      pre, script and textarea the input is returned as is.
 * @return string The tidied HTML.
 */
public static function hl_tidy($t, $w, $p){
    // Tidy/compact HTM
    if(strpos(' pre,script,textarea', "$p,")){return $t;}

    // Mask markup and whitespace inside protected regions. A closure is used
    // because create_function() was removed in PHP 8.0.
    $mask = function($m){
        return $m[1]. str_replace(array("<", ">", "\n", "\r", "\t", " "), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), $m[3]). $m[4];
    };
    $t = preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea)[^>]*?>)(.+?)(</\2>)`sm'), $mask, $t);
    // Collapse runs of whitespace and drop whitespace just inside tags
    $t = preg_replace(array('`(<\w[^>]*(?<!/)>)\s+`', '`\s+`', '`(<\w[^>]*(?<!/)>) `'), array(' $1', ' ', '$1'), $t);
    $t = str_replace(' </', '</', $t);

    if(($w = strtolower($w)) == -1){
        // compact-only mode: just restore the masked characters
        return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t);
    }

    // $s: indentation unit for one level; $N: starting indent level
    $s = strpos(" $w", 't') ? "\t" : ' ';
    $s = preg_match('`\d`', $w, $m) ? str_repeat($s, $m[0]) : str_repeat($s, ($s == "\t" ? 1 : 2));
    $N = preg_match('`[ts]([1-9])`', $w, $m) ? $m[1] : 0;

    // Element groups, each laid out differently in the loop below
    $a = array('br'=>1);
    $b = array('button'=>1, 'input'=>1, 'option'=>1);
    $c = array('caption'=>1, 'dd'=>1, 'dt'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'isindex'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'object'=>1, 'p'=>1, 'pre'=>1, 'td'=>1, 'textarea'=>1, 'th'=>1);
    $d = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'colgroup'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'fieldset'=>1, 'form'=>1, 'hr'=>1, 'iframe'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1);

    $T = explode('<', $t);
    $X = 1;
    while($X){
        $n = $N;
        $t = $T;
        ob_start();
        if(isset($d[$p])){echo str_repeat($s, ++$n);}
        echo ltrim(array_shift($t));
        for($i=-1, $j=count($t); ++$i<$j;){
            // $e: tag text up to '>', $r: text that follows the tag.
            // Padding keeps $r defined when the chunk holds no '>'.
            list($e, $r) = array_pad(explode('>', $t[$i]), 2, '');
            // $x: 0 closing tag, 1 self-closed tag, 2 opening tag, -1 declaration/comment
            $x = $e[0] == '/' ? 0 : (substr($e, -1) == '/' ? 1 : ($e[0] != '!' ? 2 : -1));
            // $y: bare element name (0 for declarations/comments)
            $y = !$x ? ltrim($e, '/') : ($x > 0 ? substr($e, 0, strcspn($e, ' ')) : 0);
            $e = "<$e>";
            if(isset($d[$y])){
                if(!$x){
                    if($n){echo "\n", str_repeat($s, --$n), "$e\n", str_repeat($s, $n);}
                    // More closing tags than the current depth allows:
                    // discard this pass and retry one level deeper.
                    else{++$N; ob_end_clean(); continue 2;}
                }
                else{echo "\n", str_repeat($s, $n), "$e\n", str_repeat($s, ($x != 1 ? ++$n : $n));}
                echo ltrim($r); continue;
            }
            $f = "\n". str_repeat($s, $n);
            if(isset($c[$y])){
                if(!$x){echo $e, $f, ltrim($r);}
                else{echo $f, $e, $r;}
            }elseif(isset($b[$y])){echo $f, $e, $r;
            }elseif(isset($a[$y])){echo $e, $f, ltrim($r);
            }elseif(!$y){echo $f, $e, $f, ltrim($r);
            }else{echo $e, $r;}
        }
        $X = 0;
    }
    // Squeeze blank lines out of the buffered result
    $t = preg_replace('`[\n]\s*?[\n]+`', "\n", ob_get_contents());
    ob_end_clean();
    if(($l = strpos(" $w", 'r') ? (strpos(" $w", 'n') ? "\r\n" : "\r") : 0)){
        $t = str_replace("\n", $l, $t);
    }
    return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t);
    // eof
}
701 | |||
/**
 * Report the release number of this htmLawed copy.
 *
 * @return string Version string.
 */
public static function hl_version(){
    $release = '1.1.14';
    return $release;
}
707 | |||
/**
 * kses-compatible entry point: filters $t through htmLawed::hl() using a
 * configuration built from kses-style arguments.
 *
 * @param string $t HTML to filter.
 * @param array  $h Allowed elements, keyed by element name; each entry is
 *                  extended with ['n']['*'] = 1 and the result is passed to
 *                  hl() as its third argument.
 * @param array  $p Allowed URL schemes.
 * @return mixed Whatever htmLawed::hl() returns.
 */
public static function kses($t, $h, $p=array('http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'gopher', 'mailto')){
    foreach(array_keys($h) as $name){
        $h[$name]['n']['*'] = 1;
    }

    $elements = '-*';
    if(count($h)){
        $elements = strtolower(implode(',', array_keys($h)));
    }

    $C = array(
        'unique_ids' => 0,
        'no_deprecated_attr' => 0,
        'make_tag_strict' => 0,
        'comment' => 0,
        'cdata' => 0,
        'keep_bad' => 1,
        'elements' => $elements,
        'hook' => 'htmLawed::kses_hook',
        'schemes' => '*:'. implode(',', $p),
    );

    return htmLawed::hl($t, $C, $h);
}
721 | |||
/**
 * Hook installed by kses(); it hands the text back unchanged.
 *
 * @param string $t Input text.
 * @param array  $C Configuration (by reference, not used here).
 * @param array  $S Third hook argument (by reference, not used here).
 * @return string $t, untouched.
 */
public static function kses_hook($t, &$C, &$S){
    $unchanged = $t;
    return $unchanged;
}
727 | // end class | ||
728 | } \ No newline at end of file | ||
diff --git a/inc/3rdparty/libraries/html5/Data.php b/inc/3rdparty/libraries/html5/Data.php new file mode 100644 index 00000000..497345f4 --- /dev/null +++ b/inc/3rdparty/libraries/html5/Data.php | |||
@@ -0,0 +1,114 @@ | |||
1 | <?php | ||
2 | |||
3 | // warning: this file is encoded in UTF-8! | ||
4 | |||
class HTML5_Data
{

    // Numeric character reference dereference table: maps the number
    // written in a malformed numeric reference to the code point that is
    // actually emitted. Storing UTF-8 bytes instead of code points, or
    // moving the table to a .ser file, would be possible optimizations.
    protected static $realCodepointTable = array(
        0x00 => 0xFFFD, // REPLACEMENT CHARACTER
        0x0D => 0x000A, // LINE FEED (LF)
        0x80 => 0x20AC, // EURO SIGN ('€')
        0x81 => 0x0081, // <control>
        0x82 => 0x201A, // SINGLE LOW-9 QUOTATION MARK ('‚')
        0x83 => 0x0192, // LATIN SMALL LETTER F WITH HOOK ('ƒ')
        0x84 => 0x201E, // DOUBLE LOW-9 QUOTATION MARK ('„')
        0x85 => 0x2026, // HORIZONTAL ELLIPSIS ('…')
        0x86 => 0x2020, // DAGGER ('†')
        0x87 => 0x2021, // DOUBLE DAGGER ('‡')
        0x88 => 0x02C6, // MODIFIER LETTER CIRCUMFLEX ACCENT ('ˆ')
        0x89 => 0x2030, // PER MILLE SIGN ('‰')
        0x8A => 0x0160, // LATIN CAPITAL LETTER S WITH CARON ('Š')
        0x8B => 0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK ('‹')
        0x8C => 0x0152, // LATIN CAPITAL LIGATURE OE ('Œ')
        0x8D => 0x008D, // <control>
        0x8E => 0x017D, // LATIN CAPITAL LETTER Z WITH CARON ('Ž')
        0x8F => 0x008F, // <control>
        0x90 => 0x0090, // <control>
        0x91 => 0x2018, // LEFT SINGLE QUOTATION MARK ('‘')
        0x92 => 0x2019, // RIGHT SINGLE QUOTATION MARK ('’')
        0x93 => 0x201C, // LEFT DOUBLE QUOTATION MARK ('“')
        0x94 => 0x201D, // RIGHT DOUBLE QUOTATION MARK ('”')
        0x95 => 0x2022, // BULLET ('•')
        0x96 => 0x2013, // EN DASH ('–')
        0x97 => 0x2014, // EM DASH ('—')
        0x98 => 0x02DC, // SMALL TILDE ('˜')
        0x99 => 0x2122, // TRADE MARK SIGN ('™')
        0x9A => 0x0161, // LATIN SMALL LETTER S WITH CARON ('š')
        0x9B => 0x203A, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK ('›')
        0x9C => 0x0153, // LATIN SMALL LIGATURE OE ('œ')
        0x9D => 0x009D, // <control>
        0x9E => 0x017E, // LATIN SMALL LETTER Z WITH CARON ('ž')
        0x9F => 0x0178, // LATIN CAPITAL LETTER Y WITH DIAERESIS ('Ÿ')
    );

    // Lazily loaded by getNamedCharacterReferences()
    protected static $namedCharacterReferences;

    protected static $namedCharacterReferenceMaxLength;

    /**
     * Look up the code point a malformed numeric character reference
     * should really produce.
     *
     * @param int $ref Number taken from the reference.
     * @return int|false Replacement code point, or false when the table
     *                   has no entry for $ref.
     */
    public static function getRealCodepoint($ref) {
        return isset(self::$realCodepointTable[$ref])
            ? self::$realCodepointTable[$ref]
            : false;
    }

    /**
     * Return the named character reference table, unserializing it from
     * named-character-references.ser (next to this file) on first use.
     *
     * @return mixed The unserialized table.
     */
    public static function getNamedCharacterReferences() {
        if (self::$namedCharacterReferences) {
            return self::$namedCharacterReferences;
        }
        $serialized = file_get_contents(dirname(__FILE__) . '/named-character-references.ser');
        self::$namedCharacterReferences = unserialize($serialized);
        return self::$namedCharacterReferences;
    }

    /**
     * Encode a Unicode code point as a UTF-8 byte sequence.
     *
     * No range validation is performed: surrogates and values above
     * 0x10FFFF are encoded with the same bit layout as valid code points.
     *
     * @param int $code Code point.
     * @return string One to four bytes.
     */
    public static function utf8chr($code) {
        // one byte: plain ASCII
        if ($code < 0x80) {
            return chr($code);
        }

        // every multi-byte form ends with the low six bits
        $last = chr(($code & 0x3F) | 0x80);

        // two bytes
        if ($code < 0x800) {
            return chr((($code & 0x7FF) >> 6) | 0xC0) . $last;
        }

        $lastTwo = chr((($code & 0xFC0) >> 6) | 0x80) . $last;

        // three bytes
        if ($code < 0x10000) {
            return chr((($code >> 12) & 0x0F) | 0xE0) . $lastTwo;
        }

        // four bytes
        return chr((($code >> 18) & 0x07) | 0xF0)
            . chr((($code >> 12) & 0x3F) | 0x80)
            . $lastTwo;
    }

}
diff --git a/inc/3rdparty/libraries/html5/InputStream.php b/inc/3rdparty/libraries/html5/InputStream.php new file mode 100644 index 00000000..f98b4272 --- /dev/null +++ b/inc/3rdparty/libraries/html5/InputStream.php | |||
@@ -0,0 +1,284 @@ | |||
1 | <?php | ||
2 | |||
3 | /* | ||
4 | |||
5 | Copyright 2009 Geoffrey Sneddon <http://gsnedders.com/> | ||
6 | |||
7 | Permission is hereby granted, free of charge, to any person obtaining a | ||
8 | copy of this software and associated documentation files (the | ||
9 | "Software"), to deal in the Software without restriction, including | ||
10 | without limitation the rights to use, copy, modify, merge, publish, | ||
11 | distribute, sublicense, and/or sell copies of the Software, and to | ||
12 | permit persons to whom the Software is furnished to do so, subject to | ||
13 | the following conditions: | ||
14 | |||
15 | The above copyright notice and this permission notice shall be included | ||
16 | in all copies or substantial portions of the Software. | ||
17 | |||
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
19 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
20 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | ||
21 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | ||
22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
23 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
24 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
25 | |||
26 | */ | ||
27 | |||
28 | // Some conventions: | ||
29 | // /* */ indicates verbatim text from the HTML 5 specification | ||
30 | // // indicates regular comments | ||
31 | |||
class HTML5_InputStream {
    /**
     * The string data we're parsing.
     */
    private $data;

    /**
     * The current integer byte position we are in $data
     */
    private $char;

    /**
     * Length of $data; when $char === $EOF, we are at the end-of-file.
     */
    private $EOF;

    /**
     * Parse errors.
     */
    public $errors = array();

    /**
     * Normalizes the input (invalid UTF-8 dropped, BOM stripped, NUL
     * replaced, newlines unified) and records parse errors for NUL bytes
     * and disallowed code points in $errors.
     *
     * @param $data Data to parse
     * @throws Exception when the iconv extension is not loaded
     */
    public function __construct($data) {

        /* Given an encoding, the bytes in the input stream must be
        converted to Unicode characters for the tokeniser, as
        described by the rules for that encoding, except that the
        leading U+FEFF BYTE ORDER MARK character, if any, must not
        be stripped by the encoding layer (it is stripped by the rule below).

        Bytes or sequences of bytes in the original byte stream that
        could not be converted to Unicode characters must be converted
        to U+FFFD REPLACEMENT CHARACTER code points. */

        // XXX currently assuming input data is UTF-8; once we
        // build encoding detection this will no longer be the case
        //
        // We previously had an mbstring implementation here, but that
        // implementation is heavily non-conforming, so it's been
        // omitted.
        if (extension_loaded('iconv')) {
            // non-conforming: invalid sequences are dropped, not replaced
            $data = @iconv('UTF-8', 'UTF-8//IGNORE', $data);
        } else {
            // we can make a conforming native implementation
            // (only iconv is accepted above, so mbstring is not mentioned)
            throw new Exception('Not implemented, please install iconv');
        }

        /* One leading U+FEFF BYTE ORDER MARK character must be
        ignored if any are present. */
        if (substr($data, 0, 3) === "\xEF\xBB\xBF") {
            $data = substr($data, 3);
        }

        /* All U+0000 NULL characters in the input must be replaced
        by U+FFFD REPLACEMENT CHARACTERs. Any occurrences of such
        characters is a parse error. */
        for ($i = 0, $count = substr_count($data, "\0"); $i < $count; $i++) {
            $this->errors[] = array(
                'type' => HTML5_Tokenizer::PARSEERROR,
                'data' => 'null-character'
            );
        }
        /* U+000D CARRIAGE RETURN (CR) characters and U+000A LINE FEED
        (LF) characters are treated specially. Any CR characters
        that are followed by LF characters must be removed, and any
        CR characters not followed by LF characters must be converted
        to LF characters. Thus, newlines in HTML DOMs are represented
        by LF characters, and there are never any CR characters in the
        input to the tokenization stage. */
        $data = str_replace(
            array(
                "\0",
                "\r\n",
                "\r"
            ),
            array(
                "\xEF\xBF\xBD",
                "\n",
                "\n"
            ),
            $data
        );

        /* Any occurrences of any characters in the ranges U+0001 to
        U+0008, U+000B, U+000E to U+001F, U+007F to U+009F,
        U+D800 to U+DFFF , U+FDD0 to U+FDEF, and
        characters U+FFFE, U+FFFF, U+1FFFE, U+1FFFF, U+2FFFE, U+2FFFF,
        U+3FFFE, U+3FFFF, U+4FFFE, U+4FFFF, U+5FFFE, U+5FFFF, U+6FFFE,
        U+6FFFF, U+7FFFE, U+7FFFF, U+8FFFE, U+8FFFF, U+9FFFE, U+9FFFF,
        U+AFFFE, U+AFFFF, U+BFFFE, U+BFFFF, U+CFFFE, U+CFFFF, U+DFFFE,
        U+DFFFF, U+EFFFE, U+EFFFF, U+FFFFE, U+FFFFF, U+10FFFE, and
        U+10FFFF are parse errors. (These are all control characters
        or permanently undefined Unicode characters.) */
        // Check PCRE is loaded.
        if (extension_loaded('pcre')) {
            // In the last alternative the second byte must be exactly
            // 8F, 9F, AF or BF: a range 8F-BF would also flag valid
            // code points such as U+10FFE (F0 90 BF BE).
            $count = preg_match_all(
                '/(?:
                    [\x01-\x08\x0B\x0E-\x1F\x7F] # U+0001 to U+0008, U+000B, U+000E to U+001F and U+007F
                |
                    \xC2[\x80-\x9F] # U+0080 to U+009F
                |
                    \xED(?:\xA0[\x80-\xFF]|[\xA1-\xBE][\x00-\xFF]|\xBF[\x00-\xBF]) # U+D800 to U+DFFF
                |
                    \xEF\xB7[\x90-\xAF] # U+FDD0 to U+FDEF
                |
                    \xEF\xBF[\xBE\xBF] # U+FFFE and U+FFFF
                |
                    [\xF0-\xF4][\x8F\x9F\xAF\xBF]\xBF[\xBE\xBF] # U+nFFFE and U+nFFFF (1 <= n <= 10_{16})
                )/x',
                $data,
                $matches
            );
            for ($i = 0; $i < $count; $i++) {
                $this->errors[] = array(
                    'type' => HTML5_Tokenizer::PARSEERROR,
                    'data' => 'invalid-codepoint'
                );
            }
        } else {
            // XXX: Need non-PCRE impl, probably using substr_count
        }

        $this->data = $data;
        $this->char = 0;
        $this->EOF = strlen($data);
    }

    /**
     * Returns the current line that the tokenizer is at.
     *
     * @return int 1-based line number
     */
    public function getCurrentLine() {
        // Check the string isn't empty
        if($this->EOF) {
            // Count the newlines already consumed; the position is capped
            // at EOF because char() keeps incrementing past the end.
            return substr_count($this->data, "\n", 0, min($this->char, $this->EOF)) + 1;
        } else {
            // If the string is empty, we are on the first line (sorta).
            return 1;
        }
    }

    /**
     * Returns the current column of the current line that the tokenizer is at.
     *
     * @return int column, counted in characters rather than bytes
     */
    public function getColumnOffset() {
        // Clamp the position into [0, EOF]: char() may have moved it past
        // the end and unget() may have moved it below zero.
        $pos = max(0, min($this->char, $this->EOF));

        // strrpos is weird, and the offset needs to be negative for what we
        // want (i.e., the last \n before $pos). With nothing consumed the
        // offset would fall outside the string (a warning before PHP 8, a
        // ValueError since), so that case is answered directly.
        if ($pos > 0) {
            $lastLine = strrpos($this->data, "\n", $pos - 1 - $this->EOF);
        } else {
            $lastLine = false;
        }

        // Take the bytes between the last newline (or the start of the
        // data) and the current position.
        if($lastLine !== false) {
            $findLengthOf = substr($this->data, $lastLine + 1, $pos - 1 - $lastLine);
        } else {
            $findLengthOf = substr($this->data, 0, $pos);
        }

        // Get the length for the string we need.
        if(extension_loaded('iconv')) {
            return iconv_strlen($findLengthOf, 'utf-8');
        } elseif(extension_loaded('mbstring')) {
            return mb_strlen($findLengthOf, 'utf-8');
        } elseif(extension_loaded('xml')) {
            return strlen(utf8_decode($findLengthOf));
        } else {
            // Count ASCII bytes plus UTF-8 lead bytes; continuation bytes
            // are skipped so each character is counted once.
            $count = count_chars($findLengthOf);
            // 0x80 = 0x7F - 0 + 1 (one added to get inclusive range)
            // 0x33 = 0xF4 - 0xC2 + 1 (one added to get inclusive range)
            return array_sum(array_slice($count, 0, 0x80)) +
                   array_sum(array_slice($count, 0xC2, 0x33));
        }
    }

    /**
     * Retrieve the currently consumed character.
     * @note This performs bounds checking
     *
     * @return string|false the next byte, or false at end-of-file; the
     *         position is advanced in both cases
     */
    public function char() {
        return ($this->char++ < $this->EOF)
            ? $this->data[$this->char - 1]
            : false;
    }

    /**
     * Get all characters until EOF.
     * @note This performs bounds checking
     *
     * @return string|false the rest of the data, or false when already at EOF
     */
    public function remainingChars() {
        if($this->char < $this->EOF) {
            $data = substr($this->data, $this->char);
            $this->char = $this->EOF;
            return $data;
        } else {
            return false;
        }
    }

    /**
     * Matches as far as possible until we reach a certain set of bytes
     * and returns the matched substring.
     * @param $bytes Bytes to match.
     * @param $max Optional maximum number of bytes to examine.
     * @return string|false matched bytes (possibly ''), or false at EOF
     */
    public function charsUntil($bytes, $max = null) {
        if ($this->char < $this->EOF) {
            if ($max === 0 || $max) {
                $len = strcspn($this->data, $bytes, $this->char, $max);
            } else {
                $len = strcspn($this->data, $bytes, $this->char);
            }
            $string = (string) substr($this->data, $this->char, $len);
            $this->char += $len;
            return $string;
        } else {
            return false;
        }
    }

    /**
     * Matches as far as possible with a certain set of bytes
     * and returns the matched substring.
     * @param $bytes Bytes to match.
     * @param $max Optional maximum number of bytes to examine.
     * @return string|false matched bytes (possibly ''), or false at EOF
     */
    public function charsWhile($bytes, $max = null) {
        if ($this->char < $this->EOF) {
            if ($max === 0 || $max) {
                $len = strspn($this->data, $bytes, $this->char, $max);
            } else {
                $len = strspn($this->data, $bytes, $this->char);
            }
            $string = (string) substr($this->data, $this->char, $len);
            $this->char += $len;
            return $string;
        } else {
            return false;
        }
    }

    /**
     * Unconsume one character.
     *
     * Does nothing once the position has moved past EOF.
     */
    public function unget() {
        if ($this->char <= $this->EOF) {
            $this->char--;
        }
    }
}
diff --git a/inc/3rdparty/libraries/html5/Parser.php b/inc/3rdparty/libraries/html5/Parser.php new file mode 100644 index 00000000..5f9ca560 --- /dev/null +++ b/inc/3rdparty/libraries/html5/Parser.php | |||
@@ -0,0 +1,36 @@ | |||
1 | <?php | ||
2 | |||
3 | require_once dirname(__FILE__) . '/Data.php'; | ||
4 | require_once dirname(__FILE__) . '/InputStream.php'; | ||
5 | require_once dirname(__FILE__) . '/TreeBuilder.php'; | ||
6 | require_once dirname(__FILE__) . '/Tokenizer.php'; | ||
7 | |||
/**
 * Public entry point of the HTML5 library: wires the input text to a
 * tokenizer and returns what the tokenizer saves.
 */
class HTML5_Parser
{
    /**
     * Parse a complete HTML document.
     *
     * @param $text    HTML text to parse
     * @param $builder Custom builder implementation; handed to the
     *                 tokenizer as is
     * @return Parsed HTML as DOMDocument
     */
    public static function parse($text, $builder = null) {
        $tok = new HTML5_Tokenizer($text, $builder);
        $tok->parse();

        return $tok->save();
    }

    /**
     * Parse an HTML fragment.
     *
     * @param $text    HTML text to parse
     * @param $context String name of context element to pretend parsing is in.
     * @param $builder Custom builder implementation; handed to the
     *                 tokenizer as is
     * @return Parsed HTML as DOMDocument
     */
    public static function parseFragment($text, $context = null, $builder = null) {
        $tok = new HTML5_Tokenizer($text, $builder);
        $tok->parseFragment($context);

        return $tok->save();
    }
}
diff --git a/inc/3rdparty/libraries/html5/Tokenizer.php b/inc/3rdparty/libraries/html5/Tokenizer.php new file mode 100644 index 00000000..0af07164 --- /dev/null +++ b/inc/3rdparty/libraries/html5/Tokenizer.php | |||
@@ -0,0 +1,2422 @@ | |||
1 | <?php | ||
2 | |||
3 | /* | ||
4 | |||
5 | Copyright 2007 Jeroen van der Meer <http://jero.net/> | ||
6 | Copyright 2008 Edward Z. Yang <http://htmlpurifier.org/> | ||
7 | Copyright 2009 Geoffrey Sneddon <http://gsnedders.com/> | ||
8 | |||
9 | Permission is hereby granted, free of charge, to any person obtaining a | ||
10 | copy of this software and associated documentation files (the | ||
11 | "Software"), to deal in the Software without restriction, including | ||
12 | without limitation the rights to use, copy, modify, merge, publish, | ||
13 | distribute, sublicense, and/or sell copies of the Software, and to | ||
14 | permit persons to whom the Software is furnished to do so, subject to | ||
15 | the following conditions: | ||
16 | |||
17 | The above copyright notice and this permission notice shall be included | ||
18 | in all copies or substantial portions of the Software. | ||
19 | |||
20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
21 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
22 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | ||
23 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | ||
24 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
25 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
26 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
27 | |||
28 | */ | ||
29 | |||
30 | // Some conventions: | ||
31 | // /* */ indicates verbatim text from the HTML 5 specification | ||
32 | // // indicates regular comments | ||
33 | |||
34 | // all flags are in hyphenated form | ||
35 | |||
36 | class HTML5_Tokenizer { | ||
37 | /** | ||
38 | * Points to an InputStream object. | ||
39 | */ | ||
40 | protected $stream; | ||
41 | |||
42 | /** | ||
43 | * Tree builder that the tokenizer emits token to. | ||
44 | */ | ||
45 | private $tree; | ||
46 | |||
47 | /** | ||
48 | * Current content model we are parsing as. | ||
49 | */ | ||
50 | protected $content_model; | ||
51 | |||
52 | /** | ||
53 | * Current token that is being built, but not yet emitted. Also | ||
54 | * is the last token emitted, if applicable. | ||
55 | */ | ||
56 | protected $token; | ||
57 | |||
58 | // These are constants describing the content model | ||
59 | const PCDATA = 0; | ||
60 | const RCDATA = 1; | ||
61 | const CDATA = 2; | ||
62 | const PLAINTEXT = 3; | ||
63 | |||
64 | // These are constants describing tokens | ||
65 | // XXX should probably be moved somewhere else, probably the | ||
66 | // HTML5 class. | ||
67 | const DOCTYPE = 0; | ||
68 | const STARTTAG = 1; | ||
69 | const ENDTAG = 2; | ||
70 | const COMMENT = 3; | ||
71 | const CHARACTER = 4; | ||
72 | const SPACECHARACTER = 5; | ||
73 | const EOF = 6; | ||
74 | const PARSEERROR = 7; | ||
75 | |||
76 | // These are constants representing bunches of characters. | ||
77 | const ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'; | ||
78 | const UPPER_ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'; | ||
79 | const LOWER_ALPHA = 'abcdefghijklmnopqrstuvwxyz'; | ||
80 | const DIGIT = '0123456789'; | ||
81 | const HEX = '0123456789ABCDEFabcdef'; | ||
82 | const WHITESPACE = "\t\n\x0c "; | ||
83 | |||
84 | /** | ||
85 | * @param $data Data to parse | ||
86 | */ | ||
public function __construct($data, $builder = null) {
    // Wrap the raw input in a stream
    $this->stream = new HTML5_InputStream($data);
    // Use the supplied tree builder, or a default one when none
    // (or a falsy value) was given
    $this->tree = $builder ? $builder : new HTML5_TreeBuilder;
    // Tokenization starts in the PCDATA content model
    $this->content_model = self::PCDATA;
}
93 | |||
/**
 * Parse the input as a fragment inside the given context element.
 *
 * @param $context Context passed to the tree builder's setupContext()
 */
public function parseFragment($context = null) {
    $this->tree->setupContext($context);

    // Take over a content model left on the tree builder by
    // setupContext(), then clear it there
    $inherited = $this->tree->content_model;
    if ($inherited) {
        $this->content_model = $inherited;
        $this->tree->content_model = null;
    }

    $this->parse();
}
102 | |||
103 | // XXX maybe convert this into an iterator? regardless, this function | ||
104 | // and the save function should go into a Parser facade of some sort | ||
105 | /** | ||
106 | * Performs the actual parsing of the document. | ||
107 | */ | ||
108 | public function parse() { | ||
109 | // Current state | ||
110 | $state = 'data'; | ||
111 | // This is used to avoid having to have look-behind in the data state. | ||
112 | $lastFourChars = ''; | ||
113 | /** | ||
114 | * Escape flag as specified by the HTML5 specification: "used to | ||
115 | * control the behavior of the tokeniser. It is either true or | ||
116 | * false, and initially must be set to the false state." | ||
117 | */ | ||
118 | $escape = false; | ||
119 | //echo "\n\n"; | ||
120 | while($state !== null) { | ||
121 | |||
122 | /*echo $state . ' '; | ||
123 | switch ($this->content_model) { | ||
124 | case self::PCDATA: echo 'PCDATA'; break; | ||
125 | case self::RCDATA: echo 'RCDATA'; break; | ||
126 | case self::CDATA: echo 'CDATA'; break; | ||
127 | case self::PLAINTEXT: echo 'PLAINTEXT'; break; | ||
128 | } | ||
129 | if ($escape) echo " escape"; | ||
130 | echo "\n";*/ | ||
131 | |||
132 | switch($state) { | ||
133 | case 'data': | ||
134 | |||
135 | /* Consume the next input character */ | ||
136 | $char = $this->stream->char(); | ||
137 | $lastFourChars .= $char; | ||
138 | if (strlen($lastFourChars) > 4) $lastFourChars = substr($lastFourChars, -4); | ||
139 | |||
140 | // see below for meaning | ||
141 | $hyp_cond = | ||
142 | !$escape && | ||
143 | ( | ||
144 | $this->content_model === self::RCDATA || | ||
145 | $this->content_model === self::CDATA | ||
146 | ); | ||
147 | $amp_cond = | ||
148 | !$escape && | ||
149 | ( | ||
150 | $this->content_model === self::PCDATA || | ||
151 | $this->content_model === self::RCDATA | ||
152 | ); | ||
153 | $lt_cond = | ||
154 | $this->content_model === self::PCDATA || | ||
155 | ( | ||
156 | ( | ||
157 | $this->content_model === self::RCDATA || | ||
158 | $this->content_model === self::CDATA | ||
159 | ) && | ||
160 | !$escape | ||
161 | ); | ||
162 | $gt_cond = | ||
163 | $escape && | ||
164 | ( | ||
165 | $this->content_model === self::RCDATA || | ||
166 | $this->content_model === self::CDATA | ||
167 | ); | ||
168 | |||
169 | if($char === '&' && $amp_cond) { | ||
170 | /* U+0026 AMPERSAND (&) | ||
171 | When the content model flag is set to one of the PCDATA or RCDATA | ||
172 | states and the escape flag is false: switch to the | ||
173 | character reference data state. Otherwise: treat it as per | ||
174 | the "anything else" entry below. */ | ||
175 | $state = 'character reference data'; | ||
176 | |||
177 | } elseif( | ||
178 | $char === '-' && | ||
179 | $hyp_cond && | ||
180 | $lastFourChars === '<!--' | ||
181 | ) { | ||
182 | /* | ||
183 | U+002D HYPHEN-MINUS (-) | ||
184 | If the content model flag is set to either the RCDATA state or | ||
185 | the CDATA state, and the escape flag is false, and there are at | ||
186 | least three characters before this one in the input stream, and the | ||
187 | last four characters in the input stream, including this one, are | ||
188 | U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, | ||
189 | and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */ | ||
190 | $escape = true; | ||
191 | |||
192 | /* In any case, emit the input character as a character token. Stay | ||
193 | in the data state. */ | ||
194 | $this->emitToken(array( | ||
195 | 'type' => self::CHARACTER, | ||
196 | 'data' => '-' | ||
197 | )); | ||
198 | // We do the "any case" part as part of "anything else". | ||
199 | |||
200 | /* U+003C LESS-THAN SIGN (<) */ | ||
201 | } elseif($char === '<' && $lt_cond) { | ||
202 | /* When the content model flag is set to the PCDATA state: switch | ||
203 | to the tag open state. | ||
204 | |||
205 | When the content model flag is set to either the RCDATA state or | ||
206 | the CDATA state and the escape flag is false: switch to the tag | ||
207 | open state. | ||
208 | |||
209 | Otherwise: treat it as per the "anything else" entry below. */ | ||
210 | $state = 'tag open'; | ||
211 | |||
212 | /* U+003E GREATER-THAN SIGN (>) */ | ||
213 | } elseif( | ||
214 | $char === '>' && | ||
215 | $gt_cond && | ||
216 | substr($lastFourChars, 1) === '-->' | ||
217 | ) { | ||
218 | /* If the content model flag is set to either the RCDATA state or | ||
219 | the CDATA state, and the escape flag is true, and the last three | ||
220 | characters in the input stream including this one are U+002D | ||
221 | HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"), | ||
222 | set the escape flag to false. */ | ||
223 | $escape = false; | ||
224 | |||
225 | /* In any case, emit the input character as a character token. | ||
226 | Stay in the data state. */ | ||
227 | $this->emitToken(array( | ||
228 | 'type' => self::CHARACTER, | ||
229 | 'data' => '>' | ||
230 | )); | ||
231 | // We do the "any case" part as part of "anything else". | ||
232 | |||
233 | } elseif($char === false) { | ||
234 | /* EOF | ||
235 | Emit an end-of-file token. */ | ||
236 | $state = null; | ||
237 | $this->tree->emitToken(array( | ||
238 | 'type' => self::EOF | ||
239 | )); | ||
240 | |||
241 | } elseif($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { | ||
242 | // Directly after emitting a token you switch back to the "data | ||
243 | // state". At that point spaceCharacters are important so they are | ||
244 | // emitted separately. | ||
245 | $chars = $this->stream->charsWhile(self::WHITESPACE); | ||
246 | $this->emitToken(array( | ||
247 | 'type' => self::SPACECHARACTER, | ||
248 | 'data' => $char . $chars | ||
249 | )); | ||
250 | $lastFourChars .= $chars; | ||
251 | if (strlen($lastFourChars) > 4) $lastFourChars = substr($lastFourChars, -4); | ||
252 | |||
253 | } else { | ||
254 | /* Anything else | ||
255 | THIS IS AN OPTIMIZATION: Get as many characters as possible that | ||
256 | would otherwise also be treated as character tokens and emit them | ||
257 | as a single character token. Stay in the data state. */ | ||
258 | |||
259 | $mask = ''; | ||
260 | if ($hyp_cond) $mask .= '-'; | ||
261 | if ($amp_cond) $mask .= '&'; | ||
262 | if ($lt_cond) $mask .= '<'; | ||
263 | if ($gt_cond) $mask .= '>'; | ||
264 | |||
265 | if ($mask === '') { | ||
266 | $chars = $this->stream->remainingChars(); | ||
267 | } else { | ||
268 | $chars = $this->stream->charsUntil($mask); | ||
269 | } | ||
270 | |||
271 | $this->emitToken(array( | ||
272 | 'type' => self::CHARACTER, | ||
273 | 'data' => $char . $chars | ||
274 | )); | ||
275 | |||
276 | $lastFourChars .= $chars; | ||
277 | if (strlen($lastFourChars) > 4) $lastFourChars = substr($lastFourChars, -4); | ||
278 | |||
279 | $state = 'data'; | ||
280 | } | ||
281 | break; | ||
282 | |||
283 | case 'character reference data': | ||
284 | /* (This cannot happen if the content model flag | ||
285 | is set to the CDATA state.) */ | ||
286 | |||
287 | /* Attempt to consume a character reference, with no | ||
288 | additional allowed character. */ | ||
289 | $entity = $this->consumeCharacterReference(); | ||
290 | |||
291 | /* If nothing is returned, emit a U+0026 AMPERSAND | ||
292 | character token. Otherwise, emit the character token that | ||
293 | was returned. */ | ||
294 | // This is all done when consuming the character reference. | ||
295 | $this->emitToken(array( | ||
296 | 'type' => self::CHARACTER, | ||
297 | 'data' => $entity | ||
298 | )); | ||
299 | |||
300 | /* Finally, switch to the data state. */ | ||
301 | $state = 'data'; | ||
302 | break; | ||
303 | |||
304 | case 'tag open': | ||
305 | $char = $this->stream->char(); | ||
306 | |||
307 | switch($this->content_model) { | ||
308 | case self::RCDATA: | ||
309 | case self::CDATA: | ||
310 | /* Consume the next input character. If it is a | ||
311 | U+002F SOLIDUS (/) character, switch to the close | ||
312 | tag open state. Otherwise, emit a U+003C LESS-THAN | ||
313 | SIGN character token and reconsume the current input | ||
314 | character in the data state. */ | ||
315 | // We consumed above. | ||
316 | |||
317 | if($char === '/') { | ||
318 | $state = 'close tag open'; | ||
319 | |||
320 | } else { | ||
321 | $this->emitToken(array( | ||
322 | 'type' => self::CHARACTER, | ||
323 | 'data' => '<' | ||
324 | )); | ||
325 | |||
326 | $this->stream->unget(); | ||
327 | |||
328 | $state = 'data'; | ||
329 | } | ||
330 | break; | ||
331 | |||
332 | case self::PCDATA: | ||
333 | /* If the content model flag is set to the PCDATA state | ||
334 | Consume the next input character: */ | ||
335 | // We consumed above. | ||
336 | |||
337 | if($char === '!') { | ||
338 | /* U+0021 EXCLAMATION MARK (!) | ||
339 | Switch to the markup declaration open state. */ | ||
340 | $state = 'markup declaration open'; | ||
341 | |||
342 | } elseif($char === '/') { | ||
343 | /* U+002F SOLIDUS (/) | ||
344 | Switch to the close tag open state. */ | ||
345 | $state = 'close tag open'; | ||
346 | |||
347 | } elseif('A' <= $char && $char <= 'Z') { | ||
348 | /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z | ||
349 | Create a new start tag token, set its tag name to the lowercase | ||
350 | version of the input character (add 0x0020 to the character's code | ||
351 | point), then switch to the tag name state. (Don't emit the token | ||
352 | yet; further details will be filled in before it is emitted.) */ | ||
353 | $this->token = array( | ||
354 | 'name' => strtolower($char), | ||
355 | 'type' => self::STARTTAG, | ||
356 | 'attr' => array() | ||
357 | ); | ||
358 | |||
359 | $state = 'tag name'; | ||
360 | |||
361 | } elseif('a' <= $char && $char <= 'z') { | ||
362 | /* U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z | ||
363 | Create a new start tag token, set its tag name to the input | ||
364 | character, then switch to the tag name state. (Don't emit | ||
365 | the token yet; further details will be filled in before it | ||
366 | is emitted.) */ | ||
367 | $this->token = array( | ||
368 | 'name' => $char, | ||
369 | 'type' => self::STARTTAG, | ||
370 | 'attr' => array() | ||
371 | ); | ||
372 | |||
373 | $state = 'tag name'; | ||
374 | |||
375 | } elseif($char === '>') { | ||
376 | /* U+003E GREATER-THAN SIGN (>) | ||
377 | Parse error. Emit a U+003C LESS-THAN SIGN character token and a | ||
378 | U+003E GREATER-THAN SIGN character token. Switch to the data state. */ | ||
379 | $this->emitToken(array( | ||
380 | 'type' => self::PARSEERROR, | ||
381 | 'data' => 'expected-tag-name-but-got-right-bracket' | ||
382 | )); | ||
383 | $this->emitToken(array( | ||
384 | 'type' => self::CHARACTER, | ||
385 | 'data' => '<>' | ||
386 | )); | ||
387 | |||
388 | $state = 'data'; | ||
389 | |||
390 | } elseif($char === '?') { | ||
391 | /* U+003F QUESTION MARK (?) | ||
392 | Parse error. Switch to the bogus comment state. */ | ||
393 | $this->emitToken(array( | ||
394 | 'type' => self::PARSEERROR, | ||
395 | 'data' => 'expected-tag-name-but-got-question-mark' | ||
396 | )); | ||
397 | $this->token = array( | ||
398 | 'data' => '?', | ||
399 | 'type' => self::COMMENT | ||
400 | ); | ||
401 | $state = 'bogus comment'; | ||
402 | |||
403 | } else { | ||
404 | /* Anything else | ||
405 | Parse error. Emit a U+003C LESS-THAN SIGN character token and | ||
406 | reconsume the current input character in the data state. */ | ||
407 | $this->emitToken(array( | ||
408 | 'type' => self::PARSEERROR, | ||
409 | 'data' => 'expected-tag-name' | ||
410 | )); | ||
411 | $this->emitToken(array( | ||
412 | 'type' => self::CHARACTER, | ||
413 | 'data' => '<' | ||
414 | )); | ||
415 | |||
416 | $state = 'data'; | ||
417 | $this->stream->unget(); | ||
418 | } | ||
419 | break; | ||
420 | } | ||
421 | break; | ||
422 | |||
423 | case 'close tag open': | ||
424 | if ( | ||
425 | $this->content_model === self::RCDATA || | ||
426 | $this->content_model === self::CDATA | ||
427 | ) { | ||
428 | /* If the content model flag is set to the RCDATA or CDATA | ||
429 | states... */ | ||
430 | $name = strtolower($this->stream->charsWhile(self::ALPHA)); | ||
431 | $following = $this->stream->char(); | ||
432 | $this->stream->unget(); | ||
433 | if ( | ||
434 | !$this->token || | ||
435 | $this->token['name'] !== $name || | ||
436 | $this->token['name'] === $name && !in_array($following, array("\x09", "\x0A", "\x0C", "\x20", "\x3E", "\x2F", false)) | ||
437 | ) { | ||
438 | /* if no start tag token has ever been emitted by this instance | ||
439 | of the tokenizer (fragment case), or, if the next few | ||
440 | characters do not match the tag name of the last start tag | ||
441 | token emitted (compared in an ASCII case-insensitive manner), | ||
442 | or if they do but they are not immediately followed by one of | ||
443 | the following characters: | ||
444 | |||
445 | * U+0009 CHARACTER TABULATION | ||
446 | * U+000A LINE FEED (LF) | ||
447 | * U+000C FORM FEED (FF) | ||
448 | * U+0020 SPACE | ||
449 | * U+003E GREATER-THAN SIGN (>) | ||
450 | * U+002F SOLIDUS (/) | ||
451 | * EOF | ||
452 | |||
453 | ...then emit a U+003C LESS-THAN SIGN character token, a | ||
454 | U+002F SOLIDUS character token, and switch to the data | ||
455 | state to process the next input character. */ | ||
456 | // XXX: Probably ought to replace in_array with $following === x ||... | ||
457 | |||
458 | // We also need to emit $name now we've consumed that, as we | ||
459 | // know it'll just be emitted as a character token. | ||
460 | $this->emitToken(array( | ||
461 | 'type' => self::CHARACTER, | ||
462 | 'data' => '</' . $name | ||
463 | )); | ||
464 | |||
465 | $state = 'data'; | ||
466 | } else { | ||
467 | // This matches what would happen if we actually did the | ||
468 | // otherwise below (but we can't because we've consumed too | ||
469 | // much). | ||
470 | |||
471 | // Start the end tag token with the name we already have. | ||
472 | $this->token = array( | ||
473 | 'name' => $name, | ||
474 | 'type' => self::ENDTAG | ||
475 | ); | ||
476 | |||
477 | // Change to tag name state. | ||
478 | $state = 'tag name'; | ||
479 | } | ||
480 | } elseif ($this->content_model === self::PCDATA) { | ||
481 | /* Otherwise, if the content model flag is set to the PCDATA | ||
482 | state [...]: */ | ||
483 | $char = $this->stream->char(); | ||
484 | |||
485 | if ('A' <= $char && $char <= 'Z') { | ||
486 | /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z | ||
487 | Create a new end tag token, set its tag name to the lowercase version | ||
488 | of the input character (add 0x0020 to the character's code point), then | ||
489 | switch to the tag name state. (Don't emit the token yet; further details | ||
490 | will be filled in before it is emitted.) */ | ||
491 | $this->token = array( | ||
492 | 'name' => strtolower($char), | ||
493 | 'type' => self::ENDTAG | ||
494 | ); | ||
495 | |||
496 | $state = 'tag name'; | ||
497 | |||
498 | } elseif ('a' <= $char && $char <= 'z') { | ||
499 | /* U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z | ||
500 | Create a new end tag token, set its tag name to the | ||
501 | input character, then switch to the tag name state. | ||
502 | (Don't emit the token yet; further details will be | ||
503 | filled in before it is emitted.) */ | ||
504 | $this->token = array( | ||
505 | 'name' => $char, | ||
506 | 'type' => self::ENDTAG | ||
507 | ); | ||
508 | |||
509 | $state = 'tag name'; | ||
510 | |||
511 | } elseif($char === '>') { | ||
512 | /* U+003E GREATER-THAN SIGN (>) | ||
513 | Parse error. Switch to the data state. */ | ||
514 | $this->emitToken(array( | ||
515 | 'type' => self::PARSEERROR, | ||
516 | 'data' => 'expected-closing-tag-but-got-right-bracket' | ||
517 | )); | ||
518 | $state = 'data'; | ||
519 | |||
520 | } elseif($char === false) { | ||
521 | /* EOF | ||
522 | Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F | ||
523 | SOLIDUS character token. Reconsume the EOF character in the data state. */ | ||
524 | $this->emitToken(array( | ||
525 | 'type' => self::PARSEERROR, | ||
526 | 'data' => 'expected-closing-tag-but-got-eof' | ||
527 | )); | ||
528 | $this->emitToken(array( | ||
529 | 'type' => self::CHARACTER, | ||
530 | 'data' => '</' | ||
531 | )); | ||
532 | |||
533 | $this->stream->unget(); | ||
534 | $state = 'data'; | ||
535 | |||
536 | } else { | ||
537 | /* Parse error. Switch to the bogus comment state. */ | ||
538 | $this->emitToken(array( | ||
539 | 'type' => self::PARSEERROR, | ||
540 | 'data' => 'expected-closing-tag-but-got-char' | ||
541 | )); | ||
542 | $this->token = array( | ||
543 | 'data' => $char, | ||
544 | 'type' => self::COMMENT | ||
545 | ); | ||
546 | $state = 'bogus comment'; | ||
547 | } | ||
548 | } | ||
549 | break; | ||
550 | |||
551 | case 'tag name': | ||
552 | /* Consume the next input character: */ | ||
553 | $char = $this->stream->char(); | ||
554 | |||
555 | if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { | ||
556 | /* U+0009 CHARACTER TABULATION | ||
557 | U+000A LINE FEED (LF) | ||
558 | U+000C FORM FEED (FF) | ||
559 | U+0020 SPACE | ||
560 | Switch to the before attribute name state. */ | ||
561 | $state = 'before attribute name'; | ||
562 | |||
563 | } elseif($char === '/') { | ||
564 | /* U+002F SOLIDUS (/) | ||
565 | Switch to the self-closing start tag state. */ | ||
566 | $state = 'self-closing start tag'; | ||
567 | |||
568 | } elseif($char === '>') { | ||
569 | /* U+003E GREATER-THAN SIGN (>) | ||
570 | Emit the current tag token. Switch to the data state. */ | ||
571 | $this->emitToken($this->token); | ||
572 | $state = 'data'; | ||
573 | |||
574 | } elseif('A' <= $char && $char <= 'Z') { | ||
575 | /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z | ||
576 | Append the lowercase version of the current input | ||
577 | character (add 0x0020 to the character's code point) to | ||
578 | the current tag token's tag name. Stay in the tag name state. */ | ||
579 | $chars = $this->stream->charsWhile(self::UPPER_ALPHA); | ||
580 | |||
581 | $this->token['name'] .= strtolower($char . $chars); | ||
582 | $state = 'tag name'; | ||
583 | |||
584 | } elseif($char === false) { | ||
585 | /* EOF | ||
586 | Parse error. Reconsume the EOF character in the data state. */ | ||
587 | $this->emitToken(array( | ||
588 | 'type' => self::PARSEERROR, | ||
589 | 'data' => 'eof-in-tag-name' | ||
590 | )); | ||
591 | |||
592 | $this->stream->unget(); | ||
593 | $state = 'data'; | ||
594 | |||
595 | } else { | ||
596 | /* Anything else | ||
597 | Append the current input character to the current tag token's tag name. | ||
598 | Stay in the tag name state. */ | ||
599 | $chars = $this->stream->charsUntil("\t\n\x0C />" . self::UPPER_ALPHA); | ||
600 | |||
601 | $this->token['name'] .= $char . $chars; | ||
602 | $state = 'tag name'; | ||
603 | } | ||
604 | break; | ||
605 | |||
606 | case 'before attribute name': | ||
607 | /* Consume the next input character: */ | ||
608 | $char = $this->stream->char(); | ||
609 | |||
610 | // this conditional is optimized, check bottom | ||
611 | if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { | ||
612 | /* U+0009 CHARACTER TABULATION | ||
613 | U+000A LINE FEED (LF) | ||
614 | U+000C FORM FEED (FF) | ||
615 | U+0020 SPACE | ||
616 | Stay in the before attribute name state. */ | ||
617 | $state = 'before attribute name'; | ||
618 | |||
619 | } elseif($char === '/') { | ||
620 | /* U+002F SOLIDUS (/) | ||
621 | Switch to the self-closing start tag state. */ | ||
622 | $state = 'self-closing start tag'; | ||
623 | |||
624 | } elseif($char === '>') { | ||
625 | /* U+003E GREATER-THAN SIGN (>) | ||
626 | Emit the current tag token. Switch to the data state. */ | ||
627 | $this->emitToken($this->token); | ||
628 | $state = 'data'; | ||
629 | |||
630 | } elseif('A' <= $char && $char <= 'Z') { | ||
631 | /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z | ||
632 | Start a new attribute in the current tag token. Set that | ||
633 | attribute's name to the lowercase version of the current | ||
634 | input character (add 0x0020 to the character's code | ||
635 | point), and its value to the empty string. Switch to the | ||
636 | attribute name state.*/ | ||
637 | $this->token['attr'][] = array( | ||
638 | 'name' => strtolower($char), | ||
639 | 'value' => '' | ||
640 | ); | ||
641 | |||
642 | $state = 'attribute name'; | ||
643 | |||
644 | } elseif($char === false) { | ||
645 | /* EOF | ||
646 | Parse error. Reconsume the EOF character in the data state. */ | ||
647 | $this->emitToken(array( | ||
648 | 'type' => self::PARSEERROR, | ||
649 | 'data' => 'expected-attribute-name-but-got-eof' | ||
650 | )); | ||
651 | |||
652 | $this->stream->unget(); | ||
653 | $state = 'data'; | ||
654 | |||
655 | } else { | ||
656 | /* U+0022 QUOTATION MARK (") | ||
657 | U+0027 APOSTROPHE (') | ||
658 | U+003C LESS-THAN SIGN (<) | ||
659 | U+003D EQUALS SIGN (=) | ||
660 | Parse error. Treat it as per the "anything else" entry | ||
661 | below. */ | ||
662 | if($char === '"' || $char === "'" || $char === '<' || $char === '=') { | ||
663 | $this->emitToken(array( | ||
664 | 'type' => self::PARSEERROR, | ||
665 | 'data' => 'invalid-character-in-attribute-name' | ||
666 | )); | ||
667 | } | ||
668 | |||
669 | /* Anything else | ||
670 | Start a new attribute in the current tag token. Set that attribute's | ||
671 | name to the current input character, and its value to the empty string. | ||
672 | Switch to the attribute name state. */ | ||
673 | $this->token['attr'][] = array( | ||
674 | 'name' => $char, | ||
675 | 'value' => '' | ||
676 | ); | ||
677 | |||
678 | $state = 'attribute name'; | ||
679 | } | ||
680 | break; | ||
681 | |||
682 | case 'attribute name': | ||
683 | // Consume the next input character: | ||
684 | $char = $this->stream->char(); | ||
685 | |||
686 | // this conditional is optimized, check bottom | ||
687 | if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { | ||
688 | /* U+0009 CHARACTER TABULATION | ||
689 | U+000A LINE FEED (LF) | ||
690 | U+000C FORM FEED (FF) | ||
691 | U+0020 SPACE | ||
692 | Switch to the after attribute name state. */ | ||
693 | $state = 'after attribute name'; | ||
694 | |||
695 | } elseif($char === '/') { | ||
696 | /* U+002F SOLIDUS (/) | ||
697 | Switch to the self-closing start tag state. */ | ||
698 | $state = 'self-closing start tag'; | ||
699 | |||
700 | } elseif($char === '=') { | ||
701 | /* U+003D EQUALS SIGN (=) | ||
702 | Switch to the before attribute value state. */ | ||
703 | $state = 'before attribute value'; | ||
704 | |||
705 | } elseif($char === '>') { | ||
706 | /* U+003E GREATER-THAN SIGN (>) | ||
707 | Emit the current tag token. Switch to the data state. */ | ||
708 | $this->emitToken($this->token); | ||
709 | $state = 'data'; | ||
710 | |||
711 | } elseif('A' <= $char && $char <= 'Z') { | ||
712 | /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z | ||
713 | Append the lowercase version of the current input | ||
714 | character (add 0x0020 to the character's code point) to | ||
715 | the current attribute's name. Stay in the attribute name | ||
716 | state. */ | ||
717 | $chars = $this->stream->charsWhile(self::UPPER_ALPHA); | ||
718 | |||
719 | $last = count($this->token['attr']) - 1; | ||
720 | $this->token['attr'][$last]['name'] .= strtolower($char . $chars); | ||
721 | |||
722 | $state = 'attribute name'; | ||
723 | |||
724 | } elseif($char === false) { | ||
725 | /* EOF | ||
726 | Parse error. Reconsume the EOF character in the data state. */ | ||
727 | $this->emitToken(array( | ||
728 | 'type' => self::PARSEERROR, | ||
729 | 'data' => 'eof-in-attribute-name' | ||
730 | )); | ||
731 | |||
732 | $this->stream->unget(); | ||
733 | $state = 'data'; | ||
734 | |||
735 | } else { | ||
736 | /* U+0022 QUOTATION MARK (") | ||
737 | U+0027 APOSTROPHE (') | ||
738 | U+003C LESS-THAN SIGN (<) | ||
739 | Parse error. Treat it as per the "anything else" | ||
740 | entry below. */ | ||
741 | if($char === '"' || $char === "'" || $char === '<') { | ||
742 | $this->emitToken(array( | ||
743 | 'type' => self::PARSEERROR, | ||
744 | 'data' => 'invalid-character-in-attribute-name' | ||
745 | )); | ||
746 | } | ||
747 | |||
748 | /* Anything else | ||
749 | Append the current input character to the current attribute's name. | ||
750 | Stay in the attribute name state. */ | ||
751 | $chars = $this->stream->charsUntil("\t\n\x0C /=>\"'" . self::UPPER_ALPHA); | ||
752 | |||
753 | $last = count($this->token['attr']) - 1; | ||
754 | $this->token['attr'][$last]['name'] .= $char . $chars; | ||
755 | |||
756 | $state = 'attribute name'; | ||
757 | } | ||
758 | |||
759 | /* When the user agent leaves the attribute name state | ||
760 | (and before emitting the tag token, if appropriate), the | ||
761 | complete attribute's name must be compared to the other | ||
762 | attributes on the same token; if there is already an | ||
763 | attribute on the token with the exact same name, then this | ||
764 | is a parse error and the new attribute must be dropped, along | ||
765 | with the value that gets associated with it (if any). */ | ||
766 | // this might be implemented in the emitToken method | ||
767 | break; | ||
768 | |||
769 | case 'after attribute name': | ||
770 | // Consume the next input character: | ||
771 | $char = $this->stream->char(); | ||
772 | |||
773 | // this is an optimized conditional, check the bottom | ||
774 | if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { | ||
775 | /* U+0009 CHARACTER TABULATION | ||
776 | U+000A LINE FEED (LF) | ||
777 | U+000C FORM FEED (FF) | ||
778 | U+0020 SPACE | ||
779 | Stay in the after attribute name state. */ | ||
780 | $state = 'after attribute name'; | ||
781 | |||
782 | } elseif($char === '/') { | ||
783 | /* U+002F SOLIDUS (/) | ||
784 | Switch to the self-closing start tag state. */ | ||
785 | $state = 'self-closing start tag'; | ||
786 | |||
787 | } elseif($char === '=') { | ||
788 | /* U+003D EQUALS SIGN (=) | ||
789 | Switch to the before attribute value state. */ | ||
790 | $state = 'before attribute value'; | ||
791 | |||
792 | } elseif($char === '>') { | ||
793 | /* U+003E GREATER-THAN SIGN (>) | ||
794 | Emit the current tag token. Switch to the data state. */ | ||
795 | $this->emitToken($this->token); | ||
796 | $state = 'data'; | ||
797 | |||
798 | } elseif('A' <= $char && $char <= 'Z') { | ||
799 | /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z | ||
800 | Start a new attribute in the current tag token. Set that | ||
801 | attribute's name to the lowercase version of the current | ||
802 | input character (add 0x0020 to the character's code | ||
803 | point), and its value to the empty string. Switch to the | ||
804 | attribute name state. */ | ||
805 | $this->token['attr'][] = array( | ||
806 | 'name' => strtolower($char), | ||
807 | 'value' => '' | ||
808 | ); | ||
809 | |||
810 | $state = 'attribute name'; | ||
811 | |||
812 | } elseif($char === false) { | ||
813 | /* EOF | ||
814 | Parse error. Reconsume the EOF character in the data state. */ | ||
815 | $this->emitToken(array( | ||
816 | 'type' => self::PARSEERROR, | ||
817 | 'data' => 'expected-end-of-tag-but-got-eof' | ||
818 | )); | ||
819 | |||
820 | $this->stream->unget(); | ||
821 | $state = 'data'; | ||
822 | |||
823 | } else { | ||
824 | /* U+0022 QUOTATION MARK (") | ||
825 | U+0027 APOSTROPHE (') | ||
826 | U+003C LESS-THAN SIGN(<) | ||
827 | Parse error. Treat it as per the "anything else" | ||
828 | entry below. */ | ||
829 | if($char === '"' || $char === "'" || $char === "<") { | ||
830 | $this->emitToken(array( | ||
831 | 'type' => self::PARSEERROR, | ||
832 | 'data' => 'invalid-character-after-attribute-name' | ||
833 | )); | ||
834 | } | ||
835 | |||
836 | /* Anything else | ||
837 | Start a new attribute in the current tag token. Set that attribute's | ||
838 | name to the current input character, and its value to the empty string. | ||
839 | Switch to the attribute name state. */ | ||
840 | $this->token['attr'][] = array( | ||
841 | 'name' => $char, | ||
842 | 'value' => '' | ||
843 | ); | ||
844 | |||
845 | $state = 'attribute name'; | ||
846 | } | ||
847 | break; | ||
848 | |||
849 | case 'before attribute value': | ||
850 | // Consume the next input character: | ||
851 | $char = $this->stream->char(); | ||
852 | |||
853 | // this is an optimized conditional | ||
854 | if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { | ||
855 | /* U+0009 CHARACTER TABULATION | ||
856 | U+000A LINE FEED (LF) | ||
857 | U+000C FORM FEED (FF) | ||
858 | U+0020 SPACE | ||
859 | Stay in the before attribute value state. */ | ||
860 | $state = 'before attribute value'; | ||
861 | |||
862 | } elseif($char === '"') { | ||
863 | /* U+0022 QUOTATION MARK (") | ||
864 | Switch to the attribute value (double-quoted) state. */ | ||
865 | $state = 'attribute value (double-quoted)'; | ||
866 | |||
867 | } elseif($char === '&') { | ||
868 | /* U+0026 AMPERSAND (&) | ||
869 | Switch to the attribute value (unquoted) state and reconsume | ||
870 | this input character. */ | ||
871 | $this->stream->unget(); | ||
872 | $state = 'attribute value (unquoted)'; | ||
873 | |||
874 | } elseif($char === '\'') { | ||
875 | /* U+0027 APOSTROPHE (') | ||
876 | Switch to the attribute value (single-quoted) state. */ | ||
877 | $state = 'attribute value (single-quoted)'; | ||
878 | |||
879 | } elseif($char === '>') { | ||
880 | /* U+003E GREATER-THAN SIGN (>) | ||
881 | Parse error. Emit the current tag token. Switch to the data state. */ | ||
882 | $this->emitToken(array( | ||
883 | 'type' => self::PARSEERROR, | ||
884 | 'data' => 'expected-attribute-value-but-got-right-bracket' | ||
885 | )); | ||
886 | $this->emitToken($this->token); | ||
887 | $state = 'data'; | ||
888 | |||
889 | } elseif($char === false) { | ||
890 | /* EOF | ||
891 | Parse error. Reconsume the EOF character in the data state. */ | ||
892 | $this->emitToken(array( | ||
893 | 'type' => self::PARSEERROR, | ||
894 | 'data' => 'expected-attribute-value-but-got-eof' | ||
895 | )); | ||
896 | $this->stream->unget(); | ||
897 | $state = 'data'; | ||
898 | |||
899 | } else { | ||
900 | /* U+003D EQUALS SIGN (=) | ||
901 | * U+003C LESS-THAN SIGN (<) | ||
902 | Parse error. Treat it as per the "anything else" entry below. */ | ||
903 | if($char === '=' || $char === '<') { | ||
904 | $this->emitToken(array( | ||
905 | 'type' => self::PARSEERROR, | ||
906 | 'data' => 'equals-in-unquoted-attribute-value' | ||
907 | )); | ||
908 | } | ||
909 | |||
910 | /* Anything else | ||
911 | Append the current input character to the current attribute's value. | ||
912 | Switch to the attribute value (unquoted) state. */ | ||
913 | $last = count($this->token['attr']) - 1; | ||
914 | $this->token['attr'][$last]['value'] .= $char; | ||
915 | |||
916 | $state = 'attribute value (unquoted)'; | ||
917 | } | ||
918 | break; | ||
919 | |||
920 | case 'attribute value (double-quoted)': | ||
921 | // Consume the next input character: | ||
922 | $char = $this->stream->char(); | ||
923 | |||
924 | if($char === '"') { | ||
925 | /* U+0022 QUOTATION MARK (") | ||
926 | Switch to the after attribute value (quoted) state. */ | ||
927 | $state = 'after attribute value (quoted)'; | ||
928 | |||
929 | } elseif($char === '&') { | ||
930 | /* U+0026 AMPERSAND (&) | ||
931 | Switch to the character reference in attribute value | ||
932 | state, with the additional allowed character | ||
933 | being U+0022 QUOTATION MARK ("). */ | ||
934 | $this->characterReferenceInAttributeValue('"'); | ||
935 | |||
936 | } elseif($char === false) { | ||
937 | /* EOF | ||
938 | Parse error. Reconsume the EOF character in the data state. */ | ||
939 | $this->emitToken(array( | ||
940 | 'type' => self::PARSEERROR, | ||
941 | 'data' => 'eof-in-attribute-value-double-quote' | ||
942 | )); | ||
943 | |||
944 | $this->stream->unget(); | ||
945 | $state = 'data'; | ||
946 | |||
947 | } else { | ||
948 | /* Anything else | ||
949 | Append the current input character to the current attribute's value. | ||
950 | Stay in the attribute value (double-quoted) state. */ | ||
951 | $chars = $this->stream->charsUntil('"&'); | ||
952 | |||
953 | $last = count($this->token['attr']) - 1; | ||
954 | $this->token['attr'][$last]['value'] .= $char . $chars; | ||
955 | |||
956 | $state = 'attribute value (double-quoted)'; | ||
957 | } | ||
958 | break; | ||
959 | |||
960 | case 'attribute value (single-quoted)': | ||
961 | // Consume the next input character: | ||
962 | $char = $this->stream->char(); | ||
963 | |||
964 | if($char === "'") { | ||
965 | /* U+0027 APOSTROPHE (') | ||
966 | Switch to the after attribute value (quoted) state. */ | ||
967 | $state = 'after attribute value (quoted)'; | ||
968 | |||
969 | } elseif($char === '&') { | ||
970 | /* U+0026 AMPERSAND (&) | ||
971 | Switch to the entity in attribute value state. */ | ||
972 | $this->characterReferenceInAttributeValue("'"); | ||
973 | |||
974 | } elseif($char === false) { | ||
975 | /* EOF | ||
976 | Parse error. Reconsume the EOF character in the data state. */ | ||
977 | $this->emitToken(array( | ||
978 | 'type' => self::PARSEERROR, | ||
979 | 'data' => 'eof-in-attribute-value-single-quote' | ||
980 | )); | ||
981 | |||
982 | $this->stream->unget(); | ||
983 | $state = 'data'; | ||
984 | |||
985 | } else { | ||
986 | /* Anything else | ||
987 | Append the current input character to the current attribute's value. | ||
988 | Stay in the attribute value (single-quoted) state. */ | ||
989 | $chars = $this->stream->charsUntil("'&"); | ||
990 | |||
991 | $last = count($this->token['attr']) - 1; | ||
992 | $this->token['attr'][$last]['value'] .= $char . $chars; | ||
993 | |||
994 | $state = 'attribute value (single-quoted)'; | ||
995 | } | ||
996 | break; | ||
997 | |||
998 | case 'attribute value (unquoted)': | ||
999 | // Tokenizer state for an attribute value written without quotes. Consume the next input character: | ||
1000 | $char = $this->stream->char(); | ||
1001 | |||
1002 | if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { | ||
1003 | /* U+0009 CHARACTER TABULATION | ||
1004 | U+000A LINE FEED (LF) | ||
1005 | U+000C FORM FEED (FF) | ||
1006 | U+0020 SPACE | ||
1007 | Switch to the before attribute name state. */ | ||
1008 | $state = 'before attribute name'; | ||
1009 | |||
1010 | } elseif($char === '&') { | ||
1011 | /* U+0026 AMPERSAND (&) | ||
1012 | Switch to the entity in attribute value state, with the | ||
1013 | additional allowed character being U+003E | ||
1014 | GREATER-THAN SIGN (>). */ | ||
1015 | $this->characterReferenceInAttributeValue('>'); | ||
1016 | |||
1017 | } elseif($char === '>') { | ||
1018 | /* U+003E GREATER-THAN SIGN (>) | ||
1019 | Emit the current tag token. Switch to the data state. */ | ||
1020 | $this->emitToken($this->token); | ||
1021 | $state = 'data'; | ||
1022 | |||
1023 | } elseif ($char === false) { | ||
1024 | /* EOF | ||
1025 | Parse error. Reconsume the EOF character in the data state. */ | ||
1026 | $this->emitToken(array( | ||
1027 | 'type' => self::PARSEERROR, | ||
1028 | 'data' => 'eof-in-attribute-value-no-quotes' | ||
1029 | )); | ||
1030 | $this->stream->unget(); | ||
1031 | $state = 'data'; | ||
1032 | |||
1033 | } else { | ||
1034 | /* U+0022 QUOTATION MARK (") | ||
1035 | U+0027 APOSTROPHE (') | ||
1036 | U+003C LESS-THAN SIGN (<) | ||
1037 | U+003D EQUALS SIGN (=) | ||
1038 | Parse error. Treat it as per the "anything else" | ||
1039 | entry below. */ | ||
1040 | if($char === '"' || $char === "'" || $char === '=' || $char === '<') { | ||
1041 | $this->emitToken(array( | ||
1042 | 'type' => self::PARSEERROR, | ||
1043 | 'data' => 'unexpected-character-in-unquoted-attribute-value' | ||
1044 | )); | ||
1045 | } | ||
1046 | |||
1047 | /* Anything else | ||
1048 | Append the current input character to the current attribute's value. | ||
1049 | Stay in the attribute value (unquoted) state. The bulk read below stops before every character tested above ("<" included), so each one is seen by this state on its own pass. */ | ||
1050 | $chars = $this->stream->charsUntil("\t\n\x0c &>\"'=<"); | ||
1051 | |||
1052 | $last = count($this->token['attr']) - 1; | ||
1053 | $this->token['attr'][$last]['value'] .= $char . $chars; | ||
1054 | |||
1055 | $state = 'attribute value (unquoted)'; | ||
1056 | } | ||
1057 | break; | ||
1058 | |||
1059 | case 'after attribute value (quoted)': | ||
1060 | /* Consume the next input character: */ | ||
1061 | $char = $this->stream->char(); | ||
1062 | |||
1063 | if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { | ||
1064 | /* U+0009 CHARACTER TABULATION | ||
1065 | U+000A LINE FEED (LF) | ||
1066 | U+000C FORM FEED (FF) | ||
1067 | U+0020 SPACE | ||
1068 | Switch to the before attribute name state. */ | ||
1069 | $state = 'before attribute name'; | ||
1070 | |||
1071 | } elseif ($char === '/') { | ||
1072 | /* U+002F SOLIDUS (/) | ||
1073 | Switch to the self-closing start tag state. */ | ||
1074 | $state = 'self-closing start tag'; | ||
1075 | |||
1076 | } elseif ($char === '>') { | ||
1077 | /* U+003E GREATER-THAN SIGN (>) | ||
1078 | Emit the current tag token. Switch to the data state. */ | ||
1079 | $this->emitToken($this->token); | ||
1080 | $state = 'data'; | ||
1081 | |||
1082 | } elseif ($char === false) { | ||
1083 | /* EOF | ||
1084 | Parse error. Reconsume the EOF character in the data state. */ | ||
1085 | $this->emitToken(array( | ||
1086 | 'type' => self::PARSEERROR, | ||
1087 | 'data' => 'unexpected-EOF-after-attribute-value' | ||
1088 | )); | ||
1089 | $this->stream->unget(); | ||
1090 | $state = 'data'; | ||
1091 | |||
1092 | } else { | ||
1093 | /* Anything else | ||
1094 | Parse error. Reconsume the character in the before attribute | ||
1095 | name state. */ | ||
1096 | $this->emitToken(array( | ||
1097 | 'type' => self::PARSEERROR, | ||
1098 | 'data' => 'unexpected-character-after-attribute-value' | ||
1099 | )); | ||
1100 | $this->stream->unget(); | ||
1101 | $state = 'before attribute name'; | ||
1102 | } | ||
1103 | break; | ||
1104 | |||
1105 | case 'self-closing start tag': | ||
1106 | /* Consume the next input character: */ | ||
1107 | $char = $this->stream->char(); | ||
1108 | |||
1109 | if ($char === '>') { | ||
1110 | /* U+003E GREATER-THAN SIGN (>) | ||
1111 | Set the self-closing flag of the current tag token. | ||
1112 | Emit the current tag token. Switch to the data state. */ | ||
1113 | // not sure if this is the name we want | ||
1114 | $this->token['self-closing'] = true; | ||
1115 | $this->emitToken($this->token); | ||
1116 | $state = 'data'; | ||
1117 | |||
1118 | } elseif ($char === false) { | ||
1119 | /* EOF | ||
1120 | Parse error. Reconsume the EOF character in the data state. */ | ||
1121 | $this->emitToken(array( | ||
1122 | 'type' => self::PARSEERROR, | ||
1123 | 'data' => 'unexpected-eof-after-self-closing' | ||
1124 | )); | ||
1125 | $this->stream->unget(); | ||
1126 | $state = 'data'; | ||
1127 | |||
1128 | } else { | ||
1129 | /* Anything else | ||
1130 | Parse error. Reconsume the character in the before attribute name state. */ | ||
1131 | $this->emitToken(array( | ||
1132 | 'type' => self::PARSEERROR, | ||
1133 | 'data' => 'unexpected-character-after-self-closing' | ||
1134 | )); | ||
1135 | $this->stream->unget(); | ||
1136 | $state = 'before attribute name'; | ||
1137 | } | ||
1138 | break; | ||
1139 | |||
1140 | case 'bogus comment': | ||
1141 | /* (This can only happen if the content model flag is set to the PCDATA state.) */ | ||
1142 | /* Consume every character up to the first U+003E GREATER-THAN SIGN | ||
1143 | character (>) or the end of the file (EOF), whichever comes first. Emit | ||
1144 | a comment token whose data is the concatenation of all the characters | ||
1145 | starting from and including the character that caused the state machine | ||
1146 | to switch into the bogus comment state, up to and including the last | ||
1147 | consumed character before the U+003E character, if any, or up to the | ||
1148 | end of the file otherwise. (If the comment was started by the end of | ||
1149 | the file (EOF), the token is empty.) */ | ||
1150 | $this->token['data'] .= (string) $this->stream->charsUntil('>'); | ||
1151 | $this->stream->char(); | ||
1152 | |||
1153 | $this->emitToken($this->token); | ||
1154 | |||
1155 | /* Switch to the data state. */ | ||
1156 | $state = 'data'; | ||
1157 | break; | ||
1158 | |||
1159 | case 'markup declaration open': | ||
1160 | // Consume for below | ||
1161 | $hyphens = $this->stream->charsWhile('-', 2); | ||
1162 | if ($hyphens === '-') { | ||
1163 | $this->stream->unget(); | ||
1164 | } | ||
1165 | if ($hyphens !== '--') { | ||
1166 | $alpha = $this->stream->charsWhile(self::ALPHA, 7); | ||
1167 | } | ||
1168 | |||
1169 | /* If the next two characters are both U+002D HYPHEN-MINUS (-) | ||
1170 | characters, consume those two characters, create a comment token whose | ||
1171 | data is the empty string, and switch to the comment state. */ | ||
1172 | if($hyphens === '--') { | ||
1173 | $state = 'comment start'; | ||
1174 | $this->token = array( | ||
1175 | 'data' => '', | ||
1176 | 'type' => self::COMMENT | ||
1177 | ); | ||
1178 | |||
1179 | /* Otherwise if the next seven characters are a case-insensitive match | ||
1180 | for the word "DOCTYPE", then consume those characters and switch to the | ||
1181 | DOCTYPE state. */ | ||
1182 | } elseif(strtoupper($alpha) === 'DOCTYPE') { | ||
1183 | $state = 'DOCTYPE'; | ||
1184 | |||
1185 | // XXX not implemented | ||
1186 | /* Otherwise, if the insertion mode is "in foreign content" | ||
1187 | and the current node is not an element in the HTML namespace | ||
1188 | and the next seven characters are an ASCII case-sensitive | ||
1189 | match for the string "[CDATA[" (the five uppercase letters | ||
1190 | "CDATA" with a U+005B LEFT SQUARE BRACKET character before | ||
1191 | and after), then consume those characters and switch to the | ||
1192 | CDATA section state (which is unrelated to the content model | ||
1193 | flag's CDATA state). */ | ||
1194 | |||
1195 | /* Otherwise, it is a parse error. Switch to the bogus comment state. | ||
1196 | The next character that is consumed, if any, is the first character | ||
1197 | that will be in the comment. */ | ||
1198 | } else { | ||
1199 | $this->emitToken(array( | ||
1200 | 'type' => self::PARSEERROR, | ||
1201 | 'data' => 'expected-dashes-or-doctype' | ||
1202 | )); | ||
1203 | $this->token = array( | ||
1204 | 'data' => (string) $alpha, | ||
1205 | 'type' => self::COMMENT | ||
1206 | ); | ||
1207 | $state = 'bogus comment'; | ||
1208 | } | ||
1209 | break; | ||
1210 | |||
1211 | case 'comment start': | ||
1212 | /* Consume the next input character: */ | ||
1213 | $char = $this->stream->char(); | ||
1214 | |||
1215 | if ($char === '-') { | ||
1216 | /* U+002D HYPHEN-MINUS (-) | ||
1217 | Switch to the comment start dash state. */ | ||
1218 | $state = 'comment start dash'; | ||
1219 | } elseif ($char === '>') { | ||
1220 | /* U+003E GREATER-THAN SIGN (>) | ||
1221 | Parse error. Emit the comment token. Switch to the | ||
1222 | data state. */ | ||
1223 | $this->emitToken(array( | ||
1224 | 'type' => self::PARSEERROR, | ||
1225 | 'data' => 'incorrect-comment' | ||
1226 | )); | ||
1227 | $this->emitToken($this->token); | ||
1228 | $state = 'data'; | ||
1229 | } elseif ($char === false) { | ||
1230 | /* EOF | ||
1231 | Parse error. Emit the comment token. Reconsume the | ||
1232 | EOF character in the data state. */ | ||
1233 | $this->emitToken(array( | ||
1234 | 'type' => self::PARSEERROR, | ||
1235 | 'data' => 'eof-in-comment' | ||
1236 | )); | ||
1237 | $this->emitToken($this->token); | ||
1238 | $this->stream->unget(); | ||
1239 | $state = 'data'; | ||
1240 | } else { | ||
1241 | /* Anything else | ||
1242 | Append the input character to the comment token's | ||
1243 | data. Switch to the comment state. */ | ||
1244 | $this->token['data'] .= $char; | ||
1245 | $state = 'comment'; | ||
1246 | } | ||
1247 | break; | ||
1248 | |||
1249 | case 'comment start dash': | ||
1250 | /* Consume the next input character: */ | ||
1251 | $char = $this->stream->char(); | ||
1252 | if ($char === '-') { | ||
1253 | /* U+002D HYPHEN-MINUS (-) | ||
1254 | Switch to the comment end state */ | ||
1255 | $state = 'comment end'; | ||
1256 | } elseif ($char === '>') { | ||
1257 | /* U+003E GREATER-THAN SIGN (>) | ||
1258 | Parse error. Emit the comment token. Switch to the | ||
1259 | data state. */ | ||
1260 | $this->emitToken(array( | ||
1261 | 'type' => self::PARSEERROR, | ||
1262 | 'data' => 'incorrect-comment' | ||
1263 | )); | ||
1264 | $this->emitToken($this->token); | ||
1265 | $state = 'data'; | ||
1266 | } elseif ($char === false) { | ||
1267 | /* Parse error. Emit the comment token. Reconsume the | ||
1268 | EOF character in the data state. */ | ||
1269 | $this->emitToken(array( | ||
1270 | 'type' => self::PARSEERROR, | ||
1271 | 'data' => 'eof-in-comment' | ||
1272 | )); | ||
1273 | $this->emitToken($this->token); | ||
1274 | $this->stream->unget(); | ||
1275 | $state = 'data'; | ||
1276 | } else { | ||
1277 | $this->token['data'] .= '-' . $char; | ||
1278 | $state = 'comment'; | ||
1279 | } | ||
1280 | break; | ||
1281 | |||
1282 | case 'comment': | ||
1283 | /* Consume the next input character: */ | ||
1284 | $char = $this->stream->char(); | ||
1285 | |||
1286 | if($char === '-') { | ||
1287 | /* U+002D HYPHEN-MINUS (-) | ||
1288 | Switch to the comment end dash state */ | ||
1289 | $state = 'comment end dash'; | ||
1290 | |||
1291 | } elseif($char === false) { | ||
1292 | /* EOF | ||
1293 | Parse error. Emit the comment token. Reconsume the EOF character | ||
1294 | in the data state. */ | ||
1295 | $this->emitToken(array( | ||
1296 | 'type' => self::PARSEERROR, | ||
1297 | 'data' => 'eof-in-comment' | ||
1298 | )); | ||
1299 | $this->emitToken($this->token); | ||
1300 | $this->stream->unget(); | ||
1301 | $state = 'data'; | ||
1302 | |||
1303 | } else { | ||
1304 | /* Anything else | ||
1305 | Append the input character to the comment token's data. Stay in | ||
1306 | the comment state. */ | ||
1307 | $chars = $this->stream->charsUntil('-'); | ||
1308 | |||
1309 | $this->token['data'] .= $char . $chars; | ||
1310 | } | ||
1311 | break; | ||
1312 | |||
1313 | case 'comment end dash': | ||
1314 | /* Consume the next input character: */ | ||
1315 | $char = $this->stream->char(); | ||
1316 | |||
1317 | if($char === '-') { | ||
1318 | /* U+002D HYPHEN-MINUS (-) | ||
1319 | Switch to the comment end state */ | ||
1320 | $state = 'comment end'; | ||
1321 | |||
1322 | } elseif($char === false) { | ||
1323 | /* EOF | ||
1324 | Parse error. Emit the comment token. Reconsume the EOF character | ||
1325 | in the data state. */ | ||
1326 | $this->emitToken(array( | ||
1327 | 'type' => self::PARSEERROR, | ||
1328 | 'data' => 'eof-in-comment-end-dash' | ||
1329 | )); | ||
1330 | $this->emitToken($this->token); | ||
1331 | $this->stream->unget(); | ||
1332 | $state = 'data'; | ||
1333 | |||
1334 | } else { | ||
1335 | /* Anything else | ||
1336 | Append a U+002D HYPHEN-MINUS (-) character and the input | ||
1337 | character to the comment token's data. Switch to the comment state. */ | ||
1338 | $this->token['data'] .= '-'.$char; | ||
1339 | $state = 'comment'; | ||
1340 | } | ||
1341 | break; | ||
1342 | |||
1343 | case 'comment end': | ||
1344 | /* Tokenizer state entered after "--" inside a comment. Consume the next input character: */ | ||
1345 | $char = $this->stream->char(); | ||
1346 | |||
1347 | if($char === '>') { | ||
1348 | /* U+003E GREATER-THAN SIGN (>) | ||
1349 | Emit the comment token. Switch to the data state. */ | ||
1350 | $this->emitToken($this->token); | ||
1351 | $state = 'data'; | ||
1352 | |||
1353 | } elseif($char === '-') { | ||
1354 | /* U+002D HYPHEN-MINUS (-) | ||
1355 | Parse error. Append a U+002D HYPHEN-MINUS (-) character | ||
1356 | to the comment token's data. Stay in the comment end | ||
1357 | state. */ | ||
1358 | $this->emitToken(array( | ||
1359 | 'type' => self::PARSEERROR, | ||
1360 | 'data' => 'unexpected-dash-after-double-dash-in-comment' | ||
1361 | )); | ||
1362 | $this->token['data'] .= '-'; | ||
1363 | |||
1364 | } elseif($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { // TAB, LF, FORM FEED (U+000C) or SPACE: parse error; append "--" and the character, then switch to the comment end space state | ||
1365 | $this->emitToken(array( | ||
1366 | 'type' => self::PARSEERROR, | ||
1367 | 'data' => 'unexpected-space-after-double-dash-in-comment' | ||
1368 | )); | ||
1369 | $this->token['data'] .= '--' . $char; | ||
1370 | $state = 'comment end space'; | ||
1371 | |||
1372 | } elseif($char === '!') { // U+0021 EXCLAMATION MARK (!): parse error; switch to the comment end bang state | ||
1373 | $this->emitToken(array( | ||
1374 | 'type' => self::PARSEERROR, | ||
1375 | 'data' => 'unexpected-bang-after-double-dash-in-comment' | ||
1376 | )); | ||
1377 | $state = 'comment end bang'; | ||
1378 | |||
1379 | } elseif($char === false) { | ||
1380 | /* EOF | ||
1381 | Parse error. Emit the comment token. Reconsume the | ||
1382 | EOF character in the data state. */ | ||
1383 | $this->emitToken(array( | ||
1384 | 'type' => self::PARSEERROR, | ||
1385 | 'data' => 'eof-in-comment-double-dash' | ||
1386 | )); | ||
1387 | $this->emitToken($this->token); | ||
1388 | $this->stream->unget(); | ||
1389 | $state = 'data'; | ||
1390 | |||
1391 | } else { | ||
1392 | /* Anything else | ||
1393 | Parse error. Append two U+002D HYPHEN-MINUS (-) | ||
1394 | characters and the input character to the comment token's | ||
1395 | data. Switch to the comment state. */ | ||
1396 | $this->emitToken(array( | ||
1397 | 'type' => self::PARSEERROR, | ||
1398 | 'data' => 'unexpected-char-in-comment' | ||
1399 | )); | ||
1400 | $this->token['data'] .= '--'.$char; | ||
1401 | $state = 'comment'; | ||
1402 | } | ||
1403 | break; | ||
1404 | |||
1405 | case 'comment end bang': | ||
1406 | $char = $this->stream->char(); | ||
1407 | if ($char === '>') { | ||
1408 | $this->emitToken($this->token); | ||
1409 | $state = 'data'; | ||
1410 | } elseif ($char === "-") { | ||
1411 | $this->token['data'] .= '--!'; | ||
1412 | $state = 'comment end dash'; | ||
1413 | } elseif ($char === false) { | ||
1414 | $this->emitToken(array( | ||
1415 | 'type' => self::PARSEERROR, | ||
1416 | 'data' => 'eof-in-comment-end-bang' | ||
1417 | )); | ||
1418 | $this->emitToken($this->token); | ||
1419 | $this->stream->unget(); | ||
1420 | $state = 'data'; | ||
1421 | } else { | ||
1422 | $this->token['data'] .= '--!' . $char; | ||
1423 | $state = 'comment'; | ||
1424 | } | ||
1425 | break; | ||
1426 | |||
1427 | case 'comment end space': | ||
1428 | $char = $this->stream->char(); | ||
1429 | if ($char === '>') { | ||
1430 | $this->emitToken($this->token); | ||
1431 | $state = 'data'; | ||
1432 | } elseif ($char === '-') { | ||
1433 | $state = 'comment end dash'; | ||
1434 | } elseif ($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { | ||
1435 | $this->token['data'] .= $char; | ||
1436 | } elseif ($char === false) { | ||
1437 | $this->emitToken(array( | ||
1438 | 'type' => self::PARSEERROR, | ||
1439 | 'data' => 'unexpected-eof-in-comment-end-space', | ||
1440 | )); | ||
1441 | $this->emitToken($this->token); | ||
1442 | $this->stream->unget(); | ||
1443 | $state = 'data'; | ||
1444 | } else { | ||
1445 | $this->token['data'] .= $char; | ||
1446 | $state = 'comment'; | ||
1447 | } | ||
1448 | break; | ||
1449 | |||
1450 | case 'DOCTYPE': | ||
1451 | /* Consume the next input character: */ | ||
1452 | $char = $this->stream->char(); | ||
1453 | |||
1454 | if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { | ||
1455 | /* U+0009 CHARACTER TABULATION | ||
1456 | U+000A LINE FEED (LF) | ||
1457 | U+000C FORM FEED (FF) | ||
1458 | U+0020 SPACE | ||
1459 | Switch to the before DOCTYPE name state. */ | ||
1460 | $state = 'before DOCTYPE name'; | ||
1461 | |||
1462 | } elseif($char === false) { | ||
1463 | /* EOF | ||
1464 | Parse error. Create a new DOCTYPE token. Set its | ||
1465 | force-quirks flag to on. Emit the token. Reconsume the | ||
1466 | EOF character in the data state. */ | ||
1467 | $this->emitToken(array( | ||
1468 | 'type' => self::PARSEERROR, | ||
1469 | 'data' => 'need-space-after-doctype-but-got-eof' | ||
1470 | )); | ||
1471 | $this->emitToken(array( | ||
1472 | 'name' => '', | ||
1473 | 'type' => self::DOCTYPE, | ||
1474 | 'force-quirks' => true, | ||
1475 | 'error' => true | ||
1476 | )); | ||
1477 | $this->stream->unget(); | ||
1478 | $state = 'data'; | ||
1479 | |||
1480 | } else { | ||
1481 | /* Anything else | ||
1482 | Parse error. Reconsume the current character in the | ||
1483 | before DOCTYPE name state. */ | ||
1484 | $this->emitToken(array( | ||
1485 | 'type' => self::PARSEERROR, | ||
1486 | 'data' => 'need-space-after-doctype' | ||
1487 | )); | ||
1488 | $this->stream->unget(); | ||
1489 | $state = 'before DOCTYPE name'; | ||
1490 | } | ||
1491 | break; | ||
1492 | |||
1493 | case 'before DOCTYPE name': | ||
1494 | /* Consume the next input character: */ | ||
1495 | $char = $this->stream->char(); | ||
1496 | |||
1497 | if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { | ||
1498 | /* U+0009 CHARACTER TABULATION | ||
1499 | U+000A LINE FEED (LF) | ||
1500 | U+000C FORM FEED (FF) | ||
1501 | U+0020 SPACE | ||
1502 | Stay in the before DOCTYPE name state. */ | ||
1503 | |||
1504 | } elseif($char === '>') { | ||
1505 | /* U+003E GREATER-THAN SIGN (>) | ||
1506 | Parse error. Create a new DOCTYPE token. Set its | ||
1507 | force-quirks flag to on. Emit the token. Switch to the | ||
1508 | data state. */ | ||
1509 | $this->emitToken(array( | ||
1510 | 'type' => self::PARSEERROR, | ||
1511 | 'data' => 'expected-doctype-name-but-got-right-bracket' | ||
1512 | )); | ||
1513 | $this->emitToken(array( | ||
1514 | 'name' => '', | ||
1515 | 'type' => self::DOCTYPE, | ||
1516 | 'force-quirks' => true, | ||
1517 | 'error' => true | ||
1518 | )); | ||
1519 | |||
1520 | $state = 'data'; | ||
1521 | |||
1522 | } elseif('A' <= $char && $char <= 'Z') { | ||
1523 | /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z | ||
1524 | Create a new DOCTYPE token. Set the token's name to the | ||
1525 | lowercase version of the input character (add 0x0020 to | ||
1526 | the character's code point). Switch to the DOCTYPE name | ||
1527 | state. */ | ||
1528 | $this->token = array( | ||
1529 | 'name' => strtolower($char), | ||
1530 | 'type' => self::DOCTYPE, | ||
1531 | 'error' => true | ||
1532 | ); | ||
1533 | |||
1534 | $state = 'DOCTYPE name'; | ||
1535 | |||
1536 | } elseif($char === false) { | ||
1537 | /* EOF | ||
1538 | Parse error. Create a new DOCTYPE token. Set its | ||
1539 | force-quirks flag to on. Emit the token. Reconsume the | ||
1540 | EOF character in the data state. */ | ||
1541 | $this->emitToken(array( | ||
1542 | 'type' => self::PARSEERROR, | ||
1543 | 'data' => 'expected-doctype-name-but-got-eof' | ||
1544 | )); | ||
1545 | $this->emitToken(array( | ||
1546 | 'name' => '', | ||
1547 | 'type' => self::DOCTYPE, | ||
1548 | 'force-quirks' => true, | ||
1549 | 'error' => true | ||
1550 | )); | ||
1551 | |||
1552 | $this->stream->unget(); | ||
1553 | $state = 'data'; | ||
1554 | |||
1555 | } else { | ||
1556 | /* Anything else | ||
1557 | Create a new DOCTYPE token. Set the token's name to the | ||
1558 | current input character. Switch to the DOCTYPE name state. */ | ||
1559 | $this->token = array( | ||
1560 | 'name' => $char, | ||
1561 | 'type' => self::DOCTYPE, | ||
1562 | 'error' => true | ||
1563 | ); | ||
1564 | |||
1565 | $state = 'DOCTYPE name'; | ||
1566 | } | ||
1567 | break; | ||
1568 | |||
1569 | case 'DOCTYPE name': | ||
1570 | /* Consume the next input character: */ | ||
1571 | $char = $this->stream->char(); | ||
1572 | |||
1573 | if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { | ||
1574 | /* U+0009 CHARACTER TABULATION | ||
1575 | U+000A LINE FEED (LF) | ||
1576 | U+000C FORM FEED (FF) | ||
1577 | U+0020 SPACE | ||
1578 | Switch to the after DOCTYPE name state. */ | ||
1579 | $state = 'after DOCTYPE name'; | ||
1580 | |||
1581 | } elseif($char === '>') { | ||
1582 | /* U+003E GREATER-THAN SIGN (>) | ||
1583 | Emit the current DOCTYPE token. Switch to the data state. */ | ||
1584 | $this->emitToken($this->token); | ||
1585 | $state = 'data'; | ||
1586 | |||
1587 | } elseif('A' <= $char && $char <= 'Z') { | ||
1588 | /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z | ||
1589 | Append the lowercase version of the input character | ||
1590 | (add 0x0020 to the character's code point) to the current | ||
1591 | DOCTYPE token's name. Stay in the DOCTYPE name state. */ | ||
1592 | $this->token['name'] .= strtolower($char); | ||
1593 | |||
1594 | } elseif($char === false) { | ||
1595 | /* EOF | ||
1596 | Parse error. Set the DOCTYPE token's force-quirks flag | ||
1597 | to on. Emit that DOCTYPE token. Reconsume the EOF | ||
1598 | character in the data state. */ | ||
1599 | $this->emitToken(array( | ||
1600 | 'type' => self::PARSEERROR, | ||
1601 | 'data' => 'eof-in-doctype-name' | ||
1602 | )); | ||
1603 | $this->token['force-quirks'] = true; | ||
1604 | $this->emitToken($this->token); | ||
1605 | $this->stream->unget(); | ||
1606 | $state = 'data'; | ||
1607 | |||
1608 | } else { | ||
1609 | /* Anything else | ||
1610 | Append the current input character to the current | ||
1611 | DOCTYPE token's name. Stay in the DOCTYPE name state. */ | ||
1612 | $this->token['name'] .= $char; | ||
1613 | } | ||
1614 | |||
1615 | // XXX this is probably some sort of quirks mode designation, | ||
1616 | // check tree-builder to be sure. In general 'error' needs | ||
1617 | // to be specc'ified, this probably means removing it at the end | ||
1618 | $this->token['error'] = ($this->token['name'] === 'HTML') | ||
1619 | ? false | ||
1620 | : true; | ||
1621 | break; | ||
1622 | |||
1623 | case 'after DOCTYPE name': | ||
1624 | /* Consume the next input character: */ | ||
1625 | $char = $this->stream->char(); | ||
1626 | |||
1627 | if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { | ||
1628 | /* U+0009 CHARACTER TABULATION | ||
1629 | U+000A LINE FEED (LF) | ||
1630 | U+000C FORM FEED (FF) | ||
1631 | U+0020 SPACE | ||
1632 | Stay in the after DOCTYPE name state. */ | ||
1633 | |||
1634 | } elseif($char === '>') { | ||
1635 | /* U+003E GREATER-THAN SIGN (>) | ||
1636 | Emit the current DOCTYPE token. Switch to the data state. */ | ||
1637 | $this->emitToken($this->token); | ||
1638 | $state = 'data'; | ||
1639 | |||
1640 | } elseif($char === false) { | ||
1641 | /* EOF | ||
1642 | Parse error. Set the DOCTYPE token's force-quirks flag | ||
1643 | to on. Emit that DOCTYPE token. Reconsume the EOF | ||
1644 | character in the data state. */ | ||
1645 | $this->emitToken(array( | ||
1646 | 'type' => self::PARSEERROR, | ||
1647 | 'data' => 'eof-in-doctype' | ||
1648 | )); | ||
1649 | $this->token['force-quirks'] = true; | ||
1650 | $this->emitToken($this->token); | ||
1651 | $this->stream->unget(); | ||
1652 | $state = 'data'; | ||
1653 | |||
1654 | } else { | ||
1655 | /* Anything else */ | ||
1656 | |||
1657 | $nextSix = strtoupper($char . $this->stream->charsWhile(self::ALPHA, 5)); | ||
1658 | if ($nextSix === 'PUBLIC') { | ||
1659 | /* If the next six characters are an ASCII | ||
1660 | case-insensitive match for the word "PUBLIC", then | ||
1661 | consume those characters and switch to the before | ||
1662 | DOCTYPE public identifier state. */ | ||
1663 | $state = 'before DOCTYPE public identifier'; | ||
1664 | |||
1665 | } elseif ($nextSix === 'SYSTEM') { | ||
1666 | /* Otherwise, if the next six characters are an ASCII | ||
1667 | case-insensitive match for the word "SYSTEM", then | ||
1668 | consume those characters and switch to the before | ||
1669 | DOCTYPE system identifier state. */ | ||
1670 | $state = 'before DOCTYPE system identifier'; | ||
1671 | |||
1672 | } else { | ||
1673 | /* Otherwise, this is a parse error. Set the DOCTYPE | ||
1674 | token's force-quirks flag to on. Switch to the bogus | ||
1675 | DOCTYPE state. */ | ||
1676 | $this->emitToken(array( | ||
1677 | 'type' => self::PARSEERROR, | ||
1678 | 'data' => 'expected-space-or-right-bracket-in-doctype' | ||
1679 | )); | ||
1680 | $this->token['force-quirks'] = true; | ||
1681 | $this->token['error'] = true; | ||
1682 | $state = 'bogus DOCTYPE'; | ||
1683 | } | ||
1684 | } | ||
1685 | break; | ||
1686 | |||
1687 | case 'before DOCTYPE public identifier': | ||
1688 | /* Consume the next input character: */ | ||
1689 | $char = $this->stream->char(); | ||
1690 | |||
1691 | if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { | ||
1692 | /* U+0009 CHARACTER TABULATION | ||
1693 | U+000A LINE FEED (LF) | ||
1694 | U+000C FORM FEED (FF) | ||
1695 | U+0020 SPACE | ||
1696 | Stay in the before DOCTYPE public identifier state. */ | ||
1697 | } elseif ($char === '"') { | ||
1698 | /* U+0022 QUOTATION MARK (") | ||
1699 | Set the DOCTYPE token's public identifier to the empty | ||
1700 | string (not missing), then switch to the DOCTYPE public | ||
1701 | identifier (double-quoted) state. */ | ||
1702 | $this->token['public'] = ''; | ||
1703 | $state = 'DOCTYPE public identifier (double-quoted)'; | ||
1704 | } elseif ($char === "'") { | ||
1705 | /* U+0027 APOSTROPHE (') | ||
1706 | Set the DOCTYPE token's public identifier to the empty | ||
1707 | string (not missing), then switch to the DOCTYPE public | ||
1708 | identifier (single-quoted) state. */ | ||
1709 | $this->token['public'] = ''; | ||
1710 | $state = 'DOCTYPE public identifier (single-quoted)'; | ||
1711 | } elseif ($char === '>') { | ||
1712 | /* Parse error. Set the DOCTYPE token's force-quirks flag | ||
1713 | to on. Emit that DOCTYPE token. Switch to the data state. */ | ||
1714 | $this->emitToken(array( | ||
1715 | 'type' => self::PARSEERROR, | ||
1716 | 'data' => 'unexpected-end-of-doctype' | ||
1717 | )); | ||
1718 | $this->token['force-quirks'] = true; | ||
1719 | $this->emitToken($this->token); | ||
1720 | $state = 'data'; | ||
1721 | } elseif ($char === false) { | ||
1722 | /* Parse error. Set the DOCTYPE token's force-quirks | ||
1723 | flag to on. Emit that DOCTYPE token. Reconsume the EOF | ||
1724 | character in the data state. */ | ||
1725 | $this->emitToken(array( | ||
1726 | 'type' => self::PARSEERROR, | ||
1727 | 'data' => 'eof-in-doctype' | ||
1728 | )); | ||
1729 | $this->token['force-quirks'] = true; | ||
1730 | $this->emitToken($this->token); | ||
1731 | $this->stream->unget(); | ||
1732 | $state = 'data'; | ||
1733 | } else { | ||
1734 | /* Parse error. Set the DOCTYPE token's force-quirks flag | ||
1735 | to on. Switch to the bogus DOCTYPE state. */ | ||
1736 | $this->emitToken(array( | ||
1737 | 'type' => self::PARSEERROR, | ||
1738 | 'data' => 'unexpected-char-in-doctype' | ||
1739 | )); | ||
1740 | $this->token['force-quirks'] = true; | ||
1741 | $state = 'bogus DOCTYPE'; | ||
1742 | } | ||
1743 | break; | ||
1744 | |||
1745 | case 'DOCTYPE public identifier (double-quoted)': | ||
1746 | /* Consume the next input character: */ | ||
1747 | $char = $this->stream->char(); | ||
1748 | |||
1749 | if ($char === '"') { | ||
1750 | /* U+0022 QUOTATION MARK (") | ||
1751 | Switch to the after DOCTYPE public identifier state. */ | ||
1752 | $state = 'after DOCTYPE public identifier'; | ||
1753 | } elseif ($char === '>') { | ||
1754 | /* U+003E GREATER-THAN SIGN (>) | ||
1755 | Parse error. Set the DOCTYPE token's force-quirks flag | ||
1756 | to on. Emit that DOCTYPE token. Switch to the data state. */ | ||
1757 | $this->emitToken(array( | ||
1758 | 'type' => self::PARSEERROR, | ||
1759 | 'data' => 'unexpected-end-of-doctype' | ||
1760 | )); | ||
1761 | $this->token['force-quirks'] = true; | ||
1762 | $this->emitToken($this->token); | ||
1763 | $state = 'data'; | ||
1764 | } elseif ($char === false) { | ||
1765 | /* EOF | ||
1766 | Parse error. Set the DOCTYPE token's force-quirks flag | ||
1767 | to on. Emit that DOCTYPE token. Reconsume the EOF | ||
1768 | character in the data state. */ | ||
1769 | $this->emitToken(array( | ||
1770 | 'type' => self::PARSEERROR, | ||
1771 | 'data' => 'eof-in-doctype' | ||
1772 | )); | ||
1773 | $this->token['force-quirks'] = true; | ||
1774 | $this->emitToken($this->token); | ||
1775 | $this->stream->unget(); | ||
1776 | $state = 'data'; | ||
1777 | } else { | ||
1778 | /* Anything else | ||
1779 | Append the current input character to the current | ||
1780 | DOCTYPE token's public identifier. Stay in the DOCTYPE | ||
1781 | public identifier (double-quoted) state. */ | ||
1782 | $this->token['public'] .= $char; | ||
1783 | } | ||
1784 | break; | ||
1785 | |||
1786 | case 'DOCTYPE public identifier (single-quoted)': | ||
1787 | /* Consume the next input character: */ | ||
1788 | $char = $this->stream->char(); | ||
1789 | |||
1790 | if ($char === "'") { | ||
1791 | /* U+0027 APOSTROPHE (') | ||
1792 | Switch to the after DOCTYPE public identifier state. */ | ||
1793 | $state = 'after DOCTYPE public identifier'; | ||
1794 | } elseif ($char === '>') { | ||
1795 | /* U+003E GREATER-THAN SIGN (>) | ||
1796 | Parse error. Set the DOCTYPE token's force-quirks flag | ||
1797 | to on. Emit that DOCTYPE token. Switch to the data state. */ | ||
1798 | $this->emitToken(array( | ||
1799 | 'type' => self::PARSEERROR, | ||
1800 | 'data' => 'unexpected-end-of-doctype' | ||
1801 | )); | ||
1802 | $this->token['force-quirks'] = true; | ||
1803 | $this->emitToken($this->token); | ||
1804 | $state = 'data'; | ||
1805 | } elseif ($char === false) { | ||
1806 | /* EOF | ||
1807 | Parse error. Set the DOCTYPE token's force-quirks flag | ||
1808 | to on. Emit that DOCTYPE token. Reconsume the EOF | ||
1809 | character in the data state. */ | ||
1810 | $this->emitToken(array( | ||
1811 | 'type' => self::PARSEERROR, | ||
1812 | 'data' => 'eof-in-doctype' | ||
1813 | )); | ||
1814 | $this->token['force-quirks'] = true; | ||
1815 | $this->emitToken($this->token); | ||
1816 | $this->stream->unget(); | ||
1817 | $state = 'data'; | ||
1818 | } else { | ||
1819 | /* Anything else | ||
1820 | Append the current input character to the current | ||
1821 | DOCTYPE token's public identifier. Stay in the DOCTYPE | ||
1822 | public identifier (single-quoted) state. */ | ||
1823 | $this->token['public'] .= $char; | ||
1824 | } | ||
1825 | break; | ||
1826 | |||
1827 | case 'after DOCTYPE public identifier': | ||
1828 | /* Consume the next input character: */ | ||
1829 | $char = $this->stream->char(); | ||
1830 | |||
1831 | if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { | ||
1832 | /* U+0009 CHARACTER TABULATION | ||
1833 | U+000A LINE FEED (LF) | ||
1834 | U+000C FORM FEED (FF) | ||
1835 | U+0020 SPACE | ||
1836 | Stay in the after DOCTYPE public identifier state. */ | ||
1837 | } elseif ($char === '"') { | ||
1838 | /* U+0022 QUOTATION MARK (") | ||
1839 | Set the DOCTYPE token's system identifier to the | ||
1840 | empty string (not missing), then switch to the DOCTYPE | ||
1841 | system identifier (double-quoted) state. */ | ||
1842 | $this->token['system'] = ''; | ||
1843 | $state = 'DOCTYPE system identifier (double-quoted)'; | ||
1844 | } elseif ($char === "'") { | ||
1845 | /* U+0027 APOSTROPHE (') | ||
1846 | Set the DOCTYPE token's system identifier to the | ||
1847 | empty string (not missing), then switch to the DOCTYPE | ||
1848 | system identifier (single-quoted) state. */ | ||
1849 | $this->token['system'] = ''; | ||
1850 | $state = 'DOCTYPE system identifier (single-quoted)'; | ||
1851 | } elseif ($char === '>') { | ||
1852 | /* U+003E GREATER-THAN SIGN (>) | ||
1853 | Emit the current DOCTYPE token. Switch to the data state. */ | ||
1854 | $this->emitToken($this->token); | ||
1855 | $state = 'data'; | ||
1856 | } elseif ($char === false) { | ||
1857 | /* Parse error. Set the DOCTYPE token's force-quirks | ||
1858 | flag to on. Emit that DOCTYPE token. Reconsume the EOF | ||
1859 | character in the data state. */ | ||
1860 | $this->emitToken(array( | ||
1861 | 'type' => self::PARSEERROR, | ||
1862 | 'data' => 'eof-in-doctype' | ||
1863 | )); | ||
1864 | $this->token['force-quirks'] = true; | ||
1865 | $this->emitToken($this->token); | ||
1866 | $this->stream->unget(); | ||
1867 | $state = 'data'; | ||
1868 | } else { | ||
1869 | /* Anything else | ||
1870 | Parse error. Set the DOCTYPE token's force-quirks flag | ||
1871 | to on. Switch to the bogus DOCTYPE state. */ | ||
1872 | $this->emitToken(array( | ||
1873 | 'type' => self::PARSEERROR, | ||
1874 | 'data' => 'unexpected-char-in-doctype' | ||
1875 | )); | ||
1876 | $this->token['force-quirks'] = true; | ||
1877 | $state = 'bogus DOCTYPE'; | ||
1878 | } | ||
1879 | break; | ||
1880 | |||
1881 | case 'before DOCTYPE system identifier': | ||
1882 | /* Consume the next input character: */ | ||
1883 | $char = $this->stream->char(); | ||
1884 | |||
1885 | if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { | ||
1886 | /* U+0009 CHARACTER TABULATION | ||
1887 | U+000A LINE FEED (LF) | ||
1888 | U+000C FORM FEED (FF) | ||
1889 | U+0020 SPACE | ||
1890 | Stay in the before DOCTYPE system identifier state. */ | ||
1891 | } elseif ($char === '"') { | ||
1892 | /* U+0022 QUOTATION MARK (") | ||
1893 | Set the DOCTYPE token's system identifier to the empty | ||
1894 | string (not missing), then switch to the DOCTYPE system | ||
1895 | identifier (double-quoted) state. */ | ||
1896 | $this->token['system'] = ''; | ||
1897 | $state = 'DOCTYPE system identifier (double-quoted)'; | ||
1898 | } elseif ($char === "'") { | ||
1899 | /* U+0027 APOSTROPHE (') | ||
1900 | Set the DOCTYPE token's system identifier to the empty | ||
1901 | string (not missing), then switch to the DOCTYPE system | ||
1902 | identifier (single-quoted) state. */ | ||
1903 | $this->token['system'] = ''; | ||
1904 | $state = 'DOCTYPE system identifier (single-quoted)'; | ||
1905 | } elseif ($char === '>') { | ||
1906 | /* Parse error. Set the DOCTYPE token's force-quirks flag | ||
1907 | to on. Emit that DOCTYPE token. Switch to the data state. */ | ||
1908 | $this->emitToken(array( | ||
1909 | 'type' => self::PARSEERROR, | ||
1910 | 'data' => 'unexpected-char-in-doctype' | ||
1911 | )); | ||
1912 | $this->token['force-quirks'] = true; | ||
1913 | $this->emitToken($this->token); | ||
1914 | $state = 'data'; | ||
1915 | } elseif ($char === false) { | ||
1916 | /* Parse error. Set the DOCTYPE token's force-quirks | ||
1917 | flag to on. Emit that DOCTYPE token. Reconsume the EOF | ||
1918 | character in the data state. */ | ||
1919 | $this->emitToken(array( | ||
1920 | 'type' => self::PARSEERROR, | ||
1921 | 'data' => 'eof-in-doctype' | ||
1922 | )); | ||
1923 | $this->token['force-quirks'] = true; | ||
1924 | $this->emitToken($this->token); | ||
1925 | $this->stream->unget(); | ||
1926 | $state = 'data'; | ||
1927 | } else { | ||
1928 | /* Parse error. Set the DOCTYPE token's force-quirks flag | ||
1929 | to on. Switch to the bogus DOCTYPE state. */ | ||
1930 | $this->emitToken(array( | ||
1931 | 'type' => self::PARSEERROR, | ||
1932 | 'data' => 'unexpected-char-in-doctype' | ||
1933 | )); | ||
1934 | $this->token['force-quirks'] = true; | ||
1935 | $state = 'bogus DOCTYPE'; | ||
1936 | } | ||
1937 | break; | ||
1938 | |||
1939 | case 'DOCTYPE system identifier (double-quoted)': | ||
1940 | /* Consume the next input character: */ | ||
1941 | $char = $this->stream->char(); | ||
1942 | |||
1943 | if ($char === '"') { | ||
1944 | /* U+0022 QUOTATION MARK (") | ||
1945 | Switch to the after DOCTYPE system identifier state. */ | ||
1946 | $state = 'after DOCTYPE system identifier'; | ||
1947 | } elseif ($char === '>') { | ||
1948 | /* U+003E GREATER-THAN SIGN (>) | ||
1949 | Parse error. Set the DOCTYPE token's force-quirks flag | ||
1950 | to on. Emit that DOCTYPE token. Switch to the data state. */ | ||
1951 | $this->emitToken(array( | ||
1952 | 'type' => self::PARSEERROR, | ||
1953 | 'data' => 'unexpected-end-of-doctype' | ||
1954 | )); | ||
1955 | $this->token['force-quirks'] = true; | ||
1956 | $this->emitToken($this->token); | ||
1957 | $state = 'data'; | ||
1958 | } elseif ($char === false) { | ||
1959 | /* EOF | ||
1960 | Parse error. Set the DOCTYPE token's force-quirks flag | ||
1961 | to on. Emit that DOCTYPE token. Reconsume the EOF | ||
1962 | character in the data state. */ | ||
1963 | $this->emitToken(array( | ||
1964 | 'type' => self::PARSEERROR, | ||
1965 | 'data' => 'eof-in-doctype' | ||
1966 | )); | ||
1967 | $this->token['force-quirks'] = true; | ||
1968 | $this->emitToken($this->token); | ||
1969 | $this->stream->unget(); | ||
1970 | $state = 'data'; | ||
1971 | } else { | ||
1972 | /* Anything else | ||
1973 | Append the current input character to the current | ||
1974 | DOCTYPE token's system identifier. Stay in the DOCTYPE | ||
1975 | system identifier (double-quoted) state. */ | ||
1976 | $this->token['system'] .= $char; | ||
1977 | } | ||
1978 | break; | ||
1979 | |||
1980 | case 'DOCTYPE system identifier (single-quoted)': | ||
1981 | /* Consume the next input character: */ | ||
1982 | $char = $this->stream->char(); | ||
1983 | |||
1984 | if ($char === "'") { | ||
1985 | /* U+0027 APOSTROPHE (') | ||
1986 | Switch to the after DOCTYPE system identifier state. */ | ||
1987 | $state = 'after DOCTYPE system identifier'; | ||
1988 | } elseif ($char === '>') { | ||
1989 | /* U+003E GREATER-THAN SIGN (>) | ||
1990 | Parse error. Set the DOCTYPE token's force-quirks flag | ||
1991 | to on. Emit that DOCTYPE token. Switch to the data state. */ | ||
1992 | $this->emitToken(array( | ||
1993 | 'type' => self::PARSEERROR, | ||
1994 | 'data' => 'unexpected-end-of-doctype' | ||
1995 | )); | ||
1996 | $this->token['force-quirks'] = true; | ||
1997 | $this->emitToken($this->token); | ||
1998 | $state = 'data'; | ||
1999 | } elseif ($char === false) { | ||
2000 | /* EOF | ||
2001 | Parse error. Set the DOCTYPE token's force-quirks flag | ||
2002 | to on. Emit that DOCTYPE token. Reconsume the EOF | ||
2003 | character in the data state. */ | ||
2004 | $this->emitToken(array( | ||
2005 | 'type' => self::PARSEERROR, | ||
2006 | 'data' => 'eof-in-doctype' | ||
2007 | )); | ||
2008 | $this->token['force-quirks'] = true; | ||
2009 | $this->emitToken($this->token); | ||
2010 | $this->stream->unget(); | ||
2011 | $state = 'data'; | ||
2012 | } else { | ||
2013 | /* Anything else | ||
2014 | Append the current input character to the current | ||
2015 | DOCTYPE token's system identifier. Stay in the DOCTYPE | ||
2016 | system identifier (single-quoted) state. */ | ||
2017 | $this->token['system'] .= $char; | ||
2018 | } | ||
2019 | break; | ||
2020 | |||
2021 | case 'after DOCTYPE system identifier': | ||
2022 | /* Consume the next input character: */ | ||
2023 | $char = $this->stream->char(); | ||
2024 | |||
2025 | if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { | ||
2026 | /* U+0009 CHARACTER TABULATION | ||
2027 | U+000A LINE FEED (LF) | ||
2028 | U+000C FORM FEED (FF) | ||
2029 | U+0020 SPACE | ||
2030 | Stay in the after DOCTYPE system identifier state. */ | ||
2031 | } elseif ($char === '>') { | ||
2032 | /* U+003E GREATER-THAN SIGN (>) | ||
2033 | Emit the current DOCTYPE token. Switch to the data state. */ | ||
2034 | $this->emitToken($this->token); | ||
2035 | $state = 'data'; | ||
2036 | } elseif ($char === false) { | ||
2037 | /* Parse error. Set the DOCTYPE token's force-quirks | ||
2038 | flag to on. Emit that DOCTYPE token. Reconsume the EOF | ||
2039 | character in the data state. */ | ||
2040 | $this->emitToken(array( | ||
2041 | 'type' => self::PARSEERROR, | ||
2042 | 'data' => 'eof-in-doctype' | ||
2043 | )); | ||
2044 | $this->token['force-quirks'] = true; | ||
2045 | $this->emitToken($this->token); | ||
2046 | $this->stream->unget(); | ||
2047 | $state = 'data'; | ||
2048 | } else { | ||
2049 | /* Anything else | ||
2050 | Parse error. Switch to the bogus DOCTYPE state. | ||
2051 | (This does not set the DOCTYPE token's force-quirks | ||
2052 | flag to on.) */ | ||
2053 | $this->emitToken(array( | ||
2054 | 'type' => self::PARSEERROR, | ||
2055 | 'data' => 'unexpected-char-in-doctype' | ||
2056 | )); | ||
2057 | $state = 'bogus DOCTYPE'; | ||
2058 | } | ||
2059 | break; | ||
2060 | |||
2061 | case 'bogus DOCTYPE': | ||
2062 | /* Consume the next input character: */ | ||
2063 | $char = $this->stream->char(); | ||
2064 | |||
2065 | if ($char === '>') { | ||
2066 | /* U+003E GREATER-THAN SIGN (>) | ||
2067 | Emit the DOCTYPE token. Switch to the data state. */ | ||
2068 | $this->emitToken($this->token); | ||
2069 | $state = 'data'; | ||
2070 | |||
2071 | } elseif($char === false) { | ||
2072 | /* EOF | ||
2073 | Emit the DOCTYPE token. Reconsume the EOF character in | ||
2074 | the data state. */ | ||
2075 | $this->emitToken($this->token); | ||
2076 | $this->stream->unget(); | ||
2077 | $state = 'data'; | ||
2078 | |||
2079 | } else { | ||
2080 | /* Anything else | ||
2081 | Stay in the bogus DOCTYPE state. */ | ||
2082 | } | ||
2083 | break; | ||
2084 | |||
2085 | // case 'cdataSection': | ||
2086 | |||
2087 | } | ||
2088 | } | ||
2089 | } | ||
2090 | |||
/**
 * Returns a serialized representation of the tree.
 *
 * The work is delegated entirely to the tree builder's own save() method;
 * its result is handed back unchanged.
 */
public function save() {
    $serialized = $this->tree->save();
    return $serialized;
}
2097 | |||
/**
 * Accessor for the input stream held by this tokenizer.
 */
public function stream() {
    $input = $this->stream;
    return $input;
}
2104 | |||
/**
 * Consumes a character reference from the input stream. The character
 * examined first is the one immediately after the U+0026 AMPERSAND.
 *
 * This goes quite far against the spec and is far closer to the Python
 * implementation, mainly because it does not do the large unconsuming the
 * spec requires: where the spec says "nothing is returned", this method
 * returns '&' followed by whatever raw text it consumed.
 *
 * @param string|false $allowed The additional allowed character, or false
 *                              when there is none.
 * @param bool         $inattr  True when the reference is being consumed as
 *                              part of an attribute value.
 * @return string The decoded character (possibly followed by trailing raw
 *                text), or '&' plus the raw consumed text when no character
 *                reference was recognised.
 */
private function consumeCharacterReference($allowed = false, $inattr = false) {
    // All consumed characters.
    $chars = $this->stream->char();

    /* The behavior depends on the identity of the next character
       (the one immediately after the U+0026 AMPERSAND character): */

    // EOF (false) is tested first: indexing false with [0] is an error-prone
    // operation that raises a warning on current PHP versions.
    if (
        $chars === false ||
        $chars[0] === "\x09" ||
        $chars[0] === "\x0A" ||
        $chars[0] === "\x0C" ||
        $chars[0] === "\x20" ||
        $chars[0] === '<' ||
        $chars[0] === '&' ||
        $chars[0] === $allowed
    ) {
        /* U+0009 CHARACTER TABULATION
           U+000A LINE FEED (LF)
           U+000C FORM FEED (FF)
           U+0020 SPACE
           U+003C LESS-THAN SIGN
           U+0026 AMPERSAND
           EOF
           The additional allowed character, if there is one
           Not a character reference. No characters are consumed,
           and nothing is returned. (This is not an error, either.) */
        // We already consumed, so unconsume.
        $this->stream->unget();
        return '&';
    } elseif ($chars[0] === '#') {
        /* Consume the U+0023 NUMBER SIGN (already done above).
           The behavior further depends on the character after
           the U+0023 NUMBER SIGN: */
        $chars .= $this->stream->char();
        if (isset($chars[1]) && ($chars[1] === 'x' || $chars[1] === 'X')) {
            /* U+0078 LATIN SMALL LETTER X
               U+0058 LATIN CAPITAL LETTER X
               The X is already consumed. Follow the steps below using the
               range 0123456789, ABCDEF, abcdef, and interpret the number
               as hexadecimal. */
            $char_class = self::HEX;
            $hex = true;
        } else {
            /* Anything else */
            // Unconsume because we shouldn't have consumed this.
            $chars = $chars[0];
            $this->stream->unget();
            /* Follow the steps below using the range 0123456789, and
               interpret the number as decimal. */
            $char_class = self::DIGIT;
            $hex = false;
        }

        /* Consume as many characters as match the range of characters given above. */
        $consumed = $this->stream->charsWhile($char_class);
        if ($consumed === '' || $consumed === false) {
            /* If no characters match the range, then don't consume
               any characters (and unconsume the U+0023 NUMBER SIGN
               character and, if appropriate, the X character). This
               is a parse error; nothing is returned. */
            $this->emitToken(array(
                'type' => self::PARSEERROR,
                'data' => 'expected-numeric-entity'
            ));
            return '&' . $chars;
        } else {
            /* Otherwise, if the next character is a U+003B SEMICOLON,
               consume that too. If it isn't, there is a parse error. */
            if ($this->stream->char() !== ';') {
                $this->stream->unget();
                $this->emitToken(array(
                    'type' => self::PARSEERROR,
                    'data' => 'numeric-entity-without-semicolon'
                ));
            }

            /* If one or more characters match the range, then take
               them all and interpret the string of characters as a number
               (either hexadecimal or decimal as appropriate). */
            $codepoint = $hex ? hexdec($consumed) : (int) $consumed;

            /* If that number is one of the numbers in the first column
               of the replacement table, then this is a parse error. Return
               a character token for the Unicode character given in the
               second column of that row. */
            $new_codepoint = HTML5_Data::getRealCodepoint($codepoint);
            if ($new_codepoint) {
                $this->emitToken(array(
                    'type' => self::PARSEERROR,
                    'data' => 'illegal-windows-1252-entity'
                ));
                return HTML5_Data::utf8chr($new_codepoint);
            } else {
                /* Otherwise, if the number is greater than 0x10FFFF, then
                 * this is a parse error. Return a U+FFFD REPLACEMENT
                 * CHARACTER. */
                if ($codepoint > 0x10FFFF) {
                    $this->emitToken(array(
                        'type' => self::PARSEERROR,
                        'data' => 'overlong-character-entity' // XXX probably not correct
                    ));
                    return "\xEF\xBF\xBD";
                }
                /* Otherwise, return a character token for the Unicode
                 * character whose code point is that number. If the
                 * number is in the range 0x0001 to 0x0008, 0x000E to
                 * 0x001F, 0x007F to 0x009F, 0xD800 to 0xDFFF, 0xFDD0 to
                 * 0xFDEF, or is 0x000B, or ends in 0xFFFE / 0xFFFF in any
                 * plane up to 0x10FFFF, then this is a parse error. */
                if (
                    ($codepoint >= 0x0000 && $codepoint <= 0x0008) ||
                    $codepoint === 0x000B ||
                    ($codepoint >= 0x000E && $codepoint <= 0x001F) ||
                    ($codepoint >= 0x007F && $codepoint <= 0x009F) ||
                    ($codepoint >= 0xD800 && $codepoint <= 0xDFFF) ||
                    ($codepoint >= 0xFDD0 && $codepoint <= 0xFDEF) ||
                    ($codepoint & 0xFFFE) === 0xFFFE ||
                    $codepoint == 0x10FFFF || $codepoint == 0x10FFFE
                ) {
                    $this->emitToken(array(
                        'type' => self::PARSEERROR,
                        'data' => 'illegal-codepoint-for-numeric-entity'
                    ));
                }
                return HTML5_Data::utf8chr($codepoint);
            }
        }

    } else {
        /* Anything else */

        /* Consume the maximum number of characters possible,
           with the consumed characters matching one of the
           identifiers in the first column of the named character
           references table (in a case-sensitive manner). */
        // What we actually do here is consume as much as we can while it
        // matches the start of one of the identifiers in the first column.

        $refs = HTML5_Data::getNamedCharacterReferences();

        // Get the longest string which is the start of an identifier
        // ($chars) as well as the longest identifier which matches ($id)
        // and its codepoint ($codepoint).
        $codepoint = false;
        $char = $chars;
        while ($char !== false && isset($refs[$char])) {
            // Descend one level of the nested reference table per character.
            $refs = $refs[$char];
            if (isset($refs['codepoint'])) {
                $id = $chars;
                $codepoint = $refs['codepoint'];
            }
            $chars .= $char = $this->stream->char();
        }

        // Unconsume the one character we just took which caused the while
        // statement to fail. This could be anything and could cause state
        // changes (as if it matches the while loop it must be
        // alphanumeric so we can just concat it to whatever we get later).
        $this->stream->unget();
        if ($char !== false) {
            $chars = substr($chars, 0, -1);
        }

        /* If no match can be made, then this is a parse error.
           No characters are consumed, and nothing is returned. */
        if (!$codepoint) {
            $this->emitToken(array(
                'type' => self::PARSEERROR,
                'data' => 'expected-named-entity'
            ));
            return '&' . $chars;
        }

        /* If the last character matched is not a U+003B SEMICOLON
           (;), there is a parse error. */
        $semicolon = true;
        if (substr($id, -1) !== ';') {
            $this->emitToken(array(
                'type' => self::PARSEERROR,
                'data' => 'named-entity-without-semicolon'
            ));
            $semicolon = false;
        }

        /* If the character reference is being consumed as part of
           an attribute, and the last character matched is not a
           U+003B SEMICOLON (;), and the next character is in the
           range 0-9, A-Z or a-z, then, for historical reasons, all the
           characters that were matched after the U+0026 AMPERSAND (&)
           must be unconsumed, and nothing is returned. */
        if ($inattr && !$semicolon) {
            // The next character is either the next character in $chars or in the stream.
            if (strlen($chars) > strlen($id)) {
                $next = substr($chars, strlen($id), 1);
            } else {
                $next = $this->stream->char();
                $this->stream->unget();
            }
            // EOF (false) must be excluded explicitly: comparing a boolean
            // with a string coerces both sides to bool, which would make
            // false look like a digit in the range test below.
            if (
                $next !== false && (
                    ('0' <= $next && $next <= '9') ||
                    ('A' <= $next && $next <= 'Z') ||
                    ('a' <= $next && $next <= 'z')
                )
            ) {
                return '&' . $chars;
            }
        }

        /* Otherwise, return a character token for the character
           corresponding to the character reference name (as given
           by the second column of the named character references table),
           followed by any raw text consumed past the matched identifier. */
        return HTML5_Data::utf8chr($codepoint) . substr($chars, strlen($id));
    }
}
2348 | |||
/**
 * Consumes a character reference found inside an attribute value and
 * appends the result to the value of the last attribute in
 * $this->token['attr'].
 *
 * @param string|false $allowed The additional allowed character, forwarded
 *                              to consumeCharacterReference(); false when
 *                              there is none.
 */
private function characterReferenceInAttributeValue($allowed = false) {
    /* Attempt to consume a character reference. */
    $entity = $this->consumeCharacterReference($allowed, true);

    /* If nothing is returned, append a U+0026 AMPERSAND
       character to the current attribute's value.

       Otherwise, append the returned character token to the
       current attribute's value. */
    // "Nothing" is tested strictly: a loose !$entity check would also
    // discard the string '0' (the result of e.g. &#48;) and append '&'
    // in its place.
    $nothing = ($entity === false || $entity === null || $entity === '');
    $char = $nothing ? '&' : $entity;

    $last = count($this->token['attr']) - 1;
    $this->token['attr'][$last]['value'] .= $char;

    /* Finally, the caller switches back to the attribute value state it
       was in when it switched into this state. */
}
2368 | |||
/**
 * Emits a token, passing it on to the tree builder.
 *
 * Before the token itself is forwarded, this method reports (as separate
 * PARSEERROR tokens) any errors queued on the input stream, attributes or a
 * self-closing flag on an end tag, and duplicate attribute names on a start
 * tag. Afterwards it updates the tokenizer's content model.
 *
 * @param array $token       The token to emit; must carry a 'type' key.
 * @param bool  $checkStream Whether to flush the input stream's queued
 *                           errors first.
 * @param bool  $dry         When true the token is validated but not
 *                           handed to the tree builder.
 */
protected function emitToken($token, $checkStream = true, $dry = false) {
    if ($checkStream) {
        // Emit errors from input stream; the nested call skips this step
        // so the queue is drained exactly once.
        while ($this->stream->errors) {
            $pending = array_shift($this->stream->errors);
            $this->emitToken($pending, false);
        }
    }

    $type = $token['type'];
    $isEndTag = ($type === self::ENDTAG);

    if ($isEndTag && !empty($token['attr'])) {
        // One parse error per attribute present on the end tag.
        $remaining = count($token['attr']);
        while ($remaining-- > 0) {
            $this->emitToken(array(
                'type' => self::PARSEERROR,
                'data' => 'attributes-in-end-tag'
            ));
        }
    }

    if ($isEndTag && !empty($token['self-closing'])) {
        $this->emitToken(array(
            'type' => self::PARSEERROR,
            'data' => 'self-closing-flag-on-end-tag',
        ));
    }

    if ($type === self::STARTTAG) {
        // This could be changed to actually pass the tree-builder a hash
        $seen = array();
        foreach ($token['attr'] as $attribute) {
            $name = $attribute['name'];
            if (!isset($seen[$name])) {
                $seen[$name] = $attribute['value'];
                continue;
            }
            $this->emitToken(array(
                'type' => self::PARSEERROR,
                'data' => 'duplicate-attribute',
            ));
        }
    }

    if (!$dry) {
        // the current structure of attributes is not a terribly good one
        $this->tree->emitToken($token);
    }

    if (!$dry && is_int($this->tree->content_model)) {
        // Adopt the content model requested by the tree builder, then
        // clear the request.
        $this->content_model = $this->tree->content_model;
        $this->tree->content_model = null;
    } elseif ($isEndTag) {
        $this->content_model = self::PCDATA;
    }
}
2421 | } | ||
2422 | |||
diff --git a/inc/3rdparty/libraries/html5/TreeBuilder.php b/inc/3rdparty/libraries/html5/TreeBuilder.php new file mode 100644 index 00000000..2f5244f9 --- /dev/null +++ b/inc/3rdparty/libraries/html5/TreeBuilder.php | |||
@@ -0,0 +1,3840 @@ | |||
1 | <?php | ||
2 | |||
3 | /* | ||
4 | |||
5 | Copyright 2007 Jeroen van der Meer <http://jero.net/> | ||
6 | Copyright 2009 Edward Z. Yang <edwardzyang@thewritingpot.com> | ||
7 | |||
8 | Permission is hereby granted, free of charge, to any person obtaining a | ||
9 | copy of this software and associated documentation files (the | ||
10 | "Software"), to deal in the Software without restriction, including | ||
11 | without limitation the rights to use, copy, modify, merge, publish, | ||
12 | distribute, sublicense, and/or sell copies of the Software, and to | ||
13 | permit persons to whom the Software is furnished to do so, subject to | ||
14 | the following conditions: | ||
15 | |||
16 | The above copyright notice and this permission notice shall be included | ||
17 | in all copies or substantial portions of the Software. | ||
18 | |||
19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
20 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
21 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | ||
22 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | ||
23 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
24 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
25 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
26 | |||
27 | */ | ||
28 | |||
29 | // Tags for FIX ME!!!: (in order of priority) | ||
30 | // XXX - should be fixed NAO! | ||
31 | // XERROR - with regards to parse errors | ||
32 | // XSCRIPT - with regards to scripting mode | ||
33 | // XENCODING - with regards to encoding (for reparsing tests) | ||
34 | // XDOM - DOM specific code (tagName is explicitly not marked). | ||
35 | // this is not (yet) in helper functions. | ||
36 | |||
37 | class HTML5_TreeBuilder { | ||
38 | public $stack = array(); | ||
39 | public $content_model; | ||
40 | |||
41 | private $mode; | ||
42 | private $original_mode; | ||
43 | private $secondary_mode; | ||
44 | private $dom; | ||
45 | // Whether or not normal insertion of nodes should actually foster | ||
46 | // parent (used in one case in spec) | ||
47 | private $foster_parent = false; | ||
48 | private $a_formatting = array(); | ||
49 | |||
50 | private $head_pointer = null; | ||
51 | private $form_pointer = null; | ||
52 | |||
53 | private $flag_frameset_ok = true; | ||
54 | private $flag_force_quirks = false; | ||
55 | private $ignored = false; | ||
56 | private $quirks_mode = null; | ||
57 | // this gets to 2 when we want to ignore the next lf character, and | ||
58 | // is decremented at the beginning of each processed token (this way, | ||
59 | // code can check for (bool)$ignore_lf_token, but it phases out | ||
60 | // appropriately) | ||
61 | private $ignore_lf_token = 0; | ||
62 | private $fragment = false; | ||
63 | private $root; | ||
64 | |||
65 | private $scoping = array('applet','button','caption','html','marquee','object','table','td','th', 'svg:foreignObject'); | ||
66 | private $formatting = array('a','b','big','code','em','font','i','nobr','s','small','strike','strong','tt','u'); | ||
67 | // dl and ds are speculative | ||
68 | private $special = array('address','area','article','aside','base','basefont','bgsound', | ||
69 | 'blockquote','body','br','center','col','colgroup','command','dc','dd','details','dir','div','dl','ds', | ||
70 | 'dt','embed','fieldset','figure','footer','form','frame','frameset','h1','h2','h3','h4','h5', | ||
71 | 'h6','head','header','hgroup','hr','iframe','img','input','isindex','li','link', | ||
72 | 'listing','menu','meta','nav','noembed','noframes','noscript','ol', | ||
73 | 'p','param','plaintext','pre','script','select','spacer','style', | ||
74 | 'tbody','textarea','tfoot','thead','title','tr','ul','wbr'); | ||
75 | |||
76 | private $pendingTableCharacters; | ||
77 | private $pendingTableCharactersDirty; | ||
78 | |||
79 | // Tree construction modes | ||
80 | const INITIAL = 0; | ||
81 | const BEFORE_HTML = 1; | ||
82 | const BEFORE_HEAD = 2; | ||
83 | const IN_HEAD = 3; | ||
84 | const IN_HEAD_NOSCRIPT = 4; | ||
85 | const AFTER_HEAD = 5; | ||
86 | const IN_BODY = 6; | ||
87 | const IN_CDATA_RCDATA = 7; | ||
88 | const IN_TABLE = 8; | ||
89 | const IN_TABLE_TEXT = 9; | ||
90 | const IN_CAPTION = 10; | ||
91 | const IN_COLUMN_GROUP = 11; | ||
92 | const IN_TABLE_BODY = 12; | ||
93 | const IN_ROW = 13; | ||
94 | const IN_CELL = 14; | ||
95 | const IN_SELECT = 15; | ||
96 | const IN_SELECT_IN_TABLE= 16; | ||
97 | const IN_FOREIGN_CONTENT= 17; | ||
98 | const AFTER_BODY = 18; | ||
99 | const IN_FRAMESET = 19; | ||
100 | const AFTER_FRAMESET = 20; | ||
101 | const AFTER_AFTER_BODY = 21; | ||
102 | const AFTER_AFTER_FRAMESET = 22; | ||
103 | |||
/**
 * Converts a magic number to a readable constant name. Use for debugging.
 *
 * Several groups of integer constants in this class reuse the same values
 * (for example INITIAL and SCOPE are both 0). The first name encountered
 * for a value is kept, so that a later constant cannot overwrite an earlier
 * one; this relies on ReflectionClass::getConstants() listing constants in
 * declaration order.
 *
 * @param int $number The constant value to look up.
 * @return string|null The constant's name, or null when no integer
 *                     constant of this class has that value.
 */
private function strConst($number) {
    static $lookup;
    if (!$lookup) {
        $lookup = array();
        $r = new ReflectionClass('HTML5_TreeBuilder');
        foreach ($r->getConstants() as $const => $num) {
            // Skip non-integer constants (namespaces) and values that
            // already have a name.
            if (!is_int($num) || isset($lookup[$num])) continue;
            $lookup[$num] = $const;
        }
    }
    return isset($lookup[$number]) ? $lookup[$number] : null;
}
120 | |||
121 | // The different types of elements. | ||
122 | const SPECIAL = 100; | ||
123 | const SCOPING = 101; | ||
124 | const FORMATTING = 102; | ||
125 | const PHRASING = 103; | ||
126 | |||
127 | // Quirks modes in $quirks_mode | ||
128 | const NO_QUIRKS = 200; | ||
129 | const QUIRKS_MODE = 201; | ||
130 | const LIMITED_QUIRKS_MODE = 202; | ||
131 | |||
132 | // Marker to be placed in $a_formatting | ||
133 | const MARKER = 300; | ||
134 | |||
135 | // Namespaces for foreign content | ||
136 | const NS_HTML = null; // to prevent DOM from requiring NS on everything | ||
137 | const NS_MATHML = 'http://www.w3.org/1998/Math/MathML'; | ||
138 | const NS_SVG = 'http://www.w3.org/2000/svg'; | ||
139 | const NS_XLINK = 'http://www.w3.org/1999/xlink'; | ||
140 | const NS_XML = 'http://www.w3.org/XML/1998/namespace'; | ||
141 | const NS_XMLNS = 'http://www.w3.org/2000/xmlns/'; | ||
142 | |||
143 | // Different types of scopes to test for elements | ||
144 | const SCOPE = 0; | ||
145 | const SCOPE_LISTITEM = 1; | ||
146 | const SCOPE_TABLE = 2; | ||
147 | |||
/**
 * Starts the builder in the INITIAL mode and creates the DOMDocument it
 * works on: UTF-8 encoding, whitespace preserved, entities substituted,
 * strict error checking switched off.
 */
public function __construct() {
    $document = new DOMDocument();
    $document->encoding = 'UTF-8';
    $document->preserveWhiteSpace = true;
    $document->substituteEntities = true;
    $document->strictErrorChecking = false;

    $this->dom = $document;
    $this->mode = self::INITIAL;
}
157 | |||
158 | // Process tag tokens | ||
159 | public function emitToken($token, $mode = null) { | ||
160 | // XXX: ignore parse errors... why are we emitting them, again? | ||
161 | if ($token['type'] === HTML5_Tokenizer::PARSEERROR) return; | ||
162 | if ($mode === null) $mode = $this->mode; | ||
163 | |||
164 | /* | ||
165 | $backtrace = debug_backtrace(); | ||
166 | if ($backtrace[1]['class'] !== 'HTML5_TreeBuilder') echo "--\n"; | ||
167 | echo $this->strConst($mode); | ||
168 | if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")"; | ||
169 | echo "\n "; | ||
170 | token_dump($token); | ||
171 | $this->printStack(); | ||
172 | $this->printActiveFormattingElements(); | ||
173 | if ($this->foster_parent) echo " -> this is a foster parent mode\n"; | ||
174 | if ($this->flag_frameset_ok) echo " -> frameset ok\n"; | ||
175 | */ | ||
176 | |||
177 | if ($this->ignore_lf_token) $this->ignore_lf_token--; | ||
178 | $this->ignored = false; | ||
179 | // indenting is a little wonky, this can be changed later on | ||
180 | switch ($mode) { | ||
181 | |||
182 | case self::INITIAL: | ||
183 | |||
184 | /* A character token that is one of U+0009 CHARACTER TABULATION, | ||
185 | * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE */ | ||
186 | if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { | ||
187 | /* Ignore the token. */ | ||
188 | $this->ignored = true; | ||
189 | } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) { | ||
190 | if ( | ||
191 | $token['name'] !== 'html' || !empty($token['public']) || | ||
192 | !empty($token['system']) || $token !== 'about:legacy-compat' | ||
193 | ) { | ||
194 | /* If the DOCTYPE token's name is not a case-sensitive match | ||
195 | * for the string "html", or if the token's public identifier | ||
196 | * is not missing, or if the token's system identifier is | ||
197 | * neither missing nor a case-sensitive match for the string | ||
198 | * "about:legacy-compat", then there is a parse error (this | ||
199 | * is the DOCTYPE parse error). */ | ||
200 | // DOCTYPE parse error | ||
201 | } | ||
202 | /* Append a DocumentType node to the Document node, with the name | ||
203 | * attribute set to the name given in the DOCTYPE token, or the | ||
204 | * empty string if the name was missing; the publicId attribute | ||
205 | * set to the public identifier given in the DOCTYPE token, or | ||
206 | * the empty string if the public identifier was missing; the | ||
207 | * systemId attribute set to the system identifier given in the | ||
208 | * DOCTYPE token, or the empty string if the system identifier | ||
209 | * was missing; and the other attributes specific to | ||
210 | * DocumentType objects set to null and empty lists as | ||
211 | * appropriate. Associate the DocumentType node with the | ||
212 | * Document object so that it is returned as the value of the | ||
213 | * doctype attribute of the Document object. */ | ||
214 | if (!isset($token['public'])) $token['public'] = null; | ||
215 | if (!isset($token['system'])) $token['system'] = null; | ||
216 | // XDOM | ||
217 | // Yes this is hacky. I'm kind of annoyed that I can't appendChild | ||
218 | // a doctype to DOMDocument. Maybe I haven't chanted the right | ||
219 | // syllables. | ||
220 | $impl = new DOMImplementation(); | ||
221 | // This call can fail for particularly pathological cases (namely, | ||
222 | // the qualifiedName parameter ($token['name']) could be missing). | ||
223 | if ($token['name']) { | ||
224 | $doctype = $impl->createDocumentType($token['name'], $token['public'], $token['system']); | ||
225 | $this->dom->appendChild($doctype); | ||
226 | } else { | ||
227 | // It looks like libxml's not actually *able* to express this case. | ||
228 | // So... don't. | ||
229 | $this->dom->emptyDoctype = true; | ||
230 | } | ||
231 | $public = is_null($token['public']) ? false : strtolower($token['public']); | ||
232 | $system = is_null($token['system']) ? false : strtolower($token['system']); | ||
233 | $publicStartsWithForQuirks = array( | ||
234 | "+//silmaril//dtd html pro v0r11 19970101//", | ||
235 | "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", | ||
236 | "-//as//dtd html 3.0 aswedit + extensions//", | ||
237 | "-//ietf//dtd html 2.0 level 1//", | ||
238 | "-//ietf//dtd html 2.0 level 2//", | ||
239 | "-//ietf//dtd html 2.0 strict level 1//", | ||
240 | "-//ietf//dtd html 2.0 strict level 2//", | ||
241 | "-//ietf//dtd html 2.0 strict//", | ||
242 | "-//ietf//dtd html 2.0//", | ||
243 | "-//ietf//dtd html 2.1e//", | ||
244 | "-//ietf//dtd html 3.0//", | ||
245 | "-//ietf//dtd html 3.2 final//", | ||
246 | "-//ietf//dtd html 3.2//", | ||
247 | "-//ietf//dtd html 3//", | ||
248 | "-//ietf//dtd html level 0//", | ||
249 | "-//ietf//dtd html level 1//", | ||
250 | "-//ietf//dtd html level 2//", | ||
251 | "-//ietf//dtd html level 3//", | ||
252 | "-//ietf//dtd html strict level 0//", | ||
253 | "-//ietf//dtd html strict level 1//", | ||
254 | "-//ietf//dtd html strict level 2//", | ||
255 | "-//ietf//dtd html strict level 3//", | ||
256 | "-//ietf//dtd html strict//", | ||
257 | "-//ietf//dtd html//", | ||
258 | "-//metrius//dtd metrius presentational//", | ||
259 | "-//microsoft//dtd internet explorer 2.0 html strict//", | ||
260 | "-//microsoft//dtd internet explorer 2.0 html//", | ||
261 | "-//microsoft//dtd internet explorer 2.0 tables//", | ||
262 | "-//microsoft//dtd internet explorer 3.0 html strict//", | ||
263 | "-//microsoft//dtd internet explorer 3.0 html//", | ||
264 | "-//microsoft//dtd internet explorer 3.0 tables//", | ||
265 | "-//netscape comm. corp.//dtd html//", | ||
266 | "-//netscape comm. corp.//dtd strict html//", | ||
267 | "-//o'reilly and associates//dtd html 2.0//", | ||
268 | "-//o'reilly and associates//dtd html extended 1.0//", | ||
269 | "-//o'reilly and associates//dtd html extended relaxed 1.0//", | ||
270 | "-//spyglass//dtd html 2.0 extended//", | ||
271 | "-//sq//dtd html 2.0 hotmetal + extensions//", | ||
272 | "-//sun microsystems corp.//dtd hotjava html//", | ||
273 | "-//sun microsystems corp.//dtd hotjava strict html//", | ||
274 | "-//w3c//dtd html 3 1995-03-24//", | ||
275 | "-//w3c//dtd html 3.2 draft//", | ||
276 | "-//w3c//dtd html 3.2 final//", | ||
277 | "-//w3c//dtd html 3.2//", | ||
278 | "-//w3c//dtd html 3.2s draft//", | ||
279 | "-//w3c//dtd html 4.0 frameset//", | ||
280 | "-//w3c//dtd html 4.0 transitional//", | ||
281 | "-//w3c//dtd html experimental 19960712//", | ||
282 | "-//w3c//dtd html experimental 970421//", | ||
283 | "-//w3c//dtd w3 html//", | ||
284 | "-//w3o//dtd w3 html 3.0//", | ||
285 | "-//webtechs//dtd mozilla html 2.0//", | ||
286 | "-//webtechs//dtd mozilla html//", | ||
287 | ); | ||
288 | $publicSetToForQuirks = array( | ||
289 | "-//w3o//dtd w3 html strict 3.0//", | ||
290 | "-/w3c/dtd html 4.0 transitional/en", | ||
291 | "html", | ||
292 | ); | ||
293 | $publicStartsWithAndSystemForQuirks = array( | ||
294 | "-//w3c//dtd html 4.01 frameset//", | ||
295 | "-//w3c//dtd html 4.01 transitional//", | ||
296 | ); | ||
297 | $publicStartsWithForLimitedQuirks = array( | ||
298 | "-//w3c//dtd xhtml 1.0 frameset//", | ||
299 | "-//w3c//dtd xhtml 1.0 transitional//", | ||
300 | ); | ||
301 | $publicStartsWithAndSystemForLimitedQuirks = array( | ||
302 | "-//w3c//dtd html 4.01 frameset//", | ||
303 | "-//w3c//dtd html 4.01 transitional//", | ||
304 | ); | ||
305 | // first, do easy checks | ||
306 | if ( | ||
307 | !empty($token['force-quirks']) || | ||
308 | strtolower($token['name']) !== 'html' | ||
309 | ) { | ||
310 | $this->quirks_mode = self::QUIRKS_MODE; | ||
311 | } else { | ||
312 | do { | ||
313 | if ($system) { | ||
314 | foreach ($publicStartsWithAndSystemForQuirks as $x) { | ||
315 | if (strncmp($public, $x, strlen($x)) === 0) { | ||
316 | $this->quirks_mode = self::QUIRKS_MODE; | ||
317 | break; | ||
318 | } | ||
319 | } | ||
320 | if (!is_null($this->quirks_mode)) break; | ||
321 | foreach ($publicStartsWithAndSystemForLimitedQuirks as $x) { | ||
322 | if (strncmp($public, $x, strlen($x)) === 0) { | ||
323 | $this->quirks_mode = self::LIMITED_QUIRKS_MODE; | ||
324 | break; | ||
325 | } | ||
326 | } | ||
327 | if (!is_null($this->quirks_mode)) break; | ||
328 | } | ||
329 | foreach ($publicSetToForQuirks as $x) { | ||
330 | if ($public === $x) { | ||
331 | $this->quirks_mode = self::QUIRKS_MODE; | ||
332 | break; | ||
333 | } | ||
334 | } | ||
335 | if (!is_null($this->quirks_mode)) break; | ||
336 | foreach ($publicStartsWithForLimitedQuirks as $x) { | ||
337 | if (strncmp($public, $x, strlen($x)) === 0) { | ||
338 | $this->quirks_mode = self::LIMITED_QUIRKS_MODE; | ||
339 | } | ||
340 | } | ||
341 | if (!is_null($this->quirks_mode)) break; | ||
342 | if ($system === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") { | ||
343 | $this->quirks_mode = self::QUIRKS_MODE; | ||
344 | break; | ||
345 | } | ||
346 | foreach ($publicStartsWithForQuirks as $x) { | ||
347 | if (strncmp($public, $x, strlen($x)) === 0) { | ||
348 | $this->quirks_mode = self::QUIRKS_MODE; | ||
349 | break; | ||
350 | } | ||
351 | } | ||
352 | if (is_null($this->quirks_mode)) { | ||
353 | $this->quirks_mode = self::NO_QUIRKS; | ||
354 | } | ||
355 | } while (false); | ||
356 | } | ||
357 | $this->mode = self::BEFORE_HTML; | ||
358 | } else { | ||
359 | // parse error | ||
360 | /* Switch the insertion mode to "before html", then reprocess the | ||
361 | * current token. */ | ||
362 | $this->mode = self::BEFORE_HTML; | ||
363 | $this->quirks_mode = self::QUIRKS_MODE; | ||
364 | $this->emitToken($token); | ||
365 | } | ||
366 | break; | ||
367 | |||
368 | case self::BEFORE_HTML: | ||
369 | |||
370 | /* A DOCTYPE token */ | ||
371 | if($token['type'] === HTML5_Tokenizer::DOCTYPE) { | ||
372 | // Parse error. Ignore the token. | ||
373 | $this->ignored = true; | ||
374 | |||
375 | /* A comment token */ | ||
376 | } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { | ||
377 | /* Append a Comment node to the Document object with the data | ||
378 | attribute set to the data given in the comment token. */ | ||
379 | // XDOM | ||
380 | $comment = $this->dom->createComment($token['data']); | ||
381 | $this->dom->appendChild($comment); | ||
382 | |||
383 | /* A character token that is one of U+0009 CHARACTER TABULATION, | ||
384 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), | ||
385 | or U+0020 SPACE */ | ||
386 | } elseif($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { | ||
387 | /* Ignore the token. */ | ||
388 | $this->ignored = true; | ||
389 | |||
390 | /* A start tag whose tag name is "html" */ | ||
391 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] == 'html') { | ||
392 | /* Create an element for the token in the HTML namespace. Append it | ||
393 | * to the Document object. Put this element in the stack of open | ||
394 | * elements. */ | ||
395 | // XDOM | ||
396 | $html = $this->insertElement($token, false); | ||
397 | $this->dom->appendChild($html); | ||
398 | $this->stack[] = $html; | ||
399 | |||
400 | $this->mode = self::BEFORE_HEAD; | ||
401 | |||
402 | } else { | ||
403 | /* Create an html element. Append it to the Document object. Put | ||
404 | * this element in the stack of open elements. */ | ||
405 | // XDOM | ||
406 | $html = $this->dom->createElementNS(self::NS_HTML, 'html'); | ||
407 | $this->dom->appendChild($html); | ||
408 | $this->stack[] = $html; | ||
409 | |||
410 | /* Switch the insertion mode to "before head", then reprocess the | ||
411 | * current token. */ | ||
412 | $this->mode = self::BEFORE_HEAD; | ||
413 | $this->emitToken($token); | ||
414 | } | ||
415 | break; | ||
416 | |||
417 | case self::BEFORE_HEAD: | ||
418 | |||
419 | /* A character token that is one of U+0009 CHARACTER TABULATION, | ||
420 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), | ||
421 | or U+0020 SPACE */ | ||
422 | if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { | ||
423 | /* Ignore the token. */ | ||
424 | $this->ignored = true; | ||
425 | |||
426 | /* A comment token */ | ||
427 | } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { | ||
428 | /* Append a Comment node to the current node with the data attribute | ||
429 | set to the data given in the comment token. */ | ||
430 | $this->insertComment($token['data']); | ||
431 | |||
432 | /* A DOCTYPE token */ | ||
433 | } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) { | ||
434 | /* Parse error. Ignore the token */ | ||
435 | $this->ignored = true; | ||
436 | // parse error | ||
437 | |||
438 | /* A start tag token with the tag name "html" */ | ||
439 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') { | ||
440 | /* Process the token using the rules for the "in body" | ||
441 | * insertion mode. */ | ||
442 | $this->processWithRulesFor($token, self::IN_BODY); | ||
443 | |||
444 | /* A start tag token with the tag name "head" */ | ||
445 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') { | ||
446 | /* Insert an HTML element for the token. */ | ||
447 | $element = $this->insertElement($token); | ||
448 | |||
449 | /* Set the head element pointer to this new element node. */ | ||
450 | $this->head_pointer = $element; | ||
451 | |||
452 | /* Change the insertion mode to "in head". */ | ||
453 | $this->mode = self::IN_HEAD; | ||
454 | |||
455 | /* An end tag whose tag name is one of: "head", "body", "html", "br" */ | ||
456 | } elseif( | ||
457 | $token['type'] === HTML5_Tokenizer::ENDTAG && ( | ||
458 | $token['name'] === 'head' || $token['name'] === 'body' || | ||
459 | $token['name'] === 'html' || $token['name'] === 'br' | ||
460 | )) { | ||
461 | /* Act as if a start tag token with the tag name "head" and no | ||
462 | * attributes had been seen, then reprocess the current token. */ | ||
463 | $this->emitToken(array( | ||
464 | 'name' => 'head', | ||
465 | 'type' => HTML5_Tokenizer::STARTTAG, | ||
466 | 'attr' => array() | ||
467 | )); | ||
468 | $this->emitToken($token); | ||
469 | |||
470 | /* Any other end tag */ | ||
471 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG) { | ||
472 | /* Parse error. Ignore the token. */ | ||
473 | $this->ignored = true; | ||
474 | |||
475 | } else { | ||
476 | /* Act as if a start tag token with the tag name "head" and no | ||
477 | * attributes had been seen, then reprocess the current token. | ||
478 | * Note: This will result in an empty head element being | ||
479 | * generated, with the current token being reprocessed in the | ||
480 | * "after head" insertion mode. */ | ||
481 | $this->emitToken(array( | ||
482 | 'name' => 'head', | ||
483 | 'type' => HTML5_Tokenizer::STARTTAG, | ||
484 | 'attr' => array() | ||
485 | )); | ||
486 | $this->emitToken($token); | ||
487 | } | ||
488 | break; | ||
489 | |||
490 | case self::IN_HEAD: | ||
491 | |||
492 | /* A character token that is one of U+0009 CHARACTER TABULATION, | ||
493 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), | ||
494 | or U+0020 SPACE. */ | ||
495 | if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { | ||
496 | /* Insert the character into the current node. */ | ||
497 | $this->insertText($token['data']); | ||
498 | |||
499 | /* A comment token */ | ||
500 | } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { | ||
501 | /* Append a Comment node to the current node with the data attribute | ||
502 | set to the data given in the comment token. */ | ||
503 | $this->insertComment($token['data']); | ||
504 | |||
505 | /* A DOCTYPE token */ | ||
506 | } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) { | ||
507 | /* Parse error. Ignore the token. */ | ||
508 | $this->ignored = true; | ||
509 | // parse error | ||
510 | |||
511 | /* A start tag whose tag name is "html" */ | ||
512 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
513 | $token['name'] === 'html') { | ||
514 | $this->processWithRulesFor($token, self::IN_BODY); | ||
515 | |||
516 | /* A start tag whose tag name is one of: "base", "command", "link" */ | ||
517 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
518 | ($token['name'] === 'base' || $token['name'] === 'command' || | ||
519 | $token['name'] === 'link')) { | ||
520 | /* Insert an HTML element for the token. Immediately pop the | ||
521 | * current node off the stack of open elements. */ | ||
522 | $this->insertElement($token); | ||
523 | array_pop($this->stack); | ||
524 | |||
525 | // YYY: Acknowledge the token's self-closing flag, if it is set. | ||
526 | |||
527 | /* A start tag whose tag name is "meta" */ | ||
528 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'meta') { | ||
529 | /* Insert an HTML element for the token. Immediately pop the | ||
530 | * current node off the stack of open elements. */ | ||
531 | $this->insertElement($token); | ||
532 | array_pop($this->stack); | ||
533 | |||
534 | // XERROR: Acknowledge the token's self-closing flag, if it is set. | ||
535 | |||
536 | // XENCODING: If the element has a charset attribute, and its value is a | ||
537 | // supported encoding, and the confidence is currently tentative, | ||
538 | // then change the encoding to the encoding given by the value of | ||
539 | // the charset attribute. | ||
540 | // | ||
541 | // Otherwise, if the element has a content attribute, and applying | ||
542 | // the algorithm for extracting an encoding from a Content-Type to | ||
543 | // its value returns a supported encoding encoding, and the | ||
544 | // confidence is currently tentative, then change the encoding to | ||
545 | // the encoding encoding. | ||
546 | |||
547 | /* A start tag with the tag name "title" */ | ||
548 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'title') { | ||
549 | $this->insertRCDATAElement($token); | ||
550 | |||
551 | /* A start tag whose tag name is "noscript", if the scripting flag is enabled, or | ||
552 | * A start tag whose tag name is one of: "noframes", "style" */ | ||
553 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
554 | ($token['name'] === 'noscript' || $token['name'] === 'noframes' || $token['name'] === 'style')) { | ||
555 | // XSCRIPT: Scripting flag not respected | ||
556 | $this->insertCDATAElement($token); | ||
557 | |||
558 | // XSCRIPT: Scripting flag disable not implemented | ||
559 | |||
560 | /* A start tag with the tag name "script" */ | ||
561 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') { | ||
562 | /* 1. Create an element for the token in the HTML namespace. */ | ||
563 | $node = $this->insertElement($token, false); | ||
564 | |||
565 | /* 2. Mark the element as being "parser-inserted" */ | ||
566 | // Uhhh... XSCRIPT | ||
567 | |||
568 | /* 3. If the parser was originally created for the HTML | ||
569 | * fragment parsing algorithm, then mark the script element as | ||
570 | * "already executed". (fragment case) */ | ||
571 | // ditto... XSCRIPT | ||
572 | |||
573 | /* 4. Append the new element to the current node and push it onto | ||
574 | * the stack of open elements. */ | ||
575 | end($this->stack)->appendChild($node); | ||
576 | $this->stack[] = $node; | ||
577 | // I guess we could squash these together | ||
578 | |||
579 | /* 6. Let the original insertion mode be the current insertion mode. */ | ||
580 | $this->original_mode = $this->mode; | ||
581 | /* 7. Switch the insertion mode to "in CDATA/RCDATA" */ | ||
582 | $this->mode = self::IN_CDATA_RCDATA; | ||
583 | /* 5. Switch the tokeniser's content model flag to the CDATA state. */ | ||
584 | $this->content_model = HTML5_Tokenizer::CDATA; | ||
585 | |||
586 | /* An end tag with the tag name "head" */ | ||
587 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'head') { | ||
588 | /* Pop the current node (which will be the head element) off the stack of open elements. */ | ||
589 | array_pop($this->stack); | ||
590 | |||
591 | /* Change the insertion mode to "after head". */ | ||
592 | $this->mode = self::AFTER_HEAD; | ||
593 | |||
594 | // Slight logic inversion here to minimize duplication | ||
595 | /* A start tag with the tag name "head". */ | ||
596 | /* An end tag whose tag name is not one of: "body", "html", "br" */ | ||
597 | } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') || | ||
598 | ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] !== 'html' && | ||
599 | $token['name'] !== 'body' && $token['name'] !== 'br')) { | ||
600 | // Parse error. Ignore the token. | ||
601 | $this->ignored = true; | ||
602 | |||
603 | /* Anything else */ | ||
604 | } else { | ||
605 | /* Act as if an end tag token with the tag name "head" had been | ||
606 | * seen, and reprocess the current token. */ | ||
607 | $this->emitToken(array( | ||
608 | 'name' => 'head', | ||
609 | 'type' => HTML5_Tokenizer::ENDTAG | ||
610 | )); | ||
611 | |||
612 | /* Then, reprocess the current token. */ | ||
613 | $this->emitToken($token); | ||
614 | } | ||
615 | break; | ||
616 | |||
617 | case self::IN_HEAD_NOSCRIPT: | ||
618 | if ($token['type'] === HTML5_Tokenizer::DOCTYPE) { | ||
619 | // parse error | ||
620 | } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') { | ||
621 | $this->processWithRulesFor($token, self::IN_BODY); | ||
622 | } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'noscript') { | ||
623 | /* Pop the current node (which will be a noscript element) from the | ||
624 | * stack of open elements; the new current node will be a head | ||
625 | * element. */ | ||
626 | array_pop($this->stack); | ||
627 | $this->mode = self::IN_HEAD; | ||
628 | } elseif ( | ||
629 | ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) || | ||
630 | ($token['type'] === HTML5_Tokenizer::COMMENT) || | ||
631 | ($token['type'] === HTML5_Tokenizer::STARTTAG && ( | ||
632 | $token['name'] === 'link' || $token['name'] === 'meta' || | ||
633 | $token['name'] === 'noframes' || $token['name'] === 'style'))) { | ||
634 | $this->processWithRulesFor($token, self::IN_HEAD); | ||
635 | // inverted logic | ||
636 | } elseif ( | ||
637 | ($token['type'] === HTML5_Tokenizer::STARTTAG && ( | ||
638 | $token['name'] === 'head' || $token['name'] === 'noscript')) || | ||
639 | ($token['type'] === HTML5_Tokenizer::ENDTAG && | ||
640 | $token['name'] !== 'br')) { | ||
641 | // parse error | ||
642 | } else { | ||
643 | // parse error | ||
644 | $this->emitToken(array( | ||
645 | 'type' => HTML5_Tokenizer::ENDTAG, | ||
646 | 'name' => 'noscript', | ||
647 | )); | ||
648 | $this->emitToken($token); | ||
649 | } | ||
650 | break; | ||
651 | |||
652 | case self::AFTER_HEAD: | ||
653 | /* Handle the token as follows: */ | ||
654 | |||
655 | /* A character token that is one of U+0009 CHARACTER TABULATION, | ||
656 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), | ||
657 | or U+0020 SPACE */ | ||
658 | if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { | ||
659 | /* Append the character to the current node. */ | ||
660 | $this->insertText($token['data']); | ||
661 | |||
662 | /* A comment token */ | ||
663 | } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { | ||
664 | /* Append a Comment node to the current node with the data attribute | ||
665 | set to the data given in the comment token. */ | ||
666 | $this->insertComment($token['data']); | ||
667 | |||
668 | } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) { | ||
669 | // parse error | ||
670 | |||
671 | } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') { | ||
672 | $this->processWithRulesFor($token, self::IN_BODY); | ||
673 | |||
674 | /* A start tag token with the tag name "body" */ | ||
675 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'body') { | ||
676 | $this->insertElement($token); | ||
677 | |||
678 | /* Set the frameset-ok flag to "not ok". */ | ||
679 | $this->flag_frameset_ok = false; | ||
680 | |||
681 | /* Change the insertion mode to "in body". */ | ||
682 | $this->mode = self::IN_BODY; | ||
683 | |||
684 | /* A start tag token with the tag name "frameset" */ | ||
685 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'frameset') { | ||
686 | /* Insert a frameset element for the token. */ | ||
687 | $this->insertElement($token); | ||
688 | |||
689 | /* Change the insertion mode to "in frameset". */ | ||
690 | $this->mode = self::IN_FRAMESET; | ||
691 | |||
692 | /* A start tag token whose tag name is one of: "base", "link", "meta", | ||
693 | "noframes", "script", "style", "title" */ | ||
694 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'], | ||
695 | array('base', 'link', 'meta', 'noframes', 'script', 'style', 'title'))) { | ||
696 | // parse error | ||
697 | /* Push the node pointed to by the head element pointer onto the | ||
698 | * stack of open elements. */ | ||
699 | $this->stack[] = $this->head_pointer; | ||
700 | $this->processWithRulesFor($token, self::IN_HEAD); | ||
701 | array_splice($this->stack, array_search($this->head_pointer, $this->stack, true), 1); | ||
702 | |||
703 | // inversion of specification | ||
704 | } elseif( | ||
705 | ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') || | ||
706 | ($token['type'] === HTML5_Tokenizer::ENDTAG && | ||
707 | $token['name'] !== 'body' && $token['name'] !== 'html' && | ||
708 | $token['name'] !== 'br')) { | ||
709 | // parse error | ||
710 | |||
711 | /* Anything else */ | ||
712 | } else { | ||
713 | $this->emitToken(array( | ||
714 | 'name' => 'body', | ||
715 | 'type' => HTML5_Tokenizer::STARTTAG, | ||
716 | 'attr' => array() | ||
717 | )); | ||
718 | $this->flag_frameset_ok = true; | ||
719 | $this->emitToken($token); | ||
720 | } | ||
721 | break; | ||
722 | |||
723 | case self::IN_BODY: | ||
724 | /* Handle the token as follows: */ | ||
725 | |||
726 | switch($token['type']) { | ||
727 | /* A character token */ | ||
728 | case HTML5_Tokenizer::CHARACTER: | ||
729 | case HTML5_Tokenizer::SPACECHARACTER: | ||
730 | /* Reconstruct the active formatting elements, if any. */ | ||
731 | $this->reconstructActiveFormattingElements(); | ||
732 | |||
733 | /* Append the token's character to the current node. */ | ||
734 | $this->insertText($token['data']); | ||
735 | |||
736 | /* If the token is not one of U+0009 CHARACTER TABULATION, | ||
737 | * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 | ||
738 | * SPACE, then set the frameset-ok flag to "not ok". */ | ||
739 | // i.e., if any of the characters is not whitespace | ||
740 | if (strlen($token['data']) !== strspn($token['data'], HTML5_Tokenizer::WHITESPACE)) { | ||
741 | $this->flag_frameset_ok = false; | ||
742 | } | ||
743 | break; | ||
744 | |||
745 | /* A comment token */ | ||
746 | case HTML5_Tokenizer::COMMENT: | ||
747 | /* Append a Comment node to the current node with the data | ||
748 | attribute set to the data given in the comment token. */ | ||
749 | $this->insertComment($token['data']); | ||
750 | break; | ||
751 | |||
752 | case HTML5_Tokenizer::DOCTYPE: | ||
753 | // parse error | ||
754 | break; | ||
755 | |||
756 | case HTML5_Tokenizer::EOF: | ||
757 | // parse error | ||
758 | break; | ||
759 | |||
760 | case HTML5_Tokenizer::STARTTAG: | ||
761 | switch($token['name']) { | ||
762 | case 'html': | ||
763 | // parse error | ||
764 | /* For each attribute on the token, check to see if the | ||
765 | * attribute is already present on the top element of the | ||
766 | * stack of open elements. If it is not, add the attribute | ||
767 | * and its corresponding value to that element. */ | ||
768 | foreach($token['attr'] as $attr) { | ||
769 | if(!$this->stack[0]->hasAttribute($attr['name'])) { | ||
770 | $this->stack[0]->setAttribute($attr['name'], $attr['value']); | ||
771 | } | ||
772 | } | ||
773 | break; | ||
774 | |||
775 | case 'base': case 'command': case 'link': case 'meta': case 'noframes': | ||
776 | case 'script': case 'style': case 'title': | ||
777 | /* Process the token as if the insertion mode had been "in | ||
778 | head". */ | ||
779 | $this->processWithRulesFor($token, self::IN_HEAD); | ||
780 | break; | ||
781 | |||
782 | /* A start tag token with the tag name "body" */ | ||
783 | case 'body': | ||
784 | /* Parse error. If the second element on the stack of open | ||
785 | elements is not a body element, or, if the stack of open | ||
786 | elements has only one node on it, then ignore the token. | ||
787 | (fragment case) */ | ||
788 | if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') { | ||
789 | $this->ignored = true; | ||
790 | // Ignore | ||
791 | |||
792 | /* Otherwise, for each attribute on the token, check to see | ||
793 | if the attribute is already present on the body element (the | ||
794 | second element) on the stack of open elements. If it is not, | ||
795 | add the attribute and its corresponding value to that | ||
796 | element. */ | ||
797 | } else { | ||
798 | foreach($token['attr'] as $attr) { | ||
799 | if(!$this->stack[1]->hasAttribute($attr['name'])) { | ||
800 | $this->stack[1]->setAttribute($attr['name'], $attr['value']); | ||
801 | } | ||
802 | } | ||
803 | } | ||
804 | break; | ||
805 | |||
806 | case 'frameset': | ||
807 | // parse error | ||
808 | /* If the second element on the stack of open elements is | ||
809 | * not a body element, or, if the stack of open elements | ||
810 | * has only one node on it, then ignore the token. | ||
811 | * (fragment case) */ | ||
812 | if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') { | ||
813 | $this->ignored = true; | ||
814 | // Ignore | ||
815 | } elseif (!$this->flag_frameset_ok) { | ||
816 | $this->ignored = true; | ||
817 | // Ignore | ||
818 | } else { | ||
819 | /* 1. Remove the second element on the stack of open | ||
820 | * elements from its parent node, if it has one. */ | ||
821 | if($this->stack[1]->parentNode) { | ||
822 | $this->stack[1]->parentNode->removeChild($this->stack[1]); | ||
823 | } | ||
824 | |||
825 | /* 2. Pop all the nodes from the bottom of the stack of | ||
826 | * open elements, from the current node up to the root | ||
827 | * html element. */ | ||
828 | array_splice($this->stack, 1); | ||
829 | |||
830 | $this->insertElement($token); | ||
831 | $this->mode = self::IN_FRAMESET; | ||
832 | } | ||
833 | break; | ||
834 | |||
835 | // in spec, there is a diversion here | ||
836 | |||
837 | case 'address': case 'article': case 'aside': case 'blockquote': | ||
838 | case 'center': case 'datagrid': case 'details': case 'dir': | ||
839 | case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer': | ||
840 | case 'header': case 'hgroup': case 'menu': case 'nav': | ||
841 | case 'ol': case 'p': case 'section': case 'ul': | ||
842 | /* If the stack of open elements has a p element in scope, | ||
843 | then act as if an end tag with the tag name p had been | ||
844 | seen. */ | ||
845 | if($this->elementInScope('p')) { | ||
846 | $this->emitToken(array( | ||
847 | 'name' => 'p', | ||
848 | 'type' => HTML5_Tokenizer::ENDTAG | ||
849 | )); | ||
850 | } | ||
851 | |||
852 | /* Insert an HTML element for the token. */ | ||
853 | $this->insertElement($token); | ||
854 | break; | ||
855 | |||
856 | /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4", | ||
857 | "h5", "h6" */ | ||
858 | case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': | ||
859 | /* If the stack of open elements has a p element in scope, | ||
860 | then act as if an end tag with the tag name p had been seen. */ | ||
861 | if($this->elementInScope('p')) { | ||
862 | $this->emitToken(array( | ||
863 | 'name' => 'p', | ||
864 | 'type' => HTML5_Tokenizer::ENDTAG | ||
865 | )); | ||
866 | } | ||
867 | |||
868 | /* If the current node is an element whose tag name is one | ||
869 | * of "h1", "h2", "h3", "h4", "h5", or "h6", then this is a | ||
870 | * parse error; pop the current node off the stack of open | ||
871 | * elements. */ | ||
872 | $peek = array_pop($this->stack); | ||
873 | if (in_array($peek->tagName, array("h1", "h2", "h3", "h4", "h5", "h6"))) { | ||
874 | // parse error | ||
875 | } else { | ||
876 | $this->stack[] = $peek; | ||
877 | } | ||
878 | |||
879 | /* Insert an HTML element for the token. */ | ||
880 | $this->insertElement($token); | ||
881 | break; | ||
882 | |||
883 | case 'pre': case 'listing': | ||
884 | /* If the stack of open elements has a p element in scope, | ||
885 | then act as if an end tag with the tag name p had been seen. */ | ||
886 | if($this->elementInScope('p')) { | ||
887 | $this->emitToken(array( | ||
888 | 'name' => 'p', | ||
889 | 'type' => HTML5_Tokenizer::ENDTAG | ||
890 | )); | ||
891 | } | ||
892 | $this->insertElement($token); | ||
893 | /* If the next token is a U+000A LINE FEED (LF) character | ||
894 | * token, then ignore that token and move on to the next | ||
895 | * one. (Newlines at the start of pre blocks are ignored as | ||
896 | * an authoring convenience.) */ | ||
897 | $this->ignore_lf_token = 2; | ||
898 | $this->flag_frameset_ok = false; | ||
899 | break; | ||
900 | |||
901 | /* A start tag whose tag name is "form" */ | ||
902 | case 'form': | ||
903 | /* If the form element pointer is not null, ignore the | ||
904 | token with a parse error. */ | ||
905 | if($this->form_pointer !== null) { | ||
906 | $this->ignored = true; | ||
907 | // Ignore. | ||
908 | |||
909 | /* Otherwise: */ | ||
910 | } else { | ||
911 | /* If the stack of open elements has a p element in | ||
912 | scope, then act as if an end tag with the tag name p | ||
913 | had been seen. */ | ||
914 | if($this->elementInScope('p')) { | ||
915 | $this->emitToken(array( | ||
916 | 'name' => 'p', | ||
917 | 'type' => HTML5_Tokenizer::ENDTAG | ||
918 | )); | ||
919 | } | ||
920 | |||
921 | /* Insert an HTML element for the token, and set the | ||
922 | form element pointer to point to the element created. */ | ||
923 | $element = $this->insertElement($token); | ||
924 | $this->form_pointer = $element; | ||
925 | } | ||
926 | break; | ||
927 | |||
928 | // condensed specification | ||
929 | case 'li': case 'dc': case 'dd': case 'ds': case 'dt': | ||
930 | /* 1. Set the frameset-ok flag to "not ok". */ | ||
931 | $this->flag_frameset_ok = false; | ||
932 | |||
933 | $stack_length = count($this->stack) - 1; | ||
934 | for($n = $stack_length; 0 <= $n; $n--) { | ||
935 | /* 2. Initialise node to be the current node (the | ||
936 | bottommost node of the stack). */ | ||
937 | $stop = false; | ||
938 | $node = $this->stack[$n]; | ||
939 | $cat = $this->getElementCategory($node); | ||
940 | |||
941 | // for case 'li': | ||
942 | /* 3. If node is an li element, then act as if an end | ||
943 | * tag with the tag name "li" had been seen, then jump | ||
944 | * to the last step. */ | ||
945 | // for case 'dc': case 'dd': case 'ds': case 'dt': | ||
946 | /* If node is a dc, dd, ds or dt element, then act as if an end | ||
947 | * tag with the same tag name as node had been seen, then | ||
948 | * jump to the last step. */ | ||
949 | if(($token['name'] === 'li' && $node->tagName === 'li') || | ||
950 | ($token['name'] !== 'li' && ($node->tagName == 'dc' || $node->tagName === 'dd' || $node->tagName == 'ds' || $node->tagName === 'dt'))) { // limited conditional | ||
951 | $this->emitToken(array( | ||
952 | 'type' => HTML5_Tokenizer::ENDTAG, | ||
953 | 'name' => $node->tagName, | ||
954 | )); | ||
955 | break; | ||
956 | } | ||
957 | |||
958 | /* 4. If node is not in the formatting category, and is | ||
959 | not in the phrasing category, and is not an address, | ||
960 | div or p element, then stop this algorithm. */ | ||
961 | if($cat !== self::FORMATTING && $cat !== self::PHRASING && | ||
962 | $node->tagName !== 'address' && $node->tagName !== 'div' && | ||
963 | $node->tagName !== 'p') { | ||
964 | break; | ||
965 | } | ||
966 | |||
967 | /* 5. Otherwise, set node to the previous entry in the | ||
968 | * stack of open elements and return to step 2. */ | ||
969 | } | ||
970 | |||
971 | /* 6. This is the last step. */ | ||
972 | |||
973 | /* If the stack of open elements has a p element in scope, | ||
974 | then act as if an end tag with the tag name p had been | ||
975 | seen. */ | ||
976 | if($this->elementInScope('p')) { | ||
977 | $this->emitToken(array( | ||
978 | 'name' => 'p', | ||
979 | 'type' => HTML5_Tokenizer::ENDTAG | ||
980 | )); | ||
981 | } | ||
982 | |||
983 | /* Finally, insert an HTML element with the same tag | ||
984 | name as the token's. */ | ||
985 | $this->insertElement($token); | ||
986 | break; | ||
987 | |||
988 | /* A start tag token whose tag name is "plaintext" */ | ||
989 | case 'plaintext': | ||
990 | /* If the stack of open elements has a p element in scope, | ||
991 | then act as if an end tag with the tag name p had been | ||
992 | seen. */ | ||
993 | if($this->elementInScope('p')) { | ||
994 | $this->emitToken(array( | ||
995 | 'name' => 'p', | ||
996 | 'type' => HTML5_Tokenizer::ENDTAG | ||
997 | )); | ||
998 | } | ||
999 | |||
1000 | /* Insert an HTML element for the token. */ | ||
1001 | $this->insertElement($token); | ||
1002 | |||
1003 | $this->content_model = HTML5_Tokenizer::PLAINTEXT; | ||
1004 | break; | ||
1005 | |||
1006 | // more diversions | ||
1007 | |||
1008 | /* A start tag whose tag name is "a" */ | ||
1009 | case 'a': | ||
1010 | /* If the list of active formatting elements contains | ||
1011 | an element whose tag name is "a" between the end of the | ||
1012 | list and the last marker on the list (or the start of | ||
1013 | the list if there is no marker on the list), then this | ||
1014 | is a parse error; act as if an end tag with the tag name | ||
1015 | "a" had been seen, then remove that element from the list | ||
1016 | of active formatting elements and the stack of open | ||
1017 | elements if the end tag didn't already remove it (it | ||
1018 | might not have if the element is not in table scope). */ | ||
1019 | $leng = count($this->a_formatting); | ||
1020 | |||
1021 | for($n = $leng - 1; $n >= 0; $n--) { | ||
1022 | if($this->a_formatting[$n] === self::MARKER) { | ||
1023 | break; | ||
1024 | |||
1025 | } elseif($this->a_formatting[$n]->tagName === 'a') { | ||
1026 | $a = $this->a_formatting[$n]; | ||
1027 | $this->emitToken(array( | ||
1028 | 'name' => 'a', | ||
1029 | 'type' => HTML5_Tokenizer::ENDTAG | ||
1030 | )); | ||
1031 | if (in_array($a, $this->a_formatting)) { | ||
1032 | $a_i = array_search($a, $this->a_formatting, true); | ||
1033 | if($a_i !== false) array_splice($this->a_formatting, $a_i, 1); | ||
1034 | } | ||
1035 | if (in_array($a, $this->stack)) { | ||
1036 | $a_i = array_search($a, $this->stack, true); | ||
1037 | if ($a_i !== false) array_splice($this->stack, $a_i, 1); | ||
1038 | } | ||
1039 | break; | ||
1040 | } | ||
1041 | } | ||
1042 | |||
1043 | /* Reconstruct the active formatting elements, if any. */ | ||
1044 | $this->reconstructActiveFormattingElements(); | ||
1045 | |||
1046 | /* Insert an HTML element for the token. */ | ||
1047 | $el = $this->insertElement($token); | ||
1048 | |||
1049 | /* Add that element to the list of active formatting | ||
1050 | elements. */ | ||
1051 | $this->a_formatting[] = $el; | ||
1052 | break; | ||
1053 | |||
1054 | case 'b': case 'big': case 'code': case 'em': case 'font': case 'i': | ||
1055 | case 's': case 'small': case 'strike': | ||
1056 | case 'strong': case 'tt': case 'u': | ||
1057 | /* Reconstruct the active formatting elements, if any. */ | ||
1058 | $this->reconstructActiveFormattingElements(); | ||
1059 | |||
1060 | /* Insert an HTML element for the token. */ | ||
1061 | $el = $this->insertElement($token); | ||
1062 | |||
1063 | /* Add that element to the list of active formatting | ||
1064 | elements. */ | ||
1065 | $this->a_formatting[] = $el; | ||
1066 | break; | ||
1067 | |||
1068 | case 'nobr': | ||
1069 | /* Reconstruct the active formatting elements, if any. */ | ||
1070 | $this->reconstructActiveFormattingElements(); | ||
1071 | |||
1072 | /* If the stack of open elements has a nobr element in | ||
1073 | * scope, then this is a parse error; act as if an end tag | ||
1074 | * with the tag name "nobr" had been seen, then once again | ||
1075 | * reconstruct the active formatting elements, if any. */ | ||
1076 | if ($this->elementInScope('nobr')) { | ||
1077 | $this->emitToken(array( | ||
1078 | 'name' => 'nobr', | ||
1079 | 'type' => HTML5_Tokenizer::ENDTAG, | ||
1080 | )); | ||
1081 | $this->reconstructActiveFormattingElements(); | ||
1082 | } | ||
1083 | |||
1084 | /* Insert an HTML element for the token. */ | ||
1085 | $el = $this->insertElement($token); | ||
1086 | |||
1087 | /* Add that element to the list of active formatting | ||
1088 | elements. */ | ||
1089 | $this->a_formatting[] = $el; | ||
1090 | break; | ||
1091 | |||
1092 | // another diversion | ||
1093 | |||
1094 | /* A start tag token whose tag name is "button" */ | ||
1095 | case 'button': | ||
1096 | /* If the stack of open elements has a button element in scope, | ||
1097 | then this is a parse error; act as if an end tag with the tag | ||
1098 | name "button" had been seen, then reprocess the token. (We don't | ||
1099 | do that. Unnecessary.) (I hope you're right! -- ezyang) */ | ||
1100 | if($this->elementInScope('button')) { | ||
1101 | $this->emitToken(array( | ||
1102 | 'name' => 'button', | ||
1103 | 'type' => HTML5_Tokenizer::ENDTAG | ||
1104 | )); | ||
1105 | } | ||
1106 | |||
1107 | /* Reconstruct the active formatting elements, if any. */ | ||
1108 | $this->reconstructActiveFormattingElements(); | ||
1109 | |||
1110 | /* Insert an HTML element for the token. */ | ||
1111 | $this->insertElement($token); | ||
1112 | |||
1113 | /* Insert a marker at the end of the list of active | ||
1114 | formatting elements. */ | ||
1115 | $this->a_formatting[] = self::MARKER; | ||
1116 | |||
1117 | $this->flag_frameset_ok = false; | ||
1118 | break; | ||
1119 | |||
1120 | case 'applet': case 'marquee': case 'object': | ||
1121 | /* Reconstruct the active formatting elements, if any. */ | ||
1122 | $this->reconstructActiveFormattingElements(); | ||
1123 | |||
1124 | /* Insert an HTML element for the token. */ | ||
1125 | $this->insertElement($token); | ||
1126 | |||
1127 | /* Insert a marker at the end of the list of active | ||
1128 | formatting elements. */ | ||
1129 | $this->a_formatting[] = self::MARKER; | ||
1130 | |||
1131 | $this->flag_frameset_ok = false; | ||
1132 | break; | ||
1133 | |||
1134 | // spec diversion | ||
1135 | |||
1136 | /* A start tag whose tag name is "table" */ | ||
1137 | case 'table': | ||
1138 | /* If the Document is not set to quirks mode, and the | ||
1139 | * stack of open elements has a p element in scope, then | ||
1140 | * act as if an end tag with the tag name "p" had been | ||
1141 | * seen. */ | ||
1142 | if($this->quirks_mode !== self::QUIRKS_MODE && | ||
1143 | $this->elementInScope('p')) { | ||
1144 | $this->emitToken(array( | ||
1145 | 'name' => 'p', | ||
1146 | 'type' => HTML5_Tokenizer::ENDTAG | ||
1147 | )); | ||
1148 | } | ||
1149 | |||
1150 | /* Insert an HTML element for the token. */ | ||
1151 | $this->insertElement($token); | ||
1152 | |||
1153 | $this->flag_frameset_ok = false; | ||
1154 | |||
1155 | /* Change the insertion mode to "in table". */ | ||
1156 | $this->mode = self::IN_TABLE; | ||
1157 | break; | ||
1158 | |||
1159 | /* A start tag whose tag name is one of: "area", "basefont", | ||
1160 | "bgsound", "br", "embed", "img", "input", "keygen", "spacer", "wbr" */ | ||
1161 | case 'area': case 'basefont': case 'bgsound': case 'br': | ||
1162 | case 'embed': case 'img': case 'input': case 'keygen': case 'spacer': | ||
1163 | case 'wbr': | ||
1164 | /* Reconstruct the active formatting elements, if any. */ | ||
1165 | $this->reconstructActiveFormattingElements(); | ||
1166 | |||
1167 | /* Insert an HTML element for the token. */ | ||
1168 | $this->insertElement($token); | ||
1169 | |||
1170 | /* Immediately pop the current node off the stack of open elements. */ | ||
1171 | array_pop($this->stack); | ||
1172 | |||
1173 | // YYY: Acknowledge the token's self-closing flag, if it is set. | ||
1174 | |||
1175 | $this->flag_frameset_ok = false; | ||
1176 | break; | ||
1177 | |||
1178 | case 'param': case 'source': | ||
1179 | /* Insert an HTML element for the token. */ | ||
1180 | $this->insertElement($token); | ||
1181 | |||
1182 | /* Immediately pop the current node off the stack of open elements. */ | ||
1183 | array_pop($this->stack); | ||
1184 | |||
1185 | // YYY: Acknowledge the token's self-closing flag, if it is set. | ||
1186 | break; | ||
1187 | |||
1188 | /* A start tag whose tag name is "hr" */ | ||
1189 | case 'hr': | ||
1190 | /* If the stack of open elements has a p element in scope, | ||
1191 | then act as if an end tag with the tag name p had been seen. */ | ||
1192 | if($this->elementInScope('p')) { | ||
1193 | $this->emitToken(array( | ||
1194 | 'name' => 'p', | ||
1195 | 'type' => HTML5_Tokenizer::ENDTAG | ||
1196 | )); | ||
1197 | } | ||
1198 | |||
1199 | /* Insert an HTML element for the token. */ | ||
1200 | $this->insertElement($token); | ||
1201 | |||
1202 | /* Immediately pop the current node off the stack of open elements. */ | ||
1203 | array_pop($this->stack); | ||
1204 | |||
1205 | // YYY: Acknowledge the token's self-closing flag, if it is set. | ||
1206 | |||
1207 | $this->flag_frameset_ok = false; | ||
1208 | break; | ||
1209 | |||
1210 | /* A start tag whose tag name is "image" */ | ||
1211 | case 'image': | ||
1212 | /* Parse error. Change the token's tag name to "img" and | ||
1213 | reprocess it. (Don't ask.) */ | ||
1214 | $token['name'] = 'img'; | ||
1215 | $this->emitToken($token); | ||
1216 | break; | ||
1217 | |||
1218 | /* A start tag whose tag name is "isindex" */ | ||
1219 | case 'isindex': | ||
1220 | /* Parse error. */ | ||
1221 | |||
1222 | /* If the form element pointer is not null, | ||
1223 | then ignore the token. */ | ||
1224 | if($this->form_pointer === null) { | ||
1225 | /* Act as if a start tag token with the tag name "form" had | ||
1226 | been seen. */ | ||
1227 | /* If the token has an attribute called "action", set | ||
1228 | * the action attribute on the resulting form | ||
1229 | * element to the value of the "action" attribute of | ||
1230 | * the token. */ | ||
1231 | $attr = array(); | ||
1232 | $action = $this->getAttr($token, 'action'); | ||
1233 | if ($action !== false) { | ||
1234 | $attr[] = array('name' => 'action', 'value' => $action); | ||
1235 | } | ||
1236 | $this->emitToken(array( | ||
1237 | 'name' => 'form', | ||
1238 | 'type' => HTML5_Tokenizer::STARTTAG, | ||
1239 | 'attr' => $attr | ||
1240 | )); | ||
1241 | |||
1242 | /* Act as if a start tag token with the tag name "hr" had | ||
1243 | been seen. */ | ||
1244 | $this->emitToken(array( | ||
1245 | 'name' => 'hr', | ||
1246 | 'type' => HTML5_Tokenizer::STARTTAG, | ||
1247 | 'attr' => array() | ||
1248 | )); | ||
1249 | |||
1250 | /* Act as if a start tag token with the tag name "label" | ||
1251 | had been seen. */ | ||
1252 | $this->emitToken(array( | ||
1253 | 'name' => 'label', | ||
1254 | 'type' => HTML5_Tokenizer::STARTTAG, | ||
1255 | 'attr' => array() | ||
1256 | )); | ||
1257 | |||
1258 | /* Act as if a stream of character tokens had been seen. */ | ||
1259 | $prompt = $this->getAttr($token, 'prompt'); | ||
1260 | if ($prompt === false) { | ||
1261 | $prompt = 'This is a searchable index. '. | ||
1262 | 'Insert your search keywords here: '; | ||
1263 | } | ||
1264 | $this->emitToken(array( | ||
1265 | 'data' => $prompt, | ||
1266 | 'type' => HTML5_Tokenizer::CHARACTER, | ||
1267 | )); | ||
1268 | |||
1269 | /* Act as if a start tag token with the tag name "input" | ||
1270 | had been seen, with all the attributes from the "isindex" | ||
1271 | token, except with the "name" attribute set to the value | ||
1272 | "isindex" (ignoring any explicit "name" attribute). */ | ||
1273 | $attr = array(); | ||
1274 | foreach ($token['attr'] as $keypair) { | ||
1275 | if ($keypair['name'] === 'name' || $keypair['name'] === 'action' || | ||
1276 | $keypair['name'] === 'prompt') continue; | ||
1277 | $attr[] = $keypair; | ||
1278 | } | ||
1279 | $attr[] = array('name' => 'name', 'value' => 'isindex'); | ||
1280 | |||
1281 | $this->emitToken(array( | ||
1282 | 'name' => 'input', | ||
1283 | 'type' => HTML5_Tokenizer::STARTTAG, | ||
1284 | 'attr' => $attr | ||
1285 | )); | ||
1286 | |||
1287 | /* Act as if an end tag token with the tag name "label" | ||
1288 | had been seen. */ | ||
1289 | $this->emitToken(array( | ||
1290 | 'name' => 'label', | ||
1291 | 'type' => HTML5_Tokenizer::ENDTAG | ||
1292 | )); | ||
1293 | |||
1294 | /* Act as if a start tag token with the tag name "hr" had | ||
1295 | been seen. */ | ||
1296 | $this->emitToken(array( | ||
1297 | 'name' => 'hr', | ||
1298 | 'type' => HTML5_Tokenizer::STARTTAG | ||
1299 | )); | ||
1300 | |||
1301 | /* Act as if an end tag token with the tag name "form" had | ||
1302 | been seen. */ | ||
1303 | $this->emitToken(array( | ||
1304 | 'name' => 'form', | ||
1305 | 'type' => HTML5_Tokenizer::ENDTAG | ||
1306 | )); | ||
1307 | } else { | ||
1308 | $this->ignored = true; | ||
1309 | } | ||
1310 | break; | ||
1311 | |||
1312 | /* A start tag whose tag name is "textarea" */ | ||
1313 | case 'textarea': | ||
1314 | $this->insertElement($token); | ||
1315 | |||
1316 | /* If the next token is a U+000A LINE FEED (LF) | ||
1317 | * character token, then ignore that token and move on to | ||
1318 | * the next one. (Newlines at the start of textarea | ||
1319 | * elements are ignored as an authoring convenience.) | ||
1320 | * need flag, see also <pre> */ | ||
1321 | $this->ignore_lf_token = 2; | ||
1322 | |||
1323 | $this->original_mode = $this->mode; | ||
1324 | $this->flag_frameset_ok = false; | ||
1325 | $this->mode = self::IN_CDATA_RCDATA; | ||
1326 | |||
1327 | /* Switch the tokeniser's content model flag to the | ||
1328 | RCDATA state. */ | ||
1329 | $this->content_model = HTML5_Tokenizer::RCDATA; | ||
1330 | break; | ||
1331 | |||
1332 | /* A start tag token whose tag name is "xmp" */ | ||
1333 | case 'xmp': | ||
1334 | /* If the stack of open elements has a p element in | ||
1335 | scope, then act as if an end tag with the tag name | ||
1336 | "p" has been seen. */ | ||
1337 | if ($this->elementInScope('p')) { | ||
1338 | $this->emitToken(array( | ||
1339 | 'name' => 'p', | ||
1340 | 'type' => HTML5_Tokenizer::ENDTAG | ||
1341 | )); | ||
1342 | } | ||
1343 | |||
1344 | /* Reconstruct the active formatting elements, if any. */ | ||
1345 | $this->reconstructActiveFormattingElements(); | ||
1346 | |||
1347 | $this->flag_frameset_ok = false; | ||
1348 | |||
1349 | $this->insertCDATAElement($token); | ||
1350 | break; | ||
1351 | |||
1352 | case 'iframe': | ||
1353 | $this->flag_frameset_ok = false; | ||
1354 | $this->insertCDATAElement($token); | ||
1355 | break; | ||
1356 | |||
1357 | case 'noembed': case 'noscript': | ||
1358 | // XSCRIPT: should check scripting flag | ||
1359 | $this->insertCDATAElement($token); | ||
1360 | break; | ||
1361 | |||
1362 | /* A start tag whose tag name is "select" */ | ||
1363 | case 'select': | ||
1364 | /* Reconstruct the active formatting elements, if any. */ | ||
1365 | $this->reconstructActiveFormattingElements(); | ||
1366 | |||
1367 | /* Insert an HTML element for the token. */ | ||
1368 | $this->insertElement($token); | ||
1369 | |||
1370 | $this->flag_frameset_ok = false; | ||
1371 | |||
1372 | /* If the insertion mode is one of "in table", "in caption", | ||
1373 | * "in column group", "in table body", "in row", or "in | ||
1374 | * cell", then switch the insertion mode to "in select in | ||
1375 | * table". Otherwise, switch the insertion mode to "in | ||
1376 | * select". */ | ||
1377 | if ( | ||
1378 | $this->mode === self::IN_TABLE || $this->mode === self::IN_CAPTION || | ||
1379 | $this->mode === self::IN_COLUMN_GROUP || $this->mode ==+self::IN_TABLE_BODY || | ||
1380 | $this->mode === self::IN_ROW || $this->mode === self::IN_CELL | ||
1381 | ) { | ||
1382 | $this->mode = self::IN_SELECT_IN_TABLE; | ||
1383 | } else { | ||
1384 | $this->mode = self::IN_SELECT; | ||
1385 | } | ||
1386 | break; | ||
1387 | |||
1388 | case 'option': case 'optgroup': | ||
1389 | if ($this->elementInScope('option')) { | ||
1390 | $this->emitToken(array( | ||
1391 | 'name' => 'option', | ||
1392 | 'type' => HTML5_Tokenizer::ENDTAG, | ||
1393 | )); | ||
1394 | } | ||
1395 | $this->reconstructActiveFormattingElements(); | ||
1396 | $this->insertElement($token); | ||
1397 | break; | ||
1398 | |||
1399 | case 'rp': case 'rt': | ||
1400 | /* If the stack of open elements has a ruby element in scope, then generate | ||
1401 | * implied end tags. If the current node is not then a ruby element, this is | ||
1402 | * a parse error; pop all the nodes from the current node up to the node | ||
1403 | * immediately before the bottommost ruby element on the stack of open elements. | ||
1404 | */ | ||
1405 | if ($this->elementInScope('ruby')) { | ||
1406 | $this->generateImpliedEndTags(); | ||
1407 | } | ||
1408 | $peek = false; | ||
1409 | do { | ||
1410 | if ($peek) { | ||
1411 | // parse error | ||
1412 | } | ||
1413 | $peek = array_pop($this->stack); | ||
1414 | } while ($peek->tagName !== 'ruby'); | ||
1415 | $this->stack[] = $peek; // we popped one too many | ||
1416 | $this->insertElement($token); | ||
1417 | break; | ||
1418 | |||
1419 | // spec diversion | ||
1420 | |||
1421 | case 'math': | ||
1422 | $this->reconstructActiveFormattingElements(); | ||
1423 | $token = $this->adjustMathMLAttributes($token); | ||
1424 | $token = $this->adjustForeignAttributes($token); | ||
1425 | $this->insertForeignElement($token, self::NS_MATHML); | ||
1426 | if (isset($token['self-closing'])) { | ||
1427 | // XERROR: acknowledge the token's self-closing flag | ||
1428 | array_pop($this->stack); | ||
1429 | } | ||
1430 | if ($this->mode !== self::IN_FOREIGN_CONTENT) { | ||
1431 | $this->secondary_mode = $this->mode; | ||
1432 | $this->mode = self::IN_FOREIGN_CONTENT; | ||
1433 | } | ||
1434 | break; | ||
1435 | |||
1436 | case 'svg': | ||
1437 | $this->reconstructActiveFormattingElements(); | ||
1438 | $token = $this->adjustSVGAttributes($token); | ||
1439 | $token = $this->adjustForeignAttributes($token); | ||
1440 | $this->insertForeignElement($token, self::NS_SVG); | ||
1441 | if (isset($token['self-closing'])) { | ||
1442 | // XERROR: acknowledge the token's self-closing flag | ||
1443 | array_pop($this->stack); | ||
1444 | } | ||
1445 | if ($this->mode !== self::IN_FOREIGN_CONTENT) { | ||
1446 | $this->secondary_mode = $this->mode; | ||
1447 | $this->mode = self::IN_FOREIGN_CONTENT; | ||
1448 | } | ||
1449 | break; | ||
1450 | |||
1451 | case 'caption': case 'col': case 'colgroup': case 'frame': case 'head': | ||
1452 | case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': case 'tr': | ||
1453 | // parse error | ||
1454 | break; | ||
1455 | |||
1456 | /* A start tag token not covered by the previous entries */ | ||
1457 | default: | ||
1458 | /* Reconstruct the active formatting elements, if any. */ | ||
1459 | $this->reconstructActiveFormattingElements(); | ||
1460 | |||
1461 | $this->insertElement($token); | ||
1462 | /* This element will be a phrasing element. */ | ||
1463 | break; | ||
1464 | } | ||
1465 | break; | ||
1466 | |||
1467 | case HTML5_Tokenizer::ENDTAG: | ||
1468 | switch($token['name']) { | ||
1469 | /* An end tag with the tag name "body" */ | ||
1470 | case 'body': | ||
1471 | /* If the stack of open elements does not have a body | ||
1472 | * element in scope, this is a parse error; ignore the | ||
1473 | * token. */ | ||
1474 | if(!$this->elementInScope('body')) { | ||
1475 | $this->ignored = true; | ||
1476 | |||
1477 | /* Otherwise, if there is a node in the stack of open | ||
1478 | * elements that is not either a dc element, a dd element, | ||
1479 | * a ds element, a dt element, an li element, an optgroup | ||
1480 | * element, an option element, a p element, an rp element, | ||
1481 | * an rt element, a tbody element, a td element, a tfoot | ||
1482 | * element, a th element, a thead element, a tr element, | ||
1483 | * the body element, or the html element, then this is a | ||
1484 | * parse error. | ||
1485 | */ | ||
1486 | } else { | ||
1487 | // XERROR: implement this check for parse error | ||
1488 | } | ||
1489 | |||
1490 | /* Change the insertion mode to "after body". */ | ||
1491 | $this->mode = self::AFTER_BODY; | ||
1492 | break; | ||
1493 | |||
1494 | /* An end tag with the tag name "html" */ | ||
1495 | case 'html': | ||
1496 | /* Act as if an end tag with tag name "body" had been seen, | ||
1497 | then, if that token wasn't ignored, reprocess the current | ||
1498 | token. */ | ||
1499 | $this->emitToken(array( | ||
1500 | 'name' => 'body', | ||
1501 | 'type' => HTML5_Tokenizer::ENDTAG | ||
1502 | )); | ||
1503 | |||
1504 | if (!$this->ignored) $this->emitToken($token); | ||
1505 | break; | ||
1506 | |||
1507 | case 'address': case 'article': case 'aside': case 'blockquote': | ||
1508 | case 'center': case 'datagrid': case 'details': case 'dir': | ||
1509 | case 'div': case 'dl': case 'fieldset': case 'footer': | ||
1510 | case 'header': case 'hgroup': case 'listing': case 'menu': | ||
1511 | case 'nav': case 'ol': case 'pre': case 'section': case 'ul': | ||
1512 | /* If the stack of open elements has an element in scope | ||
1513 | with the same tag name as that of the token, then generate | ||
1514 | implied end tags. */ | ||
1515 | if($this->elementInScope($token['name'])) { | ||
1516 | $this->generateImpliedEndTags(); | ||
1517 | |||
1518 | /* Now, if the current node is not an element with | ||
1519 | the same tag name as that of the token, then this | ||
1520 | is a parse error. */ | ||
1521 | // XERROR: implement parse error logic | ||
1522 | |||
1523 | /* If the stack of open elements has an element in | ||
1524 | scope with the same tag name as that of the token, | ||
1525 | then pop elements from this stack until an element | ||
1526 | with that tag name has been popped from the stack. */ | ||
1527 | do { | ||
1528 | $node = array_pop($this->stack); | ||
1529 | } while ($node->tagName !== $token['name']); | ||
1530 | } else { | ||
1531 | // parse error | ||
1532 | } | ||
1533 | break; | ||
1534 | |||
1535 | /* An end tag whose tag name is "form" */ | ||
1536 | case 'form': | ||
1537 | /* Let node be the element that the form element pointer is set to. */ | ||
1538 | $node = $this->form_pointer; | ||
1539 | /* Set the form element pointer to null. */ | ||
1540 | $this->form_pointer = null; | ||
1541 | /* If node is null or the stack of open elements does not | ||
1542 | * have node in scope, then this is a parse error; ignore the token. */ | ||
1543 | if ($node === null || !in_array($node, $this->stack)) { | ||
1544 | // parse error | ||
1545 | $this->ignored = true; | ||
1546 | } else { | ||
1547 | /* 1. Generate implied end tags. */ | ||
1548 | $this->generateImpliedEndTags(); | ||
1549 | /* 2. If the current node is not node, then this is a parse error. */ | ||
1550 | if (end($this->stack) !== $node) { | ||
1551 | // parse error | ||
1552 | } | ||
1553 | /* 3. Remove node from the stack of open elements. */ | ||
1554 | array_splice($this->stack, array_search($node, $this->stack, true), 1); | ||
1555 | } | ||
1556 | |||
1557 | break; | ||
1558 | |||
1559 | /* An end tag whose tag name is "p" */ | ||
1560 | case 'p': | ||
1561 | /* If the stack of open elements has a p element in scope, | ||
1562 | then generate implied end tags, except for p elements. */ | ||
1563 | if($this->elementInScope('p')) { | ||
1564 | /* Generate implied end tags, except for elements with | ||
1565 | * the same tag name as the token. */ | ||
1566 | $this->generateImpliedEndTags(array('p')); | ||
1567 | |||
1568 | /* If the current node is not a p element, then this is | ||
1569 | a parse error. */ | ||
1570 | // XERROR: implement | ||
1571 | |||
1572 | /* Pop elements from the stack of open elements until | ||
1573 | * an element with the same tag name as the token has | ||
1574 | * been popped from the stack. */ | ||
1575 | do { | ||
1576 | $node = array_pop($this->stack); | ||
1577 | } while ($node->tagName !== 'p'); | ||
1578 | |||
1579 | } else { | ||
1580 | // parse error | ||
1581 | $this->emitToken(array( | ||
1582 | 'name' => 'p', | ||
1583 | 'type' => HTML5_Tokenizer::STARTTAG, | ||
1584 | )); | ||
1585 | $this->emitToken($token); | ||
1586 | } | ||
1587 | break; | ||
1588 | |||
1589 | /* An end tag whose tag name is "li" */ | ||
1590 | case 'li': | ||
1591 | /* If the stack of open elements does not have an element | ||
1592 | * in list item scope with the same tag name as that of the | ||
1593 | * token, then this is a parse error; ignore the token. */ | ||
1594 | if ($this->elementInScope($token['name'], self::SCOPE_LISTITEM)) { | ||
1595 | /* Generate implied end tags, except for elements with the | ||
1596 | * same tag name as the token. */ | ||
1597 | $this->generateImpliedEndTags(array($token['name'])); | ||
1598 | /* If the current node is not an element with the same tag | ||
1599 | * name as that of the token, then this is a parse error. */ | ||
1600 | // XERROR: parse error | ||
1601 | /* Pop elements from the stack of open elements until an | ||
1602 | * element with the same tag name as the token has been | ||
1603 | * popped from the stack. */ | ||
1604 | do { | ||
1605 | $node = array_pop($this->stack); | ||
1606 | } while ($node->tagName !== $token['name']); | ||
1607 | } else { | ||
1608 | // XERROR: parse error | ||
1609 | } | ||
1610 | break; | ||
1611 | |||
1612 | /* An end tag whose tag name is "dc", "dd", "ds", "dt" */ | ||
1613 | case 'dc': case 'dd': case 'ds': case 'dt': | ||
1614 | if($this->elementInScope($token['name'])) { | ||
1615 | $this->generateImpliedEndTags(array($token['name'])); | ||
1616 | |||
1617 | /* If the current node is not an element with the same | ||
1618 | tag name as the token, then this is a parse error. */ | ||
1619 | // XERROR: implement parse error | ||
1620 | |||
1621 | /* Pop elements from the stack of open elements until | ||
1622 | * an element with the same tag name as the token has | ||
1623 | * been popped from the stack. */ | ||
1624 | do { | ||
1625 | $node = array_pop($this->stack); | ||
1626 | } while ($node->tagName !== $token['name']); | ||
1627 | |||
1628 | } else { | ||
1629 | // XERROR: parse error | ||
1630 | } | ||
1631 | break; | ||
1632 | |||
1633 | /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4", | ||
1634 | "h5", "h6" */ | ||
1635 | case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': | ||
1636 | $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'); | ||
1637 | |||
1638 | /* If the stack of open elements has in scope an element whose | ||
1639 | tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then | ||
1640 | generate implied end tags. */ | ||
1641 | if($this->elementInScope($elements)) { | ||
1642 | $this->generateImpliedEndTags(); | ||
1643 | |||
1644 | /* Now, if the current node is not an element with the same | ||
1645 | tag name as that of the token, then this is a parse error. */ | ||
1646 | // XERROR: implement parse error | ||
1647 | |||
1648 | /* If the stack of open elements has in scope an element | ||
1649 | whose tag name is one of "h1", "h2", "h3", "h4", "h5", or | ||
1650 | "h6", then pop elements from the stack until an element | ||
1651 | with one of those tag names has been popped from the stack. */ | ||
1652 | do { | ||
1653 | $node = array_pop($this->stack); | ||
1654 | } while (!in_array($node->tagName, $elements)); | ||
1655 | } else { | ||
1656 | // parse error | ||
1657 | } | ||
1658 | break; | ||
1659 | |||
1660 | /* An end tag whose tag name is one of: "a", "b", "big", "code", "em", | ||
1661 | "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ | ||
1662 | case 'a': case 'b': case 'big': case 'code': case 'em': case 'font': | ||
1663 | case 'i': case 'nobr': case 's': case 'small': case 'strike': | ||
1664 | case 'strong': case 'tt': case 'u': | ||
1665 | // XERROR: generally speaking this needs parse error logic | ||
1666 | /* 1. Let the formatting element be the last element in | ||
1667 | the list of active formatting elements that: | ||
1668 | * is between the end of the list and the last scope | ||
1669 | marker in the list, if any, or the start of the list | ||
1670 | otherwise, and | ||
1671 | * has the same tag name as the token. | ||
1672 | */ | ||
1673 | while(true) { | ||
1674 | for($a = count($this->a_formatting) - 1; $a >= 0; $a--) { | ||
1675 | if($this->a_formatting[$a] === self::MARKER) { | ||
1676 | break; | ||
1677 | |||
1678 | } elseif($this->a_formatting[$a]->tagName === $token['name']) { | ||
1679 | $formatting_element = $this->a_formatting[$a]; | ||
1680 | $in_stack = in_array($formatting_element, $this->stack, true); | ||
1681 | $fe_af_pos = $a; | ||
1682 | break; | ||
1683 | } | ||
1684 | } | ||
1685 | |||
1686 | /* If there is no such node, or, if that node is | ||
1687 | also in the stack of open elements but the element | ||
1688 | is not in scope, then this is a parse error. Abort | ||
1689 | these steps. The token is ignored. */ | ||
1690 | if(!isset($formatting_element) || ($in_stack && | ||
1691 | !$this->elementInScope($token['name']))) { | ||
1692 | $this->ignored = true; | ||
1693 | break; | ||
1694 | |||
1695 | /* Otherwise, if there is such a node, but that node | ||
1696 | is not in the stack of open elements, then this is a | ||
1697 | parse error; remove the element from the list, and | ||
1698 | abort these steps. */ | ||
1699 | } elseif(isset($formatting_element) && !$in_stack) { | ||
1700 | unset($this->a_formatting[$fe_af_pos]); | ||
1701 | $this->a_formatting = array_merge($this->a_formatting); | ||
1702 | break; | ||
1703 | } | ||
1704 | |||
1705 | /* Otherwise, there is a formatting element and that | ||
1706 | * element is in the stack and is in scope. If the | ||
1707 | * element is not the current node, this is a parse | ||
1708 | * error. In any case, proceed with the algorithm as | ||
1709 | * written in the following steps. */ | ||
1710 | // XERROR: implement me | ||
1711 | |||
1712 | /* 2. Let the furthest block be the topmost node in the | ||
1713 | stack of open elements that is lower in the stack | ||
1714 | than the formatting element, and is not an element in | ||
1715 | the phrasing or formatting categories. There might | ||
1716 | not be one. */ | ||
1717 | $fe_s_pos = array_search($formatting_element, $this->stack, true); | ||
1718 | $length = count($this->stack); | ||
1719 | |||
1720 | for($s = $fe_s_pos + 1; $s < $length; $s++) { | ||
1721 | $category = $this->getElementCategory($this->stack[$s]); | ||
1722 | |||
1723 | if($category !== self::PHRASING && $category !== self::FORMATTING) { | ||
1724 | $furthest_block = $this->stack[$s]; | ||
1725 | break; | ||
1726 | } | ||
1727 | } | ||
1728 | |||
1729 | /* 3. If there is no furthest block, then the UA must | ||
1730 | skip the subsequent steps and instead just pop all | ||
1731 | the nodes from the bottom of the stack of open | ||
1732 | elements, from the current node up to the formatting | ||
1733 | element, and remove the formatting element from the | ||
1734 | list of active formatting elements. */ | ||
1735 | if(!isset($furthest_block)) { | ||
1736 | for($n = $length - 1; $n >= $fe_s_pos; $n--) { | ||
1737 | array_pop($this->stack); | ||
1738 | } | ||
1739 | |||
1740 | unset($this->a_formatting[$fe_af_pos]); | ||
1741 | $this->a_formatting = array_merge($this->a_formatting); | ||
1742 | break; | ||
1743 | } | ||
1744 | |||
1745 | /* 4. Let the common ancestor be the element | ||
1746 | immediately above the formatting element in the stack | ||
1747 | of open elements. */ | ||
1748 | $common_ancestor = $this->stack[$fe_s_pos - 1]; | ||
1749 | |||
1750 | /* 5. Let a bookmark note the position of the | ||
1751 | formatting element in the list of active formatting | ||
1752 | elements relative to the elements on either side | ||
1753 | of it in the list. */ | ||
1754 | $bookmark = $fe_af_pos; | ||
1755 | |||
1756 | /* 6. Let node and last node be the furthest block. | ||
1757 | Follow these steps: */ | ||
1758 | $node = $furthest_block; | ||
1759 | $last_node = $furthest_block; | ||
1760 | |||
1761 | while(true) { | ||
1762 | for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) { | ||
1763 | /* 6.1 Let node be the element immediately | ||
1764 | prior to node in the stack of open elements. */ | ||
1765 | $node = $this->stack[$n]; | ||
1766 | |||
1767 | /* 6.2 If node is not in the list of active | ||
1768 | formatting elements, then remove node from | ||
1769 | the stack of open elements and then go back | ||
1770 | to step 1. */ | ||
1771 | if(!in_array($node, $this->a_formatting, true)) { | ||
1772 | array_splice($this->stack, $n, 1); | ||
1773 | |||
1774 | } else { | ||
1775 | break; | ||
1776 | } | ||
1777 | } | ||
1778 | |||
1779 | /* 6.3 Otherwise, if node is the formatting | ||
1780 | element, then go to the next step in the overall | ||
1781 | algorithm. */ | ||
1782 | if($node === $formatting_element) { | ||
1783 | break; | ||
1784 | |||
1785 | /* 6.4 Otherwise, if last node is the furthest | ||
1786 | block, then move the aforementioned bookmark to | ||
1787 | be immediately after the node in the list of | ||
1788 | active formatting elements. */ | ||
1789 | } elseif($last_node === $furthest_block) { | ||
1790 | $bookmark = array_search($node, $this->a_formatting, true) + 1; | ||
1791 | } | ||
1792 | |||
1793 | /* 6.5 Create an element for the token for which | ||
1794 | * the element node was created, replace the entry | ||
1795 | * for node in the list of active formatting | ||
1796 | * elements with an entry for the new element, | ||
1797 | * replace the entry for node in the stack of open | ||
1798 | * elements with an entry for the new element, and | ||
1799 | * let node be the new element. */ | ||
1800 | // we don't know what the token is anymore | ||
1801 | // XDOM | ||
1802 | $clone = $node->cloneNode(); | ||
1803 | $a_pos = array_search($node, $this->a_formatting, true); | ||
1804 | $s_pos = array_search($node, $this->stack, true); | ||
1805 | $this->a_formatting[$a_pos] = $clone; | ||
1806 | $this->stack[$s_pos] = $clone; | ||
1807 | $node = $clone; | ||
1808 | |||
1809 | /* 6.6 Insert last node into node, first removing | ||
1810 | it from its previous parent node if any. */ | ||
1811 | // XDOM | ||
1812 | if($last_node->parentNode !== null) { | ||
1813 | $last_node->parentNode->removeChild($last_node); | ||
1814 | } | ||
1815 | |||
1816 | // XDOM | ||
1817 | $node->appendChild($last_node); | ||
1818 | |||
1819 | /* 6.7 Let last node be node. */ | ||
1820 | $last_node = $node; | ||
1821 | |||
1822 | /* 6.8 Return to step 1 of this inner set of steps. */ | ||
1823 | } | ||
1824 | |||
1825 | /* 7. If the common ancestor node is a table, tbody, | ||
1826 | * tfoot, thead, or tr element, then, foster parent | ||
1827 | * whatever last node ended up being in the previous | ||
1828 | * step, first removing it from its previous parent | ||
1829 | * node if any. */ | ||
1830 | // XDOM | ||
1831 | if ($last_node->parentNode) { // common step | ||
1832 | $last_node->parentNode->removeChild($last_node); | ||
1833 | } | ||
1834 | if (in_array($common_ancestor->tagName, array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { | ||
1835 | $this->fosterParent($last_node); | ||
1836 | /* Otherwise, append whatever last node ended up being | ||
1837 | * in the previous step to the common ancestor node, | ||
1838 | * first removing it from its previous parent node if | ||
1839 | * any. */ | ||
1840 | } else { | ||
1841 | // XDOM | ||
1842 | $common_ancestor->appendChild($last_node); | ||
1843 | } | ||
1844 | |||
1845 | /* 8. Create an element for the token for which the | ||
1846 | * formatting element was created. */ | ||
1847 | // XDOM | ||
1848 | $clone = $formatting_element->cloneNode(); | ||
1849 | |||
1850 | /* 9. Take all of the child nodes of the furthest | ||
1851 | block and append them to the element created in the | ||
1852 | last step. */ | ||
1853 | // XDOM | ||
1854 | while($furthest_block->hasChildNodes()) { | ||
1855 | $child = $furthest_block->firstChild; | ||
1856 | $furthest_block->removeChild($child); | ||
1857 | $clone->appendChild($child); | ||
1858 | } | ||
1859 | |||
1860 | /* 10. Append that clone to the furthest block. */ | ||
1861 | // XDOM | ||
1862 | $furthest_block->appendChild($clone); | ||
1863 | |||
1864 | /* 11. Remove the formatting element from the list | ||
1865 | of active formatting elements, and insert the new element | ||
1866 | into the list of active formatting elements at the | ||
1867 | position of the aforementioned bookmark. */ | ||
1868 | $fe_af_pos = array_search($formatting_element, $this->a_formatting, true); | ||
1869 | array_splice($this->a_formatting, $fe_af_pos, 1); | ||
1870 | |||
1871 | $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1); | ||
1872 | $af_part2 = array_slice($this->a_formatting, $bookmark); | ||
1873 | $this->a_formatting = array_merge($af_part1, array($clone), $af_part2); | ||
1874 | |||
1875 | /* 12. Remove the formatting element from the stack | ||
1876 | of open elements, and insert the new element into the stack | ||
1877 | of open elements immediately below the position of the | ||
1878 | furthest block in that stack. */ | ||
1879 | $fe_s_pos = array_search($formatting_element, $this->stack, true); | ||
1880 | array_splice($this->stack, $fe_s_pos, 1); | ||
1881 | |||
1882 | $fb_s_pos = array_search($furthest_block, $this->stack, true); | ||
1883 | $s_part1 = array_slice($this->stack, 0, $fb_s_pos + 1); | ||
1884 | $s_part2 = array_slice($this->stack, $fb_s_pos + 1); | ||
1885 | $this->stack = array_merge($s_part1, array($clone), $s_part2); | ||
1886 | |||
1887 | /* 13. Jump back to step 1 in this series of steps. */ | ||
1888 | unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block); | ||
1889 | } | ||
1890 | break; | ||
1891 | |||
1892 | case 'applet': case 'button': case 'marquee': case 'object': | ||
1893 | /* If the stack of open elements has an element in scope whose | ||
1894 | tag name matches the tag name of the token, then generate implied | ||
1895 | tags. */ | ||
1896 | if($this->elementInScope($token['name'])) { | ||
1897 | $this->generateImpliedEndTags(); | ||
1898 | |||
1899 | /* Now, if the current node is not an element with the same | ||
1900 | tag name as the token, then this is a parse error. */ | ||
1901 | // XERROR: implement logic | ||
1902 | |||
1903 | /* Pop elements from the stack of open elements until | ||
1904 | * an element with the same tag name as the token has | ||
1905 | * been popped from the stack. */ | ||
1906 | do { | ||
1907 | $node = array_pop($this->stack); | ||
1908 | } while ($node->tagName !== $token['name']); | ||
1909 | |||
1910 | /* Clear the list of active formatting elements up to the | ||
1911 | * last marker. */ | ||
1912 | $keys = array_keys($this->a_formatting, self::MARKER, true); | ||
1913 | $marker = end($keys); | ||
1914 | |||
1915 | for($n = count($this->a_formatting) - 1; $n > $marker; $n--) { | ||
1916 | array_pop($this->a_formatting); | ||
1917 | } | ||
1918 | } else { | ||
1919 | // parse error | ||
1920 | } | ||
1921 | break; | ||
1922 | |||
1923 | case 'br': | ||
1924 | // Parse error | ||
1925 | $this->emitToken(array( | ||
1926 | 'name' => 'br', | ||
1927 | 'type' => HTML5_Tokenizer::STARTTAG, | ||
1928 | )); | ||
1929 | break; | ||
1930 | |||
1931 | /* An end tag token not covered by the previous entries */ | ||
1932 | default: | ||
1933 | for($n = count($this->stack) - 1; $n >= 0; $n--) { | ||
1934 | /* Initialise node to be the current node (the bottommost | ||
1935 | node of the stack). */ | ||
1936 | $node = $this->stack[$n]; | ||
1937 | |||
1938 | /* If node has the same tag name as the end tag token, | ||
1939 | then: */ | ||
1940 | if($token['name'] === $node->tagName) { | ||
1941 | /* Generate implied end tags. */ | ||
1942 | $this->generateImpliedEndTags(); | ||
1943 | |||
1944 | /* If the tag name of the end tag token does not | ||
1945 | match the tag name of the current node, this is a | ||
1946 | parse error. */ | ||
1947 | // XERROR: implement this | ||
1948 | |||
1949 | /* Pop all the nodes from the current node up to | ||
1950 | node, including node, then stop these steps. */ | ||
1951 | // XSKETCHY | ||
1952 | do { | ||
1953 | $pop = array_pop($this->stack); | ||
1954 | } while ($pop !== $node); | ||
1955 | break; | ||
1956 | |||
1957 | } else { | ||
1958 | $category = $this->getElementCategory($node); | ||
1959 | |||
1960 | if($category !== self::FORMATTING && $category !== self::PHRASING) { | ||
1961 | /* Otherwise, if node is in neither the formatting | ||
1962 | category nor the phrasing category, then this is a | ||
1963 | parse error. Stop this algorithm. The end tag token | ||
1964 | is ignored. */ | ||
1965 | $this->ignored = true; | ||
1966 | break; | ||
1967 | // parse error | ||
1968 | } | ||
1969 | } | ||
1970 | /* Set node to the previous entry in the stack of open elements. Loop. */ | ||
1971 | } | ||
1972 | break; | ||
1973 | } | ||
1974 | break; | ||
1975 | } | ||
1976 | break; | ||
1977 | |||
1978 | case self::IN_CDATA_RCDATA: | ||
1979 | if ( | ||
1980 | $token['type'] === HTML5_Tokenizer::CHARACTER || | ||
1981 | $token['type'] === HTML5_Tokenizer::SPACECHARACTER | ||
1982 | ) { | ||
1983 | $this->insertText($token['data']); | ||
1984 | } elseif ($token['type'] === HTML5_Tokenizer::EOF) { | ||
1985 | // parse error | ||
1986 | /* If the current node is a script element, mark the script | ||
1987 | * element as "already executed". */ | ||
1988 | // probably not necessary | ||
1989 | array_pop($this->stack); | ||
1990 | $this->mode = $this->original_mode; | ||
1991 | $this->emitToken($token); | ||
1992 | } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'script') { | ||
1993 | array_pop($this->stack); | ||
1994 | $this->mode = $this->original_mode; | ||
1995 | // we're ignoring all of the execution stuff | ||
1996 | } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG) { | ||
1997 | array_pop($this->stack); | ||
1998 | $this->mode = $this->original_mode; | ||
1999 | } | ||
2000 | break; | ||
2001 | |||
2002 | case self::IN_TABLE: | ||
2003 | $clear = array('html', 'table'); | ||
2004 | |||
2005 | /* A character token */ | ||
2006 | if ($token['type'] === HTML5_Tokenizer::CHARACTER || | ||
2007 | $token['type'] === HTML5_Tokenizer::SPACECHARACTER) { | ||
2008 | /* Let the pending table character tokens | ||
2009 | * be an empty list of tokens. */ | ||
2010 | $this->pendingTableCharacters = ""; | ||
2011 | $this->pendingTableCharactersDirty = false; | ||
2012 | /* Let the original insertion mode be the current | ||
2013 | * insertion mode. */ | ||
2014 | $this->original_mode = $this->mode; | ||
2015 | /* Switch the insertion mode to | ||
2016 | * "in table text" and | ||
2017 | * reprocess the token. */ | ||
2018 | $this->mode = self::IN_TABLE_TEXT; | ||
2019 | $this->emitToken($token); | ||
2020 | |||
2021 | /* A comment token */ | ||
2022 | } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { | ||
2023 | /* Append a Comment node to the current node with the data | ||
2024 | attribute set to the data given in the comment token. */ | ||
2025 | $this->insertComment($token['data']); | ||
2026 | |||
2027 | } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) { | ||
2028 | // parse error | ||
2029 | |||
2030 | /* A start tag whose tag name is "caption" */ | ||
2031 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
2032 | $token['name'] === 'caption') { | ||
2033 | /* Clear the stack back to a table context. */ | ||
2034 | $this->clearStackToTableContext($clear); | ||
2035 | |||
2036 | /* Insert a marker at the end of the list of active | ||
2037 | formatting elements. */ | ||
2038 | $this->a_formatting[] = self::MARKER; | ||
2039 | |||
2040 | /* Insert an HTML element for the token, then switch the | ||
2041 | insertion mode to "in caption". */ | ||
2042 | $this->insertElement($token); | ||
2043 | $this->mode = self::IN_CAPTION; | ||
2044 | |||
2045 | /* A start tag whose tag name is "colgroup" */ | ||
2046 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
2047 | $token['name'] === 'colgroup') { | ||
2048 | /* Clear the stack back to a table context. */ | ||
2049 | $this->clearStackToTableContext($clear); | ||
2050 | |||
2051 | /* Insert an HTML element for the token, then switch the | ||
2052 | insertion mode to "in column group". */ | ||
2053 | $this->insertElement($token); | ||
2054 | $this->mode = self::IN_COLUMN_GROUP; | ||
2055 | |||
2056 | /* A start tag whose tag name is "col" */ | ||
2057 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
2058 | $token['name'] === 'col') { | ||
2059 | $this->emitToken(array( | ||
2060 | 'name' => 'colgroup', | ||
2061 | 'type' => HTML5_Tokenizer::STARTTAG, | ||
2062 | 'attr' => array() | ||
2063 | )); | ||
2064 | |||
2065 | $this->emitToken($token); | ||
2066 | |||
2067 | /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */ | ||
2068 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'], | ||
2069 | array('tbody', 'tfoot', 'thead'))) { | ||
2070 | /* Clear the stack back to a table context. */ | ||
2071 | $this->clearStackToTableContext($clear); | ||
2072 | |||
2073 | /* Insert an HTML element for the token, then switch the insertion | ||
2074 | mode to "in table body". */ | ||
2075 | $this->insertElement($token); | ||
2076 | $this->mode = self::IN_TABLE_BODY; | ||
2077 | |||
2078 | /* A start tag whose tag name is one of: "td", "th", "tr" */ | ||
2079 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
2080 | in_array($token['name'], array('td', 'th', 'tr'))) { | ||
2081 | /* Act as if a start tag token with the tag name "tbody" had been | ||
2082 | seen, then reprocess the current token. */ | ||
2083 | $this->emitToken(array( | ||
2084 | 'name' => 'tbody', | ||
2085 | 'type' => HTML5_Tokenizer::STARTTAG, | ||
2086 | 'attr' => array() | ||
2087 | )); | ||
2088 | |||
2089 | $this->emitToken($token); | ||
2090 | |||
2091 | /* A start tag whose tag name is "table" */ | ||
2092 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
2093 | $token['name'] === 'table') { | ||
2094 | /* Parse error. Act as if an end tag token with the tag name "table" | ||
2095 | had been seen, then, if that token wasn't ignored, reprocess the | ||
2096 | current token. */ | ||
2097 | $this->emitToken(array( | ||
2098 | 'name' => 'table', | ||
2099 | 'type' => HTML5_Tokenizer::ENDTAG | ||
2100 | )); | ||
2101 | |||
2102 | if (!$this->ignored) $this->emitToken($token); | ||
2103 | |||
2104 | /* An end tag whose tag name is "table" */ | ||
2105 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && | ||
2106 | $token['name'] === 'table') { | ||
2107 | /* If the stack of open elements does not have an element in table | ||
2108 | scope with the same tag name as the token, this is a parse error. | ||
2109 | Ignore the token. (fragment case) */ | ||
2110 | if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) { | ||
2111 | $this->ignored = true; | ||
2112 | |||
2113 | /* Otherwise: */ | ||
2114 | } else { | ||
2115 | do { | ||
2116 | $node = array_pop($this->stack); | ||
2117 | } while ($node->tagName !== 'table'); | ||
2118 | |||
2119 | /* Reset the insertion mode appropriately. */ | ||
2120 | $this->resetInsertionMode(); | ||
2121 | } | ||
2122 | |||
2123 | /* An end tag whose tag name is one of: "body", "caption", "col", | ||
2124 | "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ | ||
2125 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'], | ||
2126 | array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', | ||
2127 | 'tfoot', 'th', 'thead', 'tr'))) { | ||
2128 | // Parse error. Ignore the token. | ||
2129 | |||
2130 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
2131 | ($token['name'] === 'style' || $token['name'] === 'script')) { | ||
2132 | $this->processWithRulesFor($token, self::IN_HEAD); | ||
2133 | |||
2134 | } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'input' && | ||
2135 | // assignment is intentional | ||
2136 | /* If the token does not have an attribute with the name "type", or | ||
2137 | * if it does, but that attribute's value is not an ASCII | ||
2138 | * case-insensitive match for the string "hidden", then: act as | ||
2139 | * described in the "anything else" entry below. */ | ||
2140 | ($type = $this->getAttr($token, 'type')) && strtolower($type) === 'hidden') { | ||
2141 | // I.e., if it's an input with the type attribute == 'hidden' | ||
2142 | /* Otherwise */ | ||
2143 | // parse error | ||
2144 | $this->insertElement($token); | ||
2145 | array_pop($this->stack); | ||
2146 | } elseif ($token['type'] === HTML5_Tokenizer::EOF) { | ||
2147 | /* If the current node is not the root html element, then this is a parse error. */ | ||
2148 | if (end($this->stack)->tagName !== 'html') { | ||
2149 | // Note: It can only be the current node in the fragment case. | ||
2150 | // parse error | ||
2151 | } | ||
2152 | /* Stop parsing. */ | ||
2153 | /* Anything else */ | ||
2154 | } else { | ||
2155 | /* Parse error. Process the token as if the insertion mode was "in | ||
2156 | body", with the following exception: */ | ||
2157 | |||
2158 | $old = $this->foster_parent; | ||
2159 | $this->foster_parent = true; | ||
2160 | $this->processWithRulesFor($token, self::IN_BODY); | ||
2161 | $this->foster_parent = $old; | ||
2162 | } | ||
2163 | break; | ||
2164 | |||
2165 | case self::IN_TABLE_TEXT: | ||
2166 | /* A character token */ | ||
2167 | if($token['type'] === HTML5_Tokenizer::CHARACTER) { | ||
2168 | /* Append the character token to the pending table | ||
2169 | * character tokens list. */ | ||
2170 | $this->pendingTableCharacters .= $token['data']; | ||
2171 | $this->pendingTableCharactersDirty = true; | ||
2172 | } elseif ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { | ||
2173 | $this->pendingTableCharacters .= $token['data']; | ||
2174 | /* Anything else */ | ||
2175 | } else { | ||
2176 | if ($this->pendingTableCharacters !== '' && is_string($this->pendingTableCharacters)) { | ||
2177 | /* If any of the tokens in the pending table character tokens list | ||
2178 | * are character tokens that are not one of U+0009 CHARACTER | ||
2179 | * TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), or | ||
2180 | * U+0020 SPACE, then reprocess those character tokens using the | ||
2181 | * rules given in the "anything else" entry in the "in table" | ||
2182 | * insertion mode.*/ | ||
2183 | if ($this->pendingTableCharactersDirty) { | ||
2184 | /* Parse error. Process the token using the rules for the | ||
2185 | * "in body" insertion mode, except that if the current | ||
2186 | * node is a table, tbody, tfoot, thead, or tr element, | ||
2187 | * then, whenever a node would be inserted into the current | ||
2188 | * node, it must instead be foster parented. */ | ||
2189 | // XERROR | ||
2190 | $old = $this->foster_parent; | ||
2191 | $this->foster_parent = true; | ||
2192 | $text_token = array( | ||
2193 | 'type' => HTML5_Tokenizer::CHARACTER, | ||
2194 | 'data' => $this->pendingTableCharacters, | ||
2195 | ); | ||
2196 | $this->processWithRulesFor($text_token, self::IN_BODY); | ||
2197 | $this->foster_parent = $old; | ||
2198 | |||
2199 | /* Otherwise, insert the characters given by the pending table | ||
2200 | * character tokens list into the current node. */ | ||
2201 | } else { | ||
2202 | $this->insertText($this->pendingTableCharacters); | ||
2203 | } | ||
2204 | $this->pendingTableCharacters = null; | ||
2205 | $this->pendingTableCharactersNull = null; | ||
2206 | } | ||
2207 | |||
2208 | /* Switch the insertion mode to the original insertion mode and | ||
2209 | * reprocess the token. | ||
2210 | */ | ||
2211 | $this->mode = $this->original_mode; | ||
2212 | $this->emitToken($token); | ||
2213 | } | ||
2214 | break; | ||
2215 | |||
2216 | case self::IN_CAPTION: | ||
2217 | /* An end tag whose tag name is "caption" */ | ||
2218 | if($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'caption') { | ||
2219 | /* If the stack of open elements does not have an element in table | ||
2220 | scope with the same tag name as the token, this is a parse error. | ||
2221 | Ignore the token. (fragment case) */ | ||
2222 | if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) { | ||
2223 | $this->ignored = true; | ||
2224 | // Ignore | ||
2225 | |||
2226 | /* Otherwise: */ | ||
2227 | } else { | ||
2228 | /* Generate implied end tags. */ | ||
2229 | $this->generateImpliedEndTags(); | ||
2230 | |||
2231 | /* Now, if the current node is not a caption element, then this | ||
2232 | is a parse error. */ | ||
2233 | // XERROR: implement | ||
2234 | |||
2235 | /* Pop elements from this stack until a caption element has | ||
2236 | been popped from the stack. */ | ||
2237 | do { | ||
2238 | $node = array_pop($this->stack); | ||
2239 | } while ($node->tagName !== 'caption'); | ||
2240 | |||
2241 | /* Clear the list of active formatting elements up to the last | ||
2242 | marker. */ | ||
2243 | $this->clearTheActiveFormattingElementsUpToTheLastMarker(); | ||
2244 | |||
2245 | /* Switch the insertion mode to "in table". */ | ||
2246 | $this->mode = self::IN_TABLE; | ||
2247 | } | ||
2248 | |||
2249 | /* A start tag whose tag name is one of: "caption", "col", "colgroup", | ||
2250 | "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag | ||
2251 | name is "table" */ | ||
2252 | } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'], | ||
2253 | array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', | ||
2254 | 'thead', 'tr'))) || ($token['type'] === HTML5_Tokenizer::ENDTAG && | ||
2255 | $token['name'] === 'table')) { | ||
2256 | /* Parse error. Act as if an end tag with the tag name "caption" | ||
2257 | had been seen, then, if that token wasn't ignored, reprocess the | ||
2258 | current token. */ | ||
2259 | $this->emitToken(array( | ||
2260 | 'name' => 'caption', | ||
2261 | 'type' => HTML5_Tokenizer::ENDTAG | ||
2262 | )); | ||
2263 | |||
2264 | if (!$this->ignored) $this->emitToken($token); | ||
2265 | |||
2266 | /* An end tag whose tag name is one of: "body", "col", "colgroup", | ||
2267 | "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ | ||
2268 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'], | ||
2269 | array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th', | ||
2270 | 'thead', 'tr'))) { | ||
2271 | // Parse error. Ignore the token. | ||
2272 | $this->ignored = true; | ||
2273 | |||
2274 | /* Anything else */ | ||
2275 | } else { | ||
2276 | /* Process the token as if the insertion mode was "in body". */ | ||
2277 | $this->processWithRulesFor($token, self::IN_BODY); | ||
2278 | } | ||
2279 | break; | ||
2280 | |||
2281 | case self::IN_COLUMN_GROUP: | ||
2282 | /* A character token that is one of U+0009 CHARACTER TABULATION, | ||
2283 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), | ||
2284 | or U+0020 SPACE */ | ||
2285 | if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { | ||
2286 | /* Append the character to the current node. */ | ||
2287 | $this->insertText($token['data']); | ||
2288 | |||
2289 | /* A comment token */ | ||
2290 | } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { | ||
2291 | /* Append a Comment node to the current node with the data | ||
2292 | attribute set to the data given in the comment token. */ | ||
2293 | $this->insertToken($token['data']); | ||
2294 | |||
2295 | } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) { | ||
2296 | // parse error | ||
2297 | |||
2298 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') { | ||
2299 | $this->processWithRulesFor($token, self::IN_BODY); | ||
2300 | |||
2301 | /* A start tag whose tag name is "col" */ | ||
2302 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'col') { | ||
2303 | /* Insert a col element for the token. Immediately pop the current | ||
2304 | node off the stack of open elements. */ | ||
2305 | $this->insertElement($token); | ||
2306 | array_pop($this->stack); | ||
2307 | // XERROR: Acknowledge the token's self-closing flag, if it is set. | ||
2308 | |||
2309 | /* An end tag whose tag name is "colgroup" */ | ||
2310 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && | ||
2311 | $token['name'] === 'colgroup') { | ||
2312 | /* If the current node is the root html element, then this is a | ||
2313 | parse error, ignore the token. (fragment case) */ | ||
2314 | if(end($this->stack)->tagName === 'html') { | ||
2315 | $this->ignored = true; | ||
2316 | |||
2317 | /* Otherwise, pop the current node (which will be a colgroup | ||
2318 | element) from the stack of open elements. Switch the insertion | ||
2319 | mode to "in table". */ | ||
2320 | } else { | ||
2321 | array_pop($this->stack); | ||
2322 | $this->mode = self::IN_TABLE; | ||
2323 | } | ||
2324 | |||
2325 | /* An end tag whose tag name is "col" */ | ||
2326 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'col') { | ||
2327 | /* Parse error. Ignore the token. */ | ||
2328 | $this->ignored = true; | ||
2329 | |||
2330 | /* An end-of-file token */ | ||
2331 | /* If the current node is the root html element */ | ||
2332 | } elseif($token['type'] === HTML5_Tokenizer::EOF && end($this->stack)->tagName === 'html') { | ||
2333 | /* Stop parsing */ | ||
2334 | |||
2335 | /* Anything else */ | ||
2336 | } else { | ||
2337 | /* Act as if an end tag with the tag name "colgroup" had been seen, | ||
2338 | and then, if that token wasn't ignored, reprocess the current token. */ | ||
2339 | $this->emitToken(array( | ||
2340 | 'name' => 'colgroup', | ||
2341 | 'type' => HTML5_Tokenizer::ENDTAG | ||
2342 | )); | ||
2343 | |||
2344 | if (!$this->ignored) $this->emitToken($token); | ||
2345 | } | ||
2346 | break; | ||
2347 | |||
2348 | case self::IN_TABLE_BODY: | ||
2349 | $clear = array('tbody', 'tfoot', 'thead', 'html'); | ||
2350 | |||
2351 | /* A start tag whose tag name is "tr" */ | ||
2352 | if($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'tr') { | ||
2353 | /* Clear the stack back to a table body context. */ | ||
2354 | $this->clearStackToTableContext($clear); | ||
2355 | |||
2356 | /* Insert a tr element for the token, then switch the insertion | ||
2357 | mode to "in row". */ | ||
2358 | $this->insertElement($token); | ||
2359 | $this->mode = self::IN_ROW; | ||
2360 | |||
2361 | /* A start tag whose tag name is one of: "th", "td" */ | ||
2362 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
2363 | ($token['name'] === 'th' || $token['name'] === 'td')) { | ||
2364 | /* Parse error. Act as if a start tag with the tag name "tr" had | ||
2365 | been seen, then reprocess the current token. */ | ||
2366 | $this->emitToken(array( | ||
2367 | 'name' => 'tr', | ||
2368 | 'type' => HTML5_Tokenizer::STARTTAG, | ||
2369 | 'attr' => array() | ||
2370 | )); | ||
2371 | |||
2372 | $this->emitToken($token); | ||
2373 | |||
2374 | /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ | ||
2375 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && | ||
2376 | in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { | ||
2377 | /* If the stack of open elements does not have an element in table | ||
2378 | scope with the same tag name as the token, this is a parse error. | ||
2379 | Ignore the token. */ | ||
2380 | if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) { | ||
2381 | // Parse error | ||
2382 | $this->ignored = true; | ||
2383 | |||
2384 | /* Otherwise: */ | ||
2385 | } else { | ||
2386 | /* Clear the stack back to a table body context. */ | ||
2387 | $this->clearStackToTableContext($clear); | ||
2388 | |||
2389 | /* Pop the current node from the stack of open elements. Switch | ||
2390 | the insertion mode to "in table". */ | ||
2391 | array_pop($this->stack); | ||
2392 | $this->mode = self::IN_TABLE; | ||
2393 | } | ||
2394 | |||
2395 | /* A start tag whose tag name is one of: "caption", "col", "colgroup", | ||
2396 | "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */ | ||
2397 | } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'], | ||
2398 | array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'))) || | ||
2399 | ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) { | ||
2400 | /* If the stack of open elements does not have a tbody, thead, or | ||
2401 | tfoot element in table scope, this is a parse error. Ignore the | ||
2402 | token. (fragment case) */ | ||
2403 | if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), self::SCOPE_TABLE)) { | ||
2404 | // parse error | ||
2405 | $this->ignored = true; | ||
2406 | |||
2407 | /* Otherwise: */ | ||
2408 | } else { | ||
2409 | /* Clear the stack back to a table body context. */ | ||
2410 | $this->clearStackToTableContext($clear); | ||
2411 | |||
2412 | /* Act as if an end tag with the same tag name as the current | ||
2413 | node ("tbody", "tfoot", or "thead") had been seen, then | ||
2414 | reprocess the current token. */ | ||
2415 | $this->emitToken(array( | ||
2416 | 'name' => end($this->stack)->tagName, | ||
2417 | 'type' => HTML5_Tokenizer::ENDTAG | ||
2418 | )); | ||
2419 | |||
2420 | $this->emitToken($token); | ||
2421 | } | ||
2422 | |||
2423 | /* An end tag whose tag name is one of: "body", "caption", "col", | ||
2424 | "colgroup", "html", "td", "th", "tr" */ | ||
2425 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'], | ||
2426 | array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) { | ||
2427 | /* Parse error. Ignore the token. */ | ||
2428 | $this->ignored = true; | ||
2429 | |||
2430 | /* Anything else */ | ||
2431 | } else { | ||
2432 | /* Process the token as if the insertion mode was "in table". */ | ||
2433 | $this->processWithRulesFor($token, self::IN_TABLE); | ||
2434 | } | ||
2435 | break; | ||
2436 | |||
2437 | case self::IN_ROW: | ||
2438 | $clear = array('tr', 'html'); | ||
2439 | |||
2440 | /* A start tag whose tag name is one of: "th", "td" */ | ||
2441 | if($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
2442 | ($token['name'] === 'th' || $token['name'] === 'td')) { | ||
2443 | /* Clear the stack back to a table row context. */ | ||
2444 | $this->clearStackToTableContext($clear); | ||
2445 | |||
2446 | /* Insert an HTML element for the token, then switch the insertion | ||
2447 | mode to "in cell". */ | ||
2448 | $this->insertElement($token); | ||
2449 | $this->mode = self::IN_CELL; | ||
2450 | |||
2451 | /* Insert a marker at the end of the list of active formatting | ||
2452 | elements. */ | ||
2453 | $this->a_formatting[] = self::MARKER; | ||
2454 | |||
2455 | /* An end tag whose tag name is "tr" */ | ||
2456 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'tr') { | ||
2457 | /* If the stack of open elements does not have an element in table | ||
2458 | scope with the same tag name as the token, this is a parse error. | ||
2459 | Ignore the token. (fragment case) */ | ||
2460 | if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) { | ||
2461 | // Ignore. | ||
2462 | $this->ignored = true; | ||
2463 | |||
2464 | /* Otherwise: */ | ||
2465 | } else { | ||
2466 | /* Clear the stack back to a table row context. */ | ||
2467 | $this->clearStackToTableContext($clear); | ||
2468 | |||
2469 | /* Pop the current node (which will be a tr element) from the | ||
2470 | stack of open elements. Switch the insertion mode to "in table | ||
2471 | body". */ | ||
2472 | array_pop($this->stack); | ||
2473 | $this->mode = self::IN_TABLE_BODY; | ||
2474 | } | ||
2475 | |||
2476 | /* A start tag whose tag name is one of: "caption", "col", "colgroup", | ||
2477 | "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */ | ||
2478 | } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'], | ||
2479 | array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) || | ||
2480 | ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) { | ||
2481 | /* Act as if an end tag with the tag name "tr" had been seen, then, | ||
2482 | if that token wasn't ignored, reprocess the current token. */ | ||
2483 | $this->emitToken(array( | ||
2484 | 'name' => 'tr', | ||
2485 | 'type' => HTML5_Tokenizer::ENDTAG | ||
2486 | )); | ||
2487 | if (!$this->ignored) $this->emitToken($token); | ||
2488 | |||
2489 | /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ | ||
2490 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && | ||
2491 | in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { | ||
2492 | /* If the stack of open elements does not have an element in table | ||
2493 | scope with the same tag name as the token, this is a parse error. | ||
2494 | Ignore the token. */ | ||
2495 | if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) { | ||
2496 | $this->ignored = true; | ||
2497 | |||
2498 | /* Otherwise: */ | ||
2499 | } else { | ||
2500 | /* Otherwise, act as if an end tag with the tag name "tr" had | ||
2501 | been seen, then reprocess the current token. */ | ||
2502 | $this->emitToken(array( | ||
2503 | 'name' => 'tr', | ||
2504 | 'type' => HTML5_Tokenizer::ENDTAG | ||
2505 | )); | ||
2506 | |||
2507 | $this->emitToken($token); | ||
2508 | } | ||
2509 | |||
2510 | /* An end tag whose tag name is one of: "body", "caption", "col", | ||
2511 | "colgroup", "html", "td", "th" */ | ||
2512 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'], | ||
2513 | array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'))) { | ||
2514 | /* Parse error. Ignore the token. */ | ||
2515 | $this->ignored = true; | ||
2516 | |||
2517 | /* Anything else */ | ||
2518 | } else { | ||
2519 | /* Process the token as if the insertion mode was "in table". */ | ||
2520 | $this->processWithRulesFor($token, self::IN_TABLE); | ||
2521 | } | ||
2522 | break; | ||
2523 | |||
2524 | case self::IN_CELL: | ||
2525 | /* An end tag whose tag name is one of: "td", "th" */ | ||
2526 | if($token['type'] === HTML5_Tokenizer::ENDTAG && | ||
2527 | ($token['name'] === 'td' || $token['name'] === 'th')) { | ||
2528 | /* If the stack of open elements does not have an element in table | ||
2529 | scope with the same tag name as that of the token, then this is a | ||
2530 | parse error and the token must be ignored. */ | ||
2531 | if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) { | ||
2532 | $this->ignored = true; | ||
2533 | |||
2534 | /* Otherwise: */ | ||
2535 | } else { | ||
2536 | /* Generate implied end tags, except for elements with the same | ||
2537 | tag name as the token. */ | ||
2538 | $this->generateImpliedEndTags(array($token['name'])); | ||
2539 | |||
2540 | /* Now, if the current node is not an element with the same tag | ||
2541 | name as the token, then this is a parse error. */ | ||
2542 | // XERROR: Implement parse error code | ||
2543 | |||
2544 | /* Pop elements from this stack until an element with the same | ||
2545 | tag name as the token has been popped from the stack. */ | ||
2546 | do { | ||
2547 | $node = array_pop($this->stack); | ||
2548 | } while ($node->tagName !== $token['name']); | ||
2549 | |||
2550 | /* Clear the list of active formatting elements up to the last | ||
2551 | marker. */ | ||
2552 | $this->clearTheActiveFormattingElementsUpToTheLastMarker(); | ||
2553 | |||
2554 | /* Switch the insertion mode to "in row". (The current node | ||
2555 | will be a tr element at this point.) */ | ||
2556 | $this->mode = self::IN_ROW; | ||
2557 | } | ||
2558 | |||
2559 | /* A start tag whose tag name is one of: "caption", "col", "colgroup", | ||
2560 | "tbody", "td", "tfoot", "th", "thead", "tr" */ | ||
2561 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'], | ||
2562 | array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', | ||
2563 | 'thead', 'tr'))) { | ||
2564 | /* If the stack of open elements does not have a td or th element | ||
2565 | in table scope, then this is a parse error; ignore the token. | ||
2566 | (fragment case) */ | ||
2567 | if(!$this->elementInScope(array('td', 'th'), self::SCOPE_TABLE)) { | ||
2568 | // parse error | ||
2569 | $this->ignored = true; | ||
2570 | |||
2571 | /* Otherwise, close the cell (see below) and reprocess the current | ||
2572 | token. */ | ||
2573 | } else { | ||
2574 | $this->closeCell(); | ||
2575 | $this->emitToken($token); | ||
2576 | } | ||
2577 | |||
2578 | /* An end tag whose tag name is one of: "body", "caption", "col", | ||
2579 | "colgroup", "html" */ | ||
2580 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'], | ||
2581 | array('body', 'caption', 'col', 'colgroup', 'html'))) { | ||
2582 | /* Parse error. Ignore the token. */ | ||
2583 | $this->ignored = true; | ||
2584 | |||
2585 | /* An end tag whose tag name is one of: "table", "tbody", "tfoot", | ||
2586 | "thead", "tr" */ | ||
2587 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'], | ||
2588 | array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { | ||
2589 | /* If the stack of open elements does not have a td or th element | ||
2590 | in table scope, then this is a parse error; ignore the token. | ||
2591 | (innerHTML case) */ | ||
2592 | if(!$this->elementInScope(array('td', 'th'), self::SCOPE_TABLE)) { | ||
2593 | // Parse error | ||
2594 | $this->ignored = true; | ||
2595 | |||
2596 | /* Otherwise, close the cell (see below) and reprocess the current | ||
2597 | token. */ | ||
2598 | } else { | ||
2599 | $this->closeCell(); | ||
2600 | $this->emitToken($token); | ||
2601 | } | ||
2602 | |||
2603 | /* Anything else */ | ||
2604 | } else { | ||
2605 | /* Process the token as if the insertion mode was "in body". */ | ||
2606 | $this->processWithRulesFor($token, self::IN_BODY); | ||
2607 | } | ||
2608 | break; | ||
2609 | |||
2610 | case self::IN_SELECT: | ||
2611 | /* Handle the token as follows: */ | ||
2612 | |||
2613 | /* A character token */ | ||
2614 | if( | ||
2615 | $token['type'] === HTML5_Tokenizer::CHARACTER || | ||
2616 | $token['type'] === HTML5_Tokenizer::SPACECHARACTER | ||
2617 | ) { | ||
2618 | /* Append the token's character to the current node. */ | ||
2619 | $this->insertText($token['data']); | ||
2620 | |||
2621 | /* A comment token */ | ||
2622 | } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { | ||
2623 | /* Append a Comment node to the current node with the data | ||
2624 | attribute set to the data given in the comment token. */ | ||
2625 | $this->insertComment($token['data']); | ||
2626 | |||
2627 | } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) { | ||
2628 | // parse error | ||
2629 | |||
2630 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') { | ||
2631 | $this->processWithRulesFor($token, self::INBODY); | ||
2632 | |||
2633 | /* A start tag token whose tag name is "option" */ | ||
2634 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
2635 | $token['name'] === 'option') { | ||
2636 | /* If the current node is an option element, act as if an end tag | ||
2637 | with the tag name "option" had been seen. */ | ||
2638 | if(end($this->stack)->tagName === 'option') { | ||
2639 | $this->emitToken(array( | ||
2640 | 'name' => 'option', | ||
2641 | 'type' => HTML5_Tokenizer::ENDTAG | ||
2642 | )); | ||
2643 | } | ||
2644 | |||
2645 | /* Insert an HTML element for the token. */ | ||
2646 | $this->insertElement($token); | ||
2647 | |||
2648 | /* A start tag token whose tag name is "optgroup" */ | ||
2649 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
2650 | $token['name'] === 'optgroup') { | ||
2651 | /* If the current node is an option element, act as if an end tag | ||
2652 | with the tag name "option" had been seen. */ | ||
2653 | if(end($this->stack)->tagName === 'option') { | ||
2654 | $this->emitToken(array( | ||
2655 | 'name' => 'option', | ||
2656 | 'type' => HTML5_Tokenizer::ENDTAG | ||
2657 | )); | ||
2658 | } | ||
2659 | |||
2660 | /* If the current node is an optgroup element, act as if an end tag | ||
2661 | with the tag name "optgroup" had been seen. */ | ||
2662 | if(end($this->stack)->tagName === 'optgroup') { | ||
2663 | $this->emitToken(array( | ||
2664 | 'name' => 'optgroup', | ||
2665 | 'type' => HTML5_Tokenizer::ENDTAG | ||
2666 | )); | ||
2667 | } | ||
2668 | |||
2669 | /* Insert an HTML element for the token. */ | ||
2670 | $this->insertElement($token); | ||
2671 | |||
2672 | /* An end tag token whose tag name is "optgroup" */ | ||
2673 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && | ||
2674 | $token['name'] === 'optgroup') { | ||
2675 | /* First, if the current node is an option element, and the node | ||
2676 | immediately before it in the stack of open elements is an optgroup | ||
2677 | element, then act as if an end tag with the tag name "option" had | ||
2678 | been seen. */ | ||
2679 | $elements_in_stack = count($this->stack); | ||
2680 | |||
2681 | if($this->stack[$elements_in_stack - 1]->tagName === 'option' && | ||
2682 | $this->stack[$elements_in_stack - 2]->tagName === 'optgroup') { | ||
2683 | $this->emitToken(array( | ||
2684 | 'name' => 'option', | ||
2685 | 'type' => HTML5_Tokenizer::ENDTAG | ||
2686 | )); | ||
2687 | } | ||
2688 | |||
2689 | /* If the current node is an optgroup element, then pop that node | ||
2690 | from the stack of open elements. Otherwise, this is a parse error, | ||
2691 | ignore the token. */ | ||
2692 | if(end($this->stack)->tagName === 'optgroup') { | ||
2693 | array_pop($this->stack); | ||
2694 | } else { | ||
2695 | // parse error | ||
2696 | $this->ignored = true; | ||
2697 | } | ||
2698 | |||
2699 | /* An end tag token whose tag name is "option" */ | ||
2700 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && | ||
2701 | $token['name'] === 'option') { | ||
2702 | /* If the current node is an option element, then pop that node | ||
2703 | from the stack of open elements. Otherwise, this is a parse error, | ||
2704 | ignore the token. */ | ||
2705 | if(end($this->stack)->tagName === 'option') { | ||
2706 | array_pop($this->stack); | ||
2707 | } else { | ||
2708 | // parse error | ||
2709 | $this->ignored = true; | ||
2710 | } | ||
2711 | |||
2712 | /* An end tag whose tag name is "select" */ | ||
2713 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && | ||
2714 | $token['name'] === 'select') { | ||
2715 | /* If the stack of open elements does not have an element in table | ||
2716 | scope with the same tag name as the token, this is a parse error. | ||
2717 | Ignore the token. (fragment case) */ | ||
2718 | if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) { | ||
2719 | $this->ignored = true; | ||
2720 | // parse error | ||
2721 | |||
2722 | /* Otherwise: */ | ||
2723 | } else { | ||
2724 | /* Pop elements from the stack of open elements until a select | ||
2725 | element has been popped from the stack. */ | ||
2726 | do { | ||
2727 | $node = array_pop($this->stack); | ||
2728 | } while ($node->tagName !== 'select'); | ||
2729 | |||
2730 | /* Reset the insertion mode appropriately. */ | ||
2731 | $this->resetInsertionMode(); | ||
2732 | } | ||
2733 | |||
2734 | /* A start tag whose tag name is "select" */ | ||
2735 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'select') { | ||
2736 | /* Parse error. Act as if the token had been an end tag with the | ||
2737 | tag name "select" instead. */ | ||
2738 | $this->emitToken(array( | ||
2739 | 'name' => 'select', | ||
2740 | 'type' => HTML5_Tokenizer::ENDTAG | ||
2741 | )); | ||
2742 | |||
2743 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
2744 | ($token['name'] === 'input' || $token['name'] === 'keygen' || $token['name'] === 'textarea')) { | ||
2745 | // parse error | ||
2746 | $this->emitToken(array( | ||
2747 | 'name' => 'select', | ||
2748 | 'type' => HTML5_Tokenizer::ENDTAG | ||
2749 | )); | ||
2750 | $this->emitToken($token); | ||
2751 | |||
2752 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') { | ||
2753 | $this->processWithRulesFor($token, self::IN_HEAD); | ||
2754 | |||
2755 | } elseif($token['type'] === HTML5_Tokenizer::EOF) { | ||
2756 | // XERROR: If the current node is not the root html element, then this is a parse error. | ||
2757 | /* Stop parsing */ | ||
2758 | |||
2759 | /* Anything else */ | ||
2760 | } else { | ||
2761 | /* Parse error. Ignore the token. */ | ||
2762 | $this->ignored = true; | ||
2763 | } | ||
2764 | break; | ||
2765 | |||
2766 | case self::IN_SELECT_IN_TABLE: | ||
2767 | |||
2768 | if($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
2769 | in_array($token['name'], array('caption', 'table', 'tbody', | ||
2770 | 'tfoot', 'thead', 'tr', 'td', 'th'))) { | ||
2771 | // parse error | ||
2772 | $this->emitToken(array( | ||
2773 | 'name' => 'select', | ||
2774 | 'type' => HTML5_Tokenizer::ENDTAG, | ||
2775 | )); | ||
2776 | $this->emitToken($token); | ||
2777 | |||
2778 | /* An end tag whose tag name is one of: "caption", "table", "tbody", | ||
2779 | "tfoot", "thead", "tr", "td", "th" */ | ||
2780 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && | ||
2781 | in_array($token['name'], array('caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'))) { | ||
2782 | /* Parse error. */ | ||
2783 | // parse error | ||
2784 | |||
2785 | /* If the stack of open elements has an element in table scope with | ||
2786 | the same tag name as that of the token, then act as if an end tag | ||
2787 | with the tag name "select" had been seen, and reprocess the token. | ||
2788 | Otherwise, ignore the token. */ | ||
2789 | if($this->elementInScope($token['name'], self::SCOPE_TABLE)) { | ||
2790 | $this->emitToken(array( | ||
2791 | 'name' => 'select', | ||
2792 | 'type' => HTML5_Tokenizer::ENDTAG | ||
2793 | )); | ||
2794 | |||
2795 | $this->emitToken($token); | ||
2796 | } else { | ||
2797 | $this->ignored = true; | ||
2798 | } | ||
2799 | } else { | ||
2800 | $this->processWithRulesFor($token, self::IN_SELECT); | ||
2801 | } | ||
2802 | break; | ||
2803 | |||
2804 | case self::IN_FOREIGN_CONTENT: | ||
2805 | if ($token['type'] === HTML5_Tokenizer::CHARACTER || | ||
2806 | $token['type'] === HTML5_Tokenizer::SPACECHARACTER) { | ||
2807 | $this->insertText($token['data']); | ||
2808 | } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) { | ||
2809 | $this->insertComment($token['data']); | ||
2810 | } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) { | ||
2811 | // XERROR: parse error | ||
2812 | } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && | ||
2813 | $token['name'] === 'script' && end($this->stack)->tagName === 'script' && | ||
2814 | // XDOM | ||
2815 | end($this->stack)->namespaceURI === self::NS_SVG) { | ||
2816 | array_pop($this->stack); | ||
2817 | // a bunch of script running mumbo jumbo | ||
2818 | } elseif ( | ||
2819 | ($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
2820 | (( | ||
2821 | $token['name'] !== 'mglyph' && | ||
2822 | $token['name'] !== 'malignmark' && | ||
2823 | // XDOM | ||
2824 | end($this->stack)->namespaceURI === self::NS_MATHML && | ||
2825 | in_array(end($this->stack)->tagName, array('mi', 'mo', 'mn', 'ms', 'mtext')) | ||
2826 | ) || | ||
2827 | ( | ||
2828 | $token['name'] === 'svg' && | ||
2829 | // XDOM | ||
2830 | end($this->stack)->namespaceURI === self::NS_MATHML && | ||
2831 | end($this->stack)->tagName === 'annotation-xml' | ||
2832 | ) || | ||
2833 | ( | ||
2834 | // XDOM | ||
2835 | end($this->stack)->namespaceURI === self::NS_SVG && | ||
2836 | in_array(end($this->stack)->tagName, array('foreignObject', 'desc', 'title')) | ||
2837 | ) || | ||
2838 | ( | ||
2839 | // XSKETCHY && XDOM | ||
2840 | end($this->stack)->namespaceURI === self::NS_HTML | ||
2841 | )) | ||
2842 | ) || $token['type'] === HTML5_Tokenizer::ENDTAG | ||
2843 | ) { | ||
2844 | $this->processWithRulesFor($token, $this->secondary_mode); | ||
2845 | /* If, after doing so, the insertion mode is still "in foreign | ||
2846 | * content", but there is no element in scope that has a namespace | ||
2847 | * other than the HTML namespace, switch the insertion mode to the | ||
2848 | * secondary insertion mode. */ | ||
2849 | if ($this->mode === self::IN_FOREIGN_CONTENT) { | ||
2850 | $found = false; | ||
2851 | // this basically duplicates elementInScope() | ||
2852 | for ($i = count($this->stack) - 1; $i >= 0; $i--) { | ||
2853 | // XDOM | ||
2854 | $node = $this->stack[$i]; | ||
2855 | if ($node->namespaceURI !== self::NS_HTML) { | ||
2856 | $found = true; | ||
2857 | break; | ||
2858 | } elseif (in_array($node->tagName, array('table', 'html', | ||
2859 | 'applet', 'caption', 'td', 'th', 'button', 'marquee', | ||
2860 | 'object')) || ($node->tagName === 'foreignObject' && | ||
2861 | $node->namespaceURI === self::NS_SVG)) { | ||
2862 | break; | ||
2863 | } | ||
2864 | } | ||
2865 | if (!$found) { | ||
2866 | $this->mode = $this->secondary_mode; | ||
2867 | } | ||
2868 | } | ||
2869 | } elseif ($token['type'] === HTML5_Tokenizer::EOF || ( | ||
2870 | $token['type'] === HTML5_Tokenizer::STARTTAG && | ||
2871 | (in_array($token['name'], array('b', "big", "blockquote", "body", "br", | ||
2872 | "center", "code", "dc", "dd", "div", "dl", "ds", "dt", "em", "embed", "h1", "h2", | ||
2873 | "h3", "h4", "h5", "h6", "head", "hr", "i", "img", "li", "listing", | ||
2874 | "menu", "meta", "nobr", "ol", "p", "pre", "ruby", "s", "small", | ||
2875 | "span", "strong", "strike", "sub", "sup", "table", "tt", "u", "ul", | ||
2876 | "var")) || ($token['name'] === 'font' && ($this->getAttr($token, 'color') || | ||
2877 | $this->getAttr($token, 'face') || $this->getAttr($token, 'size')))))) { | ||
2878 | // XERROR: parse error | ||
2879 | do { | ||
2880 | $node = array_pop($this->stack); | ||
2881 | // XDOM | ||
2882 | } while ($node->namespaceURI !== self::NS_HTML); | ||
2883 | $this->stack[] = $node; | ||
2884 | $this->mode = $this->secondary_mode; | ||
2885 | $this->emitToken($token); | ||
2886 | } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG) { | ||
2887 | static $svg_lookup = array( | ||
2888 | 'altglyph' => 'altGlyph', | ||
2889 | 'altglyphdef' => 'altGlyphDef', | ||
2890 | 'altglyphitem' => 'altGlyphItem', | ||
2891 | 'animatecolor' => 'animateColor', | ||
2892 | 'animatemotion' => 'animateMotion', | ||
2893 | 'animatetransform' => 'animateTransform', | ||
2894 | 'clippath' => 'clipPath', | ||
2895 | 'feblend' => 'feBlend', | ||
2896 | 'fecolormatrix' => 'feColorMatrix', | ||
2897 | 'fecomponenttransfer' => 'feComponentTransfer', | ||
2898 | 'fecomposite' => 'feComposite', | ||
2899 | 'feconvolvematrix' => 'feConvolveMatrix', | ||
2900 | 'fediffuselighting' => 'feDiffuseLighting', | ||
2901 | 'fedisplacementmap' => 'feDisplacementMap', | ||
2902 | 'fedistantlight' => 'feDistantLight', | ||
2903 | 'feflood' => 'feFlood', | ||
2904 | 'fefunca' => 'feFuncA', | ||
2905 | 'fefuncb' => 'feFuncB', | ||
2906 | 'fefuncg' => 'feFuncG', | ||
2907 | 'fefuncr' => 'feFuncR', | ||
2908 | 'fegaussianblur' => 'feGaussianBlur', | ||
2909 | 'feimage' => 'feImage', | ||
2910 | 'femerge' => 'feMerge', | ||
2911 | 'femergenode' => 'feMergeNode', | ||
2912 | 'femorphology' => 'feMorphology', | ||
2913 | 'feoffset' => 'feOffset', | ||
2914 | 'fepointlight' => 'fePointLight', | ||
2915 | 'fespecularlighting' => 'feSpecularLighting', | ||
2916 | 'fespotlight' => 'feSpotLight', | ||
2917 | 'fetile' => 'feTile', | ||
2918 | 'feturbulence' => 'feTurbulence', | ||
2919 | 'foreignobject' => 'foreignObject', | ||
2920 | 'glyphref' => 'glyphRef', | ||
2921 | 'lineargradient' => 'linearGradient', | ||
2922 | 'radialgradient' => 'radialGradient', | ||
2923 | 'textpath' => 'textPath', | ||
2924 | ); | ||
2925 | // XDOM | ||
2926 | $current = end($this->stack); | ||
2927 | if ($current->namespaceURI === self::NS_MATHML) { | ||
2928 | $token = $this->adjustMathMLAttributes($token); | ||
2929 | } | ||
2930 | if ($current->namespaceURI === self::NS_SVG && | ||
2931 | isset($svg_lookup[$token['name']])) { | ||
2932 | $token['name'] = $svg_lookup[$token['name']]; | ||
2933 | } | ||
2934 | if ($current->namespaceURI === self::NS_SVG) { | ||
2935 | $token = $this->adjustSVGAttributes($token); | ||
2936 | } | ||
2937 | $token = $this->adjustForeignAttributes($token); | ||
2938 | $this->insertForeignElement($token, $current->namespaceURI); | ||
2939 | if (isset($token['self-closing'])) { | ||
2940 | array_pop($this->stack); | ||
2941 | // XERROR: acknowledge self-closing flag | ||
2942 | } | ||
2943 | } | ||
2944 | break; | ||
2945 | |||
2946 | case self::AFTER_BODY: | ||
2947 | /* Handle the token as follows: */ | ||
2948 | |||
2949 | /* A character token that is one of U+0009 CHARACTER TABULATION, | ||
2950 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), | ||
2951 | or U+0020 SPACE */ | ||
2952 | if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { | ||
2953 | /* Process the token as it would be processed if the insertion mode | ||
2954 | was "in body". */ | ||
2955 | $this->processWithRulesFor($token, self::IN_BODY); | ||
2956 | |||
2957 | /* A comment token */ | ||
2958 | } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { | ||
2959 | /* Append a Comment node to the first element in the stack of open | ||
2960 | elements (the html element), with the data attribute set to the | ||
2961 | data given in the comment token. */ | ||
2962 | // XDOM | ||
2963 | $comment = $this->dom->createComment($token['data']); | ||
2964 | $this->stack[0]->appendChild($comment); | ||
2965 | |||
2966 | } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) { | ||
2967 | // parse error | ||
2968 | |||
2969 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') { | ||
2970 | $this->processWithRulesFor($token, self::IN_BODY); | ||
2971 | |||
2972 | /* An end tag with the tag name "html" */ | ||
2973 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'html') { | ||
2974 | /* If the parser was originally created as part of the HTML | ||
2975 | * fragment parsing algorithm, this is a parse error; ignore | ||
2976 | * the token. (fragment case) */ | ||
2977 | $this->ignored = true; | ||
2978 | // XERROR: implement this | ||
2979 | |||
2980 | $this->mode = self::AFTER_AFTER_BODY; | ||
2981 | |||
2982 | } elseif($token['type'] === HTML5_Tokenizer::EOF) { | ||
2983 | /* Stop parsing */ | ||
2984 | |||
2985 | /* Anything else */ | ||
2986 | } else { | ||
2987 | /* Parse error. Set the insertion mode to "in body" and reprocess | ||
2988 | the token. */ | ||
2989 | $this->mode = self::IN_BODY; | ||
2990 | $this->emitToken($token); | ||
2991 | } | ||
2992 | break; | ||
2993 | |||
2994 | case self::IN_FRAMESET: | ||
2995 | /* Handle the token as follows: */ | ||
2996 | |||
2997 | /* A character token that is one of U+0009 CHARACTER TABULATION, | ||
2998 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), | ||
2999 | U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ | ||
3000 | if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { | ||
3001 | /* Append the character to the current node. */ | ||
3002 | $this->insertText($token['data']); | ||
3003 | |||
3004 | /* A comment token */ | ||
3005 | } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { | ||
3006 | /* Append a Comment node to the current node with the data | ||
3007 | attribute set to the data given in the comment token. */ | ||
3008 | $this->insertComment($token['data']); | ||
3009 | |||
3010 | } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) { | ||
3011 | // parse error | ||
3012 | |||
3013 | /* A start tag with the tag name "frameset" */ | ||
3014 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
3015 | $token['name'] === 'frameset') { | ||
3016 | $this->insertElement($token); | ||
3017 | |||
3018 | /* An end tag with the tag name "frameset" */ | ||
3019 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && | ||
3020 | $token['name'] === 'frameset') { | ||
3021 | /* If the current node is the root html element, then this is a | ||
3022 | parse error; ignore the token. (fragment case) */ | ||
3023 | if(end($this->stack)->tagName === 'html') { | ||
3024 | $this->ignored = true; | ||
3025 | // Parse error | ||
3026 | |||
3027 | } else { | ||
3028 | /* Otherwise, pop the current node from the stack of open | ||
3029 | elements. */ | ||
3030 | array_pop($this->stack); | ||
3031 | |||
3032 | /* If the parser was not originally created as part of the HTML | ||
3033 | * fragment parsing algorithm (fragment case), and the current | ||
3034 | * node is no longer a frameset element, then switch the | ||
3035 | * insertion mode to "after frameset". */ | ||
3036 | $this->mode = self::AFTER_FRAMESET; | ||
3037 | } | ||
3038 | |||
3039 | /* A start tag with the tag name "frame" */ | ||
3040 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
3041 | $token['name'] === 'frame') { | ||
3042 | /* Insert an HTML element for the token. */ | ||
3043 | $this->insertElement($token); | ||
3044 | |||
3045 | /* Immediately pop the current node off the stack of open elements. */ | ||
3046 | array_pop($this->stack); | ||
3047 | |||
3048 | // XERROR: Acknowledge the token's self-closing flag, if it is set. | ||
3049 | |||
3050 | /* A start tag with the tag name "noframes" */ | ||
3051 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
3052 | $token['name'] === 'noframes') { | ||
3053 | /* Process the token using the rules for the "in head" insertion mode. */ | ||
3054 | $this->processwithRulesFor($token, self::IN_HEAD); | ||
3055 | |||
3056 | } elseif($token['type'] === HTML5_Tokenizer::EOF) { | ||
3057 | // XERROR: If the current node is not the root html element, then this is a parse error. | ||
3058 | /* Stop parsing */ | ||
3059 | /* Anything else */ | ||
3060 | } else { | ||
3061 | /* Parse error. Ignore the token. */ | ||
3062 | $this->ignored = true; | ||
3063 | } | ||
3064 | break; | ||
3065 | |||
3066 | case self::AFTER_FRAMESET: | ||
3067 | /* Handle the token as follows: */ | ||
3068 | |||
3069 | /* A character token that is one of U+0009 CHARACTER TABULATION, | ||
3070 | U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), | ||
3071 | U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ | ||
3072 | if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { | ||
3073 | /* Append the character to the current node. */ | ||
3074 | $this->insertText($token['data']); | ||
3075 | |||
3076 | /* A comment token */ | ||
3077 | } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { | ||
3078 | /* Append a Comment node to the current node with the data | ||
3079 | attribute set to the data given in the comment token. */ | ||
3080 | $this->insertComment($token['data']); | ||
3081 | |||
3082 | } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) { | ||
3083 | // parse error | ||
3084 | |||
3085 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') { | ||
3086 | $this->processWithRulesFor($token, self::IN_BODY); | ||
3087 | |||
3088 | /* An end tag with the tag name "html" */ | ||
3089 | } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && | ||
3090 | $token['name'] === 'html') { | ||
3091 | $this->mode = self::AFTER_AFTER_FRAMESET; | ||
3092 | |||
3093 | /* A start tag with the tag name "noframes" */ | ||
3094 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && | ||
3095 | $token['name'] === 'noframes') { | ||
3096 | $this->processWithRulesFor($token, self::IN_HEAD); | ||
3097 | |||
3098 | } elseif($token['type'] === HTML5_Tokenizer::EOF) { | ||
3099 | /* Stop parsing */ | ||
3100 | |||
3101 | /* Anything else */ | ||
3102 | } else { | ||
3103 | /* Parse error. Ignore the token. */ | ||
3104 | $this->ignored = true; | ||
3105 | } | ||
3106 | break; | ||
3107 | |||
3108 | case self::AFTER_AFTER_BODY: | ||
3109 | /* A comment token */ | ||
3110 | if($token['type'] === HTML5_Tokenizer::COMMENT) { | ||
3111 | /* Append a Comment node to the Document object with the data | ||
3112 | attribute set to the data given in the comment token. */ | ||
3113 | // XDOM | ||
3114 | $comment = $this->dom->createComment($token['data']); | ||
3115 | $this->dom->appendChild($comment); | ||
3116 | |||
3117 | } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE || | ||
3118 | $token['type'] === HTML5_Tokenizer::SPACECHARACTER || | ||
3119 | ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) { | ||
3120 | $this->processWithRulesFor($token, self::IN_BODY); | ||
3121 | |||
3122 | /* An end-of-file token */ | ||
3123 | } elseif($token['type'] === HTML5_Tokenizer::EOF) { | ||
3124 | /* OMG DONE!! */ | ||
3125 | } else { | ||
3126 | // parse error | ||
3127 | $this->mode = self::IN_BODY; | ||
3128 | $this->emitToken($token); | ||
3129 | } | ||
3130 | break; | ||
3131 | |||
3132 | case self::AFTER_AFTER_FRAMESET: | ||
3133 | /* A comment token */ | ||
3134 | if($token['type'] === HTML5_Tokenizer::COMMENT) { | ||
3135 | /* Append a Comment node to the Document object with the data | ||
3136 | attribute set to the data given in the comment token. */ | ||
3137 | // XDOM | ||
3138 | $comment = $this->dom->createComment($token['data']); | ||
3139 | $this->dom->appendChild($comment); | ||
3140 | |||
3141 | } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE || | ||
3142 | $token['type'] === HTML5_Tokenizer::SPACECHARACTER || | ||
3143 | ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) { | ||
3144 | $this->processWithRulesFor($token, self::IN_BODY); | ||
3145 | |||
3146 | /* An end-of-file token */ | ||
3147 | } elseif($token['type'] === HTML5_Tokenizer::EOF) { | ||
3148 | /* OMG DONE!! */ | ||
3149 | } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'nofrmaes') { | ||
3150 | $this->processWithRulesFor($token, self::IN_HEAD); | ||
3151 | } else { | ||
3152 | // parse error | ||
3153 | } | ||
3154 | break; | ||
3155 | } | ||
3156 | // end funky indenting | ||
3157 | } | ||
3158 | |||
3159 | private function insertElement($token, $append = true) { | ||
3160 | $el = $this->dom->createElementNS(self::NS_HTML, $token['name']); | ||
3161 | |||
3162 | if (!empty($token['attr'])) { | ||
3163 | foreach($token['attr'] as $attr) { | ||
3164 | if(!$el->hasAttribute($attr['name'])) { | ||
3165 | $el->setAttribute($attr['name'], $attr['value']); | ||
3166 | } | ||
3167 | } | ||
3168 | } | ||
3169 | if ($append) { | ||
3170 | $this->appendToRealParent($el); | ||
3171 | $this->stack[] = $el; | ||
3172 | } | ||
3173 | |||
3174 | return $el; | ||
3175 | } | ||
3176 | |||
3177 | private function insertText($data) { | ||
3178 | if ($data === '') return; | ||
3179 | if ($this->ignore_lf_token) { | ||
3180 | if ($data[0] === "\n") { | ||
3181 | $data = substr($data, 1); | ||
3182 | if ($data === false) return; | ||
3183 | } | ||
3184 | } | ||
3185 | $text = $this->dom->createTextNode($data); | ||
3186 | $this->appendToRealParent($text); | ||
3187 | } | ||
3188 | |||
3189 | private function insertComment($data) { | ||
3190 | $comment = $this->dom->createComment($data); | ||
3191 | $this->appendToRealParent($comment); | ||
3192 | } | ||
3193 | |||
3194 | private function appendToRealParent($node) { | ||
3195 | // this is only for the foster_parent case | ||
3196 | /* If the current node is a table, tbody, tfoot, thead, or tr | ||
3197 | element, then, whenever a node would be inserted into the current | ||
3198 | node, it must instead be inserted into the foster parent element. */ | ||
3199 | if(!$this->foster_parent || !in_array(end($this->stack)->tagName, | ||
3200 | array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { | ||
3201 | end($this->stack)->appendChild($node); | ||
3202 | } else { | ||
3203 | $this->fosterParent($node); | ||
3204 | } | ||
3205 | } | ||
3206 | |||
3207 | private function elementInScope($el, $scope = self::SCOPE) { | ||
3208 | if(is_array($el)) { | ||
3209 | foreach($el as $element) { | ||
3210 | if($this->elementInScope($element, $scope)) { | ||
3211 | return true; | ||
3212 | } | ||
3213 | } | ||
3214 | |||
3215 | return false; | ||
3216 | } | ||
3217 | |||
3218 | $leng = count($this->stack); | ||
3219 | |||
3220 | for($n = 0; $n < $leng; $n++) { | ||
3221 | /* 1. Initialise node to be the current node (the bottommost node of | ||
3222 | the stack). */ | ||
3223 | $node = $this->stack[$leng - 1 - $n]; | ||
3224 | |||
3225 | if($node->tagName === $el) { | ||
3226 | /* 2. If node is the target node, terminate in a match state. */ | ||
3227 | return true; | ||
3228 | |||
3229 | // We've expanded the logic for these states a little differently; | ||
3230 | // Hixie's refactoring into "specific scope" is more general, but | ||
3231 | // this "gets the job done" | ||
3232 | |||
3233 | // these are the common states for all scopes | ||
3234 | } elseif($node->tagName === 'table' || $node->tagName === 'html') { | ||
3235 | return false; | ||
3236 | |||
3237 | // these are valid for "in scope" and "in list item scope" | ||
3238 | } elseif($scope !== self::SCOPE_TABLE && | ||
3239 | (in_array($node->tagName, array('applet', 'caption', 'td', | ||
3240 | 'th', 'button', 'marquee', 'object')) || | ||
3241 | $node->tagName === 'foreignObject' && $node->namespaceURI === self::NS_SVG)) { | ||
3242 | return false; | ||
3243 | |||
3244 | |||
3245 | // these are valid for "in list item scope" | ||
3246 | } elseif($scope === self::SCOPE_LISTITEM && in_array($node->tagName, array('ol', 'ul'))) { | ||
3247 | return false; | ||
3248 | } | ||
3249 | |||
3250 | /* Otherwise, set node to the previous entry in the stack of open | ||
3251 | elements and return to step 2. (This will never fail, since the loop | ||
3252 | will always terminate in the previous step if the top of the stack | ||
3253 | is reached.) */ | ||
3254 | } | ||
3255 | } | ||
3256 | |||
3257 | private function reconstructActiveFormattingElements() { | ||
3258 | /* 1. If there are no entries in the list of active formatting elements, | ||
3259 | then there is nothing to reconstruct; stop this algorithm. */ | ||
3260 | $formatting_elements = count($this->a_formatting); | ||
3261 | |||
3262 | if($formatting_elements === 0) { | ||
3263 | return false; | ||
3264 | } | ||
3265 | |||
3266 | /* 3. Let entry be the last (most recently added) element in the list | ||
3267 | of active formatting elements. */ | ||
3268 | $entry = end($this->a_formatting); | ||
3269 | |||
3270 | /* 2. If the last (most recently added) entry in the list of active | ||
3271 | formatting elements is a marker, or if it is an element that is in the | ||
3272 | stack of open elements, then there is nothing to reconstruct; stop this | ||
3273 | algorithm. */ | ||
3274 | if($entry === self::MARKER || in_array($entry, $this->stack, true)) { | ||
3275 | return false; | ||
3276 | } | ||
3277 | |||
3278 | for($a = $formatting_elements - 1; $a >= 0; true) { | ||
3279 | /* 4. If there are no entries before entry in the list of active | ||
3280 | formatting elements, then jump to step 8. */ | ||
3281 | if($a === 0) { | ||
3282 | $step_seven = false; | ||
3283 | break; | ||
3284 | } | ||
3285 | |||
3286 | /* 5. Let entry be the entry one earlier than entry in the list of | ||
3287 | active formatting elements. */ | ||
3288 | $a--; | ||
3289 | $entry = $this->a_formatting[$a]; | ||
3290 | |||
3291 | /* 6. If entry is neither a marker nor an element that is also in | ||
3292 | thetack of open elements, go to step 4. */ | ||
3293 | if($entry === self::MARKER || in_array($entry, $this->stack, true)) { | ||
3294 | break; | ||
3295 | } | ||
3296 | } | ||
3297 | |||
3298 | while(true) { | ||
3299 | /* 7. Let entry be the element one later than entry in the list of | ||
3300 | active formatting elements. */ | ||
3301 | if(isset($step_seven) && $step_seven === true) { | ||
3302 | $a++; | ||
3303 | $entry = $this->a_formatting[$a]; | ||
3304 | } | ||
3305 | |||
3306 | /* 8. Perform a shallow clone of the element entry to obtain clone. */ | ||
3307 | $clone = $entry->cloneNode(); | ||
3308 | |||
3309 | /* 9. Append clone to the current node and push it onto the stack | ||
3310 | of open elements so that it is the new current node. */ | ||
3311 | $this->appendToRealParent($clone); | ||
3312 | $this->stack[] = $clone; | ||
3313 | |||
3314 | /* 10. Replace the entry for entry in the list with an entry for | ||
3315 | clone. */ | ||
3316 | $this->a_formatting[$a] = $clone; | ||
3317 | |||
3318 | /* 11. If the entry for clone in the list of active formatting | ||
3319 | elements is not the last entry in the list, return to step 7. */ | ||
3320 | if(end($this->a_formatting) !== $clone) { | ||
3321 | $step_seven = true; | ||
3322 | } else { | ||
3323 | break; | ||
3324 | } | ||
3325 | } | ||
3326 | } | ||
3327 | |||
3328 | private function clearTheActiveFormattingElementsUpToTheLastMarker() { | ||
3329 | /* When the steps below require the UA to clear the list of active | ||
3330 | formatting elements up to the last marker, the UA must perform the | ||
3331 | following steps: */ | ||
3332 | |||
3333 | while(true) { | ||
3334 | /* 1. Let entry be the last (most recently added) entry in the list | ||
3335 | of active formatting elements. */ | ||
3336 | $entry = end($this->a_formatting); | ||
3337 | |||
3338 | /* 2. Remove entry from the list of active formatting elements. */ | ||
3339 | array_pop($this->a_formatting); | ||
3340 | |||
3341 | /* 3. If entry was a marker, then stop the algorithm at this point. | ||
3342 | The list has been cleared up to the last marker. */ | ||
3343 | if($entry === self::MARKER) { | ||
3344 | break; | ||
3345 | } | ||
3346 | } | ||
3347 | } | ||
3348 | |||
3349 | private function generateImpliedEndTags($exclude = array()) { | ||
3350 | /* When the steps below require the UA to generate implied end tags, | ||
3351 | * then, while the current node is a dc element, a dd element, a ds | ||
3352 | * element, a dt element, an li element, an option element, an optgroup | ||
3353 | * element, a p element, an rp element, or an rt element, the UA must | ||
3354 | * pop the current node off the stack of open elements. */ | ||
3355 | $node = end($this->stack); | ||
3356 | $elements = array_diff(array('dc', 'dd', 'ds', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude); | ||
3357 | |||
3358 | while(in_array(end($this->stack)->tagName, $elements)) { | ||
3359 | array_pop($this->stack); | ||
3360 | } | ||
3361 | } | ||
3362 | |||
3363 | private function getElementCategory($node) { | ||
3364 | if (!is_object($node)) debug_print_backtrace(); | ||
3365 | $name = $node->tagName; | ||
3366 | if(in_array($name, $this->special)) | ||
3367 | return self::SPECIAL; | ||
3368 | |||
3369 | elseif(in_array($name, $this->scoping)) | ||
3370 | return self::SCOPING; | ||
3371 | |||
3372 | elseif(in_array($name, $this->formatting)) | ||
3373 | return self::FORMATTING; | ||
3374 | |||
3375 | else | ||
3376 | return self::PHRASING; | ||
3377 | } | ||
3378 | |||
3379 | private function clearStackToTableContext($elements) { | ||
3380 | /* When the steps above require the UA to clear the stack back to a | ||
3381 | table context, it means that the UA must, while the current node is not | ||
3382 | a table element or an html element, pop elements from the stack of open | ||
3383 | elements. */ | ||
3384 | while(true) { | ||
3385 | $name = end($this->stack)->tagName; | ||
3386 | |||
3387 | if(in_array($name, $elements)) { | ||
3388 | break; | ||
3389 | } else { | ||
3390 | array_pop($this->stack); | ||
3391 | } | ||
3392 | } | ||
3393 | } | ||
3394 | |||
3395 | private function resetInsertionMode($context = null) { | ||
3396 | /* 1. Let last be false. */ | ||
3397 | $last = false; | ||
3398 | $leng = count($this->stack); | ||
3399 | |||
3400 | for($n = $leng - 1; $n >= 0; $n--) { | ||
3401 | /* 2. Let node be the last node in the stack of open elements. */ | ||
3402 | $node = $this->stack[$n]; | ||
3403 | |||
3404 | /* 3. If node is the first node in the stack of open elements, then | ||
3405 | * set last to true and set node to the context element. (fragment | ||
3406 | * case) */ | ||
3407 | if($this->stack[0]->isSameNode($node)) { | ||
3408 | $last = true; | ||
3409 | $node = $context; | ||
3410 | } | ||
3411 | |||
3412 | /* 4. If node is a select element, then switch the insertion mode to | ||
3413 | "in select" and abort these steps. (fragment case) */ | ||
3414 | if($node->tagName === 'select') { | ||
3415 | $this->mode = self::IN_SELECT; | ||
3416 | break; | ||
3417 | |||
3418 | /* 5. If node is a td or th element, then switch the insertion mode | ||
3419 | to "in cell" and abort these steps. */ | ||
3420 | } elseif($node->tagName === 'td' || $node->nodeName === 'th') { | ||
3421 | $this->mode = self::IN_CELL; | ||
3422 | break; | ||
3423 | |||
3424 | /* 6. If node is a tr element, then switch the insertion mode to | ||
3425 | "in row" and abort these steps. */ | ||
3426 | } elseif($node->tagName === 'tr') { | ||
3427 | $this->mode = self::IN_ROW; | ||
3428 | break; | ||
3429 | |||
3430 | /* 7. If node is a tbody, thead, or tfoot element, then switch the | ||
3431 | insertion mode to "in table body" and abort these steps. */ | ||
3432 | } elseif(in_array($node->tagName, array('tbody', 'thead', 'tfoot'))) { | ||
3433 | $this->mode = self::IN_TABLE_BODY; | ||
3434 | break; | ||
3435 | |||
3436 | /* 8. If node is a caption element, then switch the insertion mode | ||
3437 | to "in caption" and abort these steps. */ | ||
3438 | } elseif($node->tagName === 'caption') { | ||
3439 | $this->mode = self::IN_CAPTION; | ||
3440 | break; | ||
3441 | |||
3442 | /* 9. If node is a colgroup element, then switch the insertion mode | ||
3443 | to "in column group" and abort these steps. (innerHTML case) */ | ||
3444 | } elseif($node->tagName === 'colgroup') { | ||
3445 | $this->mode = self::IN_COLUMN_GROUP; | ||
3446 | break; | ||
3447 | |||
3448 | /* 10. If node is a table element, then switch the insertion mode | ||
3449 | to "in table" and abort these steps. */ | ||
3450 | } elseif($node->tagName === 'table') { | ||
3451 | $this->mode = self::IN_TABLE; | ||
3452 | break; | ||
3453 | |||
3454 | /* 11. If node is an element from the MathML namespace or the SVG | ||
3455 | * namespace, then switch the insertion mode to "in foreign | ||
3456 | * content", let the secondary insertion mode be "in body", and | ||
3457 | * abort these steps. */ | ||
3458 | } elseif($node->namespaceURI === self::NS_SVG || | ||
3459 | $node->namespaceURI === self::NS_MATHML) { | ||
3460 | $this->mode = self::IN_FOREIGN_CONTENT; | ||
3461 | $this->secondary_mode = self::IN_BODY; | ||
3462 | break; | ||
3463 | |||
3464 | /* 12. If node is a head element, then switch the insertion mode | ||
3465 | to "in body" ("in body"! not "in head"!) and abort these steps. | ||
3466 | (fragment case) */ | ||
3467 | } elseif($node->tagName === 'head') { | ||
3468 | $this->mode = self::IN_BODY; | ||
3469 | break; | ||
3470 | |||
3471 | /* 13. If node is a body element, then switch the insertion mode to | ||
3472 | "in body" and abort these steps. */ | ||
3473 | } elseif($node->tagName === 'body') { | ||
3474 | $this->mode = self::IN_BODY; | ||
3475 | break; | ||
3476 | |||
3477 | /* 14. If node is a frameset element, then switch the insertion | ||
3478 | mode to "in frameset" and abort these steps. (fragment case) */ | ||
3479 | } elseif($node->tagName === 'frameset') { | ||
3480 | $this->mode = self::IN_FRAMESET; | ||
3481 | break; | ||
3482 | |||
3483 | /* 15. If node is an html element, then: if the head element | ||
3484 | pointer is null, switch the insertion mode to "before head", | ||
3485 | otherwise, switch the insertion mode to "after head". In either | ||
3486 | case, abort these steps. (fragment case) */ | ||
3487 | } elseif($node->tagName === 'html') { | ||
3488 | $this->mode = ($this->head_pointer === null) | ||
3489 | ? self::BEFORE_HEAD | ||
3490 | : self::AFTER_HEAD; | ||
3491 | |||
3492 | break; | ||
3493 | |||
3494 | /* 16. If last is true, then set the insertion mode to "in body" | ||
3495 | and abort these steps. (fragment case) */ | ||
3496 | } elseif($last) { | ||
3497 | $this->mode = self::IN_BODY; | ||
3498 | break; | ||
3499 | } | ||
3500 | } | ||
3501 | } | ||
3502 | |||
3503 | private function closeCell() { | ||
3504 | /* If the stack of open elements has a td or th element in table scope, | ||
3505 | then act as if an end tag token with that tag name had been seen. */ | ||
3506 | foreach(array('td', 'th') as $cell) { | ||
3507 | if($this->elementInScope($cell, self::SCOPE_TABLE)) { | ||
3508 | $this->emitToken(array( | ||
3509 | 'name' => $cell, | ||
3510 | 'type' => HTML5_Tokenizer::ENDTAG | ||
3511 | )); | ||
3512 | |||
3513 | break; | ||
3514 | } | ||
3515 | } | ||
3516 | } | ||
3517 | |||
3518 | private function processWithRulesFor($token, $mode) { | ||
3519 | /* "using the rules for the m insertion mode", where m is one of these | ||
3520 | * modes, the user agent must use the rules described under the m | ||
3521 | * insertion mode's section, but must leave the insertion mode | ||
3522 | * unchanged unless the rules in m themselves switch the insertion mode | ||
3523 | * to a new value. */ | ||
3524 | return $this->emitToken($token, $mode); | ||
3525 | } | ||
3526 | |||
3527 | private function insertCDATAElement($token) { | ||
3528 | $this->insertElement($token); | ||
3529 | $this->original_mode = $this->mode; | ||
3530 | $this->mode = self::IN_CDATA_RCDATA; | ||
3531 | $this->content_model = HTML5_Tokenizer::CDATA; | ||
3532 | } | ||
3533 | |||
3534 | private function insertRCDATAElement($token) { | ||
3535 | $this->insertElement($token); | ||
3536 | $this->original_mode = $this->mode; | ||
3537 | $this->mode = self::IN_CDATA_RCDATA; | ||
3538 | $this->content_model = HTML5_Tokenizer::RCDATA; | ||
3539 | } | ||
3540 | |||
3541 | private function getAttr($token, $key) { | ||
3542 | if (!isset($token['attr'])) return false; | ||
3543 | $ret = false; | ||
3544 | foreach ($token['attr'] as $keypair) { | ||
3545 | if ($keypair['name'] === $key) $ret = $keypair['value']; | ||
3546 | } | ||
3547 | return $ret; | ||
3548 | } | ||
3549 | |||
3550 | private function getCurrentTable() { | ||
3551 | /* The current table is the last table element in the stack of open | ||
3552 | * elements, if there is one. If there is no table element in the stack | ||
3553 | * of open elements (fragment case), then the current table is the | ||
3554 | * first element in the stack of open elements (the html element). */ | ||
3555 | for ($i = count($this->stack) - 1; $i >= 0; $i--) { | ||
3556 | if ($this->stack[$i]->tagName === 'table') { | ||
3557 | return $this->stack[$i]; | ||
3558 | } | ||
3559 | } | ||
3560 | return $this->stack[0]; | ||
3561 | } | ||
3562 | |||
3563 | private function getFosterParent() { | ||
3564 | /* The foster parent element is the parent element of the last | ||
3565 | table element in the stack of open elements, if there is a | ||
3566 | table element and it has such a parent element. If there is no | ||
3567 | table element in the stack of open elements (innerHTML case), | ||
3568 | then the foster parent element is the first element in the | ||
3569 | stack of open elements (the html element). Otherwise, if there | ||
3570 | is a table element in the stack of open elements, but the last | ||
3571 | table element in the stack of open elements has no parent, or | ||
3572 | its parent node is not an element, then the foster parent | ||
3573 | element is the element before the last table element in the | ||
3574 | stack of open elements. */ | ||
3575 | for($n = count($this->stack) - 1; $n >= 0; $n--) { | ||
3576 | if($this->stack[$n]->tagName === 'table') { | ||
3577 | $table = $this->stack[$n]; | ||
3578 | break; | ||
3579 | } | ||
3580 | } | ||
3581 | |||
3582 | if(isset($table) && $table->parentNode !== null) { | ||
3583 | return $table->parentNode; | ||
3584 | |||
3585 | } elseif(!isset($table)) { | ||
3586 | return $this->stack[0]; | ||
3587 | |||
3588 | } elseif(isset($table) && ($table->parentNode === null || | ||
3589 | $table->parentNode->nodeType !== XML_ELEMENT_NODE)) { | ||
3590 | return $this->stack[$n - 1]; | ||
3591 | } | ||
3592 | } | ||
3593 | |||
3594 | public function fosterParent($node) { | ||
3595 | $foster_parent = $this->getFosterParent(); | ||
3596 | $table = $this->getCurrentTable(); // almost equivalent to last table element, except it can be html | ||
3597 | /* When a node node is to be foster parented, the node node must be | ||
3598 | * be inserted into the foster parent element. */ | ||
3599 | /* If the foster parent element is the parent element of the last table | ||
3600 | * element in the stack of open elements, then node must be inserted | ||
3601 | * immediately before the last table element in the stack of open | ||
3602 | * elements in the foster parent element; otherwise, node must be | ||
3603 | * appended to the foster parent element. */ | ||
3604 | if ($table->tagName === 'table' && $table->parentNode->isSameNode($foster_parent)) { | ||
3605 | $foster_parent->insertBefore($node, $table); | ||
3606 | } else { | ||
3607 | $foster_parent->appendChild($node); | ||
3608 | } | ||
3609 | } | ||
3610 | |||
3611 | /** | ||
3612 | * For debugging, prints the stack | ||
3613 | */ | ||
3614 | private function printStack() { | ||
3615 | $names = array(); | ||
3616 | foreach ($this->stack as $i => $element) { | ||
3617 | $names[] = $element->tagName; | ||
3618 | } | ||
3619 | echo " -> stack [" . implode(', ', $names) . "]\n"; | ||
3620 | } | ||
3621 | |||
3622 | /** | ||
3623 | * For debugging, prints active formatting elements | ||
3624 | */ | ||
3625 | private function printActiveFormattingElements() { | ||
3626 | if (!$this->a_formatting) return; | ||
3627 | $names = array(); | ||
3628 | foreach ($this->a_formatting as $node) { | ||
3629 | if ($node === self::MARKER) $names[] = 'MARKER'; | ||
3630 | else $names[] = $node->tagName; | ||
3631 | } | ||
3632 | echo " -> active formatting [" . implode(', ', $names) . "]\n"; | ||
3633 | } | ||
3634 | |||
3635 | public function currentTableIsTainted() { | ||
3636 | return !empty($this->getCurrentTable()->tainted); | ||
3637 | } | ||
3638 | |||
3639 | /** | ||
3640 | * Sets up the tree constructor for building a fragment. | ||
3641 | */ | ||
3642 | public function setupContext($context = null) { | ||
3643 | $this->fragment = true; | ||
3644 | if ($context) { | ||
3645 | $context = $this->dom->createElementNS(self::NS_HTML, $context); | ||
3646 | /* 4.1. Set the HTML parser's tokenization stage's content model | ||
3647 | * flag according to the context element, as follows: */ | ||
3648 | switch ($context->tagName) { | ||
3649 | case 'title': case 'textarea': | ||
3650 | $this->content_model = HTML5_Tokenizer::RCDATA; | ||
3651 | break; | ||
3652 | case 'style': case 'script': case 'xmp': case 'iframe': | ||
3653 | case 'noembed': case 'noframes': | ||
3654 | $this->content_model = HTML5_Tokenizer::CDATA; | ||
3655 | break; | ||
3656 | case 'noscript': | ||
3657 | // XSCRIPT: assuming scripting is enabled | ||
3658 | $this->content_model = HTML5_Tokenizer::CDATA; | ||
3659 | break; | ||
3660 | case 'plaintext': | ||
3661 | $this->content_model = HTML5_Tokenizer::PLAINTEXT; | ||
3662 | break; | ||
3663 | } | ||
3664 | /* 4.2. Let root be a new html element with no attributes. */ | ||
3665 | $root = $this->dom->createElementNS(self::NS_HTML, 'html'); | ||
3666 | $this->root = $root; | ||
3667 | /* 4.3 Append the element root to the Document node created above. */ | ||
3668 | $this->dom->appendChild($root); | ||
3669 | /* 4.4 Set up the parser's stack of open elements so that it | ||
3670 | * contains just the single element root. */ | ||
3671 | $this->stack = array($root); | ||
3672 | /* 4.5 Reset the parser's insertion mode appropriately. */ | ||
3673 | $this->resetInsertionMode($context); | ||
3674 | /* 4.6 Set the parser's form element pointer to the nearest node | ||
3675 | * to the context element that is a form element (going straight up | ||
3676 | * the ancestor chain, and including the element itself, if it is a | ||
3677 | * form element), or, if there is no such form element, to null. */ | ||
3678 | $node = $context; | ||
3679 | do { | ||
3680 | if ($node->tagName === 'form') { | ||
3681 | $this->form_pointer = $node; | ||
3682 | break; | ||
3683 | } | ||
3684 | } while ($node = $node->parentNode); | ||
3685 | } | ||
3686 | } | ||
3687 | |||
3688 | public function adjustMathMLAttributes($token) { | ||
3689 | foreach ($token['attr'] as &$kp) { | ||
3690 | if ($kp['name'] === 'definitionurl') { | ||
3691 | $kp['name'] = 'definitionURL'; | ||
3692 | } | ||
3693 | } | ||
3694 | return $token; | ||
3695 | } | ||
3696 | |||
3697 | public function adjustSVGAttributes($token) { | ||
3698 | static $lookup = array( | ||
3699 | 'attributename' => 'attributeName', | ||
3700 | 'attributetype' => 'attributeType', | ||
3701 | 'basefrequency' => 'baseFrequency', | ||
3702 | 'baseprofile' => 'baseProfile', | ||
3703 | 'calcmode' => 'calcMode', | ||
3704 | 'clippathunits' => 'clipPathUnits', | ||
3705 | 'contentscripttype' => 'contentScriptType', | ||
3706 | 'contentstyletype' => 'contentStyleType', | ||
3707 | 'diffuseconstant' => 'diffuseConstant', | ||
3708 | 'edgemode' => 'edgeMode', | ||
3709 | 'externalresourcesrequired' => 'externalResourcesRequired', | ||
3710 | 'filterres' => 'filterRes', | ||
3711 | 'filterunits' => 'filterUnits', | ||
3712 | 'glyphref' => 'glyphRef', | ||
3713 | 'gradienttransform' => 'gradientTransform', | ||
3714 | 'gradientunits' => 'gradientUnits', | ||
3715 | 'kernelmatrix' => 'kernelMatrix', | ||
3716 | 'kernelunitlength' => 'kernelUnitLength', | ||
3717 | 'keypoints' => 'keyPoints', | ||
3718 | 'keysplines' => 'keySplines', | ||
3719 | 'keytimes' => 'keyTimes', | ||
3720 | 'lengthadjust' => 'lengthAdjust', | ||
3721 | 'limitingconeangle' => 'limitingConeAngle', | ||
3722 | 'markerheight' => 'markerHeight', | ||
3723 | 'markerunits' => 'markerUnits', | ||
3724 | 'markerwidth' => 'markerWidth', | ||
3725 | 'maskcontentunits' => 'maskContentUnits', | ||
3726 | 'maskunits' => 'maskUnits', | ||
3727 | 'numoctaves' => 'numOctaves', | ||
3728 | 'pathlength' => 'pathLength', | ||
3729 | 'patterncontentunits' => 'patternContentUnits', | ||
3730 | 'patterntransform' => 'patternTransform', | ||
3731 | 'patternunits' => 'patternUnits', | ||
3732 | 'pointsatx' => 'pointsAtX', | ||
3733 | 'pointsaty' => 'pointsAtY', | ||
3734 | 'pointsatz' => 'pointsAtZ', | ||
3735 | 'preservealpha' => 'preserveAlpha', | ||
3736 | 'preserveaspectratio' => 'preserveAspectRatio', | ||
3737 | 'primitiveunits' => 'primitiveUnits', | ||
3738 | 'refx' => 'refX', | ||
3739 | 'refy' => 'refY', | ||
3740 | 'repeatcount' => 'repeatCount', | ||
3741 | 'repeatdur' => 'repeatDur', | ||
3742 | 'requiredextensions' => 'requiredExtensions', | ||
3743 | 'requiredfeatures' => 'requiredFeatures', | ||
3744 | 'specularconstant' => 'specularConstant', | ||
3745 | 'specularexponent' => 'specularExponent', | ||
3746 | 'spreadmethod' => 'spreadMethod', | ||
3747 | 'startoffset' => 'startOffset', | ||
3748 | 'stddeviation' => 'stdDeviation', | ||
3749 | 'stitchtiles' => 'stitchTiles', | ||
3750 | 'surfacescale' => 'surfaceScale', | ||
3751 | 'systemlanguage' => 'systemLanguage', | ||
3752 | 'tablevalues' => 'tableValues', | ||
3753 | 'targetx' => 'targetX', | ||
3754 | 'targety' => 'targetY', | ||
3755 | 'textlength' => 'textLength', | ||
3756 | 'viewbox' => 'viewBox', | ||
3757 | 'viewtarget' => 'viewTarget', | ||
3758 | 'xchannelselector' => 'xChannelSelector', | ||
3759 | 'ychannelselector' => 'yChannelSelector', | ||
3760 | 'zoomandpan' => 'zoomAndPan', | ||
3761 | ); | ||
3762 | foreach ($token['attr'] as &$kp) { | ||
3763 | if (isset($lookup[$kp['name']])) { | ||
3764 | $kp['name'] = $lookup[$kp['name']]; | ||
3765 | } | ||
3766 | } | ||
3767 | return $token; | ||
3768 | } | ||
3769 | |||
3770 | public function adjustForeignAttributes($token) { | ||
3771 | static $lookup = array( | ||
3772 | 'xlink:actuate' => array('xlink', 'actuate', self::NS_XLINK), | ||
3773 | 'xlink:arcrole' => array('xlink', 'arcrole', self::NS_XLINK), | ||
3774 | 'xlink:href' => array('xlink', 'href', self::NS_XLINK), | ||
3775 | 'xlink:role' => array('xlink', 'role', self::NS_XLINK), | ||
3776 | 'xlink:show' => array('xlink', 'show', self::NS_XLINK), | ||
3777 | 'xlink:title' => array('xlink', 'title', self::NS_XLINK), | ||
3778 | 'xlink:type' => array('xlink', 'type', self::NS_XLINK), | ||
3779 | 'xml:base' => array('xml', 'base', self::NS_XML), | ||
3780 | 'xml:lang' => array('xml', 'lang', self::NS_XML), | ||
3781 | 'xml:space' => array('xml', 'space', self::NS_XML), | ||
3782 | 'xmlns' => array(null, 'xmlns', self::NS_XMLNS), | ||
3783 | 'xmlns:xlink' => array('xmlns', 'xlink', self::NS_XMLNS), | ||
3784 | ); | ||
3785 | foreach ($token['attr'] as &$kp) { | ||
3786 | if (isset($lookup[$kp['name']])) { | ||
3787 | $kp['name'] = $lookup[$kp['name']]; | ||
3788 | } | ||
3789 | } | ||
3790 | return $token; | ||
3791 | } | ||
3792 | |||
3793 | public function insertForeignElement($token, $namespaceURI) { | ||
3794 | $el = $this->dom->createElementNS($namespaceURI, $token['name']); | ||
3795 | if (!empty($token['attr'])) { | ||
3796 | foreach ($token['attr'] as $kp) { | ||
3797 | $attr = $kp['name']; | ||
3798 | if (is_array($attr)) { | ||
3799 | $ns = $attr[2]; | ||
3800 | $attr = $attr[1]; | ||
3801 | } else { | ||
3802 | $ns = self::NS_HTML; | ||
3803 | } | ||
3804 | if (!$el->hasAttributeNS($ns, $attr)) { | ||
3805 | // XSKETCHY: work around godawful libxml bug | ||
3806 | if ($ns === self::NS_XLINK) { | ||
3807 | $el->setAttribute('xlink:'.$attr, $kp['value']); | ||
3808 | } elseif ($ns === self::NS_HTML) { | ||
3809 | // Another godawful libxml bug | ||
3810 | $el->setAttribute($attr, $kp['value']); | ||
3811 | } else { | ||
3812 | $el->setAttributeNS($ns, $attr, $kp['value']); | ||
3813 | } | ||
3814 | } | ||
3815 | } | ||
3816 | } | ||
3817 | $this->appendToRealParent($el); | ||
3818 | $this->stack[] = $el; | ||
3819 | // XERROR: see below | ||
3820 | /* If the newly created element has an xmlns attribute in the XMLNS | ||
3821 | * namespace whose value is not exactly the same as the element's | ||
3822 | * namespace, that is a parse error. Similarly, if the newly created | ||
3823 | * element has an xmlns:xlink attribute in the XMLNS namespace whose | ||
3824 | * value is not the XLink Namespace, that is a parse error. */ | ||
3825 | } | ||
3826 | |||
3827 | public function save() { | ||
3828 | $this->dom->normalize(); | ||
3829 | if (!$this->fragment) { | ||
3830 | return $this->dom; | ||
3831 | } else { | ||
3832 | if ($this->root) { | ||
3833 | return $this->root->childNodes; | ||
3834 | } else { | ||
3835 | return $this->dom->childNodes; | ||
3836 | } | ||
3837 | } | ||
3838 | } | ||
3839 | } | ||
3840 | |||
diff --git a/inc/3rdparty/libraries/html5/named-character-references.ser b/inc/3rdparty/libraries/html5/named-character-references.ser new file mode 100644 index 00000000..e3ae0502 --- /dev/null +++ b/inc/3rdparty/libraries/html5/named-character-references.ser | |||
@@ -0,0 +1 @@ | |||
a:52:{s:1:"A";a:16:{s:1:"E";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:198;}s:9:"codepoint";i:198;}}}}s:1:"M";a:1:{s:1:"P";a:2:{s:1:";";a:1:{s:9:"codepoint";i:38;}s:9:"codepoint";i:38;}}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:193;}s:9:"codepoint";i:193;}}}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:258;}}}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:194;}s:9:"codepoint";i:194;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1040;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120068;}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:192;}s:9:"codepoint";i:192;}}}}}s:1:"l";a:1:{s:1:"p";a:1:{s:1:"h";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:913;}}}}}s:1:"m";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:256;}}}}}s:1:"n";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10835;}}}s:1:"o";a:2:{s:1:"g";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:260;}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120120;}}}}s:1:"p";a:1:{s:1:"p";a:1:{s:1:"l";a:1:{s:1:"y";a:1:{s:1:"F";a:1:{s:1:"u";a:1:{s:1:"n";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8289;}}}}}}}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:197;}s:9:"codepoint";i:197;}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119964;}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8788;}}}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:195;}s:9:"codepoint";i:195;}}}}}s:1:"u";a:1:{s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:196;}s:9:"codepoint";i:196;}}}}s:1:"B";a:8:{s:1:"a";a:2:{s:1:"c";a:1:{s:1:"k";a:1:{s:1:"
s";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8726;}}}}}}}}s:1:"r";a:2:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10983;}}s:1:"w";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8966;}}}}}}s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1041;}}}s:1:"e";a:3:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8757;}}}}}}s:1:"r";a:1:{s:1:"n";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8492;}}}}}}}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:914;}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120069;}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120121;}}}}s:1:"r";a:1:{s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:728;}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8492;}}}}s:1:"u";a:1:{s:1:"m";a:1:{s:1:"p";a:1:{s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8782;}}}}}}}s:1:"C";a:14:{s:1:"H";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1063;}}}}s:1:"O";a:1:{s:1:"P";a:1:{s:1:"Y";a:2:{s:1:";";a:1:{s:9:"codepoint";i:169;}s:9:"codepoint";i:169;}}}s:1:"a";a:3:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:262;}}}}}s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8914;}s:1:"i";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"D";a:1:{s:1:"i";a:1:{s:1:"f";a:1:{s:1:"f";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"D";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8517;}}}}}}}}}}}}}}}}}}}s:1:"y";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"y";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8493;}}}}}}}s:1:"c";a:4:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:268;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:199;
}s:9:"codepoint";i:199;}}}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:264;}}}}s:1:"o";a:1:{s:1:"n";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8752;}}}}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:266;}}}}s:1:"e";a:2:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:184;}}}}}}s:1:"n";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:183;}}}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8493;}}}s:1:"h";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:935;}}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:"l";a:1:{s:1:"e";a:4:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8857;}}}}s:1:"M";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8854;}}}}}}s:1:"P";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8853;}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8855;}}}}}}}}}}}s:1:"l";a:1:{s:1:"o";a:2:{s:1:"c";a:1:{s:1:"k";a:1:{s:1:"w";a:1:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"C";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"I";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8754;}}}}}}}}}}}}}}}}}}}}}}s:1:"s";a:1:{s:1:"e";a:1:{s:1:"C";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"l";a:1:{s:1:"y";a:2:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"Q";a:1:{s:1:"u";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8221;}}}}}}}}}}}}s:1:"Q";a:1:{s:1:"u";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8217;}}}}}}}}}}}}}}}s:1:"o";a:4:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8759;}s:1:"e";a:1:{s:1:";";a:1:{
s:9:"codepoint";i:10868;}}}}}s:1:"n";a:3:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"u";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8801;}}}}}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8751;}}}}s:1:"t";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"I";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8750;}}}}}}}}}}}}}}s:1:"p";a:2:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8450;}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"d";a:1:{s:1:"u";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8720;}}}}}}}}s:1:"u";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"C";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"c";a:1:{s:1:"k";a:1:{s:1:"w";a:1:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"C";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"I";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8755;}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10799;}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119966;}}}}s:1:"u";a:1:{s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8915;}s:1:"C";a:1:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8781;}}}}}}}s:1:"D";a:11:{s:1:"D";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8517;}s:1:"o";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"h";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10513;}}}}}}}}s:1:"J";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1026;}}}}s:1:"S";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1029;}}}}s:1:"Z";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1039;}}}}s:1:"a";a:3:{s:1:"g";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8225;}}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8609;}}}s:1:"s"
;a:1:{s:1:"h";a:1:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10980;}}}}}s:1:"c";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:270;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1044;}}}s:1:"e";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8711;}s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:916;}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120071;}}}s:1:"i";a:2:{s:1:"a";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:"l";a:4:{s:1:"A";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:180;}}}}}}s:1:"D";a:1:{s:1:"o";a:2:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:729;}}s:1:"u";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"A";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:733;}}}}}}}}}}}}s:1:"G";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:96;}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:732;}}}}}}}}}}}}}}s:1:"m";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8900;}}}}}}s:1:"f";a:1:{s:1:"f";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"D";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8518;}}}}}}}}}}}}}s:1:"o";a:4:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120123;}}}s:1:"t";a:3:{s:1:";";a:1:{s:9:"codepoint";i:168;}s:1:"D";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8412;}}}}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8784;}}}}}}}s:1:"u";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"e";a:6:{s:1:"C";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"I";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";
i:8751;}}}}}}}}}}}}}}}}s:1:"D";a:1:{s:1:"o";a:2:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:168;}}s:1:"w";a:1:{s:1:"n";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8659;}}}}}}}}}}s:1:"L";a:2:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:3:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8656;}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8660;}}}}}}}}}}}s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10980;}}}}}}}s:1:"o";a:1:{s:1:"n";a:1:{s:1:"g";a:2:{s:1:"L";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:2:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10232;}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10234;}}}}}}}}}}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10233;}}}}}}}}}}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:2:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8658;}}}}}}s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8872;}}}}}}}}}s:1:"U";a:1:{s:1:"p";a:2:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8657;}}}}}}s:1:"D";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8661;}}}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8741;}}}}}}}}}}}}}}}}s:1:"w";a:1:{s:1:"n";a:6:{s:1:"A"
;a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8595;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10515;}}}}s:1:"U";a:1:{s:1:"p";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8693;}}}}}}}}}}}}}s:1:"B";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:785;}}}}}}s:1:"L";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:3:{s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10576;}}}}}}}}}}}}s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10590;}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8637;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10582;}}}}}}}}}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:2:{s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10591;}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8641;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10583;}}}}}}}}}}}}}}}s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8868;}s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8615;}}}}}}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8659;}}}}}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119967;}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:272;}}}}}}}s:1:"E";a:16:{s:1:"N";a:1:{s:1:"G";a:1:{s:1:";";a:1:{s:9:"cod
epoint";i:330;}}}s:1:"T";a:1:{s:1:"H";a:2:{s:1:";";a:1:{s:9:"codepoint";i:208;}s:9:"codepoint";i:208;}}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:201;}s:9:"codepoint";i:201;}}}}}s:1:"c";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:282;}}}}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:202;}s:9:"codepoint";i:202;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1069;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:278;}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120072;}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:200;}s:9:"codepoint";i:200;}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8712;}}}}}}}s:1:"m";a:2:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:274;}}}}s:1:"p";a:1:{s:1:"t";a:1:{s:1:"y";a:2:{s:1:"S";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"S";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9723;}}}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"y";a:1:{s:1:"S";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"S";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9643;}}}}}}}}}}}}}}}}}}}}s:1:"o";a:2:{s:1:"g";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:280;}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120124;}}}}s:1:"p";a:1:{s:1:"s";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:917;}}}}}}}s:1:"q";a:1:{s:1:"u";a:2:{s:1:"a";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10869;}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8770;}}}}}}}}s:1:"i";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"b";a:1:{s:1:"r";a:1:{s:1
:"i";a:1:{s:1:"u";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8652;}}}}}}}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8496;}}}s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10867;}}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:919;}}}s:1:"u";a:1:{s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:203;}s:9:"codepoint";i:203;}}}s:1:"x";a:2:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8707;}}}}}s:1:"p";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8519;}}}}}}}}}}}}}s:1:"F";a:5:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1060;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120073;}}}s:1:"i";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"d";a:2:{s:1:"S";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"S";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9724;}}}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"y";a:1:{s:1:"S";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"S";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9642;}}}}}}}}}}}}}}}}}}}}}s:1:"o";a:3:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120125;}}}s:1:"r";a:1:{s:1:"A";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8704;}}}}}s:1:"u";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8497;}}}}}}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8497;}}}}}s:1:"G";a:12:{s:1:"J";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1027;}}}}s:1:"T";a:2:{s:1:";";a:1:{s:9:"codepoint";i:62;}s:9:"codepoint";i:62;}s:1:"a";a:1:{s:1:"m";a:1:{s:1:"m";a:1:{s:1:"a";a:2:{s:1:";";a:1:{s:9:"codepoint";i:915;}s:1:"d";a:1:{s:1
:";";a:1:{s:9:"codepoint";i:988;}}}}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:286;}}}}}}s:1:"c";a:3:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:290;}}}}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:284;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1043;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:288;}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120074;}}}s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8921;}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120126;}}}}s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:6:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8805;}s:1:"L";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8923;}}}}}}}}}}s:1:"F";a:1:{s:1:"u";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8807;}}}}}}}}}}s:1:"G";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10914;}}}}}}}}s:1:"L";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8823;}}}}}s:1:"S";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10878;}}}}}}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8819;}}}}}}}}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119970;}}}}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8811;}}}s:1:"H";a:8:{s:1:"A";a:1:{s:1:"R";a:1:{s:1:"D";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1066;}}}}}}s:1:"a";a:2:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:711;}}}}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoin
t";i:94;}}}s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:292;}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8460;}}}s:1:"i";a:1:{s:1:"l";a:1:{s:1:"b";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8459;}}}}}}}}}}}}s:1:"o";a:2:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8461;}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"z";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"L";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9472;}}}}}}}}}}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8459;}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:294;}}}}}}s:1:"u";a:1:{s:1:"m";a:1:{s:1:"p";a:2:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"H";a:1:{s:1:"u";a:1:{s:1:"m";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8782;}}}}}}}}}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8783;}}}}}}}}}}s:1:"I";a:14:{s:1:"E";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1045;}}}}s:1:"J";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:306;}}}}}s:1:"O";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1025;}}}}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:205;}s:9:"codepoint";i:205;}}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:206;}s:9:"codepoint";i:206;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1048;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:304;}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8465;}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:204;}s:9:"codepoint";i:204;}}}}}s:1:"m";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8465;}s:1:"a";a
:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:298;}}}s:1:"g";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"y";a:1:{s:1:"I";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8520;}}}}}}}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8658;}}}}}}}s:1:"n";a:2:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8748;}s:1:"e";a:2:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8747;}}}}}s:1:"r";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8898;}}}}}}}}}}}s:1:"v";a:1:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"i";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"e";a:2:{s:1:"C";a:1:{s:1:"o";a:1:{s:1:"m";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8291;}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8290;}}}}}}}}}}}}}}s:1:"o";a:3:{s:1:"g";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:302;}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120128;}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:921;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8464;}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:296;}}}}}}s:1:"u";a:2:{s:1:"k";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1030;}}}}s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:207;}s:9:"codepoint";i:207;}}}}s:1:"J";a:5:{s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:308;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1049;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120077;}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120129;}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119973;}}}s:1:"e";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:
1032;}}}}}}s:1:"u";a:1:{s:1:"k";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1028;}}}}}}s:1:"K";a:7:{s:1:"H";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1061;}}}}s:1:"J";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1036;}}}}s:1:"a";a:1:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:922;}}}}}s:1:"c";a:2:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:310;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1050;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120078;}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120130;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119974;}}}}}s:1:"L";a:11:{s:1:"J";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1033;}}}}s:1:"T";a:2:{s:1:";";a:1:{s:9:"codepoint";i:60;}s:9:"codepoint";i:60;}s:1:"a";a:5:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:313;}}}}}s:1:"m";a:1:{s:1:"b";a:1:{s:1:"d";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:923;}}}}}s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10218;}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8466;}}}}}}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8606;}}}}s:1:"c";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:317;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:315;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1051;}}}s:1:"e";a:2:{s:1:"f";a:1:{s:1:"t";a:10:{s:1:"A";a:2:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"B";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"k";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10216;}}}}}}}}}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8592;}s:1:"B";a:1:{s:1:"a";
a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8676;}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8646;}}}}}}}}}}}}}}}}s:1:"C";a:1:{s:1:"e";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8968;}}}}}}}}s:1:"D";a:1:{s:1:"o";a:2:{s:1:"u";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"B";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"k";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10214;}}}}}}}}}}}}s:1:"w";a:1:{s:1:"n";a:2:{s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10593;}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8643;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10585;}}}}}}}}}}}}}}s:1:"F";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8970;}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:2:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8596;}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10574;}}}}}}}}}}}}s:1:"T";a:2:{s:1:"e";a:1:{s:1:"e";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8867;}s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8612;}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10586;}}}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8882;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10703;}}}}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoin
t";i:8884;}}}}}}}}}}}}}}s:1:"U";a:1:{s:1:"p";a:3:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10577;}}}}}}}}}}}s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10592;}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8639;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10584;}}}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8636;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10578;}}}}}}}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8656;}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8660;}}}}}}}}}}}}}s:1:"s";a:1:{s:1:"s";a:6:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"G";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8922;}}}}}}}}}}}}}s:1:"F";a:1:{s:1:"u";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8806;}}}}}}}}}}s:1:"G";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8822;}}}}}}}}s:1:"L";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10913;}}}}}s:1:"S";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10877;}}}}}}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8818;}}}}}}}}}s:1:"f"
;a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120079;}}}s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8920;}s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8666;}}}}}}}}}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:319;}}}}}}s:1:"o";a:3:{s:1:"n";a:1:{s:1:"g";a:4:{s:1:"L";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:2:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10229;}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10231;}}}}}}}}}}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10230;}}}}}}}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10232;}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10234;}}}}}}}}}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10233;}}}}}}}}}}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120131;}}}s:1:"w";a:1:{s:1:"e";a:1:{s:1:"r";a:2:{s:1:"L";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8601;}}}}}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8600;}}}}}}}}}}}}}}}s:1:"s";a:3:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8466;}}}s:1:"h";a:1:{s:1:";";a:1:
{s:9:"codepoint";i:8624;}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:321;}}}}}}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8810;}}}s:1:"M";a:8:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10501;}}}s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1052;}}}s:1:"e";a:2:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"u";a:1:{s:1:"m";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8287;}}}}}}}}}}s:1:"l";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8499;}}}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120080;}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:"P";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8723;}}}}}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120132;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8499;}}}}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:924;}}}s:1:"N";a:9:{s:1:"J";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1034;}}}}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:323;}}}}}}s:1:"c";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:327;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:325;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1053;}}}s:1:"e";a:3:{s:1:"g";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"v";a:1:{s:1:"e";a:3:{s:1:"M";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"u";a:1:{s:1:"m";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8203;}}}}}}}}}}}}s:1:"T";a:1:{s:1:"h";a:1:{s:1:"i";a:2:{s:1:"c";a:1:{s:1:"k";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8203;}}}}}}}}s:1:"n";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:
"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8203;}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"y";a:1:{s:1:"T";a:1:{s:1:"h";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8203;}}}}}}}}}}}}}}}}}}}}s:1:"s";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"d";a:2:{s:1:"G";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"G";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8811;}}}}}}}}}}}}}}}s:1:"L";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:"L";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8810;}}}}}}}}}}}}}s:1:"w";a:1:{s:1:"L";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10;}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120081;}}}s:1:"o";a:4:{s:1:"B";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8288;}}}}}}s:1:"n";a:1:{s:1:"B";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"k";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:160;}}}}}}}}}}}}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8469;}}}s:1:"t";a:11:{s:1:";";a:1:{s:9:"codepoint";i:10988;}s:1:"C";a:2:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"u";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8802;}}}}}}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:"C";a:1:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8813;}}}}}}}s:1:"D";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8742;}}}}}}}}}}}}}}}}}}s:1:"E";a:3:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:
"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8713;}}}}}}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8800;}}}}}s:1:"x";a:1:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8708;}}}}}}}s:1:"G";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8815;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8817;}}}}}}s:1:"L";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8825;}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8821;}}}}}}}}}}}}}s:1:"L";a:1:{s:1:"e";a:2:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"T";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8938;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8940;}}}}}}}}}}}}}}}}s:1:"s";a:1:{s:1:"s";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8814;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8816;}}}}}}s:1:"G";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8824;}}}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8820;}}}}}}}}}}s:1:"P";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8832;}s:1:"S";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8928;}}}}}}}}}}}}}}}}}}}s:1:"R";a:2:{s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"E";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8716;}}}}}}}}}}}}}}s:1:"i";
a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"T";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8939;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8941;}}}}}}}}}}}}}}}}}}}s:1:"S";a:2:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"S";a:1:{s:1:"u";a:2:{s:1:"b";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8930;}}}}}}}}}}s:1:"p";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8931;}}}}}}}}}}}}}}}}}}}s:1:"u";a:3:{s:1:"b";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8840;}}}}}}}}}}s:1:"c";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8833;}s:1:"S";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8929;}}}}}}}}}}}}}}}}}s:1:"p";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8841;}}}}}}}}}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8769;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8772;}}}}}}s:1:"F";a:1:{s:1:"u";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8775;}}}}}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8777;}}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"c";a:1:{s
:1:"a";a:1:{s:1:"l";a:1:{s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8740;}}}}}}}}}}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119977;}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:209;}s:9:"codepoint";i:209;}}}}}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:925;}}}s:1:"O";a:14:{s:1:"E";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:338;}}}}}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:211;}s:9:"codepoint";i:211;}}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:212;}s:9:"codepoint";i:212;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1054;}}}s:1:"d";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:336;}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120082;}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:210;}s:9:"codepoint";i:210;}}}}}s:1:"m";a:3:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:332;}}}}s:1:"e";a:1:{s:1:"g";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:937;}}}}s:1:"i";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:927;}}}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120134;}}}}s:1:"p";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"C";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"l";a:1:{s:1:"y";a:2:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"Q";a:1:{s:1:"u";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8220;}}}}}}}}}}}}s:1:"Q";a:1:{s:1:"u";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8216;}}}}}}}}}}}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10836;}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119978;}}}s:1:"l";a:1:{s:1:"a";a:1:{s
:1:"s";a:1:{s:1:"h";a:2:{s:1:";";a:1:{s:9:"codepoint";i:216;}s:9:"codepoint";i:216;}}}}}s:1:"t";a:1:{s:1:"i";a:2:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:213;}s:9:"codepoint";i:213;}}}s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10807;}}}}}}s:1:"u";a:1:{s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:214;}s:9:"codepoint";i:214;}}}s:1:"v";a:1:{s:1:"e";a:1:{s:1:"r";a:2:{s:1:"B";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:175;}}}s:1:"r";a:1:{s:1:"a";a:1:{s:1:"c";a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9182;}}s:1:"k";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9140;}}}}}}}}s:1:"P";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"h";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9180;}}}}}}}}}}}}}}}}s:1:"P";a:9:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"D";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8706;}}}}}}}}s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1055;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120083;}}}s:1:"h";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:934;}}}s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:928;}}s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:"M";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:177;}}}}}}}}}s:1:"o";a:2:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"p";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8460;}}}}}}}}}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8473;}}}}s:1:"r";a:4:{s:1:";";a:1:{s:9:"codepoint";i:10939;}s:1:"e";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:"s";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8826;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10927;}}}}}}s:1:"S";a:1:{s:1:"l";a:1:{s:1:"a";
a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8828;}}}}}}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8830;}}}}}}}}}}}}s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8243;}}}}s:1:"o";a:2:{s:1:"d";a:1:{s:1:"u";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8719;}}}}}s:1:"p";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"o";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8759;}s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8733;}}}}}}}}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119979;}}}s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:936;}}}}s:1:"Q";a:4:{s:1:"U";a:1:{s:1:"O";a:1:{s:1:"T";a:2:{s:1:";";a:1:{s:9:"codepoint";i:34;}s:9:"codepoint";i:34;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120084;}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8474;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119980;}}}}}s:1:"R";a:12:{s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10512;}}}}}s:1:"E";a:1:{s:1:"G";a:2:{s:1:";";a:1:{s:9:"codepoint";i:174;}s:9:"codepoint";i:174;}}s:1:"a";a:3:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:340;}}}}}s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10219;}}}s:1:"r";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8608;}s:1:"t";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10518;}}}}}}s:1:"c";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:344;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:342;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1056;}}}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8476;}s:1:"v";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"e";a:2:{s:1:"E";a:2:{s:1:"l";a:1:
{s:1:"e";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8715;}}}}}}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"b";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"u";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8651;}}}}}}}}}}}}s:1:"U";a:1:{s:1:"p";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"b";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"u";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10607;}}}}}}}}}}}}}}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8476;}}}s:1:"h";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:929;}}}s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:8:{s:1:"A";a:2:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"B";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"k";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10217;}}}}}}}}}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8594;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8677;}}}}s:1:"L";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8644;}}}}}}}}}}}}}}}s:1:"C";a:1:{s:1:"e";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8969;}}}}}}}}s:1:"D";a:1:{s:1:"o";a:2:{s:1:"u";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"B";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"k";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10215;}}}}}}}}}}}}s:1:"w";a:1:{s:1:"n";a:2:{s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10589;}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8642;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10581;}}}}}}}}}}}}}}s:1:"F";a:1:{s:1:
"l";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8971;}}}}}}s:1:"T";a:2:{s:1:"e";a:1:{s:1:"e";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8866;}s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8614;}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10587;}}}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8883;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10704;}}}}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8885;}}}}}}}}}}}}}}s:1:"U";a:1:{s:1:"p";a:3:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10575;}}}}}}}}}}}s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10588;}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8638;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10580;}}}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8640;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10579;}}}}}}}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8658;}}}}}}}}}}s:1:"o";a:2:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8477;}}}s:1:"u";a:1:{s:1:"n";a:1:{s:1:"d";a:1:{s:1:"I";a:1:{s:1:"m";a:1:{s:1:"p";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10608;}}}}}}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:
1:{s:9:"codepoint";i:8667;}}}}}}}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8475;}}}s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8625;}}}s:1:"u";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"D";a:1:{s:1:"e";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"y";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10740;}}}}}}}}}}}}s:1:"S";a:13:{s:1:"H";a:2:{s:1:"C";a:1:{s:1:"H";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1065;}}}}}s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1064;}}}}s:1:"O";a:1:{s:1:"F";a:1:{s:1:"T";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1068;}}}}}}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:346;}}}}}}s:1:"c";a:5:{s:1:";";a:1:{s:9:"codepoint";i:10940;}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:352;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:350;}}}}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:348;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1057;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120086;}}}s:1:"h";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"t";a:4:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8595;}}}}}}}}}}s:1:"L";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8592;}}}}}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8594;}}}}}}}}}}}s:1:"U";a:1:{s:1:"p";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8593;}}}}}}}}}}}}s:1:"i";a:1:{s:1:"g";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:931;}}}}}s:1:"m";a:1:{s:1:"a";a:1:{s:1:"l";a:1
:{s:1:"l";a:1:{s:1:"C";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8728;}}}}}}}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120138;}}}}s:1:"q";a:2:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8730;}}}s:1:"u";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:4:{s:1:";";a:1:{s:9:"codepoint";i:9633;}s:1:"I";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8851;}}}}}}}}}}}}}s:1:"S";a:1:{s:1:"u";a:2:{s:1:"b";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8847;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8849;}}}}}}}}}}s:1:"p";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8848;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8850;}}}}}}}}}}}}}}s:1:"U";a:1:{s:1:"n";a:1:{s:1:"i";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8852;}}}}}}}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119982;}}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8902;}}}}s:1:"u";a:4:{s:1:"b";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8912;}s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8912;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8838;}}}}}}}}}}s:1:"c";a:2:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"s";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8827;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10928;}}}}}}s:1:"S";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8829;}}}}}}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1
:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8831;}}}}}}}}}}}s:1:"h";a:1:{s:1:"T";a:1:{s:1:"h";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8715;}}}}}}}s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8721;}}s:1:"p";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8913;}s:1:"e";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8835;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8839;}}}}}}}}}}}s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8913;}}}}}}}s:1:"T";a:11:{s:1:"H";a:1:{s:1:"O";a:1:{s:1:"R";a:1:{s:1:"N";a:2:{s:1:";";a:1:{s:9:"codepoint";i:222;}s:9:"codepoint";i:222;}}}}s:1:"R";a:1:{s:1:"A";a:1:{s:1:"D";a:1:{s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8482;}}}}}s:1:"S";a:2:{s:1:"H";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1035;}}}}s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1062;}}}}s:1:"a";a:2:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9;}}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:932;}}}s:1:"c";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:356;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:354;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1058;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120087;}}}s:1:"h";a:2:{s:1:"e";a:2:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8756;}}}}}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:920;}}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8201;}}}}}}}}}s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8764;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8771;}}}}}}s:1:"F";a:1:{s:1:"u";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"E";a
:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8773;}}}}}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8776;}}}}}}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120139;}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"p";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8411;}}}}}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119983;}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:358;}}}}}}}s:1:"U";a:14:{s:1:"a";a:2:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:218;}s:9:"codepoint";i:218;}}}}s:1:"r";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8607;}s:1:"o";a:1:{s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10569;}}}}}}}}s:1:"b";a:1:{s:1:"r";a:2:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1038;}}}s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:364;}}}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:219;}s:9:"codepoint";i:219;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1059;}}}s:1:"d";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:368;}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120088;}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:217;}s:9:"codepoint";i:217;}}}}}s:1:"m";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:362;}}}}}s:1:"n";a:2:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:"r";a:2:{s:1:"B";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:818;}}}s:1:"r";a:1:{s:1:"a";a:1:{s:1:"c";a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9183;}}s:1:"k";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9141;}}}}}}}}s:1:"P";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:
{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"h";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9181;}}}}}}}}}}}}}}}s:1:"i";a:1:{s:1:"o";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8899;}s:1:"P";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8846;}}}}}}}}}s:1:"o";a:2:{s:1:"g";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:370;}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120140;}}}}s:1:"p";a:8:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8593;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10514;}}}}s:1:"D";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8645;}}}}}}}}}}}}}}}s:1:"D";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8597;}}}}}}}}}}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"b";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"u";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10606;}}}}}}}}}}}}s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8869;}s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8613;}}}}}}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8657;}}}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8661;}}}}}}}}}}s:1:"p";a:1:{s:1:"e";a:1:{s:1:"r";a:2:{s:1:"L";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8598;}}}}}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"c
odepoint";i:8599;}}}}}}}}}}}}}}s:1:"s";a:1:{s:1:"i";a:2:{s:1:";";a:1:{s:9:"codepoint";i:978;}s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:933;}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:366;}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119984;}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:360;}}}}}}s:1:"u";a:1:{s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:220;}s:9:"codepoint";i:220;}}}}s:1:"V";a:9:{s:1:"D";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8875;}}}}}s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10987;}}}}s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1042;}}}s:1:"d";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8873;}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10982;}}}}}}s:1:"e";a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8897;}}s:1:"r";a:3:{s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8214;}}}}s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8214;}s:1:"i";a:1:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:"l";a:4:{s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8739;}}}}s:1:"L";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:124;}}}}}s:1:"S";a:1:{s:1:"e";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10072;}}}}}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8768;}}}}}}}}}}}s:1:"y";a:1:{s:1:"T";a:1:{s:1:"h";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8202;}}}}}}}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120089;}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120141;}}}}s:1:"s";a:1:{s:1:"
c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119985;}}}}s:1:"v";a:1:{s:1:"d";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8874;}}}}}}}s:1:"W";a:5:{s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:372;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8896;}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120090;}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120142;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119986;}}}}}s:1:"X";a:4:{s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120091;}}}s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:926;}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120143;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119987;}}}}}s:1:"Y";a:9:{s:1:"A";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1071;}}}}s:1:"I";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1031;}}}}s:1:"U";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1070;}}}}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:221;}s:9:"codepoint";i:221;}}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:374;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1067;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120092;}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120144;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119988;}}}}s:1:"u";a:1:{s:1:"m";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:376;}}}}}s:1:"Z";a:8:{s:1:"H";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1046;}}}}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:377;}}}}}}s:1:"c";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9
:"codepoint";i:381;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1047;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:379;}}}}s:1:"e";a:2:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"W";a:1:{s:1:"i";a:1:{s:1:"d";a:1:{s:1:"t";a:1:{s:1:"h";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8203;}}}}}}}}}}}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:918;}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8488;}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8484;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119989;}}}}}s:1:"a";a:16:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:225;}s:9:"codepoint";i:225;}}}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:259;}}}}}}s:1:"c";a:5:{s:1:";";a:1:{s:9:"codepoint";i:8766;}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8767;}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:226;}s:9:"codepoint";i:226;}}}s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:180;}s:9:"codepoint";i:180;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1072;}}}s:1:"e";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:230;}s:9:"codepoint";i:230;}}}}s:1:"f";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8289;}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120094;}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:224;}s:9:"codepoint";i:224;}}}}}s:1:"l";a:2:{s:1:"e";a:2:{s:1:"f";a:1:{s:1:"s";a:1:{s:1:"y";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8501;}}}}}s:1:"p";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8501;}}}}s:1:"p";a:1:{s:1:"h";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:945;}}}}}s:1:"m";a:2:{s:1:"a";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:257;}}}s:1:"l";a:1:{s:1:"g";a:1:{s:1:";";a:
1:{s:9:"codepoint";i:10815;}}}}s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:38;}s:9:"codepoint";i:38;}}s:1:"n";a:2:{s:1:"d";a:5:{s:1:";";a:1:{s:9:"codepoint";i:8743;}s:1:"a";a:1:{s:1:"n";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10837;}}}}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10844;}}s:1:"s";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"p";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10840;}}}}}}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10842;}}}s:1:"g";a:7:{s:1:";";a:1:{s:9:"codepoint";i:8736;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10660;}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8736;}}}s:1:"m";a:1:{s:1:"s";a:1:{s:1:"d";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8737;}s:1:"a";a:8:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10664;}}s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10665;}}s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10666;}}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10667;}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10668;}}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10669;}}s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10670;}}s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10671;}}}}}}s:1:"r";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8735;}s:1:"v";a:1:{s:1:"b";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8894;}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10653;}}}}}}s:1:"s";a:2:{s:1:"p";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8738;}}}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8491;}}}s:1:"z";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9084;}}}}}}}s:1:"o";a:2:{s:1:"g";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:261;}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120146;}}}}s:1:"p";a:7:{s:1:";";a:1:{s:9:"codepoint";i:8776;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10864;}}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10863;}}}}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8778;}}s:1:"i";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint
";i:8779;}}}s:1:"o";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:39;}}}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8776;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8778;}}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:229;}s:9:"codepoint";i:229;}}}}s:1:"s";a:3:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119990;}}}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:42;}}s:1:"y";a:1:{s:1:"m";a:1:{s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8776;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8781;}}}}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:227;}s:9:"codepoint";i:227;}}}}}s:1:"u";a:1:{s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:228;}s:9:"codepoint";i:228;}}}s:1:"w";a:2:{s:1:"c";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8755;}}}}}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10769;}}}}}}s:1:"b";a:16:{s:1:"N";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10989;}}}}s:1:"a";a:2:{s:1:"c";a:1:{s:1:"k";a:4:{s:1:"c";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8780;}}}}}s:1:"e";a:1:{s:1:"p";a:1:{s:1:"s";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1014;}}}}}}}}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8245;}}}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8765;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8909;}}}}}}}}s:1:"r";a:2:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8893;}}}}s:1:"w";a:1:{s:1:"e";a:1:{s:1:"d";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8965;}s:1:"g";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8965;}}}}}}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"k";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9141;}s:1:"t
";a:1:{s:1:"b";a:1:{s:1:"r";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9142;}}}}}}}}s:1:"c";a:2:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8780;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1073;}}}s:1:"d";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8222;}}}}}s:1:"e";a:5:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:"u";a:1:{s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8757;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8757;}}}}}}s:1:"m";a:1:{s:1:"p";a:1:{s:1:"t";a:1:{s:1:"y";a:1:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10672;}}}}}}s:1:"p";a:1:{s:1:"s";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1014;}}}}s:1:"r";a:1:{s:1:"n";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8492;}}}}}s:1:"t";a:3:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:946;}}s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8502;}}s:1:"w";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8812;}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120095;}}}s:1:"i";a:1:{s:1:"g";a:7:{s:1:"c";a:3:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8898;}}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9711;}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8899;}}}}s:1:"o";a:3:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10752;}}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10753;}}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10754;}}}}}}}s:1:"s";a:2:{s:1:"q";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10758;}}}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9733;}}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:2:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9661;}}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:"
;";a:1:{s:9:"codepoint";i:9651;}}}}}}}}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10756;}}}}}}s:1:"v";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8897;}}}}s:1:"w";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8896;}}}}}}}}s:1:"k";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10509;}}}}}}s:1:"l";a:3:{s:1:"a";a:2:{s:1:"c";a:1:{s:1:"k";a:3:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"z";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10731;}}}}}}}}s:1:"s";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9642;}}}}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:4:{s:1:";";a:1:{s:9:"codepoint";i:9652;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9662;}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9666;}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9656;}}}}}}}}}}}}}}}}s:1:"n";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9251;}}}}s:1:"k";a:2:{i:1;a:2:{i:2;a:1:{s:1:";";a:1:{s:9:"codepoint";i:9618;}}i:4;a:1:{s:1:";";a:1:{s:9:"codepoint";i:9617;}}}i:3;a:1:{i:4;a:1:{s:1:";";a:1:{s:9:"codepoint";i:9619;}}}}s:1:"o";a:1:{s:1:"c";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9608;}}}}}s:1:"n";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8976;}}}}s:1:"o";a:4:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120147;}}}s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8869;}s:1:"t";a:1:{s:1:"o";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8869;}}}}}s:1:"w";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8904;}}}}}s:1:"x";a:12:{s:1:"D";a:4:{s:1:"L";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9559;}}s:1:"R";a:1:{s:1:";"
;a:1:{s:9:"codepoint";i:9556;}}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9558;}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9555;}}}s:1:"H";a:5:{s:1:";";a:1:{s:9:"codepoint";i:9552;}s:1:"D";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9574;}}s:1:"U";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9577;}}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9572;}}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9575;}}}s:1:"U";a:4:{s:1:"L";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9565;}}s:1:"R";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9562;}}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9564;}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9561;}}}s:1:"V";a:7:{s:1:";";a:1:{s:9:"codepoint";i:9553;}s:1:"H";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9580;}}s:1:"L";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9571;}}s:1:"R";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9568;}}s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9579;}}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9570;}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9567;}}}s:1:"b";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10697;}}}}s:1:"d";a:4:{s:1:"L";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9557;}}s:1:"R";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9554;}}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9488;}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9484;}}}s:1:"h";a:5:{s:1:";";a:1:{s:9:"codepoint";i:9472;}s:1:"D";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9573;}}s:1:"U";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9576;}}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9516;}}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9524;}}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8863;}}}}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8862;}}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8864;}}}}}}s:1:"u";a:4:{s:1:"L";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9563;}}s:1:"R";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9560;}}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9496;}}s:1:"r";a:
1:{s:1:";";a:1:{s:9:"codepoint";i:9492;}}}s:1:"v";a:7:{s:1:";";a:1:{s:9:"codepoint";i:9474;}s:1:"H";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9578;}}s:1:"L";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9569;}}s:1:"R";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9566;}}s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9532;}}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9508;}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9500;}}}}}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8245;}}}}}}s:1:"r";a:2:{s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:728;}}}}s:1:"v";a:1:{s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:166;}s:9:"codepoint";i:166;}}}}}s:1:"s";a:4:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119991;}}}s:1:"e";a:1:{s:1:"m";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8271;}}}}s:1:"i";a:1:{s:1:"m";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8765;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8909;}}}}s:1:"o";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:92;}s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10693;}}}}}s:1:"u";a:2:{s:1:"l";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8226;}s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8226;}}}}}s:1:"m";a:1:{s:1:"p";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8782;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10926;}}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8783;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8783;}}}}}}}s:1:"c";a:15:{s:1:"a";a:3:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:263;}}}}}s:1:"p";a:5:{s:1:";";a:1:{s:9:"codepoint";i:8745;}s:1:"a";a:1:{s:1:"n";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10820;}}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10825;}}}}}}s:1:"c";a:2:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10827;}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10823;}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:
1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10816;}}}}}s:1:"r";a:2:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8257;}}}s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:711;}}}}}s:1:"c";a:4:{s:1:"a";a:2:{s:1:"p";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10829;}}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:269;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:231;}s:9:"codepoint";i:231;}}}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:265;}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10828;}s:1:"s";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10832;}}}}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:267;}}}}s:1:"e";a:3:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:184;}s:9:"codepoint";i:184;}}}s:1:"m";a:1:{s:1:"p";a:1:{s:1:"t";a:1:{s:1:"y";a:1:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10674;}}}}}}s:1:"n";a:1:{s:1:"t";a:3:{s:1:";";a:1:{s:9:"codepoint";i:162;}s:9:"codepoint";i:162;s:1:"e";a:1:{s:1:"r";a:1:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:183;}}}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120096;}}}s:1:"h";a:3:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1095;}}}s:1:"e";a:1:{s:1:"c";a:1:{s:1:"k";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10003;}s:1:"m";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10003;}}}}}}}}s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:967;}}}s:1:"i";a:1:{s:1:"r";a:7:{s:1:";";a:1:{s:9:"codepoint";i:9675;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10691;}}s:1:"c";a:3:{s:1:";";a:1:{s:9:"codepoint";i:710;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8791;}}}s:1:"l";a:1:{s:1:"e";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8634;}}}}}s:1:"r";a:1:{s:1:"
i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8635;}}}}}}}}}}}s:1:"d";a:5:{s:1:"R";a:1:{s:1:";";a:1:{s:9:"codepoint";i:174;}}s:1:"S";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9416;}}s:1:"a";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8859;}}}}s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8858;}}}}}s:1:"d";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8861;}}}}}}}}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8791;}}s:1:"f";a:1:{s:1:"n";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10768;}}}}}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10991;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10690;}}}}}}}s:1:"l";a:1:{s:1:"u";a:1:{s:1:"b";a:1:{s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9827;}s:1:"u";a:1:{s:1:"i";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9827;}}}}}}}}s:1:"o";a:4:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:58;}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8788;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8788;}}}}}}s:1:"m";a:2:{s:1:"m";a:1:{s:1:"a";a:2:{s:1:";";a:1:{s:9:"codepoint";i:44;}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:64;}}}}s:1:"p";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8705;}s:1:"f";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8728;}}}s:1:"l";a:1:{s:1:"e";a:2:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8705;}}}}}s:1:"x";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8450;}}}}}}}}s:1:"n";a:2:{s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8773;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10861;}}}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8750;}}}}}s:1:"p";a:3:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120148;}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8720;}}}}s:1:"y";a:3:{
s:1:";";a:1:{s:9:"codepoint";i:169;}s:9:"codepoint";i:169;s:1:"s";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8471;}}}}}}s:1:"r";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8629;}}}}s:1:"o";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10007;}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119992;}}}s:1:"u";a:2:{s:1:"b";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10959;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10961;}}}s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10960;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10962;}}}}}s:1:"t";a:1:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8943;}}}}}s:1:"u";a:7:{s:1:"d";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:2:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10552;}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10549;}}}}}}s:1:"e";a:2:{s:1:"p";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8926;}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8927;}}}}s:1:"l";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8630;}s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10557;}}}}}}s:1:"p";a:5:{s:1:";";a:1:{s:9:"codepoint";i:8746;}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10824;}}}}}}s:1:"c";a:2:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10822;}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10826;}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8845;}}}}s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10821;}}}}s:1:"r";a:4:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8631;}s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10556;}}}}}s:1:"l";a:1:{s:1:"y";a:3:{s:1:"e";a:1:{s:1:"q";a:2:{s:1:"p";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8926;}}}}}s:1:"s";a:1:{s:1:"u";a:1:{s:1:"c";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8927;}}}}}}}s:1:"v";a
:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8910;}}}}s:1:"w";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8911;}}}}}}}}s:1:"r";a:1:{s:1:"e";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:164;}s:9:"codepoint";i:164;}}}s:1:"v";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8630;}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8631;}}}}}}}}}}}}}}s:1:"v";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8910;}}}}s:1:"w";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8911;}}}}}s:1:"w";a:2:{s:1:"c";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8754;}}}}}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8753;}}}}}s:1:"y";a:1:{s:1:"l";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9005;}}}}}}}s:1:"d";a:19:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8659;}}}}s:1:"H";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10597;}}}}s:1:"a";a:4:{s:1:"g";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8224;}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8504;}}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8595;}}}s:1:"s";a:1:{s:1:"h";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8208;}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8867;}}}}}s:1:"b";a:2:{s:1:"k";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10511;}}}}}}s:1:"l";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:733;}}}}}s:1:"c";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:271;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1076;}}}s:1:"d";a:3:{s:1:";";a:1:{s:9:
"codepoint";i:8518;}s:1:"a";a:2:{s:1:"g";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8225;}}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8650;}}}}s:1:"o";a:1:{s:1:"t";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10871;}}}}}}}s:1:"e";a:3:{s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:176;}s:9:"codepoint";i:176;}s:1:"l";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:948;}}}}s:1:"m";a:1:{s:1:"p";a:1:{s:1:"t";a:1:{s:1:"y";a:1:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10673;}}}}}}}s:1:"f";a:2:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10623;}}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120097;}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8643;}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8642;}}}}}s:1:"i";a:5:{s:1:"a";a:1:{s:1:"m";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8900;}s:1:"o";a:1:{s:1:"n";a:1:{s:1:"d";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8900;}s:1:"s";a:1:{s:1:"u";a:1:{s:1:"i";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9830;}}}}}}}}s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9830;}}}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:168;}}s:1:"g";a:1:{s:1:"a";a:1:{s:1:"m";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:989;}}}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8946;}}}}s:1:"v";a:3:{s:1:";";a:1:{s:9:"codepoint";i:247;}s:1:"i";a:1:{s:1:"d";a:1:{s:1:"e";a:3:{s:1:";";a:1:{s:9:"codepoint";i:247;}s:9:"codepoint";i:247;s:1:"o";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8903;}}}}}}}}}}}s:1:"o";a:1:{s:1:"n";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8903;}}}}}}s:1:"j";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1106;}}}}s:1:"l";a:1:{s:1:"c";a:2:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8990;}}}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"p";a:1
:{s:1:";";a:1:{s:9:"codepoint";i:8973;}}}}}}s:1:"o";a:5:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:36;}}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120149;}}}s:1:"t";a:5:{s:1:";";a:1:{s:9:"codepoint";i:729;}s:1:"e";a:1:{s:1:"q";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8784;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8785;}}}}}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8760;}}}}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8724;}}}}}s:1:"s";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8865;}}}}}}}}s:1:"u";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"w";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8966;}}}}}}}}}}}}}s:1:"w";a:1:{s:1:"n";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8595;}}}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8650;}}}}}}}}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"p";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"n";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8643;}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8642;}}}}}}}}}}}}}}}}s:1:"r";a:2:{s:1:"b";a:1:{s:1:"k";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10512;}}}}}}}s:1:"c";a:2:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8991;}}}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8972;}}}}}}s:1:"s";a:3:{s:1:"c";a:2:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119993;}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1109;}}}s:1:"o";a:1:{s:
1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10742;}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:273;}}}}}}s:1:"t";a:2:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8945;}}}}s:1:"r";a:1:{s:1:"i";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9663;}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9662;}}}}}s:1:"u";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8693;}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10607;}}}}}s:1:"w";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10662;}}}}}}}s:1:"z";a:2:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1119;}}}s:1:"i";a:1:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10239;}}}}}}}}}s:1:"e";a:18:{s:1:"D";a:2:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10871;}}}}s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8785;}}}}s:1:"a";a:2:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:233;}s:9:"codepoint";i:233;}}}}s:1:"s";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10862;}}}}}}s:1:"c";a:4:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:283;}}}}}s:1:"i";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8790;}s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:234;}s:9:"codepoint";i:234;}}}s:1:"o";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8789;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1101;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:279;}}}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8519;}}s:1:"f";a:2:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8786;}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120098;}}}s:1:"g";a:3:{s:1:";";a:1:{s:9:"codepoint";i:10906;}s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1
:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:232;}s:9:"codepoint";i:232;}}}}s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10902;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10904;}}}}}}s:1:"l";a:4:{s:1:";";a:1:{s:9:"codepoint";i:10905;}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9191;}}}}}}}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8467;}}s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10901;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10903;}}}}}}s:1:"m";a:3:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:275;}}}}s:1:"p";a:1:{s:1:"t";a:1:{s:1:"y";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8709;}s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8709;}}}}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8709;}}}}}s:1:"s";a:1:{s:1:"p";a:2:{i:1;a:2:{i:3;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8196;}}i:4;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8197;}}}s:1:";";a:1:{s:9:"codepoint";i:8195;}}}}s:1:"n";a:2:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:331;}}s:1:"s";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8194;}}}}s:1:"o";a:2:{s:1:"g";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:281;}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120150;}}}}s:1:"p";a:3:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8917;}s:1:"s";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10723;}}}}}s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10865;}}}}s:1:"s";a:1:{s:1:"i";a:3:{s:1:";";a:1:{s:9:"codepoint";i:1013;}s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:949;}}}}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:949;}}}}}s:1:"q";a:4:{s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8790;}}}}s:1:"o";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8789;}}}}}}s:1:"s";a:2:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoi
nt";i:8770;}}}s:1:"l";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"t";a:2:{s:1:"g";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10902;}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10901;}}}}}}}}}}s:1:"u";a:3:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:61;}}}}s:1:"e";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8799;}}}}s:1:"i";a:1:{s:1:"v";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8801;}s:1:"D";a:1:{s:1:"D";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10872;}}}}}}s:1:"v";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10725;}}}}}}}}s:1:"r";a:2:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8787;}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10609;}}}}}s:1:"s";a:3:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8495;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8784;}}}}s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8770;}}}}s:1:"t";a:2:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:951;}}s:1:"h";a:2:{s:1:";";a:1:{s:9:"codepoint";i:240;}s:9:"codepoint";i:240;}}s:1:"u";a:2:{s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:235;}s:9:"codepoint";i:235;}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8364;}}}}s:1:"x";a:3:{s:1:"c";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:33;}}}s:1:"i";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8707;}}}}s:1:"p";a:2:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8496;}}}}}}}}}s:1:"o";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8519;}}}}}}}}}}}}}s:1:"f";a:11:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:"s";a:1:{s:1:"e"
;a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8786;}}}}}}}}}}}}}s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1092;}}}s:1:"e";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9792;}}}}}}s:1:"f";a:3:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:64259;}}}}}s:1:"l";a:2:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:64256;}}}s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:64260;}}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120099;}}}s:1:"i";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:64257;}}}}}s:1:"l";a:3:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9837;}}}s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:64258;}}}}s:1:"t";a:1:{s:1:"n";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9649;}}}}}s:1:"n";a:1:{s:1:"o";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:402;}}}}s:1:"o";a:2:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120151;}}}s:1:"r";a:2:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8704;}}}}s:1:"k";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8916;}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10969;}}}}}s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10765;}}}}}}}}s:1:"r";a:2:{s:1:"a";a:2:{s:1:"c";a:6:{i:1;a:6:{i:2;a:2:{s:1:";";a:1:{s:9:"codepoint";i:189;}s:9:"codepoint";i:189;}i:3;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8531;}}i:4;a:2:{s:1:";";a:1:{s:9:"codepoint";i:188;}s:9:"codepoint";i:188;}i:5;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8533;}}i:6;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8537;}}i:8;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8539;}}}i:2;a:2:{i:3;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8532;}}i:5;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8534;}}}i:3;a:3:{i:4;a:2:{s:1:";";a:1:{s:9:"codepoint";i:190;}s:9:"codepoint";i:190;}i:5;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8535;}}i:8;a:1:{s:1:";
";a:1:{s:9:"codepoint";i:8540;}}}i:4;a:1:{i:5;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8536;}}}i:5;a:2:{i:6;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8538;}}i:8;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8541;}}}i:7;a:1:{i:8;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8542;}}}}s:1:"s";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8260;}}}}s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8994;}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119995;}}}}}s:1:"g";a:16:{s:1:"E";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8807;}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10892;}}}s:1:"a";a:3:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:501;}}}}}s:1:"m";a:1:{s:1:"m";a:1:{s:1:"a";a:2:{s:1:";";a:1:{s:9:"codepoint";i:947;}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:989;}}}}}s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10886;}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:287;}}}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:285;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1075;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:289;}}}}s:1:"e";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8805;}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8923;}}s:1:"q";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8805;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8807;}}s:1:"s";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10878;}}}}}}}s:1:"s";a:4:{s:1:";";a:1:{s:9:"codepoint";i:10878;}s:1:"c";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10921;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10880;}s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10882;}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10884;}}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10900;}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120100;}}}s:1:"g";a:2:{s:1:";";a:1:{s:9
:"codepoint";i:8811;}s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8921;}}}s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8503;}}}}}s:1:"j";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1107;}}}}s:1:"l";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8823;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10898;}}s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10917;}}s:1:"j";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10916;}}}s:1:"n";a:4:{s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8809;}}s:1:"a";a:1:{s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10890;}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10890;}}}}}}}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10888;}s:1:"q";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10888;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8809;}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8935;}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120152;}}}}s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:96;}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8458;}}}s:1:"i";a:1:{s:1:"m";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8819;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10894;}}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10896;}}}}}s:1:"t";a:7:{s:1:";";a:1:{s:9:"codepoint";i:62;}s:9:"codepoint";i:62;s:1:"c";a:2:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10919;}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10874;}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8919;}}}}s:1:"l";a:1:{s:1:"P";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10645;}}}}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10876;}}}}}}s:1:"r";a:5:{s:1:"a";a:2:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10886;}}}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint
";i:10616;}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8919;}}}}s:1:"e";a:1:{s:1:"q";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8923;}}}}}s:1:"q";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10892;}}}}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8823;}}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8819;}}}}}}}s:1:"h";a:10:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8660;}}}}s:1:"a";a:4:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8202;}}}}}s:1:"l";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:189;}}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8459;}}}}}s:1:"r";a:2:{s:1:"d";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1098;}}}}s:1:"r";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8596;}s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10568;}}}}s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8621;}}}}}s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8463;}}}}s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:293;}}}}}s:1:"e";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9829;}s:1:"u";a:1:{s:1:"i";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9829;}}}}}}}}s:1:"l";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8230;}}}}}s:1:"r";a:1:{s:1:"c";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8889;}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120101;}}}s:1:"k";a:1:{s:1:"s";a:2:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10533;}}}}}}s:1:"w";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10534;}}}}}}}}s:1:"o";a:5:{s:1:
"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8703;}}}}s:1:"m";a:1:{s:1:"t";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8763;}}}}}s:1:"o";a:1:{s:1:"k";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8617;}}}}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8618;}}}}}}}}}}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120153;}}}s:1:"r";a:1:{s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8213;}}}}}}s:1:"s";a:3:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119997;}}}s:1:"l";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8463;}}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:295;}}}}}}s:1:"y";a:2:{s:1:"b";a:1:{s:1:"u";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8259;}}}}}s:1:"p";a:1:{s:1:"h";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8208;}}}}}}}s:1:"i";a:15:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:237;}s:9:"codepoint";i:237;}}}}}s:1:"c";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8291;}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:238;}s:9:"codepoint";i:238;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1080;}}}s:1:"e";a:2:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1077;}}}s:1:"x";a:1:{s:1:"c";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:161;}s:9:"codepoint";i:161;}}}}s:1:"f";a:2:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8660;}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120102;}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:236;}s:9:"codepoint";i:236;}}}}}s:1:"i";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8520;}s:1:"i";a:2:{s:1:
"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10764;}}}}s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8749;}}}}s:1:"n";a:1:{s:1:"f";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10716;}}}}}s:1:"o";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8489;}}}}}s:1:"j";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:307;}}}}}s:1:"m";a:3:{s:1:"a";a:3:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:299;}}}s:1:"g";a:3:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8465;}}s:1:"l";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8464;}}}}}s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8465;}}}}}}s:1:"t";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:305;}}}}s:1:"o";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8887;}}}s:1:"p";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:437;}}}}}s:1:"n";a:5:{s:1:";";a:1:{s:9:"codepoint";i:8712;}s:1:"c";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8453;}}}}}s:1:"f";a:1:{s:1:"i";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8734;}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10717;}}}}}}}s:1:"o";a:1:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:305;}}}}}s:1:"t";a:5:{s:1:";";a:1:{s:9:"codepoint";i:8747;}s:1:"c";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8890;}}}}s:1:"e";a:2:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8484;}}}}}s:1:"r";a:1:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8890;}}}}}}s:1:"l";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"h";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10775;}}}}}}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10812;}}}}}}}s:1:"o";a:4:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1105;}}}s:1:"g";a:1:{s:1:"o";a:1:{s:1:"n";a:1
:{s:1:";";a:1:{s:9:"codepoint";i:303;}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120154;}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:953;}}}}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10812;}}}}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:191;}s:9:"codepoint";i:191;}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119998;}}}s:1:"i";a:1:{s:1:"n";a:5:{s:1:";";a:1:{s:9:"codepoint";i:8712;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8953;}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8949;}}}}s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8948;}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8947;}}}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8712;}}}}}s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8290;}s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:297;}}}}}}s:1:"u";a:2:{s:1:"k";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1110;}}}}s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:239;}s:9:"codepoint";i:239;}}}}s:1:"j";a:6:{s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:309;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1081;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120103;}}}s:1:"m";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:567;}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120155;}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119999;}}}s:1:"e";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1112;}}}}}}s:1:"u";a:1:{s:1:"k";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1108;}}}}}}s:1:"k";a:8:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"a";a:2:{s:1:";";a:1:{s:9:"codepoint";i:954;}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1008;}}}}}}s:1:"c";a:2:{s:1:"e";a:1:{s:1:"d"
;a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:311;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1082;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120104;}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:312;}}}}}}s:1:"h";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1093;}}}}s:1:"j";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1116;}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120156;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120000;}}}}}s:1:"l";a:22:{s:1:"A";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8666;}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8656;}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10523;}}}}}}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10510;}}}}}s:1:"E";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8806;}s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10891;}}}s:1:"H";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10594;}}}}s:1:"a";a:9:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:314;}}}}}s:1:"e";a:1:{s:1:"m";a:1:{s:1:"p";a:1:{s:1:"t";a:1:{s:1:"y";a:1:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10676;}}}}}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8466;}}}}}s:1:"m";a:1:{s:1:"b";a:1:{s:1:"d";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:955;}}}}}s:1:"n";a:1:{s:1:"g";a:3:{s:1:";";a:1:{s:9:"codepoint";i:10216;}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10641;}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10216;}}}}}s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10885;}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:171;}s:9:"codepoint";i:171;}}}s:1:"r";a:1:{s:1:"r";a:8:{s:1:";";a:1:{s:9:"codepoint";i:8592;}s:1:"b";a:2:{s:1:";";a:1:{s:
9:"codepoint";i:8676;}s:1:"f";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10527;}}}}s:1:"f";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10525;}}}s:1:"h";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8617;}}}s:1:"l";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8619;}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10553;}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10611;}}}}s:1:"t";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8610;}}}}}s:1:"t";a:3:{s:1:";";a:1:{s:9:"codepoint";i:10923;}s:1:"a";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10521;}}}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10925;}}}}s:1:"b";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10508;}}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10098;}}}}s:1:"r";a:2:{s:1:"a";a:1:{s:1:"c";a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:123;}}s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:91;}}}}s:1:"k";a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10635;}}s:1:"s";a:1:{s:1:"l";a:2:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10639;}}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10637;}}}}}}}s:1:"c";a:4:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:318;}}}}}s:1:"e";a:2:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:316;}}}}s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8968;}}}}s:1:"u";a:1:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:123;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1083;}}}s:1:"d";a:4:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10550;}}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8220;}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8222;}}}}}s:1:"r";a:2:{s:1:"d";a:1:{s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10599;}}}}}s:1:"u";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10571;}}}}}}}s
:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8626;}}}}s:1:"e";a:5:{s:1:";";a:1:{s:9:"codepoint";i:8804;}s:1:"f";a:1:{s:1:"t";a:5:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8592;}s:1:"t";a:1:{s:1:"a";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8610;}}}}}}}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"p";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"n";a:2:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8637;}}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8636;}}}}}}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8647;}}}}}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8596;}s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8646;}}}}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"p";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8651;}}}}}}}}}s:1:"s";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8621;}}}}}}}}}}}}}}}}s:1:"t";a:1:{s:1:"h";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8907;}}}}}}}}}}}}}s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8922;}}s:1:"q";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8804;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8806;}}s:1:"s";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10877;}}}}}}}s:1:"s";a:5:{s:1:";";a:1:{s:9:"codepoint";i:10877;}s:1:"c";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10920;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10879;}s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoi
nt";i:10881;}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10883;}}}}}}s:1:"g";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10899;}}}}s:1:"s";a:5:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10885;}}}}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8918;}}}}s:1:"e";a:1:{s:1:"q";a:2:{s:1:"g";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8922;}}}}s:1:"q";a:1:{s:1:"g";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10891;}}}}}}}s:1:"g";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8822;}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8818;}}}}}}}s:1:"f";a:3:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10620;}}}}}s:1:"l";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8970;}}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120105;}}}s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8822;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10897;}}}s:1:"h";a:2:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8637;}}s:1:"u";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8636;}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10602;}}}}}s:1:"b";a:1:{s:1:"l";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9604;}}}}}s:1:"j";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1113;}}}}s:1:"l";a:5:{s:1:";";a:1:{s:9:"codepoint";i:8810;}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8647;}}}}s:1:"c";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8990;}}}}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10603;}}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9722;}}}}}s:1:"m";a:2:{s:1:"i";a:1:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:320;}}}}}s:1:"o";a:1:{s:1:"u";a:1:{s:1:"s";a
:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9136;}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"h";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9136;}}}}}}}}}}s:1:"n";a:4:{s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8808;}}s:1:"a";a:1:{s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10889;}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10889;}}}}}}}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10887;}s:1:"q";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10887;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8808;}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8934;}}}}}s:1:"o";a:8:{s:1:"a";a:2:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10220;}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8701;}}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10214;}}}}s:1:"n";a:1:{s:1:"g";a:3:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10229;}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10231;}}}}}}}}}}}}}}}s:1:"m";a:1:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10236;}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10230;}}}}}}}}}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8619;}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8620;}}}}}}}}}}}}}s:1:"p";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10629;}}}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120157;}}s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:
{s:9:"codepoint";i:10797;}}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10804;}}}}}}s:1:"w";a:2:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8727;}}}}s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:95;}}}}}s:1:"z";a:3:{s:1:";";a:1:{s:9:"codepoint";i:9674;}s:1:"e";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9674;}}}}}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10731;}}}}s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:40;}s:1:"l";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10643;}}}}}}s:1:"r";a:5:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8646;}}}}s:1:"c";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8991;}}}}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8651;}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10605;}}}}}s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8206;}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8895;}}}}}s:1:"s";a:6:{s:1:"a";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8249;}}}}}s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120001;}}}s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8624;}}s:1:"i";a:1:{s:1:"m";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8818;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10893;}}s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10895;}}}}s:1:"q";a:2:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:91;}}s:1:"u";a:1:{s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8216;}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8218;}}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:322;}}}}}}s:1:"t";a:9:{s:1:";";a:1:{s:9:"codepoint";i:60;}s:9:"codepoint";i:60;s:1:"c";a:2:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10918;}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10873;}}}}s:1:"d";a:1
:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8918;}}}}s:1:"h";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8907;}}}}}s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8905;}}}}}s:1:"l";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10614;}}}}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10875;}}}}}}s:1:"r";a:2:{s:1:"P";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10646;}}}}s:1:"i";a:3:{s:1:";";a:1:{s:9:"codepoint";i:9667;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8884;}}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9666;}}}}}s:1:"u";a:1:{s:1:"r";a:2:{s:1:"d";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10570;}}}}}}s:1:"u";a:1:{s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10598;}}}}}}}}s:1:"m";a:14:{s:1:"D";a:1:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8762;}}}}}s:1:"a";a:4:{s:1:"c";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:175;}s:9:"codepoint";i:175;}}s:1:"l";a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9794;}}s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10016;}s:1:"e";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10016;}}}}}}s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8614;}s:1:"s";a:1:{s:1:"t";a:1:{s:1:"o";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8614;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8615;}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8612;}}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8613;}}}}}}}s:1:"r";a:1:{s:1:"k";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9646;}}}}}}s:1:"c";a:2:{s:1:"o";a:1:{s:1:"m";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10793;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1084;}}}s:1:"d";a:1:{s:1:"a";a:1:{s:
1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8212;}}}}}s:1:"e";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8737;}}}}}}}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120106;}}}s:1:"h";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8487;}}}s:1:"i";a:3:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:181;}s:9:"codepoint";i:181;}}}s:1:"d";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8739;}s:1:"a";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:42;}}}}s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10992;}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:183;}s:9:"codepoint";i:183;}}}}s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8722;}s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8863;}}s:1:"d";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8760;}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10794;}}}}}}}s:1:"l";a:2:{s:1:"c";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10971;}}}s:1:"d";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8230;}}}}s:1:"n";a:1:{s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8723;}}}}}}s:1:"o";a:2:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:"l";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8871;}}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120158;}}}}s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8723;}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120002;}}}s:1:"t";a:1:{s:1:"p";a:1:{s:1:"o";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8766;}}}}}}s:1:"u";a:3:{s:1:";";a:1:{s:9:"codepoint";i:956;}s:1:"l";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8888;}}}}}}}s:1:"m";a:1:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8888;}}}}}}s:1:"n";a:23:{s:1:"L";a:1:{s:1:"e";a:1:{s
:1:"f";a:1:{s:1:"t";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8653;}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8654;}}}}}}}}}}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8655;}}}}}}}}}}}s:1:"V";a:2:{s:1:"D";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8879;}}}}}s:1:"d";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8878;}}}}}}s:1:"a";a:4:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8711;}}}}s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:324;}}}}}s:1:"p";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8777;}s:1:"o";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:329;}}}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8777;}}}}}}s:1:"t";a:1:{s:1:"u";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9838;}s:1:"a";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9838;}s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8469;}}}}}}}}s:1:"b";a:1:{s:1:"s";a:1:{s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:160;}s:9:"codepoint";i:160;}}}s:1:"c";a:5:{s:1:"a";a:2:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10819;}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:328;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:326;}}}}}s:1:"o";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8775;}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10818;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1085;}}}s:1:"d";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8211;}}}}}s:1:"e";a:6:{s:1:";";a:1:{s:9:"codepoint";i:8800;}s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r"
;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8663;}}}}s:1:"a";a:1:{s:1:"r";a:2:{s:1:"h";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10532;}}}s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8599;}s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8599;}}}}}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"i";a:1:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8802;}}}}}s:1:"s";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10536;}}}}}s:1:"x";a:1:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8708;}s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8708;}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120107;}}}s:1:"g";a:3:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8817;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8817;}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8821;}}}}s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8815;}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8815;}}}}s:1:"h";a:3:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8654;}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8622;}}}}s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10994;}}}}}s:1:"i";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8715;}s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8956;}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8954;}}}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8715;}}}s:1:"j";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1114;}}}}s:1:"l";a:6:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8653;}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8602;}}}}s:1:"d";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8229;}}}s:1:"e";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8816;}s:1:"f";a:1:{s:1:"t";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8602;}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1
:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8622;}}}}}}}}}}}}}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8816;}}s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8814;}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8820;}}}}s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8814;}s:1:"r";a:1:{s:1:"i";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8938;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8940;}}}}}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8740;}}}}s:1:"o";a:2:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120159;}}}s:1:"t";a:4:{s:1:";";a:1:{s:9:"codepoint";i:172;}s:9:"codepoint";i:172;s:1:"i";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8713;}s:1:"v";a:3:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8713;}}s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8951;}}s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8950;}}}}}s:1:"n";a:1:{s:1:"i";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8716;}s:1:"v";a:3:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8716;}}s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8958;}}s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8957;}}}}}}}s:1:"p";a:3:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8742;}s:1:"a";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8742;}}}}}}}}s:1:"o";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10772;}}}}}}s:1:"r";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8832;}s:1:"c";a:1:{s:1:"u";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8928;}}}}s:1:"e";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8832;}}}}}s:1:"r";a:4:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8655;}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8603;}}}}s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8603;}}}}}}}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i
";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8939;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8941;}}}}}}s:1:"s";a:7:{s:1:"c";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8833;}s:1:"c";a:1:{s:1:"u";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8929;}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120003;}}}s:1:"h";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"t";a:2:{s:1:"m";a:1:{s:1:"i";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8740;}}}}s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8742;}}}}}}}}}}}}}s:1:"i";a:1:{s:1:"m";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8769;}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8772;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8772;}}}}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8740;}}}}s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8742;}}}}s:1:"q";a:1:{s:1:"s";a:1:{s:1:"u";a:2:{s:1:"b";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8930;}}}s:1:"p";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8931;}}}}}}s:1:"u";a:3:{s:1:"b";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8836;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8840;}}s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8840;}}}}}}}s:1:"c";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8833;}}}s:1:"p";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8837;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8841;}}s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8841;}}}}}}}}}s:1:"t";a:4:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8825;}}}s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:241;}s:9:"codepoint";i:241;}}}}s:1:"l";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8824;}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoin
t";i:8938;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8940;}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8939;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8941;}}}}}}}}}}}}}}}}s:1:"u";a:2:{s:1:";";a:1:{s:9:"codepoint";i:957;}s:1:"m";a:3:{s:1:";";a:1:{s:9:"codepoint";i:35;}s:1:"e";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8470;}}}}s:1:"s";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8199;}}}}}s:1:"v";a:6:{s:1:"D";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8877;}}}}}s:1:"H";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10500;}}}}}s:1:"d";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8876;}}}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"f";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10718;}}}}}}s:1:"l";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10498;}}}}}s:1:"r";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10499;}}}}}}s:1:"w";a:3:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8662;}}}}s:1:"a";a:1:{s:1:"r";a:2:{s:1:"h";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10531;}}}s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8598;}s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8598;}}}}}}s:1:"n";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10535;}}}}}}}s:1:"o";a:18:{s:1:"S";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9416;}}s:1:"a";a:2:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:243;}s:9:"codepoint";i:243;}}}}s:1:"s";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8859;}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8858;}s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:244;}s:9:"codepoint";i:244;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1086;}}}s:1:"d";a:5:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:
1:{s:1:";";a:1:{s:9:"codepoint";i:8861;}}}}s:1:"b";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:337;}}}}}s:1:"i";a:1:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10808;}}}s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8857;}}}s:1:"s";a:1:{s:1:"o";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10684;}}}}}}s:1:"e";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:339;}}}}}s:1:"f";a:2:{s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10687;}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120108;}}}s:1:"g";a:3:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:731;}}}s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:242;}s:9:"codepoint";i:242;}}}}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10689;}}}s:1:"h";a:2:{s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10677;}}}}s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8486;}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8750;}}}}s:1:"l";a:4:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8634;}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10686;}}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10683;}}}}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8254;}}}}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10688;}}}s:1:"m";a:3:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:333;}}}}s:1:"e";a:1:{s:1:"g";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:969;}}}}s:1:"i";a:3:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:959;}}}}}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10678;}}s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8854;}}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120160;}}}}s:1:"p";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";"
;a:1:{s:9:"codepoint";i:10679;}}}s:1:"e";a:1:{s:1:"r";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10681;}}}}s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8853;}}}}}s:1:"r";a:7:{s:1:";";a:1:{s:9:"codepoint";i:8744;}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8635;}}}}s:1:"d";a:4:{s:1:";";a:1:{s:9:"codepoint";i:10845;}s:1:"e";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8500;}s:1:"o";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8500;}}}}}s:1:"f";a:2:{s:1:";";a:1:{s:9:"codepoint";i:170;}s:9:"codepoint";i:170;}s:1:"m";a:2:{s:1:";";a:1:{s:9:"codepoint";i:186;}s:9:"codepoint";i:186;}}s:1:"i";a:1:{s:1:"g";a:1:{s:1:"o";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8886;}}}}}s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10838;}}}s:1:"s";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"p";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10839;}}}}}}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10843;}}}s:1:"s";a:3:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8500;}}}s:1:"l";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:2:{s:1:";";a:1:{s:9:"codepoint";i:248;}s:9:"codepoint";i:248;}}}}s:1:"o";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8856;}}}}s:1:"t";a:1:{s:1:"i";a:2:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:245;}s:9:"codepoint";i:245;}}}s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8855;}s:1:"a";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10806;}}}}}}}}s:1:"u";a:1:{s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:246;}s:9:"codepoint";i:246;}}}s:1:"v";a:1:{s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9021;}}}}}}s:1:"p";a:12:{s:1:"a";a:1:{s:1:"r";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8741;}s:1:"a";a:3:{s:1:";";a:1:{s:9:"codepoint";i:182;}s:9:"codepoint";i:182;s:1:"l";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8741;}}}}}}s:1:"s";a:2:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoin
t";i:10995;}}}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:11005;}}}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8706;}}}}s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1087;}}}s:1:"e";a:1:{s:1:"r";a:5:{s:1:"c";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:37;}}}}s:1:"i";a:1:{s:1:"o";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:46;}}}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8240;}}}}s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8869;}}s:1:"t";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8241;}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120109;}}}s:1:"h";a:3:{s:1:"i";a:2:{s:1:";";a:1:{s:9:"codepoint";i:966;}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:966;}}}s:1:"m";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8499;}}}}}s:1:"o";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9742;}}}}}s:1:"i";a:3:{s:1:";";a:1:{s:9:"codepoint";i:960;}s:1:"t";a:1:{s:1:"c";a:1:{s:1:"h";a:1:{s:1:"f";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8916;}}}}}}}}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:982;}}}s:1:"l";a:2:{s:1:"a";a:1:{s:1:"n";a:2:{s:1:"c";a:1:{s:1:"k";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8463;}s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8462;}}}}s:1:"k";a:1:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8463;}}}}}s:1:"u";a:1:{s:1:"s";a:9:{s:1:";";a:1:{s:9:"codepoint";i:43;}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10787;}}}}}s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8862;}}s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10786;}}}}s:1:"d";a:2:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8724;}}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10789;}}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10866;}}s:1:"m";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:177;}s:9:"codepoint";i:177;}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"co
depoint";i:10790;}}}}s:1:"t";a:1:{s:1:"w";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10791;}}}}}}}s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:177;}}s:1:"o";a:3:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10773;}}}}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120161;}}}s:1:"u";a:1:{s:1:"n";a:1:{s:1:"d";a:2:{s:1:";";a:1:{s:9:"codepoint";i:163;}s:9:"codepoint";i:163;}}}}s:1:"r";a:10:{s:1:";";a:1:{s:9:"codepoint";i:8826;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10931;}}s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10935;}}}s:1:"c";a:1:{s:1:"u";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8828;}}}}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10927;}s:1:"c";a:6:{s:1:";";a:1:{s:9:"codepoint";i:8826;}s:1:"a";a:1:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10935;}}}}}}}s:1:"c";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"l";a:1:{s:1:"y";a:1:{s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8828;}}}}}}}}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10927;}}}s:1:"n";a:3:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10937;}}}}}}}s:1:"e";a:1:{s:1:"q";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10933;}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8936;}}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8830;}}}}}}s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8242;}s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8473;}}}}}s:1:"n";a:3:{s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10933;}}s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10937;}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8936;}}}}}s:1:"o";a:3:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8719;}}s:1:"f";a:3:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoi
nt";i:9006;}}}}}s:1:"l";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8978;}}}}}s:1:"s";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8979;}}}}}}s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8733;}s:1:"t";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8733;}}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8830;}}}}s:1:"u";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8880;}}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120005;}}}s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:968;}}}s:1:"u";a:1:{s:1:"n";a:1:{s:1:"c";a:1:{s:1:"s";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8200;}}}}}}}s:1:"q";a:6:{s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120110;}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10764;}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120162;}}}}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8279;}}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120006;}}}}s:1:"u";a:3:{s:1:"a";a:1:{s:1:"t";a:2:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"n";a:1:{s:1:"i";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8461;}}}}}}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10774;}}}}}}s:1:"e";a:1:{s:1:"s";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:63;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8799;}}}}}}s:1:"o";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:34;}s:9:"codepoint";i:34;}}}}s:1:"r";a:21:{s:1:"A";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8667;}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8658;}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10524;}}}}}}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10511;
}}}}}s:1:"H";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10596;}}}}s:1:"a";a:7:{s:1:"c";a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10714;}}s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:341;}}}}}s:1:"d";a:1:{s:1:"i";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8730;}}}}s:1:"e";a:1:{s:1:"m";a:1:{s:1:"p";a:1:{s:1:"t";a:1:{s:1:"y";a:1:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10675;}}}}}}}s:1:"n";a:1:{s:1:"g";a:4:{s:1:";";a:1:{s:9:"codepoint";i:10217;}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10642;}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10661;}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10217;}}}}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:187;}s:9:"codepoint";i:187;}}}s:1:"r";a:1:{s:1:"r";a:11:{s:1:";";a:1:{s:9:"codepoint";i:8594;}s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10613;}}}s:1:"b";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8677;}s:1:"f";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10528;}}}}s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10547;}}s:1:"f";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10526;}}}s:1:"h";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8618;}}}s:1:"l";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8620;}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10565;}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10612;}}}}s:1:"t";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8611;}}}s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8605;}}}}s:1:"t";a:2:{s:1:"a";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10522;}}}}s:1:"i";a:1:{s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8758;}s:1:"n";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8474;}}}}}}}}}s:1:"b";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10509;}}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10099;}}}}s:1:"r";a:2:{s:1:"a";a:1:{s:1:"c";a:
2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:125;}}s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:93;}}}}s:1:"k";a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10636;}}s:1:"s";a:1:{s:1:"l";a:2:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10638;}}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10640;}}}}}}}s:1:"c";a:4:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:345;}}}}}s:1:"e";a:2:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:343;}}}}s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8969;}}}}s:1:"u";a:1:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:125;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1088;}}}s:1:"d";a:4:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10551;}}}s:1:"l";a:1:{s:1:"d";a:1:{s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10601;}}}}}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8221;}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8221;}}}}}s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8627;}}}}s:1:"e";a:3:{s:1:"a";a:1:{s:1:"l";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8476;}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8475;}}}}s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8476;}}}}}s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8477;}}}}s:1:"c";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9645;}}}s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:174;}s:9:"codepoint";i:174;}}s:1:"f";a:3:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10621;}}}}}s:1:"l";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8971;}}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120111;}}}s:1:"h";a:2:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8641;}}s:1:"u";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8640;}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10604;}}}}}s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:961
;}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1009;}}}}s:1:"i";a:3:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:6:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8594;}s:1:"t";a:1:{s:1:"a";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8611;}}}}}}}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"p";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"n";a:2:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8641;}}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8640;}}}}}}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8644;}}}}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"p";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8652;}}}}}}}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8649;}}}}}}}}}}}}s:1:"s";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8605;}}}}}}}}}}}s:1:"t";a:1:{s:1:"h";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8908;}}}}}}}}}}}}}}s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:730;}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8787;}}}}}}}}}}}}s:1:"l";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8644;}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8652;}}}}s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8207;}}}s:1:"m";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:91
37;}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"h";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9137;}}}}}}}}}}s:1:"n";a:1:{s:1:"m";a:1:{s:1:"i";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10990;}}}}}s:1:"o";a:4:{s:1:"a";a:2:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10221;}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8702;}}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10215;}}}}s:1:"p";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10630;}}}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120163;}}s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10798;}}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10805;}}}}}}}s:1:"p";a:2:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:41;}s:1:"g";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10644;}}}}}s:1:"p";a:1:{s:1:"o";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10770;}}}}}}}}s:1:"r";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8649;}}}}}s:1:"s";a:4:{s:1:"a";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8250;}}}}}s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120007;}}}s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8625;}}s:1:"q";a:2:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:93;}}s:1:"u";a:1:{s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8217;}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8217;}}}}}}s:1:"t";a:3:{s:1:"h";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8908;}}}}}s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8906;}}}}}s:1:"r";a:1:{s:1:"i";a:4:{s:1:";";a:1:{s:9:"codepoint";i:9657;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8885;}}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9656;}}s:1:"l";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10702;}}}}}}}}s:1:"u";a:1:{s:1:"l";
a:1:{s:1:"u";a:1:{s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10600;}}}}}}}s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8478;}}}s:1:"s";a:19:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:347;}}}}}}s:1:"b";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8218;}}}}}s:1:"c";a:10:{s:1:";";a:1:{s:9:"codepoint";i:8827;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10932;}}s:1:"a";a:2:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10936;}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:353;}}}}}s:1:"c";a:1:{s:1:"u";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8829;}}}}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10928;}s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:351;}}}}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:349;}}}}s:1:"n";a:3:{s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10934;}}s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10938;}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8937;}}}}}s:1:"p";a:1:{s:1:"o";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10771;}}}}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8831;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1089;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8901;}s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8865;}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10854;}}}}}s:1:"e";a:7:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8664;}}}}s:1:"a";a:1:{s:1:"r";a:2:{s:1:"h";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10533;}}}s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8600;}s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8600;}}}}}}s:1:"c";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:167;}s:9:"codepoint";i:167;}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:59;}}}s:1
:"s";a:1:{s:1:"w";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10537;}}}}}s:1:"t";a:1:{s:1:"m";a:2:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8726;}}}}}s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8726;}}}}s:1:"x";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10038;}}}}s:1:"f";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:120112;}s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8994;}}}}}}s:1:"h";a:4:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9839;}}}}s:1:"c";a:2:{s:1:"h";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1097;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1096;}}}s:1:"o";a:1:{s:1:"r";a:1:{s:1:"t";a:2:{s:1:"m";a:1:{s:1:"i";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8739;}}}}s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8741;}}}}}}}}}}}}s:1:"y";a:2:{s:1:";";a:1:{s:9:"codepoint";i:173;}s:9:"codepoint";i:173;}}s:1:"i";a:2:{s:1:"g";a:1:{s:1:"m";a:1:{s:1:"a";a:3:{s:1:";";a:1:{s:9:"codepoint";i:963;}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:962;}}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:962;}}}}}s:1:"m";a:8:{s:1:";";a:1:{s:9:"codepoint";i:8764;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10858;}}}}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8771;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8771;}}}s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10910;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10912;}}}s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10909;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10911;}}}s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8774;}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10788;}}}}}s:1:"r";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10610;}}}}}}}s:1:"l";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";"
;a:1:{s:9:"codepoint";i:8592;}}}}}s:1:"m";a:4:{s:1:"a";a:2:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"m";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8726;}}}}}}}}}}}s:1:"s";a:1:{s:1:"h";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10803;}}}}}s:1:"e";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10724;}}}}}}}s:1:"i";a:2:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8739;}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8995;}}}}s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10922;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10924;}}}}s:1:"o";a:3:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1100;}}}}}s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:47;}s:1:"b";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10692;}s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9023;}}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120164;}}}}s:1:"p";a:1:{s:1:"a";a:2:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9824;}s:1:"u";a:1:{s:1:"i";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9824;}}}}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8741;}}}}s:1:"q";a:3:{s:1:"c";a:2:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8851;}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8852;}}}}s:1:"s";a:1:{s:1:"u";a:2:{s:1:"b";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8847;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8849;}}s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8847;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8849;}}}}}}}s:1:"p";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8848;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8850;}}s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8848;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8850;}}}}}}}}}s:1:"u";a:3:{s:1:";";a:1:{s:9:"codepoint";i:9633;}s:1:"a";a:1:{s:1:"r";
a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9633;}}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9642;}}}}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9642;}}}}s:1:"r";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8594;}}}}}s:1:"s";a:4:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120008;}}}s:1:"e";a:1:{s:1:"t";a:1:{s:1:"m";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8726;}}}}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8995;}}}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8902;}}}}}}s:1:"t";a:2:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9734;}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9733;}}}}s:1:"r";a:2:{s:1:"a";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:2:{s:1:"e";a:1:{s:1:"p";a:1:{s:1:"s";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1013;}}}}}}}}s:1:"p";a:1:{s:1:"h";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:981;}}}}}}}}}s:1:"n";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:175;}}}}}s:1:"u";a:5:{s:1:"b";a:9:{s:1:";";a:1:{s:9:"codepoint";i:8834;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10949;}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10941;}}}}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8838;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10947;}}}}}s:1:"m";a:1:{s:1:"u";a:1:{s:1:"l";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10945;}}}}}s:1:"n";a:2:{s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10955;}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8842;}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10943;}}}}}s:1:"r";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10617;}}}}}s:1:"s";a:3:{s:1:"e";a:1:{s:1:"t";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8834;}s:1:"e";a:1:{s:1:"q";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8838;}s:1:"q";a:1:{s:1:";";a:1:{s:9:
"codepoint";i:10949;}}}}s:1:"n";a:1:{s:1:"e";a:1:{s:1:"q";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8842;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10955;}}}}}}}s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10951;}}}s:1:"u";a:2:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10965;}}s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10963;}}}}}s:1:"c";a:1:{s:1:"c";a:6:{s:1:";";a:1:{s:9:"codepoint";i:8827;}s:1:"a";a:1:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10936;}}}}}}}s:1:"c";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"l";a:1:{s:1:"y";a:1:{s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8829;}}}}}}}}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10928;}}}s:1:"n";a:3:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10938;}}}}}}}s:1:"e";a:1:{s:1:"q";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10934;}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8937;}}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8831;}}}}}}s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8721;}}s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9834;}}}s:1:"p";a:13:{i:1;a:2:{s:1:";";a:1:{s:9:"codepoint";i:185;}s:9:"codepoint";i:185;}i:2;a:2:{s:1:";";a:1:{s:9:"codepoint";i:178;}s:9:"codepoint";i:178;}i:3;a:2:{s:1:";";a:1:{s:9:"codepoint";i:179;}s:9:"codepoint";i:179;}s:1:";";a:1:{s:9:"codepoint";i:8835;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10950;}}s:1:"d";a:2:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10942;}}}s:1:"s";a:1:{s:1:"u";a:1:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10968;}}}}}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8839;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10948;}}}}}s:1:"h";a:1:{s:1:"s";a:1:{s:1:"u";a:1:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10967;}}}}}s:1:"l";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1061
9;}}}}}s:1:"m";a:1:{s:1:"u";a:1:{s:1:"l";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10946;}}}}}s:1:"n";a:2:{s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10956;}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8843;}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10944;}}}}}s:1:"s";a:3:{s:1:"e";a:1:{s:1:"t";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8835;}s:1:"e";a:1:{s:1:"q";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8839;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10950;}}}}s:1:"n";a:1:{s:1:"e";a:1:{s:1:"q";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8843;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10956;}}}}}}}s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10952;}}}s:1:"u";a:2:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10964;}}s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10966;}}}}}}s:1:"w";a:3:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8665;}}}}s:1:"a";a:1:{s:1:"r";a:2:{s:1:"h";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10534;}}}s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8601;}s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8601;}}}}}}s:1:"n";a:1:{s:1:"w";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10538;}}}}}}s:1:"z";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:223;}s:9:"codepoint";i:223;}}}}}s:1:"t";a:13:{s:1:"a";a:2:{s:1:"r";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8982;}}}}}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:964;}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9140;}}}}s:1:"c";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:357;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:355;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1090;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8411;}}}}s:1:"e";a:1:{s:1:"l";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"co
depoint";i:8981;}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120113;}}}s:1:"h";a:4:{s:1:"e";a:2:{s:1:"r";a:1:{s:1:"e";a:2:{i:4;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8756;}}s:1:"f";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8756;}}}}}}}s:1:"t";a:1:{s:1:"a";a:3:{s:1:";";a:1:{s:9:"codepoint";i:952;}s:1:"s";a:1:{s:1:"y";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:977;}}}}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:977;}}}}}s:1:"i";a:2:{s:1:"c";a:1:{s:1:"k";a:2:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8776;}}}}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8764;}}}}}}s:1:"n";a:1:{s:1:"s";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8201;}}}}}s:1:"k";a:2:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8776;}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8764;}}}}}s:1:"o";a:1:{s:1:"r";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:254;}s:9:"codepoint";i:254;}}}}s:1:"i";a:3:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:732;}}}}s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:4:{s:1:";";a:1:{s:9:"codepoint";i:215;}s:9:"codepoint";i:215;s:1:"b";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8864;}s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10801;}}}}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10800;}}}}}s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8749;}}}}s:1:"o";a:3:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10536;}}}s:1:"p";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8868;}s:1:"b";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9014;}}}}s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10993;}}}}s:1:"f";a:2:{s:1:";";a:1:{s:9:"codepoint";i:120165;}s:1:"o";a:1:{s:1:"r";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10970;}}}}}}s:1:"s";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10537;}}}}s:1:"p";a:1:{s:1:"r";a:1:{s:1
:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8244;}}}}}}s:1:"r";a:3:{s:1:"a";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8482;}}}}s:1:"i";a:7:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:5:{s:1:";";a:1:{s:9:"codepoint";i:9653;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9663;}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9667;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8884;}}}}}}}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8796;}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9657;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8885;}}}}}}}}}}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9708;}}}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8796;}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10810;}}}}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10809;}}}}}s:1:"s";a:1:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10701;}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10811;}}}}}}s:1:"p";a:1:{s:1:"e";a:1:{s:1:"z";a:1:{s:1:"i";a:1:{s:1:"u";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9186;}}}}}}}}s:1:"s";a:3:{s:1:"c";a:2:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120009;}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1094;}}}s:1:"h";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1115;}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:359;}}}}}}s:1:"w";a:2:{s:1:"i";a:1:{s:1:"x";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8812;}}}}s:1:"o";a:1:{s:1:"h";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"d";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8606;}}}}}}}}
}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8608;}}}}}}}}}}}}}}}}}}s:1:"u";a:18:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8657;}}}}s:1:"H";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10595;}}}}s:1:"a";a:2:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:250;}s:9:"codepoint";i:250;}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8593;}}}}s:1:"b";a:1:{s:1:"r";a:2:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1118;}}}s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:365;}}}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:251;}s:9:"codepoint";i:251;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1091;}}}s:1:"d";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8645;}}}}s:1:"b";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:369;}}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10606;}}}}}s:1:"f";a:2:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10622;}}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120114;}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:249;}s:9:"codepoint";i:249;}}}}}s:1:"h";a:2:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8639;}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8638;}}}}s:1:"b";a:1:{s:1:"l";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9600;}}}}}s:1:"l";a:2:{s:1:"c";a:2:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8988;}s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8988;}}}}}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8975;}}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9720;}}}}}s:1:"
m";a:2:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:363;}}}}s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:168;}s:9:"codepoint";i:168;}}s:1:"o";a:2:{s:1:"g";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:371;}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120166;}}}}s:1:"p";a:6:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8593;}}}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8597;}}}}}}}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"p";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"n";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8639;}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8638;}}}}}}}}}}}}}s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8846;}}}}s:1:"s";a:1:{s:1:"i";a:3:{s:1:";";a:1:{s:9:"codepoint";i:965;}s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:978;}}s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:965;}}}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8648;}}}}}}}}}}s:1:"r";a:3:{s:1:"c";a:2:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8989;}s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8989;}}}}}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8974;}}}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:367;}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9721;}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120010;}}}}s:1:"t";a:3:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8944;}}}}s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:361;}}}}}s:1:"r"
;a:1:{s:1:"i";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9653;}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9652;}}}}}s:1:"u";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8648;}}}}s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:252;}s:9:"codepoint";i:252;}}}s:1:"w";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10663;}}}}}}}}s:1:"v";a:14:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8661;}}}}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10984;}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10985;}}}}}s:1:"D";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8872;}}}}}s:1:"a";a:2:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10652;}}}}}s:1:"r";a:7:{s:1:"e";a:1:{s:1:"p";a:1:{s:1:"s";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:949;}}}}}}}}s:1:"k";a:1:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1008;}}}}}}s:1:"n";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:"h";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8709;}}}}}}}}s:1:"p";a:3:{s:1:"h";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:966;}}}s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:982;}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"p";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8733;}}}}}}}s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8597;}s:1:"h";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1009;}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:962;}}}}}}s:1:"t";a:2:{s:1:"h";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:977;}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8882;}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:
1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8883;}}}}}}}}}}}}}}}}s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1074;}}}s:1:"d";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8866;}}}}}s:1:"e";a:3:{s:1:"e";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8744;}s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8891;}}}}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8794;}}}}s:1:"l";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8942;}}}}}s:1:"r";a:2:{s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:124;}}}}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:124;}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120115;}}}s:1:"l";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8882;}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120167;}}}}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8733;}}}}}s:1:"r";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8883;}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120011;}}}}s:1:"z";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"z";a:1:{s:1:"a";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10650;}}}}}}}}s:1:"w";a:7:{s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:373;}}}}}s:1:"e";a:2:{s:1:"d";a:2:{s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10847;}}}}s:1:"g";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8743;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8793;}}}}}s:1:"i";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8472;}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120116;}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120168;}}}}s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8472;}}s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8768;}s:1:"e"
;a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8768;}}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120012;}}}}}s:1:"x";a:14:{s:1:"c";a:3:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8898;}}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9711;}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8899;}}}}s:1:"d";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9661;}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120117;}}}s:1:"h";a:2:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10234;}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10231;}}}}}s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:958;}}s:1:"l";a:2:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10232;}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10229;}}}}}s:1:"m";a:1:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10236;}}}}s:1:"n";a:1:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8955;}}}}s:1:"o";a:3:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10752;}}}}s:1:"p";a:2:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120169;}}s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10753;}}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10754;}}}}}}s:1:"r";a:2:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10233;}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10230;}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120013;}}}s:1:"q";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10758;}}}}}}s:1:"u";a:2:{s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10756;}}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9651;}}}}}s:1:"v
";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8897;}}}}s:1:"w";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8896;}}}}}}}s:1:"y";a:8:{s:1:"a";a:1:{s:1:"c";a:2:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:253;}s:9:"codepoint";i:253;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1103;}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:375;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1099;}}}s:1:"e";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:165;}s:9:"codepoint";i:165;}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120118;}}}s:1:"i";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1111;}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120170;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120014;}}}}s:1:"u";a:2:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1102;}}}s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:255;}s:9:"codepoint";i:255;}}}}s:1:"z";a:10:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:378;}}}}}}s:1:"c";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:382;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1079;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:380;}}}}s:1:"e";a:2:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8488;}}}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:950;}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120119;}}}s:1:"h";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1078;}}}}s:1:"i";a:1:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8669;}}}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120171;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1
:{s:9:"codepoint";i:120015;}}}}s:1:"w";a:2:{s:1:"j";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8205;}}s:1:"n";a:1:{s:1:"j";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8204;}}}}}} \ No newline at end of file | |||
diff --git a/inc/3rdparty/humble-http-agent/CookieJar.php b/inc/3rdparty/libraries/humble-http-agent/CookieJar.php index d91b711e..83e94f14 100644 --- a/inc/3rdparty/humble-http-agent/CookieJar.php +++ b/inc/3rdparty/libraries/humble-http-agent/CookieJar.php | |||
@@ -1,404 +1,404 @@ | |||
1 | <?php | 1 | <?php |
2 | /** | 2 | /** |
3 | * Cookie Jar | 3 | * Cookie Jar |
4 | * | 4 | * |
5 | * PHP class for handling cookies, as defined by the Netscape spec: | 5 | * PHP class for handling cookies, as defined by the Netscape spec: |
6 | * <http://curl.haxx.se/rfc/cookie_spec.html> | 6 | * <http://curl.haxx.se/rfc/cookie_spec.html> |
7 | * | 7 | * |
8 | * This class should be used to handle cookies (storing cookies from HTTP response messages, and | 8 | * This class should be used to handle cookies (storing cookies from HTTP response messages, and |
9 | * sending out cookies in HTTP request messages). This has been adapted for FiveFilters.org | 9 | * sending out cookies in HTTP request messages). This has been adapted for FiveFilters.org |
10 | * from the original version used in HTTP Navigator. See http://www.keyvan.net/code/http-navigator/ | 10 | * from the original version used in HTTP Navigator. See http://www.keyvan.net/code/http-navigator/ |
11 | * | 11 | * |
12 | * This class is mainly based on Cookies.pm <http://search.cpan.org/author/GAAS/libwww-perl-5.65/ | 12 | * This class is mainly based on Cookies.pm <http://search.cpan.org/author/GAAS/libwww-perl-5.65/ |
13 | * lib/HTTP/Cookies.pm> from the libwww-perl collection <http://www.linpro.no/lwp/>. | 13 | * lib/HTTP/Cookies.pm> from the libwww-perl collection <http://www.linpro.no/lwp/>. |
14 | * Unlike Cookies.pm, this class only supports the Netscape cookie spec, not RFC 2965. | 14 | * Unlike Cookies.pm, this class only supports the Netscape cookie spec, not RFC 2965. |
15 | * | 15 | * |
16 | * @version 0.5 | 16 | * @version 0.5 |
17 | * @date 2011-03-15 | 17 | * @date 2011-03-15 |
18 | * @see http://php.net/HttpRequestPool | 18 | * @see http://php.net/HttpRequestPool |
19 | * @author Keyvan Minoukadeh | 19 | * @author Keyvan Minoukadeh |
20 | * @copyright 2011 Keyvan Minoukadeh | 20 | * @copyright 2011 Keyvan Minoukadeh |
21 | * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3 | 21 | * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3 |
22 | */ | 22 | */ |
23 | 23 | ||
24 | class CookieJar | 24 | class CookieJar |
25 | { | 25 | { |
26 | /** | 26 | /** |
27 | * Cookies - array containing all cookies. | 27 | * Cookies - array containing all cookies. |
28 | * | 28 | * |
29 | * <pre> | 29 | * <pre> |
30 | * Cookies are stored like this: | 30 | * Cookies are stored like this: |
31 | * [domain][path][name] = array | 31 | * [domain][path][name] = array |
32 | * where array is: | 32 | * where array is: |
33 | * 0 => value, 1 => secure, 2 => expires | 33 | * 0 => value, 1 => secure, 2 => expires |
34 | * </pre> | 34 | * </pre> |
35 | * @var array | 35 | * @var array |
36 | * @access private | 36 | * @access private |
37 | */ | 37 | */ |
38 | public $cookies = array(); | 38 | public $cookies = array(); |
39 | public $debug = false; | 39 | public $debug = false; |
40 | 40 | ||
41 | /** | 41 | /** |
42 | * Constructor | 42 | * Constructor |
43 | */ | 43 | */ |
44 | function __construct() { | 44 | function __construct() { |
45 | } | 45 | } |
46 | 46 | ||
47 | protected function debug($msg, $file=null, $line=null) { | 47 | protected function debug($msg, $file=null, $line=null) { |
48 | if ($this->debug) { | 48 | if ($this->debug) { |
49 | $mem = round(memory_get_usage()/1024, 2); | 49 | $mem = round(memory_get_usage()/1024, 2); |
50 | $memPeak = round(memory_get_peak_usage()/1024, 2); | 50 | $memPeak = round(memory_get_peak_usage()/1024, 2); |
51 | echo '* ',$msg; | 51 | echo '* ',$msg; |
52 | if (isset($file, $line)) echo " ($file line $line)"; | 52 | if (isset($file, $line)) echo " ($file line $line)"; |
53 | echo ' - mem used: ',$mem," (peak: $memPeak)\n"; | 53 | echo ' - mem used: ',$mem," (peak: $memPeak)\n"; |
54 | ob_flush(); | 54 | ob_flush(); |
55 | flush(); | 55 | flush(); |
56 | } | 56 | } |
57 | } | 57 | } |
58 | 58 | ||
59 | /** | 59 | /** |
60 | * Get matching cookies | 60 | * Get matching cookies |
61 | * | 61 | * |
62 | * Only use this method if you cannot use add_cookie_header(), for example, if you want to use | 62 | * Only use this method if you cannot use add_cookie_header(), for example, if you want to use |
63 | * this cookie jar class without using the request class. | 63 | * this cookie jar class without using the request class. |
64 | * | 64 | * |
65 | * @param array $param associative array containing 'domain', 'path', 'secure' keys | 65 | * @param array $param associative array containing 'domain', 'path', 'secure' keys |
66 | * @return string | 66 | * @return string |
67 | * @see add_cookie_header() | 67 | * @see add_cookie_header() |
68 | */ | 68 | */ |
69 | public function getMatchingCookies($url) | 69 | public function getMatchingCookies($url) |
70 | { | 70 | { |
71 | if (($parts = @parse_url($url)) && isset($parts['scheme'], $parts['host'], $parts['path'])) { | 71 | if (($parts = @parse_url($url)) && isset($parts['scheme'], $parts['host'], $parts['path'])) { |
72 | $param['domain'] = $parts['host']; | 72 | $param['domain'] = $parts['host']; |
73 | $param['path'] = $parts['path']; | 73 | $param['path'] = $parts['path']; |
74 | $param['secure'] = (strtolower($parts['scheme']) == 'https'); | 74 | $param['secure'] = (strtolower($parts['scheme']) == 'https'); |
75 | unset($parts); | 75 | unset($parts); |
76 | } else { | 76 | } else { |
77 | return false; | 77 | return false; |
78 | } | 78 | } |
79 | // RFC 2965 notes: | 79 | // RFC 2965 notes: |
80 | // If multiple cookies satisfy the criteria above, they are ordered in | 80 | // If multiple cookies satisfy the criteria above, they are ordered in |
81 | // the Cookie header such that those with more specific Path attributes | 81 | // the Cookie header such that those with more specific Path attributes |
82 | // precede those with less specific. Ordering with respect to other | 82 | // precede those with less specific. Ordering with respect to other |
83 | // attributes (e.g., Domain) is unspecified. | 83 | // attributes (e.g., Domain) is unspecified. |
84 | $domain = $param['domain']; | 84 | $domain = $param['domain']; |
85 | if (strpos($domain, '.') === false) $domain .= '.local'; | 85 | if (strpos($domain, '.') === false) $domain .= '.local'; |
86 | $request_path = $param['path']; | 86 | $request_path = $param['path']; |
87 | if ($request_path == '') $request_path = '/'; | 87 | if ($request_path == '') $request_path = '/'; |
88 | $request_secure = $param['secure']; | 88 | $request_secure = $param['secure']; |
89 | $now = time(); | 89 | $now = time(); |
90 | $matched_cookies = array(); | 90 | $matched_cookies = array(); |
91 | // domain - find matching domains | 91 | // domain - find matching domains |
92 | $this->debug('Finding matching domains for '.$domain, __FILE__, __LINE__); | 92 | $this->debug('Finding matching domains for '.$domain, __FILE__, __LINE__); |
93 | while (strpos($domain, '.') !== false) { | 93 | while (strpos($domain, '.') !== false) { |
94 | if (isset($this->cookies[$domain])) { | 94 | if (isset($this->cookies[$domain])) { |
95 | $this->debug(' domain match found: '.$domain); | 95 | $this->debug(' domain match found: '.$domain); |
96 | $cookies =& $this->cookies[$domain]; | 96 | $cookies =& $this->cookies[$domain]; |
97 | } else { | 97 | } else { |
98 | $domain = $this->_reduce_domain($domain); | 98 | $domain = $this->_reduce_domain($domain); |
99 | continue; | 99 | continue; |
100 | } | 100 | } |
101 | // paths - find matching paths starting from most specific | 101 | // paths - find matching paths starting from most specific |
102 | $this->debug(' - Finding matching paths for '.$request_path); | 102 | $this->debug(' - Finding matching paths for '.$request_path); |
103 | $paths = array_keys($cookies); | 103 | $paths = array_keys($cookies); |
104 | usort($paths, array($this, '_cmp_length')); | 104 | usort($paths, array($this, '_cmp_length')); |
105 | foreach ($paths as $path) { | 105 | foreach ($paths as $path) { |
106 | // continue to next cookie if request path does not path-match cookie path | 106 | // continue to next cookie if request path does not path-match cookie path |
107 | if (!$this->_path_match($request_path, $path)) continue; | 107 | if (!$this->_path_match($request_path, $path)) continue; |
108 | // loop through cookie names | 108 | // loop through cookie names |
109 | $this->debug(' path match found: '.$path); | 109 | $this->debug(' path match found: '.$path); |
110 | foreach ($cookies[$path] as $name => $values) { | 110 | foreach ($cookies[$path] as $name => $values) { |
111 | // if this cookie is secure but request isn't, continue to next cookie | 111 | // if this cookie is secure but request isn't, continue to next cookie |
112 | if ($values[1] && !$request_secure) continue; | 112 | if ($values[1] && !$request_secure) continue; |
113 | // if cookie is not a session cookie and has expired, continue to next cookie | 113 | // if cookie is not a session cookie and has expired, continue to next cookie |
114 | if (is_int($values[2]) && ($values[2] < $now)) continue; | 114 | if (is_int($values[2]) && ($values[2] < $now)) continue; |
115 | // cookie matches request | 115 | // cookie matches request |
116 | $this->debug(' cookie match: '.$name.'='.$values[0]); | 116 | $this->debug(' cookie match: '.$name.'='.$values[0]); |
117 | $matched_cookies[] = $name.'='.$values[0]; | 117 | $matched_cookies[] = $name.'='.$values[0]; |
118 | } | 118 | } |
119 | } | 119 | } |
120 | $domain = $this->_reduce_domain($domain); | 120 | $domain = $this->_reduce_domain($domain); |
121 | } | 121 | } |
122 | // return cookies | 122 | // return cookies |
123 | return implode('; ', $matched_cookies); | 123 | return implode('; ', $matched_cookies); |
124 | } | 124 | } |
125 | 125 | ||
/**
 * Parse Set-Cookie values and store the resulting cookies in the jar.
 *
 * Each header value is split on ';'. The first name=value pair is the cookie
 * itself; of the remaining attributes only expires, path, domain and secure
 * are honoured. A cookie is skipped when its domain attribute does not
 * domain-match the request host, or when its path attribute does not
 * path-match the request path.
 *
 * @param string $url         URL of the request the headers were received for
 *                            (only its host and path components are used)
 * @param array  $set_cookies array holding 1 or more "Set-Cookie" header values,
 *                            e.g. the array returned by extractCookies()
 * @return void
 * @see extractCookies()
 */
public function storeCookies($url, $set_cookies)
{
	if (count($set_cookies) == 0) return;
	$url_parts = @parse_url($url);
	if (!is_array($url_parts) || !isset($url_parts['host'])) return;
	$request_host = $url_parts['host'];
	// a host without a dot is treated as living in the ".local" domain
	if (strpos($request_host, '.') === false) $request_host .= '.local';
	$request_path = isset($url_parts['path']) ? $url_parts['path'] : '';
	if ($request_path == '') $request_path = '/';
	//
	// loop through set-cookie headers
	//
	foreach ($set_cookies as $set_cookie) {
		$this->debug('Parsing: '.$set_cookie);
		// temporary cookie store (before adding to jar)
		$tmp_cookie = array();
		$attributes = explode(';', $set_cookie);
		$attribute_count = count($attributes);
		// loop through params
		for ($x = 0; $x < $attribute_count; $x++) {
			$key_val = explode('=', $attributes[$x], 2);
			if (count($key_val) != 2) {
				// if the first param isn't a name=value pair, continue to the next set-cookie
				// header
				if ($x == 0) continue 2;
				// check for secure flag
				if (strtolower(trim($key_val[0])) == 'secure') $tmp_cookie['secure'] = true;
				// continue to next param
				continue;
			}
			list($key, $val) = array_map('trim', $key_val);
			// first name=value pair is the cookie name and value
			// the name and value are stored under 'name' and 'value' to avoid conflicts
			// with later parameters.
			if ($x == 0) {
				$tmp_cookie = array('name'=>$key, 'value'=>$val);
				continue;
			}
			$key = strtolower($key);
			if (in_array($key, array('expires', 'path', 'domain', 'secure'))) {
				$tmp_cookie[$key] = $val;
			}
		}
		//
		// set cookie
		//
		// check domain
		if (isset($tmp_cookie['domain']) && ($tmp_cookie['domain'] != $request_host) &&
				($tmp_cookie['domain'] != ".$request_host")) {
			$domain = $tmp_cookie['domain'];
			if ((strpos($domain, '.') === false) && ($domain != 'local')) {
				$this->debug(' - domain "'.$domain.'" has no dot and is not a local domain');
				continue;
			}
			if (preg_match('/\.[0-9]+$/', $domain)) {
				$this->debug(' - domain "'.$domain.'" appears to be an ip address');
				continue;
			}
			if (substr($domain, 0, 1) != '.') $domain = ".$domain";
			if (!$this->_domain_match($request_host, $domain)) {
				$this->debug(' - request host "'.$request_host.'" does not domain-match "'.$domain.'"');
				continue;
			}
		} else {
			// if domain is not specified in the set-cookie header, domain will default to
			// the request host
			$domain = $request_host;
		}
		// check path
		if (isset($tmp_cookie['path']) && ($tmp_cookie['path'] != '')) {
			$path = urldecode($tmp_cookie['path']);
			if (!$this->_path_match($request_path, $path)) {
				$this->debug(' - request path "'.$request_path.'" does not path-match "'.$path.'"');
				continue;
			}
		} else {
			// default path: the request path up to (not including) its last '/'
			$path = $request_path;
			$path = substr($path, 0, strrpos($path, '/'));
			if ($path == '') $path = '/';
		}
		// check if secure
		$secure = isset($tmp_cookie['secure']);
		// check expiry: strtotime() returns false for an unparseable date. That must not
		// reach set_cookie(), where false <= 0 would be taken as "delete this cookie";
		// an unparseable (or negative) expiry is treated as a session cookie instead.
		$expires = null;
		if (isset($tmp_cookie['expires'])) {
			$timestamp = strtotime($tmp_cookie['expires']);
			if (($timestamp !== false) && ($timestamp >= 0)) {
				$expires = $timestamp;
			}
		}
		// set cookie
		$this->set_cookie($domain, $path, $tmp_cookie['name'], $tmp_cookie['value'], $secure, $expires);
	}
}
230 | 230 | ||
/**
 * Extract Set-Cookie values from a block of raw HTTP response headers.
 *
 * Scanning stops at the first empty line found after the first line (the end
 * of the headers). Folded continuation lines (lines starting with whitespace,
 * see RFC 2616 section 2.2) that directly follow a Set-Cookie header are
 * appended to that header's value, separated by a single space.
 *
 * @param string $h raw HTTP response headers
 * @return array list of Set-Cookie header values, in order of appearance
 */
public function extractCookies($h) {
	$cookies = array();
	$first_line_seen = false;
	$previous_was_cookie = false;
	foreach (explode("\n", $h) as $raw_line) {
		$line = rtrim($raw_line);
		if ($first_line_seen) {
			// an empty line marks the end of the headers
			if (trim($line) == '') {
				break;
			}
			// folded continuation of the Set-Cookie header we just stored
			if ($previous_was_cookie && preg_match('/^\s+(.*)/', $line, $match)) {
				$cookies[count($cookies) - 1] .= ' '.rtrim($match[1]);
				continue;
			}
		}
		$first_line_seen = true;

		// keep the value of Set-Cookie headers, ignore every other header
		if (preg_match('/^Set-Cookie\s*:\s*(.*)/i', $line, $match)) {
			$cookies[] = rtrim($match[1]);
			$previous_was_cookie = true;
		} else {
			$previous_was_cookie = false;
		}
	}
	return $cookies;
}
279 | 279 | ||
/**
 * Set Cookie
 *
 * Nothing is stored when domain, path or name is empty. An expiry that is set
 * and <= 0 removes any existing cookie with the same domain/path/name. An
 * empty value is ignored (an existing cookie is left untouched).
 *
 * @param string $domain
 * @param string $path
 * @param string $name cookie name
 * @param string $value cookie value
 * @param bool $secure
 * @param int $expires expiry time (null if session cookie, <= 0 will delete cookie)
 * @return void
 */
function set_cookie($domain, $path, $name, $value, $secure=false, $expires=null)
{
	// a cookie is only addressable with a domain, a path and a name
	if ($domain == '' || $path == '' || $name == '') {
		return;
	}
	// check if cookie needs to go
	if ($expires !== null && $expires <= 0) {
		if (isset($this->cookies[$domain][$path][$name])) {
			unset($this->cookies[$domain][$path][$name]);
		}
		return;
	}
	if ($value != '') {
		$this->cookies[$domain][$path][$name] = array($value, $secure, $expires);
	}
}
304 | 304 | ||
/**
 * Clear cookies - [domain [,path [,name]]] - call method with no arguments to clear all cookies.
 *
 * The first argument left as null decides the scope: no domain clears the
 * whole jar, no path clears the domain, no name clears the path, otherwise
 * only the named cookie is removed.
 *
 * @param string $domain
 * @param string $path
 * @param string $name
 * @return void
 */
function clear($domain=null, $path=null, $name=null)
{
	if ($domain === null) {
		// no domain given: empty the whole jar
		$this->cookies = array();
		return;
	}
	if ($path === null) {
		if (isset($this->cookies[$domain])) {
			unset($this->cookies[$domain]);
		}
		return;
	}
	if ($name === null) {
		if (isset($this->cookies[$domain][$path])) {
			unset($this->cookies[$domain][$path]);
		}
		return;
	}
	if (isset($this->cookies[$domain][$path][$name])) {
		unset($this->cookies[$domain][$path][$name]);
	}
}
324 | 324 | ||
/**
 * Compare string length - used for sorting (longest string first)
 * @access private
 * @param string $a
 * @param string $b
 * @return int -1 if $a is longer than $b, 1 if it is shorter, 0 if both have the same length
 */
function _cmp_length($a, $b)
{
	$difference = strlen($b) - strlen($a);
	if ($difference < 0) {
		return -1;
	}
	if ($difference > 0) {
		return 1;
	}
	return 0;
}
336 | 336 | ||
/**
 * Reduce domain - strip one level off the front of a domain.
 *
 * A leading dot is removed ('.foo.com' becomes 'foo.com'); otherwise everything
 * before the first dot is removed ('www.foo.com' becomes '.foo.com'). A domain
 * without any dot is returned unchanged.
 *
 * @param string $domain
 * @return string
 * @access private
 */
function _reduce_domain($domain)
{
	if ($domain == '') {
		return '';
	}
	$first_dot = strpos($domain, '.');
	if ($first_dot === 0) {
		// leading dot: drop just the dot
		return substr($domain, 1);
	}
	// no dot at all gives offset 0, i.e. the domain is returned as is
	return substr($domain, (int)$first_dot);
}
349 | 349 | ||
/**
 * Path match - check if path1 path-matches path2
 *
 * From RFC 2965:
 * <i>For two strings that represent paths, P1 and P2, P1 path-matches P2
 * if P2 is a prefix of P1 (including the case where P1 and P2 string-
 * compare equal). Thus, the string /tec/waldo path-matches /tec.</i>
 *
 * This is a plain prefix test, so '/foobar' also path-matches '/foo'. An empty
 * path2 is a prefix of every path.
 *
 * @param string $path1
 * @param string $path2
 * @return bool
 * @access private
 */
function _path_match($path1, $path2)
{
	// byte-wise prefix comparison; a loose == on the two strings would compare
	// numeric-looking strings as numbers (e.g. '1e3' would match '1000')
	return (strncmp($path1, $path2, strlen($path2)) === 0);
}
366 | 366 | ||
/**
 * Domain match - check if domain1 domain-matches domain2
 *
 * The comparison is case-insensitive. domain1 is compared against domain2 and
 * then reduced one level at a time (see _reduce_domain()) for as long as it
 * still contains a dot; the first exact match wins.
 *
 * A few extracts from RFC 2965:
 * - A Set-Cookie2 from request-host y.x.foo.com for Domain=.foo.com
 * would be rejected, because H is y.x and contains a dot.
 *
 * - A Set-Cookie2 from request-host x.foo.com for Domain=.foo.com
 * would be accepted.
 *
 * - A Set-Cookie2 with Domain=.com or Domain=.com., will always be
 * rejected, because there is no embedded dot.
 *
 * - A Set-Cookie2 from request-host example for Domain=.local will
 * be accepted, because the effective host name for the request-
 * host is example.local, and example.local domain-matches .local.
 *
 * I'm ignoring the first point for now (must check to see how other browsers handle
 * this rule for Set-Cookie headers)
 *
 * @param string $domain1
 * @param string $domain2
 * @return bool
 * @access private
 */
function _domain_match($domain1, $domain2)
{
	$wanted = strtolower($domain2);
	for ($candidate = strtolower($domain1);
			strpos($candidate, '.') !== false;
			$candidate = $this->_reduce_domain($candidate)) {
		if ($candidate == $wanted) {
			return true;
		}
	}
	return false;
}
403 | } | 403 | } |
404 | ?> \ No newline at end of file | 404 | ?> \ No newline at end of file |
diff --git a/inc/3rdparty/humble-http-agent/HumbleHttpAgent.php b/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php index 7e5834ab..e4f1b3b3 100644 --- a/inc/3rdparty/humble-http-agent/HumbleHttpAgent.php +++ b/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php | |||
@@ -1,720 +1,779 @@ | |||
1 | <?php | 1 | <?php |
2 | /** | 2 | /** |
3 | * Humble HTTP Agent | 3 | * Humble HTTP Agent |
4 | * | 4 | * |
5 | * This class is designed to take advantage of parallel HTTP requests | 5 | * This class is designed to take advantage of parallel HTTP requests |
6 | * offered by PHP's PECL HTTP extension or the curl_multi_* functions. | 6 | * offered by PHP's PECL HTTP extension or the curl_multi_* functions. |
7 | * For environments which do not have these options, it reverts to standard sequential | 7 | * For environments which do not have these options, it reverts to standard sequential |
8 | * requests (using file_get_contents()) | 8 | * requests (using file_get_contents()) |
9 | * | 9 | * |
10 | * @version 1.0 | 10 | * @version 1.1 |
11 | * @date 2012-02-09 | 11 | * @date 2012-08-20 |
12 | * @see http://php.net/HttpRequestPool | 12 | * @see http://php.net/HttpRequestPool |
13 | * @author Keyvan Minoukadeh | 13 | * @author Keyvan Minoukadeh |
14 | * @copyright 2011-2012 Keyvan Minoukadeh | 14 | * @copyright 2011-2012 Keyvan Minoukadeh |
15 | * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3 | 15 | * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3 |
16 | */ | 16 | */ |
17 | 17 | ||
18 | class HumbleHttpAgent | 18 | class HumbleHttpAgent |
19 | { | 19 | { |
20 | const METHOD_REQUEST_POOL = 1; | 20 | const METHOD_REQUEST_POOL = 1; |
21 | const METHOD_CURL_MULTI = 2; | 21 | const METHOD_CURL_MULTI = 2; |
22 | const METHOD_FILE_GET_CONTENTS = 4; | 22 | const METHOD_FILE_GET_CONTENTS = 4; |
23 | //const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1'; | 23 | //const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1'; |
24 | const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.92 Safari/535.2'; | 24 | const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.92 Safari/535.2'; |
25 | const UA_PHP = 'PHP/5.2'; | 25 | const UA_PHP = 'PHP/5.2'; |
26 | const REF_GOOGLE = 'http://www.google.co.uk/url?sa=t&source=web&cd=1'; | 26 | const REF_GOOGLE = 'http://www.google.co.uk/url?sa=t&source=web&cd=1'; |
27 | 27 | ||
28 | protected $requests = array(); | 28 | protected $requests = array(); |
29 | protected $redirectQueue = array(); | 29 | protected $redirectQueue = array(); |
30 | protected $requestOptions; | 30 | protected $requestOptions; |
31 | protected $maxParallelRequests = 5; | 31 | protected $maxParallelRequests = 5; |
32 | protected $cache = null; //TODO | 32 | protected $cache = null; //TODO |
33 | protected $httpContext; | 33 | protected $httpContext; |
34 | protected $minimiseMemoryUse = false; //TODO | 34 | protected $minimiseMemoryUse = false; //TODO |
35 | protected $debug = false; | 35 | protected $method; |
36 | protected $method; | 36 | protected $cookieJar; |
37 | protected $cookieJar; | 37 | public $debug = false; |
38 | public $rewriteHashbangFragment = true; // see http://code.google.com/web/ajaxcrawling/docs/specification.html | 38 | public $debugVerbose = false; |
39 | public $maxRedirects = 5; | 39 | public $rewriteHashbangFragment = true; // see http://code.google.com/web/ajaxcrawling/docs/specification.html |
40 | public $userAgentMap = array(); | 40 | public $maxRedirects = 5; |
41 | public $rewriteUrls = array(); | 41 | public $userAgentMap = array(); |
42 | public $userAgentDefault; | 42 | public $rewriteUrls = array(); |
43 | public $referer; | 43 | public $userAgentDefault; |
44 | //public $userAgent = 'Mozilla/5.0'; | 44 | public $referer; |
45 | 45 | //public $userAgent = 'Mozilla/5.0'; | |
46 | // Prevent certain file/mime types | 46 | |
47 | // HTTP responses which match these content types will | 47 | // Prevent certain file/mime types |
48 | // be returned without body. | 48 | // HTTP responses which match these content types will |
49 | public $headerOnlyTypes = array(); | 49 | // be returned without body. |
50 | // URLs ending with one of these extensions will | 50 | public $headerOnlyTypes = array(); |
51 | // prompt Humble HTTP Agent to send a HEAD request first | 51 | // URLs ending with one of these extensions will |
52 | // to see if returned content type matches $headerOnlyTypes. | 52 | // prompt Humble HTTP Agent to send a HEAD request first |
53 | public $headerOnlyClues = array('pdf','mp3','zip','exe','gif','gzip','gz','jpeg','jpg','mpg','mpeg','png','ppt','mov'); | 53 | // to see if returned content type matches $headerOnlyTypes. |
54 | 54 | public $headerOnlyClues = array('pdf','mp3','zip','exe','gif','gzip','gz','jpeg','jpg','mpg','mpeg','png','ppt','mov'); | |
55 | //TODO: set max file size | 55 | // AJAX triggers to search for. |
56 | //TODO: normalise headers | 56 | // for AJAX sites, e.g. Blogger with its dynamic views templates. |
57 | 57 | public $ajaxTriggers = array("<meta name='fragment' content='!'",'<meta name="fragment" content="!"',"<meta content='!' name='fragment'",'<meta content="!" name="fragment"'); | |
58 | function __construct($requestOptions=null, $method=null) { | 58 | |
59 | $this->userAgentDefault = self::UA_BROWSER; | 59 | //TODO: set max file size |
60 | $this->referer = self::REF_GOOGLE; | 60 | //TODO: normalise headers |
61 | // set the request method | 61 | |
62 | if (in_array($method, array(1,2,4))) { | 62 | function __construct($requestOptions=null, $method=null) { |
63 | $this->method = $method; | 63 | $this->userAgentDefault = self::UA_BROWSER; |
64 | } else { | 64 | $this->referer = self::REF_GOOGLE; |
65 | if (class_exists('HttpRequestPool')) { | 65 | // set the request method |
66 | $this->method = self::METHOD_REQUEST_POOL; | 66 | if (in_array($method, array(1,2,4))) { |
67 | } elseif (function_exists('curl_multi_init')) { | 67 | $this->method = $method; |
68 | $this->method = self::METHOD_CURL_MULTI; | 68 | } else { |
69 | } else { | 69 | if (class_exists('HttpRequestPool')) { |
70 | $this->method = self::METHOD_FILE_GET_CONTENTS; | 70 | $this->method = self::METHOD_REQUEST_POOL; |
71 | } | 71 | } elseif (function_exists('curl_multi_init')) { |
72 | } | 72 | $this->method = self::METHOD_CURL_MULTI; |
73 | if ($this->method == self::METHOD_CURL_MULTI) { | 73 | } else { |
74 | require_once(dirname(__FILE__).'/RollingCurl.php'); | 74 | $this->method = self::METHOD_FILE_GET_CONTENTS; |
75 | } | 75 | } |
76 | // create cookie jar | 76 | } |
77 | $this->cookieJar = new CookieJar(); | 77 | if ($this->method == self::METHOD_CURL_MULTI) { |
78 | // set request options (redirect must be 0) | 78 | require_once(dirname(__FILE__).'/RollingCurl.php'); |
79 | $this->requestOptions = array( | 79 | } |
80 | 'timeout' => 15, | 80 | // create cookie jar |
81 | 'redirect' => 0 // we handle redirects manually so we can rewrite the new hashbang URLs that are creeping up over the web | 81 | $this->cookieJar = new CookieJar(); |
82 | // TODO: test onprogress? | 82 | // set request options (redirect must be 0) |
83 | ); | 83 | $this->requestOptions = array( |
84 | if (is_array($requestOptions)) { | 84 | 'timeout' => 15, |
85 | $this->requestOptions = array_merge($this->requestOptions, $requestOptions); | 85 | 'redirect' => 0 // we handle redirects manually so we can rewrite the new hashbang URLs that are creeping up over the web |
86 | } | 86 | // TODO: test onprogress? |
87 | $this->httpContext = array( | 87 | ); |
88 | 'http' => array( | 88 | if (is_array($requestOptions)) { |
89 | 'ignore_errors' => true, | 89 | $this->requestOptions = array_merge($this->requestOptions, $requestOptions); |
90 | 'timeout' => $this->requestOptions['timeout'], | 90 | } |
91 | 'max_redirects' => $this->requestOptions['redirect'], | 91 | $this->httpContext = array( |
92 | 'header' => "Accept: */*\r\n" | 92 | 'http' => array( |
93 | ) | 93 | 'ignore_errors' => true, |
94 | ); | 94 | 'timeout' => $this->requestOptions['timeout'], |
95 | } | 95 | 'max_redirects' => $this->requestOptions['redirect'], |
96 | 96 | 'header' => "Accept: */*\r\n" | |
97 | protected function debug($msg) { | 97 | ) |
98 | if ($this->debug) { | 98 | ); |
99 | $mem = round(memory_get_usage()/1024, 2); | 99 | } |
100 | $memPeak = round(memory_get_peak_usage()/1024, 2); | 100 | |
101 | echo '* ',$msg; | 101 | protected function debug($msg) { |
102 | echo ' - mem used: ',$mem," (peak: $memPeak)\n"; | 102 | if ($this->debug) { |
103 | ob_flush(); | 103 | $mem = round(memory_get_usage()/1024, 2); |
104 | flush(); | 104 | $memPeak = round(memory_get_peak_usage()/1024, 2); |
105 | } | 105 | echo '* ',$msg; |
106 | } | 106 | if ($this->debugVerbose) echo ' - mem used: ',$mem," (peak: $memPeak)"; |
107 | 107 | echo "\n"; | |
108 | protected function getUserAgent($url, $asArray=false) { | 108 | ob_flush(); |
109 | $host = @parse_url($url, PHP_URL_HOST); | 109 | flush(); |
110 | if (strtolower(substr($host, 0, 4)) == 'www.') { | 110 | } |
111 | $host = substr($host, 4); | 111 | } |
112 | } | 112 | |
113 | if ($host) { | 113 | protected function getUserAgent($url, $asArray=false) { |
114 | $try = array($host); | 114 | $host = @parse_url($url, PHP_URL_HOST); |
115 | $split = explode('.', $host); | 115 | if (strtolower(substr($host, 0, 4)) == 'www.') { |
116 | if (count($split) > 1) { | 116 | $host = substr($host, 4); |
117 | array_shift($split); | 117 | } |
118 | $try[] = '.'.implode('.', $split); | 118 | if ($host) { |
119 | } | 119 | $try = array($host); |
120 | foreach ($try as $h) { | 120 | $split = explode('.', $host); |
121 | if (isset($this->userAgentMap[$h])) { | 121 | if (count($split) > 1) { |
122 | $ua = $this->userAgentMap[$h]; | 122 | array_shift($split); |
123 | break; | 123 | $try[] = '.'.implode('.', $split); |
124 | } | 124 | } |
125 | } | 125 | foreach ($try as $h) { |
126 | } | 126 | if (isset($this->userAgentMap[$h])) { |
127 | if (!isset($ua)) $ua = $this->userAgentDefault; | 127 | $ua = $this->userAgentMap[$h]; |
128 | if ($asArray) { | 128 | break; |
129 | return array('User-Agent' => $ua); | 129 | } |
130 | } else { | 130 | } |
131 | return 'User-Agent: '.$ua; | 131 | } |
132 | } | 132 | if (!isset($ua)) $ua = $this->userAgentDefault; |
133 | } | 133 | if ($asArray) { |
134 | 134 | return array('User-Agent' => $ua); | |
135 | public function rewriteHashbangFragment($url) { | 135 | } else { |
136 | // return $url if there's no '#!' | 136 | return 'User-Agent: '.$ua; |
137 | if (strpos($url, '#!') === false) return $url; | 137 | } |
138 | // split $url and rewrite | 138 | } |
139 | // TODO: is SimplePie_IRI included? | 139 | |
140 | $iri = new SimplePie_IRI($url); | 140 | public function rewriteHashbangFragment($url) { |
141 | $fragment = substr($iri->fragment, 1); // strip '!' | 141 | // return $url if there's no '#!' |
142 | $iri->fragment = null; | 142 | if (strpos($url, '#!') === false) return $url; |
143 | if (isset($iri->query)) { | 143 | // split $url and rewrite |
144 | parse_str($iri->query, $query); | 144 | // TODO: is SimplePie_IRI included? |
145 | } else { | 145 | $iri = new SimplePie_IRI($url); |
146 | $query = array(); | 146 | $fragment = substr($iri->fragment, 1); // strip '!' |
147 | } | 147 | $iri->fragment = null; |
148 | $query['_escaped_fragment_'] = (string)$fragment; | 148 | if (isset($iri->query)) { |
149 | $iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites | 149 | parse_str($iri->query, $query); |
150 | return $iri->get_iri(); | 150 | } else { |
151 | } | 151 | $query = array(); |
152 | 152 | } | |
153 | public function removeFragment($url) { | 153 | $query['_escaped_fragment_'] = (string)$fragment; |
154 | $pos = strpos($url, '#'); | 154 | $iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites |
155 | if ($pos === false) { | 155 | return $iri->get_iri(); |
156 | return $url; | 156 | } |
157 | } else { | 157 | |
158 | return substr($url, 0, $pos); | 158 | public function getUglyURL($url, $html) { |
159 | } | 159 | if ($html == '') return false; |
160 | } | 160 | $found = false; |
161 | 161 | foreach ($this->ajaxTriggers as $string) { | |
162 | public function rewriteUrls($url) { | 162 | if (stripos($html, $string)) { |
163 | foreach ($this->rewriteUrls as $find => $action) { | 163 | $found = true; |
164 | if (strpos($url, $find) !== false) { | 164 | break; |
165 | if (is_array($action)) { | 165 | } |
166 | return strtr($url, $action); | 166 | } |
167 | } | 167 | if (!$found) return false; |
168 | } | 168 | $iri = new SimplePie_IRI($url); |
169 | } | 169 | if (isset($iri->query)) { |
170 | return $url; | 170 | parse_str($iri->query, $query); |
171 | } | 171 | } else { |
172 | 172 | $query = array(); | |
173 | public function enableDebug($bool=true) { | 173 | } |
174 | $this->debug = (bool)$bool; | 174 | $query['_escaped_fragment_'] = ''; |
175 | } | 175 | $iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites |
176 | 176 | return $iri->get_iri(); | |
177 | public function minimiseMemoryUse($bool = true) { | 177 | } |
178 | $this->minimiseMemoryUse = $bool; | 178 | |
179 | } | 179 | public function removeFragment($url) { |
180 | 180 | $pos = strpos($url, '#'); | |
181 | public function setMaxParallelRequests($max) { | 181 | if ($pos === false) { |
182 | $this->maxParallelRequests = $max; | 182 | return $url; |
183 | } | 183 | } else { |
184 | 184 | return substr($url, 0, $pos); | |
185 | public function validateUrl($url) { | 185 | } |
186 | $url = filter_var($url, FILTER_SANITIZE_URL); | 186 | } |
187 | $test = filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED); | 187 | |
188 | // deal with bug http://bugs.php.net/51192 (present in PHP 5.2.13 and PHP 5.3.2) | 188 | public function rewriteUrls($url) { |
189 | if ($test === false) { | 189 | foreach ($this->rewriteUrls as $find => $action) { |
190 | $test = filter_var(strtr($url, '-', '_'), FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED); | 190 | if (strpos($url, $find) !== false) { |
191 | } | 191 | if (is_array($action)) { |
192 | if ($test !== false && $test !== null && preg_match('!^https?://!', $url)) { | 192 | return strtr($url, $action); |
193 | return $url; | 193 | } |
194 | } else { | 194 | } |
195 | return false; | 195 | } |
196 | } | 196 | return $url; |
197 | } | 197 | } |
198 | 198 | ||
199 | public function fetchAll(array $urls) { | 199 | public function enableDebug($bool=true) { |
200 | $this->fetchAllOnce($urls, $isRedirect=false); | 200 | $this->debug = (bool)$bool; |
201 | $redirects = 0; | 201 | } |
202 | while (!empty($this->redirectQueue) && ++$redirects <= $this->maxRedirects) { | 202 | |
203 | $this->debug("Following redirects #$redirects..."); | 203 | public function minimiseMemoryUse($bool = true) { |
204 | $this->fetchAllOnce($this->redirectQueue, $isRedirect=true); | 204 | $this->minimiseMemoryUse = $bool; |
205 | } | 205 | } |
206 | } | 206 | |
207 | 207 | public function setMaxParallelRequests($max) { | |
208 | // fetch all URLs without following redirects | 208 | $this->maxParallelRequests = $max; |
209 | public function fetchAllOnce(array $urls, $isRedirect=false) { | 209 | } |
210 | if (!$isRedirect) $urls = array_unique($urls); | 210 | |
211 | if (empty($urls)) return; | 211 | public function validateUrl($url) { |
212 | 212 | $url = filter_var($url, FILTER_SANITIZE_URL); | |
213 | ////////////////////////////////////////////////////// | 213 | $test = filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED); |
214 | // parallel (HttpRequestPool) | 214 | // deal with bug http://bugs.php.net/51192 (present in PHP 5.2.13 and PHP 5.3.2) |
215 | if ($this->method == self::METHOD_REQUEST_POOL) { | 215 | if ($test === false) { |
216 | $this->debug('Starting parallel fetch (HttpRequestPool)'); | 216 | $test = filter_var(strtr($url, '-', '_'), FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED); |
217 | try { | 217 | } |
218 | while (count($urls) > 0) { | 218 | if ($test !== false && $test !== null && preg_match('!^https?://!', $url)) { |
219 | $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls))); | 219 | return $url; |
220 | $subset = array_splice($urls, 0, $this->maxParallelRequests); | 220 | } else { |
221 | $pool = new HttpRequestPool(); | 221 | return false; |
222 | foreach ($subset as $orig => $url) { | 222 | } |
223 | if (!$isRedirect) $orig = $url; | 223 | } |
224 | unset($this->redirectQueue[$orig]); | 224 | |
225 | $this->debug("...$url"); | 225 | public function fetchAll(array $urls) { |
226 | if (!$isRedirect && isset($this->requests[$url])) { | 226 | $this->fetchAllOnce($urls, $isRedirect=false); |
227 | $this->debug("......in memory"); | 227 | $redirects = 0; |
228 | /* | 228 | while (!empty($this->redirectQueue) && ++$redirects <= $this->maxRedirects) { |
229 | } elseif ($this->isCached($url)) { | 229 | $this->debug("Following redirects #$redirects..."); |
230 | $this->debug("......is cached"); | 230 | $this->fetchAllOnce($this->redirectQueue, $isRedirect=true); |
231 | if (!$this->minimiseMemoryUse) { | 231 | } |
232 | $this->requests[$url] = $this->getCached($url); | 232 | } |
233 | } | 233 | |
234 | */ | 234 | // fetch all URLs without following redirects |
235 | } else { | 235 | public function fetchAllOnce(array $urls, $isRedirect=false) { |
236 | $this->debug("......adding to pool"); | 236 | if (!$isRedirect) $urls = array_unique($urls); |
237 | $req_url = $this->rewriteUrls($url); | 237 | if (empty($urls)) return; |
238 | $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url; | 238 | |
239 | $req_url = $this->removeFragment($req_url); | 239 | ////////////////////////////////////////////////////// |
240 | if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) { | 240 | // parallel (HttpRequestPool) |
241 | $_meth = HttpRequest::METH_HEAD; | 241 | if ($this->method == self::METHOD_REQUEST_POOL) { |
242 | } else { | 242 | $this->debug('Starting parallel fetch (HttpRequestPool)'); |
243 | $_meth = HttpRequest::METH_GET; | 243 | try { |
244 | unset($this->requests[$orig]['wrongGuess']); | 244 | while (count($urls) > 0) { |
245 | } | 245 | $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls))); |
246 | $httpRequest = new HttpRequest($req_url, $_meth, $this->requestOptions); | 246 | $subset = array_splice($urls, 0, $this->maxParallelRequests); |
247 | // send cookies, if we have any | 247 | $pool = new HttpRequestPool(); |
248 | if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) { | 248 | foreach ($subset as $orig => $url) { |
249 | $this->debug("......sending cookies: $cookies"); | 249 | if (!$isRedirect) $orig = $url; |
250 | $httpRequest->addHeaders(array('Cookie' => $cookies)); | 250 | unset($this->redirectQueue[$orig]); |
251 | } | 251 | $this->debug("...$url"); |
252 | //$httpRequest->addHeaders(array('User-Agent' => $this->userAgent)); | 252 | if (!$isRedirect && isset($this->requests[$url])) { |
253 | $httpRequest->addHeaders($this->getUserAgent($req_url, true)); | 253 | $this->debug("......in memory"); |
254 | // add referer for picky sites | 254 | /* |
255 | $httpRequest->addheaders(array('Referer' => $this->referer)); | 255 | } elseif ($this->isCached($url)) { |
256 | $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest); | 256 | $this->debug("......is cached"); |
257 | $this->requests[$orig]['original_url'] = $orig; | 257 | if (!$this->minimiseMemoryUse) { |
258 | $pool->attach($httpRequest); | 258 | $this->requests[$url] = $this->getCached($url); |
259 | } | 259 | } |
260 | } | 260 | */ |
261 | // did we get anything into the pool? | 261 | } else { |
262 | if (count($pool) > 0) { | 262 | $this->debug("......adding to pool"); |
263 | $this->debug('Sending request...'); | 263 | $req_url = $this->rewriteUrls($url); |
264 | try { | 264 | $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url; |
265 | $pool->send(); | 265 | $req_url = $this->removeFragment($req_url); |
266 | } catch (HttpRequestPoolException $e) { | 266 | if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) { |
267 | // do nothing | 267 | $_meth = HttpRequest::METH_HEAD; |
268 | } | 268 | } else { |
269 | $this->debug('Received responses'); | 269 | $_meth = HttpRequest::METH_GET; |
270 | foreach($subset as $orig => $url) { | 270 | unset($this->requests[$orig]['wrongGuess']); |
271 | if (!$isRedirect) $orig = $url; | 271 | } |
272 | $request = $this->requests[$orig]['httpRequest']; | 272 | $httpRequest = new HttpRequest($req_url, $_meth, $this->requestOptions); |
273 | //$this->requests[$orig]['headers'] = $this->headersToString($request->getResponseHeader()); | 273 | // send cookies, if we have any |
274 | // getResponseHeader() doesn't return status line, so, for consistency... | 274 | if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) { |
275 | $this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size')); | 275 | $this->debug("......sending cookies: $cookies"); |
276 | // check content type | 276 | $httpRequest->addHeaders(array('Cookie' => $cookies)); |
277 | // TODO: use getResponseHeader('content-type') or getResponseInfo() | 277 | } |
278 | if ($this->headerOnlyType($this->requests[$orig]['headers'])) { | 278 | //$httpRequest->addHeaders(array('User-Agent' => $this->userAgent)); |
279 | $this->requests[$orig]['body'] = ''; | 279 | $httpRequest->addHeaders($this->getUserAgent($req_url, true)); |
280 | $_header_only_type = true; | 280 | // add referer for picky sites |
281 | $this->debug('Header only type returned'); | 281 | $httpRequest->addheaders(array('Referer' => $this->referer)); |
282 | } else { | 282 | $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest); |
283 | $this->requests[$orig]['body'] = $request->getResponseBody(); | 283 | $this->requests[$orig]['original_url'] = $orig; |
284 | $_header_only_type = false; | 284 | $pool->attach($httpRequest); |
285 | } | 285 | } |
286 | $this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url'); | 286 | } |
287 | $this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode(); | 287 | // did we get anything into the pool? |
288 | // is redirect? | 288 | if (count($pool) > 0) { |
289 | if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && $request->getResponseHeader('location')) { | 289 | $this->debug('Sending request...'); |
290 | $redirectURL = $request->getResponseHeader('location'); | 290 | try { |
291 | if (!preg_match('!^https?://!i', $redirectURL)) { | 291 | $pool->send(); |
292 | $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url); | 292 | } catch (HttpRequestPoolException $e) { |
293 | } | 293 | // do nothing |
294 | if ($this->validateURL($redirectURL)) { | 294 | } |
295 | $this->debug('Redirect detected. Valid URL: '.$redirectURL); | 295 | $this->debug('Received responses'); |
296 | // store any cookies | 296 | foreach($subset as $orig => $url) { |
297 | $cookies = $request->getResponseHeader('set-cookie'); | 297 | if (!$isRedirect) $orig = $url; |
298 | if ($cookies && !is_array($cookies)) $cookies = array($cookies); | 298 | $request = $this->requests[$orig]['httpRequest']; |
299 | if ($cookies) $this->cookieJar->storeCookies($url, $cookies); | 299 | //$this->requests[$orig]['headers'] = $this->headersToString($request->getResponseHeader()); |
300 | $this->redirectQueue[$orig] = $redirectURL; | 300 | // getResponseHeader() doesn't return status line, so, for consistency... |
301 | } else { | 301 | $this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size')); |
302 | $this->debug('Redirect detected. Invalid URL: '.$redirectURL); | 302 | // check content type |
303 | } | 303 | // TODO: use getResponseHeader('content-type') or getResponseInfo() |
304 | } elseif (!$_header_only_type && $request->getMethod() === HttpRequest::METH_HEAD) { | 304 | if ($this->headerOnlyType($this->requests[$orig]['headers'])) { |
305 | // the response content-type did not match our 'header only' types, | 305 | $this->requests[$orig]['body'] = ''; |
306 | // but we'd issues a HEAD request because we assumed it would. So | 306 | $_header_only_type = true; |
307 | // let's queue a proper GET request for this item... | 307 | $this->debug('Header only type returned'); |
308 | $this->debug('Wrong guess at content-type, queing GET request'); | 308 | } else { |
309 | $this->requests[$orig]['wrongGuess'] = true; | 309 | $this->requests[$orig]['body'] = $request->getResponseBody(); |
310 | $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url']; | 310 | $_header_only_type = false; |
311 | } | 311 | } |
312 | //die($url.' -multi- '.$request->getResponseInfo('effective_url')); | 312 | $this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url'); |
313 | $pool->detach($request); | 313 | $this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode(); |
314 | unset($this->requests[$orig]['httpRequest'], $request); | 314 | // is redirect? |
315 | /* | 315 | if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && $request->getResponseHeader('location')) { |
316 | if ($this->minimiseMemoryUse) { | 316 | $redirectURL = $request->getResponseHeader('location'); |
317 | if ($this->cache($url)) { | 317 | if (!preg_match('!^https?://!i', $redirectURL)) { |
318 | unset($this->requests[$url]); | 318 | $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url); |
319 | } | 319 | } |
320 | } | 320 | if ($this->validateURL($redirectURL)) { |
321 | */ | 321 | $this->debug('Redirect detected. Valid URL: '.$redirectURL); |
322 | } | 322 | // store any cookies |
323 | } | 323 | $cookies = $request->getResponseHeader('set-cookie'); |
324 | } | 324 | if ($cookies && !is_array($cookies)) $cookies = array($cookies); |
325 | } catch (HttpException $e) { | 325 | if ($cookies) $this->cookieJar->storeCookies($url, $cookies); |
326 | $this->debug($e); | 326 | $this->redirectQueue[$orig] = $redirectURL; |
327 | return false; | 327 | } else { |
328 | } | 328 | $this->debug('Redirect detected. Invalid URL: '.$redirectURL); |
329 | } | 329 | } |
330 | 330 | } elseif (!$_header_only_type && $request->getMethod() === HttpRequest::METH_HEAD) { | |
331 | ////////////////////////////////////////////////////////// | 331 | // the response content-type did not match our 'header only' types, |
332 | // parallel (curl_multi_*) | 332 | // but we'd issues a HEAD request because we assumed it would. So |
333 | elseif ($this->method == self::METHOD_CURL_MULTI) { | 333 | // let's queue a proper GET request for this item... |
334 | $this->debug('Starting parallel fetch (curl_multi_*)'); | 334 | $this->debug('Wrong guess at content-type, queing GET request'); |
335 | while (count($urls) > 0) { | 335 | $this->requests[$orig]['wrongGuess'] = true; |
336 | $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls))); | 336 | $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url']; |
337 | $subset = array_splice($urls, 0, $this->maxParallelRequests); | 337 | } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) { |
338 | $pool = new RollingCurl(array($this, 'handleCurlResponse')); | 338 | // check for <meta name='fragment' content='!'/> |
339 | $pool->window_size = count($subset); | 339 | // for AJAX sites, e.g. Blogger with its dynamic views templates. |
340 | 340 | // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification | |
341 | foreach ($subset as $orig => $url) { | 341 | if (isset($this->requests[$orig]['body'])) { |
342 | if (!$isRedirect) $orig = $url; | 342 | $redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000)); |
343 | unset($this->redirectQueue[$orig]); | 343 | if ($redirectURL) { |
344 | $this->debug("...$url"); | 344 | $this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL); |
345 | if (!$isRedirect && isset($this->requests[$url])) { | 345 | $this->redirectQueue[$orig] = $redirectURL; |
346 | $this->debug("......in memory"); | 346 | } |
347 | /* | 347 | } |
348 | } elseif ($this->isCached($url)) { | 348 | } |
349 | $this->debug("......is cached"); | 349 | //die($url.' -multi- '.$request->getResponseInfo('effective_url')); |
350 | if (!$this->minimiseMemoryUse) { | 350 | $pool->detach($request); |
351 | $this->requests[$url] = $this->getCached($url); | 351 | unset($this->requests[$orig]['httpRequest'], $request); |
352 | } | 352 | /* |
353 | */ | 353 | if ($this->minimiseMemoryUse) { |
354 | } else { | 354 | if ($this->cache($url)) { |
355 | $this->debug("......adding to pool"); | 355 | unset($this->requests[$url]); |
356 | $req_url = $this->rewriteUrls($url); | 356 | } |
357 | $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url; | 357 | } |
358 | $req_url = $this->removeFragment($req_url); | 358 | */ |
359 | if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) { | 359 | } |
360 | $_meth = 'HEAD'; | 360 | } |
361 | } else { | 361 | } |
362 | $_meth = 'GET'; | 362 | } catch (HttpException $e) { |
363 | unset($this->requests[$orig]['wrongGuess']); | 363 | $this->debug($e); |
364 | } | 364 | return false; |
365 | $headers = array(); | 365 | } |
366 | //$headers[] = 'User-Agent: '.$this->userAgent; | 366 | } |
367 | $headers[] = $this->getUserAgent($req_url); | 367 | |
368 | // add referer for picky sites | 368 | ////////////////////////////////////////////////////////// |
369 | $headers[] = 'Referer: '.$this->referer; | 369 | // parallel (curl_multi_*) |
370 | // send cookies, if we have any | 370 | elseif ($this->method == self::METHOD_CURL_MULTI) { |
371 | if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) { | 371 | $this->debug('Starting parallel fetch (curl_multi_*)'); |
372 | $this->debug("......sending cookies: $cookies"); | 372 | while (count($urls) > 0) { |
373 | $headers[] = 'Cookie: '.$cookies; | 373 | $this->debug('Processing set of '.min($this->maxParallelRequests, count($urls))); |
374 | } | 374 | $subset = array_splice($urls, 0, $this->maxParallelRequests); |
375 | $httpRequest = new RollingCurlRequest($req_url, $_meth, null, $headers, array( | 375 | $pool = new RollingCurl(array($this, 'handleCurlResponse')); |
376 | CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'], | 376 | $pool->window_size = count($subset); |
377 | CURLOPT_TIMEOUT => $this->requestOptions['timeout'] | 377 | |
378 | )); | 378 | foreach ($subset as $orig => $url) { |
379 | $httpRequest->set_original_url($orig); | 379 | if (!$isRedirect) $orig = $url; |
380 | $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest); | 380 | unset($this->redirectQueue[$orig]); |
381 | $this->requests[$orig]['original_url'] = $orig; // TODO: is this needed anymore? | 381 | $this->debug("...$url"); |
382 | $pool->add($httpRequest); | 382 | if (!$isRedirect && isset($this->requests[$url])) { |
383 | } | 383 | $this->debug("......in memory"); |
384 | } | 384 | /* |
385 | // did we get anything into the pool? | 385 | } elseif ($this->isCached($url)) { |
386 | if (count($pool) > 0) { | 386 | $this->debug("......is cached"); |
387 | $this->debug('Sending request...'); | 387 | if (!$this->minimiseMemoryUse) { |
388 | $pool->execute(); // this will call handleCurlResponse() and populate $this->requests[$orig] | 388 | $this->requests[$url] = $this->getCached($url); |
389 | $this->debug('Received responses'); | 389 | } |
390 | foreach($subset as $orig => $url) { | 390 | */ |
391 | if (!$isRedirect) $orig = $url; | 391 | } else { |
392 | // $this->requests[$orig]['headers'] | 392 | $this->debug("......adding to pool"); |
393 | // $this->requests[$orig]['body'] | 393 | $req_url = $this->rewriteUrls($url); |
394 | // $this->requests[$orig]['effective_url'] | 394 | $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url; |
395 | // check content type | 395 | $req_url = $this->removeFragment($req_url); |
396 | if ($this->headerOnlyType($this->requests[$orig]['headers'])) { | 396 | if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) { |
397 | $this->requests[$orig]['body'] = ''; | 397 | $_meth = 'HEAD'; |
398 | $_header_only_type = true; | 398 | } else { |
399 | $this->debug('Header only type returned'); | 399 | $_meth = 'GET'; |
400 | } else { | 400 | unset($this->requests[$orig]['wrongGuess']); |
401 | $_header_only_type = false; | 401 | } |
402 | } | 402 | $headers = array(); |
403 | $status_code = $this->requests[$orig]['status_code']; | 403 | //$headers[] = 'User-Agent: '.$this->userAgent; |
404 | if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) { | 404 | $headers[] = $this->getUserAgent($req_url); |
405 | $redirectURL = $this->requests[$orig]['location']; | 405 | // add referer for picky sites |
406 | if (!preg_match('!^https?://!i', $redirectURL)) { | 406 | $headers[] = 'Referer: '.$this->referer; |
407 | $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url); | 407 | // send cookies, if we have any |
408 | } | 408 | if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) { |
409 | if ($this->validateURL($redirectURL)) { | 409 | $this->debug("......sending cookies: $cookies"); |
410 | $this->debug('Redirect detected. Valid URL: '.$redirectURL); | 410 | $headers[] = 'Cookie: '.$cookies; |
411 | // store any cookies | 411 | } |
412 | $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']); | 412 | $httpRequest = new RollingCurlRequest($req_url, $_meth, null, $headers, array( |
413 | if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies); | 413 | CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'], |
414 | $this->redirectQueue[$orig] = $redirectURL; | 414 | CURLOPT_TIMEOUT => $this->requestOptions['timeout'] |
415 | } else { | 415 | )); |
416 | $this->debug('Redirect detected. Invalid URL: '.$redirectURL); | 416 | $httpRequest->set_original_url($orig); |
417 | } | 417 | $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest); |
418 | } elseif (!$_header_only_type && $this->requests[$orig]['method'] == 'HEAD') { | 418 | $this->requests[$orig]['original_url'] = $orig; // TODO: is this needed anymore? |
419 | // the response content-type did not match our 'header only' types, | 419 | $pool->add($httpRequest); |
420 | // but we'd issues a HEAD request because we assumed it would. So | 420 | } |
421 | // let's queue a proper GET request for this item... | 421 | } |
422 | $this->debug('Wrong guess at content-type, queing GET request'); | 422 | // did we get anything into the pool? |
423 | $this->requests[$orig]['wrongGuess'] = true; | 423 | if (count($pool) > 0) { |
424 | $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url']; | 424 | $this->debug('Sending request...'); |
425 | } | 425 | $pool->execute(); // this will call handleCurlResponse() and populate $this->requests[$orig] |
426 | // die($url.' -multi- '.$request->getResponseInfo('effective_url')); | 426 | $this->debug('Received responses'); |
427 | unset($this->requests[$orig]['httpRequest'], $this->requests[$orig]['method']); | 427 | foreach($subset as $orig => $url) { |
428 | } | 428 | if (!$isRedirect) $orig = $url; |
429 | } | 429 | // $this->requests[$orig]['headers'] |
430 | } | 430 | // $this->requests[$orig]['body'] |
431 | } | 431 | // $this->requests[$orig]['effective_url'] |
432 | 432 | // check content type | |
433 | ////////////////////////////////////////////////////// | 433 | if ($this->headerOnlyType($this->requests[$orig]['headers'])) { |
434 | // sequential (file_get_contents) | 434 | $this->requests[$orig]['body'] = ''; |
435 | else { | 435 | $_header_only_type = true; |
436 | $this->debug('Starting sequential fetch (file_get_contents)'); | 436 | $this->debug('Header only type returned'); |
437 | $this->debug('Processing set of '.count($urls)); | 437 | } else { |
438 | foreach ($urls as $orig => $url) { | 438 | $_header_only_type = false; |
439 | if (!$isRedirect) $orig = $url; | 439 | } |
440 | unset($this->redirectQueue[$orig]); | 440 | $status_code = $this->requests[$orig]['status_code']; |
441 | $this->debug("...$url"); | 441 | if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) { |
442 | if (!$isRedirect && isset($this->requests[$url])) { | 442 | $redirectURL = $this->requests[$orig]['location']; |
443 | $this->debug("......in memory"); | 443 | if (!preg_match('!^https?://!i', $redirectURL)) { |
444 | /* | 444 | $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url); |
445 | } elseif ($this->isCached($url)) { | 445 | } |
446 | $this->debug("......is cached"); | 446 | if ($this->validateURL($redirectURL)) { |
447 | if (!$this->minimiseMemoryUse) { | 447 | $this->debug('Redirect detected. Valid URL: '.$redirectURL); |
448 | $this->requests[$url] = $this->getCached($url); | 448 | // store any cookies |
449 | } | 449 | $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']); |
450 | */ | 450 | if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies); |
451 | } else { | 451 | $this->redirectQueue[$orig] = $redirectURL; |
452 | $this->debug("Sending request for $url"); | 452 | } else { |
453 | $this->requests[$orig]['original_url'] = $orig; | 453 | $this->debug('Redirect detected. Invalid URL: '.$redirectURL); |
454 | $req_url = $this->rewriteUrls($url); | 454 | } |
455 | $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url; | 455 | } elseif (!$_header_only_type && $this->requests[$orig]['method'] == 'HEAD') { |
456 | $req_url = $this->removeFragment($req_url); | 456 | // the response content-type did not match our 'header only' types, |
457 | // send cookies, if we have any | 457 | // but we'd issues a HEAD request because we assumed it would. So |
458 | $httpContext = $this->httpContext; | 458 | // let's queue a proper GET request for this item... |
459 | $httpContext['http']['header'] .= $this->getUserAgent($req_url)."\r\n"; | 459 | $this->debug('Wrong guess at content-type, queing GET request'); |
460 | // add referer for picky sites | 460 | $this->requests[$orig]['wrongGuess'] = true; |
461 | $httpContext['http']['header'] .= 'Referer: '.$this->referer."\r\n"; | 461 | $this->redirectQueue[$orig] = $this->requests[$orig]['effective_url']; |
462 | if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) { | 462 | } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) { |
463 | $this->debug("......sending cookies: $cookies"); | 463 | // check for <meta name='fragment' content='!'/> |
464 | $httpContext['http']['header'] .= 'Cookie: '.$cookies."\r\n"; | 464 | // for AJAX sites, e.g. Blogger with its dynamic views templates. |
465 | } | 465 | // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification |
466 | if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) { | 466 | if (isset($this->requests[$orig]['body'])) { |
467 | $this->debug('Received response'); | 467 | $redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000)); |
468 | // get status code | 468 | if ($redirectURL) { |
469 | if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\d+\.\d+\s+(\d+)!', trim($http_response_header[0]), $match)) { | 469 | $this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL); |
470 | $this->debug('Error: no status code found'); | 470 | $this->redirectQueue[$orig] = $redirectURL; |
471 | // TODO: handle error - no status code | 471 | } |
472 | } else { | 472 | } |
473 | $this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false); | 473 | } |
474 | // check content type | 474 | // die($url.' -multi- '.$request->getResponseInfo('effective_url')); |
475 | if ($this->headerOnlyType($this->requests[$orig]['headers'])) { | 475 | unset($this->requests[$orig]['httpRequest'], $this->requests[$orig]['method']); |
476 | $this->requests[$orig]['body'] = ''; | 476 | } |
477 | } else { | 477 | } |
478 | $this->requests[$orig]['body'] = $html; | 478 | } |
479 | } | 479 | } |
480 | $this->requests[$orig]['effective_url'] = $req_url; | 480 | |
481 | $this->requests[$orig]['status_code'] = $status_code = (int)$match[1]; | 481 | ////////////////////////////////////////////////////// |
482 | unset($match); | 482 | // sequential (file_get_contents) |
483 | // handle redirect | 483 | else { |
484 | if (preg_match('/^Location:(.*?)$/m', $this->requests[$orig]['headers'], $match)) { | 484 | $this->debug('Starting sequential fetch (file_get_contents)'); |
485 | $this->requests[$orig]['location'] = trim($match[1]); | 485 | $this->debug('Processing set of '.count($urls)); |
486 | } | 486 | foreach ($urls as $orig => $url) { |
487 | if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) { | 487 | if (!$isRedirect) $orig = $url; |
488 | $redirectURL = $this->requests[$orig]['location']; | 488 | unset($this->redirectQueue[$orig]); |
489 | if (!preg_match('!^https?://!i', $redirectURL)) { | 489 | $this->debug("...$url"); |
490 | $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url); | 490 | if (!$isRedirect && isset($this->requests[$url])) { |
491 | } | 491 | $this->debug("......in memory"); |
492 | if ($this->validateURL($redirectURL)) { | 492 | /* |
493 | $this->debug('Redirect detected. Valid URL: '.$redirectURL); | 493 | } elseif ($this->isCached($url)) { |
494 | // store any cookies | 494 | $this->debug("......is cached"); |
495 | $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']); | 495 | if (!$this->minimiseMemoryUse) { |
496 | if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies); | 496 | $this->requests[$url] = $this->getCached($url); |
497 | $this->redirectQueue[$orig] = $redirectURL; | 497 | } |
498 | } else { | 498 | */ |
499 | $this->debug('Redirect detected. Invalid URL: '.$redirectURL); | 499 | } else { |
500 | } | 500 | $this->debug("Sending request for $url"); |
501 | } | 501 | $this->requests[$orig]['original_url'] = $orig; |
502 | } | 502 | $req_url = $this->rewriteUrls($url); |
503 | } else { | 503 | $req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url; |
504 | $this->debug('Error retrieving URL'); | 504 | $req_url = $this->removeFragment($req_url); |
505 | //print_r($req_url); | 505 | // send cookies, if we have any |
506 | //print_r($http_response_header); | 506 | $httpContext = $this->httpContext; |
507 | //print_r($html); | 507 | $httpContext['http']['header'] .= $this->getUserAgent($req_url)."\r\n"; |
508 | 508 | // add referer for picky sites | |
509 | // TODO: handle error - failed to retrieve URL | 509 | $httpContext['http']['header'] .= 'Referer: '.$this->referer."\r\n"; |
510 | } | 510 | if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) { |
511 | } | 511 | $this->debug("......sending cookies: $cookies"); |
512 | } | 512 | $httpContext['http']['header'] .= 'Cookie: '.$cookies."\r\n"; |
513 | } | 513 | } |
514 | } | 514 | if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) { |
515 | 515 | $this->debug('Received response'); | |
516 | public function handleCurlResponse($response, $info, $request) { | 516 | // get status code |
517 | $orig = $request->url_original; | 517 | if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\d+\.\d+\s+(\d+)!', trim($http_response_header[0]), $match)) { |
518 | $this->requests[$orig]['headers'] = substr($response, 0, $info['header_size']); | 518 | $this->debug('Error: no status code found'); |
519 | $this->requests[$orig]['body'] = substr($response, $info['header_size']); | 519 | // TODO: handle error - no status code |
520 | $this->requests[$orig]['method'] = $request->method; | 520 | } else { |
521 | $this->requests[$orig]['effective_url'] = $info['url']; | 521 | $this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false); |
522 | $this->requests[$orig]['status_code'] = (int)$info['http_code']; | 522 | // check content type |
523 | if (preg_match('/^Location:(.*?)$/m', $this->requests[$orig]['headers'], $match)) { | 523 | if ($this->headerOnlyType($this->requests[$orig]['headers'])) { |
524 | $this->requests[$orig]['location'] = trim($match[1]); | 524 | $this->requests[$orig]['body'] = ''; |
525 | } | 525 | } else { |
526 | } | 526 | $this->requests[$orig]['body'] = $html; |
527 | 527 | } | |
528 | protected function headersToString(array $headers, $associative=true) { | 528 | $this->requests[$orig]['effective_url'] = $req_url; |
529 | if (!$associative) { | 529 | $this->requests[$orig]['status_code'] = $status_code = (int)$match[1]; |
530 | return implode("\n", $headers); | 530 | unset($match); |
531 | } else { | 531 | // handle redirect |
532 | $str = ''; | 532 | if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) { |
533 | foreach ($headers as $key => $val) { | 533 | $this->requests[$orig]['location'] = trim($match[1]); |
534 | if (is_array($val)) { | 534 | } |
535 | foreach ($val as $v) $str .= "$key: $v\n"; | 535 | if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) { |
536 | } else { | 536 | $redirectURL = $this->requests[$orig]['location']; |
537 | $str .= "$key: $val\n"; | 537 | if (!preg_match('!^https?://!i', $redirectURL)) { |
538 | } | 538 | $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url); |
539 | } | 539 | } |
540 | return rtrim($str); | 540 | if ($this->validateURL($redirectURL)) { |
541 | } | 541 | $this->debug('Redirect detected. Valid URL: '.$redirectURL); |
542 | } | 542 | // store any cookies |
543 | 543 | $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']); | |
544 | public function get($url, $remove=false, $gzdecode=true) { | 544 | if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies); |
545 | $url = "$url"; | 545 | $this->redirectQueue[$orig] = $redirectURL; |
546 | if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) { | 546 | } else { |
547 | $this->debug("URL already fetched - in memory ($url, effective: {$this->requests[$url]['effective_url']})"); | 547 | $this->debug('Redirect detected. Invalid URL: '.$redirectURL); |
548 | $response = $this->requests[$url]; | 548 | } |
549 | /* | 549 | } elseif (strpos($this->requests[$orig]['effective_url'], '_escaped_fragment_') === false) { |
550 | } elseif ($this->isCached($url)) { | 550 | // check for <meta name='fragment' content='!'/> |
551 | $this->debug("URL already fetched - in disk cache ($url)"); | 551 | // for AJAX sites, e.g. Blogger with its dynamic views templates. |
552 | $response = $this->getCached($url); | 552 | // Based on Google's spec: https://developers.google.com/webmasters/ajax-crawling/docs/specification |
553 | $this->requests[$url] = $response; | 553 | if (isset($this->requests[$orig]['body'])) { |
554 | */ | 554 | $redirectURL = $this->getUglyURL($this->requests[$orig]['effective_url'], substr($this->requests[$orig]['body'], 0, 4000)); |
555 | } else { | 555 | if ($redirectURL) { |
556 | $this->debug("Fetching URL ($url)"); | 556 | $this->debug('AJAX trigger (meta name="fragment" content="!") found. Queueing '.$redirectURL); |
557 | $this->fetchAll(array($url)); | 557 | $this->redirectQueue[$orig] = $redirectURL; |
558 | if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) { | 558 | } |
559 | $response = $this->requests[$url]; | 559 | } |
560 | } else { | 560 | } |
561 | $this->debug("Request failed"); | 561 | } |
562 | $response = false; | 562 | } else { |
563 | } | 563 | $this->debug('Error retrieving URL'); |
564 | } | 564 | //print_r($req_url); |
565 | /* | 565 | //print_r($http_response_header); |
566 | if ($this->minimiseMemoryUse && $response) { | 566 | //print_r($html); |
567 | $this->cache($url); | 567 | |
568 | unset($this->requests[$url]); | 568 | // TODO: handle error - failed to retrieve URL |
569 | } | 569 | } |
570 | */ | 570 | } |
571 | if ($remove && $response) unset($this->requests[$url]); | 571 | } |
572 | if ($gzdecode && stripos($response['headers'], 'Content-Encoding: gzip')) { | 572 | } |
573 | if ($html = gzdecode($response['body'])) { | 573 | } |
574 | $response['body'] = $html; | 574 | |
575 | } | 575 | public function handleCurlResponse($response, $info, $request) { |
576 | } | 576 | $orig = $request->url_original; |
577 | return $response; | 577 | $this->requests[$orig]['headers'] = substr($response, 0, $info['header_size']); |
578 | } | 578 | $this->requests[$orig]['body'] = substr($response, $info['header_size']); |
579 | 579 | $this->requests[$orig]['method'] = $request->method; | |
580 | public function parallelSupport() { | 580 | $this->requests[$orig]['effective_url'] = $info['url']; |
581 | return class_exists('HttpRequestPool') || function_exists('curl_multi_init'); | 581 | $this->requests[$orig]['status_code'] = (int)$info['http_code']; |
582 | } | 582 | if (preg_match('/^Location:(.*?)$/mi', $this->requests[$orig]['headers'], $match)) { |
583 | 583 | $this->requests[$orig]['location'] = trim($match[1]); | |
584 | private function headerOnlyType($headers) { | 584 | } |
585 | if (preg_match('!^Content-Type:\s*(([a-z-]+)/([^;\r\n ]+))!im', $headers, $match)) { | 585 | } |
586 | // look for full mime type (e.g. image/jpeg) or just type (e.g. image) | 586 | |
587 | $match[1] = strtolower(trim($match[1])); | 587 | protected function headersToString(array $headers, $associative=true) { |
588 | $match[2] = strtolower(trim($match[2])); | 588 | if (!$associative) { |
589 | foreach (array($match[1], $match[2]) as $mime) { | 589 | return implode("\n", $headers); |
590 | if (in_array($mime, $this->headerOnlyTypes)) return true; | 590 | } else { |
591 | } | 591 | $str = ''; |
592 | } | 592 | foreach ($headers as $key => $val) { |
593 | return false; | 593 | if (is_array($val)) { |
594 | } | 594 | foreach ($val as $v) $str .= "$key: $v\n"; |
595 | 595 | } else { | |
596 | private function possibleUnsupportedType($url) { | 596 | $str .= "$key: $val\n"; |
597 | $path = @parse_url($url, PHP_URL_PATH); | 597 | } |
598 | if ($path && strpos($path, '.') !== false) { | 598 | } |
599 | $ext = strtolower(trim(pathinfo($path, PATHINFO_EXTENSION))); | 599 | return rtrim($str); |
600 | return in_array($ext, $this->headerOnlyClues); | 600 | } |
601 | } | 601 | } |
602 | return false; | 602 | |
603 | } | 603 | public function get($url, $remove=false, $gzdecode=true) { |
604 | } | 604 | $url = "$url"; |
605 | 605 | if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) { | |
606 | // gzdecode from http://www.php.net/manual/en/function.gzdecode.php#82930 | 606 | $this->debug("URL already fetched - in memory ($url, effective: {$this->requests[$url]['effective_url']})"); |
607 | if (!function_exists('gzdecode')) { | 607 | $response = $this->requests[$url]; |
608 | function gzdecode($data,&$filename='',&$error='',$maxlength=null) | 608 | /* |
609 | { | 609 | } elseif ($this->isCached($url)) { |
610 | $len = strlen($data); | 610 | $this->debug("URL already fetched - in disk cache ($url)"); |
611 | if ($len < 18 || strcmp(substr($data,0,2),"\x1f\x8b")) { | 611 | $response = $this->getCached($url); |
612 | $error = "Not in GZIP format."; | 612 | $this->requests[$url] = $response; |
613 | return null; // Not GZIP format (See RFC 1952) | 613 | */ |
614 | } | 614 | } else { |
615 | $method = ord(substr($data,2,1)); // Compression method | 615 | $this->debug("Fetching URL ($url)"); |
616 | $flags = ord(substr($data,3,1)); // Flags | 616 | $this->fetchAll(array($url)); |
617 | if ($flags & 31 != $flags) { | 617 | if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) { |
618 | $error = "Reserved bits not allowed."; | 618 | $response = $this->requests[$url]; |
619 | return null; | 619 | } else { |
620 | } | 620 | $this->debug("Request failed"); |
621 | // NOTE: $mtime may be negative (PHP integer limitations) | 621 | $response = false; |
622 | $mtime = unpack("V", substr($data,4,4)); | 622 | } |
623 | $mtime = $mtime[1]; | 623 | } |
624 | $xfl = substr($data,8,1); | 624 | /* |
625 | $os = substr($data,8,1); | 625 | if ($this->minimiseMemoryUse && $response) { |
626 | $headerlen = 10; | 626 | $this->cache($url); |
627 | $extralen = 0; | 627 | unset($this->requests[$url]); |
628 | $extra = ""; | 628 | } |
629 | if ($flags & 4) { | 629 | */ |
630 | // 2-byte length prefixed EXTRA data in header | 630 | if ($remove && $response) unset($this->requests[$url]); |
631 | if ($len - $headerlen - 2 < 8) { | 631 | if ($gzdecode && stripos($response['headers'], 'Content-Encoding: gzip')) { |
632 | return false; // invalid | 632 | if ($html = gzdecode($response['body'])) { |
633 | } | 633 | $response['body'] = $html; |
634 | $extralen = unpack("v",substr($data,8,2)); | 634 | } |
635 | $extralen = $extralen[1]; | 635 | } |
636 | if ($len - $headerlen - 2 - $extralen < 8) { | 636 | return $response; |
637 | return false; // invalid | 637 | } |
638 | } | 638 | |
639 | $extra = substr($data,10,$extralen); | 639 | public function parallelSupport() { |
640 | $headerlen += 2 + $extralen; | 640 | return class_exists('HttpRequestPool') || function_exists('curl_multi_init'); |
641 | } | 641 | } |
642 | $filenamelen = 0; | 642 | |
643 | $filename = ""; | 643 | private function headerOnlyType($headers) { |
644 | if ($flags & 8) { | 644 | if (preg_match('!^Content-Type:\s*(([a-z-]+)/([^;\r\n ]+))!im', $headers, $match)) { |
645 | // C-style string | 645 | // look for full mime type (e.g. image/jpeg) or just type (e.g. image) |
646 | if ($len - $headerlen - 1 < 8) { | 646 | $match[1] = strtolower(trim($match[1])); |
647 | return false; // invalid | 647 | $match[2] = strtolower(trim($match[2])); |
648 | } | 648 | foreach (array($match[1], $match[2]) as $mime) { |
649 | $filenamelen = strpos(substr($data,$headerlen),chr(0)); | 649 | if (in_array($mime, $this->headerOnlyTypes)) return true; |
650 | if ($filenamelen === false || $len - $headerlen - $filenamelen - 1 < 8) { | 650 | } |
651 | return false; // invalid | 651 | } |
652 | } | 652 | return false; |
653 | $filename = substr($data,$headerlen,$filenamelen); | 653 | } |
654 | $headerlen += $filenamelen + 1; | 654 | |
655 | } | 655 | private function possibleUnsupportedType($url) { |
656 | $commentlen = 0; | 656 | $path = @parse_url($url, PHP_URL_PATH); |
657 | $comment = ""; | 657 | if ($path && strpos($path, '.') !== false) { |
658 | if ($flags & 16) { | 658 | $ext = strtolower(trim(pathinfo($path, PATHINFO_EXTENSION))); |
659 | // C-style string COMMENT data in header | 659 | return in_array($ext, $this->headerOnlyClues); |
660 | if ($len - $headerlen - 1 < 8) { | 660 | } |
661 | return false; // invalid | 661 | return false; |
662 | } | 662 | } |
663 | $commentlen = strpos(substr($data,$headerlen),chr(0)); | 663 | } |
664 | if ($commentlen === false || $len - $headerlen - $commentlen - 1 < 8) { | 664 | |
665 | return false; // Invalid header format | 665 | // gzdecode from http://www.php.net/manual/en/function.gzdecode.php#82930 |
666 | } | 666 | if (!function_exists('gzdecode')) { |
667 | $comment = substr($data,$headerlen,$commentlen); | 667 | function gzdecode($data,&$filename='',&$error='',$maxlength=null) |
668 | $headerlen += $commentlen + 1; | 668 | { |
669 | } | 669 | $len = strlen($data); |
670 | $headercrc = ""; | 670 | if ($len < 18 || strcmp(substr($data,0,2),"\x1f\x8b")) { |
671 | if ($flags & 2) { | 671 | $error = "Not in GZIP format."; |
672 | // 2-bytes (lowest order) of CRC32 on header present | 672 | return null; // Not GZIP format (See RFC 1952) |
673 | if ($len - $headerlen - 2 < 8) { | 673 | } |
674 | return false; // invalid | 674 | $method = ord(substr($data,2,1)); // Compression method |
675 | } | 675 | $flags = ord(substr($data,3,1)); // Flags |
676 | $calccrc = crc32(substr($data,0,$headerlen)) & 0xffff; | 676 | if ($flags & 31 != $flags) { |
677 | $headercrc = unpack("v", substr($data,$headerlen,2)); | 677 | $error = "Reserved bits not allowed."; |
678 | $headercrc = $headercrc[1]; | 678 | return null; |
679 | if ($headercrc != $calccrc) { | 679 | } |
680 | $error = "Header checksum failed."; | 680 | // NOTE: $mtime may be negative (PHP integer limitations) |
681 | return false; // Bad header CRC | 681 | $mtime = unpack("V", substr($data,4,4)); |
682 | } | 682 | $mtime = $mtime[1]; |
683 | $headerlen += 2; | 683 | $xfl = substr($data,8,1); |
684 | } | 684 | $os = substr($data,8,1); |
685 | // GZIP FOOTER | 685 | $headerlen = 10; |
686 | $datacrc = unpack("V",substr($data,-8,4)); | 686 | $extralen = 0; |
687 | $datacrc = sprintf('%u',$datacrc[1] & 0xFFFFFFFF); | 687 | $extra = ""; |
688 | $isize = unpack("V",substr($data,-4)); | 688 | if ($flags & 4) { |
689 | $isize = $isize[1]; | 689 | // 2-byte length prefixed EXTRA data in header |
690 | // decompression: | 690 | if ($len - $headerlen - 2 < 8) { |
691 | $bodylen = $len-$headerlen-8; | 691 | return false; // invalid |
692 | if ($bodylen < 1) { | 692 | } |
693 | // IMPLEMENTATION BUG! | 693 | $extralen = unpack("v",substr($data,8,2)); |
694 | return null; | 694 | $extralen = $extralen[1]; |
695 | } | 695 | if ($len - $headerlen - 2 - $extralen < 8) { |
696 | $body = substr($data,$headerlen,$bodylen); | 696 | return false; // invalid |
697 | $data = ""; | 697 | } |
698 | if ($bodylen > 0) { | 698 | $extra = substr($data,10,$extralen); |
699 | switch ($method) { | 699 | $headerlen += 2 + $extralen; |
700 | case 8: | 700 | } |
701 | // Currently the only supported compression method: | 701 | $filenamelen = 0; |
702 | $data = gzinflate($body,$maxlength); | 702 | $filename = ""; |
703 | break; | 703 | if ($flags & 8) { |
704 | default: | 704 | // C-style string |
705 | $error = "Unknown compression method."; | 705 | if ($len - $headerlen - 1 < 8) { |
706 | return false; | 706 | return false; // invalid |
707 | } | 707 | } |
708 | } // zero-byte body content is allowed | 708 | $filenamelen = strpos(substr($data,$headerlen),chr(0)); |
709 | // Verifiy CRC32 | 709 | if ($filenamelen === false || $len - $headerlen - $filenamelen - 1 < 8) { |
710 | $crc = sprintf("%u",crc32($data)); | 710 | return false; // invalid |
711 | $crcOK = $crc == $datacrc; | 711 | } |
712 | $lenOK = $isize == strlen($data); | 712 | $filename = substr($data,$headerlen,$filenamelen); |
713 | if (!$lenOK || !$crcOK) { | 713 | $headerlen += $filenamelen + 1; |
714 | $error = ( $lenOK ? '' : 'Length check FAILED. ') . ( $crcOK ? '' : 'Checksum FAILED.'); | 714 | } |
715 | return false; | 715 | $commentlen = 0; |
716 | } | 716 | $comment = ""; |
717 | return $data; | 717 | if ($flags & 16) { |
718 | } | 718 | // C-style string COMMENT data in header |
719 | } | 719 | if ($len - $headerlen - 1 < 8) { |
720 | return false; // invalid | ||
721 | } | ||
722 | $commentlen = strpos(substr($data,$headerlen),chr(0)); | ||
723 | if ($commentlen === false || $len - $headerlen - $commentlen - 1 < 8) { | ||
724 | return false; // Invalid header format | ||
725 | } | ||
726 | $comment = substr($data,$headerlen,$commentlen); | ||
727 | $headerlen += $commentlen + 1; | ||
728 | } | ||
729 | $headercrc = ""; | ||
730 | if ($flags & 2) { | ||
731 | // 2-bytes (lowest order) of CRC32 on header present | ||
732 | if ($len - $headerlen - 2 < 8) { | ||
733 | return false; // invalid | ||
734 | } | ||
735 | $calccrc = crc32(substr($data,0,$headerlen)) & 0xffff; | ||
736 | $headercrc = unpack("v", substr($data,$headerlen,2)); | ||
737 | $headercrc = $headercrc[1]; | ||
738 | if ($headercrc != $calccrc) { | ||
739 | $error = "Header checksum failed."; | ||
740 | return false; // Bad header CRC | ||
741 | } | ||
742 | $headerlen += 2; | ||
743 | } | ||
744 | // GZIP FOOTER | ||
745 | $datacrc = unpack("V",substr($data,-8,4)); | ||
746 | $datacrc = sprintf('%u',$datacrc[1] & 0xFFFFFFFF); | ||
747 | $isize = unpack("V",substr($data,-4)); | ||
748 | $isize = $isize[1]; | ||
749 | // decompression: | ||
750 | $bodylen = $len-$headerlen-8; | ||
751 | if ($bodylen < 1) { | ||
752 | // IMPLEMENTATION BUG! | ||
753 | return null; | ||
754 | } | ||
755 | $body = substr($data,$headerlen,$bodylen); | ||
756 | $data = ""; | ||
757 | if ($bodylen > 0) { | ||
758 | switch ($method) { | ||
759 | case 8: | ||
760 | // Currently the only supported compression method: | ||
761 | $data = gzinflate($body,$maxlength); | ||
762 | break; | ||
763 | default: | ||
764 | $error = "Unknown compression method."; | ||
765 | return false; | ||
766 | } | ||
767 | } // zero-byte body content is allowed | ||
768 | // Verifiy CRC32 | ||
769 | $crc = sprintf("%u",crc32($data)); | ||
770 | $crcOK = $crc == $datacrc; | ||
771 | $lenOK = $isize == strlen($data); | ||
772 | if (!$lenOK || !$crcOK) { | ||
773 | $error = ( $lenOK ? '' : 'Length check FAILED. ') . ( $crcOK ? '' : 'Checksum FAILED.'); | ||
774 | return false; | ||
775 | } | ||
776 | return $data; | ||
777 | } | ||
778 | } | ||
720 | ?> \ No newline at end of file | 779 | ?> \ No newline at end of file |
diff --git a/inc/3rdparty/humble-http-agent/RollingCurl.php b/inc/3rdparty/libraries/humble-http-agent/RollingCurl.php index fdd021af..d24dc690 100644 --- a/inc/3rdparty/humble-http-agent/RollingCurl.php +++ b/inc/3rdparty/libraries/humble-http-agent/RollingCurl.php | |||
@@ -1,402 +1,402 @@ | |||
1 | <?php | 1 | <?php |
2 | /* | 2 | /* |
3 | Authored by Josh Fraser (www.joshfraser.com) | 3 | Authored by Josh Fraser (www.joshfraser.com) |
4 | Released under Apache License 2.0 | 4 | Released under Apache License 2.0 |
5 | 5 | ||
6 | Maintained by Alexander Makarov, http://rmcreative.ru/ | 6 | Maintained by Alexander Makarov, http://rmcreative.ru/ |
7 | 7 | ||
8 | Modified by Keyvan Minoukadeh for the Five Filters project: http://fivefilters.org | 8 | Modified by Keyvan Minoukadeh for the Five Filters project: http://fivefilters.org |
9 | */ | 9 | */ |
10 | 10 | ||
11 | /** | 11 | /** |
12 | * Class that represent a single curl request | 12 | * Class that represent a single curl request |
13 | */ | 13 | */ |
14 | class RollingCurlRequest { | 14 | class RollingCurlRequest { |
15 | public $url = false; | 15 | public $url = false; |
16 | public $url_original = false; // used for tracking redirects | 16 | public $url_original = false; // used for tracking redirects |
17 | public $method = 'GET'; | 17 | public $method = 'GET'; |
18 | public $post_data = null; | 18 | public $post_data = null; |
19 | public $headers = null; | 19 | public $headers = null; |
20 | public $options = null; | 20 | public $options = null; |
21 | 21 | ||
22 | /** | 22 | /** |
23 | * @param string $url | 23 | * @param string $url |
24 | * @param string $method | 24 | * @param string $method |
25 | * @param $post_data | 25 | * @param $post_data |
26 | * @param $headers | 26 | * @param $headers |
27 | * @param $options | 27 | * @param $options |
28 | * @return void | 28 | * @return void |
29 | */ | 29 | */ |
30 | function __construct($url, $method = "GET", $post_data = null, $headers = null, $options = null) { | 30 | function __construct($url, $method = "GET", $post_data = null, $headers = null, $options = null) { |
31 | $this->url = $url; | 31 | $this->url = $url; |
32 | $this->url_original = $url; | 32 | $this->url_original = $url; |
33 | $this->method = $method; | 33 | $this->method = $method; |
34 | $this->post_data = $post_data; | 34 | $this->post_data = $post_data; |
35 | $this->headers = $headers; | 35 | $this->headers = $headers; |
36 | $this->options = $options; | 36 | $this->options = $options; |
37 | } | 37 | } |
38 | 38 | ||
39 | /** | 39 | /** |
40 | * @param string $url | 40 | * @param string $url |
41 | * @return void | 41 | * @return void |
42 | */ | 42 | */ |
43 | public function set_original_url($url) { | 43 | public function set_original_url($url) { |
44 | $this->url_original = $url; | 44 | $this->url_original = $url; |
45 | } | 45 | } |
46 | /** | 46 | /** |
47 | * @return void | 47 | * @return void |
48 | */ | 48 | */ |
49 | public function __destruct() { | 49 | public function __destruct() { |
50 | unset($this->url, $this->url_original, $this->method, $this->post_data, $this->headers, $this->options); | 50 | unset($this->url, $this->url_original, $this->method, $this->post_data, $this->headers, $this->options); |
51 | } | 51 | } |
52 | } | 52 | } |
53 | 53 | ||
54 | /** | 54 | /** |
55 | * RollingCurl custom exception | 55 | * RollingCurl custom exception |
56 | */ | 56 | */ |
57 | class RollingCurlException extends Exception { | 57 | class RollingCurlException extends Exception { |
58 | } | 58 | } |
59 | 59 | ||
60 | /** | 60 | /** |
61 | * Class that holds a rolling queue of curl requests. | 61 | * Class that holds a rolling queue of curl requests. |
62 | * | 62 | * |
63 | * @throws RollingCurlException | 63 | * @throws RollingCurlException |
64 | */ | 64 | */ |
65 | class RollingCurl implements Countable { | 65 | class RollingCurl implements Countable { |
66 | /** | 66 | /** |
67 | * @var int | 67 | * @var int |
68 | * | 68 | * |
69 | * Window size is the max number of simultaneous connections allowed. | 69 | * Window size is the max number of simultaneous connections allowed. |
70 | * | 70 | * |
71 | * REMEMBER TO RESPECT THE SERVERS: | 71 | * REMEMBER TO RESPECT THE SERVERS: |
72 | * Sending too many requests at one time can easily be perceived | 72 | * Sending too many requests at one time can easily be perceived |
73 | * as a DOS attack. Increase this window_size if you are making requests | 73 | * as a DOS attack. Increase this window_size if you are making requests |
74 | * to multiple servers or have permission from the receving server admins. | 74 | * to multiple servers or have permission from the receving server admins. |
75 | */ | 75 | */ |
76 | private $window_size = 5; | 76 | private $window_size = 5; |
77 | 77 | ||
78 | /** | 78 | /** |
79 | * @var float | 79 | * @var float |
80 | * | 80 | * |
81 | * Timeout is the timeout used for curl_multi_select. | 81 | * Timeout is the timeout used for curl_multi_select. |
82 | */ | 82 | */ |
83 | private $timeout = 10; | 83 | private $timeout = 10; |
84 | 84 | ||
85 | /** | 85 | /** |
86 | * @var string|array | 86 | * @var string|array |
87 | * | 87 | * |
88 | * Callback function to be applied to each result. | 88 | * Callback function to be applied to each result. |
89 | */ | 89 | */ |
90 | private $callback; | 90 | private $callback; |
91 | 91 | ||
92 | /** | 92 | /** |
93 | * @var array | 93 | * @var array |
94 | * | 94 | * |
95 | * Set your base options that you want to be used with EVERY request. | 95 | * Set your base options that you want to be used with EVERY request. |
96 | */ | 96 | */ |
97 | protected $options = array( | 97 | protected $options = array( |
98 | CURLOPT_SSL_VERIFYPEER => 0, | 98 | CURLOPT_SSL_VERIFYPEER => 0, |
99 | CURLOPT_RETURNTRANSFER => 1, | 99 | CURLOPT_RETURNTRANSFER => 1, |
100 | CURLOPT_CONNECTTIMEOUT => 30, | 100 | CURLOPT_CONNECTTIMEOUT => 30, |
101 | CURLOPT_TIMEOUT => 30 | 101 | CURLOPT_TIMEOUT => 30 |
102 | ); | 102 | ); |
103 | 103 | ||
104 | /** | 104 | /** |
105 | * @var array | 105 | * @var array |
106 | */ | 106 | */ |
107 | private $headers = array(); | 107 | private $headers = array(); |
108 | 108 | ||
109 | /** | 109 | /** |
110 | * @var Request[] | 110 | * @var Request[] |
111 | * | 111 | * |
112 | * The request queue | 112 | * The request queue |
113 | */ | 113 | */ |
114 | private $requests = array(); | 114 | private $requests = array(); |
115 | 115 | ||
116 | /** | 116 | /** |
117 | * @var RequestMap[] | 117 | * @var RequestMap[] |
118 | * | 118 | * |
119 | * Maps handles to request indexes | 119 | * Maps handles to request indexes |
120 | */ | 120 | */ |
121 | private $requestMap = array(); | 121 | private $requestMap = array(); |
122 | 122 | ||
123 | /** | 123 | /** |
124 | * @param $callback | 124 | * @param $callback |
125 | * Callback function to be applied to each result. | 125 | * Callback function to be applied to each result. |
126 | * | 126 | * |
127 | * Can be specified as 'my_callback_function' | 127 | * Can be specified as 'my_callback_function' |
128 | * or array($object, 'my_callback_method'). | 128 | * or array($object, 'my_callback_method'). |
129 | * | 129 | * |
130 | * Function should take three parameters: $response, $info, $request. | 130 | * Function should take three parameters: $response, $info, $request. |
131 | * $response is response body, $info is additional curl info. | 131 | * $response is response body, $info is additional curl info. |
132 | * $request is the original request | 132 | * $request is the original request |
133 | * | 133 | * |
134 | * @return void | 134 | * @return void |
135 | */ | 135 | */ |
136 | function __construct($callback = null) { | 136 | function __construct($callback = null) { |
137 | $this->callback = $callback; | 137 | $this->callback = $callback; |
138 | } | 138 | } |
139 | 139 | ||
140 | /** | 140 | /** |
141 | * @param string $name | 141 | * @param string $name |
142 | * @return mixed | 142 | * @return mixed |
143 | */ | 143 | */ |
144 | public function __get($name) { | 144 | public function __get($name) { |
145 | return (isset($this->{$name})) ? $this->{$name} : null; | 145 | return (isset($this->{$name})) ? $this->{$name} : null; |
146 | } | 146 | } |
147 | 147 | ||
148 | /** | 148 | /** |
149 | * @param string $name | 149 | * @param string $name |
150 | * @param mixed $value | 150 | * @param mixed $value |
151 | * @return bool | 151 | * @return bool |
152 | */ | 152 | */ |
153 | public function __set($name, $value) { | 153 | public function __set($name, $value) { |
154 | // append the base options & headers | 154 | // append the base options & headers |
155 | if ($name == "options" || $name == "headers") { | 155 | if ($name == "options" || $name == "headers") { |
156 | $this->{$name} = $value + $this->{$name}; | 156 | $this->{$name} = $value + $this->{$name}; |
157 | } else { | 157 | } else { |
158 | $this->{$name} = $value; | 158 | $this->{$name} = $value; |
159 | } | 159 | } |
160 | return true; | 160 | return true; |
161 | } | 161 | } |
162 | 162 | ||
163 | /** | 163 | /** |
164 | * Count number of requests added (Countable interface) | 164 | * Count number of requests added (Countable interface) |
165 | * | 165 | * |
166 | * @return int | 166 | * @return int |
167 | */ | 167 | */ |
168 | public function count() { | 168 | public function count() { |
169 | return count($this->requests); | 169 | return count($this->requests); |
170 | } | 170 | } |
171 | 171 | ||
172 | /** | 172 | /** |
173 | * Add a request to the request queue | 173 | * Add a request to the request queue |
174 | * | 174 | * |
175 | * @param Request $request | 175 | * @param Request $request |
176 | * @return bool | 176 | * @return bool |
177 | */ | 177 | */ |
178 | public function add($request) { | 178 | public function add($request) { |
179 | $this->requests[] = $request; | 179 | $this->requests[] = $request; |
180 | return true; | 180 | return true; |
181 | } | 181 | } |
182 | 182 | ||
183 | /** | 183 | /** |
184 | * Create new Request and add it to the request queue | 184 | * Create new Request and add it to the request queue |
185 | * | 185 | * |
186 | * @param string $url | 186 | * @param string $url |
187 | * @param string $method | 187 | * @param string $method |
188 | * @param $post_data | 188 | * @param $post_data |
189 | * @param $headers | 189 | * @param $headers |
190 | * @param $options | 190 | * @param $options |
191 | * @return bool | 191 | * @return bool |
192 | */ | 192 | */ |
193 | public function request($url, $method = "GET", $post_data = null, $headers = null, $options = null) { | 193 | public function request($url, $method = "GET", $post_data = null, $headers = null, $options = null) { |
194 | $this->requests[] = new RollingCurlRequest($url, $method, $post_data, $headers, $options); | 194 | $this->requests[] = new RollingCurlRequest($url, $method, $post_data, $headers, $options); |
195 | return true; | 195 | return true; |
196 | } | 196 | } |
197 | 197 | ||
198 | /** | 198 | /** |
199 | * Perform GET request | 199 | * Perform GET request |
200 | * | 200 | * |
201 | * @param string $url | 201 | * @param string $url |
202 | * @param $headers | 202 | * @param $headers |
203 | * @param $options | 203 | * @param $options |
204 | * @return bool | 204 | * @return bool |
205 | */ | 205 | */ |
206 | public function get($url, $headers = null, $options = null) { | 206 | public function get($url, $headers = null, $options = null) { |
207 | return $this->request($url, "GET", null, $headers, $options); | 207 | return $this->request($url, "GET", null, $headers, $options); |
208 | } | 208 | } |
209 | 209 | ||
210 | /** | 210 | /** |
211 | * Perform POST request | 211 | * Perform POST request |
212 | * | 212 | * |
213 | * @param string $url | 213 | * @param string $url |
214 | * @param $post_data | 214 | * @param $post_data |
215 | * @param $headers | 215 | * @param $headers |
216 | * @param $options | 216 | * @param $options |
217 | * @return bool | 217 | * @return bool |
218 | */ | 218 | */ |
219 | public function post($url, $post_data = null, $headers = null, $options = null) { | 219 | public function post($url, $post_data = null, $headers = null, $options = null) { |
220 | return $this->request($url, "POST", $post_data, $headers, $options); | 220 | return $this->request($url, "POST", $post_data, $headers, $options); |
221 | } | 221 | } |
222 | 222 | ||
/**
 * Run every queued request
 *
 * A queue holding exactly one request is sent through single_curl();
 * any other queue size is handed to rolling_curl().
 *
 * @param int $window_size Max number of simultaneous connections
 * @return string|bool
 */
public function execute($window_size = null) {
    // the rolling window must always be greater than 1, so a lone
    // request is handled on its own
    $queued = count($this->requests);
    if ($queued != 1) {
        // window_size is the max number of simultaneous connections
        return $this->rolling_curl($window_size);
    }

    return $this->single_curl();
}
238 | 238 | ||
/**
 * Performs a single curl request
 *
 * Takes the first queued request off the queue, executes it and closes
 * the curl handle afterwards.
 *
 * @access private
 * @return string|bool the curl_exec() result when no callback is set,
 *                     true otherwise
 */
private function single_curl() {
    $ch = curl_init();
    $request = array_shift($this->requests);
    curl_setopt_array($ch, $this->get_options($request));
    $output = curl_exec($ch);
    $info = curl_getinfo($ch);
    // everything needed has been read from the handle; release it so
    // repeated calls do not accumulate open handles
    curl_close($ch);

    // it's not necessary to set a callback for one-off requests
    if (!$this->callback) {
        return $output;
    }

    // a callback that is set but not callable is skipped silently
    if (is_callable($this->callback)) {
        call_user_func($this->callback, $output, $info, $request);
    }
    return true;
}
264 | 264 | ||
/**
 * Performs multiple curl requests
 *
 * Keeps up to "window size" transfers running at once: as soon as one
 * transfer completes, its result is passed to the callback (if callable)
 * and the next queued request is started in its place.
 *
 * @access private
 * @throws RollingCurlException when the effective window is smaller than 2
 * @param int $window_size Max number of simultaneous connections
 * @return bool
 */
private function rolling_curl($window_size = null) {
    if ($window_size)
        $this->window_size = $window_size;

    // make sure the rolling window isn't greater than the # of urls.
    // The clamped value lives in a local so that a short queue does not
    // permanently shrink the configured window_size.
    $window = $this->window_size;
    $total = count($this->requests);
    if ($total < $window) {
        $window = $total;
    }

    if ($window < 2) {
        throw new RollingCurlException("Window size must be greater than 1");
    }

    $master = curl_multi_init();

    // start the first batch of requests
    for ($i = 0; $i < $window; $i++) {
        $ch = curl_init();
        curl_setopt_array($ch, $this->get_options($this->requests[$i]));
        curl_multi_add_handle($master, $ch);

        // remember which queued request this handle belongs to
        $this->requestMap[(string) $ch] = $i;
    }

    // from here on $i is the index of the next request to start
    do {
        while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM) ;
        if ($execrun != CURLM_OK)
            break;

        // a request was just completed -- find out which one
        while ($done = curl_multi_info_read($master)) {
            $finished = $done['handle'];

            // get the info and content returned on the request
            $info = curl_getinfo($finished);
            $output = curl_multi_getcontent($finished);

            // resolve the originating request and drop the map entry,
            // whether or not a callback is installed
            $key = (string) $finished;
            $request = $this->requests[$this->requestMap[$key]];
            unset($this->requestMap[$key]);

            // send the return values to the callback function.
            $callback = $this->callback;
            if (is_callable($callback)) {
                call_user_func($callback, $output, $info, $request);
            }

            // start a new request (it's important to do this before removing the old one)
            if ($i < count($this->requests) && isset($this->requests[$i])) {
                $ch = curl_init();
                curl_setopt_array($ch, $this->get_options($this->requests[$i]));
                curl_multi_add_handle($master, $ch);

                $this->requestMap[(string) $ch] = $i;
                $i++;
            }

            // remove the curl handle that just completed, then free it
            curl_multi_remove_handle($master, $finished);
            curl_close($finished);
        }

        // Block for data in / output; error handling is done by curl_multi_exec.
        // No timeout is passed: it caused problems on Windows with
        // PHP 5.3.5 and Curl 7.20.0
        if ($running) curl_multi_select($master);

    } while ($running);
    curl_multi_close($master);
    return true;
}
348 | 348 | ||
349 | 349 | ||
/**
 * Helper function to set up a new request by setting the appropriate options
 *
 * Starts from the options and headers configured on this object, then
 * layers the request's own options (which take precedence) and headers
 * on top.
 *
 * @access private
 * @param RollingCurlRequest $request
 * @return array options suitable for curl_setopt_array()
 */
private function get_options($request) {
    // options for this entire curl object
    $options = $this->__get('options');

    // We're managing redirects in PHP - allows us to intervene and rewrite/block URLs
    // before the next request goes out.
    $options[CURLOPT_FOLLOWLOCATION] = 0;
    $options[CURLOPT_MAXREDIRS] = 0;

    $headers = $this->__get('headers');
    // append custom headers for this specific request. array_merge() is
    // used rather than the + operator: + is an array union and would drop
    // any request header whose numeric index already exists in the
    // shared header list.
    if ($request->headers) {
        $headers = array_merge((array) $headers, (array) $request->headers);
    }

    // custom options for this specific request override the shared ones
    if ($request->options) {
        $options = $request->options + $options;
    }

    // set the request URL
    $options[CURLOPT_URL] = $request->url;

    if ($headers) {
        $options[CURLOPT_HTTPHEADER] = $headers;
    }
    // return response headers
    $options[CURLOPT_HEADER] = 1;

    // send HEAD request?
    if ($request->method == 'HEAD') {
        $options[CURLOPT_NOBODY] = 1;
    }
    // NOTE(review): nothing here sets CURLOPT_POST/CURLOPT_POSTFIELDS, so
    // a request queued through post() appears to go out without its post
    // data unless the caller supplies those options - confirm intent.

    return $options;
}
395 | 395 | ||
/**
 * Drops the configuration and the request queue held by this object.
 *
 * @return void
 */
public function __destruct() {
    unset($this->window_size);
    unset($this->callback);
    unset($this->options);
    unset($this->headers);
    unset($this->requests);
}
402 | } \ No newline at end of file | 402 | } \ No newline at end of file |
diff --git a/inc/3rdparty/humble-http-agent/SimplePie_HumbleHttpAgent.php b/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php index ce76a929..ecd46d5f 100644 --- a/inc/3rdparty/humble-http-agent/SimplePie_HumbleHttpAgent.php +++ b/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php | |||
@@ -1,79 +1,79 @@ | |||
1 | <?php | 1 | <?php |
/**
 * Humble HTTP Agent extension for SimplePie_File
 *
 * This class is designed to extend and override SimplePie_File
 * in order to prevent duplicate HTTP requests being sent out.
 * The idea is to initialise an instance of Humble HTTP Agent
 * and attach it, to a static class variable, of this class
 * (see set_agent()). Every instance then fetches its URL through
 * that shared agent.
 *
 * @date 2011-02-28
 */

class SimplePie_HumbleHttpAgent extends SimplePie_File
{
    /** @var HumbleHttpAgent shared agent used for all fetches */
    protected static $agent;
    var $url;
    var $useragent;
    var $success = true;
    var $headers = array();
    var $body;
    var $status_code;
    var $redirects = 0;
    var $error;
    var $method = SIMPLEPIE_FILE_SOURCE_NONE;

    /**
     * Registers the agent that all instances of this class fetch through.
     *
     * @param HumbleHttpAgent $agent
     */
    public static function set_agent(HumbleHttpAgent $agent) {
        self::$agent = $agent;
    }

    /**
     * Fetches $url through the shared agent and populates headers, body
     * and status_code. On any failure, success is set to false and error
     * holds a short description.
     *
     * $timeout, $redirects, $headers and $force_fsockopen are accepted but
     * not used by this implementation (presumably kept to mirror the
     * SimplePie_File constructor - confirm against the parent class).
     *
     * @param string $url
     * @param int $timeout unused
     * @param int $redirects unused
     * @param array|null $headers unused (see TODO below)
     * @param string|null $useragent stored on the instance only
     * @param bool $force_fsockopen unused
     */
    public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false) {
        if (class_exists('idna_convert'))
        {
            // convert an internationalised host name before fetching
            $idn = new idna_convert();
            $parsed = SimplePie_Misc::parse_url($url);
            $url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
        }
        $this->url = $url;
        $this->useragent = $useragent;

        if (!preg_match('/^http(s)?:\/\//i', $url))
        {
            $this->error = 'invalid URL';
            $this->success = false;
            return;
        }
        $this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;

        //TODO: allow for HTTP headers ($headers is not forwarded to the agent yet)

        // without an agent the call below would be a fatal error; report
        // it as a failed fetch instead
        if (!isset(self::$agent)) {
            $this->error = 'no HTTP agent set';
            $this->success = false;
            return;
        }

        $response = self::$agent->get($url);

        if ($response === false || !isset($response['status_code'])) {
            $this->error = 'failed to fetch URL';
            $this->success = false;
            return;
        }

        // The extra lines at the end are there to satisfy SimplePie's HTTP parser.
        // The class expects a full HTTP message, whereas we're giving it only
        // headers - the new lines indicate the start of the body.
        $parser = new SimplePie_HTTP_Parser($response['headers']."\r\n\r\n");
        if (!$parser->parse()) {
            // do not report success with no headers, body or status code
            $this->error = 'failed to parse HTTP response headers';
            $this->success = false;
            return;
        }

        $this->headers = $parser->headers;
        // the body comes straight from the agent, not from the parser
        $this->body = $response['body'];
        $this->status_code = $parser->status_code;
    }
}
79 | ?> \ No newline at end of file | 79 | ?> \ No newline at end of file |
diff --git a/inc/3rdparty/libraries/language-detect/LanguageDetect.php b/inc/3rdparty/libraries/language-detect/LanguageDetect.php new file mode 100644 index 00000000..09b11546 --- /dev/null +++ b/inc/3rdparty/libraries/language-detect/LanguageDetect.php | |||
@@ -0,0 +1,1635 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Detects the language of a given piece of text. | ||
5 | * | ||
6 | * Attempts to detect the language of a sample of text by correlating ranked | ||
7 | * 3-gram frequencies to a table of 3-gram frequencies of known languages. | ||
8 | * | ||
9 | * Implements a version of a technique originally proposed by Cavnar & Trenkle | ||
10 | * (1994): "N-Gram-Based Text Categorization" | ||
11 | * | ||
12 | * PHP versions 4 and 5 | ||
13 | * | ||
14 | * @category Text | ||
15 | * @package Text_LanguageDetect | ||
16 | * @author Nicholas Pisarro <infinityminusnine+pear@gmail.com> | ||
17 | * @copyright 2005-2006 Nicholas Pisarro | ||
18 | * @license http://www.debian.org/misc/bsd.license BSD | ||
19 | * @version CVS: $Id: LanguageDetect.php,v 1.20 2008/07/01 02:09:15 taak Exp $ | ||
20 | * @link http://pear.php.net/package/Text_LanguageDetect/ | ||
21 | * @link http://langdetect.blogspot.com/ | ||
22 | */ | ||
23 | |||
24 | //require_once 'PEAR.php'; | ||
25 | require_once 'Parser.php'; | ||
26 | |||
27 | /** | ||
28 | * Language detection class | ||
29 | * | ||
30 | * Requires the language model database (lang.dat) that should have | |
31 | * accompanied this class definition in order to be instantiated. | ||
32 | * | ||
33 | * Example usage: | ||
34 | * | ||
35 | * <code> | ||
36 | * require_once 'Text/LanguageDetect.php'; | ||
37 | * | ||
38 | * $l = new Text_LanguageDetect; | ||
39 | * | ||
40 | * $stdin = fopen('php://stdin', 'r'); | ||
41 | * | ||
42 | * echo "Supported languages:\n"; | ||
43 | * | ||
44 | * $langs = $l->getLanguages(); | ||
45 | * if (PEAR::isError($langs)) { | ||
46 | * die($langs->getMessage()); | ||
47 | * } | ||
48 | * | ||
49 | * sort($langs); | ||
50 | * echo join(', ', $langs); | ||
51 | * | ||
52 | * while ($line = fgets($stdin)) { | ||
53 | * print_r($l->detect($line, 4)); | ||
54 | * } | ||
55 | * </code> | ||
56 | * | ||
57 | * @category Text | ||
58 | * @package Text_LanguageDetect | ||
59 | * @author Nicholas Pisarro <infinityminusnine+pear@gmail.com> | ||
60 | * @copyright 2005 Nicholas Pisarro | ||
61 | * @license http://www.debian.org/misc/bsd.license BSD | ||
62 | * @version Release: @package_version@ | ||
63 | * @todo allow users to generate their own language models | ||
64 | */ | ||
65 | |||
66 | class Text_LanguageDetect | ||
67 | { | ||
68 | /** | ||
69 | * The filename that stores the trigram data for the detector | ||
70 | * | ||
71 | * If this value starts with a slash (/) or a dot (.) the value of | ||
72 | * $this->_data_dir will be ignored | ||
73 | * | ||
74 | * @var string | ||
75 | * @access private | ||
76 | */ | ||
77 | var $_db_filename = './lang.dat'; | ||
78 | |||
79 | /** | ||
80 | * The filename that stores the unicode block definitions | ||
81 | * | ||
82 | * If this value starts with a slash (/) or a dot (.) the value of | ||
83 | * $this->_data_dir will be ignored | ||
84 | * | ||
85 | * @var string | ||
86 | * @access private | ||
87 | */ | ||
88 | var $_unicode_db_filename = './unicode_blocks.dat'; | ||
89 | |||
90 | /** | ||
91 | * The data directory | ||
92 | * | ||
93 | * Should be set by PEAR installer | ||
94 | * | ||
95 | * @var string | ||
96 | * @access private | ||
97 | */ | ||
98 | var $_data_dir = '@data_dir@'; | ||
99 | |||
100 | /** | ||
101 | * The trigram data for comparison | ||
102 | * | ||
103 | * Will be loaded on start from $this->_db_filename | ||
104 | * | ||
105 | * May be set to a PEAR_Error object if there is an error during its | ||
106 | * initialization | ||
107 | * | ||
108 | * @var array | ||
109 | * @access private | ||
110 | */ | ||
111 | var $_lang_db = array(); | ||
112 | |||
113 | /** | ||
114 | * stores the map of the trigram data to unicode characters | ||
115 | * | ||
116 | * @access private | ||
117 | * @var array | ||
118 | */ | ||
119 | var $_unicode_map; | ||
120 | |||
121 | /** | ||
122 | * The size of the trigram data arrays | ||
123 | * | ||
124 | * @var int | ||
125 | * @access private | ||
126 | */ | ||
127 | var $_threshold = 300; | ||
128 | |||
129 | /** | ||
130 | * the maximum possible score. | ||
131 | * | ||
132 | * needed for score normalization. Different depending on the | ||
133 | * perl compatibility setting | ||
134 | * | ||
135 | * @access private | ||
136 | * @var int | ||
137 | * @see setPerlCompatible() | ||
138 | */ | ||
139 | var $_max_score = 0; | ||
140 | |||
141 | /** | ||
142 | * Whether or not to simulate perl's Language::Guess exactly | ||
143 | * | ||
144 | * @access private | ||
145 | * @var bool | ||
146 | * @see setPerlCompatible() | ||
147 | */ | ||
148 | var $_perl_compatible = false; | ||
149 | |||
150 | /** | ||
151 | * Whether to use the unicode block detection to speed up processing | ||
152 | * | ||
153 | * @access private | ||
154 | * @var bool | ||
155 | */ | ||
156 | var $_use_unicode_narrowing = true; | ||
157 | |||
158 | /** | ||
159 | * stores the result of the clustering operation | ||
160 | * | ||
161 | * @access private | ||
162 | * @var array | ||
163 | * @see clusterLanguages() | ||
164 | */ | ||
165 | var $_clusters; | ||
166 | |||
/**
 * Constructor
 *
 * Loads the language database. A missing or unreadable database file
 * makes _readdb() throw; a file whose contents are not a usable
 * language table is only reported later, by _setup_ok().
 *
 * @param string|null $db         path of the trigram database, or null
 *                                to keep $this->_db_filename
 * @param string|null $unicode_db path of the unicode block database, or
 *                                null to keep $this->_unicode_db_filename
 * @throws Exception
 */
function __construct($db = null, $unicode_db = null)
{
    if (isset($db)) $this->_db_filename = $db;
    if (isset($unicode_db)) $this->_unicode_db_filename = $unicode_db;

    $data = $this->_readdb($this->_db_filename);
    // left as null when the key is missing so that _setup_ok() reports it
    $this->_lang_db = isset($data['trigram']) ? $data['trigram'] : null;

    if (isset($data['trigram-unicodemap'])) {
        $this->_unicode_map = $data['trigram-unicodemap'];
    }

    // Not yet implemented:
    if (isset($data['trigram-clusters'])) {
        $this->_clusters = $data['trigram-clusters'];
    }
}

/**
 * PHP 4 style constructor name
 *
 * Kept so that code calling this method explicitly keeps working. A
 * method named after its class is no longer run as the constructor on
 * PHP 8, which is why the real work lives in __construct().
 *
 * @param string|null $db
 * @param string|null $unicode_db
 * @throws Exception
 */
function Text_LanguageDetect($db = null, $unicode_db = null)
{
    $this->__construct($db, $unicode_db);
}
191 | |||
/**
 * Returns the location of a database file
 *
 * The given name is handed back unchanged; no directory is prepended.
 *
 * @access private
 * @param string $fname database filename
 * @return string the same filename
 */
function _get_data_loc($fname)
{
    $location = $fname;

    return $location;
}
202 | |||
/**
 * Loads the language trigram database from filename
 *
 * Trigram database should be a serialize()'d array
 *
 * @access private
 * @param string $fname the filename where the data is stored
 * @return array|false the language model data, or false when the file
 *                     content is not valid serialized data
 * @throws Exception when the file is missing or cannot be read
 */
function _readdb($fname)
{
    // finds the correct data dir
    $fname = $this->_get_data_loc($fname);

    // input check
    if (!file_exists($fname)) {
        throw new Exception('Language database does not exist.');
    }
    if (!is_readable($fname)) {
        throw new Exception('Language database is not readable.');
    }

    // this class already requires PHP 5 (it throws exceptions), where
    // file_get_contents() always exists, so no fallback reader is needed
    $contents = file_get_contents($fname);
    if ($contents === false) {
        throw new Exception('Language database could not be read.');
    }

    // NOTE(review): unserialize() must only ever see trusted data; make
    // sure callers cannot point $fname at a file an attacker controls.
    return unserialize($contents);
}
237 | |||
238 | |||
/**
 * Checks if this object is ready to detect languages
 *
 * Either returns true or throws; it never returns false. $err is never
 * written to and only remains in the signature for existing callers.
 *
 * @access private
 * @param mixed &$err unused
 * @return bool always true
 * @throws Exception when the language database is not a non-empty array
 */
function _setup_ok(&$err)
{
    if (!is_array($this->_lang_db)) {
        if (ini_get('magic_quotes_runtime')) {
            throw new Exception('Error loading database. Try turning magic_quotes_runtime off.');
        }
        throw new Exception('Language database is not an array.');
    }

    if (empty($this->_lang_db)) {
        throw new Exception('Language database has no elements.');
    }

    return true;
}
264 | |||
/**
 * Omits languages
 *
 * Pass this function the name of or an array of names of
 * languages that you don't want considered
 *
 * If you're only expecting a limited set of languages, this can greatly
 * speed up processing
 *
 * Names are matched case-insensitively in every mode: they are
 * lower-cased before being compared with the database keys (which are
 * presumably stored in lower case, as languageExists() also assumes).
 *
 * @access public
 * @param mixed $omit_list language name or array of names to omit
 * @param bool $include_only if true will include (rather than
 *                           exclude) only those in the list
 * @return int number of languages successfully deleted
 * @throws Exception when the language database is not usable
 */
function omitLanguages($omit_list, $include_only = false)
{
    // setup check
    if (!$this->_setup_ok($err)) {
        return $err;
    }

    // normalise the input to a list of lower-case names, so that a
    // single name and an array of names are treated the same way
    if (!is_array($omit_list)) {
        $omit_list = array($omit_list);
    }
    $names = array();
    foreach ($omit_list as $name) {
        $names[] = strtolower($name);
    }

    $deleted = 0;

    if (!$include_only) {
        // deleting the given languages
        foreach ($names as $name) {
            if (isset($this->_lang_db[$name])) {
                unset($this->_lang_db[$name]);
                $deleted++;
            }
        }
    } else {
        // deleting all except the given languages
        foreach (array_keys($this->_lang_db) as $lang) {
            if (!in_array((string) $lang, $names, true)) {
                unset($this->_lang_db[$lang]);
                $deleted++;
            }
        }
    }

    // reset the cluster cache if the number of languages changes
    // this will then have to be recalculated
    if (isset($this->_clusters) && $deleted > 0) {
        unset($this->_clusters);
    }

    return $deleted;
}
335 | |||
336 | |||
/**
 * Returns how many languages this object can currently detect
 *
 * @access public
 * @return int the number of languages
 * @throws Exception when the language database is not usable
 */
function getLanguageCount()
{
    $ready = $this->_setup_ok($err);
    if ($ready) {
        return count($this->_lang_db);
    }

    return $err;
}
352 | |||
/**
 * Tells whether a language model exists for the given name(s)
 *
 * For an array of names the answer is true only if every one exists.
 * Names are lower-cased before the lookup.
 *
 * @access public
 * @param mixed $lang language name or array of language names
 * @return bool true if language model exists
 * @throws Exception when the database is not usable or $lang is neither
 *                   a string nor an array
 */
function languageExists($lang)
{
    if (!$this->_setup_ok($err)) {
        return $err;
    }

    // single name
    if (is_string($lang)) {
        $key = strtolower($lang);
        return isset($this->_lang_db[$key]);
    }

    // anything that is not an array either is an error
    if (!is_array($lang)) {
        throw new Exception('Unknown type passed to languageExists()');
    }

    // list of names: the first unknown one settles the answer
    foreach ($lang as $candidate) {
        $key = strtolower($candidate);
        if (!isset($this->_lang_db[$key])) {
            return false;
        }
    }

    return true;
}
387 | |||
/**
 * Lists the languages known to this object
 *
 * @access public
 * @return array the keys of the language database
 * @throws Exception when the language database is not usable
 */
function getLanguages()
{
    $ready = $this->_setup_ok($err);
    if ($ready) {
        return array_keys($this->_lang_db);
    }

    return $err;
}
403 | |||
/**
 * Switches Language::Guess (perl) compatible behaviour on or off
 *
 * Also adjusts the maximum score: the threshold when compatibility is
 * on, 0 when it is off. Non-boolean arguments are ignored.
 *
 * @access public
 * @param bool $setting false to turn off perl compatibility
 */
function setPerlCompatible($setting = true)
{
    // input check
    if (!is_bool($setting)) {
        return;
    }

    $this->_perl_compatible = $setting;
    $this->_max_score = $setting ? $this->_threshold : 0;
}
423 | |||
/**
 * Whether to use unicode block ranges in detection
 *
 * Should speed up most detections if turned on (default is on). In some
 * circumstances it may be slower, such as for large text samples (> 10K)
 * in languages that use latin scripts. In other cases it should speed up
 * detection noticeably.
 *
 * Non-boolean arguments are ignored.
 *
 * @access public
 * @param bool $setting false to turn off
 */
function useUnicodeBlocks($setting = true)
{
    if (!is_bool($setting)) {
        return;
    }

    $this->_use_unicode_narrowing = $setting;
}
441 | |||
/**
 * Converts a piece of text into trigrams
 *
 * Superseded by the Text_LanguageDetect_Parser class, which this method
 * simply drives.
 *
 * @access private
 * @param string $text text to convert
 * @return array array of trigram frequencies
 */
function _trigram($text)
{
    $parser = new Text_LanguageDetect_Parser($text, $this->_db_filename, $this->_unicode_db_filename);

    // configure: trigrams on, unicode block analysis off
    $parser->prepareTrigram();
    $parser->prepareUnicode(false);

    // the start of the text is padded unless perl compatibility is on
    $pad_start = !$this->_perl_compatible;
    $parser->setPadStart($pad_start);

    $parser->analyze();

    return $parser->getTrigramFreqs();
}
460 | |||
/**
 * Converts a set of trigrams from frequencies to ranks
 *
 * The array is first ordered by _bub_sort() (which re-orders the caller's
 * array in place), then each key is given its position in that order,
 * starting at 0. Ranking stops once $this->_threshold keys have been
 * ranked; the first key is always ranked.
 *
 * @access protected
 * @param  array &$arr trigram => frequency
 * @return array trigram => rank
 */
function _arr_rank(&$arr)
{
    // ordering by value with alphabetical tie-breaking gives stable ranks
    $this->_bub_sort($arr);

    $ordered = array_keys($arr);
    $total   = count($ordered);
    $ranks   = array();

    for ($position = 0; $position < $total; $position++) {
        $ranks[$ordered[$position]] = $position;

        // cut off at the standard threshold
        if ($position + 1 >= $this->_threshold) {
            break;
        }
    }

    return $ranks;
}
494 | |||
/**
 * Sorts an array by value, breaking ties by key
 *
 * Intended to match this perl statement:
 * sort { $trigrams{$b} == $trigrams{$a} ? $a cmp $b : $trigrams{$b} <=> $trigrams{$a} }
 *
 * Despite the name, the sorting itself is done by usort() with
 * _sort_func() as the comparator.
 *
 * @access private
 * @param  array &$arr the array to sort; replaced by its sorted version
 */
function _bub_sort(&$arr)
{
    // the comparator needs key and value together, so zip them into
    // array(key, value) pairs
    $pairs = array_map(null, array_keys($arr), array_values($arr));

    usort($pairs, array($this, '_sort_func'));

    // unzip the ordered pairs back into a key => value array
    $sorted = array();
    foreach ($pairs as $pair) {
        $sorted[$pair[0]] = $pair[1];
    }

    $arr = $sorted;
}
527 | |||
/**
 * Comparator used by _bub_sort()
 *
 * Callback for usort(). Each argument is an array(key, value) pair.
 * Larger values sort first; equal values are ordered by comparing the
 * keys with strcmp().
 *
 * @access private
 * @param  array $a first pair passed by usort()
 * @param  array $b second pair passed by usort()
 * @return int   -1 if $a has the larger value, 1 if $b has; when the
 *               values are equal, the result of strcmp() on the keys
 * @see    _bub_sort()
 */
function _sort_func($a, $b)
{
    $key_a   = $a[0];
    $value_a = $a[1];
    $key_b   = $b[0];
    $value_b = $b[1];

    // different values: descending order by value
    if ($value_a != $value_b) {
        return ($value_a > $value_b) ? -1 : 1;
    }

    // same value: the key decides (0 only if the keys are identical too)
    return strcmp($key_a, $key_b);
}
560 | |||
/**
 * Calculates a linear rank-order distance between two sets of ranked
 * trigrams
 *
 * For every trigram of $arr2 the absolute difference between its rank and
 * the rank of the same trigram in $arr1 is added up. A trigram that is not
 * set in $arr1 counts as a difference of $this->_threshold, which is thus
 * the largest contribution any single trigram can make.
 *
 * This distance measure was proposed by Cavnar & Trenkle (1994).
 *
 * @access private
 * @param  array &$arr1 the reference set of trigram ranks
 * @param  array &$arr2 the target set of trigram ranks
 * @return int   the sum of the rank differences
 */
function _distance(&$arr1, &$arr2)
{
    $total = 0;

    foreach ($arr2 as $trigram => $rank) {
        $total += isset($arr1[$trigram])
            ? abs($rank - $arr1[$trigram])
            : $this->_threshold;
    }

    return $total;

    // todo: there are other distance statistics to try, e.g. relative
    // entropy, but they're probably more costly to compute
}
598 | |||
/**
 * Normalizes the score returned by _distance()
 *
 * Perl compatible: floor($score / $base_count).
 * Otherwise:       1 - ($score / $base_count / $this->_threshold).
 *
 * @access private
 * @param  int   $score      the score from _distance()
 * @param  int   $base_count the number of trigrams being considered;
 *                           $this->_threshold is used when null
 * @return float the normalized score
 * @see    _distance()
 */
function _normalize_score($score, $base_count = null)
{
    $count = ($base_count === null) ? $this->_threshold : $base_count;

    if ($this->_perl_compatible) {
        return floor($score / $count);
    }

    return 1 - ($score / $count / $this->_threshold);
}
622 | |||
623 | |||
/**
 * Detects the closeness of a sample of text to the known languages
 *
 * Ranks the trigrams of the sample, measures the distance between those
 * ranks and each candidate language model, normalizes the result and
 * returns the scores in sorted order.
 *
 * In perl compatible mode the scores are sorted ascending (a lower score
 * means a smaller distance); otherwise they are sorted descending (a
 * higher score means more similar).
 *
 * If the mbstring functions are available and the detected encoding of
 * the sample is neither ASCII nor UTF-8, the sample is converted to UTF-8
 * first. mb_detect_encoding() is not very reliable, so well-formed ascii
 * or utf8 input is preferable.
 *
 * @access public
 * @param  string $sample a sample of text to compare
 * @param  int    $limit  if non-zero and numeric, only the first $limit
 *                        languages of the sorted result are returned
 * @return mixed  array of language => score; an empty array if the sample
 *                fails validation or yields no trigrams; or the error
 *                value from _setup_ok() if the setup check fails
 * @throws Exception if the parser's unicode block result is not an array
 * @see    _distance()
 */
function detect($sample, $limit = 0)
{
    if (!$this->_setup_ok($err)) {
        return $err;
    }

    // input check
    if (!Text_LanguageDetect_Parser::validateString($sample)) {
        return array();
    }

    // check char encoding (only possible with the mbstring extension)
    $has_mbstring = function_exists('mb_detect_encoding')
        && function_exists('mb_convert_encoding');

    if ($has_mbstring) {
        // mb_detect_encoding() returns false when detection fails;
        // no conversion is attempted in that case
        $encoding = mb_detect_encoding($sample);

        $needs_conversion = $encoding !== false
            && $encoding != 'ASCII'
            && $encoding != 'UTF-8';

        if ($needs_conversion) {
            if (!function_exists('mb_list_encodings')) {
                // php 4 doesnt have mb_list_encodings()
                // so attempt with error suppression
                $sample = @mb_convert_encoding($sample, 'UTF-8', $encoding);
            } elseif (in_array($encoding, mb_list_encodings())) {
                $sample = mb_convert_encoding($sample, 'UTF-8', $encoding);
            }
            // otherwise an encoding was detected that cannot be
            // converted; the sample is used as it is
        }
    }

    $parser = new Text_LanguageDetect_Parser(
        $sample,
        $this->_db_filename,
        $this->_unicode_db_filename
    );
    $parser->prepareTrigram();
    if ($this->_use_unicode_narrowing) {
        $parser->prepareUnicode();
    }
    $parser->setPadStart(!$this->_perl_compatible);
    $parser->analyze();

    $sample_ranks =& $parser->getTrigramRanks();
    $rank_total   = count($sample_ranks);

    if ($rank_total == 0) {
        return array();
    }

    if (!$this->_use_unicode_narrowing) {
        // just try 'em all
        $candidates = array_keys($this->_lang_db);
    } else {
        // use unicode block detection to narrow down the possibilities
        $blocks =& $parser->getUnicodeBlocks();

        if (!is_array($blocks)) {
            throw new Exception('Error during block detection');
        }

        // union of the languages mapped to any block found in the sample;
        // an intersection would be faster but would be thrown off by a
        // single unexpected character, like an umlaut in english text
        $candidates = array();
        foreach (array_keys($blocks) as $blockname) {
            if (isset($this->_unicode_map[$blockname])) {
                $candidates = array_merge(
                    $candidates,
                    array_keys($this->_unicode_map[$blockname])
                );
            }
        }

        // intersect with the keys of _lang_db in case languages have
        // been omitted
        $candidates = array_intersect(
            array_keys($this->_lang_db),
            array_unique($candidates)
        );
    }

    $scores = array();
    foreach ($candidates as $lang) {
        $raw = $this->_distance($this->_lang_db[$lang], $sample_ranks);
        $scores[$lang] = $this->_normalize_score($raw, $rank_total);
    }

    unset($parser);

    if ($this->_perl_compatible) {
        asort($scores);
    } else {
        arsort($scores);
    }

    // todo: drop languages with a score of $this->_max_score?

    if (!$limit || !is_numeric($limit)) {
        return $scores;
    }

    // limit the number of returned scores
    $top   = array();
    $taken = 0;
    foreach ($scores as $lang => $score) {
        if ($taken >= $limit) {
            break;
        }

        $top[$lang] = $score;
        $taken++;
    }

    return $top;
}
789 | |||
/**
 * Returns only the most similar language to the text sample
 *
 * Calls $this->detect() with a limit of 1 and returns the name of the
 * single result with its first letter upper-cased.
 *
 * @access public
 * @param  string $sample text to detect the language of
 * @return string the name of the most likely language, or null when
 *                detect() yields no usable result or the top score equals
 *                $this->_max_score
 * @see    detect()
 */
function detectSimple($sample)
{
    $scores = $this->detect($sample, 1);

    // nothing usable came back (error value or no scores)
    if (!is_array($scores) || empty($scores)) {
        return null;
    }

    // if top language has the maximum possible score,
    // then the top score will have been picked at random
    if (current($scores) == $this->_max_score) {
        return null;
    }

    return ucfirst(key($scores));
}
818 | |||
/**
 * Returns the most similar language together with a confidence rating
 *
 * The confidence compares the best score with the runner-up:
 * perl compatible: (runner-up - best) / $this->_max_score;
 * otherwise:       best - runner-up.
 * A larger value therefore means the best language stands out more from
 * the second best. Languages that have closely related cousins
 * (e.g. Norwegian and Danish) should generally get lower confidence.
 *
 * To see how similar languages are a priori, see languageSimilarity()
 *
 * @access public
 * @param  string $sample text for which language will be detected
 * @return array  keys 'language', 'similarity' and 'confidence'
 *                (confidence is null when there is no runner-up), or null
 *                when detect() yields no usable result or the top score
 *                equals $this->_max_score
 * @see    detect()
 */
function detectConfidence($sample)
{
    $scores = $this->detect($sample, 2);

    // if most similar language has the max score, it
    // will have been picked at random
    if (!is_array($scores)
        || empty($scores)
        || current($scores) == $this->_max_score
    ) {
        return null;
    }

    $best_lang  = key($scores);
    $best_score = current($scores);

    // next() returns false when there is no second element
    $runner_up = next($scores);

    if ($runner_up === false) {
        $confidence = null;
    } elseif ($this->_perl_compatible) {
        $confidence = ($runner_up - $best_score) / $this->_max_score;
    } else {
        $confidence = $best_score - $runner_up;
    }

    return array(
        'language'   => ucfirst($best_lang),
        'similarity' => $best_score,
        'confidence' => $confidence,
    );
}
880 | |||
/**
 * Returns the distribution of unicode blocks in a given utf8 string
 *
 * For the block name of a single char, use unicodeBlockName()
 *
 * @access public
 * @param  string $str          input string. Must be ascii or utf8
 * @param  bool   $skip_symbols passed on to the parser's
 *                              setUnicodeSkipSymbols(); if true, ascii
 *                              digits, symbols and non-printing characters
 *                              (including spaces, newlines and common
 *                              punctuation) are meant to be skipped
 * @return array  the value of the parser's getUnicodeBlocks()
 * @throws Exception if $skip_symbols is not a boolean or $str is not a
 *                   string
 */
function detectUnicodeBlocks($str, $skip_symbols)
{
    // input checks; the flag is checked before the string
    if (!is_bool($skip_symbols)) {
        throw new Exception('Second parameter must be boolean');
    }

    if (!is_string($str)) {
        throw new Exception('First parameter was not a string');
    }

    $parser = new Text_LanguageDetect_Parser(
        $str,
        $this->_db_filename,
        $this->_unicode_db_filename
    );

    // unicode analysis on, trigram analysis off
    $parser->prepareUnicode();
    $parser->prepareTrigram(false);
    $parser->setUnicodeSkipSymbols($skip_symbols);
    $parser->analyze();

    $found =& $parser->getUnicodeBlocks();
    unset($parser);

    return $found;
}
914 | |||
/**
 * Returns the block name for a given unicode value
 *
 * A string argument is taken to be one UTF8-encoded character and is
 * converted to its numeric value first. An int argument is used as the
 * numeric unicode value directly.
 *
 * Make sure input is of the correct type!
 *
 * @access public
 * @param  mixed $unicode unicode value (int) or utf8 char (string)
 * @return mixed the block name string or false if not found
 * @throws Exception if the input is neither string nor int, if the string
 *                   is longer than one character, or if the character
 *                   cannot be converted
 */
function unicodeBlockName($unicode)
{
    // input check
    if (!is_string($unicode) && !is_int($unicode)) {
        throw new Exception('Input must be of type string or int.');
    }

    if (is_string($unicode)) {
        // assume it is being passed a utf8 char, so convert it
        if ($this->utf8strlen($unicode) > 1) {
            throw new Exception('Pass this function only a single char');
        }

        $unicode = $this->_utf8char2unicode($unicode);

        if ($unicode == -1) {
            throw new Exception('Malformatted char');
        }
    }

    $blocks =& $this->_read_unicode_block_db();

    $match = $this->_unicode_block_name($unicode, $blocks);

    // -1 signals "no block"; otherwise index 2 of the entry is the name
    return ($match == -1) ? false : $match[2];
}
959 | |||
/**
 * Searches the unicode block database
 *
 * Returns the block entry for a given unicode value. unicodeBlockName() is
 * the public interface for this function, which does input checks which
 * this function omits for speed.
 *
 * Each entry of $blocks is read as array(lower bound, upper bound, ...);
 * unicodeBlockName() reads index 2 of the returned entry as the name. The
 * binary search relies on the entries being ordered by range.
 *
 * For a reference, see http://www.unicode.org/Public/UNIDATA/Blocks.txt
 *
 * @access protected
 * @param  int   $unicode     the unicode value
 * @param  array &$blocks     the block database
 * @param  int   $block_count the number of defined blocks in the database,
 *                            or -1 to have it counted here
 * @return mixed the matching entry of $blocks, or -1 if the value lies in
 *               no block (including values below the first block and an
 *               empty database)
 * @see    unicodeBlockName()
 */
function _unicode_block_name($unicode, &$blocks, $block_count = -1)
{
    // nothing to search in
    if (empty($blocks)) {
        return -1;
    }

    // assume that ascii characters are the most common
    // so try the first block before searching; both bounds are checked so
    // that a value below the first block is not mistaken for a member
    if ($unicode >= $blocks[0][0] && $unicode <= $blocks[0][1]) {
        return $blocks[0];
    }

    // the optional $block_count param is for efficiency
    // so this function doesn't have to run count() every time
    $high = (($block_count != -1) ? $block_count : count($blocks)) - 1;
    $low  = 1; // start with 1 because block 0 was tried above

    // binary search over the remaining blocks
    while ($low <= $high) {
        // cast so that array indexes stay integers
        $mid = (int) floor(($low + $high) / 2);

        if ($unicode < $blocks[$mid][0]) {
            // below this block's lower bound
            $high = $mid - 1;
        } elseif ($unicode > $blocks[$mid][1]) {
            // above this block's upper bound
            $low = $mid + 1;
        } else {
            // found it
            return $blocks[$mid];
        }
    }

    // failed to find the block
    return -1;

    // todo: differentiate when it's out of range or when it falls
    // into an unassigned range?
}
1017 | |||
/**
 * Brings up the unicode block database
 *
 * The data read through _readdb() is kept in a static cache so that the
 * file is not read again on later calls. The cache is keyed by
 * $this->_unicode_db_filename, so an object configured with a different
 * database file gets the data of its own file rather than whatever was
 * loaded first.
 *
 * @access protected
 * @return array the database of unicode block definitions, as returned by
 *               _readdb() (returned by reference)
 */
function &_read_unicode_block_db()
{
    // one cache slot per database file
    static $cache = array();

    $cache_key = (string) $this->_unicode_db_filename;

    if (!isset($cache[$cache_key])) {
        $cache[$cache_key] = $this->_readdb($this->_unicode_db_filename);
    }

    return $cache[$cache_key];
}
1037 | |||
/**
 * Calculate the similarities between the language models
 *
 * Use this function to see how similar languages are to each other.
 *
 * If passed 2 language names, will return just those languages compared.
 * If passed 1 language name, will return that language compared to
 * all others, sorted ascending by score.
 * If $lang1 is not given, will return an array of every language model
 * compared to every other one; $lang2 is ignored in that case.
 *
 * Language names are matched case-insensitively: both names are
 * lower-cased before they are looked up.
 *
 * @access public
 * @param  string $lang1 the name of the first language to be compared
 * @param  string $lang2 the name of the second language to be compared
 * @return mixed  scores of every language compared
 *                or the score of just the provided languages
 *                or null if one of the supplied languages does not exist
 *                or the error value from _setup_ok() if the setup check
 *                fails
 */
function languageSimilarity($lang1 = null, $lang2 = null)
{
    if (!$this->_setup_ok($err)) {
        return $err;
    }

    if ($lang1 != null) {
        $lang1 = strtolower($lang1);

        // check if language model exists
        if (!isset($this->_lang_db[$lang1])) {
            return null;
        }

        if ($lang2 != null) {
            // normalise the name BEFORE the lookup; checking first made
            // every capitalised second language look unknown
            $lang2 = strtolower($lang2);

            // check if language model exists
            if (!isset($this->_lang_db[$lang2])) {
                return null;
            }

            // compare just these two languages
            return $this->_normalize_score(
                $this->_distance(
                    $this->_lang_db[$lang1],
                    $this->_lang_db[$lang2]
                )
            );
        }

        // compare just $lang1 to all languages
        $return_arr = array();
        foreach ($this->_lang_db as $key => $value) {
            if ($key != $lang1) { // don't compare a language to itself
                $return_arr[$key] = $this->_normalize_score(
                    $this->_distance($this->_lang_db[$lang1], $value)
                );
            }
        }
        asort($return_arr);

        return $return_arr;
    }

    // compare all languages to each other
    $return_arr = array();
    $names = array_keys($this->_lang_db);

    foreach ($names as $first) {
        foreach ($names as $second) {
            // skip comparing languages to themselves
            if ($first == $second) {
                continue;
            }

            if (isset($return_arr[$second][$first])) {
                // don't re-calculate what's already been done; the score
                // of the mirrored pair is reused as it is
                $return_arr[$first][$second] = $return_arr[$second][$first];
            } else {
                $return_arr[$first][$second] = $this->_normalize_score(
                    $this->_distance(
                        $this->_lang_db[$first],
                        $this->_lang_db[$second]
                    )
                );
            }
        }
    }

    return $return_arr;
}
1140 | |||
/**
 * Cluster known languages according to languageSimilarity()
 *
 * WARNING: this method is EXPERIMENTAL. It is not recommended for common
 * use, and it may disappear or its functionality may change in future
 * releases without notice.
 *
 * Repeatedly merges the pair of open clusters with the highest pairwise
 * score from languageSimilarity() until at most two clusters remain, the
 * highest score is 0, or 200 merges have been made. The result is cached
 * in $this->_clusters and returned from there on later calls.
 *
 * The returned array has three keys:
 *  'open_forks' the clusters left open at the top level
 *  'fork_data'  data recorded for each merge, keyed by the merged name
 *  'name_map'   merged name => the single language that stands for it
 * 'fork_data' and 'name_map' are empty arrays when no merge took place.
 *
 * @access public
 * @return mixed language cluster data, or the error value from
 *               _setup_ok() if the setup check fails
 * @throws Exception if a language model is not set, or if no pair could
 *                   be selected during a merge step
 * @see    languageSimilarity()
 * @deprecated this function will eventually be removed and placed into
 *             the model generation class
 */
function clusterLanguages()
{
    // todo: set the maximum number of clusters

    // setup check
    if (!$this->_setup_ok($err)) {
        return $err;
    }

    // return cached result, if any
    if (isset($this->_clusters)) {
        return $this->_clusters;
    }

    $langs = array_keys($this->_lang_db);

    $arr = $this->languageSimilarity();

    sort($langs);

    // isset() is false for a null entry, so this catches empty models
    foreach ($langs as $lang) {
        if (!isset($this->_lang_db[$lang])) {
            throw new Exception("missing $lang!\n");
        }
    }

    // http://www.psychstat.missouristate.edu/multibook/mlt04m.html
    // re-key the list so that every language is stored under its own name
    foreach ($langs as $old_key => $lang1) {
        $langs[$lang1] = $lang1;
        unset($langs[$old_key]);
    }

    // start from empty results so that they are defined even when the
    // loop below never merges anything (two or fewer languages)
    $really_map  = array();
    $result_data = array();

    $i = 0;
    while (count($langs) > 2 && $i++ < 200) {
        // find the pair of open clusters with the highest score
        $highest_score = -1;
        $highest_key1  = '';
        $highest_key2  = '';
        foreach ($langs as $lang1) {
            foreach ($langs as $lang2) {
                if ($lang1 != $lang2
                    && $arr[$lang1][$lang2] > $highest_score
                ) {
                    $highest_score = $arr[$lang1][$lang2];
                    $highest_key1  = $lang1;
                    $highest_key2  = $lang2;
                }
            }
        }

        if (!$highest_key1) {
            // should not ever happen
            throw new Exception("no highest key? (step: $i)");
        }

        if ($highest_score == 0) {
            // languages are perfectly dissimilar
            break;
        }

        // $highest_key1 and $highest_key2 are most similar
        $sum1 = array_sum($arr[$highest_key1]);
        $sum2 = array_sum($arr[$highest_key2]);

        // use the score for the one that is most similar to the rest of
        // the field as the score for the group
        // todo: could try averaging or "centroid" method instead
        // seems like that might make more sense
        // actually nearest neighbor may be better for binary searching

        // for "Complete Linkage"/"furthest neighbor" the sign should be <
        // for "Single Linkage"/"nearest neighbor" the sign should be >
        // results seem to be pretty much the same with either method

        // figure out which to delete and which to replace
        if ($sum1 > $sum2) {
            $replaceme = $highest_key1;
            $deleteme  = $highest_key2;
        } else {
            $replaceme = $highest_key2;
            $deleteme  = $highest_key1;
        }

        $newkey = $replaceme . ':' . $deleteme;

        // $replaceme is most similar to remaining languages
        // replace $replaceme with '$newkey', deleting $deleteme

        // keep a record of which fork is really which language by
        // following earlier merges back to a plain language name
        $really_lang = $replaceme;
        while (isset($really_map[$really_lang])) {
            $really_lang = $really_map[$really_lang];
        }
        $really_map[$newkey] = $really_lang;

        // replace the best fitting key, delete the other
        // (foreach walks copies, so editing $arr here is safe)
        foreach ($arr as $key1 => $arr2) {
            foreach ($arr2 as $key2 => $value2) {
                if ($key2 == $replaceme) {
                    // replacing $arr[$key1][$key2] with $arr[$key1][$newkey]
                    $arr[$key1][$newkey] = $arr[$key1][$key2];
                    unset($arr[$key1][$key2]);
                }

                if ($key1 == $replaceme) {
                    // replacing $arr[$key1][$key2] with $arr[$newkey][$key2]
                    $arr[$newkey][$key2] = $arr[$key1][$key2];
                    unset($arr[$key1][$key2]);
                }

                if ($key1 == $deleteme || $key2 == $deleteme) {
                    // deleting $arr[$key1][$key2]
                    unset($arr[$key1][$key2]);
                }
            }
        }

        unset($langs[$highest_key1]);
        unset($langs[$highest_key2]);
        $langs[$newkey] = $newkey;

        // some of these may be overkill
        $result_data[$newkey] = array(
            'newkey'   => $newkey,
            'count'    => $i,
            'diff'     => abs($sum1 - $sum2),
            'score'    => $highest_score,
            'bestfit'  => $replaceme,
            'otherfit' => $deleteme,
            'really'   => $really_lang,
        );
    }

    $return_val = array(
        // the top level of clusters
        // clusters that are mutually exclusive
        // or specified by a specific maximum
        'open_forks' => $langs,

        // data for each split
        'fork_data' => $result_data,

        // which cluster is really which language
        // using the nearest neighbor technique, the cluster
        // inherits all of the properties of its most-similar member
        // this keeps track
        'name_map' => $really_map,
    );

    // saves the result in the object
    $this->_clusters = $return_val;

    return $return_val;
}
1317 | |||
1318 | |||
1319 | /** | ||
1320 | * Perform an intelligent detection based on clusterLanguages() | ||
1321 | * | ||
1322 | * WARNING: this method is EXPERIMENTAL. It is not recommended for common | ||
1323 | * use, and it may disappear or its functionality may change in future | ||
1324 | * releases without notice. | ||
1325 | * | ||
1326 | * This compares the sample text to top the top level of clusters. If the | ||
1327 | * sample is similar to the cluster it will drop down and compare it to the | ||
1328 | * languages in the cluster, and so on until it hits a leaf node. | ||
1329 | * | ||
1330 | * this should find the language in considerably fewer compares | ||
1331 | * (the equivalent of a binary search), however clusterLanguages() is costly | ||
1332 | * and the loss of accuracy from this technique is significant. | ||
1333 | * | ||
1334 | * This method may need to be 'fuzzier' in order to become more accurate. | ||
1335 | * | ||
1336 | * This function could be more useful if the universe of possible languages | ||
1337 | * was very large, however in such cases some method of Bayesian inference | ||
1338 | * might be more helpful. | ||
1339 | * | ||
1340 | * @see clusterLanguages() | ||
1341 | * @access public | ||
1342 | * @param string $str input string | ||
1343 | * @return array language scores (only those compared) | ||
1344 | * @throws PEAR_Error | ||
1345 | */ | ||
function clusteredSearch($str)
{
    // Reject input the parser cannot analyse (see
    // Text_LanguageDetect_Parser::validateString()).
    if (!Text_LanguageDetect_Parser::validateString($str)) {
        return array();
    }

    // clusterLanguages() will return a cached result if possible
    // so it's safe to call it every time
    $result = $this->clusterLanguages();

    $dendogram_start = $result['open_forks']; // top-level clusters
    $dendogram_data  = $result['fork_data'];  // 'bestfit'/'otherfit' children of each cluster
    $dendogram_alias = $result['name_map'];   // cluster name => language whose model it uses

    // Build the trigram rank profile of the sample text.
    $sample_obj = new Text_LanguageDetect_Parser($str, $this->_db_filename, $this->_unicode_db_filename);
    $sample_obj->prepareTrigram();
    $sample_obj->setPadStart(!$this->_perl_compatible);
    $sample_obj->analyze();
    $sample_result = $sample_obj->getTrigramRanks();
    $sample_count  = count($sample_result);

    // No usable trigrams in the sample: nothing to compare.
    if ($sample_count == 0) {
        return array();
    }

    // Must be initialised: if there are no top-level clusters the loop below
    // never runs and the sort calls would otherwise receive an undefined value.
    $scores = array();

    // Score the sample against every top-level cluster.
    foreach ($dendogram_start as $lang) {
        $lang_key = isset($dendogram_alias[$lang]) ? $dendogram_alias[$lang] : $lang;

        $scores[$lang] = $this->_normalize_score(
            $this->_distance($this->_lang_db[$lang_key], $sample_result),
            $sample_count
        );
    }

    if (empty($scores)) {
        return array();
    }

    // Put the best-matching top-level cluster first.
    if ($this->_perl_compatible) {
        asort($scores);
    } else {
        arsort($scores);
    }

    // of starting forks, this key is the most similar to the sample
    $cur_key = key($scores);

    // Walk down the tree: at every fork score both children and follow the
    // winner until a leaf (a key without fork data) is reached.
    while (isset($dendogram_data[$cur_key])) {
        $lang1 = $dendogram_data[$cur_key]['bestfit'];
        $lang2 = $dendogram_data[$cur_key]['otherfit'];

        foreach (array($lang1, $lang2) as $lang) {
            $lang_key = isset($dendogram_alias[$lang]) ? $dendogram_alias[$lang] : $lang;

            $scores[$lang] = $this->_normalize_score(
                $this->_distance($this->_lang_db[$lang_key], $sample_result),
                $sample_count
            );

            //todo: does not need to do same comparison again
        }

        // NOTE(review): the higher score always wins here, even in
        // perl-compatible mode where the initial sort above treats the
        // lowest score as the best match - confirm whether that is intended.
        $cur_key = ($scores[$lang1] > $scores[$lang2]) ? $lang1 : $lang2;
    }

    // rather than sorting the result, preserve it so that you can see
    // which paths the algorithm decided to take along the tree

    // but sometimes the last item is only the second highest; in that case
    // move the second-to-last entry to the very end. Needs at least two
    // entries: with a single one prev() yields false and a bogus entry with
    // an empty key would be appended.
    if (count($scores) > 1) {
        $last_score      = end($scores);
        $second_to_last  = prev($scores);
        $second_last_key = key($scores);

        $out_of_order = $this->_perl_compatible
            ? ($last_score > $second_to_last)
            : ($last_score < $second_to_last);

        if ($out_of_order) {
            unset($scores[$second_last_key]);
            $scores[$second_last_key] = $second_to_last;
        }
    }

    if (!$this->_perl_compatible) {
        // preserve the string keys while reversing
        $scores = array_reverse($scores, true);
    }

    return $scores;
}
1461 | |||
1462 | /** | ||
1463 | * utf8-safe strlen() | ||
1464 | * | ||
1465 | * Returns the numbers of characters (not bytes) in a utf8 string | ||
1466 | * | ||
1467 | * @static | ||
1468 | * @access public | ||
1469 | * @param string $str string to get the length of | ||
1470 | * @return int number of chars | ||
1471 | */ | ||
function utf8strlen($str)
{
    // Counts characters, not bytes, of a UTF-8 string.
    //
    // utf8_decode() is deprecated as of PHP 8.2, so prefer mbstring when the
    // extension is loaded. Both paths agree on well-formed UTF-8.
    // NOTE(review): counts for malformed byte sequences may differ between
    // the two paths - confirm no caller relies on that.
    if (function_exists('mb_strlen')) {
        return mb_strlen($str, 'UTF-8');
    }

    // Legacy fallback (idea taken from dokuwiki): utf8_decode() converts
    // unknown chars to '?', which is actually ideal for counting.
    return strlen(utf8_decode($str));
}
1481 | |||
1482 | /** | ||
1483 | * Returns the unicode value of a utf8 char | ||
1484 | * | ||
1485 | * @access protected | ||
1486 | * @param string $char a utf8 (possibly multi-byte) char | ||
1487 | * @return int unicode value or -1 if malformatted | ||
1488 | */ | ||
function _utf8char2unicode($char)
{
    // Decodes one UTF-8 encoded character into its Unicode code point.
    // Only the byte length selects the decoding rule; lead/continuation
    // bytes are masked, not validated. Returns -1 for any other length.
    //
    // Uses $char[n] offsets: the old $char{n} form is a parse error on PHP 8.
    // For a reference, see http://en.wikipedia.org/wiki/UTF-8

    // strlen() here will actually get the binary length of a single char
    switch (strlen($char)) {
    case 1:
        // normal ASCII-7 byte
        // 0xxxxxxx --> 0xxxxxxx
        return ord($char[0]);

    case 2:
        // 2 byte unicode
        // 110zzzzx 10xxxxxx --> 00000zzz zxxxxxxx
        $z = (ord($char[0]) & 0x1F) << 6;
        $x = (ord($char[1]) & 0x3F);

        return ($z | $x);

    case 3:
        // 3 byte unicode
        // 1110zzzz 10zxxxxx 10xxxxxx --> zzzzzxxx xxxxxxxx
        $z  = (ord($char[0]) & 0x0F) << 12;
        $x1 = (ord($char[1]) & 0x3F) << 6;
        $x2 = (ord($char[2]) & 0x3F);

        return ($z | $x1 | $x2);

    case 4:
        // 4 byte unicode
        // 11110zzz 10zzxxxx 10xxxxxx 10xxxxxx -->
        // 000zzzzz xxxxxxxx xxxxxxxx
        $z1 = (ord($char[0]) & 0x07) << 18;
        $z2 = (ord($char[1]) & 0x3F) << 12;
        $x1 = (ord($char[2]) & 0x3F) << 6;
        $x2 = (ord($char[3]) & 0x3F);

        return ($z1 | $z2 | $x1 | $x2);

    default:
        // error: empty or over-long input, malformatted char?
        return -1;
    }
}
1534 | |||
1535 | /** | ||
1536 | * utf8-safe fast character iterator | ||
1537 | * | ||
1538 | * Will get the next character starting from $counter, which will then be | ||
1539 | * incremented. If a multi-byte char the bytes will be concatenated and | ||
1540 | * $counter will be incremented by the number of bytes in the char. | ||
1541 | * | ||
1542 | * @access private | ||
1543 | * @param string &$str the string being iterated over | ||
1544 | * @param int &$counter the iterator, will increment by reference | ||
1545 | * @param bool $special_convert whether to do special conversions | ||
1546 | * @return char the next (possibly multi-byte) char from $counter | ||
1547 | */ | ||
function _next_char(&$str, &$counter, $special_convert = false)
{
    // Reads the (possibly multi-byte) UTF-8 character starting at byte
    // offset $counter and advances $counter past it. The lead byte alone
    // decides how many bytes are consumed; continuation bytes are not
    // validated. With $special_convert, ASCII is lower-cased and reduced to
    // a-z, space and "'", and some Latin-1/Cyrillic capitals are lower-cased.
    //
    // Uses $str[n] offsets: the old $str{n} form is a parse error on PHP 8.

    $char = $str[$counter++];
    $ord  = ord($char);

    // for a description of the utf8 system see
    // http://www.phpclasses.org/browse/file/5131.html

    // normal ascii one byte char
    if ($ord <= 127) {

        // special conversions needed for this package
        // (that only apply to regular ascii characters)
        // lower case, and convert all non-alphanumeric characters
        // other than "'" to space
        if ($special_convert && $char != ' ' && $char != "'") {
            if ($ord >= 65 && $ord <= 90) { // A-Z
                $char = chr($ord + 32); // lower case
            } elseif ($ord < 97 || $ord > 122) { // NOT a-z
                $char = ' '; // convert to space
            }
        }

        return $char;
    }

    // two-byte char (lead byte 110xxxxx)
    if ($ord >> 5 == 6) {
        $nextchar = $str[$counter++]; // get next byte

        // lower-casing of non-ascii characters is still incomplete

        if ($special_convert) {
            if ($ord == 195) {
                // lower case latin accented characters; for a reference, see
                // http://www.ramsch.org/martin/uni/fmi-hp/iso8859-1.html
                $nextord     = ord($nextchar);
                $nextord_adj = $nextord + 64; // the ISO-8859-1 code of the char

                // À - Þ but not ×
                if ($nextord_adj >= 192
                    && $nextord_adj <= 222
                    && $nextord_adj != 215
                ) {
                    $nextchar = chr($nextord + 32);
                }

            } elseif ($ord == 208) {
                // lower case cyrillic alphabet
                $nextord = ord($nextchar);

                if ($nextord >= 144 && $nextord <= 159) {
                    // A - Pe: lower case form has the same lead byte
                    $nextchar = chr($nextord + 32);

                } elseif ($nextord >= 160 && $nextord <= 175) {
                    // Er - Ya: lower case form uses the next lead byte
                    $char     = chr(209); // == $ord++
                    $nextchar = chr($nextord - 32);
                }
            }
        }

        // tag on next byte
        return $char . $nextchar;
    }

    // three-byte char (lead byte 1110xxxx): tag on next 2 bytes
    if ($ord >> 4 == 14) {
        return $char . $str[$counter++] . $str[$counter++];
    }

    // four-byte char (lead byte 11110xxx): tag on next 3 bytes
    if ($ord >> 3 == 30) {
        return $char . $str[$counter++] . $str[$counter++] . $str[$counter++];
    }

    // error: stray continuation byte or invalid lead byte. The byte has been
    // consumed; NULL is returned explicitly (this used to happen implicitly
    // by falling off the end of the function).
    return null;
}
1630 | |||
1631 | } | ||
1632 | |||
1633 | /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ | ||
1634 | |||
1635 | ?> | ||
diff --git a/inc/3rdparty/libraries/language-detect/Parser.php b/inc/3rdparty/libraries/language-detect/Parser.php new file mode 100644 index 00000000..7f15fa98 --- /dev/null +++ b/inc/3rdparty/libraries/language-detect/Parser.php | |||
@@ -0,0 +1,354 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * This class represents a text sample to be parsed. | ||
5 | * | ||
6 | * @category Text | ||
7 | * @package Text_LanguageDetect | ||
8 | * @author Nicholas Pisarro | ||
9 | * @copyright 2006 | ||
10 | * @license BSD | ||
11 | * @version CVS: $Id: Parser.php,v 1.5 2006/03/11 05:45:05 taak Exp $ | ||
12 | * @link http://pear.php.net/package/Text_LanguageDetect/ | ||
13 | * @link http://langdetect.blogspot.com/ | ||
14 | */ | ||
15 | |||
16 | /** | ||
17 | * This class represents a text sample to be parsed. | ||
18 | * | ||
19 | * This separates the analysis of a text sample from the primary LanguageDetect | ||
20 | * class. After a new profile has been built, the data can be retrieved using | ||
21 | * the accessor functions. | ||
22 | * | ||
23 | * This class is intended to be used by the Text_LanguageDetect class, not | ||
24 | * end-users. | ||
25 | * | ||
26 | * @category Text | ||
27 | * @package Text_LanguageDetect | ||
28 | * @author Nicholas Pisarro | ||
29 | * @copyright 2006 | ||
30 | * @license BSD | ||
31 | * @version release: 0.2.3 | ||
32 | */ | ||
class Text_LanguageDetect_Parser extends Text_LanguageDetect
{
    /**
     * the piece of text being parsed
     *
     * @access  private
     * @var     string
     */
    var $_string;

    /**
     * stores the trigram frequencies of the sample (trigram => count)
     *
     * This was declared as $_trigrams while every method reads and writes
     * $_trigram; the declaration now matches the name actually used.
     *
     * @access  private
     * @var     array
     */
    var $_trigram = array();

    /**
     * stores the trigram ranks of the sample
     *
     * @access  private
     * @var     array
     */
    var $_trigram_ranks = array();

    /**
     * stores the unicode blocks of the sample (block name => char count)
     *
     * @access  private
     * @var     array
     */
    var $_unicode_blocks = array();

    /**
     * Whether the parser should compile the unicode ranges
     *
     * @access  private
     * @var     bool
     */
    var $_compile_unicode = false;

    /**
     * Whether the parser should compile trigrams
     *
     * @access  private
     * @var     bool
     */
    var $_compile_trigram = false;

    /**
     * Whether the trigram parser should pad the beginning of the string
     *
     * @access  private
     * @var     bool
     */
    var $_trigram_pad_start = false;

    /**
     * Whether the unicode parser should skip non-alphabetical ascii chars
     *
     * @access  private
     * @var     bool
     */
    var $_unicode_skip_symbols = true;

    /**
     * Constructor
     *
     * @access  private
     * @param   string  $string      string to be parsed
     * @param   string  $db          optional value for $this->_db_filename
     * @param   string  $unicode_db  optional value for $this->_unicode_db_filename
     */
    function __construct($string, $db = null, $unicode_db = null)
    {
        if (isset($db)) {
            $this->_db_filename = $db;
        }
        if (isset($unicode_db)) {
            $this->_unicode_db_filename = $unicode_db;
        }
        $this->_string = $string;
    }

    /**
     * Legacy (PHP 4 style) constructor
     *
     * PHP 8 no longer treats a method named after its class as a
     * constructor, so the real work lives in __construct(). This alias is
     * kept so code that calls the method by name keeps working.
     *
     * @access  private
     * @param   string  $string      string to be parsed
     * @param   string  $db          optional value for $this->_db_filename
     * @param   string  $unicode_db  optional value for $this->_unicode_db_filename
     */
    function Text_LanguageDetect_Parser($string, $db = null, $unicode_db = null)
    {
        $this->__construct($string, $db, $unicode_db);
    }

    /**
     * Returns true if a string is suitable for parsing
     *
     * A string qualifies when it is longer than 3 bytes and contains at
     * least one non-whitespace character.
     *
     * Declared static because it is invoked statically; calling a
     * non-static method statically is an Error on PHP 8. Calls through an
     * instance keep working.
     *
     * @static
     * @access  public
     * @param   string  $str    input string to test
     * @return  bool            true if acceptable, false if not
     */
    static function validateString($str)
    {
        return !empty($str) && strlen($str) > 3 && preg_match('/\S/', $str) === 1;
    }

    /**
     * turn on/off trigram counting
     *
     * @access  public
     * @param   bool    $bool true for on, false for off
     */
    function prepareTrigram($bool = true)
    {
        $this->_compile_trigram = $bool;
    }

    /**
     * turn on/off unicode block counting
     *
     * @access  public
     * @param   bool    $bool true for on, false for off
     */
    function prepareUnicode($bool = true)
    {
        $this->_compile_unicode = $bool;
    }

    /**
     * turn on/off padding the beginning of the sample string
     *
     * @access  public
     * @param   bool    $bool true for on, false for off
     */
    function setPadStart($bool = true)
    {
        $this->_trigram_pad_start = $bool;
    }

    /**
     * Should the unicode block counter skip non-alphabetical ascii chars?
     *
     * @access  public
     * @param   bool    $bool true for on, false for off
     */
    function setUnicodeSkipSymbols($bool = true)
    {
        $this->_unicode_skip_symbols = $bool;
    }

    /**
     * Returns the trigram ranks for the text sample
     *
     * @access  public
     * @return  array    trigram ranks in the text sample
     */
    function &getTrigramRanks()
    {
        return $this->_trigram_ranks;
    }

    /**
     * Return the trigram frequency table
     *
     * only used in testing to make sure the parser is working
     *
     * @access  public
     * @return  array    trigram frequencies in the text sample
     */
    function &getTrigramFreqs()
    {
        return $this->_trigram;
    }

    /**
     * returns the array of unicode blocks
     *
     * @access  public
     * @return  array   unicode blocks in the text sample
     */
    function &getUnicodeBlocks()
    {
        return $this->_unicode_blocks;
    }

    /**
     * Executes the parsing operation
     *
     * Be sure to call the set*() functions to set options and the
     * prepare*() functions first to tell it what kind of data to compute
     *
     * Afterwards the get*() functions can be used to access the compiled
     * information.
     *
     * Trigram counts and unicode block counts are added to whatever the
     * object already holds; nothing is reset at the start of a run.
     *
     * @access public
     */
    function analyze()
    {
        $len = strlen($this->_string);
        $byte_counter = 0;

        // unicode startup
        if ($this->_compile_unicode) {
            $blocks =& $this->_read_unicode_block_db();
            $block_count = count($blocks);

            $unicode_chars = array(); // utf8 char => number of occurrences
        }

        // trigram startup
        if ($this->_compile_trigram) {
            // initialize them as blank so the parser will skip the first two
            // (since it skips trigrams with more than 2 contiguous spaces)
            $a = ' ';
            $b = ' ';

            // kludge
            // if it finds a valid trigram to start and the start pad option is
            // off, then set a variable that will be used to reduce this
            // trigram after parsing has finished
            if (!$this->_trigram_pad_start) {
                $a = $this->_next_char($this->_string, $byte_counter, true);

                if ($a != ' ') {
                    $b = $this->_next_char($this->_string, $byte_counter, true);
                    $dropone = " $a$b";
                }

                // rewind: the peek above must not consume any input
                $byte_counter = 0;
                $a = ' ';
                $b = ' ';
            }
        }

        while ($byte_counter < $len) {
            $char = $this->_next_char($this->_string, $byte_counter, true);

            // language trigram detection
            if ($this->_compile_trigram) {
                // skip windows whose middle char and one neighbour are spaces
                if (!($b == ' ' && ($a == ' ' || $char == ' '))) {
                    $trigram = $a . $b . $char;

                    if (!isset($this->_trigram[$trigram])) {
                        $this->_trigram[$trigram] = 1;
                    } else {
                        $this->_trigram[$trigram]++;
                    }
                }

                // slide the window
                $a = $b;
                $b = $char;
            }

            // unicode block detection
            if ($this->_compile_unicode) {
                if ($this->_unicode_skip_symbols
                    && strlen($char) == 1
                    && ($char < 'A' || $char > 'z'
                    || ($char > 'Z' && $char < 'a'))
                    && $char != "'"
                ) {
                    // single-byte non-letter; does not skip the apostrophe
                    // since it's included in the language models
                    continue;
                }

                // build an array of all the characters
                if (isset($unicode_chars[$char])) {
                    $unicode_chars[$char]++;
                } else {
                    $unicode_chars[$char] = 1;
                }
            }

            // todo: add byte detection here
        }

        // unicode cleanup: map every distinct char to its block name and
        // add up the counts per block
        if ($this->_compile_unicode) {
            foreach ($unicode_chars as $utf8_char => $count) {
                $search_result = $this->_unicode_block_name(
                    $this->_utf8char2unicode($utf8_char), $blocks, $block_count
                );

                if ($search_result != -1) {
                    $block_name = $search_result[2];
                } else {
                    $block_name = '[Malformatted]';
                }

                if (isset($this->_unicode_blocks[$block_name])) {
                    $this->_unicode_blocks[$block_name] += $count;
                } else {
                    $this->_unicode_blocks[$block_name] = $count;
                }
            }
        }

        // trigram cleanup
        if ($this->_compile_trigram) {
            // pad the end
            if ($b != ' ') {
                if (!isset($this->_trigram["$a$b "])) {
                    $this->_trigram["$a$b "] = 1;
                } else {
                    $this->_trigram["$a$b "]++;
                }
            }

            // perl compatibility; Language::Guess does not pad the beginning
            // kludge: take back one count of the start-padded trigram noted
            // during startup (guarded so a missing key cannot raise a notice)
            if (isset($dropone) && isset($this->_trigram[$dropone])) {
                if ($this->_trigram[$dropone] == 1) {
                    unset($this->_trigram[$dropone]);
                } else {
                    $this->_trigram[$dropone]--;
                }
            }

            if (!empty($this->_trigram)) {
                $this->_trigram_ranks = $this->_arr_rank($this->_trigram);
            } else {
                $this->_trigram_ranks = array();
            }
        }
    }
}
351 | |||
352 | /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ | ||
353 | |||
354 | ?> | ||
diff --git a/inc/3rdparty/libraries/language-detect/lang.dat b/inc/3rdparty/libraries/language-detect/lang.dat new file mode 100644 index 00000000..c2a44f56 --- /dev/null +++ b/inc/3rdparty/libraries/language-detect/lang.dat | |||
@@ -0,0 +1 @@ | |||
a:2:{s:7:"trigram";a:52:{s:8:"albanian";a:300:{s:4:"të ";s:1:"0";s:4:" të";s:1:"1";s:4:"në ";s:1:"2";s:4:"për";s:1:"3";s:4:" pë";s:1:"4";s:3:" e ";s:1:"5";s:3:"sht";s:1:"6";s:4:" në";s:1:"7";s:3:" sh";s:1:"8";s:3:"se ";s:1:"9";s:3:"et ";s:2:"10";s:4:"ë s";s:2:"11";s:4:"ë t";s:2:"12";s:3:" se";s:2:"13";s:3:"he ";s:2:"14";s:4:"jë ";s:2:"15";s:4:"ër ";s:2:"16";s:3:"dhe";s:2:"17";s:3:" pa";s:2:"18";s:4:"ë n";s:2:"19";s:4:"ë p";s:2:"20";s:4:" që";s:2:"21";s:3:" dh";s:2:"22";s:4:"një";s:2:"23";s:4:"ë m";s:2:"24";s:3:" nj";s:2:"25";s:4:"ësh";s:2:"26";s:3:"in ";s:2:"27";s:3:" me";s:2:"28";s:4:"që ";s:2:"29";s:3:" po";s:2:"30";s:3:"e n";s:2:"31";s:3:"e t";s:2:"32";s:3:"ish";s:2:"33";s:4:"më ";s:2:"34";s:4:"së ";s:2:"35";s:3:"me ";s:2:"36";s:4:"htë";s:2:"37";s:3:" ka";s:2:"38";s:3:" si";s:2:"39";s:3:"e k";s:2:"40";s:3:"e p";s:2:"41";s:3:" i ";s:2:"42";s:4:"anë";s:2:"43";s:3:"ar ";s:2:"44";s:3:" nu";s:2:"45";s:3:"und";s:2:"46";s:3:"ve ";s:2:"47";s:4:" ës";s:2:"48";s:3:"e s";s:2:"49";s:4:" më";s:2:"50";s:3:"nuk";s:2:"51";s:3:"par";s:2:"52";s:3:"uar";s:2:"53";s:3:"uk ";s:2:"54";s:3:"jo ";s:2:"55";s:4:"rë ";s:2:"56";s:3:"ta ";s:2:"57";s:4:"ë f";s:2:"58";s:3:"en ";s:2:"59";s:3:"it ";s:2:"60";s:3:"min";s:2:"61";s:3:"het";s:2:"62";s:3:"n e";s:2:"63";s:3:"ri ";s:2:"64";s:3:"shq";s:2:"65";s:4:"ë d";s:2:"66";s:3:" do";s:2:"67";s:3:" nd";s:2:"68";s:3:"sh ";s:2:"69";s:4:"ën ";s:2:"70";s:4:"atë";s:2:"71";s:3:"hqi";s:2:"72";s:3:"ist";s:2:"73";s:4:"ë q";s:2:"74";s:3:" gj";s:2:"75";s:3:" ng";s:2:"76";s:3:" th";s:2:"77";s:3:"a n";s:2:"78";s:3:"do ";s:2:"79";s:3:"end";s:2:"80";s:3:"imi";s:2:"81";s:3:"ndi";s:2:"82";s:3:"r t";s:2:"83";s:3:"rat";s:2:"84";s:4:"ë b";s:2:"85";s:4:"ëri";s:2:"86";s:3:" mu";s:2:"87";s:3:"art";s:2:"88";s:3:"ash";s:2:"89";s:3:"qip";s:2:"90";s:3:" ko";s:2:"91";s:3:"e m";s:2:"92";s:3:"edh";s:2:"93";s:3:"eri";s:2:"94";s:3:"je ";s:2:"95";s:3:"ka ";s:2:"96";s:3:"nga";s:2:"97";s:3:"si ";s:2:"98";s:3:"te ";s:2:"99";s:4:"ë k";s:3:"100";s:4:"ësi";s:3:"101";s:3:" 
ma";s:3:"102";s:3:" ti";s:3:"103";s:3:"eve";s:3:"104";s:3:"hje";s:3:"105";s:3:"ira";s:3:"106";s:3:"mun";s:3:"107";s:3:"on ";s:3:"108";s:3:"po ";s:3:"109";s:3:"re ";s:3:"110";s:3:" pr";s:3:"111";s:3:"im ";s:3:"112";s:3:"lit";s:3:"113";s:3:"o t";s:3:"114";s:3:"ur ";s:3:"115";s:4:"ë e";s:3:"116";s:4:"ë v";s:3:"117";s:4:"ët ";s:3:"118";s:3:" ku";s:3:"119";s:4:" së";s:3:"120";s:3:"e d";s:3:"121";s:3:"es ";s:3:"122";s:3:"ga ";s:3:"123";s:3:"iti";s:3:"124";s:3:"jet";s:3:"125";s:4:"ndë";s:3:"126";s:3:"oli";s:3:"127";s:3:"shi";s:3:"128";s:3:"tje";s:3:"129";s:4:" bë";s:3:"130";s:3:" z ";s:3:"131";s:3:"gje";s:3:"132";s:3:"kan";s:3:"133";s:3:"shk";s:3:"134";s:4:"ënd";s:3:"135";s:4:"ës ";s:3:"136";s:3:" de";s:3:"137";s:3:" kj";s:3:"138";s:3:" ru";s:3:"139";s:3:" vi";s:3:"140";s:3:"ara";s:3:"141";s:3:"gov";s:3:"142";s:3:"kjo";s:3:"143";s:3:"or ";s:3:"144";s:3:"r p";s:3:"145";s:3:"rto";s:3:"146";s:3:"rug";s:3:"147";s:3:"tet";s:3:"148";s:3:"ugo";s:3:"149";s:3:"ali";s:3:"150";s:3:"arr";s:3:"151";s:3:"at ";s:3:"152";s:3:"d t";s:3:"153";s:3:"ht ";s:3:"154";s:3:"i p";s:3:"155";s:4:"ipë";s:3:"156";s:3:"izi";s:3:"157";s:4:"jnë";s:3:"158";s:3:"n n";s:3:"159";s:3:"ohe";s:3:"160";s:3:"shu";s:3:"161";s:4:"shë";s:3:"162";s:3:"t e";s:3:"163";s:3:"tik";s:3:"164";s:3:"a e";s:3:"165";s:4:"arë";s:3:"166";s:4:"etë";s:3:"167";s:3:"hum";s:3:"168";s:3:"nd ";s:3:"169";s:3:"ndr";s:3:"170";s:3:"osh";s:3:"171";s:3:"ova";s:3:"172";s:3:"rim";s:3:"173";s:3:"tos";s:3:"174";s:3:"va ";s:3:"175";s:3:" fa";s:3:"176";s:3:" fi";s:3:"177";s:3:"a s";s:3:"178";s:3:"hen";s:3:"179";s:3:"i n";s:3:"180";s:3:"mar";s:3:"181";s:3:"ndo";s:3:"182";s:3:"por";s:3:"183";s:3:"ris";s:3:"184";s:3:"sa ";s:3:"185";s:3:"sis";s:3:"186";s:4:"tës";s:3:"187";s:4:"umë";s:3:"188";s:3:"viz";s:3:"189";s:3:"zit";s:3:"190";s:3:" di";s:3:"191";s:3:" mb";s:3:"192";s:3:"aj ";s:3:"193";s:3:"ana";s:3:"194";s:3:"ata";s:3:"195";s:4:"dër";s:3:"196";s:3:"e 
a";s:3:"197";s:3:"esh";s:3:"198";s:3:"ime";s:3:"199";s:3:"jes";s:3:"200";s:3:"lar";s:3:"201";s:3:"n s";s:3:"202";s:3:"nte";s:3:"203";s:3:"pol";s:3:"204";s:3:"r n";s:3:"205";s:3:"ran";s:3:"206";s:3:"res";s:3:"207";s:4:"rrë";s:3:"208";s:3:"tar";s:3:"209";s:4:"ë a";s:3:"210";s:4:"ë i";s:3:"211";s:3:" at";s:3:"212";s:3:" jo";s:3:"213";s:4:" kë";s:3:"214";s:3:" re";s:3:"215";s:3:"a k";s:3:"216";s:3:"ai ";s:3:"217";s:3:"akt";s:3:"218";s:4:"hë ";s:3:"219";s:4:"hën";s:3:"220";s:3:"i i";s:3:"221";s:3:"i m";s:3:"222";s:3:"ia ";s:3:"223";s:3:"men";s:3:"224";s:3:"nis";s:3:"225";s:3:"shm";s:3:"226";s:3:"str";s:3:"227";s:3:"t k";s:3:"228";s:3:"t n";s:3:"229";s:3:"t s";s:3:"230";s:4:"ë g";s:3:"231";s:4:"ërk";s:3:"232";s:4:"ëve";s:3:"233";s:3:" ai";s:3:"234";s:3:" ci";s:3:"235";s:3:" ed";s:3:"236";s:3:" ja";s:3:"237";s:3:" kr";s:3:"238";s:3:" qe";s:3:"239";s:3:" ta";s:3:"240";s:3:" ve";s:3:"241";s:3:"a p";s:3:"242";s:3:"cil";s:3:"243";s:3:"el ";s:3:"244";s:4:"erë";s:3:"245";s:3:"gji";s:3:"246";s:3:"hte";s:3:"247";s:3:"i t";s:3:"248";s:3:"jen";s:3:"249";s:3:"jit";s:3:"250";s:3:"k d";s:3:"251";s:4:"mën";s:3:"252";s:3:"n t";s:3:"253";s:3:"nyr";s:3:"254";s:3:"ori";s:3:"255";s:3:"pas";s:3:"256";s:3:"ra ";s:3:"257";s:3:"rie";s:3:"258";s:4:"rës";s:3:"259";s:3:"tor";s:3:"260";s:3:"uaj";s:3:"261";s:3:"yre";s:3:"262";s:4:"ëm ";s:3:"263";s:4:"ëny";s:3:"264";s:3:" ar";s:3:"265";s:3:" du";s:3:"266";s:3:" ga";s:3:"267";s:3:" je";s:3:"268";s:4:"dës";s:3:"269";s:3:"e e";s:3:"270";s:3:"e z";s:3:"271";s:3:"ha ";s:3:"272";s:3:"hme";s:3:"273";s:3:"ika";s:3:"274";s:3:"ini";s:3:"275";s:3:"ite";s:3:"276";s:3:"ith";s:3:"277";s:3:"koh";s:3:"278";s:3:"kra";s:3:"279";s:3:"ku ";s:3:"280";s:3:"lim";s:3:"281";s:3:"lis";s:3:"282";s:4:"qën";s:3:"283";s:4:"rën";s:3:"284";s:3:"s s";s:3:"285";s:3:"t d";s:3:"286";s:3:"t t";s:3:"287";s:3:"tir";s:3:"288";s:4:"tën";s:3:"289";s:3:"ver";s:3:"290";s:4:"ë j";s:3:"291";s:3:" ba";s:3:"292";s:3:" in";s:3:"293";s:3:" tr";s:3:"294";s:3:" zg";s:3:"295";s:3:"a a";s:3:"296";s:3:"a 
m";s:3:"297";s:3:"a t";s:3:"298";s:3:"abr";s:3:"299";}s:6:"arabic";a:300:{s:5:" ال";s:1:"0";s:6:"الع";s:1:"1";s:6:"لعر";s:1:"2";s:6:"عرا";s:1:"3";s:6:"راق";s:1:"4";s:5:" ÙÙŠ";s:1:"5";s:5:"ÙÙŠ ";s:1:"6";s:5:"ين ";s:1:"7";s:5:"ية ";s:1:"8";s:5:"Ù† ا";s:1:"9";s:6:"الم";s:2:"10";s:5:"ات ";s:2:"11";s:5:"من ";s:2:"12";s:5:"ÙŠ ا";s:2:"13";s:5:" من";s:2:"14";s:6:"الأ";s:2:"15";s:5:"Ø© ا";s:2:"16";s:5:"اق ";s:2:"17";s:5:" وا";s:2:"18";s:5:"اء ";s:2:"19";s:6:"الإ";s:2:"20";s:5:" أن";s:2:"21";s:6:"وال";s:2:"22";s:5:"ما ";s:2:"23";s:5:" عل";s:2:"24";s:5:"لى ";s:2:"25";s:5:"ت ا";s:2:"26";s:5:"ون ";s:2:"27";s:5:"هم ";s:2:"28";s:6:"اقي";s:2:"29";s:5:"ام ";s:2:"30";s:5:"Ù„ ا";s:2:"31";s:5:"أن ";s:2:"32";s:5:"Ù… ا";s:2:"33";s:6:"الت";s:2:"34";s:5:"لا ";s:2:"35";s:6:"الا";s:2:"36";s:5:"ان ";s:2:"37";s:5:"ها ";s:2:"38";s:5:"ال ";s:2:"39";s:5:"Ø© Ùˆ";s:2:"40";s:5:"ا ا";s:2:"41";s:6:"رها";s:2:"42";s:6:"لام";s:2:"43";s:6:"يين";s:2:"44";s:5:" ول";s:2:"45";s:6:"لأم";s:2:"46";s:5:"نا ";s:2:"47";s:6:"على";s:2:"48";s:5:"Ù† ÙŠ";s:2:"49";s:6:"الب";s:2:"50";s:5:"اد ";s:2:"51";s:6:"الق";s:2:"52";s:5:"د ا";s:2:"53";s:5:"ذا ";s:2:"54";s:5:"Ù‡ ا";s:2:"55";s:5:" با";s:2:"56";s:6:"الد";s:2:"57";s:5:"ب ا";s:2:"58";s:6:"مري";s:2:"59";s:5:"لم ";s:2:"60";s:5:" إن";s:2:"61";s:5:" لل";s:2:"62";s:6:"سلا";s:2:"63";s:6:"أمر";s:2:"64";s:6:"ريك";s:2:"65";s:5:"مة ";s:2:"66";s:5:"Ù‰ ا";s:2:"67";s:5:"ا ÙŠ";s:2:"68";s:5:" عن";s:2:"69";s:5:" هذ";s:2:"70";s:5:"Ø¡ ا";s:2:"71";s:5:"ر ا";s:2:"72";s:6:"كان";s:2:"73";s:6:"قتل";s:2:"74";s:6:"إسل";s:2:"75";s:6:"الØ";s:2:"76";s:5:"وا ";s:2:"77";s:5:" إل";s:2:"78";s:5:"ا Ø£";s:2:"79";s:6:"بال";s:2:"80";s:5:"Ù† Ù…";s:2:"81";s:6:"الس";s:2:"82";s:5:"رة ";s:2:"83";s:6:"لإس";s:2:"84";s:5:"Ù† Ùˆ";s:2:"85";s:6:"هاب";s:2:"86";s:5:"ÙŠ Ùˆ";s:2:"87";s:5:"ير ";s:2:"88";s:5:" كا";s:2:"89";s:5:"لة ";s:2:"90";s:6:"يات";s:2:"91";s:5:" لا";s:2:"92";s:6:"انت";s:2:"93";s:5:"Ù† Ø£";s:2:"94";s:6:"يكي";s:2:"95";s:6:"الر";s:2:"96";s:6:"الو";s:2:"97";s:5:"Ø© Ù";s:2:"98";s:5:"دة 
";s:2:"99";s:6:"الج";s:3:"100";s:5:"قي ";s:3:"101";s:5:"وي ";s:3:"102";s:6:"الذ";s:3:"103";s:6:"الش";s:3:"104";s:6:"امي";s:3:"105";s:6:"اني";s:3:"106";s:5:"ذه ";s:3:"107";s:5:"عن ";s:3:"108";s:6:"لما";s:3:"109";s:6:"هذه";s:3:"110";s:5:"ول ";s:3:"111";s:5:"ا٠";s:3:"112";s:6:"اوي";s:3:"113";s:6:"بري";s:3:"114";s:5:"Ø© Ù„";s:3:"115";s:5:" أم";s:3:"116";s:5:" لم";s:3:"117";s:5:" ما";s:3:"118";s:5:"يد ";s:3:"119";s:5:" أي";s:3:"120";s:6:"إره";s:3:"121";s:5:"ع ا";s:3:"122";s:6:"عمل";s:3:"123";s:6:"ولا";s:3:"124";s:6:"إلى";s:3:"125";s:6:"ابي";s:3:"126";s:5:"Ù† Ù";s:3:"127";s:6:"ختط";s:3:"128";s:5:"لك ";s:3:"129";s:5:"نه ";s:3:"130";s:5:"ني ";s:3:"131";s:5:"إن ";s:3:"132";s:6:"دين";s:3:"133";s:5:"٠ا";s:3:"134";s:6:"لذي";s:3:"135";s:5:"ÙŠ Ø£";s:3:"136";s:5:"ÙŠ ب";s:3:"137";s:5:" وأ";s:3:"138";s:5:"ا ع";s:3:"139";s:6:"الخ";s:3:"140";s:5:"تل ";s:3:"141";s:5:"تي ";s:3:"142";s:5:"قد ";s:3:"143";s:6:"لدي";s:3:"144";s:5:" كل";s:3:"145";s:5:" مع";s:3:"146";s:5:"اب ";s:3:"147";s:6:"اخت";s:3:"148";s:5:"ار ";s:3:"149";s:6:"الن";s:3:"150";s:6:"علا";s:3:"151";s:5:"Ù… Ùˆ";s:3:"152";s:5:"مع ";s:3:"153";s:5:"س ا";s:3:"154";s:5:"كل ";s:3:"155";s:6:"لاء";s:3:"156";s:5:"Ù† ب";s:3:"157";s:5:"Ù† ت";s:3:"158";s:5:"ÙŠ Ù…";s:3:"159";s:6:"عرب";s:3:"160";s:5:"Ù… ب";s:3:"161";s:5:" وق";s:3:"162";s:5:" يق";s:3:"163";s:5:"ا Ù„";s:3:"164";s:5:"ا Ù…";s:3:"165";s:6:"الÙ";s:3:"166";s:6:"تطا";s:3:"167";s:6:"داد";s:3:"168";s:6:"لمس";s:3:"169";s:5:"له ";s:3:"170";s:6:"هذا";s:3:"171";s:5:" Ù…Ø";s:3:"172";s:6:"ؤلا";s:3:"173";s:5:"بي ";s:3:"174";s:5:"Ø© Ù…";s:3:"175";s:5:"Ù† Ù„";s:3:"176";s:6:"هؤل";s:3:"177";s:5:"كن ";s:3:"178";s:6:"لإر";s:3:"179";s:6:"لتي";s:3:"180";s:5:" أو";s:3:"181";s:5:" ان";s:3:"182";s:5:" عم";s:3:"183";s:5:"ا Ù";s:3:"184";s:5:"Ø© Ø£";s:3:"185";s:6:"طاÙ";s:3:"186";s:5:"عب ";s:3:"187";s:5:"Ù„ Ù…";s:3:"188";s:5:"Ù† ع";s:3:"189";s:5:"ور ";s:3:"190";s:5:"يا ";s:3:"191";s:5:" يس";s:3:"192";s:5:"ا ت";s:3:"193";s:5:"Ø© 
ب";s:3:"194";s:6:"راء";s:3:"195";s:6:"عال";s:3:"196";s:6:"قوا";s:3:"197";s:6:"قية";s:3:"198";s:6:"لعا";s:3:"199";s:5:"Ù… ÙŠ";s:3:"200";s:5:"مي ";s:3:"201";s:6:"مية";s:3:"202";s:6:"نية";s:3:"203";s:5:"أي ";s:3:"204";s:6:"ابا";s:3:"205";s:6:"بغد";s:3:"206";s:5:"بل ";s:3:"207";s:5:"رب ";s:3:"208";s:6:"عما";s:3:"209";s:6:"غدا";s:3:"210";s:6:"مال";s:3:"211";s:6:"ملي";s:3:"212";s:5:"يس ";s:3:"213";s:5:" بأ";s:3:"214";s:5:" بع";s:3:"215";s:5:" بغ";s:3:"216";s:5:" وم";s:3:"217";s:6:"بات";s:3:"218";s:6:"بية";s:3:"219";s:6:"ذلك";s:3:"220";s:5:"عة ";s:3:"221";s:6:"قاو";s:3:"222";s:6:"قيي";s:3:"223";s:5:"كي ";s:3:"224";s:5:"Ù… Ù…";s:3:"225";s:5:"ÙŠ ع";s:3:"226";s:5:" عر";s:3:"227";s:5:" قا";s:3:"228";s:5:"ا Ùˆ";s:3:"229";s:5:"رى ";s:3:"230";s:5:"Ù‚ ا";s:3:"231";s:6:"وات";s:3:"232";s:5:"وم ";s:3:"233";s:5:" هؤ";s:3:"234";s:5:"ا ب";s:3:"235";s:6:"دام";s:3:"236";s:5:"دي ";s:3:"237";s:6:"رات";s:3:"238";s:6:"شعب";s:3:"239";s:6:"لان";s:3:"240";s:6:"لشع";s:3:"241";s:6:"لقو";s:3:"242";s:6:"ليا";s:3:"243";s:5:"Ù† Ù‡";s:3:"244";s:5:"ÙŠ ت";s:3:"245";s:5:"ÙŠ ÙŠ";s:3:"246";s:5:" وه";s:3:"247";s:5:" ÙŠØ";s:3:"248";s:6:"جرا";s:3:"249";s:6:"جما";s:3:"250";s:6:"Øمد";s:3:"251";s:5:"دم ";s:3:"252";s:5:"كم ";s:3:"253";s:6:"لاو";s:3:"254";s:6:"لره";s:3:"255";s:6:"ماع";s:3:"256";s:5:"Ù† Ù‚";s:3:"257";s:5:"نة ";s:3:"258";s:5:"هي ";s:3:"259";s:5:" بل";s:3:"260";s:5:" به";s:3:"261";s:5:" له";s:3:"262";s:5:" وي";s:3:"263";s:5:"ا Ùƒ";s:3:"264";s:6:"اذا";s:3:"265";s:5:"اع ";s:3:"266";s:5:"ت Ù…";s:3:"267";s:6:"تخا";s:3:"268";s:6:"خاب";s:3:"269";s:5:"ر Ù…";s:3:"270";s:6:"لمت";s:3:"271";s:6:"مسل";s:3:"272";s:5:"Ù‰ Ø£";s:3:"273";s:6:"يست";s:3:"274";s:6:"يطا";s:3:"275";s:5:" لأ";s:3:"276";s:5:" لي";s:3:"277";s:6:"أمن";s:3:"278";s:6:"است";s:3:"279";s:6:"بعض";s:3:"280";s:5:"Ø© ت";s:3:"281";s:5:"ري ";s:3:"282";s:6:"صدا";s:3:"283";s:5:"Ù‚ Ùˆ";s:3:"284";s:6:"قول";s:3:"285";s:5:"مد 
";s:3:"286";s:6:"نتخ";s:3:"287";s:6:"Ù†Ùس";s:3:"288";s:6:"نها";s:3:"289";s:6:"هنا";s:3:"290";s:6:"أعم";s:3:"291";s:6:"أنه";s:3:"292";s:6:"ائن";s:3:"293";s:6:"الآ";s:3:"294";s:6:"الك";s:3:"295";s:5:"ØØ© ";s:3:"296";s:5:"د Ù…";s:3:"297";s:5:"ر ع";s:3:"298";s:6:"ربي";s:3:"299";}s:5:"azeri";a:300:{s:4:"lÉ™r";s:1:"0";s:3:"in ";s:1:"1";s:4:"ın ";s:1:"2";s:3:"lar";s:1:"3";s:3:"da ";s:1:"4";s:3:"an ";s:1:"5";s:3:"ir ";s:1:"6";s:4:"dÉ™ ";s:1:"7";s:3:"ki ";s:1:"8";s:3:" bi";s:1:"9";s:4:"É™n ";s:2:"10";s:4:"É™ri";s:2:"11";s:4:"arı";s:2:"12";s:4:"É™r ";s:2:"13";s:3:"dir";s:2:"14";s:3:"nda";s:2:"15";s:3:" ki";s:2:"16";s:3:"rin";s:2:"17";s:4:"nın";s:2:"18";s:4:"É™si";s:2:"19";s:3:"ini";s:2:"20";s:3:" ed";s:2:"21";s:3:" qa";s:2:"22";s:4:" tÉ™";s:2:"23";s:3:" ba";s:2:"24";s:3:" ol";s:2:"25";s:4:"ası";s:2:"26";s:4:"ilÉ™";s:2:"27";s:4:"rın";s:2:"28";s:3:" ya";s:2:"29";s:4:"anı";s:2:"30";s:4:" vÉ™";s:2:"31";s:4:"ndÉ™";s:2:"32";s:3:"ni ";s:2:"33";s:3:"ara";s:2:"34";s:5:"ını";s:2:"35";s:4:"ınd";s:2:"36";s:3:" bu";s:2:"37";s:3:"si ";s:2:"38";s:3:"ib ";s:2:"39";s:3:"aq ";s:2:"40";s:4:"dÉ™n";s:2:"41";s:3:"iya";s:2:"42";s:4:"nÉ™ ";s:2:"43";s:4:"rÉ™ ";s:2:"44";s:3:"n b";s:2:"45";s:4:"sın";s:2:"46";s:4:"vÉ™ ";s:2:"47";s:3:"iri";s:2:"48";s:4:"lÉ™ ";s:2:"49";s:3:"nin";s:2:"50";s:4:"É™li";s:2:"51";s:3:" de";s:2:"52";s:4:" mü";s:2:"53";s:3:"bir";s:2:"54";s:3:"n s";s:2:"55";s:3:"ri ";s:2:"56";s:4:"É™k ";s:2:"57";s:3:" az";s:2:"58";s:4:" sÉ™";s:2:"59";s:3:"ar ";s:2:"60";s:3:"bil";s:2:"61";s:4:"zÉ™r";s:2:"62";s:3:"bu ";s:2:"63";s:3:"dan";s:2:"64";s:3:"edi";s:2:"65";s:3:"ind";s:2:"66";s:3:"man";s:2:"67";s:3:"un ";s:2:"68";s:5:"É™rÉ™";s:2:"69";s:3:" ha";s:2:"70";s:3:"lan";s:2:"71";s:4:"yyÉ™";s:2:"72";s:3:"iyy";s:2:"73";s:3:" il";s:2:"74";s:3:" ne";s:2:"75";s:3:"r k";s:2:"76";s:4:"É™ b";s:2:"77";s:3:" is";s:2:"78";s:3:"na ";s:2:"79";s:3:"nun";s:2:"80";s:4:"ır ";s:2:"81";s:3:" da";s:2:"82";s:4:" hÉ™";s:2:"83";s:3:"a 
b";s:2:"84";s:4:"inÉ™";s:2:"85";s:3:"sin";s:2:"86";s:3:"yan";s:2:"87";s:4:"É™rb";s:2:"88";s:4:" dÉ™";s:2:"89";s:4:" mÉ™";s:2:"90";s:4:" qÉ™";s:2:"91";s:4:"dır";s:2:"92";s:3:"li ";s:2:"93";s:3:"ola";s:2:"94";s:3:"rba";s:2:"95";s:4:"azÉ™";s:2:"96";s:3:"can";s:2:"97";s:4:"lı ";s:2:"98";s:3:"nla";s:2:"99";s:3:" et";s:3:"100";s:4:" gö";s:3:"101";s:4:"alı";s:3:"102";s:3:"ayc";s:3:"103";s:3:"bay";s:3:"104";s:3:"eft";s:3:"105";s:3:"ist";s:3:"106";s:3:"n i";s:3:"107";s:3:"nef";s:3:"108";s:4:"tlÉ™";s:3:"109";s:3:"yca";s:3:"110";s:4:"yÉ™t";s:3:"111";s:5:"É™cÉ™";s:3:"112";s:3:" la";s:3:"113";s:3:"ild";s:3:"114";s:4:"nı ";s:3:"115";s:3:"tin";s:3:"116";s:3:"ldi";s:3:"117";s:3:"lik";s:3:"118";s:3:"n h";s:3:"119";s:3:"n m";s:3:"120";s:3:"oyu";s:3:"121";s:3:"raq";s:3:"122";s:3:"ya ";s:3:"123";s:4:"É™ti";s:3:"124";s:3:" ar";s:3:"125";s:3:"ada";s:3:"126";s:4:"edÉ™";s:3:"127";s:3:"mas";s:3:"128";s:4:"sı ";s:3:"129";s:4:"ına";s:3:"130";s:4:"É™ d";s:3:"131";s:5:"É™lÉ™";s:3:"132";s:4:"ayı";s:3:"133";s:3:"iyi";s:3:"134";s:3:"lma";s:3:"135";s:4:"mÉ™k";s:3:"136";s:3:"n d";s:3:"137";s:3:"ti ";s:3:"138";s:3:"yin";s:3:"139";s:3:"yun";s:3:"140";s:4:"É™t ";s:3:"141";s:4:"azı";s:3:"142";s:3:"ft ";s:3:"143";s:3:"i t";s:3:"144";s:3:"lli";s:3:"145";s:3:"n a";s:3:"146";s:3:"ra ";s:3:"147";s:4:" cÉ™";s:3:"148";s:4:" gÉ™";s:3:"149";s:3:" ko";s:3:"150";s:4:" nÉ™";s:3:"151";s:3:" oy";s:3:"152";s:3:"a d";s:3:"153";s:3:"ana";s:3:"154";s:4:"cÉ™k";s:3:"155";s:3:"eyi";s:3:"156";s:3:"ilm";s:3:"157";s:3:"irl";s:3:"158";s:3:"lay";s:3:"159";s:3:"liy";s:3:"160";s:3:"lub";s:3:"161";s:4:"n É™";s:3:"162";s:3:"ril";s:3:"163";s:4:"rlÉ™";s:3:"164";s:3:"unu";s:3:"165";s:3:"ver";s:3:"166";s:4:"ün ";s:3:"167";s:4:"É™ o";s:3:"168";s:4:"É™ni";s:3:"169";s:3:" he";s:3:"170";s:3:" ma";s:3:"171";s:3:" on";s:3:"172";s:3:" pa";s:3:"173";s:3:"ala";s:3:"174";s:3:"dey";s:3:"175";s:3:"i m";s:3:"176";s:3:"ima";s:3:"177";s:4:"lmÉ™";s:3:"178";s:4:"mÉ™t";s:3:"179";s:3:"par";s:3:"180";s:4:"yÉ™ ";s:3:"181";s:4:"É™tl";s:3:"182";s:3:" 
al";s:3:"183";s:3:" mi";s:3:"184";s:3:" sa";s:3:"185";s:4:" É™l";s:3:"186";s:4:"adı";s:3:"187";s:4:"akı";s:3:"188";s:3:"and";s:3:"189";s:3:"ard";s:3:"190";s:3:"art";s:3:"191";s:3:"ayi";s:3:"192";s:3:"i a";s:3:"193";s:3:"i q";s:3:"194";s:3:"i y";s:3:"195";s:3:"ili";s:3:"196";s:3:"ill";s:3:"197";s:4:"isÉ™";s:3:"198";s:3:"n o";s:3:"199";s:3:"n q";s:3:"200";s:3:"olu";s:3:"201";s:3:"rla";s:3:"202";s:4:"stÉ™";s:3:"203";s:4:"sÉ™ ";s:3:"204";s:3:"tan";s:3:"205";s:3:"tel";s:3:"206";s:3:"yar";s:3:"207";s:5:"É™dÉ™";s:3:"208";s:3:" me";s:3:"209";s:4:" rÉ™";s:3:"210";s:3:" ve";s:3:"211";s:3:" ye";s:3:"212";s:3:"a k";s:3:"213";s:3:"at ";s:3:"214";s:4:"baÅŸ";s:3:"215";s:3:"diy";s:3:"216";s:3:"ent";s:3:"217";s:3:"eti";s:3:"218";s:4:"hÉ™s";s:3:"219";s:3:"i i";s:3:"220";s:3:"ik ";s:3:"221";s:3:"la ";s:3:"222";s:4:"miÅŸ";s:3:"223";s:3:"n n";s:3:"224";s:3:"nu ";s:3:"225";s:3:"qar";s:3:"226";s:3:"ran";s:3:"227";s:4:"tÉ™r";s:3:"228";s:3:"xan";s:3:"229";s:4:"É™ a";s:3:"230";s:4:"É™ g";s:3:"231";s:4:"É™ t";s:3:"232";s:4:" dü";s:3:"233";s:3:"ama";s:3:"234";s:3:"b k";s:3:"235";s:3:"dil";s:3:"236";s:3:"era";s:3:"237";s:3:"etm";s:3:"238";s:3:"i b";s:3:"239";s:3:"kil";s:3:"240";s:3:"mil";s:3:"241";s:3:"n r";s:3:"242";s:3:"qla";s:3:"243";s:3:"r s";s:3:"244";s:3:"ras";s:3:"245";s:3:"siy";s:3:"246";s:3:"son";s:3:"247";s:3:"tim";s:3:"248";s:3:"yer";s:3:"249";s:4:"É™ k";s:3:"250";s:4:" gü";s:3:"251";s:3:" so";s:3:"252";s:4:" sö";s:3:"253";s:3:" te";s:3:"254";s:3:" xa";s:3:"255";s:3:"ai ";s:3:"256";s:3:"bar";s:3:"257";s:3:"cti";s:3:"258";s:3:"di ";s:3:"259";s:3:"eri";s:3:"260";s:4:"gör";s:3:"261";s:4:"gün";s:3:"262";s:4:"gÉ™l";s:3:"263";s:4:"hbÉ™";s:3:"264";s:4:"ihÉ™";s:3:"265";s:3:"iki";s:3:"266";s:3:"isi";s:3:"267";s:3:"lin";s:3:"268";s:3:"mai";s:3:"269";s:3:"maq";s:3:"270";s:3:"n k";s:3:"271";s:3:"n t";s:3:"272";s:3:"n v";s:3:"273";s:3:"onu";s:3:"274";s:3:"qan";s:3:"275";s:4:"qÉ™z";s:3:"276";s:4:"tÉ™ 
";s:3:"277";s:3:"xal";s:3:"278";s:3:"yib";s:3:"279";s:3:"yih";s:3:"280";s:3:"zet";s:3:"281";s:4:"zır";s:3:"282";s:4:"ıb ";s:3:"283";s:4:"É™ m";s:3:"284";s:4:"É™ze";s:3:"285";s:3:" br";s:3:"286";s:3:" in";s:3:"287";s:4:" i̇";s:3:"288";s:3:" pr";s:3:"289";s:3:" ta";s:3:"290";s:3:" to";s:3:"291";s:5:" üç";s:3:"292";s:3:"a o";s:3:"293";s:3:"ali";s:3:"294";s:3:"ani";s:3:"295";s:3:"anl";s:3:"296";s:3:"aql";s:3:"297";s:3:"azi";s:3:"298";s:3:"bri";s:3:"299";}s:7:"bengali";a:300:{s:7:"ার ";s:1:"0";s:7:"য় ";s:1:"1";s:9:"েয়";s:1:"2";s:9:"য়া";s:1:"3";s:7:" কর";s:1:"4";s:7:"েত ";s:1:"5";s:7:" কা";s:1:"6";s:7:" পা";s:1:"7";s:7:" তা";s:1:"8";s:7:"না ";s:1:"9";s:9:"ায়";s:2:"10";s:7:"ের ";s:2:"11";s:9:"য়ে";s:2:"12";s:7:" বা";s:2:"13";s:7:"েব ";s:2:"14";s:7:" যা";s:2:"15";s:7:" হে";s:2:"16";s:7:" সা";s:2:"17";s:7:"ান ";s:2:"18";s:7:"েছ ";s:2:"19";s:7:" িন";s:2:"20";s:7:"েল ";s:2:"21";s:7:" িদ";s:2:"22";s:7:" না";s:2:"23";s:7:" িব";s:2:"24";s:7:"েক ";s:2:"25";s:7:"লা ";s:2:"26";s:7:"তা ";s:2:"27";s:7:" বઘ";s:2:"28";s:7:" িক";s:2:"29";s:9:"করে";s:2:"30";s:7:" পચ";s:2:"31";s:9:"াের";s:2:"32";s:9:"িনে";s:2:"33";s:7:"রা ";s:2:"34";s:7:" োব";s:2:"35";s:7:"কা ";s:2:"36";s:7:" কে";s:2:"37";s:7:" টা";s:2:"38";s:7:"র ক";s:2:"39";s:9:"েলা";s:2:"40";s:7:" োক";s:2:"41";s:7:" মা";s:2:"42";s:7:" োদ";s:2:"43";s:7:" োম";s:2:"44";s:7:"দর ";s:2:"45";s:7:"়া ";s:2:"46";s:9:"িদে";s:2:"47";s:9:"াকা";s:2:"48";s:9:"়েছ";s:2:"49";s:9:"েদর";s:2:"50";s:7:" আে";s:2:"51";s:5:" ও ";s:2:"52";s:7:"াল ";s:2:"53";s:7:"িট ";s:2:"54";s:7:" মà§";s:2:"55";s:9:"কের";s:2:"56";s:9:"হয়";s:2:"57";s:9:"করা";s:2:"58";s:7:"পর ";s:2:"59";s:9:"পাে";s:2:"60";s:7:" à¦à¦•";s:2:"61";s:7:" পদ";s:2:"62";s:9:"টাক";s:2:"63";s:7:"ড় ";s:2:"64";s:9:"কান";s:2:"65";s:7:"টা ";s:2:"66";s:9:"দગা";s:2:"67";s:9:"পদગ";s:2:"68";s:9:"াড়";s:2:"69";s:9:"োকা";s:2:"70";s:9:"ওয়";s:2:"71";s:9:"কাপ";s:2:"72";s:9:"হেয";s:2:"73";s:9:"েনর";s:2:"74";s:7:" হয";s:2:"75";s:9:"দেয";s:2:"76";s:7:"নর ";s:2:"77";s:9:"ানা";s:2:"78";s:9:"ােল";s:2:"79";s:7:" 
আর";s:2:"80";s:5:" ় ";s:2:"81";s:9:"বઘব";s:2:"82";s:9:"িয়";s:2:"83";s:7:" দা";s:2:"84";s:7:" সম";s:2:"85";s:9:"কার";s:2:"86";s:9:"হার";s:2:"87";s:7:"াই ";s:2:"88";s:9:"ড়া";s:2:"89";s:9:"িবি";s:2:"90";s:7:" রা";s:2:"91";s:7:" লা";s:2:"92";s:9:"নার";s:2:"93";s:9:"বহা";s:2:"94";s:7:"বা ";s:2:"95";s:9:"যায";s:2:"96";s:7:"েন ";s:2:"97";s:9:"ઘবহ";s:2:"98";s:7:" à¦à¦¾";s:2:"99";s:7:" সে";s:3:"100";s:7:" োয";s:3:"101";s:7:"রর ";s:3:"102";s:9:"়ার";s:3:"103";s:9:"়াল";s:3:"104";s:7:"ગা ";s:3:"105";s:9:"থেক";s:3:"106";s:9:"à¦à¦¾à§‡";s:3:"107";s:7:"়ে ";s:3:"108";s:9:"েরর";s:3:"109";s:7:" ধর";s:3:"110";s:7:" হা";s:3:"111";s:7:"নઘ ";s:3:"112";s:9:"রেন";s:3:"113";s:9:"ােব";s:3:"114";s:9:"িড়";s:3:"115";s:7:"ির ";s:3:"116";s:7:" োথ";s:3:"117";s:9:"তার";s:3:"118";s:9:"বিà¦";s:3:"119";s:9:"রেত";s:3:"120";s:9:"সাে";s:3:"121";s:9:"াকে";s:3:"122";s:9:"ােত";s:3:"123";s:9:"িà¦à¨";s:3:"124";s:7:"ে ব";s:3:"125";s:9:"োথে";s:3:"126";s:7:" োপ";s:3:"127";s:7:" োস";s:3:"128";s:9:"বার";s:3:"129";s:7:"à¦à¨ ";s:3:"130";s:7:"রন ";s:3:"131";s:7:"াম ";s:3:"132";s:7:" à¦à¦–";s:3:"133";s:7:"আর ";s:3:"134";s:9:"কাে";s:3:"135";s:7:"দন ";s:3:"136";s:9:"সাজ";s:3:"137";s:9:"ােক";s:3:"138";s:9:"ােন";s:3:"139";s:9:"েনা";s:3:"140";s:7:" ঘে";s:3:"141";s:7:" তে";s:3:"142";s:7:" রে";s:3:"143";s:9:"তেব";s:3:"144";s:7:"বন ";s:3:"145";s:9:"বઘা";s:3:"146";s:9:"েড়";s:3:"147";s:9:"েবন";s:3:"148";s:7:" খà§";s:3:"149";s:7:" চা";s:3:"150";s:7:" সà§";s:3:"151";s:7:"কে ";s:3:"152";s:9:"ধরে";s:3:"153";s:7:"র ো";s:3:"154";s:7:"় ি";s:3:"155";s:7:"া ি";s:3:"156";s:9:"ােথ";s:3:"157";s:9:"াਠা";s:3:"158";s:7:"িদ ";s:3:"159";s:7:"িন ";s:3:"160";s:7:" অন";s:3:"161";s:7:" আপ";s:3:"162";s:7:" আম";s:3:"163";s:7:" থা";s:3:"164";s:7:" বચ";s:3:"165";s:7:" োফ";s:3:"166";s:7:" ৌত";s:3:"167";s:9:"ঘের";s:3:"168";s:7:"তে ";s:3:"169";s:9:"ময়";s:3:"170";s:9:"যাਠ";s:3:"171";s:7:"র স";s:3:"172";s:9:"রাখ";s:3:"173";s:7:"া ব";s:3:"174";s:7:"া ো";s:3:"175";s:9:"ালা";s:3:"176";s:7:"িক ";s:3:"177";s:7:"িশ ";s:3:"178";s:7:"েখ ";s:3:"179";s:7:" 
à¦à¦°";s:3:"180";s:7:" চઓ";s:3:"181";s:7:" িড";s:3:"182";s:7:"খন ";s:3:"183";s:9:"ড়ে";s:3:"184";s:7:"র ব";s:3:"185";s:7:"়র ";s:3:"186";s:9:"াইে";s:3:"187";s:9:"ােদ";s:3:"188";s:9:"িদন";s:3:"189";s:9:"েরন";s:3:"190";s:7:" তੴ";s:3:"191";s:9:"ছাড";s:3:"192";s:9:"জনઘ";s:3:"193";s:9:"তাই";s:3:"194";s:7:"মা ";s:3:"195";s:9:"মাে";s:3:"196";s:9:"লার";s:3:"197";s:7:"াজ ";s:3:"198";s:9:"াতা";s:3:"199";s:9:"ামা";s:3:"200";s:9:"ਊেল";s:3:"201";s:9:"ગার";s:3:"202";s:7:" সব";s:3:"203";s:9:"আপন";s:3:"204";s:9:"à¦à¦•à¦Ÿ";s:3:"205";s:9:"কাি";s:3:"206";s:9:"জাই";s:3:"207";s:7:"টর ";s:3:"208";s:9:"ডজা";s:3:"209";s:9:"দেখ";s:3:"210";s:9:"পনা";s:3:"211";s:7:"রও ";s:3:"212";s:7:"লে ";s:3:"213";s:9:"হেব";s:3:"214";s:9:"াজা";s:3:"215";s:9:"ািট";s:3:"216";s:9:"িডজ";s:3:"217";s:7:"েথ ";s:3:"218";s:7:" à¦à¦¬";s:3:"219";s:7:" জন";s:3:"220";s:7:" জা";s:3:"221";s:9:"আমা";s:3:"222";s:9:"গেল";s:3:"223";s:9:"জান";s:3:"224";s:9:"নেত";s:3:"225";s:9:"বিশ";s:3:"226";s:9:"মà§à§‡";s:3:"227";s:9:"মেয";s:3:"228";s:7:"র প";s:3:"229";s:7:"সে ";s:3:"230";s:9:"হেল";s:3:"231";s:7:"় ো";s:3:"232";s:7:"া হ";s:3:"233";s:9:"াওয";s:3:"234";s:9:"োমক";s:3:"235";s:9:"ઘাি";s:3:"236";s:7:" অে";s:3:"237";s:5:" ট ";s:3:"238";s:7:" োগ";s:3:"239";s:7:" োন";s:3:"240";s:7:"জর ";s:3:"241";s:9:"তির";s:3:"242";s:9:"দাম";s:3:"243";s:9:"পড়";s:3:"244";s:9:"পার";s:3:"245";s:9:"বাঘ";s:3:"246";s:9:"মকা";s:3:"247";s:9:"মাম";s:3:"248";s:9:"য়র";s:3:"249";s:9:"যাে";s:3:"250";s:7:"র ম";s:3:"251";s:7:"রে ";s:3:"252";s:7:"লর ";s:3:"253";s:7:"া ক";s:3:"254";s:7:"াগ ";s:3:"255";s:9:"াবা";s:3:"256";s:9:"ারা";s:3:"257";s:9:"ািন";s:3:"258";s:7:"ে গ";s:3:"259";s:7:"েগ ";s:3:"260";s:9:"েলর";s:3:"261";s:9:"োদখ";s:3:"262";s:9:"োবি";s:3:"263";s:7:"ઓল ";s:3:"264";s:7:" দে";s:3:"265";s:7:" পà§";s:3:"266";s:7:" বে";s:3:"267";s:9:"অেন";s:3:"268";s:9:"à¦à¦–ন";s:3:"269";s:9:"কছà§";s:3:"270";s:9:"কাল";s:3:"271";s:9:"গেয";s:3:"272";s:7:"ছন ";s:3:"273";s:7:"ত প";s:3:"274";s:9:"নেয";s:3:"275";s:9:"পাি";s:3:"276";s:7:"মন ";s:3:"277";s:7:"র 
আ";s:3:"278";s:9:"রার";s:3:"279";s:7:"াও ";s:3:"280";s:7:"াপ ";s:3:"281";s:9:"িকছ";s:3:"282";s:9:"িগে";s:3:"283";s:9:"েছন";s:3:"284";s:9:"েজর";s:3:"285";s:9:"োমা";s:3:"286";s:9:"োমে";s:3:"287";s:9:"ৌতি";s:3:"288";s:9:"ઘাে";s:3:"289";s:3:" ' ";s:3:"290";s:7:" à¦à¦›";s:3:"291";s:7:" ছা";s:3:"292";s:7:" বল";s:3:"293";s:7:" যি";s:3:"294";s:7:" শি";s:3:"295";s:7:" িম";s:3:"296";s:7:" োল";s:3:"297";s:9:"à¦à¦›à¦¾";s:3:"298";s:7:"খা ";s:3:"299";}s:9:"bulgarian";a:300:{s:5:"на ";s:1:"0";s:5:" на";s:1:"1";s:5:"то ";s:1:"2";s:5:" пр";s:1:"3";s:5:" за";s:1:"4";s:5:"та ";s:1:"5";s:5:" по";s:1:"6";s:6:"ите";s:1:"7";s:5:"те ";s:1:"8";s:5:"а п";s:1:"9";s:5:"а Ñ";s:2:"10";s:5:" от";s:2:"11";s:5:"за ";s:2:"12";s:6:"ата";s:2:"13";s:5:"Ð¸Ñ ";s:2:"14";s:4:" в ";s:2:"15";s:5:"е н";s:2:"16";s:5:" да";s:2:"17";s:5:"а н";s:2:"18";s:5:" Ñе";s:2:"19";s:5:" ко";s:2:"20";s:5:"да ";s:2:"21";s:5:"от ";s:2:"22";s:6:"ани";s:2:"23";s:6:"пре";s:2:"24";s:5:"не ";s:2:"25";s:6:"ени";s:2:"26";s:5:"о н";s:2:"27";s:5:"ни ";s:2:"28";s:5:"Ñе ";s:2:"29";s:4:" и ";s:2:"30";s:5:"но ";s:2:"31";s:6:"ане";s:2:"32";s:6:"ето";s:2:"33";s:5:"а в";s:2:"34";s:5:"ва ";s:2:"35";s:6:"ван";s:2:"36";s:5:"е п";s:2:"37";s:5:"а о";s:2:"38";s:6:"ото";s:2:"39";s:6:"ран";s:2:"40";s:5:"ат ";s:2:"41";s:6:"ред";s:2:"42";s:5:" не";s:2:"43";s:5:"а д";s:2:"44";s:5:"и п";s:2:"45";s:5:" до";s:2:"46";s:6:"про";s:2:"47";s:5:" ÑÑŠ";s:2:"48";s:5:"ли ";s:2:"49";s:6:"при";s:2:"50";s:6:"ниÑ";s:2:"51";s:6:"Ñки";s:2:"52";s:6:"тел";s:2:"53";s:5:"а и";s:2:"54";s:5:"по ";s:2:"55";s:5:"ри ";s:2:"56";s:4:" е ";s:2:"57";s:5:" ка";s:2:"58";s:6:"ира";s:2:"59";s:6:"кат";s:2:"60";s:6:"ние";s:2:"61";s:6:"нит";s:2:"62";s:5:"е з";s:2:"63";s:5:"и Ñ";s:2:"64";s:5:"о Ñ";s:2:"65";s:6:"оÑÑ‚";s:2:"66";s:5:"че ";s:2:"67";s:5:" ра";s:2:"68";s:6:"иÑÑ‚";s:2:"69";s:5:"о п";s:2:"70";s:5:" из";s:2:"71";s:5:" Ñа";s:2:"72";s:5:"е д";s:2:"73";s:6:"ини";s:2:"74";s:5:"ки ";s:2:"75";s:6:"мин";s:2:"76";s:5:" ми";s:2:"77";s:5:"а б";s:2:"78";s:6:"ава";s:2:"79";s:5:"е 
в";s:2:"80";s:5:"ие ";s:2:"81";s:6:"пол";s:2:"82";s:6:"Ñтв";s:2:"83";s:5:"Ñ‚ н";s:2:"84";s:5:" въ";s:2:"85";s:5:" ÑÑ‚";s:2:"86";s:5:" то";s:2:"87";s:6:"аза";s:2:"88";s:5:"е о";s:2:"89";s:5:"ов ";s:2:"90";s:5:"ÑÑ‚ ";s:2:"91";s:5:"ÑŠÑ‚ ";s:2:"92";s:5:"и н";s:2:"93";s:6:"иÑÑ‚";s:2:"94";s:6:"нат";s:2:"95";s:5:"ра ";s:2:"96";s:5:" бъ";s:2:"97";s:5:" че";s:2:"98";s:6:"алн";s:2:"99";s:5:"е Ñ";s:3:"100";s:5:"ен ";s:3:"101";s:6:"еÑÑ‚";s:3:"102";s:5:"и д";s:3:"103";s:6:"лен";s:3:"104";s:6:"ниÑ";s:3:"105";s:5:"о о";s:3:"106";s:6:"ови";s:3:"107";s:5:" об";s:3:"108";s:5:" Ñл";s:3:"109";s:5:"а Ñ€";s:3:"110";s:6:"ато";s:3:"111";s:6:"кон";s:3:"112";s:6:"ноÑ";s:3:"113";s:6:"ров";s:3:"114";s:5:"ще ";s:3:"115";s:5:" ре";s:3:"116";s:4:" Ñ ";s:3:"117";s:5:" Ñп";s:3:"118";s:6:"ват";s:3:"119";s:6:"еше";s:3:"120";s:5:"и в";s:3:"121";s:6:"иет";s:3:"122";s:5:"о в";s:3:"123";s:6:"ове";s:3:"124";s:6:"Ñта";s:3:"125";s:5:"а к";s:3:"126";s:5:"а Ñ‚";s:3:"127";s:6:"дат";s:3:"128";s:6:"ент";s:3:"129";s:5:"ка ";s:3:"130";s:6:"лед";s:3:"131";s:6:"нет";s:3:"132";s:6:"ори";s:3:"133";s:6:"ÑÑ‚Ñ€";s:3:"134";s:6:"ÑÑ‚ÑŠ";s:3:"135";s:5:"ти ";s:3:"136";s:6:"Ñ‚ÑŠÑ€";s:3:"137";s:5:" те";s:3:"138";s:5:"а з";s:3:"139";s:5:"а м";s:3:"140";s:5:"ад ";s:3:"141";s:6:"ана";s:3:"142";s:6:"ено";s:3:"143";s:5:"и о";s:3:"144";s:6:"ина";s:3:"145";s:6:"ити";s:3:"146";s:5:"ма ";s:3:"147";s:6:"Ñка";s:3:"148";s:6:"Ñле";s:3:"149";s:6:"тво";s:3:"150";s:6:"тер";s:3:"151";s:6:"циÑ";s:3:"152";s:5:"ÑÑ‚ ";s:3:"153";s:5:" бе";s:3:"154";s:5:" де";s:3:"155";s:5:" па";s:3:"156";s:6:"ате";s:3:"157";s:6:"вен";s:3:"158";s:5:"ви ";s:3:"159";s:6:"вит";s:3:"160";s:5:"и з";s:3:"161";s:5:"и и";s:3:"162";s:6:"нар";s:3:"163";s:6:"нов";s:3:"164";s:6:"ова";s:3:"165";s:6:"пов";s:3:"166";s:6:"рез";s:3:"167";s:6:"рит";s:3:"168";s:5:"Ñа ";s:3:"169";s:6:"Ñта";s:3:"170";s:5:" го";s:3:"171";s:5:" ще";s:3:"172";s:6:"али";s:3:"173";s:5:"в п";s:3:"174";s:6:"гра";s:3:"175";s:5:"е 
и";s:3:"176";s:6:"еди";s:3:"177";s:6:"ели";s:3:"178";s:6:"или";s:3:"179";s:6:"каз";s:3:"180";s:6:"кит";s:3:"181";s:6:"лно";s:3:"182";s:6:"мен";s:3:"183";s:6:"оли";s:3:"184";s:6:"раз";s:3:"185";s:5:" ве";s:3:"186";s:5:" гр";s:3:"187";s:5:" им";s:3:"188";s:5:" ме";s:3:"189";s:5:" пъ";s:3:"190";s:6:"ави";s:3:"191";s:6:"ако";s:3:"192";s:6:"ача";s:3:"193";s:6:"вин";s:3:"194";s:5:"во ";s:3:"195";s:6:"гов";s:3:"196";s:6:"дан";s:3:"197";s:5:"ди ";s:3:"198";s:5:"до ";s:3:"199";s:5:"ед ";s:3:"200";s:6:"ери";s:3:"201";s:6:"еро";s:3:"202";s:6:"жда";s:3:"203";s:6:"ито";s:3:"204";s:6:"ков";s:3:"205";s:6:"кол";s:3:"206";s:6:"лни";s:3:"207";s:6:"мер";s:3:"208";s:6:"нач";s:3:"209";s:5:"о з";s:3:"210";s:6:"ола";s:3:"211";s:5:"он ";s:3:"212";s:6:"она";s:3:"213";s:6:"пра";s:3:"214";s:6:"рав";s:3:"215";s:6:"рем";s:3:"216";s:6:"ÑиÑ";s:3:"217";s:6:"Ñти";s:3:"218";s:5:"Ñ‚ п";s:3:"219";s:6:"тан";s:3:"220";s:5:"ха ";s:3:"221";s:5:"ше ";s:3:"222";s:6:"шен";s:3:"223";s:6:"ълг";s:3:"224";s:5:" ба";s:3:"225";s:5:" Ñи";s:3:"226";s:6:"аро";s:3:"227";s:6:"бъл";s:3:"228";s:5:"в Ñ€";s:3:"229";s:6:"гар";s:3:"230";s:5:"е е";s:3:"231";s:6:"елн";s:3:"232";s:6:"еме";s:3:"233";s:6:"ико";s:3:"234";s:6:"има";s:3:"235";s:5:"ко ";s:3:"236";s:6:"кои";s:3:"237";s:5:"ла ";s:3:"238";s:6:"лга";s:3:"239";s:5:"о д";s:3:"240";s:6:"ози";s:3:"241";s:6:"оит";s:3:"242";s:6:"под";s:3:"243";s:6:"реÑ";s:3:"244";s:6:"рие";s:3:"245";s:6:"Ñто";s:3:"246";s:5:"Ñ‚ к";s:3:"247";s:5:"Ñ‚ м";s:3:"248";s:5:"Ñ‚ Ñ";s:3:"249";s:6:"уÑÑ‚";s:3:"250";s:5:" би";s:3:"251";s:5:" дв";s:3:"252";s:5:" дъ";s:3:"253";s:5:" ма";s:3:"254";s:5:" мо";s:3:"255";s:5:" ни";s:3:"256";s:5:" оÑ";s:3:"257";s:6:"ала";s:3:"258";s:6:"анÑ";s:3:"259";s:6:"ара";s:3:"260";s:6:"ати";s:3:"261";s:6:"аци";s:3:"262";s:6:"беш";s:3:"263";s:6:"вър";s:3:"264";s:5:"е Ñ€";s:3:"265";s:6:"едв";s:3:"266";s:6:"ема";s:3:"267";s:6:"жав";s:3:"268";s:5:"и к";s:3:"269";s:6:"иал";s:3:"270";s:6:"ица";s:3:"271";s:6:"иче";s:3:"272";s:6:"киÑ";s:3:"273";s:6:"лит";s:3:"274";s:5:"о 
б";s:3:"275";s:6:"ово";s:3:"276";s:6:"оди";s:3:"277";s:6:"ока";s:3:"278";s:6:"поÑ";s:3:"279";s:6:"род";s:3:"280";s:6:"Ñед";s:3:"281";s:6:"Ñлу";s:3:"282";s:5:"Ñ‚ и";s:3:"283";s:6:"тов";s:3:"284";s:6:"ува";s:3:"285";s:6:"циа";s:3:"286";s:6:"чеÑ";s:3:"287";s:5:"Ñ Ð·";s:3:"288";s:5:" во";s:3:"289";s:5:" ил";s:3:"290";s:5:" Ñк";s:3:"291";s:5:" Ñ‚Ñ€";s:3:"292";s:5:" це";s:3:"293";s:6:"ами";s:3:"294";s:6:"ари";s:3:"295";s:6:"бат";s:3:"296";s:5:"би ";s:3:"297";s:6:"бра";s:3:"298";s:6:"бъд";s:3:"299";}s:7:"cebuano";a:300:{s:3:"ng ";s:1:"0";s:3:"sa ";s:1:"1";s:3:" sa";s:1:"2";s:3:"ang";s:1:"3";s:3:"ga ";s:1:"4";s:3:"nga";s:1:"5";s:3:" ka";s:1:"6";s:3:" ng";s:1:"7";s:3:"an ";s:1:"8";s:3:" an";s:1:"9";s:3:" na";s:2:"10";s:3:" ma";s:2:"11";s:3:" ni";s:2:"12";s:3:"a s";s:2:"13";s:3:"a n";s:2:"14";s:3:"on ";s:2:"15";s:3:" pa";s:2:"16";s:3:" si";s:2:"17";s:3:"a k";s:2:"18";s:3:"a m";s:2:"19";s:3:" ba";s:2:"20";s:3:"ong";s:2:"21";s:3:"a i";s:2:"22";s:3:"ila";s:2:"23";s:3:" mg";s:2:"24";s:3:"mga";s:2:"25";s:3:"a p";s:2:"26";s:3:"iya";s:2:"27";s:3:"a a";s:2:"28";s:3:"ay ";s:2:"29";s:3:"ka ";s:2:"30";s:3:"ala";s:2:"31";s:3:"ing";s:2:"32";s:3:"g m";s:2:"33";s:3:"n s";s:2:"34";s:3:"g n";s:2:"35";s:3:"lan";s:2:"36";s:3:" gi";s:2:"37";s:3:"na ";s:2:"38";s:3:"ni ";s:2:"39";s:3:"o s";s:2:"40";s:3:"g p";s:2:"41";s:3:"n n";s:2:"42";s:3:" da";s:2:"43";s:3:"ag ";s:2:"44";s:3:"pag";s:2:"45";s:3:"g s";s:2:"46";s:3:"yan";s:2:"47";s:3:"ayo";s:2:"48";s:3:"o n";s:2:"49";s:3:"si ";s:2:"50";s:3:" mo";s:2:"51";s:3:"a b";s:2:"52";s:3:"g a";s:2:"53";s:3:"ail";s:2:"54";s:3:"g b";s:2:"55";s:3:"han";s:2:"56";s:3:"a d";s:2:"57";s:3:"asu";s:2:"58";s:3:"nag";s:2:"59";s:3:"ya ";s:2:"60";s:3:"man";s:2:"61";s:3:"ne ";s:2:"62";s:3:"pan";s:2:"63";s:3:"kon";s:2:"64";s:3:" il";s:2:"65";s:3:" la";s:2:"66";s:3:"aka";s:2:"67";s:3:"ako";s:2:"68";s:3:"ana";s:2:"69";s:3:"bas";s:2:"70";s:3:"ko ";s:2:"71";s:3:"od ";s:2:"72";s:3:"yo ";s:2:"73";s:3:" di";s:2:"74";s:3:" ko";s:2:"75";s:3:" ug";s:2:"76";s:3:"a u";s:2:"77";s:3:"g 
k";s:2:"78";s:3:"kan";s:2:"79";s:3:"la ";s:2:"80";s:3:"len";s:2:"81";s:3:"sur";s:2:"82";s:3:"ug ";s:2:"83";s:3:" ai";s:2:"84";s:3:"apa";s:2:"85";s:3:"aw ";s:2:"86";s:3:"d s";s:2:"87";s:3:"g d";s:2:"88";s:3:"g g";s:2:"89";s:3:"ile";s:2:"90";s:3:"nin";s:2:"91";s:3:" iy";s:2:"92";s:3:" su";s:2:"93";s:3:"ene";s:2:"94";s:3:"og ";s:2:"95";s:3:"ot ";s:2:"96";s:3:"aba";s:2:"97";s:3:"aha";s:2:"98";s:3:"as ";s:2:"99";s:3:"imo";s:3:"100";s:3:" ki";s:3:"101";s:3:"a t";s:3:"102";s:3:"aga";s:3:"103";s:3:"ban";s:3:"104";s:3:"ero";s:3:"105";s:3:"nan";s:3:"106";s:3:"o k";s:3:"107";s:3:"ran";s:3:"108";s:3:"ron";s:3:"109";s:3:"sil";s:3:"110";s:3:"una";s:3:"111";s:3:"usa";s:3:"112";s:3:" us";s:3:"113";s:3:"a g";s:3:"114";s:3:"ahi";s:3:"115";s:3:"ani";s:3:"116";s:3:"er ";s:3:"117";s:3:"ha ";s:3:"118";s:3:"i a";s:3:"119";s:3:"rer";s:3:"120";s:3:"yon";s:3:"121";s:3:" pu";s:3:"122";s:3:"ini";s:3:"123";s:3:"nak";s:3:"124";s:3:"ro ";s:3:"125";s:3:"to ";s:3:"126";s:3:"ure";s:3:"127";s:3:" ed";s:3:"128";s:3:" og";s:3:"129";s:3:" wa";s:3:"130";s:3:"ili";s:3:"131";s:3:"mo ";s:3:"132";s:3:"n a";s:3:"133";s:3:"nd ";s:3:"134";s:3:"o a";s:3:"135";s:3:" ad";s:3:"136";s:3:" du";s:3:"137";s:3:" pr";s:3:"138";s:3:"aro";s:3:"139";s:3:"i s";s:3:"140";s:3:"ma ";s:3:"141";s:3:"n m";s:3:"142";s:3:"ulo";s:3:"143";s:3:"und";s:3:"144";s:3:" ta";s:3:"145";s:3:"ara";s:3:"146";s:3:"asa";s:3:"147";s:3:"ato";s:3:"148";s:3:"awa";s:3:"149";s:3:"dmu";s:3:"150";s:3:"e n";s:3:"151";s:3:"edm";s:3:"152";s:3:"ina";s:3:"153";s:3:"mak";s:3:"154";s:3:"mun";s:3:"155";s:3:"niy";s:3:"156";s:3:"san";s:3:"157";s:3:"wa ";s:3:"158";s:3:" tu";s:3:"159";s:3:" un";s:3:"160";s:3:"a l";s:3:"161";s:3:"bay";s:3:"162";s:3:"iga";s:3:"163";s:3:"ika";s:3:"164";s:3:"ita";s:3:"165";s:3:"kin";s:3:"166";s:3:"lis";s:3:"167";s:3:"may";s:3:"168";s:3:"os ";s:3:"169";s:3:" ar";s:3:"170";s:3:"ad ";s:3:"171";s:3:"ali";s:3:"172";s:3:"ama";s:3:"173";s:3:"ers";s:3:"174";s:3:"ipa";s:3:"175";s:3:"isa";s:3:"176";s:3:"mao";s:3:"177";s:3:"nim";s:3:"178";s:3:"t 
s";s:3:"179";s:3:"tin";s:3:"180";s:3:" ak";s:3:"181";s:3:" ap";s:3:"182";s:3:" hi";s:3:"183";s:3:"abo";s:3:"184";s:3:"agp";s:3:"185";s:3:"ano";s:3:"186";s:3:"ata";s:3:"187";s:3:"g i";s:3:"188";s:3:"gan";s:3:"189";s:3:"gka";s:3:"190";s:3:"gpa";s:3:"191";s:3:"i m";s:3:"192";s:3:"iha";s:3:"193";s:3:"k s";s:3:"194";s:3:"law";s:3:"195";s:3:"or ";s:3:"196";s:3:"rs ";s:3:"197";s:3:"siy";s:3:"198";s:3:"tag";s:3:"199";s:3:" al";s:3:"200";s:3:" at";s:3:"201";s:3:" ha";s:3:"202";s:3:" hu";s:3:"203";s:3:" im";s:3:"204";s:3:"a h";s:3:"205";s:3:"bu ";s:3:"206";s:3:"e s";s:3:"207";s:3:"gma";s:3:"208";s:3:"kas";s:3:"209";s:3:"lag";s:3:"210";s:3:"mon";s:3:"211";s:3:"nah";s:3:"212";s:3:"ngo";s:3:"213";s:3:"r s";s:3:"214";s:3:"ra ";s:3:"215";s:3:"sab";s:3:"216";s:3:"sam";s:3:"217";s:3:"sul";s:3:"218";s:3:"uba";s:3:"219";s:3:"uha";s:3:"220";s:3:" lo";s:3:"221";s:3:" re";s:3:"222";s:3:"ada";s:3:"223";s:3:"aki";s:3:"224";s:3:"aya";s:3:"225";s:3:"bah";s:3:"226";s:3:"ce ";s:3:"227";s:3:"d n";s:3:"228";s:3:"lab";s:3:"229";s:3:"pa ";s:3:"230";s:3:"pak";s:3:"231";s:3:"s n";s:3:"232";s:3:"s s";s:3:"233";s:3:"tan";s:3:"234";s:3:"taw";s:3:"235";s:3:"te ";s:3:"236";s:3:"uma";s:3:"237";s:3:"ura";s:3:"238";s:3:" in";s:3:"239";s:3:" lu";s:3:"240";s:3:"a c";s:3:"241";s:3:"abi";s:3:"242";s:3:"at ";s:3:"243";s:3:"awo";s:3:"244";s:3:"bat";s:3:"245";s:3:"dal";s:3:"246";s:3:"dla";s:3:"247";s:3:"ele";s:3:"248";s:3:"g t";s:3:"249";s:3:"g u";s:3:"250";s:3:"gay";s:3:"251";s:3:"go ";s:3:"252";s:3:"hab";s:3:"253";s:3:"hin";s:3:"254";s:3:"i e";s:3:"255";s:3:"i n";s:3:"256";s:3:"kab";s:3:"257";s:3:"kap";s:3:"258";s:3:"lay";s:3:"259";s:3:"lin";s:3:"260";s:3:"nil";s:3:"261";s:3:"pam";s:3:"262";s:3:"pas";s:3:"263";s:3:"pro";s:3:"264";s:3:"pul";s:3:"265";s:3:"ta ";s:3:"266";s:3:"ton";s:3:"267";s:3:"uga";s:3:"268";s:3:"ugm";s:3:"269";s:3:"unt";s:3:"270";s:3:" co";s:3:"271";s:3:" gu";s:3:"272";s:3:" mi";s:3:"273";s:3:" pi";s:3:"274";s:3:" ti";s:3:"275";s:3:"a 
o";s:3:"276";s:3:"abu";s:3:"277";s:3:"adl";s:3:"278";s:3:"ado";s:3:"279";s:3:"agh";s:3:"280";s:3:"agk";s:3:"281";s:3:"ao ";s:3:"282";s:3:"art";s:3:"283";s:3:"bal";s:3:"284";s:3:"cit";s:3:"285";s:3:"di ";s:3:"286";s:3:"dto";s:3:"287";s:3:"dun";s:3:"288";s:3:"ent";s:3:"289";s:3:"g e";s:3:"290";s:3:"gon";s:3:"291";s:3:"gug";s:3:"292";s:3:"ia ";s:3:"293";s:3:"iba";s:3:"294";s:3:"ice";s:3:"295";s:3:"in ";s:3:"296";s:3:"inu";s:3:"297";s:3:"it ";s:3:"298";s:3:"kaa";s:3:"299";}s:8:"croatian";a:300:{s:3:"je ";s:1:"0";s:3:" na";s:1:"1";s:3:" pr";s:1:"2";s:3:" po";s:1:"3";s:3:"na ";s:1:"4";s:3:" je";s:1:"5";s:3:" za";s:1:"6";s:3:"ije";s:1:"7";s:3:"ne ";s:1:"8";s:3:" i ";s:1:"9";s:3:"ti ";s:2:"10";s:3:"da ";s:2:"11";s:3:" ko";s:2:"12";s:3:" ne";s:2:"13";s:3:"li ";s:2:"14";s:3:" bi";s:2:"15";s:3:" da";s:2:"16";s:3:" u ";s:2:"17";s:3:"ma ";s:2:"18";s:3:"mo ";s:2:"19";s:3:"a n";s:2:"20";s:3:"ih ";s:2:"21";s:3:"za ";s:2:"22";s:3:"a s";s:2:"23";s:3:"ko ";s:2:"24";s:3:"i s";s:2:"25";s:3:"a p";s:2:"26";s:3:"koj";s:2:"27";s:3:"pro";s:2:"28";s:3:"ju ";s:2:"29";s:3:"se ";s:2:"30";s:3:" go";s:2:"31";s:3:"ost";s:2:"32";s:3:"to ";s:2:"33";s:3:"va ";s:2:"34";s:3:" do";s:2:"35";s:3:" to";s:2:"36";s:3:"e n";s:2:"37";s:3:"i p";s:2:"38";s:3:" od";s:2:"39";s:3:" ra";s:2:"40";s:3:"no ";s:2:"41";s:3:"ako";s:2:"42";s:3:"ka ";s:2:"43";s:3:"ni ";s:2:"44";s:3:" ka";s:2:"45";s:3:" se";s:2:"46";s:3:" mo";s:2:"47";s:3:" st";s:2:"48";s:3:"i n";s:2:"49";s:3:"ima";s:2:"50";s:3:"ja ";s:2:"51";s:3:"pri";s:2:"52";s:3:"vat";s:2:"53";s:3:"sta";s:2:"54";s:3:" su";s:2:"55";s:3:"ati";s:2:"56";s:3:"e p";s:2:"57";s:3:"ta ";s:2:"58";s:3:"tsk";s:2:"59";s:3:"e i";s:2:"60";s:3:"nij";s:2:"61";s:3:" tr";s:2:"62";s:3:"cij";s:2:"63";s:3:"jen";s:2:"64";s:3:"nos";s:2:"65";s:3:"o s";s:2:"66";s:3:" iz";s:2:"67";s:3:"om ";s:2:"68";s:3:"tro";s:2:"69";s:3:"ili";s:2:"70";s:3:"iti";s:2:"71";s:3:"pos";s:2:"72";s:3:" al";s:2:"73";s:3:"a i";s:2:"74";s:3:"a o";s:2:"75";s:3:"e 
s";s:2:"76";s:3:"ija";s:2:"77";s:3:"ini";s:2:"78";s:3:"pre";s:2:"79";s:3:"str";s:2:"80";s:3:"la ";s:2:"81";s:3:"og ";s:2:"82";s:3:"ovo";s:2:"83";s:3:" sv";s:2:"84";s:3:"ekt";s:2:"85";s:3:"nje";s:2:"86";s:3:"o p";s:2:"87";s:3:"odi";s:2:"88";s:3:"rva";s:2:"89";s:3:" ni";s:2:"90";s:3:"ali";s:2:"91";s:3:"min";s:2:"92";s:3:"rij";s:2:"93";s:3:"a t";s:2:"94";s:3:"a z";s:2:"95";s:3:"ats";s:2:"96";s:3:"iva";s:2:"97";s:3:"o t";s:2:"98";s:3:"od ";s:2:"99";s:3:"oje";s:3:"100";s:3:"ra ";s:3:"101";s:3:" hr";s:3:"102";s:3:"a m";s:3:"103";s:3:"a u";s:3:"104";s:3:"hrv";s:3:"105";s:3:"im ";s:3:"106";s:3:"ke ";s:3:"107";s:3:"o i";s:3:"108";s:3:"ovi";s:3:"109";s:3:"red";s:3:"110";s:3:"riv";s:3:"111";s:3:"te ";s:3:"112";s:3:"bi ";s:3:"113";s:3:"e o";s:3:"114";s:3:"god";s:3:"115";s:3:"i d";s:3:"116";s:3:"lek";s:3:"117";s:3:"umi";s:3:"118";s:3:"zvo";s:3:"119";s:3:"din";s:3:"120";s:3:"e u";s:3:"121";s:3:"ene";s:3:"122";s:3:"jed";s:3:"123";s:3:"ji ";s:3:"124";s:3:"lje";s:3:"125";s:3:"nog";s:3:"126";s:3:"su ";s:3:"127";s:3:" a ";s:3:"128";s:3:" el";s:3:"129";s:3:" mi";s:3:"130";s:3:" o ";s:3:"131";s:3:"a d";s:3:"132";s:3:"alu";s:3:"133";s:3:"ele";s:3:"134";s:3:"i u";s:3:"135";s:3:"izv";s:3:"136";s:3:"ktr";s:3:"137";s:3:"lum";s:3:"138";s:3:"o d";s:3:"139";s:3:"ori";s:3:"140";s:3:"rad";s:3:"141";s:3:"sto";s:3:"142";s:3:"a k";s:3:"143";s:3:"anj";s:3:"144";s:3:"ava";s:3:"145";s:3:"e k";s:3:"146";s:3:"men";s:3:"147";s:3:"nic";s:3:"148";s:3:"o j";s:3:"149";s:3:"oj ";s:3:"150";s:3:"ove";s:3:"151";s:3:"ski";s:3:"152";s:3:"tvr";s:3:"153";s:3:"una";s:3:"154";s:3:"vor";s:3:"155";s:3:" di";s:3:"156";s:3:" no";s:3:"157";s:3:" s ";s:3:"158";s:3:" ta";s:3:"159";s:3:" tv";s:3:"160";s:3:"i i";s:3:"161";s:3:"i o";s:3:"162";s:3:"kak";s:3:"163";s:4:"roÅ¡";s:3:"164";s:3:"sko";s:3:"165";s:3:"vod";s:3:"166";s:3:" sa";s:3:"167";s:4:" će";s:3:"168";s:3:"a b";s:3:"169";s:3:"adi";s:3:"170";s:3:"amo";s:3:"171";s:3:"eni";s:3:"172";s:3:"gov";s:3:"173";s:3:"iju";s:3:"174";s:3:"ku ";s:3:"175";s:3:"o 
n";s:3:"176";s:3:"ora";s:3:"177";s:3:"rav";s:3:"178";s:3:"ruj";s:3:"179";s:3:"smo";s:3:"180";s:3:"tav";s:3:"181";s:3:"tru";s:3:"182";s:3:"u p";s:3:"183";s:3:"ve ";s:3:"184";s:3:" in";s:3:"185";s:3:" pl";s:3:"186";s:3:"aci";s:3:"187";s:3:"bit";s:3:"188";s:3:"de ";s:3:"189";s:4:"diÅ¡";s:3:"190";s:3:"ema";s:3:"191";s:3:"i m";s:3:"192";s:3:"ika";s:3:"193";s:4:"iÅ¡t";s:3:"194";s:3:"jer";s:3:"195";s:3:"ki ";s:3:"196";s:3:"mog";s:3:"197";s:3:"nik";s:3:"198";s:3:"nov";s:3:"199";s:3:"nu ";s:3:"200";s:3:"oji";s:3:"201";s:3:"oli";s:3:"202";s:3:"pla";s:3:"203";s:3:"pod";s:3:"204";s:3:"st ";s:3:"205";s:3:"sti";s:3:"206";s:3:"tra";s:3:"207";s:3:"tre";s:3:"208";s:3:"vo ";s:3:"209";s:3:" sm";s:3:"210";s:4:" Å¡t";s:3:"211";s:3:"dan";s:3:"212";s:3:"e z";s:3:"213";s:3:"i t";s:3:"214";s:3:"io ";s:3:"215";s:3:"ist";s:3:"216";s:3:"kon";s:3:"217";s:3:"lo ";s:3:"218";s:3:"stv";s:3:"219";s:3:"u s";s:3:"220";s:3:"uje";s:3:"221";s:3:"ust";s:3:"222";s:4:"će ";s:3:"223";s:4:"ći ";s:3:"224";s:4:"Å¡to";s:3:"225";s:3:" dr";s:3:"226";s:3:" im";s:3:"227";s:3:" li";s:3:"228";s:3:"ada";s:3:"229";s:3:"aft";s:3:"230";s:3:"ani";s:3:"231";s:3:"ao ";s:3:"232";s:3:"ars";s:3:"233";s:3:"ata";s:3:"234";s:3:"e t";s:3:"235";s:3:"emo";s:3:"236";s:3:"i k";s:3:"237";s:3:"ine";s:3:"238";s:3:"jem";s:3:"239";s:3:"kov";s:3:"240";s:3:"lik";s:3:"241";s:3:"lji";s:3:"242";s:3:"mje";s:3:"243";s:3:"naf";s:3:"244";s:3:"ner";s:3:"245";s:3:"nih";s:3:"246";s:3:"nja";s:3:"247";s:3:"ogo";s:3:"248";s:3:"oiz";s:3:"249";s:3:"ome";s:3:"250";s:3:"pot";s:3:"251";s:3:"ran";s:3:"252";s:3:"ri ";s:3:"253";s:3:"roi";s:3:"254";s:3:"rtk";s:3:"255";s:3:"ska";s:3:"256";s:3:"ter";s:3:"257";s:3:"u i";s:3:"258";s:3:"u o";s:3:"259";s:3:"vi ";s:3:"260";s:3:"vrt";s:3:"261";s:3:" me";s:3:"262";s:3:" ug";s:3:"263";s:3:"ak ";s:3:"264";s:3:"ama";s:3:"265";s:4:"drž";s:3:"266";s:3:"e e";s:3:"267";s:3:"e g";s:3:"268";s:3:"e m";s:3:"269";s:3:"em ";s:3:"270";s:3:"eme";s:3:"271";s:3:"enj";s:3:"272";s:3:"ent";s:3:"273";s:3:"er 
";s:3:"274";s:3:"ere";s:3:"275";s:3:"erg";s:3:"276";s:3:"eur";s:3:"277";s:3:"go ";s:3:"278";s:3:"i b";s:3:"279";s:3:"i z";s:3:"280";s:3:"jet";s:3:"281";s:3:"ksi";s:3:"282";s:3:"o u";s:3:"283";s:3:"oda";s:3:"284";s:3:"ona";s:3:"285";s:3:"pra";s:3:"286";s:3:"reb";s:3:"287";s:3:"rem";s:3:"288";s:3:"rop";s:3:"289";s:3:"tri";s:3:"290";s:4:"žav";s:3:"291";s:3:" ci";s:3:"292";s:3:" eu";s:3:"293";s:3:" re";s:3:"294";s:3:" te";s:3:"295";s:3:" uv";s:3:"296";s:3:" ve";s:3:"297";s:3:"aju";s:3:"298";s:3:"an ";s:3:"299";}s:5:"czech";a:300:{s:3:" pr";s:1:"0";s:3:" po";s:1:"1";s:4:"nà ";s:1:"2";s:3:"pro";s:1:"3";s:3:" na";s:1:"4";s:3:"na ";s:1:"5";s:4:" pÅ™";s:1:"6";s:3:"ch ";s:1:"7";s:3:" je";s:1:"8";s:3:" ne";s:1:"9";s:4:"že ";s:2:"10";s:4:" že";s:2:"11";s:3:" se";s:2:"12";s:3:" do";s:2:"13";s:3:" ro";s:2:"14";s:3:" st";s:2:"15";s:3:" v ";s:2:"16";s:3:" ve";s:2:"17";s:4:"pÅ™e";s:2:"18";s:3:"se ";s:2:"19";s:3:"ho ";s:2:"20";s:3:"sta";s:2:"21";s:3:" to";s:2:"22";s:3:" vy";s:2:"23";s:3:" za";s:2:"24";s:3:"ou ";s:2:"25";s:3:" a ";s:2:"26";s:3:"to ";s:2:"27";s:3:" by";s:2:"28";s:3:"la ";s:2:"29";s:3:"ce ";s:2:"30";s:3:"e v";s:2:"31";s:3:"ist";s:2:"32";s:3:"le ";s:2:"33";s:3:"pod";s:2:"34";s:4:"à p";s:2:"35";s:3:" vl";s:2:"36";s:3:"e n";s:2:"37";s:3:"e s";s:2:"38";s:3:"je ";s:2:"39";s:4:"ké ";s:2:"40";s:3:"by ";s:2:"41";s:3:"em ";s:2:"42";s:4:"ých";s:2:"43";s:3:" od";s:2:"44";s:3:"ova";s:2:"45";s:4:"Å™ed";s:2:"46";s:3:"dy ";s:2:"47";s:4:"enÃ";s:2:"48";s:3:"kon";s:2:"49";s:3:"li ";s:2:"50";s:4:"nÄ› ";s:2:"51";s:3:"str";s:2:"52";s:4:" zá";s:2:"53";s:3:"ve ";s:2:"54";s:3:" ka";s:2:"55";s:3:" sv";s:2:"56";s:3:"e p";s:2:"57";s:3:"it ";s:2:"58";s:4:"lád";s:2:"59";s:3:"oho";s:2:"60";s:3:"rov";s:2:"61";s:3:"roz";s:2:"62";s:3:"ter";s:2:"63";s:4:"vlá";s:2:"64";s:4:"Ãm ";s:2:"65";s:3:" ko";s:2:"66";s:3:"hod";s:2:"67";s:3:"nis";s:2:"68";s:5:"pÅ™Ã";s:2:"69";s:4:"ský";s:2:"70";s:3:" mi";s:2:"71";s:3:" ob";s:2:"72";s:3:" so";s:2:"73";s:3:"a 
p";s:2:"74";s:3:"ali";s:2:"75";s:3:"bud";s:2:"76";s:3:"edn";s:2:"77";s:3:"ick";s:2:"78";s:3:"kte";s:2:"79";s:3:"ku ";s:2:"80";s:3:"o s";s:2:"81";s:3:"al ";s:2:"82";s:3:"ci ";s:2:"83";s:3:"e t";s:2:"84";s:3:"il ";s:2:"85";s:3:"ny ";s:2:"86";s:4:"né ";s:2:"87";s:3:"odl";s:2:"88";s:4:"ová";s:2:"89";s:3:"rot";s:2:"90";s:3:"sou";s:2:"91";s:5:"ánÃ";s:2:"92";s:3:" bu";s:2:"93";s:3:" mo";s:2:"94";s:3:" o ";s:2:"95";s:3:"ast";s:2:"96";s:3:"byl";s:2:"97";s:3:"de ";s:2:"98";s:3:"ek ";s:2:"99";s:3:"ost";s:3:"100";s:4:" mÃ";s:3:"101";s:3:" ta";s:3:"102";s:3:"es ";s:3:"103";s:3:"jed";s:3:"104";s:3:"ky ";s:3:"105";s:3:"las";s:3:"106";s:3:"m p";s:3:"107";s:3:"nes";s:3:"108";s:4:"nÃm";s:3:"109";s:3:"ran";s:3:"110";s:3:"rem";s:3:"111";s:3:"ros";s:3:"112";s:4:"ého";s:3:"113";s:3:" de";s:3:"114";s:3:" kt";s:3:"115";s:3:" ni";s:3:"116";s:3:" si";s:3:"117";s:4:" vý";s:3:"118";s:3:"at ";s:3:"119";s:4:"jà ";s:3:"120";s:4:"ký ";s:3:"121";s:3:"mi ";s:3:"122";s:3:"pre";s:3:"123";s:3:"tak";s:3:"124";s:3:"tan";s:3:"125";s:3:"y v";s:3:"126";s:4:"Å™ek";s:3:"127";s:3:" ch";s:3:"128";s:3:" li";s:3:"129";s:4:" ná";s:3:"130";s:3:" pa";s:3:"131";s:4:" Å™e";s:3:"132";s:3:"da ";s:3:"133";s:3:"dle";s:3:"134";s:3:"dne";s:3:"135";s:3:"i p";s:3:"136";s:3:"i v";s:3:"137";s:3:"ly ";s:3:"138";s:3:"min";s:3:"139";s:3:"o n";s:3:"140";s:3:"o v";s:3:"141";s:3:"pol";s:3:"142";s:3:"tra";s:3:"143";s:3:"val";s:3:"144";s:4:"vnÃ";s:3:"145";s:4:"Ãch";s:3:"146";s:4:"ý p";s:3:"147";s:4:"Å™ej";s:3:"148";s:3:" ce";s:3:"149";s:3:" kd";s:3:"150";s:3:" le";s:3:"151";s:3:"a s";s:3:"152";s:3:"a z";s:3:"153";s:3:"cen";s:3:"154";s:3:"e k";s:3:"155";s:3:"eds";s:3:"156";s:3:"ekl";s:3:"157";s:3:"emi";s:3:"158";s:3:"kl ";s:3:"159";s:3:"lat";s:3:"160";s:3:"lo ";s:3:"161";s:4:"mié";s:3:"162";s:3:"nov";s:3:"163";s:3:"pra";s:3:"164";s:3:"sku";s:3:"165";s:4:"ské";s:3:"166";s:3:"sti";s:3:"167";s:3:"tav";s:3:"168";s:3:"ti ";s:3:"169";s:3:"ty ";s:3:"170";s:4:"ván";s:3:"171";s:4:"vé ";s:3:"172";s:3:"y n";s:3:"173";s:3:"y s";s:3:"174";s:4:"à 
s";s:3:"175";s:4:"à v";s:3:"176";s:4:"Ä› p";s:3:"177";s:3:" dn";s:3:"178";s:4:" nÄ›";s:3:"179";s:3:" sp";s:3:"180";s:4:" Äs";s:3:"181";s:3:"a n";s:3:"182";s:3:"a t";s:3:"183";s:3:"ak ";s:3:"184";s:4:"dnÃ";s:3:"185";s:3:"doh";s:3:"186";s:3:"e b";s:3:"187";s:3:"e m";s:3:"188";s:3:"ejn";s:3:"189";s:3:"ena";s:3:"190";s:3:"est";s:3:"191";s:3:"ini";s:3:"192";s:3:"m z";s:3:"193";s:3:"nal";s:3:"194";s:3:"nou";s:3:"195";s:4:"ná ";s:3:"196";s:3:"ovi";s:3:"197";s:4:"ové";s:3:"198";s:4:"ový";s:3:"199";s:3:"rsk";s:3:"200";s:4:"stá";s:3:"201";s:4:"tà ";s:3:"202";s:4:"tÅ™e";s:3:"203";s:4:"tů ";s:3:"204";s:3:"ude";s:3:"205";s:3:"za ";s:3:"206";s:4:"é p";s:3:"207";s:4:"ém ";s:3:"208";s:4:"à d";s:3:"209";s:3:" ir";s:3:"210";s:3:" zv";s:3:"211";s:3:"ale";s:3:"212";s:4:"anÄ›";s:3:"213";s:3:"ave";s:3:"214";s:4:"cké";s:3:"215";s:3:"den";s:3:"216";s:3:"e z";s:3:"217";s:3:"ech";s:3:"218";s:3:"en ";s:3:"219";s:4:"erý";s:3:"220";s:3:"hla";s:3:"221";s:3:"i s";s:3:"222";s:4:"iér";s:3:"223";s:3:"lov";s:3:"224";s:3:"mu ";s:3:"225";s:3:"neb";s:3:"226";s:3:"nic";s:3:"227";s:3:"o b";s:3:"228";s:3:"o m";s:3:"229";s:3:"pad";s:3:"230";s:3:"pot";s:3:"231";s:3:"rav";s:3:"232";s:3:"rop";s:3:"233";s:4:"rý ";s:3:"234";s:3:"sed";s:3:"235";s:3:"si ";s:3:"236";s:3:"t p";s:3:"237";s:3:"tic";s:3:"238";s:3:"tu ";s:3:"239";s:4:"tÄ› ";s:3:"240";s:3:"u p";s:3:"241";s:3:"u v";s:3:"242";s:4:"vá ";s:3:"243";s:5:"výš";s:3:"244";s:4:"zvý";s:3:"245";s:5:"ÄnÃ";s:3:"246";s:5:"řà ";s:3:"247";s:4:"ům ";s:3:"248";s:3:" bl";s:3:"249";s:3:" br";s:3:"250";s:3:" ho";s:3:"251";s:3:" ja";s:3:"252";s:3:" re";s:3:"253";s:3:" s ";s:3:"254";s:3:" z ";s:3:"255";s:3:" zd";s:3:"256";s:3:"a v";s:3:"257";s:3:"ani";s:3:"258";s:3:"ato";s:3:"259";s:3:"bla";s:3:"260";s:3:"bri";s:3:"261";s:4:"eÄn";s:3:"262";s:4:"eÅ™e";s:3:"263";s:3:"h v";s:3:"264";s:3:"i n";s:3:"265";s:3:"ie ";s:3:"266";s:3:"ila";s:3:"267";s:3:"irs";s:3:"268";s:3:"ite";s:3:"269";s:3:"kov";s:3:"270";s:3:"nos";s:3:"271";s:3:"o o";s:3:"272";s:3:"o 
p";s:3:"273";s:3:"oce";s:3:"274";s:3:"ody";s:3:"275";s:3:"ohl";s:3:"276";s:3:"oli";s:3:"277";s:3:"ovo";s:3:"278";s:3:"pla";s:3:"279";s:4:"poÄ";s:3:"280";s:4:"prá";s:3:"281";s:3:"ra ";s:3:"282";s:3:"rit";s:3:"283";s:3:"rod";s:3:"284";s:3:"ry ";s:3:"285";s:3:"sd ";s:3:"286";s:3:"sko";s:3:"287";s:3:"ssd";s:3:"288";s:3:"tel";s:3:"289";s:3:"u s";s:3:"290";s:3:"vat";s:3:"291";s:4:"veÅ™";s:3:"292";s:3:"vit";s:3:"293";s:3:"vla";s:3:"294";s:3:"y p";s:3:"295";s:4:"áln";s:3:"296";s:4:"Äss";s:3:"297";s:4:"Å¡en";s:3:"298";s:3:" al";s:3:"299";}s:6:"danish";a:300:{s:3:"er ";s:1:"0";s:3:"en ";s:1:"1";s:3:" de";s:1:"2";s:3:"et ";s:1:"3";s:3:"der";s:1:"4";s:3:"de ";s:1:"5";s:3:"for";s:1:"6";s:3:" fo";s:1:"7";s:3:" i ";s:1:"8";s:3:"at ";s:1:"9";s:3:" at";s:2:"10";s:3:"re ";s:2:"11";s:3:"det";s:2:"12";s:3:" ha";s:2:"13";s:3:"nde";s:2:"14";s:3:"ere";s:2:"15";s:3:"ing";s:2:"16";s:3:"den";s:2:"17";s:3:" me";s:2:"18";s:3:" og";s:2:"19";s:3:"ger";s:2:"20";s:3:"ter";s:2:"21";s:3:" er";s:2:"22";s:3:" si";s:2:"23";s:3:"and";s:2:"24";s:3:" af";s:2:"25";s:3:"or ";s:2:"26";s:3:" st";s:2:"27";s:3:" ti";s:2:"28";s:3:" en";s:2:"29";s:3:"og ";s:2:"30";s:3:"ar ";s:2:"31";s:3:"il ";s:2:"32";s:3:"r s";s:2:"33";s:3:"ige";s:2:"34";s:3:"til";s:2:"35";s:3:"ke ";s:2:"36";s:3:"r e";s:2:"37";s:3:"af ";s:2:"38";s:3:"kke";s:2:"39";s:3:" ma";s:2:"40";s:4:" pÃ¥";s:2:"41";s:3:"om ";s:2:"42";s:4:"pÃ¥ ";s:2:"43";s:3:"ed ";s:2:"44";s:3:"ge ";s:2:"45";s:3:"end";s:2:"46";s:3:"nge";s:2:"47";s:3:"t s";s:2:"48";s:3:"e s";s:2:"49";s:3:"ler";s:2:"50";s:3:" sk";s:2:"51";s:3:"els";s:2:"52";s:3:"ern";s:2:"53";s:3:"sig";s:2:"54";s:3:"ne ";s:2:"55";s:3:"lig";s:2:"56";s:3:"r d";s:2:"57";s:3:"ska";s:2:"58";s:3:" vi";s:2:"59";s:3:"har";s:2:"60";s:3:" be";s:2:"61";s:3:" se";s:2:"62";s:3:"an ";s:2:"63";s:3:"ikk";s:2:"64";s:3:"lle";s:2:"65";s:3:"gen";s:2:"66";s:3:"n f";s:2:"67";s:3:"ste";s:2:"68";s:3:"t a";s:2:"69";s:3:"t d";s:2:"70";s:3:"rin";s:2:"71";s:3:" ik";s:2:"72";s:3:"es ";s:2:"73";s:3:"ng ";s:2:"74";s:3:"ver";s:2:"75";s:3:"r 
b";s:2:"76";s:3:"sen";s:2:"77";s:3:"ede";s:2:"78";s:3:"men";s:2:"79";s:3:"r i";s:2:"80";s:3:" he";s:2:"81";s:3:" et";s:2:"82";s:3:"ig ";s:2:"83";s:3:"lan";s:2:"84";s:3:"med";s:2:"85";s:3:"nd ";s:2:"86";s:3:"rne";s:2:"87";s:3:" da";s:2:"88";s:3:" in";s:2:"89";s:3:"e t";s:2:"90";s:3:"mme";s:2:"91";s:3:"und";s:2:"92";s:3:" om";s:2:"93";s:3:"e e";s:2:"94";s:3:"e m";s:2:"95";s:3:"her";s:2:"96";s:3:"le ";s:2:"97";s:3:"r f";s:2:"98";s:3:"t f";s:2:"99";s:4:"sÃ¥ ";s:3:"100";s:3:"te ";s:3:"101";s:3:" so";s:3:"102";s:3:"ele";s:3:"103";s:3:"t e";s:3:"104";s:3:" ko";s:3:"105";s:3:"est";s:3:"106";s:3:"ske";s:3:"107";s:3:" bl";s:3:"108";s:3:"e f";s:3:"109";s:3:"ekt";s:3:"110";s:3:"mar";s:3:"111";s:3:"bru";s:3:"112";s:3:"e a";s:3:"113";s:3:"el ";s:3:"114";s:3:"ers";s:3:"115";s:3:"ret";s:3:"116";s:3:"som";s:3:"117";s:3:"tte";s:3:"118";s:3:"ve ";s:3:"119";s:3:" la";s:3:"120";s:3:" ud";s:3:"121";s:3:" ve";s:3:"122";s:3:"age";s:3:"123";s:3:"e d";s:3:"124";s:3:"e h";s:3:"125";s:3:"lse";s:3:"126";s:3:"man";s:3:"127";s:3:"rug";s:3:"128";s:3:"sel";s:3:"129";s:3:"ser";s:3:"130";s:3:" fi";s:3:"131";s:3:" op";s:3:"132";s:3:" pr";s:3:"133";s:3:"dt ";s:3:"134";s:3:"e i";s:3:"135";s:3:"n m";s:3:"136";s:3:"r m";s:3:"137";s:3:" an";s:3:"138";s:3:" re";s:3:"139";s:3:" sa";s:3:"140";s:3:"ion";s:3:"141";s:3:"ner";s:3:"142";s:3:"res";s:3:"143";s:3:"t i";s:3:"144";s:3:"get";s:3:"145";s:3:"n s";s:3:"146";s:3:"one";s:3:"147";s:3:"orb";s:3:"148";s:3:"t h";s:3:"149";s:3:"vis";s:3:"150";s:4:"Ã¥r ";s:3:"151";s:3:" fr";s:3:"152";s:3:"bil";s:3:"153";s:3:"e k";s:3:"154";s:3:"ens";s:3:"155";s:3:"ind";s:3:"156";s:3:"omm";s:3:"157";s:3:"t m";s:3:"158";s:3:" hv";s:3:"159";s:3:" je";s:3:"160";s:3:"dan";s:3:"161";s:3:"ent";s:3:"162";s:3:"fte";s:3:"163";s:3:"nin";s:3:"164";s:3:" mi";s:3:"165";s:3:"e o";s:3:"166";s:3:"e p";s:3:"167";s:3:"n o";s:3:"168";s:3:"nte";s:3:"169";s:3:" ku";s:3:"170";s:3:"ell";s:3:"171";s:3:"nas";s:3:"172";s:3:"ore";s:3:"173";s:3:"r h";s:3:"174";s:3:"r 
k";s:3:"175";s:3:"sta";s:3:"176";s:3:"sto";s:3:"177";s:3:"dag";s:3:"178";s:3:"eri";s:3:"179";s:3:"kun";s:3:"180";s:3:"lde";s:3:"181";s:3:"mer";s:3:"182";s:3:"r a";s:3:"183";s:3:"r v";s:3:"184";s:3:"rek";s:3:"185";s:3:"rer";s:3:"186";s:3:"t o";s:3:"187";s:3:"tor";s:3:"188";s:4:"tør";s:3:"189";s:4:" fÃ¥";s:3:"190";s:4:" mÃ¥";s:3:"191";s:3:" to";s:3:"192";s:3:"boe";s:3:"193";s:3:"che";s:3:"194";s:3:"e v";s:3:"195";s:3:"i d";s:3:"196";s:3:"ive";s:3:"197";s:3:"kab";s:3:"198";s:3:"ns ";s:3:"199";s:3:"oel";s:3:"200";s:3:"se ";s:3:"201";s:3:"t v";s:3:"202";s:3:" al";s:3:"203";s:3:" bo";s:3:"204";s:3:" un";s:3:"205";s:3:"ans";s:3:"206";s:3:"dre";s:3:"207";s:3:"ire";s:3:"208";s:4:"køb";s:3:"209";s:3:"ors";s:3:"210";s:3:"ove";s:3:"211";s:3:"ren";s:3:"212";s:3:"t b";s:3:"213";s:4:"ør ";s:3:"214";s:3:" ka";s:3:"215";s:3:"ald";s:3:"216";s:3:"bet";s:3:"217";s:3:"gt ";s:3:"218";s:3:"isk";s:3:"219";s:3:"kal";s:3:"220";s:3:"kom";s:3:"221";s:3:"lev";s:3:"222";s:3:"n d";s:3:"223";s:3:"n i";s:3:"224";s:3:"pri";s:3:"225";s:3:"r p";s:3:"226";s:3:"rbr";s:3:"227";s:4:"søg";s:3:"228";s:3:"tel";s:3:"229";s:4:" sÃ¥";s:3:"230";s:3:" te";s:3:"231";s:3:" va";s:3:"232";s:3:"al ";s:3:"233";s:3:"dir";s:3:"234";s:3:"eje";s:3:"235";s:3:"fis";s:3:"236";s:4:"gsÃ¥";s:3:"237";s:3:"isc";s:3:"238";s:3:"jer";s:3:"239";s:3:"ker";s:3:"240";s:3:"ogs";s:3:"241";s:3:"sch";s:3:"242";s:3:"st ";s:3:"243";s:3:"t k";s:3:"244";s:3:"uge";s:3:"245";s:3:" di";s:3:"246";s:3:"ag ";s:3:"247";s:3:"d a";s:3:"248";s:3:"g i";s:3:"249";s:3:"ill";s:3:"250";s:3:"l a";s:3:"251";s:3:"lsk";s:3:"252";s:3:"n a";s:3:"253";s:3:"on ";s:3:"254";s:3:"sam";s:3:"255";s:3:"str";s:3:"256";s:3:"tet";s:3:"257";s:3:"var";s:3:"258";s:3:" mo";s:3:"259";s:3:"art";s:3:"260";s:3:"ash";s:3:"261";s:3:"att";s:3:"262";s:3:"e b";s:3:"263";s:3:"han";s:3:"264";s:3:"hav";s:3:"265";s:3:"kla";s:3:"266";s:3:"kon";s:3:"267";s:3:"n t";s:3:"268";s:3:"ned";s:3:"269";s:3:"r o";s:3:"270";s:3:"ra 
";s:3:"271";s:3:"rre";s:3:"272";s:3:"ves";s:3:"273";s:3:"vil";s:3:"274";s:3:" el";s:3:"275";s:3:" kr";s:3:"276";s:3:" ov";s:3:"277";s:3:"ann";s:3:"278";s:3:"e u";s:3:"279";s:3:"ess";s:3:"280";s:3:"fra";s:3:"281";s:3:"g a";s:3:"282";s:3:"g d";s:3:"283";s:3:"int";s:3:"284";s:3:"ngs";s:3:"285";s:3:"rde";s:3:"286";s:3:"tra";s:3:"287";s:4:" Ã¥r";s:3:"288";s:3:"akt";s:3:"289";s:3:"asi";s:3:"290";s:3:"em ";s:3:"291";s:3:"gel";s:3:"292";s:3:"gym";s:3:"293";s:3:"hol";s:3:"294";s:3:"kan";s:3:"295";s:3:"mna";s:3:"296";s:3:"n h";s:3:"297";s:3:"nsk";s:3:"298";s:3:"old";s:3:"299";}s:5:"dutch";a:300:{s:3:"en ";s:1:"0";s:3:"de ";s:1:"1";s:3:" de";s:1:"2";s:3:"et ";s:1:"3";s:3:"an ";s:1:"4";s:3:" he";s:1:"5";s:3:"er ";s:1:"6";s:3:" va";s:1:"7";s:3:"n d";s:1:"8";s:3:"van";s:1:"9";s:3:"een";s:2:"10";s:3:"het";s:2:"11";s:3:" ge";s:2:"12";s:3:"oor";s:2:"13";s:3:" ee";s:2:"14";s:3:"der";s:2:"15";s:3:" en";s:2:"16";s:3:"ij ";s:2:"17";s:3:"aar";s:2:"18";s:3:"gen";s:2:"19";s:3:"te ";s:2:"20";s:3:"ver";s:2:"21";s:3:" in";s:2:"22";s:3:" me";s:2:"23";s:3:"aan";s:2:"24";s:3:"den";s:2:"25";s:3:" we";s:2:"26";s:3:"at ";s:2:"27";s:3:"in ";s:2:"28";s:3:" da";s:2:"29";s:3:" te";s:2:"30";s:3:"eer";s:2:"31";s:3:"nde";s:2:"32";s:3:"ter";s:2:"33";s:3:"ste";s:2:"34";s:3:"n v";s:2:"35";s:3:" vo";s:2:"36";s:3:" zi";s:2:"37";s:3:"ing";s:2:"38";s:3:"n h";s:2:"39";s:3:"voo";s:2:"40";s:3:"is ";s:2:"41";s:3:" op";s:2:"42";s:3:"tie";s:2:"43";s:3:" aa";s:2:"44";s:3:"ede";s:2:"45";s:3:"erd";s:2:"46";s:3:"ers";s:2:"47";s:3:" be";s:2:"48";s:3:"eme";s:2:"49";s:3:"ten";s:2:"50";s:3:"ken";s:2:"51";s:3:"n e";s:2:"52";s:3:" ni";s:2:"53";s:3:" ve";s:2:"54";s:3:"ent";s:2:"55";s:3:"ijn";s:2:"56";s:3:"jn ";s:2:"57";s:3:"mee";s:2:"58";s:3:"iet";s:2:"59";s:3:"n w";s:2:"60";s:3:"ng ";s:2:"61";s:3:"nie";s:2:"62";s:3:" is";s:2:"63";s:3:"cht";s:2:"64";s:3:"dat";s:2:"65";s:3:"ere";s:2:"66";s:3:"ie ";s:2:"67";s:3:"ijk";s:2:"68";s:3:"n b";s:2:"69";s:3:"rde";s:2:"70";s:3:"ar ";s:2:"71";s:3:"e b";s:2:"72";s:3:"e 
a";s:2:"73";s:3:"met";s:2:"74";s:3:"t d";s:2:"75";s:3:"el ";s:2:"76";s:3:"ond";s:2:"77";s:3:"t h";s:2:"78";s:3:" al";s:2:"79";s:3:"e w";s:2:"80";s:3:"op ";s:2:"81";s:3:"ren";s:2:"82";s:3:" di";s:2:"83";s:3:" on";s:2:"84";s:3:"al ";s:2:"85";s:3:"and";s:2:"86";s:3:"bij";s:2:"87";s:3:"zij";s:2:"88";s:3:" bi";s:2:"89";s:3:" hi";s:2:"90";s:3:" wi";s:2:"91";s:3:"or ";s:2:"92";s:3:"r d";s:2:"93";s:3:"t v";s:2:"94";s:3:" wa";s:2:"95";s:3:"e h";s:2:"96";s:3:"lle";s:2:"97";s:3:"rt ";s:2:"98";s:3:"ang";s:2:"99";s:3:"hij";s:3:"100";s:3:"men";s:3:"101";s:3:"n a";s:3:"102";s:3:"n z";s:3:"103";s:3:"rs ";s:3:"104";s:3:" om";s:3:"105";s:3:"e o";s:3:"106";s:3:"e v";s:3:"107";s:3:"end";s:3:"108";s:3:"est";s:3:"109";s:3:"n t";s:3:"110";s:3:"par";s:3:"111";s:3:" pa";s:3:"112";s:3:" pr";s:3:"113";s:3:" ze";s:3:"114";s:3:"e g";s:3:"115";s:3:"e p";s:3:"116";s:3:"n p";s:3:"117";s:3:"ord";s:3:"118";s:3:"oud";s:3:"119";s:3:"raa";s:3:"120";s:3:"sch";s:3:"121";s:3:"t e";s:3:"122";s:3:"ege";s:3:"123";s:3:"ich";s:3:"124";s:3:"ien";s:3:"125";s:3:"aat";s:3:"126";s:3:"ek ";s:3:"127";s:3:"len";s:3:"128";s:3:"n m";s:3:"129";s:3:"nge";s:3:"130";s:3:"nt ";s:3:"131";s:3:"ove";s:3:"132";s:3:"rd ";s:3:"133";s:3:"wer";s:3:"134";s:3:" ma";s:3:"135";s:3:" mi";s:3:"136";s:3:"daa";s:3:"137";s:3:"e k";s:3:"138";s:3:"lij";s:3:"139";s:3:"mer";s:3:"140";s:3:"n g";s:3:"141";s:3:"n o";s:3:"142";s:3:"om ";s:3:"143";s:3:"sen";s:3:"144";s:3:"t b";s:3:"145";s:3:"wij";s:3:"146";s:3:" ho";s:3:"147";s:3:"e m";s:3:"148";s:3:"ele";s:3:"149";s:3:"gem";s:3:"150";s:3:"heb";s:3:"151";s:3:"pen";s:3:"152";s:3:"ude";s:3:"153";s:3:" bo";s:3:"154";s:3:" ja";s:3:"155";s:3:"die";s:3:"156";s:3:"e e";s:3:"157";s:3:"eli";s:3:"158";s:3:"erk";s:3:"159";s:3:"le ";s:3:"160";s:3:"pro";s:3:"161";s:3:"rij";s:3:"162";s:3:" er";s:3:"163";s:3:" za";s:3:"164";s:3:"e d";s:3:"165";s:3:"ens";s:3:"166";s:3:"ind";s:3:"167";s:3:"ke ";s:3:"168";s:3:"n k";s:3:"169";s:3:"nd ";s:3:"170";s:3:"nen";s:3:"171";s:3:"nte";s:3:"172";s:3:"r h";s:3:"173";s:3:"s 
d";s:3:"174";s:3:"s e";s:3:"175";s:3:"t z";s:3:"176";s:3:" b ";s:3:"177";s:3:" co";s:3:"178";s:3:" ik";s:3:"179";s:3:" ko";s:3:"180";s:3:" ov";s:3:"181";s:3:"eke";s:3:"182";s:3:"hou";s:3:"183";s:3:"ik ";s:3:"184";s:3:"iti";s:3:"185";s:3:"lan";s:3:"186";s:3:"ns ";s:3:"187";s:3:"t g";s:3:"188";s:3:"t m";s:3:"189";s:3:" do";s:3:"190";s:3:" le";s:3:"191";s:3:" zo";s:3:"192";s:3:"ams";s:3:"193";s:3:"e z";s:3:"194";s:3:"g v";s:3:"195";s:3:"it ";s:3:"196";s:3:"je ";s:3:"197";s:3:"ls ";s:3:"198";s:3:"maa";s:3:"199";s:3:"n i";s:3:"200";s:3:"nke";s:3:"201";s:3:"rke";s:3:"202";s:3:"uit";s:3:"203";s:3:" ha";s:3:"204";s:3:" ka";s:3:"205";s:3:" mo";s:3:"206";s:3:" re";s:3:"207";s:3:" st";s:3:"208";s:3:" to";s:3:"209";s:3:"age";s:3:"210";s:3:"als";s:3:"211";s:3:"ark";s:3:"212";s:3:"art";s:3:"213";s:3:"ben";s:3:"214";s:3:"e r";s:3:"215";s:3:"e s";s:3:"216";s:3:"ert";s:3:"217";s:3:"eze";s:3:"218";s:3:"ht ";s:3:"219";s:3:"ijd";s:3:"220";s:3:"lem";s:3:"221";s:3:"r v";s:3:"222";s:3:"rte";s:3:"223";s:3:"t p";s:3:"224";s:3:"zeg";s:3:"225";s:3:"zic";s:3:"226";s:3:"aak";s:3:"227";s:3:"aal";s:3:"228";s:3:"ag ";s:3:"229";s:3:"ale";s:3:"230";s:3:"bbe";s:3:"231";s:3:"ch ";s:3:"232";s:3:"e t";s:3:"233";s:3:"ebb";s:3:"234";s:3:"erz";s:3:"235";s:3:"ft ";s:3:"236";s:3:"ge ";s:3:"237";s:3:"led";s:3:"238";s:3:"mst";s:3:"239";s:3:"n n";s:3:"240";s:3:"oek";s:3:"241";s:3:"r i";s:3:"242";s:3:"t o";s:3:"243";s:3:"t w";s:3:"244";s:3:"tel";s:3:"245";s:3:"tte";s:3:"246";s:3:"uur";s:3:"247";s:3:"we ";s:3:"248";s:3:"zit";s:3:"249";s:3:" af";s:3:"250";s:3:" li";s:3:"251";s:3:" ui";s:3:"252";s:3:"ak ";s:3:"253";s:3:"all";s:3:"254";s:3:"aut";s:3:"255";s:3:"doo";s:3:"256";s:3:"e i";s:3:"257";s:3:"ene";s:3:"258";s:3:"erg";s:3:"259";s:3:"ete";s:3:"260";s:3:"ges";s:3:"261";s:3:"hee";s:3:"262";s:3:"jaa";s:3:"263";s:3:"jke";s:3:"264";s:3:"kee";s:3:"265";s:3:"kel";s:3:"266";s:3:"kom";s:3:"267";s:3:"lee";s:3:"268";s:3:"moe";s:3:"269";s:3:"n s";s:3:"270";s:3:"ort";s:3:"271";s:3:"rec";s:3:"272";s:3:"s o";s:3:"273";s:3:"s 
v";s:3:"274";s:3:"teg";s:3:"275";s:3:"tij";s:3:"276";s:3:"ven";s:3:"277";s:3:"waa";s:3:"278";s:3:"wel";s:3:"279";s:3:" an";s:3:"280";s:3:" au";s:3:"281";s:3:" bu";s:3:"282";s:3:" gr";s:3:"283";s:3:" pl";s:3:"284";s:3:" ti";s:3:"285";s:3:"'' ";s:3:"286";s:3:"ade";s:3:"287";s:3:"dag";s:3:"288";s:3:"e l";s:3:"289";s:3:"ech";s:3:"290";s:3:"eel";s:3:"291";s:3:"eft";s:3:"292";s:3:"ger";s:3:"293";s:3:"gt ";s:3:"294";s:3:"ig ";s:3:"295";s:3:"itt";s:3:"296";s:3:"j d";s:3:"297";s:3:"ppe";s:3:"298";s:3:"rda";s:3:"299";}s:7:"english";a:300:{s:3:" th";s:1:"0";s:3:"the";s:1:"1";s:3:"he ";s:1:"2";s:3:"ed ";s:1:"3";s:3:" to";s:1:"4";s:3:" in";s:1:"5";s:3:"er ";s:1:"6";s:3:"ing";s:1:"7";s:3:"ng ";s:1:"8";s:3:" an";s:1:"9";s:3:"nd ";s:2:"10";s:3:" of";s:2:"11";s:3:"and";s:2:"12";s:3:"to ";s:2:"13";s:3:"of ";s:2:"14";s:3:" co";s:2:"15";s:3:"at ";s:2:"16";s:3:"on ";s:2:"17";s:3:"in ";s:2:"18";s:3:" a ";s:2:"19";s:3:"d t";s:2:"20";s:3:" he";s:2:"21";s:3:"e t";s:2:"22";s:3:"ion";s:2:"23";s:3:"es ";s:2:"24";s:3:" re";s:2:"25";s:3:"re ";s:2:"26";s:3:"hat";s:2:"27";s:3:" sa";s:2:"28";s:3:" st";s:2:"29";s:3:" ha";s:2:"30";s:3:"her";s:2:"31";s:3:"tha";s:2:"32";s:3:"tio";s:2:"33";s:3:"or ";s:2:"34";s:3:" ''";s:2:"35";s:3:"en ";s:2:"36";s:3:" wh";s:2:"37";s:3:"e s";s:2:"38";s:3:"ent";s:2:"39";s:3:"n t";s:2:"40";s:3:"s a";s:2:"41";s:3:"as ";s:2:"42";s:3:"for";s:2:"43";s:3:"is ";s:2:"44";s:3:"t t";s:2:"45";s:3:" be";s:2:"46";s:3:"ld ";s:2:"47";s:3:"e a";s:2:"48";s:3:"rs ";s:2:"49";s:3:" wa";s:2:"50";s:3:"ut ";s:2:"51";s:3:"ve ";s:2:"52";s:3:"ll ";s:2:"53";s:3:"al ";s:2:"54";s:3:" ma";s:2:"55";s:3:"e i";s:2:"56";s:3:" fo";s:2:"57";s:3:"'s ";s:2:"58";s:3:"an ";s:2:"59";s:3:"est";s:2:"60";s:3:" hi";s:2:"61";s:3:" mo";s:2:"62";s:3:" se";s:2:"63";s:3:" pr";s:2:"64";s:3:"s t";s:2:"65";s:3:"ate";s:2:"66";s:3:"st ";s:2:"67";s:3:"ter";s:2:"68";s:3:"ere";s:2:"69";s:3:"ted";s:2:"70";s:3:"nt ";s:2:"71";s:3:"ver";s:2:"72";s:3:"d a";s:2:"73";s:3:" wi";s:2:"74";s:3:"se ";s:2:"75";s:3:"e 
c";s:2:"76";s:3:"ect";s:2:"77";s:3:"ns ";s:2:"78";s:3:" on";s:2:"79";s:3:"ly ";s:2:"80";s:3:"tol";s:2:"81";s:3:"ey ";s:2:"82";s:3:"r t";s:2:"83";s:3:" ca";s:2:"84";s:3:"ati";s:2:"85";s:3:"ts ";s:2:"86";s:3:"all";s:2:"87";s:3:" no";s:2:"88";s:3:"his";s:2:"89";s:3:"s o";s:2:"90";s:3:"ers";s:2:"91";s:3:"con";s:2:"92";s:3:"e o";s:2:"93";s:3:"ear";s:2:"94";s:3:"f t";s:2:"95";s:3:"e w";s:2:"96";s:3:"was";s:2:"97";s:3:"ons";s:2:"98";s:3:"sta";s:2:"99";s:3:"'' ";s:3:"100";s:3:"sti";s:3:"101";s:3:"n a";s:3:"102";s:3:"sto";s:3:"103";s:3:"t h";s:3:"104";s:3:" we";s:3:"105";s:3:"id ";s:3:"106";s:3:"th ";s:3:"107";s:3:" it";s:3:"108";s:3:"ce ";s:3:"109";s:3:" di";s:3:"110";s:3:"ave";s:3:"111";s:3:"d h";s:3:"112";s:3:"cou";s:3:"113";s:3:"pro";s:3:"114";s:3:"ad ";s:3:"115";s:3:"oll";s:3:"116";s:3:"ry ";s:3:"117";s:3:"d s";s:3:"118";s:3:"e m";s:3:"119";s:3:" so";s:3:"120";s:3:"ill";s:3:"121";s:3:"cti";s:3:"122";s:3:"te ";s:3:"123";s:3:"tor";s:3:"124";s:3:"eve";s:3:"125";s:3:"g t";s:3:"126";s:3:"it ";s:3:"127";s:3:" ch";s:3:"128";s:3:" de";s:3:"129";s:3:"hav";s:3:"130";s:3:"oul";s:3:"131";s:3:"ty ";s:3:"132";s:3:"uld";s:3:"133";s:3:"use";s:3:"134";s:3:" al";s:3:"135";s:3:"are";s:3:"136";s:3:"ch ";s:3:"137";s:3:"me ";s:3:"138";s:3:"out";s:3:"139";s:3:"ove";s:3:"140";s:3:"wit";s:3:"141";s:3:"ys ";s:3:"142";s:3:"chi";s:3:"143";s:3:"t a";s:3:"144";s:3:"ith";s:3:"145";s:3:"oth";s:3:"146";s:3:" ab";s:3:"147";s:3:" te";s:3:"148";s:3:" wo";s:3:"149";s:3:"s s";s:3:"150";s:3:"res";s:3:"151";s:3:"t w";s:3:"152";s:3:"tin";s:3:"153";s:3:"e b";s:3:"154";s:3:"e h";s:3:"155";s:3:"nce";s:3:"156";s:3:"t s";s:3:"157";s:3:"y t";s:3:"158";s:3:"e p";s:3:"159";s:3:"ele";s:3:"160";s:3:"hin";s:3:"161";s:3:"s i";s:3:"162";s:3:"nte";s:3:"163";s:3:" li";s:3:"164";s:3:"le ";s:3:"165";s:3:" do";s:3:"166";s:3:"aid";s:3:"167";s:3:"hey";s:3:"168";s:3:"ne ";s:3:"169";s:3:"s w";s:3:"170";s:3:" as";s:3:"171";s:3:" fr";s:3:"172";s:3:" tr";s:3:"173";s:3:"end";s:3:"174";s:3:"sai";s:3:"175";s:3:" el";s:3:"176";s:3:" 
ne";s:3:"177";s:3:" su";s:3:"178";s:3:"'t ";s:3:"179";s:3:"ay ";s:3:"180";s:3:"hou";s:3:"181";s:3:"ive";s:3:"182";s:3:"lec";s:3:"183";s:3:"n't";s:3:"184";s:3:" ye";s:3:"185";s:3:"but";s:3:"186";s:3:"d o";s:3:"187";s:3:"o t";s:3:"188";s:3:"y o";s:3:"189";s:3:" ho";s:3:"190";s:3:" me";s:3:"191";s:3:"be ";s:3:"192";s:3:"cal";s:3:"193";s:3:"e e";s:3:"194";s:3:"had";s:3:"195";s:3:"ple";s:3:"196";s:3:" at";s:3:"197";s:3:" bu";s:3:"198";s:3:" la";s:3:"199";s:3:"d b";s:3:"200";s:3:"s h";s:3:"201";s:3:"say";s:3:"202";s:3:"t i";s:3:"203";s:3:" ar";s:3:"204";s:3:"e f";s:3:"205";s:3:"ght";s:3:"206";s:3:"hil";s:3:"207";s:3:"igh";s:3:"208";s:3:"int";s:3:"209";s:3:"not";s:3:"210";s:3:"ren";s:3:"211";s:3:" is";s:3:"212";s:3:" pa";s:3:"213";s:3:" sh";s:3:"214";s:3:"ays";s:3:"215";s:3:"com";s:3:"216";s:3:"n s";s:3:"217";s:3:"r a";s:3:"218";s:3:"rin";s:3:"219";s:3:"y a";s:3:"220";s:3:" un";s:3:"221";s:3:"n c";s:3:"222";s:3:"om ";s:3:"223";s:3:"thi";s:3:"224";s:3:" mi";s:3:"225";s:3:"by ";s:3:"226";s:3:"d i";s:3:"227";s:3:"e d";s:3:"228";s:3:"e n";s:3:"229";s:3:"t o";s:3:"230";s:3:" by";s:3:"231";s:3:"e r";s:3:"232";s:3:"eri";s:3:"233";s:3:"old";s:3:"234";s:3:"ome";s:3:"235";s:3:"whe";s:3:"236";s:3:"yea";s:3:"237";s:3:" gr";s:3:"238";s:3:"ar ";s:3:"239";s:3:"ity";s:3:"240";s:3:"mpl";s:3:"241";s:3:"oun";s:3:"242";s:3:"one";s:3:"243";s:3:"ow ";s:3:"244";s:3:"r s";s:3:"245";s:3:"s f";s:3:"246";s:3:"tat";s:3:"247";s:3:" ba";s:3:"248";s:3:" vo";s:3:"249";s:3:"bou";s:3:"250";s:3:"sam";s:3:"251";s:3:"tim";s:3:"252";s:3:"vot";s:3:"253";s:3:"abo";s:3:"254";s:3:"ant";s:3:"255";s:3:"ds ";s:3:"256";s:3:"ial";s:3:"257";s:3:"ine";s:3:"258";s:3:"man";s:3:"259";s:3:"men";s:3:"260";s:3:" or";s:3:"261";s:3:" po";s:3:"262";s:3:"amp";s:3:"263";s:3:"can";s:3:"264";s:3:"der";s:3:"265";s:3:"e l";s:3:"266";s:3:"les";s:3:"267";s:3:"ny ";s:3:"268";s:3:"ot ";s:3:"269";s:3:"rec";s:3:"270";s:3:"tes";s:3:"271";s:3:"tho";s:3:"272";s:3:"ica";s:3:"273";s:3:"ild";s:3:"274";s:3:"ir 
";s:3:"275";s:3:"nde";s:3:"276";s:3:"ose";s:3:"277";s:3:"ous";s:3:"278";s:3:"pre";s:3:"279";s:3:"ste";s:3:"280";s:3:"era";s:3:"281";s:3:"per";s:3:"282";s:3:"r o";s:3:"283";s:3:"red";s:3:"284";s:3:"rie";s:3:"285";s:3:" bo";s:3:"286";s:3:" le";s:3:"287";s:3:"ali";s:3:"288";s:3:"ars";s:3:"289";s:3:"ore";s:3:"290";s:3:"ric";s:3:"291";s:3:"s m";s:3:"292";s:3:"str";s:3:"293";s:3:" fa";s:3:"294";s:3:"ess";s:3:"295";s:3:"ie ";s:3:"296";s:3:"ist";s:3:"297";s:3:"lat";s:3:"298";s:3:"uri";s:3:"299";}s:8:"estonian";a:300:{s:3:"st ";s:1:"0";s:3:" ka";s:1:"1";s:3:"on ";s:1:"2";s:3:"ja ";s:1:"3";s:3:" va";s:1:"4";s:3:" on";s:1:"5";s:3:" ja";s:1:"6";s:3:" ko";s:1:"7";s:3:"se ";s:1:"8";s:3:"ast";s:1:"9";s:3:"le ";s:2:"10";s:3:"es ";s:2:"11";s:3:"as ";s:2:"12";s:3:"is ";s:2:"13";s:3:"ud ";s:2:"14";s:3:" sa";s:2:"15";s:3:"da ";s:2:"16";s:3:"ga ";s:2:"17";s:3:" ta";s:2:"18";s:3:"aja";s:2:"19";s:3:"sta";s:2:"20";s:3:" ku";s:2:"21";s:3:" pe";s:2:"22";s:3:"a k";s:2:"23";s:3:"est";s:2:"24";s:3:"ist";s:2:"25";s:3:"ks ";s:2:"26";s:3:"ta ";s:2:"27";s:3:"al ";s:2:"28";s:3:"ava";s:2:"29";s:3:"id ";s:2:"30";s:3:"saa";s:2:"31";s:3:"mis";s:2:"32";s:3:"te ";s:2:"33";s:3:"val";s:2:"34";s:3:" et";s:2:"35";s:3:"nud";s:2:"36";s:3:" te";s:2:"37";s:3:"inn";s:2:"38";s:3:" se";s:2:"39";s:3:" tu";s:2:"40";s:3:"a v";s:2:"41";s:3:"alu";s:2:"42";s:3:"e k";s:2:"43";s:3:"ise";s:2:"44";s:3:"lu ";s:2:"45";s:3:"ma ";s:2:"46";s:3:"mes";s:2:"47";s:3:" mi";s:2:"48";s:3:"et ";s:2:"49";s:3:"iku";s:2:"50";s:3:"lin";s:2:"51";s:3:"ad ";s:2:"52";s:3:"el ";s:2:"53";s:3:"ime";s:2:"54";s:3:"ne ";s:2:"55";s:3:"nna";s:2:"56";s:3:" ha";s:2:"57";s:3:" in";s:2:"58";s:3:" ke";s:2:"59";s:4:" võ";s:2:"60";s:3:"a s";s:2:"61";s:3:"a t";s:2:"62";s:3:"ab ";s:2:"63";s:3:"e s";s:2:"64";s:3:"esi";s:2:"65";s:3:" la";s:2:"66";s:3:" li";s:2:"67";s:3:"e 
v";s:2:"68";s:3:"eks";s:2:"69";s:3:"ema";s:2:"70";s:3:"las";s:2:"71";s:3:"les";s:2:"72";s:3:"rju";s:2:"73";s:3:"tle";s:2:"74";s:3:"tsi";s:2:"75";s:3:"tus";s:2:"76";s:3:"upa";s:2:"77";s:3:"use";s:2:"78";s:3:"ust";s:2:"79";s:3:"var";s:2:"80";s:4:" lä";s:2:"81";s:3:"ali";s:2:"82";s:3:"arj";s:2:"83";s:3:"de ";s:2:"84";s:3:"ete";s:2:"85";s:3:"i t";s:2:"86";s:3:"iga";s:2:"87";s:3:"ilm";s:2:"88";s:3:"kui";s:2:"89";s:3:"li ";s:2:"90";s:3:"tul";s:2:"91";s:3:" ei";s:2:"92";s:3:" me";s:2:"93";s:4:" sõ";s:2:"94";s:3:"aal";s:2:"95";s:3:"ata";s:2:"96";s:3:"dus";s:2:"97";s:3:"ei ";s:2:"98";s:3:"nik";s:2:"99";s:3:"pea";s:3:"100";s:3:"s k";s:3:"101";s:3:"s o";s:3:"102";s:3:"sal";s:3:"103";s:4:"sõn";s:3:"104";s:3:"ter";s:3:"105";s:3:"ul ";s:3:"106";s:4:"või";s:3:"107";s:3:" el";s:3:"108";s:3:" ne";s:3:"109";s:3:"a j";s:3:"110";s:3:"ate";s:3:"111";s:3:"end";s:3:"112";s:3:"i k";s:3:"113";s:3:"ita";s:3:"114";s:3:"kar";s:3:"115";s:3:"kor";s:3:"116";s:3:"l o";s:3:"117";s:3:"lt ";s:3:"118";s:3:"maa";s:3:"119";s:3:"oli";s:3:"120";s:3:"sti";s:3:"121";s:3:"vad";s:3:"122";s:5:"ään";s:3:"123";s:3:" ju";s:3:"124";s:4:" jä";s:3:"125";s:4:" kü";s:3:"126";s:3:" ma";s:3:"127";s:3:" po";s:3:"128";s:4:" üt";s:3:"129";s:3:"aas";s:3:"130";s:3:"aks";s:3:"131";s:3:"at ";s:3:"132";s:3:"ed ";s:3:"133";s:3:"eri";s:3:"134";s:3:"hoi";s:3:"135";s:3:"i s";s:3:"136";s:3:"ka ";s:3:"137";s:3:"la ";s:3:"138";s:3:"nni";s:3:"139";s:3:"oid";s:3:"140";s:3:"pai";s:3:"141";s:3:"rit";s:3:"142";s:3:"us ";s:3:"143";s:4:"ütl";s:3:"144";s:3:" aa";s:3:"145";s:3:" lo";s:3:"146";s:3:" to";s:3:"147";s:3:" ve";s:3:"148";s:3:"a e";s:3:"149";s:3:"ada";s:3:"150";s:3:"aid";s:3:"151";s:3:"ami";s:3:"152";s:3:"and";s:3:"153";s:3:"dla";s:3:"154";s:3:"e j";s:3:"155";s:3:"ega";s:3:"156";s:3:"gi ";s:3:"157";s:3:"gu ";s:3:"158";s:3:"i p";s:3:"159";s:3:"idl";s:3:"160";s:3:"ik ";s:3:"161";s:3:"ini";s:3:"162";s:3:"jup";s:3:"163";s:3:"kal";s:3:"164";s:3:"kas";s:3:"165";s:3:"kes";s:3:"166";s:3:"koh";s:3:"167";s:3:"s e";s:3:"168";s:3:"s 
p";s:3:"169";s:3:"sel";s:3:"170";s:3:"sse";s:3:"171";s:3:"ui ";s:3:"172";s:3:" pi";s:3:"173";s:3:" si";s:3:"174";s:3:"aru";s:3:"175";s:3:"eda";s:3:"176";s:3:"eva";s:3:"177";s:3:"fil";s:3:"178";s:3:"i v";s:3:"179";s:3:"ida";s:3:"180";s:3:"ing";s:3:"181";s:5:"lää";s:3:"182";s:3:"me ";s:3:"183";s:3:"na ";s:3:"184";s:3:"nda";s:3:"185";s:3:"nim";s:3:"186";s:3:"ole";s:3:"187";s:3:"ots";s:3:"188";s:3:"ris";s:3:"189";s:3:"s l";s:3:"190";s:3:"sia";s:3:"191";s:3:"t p";s:3:"192";s:3:" en";s:3:"193";s:3:" mu";s:3:"194";s:3:" ol";s:3:"195";s:4:" põ";s:3:"196";s:3:" su";s:3:"197";s:4:" vä";s:3:"198";s:4:" üh";s:3:"199";s:3:"a l";s:3:"200";s:3:"a p";s:3:"201";s:3:"aga";s:3:"202";s:3:"ale";s:3:"203";s:3:"aps";s:3:"204";s:3:"arv";s:3:"205";s:3:"e a";s:3:"206";s:3:"ela";s:3:"207";s:3:"ika";s:3:"208";s:3:"lle";s:3:"209";s:3:"loo";s:3:"210";s:3:"mal";s:3:"211";s:3:"pet";s:3:"212";s:3:"t k";s:3:"213";s:3:"tee";s:3:"214";s:3:"tis";s:3:"215";s:3:"vat";s:3:"216";s:4:"äne";s:3:"217";s:4:"õnn";s:3:"218";s:3:" es";s:3:"219";s:3:" fi";s:3:"220";s:3:" vi";s:3:"221";s:3:"a i";s:3:"222";s:3:"a o";s:3:"223";s:3:"aab";s:3:"224";s:3:"aap";s:3:"225";s:3:"ala";s:3:"226";s:3:"alt";s:3:"227";s:3:"ama";s:3:"228";s:3:"anu";s:3:"229";s:3:"e p";s:3:"230";s:3:"e t";s:3:"231";s:3:"eal";s:3:"232";s:3:"eli";s:3:"233";s:3:"haa";s:3:"234";s:3:"hin";s:3:"235";s:3:"iva";s:3:"236";s:3:"kon";s:3:"237";s:3:"ku ";s:3:"238";s:3:"lik";s:3:"239";s:3:"lm ";s:3:"240";s:3:"min";s:3:"241";s:3:"n t";s:3:"242";s:3:"odu";s:3:"243";s:3:"oon";s:3:"244";s:3:"psa";s:3:"245";s:3:"ri ";s:3:"246";s:3:"si ";s:3:"247";s:3:"stu";s:3:"248";s:3:"t e";s:3:"249";s:3:"t s";s:3:"250";s:3:"ti ";s:3:"251";s:3:"ule";s:3:"252";s:3:"uur";s:3:"253";s:3:"vas";s:3:"254";s:3:"vee";s:3:"255";s:3:" ki";s:3:"256";s:3:" ni";s:3:"257";s:4:" nä";s:3:"258";s:3:" ra";s:3:"259";s:3:"aig";s:3:"260";s:3:"aka";s:3:"261";s:3:"all";s:3:"262";s:3:"atu";s:3:"263";s:3:"e e";s:3:"264";s:3:"eis";s:3:"265";s:3:"ers";s:3:"266";s:3:"i e";s:3:"267";s:3:"ii 
";s:3:"268";s:3:"iis";s:3:"269";s:3:"il ";s:3:"270";s:3:"ima";s:3:"271";s:3:"its";s:3:"272";s:3:"kka";s:3:"273";s:3:"kuh";s:3:"274";s:3:"l k";s:3:"275";s:3:"lat";s:3:"276";s:3:"maj";s:3:"277";s:3:"ndu";s:3:"278";s:3:"ni ";s:3:"279";s:3:"nii";s:3:"280";s:3:"oma";s:3:"281";s:3:"ool";s:3:"282";s:3:"rso";s:3:"283";s:3:"ru ";s:3:"284";s:3:"rva";s:3:"285";s:3:"s t";s:3:"286";s:3:"sek";s:3:"287";s:3:"son";s:3:"288";s:3:"ste";s:3:"289";s:3:"t m";s:3:"290";s:3:"taj";s:3:"291";s:3:"tam";s:3:"292";s:3:"ude";s:3:"293";s:3:"uho";s:3:"294";s:3:"vai";s:3:"295";s:3:" ag";s:3:"296";s:3:" os";s:3:"297";s:3:" pa";s:3:"298";s:3:" re";s:3:"299";}s:5:"farsi";a:300:{s:5:"ان ";s:1:"0";s:5:"ای ";s:1:"1";s:5:"Ù‡ ا";s:1:"2";s:5:" اي";s:1:"3";s:5:" در";s:1:"4";s:5:"به ";s:1:"5";s:5:" بر";s:1:"6";s:5:"در ";s:1:"7";s:6:"ران";s:1:"8";s:5:" به";s:1:"9";s:5:"ÛŒ ا";s:2:"10";s:5:"از ";s:2:"11";s:5:"ين ";s:2:"12";s:5:"Ù…ÛŒ ";s:2:"13";s:5:" از";s:2:"14";s:5:"ده ";s:2:"15";s:5:"ست ";s:2:"16";s:6:"است";s:2:"17";s:5:" اس";s:2:"18";s:5:" Ú©Ù‡";s:2:"19";s:5:"Ú©Ù‡ ";s:2:"20";s:6:"اير";s:2:"21";s:5:"ند ";s:2:"22";s:6:"اين";s:2:"23";s:5:" ها";s:2:"24";s:6:"يرا";s:2:"25";s:5:"ود ";s:2:"26";s:5:" را";s:2:"27";s:6:"های";s:2:"28";s:5:" خو";s:2:"29";s:5:"ته ";s:2:"30";s:5:"را ";s:2:"31";s:6:"رای";s:2:"32";s:5:"رد ";s:2:"33";s:5:"Ù† ب";s:2:"34";s:6:"کرد";s:2:"35";s:4:" Ùˆ ";s:2:"36";s:5:" کر";s:2:"37";s:5:"ات ";s:2:"38";s:6:"برا";s:2:"39";s:5:"د Ú©";s:2:"40";s:6:"مان";s:2:"41";s:5:"ÛŒ د";s:2:"42";s:5:" ان";s:2:"43";s:6:"خوا";s:2:"44";s:6:"شور";s:2:"45";s:5:" با";s:2:"46";s:5:"Ù† ا";s:2:"47";s:5:" سا";s:2:"48";s:6:"تمی";s:2:"49";s:5:"ری ";s:2:"50";s:6:"اتم";s:2:"51";s:5:"ا ا";s:2:"52";s:6:"واه";s:2:"53";s:5:" ات";s:2:"54";s:5:" عر";s:2:"55";s:5:"اق ";s:2:"56";s:5:"ر Ù…";s:2:"57";s:6:"راق";s:2:"58";s:6:"عرا";s:2:"59";s:5:"ÛŒ ب";s:2:"60";s:5:" تا";s:2:"61";s:5:" تو";s:2:"62";s:5:"ار ";s:2:"63";s:5:"ر ا";s:2:"64";s:5:"Ù† Ù…";s:2:"65";s:5:"Ù‡ ب";s:2:"66";s:5:"ور ";s:2:"67";s:5:"يد ";s:2:"68";s:5:"ÛŒ Ú©";s:2:"69";s:5:" 
ام";s:2:"70";s:5:" دا";s:2:"71";s:5:" Ú©Ù†";s:2:"72";s:6:"اهد";s:2:"73";s:5:"هد ";s:2:"74";s:5:" آن";s:2:"75";s:5:" Ù…ÛŒ";s:2:"76";s:5:" ني";s:2:"77";s:5:" Ú¯Ù";s:2:"78";s:5:"د ا";s:2:"79";s:6:"Ú¯Ùت";s:2:"80";s:5:" Ú©Ø´";s:2:"81";s:5:"ا ب";s:2:"82";s:5:"Ù†ÛŒ ";s:2:"83";s:5:"ها ";s:2:"84";s:6:"کشو";s:2:"85";s:5:" رو";s:2:"86";s:5:"ت Ú©";s:2:"87";s:6:"نيو";s:2:"88";s:5:"Ù‡ Ù…";s:2:"89";s:5:"ÙˆÛŒ ";s:2:"90";s:5:"ÛŒ ت";s:2:"91";s:5:" شو";s:2:"92";s:5:"ال ";s:2:"93";s:6:"دار";s:2:"94";s:5:"مه ";s:2:"95";s:5:"Ù† Ú©";s:2:"96";s:5:"Ù‡ د";s:2:"97";s:5:"يه ";s:2:"98";s:5:" ما";s:2:"99";s:6:"امه";s:3:"100";s:5:"د ب";s:3:"101";s:6:"زار";s:3:"102";s:6:"ورا";s:3:"103";s:6:"گزا";s:3:"104";s:5:" پي";s:3:"105";s:5:"آن ";s:3:"106";s:6:"انت";s:3:"107";s:5:"ت ا";s:3:"108";s:5:"Ùت ";s:3:"109";s:5:"Ù‡ Ù†";s:3:"110";s:5:"ÛŒ Ø®";s:3:"111";s:6:"اما";s:3:"112";s:6:"بات";s:3:"113";s:5:"ما ";s:3:"114";s:6:"ملل";s:3:"115";s:6:"نام";s:3:"116";s:5:"ير ";s:3:"117";s:5:"ÛŒ Ù…";s:3:"118";s:5:"ÛŒ Ù‡";s:3:"119";s:5:" آم";s:3:"120";s:5:" ای";s:3:"121";s:5:" من";s:3:"122";s:6:"انس";s:3:"123";s:6:"اني";s:3:"124";s:5:"ت د";s:3:"125";s:6:"رده";s:3:"126";s:6:"ساز";s:3:"127";s:5:"Ù† د";s:3:"128";s:5:"نه ";s:3:"129";s:6:"ورد";s:3:"130";s:5:" او";s:3:"131";s:5:" بي";s:3:"132";s:5:" سو";s:3:"133";s:5:" شد";s:3:"134";s:6:"اده";s:3:"135";s:6:"اند";s:3:"136";s:5:"با ";s:3:"137";s:5:"ت ب";s:3:"138";s:5:"ر ب";s:3:"139";s:5:"ز ا";s:3:"140";s:6:"زما";s:3:"141";s:6:"سته";s:3:"142";s:5:"Ù† ر";s:3:"143";s:5:"Ù‡ س";s:3:"144";s:6:"وان";s:3:"145";s:5:"وز ";s:3:"146";s:5:"ÛŒ ر";s:3:"147";s:5:"ÛŒ س";s:3:"148";s:5:" هس";s:3:"149";s:6:"ابا";s:3:"150";s:5:"ام ";s:3:"151";s:6:"اور";s:3:"152";s:6:"تخا";s:3:"153";s:6:"خاب";s:3:"154";s:6:"خود";s:3:"155";s:5:"د د";s:3:"156";s:5:"دن ";s:3:"157";s:6:"رها";s:3:"158";s:6:"روز";s:3:"159";s:6:"رگز";s:3:"160";s:6:"نتخ";s:3:"161";s:5:"Ù‡ Ø´";s:3:"162";s:5:"Ù‡ Ù‡";s:3:"163";s:6:"هست";s:3:"164";s:5:"يت ";s:3:"165";s:5:"يم ";s:3:"166";s:5:" دو";s:3:"167";s:5:" دي";s:3:"168";s:5:" 
مو";s:3:"169";s:5:" نو";s:3:"170";s:5:" هم";s:3:"171";s:5:" کا";s:3:"172";s:5:"اد ";s:3:"173";s:6:"اری";s:3:"174";s:6:"انی";s:3:"175";s:5:"بر ";s:3:"176";s:6:"بود";s:3:"177";s:5:"ت Ù‡";s:3:"178";s:5:"Ø Ù‡";s:3:"179";s:6:"Øال";s:3:"180";s:5:"رش ";s:3:"181";s:5:"عه ";s:3:"182";s:5:"Ù„ÛŒ ";s:3:"183";s:5:"وم ";s:3:"184";s:6:"ژان";s:3:"185";s:5:" سل";s:3:"186";s:6:"آمر";s:3:"187";s:5:"Ø§Ø ";s:3:"188";s:6:"توس";s:3:"189";s:6:"داد";s:3:"190";s:6:"دام";s:3:"191";s:5:"ر د";s:3:"192";s:5:"ره ";s:3:"193";s:6:"ريک";s:3:"194";s:5:"زی ";s:3:"195";s:6:"سلا";s:3:"196";s:6:"شود";s:3:"197";s:6:"لاØ";s:3:"198";s:6:"مري";s:3:"199";s:6:"نند";s:3:"200";s:5:"Ù‡ ع";s:3:"201";s:6:"يما";s:3:"202";s:6:"يکا";s:3:"203";s:6:"پيم";s:3:"204";s:5:"گر ";s:3:"205";s:5:" Ø¢Ú˜";s:3:"206";s:5:" ال";s:3:"207";s:5:" بو";s:3:"208";s:5:" مق";s:3:"209";s:5:" مل";s:3:"210";s:5:" ÙˆÛŒ";s:3:"211";s:6:"آژا";s:3:"212";s:6:"ازم";s:3:"213";s:6:"ازی";s:3:"214";s:6:"بار";s:3:"215";s:6:"برن";s:3:"216";s:5:"ر Ø¢";s:3:"217";s:5:"ز س";s:3:"218";s:6:"سعه";s:3:"219";s:6:"شته";s:3:"220";s:6:"مات";s:3:"221";s:5:"Ù† Ø¢";s:3:"222";s:5:"Ù† Ù¾";s:3:"223";s:5:"نس ";s:3:"224";s:5:"Ù‡ Ú¯";s:3:"225";s:6:"وسع";s:3:"226";s:6:"يان";s:3:"227";s:6:"يوم";s:3:"228";s:5:"کا ";s:3:"229";s:6:"کام";s:3:"230";s:6:"کند";s:3:"231";s:5:" خا";s:3:"232";s:5:" سر";s:3:"233";s:6:"آور";s:3:"234";s:6:"ارد";s:3:"235";s:6:"اقد";s:3:"236";s:6:"ايم";s:3:"237";s:6:"ايی";s:3:"238";s:6:"برگ";s:3:"239";s:5:"ت ع";s:3:"240";s:5:"تن ";s:3:"241";s:5:"خت ";s:3:"242";s:5:"د Ùˆ";s:3:"243";s:5:"ر Ø®";s:3:"244";s:5:"رک ";s:3:"245";s:6:"زير";s:3:"246";s:6:"Ùته";s:3:"247";s:6:"قدا";s:3:"248";s:5:"Ù„ ت";s:3:"249";s:6:"مين";s:3:"250";s:5:"Ù† Ú¯";s:3:"251";s:5:"Ù‡ Ø¢";s:3:"252";s:5:"Ù‡ Ø®";s:3:"253";s:5:"Ù‡ Ú©";s:3:"254";s:6:"ورک";s:3:"255";s:6:"ويو";s:3:"256";s:6:"يور";s:3:"257";s:6:"يوي";s:3:"258";s:5:"ÙŠÛŒ ";s:3:"259";s:5:"Ú© ت";s:3:"260";s:5:"ÛŒ Ø´";s:3:"261";s:5:" اق";s:3:"262";s:5:" Øا";s:3:"263";s:5:" ØÙ‚";s:3:"264";s:5:" دس";s:3:"265";s:5:" Ø´Ú©";s:3:"266";s:5:" 
عم";s:3:"267";s:5:" ÙŠÚ©";s:3:"268";s:5:"ا ت";s:3:"269";s:5:"ا د";s:3:"270";s:6:"ارج";s:3:"271";s:6:"بين";s:3:"272";s:5:"ت Ù…";s:3:"273";s:5:"ت Ùˆ";s:3:"274";s:6:"تاي";s:3:"275";s:6:"دست";s:3:"276";s:5:"ر Ø";s:3:"277";s:5:"ر س";s:3:"278";s:6:"رنا";s:3:"279";s:5:"ز ب";s:3:"280";s:6:"شکا";s:3:"281";s:5:"لل ";s:3:"282";s:5:"Ù… Ú©";s:3:"283";s:5:"مز ";s:3:"284";s:6:"ندا";s:3:"285";s:6:"نوا";s:3:"286";s:5:"Ùˆ ا";s:3:"287";s:6:"وره";s:3:"288";s:5:"ون ";s:3:"289";s:6:"وند";s:3:"290";s:6:"يمز";s:3:"291";s:5:" آو";s:3:"292";s:5:" اع";s:3:"293";s:5:" Ùر";s:3:"294";s:5:" مت";s:3:"295";s:5:" نه";s:3:"296";s:5:" هر";s:3:"297";s:5:" وز";s:3:"298";s:5:" گز";s:3:"299";}s:7:"finnish";a:300:{s:3:"en ";s:1:"0";s:3:"in ";s:1:"1";s:3:"an ";s:1:"2";s:3:"on ";s:1:"3";s:3:"ist";s:1:"4";s:3:"ta ";s:1:"5";s:3:"ja ";s:1:"6";s:3:"n t";s:1:"7";s:3:"sa ";s:1:"8";s:3:"sta";s:1:"9";s:3:"aan";s:2:"10";s:3:"n p";s:2:"11";s:3:" on";s:2:"12";s:3:"ssa";s:2:"13";s:3:"tta";s:2:"14";s:4:"tä ";s:2:"15";s:3:" ka";s:2:"16";s:3:" pa";s:2:"17";s:3:"si ";s:2:"18";s:3:" ja";s:2:"19";s:3:"n k";s:2:"20";s:3:"lla";s:2:"21";s:4:"än ";s:2:"22";s:3:"een";s:2:"23";s:3:"n v";s:2:"24";s:3:"ksi";s:2:"25";s:3:"ett";s:2:"26";s:3:"nen";s:2:"27";s:3:"taa";s:2:"28";s:4:"ttä";s:2:"29";s:3:" va";s:2:"30";s:3:"ill";s:2:"31";s:3:"itt";s:2:"32";s:3:" jo";s:2:"33";s:3:" ko";s:2:"34";s:3:"n s";s:2:"35";s:3:" tu";s:2:"36";s:3:"ia ";s:2:"37";s:3:" su";s:2:"38";s:3:"a p";s:2:"39";s:3:"aa ";s:2:"40";s:3:"la ";s:2:"41";s:3:"lle";s:2:"42";s:3:"n m";s:2:"43";s:3:"le ";s:2:"44";s:3:"tte";s:2:"45";s:3:"na ";s:2:"46";s:3:" ta";s:2:"47";s:3:" ve";s:2:"48";s:3:"at ";s:2:"49";s:3:" vi";s:2:"50";s:3:"utt";s:2:"51";s:3:" sa";s:2:"52";s:3:"ise";s:2:"53";s:3:"sen";s:2:"54";s:3:" ku";s:2:"55";s:4:" nä";s:2:"56";s:4:" pä";s:2:"57";s:3:"ste";s:2:"58";s:3:" ol";s:2:"59";s:3:"a t";s:2:"60";s:3:"ais";s:2:"61";s:3:"maa";s:2:"62";s:3:"ti ";s:2:"63";s:3:"a o";s:2:"64";s:3:"oit";s:2:"65";s:5:"pää";s:2:"66";s:3:" pi";s:2:"67";s:3:"a 
v";s:2:"68";s:3:"ala";s:2:"69";s:3:"ine";s:2:"70";s:3:"isi";s:2:"71";s:3:"tel";s:2:"72";s:3:"tti";s:2:"73";s:3:" si";s:2:"74";s:3:"a k";s:2:"75";s:3:"all";s:2:"76";s:3:"iin";s:2:"77";s:3:"kin";s:2:"78";s:4:"stä";s:2:"79";s:3:"uom";s:2:"80";s:3:"vii";s:2:"81";s:3:" ma";s:2:"82";s:3:" se";s:2:"83";s:4:"enä";s:2:"84";s:3:" mu";s:2:"85";s:3:"a s";s:2:"86";s:3:"est";s:2:"87";s:3:"iss";s:2:"88";s:4:"llä";s:2:"89";s:3:"lok";s:2:"90";s:4:"lä ";s:2:"91";s:3:"n j";s:2:"92";s:3:"n o";s:2:"93";s:3:"toi";s:2:"94";s:3:"ven";s:2:"95";s:3:"ytt";s:2:"96";s:3:" li";s:2:"97";s:3:"ain";s:2:"98";s:3:"et ";s:2:"99";s:3:"ina";s:3:"100";s:3:"n a";s:3:"101";s:3:"n n";s:3:"102";s:3:"oll";s:3:"103";s:3:"plo";s:3:"104";s:3:"ten";s:3:"105";s:3:"ust";s:3:"106";s:4:"äll";s:3:"107";s:5:"ään";s:3:"108";s:3:" to";s:3:"109";s:3:"den";s:3:"110";s:3:"men";s:3:"111";s:3:"oki";s:3:"112";s:3:"suo";s:3:"113";s:4:"sä ";s:3:"114";s:5:"tää";s:3:"115";s:3:"uks";s:3:"116";s:3:"vat";s:3:"117";s:3:" al";s:3:"118";s:3:" ke";s:3:"119";s:3:" te";s:3:"120";s:3:"a e";s:3:"121";s:3:"lii";s:3:"122";s:3:"tai";s:3:"123";s:3:"tei";s:3:"124";s:4:"äis";s:3:"125";s:5:"ää ";s:3:"126";s:3:" pl";s:3:"127";s:3:"ell";s:3:"128";s:3:"i t";s:3:"129";s:3:"ide";s:3:"130";s:3:"ikk";s:3:"131";s:3:"ki ";s:3:"132";s:3:"nta";s:3:"133";s:3:"ova";s:3:"134";s:3:"yst";s:3:"135";s:3:"yt ";s:3:"136";s:4:"ä p";s:3:"137";s:4:"äyt";s:3:"138";s:3:" ha";s:3:"139";s:3:" pe";s:3:"140";s:4:" tä";s:3:"141";s:3:"a n";s:3:"142";s:3:"aik";s:3:"143";s:3:"i p";s:3:"144";s:3:"i v";s:3:"145";s:3:"nyt";s:3:"146";s:4:"näy";s:3:"147";s:3:"pal";s:3:"148";s:3:"tee";s:3:"149";s:3:"un ";s:3:"150";s:3:" me";s:3:"151";s:3:"a m";s:3:"152";s:3:"ess";s:3:"153";s:3:"kau";s:3:"154";s:3:"pai";s:3:"155";s:3:"stu";s:3:"156";s:3:"ut ";s:3:"157";s:3:"voi";s:3:"158";s:3:" et";s:3:"159";s:3:"a h";s:3:"160";s:3:"eis";s:3:"161";s:3:"hte";s:3:"162";s:3:"i 
o";s:3:"163";s:3:"iik";s:3:"164";s:3:"ita";s:3:"165";s:3:"jou";s:3:"166";s:3:"mis";s:3:"167";s:3:"nin";s:3:"168";s:3:"nut";s:3:"169";s:3:"sia";s:3:"170";s:4:"ssä";s:3:"171";s:3:"van";s:3:"172";s:3:" ty";s:3:"173";s:3:" yh";s:3:"174";s:3:"aks";s:3:"175";s:3:"ime";s:3:"176";s:3:"loi";s:3:"177";s:3:"me ";s:3:"178";s:3:"n e";s:3:"179";s:3:"n h";s:3:"180";s:3:"n l";s:3:"181";s:3:"oin";s:3:"182";s:3:"ome";s:3:"183";s:3:"ott";s:3:"184";s:3:"ouk";s:3:"185";s:3:"sit";s:3:"186";s:3:"sti";s:3:"187";s:3:"tet";s:3:"188";s:3:"tie";s:3:"189";s:3:"ukk";s:3:"190";s:4:"ä k";s:3:"191";s:3:" ra";s:3:"192";s:3:" ti";s:3:"193";s:3:"aja";s:3:"194";s:3:"asi";s:3:"195";s:3:"ent";s:3:"196";s:3:"iga";s:3:"197";s:3:"iig";s:3:"198";s:3:"ite";s:3:"199";s:3:"jan";s:3:"200";s:3:"kaa";s:3:"201";s:3:"kse";s:3:"202";s:3:"laa";s:3:"203";s:3:"lan";s:3:"204";s:3:"li ";s:3:"205";s:4:"näj";s:3:"206";s:3:"ole";s:3:"207";s:3:"tii";s:3:"208";s:3:"usi";s:3:"209";s:5:"äjä";s:3:"210";s:3:" ov";s:3:"211";s:3:"a a";s:3:"212";s:3:"ant";s:3:"213";s:3:"ava";s:3:"214";s:3:"ei ";s:3:"215";s:3:"eri";s:3:"216";s:3:"kan";s:3:"217";s:3:"kku";s:3:"218";s:3:"lai";s:3:"219";s:3:"lis";s:3:"220";s:4:"läi";s:3:"221";s:3:"mat";s:3:"222";s:3:"ois";s:3:"223";s:3:"pel";s:3:"224";s:3:"sil";s:3:"225";s:3:"sty";s:3:"226";s:3:"taj";s:3:"227";s:3:"tav";s:3:"228";s:3:"ttu";s:3:"229";s:4:"työ";s:3:"230";s:4:"yös";s:3:"231";s:4:"ä o";s:3:"232";s:3:" ai";s:3:"233";s:3:" pu";s:3:"234";s:3:"a j";s:3:"235";s:3:"a l";s:3:"236";s:3:"aal";s:3:"237";s:3:"arv";s:3:"238";s:3:"ass";s:3:"239";s:3:"ien";s:3:"240";s:3:"imi";s:3:"241";s:3:"imm";s:3:"242";s:4:"itä";s:3:"243";s:3:"ka ";s:3:"244";s:3:"kes";s:3:"245";s:3:"kue";s:3:"246";s:3:"lee";s:3:"247";s:3:"lin";s:3:"248";s:3:"llo";s:3:"249";s:3:"one";s:3:"250";s:3:"ri ";s:3:"251";s:3:"t o";s:3:"252";s:3:"t p";s:3:"253";s:3:"tu ";s:3:"254";s:3:"val";s:3:"255";s:3:"vuo";s:3:"256";s:3:" ei";s:3:"257";s:3:" he";s:3:"258";s:3:" hy";s:3:"259";s:3:" my";s:3:"260";s:3:" 
vo";s:3:"261";s:3:"ali";s:3:"262";s:3:"alo";s:3:"263";s:3:"ano";s:3:"264";s:3:"ast";s:3:"265";s:3:"att";s:3:"266";s:3:"auk";s:3:"267";s:3:"eli";s:3:"268";s:3:"ely";s:3:"269";s:3:"hti";s:3:"270";s:3:"ika";s:3:"271";s:3:"ken";s:3:"272";s:3:"kki";s:3:"273";s:3:"lys";s:3:"274";s:3:"min";s:3:"275";s:4:"myö";s:3:"276";s:3:"oht";s:3:"277";s:3:"oma";s:3:"278";s:3:"tus";s:3:"279";s:3:"umi";s:3:"280";s:3:"yks";s:3:"281";s:4:"ät ";s:3:"282";s:5:"ääl";s:3:"283";s:4:"ös ";s:3:"284";s:3:" ar";s:3:"285";s:3:" eu";s:3:"286";s:3:" hu";s:3:"287";s:3:" na";s:3:"288";s:3:"aat";s:3:"289";s:3:"alk";s:3:"290";s:3:"alu";s:3:"291";s:3:"ans";s:3:"292";s:3:"arj";s:3:"293";s:3:"enn";s:3:"294";s:3:"han";s:3:"295";s:3:"kuu";s:3:"296";s:3:"n y";s:3:"297";s:3:"set";s:3:"298";s:3:"sim";s:3:"299";}s:6:"french";a:300:{s:3:"es ";s:1:"0";s:3:" de";s:1:"1";s:3:"de ";s:1:"2";s:3:" le";s:1:"3";s:3:"ent";s:1:"4";s:3:"le ";s:1:"5";s:3:"nt ";s:1:"6";s:3:"la ";s:1:"7";s:3:"s d";s:1:"8";s:3:" la";s:1:"9";s:3:"ion";s:2:"10";s:3:"on ";s:2:"11";s:3:"re ";s:2:"12";s:3:" pa";s:2:"13";s:3:"e l";s:2:"14";s:3:"e d";s:2:"15";s:3:" l'";s:2:"16";s:3:"e p";s:2:"17";s:3:" co";s:2:"18";s:3:" pr";s:2:"19";s:3:"tio";s:2:"20";s:3:"ns ";s:2:"21";s:3:" en";s:2:"22";s:3:"ne ";s:2:"23";s:3:"que";s:2:"24";s:3:"r l";s:2:"25";s:3:"les";s:2:"26";s:3:"ur ";s:2:"27";s:3:"en ";s:2:"28";s:3:"ati";s:2:"29";s:3:"ue ";s:2:"30";s:3:" po";s:2:"31";s:3:" d'";s:2:"32";s:3:"par";s:2:"33";s:3:" a ";s:2:"34";s:3:"et ";s:2:"35";s:3:"it ";s:2:"36";s:3:" qu";s:2:"37";s:3:"men";s:2:"38";s:3:"ons";s:2:"39";s:3:"te ";s:2:"40";s:3:" et";s:2:"41";s:3:"t d";s:2:"42";s:3:" re";s:2:"43";s:3:"des";s:2:"44";s:3:" un";s:2:"45";s:3:"ie ";s:2:"46";s:3:"s l";s:2:"47";s:3:" su";s:2:"48";s:3:"pou";s:2:"49";s:3:" au";s:2:"50";s:4:" à ";s:2:"51";s:3:"con";s:2:"52";s:3:"er ";s:2:"53";s:3:" no";s:2:"54";s:3:"ait";s:2:"55";s:3:"e c";s:2:"56";s:3:"se ";s:2:"57";s:4:"té ";s:2:"58";s:3:"du ";s:2:"59";s:3:" du";s:2:"60";s:4:" dé";s:2:"61";s:3:"ce ";s:2:"62";s:3:"e 
e";s:2:"63";s:3:"is ";s:2:"64";s:3:"n d";s:2:"65";s:3:"s a";s:2:"66";s:3:" so";s:2:"67";s:3:"e r";s:2:"68";s:3:"e s";s:2:"69";s:3:"our";s:2:"70";s:3:"res";s:2:"71";s:3:"ssi";s:2:"72";s:3:"eur";s:2:"73";s:3:" se";s:2:"74";s:3:"eme";s:2:"75";s:3:"est";s:2:"76";s:3:"us ";s:2:"77";s:3:"sur";s:2:"78";s:3:"ant";s:2:"79";s:3:"iqu";s:2:"80";s:3:"s p";s:2:"81";s:3:"une";s:2:"82";s:3:"uss";s:2:"83";s:3:"l'a";s:2:"84";s:3:"pro";s:2:"85";s:3:"ter";s:2:"86";s:3:"tre";s:2:"87";s:3:"end";s:2:"88";s:3:"rs ";s:2:"89";s:3:" ce";s:2:"90";s:3:"e a";s:2:"91";s:3:"t p";s:2:"92";s:3:"un ";s:2:"93";s:3:" ma";s:2:"94";s:3:" ru";s:2:"95";s:4:" ré";s:2:"96";s:3:"ous";s:2:"97";s:3:"ris";s:2:"98";s:3:"rus";s:2:"99";s:3:"sse";s:3:"100";s:3:"ans";s:3:"101";s:3:"ar ";s:3:"102";s:3:"com";s:3:"103";s:3:"e m";s:3:"104";s:3:"ire";s:3:"105";s:3:"nce";s:3:"106";s:3:"nte";s:3:"107";s:3:"t l";s:3:"108";s:3:" av";s:3:"109";s:3:" mo";s:3:"110";s:3:" te";s:3:"111";s:3:"il ";s:3:"112";s:3:"me ";s:3:"113";s:3:"ont";s:3:"114";s:3:"ten";s:3:"115";s:3:"a p";s:3:"116";s:3:"dan";s:3:"117";s:3:"pas";s:3:"118";s:3:"qui";s:3:"119";s:3:"s e";s:3:"120";s:3:"s s";s:3:"121";s:3:" in";s:3:"122";s:3:"ist";s:3:"123";s:3:"lle";s:3:"124";s:3:"nou";s:3:"125";s:4:"pré";s:3:"126";s:3:"'un";s:3:"127";s:3:"air";s:3:"128";s:3:"d'a";s:3:"129";s:3:"ir ";s:3:"130";s:3:"n e";s:3:"131";s:3:"rop";s:3:"132";s:3:"ts ";s:3:"133";s:3:" da";s:3:"134";s:3:"a s";s:3:"135";s:3:"as ";s:3:"136";s:3:"au ";s:3:"137";s:3:"den";s:3:"138";s:3:"mai";s:3:"139";s:3:"mis";s:3:"140";s:3:"ori";s:3:"141";s:3:"out";s:3:"142";s:3:"rme";s:3:"143";s:3:"sio";s:3:"144";s:3:"tte";s:3:"145";s:3:"ux ";s:3:"146";s:3:"a d";s:3:"147";s:3:"ien";s:3:"148";s:3:"n a";s:3:"149";s:3:"ntr";s:3:"150";s:3:"omm";s:3:"151";s:3:"ort";s:3:"152";s:3:"ouv";s:3:"153";s:3:"s c";s:3:"154";s:3:"son";s:3:"155";s:3:"tes";s:3:"156";s:3:"ver";s:3:"157";s:4:"ère";s:3:"158";s:3:" il";s:3:"159";s:3:" m ";s:3:"160";s:3:" sa";s:3:"161";s:3:" ve";s:3:"162";s:3:"a 
r";s:3:"163";s:3:"ais";s:3:"164";s:3:"ava";s:3:"165";s:3:"di ";s:3:"166";s:3:"n p";s:3:"167";s:3:"sti";s:3:"168";s:3:"ven";s:3:"169";s:3:" mi";s:3:"170";s:3:"ain";s:3:"171";s:3:"enc";s:3:"172";s:3:"for";s:3:"173";s:4:"ité";s:3:"174";s:3:"lar";s:3:"175";s:3:"oir";s:3:"176";s:3:"rem";s:3:"177";s:3:"ren";s:3:"178";s:3:"rro";s:3:"179";s:4:"rés";s:3:"180";s:3:"sie";s:3:"181";s:3:"t a";s:3:"182";s:3:"tur";s:3:"183";s:3:" pe";s:3:"184";s:3:" to";s:3:"185";s:3:"d'u";s:3:"186";s:3:"ell";s:3:"187";s:3:"err";s:3:"188";s:3:"ers";s:3:"189";s:3:"ide";s:3:"190";s:3:"ine";s:3:"191";s:3:"iss";s:3:"192";s:3:"mes";s:3:"193";s:3:"por";s:3:"194";s:3:"ran";s:3:"195";s:3:"sit";s:3:"196";s:3:"st ";s:3:"197";s:3:"t r";s:3:"198";s:3:"uti";s:3:"199";s:3:"vai";s:3:"200";s:4:"é l";s:3:"201";s:4:"ési";s:3:"202";s:3:" di";s:3:"203";s:3:" n'";s:3:"204";s:4:" ét";s:3:"205";s:3:"a c";s:3:"206";s:3:"ass";s:3:"207";s:3:"e t";s:3:"208";s:3:"in ";s:3:"209";s:3:"nde";s:3:"210";s:3:"pre";s:3:"211";s:3:"rat";s:3:"212";s:3:"s m";s:3:"213";s:3:"ste";s:3:"214";s:3:"tai";s:3:"215";s:3:"tch";s:3:"216";s:3:"ui ";s:3:"217";s:3:"uro";s:3:"218";s:4:"ès ";s:3:"219";s:3:" es";s:3:"220";s:3:" fo";s:3:"221";s:3:" tr";s:3:"222";s:3:"'ad";s:3:"223";s:3:"app";s:3:"224";s:3:"aux";s:3:"225";s:4:"e à ";s:3:"226";s:3:"ett";s:3:"227";s:3:"iti";s:3:"228";s:3:"lit";s:3:"229";s:3:"nal";s:3:"230";s:4:"opé";s:3:"231";s:3:"r d";s:3:"232";s:3:"ra ";s:3:"233";s:3:"rai";s:3:"234";s:3:"ror";s:3:"235";s:3:"s r";s:3:"236";s:3:"tat";s:3:"237";s:4:"uté";s:3:"238";s:4:"à l";s:3:"239";s:3:" af";s:3:"240";s:3:"anc";s:3:"241";s:3:"ara";s:3:"242";s:3:"art";s:3:"243";s:3:"bre";s:3:"244";s:4:"ché";s:3:"245";s:3:"dre";s:3:"246";s:3:"e f";s:3:"247";s:3:"ens";s:3:"248";s:3:"lem";s:3:"249";s:3:"n r";s:3:"250";s:3:"n t";s:3:"251";s:3:"ndr";s:3:"252";s:3:"nne";s:3:"253";s:3:"onn";s:3:"254";s:3:"pos";s:3:"255";s:3:"s t";s:3:"256";s:3:"tiq";s:3:"257";s:3:"ure";s:3:"258";s:3:" 
tu";s:3:"259";s:3:"ale";s:3:"260";s:3:"and";s:3:"261";s:3:"ave";s:3:"262";s:3:"cla";s:3:"263";s:3:"cou";s:3:"264";s:3:"e n";s:3:"265";s:3:"emb";s:3:"266";s:3:"ins";s:3:"267";s:3:"jou";s:3:"268";s:3:"mme";s:3:"269";s:3:"rie";s:3:"270";s:4:"rès";s:3:"271";s:3:"sem";s:3:"272";s:3:"str";s:3:"273";s:3:"t i";s:3:"274";s:3:"ues";s:3:"275";s:3:"uni";s:3:"276";s:3:"uve";s:3:"277";s:4:"é d";s:3:"278";s:4:"ée ";s:3:"279";s:3:" ch";s:3:"280";s:3:" do";s:3:"281";s:3:" eu";s:3:"282";s:3:" fa";s:3:"283";s:3:" lo";s:3:"284";s:3:" ne";s:3:"285";s:3:" ra";s:3:"286";s:3:"arl";s:3:"287";s:3:"att";s:3:"288";s:3:"ec ";s:3:"289";s:3:"ica";s:3:"290";s:3:"l a";s:3:"291";s:3:"l'o";s:3:"292";s:4:"l'é";s:3:"293";s:3:"mmi";s:3:"294";s:3:"nta";s:3:"295";s:3:"orm";s:3:"296";s:3:"ou ";s:3:"297";s:3:"r u";s:3:"298";s:3:"rle";s:3:"299";}s:6:"german";a:300:{s:3:"en ";s:1:"0";s:3:"er ";s:1:"1";s:3:" de";s:1:"2";s:3:"der";s:1:"3";s:3:"ie ";s:1:"4";s:3:" di";s:1:"5";s:3:"die";s:1:"6";s:3:"sch";s:1:"7";s:3:"ein";s:1:"8";s:3:"che";s:1:"9";s:3:"ich";s:2:"10";s:3:"den";s:2:"11";s:3:"in ";s:2:"12";s:3:"te ";s:2:"13";s:3:"ch ";s:2:"14";s:3:" ei";s:2:"15";s:3:"ung";s:2:"16";s:3:"n d";s:2:"17";s:3:"nd ";s:2:"18";s:3:" be";s:2:"19";s:3:"ver";s:2:"20";s:3:"es ";s:2:"21";s:3:" zu";s:2:"22";s:3:"eit";s:2:"23";s:3:"gen";s:2:"24";s:3:"und";s:2:"25";s:3:" un";s:2:"26";s:3:" au";s:2:"27";s:3:" in";s:2:"28";s:3:"cht";s:2:"29";s:3:"it ";s:2:"30";s:3:"ten";s:2:"31";s:3:" da";s:2:"32";s:3:"ent";s:2:"33";s:3:" ve";s:2:"34";s:3:"and";s:2:"35";s:3:" ge";s:2:"36";s:3:"ine";s:2:"37";s:3:" mi";s:2:"38";s:3:"r d";s:2:"39";s:3:"hen";s:2:"40";s:3:"ng ";s:2:"41";s:3:"nde";s:2:"42";s:3:" vo";s:2:"43";s:3:"e d";s:2:"44";s:3:"ber";s:2:"45";s:3:"men";s:2:"46";s:3:"ei ";s:2:"47";s:3:"mit";s:2:"48";s:3:" st";s:2:"49";s:3:"ter";s:2:"50";s:3:"ren";s:2:"51";s:3:"t d";s:2:"52";s:3:" er";s:2:"53";s:3:"ere";s:2:"54";s:3:"n s";s:2:"55";s:3:"ste";s:2:"56";s:3:" se";s:2:"57";s:3:"e s";s:2:"58";s:3:"ht 
";s:2:"59";s:3:"des";s:2:"60";s:3:"ist";s:2:"61";s:3:"ne ";s:2:"62";s:3:"auf";s:2:"63";s:3:"e a";s:2:"64";s:3:"isc";s:2:"65";s:3:"on ";s:2:"66";s:3:"rte";s:2:"67";s:3:" re";s:2:"68";s:3:" we";s:2:"69";s:3:"ges";s:2:"70";s:3:"uch";s:2:"71";s:4:" fü";s:2:"72";s:3:" so";s:2:"73";s:3:"bei";s:2:"74";s:3:"e e";s:2:"75";s:3:"nen";s:2:"76";s:3:"r s";s:2:"77";s:3:"ach";s:2:"78";s:4:"für";s:2:"79";s:3:"ier";s:2:"80";s:3:"par";s:2:"81";s:4:"ür ";s:2:"82";s:3:" ha";s:2:"83";s:3:"as ";s:2:"84";s:3:"ert";s:2:"85";s:3:" an";s:2:"86";s:3:" pa";s:2:"87";s:3:" sa";s:2:"88";s:3:" sp";s:2:"89";s:3:" wi";s:2:"90";s:3:"for";s:2:"91";s:3:"tag";s:2:"92";s:3:"zu ";s:2:"93";s:3:"das";s:2:"94";s:3:"rei";s:2:"95";s:3:"he ";s:2:"96";s:3:"hre";s:2:"97";s:3:"nte";s:2:"98";s:3:"sen";s:2:"99";s:3:"vor";s:3:"100";s:3:" sc";s:3:"101";s:3:"ech";s:3:"102";s:3:"etz";s:3:"103";s:3:"hei";s:3:"104";s:3:"lan";s:3:"105";s:3:"n a";s:3:"106";s:3:"pd ";s:3:"107";s:3:"st ";s:3:"108";s:3:"sta";s:3:"109";s:3:"ese";s:3:"110";s:3:"lic";s:3:"111";s:3:" ab";s:3:"112";s:3:" si";s:3:"113";s:3:"gte";s:3:"114";s:3:" wa";s:3:"115";s:3:"iti";s:3:"116";s:3:"kei";s:3:"117";s:3:"n e";s:3:"118";s:3:"nge";s:3:"119";s:3:"sei";s:3:"120";s:3:"tra";s:3:"121";s:3:"zen";s:3:"122";s:3:" im";s:3:"123";s:3:" la";s:3:"124";s:3:"art";s:3:"125";s:3:"im ";s:3:"126";s:3:"lle";s:3:"127";s:3:"n w";s:3:"128";s:3:"rde";s:3:"129";s:3:"rec";s:3:"130";s:3:"set";s:3:"131";s:3:"str";s:3:"132";s:3:"tei";s:3:"133";s:3:"tte";s:3:"134";s:3:" ni";s:3:"135";s:3:"e p";s:3:"136";s:3:"ehe";s:3:"137";s:3:"ers";s:3:"138";s:3:"g d";s:3:"139";s:3:"nic";s:3:"140";s:3:"von";s:3:"141";s:3:" al";s:3:"142";s:3:" pr";s:3:"143";s:3:"an ";s:3:"144";s:3:"aus";s:3:"145";s:3:"erf";s:3:"146";s:3:"r e";s:3:"147";s:3:"tze";s:3:"148";s:4:"tür";s:3:"149";s:3:"uf ";s:3:"150";s:3:"ag ";s:3:"151";s:3:"als";s:3:"152";s:3:"ar ";s:3:"153";s:3:"chs";s:3:"154";s:3:"end";s:3:"155";s:3:"ge ";s:3:"156";s:3:"ige";s:3:"157";s:3:"ion";s:3:"158";s:3:"ls ";s:3:"159";s:3:"n 
m";s:3:"160";s:3:"ngs";s:3:"161";s:3:"nis";s:3:"162";s:3:"nt ";s:3:"163";s:3:"ord";s:3:"164";s:3:"s s";s:3:"165";s:3:"sse";s:3:"166";s:4:" tü";s:3:"167";s:3:"ahl";s:3:"168";s:3:"e b";s:3:"169";s:3:"ede";s:3:"170";s:3:"em ";s:3:"171";s:3:"len";s:3:"172";s:3:"n i";s:3:"173";s:3:"orm";s:3:"174";s:3:"pro";s:3:"175";s:3:"rke";s:3:"176";s:3:"run";s:3:"177";s:3:"s d";s:3:"178";s:3:"wah";s:3:"179";s:3:"wer";s:3:"180";s:4:"ürk";s:3:"181";s:3:" me";s:3:"182";s:3:"age";s:3:"183";s:3:"att";s:3:"184";s:3:"ell";s:3:"185";s:3:"est";s:3:"186";s:3:"hat";s:3:"187";s:3:"n b";s:3:"188";s:3:"oll";s:3:"189";s:3:"raf";s:3:"190";s:3:"s a";s:3:"191";s:3:"tsc";s:3:"192";s:3:" es";s:3:"193";s:3:" fo";s:3:"194";s:3:" gr";s:3:"195";s:3:" ja";s:3:"196";s:3:"abe";s:3:"197";s:3:"auc";s:3:"198";s:3:"ben";s:3:"199";s:3:"e n";s:3:"200";s:3:"ege";s:3:"201";s:3:"lie";s:3:"202";s:3:"n u";s:3:"203";s:3:"r v";s:3:"204";s:3:"re ";s:3:"205";s:3:"rit";s:3:"206";s:3:"sag";s:3:"207";s:3:" am";s:3:"208";s:3:"agt";s:3:"209";s:3:"ahr";s:3:"210";s:3:"bra";s:3:"211";s:3:"de ";s:3:"212";s:3:"erd";s:3:"213";s:3:"her";s:3:"214";s:3:"ite";s:3:"215";s:3:"le ";s:3:"216";s:3:"n p";s:3:"217";s:3:"n v";s:3:"218";s:3:"or ";s:3:"219";s:3:"rbe";s:3:"220";s:3:"rt ";s:3:"221";s:3:"sic";s:3:"222";s:3:"wie";s:3:"223";s:4:"übe";s:3:"224";s:3:" is";s:3:"225";s:4:" üb";s:3:"226";s:3:"cha";s:3:"227";s:3:"chi";s:3:"228";s:3:"e f";s:3:"229";s:3:"e m";s:3:"230";s:3:"eri";s:3:"231";s:3:"ied";s:3:"232";s:3:"mme";s:3:"233";s:3:"ner";s:3:"234";s:3:"r a";s:3:"235";s:3:"sti";s:3:"236";s:3:"t a";s:3:"237";s:3:"t s";s:3:"238";s:3:"tis";s:3:"239";s:3:" ko";s:3:"240";s:3:"arb";s:3:"241";s:3:"ds ";s:3:"242";s:3:"gan";s:3:"243";s:3:"n z";s:3:"244";s:3:"r f";s:3:"245";s:3:"r w";s:3:"246";s:3:"ran";s:3:"247";s:3:"se ";s:3:"248";s:3:"t i";s:3:"249";s:3:"wei";s:3:"250";s:3:"wir";s:3:"251";s:3:" br";s:3:"252";s:3:" np";s:3:"253";s:3:"am ";s:3:"254";s:3:"bes";s:3:"255";s:3:"d d";s:3:"256";s:3:"deu";s:3:"257";s:3:"e g";s:3:"258";s:3:"e 
k";s:3:"259";s:3:"efo";s:3:"260";s:3:"et ";s:3:"261";s:3:"eut";s:3:"262";s:3:"fen";s:3:"263";s:3:"hse";s:3:"264";s:3:"lte";s:3:"265";s:3:"n r";s:3:"266";s:3:"npd";s:3:"267";s:3:"r b";s:3:"268";s:3:"rhe";s:3:"269";s:3:"t w";s:3:"270";s:3:"tz ";s:3:"271";s:3:" fr";s:3:"272";s:3:" ih";s:3:"273";s:3:" ke";s:3:"274";s:3:" ma";s:3:"275";s:3:"ame";s:3:"276";s:3:"ang";s:3:"277";s:3:"d s";s:3:"278";s:3:"eil";s:3:"279";s:3:"el ";s:3:"280";s:3:"era";s:3:"281";s:3:"erh";s:3:"282";s:3:"h d";s:3:"283";s:3:"i d";s:3:"284";s:3:"kan";s:3:"285";s:3:"n f";s:3:"286";s:3:"n l";s:3:"287";s:3:"nts";s:3:"288";s:3:"och";s:3:"289";s:3:"rag";s:3:"290";s:3:"rd ";s:3:"291";s:3:"spd";s:3:"292";s:3:"spr";s:3:"293";s:3:"tio";s:3:"294";s:3:" ar";s:3:"295";s:3:" en";s:3:"296";s:3:" ka";s:3:"297";s:3:"ark";s:3:"298";s:3:"ass";s:3:"299";}s:5:"hausa";a:300:{s:3:" da";s:1:"0";s:3:"da ";s:1:"1";s:3:"in ";s:1:"2";s:3:"an ";s:1:"3";s:3:"ya ";s:1:"4";s:3:" wa";s:1:"5";s:3:" ya";s:1:"6";s:3:"na ";s:1:"7";s:3:"ar ";s:1:"8";s:3:"a d";s:1:"9";s:3:" ma";s:2:"10";s:3:"wa ";s:2:"11";s:3:"a a";s:2:"12";s:3:"a k";s:2:"13";s:3:"a s";s:2:"14";s:3:" ta";s:2:"15";s:3:"wan";s:2:"16";s:3:" a ";s:2:"17";s:3:" ba";s:2:"18";s:3:" ka";s:2:"19";s:3:"ta ";s:2:"20";s:3:"a y";s:2:"21";s:3:"n d";s:2:"22";s:3:" ha";s:2:"23";s:3:" na";s:2:"24";s:3:" su";s:2:"25";s:3:" sa";s:2:"26";s:3:"kin";s:2:"27";s:3:"sa ";s:2:"28";s:3:"ata";s:2:"29";s:3:" ko";s:2:"30";s:3:"a t";s:2:"31";s:3:"su ";s:2:"32";s:3:" ga";s:2:"33";s:3:"ai ";s:2:"34";s:3:" sh";s:2:"35";s:3:"a m";s:2:"36";s:3:"uwa";s:2:"37";s:3:"iya";s:2:"38";s:3:"ma ";s:2:"39";s:3:"a w";s:2:"40";s:3:"asa";s:2:"41";s:3:"yan";s:2:"42";s:3:"ka ";s:2:"43";s:3:"ani";s:2:"44";s:3:"shi";s:2:"45";s:3:"a b";s:2:"46";s:3:"a h";s:2:"47";s:3:"a c";s:2:"48";s:3:"ama";s:2:"49";s:3:"ba ";s:2:"50";s:3:"nan";s:2:"51";s:3:"n a";s:2:"52";s:3:" mu";s:2:"53";s:3:"ana";s:2:"54";s:3:" yi";s:2:"55";s:3:"a g";s:2:"56";s:3:" za";s:2:"57";s:3:"i d";s:2:"58";s:3:" ku";s:2:"59";s:3:"aka";s:2:"60";s:3:"yi 
";s:2:"61";s:3:"n k";s:2:"62";s:3:"ann";s:2:"63";s:3:"ke ";s:2:"64";s:3:"tar";s:2:"65";s:3:" ci";s:2:"66";s:3:"iki";s:2:"67";s:3:"n s";s:2:"68";s:3:"ko ";s:2:"69";s:3:" ra";s:2:"70";s:3:"ki ";s:2:"71";s:3:"ne ";s:2:"72";s:3:"a z";s:2:"73";s:3:"mat";s:2:"74";s:3:"hak";s:2:"75";s:3:"nin";s:2:"76";s:3:"e d";s:2:"77";s:3:"nna";s:2:"78";s:3:"uma";s:2:"79";s:3:"nda";s:2:"80";s:3:"a n";s:2:"81";s:3:"ada";s:2:"82";s:3:"cik";s:2:"83";s:3:"ni ";s:2:"84";s:3:"rin";s:2:"85";s:3:"una";s:2:"86";s:3:"ara";s:2:"87";s:3:"kum";s:2:"88";s:3:"akk";s:2:"89";s:3:" ce";s:2:"90";s:3:" du";s:2:"91";s:3:"man";s:2:"92";s:3:"n y";s:2:"93";s:3:"nci";s:2:"94";s:3:"sar";s:2:"95";s:3:"aki";s:2:"96";s:3:"awa";s:2:"97";s:3:"ci ";s:2:"98";s:3:"kan";s:2:"99";s:3:"kar";s:3:"100";s:3:"ari";s:3:"101";s:3:"n m";s:3:"102";s:3:"and";s:3:"103";s:3:"hi ";s:3:"104";s:3:"n t";s:3:"105";s:3:"ga ";s:3:"106";s:3:"owa";s:3:"107";s:3:"ash";s:3:"108";s:3:"kam";s:3:"109";s:3:"dan";s:3:"110";s:3:"ewa";s:3:"111";s:3:"nsa";s:3:"112";s:3:"ali";s:3:"113";s:3:"ami";s:3:"114";s:3:" ab";s:3:"115";s:3:" do";s:3:"116";s:3:"anc";s:3:"117";s:3:"n r";s:3:"118";s:3:"aya";s:3:"119";s:3:"i n";s:3:"120";s:3:"sun";s:3:"121";s:3:"uka";s:3:"122";s:3:" al";s:3:"123";s:3:" ne";s:3:"124";s:3:"a'a";s:3:"125";s:3:"cew";s:3:"126";s:3:"cin";s:3:"127";s:3:"mas";s:3:"128";s:3:"tak";s:3:"129";s:3:"un ";s:3:"130";s:3:"aba";s:3:"131";s:3:"kow";s:3:"132";s:3:"a r";s:3:"133";s:3:"ra ";s:3:"134";s:3:" ja";s:3:"135";s:4:" Æ™a";s:3:"136";s:3:"en ";s:3:"137";s:3:"r d";s:3:"138";s:3:"sam";s:3:"139";s:3:"tsa";s:3:"140";s:3:" ru";s:3:"141";s:3:"ce ";s:3:"142";s:3:"i a";s:3:"143";s:3:"abi";s:3:"144";s:3:"ida";s:3:"145";s:3:"mut";s:3:"146";s:3:"n g";s:3:"147";s:3:"n j";s:3:"148";s:3:"san";s:3:"149";s:4:"a Æ™";s:3:"150";s:3:"har";s:3:"151";s:3:"on ";s:3:"152";s:3:"i m";s:3:"153";s:3:"suk";s:3:"154";s:3:" ak";s:3:"155";s:3:" ji";s:3:"156";s:3:"yar";s:3:"157";s:3:"'ya";s:3:"158";s:3:"kwa";s:3:"159";s:3:"min";s:3:"160";s:3:" 
'y";s:3:"161";s:3:"ane";s:3:"162";s:3:"ban";s:3:"163";s:3:"ins";s:3:"164";s:3:"ruw";s:3:"165";s:3:"i k";s:3:"166";s:3:"n h";s:3:"167";s:3:" ad";s:3:"168";s:3:"ake";s:3:"169";s:3:"n w";s:3:"170";s:3:"sha";s:3:"171";s:3:"utu";s:3:"172";s:4:" Æ´a";s:3:"173";s:3:"bay";s:3:"174";s:3:"tan";s:3:"175";s:4:"Æ´an";s:3:"176";s:3:"bin";s:3:"177";s:3:"duk";s:3:"178";s:3:"e m";s:3:"179";s:3:"n n";s:3:"180";s:3:"oka";s:3:"181";s:3:"yin";s:3:"182";s:4:"É—an";s:3:"183";s:3:" fa";s:3:"184";s:3:"a i";s:3:"185";s:3:"kki";s:3:"186";s:3:"re ";s:3:"187";s:3:"za ";s:3:"188";s:3:"ala";s:3:"189";s:3:"asu";s:3:"190";s:3:"han";s:3:"191";s:3:"i y";s:3:"192";s:3:"mar";s:3:"193";s:3:"ran";s:3:"194";s:4:"Æ™as";s:3:"195";s:3:"add";s:3:"196";s:3:"ars";s:3:"197";s:3:"gab";s:3:"198";s:3:"ira";s:3:"199";s:3:"mma";s:3:"200";s:3:"u d";s:3:"201";s:3:" ts";s:3:"202";s:3:"abb";s:3:"203";s:3:"abu";s:3:"204";s:3:"aga";s:3:"205";s:3:"gar";s:3:"206";s:3:"n b";s:3:"207";s:4:" É—a";s:3:"208";s:3:"aci";s:3:"209";s:3:"aik";s:3:"210";s:3:"am ";s:3:"211";s:3:"dun";s:3:"212";s:3:"e s";s:3:"213";s:3:"i b";s:3:"214";s:3:"i w";s:3:"215";s:3:"kas";s:3:"216";s:3:"kok";s:3:"217";s:3:"wam";s:3:"218";s:3:" am";s:3:"219";s:3:"amf";s:3:"220";s:3:"bba";s:3:"221";s:3:"din";s:3:"222";s:3:"fan";s:3:"223";s:3:"gwa";s:3:"224";s:3:"i s";s:3:"225";s:3:"wat";s:3:"226";s:3:"ano";s:3:"227";s:3:"are";s:3:"228";s:3:"dai";s:3:"229";s:3:"iri";s:3:"230";s:3:"ma'";s:3:"231";s:3:" la";s:3:"232";s:3:"all";s:3:"233";s:3:"dam";s:3:"234";s:3:"ika";s:3:"235";s:3:"mi ";s:3:"236";s:3:"she";s:3:"237";s:3:"tum";s:3:"238";s:3:"uni";s:3:"239";s:3:" an";s:3:"240";s:3:" ai";s:3:"241";s:3:" ke";s:3:"242";s:3:" ki";s:3:"243";s:3:"dag";s:3:"244";s:3:"mai";s:3:"245";s:3:"mfa";s:3:"246";s:3:"no ";s:3:"247";s:3:"nsu";s:3:"248";s:3:"o d";s:3:"249";s:3:"sak";s:3:"250";s:3:"um ";s:3:"251";s:3:" bi";s:3:"252";s:3:" gw";s:3:"253";s:3:" kw";s:3:"254";s:3:"jam";s:3:"255";s:3:"yya";s:3:"256";s:3:"a j";s:3:"257";s:3:"fa ";s:3:"258";s:3:"uta";s:3:"259";s:3:" 
hu";s:3:"260";s:3:"'a ";s:3:"261";s:3:"ans";s:3:"262";s:4:"aÉ—a";s:3:"263";s:3:"dda";s:3:"264";s:3:"hin";s:3:"265";s:3:"niy";s:3:"266";s:3:"r s";s:3:"267";s:3:"bat";s:3:"268";s:3:"dar";s:3:"269";s:3:"gan";s:3:"270";s:3:"i t";s:3:"271";s:3:"nta";s:3:"272";s:3:"oki";s:3:"273";s:3:"omi";s:3:"274";s:3:"sal";s:3:"275";s:3:"a l";s:3:"276";s:3:"kac";s:3:"277";s:3:"lla";s:3:"278";s:3:"wad";s:3:"279";s:3:"war";s:3:"280";s:3:"amm";s:3:"281";s:3:"dom";s:3:"282";s:3:"r m";s:3:"283";s:3:"ras";s:3:"284";s:3:"sai";s:3:"285";s:3:" lo";s:3:"286";s:3:"ats";s:3:"287";s:3:"hal";s:3:"288";s:3:"kat";s:3:"289";s:3:"li ";s:3:"290";s:3:"lok";s:3:"291";s:3:"n c";s:3:"292";s:3:"nar";s:3:"293";s:3:"tin";s:3:"294";s:3:"afa";s:3:"295";s:3:"bub";s:3:"296";s:3:"i g";s:3:"297";s:3:"isa";s:3:"298";s:3:"mak";s:3:"299";}s:8:"hawaiian";a:300:{s:3:" ka";s:1:"0";s:3:"na ";s:1:"1";s:3:" o ";s:1:"2";s:3:"ka ";s:1:"3";s:3:" ma";s:1:"4";s:3:" a ";s:1:"5";s:3:" la";s:1:"6";s:3:"a i";s:1:"7";s:3:"a m";s:1:"8";s:3:" i ";s:1:"9";s:3:"la ";s:2:"10";s:3:"ana";s:2:"11";s:3:"ai ";s:2:"12";s:3:"ia ";s:2:"13";s:3:"a o";s:2:"14";s:3:"a k";s:2:"15";s:3:"a h";s:2:"16";s:3:"o k";s:2:"17";s:3:" ke";s:2:"18";s:3:"a a";s:2:"19";s:3:"i k";s:2:"20";s:3:" ho";s:2:"21";s:3:" ia";s:2:"22";s:3:"ua ";s:2:"23";s:3:" na";s:2:"24";s:3:" me";s:2:"25";s:3:"e k";s:2:"26";s:3:"e a";s:2:"27";s:3:"au ";s:2:"28";s:3:"ke ";s:2:"29";s:3:"ma ";s:2:"30";s:3:"mai";s:2:"31";s:3:"aku";s:2:"32";s:3:" ak";s:2:"33";s:3:"ahi";s:2:"34";s:3:" ha";s:2:"35";s:3:" ko";s:2:"36";s:3:" e ";s:2:"37";s:3:"a l";s:2:"38";s:3:" no";s:2:"39";s:3:"me ";s:2:"40";s:3:"ku ";s:2:"41";s:3:"aka";s:2:"42";s:3:"kan";s:2:"43";s:3:"no ";s:2:"44";s:3:"i a";s:2:"45";s:3:"ho ";s:2:"46";s:3:"ou ";s:2:"47";s:3:" ai";s:2:"48";s:3:"i o";s:2:"49";s:3:"a p";s:2:"50";s:3:"o l";s:2:"51";s:3:"o a";s:2:"52";s:3:"ama";s:2:"53";s:3:"a n";s:2:"54";s:3:" an";s:2:"55";s:3:"i m";s:2:"56";s:3:"han";s:2:"57";s:3:"i i";s:2:"58";s:3:"iho";s:2:"59";s:3:"kou";s:2:"60";s:3:"ne ";s:2:"61";s:3:" 
ih";s:2:"62";s:3:"o i";s:2:"63";s:3:"iki";s:2:"64";s:3:"ona";s:2:"65";s:3:"hoo";s:2:"66";s:3:"le ";s:2:"67";s:3:"e h";s:2:"68";s:3:" he";s:2:"69";s:3:"ina";s:2:"70";s:3:" wa";s:2:"71";s:3:"ea ";s:2:"72";s:3:"ako";s:2:"73";s:3:"u i";s:2:"74";s:3:"kah";s:2:"75";s:3:"oe ";s:2:"76";s:3:"i l";s:2:"77";s:3:"u a";s:2:"78";s:3:" pa";s:2:"79";s:3:"hoi";s:2:"80";s:3:"e i";s:2:"81";s:3:"era";s:2:"82";s:3:"ko ";s:2:"83";s:3:"u m";s:2:"84";s:3:"kua";s:2:"85";s:3:"mak";s:2:"86";s:3:"oi ";s:2:"87";s:3:"kai";s:2:"88";s:3:"i n";s:2:"89";s:3:"a e";s:2:"90";s:3:"hin";s:2:"91";s:3:"ane";s:2:"92";s:3:" ol";s:2:"93";s:3:"i h";s:2:"94";s:3:"mea";s:2:"95";s:3:"wah";s:2:"96";s:3:"lak";s:2:"97";s:3:"e m";s:2:"98";s:3:"o n";s:2:"99";s:3:"u l";s:3:"100";s:3:"ika";s:3:"101";s:3:"ki ";s:3:"102";s:3:"a w";s:3:"103";s:3:"mal";s:3:"104";s:3:"hi ";s:3:"105";s:3:"e n";s:3:"106";s:3:"u o";s:3:"107";s:3:"hik";s:3:"108";s:3:" ku";s:3:"109";s:3:"e l";s:3:"110";s:3:"ele";s:3:"111";s:3:"ra ";s:3:"112";s:3:"ber";s:3:"113";s:3:"ine";s:3:"114";s:3:"abe";s:3:"115";s:3:"ain";s:3:"116";s:3:"ala";s:3:"117";s:3:"lo ";s:3:"118";s:3:" po";s:3:"119";s:3:"kon";s:3:"120";s:3:" ab";s:3:"121";s:3:"ole";s:3:"122";s:3:"he ";s:3:"123";s:3:"pau";s:3:"124";s:3:"mah";s:3:"125";s:3:"va ";s:3:"126";s:3:"ela";s:3:"127";s:3:"kau";s:3:"128";s:3:"nak";s:3:"129";s:3:" oe";s:3:"130";s:3:"kei";s:3:"131";s:3:"oia";s:3:"132";s:3:" ie";s:3:"133";s:3:"ram";s:3:"134";s:3:" oi";s:3:"135";s:3:"oa ";s:3:"136";s:3:"eho";s:3:"137";s:3:"hov";s:3:"138";s:3:"ieh";s:3:"139";s:3:"ova";s:3:"140";s:3:" ua";s:3:"141";s:3:"una";s:3:"142";s:3:"ara";s:3:"143";s:3:"o s";s:3:"144";s:3:"awa";s:3:"145";s:3:"o o";s:3:"146";s:3:"nau";s:3:"147";s:3:"u n";s:3:"148";s:3:"wa ";s:3:"149";s:3:"wai";s:3:"150";s:3:"hel";s:3:"151";s:3:" ae";s:3:"152";s:3:" al";s:3:"153";s:3:"ae ";s:3:"154";s:3:"ta ";s:3:"155";s:3:"aik";s:3:"156";s:3:" 
hi";s:3:"157";s:3:"ale";s:3:"158";s:3:"ila";s:3:"159";s:3:"lel";s:3:"160";s:3:"ali";s:3:"161";s:3:"eik";s:3:"162";s:3:"olo";s:3:"163";s:3:"onu";s:3:"164";s:3:" lo";s:3:"165";s:3:"aua";s:3:"166";s:3:"e o";s:3:"167";s:3:"ola";s:3:"168";s:3:"hon";s:3:"169";s:3:"mam";s:3:"170";s:3:"nan";s:3:"171";s:3:" au";s:3:"172";s:3:"aha";s:3:"173";s:3:"lau";s:3:"174";s:3:"nua";s:3:"175";s:3:"oho";s:3:"176";s:3:"oma";s:3:"177";s:3:" ao";s:3:"178";s:3:"ii ";s:3:"179";s:3:"alu";s:3:"180";s:3:"ima";s:3:"181";s:3:"mau";s:3:"182";s:3:"ike";s:3:"183";s:3:"apa";s:3:"184";s:3:"elo";s:3:"185";s:3:"lii";s:3:"186";s:3:"poe";s:3:"187";s:3:"aia";s:3:"188";s:3:"noa";s:3:"189";s:3:" in";s:3:"190";s:3:"o m";s:3:"191";s:3:"oka";s:3:"192";s:3:"'u ";s:3:"193";s:3:"aho";s:3:"194";s:3:"ei ";s:3:"195";s:3:"eka";s:3:"196";s:3:"ha ";s:3:"197";s:3:"lu ";s:3:"198";s:3:"nei";s:3:"199";s:3:"hol";s:3:"200";s:3:"ino";s:3:"201";s:3:"o e";s:3:"202";s:3:"ema";s:3:"203";s:3:"iwa";s:3:"204";s:3:"olu";s:3:"205";s:3:"ada";s:3:"206";s:3:"naa";s:3:"207";s:3:"pa ";s:3:"208";s:3:"u k";s:3:"209";s:3:"ewa";s:3:"210";s:3:"hua";s:3:"211";s:3:"lam";s:3:"212";s:3:"lua";s:3:"213";s:3:"o h";s:3:"214";s:3:"ook";s:3:"215";s:3:"u h";s:3:"216";s:3:" li";s:3:"217";s:3:"ahu";s:3:"218";s:3:"amu";s:3:"219";s:3:"ui ";s:3:"220";s:3:" il";s:3:"221";s:3:" mo";s:3:"222";s:3:" se";s:3:"223";s:3:"eia";s:3:"224";s:3:"law";s:3:"225";s:3:" hu";s:3:"226";s:3:" ik";s:3:"227";s:3:"ail";s:3:"228";s:3:"e p";s:3:"229";s:3:"li ";s:3:"230";s:3:"lun";s:3:"231";s:3:"uli";s:3:"232";s:3:"io ";s:3:"233";s:3:"kik";s:3:"234";s:3:"noh";s:3:"235";s:3:"u e";s:3:"236";s:3:" sa";s:3:"237";s:3:"aaw";s:3:"238";s:3:"awe";s:3:"239";s:3:"ena";s:3:"240";s:3:"hal";s:3:"241";s:3:"kol";s:3:"242";s:3:"lan";s:3:"243";s:3:" le";s:3:"244";s:3:" ne";s:3:"245";s:3:"a'u";s:3:"246";s:3:"ilo";s:3:"247";s:3:"kap";s:3:"248";s:3:"oko";s:3:"249";s:3:"sa ";s:3:"250";s:3:" pe";s:3:"251";s:3:"hop";s:3:"252";s:3:"loa";s:3:"253";s:3:"ope";s:3:"254";s:3:"pe ";s:3:"255";s:3:" ad";s:3:"256";s:3:" 
pu";s:3:"257";s:3:"ahe";s:3:"258";s:3:"aol";s:3:"259";s:3:"ia'";s:3:"260";s:3:"lai";s:3:"261";s:3:"loh";s:3:"262";s:3:"na'";s:3:"263";s:3:"oom";s:3:"264";s:3:"aau";s:3:"265";s:3:"eri";s:3:"266";s:3:"kul";s:3:"267";s:3:"we ";s:3:"268";s:3:"ake";s:3:"269";s:3:"kek";s:3:"270";s:3:"laa";s:3:"271";s:3:"ri ";s:3:"272";s:3:"iku";s:3:"273";s:3:"kak";s:3:"274";s:3:"lim";s:3:"275";s:3:"nah";s:3:"276";s:3:"ner";s:3:"277";s:3:"nui";s:3:"278";s:3:"ono";s:3:"279";s:3:"a u";s:3:"280";s:3:"dam";s:3:"281";s:3:"kum";s:3:"282";s:3:"lok";s:3:"283";s:3:"mua";s:3:"284";s:3:"uma";s:3:"285";s:3:"wal";s:3:"286";s:3:"wi ";s:3:"287";s:3:"'i ";s:3:"288";s:3:"a'i";s:3:"289";s:3:"aan";s:3:"290";s:3:"alo";s:3:"291";s:3:"eta";s:3:"292";s:3:"mu ";s:3:"293";s:3:"ohe";s:3:"294";s:3:"u p";s:3:"295";s:3:"ula";s:3:"296";s:3:"uwa";s:3:"297";s:3:" nu";s:3:"298";s:3:"amo";s:3:"299";}s:5:"hindi";a:300:{s:7:"ें ";s:1:"0";s:7:" है";s:1:"1";s:9:"में";s:1:"2";s:7:" मे";s:1:"3";s:7:"ने ";s:1:"4";s:7:"की ";s:1:"5";s:7:"के ";s:1:"6";s:7:"है ";s:1:"7";s:7:" के";s:1:"8";s:7:" की";s:1:"9";s:7:" को";s:2:"10";s:7:"ों ";s:2:"11";s:7:"को ";s:2:"12";s:7:"ा ह";s:2:"13";s:7:" का";s:2:"14";s:7:"से ";s:2:"15";s:7:"ा क";s:2:"16";s:7:"े क";s:2:"17";s:7:"ं क";s:2:"18";s:7:"या ";s:2:"19";s:7:" कि";s:2:"20";s:7:" से";s:2:"21";s:7:"का ";s:2:"22";s:7:"ी क";s:2:"23";s:7:" ने";s:2:"24";s:7:" और";s:2:"25";s:7:"और ";s:2:"26";s:7:"ना ";s:2:"27";s:7:"कि ";s:2:"28";s:7:"à¤à¥€ ";s:2:"29";s:7:"ी स";s:2:"30";s:7:" जा";s:2:"31";s:7:" पर";s:2:"32";s:7:"ार ";s:2:"33";s:7:" कर";s:2:"34";s:7:"ी ह";s:2:"35";s:7:" हो";s:2:"36";s:7:"ही ";s:2:"37";s:9:"िया";s:2:"38";s:7:" इस";s:2:"39";s:7:" रह";s:2:"40";s:7:"र क";s:2:"41";s:9:"à¥à¤¨à¤¾";s:2:"42";s:7:"ता ";s:2:"43";s:7:"ान ";s:2:"44";s:7:"े स";s:2:"45";s:7:" à¤à¥€";s:2:"46";s:7:" रा";s:2:"47";s:7:"े ह";s:2:"48";s:7:" चà¥";s:2:"49";s:7:" पा";s:2:"50";s:7:"पर ";s:2:"51";s:9:"चà¥à¤¨";s:2:"52";s:9:"नाव";s:2:"53";s:7:" कह";s:2:"54";s:9:"पà¥à¤°";s:2:"55";s:7:" 
à¤à¤¾";s:2:"56";s:9:"राज";s:2:"57";s:9:"हैं";s:2:"58";s:7:"ा स";s:2:"59";s:7:"ै क";s:2:"60";s:7:"ैं ";s:2:"61";s:7:"नी ";s:2:"62";s:7:"ल क";s:2:"63";s:7:"ीं ";s:2:"64";s:7:"़ी ";s:2:"65";s:7:"था ";s:2:"66";s:7:"री ";s:2:"67";s:7:"ाव ";s:2:"68";s:7:"े ब";s:2:"69";s:7:" पà¥";s:2:"70";s:9:"कà¥à¤·";s:2:"71";s:7:"पा ";s:2:"72";s:7:"ले ";s:2:"73";s:7:" दे";s:2:"74";s:7:"ला ";s:2:"75";s:7:"हा ";s:2:"76";s:9:"ाजप";s:2:"77";s:7:" था";s:2:"78";s:7:" नह";s:2:"79";s:7:"इस ";s:2:"80";s:7:"कर ";s:2:"81";s:9:"जपा";s:2:"82";s:9:"नही";s:2:"83";s:9:"à¤à¤¾à¤œ";s:2:"84";s:9:"यों";s:2:"85";s:7:"र स";s:2:"86";s:9:"हीं";s:2:"87";s:7:" अम";s:2:"88";s:7:" बा";s:2:"89";s:7:" मा";s:2:"90";s:7:" वि";s:2:"91";s:9:"रीक";s:2:"92";s:7:"िठ";s:2:"93";s:7:"े प";s:2:"94";s:9:"à¥à¤¯à¤¾";s:2:"95";s:7:" ही";s:2:"96";s:7:"ं म";s:2:"97";s:9:"कार";s:2:"98";s:7:"ा ज";s:2:"99";s:7:"े ल";s:3:"100";s:7:" ता";s:3:"101";s:7:" दि";s:3:"102";s:7:" सा";s:3:"103";s:7:" हम";s:3:"104";s:7:"ा न";s:3:"105";s:7:"ा म";s:3:"106";s:9:"ाक़";s:3:"107";s:9:"à¥à¤¤à¤¾";s:3:"108";s:7:" à¤à¤•";s:3:"109";s:7:" सं";s:3:"110";s:7:" सà¥";s:3:"111";s:9:"अमर";s:3:"112";s:9:"क़ी";s:3:"113";s:9:"ताज";s:3:"114";s:9:"मरी";s:3:"115";s:9:"सà¥à¤¥";s:3:"116";s:7:"ा थ";s:3:"117";s:9:"ारà¥";s:3:"118";s:7:" हà¥";s:3:"119";s:9:"इरा";s:3:"120";s:7:"à¤à¤• ";s:3:"121";s:7:"न क";s:3:"122";s:7:"र म";s:3:"123";s:9:"राक";s:3:"124";s:7:"ी ज";s:3:"125";s:7:"ी न";s:3:"126";s:7:" इर";s:3:"127";s:7:" उन";s:3:"128";s:7:" पह";s:3:"129";s:9:"कहा";s:3:"130";s:7:"ते ";s:3:"131";s:7:"े अ";s:3:"132";s:7:" तो";s:3:"133";s:7:" सà¥";s:3:"134";s:7:"ति ";s:3:"135";s:7:"ती ";s:3:"136";s:7:"तो ";s:3:"137";s:9:"मिल";s:3:"138";s:7:"िक ";s:3:"139";s:9:"ियो";s:3:"140";s:9:"à¥à¤°à¥‡";s:3:"141";s:7:" अप";s:3:"142";s:7:" फ़";s:3:"143";s:7:" लि";s:3:"144";s:7:" लो";s:3:"145";s:7:" सम";s:3:"146";s:7:"म क";s:3:"147";s:9:"रà¥à¤Ÿ";s:3:"148";s:7:"हो ";s:3:"149";s:7:"ा च";s:3:"150";s:7:"ाई ";s:3:"151";s:9:"ाने";s:3:"152";s:7:"िन ";s:3:"153";s:7:"à¥à¤¯ ";s:3:"154";s:7:" 
उस";s:3:"155";s:7:" क़";s:3:"156";s:7:" सक";s:3:"157";s:7:" सै";s:3:"158";s:7:"ं प";s:3:"159";s:7:"ं ह";s:3:"160";s:7:"गी ";s:3:"161";s:7:"त क";s:3:"162";s:9:"मान";s:3:"163";s:7:"र न";s:3:"164";s:9:"षà¥à¤Ÿ";s:3:"165";s:7:"स क";s:3:"166";s:9:"सà¥à¤¤";s:3:"167";s:7:"ाठ";s:3:"168";s:7:"ी ब";s:3:"169";s:7:"ी म";s:3:"170";s:9:"à¥à¤°à¥€";s:3:"171";s:7:" दो";s:3:"172";s:7:" मि";s:3:"173";s:7:" मà¥";s:3:"174";s:7:" ले";s:3:"175";s:7:" शा";s:3:"176";s:7:"ं स";s:3:"177";s:9:"ज़ा";s:3:"178";s:9:"तà¥à¤°";s:3:"179";s:7:"थी ";s:3:"180";s:9:"लिà¤";s:3:"181";s:7:"सी ";s:3:"182";s:7:"़ा ";s:3:"183";s:9:"़ार";s:3:"184";s:9:"ांग";s:3:"185";s:7:"े द";s:3:"186";s:7:"े म";s:3:"187";s:7:"à¥à¤µ ";s:3:"188";s:7:" ना";s:3:"189";s:7:" बन";s:3:"190";s:9:"ंगà¥";s:3:"191";s:9:"कां";s:3:"192";s:7:"गा ";s:3:"193";s:9:"गà¥à¤°";s:3:"194";s:7:"जा ";s:3:"195";s:9:"जà¥à¤¯";s:3:"196";s:7:"दी ";s:3:"197";s:7:"न म";s:3:"198";s:9:"पार";s:3:"199";s:7:"à¤à¤¾ ";s:3:"200";s:9:"रही";s:3:"201";s:7:"रे ";s:3:"202";s:9:"रेस";s:3:"203";s:7:"ली ";s:3:"204";s:9:"सà¤à¤¾";s:3:"205";s:7:"ा र";s:3:"206";s:7:"ाल ";s:3:"207";s:7:"ी अ";s:3:"208";s:9:"ीकी";s:3:"209";s:7:"े त";s:3:"210";s:7:"ेश ";s:3:"211";s:7:" अं";s:3:"212";s:7:" तक";s:3:"213";s:7:" या";s:3:"214";s:7:"ई ह";s:3:"215";s:9:"करन";s:3:"216";s:7:"तक ";s:3:"217";s:9:"देश";s:3:"218";s:9:"वरà¥";s:3:"219";s:9:"ाया";s:3:"220";s:7:"ी à¤";s:3:"221";s:7:"ेस ";s:3:"222";s:7:"à¥à¤· ";s:3:"223";s:7:" गय";s:3:"224";s:7:" जि";s:3:"225";s:7:" थी";s:3:"226";s:7:" बड";s:3:"227";s:7:" यह";s:3:"228";s:7:" वा";s:3:"229";s:9:"ंतर";s:3:"230";s:9:"अंत";s:3:"231";s:7:"क़ ";s:3:"232";s:9:"गया";s:3:"233";s:7:"टी ";s:3:"234";s:9:"निक";s:3:"235";s:9:"नà¥à¤¹";s:3:"236";s:9:"पहल";s:3:"237";s:9:"बड़";s:3:"238";s:9:"मार";s:3:"239";s:7:"र प";s:3:"240";s:9:"रने";s:3:"241";s:9:"ाज़";s:3:"242";s:7:"ि इ";s:3:"243";s:7:"ी र";s:3:"244";s:7:"े ज";s:3:"245";s:7:"े व";s:3:"246";s:7:"à¥à¤Ÿ ";s:3:"247";s:9:"à¥à¤Ÿà¥€";s:3:"248";s:7:" अब";s:3:"249";s:7:" लग";s:3:"250";s:7:" वर";s:3:"251";s:7:" 
सी";s:3:"252";s:7:"ं à¤";s:3:"253";s:9:"उनà¥";s:3:"254";s:7:"क क";s:3:"255";s:9:"किय";s:3:"256";s:9:"देख";s:3:"257";s:9:"पूर";s:3:"258";s:9:"फ़à¥";s:3:"259";s:7:"यह ";s:3:"260";s:9:"यान";s:3:"261";s:9:"रिक";s:3:"262";s:9:"रिय";s:3:"263";s:9:"रà¥à¤¡";s:3:"264";s:9:"लेक";s:3:"265";s:9:"सकत";s:3:"266";s:9:"हों";s:3:"267";s:9:"होग";s:3:"268";s:7:"ा अ";s:3:"269";s:7:"ा द";s:3:"270";s:7:"ा प";s:3:"271";s:7:"ाद ";s:3:"272";s:9:"ारा";s:3:"273";s:7:"ित ";s:3:"274";s:7:"ी त";s:3:"275";s:7:"ी प";s:3:"276";s:7:"ो क";s:3:"277";s:7:"ो द";s:3:"278";s:7:" ते";s:3:"279";s:7:" नि";s:3:"280";s:7:" सर";s:3:"281";s:7:" हा";s:3:"282";s:7:"ं द";s:3:"283";s:9:"अपन";s:3:"284";s:9:"जान";s:3:"285";s:7:"त म";s:3:"286";s:9:"थित";s:3:"287";s:9:"पनी";s:3:"288";s:9:"महल";s:3:"289";s:7:"र ह";s:3:"290";s:9:"लोग";s:3:"291";s:7:"व क";s:3:"292";s:9:"हना";s:3:"293";s:7:"हल ";s:3:"294";s:9:"हाà¤";s:3:"295";s:9:"ाजà¥";s:3:"296";s:9:"ाना";s:3:"297";s:9:"िकà¥";s:3:"298";s:9:"िसà¥";s:3:"299";}s:9:"hungarian";a:300:{s:3:" a ";s:1:"0";s:3:" az";s:1:"1";s:3:" sz";s:1:"2";s:3:"az ";s:1:"3";s:3:" me";s:1:"4";s:3:"en ";s:1:"5";s:3:" el";s:1:"6";s:3:" ho";s:1:"7";s:3:"ek ";s:1:"8";s:3:"gy ";s:1:"9";s:3:"tt ";s:2:"10";s:3:"ett";s:2:"11";s:3:"sze";s:2:"12";s:3:" fe";s:2:"13";s:4:"és ";s:2:"14";s:3:" ki";s:2:"15";s:3:"tet";s:2:"16";s:3:" be";s:2:"17";s:3:"et ";s:2:"18";s:3:"ter";s:2:"19";s:4:" kö";s:2:"20";s:4:" és";s:2:"21";s:3:"hog";s:2:"22";s:3:"meg";s:2:"23";s:3:"ogy";s:2:"24";s:3:"szt";s:2:"25";s:3:"te ";s:2:"26";s:3:"t a";s:2:"27";s:3:"zet";s:2:"28";s:3:"a m";s:2:"29";s:3:"nek";s:2:"30";s:3:"nt ";s:2:"31";s:4:"ség";s:2:"32";s:4:"szá";s:2:"33";s:3:"ak ";s:2:"34";s:3:" va";s:2:"35";s:3:"an ";s:2:"36";s:3:"eze";s:2:"37";s:3:"ra ";s:2:"38";s:3:"ta ";s:2:"39";s:3:" mi";s:2:"40";s:3:"int";s:2:"41";s:4:"köz";s:2:"42";s:3:" is";s:2:"43";s:3:"esz";s:2:"44";s:3:"fel";s:2:"45";s:3:"min";s:2:"46";s:3:"nak";s:2:"47";s:3:"ors";s:2:"48";s:3:"zer";s:2:"49";s:3:" te";s:2:"50";s:3:"a a";s:2:"51";s:3:"a k";s:2:"52";s:3:"is 
";s:2:"53";s:3:" cs";s:2:"54";s:3:"ele";s:2:"55";s:3:"er ";s:2:"56";s:3:"men";s:2:"57";s:3:"si ";s:2:"58";s:3:"tek";s:2:"59";s:3:"ti ";s:2:"60";s:3:" ne";s:2:"61";s:3:"csa";s:2:"62";s:3:"ent";s:2:"63";s:3:"z e";s:2:"64";s:3:"a t";s:2:"65";s:3:"ala";s:2:"66";s:3:"ere";s:2:"67";s:3:"es ";s:2:"68";s:3:"lom";s:2:"69";s:3:"lte";s:2:"70";s:3:"mon";s:2:"71";s:3:"ond";s:2:"72";s:3:"rsz";s:2:"73";s:3:"sza";s:2:"74";s:3:"tte";s:2:"75";s:4:"zág";s:2:"76";s:4:"ány";s:2:"77";s:3:" fo";s:2:"78";s:3:" ma";s:2:"79";s:3:"ai ";s:2:"80";s:3:"ben";s:2:"81";s:3:"el ";s:2:"82";s:3:"ene";s:2:"83";s:3:"ik ";s:2:"84";s:3:"jel";s:2:"85";s:4:"tás";s:2:"86";s:4:"áll";s:2:"87";s:3:" ha";s:2:"88";s:3:" le";s:2:"89";s:4:" ál";s:2:"90";s:3:"agy";s:2:"91";s:4:"alá";s:2:"92";s:3:"isz";s:2:"93";s:3:"y a";s:2:"94";s:3:"zte";s:2:"95";s:4:"ás ";s:2:"96";s:3:" al";s:2:"97";s:3:"e a";s:2:"98";s:3:"egy";s:2:"99";s:3:"ely";s:3:"100";s:3:"for";s:3:"101";s:3:"lat";s:3:"102";s:3:"lt ";s:3:"103";s:3:"n a";s:3:"104";s:3:"oga";s:3:"105";s:3:"on ";s:3:"106";s:3:"re ";s:3:"107";s:3:"st ";s:3:"108";s:4:"ság";s:3:"109";s:3:"t m";s:3:"110";s:4:"án ";s:3:"111";s:4:"ét ";s:3:"112";s:4:"ült";s:3:"113";s:3:" je";s:3:"114";s:3:"gi ";s:3:"115";s:3:"k a";s:3:"116";s:4:"kül";s:3:"117";s:3:"lam";s:3:"118";s:3:"len";s:3:"119";s:4:"lás";s:3:"120";s:4:"más";s:3:"121";s:3:"s k";s:3:"122";s:3:"vez";s:3:"123";s:4:"áso";s:3:"124";s:5:"özö";s:3:"125";s:3:" ta";s:3:"126";s:3:"a s";s:3:"127";s:3:"a v";s:3:"128";s:3:"asz";s:3:"129";s:4:"atá";s:3:"130";s:4:"etÅ‘";s:3:"131";s:3:"kez";s:3:"132";s:3:"let";s:3:"133";s:3:"mag";s:3:"134";s:3:"nem";s:3:"135";s:4:"szé";s:3:"136";s:3:"z m";s:3:"137";s:4:"át ";s:3:"138";s:4:"éte";s:3:"139";s:4:"ölt";s:3:"140";s:3:" de";s:3:"141";s:3:" gy";s:3:"142";s:4:" ké";s:3:"143";s:3:" mo";s:3:"144";s:4:" vá";s:3:"145";s:4:" ér";s:3:"146";s:3:"a b";s:3:"147";s:3:"a f";s:3:"148";s:3:"ami";s:3:"149";s:3:"at 
";s:3:"150";s:3:"ato";s:3:"151";s:3:"att";s:3:"152";s:3:"bef";s:3:"153";s:3:"dta";s:3:"154";s:3:"gya";s:3:"155";s:3:"hat";s:3:"156";s:3:"i s";s:3:"157";s:3:"las";s:3:"158";s:3:"ndt";s:3:"159";s:3:"rt ";s:3:"160";s:3:"szo";s:3:"161";s:3:"t k";s:3:"162";s:4:"tár";s:3:"163";s:4:"tés";s:3:"164";s:3:"van";s:3:"165";s:5:"ásá";s:3:"166";s:4:"ól ";s:3:"167";s:4:" bé";s:3:"168";s:3:" eg";s:3:"169";s:3:" or";s:3:"170";s:4:" pá";s:3:"171";s:4:" pé";s:3:"172";s:3:" ve";s:3:"173";s:3:"ban";s:3:"174";s:3:"eke";s:3:"175";s:4:"ekü";s:3:"176";s:4:"elÅ‘";s:3:"177";s:3:"erv";s:3:"178";s:3:"ete";s:3:"179";s:3:"fog";s:3:"180";s:3:"i a";s:3:"181";s:3:"kis";s:3:"182";s:4:"lád";s:3:"183";s:3:"nte";s:3:"184";s:3:"nye";s:3:"185";s:3:"nyi";s:3:"186";s:3:"ok ";s:3:"187";s:4:"omá";s:3:"188";s:3:"os ";s:3:"189";s:4:"rán";s:3:"190";s:4:"rás";s:3:"191";s:3:"sal";s:3:"192";s:3:"t e";s:3:"193";s:4:"vál";s:3:"194";s:3:"yar";s:3:"195";s:4:"ágo";s:3:"196";s:4:"ála";s:3:"197";s:4:"ége";s:3:"198";s:4:"ény";s:3:"199";s:4:"ött";s:3:"200";s:4:" tá";s:3:"201";s:4:"adó";s:3:"202";s:3:"elh";s:3:"203";s:3:"fej";s:3:"204";s:3:"het";s:3:"205";s:3:"hoz";s:3:"206";s:3:"ill";s:3:"207";s:4:"jár";s:3:"208";s:4:"kés";s:3:"209";s:3:"llo";s:3:"210";s:3:"mi ";s:3:"211";s:3:"ny ";s:3:"212";s:3:"ont";s:3:"213";s:3:"ren";s:3:"214";s:3:"res";s:3:"215";s:3:"rin";s:3:"216";s:3:"s a";s:3:"217";s:3:"s e";s:3:"218";s:3:"ssz";s:3:"219";s:3:"zt ";s:3:"220";s:3:" ez";s:3:"221";s:3:" ka";s:3:"222";s:3:" ke";s:3:"223";s:3:" ko";s:3:"224";s:3:" re";s:3:"225";s:3:"a h";s:3:"226";s:3:"a n";s:3:"227";s:3:"den";s:3:"228";s:4:"dó ";s:3:"229";s:3:"efo";s:3:"230";s:3:"gad";s:3:"231";s:3:"gat";s:3:"232";s:3:"gye";s:3:"233";s:3:"hel";s:3:"234";s:3:"k e";s:3:"235";s:3:"ket";s:3:"236";s:3:"les";s:3:"237";s:4:"mán";s:3:"238";s:3:"nde";s:3:"239";s:3:"nis";s:3:"240";s:3:"ozz";s:3:"241";s:3:"t b";s:3:"242";s:3:"t i";s:3:"243";s:4:"t é";s:3:"244";s:3:"tat";s:3:"245";s:3:"tos";s:3:"246";s:3:"val";s:3:"247";s:3:"z 
o";s:3:"248";s:3:"zak";s:3:"249";s:4:"ád ";s:3:"250";s:4:"ály";s:3:"251";s:4:"ára";s:3:"252";s:4:"ési";s:3:"253";s:4:"ész";s:3:"254";s:3:" ak";s:3:"255";s:3:" am";s:3:"256";s:3:" es";s:3:"257";s:4:" há";s:3:"258";s:3:" ny";s:3:"259";s:4:" tö";s:3:"260";s:3:"aka";s:3:"261";s:3:"art";s:3:"262";s:4:"ató";s:3:"263";s:3:"azt";s:3:"264";s:3:"bbe";s:3:"265";s:3:"ber";s:3:"266";s:4:"ció";s:3:"267";s:3:"cso";s:3:"268";s:3:"em ";s:3:"269";s:3:"eti";s:3:"270";s:4:"eté";s:3:"271";s:3:"gal";s:3:"272";s:3:"i t";s:3:"273";s:3:"ini";s:3:"274";s:3:"ist";s:3:"275";s:3:"ja ";s:3:"276";s:3:"ker";s:3:"277";s:3:"ki ";s:3:"278";s:3:"kor";s:3:"279";s:3:"koz";s:3:"280";s:4:"l é";s:3:"281";s:4:"ljá";s:3:"282";s:3:"lye";s:3:"283";s:3:"n v";s:3:"284";s:3:"ni ";s:3:"285";s:4:"pál";s:3:"286";s:3:"ror";s:3:"287";s:4:"ról";s:3:"288";s:4:"rül";s:3:"289";s:3:"s c";s:3:"290";s:3:"s p";s:3:"291";s:3:"s s";s:3:"292";s:3:"s v";s:3:"293";s:3:"sok";s:3:"294";s:3:"t j";s:3:"295";s:3:"t t";s:3:"296";s:3:"tar";s:3:"297";s:3:"tel";s:3:"298";s:3:"vat";s:3:"299";}s:9:"icelandic";a:300:{s:4:"að ";s:1:"0";s:3:"um ";s:1:"1";s:4:" að";s:1:"2";s:3:"ir ";s:1:"3";s:4:"ið ";s:1:"4";s:3:"ur ";s:1:"5";s:3:" ve";s:1:"6";s:4:" à ";s:1:"7";s:3:"na ";s:1:"8";s:4:" á ";s:1:"9";s:3:" se";s:2:"10";s:3:" er";s:2:"11";s:3:" og";s:2:"12";s:3:"ar ";s:2:"13";s:3:"og ";s:2:"14";s:3:"ver";s:2:"15";s:3:" mi";s:2:"16";s:3:"inn";s:2:"17";s:3:"nn ";s:2:"18";s:3:" fy";s:2:"19";s:3:"er ";s:2:"20";s:3:"fyr";s:2:"21";s:3:" ek";s:2:"22";s:3:" en";s:2:"23";s:3:" ha";s:2:"24";s:3:" he";s:2:"25";s:3:"ekk";s:2:"26";s:3:" st";s:2:"27";s:3:"ki ";s:2:"28";s:3:"st ";s:2:"29";s:4:"ði ";s:2:"30";s:3:" ba";s:2:"31";s:3:" me";s:2:"32";s:3:" vi";s:2:"33";s:3:"ig ";s:2:"34";s:3:"rir";s:2:"35";s:3:"yri";s:2:"36";s:3:" um";s:2:"37";s:3:"g f";s:2:"38";s:3:"leg";s:2:"39";s:3:"lei";s:2:"40";s:3:"ns ";s:2:"41";s:4:"ð s";s:2:"42";s:3:" ei";s:2:"43";s:4:" þa";s:2:"44";s:3:"in ";s:2:"45";s:3:"kki";s:2:"46";s:3:"r h";s:2:"47";s:3:"r 
s";s:2:"48";s:3:"egi";s:2:"49";s:3:"ein";s:2:"50";s:3:"ga ";s:2:"51";s:3:"ing";s:2:"52";s:3:"ra ";s:2:"53";s:3:"sta";s:2:"54";s:3:" va";s:2:"55";s:4:" þe";s:2:"56";s:3:"ann";s:2:"57";s:3:"en ";s:2:"58";s:3:"mil";s:2:"59";s:3:"sem";s:2:"60";s:4:"tjó";s:2:"61";s:4:"arð";s:2:"62";s:3:"di ";s:2:"63";s:3:"eit";s:2:"64";s:3:"haf";s:2:"65";s:3:"ill";s:2:"66";s:3:"ins";s:2:"67";s:3:"ist";s:2:"68";s:3:"llj";s:2:"69";s:3:"ndi";s:2:"70";s:3:"r a";s:2:"71";s:3:"r e";s:2:"72";s:3:"seg";s:2:"73";s:3:"un ";s:2:"74";s:3:"var";s:2:"75";s:3:" bi";s:2:"76";s:3:" el";s:2:"77";s:3:" fo";s:2:"78";s:3:" ge";s:2:"79";s:3:" yf";s:2:"80";s:3:"and";s:2:"81";s:3:"aug";s:2:"82";s:3:"bau";s:2:"83";s:3:"big";s:2:"84";s:3:"ega";s:2:"85";s:3:"eld";s:2:"86";s:4:"erð";s:2:"87";s:3:"fir";s:2:"88";s:3:"foo";s:2:"89";s:3:"gin";s:2:"90";s:3:"itt";s:2:"91";s:3:"n s";s:2:"92";s:3:"ngi";s:2:"93";s:3:"num";s:2:"94";s:3:"od ";s:2:"95";s:3:"ood";s:2:"96";s:3:"sin";s:2:"97";s:3:"ta ";s:2:"98";s:3:"tt ";s:2:"99";s:4:"við";s:3:"100";s:3:"yfi";s:3:"101";s:4:"ð e";s:3:"102";s:4:"ð f";s:3:"103";s:3:" hr";s:3:"104";s:4:" sé";s:3:"105";s:4:" þv";s:3:"106";s:3:"a e";s:3:"107";s:4:"a á";s:3:"108";s:3:"em ";s:3:"109";s:3:"gi ";s:3:"110";s:3:"i f";s:3:"111";s:3:"jar";s:3:"112";s:4:"jór";s:3:"113";s:3:"lja";s:3:"114";s:3:"m e";s:3:"115";s:4:"r á";s:3:"116";s:3:"rei";s:3:"117";s:3:"rst";s:3:"118";s:4:"rða";s:3:"119";s:4:"rði";s:3:"120";s:4:"rðu";s:3:"121";s:3:"stj";s:3:"122";s:3:"und";s:3:"123";s:3:"veg";s:3:"124";s:4:"và ";s:3:"125";s:4:"ð v";s:3:"126";s:5:"það";s:3:"127";s:5:"þvÃ";s:3:"128";s:3:" fj";s:3:"129";s:3:" ko";s:3:"130";s:3:" sl";s:3:"131";s:3:"eik";s:3:"132";s:3:"end";s:3:"133";s:3:"ert";s:3:"134";s:3:"ess";s:3:"135";s:4:"fjá";s:3:"136";s:3:"fur";s:3:"137";s:3:"gir";s:3:"138";s:4:"hús";s:3:"139";s:4:"jár";s:3:"140";s:3:"n e";s:3:"141";s:3:"ri ";s:3:"142";s:3:"tar";s:3:"143";s:5:"ð þ";s:3:"144";s:4:"ðar";s:3:"145";s:4:"ður";s:3:"146";s:4:"þes";s:3:"147";s:3:" br";s:3:"148";s:4:" hú";s:3:"149";s:3:" 
kr";s:3:"150";s:3:" le";s:3:"151";s:3:" up";s:3:"152";s:3:"a s";s:3:"153";s:3:"egg";s:3:"154";s:3:"i s";s:3:"155";s:3:"irt";s:3:"156";s:3:"ja ";s:3:"157";s:4:"kið";s:3:"158";s:3:"len";s:3:"159";s:4:"með";s:3:"160";s:3:"mik";s:3:"161";s:3:"n b";s:3:"162";s:3:"nar";s:3:"163";s:3:"nir";s:3:"164";s:3:"nun";s:3:"165";s:3:"r f";s:3:"166";s:3:"r v";s:3:"167";s:4:"rið";s:3:"168";s:3:"rt ";s:3:"169";s:3:"sti";s:3:"170";s:3:"t v";s:3:"171";s:3:"ti ";s:3:"172";s:3:"una";s:3:"173";s:3:"upp";s:3:"174";s:4:"ða ";s:3:"175";s:4:"óna";s:3:"176";s:3:" al";s:3:"177";s:3:" fr";s:3:"178";s:3:" gr";s:3:"179";s:3:"a v";s:3:"180";s:3:"all";s:3:"181";s:3:"an ";s:3:"182";s:3:"da ";s:3:"183";s:4:"eið";s:3:"184";s:4:"eð ";s:3:"185";s:3:"fa ";s:3:"186";s:3:"fra";s:3:"187";s:3:"g e";s:3:"188";s:3:"ger";s:3:"189";s:4:"gið";s:3:"190";s:3:"gt ";s:3:"191";s:3:"han";s:3:"192";s:3:"hef";s:3:"193";s:3:"hel";s:3:"194";s:3:"her";s:3:"195";s:3:"hra";s:3:"196";s:3:"i a";s:3:"197";s:3:"i e";s:3:"198";s:3:"i v";s:3:"199";s:4:"i þ";s:3:"200";s:3:"iki";s:3:"201";s:4:"jón";s:3:"202";s:4:"jör";s:3:"203";s:3:"ka ";s:3:"204";s:4:"kró";s:3:"205";s:4:"lÃk";s:3:"206";s:3:"m h";s:3:"207";s:3:"n a";s:3:"208";s:3:"nga";s:3:"209";s:3:"r l";s:3:"210";s:3:"ram";s:3:"211";s:3:"ru ";s:3:"212";s:5:"ráð";s:3:"213";s:4:"rón";s:3:"214";s:3:"svo";s:3:"215";s:3:"vin";s:3:"216";s:4:"à b";s:3:"217";s:4:"à h";s:3:"218";s:4:"ð h";s:3:"219";s:4:"ð k";s:3:"220";s:4:"ð m";s:3:"221";s:5:"örð";s:3:"222";s:3:" af";s:3:"223";s:3:" fa";s:3:"224";s:4:" lÃ";s:3:"225";s:4:" rá";s:3:"226";s:3:" sk";s:3:"227";s:3:" sv";s:3:"228";s:3:" te";s:3:"229";s:3:"a b";s:3:"230";s:3:"a f";s:3:"231";s:3:"a h";s:3:"232";s:3:"a k";s:3:"233";s:3:"a u";s:3:"234";s:3:"afi";s:3:"235";s:3:"agn";s:3:"236";s:3:"arn";s:3:"237";s:3:"ast";s:3:"238";s:3:"ber";s:3:"239";s:3:"efu";s:3:"240";s:3:"enn";s:3:"241";s:3:"erb";s:3:"242";s:3:"erg";s:3:"243";s:3:"fi ";s:3:"244";s:3:"g 
a";s:3:"245";s:3:"gar";s:3:"246";s:4:"iðs";s:3:"247";s:3:"ker";s:3:"248";s:3:"kke";s:3:"249";s:3:"lan";s:3:"250";s:4:"ljó";s:3:"251";s:3:"llt";s:3:"252";s:3:"ma ";s:3:"253";s:4:"mið";s:3:"254";s:3:"n v";s:3:"255";s:4:"n Ã";s:3:"256";s:3:"nan";s:3:"257";s:3:"nda";s:3:"258";s:3:"ndu";s:3:"259";s:4:"nið";s:3:"260";s:3:"nna";s:3:"261";s:3:"nnu";s:3:"262";s:3:"nu ";s:3:"263";s:3:"r o";s:3:"264";s:3:"rbe";s:3:"265";s:3:"rgi";s:3:"266";s:4:"slö";s:3:"267";s:4:"sé ";s:3:"268";s:3:"t a";s:3:"269";s:3:"t h";s:3:"270";s:3:"til";s:3:"271";s:3:"tin";s:3:"272";s:3:"ugu";s:3:"273";s:3:"vil";s:3:"274";s:3:"ygg";s:3:"275";s:4:"á s";s:3:"276";s:4:"ð a";s:3:"277";s:4:"ð b";s:3:"278";s:4:"órn";s:3:"279";s:4:"ögn";s:3:"280";s:4:"öku";s:3:"281";s:3:" at";s:3:"282";s:3:" fi";s:3:"283";s:4:" fé";s:3:"284";s:3:" ka";s:3:"285";s:3:" ma";s:3:"286";s:3:" no";s:3:"287";s:3:" sa";s:3:"288";s:3:" si";s:3:"289";s:3:" ti";s:3:"290";s:4:" ák";s:3:"291";s:3:"a m";s:3:"292";s:3:"a t";s:3:"293";s:4:"a Ã";s:3:"294";s:4:"a þ";s:3:"295";s:3:"afa";s:3:"296";s:3:"afs";s:3:"297";s:3:"ald";s:3:"298";s:3:"arf";s:3:"299";}s:10:"indonesian";a:300:{s:3:"an ";s:1:"0";s:3:" me";s:1:"1";s:3:"kan";s:1:"2";s:3:"ang";s:1:"3";s:3:"ng ";s:1:"4";s:3:" pe";s:1:"5";s:3:"men";s:1:"6";s:3:" di";s:1:"7";s:3:" ke";s:1:"8";s:3:" da";s:1:"9";s:3:" se";s:2:"10";s:3:"eng";s:2:"11";s:3:" be";s:2:"12";s:3:"nga";s:2:"13";s:3:"nya";s:2:"14";s:3:" te";s:2:"15";s:3:"ah ";s:2:"16";s:3:"ber";s:2:"17";s:3:"aka";s:2:"18";s:3:" ya";s:2:"19";s:3:"dan";s:2:"20";s:3:"di ";s:2:"21";s:3:"yan";s:2:"22";s:3:"n p";s:2:"23";s:3:"per";s:2:"24";s:3:"a m";s:2:"25";s:3:"ita";s:2:"26";s:3:" pa";s:2:"27";s:3:"da ";s:2:"28";s:3:"ata";s:2:"29";s:3:"ada";s:2:"30";s:3:"ya ";s:2:"31";s:3:"ta ";s:2:"32";s:3:" in";s:2:"33";s:3:"ala";s:2:"34";s:3:"eri";s:2:"35";s:3:"ia ";s:2:"36";s:3:"a d";s:2:"37";s:3:"n k";s:2:"38";s:3:"am ";s:2:"39";s:3:"ga ";s:2:"40";s:3:"at ";s:2:"41";s:3:"era";s:2:"42";s:3:"n d";s:2:"43";s:3:"ter";s:2:"44";s:3:" ka";s:2:"45";s:3:"a 
p";s:2:"46";s:3:"ari";s:2:"47";s:3:"emb";s:2:"48";s:3:"n m";s:2:"49";s:3:"ri ";s:2:"50";s:3:" ba";s:2:"51";s:3:"aan";s:2:"52";s:3:"ak ";s:2:"53";s:3:"ra ";s:2:"54";s:3:" it";s:2:"55";s:3:"ara";s:2:"56";s:3:"ela";s:2:"57";s:3:"ni ";s:2:"58";s:3:"ali";s:2:"59";s:3:"ran";s:2:"60";s:3:"ar ";s:2:"61";s:3:"eru";s:2:"62";s:3:"lah";s:2:"63";s:3:"a b";s:2:"64";s:3:"asi";s:2:"65";s:3:"awa";s:2:"66";s:3:"eba";s:2:"67";s:3:"gan";s:2:"68";s:3:"n b";s:2:"69";s:3:" ha";s:2:"70";s:3:"ini";s:2:"71";s:3:"mer";s:2:"72";s:3:" la";s:2:"73";s:3:" mi";s:2:"74";s:3:"and";s:2:"75";s:3:"ena";s:2:"76";s:3:"wan";s:2:"77";s:3:" sa";s:2:"78";s:3:"aha";s:2:"79";s:3:"lam";s:2:"80";s:3:"n i";s:2:"81";s:3:"nda";s:2:"82";s:3:" wa";s:2:"83";s:3:"a i";s:2:"84";s:3:"dua";s:2:"85";s:3:"g m";s:2:"86";s:3:"mi ";s:2:"87";s:3:"n a";s:2:"88";s:3:"rus";s:2:"89";s:3:"tel";s:2:"90";s:3:"yak";s:2:"91";s:3:" an";s:2:"92";s:3:"dal";s:2:"93";s:3:"h d";s:2:"94";s:3:"i s";s:2:"95";s:3:"ing";s:2:"96";s:3:"min";s:2:"97";s:3:"ngg";s:2:"98";s:3:"tak";s:2:"99";s:3:"ami";s:3:"100";s:3:"beb";s:3:"101";s:3:"den";s:3:"102";s:3:"gat";s:3:"103";s:3:"ian";s:3:"104";s:3:"ih ";s:3:"105";s:3:"pad";s:3:"106";s:3:"rga";s:3:"107";s:3:"san";s:3:"108";s:3:"ua ";s:3:"109";s:3:" de";s:3:"110";s:3:"a t";s:3:"111";s:3:"arg";s:3:"112";s:3:"dar";s:3:"113";s:3:"elu";s:3:"114";s:3:"har";s:3:"115";s:3:"i k";s:3:"116";s:3:"i m";s:3:"117";s:3:"i p";s:3:"118";s:3:"ika";s:3:"119";s:3:"in ";s:3:"120";s:3:"iny";s:3:"121";s:3:"itu";s:3:"122";s:3:"mba";s:3:"123";s:3:"n t";s:3:"124";s:3:"ntu";s:3:"125";s:3:"pan";s:3:"126";s:3:"pen";s:3:"127";s:3:"sah";s:3:"128";s:3:"tan";s:3:"129";s:3:"tu ";s:3:"130";s:3:"a k";s:3:"131";s:3:"ban";s:3:"132";s:3:"edu";s:3:"133";s:3:"eka";s:3:"134";s:3:"g d";s:3:"135";s:3:"ka ";s:3:"136";s:3:"ker";s:3:"137";s:3:"nde";s:3:"138";s:3:"nta";s:3:"139";s:3:"ora";s:3:"140";s:3:"usa";s:3:"141";s:3:" du";s:3:"142";s:3:" ma";s:3:"143";s:3:"a s";s:3:"144";s:3:"ai 
";s:3:"145";s:3:"ant";s:3:"146";s:3:"bas";s:3:"147";s:3:"end";s:3:"148";s:3:"i d";s:3:"149";s:3:"ira";s:3:"150";s:3:"kam";s:3:"151";s:3:"lan";s:3:"152";s:3:"n s";s:3:"153";s:3:"uli";s:3:"154";s:3:"al ";s:3:"155";s:3:"apa";s:3:"156";s:3:"ere";s:3:"157";s:3:"ert";s:3:"158";s:3:"lia";s:3:"159";s:3:"mem";s:3:"160";s:3:"rka";s:3:"161";s:3:"si ";s:3:"162";s:3:"tal";s:3:"163";s:3:"ung";s:3:"164";s:3:" ak";s:3:"165";s:3:"a a";s:3:"166";s:3:"a w";s:3:"167";s:3:"ani";s:3:"168";s:3:"ask";s:3:"169";s:3:"ent";s:3:"170";s:3:"gar";s:3:"171";s:3:"haa";s:3:"172";s:3:"i i";s:3:"173";s:3:"isa";s:3:"174";s:3:"ked";s:3:"175";s:3:"mbe";s:3:"176";s:3:"ska";s:3:"177";s:3:"tor";s:3:"178";s:3:"uan";s:3:"179";s:3:"uk ";s:3:"180";s:3:"uka";s:3:"181";s:3:" ad";s:3:"182";s:3:" to";s:3:"183";s:3:"asa";s:3:"184";s:3:"aya";s:3:"185";s:3:"bag";s:3:"186";s:3:"dia";s:3:"187";s:3:"dun";s:3:"188";s:3:"erj";s:3:"189";s:3:"mas";s:3:"190";s:3:"na ";s:3:"191";s:3:"rek";s:3:"192";s:3:"rit";s:3:"193";s:3:"sih";s:3:"194";s:3:"us ";s:3:"195";s:3:" bi";s:3:"196";s:3:"a h";s:3:"197";s:3:"ama";s:3:"198";s:3:"dib";s:3:"199";s:3:"ers";s:3:"200";s:3:"g s";s:3:"201";s:3:"han";s:3:"202";s:3:"ik ";s:3:"203";s:3:"kem";s:3:"204";s:3:"ma ";s:3:"205";s:3:"n l";s:3:"206";s:3:"nit";s:3:"207";s:3:"r b";s:3:"208";s:3:"rja";s:3:"209";s:3:"sa ";s:3:"210";s:3:" ju";s:3:"211";s:3:" or";s:3:"212";s:3:" si";s:3:"213";s:3:" ti";s:3:"214";s:3:"a y";s:3:"215";s:3:"aga";s:3:"216";s:3:"any";s:3:"217";s:3:"as ";s:3:"218";s:3:"cul";s:3:"219";s:3:"eme";s:3:"220";s:3:"emu";s:3:"221";s:3:"eny";s:3:"222";s:3:"epa";s:3:"223";s:3:"erb";s:3:"224";s:3:"erl";s:3:"225";s:3:"gi ";s:3:"226";s:3:"h m";s:3:"227";s:3:"i a";s:3:"228";s:3:"kel";s:3:"229";s:3:"li ";s:3:"230";s:3:"mel";s:3:"231";s:3:"nia";s:3:"232";s:3:"opa";s:3:"233";s:3:"rta";s:3:"234";s:3:"sia";s:3:"235";s:3:"tah";s:3:"236";s:3:"ula";s:3:"237";s:3:"un ";s:3:"238";s:3:"unt";s:3:"239";s:3:" at";s:3:"240";s:3:" bu";s:3:"241";s:3:" pu";s:3:"242";s:3:" 
ta";s:3:"243";s:3:"agi";s:3:"244";s:3:"alu";s:3:"245";s:3:"amb";s:3:"246";s:3:"bah";s:3:"247";s:3:"bis";s:3:"248";s:3:"er ";s:3:"249";s:3:"i t";s:3:"250";s:3:"ibe";s:3:"251";s:3:"ir ";s:3:"252";s:3:"ja ";s:3:"253";s:3:"k m";s:3:"254";s:3:"kar";s:3:"255";s:3:"lai";s:3:"256";s:3:"lal";s:3:"257";s:3:"lu ";s:3:"258";s:3:"mpa";s:3:"259";s:3:"ngk";s:3:"260";s:3:"nja";s:3:"261";s:3:"or ";s:3:"262";s:3:"pa ";s:3:"263";s:3:"pas";s:3:"264";s:3:"pem";s:3:"265";s:3:"rak";s:3:"266";s:3:"rik";s:3:"267";s:3:"seb";s:3:"268";s:3:"tam";s:3:"269";s:3:"tem";s:3:"270";s:3:"top";s:3:"271";s:3:"tuk";s:3:"272";s:3:"uni";s:3:"273";s:3:"war";s:3:"274";s:3:" al";s:3:"275";s:3:" ga";s:3:"276";s:3:" ge";s:3:"277";s:3:" ir";s:3:"278";s:3:" ja";s:3:"279";s:3:" mu";s:3:"280";s:3:" na";s:3:"281";s:3:" pr";s:3:"282";s:3:" su";s:3:"283";s:3:" un";s:3:"284";s:3:"ad ";s:3:"285";s:3:"adi";s:3:"286";s:3:"akt";s:3:"287";s:3:"ann";s:3:"288";s:3:"apo";s:3:"289";s:3:"bel";s:3:"290";s:3:"bul";s:3:"291";s:3:"der";s:3:"292";s:3:"ega";s:3:"293";s:3:"eke";s:3:"294";s:3:"ema";s:3:"295";s:3:"emp";s:3:"296";s:3:"ene";s:3:"297";s:3:"enj";s:3:"298";s:3:"esa";s:3:"299";}s:7:"italian";a:300:{s:3:" di";s:1:"0";s:3:"to ";s:1:"1";s:3:"la ";s:1:"2";s:3:" de";s:1:"3";s:3:"di ";s:1:"4";s:3:"no ";s:1:"5";s:3:" co";s:1:"6";s:3:"re ";s:1:"7";s:3:"ion";s:1:"8";s:3:"e d";s:1:"9";s:3:" e ";s:2:"10";s:3:"le ";s:2:"11";s:3:"del";s:2:"12";s:3:"ne ";s:2:"13";s:3:"ti ";s:2:"14";s:3:"ell";s:2:"15";s:3:" la";s:2:"16";s:3:" un";s:2:"17";s:3:"ni ";s:2:"18";s:3:"i d";s:2:"19";s:3:"per";s:2:"20";s:3:" pe";s:2:"21";s:3:"ent";s:2:"22";s:3:" in";s:2:"23";s:3:"one";s:2:"24";s:3:"he ";s:2:"25";s:3:"ta ";s:2:"26";s:3:"zio";s:2:"27";s:3:"che";s:2:"28";s:3:"o d";s:2:"29";s:3:"a d";s:2:"30";s:3:"na ";s:2:"31";s:3:"ato";s:2:"32";s:3:"e s";s:2:"33";s:3:" so";s:2:"34";s:3:"i s";s:2:"35";s:3:"lla";s:2:"36";s:3:"a p";s:2:"37";s:3:"li ";s:2:"38";s:3:"te ";s:2:"39";s:3:" al";s:2:"40";s:3:" ch";s:2:"41";s:3:"er ";s:2:"42";s:3:" pa";s:2:"43";s:3:" 
si";s:2:"44";s:3:"con";s:2:"45";s:3:"sta";s:2:"46";s:3:" pr";s:2:"47";s:3:"a c";s:2:"48";s:3:" se";s:2:"49";s:3:"el ";s:2:"50";s:3:"ia ";s:2:"51";s:3:"si ";s:2:"52";s:3:"e p";s:2:"53";s:3:" da";s:2:"54";s:3:"e i";s:2:"55";s:3:"i p";s:2:"56";s:3:"ont";s:2:"57";s:3:"ano";s:2:"58";s:3:"i c";s:2:"59";s:3:"all";s:2:"60";s:3:"azi";s:2:"61";s:3:"nte";s:2:"62";s:3:"on ";s:2:"63";s:3:"nti";s:2:"64";s:3:"o s";s:2:"65";s:3:" ri";s:2:"66";s:3:"i a";s:2:"67";s:3:"o a";s:2:"68";s:3:"un ";s:2:"69";s:3:" an";s:2:"70";s:3:"are";s:2:"71";s:3:"ari";s:2:"72";s:3:"e a";s:2:"73";s:3:"i e";s:2:"74";s:3:"ita";s:2:"75";s:3:"men";s:2:"76";s:3:"ri ";s:2:"77";s:3:" ca";s:2:"78";s:3:" il";s:2:"79";s:3:" no";s:2:"80";s:3:" po";s:2:"81";s:3:"a s";s:2:"82";s:3:"ant";s:2:"83";s:3:"il ";s:2:"84";s:3:"in ";s:2:"85";s:3:"a l";s:2:"86";s:3:"ati";s:2:"87";s:3:"cia";s:2:"88";s:3:"e c";s:2:"89";s:3:"ro ";s:2:"90";s:3:"ann";s:2:"91";s:3:"est";s:2:"92";s:3:"gli";s:2:"93";s:4:"tà ";s:2:"94";s:3:" qu";s:2:"95";s:3:"e l";s:2:"96";s:3:"nta";s:2:"97";s:3:" a ";s:2:"98";s:3:"com";s:2:"99";s:3:"o c";s:3:"100";s:3:"ra ";s:3:"101";s:3:" le";s:3:"102";s:3:" ne";s:3:"103";s:3:"ali";s:3:"104";s:3:"ere";s:3:"105";s:3:"ist";s:3:"106";s:3:" ma";s:3:"107";s:4:" è ";s:3:"108";s:3:"io ";s:3:"109";s:3:"lle";s:3:"110";s:3:"me ";s:3:"111";s:3:"era";s:3:"112";s:3:"ica";s:3:"113";s:3:"ost";s:3:"114";s:3:"pro";s:3:"115";s:3:"tar";s:3:"116";s:3:"una";s:3:"117";s:3:" pi";s:3:"118";s:3:"da ";s:3:"119";s:3:"tat";s:3:"120";s:3:" mi";s:3:"121";s:3:"att";s:3:"122";s:3:"ca ";s:3:"123";s:3:"mo ";s:3:"124";s:3:"non";s:3:"125";s:3:"par";s:3:"126";s:3:"sti";s:3:"127";s:3:" fa";s:3:"128";s:3:" i ";s:3:"129";s:3:" re";s:3:"130";s:3:" su";s:3:"131";s:3:"ess";s:3:"132";s:3:"ini";s:3:"133";s:3:"nto";s:3:"134";s:3:"o l";s:3:"135";s:3:"ssi";s:3:"136";s:3:"tto";s:3:"137";s:3:"a e";s:3:"138";s:3:"ame";s:3:"139";s:3:"col";s:3:"140";s:3:"ei ";s:3:"141";s:3:"ma ";s:3:"142";s:3:"o i";s:3:"143";s:3:"za ";s:3:"144";s:3:" st";s:3:"145";s:3:"a 
a";s:3:"146";s:3:"ale";s:3:"147";s:3:"anc";s:3:"148";s:3:"ani";s:3:"149";s:3:"i m";s:3:"150";s:3:"ian";s:3:"151";s:3:"o p";s:3:"152";s:3:"oni";s:3:"153";s:3:"sio";s:3:"154";s:3:"tan";s:3:"155";s:3:"tti";s:3:"156";s:3:" lo";s:3:"157";s:3:"i r";s:3:"158";s:3:"oci";s:3:"159";s:3:"oli";s:3:"160";s:3:"ona";s:3:"161";s:3:"ono";s:3:"162";s:3:"tra";s:3:"163";s:3:" l ";s:3:"164";s:3:"a r";s:3:"165";s:3:"eri";s:3:"166";s:3:"ett";s:3:"167";s:3:"lo ";s:3:"168";s:3:"nza";s:3:"169";s:3:"que";s:3:"170";s:3:"str";s:3:"171";s:3:"ter";s:3:"172";s:3:"tta";s:3:"173";s:3:" ba";s:3:"174";s:3:" li";s:3:"175";s:3:" te";s:3:"176";s:3:"ass";s:3:"177";s:3:"e f";s:3:"178";s:3:"enz";s:3:"179";s:3:"for";s:3:"180";s:3:"nno";s:3:"181";s:3:"olo";s:3:"182";s:3:"ori";s:3:"183";s:3:"res";s:3:"184";s:3:"tor";s:3:"185";s:3:" ci";s:3:"186";s:3:" vo";s:3:"187";s:3:"a i";s:3:"188";s:3:"al ";s:3:"189";s:3:"chi";s:3:"190";s:3:"e n";s:3:"191";s:3:"lia";s:3:"192";s:3:"pre";s:3:"193";s:3:"ria";s:3:"194";s:3:"uni";s:3:"195";s:3:"ver";s:3:"196";s:3:" sp";s:3:"197";s:3:"imo";s:3:"198";s:3:"l a";s:3:"199";s:3:"l c";s:3:"200";s:3:"ran";s:3:"201";s:3:"sen";s:3:"202";s:3:"soc";s:3:"203";s:3:"tic";s:3:"204";s:3:" fi";s:3:"205";s:3:" mo";s:3:"206";s:3:"a n";s:3:"207";s:3:"ce ";s:3:"208";s:3:"dei";s:3:"209";s:3:"ggi";s:3:"210";s:3:"gio";s:3:"211";s:3:"iti";s:3:"212";s:3:"l s";s:3:"213";s:3:"lit";s:3:"214";s:3:"ll ";s:3:"215";s:3:"mon";s:3:"216";s:3:"ola";s:3:"217";s:3:"pac";s:3:"218";s:3:"sim";s:3:"219";s:3:"tit";s:3:"220";s:3:"utt";s:3:"221";s:3:"vol";s:3:"222";s:3:" ar";s:3:"223";s:3:" fo";s:3:"224";s:3:" ha";s:3:"225";s:3:" sa";s:3:"226";s:3:"acc";s:3:"227";s:3:"e r";s:3:"228";s:3:"ire";s:3:"229";s:3:"man";s:3:"230";s:3:"ntr";s:3:"231";s:3:"rat";s:3:"232";s:3:"sco";s:3:"233";s:3:"tro";s:3:"234";s:3:"tut";s:3:"235";s:3:"va ";s:3:"236";s:3:" do";s:3:"237";s:3:" gi";s:3:"238";s:3:" me";s:3:"239";s:3:" sc";s:3:"240";s:3:" tu";s:3:"241";s:3:" ve";s:3:"242";s:3:" vi";s:3:"243";s:3:"a 
m";s:3:"244";s:3:"ber";s:3:"245";s:3:"can";s:3:"246";s:3:"cit";s:3:"247";s:3:"i l";s:3:"248";s:3:"ier";s:3:"249";s:4:"ità ";s:3:"250";s:3:"lli";s:3:"251";s:3:"min";s:3:"252";s:3:"n p";s:3:"253";s:3:"nat";s:3:"254";s:3:"nda";s:3:"255";s:3:"o e";s:3:"256";s:3:"o f";s:3:"257";s:3:"o u";s:3:"258";s:3:"ore";s:3:"259";s:3:"oro";s:3:"260";s:3:"ort";s:3:"261";s:3:"sto";s:3:"262";s:3:"ten";s:3:"263";s:3:"tiv";s:3:"264";s:3:"van";s:3:"265";s:3:"art";s:3:"266";s:3:"cco";s:3:"267";s:3:"ci ";s:3:"268";s:3:"cos";s:3:"269";s:3:"dal";s:3:"270";s:3:"e v";s:3:"271";s:3:"i i";s:3:"272";s:3:"ila";s:3:"273";s:3:"ino";s:3:"274";s:3:"l p";s:3:"275";s:3:"n c";s:3:"276";s:3:"nit";s:3:"277";s:3:"ole";s:3:"278";s:3:"ome";s:3:"279";s:3:"po ";s:3:"280";s:3:"rio";s:3:"281";s:3:"sa ";s:3:"282";s:3:" ce";s:3:"283";s:3:" es";s:3:"284";s:3:" tr";s:3:"285";s:3:"a b";s:3:"286";s:3:"and";s:3:"287";s:3:"ata";s:3:"288";s:3:"der";s:3:"289";s:3:"ens";s:3:"290";s:3:"ers";s:3:"291";s:3:"gi ";s:3:"292";s:3:"ial";s:3:"293";s:3:"ina";s:3:"294";s:3:"itt";s:3:"295";s:3:"izi";s:3:"296";s:3:"lan";s:3:"297";s:3:"lor";s:3:"298";s:3:"mil";s:3:"299";}s:6:"kazakh";a:300:{s:5:"ан ";s:1:"0";s:5:"ен ";s:1:"1";s:5:"Ñ‹Ò£ ";s:1:"2";s:5:" қа";s:1:"3";s:5:" ба";s:1:"4";s:5:"ай ";s:1:"5";s:6:"нда";s:1:"6";s:5:"ын ";s:1:"7";s:5:" Ñа";s:1:"8";s:5:" ал";s:1:"9";s:5:"ді ";s:2:"10";s:6:"ары";s:2:"11";s:5:"ды ";s:2:"12";s:5:"ып ";s:2:"13";s:5:" мұ";s:2:"14";s:5:" бі";s:2:"15";s:6:"аÑÑ‹";s:2:"16";s:5:"да ";s:2:"17";s:6:"най";s:2:"18";s:5:" жа";s:2:"19";s:6:"мұн";s:2:"20";s:6:"Ñта";s:2:"21";s:6:"ған";s:2:"22";s:5:"н б";s:2:"23";s:6:"ұна";s:2:"24";s:5:" бо";s:2:"25";s:6:"ның";s:2:"26";s:5:"ін ";s:2:"27";s:6:"лар";s:2:"28";s:6:"Ñын";s:2:"29";s:5:" де";s:2:"30";s:6:"аға";s:2:"31";s:6:"тан";s:2:"32";s:5:" кө";s:2:"33";s:6:"бір";s:2:"34";s:5:"ер ";s:2:"35";s:6:"мен";s:2:"36";s:6:"аза";s:2:"37";s:6:"ынд";s:2:"38";s:6:"ыны";s:2:"39";s:5:" 
ме";s:2:"40";s:6:"анд";s:2:"41";s:6:"ері";s:2:"42";s:6:"бол";s:2:"43";s:6:"дың";s:2:"44";s:6:"қаз";s:2:"45";s:6:"аты";s:2:"46";s:5:"ÑÑ‹ ";s:2:"47";s:6:"тын";s:2:"48";s:5:"Ò“Ñ‹ ";s:2:"49";s:5:" ке";s:2:"50";s:5:"ар ";s:2:"51";s:6:"зақ";s:2:"52";s:5:"Ñ‹Ò› ";s:2:"53";s:6:"ала";s:2:"54";s:6:"алы";s:2:"55";s:6:"аны";s:2:"56";s:6:"ара";s:2:"57";s:6:"ағы";s:2:"58";s:6:"ген";s:2:"59";s:6:"тар";s:2:"60";s:6:"тер";s:2:"61";s:6:"Ñ‚Ñ‹Ñ€";s:2:"62";s:6:"айд";s:2:"63";s:6:"ард";s:2:"64";s:5:"де ";s:2:"65";s:5:"ға ";s:2:"66";s:5:" қо";s:2:"67";s:6:"бар";s:2:"68";s:5:"Ñ–Ò£ ";s:2:"69";s:6:"қан";s:2:"70";s:5:" бе";s:2:"71";s:5:" қы";s:2:"72";s:6:"ақÑ";s:2:"73";s:6:"гер";s:2:"74";s:6:"дан";s:2:"75";s:6:"дар";s:2:"76";s:6:"лық";s:2:"77";s:6:"лға";s:2:"78";s:6:"ына";s:2:"79";s:5:"Ñ–Ñ€ ";s:2:"80";s:6:"ірі";s:2:"81";s:6:"ғаÑ";s:2:"82";s:5:" та";s:2:"83";s:5:"а б";s:2:"84";s:5:"гі ";s:2:"85";s:6:"еді";s:2:"86";s:6:"еле";s:2:"87";s:6:"йды";s:2:"88";s:5:"н к";s:2:"89";s:5:"н Ñ‚";s:2:"90";s:6:"ола";s:2:"91";s:6:"рын";s:2:"92";s:5:"іп ";s:2:"93";s:6:"Ò›ÑÑ‚";s:2:"94";s:6:"қта";s:2:"95";s:5:"Ò£ б";s:2:"96";s:5:" ай";s:2:"97";s:5:" ол";s:2:"98";s:5:" Ñо";s:2:"99";s:6:"айт";s:3:"100";s:6:"дағ";s:3:"101";s:6:"иге";s:3:"102";s:6:"лер";s:3:"103";s:6:"лып";s:3:"104";s:5:"н а";s:3:"105";s:5:"ік ";s:3:"106";s:6:"ақт";s:3:"107";s:6:"бағ";s:3:"108";s:6:"кен";s:3:"109";s:5:"н Ò›";s:3:"110";s:5:"ны ";s:3:"111";s:6:"рге";s:3:"112";s:6:"рға";s:3:"113";s:5:"Ñ‹Ñ€ ";s:3:"114";s:5:" ар";s:3:"115";s:6:"алғ";s:3:"116";s:6:"аÑа";s:3:"117";s:6:"баÑ";s:3:"118";s:6:"бер";s:3:"119";s:5:"ге ";s:3:"120";s:6:"еті";s:3:"121";s:5:"на ";s:3:"122";s:6:"нде";s:3:"123";s:5:"не ";s:3:"124";s:6:"ниг";s:3:"125";s:6:"рды";s:3:"126";s:5:"ры ";s:3:"127";s:6:"Ñай";s:3:"128";s:5:" ау";s:3:"129";s:5:" кү";s:3:"130";s:5:" ни";s:3:"131";s:5:" от";s:3:"132";s:5:" өз";s:3:"133";s:6:"ауд";s:3:"134";s:5:"еп ";s:3:"135";s:6:"иÑл";s:3:"136";s:6:"лты";s:3:"137";s:5:"н ж";s:3:"138";s:5:"н 
о";s:3:"139";s:6:"оÑÑ‹";s:3:"140";s:6:"оты";s:3:"141";s:6:"рып";s:3:"142";s:5:"рі ";s:3:"143";s:6:"тке";s:3:"144";s:5:"Ñ‚Ñ‹ ";s:3:"145";s:5:"Ñ‹ б";s:3:"146";s:5:"Ñ‹ ж";s:3:"147";s:6:"ылы";s:3:"148";s:6:"Ñ‹ÑÑ‹";s:3:"149";s:5:"Ñ– Ñ";s:3:"150";s:6:"қар";s:3:"151";s:5:" бұ";s:3:"152";s:5:" да";s:3:"153";s:5:" же";s:3:"154";s:5:" Ñ‚Ò±";s:3:"155";s:5:" Ò›Ò±";s:3:"156";s:6:"ады";s:3:"157";s:6:"айл";s:3:"158";s:5:"ап ";s:3:"159";s:6:"ата";s:3:"160";s:6:"ені";s:3:"161";s:6:"йла";s:3:"162";s:5:"н м";s:3:"163";s:5:"н Ñ";s:3:"164";s:6:"нды";s:3:"165";s:6:"нді";s:3:"166";s:5:"Ñ€ м";s:3:"167";s:6:"тай";s:3:"168";s:6:"тін";s:3:"169";s:5:"Ñ‹ Ñ‚";s:3:"170";s:5:"Ñ‹Ñ ";s:3:"171";s:6:"інд";s:3:"172";s:5:" би";s:3:"173";s:5:"а ж";s:3:"174";s:6:"ауы";s:3:"175";s:6:"деп";s:3:"176";s:6:"дің";s:3:"177";s:6:"еке";s:3:"178";s:6:"ери";s:3:"179";s:6:"йын";s:3:"180";s:6:"кел";s:3:"181";s:6:"лды";s:3:"182";s:5:"ма ";s:3:"183";s:6:"нан";s:3:"184";s:6:"оны";s:3:"185";s:5:"п ж";s:3:"186";s:5:"п о";s:3:"187";s:5:"Ñ€ б";s:3:"188";s:6:"риÑ";s:3:"189";s:6:"рла";s:3:"190";s:6:"уда";s:3:"191";s:6:"шыл";s:3:"192";s:5:"Ñ‹ а";s:3:"193";s:6:"ықт";s:3:"194";s:5:"Ñ– а";s:3:"195";s:5:"Ñ– б";s:3:"196";s:5:"із ";s:3:"197";s:6:"ілі";s:3:"198";s:5:"Ò£ Ò›";s:3:"199";s:5:" аÑ";s:3:"200";s:5:" ек";s:3:"201";s:5:" жо";s:3:"202";s:5:" мә";s:3:"203";s:5:" оÑ";s:3:"204";s:5:" ре";s:3:"205";s:5:" Ñе";s:3:"206";s:6:"алд";s:3:"207";s:6:"дал";s:3:"208";s:6:"дег";s:3:"209";s:6:"дей";s:3:"210";s:5:"е б";s:3:"211";s:5:"ет ";s:3:"212";s:6:"жаÑ";s:3:"213";s:5:"й б";s:3:"214";s:6:"лау";s:3:"215";s:6:"лда";s:3:"216";s:6:"мет";s:3:"217";s:6:"нын";s:3:"218";s:6:"Ñар";s:3:"219";s:5:"ÑÑ– ";s:3:"220";s:5:"Ñ‚Ñ– ";s:3:"221";s:6:"ыры";s:3:"222";s:6:"ыта";s:3:"223";s:6:"Ñ–ÑÑ–";s:3:"224";s:5:"Ò£ а";s:3:"225";s:6:"өте";s:3:"226";s:5:" ат";s:3:"227";s:5:" ел";s:3:"228";s:5:" жү";s:3:"229";s:5:" ма";s:3:"230";s:5:" то";s:3:"231";s:5:" шы";s:3:"232";s:5:"а 
а";s:3:"233";s:6:"алт";s:3:"234";s:6:"ама";s:3:"235";s:6:"арл";s:3:"236";s:6:"аÑÑ‚";s:3:"237";s:6:"бұл";s:3:"238";s:6:"дай";s:3:"239";s:6:"дық";s:3:"240";s:5:"ек ";s:3:"241";s:6:"ель";s:3:"242";s:6:"еÑÑ–";s:3:"243";s:6:"зді";s:3:"244";s:6:"көт";s:3:"245";s:6:"лем";s:3:"246";s:5:"ль ";s:3:"247";s:5:"н е";s:3:"248";s:5:"п а";s:3:"249";s:5:"Ñ€ а";s:3:"250";s:6:"реÑ";s:3:"251";s:5:"Ñа ";s:3:"252";s:5:"та ";s:3:"253";s:6:"тте";s:3:"254";s:6:"тұр";s:3:"255";s:5:"шы ";s:3:"256";s:5:"Ñ‹ д";s:3:"257";s:5:"Ñ‹ Ò›";s:3:"258";s:5:"ыз ";s:3:"259";s:6:"қыт";s:3:"260";s:5:" ко";s:3:"261";s:5:" не";s:3:"262";s:5:" ой";s:3:"263";s:5:" ор";s:3:"264";s:5:" ÑÒ±";s:3:"265";s:5:" Ñ‚Ò¯";s:3:"266";s:6:"аль";s:3:"267";s:6:"аре";s:3:"268";s:6:"атт";s:3:"269";s:6:"дір";s:3:"270";s:5:"ев ";s:3:"271";s:6:"егі";s:3:"272";s:6:"еда";s:3:"273";s:6:"екі";s:3:"274";s:6:"елд";s:3:"275";s:6:"ерг";s:3:"276";s:6:"ерд";s:3:"277";s:6:"иÑд";s:3:"278";s:6:"кер";s:3:"279";s:6:"кет";s:3:"280";s:6:"лыÑ";s:3:"281";s:6:"ліÑ";s:3:"282";s:6:"мед";s:3:"283";s:6:"мпи";s:3:"284";s:5:"н д";s:3:"285";s:5:"ні ";s:3:"286";s:6:"нін";s:3:"287";s:5:"п Ñ‚";s:3:"288";s:6:"пек";s:3:"289";s:6:"рел";s:3:"290";s:6:"рта";s:3:"291";s:6:"ріл";s:3:"292";s:6:"рін";s:3:"293";s:6:"Ñен";s:3:"294";s:6:"тал";s:3:"295";s:6:"шіл";s:3:"296";s:5:"Ñ‹ к";s:3:"297";s:5:"Ñ‹ м";s:3:"298";s:6:"Ñ‹ÑÑ‚";s:3:"299";}s:6:"kyrgyz";a:300:{s:5:"ын ";s:1:"0";s:5:"ан ";s:1:"1";s:5:" жа";s:1:"2";s:5:"ен ";s:1:"3";s:5:"да ";s:1:"4";s:5:" та";s:1:"5";s:5:"ар ";s:1:"6";s:5:"ин ";s:1:"7";s:5:" ка";s:1:"8";s:6:"ары";s:1:"9";s:5:" ал";s:2:"10";s:5:" ба";s:2:"11";s:5:" би";s:2:"12";s:6:"лар";s:2:"13";s:5:" бо";s:2:"14";s:5:" кы";s:2:"15";s:6:"ала";s:2:"16";s:5:"н к";s:2:"17";s:5:" Ñа";s:2:"18";s:6:"нда";s:2:"19";s:6:"ган";s:2:"20";s:6:"тар";s:2:"21";s:5:" де";s:2:"22";s:6:"анд";s:2:"23";s:5:"н б";s:2:"24";s:5:" ке";s:2:"25";s:6:"ард";s:2:"26";s:6:"мен";s:2:"27";s:5:"н Ñ‚";s:2:"28";s:6:"ара";s:2:"29";s:6:"нын";s:2:"30";s:5:" да";s:2:"31";s:5:" 
ме";s:2:"32";s:6:"кыр";s:2:"33";s:5:" че";s:2:"34";s:5:"н а";s:2:"35";s:5:"ры ";s:2:"36";s:5:" ко";s:2:"37";s:6:"ген";s:2:"38";s:6:"дар";s:2:"39";s:6:"кен";s:2:"40";s:6:"кта";s:2:"41";s:5:"уу ";s:2:"42";s:6:"ене";s:2:"43";s:6:"ери";s:2:"44";s:5:" ша";s:2:"45";s:6:"алы";s:2:"46";s:5:"ат ";s:2:"47";s:5:"на ";s:2:"48";s:5:" кө";s:2:"49";s:5:" Ñм";s:2:"50";s:6:"аты";s:2:"51";s:6:"дан";s:2:"52";s:6:"деп";s:2:"53";s:6:"дын";s:2:"54";s:5:"еп ";s:2:"55";s:6:"нен";s:2:"56";s:6:"рын";s:2:"57";s:5:" бе";s:2:"58";s:6:"кан";s:2:"59";s:6:"луу";s:2:"60";s:6:"ргы";s:2:"61";s:6:"тан";s:2:"62";s:6:"шай";s:2:"63";s:6:"ырг";s:2:"64";s:5:"үн ";s:2:"65";s:5:" ар";s:2:"66";s:5:" ма";s:2:"67";s:6:"агы";s:2:"68";s:6:"акт";s:2:"69";s:6:"аны";s:2:"70";s:5:"гы ";s:2:"71";s:6:"гыз";s:2:"72";s:5:"ды ";s:2:"73";s:6:"рда";s:2:"74";s:5:"ай ";s:2:"75";s:6:"бир";s:2:"76";s:6:"бол";s:2:"77";s:5:"ер ";s:2:"78";s:5:"н Ñ";s:2:"79";s:6:"нды";s:2:"80";s:5:"ун ";s:2:"81";s:5:"ча ";s:2:"82";s:6:"ынд";s:2:"83";s:5:"а к";s:2:"84";s:6:"ага";s:2:"85";s:6:"айл";s:2:"86";s:6:"ана";s:2:"87";s:5:"ап ";s:2:"88";s:5:"га ";s:2:"89";s:6:"лге";s:2:"90";s:6:"нча";s:2:"91";s:5:"п к";s:2:"92";s:6:"рды";s:2:"93";s:6:"туу";s:2:"94";s:6:"ыны";s:2:"95";s:5:" ан";s:2:"96";s:5:" өз";s:2:"97";s:6:"ама";s:2:"98";s:6:"ата";s:2:"99";s:6:"дин";s:3:"100";s:5:"йт ";s:3:"101";s:6:"лга";s:3:"102";s:6:"лоо";s:3:"103";s:5:"оо ";s:3:"104";s:5:"ри ";s:3:"105";s:6:"тин";s:3:"106";s:5:"ыз ";s:3:"107";s:5:"ып ";s:3:"108";s:6:"Ó©Ñ€Ò¯";s:3:"109";s:5:" па";s:3:"110";s:5:" Ñк";s:3:"111";s:5:"а б";s:3:"112";s:6:"алг";s:3:"113";s:6:"аÑÑ‹";s:3:"114";s:6:"ашт";s:3:"115";s:6:"биз";s:3:"116";s:6:"кел";s:3:"117";s:6:"кте";s:3:"118";s:6:"тал";s:3:"119";s:5:" не";s:3:"120";s:5:" Ñу";s:3:"121";s:6:"акы";s:3:"122";s:6:"ент";s:3:"123";s:6:"инд";s:3:"124";s:5:"ир ";s:3:"125";s:6:"кал";s:3:"126";s:5:"н д";s:3:"127";s:6:"нде";s:3:"128";s:6:"ого";s:3:"129";s:6:"онд";s:3:"130";s:6:"оюн";s:3:"131";s:5:"Ñ€ б";s:3:"132";s:5:"Ñ€ 
м";s:3:"133";s:6:"ран";s:3:"134";s:6:"Ñал";s:3:"135";s:6:"Ñта";s:3:"136";s:5:"ÑÑ‹ ";s:3:"137";s:6:"ура";s:3:"138";s:6:"ыгы";s:3:"139";s:5:" аш";s:3:"140";s:5:" ми";s:3:"141";s:5:" ÑÑ‹";s:3:"142";s:5:" ту";s:3:"143";s:5:"ал ";s:3:"144";s:6:"арт";s:3:"145";s:6:"бор";s:3:"146";s:6:"елг";s:3:"147";s:6:"ени";s:3:"148";s:5:"ет ";s:3:"149";s:6:"жат";s:3:"150";s:6:"йло";s:3:"151";s:6:"кар";s:3:"152";s:5:"н м";s:3:"153";s:6:"огу";s:3:"154";s:5:"п а";s:3:"155";s:5:"п ж";s:3:"156";s:5:"Ñ€ Ñ";s:3:"157";s:6:"Ñын";s:3:"158";s:5:"ык ";s:3:"159";s:6:"юнч";s:3:"160";s:5:" бу";s:3:"161";s:5:" ур";s:3:"162";s:5:"а а";s:3:"163";s:5:"ак ";s:3:"164";s:6:"алд";s:3:"165";s:6:"алу";s:3:"166";s:6:"бар";s:3:"167";s:6:"бер";s:3:"168";s:6:"бою";s:3:"169";s:5:"ге ";s:3:"170";s:6:"дон";s:3:"171";s:6:"еги";s:3:"172";s:6:"ект";s:3:"173";s:6:"ефт";s:3:"174";s:5:"из ";s:3:"175";s:6:"кат";s:3:"176";s:6:"лды";s:3:"177";s:5:"н ч";s:3:"178";s:5:"н Ñ";s:3:"179";s:5:"н Ó©";s:3:"180";s:6:"ндо";s:3:"181";s:6:"неф";s:3:"182";s:5:"он ";s:3:"183";s:6:"Ñат";s:3:"184";s:6:"тор";s:3:"185";s:5:"Ñ‚Ñ‹ ";s:3:"186";s:6:"уда";s:3:"187";s:5:"ул ";s:3:"188";s:6:"ула";s:3:"189";s:6:"ууд";s:3:"190";s:5:"Ñ‹ б";s:3:"191";s:5:"Ñ‹ ж";s:3:"192";s:5:"Ñ‹ к";s:3:"193";s:5:"ыл ";s:3:"194";s:6:"ына";s:3:"195";s:6:"Ñке";s:3:"196";s:6:"ÑÑÑ‹";s:3:"197";s:5:" ат";s:3:"198";s:5:" до";s:3:"199";s:5:" жы";s:3:"200";s:5:" Ñо";s:3:"201";s:5:" чы";s:3:"202";s:6:"ааÑ";s:3:"203";s:6:"айт";s:3:"204";s:6:"аÑÑ‚";s:3:"205";s:6:"баа";s:3:"206";s:6:"баш";s:3:"207";s:6:"гар";s:3:"208";s:6:"гын";s:3:"209";s:5:"дө ";s:3:"210";s:5:"е б";s:3:"211";s:5:"ек ";s:3:"212";s:6:"жыл";s:3:"213";s:5:"и б";s:3:"214";s:5:"ик ";s:3:"215";s:6:"иÑÑ";s:3:"216";s:6:"кыз";s:3:"217";s:6:"лда";s:3:"218";s:6:"лык";s:3:"219";s:6:"мда";s:3:"220";s:5:"н ж";s:3:"221";s:6:"нди";s:3:"222";s:5:"ни ";s:3:"223";s:6:"нин";s:3:"224";s:6:"орд";s:3:"225";s:6:"рдо";s:3:"226";s:6:"Ñто";s:3:"227";s:5:"та 
";s:3:"228";s:6:"тер";s:3:"229";s:6:"тти";s:3:"230";s:6:"тур";s:3:"231";s:6:"тын";s:3:"232";s:5:"уп ";s:3:"233";s:6:"ушу";s:3:"234";s:6:"фти";s:3:"235";s:6:"ыкт";s:3:"236";s:5:"үп ";s:3:"237";s:5:"өн ";s:3:"238";s:5:" ай";s:3:"239";s:5:" бү";s:3:"240";s:5:" ич";s:3:"241";s:5:" иш";s:3:"242";s:5:" мо";s:3:"243";s:5:" пр";s:3:"244";s:5:" ре";s:3:"245";s:5:" өк";s:3:"246";s:5:" Ó©Ñ‚";s:3:"247";s:5:"а д";s:3:"248";s:5:"а у";s:3:"249";s:5:"а Ñ";s:3:"250";s:6:"айм";s:3:"251";s:6:"амд";s:3:"252";s:6:"атт";s:3:"253";s:6:"бек";s:3:"254";s:6:"бул";s:3:"255";s:6:"гол";s:3:"256";s:6:"дег";s:3:"257";s:6:"еге";s:3:"258";s:6:"ейт";s:3:"259";s:6:"еле";s:3:"260";s:6:"енд";s:3:"261";s:6:"жак";s:3:"262";s:5:"и к";s:3:"263";s:6:"ини";s:3:"264";s:6:"ири";s:3:"265";s:6:"йма";s:3:"266";s:6:"кто";s:3:"267";s:6:"лик";s:3:"268";s:6:"мак";s:3:"269";s:6:"меÑ";s:3:"270";s:5:"н у";s:3:"271";s:5:"н ш";s:3:"272";s:6:"нтт";s:3:"273";s:5:"ол ";s:3:"274";s:6:"оло";s:3:"275";s:6:"пар";s:3:"276";s:6:"рак";s:3:"277";s:6:"Ñ€Ò¯Ò¯";s:3:"278";s:6:"ÑÑ‹Ñ€";s:3:"279";s:5:"ти ";s:3:"280";s:6:"тик";s:3:"281";s:6:"тта";s:3:"282";s:6:"Ñ‚Ó©Ñ€";s:3:"283";s:5:"у ж";s:3:"284";s:5:"у Ñ";s:3:"285";s:6:"шка";s:3:"286";s:5:"Ñ‹ м";s:3:"287";s:6:"ызы";s:3:"288";s:6:"ылд";s:3:"289";s:6:"Ñме";s:3:"290";s:6:"үрү";s:3:"291";s:6:"өлү";s:3:"292";s:6:"Ó©Ñ‚Ó©";s:3:"293";s:5:" же";s:3:"294";s:5:" Ñ‚Ò¯";s:3:"295";s:5:" Ñл";s:3:"296";s:5:" өн";s:3:"297";s:5:"а ж";s:3:"298";s:6:"ады";s:3:"299";}s:5:"latin";a:300:{s:3:"um ";s:1:"0";s:3:"us ";s:1:"1";s:3:"ut ";s:1:"2";s:3:"et ";s:1:"3";s:3:"is ";s:1:"4";s:3:" et";s:1:"5";s:3:" in";s:1:"6";s:3:" qu";s:1:"7";s:3:"tur";s:1:"8";s:3:" pr";s:1:"9";s:3:"est";s:2:"10";s:3:"tio";s:2:"11";s:3:" au";s:2:"12";s:3:"am ";s:2:"13";s:3:"em ";s:2:"14";s:3:"aut";s:2:"15";s:3:" di";s:2:"16";s:3:"ent";s:2:"17";s:3:"in ";s:2:"18";s:3:"dic";s:2:"19";s:3:"t e";s:2:"20";s:3:" es";s:2:"21";s:3:"ur ";s:2:"22";s:3:"ati";s:2:"23";s:3:"ion";s:2:"24";s:3:"st ";s:2:"25";s:3:" ut";s:2:"26";s:3:"ae 
";s:2:"27";s:3:"qua";s:2:"28";s:3:" de";s:2:"29";s:3:"nt ";s:2:"30";s:3:" su";s:2:"31";s:3:" si";s:2:"32";s:3:"itu";s:2:"33";s:3:"unt";s:2:"34";s:3:"rum";s:2:"35";s:3:"ia ";s:2:"36";s:3:"es ";s:2:"37";s:3:"ter";s:2:"38";s:3:" re";s:2:"39";s:3:"nti";s:2:"40";s:3:"rae";s:2:"41";s:3:"s e";s:2:"42";s:3:"qui";s:2:"43";s:3:"io ";s:2:"44";s:3:"pro";s:2:"45";s:3:"it ";s:2:"46";s:3:"per";s:2:"47";s:3:"ita";s:2:"48";s:3:"one";s:2:"49";s:3:"ici";s:2:"50";s:3:"ius";s:2:"51";s:3:" co";s:2:"52";s:3:"t d";s:2:"53";s:3:"bus";s:2:"54";s:3:"pra";s:2:"55";s:3:"m e";s:2:"56";s:3:" no";s:2:"57";s:3:"edi";s:2:"58";s:3:"tia";s:2:"59";s:3:"ue ";s:2:"60";s:3:"ibu";s:2:"61";s:3:" se";s:2:"62";s:3:" ad";s:2:"63";s:3:"er ";s:2:"64";s:3:" fi";s:2:"65";s:3:"ili";s:2:"66";s:3:"que";s:2:"67";s:3:"t i";s:2:"68";s:3:"de ";s:2:"69";s:3:"oru";s:2:"70";s:3:" te";s:2:"71";s:3:"ali";s:2:"72";s:3:" pe";s:2:"73";s:3:"aed";s:2:"74";s:3:"cit";s:2:"75";s:3:"m d";s:2:"76";s:3:"t s";s:2:"77";s:3:"tat";s:2:"78";s:3:"tem";s:2:"79";s:3:"tis";s:2:"80";s:3:"t p";s:2:"81";s:3:"sti";s:2:"82";s:3:"te ";s:2:"83";s:3:"cum";s:2:"84";s:3:"ere";s:2:"85";s:3:"ium";s:2:"86";s:3:" ex";s:2:"87";s:3:"rat";s:2:"88";s:3:"ta ";s:2:"89";s:3:"con";s:2:"90";s:3:"cti";s:2:"91";s:3:"oni";s:2:"92";s:3:"ra ";s:2:"93";s:3:"s i";s:2:"94";s:3:" cu";s:2:"95";s:3:" sa";s:2:"96";s:3:"eni";s:2:"97";s:3:"nis";s:2:"98";s:3:"nte";s:2:"99";s:3:"eri";s:3:"100";s:3:"omi";s:3:"101";s:3:"re ";s:3:"102";s:3:"s a";s:3:"103";s:3:"min";s:3:"104";s:3:"os ";s:3:"105";s:3:"ti ";s:3:"106";s:3:"uer";s:3:"107";s:3:" ma";s:3:"108";s:3:" ue";s:3:"109";s:3:"m s";s:3:"110";s:3:"nem";s:3:"111";s:3:"t m";s:3:"112";s:3:" mo";s:3:"113";s:3:" po";s:3:"114";s:3:" ui";s:3:"115";s:3:"gen";s:3:"116";s:3:"ict";s:3:"117";s:3:"m i";s:3:"118";s:3:"ris";s:3:"119";s:3:"s s";s:3:"120";s:3:"t a";s:3:"121";s:3:"uae";s:3:"122";s:3:" do";s:3:"123";s:3:"m a";s:3:"124";s:3:"t c";s:3:"125";s:3:" ge";s:3:"126";s:3:"as ";s:3:"127";s:3:"e i";s:3:"128";s:3:"e p";s:3:"129";s:3:"ne 
";s:3:"130";s:3:" ca";s:3:"131";s:3:"ine";s:3:"132";s:3:"quo";s:3:"133";s:3:"s p";s:3:"134";s:3:" al";s:3:"135";s:3:"e e";s:3:"136";s:3:"ntu";s:3:"137";s:3:"ro ";s:3:"138";s:3:"tri";s:3:"139";s:3:"tus";s:3:"140";s:3:"uit";s:3:"141";s:3:"atu";s:3:"142";s:3:"ini";s:3:"143";s:3:"iqu";s:3:"144";s:3:"m p";s:3:"145";s:3:"ost";s:3:"146";s:3:"res";s:3:"147";s:3:"ura";s:3:"148";s:3:" ac";s:3:"149";s:3:" fu";s:3:"150";s:3:"a e";s:3:"151";s:3:"ant";s:3:"152";s:3:"nes";s:3:"153";s:3:"nim";s:3:"154";s:3:"sun";s:3:"155";s:3:"tra";s:3:"156";s:3:"e a";s:3:"157";s:3:"s d";s:3:"158";s:3:" pa";s:3:"159";s:3:" uo";s:3:"160";s:3:"ecu";s:3:"161";s:3:" om";s:3:"162";s:3:" tu";s:3:"163";s:3:"ad ";s:3:"164";s:3:"cut";s:3:"165";s:3:"omn";s:3:"166";s:3:"s q";s:3:"167";s:3:" ei";s:3:"168";s:3:"ex ";s:3:"169";s:3:"icu";s:3:"170";s:3:"tor";s:3:"171";s:3:"uid";s:3:"172";s:3:" ip";s:3:"173";s:3:" me";s:3:"174";s:3:"e s";s:3:"175";s:3:"era";s:3:"176";s:3:"eru";s:3:"177";s:3:"iam";s:3:"178";s:3:"ide";s:3:"179";s:3:"ips";s:3:"180";s:3:" iu";s:3:"181";s:3:"a s";s:3:"182";s:3:"do ";s:3:"183";s:3:"e d";s:3:"184";s:3:"eiu";s:3:"185";s:3:"ica";s:3:"186";s:3:"im ";s:3:"187";s:3:"m c";s:3:"188";s:3:"m u";s:3:"189";s:3:"tiu";s:3:"190";s:3:" ho";s:3:"191";s:3:"cat";s:3:"192";s:3:"ist";s:3:"193";s:3:"nat";s:3:"194";s:3:"on ";s:3:"195";s:3:"pti";s:3:"196";s:3:"reg";s:3:"197";s:3:"rit";s:3:"198";s:3:"s t";s:3:"199";s:3:"sic";s:3:"200";s:3:"spe";s:3:"201";s:3:" en";s:3:"202";s:3:" sp";s:3:"203";s:3:"dis";s:3:"204";s:3:"eli";s:3:"205";s:3:"liq";s:3:"206";s:3:"lis";s:3:"207";s:3:"men";s:3:"208";s:3:"mus";s:3:"209";s:3:"num";s:3:"210";s:3:"pos";s:3:"211";s:3:"sio";s:3:"212";s:3:" an";s:3:"213";s:3:" gr";s:3:"214";s:3:"abi";s:3:"215";s:3:"acc";s:3:"216";s:3:"ect";s:3:"217";s:3:"ri ";s:3:"218";s:3:"uan";s:3:"219";s:3:" le";s:3:"220";s:3:"ecc";s:3:"221";s:3:"ete";s:3:"222";s:3:"gra";s:3:"223";s:3:"non";s:3:"224";s:3:"se ";s:3:"225";s:3:"uen";s:3:"226";s:3:"uis";s:3:"227";s:3:" fa";s:3:"228";s:3:" 
tr";s:3:"229";s:3:"ate";s:3:"230";s:3:"e c";s:3:"231";s:3:"fil";s:3:"232";s:3:"na ";s:3:"233";s:3:"ni ";s:3:"234";s:3:"pul";s:3:"235";s:3:"s f";s:3:"236";s:3:"ui ";s:3:"237";s:3:"at ";s:3:"238";s:3:"cce";s:3:"239";s:3:"dam";s:3:"240";s:3:"i e";s:3:"241";s:3:"ina";s:3:"242";s:3:"leg";s:3:"243";s:3:"nos";s:3:"244";s:3:"ori";s:3:"245";s:3:"pec";s:3:"246";s:3:"rop";s:3:"247";s:3:"sta";s:3:"248";s:3:"uia";s:3:"249";s:3:"ene";s:3:"250";s:3:"iue";s:3:"251";s:3:"iui";s:3:"252";s:3:"siu";s:3:"253";s:3:"t t";s:3:"254";s:3:"t u";s:3:"255";s:3:"tib";s:3:"256";s:3:"tit";s:3:"257";s:3:" da";s:3:"258";s:3:" ne";s:3:"259";s:3:"a d";s:3:"260";s:3:"and";s:3:"261";s:3:"ege";s:3:"262";s:3:"equ";s:3:"263";s:3:"hom";s:3:"264";s:3:"imu";s:3:"265";s:3:"lor";s:3:"266";s:3:"m m";s:3:"267";s:3:"mni";s:3:"268";s:3:"ndo";s:3:"269";s:3:"ner";s:3:"270";s:3:"o e";s:3:"271";s:3:"r e";s:3:"272";s:3:"sit";s:3:"273";s:3:"tum";s:3:"274";s:3:"utu";s:3:"275";s:3:"a p";s:3:"276";s:3:"bis";s:3:"277";s:3:"bit";s:3:"278";s:3:"cer";s:3:"279";s:3:"cta";s:3:"280";s:3:"dom";s:3:"281";s:3:"fut";s:3:"282";s:3:"i s";s:3:"283";s:3:"ign";s:3:"284";s:3:"int";s:3:"285";s:3:"mod";s:3:"286";s:3:"ndu";s:3:"287";s:3:"nit";s:3:"288";s:3:"rib";s:3:"289";s:3:"rti";s:3:"290";s:3:"tas";s:3:"291";s:3:"und";s:3:"292";s:3:" ab";s:3:"293";s:3:"err";s:3:"294";s:3:"ers";s:3:"295";s:3:"ite";s:3:"296";s:3:"iti";s:3:"297";s:3:"m t";s:3:"298";s:3:"o p";s:3:"299";}s:7:"latvian";a:300:{s:3:"as ";s:1:"0";s:3:" la";s:1:"1";s:3:" pa";s:1:"2";s:3:" ne";s:1:"3";s:3:"es ";s:1:"4";s:3:" un";s:1:"5";s:3:"un ";s:1:"6";s:3:" ka";s:1:"7";s:3:" va";s:1:"8";s:3:"ar ";s:1:"9";s:3:"s p";s:2:"10";s:3:" ar";s:2:"11";s:3:" vi";s:2:"12";s:3:"is ";s:2:"13";s:3:"ai ";s:2:"14";s:3:" no";s:2:"15";s:3:"ja ";s:2:"16";s:3:"ija";s:2:"17";s:3:"iem";s:2:"18";s:3:"em ";s:2:"19";s:3:"tu ";s:2:"20";s:3:"tie";s:2:"21";s:3:"vie";s:2:"22";s:3:"lat";s:2:"23";s:3:"aks";s:2:"24";s:3:"ien";s:2:"25";s:3:"kst";s:2:"26";s:3:"ies";s:2:"27";s:3:"s 
a";s:2:"28";s:3:"rak";s:2:"29";s:3:"atv";s:2:"30";s:3:"tvi";s:2:"31";s:3:" ja";s:2:"32";s:3:" pi";s:2:"33";s:3:"ka ";s:2:"34";s:3:" ir";s:2:"35";s:3:"ir ";s:2:"36";s:3:"ta ";s:2:"37";s:3:" sa";s:2:"38";s:3:"ts ";s:2:"39";s:4:" kÄ";s:2:"40";s:4:"Äs ";s:2:"41";s:3:" ti";s:2:"42";s:3:"ot ";s:2:"43";s:3:"s n";s:2:"44";s:3:" ie";s:2:"45";s:3:" ta";s:2:"46";s:4:"arÄ«";s:2:"47";s:3:"par";s:2:"48";s:3:"pie";s:2:"49";s:3:" pr";s:2:"50";s:4:"kÄ ";s:2:"51";s:3:" at";s:2:"52";s:3:" ra";s:2:"53";s:3:"am ";s:2:"54";s:4:"inÄ";s:2:"55";s:4:"tÄ ";s:2:"56";s:3:" iz";s:2:"57";s:3:"jas";s:2:"58";s:3:"lai";s:2:"59";s:3:" na";s:2:"60";s:3:"aut";s:2:"61";s:4:"ieÅ¡";s:2:"62";s:3:"s s";s:2:"63";s:3:" ap";s:2:"64";s:3:" ko";s:2:"65";s:3:" st";s:2:"66";s:3:"iek";s:2:"67";s:3:"iet";s:2:"68";s:3:"jau";s:2:"69";s:3:"us ";s:2:"70";s:4:"rÄ« ";s:2:"71";s:3:"tik";s:2:"72";s:4:"Ä«ba";s:2:"73";s:3:"na ";s:2:"74";s:3:" ga";s:2:"75";s:3:"cij";s:2:"76";s:3:"s i";s:2:"77";s:3:" uz";s:2:"78";s:3:"jum";s:2:"79";s:3:"s v";s:2:"80";s:3:"ms ";s:2:"81";s:3:"var";s:2:"82";s:3:" ku";s:2:"83";s:3:" ma";s:2:"84";s:4:"jÄ ";s:2:"85";s:3:"sta";s:2:"86";s:3:"s u";s:2:"87";s:4:" tÄ";s:2:"88";s:3:"die";s:2:"89";s:3:"kai";s:2:"90";s:3:"kas";s:2:"91";s:3:"ska";s:2:"92";s:3:" ci";s:2:"93";s:3:" da";s:2:"94";s:3:"kur";s:2:"95";s:3:"lie";s:2:"96";s:3:"tas";s:2:"97";s:3:"a p";s:2:"98";s:3:"est";s:2:"99";s:4:"stÄ";s:3:"100";s:4:"Å¡an";s:3:"101";s:3:"nes";s:3:"102";s:3:"nie";s:3:"103";s:3:"s d";s:3:"104";s:3:"s m";s:3:"105";s:3:"val";s:3:"106";s:3:" di";s:3:"107";s:3:" es";s:3:"108";s:3:" re";s:3:"109";s:3:"no ";s:3:"110";s:3:"to ";s:3:"111";s:3:"umu";s:3:"112";s:3:"vai";s:3:"113";s:4:"Å¡i ";s:3:"114";s:4:" vÄ“";s:3:"115";s:3:"kum";s:3:"116";s:3:"nu ";s:3:"117";s:3:"rie";s:3:"118";s:3:"s t";s:3:"119";s:4:"Äm ";s:3:"120";s:3:"ad ";s:3:"121";s:3:"et ";s:3:"122";s:3:"mu ";s:3:"123";s:3:"s l";s:3:"124";s:3:" 
be";s:3:"125";s:3:"aud";s:3:"126";s:3:"tur";s:3:"127";s:3:"vij";s:3:"128";s:4:"viņ";s:3:"129";s:4:"Äju";s:3:"130";s:3:"bas";s:3:"131";s:3:"gad";s:3:"132";s:3:"i n";s:3:"133";s:3:"ika";s:3:"134";s:3:"os ";s:3:"135";s:3:"a v";s:3:"136";s:3:"not";s:3:"137";s:3:"oti";s:3:"138";s:3:"sts";s:3:"139";s:3:"aik";s:3:"140";s:3:"u a";s:3:"141";s:4:"Ä a";s:3:"142";s:4:"Äk ";s:3:"143";s:3:" to";s:3:"144";s:3:"ied";s:3:"145";s:3:"stu";s:3:"146";s:3:"ti ";s:3:"147";s:3:"u p";s:3:"148";s:4:"vÄ“l";s:3:"149";s:4:"Äci";s:3:"150";s:4:" Å¡o";s:3:"151";s:3:"gi ";s:3:"152";s:3:"ko ";s:3:"153";s:3:"pro";s:3:"154";s:3:"s r";s:3:"155";s:4:"tÄj";s:3:"156";s:3:"u s";s:3:"157";s:3:"u v";s:3:"158";s:3:"vis";s:3:"159";s:3:"aun";s:3:"160";s:3:"ks ";s:3:"161";s:3:"str";s:3:"162";s:3:"zin";s:3:"163";s:3:"a a";s:3:"164";s:4:"adÄ«";s:3:"165";s:3:"da ";s:3:"166";s:3:"dar";s:3:"167";s:3:"ena";s:3:"168";s:3:"ici";s:3:"169";s:3:"kra";s:3:"170";s:3:"nas";s:3:"171";s:4:"stÄ«";s:3:"172";s:4:"Å¡u ";s:3:"173";s:4:" mÄ“";s:3:"174";s:3:"a n";s:3:"175";s:3:"eci";s:3:"176";s:3:"i s";s:3:"177";s:3:"ie ";s:3:"178";s:4:"iņa";s:3:"179";s:3:"ju ";s:3:"180";s:3:"las";s:3:"181";s:3:"r t";s:3:"182";s:3:"ums";s:3:"183";s:4:"Å¡ie";s:3:"184";s:3:"bu ";s:3:"185";s:3:"cit";s:3:"186";s:3:"i a";s:3:"187";s:3:"ina";s:3:"188";s:3:"ma ";s:3:"189";s:3:"pus";s:3:"190";s:3:"ra ";s:3:"191";s:3:" au";s:3:"192";s:3:" se";s:3:"193";s:3:" sl";s:3:"194";s:3:"a s";s:3:"195";s:3:"ais";s:3:"196";s:4:"eÅ¡i";s:3:"197";s:3:"iec";s:3:"198";s:3:"iku";s:3:"199";s:4:"pÄr";s:3:"200";s:3:"s b";s:3:"201";s:3:"s k";s:3:"202";s:3:"sot";s:3:"203";s:5:"ÄdÄ";s:3:"204";s:3:" in";s:3:"205";s:3:" li";s:3:"206";s:3:" tr";s:3:"207";s:3:"ana";s:3:"208";s:3:"eso";s:3:"209";s:3:"ikr";s:3:"210";s:3:"man";s:3:"211";s:3:"ne ";s:3:"212";s:3:"u k";s:3:"213";s:3:" tu";s:3:"214";s:3:"an ";s:3:"215";s:3:"av ";s:3:"216";s:3:"bet";s:3:"217";s:4:"bÅ«t";s:3:"218";s:3:"im ";s:3:"219";s:3:"isk";s:3:"220";s:4:"lÄ«d";s:3:"221";s:3:"nav";s:3:"222";s:3:"ras";s:3:"223";s:3:"ri 
";s:3:"224";s:3:"s g";s:3:"225";s:3:"sti";s:3:"226";s:4:"Ä«dz";s:3:"227";s:3:" ai";s:3:"228";s:3:"arb";s:3:"229";s:3:"cin";s:3:"230";s:3:"das";s:3:"231";s:3:"ent";s:3:"232";s:3:"gal";s:3:"233";s:3:"i p";s:3:"234";s:3:"lik";s:3:"235";s:4:"mÄ ";s:3:"236";s:3:"nek";s:3:"237";s:3:"pat";s:3:"238";s:4:"rÄ“t";s:3:"239";s:3:"si ";s:3:"240";s:3:"tra";s:3:"241";s:4:"uÅ¡i";s:3:"242";s:3:"vei";s:3:"243";s:3:" br";s:3:"244";s:3:" pu";s:3:"245";s:3:" sk";s:3:"246";s:3:"als";s:3:"247";s:3:"ama";s:3:"248";s:3:"edz";s:3:"249";s:3:"eka";s:3:"250";s:4:"eÅ¡u";s:3:"251";s:3:"ieg";s:3:"252";s:3:"jis";s:3:"253";s:3:"kam";s:3:"254";s:3:"lst";s:3:"255";s:4:"nÄk";s:3:"256";s:3:"oli";s:3:"257";s:3:"pre";s:3:"258";s:4:"pÄ“c";s:3:"259";s:3:"rot";s:3:"260";s:4:"tÄs";s:3:"261";s:3:"usi";s:3:"262";s:4:"Ä“l ";s:3:"263";s:4:"Ä“s ";s:3:"264";s:3:" bi";s:3:"265";s:3:" de";s:3:"266";s:3:" me";s:3:"267";s:4:" pÄ";s:3:"268";s:3:"a i";s:3:"269";s:3:"aid";s:3:"270";s:4:"ajÄ";s:3:"271";s:3:"ikt";s:3:"272";s:3:"kat";s:3:"273";s:3:"lic";s:3:"274";s:3:"lod";s:3:"275";s:3:"mi ";s:3:"276";s:3:"ni ";s:3:"277";s:3:"pri";s:3:"278";s:4:"rÄd";s:3:"279";s:4:"rÄ«g";s:3:"280";s:3:"sim";s:3:"281";s:4:"trÄ";s:3:"282";s:3:"u l";s:3:"283";s:3:"uto";s:3:"284";s:3:"uz ";s:3:"285";s:4:"Ä“c ";s:3:"286";s:5:"Ä«tÄ";s:3:"287";s:3:" ce";s:3:"288";s:4:" jÄ";s:3:"289";s:3:" sv";s:3:"290";s:3:"a t";s:3:"291";s:3:"aga";s:3:"292";s:3:"aiz";s:3:"293";s:3:"atu";s:3:"294";s:3:"ba ";s:3:"295";s:3:"cie";s:3:"296";s:3:"du ";s:3:"297";s:3:"dzi";s:3:"298";s:4:"dzÄ«";s:3:"299";}s:10:"lithuanian";a:300:{s:3:"as ";s:1:"0";s:3:" pa";s:1:"1";s:3:" ka";s:1:"2";s:3:"ai ";s:1:"3";s:3:"us ";s:1:"4";s:3:"os ";s:1:"5";s:3:"is ";s:1:"6";s:3:" ne";s:1:"7";s:3:" ir";s:1:"8";s:3:"ir ";s:1:"9";s:3:"ti ";s:2:"10";s:3:" pr";s:2:"11";s:3:"aus";s:2:"12";s:3:"ini";s:2:"13";s:3:"s p";s:2:"14";s:3:"pas";s:2:"15";s:4:"ių ";s:2:"16";s:3:" ta";s:2:"17";s:3:" vi";s:2:"18";s:3:"iau";s:2:"19";s:3:" ko";s:2:"20";s:3:" su";s:2:"21";s:3:"kai";s:2:"22";s:3:"o 
p";s:2:"23";s:3:"usi";s:2:"24";s:3:" sa";s:2:"25";s:3:"vo ";s:2:"26";s:3:"tai";s:2:"27";s:3:"ali";s:2:"28";s:4:"tų ";s:2:"29";s:3:"io ";s:2:"30";s:3:"jo ";s:2:"31";s:3:"s k";s:2:"32";s:3:"sta";s:2:"33";s:3:"iai";s:2:"34";s:3:" bu";s:2:"35";s:3:" nu";s:2:"36";s:3:"ius";s:2:"37";s:3:"mo ";s:2:"38";s:3:" po";s:2:"39";s:3:"ien";s:2:"40";s:3:"s s";s:2:"41";s:3:"tas";s:2:"42";s:3:" me";s:2:"43";s:3:"uvo";s:2:"44";s:3:"kad";s:2:"45";s:4:" iÅ¡";s:2:"46";s:3:" la";s:2:"47";s:3:"to ";s:2:"48";s:3:"ais";s:2:"49";s:3:"ie ";s:2:"50";s:3:"kur";s:2:"51";s:3:"uri";s:2:"52";s:3:" ku";s:2:"53";s:3:"ijo";s:2:"54";s:4:"Äia";s:2:"55";s:3:"au ";s:2:"56";s:3:"met";s:2:"57";s:3:"je ";s:2:"58";s:3:" va";s:2:"59";s:3:"ad ";s:2:"60";s:3:" ap";s:2:"61";s:3:"and";s:2:"62";s:3:" gr";s:2:"63";s:3:" ti";s:2:"64";s:3:"kal";s:2:"65";s:3:"asi";s:2:"66";s:3:"i p";s:2:"67";s:4:"iÄi";s:2:"68";s:3:"s i";s:2:"69";s:3:"s v";s:2:"70";s:3:"ink";s:2:"71";s:3:"o n";s:2:"72";s:4:"Ä—s ";s:2:"73";s:3:"buv";s:2:"74";s:3:"s a";s:2:"75";s:3:" ga";s:2:"76";s:3:"aip";s:2:"77";s:3:"avi";s:2:"78";s:3:"mas";s:2:"79";s:3:"pri";s:2:"80";s:3:"tik";s:2:"81";s:3:" re";s:2:"82";s:3:"etu";s:2:"83";s:3:"jos";s:2:"84";s:3:" da";s:2:"85";s:3:"ent";s:2:"86";s:3:"oli";s:2:"87";s:3:"par";s:2:"88";s:3:"ant";s:2:"89";s:3:"ara";s:2:"90";s:3:"tar";s:2:"91";s:3:"ama";s:2:"92";s:3:"gal";s:2:"93";s:3:"imo";s:2:"94";s:4:"iÅ¡k";s:2:"95";s:3:"o s";s:2:"96";s:3:" at";s:2:"97";s:3:" be";s:2:"98";s:4:" į ";s:2:"99";s:3:"min";s:3:"100";s:3:"tin";s:3:"101";s:3:" tu";s:3:"102";s:3:"s n";s:3:"103";s:3:" jo";s:3:"104";s:3:"dar";s:3:"105";s:3:"ip ";s:3:"106";s:3:"rei";s:3:"107";s:3:" te";s:3:"108";s:4:"dži";s:3:"109";s:3:"kas";s:3:"110";s:3:"nin";s:3:"111";s:3:"tei";s:3:"112";s:3:"vie";s:3:"113";s:3:" li";s:3:"114";s:3:" se";s:3:"115";s:3:"cij";s:3:"116";s:3:"gar";s:3:"117";s:3:"lai";s:3:"118";s:3:"art";s:3:"119";s:3:"lau";s:3:"120";s:3:"ras";s:3:"121";s:3:"no ";s:3:"122";s:3:"o k";s:3:"123";s:4:"tÄ… ";s:3:"124";s:3:" 
ar";s:3:"125";s:4:"Ä—jo";s:3:"126";s:4:"viÄ";s:3:"127";s:3:"iga";s:3:"128";s:3:"pra";s:3:"129";s:3:"vis";s:3:"130";s:3:" na";s:3:"131";s:3:"men";s:3:"132";s:3:"oki";s:3:"133";s:4:"raÅ¡";s:3:"134";s:3:"s t";s:3:"135";s:3:"iet";s:3:"136";s:3:"ika";s:3:"137";s:3:"int";s:3:"138";s:3:"kom";s:3:"139";s:3:"tam";s:3:"140";s:3:"aug";s:3:"141";s:3:"avo";s:3:"142";s:3:"rie";s:3:"143";s:3:"s b";s:3:"144";s:3:" st";s:3:"145";s:3:"eim";s:3:"146";s:3:"ko ";s:3:"147";s:3:"nus";s:3:"148";s:3:"pol";s:3:"149";s:3:"ria";s:3:"150";s:3:"sau";s:3:"151";s:3:"api";s:3:"152";s:3:"me ";s:3:"153";s:3:"ne ";s:3:"154";s:3:"sik";s:3:"155";s:4:" Å¡i";s:3:"156";s:3:"i n";s:3:"157";s:3:"ia ";s:3:"158";s:3:"ici";s:3:"159";s:3:"oja";s:3:"160";s:3:"sak";s:3:"161";s:3:"sti";s:3:"162";s:3:"ui ";s:3:"163";s:3:"ame";s:3:"164";s:3:"lie";s:3:"165";s:3:"o t";s:3:"166";s:3:"pie";s:3:"167";s:4:"Äiu";s:3:"168";s:3:" di";s:3:"169";s:3:" pe";s:3:"170";s:3:"gri";s:3:"171";s:3:"ios";s:3:"172";s:3:"lia";s:3:"173";s:3:"lin";s:3:"174";s:3:"s d";s:3:"175";s:3:"s g";s:3:"176";s:3:"ta ";s:3:"177";s:3:"uot";s:3:"178";s:3:" ja";s:3:"179";s:4:" už";s:3:"180";s:3:"aut";s:3:"181";s:3:"i s";s:3:"182";s:3:"ino";s:3:"183";s:4:"mÄ… ";s:3:"184";s:3:"oje";s:3:"185";s:3:"rav";s:3:"186";s:4:"dÄ—l";s:3:"187";s:3:"nti";s:3:"188";s:3:"o a";s:3:"189";s:3:"toj";s:3:"190";s:4:"Ä—l ";s:3:"191";s:3:" to";s:3:"192";s:3:" vy";s:3:"193";s:3:"ar ";s:3:"194";s:3:"ina";s:3:"195";s:3:"lic";s:3:"196";s:3:"o v";s:3:"197";s:3:"sei";s:3:"198";s:3:"su ";s:3:"199";s:3:" mi";s:3:"200";s:3:" pi";s:3:"201";s:3:"din";s:3:"202";s:4:"iÅ¡ ";s:3:"203";s:3:"lan";s:3:"204";s:3:"si ";s:3:"205";s:3:"tus";s:3:"206";s:3:" ba";s:3:"207";s:3:"asa";s:3:"208";s:3:"ata";s:3:"209";s:3:"kla";s:3:"210";s:3:"omi";s:3:"211";s:3:"tat";s:3:"212";s:3:" an";s:3:"213";s:3:" ji";s:3:"214";s:3:"als";s:3:"215";s:3:"ena";s:3:"216";s:4:"jų ";s:3:"217";s:3:"nuo";s:3:"218";s:3:"per";s:3:"219";s:3:"rig";s:3:"220";s:3:"s 
m";s:3:"221";s:3:"val";s:3:"222";s:3:"yta";s:3:"223";s:4:"Äio";s:3:"224";s:3:" ra";s:3:"225";s:3:"i k";s:3:"226";s:3:"lik";s:3:"227";s:3:"net";s:3:"228";s:4:"nÄ— ";s:3:"229";s:3:"tis";s:3:"230";s:3:"tuo";s:3:"231";s:3:"yti";s:3:"232";s:4:"Ä™s ";s:3:"233";s:4:"ų s";s:3:"234";s:3:"ada";s:3:"235";s:3:"ari";s:3:"236";s:3:"do ";s:3:"237";s:3:"eik";s:3:"238";s:3:"eis";s:3:"239";s:3:"ist";s:3:"240";s:3:"lst";s:3:"241";s:3:"ma ";s:3:"242";s:3:"nes";s:3:"243";s:3:"sav";s:3:"244";s:3:"sio";s:3:"245";s:3:"tau";s:3:"246";s:3:" ki";s:3:"247";s:3:"aik";s:3:"248";s:3:"aud";s:3:"249";s:3:"ies";s:3:"250";s:3:"ori";s:3:"251";s:3:"s r";s:3:"252";s:3:"ska";s:3:"253";s:3:" ge";s:3:"254";s:3:"ast";s:3:"255";s:3:"eig";s:3:"256";s:3:"et ";s:3:"257";s:3:"iam";s:3:"258";s:3:"isa";s:3:"259";s:3:"mis";s:3:"260";s:3:"nam";s:3:"261";s:3:"ome";s:3:"262";s:4:"žia";s:3:"263";s:3:"aba";s:3:"264";s:3:"aul";s:3:"265";s:3:"ikr";s:3:"266";s:4:"kÄ… ";s:3:"267";s:3:"nta";s:3:"268";s:3:"ra ";s:3:"269";s:3:"tur";s:3:"270";s:3:" ma";s:3:"271";s:3:"die";s:3:"272";s:3:"ei ";s:3:"273";s:3:"i t";s:3:"274";s:3:"nas";s:3:"275";s:3:"rin";s:3:"276";s:3:"sto";s:3:"277";s:3:"tie";s:3:"278";s:3:"tuv";s:3:"279";s:3:"vos";s:3:"280";s:4:"ų p";s:3:"281";s:4:" dÄ—";s:3:"282";s:3:"are";s:3:"283";s:3:"ats";s:3:"284";s:4:"enÄ—";s:3:"285";s:3:"ili";s:3:"286";s:3:"ima";s:3:"287";s:3:"kar";s:3:"288";s:3:"ms ";s:3:"289";s:3:"nia";s:3:"290";s:3:"r p";s:3:"291";s:3:"rod";s:3:"292";s:3:"s l";s:3:"293";s:3:" o ";s:3:"294";s:3:"e p";s:3:"295";s:3:"es ";s:3:"296";s:3:"ide";s:3:"297";s:3:"ik ";s:3:"298";s:3:"ja ";s:3:"299";}s:10:"macedonian";a:300:{s:5:"на ";s:1:"0";s:5:" на";s:1:"1";s:5:"та ";s:1:"2";s:6:"ата";s:1:"3";s:6:"ија";s:1:"4";s:5:" пр";s:1:"5";s:5:"то ";s:1:"6";s:5:"ја ";s:1:"7";s:5:" за";s:1:"8";s:5:"а н";s:1:"9";s:4:" и ";s:2:"10";s:5:"а Ñ";s:2:"11";s:5:"те ";s:2:"12";s:6:"ите";s:2:"13";s:5:" ко";s:2:"14";s:5:"от ";s:2:"15";s:5:" де";s:2:"16";s:5:" по";s:2:"17";s:5:"а д";s:2:"18";s:5:"во ";s:2:"19";s:5:"за ";s:2:"20";s:5:" 
во";s:2:"21";s:5:" од";s:2:"22";s:5:" Ñе";s:2:"23";s:5:" не";s:2:"24";s:5:"Ñе ";s:2:"25";s:5:" до";s:2:"26";s:5:"а в";s:2:"27";s:5:"ка ";s:2:"28";s:6:"ање";s:2:"29";s:5:"а п";s:2:"30";s:5:"о п";s:2:"31";s:6:"ува";s:2:"32";s:6:"циј";s:2:"33";s:5:"а о";s:2:"34";s:6:"ици";s:2:"35";s:6:"ето";s:2:"36";s:5:"о н";s:2:"37";s:6:"ани";s:2:"38";s:5:"ни ";s:2:"39";s:5:" вл";s:2:"40";s:6:"дек";s:2:"41";s:6:"ека";s:2:"42";s:6:"њет";s:2:"43";s:5:"ќе ";s:2:"44";s:4:" е ";s:2:"45";s:5:"а з";s:2:"46";s:5:"а и";s:2:"47";s:5:"ат ";s:2:"48";s:6:"вла";s:2:"49";s:5:"го ";s:2:"50";s:5:"е н";s:2:"51";s:5:"од ";s:2:"52";s:6:"пре";s:2:"53";s:5:" го";s:2:"54";s:5:" да";s:2:"55";s:5:" ма";s:2:"56";s:5:" ре";s:2:"57";s:5:" ќе";s:2:"58";s:6:"али";s:2:"59";s:5:"и д";s:2:"60";s:5:"и н";s:2:"61";s:6:"иот";s:2:"62";s:6:"нат";s:2:"63";s:6:"ово";s:2:"64";s:5:" па";s:2:"65";s:5:" ра";s:2:"66";s:5:" Ñо";s:2:"67";s:6:"ове";s:2:"68";s:6:"пра";s:2:"69";s:6:"што";s:2:"70";s:5:"ње ";s:2:"71";s:5:"а е";s:2:"72";s:5:"да ";s:2:"73";s:6:"дат";s:2:"74";s:6:"дон";s:2:"75";s:5:"е в";s:2:"76";s:5:"е д";s:2:"77";s:5:"е з";s:2:"78";s:5:"е Ñ";s:2:"79";s:6:"кон";s:2:"80";s:6:"нит";s:2:"81";s:5:"но ";s:2:"82";s:6:"они";s:2:"83";s:6:"ото";s:2:"84";s:6:"пар";s:2:"85";s:6:"при";s:2:"86";s:6:"Ñта";s:2:"87";s:5:"Ñ‚ н";s:2:"88";s:5:" шт";s:2:"89";s:5:"а к";s:2:"90";s:6:"аци";s:2:"91";s:5:"ва ";s:2:"92";s:6:"вањ";s:2:"93";s:5:"е п";s:2:"94";s:6:"ени";s:2:"95";s:5:"ла ";s:2:"96";s:6:"лад";s:2:"97";s:6:"мак";s:2:"98";s:6:"неÑ";s:2:"99";s:6:"ноÑ";s:3:"100";s:6:"про";s:3:"101";s:6:"рен";s:3:"102";s:6:"јат";s:3:"103";s:5:" ин";s:3:"104";s:5:" ме";s:3:"105";s:5:" то";s:3:"106";s:5:"а г";s:3:"107";s:5:"а м";s:3:"108";s:5:"а Ñ€";s:3:"109";s:6:"аке";s:3:"110";s:6:"ако";s:3:"111";s:6:"вор";s:3:"112";s:6:"гов";s:3:"113";s:6:"едо";s:3:"114";s:6:"ена";s:3:"115";s:5:"и и";s:3:"116";s:6:"ира";s:3:"117";s:6:"кед";s:3:"118";s:5:"не ";s:3:"119";s:6:"ниц";s:3:"120";s:6:"ниј";s:3:"121";s:6:"оÑÑ‚";s:3:"122";s:5:"ра 
";s:3:"123";s:6:"рат";s:3:"124";s:6:"ред";s:3:"125";s:6:"Ñка";s:3:"126";s:6:"тен";s:3:"127";s:5:" ка";s:3:"128";s:5:" Ñп";s:3:"129";s:5:" ја";s:3:"130";s:5:"а Ñ‚";s:3:"131";s:6:"аде";s:3:"132";s:6:"арт";s:3:"133";s:5:"е г";s:3:"134";s:5:"е и";s:3:"135";s:6:"кат";s:3:"136";s:6:"лаÑ";s:3:"137";s:6:"нио";s:3:"138";s:5:"о Ñ";s:3:"139";s:5:"ри ";s:3:"140";s:5:" ба";s:3:"141";s:5:" би";s:3:"142";s:6:"ава";s:3:"143";s:6:"ате";s:3:"144";s:6:"вни";s:3:"145";s:5:"д н";s:3:"146";s:6:"ден";s:3:"147";s:6:"дов";s:3:"148";s:6:"држ";s:3:"149";s:6:"дув";s:3:"150";s:5:"е о";s:3:"151";s:5:"ен ";s:3:"152";s:6:"ере";s:3:"153";s:6:"ери";s:3:"154";s:5:"и п";s:3:"155";s:5:"и Ñ";s:3:"156";s:6:"ина";s:3:"157";s:6:"кој";s:3:"158";s:6:"нци";s:3:"159";s:5:"о м";s:3:"160";s:5:"о о";s:3:"161";s:6:"одн";s:3:"162";s:6:"пор";s:3:"163";s:6:"Ñки";s:3:"164";s:6:"Ñпо";s:3:"165";s:6:"Ñтв";s:3:"166";s:6:"Ñти";s:3:"167";s:6:"тво";s:3:"168";s:5:"ти ";s:3:"169";s:5:" об";s:3:"170";s:5:" ов";s:3:"171";s:5:"а б";s:3:"172";s:6:"алн";s:3:"173";s:6:"ара";s:3:"174";s:6:"бар";s:3:"175";s:5:"е к";s:3:"176";s:5:"ед ";s:3:"177";s:6:"ент";s:3:"178";s:6:"еѓу";s:3:"179";s:5:"и о";s:3:"180";s:5:"ии ";s:3:"181";s:6:"меѓ";s:3:"182";s:5:"о д";s:3:"183";s:6:"оја";s:3:"184";s:6:"пот";s:3:"185";s:6:"раз";s:3:"186";s:6:"раш";s:3:"187";s:6:"Ñпр";s:3:"188";s:6:"Ñто";s:3:"189";s:5:"Ñ‚ д";s:3:"190";s:5:"ци ";s:3:"191";s:5:" бе";s:3:"192";s:5:" гр";s:3:"193";s:5:" др";s:3:"194";s:5:" из";s:3:"195";s:5:" ÑÑ‚";s:3:"196";s:5:"аа ";s:3:"197";s:6:"бид";s:3:"198";s:6:"вед";s:3:"199";s:6:"гла";s:3:"200";s:6:"еко";s:3:"201";s:6:"енд";s:3:"202";s:6:"еÑе";s:3:"203";s:6:"етÑ";s:3:"204";s:6:"зац";s:3:"205";s:5:"и Ñ‚";s:3:"206";s:6:"иза";s:3:"207";s:6:"инÑ";s:3:"208";s:6:"иÑÑ‚";s:3:"209";s:5:"ки ";s:3:"210";s:6:"ков";s:3:"211";s:6:"кол";s:3:"212";s:5:"ку ";s:3:"213";s:6:"лиц";s:3:"214";s:5:"о з";s:3:"215";s:5:"о 
и";s:3:"216";s:6:"ова";s:3:"217";s:6:"олк";s:3:"218";s:6:"оре";s:3:"219";s:6:"ори";s:3:"220";s:6:"под";s:3:"221";s:6:"рањ";s:3:"222";s:6:"реф";s:3:"223";s:6:"ржа";s:3:"224";s:6:"ров";s:3:"225";s:6:"рти";s:3:"226";s:5:"Ñо ";s:3:"227";s:6:"тор";s:3:"228";s:6:"фер";s:3:"229";s:6:"цен";s:3:"230";s:6:"цит";s:3:"231";s:4:" а ";s:3:"232";s:5:" вр";s:3:"233";s:5:" гл";s:3:"234";s:5:" дп";s:3:"235";s:5:" мо";s:3:"236";s:5:" ни";s:3:"237";s:5:" но";s:3:"238";s:5:" оп";s:3:"239";s:5:" от";s:3:"240";s:5:"а Ñœ";s:3:"241";s:6:"або";s:3:"242";s:6:"ада";s:3:"243";s:6:"аÑа";s:3:"244";s:6:"аша";s:3:"245";s:5:"ба ";s:3:"246";s:6:"бот";s:3:"247";s:6:"ваа";s:3:"248";s:6:"ват";s:3:"249";s:6:"вот";s:3:"250";s:5:"ги ";s:3:"251";s:6:"гра";s:3:"252";s:5:"де ";s:3:"253";s:6:"дин";s:3:"254";s:6:"дум";s:3:"255";s:6:"евр";s:3:"256";s:6:"еду";s:3:"257";s:6:"ено";s:3:"258";s:6:"ера";s:3:"259";s:5:"ÐµÑ ";s:3:"260";s:6:"ење";s:3:"261";s:5:"же ";s:3:"262";s:6:"зак";s:3:"263";s:5:"и в";s:3:"264";s:6:"ила";s:3:"265";s:6:"иту";s:3:"266";s:6:"коа";s:3:"267";s:6:"кои";s:3:"268";s:6:"лан";s:3:"269";s:6:"лку";s:3:"270";s:6:"лож";s:3:"271";s:6:"мот";s:3:"272";s:6:"нду";s:3:"273";s:6:"нÑÑ‚";s:3:"274";s:5:"о в";s:3:"275";s:5:"оа ";s:3:"276";s:6:"оал";s:3:"277";s:6:"обр";s:3:"278";s:5:"ов ";s:3:"279";s:6:"ови";s:3:"280";s:6:"овн";s:3:"281";s:5:"ои ";s:3:"282";s:5:"ор ";s:3:"283";s:6:"орм";s:3:"284";s:5:"ој ";s:3:"285";s:6:"рет";s:3:"286";s:6:"Ñед";s:3:"287";s:5:"ÑÑ‚ ";s:3:"288";s:6:"тер";s:3:"289";s:6:"тиј";s:3:"290";s:6:"тоа";s:3:"291";s:6:"фор";s:3:"292";s:6:"ции";s:3:"293";s:5:"ѓу ";s:3:"294";s:5:" ал";s:3:"295";s:5:" ве";s:3:"296";s:5:" вм";s:3:"297";s:5:" ги";s:3:"298";s:5:" ду";s:3:"299";}s:9:"mongolian";a:300:{s:5:"ын ";s:1:"0";s:5:" ба";s:1:"1";s:5:"йн ";s:1:"2";s:6:"бай";s:1:"3";s:6:"ийн";s:1:"4";s:6:"уул";s:1:"5";s:5:" ул";s:1:"6";s:6:"улÑ";s:1:"7";s:5:"ан ";s:1:"8";s:5:" ха";s:1:"9";s:6:"ний";s:2:"10";s:5:"н Ñ…";s:2:"11";s:6:"гаа";s:2:"12";s:6:"Ñын";s:2:"13";s:5:"ий 
";s:2:"14";s:6:"лÑÑ‹";s:2:"15";s:5:" бо";s:2:"16";s:5:"й б";s:2:"17";s:5:"Ñн ";s:2:"18";s:5:"ах ";s:2:"19";s:6:"бол";s:2:"20";s:5:"ол ";s:2:"21";s:5:"н б";s:2:"22";s:6:"оло";s:2:"23";s:5:" Ñ…Ñ";s:2:"24";s:6:"онг";s:2:"25";s:6:"гол";s:2:"26";s:6:"гуу";s:2:"27";s:6:"нго";s:2:"28";s:5:"ыг ";s:2:"29";s:6:"жил";s:2:"30";s:5:" мо";s:2:"31";s:6:"лаг";s:2:"32";s:6:"лла";s:2:"33";s:6:"мон";s:2:"34";s:5:" Ñ‚Ñ”";s:2:"35";s:5:" ху";s:2:"36";s:6:"айд";s:2:"37";s:5:"ны ";s:2:"38";s:5:"он ";s:2:"39";s:6:"Ñан";s:2:"40";s:6:"хий";s:2:"41";s:5:" аж";s:2:"42";s:5:" ор";s:2:"43";s:5:"л у";s:2:"44";s:5:"н Ñ‚";s:2:"45";s:6:"улг";s:2:"46";s:6:"айг";s:2:"47";s:6:"длы";s:2:"48";s:5:"йг ";s:2:"49";s:5:" за";s:2:"50";s:6:"дÑÑ";s:2:"51";s:5:"н а";s:2:"52";s:6:"ндÑ";s:2:"53";s:6:"ула";s:2:"54";s:5:"ÑÑ ";s:2:"55";s:6:"ага";s:2:"56";s:6:"ийг";s:2:"57";s:4:"vй ";s:2:"58";s:5:"аа ";s:2:"59";s:5:"й а";s:2:"60";s:6:"лын";s:2:"61";s:5:"н з";s:2:"62";s:5:" аю";s:2:"63";s:5:" зє";s:2:"64";s:6:"аар";s:2:"65";s:5:"ад ";s:2:"66";s:5:"ар ";s:2:"67";s:5:"гvй";s:2:"68";s:6:"зєв";s:2:"69";s:6:"ажи";s:2:"70";s:5:"ал ";s:2:"71";s:6:"аюу";s:2:"72";s:5:"г Ñ…";s:2:"73";s:5:"лгv";s:2:"74";s:5:"лж ";s:2:"75";s:6:"Ñни";s:2:"76";s:6:"ÑÑн";s:2:"77";s:6:"юул";s:2:"78";s:6:"йдл";s:2:"79";s:6:"лыг";s:2:"80";s:6:"нхи";s:2:"81";s:6:"ууд";s:2:"82";s:6:"хам";s:2:"83";s:5:" нÑ";s:2:"84";s:5:" Ñа";s:2:"85";s:6:"гий";s:2:"86";s:6:"лах";s:2:"87";s:6:"лєл";s:2:"88";s:6:"рєн";s:2:"89";s:6:"єгч";s:2:"90";s:5:" та";s:2:"91";s:6:"илл";s:2:"92";s:6:"лий";s:2:"93";s:6:"лÑÑ…";s:2:"94";s:6:"рий";s:2:"95";s:5:"ÑÑ… ";s:2:"96";s:5:" ер";s:2:"97";s:5:" ÑÑ€";s:2:"98";s:6:"влє";s:2:"99";s:6:"ерє";s:3:"100";s:6:"ийл";s:3:"101";s:6:"лон";s:3:"102";s:6:"лєг";s:3:"103";s:6:"євл";s:3:"104";s:6:"єнх";s:3:"105";s:5:" хо";s:3:"106";s:6:"ари";s:3:"107";s:5:"их ";s:3:"108";s:6:"хан";s:3:"109";s:5:"ÑÑ€ ";s:3:"110";s:5:"єн ";s:3:"111";s:4:"vvл";s:3:"112";s:5:"ж б";s:3:"113";s:6:"Ñ‚Ñй";s:3:"114";s:5:"Ñ… Ñ…";s:3:"115";s:6:"Ñрх";s:3:"116";s:4:" 
vн";s:3:"117";s:5:" нь";s:3:"118";s:5:"vнд";s:3:"119";s:6:"алт";s:3:"120";s:6:"йлє";s:3:"121";s:5:"нь ";s:3:"122";s:6:"тєр";s:3:"123";s:5:" га";s:3:"124";s:5:" Ñу";s:3:"125";s:6:"аан";s:3:"126";s:6:"даа";s:3:"127";s:6:"илц";s:3:"128";s:6:"йгу";s:3:"129";s:5:"л а";s:3:"130";s:6:"лаа";s:3:"131";s:5:"н н";s:3:"132";s:6:"руу";s:3:"133";s:5:"Ñй ";s:3:"134";s:5:" то";s:3:"135";s:5:"н Ñ";s:3:"136";s:6:"рил";s:3:"137";s:6:"єри";s:3:"138";s:6:"ааг";s:3:"139";s:5:"гч ";s:3:"140";s:6:"лÑÑ";s:3:"141";s:5:"н о";s:3:"142";s:6:"Ñ€Ñг";s:3:"143";s:6:"Ñуу";s:3:"144";s:6:"ÑÑ€Ñ";s:3:"145";s:6:"їїл";s:3:"146";s:4:" yн";s:3:"147";s:5:" бу";s:3:"148";s:5:" дÑ";s:3:"149";s:5:" ол";s:3:"150";s:5:" ту";s:3:"151";s:5:" ши";s:3:"152";s:5:"yнд";s:3:"153";s:6:"аши";s:3:"154";s:5:"г Ñ‚";s:3:"155";s:5:"иг ";s:3:"156";s:5:"йл ";s:3:"157";s:6:"хар";s:3:"158";s:6:"шин";s:3:"159";s:5:"Ñг ";s:3:"160";s:5:"єр ";s:3:"161";s:5:" их";s:3:"162";s:5:" Ñ…Ñ”";s:3:"163";s:5:" Ñ…Ñ—";s:3:"164";s:5:"ам ";s:3:"165";s:6:"анг";s:3:"166";s:5:"ин ";s:3:"167";s:6:"йга";s:3:"168";s:6:"лÑа";s:3:"169";s:4:"н v";s:3:"170";s:5:"н е";s:3:"171";s:6:"нал";s:3:"172";s:5:"нд ";s:3:"173";s:6:"хуу";s:3:"174";s:6:"цаа";s:3:"175";s:5:"Ñд ";s:3:"176";s:6:"ÑÑÑ€";s:3:"177";s:5:"єл ";s:3:"178";s:5:"vйл";s:3:"179";s:6:"ада";s:3:"180";s:6:"айн";s:3:"181";s:6:"ала";s:3:"182";s:6:"амт";s:3:"183";s:6:"гах";s:3:"184";s:5:"д Ñ…";s:3:"185";s:6:"дал";s:3:"186";s:6:"зар";s:3:"187";s:5:"л б";s:3:"188";s:6:"лан";s:3:"189";s:5:"н д";s:3:"190";s:6:"ÑÑн";s:3:"191";s:6:"улл";s:3:"192";s:5:"Ñ… б";s:3:"193";s:6:"Ñ…ÑÑ€";s:3:"194";s:4:" бv";s:3:"195";s:5:" да";s:3:"196";s:5:" зо";s:3:"197";s:5:"vÑ€Ñ";s:3:"198";s:6:"аад";s:3:"199";s:6:"гÑÑ";s:3:"200";s:6:"лÑн";s:3:"201";s:5:"н и";s:3:"202";s:5:"н Ñ";s:3:"203";s:6:"нга";s:3:"204";s:5:"Ð½Ñ ";s:3:"205";s:6:"тал";s:3:"206";s:6:"тын";s:3:"207";s:6:"хур";s:3:"208";s:5:"Ñл ";s:3:"209";s:5:" на";s:3:"210";s:5:" ни";s:3:"211";s:5:" он";s:3:"212";s:5:"vлÑ";s:3:"213";s:5:"аг ";s:3:"214";s:5:"аж ";s:3:"215";s:5:"ай 
";s:3:"216";s:6:"ата";s:3:"217";s:6:"бар";s:3:"218";s:5:"г б";s:3:"219";s:6:"гад";s:3:"220";s:6:"гїй";s:3:"221";s:5:"й Ñ…";s:3:"222";s:5:"лт ";s:3:"223";s:5:"н м";s:3:"224";s:5:"на ";s:3:"225";s:6:"оро";s:3:"226";s:6:"уль";s:3:"227";s:6:"чин";s:3:"228";s:5:"Ñж ";s:3:"229";s:6:"ÑнÑ";s:3:"230";s:6:"ÑÑд";s:3:"231";s:5:"їй ";s:3:"232";s:6:"їлÑ";s:3:"233";s:5:" би";s:3:"234";s:5:" Ñ‚Ñ";s:3:"235";s:5:" Ñн";s:3:"236";s:6:"аны";s:3:"237";s:6:"дий";s:3:"238";s:6:"дÑÑ";s:3:"239";s:6:"лал";s:3:"240";s:6:"лга";s:3:"241";s:5:"лд ";s:3:"242";s:6:"лог";s:3:"243";s:5:"ль ";s:3:"244";s:5:"н у";s:3:"245";s:5:"н Ñ—";s:3:"246";s:5:"Ñ€ б";s:3:"247";s:6:"рал";s:3:"248";s:6:"Ñон";s:3:"249";s:6:"тай";s:3:"250";s:6:"удл";s:3:"251";s:6:"Ñлт";s:3:"252";s:6:"Ñрг";s:3:"253";s:6:"єлє";s:3:"254";s:4:" vй";s:3:"255";s:4:" в ";s:3:"256";s:5:" гÑ";s:3:"257";s:4:" Ñ…v";s:3:"258";s:6:"ара";s:3:"259";s:5:"бvÑ€";s:3:"260";s:5:"д н";s:3:"261";s:5:"д о";s:3:"262";s:5:"л Ñ…";s:3:"263";s:5:"Ð»Ñ ";s:3:"264";s:6:"лты";s:3:"265";s:5:"н г";s:3:"266";s:6:"нÑг";s:3:"267";s:6:"огт";s:3:"268";s:6:"олы";s:3:"269";s:6:"оёр";s:3:"270";s:5:"Ñ€ Ñ‚";s:3:"271";s:6:"Ñ€ÑÑ";s:3:"272";s:6:"тав";s:3:"273";s:6:"тог";s:3:"274";s:6:"уур";s:3:"275";s:6:"хоё";s:3:"276";s:6:"Ñ…Ñл";s:3:"277";s:6:"Ñ…ÑÑ";s:3:"278";s:6:"ÑлÑ";s:3:"279";s:5:"Ñ‘Ñ€ ";s:3:"280";s:5:" ав";s:3:"281";s:5:" аÑ";s:3:"282";s:5:" аш";s:3:"283";s:5:" ду";s:3:"284";s:5:" Ñо";s:3:"285";s:5:" чи";s:3:"286";s:5:" Ñв";s:3:"287";s:5:" єр";s:3:"288";s:6:"аал";s:3:"289";s:6:"алд";s:3:"290";s:6:"амж";s:3:"291";s:6:"анд";s:3:"292";s:6:"аÑу";s:3:"293";s:6:"вÑÑ€";s:3:"294";s:5:"г у";s:3:"295";s:6:"двÑ";s:3:"296";s:4:"жvv";s:3:"297";s:6:"лца";s:3:"298";s:6:"лÑл";s:3:"299";}s:6:"nepali";a:300:{s:7:"को ";s:1:"0";s:7:"का ";s:1:"1";s:7:"मा ";s:1:"2";s:9:"हरà¥";s:1:"3";s:7:" ने";s:1:"4";s:9:"नेप";s:1:"5";s:9:"पाल";s:1:"6";s:9:"ेपा";s:1:"7";s:7:" सम";s:1:"8";s:7:"ले ";s:1:"9";s:7:" पà¥";s:2:"10";s:9:"पà¥à¤°";s:2:"11";s:9:"कार";s:2:"12";s:7:"ा स";s:2:"13";s:9:"à¤à¤•à¥‹";s:2:"14";s:7:" 
à¤à¤";s:2:"15";s:5:" छ ";s:2:"16";s:7:" à¤à¤¾";s:2:"17";s:9:"à¥à¤°à¤®";s:2:"18";s:7:" गर";s:2:"19";s:9:"रà¥à¤•";s:2:"20";s:5:" र ";s:2:"21";s:9:"à¤à¤¾à¤°";s:2:"22";s:9:"ारत";s:2:"23";s:7:" का";s:2:"24";s:7:" वि";s:2:"25";s:9:"à¤à¤à¤•";s:2:"26";s:9:"ाली";s:2:"27";s:7:"ली ";s:2:"28";s:7:"ा प";s:2:"29";s:9:"ीहर";s:2:"30";s:9:"ारà¥";s:2:"31";s:7:"ो छ";s:2:"32";s:7:"ना ";s:2:"33";s:7:"रॠ";s:2:"34";s:9:"ालक";s:2:"35";s:9:"à¥à¤¯à¤¾";s:2:"36";s:7:" बा";s:2:"37";s:9:"à¤à¤•à¤¾";s:2:"38";s:7:"ने ";s:2:"39";s:9:"नà¥à¤¤";s:2:"40";s:7:"ा ब";s:2:"41";s:9:"ाको";s:2:"42";s:7:"ार ";s:2:"43";s:7:"ा à¤";s:2:"44";s:9:"ाहर";s:2:"45";s:9:"à¥à¤°à¥‹";s:2:"46";s:9:"कà¥à¤·";s:2:"47";s:7:"नॠ";s:2:"48";s:9:"ारी";s:2:"49";s:7:" नि";s:2:"50";s:7:"ा न";s:2:"51";s:7:"ी स";s:2:"52";s:7:" डà¥";s:2:"53";s:9:"कà¥à¤°";s:2:"54";s:9:"जना";s:2:"55";s:7:"यो ";s:2:"56";s:7:"ा छ";s:2:"57";s:9:"ेवा";s:2:"58";s:9:"à¥à¤¤à¤¾";s:2:"59";s:7:" रा";s:2:"60";s:9:"तà¥à¤¯";s:2:"61";s:9:"नà¥à¤¦";s:2:"62";s:9:"हà¥à¤¨";s:2:"63";s:7:"ा क";s:2:"64";s:9:"ामा";s:2:"65";s:7:"ी न";s:2:"66";s:9:"à¥à¤¦à¤¾";s:2:"67";s:7:" से";s:2:"68";s:9:"छनà¥";s:2:"69";s:9:"मà¥à¤¬";s:2:"70";s:9:"रोत";s:2:"71";s:9:"सेव";s:2:"72";s:9:"सà¥à¤¤";s:2:"73";s:9:"सà¥à¤°";s:2:"74";s:9:"ेका";s:2:"75";s:7:"à¥à¤¤ ";s:2:"76";s:7:" बी";s:2:"77";s:7:" हà¥";s:2:"78";s:9:"कà¥à¤¤";s:2:"79";s:9:"तà¥à¤°";s:2:"80";s:7:"रत ";s:2:"81";s:9:"रà¥à¤¨";s:2:"82";s:9:"रà¥à¤¯";s:2:"83";s:7:"ा र";s:2:"84";s:9:"ाका";s:2:"85";s:9:"à¥à¤•à¥‹";s:2:"86";s:7:" à¤à¤•";s:2:"87";s:7:" सं";s:2:"88";s:7:" सà¥";s:2:"89";s:9:"बीब";s:2:"90";s:9:"बीस";s:2:"91";s:9:"लको";s:2:"92";s:9:"सà¥à¤¯";s:2:"93";s:9:"ीबी";s:2:"94";s:9:"ीसी";s:2:"95";s:9:"ेको";s:2:"96";s:7:"ो स";s:2:"97";s:9:"à¥à¤¯à¤•";s:2:"98";s:7:" छन";s:2:"99";s:7:" जन";s:3:"100";s:7:" बि";s:3:"101";s:7:" मà¥";s:3:"102";s:7:" सà¥";s:3:"103";s:9:"गरà¥";s:3:"104";s:9:"ताह";s:3:"105";s:9:"नà¥à¤§";s:3:"106";s:9:"बार";s:3:"107";s:9:"मनà¥";s:3:"108";s:9:"मसà¥";s:3:"109";s:9:"रà¥à¤²";s:3:"110";s:9:"लाई";s:3:"111";s:7:"ा व";s:3:"112";s:7:"ाई 
";s:3:"113";s:7:"ाल ";s:3:"114";s:9:"िका";s:3:"115";s:7:" तà¥";s:3:"116";s:7:" मा";s:3:"117";s:7:" यस";s:3:"118";s:7:" रà¥";s:3:"119";s:9:"ताक";s:3:"120";s:9:"बनà¥";s:3:"121";s:7:"र ब";s:3:"122";s:7:"रण ";s:3:"123";s:9:"रà¥à¤ª";s:3:"124";s:9:"रेक";s:3:"125";s:9:"षà¥à¤Ÿ";s:3:"126";s:9:"समà¥";s:3:"127";s:7:"सी ";s:3:"128";s:9:"ाà¤à¤•";s:3:"129";s:9:"à¥à¤•à¤¾";s:3:"130";s:9:"à¥à¤•à¥";s:3:"131";s:7:" अध";s:3:"132";s:7:" अन";s:3:"133";s:7:" तथ";s:3:"134";s:7:" थि";s:3:"135";s:7:" दे";s:3:"136";s:7:" पर";s:3:"137";s:7:" बै";s:3:"138";s:9:"तथा";s:3:"139";s:7:"ता ";s:3:"140";s:7:"दा ";s:3:"141";s:9:"दà¥à¤¦";s:3:"142";s:7:"नी ";s:3:"143";s:9:"बाट";s:3:"144";s:9:"यकà¥";s:3:"145";s:7:"री ";s:3:"146";s:9:"रीह";s:3:"147";s:9:"रà¥à¤®";s:3:"148";s:9:"लका";s:3:"149";s:9:"समस";s:3:"150";s:7:"ा अ";s:3:"151";s:7:"ा à¤";s:3:"152";s:7:"ाट ";s:3:"153";s:7:"िय ";s:3:"154";s:7:"ो प";s:3:"155";s:7:"ो म";s:3:"156";s:7:"à¥à¤¨ ";s:3:"157";s:9:"à¥à¤¨à¥‡";s:3:"158";s:9:"à¥à¤·à¤¾";s:3:"159";s:7:" पा";s:3:"160";s:7:" यो";s:3:"161";s:7:" हा";s:3:"162";s:9:"अधि";s:3:"163";s:9:"डà¥à¤µ";s:3:"164";s:7:"त à¤";s:3:"165";s:7:"त स";s:3:"166";s:7:"था ";s:3:"167";s:9:"धिक";s:3:"168";s:9:"पमा";s:3:"169";s:9:"बैठ";s:3:"170";s:9:"मà¥à¤¦";s:3:"171";s:7:"या ";s:3:"172";s:9:"यà¥à¤•";s:3:"173";s:7:"र न";s:3:"174";s:9:"रति";s:3:"175";s:9:"वान";s:3:"176";s:9:"सार";s:3:"177";s:7:"ा आ";s:3:"178";s:7:"ा ज";s:3:"179";s:7:"ा ह";s:3:"180";s:9:"à¥à¤¦à¥";s:3:"181";s:9:"à¥à¤ªà¤®";s:3:"182";s:9:"à¥à¤²à¥‡";s:3:"183";s:9:"à¥à¤µà¤¾";s:3:"184";s:9:"ैठक";s:3:"185";s:7:"ो ब";s:3:"186";s:9:"à¥à¤¤à¤°";s:3:"187";s:7:"à¥à¤¯ ";s:3:"188";s:9:"à¥à¤¯à¤¸";s:3:"189";s:7:" कà¥";s:3:"190";s:7:" मन";s:3:"191";s:7:" रह";s:3:"192";s:9:"चार";s:3:"193";s:9:"तिय";s:3:"194";s:7:"दै ";s:3:"195";s:9:"निर";s:3:"196";s:7:"नॠ";s:3:"197";s:9:"परà¥";s:3:"198";s:9:"रकà¥";s:3:"199";s:9:"रà¥à¤¦";s:3:"200";s:9:"समा";s:3:"201";s:9:"सà¥à¤°";s:3:"202";s:9:"ाउन";s:3:"203";s:7:"ान ";s:3:"204";s:9:"ानम";s:3:"205";s:9:"ारण";s:3:"206";s:9:"ाले";s:3:"207";s:7:"ि 
ब";s:3:"208";s:9:"ियो";s:3:"209";s:9:"à¥à¤¨à¥";s:3:"210";s:9:"à¥à¤°à¤•";s:3:"211";s:9:"à¥à¤¤à¥";s:3:"212";s:9:"à¥à¤¬à¤¨";s:3:"213";s:9:"à¥à¤°à¤¾";s:3:"214";s:7:"à¥à¤· ";s:3:"215";s:7:" आर";s:3:"216";s:7:" जल";s:3:"217";s:7:" बे";s:3:"218";s:7:" या";s:3:"219";s:7:" सा";s:3:"220";s:9:"आà¤à¤•";s:3:"221";s:7:"à¤à¤• ";s:3:"222";s:9:"करà¥";s:3:"223";s:9:"जलस";s:3:"224";s:9:"णका";s:3:"225";s:7:"त र";s:3:"226";s:9:"दà¥à¤°";s:3:"227";s:9:"धान";s:3:"228";s:7:"धि ";s:3:"229";s:9:"नका";s:3:"230";s:9:"नमा";s:3:"231";s:7:"नि ";s:3:"232";s:9:"ममा";s:3:"233";s:7:"रम ";s:3:"234";s:9:"रहे";s:3:"235";s:9:"राज";s:3:"236";s:9:"लसà¥";s:3:"237";s:7:"ला ";s:3:"238";s:9:"वार";s:3:"239";s:9:"सका";s:3:"240";s:9:"हिल";s:3:"241";s:9:"हेक";s:3:"242";s:7:"ा त";s:3:"243";s:9:"ारे";s:3:"244";s:9:"िनà¥";s:3:"245";s:9:"िसà¥";s:3:"246";s:7:"े स";s:3:"247";s:7:"ो न";s:3:"248";s:7:"ो र";s:3:"249";s:7:"ोत ";s:3:"250";s:9:"à¥à¤§à¤¿";s:3:"251";s:9:"à¥à¤®à¥€";s:3:"252";s:9:"à¥à¤°à¤¸";s:3:"253";s:7:" दà¥";s:3:"254";s:7:" पन";s:3:"255";s:7:" बत";s:3:"256";s:7:" बन";s:3:"257";s:7:" à¤à¤¨";s:3:"258";s:9:"ंयà¥";s:3:"259";s:9:"आरम";s:3:"260";s:7:"खि ";s:3:"261";s:9:"णà¥à¤¡";s:3:"262";s:9:"तका";s:3:"263";s:9:"ताल";s:3:"264";s:7:"दी ";s:3:"265";s:9:"देख";s:3:"266";s:9:"निय";s:3:"267";s:9:"पनि";s:3:"268";s:9:"पà¥à¤¤";s:3:"269";s:9:"बता";s:3:"270";s:7:"मी ";s:3:"271";s:9:"मà¥à¤";s:3:"272";s:7:"र स";s:3:"273";s:9:"रमà¥";s:3:"274";s:9:"लमा";s:3:"275";s:9:"विश";s:3:"276";s:9:"षाक";s:3:"277";s:9:"संय";s:3:"278";s:7:"ा ड";s:3:"279";s:7:"ा म";s:3:"280";s:9:"ानक";s:3:"281";s:9:"ालम";s:3:"282";s:7:"ि à¤";s:3:"283";s:7:"ित ";s:3:"284";s:7:"ी प";s:3:"285";s:7:"ी र";s:3:"286";s:7:"ॠà¤";s:3:"287";s:9:"à¥à¤¨à¥‡";s:3:"288";s:7:"े ग";s:3:"289";s:9:"ेखि";s:3:"290";s:7:"ेर ";s:3:"291";s:7:"ो à¤";s:3:"292";s:7:"ो व";s:3:"293";s:7:"ो ह";s:3:"294";s:7:"à¥à¤ ";s:3:"295";s:7:"à¥à¤° ";s:3:"296";s:7:" ता";s:3:"297";s:7:" नम";s:3:"298";s:7:" ना";s:3:"299";}s:9:"norwegian";a:300:{s:3:"er ";s:1:"0";s:3:"en ";s:1:"1";s:3:"et ";s:1:"2";s:3:" 
de";s:1:"3";s:3:"det";s:1:"4";s:3:" i ";s:1:"5";s:3:"for";s:1:"6";s:3:"il ";s:1:"7";s:3:" fo";s:1:"8";s:3:" me";s:1:"9";s:3:"ing";s:2:"10";s:3:"om ";s:2:"11";s:3:" ha";s:2:"12";s:3:" og";s:2:"13";s:3:"ter";s:2:"14";s:3:" er";s:2:"15";s:3:" ti";s:2:"16";s:3:" st";s:2:"17";s:3:"og ";s:2:"18";s:3:"til";s:2:"19";s:3:"ne ";s:2:"20";s:3:" vi";s:2:"21";s:3:"re ";s:2:"22";s:3:" en";s:2:"23";s:3:" se";s:2:"24";s:3:"te ";s:2:"25";s:3:"or ";s:2:"26";s:3:"de ";s:2:"27";s:3:"kke";s:2:"28";s:3:"ke ";s:2:"29";s:3:"ar ";s:2:"30";s:3:"ng ";s:2:"31";s:3:"r s";s:2:"32";s:3:"ene";s:2:"33";s:3:" so";s:2:"34";s:3:"e s";s:2:"35";s:3:"der";s:2:"36";s:3:"an ";s:2:"37";s:3:"som";s:2:"38";s:3:"ste";s:2:"39";s:3:"at ";s:2:"40";s:3:"ed ";s:2:"41";s:3:"r i";s:2:"42";s:3:" av";s:2:"43";s:3:" in";s:2:"44";s:3:"men";s:2:"45";s:3:" at";s:2:"46";s:3:" ko";s:2:"47";s:4:" pÃ¥";s:2:"48";s:3:"har";s:2:"49";s:3:" si";s:2:"50";s:3:"ere";s:2:"51";s:4:"pÃ¥ ";s:2:"52";s:3:"nde";s:2:"53";s:3:"and";s:2:"54";s:3:"els";s:2:"55";s:3:"ett";s:2:"56";s:3:"tte";s:2:"57";s:3:"lig";s:2:"58";s:3:"t s";s:2:"59";s:3:"den";s:2:"60";s:3:"t i";s:2:"61";s:3:"ikk";s:2:"62";s:3:"med";s:2:"63";s:3:"n s";s:2:"64";s:3:"rt ";s:2:"65";s:3:"ser";s:2:"66";s:3:"ska";s:2:"67";s:3:"t e";s:2:"68";s:3:"ker";s:2:"69";s:3:"sen";s:2:"70";s:3:"av ";s:2:"71";s:3:"ler";s:2:"72";s:3:"r a";s:2:"73";s:3:"ten";s:2:"74";s:3:"e f";s:2:"75";s:3:"r e";s:2:"76";s:3:"r t";s:2:"77";s:3:"ede";s:2:"78";s:3:"ig ";s:2:"79";s:3:" re";s:2:"80";s:3:"han";s:2:"81";s:3:"lle";s:2:"82";s:3:"ner";s:2:"83";s:3:" bl";s:2:"84";s:3:" fr";s:2:"85";s:3:"le ";s:2:"86";s:3:" ve";s:2:"87";s:3:"e t";s:2:"88";s:3:"lan";s:2:"89";s:3:"mme";s:2:"90";s:3:"nge";s:2:"91";s:3:" be";s:2:"92";s:3:" ik";s:2:"93";s:3:" om";s:2:"94";s:4:" Ã¥ ";s:2:"95";s:3:"ell";s:2:"96";s:3:"sel";s:2:"97";s:3:"sta";s:2:"98";s:3:"ver";s:2:"99";s:3:" et";s:3:"100";s:3:" sk";s:3:"101";s:3:"nte";s:3:"102";s:3:"one";s:3:"103";s:3:"ore";s:3:"104";s:3:"r d";s:3:"105";s:3:"ske";s:3:"106";s:3:" an";s:3:"107";s:3:" 
la";s:3:"108";s:3:"del";s:3:"109";s:3:"gen";s:3:"110";s:3:"nin";s:3:"111";s:3:"r f";s:3:"112";s:3:"r v";s:3:"113";s:3:"se ";s:3:"114";s:3:" po";s:3:"115";s:3:"ir ";s:3:"116";s:3:"jon";s:3:"117";s:3:"mer";s:3:"118";s:3:"nen";s:3:"119";s:3:"omm";s:3:"120";s:3:"sjo";s:3:"121";s:3:" fl";s:3:"122";s:3:" sa";s:3:"123";s:3:"ern";s:3:"124";s:3:"kom";s:3:"125";s:3:"r m";s:3:"126";s:3:"r o";s:3:"127";s:3:"ren";s:3:"128";s:3:"vil";s:3:"129";s:3:"ale";s:3:"130";s:3:"es ";s:3:"131";s:3:"n a";s:3:"132";s:3:"t f";s:3:"133";s:3:" le";s:3:"134";s:3:"bli";s:3:"135";s:3:"e e";s:3:"136";s:3:"e i";s:3:"137";s:3:"e v";s:3:"138";s:3:"het";s:3:"139";s:3:"ye ";s:3:"140";s:3:" ir";s:3:"141";s:3:"al ";s:3:"142";s:3:"e o";s:3:"143";s:3:"ide";s:3:"144";s:3:"iti";s:3:"145";s:3:"lit";s:3:"146";s:3:"nne";s:3:"147";s:3:"ran";s:3:"148";s:3:"t o";s:3:"149";s:3:"tal";s:3:"150";s:3:"tat";s:3:"151";s:3:"tt ";s:3:"152";s:3:" ka";s:3:"153";s:3:"ans";s:3:"154";s:3:"asj";s:3:"155";s:3:"ge ";s:3:"156";s:3:"inn";s:3:"157";s:3:"kon";s:3:"158";s:3:"lse";s:3:"159";s:3:"pet";s:3:"160";s:3:"t d";s:3:"161";s:3:"vi ";s:3:"162";s:3:" ut";s:3:"163";s:3:"ent";s:3:"164";s:3:"eri";s:3:"165";s:3:"oli";s:3:"166";s:3:"r p";s:3:"167";s:3:"ret";s:3:"168";s:3:"ris";s:3:"169";s:3:"sto";s:3:"170";s:3:"str";s:3:"171";s:3:"t a";s:3:"172";s:3:" ga";s:3:"173";s:3:"all";s:3:"174";s:3:"ape";s:3:"175";s:3:"g s";s:3:"176";s:3:"ill";s:3:"177";s:3:"ira";s:3:"178";s:3:"kap";s:3:"179";s:3:"nn ";s:3:"180";s:3:"opp";s:3:"181";s:3:"r h";s:3:"182";s:3:"rin";s:3:"183";s:3:" br";s:3:"184";s:3:" op";s:3:"185";s:3:"e m";s:3:"186";s:3:"ert";s:3:"187";s:3:"ger";s:3:"188";s:3:"ion";s:3:"189";s:3:"kal";s:3:"190";s:3:"lsk";s:3:"191";s:3:"nes";s:3:"192";s:3:" gj";s:3:"193";s:3:" mi";s:3:"194";s:3:" pr";s:3:"195";s:3:"ang";s:3:"196";s:3:"e h";s:3:"197";s:3:"e r";s:3:"198";s:3:"elt";s:3:"199";s:3:"enn";s:3:"200";s:3:"i s";s:3:"201";s:3:"ist";s:3:"202";s:3:"jen";s:3:"203";s:3:"kan";s:3:"204";s:3:"lt 
";s:3:"205";s:3:"nal";s:3:"206";s:3:"res";s:3:"207";s:3:"tor";s:3:"208";s:3:"ass";s:3:"209";s:3:"dre";s:3:"210";s:3:"e b";s:3:"211";s:3:"e p";s:3:"212";s:3:"mel";s:3:"213";s:3:"n t";s:3:"214";s:3:"nse";s:3:"215";s:3:"ort";s:3:"216";s:3:"per";s:3:"217";s:3:"reg";s:3:"218";s:3:"sje";s:3:"219";s:3:"t p";s:3:"220";s:3:"t v";s:3:"221";s:3:" hv";s:3:"222";s:4:" nÃ¥";s:3:"223";s:3:" va";s:3:"224";s:3:"ann";s:3:"225";s:3:"ato";s:3:"226";s:3:"e a";s:3:"227";s:3:"est";s:3:"228";s:3:"ise";s:3:"229";s:3:"isk";s:3:"230";s:3:"oil";s:3:"231";s:3:"ord";s:3:"232";s:3:"pol";s:3:"233";s:3:"ra ";s:3:"234";s:3:"rak";s:3:"235";s:3:"sse";s:3:"236";s:3:"toi";s:3:"237";s:3:" gr";s:3:"238";s:3:"ak ";s:3:"239";s:3:"eg ";s:3:"240";s:3:"ele";s:3:"241";s:3:"g a";s:3:"242";s:3:"ige";s:3:"243";s:3:"igh";s:3:"244";s:3:"m e";s:3:"245";s:3:"n f";s:3:"246";s:3:"n v";s:3:"247";s:3:"ndr";s:3:"248";s:3:"nsk";s:3:"249";s:3:"rer";s:3:"250";s:3:"t m";s:3:"251";s:3:"und";s:3:"252";s:3:"var";s:3:"253";s:4:"Ã¥r ";s:3:"254";s:3:" he";s:3:"255";s:3:" no";s:3:"256";s:3:" ny";s:3:"257";s:3:"end";s:3:"258";s:3:"ete";s:3:"259";s:3:"fly";s:3:"260";s:3:"g i";s:3:"261";s:3:"ghe";s:3:"262";s:3:"ier";s:3:"263";s:3:"ind";s:3:"264";s:3:"int";s:3:"265";s:3:"lin";s:3:"266";s:3:"n d";s:3:"267";s:3:"n p";s:3:"268";s:3:"rne";s:3:"269";s:3:"sak";s:3:"270";s:3:"sie";s:3:"271";s:3:"t b";s:3:"272";s:3:"tid";s:3:"273";s:3:" al";s:3:"274";s:3:" pa";s:3:"275";s:3:" tr";s:3:"276";s:3:"ag ";s:3:"277";s:3:"dig";s:3:"278";s:3:"e d";s:3:"279";s:3:"e k";s:3:"280";s:3:"ess";s:3:"281";s:3:"hol";s:3:"282";s:3:"i d";s:3:"283";s:3:"lag";s:3:"284";s:3:"led";s:3:"285";s:3:"n e";s:3:"286";s:3:"n i";s:3:"287";s:3:"n o";s:3:"288";s:3:"pri";s:3:"289";s:3:"r b";s:3:"290";s:3:"st ";s:3:"291";s:3:" fe";s:3:"292";s:3:" li";s:3:"293";s:3:" ry";s:3:"294";s:3:"air";s:3:"295";s:3:"ake";s:3:"296";s:3:"d s";s:3:"297";s:3:"eas";s:3:"298";s:3:"egi";s:3:"299";}s:6:"pashto";a:300:{s:4:" د ";s:1:"0";s:5:"اؤ ";s:1:"1";s:5:" اؤ";s:1:"2";s:5:"نو ";s:1:"3";s:5:"Û 
Ø¯";s:1:"4";s:5:"ره ";s:1:"5";s:5:" په";s:1:"6";s:5:"نه ";s:1:"7";s:5:"Ú†Û ";s:1:"8";s:5:" Ú†Û";s:1:"9";s:5:"په ";s:2:"10";s:5:"Ù‡ د";s:2:"11";s:5:"ته ";s:2:"12";s:5:"Ùˆ ا";s:2:"13";s:6:"ونو";s:2:"14";s:5:"Ùˆ د";s:2:"15";s:5:" او";s:2:"16";s:6:"انو";s:2:"17";s:6:"ونه";s:2:"18";s:5:"Ù‡ Ú©";s:2:"19";s:5:" دا";s:2:"20";s:5:"Ù‡ ا";s:2:"21";s:5:"Ø¯Û ";s:2:"22";s:5:"ÚšÛ ";s:2:"23";s:5:" Ú©Û";s:2:"24";s:5:"ان ";s:2:"25";s:5:"لو ";s:2:"26";s:5:"هم ";s:2:"27";s:5:"Ùˆ Ù…";s:2:"28";s:6:"Ú©ÚšÛ";s:2:"29";s:5:"Ù‡ Ù…";s:2:"30";s:5:"Ù‰ ا";s:2:"31";s:5:" نو";s:2:"32";s:5:" ته";s:2:"33";s:5:" Ú©Úš";s:2:"34";s:6:"رون";s:2:"35";s:5:"Ú©Û ";s:2:"36";s:5:"ده ";s:2:"37";s:5:"له ";s:2:"38";s:5:"به ";s:2:"39";s:5:"رو ";s:2:"40";s:5:" هم";s:2:"41";s:5:"Ù‡ Ùˆ";s:2:"42";s:5:"وى ";s:2:"43";s:5:"او ";s:2:"44";s:6:"تون";s:2:"45";s:5:"دا ";s:2:"46";s:5:" Ú©Ùˆ";s:2:"47";s:5:" Ú©Ú“";s:2:"48";s:6:"قام";s:2:"49";s:5:" تر";s:2:"50";s:6:"ران";s:2:"51";s:5:"Ù‡ Ù¾";s:2:"52";s:5:"Û Ùˆ";s:2:"53";s:5:"Û Ù¾";s:2:"54";s:5:" به";s:2:"55";s:5:" خو";s:2:"56";s:5:"تو ";s:2:"57";s:5:"د د";s:2:"58";s:5:"د ا";s:2:"59";s:5:"Ù‡ ت";s:2:"60";s:5:"Ùˆ Ù¾";s:2:"61";s:5:"يا ";s:2:"62";s:5:" خپ";s:2:"63";s:5:" دو";s:2:"64";s:5:" را";s:2:"65";s:5:" مش";s:2:"66";s:5:" پر";s:2:"67";s:6:"ارو";s:2:"68";s:5:"Ø±Û ";s:2:"69";s:5:"Ù… د";s:2:"70";s:6:"مشر";s:2:"71";s:5:" شو";s:2:"72";s:5:" ور";s:2:"73";s:5:"ار ";s:2:"74";s:5:"دى ";s:2:"75";s:5:" اد";s:2:"76";s:5:" دى";s:2:"77";s:5:" مو";s:2:"78";s:5:"د Ù¾";s:2:"79";s:5:"لي ";s:2:"80";s:5:"Ùˆ Ú©";s:2:"81";s:5:" مق";s:2:"82";s:5:" يو";s:2:"83";s:5:"ؤ د";s:2:"84";s:6:"خپل";s:2:"85";s:6:"سره";s:2:"86";s:5:"Ù‡ Ú†";s:2:"87";s:5:"ور ";s:2:"88";s:5:" تا";s:2:"89";s:5:" دÛ";s:2:"90";s:5:" رو";s:2:"91";s:5:" سر";s:2:"92";s:5:" مل";s:2:"93";s:5:" کا";s:2:"94";s:5:"ؤ ا";s:2:"95";s:6:"اره";s:2:"96";s:6:"برو";s:2:"97";s:5:"مه ";s:2:"98";s:5:"Ù‡ ب";s:2:"99";s:5:"Ùˆ ت";s:3:"100";s:6:"پښت";s:3:"101";s:5:" با";s:3:"102";s:5:" دغ";s:3:"103";s:5:" قب";s:3:"104";s:5:" له";s:3:"105";s:5:" 
وا";s:3:"106";s:5:" پا";s:3:"107";s:5:" Ù¾Úš";s:3:"108";s:5:"د Ù…";s:3:"109";s:5:"د Ù‡";s:3:"110";s:5:"Ù„Û ";s:3:"111";s:6:"مات";s:3:"112";s:5:"مو ";s:3:"113";s:5:"Ù‡ Ù‡";s:3:"114";s:5:"وي ";s:3:"115";s:5:"Û Ø¨";s:3:"116";s:5:"Û Ú©";s:3:"117";s:5:" ده";s:3:"118";s:5:" قا";s:3:"119";s:5:"ال ";s:3:"120";s:6:"اما";s:3:"121";s:5:"د Ù†";s:3:"122";s:6:"قبر";s:3:"123";s:5:"Ù‡ Ù†";s:3:"124";s:6:"پار";s:3:"125";s:5:" اث";s:3:"126";s:5:" بي";s:3:"127";s:5:" لا";s:3:"128";s:5:" لر";s:3:"129";s:6:"اثا";s:3:"130";s:5:"د Ø®";s:3:"131";s:6:"دار";s:3:"132";s:6:"ريخ";s:3:"133";s:6:"شرا";s:3:"134";s:6:"مقا";s:3:"135";s:5:"Ù†Û ";s:3:"136";s:5:"Ù‡ ر";s:3:"137";s:5:"Ù‡ Ù„";s:3:"138";s:6:"ولو";s:3:"139";s:5:"يو ";s:3:"140";s:6:"کوم";s:3:"141";s:5:" دد";s:3:"142";s:5:" لو";s:3:"143";s:5:" Ù…Ø";s:3:"144";s:5:" مر";s:3:"145";s:5:" وو";s:3:"146";s:6:"اتو";s:3:"147";s:6:"اري";s:3:"148";s:6:"الو";s:3:"149";s:6:"اند";s:3:"150";s:6:"خان";s:3:"151";s:5:"د ت";s:3:"152";s:5:"Ø³Û ";s:3:"153";s:5:"لى ";s:3:"154";s:6:"نور";s:3:"155";s:5:"Ùˆ Ù„";s:3:"156";s:5:"ÙŠ Ú†";s:3:"157";s:5:"Ú“ÙŠ ";s:3:"158";s:6:"ښتو";s:3:"159";s:5:"Û Ù„";s:3:"160";s:5:" جو";s:3:"161";s:5:" سي";s:3:"162";s:5:"ام ";s:3:"163";s:6:"بان";s:3:"164";s:6:"تار";s:3:"165";s:5:"تر ";s:3:"166";s:6:"ثار";s:3:"167";s:5:"خو ";s:3:"168";s:5:"دو ";s:3:"169";s:5:"ر Ú©";s:3:"170";s:5:"Ù„ د";s:3:"171";s:6:"مون";s:3:"172";s:6:"ندÛ";s:3:"173";s:5:"Ùˆ Ù†";s:3:"174";s:5:"ول ";s:3:"175";s:5:"وه ";s:3:"176";s:5:"Ù‰ Ùˆ";s:3:"177";s:5:"ÙŠ د";s:3:"178";s:5:"Û Ø§";s:3:"179";s:5:"Û Øª";s:3:"180";s:5:"Û ÙŠ";s:3:"181";s:5:" ØÚ©";s:3:"182";s:5:" خب";s:3:"183";s:5:" نه";s:3:"184";s:5:" پو";s:3:"185";s:5:"ا د";s:3:"186";s:5:"ØªÛ ";s:3:"187";s:6:"جوړ";s:3:"188";s:6:"ØÚ©Ù…";s:3:"189";s:6:"ØÚ©Ùˆ";s:3:"190";s:6:"خبر";s:3:"191";s:6:"دان";s:3:"192";s:5:"ر د";s:3:"193";s:5:"غه ";s:3:"194";s:6:"قاÙ";s:3:"195";s:6:"Ù…ØÚ©";s:3:"196";s:6:"وال";s:3:"197";s:6:"ومت";s:3:"198";s:6:"ويل";s:3:"199";s:5:"Ù‰ د";s:3:"200";s:5:"Ù‰ Ù…";s:3:"201";s:6:"يره";s:3:"202";s:5:"پر 
";s:3:"203";s:6:"کول";s:3:"204";s:5:"Û Ù‡";s:3:"205";s:5:" تي";s:3:"206";s:5:" خا";s:3:"207";s:5:" ÙˆÚ©";s:3:"208";s:5:" يا";s:3:"209";s:5:" Úا";s:3:"210";s:5:"ؤ Ù‚";s:3:"211";s:6:"انÛ";s:3:"212";s:5:"بى ";s:3:"213";s:5:"غو ";s:3:"214";s:5:"Ù‡ Ø®";s:3:"215";s:5:"Ùˆ ب";s:3:"216";s:6:"ودا";s:3:"217";s:6:"يدو";s:3:"218";s:5:"Ú“Û ";s:3:"219";s:6:"کال";s:3:"220";s:5:" بر";s:3:"221";s:5:" قد";s:3:"222";s:5:" مي";s:3:"223";s:5:" وي";s:3:"224";s:5:" کر";s:3:"225";s:5:"ؤ Ù…";s:3:"226";s:5:"ات ";s:3:"227";s:6:"ايي";s:3:"228";s:5:"تى ";s:3:"229";s:6:"تيا";s:3:"230";s:6:"تير";s:3:"231";s:6:"خوا";s:3:"232";s:6:"دغو";s:3:"233";s:5:"دم ";s:3:"234";s:6:"ديم";s:3:"235";s:5:"ر Ùˆ";s:3:"236";s:6:"قدي";s:3:"237";s:5:"Ù… Ø®";s:3:"238";s:6:"مان";s:3:"239";s:5:"Ù…Û ";s:3:"240";s:6:"نيو";s:3:"241";s:5:"Ù†Ú– ";s:3:"242";s:5:"Ù‡ ÙŠ";s:3:"243";s:5:"Ùˆ س";s:3:"244";s:5:"Ùˆ Ú†";s:3:"245";s:6:"وان";s:3:"246";s:6:"ورو";s:3:"247";s:6:"ونږ";s:3:"248";s:6:"پور";s:3:"249";s:5:"Ú“Ù‡ ";s:3:"250";s:5:"Ú“Ùˆ ";s:3:"251";s:5:"Û Ø¯";s:3:"252";s:5:"Û Ù†";s:3:"253";s:5:" اه";s:3:"254";s:5:" زي";s:3:"255";s:5:" سو";s:3:"256";s:5:" شي";s:3:"257";s:5:" هر";s:3:"258";s:5:" هغ";s:3:"259";s:5:" ښا";s:3:"260";s:6:"اتل";s:3:"261";s:5:"اق ";s:3:"262";s:6:"اني";s:3:"263";s:6:"بري";s:3:"264";s:5:"Ø¨Û ";s:3:"265";s:5:"ت ا";s:3:"266";s:5:"د ب";s:3:"267";s:5:"د س";s:3:"268";s:5:"ر Ù…";s:3:"269";s:5:"رى ";s:3:"270";s:6:"عرا";s:3:"271";s:6:"لان";s:3:"272";s:5:"مى ";s:3:"273";s:5:"نى ";s:3:"274";s:5:"Ùˆ Ø®";s:3:"275";s:5:"وئ ";s:3:"276";s:6:"ورک";s:3:"277";s:6:"ورÛ";s:3:"278";s:5:"ون ";s:3:"279";s:6:"ÙˆÚ©Ú“";s:3:"280";s:5:"Ù‰ Ú†";s:3:"281";s:6:"يمه";s:3:"282";s:5:"ÙŠÛ ";s:3:"283";s:6:"ښتن";s:3:"284";s:5:"Ú©Ù‡ ";s:3:"285";s:6:"Ú©Ú“ÙŠ";s:3:"286";s:5:"Û Ø®";s:3:"287";s:5:"Û’ Ø´";s:3:"288";s:5:" تØ";s:3:"289";s:5:" تو";s:3:"290";s:5:" در";s:3:"291";s:5:" دپ";s:3:"292";s:5:" صو";s:3:"293";s:5:" عر";s:3:"294";s:5:" ول";s:3:"295";s:5:" يؤ";s:3:"296";s:5:" Ù¾Û€";s:3:"297";s:5:" Ú…Ùˆ";s:3:"298";s:5:"ا 
ا";s:3:"299";}s:6:"pidgin";a:300:{s:3:" de";s:1:"0";s:3:" we";s:1:"1";s:3:" di";s:1:"2";s:3:"di ";s:1:"3";s:3:"dem";s:1:"4";s:3:"em ";s:1:"5";s:3:"ay ";s:1:"6";s:3:" sa";s:1:"7";s:3:"or ";s:1:"8";s:3:"say";s:1:"9";s:3:"ke ";s:2:"10";s:3:"ey ";s:2:"11";s:3:" an";s:2:"12";s:3:" go";s:2:"13";s:3:" e ";s:2:"14";s:3:" to";s:2:"15";s:3:" ma";s:2:"16";s:3:"e d";s:2:"17";s:3:"wey";s:2:"18";s:3:"for";s:2:"19";s:3:"nd ";s:2:"20";s:3:"to ";s:2:"21";s:3:" be";s:2:"22";s:3:" fo";s:2:"23";s:3:"ake";s:2:"24";s:3:"im ";s:2:"25";s:3:" pe";s:2:"26";s:3:"le ";s:2:"27";s:3:"go ";s:2:"28";s:3:"ll ";s:2:"29";s:3:"de ";s:2:"30";s:3:"e s";s:2:"31";s:3:"on ";s:2:"32";s:3:"get";s:2:"33";s:3:"ght";s:2:"34";s:3:"igh";s:2:"35";s:3:" ri";s:2:"36";s:3:"et ";s:2:"37";s:3:"rig";s:2:"38";s:3:" ge";s:2:"39";s:3:"y d";s:2:"40";s:3:" na";s:2:"41";s:3:"mak";s:2:"42";s:3:"t t";s:2:"43";s:3:" no";s:2:"44";s:3:"and";s:2:"45";s:3:"tin";s:2:"46";s:3:"ing";s:2:"47";s:3:"eve";s:2:"48";s:3:"ri ";s:2:"49";s:3:" im";s:2:"50";s:3:" am";s:2:"51";s:3:" or";s:2:"52";s:3:"am ";s:2:"53";s:3:"be ";s:2:"54";s:3:" ev";s:2:"55";s:3:" ta";s:2:"56";s:3:"ht ";s:2:"57";s:3:"e w";s:2:"58";s:3:" li";s:2:"59";s:3:"eri";s:2:"60";s:3:"ng ";s:2:"61";s:3:"ver";s:2:"62";s:3:"all";s:2:"63";s:3:"e f";s:2:"64";s:3:"ers";s:2:"65";s:3:"ntr";s:2:"66";s:3:"ont";s:2:"67";s:3:" do";s:2:"68";s:3:"r d";s:2:"69";s:3:" ko";s:2:"70";s:3:" ti";s:2:"71";s:3:"an ";s:2:"72";s:3:"kon";s:2:"73";s:3:"per";s:2:"74";s:3:"tri";s:2:"75";s:3:"y e";s:2:"76";s:3:"rso";s:2:"77";s:3:"son";s:2:"78";s:3:"no ";s:2:"79";s:3:"ome";s:2:"80";s:3:"is ";s:2:"81";s:3:"do ";s:2:"82";s:3:"ne ";s:2:"83";s:3:"one";s:2:"84";s:3:"ion";s:2:"85";s:3:"m g";s:2:"86";s:3:"i k";s:2:"87";s:3:" al";s:2:"88";s:3:"bod";s:2:"89";s:3:"i w";s:2:"90";s:3:"odi";s:2:"91";s:3:" so";s:2:"92";s:3:" wo";s:2:"93";s:3:"o d";s:2:"94";s:3:"st ";s:2:"95";s:3:"t r";s:2:"96";s:3:" of";s:2:"97";s:3:"aim";s:2:"98";s:3:"e g";s:2:"99";s:3:"nai";s:3:"100";s:3:" co";s:3:"101";s:3:"dis";s:3:"102";s:3:"me 
";s:3:"103";s:3:"of ";s:3:"104";s:3:" wa";s:3:"105";s:3:"e t";s:3:"106";s:3:" ar";s:3:"107";s:3:"e l";s:3:"108";s:3:"ike";s:3:"109";s:3:"lik";s:3:"110";s:3:"t a";s:3:"111";s:3:"wor";s:3:"112";s:3:"alk";s:3:"113";s:3:"ell";s:3:"114";s:3:"eop";s:3:"115";s:3:"lk ";s:3:"116";s:3:"opl";s:3:"117";s:3:"peo";s:3:"118";s:3:"ple";s:3:"119";s:3:"re ";s:3:"120";s:3:"tal";s:3:"121";s:3:"any";s:3:"122";s:3:"e a";s:3:"123";s:3:"o g";s:3:"124";s:3:"art";s:3:"125";s:3:"cle";s:3:"126";s:3:"i p";s:3:"127";s:3:"icl";s:3:"128";s:3:"rti";s:3:"129";s:3:"the";s:3:"130";s:3:"tic";s:3:"131";s:3:"we ";s:3:"132";s:3:"f d";s:3:"133";s:3:"in ";s:3:"134";s:3:" mu";s:3:"135";s:3:"e n";s:3:"136";s:3:"e o";s:3:"137";s:3:"mus";s:3:"138";s:3:"n d";s:3:"139";s:3:"na ";s:3:"140";s:3:"o m";s:3:"141";s:3:"ust";s:3:"142";s:3:"wel";s:3:"143";s:3:"e e";s:3:"144";s:3:"her";s:3:"145";s:3:"m d";s:3:"146";s:3:"nt ";s:3:"147";s:3:" fi";s:3:"148";s:3:"at ";s:3:"149";s:3:"e b";s:3:"150";s:3:"it ";s:3:"151";s:3:"m w";s:3:"152";s:3:"o t";s:3:"153";s:3:"wan";s:3:"154";s:3:"com";s:3:"155";s:3:"da ";s:3:"156";s:3:"fit";s:3:"157";s:3:"m b";s:3:"158";s:3:"so ";s:3:"159";s:3:" fr";s:3:"160";s:3:"ce ";s:3:"161";s:3:"er ";s:3:"162";s:3:"o a";s:3:"163";s:3:" if";s:3:"164";s:3:" on";s:3:"165";s:3:"ent";s:3:"166";s:3:"if ";s:3:"167";s:3:"ind";s:3:"168";s:3:"kin";s:3:"169";s:3:"l d";s:3:"170";s:3:"man";s:3:"171";s:3:"o s";s:3:"172";s:3:" se";s:3:"173";s:3:"y a";s:3:"174";s:3:"y m";s:3:"175";s:3:" re";s:3:"176";s:3:"ee ";s:3:"177";s:3:"k a";s:3:"178";s:3:"t s";s:3:"179";s:3:"ve ";s:3:"180";s:3:"y w";s:3:"181";s:3:" ki";s:3:"182";s:3:"eti";s:3:"183";s:3:"men";s:3:"184";s:3:"ta ";s:3:"185";s:3:"y n";s:3:"186";s:3:"d t";s:3:"187";s:3:"dey";s:3:"188";s:3:"e c";s:3:"189";s:3:"i o";s:3:"190";s:3:"ibo";s:3:"191";s:3:"ld ";s:3:"192";s:3:"m t";s:3:"193";s:3:"n b";s:3:"194";s:3:"o b";s:3:"195";s:3:"ow ";s:3:"196";s:3:"ree";s:3:"197";s:3:"rio";s:3:"198";s:3:"t d";s:3:"199";s:3:" hu";s:3:"200";s:3:" su";s:3:"201";s:3:"en 
";s:3:"202";s:3:"hts";s:3:"203";s:3:"ive";s:3:"204";s:3:"m n";s:3:"205";s:3:"n g";s:3:"206";s:3:"ny ";s:3:"207";s:3:"oth";s:3:"208";s:3:"ts ";s:3:"209";s:3:" as";s:3:"210";s:3:" wh";s:3:"211";s:3:"as ";s:3:"212";s:3:"gom";s:3:"213";s:3:"hum";s:3:"214";s:3:"k s";s:3:"215";s:3:"oda";s:3:"216";s:3:"ork";s:3:"217";s:3:"se ";s:3:"218";s:3:"uma";s:3:"219";s:3:"ut ";s:3:"220";s:3:" ba";s:3:"221";s:3:" ot";s:3:"222";s:3:"ano";s:3:"223";s:3:"m a";s:3:"224";s:3:"m s";s:3:"225";s:3:"nod";s:3:"226";s:3:"om ";s:3:"227";s:3:"r a";s:3:"228";s:3:"r i";s:3:"229";s:3:"rk ";s:3:"230";s:3:" fa";s:3:"231";s:3:" si";s:3:"232";s:3:" th";s:3:"233";s:3:"ad ";s:3:"234";s:3:"e m";s:3:"235";s:3:"eac";s:3:"236";s:3:"m m";s:3:"237";s:3:"n w";s:3:"238";s:3:"nob";s:3:"239";s:3:"orl";s:3:"240";s:3:"out";s:3:"241";s:3:"own";s:3:"242";s:3:"r s";s:3:"243";s:3:"r w";s:3:"244";s:3:"rib";s:3:"245";s:3:"rld";s:3:"246";s:3:"s w";s:3:"247";s:3:"ure";s:3:"248";s:3:"wn ";s:3:"249";s:3:" ow";s:3:"250";s:3:"a d";s:3:"251";s:3:"bad";s:3:"252";s:3:"ch ";s:3:"253";s:3:"fre";s:3:"254";s:3:"gs ";s:3:"255";s:3:"m k";s:3:"256";s:3:"nce";s:3:"257";s:3:"ngs";s:3:"258";s:3:"o f";s:3:"259";s:3:"obo";s:3:"260";s:3:"rea";s:3:"261";s:3:"sur";s:3:"262";s:3:"y o";s:3:"263";s:3:" ab";s:3:"264";s:3:" un";s:3:"265";s:3:"abo";s:3:"266";s:3:"ach";s:3:"267";s:3:"bou";s:3:"268";s:3:"d m";s:3:"269";s:3:"dat";s:3:"270";s:3:"e p";s:3:"271";s:3:"g w";s:3:"272";s:3:"hol";s:3:"273";s:3:"i m";s:3:"274";s:3:"i r";s:3:"275";s:3:"m f";s:3:"276";s:3:"m o";s:3:"277";s:3:"n o";s:3:"278";s:3:"now";s:3:"279";s:3:"ry ";s:3:"280";s:3:"s a";s:3:"281";s:3:"t o";s:3:"282";s:3:"tay";s:3:"283";s:3:"wet";s:3:"284";s:3:" ag";s:3:"285";s:3:" bo";s:3:"286";s:3:" da";s:3:"287";s:3:" pr";s:3:"288";s:3:"arr";s:3:"289";s:3:"ati";s:3:"290";s:3:"d d";s:3:"291";s:3:"d p";s:3:"292";s:3:"i g";s:3:"293";s:3:"i t";s:3:"294";s:3:"liv";s:3:"295";s:3:"ly ";s:3:"296";s:3:"n a";s:3:"297";s:3:"od ";s:3:"298";s:3:"ok ";s:3:"299";}s:6:"polish";a:300:{s:3:"ie 
";s:1:"0";s:3:"nie";s:1:"1";s:3:"em ";s:1:"2";s:3:" ni";s:1:"3";s:3:" po";s:1:"4";s:3:" pr";s:1:"5";s:3:"dzi";s:1:"6";s:3:" na";s:1:"7";s:4:"że ";s:1:"8";s:3:"rze";s:1:"9";s:3:"na ";s:2:"10";s:4:"Å‚em";s:2:"11";s:3:"wie";s:2:"12";s:3:" w ";s:2:"13";s:4:" że";s:2:"14";s:3:"go ";s:2:"15";s:3:" by";s:2:"16";s:3:"prz";s:2:"17";s:3:"owa";s:2:"18";s:4:"iÄ™ ";s:2:"19";s:3:" do";s:2:"20";s:3:" si";s:2:"21";s:3:"owi";s:2:"22";s:3:" pa";s:2:"23";s:3:" za";s:2:"24";s:3:"ch ";s:2:"25";s:3:"ego";s:2:"26";s:4:"aÅ‚ ";s:2:"27";s:4:"siÄ™";s:2:"28";s:3:"ej ";s:2:"29";s:4:"waÅ‚";s:2:"30";s:3:"ym ";s:2:"31";s:3:"ani";s:2:"32";s:4:"aÅ‚e";s:2:"33";s:3:"to ";s:2:"34";s:3:" i ";s:2:"35";s:3:" to";s:2:"36";s:3:" te";s:2:"37";s:3:"e p";s:2:"38";s:3:" je";s:2:"39";s:3:" z ";s:2:"40";s:3:"czy";s:2:"41";s:4:"byÅ‚";s:2:"42";s:3:"pan";s:2:"43";s:3:"sta";s:2:"44";s:3:"kie";s:2:"45";s:3:" ja";s:2:"46";s:3:"do ";s:2:"47";s:3:" ch";s:2:"48";s:3:" cz";s:2:"49";s:3:" wi";s:2:"50";s:4:"iaÅ‚";s:2:"51";s:3:"a p";s:2:"52";s:3:"pow";s:2:"53";s:3:" mi";s:2:"54";s:3:"li ";s:2:"55";s:3:"eni";s:2:"56";s:3:"zie";s:2:"57";s:3:" ta";s:2:"58";s:3:" wa";s:2:"59";s:4:"Å‚o ";s:2:"60";s:4:"ać ";s:2:"61";s:3:"dy ";s:2:"62";s:3:"ak ";s:2:"63";s:3:"e w";s:2:"64";s:3:" a ";s:2:"65";s:3:" od";s:2:"66";s:3:" st";s:2:"67";s:3:"nia";s:2:"68";s:3:"rzy";s:2:"69";s:3:"ied";s:2:"70";s:3:" kt";s:2:"71";s:3:"odz";s:2:"72";s:3:"cie";s:2:"73";s:3:"cze";s:2:"74";s:3:"ia ";s:2:"75";s:3:"iel";s:2:"76";s:4:"któ";s:2:"77";s:3:"o p";s:2:"78";s:4:"tór";s:2:"79";s:4:"Å›ci";s:2:"80";s:3:" sp";s:2:"81";s:3:" wy";s:2:"82";s:3:"jak";s:2:"83";s:3:"tak";s:2:"84";s:3:"zy ";s:2:"85";s:3:" mo";s:2:"86";s:5:"aÅ‚Ä™";s:2:"87";s:3:"pro";s:2:"88";s:3:"ski";s:2:"89";s:3:"tem";s:2:"90";s:5:"Å‚Ä™s";s:2:"91";s:3:" tr";s:2:"92";s:3:"e m";s:2:"93";s:3:"jes";s:2:"94";s:3:"my ";s:2:"95";s:3:" ro";s:2:"96";s:3:"edz";s:2:"97";s:3:"eli";s:2:"98";s:3:"iej";s:2:"99";s:3:" rz";s:3:"100";s:3:"a n";s:3:"101";s:3:"ale";s:3:"102";s:3:"an ";s:3:"103";s:3:"e 
s";s:3:"104";s:3:"est";s:3:"105";s:3:"le ";s:3:"106";s:3:"o s";s:3:"107";s:3:"i p";s:3:"108";s:3:"ki ";s:3:"109";s:3:" co";s:3:"110";s:3:"ada";s:3:"111";s:3:"czn";s:3:"112";s:3:"e t";s:3:"113";s:3:"e z";s:3:"114";s:3:"ent";s:3:"115";s:3:"ny ";s:3:"116";s:3:"pre";s:3:"117";s:4:"rzÄ…";s:3:"118";s:3:"y s";s:3:"119";s:3:" ko";s:3:"120";s:3:" o ";s:3:"121";s:3:"ach";s:3:"122";s:3:"am ";s:3:"123";s:3:"e n";s:3:"124";s:3:"o t";s:3:"125";s:3:"oli";s:3:"126";s:3:"pod";s:3:"127";s:3:"zia";s:3:"128";s:3:" go";s:3:"129";s:3:" ka";s:3:"130";s:3:"by ";s:3:"131";s:3:"ieg";s:3:"132";s:3:"ier";s:3:"133";s:4:"noÅ›";s:3:"134";s:3:"roz";s:3:"135";s:3:"spo";s:3:"136";s:3:"ych";s:3:"137";s:4:"zÄ…d";s:3:"138";s:3:" mn";s:3:"139";s:3:"acz";s:3:"140";s:3:"adz";s:3:"141";s:3:"bie";s:3:"142";s:3:"cho";s:3:"143";s:3:"mni";s:3:"144";s:3:"o n";s:3:"145";s:3:"ost";s:3:"146";s:3:"pra";s:3:"147";s:3:"ze ";s:3:"148";s:4:"Å‚a ";s:3:"149";s:3:" so";s:3:"150";s:3:"a m";s:3:"151";s:3:"cza";s:3:"152";s:3:"iem";s:3:"153";s:4:"ić ";s:3:"154";s:3:"obi";s:3:"155";s:4:"yÅ‚ ";s:3:"156";s:4:"yÅ‚o";s:3:"157";s:3:" mu";s:3:"158";s:4:" mó";s:3:"159";s:3:"a t";s:3:"160";s:3:"acj";s:3:"161";s:3:"ci ";s:3:"162";s:3:"e b";s:3:"163";s:3:"ich";s:3:"164";s:3:"kan";s:3:"165";s:3:"mi ";s:3:"166";s:3:"mie";s:3:"167";s:4:"oÅ›c";s:3:"168";s:3:"row";s:3:"169";s:3:"zen";s:3:"170";s:3:"zyd";s:3:"171";s:3:" al";s:3:"172";s:3:" re";s:3:"173";s:3:"a w";s:3:"174";s:3:"den";s:3:"175";s:3:"edy";s:3:"176";s:4:"iÅ‚ ";s:3:"177";s:3:"ko ";s:3:"178";s:3:"o w";s:3:"179";s:3:"rac";s:3:"180";s:4:"Å›my";s:3:"181";s:3:" ma";s:3:"182";s:3:" ra";s:3:"183";s:3:" sz";s:3:"184";s:3:" ty";s:3:"185";s:3:"e j";s:3:"186";s:3:"isk";s:3:"187";s:3:"ji ";s:3:"188";s:3:"ka ";s:3:"189";s:3:"m s";s:3:"190";s:3:"no ";s:3:"191";s:3:"o z";s:3:"192";s:3:"rez";s:3:"193";s:3:"wa ";s:3:"194";s:4:"ów ";s:3:"195";s:4:"Å‚ow";s:3:"196";s:5:"ść ";s:3:"197";s:3:" ob";s:3:"198";s:3:"ech";s:3:"199";s:3:"ecz";s:3:"200";s:3:"ezy";s:3:"201";s:3:"i w";s:3:"202";s:3:"ja 
";s:3:"203";s:3:"kon";s:3:"204";s:4:"mów";s:3:"205";s:3:"ne ";s:3:"206";s:3:"ni ";s:3:"207";s:3:"now";s:3:"208";s:3:"nym";s:3:"209";s:3:"pol";s:3:"210";s:3:"pot";s:3:"211";s:3:"yde";s:3:"212";s:3:" dl";s:3:"213";s:3:" sy";s:3:"214";s:3:"a s";s:3:"215";s:3:"aki";s:3:"216";s:3:"ali";s:3:"217";s:3:"dla";s:3:"218";s:3:"icz";s:3:"219";s:3:"ku ";s:3:"220";s:3:"ocz";s:3:"221";s:3:"st ";s:3:"222";s:3:"str";s:3:"223";s:3:"szy";s:3:"224";s:3:"trz";s:3:"225";s:3:"wia";s:3:"226";s:3:"y p";s:3:"227";s:3:"za ";s:3:"228";s:3:" wt";s:3:"229";s:3:"chc";s:3:"230";s:3:"esz";s:3:"231";s:3:"iec";s:3:"232";s:3:"im ";s:3:"233";s:3:"la ";s:3:"234";s:3:"o m";s:3:"235";s:3:"sa ";s:3:"236";s:4:"wać";s:3:"237";s:3:"y n";s:3:"238";s:3:"zac";s:3:"239";s:3:"zec";s:3:"240";s:3:" gd";s:3:"241";s:3:"a z";s:3:"242";s:3:"ard";s:3:"243";s:3:"co ";s:3:"244";s:3:"dar";s:3:"245";s:3:"e r";s:3:"246";s:3:"ien";s:3:"247";s:3:"m n";s:3:"248";s:3:"m w";s:3:"249";s:3:"mia";s:3:"250";s:4:"moż";s:3:"251";s:3:"raw";s:3:"252";s:3:"rdz";s:3:"253";s:3:"tan";s:3:"254";s:3:"ted";s:3:"255";s:3:"teg";s:3:"256";s:4:"wiÅ‚";s:3:"257";s:3:"wte";s:3:"258";s:3:"y z";s:3:"259";s:3:"zna";s:3:"260";s:4:"zÅ‚o";s:3:"261";s:3:"a r";s:3:"262";s:3:"awi";s:3:"263";s:3:"bar";s:3:"264";s:3:"cji";s:3:"265";s:4:"czÄ…";s:3:"266";s:3:"dow";s:3:"267";s:4:"eż ";s:3:"268";s:3:"gdy";s:3:"269";s:3:"iek";s:3:"270";s:3:"je ";s:3:"271";s:3:"o d";s:3:"272";s:4:"taÅ‚";s:3:"273";s:3:"wal";s:3:"274";s:3:"wsz";s:3:"275";s:3:"zed";s:3:"276";s:4:"ówi";s:3:"277";s:4:"Ä™sa";s:3:"278";s:3:" ba";s:3:"279";s:3:" lu";s:3:"280";s:3:" wo";s:3:"281";s:3:"aln";s:3:"282";s:3:"arn";s:3:"283";s:3:"ba ";s:3:"284";s:3:"dzo";s:3:"285";s:3:"e c";s:3:"286";s:3:"hod";s:3:"287";s:3:"igi";s:3:"288";s:3:"lig";s:3:"289";s:3:"m p";s:3:"290";s:4:"myÅ›";s:3:"291";s:3:"o c";s:3:"292";s:3:"oni";s:3:"293";s:3:"rel";s:3:"294";s:3:"sku";s:3:"295";s:3:"ste";s:3:"296";s:3:"y w";s:3:"297";s:3:"yst";s:3:"298";s:3:"z w";s:3:"299";}s:10:"portuguese";a:300:{s:3:"de ";s:1:"0";s:3:" 
de";s:1:"1";s:3:"os ";s:1:"2";s:3:"as ";s:1:"3";s:3:"que";s:1:"4";s:3:" co";s:1:"5";s:4:"ão ";s:1:"6";s:3:"o d";s:1:"7";s:3:" qu";s:1:"8";s:3:"ue ";s:1:"9";s:3:" a ";s:2:"10";s:3:"do ";s:2:"11";s:3:"ent";s:2:"12";s:3:" se";s:2:"13";s:3:"a d";s:2:"14";s:3:"s d";s:2:"15";s:3:"e a";s:2:"16";s:3:"es ";s:2:"17";s:3:" pr";s:2:"18";s:3:"ra ";s:2:"19";s:3:"da ";s:2:"20";s:3:" es";s:2:"21";s:3:" pa";s:2:"22";s:3:"to ";s:2:"23";s:3:" o ";s:2:"24";s:3:"em ";s:2:"25";s:3:"con";s:2:"26";s:3:"o p";s:2:"27";s:3:" do";s:2:"28";s:3:"est";s:2:"29";s:3:"nte";s:2:"30";s:5:"ção";s:2:"31";s:3:" da";s:2:"32";s:3:" re";s:2:"33";s:3:"ma ";s:2:"34";s:3:"par";s:2:"35";s:3:" te";s:2:"36";s:3:"ara";s:2:"37";s:3:"ida";s:2:"38";s:3:" e ";s:2:"39";s:3:"ade";s:2:"40";s:3:"is ";s:2:"41";s:3:" um";s:2:"42";s:3:" po";s:2:"43";s:3:"a a";s:2:"44";s:3:"a p";s:2:"45";s:3:"dad";s:2:"46";s:3:"no ";s:2:"47";s:3:"te ";s:2:"48";s:3:" no";s:2:"49";s:5:"açã";s:2:"50";s:3:"pro";s:2:"51";s:3:"al ";s:2:"52";s:3:"com";s:2:"53";s:3:"e d";s:2:"54";s:3:"s a";s:2:"55";s:3:" as";s:2:"56";s:3:"a c";s:2:"57";s:3:"er ";s:2:"58";s:3:"men";s:2:"59";s:3:"s e";s:2:"60";s:3:"ais";s:2:"61";s:3:"nto";s:2:"62";s:3:"res";s:2:"63";s:3:"a s";s:2:"64";s:3:"ado";s:2:"65";s:3:"ist";s:2:"66";s:3:"s p";s:2:"67";s:3:"tem";s:2:"68";s:3:"e c";s:2:"69";s:3:"e s";s:2:"70";s:3:"ia ";s:2:"71";s:3:"o s";s:2:"72";s:3:"o a";s:2:"73";s:3:"o c";s:2:"74";s:3:"e p";s:2:"75";s:3:"sta";s:2:"76";s:3:"ta ";s:2:"77";s:3:"tra";s:2:"78";s:3:"ura";s:2:"79";s:3:" di";s:2:"80";s:3:" pe";s:2:"81";s:3:"ar ";s:2:"82";s:3:"e e";s:2:"83";s:3:"ser";s:2:"84";s:3:"uma";s:2:"85";s:3:"mos";s:2:"86";s:3:"se ";s:2:"87";s:3:" ca";s:2:"88";s:3:"o e";s:2:"89";s:3:" na";s:2:"90";s:3:"a e";s:2:"91";s:3:"des";s:2:"92";s:3:"ont";s:2:"93";s:3:"por";s:2:"94";s:3:" in";s:2:"95";s:3:" ma";s:2:"96";s:3:"ect";s:2:"97";s:3:"o q";s:2:"98";s:3:"ria";s:2:"99";s:3:"s 
c";s:3:"100";s:3:"ste";s:3:"101";s:3:"ver";s:3:"102";s:3:"cia";s:3:"103";s:3:"dos";s:3:"104";s:3:"ica";s:3:"105";s:3:"str";s:3:"106";s:3:" ao";s:3:"107";s:3:" em";s:3:"108";s:3:"das";s:3:"109";s:3:"e t";s:3:"110";s:3:"ito";s:3:"111";s:3:"iza";s:3:"112";s:3:"pre";s:3:"113";s:3:"tos";s:3:"114";s:4:" nã";s:3:"115";s:3:"ada";s:3:"116";s:4:"não";s:3:"117";s:3:"ess";s:3:"118";s:3:"eve";s:3:"119";s:3:"or ";s:3:"120";s:3:"ran";s:3:"121";s:3:"s n";s:3:"122";s:3:"s t";s:3:"123";s:3:"tur";s:3:"124";s:3:" ac";s:3:"125";s:3:" fa";s:3:"126";s:3:"a r";s:3:"127";s:3:"ens";s:3:"128";s:3:"eri";s:3:"129";s:3:"na ";s:3:"130";s:3:"sso";s:3:"131";s:3:" si";s:3:"132";s:4:" é ";s:3:"133";s:3:"bra";s:3:"134";s:3:"esp";s:3:"135";s:3:"mo ";s:3:"136";s:3:"nos";s:3:"137";s:3:"ro ";s:3:"138";s:3:"um ";s:3:"139";s:3:"a n";s:3:"140";s:3:"ao ";s:3:"141";s:3:"ico";s:3:"142";s:3:"liz";s:3:"143";s:3:"min";s:3:"144";s:3:"o n";s:3:"145";s:3:"ons";s:3:"146";s:3:"pri";s:3:"147";s:3:"ten";s:3:"148";s:3:"tic";s:3:"149";s:4:"ões";s:3:"150";s:3:" tr";s:3:"151";s:3:"a m";s:3:"152";s:3:"aga";s:3:"153";s:3:"e n";s:3:"154";s:3:"ili";s:3:"155";s:3:"ime";s:3:"156";s:3:"m a";s:3:"157";s:3:"nci";s:3:"158";s:3:"nha";s:3:"159";s:3:"nta";s:3:"160";s:3:"spe";s:3:"161";s:3:"tiv";s:3:"162";s:3:"am ";s:3:"163";s:3:"ano";s:3:"164";s:3:"arc";s:3:"165";s:3:"ass";s:3:"166";s:3:"cer";s:3:"167";s:3:"e o";s:3:"168";s:3:"ece";s:3:"169";s:3:"emo";s:3:"170";s:3:"ga ";s:3:"171";s:3:"o m";s:3:"172";s:3:"rag";s:3:"173";s:3:"so ";s:3:"174";s:4:"são";s:3:"175";s:3:" au";s:3:"176";s:3:" os";s:3:"177";s:3:" sa";s:3:"178";s:3:"ali";s:3:"179";s:3:"ca ";s:3:"180";s:3:"ema";s:3:"181";s:3:"emp";s:3:"182";s:3:"ici";s:3:"183";s:3:"ido";s:3:"184";s:3:"inh";s:3:"185";s:3:"iss";s:3:"186";s:3:"l d";s:3:"187";s:3:"la ";s:3:"188";s:3:"lic";s:3:"189";s:3:"m c";s:3:"190";s:3:"mai";s:3:"191";s:3:"onc";s:3:"192";s:3:"pec";s:3:"193";s:3:"ram";s:3:"194";s:3:"s q";s:3:"195";s:3:" ci";s:3:"196";s:3:" en";s:3:"197";s:3:" fo";s:3:"198";s:3:"a 
o";s:3:"199";s:3:"ame";s:3:"200";s:3:"car";s:3:"201";s:3:"co ";s:3:"202";s:3:"der";s:3:"203";s:3:"eir";s:3:"204";s:3:"ho ";s:3:"205";s:3:"io ";s:3:"206";s:3:"om ";s:3:"207";s:3:"ora";s:3:"208";s:3:"r a";s:3:"209";s:3:"sen";s:3:"210";s:3:"ter";s:3:"211";s:3:" br";s:3:"212";s:3:" ex";s:3:"213";s:3:"a u";s:3:"214";s:3:"cul";s:3:"215";s:3:"dev";s:3:"216";s:3:"e u";s:3:"217";s:3:"ha ";s:3:"218";s:3:"mpr";s:3:"219";s:3:"nce";s:3:"220";s:3:"oca";s:3:"221";s:3:"ove";s:3:"222";s:3:"rio";s:3:"223";s:3:"s o";s:3:"224";s:3:"sa ";s:3:"225";s:3:"sem";s:3:"226";s:3:"tes";s:3:"227";s:3:"uni";s:3:"228";s:3:"ven";s:3:"229";s:4:"zaç";s:3:"230";s:5:"çõe";s:3:"231";s:3:" ad";s:3:"232";s:3:" al";s:3:"233";s:3:" an";s:3:"234";s:3:" mi";s:3:"235";s:3:" mo";s:3:"236";s:3:" ve";s:3:"237";s:4:" à ";s:3:"238";s:3:"a i";s:3:"239";s:3:"a q";s:3:"240";s:3:"ala";s:3:"241";s:3:"amo";s:3:"242";s:3:"bli";s:3:"243";s:3:"cen";s:3:"244";s:3:"col";s:3:"245";s:3:"cos";s:3:"246";s:3:"cto";s:3:"247";s:3:"e m";s:3:"248";s:3:"e v";s:3:"249";s:3:"ede";s:3:"250";s:4:"gás";s:3:"251";s:3:"ias";s:3:"252";s:3:"ita";s:3:"253";s:3:"iva";s:3:"254";s:3:"ndo";s:3:"255";s:3:"o t";s:3:"256";s:3:"ore";s:3:"257";s:3:"r d";s:3:"258";s:3:"ral";s:3:"259";s:3:"rea";s:3:"260";s:3:"s f";s:3:"261";s:3:"sid";s:3:"262";s:3:"tro";s:3:"263";s:3:"vel";s:3:"264";s:3:"vid";s:3:"265";s:4:"ás ";s:3:"266";s:3:" ap";s:3:"267";s:3:" ar";s:3:"268";s:3:" ce";s:3:"269";s:3:" ou";s:3:"270";s:4:" pú";s:3:"271";s:3:" so";s:3:"272";s:3:" vi";s:3:"273";s:3:"a f";s:3:"274";s:3:"act";s:3:"275";s:3:"arr";s:3:"276";s:3:"bil";s:3:"277";s:3:"cam";s:3:"278";s:3:"e f";s:3:"279";s:3:"e i";s:3:"280";s:3:"el ";s:3:"281";s:3:"for";s:3:"282";s:3:"lem";s:3:"283";s:3:"lid";s:3:"284";s:3:"lo ";s:3:"285";s:3:"m d";s:3:"286";s:3:"mar";s:3:"287";s:3:"nde";s:3:"288";s:3:"o o";s:3:"289";s:3:"omo";s:3:"290";s:3:"ort";s:3:"291";s:3:"per";s:3:"292";s:4:"púb";s:3:"293";s:3:"r 
u";s:3:"294";s:3:"rei";s:3:"295";s:3:"rem";s:3:"296";s:3:"ros";s:3:"297";s:3:"rre";s:3:"298";s:3:"ssi";s:3:"299";}s:8:"romanian";a:300:{s:3:" de";s:1:"0";s:4:" în";s:1:"1";s:3:"de ";s:1:"2";s:3:" a ";s:1:"3";s:3:"ul ";s:1:"4";s:3:" co";s:1:"5";s:4:"în ";s:1:"6";s:3:"re ";s:1:"7";s:3:"e d";s:1:"8";s:3:"ea ";s:1:"9";s:3:" di";s:2:"10";s:3:" pr";s:2:"11";s:3:"le ";s:2:"12";s:4:"ÅŸi ";s:2:"13";s:3:"are";s:2:"14";s:3:"at ";s:2:"15";s:3:"con";s:2:"16";s:3:"ui ";s:2:"17";s:4:" ÅŸi";s:2:"18";s:3:"i d";s:2:"19";s:3:"ii ";s:2:"20";s:3:" cu";s:2:"21";s:3:"e a";s:2:"22";s:3:"lui";s:2:"23";s:3:"ern";s:2:"24";s:3:"te ";s:2:"25";s:3:"cu ";s:2:"26";s:3:" la";s:2:"27";s:3:"a c";s:2:"28";s:4:"că ";s:2:"29";s:3:"din";s:2:"30";s:3:"e c";s:2:"31";s:3:"or ";s:2:"32";s:3:"ulu";s:2:"33";s:3:"ne ";s:2:"34";s:3:"ter";s:2:"35";s:3:"la ";s:2:"36";s:4:"să ";s:2:"37";s:3:"tat";s:2:"38";s:3:"tre";s:2:"39";s:3:" ac";s:2:"40";s:4:" să";s:2:"41";s:3:"est";s:2:"42";s:3:"st ";s:2:"43";s:4:"tă ";s:2:"44";s:3:" ca";s:2:"45";s:3:" ma";s:2:"46";s:3:" pe";s:2:"47";s:3:"cur";s:2:"48";s:3:"ist";s:2:"49";s:4:"mân";s:2:"50";s:3:"a d";s:2:"51";s:3:"i c";s:2:"52";s:3:"nat";s:2:"53";s:3:" ce";s:2:"54";s:3:"i a";s:2:"55";s:3:"ia ";s:2:"56";s:3:"in ";s:2:"57";s:3:"scu";s:2:"58";s:3:" mi";s:2:"59";s:3:"ato";s:2:"60";s:4:"aÅ£i";s:2:"61";s:3:"ie ";s:2:"62";s:3:" re";s:2:"63";s:3:" se";s:2:"64";s:3:"a a";s:2:"65";s:3:"int";s:2:"66";s:3:"ntr";s:2:"67";s:3:"tru";s:2:"68";s:3:"uri";s:2:"69";s:4:"ă a";s:2:"70";s:3:" fo";s:2:"71";s:3:" pa";s:2:"72";s:3:"ate";s:2:"73";s:3:"ini";s:2:"74";s:3:"tul";s:2:"75";s:3:"ent";s:2:"76";s:3:"min";s:2:"77";s:3:"pre";s:2:"78";s:3:"pro";s:2:"79";s:3:"a p";s:2:"80";s:3:"e p";s:2:"81";s:3:"e s";s:2:"82";s:3:"ei ";s:2:"83";s:4:"nă ";s:2:"84";s:3:"par";s:2:"85";s:3:"rna";s:2:"86";s:3:"rul";s:2:"87";s:3:"tor";s:2:"88";s:3:" in";s:2:"89";s:3:" ro";s:2:"90";s:3:" tr";s:2:"91";s:3:" un";s:2:"92";s:3:"al ";s:2:"93";s:3:"ale";s:2:"94";s:3:"art";s:2:"95";s:3:"ce ";s:2:"96";s:3:"e e";s:2:"97";s:4:"e 
î";s:2:"98";s:3:"fos";s:2:"99";s:3:"ita";s:3:"100";s:3:"nte";s:3:"101";s:4:"omâ";s:3:"102";s:3:"ost";s:3:"103";s:3:"rom";s:3:"104";s:3:"ru ";s:3:"105";s:3:"str";s:3:"106";s:3:"ver";s:3:"107";s:3:" ex";s:3:"108";s:3:" na";s:3:"109";s:3:"a f";s:3:"110";s:3:"lor";s:3:"111";s:3:"nis";s:3:"112";s:3:"rea";s:3:"113";s:3:"rit";s:3:"114";s:3:" al";s:3:"115";s:3:" eu";s:3:"116";s:3:" no";s:3:"117";s:3:"ace";s:3:"118";s:3:"cer";s:3:"119";s:3:"ile";s:3:"120";s:3:"nal";s:3:"121";s:3:"pri";s:3:"122";s:3:"ri ";s:3:"123";s:3:"sta";s:3:"124";s:3:"ste";s:3:"125";s:4:"Å£ie";s:3:"126";s:3:" au";s:3:"127";s:3:" da";s:3:"128";s:3:" ju";s:3:"129";s:3:" po";s:3:"130";s:3:"ar ";s:3:"131";s:3:"au ";s:3:"132";s:3:"ele";s:3:"133";s:3:"ere";s:3:"134";s:3:"eri";s:3:"135";s:3:"ina";s:3:"136";s:3:"n a";s:3:"137";s:3:"n c";s:3:"138";s:3:"res";s:3:"139";s:3:"se ";s:3:"140";s:3:"t a";s:3:"141";s:3:"tea";s:3:"142";s:4:" că";s:3:"143";s:3:" do";s:3:"144";s:3:" fi";s:3:"145";s:3:"a s";s:3:"146";s:4:"ată";s:3:"147";s:3:"com";s:3:"148";s:4:"e ÅŸ";s:3:"149";s:3:"eur";s:3:"150";s:3:"guv";s:3:"151";s:3:"i s";s:3:"152";s:3:"ice";s:3:"153";s:3:"ili";s:3:"154";s:3:"na ";s:3:"155";s:3:"rec";s:3:"156";s:3:"rep";s:3:"157";s:3:"ril";s:3:"158";s:3:"rne";s:3:"159";s:3:"rti";s:3:"160";s:3:"uro";s:3:"161";s:3:"uve";s:3:"162";s:4:"ă p";s:3:"163";s:3:" ar";s:3:"164";s:3:" o ";s:3:"165";s:3:" su";s:3:"166";s:3:" vi";s:3:"167";s:3:"dec";s:3:"168";s:3:"dre";s:3:"169";s:3:"oar";s:3:"170";s:3:"ons";s:3:"171";s:3:"pe ";s:3:"172";s:3:"rii";s:3:"173";s:3:" ad";s:3:"174";s:3:" ge";s:3:"175";s:3:"a m";s:3:"176";s:3:"a r";s:3:"177";s:3:"ain";s:3:"178";s:3:"ali";s:3:"179";s:3:"car";s:3:"180";s:3:"cat";s:3:"181";s:3:"ecu";s:3:"182";s:3:"ene";s:3:"183";s:3:"ept";s:3:"184";s:3:"ext";s:3:"185";s:3:"ilo";s:3:"186";s:3:"iu ";s:3:"187";s:3:"n p";s:3:"188";s:3:"ori";s:3:"189";s:3:"sec";s:3:"190";s:3:"u p";s:3:"191";s:3:"une";s:3:"192";s:4:"ă c";s:3:"193";s:4:"ÅŸti";s:3:"194";s:4:"Å£ia";s:3:"195";s:3:" ch";s:3:"196";s:3:" 
gu";s:3:"197";s:3:"ai ";s:3:"198";s:3:"ani";s:3:"199";s:3:"cea";s:3:"200";s:3:"e f";s:3:"201";s:3:"isc";s:3:"202";s:3:"l a";s:3:"203";s:3:"lic";s:3:"204";s:3:"liu";s:3:"205";s:3:"mar";s:3:"206";s:3:"nic";s:3:"207";s:3:"nt ";s:3:"208";s:3:"nul";s:3:"209";s:3:"ris";s:3:"210";s:3:"t c";s:3:"211";s:3:"t p";s:3:"212";s:3:"tic";s:3:"213";s:3:"tid";s:3:"214";s:3:"u a";s:3:"215";s:3:"ucr";s:3:"216";s:3:" as";s:3:"217";s:3:" dr";s:3:"218";s:3:" fa";s:3:"219";s:3:" nu";s:3:"220";s:3:" pu";s:3:"221";s:3:" to";s:3:"222";s:3:"cra";s:3:"223";s:3:"dis";s:3:"224";s:4:"enÅ£";s:3:"225";s:3:"esc";s:3:"226";s:3:"gen";s:3:"227";s:3:"it ";s:3:"228";s:3:"ivi";s:3:"229";s:3:"l d";s:3:"230";s:3:"n d";s:3:"231";s:3:"nd ";s:3:"232";s:3:"nu ";s:3:"233";s:3:"ond";s:3:"234";s:3:"pen";s:3:"235";s:3:"ral";s:3:"236";s:3:"riv";s:3:"237";s:3:"rte";s:3:"238";s:3:"sti";s:3:"239";s:3:"t d";s:3:"240";s:3:"ta ";s:3:"241";s:3:"to ";s:3:"242";s:3:"uni";s:3:"243";s:3:"xte";s:3:"244";s:4:"ând";s:3:"245";s:4:"îns";s:3:"246";s:4:"ă s";s:3:"247";s:3:" bl";s:3:"248";s:3:" st";s:3:"249";s:3:" uc";s:3:"250";s:3:"a b";s:3:"251";s:3:"a i";s:3:"252";s:3:"a l";s:3:"253";s:3:"air";s:3:"254";s:3:"ast";s:3:"255";s:3:"bla";s:3:"256";s:3:"bri";s:3:"257";s:3:"che";s:3:"258";s:3:"duc";s:3:"259";s:3:"dul";s:3:"260";s:3:"e m";s:3:"261";s:3:"eas";s:3:"262";s:3:"edi";s:3:"263";s:3:"esp";s:3:"264";s:3:"i l";s:3:"265";s:3:"i p";s:3:"266";s:3:"ica";s:3:"267";s:4:"ică";s:3:"268";s:3:"ir ";s:3:"269";s:3:"iun";s:3:"270";s:3:"jud";s:3:"271";s:3:"lai";s:3:"272";s:3:"lul";s:3:"273";s:3:"mai";s:3:"274";s:3:"men";s:3:"275";s:3:"ni ";s:3:"276";s:3:"pus";s:3:"277";s:3:"put";s:3:"278";s:3:"ra ";s:3:"279";s:3:"rai";s:3:"280";s:3:"rop";s:3:"281";s:3:"sil";s:3:"282";s:3:"ti ";s:3:"283";s:3:"tra";s:3:"284";s:3:"u s";s:3:"285";s:3:"ua ";s:3:"286";s:3:"ude";s:3:"287";s:3:"urs";s:3:"288";s:4:"ân ";s:3:"289";s:4:"înt";s:3:"290";s:5:"ţă ";s:3:"291";s:3:" lu";s:3:"292";s:3:" mo";s:3:"293";s:3:" s ";s:3:"294";s:3:" sa";s:3:"295";s:3:" 
sc";s:3:"296";s:3:"a u";s:3:"297";s:3:"an ";s:3:"298";s:3:"atu";s:3:"299";}s:7:"russian";a:300:{s:5:" на";s:1:"0";s:5:" пр";s:1:"1";s:5:"то ";s:1:"2";s:5:" не";s:1:"3";s:5:"ли ";s:1:"4";s:5:" по";s:1:"5";s:5:"но ";s:1:"6";s:4:" в ";s:1:"7";s:5:"на ";s:1:"8";s:5:"Ñ‚ÑŒ ";s:1:"9";s:5:"не ";s:2:"10";s:4:" и ";s:2:"11";s:5:" ко";s:2:"12";s:5:"ом ";s:2:"13";s:6:"про";s:2:"14";s:5:" то";s:2:"15";s:5:"их ";s:2:"16";s:5:" ка";s:2:"17";s:6:"ать";s:2:"18";s:6:"ото";s:2:"19";s:5:" за";s:2:"20";s:5:"ие ";s:2:"21";s:6:"ова";s:2:"22";s:6:"тел";s:2:"23";s:6:"тор";s:2:"24";s:5:" де";s:2:"25";s:5:"ой ";s:2:"26";s:6:"Ñти";s:2:"27";s:5:" от";s:2:"28";s:5:"ах ";s:2:"29";s:5:"ми ";s:2:"30";s:6:"ÑÑ‚Ñ€";s:2:"31";s:5:" бе";s:2:"32";s:5:" во";s:2:"33";s:5:" ра";s:2:"34";s:5:"Ð°Ñ ";s:2:"35";s:6:"ват";s:2:"36";s:5:"ей ";s:2:"37";s:5:"ет ";s:2:"38";s:5:"же ";s:2:"39";s:6:"иче";s:2:"40";s:5:"Ð¸Ñ ";s:2:"41";s:5:"ов ";s:2:"42";s:6:"Ñто";s:2:"43";s:5:" об";s:2:"44";s:6:"вер";s:2:"45";s:5:"го ";s:2:"46";s:5:"и в";s:2:"47";s:5:"и п";s:2:"48";s:5:"и Ñ";s:2:"49";s:5:"ии ";s:2:"50";s:6:"иÑÑ‚";s:2:"51";s:5:"о в";s:2:"52";s:6:"оÑÑ‚";s:2:"53";s:6:"тра";s:2:"54";s:5:" те";s:2:"55";s:6:"ели";s:2:"56";s:6:"ере";s:2:"57";s:6:"кот";s:2:"58";s:6:"льн";s:2:"59";s:6:"ник";s:2:"60";s:6:"нти";s:2:"61";s:5:"о Ñ";s:2:"62";s:6:"рор";s:2:"63";s:6:"Ñтв";s:2:"64";s:6:"чеÑ";s:2:"65";s:5:" бо";s:2:"66";s:5:" ве";s:2:"67";s:5:" да";s:2:"68";s:5:" ин";s:2:"69";s:5:" но";s:2:"70";s:4:" Ñ ";s:2:"71";s:5:" Ñо";s:2:"72";s:5:" Ñп";s:2:"73";s:5:" ÑÑ‚";s:2:"74";s:5:" чт";s:2:"75";s:6:"али";s:2:"76";s:6:"ами";s:2:"77";s:6:"вид";s:2:"78";s:6:"дет";s:2:"79";s:5:"е н";s:2:"80";s:6:"ель";s:2:"81";s:6:"еÑк";s:2:"82";s:6:"еÑÑ‚";s:2:"83";s:6:"зал";s:2:"84";s:5:"и н";s:2:"85";s:6:"ива";s:2:"86";s:6:"кон";s:2:"87";s:6:"ого";s:2:"88";s:6:"одн";s:2:"89";s:6:"ожн";s:2:"90";s:6:"оль";s:2:"91";s:6:"ори";s:2:"92";s:6:"ров";s:2:"93";s:6:"Ñко";s:2:"94";s:5:"ÑÑ ";s:2:"95";s:6:"тер";s:2:"96";s:6:"что";s:2:"97";s:5:" мо";s:2:"98";s:5:" 
Ñа";s:2:"99";s:5:" ÑÑ‚";s:3:"100";s:6:"ант";s:3:"101";s:6:"вÑе";s:3:"102";s:6:"ерр";s:3:"103";s:6:"еÑл";s:3:"104";s:6:"иде";s:3:"105";s:6:"ина";s:3:"106";s:6:"ино";s:3:"107";s:6:"иро";s:3:"108";s:6:"ите";s:3:"109";s:5:"ка ";s:3:"110";s:5:"ко ";s:3:"111";s:6:"кол";s:3:"112";s:6:"ком";s:3:"113";s:5:"ла ";s:3:"114";s:6:"ниÑ";s:3:"115";s:5:"о Ñ‚";s:3:"116";s:6:"оло";s:3:"117";s:6:"ран";s:3:"118";s:6:"ред";s:3:"119";s:5:"ÑÑŒ ";s:3:"120";s:6:"тив";s:3:"121";s:6:"тич";s:3:"122";s:5:"Ñ‹Ñ… ";s:3:"123";s:5:" ви";s:3:"124";s:5:" вÑ";s:3:"125";s:5:" го";s:3:"126";s:5:" ма";s:3:"127";s:5:" Ñл";s:3:"128";s:6:"ако";s:3:"129";s:6:"ани";s:3:"130";s:6:"аÑÑ‚";s:3:"131";s:6:"без";s:3:"132";s:6:"дел";s:3:"133";s:5:"е д";s:3:"134";s:5:"е п";s:3:"135";s:5:"ем ";s:3:"136";s:6:"жно";s:3:"137";s:5:"и д";s:3:"138";s:6:"ика";s:3:"139";s:6:"каз";s:3:"140";s:6:"как";s:3:"141";s:5:"ки ";s:3:"142";s:6:"ноÑ";s:3:"143";s:5:"о н";s:3:"144";s:6:"опа";s:3:"145";s:6:"при";s:3:"146";s:6:"рро";s:3:"147";s:6:"Ñки";s:3:"148";s:5:"ти ";s:3:"149";s:6:"тов";s:3:"150";s:5:"ые ";s:3:"151";s:5:" вы";s:3:"152";s:5:" до";s:3:"153";s:5:" ме";s:3:"154";s:5:" ни";s:3:"155";s:5:" од";s:3:"156";s:5:" ро";s:3:"157";s:5:" Ñв";s:3:"158";s:5:" чи";s:3:"159";s:5:"а н";s:3:"160";s:6:"ает";s:3:"161";s:6:"аза";s:3:"162";s:6:"ате";s:3:"163";s:6:"беÑ";s:3:"164";s:5:"в п";s:3:"165";s:5:"ва ";s:3:"166";s:5:"е в";s:3:"167";s:5:"е м";s:3:"168";s:5:"е Ñ";s:3:"169";s:5:"ез ";s:3:"170";s:6:"ени";s:3:"171";s:5:"за ";s:3:"172";s:6:"зна";s:3:"173";s:6:"ини";s:3:"174";s:6:"кам";s:3:"175";s:6:"ках";s:3:"176";s:6:"кто";s:3:"177";s:6:"лов";s:3:"178";s:6:"мер";s:3:"179";s:6:"мож";s:3:"180";s:6:"нал";s:3:"181";s:6:"ниц";s:3:"182";s:5:"ны ";s:3:"183";s:6:"ным";s:3:"184";s:6:"ора";s:3:"185";s:6:"оро";s:3:"186";s:5:"от ";s:3:"187";s:6:"пор";s:3:"188";s:6:"рав";s:3:"189";s:6:"реÑ";s:3:"190";s:6:"риÑ";s:3:"191";s:6:"роÑ";s:3:"192";s:6:"Ñка";s:3:"193";s:5:"Ñ‚ н";s:3:"194";s:6:"том";s:3:"195";s:6:"чит";s:3:"196";s:6:"шко";s:3:"197";s:5:" 
бы";s:3:"198";s:4:" о ";s:3:"199";s:5:" Ñ‚Ñ€";s:3:"200";s:5:" уж";s:3:"201";s:5:" чу";s:3:"202";s:5:" шк";s:3:"203";s:5:"а б";s:3:"204";s:5:"а в";s:3:"205";s:5:"а Ñ€";s:3:"206";s:6:"аби";s:3:"207";s:6:"ала";s:3:"208";s:6:"ало";s:3:"209";s:6:"аль";s:3:"210";s:6:"анн";s:3:"211";s:6:"ати";s:3:"212";s:6:"бин";s:3:"213";s:6:"веÑ";s:3:"214";s:6:"вно";s:3:"215";s:5:"во ";s:3:"216";s:6:"вши";s:3:"217";s:6:"дал";s:3:"218";s:6:"дат";s:3:"219";s:6:"дно";s:3:"220";s:5:"е з";s:3:"221";s:6:"его";s:3:"222";s:6:"еле";s:3:"223";s:6:"енн";s:3:"224";s:6:"ент";s:3:"225";s:6:"ете";s:3:"226";s:5:"и о";s:3:"227";s:6:"или";s:3:"228";s:6:"иÑÑŒ";s:3:"229";s:5:"ит ";s:3:"230";s:6:"ици";s:3:"231";s:6:"ков";s:3:"232";s:6:"лен";s:3:"233";s:6:"льк";s:3:"234";s:6:"мен";s:3:"235";s:5:"мы ";s:3:"236";s:6:"нет";s:3:"237";s:5:"ни ";s:3:"238";s:6:"нны";s:3:"239";s:6:"ног";s:3:"240";s:6:"ной";s:3:"241";s:6:"ном";s:3:"242";s:5:"о п";s:3:"243";s:6:"обн";s:3:"244";s:6:"ове";s:3:"245";s:6:"овн";s:3:"246";s:6:"оры";s:3:"247";s:6:"пер";s:3:"248";s:5:"по ";s:3:"249";s:6:"пра";s:3:"250";s:6:"пре";s:3:"251";s:6:"раз";s:3:"252";s:6:"роп";s:3:"253";s:5:"ры ";s:3:"254";s:5:"Ñе ";s:3:"255";s:6:"Ñли";s:3:"256";s:6:"Ñов";s:3:"257";s:6:"тре";s:3:"258";s:6:"Ñ‚ÑÑ";s:3:"259";s:6:"уро";s:3:"260";s:6:"цел";s:3:"261";s:6:"чно";s:3:"262";s:5:"ÑŒ в";s:3:"263";s:6:"ько";s:3:"264";s:6:"ьно";s:3:"265";s:6:"Ñто";s:3:"266";s:5:"ÑŽÑ‚ ";s:3:"267";s:5:"Ñ Ð½";s:3:"268";s:5:" ан";s:3:"269";s:5:" еÑ";s:3:"270";s:5:" же";s:3:"271";s:5:" из";s:3:"272";s:5:" кт";s:3:"273";s:5:" ми";s:3:"274";s:5:" мы";s:3:"275";s:5:" пе";s:3:"276";s:5:" Ñе";s:3:"277";s:5:" це";s:3:"278";s:5:"а м";s:3:"279";s:5:"а п";s:3:"280";s:5:"а Ñ‚";s:3:"281";s:6:"авш";s:3:"282";s:6:"аже";s:3:"283";s:5:"ак ";s:3:"284";s:5:"ал ";s:3:"285";s:6:"але";s:3:"286";s:6:"ане";s:3:"287";s:6:"ачи";s:3:"288";s:6:"ают";s:3:"289";s:6:"бна";s:3:"290";s:6:"бол";s:3:"291";s:5:"бы ";s:3:"292";s:5:"в и";s:3:"293";s:5:"в 
Ñ";s:3:"294";s:6:"ван";s:3:"295";s:6:"гра";s:3:"296";s:6:"даж";s:3:"297";s:6:"ден";s:3:"298";s:5:"е к";s:3:"299";}s:7:"serbian";a:300:{s:5:" на";s:1:"0";s:5:" је";s:1:"1";s:5:" по";s:1:"2";s:5:"је ";s:1:"3";s:4:" и ";s:1:"4";s:5:" не";s:1:"5";s:5:" пр";s:1:"6";s:5:"га ";s:1:"7";s:5:" Ñв";s:1:"8";s:5:"ог ";s:1:"9";s:5:"а Ñ";s:2:"10";s:5:"их ";s:2:"11";s:5:"на ";s:2:"12";s:6:"кој";s:2:"13";s:6:"ога";s:2:"14";s:4:" у ";s:2:"15";s:5:"а п";s:2:"16";s:5:"не ";s:2:"17";s:5:"ни ";s:2:"18";s:5:"ти ";s:2:"19";s:5:" да";s:2:"20";s:5:"ом ";s:2:"21";s:5:" ве";s:2:"22";s:5:" ÑÑ€";s:2:"23";s:5:"и Ñ";s:2:"24";s:6:"Ñко";s:2:"25";s:5:" об";s:2:"26";s:5:"а н";s:2:"27";s:5:"да ";s:2:"28";s:5:"е н";s:2:"29";s:5:"но ";s:2:"30";s:6:"ног";s:2:"31";s:5:"о ј";s:2:"32";s:5:"ој ";s:2:"33";s:5:" за";s:2:"34";s:5:"ва ";s:2:"35";s:5:"е Ñ";s:2:"36";s:5:"и п";s:2:"37";s:5:"ма ";s:2:"38";s:6:"ник";s:2:"39";s:6:"обр";s:2:"40";s:6:"ова";s:2:"41";s:5:" ко";s:2:"42";s:5:"а и";s:2:"43";s:6:"диј";s:2:"44";s:5:"е п";s:2:"45";s:5:"ка ";s:2:"46";s:5:"ко ";s:2:"47";s:6:"ког";s:2:"48";s:6:"оÑÑ‚";s:2:"49";s:6:"Ñве";s:2:"50";s:6:"Ñтв";s:2:"51";s:6:"Ñти";s:2:"52";s:6:"тра";s:2:"53";s:6:"еди";s:2:"54";s:6:"има";s:2:"55";s:6:"пок";s:2:"56";s:6:"пра";s:2:"57";s:6:"раз";s:2:"58";s:5:"те ";s:2:"59";s:5:" бо";s:2:"60";s:5:" ви";s:2:"61";s:5:" Ñа";s:2:"62";s:6:"аво";s:2:"63";s:6:"бра";s:2:"64";s:6:"гоÑ";s:2:"65";s:5:"е и";s:2:"66";s:6:"ели";s:2:"67";s:6:"ени";s:2:"68";s:5:"за ";s:2:"69";s:6:"ики";s:2:"70";s:5:"ио ";s:2:"71";s:6:"пре";s:2:"72";s:6:"рав";s:2:"73";s:6:"рад";s:2:"74";s:5:"у Ñ";s:2:"75";s:5:"ју ";s:2:"76";s:5:"ња ";s:2:"77";s:5:" би";s:2:"78";s:5:" до";s:2:"79";s:5:" ÑÑ‚";s:2:"80";s:6:"аÑÑ‚";s:2:"81";s:6:"бој";s:2:"82";s:6:"ебо";s:2:"83";s:5:"и н";s:2:"84";s:5:"им ";s:2:"85";s:5:"ку ";s:2:"86";s:6:"лан";s:2:"87";s:6:"неб";s:2:"88";s:6:"ово";s:2:"89";s:6:"ого";s:2:"90";s:6:"оÑл";s:2:"91";s:6:"ојш";s:2:"92";s:6:"пед";s:2:"93";s:6:"ÑÑ‚Ñ€";s:2:"94";s:6:"чаÑ";s:2:"95";s:5:" го";s:2:"96";s:5:" кр";s:2:"97";s:5:" 
мо";s:2:"98";s:5:" чл";s:2:"99";s:5:"а м";s:3:"100";s:5:"а о";s:3:"101";s:6:"ако";s:3:"102";s:6:"ача";s:3:"103";s:6:"вел";s:3:"104";s:6:"вет";s:3:"105";s:6:"вог";s:3:"106";s:6:"еда";s:3:"107";s:6:"иÑÑ‚";s:3:"108";s:6:"ити";s:3:"109";s:6:"ије";s:3:"110";s:6:"око";s:3:"111";s:6:"Ñло";s:3:"112";s:6:"Ñрб";s:3:"113";s:6:"чла";s:3:"114";s:5:" бе";s:3:"115";s:5:" оÑ";s:3:"116";s:5:" от";s:3:"117";s:5:" ре";s:3:"118";s:5:" Ñе";s:3:"119";s:5:"а в";s:3:"120";s:5:"ан ";s:3:"121";s:6:"бог";s:3:"122";s:6:"бро";s:3:"123";s:6:"вен";s:3:"124";s:6:"гра";s:3:"125";s:5:"е о";s:3:"126";s:6:"ика";s:3:"127";s:6:"ија";s:3:"128";s:6:"ких";s:3:"129";s:6:"ком";s:3:"130";s:5:"ли ";s:3:"131";s:5:"ну ";s:3:"132";s:6:"ота";s:3:"133";s:6:"ојн";s:3:"134";s:6:"под";s:3:"135";s:6:"рбÑ";s:3:"136";s:6:"ред";s:3:"137";s:6:"рој";s:3:"138";s:5:"Ñа ";s:3:"139";s:6:"Ñни";s:3:"140";s:6:"тач";s:3:"141";s:6:"тва";s:3:"142";s:5:"ја ";s:3:"143";s:5:"ји ";s:3:"144";s:5:" ка";s:3:"145";s:5:" ов";s:3:"146";s:5:" Ñ‚Ñ€";s:3:"147";s:5:"а ј";s:3:"148";s:6:"ави";s:3:"149";s:5:"аз ";s:3:"150";s:6:"ано";s:3:"151";s:6:"био";s:3:"152";s:6:"вик";s:3:"153";s:5:"во ";s:3:"154";s:6:"гов";s:3:"155";s:6:"дни";s:3:"156";s:5:"е ч";s:3:"157";s:6:"его";s:3:"158";s:5:"и о";s:3:"159";s:6:"ива";s:3:"160";s:6:"иво";s:3:"161";s:5:"ик ";s:3:"162";s:6:"ине";s:3:"163";s:6:"ини";s:3:"164";s:6:"ипе";s:3:"165";s:6:"кип";s:3:"166";s:6:"лик";s:3:"167";s:5:"ло ";s:3:"168";s:6:"наш";s:3:"169";s:6:"ноÑ";s:3:"170";s:5:"о Ñ‚";s:3:"171";s:5:"од ";s:3:"172";s:6:"оди";s:3:"173";s:6:"она";s:3:"174";s:6:"оји";s:3:"175";s:6:"поч";s:3:"176";s:6:"про";s:3:"177";s:5:"ра ";s:3:"178";s:6:"риÑ";s:3:"179";s:6:"род";s:3:"180";s:6:"Ñ€ÑÑ‚";s:3:"181";s:5:"Ñе ";s:3:"182";s:6:"Ñпо";s:3:"183";s:6:"Ñта";s:3:"184";s:6:"тић";s:3:"185";s:5:"у д";s:3:"186";s:5:"у н";s:3:"187";s:5:"у о";s:3:"188";s:6:"чин";s:3:"189";s:5:"ша ";s:3:"190";s:6:"јед";s:3:"191";s:6:"јни";s:3:"192";s:5:"ће ";s:3:"193";s:4:" м ";s:3:"194";s:5:" ме";s:3:"195";s:5:" ни";s:3:"196";s:5:" 
он";s:3:"197";s:5:" па";s:3:"198";s:5:" Ñл";s:3:"199";s:5:" те";s:3:"200";s:5:"а у";s:3:"201";s:6:"ава";s:3:"202";s:6:"аве";s:3:"203";s:6:"авн";s:3:"204";s:6:"ана";s:3:"205";s:5:"ао ";s:3:"206";s:6:"ати";s:3:"207";s:6:"аци";s:3:"208";s:6:"ају";s:3:"209";s:6:"ања";s:3:"210";s:6:"бÑк";s:3:"211";s:6:"вор";s:3:"212";s:6:"воÑ";s:3:"213";s:6:"вÑк";s:3:"214";s:6:"дин";s:3:"215";s:5:"е у";s:3:"216";s:6:"едн";s:3:"217";s:6:"ези";s:3:"218";s:6:"ека";s:3:"219";s:6:"ено";s:3:"220";s:6:"ето";s:3:"221";s:6:"ења";s:3:"222";s:6:"жив";s:3:"223";s:5:"и г";s:3:"224";s:5:"и и";s:3:"225";s:5:"и к";s:3:"226";s:5:"и Ñ‚";s:3:"227";s:6:"ику";s:3:"228";s:6:"ичк";s:3:"229";s:5:"ки ";s:3:"230";s:6:"крÑ";s:3:"231";s:5:"ла ";s:3:"232";s:6:"лав";s:3:"233";s:6:"лит";s:3:"234";s:5:"ме ";s:3:"235";s:6:"мен";s:3:"236";s:6:"нац";s:3:"237";s:5:"о н";s:3:"238";s:5:"о п";s:3:"239";s:5:"о у";s:3:"240";s:6:"одн";s:3:"241";s:6:"оли";s:3:"242";s:6:"орн";s:3:"243";s:6:"оÑн";s:3:"244";s:6:"оÑп";s:3:"245";s:6:"оче";s:3:"246";s:6:"пÑк";s:3:"247";s:6:"реч";s:3:"248";s:6:"рпÑ";s:3:"249";s:6:"Ñво";s:3:"250";s:6:"Ñки";s:3:"251";s:6:"Ñла";s:3:"252";s:6:"Ñрп";s:3:"253";s:5:"Ñу ";s:3:"254";s:5:"та ";s:3:"255";s:6:"тав";s:3:"256";s:6:"тве";s:3:"257";s:5:"у б";s:3:"258";s:6:"јез";s:3:"259";s:5:"ћи ";s:3:"260";s:5:" ен";s:3:"261";s:5:" жи";s:3:"262";s:5:" им";s:3:"263";s:5:" му";s:3:"264";s:5:" од";s:3:"265";s:5:" Ñу";s:3:"266";s:5:" та";s:3:"267";s:5:" Ñ…Ñ€";s:3:"268";s:5:" ча";s:3:"269";s:5:" шт";s:3:"270";s:5:" ње";s:3:"271";s:5:"а д";s:3:"272";s:5:"а з";s:3:"273";s:5:"а к";s:3:"274";s:5:"а Ñ‚";s:3:"275";s:6:"аду";s:3:"276";s:6:"ало";s:3:"277";s:6:"ани";s:3:"278";s:6:"аÑо";s:3:"279";s:6:"ван";s:3:"280";s:6:"вач";s:3:"281";s:6:"вањ";s:3:"282";s:6:"вед";s:3:"283";s:5:"ви ";s:3:"284";s:6:"вно";s:3:"285";s:6:"вот";s:3:"286";s:6:"вој";s:3:"287";s:5:"ву ";s:3:"288";s:6:"доб";s:3:"289";s:6:"дру";s:3:"290";s:6:"дÑе";s:3:"291";s:5:"ду ";s:3:"292";s:5:"е б";s:3:"293";s:5:"е д";s:3:"294";s:5:"е м";s:3:"295";s:5:"ем 
";s:3:"296";s:6:"ема";s:3:"297";s:6:"ент";s:3:"298";s:6:"енц";s:3:"299";}s:6:"slovak";a:300:{s:3:" pr";s:1:"0";s:3:" po";s:1:"1";s:3:" ne";s:1:"2";s:3:" a ";s:1:"3";s:3:"ch ";s:1:"4";s:3:" na";s:1:"5";s:3:" je";s:1:"6";s:4:"nà ";s:1:"7";s:3:"je ";s:1:"8";s:3:" do";s:1:"9";s:3:"na ";s:2:"10";s:3:"ova";s:2:"11";s:3:" v ";s:2:"12";s:3:"to ";s:2:"13";s:3:"ho ";s:2:"14";s:3:"ou ";s:2:"15";s:3:" to";s:2:"16";s:3:"ick";s:2:"17";s:3:"ter";s:2:"18";s:4:"že ";s:2:"19";s:3:" st";s:2:"20";s:3:" za";s:2:"21";s:3:"ost";s:2:"22";s:4:"ých";s:2:"23";s:3:" se";s:2:"24";s:3:"pro";s:2:"25";s:3:" te";s:2:"26";s:3:"e s";s:2:"27";s:4:" že";s:2:"28";s:3:"a p";s:2:"29";s:3:" kt";s:2:"30";s:3:"pre";s:2:"31";s:3:" by";s:2:"32";s:3:" o ";s:2:"33";s:3:"se ";s:2:"34";s:3:"kon";s:2:"35";s:4:" pÅ™";s:2:"36";s:3:"a s";s:2:"37";s:4:"né ";s:2:"38";s:4:"nÄ› ";s:2:"39";s:3:"sti";s:2:"40";s:3:"ako";s:2:"41";s:3:"ist";s:2:"42";s:3:"mu ";s:2:"43";s:3:"ame";s:2:"44";s:3:"ent";s:2:"45";s:3:"ky ";s:2:"46";s:3:"la ";s:2:"47";s:3:"pod";s:2:"48";s:3:" ve";s:2:"49";s:3:" ob";s:2:"50";s:3:"om ";s:2:"51";s:3:"vat";s:2:"52";s:3:" ko";s:2:"53";s:3:"sta";s:2:"54";s:3:"em ";s:2:"55";s:3:"le ";s:2:"56";s:3:"a v";s:2:"57";s:3:"by ";s:2:"58";s:3:"e p";s:2:"59";s:3:"ko ";s:2:"60";s:3:"eri";s:2:"61";s:3:"kte";s:2:"62";s:3:"sa ";s:2:"63";s:4:"ého";s:2:"64";s:3:"e v";s:2:"65";s:3:"mer";s:2:"66";s:3:"tel";s:2:"67";s:3:" ak";s:2:"68";s:3:" sv";s:2:"69";s:4:" zá";s:2:"70";s:3:"hla";s:2:"71";s:3:"las";s:2:"72";s:3:"lo ";s:2:"73";s:3:" ta";s:2:"74";s:3:"a n";s:2:"75";s:3:"ej ";s:2:"76";s:3:"li ";s:2:"77";s:3:"ne ";s:2:"78";s:3:" sa";s:2:"79";s:3:"ak ";s:2:"80";s:3:"ani";s:2:"81";s:3:"ate";s:2:"82";s:3:"ia ";s:2:"83";s:3:"sou";s:2:"84";s:3:" so";s:2:"85";s:4:"enÃ";s:2:"86";s:3:"ie ";s:2:"87";s:3:" re";s:2:"88";s:3:"ce ";s:2:"89";s:3:"e n";s:2:"90";s:3:"ori";s:2:"91";s:3:"tic";s:2:"92";s:3:" vy";s:2:"93";s:3:"a t";s:2:"94";s:4:"ké ";s:2:"95";s:3:"nos";s:2:"96";s:3:"o s";s:2:"97";s:3:"str";s:2:"98";s:3:"ti 
";s:2:"99";s:3:"uje";s:3:"100";s:3:" sp";s:3:"101";s:3:"lov";s:3:"102";s:3:"o p";s:3:"103";s:3:"oli";s:3:"104";s:4:"ová";s:3:"105";s:4:" ná";s:3:"106";s:3:"ale";s:3:"107";s:3:"den";s:3:"108";s:3:"e o";s:3:"109";s:3:"ku ";s:3:"110";s:3:"val";s:3:"111";s:3:" am";s:3:"112";s:3:" ro";s:3:"113";s:3:" si";s:3:"114";s:3:"nie";s:3:"115";s:3:"pol";s:3:"116";s:3:"tra";s:3:"117";s:3:" al";s:3:"118";s:3:"ali";s:3:"119";s:3:"o v";s:3:"120";s:3:"tor";s:3:"121";s:3:" mo";s:3:"122";s:3:" ni";s:3:"123";s:3:"ci ";s:3:"124";s:3:"o n";s:3:"125";s:4:"Ãm ";s:3:"126";s:3:" le";s:3:"127";s:3:" pa";s:3:"128";s:3:" s ";s:3:"129";s:3:"al ";s:3:"130";s:3:"ati";s:3:"131";s:3:"ero";s:3:"132";s:3:"ove";s:3:"133";s:3:"rov";s:3:"134";s:4:"ván";s:3:"135";s:4:"Ãch";s:3:"136";s:3:" ja";s:3:"137";s:3:" z ";s:3:"138";s:4:"cké";s:3:"139";s:3:"e z";s:3:"140";s:3:" od";s:3:"141";s:3:"byl";s:3:"142";s:3:"de ";s:3:"143";s:3:"dob";s:3:"144";s:3:"nep";s:3:"145";s:3:"pra";s:3:"146";s:3:"ric";s:3:"147";s:3:"spo";s:3:"148";s:3:"tak";s:3:"149";s:4:" vÅ¡";s:3:"150";s:3:"a a";s:3:"151";s:3:"e t";s:3:"152";s:3:"lit";s:3:"153";s:3:"me ";s:3:"154";s:3:"nej";s:3:"155";s:3:"no ";s:3:"156";s:4:"nýc";s:3:"157";s:3:"o t";s:3:"158";s:3:"a j";s:3:"159";s:3:"e a";s:3:"160";s:3:"en ";s:3:"161";s:3:"est";s:3:"162";s:4:"jà ";s:3:"163";s:3:"mi ";s:3:"164";s:3:"slo";s:3:"165";s:4:"stá";s:3:"166";s:3:"u v";s:3:"167";s:3:"for";s:3:"168";s:3:"nou";s:3:"169";s:3:"pos";s:3:"170";s:4:"pÅ™e";s:3:"171";s:3:"si ";s:3:"172";s:3:"tom";s:3:"173";s:3:" vl";s:3:"174";s:3:"a z";s:3:"175";s:3:"ly ";s:3:"176";s:3:"orm";s:3:"177";s:3:"ris";s:3:"178";s:3:"za ";s:3:"179";s:4:"zák";s:3:"180";s:3:" k ";s:3:"181";s:3:"at ";s:3:"182";s:4:"cký";s:3:"183";s:3:"dno";s:3:"184";s:3:"dos";s:3:"185";s:3:"dy ";s:3:"186";s:3:"jak";s:3:"187";s:3:"kov";s:3:"188";s:3:"ny ";s:3:"189";s:3:"res";s:3:"190";s:3:"ror";s:3:"191";s:3:"sto";s:3:"192";s:3:"van";s:3:"193";s:3:" op";s:3:"194";s:3:"da ";s:3:"195";s:3:"do ";s:3:"196";s:3:"e 
j";s:3:"197";s:3:"hod";s:3:"198";s:3:"len";s:3:"199";s:4:"ný ";s:3:"200";s:3:"o z";s:3:"201";s:3:"poz";s:3:"202";s:3:"pri";s:3:"203";s:3:"ran";s:3:"204";s:3:"u s";s:3:"205";s:3:" ab";s:3:"206";s:3:"aj ";s:3:"207";s:3:"ast";s:3:"208";s:3:"it ";s:3:"209";s:3:"kto";s:3:"210";s:3:"o o";s:3:"211";s:3:"oby";s:3:"212";s:3:"odo";s:3:"213";s:3:"u p";s:3:"214";s:3:"va ";s:3:"215";s:5:"ánÃ";s:3:"216";s:4:"à p";s:3:"217";s:4:"ým ";s:3:"218";s:3:" in";s:3:"219";s:3:" mi";s:3:"220";s:4:"aÅ¥ ";s:3:"221";s:3:"dov";s:3:"222";s:3:"ka ";s:3:"223";s:3:"nsk";s:3:"224";s:4:"áln";s:3:"225";s:3:" an";s:3:"226";s:3:" bu";s:3:"227";s:3:" sl";s:3:"228";s:3:" tr";s:3:"229";s:3:"e m";s:3:"230";s:3:"ech";s:3:"231";s:3:"edn";s:3:"232";s:3:"i n";s:3:"233";s:4:"kýc";s:3:"234";s:4:"nÃc";s:3:"235";s:3:"ov ";s:3:"236";s:5:"pÅ™Ã";s:3:"237";s:4:"à a";s:3:"238";s:3:" aj";s:3:"239";s:3:" bo";s:3:"240";s:3:"a d";s:3:"241";s:3:"ide";s:3:"242";s:3:"o a";s:3:"243";s:3:"o d";s:3:"244";s:3:"och";s:3:"245";s:3:"pov";s:3:"246";s:3:"svo";s:3:"247";s:4:"é s";s:3:"248";s:3:" kd";s:3:"249";s:3:" vo";s:3:"250";s:4:" vý";s:3:"251";s:3:"bud";s:3:"252";s:3:"ich";s:3:"253";s:3:"il ";s:3:"254";s:3:"ili";s:3:"255";s:3:"ni ";s:3:"256";s:4:"nÃm";s:3:"257";s:3:"od ";s:3:"258";s:3:"osl";s:3:"259";s:3:"ouh";s:3:"260";s:3:"rav";s:3:"261";s:3:"roz";s:3:"262";s:3:"st ";s:3:"263";s:3:"stv";s:3:"264";s:3:"tu ";s:3:"265";s:3:"u a";s:3:"266";s:4:"vál";s:3:"267";s:3:"y s";s:3:"268";s:4:"à s";s:3:"269";s:4:"à v";s:3:"270";s:3:" hl";s:3:"271";s:3:" li";s:3:"272";s:3:" me";s:3:"273";s:3:"a m";s:3:"274";s:3:"e b";s:3:"275";s:3:"h s";s:3:"276";s:3:"i p";s:3:"277";s:3:"i s";s:3:"278";s:3:"iti";s:3:"279";s:4:"lád";s:3:"280";s:3:"nem";s:3:"281";s:3:"nov";s:3:"282";s:3:"opo";s:3:"283";s:3:"uhl";s:3:"284";s:3:"eno";s:3:"285";s:3:"ens";s:3:"286";s:3:"men";s:3:"287";s:3:"nes";s:3:"288";s:3:"obo";s:3:"289";s:3:"te ";s:3:"290";s:3:"ved";s:3:"291";s:4:"vlá";s:3:"292";s:3:"y n";s:3:"293";s:3:" ma";s:3:"294";s:3:" mu";s:3:"295";s:4:" 
vá";s:3:"296";s:3:"bez";s:3:"297";s:3:"byv";s:3:"298";s:3:"cho";s:3:"299";}s:7:"slovene";a:300:{s:3:"je ";s:1:"0";s:3:" pr";s:1:"1";s:3:" po";s:1:"2";s:3:" je";s:1:"3";s:3:" v ";s:1:"4";s:3:" za";s:1:"5";s:3:" na";s:1:"6";s:3:"pre";s:1:"7";s:3:"da ";s:1:"8";s:3:" da";s:1:"9";s:3:"ki ";s:2:"10";s:3:"ti ";s:2:"11";s:3:"ja ";s:2:"12";s:3:"ne ";s:2:"13";s:3:" in";s:2:"14";s:3:"in ";s:2:"15";s:3:"li ";s:2:"16";s:3:"no ";s:2:"17";s:3:"na ";s:2:"18";s:3:"ni ";s:2:"19";s:3:" bi";s:2:"20";s:3:"jo ";s:2:"21";s:3:" ne";s:2:"22";s:3:"nje";s:2:"23";s:3:"e p";s:2:"24";s:3:"i p";s:2:"25";s:3:"pri";s:2:"26";s:3:"o p";s:2:"27";s:3:"red";s:2:"28";s:3:" do";s:2:"29";s:3:"anj";s:2:"30";s:3:"em ";s:2:"31";s:3:"ih ";s:2:"32";s:3:" bo";s:2:"33";s:3:" ki";s:2:"34";s:3:" iz";s:2:"35";s:3:" se";s:2:"36";s:3:" so";s:2:"37";s:3:"al ";s:2:"38";s:3:" de";s:2:"39";s:3:"e v";s:2:"40";s:3:"i s";s:2:"41";s:3:"ko ";s:2:"42";s:3:"bil";s:2:"43";s:3:"ira";s:2:"44";s:3:"ove";s:2:"45";s:3:" br";s:2:"46";s:3:" ob";s:2:"47";s:3:"e b";s:2:"48";s:3:"i n";s:2:"49";s:3:"ova";s:2:"50";s:3:"se ";s:2:"51";s:3:"za ";s:2:"52";s:3:"la ";s:2:"53";s:3:" ja";s:2:"54";s:3:"ati";s:2:"55";s:3:"so ";s:2:"56";s:3:"ter";s:2:"57";s:3:" ta";s:2:"58";s:3:"a s";s:2:"59";s:3:"del";s:2:"60";s:3:"e d";s:2:"61";s:3:" dr";s:2:"62";s:3:" od";s:2:"63";s:3:"a n";s:2:"64";s:3:"ar ";s:2:"65";s:3:"jal";s:2:"66";s:3:"ji ";s:2:"67";s:3:"rit";s:2:"68";s:3:" ka";s:2:"69";s:3:" ko";s:2:"70";s:3:" pa";s:2:"71";s:3:"a b";s:2:"72";s:3:"ani";s:2:"73";s:3:"e s";s:2:"74";s:3:"er ";s:2:"75";s:3:"ili";s:2:"76";s:3:"lov";s:2:"77";s:3:"o v";s:2:"78";s:3:"tov";s:2:"79";s:3:" ir";s:2:"80";s:3:" ni";s:2:"81";s:3:" vo";s:2:"82";s:3:"a j";s:2:"83";s:3:"bi ";s:2:"84";s:3:"bri";s:2:"85";s:3:"iti";s:2:"86";s:3:"let";s:2:"87";s:3:"o n";s:2:"88";s:3:"tan";s:2:"89";s:4:"Å¡e ";s:2:"90";s:3:" le";s:2:"91";s:3:" te";s:2:"92";s:3:"eni";s:2:"93";s:3:"eri";s:2:"94";s:3:"ita";s:2:"95";s:3:"kat";s:2:"96";s:3:"por";s:2:"97";s:3:"pro";s:2:"98";s:3:"ali";s:2:"99";s:3:"ke 
";s:3:"100";s:3:"oli";s:3:"101";s:3:"ov ";s:3:"102";s:3:"pra";s:3:"103";s:3:"ri ";s:3:"104";s:3:"uar";s:3:"105";s:3:"ve ";s:3:"106";s:3:" to";s:3:"107";s:3:"a i";s:3:"108";s:3:"a v";s:3:"109";s:3:"ako";s:3:"110";s:3:"arj";s:3:"111";s:3:"ate";s:3:"112";s:3:"di ";s:3:"113";s:3:"do ";s:3:"114";s:3:"ga ";s:3:"115";s:3:"le ";s:3:"116";s:3:"lo ";s:3:"117";s:3:"mer";s:3:"118";s:3:"o s";s:3:"119";s:3:"oda";s:3:"120";s:3:"oro";s:3:"121";s:3:"pod";s:3:"122";s:3:" ma";s:3:"123";s:3:" mo";s:3:"124";s:3:" si";s:3:"125";s:3:"a p";s:3:"126";s:3:"bod";s:3:"127";s:3:"e n";s:3:"128";s:3:"ega";s:3:"129";s:3:"ju ";s:3:"130";s:3:"ka ";s:3:"131";s:3:"lje";s:3:"132";s:3:"rav";s:3:"133";s:3:"ta ";s:3:"134";s:3:"a o";s:3:"135";s:3:"e t";s:3:"136";s:3:"e z";s:3:"137";s:3:"i d";s:3:"138";s:3:"i v";s:3:"139";s:3:"ila";s:3:"140";s:3:"lit";s:3:"141";s:3:"nih";s:3:"142";s:3:"odo";s:3:"143";s:3:"sti";s:3:"144";s:3:"to ";s:3:"145";s:3:"var";s:3:"146";s:3:"ved";s:3:"147";s:3:"vol";s:3:"148";s:3:" la";s:3:"149";s:3:" no";s:3:"150";s:3:" vs";s:3:"151";s:3:"a d";s:3:"152";s:3:"agu";s:3:"153";s:3:"aja";s:3:"154";s:3:"dej";s:3:"155";s:3:"dnj";s:3:"156";s:3:"eda";s:3:"157";s:3:"gov";s:3:"158";s:3:"gua";s:3:"159";s:3:"jag";s:3:"160";s:3:"jem";s:3:"161";s:3:"kon";s:3:"162";s:3:"ku ";s:3:"163";s:3:"nij";s:3:"164";s:3:"omo";s:3:"165";s:4:"oÄi";s:3:"166";s:3:"pov";s:3:"167";s:3:"rak";s:3:"168";s:3:"rja";s:3:"169";s:3:"sta";s:3:"170";s:3:"tev";s:3:"171";s:3:"a t";s:3:"172";s:3:"aj ";s:3:"173";s:3:"ed ";s:3:"174";s:3:"eja";s:3:"175";s:3:"ent";s:3:"176";s:3:"ev ";s:3:"177";s:3:"i i";s:3:"178";s:3:"i o";s:3:"179";s:3:"ijo";s:3:"180";s:3:"ist";s:3:"181";s:3:"ost";s:3:"182";s:3:"ske";s:3:"183";s:3:"str";s:3:"184";s:3:" ra";s:3:"185";s:3:" s ";s:3:"186";s:3:" tr";s:3:"187";s:4:" Å¡e";s:3:"188";s:3:"arn";s:3:"189";s:3:"bo ";s:3:"190";s:4:"drž";s:3:"191";s:3:"i j";s:3:"192";s:3:"ilo";s:3:"193";s:3:"izv";s:3:"194";s:3:"jen";s:3:"195";s:3:"lja";s:3:"196";s:3:"nsk";s:3:"197";s:3:"o d";s:3:"198";s:3:"o 
i";s:3:"199";s:3:"om ";s:3:"200";s:3:"ora";s:3:"201";s:3:"ovo";s:3:"202";s:3:"raz";s:3:"203";s:4:"rža";s:3:"204";s:3:"tak";s:3:"205";s:3:"va ";s:3:"206";s:3:"ven";s:3:"207";s:4:"žav";s:3:"208";s:3:" me";s:3:"209";s:4:" Äe";s:3:"210";s:3:"ame";s:3:"211";s:3:"avi";s:3:"212";s:3:"e i";s:3:"213";s:3:"e o";s:3:"214";s:3:"eka";s:3:"215";s:3:"gre";s:3:"216";s:3:"i t";s:3:"217";s:3:"ija";s:3:"218";s:3:"il ";s:3:"219";s:3:"ite";s:3:"220";s:3:"kra";s:3:"221";s:3:"lju";s:3:"222";s:3:"mor";s:3:"223";s:3:"nik";s:3:"224";s:3:"o t";s:3:"225";s:3:"obi";s:3:"226";s:3:"odn";s:3:"227";s:3:"ran";s:3:"228";s:3:"re ";s:3:"229";s:3:"sto";s:3:"230";s:3:"stv";s:3:"231";s:3:"udi";s:3:"232";s:3:"v i";s:3:"233";s:3:"van";s:3:"234";s:3:" am";s:3:"235";s:3:" sp";s:3:"236";s:3:" st";s:3:"237";s:3:" tu";s:3:"238";s:3:" ve";s:3:"239";s:4:" že";s:3:"240";s:3:"ajo";s:3:"241";s:3:"ale";s:3:"242";s:3:"apo";s:3:"243";s:3:"dal";s:3:"244";s:3:"dru";s:3:"245";s:3:"e j";s:3:"246";s:3:"edn";s:3:"247";s:3:"ejo";s:3:"248";s:3:"elo";s:3:"249";s:3:"est";s:3:"250";s:3:"etj";s:3:"251";s:3:"eva";s:3:"252";s:3:"iji";s:3:"253";s:3:"ik ";s:3:"254";s:3:"im ";s:3:"255";s:3:"itv";s:3:"256";s:3:"mob";s:3:"257";s:3:"nap";s:3:"258";s:3:"nek";s:3:"259";s:3:"pol";s:3:"260";s:3:"pos";s:3:"261";s:3:"rat";s:3:"262";s:3:"ski";s:3:"263";s:4:"tiÄ";s:3:"264";s:3:"tom";s:3:"265";s:3:"ton";s:3:"266";s:3:"tra";s:3:"267";s:3:"tud";s:3:"268";s:3:"tve";s:3:"269";s:3:"v b";s:3:"270";s:3:"vil";s:3:"271";s:3:"vse";s:3:"272";s:4:"Äit";s:3:"273";s:3:" av";s:3:"274";s:3:" gr";s:3:"275";s:3:"a z";s:3:"276";s:3:"ans";s:3:"277";s:3:"ast";s:3:"278";s:3:"avt";s:3:"279";s:3:"dan";s:3:"280";s:3:"e m";s:3:"281";s:3:"eds";s:3:"282";s:3:"for";s:3:"283";s:3:"i z";s:3:"284";s:3:"kot";s:3:"285";s:3:"mi ";s:3:"286";s:3:"nim";s:3:"287";s:3:"o b";s:3:"288";s:3:"o o";s:3:"289";s:3:"od ";s:3:"290";s:3:"odl";s:3:"291";s:3:"oiz";s:3:"292";s:3:"ot 
";s:3:"293";s:3:"par";s:3:"294";s:3:"pot";s:3:"295";s:3:"rje";s:3:"296";s:3:"roi";s:3:"297";s:3:"tem";s:3:"298";s:3:"val";s:3:"299";}s:6:"somali";a:300:{s:3:"ka ";s:1:"0";s:3:"ay ";s:1:"1";s:3:"da ";s:1:"2";s:3:" ay";s:1:"3";s:3:"aal";s:1:"4";s:3:"oo ";s:1:"5";s:3:"aan";s:1:"6";s:3:" ka";s:1:"7";s:3:"an ";s:1:"8";s:3:"in ";s:1:"9";s:3:" in";s:2:"10";s:3:"ada";s:2:"11";s:3:"maa";s:2:"12";s:3:"aba";s:2:"13";s:3:" so";s:2:"14";s:3:"ali";s:2:"15";s:3:"bad";s:2:"16";s:3:"add";s:2:"17";s:3:"soo";s:2:"18";s:3:" na";s:2:"19";s:3:"aha";s:2:"20";s:3:"ku ";s:2:"21";s:3:"ta ";s:2:"22";s:3:" wa";s:2:"23";s:3:"yo ";s:2:"24";s:3:"a s";s:2:"25";s:3:"oma";s:2:"26";s:3:"yaa";s:2:"27";s:3:" ba";s:2:"28";s:3:" ku";s:2:"29";s:3:" la";s:2:"30";s:3:" oo";s:2:"31";s:3:"iya";s:2:"32";s:3:"sha";s:2:"33";s:3:"a a";s:2:"34";s:3:"dda";s:2:"35";s:3:"nab";s:2:"36";s:3:"nta";s:2:"37";s:3:" da";s:2:"38";s:3:" ma";s:2:"39";s:3:"nka";s:2:"40";s:3:"uu ";s:2:"41";s:3:"y i";s:2:"42";s:3:"aya";s:2:"43";s:3:"ha ";s:2:"44";s:3:"raa";s:2:"45";s:3:" dh";s:2:"46";s:3:" qa";s:2:"47";s:3:"a k";s:2:"48";s:3:"ala";s:2:"49";s:3:"baa";s:2:"50";s:3:"doo";s:2:"51";s:3:"had";s:2:"52";s:3:"liy";s:2:"53";s:3:"oom";s:2:"54";s:3:" ha";s:2:"55";s:3:" sh";s:2:"56";s:3:"a d";s:2:"57";s:3:"a i";s:2:"58";s:3:"a n";s:2:"59";s:3:"aar";s:2:"60";s:3:"ee ";s:2:"61";s:3:"ey ";s:2:"62";s:3:"y k";s:2:"63";s:3:"ya ";s:2:"64";s:3:" ee";s:2:"65";s:3:" iy";s:2:"66";s:3:"aa ";s:2:"67";s:3:"aaq";s:2:"68";s:3:"gaa";s:2:"69";s:3:"lam";s:2:"70";s:3:" bu";s:2:"71";s:3:"a b";s:2:"72";s:3:"a m";s:2:"73";s:3:"ad ";s:2:"74";s:3:"aga";s:2:"75";s:3:"ama";s:2:"76";s:3:"iyo";s:2:"77";s:3:"la ";s:2:"78";s:3:"a c";s:2:"79";s:3:"a l";s:2:"80";s:3:"een";s:2:"81";s:3:"int";s:2:"82";s:3:"she";s:2:"83";s:3:"wax";s:2:"84";s:3:"yee";s:2:"85";s:3:" si";s:2:"86";s:3:" uu";s:2:"87";s:3:"a h";s:2:"88";s:3:"aas";s:2:"89";s:3:"alk";s:2:"90";s:3:"dha";s:2:"91";s:3:"gu ";s:2:"92";s:3:"hee";s:2:"93";s:3:"ii ";s:2:"94";s:3:"ira";s:2:"95";s:3:"mad";s:2:"96";s:3:"o 
a";s:2:"97";s:3:"o k";s:2:"98";s:3:"qay";s:2:"99";s:3:" ah";s:3:"100";s:3:" ca";s:3:"101";s:3:" wu";s:3:"102";s:3:"ank";s:3:"103";s:3:"ash";s:3:"104";s:3:"axa";s:3:"105";s:3:"eed";s:3:"106";s:3:"en ";s:3:"107";s:3:"ga ";s:3:"108";s:3:"haa";s:3:"109";s:3:"n a";s:3:"110";s:3:"n s";s:3:"111";s:3:"naa";s:3:"112";s:3:"nay";s:3:"113";s:3:"o d";s:3:"114";s:3:"taa";s:3:"115";s:3:"u b";s:3:"116";s:3:"uxu";s:3:"117";s:3:"wux";s:3:"118";s:3:"xuu";s:3:"119";s:3:" ci";s:3:"120";s:3:" do";s:3:"121";s:3:" ho";s:3:"122";s:3:" ta";s:3:"123";s:3:"a g";s:3:"124";s:3:"a u";s:3:"125";s:3:"ana";s:3:"126";s:3:"ayo";s:3:"127";s:3:"dhi";s:3:"128";s:3:"iin";s:3:"129";s:3:"lag";s:3:"130";s:3:"lin";s:3:"131";s:3:"lka";s:3:"132";s:3:"o i";s:3:"133";s:3:"san";s:3:"134";s:3:"u s";s:3:"135";s:3:"una";s:3:"136";s:3:"uun";s:3:"137";s:3:" ga";s:3:"138";s:3:" xa";s:3:"139";s:3:" xu";s:3:"140";s:3:"aab";s:3:"141";s:3:"abt";s:3:"142";s:3:"aq ";s:3:"143";s:3:"aqa";s:3:"144";s:3:"ara";s:3:"145";s:3:"arl";s:3:"146";s:3:"caa";s:3:"147";s:3:"cir";s:3:"148";s:3:"eeg";s:3:"149";s:3:"eel";s:3:"150";s:3:"isa";s:3:"151";s:3:"kal";s:3:"152";s:3:"lah";s:3:"153";s:3:"ney";s:3:"154";s:3:"qaa";s:3:"155";s:3:"rla";s:3:"156";s:3:"sad";s:3:"157";s:3:"sii";s:3:"158";s:3:"u d";s:3:"159";s:3:"wad";s:3:"160";s:3:" ad";s:3:"161";s:3:" ar";s:3:"162";s:3:" di";s:3:"163";s:3:" jo";s:3:"164";s:3:" ra";s:3:"165";s:3:" sa";s:3:"166";s:3:" u ";s:3:"167";s:3:" yi";s:3:"168";s:3:"a j";s:3:"169";s:3:"a q";s:3:"170";s:3:"aad";s:3:"171";s:3:"aat";s:3:"172";s:3:"aay";s:3:"173";s:3:"ah ";s:3:"174";s:3:"ale";s:3:"175";s:3:"amk";s:3:"176";s:3:"ari";s:3:"177";s:3:"as ";s:3:"178";s:3:"aye";s:3:"179";s:3:"bus";s:3:"180";s:3:"dal";s:3:"181";s:3:"ddu";s:3:"182";s:3:"dii";s:3:"183";s:3:"du ";s:3:"184";s:3:"duu";s:3:"185";s:3:"ed 
";s:3:"186";s:3:"ege";s:3:"187";s:3:"gey";s:3:"188";s:3:"hay";s:3:"189";s:3:"hii";s:3:"190";s:3:"ida";s:3:"191";s:3:"ine";s:3:"192";s:3:"joo";s:3:"193";s:3:"laa";s:3:"194";s:3:"lay";s:3:"195";s:3:"mar";s:3:"196";s:3:"mee";s:3:"197";s:3:"n b";s:3:"198";s:3:"n d";s:3:"199";s:3:"n m";s:3:"200";s:3:"no ";s:3:"201";s:3:"o b";s:3:"202";s:3:"o l";s:3:"203";s:3:"oog";s:3:"204";s:3:"oon";s:3:"205";s:3:"rga";s:3:"206";s:3:"sh ";s:3:"207";s:3:"sid";s:3:"208";s:3:"u q";s:3:"209";s:3:"unk";s:3:"210";s:3:"ush";s:3:"211";s:3:"xa ";s:3:"212";s:3:"y d";s:3:"213";s:3:" bi";s:3:"214";s:3:" gu";s:3:"215";s:3:" is";s:3:"216";s:3:" ke";s:3:"217";s:3:" lo";s:3:"218";s:3:" me";s:3:"219";s:3:" mu";s:3:"220";s:3:" qo";s:3:"221";s:3:" ug";s:3:"222";s:3:"a e";s:3:"223";s:3:"a o";s:3:"224";s:3:"a w";s:3:"225";s:3:"adi";s:3:"226";s:3:"ado";s:3:"227";s:3:"agu";s:3:"228";s:3:"al ";s:3:"229";s:3:"ant";s:3:"230";s:3:"ark";s:3:"231";s:3:"asa";s:3:"232";s:3:"awi";s:3:"233";s:3:"bta";s:3:"234";s:3:"bul";s:3:"235";s:3:"d a";s:3:"236";s:3:"dag";s:3:"237";s:3:"dan";s:3:"238";s:3:"do ";s:3:"239";s:3:"e s";s:3:"240";s:3:"gal";s:3:"241";s:3:"gay";s:3:"242";s:3:"guu";s:3:"243";s:3:"h e";s:3:"244";s:3:"hal";s:3:"245";s:3:"iga";s:3:"246";s:3:"ihi";s:3:"247";s:3:"iri";s:3:"248";s:3:"iye";s:3:"249";s:3:"ken";s:3:"250";s:3:"lad";s:3:"251";s:3:"lid";s:3:"252";s:3:"lsh";s:3:"253";s:3:"mag";s:3:"254";s:3:"mun";s:3:"255";s:3:"n h";s:3:"256";s:3:"n i";s:3:"257";s:3:"na ";s:3:"258";s:3:"o n";s:3:"259";s:3:"o w";s:3:"260";s:3:"ood";s:3:"261";s:3:"oor";s:3:"262";s:3:"ora";s:3:"263";s:3:"qab";s:3:"264";s:3:"qor";s:3:"265";s:3:"rab";s:3:"266";s:3:"rit";s:3:"267";s:3:"rta";s:3:"268";s:3:"s o";s:3:"269";s:3:"sab";s:3:"270";s:3:"ska";s:3:"271";s:3:"to ";s:3:"272";s:3:"u a";s:3:"273";s:3:"u h";s:3:"274";s:3:"u u";s:3:"275";s:3:"ud ";s:3:"276";s:3:"ugu";s:3:"277";s:3:"uls";s:3:"278";s:3:"uud";s:3:"279";s:3:"waa";s:3:"280";s:3:"xus";s:3:"281";s:3:"y b";s:3:"282";s:3:"y q";s:3:"283";s:3:"y 
s";s:3:"284";s:3:"yad";s:3:"285";s:3:"yay";s:3:"286";s:3:"yih";s:3:"287";s:3:" aa";s:3:"288";s:3:" bo";s:3:"289";s:3:" br";s:3:"290";s:3:" go";s:3:"291";s:3:" ji";s:3:"292";s:3:" mi";s:3:"293";s:3:" of";s:3:"294";s:3:" ti";s:3:"295";s:3:" um";s:3:"296";s:3:" wi";s:3:"297";s:3:" xo";s:3:"298";s:3:"a x";s:3:"299";}s:7:"spanish";a:300:{s:3:" de";s:1:"0";s:3:"de ";s:1:"1";s:3:" la";s:1:"2";s:3:"os ";s:1:"3";s:3:"la ";s:1:"4";s:3:"el ";s:1:"5";s:3:"es ";s:1:"6";s:3:" qu";s:1:"7";s:3:" co";s:1:"8";s:3:"e l";s:1:"9";s:3:"as ";s:2:"10";s:3:"que";s:2:"11";s:3:" el";s:2:"12";s:3:"ue ";s:2:"13";s:3:"en ";s:2:"14";s:3:"ent";s:2:"15";s:3:" en";s:2:"16";s:3:" se";s:2:"17";s:3:"nte";s:2:"18";s:3:"res";s:2:"19";s:3:"con";s:2:"20";s:3:"est";s:2:"21";s:3:" es";s:2:"22";s:3:"s d";s:2:"23";s:3:" lo";s:2:"24";s:3:" pr";s:2:"25";s:3:"los";s:2:"26";s:3:" y ";s:2:"27";s:3:"do ";s:2:"28";s:4:"ón ";s:2:"29";s:4:"ión";s:2:"30";s:3:" un";s:2:"31";s:4:"ció";s:2:"32";s:3:"del";s:2:"33";s:3:"o d";s:2:"34";s:3:" po";s:2:"35";s:3:"a d";s:2:"36";s:3:"aci";s:2:"37";s:3:"sta";s:2:"38";s:3:"te ";s:2:"39";s:3:"ado";s:2:"40";s:3:"pre";s:2:"41";s:3:"to ";s:2:"42";s:3:"par";s:2:"43";s:3:"a e";s:2:"44";s:3:"a l";s:2:"45";s:3:"ra ";s:2:"46";s:3:"al ";s:2:"47";s:3:"e e";s:2:"48";s:3:"se ";s:2:"49";s:3:"pro";s:2:"50";s:3:"ar ";s:2:"51";s:3:"ia ";s:2:"52";s:3:"o e";s:2:"53";s:3:" re";s:2:"54";s:3:"ida";s:2:"55";s:3:"dad";s:2:"56";s:3:"tra";s:2:"57";s:3:"por";s:2:"58";s:3:"s p";s:2:"59";s:3:" a ";s:2:"60";s:3:"a p";s:2:"61";s:3:"ara";s:2:"62";s:3:"cia";s:2:"63";s:3:" pa";s:2:"64";s:3:"com";s:2:"65";s:3:"no ";s:2:"66";s:3:" di";s:2:"67";s:3:" in";s:2:"68";s:3:"ien";s:2:"69";s:3:"n l";s:2:"70";s:3:"ad ";s:2:"71";s:3:"ant";s:2:"72";s:3:"e s";s:2:"73";s:3:"men";s:2:"74";s:3:"a c";s:2:"75";s:3:"on ";s:2:"76";s:3:"un ";s:2:"77";s:3:"las";s:2:"78";s:3:"nci";s:2:"79";s:3:" tr";s:2:"80";s:3:"cio";s:2:"81";s:3:"ier";s:2:"82";s:3:"nto";s:2:"83";s:3:"tiv";s:2:"84";s:3:"n d";s:2:"85";s:3:"n e";s:2:"86";s:3:"or 
";s:2:"87";s:3:"s c";s:2:"88";s:3:"enc";s:2:"89";s:3:"ern";s:2:"90";s:3:"io ";s:2:"91";s:3:"a s";s:2:"92";s:3:"ici";s:2:"93";s:3:"s e";s:2:"94";s:3:" ma";s:2:"95";s:3:"dos";s:2:"96";s:3:"e a";s:2:"97";s:3:"e c";s:2:"98";s:3:"emp";s:2:"99";s:3:"ica";s:3:"100";s:3:"ivo";s:3:"101";s:3:"l p";s:3:"102";s:3:"n c";s:3:"103";s:3:"r e";s:3:"104";s:3:"ta ";s:3:"105";s:3:"ter";s:3:"106";s:3:"e d";s:3:"107";s:3:"esa";s:3:"108";s:3:"ez ";s:3:"109";s:3:"mpr";s:3:"110";s:3:"o a";s:3:"111";s:3:"s a";s:3:"112";s:3:" ca";s:3:"113";s:3:" su";s:3:"114";s:3:"ion";s:3:"115";s:3:" cu";s:3:"116";s:3:" ju";s:3:"117";s:3:"an ";s:3:"118";s:3:"da ";s:3:"119";s:3:"ene";s:3:"120";s:3:"ero";s:3:"121";s:3:"na ";s:3:"122";s:3:"rec";s:3:"123";s:3:"ro ";s:3:"124";s:3:"tar";s:3:"125";s:3:" al";s:3:"126";s:3:" an";s:3:"127";s:3:"bie";s:3:"128";s:3:"e p";s:3:"129";s:3:"er ";s:3:"130";s:3:"l c";s:3:"131";s:3:"n p";s:3:"132";s:3:"omp";s:3:"133";s:3:"ten";s:3:"134";s:3:" em";s:3:"135";s:3:"ist";s:3:"136";s:3:"nes";s:3:"137";s:3:"nta";s:3:"138";s:3:"o c";s:3:"139";s:3:"so ";s:3:"140";s:3:"tes";s:3:"141";s:3:"era";s:3:"142";s:3:"l d";s:3:"143";s:3:"l m";s:3:"144";s:3:"les";s:3:"145";s:3:"ntr";s:3:"146";s:3:"o s";s:3:"147";s:3:"ore";s:3:"148";s:4:"rá ";s:3:"149";s:3:"s q";s:3:"150";s:3:"s y";s:3:"151";s:3:"sto";s:3:"152";s:3:"a a";s:3:"153";s:3:"a r";s:3:"154";s:3:"ari";s:3:"155";s:3:"des";s:3:"156";s:3:"e q";s:3:"157";s:3:"ivi";s:3:"158";s:3:"lic";s:3:"159";s:3:"lo ";s:3:"160";s:3:"n a";s:3:"161";s:3:"one";s:3:"162";s:3:"ora";s:3:"163";s:3:"per";s:3:"164";s:3:"pue";s:3:"165";s:3:"r l";s:3:"166";s:3:"re ";s:3:"167";s:3:"ren";s:3:"168";s:3:"una";s:3:"169";s:4:"Ãa ";s:3:"170";s:3:"ada";s:3:"171";s:3:"cas";s:3:"172";s:3:"ere";s:3:"173";s:3:"ide";s:3:"174";s:3:"min";s:3:"175";s:3:"n s";s:3:"176";s:3:"ndo";s:3:"177";s:3:"ran";s:3:"178";s:3:"rno";s:3:"179";s:3:" ac";s:3:"180";s:3:" ex";s:3:"181";s:3:" go";s:3:"182";s:3:" no";s:3:"183";s:3:"a 
t";s:3:"184";s:3:"aba";s:3:"185";s:3:"ble";s:3:"186";s:3:"ece";s:3:"187";s:3:"ect";s:3:"188";s:3:"l a";s:3:"189";s:3:"l g";s:3:"190";s:3:"lid";s:3:"191";s:3:"nsi";s:3:"192";s:3:"ons";s:3:"193";s:3:"rac";s:3:"194";s:3:"rio";s:3:"195";s:3:"str";s:3:"196";s:3:"uer";s:3:"197";s:3:"ust";s:3:"198";s:3:" ha";s:3:"199";s:3:" le";s:3:"200";s:3:" mi";s:3:"201";s:3:" mu";s:3:"202";s:3:" ob";s:3:"203";s:3:" pe";s:3:"204";s:3:" pu";s:3:"205";s:3:" so";s:3:"206";s:3:"a i";s:3:"207";s:3:"ale";s:3:"208";s:3:"ca ";s:3:"209";s:3:"cto";s:3:"210";s:3:"e i";s:3:"211";s:3:"e u";s:3:"212";s:3:"eso";s:3:"213";s:3:"fer";s:3:"214";s:3:"fic";s:3:"215";s:3:"gob";s:3:"216";s:3:"jo ";s:3:"217";s:3:"ma ";s:3:"218";s:3:"mpl";s:3:"219";s:3:"o p";s:3:"220";s:3:"obi";s:3:"221";s:3:"s m";s:3:"222";s:3:"sa ";s:3:"223";s:3:"sep";s:3:"224";s:3:"ste";s:3:"225";s:3:"sti";s:3:"226";s:3:"tad";s:3:"227";s:3:"tod";s:3:"228";s:3:"y s";s:3:"229";s:3:" ci";s:3:"230";s:3:"and";s:3:"231";s:3:"ces";s:3:"232";s:4:"có ";s:3:"233";s:3:"dor";s:3:"234";s:3:"e m";s:3:"235";s:3:"eci";s:3:"236";s:3:"eco";s:3:"237";s:3:"esi";s:3:"238";s:3:"int";s:3:"239";s:3:"iza";s:3:"240";s:3:"l e";s:3:"241";s:3:"lar";s:3:"242";s:3:"mie";s:3:"243";s:3:"ner";s:3:"244";s:3:"orc";s:3:"245";s:3:"rci";s:3:"246";s:3:"ria";s:3:"247";s:3:"tic";s:3:"248";s:3:"tor";s:3:"249";s:3:" as";s:3:"250";s:3:" si";s:3:"251";s:3:"ce ";s:3:"252";s:3:"den";s:3:"253";s:3:"e r";s:3:"254";s:3:"e t";s:3:"255";s:3:"end";s:3:"256";s:3:"eri";s:3:"257";s:3:"esp";s:3:"258";s:3:"ial";s:3:"259";s:3:"ido";s:3:"260";s:3:"ina";s:3:"261";s:3:"inc";s:3:"262";s:3:"mit";s:3:"263";s:3:"o l";s:3:"264";s:3:"ome";s:3:"265";s:3:"pli";s:3:"266";s:3:"ras";s:3:"267";s:3:"s t";s:3:"268";s:3:"sid";s:3:"269";s:3:"sup";s:3:"270";s:3:"tab";s:3:"271";s:3:"uen";s:3:"272";s:3:"ues";s:3:"273";s:3:"ura";s:3:"274";s:3:"vo ";s:3:"275";s:3:"vor";s:3:"276";s:3:" sa";s:3:"277";s:3:" 
ti";s:3:"278";s:3:"abl";s:3:"279";s:3:"ali";s:3:"280";s:3:"aso";s:3:"281";s:3:"ast";s:3:"282";s:3:"cor";s:3:"283";s:3:"cti";s:3:"284";s:3:"cue";s:3:"285";s:3:"div";s:3:"286";s:3:"duc";s:3:"287";s:3:"ens";s:3:"288";s:3:"eti";s:3:"289";s:3:"imi";s:3:"290";s:3:"ini";s:3:"291";s:3:"lec";s:3:"292";s:3:"o q";s:3:"293";s:3:"oce";s:3:"294";s:3:"ort";s:3:"295";s:3:"ral";s:3:"296";s:3:"rma";s:3:"297";s:3:"roc";s:3:"298";s:3:"rod";s:3:"299";}s:7:"swahili";a:300:{s:3:" wa";s:1:"0";s:3:"wa ";s:1:"1";s:3:"a k";s:1:"2";s:3:"a m";s:1:"3";s:3:" ku";s:1:"4";s:3:" ya";s:1:"5";s:3:"a w";s:1:"6";s:3:"ya ";s:1:"7";s:3:"ni ";s:1:"8";s:3:" ma";s:1:"9";s:3:"ka ";s:2:"10";s:3:"a u";s:2:"11";s:3:"na ";s:2:"12";s:3:"za ";s:2:"13";s:3:"ia ";s:2:"14";s:3:" na";s:2:"15";s:3:"ika";s:2:"16";s:3:"ma ";s:2:"17";s:3:"ali";s:2:"18";s:3:"a n";s:2:"19";s:3:" am";s:2:"20";s:3:"ili";s:2:"21";s:3:"kwa";s:2:"22";s:3:" kw";s:2:"23";s:3:"ini";s:2:"24";s:3:" ha";s:2:"25";s:3:"ame";s:2:"26";s:3:"ana";s:2:"27";s:3:"i n";s:2:"28";s:3:" za";s:2:"29";s:3:"a h";s:2:"30";s:3:"ema";s:2:"31";s:3:"i m";s:2:"32";s:3:"i y";s:2:"33";s:3:"kuw";s:2:"34";s:3:"la ";s:2:"35";s:3:"o w";s:2:"36";s:3:"a y";s:2:"37";s:3:"ata";s:2:"38";s:3:"sem";s:2:"39";s:3:" la";s:2:"40";s:3:"ati";s:2:"41";s:3:"chi";s:2:"42";s:3:"i w";s:2:"43";s:3:"uwa";s:2:"44";s:3:"aki";s:2:"45";s:3:"li ";s:2:"46";s:3:"eka";s:2:"47";s:3:"ira";s:2:"48";s:3:" nc";s:2:"49";s:3:"a s";s:2:"50";s:3:"iki";s:2:"51";s:3:"kat";s:2:"52";s:3:"nch";s:2:"53";s:3:" ka";s:2:"54";s:3:" ki";s:2:"55";s:3:"a b";s:2:"56";s:3:"aji";s:2:"57";s:3:"amb";s:2:"58";s:3:"ra ";s:2:"59";s:3:"ri ";s:2:"60";s:3:"rik";s:2:"61";s:3:"ada";s:2:"62";s:3:"mat";s:2:"63";s:3:"mba";s:2:"64";s:3:"mes";s:2:"65";s:3:"yo ";s:2:"66";s:3:"zi ";s:2:"67";s:3:"da ";s:2:"68";s:3:"hi ";s:2:"69";s:3:"i k";s:2:"70";s:3:"ja ";s:2:"71";s:3:"kut";s:2:"72";s:3:"tek";s:2:"73";s:3:"wan";s:2:"74";s:3:" bi";s:2:"75";s:3:"a a";s:2:"76";s:3:"aka";s:2:"77";s:3:"ao 
";s:2:"78";s:3:"asi";s:2:"79";s:3:"cha";s:2:"80";s:3:"ese";s:2:"81";s:3:"eza";s:2:"82";s:3:"ke ";s:2:"83";s:3:"moj";s:2:"84";s:3:"oja";s:2:"85";s:3:" hi";s:2:"86";s:3:"a z";s:2:"87";s:3:"end";s:2:"88";s:3:"ha ";s:2:"89";s:3:"ji ";s:2:"90";s:3:"mu ";s:2:"91";s:3:"shi";s:2:"92";s:3:"wat";s:2:"93";s:3:" bw";s:2:"94";s:3:"ake";s:2:"95";s:3:"ara";s:2:"96";s:3:"bw ";s:2:"97";s:3:"i h";s:2:"98";s:3:"imb";s:2:"99";s:3:"tik";s:3:"100";s:3:"wak";s:3:"101";s:3:"wal";s:3:"102";s:3:" hu";s:3:"103";s:3:" mi";s:3:"104";s:3:" mk";s:3:"105";s:3:" ni";s:3:"106";s:3:" ra";s:3:"107";s:3:" um";s:3:"108";s:3:"a l";s:3:"109";s:3:"ate";s:3:"110";s:3:"esh";s:3:"111";s:3:"ina";s:3:"112";s:3:"ish";s:3:"113";s:3:"kim";s:3:"114";s:3:"o k";s:3:"115";s:3:" ir";s:3:"116";s:3:"a i";s:3:"117";s:3:"ala";s:3:"118";s:3:"ani";s:3:"119";s:3:"aq ";s:3:"120";s:3:"azi";s:3:"121";s:3:"hin";s:3:"122";s:3:"i a";s:3:"123";s:3:"idi";s:3:"124";s:3:"ima";s:3:"125";s:3:"ita";s:3:"126";s:3:"rai";s:3:"127";s:3:"raq";s:3:"128";s:3:"sha";s:3:"129";s:3:" ms";s:3:"130";s:3:" se";s:3:"131";s:3:"afr";s:3:"132";s:3:"ama";s:3:"133";s:3:"ano";s:3:"134";s:3:"ea ";s:3:"135";s:3:"ele";s:3:"136";s:3:"fri";s:3:"137";s:3:"go ";s:3:"138";s:3:"i i";s:3:"139";s:3:"ifa";s:3:"140";s:3:"iwa";s:3:"141";s:3:"iyo";s:3:"142";s:3:"kus";s:3:"143";s:3:"lia";s:3:"144";s:3:"lio";s:3:"145";s:3:"maj";s:3:"146";s:3:"mku";s:3:"147";s:3:"no ";s:3:"148";s:3:"tan";s:3:"149";s:3:"uli";s:3:"150";s:3:"uta";s:3:"151";s:3:"wen";s:3:"152";s:3:" al";s:3:"153";s:3:"a j";s:3:"154";s:3:"aad";s:3:"155";s:3:"aid";s:3:"156";s:3:"ari";s:3:"157";s:3:"awa";s:3:"158";s:3:"ba ";s:3:"159";s:3:"fa ";s:3:"160";s:3:"nde";s:3:"161";s:3:"nge";s:3:"162";s:3:"nya";s:3:"163";s:3:"o y";s:3:"164";s:3:"u w";s:3:"165";s:3:"ua ";s:3:"166";s:3:"umo";s:3:"167";s:3:"waz";s:3:"168";s:3:"ye ";s:3:"169";s:3:" ut";s:3:"170";s:3:" vi";s:3:"171";s:3:"a d";s:3:"172";s:3:"a t";s:3:"173";s:3:"aif";s:3:"174";s:3:"di 
";s:3:"175";s:3:"ere";s:3:"176";s:3:"ing";s:3:"177";s:3:"kin";s:3:"178";s:3:"nda";s:3:"179";s:3:"o n";s:3:"180";s:3:"oa ";s:3:"181";s:3:"tai";s:3:"182";s:3:"toa";s:3:"183";s:3:"usa";s:3:"184";s:3:"uto";s:3:"185";s:3:"was";s:3:"186";s:3:"yak";s:3:"187";s:3:"zo ";s:3:"188";s:3:" ji";s:3:"189";s:3:" mw";s:3:"190";s:3:"a p";s:3:"191";s:3:"aia";s:3:"192";s:3:"amu";s:3:"193";s:3:"ang";s:3:"194";s:3:"bik";s:3:"195";s:3:"bo ";s:3:"196";s:3:"del";s:3:"197";s:3:"e w";s:3:"198";s:3:"ene";s:3:"199";s:3:"eng";s:3:"200";s:3:"ich";s:3:"201";s:3:"iri";s:3:"202";s:3:"iti";s:3:"203";s:3:"ito";s:3:"204";s:3:"ki ";s:3:"205";s:3:"kir";s:3:"206";s:3:"ko ";s:3:"207";s:3:"kuu";s:3:"208";s:3:"mar";s:3:"209";s:3:"mbo";s:3:"210";s:3:"mil";s:3:"211";s:3:"ngi";s:3:"212";s:3:"ngo";s:3:"213";s:3:"o l";s:3:"214";s:3:"ong";s:3:"215";s:3:"si ";s:3:"216";s:3:"ta ";s:3:"217";s:3:"tak";s:3:"218";s:3:"u y";s:3:"219";s:3:"umu";s:3:"220";s:3:"usi";s:3:"221";s:3:"uu ";s:3:"222";s:3:"wam";s:3:"223";s:3:" af";s:3:"224";s:3:" ba";s:3:"225";s:3:" li";s:3:"226";s:3:" si";s:3:"227";s:3:" zi";s:3:"228";s:3:"a v";s:3:"229";s:3:"ami";s:3:"230";s:3:"atu";s:3:"231";s:3:"awi";s:3:"232";s:3:"eri";s:3:"233";s:3:"fan";s:3:"234";s:3:"fur";s:3:"235";s:3:"ger";s:3:"236";s:3:"i z";s:3:"237";s:3:"isi";s:3:"238";s:3:"izo";s:3:"239";s:3:"lea";s:3:"240";s:3:"mbi";s:3:"241";s:3:"mwa";s:3:"242";s:3:"nye";s:3:"243";s:3:"o h";s:3:"244";s:3:"o m";s:3:"245";s:3:"oni";s:3:"246";s:3:"rez";s:3:"247";s:3:"saa";s:3:"248";s:3:"ser";s:3:"249";s:3:"sin";s:3:"250";s:3:"tat";s:3:"251";s:3:"tis";s:3:"252";s:3:"tu ";s:3:"253";s:3:"uin";s:3:"254";s:3:"uki";s:3:"255";s:3:"ur ";s:3:"256";s:3:"wi ";s:3:"257";s:3:"yar";s:3:"258";s:3:" da";s:3:"259";s:3:" en";s:3:"260";s:3:" mp";s:3:"261";s:3:" ny";s:3:"262";s:3:" ta";s:3:"263";s:3:" ul";s:3:"264";s:3:" we";s:3:"265";s:3:"a c";s:3:"266";s:3:"a f";s:3:"267";s:3:"ais";s:3:"268";s:3:"apo";s:3:"269";s:3:"ayo";s:3:"270";s:3:"bar";s:3:"271";s:3:"dhi";s:3:"272";s:3:"e 
a";s:3:"273";s:3:"eke";s:3:"274";s:3:"eny";s:3:"275";s:3:"eon";s:3:"276";s:3:"hai";s:3:"277";s:3:"han";s:3:"278";s:3:"hiy";s:3:"279";s:3:"hur";s:3:"280";s:3:"i s";s:3:"281";s:3:"imw";s:3:"282";s:3:"kal";s:3:"283";s:3:"kwe";s:3:"284";s:3:"lak";s:3:"285";s:3:"lam";s:3:"286";s:3:"mak";s:3:"287";s:3:"msa";s:3:"288";s:3:"ne ";s:3:"289";s:3:"ngu";s:3:"290";s:3:"ru ";s:3:"291";s:3:"sal";s:3:"292";s:3:"swa";s:3:"293";s:3:"te ";s:3:"294";s:3:"ti ";s:3:"295";s:3:"uku";s:3:"296";s:3:"uma";s:3:"297";s:3:"una";s:3:"298";s:3:"uru";s:3:"299";}s:7:"swedish";a:300:{s:3:"en ";s:1:"0";s:3:" de";s:1:"1";s:3:"et ";s:1:"2";s:3:"er ";s:1:"3";s:3:"tt ";s:1:"4";s:3:"om ";s:1:"5";s:4:"för";s:1:"6";s:3:"ar ";s:1:"7";s:3:"de ";s:1:"8";s:3:"att";s:1:"9";s:4:" fö";s:2:"10";s:3:"ing";s:2:"11";s:3:" in";s:2:"12";s:3:" at";s:2:"13";s:3:" i ";s:2:"14";s:3:"det";s:2:"15";s:3:"ch ";s:2:"16";s:3:"an ";s:2:"17";s:3:"gen";s:2:"18";s:3:" an";s:2:"19";s:3:"t s";s:2:"20";s:3:"som";s:2:"21";s:3:"te ";s:2:"22";s:3:" oc";s:2:"23";s:3:"ter";s:2:"24";s:3:" ha";s:2:"25";s:3:"lle";s:2:"26";s:3:"och";s:2:"27";s:3:" sk";s:2:"28";s:3:" so";s:2:"29";s:3:"ra ";s:2:"30";s:3:"r a";s:2:"31";s:3:" me";s:2:"32";s:3:"var";s:2:"33";s:3:"nde";s:2:"34";s:4:"är ";s:2:"35";s:3:" ko";s:2:"36";s:3:"on ";s:2:"37";s:3:"ans";s:2:"38";s:3:"int";s:2:"39";s:3:"n s";s:2:"40";s:3:"na ";s:2:"41";s:3:" en";s:2:"42";s:3:" fr";s:2:"43";s:4:" pÃ¥";s:2:"44";s:3:" st";s:2:"45";s:3:" va";s:2:"46";s:3:"and";s:2:"47";s:3:"nte";s:2:"48";s:4:"pÃ¥ ";s:2:"49";s:3:"ska";s:2:"50";s:3:"ta ";s:2:"51";s:3:" vi";s:2:"52";s:3:"der";s:2:"53";s:4:"äll";s:2:"54";s:4:"örs";s:2:"55";s:3:" om";s:2:"56";s:3:"da ";s:2:"57";s:3:"kri";s:2:"58";s:3:"ka ";s:2:"59";s:3:"nst";s:2:"60";s:3:" ho";s:2:"61";s:3:"as ";s:2:"62";s:4:"stä";s:2:"63";s:3:"r d";s:2:"64";s:3:"t f";s:2:"65";s:3:"upp";s:2:"66";s:3:" be";s:2:"67";s:3:"nge";s:2:"68";s:3:"r s";s:2:"69";s:3:"tal";s:2:"70";s:4:"täl";s:2:"71";s:4:"ör ";s:2:"72";s:3:" av";s:2:"73";s:3:"ger";s:2:"74";s:3:"ill";s:2:"75";s:3:"ng 
";s:2:"76";s:3:"e s";s:2:"77";s:3:"ekt";s:2:"78";s:3:"ade";s:2:"79";s:3:"era";s:2:"80";s:3:"ers";s:2:"81";s:3:"har";s:2:"82";s:3:"ll ";s:2:"83";s:3:"lld";s:2:"84";s:3:"rin";s:2:"85";s:3:"rna";s:2:"86";s:4:"säk";s:2:"87";s:3:"und";s:2:"88";s:3:"inn";s:2:"89";s:3:"lig";s:2:"90";s:3:"ns ";s:2:"91";s:3:" ma";s:2:"92";s:3:" pr";s:2:"93";s:3:" up";s:2:"94";s:3:"age";s:2:"95";s:3:"av ";s:2:"96";s:3:"iva";s:2:"97";s:3:"kti";s:2:"98";s:3:"lda";s:2:"99";s:3:"orn";s:3:"100";s:3:"son";s:3:"101";s:3:"ts ";s:3:"102";s:3:"tta";s:3:"103";s:4:"äkr";s:3:"104";s:3:" sj";s:3:"105";s:3:" ti";s:3:"106";s:3:"avt";s:3:"107";s:3:"ber";s:3:"108";s:3:"els";s:3:"109";s:3:"eta";s:3:"110";s:3:"kol";s:3:"111";s:3:"men";s:3:"112";s:3:"n d";s:3:"113";s:3:"t k";s:3:"114";s:3:"vta";s:3:"115";s:4:"Ã¥r ";s:3:"116";s:3:"juk";s:3:"117";s:3:"man";s:3:"118";s:3:"n f";s:3:"119";s:3:"nin";s:3:"120";s:3:"r i";s:3:"121";s:4:"rsä";s:3:"122";s:3:"sju";s:3:"123";s:3:"sso";s:3:"124";s:4:" är";s:3:"125";s:3:"a s";s:3:"126";s:3:"ach";s:3:"127";s:3:"ag ";s:3:"128";s:3:"bac";s:3:"129";s:3:"den";s:3:"130";s:3:"ett";s:3:"131";s:3:"fte";s:3:"132";s:3:"hor";s:3:"133";s:3:"nba";s:3:"134";s:3:"oll";s:3:"135";s:3:"rnb";s:3:"136";s:3:"ste";s:3:"137";s:3:"til";s:3:"138";s:3:" ef";s:3:"139";s:3:" si";s:3:"140";s:3:"a a";s:3:"141";s:3:"e h";s:3:"142";s:3:"ed ";s:3:"143";s:3:"eft";s:3:"144";s:3:"ga ";s:3:"145";s:3:"ig ";s:3:"146";s:3:"it ";s:3:"147";s:3:"ler";s:3:"148";s:3:"med";s:3:"149";s:3:"n i";s:3:"150";s:3:"nd ";s:3:"151";s:4:"sÃ¥ ";s:3:"152";s:3:"tiv";s:3:"153";s:3:" bl";s:3:"154";s:3:" et";s:3:"155";s:3:" fi";s:3:"156";s:4:" sä";s:3:"157";s:3:"at ";s:3:"158";s:3:"des";s:3:"159";s:3:"e a";s:3:"160";s:3:"gar";s:3:"161";s:3:"get";s:3:"162";s:3:"lan";s:3:"163";s:3:"lss";s:3:"164";s:3:"ost";s:3:"165";s:3:"r b";s:3:"166";s:3:"r e";s:3:"167";s:3:"re ";s:3:"168";s:3:"ret";s:3:"169";s:3:"sta";s:3:"170";s:3:"t i";s:3:"171";s:3:" ge";s:3:"172";s:3:" he";s:3:"173";s:3:" re";s:3:"174";s:3:"a 
f";s:3:"175";s:3:"all";s:3:"176";s:3:"bos";s:3:"177";s:3:"ets";s:3:"178";s:3:"lek";s:3:"179";s:3:"let";s:3:"180";s:3:"ner";s:3:"181";s:3:"nna";s:3:"182";s:3:"nne";s:3:"183";s:3:"r f";s:3:"184";s:3:"rit";s:3:"185";s:3:"s s";s:3:"186";s:3:"sen";s:3:"187";s:3:"sto";s:3:"188";s:3:"tor";s:3:"189";s:3:"vav";s:3:"190";s:3:"ygg";s:3:"191";s:3:" ka";s:3:"192";s:4:" sÃ¥";s:3:"193";s:3:" tr";s:3:"194";s:3:" ut";s:3:"195";s:3:"ad ";s:3:"196";s:3:"al ";s:3:"197";s:3:"are";s:3:"198";s:3:"e o";s:3:"199";s:3:"gon";s:3:"200";s:3:"kom";s:3:"201";s:3:"n a";s:3:"202";s:3:"n h";s:3:"203";s:3:"nga";s:3:"204";s:3:"r h";s:3:"205";s:3:"ren";s:3:"206";s:3:"t d";s:3:"207";s:3:"tag";s:3:"208";s:3:"tar";s:3:"209";s:3:"tre";s:3:"210";s:4:"ätt";s:3:"211";s:4:" fÃ¥";s:3:"212";s:4:" hä";s:3:"213";s:3:" se";s:3:"214";s:3:"a d";s:3:"215";s:3:"a i";s:3:"216";s:3:"a p";s:3:"217";s:3:"ale";s:3:"218";s:3:"ann";s:3:"219";s:3:"ara";s:3:"220";s:3:"byg";s:3:"221";s:3:"gt ";s:3:"222";s:3:"han";s:3:"223";s:3:"igt";s:3:"224";s:3:"kan";s:3:"225";s:3:"la ";s:3:"226";s:3:"n o";s:3:"227";s:3:"nom";s:3:"228";s:3:"nsk";s:3:"229";s:3:"omm";s:3:"230";s:3:"r k";s:3:"231";s:3:"r p";s:3:"232";s:3:"r v";s:3:"233";s:3:"s f";s:3:"234";s:3:"s k";s:3:"235";s:3:"t a";s:3:"236";s:3:"t p";s:3:"237";s:3:"ver";s:3:"238";s:3:" bo";s:3:"239";s:3:" br";s:3:"240";s:3:" ku";s:3:"241";s:4:" nÃ¥";s:3:"242";s:3:"a b";s:3:"243";s:3:"a e";s:3:"244";s:3:"del";s:3:"245";s:3:"ens";s:3:"246";s:3:"es ";s:3:"247";s:3:"fin";s:3:"248";s:3:"ige";s:3:"249";s:3:"m s";s:3:"250";s:3:"n p";s:3:"251";s:4:"nÃ¥g";s:3:"252";s:3:"or ";s:3:"253";s:3:"r o";s:3:"254";s:3:"rbe";s:3:"255";s:3:"rs ";s:3:"256";s:3:"rt ";s:3:"257";s:3:"s a";s:3:"258";s:3:"s n";s:3:"259";s:3:"skr";s:3:"260";s:3:"t o";s:3:"261";s:3:"ten";s:3:"262";s:3:"tio";s:3:"263";s:3:"ven";s:3:"264";s:3:" al";s:3:"265";s:3:" ja";s:3:"266";s:3:" p ";s:3:"267";s:3:" r ";s:3:"268";s:3:" sa";s:3:"269";s:3:"a h";s:3:"270";s:3:"bet";s:3:"271";s:3:"cke";s:3:"272";s:3:"dra";s:3:"273";s:3:"e 
f";s:3:"274";s:3:"e i";s:3:"275";s:3:"eda";s:3:"276";s:3:"eno";s:3:"277";s:4:"erä";s:3:"278";s:3:"ess";s:3:"279";s:3:"ion";s:3:"280";s:3:"jag";s:3:"281";s:3:"m f";s:3:"282";s:3:"ne ";s:3:"283";s:3:"nns";s:3:"284";s:3:"pro";s:3:"285";s:3:"r t";s:3:"286";s:3:"rar";s:3:"287";s:3:"riv";s:3:"288";s:4:"rät";s:3:"289";s:3:"t e";s:3:"290";s:3:"t t";s:3:"291";s:3:"ust";s:3:"292";s:3:"vad";s:3:"293";s:4:"öre";s:3:"294";s:3:" ar";s:3:"295";s:3:" by";s:3:"296";s:3:" kr";s:3:"297";s:3:" mi";s:3:"298";s:3:"arb";s:3:"299";}s:7:"tagalog";a:300:{s:3:"ng ";s:1:"0";s:3:"ang";s:1:"1";s:3:" na";s:1:"2";s:3:" sa";s:1:"3";s:3:"an ";s:1:"4";s:3:"nan";s:1:"5";s:3:"sa ";s:1:"6";s:3:"na ";s:1:"7";s:3:" ma";s:1:"8";s:3:" ca";s:1:"9";s:3:"ay ";s:2:"10";s:3:"n g";s:2:"11";s:3:" an";s:2:"12";s:3:"ong";s:2:"13";s:3:" ga";s:2:"14";s:3:"at ";s:2:"15";s:3:" pa";s:2:"16";s:3:"ala";s:2:"17";s:3:" si";s:2:"18";s:3:"a n";s:2:"19";s:3:"ga ";s:2:"20";s:3:"g n";s:2:"21";s:3:"g m";s:2:"22";s:3:"ito";s:2:"23";s:3:"g c";s:2:"24";s:3:"man";s:2:"25";s:3:"san";s:2:"26";s:3:"g s";s:2:"27";s:3:"ing";s:2:"28";s:3:"to ";s:2:"29";s:3:"ila";s:2:"30";s:3:"ina";s:2:"31";s:3:" di";s:2:"32";s:3:" ta";s:2:"33";s:3:"aga";s:2:"34";s:3:"iya";s:2:"35";s:3:"aca";s:2:"36";s:3:"g t";s:2:"37";s:3:" at";s:2:"38";s:3:"aya";s:2:"39";s:3:"ama";s:2:"40";s:3:"lan";s:2:"41";s:3:"a a";s:2:"42";s:3:"qui";s:2:"43";s:3:"a c";s:2:"44";s:3:"a s";s:2:"45";s:3:"nag";s:2:"46";s:3:" ba";s:2:"47";s:3:"g i";s:2:"48";s:3:"tan";s:2:"49";s:3:"'t ";s:2:"50";s:3:" cu";s:2:"51";s:3:"aua";s:2:"52";s:3:"g p";s:2:"53";s:3:" ni";s:2:"54";s:3:"os ";s:2:"55";s:3:"'y ";s:2:"56";s:3:"a m";s:2:"57";s:3:" n ";s:2:"58";s:3:"la ";s:2:"59";s:3:" la";s:2:"60";s:3:"o n";s:2:"61";s:3:"yan";s:2:"62";s:3:" ay";s:2:"63";s:3:"usa";s:2:"64";s:3:"cay";s:2:"65";s:3:"on ";s:2:"66";s:3:"ya ";s:2:"67";s:3:" it";s:2:"68";s:3:"al ";s:2:"69";s:3:"apa";s:2:"70";s:3:"ata";s:2:"71";s:3:"t n";s:2:"72";s:3:"uan";s:2:"73";s:3:"aha";s:2:"74";s:3:"asa";s:2:"75";s:3:"pag";s:2:"76";s:3:" 
gu";s:2:"77";s:3:"g l";s:2:"78";s:3:"di ";s:2:"79";s:3:"mag";s:2:"80";s:3:"aba";s:2:"81";s:3:"g a";s:2:"82";s:3:"ara";s:2:"83";s:3:"a p";s:2:"84";s:3:"in ";s:2:"85";s:3:"ana";s:2:"86";s:3:"it ";s:2:"87";s:3:"si ";s:2:"88";s:3:"cus";s:2:"89";s:3:"g b";s:2:"90";s:3:"uin";s:2:"91";s:3:"a t";s:2:"92";s:3:"as ";s:2:"93";s:3:"n n";s:2:"94";s:3:"hin";s:2:"95";s:3:" hi";s:2:"96";s:3:"a't";s:2:"97";s:3:"ali";s:2:"98";s:3:" bu";s:2:"99";s:3:"gan";s:3:"100";s:3:"uma";s:3:"101";s:3:"a d";s:3:"102";s:3:"agc";s:3:"103";s:3:"aqu";s:3:"104";s:3:"g d";s:3:"105";s:3:" tu";s:3:"106";s:3:"aon";s:3:"107";s:3:"ari";s:3:"108";s:3:"cas";s:3:"109";s:3:"i n";s:3:"110";s:3:"niy";s:3:"111";s:3:"pin";s:3:"112";s:3:"a i";s:3:"113";s:3:"gca";s:3:"114";s:3:"siy";s:3:"115";s:3:"a'y";s:3:"116";s:3:"yao";s:3:"117";s:3:"ag ";s:3:"118";s:3:"ca ";s:3:"119";s:3:"han";s:3:"120";s:3:"ili";s:3:"121";s:3:"pan";s:3:"122";s:3:"sin";s:3:"123";s:3:"ual";s:3:"124";s:3:"n s";s:3:"125";s:3:"nam";s:3:"126";s:3:" lu";s:3:"127";s:3:"can";s:3:"128";s:3:"dit";s:3:"129";s:3:"gui";s:3:"130";s:3:"y n";s:3:"131";s:3:"gal";s:3:"132";s:3:"hat";s:3:"133";s:3:"nal";s:3:"134";s:3:" is";s:3:"135";s:3:"bag";s:3:"136";s:3:"fra";s:3:"137";s:3:" fr";s:3:"138";s:3:" su";s:3:"139";s:3:"a l";s:3:"140";s:3:" co";s:3:"141";s:3:"ani";s:3:"142";s:3:" bi";s:3:"143";s:3:" da";s:3:"144";s:3:"alo";s:3:"145";s:3:"isa";s:3:"146";s:3:"ita";s:3:"147";s:3:"may";s:3:"148";s:3:"o s";s:3:"149";s:3:"sil";s:3:"150";s:3:"una";s:3:"151";s:3:" in";s:3:"152";s:3:" pi";s:3:"153";s:3:"l n";s:3:"154";s:3:"nil";s:3:"155";s:3:"o a";s:3:"156";s:3:"pat";s:3:"157";s:3:"sac";s:3:"158";s:3:"t s";s:3:"159";s:3:" ua";s:3:"160";s:3:"agu";s:3:"161";s:3:"ail";s:3:"162";s:3:"bin";s:3:"163";s:3:"dal";s:3:"164";s:3:"g h";s:3:"165";s:3:"ndi";s:3:"166";s:3:"oon";s:3:"167";s:3:"ua ";s:3:"168";s:3:" ha";s:3:"169";s:3:"ind";s:3:"170";s:3:"ran";s:3:"171";s:3:"s n";s:3:"172";s:3:"tin";s:3:"173";s:3:"ulo";s:3:"174";s:3:"eng";s:3:"175";s:3:"g 
f";s:3:"176";s:3:"ini";s:3:"177";s:3:"lah";s:3:"178";s:3:"lo ";s:3:"179";s:3:"rai";s:3:"180";s:3:"rin";s:3:"181";s:3:"ton";s:3:"182";s:3:"g u";s:3:"183";s:3:"inu";s:3:"184";s:3:"lon";s:3:"185";s:3:"o'y";s:3:"186";s:3:"t a";s:3:"187";s:3:" ar";s:3:"188";s:3:"a b";s:3:"189";s:3:"ad ";s:3:"190";s:3:"bay";s:3:"191";s:3:"cal";s:3:"192";s:3:"gya";s:3:"193";s:3:"ile";s:3:"194";s:3:"mat";s:3:"195";s:3:"n a";s:3:"196";s:3:"pau";s:3:"197";s:3:"ra ";s:3:"198";s:3:"tay";s:3:"199";s:3:"y m";s:3:"200";s:3:"ant";s:3:"201";s:3:"ban";s:3:"202";s:3:"i m";s:3:"203";s:3:"nas";s:3:"204";s:3:"nay";s:3:"205";s:3:"no ";s:3:"206";s:3:"sti";s:3:"207";s:3:" ti";s:3:"208";s:3:"ags";s:3:"209";s:3:"g g";s:3:"210";s:3:"ta ";s:3:"211";s:3:"uit";s:3:"212";s:3:"uno";s:3:"213";s:3:" ib";s:3:"214";s:3:" ya";s:3:"215";s:3:"a u";s:3:"216";s:3:"abi";s:3:"217";s:3:"ati";s:3:"218";s:3:"cap";s:3:"219";s:3:"ig ";s:3:"220";s:3:"is ";s:3:"221";s:3:"la'";s:3:"222";s:3:" do";s:3:"223";s:3:" pu";s:3:"224";s:3:"api";s:3:"225";s:3:"ayo";s:3:"226";s:3:"gos";s:3:"227";s:3:"gul";s:3:"228";s:3:"lal";s:3:"229";s:3:"tag";s:3:"230";s:3:"til";s:3:"231";s:3:"tun";s:3:"232";s:3:"y c";s:3:"233";s:3:"y s";s:3:"234";s:3:"yon";s:3:"235";s:3:"ano";s:3:"236";s:3:"bur";s:3:"237";s:3:"iba";s:3:"238";s:3:"isi";s:3:"239";s:3:"lam";s:3:"240";s:3:"nac";s:3:"241";s:3:"nat";s:3:"242";s:3:"ni ";s:3:"243";s:3:"nto";s:3:"244";s:3:"od ";s:3:"245";s:3:"pa ";s:3:"246";s:3:"rgo";s:3:"247";s:3:"urg";s:3:"248";s:3:" m ";s:3:"249";s:3:"adr";s:3:"250";s:3:"ast";s:3:"251";s:3:"cag";s:3:"252";s:3:"gay";s:3:"253";s:3:"gsi";s:3:"254";s:3:"i p";s:3:"255";s:3:"ino";s:3:"256";s:3:"len";s:3:"257";s:3:"lin";s:3:"258";s:3:"m g";s:3:"259";s:3:"mar";s:3:"260";s:3:"nah";s:3:"261";s:3:"to'";s:3:"262";s:3:" de";s:3:"263";s:3:"a h";s:3:"264";s:3:"cat";s:3:"265";s:3:"cau";s:3:"266";s:3:"con";s:3:"267";s:3:"iqu";s:3:"268";s:3:"lac";s:3:"269";s:3:"mab";s:3:"270";s:3:"min";s:3:"271";s:3:"og ";s:3:"272";s:3:"par";s:3:"273";s:3:"sal";s:3:"274";s:3:" 
za";s:3:"275";s:3:"ao ";s:3:"276";s:3:"doo";s:3:"277";s:3:"ipi";s:3:"278";s:3:"nod";s:3:"279";s:3:"nte";s:3:"280";s:3:"uha";s:3:"281";s:3:"ula";s:3:"282";s:3:" re";s:3:"283";s:3:"ill";s:3:"284";s:3:"lit";s:3:"285";s:3:"mac";s:3:"286";s:3:"nit";s:3:"287";s:3:"o't";s:3:"288";s:3:"or ";s:3:"289";s:3:"ora";s:3:"290";s:3:"sum";s:3:"291";s:3:"y p";s:3:"292";s:3:" al";s:3:"293";s:3:" mi";s:3:"294";s:3:" um";s:3:"295";s:3:"aco";s:3:"296";s:3:"ada";s:3:"297";s:3:"agd";s:3:"298";s:3:"cab";s:3:"299";}s:7:"turkish";a:300:{s:3:"lar";s:1:"0";s:3:"en ";s:1:"1";s:3:"ler";s:1:"2";s:3:"an ";s:1:"3";s:3:"in ";s:1:"4";s:3:" bi";s:1:"5";s:3:" ya";s:1:"6";s:3:"eri";s:1:"7";s:3:"de ";s:1:"8";s:3:" ka";s:1:"9";s:3:"ir ";s:2:"10";s:4:"arı";s:2:"11";s:3:" ba";s:2:"12";s:3:" de";s:2:"13";s:3:" ha";s:2:"14";s:4:"ın ";s:2:"15";s:3:"ara";s:2:"16";s:3:"bir";s:2:"17";s:3:" ve";s:2:"18";s:3:" sa";s:2:"19";s:3:"ile";s:2:"20";s:3:"le ";s:2:"21";s:3:"nde";s:2:"22";s:3:"da ";s:2:"23";s:3:" bu";s:2:"24";s:3:"ana";s:2:"25";s:3:"ini";s:2:"26";s:5:"ını";s:2:"27";s:3:"er ";s:2:"28";s:3:"ve ";s:2:"29";s:4:" yı";s:2:"30";s:3:"lma";s:2:"31";s:4:"yıl";s:2:"32";s:3:" ol";s:2:"33";s:3:"ar ";s:2:"34";s:3:"n b";s:2:"35";s:3:"nda";s:2:"36";s:3:"aya";s:2:"37";s:3:"li ";s:2:"38";s:4:"ası";s:2:"39";s:3:" ge";s:2:"40";s:3:"ind";s:2:"41";s:3:"n k";s:2:"42";s:3:"esi";s:2:"43";s:3:"lan";s:2:"44";s:3:"nla";s:2:"45";s:3:"ak ";s:2:"46";s:4:"anı";s:2:"47";s:3:"eni";s:2:"48";s:3:"ni ";s:2:"49";s:4:"nı ";s:2:"50";s:4:"rın";s:2:"51";s:3:"san";s:2:"52";s:3:" ko";s:2:"53";s:3:" ye";s:2:"54";s:3:"maz";s:2:"55";s:4:"baÅŸ";s:2:"56";s:3:"ili";s:2:"57";s:3:"rin";s:2:"58";s:4:"alı";s:2:"59";s:3:"az ";s:2:"60";s:3:"hal";s:2:"61";s:4:"ınd";s:2:"62";s:3:" da";s:2:"63";s:4:" gü";s:2:"64";s:3:"ele";s:2:"65";s:4:"ılm";s:2:"66";s:6:"ığı";s:2:"67";s:3:"eki";s:2:"68";s:4:"gün";s:2:"69";s:3:"i b";s:2:"70";s:4:"içi";s:2:"71";s:3:"den";s:2:"72";s:3:"kar";s:2:"73";s:3:"si ";s:2:"74";s:3:" il";s:2:"75";s:3:"e y";s:2:"76";s:3:"na 
";s:2:"77";s:3:"yor";s:2:"78";s:3:"ek ";s:2:"79";s:3:"n s";s:2:"80";s:4:" iç";s:2:"81";s:3:"bu ";s:2:"82";s:3:"e b";s:2:"83";s:3:"im ";s:2:"84";s:3:"ki ";s:2:"85";s:3:"len";s:2:"86";s:3:"ri ";s:2:"87";s:4:"sın";s:2:"88";s:3:" so";s:2:"89";s:4:"ün ";s:2:"90";s:3:" ta";s:2:"91";s:3:"nin";s:2:"92";s:4:"iÄŸi";s:2:"93";s:3:"tan";s:2:"94";s:3:"yan";s:2:"95";s:3:" si";s:2:"96";s:3:"nat";s:2:"97";s:4:"nın";s:2:"98";s:3:"kan";s:2:"99";s:4:"rı ";s:3:"100";s:4:"çin";s:3:"101";s:5:"ğı ";s:3:"102";s:3:"eli";s:3:"103";s:3:"n a";s:3:"104";s:4:"ır ";s:3:"105";s:3:" an";s:3:"106";s:3:"ine";s:3:"107";s:3:"n y";s:3:"108";s:3:"ola";s:3:"109";s:3:" ar";s:3:"110";s:3:"al ";s:3:"111";s:3:"e s";s:3:"112";s:3:"lik";s:3:"113";s:3:"n d";s:3:"114";s:3:"sin";s:3:"115";s:3:" al";s:3:"116";s:4:" dü";s:3:"117";s:3:"anl";s:3:"118";s:3:"ne ";s:3:"119";s:3:"ya ";s:3:"120";s:4:"ım ";s:3:"121";s:4:"ına";s:3:"122";s:3:" be";s:3:"123";s:3:"ada";s:3:"124";s:3:"ala";s:3:"125";s:3:"ama";s:3:"126";s:3:"ilm";s:3:"127";s:3:"or ";s:3:"128";s:4:"sı ";s:3:"129";s:3:"yen";s:3:"130";s:3:" me";s:3:"131";s:4:"atı";s:3:"132";s:3:"di ";s:3:"133";s:3:"eti";s:3:"134";s:3:"ken";s:3:"135";s:3:"la ";s:3:"136";s:4:"lı ";s:3:"137";s:3:"oru";s:3:"138";s:4:" gö";s:3:"139";s:3:" in";s:3:"140";s:3:"and";s:3:"141";s:3:"e d";s:3:"142";s:3:"men";s:3:"143";s:3:"un ";s:3:"144";s:4:"öne";s:3:"145";s:3:"a d";s:3:"146";s:3:"at ";s:3:"147";s:3:"e a";s:3:"148";s:3:"e g";s:3:"149";s:3:"yar";s:3:"150";s:3:" ku";s:3:"151";s:4:"ayı";s:3:"152";s:3:"dan";s:3:"153";s:3:"edi";s:3:"154";s:3:"iri";s:3:"155";s:5:"ünü";s:3:"156";s:4:"ÄŸi ";s:3:"157";s:5:"ılı";s:3:"158";s:3:"eme";s:3:"159";s:4:"eÄŸi";s:3:"160";s:3:"i k";s:3:"161";s:3:"i y";s:3:"162";s:4:"ıla";s:3:"163";s:4:" ça";s:3:"164";s:3:"a y";s:3:"165";s:3:"alk";s:3:"166";s:4:"dı ";s:3:"167";s:3:"ede";s:3:"168";s:3:"el ";s:3:"169";s:4:"ndı";s:3:"170";s:3:"ra ";s:3:"171";s:4:"üne";s:3:"172";s:4:" sü";s:3:"173";s:4:"dır";s:3:"174";s:3:"e k";s:3:"175";s:3:"ere";s:3:"176";s:3:"ik 
";s:3:"177";s:3:"imi";s:3:"178";s:4:"iÅŸi";s:3:"179";s:3:"mas";s:3:"180";s:3:"n h";s:3:"181";s:4:"sür";s:3:"182";s:3:"yle";s:3:"183";s:3:" ad";s:3:"184";s:3:" fi";s:3:"185";s:3:" gi";s:3:"186";s:3:" se";s:3:"187";s:3:"a k";s:3:"188";s:3:"arl";s:3:"189";s:5:"aşı";s:3:"190";s:3:"iyo";s:3:"191";s:3:"kla";s:3:"192";s:5:"lığ";s:3:"193";s:3:"nem";s:3:"194";s:3:"ney";s:3:"195";s:3:"rme";s:3:"196";s:3:"ste";s:3:"197";s:4:"tı ";s:3:"198";s:3:"unl";s:3:"199";s:3:"ver";s:3:"200";s:4:" sı";s:3:"201";s:3:" te";s:3:"202";s:3:" to";s:3:"203";s:3:"a s";s:3:"204";s:4:"aÅŸk";s:3:"205";s:3:"ekl";s:3:"206";s:3:"end";s:3:"207";s:3:"kal";s:3:"208";s:4:"liÄŸ";s:3:"209";s:3:"min";s:3:"210";s:4:"tır";s:3:"211";s:3:"ulu";s:3:"212";s:3:"unu";s:3:"213";s:3:"yap";s:3:"214";s:3:"ye ";s:3:"215";s:4:"ı i";s:3:"216";s:4:"ÅŸka";s:3:"217";s:5:"ÅŸtı";s:3:"218";s:4:" bü";s:3:"219";s:3:" ke";s:3:"220";s:3:" ki";s:3:"221";s:3:"ard";s:3:"222";s:3:"art";s:3:"223";s:4:"aÅŸa";s:3:"224";s:3:"n i";s:3:"225";s:3:"ndi";s:3:"226";s:3:"ti ";s:3:"227";s:3:"top";s:3:"228";s:4:"ı b";s:3:"229";s:3:" va";s:3:"230";s:4:" ön";s:3:"231";s:3:"aki";s:3:"232";s:3:"cak";s:3:"233";s:3:"ey ";s:3:"234";s:3:"fil";s:3:"235";s:3:"isi";s:3:"236";s:3:"kle";s:3:"237";s:3:"kur";s:3:"238";s:3:"man";s:3:"239";s:3:"nce";s:3:"240";s:3:"nle";s:3:"241";s:3:"nun";s:3:"242";s:3:"rak";s:3:"243";s:4:"ık ";s:3:"244";s:3:" en";s:3:"245";s:3:" yo";s:3:"246";s:3:"a g";s:3:"247";s:3:"lis";s:3:"248";s:3:"mak";s:3:"249";s:3:"n g";s:3:"250";s:3:"tir";s:3:"251";s:3:"yas";s:3:"252";s:4:" iÅŸ";s:3:"253";s:4:" yö";s:3:"254";s:3:"ale";s:3:"255";s:3:"bil";s:3:"256";s:3:"bul";s:3:"257";s:3:"et ";s:3:"258";s:3:"i d";s:3:"259";s:3:"iye";s:3:"260";s:3:"kil";s:3:"261";s:3:"ma ";s:3:"262";s:3:"n e";s:3:"263";s:3:"n t";s:3:"264";s:3:"nu ";s:3:"265";s:3:"olu";s:3:"266";s:3:"rla";s:3:"267";s:3:"te ";s:3:"268";s:4:"yön";s:3:"269";s:5:"çık";s:3:"270";s:3:" ay";s:3:"271";s:4:" mü";s:3:"272";s:4:" ço";s:3:"273";s:5:" çı";s:3:"274";s:3:"a a";s:3:"275";s:3:"a 
b";s:3:"276";s:3:"ata";s:3:"277";s:3:"der";s:3:"278";s:3:"gel";s:3:"279";s:3:"i g";s:3:"280";s:3:"i i";s:3:"281";s:3:"ill";s:3:"282";s:3:"ist";s:3:"283";s:4:"ldı";s:3:"284";s:3:"lu ";s:3:"285";s:3:"mek";s:3:"286";s:3:"mle";s:3:"287";s:4:"n ç";s:3:"288";s:3:"onu";s:3:"289";s:3:"opl";s:3:"290";s:3:"ran";s:3:"291";s:3:"rat";s:3:"292";s:4:"rdı";s:3:"293";s:3:"rke";s:3:"294";s:3:"siy";s:3:"295";s:3:"son";s:3:"296";s:3:"ta ";s:3:"297";s:5:"tçı";s:3:"298";s:4:"tın";s:3:"299";}s:9:"ukrainian";a:300:{s:5:" на";s:1:"0";s:5:" за";s:1:"1";s:6:"ннÑ";s:1:"2";s:5:"Ð½Ñ ";s:1:"3";s:5:"на ";s:1:"4";s:5:" пр";s:1:"5";s:6:"ого";s:1:"6";s:5:"го ";s:1:"7";s:6:"Ñьк";s:1:"8";s:5:" по";s:1:"9";s:4:" у ";s:2:"10";s:6:"від";s:2:"11";s:6:"ере";s:2:"12";s:5:" мі";s:2:"13";s:5:" не";s:2:"14";s:5:"их ";s:2:"15";s:5:"Ñ‚ÑŒ ";s:2:"16";s:6:"пер";s:2:"17";s:5:" ві";s:2:"18";s:5:"ів ";s:2:"19";s:5:" пе";s:2:"20";s:5:" що";s:2:"21";s:6:"льн";s:2:"22";s:5:"ми ";s:2:"23";s:5:"ні ";s:2:"24";s:5:"не ";s:2:"25";s:5:"ти ";s:2:"26";s:6:"ати";s:2:"27";s:6:"енн";s:2:"28";s:6:"міÑ";s:2:"29";s:6:"пра";s:2:"30";s:6:"ува";s:2:"31";s:6:"ник";s:2:"32";s:6:"про";s:2:"33";s:6:"рав";s:2:"34";s:6:"івн";s:2:"35";s:5:" та";s:2:"36";s:6:"буд";s:2:"37";s:6:"влі";s:2:"38";s:6:"рів";s:2:"39";s:5:" ко";s:2:"40";s:5:" рі";s:2:"41";s:6:"аль";s:2:"42";s:5:"но ";s:2:"43";s:6:"ому";s:2:"44";s:5:"що ";s:2:"45";s:5:" ви";s:2:"46";s:5:"му ";s:2:"47";s:6:"рев";s:2:"48";s:5:"ÑÑ ";s:2:"49";s:6:"інн";s:2:"50";s:5:" до";s:2:"51";s:5:" уп";s:2:"52";s:6:"авл";s:2:"53";s:6:"анн";s:2:"54";s:6:"ком";s:2:"55";s:5:"ли ";s:2:"56";s:6:"лін";s:2:"57";s:6:"ног";s:2:"58";s:6:"упр";s:2:"59";s:5:" бу";s:2:"60";s:4:" з ";s:2:"61";s:5:" ро";s:2:"62";s:5:"за ";s:2:"63";s:5:"и н";s:2:"64";s:6:"нов";s:2:"65";s:6:"оро";s:2:"66";s:6:"оÑÑ‚";s:2:"67";s:6:"Ñта";s:2:"68";s:5:"Ñ‚Ñ– ";s:2:"69";s:6:"ÑŽÑ‚ÑŒ";s:2:"70";s:5:" мо";s:2:"71";s:5:" ні";s:2:"72";s:5:" Ñк";s:2:"73";s:6:"бор";s:2:"74";s:5:"ва ";s:2:"75";s:6:"ван";s:2:"76";s:6:"ень";s:2:"77";s:5:"и 
п";s:2:"78";s:5:"нь ";s:2:"79";s:6:"ові";s:2:"80";s:6:"рон";s:2:"81";s:6:"ÑÑ‚Ñ–";s:2:"82";s:5:"та ";s:2:"83";s:5:"у в";s:2:"84";s:6:"ько";s:2:"85";s:6:"Ñ–ÑÑ‚";s:2:"86";s:4:" в ";s:2:"87";s:5:" ре";s:2:"88";s:5:"до ";s:2:"89";s:5:"е п";s:2:"90";s:6:"заб";s:2:"91";s:5:"ий ";s:2:"92";s:6:"нÑÑŒ";s:2:"93";s:5:"о в";s:2:"94";s:5:"о п";s:2:"95";s:6:"при";s:2:"96";s:5:"Ñ– п";s:2:"97";s:5:" ку";s:2:"98";s:5:" пі";s:2:"99";s:5:" Ñп";s:3:"100";s:5:"а п";s:3:"101";s:6:"або";s:3:"102";s:6:"анÑ";s:3:"103";s:6:"аці";s:3:"104";s:6:"ват";s:3:"105";s:6:"вни";s:3:"106";s:5:"и в";s:3:"107";s:6:"ими";s:3:"108";s:5:"ка ";s:3:"109";s:6:"нен";s:3:"110";s:6:"ніч";s:3:"111";s:6:"она";s:3:"112";s:5:"ої ";s:3:"113";s:6:"пов";s:3:"114";s:6:"ьки";s:3:"115";s:6:"ьно";s:3:"116";s:6:"ізн";s:3:"117";s:6:"ічн";s:3:"118";s:5:" ав";s:3:"119";s:5:" ма";s:3:"120";s:5:" ор";s:3:"121";s:5:" Ñу";s:3:"122";s:5:" чи";s:3:"123";s:5:" ін";s:3:"124";s:5:"а з";s:3:"125";s:5:"ам ";s:3:"126";s:5:"ає ";s:3:"127";s:6:"вне";s:3:"128";s:6:"вто";s:3:"129";s:6:"дом";s:3:"130";s:6:"ент";s:3:"131";s:6:"жит";s:3:"132";s:6:"зни";s:3:"133";s:5:"им ";s:3:"134";s:6:"итл";s:3:"135";s:5:"ла ";s:3:"136";s:6:"них";s:3:"137";s:6:"ниц";s:3:"138";s:6:"ова";s:3:"139";s:6:"ови";s:3:"140";s:5:"ом ";s:3:"141";s:6:"пор";s:3:"142";s:6:"Ñ‚ÑŒÑ";s:3:"143";s:5:"у Ñ€";s:3:"144";s:6:"ÑŒÑÑ";s:3:"145";s:6:"ідо";s:3:"146";s:6:"іль";s:3:"147";s:6:"Ñ–ÑÑŒ";s:3:"148";s:5:" ва";s:3:"149";s:5:" ді";s:3:"150";s:5:" жи";s:3:"151";s:5:" че";s:3:"152";s:4:" Ñ– ";s:3:"153";s:5:"а в";s:3:"154";s:5:"а н";s:3:"155";s:6:"али";s:3:"156";s:6:"вез";s:3:"157";s:6:"вно";s:3:"158";s:6:"еве";s:3:"159";s:6:"езе";s:3:"160";s:6:"зен";s:3:"161";s:6:"ицт";s:3:"162";s:5:"ки ";s:3:"163";s:6:"ких";s:3:"164";s:6:"кон";s:3:"165";s:5:"ку ";s:3:"166";s:6:"лаÑ";s:3:"167";s:5:"Ð»Ñ ";s:3:"168";s:6:"мож";s:3:"169";s:6:"нач";s:3:"170";s:6:"ним";s:3:"171";s:6:"ної";s:3:"172";s:5:"о б";s:3:"173";s:6:"ову";s:3:"174";s:6:"оди";s:3:"175";s:5:"ою ";s:3:"176";s:5:"ро 
";s:3:"177";s:6:"рок";s:3:"178";s:6:"Ñно";s:3:"179";s:6:"Ñпо";s:3:"180";s:6:"так";s:3:"181";s:6:"тва";s:3:"182";s:5:"ту ";s:3:"183";s:5:"у п";s:3:"184";s:6:"цтв";s:3:"185";s:6:"ьни";s:3:"186";s:5:"Ñ Ð·";s:3:"187";s:5:"Ñ– м";s:3:"188";s:5:"Ñ–Ñ— ";s:3:"189";s:5:" вÑ";s:3:"190";s:5:" гр";s:3:"191";s:5:" де";s:3:"192";s:5:" но";s:3:"193";s:5:" па";s:3:"194";s:5:" Ñе";s:3:"195";s:5:" ук";s:3:"196";s:5:" Ñ—Ñ…";s:3:"197";s:5:"а о";s:3:"198";s:6:"авт";s:3:"199";s:6:"аÑÑ‚";s:3:"200";s:6:"ают";s:3:"201";s:6:"вар";s:3:"202";s:6:"ден";s:3:"203";s:5:"ди ";s:3:"204";s:5:"ду ";s:3:"205";s:6:"зна";s:3:"206";s:5:"и з";s:3:"207";s:6:"ико";s:3:"208";s:6:"иÑÑ";s:3:"209";s:6:"ити";s:3:"210";s:6:"ког";s:3:"211";s:6:"мен";s:3:"212";s:6:"ном";s:3:"213";s:5:"ну ";s:3:"214";s:5:"о н";s:3:"215";s:5:"о Ñ";s:3:"216";s:6:"обу";s:3:"217";s:6:"ово";s:3:"218";s:6:"пла";s:3:"219";s:6:"ран";s:3:"220";s:6:"рив";s:3:"221";s:6:"роб";s:3:"222";s:6:"Ñка";s:3:"223";s:6:"тан";s:3:"224";s:6:"тим";s:3:"225";s:6:"тиÑ";s:3:"226";s:5:"то ";s:3:"227";s:6:"тра";s:3:"228";s:6:"удо";s:3:"229";s:6:"чин";s:3:"230";s:6:"чни";s:3:"231";s:5:"Ñ– в";s:3:"232";s:5:"Ñ–ÑŽ ";s:3:"233";s:4:" а ";s:3:"234";s:5:" во";s:3:"235";s:5:" да";s:3:"236";s:5:" кв";s:3:"237";s:5:" ме";s:3:"238";s:5:" об";s:3:"239";s:5:" Ñк";s:3:"240";s:5:" ти";s:3:"241";s:5:" Ñ„Ñ–";s:3:"242";s:4:" Ñ” ";s:3:"243";s:5:"а Ñ€";s:3:"244";s:5:"а Ñ";s:3:"245";s:5:"а у";s:3:"246";s:5:"ак ";s:3:"247";s:6:"ані";s:3:"248";s:6:"арт";s:3:"249";s:6:"аÑн";s:3:"250";s:5:"в у";s:3:"251";s:6:"вик";s:3:"252";s:6:"віз";s:3:"253";s:6:"дов";s:3:"254";s:6:"дпо";s:3:"255";s:6:"дів";s:3:"256";s:6:"еві";s:3:"257";s:6:"енÑ";s:3:"258";s:5:"же ";s:3:"259";s:5:"и м";s:3:"260";s:5:"и Ñ";s:3:"261";s:6:"ика";s:3:"262";s:6:"ичн";s:3:"263";s:5:"кі ";s:3:"264";s:6:"ків";s:3:"265";s:6:"між";s:3:"266";s:6:"нан";s:3:"267";s:6:"ноÑ";s:3:"268";s:5:"о у";s:3:"269";s:6:"обл";s:3:"270";s:6:"одн";s:3:"271";s:5:"ок 
";s:3:"272";s:6:"оло";s:3:"273";s:6:"отр";s:3:"274";s:6:"рен";s:3:"275";s:6:"рим";s:3:"276";s:6:"роз";s:3:"277";s:5:"ÑÑŒ ";s:3:"278";s:5:"ÑÑ– ";s:3:"279";s:6:"тла";s:3:"280";s:6:"тів";s:3:"281";s:5:"у з";s:3:"282";s:6:"уго";s:3:"283";s:6:"уді";s:3:"284";s:5:"чи ";s:3:"285";s:5:"ше ";s:3:"286";s:5:"Ñ Ð½";s:3:"287";s:5:"Ñ Ñƒ";s:3:"288";s:6:"ідп";s:3:"289";s:5:"ій ";s:3:"290";s:6:"іна";s:3:"291";s:5:"Ñ–Ñ ";s:3:"292";s:5:" ка";s:3:"293";s:5:" ни";s:3:"294";s:5:" оÑ";s:3:"295";s:5:" Ñи";s:3:"296";s:5:" то";s:3:"297";s:5:" Ñ‚Ñ€";s:3:"298";s:5:" уг";s:3:"299";}s:4:"urdu";a:300:{s:5:"یں ";s:1:"0";s:5:" Ú©ÛŒ";s:1:"1";s:5:"Ú©Û’ ";s:1:"2";s:5:" Ú©Û’";s:1:"3";s:5:"Ù†Û’ ";s:1:"4";s:5:" Ú©Û";s:1:"5";s:5:"Û’ Ú©";s:1:"6";s:5:"Ú©ÛŒ ";s:1:"7";s:6:"میں";s:1:"8";s:5:" Ù…ÛŒ";s:1:"9";s:5:"ÛÛ’ ";s:2:"10";s:5:"ÙˆÚº ";s:2:"11";s:5:"Ú©Û ";s:2:"12";s:5:" ÛÛ’";s:2:"13";s:5:"ان ";s:2:"14";s:6:"Ûیں";s:2:"15";s:5:"ور ";s:2:"16";s:5:" Ú©Ùˆ";s:2:"17";s:5:"یا ";s:2:"18";s:5:" ان";s:2:"19";s:5:" Ù†Û’";s:2:"20";s:5:"سے ";s:2:"21";s:5:" سے";s:2:"22";s:5:" کر";s:2:"23";s:6:"ستا";s:2:"24";s:5:" او";s:2:"25";s:6:"اور";s:2:"26";s:6:"تان";s:2:"27";s:5:"ر Ú©";s:2:"28";s:5:"ÛŒ Ú©";s:2:"29";s:5:" اس";s:2:"30";s:5:"Û’ ا";s:2:"31";s:5:" پا";s:2:"32";s:5:" ÛÙˆ";s:2:"33";s:5:" پر";s:2:"34";s:5:"ر٠";s:2:"35";s:5:" کا";s:2:"36";s:5:"ا Ú©";s:2:"37";s:5:"ÛŒ ا";s:2:"38";s:5:" ÛÛŒ";s:2:"39";s:5:"در ";s:2:"40";s:5:"Ú©Ùˆ ";s:2:"41";s:5:" ای";s:2:"42";s:5:"Úº Ú©";s:2:"43";s:5:" مش";s:2:"44";s:5:" مل";s:2:"45";s:5:"ات ";s:2:"46";s:6:"صدر";s:2:"47";s:6:"اکس";s:2:"48";s:6:"شرÙ";s:2:"49";s:6:"مشر";s:2:"50";s:6:"پاک";s:2:"51";s:6:"کست";s:2:"52";s:5:"ÛŒ Ù…";s:2:"53";s:5:" دی";s:2:"54";s:5:" صد";s:2:"55";s:5:" ÛŒÛ";s:2:"56";s:5:"ا Û";s:2:"57";s:5:"Ù† Ú©";s:2:"58";s:6:"وال";s:2:"59";s:5:"ÛŒÛ ";s:2:"60";s:5:"Û’ Ùˆ";s:2:"61";s:5:" بھ";s:2:"62";s:5:" دو";s:2:"63";s:5:"اس ";s:2:"64";s:5:"ر ا";s:2:"65";s:6:"Ù†ÛÛŒ";s:2:"66";s:5:"کا ";s:2:"67";s:5:"Û’ س";s:2:"68";s:5:"ئی ";s:2:"69";s:5:"Û Ø§";s:2:"70";s:5:"یت ";s:2:"71";s:5:"Û’ 
Û";s:2:"72";s:5:"ت Ú©";s:2:"73";s:5:" سا";s:2:"74";s:5:"Ù„Û’ ";s:2:"75";s:5:"Ûا ";s:2:"76";s:5:"Û’ ب";s:2:"77";s:5:" وا";s:2:"78";s:5:"ار ";s:2:"79";s:5:"Ù†ÛŒ ";s:2:"80";s:6:"Ú©Ûا";s:2:"81";s:5:"ÛŒ Û";s:2:"82";s:5:"Û’ Ù…";s:2:"83";s:5:" سی";s:2:"84";s:5:" Ù„ÛŒ";s:2:"85";s:6:"انÛ";s:2:"86";s:6:"انی";s:2:"87";s:5:"ر Ù…";s:2:"88";s:5:"ر Ù¾";s:2:"89";s:6:"ریت";s:2:"90";s:5:"Ù† Ù…";s:2:"91";s:5:"ھا ";s:2:"92";s:5:"یر ";s:2:"93";s:5:" جا";s:2:"94";s:5:" جن";s:2:"95";s:5:"ئے ";s:2:"96";s:5:"پر ";s:2:"97";s:5:"Úº Ù†";s:2:"98";s:5:"Û Ú©";s:2:"99";s:5:"ÛŒ Ùˆ";s:3:"100";s:5:"Û’ د";s:3:"101";s:5:" تو";s:3:"102";s:5:" تھ";s:3:"103";s:5:" Ú¯ÛŒ";s:3:"104";s:6:"ایک";s:3:"105";s:5:"Ù„ Ú©";s:3:"106";s:5:"نا ";s:3:"107";s:5:"کر ";s:3:"108";s:5:"Úº Ù…";s:3:"109";s:5:"یک ";s:3:"110";s:5:" با";s:3:"111";s:5:"ا ت";s:3:"112";s:5:"دی ";s:3:"113";s:5:"Ù† س";s:3:"114";s:6:"کیا";s:3:"115";s:6:"یوں";s:3:"116";s:5:"Û’ ج";s:3:"117";s:5:"ال ";s:3:"118";s:5:"تو ";s:3:"119";s:5:"Úº ا";s:3:"120";s:5:"Û’ Ù¾";s:3:"121";s:5:" چا";s:3:"122";s:5:"ام ";s:3:"123";s:6:"بھی";s:3:"124";s:5:"تی ";s:3:"125";s:5:"تے ";s:3:"126";s:6:"دوس";s:3:"127";s:5:"س Ú©";s:3:"128";s:6:"ملک";s:3:"129";s:5:"Ù† ا";s:3:"130";s:6:"Ûور";s:3:"131";s:5:"یے ";s:3:"132";s:5:" مو";s:3:"133";s:5:" ÙˆÚ©";s:3:"134";s:6:"ائی";s:3:"135";s:6:"ارت";s:3:"136";s:6:"الے";s:3:"137";s:6:"بھا";s:3:"138";s:6:"ردی";s:3:"139";s:5:"ری ";s:3:"140";s:5:"ÙˆÛ ";s:3:"141";s:6:"ویز";s:3:"142";s:5:"Úº د";s:3:"143";s:5:"Ú¾ÛŒ ";s:3:"144";s:5:"ÛŒ س";s:3:"145";s:5:" رÛ";s:3:"146";s:5:" من";s:3:"147";s:5:" Ù†Û";s:3:"148";s:5:" ور";s:3:"149";s:5:" ÙˆÛ";s:3:"150";s:5:" ÛÙ†";s:3:"151";s:5:"ا ا";s:3:"152";s:6:"است";s:3:"153";s:5:"ت ا";s:3:"154";s:5:"ت Ù¾";s:3:"155";s:5:"د Ú©";s:3:"156";s:5:"ز Ù…";s:3:"157";s:5:"ند ";s:3:"158";s:6:"ورد";s:3:"159";s:6:"ÙˆÚ©Ù„";s:3:"160";s:5:"Ú¯ÛŒ ";s:3:"161";s:6:"گیا";s:3:"162";s:5:"Û Ù¾";s:3:"163";s:5:"یز ";s:3:"164";s:5:"Û’ ت";s:3:"165";s:5:" اع";s:3:"166";s:5:" اپ";s:3:"167";s:5:" جس";s:3:"168";s:5:" جم";s:3:"169";s:5:" 
جو";s:3:"170";s:5:" سر";s:3:"171";s:6:"اپن";s:3:"172";s:6:"اکث";s:3:"173";s:6:"تھا";s:3:"174";s:6:"ثری";s:3:"175";s:6:"دیا";s:3:"176";s:5:"ر د";s:3:"177";s:5:"رت ";s:3:"178";s:6:"روی";s:3:"179";s:5:"سی ";s:3:"180";s:6:"ملا";s:3:"181";s:6:"ندو";s:3:"182";s:6:"وست";s:3:"183";s:6:"پرو";s:3:"184";s:6:"چاÛ";s:3:"185";s:6:"کثر";s:3:"186";s:6:"کلا";s:3:"187";s:5:"Û Û";s:3:"188";s:6:"Ûند";s:3:"189";s:5:"ÛÙˆ ";s:3:"190";s:5:"Û’ Ù„";s:3:"191";s:5:" اک";s:3:"192";s:5:" دا";s:3:"193";s:5:" سن";s:3:"194";s:5:" وز";s:3:"195";s:5:" Ù¾ÛŒ";s:3:"196";s:5:"ا Ú†";s:3:"197";s:5:"اء ";s:3:"198";s:6:"اتھ";s:3:"199";s:6:"اقا";s:3:"200";s:5:"Ø§Û ";s:3:"201";s:5:"تھ ";s:3:"202";s:5:"دو ";s:3:"203";s:5:"ر ب";s:3:"204";s:6:"روا";s:3:"205";s:5:"رے ";s:3:"206";s:6:"سات";s:3:"207";s:5:"Ù Ú©";s:3:"208";s:6:"قات";s:3:"209";s:5:"لا ";s:3:"210";s:6:"لاء";s:3:"211";s:5:"Ù… Ù…";s:3:"212";s:5:"Ù… Ú©";s:3:"213";s:5:"من ";s:3:"214";s:6:"نوں";s:3:"215";s:5:"Ùˆ ا";s:3:"216";s:6:"کرن";s:3:"217";s:5:"Úº Û";s:3:"218";s:6:"ھار";s:3:"219";s:6:"Ûوئ";s:3:"220";s:5:"ÛÛŒ ";s:3:"221";s:5:"یش ";s:3:"222";s:5:" ام";s:3:"223";s:5:" لا";s:3:"224";s:5:" مس";s:3:"225";s:5:" پو";s:3:"226";s:5:" Ù¾Û";s:3:"227";s:6:"انے";s:3:"228";s:5:"ت Ù…";s:3:"229";s:5:"ت Û";s:3:"230";s:5:"ج Ú©";s:3:"231";s:6:"دون";s:3:"232";s:6:"زیر";s:3:"233";s:5:"س س";s:3:"234";s:5:"Ø´ Ú©";s:3:"235";s:5:"Ù Ù†";s:3:"236";s:5:"Ù„ Û";s:3:"237";s:6:"لاق";s:3:"238";s:5:"Ù„ÛŒ ";s:3:"239";s:6:"وری";s:3:"240";s:6:"وزی";s:3:"241";s:6:"ونو";s:3:"242";s:6:"Ú©Ú¾Ù†";s:3:"243";s:5:"گا ";s:3:"244";s:5:"Úº س";s:3:"245";s:5:"Úº Ú¯";s:3:"246";s:6:"Ú¾Ù†Û’";s:3:"247";s:5:"Ú¾Û’ ";s:3:"248";s:5:"Û Ø¨";s:3:"249";s:5:"Û Ø¬";s:3:"250";s:5:"Ûر ";s:3:"251";s:5:"ÛŒ Ø¢";s:3:"252";s:5:"ÛŒ Ù¾";s:3:"253";s:5:" Øا";s:3:"254";s:5:" ÙˆÙ";s:3:"255";s:5:" گا";s:3:"256";s:5:"ا ج";s:3:"257";s:5:"ا Ú¯";s:3:"258";s:5:"اد ";s:3:"259";s:6:"ادی";s:3:"260";s:6:"اعظ";s:3:"261";s:6:"اÛت";s:3:"262";s:5:"جس ";s:3:"263";s:6:"جمÛ";s:3:"264";s:5:"جو ";s:3:"265";s:5:"ر س";s:3:"266";s:5:"ر 
Û";s:3:"267";s:6:"رنے";s:3:"268";s:5:"س Ù…";s:3:"269";s:5:"سا ";s:3:"270";s:6:"سند";s:3:"271";s:6:"سنگ";s:3:"272";s:5:"ظم ";s:3:"273";s:6:"عظم";s:3:"274";s:5:"Ù„ Ù…";s:3:"275";s:6:"لیے";s:3:"276";s:5:"مل ";s:3:"277";s:6:"موÛ";s:3:"278";s:6:"Ù…ÛÙˆ";s:3:"279";s:6:"Ù†Ú¯Ú¾";s:3:"280";s:5:"Ùˆ ص";s:3:"281";s:6:"ورٹ";s:3:"282";s:6:"ÙˆÛÙ†";s:3:"283";s:5:"Ú©Ù† ";s:3:"284";s:5:"Ú¯Ú¾ ";s:3:"285";s:5:"Ú¯Û’ ";s:3:"286";s:5:"Úº ج";s:3:"287";s:5:"Úº Ùˆ";s:3:"288";s:5:"Úº ÛŒ";s:3:"289";s:5:"Û Ø¯";s:3:"290";s:5:"ÛÙ† ";s:3:"291";s:6:"ÛÙˆÚº";s:3:"292";s:5:"Û’ Ø";s:3:"293";s:5:"Û’ Ú¯";s:3:"294";s:5:"Û’ ÛŒ";s:3:"295";s:5:" اگ";s:3:"296";s:5:" بع";s:3:"297";s:5:" رو";s:3:"298";s:5:" شا";s:3:"299";}s:5:"uzbek";a:300:{s:5:"ан ";s:1:"0";s:6:"ган";s:1:"1";s:6:"лар";s:1:"2";s:5:"га ";s:1:"3";s:5:"нг ";s:1:"4";s:6:"инг";s:1:"5";s:6:"нин";s:1:"6";s:5:"да ";s:1:"7";s:5:"ни ";s:1:"8";s:6:"ида";s:1:"9";s:6:"ари";s:2:"10";s:6:"ига";s:2:"11";s:6:"ини";s:2:"12";s:5:"ар ";s:2:"13";s:5:"ди ";s:2:"14";s:5:" би";s:2:"15";s:6:"ани";s:2:"16";s:5:" бо";s:2:"17";s:6:"дан";s:2:"18";s:6:"лга";s:2:"19";s:5:" ҳа";s:2:"20";s:5:" ва";s:2:"21";s:5:" Ñа";s:2:"22";s:5:"ги ";s:2:"23";s:6:"ила";s:2:"24";s:5:"н б";s:2:"25";s:5:"и б";s:2:"26";s:5:" кў";s:2:"27";s:5:" та";s:2:"28";s:5:"ир ";s:2:"29";s:5:" ма";s:2:"30";s:6:"ага";s:2:"31";s:6:"ала";s:2:"32";s:6:"бир";s:2:"33";s:5:"ри ";s:2:"34";s:6:"тга";s:2:"35";s:6:"лан";s:2:"36";s:6:"лик";s:2:"37";s:5:"а к";s:2:"38";s:6:"аги";s:2:"39";s:6:"ати";s:2:"40";s:5:"та ";s:2:"41";s:6:"ади";s:2:"42";s:6:"даг";s:2:"43";s:6:"рга";s:2:"44";s:5:" йи";s:2:"45";s:5:" ми";s:2:"46";s:5:" па";s:2:"47";s:5:" бў";s:2:"48";s:5:" қа";s:2:"49";s:5:" қи";s:2:"50";s:5:"а б";s:2:"51";s:6:"илл";s:2:"52";s:5:"ли ";s:2:"53";s:6:"аÑи";s:2:"54";s:5:"и Ñ‚";s:2:"55";s:5:"ик ";s:2:"56";s:6:"или";s:2:"57";s:6:"лла";s:2:"58";s:6:"ард";s:2:"59";s:6:"вчи";s:2:"60";s:5:"ва ";s:2:"61";s:5:"иб ";s:2:"62";s:6:"ири";s:2:"63";s:6:"лиг";s:2:"64";s:6:"нга";s:2:"65";s:6:"ран";s:2:"66";s:5:" ке";s:2:"67";s:5:" 
ўз";s:2:"68";s:5:"а Ñ";s:2:"69";s:6:"ахт";s:2:"70";s:6:"бўл";s:2:"71";s:6:"иги";s:2:"72";s:6:"кўр";s:2:"73";s:6:"рда";s:2:"74";s:6:"рни";s:2:"75";s:5:"Ñа ";s:2:"76";s:5:" бе";s:2:"77";s:5:" бу";s:2:"78";s:5:" да";s:2:"79";s:5:" жа";s:2:"80";s:5:"а Ñ‚";s:2:"81";s:6:"ази";s:2:"82";s:6:"ери";s:2:"83";s:5:"и а";s:2:"84";s:6:"илг";s:2:"85";s:6:"йил";s:2:"86";s:6:"ман";s:2:"87";s:6:"пах";s:2:"88";s:6:"рид";s:2:"89";s:5:"ти ";s:2:"90";s:6:"увч";s:2:"91";s:6:"хта";s:2:"92";s:5:" не";s:2:"93";s:5:" Ñо";s:2:"94";s:5:" уч";s:2:"95";s:6:"айт";s:2:"96";s:6:"лли";s:2:"97";s:6:"тла";s:2:"98";s:5:" ай";s:2:"99";s:5:" Ñ„Ñ€";s:3:"100";s:5:" ÑÑ‚";s:3:"101";s:5:" ҳо";s:3:"102";s:5:"а Ò›";s:3:"103";s:6:"али";s:3:"104";s:6:"аро";s:3:"105";s:6:"бер";s:3:"106";s:6:"бил";s:3:"107";s:6:"бор";s:3:"108";s:6:"ими";s:3:"109";s:6:"иÑÑ‚";s:3:"110";s:5:"он ";s:3:"111";s:6:"рин";s:3:"112";s:6:"тер";s:3:"113";s:6:"тил";s:3:"114";s:5:"ун ";s:3:"115";s:6:"фра";s:3:"116";s:6:"қил";s:3:"117";s:5:" ба";s:3:"118";s:5:" ол";s:3:"119";s:6:"анÑ";s:3:"120";s:6:"ефт";s:3:"121";s:6:"зир";s:3:"122";s:6:"кат";s:3:"123";s:6:"мил";s:3:"124";s:6:"неф";s:3:"125";s:6:"Ñаг";s:3:"126";s:5:"чи ";s:3:"127";s:6:"ўра";s:3:"128";s:5:" на";s:3:"129";s:5:" те";s:3:"130";s:5:" Ñн";s:3:"131";s:5:"а Ñ";s:3:"132";s:5:"ам ";s:3:"133";s:6:"арн";s:3:"134";s:5:"ат ";s:3:"135";s:5:"иш ";s:3:"136";s:5:"ма ";s:3:"137";s:6:"нла";s:3:"138";s:6:"рли";s:3:"139";s:6:"чил";s:3:"140";s:6:"шга";s:3:"141";s:5:" иш";s:3:"142";s:5:" му";s:3:"143";s:5:" ÑžÒ›";s:3:"144";s:6:"ара";s:3:"145";s:6:"ваз";s:3:"146";s:5:"и у";s:3:"147";s:5:"иқ ";s:3:"148";s:6:"моқ";s:3:"149";s:6:"рим";s:3:"150";s:6:"учу";s:3:"151";s:6:"чун";s:3:"152";s:5:"ши ";s:3:"153";s:6:"Ñнг";s:3:"154";s:6:"қув";s:3:"155";s:6:"ҳам";s:3:"156";s:5:" ÑÑž";s:3:"157";s:5:" ши";s:3:"158";s:6:"бар";s:3:"159";s:6:"бек";s:3:"160";s:6:"дам";s:3:"161";s:5:"и Ò³";s:3:"162";s:6:"иши";s:3:"163";s:6:"лад";s:3:"164";s:6:"оли";s:3:"165";s:6:"олл";s:3:"166";s:6:"ори";s:3:"167";s:6:"оқд";s:3:"168";s:5:"Ñ€ 
б";s:3:"169";s:5:"ра ";s:3:"170";s:6:"рла";s:3:"171";s:6:"уни";s:3:"172";s:5:"Ñ„Ñ‚ ";s:3:"173";s:6:"ўлг";s:3:"174";s:6:"ўқу";s:3:"175";s:5:" де";s:3:"176";s:5:" ка";s:3:"177";s:5:" қў";s:3:"178";s:5:"а Ñž";s:3:"179";s:6:"аба";s:3:"180";s:6:"амм";s:3:"181";s:6:"атл";s:3:"182";s:5:"б к";s:3:"183";s:6:"бош";s:3:"184";s:6:"збе";s:3:"185";s:5:"и в";s:3:"186";s:5:"им ";s:3:"187";s:5:"ин ";s:3:"188";s:6:"ишл";s:3:"189";s:6:"лаб";s:3:"190";s:6:"лей";s:3:"191";s:6:"мин";s:3:"192";s:5:"н д";s:3:"193";s:6:"нда";s:3:"194";s:5:"оқ ";s:3:"195";s:5:"Ñ€ м";s:3:"196";s:6:"рил";s:3:"197";s:6:"Ñид";s:3:"198";s:6:"тал";s:3:"199";s:6:"тан";s:3:"200";s:6:"тид";s:3:"201";s:6:"тон";s:3:"202";s:6:"ўзб";s:3:"203";s:5:" ам";s:3:"204";s:5:" ки";s:3:"205";s:5:"а Ò³";s:3:"206";s:6:"анг";s:3:"207";s:6:"анд";s:3:"208";s:6:"арт";s:3:"209";s:6:"аёт";s:3:"210";s:6:"дир";s:3:"211";s:6:"ент";s:3:"212";s:5:"и д";s:3:"213";s:5:"и м";s:3:"214";s:5:"и о";s:3:"215";s:5:"и Ñ";s:3:"216";s:6:"иро";s:3:"217";s:6:"йти";s:3:"218";s:6:"нÑу";s:3:"219";s:6:"оди";s:3:"220";s:5:"ор ";s:3:"221";s:5:"Ñи ";s:3:"222";s:6:"тиш";s:3:"223";s:6:"тоб";s:3:"224";s:6:"Ñти";s:3:"225";s:6:"қар";s:3:"226";s:6:"қда";s:3:"227";s:5:" бл";s:3:"228";s:5:" ге";s:3:"229";s:5:" до";s:3:"230";s:5:" ду";s:3:"231";s:5:" но";s:3:"232";s:5:" пр";s:3:"233";s:5:" ра";s:3:"234";s:5:" фо";s:3:"235";s:5:" қо";s:3:"236";s:5:"а м";s:3:"237";s:5:"а о";s:3:"238";s:6:"айд";s:3:"239";s:6:"ало";s:3:"240";s:6:"ама";s:3:"241";s:6:"бле";s:3:"242";s:5:"г н";s:3:"243";s:6:"дол";s:3:"244";s:6:"ейр";s:3:"245";s:5:"ек ";s:3:"246";s:6:"ерг";s:3:"247";s:6:"жар";s:3:"248";s:6:"зид";s:3:"249";s:5:"и к";s:3:"250";s:5:"и Ñ„";s:3:"251";s:5:"ий ";s:3:"252";s:6:"ило";s:3:"253";s:6:"лди";s:3:"254";s:6:"либ";s:3:"255";s:6:"лин";s:3:"256";s:5:"ми ";s:3:"257";s:6:"мма";s:3:"258";s:5:"н в";s:3:"259";s:5:"н к";s:3:"260";s:5:"н Ñž";s:3:"261";s:5:"н 
Ò³";s:3:"262";s:6:"ози";s:3:"263";s:6:"ора";s:3:"264";s:6:"оÑи";s:3:"265";s:6:"раÑ";s:3:"266";s:6:"риш";s:3:"267";s:6:"рка";s:3:"268";s:6:"роқ";s:3:"269";s:6:"Ñто";s:3:"270";s:6:"тин";s:3:"271";s:6:"хат";s:3:"272";s:6:"шир";s:3:"273";s:5:" ав";s:3:"274";s:5:" рў";s:3:"275";s:5:" ту";s:3:"276";s:5:" ўт";s:3:"277";s:5:"а п";s:3:"278";s:6:"авт";s:3:"279";s:6:"ада";s:3:"280";s:6:"аза";s:3:"281";s:6:"анл";s:3:"282";s:5:"б б";s:3:"283";s:6:"бой";s:3:"284";s:5:"бу ";s:3:"285";s:6:"вто";s:3:"286";s:5:"г Ñ";s:3:"287";s:6:"гин";s:3:"288";s:6:"дар";s:3:"289";s:6:"ден";s:3:"290";s:6:"дун";s:3:"291";s:6:"иде";s:3:"292";s:6:"ион";s:3:"293";s:6:"ирл";s:3:"294";s:6:"ишг";s:3:"295";s:6:"йха";s:3:"296";s:6:"кел";s:3:"297";s:6:"кўп";s:3:"298";s:6:"лио";s:3:"299";}s:10:"vietnamese";a:300:{s:3:"ng ";s:1:"0";s:3:" th";s:1:"1";s:3:" ch";s:1:"2";s:3:"g t";s:1:"3";s:3:" nh";s:1:"4";s:4:"ông";s:1:"5";s:3:" kh";s:1:"6";s:3:" tr";s:1:"7";s:3:"nh ";s:1:"8";s:4:" cô";s:1:"9";s:4:"côn";s:2:"10";s:3:" ty";s:2:"11";s:3:"ty ";s:2:"12";s:3:"i t";s:2:"13";s:3:"n t";s:2:"14";s:3:" ng";s:2:"15";s:5:"ại ";s:2:"16";s:3:" ti";s:2:"17";s:3:"ch ";s:2:"18";s:3:"y l";s:2:"19";s:5:"á»n ";s:2:"20";s:5:" Ä‘Æ°";s:2:"21";s:3:"hi ";s:2:"22";s:5:" gở";s:2:"23";s:5:"gởi";s:2:"24";s:5:"iá»n";s:2:"25";s:5:"tiá»";s:2:"26";s:5:"ởi ";s:2:"27";s:3:" gi";s:2:"28";s:3:" le";s:2:"29";s:3:" vi";s:2:"30";s:3:"cho";s:2:"31";s:3:"ho ";s:2:"32";s:4:"khá";s:2:"33";s:4:" và ";s:2:"34";s:4:"hác";s:2:"35";s:3:" ph";s:2:"36";s:3:"am ";s:2:"37";s:4:"hà n";s:2:"38";s:4:"ách";s:2:"39";s:4:"ôi ";s:2:"40";s:3:"i n";s:2:"41";s:6:"ược";s:2:"42";s:5:"ợc ";s:2:"43";s:4:" tô";s:2:"44";s:4:"chú";s:2:"45";s:5:"iệt";s:2:"46";s:4:"tôi";s:2:"47";s:4:"ên ";s:2:"48";s:4:"úng";s:2:"49";s:5:"ệt ";s:2:"50";s:4:" có";s:2:"51";s:3:"c t";s:2:"52";s:4:"có ";s:2:"53";s:4:"hún";s:2:"54";s:5:"việ";s:2:"55";s:7:"đượ";s:2:"56";s:3:" na";s:2:"57";s:3:"g c";s:2:"58";s:3:"i c";s:2:"59";s:3:"n c";s:2:"60";s:3:"n n";s:2:"61";s:3:"t n";s:2:"62";s:4:"và ";s:2:"63";s:3:"n 
l";s:2:"64";s:4:"n Ä‘";s:2:"65";s:4:"à ng";s:2:"66";s:4:"ác ";s:2:"67";s:5:"ất ";s:2:"68";s:3:"h l";s:2:"69";s:3:"nam";s:2:"70";s:4:"ân ";s:2:"71";s:4:"ăm ";s:2:"72";s:4:" hà ";s:2:"73";s:4:" là ";s:2:"74";s:4:" nă";s:2:"75";s:3:" qu";s:2:"76";s:5:" tạ";s:2:"77";s:3:"g m";s:2:"78";s:4:"năm";s:2:"79";s:5:"tại";s:2:"80";s:5:"á»›i ";s:2:"81";s:5:" lẹ";s:2:"82";s:3:"ay ";s:2:"83";s:3:"e g";s:2:"84";s:3:"h h";s:2:"85";s:3:"i v";s:2:"86";s:4:"i Ä‘";s:2:"87";s:3:"le ";s:2:"88";s:5:"lẹ ";s:2:"89";s:5:"á»u ";s:2:"90";s:5:"á»i ";s:2:"91";s:4:"hân";s:2:"92";s:3:"nhi";s:2:"93";s:3:"t t";s:2:"94";s:5:" củ";s:2:"95";s:5:" má»™";s:2:"96";s:5:" vá»";s:2:"97";s:4:" Ä‘i";s:2:"98";s:3:"an ";s:2:"99";s:5:"của";s:3:"100";s:4:"là ";s:3:"101";s:5:"má»™t";s:3:"102";s:5:"vá» ";s:3:"103";s:4:"à nh";s:3:"104";s:5:"ết ";s:3:"105";s:5:"á»™t ";s:3:"106";s:5:"ủa ";s:3:"107";s:3:" bi";s:3:"108";s:4:" cá";s:3:"109";s:3:"a c";s:3:"110";s:3:"anh";s:3:"111";s:4:"các";s:3:"112";s:3:"h c";s:3:"113";s:5:"iá»u";s:3:"114";s:3:"m t";s:3:"115";s:5:"ện ";s:3:"116";s:3:" ho";s:3:"117";s:3:"'s ";s:3:"118";s:3:"ave";s:3:"119";s:3:"e's";s:3:"120";s:3:"el ";s:3:"121";s:3:"g n";s:3:"122";s:3:"le'";s:3:"123";s:3:"n v";s:3:"124";s:3:"o c";s:3:"125";s:3:"rav";s:3:"126";s:3:"s t";s:3:"127";s:3:"thi";s:3:"128";s:3:"tra";s:3:"129";s:3:"vel";s:3:"130";s:5:"áºn ";s:3:"131";s:5:"ến ";s:3:"132";s:3:" ba";s:3:"133";s:3:" cu";s:3:"134";s:3:" sa";s:3:"135";s:5:" đó";s:3:"136";s:6:" đế";s:3:"137";s:3:"c c";s:3:"138";s:3:"chu";s:3:"139";s:5:"hiá»";s:3:"140";s:3:"huy";s:3:"141";s:3:"khi";s:3:"142";s:4:"nhâ";s:3:"143";s:4:"nhÆ°";s:3:"144";s:3:"ong";s:3:"145";s:3:"ron";s:3:"146";s:3:"thu";s:3:"147";s:4:"thÆ°";s:3:"148";s:3:"tro";s:3:"149";s:3:"y c";s:3:"150";s:4:"à y ";s:3:"151";s:6:"đến";s:3:"152";s:6:"Æ°á»i";s:3:"153";s:6:"Æ°á»n";s:3:"154";s:5:"á» v";s:3:"155";s:5:"á»ng";s:3:"156";s:5:" vá»›";s:3:"157";s:5:"cuá»™";s:3:"158";s:4:"g Ä‘";s:3:"159";s:5:"iết";s:3:"160";s:5:"iện";s:3:"161";s:4:"ngà ";s:3:"162";s:3:"o 
t";s:3:"163";s:3:"u c";s:3:"164";s:5:"uá»™c";s:3:"165";s:5:"vá»›i";s:3:"166";s:4:"à c";s:3:"167";s:4:"à i ";s:3:"168";s:4:"Æ¡ng";s:3:"169";s:5:"Æ°Æ¡n";s:3:"170";s:5:"ải ";s:3:"171";s:5:"á»™c ";s:3:"172";s:5:"ức ";s:3:"173";s:3:" an";s:3:"174";s:5:" láº";s:3:"175";s:3:" ra";s:3:"176";s:5:" sẽ";s:3:"177";s:5:" số";s:3:"178";s:5:" tổ";s:3:"179";s:3:"a k";s:3:"180";s:5:"biế";s:3:"181";s:3:"c n";s:3:"182";s:4:"c Ä‘";s:3:"183";s:5:"chứ";s:3:"184";s:3:"g v";s:3:"185";s:3:"gia";s:3:"186";s:4:"gà y";s:3:"187";s:4:"hán";s:3:"188";s:4:"hôn";s:3:"189";s:4:"hÆ° ";s:3:"190";s:5:"hức";s:3:"191";s:3:"i g";s:3:"192";s:3:"i h";s:3:"193";s:3:"i k";s:3:"194";s:3:"i p";s:3:"195";s:4:"iên";s:3:"196";s:4:"khô";s:3:"197";s:5:"láºp";s:3:"198";s:3:"n k";s:3:"199";s:3:"ra ";s:3:"200";s:4:"rên";s:3:"201";s:5:"sẽ ";s:3:"202";s:3:"t c";s:3:"203";s:4:"thà ";s:3:"204";s:4:"trê";s:3:"205";s:5:"tổ ";s:3:"206";s:3:"u n";s:3:"207";s:3:"y t";s:3:"208";s:4:"ình";s:3:"209";s:5:"ấy ";s:3:"210";s:5:"áºp ";s:3:"211";s:5:"ổ c";s:3:"212";s:4:" má";s:3:"213";s:6:" để";s:3:"214";s:3:"ai ";s:3:"215";s:3:"c s";s:3:"216";s:6:"gÆ°á»";s:3:"217";s:3:"h v";s:3:"218";s:3:"hoa";s:3:"219";s:5:"hoạ";s:3:"220";s:3:"inh";s:3:"221";s:3:"m n";s:3:"222";s:4:"máy";s:3:"223";s:3:"n g";s:3:"224";s:4:"ngÆ°";s:3:"225";s:5:"nháº";s:3:"226";s:3:"o n";s:3:"227";s:3:"oa ";s:3:"228";s:4:"oà n";s:3:"229";s:3:"p c";s:3:"230";s:5:"số ";s:3:"231";s:4:"t Ä‘";s:3:"232";s:3:"y v";s:3:"233";s:4:"à o ";s:3:"234";s:4:"áy ";s:3:"235";s:4:"ăn ";s:3:"236";s:5:"đó ";s:3:"237";s:6:"để ";s:3:"238";s:6:"Æ°á»›c";s:3:"239";s:5:"ần ";s:3:"240";s:5:"ển ";s:3:"241";s:5:"á»›c ";s:3:"242";s:4:" bá";s:3:"243";s:4:" cÆ¡";s:3:"244";s:5:" cả";s:3:"245";s:5:" cầ";s:3:"246";s:5:" há»";s:3:"247";s:5:" kỳ";s:3:"248";s:3:" li";s:3:"249";s:5:" mạ";s:3:"250";s:5:" sở";s:3:"251";s:5:" tặ";s:3:"252";s:4:" vé";s:3:"253";s:5:" vụ";s:3:"254";s:6:" đạ";s:3:"255";s:4:"a Ä‘";s:3:"256";s:3:"bay";s:3:"257";s:4:"cÆ¡ ";s:3:"258";s:3:"g 
s";s:3:"259";s:3:"han";s:3:"260";s:5:"hÆ°Æ¡";s:3:"261";s:3:"i s";s:3:"262";s:5:"kỳ ";s:3:"263";s:3:"m c";s:3:"264";s:3:"n m";s:3:"265";s:3:"n p";s:3:"266";s:3:"o b";s:3:"267";s:5:"oại";s:3:"268";s:3:"qua";s:3:"269";s:5:"sở ";s:3:"270";s:3:"tha";s:3:"271";s:4:"thá";s:3:"272";s:5:"tặn";s:3:"273";s:4:"và o";s:3:"274";s:4:"vé ";s:3:"275";s:5:"vụ ";s:3:"276";s:3:"y b";s:3:"277";s:4:"à n ";s:3:"278";s:4:"áng";s:3:"279";s:4:"Æ¡ s";s:3:"280";s:5:"ầu ";s:3:"281";s:5:"áºt ";s:3:"282";s:5:"ặng";s:3:"283";s:5:"á»c ";s:3:"284";s:5:"ở t";s:3:"285";s:5:"ững";s:3:"286";s:3:" du";s:3:"287";s:3:" lu";s:3:"288";s:3:" ta";s:3:"289";s:3:" to";s:3:"290";s:5:" từ";s:3:"291";s:5:" ở ";s:3:"292";s:3:"a v";s:3:"293";s:3:"ao ";s:3:"294";s:3:"c v";s:3:"295";s:5:"cả ";s:3:"296";s:3:"du ";s:3:"297";s:3:"g l";s:3:"298";s:5:"giả";s:3:"299";}s:5:"welsh";a:300:{s:3:"yn ";s:1:"0";s:3:"dd ";s:1:"1";s:3:" yn";s:1:"2";s:3:" y ";s:1:"3";s:3:"ydd";s:1:"4";s:3:"eth";s:1:"5";s:3:"th ";s:1:"6";s:3:" i ";s:1:"7";s:3:"aet";s:1:"8";s:3:"d y";s:1:"9";s:3:"ch ";s:2:"10";s:3:"od ";s:2:"11";s:3:"ol ";s:2:"12";s:3:"edd";s:2:"13";s:3:" ga";s:2:"14";s:3:" gw";s:2:"15";s:3:"'r ";s:2:"16";s:3:"au ";s:2:"17";s:3:"ddi";s:2:"18";s:3:"ad ";s:2:"19";s:3:" cy";s:2:"20";s:3:" gy";s:2:"21";s:3:" ei";s:2:"22";s:3:" o ";s:2:"23";s:3:"iad";s:2:"24";s:3:"yr ";s:2:"25";s:3:"an ";s:2:"26";s:3:"bod";s:2:"27";s:3:"wed";s:2:"28";s:3:" bo";s:2:"29";s:3:" dd";s:2:"30";s:3:"el ";s:2:"31";s:3:"n y";s:2:"32";s:3:" am";s:2:"33";s:3:"di ";s:2:"34";s:3:"edi";s:2:"35";s:3:"on ";s:2:"36";s:3:" we";s:2:"37";s:3:" ym";s:2:"38";s:3:" ar";s:2:"39";s:3:" rh";s:2:"40";s:3:"odd";s:2:"41";s:3:" ca";s:2:"42";s:3:" ma";s:2:"43";s:3:"ael";s:2:"44";s:3:"oed";s:2:"45";s:3:"dae";s:2:"46";s:3:"n a";s:2:"47";s:3:"dda";s:2:"48";s:3:"er ";s:2:"49";s:3:"h y";s:2:"50";s:3:"all";s:2:"51";s:3:"ei ";s:2:"52";s:3:" ll";s:2:"53";s:3:"am ";s:2:"54";s:3:"eu ";s:2:"55";s:3:"fod";s:2:"56";s:3:"fyd";s:2:"57";s:3:"l y";s:2:"58";s:3:"n g";s:2:"59";s:3:"wyn";s:2:"60";s:3:"d 
a";s:2:"61";s:3:"i g";s:2:"62";s:3:"mae";s:2:"63";s:3:"neu";s:2:"64";s:3:"os ";s:2:"65";s:3:" ne";s:2:"66";s:3:"d i";s:2:"67";s:3:"dod";s:2:"68";s:3:"dol";s:2:"69";s:3:"n c";s:2:"70";s:3:"r h";s:2:"71";s:3:"wyd";s:2:"72";s:3:"wyr";s:2:"73";s:3:"ai ";s:2:"74";s:3:"ar ";s:2:"75";s:3:"in ";s:2:"76";s:3:"rth";s:2:"77";s:3:" fy";s:2:"78";s:3:" he";s:2:"79";s:3:" me";s:2:"80";s:3:" yr";s:2:"81";s:3:"'n ";s:2:"82";s:3:"dia";s:2:"83";s:3:"est";s:2:"84";s:3:"h c";s:2:"85";s:3:"hai";s:2:"86";s:3:"i d";s:2:"87";s:3:"id ";s:2:"88";s:3:"r y";s:2:"89";s:3:"y b";s:2:"90";s:3:" dy";s:2:"91";s:3:" ha";s:2:"92";s:3:"ada";s:2:"93";s:3:"i b";s:2:"94";s:3:"n i";s:2:"95";s:3:"ote";s:2:"96";s:3:"rot";s:2:"97";s:3:"tes";s:2:"98";s:3:"y g";s:2:"99";s:3:"yd ";s:3:"100";s:3:" ad";s:3:"101";s:3:" mr";s:3:"102";s:3:" un";s:3:"103";s:3:"cyn";s:3:"104";s:3:"dau";s:3:"105";s:3:"ddy";s:3:"106";s:3:"edo";s:3:"107";s:3:"i c";s:3:"108";s:3:"i w";s:3:"109";s:3:"ith";s:3:"110";s:3:"lae";s:3:"111";s:3:"lla";s:3:"112";s:3:"nd ";s:3:"113";s:3:"oda";s:3:"114";s:3:"ryd";s:3:"115";s:3:"tho";s:3:"116";s:3:" a ";s:3:"117";s:3:" dr";s:3:"118";s:3:"aid";s:3:"119";s:3:"ain";s:3:"120";s:3:"ddo";s:3:"121";s:3:"dyd";s:3:"122";s:3:"fyn";s:3:"123";s:3:"gyn";s:3:"124";s:3:"hol";s:3:"125";s:3:"io ";s:3:"126";s:3:"o a";s:3:"127";s:3:"wch";s:3:"128";s:3:"wyb";s:3:"129";s:3:"ybo";s:3:"130";s:3:"ych";s:3:"131";s:3:" br";s:3:"132";s:3:" by";s:3:"133";s:3:" di";s:3:"134";s:3:" fe";s:3:"135";s:3:" na";s:3:"136";s:3:" o'";s:3:"137";s:3:" pe";s:3:"138";s:3:"art";s:3:"139";s:3:"byd";s:3:"140";s:3:"dro";s:3:"141";s:3:"gal";s:3:"142";s:3:"l e";s:3:"143";s:3:"lai";s:3:"144";s:3:"mr ";s:3:"145";s:3:"n n";s:3:"146";s:3:"r a";s:3:"147";s:3:"rhy";s:3:"148";s:3:"wn ";s:3:"149";s:3:"ynn";s:3:"150";s:3:" on";s:3:"151";s:3:" r ";s:3:"152";s:3:"cae";s:3:"153";s:3:"d g";s:3:"154";s:3:"d o";s:3:"155";s:3:"d w";s:3:"156";s:3:"gan";s:3:"157";s:3:"gwy";s:3:"158";s:3:"n d";s:3:"159";s:3:"n f";s:3:"160";s:3:"n 
o";s:3:"161";s:3:"ned";s:3:"162";s:3:"ni ";s:3:"163";s:3:"o'r";s:3:"164";s:3:"r d";s:3:"165";s:3:"ud ";s:3:"166";s:3:"wei";s:3:"167";s:3:"wrt";s:3:"168";s:3:" an";s:3:"169";s:3:" cw";s:3:"170";s:3:" da";s:3:"171";s:3:" ni";s:3:"172";s:3:" pa";s:3:"173";s:3:" pr";s:3:"174";s:3:" wy";s:3:"175";s:3:"d e";s:3:"176";s:3:"dai";s:3:"177";s:3:"dim";s:3:"178";s:3:"eud";s:3:"179";s:3:"gwa";s:3:"180";s:3:"idd";s:3:"181";s:3:"im ";s:3:"182";s:3:"iri";s:3:"183";s:3:"lwy";s:3:"184";s:3:"n b";s:3:"185";s:3:"nol";s:3:"186";s:3:"r o";s:3:"187";s:3:"rwy";s:3:"188";s:3:" ch";s:3:"189";s:3:" er";s:3:"190";s:3:" fo";s:3:"191";s:3:" ge";s:3:"192";s:3:" hy";s:3:"193";s:3:" i'";s:3:"194";s:3:" ro";s:3:"195";s:3:" sa";s:3:"196";s:3:" tr";s:3:"197";s:3:"bob";s:3:"198";s:3:"cwy";s:3:"199";s:3:"cyf";s:3:"200";s:3:"dio";s:3:"201";s:3:"dyn";s:3:"202";s:3:"eit";s:3:"203";s:3:"hel";s:3:"204";s:3:"hyn";s:3:"205";s:3:"ich";s:3:"206";s:3:"ll ";s:3:"207";s:3:"mdd";s:3:"208";s:3:"n r";s:3:"209";s:3:"ond";s:3:"210";s:3:"pro";s:3:"211";s:3:"r c";s:3:"212";s:3:"r g";s:3:"213";s:3:"red";s:3:"214";s:3:"rha";s:3:"215";s:3:"u a";s:3:"216";s:3:"u c";s:3:"217";s:3:"u y";s:3:"218";s:3:"y c";s:3:"219";s:3:"ymd";s:3:"220";s:3:"ymr";s:3:"221";s:3:"yw ";s:3:"222";s:3:" ac";s:3:"223";s:3:" be";s:3:"224";s:3:" bl";s:3:"225";s:3:" co";s:3:"226";s:3:" os";s:3:"227";s:3:"adw";s:3:"228";s:3:"ae ";s:3:"229";s:3:"af ";s:3:"230";s:3:"d p";s:3:"231";s:3:"efn";s:3:"232";s:3:"eic";s:3:"233";s:3:"en ";s:3:"234";s:3:"eol";s:3:"235";s:3:"es ";s:3:"236";s:3:"fer";s:3:"237";s:3:"gel";s:3:"238";s:3:"h g";s:3:"239";s:3:"hod";s:3:"240";s:3:"ied";s:3:"241";s:3:"ir ";s:3:"242";s:3:"laf";s:3:"243";s:3:"n h";s:3:"244";s:3:"na ";s:3:"245";s:3:"nyd";s:3:"246";s:3:"odo";s:3:"247";s:3:"ofy";s:3:"248";s:3:"rdd";s:3:"249";s:3:"rie";s:3:"250";s:3:"ros";s:3:"251";s:3:"stw";s:3:"252";s:3:"twy";s:3:"253";s:3:"yda";s:3:"254";s:3:"yng";s:3:"255";s:3:" at";s:3:"256";s:3:" de";s:3:"257";s:3:" go";s:3:"258";s:3:" id";s:3:"259";s:3:" oe";s:3:"260";s:4:" 
â ";s:3:"261";s:3:"'ch";s:3:"262";s:3:"ac ";s:3:"263";s:3:"ach";s:3:"264";s:3:"ae'";s:3:"265";s:3:"al ";s:3:"266";s:3:"bl ";s:3:"267";s:3:"d c";s:3:"268";s:3:"d l";s:3:"269";s:3:"dan";s:3:"270";s:3:"dde";s:3:"271";s:3:"ddw";s:3:"272";s:3:"dir";s:3:"273";s:3:"dla";s:3:"274";s:3:"ed ";s:3:"275";s:3:"ela";s:3:"276";s:3:"ell";s:3:"277";s:3:"ene";s:3:"278";s:3:"ewn";s:3:"279";s:3:"gyd";s:3:"280";s:3:"hau";s:3:"281";s:3:"hyw";s:3:"282";s:3:"i a";s:3:"283";s:3:"i f";s:3:"284";s:3:"iol";s:3:"285";s:3:"ion";s:3:"286";s:3:"l a";s:3:"287";s:3:"l i";s:3:"288";s:3:"lia";s:3:"289";s:3:"med";s:3:"290";s:3:"mon";s:3:"291";s:3:"n s";s:3:"292";s:3:"no ";s:3:"293";s:3:"obl";s:3:"294";s:3:"ola";s:3:"295";s:3:"ref";s:3:"296";s:3:"rn ";s:3:"297";s:3:"thi";s:3:"298";s:3:"un ";s:3:"299";}}s:18:"trigram-unicodemap";a:13:{s:11:"Basic Latin";a:38:{s:8:"albanian";i:661;s:5:"azeri";i:653;s:7:"bengali";i:1;s:7:"cebuano";i:750;s:8:"croatian";i:733;s:5:"czech";i:652;s:6:"danish";i:734;s:5:"dutch";i:741;s:7:"english";i:723;s:8:"estonian";i:739;s:7:"finnish";i:743;s:6:"french";i:733;s:6:"german";i:750;s:5:"hausa";i:752;s:8:"hawaiian";i:751;s:9:"hungarian";i:693;s:9:"icelandic";i:662;s:10:"indonesian";i:776;s:7:"italian";i:741;s:5:"latin";i:764;s:7:"latvian";i:693;s:10:"lithuanian";i:738;s:9:"mongolian";i:19;s:9:"norwegian";i:742;s:6:"pidgin";i:702;s:6:"polish";i:701;s:10:"portuguese";i:726;s:8:"romanian";i:714;s:6:"slovak";i:677;s:7:"slovene";i:740;s:6:"somali";i:755;s:7:"spanish";i:749;s:7:"swahili";i:770;s:7:"swedish";i:717;s:7:"tagalog";i:767;s:7:"turkish";i:673;s:10:"vietnamese";i:503;s:5:"welsh";i:728;}s:18:"Latin-1 
Supplement";a:21:{s:8:"albanian";i:68;s:5:"azeri";i:10;s:5:"czech";i:51;s:6:"danish";i:13;s:8:"estonian";i:19;s:7:"finnish";i:39;s:6:"french";i:21;s:6:"german";i:8;s:9:"hungarian";i:72;s:9:"icelandic";i:80;s:7:"italian";i:3;s:9:"norwegian";i:5;s:6:"polish";i:6;s:10:"portuguese";i:18;s:8:"romanian";i:9;s:6:"slovak";i:37;s:7:"spanish";i:6;s:7:"swedish";i:26;s:7:"turkish";i:25;s:10:"vietnamese";i:56;s:5:"welsh";i:1;}s:14:"[Malformatted]";a:42:{s:8:"albanian";i:68;s:6:"arabic";i:724;s:5:"azeri";i:109;s:7:"bengali";i:1472;s:9:"bulgarian";i:750;s:8:"croatian";i:10;s:5:"czech";i:78;s:6:"danish";i:13;s:8:"estonian";i:19;s:5:"farsi";i:706;s:7:"finnish";i:39;s:6:"french";i:21;s:6:"german";i:8;s:5:"hausa";i:8;s:5:"hindi";i:1386;s:9:"hungarian";i:74;s:9:"icelandic";i:80;s:7:"italian";i:3;s:6:"kazakh";i:767;s:6:"kyrgyz";i:767;s:7:"latvian";i:56;s:10:"lithuanian";i:30;s:10:"macedonian";i:755;s:9:"mongolian";i:743;s:6:"nepali";i:1514;s:9:"norwegian";i:5;s:6:"pashto";i:677;s:6:"polish";i:45;s:10:"portuguese";i:18;s:8:"romanian";i:31;s:7:"russian";i:759;s:7:"serbian";i:757;s:6:"slovak";i:45;s:7:"slovene";i:10;s:7:"spanish";i:6;s:7:"swedish";i:26;s:7:"turkish";i:87;s:9:"ukrainian";i:748;s:4:"urdu";i:682;s:5:"uzbek";i:773;s:10:"vietnamese";i:289;s:5:"welsh";i:1;}s:6:"Arabic";a:4:{s:6:"arabic";i:724;s:5:"farsi";i:706;s:6:"pashto";i:677;s:4:"urdu";i:682;}s:16:"Latin Extended-B";a:3:{s:5:"azeri";i:73;s:5:"hausa";i:8;s:10:"vietnamese";i:19;}s:16:"Latin Extended-A";a:12:{s:5:"azeri";i:25;s:8:"croatian";i:10;s:5:"czech";i:27;s:9:"hungarian";i:2;s:7:"latvian";i:56;s:10:"lithuanian";i:30;s:6:"polish";i:39;s:8:"romanian";i:22;s:6:"slovak";i:8;s:7:"slovene";i:10;s:7:"turkish";i:62;s:10:"vietnamese";i:20;}s:27:"Combining Diacritical 
Marks";a:1:{s:5:"azeri";i:1;}s:7:"Bengali";a:1:{s:7:"bengali";i:714;}s:8:"Gujarati";a:1:{s:7:"bengali";i:16;}s:8:"Gurmukhi";a:1:{s:7:"bengali";i:6;}s:8:"Cyrillic";a:9:{s:9:"bulgarian";i:750;s:6:"kazakh";i:767;s:6:"kyrgyz";i:767;s:10:"macedonian";i:755;s:9:"mongolian";i:743;s:7:"russian";i:759;s:7:"serbian";i:757;s:9:"ukrainian";i:748;s:5:"uzbek";i:773;}s:10:"Devanagari";a:2:{s:5:"hindi";i:693;s:6:"nepali";i:757;}s:25:"Latin Extended Additional";a:1:{s:10:"vietnamese";i:97;}}} \ No newline at end of file | |||
diff --git a/inc/3rdparty/libraries/language-detect/unicode_blocks.dat b/inc/3rdparty/libraries/language-detect/unicode_blocks.dat new file mode 100644 index 00000000..3b24cd2c --- /dev/null +++ b/inc/3rdparty/libraries/language-detect/unicode_blocks.dat | |||
@@ -0,0 +1 @@ | |||
a:145:{i:0;a:3:{i:0;s:6:"0x0000";i:1;s:6:"0x007F";i:2;s:11:"Basic Latin";}i:1;a:3:{i:0;s:6:"0x0080";i:1;s:6:"0x00FF";i:2;s:18:"Latin-1 Supplement";}i:2;a:3:{i:0;s:6:"0x0100";i:1;s:6:"0x017F";i:2;s:16:"Latin Extended-A";}i:3;a:3:{i:0;s:6:"0x0180";i:1;s:6:"0x024F";i:2;s:16:"Latin Extended-B";}i:4;a:3:{i:0;s:6:"0x0250";i:1;s:6:"0x02AF";i:2;s:14:"IPA Extensions";}i:5;a:3:{i:0;s:6:"0x02B0";i:1;s:6:"0x02FF";i:2;s:24:"Spacing Modifier Letters";}i:6;a:3:{i:0;s:6:"0x0300";i:1;s:6:"0x036F";i:2;s:27:"Combining Diacritical Marks";}i:7;a:3:{i:0;s:6:"0x0370";i:1;s:6:"0x03FF";i:2;s:16:"Greek and Coptic";}i:8;a:3:{i:0;s:6:"0x0400";i:1;s:6:"0x04FF";i:2;s:8:"Cyrillic";}i:9;a:3:{i:0;s:6:"0x0500";i:1;s:6:"0x052F";i:2;s:19:"Cyrillic Supplement";}i:10;a:3:{i:0;s:6:"0x0530";i:1;s:6:"0x058F";i:2;s:8:"Armenian";}i:11;a:3:{i:0;s:6:"0x0590";i:1;s:6:"0x05FF";i:2;s:6:"Hebrew";}i:12;a:3:{i:0;s:6:"0x0600";i:1;s:6:"0x06FF";i:2;s:6:"Arabic";}i:13;a:3:{i:0;s:6:"0x0700";i:1;s:6:"0x074F";i:2;s:6:"Syriac";}i:14;a:3:{i:0;s:6:"0x0750";i:1;s:6:"0x077F";i:2;s:17:"Arabic 
Supplement";}i:15;a:3:{i:0;s:6:"0x0780";i:1;s:6:"0x07BF";i:2;s:6:"Thaana";}i:16;a:3:{i:0;s:6:"0x0900";i:1;s:6:"0x097F";i:2;s:10:"Devanagari";}i:17;a:3:{i:0;s:6:"0x0980";i:1;s:6:"0x09FF";i:2;s:7:"Bengali";}i:18;a:3:{i:0;s:6:"0x0A00";i:1;s:6:"0x0A7F";i:2;s:8:"Gurmukhi";}i:19;a:3:{i:0;s:6:"0x0A80";i:1;s:6:"0x0AFF";i:2;s:8:"Gujarati";}i:20;a:3:{i:0;s:6:"0x0B00";i:1;s:6:"0x0B7F";i:2;s:5:"Oriya";}i:21;a:3:{i:0;s:6:"0x0B80";i:1;s:6:"0x0BFF";i:2;s:5:"Tamil";}i:22;a:3:{i:0;s:6:"0x0C00";i:1;s:6:"0x0C7F";i:2;s:6:"Telugu";}i:23;a:3:{i:0;s:6:"0x0C80";i:1;s:6:"0x0CFF";i:2;s:7:"Kannada";}i:24;a:3:{i:0;s:6:"0x0D00";i:1;s:6:"0x0D7F";i:2;s:9:"Malayalam";}i:25;a:3:{i:0;s:6:"0x0D80";i:1;s:6:"0x0DFF";i:2;s:7:"Sinhala";}i:26;a:3:{i:0;s:6:"0x0E00";i:1;s:6:"0x0E7F";i:2;s:4:"Thai";}i:27;a:3:{i:0;s:6:"0x0E80";i:1;s:6:"0x0EFF";i:2;s:3:"Lao";}i:28;a:3:{i:0;s:6:"0x0F00";i:1;s:6:"0x0FFF";i:2;s:7:"Tibetan";}i:29;a:3:{i:0;s:6:"0x1000";i:1;s:6:"0x109F";i:2;s:7:"Myanmar";}i:30;a:3:{i:0;s:6:"0x10A0";i:1;s:6:"0x10FF";i:2;s:8:"Georgian";}i:31;a:3:{i:0;s:6:"0x1100";i:1;s:6:"0x11FF";i:2;s:11:"Hangul Jamo";}i:32;a:3:{i:0;s:6:"0x1200";i:1;s:6:"0x137F";i:2;s:8:"Ethiopic";}i:33;a:3:{i:0;s:6:"0x1380";i:1;s:6:"0x139F";i:2;s:19:"Ethiopic Supplement";}i:34;a:3:{i:0;s:6:"0x13A0";i:1;s:6:"0x13FF";i:2;s:8:"Cherokee";}i:35;a:3:{i:0;s:6:"0x1400";i:1;s:6:"0x167F";i:2;s:37:"Unified Canadian Aboriginal Syllabics";}i:36;a:3:{i:0;s:6:"0x1680";i:1;s:6:"0x169F";i:2;s:5:"Ogham";}i:37;a:3:{i:0;s:6:"0x16A0";i:1;s:6:"0x16FF";i:2;s:5:"Runic";}i:38;a:3:{i:0;s:6:"0x1700";i:1;s:6:"0x171F";i:2;s:7:"Tagalog";}i:39;a:3:{i:0;s:6:"0x1720";i:1;s:6:"0x173F";i:2;s:7:"Hanunoo";}i:40;a:3:{i:0;s:6:"0x1740";i:1;s:6:"0x175F";i:2;s:5:"Buhid";}i:41;a:3:{i:0;s:6:"0x1760";i:1;s:6:"0x177F";i:2;s:8:"Tagbanwa";}i:42;a:3:{i:0;s:6:"0x1780";i:1;s:6:"0x17FF";i:2;s:5:"Khmer";}i:43;a:3:{i:0;s:6:"0x1800";i:1;s:6:"0x18AF";i:2;s:9:"Mongolian";}i:44;a:3:{i:0;s:6:"0x1900";i:1;s:6:"0x194F";i:2;s:5:"Limbu";}i:45;a:3:{i:0;s:6:"0x1950";i:1;s:6:"0x197F";i:2;s:6:"Tai 
Le";}i:46;a:3:{i:0;s:6:"0x1980";i:1;s:6:"0x19DF";i:2;s:11:"New Tai Lue";}i:47;a:3:{i:0;s:6:"0x19E0";i:1;s:6:"0x19FF";i:2;s:13:"Khmer Symbols";}i:48;a:3:{i:0;s:6:"0x1A00";i:1;s:6:"0x1A1F";i:2;s:8:"Buginese";}i:49;a:3:{i:0;s:6:"0x1D00";i:1;s:6:"0x1D7F";i:2;s:19:"Phonetic Extensions";}i:50;a:3:{i:0;s:6:"0x1D80";i:1;s:6:"0x1DBF";i:2;s:30:"Phonetic Extensions Supplement";}i:51;a:3:{i:0;s:6:"0x1DC0";i:1;s:6:"0x1DFF";i:2;s:38:"Combining Diacritical Marks Supplement";}i:52;a:3:{i:0;s:6:"0x1E00";i:1;s:6:"0x1EFF";i:2;s:25:"Latin Extended Additional";}i:53;a:3:{i:0;s:6:"0x1F00";i:1;s:6:"0x1FFF";i:2;s:14:"Greek Extended";}i:54;a:3:{i:0;s:6:"0x2000";i:1;s:6:"0x206F";i:2;s:19:"General Punctuation";}i:55;a:3:{i:0;s:6:"0x2070";i:1;s:6:"0x209F";i:2;s:27:"Superscripts and Subscripts";}i:56;a:3:{i:0;s:6:"0x20A0";i:1;s:6:"0x20CF";i:2;s:16:"Currency Symbols";}i:57;a:3:{i:0;s:6:"0x20D0";i:1;s:6:"0x20FF";i:2;s:39:"Combining Diacritical Marks for Symbols";}i:58;a:3:{i:0;s:6:"0x2100";i:1;s:6:"0x214F";i:2;s:18:"Letterlike Symbols";}i:59;a:3:{i:0;s:6:"0x2150";i:1;s:6:"0x218F";i:2;s:12:"Number Forms";}i:60;a:3:{i:0;s:6:"0x2190";i:1;s:6:"0x21FF";i:2;s:6:"Arrows";}i:61;a:3:{i:0;s:6:"0x2200";i:1;s:6:"0x22FF";i:2;s:22:"Mathematical Operators";}i:62;a:3:{i:0;s:6:"0x2300";i:1;s:6:"0x23FF";i:2;s:23:"Miscellaneous Technical";}i:63;a:3:{i:0;s:6:"0x2400";i:1;s:6:"0x243F";i:2;s:16:"Control Pictures";}i:64;a:3:{i:0;s:6:"0x2440";i:1;s:6:"0x245F";i:2;s:29:"Optical Character Recognition";}i:65;a:3:{i:0;s:6:"0x2460";i:1;s:6:"0x24FF";i:2;s:22:"Enclosed Alphanumerics";}i:66;a:3:{i:0;s:6:"0x2500";i:1;s:6:"0x257F";i:2;s:11:"Box Drawing";}i:67;a:3:{i:0;s:6:"0x2580";i:1;s:6:"0x259F";i:2;s:14:"Block Elements";}i:68;a:3:{i:0;s:6:"0x25A0";i:1;s:6:"0x25FF";i:2;s:16:"Geometric Shapes";}i:69;a:3:{i:0;s:6:"0x2600";i:1;s:6:"0x26FF";i:2;s:21:"Miscellaneous Symbols";}i:70;a:3:{i:0;s:6:"0x2700";i:1;s:6:"0x27BF";i:2;s:8:"Dingbats";}i:71;a:3:{i:0;s:6:"0x27C0";i:1;s:6:"0x27EF";i:2;s:36:"Miscellaneous Mathematical 
Symbols-A";}i:72;a:3:{i:0;s:6:"0x27F0";i:1;s:6:"0x27FF";i:2;s:21:"Supplemental Arrows-A";}i:73;a:3:{i:0;s:6:"0x2800";i:1;s:6:"0x28FF";i:2;s:16:"Braille Patterns";}i:74;a:3:{i:0;s:6:"0x2900";i:1;s:6:"0x297F";i:2;s:21:"Supplemental Arrows-B";}i:75;a:3:{i:0;s:6:"0x2980";i:1;s:6:"0x29FF";i:2;s:36:"Miscellaneous Mathematical Symbols-B";}i:76;a:3:{i:0;s:6:"0x2A00";i:1;s:6:"0x2AFF";i:2;s:35:"Supplemental Mathematical Operators";}i:77;a:3:{i:0;s:6:"0x2B00";i:1;s:6:"0x2BFF";i:2;s:32:"Miscellaneous Symbols and Arrows";}i:78;a:3:{i:0;s:6:"0x2C00";i:1;s:6:"0x2C5F";i:2;s:10:"Glagolitic";}i:79;a:3:{i:0;s:6:"0x2C80";i:1;s:6:"0x2CFF";i:2;s:6:"Coptic";}i:80;a:3:{i:0;s:6:"0x2D00";i:1;s:6:"0x2D2F";i:2;s:19:"Georgian Supplement";}i:81;a:3:{i:0;s:6:"0x2D30";i:1;s:6:"0x2D7F";i:2;s:8:"Tifinagh";}i:82;a:3:{i:0;s:6:"0x2D80";i:1;s:6:"0x2DDF";i:2;s:17:"Ethiopic Extended";}i:83;a:3:{i:0;s:6:"0x2E00";i:1;s:6:"0x2E7F";i:2;s:24:"Supplemental Punctuation";}i:84;a:3:{i:0;s:6:"0x2E80";i:1;s:6:"0x2EFF";i:2;s:23:"CJK Radicals Supplement";}i:85;a:3:{i:0;s:6:"0x2F00";i:1;s:6:"0x2FDF";i:2;s:15:"Kangxi Radicals";}i:86;a:3:{i:0;s:6:"0x2FF0";i:1;s:6:"0x2FFF";i:2;s:34:"Ideographic Description Characters";}i:87;a:3:{i:0;s:6:"0x3000";i:1;s:6:"0x303F";i:2;s:27:"CJK Symbols and Punctuation";}i:88;a:3:{i:0;s:6:"0x3040";i:1;s:6:"0x309F";i:2;s:8:"Hiragana";}i:89;a:3:{i:0;s:6:"0x30A0";i:1;s:6:"0x30FF";i:2;s:8:"Katakana";}i:90;a:3:{i:0;s:6:"0x3100";i:1;s:6:"0x312F";i:2;s:8:"Bopomofo";}i:91;a:3:{i:0;s:6:"0x3130";i:1;s:6:"0x318F";i:2;s:25:"Hangul Compatibility Jamo";}i:92;a:3:{i:0;s:6:"0x3190";i:1;s:6:"0x319F";i:2;s:6:"Kanbun";}i:93;a:3:{i:0;s:6:"0x31A0";i:1;s:6:"0x31BF";i:2;s:17:"Bopomofo Extended";}i:94;a:3:{i:0;s:6:"0x31C0";i:1;s:6:"0x31EF";i:2;s:11:"CJK Strokes";}i:95;a:3:{i:0;s:6:"0x31F0";i:1;s:6:"0x31FF";i:2;s:28:"Katakana Phonetic Extensions";}i:96;a:3:{i:0;s:6:"0x3200";i:1;s:6:"0x32FF";i:2;s:31:"Enclosed CJK Letters and Months";}i:97;a:3:{i:0;s:6:"0x3300";i:1;s:6:"0x33FF";i:2;s:17:"CJK 
Compatibility";}i:98;a:3:{i:0;s:6:"0x3400";i:1;s:6:"0x4DBF";i:2;s:34:"CJK Unified Ideographs Extension A";}i:99;a:3:{i:0;s:6:"0x4DC0";i:1;s:6:"0x4DFF";i:2;s:23:"Yijing Hexagram Symbols";}i:100;a:3:{i:0;s:6:"0x4E00";i:1;s:6:"0x9FFF";i:2;s:22:"CJK Unified Ideographs";}i:101;a:3:{i:0;s:6:"0xA000";i:1;s:6:"0xA48F";i:2;s:12:"Yi Syllables";}i:102;a:3:{i:0;s:6:"0xA490";i:1;s:6:"0xA4CF";i:2;s:11:"Yi Radicals";}i:103;a:3:{i:0;s:6:"0xA700";i:1;s:6:"0xA71F";i:2;s:21:"Modifier Tone Letters";}i:104;a:3:{i:0;s:6:"0xA800";i:1;s:6:"0xA82F";i:2;s:12:"Syloti Nagri";}i:105;a:3:{i:0;s:6:"0xAC00";i:1;s:6:"0xD7AF";i:2;s:16:"Hangul Syllables";}i:106;a:3:{i:0;s:6:"0xD800";i:1;s:6:"0xDB7F";i:2;s:15:"High Surrogates";}i:107;a:3:{i:0;s:6:"0xDB80";i:1;s:6:"0xDBFF";i:2;s:27:"High Private Use Surrogates";}i:108;a:3:{i:0;s:6:"0xDC00";i:1;s:6:"0xDFFF";i:2;s:14:"Low Surrogates";}i:109;a:3:{i:0;s:6:"0xE000";i:1;s:6:"0xF8FF";i:2;s:16:"Private Use Area";}i:110;a:3:{i:0;s:6:"0xF900";i:1;s:6:"0xFAFF";i:2;s:28:"CJK Compatibility Ideographs";}i:111;a:3:{i:0;s:6:"0xFB00";i:1;s:6:"0xFB4F";i:2;s:29:"Alphabetic Presentation Forms";}i:112;a:3:{i:0;s:6:"0xFB50";i:1;s:6:"0xFDFF";i:2;s:27:"Arabic Presentation Forms-A";}i:113;a:3:{i:0;s:6:"0xFE00";i:1;s:6:"0xFE0F";i:2;s:19:"Variation Selectors";}i:114;a:3:{i:0;s:6:"0xFE10";i:1;s:6:"0xFE1F";i:2;s:14:"Vertical Forms";}i:115;a:3:{i:0;s:6:"0xFE20";i:1;s:6:"0xFE2F";i:2;s:20:"Combining Half Marks";}i:116;a:3:{i:0;s:6:"0xFE30";i:1;s:6:"0xFE4F";i:2;s:23:"CJK Compatibility Forms";}i:117;a:3:{i:0;s:6:"0xFE50";i:1;s:6:"0xFE6F";i:2;s:19:"Small Form Variants";}i:118;a:3:{i:0;s:6:"0xFE70";i:1;s:6:"0xFEFF";i:2;s:27:"Arabic Presentation Forms-B";}i:119;a:3:{i:0;s:6:"0xFF00";i:1;s:6:"0xFFEF";i:2;s:29:"Halfwidth and Fullwidth Forms";}i:120;a:3:{i:0;s:6:"0xFFF0";i:1;s:6:"0xFFFF";i:2;s:8:"Specials";}i:121;a:3:{i:0;s:7:"0x10000";i:1;s:7:"0x1007F";i:2;s:18:"Linear B Syllabary";}i:122;a:3:{i:0;s:7:"0x10080";i:1;s:7:"0x100FF";i:2;s:18:"Linear B 
Ideograms";}i:123;a:3:{i:0;s:7:"0x10100";i:1;s:7:"0x1013F";i:2;s:14:"Aegean Numbers";}i:124;a:3:{i:0;s:7:"0x10140";i:1;s:7:"0x1018F";i:2;s:21:"Ancient Greek Numbers";}i:125;a:3:{i:0;s:7:"0x10300";i:1;s:7:"0x1032F";i:2;s:10:"Old Italic";}i:126;a:3:{i:0;s:7:"0x10330";i:1;s:7:"0x1034F";i:2;s:6:"Gothic";}i:127;a:3:{i:0;s:7:"0x10380";i:1;s:7:"0x1039F";i:2;s:8:"Ugaritic";}i:128;a:3:{i:0;s:7:"0x103A0";i:1;s:7:"0x103DF";i:2;s:11:"Old Persian";}i:129;a:3:{i:0;s:7:"0x10400";i:1;s:7:"0x1044F";i:2;s:7:"Deseret";}i:130;a:3:{i:0;s:7:"0x10450";i:1;s:7:"0x1047F";i:2;s:7:"Shavian";}i:131;a:3:{i:0;s:7:"0x10480";i:1;s:7:"0x104AF";i:2;s:7:"Osmanya";}i:132;a:3:{i:0;s:7:"0x10800";i:1;s:7:"0x1083F";i:2;s:17:"Cypriot Syllabary";}i:133;a:3:{i:0;s:7:"0x10A00";i:1;s:7:"0x10A5F";i:2;s:10:"Kharoshthi";}i:134;a:3:{i:0;s:7:"0x1D000";i:1;s:7:"0x1D0FF";i:2;s:25:"Byzantine Musical Symbols";}i:135;a:3:{i:0;s:7:"0x1D100";i:1;s:7:"0x1D1FF";i:2;s:15:"Musical Symbols";}i:136;a:3:{i:0;s:7:"0x1D200";i:1;s:7:"0x1D24F";i:2;s:30:"Ancient Greek Musical Notation";}i:137;a:3:{i:0;s:7:"0x1D300";i:1;s:7:"0x1D35F";i:2;s:21:"Tai Xuan Jing Symbols";}i:138;a:3:{i:0;s:7:"0x1D400";i:1;s:7:"0x1D7FF";i:2;s:33:"Mathematical Alphanumeric Symbols";}i:139;a:3:{i:0;s:7:"0x20000";i:1;s:7:"0x2A6DF";i:2;s:34:"CJK Unified Ideographs Extension B";}i:140;a:3:{i:0;s:7:"0x2F800";i:1;s:7:"0x2FA1F";i:2;s:39:"CJK Compatibility Ideographs Supplement";}i:141;a:3:{i:0;s:7:"0xE0000";i:1;s:7:"0xE007F";i:2;s:4:"Tags";}i:142;a:3:{i:0;s:7:"0xE0100";i:1;s:7:"0xE01EF";i:2;s:30:"Variation Selectors Supplement";}i:143;a:3:{i:0;s:7:"0xF0000";i:1;s:7:"0xFFFFF";i:2;s:32:"Supplementary Private Use Area-A";}i:144;a:3:{i:0;s:8:"0x100000";i:1;s:8:"0x10FFFF";i:2;s:32:"Supplementary Private Use Area-B";}} \ No newline at end of file | |||
diff --git a/inc/3rdparty/JSLikeHTMLElement.php b/inc/3rdparty/libraries/readability/JSLikeHTMLElement.php index 238ba8a8..a8eeccf4 100644 --- a/inc/3rdparty/JSLikeHTMLElement.php +++ b/inc/3rdparty/libraries/readability/JSLikeHTMLElement.php | |||
@@ -1,109 +1,110 @@ | |||
1 | <?php | 1 | <?php |
2 | /** | 2 | /** |
3 | * JavaScript-like HTML DOM Element | 3 | * JavaScript-like HTML DOM Element |
4 | * | 4 | * |
5 | * This class extends PHP's DOMElement to allow | 5 | * This class extends PHP's DOMElement to allow |
6 | * users to get and set the innerHTML property of | 6 | * users to get and set the innerHTML property of |
7 | * HTML elements in the same way it's done in | 7 | * HTML elements in the same way it's done in |
8 | * JavaScript. | 8 | * JavaScript. |
9 | * | 9 | * |
10 | * Example usage: | 10 | * Example usage: |
11 | * @code | 11 | * @code |
12 | * require_once 'JSLikeHTMLElement.php'; | 12 | * require_once 'JSLikeHTMLElement.php'; |
13 | * header('Content-Type: text/plain'); | 13 | * header('Content-Type: text/plain'); |
14 | * $doc = new DOMDocument(); | 14 | * $doc = new DOMDocument(); |
15 | * $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement'); | 15 | * $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement'); |
16 | * $doc->loadHTML('<div><p>Para 1</p><p>Para 2</p></div>'); | 16 | * $doc->loadHTML('<div><p>Para 1</p><p>Para 2</p></div>'); |
17 | * $elem = $doc->getElementsByTagName('div')->item(0); | 17 | * $elem = $doc->getElementsByTagName('div')->item(0); |
18 | * | 18 | * |
19 | * // print innerHTML | 19 | * // print innerHTML |
20 | * echo $elem->innerHTML; // prints '<p>Para 1</p><p>Para 2</p>' | 20 | * echo $elem->innerHTML; // prints '<p>Para 1</p><p>Para 2</p>' |
21 | * echo "\n\n"; | 21 | * echo "\n\n"; |
22 | * | 22 | * |
23 | * // set innerHTML | 23 | * // set innerHTML |
24 | * $elem->innerHTML = '<a href="http://fivefilters.org">FiveFilters.org</a>'; | 24 | * $elem->innerHTML = '<a href="http://fivefilters.org">FiveFilters.org</a>'; |
25 | * echo $elem->innerHTML; // prints '<a href="http://fivefilters.org">FiveFilters.org</a>' | 25 | * echo $elem->innerHTML; // prints '<a href="http://fivefilters.org">FiveFilters.org</a>' |
26 | * echo "\n\n"; | 26 | * echo "\n\n"; |
27 | * | 27 | * |
28 | * // print document (with our changes) | 28 | * // print document (with our changes) |
29 | * echo $doc->saveXML(); | 29 | * echo $doc->saveXML(); |
30 | * @endcode | 30 | * @endcode |
31 | * | 31 | * |
32 | * @author Keyvan Minoukadeh - http://www.keyvan.net - keyvan@keyvan.net | 32 | * @author Keyvan Minoukadeh - http://www.keyvan.net - keyvan@keyvan.net |
33 | * @see http://fivefilters.org (the project this was written for) | 33 | * @see http://fivefilters.org (the project this was written for) |
34 | */ | 34 | */ |
35 | class JSLikeHTMLElement extends DOMElement | 35 | class JSLikeHTMLElement extends DOMElement |
36 | { | 36 | { |
37 | /** | 37 | /** |
38 | * Used for setting innerHTML like it's done in JavaScript: | 38 | * Used for setting innerHTML like it's done in JavaScript: |
39 | * @code | 39 | * @code |
40 | * $div->innerHTML = '<h2>Chapter 2</h2><p>The story begins...</p>'; | 40 | * $div->innerHTML = '<h2>Chapter 2</h2><p>The story begins...</p>'; |
41 | * @endcode | 41 | * @endcode |
42 | */ | 42 | */ |
43 | public function __set($name, $value) { | 43 | public function __set($name, $value) { |
44 | if ($name == 'innerHTML') { | 44 | if ($name == 'innerHTML') { |
45 | // first, empty the element | 45 | // first, empty the element |
46 | for ($x=$this->childNodes->length-1; $x>=0; $x--) { | 46 | for ($x=$this->childNodes->length-1; $x>=0; $x--) { |
47 | $this->removeChild($this->childNodes->item($x)); | 47 | $this->removeChild($this->childNodes->item($x)); |
48 | } | 48 | } |
49 | // $value holds our new inner HTML | 49 | // $value holds our new inner HTML |
50 | if ($value != '') { | 50 | if ($value != '') { |
51 | $f = $this->ownerDocument->createDocumentFragment(); | 51 | $f = $this->ownerDocument->createDocumentFragment(); |
52 | // appendXML() expects well-formed markup (XHTML) | 52 | // appendXML() expects well-formed markup (XHTML) |
53 | $result = @$f->appendXML($value); // @ to suppress PHP warnings | 53 | $result = @$f->appendXML($value); // @ to suppress PHP warnings |
54 | if ($result) { | 54 | if ($result) { |
55 | if ($f->hasChildNodes()) $this->appendChild($f); | 55 | if ($f->hasChildNodes()) $this->appendChild($f); |
56 | } else { | 56 | } else { |
57 | // $value is probably ill-formed | 57 | // $value is probably ill-formed |
58 | $f = new DOMDocument(); | 58 | $f = new DOMDocument(); |
59 | $value = mb_convert_encoding($value, 'HTML-ENTITIES', 'UTF-8'); | 59 | $value = mb_convert_encoding($value, 'HTML-ENTITIES', 'UTF-8'); |
60 | // Using <htmlfragment> will generate a warning, but so will bad HTML | 60 | // Using <htmlfragment> will generate a warning, but so will bad HTML |
61 | // (and by this point, bad HTML is what we've got). | 61 | // (and by this point, bad HTML is what we've got). |
62 | // We use it (and suppress the warning) because an HTML fragment will | 62 | // We use it (and suppress the warning) because an HTML fragment will |
63 | // be wrapped around <html><body> tags which we don't really want to keep. | 63 | // be wrapped around <html><body> tags which we don't really want to keep. |
64 | // Note: despite the warning, if loadHTML succeeds it will return true. | 64 | // Note: despite the warning, if loadHTML succeeds it will return true. |
65 | $result = @$f->loadHTML('<htmlfragment>'.$value.'</htmlfragment>'); | 65 | $result = @$f->loadHTML('<htmlfragment>'.$value.'</htmlfragment>'); |
66 | if ($result) { | 66 | if ($result) { |
67 | $import = $f->getElementsByTagName('htmlfragment')->item(0); | 67 | $import = $f->getElementsByTagName('htmlfragment')->item(0); |
68 | foreach ($import->childNodes as $child) { | 68 | foreach ($import->childNodes as $child) { |
69 | $importedNode = $this->ownerDocument->importNode($child, true); | 69 | $importedNode = $this->ownerDocument->importNode($child, true); |
70 | $this->appendChild($importedNode); | 70 | $this->appendChild($importedNode); |
71 | } | 71 | } |
72 | } else { | 72 | } else { |
73 | // oh well, we tried, we really did. :( | 73 | // oh well, we tried, we really did. :( |
74 | // this element is now empty | 74 | // this element is now empty |
75 | } | 75 | } |
76 | } | 76 | } |
77 | } | 77 | } |
78 | } else { | 78 | } else { |
79 | $trace = debug_backtrace(); | 79 | $trace = debug_backtrace(); |
80 | trigger_error('Undefined property via __set(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE); | 80 | trigger_error('Undefined property via __set(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE); |
81 | } | 81 | } |
82 | } | 82 | } |
83 | 83 | ||
84 | /** | 84 | /** |
85 | * Used for getting innerHTML like it's done in JavaScript: | 85 | * Used for getting innerHTML like it's done in JavaScript: |
86 | * @code | 86 | * @code |
87 | * $string = $div->innerHTML; | 87 | * $string = $div->innerHTML; |
88 | * @endcode | 88 | * @endcode |
89 | */ | 89 | */ |
90 | public function __get($name) | 90 | public function __get($name) |
91 | { | 91 | { |
92 | if ($name == 'innerHTML') { | 92 | if ($name == 'innerHTML') { |
93 | $inner = ''; | 93 | $inner = ''; |
94 | foreach ($this->childNodes as $child) { | 94 | foreach ($this->childNodes as $child) { |
95 | $inner .= $this->ownerDocument->saveXML($child); | 95 | $inner .= $this->ownerDocument->saveXML($child); |
96 | } | 96 | } |
97 | return $inner; | 97 | return $inner; |
98 | } | 98 | } |
99 | 99 | ||
100 | $trace = debug_backtrace(); | 100 | $trace = debug_backtrace(); |
101 | trigger_error('Undefined property via __get(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE); | 101 | trigger_error('Undefined property via __get(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE); |
102 | return null; | 102 | return null; |
103 | } | 103 | } |
104 | 104 | ||
105 | public function __toString() | 105 | public function __toString() |
106 | { | 106 | { |
107 | return '['.$this->tagName.']'; | 107 | return '['.$this->tagName.']'; |
108 | } | 108 | } |
109 | } \ No newline at end of file | 109 | } |
110 | ?> \ No newline at end of file | ||
diff --git a/inc/3rdparty/libraries/readability/Readability.php b/inc/3rdparty/libraries/readability/Readability.php new file mode 100644 index 00000000..2e8991cc --- /dev/null +++ b/inc/3rdparty/libraries/readability/Readability.php | |||
@@ -0,0 +1,1138 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * Arc90's Readability ported to PHP for FiveFilters.org | ||
4 | * Based on readability.js version 1.7.1 (without multi-page support) | ||
5 | * Updated to allow HTML5 parsing with html5lib | ||
6 | * Updated with lightClean mode to preserve more images and youtube/vimeo/viddler embeds | ||
7 | * ------------------------------------------------------ | ||
8 | * Original URL: http://lab.arc90.com/experiments/readability/js/readability.js | ||
9 | * Arc90's project URL: http://lab.arc90.com/experiments/readability/ | ||
10 | * JS Source: http://code.google.com/p/arc90labs-readability | ||
11 | * Ported by: Keyvan Minoukadeh, http://www.keyvan.net | ||
12 | * More information: http://fivefilters.org/content-only/ | ||
13 | * License: Apache License, Version 2.0 | ||
14 | * Requires: PHP5 | ||
15 | * Date: 2012-09-19 | ||
16 | * | ||
17 | * Differences between the PHP port and the original | ||
18 | * ------------------------------------------------------ | ||
19 | * Arc90's Readability is designed to run in the browser. It works on the DOM | ||
20 | * tree (the parsed HTML) after the page's CSS styles have been applied and | ||
21 | * Javascript code executed. This PHP port does not run inside a browser. | ||
22 | * We use PHP's ability to parse HTML to build our DOM tree, but we cannot | ||
23 | * rely on CSS or Javascript support. As such, the results will not always | ||
24 | * match Arc90's Readability. (For example, if a web page contains CSS style | ||
25 | * rules or Javascript code which hide certain HTML elements from display, | ||
26 | * Arc90's Readability will dismiss those from consideration but our PHP port, | ||
27 | * unable to understand CSS or Javascript, will not know any better.) | ||
28 | * | ||
29 | * Another significant difference is that the aim of Arc90's Readability is | ||
30 | * to re-present the main content block of a given web page so users can | ||
31 | * read it more easily in their browsers. Correct identification, clean up, | ||
32 | * and separation of the content block is only a part of this process. | ||
33 | * This PHP port is only concerned with this part, it does not include code | ||
34 | * that relates to presentation in the browser - Arc90 already do | ||
35 | * that extremely well, and for PDF output there's FiveFilters.org's | ||
36 | * PDF Newspaper: http://fivefilters.org/pdf-newspaper/. | ||
37 | * | ||
38 | * Finally, this class contains methods that might be useful for developers | ||
39 | * working on HTML document fragments. So without deviating too much from | ||
40 | * the original code (which I don't want to do because it makes debugging | ||
41 | * and updating more difficult), I've tried to make it a little more | ||
42 | * developer friendly. You should be able to use the methods here on | ||
43 | * existing DOMElement objects without passing an entire HTML document to | ||
44 | * be parsed. | ||
45 | */ | ||
46 | |||
47 | // This class allows us to do JavaScript-like assignments to innerHTML | ||
48 | require_once(dirname(__FILE__).'/JSLikeHTMLElement.php'); | ||
49 | |||
50 | // Alternative usage (for testing only!) | ||
51 | // uncomment the lines below and call Readability.php in your browser | ||
52 | // passing it the URL of the page you'd like content from, e.g.: | ||
53 | // Readability.php?url=http://medialens.org/alerts/09/090615_the_guardian_climate.php | ||
54 | |||
55 | /* | ||
56 | if (!isset($_GET['url']) || $_GET['url'] == '') { | ||
57 | die('Please pass a URL to the script. E.g. Readability.php?url=bla.com/story.html'); | ||
58 | } | ||
59 | $url = $_GET['url']; | ||
60 | if (!preg_match('!^https?://!i', $url)) $url = 'http://'.$url; | ||
61 | $html = file_get_contents($url); | ||
62 | $r = new Readability($html, $url); | ||
63 | $r->init(); | ||
64 | echo $r->articleContent->innerHTML; | ||
65 | */ | ||
66 | |||
67 | class Readability | ||
68 | { | ||
69 | public $version = '1.7.1-without-multi-page'; | ||
70 | public $convertLinksToFootnotes = false; | ||
71 | public $revertForcedParagraphElements = true; | ||
72 | public $articleTitle; | ||
73 | public $articleContent; | ||
74 | public $dom; | ||
75 | public $url = null; // optional - URL where HTML was retrieved | ||
76 | public $debug = false; | ||
77 | public $lightClean = true; // preserves more content (experimental) added 2012-09-19 | ||
78 | protected $body = null; // | ||
79 | protected $bodyCache = null; // Cache the body HTML in case we need to re-use it later | ||
80 | protected $flags = 7; // 1 | 2 | 4; // Start with all flags set. | ||
81 | protected $success = false; // indicates whether we were able to extract or not | ||
82 | |||
83 | /** | ||
84 | * All of the regular expressions in use within readability. | ||
85 | * Defined up here so we don't instantiate them repeatedly in loops. | ||
86 | **/ | ||
87 | public $regexps = array( | ||
88 | 'unlikelyCandidates' => '/combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup/i', | ||
89 | 'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i', | ||
90 | 'positive' => '/article|body|content|entry|hentry|main|page|attachment|pagination|post|text|blog|story/i', | ||
91 | 'negative' => '/combx|comment|com-|contact|foot|footer|_nav|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/i', | ||
92 | 'divToPElements' => '/<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i', | ||
93 | 'replaceBrs' => '/(<br[^>]*>[ \n\r\t]*){2,}/i', | ||
94 | 'replaceFonts' => '/<(\/?)font[^>]*>/i', | ||
95 | // 'trimRe' => '/^\s+|\s+$/g', // PHP has trim() | ||
96 | 'normalize' => '/\s{2,}/', | ||
97 | 'killBreaks' => '/(<br\s*\/?>(\s| ?)*){1,}/', | ||
98 | 'video' => '!//(player\.|www\.)?(youtube|vimeo|viddler)\.com!i', | ||
99 | 'skipFootnoteLink' => '/^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i' | ||
100 | ); | ||
101 | |||
102 | /* constants */ | ||
103 | const FLAG_STRIP_UNLIKELYS = 1; | ||
104 | const FLAG_WEIGHT_CLASSES = 2; | ||
105 | const FLAG_CLEAN_CONDITIONALLY = 4; | ||
106 | |||
107 | /** | ||
108 | * Create instance of Readability | ||
109 | * @param string UTF-8 encoded string | ||
110 | * @param string (optional) URL associated with HTML (used for footnotes) | ||
111 | * @param string which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib') | ||
112 | */ | ||
113 | function __construct($html, $url=null, $parser='libxml') | ||
114 | { | ||
115 | $this->url = $url; | ||
116 | /* Turn runs of two or more br's into paragraph breaks (done on the raw markup, before it is parsed) */ | ||
117 | $html = preg_replace($this->regexps['replaceBrs'], '</p><p>', $html); | ||
118 | $html = preg_replace($this->regexps['replaceFonts'], '<$1span>', $html); // font tags become span tags | ||
119 | $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"); // non-ASCII characters are passed to the parser as HTML entities | ||
120 | if (trim($html) == '') $html = '<html></html>'; // empty input gets a minimal document. NOTE(review): preg_replace() returns null on a PCRE error, which presumably also ends up here - confirm that is acceptable | ||
121 | if ($parser=='html5lib' && ($this->dom = HTML5_Parser::parse($html))) { | ||
122 | // all good - html5lib was requested and returned a document | ||
123 | } else { // libxml requested, or html5lib returned a falsy result | ||
124 | $this->dom = new DOMDocument(); | ||
125 | $this->dom->preserveWhiteSpace = false; | ||
126 | @$this->dom->loadHTML($html); // @ suppresses the warnings libxml raises on malformed markup | ||
127 | } | ||
128 | $this->dom->registerNodeClass('DOMElement', 'JSLikeHTMLElement'); // gives elements the innerHTML property used throughout this class | ||
129 | } | ||
130 | |||
131 | /** | ||
132 | * Get article title element | ||
133 | * @return DOMElement | ||
134 | */ | ||
135 | public function getTitle() { | ||
136 | return $this->articleTitle; // assigned at the end of init() | ||
137 | } | ||
138 | |||
139 | /** | ||
140 | * Get article content element | ||
141 | * @return DOMElement | ||
142 | */ | ||
143 | public function getContent() { | ||
144 | return $this->articleContent; // assigned at the end of init() | ||
145 | } | ||
146 | |||
147 | /** | ||
148 | * Runs readability. | ||
149 | * | ||
150 | * Workflow: | ||
151 | * 1. Prep the document by removing script tags, css, etc. | ||
152 | * 2. Build readability's DOM tree. | ||
153 | * 3. Grab the article content from the current dom tree. | ||
154 | * 4. Replace the current DOM tree with the new one. | ||
155 | * 5. Read peacefully. | ||
156 | * | ||
157 | * @return boolean true if we found content, false otherwise | ||
158 | **/ | ||
159 | public function init() | ||
160 | { | ||
161 | if (!isset($this->dom->documentElement)) return false; // no root element - nothing to extract from | ||
162 | $this->removeScripts($this->dom); | ||
163 | //die($this->getInnerHTML($this->dom->documentElement)); | ||
164 | |||
165 | // Assume successful outcome; reset to false below when no article content is found | ||
166 | $this->success = true; | ||
167 | |||
168 | $bodyElems = $this->dom->getElementsByTagName('body'); | ||
169 | if ($bodyElems->length > 0) { // only the first body element is used | ||
170 | if ($this->bodyCache == null) { | ||
171 | $this->bodyCache = $bodyElems->item(0)->innerHTML; | ||
172 | } | ||
173 | if ($this->body == null) { | ||
174 | $this->body = $bodyElems->item(0); | ||
175 | } | ||
176 | } | ||
177 | |||
178 | $this->prepDocument(); // creates a body element if none was found above, and removes style elements | ||
179 | |||
180 | //die($this->dom->documentElement->parentNode->nodeType); | ||
181 | //$this->setInnerHTML($this->dom->documentElement, $this->getInnerHTML($this->dom->documentElement)); | ||
182 | //die($this->getInnerHTML($this->dom->documentElement)); | ||
183 | |||
184 | /* Build readability's DOM tree: body > div#readOverlay > div#readInner > (title h1, article content) */ | ||
185 | $overlay = $this->dom->createElement('div'); | ||
186 | $innerDiv = $this->dom->createElement('div'); | ||
187 | $articleTitle = $this->getArticleTitle(); | ||
188 | $articleContent = $this->grabArticle(); | ||
189 | |||
190 | if (!$articleContent) { // extraction failed - report failure and substitute a placeholder message | ||
191 | $this->success = false; | ||
192 | $articleContent = $this->dom->createElement('div'); | ||
193 | $articleContent->setAttribute('id', 'readability-content'); | ||
194 | $articleContent->innerHTML = '<p>Sorry, Readability was unable to parse this page for content.</p>'; | ||
195 | } | ||
196 | |||
197 | $overlay->setAttribute('id', 'readOverlay'); | ||
198 | $innerDiv->setAttribute('id', 'readInner'); | ||
199 | |||
200 | /* Glue the structure of our document together. */ | ||
201 | $innerDiv->appendChild($articleTitle); | ||
202 | $innerDiv->appendChild($articleContent); | ||
203 | $overlay->appendChild($innerDiv); | ||
204 | |||
205 | /* Clear the old HTML, insert the new content. */ | ||
206 | $this->body->innerHTML = ''; | ||
207 | $this->body->appendChild($overlay); | ||
208 | //document.body.insertBefore(overlay, document.body.firstChild); | ||
209 | $this->body->removeAttribute('style'); | ||
210 | |||
211 | $this->postProcessContent($articleContent); // optional footnote conversion | ||
212 | |||
213 | // Set title and content instance variables (returned by getTitle() and getContent()) | ||
214 | $this->articleTitle = $articleTitle; | ||
215 | $this->articleContent = $articleContent; | ||
216 | |||
217 | return $this->success; | ||
218 | } | ||
219 | |||
220 | /** | ||
221 | * Debug | ||
222 | */ | ||
223 | protected function dbg($msg) { | ||
224 | if ($this->debug) echo '* ',$msg, "\n"; // echoes "* <msg>" on its own line, only when $debug is true | ||
225 | } | ||
226 | |||
227 | /** | ||
228 | * Run any post-process modifications to article content as necessary. | ||
229 | * | ||
230 | * @param DOMElement | ||
231 | * @return void | ||
232 | */ | ||
233 | public function postProcessContent($articleContent) { | ||
234 | if ($this->convertLinksToFootnotes && !preg_match('/wikipedia\.org/', @$this->url)) { // footnotes are opt-in and skipped when the source URL contains wikipedia.org | ||
235 | $this->addFootnotes($articleContent); | ||
236 | } | ||
237 | } | ||
238 | |||
239 | /** | ||
240 | * Get the article title as an H1. | ||
241 | * | ||
242 | * @return DOMElement | ||
243 | */ | ||
244 | protected function getArticleTitle() { | ||
245 | $curTitle = ''; | ||
246 | $origTitle = ''; | ||
247 | |||
248 | try { | ||
249 | $curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0)); | ||
250 | } catch(Exception $e) {} | ||
251 | |||
252 | if (preg_match('/ [\|\-] /', $curTitle)) // title contains a " | " or " - " separator | ||
253 | { | ||
254 | $curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle); // keep what precedes the last separator | ||
255 | |||
256 | if (count(explode(' ', $curTitle)) < 3) { // too short - keep what follows the first separator instead | ||
257 | $curTitle = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $origTitle); | ||
258 | } | ||
259 | } | ||
260 | else if (strpos($curTitle, ': ') !== false) | ||
261 | { | ||
262 | $curTitle = preg_replace('/.*:(.*)/i', '$1', $origTitle); // keep what follows the last colon | ||
263 | |||
264 | if (count(explode(' ', $curTitle)) < 3) { // too short - keep what follows the first colon instead | ||
265 | $curTitle = preg_replace('/[^:]*[:](.*)/i','$1', $origTitle); | ||
266 | } | ||
267 | } | ||
268 | else if(mb_strlen($curTitle, 'UTF-8') > 150 || mb_strlen($curTitle, 'UTF-8') < 15) // measured in characters, not bytes, so non-ASCII titles are judged by the same thresholds | ||
269 | { | ||
270 | $hOnes = $this->dom->getElementsByTagName('h1'); | ||
271 | if($hOnes->length == 1) // only trust the h1 when it is the single one in the document | ||
272 | { | ||
273 | $curTitle = $this->getInnerText($hOnes->item(0)); | ||
274 | } | ||
275 | } | ||
276 | |||
277 | $curTitle = trim($curTitle); | ||
278 | |||
279 | if (count(explode(' ', $curTitle)) <= 4) { // four words or fewer - fall back to the untouched title | ||
280 | $curTitle = $origTitle; | ||
281 | } | ||
282 | |||
283 | $articleTitle = $this->dom->createElement('h1'); | ||
284 | $articleTitle->innerHTML = $curTitle; | ||
285 | |||
286 | return $articleTitle; | ||
287 | } | ||
288 | |||
289 | /** | ||
290 | * Prepare the HTML document for readability to scrape it. | ||
291 | * This includes things like stripping javascript, CSS, and handling terrible markup. | ||
292 | * | ||
293 | * @return void | ||
294 | **/ | ||
295 | protected function prepDocument() { | ||
296 | /** | ||
297 | * In some cases a body element can't be found (if the HTML is totally hosed for example) | ||
298 | * so we create a new body node and append it to the document. | ||
299 | */ | ||
300 | if ($this->body == null) | ||
301 | { | ||
302 | $this->body = $this->dom->createElement('body'); | ||
303 | $this->dom->documentElement->appendChild($this->body); | ||
304 | } | ||
305 | $this->body->setAttribute('id', 'readabilityBody'); // any existing id on the body is overwritten | ||
306 | |||
307 | /* Remove all style tags (anywhere in the document, not only in head) */ | ||
308 | $styleTags = $this->dom->getElementsByTagName('style'); | ||
309 | for ($i = $styleTags->length-1; $i >= 0; $i--) // iterate from the last element to the first while removing | ||
310 | { | ||
311 | $styleTags->item($i)->parentNode->removeChild($styleTags->item($i)); | ||
312 | } | ||
313 | |||
314 | /* Turn all double br's into p's */ | ||
315 | /* Note, this is pretty costly as far as processing goes. Maybe optimize later. */ | ||
316 | //document.body.innerHTML = document.body.innerHTML.replace(readability.regexps.replaceBrs, '</p><p>').replace(readability.regexps.replaceFonts, '<$1span>'); | ||
317 | // We do this in the constructor for PHP as that's when we have raw HTML - before parsing it into a DOM tree. | ||
318 | // Manipulating innerHTML as it's done in JS is not possible in PHP. | ||
319 | } | ||
320 | |||
321 | /** | ||
322 | * For easier reading, convert this document to have footnotes at the bottom rather than inline links. | ||
323 | * @see http://www.roughtype.com/archives/2010/05/experiments_in.php | ||
324 | * | ||
325 | * @return void | ||
326 | **/ | ||
327 | public function addFootnotes($articleContent) { | ||
328 | $footnotesWrapper = $this->dom->createElement('div'); | ||
329 | $footnotesWrapper->setAttribute('id', 'readability-footnotes'); | ||
330 | $footnotesWrapper->innerHTML = '<h3>References</h3>'; | ||
331 | |||
332 | $articleFootnotes = $this->dom->createElement('ol'); | ||
333 | $articleFootnotes->setAttribute('id', 'readability-footnotes-list'); | ||
334 | $footnotesWrapper->appendChild($articleFootnotes); | ||
335 | |||
336 | $articleLinks = $articleContent->getElementsByTagName('a'); | ||
337 | |||
338 | $linkCount = 0; | ||
339 | for ($i = 0; $i < $articleLinks->length; $i++) // length is re-read on every pass | ||
340 | { | ||
341 | $articleLink = $articleLinks->item($i); | ||
342 | $footnoteLink = $articleLink->cloneNode(true); | ||
343 | $refLink = $this->dom->createElement('a'); | ||
344 | $footnote = $this->dom->createElement('li'); | ||
345 | $linkDomain = @parse_url($footnoteLink->getAttribute('href'), PHP_URL_HOST); | ||
346 | if (!$linkDomain && isset($this->url)) $linkDomain = @parse_url($this->url, PHP_URL_HOST); // no host in the href - fall back to the page's own host | ||
347 | //linkDomain = footnoteLink.host ? footnoteLink.host : document.location.host, | ||
348 | $linkText = $this->getInnerText($articleLink); | ||
349 | |||
350 | if ((strpos($articleLink->getAttribute('class'), 'readability-DoNotFootnote') !== false) || preg_match($this->regexps['skipFootnoteLink'], $linkText)) { | ||
351 | continue; | ||
352 | } | ||
353 | |||
354 | $linkCount++; | ||
355 | |||
356 | /** Add a superscript reference after the article link */ | ||
357 | $refLink->setAttribute('href', '#readabilityFootnoteLink-' . $linkCount); | ||
358 | $refLink->innerHTML = '<small><sup>[' . $linkCount . ']</sup></small>'; | ||
359 | $refLink->setAttribute('class', 'readability-DoNotFootnote'); // keeps the reference link itself from being footnoted | ||
360 | $refLink->setAttribute('style', 'color: inherit;'); | ||
361 | |||
362 | // Identity check: == on objects is a loose, property-based comparison, so use DOMNode::isSameNode() | ||
363 | if ($articleLink->parentNode->lastChild->isSameNode($articleLink)) { | ||
364 | $articleLink->parentNode->appendChild($refLink); | ||
365 | } else { | ||
366 | $articleLink->parentNode->insertBefore($refLink, $articleLink->nextSibling); | ||
367 | } | ||
368 | |||
369 | $articleLink->setAttribute('style', 'color: inherit; text-decoration: none;'); | ||
370 | $articleLink->setAttribute('name', 'readabilityLink-' . $linkCount); // anchor the footnote's "^" link jumps back to | ||
371 | |||
372 | $footnote->innerHTML = '<small><sup><a href="#readabilityLink-' . $linkCount . '" title="Jump to Link in Article">^</a></sup></small> '; | ||
373 | |||
374 | $footnoteLink->innerHTML = ($footnoteLink->getAttribute('title') != '' ? $footnoteLink->getAttribute('title') : $linkText); // prefer the link's title attribute over its text | ||
375 | $footnoteLink->setAttribute('name', 'readabilityFootnoteLink-' . $linkCount); | ||
376 | |||
377 | $footnote->appendChild($footnoteLink); | ||
378 | if ($linkDomain) $footnote->innerHTML = $footnote->innerHTML . '<small> (' . $linkDomain . ')</small>'; | ||
379 | |||
380 | $articleFootnotes->appendChild($footnote); | ||
381 | } | ||
382 | |||
383 | if ($linkCount > 0) { // the References section is only attached when at least one footnote was created | ||
384 | $articleContent->appendChild($footnotesWrapper); | ||
385 | } | ||
386 | } | ||
387 | |||
388 | /** | ||
389 | * Reverts P elements with class 'readability-styled' | ||
390 | * to text nodes - which is what they were before. | ||
391 | * | ||
392 | * @param DOMElement | ||
393 | * @return void | ||
394 | */ | ||
395 | function revertReadabilityStyledElements($articleContent) { | ||
396 | $xpath = new DOMXPath($articleContent->ownerDocument); | ||
397 | $elems = $xpath->query('.//p[@class="readability-styled"]', $articleContent); // descendants of $articleContent whose class is exactly "readability-styled" | ||
398 | //$elems = $articleContent->getElementsByTagName('p'); | ||
399 | for ($i = $elems->length-1; $i >= 0; $i--) { // last match first | ||
400 | $e = $elems->item($i); | ||
401 | $e->parentNode->replaceChild($articleContent->ownerDocument->createTextNode($e->textContent), $e); // swap the p for a text node carrying its text content | ||
402 | //if ($e->hasAttribute('class') && $e->getAttribute('class') == 'readability-styled') { | ||
403 | // $e->parentNode->replaceChild($this->dom->createTextNode($e->textContent), $e); | ||
404 | //} | ||
405 | } | ||
406 | } | ||
407 | |||
408 | /** | ||
409 | * Prepare the article node for display. Clean out any inline styles, | ||
410 | * iframes, forms, strip extraneous <p> tags, etc. | ||
411 | * | ||
412 | * @param DOMElement | ||
413 | * @return void | ||
414 | */ | ||
415 | function prepArticle($articleContent) { | ||
416 | $this->cleanStyles($articleContent); | ||
417 | $this->killBreaks($articleContent); | ||
418 | if ($this->revertForcedParagraphElements) { // turn the p.readability-styled wrappers back into text nodes | ||
419 | $this->revertReadabilityStyledElements($articleContent); | ||
420 | } | ||
421 | |||
422 | /* Clean out junk from the article content */ | ||
423 | $this->cleanConditionally($articleContent, 'form'); | ||
424 | $this->clean($articleContent, 'object'); | ||
425 | $this->clean($articleContent, 'h1'); | ||
426 | |||
427 | /** | ||
428 | * If there is only one h2, they are probably using it | ||
429 | * as a header and not a subheader, so remove it since we already have a header. | ||
430 | * (Skipped when lightClean is enabled.) | ||
431 | ***/ | ||
432 | if (!$this->lightClean && ($articleContent->getElementsByTagName('h2')->length == 1)) { | ||
433 | $this->clean($articleContent, 'h2'); | ||
434 | } | ||
435 | $this->clean($articleContent, 'iframe'); | ||
436 | |||
437 | $this->cleanHeaders($articleContent); | ||
438 | |||
439 | /* Do these last as the previous stuff may have removed junk that will affect these */ | ||
440 | $this->cleanConditionally($articleContent, 'table'); | ||
441 | $this->cleanConditionally($articleContent, 'ul'); | ||
442 | $this->cleanConditionally($articleContent, 'div'); | ||
443 | |||
444 | /* Remove extra paragraphs: those with no text and no img/embed/object/iframe inside */ | ||
445 | $articleParagraphs = $articleContent->getElementsByTagName('p'); | ||
446 | for ($i = $articleParagraphs->length-1; $i >= 0; $i--) | ||
447 | { | ||
448 | $imgCount = $articleParagraphs->item($i)->getElementsByTagName('img')->length; | ||
449 | $embedCount = $articleParagraphs->item($i)->getElementsByTagName('embed')->length; | ||
450 | $objectCount = $articleParagraphs->item($i)->getElementsByTagName('object')->length; | ||
451 | $iframeCount = $articleParagraphs->item($i)->getElementsByTagName('iframe')->length; | ||
452 | |||
453 | if ($imgCount === 0 && $embedCount === 0 && $objectCount === 0 && $iframeCount === 0 && $this->getInnerText($articleParagraphs->item($i), false) == '') | ||
454 | { | ||
455 | $articleParagraphs->item($i)->parentNode->removeChild($articleParagraphs->item($i)); | ||
456 | } | ||
457 | } | ||
458 | |||
459 | try { | ||
460 | $articleContent->innerHTML = preg_replace('/<br[^>]*>\s*<p/i', '<p', $articleContent->innerHTML); // drop a br that sits directly before an opening p tag | ||
461 | //articleContent.innerHTML = articleContent.innerHTML.replace(/<br[^>]*>\s*<p/gi, '<p'); | ||
462 | } | ||
463 | catch (Exception $e) { | ||
464 | $this->dbg("Cleaning innerHTML of breaks failed. This is an IE strict-block-elements bug. Ignoring.: " . $e); | ||
465 | } | ||
466 | |||
467 | /** | ||
468 | * Initialize a node with the readability object. Also checks the | ||
469 | * className/id for special names to add to its score. | ||
470 | * | ||
471 | * @param Element | ||
472 | * @return void | ||
473 | **/ | ||
474 | protected function initializeNode($node) { | ||
475 | $readability = $this->dom->createAttribute('readability'); | ||
476 | $readability->value = 0; // this is our contentScore, stored as a "readability" attribute on the node | ||
477 | $node->setAttributeNode($readability); | ||
478 | |||
479 | switch (strtoupper($node->tagName)) { // unsure if strtoupper is needed, but using it just in case | ||
480 | case 'DIV': | ||
481 | $readability->value += 5; // largest starting bonus | ||
482 | break; | ||
483 | |||
484 | case 'PRE': | ||
485 | case 'TD': | ||
486 | case 'BLOCKQUOTE': | ||
487 | $readability->value += 3; | ||
488 | break; | ||
489 | |||
490 | case 'ADDRESS': | ||
491 | case 'OL': | ||
492 | case 'UL': | ||
493 | case 'DL': | ||
494 | case 'DD': | ||
495 | case 'DT': | ||
496 | case 'LI': | ||
497 | case 'FORM': | ||
498 | $readability->value -= 3; // address, list and form elements start with a penalty | ||
499 | break; | ||
500 | |||
501 | case 'H1': | ||
502 | case 'H2': | ||
503 | case 'H3': | ||
504 | case 'H4': | ||
505 | case 'H5': | ||
506 | case 'H6': | ||
507 | case 'TH': | ||
508 | $readability->value -= 5; // headings and table header cells get the largest penalty | ||
509 | break; | ||
510 | } | ||
511 | $readability->value += $this->getClassWeight($node); // class/id based adjustment; any other tag starts from this weight alone | ||
512 | } | ||
513 | |||
514 | /*** | ||
515 | * grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is | ||
516 | * most likely to be the stuff a user wants to read. Then return it wrapped up in a div. | ||
517 | * | ||
518 | * @return DOMElement | ||
519 | **/ | ||
520 | protected function grabArticle($page=null) { | ||
521 | $stripUnlikelyCandidates = $this->flagIsActive(self::FLAG_STRIP_UNLIKELYS); | ||
522 | if (!$page) $page = $this->dom; | ||
523 | $allElements = $page->getElementsByTagName('*'); | ||
524 | /** | ||
525 | * First, node prepping. Trash nodes that look cruddy (like ones with the class name "comment", etc), and turn divs | ||
526 | * into P tags where they have been used inappropriately (as in, where they contain no other block level elements.) | ||
527 | * | ||
528 | * Note: Assignment from index for performance. See http://www.peachpit.com/articles/article.aspx?p=31567&seqNum=5 | ||
529 | * TODO: Shouldn't this be a reverse traversal? | ||
530 | **/ | ||
531 | $node = null; | ||
532 | $nodesToScore = array(); | ||
533 | for ($nodeIndex = 0; ($node = $allElements->item($nodeIndex)); $nodeIndex++) { | ||
534 | //for ($nodeIndex=$targetList->length-1; $nodeIndex >= 0; $nodeIndex--) { | ||
535 | //$node = $targetList->item($nodeIndex); | ||
536 | $tagName = strtoupper($node->tagName); | ||
537 | /* Remove unlikely candidates */ | ||
538 | if ($stripUnlikelyCandidates) { | ||
539 | $unlikelyMatchString = $node->getAttribute('class') . $node->getAttribute('id'); | ||
540 | if ( | ||
541 | preg_match($this->regexps['unlikelyCandidates'], $unlikelyMatchString) && | ||
542 | !preg_match($this->regexps['okMaybeItsACandidate'], $unlikelyMatchString) && | ||
543 | $tagName != 'BODY' | ||
544 | ) | ||
545 | { | ||
546 | $this->dbg('Removing unlikely candidate - ' . $unlikelyMatchString); | ||
547 | //$nodesToRemove[] = $node; | ||
548 | $node->parentNode->removeChild($node); | ||
549 | $nodeIndex--; | ||
550 | continue; | ||
551 | } | ||
552 | } | ||
553 | |||
554 | if ($tagName == 'P' || $tagName == 'TD' || $tagName == 'PRE') { | ||
555 | $nodesToScore[] = $node; | ||
556 | } | ||
557 | |||
558 | /* Turn all divs that don't have children block level elements into p's */ | ||
559 | if ($tagName == 'DIV') { | ||
560 | if (!preg_match($this->regexps['divToPElements'], $node->innerHTML)) { | ||
561 | //$this->dbg('Altering div to p'); | ||
562 | $newNode = $this->dom->createElement('p'); | ||
563 | try { | ||
564 | $newNode->innerHTML = $node->innerHTML; | ||
565 | //$nodesToReplace[] = array('new'=>$newNode, 'old'=>$node); | ||
566 | $node->parentNode->replaceChild($newNode, $node); | ||
567 | $nodeIndex--; | ||
568 | $nodesToScore[] = $node; // or $newNode? | ||
569 | } | ||
570 | catch(Exception $e) { | ||
571 | $this->dbg('Could not alter div to p, reverting back to div.: ' . $e); | ||
572 | } | ||
573 | } | ||
574 | else | ||
575 | { | ||
576 | /* EXPERIMENTAL */ | ||
577 | // TODO: change these p elements back to text nodes after processing | ||
578 | for ($i = 0, $il = $node->childNodes->length; $i < $il; $i++) { | ||
579 | $childNode = $node->childNodes->item($i); | ||
580 | if ($childNode->nodeType == 3) { // XML_TEXT_NODE | ||
581 | //$this->dbg('replacing text node with a p tag with the same content.'); | ||
582 | $p = $this->dom->createElement('p'); | ||
583 | $p->innerHTML = $childNode->nodeValue; | ||
584 | $p->setAttribute('style', 'display: inline;'); | ||
585 | $p->setAttribute('class', 'readability-styled'); | ||
586 | $childNode->parentNode->replaceChild($p, $childNode); | ||
587 | } | ||
588 | } | ||
589 | } | ||
590 | } | ||
591 | } | ||
592 | |||
593 | /** | ||
594 | * Loop through all paragraphs, and assign a score to them based on how content-y they look. | ||
595 | * Then add their score to their parent node. | ||
596 | * | ||
597 | * A score is determined by things like number of commas, class names, etc. Maybe eventually link density. | ||
598 | **/ | ||
599 | $candidates = array(); | ||
600 | for ($pt=0; $pt < count($nodesToScore); $pt++) { | ||
601 | $parentNode = $nodesToScore[$pt]->parentNode; | ||
602 | // $grandParentNode = $parentNode ? $parentNode->parentNode : null; | ||
603 | $grandParentNode = !$parentNode ? null : (($parentNode->parentNode instanceof DOMElement) ? $parentNode->parentNode : null); | ||
604 | $innerText = $this->getInnerText($nodesToScore[$pt]); | ||
605 | |||
606 | if (!$parentNode || !isset($parentNode->tagName)) { | ||
607 | continue; | ||
608 | } | ||
609 | |||
610 | /* If this paragraph is less than 25 characters, don't even count it. */ | ||
611 | if(strlen($innerText) < 25) { | ||
612 | continue; | ||
613 | } | ||
614 | |||
615 | /* Initialize readability data for the parent. */ | ||
616 | if (!$parentNode->hasAttribute('readability')) | ||
617 | { | ||
618 | $this->initializeNode($parentNode); | ||
619 | $candidates[] = $parentNode; | ||
620 | } | ||
621 | |||
622 | /* Initialize readability data for the grandparent. */ | ||
623 | if ($grandParentNode && !$grandParentNode->hasAttribute('readability') && isset($grandParentNode->tagName)) | ||
624 | { | ||
625 | $this->initializeNode($grandParentNode); | ||
626 | $candidates[] = $grandParentNode; | ||
627 | } | ||
628 | |||
629 | $contentScore = 0; | ||
630 | |||
631 | /* Add a point for the paragraph itself as a base. */ | ||
632 | $contentScore++; | ||
633 | |||
634 | /* Add points for any commas within this paragraph */ | ||
635 | $contentScore += count(explode(',', $innerText)); | ||
636 | |||
637 | /* For every 100 characters in this paragraph, add another point. Up to 3 points. */ | ||
638 | $contentScore += min(floor(strlen($innerText) / 100), 3); | ||
639 | |||
640 | /* Add the score to the parent. The grandparent gets half. */ | ||
641 | $parentNode->getAttributeNode('readability')->value += $contentScore; | ||
642 | |||
643 | if ($grandParentNode) { | ||
644 | $grandParentNode->getAttributeNode('readability')->value += $contentScore/2; | ||
645 | } | ||
646 | } | ||
647 | |||
648 | /** | ||
649 | * After we've calculated scores, loop through all of the possible candidate nodes we found | ||
650 | * and find the one with the highest score. | ||
651 | **/ | ||
652 | $topCandidate = null; | ||
653 | for ($c=0, $cl=count($candidates); $c < $cl; $c++) | ||
654 | { | ||
655 | /** | ||
656 | * Scale the final candidates score based on link density. Good content should have a | ||
657 | * relatively small link density (5% or less) and be mostly unaffected by this operation. | ||
658 | **/ | ||
659 | $readability = $candidates[$c]->getAttributeNode('readability'); | ||
660 | $readability->value = $readability->value * (1-$this->getLinkDensity($candidates[$c])); | ||
661 | |||
662 | $this->dbg('Candidate: ' . $candidates[$c]->tagName . ' (' . $candidates[$c]->getAttribute('class') . ':' . $candidates[$c]->getAttribute('id') . ') with score ' . $readability->value); | ||
663 | |||
664 | if (!$topCandidate || $readability->value > (int)$topCandidate->getAttribute('readability')) { | ||
665 | $topCandidate = $candidates[$c]; | ||
666 | } | ||
667 | } | ||
668 | |||
669 | /** | ||
670 | * If we still have no top candidate, just use the body as a last resort. | ||
671 | * We also have to copy the body node so it is something we can modify. | ||
672 | **/ | ||
673 | if ($topCandidate === null || strtoupper($topCandidate->tagName) == 'BODY') | ||
674 | { | ||
675 | $topCandidate = $this->dom->createElement('div'); | ||
676 | if ($page instanceof DOMDocument) { | ||
677 | if (!isset($page->documentElement)) { | ||
678 | // we don't have a body either? what a mess! :) | ||
679 | } else { | ||
680 | $topCandidate->innerHTML = $page->documentElement->innerHTML; | ||
681 | $page->documentElement->innerHTML = ''; | ||
682 | $page->documentElement->appendChild($topCandidate); | ||
683 | } | ||
684 | } else { | ||
685 | $topCandidate->innerHTML = $page->innerHTML; | ||
686 | $page->innerHTML = ''; | ||
687 | $page->appendChild($topCandidate); | ||
688 | } | ||
689 | $this->initializeNode($topCandidate); | ||
690 | } | ||
691 | |||
692 | /** | ||
693 | * Now that we have the top candidate, look through its siblings for content that might also be related. | ||
694 | * Things like preambles, content split by ads that we removed, etc. | ||
695 | **/ | ||
696 | $articleContent = $this->dom->createElement('div'); | ||
697 | $articleContent->setAttribute('id', 'readability-content'); | ||
698 | $siblingScoreThreshold = max(10, ((int)$topCandidate->getAttribute('readability')) * 0.2); | ||
699 | $siblingNodes = $topCandidate->parentNode->childNodes; | ||
700 | if (!isset($siblingNodes)) { | ||
701 | $siblingNodes = new stdClass; | ||
702 | $siblingNodes->length = 0; | ||
703 | } | ||
704 | |||
705 | for ($s=0, $sl=$siblingNodes->length; $s < $sl; $s++) | ||
706 | { | ||
707 | $siblingNode = $siblingNodes->item($s); | ||
708 | $append = false; | ||
709 | |||
710 | $this->dbg('Looking at sibling node: ' . $siblingNode->nodeName . (($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability')) ? (' with score ' . $siblingNode->getAttribute('readability')) : '')); | ||
711 | |||
712 | //dbg('Sibling has score ' . ($siblingNode->readability ? siblingNode.readability.contentScore : 'Unknown')); | ||
713 | |||
714 | if ($siblingNode === $topCandidate) | ||
715 | // or if ($siblingNode->isSameNode($topCandidate)) | ||
716 | { | ||
717 | $append = true; | ||
718 | } | ||
719 | |||
720 | $contentBonus = 0; | ||
721 | /* Give a bonus if sibling nodes and top candidates have the example same classname */ | ||
722 | if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->getAttribute('class') == $topCandidate->getAttribute('class') && $topCandidate->getAttribute('class') != '') { | ||
723 | $contentBonus += ((int)$topCandidate->getAttribute('readability')) * 0.2; | ||
724 | } | ||
725 | |||
726 | if ($siblingNode->nodeType === XML_ELEMENT_NODE && $siblingNode->hasAttribute('readability') && (((int)$siblingNode->getAttribute('readability')) + $contentBonus) >= $siblingScoreThreshold) | ||
727 | { | ||
728 | $append = true; | ||
729 | } | ||
730 | |||
731 | if (strtoupper($siblingNode->nodeName) == 'P') { | ||
732 | $linkDensity = $this->getLinkDensity($siblingNode); | ||
733 | $nodeContent = $this->getInnerText($siblingNode); | ||
734 | $nodeLength = strlen($nodeContent); | ||
735 | |||
736 | if ($nodeLength > 80 && $linkDensity < 0.25) | ||
737 | { | ||
738 | $append = true; | ||
739 | } | ||
740 | else if ($nodeLength < 80 && $linkDensity === 0 && preg_match('/\.( |$)/', $nodeContent)) | ||
741 | { | ||
742 | $append = true; | ||
743 | } | ||
744 | } | ||
745 | |||
746 | if ($append) | ||
747 | { | ||
748 | $this->dbg('Appending node: ' . $siblingNode->nodeName); | ||
749 | |||
750 | $nodeToAppend = null; | ||
751 | $sibNodeName = strtoupper($siblingNode->nodeName); | ||
752 | if ($sibNodeName != 'DIV' && $sibNodeName != 'P') { | ||
753 | /* We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident. */ | ||
754 | |||
755 | $this->dbg('Altering siblingNode of ' . $sibNodeName . ' to div.'); | ||
756 | $nodeToAppend = $this->dom->createElement('div'); | ||
757 | try { | ||
758 | $nodeToAppend->setAttribute('id', $siblingNode->getAttribute('id')); | ||
759 | $nodeToAppend->innerHTML = $siblingNode->innerHTML; | ||
760 | } | ||
761 | catch(Exception $e) | ||
762 | { | ||
763 | $this->dbg('Could not alter siblingNode to div, reverting back to original.'); | ||
764 | $nodeToAppend = $siblingNode; | ||
765 | $s--; | ||
766 | $sl--; | ||
767 | } | ||
768 | } else { | ||
769 | $nodeToAppend = $siblingNode; | ||
770 | $s--; | ||
771 | $sl--; | ||
772 | } | ||
773 | |||
774 | /* To ensure a node does not interfere with readability styles, remove its classnames */ | ||
775 | $nodeToAppend->removeAttribute('class'); | ||
776 | |||
777 | /* Append sibling and subtract from our list because it removes the node when you append to another node */ | ||
778 | $articleContent->appendChild($nodeToAppend); | ||
779 | } | ||
780 | } | ||
781 | |||
782 | /** | ||
783 | * So we have all of the content that we need. Now we clean it up for presentation. | ||
784 | **/ | ||
785 | $this->prepArticle($articleContent); | ||
786 | |||
787 | /** | ||
788 | * Now that we've gone through the full algorithm, check to see if we got any meaningful content. | ||
789 | * If we didn't, we may need to re-run grabArticle with different flags set. This gives us a higher | ||
790 | * likelihood of finding the content, and the sieve approach gives us a higher likelihood of | ||
791 | * finding the -right- content. | ||
792 | **/ | ||
793 | if (strlen($this->getInnerText($articleContent, false)) < 250) | ||
794 | { | ||
795 | // TODO: find out why element disappears sometimes, e.g. for this URL http://www.businessinsider.com/6-hedge-fund-etfs-for-average-investors-2011-7 | ||
796 | // in the meantime, we check and create an empty element if it's not there. | ||
797 | if (!isset($this->body->childNodes)) $this->body = $this->dom->createElement('body'); | ||
798 | $this->body->innerHTML = $this->bodyCache; | ||
799 | |||
800 | if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS)) { | ||
801 | $this->removeFlag(self::FLAG_STRIP_UNLIKELYS); | ||
802 | return $this->grabArticle($this->body); | ||
803 | } | ||
804 | else if ($this->flagIsActive(self::FLAG_WEIGHT_CLASSES)) { | ||
805 | $this->removeFlag(self::FLAG_WEIGHT_CLASSES); | ||
806 | return $this->grabArticle($this->body); | ||
807 | } | ||
808 | else if ($this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) { | ||
809 | $this->removeFlag(self::FLAG_CLEAN_CONDITIONALLY); | ||
810 | return $this->grabArticle($this->body); | ||
811 | } | ||
812 | else { | ||
813 | return false; | ||
814 | } | ||
815 | } | ||
816 | return $articleContent; | ||
817 | } | ||
818 | |||
/**
 * Detach every <script> element found underneath the given node.
 *
 * @param DOMElement $doc node whose descendant <script> elements are removed
 *                        (anything exposing getElementsByTagName() works)
 * @return void
 */
public function removeScripts($doc) {
    $scriptNodes = $doc->getElementsByTagName('script');

    // Walk the list from its end towards the front so that removing a node
    // never shifts the position of an entry that is still to be visited.
    $position = $scriptNodes->length;
    while ($position-- > 0) {
        $scriptNode = $scriptNodes->item($position);
        $scriptNode->parentNode->removeChild($scriptNode);
    }
}
832 | |||
/**
 * Return the trimmed text content of a node.
 *
 * When $normalizeSpaces is on, every match of the class's 'normalize'
 * pattern (presumably runs of whitespace -- the pattern is defined outside
 * this method) is replaced by a single space.
 *
 * @param DOMElement $e node to read the text from
 * @param boolean $normalizeSpaces (default: true)
 * @return string the text, or '' when the node has no text content
 **/
public function getInnerText($e, $normalizeSpaces=true) {
    $hasText = isset($e->textContent) && $e->textContent != '';
    if (!$hasText) {
        return '';
    }

    $trimmed = trim($e->textContent);

    return $normalizeSpaces
        ? preg_replace($this->regexps['normalize'], ' ', $trimmed)
        : $trimmed;
}
856 | |||
/**
 * Count how often the string $s occurs in the inner text of node $e.
 *
 * The text is obtained through getInnerText() with its default
 * space normalisation.
 *
 * @param DOMElement $e node whose text is searched
 * @param string $s what to count. Default is ","
 * @return number (integer)
 **/
public function getCharCount($e, $s=',') {
    $innerText = $this->getInnerText($e);

    return substr_count($innerText, $s);
}
867 | |||
/**
 * Remove the style attribute from every element underneath $e.
 *
 * Only descendants are visited (getElementsByTagName('*')); the style
 * attribute of $e itself is left untouched. Non-object input is ignored.
 *
 * @param DOMElement $e
 * @return void
 */
public function cleanStyles($e) {
    if (is_object($e)) {
        $descendants = $e->getElementsByTagName('*');
        // Dropping an attribute does not alter the node list, so a plain
        // forward index walk is safe here.
        for ($idx = 0, $total = $descendants->length; $idx < $total; $idx++) {
            $descendants->item($idx)->removeAttribute('style');
        }
    }
}
881 | |||
/**
 * Get the density of links as a fraction of the content.
 * This is the length of the text found inside <a> descendants divided by
 * the length of all text in the node. Lengths are strlen() byte counts of
 * the getInnerText() result.
 *
 * @param DOMElement $e
 * @return number link text length / total text length, or 0 when the node has no text
 */
public function getLinkDensity($e) {
    $totalLength = strlen($this->getInnerText($e));

    $linkedLength = 0;
    foreach ($e->getElementsByTagName('a') as $anchor) {
        $linkedLength += strlen($this->getInnerText($anchor));
    }

    // Guard against dividing by zero for nodes without any text.
    return ($totalLength > 0) ? $linkedLength / $totalLength : 0;
}
903 | |||
/**
 * Get an element's class/id weight.
 *
 * The class and id attributes are each tested against the class's
 * 'negative' and 'positive' patterns: a negative match subtracts 25, a
 * positive match adds 25, and both can apply to the same attribute.
 * Always 0 while FLAG_WEIGHT_CLASSES is switched off.
 *
 * @param DOMElement $e
 * @return number (Integer)
 */
public function getClassWeight($e) {
    if (!$this->flagIsActive(self::FLAG_WEIGHT_CLASSES)) {
        return 0;
    }

    $score = 0;

    /* The same scoring rule applies to the classname and to the ID */
    foreach (array('class', 'id') as $attrName) {
        if (!$e->hasAttribute($attrName)) {
            continue;
        }
        $attrValue = $e->getAttribute($attrName);
        if ($attrValue == '') {
            continue;
        }

        if (preg_match($this->regexps['negative'], $attrValue)) {
            $score -= 25;
        }
        if (preg_match($this->regexps['positive'], $attrValue)) {
            $score += 25;
        }
    }

    return $score;
}
941 | |||
/**
 * Remove extraneous break tags from a node.
 *
 * Each match of the class's 'killBreaks' pattern in the node's markup is
 * replaced by a single '<br />' and the result is written back through
 * the node's innerHTML property.
 *
 * @param DOMElement $node node exposing a readable and writable innerHTML
 * @return void
 */
public function killBreaks($node) {
    $node->innerHTML = preg_replace($this->regexps['killBreaks'], '<br />', $node->innerHTML);
}
953 | |||
/**
 * Remove every descendant element of type "tag" from a node.
 *
 * For iframe/object/embed tags an element is kept when its attribute
 * values, or failing that its inner markup, match the class's 'video'
 * pattern (per the original notes: youtube/vimeo videos, preserved for
 * iframes since 2012-09-18).
 *
 * @param DOMElement $e
 * @param string $tag
 * @return void
 */
public function clean($e, $tag) {
    $matches = $e->getElementsByTagName($tag);
    $mayHoldVideo = in_array($tag, array('iframe', 'object', 'embed'));

    /* Go from the end of the list so removals do not disturb the entries still to visit. */
    for ($idx = $matches->length - 1; $idx >= 0; $idx--) {
        $candidate = $matches->item($idx);

        if ($mayHoldVideo) {
            /* Glue all attribute values together so one regex test covers them. */
            $joinedValues = '';
            foreach ($candidate->attributes as $attribute) {
                $joinedValues .= $attribute->value . '|';
            }

            /* Attributes are checked first; the inner markup is only inspected when they do not match. */
            if (preg_match($this->regexps['video'], $joinedValues)
                || preg_match($this->regexps['video'], $candidate->innerHTML)) {
                continue;
            }
        }

        $candidate->parentNode->removeChild($candidate);
    }
}
989 | |||
/**
 * Clean an element of all tags of type "tag" if they look fishy.
 * "Fishy" is judged from content length, classnames, link density,
 * number of images & embeds, etc.
 *
 * Does nothing while FLAG_CLEAN_CONDITIONALLY is switched off. Otherwise,
 * for each matching descendant:
 *  - it is removed outright when class weight + readability score is negative;
 *  - else, when its text holds fewer than 10 commas, it is removed if one of
 *    the heuristics below fires (a more lenient rule set is used when
 *    $this->lightClean is truthy).
 *
 * TODO: Consider taking into account original contentScore here.
 *
 * @param DOMElement $e
 * @param string $tag
 * @return void
 */
public function cleanConditionally($e, $tag) {
    if (!$this->flagIsActive(self::FLAG_CLEAN_CONDITIONALLY)) {
        return;
    }

    $tagsList = $e->getElementsByTagName($tag);

    /* Traverse backwards so nodes can be removed without affecting the traversal. */
    for ($i = $tagsList->length - 1; $i >= 0; $i--) {
        $node = $tagsList->item($i);

        $weight = $this->getClassWeight($node);
        $hasScore = $node->hasAttribute('readability');
        $contentScore = $hasScore ? (int)$node->getAttribute('readability') : 0;

        $scoreNote = $hasScore ? (' with score ' . $node->getAttribute('readability')) : '';
        $this->dbg('Cleaning Conditionally ' . $node->tagName . ' (' . $node->getAttribute('class') . ':' . $node->getAttribute('id') . ')' . $scoreNote);

        if ($weight + $contentScore < 0) {
            $node->parentNode->removeChild($node);
            continue;
        }

        /* Plenty of commas: treat it as real prose and leave it alone. */
        if ($this->getCharCount($node, ',') >= 10) {
            continue;
        }

        /* Gather counts for other typical elements embedded within. */
        $p = $node->getElementsByTagName('p')->length;
        $img = $node->getElementsByTagName('img')->length;
        /* Offset of -100: the <li> rule only fires once list items outnumber paragraphs by more than 100. */
        $li = $node->getElementsByTagName('li')->length - 100;
        $input = $node->getElementsByTagName('input')->length;
        $a = $node->getElementsByTagName('a')->length;

        /* Count <embed> and <iframe> descendants whose src matches the 'video' pattern. */
        $embedCount = 0;
        foreach (array('embed', 'iframe') as $embedTag) {
            foreach ($node->getElementsByTagName($embedTag) as $embed) {
                if (preg_match($this->regexps['video'], $embed->getAttribute('src'))) {
                    $embedCount++;
                }
            }
        }

        $linkDensity = $this->getLinkDensity($node);
        $contentLength = strlen($this->getInnerText($node));

        /* Debug message of the first rule that fires; null means the node is kept. */
        $reason = null;

        if ($this->lightClean) {
            $this->dbg('Light clean...');
            if (($img > $p) && ($img > 4)) {
                $reason = ' more than 4 images and more image elements than paragraph elements';
            } else if ($li > $p && $tag != 'ul' && $tag != 'ol') {
                $reason = ' too many <li> elements, and parent is not <ul> or <ol>';
            } else if ($input > floor($p/3)) {
                $reason = ' too many <input> elements';
            } else if ($contentLength < 25 && ($embedCount === 0 && ($img === 0 || $img > 2))) {
                $reason = ' content length less than 25 chars, 0 embeds and either 0 images or more than 2 images';
            } else if ($weight < 25 && $linkDensity > 0.2) {
                $reason = ' weight smaller than 25 and link density above 0.2';
            } else if ($a > 2 && ($weight >= 25 && $linkDensity > 0.5)) {
                $reason = ' more than 2 links and weight above 25 but link density greater than 0.5';
            } else if ($embedCount > 3) {
                $reason = ' more than 3 embeds';
            }
        } else {
            $this->dbg('Standard clean...');
            if ($img > $p) {
                $reason = ' more image elements than paragraph elements';
            } else if ($li > $p && $tag != 'ul' && $tag != 'ol') {
                $reason = ' too many <li> elements, and parent is not <ul> or <ol>';
            } else if ($input > floor($p/3)) {
                $reason = ' too many <input> elements';
            } else if ($contentLength < 25 && ($img === 0 || $img > 2)) {
                $reason = ' content length less than 25 chars and 0 images, or more than 2 images';
            } else if ($weight < 25 && $linkDensity > 0.2) {
                $reason = ' weight smaller than 25 and link density above 0.2';
            } else if ($weight >= 25 && $linkDensity > 0.5) {
                $reason = ' weight above 25 but link density greater than 0.5';
            } else if (($embedCount == 1 && $contentLength < 75) || $embedCount > 1) {
                $reason = ' 1 embed and content length smaller than 75 chars, or more than one embed';
            }
        }

        if ($reason !== null) {
            $this->dbg($reason);
            $node->parentNode->removeChild($node);
        }
    }
}
1108 | |||
/**
 * Clean out spurious headers from an Element.
 *
 * Only <h1> and <h2> descendants are examined. A header is removed when
 * its class weight is negative or its link density exceeds 0.33.
 *
 * @param DOMElement $e
 * @return void
 */
public function cleanHeaders($e) {
    foreach (array('h1', 'h2') as $headerTag) {
        $headers = $e->getElementsByTagName($headerTag);

        /* Count down so that removing a header leaves the unvisited entries in place. */
        $position = $headers->length;
        while ($position-- > 0) {
            $header = $headers->item($position);
            $isSpurious = $this->getClassWeight($header) < 0
                || $this->getLinkDensity($header) > 0.33;
            if ($isSpurious) {
                $header->parentNode->removeChild($header);
            }
        }
    }
}
1125 | |||
/**
 * Tell whether any bit of $flag is currently set in $this->flags.
 *
 * @param int $flag bit mask to test
 * @return boolean
 */
public function flagIsActive($flag) {
    $sharedBits = $this->flags & $flag;

    return $sharedBits > 0;
}
1129 | |||
/**
 * Switch on the bits of $flag in $this->flags.
 *
 * @param int $flag bit mask to set
 * @return void
 */
public function addFlag($flag) {
    $this->flags |= $flag;
}
1133 | |||
/**
 * Switch off the bits of $flag in $this->flags.
 *
 * @param int $flag bit mask to clear
 * @return void
 */
public function removeFlag($flag) {
    $this->flags &= ~$flag;
}
1137 | } | ||
1138 | ?> \ No newline at end of file | ||
diff --git a/inc/3rdparty/simplepie/LICENSE.txt b/inc/3rdparty/libraries/simplepie/LICENSE.txt index a822a4bd..a822a4bd 100644 --- a/inc/3rdparty/simplepie/LICENSE.txt +++ b/inc/3rdparty/libraries/simplepie/LICENSE.txt | |||
diff --git a/inc/3rdparty/simplepie/SimplePieAutoloader.php b/inc/3rdparty/libraries/simplepie/autoloader.php index b2654dc5..c16a8f8b 100644 --- a/inc/3rdparty/simplepie/SimplePieAutoloader.php +++ b/inc/3rdparty/libraries/simplepie/autoloader.php | |||
@@ -33,44 +33,50 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | ||
47 | // autoloader | 46 | // autoloader |
48 | spl_autoload_register(array(new SimplePie_Autoloader(), 'autoload')); | 47 | spl_autoload_register(array(new SimplePie_Autoloader(), 'autoload')); |
49 | 48 | ||
49 | if (!class_exists('SimplePie')) | ||
50 | { | ||
51 | trigger_error('Autoloader not registered properly', E_USER_ERROR); | ||
52 | } | ||
53 | |||
50 | /** | 54 | /** |
51 | * SimplePie Autoloader class. | 55 | * Autoloader class |
52 | * | 56 | * |
53 | * @package SimplePie | 57 | * @package SimplePie |
58 | * @subpackage API | ||
54 | */ | 59 | */ |
55 | class SimplePie_Autoloader | 60 | class SimplePie_Autoloader |
56 | { | 61 | { |
57 | /** | 62 | /** |
58 | * Constructor. | 63 | * Constructor |
59 | */ | 64 | */ |
60 | public function __construct() | 65 | public function __construct() |
61 | { | 66 | { |
62 | $this->path = dirname(__FILE__); | 67 | $this->path = dirname(__FILE__) . DIRECTORY_SEPARATOR . 'library'; |
63 | } | 68 | } |
64 | 69 | ||
65 | /** | 70 | /** |
66 | * Autoloader. | 71 | * Autoloader |
67 | * | 72 | * |
68 | * @param string $class The name of the class to attempt to load. | 73 | * @param string $class The name of the class to attempt to load. |
69 | */ | 74 | */ |
70 | public function autoload($class) | 75 | public function autoload($class) |
71 | { | 76 | { |
72 | // see if this request should be handled by this autoloader | 77 | // Only load the class if it starts with "SimplePie" |
73 | if (strpos($class, 'SimplePie') !== 0) { | 78 | if (strpos($class, 'SimplePie') !== 0) |
79 | { | ||
74 | return; | 80 | return; |
75 | } | 81 | } |
76 | 82 | ||
diff --git a/inc/3rdparty/simplepie/SimplePie/Core.php b/inc/3rdparty/libraries/simplepie/library/SimplePie.php index 82a30f2b..b33c635f 100644 --- a/inc/3rdparty/simplepie/SimplePie/Core.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,14 +33,13 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | /** | 45 | /** |
@@ -51,7 +50,7 @@ define('SIMPLEPIE_NAME', 'SimplePie'); | |||
51 | /** | 50 | /** |
52 | * SimplePie Version | 51 | * SimplePie Version |
53 | */ | 52 | */ |
54 | define('SIMPLEPIE_VERSION', '1.3-dev'); | 53 | define('SIMPLEPIE_VERSION', '1.3.1'); |
55 | 54 | ||
56 | /** | 55 | /** |
57 | * SimplePie Build | 56 | * SimplePie Build |
@@ -374,11 +373,6 @@ define('SIMPLEPIE_NAMESPACE_XHTML', 'http://www.w3.org/1999/xhtml'); | |||
374 | define('SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY', 'http://www.iana.org/assignments/relation/'); | 373 | define('SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY', 'http://www.iana.org/assignments/relation/'); |
375 | 374 | ||
376 | /** | 375 | /** |
377 | * Whether we're running on PHP5 | ||
378 | */ | ||
379 | define('SIMPLEPIE_PHP5', version_compare(PHP_VERSION, '5.0.0', '>=')); | ||
380 | |||
381 | /** | ||
382 | * No file source | 376 | * No file source |
383 | */ | 377 | */ |
384 | define('SIMPLEPIE_FILE_SOURCE_NONE', 0); | 378 | define('SIMPLEPIE_FILE_SOURCE_NONE', 0); |
@@ -414,8 +408,9 @@ define('SIMPLEPIE_FILE_SOURCE_FILE_GET_CONTENTS', 16); | |||
414 | * SimplePie | 408 | * SimplePie |
415 | * | 409 | * |
416 | * @package SimplePie | 410 | * @package SimplePie |
411 | * @subpackage API | ||
417 | */ | 412 | */ |
418 | class SimplePie_Core | 413 | class SimplePie |
419 | { | 414 | { |
420 | /** | 415 | /** |
421 | * @var array Raw data | 416 | * @var array Raw data |
@@ -488,13 +483,6 @@ class SimplePie_Core | |||
488 | public $force_feed = false; | 483 | public $force_feed = false; |
489 | 484 | ||
490 | /** | 485 | /** |
491 | * @var bool Enable/Disable XML dump | ||
492 | * @see SimplePie::enable_xml_dump() | ||
493 | * @access private | ||
494 | */ | ||
495 | public $xml_dump = false; | ||
496 | |||
497 | /** | ||
498 | * @var bool Enable/Disable Caching | 486 | * @var bool Enable/Disable Caching |
499 | * @see SimplePie::enable_cache() | 487 | * @see SimplePie::enable_cache() |
500 | * @access private | 488 | * @access private |
@@ -552,109 +540,11 @@ class SimplePie_Core | |||
552 | public $autodiscovery = SIMPLEPIE_LOCATOR_ALL; | 540 | public $autodiscovery = SIMPLEPIE_LOCATOR_ALL; |
553 | 541 | ||
554 | /** | 542 | /** |
555 | * @var string Class used for caching feeds | 543 | * Class registry object |
556 | * @see SimplePie::set_cache_class() | 544 | * |
557 | * @access private | 545 | * @var SimplePie_Registry |
558 | */ | ||
559 | public $cache_class = 'SimplePie_Cache'; | ||
560 | |||
561 | /** | ||
562 | * @var string Class used for locating feeds | ||
563 | * @see SimplePie::set_locator_class() | ||
564 | * @access private | ||
565 | */ | ||
566 | public $locator_class = 'SimplePie_Locator'; | ||
567 | |||
568 | /** | ||
569 | * @var string Class used for parsing feeds | ||
570 | * @see SimplePie::set_parser_class() | ||
571 | * @access private | ||
572 | */ | ||
573 | public $parser_class = 'SimplePie_Parser'; | ||
574 | |||
575 | /** | ||
576 | * @var string Class used for fetching feeds | ||
577 | * @see SimplePie::set_file_class() | ||
578 | * @access private | ||
579 | */ | ||
580 | public $file_class = 'SimplePie_File'; | ||
581 | |||
582 | /** | ||
583 | * @var string Class used for items | ||
584 | * @see SimplePie::set_item_class() | ||
585 | * @access private | ||
586 | */ | ||
587 | public $item_class = 'SimplePie_Item'; | ||
588 | |||
589 | /** | ||
590 | * @var string Class used for authors | ||
591 | * @see SimplePie::set_author_class() | ||
592 | * @access private | ||
593 | */ | ||
594 | public $author_class = 'SimplePie_Author'; | ||
595 | |||
596 | /** | ||
597 | * @var string Class used for categories | ||
598 | * @see SimplePie::set_category_class() | ||
599 | * @access private | ||
600 | */ | ||
601 | public $category_class = 'SimplePie_Category'; | ||
602 | |||
603 | /** | ||
604 | * @var string Class used for enclosures | ||
605 | * @see SimplePie::set_enclosures_class() | ||
606 | * @access private | ||
607 | */ | ||
608 | public $enclosure_class = 'SimplePie_Enclosure'; | ||
609 | |||
610 | /** | ||
611 | * @var string Class used for Media RSS <media:text> captions | ||
612 | * @see SimplePie::set_caption_class() | ||
613 | * @access private | ||
614 | */ | ||
615 | public $caption_class = 'SimplePie_Caption'; | ||
616 | |||
617 | /** | ||
618 | * @var string Class used for Media RSS <media:copyright> | ||
619 | * @see SimplePie::set_copyright_class() | ||
620 | * @access private | ||
621 | */ | ||
622 | public $copyright_class = 'SimplePie_Copyright'; | ||
623 | |||
624 | /** | ||
625 | * @var string Class used for Media RSS <media:credit> | ||
626 | * @see SimplePie::set_credit_class() | ||
627 | * @access private | ||
628 | */ | ||
629 | public $credit_class = 'SimplePie_Credit'; | ||
630 | |||
631 | /** | ||
632 | * @var string Class used for Media RSS <media:rating> | ||
633 | * @see SimplePie::set_rating_class() | ||
634 | * @access private | ||
635 | */ | ||
636 | public $rating_class = 'SimplePie_Rating'; | ||
637 | |||
638 | /** | ||
639 | * @var string Class used for Media RSS <media:restriction> | ||
640 | * @see SimplePie::set_restriction_class() | ||
641 | * @access private | ||
642 | */ | ||
643 | public $restriction_class = 'SimplePie_Restriction'; | ||
644 | |||
645 | /** | ||
646 | * @var string Class used for content-type sniffing | ||
647 | * @see SimplePie::set_content_type_sniffer_class() | ||
648 | * @access private | ||
649 | */ | ||
650 | public $content_type_sniffer_class = 'SimplePie_Content_Type_Sniffer'; | ||
651 | |||
652 | /** | ||
653 | * @var string Class used for item sources. | ||
654 | * @see SimplePie::set_source_class() | ||
655 | * @access private | ||
656 | */ | 546 | */ |
657 | public $source_class = 'SimplePie_Source'; | 547 | public $registry; |
658 | 548 | ||
659 | /** | 549 | /** |
660 | * @var int Maximum number of feeds to check with autodiscovery | 550 | * @var int Maximum number of feeds to check with autodiscovery |
@@ -721,35 +611,44 @@ class SimplePie_Core | |||
721 | /** | 611 | /** |
722 | * The SimplePie class contains feed level data and options | 612 | * The SimplePie class contains feed level data and options |
723 | * | 613 | * |
724 | * There are two ways that you can create a new SimplePie object. The first | 614 | * To use SimplePie, create the SimplePie object with no parameters. You can |
725 | * is by passing a feed URL as a parameter to the SimplePie constructor | 615 | * then set configuration options using the provided methods. After setting |
726 | * (as well as optionally setting the cache location and cache expiry). This | ||
727 | * will initialise the whole feed with all of the default settings, and you | ||
728 | * can begin accessing methods and properties immediately. | ||
729 | * | ||
730 | * The second way is to create the SimplePie object with no parameters | ||
731 | * at all. This will enable you to set configuration options. After setting | ||
732 | * them, you must initialise the feed using $feed->init(). At that point the | 616 | * them, you must initialise the feed using $feed->init(). At that point the |
733 | * object's methods and properties will be available to you. This format is | 617 | * object's methods and properties will be available to you. |
734 | * what is used throughout this documentation. | 618 | * |
619 | * Previously, it was possible to pass in the feed URL along with cache | ||
620 | * options directly into the constructor. This has been removed as of 1.3 as | ||
621 | * it caused a lot of confusion. | ||
735 | * | 622 | * |
736 | * @access public | ||
737 | * @since 1.0 Preview Release | 623 | * @since 1.0 Preview Release |
738 | */ | 624 | */ |
739 | public function __construct() | 625 | public function __construct() |
740 | { | 626 | { |
741 | if (version_compare(PHP_VERSION, '5.0', '<')) | 627 | if (version_compare(PHP_VERSION, '5.2', '<')) |
742 | { | 628 | { |
743 | trigger_error('PHP 4.x is no longer supported. Please upgrade to PHP 5.2 or newer.'); | 629 | trigger_error('PHP 4.x, 5.0 and 5.1 are no longer supported. Please upgrade to PHP 5.2 or newer.'); |
744 | die(); | 630 | die(); |
745 | } | 631 | } |
746 | 632 | ||
747 | // Other objects, instances created here so we can set options on them | 633 | // Other objects, instances created here so we can set options on them |
748 | $this->sanitize = new SimplePie_Sanitize(); | 634 | $this->sanitize = new SimplePie_Sanitize(); |
635 | $this->registry = new SimplePie_Registry(); | ||
749 | 636 | ||
750 | if (func_num_args() > 0) | 637 | if (func_num_args() > 0) |
751 | { | 638 | { |
752 | trigger_error('Passing parameters to the constructor is no longer supported. Please use set_feed_url(), set_cache_location(), and set_cache_location() directly.'); | 639 | $level = defined('E_USER_DEPRECATED') ? E_USER_DEPRECATED : E_USER_WARNING; |
640 | trigger_error('Passing parameters to the constructor is no longer supported. Please use set_feed_url(), set_cache_location(), and set_cache_location() directly.', $level); | ||
641 | |||
642 | $args = func_get_args(); | ||
643 | switch (count($args)) { | ||
644 | case 3: | ||
645 | $this->set_cache_duration($args[2]); | ||
646 | case 2: | ||
647 | $this->set_cache_location($args[1]); | ||
648 | case 1: | ||
649 | $this->set_feed_url($args[0]); | ||
650 | $this->init(); | ||
651 | } | ||
753 | } | 652 | } |
754 | } | 653 | } |
755 | 654 | ||
@@ -788,10 +687,11 @@ class SimplePie_Core | |||
788 | } | 687 | } |
789 | 688 | ||
790 | /** | 689 | /** |
791 | * Force the given data/URL to be treated as a feed no matter what it | 690 | * Force the given data/URL to be treated as a feed |
792 | * appears like | 691 | * |
692 | * This tells SimplePie to ignore the content-type provided by the server. | ||
693 | * Be careful when using this option, as it will also disable autodiscovery. | ||
793 | * | 694 | * |
794 | * @access public | ||
795 | * @since 1.1 | 695 | * @since 1.1 |
796 | * @param bool $enable Force the given data/URL to be treated as a feed | 696 | * @param bool $enable Force the given data/URL to be treated as a feed |
797 | */ | 697 | */ |
@@ -801,7 +701,7 @@ class SimplePie_Core | |||
801 | } | 701 | } |
802 | 702 | ||
803 | /** | 703 | /** |
804 | * This is the URL of the feed you want to parse. | 704 | * Set the URL of the feed you want to parse |
805 | * | 705 | * |
806 | * This allows you to enter the URL of the feed you want to parse, or the | 706 | * This allows you to enter the URL of the feed you want to parse, or the |
807 | * website you want to try to use auto-discovery on. This takes priority | 707 | * website you want to try to use auto-discovery on. This takes priority |
@@ -811,37 +711,35 @@ class SimplePie_Core | |||
811 | * of a string for the $url. Remember that with each additional feed comes | 711 | * of a string for the $url. Remember that with each additional feed comes |
812 | * additional processing and resources. | 712 | * additional processing and resources. |
813 | * | 713 | * |
814 | * @access public | ||
815 | * @since 1.0 Preview Release | 714 | * @since 1.0 Preview Release |
816 | * @param mixed $url This is the URL (or array of URLs) that you want to parse. | 715 | * @see set_raw_data() |
817 | * @see SimplePie::set_raw_data() | 716 | * @param string|array $url This is the URL (or array of URLs) that you want to parse. |
818 | */ | 717 | */ |
819 | public function set_feed_url($url) | 718 | public function set_feed_url($url) |
820 | { | 719 | { |
720 | $this->multifeed_url = array(); | ||
821 | if (is_array($url)) | 721 | if (is_array($url)) |
822 | { | 722 | { |
823 | $this->multifeed_url = array(); | ||
824 | foreach ($url as $value) | 723 | foreach ($url as $value) |
825 | { | 724 | { |
826 | $this->multifeed_url[] = SimplePie_Misc::fix_protocol($value, 1); | 725 | $this->multifeed_url[] = $this->registry->call('Misc', 'fix_protocol', array($value, 1)); |
827 | } | 726 | } |
828 | } | 727 | } |
829 | else | 728 | else |
830 | { | 729 | { |
831 | $this->feed_url = SimplePie_Misc::fix_protocol($url, 1); | 730 | $this->feed_url = $this->registry->call('Misc', 'fix_protocol', array($url, 1)); |
832 | } | 731 | } |
833 | } | 732 | } |
834 | 733 | ||
835 | /** | 734 | /** |
836 | * Provides an instance of SimplePie_File to use as a feed | 735 | * Set an instance of {@see SimplePie_File} to use as a feed |
837 | * | 736 | * |
838 | * @access public | 737 | * @param SimplePie_File &$file |
839 | * @param object &$file Instance of SimplePie_File (or subclass) | ||
840 | * @return bool True on success, false on failure | 738 | * @return bool True on success, false on failure |
841 | */ | 739 | */ |
842 | public function set_file(&$file) | 740 | public function set_file(&$file) |
843 | { | 741 | { |
844 | if (is_a($file, 'SimplePie_File')) | 742 | if ($file instanceof SimplePie_File) |
845 | { | 743 | { |
846 | $this->feed_url = $file->url; | 744 | $this->feed_url = $file->url; |
847 | $this->file =& $file; | 745 | $this->file =& $file; |
@@ -851,16 +749,17 @@ class SimplePie_Core | |||
851 | } | 749 | } |
852 | 750 | ||
853 | /** | 751 | /** |
752 | * Set the raw XML data to parse | ||
753 | * | ||
854 | * Allows you to use a string of RSS/Atom data instead of a remote feed. | 754 | * Allows you to use a string of RSS/Atom data instead of a remote feed. |
855 | * | 755 | * |
856 | * If you have a feed available as a string in PHP, you can tell SimplePie | 756 | * If you have a feed available as a string in PHP, you can tell SimplePie |
857 | * to parse that data string instead of a remote feed. Any set feed URL | 757 | * to parse that data string instead of a remote feed. Any set feed URL |
858 | * takes precedence. | 758 | * takes precedence. |
859 | * | 759 | * |
860 | * @access public | ||
861 | * @since 1.0 Beta 3 | 760 | * @since 1.0 Beta 3 |
862 | * @param string $data RSS or Atom data as a string. | 761 | * @param string $data RSS or Atom data as a string. |
863 | * @see SimplePie::set_feed_url() | 762 | * @see set_feed_url() |
864 | */ | 763 | */ |
865 | public function set_raw_data($data) | 764 | public function set_raw_data($data) |
866 | { | 765 | { |
@@ -868,12 +767,11 @@ class SimplePie_Core | |||
868 | } | 767 | } |
869 | 768 | ||
870 | /** | 769 | /** |
871 | * Allows you to override the default timeout for fetching remote feeds. | 770 | * Set the default timeout for fetching remote feeds |
872 | * | 771 | * |
873 | * This allows you to change the maximum time allowed for the feed's server to respond | 772 | * This allows you to change the maximum time allowed for the feed's server to respond |
874 | * and send the feed back. | 773 | * and send the feed back. |
875 | * | 774 | * |
876 | * @access public | ||
877 | * @since 1.0 Beta 3 | 775 | * @since 1.0 Beta 3 |
878 | * @param int $timeout The maximum number of seconds to spend waiting to retrieve a feed. | 776 | * @param int $timeout The maximum number of seconds to spend waiting to retrieve a feed. |
879 | */ | 777 | */ |
@@ -883,10 +781,8 @@ class SimplePie_Core | |||
883 | } | 781 | } |
884 | 782 | ||
885 | /** | 783 | /** |
886 | * Forces SimplePie to use fsockopen() instead of the preferred cURL | 784 | * Force SimplePie to use fsockopen() instead of cURL |
887 | * functions. | ||
888 | * | 785 | * |
889 | * @access public | ||
890 | * @since 1.0 Beta 3 | 786 | * @since 1.0 Beta 3 |
891 | * @param bool $enable Force fsockopen() to be used | 787 | * @param bool $enable Force fsockopen() to be used |
892 | */ | 788 | */ |
@@ -896,12 +792,11 @@ class SimplePie_Core | |||
896 | } | 792 | } |
897 | 793 | ||
898 | /** | 794 | /** |
899 | * Enables/disables caching in SimplePie. | 795 | * Enable/disable caching in SimplePie. |
900 | * | 796 | * |
901 | * This option allows you to disable caching altogether in SimplePie. | 798 | * This option allows you to disable caching altogether in SimplePie. |
902 | * However, disabling the cache can lead to longer load times. | 798 | * However, disabling the cache can lead to longer load times. |
903 | * | 799 | * |
904 | * @access public | ||
905 | * @since 1.0 Preview Release | 800 | * @since 1.0 Preview Release |
906 | * @param bool $enable Enable caching | 801 | * @param bool $enable Enable caching |
907 | */ | 802 | */ |
@@ -911,11 +806,10 @@ class SimplePie_Core | |||
911 | } | 806 | } |
912 | 807 | ||
913 | /** | 808 | /** |
914 | * Set the length of time (in seconds) that the contents of a feed | 809 | * Set the length of time (in seconds) that the contents of a feed will be |
915 | * will be cached. | 810 | * cached |
916 | * | 811 | * |
917 | * @access public | 812 | * @param int $seconds The feed content cache duration |
918 | * @param int $seconds The feed content cache duration. | ||
919 | */ | 813 | */ |
920 | public function set_cache_duration($seconds = 3600) | 814 | public function set_cache_duration($seconds = 3600) |
921 | { | 815 | { |
@@ -923,10 +817,9 @@ class SimplePie_Core | |||
923 | } | 817 | } |
924 | 818 | ||
925 | /** | 819 | /** |
926 | * Set the length of time (in seconds) that the autodiscovered feed | 820 | * Set the length of time (in seconds) that the autodiscovered feed URL will |
927 | * URL will be cached. | 821 | * be cached |
928 | * | 822 | * |
929 | * @access public | ||
930 | * @param int $seconds The autodiscovered feed URL cache duration. | 823 | * @param int $seconds The autodiscovered feed URL cache duration. |
931 | */ | 824 | */ |
932 | public function set_autodiscovery_cache_duration($seconds = 604800) | 825 | public function set_autodiscovery_cache_duration($seconds = 604800) |
@@ -935,9 +828,8 @@ class SimplePie_Core | |||
935 | } | 828 | } |
936 | 829 | ||
937 | /** | 830 | /** |
938 | * Set the file system location where the cached files should be stored. | 831 | * Set the file system location where the cached files should be stored |
939 | * | 832 | * |
940 | * @access public | ||
941 | * @param string $location The file system location. | 833 | * @param string $location The file system location. |
942 | */ | 834 | */ |
943 | public function set_cache_location($location = './cache') | 835 | public function set_cache_location($location = './cache') |
@@ -946,9 +838,8 @@ class SimplePie_Core | |||
946 | } | 838 | } |
947 | 839 | ||
948 | /** | 840 | /** |
949 | * Determines whether feed items should be sorted into reverse chronological order. | 841 | * Set whether feed items should be sorted into reverse chronological order |
950 | * | 842 | * |
951 | * @access public | ||
952 | * @param bool $enable Sort as reverse chronological order. | 843 | * @param bool $enable Sort as reverse chronological order. |
953 | */ | 844 | */ |
954 | public function enable_order_by_date($enable = true) | 845 | public function enable_order_by_date($enable = true) |
@@ -957,10 +848,12 @@ class SimplePie_Core | |||
957 | } | 848 | } |
958 | 849 | ||
959 | /** | 850 | /** |
960 | * Allows you to override the character encoding reported by the feed. | 851 | * Set the character encoding used to parse the feed |
961 | * | 852 | * |
962 | * @access public | 853 | * This overrides the encoding reported by the feed, however it will fall |
963 | * @param string $encoding Character encoding. | 854 | * back to the normal encoding detection if the override fails |
855 | * | ||
856 | * @param string $encoding Character encoding | ||
964 | */ | 857 | */ |
965 | public function set_input_encoding($encoding = false) | 858 | public function set_input_encoding($encoding = false) |
966 | { | 859 | { |
@@ -977,7 +870,6 @@ class SimplePie_Core | |||
977 | /** | 870 | /** |
978 | * Set how much feed autodiscovery to do | 871 | * Set how much feed autodiscovery to do |
979 | * | 872 | * |
980 | * @access public | ||
981 | * @see SIMPLEPIE_LOCATOR_NONE | 873 | * @see SIMPLEPIE_LOCATOR_NONE |
982 | * @see SIMPLEPIE_LOCATOR_AUTODISCOVERY | 874 | * @see SIMPLEPIE_LOCATOR_AUTODISCOVERY |
983 | * @see SIMPLEPIE_LOCATOR_LOCAL_EXTENSION | 875 | * @see SIMPLEPIE_LOCATOR_LOCAL_EXTENSION |
@@ -985,8 +877,7 @@ class SimplePie_Core | |||
985 | * @see SIMPLEPIE_LOCATOR_REMOTE_EXTENSION | 877 | * @see SIMPLEPIE_LOCATOR_REMOTE_EXTENSION |
986 | * @see SIMPLEPIE_LOCATOR_REMOTE_BODY | 878 | * @see SIMPLEPIE_LOCATOR_REMOTE_BODY |
987 | * @see SIMPLEPIE_LOCATOR_ALL | 879 | * @see SIMPLEPIE_LOCATOR_ALL |
988 | * @param int $level Feed Autodiscovery Level (level can be a | 880 | * @param int $level Feed Autodiscovery Level (level can be a combination of the above constants, see bitwise OR operator) |
989 | * combination of the above constants, see bitwise OR operator) | ||
990 | */ | 881 | */ |
991 | public function set_autodiscovery_level($level = SIMPLEPIE_LOCATOR_ALL) | 882 | public function set_autodiscovery_level($level = SIMPLEPIE_LOCATOR_ALL) |
992 | { | 883 | { |
@@ -994,297 +885,157 @@ class SimplePie_Core | |||
994 | } | 885 | } |
995 | 886 | ||
996 | /** | 887 | /** |
997 | * Allows you to change which class SimplePie uses for caching. | 888 | * Get the class registry |
889 | * | ||
890 | * Use this to override SimplePie's default classes | ||
891 | * @see SimplePie_Registry | ||
892 | * @return SimplePie_Registry | ||
893 | */ | ||
894 | public function &get_registry() | ||
895 | { | ||
896 | return $this->registry; | ||
897 | } | ||
898 | |||
899 | /**#@+ | ||
998 | * Useful when you are overloading or extending SimplePie's default classes. | 900 | * Useful when you are overloading or extending SimplePie's default classes. |
999 | * | 901 | * |
1000 | * @access public | 902 | * @deprecated Use {@see get_registry()} instead |
1001 | * @param string $class Name of custom class. | ||
1002 | * @link http://php.net/manual/en/language.oop5.basic.php#language.oop5.basic.extends PHP5 extends documentation | 903 | * @link http://php.net/manual/en/language.oop5.basic.php#language.oop5.basic.extends PHP5 extends documentation |
904 | * @param string $class Name of custom class | ||
905 | * @return boolean True on success, false otherwise | ||
906 | */ | ||
907 | /** | ||
908 | * Set which class SimplePie uses for caching | ||
1003 | */ | 909 | */ |
1004 | public function set_cache_class($class = 'SimplePie_Cache') | 910 | public function set_cache_class($class = 'SimplePie_Cache') |
1005 | { | 911 | { |
1006 | if (SimplePie_Misc::is_subclass_of($class, 'SimplePie_Cache')) | 912 | return $this->registry->register('Cache', $class, true); |
1007 | { | ||
1008 | $this->cache_class = $class; | ||
1009 | return true; | ||
1010 | } | ||
1011 | return false; | ||
1012 | } | 913 | } |
1013 | 914 | ||
1014 | /** | 915 | /** |
1015 | * Allows you to change which class SimplePie uses for auto-discovery. | 916 | * Set which class SimplePie uses for auto-discovery |
1016 | * Useful when you are overloading or extending SimplePie's default classes. | ||
1017 | * | ||
1018 | * @access public | ||
1019 | * @param string $class Name of custom class. | ||
1020 | * @link http://php.net/manual/en/language.oop5.basic.php#language.oop5.basic.extends PHP5 extends documentation | ||
1021 | */ | 917 | */ |
1022 | public function set_locator_class($class = 'SimplePie_Locator') | 918 | public function set_locator_class($class = 'SimplePie_Locator') |
1023 | { | 919 | { |
1024 | if (SimplePie_Misc::is_subclass_of($class, 'SimplePie_Locator')) | 920 | return $this->registry->register('Locator', $class, true); |
1025 | { | ||
1026 | $this->locator_class = $class; | ||
1027 | return true; | ||
1028 | } | ||
1029 | return false; | ||
1030 | } | 921 | } |
1031 | 922 | ||
1032 | /** | 923 | /** |
1033 | * Allows you to change which class SimplePie uses for XML parsing. | 924 | * Set which class SimplePie uses for XML parsing |
1034 | * Useful when you are overloading or extending SimplePie's default classes. | ||
1035 | * | ||
1036 | * @access public | ||
1037 | * @param string $class Name of custom class. | ||
1038 | * @link http://php.net/manual/en/language.oop5.basic.php#language.oop5.basic.extends PHP5 extends documentation | ||
1039 | */ | 925 | */ |
1040 | public function set_parser_class($class = 'SimplePie_Parser') | 926 | public function set_parser_class($class = 'SimplePie_Parser') |
1041 | { | 927 | { |
1042 | if (SimplePie_Misc::is_subclass_of($class, 'SimplePie_Parser')) | 928 | return $this->registry->register('Parser', $class, true); |
1043 | { | ||
1044 | $this->parser_class = $class; | ||
1045 | return true; | ||
1046 | } | ||
1047 | return false; | ||
1048 | } | 929 | } |
1049 | 930 | ||
1050 | /** | 931 | /** |
1051 | * Allows you to change which class SimplePie uses for remote file fetching. | 932 | * Set which class SimplePie uses for remote file fetching |
1052 | * Useful when you are overloading or extending SimplePie's default classes. | ||
1053 | * | ||
1054 | * @access public | ||
1055 | * @param string $class Name of custom class. | ||
1056 | * @link http://php.net/manual/en/language.oop5.basic.php#language.oop5.basic.extends PHP5 extends documentation | ||
1057 | */ | 933 | */ |
1058 | public function set_file_class($class = 'SimplePie_File') | 934 | public function set_file_class($class = 'SimplePie_File') |
1059 | { | 935 | { |
1060 | if (SimplePie_Misc::is_subclass_of($class, 'SimplePie_File')) | 936 | return $this->registry->register('File', $class, true); |
1061 | { | ||
1062 | $this->file_class = $class; | ||
1063 | return true; | ||
1064 | } | ||
1065 | return false; | ||
1066 | } | 937 | } |
1067 | 938 | ||
1068 | /** | 939 | /** |
1069 | * Allows you to change which class SimplePie uses for data sanitization. | 940 | * Set which class SimplePie uses for data sanitization |
1070 | * Useful when you are overloading or extending SimplePie's default classes. | ||
1071 | * | ||
1072 | * @access public | ||
1073 | * @param string $class Name of custom class. | ||
1074 | * @link http://php.net/manual/en/language.oop5.basic.php#language.oop5.basic.extends PHP5 extends documentation | ||
1075 | */ | 941 | */ |
1076 | public function set_sanitize_class($class = 'SimplePie_Sanitize') | 942 | public function set_sanitize_class($class = 'SimplePie_Sanitize') |
1077 | { | 943 | { |
1078 | if (SimplePie_Misc::is_subclass_of($class, 'SimplePie_Sanitize')) | 944 | return $this->registry->register('Sanitize', $class, true); |
1079 | { | ||
1080 | $this->sanitize = new $class(); | ||
1081 | return true; | ||
1082 | } | ||
1083 | return false; | ||
1084 | } | 945 | } |
1085 | 946 | ||
1086 | /** | 947 | /** |
1087 | * Allows you to change which class SimplePie uses for handling feed items. | 948 | * Set which class SimplePie uses for handling feed items |
1088 | * Useful when you are overloading or extending SimplePie's default classes. | ||
1089 | * | ||
1090 | * @access public | ||
1091 | * @param string $class Name of custom class. | ||
1092 | * @link http://php.net/manual/en/language.oop5.basic.php#language.oop5.basic.extends PHP5 extends documentation | ||
1093 | */ | 949 | */ |
1094 | public function set_item_class($class = 'SimplePie_Item') | 950 | public function set_item_class($class = 'SimplePie_Item') |
1095 | { | 951 | { |
1096 | if (SimplePie_Misc::is_subclass_of($class, 'SimplePie_Item')) | 952 | return $this->registry->register('Item', $class, true); |
1097 | { | ||
1098 | $this->item_class = $class; | ||
1099 | return true; | ||
1100 | } | ||
1101 | return false; | ||
1102 | } | 953 | } |
1103 | 954 | ||
1104 | /** | 955 | /** |
1105 | * Allows you to change which class SimplePie uses for handling author data. | 956 | * Set which class SimplePie uses for handling author data |
1106 | * Useful when you are overloading or extending SimplePie's default classes. | ||
1107 | * | ||
1108 | * @access public | ||
1109 | * @param string $class Name of custom class. | ||
1110 | * @link http://php.net/manual/en/language.oop5.basic.php#language.oop5.basic.extends PHP5 extends documentation | ||
1111 | */ | 957 | */ |
1112 | public function set_author_class($class = 'SimplePie_Author') | 958 | public function set_author_class($class = 'SimplePie_Author') |
1113 | { | 959 | { |
1114 | if (SimplePie_Misc::is_subclass_of($class, 'SimplePie_Author')) | 960 | return $this->registry->register('Author', $class, true); |
1115 | { | ||
1116 | $this->author_class = $class; | ||
1117 | return true; | ||
1118 | } | ||
1119 | return false; | ||
1120 | } | 961 | } |
1121 | 962 | ||
1122 | /** | 963 | /** |
1123 | * Allows you to change which class SimplePie uses for handling category data. | 964 | * Set which class SimplePie uses for handling category data |
1124 | * Useful when you are overloading or extending SimplePie's default classes. | ||
1125 | * | ||
1126 | * @access public | ||
1127 | * @param string $class Name of custom class. | ||
1128 | * @link http://php.net/manual/en/language.oop5.basic.php#language.oop5.basic.extends PHP5 extends documentation | ||
1129 | */ | 965 | */ |
1130 | public function set_category_class($class = 'SimplePie_Category') | 966 | public function set_category_class($class = 'SimplePie_Category') |
1131 | { | 967 | { |
1132 | if (SimplePie_Misc::is_subclass_of($class, 'SimplePie_Category')) | 968 | return $this->registry->register('Category', $class, true); |
1133 | { | ||
1134 | $this->category_class = $class; | ||
1135 | return true; | ||
1136 | } | ||
1137 | return false; | ||
1138 | } | 969 | } |
1139 | 970 | ||
1140 | /** | 971 | /** |
1141 | * Allows you to change which class SimplePie uses for feed enclosures. | 972 | * Set which class SimplePie uses for feed enclosures |
1142 | * Useful when you are overloading or extending SimplePie's default classes. | ||
1143 | * | ||
1144 | * @access public | ||
1145 | * @param string $class Name of custom class. | ||
1146 | * @link http://php.net/manual/en/language.oop5.basic.php#language.oop5.basic.extends PHP5 extends documentation | ||
1147 | */ | 973 | */ |
1148 | public function set_enclosure_class($class = 'SimplePie_Enclosure') | 974 | public function set_enclosure_class($class = 'SimplePie_Enclosure') |
1149 | { | 975 | { |
1150 | if (SimplePie_Misc::is_subclass_of($class, 'SimplePie_Enclosure')) | 976 | return $this->registry->register('Enclosure', $class, true); |
1151 | { | ||
1152 | $this->enclosure_class = $class; | ||
1153 | return true; | ||
1154 | } | ||
1155 | return false; | ||
1156 | } | 977 | } |
1157 | 978 | ||
1158 | /** | 979 | /** |
1159 | * Allows you to change which class SimplePie uses for <media:text> captions | 980 | * Set which class SimplePie uses for `<media:text>` captions |
1160 | * Useful when you are overloading or extending SimplePie's default classes. | ||
1161 | * | ||
1162 | * @access public | ||
1163 | * @param string $class Name of custom class. | ||
1164 | * @link http://php.net/manual/en/language.oop5.basic.php#language.oop5.basic.extends PHP5 extends documentation | ||
1165 | */ | 981 | */ |
1166 | public function set_caption_class($class = 'SimplePie_Caption') | 982 | public function set_caption_class($class = 'SimplePie_Caption') |
1167 | { | 983 | { |
1168 | if (SimplePie_Misc::is_subclass_of($class, 'SimplePie_Caption')) | 984 | return $this->registry->register('Caption', $class, true); |
1169 | { | ||
1170 | $this->caption_class = $class; | ||
1171 | return true; | ||
1172 | } | ||
1173 | return false; | ||
1174 | } | 985 | } |
1175 | 986 | ||
1176 | /** | 987 | /** |
1177 | * Allows you to change which class SimplePie uses for <media:copyright> | 988 | * Set which class SimplePie uses for `<media:copyright>` |
1178 | * Useful when you are overloading or extending SimplePie's default classes. | ||
1179 | * | ||
1180 | * @access public | ||
1181 | * @param string $class Name of custom class. | ||
1182 | * @link http://php.net/manual/en/language.oop5.basic.php#language.oop5.basic.extends PHP5 extends documentation | ||
1183 | */ | 989 | */ |
1184 | public function set_copyright_class($class = 'SimplePie_Copyright') | 990 | public function set_copyright_class($class = 'SimplePie_Copyright') |
1185 | { | 991 | { |
1186 | if (SimplePie_Misc::is_subclass_of($class, 'SimplePie_Copyright')) | 992 | return $this->registry->register('Copyright', $class, true); |
1187 | { | ||
1188 | $this->copyright_class = $class; | ||
1189 | return true; | ||
1190 | } | ||
1191 | return false; | ||
1192 | } | 993 | } |
1193 | 994 | ||
1194 | /** | 995 | /** |
1195 | * Allows you to change which class SimplePie uses for <media:credit> | 996 | * Set which class SimplePie uses for `<media:credit>` |
1196 | * Useful when you are overloading or extending SimplePie's default classes. | ||
1197 | * | ||
1198 | * @access public | ||
1199 | * @param string $class Name of custom class. | ||
1200 | * @link http://php.net/manual/en/language.oop5.basic.php#language.oop5.basic.extends PHP5 extends documentation | ||
1201 | */ | 997 | */ |
1202 | public function set_credit_class($class = 'SimplePie_Credit') | 998 | public function set_credit_class($class = 'SimplePie_Credit') |
1203 | { | 999 | { |
1204 | if (SimplePie_Misc::is_subclass_of($class, 'SimplePie_Credit')) | 1000 | return $this->registry->register('Credit', $class, true); |
1205 | { | ||
1206 | $this->credit_class = $class; | ||
1207 | return true; | ||
1208 | } | ||
1209 | return false; | ||
1210 | } | 1001 | } |
1211 | 1002 | ||
1212 | /** | 1003 | /** |
1213 | * Allows you to change which class SimplePie uses for <media:rating> | 1004 | * Set which class SimplePie uses for `<media:rating>` |
1214 | * Useful when you are overloading or extending SimplePie's default classes. | ||
1215 | * | ||
1216 | * @access public | ||
1217 | * @param string $class Name of custom class. | ||
1218 | * @link http://php.net/manual/en/language.oop5.basic.php#language.oop5.basic.extends PHP5 extends documentation | ||
1219 | */ | 1005 | */ |
1220 | public function set_rating_class($class = 'SimplePie_Rating') | 1006 | public function set_rating_class($class = 'SimplePie_Rating') |
1221 | { | 1007 | { |
1222 | if (SimplePie_Misc::is_subclass_of($class, 'SimplePie_Rating')) | 1008 | return $this->registry->register('Rating', $class, true); |
1223 | { | ||
1224 | $this->rating_class = $class; | ||
1225 | return true; | ||
1226 | } | ||
1227 | return false; | ||
1228 | } | 1009 | } |
1229 | 1010 | ||
1230 | /** | 1011 | /** |
1231 | * Allows you to change which class SimplePie uses for <media:restriction> | 1012 | * Set which class SimplePie uses for `<media:restriction>` |
1232 | * Useful when you are overloading or extending SimplePie's default classes. | ||
1233 | * | ||
1234 | * @access public | ||
1235 | * @param string $class Name of custom class. | ||
1236 | * @link http://php.net/manual/en/language.oop5.basic.php#language.oop5.basic.extends PHP5 extends documentation | ||
1237 | */ | 1013 | */ |
1238 | public function set_restriction_class($class = 'SimplePie_Restriction') | 1014 | public function set_restriction_class($class = 'SimplePie_Restriction') |
1239 | { | 1015 | { |
1240 | if (SimplePie_Misc::is_subclass_of($class, 'SimplePie_Restriction')) | 1016 | return $this->registry->register('Restriction', $class, true); |
1241 | { | ||
1242 | $this->restriction_class = $class; | ||
1243 | return true; | ||
1244 | } | ||
1245 | return false; | ||
1246 | } | 1017 | } |
1247 | 1018 | ||
1248 | /** | 1019 | /** |
1249 | * Allows you to change which class SimplePie uses for content-type sniffing. | 1020 | * Set which class SimplePie uses for content-type sniffing |
1250 | * Useful when you are overloading or extending SimplePie's default classes. | ||
1251 | * | ||
1252 | * @access public | ||
1253 | * @param string $class Name of custom class. | ||
1254 | * @link http://php.net/manual/en/language.oop5.basic.php#language.oop5.basic.extends PHP5 extends documentation | ||
1255 | */ | 1021 | */ |
1256 | public function set_content_type_sniffer_class($class = 'SimplePie_Content_Type_Sniffer') | 1022 | public function set_content_type_sniffer_class($class = 'SimplePie_Content_Type_Sniffer') |
1257 | { | 1023 | { |
1258 | if (SimplePie_Misc::is_subclass_of($class, 'SimplePie_Content_Type_Sniffer')) | 1024 | return $this->registry->register('Content_Type_Sniffer', $class, true); |
1259 | { | ||
1260 | $this->content_type_sniffer_class = $class; | ||
1261 | return true; | ||
1262 | } | ||
1263 | return false; | ||
1264 | } | 1025 | } |
1265 | 1026 | ||
1266 | /** | 1027 | /** |
1267 | * Allows you to change which class SimplePie uses item sources. | 1028 | * Set which class SimplePie uses for item sources |
1268 | * Useful when you are overloading or extending SimplePie's default classes. | ||
1269 | * | ||
1270 | * @access public | ||
1271 | * @param string $class Name of custom class. | ||
1272 | * @link http://php.net/manual/en/language.oop5.basic.php#language.oop5.basic.extends PHP5 extends documentation | ||
1273 | */ | 1029 | */ |
1274 | public function set_source_class($class = 'SimplePie_Source') | 1030 | public function set_source_class($class = 'SimplePie_Source') |
1275 | { | 1031 | { |
1276 | if (SimplePie_Misc::is_subclass_of($class, 'SimplePie_Source')) | 1032 | return $this->registry->register('Source', $class, true); |
1277 | { | ||
1278 | $this->source_class = $class; | ||
1279 | return true; | ||
1280 | } | ||
1281 | return false; | ||
1282 | } | 1033 | } |
1034 | /**#@-*/ | ||
1283 | 1035 | ||
1284 | /** | 1036 | /** |
1285 | * Allows you to override the default user agent string. | 1037 | * Set the user agent string |
1286 | * | 1038 | * |
1287 | * @access public | ||
1288 | * @param string $ua New user agent string. | 1039 | * @param string $ua New user agent string. |
1289 | */ | 1040 | */ |
1290 | public function set_useragent($ua = SIMPLEPIE_USERAGENT) | 1041 | public function set_useragent($ua = SIMPLEPIE_USERAGENT) |
@@ -1295,7 +1046,6 @@ class SimplePie_Core | |||
1295 | /** | 1046 | /** |
1296 | * Set callback function to create cache filename with | 1047 | * Set callback function to create cache filename with |
1297 | * | 1048 | * |
1298 | * @access public | ||
1299 | * @param mixed $function Callback function | 1049 | * @param mixed $function Callback function |
1300 | */ | 1050 | */ |
1301 | public function set_cache_name_function($function = 'md5') | 1051 | public function set_cache_name_function($function = 'md5') |
@@ -1307,10 +1057,11 @@ class SimplePie_Core | |||
1307 | } | 1057 | } |
1308 | 1058 | ||
1309 | /** | 1059 | /** |
1310 | * Set options to make SP as fast as possible. Forgoes a | 1060 | * Set options to make SP as fast as possible |
1311 | * substantial amount of data sanitization in favor of speed. | 1061 | * |
1062 | * Forgoes a substantial amount of data sanitization in favor of speed. This | ||
1063 | * turns SimplePie into a dumb parser of feeds. | ||
1312 | * | 1064 | * |
1313 | * @access public | ||
1314 | * @param bool $set Whether to set them or not | 1065 | * @param bool $set Whether to set them or not |
1315 | */ | 1066 | */ |
1316 | public function set_stupidly_fast($set = false) | 1067 | public function set_stupidly_fast($set = false) |
@@ -1329,7 +1080,6 @@ class SimplePie_Core | |||
1329 | /** | 1080 | /** |
1330 | * Set maximum number of feeds to check with autodiscovery | 1081 | * Set maximum number of feeds to check with autodiscovery |
1331 | * | 1082 | * |
1332 | * @access public | ||
1333 | * @param int $max Maximum number of feeds to check | 1083 | * @param int $max Maximum number of feeds to check |
1334 | */ | 1084 | */ |
1335 | public function set_max_checked_feeds($max = 10) | 1085 | public function set_max_checked_feeds($max = 10) |
@@ -1369,6 +1119,28 @@ class SimplePie_Core | |||
1369 | $this->sanitize->strip_attributes($attribs); | 1119 | $this->sanitize->strip_attributes($attribs); |
1370 | } | 1120 | } |
1371 | 1121 | ||
1122 | /** | ||
1123 | * Set the output encoding | ||
1124 | * | ||
1125 | * Allows you to override SimplePie's output to match that of your webpage. | ||
1126 | * This is useful for times when your webpages are not being served as | ||
1127 | * UTF-8. This setting will be obeyed by {@see handle_content_type()}, and | ||
1128 | * is similar to {@see set_input_encoding()}. | ||
1129 | * | ||
1130 | * It should be noted, however, that not all character encodings can support | ||
1131 | * all characters. If your page is being served as ISO-8859-1 and you try | ||
1132 | * to display a Japanese feed, you'll likely see garbled characters. | ||
1133 | * Because of this, it is highly recommended to ensure that your webpages | ||
1134 | * are served as UTF-8. | ||
1135 | * | ||
1136 | * The number of supported character encodings depends on whether your web | ||
1137 | * host supports {@link http://php.net/mbstring mbstring}, | ||
1138 | * {@link http://php.net/iconv iconv}, or both. See | ||
1139 | * {@link http://simplepie.org/wiki/faq/Supported_Character_Encodings} for | ||
1140 | * more information. | ||
1141 | * | ||
1142 | * @param string $encoding | ||
1143 | */ | ||
1372 | public function set_output_encoding($encoding = 'UTF-8') | 1144 | public function set_output_encoding($encoding = 'UTF-8') |
1373 | { | 1145 | { |
1374 | $this->sanitize->set_output_encoding($encoding); | 1146 | $this->sanitize->set_output_encoding($encoding); |
@@ -1383,11 +1155,14 @@ class SimplePie_Core | |||
1383 | * Set element/attribute key/value pairs of HTML attributes | 1155 | * Set element/attribute key/value pairs of HTML attributes |
1384 | * containing URLs that need to be resolved relative to the feed | 1156 | * containing URLs that need to be resolved relative to the feed |
1385 | * | 1157 | * |
1386 | * @access public | 1158 | * Defaults to |a|@href, |area|@href, |blockquote|@cite, |del|@cite, |
1159 | * |form|@action, |img|@longdesc, |img|@src, |input|@src, |ins|@cite, | ||
1160 | * |q|@cite | ||
1161 | * | ||
1387 | * @since 1.0 | 1162 | * @since 1.0 |
1388 | * @param array $element_attribute Element/attribute key/value pairs | 1163 | * @param array|null $element_attribute Element/attribute key/value pairs, null for default |
1389 | */ | 1164 | */ |
1390 | public function set_url_replacements($element_attribute = array('a' => 'href', 'area' => 'href', 'blockquote' => 'cite', 'del' => 'cite', 'form' => 'action', 'img' => array('longdesc', 'src'), 'input' => 'src', 'ins' => 'cite', 'q' => 'cite')) | 1165 | public function set_url_replacements($element_attribute = null) |
1391 | { | 1166 | { |
1392 | $this->sanitize->set_url_replacements($element_attribute); | 1167 | $this->sanitize->set_url_replacements($element_attribute); |
1393 | } | 1168 | } |
@@ -1395,7 +1170,6 @@ class SimplePie_Core | |||
1395 | /** | 1170 | /** |
1396 | * Set the handler to enable the display of cached images. | 1171 | * Set the handler to enable the display of cached images. |
1397 | * | 1172 | * |
1398 | * @access public | ||
1399 | * @param str $page Web-accessible path to the handler_image.php file. | 1173 | * @param str $page Web-accessible path to the handler_image.php file. |
1400 | * @param str $qs The query string that the value should be passed to. | 1174 | * @param str $qs The query string that the value should be passed to. |
1401 | */ | 1175 | */ |
@@ -1412,9 +1186,8 @@ class SimplePie_Core | |||
1412 | } | 1186 | } |
1413 | 1187 | ||
1414 | /** | 1188 | /** |
1415 | * Set the limit for items returned per-feed with multifeeds. | 1189 | * Set the limit for items returned per-feed with multifeeds |
1416 | * | 1190 | * |
1417 | * @access public | ||
1418 | * @param integer $limit The maximum number of items to return. | 1191 | * @param integer $limit The maximum number of items to return. |
1419 | */ | 1192 | */ |
1420 | public function set_item_limit($limit = 0) | 1193 | public function set_item_limit($limit = 0) |
@@ -1422,10 +1195,19 @@ class SimplePie_Core | |||
1422 | $this->item_limit = (int) $limit; | 1195 | $this->item_limit = (int) $limit; |
1423 | } | 1196 | } |
1424 | 1197 | ||
1198 | /** | ||
1199 | * Initialize the feed object | ||
1200 | * | ||
1201 | * This is what makes everything happen. Period. This is where all of the | ||
1202 | * configuration options get processed, feeds are fetched, cached, and | ||
1203 | * parsed, and all of that other good stuff. | ||
1204 | * | ||
1205 | * @return boolean True if successful, false otherwise | ||
1206 | */ | ||
1425 | public function init() | 1207 | public function init() |
1426 | { | 1208 | { |
1427 | // Check absolute bare minimum requirements. | 1209 | // Check absolute bare minimum requirements. |
1428 | if ((function_exists('version_compare') && version_compare(PHP_VERSION, '5.0', '<')) || !extension_loaded('xml') || !extension_loaded('pcre')) | 1210 | if (!extension_loaded('xml') || !extension_loaded('pcre')) |
1429 | { | 1211 | { |
1430 | return false; | 1212 | return false; |
1431 | } | 1213 | } |
@@ -1446,321 +1228,333 @@ class SimplePie_Core | |||
1446 | } | 1228 | } |
1447 | } | 1229 | } |
1448 | 1230 | ||
1231 | if (method_exists($this->sanitize, 'set_registry')) | ||
1232 | { | ||
1233 | $this->sanitize->set_registry($this->registry); | ||
1234 | } | ||
1235 | |||
1449 | // Pass whatever was set with config options over to the sanitizer. | 1236 | // Pass whatever was set with config options over to the sanitizer. |
1450 | $this->sanitize->pass_cache_data($this->cache, $this->cache_location, $this->cache_name_function, $this->cache_class); | 1237 | // Pass the classes in for legacy support; new classes should use the registry instead |
1451 | $this->sanitize->pass_file_data($this->file_class, $this->timeout, $this->useragent, $this->force_fsockopen); | 1238 | $this->sanitize->pass_cache_data($this->cache, $this->cache_location, $this->cache_name_function, $this->registry->get_class('Cache')); |
1239 | $this->sanitize->pass_file_data($this->registry->get_class('File'), $this->timeout, $this->useragent, $this->force_fsockopen); | ||
1452 | 1240 | ||
1453 | if ($this->feed_url !== null || $this->raw_data !== null) | 1241 | if (!empty($this->multifeed_url)) |
1454 | { | 1242 | { |
1455 | $this->error = null; | 1243 | $i = 0; |
1456 | $this->data = array(); | 1244 | $success = 0; |
1457 | $this->multifeed_objects = array(); | 1245 | $this->multifeed_objects = array(); |
1458 | $cache = false; | 1246 | $this->error = array(); |
1459 | 1247 | foreach ($this->multifeed_url as $url) | |
1460 | if ($this->feed_url !== null) | ||
1461 | { | 1248 | { |
1462 | $parsed_feed_url = SimplePie_Misc::parse_url($this->feed_url); | 1249 | $this->multifeed_objects[$i] = clone $this; |
1463 | // Decide whether to enable caching | 1250 | $this->multifeed_objects[$i]->set_feed_url($url); |
1464 | if ($this->cache && $parsed_feed_url['scheme'] !== '') | 1251 | $single_success = $this->multifeed_objects[$i]->init(); |
1465 | { | 1252 | $success |= $single_success; |
1466 | $cache = call_user_func(array($this->cache_class, 'create'), $this->cache_location, call_user_func($this->cache_name_function, $this->feed_url), 'spc'); | 1253 | if (!$single_success) |
1467 | } | ||
1468 | // If it's enabled and we don't want an XML dump, use the cache | ||
1469 | if ($cache && !$this->xml_dump) | ||
1470 | { | ||
1471 | // Load the Cache | ||
1472 | $this->data = $cache->load(); | ||
1473 | if (!empty($this->data)) | ||
1474 | { | ||
1475 | // If the cache is for an outdated build of SimplePie | ||
1476 | if (!isset($this->data['build']) || $this->data['build'] !== SIMPLEPIE_BUILD) | ||
1477 | { | ||
1478 | $cache->unlink(); | ||
1479 | $this->data = array(); | ||
1480 | } | ||
1481 | // If we've hit a collision just rerun it with caching disabled | ||
1482 | elseif (isset($this->data['url']) && $this->data['url'] !== $this->feed_url) | ||
1483 | { | ||
1484 | $cache = false; | ||
1485 | $this->data = array(); | ||
1486 | } | ||
1487 | // If we've got a non feed_url stored (if the page isn't actually a feed, or is a redirect) use that URL. | ||
1488 | elseif (isset($this->data['feed_url'])) | ||
1489 | { | ||
1490 | // If the autodiscovery cache is still valid use it. | ||
1491 | if ($cache->mtime() + $this->autodiscovery_cache_duration > time()) | ||
1492 | { | ||
1493 | // Do not need to do feed autodiscovery yet. | ||
1494 | if ($this->data['feed_url'] === $this->data['url']) | ||
1495 | { | ||
1496 | $cache->unlink(); | ||
1497 | $this->data = array(); | ||
1498 | } | ||
1499 | else | ||
1500 | { | ||
1501 | $this->set_feed_url($this->data['feed_url']); | ||
1502 | return $this->init(); | ||
1503 | } | ||
1504 | } | ||
1505 | } | ||
1506 | // Check if the cache has been updated | ||
1507 | elseif ($cache->mtime() + $this->cache_duration < time()) | ||
1508 | { | ||
1509 | // If we have last-modified and/or etag set | ||
1510 | if (isset($this->data['headers']['last-modified']) || isset($this->data['headers']['etag'])) | ||
1511 | { | ||
1512 | $headers = array( | ||
1513 | 'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1', | ||
1514 | ); | ||
1515 | if (isset($this->data['headers']['last-modified'])) | ||
1516 | { | ||
1517 | $headers['if-modified-since'] = $this->data['headers']['last-modified']; | ||
1518 | } | ||
1519 | if (isset($this->data['headers']['etag'])) | ||
1520 | { | ||
1521 | $headers['if-none-match'] = $this->data['headers']['etag']; | ||
1522 | } | ||
1523 | |||
1524 | $file = new $this->file_class($this->feed_url, $this->timeout/10, 5, $headers, $this->useragent, $this->force_fsockopen); | ||
1525 | |||
1526 | if ($file->success) | ||
1527 | { | ||
1528 | if ($file->status_code === 304) | ||
1529 | { | ||
1530 | $cache->touch(); | ||
1531 | return true; | ||
1532 | } | ||
1533 | else | ||
1534 | { | ||
1535 | $headers = $file->headers; | ||
1536 | } | ||
1537 | } | ||
1538 | else | ||
1539 | { | ||
1540 | unset($file); | ||
1541 | } | ||
1542 | } | ||
1543 | } | ||
1544 | // If the cache is still valid, just return true | ||
1545 | else | ||
1546 | { | ||
1547 | $this->raw_data = false; | ||
1548 | return true; | ||
1549 | } | ||
1550 | } | ||
1551 | // If the cache is empty, delete it | ||
1552 | else | ||
1553 | { | ||
1554 | $cache->unlink(); | ||
1555 | $this->data = array(); | ||
1556 | } | ||
1557 | } | ||
1558 | // If we don't already have the file (it'll only exist if we've opened it to check if the cache has been modified), open it. | ||
1559 | if (!isset($file)) | ||
1560 | { | ||
1561 | if (is_a($this->file, 'SimplePie_File') && $this->file->url === $this->feed_url) | ||
1562 | { | ||
1563 | $file =& $this->file; | ||
1564 | } | ||
1565 | else | ||
1566 | { | ||
1567 | $headers = array( | ||
1568 | 'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1', | ||
1569 | ); | ||
1570 | $file = new $this->file_class($this->feed_url, $this->timeout, 5, $headers, $this->useragent, $this->force_fsockopen); | ||
1571 | } | ||
1572 | } | ||
1573 | // If the file connection has an error, set SimplePie::error to that and quit | ||
1574 | if (!$file->success && !($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300))) | ||
1575 | { | 1254 | { |
1576 | $this->error = $file->error; | 1255 | $this->error[$i] = $this->multifeed_objects[$i]->error(); |
1577 | if (!empty($this->data)) | ||
1578 | { | ||
1579 | return true; | ||
1580 | } | ||
1581 | else | ||
1582 | { | ||
1583 | return false; | ||
1584 | } | ||
1585 | } | 1256 | } |
1257 | $i++; | ||
1258 | } | ||
1259 | return (bool) $success; | ||
1260 | } | ||
1261 | elseif ($this->feed_url === null && $this->raw_data === null) | ||
1262 | { | ||
1263 | return false; | ||
1264 | } | ||
1586 | 1265 | ||
1587 | if (!$this->force_feed) | 1266 | $this->error = null; |
1588 | { | 1267 | $this->data = array(); |
1589 | // Check if the supplied URL is a feed, if it isn't, look for it. | 1268 | $this->multifeed_objects = array(); |
1590 | $locate = new $this->locator_class($file, $this->timeout, $this->useragent, $this->file_class, $this->max_checked_feeds, $this->content_type_sniffer_class); | 1269 | $cache = false; |
1591 | 1270 | ||
1592 | if (!$locate->is_feed($file)) | 1271 | if ($this->feed_url !== null) |
1593 | { | 1272 | { |
1594 | // We need to unset this so that if SimplePie::set_file() has been called that object is untouched | 1273 | $parsed_feed_url = $this->registry->call('Misc', 'parse_url', array($this->feed_url)); |
1595 | unset($file); | ||
1596 | if ($file = $locate->find($this->autodiscovery, $this->all_discovered_feeds)) | ||
1597 | { | ||
1598 | if ($cache) | ||
1599 | { | ||
1600 | $this->data = array('url' => $this->feed_url, 'feed_url' => $file->url, 'build' => SIMPLEPIE_BUILD); | ||
1601 | if (!$cache->save($this)) | ||
1602 | { | ||
1603 | trigger_error("$this->cache_location is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); | ||
1604 | } | ||
1605 | $cache = call_user_func(array($this->cache_class, 'create'), $this->cache_location, call_user_func($this->cache_name_function, $file->url), 'spc'); | ||
1606 | } | ||
1607 | $this->feed_url = $file->url; | ||
1608 | } | ||
1609 | else | ||
1610 | { | ||
1611 | $this->error = "A feed could not be found at $this->feed_url. A feed with an invalid mime type may fall victim to this error, or " . SIMPLEPIE_NAME . " was unable to auto-discover it.. Use force_feed() if you are certain this URL is a real feed."; | ||
1612 | SimplePie_Misc::error($this->error, E_USER_NOTICE, __FILE__, __LINE__); | ||
1613 | return false; | ||
1614 | } | ||
1615 | } | ||
1616 | $locate = null; | ||
1617 | } | ||
1618 | 1274 | ||
1619 | $headers = $file->headers; | 1275 | // Decide whether to enable caching |
1620 | $data = $file->body; | 1276 | if ($this->cache && $parsed_feed_url['scheme'] !== '') |
1621 | $sniffer = new $this->content_type_sniffer_class($file); | 1277 | { |
1622 | $sniffed = $sniffer->get_type(); | 1278 | $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, call_user_func($this->cache_name_function, $this->feed_url), 'spc')); |
1623 | } | 1279 | } |
1624 | else | 1280 | |
1281 | // Fetch the data via SimplePie_File into $this->raw_data | ||
1282 | if (($fetched = $this->fetch_data($cache)) === true) | ||
1625 | { | 1283 | { |
1626 | $data = $this->raw_data; | 1284 | return true; |
1285 | } | ||
1286 | elseif ($fetched === false) { | ||
1287 | return false; | ||
1627 | } | 1288 | } |
1628 | 1289 | ||
1629 | // This is exposed via get_raw_data() | 1290 | list($headers, $sniffed) = $fetched; |
1630 | $this->raw_data = $data; | 1291 | } |
1292 | |||
1293 | // Set up array of possible encodings | ||
1294 | $encodings = array(); | ||
1295 | |||
1296 | // First check to see if input has been overridden. | ||
1297 | if ($this->input_encoding !== false) | ||
1298 | { | ||
1299 | $encodings[] = $this->input_encoding; | ||
1300 | } | ||
1631 | 1301 | ||
1632 | // Set up array of possible encodings | 1302 | $application_types = array('application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity'); |
1633 | $encodings = array(); | 1303 | $text_types = array('text/xml', 'text/xml-external-parsed-entity'); |
1634 | 1304 | ||
1635 | // First check to see if input has been overridden. | 1305 | // RFC 3023 (only applies to sniffed content) |
1636 | if ($this->input_encoding !== false) | 1306 | if (isset($sniffed)) |
1307 | { | ||
1308 | if (in_array($sniffed, $application_types) || substr($sniffed, 0, 12) === 'application/' && substr($sniffed, -4) === '+xml') | ||
1309 | { | ||
1310 | if (isset($headers['content-type']) && preg_match('/;\x20?charset=([^;]*)/i', $headers['content-type'], $charset)) | ||
1311 | { | ||
1312 | $encodings[] = strtoupper($charset[1]); | ||
1313 | } | ||
1314 | $encodings = array_merge($encodings, $this->registry->call('Misc', 'xml_encoding', array($this->raw_data, &$this->registry))); | ||
1315 | $encodings[] = 'UTF-8'; | ||
1316 | } | ||
1317 | elseif (in_array($sniffed, $text_types) || substr($sniffed, 0, 5) === 'text/' && substr($sniffed, -4) === '+xml') | ||
1318 | { | ||
1319 | if (isset($headers['content-type']) && preg_match('/;\x20?charset=([^;]*)/i', $headers['content-type'], $charset)) | ||
1320 | { | ||
1321 | $encodings[] = $charset[1]; | ||
1322 | } | ||
1323 | $encodings[] = 'US-ASCII'; | ||
1324 | } | ||
1325 | // Text MIME-type default | ||
1326 | elseif (substr($sniffed, 0, 5) === 'text/') | ||
1637 | { | 1327 | { |
1638 | $encodings[] = $this->input_encoding; | 1328 | $encodings[] = 'US-ASCII'; |
1639 | } | 1329 | } |
1330 | } | ||
1331 | |||
1332 | // Fallback to XML 1.0 Appendix F.1/UTF-8/ISO-8859-1 | ||
1333 | $encodings = array_merge($encodings, $this->registry->call('Misc', 'xml_encoding', array($this->raw_data, &$this->registry))); | ||
1334 | $encodings[] = 'UTF-8'; | ||
1335 | $encodings[] = 'ISO-8859-1'; | ||
1640 | 1336 | ||
1641 | $application_types = array('application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity'); | 1337 | // There's no point in trying an encoding twice |
1642 | $text_types = array('text/xml', 'text/xml-external-parsed-entity'); | 1338 | $encodings = array_unique($encodings); |
1643 | 1339 | ||
1644 | // RFC 3023 (only applies to sniffed content) | 1340 | // Loop through each possible encoding, till we return something, or run out of possibilities |
1645 | if (isset($sniffed)) | 1341 | foreach ($encodings as $encoding) |
1342 | { | ||
1343 | // Change the encoding to UTF-8 (as we always use UTF-8 internally) | ||
1344 | if ($utf8_data = $this->registry->call('Misc', 'change_encoding', array($this->raw_data, $encoding, 'UTF-8'))) | ||
1646 | { | 1345 | { |
1647 | if (in_array($sniffed, $application_types) || substr($sniffed, 0, 12) === 'application/' && substr($sniffed, -4) === '+xml') | 1346 | // Create new parser |
1347 | $parser = $this->registry->create('Parser'); | ||
1348 | |||
1349 | // If it's parsed fine | ||
1350 | if ($parser->parse($utf8_data, 'UTF-8')) | ||
1648 | { | 1351 | { |
1649 | if (isset($headers['content-type']) && preg_match('/;\x20?charset=([^;]*)/i', $headers['content-type'], $charset)) | 1352 | $this->data = $parser->get_data(); |
1353 | if (!($this->get_type() & ~SIMPLEPIE_TYPE_NONE)) | ||
1650 | { | 1354 | { |
1651 | $encodings[] = strtoupper($charset[1]); | 1355 | $this->error = "A feed could not be found at $this->feed_url. This does not appear to be a valid RSS or Atom feed."; |
1356 | $this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, __FILE__, __LINE__)); | ||
1357 | return false; | ||
1652 | } | 1358 | } |
1653 | $encodings = array_merge($encodings, SimplePie_Misc::xml_encoding($data)); | 1359 | |
1654 | $encodings[] = 'UTF-8'; | 1360 | if (isset($headers)) |
1655 | } | ||
1656 | elseif (in_array($sniffed, $text_types) || substr($sniffed, 0, 5) === 'text/' && substr($sniffed, -4) === '+xml') | ||
1657 | { | ||
1658 | if (isset($headers['content-type']) && preg_match('/;\x20?charset=([^;]*)/i', $headers['content-type'], $charset)) | ||
1659 | { | 1361 | { |
1660 | $encodings[] = $charset[1]; | 1362 | $this->data['headers'] = $headers; |
1661 | } | 1363 | } |
1662 | $encodings[] = 'US-ASCII'; | 1364 | $this->data['build'] = SIMPLEPIE_BUILD; |
1663 | } | 1365 | |
1664 | // Text MIME-type default | 1366 | // Cache the file if caching is enabled |
1665 | elseif (substr($sniffed, 0, 5) === 'text/') | 1367 | if ($cache && !$cache->save($this)) |
1666 | { | 1368 | { |
1667 | $encodings[] = 'US-ASCII'; | 1369 | trigger_error("$this->cache_location is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); |
1370 | } | ||
1371 | return true; | ||
1668 | } | 1372 | } |
1669 | } | 1373 | } |
1374 | } | ||
1670 | 1375 | ||
1671 | // Fallback to XML 1.0 Appendix F.1/UTF-8/ISO-8859-1 | 1376 | if (isset($parser)) |
1672 | $encodings = array_merge($encodings, SimplePie_Misc::xml_encoding($data)); | 1377 | { |
1673 | $encodings[] = 'UTF-8'; | 1378 | // We have an error, just set SimplePie_Misc::error to it and quit |
1674 | $encodings[] = 'ISO-8859-1'; | 1379 | $this->error = sprintf('This XML document is invalid, likely due to invalid characters. XML error: %s at line %d, column %d', $parser->get_error_string(), $parser->get_current_line(), $parser->get_current_column()); |
1380 | } | ||
1381 | else | ||
1382 | { | ||
1383 | $this->error = 'The data could not be converted to UTF-8. You MUST have either the iconv or mbstring extension installed. Upgrading to PHP 5.x (which includes iconv) is highly recommended.'; | ||
1384 | } | ||
1675 | 1385 | ||
1676 | // There's no point in trying an encoding twice | 1386 | $this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, __FILE__, __LINE__)); |
1677 | $encodings = array_unique($encodings); | ||
1678 | 1387 | ||
1679 | // If we want the XML, just output that with the most likely encoding and quit | 1388 | return false; |
1680 | if ($this->xml_dump) | 1389 | } |
1681 | { | ||
1682 | header('Content-type: text/xml; charset=' . $encodings[0]); | ||
1683 | echo $data; | ||
1684 | exit; | ||
1685 | } | ||
1686 | 1390 | ||
1687 | // Loop through each possible encoding, till we return something, or run out of possibilities | 1391 | /** |
1688 | foreach ($encodings as $encoding) | 1392 | * Fetch the data via SimplePie_File |
1393 | * | ||
1394 | * If the data is already cached, attempt to fetch it from there instead | ||
1395 | * @param SimplePie_Cache|false $cache Cache handler, or false to not load from the cache | ||
1396 | * @return array|true Returns true if the data was loaded from the cache, or an array of HTTP headers and sniffed type | ||
1397 | */ | ||
1398 | protected function fetch_data(&$cache) | ||
1399 | { | ||
1400 | // If it's enabled, use the cache | ||
1401 | if ($cache) | ||
1402 | { | ||
1403 | // Load the Cache | ||
1404 | $this->data = $cache->load(); | ||
1405 | if (!empty($this->data)) | ||
1689 | { | 1406 | { |
1690 | // Change the encoding to UTF-8 (as we always use UTF-8 internally) | 1407 | // If the cache is for an outdated build of SimplePie |
1691 | if ($utf8_data = SimplePie_Misc::change_encoding($data, $encoding, 'UTF-8')) | 1408 | if (!isset($this->data['build']) || $this->data['build'] !== SIMPLEPIE_BUILD) |
1692 | { | 1409 | { |
1693 | // Create new parser | 1410 | $cache->unlink(); |
1694 | $parser = new $this->parser_class(); | 1411 | $this->data = array(); |
1412 | } | ||
1413 | // If we've hit a collision just rerun it with caching disabled | ||
1414 | elseif (isset($this->data['url']) && $this->data['url'] !== $this->feed_url) | ||
1415 | { | ||
1416 | $cache = false; | ||
1417 | $this->data = array(); | ||
1418 | } | ||
1419 | // If we've got a non feed_url stored (if the page isn't actually a feed, or is a redirect) use that URL. | ||
1420 | elseif (isset($this->data['feed_url'])) | ||
1421 | { | ||
1422 | // If the autodiscovery cache is still valid use it. | ||
1423 | if ($cache->mtime() + $this->autodiscovery_cache_duration > time()) | ||
1424 | { | ||
1425 | // Do not need to do feed autodiscovery yet. | ||
1426 | if ($this->data['feed_url'] !== $this->data['url']) | ||
1427 | { | ||
1428 | $this->set_feed_url($this->data['feed_url']); | ||
1429 | return $this->init(); | ||
1430 | } | ||
1695 | 1431 | ||
1696 | // If it's parsed fine | 1432 | $cache->unlink(); |
1697 | if ($parser->parse($utf8_data, 'UTF-8')) | 1433 | $this->data = array(); |
1434 | } | ||
1435 | } | ||
1436 | // Check if the cache has been updated | ||
1437 | elseif ($cache->mtime() + $this->cache_duration < time()) | ||
1438 | { | ||
1439 | // If we have last-modified and/or etag set | ||
1440 | if (isset($this->data['headers']['last-modified']) || isset($this->data['headers']['etag'])) | ||
1698 | { | 1441 | { |
1699 | $this->data = $parser->get_data(); | 1442 | $headers = array( |
1700 | if ($this->get_type() & ~SIMPLEPIE_TYPE_NONE) | 1443 | 'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1', |
1444 | ); | ||
1445 | if (isset($this->data['headers']['last-modified'])) | ||
1701 | { | 1446 | { |
1702 | if (isset($headers)) | 1447 | $headers['if-modified-since'] = $this->data['headers']['last-modified']; |
1703 | { | 1448 | } |
1704 | $this->data['headers'] = $headers; | 1449 | if (isset($this->data['headers']['etag'])) |
1705 | } | 1450 | { |
1706 | $this->data['build'] = SIMPLEPIE_BUILD; | 1451 | $headers['if-none-match'] = $this->data['headers']['etag']; |
1452 | } | ||
1453 | |||
1454 | $file = $this->registry->create('File', array($this->feed_url, $this->timeout/10, 5, $headers, $this->useragent, $this->force_fsockopen)); | ||
1707 | 1455 | ||
1708 | // Cache the file if caching is enabled | 1456 | if ($file->success) |
1709 | if ($cache && !$cache->save($this)) | 1457 | { |
1458 | if ($file->status_code === 304) | ||
1710 | { | 1459 | { |
1711 | trigger_error("$this->cache_location is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); | 1460 | $cache->touch(); |
1461 | return true; | ||
1712 | } | 1462 | } |
1713 | return true; | ||
1714 | } | 1463 | } |
1715 | else | 1464 | else |
1716 | { | 1465 | { |
1717 | $this->error = "A feed could not be found at $this->feed_url. This does not appear to be a valid RSS or Atom feed."; | 1466 | unset($file); |
1718 | SimplePie_Misc::error($this->error, E_USER_NOTICE, __FILE__, __LINE__); | ||
1719 | return false; | ||
1720 | } | 1467 | } |
1721 | } | 1468 | } |
1722 | } | 1469 | } |
1470 | // If the cache is still valid, just return true | ||
1471 | else | ||
1472 | { | ||
1473 | $this->raw_data = false; | ||
1474 | return true; | ||
1475 | } | ||
1723 | } | 1476 | } |
1724 | 1477 | // If the cache is empty, delete it | |
1725 | if (isset($parser)) | ||
1726 | { | ||
1727 | // We have an error, just set SimplePie_Misc::error to it and quit | ||
1728 | $this->error = sprintf('This XML document is invalid, likely due to invalid characters. XML error: %s at line %d, column %d', $parser->get_error_string(), $parser->get_current_line(), $parser->get_current_column()); | ||
1729 | } | ||
1730 | else | 1478 | else |
1731 | { | 1479 | { |
1732 | $this->error = 'The data could not be converted to UTF-8. You MUST have either the iconv or mbstring extension installed. Upgrading to PHP 5.x (which includes iconv) is highly recommended.'; | 1480 | $cache->unlink(); |
1481 | $this->data = array(); | ||
1733 | } | 1482 | } |
1734 | |||
1735 | SimplePie_Misc::error($this->error, E_USER_NOTICE, __FILE__, __LINE__); | ||
1736 | |||
1737 | return false; | ||
1738 | } | 1483 | } |
1739 | elseif (!empty($this->multifeed_url)) | 1484 | // If we don't already have the file (it'll only exist if we've opened it to check if the cache has been modified), open it. |
1485 | if (!isset($file)) | ||
1740 | { | 1486 | { |
1741 | $i = 0; | 1487 | if ($this->file instanceof SimplePie_File && $this->file->url === $this->feed_url) |
1742 | $success = 0; | ||
1743 | $this->multifeed_objects = array(); | ||
1744 | foreach ($this->multifeed_url as $url) | ||
1745 | { | 1488 | { |
1746 | $this->multifeed_objects[$i] = clone $this; | 1489 | $file =& $this->file; |
1747 | $this->multifeed_objects[$i]->set_feed_url($url); | 1490 | } |
1748 | $success |= $this->multifeed_objects[$i]->init(); | 1491 | else |
1749 | $i++; | 1492 | { |
1493 | $headers = array( | ||
1494 | 'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1', | ||
1495 | ); | ||
1496 | $file = $this->registry->create('File', array($this->feed_url, $this->timeout, 5, $headers, $this->useragent, $this->force_fsockopen)); | ||
1750 | } | 1497 | } |
1751 | return (bool) $success; | ||
1752 | } | 1498 | } |
1753 | else | 1499 | // If the file connection has an error, set SimplePie::error to that and quit |
1500 | if (!$file->success && !($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300))) | ||
1754 | { | 1501 | { |
1755 | return false; | 1502 | $this->error = $file->error; |
1503 | return !empty($this->data); | ||
1504 | } | ||
1505 | |||
1506 | if (!$this->force_feed) | ||
1507 | { | ||
1508 | // Check if the supplied URL is a feed, if it isn't, look for it. | ||
1509 | $locate = $this->registry->create('Locator', array(&$file, $this->timeout, $this->useragent, $this->max_checked_feeds)); | ||
1510 | |||
1511 | if (!$locate->is_feed($file)) | ||
1512 | { | ||
1513 | // We need to unset this so that if SimplePie::set_file() has been called that object is untouched | ||
1514 | unset($file); | ||
1515 | try | ||
1516 | { | ||
1517 | if (!($file = $locate->find($this->autodiscovery, $this->all_discovered_feeds))) | ||
1518 | { | ||
1519 | $this->error = "A feed could not be found at $this->feed_url. A feed with an invalid mime type may fall victim to this error, or " . SIMPLEPIE_NAME . " was unable to auto-discover it.. Use force_feed() if you are certain this URL is a real feed."; | ||
1520 | $this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, __FILE__, __LINE__)); | ||
1521 | return false; | ||
1522 | } | ||
1523 | } | ||
1524 | catch (SimplePie_Exception $e) | ||
1525 | { | ||
1526 | // This is usually because DOMDocument doesn't exist | ||
1527 | $this->error = $e->getMessage(); | ||
1528 | $this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, $e->getFile(), $e->getLine())); | ||
1529 | return false; | ||
1530 | } | ||
1531 | if ($cache) | ||
1532 | { | ||
1533 | $this->data = array('url' => $this->feed_url, 'feed_url' => $file->url, 'build' => SIMPLEPIE_BUILD); | ||
1534 | if (!$cache->save($this)) | ||
1535 | { | ||
1536 | trigger_error("$this->cache_location is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); | ||
1537 | } | ||
1538 | $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, call_user_func($this->cache_name_function, $file->url), 'spc')); | ||
1539 | } | ||
1540 | $this->feed_url = $file->url; | ||
1541 | } | ||
1542 | $locate = null; | ||
1756 | } | 1543 | } |
1544 | |||
1545 | $this->raw_data = $file->body; | ||
1546 | |||
1547 | $headers = $file->headers; | ||
1548 | $sniffer = $this->registry->create('Content_Type_Sniffer', array(&$file)); | ||
1549 | $sniffed = $sniffer->get_type(); | ||
1550 | |||
1551 | return array($headers, $sniffed); | ||
1757 | } | 1552 | } |
1758 | 1553 | ||
1759 | /** | 1554 | /** |
1760 | * Return the error message for the occured error | 1555 | * Get the error message for the occured error |
1761 | * | 1556 | * |
1762 | * @access public | 1557 | * @return string|array Error message, or array of messages for multifeeds |
1763 | * @return string Error message | ||
1764 | */ | 1558 | */ |
1765 | public function error() | 1559 | public function error() |
1766 | { | 1560 | { |
@@ -1768,9 +1562,9 @@ class SimplePie_Core | |||
1768 | } | 1562 | } |
1769 | 1563 | ||
1770 | /** | 1564 | /** |
1771 | * Return the raw XML | 1565 | * Get the raw XML |
1772 | * | 1566 | * |
1773 | * This is the same as setting `$xml_dump = true;`, but returns | 1567 | * This is the same as the old `$feed->enable_xml_dump(true)`, but returns |
1774 | * the data instead of printing it. | 1568 | * the data instead of printing it. |
1775 | * | 1569 | * |
1776 | * @return string|boolean Raw XML data, false if the cache is used | 1570 | * @return string|boolean Raw XML data, false if the cache is used |
@@ -1780,11 +1574,36 @@ class SimplePie_Core | |||
1780 | return $this->raw_data; | 1574 | return $this->raw_data; |
1781 | } | 1575 | } |
1782 | 1576 | ||
1577 | /** | ||
1578 | * Get the character encoding used for output | ||
1579 | * | ||
1580 | * @since Preview Release | ||
1581 | * @return string | ||
1582 | */ | ||
1783 | public function get_encoding() | 1583 | public function get_encoding() |
1784 | { | 1584 | { |
1785 | return $this->sanitize->output_encoding; | 1585 | return $this->sanitize->output_encoding; |
1786 | } | 1586 | } |
1787 | 1587 | ||
1588 | /** | ||
1589 | * Send the content-type header with correct encoding | ||
1590 | * | ||
1591 | * This method ensures that the SimplePie-enabled page is being served with | ||
1592 | * the correct {@link http://www.iana.org/assignments/media-types/ mime-type} | ||
1593 | * and character encoding HTTP headers (character encoding determined by the | ||
1594 | * {@see set_output_encoding} config option). | ||
1595 | * | ||
1596 | * This won't work properly if any content or whitespace has already been | ||
1597 | * sent to the browser, because it relies on PHP's | ||
1598 | * {@link http://php.net/header header()} function, and these are the | ||
1599 | * circumstances under which the function works. | ||
1600 | * | ||
1601 | * Because it's setting these settings for the entire page (as is the nature | ||
1602 | * of HTTP headers), this should only be used once per page (again, at the | ||
1603 | * top). | ||
1604 | * | ||
1605 | * @param string $mime MIME type to serve the page as | ||
1606 | */ | ||
1788 | public function handle_content_type($mime = 'text/html') | 1607 | public function handle_content_type($mime = 'text/html') |
1789 | { | 1608 | { |
1790 | if (!headers_sent()) | 1609 | if (!headers_sent()) |
@@ -1802,6 +1621,32 @@ class SimplePie_Core | |||
1802 | } | 1621 | } |
1803 | } | 1622 | } |
1804 | 1623 | ||
1624 | /** | ||
1625 | * Get the type of the feed | ||
1626 | * | ||
1627 | * This returns a SIMPLEPIE_TYPE_* constant, which can be tested against | ||
1628 | * using {@link http://php.net/language.operators.bitwise bitwise operators} | ||
1629 | * | ||
1630 | * @since 0.8 (usage changed to using constants in 1.0) | ||
1631 | * @see SIMPLEPIE_TYPE_NONE Unknown. | ||
1632 | * @see SIMPLEPIE_TYPE_RSS_090 RSS 0.90. | ||
1633 | * @see SIMPLEPIE_TYPE_RSS_091_NETSCAPE RSS 0.91 (Netscape). | ||
1634 | * @see SIMPLEPIE_TYPE_RSS_091_USERLAND RSS 0.91 (Userland). | ||
1635 | * @see SIMPLEPIE_TYPE_RSS_091 RSS 0.91. | ||
1636 | * @see SIMPLEPIE_TYPE_RSS_092 RSS 0.92. | ||
1637 | * @see SIMPLEPIE_TYPE_RSS_093 RSS 0.93. | ||
1638 | * @see SIMPLEPIE_TYPE_RSS_094 RSS 0.94. | ||
1639 | * @see SIMPLEPIE_TYPE_RSS_10 RSS 1.0. | ||
1640 | * @see SIMPLEPIE_TYPE_RSS_20 RSS 2.0.x. | ||
1641 | * @see SIMPLEPIE_TYPE_RSS_RDF RDF-based RSS. | ||
1642 | * @see SIMPLEPIE_TYPE_RSS_SYNDICATION Non-RDF-based RSS (truly intended as syndication format). | ||
1643 | * @see SIMPLEPIE_TYPE_RSS_ALL Any version of RSS. | ||
1644 | * @see SIMPLEPIE_TYPE_ATOM_03 Atom 0.3. | ||
1645 | * @see SIMPLEPIE_TYPE_ATOM_10 Atom 1.0. | ||
1646 | * @see SIMPLEPIE_TYPE_ATOM_ALL Any version of Atom. | ||
1647 | * @see SIMPLEPIE_TYPE_ALL Any known/supported feed type. | ||
1648 | * @return int SIMPLEPIE_TYPE_* constant | ||
1649 | */ | ||
1805 | public function get_type() | 1650 | public function get_type() |
1806 | { | 1651 | { |
1807 | if (!isset($this->data['type'])) | 1652 | if (!isset($this->data['type'])) |
@@ -1883,9 +1728,16 @@ class SimplePie_Core | |||
1883 | } | 1728 | } |
1884 | 1729 | ||
1885 | /** | 1730 | /** |
1731 | * Get the URL for the feed | ||
1732 | * | ||
1733 | * May or may not be different from the URL passed to {@see set_feed_url()}, | ||
1734 | * depending on whether auto-discovery was used. | ||
1735 | * | ||
1736 | * @since Preview Release (previously called `get_feed_url()` since SimplePie 0.8.) | ||
1886 | * @todo If we have a perm redirect we should return the new URL | 1737 | * @todo If we have a perm redirect we should return the new URL |
1887 | * @todo When we make the above change, let's support <itunes:new-feed-url> as well | 1738 | * @todo When we make the above change, let's support <itunes:new-feed-url> as well |
1888 | * @todo Also, |atom:link|@rel=self | 1739 | * @todo Also, |atom:link|@rel=self |
1740 | * @return string|null | ||
1889 | */ | 1741 | */ |
1890 | public function subscribe_url() | 1742 | public function subscribe_url() |
1891 | { | 1743 | { |
@@ -1899,6 +1751,37 @@ class SimplePie_Core | |||
1899 | } | 1751 | } |
1900 | } | 1752 | } |
1901 | 1753 | ||
1754 | /** | ||
1755 | * Get data for a feed-level element | ||
1756 | * | ||
1757 | * This method allows you to get access to ANY element/attribute that is a | ||
1758 | * sub-element of the opening feed tag. | ||
1759 | * | ||
1760 | * The return value is an indexed array of elements matching the given | ||
1761 | * namespace and tag name. Each element has `attribs`, `data` and `child` | ||
1762 | * subkeys. For `attribs` and `child`, these contain namespace subkeys. | ||
1763 | * `attribs` then has one level of associative name => value data (where | ||
1764 | * `value` is a string) after the namespace. `child` has tag-indexed keys | ||
1765 | * after the namespace, each member of which is an indexed array matching | ||
1766 | * this same format. | ||
1767 | * | ||
1768 | * For example: | ||
1769 | * <pre> | ||
1770 | * // This is probably a bad example because we already support | ||
1771 | * // <media:content> natively, but it shows you how to parse through | ||
1772 | * // the nodes. | ||
1773 | * $group = $item->get_item_tags(SIMPLEPIE_NAMESPACE_MEDIARSS, 'group'); | ||
1774 | * $content = $group[0]['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['content']; | ||
1775 | * $file = $content[0]['attribs']['']['url']; | ||
1776 | * echo $file; | ||
1777 | * </pre> | ||
1778 | * | ||
1779 | * @since 1.0 | ||
1780 | * @see http://simplepie.org/wiki/faq/supported_xml_namespaces | ||
1781 | * @param string $namespace The URL of the XML namespace of the elements you're trying to access | ||
1782 | * @param string $tag Tag name | ||
1783 | * @return array | ||
1784 | */ | ||
1902 | public function get_feed_tags($namespace, $tag) | 1785 | public function get_feed_tags($namespace, $tag) |
1903 | { | 1786 | { |
1904 | $type = $this->get_type(); | 1787 | $type = $this->get_type(); |
@@ -1933,6 +1816,20 @@ class SimplePie_Core | |||
1933 | return null; | 1816 | return null; |
1934 | } | 1817 | } |
1935 | 1818 | ||
1819 | /** | ||
1820 | * Get data for a channel-level element | ||
1821 | * | ||
1822 | * This method allows you to get access to ANY element/attribute in the | ||
1823 | * channel/header section of the feed. | ||
1824 | * | ||
1825 | * See {@see SimplePie::get_feed_tags()} for a description of the return value | ||
1826 | * | ||
1827 | * @since 1.0 | ||
1828 | * @see http://simplepie.org/wiki/faq/supported_xml_namespaces | ||
1829 | * @param string $namespace The URL of the XML namespace of the elements you're trying to access | ||
1830 | * @param string $tag Tag name | ||
1831 | * @return array | ||
1832 | */ | ||
1936 | public function get_channel_tags($namespace, $tag) | 1833 | public function get_channel_tags($namespace, $tag) |
1937 | { | 1834 | { |
1938 | $type = $this->get_type(); | 1835 | $type = $this->get_type(); |
@@ -1976,6 +1873,20 @@ class SimplePie_Core | |||
1976 | return null; | 1873 | return null; |
1977 | } | 1874 | } |
1978 | 1875 | ||
1876 | /** | ||
1877 | * Get data for an image-level element | ||
1878 | * | ||
1879 | * This method allows you to get access to ANY element/attribute in the | ||
1880 | * image/logo section of the feed. | ||
1881 | * | ||
1882 | * See {@see SimplePie::get_feed_tags()} for a description of the return value | ||
1883 | * | ||
1884 | * @since 1.0 | ||
1885 | * @see http://simplepie.org/wiki/faq/supported_xml_namespaces | ||
1886 | * @param string $namespace The URL of the XML namespace of the elements you're trying to access | ||
1887 | * @param string $tag Tag name | ||
1888 | * @return array | ||
1889 | */ | ||
1979 | public function get_image_tags($namespace, $tag) | 1890 | public function get_image_tags($namespace, $tag) |
1980 | { | 1891 | { |
1981 | $type = $this->get_type(); | 1892 | $type = $this->get_type(); |
@@ -2012,6 +1923,18 @@ class SimplePie_Core | |||
2012 | return null; | 1923 | return null; |
2013 | } | 1924 | } |
2014 | 1925 | ||
1926 | /** | ||
1927 | * Get the base URL value from the feed | ||
1928 | * | ||
1929 | * Uses `<xml:base>` if available, otherwise uses the first link in the | ||
1930 | * feed, or failing that, the URL of the feed itself. | ||
1931 | * | ||
1932 | * @see get_link | ||
1933 | * @see subscribe_url | ||
1934 | * | ||
1935 | * @param array $element | ||
1936 | * @return string | ||
1937 | */ | ||
2015 | public function get_base($element = array()) | 1938 | public function get_base($element = array()) |
2016 | { | 1939 | { |
2017 | if (!($this->get_type() & SIMPLEPIE_TYPE_RSS_SYNDICATION) && !empty($element['xml_base_explicit']) && isset($element['xml_base'])) | 1940 | if (!($this->get_type() & SIMPLEPIE_TYPE_RSS_SYNDICATION) && !empty($element['xml_base_explicit']) && isset($element['xml_base'])) |
@@ -2028,20 +1951,38 @@ class SimplePie_Core | |||
2028 | } | 1951 | } |
2029 | } | 1952 | } |
2030 | 1953 | ||
1954 | /** | ||
1955 | * Sanitize feed data | ||
1956 | * | ||
1957 | * @access private | ||
1958 | * @see SimplePie_Sanitize::sanitize() | ||
1959 | * @param string $data Data to sanitize | ||
1960 | * @param int $type One of the SIMPLEPIE_CONSTRUCT_* constants | ||
1961 | * @param string $base Base URL to resolve URLs against | ||
1962 | * @return string Sanitized data | ||
1963 | */ | ||
2031 | public function sanitize($data, $type, $base = '') | 1964 | public function sanitize($data, $type, $base = '') |
2032 | { | 1965 | { |
2033 | return $this->sanitize->sanitize($data, $type, $base); | 1966 | return $this->sanitize->sanitize($data, $type, $base); |
2034 | } | 1967 | } |
2035 | 1968 | ||
1969 | /** | ||
1970 | * Get the title of the feed | ||
1971 | * | ||
1972 | * Uses `<atom:title>`, `<title>` or `<dc:title>` | ||
1973 | * | ||
1974 | * @since 1.0 (previously called `get_feed_title` since 0.8) | ||
1975 | * @return string|null | ||
1976 | */ | ||
2036 | public function get_title() | 1977 | public function get_title() |
2037 | { | 1978 | { |
2038 | if ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'title')) | 1979 | if ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'title')) |
2039 | { | 1980 | { |
2040 | return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_10_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 1981 | return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
2041 | } | 1982 | } |
2042 | elseif ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'title')) | 1983 | elseif ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'title')) |
2043 | { | 1984 | { |
2044 | return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_03_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 1985 | return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
2045 | } | 1986 | } |
2046 | elseif ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'title')) | 1987 | elseif ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'title')) |
2047 | { | 1988 | { |
@@ -2069,6 +2010,13 @@ class SimplePie_Core | |||
2069 | } | 2010 | } |
2070 | } | 2011 | } |
2071 | 2012 | ||
2013 | /** | ||
2014 | * Get a category for the feed | ||
2015 | * | ||
2016 | * @since Unknown | ||
2017 | * @param int $key The category that you want to return. Remember that arrays begin with 0, not 1 | ||
2018 | * @return SimplePie_Category|null | ||
2019 | */ | ||
2072 | public function get_category($key = 0) | 2020 | public function get_category($key = 0) |
2073 | { | 2021 | { |
2074 | $categories = $this->get_categories(); | 2022 | $categories = $this->get_categories(); |
@@ -2082,6 +2030,14 @@ class SimplePie_Core | |||
2082 | } | 2030 | } |
2083 | } | 2031 | } |
2084 | 2032 | ||
2033 | /** | ||
2034 | * Get all categories for the feed | ||
2035 | * | ||
2036 | * Uses `<atom:category>`, `<category>` or `<dc:subject>` | ||
2037 | * | ||
2038 | * @since Unknown | ||
2039 | * @return array|null List of {@see SimplePie_Category} objects | ||
2040 | */ | ||
2085 | public function get_categories() | 2041 | public function get_categories() |
2086 | { | 2042 | { |
2087 | $categories = array(); | 2043 | $categories = array(); |
@@ -2103,7 +2059,7 @@ class SimplePie_Core | |||
2103 | { | 2059 | { |
2104 | $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT); | 2060 | $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT); |
2105 | } | 2061 | } |
2106 | $categories[] = new $this->category_class($term, $scheme, $label); | 2062 | $categories[] = $this->registry->create('Category', array($term, $scheme, $label)); |
2107 | } | 2063 | } |
2108 | foreach ((array) $this->get_channel_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'category') as $category) | 2064 | foreach ((array) $this->get_channel_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'category') as $category) |
2109 | { | 2065 | { |
@@ -2118,20 +2074,20 @@ class SimplePie_Core | |||
2118 | { | 2074 | { |
2119 | $scheme = null; | 2075 | $scheme = null; |
2120 | } | 2076 | } |
2121 | $categories[] = new $this->category_class($term, $scheme, null); | 2077 | $categories[] = $this->registry->create('Category', array($term, $scheme, null)); |
2122 | } | 2078 | } |
2123 | foreach ((array) $this->get_channel_tags(SIMPLEPIE_NAMESPACE_DC_11, 'subject') as $category) | 2079 | foreach ((array) $this->get_channel_tags(SIMPLEPIE_NAMESPACE_DC_11, 'subject') as $category) |
2124 | { | 2080 | { |
2125 | $categories[] = new $this->category_class($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null); | 2081 | $categories[] = $this->registry->create('Category', array($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); |
2126 | } | 2082 | } |
2127 | foreach ((array) $this->get_channel_tags(SIMPLEPIE_NAMESPACE_DC_10, 'subject') as $category) | 2083 | foreach ((array) $this->get_channel_tags(SIMPLEPIE_NAMESPACE_DC_10, 'subject') as $category) |
2128 | { | 2084 | { |
2129 | $categories[] = new $this->category_class($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null); | 2085 | $categories[] = $this->registry->create('Category', array($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); |
2130 | } | 2086 | } |
2131 | 2087 | ||
2132 | if (!empty($categories)) | 2088 | if (!empty($categories)) |
2133 | { | 2089 | { |
2134 | return SimplePie_Misc::array_unique($categories); | 2090 | return array_unique($categories); |
2135 | } | 2091 | } |
2136 | else | 2092 | else |
2137 | { | 2093 | { |
@@ -2139,6 +2095,13 @@ class SimplePie_Core | |||
2139 | } | 2095 | } |
2140 | } | 2096 | } |
2141 | 2097 | ||
2098 | /** | ||
2099 | * Get an author for the feed | ||
2100 | * | ||
2101 | * @since 1.1 | ||
2102 | * @param int $key The author that you want to return. Remember that arrays begin with 0, not 1 | ||
2103 | * @return SimplePie_Author|null | ||
2104 | */ | ||
2142 | public function get_author($key = 0) | 2105 | public function get_author($key = 0) |
2143 | { | 2106 | { |
2144 | $authors = $this->get_authors(); | 2107 | $authors = $this->get_authors(); |
@@ -2152,6 +2115,14 @@ class SimplePie_Core | |||
2152 | } | 2115 | } |
2153 | } | 2116 | } |
2154 | 2117 | ||
2118 | /** | ||
2119 | * Get all authors for the feed | ||
2120 | * | ||
2121 | * Uses `<atom:author>`, `<author>`, `<dc:creator>` or `<itunes:author>` | ||
2122 | * | ||
2123 | * @since 1.1 | ||
2124 | * @return array|null List of {@see SimplePie_Author} objects | ||
2125 | */ | ||
2155 | public function get_authors() | 2126 | public function get_authors() |
2156 | { | 2127 | { |
2157 | $authors = array(); | 2128 | $authors = array(); |
@@ -2174,7 +2145,7 @@ class SimplePie_Core | |||
2174 | } | 2145 | } |
2175 | if ($name !== null || $email !== null || $uri !== null) | 2146 | if ($name !== null || $email !== null || $uri !== null) |
2176 | { | 2147 | { |
2177 | $authors[] = new $this->author_class($name, $uri, $email); | 2148 | $authors[] = $this->registry->create('Author', array($name, $uri, $email)); |
2178 | } | 2149 | } |
2179 | } | 2150 | } |
2180 | if ($author = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'author')) | 2151 | if ($author = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'author')) |
@@ -2196,25 +2167,25 @@ class SimplePie_Core | |||
2196 | } | 2167 | } |
2197 | if ($name !== null || $email !== null || $url !== null) | 2168 | if ($name !== null || $email !== null || $url !== null) |
2198 | { | 2169 | { |
2199 | $authors[] = new $this->author_class($name, $url, $email); | 2170 | $authors[] = $this->registry->create('Author', array($name, $url, $email)); |
2200 | } | 2171 | } |
2201 | } | 2172 | } |
2202 | foreach ((array) $this->get_channel_tags(SIMPLEPIE_NAMESPACE_DC_11, 'creator') as $author) | 2173 | foreach ((array) $this->get_channel_tags(SIMPLEPIE_NAMESPACE_DC_11, 'creator') as $author) |
2203 | { | 2174 | { |
2204 | $authors[] = new $this->author_class($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null); | 2175 | $authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); |
2205 | } | 2176 | } |
2206 | foreach ((array) $this->get_channel_tags(SIMPLEPIE_NAMESPACE_DC_10, 'creator') as $author) | 2177 | foreach ((array) $this->get_channel_tags(SIMPLEPIE_NAMESPACE_DC_10, 'creator') as $author) |
2207 | { | 2178 | { |
2208 | $authors[] = new $this->author_class($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null); | 2179 | $authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); |
2209 | } | 2180 | } |
2210 | foreach ((array) $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'author') as $author) | 2181 | foreach ((array) $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'author') as $author) |
2211 | { | 2182 | { |
2212 | $authors[] = new $this->author_class($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null); | 2183 | $authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); |
2213 | } | 2184 | } |
2214 | 2185 | ||
2215 | if (!empty($authors)) | 2186 | if (!empty($authors)) |
2216 | { | 2187 | { |
2217 | return SimplePie_Misc::array_unique($authors); | 2188 | return array_unique($authors); |
2218 | } | 2189 | } |
2219 | else | 2190 | else |
2220 | { | 2191 | { |
@@ -2222,6 +2193,13 @@ class SimplePie_Core | |||
2222 | } | 2193 | } |
2223 | } | 2194 | } |
2224 | 2195 | ||
2196 | /** | ||
2197 | * Get a contributor for the feed | ||
2198 | * | ||
2199 | * @since 1.1 | ||
2200 | * @param int $key The contributor that you want to return. Remember that arrays begin with 0, not 1 | ||
2201 | * @return SimplePie_Author|null | ||
2202 | */ | ||
2225 | public function get_contributor($key = 0) | 2203 | public function get_contributor($key = 0) |
2226 | { | 2204 | { |
2227 | $contributors = $this->get_contributors(); | 2205 | $contributors = $this->get_contributors(); |
@@ -2235,6 +2213,14 @@ class SimplePie_Core | |||
2235 | } | 2213 | } |
2236 | } | 2214 | } |
2237 | 2215 | ||
2216 | /** | ||
2217 | * Get all contributors for the feed | ||
2218 | * | ||
2219 | * Uses `<atom:contributor>` | ||
2220 | * | ||
2221 | * @since 1.1 | ||
2222 | * @return array|null List of {@see SimplePie_Author} objects | ||
2223 | */ | ||
2238 | public function get_contributors() | 2224 | public function get_contributors() |
2239 | { | 2225 | { |
2240 | $contributors = array(); | 2226 | $contributors = array(); |
@@ -2257,7 +2243,7 @@ class SimplePie_Core | |||
2257 | } | 2243 | } |
2258 | if ($name !== null || $email !== null || $uri !== null) | 2244 | if ($name !== null || $email !== null || $uri !== null) |
2259 | { | 2245 | { |
2260 | $contributors[] = new $this->author_class($name, $uri, $email); | 2246 | $contributors[] = $this->registry->create('Author', array($name, $uri, $email)); |
2261 | } | 2247 | } |
2262 | } | 2248 | } |
2263 | foreach ((array) $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'contributor') as $contributor) | 2249 | foreach ((array) $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'contributor') as $contributor) |
@@ -2279,13 +2265,13 @@ class SimplePie_Core | |||
2279 | } | 2265 | } |
2280 | if ($name !== null || $email !== null || $url !== null) | 2266 | if ($name !== null || $email !== null || $url !== null) |
2281 | { | 2267 | { |
2282 | $contributors[] = new $this->author_class($name, $url, $email); | 2268 | $contributors[] = $this->registry->create('Author', array($name, $url, $email)); |
2283 | } | 2269 | } |
2284 | } | 2270 | } |
2285 | 2271 | ||
2286 | if (!empty($contributors)) | 2272 | if (!empty($contributors)) |
2287 | { | 2273 | { |
2288 | return SimplePie_Misc::array_unique($contributors); | 2274 | return array_unique($contributors); |
2289 | } | 2275 | } |
2290 | else | 2276 | else |
2291 | { | 2277 | { |
@@ -2293,6 +2279,14 @@ class SimplePie_Core | |||
2293 | } | 2279 | } |
2294 | } | 2280 | } |
2295 | 2281 | ||
2282 | /** | ||
2283 | * Get a single link for the feed | ||
2284 | * | ||
2285 | * @since 1.0 (previously called `get_feed_link` since Preview Release, `get_feed_permalink()` since 0.8) | ||
2286 | * @param int $key The link that you want to return. Remember that arrays begin with 0, not 1 | ||
2287 | * @param string $rel The relationship of the link to return | ||
2288 | * @return string|null Link URL | ||
2289 | */ | ||
2296 | public function get_link($key = 0, $rel = 'alternate') | 2290 | public function get_link($key = 0, $rel = 'alternate') |
2297 | { | 2291 | { |
2298 | $links = $this->get_links($rel); | 2292 | $links = $this->get_links($rel); |
@@ -2307,13 +2301,30 @@ class SimplePie_Core | |||
2307 | } | 2301 | } |
2308 | 2302 | ||
2309 | /** | 2303 | /** |
2310 | * Added for parity between the parent-level and the item/entry-level. | 2304 | * Get the permalink for the feed |
2305 | * | ||
2306 | * Returns the first link available with a relationship of "alternate". | ||
2307 | * Identical to {@see get_link()} with key 0 | ||
2308 | * | ||
2309 | * @see get_link | ||
2310 | * @since 1.0 (previously called `get_feed_link` since Preview Release, `get_feed_permalink()` since 0.8) | ||
2311 | * @internal Added for parity between the parent-level and the item/entry-level. | ||
2312 | * @return string|null Link URL | ||
2311 | */ | 2313 | */ |
2312 | public function get_permalink() | 2314 | public function get_permalink() |
2313 | { | 2315 | { |
2314 | return $this->get_link(0); | 2316 | return $this->get_link(0); |
2315 | } | 2317 | } |
2316 | 2318 | ||
2319 | /** | ||
2320 | * Get all links for the feed | ||
2321 | * | ||
2322 | * Uses `<atom:link>` or `<link>` | ||
2323 | * | ||
2324 | * @since Beta 2 | ||
2325 | * @param string $rel The relationship of links to return | ||
2326 | * @return array|null Links found for the feed (strings) | ||
2327 | */ | ||
2317 | public function get_links($rel = 'alternate') | 2328 | public function get_links($rel = 'alternate') |
2318 | { | 2329 | { |
2319 | if (!isset($this->data['links'])) | 2330 | if (!isset($this->data['links'])) |
@@ -2358,7 +2369,7 @@ class SimplePie_Core | |||
2358 | $keys = array_keys($this->data['links']); | 2369 | $keys = array_keys($this->data['links']); |
2359 | foreach ($keys as $key) | 2370 | foreach ($keys as $key) |
2360 | { | 2371 | { |
2361 | if (SimplePie_Misc::is_isegment_nz_nc($key)) | 2372 | if ($this->registry->call('Misc', 'is_isegment_nz_nc', array($key))) |
2362 | { | 2373 | { |
2363 | if (isset($this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key])) | 2374 | if (isset($this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key])) |
2364 | { | 2375 | { |
@@ -2393,15 +2404,24 @@ class SimplePie_Core | |||
2393 | return $this->all_discovered_feeds; | 2404 | return $this->all_discovered_feeds; |
2394 | } | 2405 | } |
2395 | 2406 | ||
2407 | /** | ||
2408 | * Get the content for the item | ||
2409 | * | ||
2410 | * Uses `<atom:subtitle>`, `<atom:tagline>`, `<description>`, | ||
2411 | * `<dc:description>`, `<itunes:summary>` or `<itunes:subtitle>` | ||
2412 | * | ||
2413 | * @since 1.0 (previously called `get_feed_description()` since 0.8) | ||
2414 | * @return string|null | ||
2415 | */ | ||
2396 | public function get_description() | 2416 | public function get_description() |
2397 | { | 2417 | { |
2398 | if ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'subtitle')) | 2418 | if ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'subtitle')) |
2399 | { | 2419 | { |
2400 | return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_10_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 2420 | return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
2401 | } | 2421 | } |
2402 | elseif ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'tagline')) | 2422 | elseif ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'tagline')) |
2403 | { | 2423 | { |
2404 | return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_03_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 2424 | return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
2405 | } | 2425 | } |
2406 | elseif ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'description')) | 2426 | elseif ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'description')) |
2407 | { | 2427 | { |
@@ -2437,15 +2457,23 @@ class SimplePie_Core | |||
2437 | } | 2457 | } |
2438 | } | 2458 | } |
2439 | 2459 | ||
2460 | /** | ||
2461 | * Get the copyright info for the feed | ||
2462 | * | ||
2463 | * Uses `<atom:rights>`, `<atom:copyright>` or `<dc:rights>` | ||
2464 | * | ||
2465 | * @since 1.0 (previously called `get_feed_copyright()` since 0.8) | ||
2466 | * @return string|null | ||
2467 | */ | ||
2440 | public function get_copyright() | 2468 | public function get_copyright() |
2441 | { | 2469 | { |
2442 | if ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'rights')) | 2470 | if ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'rights')) |
2443 | { | 2471 | { |
2444 | return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_10_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 2472 | return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
2445 | } | 2473 | } |
2446 | elseif ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'copyright')) | 2474 | elseif ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'copyright')) |
2447 | { | 2475 | { |
2448 | return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_03_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 2476 | return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
2449 | } | 2477 | } |
2450 | elseif ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'copyright')) | 2478 | elseif ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'copyright')) |
2451 | { | 2479 | { |
@@ -2465,6 +2493,14 @@ class SimplePie_Core | |||
2465 | } | 2493 | } |
2466 | } | 2494 | } |
2467 | 2495 | ||
2496 | /** | ||
2497 | * Get the language for the feed | ||
2498 | * | ||
2499 | * Uses `<language>`, `<dc:language>`, or @xml_lang | ||
2500 | * | ||
2501 | * @since 1.0 (previously called `get_feed_language()` since 0.8) | ||
2502 | * @return string|null | ||
2503 | */ | ||
2468 | public function get_language() | 2504 | public function get_language() |
2469 | { | 2505 | { |
2470 | if ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'language')) | 2506 | if ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'language')) |
@@ -2501,6 +2537,18 @@ class SimplePie_Core | |||
2501 | } | 2537 | } |
2502 | } | 2538 | } |
2503 | 2539 | ||
2540 | /** | ||
2541 | * Get the latitude coordinates for the feed | ||
2542 | * | ||
2543 | * Compatible with the W3C WGS84 Basic Geo and GeoRSS specifications | ||
2544 | * | ||
2545 | * Uses `<geo:lat>` or `<georss:point>` | ||
2546 | * | ||
2547 | * @since 1.0 | ||
2548 | * @link http://www.w3.org/2003/01/geo/ W3C WGS84 Basic Geo | ||
2549 | * @link http://www.georss.org/ GeoRSS | ||
2550 | * @return string|null | ||
2551 | */ | ||
2504 | public function get_latitude() | 2552 | public function get_latitude() |
2505 | { | 2553 | { |
2506 | 2554 | ||
@@ -2518,6 +2566,18 @@ class SimplePie_Core | |||
2518 | } | 2566 | } |
2519 | } | 2567 | } |
2520 | 2568 | ||
2569 | /** | ||
2570 | * Get the longitude coordinates for the feed | ||
2571 | * | ||
2572 | * Compatible with the W3C WGS84 Basic Geo and GeoRSS specifications | ||
2573 | * | ||
2574 | * Uses `<geo:long>`, `<geo:lon>` or `<georss:point>` | ||
2575 | * | ||
2576 | * @since 1.0 | ||
2577 | * @link http://www.w3.org/2003/01/geo/ W3C WGS84 Basic Geo | ||
2578 | * @link http://www.georss.org/ GeoRSS | ||
2579 | * @return string|null | ||
2580 | */ | ||
2521 | public function get_longitude() | 2581 | public function get_longitude() |
2522 | { | 2582 | { |
2523 | if ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_W3C_BASIC_GEO, 'long')) | 2583 | if ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_W3C_BASIC_GEO, 'long')) |
@@ -2538,6 +2598,15 @@ class SimplePie_Core | |||
2538 | } | 2598 | } |
2539 | } | 2599 | } |
2540 | 2600 | ||
2601 | /** | ||
2602 | * Get the feed logo's title | ||
2603 | * | ||
2604 | * RSS 0.9.0, 1.0 and 2.0 feeds are allowed to have a "feed logo" title. | ||
2605 | * | ||
2606 | * Uses `<image><title>` or `<image><dc:title>` | ||
2607 | * | ||
2608 | * @return string|null | ||
2609 | */ | ||
2541 | public function get_image_title() | 2610 | public function get_image_title() |
2542 | { | 2611 | { |
2543 | if ($return = $this->get_image_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'title')) | 2612 | if ($return = $this->get_image_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'title')) |
@@ -2566,6 +2635,17 @@ class SimplePie_Core | |||
2566 | } | 2635 | } |
2567 | } | 2636 | } |
2568 | 2637 | ||
2638 | /** | ||
2639 | * Get the feed logo's URL | ||
2640 | * | ||
2641 | * RSS 0.9.0, 2.0, Atom 1.0, and feeds with iTunes RSS tags are allowed to | ||
2642 | * have a "feed logo" URL. This points directly to the image itself. | ||
2643 | * | ||
2644 | * Uses `<itunes:image>`, `<atom:logo>`, `<atom:icon>`, | ||
2645 | * or `<image><url>` | ||
2646 | * | ||
2647 | * @return string|null | ||
2648 | */ | ||
2569 | public function get_image_url() | 2649 | public function get_image_url() |
2570 | { | 2650 | { |
2571 | if ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'image')) | 2651 | if ($return = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'image')) |
@@ -2598,6 +2678,18 @@ class SimplePie_Core | |||
2598 | } | 2678 | } |
2599 | } | 2679 | } |
2600 | 2680 | ||
2681 | |||
2682 | /** | ||
2683 | * Get the feed logo's link | ||
2684 | * | ||
2685 | * RSS 0.9.0, 1.0 and 2.0 feeds are allowed to have a "feed logo" link. This | ||
2686 | * points to a human-readable page that the image should link to. | ||
2687 | * | ||
2689 | * Uses `<image><link>` (the `<link>` child of the feed's | ||
2690 | * `<image>` element) | ||
2690 | * | ||
2691 | * @return string|null | ||
2692 | */ | ||
2601 | public function get_image_link() | 2693 | public function get_image_link() |
2602 | { | 2694 | { |
2603 | if ($return = $this->get_image_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'link')) | 2695 | if ($return = $this->get_image_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'link')) |
@@ -2618,6 +2710,16 @@ class SimplePie_Core | |||
2618 | } | 2710 | } |
2619 | } | 2711 | } |
2620 | 2712 | ||
2713 | /** | ||
2714 | * Get the feed logo's width | ||
2715 | * | ||
2716 | * RSS 2.0 feeds are allowed to have a "feed logo" width. | ||
2717 | * | ||
2718 | * Uses `<image><width>` or defaults to 88.0 if no width is specified and | ||
2719 | * the feed is an RSS 2.0 feed. | ||
2720 | * | ||
2721 | * @return int|float|null | ||
2722 | */ | ||
2621 | public function get_image_width() | 2723 | public function get_image_width() |
2622 | { | 2724 | { |
2623 | if ($return = $this->get_image_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'width')) | 2725 | if ($return = $this->get_image_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'width')) |
@@ -2634,6 +2736,16 @@ class SimplePie_Core | |||
2634 | } | 2736 | } |
2635 | } | 2737 | } |
2636 | 2738 | ||
2739 | /** | ||
2740 | * Get the feed logo's height | ||
2741 | * | ||
2742 | * RSS 2.0 feeds are allowed to have a "feed logo" height. | ||
2743 | * | ||
2744 | * Uses `<image><height>` or defaults to 31.0 if no height is specified and | ||
2745 | * the feed is an RSS 2.0 feed. | ||
2746 | * | ||
2747 | * @return int|float|null | ||
2748 | */ | ||
2637 | public function get_image_height() | 2749 | public function get_image_height() |
2638 | { | 2750 | { |
2639 | if ($return = $this->get_image_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'height')) | 2751 | if ($return = $this->get_image_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'height')) |
@@ -2650,6 +2762,15 @@ class SimplePie_Core | |||
2650 | } | 2762 | } |
2651 | } | 2763 | } |
2652 | 2764 | ||
2765 | /** | ||
2766 | * Get the number of items in the feed | ||
2767 | * | ||
2768 | * This is well-suited for {@link http://php.net/for for()} loops with | ||
2769 | * {@see get_item()} | ||
2770 | * | ||
2771 | * @param int $max Maximum value to return. 0 for no limit | ||
2772 | * @return int Number of items in the feed | ||
2773 | */ | ||
2653 | public function get_item_quantity($max = 0) | 2774 | public function get_item_quantity($max = 0) |
2654 | { | 2775 | { |
2655 | $max = (int) $max; | 2776 | $max = (int) $max; |
@@ -2664,6 +2785,18 @@ class SimplePie_Core | |||
2664 | } | 2785 | } |
2665 | } | 2786 | } |
2666 | 2787 | ||
2788 | /** | ||
2789 | * Get a single item from the feed | ||
2790 | * | ||
2791 | * This is better suited for {@link http://php.net/for for()} loops, whereas | ||
2792 | * {@see get_items()} is better suited for | ||
2793 | * {@link http://php.net/foreach foreach()} loops. | ||
2794 | * | ||
2795 | * @see get_item_quantity() | ||
2796 | * @since Beta 2 | ||
2797 | * @param int $key The item that you want to return. Remember that arrays begin with 0, not 1 | ||
2798 | * @return SimplePie_Item|null | ||
2799 | */ | ||
2667 | public function get_item($key = 0) | 2800 | public function get_item($key = 0) |
2668 | { | 2801 | { |
2669 | $items = $this->get_items(); | 2802 | $items = $this->get_items(); |
@@ -2677,6 +2810,19 @@ class SimplePie_Core | |||
2677 | } | 2810 | } |
2678 | } | 2811 | } |
2679 | 2812 | ||
2813 | /** | ||
2814 | * Get all items from the feed | ||
2815 | * | ||
2816 | * This is better suited for {@link http://php.net/foreach foreach()} loops, | ||
2817 | * whereas {@see get_item()} is better suited for | ||
2818 | * {@link http://php.net/for for()} loops. | ||
2819 | * | ||
2820 | * @see get_item_quantity | ||
2821 | * @since Beta 2 | ||
2822 | * @param int $start Index to start at | ||
2823 | * @param int $end Number of items to return. 0 for all items after `$start` | ||
2824 | * @return array|null List of {@see SimplePie_Item} objects | ||
2825 | */ | ||
2680 | public function get_items($start = 0, $end = 0) | 2826 | public function get_items($start = 0, $end = 0) |
2681 | { | 2827 | { |
2682 | if (!isset($this->data['items'])) | 2828 | if (!isset($this->data['items'])) |
@@ -2693,7 +2839,7 @@ class SimplePie_Core | |||
2693 | $keys = array_keys($items); | 2839 | $keys = array_keys($items); |
2694 | foreach ($keys as $key) | 2840 | foreach ($keys as $key) |
2695 | { | 2841 | { |
2696 | $this->data['items'][] = new $this->item_class($this, $items[$key]); | 2842 | $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key])); |
2697 | } | 2843 | } |
2698 | } | 2844 | } |
2699 | if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'entry')) | 2845 | if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'entry')) |
@@ -2701,7 +2847,7 @@ class SimplePie_Core | |||
2701 | $keys = array_keys($items); | 2847 | $keys = array_keys($items); |
2702 | foreach ($keys as $key) | 2848 | foreach ($keys as $key) |
2703 | { | 2849 | { |
2704 | $this->data['items'][] = new $this->item_class($this, $items[$key]); | 2850 | $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key])); |
2705 | } | 2851 | } |
2706 | } | 2852 | } |
2707 | if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'item')) | 2853 | if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'item')) |
@@ -2709,7 +2855,7 @@ class SimplePie_Core | |||
2709 | $keys = array_keys($items); | 2855 | $keys = array_keys($items); |
2710 | foreach ($keys as $key) | 2856 | foreach ($keys as $key) |
2711 | { | 2857 | { |
2712 | $this->data['items'][] = new $this->item_class($this, $items[$key]); | 2858 | $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key])); |
2713 | } | 2859 | } |
2714 | } | 2860 | } |
2715 | if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'item')) | 2861 | if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'item')) |
@@ -2717,7 +2863,7 @@ class SimplePie_Core | |||
2717 | $keys = array_keys($items); | 2863 | $keys = array_keys($items); |
2718 | foreach ($keys as $key) | 2864 | foreach ($keys as $key) |
2719 | { | 2865 | { |
2720 | $this->data['items'][] = new $this->item_class($this, $items[$key]); | 2866 | $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key])); |
2721 | } | 2867 | } |
2722 | } | 2868 | } |
2723 | if ($items = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'item')) | 2869 | if ($items = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'item')) |
@@ -2725,7 +2871,7 @@ class SimplePie_Core | |||
2725 | $keys = array_keys($items); | 2871 | $keys = array_keys($items); |
2726 | foreach ($keys as $key) | 2872 | foreach ($keys as $key) |
2727 | { | 2873 | { |
2728 | $this->data['items'][] = new $this->item_class($this, $items[$key]); | 2874 | $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key])); |
2729 | } | 2875 | } |
2730 | } | 2876 | } |
2731 | } | 2877 | } |
@@ -2751,7 +2897,7 @@ class SimplePie_Core | |||
2751 | $this->data['ordered_items'] = $this->data['items']; | 2897 | $this->data['ordered_items'] = $this->data['items']; |
2752 | if ($do_sort) | 2898 | if ($do_sort) |
2753 | { | 2899 | { |
2754 | usort($this->data['ordered_items'], array(&$this, 'sort_items')); | 2900 | usort($this->data['ordered_items'], array(get_class($this), 'sort_items')); |
2755 | } | 2901 | } |
2756 | } | 2902 | } |
2757 | $items = $this->data['ordered_items']; | 2903 | $items = $this->data['ordered_items']; |
@@ -2778,24 +2924,98 @@ class SimplePie_Core | |||
2778 | } | 2924 | } |
2779 | 2925 | ||
2780 | /** | 2926 | /** |
2781 | * @static | 2927 | * Set the favicon handler |
2928 | * | ||
2929 | * @deprecated Use your own favicon handling instead | ||
2930 | */ | ||
2931 | public function set_favicon_handler($page = false, $qs = 'i') | ||
2932 | { | ||
2933 | $level = defined('E_USER_DEPRECATED') ? E_USER_DEPRECATED : E_USER_WARNING; | ||
2934 | trigger_error('Favicon handling has been removed, please use your own handling', $level); | ||
2935 | return false; | ||
2936 | } | ||
2937 | |||
2938 | /** | ||
2939 | * Get the favicon for the current feed | ||
2940 | * | ||
2941 | * @deprecated Use your own favicon handling instead | ||
2942 | */ | ||
2943 | public function get_favicon() | ||
2944 | { | ||
2945 | $level = defined('E_USER_DEPRECATED') ? E_USER_DEPRECATED : E_USER_WARNING; | ||
2946 | trigger_error('Favicon handling has been removed, please use your own handling', $level); | ||
2947 | |||
2948 | if (($url = $this->get_link()) !== null) | ||
2949 | { | ||
2950 | return 'http://g.etfv.co/' . urlencode($url); | ||
2951 | } | ||
2952 | |||
2953 | return false; | ||
2954 | } | ||
2955 | |||
2956 | /** | ||
2957 | * Magic method handler | ||
2958 | * | ||
2959 | * @param string $method Method name | ||
2960 | * @param array $args Arguments to the method | ||
2961 | * @return mixed | ||
2962 | */ | ||
2963 | public function __call($method, $args) | ||
2964 | { | ||
2965 | if (strpos($method, 'subscribe_') === 0) | ||
2966 | { | ||
2967 | $level = defined('E_USER_DEPRECATED') ? E_USER_DEPRECATED : E_USER_WARNING; | ||
2968 | trigger_error('subscribe_*() has been deprecated, implement the callback yourself', $level); | ||
2969 | return ''; | ||
2970 | } | ||
2971 | if ($method === 'enable_xml_dump') | ||
2972 | { | ||
2973 | $level = defined('E_USER_DEPRECATED') ? E_USER_DEPRECATED : E_USER_WARNING; | ||
2974 | trigger_error('enable_xml_dump() has been deprecated, use get_raw_data() instead', $level); | ||
2975 | return false; | ||
2976 | } | ||
2977 | |||
2978 | $class = get_class($this); | ||
2979 | $trace = debug_backtrace(); | ||
2980 | $file = $trace[0]['file']; | ||
2981 | $line = $trace[0]['line']; | ||
2982 | trigger_error("Call to undefined method $class::$method() in $file on line $line", E_USER_ERROR); | ||
2983 | } | ||
2984 | |||
2985 | /** | ||
2986 | * Sorting callback for items | ||
2987 | * | ||
2988 | * @access private | ||
2989 | * @param SimplePie_Item $a | ||
2990 | * @param SimplePie_Item $b | ||
2991 | * @return boolean | ||
2782 | */ | 2992 | */ |
2783 | public function sort_items($a, $b) | 2993 | public static function sort_items($a, $b) |
2784 | { | 2994 | { |
2785 | return $a->get_date('U') <= $b->get_date('U'); | 2995 | return $a->get_date('U') <= $b->get_date('U'); |
2786 | } | 2996 | } |
2787 | 2997 | ||
2788 | /** | 2998 | /** |
2789 | * @static | 2999 | * Merge items from several feeds into one |
3000 | * | ||
3001 | * If you're merging multiple feeds together, they need to all have dates | ||
3002 | * for the items or else SimplePie will refuse to sort them. | ||
3003 | * | ||
3004 | * @link http://simplepie.org/wiki/tutorial/sort_multiple_feeds_by_time_and_date#if_feeds_require_separate_per-feed_settings | ||
3005 | * @param array $urls List of SimplePie feed objects to merge | ||
3006 | * @param int $start Starting item | ||
3007 | * @param int $end Number of items to return | ||
3008 | * @param int $limit Maximum number of items per feed | ||
3009 | * @return array | ||
2790 | */ | 3010 | */ |
2791 | public function merge_items($urls, $start = 0, $end = 0, $limit = 0) | 3011 | public static function merge_items($urls, $start = 0, $end = 0, $limit = 0) |
2792 | { | 3012 | { |
2793 | if (is_array($urls) && sizeof($urls) > 0) | 3013 | if (is_array($urls) && sizeof($urls) > 0) |
2794 | { | 3014 | { |
2795 | $items = array(); | 3015 | $items = array(); |
2796 | foreach ($urls as $arg) | 3016 | foreach ($urls as $arg) |
2797 | { | 3017 | { |
2798 | if (is_a($arg, 'SimplePie')) | 3018 | if ($arg instanceof SimplePie) |
2799 | { | 3019 | { |
2800 | $items = array_merge($items, $arg->get_items(0, $limit)); | 3020 | $items = array_merge($items, $arg->get_items(0, $limit)); |
2801 | } | 3021 | } |
@@ -2817,7 +3037,7 @@ class SimplePie_Core | |||
2817 | $item = null; | 3037 | $item = null; |
2818 | if ($do_sort) | 3038 | if ($do_sort) |
2819 | { | 3039 | { |
2820 | usort($items, array('SimplePie', 'sort_items')); | 3040 | usort($items, array(get_class($urls[0]), 'sort_items')); |
2821 | } | 3041 | } |
2822 | 3042 | ||
2823 | if ($end === 0) | 3043 | if ($end === 0) |
diff --git a/inc/3rdparty/simplepie/SimplePie/Author.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Author.php index 77e5dc58..bbf3812f 100644 --- a/inc/3rdparty/simplepie/SimplePie/Author.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Author.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,24 +33,58 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | /** | |
46 | * Manages all author-related data | ||
47 | * | ||
48 | * Used by {@see SimplePie_Item::get_author()} and {@see SimplePie::get_authors()} | ||
49 | * | ||
50 | * This class can be overloaded with {@see SimplePie::set_author_class()} | ||
51 | * | ||
52 | * @package SimplePie | ||
53 | * @subpackage API | ||
54 | */ | ||
47 | class SimplePie_Author | 55 | class SimplePie_Author |
48 | { | 56 | { |
57 | /** | ||
58 | * Author's name | ||
59 | * | ||
60 | * @var string | ||
61 | * @see get_name() | ||
62 | */ | ||
49 | var $name; | 63 | var $name; |
64 | |||
65 | /** | ||
66 | * Author's link | ||
67 | * | ||
68 | * @var string | ||
69 | * @see get_link() | ||
70 | */ | ||
50 | var $link; | 71 | var $link; |
72 | |||
73 | /** | ||
74 | * Author's email address | ||
75 | * | ||
76 | * @var string | ||
77 | * @see get_email() | ||
78 | */ | ||
51 | var $email; | 79 | var $email; |
52 | 80 | ||
53 | // Constructor, used to input the data | 81 | /** |
82 | * Constructor, used to input the data | ||
83 | * | ||
84 | * @param string $name | ||
85 | * @param string $link | ||
86 | * @param string $email | ||
87 | */ | ||
54 | public function __construct($name = null, $link = null, $email = null) | 88 | public function __construct($name = null, $link = null, $email = null) |
55 | { | 89 | { |
56 | $this->name = $name; | 90 | $this->name = $name; |
@@ -58,12 +92,22 @@ class SimplePie_Author | |||
58 | $this->email = $email; | 92 | $this->email = $email; |
59 | } | 93 | } |
60 | 94 | ||
95 | /** | ||
96 | * String-ified version | ||
97 | * | ||
98 | * @return string | ||
99 | */ | ||
61 | public function __toString() | 100 | public function __toString() |
62 | { | 101 | { |
63 | // There is no $this->data here | 102 | // There is no $this->data here |
64 | return md5(serialize($this)); | 103 | return md5(serialize($this)); |
65 | } | 104 | } |
66 | 105 | ||
106 | /** | ||
107 | * Author's name | ||
108 | * | ||
109 | * @return string|null | ||
110 | */ | ||
67 | public function get_name() | 111 | public function get_name() |
68 | { | 112 | { |
69 | if ($this->name !== null) | 113 | if ($this->name !== null) |
@@ -76,6 +120,11 @@ class SimplePie_Author | |||
76 | } | 120 | } |
77 | } | 121 | } |
78 | 122 | ||
123 | /** | ||
124 | * Author's link | ||
125 | * | ||
126 | * @return string|null | ||
127 | */ | ||
79 | public function get_link() | 128 | public function get_link() |
80 | { | 129 | { |
81 | if ($this->link !== null) | 130 | if ($this->link !== null) |
@@ -88,6 +137,11 @@ class SimplePie_Author | |||
88 | } | 137 | } |
89 | } | 138 | } |
90 | 139 | ||
140 | /** | ||
141 | * Author's email address | ||
142 | * | ||
143 | * @return string|null | ||
144 | */ | ||
91 | public function get_email() | 145 | public function get_email() |
92 | { | 146 | { |
93 | if ($this->email !== null) | 147 | if ($this->email !== null) |
diff --git a/inc/3rdparty/simplepie/SimplePie/Cache.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Cache.php index 819ddeea..75586d74 100644 --- a/inc/3rdparty/simplepie/SimplePie/Cache.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Cache.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,17 +33,25 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | /** | |
46 | * Used to create cache objects | ||
47 | * | ||
48 | * This class can be overloaded with {@see SimplePie::set_cache_class()}, | ||
49 | * although the preferred way is to create your own handler | ||
50 | * via {@see register()} | ||
51 | * | ||
52 | * @package SimplePie | ||
53 | * @subpackage Caching | ||
54 | */ | ||
47 | class SimplePie_Cache | 55 | class SimplePie_Cache |
48 | { | 56 | { |
49 | /** | 57 | /** |
@@ -65,8 +73,13 @@ class SimplePie_Cache | |||
65 | 73 | ||
66 | /** | 74 | /** |
67 | * Create a new SimplePie_Cache object | 75 | * Create a new SimplePie_Cache object |
76 | * | ||
77 | * @param string $location URL location (scheme is used to determine handler) | ||
78 | * @param string $filename Unique identifier for cache object | ||
79 | * @param string $extension 'spi' or 'spc' | ||
80 | * @return SimplePie_Cache_Base Type of object depends on scheme of `$location` | ||
68 | */ | 81 | */ |
69 | public static function create($location, $filename, $extension) | 82 | public static function get_handler($location, $filename, $extension) |
70 | { | 83 | { |
71 | $type = explode(':', $location, 2); | 84 | $type = explode(':', $location, 2); |
72 | $type = $type[0]; | 85 | $type = $type[0]; |
@@ -80,6 +93,17 @@ class SimplePie_Cache | |||
80 | } | 93 | } |
81 | 94 | ||
82 | /** | 95 | /** |
96 | * Create a new SimplePie_Cache object | ||
97 | * | ||
98 | * @deprecated Use {@see get_handler} instead | ||
99 | */ | ||
100 | public function create($location, $filename, $extension) | ||
101 | { | ||
102 | trigger_error('Cache::create() has been replaced with Cache::get_handler(). Switch to the registry system to use this.', E_USER_DEPRECATED); | ||
103 | return self::get_handler($location, $filename, $extension); | ||
104 | } | ||
105 | |||
106 | /** | ||
83 | * Register a handler | 107 | * Register a handler |
84 | * | 108 | * |
85 | * @param string $type DSN type to register for | 109 | * @param string $type DSN type to register for |
diff --git a/inc/3rdparty/simplepie/SimplePie/Cache/Base.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Cache/Base.php index e3cfa8af..937e3463 100644 --- a/inc/3rdparty/simplepie/SimplePie/Cache/Base.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Cache/Base.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,26 +33,37 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | /** | |
46 | * Base for cache objects | ||
47 | * | ||
48 | * Classes to be used with {@see SimplePie_Cache::register()} are expected | ||
49 | * to implement this interface. | ||
50 | * | ||
51 | * @package SimplePie | ||
52 | * @subpackage Caching | ||
53 | */ | ||
47 | interface SimplePie_Cache_Base | 54 | interface SimplePie_Cache_Base |
48 | { | 55 | { |
49 | /** | 56 | /** |
50 | * Feed cache type | 57 | * Feed cache type |
58 | * | ||
59 | * @var string | ||
51 | */ | 60 | */ |
52 | const TYPE_FEED = 'spc'; | 61 | const TYPE_FEED = 'spc'; |
53 | 62 | ||
54 | /** | 63 | /** |
55 | * Image cache type | 64 | * Image cache type |
65 | * | ||
66 | * @var string | ||
56 | */ | 67 | */ |
57 | const TYPE_IMAGE = 'spi'; | 68 | const TYPE_IMAGE = 'spi'; |
58 | 69 | ||
@@ -69,6 +80,7 @@ interface SimplePie_Cache_Base | |||
69 | * Save data to the cache | 80 | * Save data to the cache |
70 | * | 81 | * |
71 | * @param array|SimplePie $data Data to store in the cache. If passed a SimplePie object, only cache the $data property | 82 | * @param array|SimplePie $data Data to store in the cache. If passed a SimplePie object, only cache the $data property |
83 | * @return bool Success status | ||
72 | */ | 84 | */ |
73 | public function save($data); | 85 | public function save($data); |
74 | 86 | ||
diff --git a/inc/3rdparty/simplepie/SimplePie/Cache/DB.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Cache/DB.php index 9a1f2ffc..ac509ae0 100644 --- a/inc/3rdparty/simplepie/SimplePie/Cache/DB.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Cache/DB.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,19 +33,32 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
45 | /** | ||
46 | * Base class for database-based caches | ||
47 | * | ||
48 | * @package SimplePie | ||
49 | * @subpackage Caching | ||
50 | */ | ||
46 | abstract class SimplePie_Cache_DB implements SimplePie_Cache_Base | 51 | abstract class SimplePie_Cache_DB implements SimplePie_Cache_Base |
47 | { | 52 | { |
48 | protected static function prepare_simplepie_object_for_cache(&$data) | 53 | /** |
54 | * Helper for database conversion | ||
55 | * | ||
56 | * Converts a given {@see SimplePie} object into data to be stored | ||
57 | * | ||
58 | * @param SimplePie $data | ||
59 | * @return array First item is the serialized data for storage, second item is the unique ID for this item | ||
60 | */ | ||
61 | protected static function prepare_simplepie_object_for_cache($data) | ||
49 | { | 62 | { |
50 | $items = $data->get_items(); | 63 | $items = $data->get_items(); |
51 | $items_by_id = array(); | 64 | $items_by_id = array(); |
diff --git a/inc/3rdparty/simplepie/SimplePie/Cache/File.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Cache/File.php index f496ff50..5797b3ae 100644 --- a/inc/3rdparty/simplepie/SimplePie/Cache/File.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Cache/File.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,37 +33,78 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | /** | |
46 | * Caches data to the filesystem | ||
47 | * | ||
48 | * @package SimplePie | ||
49 | * @subpackage Caching | ||
50 | */ | ||
47 | class SimplePie_Cache_File implements SimplePie_Cache_Base | 51 | class SimplePie_Cache_File implements SimplePie_Cache_Base |
48 | { | 52 | { |
53 | /** | ||
54 | * Location string | ||
55 | * | ||
56 | * @see SimplePie::$cache_location | ||
57 | * @var string | ||
58 | */ | ||
49 | protected $location; | 59 | protected $location; |
60 | |||
61 | /** | ||
62 | * Filename | ||
63 | * | ||
64 | * @var string | ||
65 | */ | ||
50 | protected $filename; | 66 | protected $filename; |
67 | |||
68 | /** | ||
69 | * File extension | ||
70 | * | ||
71 | * @var string | ||
72 | */ | ||
51 | protected $extension; | 73 | protected $extension; |
74 | |||
75 | /** | ||
76 | * File path | ||
77 | * | ||
78 | * @var string | ||
79 | */ | ||
52 | protected $name; | 80 | protected $name; |
53 | 81 | ||
54 | public function __construct($location, $filename, $extension) | 82 | /** |
83 | * Create a new cache object | ||
84 | * | ||
85 | * @param string $location Location string (from SimplePie::$cache_location) | ||
86 | * @param string $name Unique ID for the cache | ||
87 | * @param string $type Either TYPE_FEED for SimplePie data, or TYPE_IMAGE for image data | ||
88 | */ | ||
89 | public function __construct($location, $name, $type) | ||
55 | { | 90 | { |
56 | $this->location = $location; | 91 | $this->location = $location; |
57 | $this->filename = $filename; | 92 | $this->filename = $name; |
58 | $this->extension = $extension; | 93 | $this->extension = $type; |
59 | $this->name = "$this->location/$this->filename.$this->extension"; | 94 | $this->name = "$this->location/$this->filename.$this->extension"; |
60 | } | 95 | } |
61 | 96 | ||
97 | /** | ||
98 | * Save data to the cache | ||
99 | * | ||
100 | * @param array|SimplePie $data Data to store in the cache. If passed a SimplePie object, only cache the $data property | ||
101 | * @return bool Success status | ||
102 | */ | ||
62 | public function save($data) | 103 | public function save($data) |
63 | { | 104 | { |
64 | if (file_exists($this->name) && is_writeable($this->name) || file_exists($this->location) && is_writeable($this->location)) | 105 | if (file_exists($this->name) && is_writeable($this->name) || file_exists($this->location) && is_writeable($this->location)) |
65 | { | 106 | { |
66 | if (is_a($data, 'SimplePie')) | 107 | if ($data instanceof SimplePie) |
67 | { | 108 | { |
68 | $data = $data->data; | 109 | $data = $data->data; |
69 | } | 110 | } |
@@ -74,6 +115,11 @@ class SimplePie_Cache_File implements SimplePie_Cache_Base | |||
74 | return false; | 115 | return false; |
75 | } | 116 | } |
76 | 117 | ||
118 | /** | ||
119 | * Retrieve the data saved to the cache | ||
120 | * | ||
121 | * @return array Data for SimplePie::$data | ||
122 | */ | ||
77 | public function load() | 123 | public function load() |
78 | { | 124 | { |
79 | if (file_exists($this->name) && is_readable($this->name)) | 125 | if (file_exists($this->name) && is_readable($this->name)) |
@@ -83,6 +129,11 @@ class SimplePie_Cache_File implements SimplePie_Cache_Base | |||
83 | return false; | 129 | return false; |
84 | } | 130 | } |
85 | 131 | ||
132 | /** | ||
133 | * Retrieve the last modified time for the cache | ||
134 | * | ||
135 | * @return int|false Timestamp, or false if the cache file does not exist | ||
136 | */ | ||
86 | public function mtime() | 137 | public function mtime() |
87 | { | 138 | { |
88 | if (file_exists($this->name)) | 139 | if (file_exists($this->name)) |
@@ -92,6 +143,11 @@ class SimplePie_Cache_File implements SimplePie_Cache_Base | |||
92 | return false; | 143 | return false; |
93 | } | 144 | } |
94 | 145 | ||
146 | /** | ||
147 | * Set the last modified time to the current time | ||
148 | * | ||
149 | * @return bool Success status | ||
150 | */ | ||
95 | public function touch() | 151 | public function touch() |
96 | { | 152 | { |
97 | if (file_exists($this->name)) | 153 | if (file_exists($this->name)) |
@@ -101,6 +157,11 @@ class SimplePie_Cache_File implements SimplePie_Cache_Base | |||
101 | return false; | 157 | return false; |
102 | } | 158 | } |
103 | 159 | ||
160 | /** | ||
161 | * Remove the cache | ||
162 | * | ||
163 | * @return bool Success status | ||
164 | */ | ||
104 | public function unlink() | 165 | public function unlink() |
105 | { | 166 | { |
106 | if (file_exists($this->name)) | 167 | if (file_exists($this->name)) |
diff --git a/inc/3rdparty/simplepie/SimplePie/Cache/Memcache.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Cache/Memcache.php index 3535fecc..fd447806 100644 --- a/inc/3rdparty/simplepie/SimplePie/Cache/Memcache.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Cache/Memcache.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,23 +33,59 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
45 | /** | ||
46 | * Caches data to memcache | ||
47 | * | ||
48 | * Registered for URLs with the "memcache" protocol | ||
49 | * | ||
50 | * For example, `memcache://localhost:11211/?timeout=3600&prefix=sp_` will | ||
51 | * connect to memcache on `localhost` on port 11211. All tables will be | ||
52 | * prefixed with `sp_` and data will expire after 3600 seconds | ||
53 | * | ||
54 | * @package SimplePie | ||
55 | * @subpackage Caching | ||
56 | * @uses Memcache | ||
57 | */ | ||
46 | class SimplePie_Cache_Memcache implements SimplePie_Cache_Base | 58 | class SimplePie_Cache_Memcache implements SimplePie_Cache_Base |
47 | { | 59 | { |
60 | /** | ||
61 | * Memcache instance | ||
62 | * | ||
63 | * @var Memcache | ||
64 | */ | ||
48 | protected $cache; | 65 | protected $cache; |
66 | |||
67 | /** | ||
68 | * Options | ||
69 | * | ||
70 | * @var array | ||
71 | */ | ||
49 | protected $options; | 72 | protected $options; |
73 | |||
74 | /** | ||
75 | * Cache name | ||
76 | * | ||
77 | * @var string | ||
78 | */ | ||
50 | protected $name; | 79 | protected $name; |
51 | 80 | ||
52 | public function __construct($url, $filename, $extension) | 81 | /** |
82 | * Create a new cache object | ||
83 | * | ||
84 | * @param string $location Location string (from SimplePie::$cache_location) | ||
85 | * @param string $name Unique ID for the cache | ||
86 | * @param string $type Either TYPE_FEED for SimplePie data, or TYPE_IMAGE for image data | ||
87 | */ | ||
88 | public function __construct($location, $name, $type) | ||
53 | { | 89 | { |
54 | $this->options = array( | 90 | $this->options = array( |
55 | 'host' => '127.0.0.1', | 91 | 'host' => '127.0.0.1', |
@@ -59,22 +95,36 @@ class SimplePie_Cache_Memcache implements SimplePie_Cache_Base | |||
59 | 'prefix' => 'simplepie_', | 95 | 'prefix' => 'simplepie_', |
60 | ), | 96 | ), |
61 | ); | 97 | ); |
62 | $this->options = array_merge_recursive($this->options, SimplePie_Cache::parse_URL($url)); | 98 | $parsed = SimplePie_Cache::parse_URL($location); |
63 | $this->name = $this->options['extras']['prefix'] . md5("$filename:$extension"); | 99 | $this->options['host'] = empty($parsed['host']) ? $this->options['host'] : $parsed['host']; |
100 | $this->options['port'] = empty($parsed['port']) ? $this->options['port'] : $parsed['port']; | ||
101 | $this->options['extras'] = array_merge($this->options['extras'], $parsed['extras']); | ||
102 | $this->name = $this->options['extras']['prefix'] . md5("$name:$type"); | ||
64 | 103 | ||
65 | $this->cache = new Memcache(); | 104 | $this->cache = new Memcache(); |
66 | $this->cache->addServer($this->options['host'], (int) $this->options['port']); | 105 | $this->cache->addServer($this->options['host'], (int) $this->options['port']); |
67 | } | 106 | } |
68 | 107 | ||
108 | /** | ||
109 | * Save data to the cache | ||
110 | * | ||
111 | * @param array|SimplePie $data Data to store in the cache. If passed a SimplePie object, only cache the $data property | ||
112 | * @return bool Successfulness | ||
113 | */ | ||
69 | public function save($data) | 114 | public function save($data) |
70 | { | 115 | { |
71 | if (is_a($data, 'SimplePie')) | 116 | if ($data instanceof SimplePie) |
72 | { | 117 | { |
73 | $data = $data->data; | 118 | $data = $data->data; |
74 | } | 119 | } |
75 | return $this->cache->set($this->name, serialize($data), MEMCACHE_COMPRESSED, (int) $this->options['extras']['timeout']); | 120 | return $this->cache->set($this->name, serialize($data), MEMCACHE_COMPRESSED, (int) $this->options['extras']['timeout']); |
76 | } | 121 | } |
77 | 122 | ||
123 | /** | ||
124 | * Retrieve the data saved to the cache | ||
125 | * | ||
126 | * @return array Data for SimplePie::$data | ||
127 | */ | ||
78 | public function load() | 128 | public function load() |
79 | { | 129 | { |
80 | $data = $this->cache->get($this->name); | 130 | $data = $this->cache->get($this->name); |
@@ -86,6 +136,11 @@ class SimplePie_Cache_Memcache implements SimplePie_Cache_Base | |||
86 | return false; | 136 | return false; |
87 | } | 137 | } |
88 | 138 | ||
139 | /** | ||
140 | * Retrieve the last modified time for the cache | ||
141 | * | ||
142 | * @return int Timestamp | ||
143 | */ | ||
89 | public function mtime() | 144 | public function mtime() |
90 | { | 145 | { |
91 | $data = $this->cache->get($this->name); | 146 | $data = $this->cache->get($this->name); |
@@ -99,6 +154,11 @@ class SimplePie_Cache_Memcache implements SimplePie_Cache_Base | |||
99 | return false; | 154 | return false; |
100 | } | 155 | } |
101 | 156 | ||
157 | /** | ||
158 | * Set the last modified time to the current time | ||
159 | * | ||
160 | * @return bool Success status | ||
161 | */ | ||
102 | public function touch() | 162 | public function touch() |
103 | { | 163 | { |
104 | $data = $this->cache->get($this->name); | 164 | $data = $this->cache->get($this->name); |
@@ -111,8 +171,13 @@ class SimplePie_Cache_Memcache implements SimplePie_Cache_Base | |||
111 | return false; | 171 | return false; |
112 | } | 172 | } |
113 | 173 | ||
174 | /** | ||
175 | * Remove the cache | ||
176 | * | ||
177 | * @return bool Success status | ||
178 | */ | ||
114 | public function unlink() | 179 | public function unlink() |
115 | { | 180 | { |
116 | return $this->cache->delete($this->name); | 181 | return $this->cache->delete($this->name, 0); |
117 | } | 182 | } |
118 | } | 183 | } |
diff --git a/inc/3rdparty/simplepie/SimplePie/Cache/MySQL.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Cache/MySQL.php index 84b2cb6b..d53ebc11 100644 --- a/inc/3rdparty/simplepie/SimplePie/Cache/MySQL.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Cache/MySQL.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,24 +33,58 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | /** | |
46 | * Caches data to a MySQL database | ||
47 | * | ||
48 | * Registered for URLs with the "mysql" protocol | ||
49 | * | ||
50 | * For example, `mysql://root:password@localhost:3306/mydb?prefix=sp_` will | ||
51 | * connect to the `mydb` database on `localhost` on port 3306, with the user | ||
52 | * `root` and the password `password`. All tables will be prefixed with `sp_` | ||
53 | * | ||
54 | * @package SimplePie | ||
55 | * @subpackage Caching | ||
56 | */ | ||
47 | class SimplePie_Cache_MySQL extends SimplePie_Cache_DB | 57 | class SimplePie_Cache_MySQL extends SimplePie_Cache_DB |
48 | { | 58 | { |
59 | /** | ||
60 | * PDO instance | ||
61 | * | ||
62 | * @var PDO | ||
63 | */ | ||
49 | protected $mysql; | 64 | protected $mysql; |
65 | |||
66 | /** | ||
67 | * Options | ||
68 | * | ||
69 | * @var array | ||
70 | */ | ||
50 | protected $options; | 71 | protected $options; |
72 | |||
73 | /** | ||
74 | * Cache ID | ||
75 | * | ||
76 | * @var string | ||
77 | */ | ||
51 | protected $id; | 78 | protected $id; |
52 | 79 | ||
53 | public function __construct($url, $name, $extension) | 80 | /** |
81 | * Create a new cache object | ||
82 | * | ||
83 | * @param string $location Location string (from SimplePie::$cache_location) | ||
84 | * @param string $name Unique ID for the cache | ||
85 | * @param string $type Either TYPE_FEED for SimplePie data, or TYPE_IMAGE for image data | ||
86 | */ | ||
87 | public function __construct($location, $name, $type) | ||
54 | { | 88 | { |
55 | $this->options = array( | 89 | $this->options = array( |
56 | 'user' => null, | 90 | 'user' => null, |
@@ -62,7 +96,7 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB | |||
62 | 'prefix' => '', | 96 | 'prefix' => '', |
63 | ), | 97 | ), |
64 | ); | 98 | ); |
65 | $this->options = array_merge_recursive($this->options, SimplePie_Cache::parse_URL($url)); | 99 | $this->options = array_merge_recursive($this->options, SimplePie_Cache::parse_URL($location)); |
66 | 100 | ||
67 | // Path is prefixed with a "/" | 101 | // Path is prefixed with a "/" |
68 | $this->options['dbname'] = substr($this->options['path'], 1); | 102 | $this->options['dbname'] = substr($this->options['path'], 1); |
@@ -77,7 +111,7 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB | |||
77 | return; | 111 | return; |
78 | } | 112 | } |
79 | 113 | ||
80 | $this->id = $name . $extension; | 114 | $this->id = $name . $type; |
81 | 115 | ||
82 | if (!$query = $this->mysql->query('SHOW TABLES')) | 116 | if (!$query = $this->mysql->query('SHOW TABLES')) |
83 | { | 117 | { |
@@ -110,6 +144,12 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB | |||
110 | } | 144 | } |
111 | } | 145 | } |
112 | 146 | ||
147 | /** | ||
148 | * Save data to the cache | ||
149 | * | ||
150 | * @param array|SimplePie $data Data to store in the cache. If passed a SimplePie object, only cache the $data property | ||
151 | * @return bool Successfulness | ||
152 | */ | ||
113 | public function save($data) | 153 | public function save($data) |
114 | { | 154 | { |
115 | if ($this->mysql === null) | 155 | if ($this->mysql === null) |
@@ -117,7 +157,7 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB | |||
117 | return false; | 157 | return false; |
118 | } | 158 | } |
119 | 159 | ||
120 | if (is_a($data, 'SimplePie')) | 160 | if ($data instanceof SimplePie) |
121 | { | 161 | { |
122 | $data = clone $data; | 162 | $data = clone $data; |
123 | 163 | ||
@@ -243,6 +283,11 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB | |||
243 | return false; | 283 | return false; |
244 | } | 284 | } |
245 | 285 | ||
286 | /** | ||
287 | * Retrieve the data saved to the cache | ||
288 | * | ||
289 | * @return array Data for SimplePie::$data | ||
290 | */ | ||
246 | public function load() | 291 | public function load() |
247 | { | 292 | { |
248 | if ($this->mysql === null) | 293 | if ($this->mysql === null) |
@@ -316,6 +361,11 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB | |||
316 | return false; | 361 | return false; |
317 | } | 362 | } |
318 | 363 | ||
364 | /** | ||
365 | * Retrieve the last modified time for the cache | ||
366 | * | ||
367 | * @return int Timestamp | ||
368 | */ | ||
319 | public function mtime() | 369 | public function mtime() |
320 | { | 370 | { |
321 | if ($this->mysql === null) | 371 | if ($this->mysql === null) |
@@ -335,6 +385,11 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB | |||
335 | } | 385 | } |
336 | } | 386 | } |
337 | 387 | ||
388 | /** | ||
389 | * Set the last modified time to the current time | ||
390 | * | ||
391 | * @return bool Success status | ||
392 | */ | ||
338 | public function touch() | 393 | public function touch() |
339 | { | 394 | { |
340 | if ($this->mysql === null) | 395 | if ($this->mysql === null) |
@@ -355,6 +410,11 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB | |||
355 | } | 410 | } |
356 | } | 411 | } |
357 | 412 | ||
413 | /** | ||
414 | * Remove the cache | ||
415 | * | ||
416 | * @return bool Success status | ||
417 | */ | ||
358 | public function unlink() | 418 | public function unlink() |
359 | { | 419 | { |
360 | if ($this->mysql === null) | 420 | if ($this->mysql === null) |
diff --git a/inc/3rdparty/simplepie/SimplePie/Caption.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Caption.php index df6fedc5..52922c5d 100644 --- a/inc/3rdparty/simplepie/SimplePie/Caption.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Caption.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,26 +33,74 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | ||
46 | /** | ||
47 | * Handles `<media:text>` captions as defined in Media RSS. | ||
48 | * | ||
49 | * Used by {@see SimplePie_Enclosure::get_caption()} and {@see SimplePie_Enclosure::get_captions()} | ||
50 | * | ||
51 | * This class can be overloaded with {@see SimplePie::set_caption_class()} | ||
52 | * | ||
53 | * @package SimplePie | ||
54 | * @subpackage API | ||
55 | */ | ||
47 | class SimplePie_Caption | 56 | class SimplePie_Caption |
48 | { | 57 | { |
58 | /** | ||
59 | * Content type | ||
60 | * | ||
61 | * @var string | ||
62 | * @see get_type() | ||
63 | */ | ||
49 | var $type; | 64 | var $type; |
65 | |||
66 | /** | ||
67 | * Language | ||
68 | * | ||
69 | * @var string | ||
70 | * @see get_language() | ||
71 | */ | ||
50 | var $lang; | 72 | var $lang; |
73 | |||
74 | /** | ||
75 | * Start time | ||
76 | * | ||
77 | * @var string | ||
78 | * @see get_starttime() | ||
79 | */ | ||
51 | var $startTime; | 80 | var $startTime; |
81 | |||
82 | /** | ||
83 | * End time | ||
84 | * | ||
85 | * @var string | ||
86 | * @see get_endtime() | ||
87 | */ | ||
52 | var $endTime; | 88 | var $endTime; |
89 | |||
90 | /** | ||
91 | * Caption text | ||
92 | * | ||
93 | * @var string | ||
94 | * @see get_text() | ||
95 | */ | ||
53 | var $text; | 96 | var $text; |
54 | 97 | ||
55 | // Constructor, used to input the data | 98 | /** |
99 | * Constructor, used to input the data | ||
100 | * | ||
101 | * For documentation on all the parameters, see the corresponding | ||
102 | * properties and their accessors | ||
103 | */ | ||
56 | public function __construct($type = null, $lang = null, $startTime = null, $endTime = null, $text = null) | 104 | public function __construct($type = null, $lang = null, $startTime = null, $endTime = null, $text = null) |
57 | { | 105 | { |
58 | $this->type = $type; | 106 | $this->type = $type; |
@@ -62,12 +110,22 @@ class SimplePie_Caption | |||
62 | $this->text = $text; | 110 | $this->text = $text; |
63 | } | 111 | } |
64 | 112 | ||
113 | /** | ||
114 | * String-ified version | ||
115 | * | ||
116 | * @return string | ||
117 | */ | ||
65 | public function __toString() | 118 | public function __toString() |
66 | { | 119 | { |
67 | // There is no $this->data here | 120 | // There is no $this->data here |
68 | return md5(serialize($this)); | 121 | return md5(serialize($this)); |
69 | } | 122 | } |
70 | 123 | ||
124 | /** | ||
125 | * Get the end time | ||
126 | * | ||
127 | * @return string|null Time in the format 'hh:mm:ss.SSS' | ||
128 | */ | ||
71 | public function get_endtime() | 129 | public function get_endtime() |
72 | { | 130 | { |
73 | if ($this->endTime !== null) | 131 | if ($this->endTime !== null) |
@@ -80,6 +138,12 @@ class SimplePie_Caption | |||
80 | } | 138 | } |
81 | } | 139 | } |
82 | 140 | ||
141 | /** | ||
142 | * Get the language | ||
143 | * | ||
144 | * @link http://tools.ietf.org/html/rfc3066 | ||
145 | * @return string|null Language code as per RFC 3066 | ||
146 | */ | ||
83 | public function get_language() | 147 | public function get_language() |
84 | { | 148 | { |
85 | if ($this->lang !== null) | 149 | if ($this->lang !== null) |
@@ -92,6 +156,11 @@ class SimplePie_Caption | |||
92 | } | 156 | } |
93 | } | 157 | } |
94 | 158 | ||
159 | /** | ||
160 | * Get the start time | ||
161 | * | ||
162 | * @return string|null Time in the format 'hh:mm:ss.SSS' | ||
163 | */ | ||
95 | public function get_starttime() | 164 | public function get_starttime() |
96 | { | 165 | { |
97 | if ($this->startTime !== null) | 166 | if ($this->startTime !== null) |
@@ -104,6 +173,11 @@ class SimplePie_Caption | |||
104 | } | 173 | } |
105 | } | 174 | } |
106 | 175 | ||
176 | /** | ||
177 | * Get the text of the caption | ||
178 | * | ||
179 | * @return string|null | ||
180 | */ | ||
107 | public function get_text() | 181 | public function get_text() |
108 | { | 182 | { |
109 | if ($this->text !== null) | 183 | if ($this->text !== null) |
@@ -116,6 +190,11 @@ class SimplePie_Caption | |||
116 | } | 190 | } |
117 | } | 191 | } |
118 | 192 | ||
193 | /** | ||
194 | * Get the content type (not MIME type) | ||
195 | * | ||
196 | * @return string|null Either 'text' or 'html' | ||
197 | */ | ||
119 | public function get_type() | 198 | public function get_type() |
120 | { | 199 | { |
121 | if ($this->type !== null) | 200 | if ($this->type !== null) |
diff --git a/inc/3rdparty/simplepie/SimplePie/Category.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Category.php index ed4b842d..ad0407b4 100644 --- a/inc/3rdparty/simplepie/SimplePie/Category.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Category.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,24 +33,58 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | /** | |
46 | * Manages all category-related data | ||
47 | * | ||
48 | * Used by {@see SimplePie_Item::get_category()} and {@see SimplePie_Item::get_categories()} | ||
49 | * | ||
50 | * This class can be overloaded with {@see SimplePie::set_category_class()} | ||
51 | * | ||
52 | * @package SimplePie | ||
53 | * @subpackage API | ||
54 | */ | ||
47 | class SimplePie_Category | 55 | class SimplePie_Category |
48 | { | 56 | { |
57 | /** | ||
58 | * Category identifier | ||
59 | * | ||
60 | * @var string | ||
61 | * @see get_term | ||
62 | */ | ||
49 | var $term; | 63 | var $term; |
64 | |||
65 | /** | ||
66 | * Categorization scheme identifier | ||
67 | * | ||
68 | * @var string | ||
69 | * @see get_scheme() | ||
70 | */ | ||
50 | var $scheme; | 71 | var $scheme; |
72 | |||
73 | /** | ||
74 | * Human readable label | ||
75 | * | ||
76 | * @var string | ||
77 | * @see get_label() | ||
78 | */ | ||
51 | var $label; | 79 | var $label; |
52 | 80 | ||
53 | // Constructor, used to input the data | 81 | /** |
82 | * Constructor, used to input the data | ||
83 | * | ||
84 | * @param string $term | ||
85 | * @param string $scheme | ||
86 | * @param string $label | ||
87 | */ | ||
54 | public function __construct($term = null, $scheme = null, $label = null) | 88 | public function __construct($term = null, $scheme = null, $label = null) |
55 | { | 89 | { |
56 | $this->term = $term; | 90 | $this->term = $term; |
@@ -58,12 +92,22 @@ class SimplePie_Category | |||
58 | $this->label = $label; | 92 | $this->label = $label; |
59 | } | 93 | } |
60 | 94 | ||
95 | /** | ||
96 | * String-ified version | ||
97 | * | ||
98 | * @return string | ||
99 | */ | ||
61 | public function __toString() | 100 | public function __toString() |
62 | { | 101 | { |
63 | // There is no $this->data here | 102 | // There is no $this->data here |
64 | return md5(serialize($this)); | 103 | return md5(serialize($this)); |
65 | } | 104 | } |
66 | 105 | ||
106 | /** | ||
107 | * Get the category identifier | ||
108 | * | ||
109 | * @return string|null | ||
110 | */ | ||
67 | public function get_term() | 111 | public function get_term() |
68 | { | 112 | { |
69 | if ($this->term !== null) | 113 | if ($this->term !== null) |
@@ -76,6 +120,11 @@ class SimplePie_Category | |||
76 | } | 120 | } |
77 | } | 121 | } |
78 | 122 | ||
123 | /** | ||
124 | * Get the categorization scheme identifier | ||
125 | * | ||
126 | * @return string|null | ||
127 | */ | ||
79 | public function get_scheme() | 128 | public function get_scheme() |
80 | { | 129 | { |
81 | if ($this->scheme !== null) | 130 | if ($this->scheme !== null) |
@@ -88,6 +137,11 @@ class SimplePie_Category | |||
88 | } | 137 | } |
89 | } | 138 | } |
90 | 139 | ||
140 | /** | ||
141 | * Get the human readable label | ||
142 | * | ||
143 | * @return string|null | ||
144 | */ | ||
91 | public function get_label() | 145 | public function get_label() |
92 | { | 146 | { |
93 | if ($this->label !== null) | 147 | if ($this->label !== null) |
diff --git a/inc/3rdparty/simplepie/SimplePie/Content/Type/Sniffer.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Content/Type/Sniffer.php index 7be71374..20d053dc 100644 --- a/inc/3rdparty/simplepie/SimplePie/Content/Type/Sniffer.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Content/Type/Sniffer.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,14 +33,13 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | ||
@@ -48,7 +47,15 @@ | |||
48 | * Content-type sniffing | 47 | * Content-type sniffing |
49 | * | 48 | * |
50 | * Based on the rules in http://tools.ietf.org/html/draft-abarth-mime-sniff-06 | 49 | * Based on the rules in http://tools.ietf.org/html/draft-abarth-mime-sniff-06 |
50 | * | ||
51 | * This is used since we can't always trust Content-Type headers, and is based | ||
52 | * upon the HTML5 parsing rules. | ||
53 | * | ||
54 | * | ||
55 | * This class can be overloaded with {@see SimplePie::set_content_type_sniffer_class()} | ||
56 | * | ||
51 | * @package SimplePie | 57 | * @package SimplePie |
58 | * @subpackage HTTP | ||
52 | */ | 59 | */ |
53 | class SimplePie_Content_Type_Sniffer | 60 | class SimplePie_Content_Type_Sniffer |
54 | { | 61 | { |
diff --git a/inc/3rdparty/simplepie/SimplePie/Copyright.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Copyright.php index cc16f9c4..57c535a6 100644 --- a/inc/3rdparty/simplepie/SimplePie/Copyright.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Copyright.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,35 +33,71 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | /** | |
46 | * Manages `<media:copyright>` copyright tags as defined in Media RSS | ||
47 | * | ||
48 | * Used by {@see SimplePie_Enclosure::get_copyright()} | ||
49 | * | ||
50 | * This class can be overloaded with {@see SimplePie::set_copyright_class()} | ||
51 | * | ||
52 | * @package SimplePie | ||
53 | * @subpackage API | ||
54 | */ | ||
47 | class SimplePie_Copyright | 55 | class SimplePie_Copyright |
48 | { | 56 | { |
57 | /** | ||
58 | * Copyright URL | ||
59 | * | ||
60 | * @var string | ||
61 | * @see get_url() | ||
62 | */ | ||
49 | var $url; | 63 | var $url; |
64 | |||
65 | /** | ||
66 | * Attribution | ||
67 | * | ||
68 | * @var string | ||
69 | * @see get_attribution() | ||
70 | */ | ||
50 | var $label; | 71 | var $label; |
51 | 72 | ||
52 | // Constructor, used to input the data | 73 | /** |
74 | * Constructor, used to input the data | ||
75 | * | ||
76 | * For documentation on all the parameters, see the corresponding | ||
77 | * properties and their accessors | ||
78 | */ | ||
53 | public function __construct($url = null, $label = null) | 79 | public function __construct($url = null, $label = null) |
54 | { | 80 | { |
55 | $this->url = $url; | 81 | $this->url = $url; |
56 | $this->label = $label; | 82 | $this->label = $label; |
57 | } | 83 | } |
58 | 84 | ||
85 | /** | ||
86 | * String-ified version | ||
87 | * | ||
88 | * @return string | ||
89 | */ | ||
59 | public function __toString() | 90 | public function __toString() |
60 | { | 91 | { |
61 | // There is no $this->data here | 92 | // There is no $this->data here |
62 | return md5(serialize($this)); | 93 | return md5(serialize($this)); |
63 | } | 94 | } |
64 | 95 | ||
96 | /** | ||
97 | * Get the copyright URL | ||
98 | * | ||
99 | * @return string|null URL to copyright information | ||
100 | */ | ||
65 | public function get_url() | 101 | public function get_url() |
66 | { | 102 | { |
67 | if ($this->url !== null) | 103 | if ($this->url !== null) |
@@ -74,6 +110,11 @@ class SimplePie_Copyright | |||
74 | } | 110 | } |
75 | } | 111 | } |
76 | 112 | ||
113 | /** | ||
114 | * Get the attribution text | ||
115 | * | ||
116 | * @return string|null | ||
117 | */ | ||
77 | public function get_attribution() | 118 | public function get_attribution() |
78 | { | 119 | { |
79 | if ($this->label !== null) | 120 | if ($this->label !== null) |
diff --git a/inc/3rdparty/simplepie/SimplePie.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Core.php index 9e07c137..46d99662 100644 --- a/inc/3rdparty/simplepie/SimplePie.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Core.php | |||
@@ -33,14 +33,13 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | /** | 45 | /** |
@@ -48,9 +47,11 @@ | |||
48 | * | 47 | * |
49 | * Class for backward compatibility. | 48 | * Class for backward compatibility. |
50 | * | 49 | * |
50 | * @deprecated Use {@see SimplePie} directly | ||
51 | * @package SimplePie | 51 | * @package SimplePie |
52 | * @subpackage API | ||
52 | */ | 53 | */ |
53 | class SimplePie extends SimplePie_Core | 54 | class SimplePie_Core extends SimplePie |
54 | { | 55 | { |
55 | 56 | ||
56 | } \ No newline at end of file | 57 | } \ No newline at end of file |
diff --git a/inc/3rdparty/simplepie/SimplePie/Credit.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Credit.php index 3894b30c..d3a3442a 100644 --- a/inc/3rdparty/simplepie/SimplePie/Credit.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Credit.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,24 +33,57 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | /** | |
46 | * Handles `<media:credit>` as defined in Media RSS | ||
47 | * | ||
48 | * Used by {@see SimplePie_Enclosure::get_credit()} and {@see SimplePie_Enclosure::get_credits()} | ||
49 | * | ||
50 | * This class can be overloaded with {@see SimplePie::set_credit_class()} | ||
51 | * | ||
52 | * @package SimplePie | ||
53 | * @subpackage API | ||
54 | */ | ||
47 | class SimplePie_Credit | 55 | class SimplePie_Credit |
48 | { | 56 | { |
57 | /** | ||
58 | * Credited role | ||
59 | * | ||
60 | * @var string | ||
61 | * @see get_role() | ||
62 | */ | ||
49 | var $role; | 63 | var $role; |
64 | |||
65 | /** | ||
66 | * Organizational scheme | ||
67 | * | ||
68 | * @var string | ||
69 | * @see get_scheme() | ||
70 | */ | ||
50 | var $scheme; | 71 | var $scheme; |
72 | |||
73 | /** | ||
74 | * Credited name | ||
75 | * | ||
76 | * @var string | ||
77 | * @see get_name() | ||
78 | */ | ||
51 | var $name; | 79 | var $name; |
52 | 80 | ||
53 | // Constructor, used to input the data | 81 | /** |
82 | * Constructor, used to input the data | ||
83 | * | ||
84 | * For documentation on all the parameters, see the corresponding | ||
85 | * properties and their accessors | ||
86 | */ | ||
54 | public function __construct($role = null, $scheme = null, $name = null) | 87 | public function __construct($role = null, $scheme = null, $name = null) |
55 | { | 88 | { |
56 | $this->role = $role; | 89 | $this->role = $role; |
@@ -58,12 +91,22 @@ class SimplePie_Credit | |||
58 | $this->name = $name; | 91 | $this->name = $name; |
59 | } | 92 | } |
60 | 93 | ||
94 | /** | ||
95 | * String-ified version | ||
96 | * | ||
97 | * @return string | ||
98 | */ | ||
61 | public function __toString() | 99 | public function __toString() |
62 | { | 100 | { |
63 | // There is no $this->data here | 101 | // There is no $this->data here |
64 | return md5(serialize($this)); | 102 | return md5(serialize($this)); |
65 | } | 103 | } |
66 | 104 | ||
105 | /** | ||
106 | * Get the role of the person receiving credit | ||
107 | * | ||
108 | * @return string|null | ||
109 | */ | ||
67 | public function get_role() | 110 | public function get_role() |
68 | { | 111 | { |
69 | if ($this->role !== null) | 112 | if ($this->role !== null) |
@@ -76,6 +119,11 @@ class SimplePie_Credit | |||
76 | } | 119 | } |
77 | } | 120 | } |
78 | 121 | ||
122 | /** | ||
123 | * Get the organizational scheme | ||
124 | * | ||
125 | * @return string|null | ||
126 | */ | ||
79 | public function get_scheme() | 127 | public function get_scheme() |
80 | { | 128 | { |
81 | if ($this->scheme !== null) | 129 | if ($this->scheme !== null) |
@@ -88,6 +136,11 @@ class SimplePie_Credit | |||
88 | } | 136 | } |
89 | } | 137 | } |
90 | 138 | ||
139 | /** | ||
140 | * Get the credited person/entity's name | ||
141 | * | ||
142 | * @return string|null | ||
143 | */ | ||
91 | public function get_name() | 144 | public function get_name() |
92 | { | 145 | { |
93 | if ($this->name !== null) | 146 | if ($this->name !== null) |
diff --git a/inc/3rdparty/libraries/simplepie/library/SimplePie/Decode/HTML/Entities.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Decode/HTML/Entities.php new file mode 100644 index 00000000..069e8d8e --- /dev/null +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Decode/HTML/Entities.php | |||
@@ -0,0 +1,617 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * SimplePie | ||
4 | * | ||
5 | * A PHP-Based RSS and Atom Feed Framework. | ||
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | ||
7 | * | ||
8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | ||
9 | * All rights reserved. | ||
10 | * | ||
11 | * Redistribution and use in source and binary forms, with or without modification, are | ||
12 | * permitted provided that the following conditions are met: | ||
13 | * | ||
14 | * * Redistributions of source code must retain the above copyright notice, this list of | ||
15 | * conditions and the following disclaimer. | ||
16 | * | ||
17 | * * Redistributions in binary form must reproduce the above copyright notice, this list | ||
18 | * of conditions and the following disclaimer in the documentation and/or other materials | ||
19 | * provided with the distribution. | ||
20 | * | ||
21 | * * Neither the name of the SimplePie Team nor the names of its contributors may be used | ||
22 | * to endorse or promote products derived from this software without specific prior | ||
23 | * written permission. | ||
24 | * | ||
25 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS | ||
26 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | ||
27 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS | ||
28 | * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
29 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
30 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
31 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | ||
32 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
33 | * POSSIBILITY OF SUCH DAMAGE. | ||
34 | * | ||
35 | * @package SimplePie | ||
36 | * @version 1.3.1 | ||
37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue | ||
38 | * @author Ryan Parman | ||
39 | * @author Geoffrey Sneddon | ||
40 | * @author Ryan McCue | ||
41 | * @link http://simplepie.org/ SimplePie | ||
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | ||
43 | */ | ||
44 | |||
45 | |||
/**
 * Decode HTML Entities
 *
 * This implements HTML5 as of revision 967 (2007-06-28)
 *
 * The decoder scans the input for `&`, tries to read a numeric or named
 * character reference after it, and splices the UTF-8 replacement into the
 * data in place.
 *
 * @deprecated Use DOMDocument instead!
 * @package SimplePie
 */
class SimplePie_Decode_HTML_Entities
{
    /**
     * Data to be parsed
     *
     * @access private
     * @var string
     */
    var $data = '';

    /**
     * Currently consumed bytes
     *
     * Holds the bytes read since the `&` that started the current reference
     * (the `&` itself included); parse() resets it after every reference.
     *
     * @access private
     * @var string
     */
    var $consumed = '';

    /**
     * Position of the current byte being parsed
     *
     * @access private
     * @var int
     */
    var $position = 0;

    /**
     * Create an instance of the class with the input data
     *
     * @access public
     * @param string $data Input data
     */
    public function __construct($data)
    {
        $this->data = $data;
    }

    /**
     * Parse the input data
     *
     * Visits every `&` in the data from left to right and lets entity()
     * decode the reference that follows it, if any.
     *
     * @access public
     * @return string Output data
     */
    public function parse()
    {
        while (($this->position = strpos($this->data, '&', $this->position)) !== false)
        {
            // Step over the '&' itself, then try to decode what follows.
            $this->consume();
            $this->entity();
            $this->consumed = '';
        }
        return $this->data;
    }

    /**
     * Consume the next byte
     *
     * @access private
     * @return mixed The next byte, or false, if there is no more data
     */
    public function consume()
    {
        if (isset($this->data[$this->position]))
        {
            $this->consumed .= $this->data[$this->position];
            return $this->data[$this->position++];
        }
        else
        {
            return false;
        }
    }

    /**
     * Consume a range of characters
     *
     * @access private
     * @param string $chars Characters to consume
     * @return mixed A series of characters that match the range, or false
     */
    public function consume_range($chars)
    {
        if ($len = strspn($this->data, $chars, $this->position))
        {
            $data = substr($this->data, $this->position, $len);
            $this->consumed .= $data;
            $this->position += $len;
            return $data;
        }
        else
        {
            return false;
        }
    }

    /**
     * Unconsume one byte
     *
     * @access private
     */
    public function unconsume()
    {
        $this->consumed = substr($this->consumed, 0, -1);
        $this->position--;
    }

    /**
     * Decode an entity
     *
     * Expects the leading `&` to have been consumed already. Looks at the
     * next byte to decide whether this is a numeric reference (`&#...`), a
     * named reference, or not a reference at all.
     *
     * @access private
     */
    public function entity()
    {
        $next = $this->consume();

        // End of data: nothing follows the '&'.
        if ($next === false)
        {
            return;
        }

        switch ($next)
        {
            // Whitespace or '<' after '&': not a character reference.
            case "\x09":
            case "\x0A":
            case "\x0B":
            case "\x0C":
            case "\x20":
            case "\x3C":
                break;

            // A second '&' may itself start a reference ("&&lt;"), so hand
            // it back for parse() to visit instead of skipping over it.
            case "\x26":
                $this->unconsume();
                break;

            case "\x23":
                $this->numeric_entity();
                break;

            default:
                $this->named_entity();
                break;
        }
    }

    /**
     * Decode a numeric character reference
     *
     * Called once `&#` has been consumed. Reads an optional `x`/`X` marker
     * and the digits, maps the code point to UTF-8 and replaces the
     * reference (with its optional trailing `;`) in the data.
     *
     * @access private
     */
    private function numeric_entity()
    {
        // Code points that are replaced from this table rather than being
        // encoded directly (0x0D and the 0x80-0x9F range).
        static $windows_1252_specials = array(
            0x0D => "\x0A",
            0x80 => "\xE2\x82\xAC",
            0x81 => "\xEF\xBF\xBD",
            0x82 => "\xE2\x80\x9A",
            0x83 => "\xC6\x92",
            0x84 => "\xE2\x80\x9E",
            0x85 => "\xE2\x80\xA6",
            0x86 => "\xE2\x80\xA0",
            0x87 => "\xE2\x80\xA1",
            0x88 => "\xCB\x86",
            0x89 => "\xE2\x80\xB0",
            0x8A => "\xC5\xA0",
            0x8B => "\xE2\x80\xB9",
            0x8C => "\xC5\x92",
            0x8D => "\xEF\xBF\xBD",
            0x8E => "\xC5\xBD",
            0x8F => "\xEF\xBF\xBD",
            0x90 => "\xEF\xBF\xBD",
            0x91 => "\xE2\x80\x98",
            0x92 => "\xE2\x80\x99",
            0x93 => "\xE2\x80\x9C",
            0x94 => "\xE2\x80\x9D",
            0x95 => "\xE2\x80\xA2",
            0x96 => "\xE2\x80\x93",
            0x97 => "\xE2\x80\x94",
            0x98 => "\xCB\x9C",
            0x99 => "\xE2\x84\xA2",
            0x9A => "\xC5\xA1",
            0x9B => "\xE2\x80\xBA",
            0x9C => "\xC5\x93",
            0x9D => "\xEF\xBF\xBD",
            0x9E => "\xC5\xBE",
            0x9F => "\xC5\xB8"
        );

        $marker = $this->consume();
        if ($marker === "\x78" || $marker === "\x58")
        {
            $range = '0123456789ABCDEFabcdef';
            $hex = true;
        }
        else
        {
            $range = '0123456789';
            $hex = false;
            // Put the byte back only if one was actually read; at end of
            // data consume() returns false without advancing.
            if ($marker !== false)
            {
                $this->unconsume();
            }
        }

        // Truthiness test kept from the original: no digits at all (false)
        // and the single digit "0" both leave the text untouched.
        if (!($codepoint = $this->consume_range($range)))
        {
            return;
        }

        if ($hex)
        {
            $codepoint = hexdec($codepoint);
        }
        else
        {
            $codepoint = intval($codepoint);
        }

        if (isset($windows_1252_specials[$codepoint]))
        {
            $replacement = $windows_1252_specials[$codepoint];
        }
        else
        {
            $replacement = SimplePie_Misc::codepoint_to_utf8($codepoint);
        }

        // The terminating ';' is optional; anything else is not part of
        // the reference and is handed back.
        if (!in_array($this->consume(), array(';', false), true))
        {
            $this->unconsume();
        }

        $consumed_length = strlen($this->consumed);
        $this->data = substr_replace($this->data, $replacement, $this->position - $consumed_length, $consumed_length);
        $this->position += strlen($replacement) - $consumed_length;
    }

    /**
     * Decode a named character reference
     *
     * Called once `&` and the first byte of the name have been consumed.
     * Reads up to nine further bytes and keeps the longest prefix that is a
     * known entity name. On a match the reference is replaced in the data
     * and parsing resumes right after the replacement; without a match the
     * data is left alone and parsing resumes right after the `&`, so that a
     * reference starting inside the look-ahead window is still found.
     *
     * @access private
     */
    private function named_entity()
    {
        static $entities = array(
            'Aacute' => "\xC3\x81",
            'aacute' => "\xC3\xA1",
            'Aacute;' => "\xC3\x81",
            'aacute;' => "\xC3\xA1",
            'Acirc' => "\xC3\x82",
            'acirc' => "\xC3\xA2",
            'Acirc;' => "\xC3\x82",
            'acirc;' => "\xC3\xA2",
            'acute' => "\xC2\xB4",
            'acute;' => "\xC2\xB4",
            'AElig' => "\xC3\x86",
            'aelig' => "\xC3\xA6",
            'AElig;' => "\xC3\x86",
            'aelig;' => "\xC3\xA6",
            'Agrave' => "\xC3\x80",
            'agrave' => "\xC3\xA0",
            'Agrave;' => "\xC3\x80",
            'agrave;' => "\xC3\xA0",
            'alefsym;' => "\xE2\x84\xB5",
            'Alpha;' => "\xCE\x91",
            'alpha;' => "\xCE\xB1",
            'AMP' => "\x26",
            'amp' => "\x26",
            'AMP;' => "\x26",
            'amp;' => "\x26",
            'and;' => "\xE2\x88\xA7",
            'ang;' => "\xE2\x88\xA0",
            'apos;' => "\x27",
            'Aring' => "\xC3\x85",
            'aring' => "\xC3\xA5",
            'Aring;' => "\xC3\x85",
            'aring;' => "\xC3\xA5",
            'asymp;' => "\xE2\x89\x88",
            'Atilde' => "\xC3\x83",
            'atilde' => "\xC3\xA3",
            'Atilde;' => "\xC3\x83",
            'atilde;' => "\xC3\xA3",
            'Auml' => "\xC3\x84",
            'auml' => "\xC3\xA4",
            'Auml;' => "\xC3\x84",
            'auml;' => "\xC3\xA4",
            'bdquo;' => "\xE2\x80\x9E",
            'Beta;' => "\xCE\x92",
            'beta;' => "\xCE\xB2",
            'brvbar' => "\xC2\xA6",
            'brvbar;' => "\xC2\xA6",
            'bull;' => "\xE2\x80\xA2",
            'cap;' => "\xE2\x88\xA9",
            'Ccedil' => "\xC3\x87",
            'ccedil' => "\xC3\xA7",
            'Ccedil;' => "\xC3\x87",
            'ccedil;' => "\xC3\xA7",
            'cedil' => "\xC2\xB8",
            'cedil;' => "\xC2\xB8",
            'cent' => "\xC2\xA2",
            'cent;' => "\xC2\xA2",
            'Chi;' => "\xCE\xA7",
            'chi;' => "\xCF\x87",
            'circ;' => "\xCB\x86",
            'clubs;' => "\xE2\x99\xA3",
            'cong;' => "\xE2\x89\x85",
            'COPY' => "\xC2\xA9",
            'copy' => "\xC2\xA9",
            'COPY;' => "\xC2\xA9",
            'copy;' => "\xC2\xA9",
            'crarr;' => "\xE2\x86\xB5",
            'cup;' => "\xE2\x88\xAA",
            'curren' => "\xC2\xA4",
            'curren;' => "\xC2\xA4",
            'Dagger;' => "\xE2\x80\xA1",
            'dagger;' => "\xE2\x80\xA0",
            'dArr;' => "\xE2\x87\x93",
            'darr;' => "\xE2\x86\x93",
            'deg' => "\xC2\xB0",
            'deg;' => "\xC2\xB0",
            'Delta;' => "\xCE\x94",
            'delta;' => "\xCE\xB4",
            'diams;' => "\xE2\x99\xA6",
            'divide' => "\xC3\xB7",
            'divide;' => "\xC3\xB7",
            'Eacute' => "\xC3\x89",
            'eacute' => "\xC3\xA9",
            'Eacute;' => "\xC3\x89",
            'eacute;' => "\xC3\xA9",
            'Ecirc' => "\xC3\x8A",
            'ecirc' => "\xC3\xAA",
            'Ecirc;' => "\xC3\x8A",
            'ecirc;' => "\xC3\xAA",
            'Egrave' => "\xC3\x88",
            'egrave' => "\xC3\xA8",
            'Egrave;' => "\xC3\x88",
            'egrave;' => "\xC3\xA8",
            'empty;' => "\xE2\x88\x85",
            'emsp;' => "\xE2\x80\x83",
            'ensp;' => "\xE2\x80\x82",
            'Epsilon;' => "\xCE\x95",
            'epsilon;' => "\xCE\xB5",
            'equiv;' => "\xE2\x89\xA1",
            'Eta;' => "\xCE\x97",
            'eta;' => "\xCE\xB7",
            'ETH' => "\xC3\x90",
            'eth' => "\xC3\xB0",
            'ETH;' => "\xC3\x90",
            'eth;' => "\xC3\xB0",
            'Euml' => "\xC3\x8B",
            'euml' => "\xC3\xAB",
            'Euml;' => "\xC3\x8B",
            'euml;' => "\xC3\xAB",
            'euro;' => "\xE2\x82\xAC",
            'exist;' => "\xE2\x88\x83",
            'fnof;' => "\xC6\x92",
            'forall;' => "\xE2\x88\x80",
            'frac12' => "\xC2\xBD",
            'frac12;' => "\xC2\xBD",
            'frac14' => "\xC2\xBC",
            'frac14;' => "\xC2\xBC",
            'frac34' => "\xC2\xBE",
            'frac34;' => "\xC2\xBE",
            'frasl;' => "\xE2\x81\x84",
            'Gamma;' => "\xCE\x93",
            'gamma;' => "\xCE\xB3",
            'ge;' => "\xE2\x89\xA5",
            'GT' => "\x3E",
            'gt' => "\x3E",
            'GT;' => "\x3E",
            'gt;' => "\x3E",
            'hArr;' => "\xE2\x87\x94",
            'harr;' => "\xE2\x86\x94",
            'hearts;' => "\xE2\x99\xA5",
            'hellip;' => "\xE2\x80\xA6",
            'Iacute' => "\xC3\x8D",
            'iacute' => "\xC3\xAD",
            'Iacute;' => "\xC3\x8D",
            'iacute;' => "\xC3\xAD",
            'Icirc' => "\xC3\x8E",
            'icirc' => "\xC3\xAE",
            'Icirc;' => "\xC3\x8E",
            'icirc;' => "\xC3\xAE",
            'iexcl' => "\xC2\xA1",
            'iexcl;' => "\xC2\xA1",
            'Igrave' => "\xC3\x8C",
            'igrave' => "\xC3\xAC",
            'Igrave;' => "\xC3\x8C",
            'igrave;' => "\xC3\xAC",
            'image;' => "\xE2\x84\x91",
            'infin;' => "\xE2\x88\x9E",
            'int;' => "\xE2\x88\xAB",
            'Iota;' => "\xCE\x99",
            'iota;' => "\xCE\xB9",
            'iquest' => "\xC2\xBF",
            'iquest;' => "\xC2\xBF",
            'isin;' => "\xE2\x88\x88",
            'Iuml' => "\xC3\x8F",
            'iuml' => "\xC3\xAF",
            'Iuml;' => "\xC3\x8F",
            'iuml;' => "\xC3\xAF",
            'Kappa;' => "\xCE\x9A",
            'kappa;' => "\xCE\xBA",
            'Lambda;' => "\xCE\x9B",
            'lambda;' => "\xCE\xBB",
            'lang;' => "\xE3\x80\x88",
            'laquo' => "\xC2\xAB",
            'laquo;' => "\xC2\xAB",
            'lArr;' => "\xE2\x87\x90",
            'larr;' => "\xE2\x86\x90",
            'lceil;' => "\xE2\x8C\x88",
            'ldquo;' => "\xE2\x80\x9C",
            'le;' => "\xE2\x89\xA4",
            'lfloor;' => "\xE2\x8C\x8A",
            'lowast;' => "\xE2\x88\x97",
            'loz;' => "\xE2\x97\x8A",
            'lrm;' => "\xE2\x80\x8E",
            'lsaquo;' => "\xE2\x80\xB9",
            'lsquo;' => "\xE2\x80\x98",
            'LT' => "\x3C",
            'lt' => "\x3C",
            'LT;' => "\x3C",
            'lt;' => "\x3C",
            'macr' => "\xC2\xAF",
            'macr;' => "\xC2\xAF",
            'mdash;' => "\xE2\x80\x94",
            'micro' => "\xC2\xB5",
            'micro;' => "\xC2\xB5",
            'middot' => "\xC2\xB7",
            'middot;' => "\xC2\xB7",
            'minus;' => "\xE2\x88\x92",
            'Mu;' => "\xCE\x9C",
            'mu;' => "\xCE\xBC",
            'nabla;' => "\xE2\x88\x87",
            'nbsp' => "\xC2\xA0",
            'nbsp;' => "\xC2\xA0",
            'ndash;' => "\xE2\x80\x93",
            'ne;' => "\xE2\x89\xA0",
            'ni;' => "\xE2\x88\x8B",
            'not' => "\xC2\xAC",
            'not;' => "\xC2\xAC",
            'notin;' => "\xE2\x88\x89",
            'nsub;' => "\xE2\x8A\x84",
            'Ntilde' => "\xC3\x91",
            'ntilde' => "\xC3\xB1",
            'Ntilde;' => "\xC3\x91",
            'ntilde;' => "\xC3\xB1",
            'Nu;' => "\xCE\x9D",
            'nu;' => "\xCE\xBD",
            'Oacute' => "\xC3\x93",
            'oacute' => "\xC3\xB3",
            'Oacute;' => "\xC3\x93",
            'oacute;' => "\xC3\xB3",
            'Ocirc' => "\xC3\x94",
            'ocirc' => "\xC3\xB4",
            'Ocirc;' => "\xC3\x94",
            'ocirc;' => "\xC3\xB4",
            'OElig;' => "\xC5\x92",
            'oelig;' => "\xC5\x93",
            'Ograve' => "\xC3\x92",
            'ograve' => "\xC3\xB2",
            'Ograve;' => "\xC3\x92",
            'ograve;' => "\xC3\xB2",
            'oline;' => "\xE2\x80\xBE",
            'Omega;' => "\xCE\xA9",
            'omega;' => "\xCF\x89",
            'Omicron;' => "\xCE\x9F",
            'omicron;' => "\xCE\xBF",
            'oplus;' => "\xE2\x8A\x95",
            'or;' => "\xE2\x88\xA8",
            'ordf' => "\xC2\xAA",
            'ordf;' => "\xC2\xAA",
            'ordm' => "\xC2\xBA",
            'ordm;' => "\xC2\xBA",
            'Oslash' => "\xC3\x98",
            'oslash' => "\xC3\xB8",
            'Oslash;' => "\xC3\x98",
            'oslash;' => "\xC3\xB8",
            'Otilde' => "\xC3\x95",
            'otilde' => "\xC3\xB5",
            'Otilde;' => "\xC3\x95",
            'otilde;' => "\xC3\xB5",
            'otimes;' => "\xE2\x8A\x97",
            'Ouml' => "\xC3\x96",
            'ouml' => "\xC3\xB6",
            'Ouml;' => "\xC3\x96",
            'ouml;' => "\xC3\xB6",
            'para' => "\xC2\xB6",
            'para;' => "\xC2\xB6",
            'part;' => "\xE2\x88\x82",
            'permil;' => "\xE2\x80\xB0",
            'perp;' => "\xE2\x8A\xA5",
            'Phi;' => "\xCE\xA6",
            'phi;' => "\xCF\x86",
            'Pi;' => "\xCE\xA0",
            'pi;' => "\xCF\x80",
            'piv;' => "\xCF\x96",
            'plusmn' => "\xC2\xB1",
            'plusmn;' => "\xC2\xB1",
            'pound' => "\xC2\xA3",
            'pound;' => "\xC2\xA3",
            'Prime;' => "\xE2\x80\xB3",
            'prime;' => "\xE2\x80\xB2",
            'prod;' => "\xE2\x88\x8F",
            'prop;' => "\xE2\x88\x9D",
            'Psi;' => "\xCE\xA8",
            'psi;' => "\xCF\x88",
            'QUOT' => "\x22",
            'quot' => "\x22",
            'QUOT;' => "\x22",
            'quot;' => "\x22",
            'radic;' => "\xE2\x88\x9A",
            'rang;' => "\xE3\x80\x89",
            'raquo' => "\xC2\xBB",
            'raquo;' => "\xC2\xBB",
            'rArr;' => "\xE2\x87\x92",
            'rarr;' => "\xE2\x86\x92",
            'rceil;' => "\xE2\x8C\x89",
            'rdquo;' => "\xE2\x80\x9D",
            'real;' => "\xE2\x84\x9C",
            'REG' => "\xC2\xAE",
            'reg' => "\xC2\xAE",
            'REG;' => "\xC2\xAE",
            'reg;' => "\xC2\xAE",
            'rfloor;' => "\xE2\x8C\x8B",
            'Rho;' => "\xCE\xA1",
            'rho;' => "\xCF\x81",
            'rlm;' => "\xE2\x80\x8F",
            'rsaquo;' => "\xE2\x80\xBA",
            'rsquo;' => "\xE2\x80\x99",
            'sbquo;' => "\xE2\x80\x9A",
            'Scaron;' => "\xC5\xA0",
            'scaron;' => "\xC5\xA1",
            'sdot;' => "\xE2\x8B\x85",
            'sect' => "\xC2\xA7",
            'sect;' => "\xC2\xA7",
            'shy' => "\xC2\xAD",
            'shy;' => "\xC2\xAD",
            'Sigma;' => "\xCE\xA3",
            'sigma;' => "\xCF\x83",
            'sigmaf;' => "\xCF\x82",
            'sim;' => "\xE2\x88\xBC",
            'spades;' => "\xE2\x99\xA0",
            'sub;' => "\xE2\x8A\x82",
            'sube;' => "\xE2\x8A\x86",
            'sum;' => "\xE2\x88\x91",
            'sup;' => "\xE2\x8A\x83",
            'sup1' => "\xC2\xB9",
            'sup1;' => "\xC2\xB9",
            'sup2' => "\xC2\xB2",
            'sup2;' => "\xC2\xB2",
            'sup3' => "\xC2\xB3",
            'sup3;' => "\xC2\xB3",
            'supe;' => "\xE2\x8A\x87",
            'szlig' => "\xC3\x9F",
            'szlig;' => "\xC3\x9F",
            'Tau;' => "\xCE\xA4",
            'tau;' => "\xCF\x84",
            'there4;' => "\xE2\x88\xB4",
            'Theta;' => "\xCE\x98",
            'theta;' => "\xCE\xB8",
            'thetasym;' => "\xCF\x91",
            'thinsp;' => "\xE2\x80\x89",
            'THORN' => "\xC3\x9E",
            'thorn' => "\xC3\xBE",
            'THORN;' => "\xC3\x9E",
            'thorn;' => "\xC3\xBE",
            'tilde;' => "\xCB\x9C",
            'times' => "\xC3\x97",
            'times;' => "\xC3\x97",
            'TRADE;' => "\xE2\x84\xA2",
            'trade;' => "\xE2\x84\xA2",
            'Uacute' => "\xC3\x9A",
            'uacute' => "\xC3\xBA",
            'Uacute;' => "\xC3\x9A",
            'uacute;' => "\xC3\xBA",
            'uArr;' => "\xE2\x87\x91",
            'uarr;' => "\xE2\x86\x91",
            'Ucirc' => "\xC3\x9B",
            'ucirc' => "\xC3\xBB",
            'Ucirc;' => "\xC3\x9B",
            'ucirc;' => "\xC3\xBB",
            'Ugrave' => "\xC3\x99",
            'ugrave' => "\xC3\xB9",
            'Ugrave;' => "\xC3\x99",
            'ugrave;' => "\xC3\xB9",
            'uml' => "\xC2\xA8",
            'uml;' => "\xC2\xA8",
            'upsih;' => "\xCF\x92",
            'Upsilon;' => "\xCE\xA5",
            'upsilon;' => "\xCF\x85",
            'Uuml' => "\xC3\x9C",
            'uuml' => "\xC3\xBC",
            'Uuml;' => "\xC3\x9C",
            'uuml;' => "\xC3\xBC",
            'weierp;' => "\xE2\x84\x98",
            'Xi;' => "\xCE\x9E",
            'xi;' => "\xCE\xBE",
            'Yacute' => "\xC3\x9D",
            'yacute' => "\xC3\xBD",
            'Yacute;' => "\xC3\x9D",
            'yacute;' => "\xC3\xBD",
            'yen' => "\xC2\xA5",
            'yen;' => "\xC2\xA5",
            'yuml' => "\xC3\xBF",
            'Yuml;' => "\xC5\xB8",
            'yuml;' => "\xC3\xBF",
            'Zeta;' => "\xCE\x96",
            'zeta;' => "\xCE\xB6",
            'zwj;' => "\xE2\x80\x8D",
            'zwnj;' => "\xE2\x80\x8C"
        );

        // Offset of the '&' that opened this reference: at this point
        // $this->consumed holds exactly '&' plus the first name byte.
        $start = $this->position - strlen($this->consumed);

        // One name byte is already consumed; nine more cover the longest
        // key in the table ('thetasym;'). The last hit is the longest one.
        $match = null;
        for ($i = 0; $i < 9 && $this->consume() !== false; $i++)
        {
            $candidate = substr($this->consumed, 1);
            if (isset($entities[$candidate]))
            {
                $match = $candidate;
            }
        }

        if ($match === null)
        {
            // Not a known entity: rewind the look-ahead so that any '&'
            // inside it is still visited by parse().
            $this->position = $start + 1;
            return;
        }

        $this->data = substr_replace($this->data, $entities[$match], $start, strlen($match) + 1);
        $this->position = $start + strlen($entities[$match]);
    }
}
617 | |||
diff --git a/inc/3rdparty/simplepie/SimplePie/Enclosure.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Enclosure.php index ca9a8b46..55674379 100644 --- a/inc/3rdparty/simplepie/SimplePie/Enclosure.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Enclosure.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,48 +33,197 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | /** | |
46 | * Handles everything related to enclosures (including Media RSS and iTunes RSS) | ||
47 | * | ||
48 | * Used by {@see SimplePie_Item::get_enclosure()} and {@see SimplePie_Item::get_enclosures()} | ||
49 | * | ||
50 | * This class can be overloaded with {@see SimplePie::set_enclosure_class()} | ||
51 | * | ||
52 | * @package SimplePie | ||
53 | * @subpackage API | ||
54 | */ | ||
47 | class SimplePie_Enclosure | 55 | class SimplePie_Enclosure |
48 | { | 56 | { |
57 | /** | ||
58 | * @var string | ||
59 | * @see get_bitrate() | ||
60 | */ | ||
49 | var $bitrate; | 61 | var $bitrate; |
62 | |||
63 | /** | ||
64 | * @var array | ||
65 | * @see get_captions() | ||
66 | */ | ||
50 | var $captions; | 67 | var $captions; |
68 | |||
69 | /** | ||
70 | * @var array | ||
71 | * @see get_categories() | ||
72 | */ | ||
51 | var $categories; | 73 | var $categories; |
74 | |||
75 | /** | ||
76 | * @var int | ||
77 | * @see get_channels() | ||
78 | */ | ||
52 | var $channels; | 79 | var $channels; |
80 | |||
81 | /** | ||
82 | * @var SimplePie_Copyright | ||
83 | * @see get_copyright() | ||
84 | */ | ||
53 | var $copyright; | 85 | var $copyright; |
86 | |||
87 | /** | ||
88 | * @var array | ||
89 | * @see get_credits() | ||
90 | */ | ||
54 | var $credits; | 91 | var $credits; |
92 | |||
93 | /** | ||
94 | * @var string | ||
95 | * @see get_description() | ||
96 | */ | ||
55 | var $description; | 97 | var $description; |
98 | |||
99 | /** | ||
100 | * @var int | ||
101 | * @see get_duration() | ||
102 | */ | ||
56 | var $duration; | 103 | var $duration; |
104 | |||
105 | /** | ||
106 | * @var string | ||
107 | * @see get_expression() | ||
108 | */ | ||
57 | var $expression; | 109 | var $expression; |
110 | |||
111 | /** | ||
112 | * @var string | ||
113 | * @see get_framerate() | ||
114 | */ | ||
58 | var $framerate; | 115 | var $framerate; |
116 | |||
117 | /** | ||
118 | * @var string | ||
119 | * @see get_handler() | ||
120 | */ | ||
59 | var $handler; | 121 | var $handler; |
122 | |||
123 | /** | ||
124 | * @var array | ||
125 | * @see get_hashes() | ||
126 | */ | ||
60 | var $hashes; | 127 | var $hashes; |
128 | |||
129 | /** | ||
130 | * @var string | ||
131 | * @see get_height() | ||
132 | */ | ||
61 | var $height; | 133 | var $height; |
134 | |||
135 | /** | ||
136 | * @deprecated | ||
137 | * @var null | ||
138 | */ | ||
62 | var $javascript; | 139 | var $javascript; |
140 | |||
141 | /** | ||
142 | * @var array | ||
143 | * @see get_keywords() | ||
144 | */ | ||
63 | var $keywords; | 145 | var $keywords; |
146 | |||
147 | /** | ||
148 | * @var string | ||
149 | * @see get_language() | ||
150 | */ | ||
64 | var $lang; | 151 | var $lang; |
152 | |||
153 | /** | ||
154 | * @var string | ||
155 | * @see get_length() | ||
156 | */ | ||
65 | var $length; | 157 | var $length; |
158 | |||
159 | /** | ||
160 | * @var string | ||
161 | * @see get_link() | ||
162 | */ | ||
66 | var $link; | 163 | var $link; |
164 | |||
165 | /** | ||
166 | * @var string | ||
167 | * @see get_medium() | ||
168 | */ | ||
67 | var $medium; | 169 | var $medium; |
170 | |||
171 | /** | ||
172 | * @var string | ||
173 | * @see get_player() | ||
174 | */ | ||
68 | var $player; | 175 | var $player; |
176 | |||
177 | /** | ||
178 | * @var array | ||
179 | * @see get_ratings() | ||
180 | */ | ||
69 | var $ratings; | 181 | var $ratings; |
182 | |||
183 | /** | ||
184 | * @var array | ||
185 | * @see get_restrictions() | ||
186 | */ | ||
70 | var $restrictions; | 187 | var $restrictions; |
188 | |||
189 | /** | ||
190 | * @var string | ||
191 | * @see get_sampling_rate() | ||
192 | */ | ||
71 | var $samplingrate; | 193 | var $samplingrate; |
194 | |||
195 | /** | ||
196 | * @var array | ||
197 | * @see get_thumbnails() | ||
198 | */ | ||
72 | var $thumbnails; | 199 | var $thumbnails; |
200 | |||
201 | /** | ||
202 | * @var string | ||
203 | * @see get_title() | ||
204 | */ | ||
73 | var $title; | 205 | var $title; |
206 | |||
207 | /** | ||
208 | * @var string | ||
209 | * @see get_type() | ||
210 | */ | ||
74 | var $type; | 211 | var $type; |
212 | |||
213 | /** | ||
214 | * @var string | ||
215 | * @see get_width() | ||
216 | */ | ||
75 | var $width; | 217 | var $width; |
76 | 218 | ||
77 | // Constructor, used to input the data | 219 | /** |
220 | * Constructor, used to input the data | ||
221 | * | ||
222 | * For documentation on all the parameters, see the corresponding | ||
223 | * properties and their accessors | ||
224 | * | ||
225 | * @uses idna_convert If available, this will convert an IDN | ||
226 | */ | ||
78 | public function __construct($link = null, $type = null, $length = null, $javascript = null, $bitrate = null, $captions = null, $categories = null, $channels = null, $copyright = null, $credits = null, $description = null, $duration = null, $expression = null, $framerate = null, $hashes = null, $height = null, $keywords = null, $lang = null, $medium = null, $player = null, $ratings = null, $restrictions = null, $samplingrate = null, $thumbnails = null, $title = null, $width = null) | 227 | public function __construct($link = null, $type = null, $length = null, $javascript = null, $bitrate = null, $captions = null, $categories = null, $channels = null, $copyright = null, $credits = null, $description = null, $duration = null, $expression = null, $framerate = null, $hashes = null, $height = null, $keywords = null, $lang = null, $medium = null, $player = null, $ratings = null, $restrictions = null, $samplingrate = null, $thumbnails = null, $title = null, $width = null) |
79 | { | 228 | { |
80 | $this->bitrate = $bitrate; | 229 | $this->bitrate = $bitrate; |
@@ -112,12 +261,22 @@ class SimplePie_Enclosure | |||
112 | $this->handler = $this->get_handler(); // Needs to load last | 261 | $this->handler = $this->get_handler(); // Needs to load last |
113 | } | 262 | } |
114 | 263 | ||
264 | /** | ||
265 | * String-ified version | ||
266 | * | ||
267 | * @return string | ||
268 | */ | ||
115 | public function __toString() | 269 | public function __toString() |
116 | { | 270 | { |
117 | // There is no $this->data here | 271 | // There is no $this->data here |
118 | return md5(serialize($this)); | 272 | return md5(serialize($this)); |
119 | } | 273 | } |
120 | 274 | ||
275 | /** | ||
276 | * Get the bitrate | ||
277 | * | ||
278 | * @return string|null | ||
279 | */ | ||
121 | public function get_bitrate() | 280 | public function get_bitrate() |
122 | { | 281 | { |
123 | if ($this->bitrate !== null) | 282 | if ($this->bitrate !== null) |
@@ -130,6 +289,12 @@ class SimplePie_Enclosure | |||
130 | } | 289 | } |
131 | } | 290 | } |
132 | 291 | ||
292 | /** | ||
293 | * Get a single caption | ||
294 | * | ||
295 | * @param int $key | ||
296 | * @return SimplePie_Caption|null | ||
297 | */ | ||
133 | public function get_caption($key = 0) | 298 | public function get_caption($key = 0) |
134 | { | 299 | { |
135 | $captions = $this->get_captions(); | 300 | $captions = $this->get_captions(); |
@@ -143,6 +308,11 @@ class SimplePie_Enclosure | |||
143 | } | 308 | } |
144 | } | 309 | } |
145 | 310 | ||
311 | /** | ||
312 | * Get all captions | ||
313 | * | ||
314 | * @return array|null Array of {@see SimplePie_Caption} objects | ||
315 | */ | ||
146 | public function get_captions() | 316 | public function get_captions() |
147 | { | 317 | { |
148 | if ($this->captions !== null) | 318 | if ($this->captions !== null) |
@@ -155,6 +325,12 @@ class SimplePie_Enclosure | |||
155 | } | 325 | } |
156 | } | 326 | } |
157 | 327 | ||
328 | /** | ||
329 | * Get a single category | ||
330 | * | ||
331 | * @param int $key | ||
332 | * @return SimplePie_Category|null | ||
333 | */ | ||
158 | public function get_category($key = 0) | 334 | public function get_category($key = 0) |
159 | { | 335 | { |
160 | $categories = $this->get_categories(); | 336 | $categories = $this->get_categories(); |
@@ -168,6 +344,11 @@ class SimplePie_Enclosure | |||
168 | } | 344 | } |
169 | } | 345 | } |
170 | 346 | ||
347 | /** | ||
348 | * Get all categories | ||
349 | * | ||
350 | * @return array|null Array of {@see SimplePie_Category} objects | ||
351 | */ | ||
171 | public function get_categories() | 352 | public function get_categories() |
172 | { | 353 | { |
173 | if ($this->categories !== null) | 354 | if ($this->categories !== null) |
@@ -180,6 +361,11 @@ class SimplePie_Enclosure | |||
180 | } | 361 | } |
181 | } | 362 | } |
182 | 363 | ||
364 | /** | ||
365 | * Get the number of audio channels | ||
366 | * | ||
367 | * @return int|null | ||
368 | */ | ||
183 | public function get_channels() | 369 | public function get_channels() |
184 | { | 370 | { |
185 | if ($this->channels !== null) | 371 | if ($this->channels !== null) |
@@ -192,6 +378,11 @@ class SimplePie_Enclosure | |||
192 | } | 378 | } |
193 | } | 379 | } |
194 | 380 | ||
381 | /** | ||
382 | * Get the copyright information | ||
383 | * | ||
384 | * @return SimplePie_Copyright|null | ||
385 | */ | ||
195 | public function get_copyright() | 386 | public function get_copyright() |
196 | { | 387 | { |
197 | if ($this->copyright !== null) | 388 | if ($this->copyright !== null) |
@@ -204,6 +395,12 @@ class SimplePie_Enclosure | |||
204 | } | 395 | } |
205 | } | 396 | } |
206 | 397 | ||
398 | /** | ||
399 | * Get a single credit | ||
400 | * | ||
401 | * @param int $key | ||
402 | * @return SimplePie_Credit|null | ||
403 | */ | ||
207 | public function get_credit($key = 0) | 404 | public function get_credit($key = 0) |
208 | { | 405 | { |
209 | $credits = $this->get_credits(); | 406 | $credits = $this->get_credits(); |
@@ -217,6 +414,11 @@ class SimplePie_Enclosure | |||
217 | } | 414 | } |
218 | } | 415 | } |
219 | 416 | ||
417 | /** | ||
418 | * Get all credits | ||
419 | * | ||
420 | * @return array|null Array of {@see SimplePie_Credit} objects | ||
421 | */ | ||
220 | public function get_credits() | 422 | public function get_credits() |
221 | { | 423 | { |
222 | if ($this->credits !== null) | 424 | if ($this->credits !== null) |
@@ -229,6 +431,11 @@ class SimplePie_Enclosure | |||
229 | } | 431 | } |
230 | } | 432 | } |
231 | 433 | ||
434 | /** | ||
435 | * Get the description of the enclosure | ||
436 | * | ||
437 | * @return string|null | ||
438 | */ | ||
232 | public function get_description() | 439 | public function get_description() |
233 | { | 440 | { |
234 | if ($this->description !== null) | 441 | if ($this->description !== null) |
@@ -241,6 +448,12 @@ class SimplePie_Enclosure | |||
241 | } | 448 | } |
242 | } | 449 | } |
243 | 450 | ||
451 | /** | ||
452 | * Get the duration of the enclosure | ||
453 | * | ||
454 | * @param bool $convert Convert seconds into hh:mm:ss | ||
455 | * @return string|int|null 'hh:mm:ss' string if `$convert` was specified, otherwise integer (or null if none found) | ||
456 | */ | ||
244 | public function get_duration($convert = false) | 457 | public function get_duration($convert = false) |
245 | { | 458 | { |
246 | if ($this->duration !== null) | 459 | if ($this->duration !== null) |
@@ -261,6 +474,11 @@ class SimplePie_Enclosure | |||
261 | } | 474 | } |
262 | } | 475 | } |
263 | 476 | ||
477 | /** | ||
478 | * Get the expression | ||
479 | * | ||
480 | * @return string Probably one of 'sample', 'full', 'nonstop', 'clip'. Defaults to 'full' | ||
481 | */ | ||
264 | public function get_expression() | 482 | public function get_expression() |
265 | { | 483 | { |
266 | if ($this->expression !== null) | 484 | if ($this->expression !== null) |
@@ -273,6 +491,11 @@ class SimplePie_Enclosure | |||
273 | } | 491 | } |
274 | } | 492 | } |
275 | 493 | ||
494 | /** | ||
495 | * Get the file extension | ||
496 | * | ||
497 | * @return string|null | ||
498 | */ | ||
276 | public function get_extension() | 499 | public function get_extension() |
277 | { | 500 | { |
278 | if ($this->link !== null) | 501 | if ($this->link !== null) |
@@ -286,6 +509,11 @@ class SimplePie_Enclosure | |||
286 | return null; | 509 | return null; |
287 | } | 510 | } |
288 | 511 | ||
512 | /** | ||
513 | * Get the framerate (in frames-per-second) | ||
514 | * | ||
515 | * @return string|null | ||
516 | */ | ||
289 | public function get_framerate() | 517 | public function get_framerate() |
290 | { | 518 | { |
291 | if ($this->framerate !== null) | 519 | if ($this->framerate !== null) |
@@ -298,11 +526,23 @@ class SimplePie_Enclosure | |||
298 | } | 526 | } |
299 | } | 527 | } |
300 | 528 | ||
529 | /** | ||
530 | * Get the preferred handler | ||
531 | * | ||
532 | * @return string|null One of 'flash', 'fmedia', 'quicktime', 'wmedia', 'mp3' | ||
533 | */ | ||
301 | public function get_handler() | 534 | public function get_handler() |
302 | { | 535 | { |
303 | return $this->get_real_type(true); | 536 | return $this->get_real_type(true); |
304 | } | 537 | } |
305 | 538 | ||
539 | /** | ||
540 | * Get a single hash | ||
541 | * | ||
542 | * @link http://www.rssboard.org/media-rss#media-hash | ||
543 | * @param int $key | ||
544 | * @return string|null Hash as per `media:hash`, prefixed with "$algo:" | ||
545 | */ | ||
306 | public function get_hash($key = 0) | 546 | public function get_hash($key = 0) |
307 | { | 547 | { |
308 | $hashes = $this->get_hashes(); | 548 | $hashes = $this->get_hashes(); |
@@ -316,6 +556,11 @@ class SimplePie_Enclosure | |||
316 | } | 556 | } |
317 | } | 557 | } |
318 | 558 | ||
559 | /** | ||
560 | * Get all hashes | ||
561 | * | ||
562 | * @return array|null Array of strings, see {@see get_hash()} | ||
563 | */ | ||
319 | public function get_hashes() | 564 | public function get_hashes() |
320 | { | 565 | { |
321 | if ($this->hashes !== null) | 566 | if ($this->hashes !== null) |
@@ -328,6 +573,11 @@ class SimplePie_Enclosure | |||
328 | } | 573 | } |
329 | } | 574 | } |
330 | 575 | ||
576 | /** | ||
577 | * Get the height | ||
578 | * | ||
579 | * @return string|null | ||
580 | */ | ||
331 | public function get_height() | 581 | public function get_height() |
332 | { | 582 | { |
333 | if ($this->height !== null) | 583 | if ($this->height !== null) |
@@ -340,6 +590,12 @@ class SimplePie_Enclosure | |||
340 | } | 590 | } |
341 | } | 591 | } |
342 | 592 | ||
593 | /** | ||
594 | * Get the language | ||
595 | * | ||
596 | * @link http://tools.ietf.org/html/rfc3066 | ||
597 | * @return string|null Language code as per RFC 3066 | ||
598 | */ | ||
343 | public function get_language() | 599 | public function get_language() |
344 | { | 600 | { |
345 | if ($this->lang !== null) | 601 | if ($this->lang !== null) |
@@ -352,6 +608,12 @@ class SimplePie_Enclosure | |||
352 | } | 608 | } |
353 | } | 609 | } |
354 | 610 | ||
611 | /** | ||
612 | * Get a single keyword | ||
613 | * | ||
614 | * @param int $key | ||
615 | * @return string|null | ||
616 | */ | ||
355 | public function get_keyword($key = 0) | 617 | public function get_keyword($key = 0) |
356 | { | 618 | { |
357 | $keywords = $this->get_keywords(); | 619 | $keywords = $this->get_keywords(); |
@@ -365,6 +627,11 @@ class SimplePie_Enclosure | |||
365 | } | 627 | } |
366 | } | 628 | } |
367 | 629 | ||
630 | /** | ||
631 | * Get all keywords | ||
632 | * | ||
633 | * @return array|null Array of strings | ||
634 | */ | ||
368 | public function get_keywords() | 635 | public function get_keywords() |
369 | { | 636 | { |
370 | if ($this->keywords !== null) | 637 | if ($this->keywords !== null) |
@@ -377,6 +644,11 @@ class SimplePie_Enclosure | |||
377 | } | 644 | } |
378 | } | 645 | } |
379 | 646 | ||
647 | /** | ||
648 | * Get length | ||
649 | * | ||
650 | * @return float Length in bytes | ||
651 | */ | ||
380 | public function get_length() | 652 | public function get_length() |
381 | { | 653 | { |
382 | if ($this->length !== null) | 654 | if ($this->length !== null) |
@@ -389,6 +661,11 @@ class SimplePie_Enclosure | |||
389 | } | 661 | } |
390 | } | 662 | } |
391 | 663 | ||
664 | /** | ||
665 | * Get the URL | ||
666 | * | ||
667 | * @return string|null | ||
668 | */ | ||
392 | public function get_link() | 669 | public function get_link() |
393 | { | 670 | { |
394 | if ($this->link !== null) | 671 | if ($this->link !== null) |
@@ -401,6 +678,12 @@ class SimplePie_Enclosure | |||
401 | } | 678 | } |
402 | } | 679 | } |
403 | 680 | ||
681 | /** | ||
682 | * Get the medium | ||
683 | * | ||
684 | * @link http://www.rssboard.org/media-rss#media-content | ||
685 | * @return string|null Should be one of 'image', 'audio', 'video', 'document', 'executable' | ||
686 | */ | ||
404 | public function get_medium() | 687 | public function get_medium() |
405 | { | 688 | { |
406 | if ($this->medium !== null) | 689 | if ($this->medium !== null) |
@@ -413,6 +696,12 @@ class SimplePie_Enclosure | |||
413 | } | 696 | } |
414 | } | 697 | } |
415 | 698 | ||
699 | /** | ||
700 | * Get the player URL | ||
701 | * | ||
702 | * Typically the same as {@see get_permalink()} | ||
703 | * @return string|null Player URL | ||
704 | */ | ||
416 | public function get_player() | 705 | public function get_player() |
417 | { | 706 | { |
418 | if ($this->player !== null) | 707 | if ($this->player !== null) |
@@ -425,6 +714,12 @@ class SimplePie_Enclosure | |||
425 | } | 714 | } |
426 | } | 715 | } |
427 | 716 | ||
717 | /** | ||
718 | * Get a single rating | ||
719 | * | ||
720 | * @param int $key | ||
721 | * @return SimplePie_Rating|null | ||
722 | */ | ||
428 | public function get_rating($key = 0) | 723 | public function get_rating($key = 0) |
429 | { | 724 | { |
430 | $ratings = $this->get_ratings(); | 725 | $ratings = $this->get_ratings(); |
@@ -438,6 +733,11 @@ class SimplePie_Enclosure | |||
438 | } | 733 | } |
439 | } | 734 | } |
440 | 735 | ||
736 | /** | ||
737 | * Get all ratings | ||
738 | * | ||
739 | * @return array|null Array of {@see SimplePie_Rating} objects | ||
740 | */ | ||
441 | public function get_ratings() | 741 | public function get_ratings() |
442 | { | 742 | { |
443 | if ($this->ratings !== null) | 743 | if ($this->ratings !== null) |
@@ -450,6 +750,12 @@ class SimplePie_Enclosure | |||
450 | } | 750 | } |
451 | } | 751 | } |
452 | 752 | ||
753 | /** | ||
754 | * Get a single restriction | ||
755 | * | ||
756 | * @param int $key | ||
757 | * @return SimplePie_Restriction|null | ||
758 | */ | ||
453 | public function get_restriction($key = 0) | 759 | public function get_restriction($key = 0) |
454 | { | 760 | { |
455 | $restrictions = $this->get_restrictions(); | 761 | $restrictions = $this->get_restrictions(); |
@@ -463,6 +769,11 @@ class SimplePie_Enclosure | |||
463 | } | 769 | } |
464 | } | 770 | } |
465 | 771 | ||
772 | /** | ||
773 | * Get all restrictions | ||
774 | * | ||
775 | * @return array|null Array of {@see SimplePie_Restriction} objects | ||
776 | */ | ||
466 | public function get_restrictions() | 777 | public function get_restrictions() |
467 | { | 778 | { |
468 | if ($this->restrictions !== null) | 779 | if ($this->restrictions !== null) |
@@ -475,6 +786,11 @@ class SimplePie_Enclosure | |||
475 | } | 786 | } |
476 | } | 787 | } |
477 | 788 | ||
789 | /** | ||
790 | * Get the sampling rate (in kHz) | ||
791 | * | ||
792 | * @return string|null | ||
793 | */ | ||
478 | public function get_sampling_rate() | 794 | public function get_sampling_rate() |
479 | { | 795 | { |
480 | if ($this->samplingrate !== null) | 796 | if ($this->samplingrate !== null) |
@@ -487,6 +803,11 @@ class SimplePie_Enclosure | |||
487 | } | 803 | } |
488 | } | 804 | } |
489 | 805 | ||
806 | /** | ||
807 | * Get the file size (in MiB) | ||
808 | * | ||
809 | * @return float|null File size in mebibytes (1048576 bytes) | ||
810 | */ | ||
490 | public function get_size() | 811 | public function get_size() |
491 | { | 812 | { |
492 | $length = $this->get_length(); | 813 | $length = $this->get_length(); |
@@ -500,6 +821,12 @@ class SimplePie_Enclosure | |||
500 | } | 821 | } |
501 | } | 822 | } |
502 | 823 | ||
824 | /** | ||
825 | * Get a single thumbnail | ||
826 | * | ||
827 | * @param int $key | ||
828 | * @return string|null Thumbnail URL | ||
829 | */ | ||
503 | public function get_thumbnail($key = 0) | 830 | public function get_thumbnail($key = 0) |
504 | { | 831 | { |
505 | $thumbnails = $this->get_thumbnails(); | 832 | $thumbnails = $this->get_thumbnails(); |
@@ -513,6 +840,11 @@ class SimplePie_Enclosure | |||
513 | } | 840 | } |
514 | } | 841 | } |
515 | 842 | ||
843 | /** | ||
844 | * Get all thumbnails | ||
845 | * | ||
846 | * @return array|null Array of thumbnail URLs | ||
847 | */ | ||
516 | public function get_thumbnails() | 848 | public function get_thumbnails() |
517 | { | 849 | { |
518 | if ($this->thumbnails !== null) | 850 | if ($this->thumbnails !== null) |
@@ -525,6 +857,11 @@ class SimplePie_Enclosure | |||
525 | } | 857 | } |
526 | } | 858 | } |
527 | 859 | ||
860 | /** | ||
861 | * Get the title | ||
862 | * | ||
863 | * @return string|null | ||
864 | */ | ||
528 | public function get_title() | 865 | public function get_title() |
529 | { | 866 | { |
530 | if ($this->title !== null) | 867 | if ($this->title !== null) |
@@ -537,6 +874,12 @@ class SimplePie_Enclosure | |||
537 | } | 874 | } |
538 | } | 875 | } |
539 | 876 | ||
877 | /** | ||
878 | * Get mimetype of the enclosure | ||
879 | * | ||
880 | * @see get_real_type() | ||
881 | * @return string|null MIME type | ||
882 | */ | ||
540 | public function get_type() | 883 | public function get_type() |
541 | { | 884 | { |
542 | if ($this->type !== null) | 885 | if ($this->type !== null) |
@@ -549,6 +892,11 @@ class SimplePie_Enclosure | |||
549 | } | 892 | } |
550 | } | 893 | } |
551 | 894 | ||
895 | /** | ||
896 | * Get the width | ||
897 | * | ||
898 | * @return string|null | ||
899 | */ | ||
552 | public function get_width() | 900 | public function get_width() |
553 | { | 901 | { |
554 | if ($this->width !== null) | 902 | if ($this->width !== null) |
@@ -561,13 +909,63 @@ class SimplePie_Enclosure | |||
561 | } | 909 | } |
562 | } | 910 | } |
563 | 911 | ||
912 | /** | ||
913 | * Embed the enclosure using `<embed>` | ||
914 | * | ||
915 | * @deprecated Use the second parameter to {@see embed} instead | ||
916 | * | ||
917 | * @param array|string $options See first parameter to {@see embed} | ||
918 | * @return string HTML string to output | ||
919 | */ | ||
564 | public function native_embed($options='') | 920 | public function native_embed($options='') |
565 | { | 921 | { |
566 | return $this->embed($options, true); | 922 | return $this->embed($options, true); |
567 | } | 923 | } |
568 | 924 | ||
569 | /** | 925 | /** |
926 | * Embed the enclosure using Javascript | ||
927 | * | ||
928 | * `$options` is an array or comma-separated key:value string, with the | ||
929 | * following properties: | ||
930 | * | ||
931 | * - `alt` (string): Alternate content for when an end-user does not have | ||
932 | * the appropriate handler installed or when a file type is | ||
933 | * unsupported. Can be any text or HTML. Defaults to blank. | ||
934 | * - `altclass` (string): If a file type is unsupported, the end-user will | ||
935 | * see the alt text (above) linked directly to the content. That link | ||
936 | * will have this value as its class name. Defaults to blank. | ||
937 | * - `audio` (string): This is an image that should be used as a | ||
938 | * placeholder for audio files before they're loaded (QuickTime-only). | ||
939 | * Can be any relative or absolute URL. Defaults to blank. | ||
940 | * - `bgcolor` (string): The background color for the media, if not | ||
941 | * already transparent. Defaults to `#ffffff`. | ||
942 | * - `height` (integer): The height of the embedded media. Accepts any | ||
943 | * numeric pixel value (such as `360`) or `auto`. Defaults to `auto`, | ||
944 | * and it is recommended that you use this default. | ||
945 | * - `loop` (boolean): Do you want the media to loop when it's done? | ||
946 | * Defaults to `false`. | ||
947 | * - `mediaplayer` (string): The location of the included | ||
948 | * `mediaplayer.swf` file. This allows for the playback of Flash Video | ||
949 | * (`.flv`) files, and is the default handler for non-Odeo MP3's. | ||
950 | * Defaults to blank. | ||
951 | * - `video` (string): This is an image that should be used as a | ||
952 | * placeholder for video files before they're loaded (QuickTime-only). | ||
953 | * Can be any relative or absolute URL. Defaults to blank. | ||
954 | * - `width` (integer): The width of the embedded media. Accepts any | ||
955 | * numeric pixel value (such as `480`) or `auto`. Defaults to `auto`, | ||
956 | * and it is recommended that you use this default. | ||
957 | * - `widescreen` (boolean): Is the enclosure widescreen or standard? | ||
958 | * This applies only to video enclosures, and will automatically resize | ||
959 | * the content appropriately. Defaults to `false`, implying 4:3 mode. | ||
960 | * | ||
961 | * Note: Non-widescreen (4:3) mode with `width` and `height` set to `auto` | ||
962 | * will default to 480x360 video resolution. Widescreen (16:9) mode with | ||
963 | * `width` and `height` set to `auto` will default to 480x270 video resolution. | ||
964 | * | ||
570 | * @todo If the dimensions for media:content are defined, use them when width/height are set to 'auto'. | 965 | * @todo If the dimensions for media:content are defined, use them when width/height are set to 'auto'. |
966 | * @param array|string $options Comma-separated key:value list, or array | ||
967 | * @param bool $native Use `<embed>` | ||
968 | * @return string HTML string to output | ||
571 | */ | 969 | */ |
572 | public function embed($options = '', $native = false) | 970 | public function embed($options = '', $native = false) |
573 | { | 971 | { |
@@ -723,21 +1121,8 @@ class SimplePie_Enclosure | |||
723 | 1121 | ||
724 | $embed = ''; | 1122 | $embed = ''; |
725 | 1123 | ||
726 | // Odeo Feed MP3's | ||
727 | if ($handler === 'odeo') | ||
728 | { | ||
729 | if ($native) | ||
730 | { | ||
731 | $embed .= '<embed src="http://odeo.com/flash/audio_player_fullsize.swf" pluginspage="http://adobe.com/go/getflashplayer" type="application/x-shockwave-flash" quality="high" width="440" height="80" wmode="transparent" allowScriptAccess="any" flashvars="valid_sample_rate=true&external_url=' . $this->get_link() . '"></embed>'; | ||
732 | } | ||
733 | else | ||
734 | { | ||
735 | $embed .= '<script type="text/javascript">embed_odeo("' . $this->get_link() . '");</script>'; | ||
736 | } | ||
737 | } | ||
738 | |||
739 | // Flash | 1124 | // Flash |
740 | elseif ($handler === 'flash') | 1125 | if ($handler === 'flash') |
741 | { | 1126 | { |
742 | if ($native) | 1127 | if ($native) |
743 | { | 1128 | { |
@@ -806,14 +1191,19 @@ class SimplePie_Enclosure | |||
806 | return $embed; | 1191 | return $embed; |
807 | } | 1192 | } |
808 | 1193 | ||
1194 | /** | ||
1195 | * Get the real media type | ||
1196 | * | ||
1197 | * Often, feeds lie to us, necessitating a bit of deeper inspection. This | ||
1198 | * converts types to their canonical representations based on the file | ||
1199 | * extension | ||
1200 | * | ||
1201 | * @see get_type() | ||
1202 | * @param bool $find_handler Internal use only, use {@see get_handler()} instead | ||
1203 | * @return string MIME type | ||
1204 | */ | ||
809 | public function get_real_type($find_handler = false) | 1205 | public function get_real_type($find_handler = false) |
810 | { | 1206 | { |
811 | // If it's Odeo, let's get it out of the way. | ||
812 | if (substr(strtolower($this->get_link()), 0, 15) === 'http://odeo.com') | ||
813 | { | ||
814 | return 'odeo'; | ||
815 | } | ||
816 | |||
817 | // Mime-types by handler. | 1207 | // Mime-types by handler. |
818 | $types_flash = array('application/x-shockwave-flash', 'application/futuresplash'); // Flash | 1208 | $types_flash = array('application/x-shockwave-flash', 'application/futuresplash'); // Flash |
819 | $types_fmedia = array('video/flv', 'video/x-flv','flv-application/octet-stream'); // Flash Media Player | 1209 | $types_fmedia = array('video/flv', 'video/x-flv','flv-application/octet-stream'); // Flash Media Player |
diff --git a/inc/3rdparty/simplepie/SimplePie/Rating.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Exception.php index bedc701c..73e104d6 100644 --- a/inc/3rdparty/simplepie/SimplePie/Rating.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Exception.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,56 +33,20 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.4-dev |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | /** | |
47 | class SimplePie_Rating | 46 | * General SimplePie exception class |
47 | * | ||
48 | * @package SimplePie | ||
49 | */ | ||
50 | class SimplePie_Exception extends Exception | ||
48 | { | 51 | { |
49 | var $scheme; | 52 | } \ No newline at end of file |
50 | var $value; | ||
51 | |||
52 | // Constructor, used to input the data | ||
53 | public function __construct($scheme = null, $value = null) | ||
54 | { | ||
55 | $this->scheme = $scheme; | ||
56 | $this->value = $value; | ||
57 | } | ||
58 | |||
59 | public function __toString() | ||
60 | { | ||
61 | // There is no $this->data here | ||
62 | return md5(serialize($this)); | ||
63 | } | ||
64 | |||
65 | public function get_scheme() | ||
66 | { | ||
67 | if ($this->scheme !== null) | ||
68 | { | ||
69 | return $this->scheme; | ||
70 | } | ||
71 | else | ||
72 | { | ||
73 | return null; | ||
74 | } | ||
75 | } | ||
76 | |||
77 | public function get_value() | ||
78 | { | ||
79 | if ($this->value !== null) | ||
80 | { | ||
81 | return $this->value; | ||
82 | } | ||
83 | else | ||
84 | { | ||
85 | return null; | ||
86 | } | ||
87 | } | ||
88 | } | ||
diff --git a/inc/3rdparty/simplepie/SimplePie/File.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/File.php index 55e74079..b7d1a2ac 100644 --- a/inc/3rdparty/simplepie/SimplePie/File.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/File.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,18 +33,24 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | |||
47 | /** | 45 | /** |
46 | * Used for fetching remote files and reading local files | ||
47 | * | ||
48 | * Supports HTTP 1.0 via cURL or fsockopen, with spotty HTTP 1.1 support | ||
49 | * | ||
50 | * This class can be overloaded with {@see SimplePie::set_file_class()} | ||
51 | * | ||
52 | * @package SimplePie | ||
53 | * @subpackage HTTP | ||
48 | * @todo Move to properly supporting RFC2616 (HTTP/1.1) | 54 | * @todo Move to properly supporting RFC2616 (HTTP/1.1) |
49 | */ | 55 | */ |
50 | class SimplePie_File | 56 | class SimplePie_File |
@@ -238,15 +244,23 @@ class SimplePie_File | |||
238 | break; | 244 | break; |
239 | 245 | ||
240 | case 'deflate': | 246 | case 'deflate': |
241 | if (($body = gzuncompress($this->body)) === false) | 247 | if (($decompressed = gzinflate($this->body)) !== false) |
248 | { | ||
249 | $this->body = $decompressed; | ||
250 | } | ||
251 | else if (($decompressed = gzuncompress($this->body)) !== false) | ||
242 | { | 252 | { |
243 | if (($body = gzinflate($this->body)) === false) | 253 | $this->body = $decompressed; |
244 | { | 254 | } |
245 | $this->error = 'Unable to decode HTTP "deflate" stream'; | 255 | else if (function_exists('gzdecode') && ($decompressed = gzdecode($this->body)) !== false) |
246 | $this->success = false; | 256 | { |
247 | } | 257 | $this->body = $decompressed; |
258 | } | ||
259 | else | ||
260 | { | ||
261 | $this->error = 'Unable to decode HTTP "deflate" stream'; | ||
262 | $this->success = false; | ||
248 | } | 263 | } |
249 | $this->body = $body; | ||
250 | break; | 264 | break; |
251 | 265 | ||
252 | default: | 266 | default: |
diff --git a/inc/3rdparty/simplepie/SimplePie/HTTP/Parser.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/HTTP/Parser.php index cc9660c6..bff2222b 100644 --- a/inc/3rdparty/simplepie/SimplePie/HTTP/Parser.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/HTTP/Parser.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,14 +33,13 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | ||
@@ -48,6 +47,7 @@ | |||
48 | * HTTP Response Parser | 47 | * HTTP Response Parser |
49 | * | 48 | * |
50 | * @package SimplePie | 49 | * @package SimplePie |
50 | * @subpackage HTTP | ||
51 | */ | 51 | */ |
52 | class SimplePie_HTTP_Parser | 52 | class SimplePie_HTTP_Parser |
53 | { | 53 | { |
@@ -457,7 +457,7 @@ class SimplePie_HTTP_Parser | |||
457 | */ | 457 | */ |
458 | protected function chunked() | 458 | protected function chunked() |
459 | { | 459 | { |
460 | if (!preg_match('/^[0-9a-f]+(\s|\r|\n)+/mi', trim($this->body))) | 460 | if (!preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', trim($this->body))) |
461 | { | 461 | { |
462 | $this->state = 'emit'; | 462 | $this->state = 'emit'; |
463 | return; | 463 | return; |
@@ -468,7 +468,7 @@ class SimplePie_HTTP_Parser | |||
468 | 468 | ||
469 | while (true) | 469 | while (true) |
470 | { | 470 | { |
471 | $is_chunked = (bool) preg_match( '/^([0-9a-f]+)(\s|\r|\n)+/mi', $encoded, $matches ); | 471 | $is_chunked = (bool) preg_match( '/^([0-9a-f]+)[^\r\n]*\r\n/i', $encoded, $matches ); |
472 | if (!$is_chunked) | 472 | if (!$is_chunked) |
473 | { | 473 | { |
474 | // Looks like it's not chunked after all | 474 | // Looks like it's not chunked after all |
@@ -476,12 +476,20 @@ class SimplePie_HTTP_Parser | |||
476 | return; | 476 | return; |
477 | } | 477 | } |
478 | 478 | ||
479 | $length = hexdec($matches[1]); | 479 | $length = hexdec(trim($matches[1])); |
480 | if ($length === 0) | ||
481 | { | ||
482 | // Ignore trailer headers | ||
483 | $this->state = 'emit'; | ||
484 | $this->body = $decoded; | ||
485 | return; | ||
486 | } | ||
487 | |||
480 | $chunk_length = strlen($matches[0]); | 488 | $chunk_length = strlen($matches[0]); |
481 | $decoded .= $part = substr($encoded, $chunk_length, $length); | 489 | $decoded .= $part = substr($encoded, $chunk_length, $length); |
482 | $encoded = ltrim(substr($encoded, $chunk_length + $length), "\r\n"); | 490 | $encoded = substr($encoded, $chunk_length + $length + 2); |
483 | 491 | ||
484 | if (trim($encoded) === '0') | 492 | if (trim($encoded) === '0' || empty($encoded)) |
485 | { | 493 | { |
486 | $this->state = 'emit'; | 494 | $this->state = 'emit'; |
487 | $this->body = $decoded; | 495 | $this->body = $decoded; |
diff --git a/inc/3rdparty/libraries/simplepie/library/SimplePie/IRI.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/IRI.php new file mode 100644 index 00000000..d3198c04 --- /dev/null +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/IRI.php | |||
@@ -0,0 +1,1238 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * SimplePie | ||
4 | * | ||
5 | * A PHP-Based RSS and Atom Feed Framework. | ||
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | ||
7 | * | ||
8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | ||
9 | * All rights reserved. | ||
10 | * | ||
11 | * Redistribution and use in source and binary forms, with or without modification, are | ||
12 | * permitted provided that the following conditions are met: | ||
13 | * | ||
14 | * * Redistributions of source code must retain the above copyright notice, this list of | ||
15 | * conditions and the following disclaimer. | ||
16 | * | ||
17 | * * Redistributions in binary form must reproduce the above copyright notice, this list | ||
18 | * of conditions and the following disclaimer in the documentation and/or other materials | ||
19 | * provided with the distribution. | ||
20 | * | ||
21 | * * Neither the name of the SimplePie Team nor the names of its contributors may be used | ||
22 | * to endorse or promote products derived from this software without specific prior | ||
23 | * written permission. | ||
24 | * | ||
25 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS | ||
26 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | ||
27 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS | ||
28 | * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
29 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
30 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
31 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | ||
32 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
33 | * POSSIBILITY OF SUCH DAMAGE. | ||
34 | * | ||
35 | * @package SimplePie | ||
36 | * @version 1.3.1 | ||
37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue | ||
38 | * @author Ryan Parman | ||
39 | * @author Geoffrey Sneddon | ||
40 | * @author Ryan McCue | ||
41 | * @link http://simplepie.org/ SimplePie | ||
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | ||
43 | */ | ||
44 | |||
45 | /** | ||
46 | * IRI parser/serialiser/normaliser | ||
47 | * | ||
48 | * @package SimplePie | ||
49 | * @subpackage HTTP | ||
50 | * @author Geoffrey Sneddon | ||
51 | * @author Steve Minutillo | ||
52 | * @author Ryan McCue | ||
53 | * @copyright 2007-2012 Geoffrey Sneddon, Steve Minutillo, Ryan McCue | ||
54 | * @license http://www.opensource.org/licenses/bsd-license.php | ||
55 | */ | ||
56 | class SimplePie_IRI | ||
57 | { | ||
58 | /** | ||
59 | * Scheme | ||
60 | * | ||
61 | * @var string | ||
62 | */ | ||
63 | protected $scheme = null; | ||
64 | |||
65 | /** | ||
66 | * User Information | ||
67 | * | ||
68 | * @var string | ||
69 | */ | ||
70 | protected $iuserinfo = null; | ||
71 | |||
72 | /** | ||
73 | * ihost | ||
74 | * | ||
75 | * @var string | ||
76 | */ | ||
77 | protected $ihost = null; | ||
78 | |||
79 | /** | ||
80 | * Port | ||
81 | * | ||
82 | * @var string | ||
83 | */ | ||
84 | protected $port = null; | ||
85 | |||
86 | /** | ||
87 | * ipath | ||
88 | * | ||
89 | * @var string | ||
90 | */ | ||
91 | protected $ipath = ''; | ||
92 | |||
93 | /** | ||
94 | * iquery | ||
95 | * | ||
96 | * @var string | ||
97 | */ | ||
98 | protected $iquery = null; | ||
99 | |||
100 | /** | ||
101 | * ifragment | ||
102 | * | ||
103 | * @var string | ||
104 | */ | ||
105 | protected $ifragment = null; | ||
106 | |||
107 | /** | ||
108 | * Normalization database | ||
109 | * | ||
110 | * Each key is the scheme, each value is an array with each key as the IRI | ||
111 | * part and value as the default value for that part. | ||
112 | */ | ||
113 | protected $normalization = array( | ||
114 | 'acap' => array( | ||
115 | 'port' => 674 | ||
116 | ), | ||
117 | 'dict' => array( | ||
118 | 'port' => 2628 | ||
119 | ), | ||
120 | 'file' => array( | ||
121 | 'ihost' => 'localhost' | ||
122 | ), | ||
123 | 'http' => array( | ||
124 | 'port' => 80, | ||
125 | 'ipath' => '/' | ||
126 | ), | ||
127 | 'https' => array( | ||
128 | 'port' => 443, | ||
129 | 'ipath' => '/' | ||
130 | ), | ||
131 | ); | ||
132 | |||
133 | /** | ||
134 | * Return the entire IRI when you try and read the object as a string | ||
135 | * | ||
136 | * @return string | ||
137 | */ | ||
public function __toString()
{
    // PHP requires __toString() to return a string; anything else is a
    // fatal error at the point of the implicit conversion.
    // NOTE(review): get_iri() is defined outside this view. If it can return
    // a non-string (for example false for an invalid IRI), the cast below
    // yields '' instead of aborting the script - confirm against get_iri().
    return (string) $this->get_iri();
}
142 | |||
143 | /** | ||
144 | * Overload __set() to provide access via properties | ||
145 | * | ||
146 | * @param string $name Property name | ||
147 | * @param mixed $value Property value | ||
148 | */ | ||
public function __set($name, $value)
{
    // Prefer a setter named exactly after the property.
    $setter = 'set_' . $name;

    if (!method_exists($this, $setter))
    {
        // Otherwise accept the "i"-prefixed IRI names and route them to the
        // setter without the prefix (e.g. ihost -> set_host).
        $aliases = array('iauthority', 'iuserinfo', 'ihost', 'ipath', 'iquery', 'ifragment');
        if (!in_array($name, $aliases, true))
        {
            // Unknown property: silently ignored.
            return;
        }
        $setter = 'set_' . substr($name, 1);
    }

    call_user_func(array($this, $setter), $value);
}
167 | |||
168 | /** | ||
169 | * Overload __get() to provide access via properties | ||
170 | * | ||
171 | * @param string $name Property name | ||
172 | * @return mixed | ||
173 | */ | ||
public function __get($name)
{
    // Membership is tested with array_key_exists() on the property list
    // because isset() would report false for properties that hold null.
    $declared = get_object_vars($this);
    $prefixed = 'i' . $name;
    $stripped = substr($name, 1);

    if (in_array($name, array('iri', 'uri', 'iauthority', 'authority'), true))
    {
        // Computed values come from the matching get_*() method.
        $getter = 'get_' . $name;
        $return = $this->$getter();
    }
    elseif (array_key_exists($name, $declared))
    {
        $return = $this->$name;
    }
    elseif (array_key_exists($prefixed, $declared))
    {
        // host -> ihost
        $name = $prefixed;
        $return = $this->$prefixed;
    }
    elseif ($stripped && array_key_exists($stripped, $declared))
    {
        // ischeme -> scheme
        $name = $stripped;
        $return = $this->$stripped;
    }
    else
    {
        trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
        $return = null;
    }

    // A null value falls back to the per-scheme default, if one is listed
    // in the normalization table for the (possibly remapped) name.
    if ($return !== null || !isset($this->normalization[$this->scheme][$name]))
    {
        return $return;
    }

    return $this->normalization[$this->scheme][$name];
}
220 | |||
221 | /** | ||
222 | * Overload __isset() to provide access via properties | ||
223 | * | ||
224 | * @param string $name Property name | ||
225 | * @return bool | ||
226 | */ | ||
public function __isset($name)
{
    // Set when a get_*() accessor exists for the name, or when the
    // underlying property currently holds a non-null value.
    return method_exists($this, 'get_' . $name) || isset($this->$name);
}
238 | |||
239 | /** | ||
240 | * Overload __unset() to provide access via properties | ||
241 | * | ||
242 | * @param string $name Property name | ||
243 | */ | ||
public function __unset($name)
{
    $setter = 'set_' . $name;

    if (!method_exists($this, $setter))
    {
        // No setter for this name: nothing to do.
        return;
    }

    // "Unsetting" is expressed as handing an empty string to the setter.
    call_user_func(array($this, $setter), '');
}
251 | |||
252 | /** | ||
253 | * Create a new IRI object, from a specified string | ||
254 | * | ||
255 | * @param string $iri | ||
256 | */ | ||
public function __construct($iri = null)
{
    // All parsing is delegated to set_iri(); its return value is not
    // inspected here.
    $this->set_iri($iri);
}
261 | |||
262 | /** | ||
263 | * Create a new IRI object by resolving a relative IRI | ||
264 | * | ||
265 | * Returns false if $base is not absolute, otherwise an IRI. | ||
266 | * | ||
267 | * @param IRI|string $base (Absolute) Base IRI | ||
268 | * @param IRI|string $relative Relative IRI | ||
269 | * @return IRI|false | ||
270 | */ | ||
public static function absolutize($base, $relative)
{
    if (!($relative instanceof SimplePie_IRI))
    {
        $relative = new SimplePie_IRI($relative);
    }
    if (!$relative->is_valid())
    {
        return false;
    }
    if ($relative->scheme !== null)
    {
        // The reference already carries a scheme, so it is returned as a copy.
        return clone $relative;
    }

    if (!($base instanceof SimplePie_IRI))
    {
        $base = new SimplePie_IRI($base);
    }
    if ($base->scheme === null || !$base->is_valid())
    {
        // The base must be absolute (have a scheme) and valid.
        return false;
    }

    if ($relative->get_iri() === '')
    {
        // Empty reference: the base without its fragment.
        $target = clone $base;
        $target->ifragment = null;
    }
    elseif ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null)
    {
        // The reference has its own authority: only the scheme is inherited.
        $target = clone $relative;
        $target->scheme = $base->scheme;
    }
    else
    {
        // Scheme and authority come from the base.
        $target = new SimplePie_IRI;
        $target->scheme = $base->scheme;
        $target->iuserinfo = $base->iuserinfo;
        $target->ihost = $base->ihost;
        $target->port = $base->port;

        if ($relative->ipath === '')
        {
            // No path in the reference: keep the base path, and the base
            // query unless the reference supplies one.
            $target->ipath = $base->ipath;
            if ($relative->iquery !== null)
            {
                $target->iquery = $relative->iquery;
            }
            elseif ($base->iquery !== null)
            {
                $target->iquery = $base->iquery;
            }
        }
        else
        {
            $base_has_authority = $base->iuserinfo !== null || $base->ihost !== null || $base->port !== null;

            if ($relative->ipath[0] === '/')
            {
                // Absolute path: used as is.
                $merged = $relative->ipath;
            }
            elseif ($base_has_authority && $base->ipath === '')
            {
                // Base has an authority but an empty path.
                $merged = '/' . $relative->ipath;
            }
            elseif (($cut = strrpos($base->ipath, '/')) !== false)
            {
                // Replace everything after the last "/" of the base path.
                $merged = substr($base->ipath, 0, $cut + 1) . $relative->ipath;
            }
            else
            {
                $merged = $relative->ipath;
            }

            $target->ipath = $target->remove_dot_segments($merged);
            $target->iquery = $relative->iquery;
        }

        $target->ifragment = $relative->ifragment;
    }

    $target->scheme_normalization();
    return $target;
}
357 | |||
358 | /** | ||
359 | * Parse an IRI into scheme/authority/path/query/fragment segments | ||
360 | * | ||
361 | * @param string $iri | ||
362 | * @return array | ||
363 | */ | ||
protected function parse_iri($iri)
{
    // Strip surrounding space, tab, LF, FF and CR before matching.
    $iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
    $pattern = '/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/';

    if (!preg_match($pattern, $iri, $match))
    {
        // This can occur when a paragraph is accidentally parsed as a URI
        return false;
    }

    // The numbered wrapper groups (1, 3, 6, 8) include the delimiter, so an
    // empty or missing wrapper means the component is absent (null), while a
    // present-but-empty component stays ''.
    if ($match[1] === '')
    {
        $match['scheme'] = null;
    }

    $no_authority = !isset($match[3]) || $match[3] === '';
    if ($no_authority)
    {
        $match['authority'] = null;
    }

    if (!isset($match[5]))
    {
        $match['path'] = '';
    }

    $no_query = !isset($match[6]) || $match[6] === '';
    if ($no_query)
    {
        $match['query'] = null;
    }

    $no_fragment = !isset($match[8]) || $match[8] === '';
    if ($no_fragment)
    {
        $match['fragment'] = null;
    }

    return $match;
}
397 | |||
398 | /** | ||
399 | * Remove dot segments from a path | ||
400 | * | ||
401 | * @param string $input | ||
402 | * @return string | ||
403 | */ | ||
protected function remove_dot_segments($input)
{
    $output = '';

    // Keep going while the input may still contain a "." or ".." segment.
    while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..')
    {
        if (strncmp($input, '../', 3) === 0)
        {
            // A: leading "../" is dropped
            $input = substr($input, 3);
        }
        elseif (strncmp($input, './', 2) === 0)
        {
            // A: leading "./" is dropped
            $input = substr($input, 2);
        }
        elseif (strncmp($input, '/./', 3) === 0)
        {
            // B: leading "/./" collapses to "/"
            $input = substr($input, 2);
        }
        elseif ($input === '/.')
        {
            // B: a lone "/." collapses to "/"
            $input = '/';
        }
        elseif (strncmp($input, '/../', 4) === 0 || $input === '/..')
        {
            // C: leading "/../" (or a lone "/..") collapses to "/" and the
            // last segment already written to the output, together with its
            // preceding "/", is discarded.
            $input = ($input === '/..') ? '/' : substr($input, 3);
            $output = substr_replace($output, '', strrpos($output, '/'));
        }
        elseif ($input === '.' || $input === '..')
        {
            // D: nothing but a dot segment remains
            $input = '';
        }
        elseif (($next = strpos($input, '/', 1)) !== false)
        {
            // E: move the first segment (with its leading "/", if any) from
            // the input to the output, stopping before the next "/".
            $output .= substr($input, 0, $next);
            $input = substr($input, $next);
        }
        else
        {
            // E: the remainder is a single segment
            $output .= $input;
            $input = '';
        }
    }

    return $output . $input;
}
457 | |||
458 | /** | ||
459 | * Replace invalid character with percent encoding | ||
460 | * | ||
461 | * @param string $string Input string | ||
462 | * @param string $extra_chars Valid characters not in iunreserved or | ||
463 | * iprivate (this is ASCII-only) | ||
464 | * @param bool $iprivate Allow iprivate | ||
465 | * @return string | ||
466 | */ | ||
protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false)
{
    // Normalize as many pct-encoded sections as possible
    $string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $string);

    // A "%" that does not start a two-digit hex escape is itself escaped
    $string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

    // Bytes that may stay as they are: the caller's extras plus unreserved
    // characters and "%" (safe, since every remaining "%" starts a valid
    // escape at this point).
    $allowed = $extra_chars . 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

    $position = 0;
    $strlen = strlen($string);

    // Skip runs of allowed bytes; each stop is a byte that needs a decision.
    while (($position += strspn($string, $allowed, $position)) < $strlen)
    {
        $byte = ord($string[$position]);

        // First byte of the sequence under inspection
        $start = $position;

        // Assume a well-formed sequence until shown otherwise
        $valid = true;

        // Classify the lead byte (single-byte characters never get here
        // unless they are disallowed, in which case they count as invalid).
        if (($byte & 0xE0) === 0xC0)
        {
            // Two byte sequence
            $codepoint = ($byte & 0x1F) << 6;
            $length = 2;
        }
        elseif (($byte & 0xF0) === 0xE0)
        {
            // Three byte sequence
            $codepoint = ($byte & 0x0F) << 12;
            $length = 3;
        }
        elseif (($byte & 0xF8) === 0xF0)
        {
            // Four byte sequence
            $codepoint = ($byte & 0x07) << 18;
            $length = 4;
        }
        else
        {
            // Invalid byte
            $valid = false;
            $length = 1;
        }
        $remaining = $length - 1;

        if ($remaining)
        {
            if ($position + $length <= $strlen)
            {
                for ($position++; $remaining; $position++)
                {
                    $byte = ord($string[$position]);

                    if (($byte & 0xC0) === 0x80)
                    {
                        // Valid continuation byte: fold its six bits in
                        $remaining--;
                        $codepoint |= ($byte & 0x3F) << ($remaining * 6);
                    }
                    else
                    {
                        // Not a continuation byte: the sequence is invalid
                        // and this byte is re-examined on the next pass
                        $valid = false;
                        $position--;
                        break;
                    }
                }
            }
            else
            {
                // Sequence is cut off by the end of the string
                $position = $strlen - 1;
                $valid = false;
            }
        }

        // Percent encode anything invalid or not in ucschar
        if (
            // Invalid sequences
            !$valid
            // Non-shortest form sequences are invalid
            || $length > 1 && $codepoint <= 0x7F
            || $length > 2 && $codepoint <= 0x7FF
            || $length > 3 && $codepoint <= 0xFFFF
            // Noncharacters
            || ($codepoint & 0xFFFE) === 0xFFFE
            || $codepoint >= 0xFDD0 && $codepoint <= 0xFDEF
            || (
                // Everything else not in ucschar
                $codepoint > 0xD7FF && $codepoint < 0xF900
                || $codepoint < 0xA0
                || $codepoint > 0xEFFFD
            )
            && (
                // Everything not in iprivate, if it applies
                !$iprivate
                || $codepoint < 0xE000
                || $codepoint > 0x10FFFD
            )
        )
        {
            // After a fully decoded character $position is one past its last
            // byte; step back so the range below ends on that last byte.
            if ($valid)
            {
                $position--;
            }

            // Each byte becomes "%XX", which is two bytes longer, so the
            // cursor, the end marker and the length all shift by two.
            for ($k = $start; $k <= $position; $k++)
            {
                $string = substr_replace($string, sprintf('%%%02X', ord($string[$k])), $k, 1);
                $k += 2;
                $position += 2;
                $strlen += 2;
            }
        }
    }

    return $string;
}
593 | |||
594 | /** | ||
595 | * Callback function for preg_replace_callback. | ||
596 | * | ||
597 | * Removes sequences of percent encoded bytes that represent UTF-8 | ||
598 | * encoded characters in iunreserved | ||
599 | * | ||
600 | * @param array $match PCRE match | ||
601 | * @return string Replacement | ||
602 | */ | ||
protected function remove_iunreserved_percent_encoded($match)
{
    // The match consists solely of "%XX" groups, so splitting on "%" gives
    // an empty first element followed by one hex pair per byte.
    $bytes = explode('%', $match[0]);

    // Output buffer, and the number of continuation bytes still expected
    // for the sequence currently being read.
    $result = '';
    $remaining = 0;

    for ($i = 1, $len = count($bytes); $i < $len; $i++)
    {
        $byte = hexdec($bytes[$i]);

        if ($remaining)
        {
            // Continuation byte expected
            if (($byte & 0xC0) === 0x80)
            {
                $remaining--;
                $codepoint |= ($byte & 0x3F) << ($remaining * 6);
            }
            else
            {
                // Not a continuation byte: close the sequence as invalid and
                // step back so this byte is read again as a lead byte
                $valid = false;
                $remaining = 0;
                $i--;
            }
        }
        else
        {
            // Lead byte of a new sequence
            $start = $i;
            $valid = true;

            if ($byte <= 0x7F)
            {
                // One byte sequence
                $codepoint = $byte;
                $length = 1;
            }
            elseif (($byte & 0xE0) === 0xC0)
            {
                // Two byte sequence
                $codepoint = ($byte & 0x1F) << 6;
                $length = 2;
                $remaining = 1;
            }
            elseif (($byte & 0xF0) === 0xE0)
            {
                // Three byte sequence
                $codepoint = ($byte & 0x0F) << 12;
                $length = 3;
                $remaining = 2;
            }
            elseif (($byte & 0xF8) === 0xF0)
            {
                // Four byte sequence
                $codepoint = ($byte & 0x07) << 18;
                $length = 4;
                $remaining = 3;
            }
            else
            {
                // Invalid byte
                $valid = false;
                $remaining = 0;
            }
        }

        // Sequence still incomplete: read the next byte
        if ($remaining)
        {
            continue;
        }

        // Percent encode anything invalid or not in iunreserved
        if (
            // Invalid sequences
            !$valid
            // Non-shortest form sequences are invalid
            || $length > 1 && $codepoint <= 0x7F
            || $length > 2 && $codepoint <= 0x7FF
            || $length > 3 && $codepoint <= 0xFFFF
            // Outside of range of iunreserved codepoints
            || $codepoint < 0x2D
            || $codepoint > 0xEFFFD
            // Noncharacters
            || ($codepoint & 0xFFFE) === 0xFFFE
            || $codepoint >= 0xFDD0 && $codepoint <= 0xFDEF
            // Everything else not in iunreserved (this is all BMP)
            || $codepoint === 0x2F
            || $codepoint > 0x39 && $codepoint < 0x41
            || $codepoint > 0x5A && $codepoint < 0x61
            || $codepoint > 0x7A && $codepoint < 0x7E
            || $codepoint > 0x7E && $codepoint < 0xA0
            || $codepoint > 0xD7FF && $codepoint < 0xF900
        )
        {
            // Keep the escapes, with the hex digits upper-cased
            for ($k = $start; $k <= $i; $k++)
            {
                $result .= '%' . strtoupper($bytes[$k]);
            }
        }
        else
        {
            // Emit the raw bytes in place of the escapes
            for ($k = $start; $k <= $i; $k++)
            {
                $result .= chr(hexdec($bytes[$k]));
            }
        }
    }

    // Bytes left over from an unfinished multi-byte sequence are invalid
    // and stay percent-encoded
    if ($remaining)
    {
        for ($k = $start; $k < $len; $k++)
        {
            $result .= '%' . strtoupper($bytes[$k]);
        }
    }

    return $result;
}
734 | |||
/**
 * Clear every IRI part whose value is identical to the default listed for
 * the current scheme in the normalization table
 */
protected function scheme_normalization()
{
    // Part name => value that marks the part as "not set"
    $blank_values = array(
        'iuserinfo' => null,
        'ihost' => null,
        'port' => null,
        'ipath' => '',
        'iquery' => null,
        'ifragment' => null,
    );

    foreach ($blank_values as $part => $blank)
    {
        if (isset($this->normalization[$this->scheme][$part]) && $this->$part === $this->normalization[$this->scheme][$part])
        {
            $this->$part = $blank;
        }
    }
}
762 | |||
/**
 * Check if the object represents a valid IRI. This needs to be done on each
 * call as some things change depending on another part of the IRI.
 *
 * The only things checked are the constraints between the path and the
 * other components:
 *  - with an authority, a non-empty path must start with "/" but not "//";
 *  - with neither scheme nor authority, the path must not contain a ":"
 *    before its first "/" (or a ":" at all when it has no "/").
 *
 * @return bool
 */
public function is_valid()
{
    // An empty path cannot conflict with any other component.
    if ($this->ipath === '')
    {
        return true;
    }

    $has_authority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null;
    if ($has_authority)
    {
        return $this->ipath[0] === '/' && substr($this->ipath, 0, 2) !== '//';
    }

    // From here on there is no authority; only scheme-less paths are restricted.
    if ($this->scheme !== null)
    {
        return true;
    }

    $colon = strpos($this->ipath, ':');
    if ($colon === false)
    {
        return true;
    }

    // A colon is only acceptable once a slash has been seen before it.
    $slash = strpos($this->ipath, '/');
    return $slash !== false && $colon >= $slash;
}
792 | |||
/**
 * Set the entire IRI. Returns true on success, false on failure (if there
 * are any invalid characters).
 *
 * Results are memoised in a function-level static array keyed by the IRI
 * as passed in; a hit restores every component and the stored return value
 * without re-parsing. Passing null leaves the object untouched.
 *
 * @param string $iri
 * @return bool
 */
public function set_iri($iri)
{
    static $cache = array();

    if ($iri === null)
    {
        return true;
    }

    if (isset($cache[$iri]))
    {
        $entry = $cache[$iri];
        $this->scheme    = $entry[0];
        $this->iuserinfo = $entry[1];
        $this->ihost     = $entry[2];
        $this->port      = $entry[3];
        $this->ipath     = $entry[4];
        $this->iquery    = $entry[5];
        $this->ifragment = $entry[6];
        return $entry[7];
    }

    $parts = $this->parse_iri((string) $iri);
    if (!$parts)
    {
        return false;
    }

    // Setters run in this order and stop at the first one that fails.
    $ok = $this->set_scheme($parts['scheme'])
        && $this->set_authority($parts['authority'])
        && $this->set_path($parts['path'])
        && $this->set_query($parts['query'])
        && $this->set_fragment($parts['fragment']);

    $cache[$iri] = array(
        $this->scheme,
        $this->iuserinfo,
        $this->ihost,
        $this->port,
        $this->ipath,
        $this->iquery,
        $this->ifragment,
        $ok,
    );

    return $ok;
}
849 | |||
/**
 * Set the scheme. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * A valid scheme is a letter followed by any number of letters, digits,
 * "+", "-" or "."; it is stored lowercased. Null clears the scheme. On
 * failure the scheme is cleared as well.
 *
 * @param string|null $scheme
 * @return bool
 */
public function set_scheme($scheme)
{
    if ($scheme === null)
    {
        $this->scheme = null;
        return true;
    }

    // Anchor with \z rather than $: "$" also matches immediately before a
    // trailing newline, which would let "http\n" through as a scheme.
    if (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*\z/', $scheme))
    {
        $this->scheme = null;
        return false;
    }

    $this->scheme = strtolower($scheme);
    return true;
}
874 | |||
/**
 * Set the authority. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * The authority is split into userinfo (everything before the last "@"),
 * host, and port (everything after the first ":" that follows an optional
 * "]"), and each part is handed to its own setter. Null clears all three.
 *
 * Results are memoised in a function-level static array. The key combines
 * the current scheme with the authority, because the host and port setters
 * apply scheme-specific normalization and the stored values are therefore
 * only correct for the scheme they were computed under.
 *
 * @param string|null $authority
 * @return bool
 */
public function set_authority($authority)
{
    static $cache;
    if (!$cache)
    {
        $cache = array();
    }

    if ($authority === null)
    {
        $this->iuserinfo = null;
        $this->ihost = null;
        $this->port = null;
        return true;
    }

    // A scheme never contains ":", so the first colon unambiguously
    // separates the two halves of the key.
    $cache_key = $this->scheme . ':' . $authority;

    if (isset($cache[$cache_key]))
    {
        list($this->iuserinfo,
             $this->ihost,
             $this->port,
             $return) = $cache[$cache_key];

        return $return;
    }

    $remaining = $authority;

    $iuserinfo = null;
    if (($iuserinfo_end = strrpos($remaining, '@')) !== false)
    {
        $iuserinfo = substr($remaining, 0, $iuserinfo_end);
        $remaining = substr($remaining, $iuserinfo_end + 1);
    }

    // Start looking for the port colon after a closing bracket, if any, so
    // that colons inside a bracketed host are skipped.
    $port = null;
    $port_start = strpos($remaining, ':', (int) strpos($remaining, ']'));
    if ($port_start !== false)
    {
        $port = substr($remaining, $port_start + 1);
        // Nothing after the colon means "no port". substr() reports that
        // as false on older PHP versions and as '' on PHP 8.
        if ($port === false || $port === '')
        {
            $port = null;
        }
        $remaining = substr($remaining, 0, $port_start);
    }

    $return = $this->set_userinfo($iuserinfo) &&
              $this->set_host($remaining) &&
              $this->set_port($port);

    $cache[$cache_key] = array($this->iuserinfo,
                               $this->ihost,
                               $this->port,
                               $return);

    return $return;
}
941 | |||
/**
 * Set the iuserinfo.
 *
 * A non-null value is passed through replace_invalid_with_pct_encoding()
 * with the extra characters !$&'()*+,;=: allowed, after which scheme
 * normalization is applied. Null simply clears the component.
 *
 * @param string|null $iuserinfo
 * @return bool Always true
 */
public function set_userinfo($iuserinfo)
{
    if ($iuserinfo !== null)
    {
        $this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
        $this->scheme_normalization();
        return true;
    }

    $this->iuserinfo = null;
    return true;
}
962 | |||
/**
 * Set the ihost. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * A value wrapped in square brackets is checked with SimplePie_Net_IPv6
 * and stored in compressed form; if the check fails the host is cleared
 * and false is returned. Any other value is passed through
 * replace_invalid_with_pct_encoding() and then lowercased, leaving
 * "%XX" triplets untouched.
 *
 * @param string|null $ihost
 * @return bool
 */
public function set_host($ihost)
{
    if ($ihost === null)
    {
        $this->ihost = null;
        return true;
    }

    $bracketed = substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']';
    if ($bracketed)
    {
        $address = substr($ihost, 1, -1);
        if (!SimplePie_Net_IPv6::check_ipv6($address))
        {
            $this->ihost = null;
            return false;
        }
        $this->ihost = '[' . SimplePie_Net_IPv6::compress($address) . ']';
    }
    else
    {
        $host = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');

        // Lowercase after encoding (encoding can introduce unescaped
        // characters), hopping over each "%XX" triplet so its hex digits
        // keep their case.
        $stops = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%';
        $length = strlen($host);
        $offset = strcspn($host, $stops);
        while ($offset < $length)
        {
            if ($host[$offset] === '%')
            {
                $offset += 3;
            }
            else
            {
                $host[$offset] = strtolower($host[$offset]);
                $offset++;
            }
            $offset += strcspn($host, $stops, $offset);
        }

        $this->ihost = $host;
    }

    $this->scheme_normalization();

    return true;
}
1018 | |||
/**
 * Set the port. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * A value made up solely of the digits 0-9 is stored as an integer and
 * scheme normalization is applied. Null clears the port. Anything else
 * clears the port and reports failure.
 *
 * @param string|null $port
 * @return bool
 */
public function set_port($port)
{
    if ($port === null)
    {
        $this->port = null;
        return true;
    }

    $digits_only = strspn($port, '0123456789') === strlen($port);
    if (!$digits_only)
    {
        $this->port = null;
        return false;
    }

    $this->port = (int) $port;
    $this->scheme_normalization();
    return true;
}
1045 | |||
/**
 * Set the ipath.
 *
 * The path is passed through replace_invalid_with_pct_encoding(); when a
 * scheme is set, the result of remove_dot_segments() on that value is used
 * instead. Both variants are memoised per input path in a function-level
 * static array. Scheme normalization is applied afterwards.
 *
 * @param string $ipath
 * @return bool Always true
 */
public function set_path($ipath)
{
    static $cache = array();

    $ipath = (string) $ipath;

    if (!isset($cache[$ipath]))
    {
        $encoded = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
        // Slot 0: encoded only; slot 1: encoded with dot segments removed.
        $cache[$ipath] = array($encoded, $this->remove_dot_segments($encoded));
    }

    $slot = ($this->scheme === null) ? 0 : 1;
    $this->ipath = $cache[$ipath][$slot];

    $this->scheme_normalization();
    return true;
}
1078 | |||
/**
 * Set the iquery.
 *
 * A non-null value is passed through replace_invalid_with_pct_encoding()
 * with the extra characters !$&'()*+,;=:@/? allowed and the third argument
 * set to true, after which scheme normalization is applied. Null clears
 * the component.
 *
 * @param string|null $iquery
 * @return bool Always true
 */
public function set_query($iquery)
{
    if ($iquery === null)
    {
        $this->iquery = null;
        return true;
    }

    $encoded = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
    $this->iquery = $encoded;
    $this->scheme_normalization();

    return true;
}
1098 | |||
/**
 * Set the ifragment.
 *
 * A non-null value is passed through replace_invalid_with_pct_encoding()
 * with the extra characters !$&'()*+,;=:@/? allowed, after which scheme
 * normalization is applied. Null clears the component.
 *
 * @param string|null $ifragment
 * @return bool Always true
 */
public function set_fragment($ifragment)
{
    if ($ifragment === null)
    {
        $this->ifragment = null;
        return true;
    }

    $encoded = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
    $this->ifragment = $encoded;
    $this->scheme_normalization();

    return true;
}
1118 | |||
/**
 * Convert an IRI to a URI (or parts thereof)
 *
 * Every byte in the range 0x80-0xFF is replaced by its "%XX" form with
 * uppercase hex digits; all other bytes are left as they are.
 *
 * @param string $string IRI, or a piece of one, to convert
 * @return string
 */
public function to_uri($string)
{
    static $non_ascii;
    if (!$non_ascii)
    {
        $non_ascii = implode('', range("\x80", "\xFF"));
    }

    $length = strlen($string);
    $offset = strcspn($string, $non_ascii);
    while ($offset < $length)
    {
        $escaped = sprintf('%%%02X', ord($string[$offset]));
        $string = substr_replace($string, $escaped, $offset, 1);

        // One byte became three: step over them and grow the length.
        $offset += 3;
        $length += 2;

        $offset += strcspn($string, $non_ascii, $offset);
    }

    return $string;
}
1143 | |||
/**
 * Get the complete IRI
 *
 * Assembles scheme, authority, path, query and fragment in that order.
 * When the path is empty but an authority is present and non-empty, the
 * scheme's default path from the normalization table is used instead.
 *
 * @return string|false The IRI, or false when is_valid() fails
 */
public function get_iri()
{
    if (!$this->is_valid())
    {
        return false;
    }

    $iri = ($this->scheme !== null) ? $this->scheme . ':' : '';

    $iauthority = $this->get_iauthority();
    if ($iauthority !== null)
    {
        $iri .= '//' . $iauthority;
    }

    if ($this->ipath !== '')
    {
        $iri .= $this->ipath;
    }
    elseif (!empty($this->normalization[$this->scheme]['ipath']) && $iauthority !== null && $iauthority !== '')
    {
        $iri .= $this->normalization[$this->scheme]['ipath'];
    }

    if ($this->iquery !== null)
    {
        $iri .= '?' . $this->iquery;
    }

    if ($this->ifragment !== null)
    {
        $iri .= '#' . $this->ifragment;
    }

    return $iri;
}
1184 | |||
/**
 * Get the complete URI
 *
 * This is the result of get_iri() passed through to_uri().
 *
 * @return string
 */
public function get_uri()
{
    $iri = $this->get_iri();

    return $this->to_uri($iri);
}
1194 | |||
/**
 * Get the complete iauthority
 *
 * Joins the parts that are set as "userinfo@host:port".
 *
 * @return string|null Null when userinfo, host and port are all unset
 */
protected function get_iauthority()
{
    if ($this->iuserinfo === null && $this->ihost === null && $this->port === null)
    {
        return null;
    }

    $userinfo = ($this->iuserinfo !== null) ? $this->iuserinfo . '@' : '';
    $host     = ($this->ihost !== null) ? $this->ihost : '';
    $port     = ($this->port !== null) ? ':' . $this->port : '';

    return $userinfo . $host . $port;
}
1224 | |||
/**
 * Get the complete authority
 *
 * Returns get_iauthority() passed through to_uri() when it is a string;
 * any other value (such as null) is returned unchanged.
 *
 * @return string|null
 */
protected function get_authority()
{
    $iauthority = $this->get_iauthority();

    return is_string($iauthority) ? $this->to_uri($iauthority) : $iauthority;
}
1238 | } | ||
diff --git a/inc/3rdparty/simplepie/SimplePie/Item.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Item.php index 7538038a..a77574b3 100644 --- a/inc/3rdparty/simplepie/SimplePie/Item.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Item.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,28 +33,85 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | ||
46 | /** | ||
47 | * Manages all item-related data | ||
48 | * | ||
49 | * Used by {@see SimplePie::get_item()} and {@see SimplePie::get_items()} | ||
50 | * | ||
51 | * This class can be overloaded with {@see SimplePie::set_item_class()} | ||
52 | * | ||
53 | * @package SimplePie | ||
54 | * @subpackage API | ||
55 | */ | ||
47 | class SimplePie_Item | 56 | class SimplePie_Item |
48 | { | 57 | { |
58 | /** | ||
59 | * Parent feed | ||
60 | * | ||
61 | * @access private | ||
62 | * @var SimplePie | ||
63 | */ | ||
49 | var $feed; | 64 | var $feed; |
65 | |||
66 | /** | ||
67 | * Raw data | ||
68 | * | ||
69 | * @access private | ||
70 | * @var array | ||
71 | */ | ||
50 | var $data = array(); | 72 | var $data = array(); |
51 | 73 | ||
74 | /** | ||
75 | * Registry object | ||
76 | * | ||
77 | * @see set_registry | ||
78 | * @var SimplePie_Registry | ||
79 | */ | ||
80 | protected $registry; | ||
81 | |||
82 | /** | ||
83 | * Create a new item object | ||
84 | * | ||
85 | * This is usually used by {@see SimplePie::get_items} and | ||
86 | * {@see SimplePie::get_item}. Avoid creating this manually. | ||
87 | * | ||
88 | * @param SimplePie $feed Parent feed | ||
89 | * @param array $data Raw data | ||
90 | */ | ||
52 | public function __construct($feed, $data) | 91 | public function __construct($feed, $data) |
53 | { | 92 | { |
54 | $this->feed = $feed; | 93 | $this->feed = $feed; |
55 | $this->data = $data; | 94 | $this->data = $data; |
56 | } | 95 | } |
57 | 96 | ||
97 | /** | ||
98 | * Set the registry handler | ||
99 | * | ||
100 | * This is usually used by {@see SimplePie_Registry::create} | ||
101 | * | ||
102 | * @since 1.3 | ||
103 | * @param SimplePie_Registry $registry | ||
104 | */ | ||
105 | public function set_registry(SimplePie_Registry $registry) | ||
106 | { | ||
107 | $this->registry = $registry; | ||
108 | } | ||
109 | |||
110 | /** | ||
111 | * Get a string representation of the item | ||
112 | * | ||
113 | * @return string | ||
114 | */ | ||
58 | public function __toString() | 115 | public function __toString() |
59 | { | 116 | { |
60 | return md5(serialize($this->data)); | 117 | return md5(serialize($this->data)); |
@@ -71,6 +128,20 @@ class SimplePie_Item | |||
71 | } | 128 | } |
72 | } | 129 | } |
73 | 130 | ||
131 | /** | ||
132 | * Get data for an item-level element | ||
133 | * | ||
134 | * This method allows you to get access to ANY element/attribute that is a | ||
135 | * sub-element of the item/entry tag. | ||
136 | * | ||
137 | * See {@see SimplePie::get_feed_tags()} for a description of the return value | ||
138 | * | ||
139 | * @since 1.0 | ||
140 | * @see http://simplepie.org/wiki/faq/supported_xml_namespaces | ||
141 | * @param string $namespace The URL of the XML namespace of the elements you're trying to access | ||
142 | * @param string $tag Tag name | ||
143 | * @return array | ||
144 | */ | ||
74 | public function get_item_tags($namespace, $tag) | 145 | public function get_item_tags($namespace, $tag) |
75 | { | 146 | { |
76 | if (isset($this->data['child'][$namespace][$tag])) | 147 | if (isset($this->data['child'][$namespace][$tag])) |
@@ -83,21 +154,62 @@ class SimplePie_Item | |||
83 | } | 154 | } |
84 | } | 155 | } |
85 | 156 | ||
157 | /** | ||
158 | * Get the base URL value from the parent feed | ||
159 | * | ||
160 | * Uses `<xml:base>` | ||
161 | * | ||
162 | * @param array $element | ||
163 | * @return string | ||
164 | */ | ||
86 | public function get_base($element = array()) | 165 | public function get_base($element = array()) |
87 | { | 166 | { |
88 | return $this->feed->get_base($element); | 167 | return $this->feed->get_base($element); |
89 | } | 168 | } |
90 | 169 | ||
170 | /** | ||
171 | * Sanitize feed data | ||
172 | * | ||
173 | * @access private | ||
174 | * @see SimplePie::sanitize() | ||
175 | * @param string $data Data to sanitize | ||
176 | * @param int $type One of the SIMPLEPIE_CONSTRUCT_* constants | ||
177 | * @param string $base Base URL to resolve URLs against | ||
178 | * @return string Sanitized data | ||
179 | */ | ||
91 | public function sanitize($data, $type, $base = '') | 180 | public function sanitize($data, $type, $base = '') |
92 | { | 181 | { |
93 | return $this->feed->sanitize($data, $type, $base); | 182 | return $this->feed->sanitize($data, $type, $base); |
94 | } | 183 | } |
95 | 184 | ||
185 | /** | ||
186 | * Get the parent feed | ||
187 | * | ||
188 | * Note: this may not work as you think for multifeeds! | ||
189 | * | ||
190 | * @link http://simplepie.org/faq/typical_multifeed_gotchas#missing_data_from_feed | ||
191 | * @since 1.0 | ||
192 | * @return SimplePie | ||
193 | */ | ||
96 | public function get_feed() | 194 | public function get_feed() |
97 | { | 195 | { |
98 | return $this->feed; | 196 | return $this->feed; |
99 | } | 197 | } |
100 | 198 | ||
199 | /** | ||
200 | * Get the unique identifier for the item | ||
201 | * | ||
202 | * This is usually used when writing code to check for new items in a feed. | ||
203 | * | ||
204 | * Uses `<atom:id>`, `<guid>`, `<dc:identifier>` or the `about` attribute | ||
205 | * for RDF. If none of these are supplied (or `$hash` is true), creates an | ||
206 | * MD5 hash based on the permalink and title. If either of those are not | ||
207 | * supplied, creates a hash based on the full feed data. | ||
208 | * | ||
209 | * @since Beta 2 | ||
210 | * @param boolean $hash Should we force using a hash instead of the supplied ID? | ||
211 | * @return string | ||
212 | */ | ||
101 | public function get_id($hash = false) | 213 | public function get_id($hash = false) |
102 | { | 214 | { |
103 | if (!$hash) | 215 | if (!$hash) |
@@ -145,17 +257,25 @@ class SimplePie_Item | |||
145 | } | 257 | } |
146 | } | 258 | } |
147 | 259 | ||
260 | /** | ||
261 | * Get the title of the item | ||
262 | * | ||
263 | * Uses `<atom:title>`, `<title>` or `<dc:title>` | ||
264 | * | ||
265 | * @since Beta 2 (previously called `get_item_title` since 0.8) | ||
266 | * @return string|null | ||
267 | */ | ||
148 | public function get_title() | 268 | public function get_title() |
149 | { | 269 | { |
150 | if (!isset($this->data['title'])) | 270 | if (!isset($this->data['title'])) |
151 | { | 271 | { |
152 | if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'title')) | 272 | if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'title')) |
153 | { | 273 | { |
154 | $this->data['title'] = $this->sanitize($return[0]['data'], SimplePie_Misc::atom_10_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 274 | $this->data['title'] = $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
155 | } | 275 | } |
156 | elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'title')) | 276 | elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'title')) |
157 | { | 277 | { |
158 | $this->data['title'] = $this->sanitize($return[0]['data'], SimplePie_Misc::atom_03_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 278 | $this->data['title'] = $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
159 | } | 279 | } |
160 | elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'title')) | 280 | elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'title')) |
161 | { | 281 | { |
@@ -185,15 +305,30 @@ class SimplePie_Item | |||
185 | return $this->data['title']; | 305 | return $this->data['title']; |
186 | } | 306 | } |
187 | 307 | ||
308 | /** | ||
309 | * Get the content for the item | ||
310 | * | ||
311 | * Prefers summaries over full content, but will return full content if a | ||
312 | * summary does not exist. | ||
313 | * | ||
314 | * To prefer full content instead, use {@see get_content} | ||
315 | * | ||
316 | * Uses `<atom:summary>`, `<description>`, `<dc:description>` or | ||
317 | * `<itunes:subtitle>` | ||
318 | * | ||
319 | * @since 0.8 | ||
320 | * @param boolean $description_only Should we avoid falling back to the content? | ||
321 | * @return string|null | ||
322 | */ | ||
188 | public function get_description($description_only = false) | 323 | public function get_description($description_only = false) |
189 | { | 324 | { |
190 | if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'summary')) | 325 | if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'summary')) |
191 | { | 326 | { |
192 | return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_10_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 327 | return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
193 | } | 328 | } |
194 | elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'summary')) | 329 | elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'summary')) |
195 | { | 330 | { |
196 | return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_03_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 331 | return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
197 | } | 332 | } |
198 | elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'description')) | 333 | elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'description')) |
199 | { | 334 | { |
@@ -234,15 +369,29 @@ class SimplePie_Item | |||
234 | } | 369 | } |
235 | } | 370 | } |
236 | 371 | ||
372 | /** | ||
373 | * Get the content for the item | ||
374 | * | ||
375 | * Prefers full content over summaries, but will return a summary if full | ||
376 | * content does not exist. | ||
377 | * | ||
378 | * To prefer summaries instead, use {@see get_description} | ||
379 | * | ||
380 | * Uses `<atom:content>` or `<content:encoded>` (RSS 1.0 Content Module) | ||
381 | * | ||
382 | * @since 1.0 | ||
383 | * @param boolean $content_only Should we avoid falling back to the description? | ||
384 | * @return string|null | ||
385 | */ | ||
237 | public function get_content($content_only = false) | 386 | public function get_content($content_only = false) |
238 | { | 387 | { |
239 | if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'content')) | 388 | if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'content')) |
240 | { | 389 | { |
241 | return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_10_content_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 390 | return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_content_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
242 | } | 391 | } |
243 | elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'content')) | 392 | elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'content')) |
244 | { | 393 | { |
245 | return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_03_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 394 | return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
246 | } | 395 | } |
247 | elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_10_MODULES_CONTENT, 'encoded')) | 396 | elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_10_MODULES_CONTENT, 'encoded')) |
248 | { | 397 | { |
@@ -258,6 +407,13 @@ class SimplePie_Item | |||
258 | } | 407 | } |
259 | } | 408 | } |
260 | 409 | ||
410 | /** | ||
411 | * Get a category for the item | ||
412 | * | ||
413 | * @since Beta 3 (previously called `get_categories()` since Beta 2) | ||
414 | * @param int $key The category that you want to return. Remember that arrays begin with 0, not 1 | ||
415 | * @return SimplePie_Category|null | ||
416 | */ | ||
261 | public function get_category($key = 0) | 417 | public function get_category($key = 0) |
262 | { | 418 | { |
263 | $categories = $this->get_categories(); | 419 | $categories = $this->get_categories(); |
@@ -271,6 +427,14 @@ class SimplePie_Item | |||
271 | } | 427 | } |
272 | } | 428 | } |
273 | 429 | ||
430 | /** | ||
431 | * Get all categories for the item | ||
432 | * | ||
433 | * Uses `<atom:category>`, `<category>` or `<dc:subject>` | ||
434 | * | ||
435 | * @since Beta 3 | ||
436 | * @return array|null List of {@see SimplePie_Category} objects | ||
437 | */ | ||
274 | public function get_categories() | 438 | public function get_categories() |
275 | { | 439 | { |
276 | $categories = array(); | 440 | $categories = array(); |
@@ -292,7 +456,7 @@ class SimplePie_Item | |||
292 | { | 456 | { |
293 | $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT); | 457 | $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT); |
294 | } | 458 | } |
295 | $categories[] = new $this->feed->category_class($term, $scheme, $label); | 459 | $categories[] = $this->registry->create('Category', array($term, $scheme, $label)); |
296 | } | 460 | } |
297 | foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'category') as $category) | 461 | foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'category') as $category) |
298 | { | 462 | { |
@@ -307,20 +471,20 @@ class SimplePie_Item | |||
307 | { | 471 | { |
308 | $scheme = null; | 472 | $scheme = null; |
309 | } | 473 | } |
310 | $categories[] = new $this->feed->category_class($term, $scheme, null); | 474 | $categories[] = $this->registry->create('Category', array($term, $scheme, null)); |
311 | } | 475 | } |
312 | foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'subject') as $category) | 476 | foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'subject') as $category) |
313 | { | 477 | { |
314 | $categories[] = new $this->feed->category_class($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null); | 478 | $categories[] = $this->registry->create('Category', array($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); |
315 | } | 479 | } |
316 | foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_10, 'subject') as $category) | 480 | foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_10, 'subject') as $category) |
317 | { | 481 | { |
318 | $categories[] = new $this->feed->category_class($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null); | 482 | $categories[] = $this->registry->create('Category', array($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); |
319 | } | 483 | } |
320 | 484 | ||
321 | if (!empty($categories)) | 485 | if (!empty($categories)) |
322 | { | 486 | { |
323 | return SimplePie_Misc::array_unique($categories); | 487 | return array_unique($categories); |
324 | } | 488 | } |
325 | else | 489 | else |
326 | { | 490 | { |
@@ -328,6 +492,13 @@ class SimplePie_Item | |||
328 | } | 492 | } |
329 | } | 493 | } |
330 | 494 | ||
495 | /** | ||
496 | * Get an author for the item | ||
497 | * | ||
498 | * @since Beta 2 | ||
499 | * @param int $key The author that you want to return. Remember that arrays begin with 0, not 1 | ||
500 | * @return SimplePie_Author|null | ||
501 | */ | ||
331 | public function get_author($key = 0) | 502 | public function get_author($key = 0) |
332 | { | 503 | { |
333 | $authors = $this->get_authors(); | 504 | $authors = $this->get_authors(); |
@@ -341,6 +512,13 @@ class SimplePie_Item | |||
341 | } | 512 | } |
342 | } | 513 | } |
343 | 514 | ||
515 | /** | ||
516 | * Get a contributor for the item | ||
517 | * | ||
518 | * @since 1.1 | ||
519 | * @param int $key The contributor that you want to return. Remember that arrays begin with 0, not 1 | ||
520 | * @return SimplePie_Author|null | ||
521 | */ | ||
344 | public function get_contributor($key = 0) | 522 | public function get_contributor($key = 0) |
345 | { | 523 | { |
346 | $contributors = $this->get_contributors(); | 524 | $contributors = $this->get_contributors(); |
@@ -354,6 +532,14 @@ class SimplePie_Item | |||
354 | } | 532 | } |
355 | } | 533 | } |
356 | 534 | ||
535 | /** | ||
536 | * Get all contributors for the item | ||
537 | * | ||
538 | * Uses `<atom:contributor>` | ||
539 | * | ||
540 | * @since 1.1 | ||
541 | * @return array|null List of {@see SimplePie_Author} objects | ||
542 | */ | ||
357 | public function get_contributors() | 543 | public function get_contributors() |
358 | { | 544 | { |
359 | $contributors = array(); | 545 | $contributors = array(); |
@@ -376,7 +562,7 @@ class SimplePie_Item | |||
376 | } | 562 | } |
377 | if ($name !== null || $email !== null || $uri !== null) | 563 | if ($name !== null || $email !== null || $uri !== null) |
378 | { | 564 | { |
379 | $contributors[] = new $this->feed->author_class($name, $uri, $email); | 565 | $contributors[] = $this->registry->create('Author', array($name, $uri, $email)); |
380 | } | 566 | } |
381 | } | 567 | } |
382 | foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'contributor') as $contributor) | 568 | foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'contributor') as $contributor) |
@@ -398,13 +584,13 @@ class SimplePie_Item | |||
398 | } | 584 | } |
399 | if ($name !== null || $email !== null || $url !== null) | 585 | if ($name !== null || $email !== null || $url !== null) |
400 | { | 586 | { |
401 | $contributors[] = new $this->feed->author_class($name, $url, $email); | 587 | $contributors[] = $this->registry->create('Author', array($name, $url, $email)); |
402 | } | 588 | } |
403 | } | 589 | } |
404 | 590 | ||
405 | if (!empty($contributors)) | 591 | if (!empty($contributors)) |
406 | { | 592 | { |
407 | return SimplePie_Misc::array_unique($contributors); | 593 | return array_unique($contributors); |
408 | } | 594 | } |
409 | else | 595 | else |
410 | { | 596 | { |
@@ -412,6 +598,14 @@ class SimplePie_Item | |||
412 | } | 598 | } |
413 | } | 599 | } |
414 | 600 | ||
601 | /** | ||
602 | * Get all authors for the item | ||
603 | * | ||
604 | * Uses `<atom:author>`, `<author>`, `<dc:creator>` or `<itunes:author>` | ||
605 | * | ||
606 | * @since Beta 2 | ||
607 | * @return array|null List of {@see SimplePie_Author} objects | ||
608 | */ | ||
415 | public function get_authors() | 609 | public function get_authors() |
416 | { | 610 | { |
417 | $authors = array(); | 611 | $authors = array(); |
@@ -434,7 +628,7 @@ class SimplePie_Item | |||
434 | } | 628 | } |
435 | if ($name !== null || $email !== null || $uri !== null) | 629 | if ($name !== null || $email !== null || $uri !== null) |
436 | { | 630 | { |
437 | $authors[] = new $this->feed->author_class($name, $uri, $email); | 631 | $authors[] = $this->registry->create('Author', array($name, $uri, $email)); |
438 | } | 632 | } |
439 | } | 633 | } |
440 | if ($author = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'author')) | 634 | if ($author = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'author')) |
@@ -456,29 +650,29 @@ class SimplePie_Item | |||
456 | } | 650 | } |
457 | if ($name !== null || $email !== null || $url !== null) | 651 | if ($name !== null || $email !== null || $url !== null) |
458 | { | 652 | { |
459 | $authors[] = new $this->feed->author_class($name, $url, $email); | 653 | $authors[] = $this->registry->create('Author', array($name, $url, $email)); |
460 | } | 654 | } |
461 | } | 655 | } |
462 | if ($author = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'author')) | 656 | if ($author = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'author')) |
463 | { | 657 | { |
464 | $authors[] = new $this->feed->author_class(null, null, $this->sanitize($author[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT)); | 658 | $authors[] = $this->registry->create('Author', array(null, null, $this->sanitize($author[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT))); |
465 | } | 659 | } |
466 | foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'creator') as $author) | 660 | foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'creator') as $author) |
467 | { | 661 | { |
468 | $authors[] = new $this->feed->author_class($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null); | 662 | $authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); |
469 | } | 663 | } |
470 | foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_10, 'creator') as $author) | 664 | foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_10, 'creator') as $author) |
471 | { | 665 | { |
472 | $authors[] = new $this->feed->author_class($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null); | 666 | $authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); |
473 | } | 667 | } |
474 | foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'author') as $author) | 668 | foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'author') as $author) |
475 | { | 669 | { |
476 | $authors[] = new $this->feed->author_class($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null); | 670 | $authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); |
477 | } | 671 | } |
478 | 672 | ||
479 | if (!empty($authors)) | 673 | if (!empty($authors)) |
480 | { | 674 | { |
481 | return SimplePie_Misc::array_unique($authors); | 675 | return array_unique($authors); |
482 | } | 676 | } |
483 | elseif (($source = $this->get_source()) && ($authors = $source->get_authors())) | 677 | elseif (($source = $this->get_source()) && ($authors = $source->get_authors())) |
484 | { | 678 | { |
@@ -494,11 +688,19 @@ class SimplePie_Item | |||
494 | } | 688 | } |
495 | } | 689 | } |
496 | 690 | ||
691 | /** | ||
692 | * Get the copyright info for the item | ||
693 | * | ||
694 | * Uses `<atom:rights>` or `<dc:rights>` | ||
695 | * | ||
696 | * @since 1.1 | ||
697 | * @return string | ||
698 | */ | ||
497 | public function get_copyright() | 699 | public function get_copyright() |
498 | { | 700 | { |
499 | if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'rights')) | 701 | if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'rights')) |
500 | { | 702 | { |
501 | return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_10_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 703 | return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
502 | } | 704 | } |
503 | elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'rights')) | 705 | elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'rights')) |
504 | { | 706 | { |
@@ -514,6 +716,20 @@ class SimplePie_Item | |||
514 | } | 716 | } |
515 | } | 717 | } |
516 | 718 | ||
719 | /** | ||
720 | * Get the posting date/time for the item | ||
721 | * | ||
722 | * Uses `<atom:published>`, `<atom:updated>`, `<atom:issued>`, | ||
723 | * `<atom:modified>`, `<pubDate>` or `<dc:date>` | ||
724 | * | ||
725 | * Note: obeys PHP's timezone setting. To get a UTC date/time, use | ||
726 | * {@see get_gmdate} | ||
727 | * | ||
728 | * @since Beta 2 (previously called `get_item_date` since 0.8) | ||
729 | * | ||
730 | * @param string $date_format Supports any PHP date format from {@see http://php.net/date} (empty for the raw data) | ||
731 | * @return int|string|null | ||
732 | */ | ||
517 | public function get_date($date_format = 'j F Y, g:i a') | 733 | public function get_date($date_format = 'j F Y, g:i a') |
518 | { | 734 | { |
519 | if (!isset($this->data['date'])) | 735 | if (!isset($this->data['date'])) |
@@ -553,7 +769,7 @@ class SimplePie_Item | |||
553 | 769 | ||
554 | if (!empty($this->data['date']['raw'])) | 770 | if (!empty($this->data['date']['raw'])) |
555 | { | 771 | { |
556 | $parser = SimplePie_Parse_Date::get(); | 772 | $parser = $this->registry->call('Parse_Date', 'get'); |
557 | $this->data['date']['parsed'] = $parser->parse($this->data['date']['raw']); | 773 | $this->data['date']['parsed'] = $parser->parse($this->data['date']['raw']); |
558 | } | 774 | } |
559 | else | 775 | else |
@@ -582,6 +798,70 @@ class SimplePie_Item | |||
582 | } | 798 | } |
583 | } | 799 | } |
584 | 800 | ||
801 | /** | ||
802 | * Get the update date/time for the item | ||
803 | * | ||
804 | * Uses `<atom:updated>` | ||
805 | * | ||
806 | * Note: obeys PHP's timezone setting. To get a UTC date/time, use | ||
807 | * {@see get_updated_gmdate} | ||
808 | * | ||
809 | * @param string $date_format Supports any PHP date format from {@see http://php.net/date} (empty for the raw data) | ||
810 | * @return int|string|null | ||
811 | */ | ||
812 | public function get_updated_date($date_format = 'j F Y, g:i a') | ||
813 | { | ||
814 | if (!isset($this->data['updated'])) | ||
815 | { | ||
816 | if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'updated')) | ||
817 | { | ||
818 | $this->data['updated']['raw'] = $return[0]['data']; | ||
819 | } | ||
820 | |||
821 | if (!empty($this->data['updated']['raw'])) | ||
822 | { | ||
823 | $parser = $this->registry->call('Parse_Date', 'get'); | ||
824 | $this->data['updated']['parsed'] = $parser->parse($this->data['updated']['raw']); | ||
825 | } | ||
826 | else | ||
827 | { | ||
828 | $this->data['updated'] = null; | ||
829 | } | ||
830 | } | ||
831 | if ($this->data['updated']) | ||
832 | { | ||
833 | $date_format = (string) $date_format; | ||
834 | switch ($date_format) | ||
835 | { | ||
836 | case '': | ||
837 | return $this->sanitize($this->data['updated']['raw'], SIMPLEPIE_CONSTRUCT_TEXT); | ||
838 | |||
839 | case 'U': | ||
840 | return $this->data['updated']['parsed']; | ||
841 | |||
842 | default: | ||
843 | return date($date_format, $this->data['updated']['parsed']); | ||
844 | } | ||
845 | } | ||
846 | else | ||
847 | { | ||
848 | return null; | ||
849 | } | ||
850 | } | ||
851 | |||
852 | /** | ||
853 | * Get the localized posting date/time for the item | ||
854 | * | ||
855 | * Returns the date formatted in the localized language. To display in | ||
856 | * languages other than the server's default, you need to change the locale | ||
857 | * with {@link http://php.net/setlocale setlocale()}. The available | ||
858 | * localizations depend on which ones are installed on your web server. | ||
859 | * | ||
860 | * @since 1.0 | ||
861 | * | ||
862 | * @param string $date_format Supports any PHP date format from {@see http://php.net/strftime} (empty for the raw data) | ||
863 | * @return int|string|null | ||
864 | */ | ||
585 | public function get_local_date($date_format = '%c') | 865 | public function get_local_date($date_format = '%c') |
586 | { | 866 | { |
587 | if (!$date_format) | 867 | if (!$date_format) |
@@ -598,6 +878,52 @@ class SimplePie_Item | |||
598 | } | 878 | } |
599 | } | 879 | } |
600 | 880 | ||
881 | /** | ||
882 | * Get the posting date/time for the item (UTC time) | ||
883 | * | ||
884 | * @see get_date | ||
885 | * @param string $date_format Supports any PHP date format from {@see http://php.net/date} | ||
886 | * @return int|string|null | ||
887 | */ | ||
888 | public function get_gmdate($date_format = 'j F Y, g:i a') | ||
889 | { | ||
890 | $date = $this->get_date('U'); | ||
891 | if ($date === null) | ||
892 | { | ||
893 | return null; | ||
894 | } | ||
895 | |||
896 | return gmdate($date_format, $date); | ||
897 | } | ||
898 | |||
899 | /** | ||
900 | * Get the update date/time for the item (UTC time) | ||
901 | * | ||
902 | * @see get_updated_date | ||
903 | * @param string $date_format Supports any PHP date format from {@see http://php.net/date} | ||
904 | * @return int|string|null | ||
905 | */ | ||
906 | public function get_updated_gmdate($date_format = 'j F Y, g:i a') | ||
907 | { | ||
908 | $date = $this->get_updated_date('U'); | ||
909 | if ($date === null) | ||
910 | { | ||
911 | return null; | ||
912 | } | ||
913 | |||
914 | return gmdate($date_format, $date); | ||
915 | } | ||
916 | |||
917 | /** | ||
918 | * Get the permalink for the item | ||
919 | * | ||
920 | * Returns the first link available with a relationship of "alternate". | ||
921 | * Identical to {@see get_link()} with key 0 | ||
922 | * | ||
923 | * @see get_link | ||
924 | * @since 0.8 | ||
925 | * @return string|null Permalink URL | ||
926 | */ | ||
601 | public function get_permalink() | 927 | public function get_permalink() |
602 | { | 928 | { |
603 | $link = $this->get_link(); | 929 | $link = $this->get_link(); |
@@ -616,6 +942,14 @@ class SimplePie_Item | |||
616 | } | 942 | } |
617 | } | 943 | } |
618 | 944 | ||
945 | /** | ||
946 | * Get a single link for the item | ||
947 | * | ||
948 | * @since Beta 3 | ||
949 | * @param int $key The link that you want to return. Remember that arrays begin with 0, not 1 | ||
950 | * @param string $rel The relationship of the link to return | ||
951 | * @return string|null Link URL | ||
952 | */ | ||
619 | public function get_link($key = 0, $rel = 'alternate') | 953 | public function get_link($key = 0, $rel = 'alternate') |
620 | { | 954 | { |
621 | $links = $this->get_links($rel); | 955 | $links = $this->get_links($rel); |
@@ -629,6 +963,15 @@ class SimplePie_Item | |||
629 | } | 963 | } |
630 | } | 964 | } |
631 | 965 | ||
966 | /** | ||
967 | * Get all links for the item | ||
968 | * | ||
969 | * Uses `<atom:link>`, `<link>` or `<guid>` | ||
970 | * | ||
971 | * @since Beta 2 | ||
972 | * @param string $rel The relationship of links to return | ||
973 | * @return array|null Links found for the item (strings) | ||
974 | */ | ||
632 | public function get_links($rel = 'alternate') | 975 | public function get_links($rel = 'alternate') |
633 | { | 976 | { |
634 | if (!isset($this->data['links'])) | 977 | if (!isset($this->data['links'])) |
@@ -674,7 +1017,7 @@ class SimplePie_Item | |||
674 | $keys = array_keys($this->data['links']); | 1017 | $keys = array_keys($this->data['links']); |
675 | foreach ($keys as $key) | 1018 | foreach ($keys as $key) |
676 | { | 1019 | { |
677 | if (SimplePie_Misc::is_isegment_nz_nc($key)) | 1020 | if ($this->registry->call('Misc', 'is_isegment_nz_nc', array($key))) |
678 | { | 1021 | { |
679 | if (isset($this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key])) | 1022 | if (isset($this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key])) |
680 | { | 1023 | { |
@@ -704,7 +1047,14 @@ class SimplePie_Item | |||
704 | } | 1047 | } |
705 | 1048 | ||
706 | /** | 1049 | /** |
1050 | * Get an enclosure from the item | ||
1051 | * | ||
1052 | * Supports the <enclosure> RSS tag, as well as Media RSS and iTunes RSS. | ||
1053 | * | ||
1054 | * @since Beta 2 | ||
707 | * @todo Add ability to prefer one type of content over another (in a media group). | 1055 | * @todo Add ability to prefer one type of content over another (in a media group). |
1056 | * @param int $key The enclosure that you want to return. Remember that arrays begin with 0, not 1 | ||
1057 | * @return SimplePie_Enclosure|null | ||
708 | */ | 1058 | */ |
709 | public function get_enclosure($key = 0, $prefer = null) | 1059 | public function get_enclosure($key = 0, $prefer = null) |
710 | { | 1060 | { |
@@ -720,14 +1070,18 @@ class SimplePie_Item | |||
720 | } | 1070 | } |
721 | 1071 | ||
722 | /** | 1072 | /** |
723 | * Grabs all available enclosures (podcasts, etc.) | 1073 | * Get all available enclosures (podcasts, etc.) |
724 | * | 1074 | * |
725 | * Supports the <enclosure> RSS tag, as well as Media RSS and iTunes RSS. | 1075 | * Supports the <enclosure> RSS tag, as well as Media RSS and iTunes RSS. |
726 | * | 1076 | * |
727 | * At this point, we're pretty much assuming that all enclosures for an item are the same content. Anything else is too complicated to properly support. | 1077 | * At this point, we're pretty much assuming that all enclosures for an item |
1078 | * are the same content. Anything else is too complicated to | ||
1079 | * properly support. | ||
728 | * | 1080 | * |
1081 | * @since Beta 2 | ||
729 | * @todo Add support for end-user defined sorting of enclosures by type/handler (so we can prefer the faster-loading FLV over MP4). | 1082 | * @todo Add support for end-user defined sorting of enclosures by type/handler (so we can prefer the faster-loading FLV over MP4). |
730 | * @todo If an element exists at a level, but its value is empty, we should fall back to the value from the parent (if it exists). | 1083 | * @todo If an element exists at a level, but its value is empty, we should fall back to the value from the parent (if it exists). |
1084 | * @return array|null List of SimplePie_Enclosure items | ||
731 | */ | 1085 | */ |
732 | public function get_enclosures() | 1086 | public function get_enclosures() |
733 | { | 1087 | { |
@@ -783,7 +1137,7 @@ class SimplePie_Item | |||
783 | { | 1137 | { |
784 | $caption_text = $this->sanitize($caption['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 1138 | $caption_text = $this->sanitize($caption['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
785 | } | 1139 | } |
786 | $captions_parent[] = new $this->feed->caption_class($caption_type, $caption_lang, $caption_startTime, $caption_endTime, $caption_text); | 1140 | $captions_parent[] = $this->registry->create('Caption', array($caption_type, $caption_lang, $caption_startTime, $caption_endTime, $caption_text)); |
787 | } | 1141 | } |
788 | } | 1142 | } |
789 | elseif ($captions = $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_MEDIARSS, 'text')) | 1143 | elseif ($captions = $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_MEDIARSS, 'text')) |
@@ -815,12 +1169,12 @@ class SimplePie_Item | |||
815 | { | 1169 | { |
816 | $caption_text = $this->sanitize($caption['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 1170 | $caption_text = $this->sanitize($caption['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
817 | } | 1171 | } |
818 | $captions_parent[] = new $this->feed->caption_class($caption_type, $caption_lang, $caption_startTime, $caption_endTime, $caption_text); | 1172 | $captions_parent[] = $this->registry->create('Caption', array($caption_type, $caption_lang, $caption_startTime, $caption_endTime, $caption_text)); |
819 | } | 1173 | } |
820 | } | 1174 | } |
821 | if (is_array($captions_parent)) | 1175 | if (is_array($captions_parent)) |
822 | { | 1176 | { |
823 | $captions_parent = array_values(SimplePie_Misc::array_unique($captions_parent)); | 1177 | $captions_parent = array_values(array_unique($captions_parent)); |
824 | } | 1178 | } |
825 | 1179 | ||
826 | // CATEGORIES | 1180 | // CATEGORIES |
@@ -845,7 +1199,7 @@ class SimplePie_Item | |||
845 | { | 1199 | { |
846 | $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT); | 1200 | $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT); |
847 | } | 1201 | } |
848 | $categories_parent[] = new $this->feed->category_class($term, $scheme, $label); | 1202 | $categories_parent[] = $this->registry->create('Category', array($term, $scheme, $label)); |
849 | } | 1203 | } |
850 | foreach ((array) $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_MEDIARSS, 'category') as $category) | 1204 | foreach ((array) $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_MEDIARSS, 'category') as $category) |
851 | { | 1205 | { |
@@ -868,7 +1222,7 @@ class SimplePie_Item | |||
868 | { | 1222 | { |
869 | $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT); | 1223 | $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT); |
870 | } | 1224 | } |
871 | $categories_parent[] = new $this->feed->category_class($term, $scheme, $label); | 1225 | $categories_parent[] = $this->registry->create('Category', array($term, $scheme, $label)); |
872 | } | 1226 | } |
873 | foreach ((array) $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'category') as $category) | 1227 | foreach ((array) $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'category') as $category) |
874 | { | 1228 | { |
@@ -879,7 +1233,7 @@ class SimplePie_Item | |||
879 | { | 1233 | { |
880 | $label = $this->sanitize($category['attribs']['']['text'], SIMPLEPIE_CONSTRUCT_TEXT); | 1234 | $label = $this->sanitize($category['attribs']['']['text'], SIMPLEPIE_CONSTRUCT_TEXT); |
881 | } | 1235 | } |
882 | $categories_parent[] = new $this->feed->category_class($term, $scheme, $label); | 1236 | $categories_parent[] = $this->registry->create('Category', array($term, $scheme, $label)); |
883 | 1237 | ||
884 | if (isset($category['child'][SIMPLEPIE_NAMESPACE_ITUNES]['category'])) | 1238 | if (isset($category['child'][SIMPLEPIE_NAMESPACE_ITUNES]['category'])) |
885 | { | 1239 | { |
@@ -889,13 +1243,13 @@ class SimplePie_Item | |||
889 | { | 1243 | { |
890 | $label = $this->sanitize($subcategory['attribs']['']['text'], SIMPLEPIE_CONSTRUCT_TEXT); | 1244 | $label = $this->sanitize($subcategory['attribs']['']['text'], SIMPLEPIE_CONSTRUCT_TEXT); |
891 | } | 1245 | } |
892 | $categories_parent[] = new $this->feed->category_class($term, $scheme, $label); | 1246 | $categories_parent[] = $this->registry->create('Category', array($term, $scheme, $label)); |
893 | } | 1247 | } |
894 | } | 1248 | } |
895 | } | 1249 | } |
896 | if (is_array($categories_parent)) | 1250 | if (is_array($categories_parent)) |
897 | { | 1251 | { |
898 | $categories_parent = array_values(SimplePie_Misc::array_unique($categories_parent)); | 1252 | $categories_parent = array_values(array_unique($categories_parent)); |
899 | } | 1253 | } |
900 | 1254 | ||
901 | // COPYRIGHT | 1255 | // COPYRIGHT |
@@ -911,7 +1265,7 @@ class SimplePie_Item | |||
911 | { | 1265 | { |
912 | $copyright_label = $this->sanitize($copyright[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 1266 | $copyright_label = $this->sanitize($copyright[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
913 | } | 1267 | } |
914 | $copyrights_parent = new $this->feed->copyright_class($copyright_url, $copyright_label); | 1268 | $copyrights_parent = $this->registry->create('Copyright', array($copyright_url, $copyright_label)); |
915 | } | 1269 | } |
916 | elseif ($copyright = $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_MEDIARSS, 'copyright')) | 1270 | elseif ($copyright = $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_MEDIARSS, 'copyright')) |
917 | { | 1271 | { |
@@ -925,7 +1279,7 @@ class SimplePie_Item | |||
925 | { | 1279 | { |
926 | $copyright_label = $this->sanitize($copyright[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 1280 | $copyright_label = $this->sanitize($copyright[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
927 | } | 1281 | } |
928 | $copyrights_parent = new $this->feed->copyright_class($copyright_url, $copyright_label); | 1282 | $copyrights_parent = $this->registry->create('Copyright', array($copyright_url, $copyright_label)); |
929 | } | 1283 | } |
930 | 1284 | ||
931 | // CREDITS | 1285 | // CREDITS |
@@ -952,7 +1306,7 @@ class SimplePie_Item | |||
952 | { | 1306 | { |
953 | $credit_name = $this->sanitize($credit['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 1307 | $credit_name = $this->sanitize($credit['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
954 | } | 1308 | } |
955 | $credits_parent[] = new $this->feed->credit_class($credit_role, $credit_scheme, $credit_name); | 1309 | $credits_parent[] = $this->registry->create('Credit', array($credit_role, $credit_scheme, $credit_name)); |
956 | } | 1310 | } |
957 | } | 1311 | } |
958 | elseif ($credits = $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_MEDIARSS, 'credit')) | 1312 | elseif ($credits = $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_MEDIARSS, 'credit')) |
@@ -978,12 +1332,12 @@ class SimplePie_Item | |||
978 | { | 1332 | { |
979 | $credit_name = $this->sanitize($credit['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 1333 | $credit_name = $this->sanitize($credit['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
980 | } | 1334 | } |
981 | $credits_parent[] = new $this->feed->credit_class($credit_role, $credit_scheme, $credit_name); | 1335 | $credits_parent[] = $this->registry->create('Credit', array($credit_role, $credit_scheme, $credit_name)); |
982 | } | 1336 | } |
983 | } | 1337 | } |
984 | if (is_array($credits_parent)) | 1338 | if (is_array($credits_parent)) |
985 | { | 1339 | { |
986 | $credits_parent = array_values(SimplePie_Misc::array_unique($credits_parent)); | 1340 | $credits_parent = array_values(array_unique($credits_parent)); |
987 | } | 1341 | } |
988 | 1342 | ||
989 | // DESCRIPTION | 1343 | // DESCRIPTION |
@@ -1075,7 +1429,7 @@ class SimplePie_Item | |||
1075 | } | 1429 | } |
1076 | if (is_array($hashes_parent)) | 1430 | if (is_array($hashes_parent)) |
1077 | { | 1431 | { |
1078 | $hashes_parent = array_values(SimplePie_Misc::array_unique($hashes_parent)); | 1432 | $hashes_parent = array_values(array_unique($hashes_parent)); |
1079 | } | 1433 | } |
1080 | 1434 | ||
1081 | // KEYWORDS | 1435 | // KEYWORDS |
@@ -1129,7 +1483,7 @@ class SimplePie_Item | |||
1129 | } | 1483 | } |
1130 | if (is_array($keywords_parent)) | 1484 | if (is_array($keywords_parent)) |
1131 | { | 1485 | { |
1132 | $keywords_parent = array_values(SimplePie_Misc::array_unique($keywords_parent)); | 1486 | $keywords_parent = array_values(array_unique($keywords_parent)); |
1133 | } | 1487 | } |
1134 | 1488 | ||
1135 | // PLAYER | 1489 | // PLAYER |
@@ -1167,7 +1521,7 @@ class SimplePie_Item | |||
1167 | { | 1521 | { |
1168 | $rating_value = $this->sanitize($rating['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 1522 | $rating_value = $this->sanitize($rating['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
1169 | } | 1523 | } |
1170 | $ratings_parent[] = new $this->feed->rating_class($rating_scheme, $rating_value); | 1524 | $ratings_parent[] = $this->registry->create('Rating', array($rating_scheme, $rating_value)); |
1171 | } | 1525 | } |
1172 | } | 1526 | } |
1173 | elseif ($ratings = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'explicit')) | 1527 | elseif ($ratings = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'explicit')) |
@@ -1180,7 +1534,7 @@ class SimplePie_Item | |||
1180 | { | 1534 | { |
1181 | $rating_value = $this->sanitize($rating['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 1535 | $rating_value = $this->sanitize($rating['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
1182 | } | 1536 | } |
1183 | $ratings_parent[] = new $this->feed->rating_class($rating_scheme, $rating_value); | 1537 | $ratings_parent[] = $this->registry->create('Rating', array($rating_scheme, $rating_value)); |
1184 | } | 1538 | } |
1185 | } | 1539 | } |
1186 | elseif ($ratings = $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_MEDIARSS, 'rating')) | 1540 | elseif ($ratings = $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_MEDIARSS, 'rating')) |
@@ -1201,7 +1555,7 @@ class SimplePie_Item | |||
1201 | { | 1555 | { |
1202 | $rating_value = $this->sanitize($rating['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 1556 | $rating_value = $this->sanitize($rating['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
1203 | } | 1557 | } |
1204 | $ratings_parent[] = new $this->feed->rating_class($rating_scheme, $rating_value); | 1558 | $ratings_parent[] = $this->registry->create('Rating', array($rating_scheme, $rating_value)); |
1205 | } | 1559 | } |
1206 | } | 1560 | } |
1207 | elseif ($ratings = $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'explicit')) | 1561 | elseif ($ratings = $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'explicit')) |
@@ -1214,12 +1568,12 @@ class SimplePie_Item | |||
1214 | { | 1568 | { |
1215 | $rating_value = $this->sanitize($rating['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 1569 | $rating_value = $this->sanitize($rating['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
1216 | } | 1570 | } |
1217 | $ratings_parent[] = new $this->feed->rating_class($rating_scheme, $rating_value); | 1571 | $ratings_parent[] = $this->registry->create('Rating', array($rating_scheme, $rating_value)); |
1218 | } | 1572 | } |
1219 | } | 1573 | } |
1220 | if (is_array($ratings_parent)) | 1574 | if (is_array($ratings_parent)) |
1221 | { | 1575 | { |
1222 | $ratings_parent = array_values(SimplePie_Misc::array_unique($ratings_parent)); | 1576 | $ratings_parent = array_values(array_unique($ratings_parent)); |
1223 | } | 1577 | } |
1224 | 1578 | ||
1225 | // RESTRICTIONS | 1579 | // RESTRICTIONS |
@@ -1242,7 +1596,7 @@ class SimplePie_Item | |||
1242 | { | 1596 | { |
1243 | $restriction_value = $this->sanitize($restriction['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 1597 | $restriction_value = $this->sanitize($restriction['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
1244 | } | 1598 | } |
1245 | $restrictions_parent[] = new $this->feed->restriction_class($restriction_relationship, $restriction_type, $restriction_value); | 1599 | $restrictions_parent[] = $this->registry->create('Restriction', array($restriction_relationship, $restriction_type, $restriction_value)); |
1246 | } | 1600 | } |
1247 | } | 1601 | } |
1248 | elseif ($restrictions = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'block')) | 1602 | elseif ($restrictions = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'block')) |
@@ -1256,7 +1610,7 @@ class SimplePie_Item | |||
1256 | { | 1610 | { |
1257 | $restriction_relationship = 'deny'; | 1611 | $restriction_relationship = 'deny'; |
1258 | } | 1612 | } |
1259 | $restrictions_parent[] = new $this->feed->restriction_class($restriction_relationship, $restriction_type, $restriction_value); | 1613 | $restrictions_parent[] = $this->registry->create('Restriction', array($restriction_relationship, $restriction_type, $restriction_value)); |
1260 | } | 1614 | } |
1261 | } | 1615 | } |
1262 | elseif ($restrictions = $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_MEDIARSS, 'restriction')) | 1616 | elseif ($restrictions = $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_MEDIARSS, 'restriction')) |
@@ -1278,7 +1632,7 @@ class SimplePie_Item | |||
1278 | { | 1632 | { |
1279 | $restriction_value = $this->sanitize($restriction['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 1633 | $restriction_value = $this->sanitize($restriction['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
1280 | } | 1634 | } |
1281 | $restrictions_parent[] = new $this->feed->restriction_class($restriction_relationship, $restriction_type, $restriction_value); | 1635 | $restrictions_parent[] = $this->registry->create('Restriction', array($restriction_relationship, $restriction_type, $restriction_value)); |
1282 | } | 1636 | } |
1283 | } | 1637 | } |
1284 | elseif ($restrictions = $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'block')) | 1638 | elseif ($restrictions = $parent->get_channel_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'block')) |
@@ -1292,12 +1646,16 @@ class SimplePie_Item | |||
1292 | { | 1646 | { |
1293 | $restriction_relationship = 'deny'; | 1647 | $restriction_relationship = 'deny'; |
1294 | } | 1648 | } |
1295 | $restrictions_parent[] = new $this->feed->restriction_class($restriction_relationship, $restriction_type, $restriction_value); | 1649 | $restrictions_parent[] = $this->registry->create('Restriction', array($restriction_relationship, $restriction_type, $restriction_value)); |
1296 | } | 1650 | } |
1297 | } | 1651 | } |
1298 | if (is_array($restrictions_parent)) | 1652 | if (is_array($restrictions_parent)) |
1299 | { | 1653 | { |
1300 | $restrictions_parent = array_values(SimplePie_Misc::array_unique($restrictions_parent)); | 1654 | $restrictions_parent = array_values(array_unique($restrictions_parent)); |
1655 | } | ||
1656 | else | ||
1657 | { | ||
1658 | $restrictions_parent = array(new SimplePie_Restriction('allow', null, 'default')); | ||
1301 | } | 1659 | } |
1302 | 1660 | ||
1303 | // THUMBNAILS | 1661 | // THUMBNAILS |
@@ -1498,11 +1856,11 @@ class SimplePie_Item | |||
1498 | { | 1856 | { |
1499 | $caption_text = $this->sanitize($caption['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 1857 | $caption_text = $this->sanitize($caption['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
1500 | } | 1858 | } |
1501 | $captions[] = new $this->feed->caption_class($caption_type, $caption_lang, $caption_startTime, $caption_endTime, $caption_text); | 1859 | $captions[] = $this->registry->create('Caption', array($caption_type, $caption_lang, $caption_startTime, $caption_endTime, $caption_text)); |
1502 | } | 1860 | } |
1503 | if (is_array($captions)) | 1861 | if (is_array($captions)) |
1504 | { | 1862 | { |
1505 | $captions = array_values(SimplePie_Misc::array_unique($captions)); | 1863 | $captions = array_values(array_unique($captions)); |
1506 | } | 1864 | } |
1507 | } | 1865 | } |
1508 | elseif (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['text'])) | 1866 | elseif (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['text'])) |
@@ -1534,11 +1892,11 @@ class SimplePie_Item | |||
1534 | { | 1892 | { |
1535 | $caption_text = $this->sanitize($caption['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 1893 | $caption_text = $this->sanitize($caption['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
1536 | } | 1894 | } |
1537 | $captions[] = new $this->feed->caption_class($caption_type, $caption_lang, $caption_startTime, $caption_endTime, $caption_text); | 1895 | $captions[] = $this->registry->create('Caption', array($caption_type, $caption_lang, $caption_startTime, $caption_endTime, $caption_text)); |
1538 | } | 1896 | } |
1539 | if (is_array($captions)) | 1897 | if (is_array($captions)) |
1540 | { | 1898 | { |
1541 | $captions = array_values(SimplePie_Misc::array_unique($captions)); | 1899 | $captions = array_values(array_unique($captions)); |
1542 | } | 1900 | } |
1543 | } | 1901 | } |
1544 | else | 1902 | else |
@@ -1570,7 +1928,7 @@ class SimplePie_Item | |||
1570 | { | 1928 | { |
1571 | $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT); | 1929 | $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT); |
1572 | } | 1930 | } |
1573 | $categories[] = new $this->feed->category_class($term, $scheme, $label); | 1931 | $categories[] = $this->registry->create('Category', array($term, $scheme, $label)); |
1574 | } | 1932 | } |
1575 | } | 1933 | } |
1576 | if (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['category'])) | 1934 | if (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['category'])) |
@@ -1596,20 +1954,20 @@ class SimplePie_Item | |||
1596 | { | 1954 | { |
1597 | $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT); | 1955 | $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT); |
1598 | } | 1956 | } |
1599 | $categories[] = new $this->feed->category_class($term, $scheme, $label); | 1957 | $categories[] = $this->registry->create('Category', array($term, $scheme, $label)); |
1600 | } | 1958 | } |
1601 | } | 1959 | } |
1602 | if (is_array($categories) && is_array($categories_parent)) | 1960 | if (is_array($categories) && is_array($categories_parent)) |
1603 | { | 1961 | { |
1604 | $categories = array_values(SimplePie_Misc::array_unique(array_merge($categories, $categories_parent))); | 1962 | $categories = array_values(array_unique(array_merge($categories, $categories_parent))); |
1605 | } | 1963 | } |
1606 | elseif (is_array($categories)) | 1964 | elseif (is_array($categories)) |
1607 | { | 1965 | { |
1608 | $categories = array_values(SimplePie_Misc::array_unique($categories)); | 1966 | $categories = array_values(array_unique($categories)); |
1609 | } | 1967 | } |
1610 | elseif (is_array($categories_parent)) | 1968 | elseif (is_array($categories_parent)) |
1611 | { | 1969 | { |
1612 | $categories = array_values(SimplePie_Misc::array_unique($categories_parent)); | 1970 | $categories = array_values(array_unique($categories_parent)); |
1613 | } | 1971 | } |
1614 | 1972 | ||
1615 | // COPYRIGHTS | 1973 | // COPYRIGHTS |
@@ -1625,7 +1983,7 @@ class SimplePie_Item | |||
1625 | { | 1983 | { |
1626 | $copyright_label = $this->sanitize($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['copyright'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 1984 | $copyright_label = $this->sanitize($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['copyright'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
1627 | } | 1985 | } |
1628 | $copyrights = new $this->feed->copyright_class($copyright_url, $copyright_label); | 1986 | $copyrights = $this->registry->create('Copyright', array($copyright_url, $copyright_label)); |
1629 | } | 1987 | } |
1630 | elseif (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['copyright'])) | 1988 | elseif (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['copyright'])) |
1631 | { | 1989 | { |
@@ -1639,7 +1997,7 @@ class SimplePie_Item | |||
1639 | { | 1997 | { |
1640 | $copyright_label = $this->sanitize($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['copyright'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 1998 | $copyright_label = $this->sanitize($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['copyright'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
1641 | } | 1999 | } |
1642 | $copyrights = new $this->feed->copyright_class($copyright_url, $copyright_label); | 2000 | $copyrights = $this->registry->create('Copyright', array($copyright_url, $copyright_label)); |
1643 | } | 2001 | } |
1644 | else | 2002 | else |
1645 | { | 2003 | { |
@@ -1670,11 +2028,11 @@ class SimplePie_Item | |||
1670 | { | 2028 | { |
1671 | $credit_name = $this->sanitize($credit['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 2029 | $credit_name = $this->sanitize($credit['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
1672 | } | 2030 | } |
1673 | $credits[] = new $this->feed->credit_class($credit_role, $credit_scheme, $credit_name); | 2031 | $credits[] = $this->registry->create('Credit', array($credit_role, $credit_scheme, $credit_name)); |
1674 | } | 2032 | } |
1675 | if (is_array($credits)) | 2033 | if (is_array($credits)) |
1676 | { | 2034 | { |
1677 | $credits = array_values(SimplePie_Misc::array_unique($credits)); | 2035 | $credits = array_values(array_unique($credits)); |
1678 | } | 2036 | } |
1679 | } | 2037 | } |
1680 | elseif (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['credit'])) | 2038 | elseif (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['credit'])) |
@@ -1700,11 +2058,11 @@ class SimplePie_Item | |||
1700 | { | 2058 | { |
1701 | $credit_name = $this->sanitize($credit['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 2059 | $credit_name = $this->sanitize($credit['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
1702 | } | 2060 | } |
1703 | $credits[] = new $this->feed->credit_class($credit_role, $credit_scheme, $credit_name); | 2061 | $credits[] = $this->registry->create('Credit', array($credit_role, $credit_scheme, $credit_name)); |
1704 | } | 2062 | } |
1705 | if (is_array($credits)) | 2063 | if (is_array($credits)) |
1706 | { | 2064 | { |
1707 | $credits = array_values(SimplePie_Misc::array_unique($credits)); | 2065 | $credits = array_values(array_unique($credits)); |
1708 | } | 2066 | } |
1709 | } | 2067 | } |
1710 | else | 2068 | else |
@@ -1749,7 +2107,7 @@ class SimplePie_Item | |||
1749 | } | 2107 | } |
1750 | if (is_array($hashes)) | 2108 | if (is_array($hashes)) |
1751 | { | 2109 | { |
1752 | $hashes = array_values(SimplePie_Misc::array_unique($hashes)); | 2110 | $hashes = array_values(array_unique($hashes)); |
1753 | } | 2111 | } |
1754 | } | 2112 | } |
1755 | elseif (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['hash'])) | 2113 | elseif (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['hash'])) |
@@ -1774,7 +2132,7 @@ class SimplePie_Item | |||
1774 | } | 2132 | } |
1775 | if (is_array($hashes)) | 2133 | if (is_array($hashes)) |
1776 | { | 2134 | { |
1777 | $hashes = array_values(SimplePie_Misc::array_unique($hashes)); | 2135 | $hashes = array_values(array_unique($hashes)); |
1778 | } | 2136 | } |
1779 | } | 2137 | } |
1780 | else | 2138 | else |
@@ -1796,7 +2154,7 @@ class SimplePie_Item | |||
1796 | } | 2154 | } |
1797 | if (is_array($keywords)) | 2155 | if (is_array($keywords)) |
1798 | { | 2156 | { |
1799 | $keywords = array_values(SimplePie_Misc::array_unique($keywords)); | 2157 | $keywords = array_values(array_unique($keywords)); |
1800 | } | 2158 | } |
1801 | } | 2159 | } |
1802 | elseif (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['keywords'])) | 2160 | elseif (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['keywords'])) |
@@ -1812,7 +2170,7 @@ class SimplePie_Item | |||
1812 | } | 2170 | } |
1813 | if (is_array($keywords)) | 2171 | if (is_array($keywords)) |
1814 | { | 2172 | { |
1815 | $keywords = array_values(SimplePie_Misc::array_unique($keywords)); | 2173 | $keywords = array_values(array_unique($keywords)); |
1816 | } | 2174 | } |
1817 | } | 2175 | } |
1818 | else | 2176 | else |
@@ -1853,11 +2211,11 @@ class SimplePie_Item | |||
1853 | { | 2211 | { |
1854 | $rating_value = $this->sanitize($rating['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 2212 | $rating_value = $this->sanitize($rating['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
1855 | } | 2213 | } |
1856 | $ratings[] = new $this->feed->rating_class($rating_scheme, $rating_value); | 2214 | $ratings[] = $this->registry->create('Rating', array($rating_scheme, $rating_value)); |
1857 | } | 2215 | } |
1858 | if (is_array($ratings)) | 2216 | if (is_array($ratings)) |
1859 | { | 2217 | { |
1860 | $ratings = array_values(SimplePie_Misc::array_unique($ratings)); | 2218 | $ratings = array_values(array_unique($ratings)); |
1861 | } | 2219 | } |
1862 | } | 2220 | } |
1863 | elseif (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['rating'])) | 2221 | elseif (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['rating'])) |
@@ -1878,11 +2236,11 @@ class SimplePie_Item | |||
1878 | { | 2236 | { |
1879 | $rating_value = $this->sanitize($rating['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 2237 | $rating_value = $this->sanitize($rating['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
1880 | } | 2238 | } |
1881 | $ratings[] = new $this->feed->rating_class($rating_scheme, $rating_value); | 2239 | $ratings[] = $this->registry->create('Rating', array($rating_scheme, $rating_value)); |
1882 | } | 2240 | } |
1883 | if (is_array($ratings)) | 2241 | if (is_array($ratings)) |
1884 | { | 2242 | { |
1885 | $ratings = array_values(SimplePie_Misc::array_unique($ratings)); | 2243 | $ratings = array_values(array_unique($ratings)); |
1886 | } | 2244 | } |
1887 | } | 2245 | } |
1888 | else | 2246 | else |
@@ -1910,11 +2268,11 @@ class SimplePie_Item | |||
1910 | { | 2268 | { |
1911 | $restriction_value = $this->sanitize($restriction['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 2269 | $restriction_value = $this->sanitize($restriction['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
1912 | } | 2270 | } |
1913 | $restrictions[] = new $this->feed->restriction_class($restriction_relationship, $restriction_type, $restriction_value); | 2271 | $restrictions[] = $this->registry->create('Restriction', array($restriction_relationship, $restriction_type, $restriction_value)); |
1914 | } | 2272 | } |
1915 | if (is_array($restrictions)) | 2273 | if (is_array($restrictions)) |
1916 | { | 2274 | { |
1917 | $restrictions = array_values(SimplePie_Misc::array_unique($restrictions)); | 2275 | $restrictions = array_values(array_unique($restrictions)); |
1918 | } | 2276 | } |
1919 | } | 2277 | } |
1920 | elseif (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['restriction'])) | 2278 | elseif (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['restriction'])) |
@@ -1936,11 +2294,11 @@ class SimplePie_Item | |||
1936 | { | 2294 | { |
1937 | $restriction_value = $this->sanitize($restriction['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 2295 | $restriction_value = $this->sanitize($restriction['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
1938 | } | 2296 | } |
1939 | $restrictions[] = new $this->feed->restriction_class($restriction_relationship, $restriction_type, $restriction_value); | 2297 | $restrictions[] = $this->registry->create('Restriction', array($restriction_relationship, $restriction_type, $restriction_value)); |
1940 | } | 2298 | } |
1941 | if (is_array($restrictions)) | 2299 | if (is_array($restrictions)) |
1942 | { | 2300 | { |
1943 | $restrictions = array_values(SimplePie_Misc::array_unique($restrictions)); | 2301 | $restrictions = array_values(array_unique($restrictions)); |
1944 | } | 2302 | } |
1945 | } | 2303 | } |
1946 | else | 2304 | else |
@@ -1957,7 +2315,7 @@ class SimplePie_Item | |||
1957 | } | 2315 | } |
1958 | if (is_array($thumbnails)) | 2316 | if (is_array($thumbnails)) |
1959 | { | 2317 | { |
1960 | $thumbnails = array_values(SimplePie_Misc::array_unique($thumbnails)); | 2318 | $thumbnails = array_values(array_unique($thumbnails)); |
1961 | } | 2319 | } |
1962 | } | 2320 | } |
1963 | elseif (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['thumbnail'])) | 2321 | elseif (isset($group['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['thumbnail'])) |
@@ -1968,7 +2326,7 @@ class SimplePie_Item | |||
1968 | } | 2326 | } |
1969 | if (is_array($thumbnails)) | 2327 | if (is_array($thumbnails)) |
1970 | { | 2328 | { |
1971 | $thumbnails = array_values(SimplePie_Misc::array_unique($thumbnails)); | 2329 | $thumbnails = array_values(array_unique($thumbnails)); |
1972 | } | 2330 | } |
1973 | } | 2331 | } |
1974 | else | 2332 | else |
@@ -1990,7 +2348,7 @@ class SimplePie_Item | |||
1990 | $title = $title_parent; | 2348 | $title = $title_parent; |
1991 | } | 2349 | } |
1992 | 2350 | ||
1993 | $this->data['enclosures'][] = new $this->feed->enclosure_class($url, $type, $length, null, $bitrate, $captions, $categories, $channels, $copyrights, $credits, $description, $duration, $expression, $framerate, $hashes, $height, $keywords, $lang, $medium, $player, $ratings, $restrictions, $samplingrate, $thumbnails, $title, $width); | 2351 | $this->data['enclosures'][] = $this->registry->create('Enclosure', array($url, $type, $length, null, $bitrate, $captions, $categories, $channels, $copyrights, $credits, $description, $duration, $expression, $framerate, $hashes, $height, $keywords, $lang, $medium, $player, $ratings, $restrictions, $samplingrate, $thumbnails, $title, $width)); |
1994 | } | 2352 | } |
1995 | } | 2353 | } |
1996 | } | 2354 | } |
@@ -2122,11 +2480,11 @@ class SimplePie_Item | |||
2122 | { | 2480 | { |
2123 | $caption_text = $this->sanitize($caption['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 2481 | $caption_text = $this->sanitize($caption['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
2124 | } | 2482 | } |
2125 | $captions[] = new $this->feed->caption_class($caption_type, $caption_lang, $caption_startTime, $caption_endTime, $caption_text); | 2483 | $captions[] = $this->registry->create('Caption', array($caption_type, $caption_lang, $caption_startTime, $caption_endTime, $caption_text)); |
2126 | } | 2484 | } |
2127 | if (is_array($captions)) | 2485 | if (is_array($captions)) |
2128 | { | 2486 | { |
2129 | $captions = array_values(SimplePie_Misc::array_unique($captions)); | 2487 | $captions = array_values(array_unique($captions)); |
2130 | } | 2488 | } |
2131 | } | 2489 | } |
2132 | else | 2490 | else |
@@ -2158,20 +2516,20 @@ class SimplePie_Item | |||
2158 | { | 2516 | { |
2159 | $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT); | 2517 | $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT); |
2160 | } | 2518 | } |
2161 | $categories[] = new $this->feed->category_class($term, $scheme, $label); | 2519 | $categories[] = $this->registry->create('Category', array($term, $scheme, $label)); |
2162 | } | 2520 | } |
2163 | } | 2521 | } |
2164 | if (is_array($categories) && is_array($categories_parent)) | 2522 | if (is_array($categories) && is_array($categories_parent)) |
2165 | { | 2523 | { |
2166 | $categories = array_values(SimplePie_Misc::array_unique(array_merge($categories, $categories_parent))); | 2524 | $categories = array_values(array_unique(array_merge($categories, $categories_parent))); |
2167 | } | 2525 | } |
2168 | elseif (is_array($categories)) | 2526 | elseif (is_array($categories)) |
2169 | { | 2527 | { |
2170 | $categories = array_values(SimplePie_Misc::array_unique($categories)); | 2528 | $categories = array_values(array_unique($categories)); |
2171 | } | 2529 | } |
2172 | elseif (is_array($categories_parent)) | 2530 | elseif (is_array($categories_parent)) |
2173 | { | 2531 | { |
2174 | $categories = array_values(SimplePie_Misc::array_unique($categories_parent)); | 2532 | $categories = array_values(array_unique($categories_parent)); |
2175 | } | 2533 | } |
2176 | else | 2534 | else |
2177 | { | 2535 | { |
@@ -2191,7 +2549,7 @@ class SimplePie_Item | |||
2191 | { | 2549 | { |
2192 | $copyright_label = $this->sanitize($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['copyright'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 2550 | $copyright_label = $this->sanitize($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['copyright'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
2193 | } | 2551 | } |
2194 | $copyrights = new $this->feed->copyright_class($copyright_url, $copyright_label); | 2552 | $copyrights = $this->registry->create('Copyright', array($copyright_url, $copyright_label)); |
2195 | } | 2553 | } |
2196 | else | 2554 | else |
2197 | { | 2555 | { |
@@ -2222,11 +2580,11 @@ class SimplePie_Item | |||
2222 | { | 2580 | { |
2223 | $credit_name = $this->sanitize($credit['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 2581 | $credit_name = $this->sanitize($credit['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
2224 | } | 2582 | } |
2225 | $credits[] = new $this->feed->credit_class($credit_role, $credit_scheme, $credit_name); | 2583 | $credits[] = $this->registry->create('Credit', array($credit_role, $credit_scheme, $credit_name)); |
2226 | } | 2584 | } |
2227 | if (is_array($credits)) | 2585 | if (is_array($credits)) |
2228 | { | 2586 | { |
2229 | $credits = array_values(SimplePie_Misc::array_unique($credits)); | 2587 | $credits = array_values(array_unique($credits)); |
2230 | } | 2588 | } |
2231 | } | 2589 | } |
2232 | else | 2590 | else |
@@ -2267,7 +2625,7 @@ class SimplePie_Item | |||
2267 | } | 2625 | } |
2268 | if (is_array($hashes)) | 2626 | if (is_array($hashes)) |
2269 | { | 2627 | { |
2270 | $hashes = array_values(SimplePie_Misc::array_unique($hashes)); | 2628 | $hashes = array_values(array_unique($hashes)); |
2271 | } | 2629 | } |
2272 | } | 2630 | } |
2273 | else | 2631 | else |
@@ -2289,7 +2647,7 @@ class SimplePie_Item | |||
2289 | } | 2647 | } |
2290 | if (is_array($keywords)) | 2648 | if (is_array($keywords)) |
2291 | { | 2649 | { |
2292 | $keywords = array_values(SimplePie_Misc::array_unique($keywords)); | 2650 | $keywords = array_values(array_unique($keywords)); |
2293 | } | 2651 | } |
2294 | } | 2652 | } |
2295 | else | 2653 | else |
@@ -2326,11 +2684,11 @@ class SimplePie_Item | |||
2326 | { | 2684 | { |
2327 | $rating_value = $this->sanitize($rating['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 2685 | $rating_value = $this->sanitize($rating['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
2328 | } | 2686 | } |
2329 | $ratings[] = new $this->feed->rating_class($rating_scheme, $rating_value); | 2687 | $ratings[] = $this->registry->create('Rating', array($rating_scheme, $rating_value)); |
2330 | } | 2688 | } |
2331 | if (is_array($ratings)) | 2689 | if (is_array($ratings)) |
2332 | { | 2690 | { |
2333 | $ratings = array_values(SimplePie_Misc::array_unique($ratings)); | 2691 | $ratings = array_values(array_unique($ratings)); |
2334 | } | 2692 | } |
2335 | } | 2693 | } |
2336 | else | 2694 | else |
@@ -2358,11 +2716,11 @@ class SimplePie_Item | |||
2358 | { | 2716 | { |
2359 | $restriction_value = $this->sanitize($restriction['data'], SIMPLEPIE_CONSTRUCT_TEXT); | 2717 | $restriction_value = $this->sanitize($restriction['data'], SIMPLEPIE_CONSTRUCT_TEXT); |
2360 | } | 2718 | } |
2361 | $restrictions[] = new $this->feed->restriction_class($restriction_relationship, $restriction_type, $restriction_value); | 2719 | $restrictions[] = $this->registry->create('Restriction', array($restriction_relationship, $restriction_type, $restriction_value)); |
2362 | } | 2720 | } |
2363 | if (is_array($restrictions)) | 2721 | if (is_array($restrictions)) |
2364 | { | 2722 | { |
2365 | $restrictions = array_values(SimplePie_Misc::array_unique($restrictions)); | 2723 | $restrictions = array_values(array_unique($restrictions)); |
2366 | } | 2724 | } |
2367 | } | 2725 | } |
2368 | else | 2726 | else |
@@ -2379,7 +2737,7 @@ class SimplePie_Item | |||
2379 | } | 2737 | } |
2380 | if (is_array($thumbnails)) | 2738 | if (is_array($thumbnails)) |
2381 | { | 2739 | { |
2382 | $thumbnails = array_values(SimplePie_Misc::array_unique($thumbnails)); | 2740 | $thumbnails = array_values(array_unique($thumbnails)); |
2383 | } | 2741 | } |
2384 | } | 2742 | } |
2385 | else | 2743 | else |
@@ -2397,7 +2755,7 @@ class SimplePie_Item | |||
2397 | $title = $title_parent; | 2755 | $title = $title_parent; |
2398 | } | 2756 | } |
2399 | 2757 | ||
2400 | $this->data['enclosures'][] = new $this->feed->enclosure_class($url, $type, $length, null, $bitrate, $captions, $categories, $channels, $copyrights, $credits, $description, $duration, $expression, $framerate, $hashes, $height, $keywords, $lang, $medium, $player, $ratings, $restrictions, $samplingrate, $thumbnails, $title, $width); | 2758 | $this->data['enclosures'][] = $this->registry->create('Enclosure', array($url, $type, $length, null, $bitrate, $captions, $categories, $channels, $copyrights, $credits, $description, $duration, $expression, $framerate, $hashes, $height, $keywords, $lang, $medium, $player, $ratings, $restrictions, $samplingrate, $thumbnails, $title, $width)); |
2401 | } | 2759 | } |
2402 | } | 2760 | } |
2403 | } | 2761 | } |
@@ -2433,7 +2791,7 @@ class SimplePie_Item | |||
2433 | } | 2791 | } |
2434 | 2792 | ||
2435 | // Since we don't have group or content for these, we'll just pass the '*_parent' variables directly to the constructor | 2793 | // Since we don't have group or content for these, we'll just pass the '*_parent' variables directly to the constructor |
2436 | $this->data['enclosures'][] = new $this->feed->enclosure_class($url, $type, $length, null, $bitrate, $captions_parent, $categories_parent, $channels, $copyrights_parent, $credits_parent, $description_parent, $duration_parent, $expression, $framerate, $hashes_parent, $height, $keywords_parent, $lang, $medium, $player_parent, $ratings_parent, $restrictions_parent, $samplingrate, $thumbnails_parent, $title_parent, $width); | 2794 | $this->data['enclosures'][] = $this->registry->create('Enclosure', array($url, $type, $length, null, $bitrate, $captions_parent, $categories_parent, $channels, $copyrights_parent, $credits_parent, $description_parent, $duration_parent, $expression, $framerate, $hashes_parent, $height, $keywords_parent, $lang, $medium, $player_parent, $ratings_parent, $restrictions_parent, $samplingrate, $thumbnails_parent, $title_parent, $width)); |
2437 | } | 2795 | } |
2438 | } | 2796 | } |
2439 | 2797 | ||
@@ -2468,7 +2826,7 @@ class SimplePie_Item | |||
2468 | } | 2826 | } |
2469 | 2827 | ||
2470 | // Since we don't have group or content for these, we'll just pass the '*_parent' variables directly to the constructor | 2828 | // Since we don't have group or content for these, we'll just pass the '*_parent' variables directly to the constructor |
2471 | $this->data['enclosures'][] = new $this->feed->enclosure_class($url, $type, $length, null, $bitrate, $captions_parent, $categories_parent, $channels, $copyrights_parent, $credits_parent, $description_parent, $duration_parent, $expression, $framerate, $hashes_parent, $height, $keywords_parent, $lang, $medium, $player_parent, $ratings_parent, $restrictions_parent, $samplingrate, $thumbnails_parent, $title_parent, $width); | 2829 | $this->data['enclosures'][] = $this->registry->create('Enclosure', array($url, $type, $length, null, $bitrate, $captions_parent, $categories_parent, $channels, $copyrights_parent, $credits_parent, $description_parent, $duration_parent, $expression, $framerate, $hashes_parent, $height, $keywords_parent, $lang, $medium, $player_parent, $ratings_parent, $restrictions_parent, $samplingrate, $thumbnails_parent, $title_parent, $width)); |
2472 | } | 2830 | } |
2473 | } | 2831 | } |
2474 | 2832 | ||
@@ -2503,17 +2861,17 @@ class SimplePie_Item | |||
2503 | } | 2861 | } |
2504 | 2862 | ||
2505 | // Since we don't have group or content for these, we'll just pass the '*_parent' variables directly to the constructor | 2863 | // Since we don't have group or content for these, we'll just pass the '*_parent' variables directly to the constructor |
2506 | $this->data['enclosures'][] = new $this->feed->enclosure_class($url, $type, $length, null, $bitrate, $captions_parent, $categories_parent, $channels, $copyrights_parent, $credits_parent, $description_parent, $duration_parent, $expression, $framerate, $hashes_parent, $height, $keywords_parent, $lang, $medium, $player_parent, $ratings_parent, $restrictions_parent, $samplingrate, $thumbnails_parent, $title_parent, $width); | 2864 | $this->data['enclosures'][] = $this->registry->create('Enclosure', array($url, $type, $length, null, $bitrate, $captions_parent, $categories_parent, $channels, $copyrights_parent, $credits_parent, $description_parent, $duration_parent, $expression, $framerate, $hashes_parent, $height, $keywords_parent, $lang, $medium, $player_parent, $ratings_parent, $restrictions_parent, $samplingrate, $thumbnails_parent, $title_parent, $width)); |
2507 | } | 2865 | } |
2508 | } | 2866 | } |
2509 | 2867 | ||
2510 | if (sizeof($this->data['enclosures']) === 0 && ($url || $type || $length || $bitrate || $captions_parent || $categories_parent || $channels || $copyrights_parent || $credits_parent || $description_parent || $duration_parent || $expression || $framerate || $hashes_parent || $height || $keywords_parent || $lang || $medium || $player_parent || $ratings_parent || $restrictions_parent || $samplingrate || $thumbnails_parent || $title_parent || $width)) | 2868 | if (sizeof($this->data['enclosures']) === 0 && ($url || $type || $length || $bitrate || $captions_parent || $categories_parent || $channels || $copyrights_parent || $credits_parent || $description_parent || $duration_parent || $expression || $framerate || $hashes_parent || $height || $keywords_parent || $lang || $medium || $player_parent || $ratings_parent || $restrictions_parent || $samplingrate || $thumbnails_parent || $title_parent || $width)) |
2511 | { | 2869 | { |
2512 | // Since we don't have group or content for these, we'll just pass the '*_parent' variables directly to the constructor | 2870 | // Since we don't have group or content for these, we'll just pass the '*_parent' variables directly to the constructor |
2513 | $this->data['enclosures'][] = new $this->feed->enclosure_class($url, $type, $length, null, $bitrate, $captions_parent, $categories_parent, $channels, $copyrights_parent, $credits_parent, $description_parent, $duration_parent, $expression, $framerate, $hashes_parent, $height, $keywords_parent, $lang, $medium, $player_parent, $ratings_parent, $restrictions_parent, $samplingrate, $thumbnails_parent, $title_parent, $width); | 2871 | $this->data['enclosures'][] = $this->registry->create('Enclosure', array($url, $type, $length, null, $bitrate, $captions_parent, $categories_parent, $channels, $copyrights_parent, $credits_parent, $description_parent, $duration_parent, $expression, $framerate, $hashes_parent, $height, $keywords_parent, $lang, $medium, $player_parent, $ratings_parent, $restrictions_parent, $samplingrate, $thumbnails_parent, $title_parent, $width)); |
2514 | } | 2872 | } |
2515 | 2873 | ||
2516 | $this->data['enclosures'] = array_values(SimplePie_Misc::array_unique($this->data['enclosures'])); | 2874 | $this->data['enclosures'] = array_values(array_unique($this->data['enclosures'])); |
2517 | } | 2875 | } |
2518 | if (!empty($this->data['enclosures'])) | 2876 | if (!empty($this->data['enclosures'])) |
2519 | { | 2877 | { |
@@ -2525,6 +2883,18 @@ class SimplePie_Item | |||
2525 | } | 2883 | } |
2526 | } | 2884 | } |
2527 | 2885 | ||
2886 | /** | ||
2887 | * Get the latitude coordinates for the item | ||
2888 | * | ||
2889 | * Compatible with the W3C WGS84 Basic Geo and GeoRSS specifications | ||
2890 | * | ||
2891 | * Uses `<geo:lat>` or `<georss:point>` | ||
2892 | * | ||
2893 | * @since 1.0 | ||
2894 | * @link http://www.w3.org/2003/01/geo/ W3C WGS84 Basic Geo | ||
2895 | * @link http://www.georss.org/ GeoRSS | ||
2896 | * @return string|null | ||
2897 | */ | ||
2528 | public function get_latitude() | 2898 | public function get_latitude() |
2529 | { | 2899 | { |
2530 | if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_W3C_BASIC_GEO, 'lat')) | 2900 | if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_W3C_BASIC_GEO, 'lat')) |
@@ -2541,6 +2911,18 @@ class SimplePie_Item | |||
2541 | } | 2911 | } |
2542 | } | 2912 | } |
2543 | 2913 | ||
2914 | /** | ||
2915 | * Get the longitude coordinates for the item | ||
2916 | * | ||
2917 | * Compatible with the W3C WGS84 Basic Geo and GeoRSS specifications | ||
2918 | * | ||
2919 | * Uses `<geo:long>`, `<geo:lon>` or `<georss:point>` | ||
2920 | * | ||
2921 | * @since 1.0 | ||
2922 | * @link http://www.w3.org/2003/01/geo/ W3C WGS84 Basic Geo | ||
2923 | * @link http://www.georss.org/ GeoRSS | ||
2924 | * @return string|null | ||
2925 | */ | ||
2544 | public function get_longitude() | 2926 | public function get_longitude() |
2545 | { | 2927 | { |
2546 | if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_W3C_BASIC_GEO, 'long')) | 2928 | if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_W3C_BASIC_GEO, 'long')) |
@@ -2561,11 +2943,17 @@ class SimplePie_Item | |||
2561 | } | 2943 | } |
2562 | } | 2944 | } |
2563 | 2945 | ||
2946 | /** | ||
2947 | * Get the `<atom:source>` for the item | ||
2948 | * | ||
2949 | * @since 1.1 | ||
2950 | * @return SimplePie_Source|null | ||
2951 | */ | ||
2564 | public function get_source() | 2952 | public function get_source() |
2565 | { | 2953 | { |
2566 | if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'source')) | 2954 | if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'source')) |
2567 | { | 2955 | { |
2568 | return new $this->feed->source_class($this, $return[0]); | 2956 | return $this->registry->create('Source', array($this, $return[0])); |
2569 | } | 2957 | } |
2570 | else | 2958 | else |
2571 | { | 2959 | { |
diff --git a/inc/3rdparty/simplepie/SimplePie/Locator.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Locator.php index f519b7ee..57e910c2 100644 --- a/inc/3rdparty/simplepie/SimplePie/Locator.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Locator.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,17 +33,23 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | /** | |
46 | * Used for feed auto-discovery | ||
47 | * | ||
48 | * | ||
49 | * This class can be overloaded with {@see SimplePie::set_locator_class()} | ||
50 | * | ||
51 | * @package SimplePie | ||
52 | */ | ||
47 | class SimplePie_Locator | 53 | class SimplePie_Locator |
48 | { | 54 | { |
49 | var $useragent; | 55 | var $useragent; |
@@ -51,23 +57,38 @@ class SimplePie_Locator | |||
51 | var $file; | 57 | var $file; |
52 | var $local = array(); | 58 | var $local = array(); |
53 | var $elsewhere = array(); | 59 | var $elsewhere = array(); |
54 | var $file_class = 'SimplePie_File'; | ||
55 | var $cached_entities = array(); | 60 | var $cached_entities = array(); |
56 | var $http_base; | 61 | var $http_base; |
57 | var $base; | 62 | var $base; |
58 | var $base_location = 0; | 63 | var $base_location = 0; |
59 | var $checked_feeds = 0; | 64 | var $checked_feeds = 0; |
60 | var $max_checked_feeds = 10; | 65 | var $max_checked_feeds = 10; |
61 | var $content_type_sniffer_class = 'SimplePie_Content_Type_Sniffer'; | 66 | protected $registry; |
62 | 67 | ||
63 | public function __construct(&$file, $timeout = 10, $useragent = null, $file_class = 'SimplePie_File', $max_checked_feeds = 10, $content_type_sniffer_class = 'SimplePie_Content_Type_Sniffer') | 68 | public function __construct(SimplePie_File $file, $timeout = 10, $useragent = null, $max_checked_feeds = 10) |
64 | { | 69 | { |
65 | $this->file =& $file; | 70 | $this->file = $file; |
66 | $this->file_class = $file_class; | ||
67 | $this->useragent = $useragent; | 71 | $this->useragent = $useragent; |
68 | $this->timeout = $timeout; | 72 | $this->timeout = $timeout; |
69 | $this->max_checked_feeds = $max_checked_feeds; | 73 | $this->max_checked_feeds = $max_checked_feeds; |
70 | $this->content_type_sniffer_class = $content_type_sniffer_class; | 74 | |
75 | if (class_exists('DOMDocument')) | ||
76 | { | ||
77 | $this->dom = new DOMDocument(); | ||
78 | |||
79 | set_error_handler(array('SimplePie_Misc', 'silence_errors')); | ||
80 | $this->dom->loadHTML($this->file->body); | ||
81 | restore_error_handler(); | ||
82 | } | ||
83 | else | ||
84 | { | ||
85 | $this->dom = null; | ||
86 | } | ||
87 | } | ||
88 | |||
89 | public function set_registry(SimplePie_Registry $registry) | ||
90 | { | ||
91 | $this->registry = $registry; | ||
71 | } | 92 | } |
72 | 93 | ||
73 | public function find($type = SIMPLEPIE_LOCATOR_ALL, &$working) | 94 | public function find($type = SIMPLEPIE_LOCATOR_ALL, &$working) |
@@ -79,7 +100,7 @@ class SimplePie_Locator | |||
79 | 100 | ||
80 | if ($this->file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) | 101 | if ($this->file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) |
81 | { | 102 | { |
82 | $sniffer = new $this->content_type_sniffer_class($this->file); | 103 | $sniffer = $this->registry->create('Content_Type_Sniffer', array($this->file)); |
83 | if ($sniffer->get_type() !== 'text/html') | 104 | if ($sniffer->get_type() !== 'text/html') |
84 | { | 105 | { |
85 | return null; | 106 | return null; |
@@ -121,11 +142,11 @@ class SimplePie_Locator | |||
121 | return null; | 142 | return null; |
122 | } | 143 | } |
123 | 144 | ||
124 | public function is_feed(&$file) | 145 | public function is_feed($file) |
125 | { | 146 | { |
126 | if ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) | 147 | if ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) |
127 | { | 148 | { |
128 | $sniffer = new $this->content_type_sniffer_class($file); | 149 | $sniffer = $this->registry->create('Content_Type_Sniffer', array($file)); |
129 | $sniffed = $sniffer->get_type(); | 150 | $sniffed = $sniffer->get_type(); |
130 | if (in_array($sniffed, array('application/rss+xml', 'application/rdf+xml', 'text/rdf', 'application/atom+xml', 'text/xml', 'application/xml'))) | 151 | if (in_array($sniffed, array('application/rss+xml', 'application/rdf+xml', 'text/rdf', 'application/atom+xml', 'text/xml', 'application/xml'))) |
131 | { | 152 | { |
@@ -148,15 +169,24 @@ class SimplePie_Locator | |||
148 | 169 | ||
149 | public function get_base() | 170 | public function get_base() |
150 | { | 171 | { |
172 | if ($this->dom === null) | ||
173 | { | ||
174 | throw new SimplePie_Exception('DOMDocument not found, unable to use locator'); | ||
175 | } | ||
151 | $this->http_base = $this->file->url; | 176 | $this->http_base = $this->file->url; |
152 | $this->base = $this->http_base; | 177 | $this->base = $this->http_base; |
153 | $elements = SimplePie_Misc::get_element('base', $this->file->body); | 178 | $elements = $this->dom->getElementsByTagName('base'); |
154 | foreach ($elements as $element) | 179 | foreach ($elements as $element) |
155 | { | 180 | { |
156 | if ($element['attribs']['href']['data'] !== '') | 181 | if ($element->hasAttribute('href')) |
157 | { | 182 | { |
158 | $this->base = SimplePie_Misc::absolutize_url(trim($element['attribs']['href']['data']), $this->http_base); | 183 | $base = $this->registry->call('Misc', 'absolutize_url', array(trim($element->getAttribute('href')), $this->http_base)); |
159 | $this->base_location = $element['offset']; | 184 | if ($base === false) |
185 | { | ||
186 | continue; | ||
187 | } | ||
188 | $this->base = $base; | ||
189 | $this->base_location = method_exists($element, 'getLineNo') ? $element->getLineNo() : 0; | ||
160 | break; | 190 | break; |
161 | } | 191 | } |
162 | } | 192 | } |
@@ -164,35 +194,61 @@ class SimplePie_Locator | |||
164 | 194 | ||
165 | public function autodiscovery() | 195 | public function autodiscovery() |
166 | { | 196 | { |
167 | $links = array_merge(SimplePie_Misc::get_element('link', $this->file->body), SimplePie_Misc::get_element('a', $this->file->body), SimplePie_Misc::get_element('area', $this->file->body)); | ||
168 | $done = array(); | 197 | $done = array(); |
169 | $feeds = array(); | 198 | $feeds = array(); |
199 | $feeds = array_merge($feeds, $this->search_elements_by_tag('link', $done, $feeds)); | ||
200 | $feeds = array_merge($feeds, $this->search_elements_by_tag('a', $done, $feeds)); | ||
201 | $feeds = array_merge($feeds, $this->search_elements_by_tag('area', $done, $feeds)); | ||
202 | |||
203 | if (!empty($feeds)) | ||
204 | { | ||
205 | return array_values($feeds); | ||
206 | } | ||
207 | else | ||
208 | { | ||
209 | return null; | ||
210 | } | ||
211 | } | ||
212 | |||
213 | protected function search_elements_by_tag($name, &$done, $feeds) | ||
214 | { | ||
215 | if ($this->dom === null) | ||
216 | { | ||
217 | throw new SimplePie_Exception('DOMDocument not found, unable to use locator'); | ||
218 | } | ||
219 | |||
220 | $links = $this->dom->getElementsByTagName($name); | ||
170 | foreach ($links as $link) | 221 | foreach ($links as $link) |
171 | { | 222 | { |
172 | if ($this->checked_feeds === $this->max_checked_feeds) | 223 | if ($this->checked_feeds === $this->max_checked_feeds) |
173 | { | 224 | { |
174 | break; | 225 | break; |
175 | } | 226 | } |
176 | if (isset($link['attribs']['href']['data']) && isset($link['attribs']['rel']['data'])) | 227 | if ($link->hasAttribute('href') && $link->hasAttribute('rel')) |
177 | { | 228 | { |
178 | $rel = array_unique(SimplePie_Misc::space_seperated_tokens(strtolower($link['attribs']['rel']['data']))); | 229 | $rel = array_unique($this->registry->call('Misc', 'space_seperated_tokens', array(strtolower($link->getAttribute('rel'))))); |
230 | $line = method_exists($link, 'getLineNo') ? $link->getLineNo() : 1; | ||
179 | 231 | ||
180 | if ($this->base_location < $link['offset']) | 232 | if ($this->base_location < $line) |
181 | { | 233 | { |
182 | $href = SimplePie_Misc::absolutize_url(trim($link['attribs']['href']['data']), $this->base); | 234 | $href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->base)); |
183 | } | 235 | } |
184 | else | 236 | else |
185 | { | 237 | { |
186 | $href = SimplePie_Misc::absolutize_url(trim($link['attribs']['href']['data']), $this->http_base); | 238 | $href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->http_base)); |
239 | } | ||
240 | if ($href === false) | ||
241 | { | ||
242 | continue; | ||
187 | } | 243 | } |
188 | 244 | ||
189 | if (!in_array($href, $done) && in_array('feed', $rel) || (in_array('alternate', $rel) && !in_array('stylesheet', $rel) && !empty($link['attribs']['type']['data']) && in_array(strtolower(SimplePie_Misc::parse_mime($link['attribs']['type']['data'])), array('application/rss+xml', 'application/atom+xml'))) && !isset($feeds[$href])) | 245 | if (!in_array($href, $done) && in_array('feed', $rel) || (in_array('alternate', $rel) && !in_array('stylesheet', $rel) && $link->hasAttribute('type') && in_array(strtolower($this->registry->call('Misc', 'parse_mime', array($link->getAttribute('type')))), array('application/rss+xml', 'application/atom+xml'))) && !isset($feeds[$href])) |
190 | { | 246 | { |
191 | $this->checked_feeds++; | 247 | $this->checked_feeds++; |
192 | $headers = array( | 248 | $headers = array( |
193 | 'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1', | 249 | 'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1', |
194 | ); | 250 | ); |
195 | $feed = new $this->file_class($href, $this->timeout, 5, $headers, $this->useragent); | 251 | $feed = $this->registry->create('File', array($href, $this->timeout, 5, $headers, $this->useragent)); |
196 | if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed)) | 252 | if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed)) |
197 | { | 253 | { |
198 | $feeds[$href] = $feed; | 254 | $feeds[$href] = $feed; |
@@ -202,37 +258,39 @@ class SimplePie_Locator | |||
202 | } | 258 | } |
203 | } | 259 | } |
204 | 260 | ||
205 | if (!empty($feeds)) | 261 | return $feeds; |
206 | { | ||
207 | return array_values($feeds); | ||
208 | } | ||
209 | else | ||
210 | { | ||
211 | return null; | ||
212 | } | ||
213 | } | 262 | } |
214 | 263 | ||
215 | public function get_links() | 264 | public function get_links() |
216 | { | 265 | { |
217 | $links = SimplePie_Misc::get_element('a', $this->file->body); | 266 | if ($this->dom === null) |
267 | { | ||
268 | throw new SimplePie_Exception('DOMDocument not found, unable to use locator'); | ||
269 | } | ||
270 | |||
271 | $links = $this->dom->getElementsByTagName('a'); | ||
218 | foreach ($links as $link) | 272 | foreach ($links as $link) |
219 | { | 273 | { |
220 | if (isset($link['attribs']['href']['data'])) | 274 | if ($link->hasAttribute('href')) |
221 | { | 275 | { |
222 | $href = trim($link['attribs']['href']['data']); | 276 | $href = trim($link->getAttribute('href')); |
223 | $parsed = SimplePie_Misc::parse_url($href); | 277 | $parsed = $this->registry->call('Misc', 'parse_url', array($href)); |
224 | if ($parsed['scheme'] === '' || preg_match('/^(http(s)|feed)?$/i', $parsed['scheme'])) | 278 | if ($parsed['scheme'] === '' || preg_match('/^(http(s)|feed)?$/i', $parsed['scheme'])) |
225 | { | 279 | { |
226 | if ($this->base_location < $link['offset']) | 280 | if ($this->base_location < $link->getLineNo()) |
227 | { | 281 | { |
228 | $href = SimplePie_Misc::absolutize_url(trim($link['attribs']['href']['data']), $this->base); | 282 | $href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->base)); |
229 | } | 283 | } |
230 | else | 284 | else |
231 | { | 285 | { |
232 | $href = SimplePie_Misc::absolutize_url(trim($link['attribs']['href']['data']), $this->http_base); | 286 | $href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->http_base)); |
287 | } | ||
288 | if ($href === false) | ||
289 | { | ||
290 | continue; | ||
233 | } | 291 | } |
234 | 292 | ||
235 | $current = SimplePie_Misc::parse_url($this->file->url); | 293 | $current = $this->registry->call('Misc', 'parse_url', array($this->file->url)); |
236 | 294 | ||
237 | if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority']) | 295 | if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority']) |
238 | { | 296 | { |
@@ -269,7 +327,7 @@ class SimplePie_Locator | |||
269 | $headers = array( | 327 | $headers = array( |
270 | 'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1', | 328 | 'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1', |
271 | ); | 329 | ); |
272 | $feed = new $this->file_class($value, $this->timeout, 5, $headers, $this->useragent); | 330 | $feed = $this->registry->create('File', array($value, $this->timeout, 5, $headers, $this->useragent)); |
273 | if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed)) | 331 | if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed)) |
274 | { | 332 | { |
275 | return $feed; | 333 | return $feed; |
@@ -297,7 +355,7 @@ class SimplePie_Locator | |||
297 | $headers = array( | 355 | $headers = array( |
298 | 'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1', | 356 | 'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1', |
299 | ); | 357 | ); |
300 | $feed = new $this->file_class($value, $this->timeout, 5, null, $this->useragent); | 358 | $feed = $this->registry->create('File', array($value, $this->timeout, 5, null, $this->useragent)); |
301 | if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed)) | 359 | if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed)) |
302 | { | 360 | { |
303 | return $feed; | 361 | return $feed; |
diff --git a/inc/3rdparty/simplepie/SimplePie/Misc.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Misc.php index 17dbf963..5d7367f6 100644 --- a/inc/3rdparty/simplepie/SimplePie/Misc.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Misc.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,17 +33,20 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | /** | |
46 | * Miscellanous utilities | ||
47 | * | ||
48 | * @package SimplePie | ||
49 | */ | ||
47 | class SimplePie_Misc | 50 | class SimplePie_Misc |
48 | { | 51 | { |
49 | public static function time_hms($seconds) | 52 | public static function time_hms($seconds) |
@@ -77,63 +80,21 @@ class SimplePie_Misc | |||
77 | public static function absolutize_url($relative, $base) | 80 | public static function absolutize_url($relative, $base) |
78 | { | 81 | { |
79 | $iri = SimplePie_IRI::absolutize(new SimplePie_IRI($base), $relative); | 82 | $iri = SimplePie_IRI::absolutize(new SimplePie_IRI($base), $relative); |
80 | return $iri->get_iri(); | 83 | if ($iri === false) |
81 | } | ||
82 | |||
83 | public static function remove_dot_segments($input) | ||
84 | { | ||
85 | $output = ''; | ||
86 | while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..') | ||
87 | { | 84 | { |
88 | // A: If the input buffer begins with a prefix of "../" or "./", then remove that prefix from the input buffer; otherwise, | 85 | return false; |
89 | if (strpos($input, '../') === 0) | ||
90 | { | ||
91 | $input = substr($input, 3); | ||
92 | } | ||
93 | elseif (strpos($input, './') === 0) | ||
94 | { | ||
95 | $input = substr($input, 2); | ||
96 | } | ||
97 | // B: if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete path segment, then replace that prefix with "/" in the input buffer; otherwise, | ||
98 | elseif (strpos($input, '/./') === 0) | ||
99 | { | ||
100 | $input = substr_replace($input, '/', 0, 3); | ||
101 | } | ||
102 | elseif ($input === '/.') | ||
103 | { | ||
104 | $input = '/'; | ||
105 | } | ||
106 | // C: if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete path segment, then replace that prefix with "/" in the input buffer and remove the last segment and its preceding "/" (if any) from the output buffer; otherwise, | ||
107 | elseif (strpos($input, '/../') === 0) | ||
108 | { | ||
109 | $input = substr_replace($input, '/', 0, 4); | ||
110 | $output = substr_replace($output, '', strrpos($output, '/')); | ||
111 | } | ||
112 | elseif ($input === '/..') | ||
113 | { | ||
114 | $input = '/'; | ||
115 | $output = substr_replace($output, '', strrpos($output, '/')); | ||
116 | } | ||
117 | // D: if the input buffer consists only of "." or "..", then remove that from the input buffer; otherwise, | ||
118 | elseif ($input === '.' || $input === '..') | ||
119 | { | ||
120 | $input = ''; | ||
121 | } | ||
122 | // E: move the first path segment in the input buffer to the end of the output buffer, including the initial "/" character (if any) and any subsequent characters up to, but not including, the next "/" character or the end of the input buffer | ||
123 | elseif (($pos = strpos($input, '/', 1)) !== false) | ||
124 | { | ||
125 | $output .= substr($input, 0, $pos); | ||
126 | $input = substr_replace($input, '', 0, $pos); | ||
127 | } | ||
128 | else | ||
129 | { | ||
130 | $output .= $input; | ||
131 | $input = ''; | ||
132 | } | ||
133 | } | 86 | } |
134 | return $output . $input; | 87 | return $iri->get_uri(); |
135 | } | 88 | } |
136 | 89 | ||
90 | /** | ||
91 | * Get a HTML/XML element from a HTML string | ||
92 | * | ||
93 | * @deprecated Use DOMDocument instead (parsing HTML with regex is bad!) | ||
94 | * @param string $realname Element name (including namespace prefix if applicable) | ||
95 | * @param string $string HTML document | ||
96 | * @return array | ||
97 | */ | ||
137 | public static function get_element($realname, $string) | 98 | public static function get_element($realname, $string) |
138 | { | 99 | { |
139 | $return = array(); | 100 | $return = array(); |
@@ -267,29 +228,29 @@ class SimplePie_Misc | |||
267 | { | 228 | { |
268 | $iri = new SimplePie_IRI($url); | 229 | $iri = new SimplePie_IRI($url); |
269 | return array( | 230 | return array( |
270 | 'scheme' => (string) $iri->get_scheme(), | 231 | 'scheme' => (string) $iri->scheme, |
271 | 'authority' => (string) $iri->get_authority(), | 232 | 'authority' => (string) $iri->authority, |
272 | 'path' => (string) $iri->get_path(), | 233 | 'path' => (string) $iri->path, |
273 | 'query' => (string) $iri->get_query(), | 234 | 'query' => (string) $iri->query, |
274 | 'fragment' => (string) $iri->get_fragment() | 235 | 'fragment' => (string) $iri->fragment |
275 | ); | 236 | ); |
276 | } | 237 | } |
277 | 238 | ||
278 | public static function compress_parse_url($scheme = '', $authority = '', $path = '', $query = '', $fragment = '') | 239 | public static function compress_parse_url($scheme = '', $authority = '', $path = '', $query = '', $fragment = '') |
279 | { | 240 | { |
280 | $iri = new SimplePie_IRI(''); | 241 | $iri = new SimplePie_IRI(''); |
281 | $iri->set_scheme($scheme); | 242 | $iri->scheme = $scheme; |
282 | $iri->set_authority($authority); | 243 | $iri->authority = $authority; |
283 | $iri->set_path($path); | 244 | $iri->path = $path; |
284 | $iri->set_query($query); | 245 | $iri->query = $query; |
285 | $iri->set_fragment($fragment); | 246 | $iri->fragment = $fragment; |
286 | return $iri->get_iri(); | 247 | return $iri->get_uri(); |
287 | } | 248 | } |
288 | 249 | ||
289 | public static function normalize_url($url) | 250 | public static function normalize_url($url) |
290 | { | 251 | { |
291 | $iri = new SimplePie_IRI($url); | 252 | $iri = new SimplePie_IRI($url); |
292 | return $iri->get_iri(); | 253 | return $iri->get_uri(); |
293 | } | 254 | } |
294 | 255 | ||
295 | public static function percent_encoding_normalization($match) | 256 | public static function percent_encoding_normalization($match) |
@@ -378,6 +339,14 @@ class SimplePie_Misc | |||
378 | { | 339 | { |
379 | $output = 'EUC-KR'; | 340 | $output = 'EUC-KR'; |
380 | } | 341 | } |
342 | if ($input === 'Windows-31J') | ||
343 | { | ||
344 | $input = 'SJIS'; | ||
345 | } | ||
346 | if ($output === 'Windows-31J') | ||
347 | { | ||
348 | $output = 'SJIS'; | ||
349 | } | ||
381 | 350 | ||
382 | // Check that the encoding is supported | 351 | // Check that the encoding is supported |
383 | if (@mb_convert_encoding("\x80", 'UTF-16BE', $input) === "\x00\x80") | 352 | if (@mb_convert_encoding("\x80", 'UTF-16BE', $input) === "\x00\x80") |
@@ -1613,7 +1582,6 @@ class SimplePie_Misc | |||
1613 | case 'utf7': | 1582 | case 'utf7': |
1614 | return 'UTF-7'; | 1583 | return 'UTF-7'; |
1615 | 1584 | ||
1616 | case 'utf8lias': | ||
1617 | case 'utf8': | 1585 | case 'utf8': |
1618 | return 'UTF-8'; | 1586 | return 'UTF-8'; |
1619 | 1587 | ||
@@ -1665,8 +1633,7 @@ class SimplePie_Misc | |||
1665 | case 'mskanji': | 1633 | case 'mskanji': |
1666 | case 'shiftjis': | 1634 | case 'shiftjis': |
1667 | case 'windows31j': | 1635 | case 'windows31j': |
1668 | return 'SJIS'; | 1636 | return 'Windows-31J'; |
1669 | //return 'Windows-31J'; | ||
1670 | 1637 | ||
1671 | case 'iso885911': | 1638 | case 'iso885911': |
1672 | case 'tis620': | 1639 | case 'tis620': |
@@ -1750,40 +1717,6 @@ class SimplePie_Misc | |||
1750 | return $curl; | 1717 | return $curl; |
1751 | } | 1718 | } |
1752 | 1719 | ||
1753 | public static function is_subclass_of($class1, $class2) | ||
1754 | { | ||
1755 | if (func_num_args() !== 2) | ||
1756 | { | ||
1757 | trigger_error('Wrong parameter count for SimplePie_Misc::is_subclass_of()', E_USER_WARNING); | ||
1758 | } | ||
1759 | elseif (version_compare(PHP_VERSION, '5.0.3', '>=') || is_object($class1)) | ||
1760 | { | ||
1761 | return is_subclass_of($class1, $class2); | ||
1762 | } | ||
1763 | elseif (is_string($class1) && is_string($class2)) | ||
1764 | { | ||
1765 | if (class_exists($class1)) | ||
1766 | { | ||
1767 | if (class_exists($class2)) | ||
1768 | { | ||
1769 | $class2 = strtolower($class2); | ||
1770 | while ($class1 = strtolower(get_parent_class($class1))) | ||
1771 | { | ||
1772 | if ($class1 === $class2) | ||
1773 | { | ||
1774 | return true; | ||
1775 | } | ||
1776 | } | ||
1777 | } | ||
1778 | } | ||
1779 | else | ||
1780 | { | ||
1781 | trigger_error('Unknown class passed as parameter', E_USER_WARNNG); | ||
1782 | } | ||
1783 | } | ||
1784 | return false; | ||
1785 | } | ||
1786 | |||
1787 | /** | 1720 | /** |
1788 | * Strip HTML comments | 1721 | * Strip HTML comments |
1789 | * | 1722 | * |
@@ -1817,7 +1750,7 @@ class SimplePie_Misc | |||
1817 | /** | 1750 | /** |
1818 | * Decode HTML entities | 1751 | * Decode HTML entities |
1819 | * | 1752 | * |
1820 | * @static | 1753 | * @deprecated Use DOMDocument instead |
1821 | * @param string $data Input data | 1754 | * @param string $data Input data |
1822 | * @return string Output data | 1755 | * @return string Output data |
1823 | */ | 1756 | */ |
@@ -1899,18 +1832,6 @@ class SimplePie_Misc | |||
1899 | } | 1832 | } |
1900 | } | 1833 | } |
1901 | 1834 | ||
1902 | public static function htmlspecialchars_decode($string, $quote_style) | ||
1903 | { | ||
1904 | if (function_exists('htmlspecialchars_decode')) | ||
1905 | { | ||
1906 | return htmlspecialchars_decode($string, $quote_style); | ||
1907 | } | ||
1908 | else | ||
1909 | { | ||
1910 | return strtr($string, array_flip(get_html_translation_table(HTML_SPECIALCHARS, $quote_style))); | ||
1911 | } | ||
1912 | } | ||
1913 | |||
1914 | public static function atom_03_construct_type($attribs) | 1835 | public static function atom_03_construct_type($attribs) |
1915 | { | 1836 | { |
1916 | if (isset($attribs['']['mode']) && strtolower(trim($attribs['']['mode']) === 'base64')) | 1837 | if (isset($attribs['']['mode']) && strtolower(trim($attribs['']['mode']) === 'base64')) |
@@ -2024,48 +1945,6 @@ class SimplePie_Misc | |||
2024 | return $tokens; | 1945 | return $tokens; |
2025 | } | 1946 | } |
2026 | 1947 | ||
2027 | public static function array_unique($array) | ||
2028 | { | ||
2029 | if (version_compare(PHP_VERSION, '5.2', '>=')) | ||
2030 | { | ||
2031 | return array_unique($array); | ||
2032 | } | ||
2033 | else | ||
2034 | { | ||
2035 | $array = (array) $array; | ||
2036 | $new_array = array(); | ||
2037 | $new_array_strings = array(); | ||
2038 | foreach ($array as $key => $value) | ||
2039 | { | ||
2040 | if (is_object($value)) | ||
2041 | { | ||
2042 | if (method_exists($value, '__toString')) | ||
2043 | { | ||
2044 | $cmp = $value->__toString(); | ||
2045 | } | ||
2046 | else | ||
2047 | { | ||
2048 | trigger_error('Object of class ' . get_class($value) . ' could not be converted to string', E_USER_ERROR); | ||
2049 | } | ||
2050 | } | ||
2051 | elseif (is_array($value)) | ||
2052 | { | ||
2053 | $cmp = (string) reset($value); | ||
2054 | } | ||
2055 | else | ||
2056 | { | ||
2057 | $cmp = (string) $value; | ||
2058 | } | ||
2059 | if (!in_array($cmp, $new_array_strings)) | ||
2060 | { | ||
2061 | $new_array[$key] = $value; | ||
2062 | $new_array_strings[] = $cmp; | ||
2063 | } | ||
2064 | } | ||
2065 | return $new_array; | ||
2066 | } | ||
2067 | } | ||
2068 | |||
2069 | /** | 1948 | /** |
2070 | * Converts a unicode codepoint to a UTF-8 character | 1949 | * Converts a unicode codepoint to a UTF-8 character |
2071 | * | 1950 | * |
@@ -2139,9 +2018,10 @@ class SimplePie_Misc | |||
2139 | * | 2018 | * |
2140 | * @todo Add support for EBCDIC | 2019 | * @todo Add support for EBCDIC |
2141 | * @param string $data XML data | 2020 | * @param string $data XML data |
2021 | * @param SimplePie_Registry $registry Class registry | ||
2142 | * @return array Possible encodings | 2022 | * @return array Possible encodings |
2143 | */ | 2023 | */ |
2144 | public static function xml_encoding($data) | 2024 | public static function xml_encoding($data, $registry) |
2145 | { | 2025 | { |
2146 | // UTF-32 Big Endian BOM | 2026 | // UTF-32 Big Endian BOM |
2147 | if (substr($data, 0, 4) === "\x00\x00\xFE\xFF") | 2027 | if (substr($data, 0, 4) === "\x00\x00\xFE\xFF") |
@@ -2173,7 +2053,7 @@ class SimplePie_Misc | |||
2173 | { | 2053 | { |
2174 | if ($pos = strpos($data, "\x00\x00\x00\x3F\x00\x00\x00\x3E")) | 2054 | if ($pos = strpos($data, "\x00\x00\x00\x3F\x00\x00\x00\x3E")) |
2175 | { | 2055 | { |
2176 | $parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 20), 'UTF-32BE', 'UTF-8')); | 2056 | $parser = $registry->create('XML_Declaration_Parser', array(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 20), 'UTF-32BE', 'UTF-8'))); |
2177 | if ($parser->parse()) | 2057 | if ($parser->parse()) |
2178 | { | 2058 | { |
2179 | $encoding[] = $parser->encoding; | 2059 | $encoding[] = $parser->encoding; |
@@ -2186,7 +2066,7 @@ class SimplePie_Misc | |||
2186 | { | 2066 | { |
2187 | if ($pos = strpos($data, "\x3F\x00\x00\x00\x3E\x00\x00\x00")) | 2067 | if ($pos = strpos($data, "\x3F\x00\x00\x00\x3E\x00\x00\x00")) |
2188 | { | 2068 | { |
2189 | $parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 20), 'UTF-32LE', 'UTF-8')); | 2069 | $parser = $registry->create('XML_Declaration_Parser', array(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 20), 'UTF-32LE', 'UTF-8'))); |
2190 | if ($parser->parse()) | 2070 | if ($parser->parse()) |
2191 | { | 2071 | { |
2192 | $encoding[] = $parser->encoding; | 2072 | $encoding[] = $parser->encoding; |
@@ -2199,7 +2079,7 @@ class SimplePie_Misc | |||
2199 | { | 2079 | { |
2200 | if ($pos = strpos($data, "\x00\x3F\x00\x3E")) | 2080 | if ($pos = strpos($data, "\x00\x3F\x00\x3E")) |
2201 | { | 2081 | { |
2202 | $parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 10), 'UTF-16BE', 'UTF-8')); | 2082 | $parser = $registry->create('XML_Declaration_Parser', array(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 10), 'UTF-16BE', 'UTF-8'))); |
2203 | if ($parser->parse()) | 2083 | if ($parser->parse()) |
2204 | { | 2084 | { |
2205 | $encoding[] = $parser->encoding; | 2085 | $encoding[] = $parser->encoding; |
@@ -2212,7 +2092,7 @@ class SimplePie_Misc | |||
2212 | { | 2092 | { |
2213 | if ($pos = strpos($data, "\x3F\x00\x3E\x00")) | 2093 | if ($pos = strpos($data, "\x3F\x00\x3E\x00")) |
2214 | { | 2094 | { |
2215 | $parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 10), 'UTF-16LE', 'UTF-8')); | 2095 | $parser = $registry->create('XML_Declaration_Parser', array(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 10), 'UTF-16LE', 'UTF-8'))); |
2216 | if ($parser->parse()) | 2096 | if ($parser->parse()) |
2217 | { | 2097 | { |
2218 | $encoding[] = $parser->encoding; | 2098 | $encoding[] = $parser->encoding; |
@@ -2225,7 +2105,7 @@ class SimplePie_Misc | |||
2225 | { | 2105 | { |
2226 | if ($pos = strpos($data, "\x3F\x3E")) | 2106 | if ($pos = strpos($data, "\x3F\x3E")) |
2227 | { | 2107 | { |
2228 | $parser = new SimplePie_XML_Declaration_Parser(substr($data, 5, $pos - 5)); | 2108 | $parser = $registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5))); |
2229 | if ($parser->parse()) | 2109 | if ($parser->parse()) |
2230 | { | 2110 | { |
2231 | $encoding[] = $parser->encoding; | 2111 | $encoding[] = $parser->encoding; |
@@ -2251,10 +2131,6 @@ class SimplePie_Misc | |||
2251 | header('Cache-Control: must-revalidate'); | 2131 | header('Cache-Control: must-revalidate'); |
2252 | header('Expires: ' . gmdate('D, d M Y H:i:s', time() + 604800) . ' GMT'); // 7 days | 2132 | header('Expires: ' . gmdate('D, d M Y H:i:s', time() + 604800) . ' GMT'); // 7 days |
2253 | ?> | 2133 | ?> |
2254 | function embed_odeo(link) { | ||
2255 | document.writeln('<embed src="http://odeo.com/flash/audio_player_fullsize.swf" pluginspage="http://www.macromedia.com/go/getflashplayer" type="application/x-shockwave-flash" quality="high" width="440" height="80" wmode="transparent" allowScriptAccess="any" flashvars="valid_sample_rate=true&external_url='+link+'"></embed>'); | ||
2256 | } | ||
2257 | |||
2258 | function embed_quicktime(type, bgcolor, width, height, link, placeholder, loop) { | 2134 | function embed_quicktime(type, bgcolor, width, height, link, placeholder, loop) { |
2259 | if (placeholder != '') { | 2135 | if (placeholder != '') { |
2260 | document.writeln('<embed type="'+type+'" style="cursor:hand; cursor:pointer;" href="'+link+'" src="'+placeholder+'" width="'+width+'" height="'+height+'" autoplay="false" target="myself" controller="false" loop="'+loop+'" scale="aspect" bgcolor="'+bgcolor+'" pluginspage="http://www.apple.com/quicktime/download/"></embed>'); | 2136 | document.writeln('<embed type="'+type+'" style="cursor:hand; cursor:pointer;" href="'+link+'" src="'+placeholder+'" width="'+width+'" height="'+height+'" autoplay="false" target="myself" controller="false" loop="'+loop+'" scale="aspect" bgcolor="'+bgcolor+'" pluginspage="http://www.apple.com/quicktime/download/"></embed>'); |
@@ -2362,5 +2238,10 @@ function embed_wmedia(width, height, link) { | |||
2362 | } | 2238 | } |
2363 | return $info; | 2239 | return $info; |
2364 | } | 2240 | } |
2241 | |||
	/**
	 * Deliberately empty handler that discards whatever it is given
	 *
	 * NOTE(review): the ($num, $str) signature looks like an error-handler
	 * callback (error number, error message) - confirm against the caller
	 * that installs it.
	 *
	 * @param mixed $num Ignored
	 * @param mixed $str Ignored
	 * @return void
	 */
	public static function silence_errors($num, $str)
	{
		// No-op
	}
2365 | } | 2246 | } |
2366 | 2247 | ||
diff --git a/inc/3rdparty/libraries/simplepie/library/SimplePie/Net/IPv6.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Net/IPv6.php new file mode 100644 index 00000000..da80d8ac --- /dev/null +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Net/IPv6.php | |||
@@ -0,0 +1,276 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * SimplePie | ||
4 | * | ||
5 | * A PHP-Based RSS and Atom Feed Framework. | ||
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | ||
7 | * | ||
8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | ||
9 | * All rights reserved. | ||
10 | * | ||
11 | * Redistribution and use in source and binary forms, with or without modification, are | ||
12 | * permitted provided that the following conditions are met: | ||
13 | * | ||
14 | * * Redistributions of source code must retain the above copyright notice, this list of | ||
15 | * conditions and the following disclaimer. | ||
16 | * | ||
17 | * * Redistributions in binary form must reproduce the above copyright notice, this list | ||
18 | * of conditions and the following disclaimer in the documentation and/or other materials | ||
19 | * provided with the distribution. | ||
20 | * | ||
21 | * * Neither the name of the SimplePie Team nor the names of its contributors may be used | ||
22 | * to endorse or promote products derived from this software without specific prior | ||
23 | * written permission. | ||
24 | * | ||
25 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS | ||
26 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | ||
27 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS | ||
28 | * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
29 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
30 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
31 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | ||
32 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
33 | * POSSIBILITY OF SUCH DAMAGE. | ||
34 | * | ||
35 | * @package SimplePie | ||
36 | * @version 1.3.1 | ||
37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue | ||
38 | * @author Ryan Parman | ||
39 | * @author Geoffrey Sneddon | ||
40 | * @author Ryan McCue | ||
41 | * @link http://simplepie.org/ SimplePie | ||
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | ||
43 | */ | ||
44 | |||
45 | |||
46 | /** | ||
47 | * Class to validate and to work with IPv6 addresses. | ||
48 | * | ||
49 | * @package SimplePie | ||
50 | * @subpackage HTTP | ||
51 | * @copyright 2003-2005 The PHP Group | ||
52 | * @license http://www.opensource.org/licenses/bsd-license.php | ||
53 | * @link http://pear.php.net/package/Net_IPv6 | ||
54 | * @author Alexander Merz <alexander.merz@web.de> | ||
55 | * @author elfrink at introweb dot nl | ||
56 | * @author Josh Peck <jmp at joshpeck dot org> | ||
57 | * @author Geoffrey Sneddon <geoffers@gmail.com> | ||
58 | */ | ||
class SimplePie_Net_IPv6
{
	/**
	 * Uncompresses an IPv6 address
	 *
	 * RFC 4291 allows you to compress consecutive zero pieces in an address to
	 * '::'. This method expects a valid IPv6 address and expands the '::' to
	 * the required number of zero pieces.
	 *
	 * Example:  FF01::101   ->  FF01:0:0:0:0:0:0:101
	 *           ::1         ->  0:0:0:0:0:0:0:1
	 *
	 * An address is returned unchanged when it does not contain exactly one
	 * '::', or when the '::' could not stand for at least one zero piece
	 * (the address already has eight or more pieces). The latter keeps a
	 * non-positive repeat count away from str_repeat() and lets
	 * {@see check_ipv6()} reject the input.
	 *
	 * @author Alexander Merz <alexander.merz@web.de>
	 * @author elfrink at introweb dot nl
	 * @author Josh Peck <jmp at joshpeck dot org>
	 * @copyright 2003-2005 The PHP Group
	 * @license http://www.opensource.org/licenses/bsd-license.php
	 * @param string $ip An IPv6 address
	 * @return string The uncompressed IPv6 address
	 */
	public static function uncompress($ip)
	{
		if (substr_count($ip, '::') !== 1)
		{
			return $ip;
		}

		list($ip1, $ip2) = explode('::', $ip);

		// Number of ':' separators on each side of '::'; -1 marks an empty side
		$c1 = ($ip1 === '') ? -1 : substr_count($ip1, ':');
		$c2 = ($ip2 === '') ? -1 : substr_count($ip2, ':');

		// A dotted IPv4 tail occupies two 16-bit pieces
		if (strpos($ip2, '.') !== false)
		{
			$c2++;
		}

		// ::
		if ($c1 === -1 && $c2 === -1)
		{
			return '0:0:0:0:0:0:0:0';
		}

		// Work out how many zero pieces the '::' stands for
		if ($c1 === -1)
		{
			// ::xxx
			$missing = 7 - $c2;
		}
		else if ($c2 === -1)
		{
			// xxx::
			$missing = 7 - $c1;
		}
		else
		{
			// xxx::xxx
			$missing = 6 - $c2 - $c1;
		}

		// '::' must replace at least one piece; otherwise the input is not a
		// valid address and is handed back untouched
		if ($missing < 1)
		{
			return $ip;
		}

		if ($c1 === -1)
		{
			$fill = str_repeat('0:', $missing);
		}
		else if ($c2 === -1)
		{
			$fill = str_repeat(':0', $missing);
		}
		else
		{
			$fill = ':' . str_repeat('0:', $missing);
		}

		return str_replace('::', $fill, $ip);
	}

	/**
	 * Compresses an IPv6 address
	 *
	 * RFC 4291 allows you to compress consecutive zero pieces in an address to
	 * '::'. This method expects a valid IPv6 address and compresses consecutive
	 * zero pieces to '::'. Leading zeros are removed from every piece,
	 * including pieces that start with a hexadecimal letter.
	 *
	 * Example:  FF01:0:0:0:0:0:0:101   ->  FF01::101
	 *           0:0:0:0:0:0:0:1        ->  ::1
	 *           0:0:0:0:0:0:13.1.68.3  ->  ::13.1.68.3
	 *
	 * @see uncompress()
	 * @param string $ip An IPv6 address
	 * @return string The compressed IPv6 address
	 */
	public static function compress($ip)
	{
		// Prepare the IP to be compressed
		$ip = self::uncompress($ip);
		$ip_parts = self::split_v6_v4($ip);

		// Replace all leading zeros (the piece may continue with any hex digit)
		$ip_parts[0] = preg_replace('/(^|:)0+([0-9a-f])/i', '\1\2', $ip_parts[0]);

		// Find bunches of zeros and collapse the longest one
		if (preg_match_all('/(?:^|:)(?:0(?::|$))+/', $ip_parts[0], $matches, PREG_OFFSET_CAPTURE))
		{
			$max = 0;
			$pos = null;
			foreach ($matches[0] as $match)
			{
				if (strlen($match[0]) > $max)
				{
					$max = strlen($match[0]);
					$pos = $match[1];
				}
			}

			$ip_parts[0] = substr_replace($ip_parts[0], '::', $pos, $max);
		}

		if ($ip_parts[1] === '')
		{
			return $ip_parts[0];
		}

		// When the IPv6 part ends in the compressed run, its trailing ':'
		// already separates it from the IPv4 tail; adding another would
		// produce ':::'
		if (substr($ip_parts[0], -1) === ':')
		{
			return $ip_parts[0] . $ip_parts[1];
		}

		return $ip_parts[0] . ':' . $ip_parts[1];
	}

	/**
	 * Splits an IPv6 address into the IPv6 and IPv4 representation parts
	 *
	 * RFC 4291 allows you to represent the last two parts of an IPv6 address
	 * using the standard IPv4 representation
	 *
	 * Example:  0:0:0:0:0:0:13.1.68.3
	 *           0:0:0:0:0:FFFF:129.144.52.38
	 *
	 * @param string $ip An IPv6 address
	 * @return array [0] contains the IPv6 represented part, and [1] the IPv4 represented part
	 */
	private static function split_v6_v4($ip)
	{
		if (strpos($ip, '.') !== false)
		{
			// Everything after the last ':' is the dotted IPv4 tail
			$pos = strrpos($ip, ':');
			$ipv6_part = substr($ip, 0, $pos);
			$ipv4_part = substr($ip, $pos + 1);
			return array($ipv6_part, $ipv4_part);
		}
		else
		{
			return array($ip, '');
		}
	}

	/**
	 * Checks an IPv6 address
	 *
	 * Checks if the given IP is a valid IPv6 address
	 *
	 * @param string $ip An IPv6 address
	 * @return bool true if $ip is a valid IPv6 address
	 */
	public static function check_ipv6($ip)
	{
		$ip = self::uncompress($ip);
		list($ipv6, $ipv4) = self::split_v6_v4($ip);
		$ipv6 = explode(':', $ipv6);
		$ipv4 = explode('.', $ipv4);

		// Either eight hex pieces, or six hex pieces plus a dotted IPv4 tail
		if (count($ipv6) === 8 && count($ipv4) === 1 || count($ipv6) === 6 && count($ipv4) === 4)
		{
			foreach ($ipv6 as $ipv6_part)
			{
				// Each piece is one to four hex digits; the D modifier stops
				// '$' from matching before a trailing newline
				if (preg_match('/^[0-9a-f]{1,4}$/iD', $ipv6_part) !== 1)
				{
					return false;
				}
			}
			if (count($ipv4) === 4)
			{
				foreach ($ipv4 as $ipv4_part)
				{
					// Round-tripping through int rejects signs, padding and
					// non-digits
					$value = (int) $ipv4_part;
					if ((string) $value !== $ipv4_part || $value < 0 || $value > 0xFF)
					{
						return false;
					}
				}
			}
			return true;
		}
		else
		{
			return false;
		}
	}

	/**
	 * Checks if the given IP is a valid IPv6 address
	 *
	 * @codeCoverageIgnore
	 * @deprecated Use {@see SimplePie_Net_IPv6::check_ipv6()} instead
	 * @see check_ipv6
	 * @param string $ip An IPv6 address
	 * @return bool true if $ip is a valid IPv6 address
	 */
	public static function checkIPv6($ip)
	{
		return self::check_ipv6($ip);
	}
}
diff --git a/inc/3rdparty/simplepie/SimplePie/Parse/Date.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Parse/Date.php index 2694443c..d51f500d 100644 --- a/inc/3rdparty/simplepie/SimplePie/Parse/Date.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Parse/Date.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,14 +33,13 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | ||
@@ -48,6 +47,7 @@ | |||
48 | * Date Parser | 47 | * Date Parser |
49 | * | 48 | * |
50 | * @package SimplePie | 49 | * @package SimplePie |
50 | * @subpackage Parsing | ||
51 | */ | 51 | */ |
52 | class SimplePie_Parse_Date | 52 | class SimplePie_Parse_Date |
53 | { | 53 | { |
@@ -599,7 +599,7 @@ class SimplePie_Parse_Date | |||
599 | 599 | ||
600 | foreach ($this->built_in as $method) | 600 | foreach ($this->built_in as $method) |
601 | { | 601 | { |
602 | if (($returned = call_user_func(array(&$this, $method), $date)) !== false) | 602 | if (($returned = call_user_func(array($this, $method), $date)) !== false) |
603 | { | 603 | { |
604 | return $returned; | 604 | return $returned; |
605 | } | 605 | } |
diff --git a/inc/3rdparty/simplepie/SimplePie/Parser.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Parser.php index 182bf861..d698552c 100644 --- a/inc/3rdparty/simplepie/SimplePie/Parser.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Parser.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,17 +33,24 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | /** | |
46 | * Parses XML into something sane | ||
47 | * | ||
48 | * | ||
49 | * This class can be overloaded with {@see SimplePie::set_parser_class()} | ||
50 | * | ||
51 | * @package SimplePie | ||
52 | * @subpackage Parsing | ||
53 | */ | ||
47 | class SimplePie_Parser | 54 | class SimplePie_Parser |
48 | { | 55 | { |
49 | var $error_code; | 56 | var $error_code; |
@@ -61,6 +68,12 @@ class SimplePie_Parser | |||
61 | var $datas = array(array()); | 68 | var $datas = array(array()); |
62 | var $current_xhtml_construct = -1; | 69 | var $current_xhtml_construct = -1; |
63 | var $encoding; | 70 | var $encoding; |
71 | protected $registry; | ||
72 | |||
	/**
	 * Store the registry on this parser
	 *
	 * The stored registry is what parse() and the element handlers use to
	 * create XML_Declaration_Parser instances and to call Misc helpers.
	 *
	 * @param SimplePie_Registry $registry Registry to keep in $this->registry
	 * @return void
	 */
	public function set_registry(SimplePie_Registry $registry)
	{
		$this->registry = $registry;
	}
64 | 77 | ||
65 | public function parse(&$data, $encoding) | 78 | public function parse(&$data, $encoding) |
66 | { | 79 | { |
@@ -103,7 +116,7 @@ class SimplePie_Parser | |||
103 | 116 | ||
104 | if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false) | 117 | if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false) |
105 | { | 118 | { |
106 | $declaration = new SimplePie_XML_Declaration_Parser(substr($data, 5, $pos - 5)); | 119 | $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5))); |
107 | if ($declaration->parse()) | 120 | if ($declaration->parse()) |
108 | { | 121 | { |
109 | $data = substr($data, $pos + 2); | 122 | $data = substr($data, $pos + 2); |
@@ -265,8 +278,12 @@ class SimplePie_Parser | |||
265 | 278 | ||
266 | if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['base'])) | 279 | if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['base'])) |
267 | { | 280 | { |
268 | $this->xml_base[] = SimplePie_Misc::absolutize_url($attribs[SIMPLEPIE_NAMESPACE_XML]['base'], end($this->xml_base)); | 281 | $base = $this->registry->call('Misc', 'absolutize_url', array($attribs[SIMPLEPIE_NAMESPACE_XML]['base'], end($this->xml_base))); |
269 | $this->xml_base_explicit[] = true; | 282 | if ($base !== false) |
283 | { | ||
284 | $this->xml_base[] = $base; | ||
285 | $this->xml_base_explicit[] = true; | ||
286 | } | ||
270 | } | 287 | } |
271 | else | 288 | else |
272 | { | 289 | { |
@@ -305,7 +322,10 @@ class SimplePie_Parser | |||
305 | $this->data =& $this->data['child'][end($this->namespace)][end($this->element)][]; | 322 | $this->data =& $this->data['child'][end($this->namespace)][end($this->element)][]; |
306 | $this->data = array('data' => '', 'attribs' => $attribs, 'xml_base' => end($this->xml_base), 'xml_base_explicit' => end($this->xml_base_explicit), 'xml_lang' => end($this->xml_lang)); | 323 | $this->data = array('data' => '', 'attribs' => $attribs, 'xml_base' => end($this->xml_base), 'xml_base_explicit' => end($this->xml_base_explicit), 'xml_lang' => end($this->xml_lang)); |
307 | if ((end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_03 && in_array(end($this->element), array('title', 'tagline', 'copyright', 'info', 'summary', 'content')) && isset($attribs['']['mode']) && $attribs['']['mode'] === 'xml') | 324 | if ((end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_03 && in_array(end($this->element), array('title', 'tagline', 'copyright', 'info', 'summary', 'content')) && isset($attribs['']['mode']) && $attribs['']['mode'] === 'xml') |
308 | || (end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_10 && in_array(end($this->element), array('rights', 'subtitle', 'summary', 'info', 'title', 'content')) && isset($attribs['']['type']) && $attribs['']['type'] === 'xhtml')) | 325 | || (end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_10 && in_array(end($this->element), array('rights', 'subtitle', 'summary', 'info', 'title', 'content')) && isset($attribs['']['type']) && $attribs['']['type'] === 'xhtml') |
326 | || (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_20 && in_array(end($this->element), array('title'))) | ||
327 | || (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_090 && in_array(end($this->element), array('title'))) | ||
328 | || (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_10 && in_array(end($this->element), array('title')))) | ||
309 | { | 329 | { |
310 | $this->current_xhtml_construct = 0; | 330 | $this->current_xhtml_construct = 0; |
311 | } | 331 | } |
diff --git a/inc/3rdparty/libraries/simplepie/library/SimplePie/Rating.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Rating.php new file mode 100644 index 00000000..8689e5df --- /dev/null +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Rating.php | |||
@@ -0,0 +1,129 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * SimplePie | ||
4 | * | ||
5 | * A PHP-Based RSS and Atom Feed Framework. | ||
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | ||
7 | * | ||
8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | ||
9 | * All rights reserved. | ||
10 | * | ||
11 | * Redistribution and use in source and binary forms, with or without modification, are | ||
12 | * permitted provided that the following conditions are met: | ||
13 | * | ||
14 | * * Redistributions of source code must retain the above copyright notice, this list of | ||
15 | * conditions and the following disclaimer. | ||
16 | * | ||
17 | * * Redistributions in binary form must reproduce the above copyright notice, this list | ||
18 | * of conditions and the following disclaimer in the documentation and/or other materials | ||
19 | * provided with the distribution. | ||
20 | * | ||
21 | * * Neither the name of the SimplePie Team nor the names of its contributors may be used | ||
22 | * to endorse or promote products derived from this software without specific prior | ||
23 | * written permission. | ||
24 | * | ||
25 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS | ||
26 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | ||
27 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS | ||
28 | * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
29 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
30 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
31 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | ||
32 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
33 | * POSSIBILITY OF SUCH DAMAGE. | ||
34 | * | ||
35 | * @package SimplePie | ||
36 | * @version 1.3.1 | ||
37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue | ||
38 | * @author Ryan Parman | ||
39 | * @author Geoffrey Sneddon | ||
40 | * @author Ryan McCue | ||
41 | * @link http://simplepie.org/ SimplePie | ||
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | ||
43 | */ | ||
44 | |||
45 | /** | ||
46 | * Handles `<media:rating>` or `<itunes:explicit>` tags as defined in Media RSS and iTunes RSS respectively | ||
47 | * | ||
48 | * Used by {@see SimplePie_Enclosure::get_rating()} and {@see SimplePie_Enclosure::get_ratings()} | ||
49 | * | ||
50 | * This class can be overloaded with {@see SimplePie::set_rating_class()} | ||
51 | * | ||
52 | * @package SimplePie | ||
53 | * @subpackage API | ||
54 | */ | ||
class SimplePie_Rating
{
	/**
	 * Organizational scheme the rating belongs to
	 *
	 * @var string
	 * @see get_scheme()
	 */
	public $scheme;

	/**
	 * The rating itself
	 *
	 * @var string
	 * @see get_value()
	 */
	public $value;

	/**
	 * Build a rating from its scheme and value
	 *
	 * Both arguments are stored as given; see the matching properties and
	 * accessors for their meaning.
	 */
	public function __construct($scheme = null, $value = null)
	{
		$this->value = $value;
		$this->scheme = $scheme;
	}

	/**
	 * Hash of the serialized object, used as its string form
	 *
	 * @return string
	 */
	public function __toString()
	{
		// There is no $this->data here
		$serialized = serialize($this);
		return md5($serialized);
	}

	/**
	 * Get the organizational scheme for the rating
	 *
	 * @return string|null Scheme, or null when none was supplied
	 */
	public function get_scheme()
	{
		return ($this->scheme === null) ? null : $this->scheme;
	}

	/**
	 * Get the value of the rating
	 *
	 * @return string|null Value, or null when none was supplied
	 */
	public function get_value()
	{
		return ($this->value === null) ? null : $this->value;
	}
}
diff --git a/inc/3rdparty/libraries/simplepie/library/SimplePie/Registry.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Registry.php new file mode 100644 index 00000000..1072cdeb --- /dev/null +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Registry.php | |||
@@ -0,0 +1,225 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * SimplePie | ||
4 | * | ||
5 | * A PHP-Based RSS and Atom Feed Framework. | ||
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | ||
7 | * | ||
8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | ||
9 | * All rights reserved. | ||
10 | * | ||
11 | * Redistribution and use in source and binary forms, with or without modification, are | ||
12 | * permitted provided that the following conditions are met: | ||
13 | * | ||
14 | * * Redistributions of source code must retain the above copyright notice, this list of | ||
15 | * conditions and the following disclaimer. | ||
16 | * | ||
17 | * * Redistributions in binary form must reproduce the above copyright notice, this list | ||
18 | * of conditions and the following disclaimer in the documentation and/or other materials | ||
19 | * provided with the distribution. | ||
20 | * | ||
21 | * * Neither the name of the SimplePie Team nor the names of its contributors may be used | ||
22 | * to endorse or promote products derived from this software without specific prior | ||
23 | * written permission. | ||
24 | * | ||
25 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS | ||
26 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | ||
27 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS | ||
28 | * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
29 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
30 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
31 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | ||
32 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
33 | * POSSIBILITY OF SUCH DAMAGE. | ||
34 | * | ||
35 | * @package SimplePie | ||
36 | * @version 1.3.1 | ||
37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue | ||
38 | * @author Ryan Parman | ||
39 | * @author Geoffrey Sneddon | ||
40 | * @author Ryan McCue | ||
41 | * @link http://simplepie.org/ SimplePie | ||
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | ||
43 | */ | ||
44 | |||
45 | /** | ||
46 | * Handles creating objects and calling methods | ||
47 | * | ||
48 | * Access this via {@see SimplePie::get_registry()} | ||
49 | * | ||
50 | * @package SimplePie | ||
51 | */ | ||
class SimplePie_Registry
{
	/**
	 * Default class mapping
	 *
	 * Overriding classes *must* subclass these. The keys are the type names
	 * accepted by {@see register()}, {@see get_class()}, {@see create()} and
	 * {@see call()}; they are case-sensitive.
	 *
	 * @var array
	 */
	protected $default = array(
		'Cache' => 'SimplePie_Cache',
		'Locator' => 'SimplePie_Locator',
		'Parser' => 'SimplePie_Parser',
		'File' => 'SimplePie_File',
		'Sanitize' => 'SimplePie_Sanitize',
		'Item' => 'SimplePie_Item',
		'Author' => 'SimplePie_Author',
		'Category' => 'SimplePie_Category',
		'Enclosure' => 'SimplePie_Enclosure',
		'Caption' => 'SimplePie_Caption',
		'Copyright' => 'SimplePie_Copyright',
		'Credit' => 'SimplePie_Credit',
		'Rating' => 'SimplePie_Rating',
		'Restriction' => 'SimplePie_Restriction',
		'Content_Type_Sniffer' => 'SimplePie_Content_Type_Sniffer',
		'Source' => 'SimplePie_Source',
		'Misc' => 'SimplePie_Misc',
		'XML_Declaration_Parser' => 'SimplePie_XML_Declaration_Parser',
		'Parse_Date' => 'SimplePie_Parse_Date',
	);

	/**
	 * Class mapping
	 *
	 * @see register()
	 * @var array
	 */
	protected $classes = array();

	/**
	 * Legacy classes
	 *
	 * @see register()
	 * @var array
	 */
	protected $legacy = array();

	/**
	 * Constructor
	 *
	 * No-op
	 */
	public function __construct() { }

	/**
	 * Register a class
	 *
	 * @param string $type See {@see $default} for names
	 * @param string $class Class name, must subclass the corresponding default
	 * @param bool $legacy Whether to enable legacy support for this class
	 * @return bool Successfulness; false for an unknown type or a class that
	 *              does not subclass the default for that type
	 */
	public function register($type, $class, $legacy = false)
	{
		// An unknown type has no default to compare against
		if (!isset($this->default[$type]))
		{
			return false;
		}

		if (!is_subclass_of($class, $this->default[$type]))
		{
			return false;
		}

		$this->classes[$type] = $class;

		if ($legacy)
		{
			$this->legacy[] = $class;
		}

		return true;
	}

	/**
	 * Get the class registered for a type
	 *
	 * Where possible, use {@see create()} or {@see call()} instead
	 *
	 * @param string $type
	 * @return string|null Registered class, else the default, else null
	 */
	public function get_class($type)
	{
		if (!empty($this->classes[$type]))
		{
			return $this->classes[$type];
		}
		if (!empty($this->default[$type]))
		{
			return $this->default[$type];
		}

		return null;
	}

	/**
	 * Create a new instance of a given type
	 *
	 * If the instance has a set_registry() method, it is handed this registry
	 * before being returned.
	 *
	 * @param string $type
	 * @param array $parameters Parameters to pass to the constructor
	 * @return object Instance of class
	 */
	public function &create($type, $parameters = array())
	{
		$class = $this->get_class($type);

		if (in_array($class, $this->legacy, true))
		{
			switch ($type)
			{
				// Type names are the capitalised keys of $default
				case 'Locator':
					// Legacy: file, timeout, useragent, file_class, max_checked_feeds, content_type_sniffer_class
					// Specified: file, timeout, useragent, max_checked_feeds
					$replacement = array($this->get_class('File'), $parameters[3], $this->get_class('Content_Type_Sniffer'));
					array_splice($parameters, 3, 1, $replacement);
					break;
			}
		}

		if (!method_exists($class, '__construct'))
		{
			$instance = new $class;
		}
		else
		{
			$reflector = new ReflectionClass($class);
			$instance = $reflector->newInstanceArgs($parameters);
		}

		if (method_exists($instance, 'set_registry'))
		{
			$instance->set_registry($this);
		}
		return $instance;
	}

	/**
	 * Call a static method for a type
	 *
	 * @param string $type
	 * @param string $method
	 * @param array $parameters
	 * @return mixed
	 */
	public function &call($type, $method, $parameters = array())
	{
		$class = $this->get_class($type);

		if (in_array($class, $this->legacy, true))
		{
			switch ($type)
			{
				case 'Cache':
					// For backwards compatibility with old non-static
					// Cache::create() methods
					if ($method === 'get_handler')
					{
						$result = @call_user_func_array(array($class, 'create'), $parameters);
						return $result;
					}
					break;
			}
		}

		$result = call_user_func_array(array($class, $method), $parameters);
		return $result;
	}
}
diff --git a/inc/3rdparty/simplepie/SimplePie/Restriction.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Restriction.php index b0e7667a..4ba371bf 100644 --- a/inc/3rdparty/simplepie/SimplePie/Restriction.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Restriction.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,24 +33,57 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | /** | |
46 | * Handles `<media:restriction>` as defined in Media RSS | ||
47 | * | ||
48 | * Used by {@see SimplePie_Enclosure::get_restriction()} and {@see SimplePie_Enclosure::get_restrictions()} | ||
49 | * | ||
50 | * This class can be overloaded with {@see SimplePie::set_restriction_class()} | ||
51 | * | ||
52 | * @package SimplePie | ||
53 | * @subpackage API | ||
54 | */ | ||
47 | class SimplePie_Restriction | 55 | class SimplePie_Restriction |
48 | { | 56 | { |
57 | /** | ||
58 | * Relationship ('allow'/'deny') | ||
59 | * | ||
60 | * @var string | ||
61 | * @see get_relationship() | ||
62 | */ | ||
49 | var $relationship; | 63 | var $relationship; |
64 | |||
65 | /** | ||
66 | * Type of restriction | ||
67 | * | ||
68 | * @var string | ||
69 | * @see get_type() | ||
70 | */ | ||
50 | var $type; | 71 | var $type; |
72 | |||
73 | /** | ||
74 | * Restricted values | ||
75 | * | ||
76 | * @var string | ||
77 | * @see get_value() | ||
78 | */ | ||
51 | var $value; | 79 | var $value; |
52 | 80 | ||
53 | // Constructor, used to input the data | 81 | /** |
82 | * Constructor, used to input the data | ||
83 | * | ||
84 | * For documentation on all the parameters, see the corresponding | ||
85 | * properties and their accessors | ||
86 | */ | ||
54 | public function __construct($relationship = null, $type = null, $value = null) | 87 | public function __construct($relationship = null, $type = null, $value = null) |
55 | { | 88 | { |
56 | $this->relationship = $relationship; | 89 | $this->relationship = $relationship; |
@@ -58,12 +91,22 @@ class SimplePie_Restriction | |||
58 | $this->value = $value; | 91 | $this->value = $value; |
59 | } | 92 | } |
60 | 93 | ||
94 | /** | ||
95 | * String-ified version | ||
96 | * | ||
97 | * @return string | ||
98 | */ | ||
61 | public function __toString() | 99 | public function __toString() |
62 | { | 100 | { |
63 | // There is no $this->data here | 101 | // There is no $this->data here |
64 | return md5(serialize($this)); | 102 | return md5(serialize($this)); |
65 | } | 103 | } |
66 | 104 | ||
105 | /** | ||
106 | * Get the relationship | ||
107 | * | ||
108 | * @return string|null Either 'allow' or 'deny' | ||
109 | */ | ||
67 | public function get_relationship() | 110 | public function get_relationship() |
68 | { | 111 | { |
69 | if ($this->relationship !== null) | 112 | if ($this->relationship !== null) |
@@ -76,6 +119,11 @@ class SimplePie_Restriction | |||
76 | } | 119 | } |
77 | } | 120 | } |
78 | 121 | ||
122 | /** | ||
123 | * Get the type | ||
124 | * | ||
125 | * @return string|null | ||
126 | */ | ||
79 | public function get_type() | 127 | public function get_type() |
80 | { | 128 | { |
81 | if ($this->type !== null) | 129 | if ($this->type !== null) |
@@ -88,6 +136,11 @@ class SimplePie_Restriction | |||
88 | } | 136 | } |
89 | } | 137 | } |
90 | 138 | ||
139 | /** | ||
140 | * Get the list of restricted things | ||
141 | * | ||
142 | * @return string|null | ||
143 | */ | ||
91 | public function get_value() | 144 | public function get_value() |
92 | { | 145 | { |
93 | if ($this->value !== null) | 146 | if ($this->value !== null) |
diff --git a/inc/3rdparty/simplepie/SimplePie/Sanitize.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Sanitize.php index 73705c0d..6810cc49 100644 --- a/inc/3rdparty/simplepie/SimplePie/Sanitize.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Sanitize.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,18 +33,22 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | |||
47 | /** | 45 | /** |
46 | * Used for data cleanup and post-processing | ||
47 | * | ||
48 | * | ||
49 | * This class can be overloaded with {@see SimplePie::set_sanitize_class()} | ||
50 | * | ||
51 | * @package SimplePie | ||
48 | * @todo Move to using an actual HTML parser (this will allow tags to be properly stripped, and to switch between HTML and XHTML), this will also make it easier to shorten a string while preserving HTML tags | 52 | * @todo Move to using an actual HTML parser (this will allow tags to be properly stripped, and to switch between HTML and XHTML), this will also make it easier to shorten a string while preserving HTML tags |
49 | */ | 53 | */ |
50 | class SimplePie_Sanitize | 54 | class SimplePie_Sanitize |
@@ -63,23 +67,16 @@ class SimplePie_Sanitize | |||
63 | var $enable_cache = true; | 67 | var $enable_cache = true; |
64 | var $cache_location = './cache'; | 68 | var $cache_location = './cache'; |
65 | var $cache_name_function = 'md5'; | 69 | var $cache_name_function = 'md5'; |
66 | var $cache_class = 'SimplePie_Cache'; | ||
67 | var $file_class = 'SimplePie_File'; | ||
68 | var $timeout = 10; | 70 | var $timeout = 10; |
69 | var $useragent = ''; | 71 | var $useragent = ''; |
70 | var $force_fsockopen = false; | 72 | var $force_fsockopen = false; |
73 | var $replace_url_attributes = null; | ||
71 | 74 | ||
72 | var $replace_url_attributes = array( | 75 | public function __construct() |
73 | 'a' => 'href', | 76 | { |
74 | 'area' => 'href', | 77 | // Set defaults |
75 | 'blockquote' => 'cite', | 78 | $this->set_url_replacements(null); |
76 | 'del' => 'cite', | 79 | } |
77 | 'form' => 'action', | ||
78 | 'img' => array('longdesc', 'src'), | ||
79 | 'input' => 'src', | ||
80 | 'ins' => 'cite', | ||
81 | 'q' => 'cite' | ||
82 | ); | ||
83 | 80 | ||
84 | public function remove_div($enable = true) | 81 | public function remove_div($enable = true) |
85 | { | 82 | { |
@@ -98,6 +95,11 @@ class SimplePie_Sanitize | |||
98 | } | 95 | } |
99 | } | 96 | } |
100 | 97 | ||
98 | public function set_registry(SimplePie_Registry $registry) | ||
99 | { | ||
100 | $this->registry = $registry; | ||
101 | } | ||
102 | |||
101 | public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache') | 103 | public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache') |
102 | { | 104 | { |
103 | if (isset($enable_cache)) | 105 | if (isset($enable_cache)) |
@@ -114,20 +116,10 @@ class SimplePie_Sanitize | |||
114 | { | 116 | { |
115 | $this->cache_name_function = (string) $cache_name_function; | 117 | $this->cache_name_function = (string) $cache_name_function; |
116 | } | 118 | } |
117 | |||
118 | if ($cache_class) | ||
119 | { | ||
120 | $this->cache_class = (string) $cache_class; | ||
121 | } | ||
122 | } | 119 | } |
123 | 120 | ||
124 | public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false) | 121 | public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false) |
125 | { | 122 | { |
126 | if ($file_class) | ||
127 | { | ||
128 | $this->file_class = (string) $file_class; | ||
129 | } | ||
130 | |||
131 | if ($timeout) | 123 | if ($timeout) |
132 | { | 124 | { |
133 | $this->timeout = (string) $timeout; | 125 | $this->timeout = (string) $timeout; |
@@ -201,12 +193,32 @@ class SimplePie_Sanitize | |||
201 | * Set element/attribute key/value pairs of HTML attributes | 193 | * Set element/attribute key/value pairs of HTML attributes |
202 | * containing URLs that need to be resolved relative to the feed | 194 | * containing URLs that need to be resolved relative to the feed |
203 | * | 195 | * |
204 | * @access public | 196 | * Defaults to |a|@href, |area|@href, |blockquote|@cite, |del|@cite, |
197 | * |form|@action, |img|@longdesc, |img|@src, |input|@src, |ins|@cite, | ||
198 | * |q|@cite | ||
199 | * | ||
205 | * @since 1.0 | 200 | * @since 1.0 |
206 | * @param array $element_attribute Element/attribute key/value pairs | 201 | * @param array|null $element_attribute Element/attribute key/value pairs, null for default |
207 | */ | 202 | */ |
208 | public function set_url_replacements($element_attribute = array('a' => 'href', 'area' => 'href', 'blockquote' => 'cite', 'del' => 'cite', 'form' => 'action', 'img' => array('longdesc', 'src'), 'input' => 'src', 'ins' => 'cite', 'q' => 'cite')) | 203 | public function set_url_replacements($element_attribute = null) |
209 | { | 204 | { |
205 | if ($element_attribute === null) | ||
206 | { | ||
207 | $element_attribute = array( | ||
208 | 'a' => 'href', | ||
209 | 'area' => 'href', | ||
210 | 'blockquote' => 'cite', | ||
211 | 'del' => 'cite', | ||
212 | 'form' => 'action', | ||
213 | 'img' => array( | ||
214 | 'longdesc', | ||
215 | 'src' | ||
216 | ), | ||
217 | 'input' => 'src', | ||
218 | 'ins' => 'cite', | ||
219 | 'q' => 'cite' | ||
220 | ); | ||
221 | } | ||
210 | $this->replace_url_attributes = (array) $element_attribute; | 222 | $this->replace_url_attributes = (array) $element_attribute; |
211 | } | 223 | } |
212 | 224 | ||
@@ -232,25 +244,27 @@ class SimplePie_Sanitize | |||
232 | $data = base64_decode($data); | 244 | $data = base64_decode($data); |
233 | } | 245 | } |
234 | 246 | ||
235 | if ($type & SIMPLEPIE_CONSTRUCT_XHTML) | ||
236 | { | ||
237 | if ($this->remove_div) | ||
238 | { | ||
239 | $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data); | ||
240 | $data = preg_replace('/<\/div>$/', '', $data); | ||
241 | } | ||
242 | else | ||
243 | { | ||
244 | $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data); | ||
245 | } | ||
246 | } | ||
247 | |||
248 | if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML)) | 247 | if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML)) |
249 | { | 248 | { |
249 | |||
250 | $document = new DOMDocument(); | ||
251 | $document->encoding = 'UTF-8'; | ||
252 | $data = $this->preprocess($data, $type); | ||
253 | |||
254 | set_error_handler(array('SimplePie_Misc', 'silence_errors')); | ||
255 | $document->loadHTML($data); | ||
256 | restore_error_handler(); | ||
257 | |||
250 | // Strip comments | 258 | // Strip comments |
251 | if ($this->strip_comments) | 259 | if ($this->strip_comments) |
252 | { | 260 | { |
253 | $data = SimplePie_Misc::strip_comments($data); | 261 | $xpath = new DOMXPath($document); |
262 | $comments = $xpath->query('//comment()'); | ||
263 | |||
264 | foreach ($comments as $comment) | ||
265 | { | ||
266 | $comment->parentNode->removeChild($comment); | ||
267 | } | ||
254 | } | 268 | } |
255 | 269 | ||
256 | // Strip out HTML tags and attributes that might cause various security problems. | 270 | // Strip out HTML tags and attributes that might cause various security problems. |
@@ -260,11 +274,7 @@ class SimplePie_Sanitize | |||
260 | { | 274 | { |
261 | foreach ($this->strip_htmltags as $tag) | 275 | foreach ($this->strip_htmltags as $tag) |
262 | { | 276 | { |
263 | $pcre = "/<($tag)" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . "(>(.*)<\/$tag" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>|(\/)?>)/siU'; | 277 | $this->strip_tag($tag, $document, $type); |
264 | while (preg_match($pcre, $data)) | ||
265 | { | ||
266 | $data = preg_replace_callback($pcre, array(&$this, 'do_strip_htmltags'), $data); | ||
267 | } | ||
268 | } | 278 | } |
269 | } | 279 | } |
270 | 280 | ||
@@ -272,7 +282,7 @@ class SimplePie_Sanitize | |||
272 | { | 282 | { |
273 | foreach ($this->strip_attributes as $attrib) | 283 | foreach ($this->strip_attributes as $attrib) |
274 | { | 284 | { |
275 | $data = preg_replace('/(<[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*)' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . trim($attrib) . '(?:\s*=\s*(?:"(?:[^"]*)"|\'(?:[^\']*)\'|(?:[^\x09\x0A\x0B\x0C\x0D\x20\x22\x27\x3E][^\x09\x0A\x0B\x0C\x0D\x20\x3E]*)?))?' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>/', '\1\2\3>', $data); | 285 | $this->strip_attr($attrib, $document); |
276 | } | 286 | } |
277 | } | 287 | } |
278 | 288 | ||
@@ -280,36 +290,34 @@ class SimplePie_Sanitize | |||
280 | $this->base = $base; | 290 | $this->base = $base; |
281 | foreach ($this->replace_url_attributes as $element => $attributes) | 291 | foreach ($this->replace_url_attributes as $element => $attributes) |
282 | { | 292 | { |
283 | $data = $this->replace_urls($data, $element, $attributes); | 293 | $this->replace_urls($document, $element, $attributes); |
284 | } | 294 | } |
285 | 295 | ||
286 | // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags. | 296 | // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags. |
287 | if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache) | 297 | if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache) |
288 | { | 298 | { |
289 | $images = SimplePie_Misc::get_element('img', $data); | 299 | $images = $document->getElementsByTagName('img'); |
290 | foreach ($images as $img) | 300 | foreach ($images as $img) |
291 | { | 301 | { |
292 | if (isset($img['attribs']['src']['data'])) | 302 | if ($img->hasAttribute('src')) |
293 | { | 303 | { |
294 | $image_url = call_user_func($this->cache_name_function, $img['attribs']['src']['data']); | 304 | $image_url = call_user_func($this->cache_name_function, $img->getAttribute('src')); |
295 | $cache = call_user_func(array($this->cache_class, 'create'), $this->cache_location, $image_url, 'spi'); | 305 | $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi')); |
296 | 306 | ||
297 | if ($cache->load()) | 307 | if ($cache->load()) |
298 | { | 308 | { |
299 | $img['attribs']['src']['data'] = $this->image_handler . $image_url; | 309 | $img->setAttribute('src', $this->image_handler . $image_url); |
300 | $data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data); | ||
301 | } | 310 | } |
302 | else | 311 | else |
303 | { | 312 | { |
304 | $file = new $this->file_class($img['attribs']['src']['data'], $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen); | 313 | $file = $this->registry->create('File', array($img['attribs']['src']['data'], $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen)); |
305 | $headers = $file->headers; | 314 | $headers = $file->headers; |
306 | 315 | ||
307 | if ($file->success && ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300))) | 316 | if ($file->success && ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300))) |
308 | { | 317 | { |
309 | if ($cache->save(array('headers' => $file->headers, 'body' => $file->body))) | 318 | if ($cache->save(array('headers' => $file->headers, 'body' => $file->body))) |
310 | { | 319 | { |
311 | $img['attribs']['src']['data'] = $this->image_handler . $image_url; | 320 | $img->setAttribute('src', $this->image_handler . $image_url); |
312 | $data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data); | ||
313 | } | 321 | } |
314 | else | 322 | else |
315 | { | 323 | { |
@@ -321,13 +329,38 @@ class SimplePie_Sanitize | |||
321 | } | 329 | } |
322 | } | 330 | } |
323 | 331 | ||
324 | // Having (possibly) taken stuff out, there may now be whitespace at the beginning/end of the data | 332 | // Remove the DOCTYPE |
325 | $data = trim($data); | 333 | // Seems to cause segfaulting if we don't do this |
334 | if ($document->firstChild instanceof DOMDocumentType) | ||
335 | { | ||
336 | $document->removeChild($document->firstChild); | ||
337 | } | ||
338 | |||
339 | // Move everything from the body to the root | ||
340 | $real_body = $document->getElementsByTagName('body')->item(0)->childNodes->item(0); | ||
341 | $document->replaceChild($real_body, $document->firstChild); | ||
342 | |||
343 | // Finally, convert to a HTML string | ||
344 | $data = trim($document->saveHTML()); | ||
345 | |||
346 | if ($this->remove_div) | ||
347 | { | ||
348 | $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data); | ||
349 | $data = preg_replace('/<\/div>$/', '', $data); | ||
350 | } | ||
351 | else | ||
352 | { | ||
353 | $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data); | ||
354 | } | ||
326 | } | 355 | } |
327 | 356 | ||
328 | if ($type & SIMPLEPIE_CONSTRUCT_IRI) | 357 | if ($type & SIMPLEPIE_CONSTRUCT_IRI) |
329 | { | 358 | { |
330 | $data = SimplePie_Misc::absolutize_url($data, $base); | 359 | $absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base)); |
360 | if ($absolute !== false) | ||
361 | { | ||
362 | $data = $absolute; | ||
363 | } | ||
331 | } | 364 | } |
332 | 365 | ||
333 | if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI)) | 366 | if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI)) |
@@ -337,40 +370,60 @@ class SimplePie_Sanitize | |||
337 | 370 | ||
338 | if ($this->output_encoding !== 'UTF-8') | 371 | if ($this->output_encoding !== 'UTF-8') |
339 | { | 372 | { |
340 | $data = SimplePie_Misc::change_encoding($data, 'UTF-8', $this->output_encoding); | 373 | $data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding)); |
341 | } | 374 | } |
342 | } | 375 | } |
343 | return $data; | 376 | return $data; |
344 | } | 377 | } |
345 | 378 | ||
346 | public function replace_urls($data, $tag, $attributes) | 379 | protected function preprocess($html, $type) |
347 | { | 380 | { |
381 | $ret = ''; | ||
382 | if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML) | ||
383 | { | ||
384 | // Atom XHTML constructs are wrapped with a div by default | ||
385 | // Note: No protection if $html contains a stray </div>! | ||
386 | $html = '<div>' . $html . '</div>'; | ||
387 | $ret .= '<!DOCTYPE html>'; | ||
388 | $content_type = 'text/html'; | ||
389 | } | ||
390 | else | ||
391 | { | ||
392 | $ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'; | ||
393 | $content_type = 'application/xhtml+xml'; | ||
394 | } | ||
395 | |||
396 | $ret .= '<html><head>'; | ||
397 | $ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />'; | ||
398 | $ret .= '</head><body>' . $html . '</body></html>'; | ||
399 | return $ret; | ||
400 | } | ||
401 | |||
402 | public function replace_urls($document, $tag, $attributes) | ||
403 | { | ||
404 | if (!is_array($attributes)) | ||
405 | { | ||
406 | $attributes = array($attributes); | ||
407 | } | ||
408 | |||
348 | if (!is_array($this->strip_htmltags) || !in_array($tag, $this->strip_htmltags)) | 409 | if (!is_array($this->strip_htmltags) || !in_array($tag, $this->strip_htmltags)) |
349 | { | 410 | { |
350 | $elements = SimplePie_Misc::get_element($tag, $data); | 411 | $elements = $document->getElementsByTagName($tag); |
351 | foreach ($elements as $element) | 412 | foreach ($elements as $element) |
352 | { | 413 | { |
353 | if (is_array($attributes)) | 414 | foreach ($attributes as $attribute) |
354 | { | 415 | { |
355 | foreach ($attributes as $attribute) | 416 | if ($element->hasAttribute($attribute)) |
356 | { | 417 | { |
357 | if (isset($element['attribs'][$attribute]['data'])) | 418 | $value = $this->registry->call('Misc', 'absolutize_url', array($element->getAttribute($attribute), $this->base)); |
419 | if ($value !== false) | ||
358 | { | 420 | { |
359 | $element['attribs'][$attribute]['data'] = SimplePie_Misc::absolutize_url($element['attribs'][$attribute]['data'], $this->base); | 421 | $element->setAttribute($attribute, $value); |
360 | $new_element = SimplePie_Misc::element_implode($element); | ||
361 | $data = str_replace($element['full'], $new_element, $data); | ||
362 | $element['full'] = $new_element; | ||
363 | } | 422 | } |
364 | } | 423 | } |
365 | } | 424 | } |
366 | elseif (isset($element['attribs'][$attributes]['data'])) | ||
367 | { | ||
368 | $element['attribs'][$attributes]['data'] = SimplePie_Misc::absolutize_url($element['attribs'][$attributes]['data'], $this->base); | ||
369 | $data = str_replace($element['full'], SimplePie_Misc::element_implode($element), $data); | ||
370 | } | ||
371 | } | 425 | } |
372 | } | 426 | } |
373 | return $data; | ||
374 | } | 427 | } |
375 | 428 | ||
376 | public function do_strip_htmltags($match) | 429 | public function do_strip_htmltags($match) |
@@ -397,4 +450,100 @@ class SimplePie_Sanitize | |||
397 | return ''; | 450 | return ''; |
398 | } | 451 | } |
399 | } | 452 | } |
453 | |||
454 | protected function strip_tag($tag, $document, $type) | ||
455 | { | ||
456 | $xpath = new DOMXPath($document); | ||
457 | $elements = $xpath->query('body//' . $tag); | ||
458 | if ($this->encode_instead_of_strip) | ||
459 | { | ||
460 | foreach ($elements as $element) | ||
461 | { | ||
462 | $fragment = $document->createDocumentFragment(); | ||
463 | |||
464 | // For elements which aren't script or style, include the tag itself | ||
465 | if (!in_array($tag, array('script', 'style'))) | ||
466 | { | ||
467 | $text = '<' . $tag; | ||
468 | if ($element->hasAttributes()) | ||
469 | { | ||
470 | $attrs = array(); | ||
471 | foreach ($element->attributes as $name => $attr) | ||
472 | { | ||
473 | $value = $attr->value; | ||
474 | |||
475 | // In XHTML, empty values should never exist, so we repeat the value | ||
476 | if (empty($value) && ($type & SIMPLEPIE_CONSTRUCT_XHTML)) | ||
477 | { | ||
478 | $value = $name; | ||
479 | } | ||
480 | // For HTML, empty is fine | ||
481 | elseif (empty($value) && ($type & SIMPLEPIE_CONSTRUCT_HTML)) | ||
482 | { | ||
483 | $attrs[] = $name; | ||
484 | continue; | ||
485 | } | ||
486 | |||
487 | // Standard attribute text | ||
488 | $attrs[] = $name . '="' . $attr->value . '"'; | ||
489 | } | ||
490 | $text .= ' ' . implode(' ', $attrs); | ||
491 | } | ||
492 | $text .= '>'; | ||
493 | $fragment->appendChild(new DOMText($text)); | ||
494 | } | ||
495 | |||
496 | $number = $element->childNodes->length; | ||
497 | for ($i = $number; $i > 0; $i--) | ||
498 | { | ||
499 | $child = $element->childNodes->item(0); | ||
500 | $fragment->appendChild($child); | ||
501 | } | ||
502 | |||
503 | if (!in_array($tag, array('script', 'style'))) | ||
504 | { | ||
505 | $fragment->appendChild(new DOMText('</' . $tag . '>')); | ||
506 | } | ||
507 | |||
508 | $element->parentNode->replaceChild($fragment, $element); | ||
509 | } | ||
510 | |||
511 | return; | ||
512 | } | ||
513 | elseif (in_array($tag, array('script', 'style'))) | ||
514 | { | ||
515 | foreach ($elements as $element) | ||
516 | { | ||
517 | $element->parentNode->removeChild($element); | ||
518 | } | ||
519 | |||
520 | return; | ||
521 | } | ||
522 | else | ||
523 | { | ||
524 | foreach ($elements as $element) | ||
525 | { | ||
526 | $fragment = $document->createDocumentFragment(); | ||
527 | $number = $element->childNodes->length; | ||
528 | for ($i = $number; $i > 0; $i--) | ||
529 | { | ||
530 | $child = $element->childNodes->item(0); | ||
531 | $fragment->appendChild($child); | ||
532 | } | ||
533 | |||
534 | $element->parentNode->replaceChild($fragment, $element); | ||
535 | } | ||
536 | } | ||
537 | } | ||
538 | |||
539 | protected function strip_attr($attrib, $document) | ||
540 | { | ||
541 | $xpath = new DOMXPath($document); | ||
542 | $elements = $xpath->query('//*[@' . $attrib . ']'); | ||
543 | |||
544 | foreach ($elements as $element) | ||
545 | { | ||
546 | $element->removeAttribute($attrib); | ||
547 | } | ||
548 | } | ||
400 | } | 549 | } |
diff --git a/inc/3rdparty/simplepie/SimplePie/Source.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/Source.php index a6605c18..51d8e6c2 100644 --- a/inc/3rdparty/simplepie/SimplePie/Source.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/Source.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,21 +33,30 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | /** | |
46 | * Handles `<atom:source>` | ||
47 | * | ||
48 | * Used by {@see SimplePie_Item::get_source()} | ||
49 | * | ||
50 | * This class can be overloaded with {@see SimplePie::set_source_class()} | ||
51 | * | ||
52 | * @package SimplePie | ||
53 | * @subpackage API | ||
54 | */ | ||
47 | class SimplePie_Source | 55 | class SimplePie_Source |
48 | { | 56 | { |
49 | var $item; | 57 | var $item; |
50 | var $data = array(); | 58 | var $data = array(); |
59 | protected $registry; | ||
51 | 60 | ||
52 | public function __construct($item, $data) | 61 | public function __construct($item, $data) |
53 | { | 62 | { |
@@ -55,6 +64,11 @@ class SimplePie_Source | |||
55 | $this->data = $data; | 64 | $this->data = $data; |
56 | } | 65 | } |
57 | 66 | ||
67 | public function set_registry(SimplePie_Registry $registry) | ||
68 | { | ||
69 | $this->registry = $registry; | ||
70 | } | ||
71 | |||
58 | public function __toString() | 72 | public function __toString() |
59 | { | 73 | { |
60 | return md5(serialize($this->data)); | 74 | return md5(serialize($this->data)); |
@@ -91,11 +105,11 @@ class SimplePie_Source | |||
91 | { | 105 | { |
92 | if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'title')) | 106 | if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'title')) |
93 | { | 107 | { |
94 | return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_10_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 108 | return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
95 | } | 109 | } |
96 | elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'title')) | 110 | elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'title')) |
97 | { | 111 | { |
98 | return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_03_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 112 | return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
99 | } | 113 | } |
100 | elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'title')) | 114 | elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'title')) |
101 | { | 115 | { |
@@ -157,7 +171,7 @@ class SimplePie_Source | |||
157 | { | 171 | { |
158 | $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT); | 172 | $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT); |
159 | } | 173 | } |
160 | $categories[] = new $this->item->feed->category_class($term, $scheme, $label); | 174 | $categories[] = $this->registry->create('Category', array($term, $scheme, $label)); |
161 | } | 175 | } |
162 | foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'category') as $category) | 176 | foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'category') as $category) |
163 | { | 177 | { |
@@ -172,20 +186,20 @@ class SimplePie_Source | |||
172 | { | 186 | { |
173 | $scheme = null; | 187 | $scheme = null; |
174 | } | 188 | } |
175 | $categories[] = new $this->item->feed->category_class($term, $scheme, null); | 189 | $categories[] = $this->registry->create('Category', array($term, $scheme, null)); |
176 | } | 190 | } |
177 | foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_11, 'subject') as $category) | 191 | foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_11, 'subject') as $category) |
178 | { | 192 | { |
179 | $categories[] = new $this->item->feed->category_class($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null); | 193 | $categories[] = $this->registry->create('Category', array($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); |
180 | } | 194 | } |
181 | foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_10, 'subject') as $category) | 195 | foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_10, 'subject') as $category) |
182 | { | 196 | { |
183 | $categories[] = new $this->item->feed->category_class($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null); | 197 | $categories[] = $this->registry->create('Category', array($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); |
184 | } | 198 | } |
185 | 199 | ||
186 | if (!empty($categories)) | 200 | if (!empty($categories)) |
187 | { | 201 | { |
188 | return SimplePie_Misc::array_unique($categories); | 202 | return array_unique($categories); |
189 | } | 203 | } |
190 | else | 204 | else |
191 | { | 205 | { |
@@ -228,7 +242,7 @@ class SimplePie_Source | |||
228 | } | 242 | } |
229 | if ($name !== null || $email !== null || $uri !== null) | 243 | if ($name !== null || $email !== null || $uri !== null) |
230 | { | 244 | { |
231 | $authors[] = new $this->item->feed->author_class($name, $uri, $email); | 245 | $authors[] = $this->registry->create('Author', array($name, $uri, $email)); |
232 | } | 246 | } |
233 | } | 247 | } |
234 | if ($author = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'author')) | 248 | if ($author = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'author')) |
@@ -250,25 +264,25 @@ class SimplePie_Source | |||
250 | } | 264 | } |
251 | if ($name !== null || $email !== null || $url !== null) | 265 | if ($name !== null || $email !== null || $url !== null) |
252 | { | 266 | { |
253 | $authors[] = new $this->item->feed->author_class($name, $url, $email); | 267 | $authors[] = $this->registry->create('Author', array($name, $url, $email)); |
254 | } | 268 | } |
255 | } | 269 | } |
256 | foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_11, 'creator') as $author) | 270 | foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_11, 'creator') as $author) |
257 | { | 271 | { |
258 | $authors[] = new $this->item->feed->author_class($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null); | 272 | $authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); |
259 | } | 273 | } |
260 | foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_10, 'creator') as $author) | 274 | foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_10, 'creator') as $author) |
261 | { | 275 | { |
262 | $authors[] = new $this->item->feed->author_class($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null); | 276 | $authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); |
263 | } | 277 | } |
264 | foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'author') as $author) | 278 | foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'author') as $author) |
265 | { | 279 | { |
266 | $authors[] = new $this->item->feed->author_class($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null); | 280 | $authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); |
267 | } | 281 | } |
268 | 282 | ||
269 | if (!empty($authors)) | 283 | if (!empty($authors)) |
270 | { | 284 | { |
271 | return SimplePie_Misc::array_unique($authors); | 285 | return array_unique($authors); |
272 | } | 286 | } |
273 | else | 287 | else |
274 | { | 288 | { |
@@ -311,7 +325,7 @@ class SimplePie_Source | |||
311 | } | 325 | } |
312 | if ($name !== null || $email !== null || $uri !== null) | 326 | if ($name !== null || $email !== null || $uri !== null) |
313 | { | 327 | { |
314 | $contributors[] = new $this->item->feed->author_class($name, $uri, $email); | 328 | $contributors[] = $this->registry->create('Author', array($name, $uri, $email)); |
315 | } | 329 | } |
316 | } | 330 | } |
317 | foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'contributor') as $contributor) | 331 | foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'contributor') as $contributor) |
@@ -333,13 +347,13 @@ class SimplePie_Source | |||
333 | } | 347 | } |
334 | if ($name !== null || $email !== null || $url !== null) | 348 | if ($name !== null || $email !== null || $url !== null) |
335 | { | 349 | { |
336 | $contributors[] = new $this->item->feed->author_class($name, $url, $email); | 350 | $contributors[] = $this->registry->create('Author', array($name, $url, $email)); |
337 | } | 351 | } |
338 | } | 352 | } |
339 | 353 | ||
340 | if (!empty($contributors)) | 354 | if (!empty($contributors)) |
341 | { | 355 | { |
342 | return SimplePie_Misc::array_unique($contributors); | 356 | return array_unique($contributors); |
343 | } | 357 | } |
344 | else | 358 | else |
345 | { | 359 | { |
@@ -412,7 +426,7 @@ class SimplePie_Source | |||
412 | $keys = array_keys($this->data['links']); | 426 | $keys = array_keys($this->data['links']); |
413 | foreach ($keys as $key) | 427 | foreach ($keys as $key) |
414 | { | 428 | { |
415 | if (SimplePie_Misc::is_isegment_nz_nc($key)) | 429 | if ($this->registry->call('Misc', 'is_isegment_nz_nc', array($key))) |
416 | { | 430 | { |
417 | if (isset($this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key])) | 431 | if (isset($this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key])) |
418 | { | 432 | { |
@@ -446,11 +460,11 @@ class SimplePie_Source | |||
446 | { | 460 | { |
447 | if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'subtitle')) | 461 | if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'subtitle')) |
448 | { | 462 | { |
449 | return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_10_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 463 | return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
450 | } | 464 | } |
451 | elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'tagline')) | 465 | elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'tagline')) |
452 | { | 466 | { |
453 | return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_03_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 467 | return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
454 | } | 468 | } |
455 | elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'description')) | 469 | elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'description')) |
456 | { | 470 | { |
@@ -490,11 +504,11 @@ class SimplePie_Source | |||
490 | { | 504 | { |
491 | if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'rights')) | 505 | if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'rights')) |
492 | { | 506 | { |
493 | return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_10_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 507 | return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
494 | } | 508 | } |
495 | elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'copyright')) | 509 | elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'copyright')) |
496 | { | 510 | { |
497 | return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_03_construct_type($return[0]['attribs']), $this->get_base($return[0])); | 511 | return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); |
498 | } | 512 | } |
499 | elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'copyright')) | 513 | elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'copyright')) |
500 | { | 514 | { |
diff --git a/inc/3rdparty/simplepie/SimplePie/XML/Declaration/Parser.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/XML/Declaration/Parser.php index b7ebc6f5..aec19f10 100644 --- a/inc/3rdparty/simplepie/SimplePie/XML/Declaration/Parser.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/XML/Declaration/Parser.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,14 +33,13 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | ||
@@ -48,6 +47,7 @@ | |||
48 | * Parses the XML Declaration | 47 | * Parses the XML Declaration |
49 | * | 48 | * |
50 | * @package SimplePie | 49 | * @package SimplePie |
50 | * @subpackage Parsing | ||
51 | */ | 51 | */ |
52 | class SimplePie_XML_Declaration_Parser | 52 | class SimplePie_XML_Declaration_Parser |
53 | { | 53 | { |
diff --git a/inc/3rdparty/simplepie/SimplePie/gzdecode.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/gzdecode.php index ddbd517e..52e024ea 100644 --- a/inc/3rdparty/simplepie/SimplePie/gzdecode.php +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/gzdecode.php | |||
@@ -5,7 +5,7 @@ | |||
5 | * A PHP-Based RSS and Atom Feed Framework. | 5 | * A PHP-Based RSS and Atom Feed Framework. |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | 6 | * Takes the hard work out of managing a complete RSS/Atom solution. |
7 | * | 7 | * |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
9 | * All rights reserved. | 9 | * All rights reserved. |
10 | * | 10 | * |
11 | * Redistribution and use in source and binary forms, with or without modification, are | 11 | * Redistribution and use in source and binary forms, with or without modification, are |
@@ -33,21 +33,22 @@ | |||
33 | * POSSIBILITY OF SUCH DAMAGE. | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | * | 34 | * |
35 | * @package SimplePie | 35 | * @package SimplePie |
36 | * @version 1.3-dev | 36 | * @version 1.3.1 |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | 37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue |
38 | * @author Ryan Parman | 38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | 39 | * @author Geoffrey Sneddon |
40 | * @author Ryan McCue | 40 | * @author Ryan McCue |
41 | * @link http://simplepie.org/ SimplePie | 41 | * @link http://simplepie.org/ SimplePie |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | 42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License |
43 | * @todo phpDoc comments | ||
44 | */ | 43 | */ |
45 | 44 | ||
46 | 45 | ||
47 | /** | 46 | /** |
48 | * gzdecode | 47 | * Decode 'gzip' encoded HTTP data |
49 | * | 48 | * |
50 | * @package SimplePie | 49 | * @package SimplePie |
50 | * @subpackage HTTP | ||
51 | * @link http://www.gzip.org/format.txt | ||
51 | */ | 52 | */ |
52 | class SimplePie_gzdecode | 53 | class SimplePie_gzdecode |
53 | { | 54 | { |
@@ -55,6 +56,7 @@ class SimplePie_gzdecode | |||
55 | * Compressed data | 56 | * Compressed data |
56 | * | 57 | * |
57 | * @access private | 58 | * @access private |
59 | * @var string | ||
58 | * @see gzdecode::$data | 60 | * @see gzdecode::$data |
59 | */ | 61 | */ |
60 | var $compressed_data; | 62 | var $compressed_data; |
@@ -63,6 +65,7 @@ class SimplePie_gzdecode | |||
63 | * Size of compressed data | 65 | * Size of compressed data |
64 | * | 66 | * |
65 | * @access private | 67 | * @access private |
68 | * @var int | ||
66 | */ | 69 | */ |
67 | var $compressed_size; | 70 | var $compressed_size; |
68 | 71 | ||
@@ -70,6 +73,7 @@ class SimplePie_gzdecode | |||
70 | * Minimum size of a valid gzip string | 73 | * Minimum size of a valid gzip string |
71 | * | 74 | * |
72 | * @access private | 75 | * @access private |
76 | * @var int | ||
73 | */ | 77 | */ |
74 | var $min_compressed_size = 18; | 78 | var $min_compressed_size = 18; |
75 | 79 | ||
@@ -77,6 +81,7 @@ class SimplePie_gzdecode | |||
77 | * Current position of pointer | 81 | * Current position of pointer |
78 | * | 82 | * |
79 | * @access private | 83 | * @access private |
84 | * @var int | ||
80 | */ | 85 | */ |
81 | var $position = 0; | 86 | var $position = 0; |
82 | 87 | ||
@@ -84,6 +89,7 @@ class SimplePie_gzdecode | |||
84 | * Flags (FLG) | 89 | * Flags (FLG) |
85 | * | 90 | * |
86 | * @access private | 91 | * @access private |
92 | * @var int | ||
87 | */ | 93 | */ |
88 | var $flags; | 94 | var $flags; |
89 | 95 | ||
@@ -92,6 +98,7 @@ class SimplePie_gzdecode | |||
92 | * | 98 | * |
93 | * @access public | 99 | * @access public |
94 | * @see gzdecode::$compressed_data | 100 | * @see gzdecode::$compressed_data |
101 | * @var string | ||
95 | */ | 102 | */ |
96 | var $data; | 103 | var $data; |
97 | 104 | ||
@@ -99,6 +106,7 @@ class SimplePie_gzdecode | |||
99 | * Modified time | 106 | * Modified time |
100 | * | 107 | * |
101 | * @access public | 108 | * @access public |
109 | * @var int | ||
102 | */ | 110 | */ |
103 | var $MTIME; | 111 | var $MTIME; |
104 | 112 | ||
@@ -106,6 +114,7 @@ class SimplePie_gzdecode | |||
106 | * Extra Flags | 114 | * Extra Flags |
107 | * | 115 | * |
108 | * @access public | 116 | * @access public |
117 | * @var int | ||
109 | */ | 118 | */ |
110 | var $XFL; | 119 | var $XFL; |
111 | 120 | ||
@@ -113,6 +122,7 @@ class SimplePie_gzdecode | |||
113 | * Operating System | 122 | * Operating System |
114 | * | 123 | * |
115 | * @access public | 124 | * @access public |
125 | * @var int | ||
116 | */ | 126 | */ |
117 | var $OS; | 127 | var $OS; |
118 | 128 | ||
@@ -122,6 +132,7 @@ class SimplePie_gzdecode | |||
122 | * @access public | 132 | * @access public |
123 | * @see gzdecode::$extra_field | 133 | * @see gzdecode::$extra_field |
124 | * @see gzdecode::$SI2 | 134 | * @see gzdecode::$SI2 |
135 | * @var string | ||
125 | */ | 136 | */ |
126 | var $SI1; | 137 | var $SI1; |
127 | 138 | ||
@@ -131,6 +142,7 @@ class SimplePie_gzdecode | |||
131 | * @access public | 142 | * @access public |
132 | * @see gzdecode::$extra_field | 143 | * @see gzdecode::$extra_field |
133 | * @see gzdecode::$SI1 | 144 | * @see gzdecode::$SI1 |
145 | * @var string | ||
134 | */ | 146 | */ |
135 | var $SI2; | 147 | var $SI2; |
136 | 148 | ||
@@ -140,6 +152,7 @@ class SimplePie_gzdecode | |||
140 | * @access public | 152 | * @access public |
141 | * @see gzdecode::$SI1 | 153 | * @see gzdecode::$SI1 |
142 | * @see gzdecode::$SI2 | 154 | * @see gzdecode::$SI2 |
155 | * @var string | ||
143 | */ | 156 | */ |
144 | var $extra_field; | 157 | var $extra_field; |
145 | 158 | ||
@@ -147,6 +160,7 @@ class SimplePie_gzdecode | |||
147 | * Original filename | 160 | * Original filename |
148 | * | 161 | * |
149 | * @access public | 162 | * @access public |
163 | * @var string | ||
150 | */ | 164 | */ |
151 | var $filename; | 165 | var $filename; |
152 | 166 | ||
@@ -154,13 +168,15 @@ class SimplePie_gzdecode | |||
154 | * Human readable comment | 168 | * Human readable comment |
155 | * | 169 | * |
156 | * @access public | 170 | * @access public |
171 | * @var string | ||
157 | */ | 172 | */ |
158 | var $comment; | 173 | var $comment; |
159 | 174 | ||
160 | /** | 175 | /** |
161 | * Don't allow anything to be set | 176 | * Don't allow anything to be set |
162 | * | 177 | * |
163 | * @access public | 178 | * @param string $name |
179 | * @param mixed $value | ||
164 | */ | 180 | */ |
165 | public function __set($name, $value) | 181 | public function __set($name, $value) |
166 | { | 182 | { |
@@ -170,7 +186,7 @@ class SimplePie_gzdecode | |||
170 | /** | 186 | /** |
171 | * Set the compressed string and related properties | 187 | * Set the compressed string and related properties |
172 | * | 188 | * |
173 | * @access public | 189 | * @param string $data |
174 | */ | 190 | */ |
175 | public function __construct($data) | 191 | public function __construct($data) |
176 | { | 192 | { |
@@ -181,7 +197,7 @@ class SimplePie_gzdecode | |||
181 | /** | 197 | /** |
182 | * Decode the GZIP stream | 198 | * Decode the GZIP stream |
183 | * | 199 | * |
184 | * @access public | 200 | * @return bool Successfulness |
185 | */ | 201 | */ |
186 | public function parse() | 202 | public function parse() |
187 | { | 203 | { |
diff --git a/inc/3rdparty/makefulltextfeed.php b/inc/3rdparty/makefulltextfeed.php new file mode 100644 index 00000000..7104bc73 --- /dev/null +++ b/inc/3rdparty/makefulltextfeed.php | |||
@@ -0,0 +1,1195 @@ | |||
1 | <?php | ||
2 | // Full-Text RSS: Create Full-Text Feeds | ||
3 | // Author: Keyvan Minoukadeh | ||
4 | // Copyright (c) 2013 Keyvan Minoukadeh | ||
5 | // License: AGPLv3 | ||
6 | // Version: 3.1 | ||
7 | // Date: 2013-03-05 | ||
8 | // More info: http://fivefilters.org/content-only/ | ||
9 | // Help: http://help.fivefilters.org | ||
10 | |||
11 | /* | ||
12 | This program is free software: you can redistribute it and/or modify | ||
13 | it under the terms of the GNU Affero General Public License as published by | ||
14 | the Free Software Foundation, either version 3 of the License, or | ||
15 | (at your option) any later version. | ||
16 | |||
17 | This program is distributed in the hope that it will be useful, | ||
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
20 | GNU Affero General Public License for more details. | ||
21 | |||
22 | You should have received a copy of the GNU Affero General Public License | ||
23 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
24 | */ | ||
25 | |||
26 | // Usage | ||
27 | // ----- | ||
28 | // Request this file passing it your feed in the querystring: makefulltextfeed.php?url=mysite.org | ||
29 | // The following options can be passed in the querystring: | ||
30 | // * URL: url=[feed or website url] (required, should be URL-encoded - in php: urlencode($url)) | ||
31 | // * URL points to HTML (not feed): html=true (optional, by default it's automatically detected) | ||
32 | // * API key: key=[api key] (optional, refer to config.php) | ||
33 | // * Max entries to process: max=[max number of items] (optional) | ||
34 | |||
35 | error_reporting(E_ALL ^ E_NOTICE); | ||
36 | ini_set("display_errors", 1); | ||
37 | @set_time_limit(120); | ||
38 | |||
39 | // Deal with magic quotes | ||
40 | if (get_magic_quotes_gpc()) { | ||
41 | $process = array(&$_GET, &$_POST, &$_REQUEST); | ||
42 | while (list($key, $val) = each($process)) { | ||
43 | foreach ($val as $k => $v) { | ||
44 | unset($process[$key][$k]); | ||
45 | if (is_array($v)) { | ||
46 | $process[$key][stripslashes($k)] = $v; | ||
47 | $process[] = &$process[$key][stripslashes($k)]; | ||
48 | } else { | ||
49 | $process[$key][stripslashes($k)] = stripslashes($v); | ||
50 | } | ||
51 | } | ||
52 | } | ||
53 | unset($process); | ||
54 | } | ||
55 | |||
56 | // set include path | ||
57 | set_include_path(realpath(dirname(__FILE__).'/libraries').PATH_SEPARATOR.get_include_path()); | ||
58 | // Autoloading of classes allows us to include files only when they're | ||
59 | // needed. If we've got a cached copy, for example, only Zend_Cache is loaded. | ||
60 | function autoload($class_name) { | ||
61 | static $dir = null; | ||
62 | if ($dir === null) $dir = dirname(__FILE__).'/libraries/'; | ||
63 | static $mapping = array( | ||
64 | // Include FeedCreator for RSS/Atom creation | ||
65 | 'FeedWriter' => 'feedwriter/FeedWriter.php', | ||
66 | 'FeedItem' => 'feedwriter/FeedItem.php', | ||
67 | // Include ContentExtractor and Readability for identifying and extracting content from URLs | ||
68 | 'ContentExtractor' => 'content-extractor/ContentExtractor.php', | ||
69 | 'SiteConfig' => 'content-extractor/SiteConfig.php', | ||
70 | 'Readability' => 'readability/Readability.php', | ||
71 | // Include Humble HTTP Agent to allow parallel requests and response caching | ||
72 | 'HumbleHttpAgent' => 'humble-http-agent/HumbleHttpAgent.php', | ||
73 | 'SimplePie_HumbleHttpAgent' => 'humble-http-agent/SimplePie_HumbleHttpAgent.php', | ||
74 | 'CookieJar' => 'humble-http-agent/CookieJar.php', | ||
75 | // Include Zend Cache to improve performance (cache results) | ||
76 | 'Zend_Cache' => 'Zend/Cache.php', | ||
77 | // Language detect | ||
78 | 'Text_LanguageDetect' => 'language-detect/LanguageDetect.php', | ||
79 | // HTML5 Lib | ||
80 | 'HTML5_Parser' => 'html5/Parser.php', | ||
81 | // htmLawed - used if XSS filter is enabled (xss_filter) | ||
82 | 'htmLawed' => 'htmLawed/htmLawed.php' | ||
83 | ); | ||
84 | if (isset($mapping[$class_name])) { | ||
85 | debug("** Loading class $class_name ({$mapping[$class_name]})"); | ||
86 | require $dir.$mapping[$class_name]; | ||
87 | return true; | ||
88 | } else { | ||
89 | return false; | ||
90 | } | ||
91 | } | ||
92 | spl_autoload_register('autoload'); | ||
93 | require dirname(__FILE__).'/libraries/simplepie/autoloader.php'; | ||
94 | |||
95 | //////////////////////////////// | ||
96 | // Load config file | ||
97 | //////////////////////////////// | ||
98 | require dirname(__FILE__).'/config.php'; | ||
99 | |||
100 | //////////////////////////////// | ||
101 | // Prevent indexing/following by search engines because: | ||
102 | // 1. The content is already public and presumably indexed (why create duplicates?) | ||
103 | // 2. Not doing so might increase number of requests from search engines, thus increasing server load | ||
104 | // Note: feed readers and services such as Yahoo Pipes will not be affected by this header. | ||
105 | // Note: Using Disallow in a robots.txt file will be more effective (search engines will check | ||
106 | // that before even requesting makefulltextfeed.php). | ||
107 | //////////////////////////////// | ||
108 | header('X-Robots-Tag: noindex, nofollow'); | ||
109 | |||
110 | //////////////////////////////// | ||
111 | // Check if service is enabled | ||
112 | //////////////////////////////// | ||
113 | if (!$options->enabled) { | ||
114 | die('The full-text RSS service is currently disabled'); | ||
115 | } | ||
116 | |||
117 | //////////////////////////////// | ||
118 | // Debug mode? | ||
119 | // See the config file for debug options. | ||
120 | //////////////////////////////// | ||
121 | $debug_mode = false; | ||
122 | if (isset($_GET['debug'])) { | ||
123 | if ($options->debug === true || $options->debug == 'user') { | ||
124 | $debug_mode = true; | ||
125 | } elseif ($options->debug == 'admin') { | ||
126 | session_start(); | ||
127 | $debug_mode = (@$_SESSION['auth'] == 1); | ||
128 | } | ||
129 | if ($debug_mode) { | ||
130 | header('Content-Type: text/plain; charset=utf-8'); | ||
131 | } else { | ||
132 | if ($options->debug == 'admin') { | ||
133 | die('You must be logged in to the <a href="admin/">admin area</a> to see debug output.'); | ||
134 | } else { | ||
135 | die('Debugging is disabled.'); | ||
136 | } | ||
137 | } | ||
138 | } | ||
139 | |||
140 | //////////////////////////////// | ||
141 | // Check for APC | ||
142 | //////////////////////////////// | ||
143 | $options->apc = $options->apc && function_exists('apc_add'); | ||
144 | if ($options->apc) { | ||
145 | debug('APC is enabled and available on server'); | ||
146 | } else { | ||
147 | debug('APC is disabled or not available on server'); | ||
148 | } | ||
149 | |||
150 | //////////////////////////////// | ||
151 | // Check for smart cache | ||
152 | //////////////////////////////// | ||
153 | $options->smart_cache = $options->smart_cache && function_exists('apc_inc'); | ||
154 | |||
155 | //////////////////////////////// | ||
156 | // Check for feed URL | ||
157 | //////////////////////////////// | ||
158 | if (!isset($_GET['url'])) { | ||
159 | die('No URL supplied'); | ||
160 | } | ||
161 | $url = trim($_GET['url']); | ||
162 | if (strtolower(substr($url, 0, 7)) == 'feed://') { | ||
163 | $url = 'http://'.substr($url, 7); | ||
164 | } | ||
165 | if (!preg_match('!^https?://.+!i', $url)) { | ||
166 | $url = 'http://'.$url; | ||
167 | } | ||
168 | |||
169 | $url = filter_var($url, FILTER_SANITIZE_URL); | ||
170 | $test = filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED); | ||
171 | // deal with bug http://bugs.php.net/51192 (present in PHP 5.2.13 and PHP 5.3.2) | ||
172 | if ($test === false) { | ||
173 | $test = filter_var(strtr($url, '-', '_'), FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED); | ||
174 | } | ||
175 | if ($test !== false && $test !== null && preg_match('!^https?://!', $url)) { | ||
176 | // all okay | ||
177 | unset($test); | ||
178 | } else { | ||
179 | die('Invalid URL supplied'); | ||
180 | } | ||
181 | debug("Supplied URL: $url"); | ||
182 | |||
183 | ///////////////////////////////// | ||
184 | // Redirect to hide API key | ||
185 | ///////////////////////////////// | ||
186 | if (isset($_GET['key']) && ($key_index = array_search($_GET['key'], $options->api_keys)) !== false) { | ||
187 | $host = $_SERVER['HTTP_HOST']; | ||
188 | $path = rtrim(dirname($_SERVER['SCRIPT_NAME']), '/\\'); | ||
189 | $_qs_url = (strtolower(substr($url, 0, 7)) == 'http://') ? substr($url, 7) : $url; | ||
190 | $redirect = 'http://'.htmlspecialchars($host.$path).'/makefulltextfeed.php?url='.urlencode($_qs_url); | ||
191 | $redirect .= '&key='.$key_index; | ||
192 | $redirect .= '&hash='.urlencode(sha1($_GET['key'].$url)); | ||
193 | if (isset($_GET['html'])) $redirect .= '&html='.urlencode($_GET['html']); | ||
194 | if (isset($_GET['max'])) $redirect .= '&max='.(int)$_GET['max']; | ||
195 | if (isset($_GET['links'])) $redirect .= '&links='.urlencode($_GET['links']); | ||
196 | if (isset($_GET['exc'])) $redirect .= '&exc='.urlencode($_GET['exc']); | ||
197 | if (isset($_GET['format'])) $redirect .= '&format='.urlencode($_GET['format']); | ||
198 | if (isset($_GET['callback'])) $redirect .= '&callback='.urlencode($_GET['callback']); | ||
199 | if (isset($_GET['l'])) $redirect .= '&l='.urlencode($_GET['l']); | ||
200 | if (isset($_GET['xss'])) $redirect .= '&xss'; | ||
201 | if (isset($_GET['use_extracted_title'])) $redirect .= '&use_extracted_title'; | ||
202 | if (isset($_GET['debug'])) $redirect .= '&debug'; | ||
203 | if ($debug_mode) { | ||
204 | debug('Redirecting to hide access key, follow URL below to continue'); | ||
205 | debug("Location: $redirect"); | ||
206 | } else { | ||
207 | header("Location: $redirect"); | ||
208 | } | ||
209 | exit; | ||
210 | } | ||
211 | |||
212 | /////////////////////////////////////////////// | ||
213 | // Set timezone. | ||
214 | // Prevents warnings, but needs more testing - | ||
215 | // perhaps if timezone is set in php.ini we | ||
216 | // don't need to set it at all... | ||
217 | /////////////////////////////////////////////// | ||
218 | if (!ini_get('date.timezone') || !@date_default_timezone_set(ini_get('date.timezone'))) { | ||
219 | date_default_timezone_set('UTC'); | ||
220 | } | ||
221 | |||
222 | /////////////////////////////////////////////// | ||
223 | // Check if the request is explicitly for an HTML page | ||
224 | /////////////////////////////////////////////// | ||
225 | $html_only = (isset($_GET['html']) && ($_GET['html'] == '1' || $_GET['html'] == 'true')); | ||
226 | |||
227 | /////////////////////////////////////////////// | ||
228 | // Check if valid key supplied | ||
229 | /////////////////////////////////////////////// | ||
230 | $valid_key = false; | ||
231 | if (isset($_GET['key']) && isset($_GET['hash']) && isset($options->api_keys[(int)$_GET['key']])) { | ||
232 | $valid_key = ($_GET['hash'] == sha1($options->api_keys[(int)$_GET['key']].$url)); | ||
233 | } | ||
234 | $key_index = ($valid_key) ? (int)$_GET['key'] : 0; | ||
235 | if (!$valid_key && $options->key_required) { | ||
236 | die('A valid key must be supplied'); | ||
237 | } | ||
238 | if (!$valid_key && isset($_GET['key']) && $_GET['key'] != '') { | ||
239 | die('The entered key is invalid'); | ||
240 | } | ||
241 | |||
242 | if (file_exists('custom_init.php')) require 'custom_init.php'; | ||
243 | |||
244 | /////////////////////////////////////////////// | ||
245 | // Check URL against list of blacklisted URLs | ||
246 | /////////////////////////////////////////////// | ||
247 | if (!url_allowed($url)) die('URL blocked'); | ||
248 | |||
249 | /////////////////////////////////////////////// | ||
250 | // Max entries | ||
251 | // see config.php to find these values | ||
252 | /////////////////////////////////////////////// | ||
253 | if (isset($_GET['max'])) { | ||
254 | $max = (int)$_GET['max']; | ||
255 | if ($valid_key) { | ||
256 | $max = min($max, $options->max_entries_with_key); | ||
257 | } else { | ||
258 | $max = min($max, $options->max_entries); | ||
259 | } | ||
260 | } else { | ||
261 | if ($valid_key) { | ||
262 | $max = $options->default_entries_with_key; | ||
263 | } else { | ||
264 | $max = $options->default_entries; | ||
265 | } | ||
266 | } | ||
267 | |||
268 | /////////////////////////////////////////////// | ||
269 | // Link handling | ||
270 | /////////////////////////////////////////////// | ||
271 | if (isset($_GET['links']) && in_array($_GET['links'], array('preserve', 'footnotes', 'remove'))) { | ||
272 | $links = $_GET['links']; | ||
273 | } else { | ||
274 | $links = 'preserve'; | ||
275 | } | ||
276 | |||
277 | /////////////////////////////////////////////// | ||
278 | // Favour item titles in feed? | ||
279 | /////////////////////////////////////////////// | ||
// Decide whether titles supplied by the feed take priority over titles
// produced by content extraction.
//  - config value 'user': the request decides; feed titles are favoured
//    unless the 'use_extracted_title' query parameter is present.
//  - any other config value is used as-is.
// The comparison has to be strict: with a loose ==, a boolean true in the
// config compares equal to the string 'user' (the string is converted to
// bool), which would hand the decision to the request even though the
// setting was meant to be forced.
if ($options->favour_feed_titles === 'user') {
	$favour_feed_titles = !isset($_GET['use_extracted_title']);
} else {
	$favour_feed_titles = $options->favour_feed_titles;
}
286 | |||
287 | /////////////////////////////////////////////// | ||
288 | // Exclude items if extraction fails | ||
289 | /////////////////////////////////////////////// | ||
290 | if ($options->exclude_items_on_fail === 'user') { | ||
291 | $exclude_on_fail = (isset($_GET['exc']) && ($_GET['exc'] == '1')); | ||
292 | } else { | ||
293 | $exclude_on_fail = $options->exclude_items_on_fail; | ||
294 | } | ||
295 | |||
296 | /////////////////////////////////////////////// | ||
297 | // Detect language | ||
298 | /////////////////////////////////////////////// | ||
299 | if ($options->detect_language === 'user') { | ||
300 | if (isset($_GET['l'])) { | ||
301 | $detect_language = (int)$_GET['l']; | ||
302 | } else { | ||
303 | $detect_language = 1; | ||
304 | } | ||
305 | } else { | ||
306 | $detect_language = $options->detect_language; | ||
307 | } | ||
308 | |||
309 | if ($detect_language >= 2) { | ||
310 | $language_codes = array('albanian' => 'sq','arabic' => 'ar','azeri' => 'az','bengali' => 'bn','bulgarian' => 'bg', | ||
311 | 'cebuano' => 'ceb', // ISO 639-2 | ||
312 | 'croatian' => 'hr','czech' => 'cs','danish' => 'da','dutch' => 'nl','english' => 'en','estonian' => 'et','farsi' => 'fa','finnish' => 'fi','french' => 'fr','german' => 'de','hausa' => 'ha', | ||
313 | 'hawaiian' => 'haw', // ISO 639-2 | ||
314 | 'hindi' => 'hi','hungarian' => 'hu','icelandic' => 'is','indonesian' => 'id','italian' => 'it','kazakh' => 'kk','kyrgyz' => 'ky','latin' => 'la','latvian' => 'lv','lithuanian' => 'lt','macedonian' => 'mk','mongolian' => 'mn','nepali' => 'ne','norwegian' => 'no','pashto' => 'ps', | ||
315 | 'pidgin' => 'cpe', // ISO 639-2 | ||
316 | 'polish' => 'pl','portuguese' => 'pt','romanian' => 'ro','russian' => 'ru','serbian' => 'sr','slovak' => 'sk','slovene' => 'sl','somali' => 'so','spanish' => 'es','swahili' => 'sw','swedish' => 'sv','tagalog' => 'tl','turkish' => 'tr','ukrainian' => 'uk','urdu' => 'ur','uzbek' => 'uz','vietnamese' => 'vi','welsh' => 'cy'); | ||
317 | } | ||
318 | $use_cld = extension_loaded('cld') && (version_compare(PHP_VERSION, '5.3.0') >= 0); | ||
319 | |||
320 | ///////////////////////////////////// | ||
321 | // Check for valid format | ||
322 | // (stick to RSS (or RSS as JSON) for the time being) | ||
323 | ///////////////////////////////////// | ||
324 | if (isset($_GET['format']) && $_GET['format'] == 'json') { | ||
325 | $format = 'json'; | ||
326 | } else { | ||
327 | $format = 'rss'; | ||
328 | } | ||
329 | |||
330 | ///////////////////////////////////// | ||
331 | // Should we do XSS filtering? | ||
332 | ///////////////////////////////////// | ||
333 | if ($options->xss_filter === 'user') { | ||
334 | $xss_filter = isset($_GET['xss']); | ||
335 | } else { | ||
336 | $xss_filter = $options->xss_filter; | ||
337 | } | ||
338 | if (!$xss_filter && isset($_GET['xss'])) { | ||
339 | die('XSS filtering is disabled in config'); | ||
340 | } | ||
341 | |||
342 | ///////////////////////////////////// | ||
343 | // Check for JSONP | ||
344 | // Regex from https://gist.github.com/1217080 | ||
345 | ///////////////////////////////////// | ||
346 | $callback = null; | ||
347 | if ($format =='json' && isset($_GET['callback'])) { | ||
348 | $callback = trim($_GET['callback']); | ||
349 | foreach (explode('.', $callback) as $_identifier) { | ||
350 | if (!preg_match('/^[a-zA-Z_$][0-9a-zA-Z_$]*(?:\[(?:".+"|\'.+\'|\d+)\])*?$/', $_identifier)) { | ||
351 | die('Invalid JSONP callback'); | ||
352 | } | ||
353 | } | ||
354 | debug("JSONP callback: $callback"); | ||
355 | } | ||
356 | |||
357 | ////////////////////////////////// | ||
358 | // Enable Cross-Origin Resource Sharing (CORS) | ||
359 | ////////////////////////////////// | ||
360 | if ($options->cors) header('Access-Control-Allow-Origin: *'); | ||
361 | |||
362 | ////////////////////////////////// | ||
363 | // Check for cached copy | ||
364 | ////////////////////////////////// | ||
365 | if ($options->caching) { | ||
366 | debug('Caching is enabled...'); | ||
367 | $cache_id = md5($max.$url.$valid_key.$links.$favour_feed_titles.$xss_filter.$exclude_on_fail.$format.$detect_language.(int)isset($_GET['pubsub'])); | ||
368 | $check_cache = true; | ||
369 | if ($options->apc && $options->smart_cache) { | ||
370 | apc_add("cache.$cache_id", 0, 10*60); | ||
371 | $apc_cache_hits = (int)apc_fetch("cache.$cache_id"); | ||
372 | $check_cache = ($apc_cache_hits >= 2); | ||
373 | apc_inc("cache.$cache_id"); | ||
374 | if ($check_cache) { | ||
375 | debug('Cache key found in APC, we\'ll try to load cache file from disk'); | ||
376 | } else { | ||
377 | debug('Cache key not found in APC'); | ||
378 | } | ||
379 | } | ||
380 | if ($check_cache) { | ||
381 | $cache = get_cache(); | ||
382 | if ($data = $cache->load($cache_id)) { | ||
383 | if ($debug_mode) { | ||
384 | debug('Loaded cached copy'); | ||
385 | exit; | ||
386 | } | ||
387 | if ($format == 'json') { | ||
388 | if ($callback === null) { | ||
389 | header('Content-type: application/json; charset=UTF-8'); | ||
390 | } else { | ||
391 | header('Content-type: application/javascript; charset=UTF-8'); | ||
392 | } | ||
393 | } else { | ||
394 | header('Content-type: text/xml; charset=UTF-8'); | ||
395 | header('X-content-type-options: nosniff'); | ||
396 | } | ||
397 | if (headers_sent()) die('Some data has already been output, can\'t send RSS file'); | ||
398 | if ($callback) { | ||
399 | echo "$callback($data);"; | ||
400 | } else { | ||
401 | echo $data; | ||
402 | } | ||
403 | exit; | ||
404 | } | ||
405 | } | ||
406 | } | ||
407 | |||
408 | ////////////////////////////////// | ||
409 | // Set Expires header | ||
410 | ////////////////////////////////// | ||
411 | if (!$debug_mode) { | ||
412 | header('Expires: ' . gmdate('D, d M Y H:i:s', time()+(60*10)) . ' GMT'); | ||
413 | } | ||
414 | |||
415 | ////////////////////////////////// | ||
416 | // Set up HTTP agent | ||
417 | ////////////////////////////////// | ||
418 | $http = new HumbleHttpAgent(); | ||
419 | $http->debug = $debug_mode; | ||
420 | $http->userAgentMap = $options->user_agents; | ||
421 | $http->headerOnlyTypes = array_keys($options->content_type_exc); | ||
422 | $http->rewriteUrls = $options->rewrite_url; | ||
423 | |||
424 | ////////////////////////////////// | ||
425 | // Set up Content Extractor | ||
426 | ////////////////////////////////// | ||
427 | $extractor = new ContentExtractor(dirname(__FILE__).'/site_config/custom', dirname(__FILE__).'/site_config/standard'); | ||
428 | $extractor->debug = $debug_mode; | ||
429 | SiteConfig::$debug = $debug_mode; | ||
430 | SiteConfig::use_apc($options->apc); | ||
431 | $extractor->fingerprints = $options->fingerprints; | ||
432 | $extractor->allowedParsers = $options->allowed_parsers; | ||
433 | |||
434 | //////////////////////////////// | ||
435 | // Get RSS/Atom feed | ||
436 | //////////////////////////////// | ||
437 | if (!$html_only) { | ||
438 | debug('--------'); | ||
439 | debug("Attempting to process URL as feed"); | ||
440 | // Send user agent header showing PHP (prevents a HTML response from feedburner) | ||
441 | $http->userAgentDefault = HumbleHttpAgent::UA_PHP; | ||
442 | // configure SimplePie HTTP extension class to use our HumbleHttpAgent instance | ||
443 | SimplePie_HumbleHttpAgent::set_agent($http); | ||
444 | $feed = new SimplePie(); | ||
445 | // some feeds use the text/html content type - force_feed tells SimplePie to process anyway | ||
446 | $feed->force_feed(true); | ||
447 | $feed->set_file_class('SimplePie_HumbleHttpAgent'); | ||
448 | //$feed->set_feed_url($url); // colons appearing in the URL's path get encoded | ||
449 | $feed->feed_url = $url; | ||
450 | $feed->set_autodiscovery_level(SIMPLEPIE_LOCATOR_NONE); | ||
451 | $feed->set_timeout(20); | ||
452 | $feed->enable_cache(false); | ||
453 | $feed->set_stupidly_fast(true); | ||
454 | $feed->enable_order_by_date(false); // we don't want to do anything to the feed | ||
455 | $feed->set_url_replacements(array()); | ||
456 | // initialise the feed | ||
457 | // the @ suppresses notices which on some servers causes a 500 internal server error | ||
458 | $result = @$feed->init(); | ||
459 | //$feed->handle_content_type(); | ||
460 | //$feed->get_title(); | ||
461 | if ($result && (!is_array($feed->data) || count($feed->data) == 0)) { | ||
462 | die('Sorry, no feed items found'); | ||
463 | } | ||
464 | // from now on, we'll identify ourselves as a browser | ||
465 | $http->userAgentDefault = HumbleHttpAgent::UA_BROWSER; | ||
466 | } | ||
467 | |||
468 | //////////////////////////////////////////////////////////////////////////////// | ||
469 | // Our given URL is not a feed, so let's create our own feed with a single item: | ||
470 | // the given URL. This basically treats all non-feed URLs as if they were | ||
471 | // single-item feeds. | ||
472 | //////////////////////////////////////////////////////////////////////////////// | ||
473 | $isDummyFeed = false; | ||
474 | if ($html_only || !$result) { | ||
475 | debug('--------'); | ||
476 | debug("Constructing a single-item feed from URL"); | ||
477 | $isDummyFeed = true; | ||
478 | unset($feed, $result); | ||
479 | // create single item dummy feed object | ||
/**
 * Stand-in feed object wrapping a single URL.
 *
 * Provides the feed accessors this script calls (title, description, link,
 * language, image URL, items) so a plain web page can be processed through
 * the same code path as a parsed feed.
 */
class DummySingleItemFeed {
	/** The one and only item of this feed (a DummySingleItem). */
	public $item;

	/**
	 * @param string $url URL to expose as the feed's single item
	 */
	public function __construct($url) {
		$this->item = new DummySingleItem($url);
	}

	/** @return string always empty */
	public function get_title() {
		return '';
	}

	/** @return string fixed text followed by the item's URL */
	public function get_description() {
		return 'Content extracted from '.$this->item->url;
	}

	/** @return string the item's URL */
	public function get_link() {
		return $this->item->url;
	}

	/** @return bool always false */
	public function get_language() {
		return false;
	}

	/** @return bool always false */
	public function get_image_url() {
		return false;
	}

	/**
	 * Both arguments are accepted but ignored: the result is always the
	 * single wrapped item.
	 *
	 * @return array one-element array holding the item at index 0
	 */
	public function get_items($start=0, $max=1) {
		return array($this->item);
	}
}
/**
 * Stand-in feed item that knows nothing but its URL.
 *
 * Every accessor other than get_permalink() returns a fixed "no data" value
 * (null, false or an empty string).
 */
class DummySingleItem {
	/** URL this item points at. */
	public $url;

	/**
	 * @param string $url URL of the page represented by this item
	 */
	public function __construct($url) {
		$this->url = $url;
	}

	/** @return string the URL given at construction */
	public function get_permalink() {
		return $this->url;
	}

	/** @return null no title available */
	public function get_title() {
		return null;
	}

	/** @return bool always false, whatever format is asked for */
	public function get_date($format='') {
		return false;
	}

	/** @return null no author available */
	public function get_author($key=0) {
		return null;
	}

	/** @return null no authors available */
	public function get_authors() {
		return null;
	}

	/** @return string always empty */
	public function get_description() {
		return '';
	}

	/** @return null no enclosure available */
	public function get_enclosure($key=0, $prefer=null) {
		return null;
	}

	/** @return null no enclosures available */
	public function get_enclosures() {
		return null;
	}

	/** @return null no categories available */
	public function get_categories() {
		return null;
	}
}
503 | $feed = new DummySingleItemFeed($url); | ||
504 | } | ||
505 | |||
506 | //////////////////////////////////////////// | ||
507 | // Create full-text feed | ||
508 | //////////////////////////////////////////// | ||
509 | $output = new FeedWriter(); | ||
510 | $output->setTitle(strip_tags($feed->get_title())); | ||
511 | $output->setDescription(strip_tags($feed->get_description())); | ||
512 | $output->setXsl('css/feed.xsl'); // Chrome uses this, most browsers ignore it | ||
513 | if ($valid_key && isset($_GET['pubsub'])) { // used only on fivefilters.org at the moment | ||
514 | $output->addHub('http://fivefilters.superfeedr.com/'); | ||
515 | $output->addHub('http://pubsubhubbub.appspot.com/'); | ||
516 | $output->setSelf('http://'.$_SERVER['HTTP_HOST'].$_SERVER['REQUEST_URI']); | ||
517 | } | ||
518 | $output->setLink($feed->get_link()); // Google Reader uses this for pulling in favicons | ||
519 | if ($img_url = $feed->get_image_url()) { | ||
520 | $output->setImage($feed->get_title(), $feed->get_link(), $img_url); | ||
521 | } | ||
522 | |||
523 | //////////////////////////////////////////// | ||
524 | // Loop through feed items | ||
525 | //////////////////////////////////////////// | ||
526 | $items = $feed->get_items(0, $max); | ||
527 | // Request all feed items in parallel (if supported) | ||
528 | $urls_sanitized = array(); | ||
529 | $urls = array(); | ||
530 | foreach ($items as $key => $item) { | ||
531 | $permalink = htmlspecialchars_decode($item->get_permalink()); | ||
532 | // Colons in URL path segments get encoded by SimplePie, yet some sites expect them unencoded | ||
533 | $permalink = str_replace('%3A', ':', $permalink); | ||
534 | // validateUrl() strips non-ascii characters | ||
535 | // simplepie already sanitizes URLs so let's not do it again here. | ||
536 | //$permalink = $http->validateUrl($permalink); | ||
537 | if ($permalink) { | ||
538 | $urls_sanitized[] = $permalink; | ||
539 | } | ||
540 | $urls[$key] = $permalink; | ||
541 | } | ||
542 | debug('--------'); | ||
543 | debug('Fetching feed items'); | ||
544 | $http->fetchAll($urls_sanitized); | ||
545 | //$http->cacheAll(); | ||
546 | |||
547 | // count number of items added to full feed | ||
548 | $item_count = 0; | ||
549 | |||
550 | foreach ($items as $key => $item) { | ||
551 | debug('--------'); | ||
552 | debug('Processing feed item '.($item_count+1)); | ||
// Reset per-item state.
// $effective_url and $mime_info are only assigned once this item's page has
// been fetched, yet both are probed with isset() further down when the
// dc:format and dc:identifier elements are added. Clear them here so an item
// whose fetch fails does not pick up the values left by the previous item.
unset($effective_url, $mime_info);
$do_content_extraction = true;
$extract_result = false;
$text_sample = null;
556 | $permalink = $urls[$key]; | ||
557 | debug("Item URL: $permalink"); | ||
558 | $extracted_title = ''; | ||
559 | $feed_item_title = $item->get_title(); | ||
560 | if ($feed_item_title !== null) { | ||
561 | $feed_item_title = strip_tags(htmlspecialchars_decode($feed_item_title)); | ||
562 | } | ||
563 | $newitem = $output->createNewItem(); | ||
564 | $newitem->setTitle($feed_item_title); | ||
565 | if ($valid_key && isset($_GET['pubsub'])) { // used only on fivefilters.org at the moment | ||
566 | if ($permalink !== false) { | ||
567 | $newitem->setLink('http://fivefilters.org/content-only/redirect.php?url='.urlencode($permalink)); | ||
568 | } else { | ||
569 | $newitem->setLink('http://fivefilters.org/content-only/redirect.php?url='.urlencode($item->get_permalink())); | ||
570 | } | ||
571 | } else { | ||
572 | if ($permalink !== false) { | ||
573 | $newitem->setLink($permalink); | ||
574 | } else { | ||
575 | $newitem->setLink($item->get_permalink()); | ||
576 | } | ||
577 | } | ||
578 | //if ($permalink && ($response = $http->get($permalink, true)) && $response['status_code'] < 300) { | ||
579 | // Allowing error codes - some sites return correct content with error status | ||
580 | // e.g. prospectmagazine.co.uk returns 403 | ||
581 | if ($permalink && ($response = $http->get($permalink, true)) && ($response['status_code'] < 300 || $response['status_code'] > 400)) { | ||
582 | $effective_url = $response['effective_url']; | ||
583 | if (!url_allowed($effective_url)) continue; | ||
584 | // check if action defined for returned Content-Type | ||
585 | $mime_info = get_mime_action_info($response['headers']); | ||
// Apply the action (if any) configured for the Content-Type of the response.
if (isset($mime_info['action'])) {
	if ($mime_info['action'] == 'exclude') {
		continue; // skip this feed item entry
	} elseif ($mime_info['action'] == 'link') {
		// The URL is taken from the HTTP response and ends up inside HTML
		// attribute values, so it must be escaped before the markup is built.
		// ISO-8859-1 is named deliberately: it makes htmlspecialchars() work
		// byte by byte, so a URL that is not valid UTF-8 is still escaped
		// rather than turned into an empty string. The characters being
		// escaped are all ASCII, so UTF-8 text passes through unchanged.
		$_safe_url = htmlspecialchars($effective_url, ENT_QUOTES, 'ISO-8859-1');
		$_safe_name = htmlspecialchars($mime_info['name'], ENT_QUOTES, 'ISO-8859-1');
		if ($mime_info['type'] == 'image') {
			// images are shown inline, linked to themselves
			$html = "<a href=\"$_safe_url\"><img src=\"$_safe_url\" alt=\"$_safe_name\" /></a>";
		} else {
			// anything else becomes a plain download link
			$html = "<a href=\"$_safe_url\">Download $_safe_name</a>";
		}
		// the title is escaped by whatever writes it out, so keep it raw here
		$extracted_title = $mime_info['name'];
		$do_content_extraction = false;
	}
}
599 | if ($do_content_extraction) { | ||
600 | $html = $response['body']; | ||
601 | // remove strange things | ||
602 | $html = str_replace('</[>', '', $html); | ||
603 | $html = convert_to_utf8($html, $response['headers']); | ||
604 | // check site config for single page URL - fetch it if found | ||
605 | $is_single_page = false; | ||
606 | if ($single_page_response = getSinglePage($item, $html, $effective_url)) { | ||
607 | $is_single_page = true; | ||
608 | $html = $single_page_response['body']; | ||
609 | // remove strange things | ||
610 | $html = str_replace('</[>', '', $html); | ||
611 | $html = convert_to_utf8($html, $single_page_response['headers']); | ||
612 | $effective_url = $single_page_response['effective_url']; | ||
613 | debug("Retrieved single-page view from $effective_url"); | ||
614 | unset($single_page_response); | ||
615 | } | ||
616 | debug('--------'); | ||
617 | debug('Attempting to extract content'); | ||
618 | $extract_result = $extractor->process($html, $effective_url); | ||
619 | $readability = $extractor->readability; | ||
620 | $content_block = ($extract_result) ? $extractor->getContent() : null; | ||
621 | $extracted_title = ($extract_result) ? $extractor->getTitle() : ''; | ||
622 | // Deal with multi-page articles | ||
623 | //die('Next: '.$extractor->getNextPageUrl()); | ||
624 | $is_multi_page = (!$is_single_page && $extract_result && $extractor->getNextPageUrl()); | ||
625 | if ($options->multipage && $is_multi_page) { | ||
626 | debug('--------'); | ||
627 | debug('Attempting to process multi-page article'); | ||
628 | $multi_page_urls = array(); | ||
629 | $multi_page_content = array(); | ||
630 | while ($next_page_url = $extractor->getNextPageUrl()) { | ||
631 | debug('--------'); | ||
632 | debug('Processing next page: '.$next_page_url); | ||
633 | // If we've got URL, resolve against $url | ||
634 | if ($next_page_url = makeAbsoluteStr($effective_url, $next_page_url)) { | ||
635 | // check it's not what we have already! | ||
636 | if (!in_array($next_page_url, $multi_page_urls)) { | ||
637 | // it's not, so let's attempt to fetch it | ||
638 | $multi_page_urls[] = $next_page_url; | ||
639 | $_prev_ref = $http->referer; | ||
640 | if (($response = $http->get($next_page_url, true)) && $response['status_code'] < 300) { | ||
641 | // make sure mime type is not something with a different action associated | ||
642 | $page_mime_info = get_mime_action_info($response['headers']); | ||
643 | if (!isset($page_mime_info['action'])) { | ||
644 | $html = $response['body']; | ||
645 | // remove strange things | ||
646 | $html = str_replace('</[>', '', $html); | ||
647 | $html = convert_to_utf8($html, $response['headers']); | ||
648 | if ($extractor->process($html, $next_page_url)) { | ||
649 | $multi_page_content[] = $extractor->getContent(); | ||
650 | continue; | ||
651 | } else { debug('Failed to extract content'); } | ||
652 | } else { debug('MIME type requires different action'); } | ||
653 | } else { debug('Failed to fetch URL'); } | ||
654 | } else { debug('URL already processed'); } | ||
655 | } else { debug('Failed to resolve against '.$effective_url); } | ||
656 | // failed to process next_page_url, so cancel further requests | ||
657 | $multi_page_content = array(); | ||
658 | break; | ||
659 | } | ||
660 | // did we successfully deal with this multi-page article? | ||
if (empty($multi_page_content)) {
	debug('Failed to extract all parts of multi-page article, so not going to include them');
	// Build the notice as a real element and queue that element.
	// Writing it as one chained assignment
	// ($multi_page_content[] = ...->createElement('p')->innerHTML = '...')
	// queues the assigned string instead of the element, and the loop that
	// follows passes every queued entry to importNode(), which needs a node.
	// NOTE(review): like the original line, this relies on elements created
	// by $readability->dom accepting an innerHTML property.
	$_notice = $readability->dom->createElement('p');
	$_notice->innerHTML = '<em>This article appears to continue on subsequent pages which we could not extract</em>';
	$multi_page_content[] = $_notice;
	unset($_notice);
}
665 | foreach ($multi_page_content as $_page) { | ||
666 | $_page = $content_block->ownerDocument->importNode($_page, true); | ||
667 | $content_block->appendChild($_page); | ||
668 | } | ||
669 | unset($multi_page_urls, $multi_page_content, $page_mime_info, $next_page_url); | ||
670 | } | ||
671 | } | ||
672 | // use extracted title for both feed and item title if we're using single-item dummy feed | ||
673 | if ($isDummyFeed) { | ||
674 | $output->setTitle($extracted_title); | ||
675 | $newitem->setTitle($extracted_title); | ||
676 | } else { | ||
677 | // use extracted title instead of feed item title? | ||
678 | if (!$favour_feed_titles && $extracted_title != '') { | ||
679 | debug('Using extracted title in generated feed'); | ||
680 | $newitem->setTitle($extracted_title); | ||
681 | } | ||
682 | } | ||
683 | } | ||
684 | if ($do_content_extraction) { | ||
685 | // if we failed to extract content... | ||
686 | if (!$extract_result) { | ||
687 | if ($exclude_on_fail) { | ||
688 | debug('Failed to extract, so skipping (due to exclude on fail parameter)'); | ||
689 | continue; // skip this and move to next item | ||
690 | } | ||
691 | //TODO: get text sample for language detection | ||
692 | $html = $options->error_message; | ||
693 | // keep the original item description | ||
694 | $html .= $item->get_description(); | ||
695 | } else { | ||
696 | $readability->clean($content_block, 'select'); | ||
697 | if ($options->rewrite_relative_urls) makeAbsolute($effective_url, $content_block); | ||
698 | // footnotes | ||
699 | if (($links == 'footnotes') && (strpos($effective_url, 'wikipedia.org') === false)) { | ||
700 | $readability->addFootnotes($content_block); | ||
701 | } | ||
702 | // remove nesting: <div><div><div><p>test</p></div></div></div> = <p>test</p> | ||
703 | while ($content_block->childNodes->length == 1 && $content_block->firstChild->nodeType === XML_ELEMENT_NODE) { | ||
704 | // only follow these tag names | ||
705 | if (!in_array(strtolower($content_block->tagName), array('div', 'article', 'section', 'header', 'footer'))) break; | ||
706 | //$html = $content_block->firstChild->innerHTML; // FTR 2.9.5 | ||
707 | $content_block = $content_block->firstChild; | ||
708 | } | ||
709 | // convert content block to HTML string | ||
710 | // Need to preserve things like body: //img[@id='feature'] | ||
711 | if (in_array(strtolower($content_block->tagName), array('div', 'article', 'section', 'header', 'footer'))) { | ||
712 | $html = $content_block->innerHTML; | ||
713 | } else { | ||
714 | $html = $content_block->ownerDocument->saveXML($content_block); // essentially outerHTML | ||
715 | } | ||
716 | unset($content_block); | ||
717 | // post-processing cleanup | ||
718 | $html = preg_replace('!<p>[\s\h\v]*</p>!u', '', $html); | ||
719 | if ($links == 'remove') { | ||
720 | $html = preg_replace('!</?a[^>]*>!', '', $html); | ||
721 | } | ||
722 | // get text sample for language detection | ||
723 | $text_sample = strip_tags(substr($html, 0, 500)); | ||
724 | $html = make_substitutions($options->message_to_prepend).$html; | ||
725 | $html .= make_substitutions($options->message_to_append); | ||
726 | } | ||
727 | } | ||
728 | |||
729 | if ($valid_key && isset($_GET['pubsub'])) { // used only on fivefilters.org at the moment | ||
730 | $newitem->addElement('guid', 'http://fivefilters.org/content-only/redirect.php?url='.urlencode($item->get_permalink()), array('isPermaLink'=>'false')); | ||
731 | } else { | ||
732 | $newitem->addElement('guid', $item->get_permalink(), array('isPermaLink'=>'true')); | ||
733 | } | ||
734 | // filter xss? | ||
735 | if ($xss_filter) { | ||
736 | debug('Filtering HTML to remove XSS'); | ||
737 | $html = htmLawed::hl($html, array('safe'=>1, 'deny_attribute'=>'style', 'comment'=>1, 'cdata'=>1)); | ||
738 | } | ||
739 | $newitem->setDescription($html); | ||
740 | |||
741 | // set date | ||
742 | if ((int)$item->get_date('U') > 0) { | ||
743 | $newitem->setDate((int)$item->get_date('U')); | ||
744 | } elseif ($extractor->getDate()) { | ||
745 | $newitem->setDate($extractor->getDate()); | ||
746 | } | ||
747 | |||
748 | // add authors | ||
749 | if ($authors = $item->get_authors()) { | ||
750 | foreach ($authors as $author) { | ||
751 | // for some feeds, SimplePie stores author's name as email, e.g. http://feeds.feedburner.com/nymag/intel | ||
752 | if ($author->get_name() !== null) { | ||
753 | $newitem->addElement('dc:creator', $author->get_name()); | ||
754 | } elseif ($author->get_email() !== null) { | ||
755 | $newitem->addElement('dc:creator', $author->get_email()); | ||
756 | } | ||
757 | } | ||
758 | } elseif ($authors = $extractor->getAuthors()) { | ||
759 | //TODO: make sure the list size is reasonable | ||
760 | foreach ($authors as $author) { | ||
761 | // TODO: xpath often selects authors from other articles linked from the page. | ||
762 | // for now choose first item | ||
763 | $newitem->addElement('dc:creator', $author); | ||
764 | break; | ||
765 | } | ||
766 | } | ||
767 | |||
768 | // add language | ||
769 | if ($detect_language) { | ||
770 | $language = $extractor->getLanguage(); | ||
771 | if (!$language) $language = $feed->get_language(); | ||
772 | if (($detect_language == 3 || (!$language && $detect_language == 2)) && $text_sample) { | ||
773 | try { | ||
774 | if ($use_cld) { | ||
775 | // Use PHP-CLD extension | ||
776 | $php_cld = 'CLD\detect'; // in quotes to prevent PHP 5.2 parse error | ||
777 | $res = $php_cld($text_sample); | ||
778 | if (is_array($res) && count($res) > 0) { | ||
779 | $language = $res[0]['code']; | ||
780 | } | ||
781 | } else { | ||
782 | //die('what'); | ||
783 | // Use PEAR's Text_LanguageDetect | ||
784 | if (!isset($l)) { | ||
785 | $l = new Text_LanguageDetect('libraries/language-detect/lang.dat', 'libraries/language-detect/unicode_blocks.dat'); | ||
786 | } | ||
787 | $l_result = $l->detect($text_sample, 1); | ||
788 | if (count($l_result) > 0) { | ||
789 | $language = $language_codes[key($l_result)]; | ||
790 | } | ||
791 | } | ||
792 | } catch (Exception $e) { | ||
793 | //die('error: '.$e); | ||
794 | // do nothing | ||
795 | } | ||
796 | } | ||
797 | if ($language && (strlen($language) < 7)) { | ||
798 | $newitem->addElement('dc:language', $language); | ||
799 | } | ||
800 | } | ||
801 | |||
802 | // add MIME type (if it appeared in our exclusions lists) | ||
803 | if (isset($mime_info['mime'])) $newitem->addElement('dc:format', $mime_info['mime']); | ||
804 | // add effective URL (URL after redirects) | ||
805 | if (isset($effective_url)) { | ||
806 | //TODO: ensure $effective_url is valid without - sometimes it causes problems, e.g. | ||
807 | //http://www.siasat.pk/forum/showthread.php?108883-Pakistan-Chowk-by-Rana-Mubashir-–-25th-March-2012-Special-Program-from-Liari-(Karachi) | ||
808 | //temporary measure: use utf8_encode() | ||
809 | $newitem->addElement('dc:identifier', remove_url_cruft(utf8_encode($effective_url))); | ||
810 | } else { | ||
811 | $newitem->addElement('dc:identifier', remove_url_cruft($item->get_permalink())); | ||
812 | } | ||
813 | |||
814 | // add categories | ||
815 | if ($categories = $item->get_categories()) { | ||
816 | foreach ($categories as $category) { | ||
817 | if ($category->get_label() !== null) { | ||
818 | $newitem->addElement('category', $category->get_label()); | ||
819 | } | ||
820 | } | ||
821 | } | ||
822 | |||
823 | // check for enclosures | ||
824 | if ($options->keep_enclosures) { | ||
825 | if ($enclosures = $item->get_enclosures()) { | ||
826 | foreach ($enclosures as $enclosure) { | ||
827 | // thumbnails | ||
828 | foreach ((array)$enclosure->get_thumbnails() as $thumbnail) { | ||
829 | $newitem->addElement('media:thumbnail', '', array('url'=>$thumbnail)); | ||
830 | } | ||
831 | if (!$enclosure->get_link()) continue; | ||
832 | $enc = array(); | ||
833 | // Media RSS spec ($enc): http://search.yahoo.com/mrss | ||
834 | // SimplePie methods ($enclosure): http://simplepie.org/wiki/reference/start#methods4 | ||
835 | $enc['url'] = $enclosure->get_link(); | ||
836 | if ($enclosure->get_length()) $enc['fileSize'] = $enclosure->get_length(); | ||
837 | if ($enclosure->get_type()) $enc['type'] = $enclosure->get_type(); | ||
838 | if ($enclosure->get_medium()) $enc['medium'] = $enclosure->get_medium(); | ||
839 | if ($enclosure->get_expression()) $enc['expression'] = $enclosure->get_expression(); | ||
840 | if ($enclosure->get_bitrate()) $enc['bitrate'] = $enclosure->get_bitrate(); | ||
841 | if ($enclosure->get_framerate()) $enc['framerate'] = $enclosure->get_framerate(); | ||
842 | if ($enclosure->get_sampling_rate()) $enc['samplingrate'] = $enclosure->get_sampling_rate(); | ||
843 | if ($enclosure->get_channels()) $enc['channels'] = $enclosure->get_channels(); | ||
844 | if ($enclosure->get_duration()) $enc['duration'] = $enclosure->get_duration(); | ||
845 | if ($enclosure->get_height()) $enc['height'] = $enclosure->get_height(); | ||
846 | if ($enclosure->get_width()) $enc['width'] = $enclosure->get_width(); | ||
847 | if ($enclosure->get_language()) $enc['lang'] = $enclosure->get_language(); | ||
848 | $newitem->addElement('media:content', '', $enc); | ||
849 | } | ||
850 | } | ||
851 | } | ||
852 | /* } */ | ||
853 | $output->addItem($newitem); | ||
854 | unset($html); | ||
855 | $item_count++; | ||
856 | } | ||
857 | |||
858 | // output feed | ||
859 | debug('Done!'); | ||
860 | /* | ||
861 | if ($debug_mode) { | ||
862 | $_apc_data = apc_cache_info('user'); | ||
863 | var_dump($_apc_data); exit; | ||
864 | } | ||
865 | */ | ||
866 | if (!$debug_mode) { | ||
867 | if ($callback) echo "$callback("; // if $callback is set, $format also == 'json' | ||
868 | if ($format == 'json') $output->setFormat(($callback === null) ? JSON : JSONP); | ||
869 | $add_to_cache = $options->caching; | ||
870 | // is smart cache mode enabled? | ||
871 | if ($add_to_cache && $options->apc && $options->smart_cache) { | ||
872 | // yes, so only cache if this is the second request for this URL | ||
873 | $add_to_cache = ($apc_cache_hits >= 2); | ||
874 | // purge cache | ||
875 | if ($options->cache_cleanup > 0) { | ||
876 | if (rand(1, $options->cache_cleanup) == 1) { | ||
877 | // apc purge code adapted from http://www.thimbleopensource.com/tutorials-snippets/php-apc-expunge-script | ||
878 | $_apc_data = apc_cache_info('user'); | ||
879 | foreach ($_apc_data['cache_list'] as $_apc_item) { | ||
880 | if ($_apc_item['ttl'] > 0 && ($_apc_item['ttl'] + $_apc_item['creation_time'] < time())) { | ||
881 | apc_delete($_apc_item['info']); | ||
882 | } | ||
883 | } | ||
884 | } | ||
885 | } | ||
886 | } | ||
if (!$add_to_cache) {
	// nothing to store: let the writer send the feed directly
	$output->genarateFeed();
} else {
	// capture the generated feed so the very same text can be stored and sent
	ob_start();
	$output->genarateFeed();
	$output = ob_get_contents();
	ob_end_clean();
	// an HTML-only request that yielded no items is not cached
	// (in case of temporary server glitch at source URL)
	if (!$html_only || $item_count != 0) {
		$cache = get_cache();
		$cache->save($output, $cache_id);
	}
	echo $output;
}
902 | if ($callback) echo ');'; | ||
903 | } | ||
904 | |||
905 | /////////////////////////////// | ||
906 | // HELPER FUNCTIONS | ||
907 | /////////////////////////////// | ||
908 | |||
/**
 * Check a URL against the configured allow/block lists.
 *
 * Reads the global $options object:
 *  - if $options->allowed_urls is non-empty it acts as a whitelist and the
 *    URL is accepted only when it contains one of the listed fragments
 *    ($options->blocked_urls is not consulted in that case);
 *  - otherwise the URL is rejected when it contains any fragment listed in
 *    $options->blocked_urls.
 * Matching is a case-insensitive substring test.
 *
 * Empty list entries are ignored. An empty needle matches every URL on
 * PHP 8 (and raises an "Empty needle" warning on earlier versions), so a
 * stray '' in the config would otherwise block everything or let
 * everything through the whitelist.
 *
 * @param string $url URL to check
 * @return bool true if the URL may be processed, false if it is blocked
 */
function url_allowed($url) {
	global $options;
	$whitelist_mode = !empty($options->allowed_urls);
	$fragments = $whitelist_mode ? $options->allowed_urls : $options->blocked_urls;
	foreach ($fragments as $fragment) {
		$fragment = (string)$fragment;
		if ($fragment === '') continue;
		if (stripos($url, $fragment) !== false) {
			// a hit allows the URL in whitelist mode and blocks it otherwise
			return $whitelist_mode;
		}
	}
	// no fragment matched: blocked in whitelist mode, allowed otherwise
	return !$whitelist_mode;
}
929 | |||
930 | ////////////////////////////////////////////// | ||
931 | // Convert $html to UTF8 | ||
932 | // (uses HTTP headers and HTML to find encoding) | ||
933 | // adapted from http://stackoverflow.com/questions/910793/php-detect-encoding-and-make-everything-utf-8 | ||
934 | ////////////////////////////////////////////// | ||
/**
 * Convert an HTML (or XML) document to UTF-8.
 *
 * The source encoding is looked up, in order, in:
 *   1. the last Content-Type header of the HTTP response,
 *   2. the XML declaration at the very start of the document,
 *   3. a <meta http-equiv="Content-Type"> element,
 *   4. any <meta> element carrying a charset= value.
 * Only the first 50000 bytes of the document are scanned for 2-4.
 *
 * @param string            $html   document body
 * @param string|array|null $header raw HTTP response headers (string, or array of lines)
 * @return string the document, converted to UTF-8 when a non-UTF-8 encoding was found
 */
function convert_to_utf8($html, $header=null)
{
    $encoding = null;
    if ($html || $header) {
        if (is_array($header)) $header = implode("\n", $header);
        if (!$header || !preg_match_all('/^Content-Type:\s+([^;]+)(?:;\s*charset=["\']?([^;"\'\n]*))?/im', $header, $match, PREG_SET_ORDER)) {
            // error parsing the response
            debug('Could not find Content-Type header in HTTP response');
        } else {
            $match = end($match); // get last matched element (in case of redirects)
            if (isset($match[2])) $encoding = trim($match[2], "\"' \r\n\0\x0B\t");
        }
        // TODO: check to see if encoding is supported (can we convert it?)
        // If it's not, result will be empty string.
        // For now we'll check for invalid encoding types returned by some sites, e.g. 'none'
        // Problem URL: http://facta.co.jp/blog/archives/20111026001026.html
        if (!$encoding || $encoding == 'none') {
            // search for encoding in HTML - only look at the first 50000 characters
            // Why 50000? See, for example, http://www.lemonde.fr/festival-de-cannes/article/2012/05/23/deux-cretes-en-goguette-sur-la-croisette_1705732_766360.html
            // TODO: improve this so it looks at smaller chunks first
            $html_head = substr($html, 0, 50000);
            if (preg_match('/^<\?xml\s+version=(?:"[^"]*"|\'[^\']*\')\s+encoding=("[^"]*"|\'[^\']*\')/s', $html_head, $match)) {
                $encoding = trim($match[1], '"\'');
            } elseif (preg_match('/<meta\s+http-equiv=["\']?Content-Type["\']? content=["\'][^;]+;\s*charset=["\']?([^;"\'>]+)/i', $html_head, $match)) {
                $encoding = trim($match[1]);
            } elseif (preg_match_all('/<meta\s+([^>]+)>/i', $html_head, $match)) {
                foreach ($match[1] as $_test) {
                    if (preg_match('/charset=["\']?([^"\']+)/i', $_test, $_m)) {
                        $encoding = trim($_m[1]);
                        break;
                    }
                }
            }
        }
        // trim is important here!
        if (isset($encoding)) $encoding = trim($encoding);
        if (!$encoding || (strtolower($encoding) == 'iso-8859-1')) {
            // Bytes 0x82-0x9F are Windows-1252 punctuation in text mislabelled as
            // ISO-8859-1, but they are also legal continuation bytes inside UTF-8
            // sequences. When no encoding was declared the document is treated as
            // UTF-8 below, so leave it untouched if it already is valid UTF-8 -
            // rewriting those bytes would corrupt it.
            $already_utf8 = !$encoding && preg_match('//u', (string) $html) === 1;
            if (!$already_utf8) {
                // replace MS Word smart quotes
                // Replacements are ASCII-only numeric character references so that
                // the charset conversion performed afterwards cannot re-encode them.
                $trans = array();
                $trans[chr(130)] = '&#x201A;'; // Single Low-9 Quotation Mark
                $trans[chr(131)] = '&#x0192;'; // Latin Small Letter F With Hook
                $trans[chr(132)] = '&#x201E;'; // Double Low-9 Quotation Mark
                $trans[chr(133)] = '&#x2026;'; // Horizontal Ellipsis
                $trans[chr(134)] = '&#x2020;'; // Dagger
                $trans[chr(135)] = '&#x2021;'; // Double Dagger
                $trans[chr(136)] = '&#x02C6;'; // Modifier Letter Circumflex Accent
                $trans[chr(137)] = '&#x2030;'; // Per Mille Sign
                $trans[chr(138)] = '&#x0160;'; // Latin Capital Letter S With Caron
                $trans[chr(139)] = '&#x2039;'; // Single Left-Pointing Angle Quotation Mark
                $trans[chr(140)] = '&#x0152;'; // Latin Capital Ligature OE
                $trans[chr(145)] = '&#x2018;'; // Left Single Quotation Mark
                $trans[chr(146)] = '&#x2019;'; // Right Single Quotation Mark
                $trans[chr(147)] = '&#x201C;'; // Left Double Quotation Mark
                $trans[chr(148)] = '&#x201D;'; // Right Double Quotation Mark
                $trans[chr(149)] = '&#x2022;'; // Bullet
                $trans[chr(150)] = '&#x2013;'; // En Dash
                $trans[chr(151)] = '&#x2014;'; // Em Dash
                $trans[chr(152)] = '&#x02DC;'; // Small Tilde
                $trans[chr(153)] = '&#x2122;'; // Trade Mark Sign
                $trans[chr(154)] = '&#x0161;'; // Latin Small Letter S With Caron
                $trans[chr(155)] = '&#x203A;'; // Single Right-Pointing Angle Quotation Mark
                $trans[chr(156)] = '&#x0153;'; // Latin Small Ligature OE
                $trans[chr(159)] = '&#x0178;'; // Latin Capital Letter Y With Diaeresis
                $html = strtr((string) $html, $trans);
            }
        }
        if (!$encoding) {
            debug('No character encoding found, so treating as UTF-8');
            $encoding = 'utf-8';
        } else {
            debug('Character encoding: '.$encoding);
            if (strtolower($encoding) != 'utf-8') {
                debug('Converting to UTF-8');
                $html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8');
            }
        }
    }
    return $html;
}
1021 | |||
/**
 * Rewrite the href of every <a> and the src of every <img> found at or below
 * $elem so that they are resolved against $base.
 *
 * @param string     $base base URL to resolve against
 * @param DOMElement $elem root element to process; NOTE(review): type is not
 *                         declared, but getElementsByTagName()/tagName are used on it
 * @return void
 */
function makeAbsolute($base, $elem) {
    $base = new SimplePie_IRI($base);
    // remove '//' in URL path (used to prevent URLs from resolving properly)
    // TODO: check if this is still the case
    if (isset($base->path)) {
        $base->path = preg_replace('!//+!', '/', $base->path);
    }
    $url_attributes = array('a'=>'href', 'img'=>'src');
    foreach ($url_attributes as $tag_name => $attr_name) {
        $nodes = $elem->getElementsByTagName($tag_name);
        // walk the node list from last to first, as the original did
        $idx = $nodes->length;
        while (--$idx >= 0) {
            makeAbsoluteAttr($base, $nodes->item($idx), $attr_name);
        }
        // the root element is not part of its own descendant list
        if (strtolower($elem->tagName) == $tag_name) {
            makeAbsoluteAttr($base, $elem, $attr_name);
        }
    }
}
/**
 * Resolve one URL-carrying attribute of an element against a base IRI.
 *
 * The attribute is left untouched when it is missing, when it already starts
 * with http:// or https://, or when resolution fails.
 *
 * @param SimplePie_IRI $base base IRI to resolve against
 * @param DOMElement    $e    element whose attribute should be rewritten
 * @param string        $attr attribute name, e.g. 'href' or 'src'
 * @return void
 */
function makeAbsoluteAttr($base, $e, $attr) {
    if (!$e->hasAttribute($attr)) {
        return;
    }
    // Trim leading and trailing white space. I don't really like this but
    // unfortunately it does appear on some sites. e.g. <img src=" /path/to/image.jpg" />
    // (%20 is decoded first so that encoded padding is trimmed as well)
    $url = trim(str_replace('%20', ' ', $e->getAttribute($attr)));
    $url = str_replace(' ', '%20', $url);
    // Anchored at the start: a relative URL that merely contains "http://"
    // (e.g. /redirect?to=http://...) still has to be resolved.
    if (preg_match('!^https?://!i', $url)) {
        return;
    }
    if ($absolute = SimplePie_IRI::absolutize($base, $url)) {
        $e->setAttribute($attr, $absolute);
    }
}
/**
 * Resolve a URL string against a base URL.
 *
 * @param string $base base URL
 * @param string $url  URL to resolve
 * @return mixed $url unchanged when it already starts with http(s)://,
 *               otherwise the result of SimplePie_IRI::absolutize(), or
 *               false when that result is empty
 */
function makeAbsoluteStr($base, $url) {
    $base = new SimplePie_IRI($base);
    // remove '//' in URL path (causes URLs not to resolve properly)
    if (isset($base->path)) {
        $base->path = preg_replace('!//+!', '/', $base->path);
    }
    // already absolute
    if (preg_match('!^https?://!i', $url)) {
        return $url;
    }
    $resolved = SimplePie_IRI::absolutize($base, $url);
    return $resolved ? $resolved : false;
}
1064 | // returns single page response, or false if not found | ||
/**
 * Look for a "single page" version of an article and fetch it.
 *
 * The site config built for $url supplies XPath expressions; the first one
 * that yields a string, an element with an href, or a non-empty attribute
 * provides the candidate URL.
 *
 * @param object $item feed item; only get_description() is called on it, and
 *                     only when the config targets the feed rather than the page
 * @param string $html HTML of the page already fetched
 * @param string $url  URL of the page already fetched
 * @return mixed the HTTP response array of the single-page URL, or false if not found
 */
function getSinglePage($item, $html, $url) {
    global $http, $extractor;
    debug('Looking for site config files to see if single page link exists');
    $site_config = $extractor->buildSiteConfig($url, $html);
    $splink = null;
    if (!empty($site_config->single_page_link)) {
        $splink = $site_config->single_page_link;
    } elseif (!empty($site_config->single_page_link_in_feed)) {
        // single page link xpath is targeted at feed
        $splink = $site_config->single_page_link_in_feed;
        // so let's replace HTML with feed item description
        $html = $item->get_description();
    }
    if (!isset($splink)) {
        return false;
    }

    // Build DOM tree from HTML
    $readability = new Readability($html, $url);
    $xpath = new DOMXPath($readability->dom);

    // Loop through single_page_link xpath expressions
    $single_page_url = null;
    foreach ($splink as $pattern) {
        $result = @$xpath->evaluate($pattern, $readability->dom);
        if (is_string($result)) {
            $single_page_url = trim($result);
            break;
        }
        if (!($result instanceof DOMNodeList) || $result->length <= 0) {
            continue;
        }
        foreach ($result as $node) {
            if ($node instanceof DOMElement && $node->hasAttribute('href')) {
                $single_page_url = $node->getAttribute('href');
                break 2;
            } elseif ($node instanceof DOMAttr && $node->value) {
                $single_page_url = $node->value;
                break 2;
            }
        }
    }
    if (!isset($single_page_url)) {
        return false;
    }

    // If we've got URL, resolve against $url
    $single_page_url = makeAbsoluteStr($url, $single_page_url);
    // check it's not what we have already!
    if (!$single_page_url || $single_page_url == $url) {
        return false;
    }

    // it's not, so let's try to fetch it...
    // the referer is swapped for this one request and restored afterwards
    $_prev_ref = $http->referer;
    $http->referer = $single_page_url;
    $response = $http->get($single_page_url, true);
    $http->referer = $_prev_ref;
    if ($response && $response['status_code'] < 300) {
        return $response;
    }
    return false;
}
1118 | |||
1119 | // based on content-type http header, decide what to do | ||
1120 | // param: HTTP headers string | ||
1121 | // return: array with keys: 'mime', 'type', 'subtype', 'action', 'name' | ||
1122 | // e.g. array('mime'=>'image/jpeg', 'type'=>'image', 'subtype'=>'jpeg', 'action'=>'link', 'name'=>'Image') | ||
/**
 * Work out what to do with a response, based on its Content-Type header.
 *
 * @param string $headers raw HTTP headers
 * @return array empty when no Content-Type header is found; otherwise keys
 *               'mime', 'type' and 'subtype' (all lower-cased), plus 'action'
 *               and 'name' when $options->content_type_exc has an entry for
 *               the full mime type or, failing that, for the bare type
 */
function get_mime_action_info($headers) {
    global $options;
    // parts[1] = full mime type, e.g. image/jpeg
    // parts[2] = first part, e.g. image
    // parts[3] = last part, e.g. jpeg
    if (!preg_match('!^Content-Type:\s*(([-\w]+)/([-\w\+]+))!im', $headers, $parts)) {
        return array();
    }
    $info = array(
        'mime' => strtolower(trim($parts[1])),
        'type' => strtolower(trim($parts[2])),
        'subtype' => strtolower(trim($parts[3]))
    );
    // the full mime type (e.g. image/jpeg) takes precedence over the bare type (e.g. image)
    $candidates = array($info['mime'], $info['type']);
    foreach ($candidates as $candidate) {
        if (!isset($options->content_type_exc[$candidate])) {
            continue;
        }
        $rule = $options->content_type_exc[$candidate];
        $info['action'] = $rule['action'];
        $info['name'] = $rule['name'];
        break;
    }
    return $info;
}
1145 | |||
/**
 * Strip Google Analytics campaign parameters (utm_*) from a URL.
 *
 * Unlike a plain "delete the pair and its leading separator" approach, this
 * keeps the query string well-formed when the tracking parameter comes first
 * (?utm_source=x&id=1 becomes ?id=1, not &id=1) and never eats a #fragment.
 * URLs without any utm_* parameter are returned unchanged.
 *
 * regex adapted from http://navitronic.co.uk/2010/12/removing-google-analytics-cruft-from-urls/
 * https://gist.github.com/758177
 *
 * @param string $url URL to clean
 * @return string cleaned URL
 */
function remove_url_cruft($url) {
    // Remove each utm_* pair together with the separator that FOLLOWS it, so the
    // '?' or '&' in front of it stays available for the next parameter.
    $cleaned = preg_replace('/(?<=[?&])utm_[a-z]+=[^&#]*(?:&|(?=#|\z))/', '', $url, -1, $removed);
    if (!$removed) {
        return $url;
    }
    // The last pair has no following separator; drop the '?' or '&' left dangling
    // at the end of the query string.
    return preg_replace('/[?&](?=#|\z)/', '', $cleaned);
}
1152 | |||
/**
 * Expand the {url} and {effective-url} placeholders in a template string.
 *
 * Both values are passed through htmlspecialchars() before insertion. They
 * are read from the globals $item (via get_permalink()) and $effective_url.
 *
 * @param string $string template; returned as-is when it compares equal to ''
 * @return string template with the placeholders expanded
 */
function make_substitutions($string) {
    if ($string == '') return $string;
    global $item, $effective_url;
    // str_replace() applies the pairs one after another, in this order
    $placeholders = array('{url}', '{effective-url}');
    $values = array(
        htmlspecialchars($item->get_permalink()),
        htmlspecialchars($effective_url)
    );
    return str_replace($placeholders, $values, $string);
}
1160 | |||
/**
 * Return the shared cache object, creating it on first use.
 *
 * The object is built with Zend_Cache::factory('Core', 'File', ...) and kept
 * in a function-static variable, so later calls return the same instance.
 * Files go into a different sub-directory of $options->cache_dir depending
 * on whether the global $valid_key is truthy.
 *
 * @return mixed whatever Zend_Cache::factory() returned
 */
function get_cache() {
    global $options, $valid_key;
    static $cache = null;
    if ($cache !== null) {
        return $cache;
    }
    $cache_subdir = ($valid_key) ? '/rss-with-key/' : '/rss/';
    $frontend_settings = array(
        'lifetime' => 10*60, // cache lifetime of 10 minutes
        'automatic_serialization' => false,
        'write_control' => false,
        'automatic_cleaning_factor' => $options->cache_cleanup,
        'ignore_user_abort' => false
    );
    $backend_settings = array(
        'cache_dir' => $options->cache_dir.$cache_subdir, // directory where to put the cache files
        'file_locking' => false,
        'read_control' => true,
        'read_control_type' => 'strlen',
        'hashed_directory_level' => $options->cache_directory_level,
        'hashed_directory_perm' => 0777,
        'cache_file_perm' => 0664,
        'file_name_prefix' => 'ff'
    );
    // getting a Zend_Cache_Core object
    $cache = Zend_Cache::factory('Core', 'File', $frontend_settings, $backend_settings);
    return $cache;
}
1187 | |||
/**
 * Print a debug line immediately when the global $debug_mode is truthy.
 *
 * @param string $msg message to print (emitted as "* <msg>\n")
 * @return void
 */
function debug($msg) {
    global $debug_mode;
    if (!$debug_mode) {
        return;
    }
    echo '* ',$msg,"\n";
    // ob_flush() raises a notice when no output buffer is active,
    // so only call it when there is something to flush
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();
}
diff --git a/inc/3rdparty/simplepie/SimplePie/Decode/HTML/Entities.php b/inc/3rdparty/simplepie/SimplePie/Decode/HTML/Entities.php deleted file mode 100644 index bf598579..00000000 --- a/inc/3rdparty/simplepie/SimplePie/Decode/HTML/Entities.php +++ /dev/null | |||
@@ -1,250 +0,0 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * SimplePie | ||
4 | * | ||
5 | * A PHP-Based RSS and Atom Feed Framework. | ||
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | ||
7 | * | ||
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | ||
9 | * All rights reserved. | ||
10 | * | ||
11 | * Redistribution and use in source and binary forms, with or without modification, are | ||
12 | * permitted provided that the following conditions are met: | ||
13 | * | ||
14 | * * Redistributions of source code must retain the above copyright notice, this list of | ||
15 | * conditions and the following disclaimer. | ||
16 | * | ||
17 | * * Redistributions in binary form must reproduce the above copyright notice, this list | ||
18 | * of conditions and the following disclaimer in the documentation and/or other materials | ||
19 | * provided with the distribution. | ||
20 | * | ||
21 | * * Neither the name of the SimplePie Team nor the names of its contributors may be used | ||
22 | * to endorse or promote products derived from this software without specific prior | ||
23 | * written permission. | ||
24 | * | ||
25 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS | ||
26 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | ||
27 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS | ||
28 | * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
29 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
30 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
31 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | ||
32 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
33 | * POSSIBILITY OF SUCH DAMAGE. | ||
34 | * | ||
35 | * @package SimplePie | ||
36 | * @version 1.3-dev | ||
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | ||
38 | * @author Ryan Parman | ||
39 | * @author Geoffrey Sneddon | ||
40 | * @author Ryan McCue | ||
41 | * @link http://simplepie.org/ SimplePie | ||
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | ||
43 | * @todo phpDoc comments | ||
44 | */ | ||
45 | |||
46 | |||
47 | /** | ||
48 | * Decode HTML Entities | ||
49 | * | ||
50 | * This implements HTML5 as of revision 967 (2007-06-28) | ||
51 | * | ||
52 | * @package SimplePie | ||
53 | */ | ||
class SimplePie_Decode_HTML_Entities
{
    /**
     * Data to be parsed
     *
     * @access private
     * @var string
     */
    public $data = '';

    /**
     * Currently consumed bytes (the bytes of the reference being examined,
     * starting with its '&')
     *
     * @access private
     * @var string
     */
    public $consumed = '';

    /**
     * Position of the current byte being parsed
     *
     * @access private
     * @var int
     */
    public $position = 0;

    /**
     * Create an instance of the class with the input data
     *
     * @access public
     * @param string $data Input data
     */
    public function __construct($data)
    {
        $this->data = $data;
    }

    /**
     * Parse the input data, decoding every character reference in place
     *
     * @access public
     * @return string Output data
     */
    public function parse()
    {
        while (($this->position = strpos($this->data, '&', $this->position)) !== false)
        {
            $this->consume();
            $this->entity();
            $this->consumed = '';
        }
        return $this->data;
    }

    /**
     * Consume the next byte
     *
     * @access private
     * @return mixed The next byte, or false, if there is no more data
     */
    public function consume()
    {
        if (isset($this->data[$this->position]))
        {
            $this->consumed .= $this->data[$this->position];
            return $this->data[$this->position++];
        }
        else
        {
            return false;
        }
    }

    /**
     * Consume a range of characters
     *
     * @access private
     * @param string $chars Characters to consume
     * @return mixed A series of characters that match the range, or false
     */
    public function consume_range($chars)
    {
        if ($len = strspn($this->data, $chars, $this->position))
        {
            $data = substr($this->data, $this->position, $len);
            $this->consumed .= $data;
            $this->position += $len;
            return $data;
        }
        else
        {
            return false;
        }
    }

    /**
     * Unconsume one byte
     *
     * @access private
     */
    public function unconsume()
    {
        $this->consumed = substr($this->consumed, 0, -1);
        $this->position--;
    }

    /**
     * Decode an entity
     *
     * Called with the leading '&' already consumed. Looks at the following
     * byte to decide whether this is a numeric reference, a named reference
     * or no reference at all.
     *
     * @access private
     */
    public function entity()
    {
        switch ($this->consume())
        {
            case "\x09":
            case "\x0A":
            case "\x0B":
            case "\x0C":
            case "\x20":
            case "\x3C":
            case false:
                // not a character reference
                break;

            case "\x26":
                // "&&": hand the second ampersand back so that parse() examines
                // it as the possible start of a reference of its own
                $this->unconsume();
                break;

            case "\x23":
                $this->numeric_entity();
                break;

            default:
                $this->named_entity();
                break;
        }
    }

    /**
     * Decode a numeric character reference; "&#" has already been consumed
     *
     * Code points 0x0D and 0x80-0x9F are mapped through a fixed table (the
     * Windows-1252 interpretation); everything else is converted with
     * SimplePie_Misc::codepoint_to_utf8().
     *
     * @access private
     */
    protected function numeric_entity()
    {
        static $windows_1252_specials = array(
            0x0D => "\x0A", 0x80 => "\xE2\x82\xAC", 0x81 => "\xEF\xBF\xBD", 0x82 => "\xE2\x80\x9A",
            0x83 => "\xC6\x92", 0x84 => "\xE2\x80\x9E", 0x85 => "\xE2\x80\xA6", 0x86 => "\xE2\x80\xA0",
            0x87 => "\xE2\x80\xA1", 0x88 => "\xCB\x86", 0x89 => "\xE2\x80\xB0", 0x8A => "\xC5\xA0",
            0x8B => "\xE2\x80\xB9", 0x8C => "\xC5\x92", 0x8D => "\xEF\xBF\xBD", 0x8E => "\xC5\xBD",
            0x8F => "\xEF\xBF\xBD", 0x90 => "\xEF\xBF\xBD", 0x91 => "\xE2\x80\x98", 0x92 => "\xE2\x80\x99",
            0x93 => "\xE2\x80\x9C", 0x94 => "\xE2\x80\x9D", 0x95 => "\xE2\x80\xA2", 0x96 => "\xE2\x80\x93",
            0x97 => "\xE2\x80\x94", 0x98 => "\xCB\x9C", 0x99 => "\xE2\x84\xA2", 0x9A => "\xC5\xA1",
            0x9B => "\xE2\x80\xBA", 0x9C => "\xC5\x93", 0x9D => "\xEF\xBF\xBD", 0x9E => "\xC5\xBE",
            0x9F => "\xC5\xB8"
        );

        $marker = $this->consume();
        if ($marker === "\x78" || $marker === "\x58")
        {
            $range = '0123456789ABCDEFabcdef';
            $hex = true;
        }
        else
        {
            $range = '0123456789';
            $hex = false;
            // hand the byte back - but only if one was actually consumed
            if ($marker !== false)
            {
                $this->unconsume();
            }
        }

        // NOTE(review): a digit run of exactly "0" is falsy here, so "&#0;" is
        // left undecoded - kept as in the original code
        if ($codepoint = $this->consume_range($range))
        {
            if ($hex)
            {
                $codepoint = hexdec($codepoint);
            }
            else
            {
                $codepoint = intval($codepoint);
            }

            if (isset($windows_1252_specials[$codepoint]))
            {
                $replacement = $windows_1252_specials[$codepoint];
            }
            else
            {
                $replacement = SimplePie_Misc::codepoint_to_utf8($codepoint);
            }

            // the terminating semicolon is optional
            if (!in_array($this->consume(), array(';', false), true))
            {
                $this->unconsume();
            }

            $consumed_length = strlen($this->consumed);
            $this->data = substr_replace($this->data, $replacement, $this->position - $consumed_length, $consumed_length);
            $this->position += strlen($replacement) - $consumed_length;
        }
    }

    /**
     * Decode a named character reference; '&' and the first byte of the name
     * have already been consumed
     *
     * Up to nine further bytes are read and the longest name found in the
     * table wins. When nothing matches, the cursor is moved back to just
     * after the '&' so that a real reference inside the look-ahead window is
     * not skipped.
     *
     * @access private
     */
    protected function named_entity()
    {
        static $entities = array(
            'Aacute' => "\xC3\x81", 'aacute' => "\xC3\xA1", 'Aacute;' => "\xC3\x81", 'aacute;' => "\xC3\xA1",
            'Acirc' => "\xC3\x82", 'acirc' => "\xC3\xA2", 'Acirc;' => "\xC3\x82", 'acirc;' => "\xC3\xA2",
            'acute' => "\xC2\xB4", 'acute;' => "\xC2\xB4",
            'AElig' => "\xC3\x86", 'aelig' => "\xC3\xA6", 'AElig;' => "\xC3\x86", 'aelig;' => "\xC3\xA6",
            'Agrave' => "\xC3\x80", 'agrave' => "\xC3\xA0", 'Agrave;' => "\xC3\x80", 'agrave;' => "\xC3\xA0",
            'alefsym;' => "\xE2\x84\xB5", 'Alpha;' => "\xCE\x91", 'alpha;' => "\xCE\xB1",
            'AMP' => "\x26", 'amp' => "\x26", 'AMP;' => "\x26", 'amp;' => "\x26",
            'and;' => "\xE2\x88\xA7", 'ang;' => "\xE2\x88\xA0", 'apos;' => "\x27",
            'Aring' => "\xC3\x85", 'aring' => "\xC3\xA5", 'Aring;' => "\xC3\x85", 'aring;' => "\xC3\xA5",
            'asymp;' => "\xE2\x89\x88",
            'Atilde' => "\xC3\x83", 'atilde' => "\xC3\xA3", 'Atilde;' => "\xC3\x83", 'atilde;' => "\xC3\xA3",
            'Auml' => "\xC3\x84", 'auml' => "\xC3\xA4", 'Auml;' => "\xC3\x84", 'auml;' => "\xC3\xA4",
            'bdquo;' => "\xE2\x80\x9E", 'Beta;' => "\xCE\x92", 'beta;' => "\xCE\xB2",
            'brvbar' => "\xC2\xA6", 'brvbar;' => "\xC2\xA6", 'bull;' => "\xE2\x80\xA2", 'cap;' => "\xE2\x88\xA9",
            'Ccedil' => "\xC3\x87", 'ccedil' => "\xC3\xA7", 'Ccedil;' => "\xC3\x87", 'ccedil;' => "\xC3\xA7",
            'cedil' => "\xC2\xB8", 'cedil;' => "\xC2\xB8", 'cent' => "\xC2\xA2", 'cent;' => "\xC2\xA2",
            'Chi;' => "\xCE\xA7", 'chi;' => "\xCF\x87", 'circ;' => "\xCB\x86", 'clubs;' => "\xE2\x99\xA3",
            'cong;' => "\xE2\x89\x85",
            'COPY' => "\xC2\xA9", 'copy' => "\xC2\xA9", 'COPY;' => "\xC2\xA9", 'copy;' => "\xC2\xA9",
            'crarr;' => "\xE2\x86\xB5", 'cup;' => "\xE2\x88\xAA", 'curren' => "\xC2\xA4", 'curren;' => "\xC2\xA4",
            'Dagger;' => "\xE2\x80\xA1", 'dagger;' => "\xE2\x80\xA0", 'dArr;' => "\xE2\x87\x93", 'darr;' => "\xE2\x86\x93",
            'deg' => "\xC2\xB0", 'deg;' => "\xC2\xB0", 'Delta;' => "\xCE\x94", 'delta;' => "\xCE\xB4",
            'diams;' => "\xE2\x99\xA6", 'divide' => "\xC3\xB7", 'divide;' => "\xC3\xB7",
            'Eacute' => "\xC3\x89", 'eacute' => "\xC3\xA9", 'Eacute;' => "\xC3\x89", 'eacute;' => "\xC3\xA9",
            'Ecirc' => "\xC3\x8A", 'ecirc' => "\xC3\xAA", 'Ecirc;' => "\xC3\x8A", 'ecirc;' => "\xC3\xAA",
            'Egrave' => "\xC3\x88", 'egrave' => "\xC3\xA8", 'Egrave;' => "\xC3\x88", 'egrave;' => "\xC3\xA8",
            'empty;' => "\xE2\x88\x85", 'emsp;' => "\xE2\x80\x83", 'ensp;' => "\xE2\x80\x82",
            'Epsilon;' => "\xCE\x95", 'epsilon;' => "\xCE\xB5", 'equiv;' => "\xE2\x89\xA1",
            'Eta;' => "\xCE\x97", 'eta;' => "\xCE\xB7",
            'ETH' => "\xC3\x90", 'eth' => "\xC3\xB0", 'ETH;' => "\xC3\x90", 'eth;' => "\xC3\xB0",
            'Euml' => "\xC3\x8B", 'euml' => "\xC3\xAB", 'Euml;' => "\xC3\x8B", 'euml;' => "\xC3\xAB",
            'euro;' => "\xE2\x82\xAC", 'exist;' => "\xE2\x88\x83", 'fnof;' => "\xC6\x92", 'forall;' => "\xE2\x88\x80",
            'frac12' => "\xC2\xBD", 'frac12;' => "\xC2\xBD", 'frac14' => "\xC2\xBC", 'frac14;' => "\xC2\xBC",
            'frac34' => "\xC2\xBE", 'frac34;' => "\xC2\xBE", 'frasl;' => "\xE2\x81\x84",
            'Gamma;' => "\xCE\x93", 'gamma;' => "\xCE\xB3", 'ge;' => "\xE2\x89\xA5",
            'GT' => "\x3E", 'gt' => "\x3E", 'GT;' => "\x3E", 'gt;' => "\x3E",
            'hArr;' => "\xE2\x87\x94", 'harr;' => "\xE2\x86\x94", 'hearts;' => "\xE2\x99\xA5", 'hellip;' => "\xE2\x80\xA6",
            'Iacute' => "\xC3\x8D", 'iacute' => "\xC3\xAD", 'Iacute;' => "\xC3\x8D", 'iacute;' => "\xC3\xAD",
            'Icirc' => "\xC3\x8E", 'icirc' => "\xC3\xAE", 'Icirc;' => "\xC3\x8E", 'icirc;' => "\xC3\xAE",
            'iexcl' => "\xC2\xA1", 'iexcl;' => "\xC2\xA1",
            'Igrave' => "\xC3\x8C", 'igrave' => "\xC3\xAC", 'Igrave;' => "\xC3\x8C", 'igrave;' => "\xC3\xAC",
            'image;' => "\xE2\x84\x91", 'infin;' => "\xE2\x88\x9E", 'int;' => "\xE2\x88\xAB",
            'Iota;' => "\xCE\x99", 'iota;' => "\xCE\xB9",
            'iquest' => "\xC2\xBF", 'iquest;' => "\xC2\xBF", 'isin;' => "\xE2\x88\x88",
            'Iuml' => "\xC3\x8F", 'iuml' => "\xC3\xAF", 'Iuml;' => "\xC3\x8F", 'iuml;' => "\xC3\xAF",
            'Kappa;' => "\xCE\x9A", 'kappa;' => "\xCE\xBA", 'Lambda;' => "\xCE\x9B", 'lambda;' => "\xCE\xBB",
            'lang;' => "\xE3\x80\x88", 'laquo' => "\xC2\xAB", 'laquo;' => "\xC2\xAB",
            'lArr;' => "\xE2\x87\x90", 'larr;' => "\xE2\x86\x90", 'lceil;' => "\xE2\x8C\x88",
            'ldquo;' => "\xE2\x80\x9C", 'le;' => "\xE2\x89\xA4", 'lfloor;' => "\xE2\x8C\x8A",
            'lowast;' => "\xE2\x88\x97", 'loz;' => "\xE2\x97\x8A", 'lrm;' => "\xE2\x80\x8E",
            'lsaquo;' => "\xE2\x80\xB9", 'lsquo;' => "\xE2\x80\x98",
            'LT' => "\x3C", 'lt' => "\x3C", 'LT;' => "\x3C", 'lt;' => "\x3C",
            'macr' => "\xC2\xAF", 'macr;' => "\xC2\xAF", 'mdash;' => "\xE2\x80\x94",
            'micro' => "\xC2\xB5", 'micro;' => "\xC2\xB5", 'middot' => "\xC2\xB7", 'middot;' => "\xC2\xB7",
            'minus;' => "\xE2\x88\x92", 'Mu;' => "\xCE\x9C", 'mu;' => "\xCE\xBC",
            'nabla;' => "\xE2\x88\x87", 'nbsp' => "\xC2\xA0", 'nbsp;' => "\xC2\xA0", 'ndash;' => "\xE2\x80\x93",
            'ne;' => "\xE2\x89\xA0", 'ni;' => "\xE2\x88\x8B", 'not' => "\xC2\xAC", 'not;' => "\xC2\xAC",
            'notin;' => "\xE2\x88\x89", 'nsub;' => "\xE2\x8A\x84",
            'Ntilde' => "\xC3\x91", 'ntilde' => "\xC3\xB1", 'Ntilde;' => "\xC3\x91", 'ntilde;' => "\xC3\xB1",
            'Nu;' => "\xCE\x9D", 'nu;' => "\xCE\xBD",
            'Oacute' => "\xC3\x93", 'oacute' => "\xC3\xB3", 'Oacute;' => "\xC3\x93", 'oacute;' => "\xC3\xB3",
            'Ocirc' => "\xC3\x94", 'ocirc' => "\xC3\xB4", 'Ocirc;' => "\xC3\x94", 'ocirc;' => "\xC3\xB4",
            'OElig;' => "\xC5\x92", 'oelig;' => "\xC5\x93",
            'Ograve' => "\xC3\x92", 'ograve' => "\xC3\xB2", 'Ograve;' => "\xC3\x92", 'ograve;' => "\xC3\xB2",
            'oline;' => "\xE2\x80\xBE", 'Omega;' => "\xCE\xA9", 'omega;' => "\xCF\x89",
            'Omicron;' => "\xCE\x9F", 'omicron;' => "\xCE\xBF", 'oplus;' => "\xE2\x8A\x95", 'or;' => "\xE2\x88\xA8",
            'ordf' => "\xC2\xAA", 'ordf;' => "\xC2\xAA", 'ordm' => "\xC2\xBA", 'ordm;' => "\xC2\xBA",
            'Oslash' => "\xC3\x98", 'oslash' => "\xC3\xB8", 'Oslash;' => "\xC3\x98", 'oslash;' => "\xC3\xB8",
            'Otilde' => "\xC3\x95", 'otilde' => "\xC3\xB5", 'Otilde;' => "\xC3\x95", 'otilde;' => "\xC3\xB5",
            'otimes;' => "\xE2\x8A\x97",
            'Ouml' => "\xC3\x96", 'ouml' => "\xC3\xB6", 'Ouml;' => "\xC3\x96", 'ouml;' => "\xC3\xB6",
            'para' => "\xC2\xB6", 'para;' => "\xC2\xB6", 'part;' => "\xE2\x88\x82", 'permil;' => "\xE2\x80\xB0",
            'perp;' => "\xE2\x8A\xA5", 'Phi;' => "\xCE\xA6", 'phi;' => "\xCF\x86",
            'Pi;' => "\xCE\xA0", 'pi;' => "\xCF\x80", 'piv;' => "\xCF\x96",
            'plusmn' => "\xC2\xB1", 'plusmn;' => "\xC2\xB1", 'pound' => "\xC2\xA3", 'pound;' => "\xC2\xA3",
            'Prime;' => "\xE2\x80\xB3", 'prime;' => "\xE2\x80\xB2", 'prod;' => "\xE2\x88\x8F", 'prop;' => "\xE2\x88\x9D",
            'Psi;' => "\xCE\xA8", 'psi;' => "\xCF\x88",
            'QUOT' => "\x22", 'quot' => "\x22", 'QUOT;' => "\x22", 'quot;' => "\x22",
            'radic;' => "\xE2\x88\x9A", 'rang;' => "\xE3\x80\x89", 'raquo' => "\xC2\xBB", 'raquo;' => "\xC2\xBB",
            'rArr;' => "\xE2\x87\x92", 'rarr;' => "\xE2\x86\x92", 'rceil;' => "\xE2\x8C\x89",
            'rdquo;' => "\xE2\x80\x9D", 'real;' => "\xE2\x84\x9C",
            'REG' => "\xC2\xAE", 'reg' => "\xC2\xAE", 'REG;' => "\xC2\xAE", 'reg;' => "\xC2\xAE",
            'rfloor;' => "\xE2\x8C\x8B", 'Rho;' => "\xCE\xA1", 'rho;' => "\xCF\x81", 'rlm;' => "\xE2\x80\x8F",
            'rsaquo;' => "\xE2\x80\xBA", 'rsquo;' => "\xE2\x80\x99", 'sbquo;' => "\xE2\x80\x9A",
            'Scaron;' => "\xC5\xA0", 'scaron;' => "\xC5\xA1", 'sdot;' => "\xE2\x8B\x85",
            'sect' => "\xC2\xA7", 'sect;' => "\xC2\xA7", 'shy' => "\xC2\xAD", 'shy;' => "\xC2\xAD",
            'Sigma;' => "\xCE\xA3", 'sigma;' => "\xCF\x83", 'sigmaf;' => "\xCF\x82", 'sim;' => "\xE2\x88\xBC",
            'spades;' => "\xE2\x99\xA0", 'sub;' => "\xE2\x8A\x82", 'sube;' => "\xE2\x8A\x86",
            'sum;' => "\xE2\x88\x91", 'sup;' => "\xE2\x8A\x83",
            'sup1' => "\xC2\xB9", 'sup1;' => "\xC2\xB9", 'sup2' => "\xC2\xB2", 'sup2;' => "\xC2\xB2",
            'sup3' => "\xC2\xB3", 'sup3;' => "\xC2\xB3", 'supe;' => "\xE2\x8A\x87",
            'szlig' => "\xC3\x9F", 'szlig;' => "\xC3\x9F",
            'Tau;' => "\xCE\xA4", 'tau;' => "\xCF\x84", 'there4;' => "\xE2\x88\xB4",
            'Theta;' => "\xCE\x98", 'theta;' => "\xCE\xB8", 'thetasym;' => "\xCF\x91", 'thinsp;' => "\xE2\x80\x89",
            'THORN' => "\xC3\x9E", 'thorn' => "\xC3\xBE", 'THORN;' => "\xC3\x9E", 'thorn;' => "\xC3\xBE",
            'tilde;' => "\xCB\x9C", 'times' => "\xC3\x97", 'times;' => "\xC3\x97",
            'TRADE;' => "\xE2\x84\xA2", 'trade;' => "\xE2\x84\xA2",
            'Uacute' => "\xC3\x9A", 'uacute' => "\xC3\xBA", 'Uacute;' => "\xC3\x9A", 'uacute;' => "\xC3\xBA",
            'uArr;' => "\xE2\x87\x91", 'uarr;' => "\xE2\x86\x91",
            'Ucirc' => "\xC3\x9B", 'ucirc' => "\xC3\xBB", 'Ucirc;' => "\xC3\x9B", 'ucirc;' => "\xC3\xBB",
            'Ugrave' => "\xC3\x99", 'ugrave' => "\xC3\xB9", 'Ugrave;' => "\xC3\x99", 'ugrave;' => "\xC3\xB9",
            'uml' => "\xC2\xA8", 'uml;' => "\xC2\xA8", 'upsih;' => "\xCF\x92",
            'Upsilon;' => "\xCE\xA5", 'upsilon;' => "\xCF\x85",
            'Uuml' => "\xC3\x9C", 'uuml' => "\xC3\xBC", 'Uuml;' => "\xC3\x9C", 'uuml;' => "\xC3\xBC",
            'weierp;' => "\xE2\x84\x98", 'Xi;' => "\xCE\x9E", 'xi;' => "\xCE\xBE",
            'Yacute' => "\xC3\x9D", 'yacute' => "\xC3\xBD", 'Yacute;' => "\xC3\x9D", 'yacute;' => "\xC3\xBD",
            'yen' => "\xC2\xA5", 'yen;' => "\xC2\xA5",
            'yuml' => "\xC3\xBF", 'Yuml;' => "\xC5\xB8", 'yuml;' => "\xC3\xBF",
            'Zeta;' => "\xCE\x96", 'zeta;' => "\xCE\xB6", 'zwj;' => "\xE2\x80\x8D", 'zwnj;' => "\xE2\x80\x8C"
        );

        // keep reading; a longer name found later replaces a shorter one
        // (e.g. "not" is superseded by "notin;")
        for ($i = 0, $match = null; $i < 9 && $this->consume() !== false; $i++)
        {
            $consumed = substr($this->consumed, 1);
            if (isset($entities[$consumed]))
            {
                $match = $consumed;
            }
        }

        // offset of the '&' that started this reference
        $start = $this->position - strlen($this->consumed);

        if ($match !== null)
        {
            $this->data = substr_replace($this->data, $entities[$match], $start, strlen($match) + 1);
            // continue right after the replacement, so a decoded '&' is not decoded twice
            $this->position = $start + strlen($entities[$match]);
        }
        else
        {
            // nothing matched: resume just after the '&' instead of after the
            // look-ahead, which may itself contain the start of a reference
            $this->position = $start + 1;
        }
    }
}
250 | |||
diff --git a/inc/3rdparty/simplepie/SimplePie/IRI.php b/inc/3rdparty/simplepie/SimplePie/IRI.php deleted file mode 100644 index 0fead324..00000000 --- a/inc/3rdparty/simplepie/SimplePie/IRI.php +++ /dev/null | |||
@@ -1,997 +0,0 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * SimplePie | ||
4 | * | ||
5 | * A PHP-Based RSS and Atom Feed Framework. | ||
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | ||
7 | * | ||
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | ||
9 | * All rights reserved. | ||
10 | * | ||
11 | * Redistribution and use in source and binary forms, with or without modification, are | ||
12 | * permitted provided that the following conditions are met: | ||
13 | * | ||
14 | * * Redistributions of source code must retain the above copyright notice, this list of | ||
15 | * conditions and the following disclaimer. | ||
16 | * | ||
17 | * * Redistributions in binary form must reproduce the above copyright notice, this list | ||
18 | * of conditions and the following disclaimer in the documentation and/or other materials | ||
19 | * provided with the distribution. | ||
20 | * | ||
21 | * * Neither the name of the SimplePie Team nor the names of its contributors may be used | ||
22 | * to endorse or promote products derived from this software without specific prior | ||
23 | * written permission. | ||
24 | * | ||
25 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS | ||
26 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | ||
27 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS | ||
28 | * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
29 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
30 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
31 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | ||
32 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
33 | * POSSIBILITY OF SUCH DAMAGE. | ||
34 | * | ||
35 | * @package SimplePie | ||
36 | * @version 1.3-dev | ||
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | ||
38 | * @author Ryan Parman | ||
39 | * @author Geoffrey Sneddon | ||
40 | * @author Ryan McCue | ||
41 | * @link http://simplepie.org/ SimplePie | ||
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | ||
43 | * @todo phpDoc comments | ||
44 | */ | ||
45 | |||
46 | /** | ||
47 | * IRI parser/serialiser | ||
48 | * | ||
49 | * @package SimplePie | ||
50 | */ | ||
51 | class SimplePie_IRI | ||
52 | { | ||
53 | /** | ||
54 | * Scheme | ||
55 | * | ||
56 | * @access private | ||
57 | * @var string | ||
58 | */ | ||
59 | var $scheme; | ||
60 | |||
61 | /** | ||
62 | * User Information | ||
63 | * | ||
64 | * @access private | ||
65 | * @var string | ||
66 | */ | ||
67 | var $userinfo; | ||
68 | |||
69 | /** | ||
70 | * Host | ||
71 | * | ||
72 | * @access private | ||
73 | * @var string | ||
74 | */ | ||
75 | var $host; | ||
76 | |||
77 | /** | ||
78 | * Port | ||
79 | * | ||
80 | * @access private | ||
81 | * @var string | ||
82 | */ | ||
83 | var $port; | ||
84 | |||
85 | /** | ||
86 | * Path | ||
87 | * | ||
88 | * @access private | ||
89 | * @var string | ||
90 | */ | ||
91 | var $path; | ||
92 | |||
93 | /** | ||
94 | * Query | ||
95 | * | ||
96 | * @access private | ||
97 | * @var string | ||
98 | */ | ||
99 | var $query; | ||
100 | |||
101 | /** | ||
102 | * Fragment | ||
103 | * | ||
104 | * @access private | ||
105 | * @var string | ||
106 | */ | ||
107 | var $fragment; | ||
108 | |||
109 | /** | ||
110 | * Whether the object represents a valid IRI | ||
111 | * | ||
112 | * @access private | ||
113 | * @var array | ||
114 | */ | ||
115 | var $valid = array(); | ||
116 | |||
/**
 * Return the entire IRI when the object is read as a string
 *
 * get_iri() returns null when no component is set, but PHP requires
 * __toString() to hand back a real string, so that case is reported as ''.
 *
 * @access public
 * @return string
 */
public function __toString()
{
    return (string) $this->get_iri();
}
127 | |||
/**
 * Build an IRI object from its string form
 *
 * The string is split with parse_iri() and each piece is handed to the
 * matching setter. An empty string leaves every component unset.
 *
 * @access public
 * @param string $iri IRI to parse
 */
public function __construct($iri)
{
    $iri = (string) $iri;
    if ($iri === '')
    {
        return;
    }

    $parts = $this->parse_iri($iri);

    // The scheme goes first: set_path() only strips dot segments once a
    // scheme is known.
    $this->set_scheme($parts['scheme']);
    $this->set_authority($parts['authority']);
    $this->set_path($parts['path']);
    $this->set_query($parts['query']);
    $this->set_fragment($parts['fragment']);
}
148 | |||
/**
 * Create a new IRI object by resolving a relative IRI against a base
 *
 * - An empty reference returns $base itself.
 * - A reference that already has a scheme, or a base with no components at
 *   all, returns the parsed reference.
 * - Otherwise the missing leading components are taken from $base.
 *
 * @static
 * @access public
 * @param SimplePie_IRI $base Base IRI
 * @param string $relative Relative IRI
 * @return SimplePie_IRI
 */
public static function absolutize($base, $relative)
{
    $relative = (string) $relative;
    if ($relative === '')
    {
        return $base;
    }

    $relative = new SimplePie_IRI($relative);

    // Already absolute, or there is no base to resolve against
    if ($relative->get_scheme() !== null || $base->get_iri() === null)
    {
        return $relative;
    }

    if ($relative->get_authority() !== null)
    {
        // Network-path reference ("//host/path"): only the scheme is inherited
        $target = $relative;
        $target->set_scheme($base->get_scheme());
        // The path was stored while the reference had no scheme, so its dot
        // segments were kept. Store it again now that the scheme is known.
        $target->set_path($target->get_path());
    }
    else
    {
        $target = new SimplePie_IRI('');
        $target->set_scheme($base->get_scheme());
        $target->set_userinfo($base->get_userinfo());
        $target->set_host($base->get_host());
        $target->set_port($base->get_port());

        $relative_path = $relative->get_path();
        if ($relative_path !== null)
        {
            $base_has_authority = $base->get_userinfo() !== null || $base->get_host() !== null || $base->get_port() !== null;
            if (strpos($relative_path, '/') === 0)
            {
                // Absolute path: replaces the base path outright
                $target->set_path($relative_path);
            }
            elseif ($base_has_authority && $base->get_path() === null)
            {
                // Base has an authority but no path: root the reference
                $target->set_path('/' . $relative_path);
            }
            elseif (($last_segment = strrpos($base->get_path(), '/')) !== false)
            {
                // Merge: base path up to and including its last "/", then the reference
                $target->set_path(substr($base->get_path(), 0, $last_segment + 1) . $relative_path);
            }
            else
            {
                $target->set_path($relative_path);
            }
            $target->set_query($relative->get_query());
        }
        else
        {
            // No path in the reference: keep the base path, and the base
            // query too unless the reference supplies its own.
            $target->set_path($base->get_path());
            if ($relative->get_query() !== null)
            {
                $target->set_query($relative->get_query());
            }
            elseif ($base->get_query() !== null)
            {
                $target->set_query($base->get_query());
            }
        }
    }

    // The fragment always comes from the reference
    $target->set_fragment($relative->get_fragment());
    return $target;
}
229 | |||
/**
 * Split an IRI into its scheme/authority/path/query/fragment strings
 *
 * Components that are absent come back as empty strings.
 *
 * @access private
 * @param string $iri
 * @return array Keys: scheme, authority, path, query, fragment
 */
public function parse_iri($iri)
{
    $pattern = '/^(([^:\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$/';
    preg_match($pattern, $iri, $groups);

    // preg_match() drops trailing groups that did not take part in the
    // match; pad so that indexes 0-9 always exist.
    $groups += array_fill(0, 10, '');

    return array(
        'scheme' => $groups[2],
        'authority' => $groups[4],
        'path' => $groups[5],
        'query' => $groups[7],
        'fragment' => $groups[9],
    );
}
246 | |||
/**
 * Remove "." and ".." segments from a path
 *
 * Works through the input buffer from the left, moving finished segments to
 * an output buffer. The steps are labelled A-E as in the original comments.
 *
 * @access private
 * @param string $input
 * @return string
 */
public function remove_dot_segments($input)
{
    $output = '';
    while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..')
    {
        // A: strip a leading "../" or "./"
        if (strncmp($input, '../', 3) === 0)
        {
            $input = substr($input, 3);
            continue;
        }
        if (strncmp($input, './', 2) === 0)
        {
            $input = substr($input, 2);
            continue;
        }

        // B: a leading "/./" or a lone "/." collapses to "/"
        if (strncmp($input, '/./', 3) === 0)
        {
            $input = substr($input, 2);
            continue;
        }
        if ($input === '/.')
        {
            $input = '/';
            continue;
        }

        // C: a leading "/../" or a lone "/.." collapses to "/" and drops the
        // last segment (with its preceding "/") from the output buffer
        $is_parent_prefix = strncmp($input, '/../', 4) === 0;
        if ($is_parent_prefix || $input === '/..')
        {
            $input = $is_parent_prefix ? substr($input, 3) : '/';
            $cut = strrpos($output, '/');
            $output = ($cut === false) ? '' : substr($output, 0, $cut);
            continue;
        }

        // D: nothing but "." or ".." left
        if ($input === '.' || $input === '..')
        {
            $input = '';
            continue;
        }

        // E: move the first segment (with its leading "/", if any) to the output
        $next_slash = strpos($input, '/', 1);
        if ($next_slash !== false)
        {
            $output .= substr($input, 0, $next_slash);
            $input = substr($input, $next_slash);
        }
        else
        {
            $output .= $input;
            $input = '';
        }
    }
    return $output . $input;
}
307 | |||
/**
 * Replace invalid characters with their percent-encoded form
 *
 * Three passes over the string:
 *  1. runs of %XX are normalised by remove_iunreserved_percent_encoded();
 *  2. any "%" not followed by two hex digits becomes "%25";
 *  3. every remaining byte that is neither in $valid_chars nor part of a
 *     well-formed UTF-8 sequence for an allowed code point is percent-encoded.
 *
 * @param string $string Input string
 * @param string $valid_chars Valid characters not in iunreserved or iprivate (this is ASCII-only)
 * @param int $case Normalise case (SIMPLEPIE_LOWERCASE / SIMPLEPIE_UPPERCASE bit flags)
 * @param bool $iprivate Allow iprivate
 * @return string
 */
protected function replace_invalid_with_pct_encoding($string, $valid_chars, $case = SIMPLEPIE_SAME_CASE, $iprivate = false)
{
    // Normalize as many pct-encoded sections as possible
    // (a plain $this is enough for the callback; no reference is needed)
    $string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $string);

    // Replace invalid percent characters
    $string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

    // Add unreserved and % to $valid_chars (the latter is safe because all
    // pct-encoded sections are now valid).
    $valid_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

    // Now replace any bytes that aren't allowed with their pct-encoded versions
    $position = 0;
    $strlen = strlen($string);
    // strspn() skips the run of valid bytes; the loop body only ever sees
    // the first byte that is not in $valid_chars.
    while (($position += strspn($string, $valid_chars, $position)) < $strlen)
    {
        $value = ord($string[$position]);

        // Start position
        $start = $position;

        // By default we are valid
        $valid = true;

        // No one byte sequences are valid due to the while.
        // Two byte sequence:
        if (($value & 0xE0) === 0xC0)
        {
            $character = ($value & 0x1F) << 6;
            $length = 2;
            $remaining = 1;
        }
        // Three byte sequence:
        elseif (($value & 0xF0) === 0xE0)
        {
            $character = ($value & 0x0F) << 12;
            $length = 3;
            $remaining = 2;
        }
        // Four byte sequence:
        elseif (($value & 0xF8) === 0xF0)
        {
            $character = ($value & 0x07) << 18;
            $length = 4;
            $remaining = 3;
        }
        // Invalid byte:
        else
        {
            $valid = false;
            $length = 1;
            $remaining = 0;
        }

        if ($remaining)
        {
            if ($position + $length <= $strlen)
            {
                // On a complete sequence this loop leaves $position one past
                // the last continuation byte.
                for ($position++; $remaining; $position++)
                {
                    $value = ord($string[$position]);

                    // Check that the byte is valid, then add it to the character:
                    if (($value & 0xC0) === 0x80)
                    {
                        $character |= ($value & 0x3F) << (--$remaining * 6);
                    }
                    // If it is invalid, count the sequence as invalid and reprocess the current byte:
                    else
                    {
                        $valid = false;
                        $position--;
                        break;
                    }
                }
            }
            else
            {
                // Sequence is cut off by the end of the string: everything
                // from $start to the last byte is invalid.
                $position = $strlen - 1;
                $valid = false;
            }
        }

        // Percent encode anything invalid or not in ucschar
        if (
            // Invalid sequences
            !$valid
            // Non-shortest form sequences are invalid
            || $length > 1 && $character <= 0x7F
            || $length > 2 && $character <= 0x7FF
            || $length > 3 && $character <= 0xFFFF
            // Outside of range of ucschar codepoints
            // Noncharacters
            || ($character & 0xFFFE) === 0xFFFE
            || $character >= 0xFDD0 && $character <= 0xFDEF
            || (
                // Everything else not in ucschar
                   $character > 0xD7FF && $character < 0xF900
                || $character < 0xA0
                || $character > 0xEFFFD
            )
            && (
                // Everything not in iprivate, if it applies
                   !$iprivate
                || $character < 0xE000
                || $character > 0x10FFFD
            )
        )
        {
            // If we were a character, pretend we weren't, but rather an error.
            // (step back onto the last byte of the sequence)
            if ($valid)
                $position--;

            // Each byte grows from one character to three ("%XX"), so the
            // index, the scan position and the length all move on by two.
            for ($j = $start; $j <= $position; $j++)
            {
                $string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1);
                $j += 2;
                $position += 2;
                $strlen += 2;
            }
        }
    }

    // Normalise case
    if ($case & SIMPLEPIE_LOWERCASE)
    {
        $string = strtolower($string);
    }
    elseif ($case & SIMPLEPIE_UPPERCASE)
    {
        $string = strtoupper($string);
    }

    return $string;
}
453 | |||
/**
 * Callback function for preg_replace_callback.
 *
 * Receives one run of %XX triplets. Triplets that together form a UTF-8
 * character in iunreserved are replaced by the raw bytes; every other
 * triplet is kept percent-encoded, with its hex digits upper-cased.
 *
 * @param array $match PCRE match
 * @return string Replacement
 */
protected function remove_iunreserved_percent_encoded($match)
{
    // The match consists only of well-formed %XX triplets, so exploding on
    // "%" yields an empty first element followed by one hex pair per byte.
    $octets = explode('%', $match[0]);
    $total = count($octets);

    // Result accumulator, and the number of continuation bytes still
    // expected for the sequence currently being read.
    $result = '';
    $pending = 0;

    for ($i = 1; $i < $total; $i++)
    {
        $value = hexdec($octets[$i]);

        if (!$pending)
        {
            // First byte of a sequence
            $start = $i;
            $valid = true;

            if ($value <= 0x7F)
            {
                // One byte sequence
                $character = $value;
                $length = 1;
            }
            elseif (($value & 0xE0) === 0xC0)
            {
                // Two byte sequence
                $character = ($value & 0x1F) << 6;
                $length = 2;
                $pending = 1;
            }
            elseif (($value & 0xF0) === 0xE0)
            {
                // Three byte sequence
                $character = ($value & 0x0F) << 12;
                $length = 3;
                $pending = 2;
            }
            elseif (($value & 0xF8) === 0xF0)
            {
                // Four byte sequence
                $character = ($value & 0x07) << 18;
                $length = 4;
                $pending = 3;
            }
            else
            {
                // Not a legal lead byte
                $valid = false;
                $pending = 0;
            }
        }
        elseif (($value & 0xC0) === 0x80)
        {
            // Well-formed continuation byte: fold its six payload bits in
            $pending--;
            $character |= ($value & 0x3F) << ($pending * 6);
        }
        else
        {
            // Broken continuation: close the sequence as invalid and step
            // back so this byte is re-read as the start of a new sequence.
            $valid = false;
            $pending = 0;
            $i--;
        }

        // Sequence still incomplete: read the next byte
        if ($pending)
        {
            continue;
        }

        // The sequence is finished; decide whether it must stay encoded.
        $keep_encoded =
            // Invalid sequences
            !$valid
            // Non-shortest form sequences are invalid
            || $length > 1 && $character <= 0x7F
            || $length > 2 && $character <= 0x7FF
            || $length > 3 && $character <= 0xFFFF
            // Outside of range of iunreserved codepoints
            || $character < 0x2D
            || $character > 0xEFFFD
            // Noncharacters
            || ($character & 0xFFFE) === 0xFFFE
            || $character >= 0xFDD0 && $character <= 0xFDEF
            // Everything else not in iunreserved (this is all BMP)
            || $character === 0x2F
            || $character > 0x39 && $character < 0x41
            || $character > 0x5A && $character < 0x61
            || $character > 0x7A && $character < 0x7E
            || $character > 0x7E && $character < 0xA0
            || $character > 0xD7FF && $character < 0xF900;

        for ($j = $start; $j <= $i; $j++)
        {
            $result .= $keep_encoded ? '%' . strtoupper($octets[$j]) : chr(hexdec($octets[$j]));
        }
    }

    // Bytes left over from an unfinished multi-byte sequence are invalid
    // and stay encoded.
    if ($pending)
    {
        for ($j = $start; $j < $total; $j++)
        {
            $result .= '%' . strtoupper($octets[$j]);
        }
    }

    return $result;
}
594 | |||
/**
 * Tell whether every setter that has run so far accepted its value
 *
 * Each setter records true or false in $this->valid under its own name;
 * the IRI is valid when the sum of those flags equals their count.
 *
 * @access public
 * @return bool
 */
public function is_valid()
{
    $checked = count($this->valid);
    $passed = array_sum($this->valid);
    return $passed === $checked;
}
605 | |||
/**
 * Set the scheme. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * A scheme is one ASCII letter followed by any number of letters, digits,
 * "+", "-" or ".". The whole string is checked, not just its first two
 * characters. An accepted scheme is stored lower-cased; null or '' clears it.
 *
 * @access public
 * @param string $scheme
 * @return bool
 */
public function set_scheme($scheme)
{
    if ($scheme === null || $scheme === '')
    {
        $this->scheme = null;
    }
    elseif (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*\z/', $scheme))
    {
        $this->scheme = null;
        $this->valid[__FUNCTION__] = false;
        return false;
    }
    else
    {
        $this->scheme = strtolower($scheme);
    }
    $this->valid[__FUNCTION__] = true;
    return true;
}
646 | |||
/**
 * Set the authority. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * The string is split into userinfo (everything before the last "@"), host
 * and port. The port separator is looked for only after the closing "]" of
 * a bracketed IP literal, so that "[::1]:80" yields host "[::1]" and port
 * "80" instead of being cut at the first colon inside the address.
 *
 * @access public
 * @param string $authority
 * @return bool
 */
public function set_authority($authority)
{
    if (($userinfo_end = strrpos($authority, '@')) !== false)
    {
        $userinfo = substr($authority, 0, $userinfo_end);
        $authority = substr($authority, $userinfo_end + 1);
    }
    else
    {
        $userinfo = null;
    }

    // Colons inside "[...]" belong to the address, not to the port
    $bracket_end = strpos($authority, ']');
    $search_from = ($bracket_end === false) ? 0 : $bracket_end;

    if (($port_start = strpos($authority, ':', $search_from)) !== false)
    {
        $port = substr($authority, $port_start + 1);
        // Older PHP versions return false for an empty remainder
        if ($port === false)
        {
            $port = null;
        }
        $authority = substr($authority, 0, $port_start);
    }
    else
    {
        $port = null;
    }

    return $this->set_userinfo($userinfo) && $this->set_host($authority) && $this->set_port($port);
}
683 | |||
/**
 * Set the userinfo.
 *
 * null or '' clears the component; anything else is stored after invalid
 * characters have been percent-encoded. Always reports success.
 *
 * @access public
 * @param string $userinfo
 * @return bool
 */
public function set_userinfo($userinfo)
{
    $is_empty = ($userinfo === null || $userinfo === '');
    $this->userinfo = $is_empty
        ? null
        : $this->replace_invalid_with_pct_encoding($userinfo, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$&\'()*+,;=:');

    $this->valid[__FUNCTION__] = true;
    return true;
}
704 | |||
/**
 * Set the host. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * A host wrapped in "[...]" must pass SimplePie_Net_IPv6::checkIPv6() and is
 * stored unchanged. Any other host is percent-encoded where necessary and
 * lower-cased. null or '' clears the component.
 *
 * @access public
 * @param string $host
 * @return bool
 */
public function set_host($host)
{
    if ($host === null || $host === '')
    {
        $this->host = null;
        $this->valid[__FUNCTION__] = true;
        return true;
    }

    $is_ip_literal = ($host[0] === '[' && substr($host, -1) === ']');

    if ($is_ip_literal && !SimplePie_Net_IPv6::checkIPv6(substr($host, 1, -1)))
    {
        $this->host = null;
        $this->valid[__FUNCTION__] = false;
        return false;
    }

    $this->host = $is_ip_literal
        ? $host
        : $this->replace_invalid_with_pct_encoding($host, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$&\'()*+,;=', SIMPLEPIE_LOWERCASE);
    $this->valid[__FUNCTION__] = true;
    return true;
}
743 | |||
/**
 * Set the port. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * A port made up solely of decimal digits is stored as an integer; null or
 * '' clears the component; anything else clears it and reports failure.
 *
 * @access public
 * @param string $port
 * @return bool
 */
public function set_port($port)
{
    if ($port === null || $port === '')
    {
        $this->port = null;
        $this->valid[__FUNCTION__] = true;
        return true;
    }

    $all_digits = (strlen($port) === strspn($port, '0123456789'));

    $this->port = $all_digits ? (int) $port : null;
    $this->valid[__FUNCTION__] = $all_digits;
    return $all_digits;
}
773 | |||
/**
 * Set the path.
 *
 * null or '' clears the component. A path starting with "//" is refused
 * while no authority component is set. Otherwise the path is
 * percent-encoded where necessary, and its dot segments are removed when a
 * scheme is already set.
 *
 * @access public
 * @param string $path
 * @return bool
 */
public function set_path($path)
{
    if ($path === null || $path === '')
    {
        $this->path = null;
        $this->valid[__FUNCTION__] = true;
        return true;
    }

    $no_authority = ($this->userinfo === null && $this->host === null && $this->port === null);
    if (strncmp($path, '//', 2) === 0 && $no_authority)
    {
        $this->path = null;
        $this->valid[__FUNCTION__] = false;
        return false;
    }

    $encoded = $this->replace_invalid_with_pct_encoding($path, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$&\'()*+,;=@/');
    $this->path = ($this->scheme !== null) ? $this->remove_dot_segments($encoded) : $encoded;
    $this->valid[__FUNCTION__] = true;
    return true;
}
806 | |||
/**
 * Set the query.
 *
 * null or '' clears the component; anything else is stored after invalid
 * characters have been percent-encoded. Always reports success.
 *
 * @access public
 * @param string $query
 * @return bool
 */
public function set_query($query)
{
    $is_empty = ($query === null || $query === '');
    $this->query = $is_empty
        ? null
        : $this->replace_invalid_with_pct_encoding($query, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$\'()*+,;:@/?&=');

    $this->valid[__FUNCTION__] = true;
    return true;
}
827 | |||
/**
 * Set the fragment.
 *
 * null or '' clears the component; anything else is stored after invalid
 * characters have been percent-encoded. Always reports success.
 *
 * @access public
 * @param string $fragment
 * @return bool
 */
public function set_fragment($fragment)
{
    $is_empty = ($fragment === null || $fragment === '');
    $this->fragment = $is_empty
        ? null
        : $this->replace_invalid_with_pct_encoding($fragment, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$&\'()*+,;=:@/?');

    $this->valid[__FUNCTION__] = true;
    return true;
}
848 | |||
/**
 * Get the complete IRI
 *
 * Joins the components that are set, each with its delimiter
 * ("scheme:", "//authority", "?query", "#fragment").
 *
 * @access public
 * @return string|null The assembled IRI, or null when no component is set
 */
public function get_iri()
{
    $authority = $this->get_authority();

    $iri = ($this->scheme !== null ? $this->scheme . ':' : '')
        . ($authority !== null ? '//' . $authority : '')
        . ($this->path !== null ? $this->path : '')
        . ($this->query !== null ? '?' . $this->query : '')
        . ($this->fragment !== null ? '#' . $this->fragment : '');

    return ($iri !== '') ? $iri : null;
}
888 | |||
/**
 * Get the scheme
 *
 * @access public
 * @return string|null Scheme as stored by set_scheme(), or null when unset
 */
public function get_scheme()
{
    return $this->scheme;
}
899 | |||
/**
 * Get the complete authority
 *
 * Assembled as "userinfo@host:port" from whichever of the three parts are set.
 *
 * @access public
 * @return string|null The authority, or null when none of its parts is set
 */
public function get_authority()
{
    $authority = ($this->userinfo !== null ? $this->userinfo . '@' : '')
        . ($this->host !== null ? $this->host : '')
        . ($this->port !== null ? ':' . $this->port : '');

    return ($authority !== '') ? $authority : null;
}
931 | |||
/**
 * Get the user information
 *
 * @access public
 * @return string|null Userinfo as stored by set_userinfo(), or null when unset
 */
public function get_userinfo()
{
    return $this->userinfo;
}
942 | |||
/**
 * Get the host
 *
 * @access public
 * @return string|null Host as stored by set_host(), or null when unset
 */
public function get_host()
{
    return $this->host;
}
953 | |||
/**
 * Get the port
 *
 * @access public
 * @return int|null Port as stored by set_port() (an integer), or null when unset
 */
public function get_port()
{
    return $this->port;
}
964 | |||
/**
 * Get the path
 *
 * @access public
 * @return string|null Path as stored by set_path(), or null when unset
 */
public function get_path()
{
    return $this->path;
}
975 | |||
/**
 * Get the query
 *
 * @access public
 * @return string|null Query (without the leading "?") as stored by set_query(), or null when unset
 */
public function get_query()
{
    return $this->query;
}
986 | |||
/**
 * Get the fragment
 *
 * @access public
 * @return string|null Fragment (without the leading "#") as stored by set_fragment(), or null when unset
 */
public function get_fragment()
{
    return $this->fragment;
}
997 | } | ||
diff --git a/inc/3rdparty/simplepie/SimplePie/Net/IPv6.php b/inc/3rdparty/simplepie/SimplePie/Net/IPv6.php deleted file mode 100644 index 7806d9dc..00000000 --- a/inc/3rdparty/simplepie/SimplePie/Net/IPv6.php +++ /dev/null | |||
@@ -1,258 +0,0 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * SimplePie | ||
4 | * | ||
5 | * A PHP-Based RSS and Atom Feed Framework. | ||
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | ||
7 | * | ||
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | ||
9 | * All rights reserved. | ||
10 | * | ||
11 | * Redistribution and use in source and binary forms, with or without modification, are | ||
12 | * permitted provided that the following conditions are met: | ||
13 | * | ||
14 | * * Redistributions of source code must retain the above copyright notice, this list of | ||
15 | * conditions and the following disclaimer. | ||
16 | * | ||
17 | * * Redistributions in binary form must reproduce the above copyright notice, this list | ||
18 | * of conditions and the following disclaimer in the documentation and/or other materials | ||
19 | * provided with the distribution. | ||
20 | * | ||
21 | * * Neither the name of the SimplePie Team nor the names of its contributors may be used | ||
22 | * to endorse or promote products derived from this software without specific prior | ||
23 | * written permission. | ||
24 | * | ||
25 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS | ||
26 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | ||
27 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS | ||
28 | * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
29 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
30 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
31 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | ||
32 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
33 | * POSSIBILITY OF SUCH DAMAGE. | ||
34 | * | ||
35 | * @package SimplePie | ||
36 | * @version 1.3-dev | ||
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | ||
38 | * @author Ryan Parman | ||
39 | * @author Geoffrey Sneddon | ||
40 | * @author Ryan McCue | ||
41 | * @link http://simplepie.org/ SimplePie | ||
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | ||
43 | * @todo phpDoc comments | ||
44 | */ | ||
45 | |||
46 | |||
47 | /** | ||
48 | * Class to validate and to work with IPv6 addresses. | ||
49 | * | ||
50 | * @package SimplePie | ||
51 | * @copyright 2003-2005 The PHP Group | ||
52 | * @license http://www.opensource.org/licenses/bsd-license.php | ||
53 | * @link http://pear.php.net/package/Net_IPv6 | ||
54 | * @author Alexander Merz <alexander.merz@web.de> | ||
55 | * @author elfrink at introweb dot nl | ||
56 | * @author Josh Peck <jmp at joshpeck dot org> | ||
57 | * @author Geoffrey Sneddon <geoffers@gmail.com> | ||
58 | */ | ||
class SimplePie_Net_IPv6
{
	/**
	 * Removes a possible existing netmask specification of an IP address.
	 *
	 * Everything from the first '/' onwards is discarded, so
	 * 'FF01::101/64' becomes 'FF01::101'. An address that contains no '/'
	 * is returned unchanged.
	 *
	 * @param string $ip the (compressed) IP as Hex representation
	 * @return string the IP without the netmask
	 * @since 1.1.0
	 * @access public
	 * @static
	 */
	public static function removeNetmaskSpec($ip)
	{
		$slash = strpos($ip, '/');
		if ($slash === false)
		{
			return $ip;
		}
		return substr($ip, 0, $slash);
	}

	/**
	 * Uncompresses an IPv6 address
	 *
	 * RFC 2373 allows you to compress zeros in an address to '::'. This
	 * function expects a valid IPv6 address and expands the '::' to
	 * the required zeros. A trailing netmask ('/64') is removed first and
	 * is not part of the returned value.
	 *
	 * Example:	FF01::101	->	FF01:0:0:0:0:0:0:101
	 *			::1			->	0:0:0:0:0:0:0:1
	 *			FF01::/64	->	FF01:0:0:0:0:0:0:0
	 *
	 * @access public
	 * @static
	 * @param string $ip a valid IPv6-address (hex format)
	 * @return string the uncompressed IPv6-address (hex format)
	 */
	public static function Uncompress($ip)
	{
		$uip = self::removeNetmaskSpec($ip);
		if (strpos($uip, '::') === false)
		{
			// Nothing to expand
			return $uip;
		}

		// Work on the netmask-free address: splitting the raw input would
		// leave '/NN' in the right-hand part and make 'xxx::/NN' look as if
		// groups followed the '::'.
		$pieces = explode('::', $uip);
		$ip1 = $pieces[0];
		$ip2 = $pieces[1];

		// $c1 / $c2: number of ':' separators on each side of the '::',
		// or -1 when that side is empty.
		$c1 = ($ip1 === '') ? -1 : substr_count($ip1, ':');
		$c2 = ($ip2 === '') ? -1 : substr_count($ip2, ':');

		// A dotted IPv4 tail stands in for two 16-bit groups, so it counts
		// as one extra separator.
		if (strpos($ip2, '.') !== false)
		{
			$c2++;
		}

		// ::
		if ($c1 === -1 && $c2 === -1)
		{
			return '0:0:0:0:0:0:0:0';
		}

		// The repeat counts are clamped at zero: malformed input with too
		// many groups would otherwise hand str_repeat() a negative count.
		// ::xxx
		if ($c1 === -1)
		{
			$fill = str_repeat('0:', max(0, 7 - $c2));
			return str_replace('::', $fill, $uip);
		}
		// xxx::
		if ($c2 === -1)
		{
			$fill = str_repeat(':0', max(0, 7 - $c1));
			return str_replace('::', $fill, $uip);
		}
		// xxx::xxx
		$fill = str_repeat(':0:', max(0, 6 - $c2 - $c1));
		$uip = str_replace('::', $fill, $uip);
		// Adjacent ':0:' fillers leave '::' pairs behind; collapse them
		return str_replace('::', ':', $uip);
	}

	/**
	 * Splits an IPv6 address into the IPv6 and a possible IPv4 part
	 *
	 * RFC 2373 allows you to note the last two parts of an IPv6 address as
	 * an IPv4 compatible address. The address is uncompressed first, then
	 * cut at its last ':' when it contains a dotted part.
	 *
	 * Example:	0:0:0:0:0:0:13.1.68.3
	 *			0:0:0:0:0:FFFF:129.144.52.38
	 *
	 * @access public
	 * @static
	 * @param string $ip a valid IPv6-address (hex format)
	 * @return array [0] contains the IPv6 part, [1] the IPv4 part (empty
	 *               string when there is none)
	 */
	public static function SplitV64($ip)
	{
		$ip = self::Uncompress($ip);
		if (strpos($ip, '.') === false)
		{
			return array($ip, '');
		}

		$pos = strrpos($ip, ':');
		if ($pos === false)
		{
			// Dotted text without any ':' has no IPv6 part at all
			return array('', $ip);
		}

		// The cast covers old PHP versions, where substr() returns false
		// when the ':' is the last character.
		return array(substr($ip, 0, $pos), (string) substr($ip, $pos + 1));
	}

	/**
	 * Checks an IPv6 address
	 *
	 * Checks if the given IP is IPv6-compatible: after uncompressing it
	 * must consist of either eight groups of one to four hex digits, or
	 * six such groups followed by a dotted IPv4 address whose four parts
	 * are each in the range 0-255.
	 *
	 * @access public
	 * @static
	 * @param string $ip the address to check
	 * @return bool true if $ip is an IPv6 address
	 */
	public static function checkIPv6($ip)
	{
		$ipPart = self::SplitV64($ip);
		if ($ipPart[0] === '')
		{
			return false;
		}

		// Every group has to be valid; merely counting the valid ones would
		// let addresses with surplus garbage groups through.
		$groups = explode(':', $ipPart[0]);
		foreach ($groups as $group)
		{
			if (!preg_match('/^[0-9a-fA-F]{1,4}$/D', $group))
			{
				return false;
			}
		}

		$groupCount = count($groups);
		if ($groupCount === 8)
		{
			// Eight groups already fill 128 bits, so no IPv4 tail is allowed
			return $ipPart[1] === '';
		}

		if ($groupCount === 6 && $ipPart[1] !== '')
		{
			$octets = explode('.', $ipPart[1]);
			if (count($octets) !== 4)
			{
				return false;
			}
			foreach ($octets as $octet)
			{
				if (!preg_match('/^\d{1,3}$/D', $octet) || (int) $octet > 255)
				{
					return false;
				}
			}
			return true;
		}

		return false;
	}
}
258 | |||
diff --git a/inc/3rdparty/site_config/README.md b/inc/3rdparty/site_config/README.txt index 0aff456b..e966ee74 100644 --- a/inc/3rdparty/site_config/README.md +++ b/inc/3rdparty/site_config/README.txt | |||
@@ -1,6 +1,6 @@ | |||
1 | Full-Text RSS Site Patterns | 1 | Full-Text RSS Site Patterns |
2 | --------------------------- | 2 | --------------------------- |
3 | 3 | ||
4 | Site patterns allow you to specify what should be extracted from specific sites. | 4 | Site patterns allow you to specify what should be extracted from specific sites. |
5 | 5 | ||
6 | Please see http://help.fivefilters.org/customer/portal/articles/223153-site-patterns for more information. \ No newline at end of file | 6 | Please see http://help.fivefilters.org/customer/portal/articles/223153-site-patterns for more information. \ No newline at end of file |
diff --git a/inc/3rdparty/site_config/custom/stackexchange.com.txt b/inc/3rdparty/site_config/custom/stackexchange.com.txt new file mode 100755 index 00000000..c9d44b1d --- /dev/null +++ b/inc/3rdparty/site_config/custom/stackexchange.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: //title | ||
2 | body: //div[@id='question']//div[contains(@class,'post-text')] | //div[@id='answers-header']//h2 | //div[contains(@class,'accepted-answer')]//div[contains(@class,'post-text')] | ||
3 | |||
4 | test_url: http://cstheory.stackexchange.com/questions/14811/what-is-the-enlightenment-im-supposed-to-attain-after-studying-finite-automata/14818#14818 | ||
diff --git a/inc/3rdparty/site_config/custom/stackoverflow.com.txt b/inc/3rdparty/site_config/custom/stackoverflow.com.txt new file mode 100755 index 00000000..d2eb984d --- /dev/null +++ b/inc/3rdparty/site_config/custom/stackoverflow.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: //title | ||
2 | body: //div[@id='question']//div[contains(@class,'post-text')] | //div[@id='answers-header']//h2 | //div[contains(@class,'accepted-answer')]//div[contains(@class,'post-text')] | ||
3 | |||
4 | test_url: http://stackoverflow.com/questions/20302422/calling-a-function-from-a-javascript-object | ||
diff --git a/inc/3rdparty/site_config/index.php b/inc/3rdparty/site_config/index.php index a3d5f739..a1b767fd 100644 --- a/inc/3rdparty/site_config/index.php +++ b/inc/3rdparty/site_config/index.php | |||
@@ -1,3 +1,3 @@ | |||
1 | <?php | 1 | <?php |
2 | // this is here to prevent directory listing over the web | 2 | // this is here to prevent directory listing over the web |
3 | ?> \ No newline at end of file | 3 | ?> \ No newline at end of file |
diff --git a/inc/3rdparty/site_config/standard/24ways.org.txt b/inc/3rdparty/site_config/standard/24ways.org.txt new file mode 100644 index 00000000..03bd1950 --- /dev/null +++ b/inc/3rdparty/site_config/standard/24ways.org.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //div[@class='meta']/h2/a | ||
2 | author: //div[@class='meta']/h2/following-sibling::p/a/text() | ||
3 | date://div[@class='meta']/h2/strong | ||
4 | body: //div[@id='article'] | ||
5 | strip: //div[@class='domore'] | ||
6 | test_url: http://24ways.org/2011/composing-the-new-canon \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/37signals.com.txt b/inc/3rdparty/site_config/standard/37signals.com.txt new file mode 100644 index 00000000..43a10ae5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/37signals.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //div[@class='post_header']//h2/a | ||
2 | author: //span[@class='author'] | ||
3 | date: //span[@class='date'] | ||
4 | body: //div[@id='Content'] | ||
5 | |||
6 | test_url: http://37signals.com/svn/posts/2785-the-end-of-the-it-department \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/3quarksdaily.com.txt b/inc/3rdparty/site_config/standard/3quarksdaily.com.txt new file mode 100644 index 00000000..c4e7940f --- /dev/null +++ b/inc/3rdparty/site_config/standard/3quarksdaily.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | body: //div[@class='content'] | ||
2 | date: //div[@class='content']/h2 | ||
3 | strip: //div[@class='content']/h2 | ||
4 | title: //div[@class='content']/h3 | ||
5 | |||
6 | strip: //div[@id='postmenu'] | ||
7 | strip: //div[@class='trackback'] | ||
8 | tidy: no | ||
9 | test_url: http://www.3quarksdaily.com/3quarksdaily/2012/01/martin-luther-king-i-have-a-dream.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt b/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt new file mode 100644 index 00000000..b846b050 --- /dev/null +++ b/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | body: //div[@id='main'] | ||
2 | title: //div[@class='intro']/h1 | ||
3 | author: //ul[@class='text-data']/li[@class='author'] | ||
4 | date: //ul[@class='text-data']/li[@class='date'] | ||
5 | convert_double_br_tags: yes | ||
6 | tidy: no | ||
7 | |||
8 | strip: //div[@class='share'] | ||
9 | strip: //*[@class='zoom'] | ||
10 | strip: //div[@id='disqus_thread'] | ||
11 | test_url: http://3voor12.vpro.nl/nieuws/2012/januari/Ook-website-GroenLinks-woensdag-op-zwart-i-v-m--SOPA.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/43folders.com.txt b/inc/3rdparty/site_config/standard/43folders.com.txt new file mode 100644 index 00000000..e8073f6f --- /dev/null +++ b/inc/3rdparty/site_config/standard/43folders.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //*[@class = 'content'] | ||
2 | author: //*[@class = 'submitted']/a | ||
3 | date: substring-after(//*[@class = 'submitted']/text(), '|') | ||
4 | test_url: http://www.43folders.com/2011/04/22/cranking \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/500px.com.txt b/inc/3rdparty/site_config/standard/500px.com.txt new file mode 100644 index 00000000..68e6b2d0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/500px.com.txt | |||
@@ -0,0 +1,27 @@ | |||
1 | # very loose setup for both 500px.com/photo/* and 500px.com/blog/* | ||
2 | # photo page example: http://500px.com/photo/4181666 | ||
3 | # blog page example: http://500px.com/blog/110 | ||
4 | |||
5 | # avoid "no text" error | ||
6 | tidy:no | ||
7 | prune:no | ||
8 | |||
9 | # reorganize photo page elements | ||
10 | #body://div[contains(@class,'container')] | ||
11 | move_into(body)://div[contains(@id,'thephoto')] | ||
12 | move_into(body)://div[contains(@id,'description')] | ||
13 | move_into(body)://div[contains(@id,'tags')] | ||
14 | move_into(body)://div[contains(@id,'photo-info')] | ||
15 | |||
16 | # clean photo page info | ||
17 | strip://span[contains(@id,'copyright')] | ||
18 | strip://*[contains(@id,'store')] | ||
19 | strip://*[contains(@id,'user-info')] | ||
20 | strip://*[contains(@id,'photo-stats')] | ||
21 | strip://*[contains(@id,'voting_controls_container')] | ||
22 | strip://*[contains(@id,'more-photos')] | ||
23 | strip://*[contains(@id,'embed-photo')] | ||
24 | |||
25 | # clean blog page side bar | ||
26 | strip://*[contains(@class,'col d3 clearafter')] | ||
27 | test_url: http://500px.com/photo/3641041?from=editors \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/512pixels.net.txt b/inc/3rdparty/site_config/standard/512pixels.net.txt new file mode 100644 index 00000000..e458980f --- /dev/null +++ b/inc/3rdparty/site_config/standard/512pixels.net.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | title: substring-before(//title, '—') | ||
2 | test_url: http://512pixels.net/more-on-linked-lists/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/5by5.tv.txt b/inc/3rdparty/site_config/standard/5by5.tv.txt new file mode 100644 index 00000000..dce0df4e --- /dev/null +++ b/inc/3rdparty/site_config/standard/5by5.tv.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | body: //*[@id="episode"] | ||
2 | prune: no | ||
3 | tidy: no | ||
4 | |||
5 | autodetect_next_page: no | ||
6 | strip_id_or_class: player | ||
7 | |||
8 | strip://*[@id="header"] | ||
9 | test_url: http://5by5.tv/buildanalyze/60 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/944.com.txt b/inc/3rdparty/site_config/standard/944.com.txt new file mode 100644 index 00000000..84380e79 --- /dev/null +++ b/inc/3rdparty/site_config/standard/944.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //h2[@class='border'] | ||
2 | body: //div[@class='padding'] | ||
3 | |||
4 | convert_double_br_tags: yes | ||
5 | |||
6 | strip: //div[@id='social_sharing'] | ||
7 | strip: //div[@class='socialLinks'] | ||
8 | |||
9 | test_url: http://www.944.com/articles/mild-obsessions-frock-la-get-to-know-victoria-tik-s-haute-sustainable-fashion-line/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt b/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt new file mode 100644 index 00000000..379592e0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")] | ||
3 | |||
4 | strip_id_or_class: socialshareprivacy1 | ||
5 | strip_id_or_class: zvaFacebookButton | ||
6 | |||
7 | tidy: no | ||
8 | prune: no | ||
9 | |||
10 | test_url: http://www.aachener-nachrichten.de/lokales/aachen-detail-an/2517757 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt b/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt new file mode 100644 index 00000000..4d76fac7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")] | ||
3 | |||
4 | strip_id_or_class: socialshareprivacy1 | ||
5 | strip_id_or_class: zvaFacebookButton | ||
6 | |||
7 | tidy: no | ||
8 | prune: no | ||
9 | |||
10 | test_url: http://www.aachener-zeitung.de/sixcms/detail.php?template=az_detail&id=2552718 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/abc.es.txt b/inc/3rdparty/site_config/standard/abc.es.txt new file mode 100644 index 00000000..a99833de --- /dev/null +++ b/inc/3rdparty/site_config/standard/abc.es.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | body: //div[@class='datosi' or @class='date' or @class='photo-alt1' or @class='text'] | ||
3 | strip_id_or_class: colB | ||
4 | |||
5 | prune: no | ||
6 | |||
7 | test_url: http://www.abc.es/20120209/tv-series/abci-house-ultima-temporada-201202090936.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/abc.net.au.txt b/inc/3rdparty/site_config/standard/abc.net.au.txt new file mode 100644 index 00000000..5e6269cb --- /dev/null +++ b/inc/3rdparty/site_config/standard/abc.net.au.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //h1 | ||
2 | author: //div[@class="byline"]/a | ||
3 | date: //span[@class="timestamp"] | ||
4 | |||
5 | strip: //p[@class="topics"] | ||
6 | strip: //h1 | ||
7 | strip: //div[@class="byline"] | ||
8 | strip: //p[@class="published"] | ||
9 | strip: //div[contains(@class,"featured-scroller")] | ||
10 | test_url: http://www.abc.net.au/news/2011-11-08/crabb-carbon-legislation-abbott-demolition/3652544 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/abcnews.go.com.txt b/inc/3rdparty/site_config/standard/abcnews.go.com.txt new file mode 100644 index 00000000..c515d3e4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/abcnews.go.com.txt | |||
@@ -0,0 +1,27 @@ | |||
1 | title: //h1[@class='headline'] | ||
2 | body: //div[@id='storyText'] | ||
3 | # for video entries | ||
4 | body: //img[@id='ff-img'] | //div[@id='meta']//div[contains(@class, 'overview')] | ||
5 | author: //div[@class='byline'] | ||
6 | date: //div[@class='date'] | ||
7 | strip: //*[@id='date_partner'] | ||
8 | |||
9 | strip: //div[@class='breadcrumb'] | ||
10 | strip: //div[contains(@class,'show_tools')] | ||
11 | strip: //div[@id='sponsoredByAd'] | ||
12 | strip: //div[contains(@class,'rel_container')] | ||
13 | strip: //p[a[starts-with(@href, 'http://www.twitter.com')]] | ||
14 | strip: //p[a[starts-with(@href, 'http://www.facebook.com')]] | ||
15 | strip: //p[contains(., 'Click here to return to')] | ||
16 | #strip_id_or_class: media | ||
17 | strip_id_or_class: mediaplayer | ||
18 | |||
19 | replace_string(<link rel="image_src" href="http): <img id="ff-img" src="http | ||
20 | |||
21 | prune: no | ||
22 | |||
23 | single_page_link: concat(//li[@class='pager']//a/@href, '&singlePage=true') | ||
24 | |||
25 | test_url: http://abcnews.go.com/Politics/newt-gingrich-rocky-rollout-presidential-campaign-recover/story?id=13632744 | ||
26 | # multi-page | ||
27 | test_url: http://abcnews.go.com/Blotter/family-freed-american-hostage-somalia-seals-obama/story?id=15439544 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/accesstoinsight.org.txt b/inc/3rdparty/site_config/standard/accesstoinsight.org.txt new file mode 100644 index 00000000..b5d85079 --- /dev/null +++ b/inc/3rdparty/site_config/standard/accesstoinsight.org.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //div[@id='H_docTitle'] | ||
2 | |||
3 | body: //div[@id='H_meta' or @id='H_content' or @id='F_footer'] | ||
4 | |||
5 | strip_id_or_class: F_toenail | ||
6 | |||
7 | prune: no | ||
8 | |||
9 | test_url: http://www.accesstoinsight.org/lib/authors/nyanaponika/wheel026.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/acidcow.com.txt b/inc/3rdparty/site_config/standard/acidcow.com.txt new file mode 100644 index 00000000..60ede6a6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/acidcow.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[starts-with(@id, 'news-id-')] | ||
2 | |||
3 | test_url: http://acidcow.com/fun/20933-acid-picdump-83-pics.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/acquia.com.txt b/inc/3rdparty/site_config/standard/acquia.com.txt new file mode 100644 index 00000000..5ddf542e --- /dev/null +++ b/inc/3rdparty/site_config/standard/acquia.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title://h1[@class="title"] | ||
2 | author://div[@class="submitted"]/span/a | ||
3 | date://div[@class="submitted"]/span | ||
4 | body://div[@class="content-wrapper"] | ||
5 | |||
6 | strip://div[@id="skip-link"] | ||
7 | strip://div[@id="region-content-3-3"] | ||
8 | strip://div[@id="section-footer"] | ||
9 | test_url: https://www.acquia.com/blog/drupals-long-warmth-toward-third-party-code \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/acroswing.fr.txt b/inc/3rdparty/site_config/standard/acroswing.fr.txt new file mode 100644 index 00000000..57d86d2f --- /dev/null +++ b/inc/3rdparty/site_config/standard/acroswing.fr.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | tidy:no | ||
2 | date: //time[@class='updated'] | ||
3 | dissolve: //ul[@class='video-gallery']/li | ||
4 | dissolve: //ul[@class='video-gallery'] | ||
5 | test_url: http://www.acroswing.fr/actualites/competition_rock/selectif_bellegarde_sur_valserine__2012-02-26.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/aht.seriouseats.com.txt b/inc/3rdparty/site_config/standard/aht.seriouseats.com.txt new file mode 100644 index 00000000..408e9099 --- /dev/null +++ b/inc/3rdparty/site_config/standard/aht.seriouseats.com.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | body: //div[@id='content'] | ||
2 | |||
3 | # clean up recipe pages | ||
4 | strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3'] | ||
5 | |||
6 | #recipe pages | ||
7 | strip_id_or_class: "recipe-feedback" | ||
8 | strip_id_or_class: "comments" | ||
9 | strip_id_or_class: "procedure-number" | ||
10 | strip_id_or_class: "more-with-author" | ||
11 | |||
12 | #slice | ||
13 | strip_id_or_class: "inner" | ||
14 | |||
15 | test_url: http://aht.seriouseats.com/archives/2009/12/the-burger-lab-salting-ground-beef.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/alex.mullr.net.txt b/inc/3rdparty/site_config/standard/alex.mullr.net.txt new file mode 100644 index 00000000..c5f15370 --- /dev/null +++ b/inc/3rdparty/site_config/standard/alex.mullr.net.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //div[@class="entry"] | ||
2 | test_url: http://alex.mullr.net/blog/2011/05/on-spotify/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/alistapart.com.txt b/inc/3rdparty/site_config/standard/alistapart.com.txt new file mode 100644 index 00000000..090f7eb1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/alistapart.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | title: //h1[@class='title'] | ||
2 | author: //h3[@class='byline']/a | ||
3 | date: //div[@class='ishinfo'] | ||
4 | |||
5 | body: //*[@id='articletext'] | ||
6 | strip_id_or_class: 'ishinfo' | ||
7 | strip_id_or_class: 'metastuff' | ||
8 | strip_id_or_class: 'learnmore' | ||
9 | strip_id_or_class: 'discuss' | ||
10 | |||
11 | prune: no | ||
12 | test_url: http://www.alistapart.com/articles/organizing-mobile/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/aljazeera.com.txt b/inc/3rdparty/site_config/standard/aljazeera.com.txt new file mode 100644 index 00000000..4f0148f4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/aljazeera.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //span[@id='DetailedTitle'] | ||
2 | body: //td[@id='tdTextContent'] | ||
3 | strip_id_or_class: Skyscrapper_Body | ||
4 | date: //span[@id='ctl00_cphBody_lblDate'] | ||
5 | author: //div[@id="dvAuthorInfo"]//a/text() | ||
6 | strip: //table[ tbody/tr/td/object ] | ||
7 | prune: no | ||
8 | test_url: http://www.aljazeera.com/indepth/opinion/2012/01/2012114121925380575.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/allrecipes.com.txt b/inc/3rdparty/site_config/standard/allrecipes.com.txt new file mode 100644 index 00000000..e9767bda --- /dev/null +++ b/inc/3rdparty/site_config/standard/allrecipes.com.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | title: //h1[@id='itemTitle'] | ||
2 | body: //img[@id="ctl00_CenterColumnPlaceHolder_recipe_photoStuff_imgPhoto"] | //div[@id='ctl00_CenterColumnPlaceHolder_recipe_divSubmitter'] | //div[contains(@class, 'recipe-details-content')] | ||
3 | strip: //div[@class='top-left' or @class='top-right' or @class='bot-left' or @class='bot-right'] | ||
4 | strip: //div[contains(@class, 'rightcoltoolsdiv')] | ||
5 | strip: //div[contains(@class, 'servings-form')] | ||
6 | strip: //p[@class='nutritional-information'] | ||
7 | strip: //a[contains(@class, 'nutritional-information') or contains(@class, 'nutritionanchor')] | ||
8 | strip: //div[@id='nutri-info']/div[contains(@class, 'title')] | ||
9 | strip: //img[@id='ctl00_CenterColumnPlaceHolder_recipe_imgSubmitter'] | ||
10 | strip_id_or_class: eshaAttribute | ||
11 | strip_id_or_class: eshaParagraph | ||
12 | prune: no | ||
13 | |||
14 | test_url: http://allrecipes.com/Recipe/Taco-Pie/Detail.aspx?src=rotd \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/allthingsd.com.txt b/inc/3rdparty/site_config/standard/allthingsd.com.txt new file mode 100644 index 00000000..cd52498f --- /dev/null +++ b/inc/3rdparty/site_config/standard/allthingsd.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title://div[@class="article-title"]/h1[@class="title"] | ||
2 | date: //p[@class="article-date"] | ||
3 | body://*[@class="article-body article-text"] | ||
4 | # Trim out related posts at bottom of article | ||
5 | strip://blockquote[@class="memo"] | ||
6 | |||
7 | # Yup, no idea why author won't work... | ||
8 | author://div[@class="page-header article-header clearfix"]/p[@class="title"] | ||
9 | # [Marco:] Author won't work here because the page defines the "home" link under the author's name as rel="author", which always gets priority if the page has defined it. | ||
10 | test_url: http://allthingsd.com/20120513/exclusive-yahoos-thompson-out-levinsohn-in-board-settlement-with-loeb-nears-completion/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/allyou.com.txt b/inc/3rdparty/site_config/standard/allyou.com.txt new file mode 100644 index 00000000..3c26c682 --- /dev/null +++ b/inc/3rdparty/site_config/standard/allyou.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //div[@id='pageHdr']//h1 | ||
2 | body: //div[@id='pageHdr']/*[@class='dek'] | //div[@id='printArticle' or @id='slideShowPrint'] | ||
3 | strip: //div[contains(@class, 'infoBox') or @id='infoBox'] | ||
4 | single_page_link: //li[@id='print']/a | ||
5 | |||
6 | prune: no | ||
7 | |||
8 | test_url: http://www.allyou.com/budget-home/money-shopping/freebies-online-00400000066392/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt b/inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt new file mode 100644 index 00000000..f5865f89 --- /dev/null +++ b/inc/3rdparty/site_config/standard/alphabeta.argaam.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | body: //div[@class = 'entry'] | ||
2 | date: substring-after(//p[@class="date"],'بتاريخ ') | ||
3 | strip_id_or_class: date | ||
4 | strip_id_or_class: follow-single | ||
5 | strip_id_or_class: ratingblock | ||
6 | strip_id_or_class: newRatingHolder | ||
7 | strip_id_or_class: postmetadata | ||
8 | strip_id_or_class: addthis_toolbox | ||
9 | strip_id_or_class: addthis_default_style | ||
10 | strip_id_or_class: size-full | ||
11 | test_url: http://alphabeta.argaam.com/?p=35657 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/alriyadh.com.txt b/inc/3rdparty/site_config/standard/alriyadh.com.txt new file mode 100644 index 00000000..d0060000 --- /dev/null +++ b/inc/3rdparty/site_config/standard/alriyadh.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | body: //div[@id = "article-view"] | ||
2 | body: //div[contains(@class, 'article')]//div[contains(@class, 'photo_bg')] | ||
3 | author: //p[@class = "author"] | ||
4 | strip: //h1 | ||
5 | strip: //h2 | ||
6 | strip_id_or_class: author | ||
7 | prune: no | ||
8 | test_url: http://www.alriyadh.com/2011/10/10/article674357.html | ||
9 | test_url: http://www.alriyadh.com/net/article/780935 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/alseraj.net.txt b/inc/3rdparty/site_config/standard/alseraj.net.txt new file mode 100644 index 00000000..107d82d6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/alseraj.net.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | title: //*[@id='normalfontyellow'] | ||
2 | test_url: http://www.alseraj.net/cgi-bin/pros/av/LeqaTextDisplay.cgi?display&2 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/alt1040.com.txt b/inc/3rdparty/site_config/standard/alt1040.com.txt new file mode 100644 index 00000000..4fd45719 --- /dev/null +++ b/inc/3rdparty/site_config/standard/alt1040.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://alt1040.com/2011/09/banda-ancha-en-america-latina-insignificante \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/altfoto.com.txt b/inc/3rdparty/site_config/standard/altfoto.com.txt new file mode 100644 index 00000000..d974cf4a --- /dev/null +++ b/inc/3rdparty/site_config/standard/altfoto.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://altfoto.com/2011/09/nikon-presenta-su-nuevo-sistema-nikon-1-y-dos-nuevas-camaras \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/alumni.stanford.edu.txt b/inc/3rdparty/site_config/standard/alumni.stanford.edu.txt new file mode 100644 index 00000000..7fd47193 --- /dev/null +++ b/inc/3rdparty/site_config/standard/alumni.stanford.edu.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //h1 | ||
2 | |||
3 | author: substring-after(//div[@class="enableBullets"]/preceding-sibling::p[1], "By ") | ||
4 | |||
5 | date: //div/a[contains (@href, "issue")] | ||
6 | |||
7 | move_into(//div[@class="enableBullets"]/p): (//div[@id="content"]//img)[1] | ||
8 | |||
9 | body: //div[@class="enableBullets"] | ||
10 | test_url: http://alumni.stanford.edu/get/page/magazine/article/?article_id=54819 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/amazon.com.txt b/inc/3rdparty/site_config/standard/amazon.com.txt new file mode 100644 index 00000000..1a23c4b7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/amazon.com.txt | |||
@@ -0,0 +1,19 @@ | |||
1 | title: //span[@id = 'btAsinTitle'] | ||
2 | body: (//*[@id='prodImageCell']//a)[1] | //div[@id = 'ps-content'] | //span[@id='actualPriceValue'] | //h2[.='Product Details']/following-sibling::div | //div[@class='h2' and .='Product Description']/following-sibling::div | ||
3 | #strip_id_or_class: quantityDropdownDiv | ||
4 | #strip_id_or_class: addToCartSpan | ||
5 | #strip_id_or_class: oneClickDiv | ||
6 | strip_id_or_class: nocontent | ||
7 | strip_id_or_class: masDynamicConten | ||
8 | strip_id_or_class: dynamic-content | ||
9 | prune: no | ||
10 | |||
11 | find_string: <span id="actualPriceValue"> | ||
12 | replace_string: <span id="actualPriceValue"><br />Price: | ||
13 | |||
14 | strip_id_or_class: collapsePS | ||
15 | strip_id_or_class: expandPS | ||
16 | strip_id_or_class: psPlaceHolde | ||
17 | strip: //li[contains(., 'update product info') or contains(., 'give feedback on images')] | ||
18 | |||
19 | test_url: http://www.amazon.com/Common-Sense-Forestry-Living-Mother/dp/1931498210/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/americandrink.net.txt b/inc/3rdparty/site_config/standard/americandrink.net.txt new file mode 100644 index 00000000..dee0e868 --- /dev/null +++ b/inc/3rdparty/site_config/standard/americandrink.net.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //div[@class='head']/h2/a | ||
2 | author: //div[@class='head']/a | ||
3 | date: //div[@class='head']/p[@class='date']/a | ||
4 | body: //div[@class='copy'] | ||
5 | strip: //p[@class='meta'] | ||
6 | test_url: http://americandrink.net/post/10567188712/free-the-hooch \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/americascup.com.txt b/inc/3rdparty/site_config/standard/americascup.com.txt new file mode 100644 index 00000000..b1673b6a --- /dev/null +++ b/inc/3rdparty/site_config/standard/americascup.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //div[@class="editorial-content"]/h3 | ||
2 | body: //div[@class="hero-image" or @class="editorial-content"] | ||
3 | |||
4 | strip: //ul[@class="hero-caption"] | ||
5 | strip_id_or_class: footer | ||
6 | |||
7 | prune: no | ||
8 | tidy: no | ||
9 | |||
10 | test_url: http://www.americascup.com/en/Latest/News/2012/3/Coutts-and-Peyron-tell-transformative-tale-at-Global-Sports-Forum/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt b/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt new file mode 100644 index 00000000..8bf31ec2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //h1[@class="post-title"] | ||
2 | author: //span[@class="author"]/a | ||
3 | date: //span[@class="date"] | ||
4 | body: //div[@class="post-content main"] | ||
5 | test_url: http://www.americastestkitchenfeed.com/gadgets-and-gear/2012/07/chill-out-with-tovolos-king-cube-silicone-ice-cube-tray/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/anandtech.com.txt b/inc/3rdparty/site_config/standard/anandtech.com.txt new file mode 100644 index 00000000..8067e03c --- /dev/null +++ b/inc/3rdparty/site_config/standard/anandtech.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | author: //a[@class='b'][1] | ||
2 | date: substring-after(substring-before(//div, 'Posted in'), ' on ') | ||
3 | strip_image_src: /content/images/globals/ | ||
4 | strip: //h2[. = 'Page 1']/preceding::p | ||
5 | strip: //h2 | ||
6 | |||
7 | prune: no | ||
8 | |||
9 | single_page_link: concat('http://www.anandtech.com/print/', substring-after(//meta[@property='og:url']/@content, '/show/')) | ||
10 | |||
11 | test_url: http://www.anandtech.com/show/5812/eurocom-monster-10-clevos-little-monster/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/andyrutledge.com.txt b/inc/3rdparty/site_config/standard/andyrutledge.com.txt new file mode 100644 index 00000000..f9ffd3c3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/andyrutledge.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //h2 | ||
2 | author: string('Andy Rutledge') | ||
3 | date: //div[@class='articledate'] | ||
4 | body: //div[@class='copybody'] | ||
5 | |||
6 | strip: //*[@class='space'] | ||
7 | strip: //*[@class='articleFoot'] | ||
8 | |||
9 | test_url: http://www.andyrutledge.com/hungry-for-a-better-menu.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt b/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt new file mode 100644 index 00000000..a5c7c08a --- /dev/null +++ b/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //h1[@class="title"] | ||
2 | |||
3 | author: ("Anna Manasova") | ||
4 | # is ignored, unfortunately | ||
5 | |||
6 | date: //p[@class="date"] | ||
7 | |||
8 | body: //div[@class="entry"] | ||
9 | test_url: http://annatravelling.wordpress.com/2011/11/07/a-day-of-cooking-thai/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/applature.com.txt b/inc/3rdparty/site_config/standard/applature.com.txt new file mode 100644 index 00000000..a78a6150 --- /dev/null +++ b/inc/3rdparty/site_config/standard/applature.com.txt | |||
@@ -0,0 +1,18 @@ | |||
1 | title: //h1[contains(@class, 'title')] | ||
2 | body: //div[@id='mainContent']//div[contains(@class, 'section_content')] | //ul[@class='section_footer'] | ||
3 | date: //div[@class='date'] | ||
4 | |||
5 | strip_id_or_class: sharethis | ||
6 | strip_id_or_class: stats | ||
7 | strip_id_or_class: apply_form | ||
8 | strip_id_or_class: job_map | ||
9 | strip_id_or_class: respond | ||
10 | strip: //h1//span[@class='type'] | ||
11 | strip: //li[@class='print' or @class='map'] | ||
12 | |||
13 | replace_string(<ul class="section_footer" style="display): <ul class="section_footer" style="display-bla | ||
14 | |||
15 | prune: no | ||
16 | tidy: no | ||
17 | |||
18 | test_url: http://applature.com/mining-jobs/jobs/nickel-west-leinster-analytical-laboratory-technician/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/apple.com.txt b/inc/3rdparty/site_config/standard/apple.com.txt new file mode 100644 index 00000000..4c483955 --- /dev/null +++ b/inc/3rdparty/site_config/standard/apple.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | strip: //p[@class='sosumi'] | ||
2 | # Aren't they witty? | ||
3 | |||
4 | # I can't work out what causes the stray character before the title. | ||
5 | title: //h1[@class='title'] | ||
6 | strip: //h1[@class='title'] | ||
7 | test_url: http://www.apple.com/pr/library/2011/02/15appstore.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/appleinsider.com.txt b/inc/3rdparty/site_config/standard/appleinsider.com.txt new file mode 100644 index 00000000..279fbce1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/appleinsider.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //p[@class='title'] | ||
2 | |||
3 | author: //p[text() = 'By ']/a/text() | ||
4 | strip: //p[text() = 'By '] | ||
5 | |||
6 | body: //td[@class='bod'] | ||
7 | strip_id_or_class: title | ||
8 | strip_id_or_class: minor | ||
9 | |||
10 | strip_id_or_class: multipagefooter | ||
11 | test_url: http://www.appleinsider.com/articles/12/02/29/inside_os_x_108_mountain_lion_safari_52_gets_a_simplified_user_interface_with_new_sharing_features.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/appleweblog.com.txt b/inc/3rdparty/site_config/standard/appleweblog.com.txt new file mode 100644 index 00000000..023c9ccb --- /dev/null +++ b/inc/3rdparty/site_config/standard/appleweblog.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://appleweblog.com/2011/09/encontrada-vulnerabilidad-grave-en-skype-para-ios \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/archdaily.com.txt b/inc/3rdparty/site_config/standard/archdaily.com.txt new file mode 100644 index 00000000..9476cf56 --- /dev/null +++ b/inc/3rdparty/site_config/standard/archdaily.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | date: //div[@class='post_date'] | ||
2 | |||
3 | body: //div[@class='post_content'] | ||
4 | |||
5 | test_url: http://www.archdaily.com/185325/p10-mixed-use-building-studio-up \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/archiveofourown.org.txt b/inc/3rdparty/site_config/standard/archiveofourown.org.txt new file mode 100644 index 00000000..50ff632d --- /dev/null +++ b/inc/3rdparty/site_config/standard/archiveofourown.org.txt | |||
@@ -0,0 +1,18 @@ | |||
1 | # Description: Fix XPaths to include ALL chapters on 'view_full_work' pages. | ||
2 | # Include: work meta, summary, chapter information, and notes which Instapaper strips out by default. | ||
3 | # Exclude: header, footer, navigation, comments. | ||
4 | # Notes: User is a newbie with XPaths. | ||
5 | |||
6 | title: //h2[@class='title'] | ||
7 | author: //h3[@class='byline'] | ||
8 | author: //a[@class='login author'] | ||
9 | |||
10 | strip_id_or_class:header | ||
11 | strip_id_or_class:navigation | ||
12 | strip_id_or_class:feedback | ||
13 | strip_id_or_class:kudos | ||
14 | strip_id_or_class:add_comment_placeholder | ||
15 | strip_id_or_class:add_comment | ||
16 | strip_id_or_class:globalize | ||
17 | strip_id_or_class:footer | ||
18 | test_url: http://archiveofourown.org/works/229402?view_full_work=true \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/arstechnica.com.txt b/inc/3rdparty/site_config/standard/arstechnica.com.txt new file mode 100644 index 00000000..49bb3dbc --- /dev/null +++ b/inc/3rdparty/site_config/standard/arstechnica.com.txt | |||
@@ -0,0 +1,16 @@ | |||
1 | author: //p[@class='byline']/a | ||
2 | body: //div[contains(@class,'article-content')] | ||
3 | strip: //h2[@class='title'] | ||
4 | strip_id_or_class: byline | ||
5 | prune: no | ||
6 | |||
7 | date: //div[@class='byline']/span[@class='posted']//abbr/@original-title | ||
8 | date: //div[@class='byline']/span[@class='posted']//abbr | ||
9 | |||
10 | title: //div[@id='story']//h2[@class='title'] | ||
11 | |||
12 | strip: //div[@class='pager'] | ||
13 | next_page_link: //nav//a[span/@class='next']/@href | ||
14 | |||
15 | test_url: http://arstechnica.com/tech-policy/news/2012/02/gigabit-internet-for-80-the-unlikely-success-of-californias-sonicnet.ars | ||
16 | test_url: http://arstechnica.com/apple/2005/04/macosx-10-4/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/articles.boston.com.txt b/inc/3rdparty/site_config/standard/articles.boston.com.txt new file mode 100644 index 00000000..e54423be --- /dev/null +++ b/inc/3rdparty/site_config/standard/articles.boston.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //div[@class="mod-bostonarticleheader mod-articleheader"]/h1 | ||
2 | author: substring-after(//div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[3],"By ") | ||
3 | date: //div[@class="mod-bostonarticlebyline mod-articlebyline"]/span[@class="pubdate"] | ||
4 | |||
5 | strip_id_or_class: mod-pagination | ||
6 | test_url: http://articles.boston.com/2011-10-23/news/30313691_1_bigfoot-free-speech-monadnock-state-park \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/articles.courant.com.txt b/inc/3rdparty/site_config/standard/articles.courant.com.txt new file mode 100644 index 00000000..a08f2041 --- /dev/null +++ b/inc/3rdparty/site_config/standard/articles.courant.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //div[@class="mod-courantarticleheader mod-articleheader"]/h1 | ||
2 | date: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[@class="pubdate"] | ||
3 | author: //div[@class="mod-courantarticlebyline mod-articlebyline"]/span[3] | ||
4 | |||
5 | strip_id_or_class: mod-article-byline | ||
6 | strip_id_or_class: mod-article-header | ||
7 | strip_id_or_class: mod-article-subtitle | ||
8 | #This leaves some crud after the article, but it's better than nothing. | ||
9 | #It would be ideal if we could set the body to every element matching //div[contains(@class, "mod-articletext")]/p, but it seems like body only takes the first matching element. | ||
10 | |||
11 | test_url: http://articles.courant.com/2011-10-22/news/hc-green-drugsearch--1022-20111022_1_drugs-in-student-lockers-police-dogs-lockdown \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/asahi.com.txt b/inc/3rdparty/site_config/standard/asahi.com.txt new file mode 100644 index 00000000..2562edb9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/asahi.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@id='HeadLine'] | ||
2 | strip: //div[@id='utility_right'] | ||
3 | test_url: http://www.asahi.com/culture/update/0520/TKY201105200321.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ascarter.net.txt b/inc/3rdparty/site_config/standard/ascarter.net.txt new file mode 100644 index 00000000..5236d09e --- /dev/null +++ b/inc/3rdparty/site_config/standard/ascarter.net.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //h1[@class='article_title'] | ||
2 | author: //span[@class='author'] | ||
3 | date: //h2[@class='dateline'] | ||
4 | body: //div[@class='article_body'] | ||
5 | test_url: http://ascarter.net/2012/02/20/enough-is-enough.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/astronews.com.txt b/inc/3rdparty/site_config/standard/astronews.com.txt new file mode 100644 index 00000000..33e8153d --- /dev/null +++ b/inc/3rdparty/site_config/standard/astronews.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //span[@class='titel'] | ||
2 | author: //span[@class='metadaten_C']/a//span[@class='metadaten_C'] | ||
3 | date: substring-after(//span[@class='metadaten_C'],'astronews.com') | ||
4 | strip: //span[@class='bu'] | ||
5 | strip_image_src: '/_images/' | ||
6 | |||
7 | test_url: http://www.astronews.com/news/artikel/2011/10/1110-021.shtml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/asymco.com.txt b/inc/3rdparty/site_config/standard/asymco.com.txt new file mode 100644 index 00000000..adad5f18 --- /dev/null +++ b/inc/3rdparty/site_config/standard/asymco.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | # Johannes Stühler | ||
2 | |||
3 | title://h2 | ||
4 | author://span[@class='meta-content'] | ||
5 | date://abbr[@class='date published']/@title | ||
6 | body://div[@class='entry-content'] | ||
7 | |||
8 | test_url: http://www.asymco.com/2011/01/14/is-android-more-efficient-than-ios-at-generating-search-revenue/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/autoblog.com.txt b/inc/3rdparty/site_config/standard/autoblog.com.txt new file mode 100644 index 00000000..58681bf9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/autoblog.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | prune: no | ||
2 | body: //div[@class='post-body'] | ||
3 | author: //p[@class='byline']//a | ||
4 | date: substring-after(//div[@class='about']/p[2], 'Posted') | ||
5 | strip: //div[@class='body']/div[@class='meta'] | ||
6 | test_url: http://www.autoblog.com/2012/01/17/next-gen-bmw-x5-caught-again/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/avclub.com.txt b/inc/3rdparty/site_config/standard/avclub.com.txt new file mode 100644 index 00000000..776ee108 --- /dev/null +++ b/inc/3rdparty/site_config/standard/avclub.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | author: //*[@id="article_wrapper"]/div[1]/a[1] | ||
2 | body: //*[@id="article_wrapper"]/div[2] | ||
3 | date: //*[@id="article_wrapper"]/div[1]/text()[2] | ||
4 | test_url: http://www.avclub.com/articles/forgetmenot,70904 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/baltimoresun.com.txt b/inc/3rdparty/site_config/standard/baltimoresun.com.txt new file mode 100644 index 00000000..32adff8d --- /dev/null +++ b/inc/3rdparty/site_config/standard/baltimoresun.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | single_page_link: //div[@class='toppaginate']//a[@rel='nofollow'] | ||
2 | convert_double_br_tags: yes | ||
3 | |||
4 | title: //div[@class="story"]/h1 | ||
5 | body: //div[@id="story-body-text"] | ||
6 | author: //span[@class="byline"] | ||
7 | date: //p[@class="date"] | ||
8 | |||
9 | strip: //*[@class='all'] | ||
10 | strip: //*[@class='articlerail'] | ||
11 | |||
12 | test_url: http://www.baltimoresun.com/news/maryland/bs-md-omalley-budget-2-20120116,0,5340585.story \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/basicthinking.de.txt b/inc/3rdparty/site_config/standard/basicthinking.de.txt new file mode 100644 index 00000000..ab583145 --- /dev/null +++ b/inc/3rdparty/site_config/standard/basicthinking.de.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //h2 | ||
2 | date: //span[@class='date'] | ||
3 | body: //div[@class='entry'] | ||
4 | |||
5 | strip: //div[@class='zusatz'] | ||
6 | |||
7 | test_url: http://www.basicthinking.de/blog/2011/12/13/sagt-social-networks-adieu-begrust-private-networks/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bb.is.txt b/inc/3rdparty/site_config/standard/bb.is.txt new file mode 100644 index 00000000..eaafaf18 --- /dev/null +++ b/inc/3rdparty/site_config/standard/bb.is.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | author: substring(//h3[@class='headlines']/span[@class='dates'],0,string-length(//h3[@class='headlines']/span[@class='dates'])-20) | ||
2 | |||
3 | |||
4 | date: substring((//h3[@class='headlines']/span[@class='dates']),string-length(//h3[@class='headlines']/span[@class='dates'])-18,12) | ||
5 | |||
6 | |||
7 | body: //div[@class='first-article-big'] | ||
8 | strip: //table[@class='newsimagecontainer'] | ||
9 | strip: //h3[@class='headlines'] | ||
10 | strip: //iframe[@class='headlines'] | ||
11 | strip: //a[@class='newslink'] | ||
12 | convert_double_br_tags: yes | ||
13 | test_url: http://bb.is/Pages/82?NewsID=174119 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bbc.co.uk.txt b/inc/3rdparty/site_config/standard/bbc.co.uk.txt new file mode 100644 index 00000000..9c5c3419 --- /dev/null +++ b/inc/3rdparty/site_config/standard/bbc.co.uk.txt | |||
@@ -0,0 +1,32 @@ | |||
1 | body: //div[@class="story-body"] | ||
2 | title: //h1[@class="story-header"] | ||
3 | date: //span[@class="story-date"]/span[@class='date'] | ||
4 | |||
5 | # recipes, e.g. http://www.bbc.co.uk/food/recipes/mymincepies_71055 | ||
6 | body: //div[contains(@class, 'hrecipe')]//div[@id='subcolumn-1'] | ||
7 | |||
8 | #strip: //div[@class="story-feature narrow"] | ||
9 | #strip: //div[@class="story-feature wide"] | ||
10 | #strip: //div[@class="story-feature dslideshow-enclosure"] | ||
11 | strip: //div[contains(@class, "story-feature")] | ||
12 | strip: //span[@class="story-date"] | ||
13 | #strip: //div[@class="caption body-narrow-width"] | ||
14 | strip: //div[@class="warning"]//p | ||
15 | strip: //div[@id='page-bookmark-links-head'] | ||
16 | strip: //object | ||
17 | strip: //div[contains(@class, "bbccom_advert_placeholder")] | ||
18 | strip: //div[contains(@class, "embedded-hyper")] | ||
19 | strip: //div[contains(@class, 'market-data')] | ||
20 | strip: //a[contains(@class, 'hidden')] | ||
21 | strip: //div[contains(@class, 'hypertabs')] | ||
22 | strip: //div[contains(@class, 'related')] | ||
23 | strip: //form[@id='comment-form'] | ||
24 | strip: //div[contains(@class, 'comment-introduction')] | ||
25 | |||
26 | replace_string(<noscript>): <div> | ||
27 | replace_string(</noscript>): </div> | ||
28 | |||
29 | prune: no | ||
30 | |||
31 | dissolve: //h2 | ||
32 | test_url: http://www.bbc.co.uk/news/business-15060862 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/benoitmaison.org.txt b/inc/3rdparty/site_config/standard/benoitmaison.org.txt new file mode 100644 index 00000000..f341d593 --- /dev/null +++ b/inc/3rdparty/site_config/standard/benoitmaison.org.txt | |||
@@ -0,0 +1,16 @@ | |||
1 | body: //div[@class="entry-content"] | ||
2 | |||
3 | # Remove text ‘Tweet’ | ||
4 | strip: //div[@class="entry-content"]/div[last()] | ||
5 | |||
6 | title: //h1[@class="entry-title"] | ||
7 | |||
8 | # If the Instapaper text parser worked with HTML5 tags, we would use: | ||
9 | date: //time[@class="entry-date"] | ||
10 | |||
11 | # But since it does not, use this more complicated rule: | ||
12 | date: //div[@class="entry-meta"]/a[@rel="bookmark"] | ||
13 | |||
14 | # Unfortunately, the following rule is overridden by the automatically found author. | ||
15 | author: ("Benoit Maison") | ||
16 | test_url: http://www.benoitmaison.org/2011/12/06/why-siri-had-to-start-in-beta/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/berlingske.dk.txt b/inc/3rdparty/site_config/standard/berlingske.dk.txt new file mode 100644 index 00000000..607c998d --- /dev/null +++ b/inc/3rdparty/site_config/standard/berlingske.dk.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: //h1[@class='headline'] | ||
2 | body: //div[contains(@class, 'article-wrapper')] | ||
3 | test_url: http://www.berlingske.dk/danmark/festen-er-flyttet-nordpaa \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/betabeat.com.txt b/inc/3rdparty/site_config/standard/betabeat.com.txt new file mode 100644 index 00000000..7815cf26 --- /dev/null +++ b/inc/3rdparty/site_config/standard/betabeat.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //div[@class="entry-content"] | ||
2 | test_url: http://www.betabeat.com/2011/07/04/sheryl-sandberg-breaks-through-silicon-valleys-boys-club-sort-of/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/betanews.com.txt b/inc/3rdparty/site_config/standard/betanews.com.txt new file mode 100644 index 00000000..0eaf085e --- /dev/null +++ b/inc/3rdparty/site_config/standard/betanews.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | # some articles at this site, like this one, don't | ||
2 | # seem to pick up the article body via normal | ||
3 | # processing, other articles come through fine | ||
4 | # http://www.betanews.com/joewilcox/article | ||
5 | # /Google-is-a-marketing-sensation/1309708375 | ||
6 | body: //*[@id="article"] | ||
7 | test_url: http://www.betanews.com/joewilcox/article/Google-is-a-marketing-sensation/1309708375 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/biography.com.txt b/inc/3rdparty/site_config/standard/biography.com.txt new file mode 100644 index 00000000..dc071299 --- /dev/null +++ b/inc/3rdparty/site_config/standard/biography.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //div[contains(@class, 'main-content')]//h1 | ||
2 | body: //div[@class='summary-column'] | //div[contains(@class, 'main-content')] | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | single_page_link: //div[@id='biography-action-links']//a[contains(@href, '/print/')] | ||
7 | |||
8 | test_url: http://www.biography.com/print/profile/martin-luther-9389283 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bitelia.com.txt b/inc/3rdparty/site_config/standard/bitelia.com.txt new file mode 100644 index 00000000..7bffae93 --- /dev/null +++ b/inc/3rdparty/site_config/standard/bitelia.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://bitelia.com/2011/09/klout-midiendo-influencia \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bjango.com.txt b/inc/3rdparty/site_config/standard/bjango.com.txt new file mode 100644 index 00000000..6cb04631 --- /dev/null +++ b/inc/3rdparty/site_config/standard/bjango.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //h1[@class='articlehead'] | ||
2 | body: //div[@class='column'] | ||
3 | strip: //h1 | ||
4 | strip: //div[@class='help'] | ||
5 | |||
6 | #no author or date/time provided in current layout | ||
7 | test_url: http://bjango.com/articles/actions/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.arsln.org.txt b/inc/3rdparty/site_config/standard/blog.arsln.org.txt new file mode 100644 index 00000000..1f43f490 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blog.arsln.org.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | tidy: no | ||
2 | prune: no | ||
3 | date: //article/header/h6/time | ||
4 | title: //article/header/h3 | ||
5 | author: //meta[@name='author']/@content | ||
6 | body: //article//post | ||
7 | |||
8 | test_url: http://blog.arsln.org/aska-ayip-oluyor/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt b/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt new file mode 100644 index 00000000..81c3bda6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //title | ||
2 | author: //span[@class='author vcard']/a | ||
3 | date: //p[@class='headline_meta']/abbr[@class='published'] | ||
4 | body: //div[@class='format_text entry-content'] | ||
5 | |||
6 | strip: //div[@id='dd_ajax_float'] | ||
7 | test_url: http://blog.asmartbear.com/how-to-get-quality-freelance-graphics-design-work-on-a-budget.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt b/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt new file mode 100644 index 00000000..a4c5aaea --- /dev/null +++ b/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | # Instapaper gets this back to front and only gets the blog title instead of the article title. | ||
2 | title: substring-before(//title, '-') | ||
3 | |||
4 | author: //a[ contains(@href, '/people') ] | ||
5 | |||
6 | body: //div[ @class='post' ] | ||
7 | |||
8 | # Date is impossible to retrieve since they use those stupid "fuzzy" dates, inserted through javascript, at posterous. | ||
9 | test_url: http://blog.cloudflare.com/understanding-analytics-when-is-a-page-view-n \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.fefe.de.txt b/inc/3rdparty/site_config/standard/blog.fefe.de.txt new file mode 100644 index 00000000..92272b70 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blog.fefe.de.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //h2 | ||
2 | date: //h3 | ||
3 | body: //ul | ||
4 | |||
5 | test_url: http://blog.fefe.de/?ts=b063bf55 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.instagram.com.txt b/inc/3rdparty/site_config/standard/blog.instagram.com.txt new file mode 100644 index 00000000..3065dd80 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blog.instagram.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | # clean Instagram blog a little bit | ||
2 | |||
3 | tidy:no | ||
4 | prune:no | ||
5 | |||
6 | body://div[contains(@id,'content')] | ||
7 | |||
8 | strip_id_or_class:meta | ||
9 | strip_id_or_class:notes | ||
10 | strip_id_or_class:pagination | ||
11 | test_url: http://blog.instagram.com/post/8757832007/fromwhereistand \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt b/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt new file mode 100644 index 00000000..4e467fe9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | date: //span[contains(@class, 'date-links')] | ||
2 | author: //span[contains(@class, 'author-links')] | ||
3 | body: //div[contains(@class, 'entry-content')] | ||
4 | test_url: http://blog.jaysalvat.com/article/celui-qui-avait-refait-son-site-web \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt b/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt new file mode 100644 index 00000000..ac18ad15 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //*[contains(@class, 'post_content')] | ||
2 | author: string('Kaelig Deloumeau-Prigent') | ||
3 | title: //h1[@class='title'] | ||
4 | date: //span[@class='date'] | ||
5 | test_url: http://blog.kaelig.fr/post/24877648508/preprocesseurs-css-renoncer-par-choix-ou-par \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.naver.com.txt b/inc/3rdparty/site_config/standard/blog.naver.com.txt new file mode 100644 index 00000000..702789ad --- /dev/null +++ b/inc/3rdparty/site_config/standard/blog.naver.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //span[@class='pcol1 itemSubjectBoldfont'] | ||
2 | body: //div[@id='postListBody'] | ||
3 | date: //p[@class='date fil5 pcol2'] | ||
4 | single_page_link: /html/frameset/frame[1]/attribute::src | ||
5 | strip: //div[@class='post-btn'] | ||
6 | test_url: http://blog.naver.com/how2invest/110135068757 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.pchome.net.txt b/inc/3rdparty/site_config/standard/blog.pchome.net.txt new file mode 100644 index 00000000..3089001e --- /dev/null +++ b/inc/3rdparty/site_config/standard/blog.pchome.net.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | # PCHOME blog, a popular Chinese blog host | ||
2 | # Oct 15, 2011 | ||
3 | # | ||
4 | |||
5 | title://*[contains(@class,'imp')]/h2 | ||
6 | |||
7 | date://*[contains(@class,'imp')]/span | ||
8 | body://div[contains(@id,'blog_content')] | ||
9 | |||
10 | |||
11 | |||
12 | test_url: http://blog.pchome.net/article/462502.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.pinboard.in.txt b/inc/3rdparty/site_config/standard/blog.pinboard.in.txt new file mode 100644 index 00000000..b7afe455 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blog.pinboard.in.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //a[@class="blog_title"] | ||
2 | date: //p[@class="when"]/a | ||
3 | body: //div[@class="blog_entry"] | ||
4 | strip_id_or_class:blog_title | ||
5 | strip_id_or_class:when | ||
6 | test_url: http://blog.pinboard.in/2011/11/the_social_graph_is_neither/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt b/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt new file mode 100644 index 00000000..acb9ce81 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt | |||
@@ -0,0 +1,26 @@ | |||
1 | # Sina blog, the most popular blog host in China. | ||
2 | # Its source code is horrible. | ||
3 | # | ||
4 | # Issue: | ||
5 | # Only the first image in the article is displayed. | ||
6 | # The remaining images are replaced with a 1x1 transparent GIF by the Sina blog host. | ||
7 | # | ||
8 | |||
9 | title://*[contains(@class,'titName SG_txta')] | ||
10 | author://*[contains(@id,'ownernick')] | ||
11 | date://*[contains(@class,'time SG_txtc')] | ||
12 | body://div[contains(@class,'articalContent')] | ||
13 | |||
14 | # Remove redundant spans whose class starts with "MASS" | ||
15 | # Example <span class="MASSf21674ffeef7"></span> | ||
16 | strip://span[contains(@class,'MASS')] | ||
17 | |||
18 | # Remove comment | ||
19 | strip://div[contains(@class,'allComm')] | ||
20 | |||
21 | # Remove hidden text and links | ||
22 | strip://ins | ||
23 | |||
24 | tidy:no | ||
25 | convert_double_br_tags:yes | ||
26 | test_url: http://blog.sina.com.cn/s/blog_5054769e0102dtja.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.spu.edu.txt b/inc/3rdparty/site_config/standard/blog.spu.edu.txt new file mode 100644 index 00000000..68bd4e39 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blog.spu.edu.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body://div[@class='post'] | ||
2 | test_url: http://blog.spu.edu/lectio/from-the-frying-pan-into-the-fire/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blog.wells.ee.txt b/inc/3rdparty/site_config/standard/blog.wells.ee.txt new file mode 100644 index 00000000..8c8b3838 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blog.wells.ee.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h2/a[@class="no-link title"] | ||
2 | author: //h2[@id="blog_owner"] | ||
3 | date: //time | ||
4 | strip: //h2/a[@class="no-link title"] | ||
5 | test_url: http://blog.wells.ee/retina | ||
6 | test_url: http://blog.wells.ee/skeuomorphism \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt b/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt new file mode 100644 index 00000000..f630127b --- /dev/null +++ b/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | # 2011-08-23 [carlo@...] Initial version. | ||
2 | |||
3 | author: //div[@id="blogauthordatebox-node"]//a[@title="View user profile."]/text() | ||
4 | |||
5 | # why yes, I do feel a bit dirty | ||
6 | date: substring-before( substring-after( substring-after( //div[@id="blogauthordatebox-node"]//td[3], "on " ), ", "), " " ) | ||
7 | |||
8 | test_url: http://blogs.aljazeera.net/asia/2011/08/22/peoples-hero \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blogs.forbes.com.txt b/inc/3rdparty/site_config/standard/blogs.forbes.com.txt new file mode 100644 index 00000000..86580d21 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blogs.forbes.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //div[@class='entry'] | ||
2 | test_url: http://blogs.forbes.com/adamhartung/2011/04/08/apple-is-better-managed-than-microsoft/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blogs.hbr.org.txt b/inc/3rdparty/site_config/standard/blogs.hbr.org.txt new file mode 100644 index 00000000..3664d16c --- /dev/null +++ b/inc/3rdparty/site_config/standard/blogs.hbr.org.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: //div[@id='pageFeature']/h1 | ||
2 | body: //div[@id='articleBody'] | ||
3 | strip: //div[@class='module wide'] | ||
4 | test_url: http://blogs.hbr.org/bregman/2011/04/the-1-killer-of-meetings-and-w.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+harvardbusiness+%28HBR.org%29 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blogs.msdn.com.txt b/inc/3rdparty/site_config/standard/blogs.msdn.com.txt new file mode 100644 index 00000000..3d3ec020 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blogs.msdn.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h3[@class="post-name"] | ||
2 | author: //span[@class="user-name"] | ||
3 | date: //div[@class="post-date"] | ||
4 | body: //div[@class="post-content user-defined-markup"] | ||
5 | footnotes: no | ||
6 | test_url: http://blogs.msdn.com/b/b8/archive/2011/10/04/designing-the-start-screen.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blogs.reuters.com.txt b/inc/3rdparty/site_config/standard/blogs.reuters.com.txt new file mode 100644 index 00000000..6907bcb2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blogs.reuters.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: //div[@id='single']/h1 | ||
2 | body: //div[@id='postcontent'] | ||
3 | test_url: http://blogs.reuters.com/felix-salmon/2010/07/16/the-value-of-a-strong-brand-apple-edition/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt b/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt new file mode 100644 index 00000000..a7d15081 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt | |||
@@ -0,0 +1,16 @@ | |||
1 | # meta data | ||
2 | title://h1[@class = 'postTitle'] | ||
3 | author:substring-before(substring-after(//span[@class = 'byline'],'By '),'|') | ||
4 | date://span[@class = 'datestamp'] | ||
5 | |||
6 | #body content | ||
7 | body://div[@id = 'singleBlogPost'] | ||
8 | |||
9 | #reclaim author info | ||
10 | move_into(//div[@id = 'singleBlogPost'])://div[@id = 'aboutAuthorDiv'] | ||
11 | strip://p[@class = 'moreLink mobileHide'] | ||
12 | |||
13 | #cleanup comments, there might be some open <div> sections | ||
14 | strip://div[@id = 'comments2'] | ||
15 | strip://h3[a[@href = '#add-comment']] | ||
16 | test_url: http://blogs.scientificamerican.com/a-blog-around-the-clock/2012/07/10/science-blogs-definition-and-a-history/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt b/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt new file mode 100644 index 00000000..ba8bc6e7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | # metadata | ||
2 | author://div[@class = 'post']/div[@class='meta']/a[1] | ||
3 | date://div[@id = 'rap']/h2[1] | ||
4 | body://div[@class = 'post'] | ||
5 | |||
6 | # wrapping caption and image | ||
7 | wrap_in(fieldset)://div[contains(@class, 'wp-caption')] | ||
8 | |||
9 | |||
10 | # clean up | ||
11 | strip://div[@class = 'post']/h3[@class = 'storytitle'] | ||
12 | strip://div[@class = 'post']/div[@class = 'social'] | ||
13 | strip://img[@style = 'display:none;'] | ||
14 | strip://img[@height='0' and @width='0'] | ||
15 | test_url: http://blogs.smithsonianmag.com/adventure/2011/10/tips-for-women-traveling-in-turkey/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/blogs.technet.com.txt b/inc/3rdparty/site_config/standard/blogs.technet.com.txt new file mode 100644 index 00000000..a2909fd1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/blogs.technet.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h3[@class="post-name"] | ||
2 | author: //span[@class="user-name"] | ||
3 | date: //div[@class="post-date"] | ||
4 | body: //div[@class="post-content user-defined-markup"] | ||
5 | footnotes: no | ||
6 | test_url: http://blogs.technet.com/b/dlemson/archive/2004/03/03/83304.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bluetouff.com.txt b/inc/3rdparty/site_config/standard/bluetouff.com.txt new file mode 100644 index 00000000..fbe7a5c6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/bluetouff.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body://div[@class='entry'] | ||
2 | date://div[@class='meta'] | ||
3 | strip://a[@class='FlattrButton'] | ||
4 | test_url: http://bluetouff.com/2012/03/02/polemique-google-vie-privee/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/boagworld.com.txt b/inc/3rdparty/site_config/standard/boagworld.com.txt new file mode 100644 index 00000000..91e48fdb --- /dev/null +++ b/inc/3rdparty/site_config/standard/boagworld.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //h1[@class="entry-title"][2] | ||
2 | author: string("Paul Boag") | ||
3 | date: substring(//span[@class="meta"], 11) | ||
4 | body: //article | ||
5 | strip: //h2 | ||
6 | strip: //h1 | ||
7 | strip: //div[@id="callsToAction"] | ||
8 | test_url: http://boagworld.com/working-in-web-design/dealing-with-the-dickheads/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/boingboing.net.txt b/inc/3rdparty/site_config/standard/boingboing.net.txt new file mode 100644 index 00000000..9169e8fb --- /dev/null +++ b/inc/3rdparty/site_config/standard/boingboing.net.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | # This is far from perfect, but so is BoingBoing's markup | ||
2 | title: //h2[@class="headline"] | ||
3 | single_page_link: //h2[@class="headline"]/a | ||
4 | #date: //p[@class="byline"] | ||
5 | body: //div[@class="post"] | ||
6 | |||
7 | strip_id_or_class: shareMe | ||
8 | strip_id_or_class: authorbox | ||
9 | strip_id_or_class: byline | ||
10 | |||
11 | test_url: http://boingboing.net/2011/10/23/understanding-the-hyperrich-through-the-lens-of-tomorrows-history.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt b/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt new file mode 100644 index 00000000..4cc49043 --- /dev/null +++ b/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: //h2[@class='entry-title'] | ||
2 | body: //div[@class='entry-content'] | ||
3 | test_url: http://boldizsar.palotas.eu/blog/?p=1394 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/book.douban.com.txt b/inc/3rdparty/site_config/standard/book.douban.com.txt new file mode 100644 index 00000000..8b958562 --- /dev/null +++ b/inc/3rdparty/site_config/standard/book.douban.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //span[@property='v:description'] | ||
2 | date: //span[@property='v:dtreviewed'] | ||
3 | author: //span[@property='v:reviewer'] | ||
4 | prune: no | ||
5 | |||
6 | test_url: http://book.douban.com/review/2422662/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bookforum.com.txt b/inc/3rdparty/site_config/standard/bookforum.com.txt new file mode 100644 index 00000000..331f415e --- /dev/null +++ b/inc/3rdparty/site_config/standard/bookforum.com.txt | |||
@@ -0,0 +1,19 @@ | |||
1 | #metadata | ||
2 | title://div[@class = 'Topper']/h1 | ||
3 | author://div[@class = 'Topper']/h3 | ||
4 | date://div[@class = 'Topper']/h6 | ||
5 | body://div[@class = 'Core'] | ||
6 | |||
7 | |||
8 | |||
9 | # clean up | ||
10 | strip://div[@class = 'Topper']/h1 | ||
11 | strip://div[@class = 'Topper']/h3 | ||
12 | strip://div[@class = 'Topper']/h4 | ||
13 | strip://div[@class = 'Topper']/h5 | ||
14 | strip://div[@class = 'Topper']/h6 | ||
15 | strip://br[@clear = 'all'] | ||
16 | strip://div[@class = 'adCore'] | ||
17 | strip://div[@class = 'BookR'] | ||
18 | strip://div[@class = 'InfoBox'] | ||
19 | test_url: http://bookforum.com/inprint/018_04/8595 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/borderhouseblog.com.txt b/inc/3rdparty/site_config/standard/borderhouseblog.com.txt new file mode 100644 index 00000000..190738d5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/borderhouseblog.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title://h1 | ||
2 | author://div[@class="meta"]/span/a | ||
3 | date://div[@class="date"] | ||
4 | body://div[@class="content article"] | ||
5 | strip://div[@class="content article"]/h1 | ||
6 | |||
7 | test_url: http://borderhouseblog.com/?p=7832 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bostonglobe.com.txt b/inc/3rdparty/site_config/standard/bostonglobe.com.txt new file mode 100644 index 00000000..d3e6f43f --- /dev/null +++ b/inc/3rdparty/site_config/standard/bostonglobe.com.txt | |||
@@ -0,0 +1,16 @@ | |||
1 | # NOTE: If testing this configuration yields bad results, including junk text like "Try BostonGlobe.com today" and "THIS STORY APPEARED IN", please replace the Test URL with a current-day headline link from bostonglobe.com. | ||
2 | |||
3 | title: //div[@class="header"]/h1 | ||
4 | author: substring-after(//div[@class="byline"]/h2[@class="author"],"By ") | ||
5 | date: //div[@class="byline"]/p[last()] | ||
6 | body: //div[@class="article-body"] | ||
7 | |||
8 | strip_id_or_class: aside | ||
9 | strip_id_or_class: promo | ||
10 | strip_id_or_class: skip-nav | ||
11 | strip_id_or_class: article-more | ||
12 | strip_id_or_class: article-bar | ||
13 | |||
14 | # This removes image captions. If the parser starts saving images from bostonglobe.com (currently, it does not), then this directive should be removed. | ||
15 | strip_id_or_class: figure | ||
16 | test_url: http://bostonglobe.com/news/nation/2012/03/17/illinois-primary-could-pivotal/PsDzFZqvhEYyXbOcF9FOkO/story.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bostonreview.net.txt b/inc/3rdparty/site_config/standard/bostonreview.net.txt new file mode 100644 index 00000000..68567012 --- /dev/null +++ b/inc/3rdparty/site_config/standard/bostonreview.net.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | #basics | ||
2 | title://h3[@class = 'article_title'] | ||
3 | date://span[@class = 'article_date'] | ||
4 | body://div[@id = 'center_column_article'] | ||
5 | #correct, but author not being picked up in preview | ||
6 | author://span[@class = 'article_author'] | ||
7 | |||
8 | #strips basics from article | ||
9 | strip_id_or_class:article_title | ||
10 | strip_id_or_class:article_date | ||
11 | strip_id_or_class:article_author | ||
12 | |||
13 | #strips pull quotes | ||
14 | strip_id_or_class:pull_quote | ||
15 | test_url: http://www.bostonreview.net/BR36.4/megan_pugh_agnes_de_mille_dance.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/boundlessline.org.txt b/inc/3rdparty/site_config/standard/boundlessline.org.txt new file mode 100644 index 00000000..bfc3f3d1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/boundlessline.org.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: substring-before(//title, '|') | ||
2 | body: //div[@class="entry"] | ||
3 | # Remove the author's picture | ||
4 | strip: //div[@class="entry"]/a[1] | ||
5 | test_url: http://www.boundlessline.org/2011/06/the-nyts-on-gender-over-the-weekend.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/brainfacts.org.txt b/inc/3rdparty/site_config/standard/brainfacts.org.txt new file mode 100644 index 00000000..94b0f56d --- /dev/null +++ b/inc/3rdparty/site_config/standard/brainfacts.org.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //div[@class="standard"]/h1 | ||
2 | author: string("BrainFacts.org") | ||
3 | date: //div[@class="meta"]/strong | ||
4 | |||
5 | strip: //p[@class="skip"] | ||
6 | strip: //div[@class="meta"] | ||
7 | strip: //div[@class="standard"]/h1 | ||
8 | strip: //div[@class="modal"] | ||
9 | strip: //div[@class="columnRight"] | ||
10 | test_url: http://brainfacts.org/diseases-disorders/childhood-disorders/articles/2011/autism-the-pervasive-developmental-disorder/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/brandeins.de.txt b/inc/3rdparty/site_config/standard/brandeins.de.txt new file mode 100644 index 00000000..3753ce67 --- /dev/null +++ b/inc/3rdparty/site_config/standard/brandeins.de.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | # set body | ||
2 | body: //div[@id='theContent'] | ||
3 | |||
4 | # set title | ||
5 | title: //div[@id='theContent']/h3 | ||
6 | strip: //div[@id='theContent']/h3 | ||
7 | test_url: http://www.brandeins.de/archiv/magazin/gegessen-wird-immer/artikel/hunger.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt b/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt new file mode 100644 index 00000000..19504844 --- /dev/null +++ b/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | date://h2[@class="date-header"] | ||
2 | body://div[@class="entry-content"] | ||
3 | test_url: http://www.brandingstrategyinsider.com/2011/12/top-twelve-branding-keys-for-2012.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/brettterpstra.com.txt b/inc/3rdparty/site_config/standard/brettterpstra.com.txt new file mode 100644 index 00000000..f6f73778 --- /dev/null +++ b/inc/3rdparty/site_config/standard/brettterpstra.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //div[@class='post full'] | ||
2 | title: //h1 | ||
3 | author: substring-after(//title, '- ') | ||
4 | date: //span[@class='date'] | ||
5 | test_url: http://brettterpstra.com/byword-for-ios/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt b/inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt new file mode 100644 index 00000000..27e6b70c --- /dev/null +++ b/inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //div[@class='articleBody'] | ||
2 | test_url: http://www.brisbanetimes.com.au/opinion/blogs/blunt-instrument/losing-our-minds--for-24-hours-20120118-1q682.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/brookings.edu.txt b/inc/3rdparty/site_config/standard/brookings.edu.txt new file mode 100644 index 00000000..9f4fc4e3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/brookings.edu.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | title: //div[@id='contentheader']/h1 | ||
2 | author: //p[@class='attribution']/span[@class='author']/* | ||
3 | # Is there a way to pull multiple authors? My XPath here is just grabbing the first | ||
4 | |||
5 | date: /html/head/meta[@name="date"]/@content | ||
6 | body: //div[@class='main-content'] | ||
7 | |||
8 | strip: //p[@class='byline'] | ||
9 | strip: //div[@class='img-gallery'] | ||
10 | strip: //div[@class='callout'] | ||
11 | strip: //div[@class='add-your-view'] | ||
12 | convert_double_br_tags: yes | ||
13 | test_url: http://www.brookings.edu/opinions/2011/1018_cyberattack_libya_goldsmith.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/brooksreview.net.txt b/inc/3rdparty/site_config/standard/brooksreview.net.txt new file mode 100644 index 00000000..71cafcdb --- /dev/null +++ b/inc/3rdparty/site_config/standard/brooksreview.net.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@class='article'] | ||
3 | body: //div[@class='post'] | ||
4 | date: //*[@id='single']/span | ||
5 | prune: no | ||
6 | test_url: http://brooksreview.net/2011/11/readability-agency/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/buquad.com.txt b/inc/3rdparty/site_config/standard/buquad.com.txt new file mode 100644 index 00000000..a75fa046 --- /dev/null +++ b/inc/3rdparty/site_config/standard/buquad.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //h1 | ||
2 | author: //h2/a | ||
3 | date: substring-after(//h2, '|') | ||
4 | strip_id_or_class: 'attachment' | ||
5 | strip: //h3 | ||
6 | |||
7 | body: //div[@class='entry'] | ||
8 | test_url: http://buquad.com/2012/04/09/paul-ryan/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/businessinsider.com.txt b/inc/3rdparty/site_config/standard/businessinsider.com.txt new file mode 100644 index 00000000..c773db8b --- /dev/null +++ b/inc/3rdparty/site_config/standard/businessinsider.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | title://div[@class="sl-layout-post"]/h1 | ||
2 | body: //div[contains(@class, 'post-content') or contains(@class, 'KonaBody')] | ||
3 | strip: //div[contains(@class, "post-sidebar")] | ||
4 | strip: //div[@id='related-links'] | ||
5 | author://div[@class="byline"]/a | ||
6 | date://div[@class="byline"]/span[@class="date"] | ||
7 | prune: no | ||
8 | |||
9 | strip://*[contains(@class,'sponsored-text')] | ||
10 | strip: //div[@id='post_footer'] | ||
11 | |||
12 | test_url: http://www.businessinsider.com/microsoft-just-put-one-of-its-hardcore-technical-geniuses-on-xbox-2012-1 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/businessnews.com.tn.txt b/inc/3rdparty/site_config/standard/businessnews.com.tn.txt new file mode 100644 index 00000000..714cfc90 --- /dev/null +++ b/inc/3rdparty/site_config/standard/businessnews.com.tn.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | body: //div[@id='article_detail'] | ||
2 | title: //meta[@property='og:title']/@content | ||
3 | date: //div[@id='date_com_art']//a[@class='date'] | ||
4 | author: //div[@id='article_detail']//font[@class='auteur'] | ||
5 | |||
6 | strip_id_or_class: porte_titre_theme | ||
7 | strip_id_or_class: cont_param | ||
8 | strip_id_or_class: date_com_art | ||
9 | |||
10 | prune: no | ||
11 | |||
12 | test_url: http://www.businessnews.com.tn/details_article.php?a=31073&t=522&lang=fr&temp=1 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/businessweek.com.txt b/inc/3rdparty/site_config/standard/businessweek.com.txt new file mode 100644 index 00000000..7b3d063b --- /dev/null +++ b/inc/3rdparty/site_config/standard/businessweek.com.txt | |||
@@ -0,0 +1,30 @@ | |||
1 | # story has several pages, should be detected | ||
2 | body: //div[@id='storyBody'] | ||
3 | body: //div[@id='article_body'] | ||
4 | body: //div[@id='story_body'] | ||
5 | |||
6 | title://h1[@id='article_headline'] | ||
7 | |||
8 | # article author | ||
9 | author: //p[@class='author']/a | ||
10 | # story author(s) | ||
11 | author: substring-after(//p[@class='byline'], 'By ') | ||
12 | |||
13 | # article date | ||
14 | date: //span[@class='published_date'] | ||
15 | # story date | ||
16 | date: //span[@class='date'] | ||
17 | |||
18 | date: substring-after(//div[contains(@class,'attributor')],'on') | ||
19 | strip_id_or_class: inset | ||
20 | strip: //p/span[@class='photoCredit'] | ||
21 | strip: //h1 | ||
22 | |||
23 | strip_id_or_class: page_count | ||
24 | strip_id_or_class: tools | ||
25 | strip_id_or_class: pagination | ||
26 | |||
27 | single_page_link: //li[@id='stPrint']/a | ||
28 | |||
29 | test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html | ||
30 | test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/buzzfeed.com.txt b/inc/3rdparty/site_config/standard/buzzfeed.com.txt new file mode 100644 index 00000000..6df8bc47 --- /dev/null +++ b/inc/3rdparty/site_config/standard/buzzfeed.com.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | # Creator: Greg Leuch <greg@...> | ||
2 | |||
3 | # It can be messy. | ||
4 | tidy:no | ||
5 | |||
6 | # The basic template. | ||
7 | title: //h1[@data-print='title'] | ||
8 | author: //a[@data-print='author'] | ||
9 | date: //time[@data-print='date'] | ||
10 | body: //div[@data-print='body'] | ||
11 | body: //section[@data-print='body'] | ||
12 | |||
13 | # For various things... | ||
14 | strip: *[@data-print="ignore"] | ||
15 | test_url: http://www.buzzfeed.com/hgrant/35-reasons-why-dogs-hate-the-holidays \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/bygonebureau.com.txt b/inc/3rdparty/site_config/standard/bygonebureau.com.txt new file mode 100644 index 00000000..0abb6436 --- /dev/null +++ b/inc/3rdparty/site_config/standard/bygonebureau.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h1 | ||
2 | author: //a[contains(@href, '/author/')] | ||
3 | date: //*[@class='post-date'] | ||
4 | strip: //*[@class='post-date'] | ||
5 | strip: //h1 | ||
6 | test_url: http://bygonebureau.com/2011/06/20/an-existential-psychoanalysis/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cardboardconnection.com.txt b/inc/3rdparty/site_config/standard/cardboardconnection.com.txt new file mode 100644 index 00000000..3adc7a35 --- /dev/null +++ b/inc/3rdparty/site_config/standard/cardboardconnection.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //h1[@class='producttabbed-title'] | ||
2 | body: //div[@class='postTabs_divs postTabs_curr_div'] | ||
3 | strip: //div[@class='ratingblock2'] | ||
4 | strip: //p[@id='breadcrumbs'] | ||
5 | strip: //div[@style='display: none'] | ||
6 | |||
7 | |||
8 | test_url: http://www.cardboardconnection.com/2012-topps-archives-baseball-cards \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/carpeaqua.com.txt b/inc/3rdparty/site_config/standard/carpeaqua.com.txt new file mode 100644 index 00000000..7ba1ed78 --- /dev/null +++ b/inc/3rdparty/site_config/standard/carpeaqua.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h2 | ||
2 | body: //div[@class='entry'] | ||
3 | |||
4 | prune: no | ||
5 | # otherwise the footnotes are removed | ||
6 | test_url: http://carpeaqua.com/2011/03/27/the-intersection-of-power-and-portability/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/catb.org.txt b/inc/3rdparty/site_config/standard/catb.org.txt new file mode 100644 index 00000000..8908292c --- /dev/null +++ b/inc/3rdparty/site_config/standard/catb.org.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | body: //div[@class='article'] | ||
2 | strip: //div[@class='revhistory'] | ||
3 | strip: //div[@class='toc'] | ||
4 | tidy: no | ||
5 | prune: no | ||
6 | |||
7 | test_url: http://catb.org/~esr/faqs/smart-questions.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cbc.ca.txt b/inc/3rdparty/site_config/standard/cbc.ca.txt new file mode 100644 index 00000000..25305109 --- /dev/null +++ b/inc/3rdparty/site_config/standard/cbc.ca.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //div[contains(@class, 'headline')]/h1 | ||
2 | author: //h5[contains(@class, 'byline')] | ||
3 | date: substring-after(//h4[contains(@class, 'posted')], 'Posted: ') | ||
4 | body: //div[@id="storyboard"] | ||
5 | test_url: http://www.cbc.ca/news/world/story/2012/01/16/cruise-ship-monday.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cbsnews.com.txt b/inc/3rdparty/site_config/standard/cbsnews.com.txt new file mode 100644 index 00000000..4ba3da19 --- /dev/null +++ b/inc/3rdparty/site_config/standard/cbsnews.com.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | date: //meta[@name="published"]/@content | ||
2 | date: //div[@class="timeLine"] | ||
3 | title: //div[@id='contentBody']//h1 | ||
4 | author: //dl[@class="storyBlogByline"]/dd/a | ||
5 | body: //div[@id='storyMediaBox'] | //div[contains(@class, 'storyText')] | ||
6 | |||
7 | # Content Pruning | ||
8 | strip: //div[@class="scrollingArrows"] | ||
9 | strip: //div[@class="timeLine"] | ||
10 | strip: //dl[@class="storyBlogByline"] | ||
11 | |||
12 | prune: no | ||
13 | |||
14 | test_url: http://www.cbsnews.com/8301-201_162-57366361/rescued-americans-dad-proud-of-the-u.s/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/chareidi.org.txt b/inc/3rdparty/site_config/standard/chareidi.org.txt new file mode 100644 index 00000000..de34a7d8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/chareidi.org.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | title: //h1 | ||
2 | test_url: http://www.chareidi.org/archives5772/tetzaveh/TZV72adraft.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/chinamining.org.txt b/inc/3rdparty/site_config/standard/chinamining.org.txt new file mode 100644 index 00000000..ea0df2a3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/chinamining.org.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //*[@id='Content']/span[1] | ||
2 | author: substring-after(substring-before(//*[@id='Content']/span[2], ')'), '(') | ||
3 | date: substring-before(substring-after(//*[@id='Content']/span[2], 'Updated: '), 'Counter') | ||
4 | |||
5 | strip: //*[@id='Content']/span[1] | ||
6 | strip: //*[@id='Content']/span[2] | ||
7 | |||
8 | body: //*[@id='Content'] | ||
9 | |||
10 | test_url: http://www.chinamining.org/News/2011-07-22/1311319069d48087.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/chomsky.info.txt b/inc/3rdparty/site_config/standard/chomsky.info.txt new file mode 100644 index 00000000..1d294109 --- /dev/null +++ b/inc/3rdparty/site_config/standard/chomsky.info.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //div[@class='title'] | ||
2 | author: //div[@class='author'] | ||
3 | prune: no | ||
4 | |||
5 | test_url: http://www.chomsky.info/onchomsky/2002----.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/christianitytoday.com.txt b/inc/3rdparty/site_config/standard/christianitytoday.com.txt new file mode 100644 index 00000000..44288a46 --- /dev/null +++ b/inc/3rdparty/site_config/standard/christianitytoday.com.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | title://div[@class='title'] | ||
2 | author://div[@class='byline']/b | ||
3 | date:substring-after(//div[@class='byline'], 'posted') | ||
4 | body://div[@id='body'] | ||
5 | wrap_in(h2)://span[@class='subhead'] | ||
6 | wrap_in(i)://p[@class='bio'] | ||
7 | wrap_in(i)://p[@class='copyright'] | ||
8 | strip://div[@class='title'] | ||
9 | strip://div[@class='deck'] | ||
10 | strip://div[@class='byline'] | ||
11 | strip://div[@class='copyright'] | ||
12 | strip://br | ||
13 | test_url: http://www.christianitytoday.com/ct/2012/aprilweb-only/my-god-forsaken-me.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/christianpf.com.txt b/inc/3rdparty/site_config/standard/christianpf.com.txt new file mode 100644 index 00000000..7f089c55 --- /dev/null +++ b/inc/3rdparty/site_config/standard/christianpf.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //h1[@class="entry-title"] | ||
2 | author: //*[@class="author vcard fn"] | ||
3 | date: //*[@class="published"] | ||
4 | body: //div[(@class = "dd_content_wrap")] | ||
5 | test_url: http://christianpf.com/do-ibuys-lead-to-more-buying/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/christies.com.txt b/inc/3rdparty/site_config/standard/christies.com.txt new file mode 100644 index 00000000..5c5889a2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/christies.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | tidy: no | ||
2 | prune: no | ||
3 | date: //article//time[@pubdate] | ||
4 | title: //article/header/h2 | ||
5 | body: //article | ||
6 | test_url: http://www.christies.com/LotFinder/custom/lot_details_MultiLanguage.aspx?from=salesummary&intObjectID=5556662&sid=e536ed1a-b763-41c4-afcf-c94815ec6eee&LID=3 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/chrome.google.com.txt b/inc/3rdparty/site_config/standard/chrome.google.com.txt new file mode 100644 index 00000000..d4cc8581 --- /dev/null +++ b/inc/3rdparty/site_config/standard/chrome.google.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | body: //pre[@id='cx-desc-text'] | ||
2 | body: //div[contains(@class, 'overview-tab-right-bar-info')] | ||
3 | title: //h1[contains(@class, 'detail-dialog-title')] | ||
4 | tidy: no | ||
5 | prune: no | ||
6 | replace_string(<noscript>): <div> | ||
7 | replace_string(</noscript>): </div> | ||
8 | |||
9 | test_url: https://chrome.google.com/webstore/detail/pnaiinchjaonopoejhknmgjingcnaloc \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/chronicle.com.txt b/inc/3rdparty/site_config/standard/chronicle.com.txt new file mode 100644 index 00000000..0c6c11ed --- /dev/null +++ b/inc/3rdparty/site_config/standard/chronicle.com.txt | |||
@@ -0,0 +1,17 @@ | |||
1 | title: //h1[contains(@class, "entry-title")] | ||
2 | author: //p[contains(@class, "byline")] | ||
3 | |||
4 | # blog articles (chronicle.com/blogs/*) | ||
5 | body: //div[contains(@class, "abstract")] | ||
6 | date: //p[contains(@class, "time")] | ||
7 | |||
8 | # all (?) other articles | ||
9 | body: //div[@id="article-body"] | ||
10 | date: //p[contains(@class, "dateline")] | ||
11 | |||
12 | # remove sidebars containing images (I assume this is desired for Instapaper) | ||
13 | strip: //div[@id="related"] | ||
14 | strip: //div[contains(@class, "image")] | ||
15 | |||
16 | # Note: if you're not a Chronicle subscriber (personally or institutionally), you'll only see the first couple of paragraphs of the article, and Instapaper will display that excerpt with some extraneous page content above and below it. Thank goodness for that bookmarklet. | ||
17 | test_url: http://chronicle.com/article/In-a-Land-of-Second-Chances/128375/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cicero.de.txt b/inc/3rdparty/site_config/standard/cicero.de.txt new file mode 100644 index 00000000..b9f9a12b --- /dev/null +++ b/inc/3rdparty/site_config/standard/cicero.de.txt | |||
@@ -0,0 +1,33 @@ | |||
1 | # fforst@... | ||
2 | |||
3 | # Use link to print article for single page view | ||
4 | single_page_link: //a[@class="print"] | ||
5 | |||
6 | # set body | ||
7 | tidy: no | ||
8 | body: //div[@class='artikel-content'] | ||
9 | |||
10 | # strip title and subtitle since we got it already | ||
11 | strip: //div[@class='issue'] | ||
12 | strip: //div[@class='artikel-content']/h2 | ||
13 | |||
14 | # some authors are known and have a link, others don't | ||
15 | author: //a[contains(@href, 'autor?')] | ||
16 | |||
17 | #date | ||
18 | date: //span[@class='article-date'] | ||
19 | |||
20 | # Strip author since we got him | ||
21 | strip_id_or_class: author | ||
22 | |||
23 | #strip captions | ||
24 | strip_id_or_class: field-name-field-image-credit | ||
25 | strip_id_or_class: field-name-field-article-image-subtitle | ||
26 | |||
27 | # remove community functions | ||
28 | strip: //div[@class='meta'] | ||
29 | strip: //div[@id='comments'] | ||
30 | |||
31 | # remove "continue on the next page" text | ||
32 | strip: //p[text()="[SEITE]"] | ||
33 | test_url: http://www.cicero.de/weltbuehne/ihre-wut-ist-global-krise-jugend-revolten-aufstaende-zelte/43049 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ciperchile.cl.txt b/inc/3rdparty/site_config/standard/ciperchile.cl.txt new file mode 100644 index 00000000..4d3ac804 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ciperchile.cl.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //*[(@id = "articlebody")] | ||
2 | strip_id_or_class: rotulo | ||
3 | |||
4 | test_url: http://ciperchile.cl/2011/04/18/las-operaciones-secretas-que-ordenaba-karadima-para-aniquilar-a-su-competencia/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cjr.org.txt b/inc/3rdparty/site_config/standard/cjr.org.txt new file mode 100644 index 00000000..a0c3ea5d --- /dev/null +++ b/inc/3rdparty/site_config/standard/cjr.org.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //p[@class='subhead' or @class='attribution'] | //div[@class='article-body'] | ||
2 | prune: no | ||
3 | |||
4 | single_page_link: //li[@class='print']/a | ||
5 | |||
6 | test_url: http://www.cjr.org/behind_the_news/from_breaking_news_to_baseless.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/clientk.com.txt b/inc/3rdparty/site_config/standard/clientk.com.txt new file mode 100644 index 00000000..369e88ad --- /dev/null +++ b/inc/3rdparty/site_config/standard/clientk.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title://div[@class="entrytitle"]/a | ||
2 | author:substring-after(substring-before(//div[@class="entrytime"], "|"), "By ") | ||
3 | date:substring-before(substring-after(//div[@class="entrytime"], "|"), "- Posted") | ||
4 | body://div[@class="entrybody"] | ||
5 | strip://div[@class="entrybody"]//p[@class="singleinfo"] | ||
6 | test_url: http://clientk.com/2011/12/19/the-impact-of-more/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/clubic.com.txt b/inc/3rdparty/site_config/standard/clubic.com.txt new file mode 100644 index 00000000..b356bbdf --- /dev/null +++ b/inc/3rdparty/site_config/standard/clubic.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //h1 | ||
2 | author: //a[@class='auteur'] | ||
3 | body: //div[@class='editorial'] | ||
4 | next_page_link: //a[contains(text(),'Page suivante')] | ||
5 | strip: //a[contains(text(),'Page suivante')] | ||
6 | strip: //a[contains(text(),'Page précédente')] | ||
7 | strip_id_or_class: slideshow | ||
8 | |||
9 | prune: no | ||
10 | |||
11 | test_url: http://www.clubic.com/carte-graphique/carte-graphique-amd/radeon-hd-7770/article-478936-1-radeon-hd-7750-7770.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cmswire.com.txt b/inc/3rdparty/site_config/standard/cmswire.com.txt new file mode 100644 index 00000000..2bc96d2e --- /dev/null +++ b/inc/3rdparty/site_config/standard/cmswire.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //div[contains(@id,'article-body')] | ||
2 | strip://div[contains(@id,'disqus_count_block')] | ||
3 | strip://div[contains(@id,'col-left')] | ||
4 | strip://div[contains(@id,'col-right')] | ||
5 | |||
6 | test_url: http://www.cmswire.com/cms/customer-experience/for-apps-and-appstores-the-singularity-is-approaching-014888.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cnet.com.txt b/inc/3rdparty/site_config/standard/cnet.com.txt new file mode 100644 index 00000000..74f46ba9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/cnet.com.txt | |||
@@ -0,0 +1,16 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | body: //div[contains(@class, 'postBody')] | ||
3 | date: //div[@id='nameAndTime']/time | ||
4 | author: //div[@id='nameAndTime']/span[@class='author'] | ||
5 | |||
6 | strip_id_or_class: image-credit | ||
7 | strip_id_or_class: noAutolink | ||
8 | strip_id_or_class: related | ||
9 | |||
10 | prune: no | ||
11 | tidy: no | ||
12 | |||
13 | # early end | ||
14 | replace_string(Download today's podcast</a>): Download today's podcast</a></div></body></html> | ||
15 | |||
16 | test_url: http://www.cnet.com/8301-13952_1-57367607-81/the-404-981-where-the-world-is-a-vampire-podcast/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cnn.com.txt b/inc/3rdparty/site_config/standard/cnn.com.txt new file mode 100644 index 00000000..995e2c79 --- /dev/null +++ b/inc/3rdparty/site_config/standard/cnn.com.txt | |||
@@ -0,0 +1,19 @@ | |||
1 | title: //div[@class="cnn_storyarea"]/h1 | ||
2 | author: //div[@class="cnnByline"]/strong | ||
3 | date: substring-after(//div[@class="cnn_strytmstmp"], 'Sun') | ||
4 | date: substring-after(//div[@class="cnn_strytmstmp"], 'Mon') | ||
5 | date: substring-after(//div[@class="cnn_strytmstmp"], 'Tue') | ||
6 | date: substring-after(//div[@class="cnn_strytmstmp"], 'Wed') | ||
7 | date: substring-after(//div[@class="cnn_strytmstmp"], 'Thu') | ||
8 | date: substring-after(//div[@class="cnn_strytmstmp"], 'Fri') | ||
9 | date: substring-after(//div[@class="cnn_strytmstmp"], 'Sat') | ||
10 | strip: //div[@class="cnn_storyarea"]/h1 | ||
11 | strip_id_or_class: cnnByline | ||
12 | strip_id_or_class: cnn_strytmstmp | ||
13 | strip_id_or_class: cnn_strycaptiontxt | ||
14 | strip_id_or_class: cnn_strybtntoolsbttm | ||
15 | strip_id_or_class: cnn_strybtntools | ||
16 | strip_id_or_class: cnn_strybtmcntnt | ||
17 | strip_id_or_class: cnn_containerwht | ||
18 | strip_id_or_class: cnn_stryathrtmp | ||
19 | test_url: http://www.cnn.com/2012/05/13/us/new-york-police-policy/index.html?eref=rss_topstories \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cnnsi.com.txt b/inc/3rdparty/site_config/standard/cnnsi.com.txt new file mode 100644 index 00000000..6a2c2b80 --- /dev/null +++ b/inc/3rdparty/site_config/standard/cnnsi.com.txt | |||
@@ -0,0 +1,26 @@ | |||
1 | # main sportsillustrated.com articles | ||
2 | |||
3 | body: //div[@id="cnnStoryContent"] | ||
4 | title: //div[@id="cnnStoryHeadline"]//h1 | ||
5 | author: //div[@id="cnnSubBanner"]//strong | ||
6 | date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ") | ||
7 | date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ") | ||
8 | |||
9 | # kill ugly font buttons | ||
10 | strip: //div[@id="cnnSCFontButtons"] | ||
11 | |||
12 | # kill misc filler videos & etc | ||
13 | strip: //div[@class="cnnDivideContent"] | ||
14 | strip: //*[@class="cnnTMbox"] | ||
15 | |||
16 | # si vault articles | ||
17 | # ------------- | ||
18 | body: //div[@class="siv_artPara"] | ||
19 | title: //div[@class="siv_artHeader"]//h1 | ||
20 | author: //div[@class="byline"] | ||
21 | date: //div[@class="date"] | ||
22 | |||
23 | next_page_link: //div[@id='cnnStoryContinue']/a | ||
24 | strip_id_or_class: cnnstorypagination | ||
25 | |||
26 | test_url: http://cnnsi.com/2012/writers/peter_king/01/08/wild.card.round/index.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/code.activestate.com.txt b/inc/3rdparty/site_config/standard/code.activestate.com.txt new file mode 100644 index 00000000..6cf72e23 --- /dev/null +++ b/inc/3rdparty/site_config/standard/code.activestate.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | body: //div[@id='content'] | ||
2 | title: //div[@id='page_header']/h1 | ||
3 | |||
4 | strip_id_or_class: 'lineno' | ||
5 | strip_id_or_class: 'block-toolbar-button' | ||
6 | strip_id_or_class: 'recipe_score' | ||
7 | strip: //div[@id='recipe_tools'] | ||
8 | strip: //div[@id='addcomment'] | ||
9 | |||
10 | test_url: http://code.activestate.com/recipes/500261-named-tuples/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/code.google.com.txt b/inc/3rdparty/site_config/standard/code.google.com.txt new file mode 100644 index 00000000..40a16209 --- /dev/null +++ b/inc/3rdparty/site_config/standard/code.google.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //div[@id="gc-pagecontent"] | ||
2 | strip: //a[@class="backtotop"] | ||
3 | prune: no | ||
4 | |||
5 | test_url: http://code.google.com/apis/analytics/docs/tracking/gaTrackingEcommerce.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/codinghorror.com.txt b/inc/3rdparty/site_config/standard/codinghorror.com.txt new file mode 100644 index 00000000..9c95f107 --- /dev/null +++ b/inc/3rdparty/site_config/standard/codinghorror.com.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | body: //div[@class='blogbody'] | ||
2 | strip: //h3[@class='title'] | ||
3 | date: //h2[@class='date'] | ||
4 | #Should Atwood just be a literal? | ||
5 | author: substring-before( substring-after(//div[@class='posted'], 'y'), 'V') | ||
6 | |||
7 | # tim.kingman@... 2011-07-26 | ||
8 | # Prune:no to retain all-link ULs that are part of the body content like | ||
9 | # http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html | ||
10 | # Then explicitly strip the "Posted By" and prev/next links that Prune:yes would have removed. | ||
11 | |||
12 | prune: no | ||
13 | strip: //div[@class='posted']/following-sibling::* | ||
14 | strip: //div[@class='posted'] | ||
15 | test_url: http://www.codinghorror.com/blog/2011/07/building-a-pc-part-vii-rebooting.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/collegehumor.com.txt b/inc/3rdparty/site_config/standard/collegehumor.com.txt new file mode 100644 index 00000000..9d75d641 --- /dev/null +++ b/inc/3rdparty/site_config/standard/collegehumor.com.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | title: //h1[@class='title'] | ||
2 | author: //p[@class='byline']/a[1] | ||
3 | date: //*[@class='date'] | ||
4 | |||
5 | body: //div[@class='article_body'] | ||
6 | strip: //p[@class='ca_intro'] | ||
7 | strip: //div[@id='action_bar'] | ||
8 | strip: //div[@class='below_content'] | ||
9 | strip: //div[@id='announcement'] | ||
10 | strip: //div[@id='leftovers'] | ||
11 | strip: //div[@class='form'] | ||
12 | strip: //div[@id='email_overlay'] | ||
13 | strip: //a[@class='close'] | ||
14 | test_url: http://www.collegehumor.com/article/6599562/how-it-happened-the-necktie \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt b/inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt new file mode 100644 index 00000000..800a907d --- /dev/null +++ b/inc/3rdparty/site_config/standard/communities-dominate.blogs.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //div[@class="entry-body"] | ||
2 | test_url: http://communities-dominate.blogs.com/brands/2012/03/brutal-truth-about-lumia-cannot-sustain-even-1-to-1-replacement-of-symbian-windows-phone-strategy-do.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/community.service-now.com.txt b/inc/3rdparty/site_config/standard/community.service-now.com.txt new file mode 100644 index 00000000..10fd2516 --- /dev/null +++ b/inc/3rdparty/site_config/standard/community.service-now.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | body: //div[@id="center"]//div[@class="node"] | ||
2 | title: //div[@id="center"]//h2 | ||
3 | author: substring-after(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "—") | ||
4 | date: substring-before(//div[@id="center"]//div[@class="node"]//span[@class="submitted"], "—") | ||
5 | strip: //div[@id="center"]//h2[1] | ||
6 | strip: //span[@class="submitted"][1] | ||
7 | move_into(//div[@class="node"])://div[@class="breadcrumb"] | ||
8 | test_url: http://community.service-now.com/blog/lawrenceeng/seasons-greetings-servicenow-team \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/computer.org.txt b/inc/3rdparty/site_config/standard/computer.org.txt new file mode 100644 index 00000000..00e6fddf --- /dev/null +++ b/inc/3rdparty/site_config/standard/computer.org.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | strip_id_or_class:column-3 | ||
2 | strip_id_or_class:portlet-boundary | ||
3 | strip_id_or_class:banner | ||
4 | |||
5 | test_url: http://www.computer.org/portal/web/buildyourcareer/careerwatch/jt19 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/computerbase.de.txt b/inc/3rdparty/site_config/standard/computerbase.de.txt new file mode 100644 index 00000000..29199242 --- /dev/null +++ b/inc/3rdparty/site_config/standard/computerbase.de.txt | |||
@@ -0,0 +1,18 @@ | |||
1 | title://h1 | ||
2 | |||
3 | author://div[@id="news-meta"]/a | ||
4 | |||
5 | body://*[@id="main"]/div[1] | ||
6 | |||
7 | strip://*[@id="main"]/div[2] | ||
8 | strip://*[@id="main"]/div[3] | ||
9 | strip://*[@id="page"]//footer | ||
10 | |||
11 | #date: didn't manage to parse it | ||
12 | |||
13 | #Images have to be stripped because the page does it with overlay | ||
14 | strip://img | ||
15 | |||
16 | #figures are not displayed in instapaper... | ||
17 | strip://figure | //figcaption | ||
18 | test_url: http://www.computerbase.de/news/2012-06/verbraucherzentrale-mahnt-blizzard-fuer-diablo-3-ab/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/computerworld.com.txt b/inc/3rdparty/site_config/standard/computerworld.com.txt new file mode 100644 index 00000000..8e1f3e11 --- /dev/null +++ b/inc/3rdparty/site_config/standard/computerworld.com.txt | |||
@@ -0,0 +1,22 @@ | |||
1 | title: //meta[@name='headline']/@content | ||
2 | date: //meta[@name='date']/@content | ||
3 | author: //meta[@name='author']/@content | ||
4 | body: //div[contains(@class, 'article')] | ||
5 | body://div[@id="article_body"] | ||
6 | |||
7 | strip_id_or_class: banner | ||
8 | strip: //noscript | ||
9 | strip: //div[@style='width:1px;height:130px;float:right;'] | ||
10 | strip: //div[@class='storyby'] | ||
11 | strip_image_src: twitter_icon | ||
12 | strip_image_src: rss_bug | ||
13 | |||
14 | tidy: no | ||
15 | prune: no | ||
16 | |||
17 | next_page_link://div[@id="next_page"]/a | ||
18 | |||
19 | single_page_link: concat('http://www.computerworld.com/s/article/print/', substring-after(//link[@rel='canonical']/@href, '/s/article/')) | ||
20 | |||
21 | test_url: http://www.computerworld.com/s/article/9224348/Apple_s_new_OS_X_tightens_screws_on_some_malware | ||
22 | test_url: http://www.computerworld.com/s/article/9227679/Windows_8_Release_Preview_Updated_but_still_uneasy \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/computerworld.dk.txt b/inc/3rdparty/site_config/standard/computerworld.dk.txt new file mode 100644 index 00000000..a83f366f --- /dev/null +++ b/inc/3rdparty/site_config/standard/computerworld.dk.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | strip: //div[contains(@class, 'articleAdtechAd')] | ||
2 | title: //div[@id='article']/h1 | ||
3 | title: //div[contains(@class, 'article')]/h1 | ||
4 | body: //div[@id='articleText'] | ||
5 | test_url: http://www.computerworld.dk/art/56748/test-din-viden-med-computerworlds-store-sommerquiz?a=fp_1&i=0 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/contemporist.com.txt b/inc/3rdparty/site_config/standard/contemporist.com.txt new file mode 100644 index 00000000..d2b289a3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/contemporist.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | # get author from string like "Posted by <author> on <date>" | ||
2 | author: substring-before(substring-after(//div[@class='post']/p[@class='post-meta'], 'by'), 'on') | ||
3 | |||
4 | # get date from string like "Posted by <author> on <date>" | ||
5 | date: substring-after(//div[@class='post']/p[@class='post-meta'], 'on') | ||
6 | |||
7 | # this keeps thumbnail images | ||
8 | prune: no | ||
9 | test_url: http://www.contemporist.com/2011/11/02/landing-200-lamp-by-kim-hyunjoo \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt b/inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt new file mode 100644 index 00000000..9bad2c84 --- /dev/null +++ b/inc/3rdparty/site_config/standard/conversaciones.nokia.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //div[@class='article_header']/h1 | ||
2 | body: //div[@class='article_header']/p | //div[@class='article_body'] | ||
3 | strip_id_or_class: share_this | ||
4 | strip_id_or_class: sociable | ||
5 | prune: no | ||
6 | |||
7 | test_url: http://conversaciones.nokia.com/2011/10/07/cinco-atajos-en-el-nokia-n8/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/core77.com.txt b/inc/3rdparty/site_config/standard/core77.com.txt new file mode 100644 index 00000000..a24374d8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/core77.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | body: //div[@id="permalink"]/div[@class="post"] | ||
2 | |||
3 | strip: //div[@id='backArrow'] | ||
4 | strip: //div[@id='fwdArrow'] | ||
5 | strip: //div[@class="post-title"] | ||
6 | strip: //div[@class="sharing"] | ||
7 | test_url: http://www.core77.com/blog/columns/why_design_education_must_change_17993.asp \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/counterpunch.org.txt b/inc/3rdparty/site_config/standard/counterpunch.org.txt new file mode 100644 index 00000000..c9e92287 --- /dev/null +++ b/inc/3rdparty/site_config/standard/counterpunch.org.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //div[@class='main']//h1[contains(@class, 'article-title')] | ||
2 | author: //div[@class='mainauthorstyle'] | ||
3 | body: //div[@class='main']//div[@class='main-text'] | ||
4 | strip: //td[@width='140'] | ||
5 | |||
6 | test_url: http://www.counterpunch.org/johnstone05172011.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/crazybutable.com.txt b/inc/3rdparty/site_config/standard/crazybutable.com.txt new file mode 100644 index 00000000..d25cd05d --- /dev/null +++ b/inc/3rdparty/site_config/standard/crazybutable.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title://h2 | ||
2 | body://div[contains(@class, 'entrytext')] | ||
3 | test_url: http://www.crazybutable.com/weblog/archives/2010/07/01/house-ideas-that-worked/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/crimemagazine.com.txt b/inc/3rdparty/site_config/standard/crimemagazine.com.txt new file mode 100644 index 00000000..9cf0bccc --- /dev/null +++ b/inc/3rdparty/site_config/standard/crimemagazine.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | autodetect_next_page: no | ||
2 | test_url: http://www.crimemagazine.com/son-sam \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/crimethinc.com.txt b/inc/3rdparty/site_config/standard/crimethinc.com.txt new file mode 100644 index 00000000..74bc6db9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/crimethinc.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@class="readingtext"] | ||
2 | title: substring-after(substring-after(//title, ':'), ':') | ||
3 | test_url: http://www.crimethinc.com/texts/recentfeatures/nightmares.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/crn.de.txt b/inc/3rdparty/site_config/standard/crn.de.txt new file mode 100644 index 00000000..7fa950af --- /dev/null +++ b/inc/3rdparty/site_config/standard/crn.de.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | author: //p[contains(@class,'author')]/a | ||
2 | date: //div[contains(@class,'date')] | ||
3 | test_url: http://www.crn.de/netzwerke-tk/artikel-93103.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/csmonitor.com.txt b/inc/3rdparty/site_config/standard/csmonitor.com.txt new file mode 100644 index 00000000..d4dbc5c8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/csmonitor.com.txt | |||
@@ -0,0 +1,18 @@ | |||
1 | title: //h1[contains(@class, 'head')] | ||
2 | |||
3 | # standard page | ||
4 | body: //div[@id='mainColumn']//div[contains(@class, 'list-article-full')] | ||
5 | # print page | ||
6 | body: //div[@id='mainColumn'] | ||
7 | |||
8 | author: //a[contains(@class, 'ui-author')] | ||
9 | |||
10 | single_page_link: //div[@class='storyToolbar']//a[contains(@href, '/print/')] | ||
11 | |||
12 | strip_id_or_class: storyToolbar | ||
13 | strip_id_or_class: promotion-tag | ||
14 | |||
15 | tidy: no | ||
16 | prune: no | ||
17 | |||
18 | test_url: http://www.csmonitor.com/World/Middle-East/2011/1108/Imminent-Iran-nuclear-threat-A-timeline-of-warnings-since-1979/Earliest-warnings-1979-84 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/csnbayarea.com.txt b/inc/3rdparty/site_config/standard/csnbayarea.com.txt new file mode 100644 index 00000000..131a923b --- /dev/null +++ b/inc/3rdparty/site_config/standard/csnbayarea.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //div[@id='csn_blogST_headline']/h1 | ||
2 | |||
3 | body: //div[@id='csn_blogST_main'] | ||
4 | strip_id_or_class: ipfootnotes | ||
5 | strip: //div[@id='csn_blogST_main']/p[1]/img | ||
6 | strip: //div[@id='csn_blogST_sidebar'] | ||
7 | test_url: http://www.csnbayarea.com/blog/giants-talk/post/-?blog%2Fgiants-talk%2Fpost%2F-=&blockID=578902&feedID=5987 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/csnphilly.com.txt b/inc/3rdparty/site_config/standard/csnphilly.com.txt new file mode 100644 index 00000000..0df72c32 --- /dev/null +++ b/inc/3rdparty/site_config/standard/csnphilly.com.txt | |||
@@ -0,0 +1,22 @@ | |||
1 | # author's name is not isolated as a tag.... ugh | ||
2 | convert_double_br_tags: yes | ||
3 | body: //csn_blogST_main | ||
4 | |||
5 | #junk above and around the article | ||
6 | strip: /html/body/div[4]/div[3]/div/div/div/section/div/div/div/div/div/div | ||
7 | strip: /html/body/div[4]/header | ||
8 | strip_id_or_class: article-right-sidebar | ||
9 | strip_id_or_class: rsn-gigya-sharebar-container | ||
10 | strip_id_or_class: article-bottom | ||
11 | strip_id_or_class: hider | ||
12 | strip_id_or_class: footer | ||
13 | strip_id_or_class: masthead | ||
14 | strip_id_or_class: block-menu-menu-rsn-login-or-register | ||
15 | strip_id_or_class: block-menu-menu-header-links | ||
16 | strip_id_or_class: block-rsn-follow-bar-follow-bar | ||
17 | strip_id_or_class: block-rsn-weather-rsn-weather-scoreboard | ||
18 | strip_id_or_class: logo | ||
19 | strip_id_or_class: element-invisible | ||
20 | strip_id_or_class: site-name | ||
21 | strip: //div[contains(@style, 'none')] | ||
22 | test_url: http://www.csnphilly.com/eagles/can-stoutland-save-danny-watkins-career \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/cucharasonica.com.txt b/inc/3rdparty/site_config/standard/cucharasonica.com.txt new file mode 100644 index 00000000..e691fe83 --- /dev/null +++ b/inc/3rdparty/site_config/standard/cucharasonica.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://cucharasonica.com/2011/09/queen-busca-candidatos-para-su-propia-banda-tributo \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/da.feedsportal.com.txt b/inc/3rdparty/site_config/standard/da.feedsportal.com.txt new file mode 100644 index 00000000..4a00ef44 --- /dev/null +++ b/inc/3rdparty/site_config/standard/da.feedsportal.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | single_page_link: //a | ||
2 | tidy: no | ||
3 | prune: no | ||
4 | |||
5 | test_url: http://da.feedsportal.com/c/585/f/413794/s/17037b5a/l/0L0Stelegraaf0Bnl0Cbinnenland0C10A2757860C0I0IKlacht0Itegen0Idr0B0IFrank0Iniet0I0Eontvankelijk0I0I0Bhtml0Dcid0Frss/ia1.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dailydot.com.txt b/inc/3rdparty/site_config/standard/dailydot.com.txt new file mode 100644 index 00000000..61013993 --- /dev/null +++ b/inc/3rdparty/site_config/standard/dailydot.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | tidy: no | ||
2 | body: //article | ||
3 | |||
4 | test_url: http://www.dailydot.com/entertainment/tumblr-christopher-price-topherchris/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dailykos.com.txt b/inc/3rdparty/site_config/standard/dailykos.com.txt new file mode 100644 index 00000000..124675cb --- /dev/null +++ b/inc/3rdparty/site_config/standard/dailykos.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | body: //div[@id='article-1']//div[contains(@class, 'article-body')] | ||
2 | title: //div[@class='meta']//a[@id='titleHref'] | ||
3 | date: //div[@class='meta']//p[@class='date'] | ||
4 | |||
5 | strip_id_or_class: invisible | ||
6 | strip_id_or_class: divider-doodle | ||
7 | |||
8 | prune: no | ||
9 | |||
10 | test_url: http://www.dailykos.com/story/2012/01/26/1058790/-Newt-Gingrichs-campaign-admits-he-lied-during-debate-about-ABC-News-interview-with-his-ex-wife \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dailymail.co.uk.txt b/inc/3rdparty/site_config/standard/dailymail.co.uk.txt new file mode 100644 index 00000000..c83dbdb0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/dailymail.co.uk.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | body: //div[@id='js-article-text'] | ||
2 | strip: //div[@class='explore-links'] | ||
3 | strip: //div[@id='js-article-text']/br[position()=1] | ||
4 | strip_id_or_class: print-or-mail-links | ||
5 | strip_id_or_class: shareArticles | ||
6 | strip_id_or_class: googleAds | ||
7 | strip_id_or_class: digg-button | ||
8 | strip_id_or_class: article-icon-links-container | ||
9 | strip_id_or_class: clickToEnlarge | ||
10 | tidy: no | ||
11 | |||
12 | test_url: http://www.dailymail.co.uk/news/article-1375423/Royal-wedding-Texan-billionaire-Joe-Albritton-invited-Prince-Charles.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dansdata.com.txt b/inc/3rdparty/site_config/standard/dansdata.com.txt new file mode 100644 index 00000000..96a2bc41 --- /dev/null +++ b/inc/3rdparty/site_config/standard/dansdata.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | autodetect_next_page: no | ||
2 | tidy: no | ||
3 | prune: no | ||
4 | body: //div[@class='NoOverflow'] | ||
5 | test_url: http://www.dansdata.com/gz129.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/daringfireball.net.txt b/inc/3rdparty/site_config/standard/daringfireball.net.txt new file mode 100644 index 00000000..dca8ade7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/daringfireball.net.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //div[@class="article"]/h1 | ||
2 | author: //div[@id="Sidebar"]/p/strong | ||
3 | date: //h6[@class="dateline"] | ||
4 | body: //div[@class="article"] | ||
5 | strip: //h6[@class="dateline"] | ||
6 | strip: //div[@class="article"]/h1 | ||
7 | test_url: http://daringfireball.net/2011/10/apps_are_the_new_channels \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/datanami.com.txt b/inc/3rdparty/site_config/standard/datanami.com.txt new file mode 100644 index 00000000..3534002a --- /dev/null +++ b/inc/3rdparty/site_config/standard/datanami.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //div[@id="article"] | ||
2 | date: //p[@class="date"] | ||
3 | author: //p[@class="byline"] | ||
4 | test_url: http://www.datanami.com/datanami/2011-12-07/new_path_for_sap:_in_memory_computing,_predictive_analysis_converge.html?featured=top \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dcurt.is.txt b/inc/3rdparty/site_config/standard/dcurt.is.txt new file mode 100644 index 00000000..7d11c6e1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/dcurt.is.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: (//article//h2)[1] | ||
2 | body: //article[contains(@class, 'post')] | ||
3 | date: //time[@id='top_time']/@datetime | ||
4 | |||
5 | prune: no | ||
6 | tidy: no | ||
7 | |||
8 | test_url: http://dcurt.is/predictions-txt \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/delong.typepad.com.txt b/inc/3rdparty/site_config/standard/delong.typepad.com.txt new file mode 100644 index 00000000..84fd4f79 --- /dev/null +++ b/inc/3rdparty/site_config/standard/delong.typepad.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | strip_id_or_class: banner | ||
2 | strip_id_or_class: gamma | ||
3 | strip_id_or_class: module-list | ||
4 | test_url: http://delong.typepad.com/sdj/2011/02/in-which-suresh-naidu-visits-the-new-jerusalem.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/derstandard.at.txt b/inc/3rdparty/site_config/standard/derstandard.at.txt new file mode 100644 index 00000000..48722ebd --- /dev/null +++ b/inc/3rdparty/site_config/standard/derstandard.at.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | title: //div[@id='artikelHeader']/h1 | ||
2 | author: //span[@class='author'] | ||
3 | date: //span[@class='date'] | ||
4 | body: //div[@class='copytext'] | ||
5 | strip: //ul[@class='lookupLinksArtikel'] | ||
6 | |||
7 | strip: //div[@id='pageTop'] | ||
8 | strip: //div[@id='toolbar'] | ||
9 | strip: //div[@id='articleTools'] | ||
10 | strip: //div[@id='weiterlesen'] | ||
11 | strip: //div[@id='communityCanvas'] | ||
12 | |||
13 | test_url: http://derstandard.at/1318726018343/Breitband-LTE-Was-bringt-die-neue-Mobilfunk-Generation \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/designtagebuch.de.txt b/inc/3rdparty/site_config/standard/designtagebuch.de.txt new file mode 100644 index 00000000..6096db0b --- /dev/null +++ b/inc/3rdparty/site_config/standard/designtagebuch.de.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | tidy: no | ||
2 | body: //div[@class='main'] | ||
3 | |||
4 | author: substring-before(substring-after(//div[@class='meta-single'], 'erstellt von '), ' am') | ||
5 | date: substring-before(substring-after(//div[@class='meta-single'], ' am '), ' | ') | ||
6 | |||
7 | strip_id_or_class: pagelink | ||
8 | strip_id_or_class: wp-polls | ||
9 | |||
10 | next_page_link: //div[@class='post-page-next']/a | ||
11 | test_url: http://www.designtagebuch.de/die-gefuehlte-lesbarkeit/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/desitvforum.net.txt b/inc/3rdparty/site_config/standard/desitvforum.net.txt new file mode 100644 index 00000000..a6dac5fd --- /dev/null +++ b/inc/3rdparty/site_config/standard/desitvforum.net.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: (//blockquote[contains(@class, 'postcontent')])[1] | ||
2 | body: (//div[starts-with(@id, 'post_message')])[1] | ||
3 | |||
4 | prune: no | ||
5 | tidy: no \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/details.com.txt b/inc/3rdparty/site_config/standard/details.com.txt new file mode 100644 index 00000000..548cabad --- /dev/null +++ b/inc/3rdparty/site_config/standard/details.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //h1[@class="content-headline"] | ||
2 | body: //div[@class="headers-container"] | //div[@class="content-container"] | ||
3 | prune: no | ||
4 | tidy: no | ||
5 | |||
6 | single_page_link: //li[@class='utility-print']/a | ||
7 | |||
8 | test_url: http://www.details.com/culture-trends/critical-eye/201108/best-new-designers-innovations \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/developers.facebook.com.txt b/inc/3rdparty/site_config/standard/developers.facebook.com.txt new file mode 100644 index 00000000..43a8f0a0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/developers.facebook.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: //div[@class="bodyText"]/h1 | ||
2 | author: //div[@class="picture"]/a/img/@alt | ||
3 | test_url: https://developers.facebook.com/blog/post/2012/03/22/developer-spotlight--foodspotting/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt b/inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt new file mode 100644 index 00000000..b960b37e --- /dev/null +++ b/inc/3rdparty/site_config/standard/devlinsangle.blogspot.co.at.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | date: //h2[@class='date-header'] | ||
2 | body: //div[@class='post hentry'] | ||
3 | title: //h3 | ||
4 | strip: //div[@class='post-footer'] | ||
5 | |||
6 | test_url: http://devlinsangle.blogspot.co.at/2012/03/difference-between-teaching-and_01.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dictionary.reference.com.txt b/inc/3rdparty/site_config/standard/dictionary.reference.com.txt new file mode 100644 index 00000000..a1172024 --- /dev/null +++ b/inc/3rdparty/site_config/standard/dictionary.reference.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //h1[@id='query_h1'] | ||
2 | body: //div[contains(@class, 'lunatext results_content')] | ||
3 | strip_id_or_class: spl_unshd | ||
4 | #replace_string(<div class="dicTl">): <div class="dicTl">------------------<br /> | ||
5 | |||
6 | prune: no | ||
7 | |||
8 | test_url: http://www.wired.com/cloudline/2011/10/meet-arms-cortex-a15-the-future-of-the-ipad-and-possibly-the-macbook-air/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/diepresse.com.txt b/inc/3rdparty/site_config/standard/diepresse.com.txt new file mode 100644 index 00000000..7e825a91 --- /dev/null +++ b/inc/3rdparty/site_config/standard/diepresse.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //div[@class='article']/h1 | ||
2 | date: substring-before(//p[@class='articletime'],'|') | ||
3 | body: //div[@id='articletext'] | ||
4 | strip: //div[@class='inlineDiashow'] | ||
5 | |||
6 | test_url: http://diepresse.com/home/politik/aussenpolitik/701905/TibeterProteste_Nonne-verbrennt-sich-selbst?_vl_backlink=/home/politik/index.do \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt b/inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt new file mode 100644 index 00000000..2d2ae2c2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/digiphoto.techbang.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | # default parser works great | ||
2 | # only add "author" and "next page link" reference | ||
3 | # 2012-04-13 | ||
4 | |||
5 | next_page_link: //div[@class = 'pagination']/a[@class = 'next_page'] | ||
6 | |||
7 | author: //*[@class = 'author metadata']/a | ||
8 | test_url: http://digiphoto.techbang.com/posts/2433--commercial-photography-communication-is-the-key-to-a-good-work \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/digital-photography-school.com.txt b/inc/3rdparty/site_config/standard/digital-photography-school.com.txt new file mode 100644 index 00000000..37192ac0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/digital-photography-school.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //div[@class='post-title']/h1 | ||
2 | author: //a[@href='#author'] | ||
3 | body: //div[@class='post-content'] | ||
4 | strip: //div[@class='post-meta'] | ||
5 | |||
6 | test_url: http://www.digital-photography-school.com/10-ways-to-develop-yourself-photographically \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/digitalspy.co.uk.txt b/inc/3rdparty/site_config/standard/digitalspy.co.uk.txt new file mode 100644 index 00000000..b21431d7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/digitalspy.co.uk.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //div[@class="article_header"]/h1 | ||
2 | date: //div[@class="article_pub"]/span[@class="time"] | ||
3 | author: //div[@class="article_pub"]/span[@class="editors"]/a/text() | ||
4 | body: //div[@class="article_body clear_left"] | ||
5 | test_url: http://www.digitalspy.co.uk/movies/at-the-movies/a364066/top-5-super-bowl-movie-trailers-the-avengers-battleship-more.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dilbert.com.txt b/inc/3rdparty/site_config/standard/dilbert.com.txt new file mode 100644 index 00000000..413e5506 --- /dev/null +++ b/inc/3rdparty/site_config/standard/dilbert.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | convert_double_br_tags: yes | ||
2 | |||
3 | title: substring(substring-after(//title, ':'), 1, string-length(substring-after(//title, ':')) - 10) | ||
4 | body: //*[contains(@class, 'SB_Content')] | ||
5 | author: string('Scott Adams') | ||
6 | date: //*[contains(@class, 'SB_Detail')]/text()[1] | ||
7 | |||
8 | test_url: http://dilbert.com/blog/entry/death_by_hypnosis_or_not/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dinamalar.com.txt b/inc/3rdparty/site_config/standard/dinamalar.com.txt new file mode 100644 index 00000000..9ef198c9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/dinamalar.com.txt | |||
@@ -0,0 +1,19 @@ | |||
1 | title: //div[@class='newsdetbd'] | ||
2 | body: //div[@id='innerleft'] | ||
3 | #//p[@class = 'plnht'] | ||
4 | strip_image_src: /albums/ | ||
5 | strip: //div[@class='mrrt'] | ||
6 | prune: yes | ||
7 | strip_id_or_class: 'fdpd' | ||
8 | strip_id_or_class: 'epapt' | ||
9 | strip_id_or_class: 'newsrtwd' | ||
10 | strip_id_or_class: 'padtp' | ||
11 | strip_id_or_class: 'newdt' | ||
12 | strip_id_or_class: 'newdlt' | ||
13 | strip: //div[@id='selNotes'] | ||
14 | strip_id_or_class: 'clsNotes' | ||
15 | strip_id_or_class: 'clear' | ||
16 | strip_id_or_class: 'cmtwrap' | ||
17 | strip_id_or_class: 'sess' | ||
18 | strip_id_or_class: 'parents' | ||
19 | test_url: http://www.dinamalar.com/News_Detail.asp?Id=295725 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dn.se.txt b/inc/3rdparty/site_config/standard/dn.se.txt new file mode 100644 index 00000000..86bb3b8d --- /dev/null +++ b/inc/3rdparty/site_config/standard/dn.se.txt | |||
@@ -0,0 +1,26 @@ | |||
1 | # Since this element has class="clear", the Instapaper stylesheets (at least in this text parser preview) will render it unreadable, with a 1px font size and line height. | ||
2 | |||
3 | body: //div[@id="article-content"] | ||
4 | |||
5 | |||
6 | # Ads | ||
7 | strip_id_or_class: advert-space | ||
8 | |||
9 | # Read more, recommend, comments etc | ||
10 | strip_id_or_class: fbc-recommend | ||
11 | strip_id_or_class: recommend | ||
12 | strip_id_or_class: article-readers | ||
13 | strip_id_or_class: article-addons | ||
14 | strip_id_or_class: hook | ||
15 | strip_id_or_class: right | ||
16 | strip_id_or_class: footer | ||
17 | |||
18 | # Other news | ||
19 | strip: //div[@id="mirrors"] | ||
20 | |||
21 | # Author | ||
22 | author: //div[@id="byline"]/div/p/strong | ||
23 | |||
24 | # Date | ||
25 | date: substring(substring-after(//p[@class="published"], 'Publicerad '), 0, 11) | ||
26 | test_url: http://www.dn.se/nyheter/varlden/landade-flygplan-mitt-i-villaomrade \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/doctac.com.txt b/inc/3rdparty/site_config/standard/doctac.com.txt new file mode 100644 index 00000000..9f65ea9b --- /dev/null +++ b/inc/3rdparty/site_config/standard/doctac.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | strip: //*[(@id = "featured")] | ||
2 | |||
3 | author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ') | ||
4 | |||
5 | date: concat(//div[@class='month'],' ',//div[@class='day']) | ||
6 | |||
7 | #doctac doesn't provide a year, but month/day is better than nothing | ||
8 | test_url: http://www.doctac.com/mac/iphone/instapaper-update-app/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/domusweb.it.txt b/inc/3rdparty/site_config/standard/domusweb.it.txt new file mode 100644 index 00000000..81683f02 --- /dev/null +++ b/inc/3rdparty/site_config/standard/domusweb.it.txt | |||
@@ -0,0 +1,21 @@ | |||
1 | # TODO: clean up the extra junk at the end of articles | ||
2 | |||
3 | # general text formatting | ||
4 | prune: no | ||
5 | convert_double_br_tags:yes | ||
6 | |||
7 | # where to find the basic metadata | ||
8 | author://a[@class='articleauthor'] | ||
9 | date://a[starts-with(@href,'/en/search/published/')] | ||
10 | title:substring-before(//h2[@class='title'],'—') | ||
11 | body://div[@id='maincontainer'] | ||
12 | |||
13 | dissolve://div[starts-with(@id,'commentableblock')] | ||
14 | |||
15 | # clean up the crap | ||
16 | strip://div[contains(@class,'domusnetwork')] | ||
17 | strip://div[contains(@class,'relative_wrapper')] | ||
18 | |||
19 | strip://div[contains(@class,'captionsubimage')]/img[contains(@class,'arrow')] | ||
20 | wrap_in(em): //div[contains(@class,'captionsubimage')]/span | ||
21 | test_url: http://www.domusweb.it/en/design/in-praise-of-lost-time/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dou.ua.txt b/inc/3rdparty/site_config/standard/dou.ua.txt new file mode 100644 index 00000000..22907c22 --- /dev/null +++ b/inc/3rdparty/site_config/standard/dou.ua.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //h1[@itemprop="name"] | ||
2 | |||
3 | author: //div[contains(@class, 'author')]//div[contains(@class, 'name')]/a | ||
4 | |||
5 | date: //div[contains(@class, 'b-info')]//span[contains(@class, 'date')] | ||
6 | |||
7 | body: //div[contains(@class, 'b-typo')] | ||
8 | test_url: http://dou.ua/lenta/interviews/andrej-havryuchenko/?from=sb_mostcomm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/douban.com.txt b/inc/3rdparty/site_config/standard/douban.com.txt new file mode 100644 index 00000000..99d7e5dc --- /dev/null +++ b/inc/3rdparty/site_config/standard/douban.com.txt | |||
@@ -0,0 +1,21 @@ | |||
1 | # This filter is tested on: | ||
2 | # http://www.douban.com/note/215003067/ | ||
3 | # http://www.douban.com/note/213540049/ | ||
4 | # http://www.douban.com/group/topic/31140104/ | ||
5 | |||
6 | title: //div[@class='note-header']/h1 | ||
7 | title: //div[@id='content']/h1 | ||
8 | |||
9 | author: //div[@class='info']/ul/li/a | ||
10 | author: //h3/span/a | ||
11 | |||
12 | date://div[@class='note-header']/div/span | ||
13 | date://h3/span[contains(@class, 'color-green')] | ||
14 | |||
15 | body://div[contains(@class, 'note')] | ||
16 | body://div[contains(@class, 'topic-content')] | ||
17 | |||
18 | strip://h3 | ||
19 | |||
20 | convert_double_br_tags: yes | ||
21 | test_url: http://www.douban.com/group/topic/31140104/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dpreview.com.txt b/inc/3rdparty/site_config/standard/dpreview.com.txt new file mode 100644 index 00000000..30179a3b --- /dev/null +++ b/inc/3rdparty/site_config/standard/dpreview.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | # next_page_link for product review | ||
2 | # example: http://www.dpreview.com/reviews/lytro/ | ||
3 | next_page_link: //img[@alt = 'Next page']/../@href | ||
4 | |||
5 | # next_page_link for other articles | ||
6 | # example: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1 | ||
7 | next_page_link: //*[@class = 'pages']/*/td[@class = 'next enabled']/a | ||
8 | single_page_link: //a[contains(.,'Print view')] | ||
9 | test_url: http://www.dpreview.com/articles/6126592906/first-impressions-using-the-fujifilm-x-pro1 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dr.dk.txt b/inc/3rdparty/site_config/standard/dr.dk.txt new file mode 100644 index 00000000..7e46b0d6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/dr.dk.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | author: //div[@class='articleFunctions']//a | ||
3 | date: //meta[@name='pubdate']/@content | ||
4 | |||
5 | # Can you strip elements from the body only? It is required here (`//div[@class='articleContent']/p` breaks for some reason) | ||
6 | body: //div[@class='articleContent'] | ||
7 | |||
8 | tidy: no | ||
9 | test_url: http://www.dr.dk/Nyheder/Udland/2011/10/24/150115.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dramasonline.com.txt b/inc/3rdparty/site_config/standard/dramasonline.com.txt new file mode 100644 index 00000000..659d0443 --- /dev/null +++ b/inc/3rdparty/site_config/standard/dramasonline.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | body: //div[@class='postext'] | ||
2 | |||
3 | strip_id_or_class: ratingblock | ||
4 | strip_id_or_class: hreview-aggregate | ||
5 | strip: //div[contains(@style, 'display: none;')] | ||
6 | |||
7 | tidy: no | ||
8 | prune: no | ||
9 | |||
10 | test_url: http://www.dramasonline.com/jago-pakistan-jago-7th-december-2012-ali-gul-pir/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/drdobbs.com.txt b/inc/3rdparty/site_config/standard/drdobbs.com.txt new file mode 100644 index 00000000..b1a9db6f --- /dev/null +++ b/inc/3rdparty/site_config/standard/drdobbs.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | single_page_link: //a[contains(@href, '/article/print')] | ||
2 | test_url: http://www.drdobbs.com/architecture-and-design/240001128 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/drive2.ru.txt b/inc/3rdparty/site_config/standard/drive2.ru.txt new file mode 100644 index 00000000..6125ce79 --- /dev/null +++ b/inc/3rdparty/site_config/standard/drive2.ru.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | body: //div[@class = "description"] | ||
2 | body: //div[@id = "post"] | ||
3 | |||
4 | strip_id_or_class: vcard | ||
5 | strip_id_or_class: journallist | ||
6 | strip_id_or_class: infobox | ||
7 | strip_id_or_class: terms | ||
8 | strip_id_or_class: replieslist | ||
9 | strip_id_or_class: communityside | ||
10 | |||
11 | |||
12 | test_url: http://www.drive2.ru/cars/audi/a6/a6_c5/elysey/journal/288230376151836654/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/drupal.org.txt b/inc/3rdparty/site_config/standard/drupal.org.txt new file mode 100644 index 00000000..ffb77e4d --- /dev/null +++ b/inc/3rdparty/site_config/standard/drupal.org.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title://h1 | ||
2 | author://div[@class="submitted"]/a | ||
3 | date:substring-after(//div[@class="meta"],'modified: ') | ||
4 | date:substring-after(//div[@class="submitted"],'on ') | ||
5 | body://div[@class="node-content"] | ||
6 | strip://div[@class="meta"] | ||
7 | strip_id_or_class:book-navigation | ||
8 | test_url: http://drupal.org/node/1327354 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dukebasketballreport.com.txt b/inc/3rdparty/site_config/standard/dukebasketballreport.com.txt new file mode 100644 index 00000000..418c9f62 --- /dev/null +++ b/inc/3rdparty/site_config/standard/dukebasketballreport.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //h2/a | ||
2 | author: substring-before(substring-after(//span[@class='byline'], 'by'), ',') | ||
3 | date: substring-before(substring-after(//span[@class='byline'], ','), '|') | ||
4 | body: //div[@class='entry'] | ||
5 | |||
6 | |||
7 | # strip out auction stuff at the end of posts | ||
8 | # tidy kills the center tag, so disable it | ||
9 | tidy: no | ||
10 | strip: //center//table | ||
11 | test_url: http://www.dukebasketballreport.com/articles/?p=42660 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/dvice.com.txt b/inc/3rdparty/site_config/standard/dvice.com.txt new file mode 100644 index 00000000..c8163680 --- /dev/null +++ b/inc/3rdparty/site_config/standard/dvice.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | strip://*[@id = 'blog_top_stories'] | ||
2 | strip://*[@id = 'takeover_off'] | ||
3 | strip://*[@id = 'right_gray_box'] | ||
4 | strip://*[@class = 'blog_topics'] | ||
5 | strip://*[@class = 'section_titles'] | ||
6 | |||
7 | author://div[@class = 'post_author_info']/a | ||
8 | date://div[@class = 'post_date_info'] | ||
9 | test_url: http://dvice.com/archives/2012/05/is-nfc-and-smar.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/eamesinerudition.com.txt b/inc/3rdparty/site_config/standard/eamesinerudition.com.txt new file mode 100644 index 00000000..908a1b51 --- /dev/null +++ b/inc/3rdparty/site_config/standard/eamesinerudition.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //div [@class="post contain"]/h1 | ||
2 | strip: //div [@class="post contain"]/h1 | ||
3 | body: //div [@class="post contain"] | ||
4 | author: substring-before(//title, ':') | ||
5 | author: substring-before(//title, ' ') | ||
6 | |||
7 | |||
8 | test_url: http://eamesinerudition.com/2012/03/hospital-numbers-are-bad-for-you \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/eandt.theiet.org.txt b/inc/3rdparty/site_config/standard/eandt.theiet.org.txt new file mode 100644 index 00000000..c4c38f25 --- /dev/null +++ b/inc/3rdparty/site_config/standard/eandt.theiet.org.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //h1 | ||
2 | date: //div[@class="et_dateUnderTitle"] | ||
3 | author: substring-after(//div[@class="et_authorUnderTitle"], 'By ') | ||
4 | body: //div[@id="et_leftCol640split"] | ||
5 | |||
6 | strip: //div[@id="et_leftCol640splitRight"] | ||
7 | strip: //div[@class="et_light_greybgboxlower"] | ||
8 | test_url: http://eandt.theiet.org/magazine/2011/12/this-festive-waste.cfm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/eastoftheweb.com.txt b/inc/3rdparty/site_config/standard/eastoftheweb.com.txt new file mode 100644 index 00000000..d762091c --- /dev/null +++ b/inc/3rdparty/site_config/standard/eastoftheweb.com.txt | |||
@@ -0,0 +1,18 @@ | |||
1 | title: //div[@class='title_text'] | ||
2 | |||
3 | author: //div[@class='author_text'] | ||
4 | |||
5 | body: //div[@class='story_text']/.. | ||
6 | |||
7 | strip: //b | ||
8 | |||
9 | strip_id_or_class: back_to_top | ||
10 | strip_id_or_class: author_text | ||
11 | strip_id_or_class: title_text | ||
12 | |||
13 | wrap_in(center): //a | ||
14 | |||
15 | dissolve: //a | ||
16 | |||
17 | footnotes: no | ||
18 | test_url: http://www.eastoftheweb.com/short-stories/UBooks/Horl.shtml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ebay.com.txt b/inc/3rdparty/site_config/standard/ebay.com.txt new file mode 100644 index 00000000..5fa18ff3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ebay.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //h1[@class='it-ttl'] | //div[@id='mainImgHldr'] | //span[@id='prcIsum'] | ||
2 | |||
3 | strip_image_src: imgLoading_30x30.gif | ||
4 | |||
5 | test_url: http://www.ebay.com/itm/BRAND-NEW-FM-Transmitter-Ca-r-Charger-iPhone-4S-4-4G-3GS-3G-2G-iPod-Touch-/190657497204 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ecetia.com.txt b/inc/3rdparty/site_config/standard/ecetia.com.txt new file mode 100644 index 00000000..d67e9103 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ecetia.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://ecetia.com/2011/09/vida-de-jugon-vii-las-tres-es \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/econlog.econlib.org.txt b/inc/3rdparty/site_config/standard/econlog.econlib.org.txt new file mode 100644 index 00000000..ebafc197 --- /dev/null +++ b/inc/3rdparty/site_config/standard/econlog.econlib.org.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h1[@class="title"] | ||
2 | author: //div[@class="hosted"]/a | ||
3 | date: substring-after(//div[@class="dateline"]/text(), '|') | ||
4 | |||
5 | strip: //a[@class="top" and @href="#"] | ||
6 | test_url: http://econlog.econlib.org/archives/2012/04/blinder_on_heal.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/economia.estadao.com.br.txt b/inc/3rdparty/site_config/standard/economia.estadao.com.br.txt new file mode 100644 index 00000000..b59f554e --- /dev/null +++ b/inc/3rdparty/site_config/standard/economia.estadao.com.br.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | date: //div[@class="bb-md-noticia-fecha"] | ||
2 | body: //div[@class="corpo"] | ||
3 | dissolve: //div[@class="bb-md-noticia-extras"] | ||
4 | strip: //strong | ||
5 | strip_id_or_class: bb-md-noticia-foto-autor | ||
6 | strip_id_or_class: bb-md-noticia-foto-bajada | ||
7 | test_url: http://economia.estadao.com.br/noticias/economia,cmn-aprova-r-67-bi-em-credito-para-20-setores-da-economia,118501,0.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/economist.com.txt b/inc/3rdparty/site_config/standard/economist.com.txt new file mode 100644 index 00000000..71dd62f5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/economist.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //div[@class='ec-blog-headline'] | ||
2 | body: //div[@class='ec-blog-body'] | ||
3 | body: //div[@class='ec-article-content clear'] | ||
4 | strip: //div[@class='related-items'] | ||
5 | date: substring-before(//p[@class='ec-article-info'], '|') | ||
6 | prune: no | ||
7 | |||
8 | autodetect_next_page: no | ||
9 | |||
10 | test_url: http://www.economist.com/node/21528429 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/edge-online.com.txt b/inc/3rdparty/site_config/standard/edge-online.com.txt new file mode 100644 index 00000000..461d909c --- /dev/null +++ b/inc/3rdparty/site_config/standard/edge-online.com.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | body: //h2[@class='strapline'] | //article[contains(@class, 'node-article')] | ||
3 | date: //time[@pubdate]/@datetime | ||
4 | author: //span[@class='author-name'] | ||
5 | prune: no | ||
6 | tidy: no | ||
7 | strip: //footer | ||
8 | |||
9 | replace_string(<p>[ pagebreak ]</p>): <!-- pagebreak --> | ||
10 | |||
11 | single_page_link: //a[contains(@href, '?page=show')] | ||
12 | |||
13 | test_url: http://www.edge-online.com/features/telling-modern-warfares-story \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/edge.org.txt b/inc/3rdparty/site_config/standard/edge.org.txt new file mode 100644 index 00000000..9980000d --- /dev/null +++ b/inc/3rdparty/site_config/standard/edge.org.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //div[@class='HomeLeftPannel IMGCTRL']/h2 | ||
2 | body: //div[@class='HomeLeftPannel IMGCTRL']//div[@class='Brownalink' or @id='shortdesc'] | ||
3 | tidy: no | ||
4 | |||
5 | test_url: http://edge.org/print/conversation.php?cid=the-argumentative-theory \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/edition.cnn.com.txt b/inc/3rdparty/site_config/standard/edition.cnn.com.txt new file mode 100644 index 00000000..dc8ebe14 --- /dev/null +++ b/inc/3rdparty/site_config/standard/edition.cnn.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | body: //div[@id='cnnContentContainer']//div[contains(@class, 'cnn_strycntntlft')] | ||
2 | strip: //div[@id='cnnCVP2'] | ||
3 | strip_id_or_class: cnn_strylftcexpbx | ||
4 | strip_id_or_class: cnn_strylctcqrelt | ||
5 | strip_id_or_class: cnn_strybtntoolsbttm | ||
6 | strip_id_or_class: cnn_stryftsbttm | ||
7 | strip_id_or_class: cnn_strybtmcntnt | ||
8 | prune: no | ||
9 | test_url: http://edition.cnn.com/2011/US/04/29/severe.weather/index.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ekultura.hu.txt b/inc/3rdparty/site_config/standard/ekultura.hu.txt new file mode 100644 index 00000000..59f6a711 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ekultura.hu.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //h1[@class='style6 nevek'] | ||
2 | |||
3 | body: //div[@class='bal3'] | ||
4 | |||
5 | |||
6 | prune: yes | ||
7 | |||
8 | tidy: yes | ||
9 | convert_double_br_tags: yes | ||
10 | |||
11 | test_url: http://ekultura.hu/olvasnivalo/egyeb/cikk/2010-12-15/interju-galvolgyi-judit-2010-december \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/elance.com.txt b/inc/3rdparty/site_config/standard/elance.com.txt new file mode 100644 index 00000000..52ffe2d0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/elance.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@id='jobDesc-bd']/p | ||
2 | |||
3 | test_url: http://www.elance.com/j/xml-technical-intergration/23687172/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/elektroniknet.de.txt b/inc/3rdparty/site_config/standard/elektroniknet.de.txt new file mode 100644 index 00000000..07664719 --- /dev/null +++ b/inc/3rdparty/site_config/standard/elektroniknet.de.txt | |||
@@ -0,0 +1,27 @@ | |||
1 | title: //h1 | ||
2 | date: //div[@class='datum'] | ||
3 | single_page_link: //a[contains(@href, '?type=99')] | ||
4 | |||
5 | # this hack preserves the intro text, because it would otherwise be stripped if the title is set to //h1 | ||
6 | dissolve: //div[@class='artikelMeldung'] | ||
7 | |||
8 | |||
9 | strip_id_or_class: anzeige | ||
10 | strip_id_or_class: top_page_navigation | ||
11 | strip_id_or_class: cr_image_container | ||
12 | strip_id_or_class: cr_image_reference | ||
13 | strip_id_or_class: cr_image_icon | ||
14 | strip_id_or_class: _close_txt | ||
15 | strip_id_or_class: _close_ico | ||
16 | strip_id_or_class: clearer | ||
17 | |||
18 | strip://h1 | ||
19 | strip://h6 | ||
20 | strip://div[contains(@id, 'plista')] | ||
21 | strip://img[contains(@id,'tiny')] | ||
22 | strip://img[@class='cr_image'] | ||
23 | |||
24 | # strip url at the top | ||
25 | strip: //p[@style='font-size: 10px;'] | ||
26 | |||
27 | test_url: http://www.elektroniknet.de/automotive/technik-know-how/sicherheitselektronik/article/87717/0/Besser_als_die_Wirklichkeit/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/elmalpensante.com.txt b/inc/3rdparty/site_config/standard/elmalpensante.com.txt new file mode 100644 index 00000000..9fecd663 --- /dev/null +++ b/inc/3rdparty/site_config/standard/elmalpensante.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | single_page_link: //a[contains(@href, 'print_contenido')] | ||
2 | title: //h2 | ||
3 | author: //div[@class="autor"] | ||
4 | test_url: http://www.elmalpensante.com/index.php?doc=display_contenido&id=668 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/elpais.com.txt b/inc/3rdparty/site_config/standard/elpais.com.txt new file mode 100644 index 00000000..32f9fc3f --- /dev/null +++ b/inc/3rdparty/site_config/standard/elpais.com.txt | |||
@@ -0,0 +1,22 @@ | |||
1 | title: //meta[@name='DC.title']/@content | ||
2 | title: //div[contains(@class, 'cabecera_noticia')]//h1 | ||
3 | date: //meta[@name='DC.date']/@content | ||
4 | date: //meta[@name='date']/@content | ||
5 | body: //div[@class='columna_texto'] | ||
6 | body: //div[@id='cuerpo_noticia'] | ||
7 | body: //div[@class='estructura_2col_1zq']//div[@class='margen_n'] | ||
8 | |||
9 | prune: no | ||
10 | |||
11 | strip_id_or_class: disposicion_vertical | ||
12 | strip_id_or_class: ampliar_foto | ||
13 | strip_id_or_class: utilidades | ||
14 | strip_id_or_class: info_relacionada | ||
15 | strip_id_or_class: m-kiosko | ||
16 | strip_id_or_class: info_complementa | ||
17 | |||
18 | strip: //div[starts-with(@id, 'sumario') and contains(., 'más información')] | ||
19 | strip: //div[@id='coment' or @id='foros_not'] | ||
20 | |||
21 | test_url: http://elpais.com/elpais/2012/02/06/gente/1328526783_491687.html | ||
22 | test_url: http://www.elpais.com/articulo/cultura/mano/retrato/materia/elpepicul/20120207elpepicul_2/Tes \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/en.espnf1.com.txt b/inc/3rdparty/site_config/standard/en.espnf1.com.txt new file mode 100644 index 00000000..c1a91063 --- /dev/null +++ b/inc/3rdparty/site_config/standard/en.espnf1.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | body: //div[@id='content'] | ||
2 | strip: //div[@class='rl'] | ||
3 | strip: //p[@class='authdesc'] | ||
4 | strip: //p[@class='strybtm'] | ||
5 | strip: //div[@id='stryFtrLft'] | ||
6 | strip: //div[@id='f1Conversation'] | ||
7 | strip: //div[@id='cmtSpncrRuler'] | ||
8 | strip: //div[@id='stryComments'] | ||
9 | strip: //div[@id='athrData'] | ||
10 | test_url: http://en.espnf1.com/monaco/motorsport/story/50529.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/engadget.com.txt b/inc/3rdparty/site_config/standard/engadget.com.txt new file mode 100644 index 00000000..6cc6b14e --- /dev/null +++ b/inc/3rdparty/site_config/standard/engadget.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | body: //div[@class='post_body'] | ||
3 | date: //*[@class='post_time'] | ||
4 | |||
5 | prune: no | ||
6 | |||
7 | test_url: http://www.engadget.com/2011/05/20/screen-grabs-the-mentalist-takes-the-ipad-to-new-heights/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/engineering.tumblr.com.txt b/inc/3rdparty/site_config/standard/engineering.tumblr.com.txt new file mode 100644 index 00000000..35ace467 --- /dev/null +++ b/inc/3rdparty/site_config/standard/engineering.tumblr.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //h2 | ||
2 | body: //div[@class="post_content"] | ||
3 | author: //p[@class="author"]/a | ||
4 | date: //p[@class="date"] | ||
5 | strip: //h2 | ||
6 | strip: //header | ||
7 | test_url: http://engineering.tumblr.com/post/21276808338/tumblr-firehose \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/english.aljazeera.net.txt b/inc/3rdparty/site_config/standard/english.aljazeera.net.txt new file mode 100644 index 00000000..aed3a5f9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/english.aljazeera.net.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //span[@id='DetailedTitle'] | ||
2 | body: //div[@id='ctl00_cphBody_dvArticleInfoBlock'] | //td[@class='DetailedSummary'] | ||
3 | strip_id_or_class: sidebar | ||
4 | strip_id_or_class: Skyscrapper_Body | ||
5 | strip: //td[@class='DetailedSummary']/table[position() != 1] | ||
6 | prune: no | ||
7 | test_url: http://english.aljazeera.net//news/middleeast/2011/04/20114681444376835.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/enikos.gr.txt b/inc/3rdparty/site_config/standard/enikos.gr.txt new file mode 100644 index 00000000..e2b99bfc --- /dev/null +++ b/inc/3rdparty/site_config/standard/enikos.gr.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | body: //div[@id='article']//div[contains(@class, 'inside')] | ||
2 | |||
3 | strip_id_or_class: tags | ||
4 | strip_id_or_class: actions | ||
5 | strip_id_or_class: google-ads | ||
6 | |||
7 | prune: no | ||
8 | |||
9 | test_url: http://www.enikos.gr/politics/98606,To_oxi_toy_Agorastoy_stoys_Germanoys.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt b/inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt new file mode 100644 index 00000000..3e7fba09 --- /dev/null +++ b/inc/3rdparty/site_config/standard/entertainment.timesonline.co.uk.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | author://div[@class = 'article-author']/span[@class = 'byline'] | ||
2 | title://h1[@class = 'heading'] | ||
3 | body://div[@id = 'related-article-links'] | ||
4 | strip://div[@id = 'comment-sort-order'] | ||
5 | strip://div[@id = 'my-profile'] | ||
6 | strip://div[@class = 'article-author'] | ||
7 | strip://div[@class = 'bg-f8f1d8 width-385 text-left'] | ||
8 | strip://div[@id = 'login-status'] | ||
9 | strip://div[@class = 'puff-padding'] | ||
10 | test_url: http://entertainment.timesonline.co.uk/tol/arts_and_entertainment/the_tls/article7177738.ece \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/es.hu.txt b/inc/3rdparty/site_config/standard/es.hu.txt new file mode 100644 index 00000000..19a1e9dd --- /dev/null +++ b/inc/3rdparty/site_config/standard/es.hu.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: concat(//div[@class='doc_author'], ' - ', upper-case(//div[@class='doc_title'])) | ||
2 | |||
3 | body: //div[@class='doc'] | ||
4 | |||
5 | prune: yes | ||
6 | |||
7 | tidy: yes | ||
8 | convert_double_br_tags: yes | ||
9 | |||
10 | strip: //a[contains(@href, 'www.facebook.com/pages/Elet-es-Irodalom/')] | ||
11 | test_url: http://www.es.hu/2010-12-08_vissza-a-partpenzt \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/escapistmagazine.com.txt b/inc/3rdparty/site_config/standard/escapistmagazine.com.txt new file mode 100644 index 00000000..7e17a04d --- /dev/null +++ b/inc/3rdparty/site_config/standard/escapistmagazine.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | strip_comments: no | ||
2 | test_url: http://www.escapistmagazine.com/articles/view/columns/extraconsideration/8717-Extra-Consideration-The-Story \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/espn.go.com.txt b/inc/3rdparty/site_config/standard/espn.go.com.txt new file mode 100644 index 00000000..319d352b --- /dev/null +++ b/inc/3rdparty/site_config/standard/espn.go.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | title: //div[@class='headline'] | //div[@class='mod-header']/h3 | ||
2 | body: //div[contains(@class, 'article')] | ||
3 | strip: //div[contains(@class, 'mod-inline')] | ||
4 | strip: //*/span[@class='page-actions'] | ||
5 | strip: //div[@class='page-actions']/* | ||
6 | strip: //div[@class='headline'] | //div[@class='mod-header']/h3 | ||
7 | strip: //div[@class='mod-blog-navigation'] | ||
8 | strip: //div[@class='monthday'] | ||
9 | strip: //div[@class='time'] | ||
10 | strip: //div[@class='timeofday'] | ||
11 | strip: //div[contains(@class, 'mod-conversations')] | ||
12 | test_url: http://espn.go.com/boston/mlb/story/_/id/7092528/terry-francona-victim-latest-red-sox-smear-campaign \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/esquire.com.txt b/inc/3rdparty/site_config/standard/esquire.com.txt new file mode 100644 index 00000000..7566e8cc --- /dev/null +++ b/inc/3rdparty/site_config/standard/esquire.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //h1 | ||
2 | author: //div[@id='byline'] | ||
3 | |||
4 | body: //div[@id='printBody'] | ||
5 | |||
6 | single_page_link: concat('http://www.esquire.com/print-this/', substring-after(//link[@rel='canonical']/@href, 'esquire.com/')) | ||
7 | |||
8 | prune: no | ||
9 | |||
10 | test_url: http://www.esquire.com/features/impossible/price-is-right-perfect-bid-0810 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/essentialpublicradio.org.txt b/inc/3rdparty/site_config/standard/essentialpublicradio.org.txt new file mode 100644 index 00000000..88c8c560 --- /dev/null +++ b/inc/3rdparty/site_config/standard/essentialpublicradio.org.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //*[@itemprop='headline'] | ||
2 | author: //*[@itemprop='author'] | ||
3 | date: //*[@itemprop='datePublished'] | ||
4 | body: //*[@itemprop='articleBody'] | ||
5 | strip: //*[contains(@class, 'instapaper_ignore')] | ||
6 | test_url: http://www.essentialpublicradio.org/story/2011-11-14/volunteers-sought-federal-tax-assistance-program-pennsylvania-9421 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/etc.se.txt b/inc/3rdparty/site_config/standard/etc.se.txt new file mode 100644 index 00000000..58da5ef7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/etc.se.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | strip_id_or_class: 'left' | ||
2 | strip_id_or_class: 'right' | ||
3 | strip_id_or_class: 'block-belowcontent' | ||
4 | author: //span[@class = 'name']/a | ||
5 | date: //div[@class= 'datum'] | ||
6 | test_url: http://www.etc.se/intervju/lonsamt-att-radda-jorden \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/eternabuenosaires.com.txt b/inc/3rdparty/site_config/standard/eternabuenosaires.com.txt new file mode 100644 index 00000000..bfa2c5dc --- /dev/null +++ b/inc/3rdparty/site_config/standard/eternabuenosaires.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://eternabuenosaires.com/2011/09/calle-adolfo-bioy-casares \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/eurogamer.net.txt b/inc/3rdparty/site_config/standard/eurogamer.net.txt new file mode 100644 index 00000000..6ecdf6bd --- /dev/null +++ b/inc/3rdparty/site_config/standard/eurogamer.net.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | body: //div[ @class='content' ] | //div[ @class='blog-entry' ] | ||
2 | |||
3 | strip: //h2/abbr | //div[ @class='lowleader' ] | //*[ @class='discussion' ] | //img[ @class='play-button' ] | //div[ @class='boxout' ] | //h2/a | //h2 | //h2/div | //p[ @class='timestamp' ] | //a[ @class='eurogamer-author' ] | //p[ @class='aPager' ] | //h1 | //div[ @id='lowleader' ] | //a[ @class='next' ] | //div[contains(concat(' ', normalize-space(@class), ' '), ' pullquote ')] | ||
4 | |||
5 | date://p[ @class='timestamp' ] | ||
6 | |||
7 | author://a[ @class='eurogamer-author' ] | ||
8 | test_url: http://www.eurogamer.net/articles/digitalfoundry-vs-unreal-engine-4 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/evo.co.uk.txt b/inc/3rdparty/site_config/standard/evo.co.uk.txt new file mode 100644 index 00000000..07162513 --- /dev/null +++ b/inc/3rdparty/site_config/standard/evo.co.uk.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | author: substring-after(//div[@class='articleauthor'],'By ') | ||
2 | |||
3 | # Blog posts | ||
4 | date: //div[@class='articledate'] | ||
5 | # News | ||
6 | date: //div[@class='articledate_b'] | ||
7 | |||
8 | body: //div[@class='articletext'] | ||
9 | |||
10 | convert_double_br_tags: yes | ||
11 | test_url: http://www.evo.co.uk/carreviews/evolongtermtests/280072/bmw_330d_sport_touring.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/expressen.se.txt b/inc/3rdparty/site_config/standard/expressen.se.txt new file mode 100644 index 00000000..d0cb283e --- /dev/null +++ b/inc/3rdparty/site_config/standard/expressen.se.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //div[@id='article']/div[contains(@class, 'content')]/h1 | ||
2 | body: //div[@id='article']/div[contains(@class, 'content')] | ||
3 | date: //div[contains(@class, 'article-slot')]/descendant::div[contains(@id, 'articledates')] | ||
4 | |||
5 | strip: //img[contains(@src, 'img/px.gif')] | ||
6 | prune: no | ||
7 | # remove Facebook banner and obtrusive ad | ||
8 | strip: //div[@id='article']/div[contains(@class, 'content')]/div[contains(@class, 'art-right')] | ||
9 | test_url: http://www.expressen.se/kultur/1.2683904/medan-natet-dras-at \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/extracine.com.txt b/inc/3rdparty/site_config/standard/extracine.com.txt new file mode 100644 index 00000000..52b598da --- /dev/null +++ b/inc/3rdparty/site_config/standard/extracine.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://extracine.com/2011/09/straw-dogs-la-original \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/f1actual.com.txt b/inc/3rdparty/site_config/standard/f1actual.com.txt new file mode 100644 index 00000000..6ef2738a --- /dev/null +++ b/inc/3rdparty/site_config/standard/f1actual.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://f1actual.com/2011/09/previo-gran-premio-de-singapur \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/facta.co.jp.txt b/inc/3rdparty/site_config/standard/facta.co.jp.txt new file mode 100644 index 00000000..c17e0b8c --- /dev/null +++ b/inc/3rdparty/site_config/standard/facta.co.jp.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@class='content'] | ||
2 | |||
3 | test_url: http://facta.co.jp/blog/archives/20111026001026.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/falter.at.txt b/inc/3rdparty/site_config/standard/falter.at.txt new file mode 100644 index 00000000..b941b740 --- /dev/null +++ b/inc/3rdparty/site_config/standard/falter.at.txt | |||
@@ -0,0 +1,18 @@ | |||
1 | title: //h2[@class='related relatedTitle'] | ||
2 | author: //a[contains(@href, 'liste.php?author_id')] | ||
3 | |||
4 | # can't think of a better way unfortunately, really bad markup on this site | ||
5 | date: substring-after(//td[@style='width:85%;'], 'vom') | ||
6 | |||
7 | # not sure why, but instapaper seems to suck up the teaser paragraph | ||
8 | # not solved! | ||
9 | body: //div[contains(@class, 'teaser')] | ||
10 | body: //div[@id='content'] | ||
11 | |||
12 | # cleanup | ||
13 | strip: //img[@src='http://www.falter.at/web/_pics/falterlogo_dblau.gif'] | ||
14 | strip: //div[@class='servicebox'] | ||
15 | strip: //h1 | ||
16 | strip: //br | ||
17 | strip: //td[@id='adcol'] | ||
18 | test_url: http://www.falter.at/web/print/detail.php?id=1634 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/fanfiction.net.txt b/inc/3rdparty/site_config/standard/fanfiction.net.txt new file mode 100644 index 00000000..8d0c4daf --- /dev/null +++ b/inc/3rdparty/site_config/standard/fanfiction.net.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //*[@id = 'storytext'] | ||
2 | author: //a[starts-with(@href, '/u/')] | ||
3 | next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='") | ||
4 | autodetect_next_page:yes | ||
5 | strip_id_or_class: 'a2a_kit' | ||
6 | test_url: http://www.fanfiction.net/s/6497403/1/Spartan_Love \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/fastcompany.com.txt b/inc/3rdparty/site_config/standard/fastcompany.com.txt new file mode 100644 index 00000000..5547a76c --- /dev/null +++ b/inc/3rdparty/site_config/standard/fastcompany.com.txt | |||
@@ -0,0 +1,16 @@ | |||
1 | title: //h1 | ||
2 | author: //h5[@class='byline']//a | ||
3 | date: //h5[@class='date'] | ||
4 | body: //figure[@class='node-poster'] | //div[contains(@class, "node-content")] | ||
5 | strip_id_or_class: article-top-wrapper | ||
6 | strip_id_or_class: footer-message | ||
7 | strip_id_or_class: print-logo | ||
8 | strip: //cite | ||
9 | strip://*[@class='timestamp'] | ||
10 | strip://div[@id='page_right'] | ||
11 | strip://section[@id='header_region'] | ||
12 | strip://h1[@class='node-title'] | ||
13 | strip://div[@class='node-submitted'] | ||
14 | strip_id_or_class: skipnav | ||
15 | test_url: http://www.fastcompany.com/3000226/link-between-quietness-and-productivity | ||
16 | test_url: http://www.fastcompany.com/3003586/6-simple-rituals-reach-your-potential-every-day \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/faz.net.txt b/inc/3rdparty/site_config/standard/faz.net.txt new file mode 100644 index 00000000..4fe5968b --- /dev/null +++ b/inc/3rdparty/site_config/standard/faz.net.txt | |||
@@ -0,0 +1,30 @@ | |||
1 | # Title | ||
2 | title: //p[@class='Content HeadlineShort'] | ||
3 | |||
4 | # Authors | ||
5 | # some are known and have a link, others don't | ||
6 | author: substring-after(//span[@class='Autor'], 'Von') | ||
7 | |||
8 | # Date | ||
9 | date: //span[@class='Datum'] | ||
10 | |||
11 | # Body | ||
12 | body: //div[@class='Artikel'] | ||
13 | |||
14 | # Removals before body text | ||
15 | strip: //div[@class='Breadcrumbs'] | ||
16 | strip: //div[@class='QuickSearchBox'] | ||
17 | strip: //div[@class='FAZArtikelEinleitung'] | ||
18 | strip: //div[@class='FAZArtikelReiter'] | ||
19 | strip: //div[@class='clear'] | ||
20 | |||
21 | # General removals | ||
22 | strip: //span[@class='Bildnachweis'] | ||
23 | |||
24 | # Removals after body text | ||
25 | strip: //div[@class='ArtikelAbbinder'] | ||
26 | strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content'] | ||
27 | strip: //div[@class='FAZArtikelKommentare FAZArtikelContent'] | ||
28 | strip: //div[@class='FAZArtikelFunktionen'] | ||
29 | strip: //div[@id='FAZContentRight'] | ||
30 | test_url: http://www.faz.net/aktuell/gesellschaft/ehe-haltbarkeitsformel-verliebe-dich-oft-verlobe-dich-selten-heirate-vielleicht-11685306.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/fictionpress.com.txt b/inc/3rdparty/site_config/standard/fictionpress.com.txt new file mode 100644 index 00000000..4a04e832 --- /dev/null +++ b/inc/3rdparty/site_config/standard/fictionpress.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: id('storytext') | ||
2 | author: //a[starts-with(@href, '/u/')] | ||
3 | #next_page_link: substring-after(//input[contains(@value, 'Next')]/@onclick, "self.location='") | ||
4 | strip_id_or_class: 'a2a_kit' | ||
5 | test_url: http://www.fictionpress.com/s/2897964/1/All_We_Knew \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ficwad.com.txt b/inc/3rdparty/site_config/standard/ficwad.com.txt new file mode 100644 index 00000000..3dbfe76f --- /dev/null +++ b/inc/3rdparty/site_config/standard/ficwad.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | title: //h4 | ||
2 | author: //span[@class="author"] | ||
3 | body: //div[@id="story"] | ||
4 | strip_id_or_class: summary | ||
5 | strip_id_or_class: meta | ||
6 | strip_id_or_class: storyfoot | ||
7 | convert_double_br_tags: yes | ||
8 | prune: no | ||
9 | |||
10 | # Note: this site still has trouble because single <br> tags are stripped, but I don't see a way to fix that with this interface. | ||
11 | |||
12 | test_url: http://www.ficwad.com/story/158977 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/finance.yahoo.com.txt b/inc/3rdparty/site_config/standard/finance.yahoo.com.txt new file mode 100644 index 00000000..81c18fd3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/finance.yahoo.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | body: //div[@id='y-article-bd'] | ||
3 | body: //div[contains(@class, 'yom-art-content')] | ||
4 | strip: //div[contains(@class, 'related-companies')] | ||
5 | strip: //div[@id='y-article-related'] | ||
6 | strip: //div[@id='ypf-article-related'] | ||
7 | prune: no | ||
8 | |||
9 | single_page_link: //div[@class='ft']//a[contains(@href, 'page=all')] | ||
10 | |||
11 | test_url: http://sg.finance.yahoo.com/news/Motorola-takes-wraps-249-rsg-3508842732.html?x=0&.v=1 | ||
12 | test_url: http://finance.yahoo.com/news/super-young-retirement-savers.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt b/inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt new file mode 100644 index 00000000..1a5cd2e1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/findtheswagger.tumblr.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | date: //div[@class='notes']/a | ||
2 | body: //div[@id='content'] | ||
3 | |||
4 | strip_id_or_class: tags | ||
5 | strip_id_or_class: permalink | ||
6 | strip_id_or_class: notes | ||
7 | strip_id_or_class: post_nav | ||
8 | strip: //div[@id='content']//h2 | ||
9 | strip_id_or_class: right_column | ||
10 | test_url: http://findtheswagger.tumblr.com/post/11589145141/moe-resners-end-of-an-era-1957-giants-final \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/firstthings.com.txt b/inc/3rdparty/site_config/standard/firstthings.com.txt new file mode 100644 index 00000000..dd56da22 --- /dev/null +++ b/inc/3rdparty/site_config/standard/firstthings.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //div[@class='articleTitle'] | ||
2 | author: //div[@class='articleAuthor'] | ||
3 | body: //div[@class='articleContent'] | ||
4 | prune: no | ||
5 | convert_double_br_tags: yes | ||
6 | |||
7 | test_url: http://www.firstthings.com/article/2011/05/the-trouble-with-ayn-rand \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/fivechapters.com.txt b/inc/3rdparty/site_config/standard/fivechapters.com.txt new file mode 100644 index 00000000..d9c5e42e --- /dev/null +++ b/inc/3rdparty/site_config/standard/fivechapters.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //div[@class='entry'] | ||
2 | test_url: http://www.fivechapters.com/2010/paris-part-one/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/fivefilters.org.txt b/inc/3rdparty/site_config/standard/fivefilters.org.txt new file mode 100644 index 00000000..dc1db432 --- /dev/null +++ b/inc/3rdparty/site_config/standard/fivefilters.org.txt | |||
@@ -0,0 +1 @@ | |||
prune: no \ No newline at end of file | |||
diff --git a/inc/3rdparty/site_config/standard/fivethirtyeight.com.txt b/inc/3rdparty/site_config/standard/fivethirtyeight.com.txt new file mode 100644 index 00000000..3d7b45a8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/fivethirtyeight.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: substring-after(//title, 'Right:') | ||
2 | body: //div[@class = 'post-body'] | ||
3 | author: substring-after(//*[@class='post-author'], 'by') | ||
4 | date: concat(//*[@class='date-header'], ' ', //*[@class='post-timestamp']/a) | ||
5 | convert_double_br_tags: yes | ||
6 | |||
7 | test_url: http://www.fivethirtyeight.com/2010/07/does-rnc-have-structural-problems.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/fm4.orf.at.txt b/inc/3rdparty/site_config/standard/fm4.orf.at.txt new file mode 100644 index 00000000..32d44c87 --- /dev/null +++ b/inc/3rdparty/site_config/standard/fm4.orf.at.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | author: //div[@class='authorDescription']/h2 | ||
2 | body: //div[@id='story'] | ||
3 | date: substring-before(substring-after(//p[@class='date'],'Erstellt am:'), '-') | ||
4 | title: //h1[@class='detail'] | ||
5 | strip: //div[@class='fact'] | ||
6 | |||
7 | test_url: http://fm4.orf.at/stories/1689156/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/fnal.gov.txt b/inc/3rdparty/site_config/standard/fnal.gov.txt new file mode 100644 index 00000000..7faa6bfc --- /dev/null +++ b/inc/3rdparty/site_config/standard/fnal.gov.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | title: normalize-space(//h1) | ||
2 | |||
3 | author: //td/p[position()=last()]/em | ||
4 | |||
5 | # I swear, this is really the best way to do this | ||
6 | date: normalize-space(//td[contains(@style, "color: #ffffff")]) | ||
7 | |||
8 | # my god, it's full of tables | ||
9 | body: /table/tbody/tr[5]//table/tbody//table/tbody/tr/td | ||
10 | strip: //h1 | ||
11 | |||
12 | # the following two lines strip the byline at the end of the article (the byline is a <p> that consists of an em dash and then some text in an <em>). I have no idea why I can't just strip //p[position()=last()], but trying to do so includes a bunch of other crap in the output. | ||
13 | strip: //p[position()=last()]/em | ||
14 | strip: //p[position()=last()]/child::text() | ||
15 | test_url: http://www.fnal.gov/pub/today/archive_2011/today11-11-09_MuonDepartmentReadMore.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/focus.de.txt b/inc/3rdparty/site_config/standard/focus.de.txt new file mode 100644 index 00000000..3ad5cabf --- /dev/null +++ b/inc/3rdparty/site_config/standard/focus.de.txt | |||
@@ -0,0 +1,19 @@ | |||
1 | title: //h1 | ||
2 | |||
3 | author: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created'] | ||
4 | |||
5 | date: //div[@class='articleHead']/span[@class='created'] | ||
6 | |||
7 | body: //div[@id='article'] | ||
8 | |||
9 | strip: //span[@class='markerText'] | ||
10 | strip: //div[@class='articleContent small']/div[@class='textBlock']//span[@class='created'] | ||
11 | strip: //div[@class='sidebar'] | ||
12 | strip: //div[@class='starbar'] | ||
13 | strip: //div[@class='actions clearfix'] | ||
14 | strip: //div[@id='commentForm'] | ||
15 | strip: //div[@id='commentSent'] | ||
16 | strip: //div[@id='comments'] | ||
17 | strip: //div[@class='similarityBlock'] | ||
18 | |||
19 | test_url: http://www.focus.de/politik/ausland/ein-jahr-nach-bombenanschlag-u-bahn-attentaeter-von-minsk-hingerichtet_aid_724958.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/fool.com.txt b/inc/3rdparty/site_config/standard/fool.com.txt new file mode 100644 index 00000000..69867ccb --- /dev/null +++ b/inc/3rdparty/site_config/standard/fool.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | body: //div[@class='entry-content'] | ||
2 | date: //meta[@name="date"]/@content | ||
3 | author: //meta[@name="author"]/@content | ||
4 | |||
5 | strip_id_or_class: ecapShell | ||
6 | strip_id_or_class: noindent | ||
7 | strip_id_or_class: targetedPromotion | ||
8 | |||
9 | prune: no | ||
10 | |||
11 | test_url: http://www.fool.com/investing/general/2012/01/27/dfc-global-beats-up-on-analysts-yet-again.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/forbes.com.txt b/inc/3rdparty/site_config/standard/forbes.com.txt new file mode 100644 index 00000000..2381b56a --- /dev/null +++ b/inc/3rdparty/site_config/standard/forbes.com.txt | |||
@@ -0,0 +1,16 @@ | |||
1 | title: //hgroup//h1 | ||
2 | title: //span[@class='mainarttitle'] | ||
3 | |||
4 | body: //div[@id='leftRail']//div[contains(@class, 'body')] | ||
5 | |||
6 | author: //meta[@name="author"]/@content | ||
7 | author: //span[@class='mainartauthor'] | ||
8 | |||
9 | date: substring-before(//hgroup//h6, '@') | ||
10 | date: //span[@class='mainartdate'] | ||
11 | |||
12 | prune: no | ||
13 | |||
14 | single_page_link: //a[contains(@href, '/print/')] | ||
15 | |||
16 | test_url: http://www.forbes.com/forbes/2011/0509/technology-frog-design-jan-chipchase-ethnographer-birth-cool_print.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/foreignpolicy.com.txt b/inc/3rdparty/site_config/standard/foreignpolicy.com.txt new file mode 100644 index 00000000..6ab7a091 --- /dev/null +++ b/inc/3rdparty/site_config/standard/foreignpolicy.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //div[@id='art-mast']//h1 | ||
2 | author: substring-after(//span[@id='by-line'], 'BY ') | ||
3 | date: //span[@id='pub-date'] | ||
4 | body: //div[@id='art-mast']//h2 | //div[@id='art-mast']/h3 | //div[@id='art-body']//div[@class='translateBody'] | ||
5 | strip: //div[@id='share-box'] | ||
6 | prune: no | ||
7 | |||
8 | single_page_link: //span[@id='controls']/a[contains(@href, 'print=yes')] | ||
9 | |||
10 | test_url: http://www.foreignpolicy.com/articles/2011/08/01/a_murderers_manifesto_and_me | ||
11 | test_url: http://www.foreignpolicy.com/articles/2012/02/29/five_years_in_damascus \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/forsvaret.no.txt b/inc/3rdparty/site_config/standard/forsvaret.no.txt new file mode 100644 index 00000000..3085c8f2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/forsvaret.no.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //div[@class="articleHeader"]/h1 | ||
2 | author: //p[@class="byline"] | ||
3 | date: //p[contains(@class,"publishedDate")]/span | ||
4 | # remove the right menu | ||
5 | strip: //div[contains(@class,"aside")] | ||
6 | # remove some SharePoint webpart label junk | ||
7 | strip: //div[@id="ctl00_PlaceHolderMain_ArticleLeadField_label"] | ||
8 | strip: //div[@id="ctl00_PlaceHolderMain_PublishingPageContentField_label"] | ||
9 | test_url: http://forsvaret.no/aktuelt/publisert/nyheter/Sider/F5-fly-til-Skedsmo.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/foxnews.com.txt b/inc/3rdparty/site_config/standard/foxnews.com.txt new file mode 100644 index 00000000..f1ee4851 --- /dev/null +++ b/inc/3rdparty/site_config/standard/foxnews.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | prune: no | ||
2 | |||
3 | author: //meta[@name="dc.publisher"]/@content | ||
4 | date: //meta[@name="dc.date"]/@content | ||
5 | strip: //p[contains(@class, 'contributor vcard')] | ||
6 | replace_string(<ul><li><div class="photo">): <div class="photo"> | ||
7 | strip: //p[a[contains(., 'Click here to read more on this story ')]] | ||
8 | |||
9 | test_url: http://www.foxnews.com/entertainment/2011/05/04/dwayne-johnson-guys-grow-pair-driving-hybrid/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/freelancer.com.txt b/inc/3rdparty/site_config/standard/freelancer.com.txt new file mode 100644 index 00000000..f3d5425c --- /dev/null +++ b/inc/3rdparty/site_config/standard/freelancer.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@id="projectDetailsContent"]//td | ||
2 | |||
3 | test_url: http://www.freelancer.com/projects/PHP-Website-Design/debug-Forum-website-code.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/freytag-film.com.txt b/inc/3rdparty/site_config/standard/freytag-film.com.txt new file mode 100644 index 00000000..8dc0dabc --- /dev/null +++ b/inc/3rdparty/site_config/standard/freytag-film.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //div[@class = 'instapaperbody'] | ||
2 | convert_double_br_tags: no | ||
3 | date: //div[@class='instadate'] | ||
4 | title: //h2[@class = 'instatitle'] | ||
5 | test_url: http://freytag-film.com/blog/artikel/shooting_a_feature_film_in_10_days \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/friendskorner.com.txt b/inc/3rdparty/site_config/standard/friendskorner.com.txt new file mode 100644 index 00000000..39a9973f --- /dev/null +++ b/inc/3rdparty/site_config/standard/friendskorner.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | #body: (//div[@class='ftr-yt-vid'])[1] | ||
2 | body: (//blockquote[contains(@class, 'postcontent')])[1] | ||
3 | body: (//div[starts-with(@id, 'post_message')])[1] | ||
4 | |||
5 | prune: no | ||
6 | tidy: no | ||
7 | |||
8 | #replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" | ||
9 | #replace_string(</iframe>): </iframe> </div> | ||
10 | |||
11 | test_url: http://www.friendskorner.com/forum/f137/debate-personal-lives-leaders-west-vs-pakistan-must-read-297989/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ft.com.txt b/inc/3rdparty/site_config/standard/ft.com.txt new file mode 100644 index 00000000..38d9d326 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ft.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //div[contains(@class, 'ft-story-body')] | ||
2 | |||
3 | author: substring-after(//div[contains(@class, 'ft-story-header')]/p[1], 'By ') | ||
4 | date: substring-before(substring-after(//div[contains(@class, 'ft-story-header')]/p[2], 'Published:'), '|') | ||
5 | test_url: http://www.ft.com/cms/s/2/e1be4b5a-620c-11e0-8ee4-00144feab49a.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ftd.de.txt b/inc/3rdparty/site_config/standard/ftd.de.txt new file mode 100644 index 00000000..a58765b0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ftd.de.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //div[@class='boxIntroHead']/span[@class='h3'] | //div[@class='section']/div[@class='paragraph' or @class='embObjLeft'] | ||
2 | single_page_link: //a[@class='icon print'] | ||
3 | |||
4 | test_url: http://www.ftd.de/it-medien/it-telekommunikation/:mobilfunk-vivendi-und-vodafone-trennen-sich-in-frankreich/60034691.html | ||
5 | test_url: http://www.ftd.de/it-medien/medien-internet/:verkauf-von-warner-music-musikbranche-auf-dem-sprung/60048185.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/fubiz.net.txt b/inc/3rdparty/site_config/standard/fubiz.net.txt new file mode 100644 index 00000000..8e6356bf --- /dev/null +++ b/inc/3rdparty/site_config/standard/fubiz.net.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@class = 'entry'] | ||
2 | |||
3 | test_url: http://www.fubiz.net/2011/05/31/world-press-photo-2011/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/futurezone.at.txt b/inc/3rdparty/site_config/standard/futurezone.at.txt new file mode 100644 index 00000000..50fc144a --- /dev/null +++ b/inc/3rdparty/site_config/standard/futurezone.at.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | date: //span[@class='date'] | ||
2 | strip: //div[@class='postsidebar'] | ||
3 | body: //div[@class='singlepost'] | ||
4 | title: //div[@class='singlepost']/h1 | ||
5 | move_into(//div[@class='singlepost']): //div[@class='info'] | ||
6 | strip: //div[@class='gallery'] | ||
7 | strip: //div[@class='biggallery'] | ||
8 | strip: //ul[@class='social'] | ||
9 | strip: //ul[@class='social_mail'] | ||
10 | |||
11 | test_url: http://futurezone.at/future/5502-erste-galileo-satelliten-starten-ins-all.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gamasutra.com.txt b/inc/3rdparty/site_config/standard/gamasutra.com.txt new file mode 100644 index 00000000..35a8762a --- /dev/null +++ b/inc/3rdparty/site_config/standard/gamasutra.com.txt | |||
@@ -0,0 +1,20 @@ | |||
1 | # default view title | ||
2 | title: //span[@class='newsTitle'] | ||
3 | # print view title | ||
4 | title: //h3[@class='title'] | ||
5 | |||
6 | # default view author | ||
7 | author: //span[@class='newsAuth']/a | ||
8 | author: substring-after(//span[@class='newsAuth'], 'by ') | ||
9 | |||
10 | # default view date | ||
11 | date: //td[@class='newsDate'] | ||
12 | |||
13 | # default view body | ||
14 | body: //td[@class='featureText'] | ||
15 | body: //td[@class='newsText'] | ||
16 | |||
17 | strip: //h3[@class='title'] | ||
18 | |||
19 | single_page_link: //a[contains(@href, '?print=1')] | ||
20 | test_url: http://www.gamasutra.com/view/feature/132559/staying_power_rethinking_feedback_.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gameblog.fr.txt b/inc/3rdparty/site_config/standard/gameblog.fr.txt new file mode 100644 index 00000000..2cc4b378 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gameblog.fr.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | body: //div[@id='GBTVPlayer'] | //div[contains(@class, 'col490')] | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | strip_id_or_class: noprint | ||
7 | strip: //div[@id='gbNewsTextContent']/following-sibling::* | ||
8 | |||
9 | test_url: http://www.gameblog.fr/news/26330-les-sims-3-showtime-s-annonce-en-video | ||
10 | test_url: http://www.gameblog.fr/news/26306-mise-a-jour-du-dashboard-de-la-xbox-360-disponible \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/garythink.com.txt b/inc/3rdparty/site_config/standard/garythink.com.txt new file mode 100644 index 00000000..1791e816 --- /dev/null +++ b/inc/3rdparty/site_config/standard/garythink.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | tidy: no | ||
2 | |||
3 | test_url: http://www.garythink.com/eft/testing.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gasteroprod.com.txt b/inc/3rdparty/site_config/standard/gasteroprod.com.txt new file mode 100644 index 00000000..ef68082a --- /dev/null +++ b/inc/3rdparty/site_config/standard/gasteroprod.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | # These should work, but don't. They were given by Firefox XPather extension | ||
2 | title: //article//header//a//h1 | ||
3 | body: //article//section | ||
4 | test_url: http://gasteroprod.com/blog/faut-il-continuer-a-supporter-internet-explorer-6.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gatopardo.com.txt b/inc/3rdparty/site_config/standard/gatopardo.com.txt new file mode 100644 index 00000000..74346328 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gatopardo.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | body: //div[@class='panel'] | ||
2 | strip: //div[@style='float:right'] | ||
3 | strip: //span[@class='titulosHomePublicidad'] | ||
4 | strip: //div[@id='TitTop5Der'] | ||
5 | strip: //img[@src='/ImagesGatoPardo/LogoGatopardo.png'] | ||
6 | |||
7 | prune: yes | ||
8 | test_url: http://www.gatopardo.com/ReportajesGP.php?R=95 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gawker.com.txt b/inc/3rdparty/site_config/standard/gawker.com.txt new file mode 100644 index 00000000..6531d81a --- /dev/null +++ b/inc/3rdparty/site_config/standard/gawker.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //div[@class="post-body"] | ||
2 | |||
3 | # Remove 'content is restricted' | ||
4 | strip: //div[@id='agegate_IDHERE'] | ||
5 | |||
6 | test_url: http://gawker.com/#!5782070/russian-bomb-squad-successfully-defuses-sex-toy \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/geeksofdoom.com.txt b/inc/3rdparty/site_config/standard/geeksofdoom.com.txt new file mode 100644 index 00000000..55586e1c --- /dev/null +++ b/inc/3rdparty/site_config/standard/geeksofdoom.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | author: substring-after(//span[@class='storyauthor'],'Posted by') | ||
2 | date: //span[@class='storydate'] | ||
3 | test_url: http://www.geeksofdoom.com/2012/03/14/robert-rodriguez-says-machete-kills-and-sin-city-2-will-film-this-year/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/geenstijl.nl.txt b/inc/3rdparty/site_config/standard/geenstijl.nl.txt new file mode 100644 index 00000000..f6dccf48 --- /dev/null +++ b/inc/3rdparty/site_config/standard/geenstijl.nl.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@id = 'article'] | ||
2 | strip: //div[@id = 'klasbox'] | ||
3 | test_url: http://www.geenstijl.nl/mt/archieven/2010/10/vrouw_lange_frans_wou_baas_b_d.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/getnews.jp.txt b/inc/3rdparty/site_config/standard/getnews.jp.txt new file mode 100644 index 00000000..537b4c2e --- /dev/null +++ b/inc/3rdparty/site_config/standard/getnews.jp.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@class='post'] | ||
2 | strip: //ul[@id='bookmark_single'] | ||
3 | test_url: http://getnews.jp/archives/117312 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/giantbomb.com.txt b/inc/3rdparty/site_config/standard/giantbomb.com.txt new file mode 100644 index 00000000..8a54bc07 --- /dev/null +++ b/inc/3rdparty/site_config/standard/giantbomb.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | # 2011-11-19 - carlo@... - Initial setup. | ||
2 | |||
3 | strip_id_or_class: user-review-detail | ||
4 | strip: //h1 | ||
5 | |||
6 | body: //div[@class="wiki-content"] | //div[@class="section-bd"] | //div[@class="news-story"] | ||
7 | |||
8 | author: //span[@class="reviewer"] | //p[@class="byline"]/a/text() | ||
9 | date: //span[@class="dtreviewed"] | ||
10 | |||
11 | test_url: http://www.giantbomb.com/the-elder-scrolls-v-skyrim/61-33394/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/giga.de.txt b/inc/3rdparty/site_config/standard/giga.de.txt new file mode 100644 index 00000000..f60199ad --- /dev/null +++ b/inc/3rdparty/site_config/standard/giga.de.txt | |||
@@ -0,0 +1,20 @@ | |||
1 | tidy:no | ||
2 | title://h2[@class="title"] | ||
3 | # author:"Ben Miller" | ||
4 | date://div[@id="stats"]/span | ||
5 | strip_id_or_class:stats | ||
6 | strip_id_or_class:breadcrumbs | ||
7 | strip_id_or_class:gn-why-content | ||
8 | strip_id_or_class:single-social | ||
9 | strip_id_or_class:sidebar-ads | ||
10 | strip_id_or_class:sidebar-top | ||
11 | strip_id_or_class:footer | ||
12 | strip_id_or_class:post_meta | ||
13 | # strip_id_or_class: | ||
14 | # strip_id_or_class: | ||
15 | # strip_id_or_class: | ||
16 | # strip_id_or_class: | ||
17 | # strip_id_or_class: | ||
18 | # strip_id_or_class: | ||
19 | |||
20 | test_url: http://www.giga.de/benm/2011/10/17/probleme-mit-ios-5-wenn-die-daten-weg-sind/#more-58033 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gigaom.com.txt b/inc/3rdparty/site_config/standard/gigaom.com.txt new file mode 100644 index 00000000..348bdf23 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gigaom.com.txt | |||
@@ -0,0 +1,17 @@ | |||
1 | date: //meta[@name='DC.date.issued']/@content | ||
2 | date: //span[@class='post-meta the-date'] | ||
3 | |||
4 | title: //meta[@property='og:title']/@content | ||
5 | |||
6 | author: //meta[@name='DC.creator']/@content | ||
7 | |||
8 | body: //div[contains(@class, 'post-sub-head') or starts-with(@id, 'post-content-')] | ||
9 | |||
10 | find_string: id="content" | ||
11 | replace_string: id="content-ignore" | ||
12 | |||
13 | strip_id_or_class: sharedaddy | ||
14 | |||
15 | prune: no | ||
16 | |||
17 | test_url: http://gigaom.com/2011/10/24/groupon-google-lawsuit/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gihyo.jp.txt b/inc/3rdparty/site_config/standard/gihyo.jp.txt new file mode 100644 index 00000000..478b23a3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gihyo.jp.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | single_page_link: //p[@id='skip']//a[contains(@href, 'skip')] | ||
2 | |||
3 | test_url: http://gihyo.jp/dev/serial/01/machine-learning/0010 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gist.github.com.txt b/inc/3rdparty/site_config/standard/gist.github.com.txt new file mode 100644 index 00000000..53095b34 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gist.github.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //div[@class="highlight"]/pre | ||
2 | |||
3 | prune: no | ||
4 | tidy: no | ||
5 | |||
6 | test_url: https://gist.github.com/1258908 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt b/inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt new file mode 100644 index 00000000..144ce045 --- /dev/null +++ b/inc/3rdparty/site_config/standard/givemesomethingtoread.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | single_page_link: //div[@id="content"]//h2/a | ||
2 | |||
3 | test_url: http://givemesomethingtoread.com/post/6285838917/the-baddest-lawyer-in-the-history-of-jersey \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gizmodo.co.uk.txt b/inc/3rdparty/site_config/standard/gizmodo.co.uk.txt new file mode 100644 index 00000000..285e76c0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gizmodo.co.uk.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | body: //div[@id="leadimage" or @class="postcontent"] | ||
2 | author: //div[@class="contentauthor"] | ||
3 | date: //div[@class="timestamp"] | ||
4 | |||
5 | prune: no | ||
6 | |||
7 | test_url: http://www.gizmodo.co.uk/2013/02/bbc-forcing-poor-old-sir-david-attenborough-to-go-on-twitter/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gizmodo.com.txt b/inc/3rdparty/site_config/standard/gizmodo.com.txt new file mode 100644 index 00000000..c9536255 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gizmodo.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | body: //div[@class="post-body" or contains(@class, 'illustration top')] | ||
2 | author: (//cite//span[@class="plus-icon"])[1] | ||
3 | date: //span[@class="date"] | ||
4 | |||
5 | prune: no | ||
6 | |||
7 | test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gizmologia.com.txt b/inc/3rdparty/site_config/standard/gizmologia.com.txt new file mode 100644 index 00000000..d2c7c9f9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gizmologia.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://gizmologia.com/2011/09/amd-trinity-el-sucesor-de-llano-en-una-demostracion-muy-interesante \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gizmovil.com.txt b/inc/3rdparty/site_config/standard/gizmovil.com.txt new file mode 100644 index 00000000..5fc204b8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gizmovil.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://gizmovil.com/2011/09/hipertextual-labs-receptor-bluetooth-nokia-bh-214 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/global.txt b/inc/3rdparty/site_config/standard/global.txt new file mode 100644 index 00000000..135ed500 --- /dev/null +++ b/inc/3rdparty/site_config/standard/global.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | # Look for Open Graph data - http://ogp.me | ||
2 | title: //meta[@property="og:title"]/@content | ||
3 | date: //meta[@property="article:published_time"]/@content | ||
4 | # article:author is sometimes a URL, e.g. on guardian.co.uk \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/globalissues.org.txt b/inc/3rdparty/site_config/standard/globalissues.org.txt new file mode 100644 index 00000000..95d4becf --- /dev/null +++ b/inc/3rdparty/site_config/standard/globalissues.org.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | body: //div[@id='content'] | ||
2 | |||
3 | strip: //p[@class='top'] | ||
4 | strip: //h2[.='Where next?'] | ||
5 | strip_id_or_class: where-next | ||
6 | strip_id_or_class: social-bookmarks | ||
7 | strip_id_or_class: link-to-here | ||
8 | strip_id_or_class: options-heading | ||
9 | strip_id_or_class: page-options-content | ||
10 | strip_id_or_class: page-info-bottom | ||
11 | |||
12 | tidy: no | ||
13 | prune: no | ||
14 | |||
15 | test_url: http://www.globalissues.org/article/39/a-primer-on-neoliberalism \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/goal.com.txt b/inc/3rdparty/site_config/standard/goal.com.txt new file mode 100644 index 00000000..075c4d2b --- /dev/null +++ b/inc/3rdparty/site_config/standard/goal.com.txt | |||
@@ -0,0 +1,16 @@ | |||
1 | title: //div[@id='article_headline']//h1 | ||
2 | date: //div[contains(@class, 'articleDate')]//h4 | ||
3 | body: //div[@id='article_headline']/h2 | //div[@id='large_article_image' or @id='article_content'] | ||
4 | |||
5 | strip_id_or_class: relatedLinksBox | ||
6 | strip_id_or_class: betting-widget | ||
7 | strip_image_src: install_flash.gif | ||
8 | |||
9 | strip: //table[contains(@style, 'float: right; width: 285px;')] | ||
10 | strip: //div[@class='caption'] | ||
11 | |||
12 | tidy: no | ||
13 | prune: no | ||
14 | |||
15 | test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139032/video-profile-back-to-his-very-best-for-bayern-frances-flair-and- | ||
16 | test_url: http://www.goal.com/en-gb/news/3284/euro-2012/2012/05/31/3139869/lampard-injury-a-bitter-blow-for-england-and-sorry-way-to# \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/golem.de.txt b/inc/3rdparty/site_config/standard/golem.de.txt new file mode 100644 index 00000000..6c5d1c4f --- /dev/null +++ b/inc/3rdparty/site_config/standard/golem.de.txt | |||
@@ -0,0 +1,25 @@ | |||
1 | # Jens Kohl, jens.kohl@... | ||
2 | # - Added publication date | ||
3 | # - Stripped pagination block | ||
4 | # - Added single page link | ||
5 | # - Added XPath queries for the printer-friendly version | ||
6 | |||
7 | title: //h1 | ||
8 | body: //div[@class='formatted'] | ||
9 | prune: no | ||
10 | |||
11 | date: substring-after(//li[2][@class="text1"], 'Datum:') | ||
12 | strip: //ol[@class="list-chapters"] | ||
13 | strip_comments: yes | ||
14 | |||
15 | # next: commands for printer friendly pages | ||
16 | single_page_link: //a[contains(@href, 'print.php?a=')]/@href | ||
17 | title: //body/h3 | ||
18 | strip_image_src: staticrl/images/logo.jpg | ||
19 | strip_image_src: http://cpx.golem.de/cpx.php?class=7 | ||
20 | strip: //body/h3 | ||
21 | strip: //body/b[1] | ||
22 | strip: //body/b[2] | ||
23 | strip: //body/b[3] | ||
24 | strip: //div[1] | ||
25 | test_url: http://www.golem.de/1112/88696.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/good.is.txt b/inc/3rdparty/site_config/standard/good.is.txt new file mode 100644 index 00000000..5cf67011 --- /dev/null +++ b/inc/3rdparty/site_config/standard/good.is.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: //div[@class="title"]/div/h1 | ||
2 | body: //div[@class="body"] | ||
3 | date: //li[@class="date-time"] | ||
4 | test_url: http://www.good.is/post/why-amazon-is-the-next-top-tech-company/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gossip-tv.gr.txt b/inc/3rdparty/site_config/standard/gossip-tv.gr.txt new file mode 100644 index 00000000..c2fe4e40 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gossip-tv.gr.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | date: //meta[@name='og:article:published_time']/@value | ||
2 | |||
3 | body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText'] | ||
4 | |||
5 | strip_id_or_class: itemImageGallery | ||
6 | |||
7 | # remove extras at end of post content | ||
8 | find_string: <div style="margin:5px 0 10px;"> | ||
9 | replace_string: </div></body></html><!-- | ||
10 | |||
11 | prune: no | ||
12 | |||
13 | test_url: http://www.gossip-tv.gr/story/158902/aggelike-daliane-semera-duskoleuontai-oloi-sta-epaggelmatika-tous | ||
14 | test_url: http://www.gossip-tv.gr/lifestyle/Taste/story/230266/lahtaristo-kai-ygieino-tost-sokolatas \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gothamist.com.txt b/inc/3rdparty/site_config/standard/gothamist.com.txt new file mode 100644 index 00000000..5179fc12 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gothamist.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //div[@class='entry-header'] | ||
2 | author: //span[@class='vcard author'] | ||
3 | date: //abbr[@class='published'] | ||
4 | #move_into(//div[@class='entry-body']): //img[@id='photo_1'] | ||
5 | body: //div[@class='entry-body'] | ||
6 | strip: //div[@class='galleryEaseThumbs'] | ||
7 | test_url: http://gothamist.com/2012/03/15/fancy_cocktail_lounge_the_randolph.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gotomanager.com.txt b/inc/3rdparty/site_config/standard/gotomanager.com.txt new file mode 100644 index 00000000..7fb0ee03 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gotomanager.com.txt | |||
@@ -0,0 +1,21 @@ | |||
1 | title: //span[@id="showTitle"] | ||
2 | author: //span[@id="showAuthor"] | ||
3 | date: //span[@id="showRefDate"] | ||
4 | |||
5 | strip: //span[@class="black_bold"] | ||
6 | strip: //div[@id="sectionName"] | ||
7 | strip: //div[@id="storyHeader"] | ||
8 | |||
9 | body: //div[@id="newsBodyText"] | ||
10 | |||
11 | strip_image_src: "http://www.gotomanager.com/img/mgrm/space.gif" | ||
12 | strip_image_src: "http://www.gotomanager.com/images/separator.gif" | ||
13 | strip_image_src: "http://www.gotomanager.com/images/spaces.gif" | ||
14 | |||
15 | convert_double_br_tags: yes | ||
16 | tidy: yes | ||
17 | |||
18 | strip: //div[@id="smallLeadImage"] | ||
19 | strip: //div[@id="truehitsSurvey"] | ||
20 | strip: //table[@id="relatedInfoTable"] | ||
21 | test_url: http://www.gotomanager.com/news/details.aspx?id=86759 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gq.com.txt b/inc/3rdparty/site_config/standard/gq.com.txt new file mode 100644 index 00000000..233c4a7f --- /dev/null +++ b/inc/3rdparty/site_config/standard/gq.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | next_page_link: //div[@class='pagination']//span[@class='paginationNext']/a | ||
2 | strip_id_or_class: utility | ||
3 | strip_id_or_class: keywords | ||
4 | strip_id_or_class: pagination | ||
5 | strip_id_or_class: position2_content | ||
6 | body: //div[@class='article'] | ||
7 | title: //h1[@class='content-headline'] | ||
8 | author: //span[@class='contributor']//a | ||
9 | test_url: http://www.gq.com/news-politics/newsmakers/201203/terry-thompson-ohio-zoo-massacre-chris-heath-gq-february-2012 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/grantland.com.txt b/inc/3rdparty/site_config/standard/grantland.com.txt new file mode 100644 index 00000000..3269e086 --- /dev/null +++ b/inc/3rdparty/site_config/standard/grantland.com.txt | |||
@@ -0,0 +1,20 @@ | |||
1 | # this is fragile with footnotes -- leave it for now | ||
2 | |||
3 | #tidy: no | ||
4 | #prune: no | ||
5 | #move_into(//article): //aside[@id='footnotes'] | ||
6 | author: //cite/a | ||
7 | date: //time | ||
8 | |||
9 | strip: //a[text()='Grantland'] | ||
10 | strip_id_or_class: ad-wrapper | ||
11 | strip_id_or_class: fb-connect-link | ||
12 | strip_id_or_class: fb-status | ||
13 | strip: //li[@class='print'] | ||
14 | strip: //cite | ||
15 | strip: //a[contains(text(), '[+]')] | ||
16 | strip: //a[@id='jump-nav-link'] | ||
17 | strip: //h1[text()='Share This'] | ||
18 | strip: //h1[text()='Top Stories'] | ||
19 | strip: //div[@id="update-text-size"] | ||
20 | test_url: http://www.grantland.com/story/_/id/8421241/examining-new-albums-rock-veterans-no-doubt-green-day \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt b/inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt new file mode 100644 index 00000000..a5258030 --- /dev/null +++ b/inc/3rdparty/site_config/standard/greatergreaterwashington.org.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //div[@class="blogpost"]/h2 | ||
2 | author: //div[@class="blogpost"]/p[@class="byline"]/a | ||
3 | date: //div[@class="blogpost"]/p[@class="byline"]/span[@class="time_posted"] | ||
4 | body: //div[@class="blogpost"] | ||
5 | strip_id_or_class: flag | ||
6 | strip_id_or_class: byline | ||
7 | strip_id_or_class: post_footer | ||
8 | strip_id_or_class: related_posts | ||
9 | strip_id_or_class: post_author_bios | ||
10 | strip: //h2 | ||
11 | test_url: http://greatergreaterwashington.org/post/12457/ask-ggw-what-will-happen-to-the-1000-series-railcars/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/groups.drupal.org.txt b/inc/3rdparty/site_config/standard/groups.drupal.org.txt new file mode 100644 index 00000000..7e15a5c1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/groups.drupal.org.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title://h1 | ||
2 | author://span[@class="submitted"]/a | ||
3 | date:substring-after(//span[@class="submitted"],'on ') | ||
4 | body://div[@class="content"] | ||
5 | test_url: http://groups.drupal.org/node/36816 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/guardian.co.uk.txt b/inc/3rdparty/site_config/standard/guardian.co.uk.txt new file mode 100644 index 00000000..71d84306 --- /dev/null +++ b/inc/3rdparty/site_config/standard/guardian.co.uk.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //div[@id='main-article-info']//h1 | ||
2 | body: //div[@id='article-wrapper'] | ||
3 | date: //li[@class='publication']//time[@pubdate] | //li[@class='publication']//data[@pubdate] | ||
4 | author: //li[@class='byline'] | ||
5 | prune: no | ||
6 | tidy: no | ||
7 | test_url: http://www.guardian.co.uk/business/2011/oct/06/quantitative-easing-75bn-bank-of-england \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/gulfnews.com.txt b/inc/3rdparty/site_config/standard/gulfnews.com.txt new file mode 100644 index 00000000..e69044b3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/gulfnews.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //div[@class='wrapper_half']//ul[@class='details'] | //div[@class='wrapper_half']//p[@class='synopsis'] | //div[@class='wrapper_half']//div[@class='image'] | //div[@class='wrapper_half']//div[@class='article'] | ||
2 | strip: //div[@class='wrapper_half']//ul[@class='details']/li[position()>1] | ||
3 | prune: no | ||
4 | tidy: no | ||
5 | test_url: http://gulfnews.com/news/gulf/uae/government/abu-dhabi-centre-offers-useful-information-1.811084 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/guokr.com.txt b/inc/3rdparty/site_config/standard/guokr.com.txt new file mode 100644 index 00000000..00255eb8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/guokr.com.txt | |||
@@ -0,0 +1,22 @@ | |||
1 | # To administrator: | ||
2 | # Please change the hostname to "www.guokr.com/article/*" | ||
3 | # Not working for "www.guokr.com/post/" pages configured by carlosliu913@gmail.com | ||
4 | |||
5 | # This filter is tested on: | ||
6 | # http://www.guokr.com/article/274325/ | ||
7 | # http://www.guokr.com/article/275013/ | ||
8 | |||
9 | title://h1 | ||
10 | author://div[contains(@class, 'content-th-info')]/a | ||
11 | date://div[contains(@class, 'content-th-info')]/span | ||
12 | body://div[contains(@class, 'Content')] | ||
13 | |||
14 | strip://div[contains(@class, 'bottom-i')] | ||
15 | strip://div[contains(@class, 'copyright')] | ||
16 | strip://div[contains(@class, 'fr')] | ||
17 | strip://div[contains(@class, 'content-th-info')] | ||
18 | strip://h1[contains(@id, 'articleTitle')] | ||
19 | strip://div[contains(@class, 'side')] | ||
20 | strip://div[contains(@class, 'top-wp')] | ||
21 | test_url: http://www.guokr.com/article/275013/ | ||
22 | test_url: http://www.guokr.com/article/338387/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/haberler.com.txt b/inc/3rdparty/site_config/standard/haberler.com.txt new file mode 100644 index 00000000..bc1ce689 --- /dev/null +++ b/inc/3rdparty/site_config/standard/haberler.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //div[@id="habermetni"]/h1[@id="haber_baslik"] | ||
2 | body: //div[@id="habermetni"]/p | ||
3 | strip: //img[@class='newsDetailLeft'] | ||
4 | strip_image_src: /haber-resimleri/ | ||
5 | test_url: http://www.haberler.com/emniyete-atacakti-elinde-patladi-3198733-haberi/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/halo.bungie.org.txt b/inc/3rdparty/site_config/standard/halo.bungie.org.txt new file mode 100644 index 00000000..7989d09f --- /dev/null +++ b/inc/3rdparty/site_config/standard/halo.bungie.org.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title:substring-before(id("maincontent")/table, 'Posted') | ||
2 | body:id("maincontent")/p | ||
3 | # eventually convert linebreaks better | ||
4 | |||
5 | test_url: http://halo.bungie.org/fanfic/?story=Delahunt0312112316071.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hammers.theoffside.com.txt b/inc/3rdparty/site_config/standard/hammers.theoffside.com.txt new file mode 100644 index 00000000..747f90a1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/hammers.theoffside.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | # Remove right column | ||
2 | strip: //*[(@class = 'right_col')] | ||
3 | |||
4 | # Remove comments etc. | ||
5 | strip: //*[(@class = 'category')] | ||
6 | strip: /html/body/div[1][@class='absolute_content_high']/div[1][@class='wrapper']/div[1][@class='main_col']/div[@class='main_content']/h3 | ||
7 | test_url: http://hammers.theoffside.com/carling-cup/a-funny-thing-happened-on-the-way-to-4-nil.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hanselman.com.txt b/inc/3rdparty/site_config/standard/hanselman.com.txt new file mode 100644 index 00000000..d3ffeab1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/hanselman.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | date: //span[@class="item-date"] | ||
2 | body: //div[@class="item-content"] | ||
3 | strip_comments: no | ||
4 | test_url: http://www.hanselman.com/blog/BrainBytesBackBunsTheProgrammersPriorities.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hardware.fr.txt b/inc/3rdparty/site_config/standard/hardware.fr.txt new file mode 100644 index 00000000..318885c8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/hardware.fr.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h1 | ||
2 | author: //a[@class='a_aut'] | ||
3 | body: //div[@class='content_dossier'] | ||
4 | strip: //div[@id='pagination'] | ||
5 | next_page_link: //div[@class='sommaire_colonne']//span[@class='page_actuelle']/following::span[@class='autres_page']//a/@href | ||
6 | test_url: http://www.hardware.fr/articles/850-1/pci-express-3-0-impact-performances.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hbr.org.txt b/inc/3rdparty/site_config/standard/hbr.org.txt new file mode 100644 index 00000000..fd6145e7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/hbr.org.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //div[@id='article-title'] | ||
2 | author: //div[@id='articleAuthors'] | ||
3 | body: //div[@id='article'] | ||
4 | strip: //div[@class='module wide'] | ||
5 | next_page_link: //a[@title='Next Page'] | ||
6 | test_url: http://hbr.org/2012/04/the-real-leadership-lessons-of-steve-jobs/ar/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/healthland.time.com.txt b/inc/3rdparty/site_config/standard/healthland.time.com.txt new file mode 100644 index 00000000..204d8da0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/healthland.time.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | date: //span[@class = 'date'] | ||
2 | body: //div[@class = 'entry-content'] | ||
3 | strip://div[@class='more-ways'] | ||
4 | strip://div[@id = 'stayConnected'] | ||
5 | strip://p[child::a[@rel = 'bookmark']] | ||
6 | strip://p[starts-with(string(.),'(MORE:')] | ||
7 | strip://p[starts-with(string(.),'(PHOTOS:')] | ||
8 | move_into(//p[../@class = 'entry-content'][position() = last()])://div[@id = 'featbox'] | ||
9 | |||
10 | test_url: http://healthland.time.com/2011/07/24/amy-winehouse-and-the-pain-of-addiction/?preview=true&preview_id=39210&preview_nonce=0777d4e408 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/heise-online.mobi.txt b/inc/3rdparty/site_config/standard/heise-online.mobi.txt new file mode 100644 index 00000000..1da82ac7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/heise-online.mobi.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@id='content']/div | ||
2 | date: //p[@class='author_date']/span[@class='date'] | ||
3 | test_url: http://heise-online.mobi/newsticker/meldung/Amazons-Appstore-in-der-Kritik-Ein-Desaster-fuer-Kunden-und-Entwickler-1273936.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/heise.de.txt b/inc/3rdparty/site_config/standard/heise.de.txt new file mode 100644 index 00000000..5f19d3f8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/heise.de.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | single_page_link: //p[@class='news_option']/a | ||
2 | |||
3 | date: //p[@class='news_datum'] | ||
4 | title: //h1 | ||
5 | body: //div[@class='meldung_wrapper'] | ||
6 | |||
7 | test_url: http://www.heise.de/newsticker/meldung/Europa-soll-Grundrechteschutz-im-Netz-staerken-1392664.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hespress.com.txt b/inc/3rdparty/site_config/standard/hespress.com.txt new file mode 100644 index 00000000..d866f629 --- /dev/null +++ b/inc/3rdparty/site_config/standard/hespress.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | body: //div[@id='article_holder']//div[@class='image'] | //div[@id='article_body'] | ||
2 | |||
3 | prune: no | ||
4 | tidy: no | ||
5 | |||
6 | test_url: http://hespress.com/videos/73684.html | ||
7 | test_url: http://hespress.com/permalink/73678.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/highscalability.com.txt b/inc/3rdparty/site_config/standard/highscalability.com.txt new file mode 100644 index 00000000..fd50b6ad --- /dev/null +++ b/inc/3rdparty/site_config/standard/highscalability.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@class='journal-entry-text'] | ||
2 | |||
3 | test_url: http://highscalability.com/blog/2011/3/14/6-lessons-from-dropbox-one-million-files-saved-every-15-minu.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hiperpop.com.txt b/inc/3rdparty/site_config/standard/hiperpop.com.txt new file mode 100644 index 00000000..b5eb062e --- /dev/null +++ b/inc/3rdparty/site_config/standard/hiperpop.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://hiperpop.com/2011/09/marc-anthony-celebra-su-cumpleanos-con-jennifer-lopez \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hiphopleeft.nl.txt b/inc/3rdparty/site_config/standard/hiphopleeft.nl.txt new file mode 100644 index 00000000..c57c1aa9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/hiphopleeft.nl.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //div[@class = 'pd'] | ||
2 | strip: //div[@id = 'overzicht-albumrecensies'] | ||
3 | strip: //div[@id = 'jc'] | ||
4 | test_url: http://hiphopleeft.nl/index.php?option=com_content&view=article&id=2767:mark-ronson-record-collection&catid=66:m&Itemid=142 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/historytoday.com.txt b/inc/3rdparty/site_config/standard/historytoday.com.txt new file mode 100644 index 00000000..dc687f3f --- /dev/null +++ b/inc/3rdparty/site_config/standard/historytoday.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | body://div[@id = 'content'] | ||
2 | author://span[@class = 'authors'] | ||
3 | author://span[@class = 'ht-vtag'][1] | ||
4 | date:substring-before(//meta[@name = 'dc.date']/@content,'T') | ||
5 | strip://div[contains(@class, 'region-ubercontent')] | ||
6 | strip://h1 | ||
7 | strip://div[@id = 'ht-author'] | ||
8 | strip://ul[@class = 'links inline'] | ||
9 | strip://div[@id = 'ht-tools'] | ||
10 | test_url: http://www.historytoday.com/carol-dyhouse/skin-deep-fall-fur \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hmercer.com.txt b/inc/3rdparty/site_config/standard/hmercer.com.txt new file mode 100644 index 00000000..eeee1594 --- /dev/null +++ b/inc/3rdparty/site_config/standard/hmercer.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //*[@class='ptitle'] | ||
2 | date: //span[@class='date'] | ||
3 | body: //div[@class='body'] | ||
4 | prune: no | ||
5 | test_url: http://hmercer.com/2011/07/why-i-switched-to-jekyll/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hometheaterreview.com.txt b/inc/3rdparty/site_config/standard/hometheaterreview.com.txt new file mode 100644 index 00000000..d43e6448 --- /dev/null +++ b/inc/3rdparty/site_config/standard/hometheaterreview.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //div[@id='entry-body'] | ||
2 | strip_id_or_class: paginate | ||
3 | strip: //p[contains(., 'Additional Resources')] | ||
4 | test_url: http://hometheaterreview.com/dreamvision-starlight-3-three-chip-d-ila-projector-reviewed/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hosted.ap.org.txt b/inc/3rdparty/site_config/standard/hosted.ap.org.txt new file mode 100644 index 00000000..e19dd526 --- /dev/null +++ b/inc/3rdparty/site_config/standard/hosted.ap.org.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //table[@class='ap-smallphoto-table'] | //div[@class='body']//*[@class='entry-content'] | ||
2 | tidy: no | ||
3 | strip_image_src: analytics.apnewsregistry | ||
4 | |||
5 | test_url: http://hosted.ap.org/dynamic/stories/U/US_SPENDING_SHOWDOWN?SITE=FLPET&SECTION=HOME&TEMPLATE=DEFAULT&CTIME=2011-04-06-07-46-50 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hs.fi.txt b/inc/3rdparty/site_config/standard/hs.fi.txt new file mode 100644 index 00000000..67125fb5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/hs.fi.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | prune: yes | ||
2 | tidy: yes | ||
3 | test_url: http://www.hs.fi/kotimaa/Teollisuushallin%20palo%20levitt%C3%A4%C3%A4%20vaarallista%20savua%20Tuusulassa/a1305571582405 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ht.ly.txt b/inc/3rdparty/site_config/standard/ht.ly.txt new file mode 100644 index 00000000..a8412d2a --- /dev/null +++ b/inc/3rdparty/site_config/standard/ht.ly.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | single_page_link: //iframe[@id='hootFrame']/@src | ||
2 | |||
3 | test_url: http://ht.ly/bOiZV \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/huffingtonpost.com.txt b/inc/3rdparty/site_config/standard/huffingtonpost.com.txt new file mode 100644 index 00000000..d40513b2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/huffingtonpost.com.txt | |||
@@ -0,0 +1,16 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | body: //div[img[starts-with(@id, 'img_caption')]] | //div[@class="big_photo"] | //div[contains(@class, 'entry_body_text')] | ||
3 | date: //meta[@name="publish_date"]/@content | ||
4 | author: //a[@rel="author"] | ||
5 | author: //meta[@name="author"]/@content | ||
6 | prune: no | ||
7 | tidy: no | ||
8 | strip: //footer | ||
9 | strip_id_or_class: ps-slideshow | ||
10 | strip_id_or_class: fs-slideshow | ||
11 | strip: //p[contains(., 'Related on HuffPost:')] | ||
12 | # end early | ||
13 | replace_string(<div class="sbm-main): </body></html><div class="not-interested | ||
14 | |||
15 | test_url: http://www.huffingtonpost.com/mitch-moxley/tracking-beijings-boom-th_b_1209828.html | ||
16 | test_url: http://www.huffingtonpost.com/2012/09/11/president-obama-iphone-throwdown_n_1873826.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/humantransit.org.txt b/inc/3rdparty/site_config/standard/humantransit.org.txt new file mode 100644 index 00000000..ec7d3c06 --- /dev/null +++ b/inc/3rdparty/site_config/standard/humantransit.org.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //h3[@class="entry-header"] | ||
2 | date: //h2[@class="date-header"] | ||
3 | body: //div[contains(@class, 'entry')] | ||
4 | |||
5 | test_url: http://www.humantransit.org/2012/06/can-network-primers-reduce-grief-about-network-design.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hurriyet.com.tr.txt b/inc/3rdparty/site_config/standard/hurriyet.com.tr.txt new file mode 100644 index 00000000..ccf09dcc --- /dev/null +++ b/inc/3rdparty/site_config/standard/hurriyet.com.tr.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //div[@class='HaberDetayTitleHold Title']/h1 | ||
2 | body: //div[@id='YazarDetayText'] | ||
3 | author: //div[@class='HaberDetayTitleHold Title']/h1 | ||
4 | prune: no | ||
5 | |||
6 | test_url: http://www.hurriyet.com.tr/ekonomi/19490260.asp | ||
7 | test_url: http://www.hurriyet.com.tr/yazarlar/22078439.asp \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hvg.hu.txt b/inc/3rdparty/site_config/standard/hvg.hu.txt new file mode 100644 index 00000000..06fa98d8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/hvg.hu.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //div[@id='pg-content']//h1 | ||
2 | body: //div[@id='articleBody0'] | ||
3 | replace_string(</table>): </table><br /><br /> | ||
4 | |||
5 | single_page_link: //div[@class="up-header"]/a | ||
6 | |||
7 | prune: no | ||
8 | |||
9 | test_url: http://hvg.hu/w/20111125_sparta \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/hypebeast.com.txt b/inc/3rdparty/site_config/standard/hypebeast.com.txt new file mode 100644 index 00000000..49b46da5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/hypebeast.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | body: //div[@id='content']//div[contains(@class, 'wp-image-') or contains(@class, 'entry')][1] | ||
2 | author: //span[@class='author']/a | ||
3 | |||
4 | strip_id_or_class: disqus | ||
5 | strip_id_or_class: paginator | ||
6 | strip_id_or_class: photo-number | ||
7 | |||
8 | prune: no | ||
9 | |||
10 | test_url: http://hypebeast.com/2012/11/stussy-2012-fall-winter-november-releases/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/idlewords.com.txt b/inc/3rdparty/site_config/standard/idlewords.com.txt new file mode 100644 index 00000000..e1badef7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/idlewords.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //a[@class='post_title'] | ||
2 | body: //div[@class='entrybox'] | ||
3 | strip_id_or_class: post_title | ||
4 | date: //div[@class='entrybox']/b[1] | ||
5 | strip: //div[@class='entrybox']/b[1] | ||
6 | author: string('Maciej Cegłowski') | ||
7 | test_url: http://idlewords.com/2011/08/why_arabic_is_terrific.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/igeneration.fr.txt b/inc/3rdparty/site_config/standard/igeneration.fr.txt new file mode 100644 index 00000000..d7ec2da1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/igeneration.fr.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | author: substring-after(substring-after(//span[@class='submitted'],'- '),'- ') | ||
2 | date: substring-before(//span[@class='submitted'], concat('- ',substring-after(substring-after(//span[@class='submitted'],'- '),'- '))) | ||
3 | body: //div[@class='content clear-block zoneApple'] | ||
4 | |||
5 | test_url: http://www.igeneration.fr/iphone/l-iphone-et-l-ipad-chouchous-des-tpe-et-pme-55112 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt b/inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt new file mode 100644 index 00000000..f74178a9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ignoredbydinosaurs.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title://h1[@class='page-title'] | ||
2 | body://*[@id='content']//div[contains(@class,'node-content')] | ||
3 | |||
4 | author://*[@id='content']//div[contains(@class,'node-submitted')]/a | ||
5 | |||
6 | date:substring-after(//div[contains(@class,'node-submitted')],' on ') | ||
7 | test_url: http://ignoredbydinosaurs.com/2011/09/great-lie-lorem-ipsum \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ilounge.com.txt b/inc/3rdparty/site_config/standard/ilounge.com.txt new file mode 100644 index 00000000..ca1e54a8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ilounge.com.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | # Get proper Title, Author and Date info | ||
2 | title: substring-before(//title, '|') | ||
3 | author: substring-after(//h4/a[@href='http://www.ilounge.com/index.php/ilounge/aboutus/'], 'By') | ||
4 | date: //span[@class='instapaper_date'] | ||
5 | |||
6 | # For Reviews & First Looks, get the intro paragraph and put it in front of the main body. | ||
7 | move_into(//div[@id='instapaper_para1']): //div[@id='instapaper_body'] | ||
8 | body: //div[@id='instapaper_para1'] | ||
9 | strip: //div[@class='reviewinfo'] | ||
10 | |||
11 | # We don't use footnotes, so why bother checking for them? | ||
12 | footnotes: no | ||
13 | test_url: http://www.ilounge.com/index.php/reviews/entry/luxa2-alum-x-for-iphone-4-4s/?utm_source=twitterfeed&utm_medium=twitter \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ilyabirman.ru.txt b/inc/3rdparty/site_config/standard/ilyabirman.ru.txt new file mode 100644 index 00000000..da6a60f6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ilyabirman.ru.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //div[@class='published visible e2-smart-title']//span | ||
2 | author: //span[@id='e2-blog-title'] | ||
3 | date: //p[@class='super-h'] | ||
4 | body: //div[@class='text published visible'] | ||
5 | test_url: http://ilyabirman.ru/meanwhile/2011/11/15/2/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/inc.com.txt b/inc/3rdparty/site_config/standard/inc.com.txt new file mode 100644 index 00000000..0589aaae --- /dev/null +++ b/inc/3rdparty/site_config/standard/inc.com.txt | |||
@@ -0,0 +1,21 @@ | |||
1 | author: substring-after(substring-before(//div[@id='byline'],'|'),'By') | ||
2 | author: //div[@class='byline']/a | ||
3 | date: //span[@class='pubdate'] | ||
4 | # print friendly page | ||
5 | body: //div[@id='text'] | ||
6 | # regular page | ||
7 | body: //div[@id= 'articlecontent'] | ||
8 | |||
9 | strip: //div[@id= 'articlecontent']/h1 | ||
10 | strip: //div[@id='articlecontent']/p[@class='deck'] | ||
11 | strip: //div[@id='articlecontent']/div[@class='byline'] | ||
12 | strip: //div[@id='articlespacer'] | ||
13 | strip: //div[@id='incsharebox'] | ||
14 | strip: //div[@id='articlesidebar'] | ||
15 | |||
16 | prune: no | ||
17 | |||
18 | single_page_link: //a[contains(@href, 'Printer_Friendly.html')] | ||
19 | strip: //a[contains(., 'Dig Deeper')] | ||
20 | test_url: http://www.inc.com/guides/2010/11/seven-tips-for-lobbying-politicians.html | ||
21 | test_url: http://www.inc.com/eric-schurenberg/startups-are-we-geting-irrationally-exuberant.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/independent.co.uk.txt b/inc/3rdparty/site_config/standard/independent.co.uk.txt new file mode 100644 index 00000000..47baf36b --- /dev/null +++ b/inc/3rdparty/site_config/standard/independent.co.uk.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | body: //div[contains(@class, 'articleContent')] | ||
3 | date: //meta[@property='article:published_time']/@content | ||
4 | author: //div[@id='main']//div[@class='byline']//span[@class='authorName'] | ||
5 | |||
6 | strip_id_or_class: RelatedArtTag | ||
7 | |||
8 | tidy: no | ||
9 | test_url: http://www.independent.co.uk/news/world/middle-east/syria-could-face-human-rights-probe-2274326.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/index.php b/inc/3rdparty/site_config/standard/index.php index a3d5f739..a1b767fd 100644 --- a/inc/3rdparty/site_config/standard/index.php +++ b/inc/3rdparty/site_config/standard/index.php | |||
@@ -1,3 +1,3 @@ | |||
1 | <?php | 1 | <?php |
2 | // this is here to prevent directory listing over the web | 2 | // this is here to prevent directory listing over the web |
3 | ?> \ No newline at end of file | 3 | ?> \ No newline at end of file |
diff --git a/inc/3rdparty/site_config/standard/indiatimes.com.txt b/inc/3rdparty/site_config/standard/indiatimes.com.txt new file mode 100644 index 00000000..e7a35e84 --- /dev/null +++ b/inc/3rdparty/site_config/standard/indiatimes.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //figure[@class='mainVideo'] | ||
2 | strip: //figcaption | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | test_url: http://www.indiatimes.com/bollywood/kareena-insecure-about-saif-working-with-bipasha-23386.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/inessential.com.txt b/inc/3rdparty/site_config/standard/inessential.com.txt new file mode 100644 index 00000000..312cec4b --- /dev/null +++ b/inc/3rdparty/site_config/standard/inessential.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //div[@class='weblogPost']/h3[1] | ||
2 | author: ("Brent Simmons") | ||
3 | date: //span[@class="weblogPostDisplayDate"] | ||
4 | body: //div[@class='weblogPostBody'] | ||
5 | test_url: http://inessential.com/2011/10/25/why_just_store_the_app_data_on_dropbo \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/info.abril.com.br.txt b/inc/3rdparty/site_config/standard/info.abril.com.br.txt new file mode 100644 index 00000000..64cf3c8e --- /dev/null +++ b/inc/3rdparty/site_config/standard/info.abril.com.br.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title://h1 | ||
2 | body://div[@id='texto_link'] | ||
3 | |||
4 | test_url: http://info.abril.com.br/noticias/internet/filme-do-youtube-vai-estrear-nos-cinemas-22042011-6.shl \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/infoq.com.txt b/inc/3rdparty/site_config/standard/infoq.com.txt new file mode 100644 index 00000000..3a4e402d --- /dev/null +++ b/inc/3rdparty/site_config/standard/infoq.com.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | body: //div[@id="intTranscript"] | ||
2 | body: //div[@class="box-content"] | ||
3 | title: //div[@class="box-content"]//h1[1] | ||
4 | author: //p[@class="info"]/strong | ||
5 | date: substring-before(substring-after(//p[@class="info"], "on"), "Length") | ||
6 | strip: //div[@class="box-content"]//h1[1] | ||
7 | strip: //div[@class="box-content"]//p[@class="info"] | ||
8 | strip_id_or_class: vendor-content-box | ||
9 | strip_id_or_class: tags2 | ||
10 | strip_id_or_class: instructions | ||
11 | strip_id_or_class: comments | ||
12 | strip_id_or_class: forum-list-tree | ||
13 | strip: //div[@class="addthis_toolbox addthis_default_style"] | ||
14 | test_url: http://www.infoq.com/interviews/oleg-zhurakousky-javaone2011-interview \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/informador.com.mx.txt b/inc/3rdparty/site_config/standard/informador.com.mx.txt new file mode 100644 index 00000000..eedec24f --- /dev/null +++ b/inc/3rdparty/site_config/standard/informador.com.mx.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //div[@class='tituloInt'] | ||
2 | body: //div[@class='notaPortada'] | ||
3 | strip: //img[@id='imgHorizontalInt imgDetalleImg imagenNota'] | ||
4 | date: //span[@class='publi'] | ||
5 | author: //span[@class='autor'] | ||
6 | tidy: no | ||
7 | prune: no | ||
8 | |||
9 | test_url: http://www.informador.com.mx/tecnologia/2011/337606/6/iran-desarrolla-antivirus-tras-afectaciones-por-duqu.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/information.dk.txt b/inc/3rdparty/site_config/standard/information.dk.txt new file mode 100644 index 00000000..6e3c3b1a --- /dev/null +++ b/inc/3rdparty/site_config/standard/information.dk.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | author: //*[@property='dc:creator'] | ||
3 | date: //*[@property='dc:date']/@content | ||
4 | body: //div[@id='page-content']//div[contains(@class, 'article-body')] | ||
5 | |||
6 | tidy: no | ||
7 | test_url: http://www.information.dk/282307 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/informationarchitects.net.txt b/inc/3rdparty/site_config/standard/informationarchitects.net.txt new file mode 100644 index 00000000..134306cd --- /dev/null +++ b/inc/3rdparty/site_config/standard/informationarchitects.net.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title://h1[@class="post_title"] | ||
2 | body://article[@class="post"] | ||
3 | date://h1[@class="section_separator"] | ||
4 | author://span[@class="post_author"] | ||
5 | strip://nav[@class="arrow_nav"] | ||
6 | strip://section[@id="contact"] | ||
7 | strip_id_or_class:post_title | ||
8 | strip_id_or_class:post_author | ||
9 | strip_id_or_class:section_separator | ||
10 | test_url: http://informationarchitects.net/blog/nzz-relaunch-a-quick-review/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/informationclearinghouse.info.txt b/inc/3rdparty/site_config/standard/informationclearinghouse.info.txt new file mode 100644 index 00000000..0879e9e6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/informationclearinghouse.info.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //head/title | ||
2 | body: //table[@id='table3']//div[@class='postContent'] | ||
3 | prune: no | ||
4 | tidy: no | ||
5 | |||
6 | test_url: http://www.informationclearinghouse.info/article28238.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/informit.com.txt b/inc/3rdparty/site_config/standard/informit.com.txt new file mode 100644 index 00000000..84c1fdcf --- /dev/null +++ b/inc/3rdparty/site_config/standard/informit.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //div[@id='content']/h1 | ||
2 | body: //div[@id="content"] | ||
3 | strip: //img[contains(@src, 'informit_printer.png')] | ||
4 | single_page_link: //div[contains(@class, 'articleTools')]//a[contains(@href, '/printerfriendly.')] | ||
5 | prune: no | ||
6 | |||
7 | test_url: http://www.informit.com/articles/article.aspx?p=1729268 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/infoworld.com.txt b/inc/3rdparty/site_config/standard/infoworld.com.txt new file mode 100644 index 00000000..dd588ed8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/infoworld.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | body: //div[@id='main_text'] | ||
2 | title: //div[@id='main_text']/h1 | ||
3 | strip: //div[@id='main_text']/h1 | ||
4 | strip: //div[@id='main_text']/h2 | ||
5 | strip_id_or_class: tools | ||
6 | strip_id_or_class: articleTools | ||
7 | strip_id_or_class: pagination | ||
8 | strip_id_or_class: byline | ||
9 | strip_id_or_class: tweet | ||
10 | date: //div[@class='date'] | ||
11 | strip: //div[@class='date'] | ||
12 | test_url: http://www.infoworld.com/d/the-industry-standard/it-jobs-the-rise-both-offshore-and-in-us-187689 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/infzm.com.txt b/inc/3rdparty/site_config/standard/infzm.com.txt new file mode 100644 index 00000000..012c873f --- /dev/null +++ b/inc/3rdparty/site_config/standard/infzm.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | # This filter is tested on: | ||
2 | # http://www.infzm.com/content/71068 | ||
3 | # http://www.infzm.com/content/41577 | ||
4 | |||
5 | author://em[contains(@class, 'toAuthor')] | ||
6 | date:substring(//em[contains(@class, 'pubTime')],1) | ||
7 | body://section[contains(@id, 'articleContent')] | ||
8 | title://h1[contains(@class ,'articleHeadline clearfix')] | ||
9 | test_url: http://www.infzm.com/content/41577 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/inhabitat.com.txt b/inc/3rdparty/site_config/standard/inhabitat.com.txt new file mode 100644 index 00000000..6629dafe --- /dev/null +++ b/inc/3rdparty/site_config/standard/inhabitat.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | # set body | ||
2 | body: //div[@class='post-listing'] | ||
3 | |||
4 | # remove clutter | ||
5 | strip: //a/big | ||
6 | strip: //a/em | ||
7 | strip: //p/em | ||
8 | test_url: http://inhabitat.com/2010/11/18/sliding-walls-transform-this-tokyo-house-into-an-office/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/instagr.am.txt b/inc/3rdparty/site_config/standard/instagr.am.txt new file mode 100644 index 00000000..ad9e8214 --- /dev/null +++ b/inc/3rdparty/site_config/standard/instagr.am.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //div[@class='caption'] | ||
2 | author: //p[@class='username'] | ||
3 | |||
4 | strip: //div[@class='contents']/h3 | ||
5 | strip: //div[@class='location'] | ||
6 | test_url: http://instagr.am/p/G-s_aciyDJ/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/interest.co.nz.txt b/inc/3rdparty/site_config/standard/interest.co.nz.txt new file mode 100644 index 00000000..28c3310a --- /dev/null +++ b/inc/3rdparty/site_config/standard/interest.co.nz.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //div[@id='content'] | ||
2 | test_url: http://www.interest.co.nz/opinion/opinion-when-our-fear-corporate-way-and-our-love-small-business-man-dangerous-thing \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/iolanguage.com.txt b/inc/3rdparty/site_config/standard/iolanguage.com.txt new file mode 100644 index 00000000..231875ad --- /dev/null +++ b/inc/3rdparty/site_config/standard/iolanguage.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //center/table | ||
2 | test_url: http://www.iolanguage.com/scm/io/docs/IoGuide.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ipadclub.nl.txt b/inc/3rdparty/site_config/standard/ipadclub.nl.txt new file mode 100644 index 00000000..d196059e --- /dev/null +++ b/inc/3rdparty/site_config/standard/ipadclub.nl.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | body: //div[@id = 'post'] | ||
2 | strip: //div[@class = 'postinfo'] | ||
3 | strip: //div[@id = 'postmetanew'] | ||
4 | strip: //div[@class = 'paginator'] | ||
5 | strip: //div[@class = 'col-2'] | ||
6 | strip: //div[@id = 'adfactor-label'] | ||
7 | test_url: http://www.ipadclub.nl/15808/text-writer-ipad-tekstverwerker-met-functieknoppen/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ipadplanet.nl.txt b/inc/3rdparty/site_config/standard/ipadplanet.nl.txt new file mode 100644 index 00000000..a2e49005 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ipadplanet.nl.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | body: //div[@id = 'post'] | ||
2 | strip: //div[@class = 'postinfo'] | ||
3 | strip: //div[@id = 'postmetanew'] | ||
4 | strip: //div[@class = 'paginator'] | ||
5 | strip: //div[@class = 'col-2'] | ||
6 | strip: //div[@id = 'adfactor-label'] | ||
7 | test_url: http://www.ipadplanet.nl/11723/steve-jobs-bevestigt-verdwijnen-fysieke-rotatieschakelaar-in-ios-4-2/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/iphoneclub.nl.txt b/inc/3rdparty/site_config/standard/iphoneclub.nl.txt new file mode 100644 index 00000000..f8d4f6a6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/iphoneclub.nl.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | body: //div[@id = 'post'] | ||
2 | strip: //div[@class = 'postinfo'] | ||
3 | strip: //div[@id = 'postmetanew'] | ||
4 | strip: //div[@class = 'paginator'] | ||
5 | strip: //div[@class = 'col-2'] | ||
6 | strip: //div[@id = 'adfactor-label'] | ||
7 | test_url: http://www.iphoneclub.nl/105808/t-mobile-mobiel-internet-wordt-duurder-maar-blijft-onbeperkt/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/iphonehacks.com.txt b/inc/3rdparty/site_config/standard/iphonehacks.com.txt new file mode 100644 index 00000000..c97ff43c --- /dev/null +++ b/inc/3rdparty/site_config/standard/iphonehacks.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //meta[@name='og:title']/@content | ||
2 | body: //small[@class='postmetadata'] | //div[contains(@class, 'entry-content')] | ||
3 | |||
4 | strip: //span[@vanilla-identifier] | ||
5 | |||
6 | prune: no | ||
7 | tidy: no | ||
8 | |||
9 | test_url: http://www.iphonehacks.com/2012/07/app-review-process-behind-the-scenes.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/iplaysoft.com.txt b/inc/3rdparty/site_config/standard/iplaysoft.com.txt new file mode 100644 index 00000000..4a944768 --- /dev/null +++ b/inc/3rdparty/site_config/standard/iplaysoft.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //div[@id='content']//div[@class='entry-banner' or @class='entry-content'] | ||
2 | test_url: http://www.iplaysoft.com/webbrowserpassview.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/isource.com.txt b/inc/3rdparty/site_config/standard/isource.com.txt new file mode 100644 index 00000000..a1c16a16 --- /dev/null +++ b/inc/3rdparty/site_config/standard/isource.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | # Remove social buttons | ||
2 | strip: //div[@id='temp_Content_Right'] | ||
3 | |||
4 | # Remove duplicate article title | ||
5 | strip: //*[(@class='storytitle')] | ||
6 | test_url: http://isource.com/2010/10/24/swearch-a-cool-iphone-web-app/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/itavisen.no.txt b/inc/3rdparty/site_config/standard/itavisen.no.txt new file mode 100644 index 00000000..8da78cb0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/itavisen.no.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | author: //p[@class = 'writer'] | ||
2 | |||
3 | date: //p[@class = 'published-time'] | ||
4 | |||
5 | body: //div[@class = 'text main'] | ||
6 | test_url: http://www.itavisen.no/899786/old-republic-blir-gratis \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/itstactical.com.txt b/inc/3rdparty/site_config/standard/itstactical.com.txt new file mode 100644 index 00000000..550875ec --- /dev/null +++ b/inc/3rdparty/site_config/standard/itstactical.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | title: //h1[@class="entry-title"] | ||
2 | body: //div[@class='format_text entry-content'] | ||
3 | author: //span[@class="author vcard"]/a | ||
4 | date: //abbr[@class="published"] | ||
5 | |||
6 | strip_id_or_class: related-posts | ||
7 | strip_id_or_class: membershipbox | ||
8 | strip_id_or_class: share_this_compact_bt | ||
9 | |||
10 | |||
11 | footnotes: no | ||
12 | test_url: http://www.itstactical.com/warcom/knives/exclusive-triple-aught-design-production-dauntless-knife-video-walkthrough/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/itworld.com.txt b/inc/3rdparty/site_config/standard/itworld.com.txt new file mode 100644 index 00000000..d4fa604e --- /dev/null +++ b/inc/3rdparty/site_config/standard/itworld.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //*[@id="article-title"] | ||
2 | author: //*[@id="article-info"]/strong | ||
3 | date: //*[@class="article-dateline"]/strong | ||
4 | body: //*[@id="article-content"] | ||
5 | test_url: http://www.itworld.com/open-source/140916/android-sued-microsoft-not-linux \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/izismile.com.txt b/inc/3rdparty/site_config/standard/izismile.com.txt new file mode 100644 index 00000000..af3f299a --- /dev/null +++ b/inc/3rdparty/site_config/standard/izismile.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //div[starts-with(@id, 'news-id-')] | ||
2 | prune: no | ||
3 | |||
4 | test_url: http://izismile.com/2011/06/13/uncanny_factoid_fashion_or_creepy_2_pics.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/jalopnik.com.txt b/inc/3rdparty/site_config/standard/jalopnik.com.txt new file mode 100644 index 00000000..fc2eef8e --- /dev/null +++ b/inc/3rdparty/site_config/standard/jalopnik.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | author: //span[@class='plus-icon'] | ||
2 | test_url: http://jalopnik.com/5892124/1955-porsche-550-spyder-sells-for-record-3685-million/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/jandan.net.txt b/inc/3rdparty/site_config/standard/jandan.net.txt new file mode 100644 index 00000000..f1dd3d17 --- /dev/null +++ b/inc/3rdparty/site_config/standard/jandan.net.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //div[@id='content']//div[@class = 'post f'] | ||
2 | strip_id_or_class: comment-big | ||
3 | strip_id_or_class: avatar | ||
4 | strip: //div[@class='time_s'] | ||
5 | |||
6 | test_url: http://jandan.net/2011/04/03/iphone-5-sony.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt b/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt new file mode 100644 index 00000000..6e8af934 --- /dev/null +++ b/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt | |||
@@ -0,0 +1,22 @@ | |||
1 | title: //h1 | ||
2 | author: //p[contains(@class, 'author')]/a | ||
3 | date: //p[contains(@class, 'time')] | ||
4 | body: //div[@class='content']/div[contains(@class, 'text')] | ||
5 | |||
6 | # prevent "no text" errors on multi-page articles | ||
7 | tidy: no | ||
8 | |||
9 | # we use a custom next-link detector instead of the print view because | ||
10 | # it's pretty hard to strip out the unwanted parts in the print view | ||
11 | autodetect_next_page: no | ||
12 | next_page_link: //div[contains(@class, 'text')]/div/div[contains(@class, 'paging')]/a[@class='more '] | ||
13 | |||
14 | strip: //h1 | ||
15 | |||
16 | strip_id_or_class: meta | ||
17 | strip_id_or_class: author | ||
18 | strip_id_or_class: paging | ||
19 | |||
20 | # prevent "Report an Error" from being recognized as footnote | ||
21 | footnotes: no | ||
22 | test_url: http://jetzt.sueddeutsche.de/texte/anzeigen/544308/Alles-flicken \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/jjahnke.net.txt b/inc/3rdparty/site_config/standard/jjahnke.net.txt new file mode 100644 index 00000000..95c45ee7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/jjahnke.net.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //div[@class='entry'] | ||
2 | prune: no | ||
3 | |||
4 | test_url: http://www.jjahnke.net/rundbr87.html#2514 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/jobbank.gc.ca.txt b/inc/3rdparty/site_config/standard/jobbank.gc.ca.txt new file mode 100644 index 00000000..af8d7d17 --- /dev/null +++ b/inc/3rdparty/site_config/standard/jobbank.gc.ca.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //div[@id='formatCont_en'] | ||
2 | |||
3 | prune: no | ||
4 | |||
5 | test_url: http://www.jobbank.gc.ca/detail-eng.aspx?Source=JobPosting&OrderNum=6397922 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/joelonsoftware.com.txt b/inc/3rdparty/site_config/standard/joelonsoftware.com.txt new file mode 100644 index 00000000..75fbee5a --- /dev/null +++ b/inc/3rdparty/site_config/standard/joelonsoftware.com.txt | |||
@@ -0,0 +1,21 @@ | |||
1 | # Works with old posts too, such as http://www.joelonsoftware.com/articles/fog0000000332.html | ||
2 | |||
3 | author: substring-after(//div[@class="author"], 'by ') | ||
4 | date: //div[@class="date"] | ||
5 | |||
6 | ## Clean stuff at top ## | ||
7 | |||
8 | strip: //h1[1] | ||
9 | strip: //h2[1] | ||
10 | strip: //div[@class="date"] | ||
11 | strip: //div[@class="author"] | ||
12 | |||
13 | ## Clean stuff at bottom ## | ||
14 | |||
15 | strip: //blockquote[@class="textmessage"] | ||
16 | strip: //div[@style="width:500px"]/p[last()] | ||
17 | strip: //div[@style="width:500px"]/p[last()-1] | ||
18 | strip: //div[@style="width:500px"]/h4[last()] | ||
19 | strip: //div[@style="width:500px"]/h4[last()-1] | ||
20 | strip: //div[@style="width:500px"]/div[last()] | ||
21 | test_url: http://www.joelonsoftware.com/items/2011/09/15.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/jouire.com.txt b/inc/3rdparty/site_config/standard/jouire.com.txt new file mode 100644 index 00000000..535a501e --- /dev/null +++ b/inc/3rdparty/site_config/standard/jouire.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | author: //h1 | ||
2 | date: //p[contains(@class,'date')] | ||
3 | test_url: http://jouire.com/2011/01/exquisite-whispers/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/joystiq.com.txt b/inc/3rdparty/site_config/standard/joystiq.com.txt new file mode 100644 index 00000000..7fbd467d --- /dev/null +++ b/inc/3rdparty/site_config/standard/joystiq.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | author: //a[@class="byline-author"] | ||
2 | title: //h1[@class="headline"] | ||
3 | strip: //div[@id="info-card"] | ||
4 | strip: //div[@id="breaking-news"] | ||
5 | strip: //div[@class="rmod list-post-mod"] | ||
6 | strip: //div[@id="footer"] | ||
7 | strip: //div[@id="GH_strip"] | ||
8 | test_url: http://www.joystiq.com/2012/06/20/magic-the-gathering-duels-of-the-planeswalkers-2013-review/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt b/inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt new file mode 100644 index 00000000..be844e57 --- /dev/null +++ b/inc/3rdparty/site_config/standard/juedische-allgemeine.de.txt | |||
@@ -0,0 +1,19 @@ | |||
1 | body: //div[@id='article_container'] | ||
2 | author: //h4//a[@class='author'] | ||
3 | title: //h1 | ||
4 | |||
5 | replace_string(lang="en"): lang="de" | ||
6 | replace_string(/>1</a>):/></a> | ||
7 | |||
8 | strip_id_or_class: share_toolbox | ||
9 | strip_id_or_class: article_header | ||
10 | strip_id_or_class: phototext | ||
11 | |||
12 | strip_image_src: icon_author.gif | ||
13 | |||
14 | strip: //img[@src=''] | ||
15 | strip: //h4[@id='author'] | ||
16 | |||
17 | prune: no | ||
18 | |||
19 | test_url: http://www.juedische-allgemeine.de/article/view/id/13366 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/juppy.org.txt b/inc/3rdparty/site_config/standard/juppy.org.txt new file mode 100644 index 00000000..e2d07f24 --- /dev/null +++ b/inc/3rdparty/site_config/standard/juppy.org.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | convert_double_br_tags: yes | ||
2 | |||
3 | title: //div[@id="storycredits"]/p/span[@class="title"] | ||
4 | author: //div[@id="storycredits"]/p/br[1]/following-sibling::text() | ||
5 | |||
6 | strip: //div[@id="storycredits"] | ||
7 | |||
8 | test_url: http://www.juppy.org/santa/stories.php?ForAuthorID=35&Year=2005 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kachestvo.ru.txt b/inc/3rdparty/site_config/standard/kachestvo.ru.txt new file mode 100644 index 00000000..34404e96 --- /dev/null +++ b/inc/3rdparty/site_config/standard/kachestvo.ru.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[contains(@class, 'inner_content')] | ||
2 | |||
3 | test_url: http://kachestvo.ru/promtovar/odezhda/denim.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kenrockwell.com.txt b/inc/3rdparty/site_config/standard/kenrockwell.com.txt new file mode 100644 index 00000000..e6d100ea --- /dev/null +++ b/inc/3rdparty/site_config/standard/kenrockwell.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | # Ads | ||
2 | strip: //table[@align="right"][@width="120"] | ||
3 | |||
4 | # Affiliate link paragraphs | ||
5 | strip: //a[.="Adorama"]/parent::p[contains(., "goodies")] | ||
6 | strip: //a[.="Adorama"]/parent::p[contains(., "This free website's biggest source of")] | ||
7 | test_url: http://www.kenrockwell.com/tech/composition.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kicker.de.txt b/inc/3rdparty/site_config/standard/kicker.de.txt new file mode 100644 index 00000000..7d5daa4b --- /dev/null +++ b/inc/3rdparty/site_config/standard/kicker.de.txt | |||
@@ -0,0 +1,21 @@ | |||
1 | # set body | ||
2 | body: //div[@id='ovArtikel'] | ||
3 | |||
4 | # set title | ||
5 | title: //div[@id='ovArtikel']/h1 | ||
6 | # strip main title and leave sub title | ||
7 | strip: //div[@id='ovArtikel']/h1 | ||
8 | |||
9 | date: //div[@class='publicdate'] | ||
10 | |||
11 | #remove captions | ||
12 | strip: //*/div[@class='bu'] | ||
13 | strip: //*/div[@class='credit'] | ||
14 | |||
15 | #remove ads | ||
16 | strip: //*/div[@class='ad-head'] | ||
17 | strip: //*/div[@class='linksebay'] | ||
18 | |||
19 | # remove video content | ||
20 | strip: //*/div[@class='ovVideo'] | ||
21 | test_url: http://www.kicker.de/news/fussball/frauen/wmfr/frauen-weltmeisterschaft/2011/3/1123662/spielbericht_frankreich-frauen_deutschland-frauen.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kickstarter.com.txt b/inc/3rdparty/site_config/standard/kickstarter.com.txt new file mode 100644 index 00000000..c055659f --- /dev/null +++ b/inc/3rdparty/site_config/standard/kickstarter.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //h1[@id='name'] | ||
2 | body: //*[@id='leftcol'] | ||
3 | |||
4 | strip_id_or_class: 'share-box' | ||
5 | strip_id_or_class: 'project-faqs' | ||
6 | strip_id_or_class: 'report-issue-wrap' | ||
7 | test_url: http://www.kickstarter.com/projects/hop/elevation-dock-the-best-dock-for-iphone \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kingarthurflour.com.txt b/inc/3rdparty/site_config/standard/kingarthurflour.com.txt new file mode 100644 index 00000000..2f6783a3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/kingarthurflour.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: //div[@class='post']/h2 | ||
2 | body: //div[@class='entry'] | ||
3 | strip: //p[contains(.,'Tags:')] | ||
4 | test_url: http://www.kingarthurflour.com/blog/2011/01/28/a-big-sandwich-for-the-big-game/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kotaku.com.txt b/inc/3rdparty/site_config/standard/kotaku.com.txt new file mode 100644 index 00000000..be439d75 --- /dev/null +++ b/inc/3rdparty/site_config/standard/kotaku.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | author: //span[@class="plus-icon"] | ||
2 | test_url: http://kotaku.com/5920211/save-the-furries-on-your-wii-in-this-weeks-nintendo-download \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kottke.org.txt b/inc/3rdparty/site_config/standard/kottke.org.txt new file mode 100644 index 00000000..f93a61e7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/kottke.org.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h2 | ||
2 | author: //*[@id='main']/div/a[1] | ||
3 | date: substring-before(substring-after(//div[@class='meta'],'•'),'•') | ||
4 | body: //div[@id='main'] | ||
5 | strip: //div[@class='meta'] | ||
6 | test_url: http://kottke.org/08/02/king-of-kong-a-fistful-of-quarters \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kumailplus.com.txt b/inc/3rdparty/site_config/standard/kumailplus.com.txt new file mode 100644 index 00000000..9e15cc34 --- /dev/null +++ b/inc/3rdparty/site_config/standard/kumailplus.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@class = "entry-full"] | ||
2 | |||
3 | test_url: http://www.kumailplus.com/2011/12/02/24308 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kumb.com.txt b/inc/3rdparty/site_config/standard/kumb.com.txt new file mode 100644 index 00000000..3f0d2369 --- /dev/null +++ b/inc/3rdparty/site_config/standard/kumb.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //div[@id='centrediv']/h1 | ||
2 | |||
3 | author: substring-after(//div[@id='centrediv']/h3,'By: ') | ||
4 | |||
5 | date: substring-after(substring-before(//div[@id='centrediv']/h3,'By: '),'Filed: ') | ||
6 | |||
7 | body: //div[@class='KonaBody'] | ||
8 | |||
9 | convert_double_br_tags: yes | ||
10 | test_url: http://www.kumb.com/story.php?id=126084 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/kwerfeldein.de.txt b/inc/3rdparty/site_config/standard/kwerfeldein.de.txt new file mode 100644 index 00000000..879b4d6c --- /dev/null +++ b/inc/3rdparty/site_config/standard/kwerfeldein.de.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | date: //span[@class='datum'] | ||
2 | title: //div[@class='artikel']/h2 | ||
3 | body: //div[@class='entry'] | ||
4 | strip: //p[@class='tags'] | ||
5 | author: substring-after(//div[@class='authorinfo']/em,'Dies ist ein Artikel von ') | ||
6 | strip: //div[@class='authorinfo'] | ||
7 | strip: //div[@class='authorpic'] | ||
8 | |||
9 | test_url: http://kwerfeldein.de/index.php/2011/10/17/doppelbelichtungen-mit-konzept/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/laphamsquarterly.org.txt b/inc/3rdparty/site_config/standard/laphamsquarterly.org.txt new file mode 100644 index 00000000..a34e39dd --- /dev/null +++ b/inc/3rdparty/site_config/standard/laphamsquarterly.org.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | title: //h1[@class='headline'] | ||
2 | body: //div[@class='article'] | ||
3 | strip: //div[@class='article']//h3[contains(@class, 'section')] | ||
4 | strip: //div[@class='article']//ul[contains(@class, 'article-actions')] | ||
5 | strip: //div[@id='syndication-upper'] | ||
6 | strip: //a[@id='syndication'] | ||
7 | strip: //dl[@id='article-tags'] | ||
8 | strip: //div[@id='article-like'] | ||
9 | prune: no | ||
10 | |||
11 | single_page_link: //li[@class='single-page']/a | ||
12 | |||
13 | test_url: http://www.laphamsquarterly.org/essays/balanced-diets.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/laprensagrafica.com.txt b/inc/3rdparty/site_config/standard/laprensagrafica.com.txt new file mode 100644 index 00000000..e771f81f --- /dev/null +++ b/inc/3rdparty/site_config/standard/laprensagrafica.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | tidy: no | ||
2 | |||
3 | test_url: http://www.laprensagrafica.com/opinion/editorial/229252-reflexiones-sobre-la-educacion-que-necesitamos.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/laquadrature.net.txt b/inc/3rdparty/site_config/standard/laquadrature.net.txt new file mode 100644 index 00000000..5bad8e65 --- /dev/null +++ b/inc/3rdparty/site_config/standard/laquadrature.net.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | body: //div[@id='content-content']//div[@class='content'] | ||
2 | title: //h1[@class='title'] | ||
3 | date: substring-after(//*[@class='submitted'],'Submitted on') | ||
4 | tidy: no | ||
5 | strip: //div[@class='terms terms-inline'] | ||
6 | strip: //div[@class='more'] | ||
7 | strip: //div[@class='share-links'] | ||
8 | strip: //table[@id='attachments'] | ||
9 | |||
10 | test_url: http://www.laquadrature.net/en/finalization-of-eu-parliaments-weak-net-neutrality-resolution \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt b/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt new file mode 100644 index 00000000..504dbea1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/lareviewofbooks.org.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | #meta data | ||
2 | title:substring-after(title,'|') | ||
3 | |||
4 | author:substring-before( substring-after(//meta[@name = 'description']/@content, normalize-space(substring-after(//title,'|'))),' respond ') | ||
5 | date://h5[@class = 'postDate'] | ||
6 | |||
7 | #text | ||
8 | body://div[@class = 'articleBody'] | ||
9 | |||
10 | #clean up | ||
11 | strip://center | ||
12 | test_url: http://lareviewofbooks.org/post/14066007115/literary-transactions-and-their-vicissitudes \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/latimes.com.txt b/inc/3rdparty/site_config/standard/latimes.com.txt new file mode 100644 index 00000000..0d6ac851 --- /dev/null +++ b/inc/3rdparty/site_config/standard/latimes.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | strip: //div[@id="tugs_story_display"] | ||
2 | strip: //div[@id="search_overlay"] | ||
3 | strip: //div[@id="adv_search"] | ||
4 | body: //div[@class='story'] | ||
5 | tidy: no | ||
6 | convert_double_br_tags: yes | ||
7 | single_page_link: //a[contains(@href, ',print.')] | ||
8 | strip: //p[starts-with(., 'latimes.com')] | ||
9 | strip: //h1[starts-with(., 'latimes.com')] | ||
10 | strip_id_or_class: cubead | ||
11 | test_url: http://www.latimes.com/news/opinion/commentary/la-oe-gartonash-wilders-20110512,0,2876761.story \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/laughingsquid.com.txt b/inc/3rdparty/site_config/standard/laughingsquid.com.txt new file mode 100644 index 00000000..1814988a --- /dev/null +++ b/inc/3rdparty/site_config/standard/laughingsquid.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: //h1[@class='entry-title'] | ||
2 | body: //div[@class='entry-content'] | ||
3 | test_url: http://laughingsquid.com/mysterious-tiny-doors-appearing-around-san-francisco/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/leancrew.com.txt b/inc/3rdparty/site_config/standard/leancrew.com.txt new file mode 100644 index 00000000..0a4c84ba --- /dev/null +++ b/inc/3rdparty/site_config/standard/leancrew.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //div[@id="content"]/h1[1] | ||
2 | date: substring-before(//p[@class="postdate"], ' at ') | ||
3 | author: ("Dr. Drang") | ||
4 | |||
5 | strip: //div[@id="content"]/h1[1] | ||
6 | strip: //p[@class="postdate"] | ||
7 | strip: //h2[@id="respond"] | ||
8 | strip: //blockquote[@class="bbpTweet"]/p/span/a/img | ||
9 | test_url: http://www.leancrew.com/all-this/2011/12/more-shell-less-egg/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/lefigaro.fr.txt b/inc/3rdparty/site_config/standard/lefigaro.fr.txt new file mode 100644 index 00000000..f5494b96 --- /dev/null +++ b/inc/3rdparty/site_config/standard/lefigaro.fr.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //meta[@name='title']/@content | ||
2 | author: //span[@class='sign']//a[@class='journaliste'] | ||
3 | author: //meta[@name='author']/@content | ||
4 | body: //*[@id='article']/div[@class='photo'] | //*[@id='article']/h2 | //*[@id='article']/div[@class='texte'] | ||
5 | date: //time[@pubdate]/@datetime | ||
6 | prune: no | ||
7 | test_url: http://www.lefigaro.fr/environnement/2011/11/10/01029-20111110ARTFIG00801-la-chine-confrontee-a-un-immense-defi-ecologique.php | ||
8 | test_url: http://www.lefigaro.fr/conjoncture/2012/11/20/20002-20121120ARTFIG00609-l-usager-devrait-payer-plus-pour-financer-les-transports.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/lemonde.fr.txt b/inc/3rdparty/site_config/standard/lemonde.fr.txt new file mode 100644 index 00000000..eb205275 --- /dev/null +++ b/inc/3rdparty/site_config/standard/lemonde.fr.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | title: //h1 | ||
2 | |||
3 | # they have a single component containing both author and date | ||
4 | #author: //p[@class='source'] | ||
5 | #date: //p[@class='source'] | ||
6 | |||
7 | body: //div[@class='contenu_article'] | ||
8 | #Shoot the insane "conjugaison.lemonde.fr" links : | ||
9 | strip: //a[contains(@class, 'listLink')] | ||
10 | |||
11 | prune: no | ||
12 | |||
13 | test_url: http://www.lemonde.fr/economie/article/2011/07/05/moody-s-abaisse-la-note-du-portugal-de-quatre-crans_1545237_3234.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/lesnumeriques.com.txt b/inc/3rdparty/site_config/standard/lesnumeriques.com.txt new file mode 100644 index 00000000..9b57f726 --- /dev/null +++ b/inc/3rdparty/site_config/standard/lesnumeriques.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //h1/following::span[@class='fn'] | ||
2 | # Author: parsing should stop once <br> is reached, but I don't know how to do this. | ||
3 | author: //following::div[@class='PDate2'] | ||
4 | date: //following::div[@class='PDate2']/strong | ||
5 | |||
6 | body: //div[@class='ArTexte'] | ||
7 | body: //div[@id='prod_txt_b'] | ||
8 | body: //div[@class='ArPhotoP'] | ||
9 | test_url: http://www.lesnumeriques.com/disque-dur-multimedia/popcorn-hour-300-p12231/test.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/letemps.ch.txt b/inc/3rdparty/site_config/standard/letemps.ch.txt new file mode 100644 index 00000000..c4bee7ec --- /dev/null +++ b/inc/3rdparty/site_config/standard/letemps.ch.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: //h2 | ||
2 | strip_image_src: logo.gif | ||
3 | test_url: http://www.letemps.ch/Facet/print/Uuid/7c9f912c-07c9-11e0-9b50-4d96c9eca37f \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/lifeandculture.fr.txt b/inc/3rdparty/site_config/standard/lifeandculture.fr.txt new file mode 100644 index 00000000..c3888aa8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/lifeandculture.fr.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: //h2[@class="entry-title"] | ||
2 | body: //div[@class="entry-content"] | ||
3 | test_url: http://www.lifeandculture.fr/digital/facebook-and-the-epiphanator-an-end-to-endings/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/lifehacker.com.txt b/inc/3rdparty/site_config/standard/lifehacker.com.txt new file mode 100644 index 00000000..32ade14a --- /dev/null +++ b/inc/3rdparty/site_config/standard/lifehacker.com.txt | |||
@@ -0,0 +1,42 @@ | |||
1 | # Adds author text: Gawker sites commonly show as "Author: View Profile" | ||
2 | author://a[@class="plus-icon modfont"] | ||
3 | |||
4 | # Add date and time | ||
5 | date: //span[@class="date"] | ||
6 | |||
7 | # Remove date and time from article text | ||
8 | strip: //span[@class="date"] | ||
9 | |||
10 | # Remove login/comment text | ||
11 | strip: //*[(@class="presence_control_external smalltype")] | ||
12 | |||
13 | strip: //div[@class="nodebyline modfont"] | ||
14 | |||
15 | # Remove right sidebar | ||
16 | strip: //div[@id="rightwrapper"] | ||
17 | |||
18 | # Remove print header | ||
19 | strip: //div[@id='printhead']/h1 | ||
20 | |||
21 | # Remove 'content is restricted' | ||
22 | strip: //div[@id='agegate_IDHERE'] | ||
23 | |||
24 | # Remove follow text | ||
25 | strip: //*[(@class="permalink_ads")] | ||
26 | |||
27 | # Remove view/comment count | ||
28 | strip: //div[@id='wrapper']/div[2][@class='postmeta_permalink_wrapper']/div[1][@class='postmeta_permalink']/div[2][@class='pm_line'] | ||
29 | |||
30 | # Remove contact text | ||
31 | strip: //div[@id='wrapper']/div[1][@class='content permalink']/p[6][@class='contactinfo'] | ||
32 | |||
33 | # Remove medium duplicates of the article image | ||
34 | strip_image_src: medium.jpg | ||
35 | |||
36 | # Remove "arrow" class at bottom of page | ||
37 | strip: //p[@class="arrow"] | ||
38 | |||
39 | # Remove "track" image from article body | ||
40 | strip: //img[@alt="track"] | ||
41 | test_url: http://lifehacker.com/5925801/how-can-i-turn-vague-goals-into-actionable-to+dos | ||
42 | test_url: http://lifehacker.com/5941600/hack-an-old-computer-mouse-into-a-wireless-bluetooth-mouse \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/linkedin.com.txt b/inc/3rdparty/site_config/standard/linkedin.com.txt new file mode 100644 index 00000000..37e83cf6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/linkedin.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | single_page_link: //ul[@class='util-nav']//a[@class='close'] | ||
2 | test_url: http://www.linkedin.com/news?actionBar=&articleID=894735221&ids=0Rdj4Qe3wQejwIczAOc3sRdzwUb3wScPoPdzkVe2MNcz8RcPsQejwIcPASdjwTcjwU&aag=true&freq=weekly \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/longform.org.txt b/inc/3rdparty/site_config/standard/longform.org.txt new file mode 100644 index 00000000..48d5e1a7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/longform.org.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | single_page_link: //div[@class="post"]/div[@class="title"]/a | ||
2 | |||
3 | test_url: http://longform.org/2011/05/06/disconcerting-new-answers-in-models-suicide/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/loopinsight.com.txt b/inc/3rdparty/site_config/standard/loopinsight.com.txt new file mode 100644 index 00000000..08ad90c3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/loopinsight.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | body: //div[@class='container_16']//div[@class='grid_11'] | ||
2 | strip: //h2[@class='mast'] | ||
3 | strip: //div[@class='container_16']//div[@class='grid_11']/h1 | ||
4 | strip: //div[@class='container_16']//div[@class='grid_11']/p[1] | ||
5 | strip: //div[@class='container_16']//div[@class='grid_11']/div | ||
6 | author: //a[starts-with(@title, 'Posts by')] | ||
7 | date: substring-before(substring-after(//time, 'Posted on '), ' at') | ||
8 | test_url: http://www.loopinsight.com/2012/09/13/forget-iphone-5-naysayers-this-thing-is-big/ | ||
9 | test_url: http://www.loopinsight.com/2011/05/20/playbook-returns-high-misses-sales-targets-by-90/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/lostgarden.com.txt b/inc/3rdparty/site_config/standard/lostgarden.com.txt new file mode 100644 index 00000000..a823e649 --- /dev/null +++ b/inc/3rdparty/site_config/standard/lostgarden.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | prune: no | ||
2 | convert_double_br_tags: yes | ||
3 | test_url: http://www.lostgarden.com/2012/04/loops-and-arcs.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/lrb.co.uk.txt b/inc/3rdparty/site_config/standard/lrb.co.uk.txt new file mode 100644 index 00000000..ce5053d4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/lrb.co.uk.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: substring-before(//title, ' · LRB') | ||
2 | |||
3 | body: //div[@class="article-body indent"] | ||
4 | |||
5 | date: substring-after(//p[@class="meta-info"]/a, '· ') | ||
6 | |||
7 | prune: no | ||
8 | test_url: http://www.lrb.co.uk/v33/n18/james-meek/its-already-happened \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/luminous-landscape.com.txt b/inc/3rdparty/site_config/standard/luminous-landscape.com.txt new file mode 100644 index 00000000..92ccf3ba --- /dev/null +++ b/inc/3rdparty/site_config/standard/luminous-landscape.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h2 | ||
2 | |||
3 | body: // div[@id='content'] | ||
4 | |||
5 | strip: //div[@class='sidebar_wrapper'] | ||
6 | test_url: http://www.luminous-landscape.com/tutorials/optimizing_exposure.shtml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt b/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt new file mode 100644 index 00000000..a8af5438 --- /dev/null +++ b/inc/3rdparty/site_config/standard/m.bbc.co.uk.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //div[@class="story-body"]/div[@class="story-inner"]/h1 | ||
2 | body: //div[@class="story-body"] | ||
3 | date: //p[@class='date']/strong | ||
4 | author: substring-after(//div[@class="story-inner"]/div[@class="byline"]//span[@class='name'], 'By') | ||
5 | |||
6 | strip: //div[@class="story-inner"]/div[@class="byline"] | ||
7 | |||
8 | test_url: http://m.bbc.co.uk/news/science-environment-19144464 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/m.guardian.co.uk.txt b/inc/3rdparty/site_config/standard/m.guardian.co.uk.txt new file mode 100644 index 00000000..f5f0dfca --- /dev/null +++ b/inc/3rdparty/site_config/standard/m.guardian.co.uk.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | title: //p[@class="txhead"] | ||
2 | author: //div[@class='txb'] | ||
3 | wrap_in(p): //div[@class='para'] | ||
4 | date: //div[@class='txb']/following-sibling::p/text()[substring(., 14)] | ||
5 | strip: //table[@class="tlogo"] | ||
6 | strip: //div[@class="cookieText"] | ||
7 | strip: //*[@class="sltb"] | ||
8 | strip: //*[@class="ijobs-x-link"] | ||
9 | strip: //*[@class="sponscolour"] | ||
10 | strip: //*[@class="sponsouter"] | ||
11 | strip: //div[@id="bottom-nav-block"]/following::* | ||
12 | test_url: http://m.guardian.co.uk/ms/p/gnm/op/s3OOwgO3yIhGuj41C1_S3Xg/view.m?id=15&gid=world/2012/jul/26/arctic-climate-change&cat=top-stories \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mac4ever.com.txt b/inc/3rdparty/site_config/standard/mac4ever.com.txt new file mode 100644 index 00000000..892b47f5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mac4ever.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | author: substring-after(//div[@class='author'],'Par ') | ||
2 | date: //div[@class='date'] | ||
3 | body: //div[@class='content'] | ||
4 | |||
5 | test_url: http://www.mac4ever.com/news/64182/icloud_les_prix_en_euros_et_en_chf/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/macdrifter.com.txt b/inc/3rdparty/site_config/standard/macdrifter.com.txt new file mode 100644 index 00000000..fd1ede7d --- /dev/null +++ b/inc/3rdparty/site_config/standard/macdrifter.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | title: substring-before(//title,' « Macdrifter') | ||
2 | test_url: http://www.macdrifter.com/2012/03/instacast-on-my-mac/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/macformat.techradar.com.txt b/inc/3rdparty/site_config/standard/macformat.techradar.com.txt new file mode 100644 index 00000000..109eae45 --- /dev/null +++ b/inc/3rdparty/site_config/standard/macformat.techradar.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | # Remove news feed | ||
2 | strip: //div[@id='news_feed_front'] | ||
3 | |||
4 | # Remove pull quote | ||
5 | strip: //div[@class='field field-type-text field-field-pull-quote'] | ||
6 | |||
7 | # Remove login | ||
8 | strip: //div[@class='right_bar_login'] | ||
9 | test_url: http://macformat.techradar.com/blog/solid-state-storage-bringing-parity-back-mac-29-10-10&article=89189666 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/macgeneration.com.txt b/inc/3rdparty/site_config/standard/macgeneration.com.txt new file mode 100644 index 00000000..e6bbe28e --- /dev/null +++ b/inc/3rdparty/site_config/standard/macgeneration.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | author: substring-before(substring-after(//div[@class='dateNews'],'par '),' le') | ||
2 | date: substring-after(//div[@class='dateNews'],' le ') | ||
3 | body: //div[@class='singleNews zoneApple'] | ||
4 | |||
5 | test_url: http://www.macgeneration.com/news/voir/211162/dropbox-encore-un-mac-et-deux-comptes-dropbox \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/macmagazine.com.br.txt b/inc/3rdparty/site_config/standard/macmagazine.com.br.txt new file mode 100644 index 00000000..47ebfd79 --- /dev/null +++ b/inc/3rdparty/site_config/standard/macmagazine.com.br.txt | |||
@@ -0,0 +1,21 @@ | |||
1 | # Remove sliders | ||
2 | strip: //*[(@class="slides_container")] | ||
3 | strip: //div[(@id="slides_two")] | ||
4 | |||
5 | # Remove tag cloud | ||
6 | strip: //span[(@class="secao")] | ||
7 | |||
8 | # Fix date article | ||
9 | # TODO | ||
10 | |||
11 | # Remove other stuff | ||
12 | strip: //div[(@id="idc-container")] | ||
13 | strip: //div[(@id="idc-noscript")] | ||
14 | strip: //div[(@class="linkwithin_div")] | ||
15 | strip: //div[(@class="navPosts")] | ||
16 | strip: //div[(@id="lateral")] | ||
17 | strip: //div[(@id="autor")] | ||
18 | strip: //div[(@id="rodape")] | ||
19 | strip: //div[(@id="post")]/h1 | ||
20 | strip: //div[(@id="post")]/div[(@id="boxInformacoes")] | ||
21 | test_url: http://macmagazine.com.br/2011/08/01/skype-para-ipad-esta-finalmente-chegando-a-app-store/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/macrumors.com.txt b/inc/3rdparty/site_config/standard/macrumors.com.txt new file mode 100644 index 00000000..76f999d3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/macrumors.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | author: substring-after(//div[@class='byline'], " by ") | ||
2 | date: substring-before(//div[@class='byline'], " by ") | ||
3 | |||
4 | # set body | ||
5 | body: //div[@class='content'] | ||
6 | |||
7 | # set title | ||
8 | title: //h3 | ||
9 | #strip: //div[@class='content']/h3 | ||
10 | test_url: http://www.macrumors.com/2010/11/10/apple-debuts-new-apple-tv-and-itunes-movie-content-in-japan/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/macstories.net.txt b/inc/3rdparty/site_config/standard/macstories.net.txt new file mode 100644 index 00000000..6e651ca0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/macstories.net.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | strip: //*[(@id = "featured")] | ||
2 | |||
3 | author:substring-after( //div[@class='posttitle']/h2[@class='author'],'by ') | ||
4 | |||
5 | date: concat(//div[@class='month'],' ',//div[@class='day']) | ||
6 | |||
7 | #macstories doesn't provide a year, but month/day is better than nothing | ||
8 | test_url: http://www.macstories.net/news/instapaper-4-0-available-completely-redesigned-ipad-ui-new-features-search-subscription/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mactalk.com.au.txt b/inc/3rdparty/site_config/standard/mactalk.com.au.txt new file mode 100644 index 00000000..e8d60522 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mactalk.com.au.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | author://div[@class="article_username_container_full"] | ||
2 | date://div[@class="article_username_container"] | ||
3 | body://div[@class="article cms_clear restore postcontainer"] | ||
4 | test_url: http://www.mactalk.com.au/content/chat-basil-shkara-developer-taptax-2452/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mactechnews.de.txt b/inc/3rdparty/site_config/standard/mactechnews.de.txt new file mode 100644 index 00000000..c3fc0e44 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mactechnews.de.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: substring-after(substring-after(//title, '>'), '>') | ||
2 | body: //div[@class='NewsArticleContent'] | ||
3 | test_url: http://www.mactechnews.de/news/index/Apple-Pressekonferenz-zum-iPhone-4-147316.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/macworld.com.txt b/inc/3rdparty/site_config/standard/macworld.com.txt new file mode 100644 index 00000000..96175872 --- /dev/null +++ b/inc/3rdparty/site_config/standard/macworld.com.txt | |||
@@ -0,0 +1,24 @@ | |||
1 | title: //article//h1 | ||
2 | date: //meta[@name="date"]/@content | ||
3 | author: //div[@class="author-name" or @class="article-byline"]/a[1] | ||
4 | |||
5 | body: //section[@class="page"] | ||
6 | |||
7 | # remove 'From the Lab' and 'Recent posts' text | ||
8 | strip: //div[@class='blogLabel'] | ||
9 | |||
10 | # remove byline and meta info | ||
11 | strip: //div[@class="article-meta"] | ||
12 | strip: //div[@class="author-info"] | ||
13 | |||
14 | #strip tags and categories | ||
15 | strip: //div[@class="department"] | ||
16 | |||
17 | #strip product cap links | ||
18 | strip: //div[@class="cap-main"] | ||
19 | strip: //div[@id="compare-lede"] | ||
20 | |||
21 | prune: no | ||
22 | |||
23 | # copes less well with Review pages, seems fine for News | ||
24 | test_url: http://www.macworld.com/article/163184/2011/10/the_ipod_as_an_iconic_cultural_force.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mainichi.jp.txt b/inc/3rdparty/site_config/standard/mainichi.jp.txt new file mode 100644 index 00000000..e701207f --- /dev/null +++ b/inc/3rdparty/site_config/standard/mainichi.jp.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@class='NewsArticle'] | ||
2 | |||
3 | test_url: http://mainichi.jp/select/weathernews/20110311/news/20110520k0000e040062000c.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mainpost.de.txt b/inc/3rdparty/site_config/standard/mainpost.de.txt new file mode 100644 index 00000000..a2d25d56 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mainpost.de.txt | |||
@@ -0,0 +1,28 @@ | |||
1 | title: substring-before(//title, '|') | ||
2 | body: //*[@id='content-left'] | ||
3 | |||
4 | # Why is this not working here? | ||
5 | # body: //*[@id='content-left']/div[@class='content-container'][2]/div[@class='content-body']/div[@class='inner-container']/div[@class='detail'] | ||
6 | |||
7 | |||
8 | #Header | ||
9 | strip_id_or_class: 'subHead' | ||
10 | strip_id_or_class: 'fl_right' | ||
11 | strip_id_or_class: 'infolink' | ||
12 | strip_id_or_class: 'content-head' | ||
13 | strip_id_or_class: 'tab' | ||
14 | strip_id_or_class: 'tab-active' | ||
15 | strip: //*[contains(@class,'trenner')] | ||
16 | |||
17 | # Headline | ||
18 | strip: //h1/* | ||
19 | strip_id_or_class: 'font16' | ||
20 | |||
21 | #Images | ||
22 | strip_id_or_class: 'leftimage' | ||
23 | strip_id_or_class: 'rightimage' | ||
24 | |||
25 | #Comments | ||
26 | strip: //table | ||
27 | strip: //p/following-sibling::*[0] | ||
28 | test_url: http://www.mainpost.de/ueberregional/meinung/Dioxin-Skandal-bringt-Agrarministerin-in-Bedraengnis;art9517,5920211 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/makeuseof.com.txt b/inc/3rdparty/site_config/standard/makeuseof.com.txt new file mode 100644 index 00000000..6809afed --- /dev/null +++ b/inc/3rdparty/site_config/standard/makeuseof.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | tidy: no | ||
2 | |||
3 | test_url: http://www.makeuseof.com/dir/kindle-it-web-pages-kindle-friendly/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/marco.org.txt b/inc/3rdparty/site_config/standard/marco.org.txt new file mode 100644 index 00000000..ef2e03d3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/marco.org.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | tidy: no | ||
2 | prune: no | ||
3 | date: //article//time[@pubdate] | ||
4 | title: //article/header/h2 | ||
5 | body: //article | ||
6 | strip: //header | ||
7 | test_url: http://www.marco.org/2012/09/08/businessweek-gruber | ||
8 | test_url: http://www.marco.org/2012/04/24/might-upgrade-someday \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/marksdailyapple.com.txt b/inc/3rdparty/site_config/standard/marksdailyapple.com.txt new file mode 100644 index 00000000..0077f560 --- /dev/null +++ b/inc/3rdparty/site_config/standard/marksdailyapple.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | strip_id_or_class: wwsgd | ||
2 | test_url: http://www.marksdailyapple.com/are-detoxes-and-cleanses-safe-and-effective/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/martinfowler.com.txt b/inc/3rdparty/site_config/standard/martinfowler.com.txt new file mode 100644 index 00000000..8e0e349f --- /dev/null +++ b/inc/3rdparty/site_config/standard/martinfowler.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | date: //div[@id="main"]/p[@class="date"] | ||
2 | author: string("Martin Fowler") | ||
3 | body: //div[@id="main"] | ||
4 | strip_id_or_class: date | ||
5 | strip_id_or_class: tags | ||
6 | strip_id_or_class: tagLabel | ||
7 | strip: //div[@id="main"]/h1[1] | ||
8 | test_url: http://martinfowler.com/bliki/DatabaseThaw.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mashable.com.txt b/inc/3rdparty/site_config/standard/mashable.com.txt new file mode 100644 index 00000000..2c5a14a6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mashable.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: //header[@class='entry-title']/h1 | ||
2 | body: //div[@class='description'] | ||
3 | strip: //div[@class='ytm-gallery-box'] | ||
4 | test_url: http://mashable.com/2011/12/05/india-wants-google-and-facebook-to-censor-user-content/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mattcutts.com.txt b/inc/3rdparty/site_config/standard/mattcutts.com.txt new file mode 100644 index 00000000..76b1eac6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mattcutts.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | date: //*[@class = 'published'] | ||
2 | test_url: http://www.mattcutts.com/blog/internet-censorship-sopa/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mbl.is.txt b/inc/3rdparty/site_config/standard/mbl.is.txt new file mode 100644 index 00000000..fd26f091 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mbl.is.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //div[@class="frett-main"] | ||
2 | test_url: http://mbl.is/frettir/innlent/2012/02/21/litill_munur_a_fargjaldaverdi/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/medialens.org.txt b/inc/3rdparty/site_config/standard/medialens.org.txt new file mode 100644 index 00000000..94f27b71 --- /dev/null +++ b/inc/3rdparty/site_config/standard/medialens.org.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | strip: //div[contains(@class, 'article-tools')] | ||
2 | test_url: http://www.medialens.org/index.php/alerts/alert-archive/2012/713-the-illusion-of-democracy.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/menshealth.com.txt b/inc/3rdparty/site_config/standard/menshealth.com.txt new file mode 100644 index 00000000..e7e1e269 --- /dev/null +++ b/inc/3rdparty/site_config/standard/menshealth.com.txt | |||
@@ -0,0 +1,16 @@ | |||
1 | # need to find a way to eliminate <span> content for "related content" without eliminating important content | ||
2 | |||
3 | convert_double_br_tags: yes | ||
4 | #body: //div[@id='leftside'] | ||
5 | title: //h1 | ||
6 | title: //h2 | ||
7 | author: substring-after(//h4, 'By ') | ||
8 | author: substring-after(//h4, 'By: ') | ||
9 | #Strip: //span | ||
10 | strip_id_or_class: morefromcat | ||
11 | strip_id_or_class: mostpopular | ||
12 | strip_id_or_class: articlepagination | ||
13 | strip_id_or_class: toolbar | ||
14 | body: //div[@id='zmodcontent'] | ||
15 | single_page_link: //li[@class='onepage'] //a[contains (@href, 'printer.php')] | ||
16 | test_url: http://www.menshealth.com/mhlists/pursuit_of_happiness/index.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mikeash.com.txt b/inc/3rdparty/site_config/standard/mikeash.com.txt new file mode 100644 index 00000000..af8a7d30 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mikeash.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //div[@class="blogtitle"] | ||
2 | strip: //div[@class="blogtitle"] | ||
3 | |||
4 | author: substring-after(//span[@class="blogheader"], 'Author: ') | ||
5 | test_url: http://www.mikeash.com/pyblog/friday-qa-2012-01-13-the-mac-toolbox.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mikeindustries.com.txt b/inc/3rdparty/site_config/standard/mikeindustries.com.txt new file mode 100644 index 00000000..3d488e13 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mikeindustries.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //div[@class='post_content']/h2 | ||
2 | date: //div[@class='dateline'] | ||
3 | body: //div[@class='entry'] | ||
4 | |||
5 | strip: //div[@class='closer'] | ||
6 | strip: //div[@class='navigation'] | ||
7 | strip: //div[@class='aux_pane'] | ||
8 | strip: //div[@class='aux_aux_pane'] | ||
9 | test_url: http://www.mikeindustries.com/blog/archive/2011/10/never-be-another \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt b/inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt new file mode 100644 index 00000000..7e43d63c --- /dev/null +++ b/inc/3rdparty/site_config/standard/minnesota.publicradio.org.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //*[@class="article"]/h1 | ||
2 | date: //*[@class="article"]/div[@class="date"] | ||
3 | |||
4 | # strip the title and date from the article text | ||
5 | strip: //*[@class="article"]/h1 | ||
6 | strip: //*[@class="article"]/div[@class="date"] | ||
7 | |||
8 | # strip annoying <br> between metadata and article | ||
9 | strip: //*[@class="article"]/div[@class="date"]/following-sibling::br | ||
10 | test_url: http://minnesota.publicradio.org/display/web/2012/06/19/health/senators-want-health-care-ruling-on-tv/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/minnpost.com.txt b/inc/3rdparty/site_config/standard/minnpost.com.txt new file mode 100644 index 00000000..51a0630b --- /dev/null +++ b/inc/3rdparty/site_config/standard/minnpost.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //*[@id="content-header"]/h1 | ||
2 | author: //*[contains(@class, 'byline')]/a/text() | ||
3 | date: substring-after(//*[contains(@class, 'byline')]/text()[2], '|') | ||
4 | body: //*[contains(@class, 'node-body')] | ||
5 | test_url: http://www.minnpost.com/eric-black-ink/2012/06/overturning-obamacare-would-be-game-changer-supreme-court \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt b/inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt new file mode 100644 index 00000000..4215a051 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mirrorfootball.co.uk.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | # Remove extra links | ||
2 | strip: //*[@class='appended_html'] | ||
3 | test_url: http://www.mirrorfootball.co.uk/news/West-Ham-crisis-Carlton-Cole-slams-diabolical-performance-and-rips-into-Avram-Grant-lack-of-tactical-nous-following-Liverpool-mauling-article636151.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mises.org.txt b/inc/3rdparty/site_config/standard/mises.org.txt new file mode 100644 index 00000000..ae542aa6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mises.org.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | strip_id_or_class: 'book-ad' | ||
2 | strip_id_or_class: 'bigger pullquote' | ||
3 | strip_id_or_class: 'subscribe' | ||
4 | strip_id_or_class: 'blog-link' | ||
5 | test_url: http://mises.org/daily/4804 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mlb.mlb.com.txt b/inc/3rdparty/site_config/standard/mlb.mlb.com.txt new file mode 100644 index 00000000..30e8aff2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mlb.mlb.com.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | title: //h1[@class='article-headline'] | ||
2 | date: //span[@class='timeStamp'] | ||
3 | author: substring-before(//p[@class='article-byline'], '/') | ||
4 | body: //div[@id='article'] | ||
5 | #strip: //div[@class='inner'] | ||
6 | strip: //div[@id='article_head'] | ||
7 | strip: //p[@class='tagLine'] | ||
8 | strip: //div[@id='article_related_links'] | ||
9 | strip: //div[@id='article_related_mlb'] | ||
10 | strip: //span[@class='more'] | ||
11 | strip: //div[@class='article_component'] | ||
12 | strip: //span[@class='screen_reader'] | ||
13 | strip: //ul[@class='columnists_blurb'] | ||
14 | test_url: http://mlb.mlb.com/news/article.jsp?ymd=20120403&content_id=27880830 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mlb.sbnation.com.txt b/inc/3rdparty/site_config/standard/mlb.sbnation.com.txt new file mode 100644 index 00000000..c4e3389e --- /dev/null +++ b/inc/3rdparty/site_config/standard/mlb.sbnation.com.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | title: //h1[@id = 'stream_title'] | ||
2 | author: //p[@class = 'byline']/a | ||
3 | date: //span[@class = 'datetime'] | ||
4 | |||
5 | body: //div[@id = 'stream_container'] | ||
6 | strip: //p[@class = 'byline'] | ||
7 | strip_id_or_class: stream_summary | ||
8 | strip_id_or_class: social-spoken | ||
9 | strip_id_or_class: datetime | ||
10 | strip_id_or_class: author-mini-profile | ||
11 | strip_id_or_class: social-tools | ||
12 | strip_id_or_class: entry-tags | ||
13 | strip_id_or_class: fb-like-box | ||
14 | test_url: http://mlb.sbnation.com/2011/10/17/2495845/2011-world-series-st-louis-cardinals-texas-rangers-home-field-advantage \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mlssoccer.com.txt b/inc/3rdparty/site_config/standard/mlssoccer.com.txt new file mode 100644 index 00000000..41e15136 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mlssoccer.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //*[@class="header_title"]/h1 | ||
2 | date: //*[@class="field-date"] | ||
3 | author: //*[@class="field-author"] | ||
4 | body: //div[contains(@class, 'content')] | ||
5 | |||
6 | test_url: http://www.mlssoccer.com/news/article/2012/06/19/lack-depth-front-forces-arena-alter-las-formation \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mmo-champion.com.txt b/inc/3rdparty/site_config/standard/mmo-champion.com.txt new file mode 100644 index 00000000..918fae36 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mmo-champion.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id = 'article_content']/div[contains(@class,'article')] | ||
3 | author: //sub[@class = 'article_promoted_text']/a[starts-with(@href, 'member')] | ||
4 | date: //div[@class = 'article_username_container'] | ||
5 | test_url: http://www.mmo-champion.com/content/2688-Other-Press-Tour-Interviews-A-Night-in-Mists-of-Pandaria-Blue-Posts-MoP-Screenshot \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mnn.com.txt b/inc/3rdparty/site_config/standard/mnn.com.txt new file mode 100644 index 00000000..ddfe6fa2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mnn.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | tidy: no | ||
2 | author: //div[@id="above-content"]//img/@alt | //div[@class="comment-auth"]/span[1]/a/text() | ||
3 | date: //div[@class="comment-auth"]/div | //div[@class="comment-auth"]/span[2] | ||
4 | body: //div[@class="node"] | ||
5 | |||
6 | strip_id_or_class: vertical-social-bar | ||
7 | strip_id_or_class: blogs_paginator | ||
8 | strip_id_or_class: horizontal-social-links | ||
9 | strip_id_or_class: servicelinksdiv | ||
10 | |||
11 | test_url: http://www.mnn.com/green-tech/research-innovations/blogs/5-breakthroughs-that-will-make-solar-power-cheaper-than-coal \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mno.hu.txt b/inc/3rdparty/site_config/standard/mno.hu.txt new file mode 100644 index 00000000..ba158953 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mno.hu.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | title: //title | ||
2 | |||
3 | author: //div[@class="author"] | ||
4 | |||
5 | strip_id_or_class: 'header' | ||
6 | strip_id_or_class: 'cikk_ajanlo' | ||
7 | strip_id_or_class: 'buttons' | ||
8 | strip_id_or_class: 'related' | ||
9 | strip_id_or_class: 'adbox ad_cikk_kozepre' | ||
10 | strip_id_or_class: 'cikk-cimkek' | ||
11 | strip_id_or_class: 'cikk_ertekeles' | ||
12 | |||
13 | strip_comments: yes | ||
14 | test_url: http://mno.hu/grund/a-gumibottal-hadonaszo-rendort-joval-konnyebb-utalni-1055351 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mobile.slate.com.txt b/inc/3rdparty/site_config/standard/mobile.slate.com.txt new file mode 100644 index 00000000..d5d81034 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mobile.slate.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //h2[@class="article_title"] | ||
2 | strip: //a[@class="houseAdLink"] | ||
3 | strip: //h1 | ||
4 | strip: //div[@class="more_articles"] | ||
5 | test_url: http://mobile.slate.com/rss.jsp?rssid=411&item=http%3a%2f%2fwww.slate.com%2fdefault.aspx%3fdisplaymode%3d201%26id%3d2293749%26device%3drss \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt b/inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt new file mode 100644 index 00000000..a1cc5317 --- /dev/null +++ b/inc/3rdparty/site_config/standard/mobileopportunity.blogspot.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | body: //div[@class='post uncustomized-post-template'] | ||
2 | |||
3 | # remove duplicate of post title, which is a link | ||
4 | strip: //h3[@class='post-title'] | ||
5 | |||
6 | # remove permalink and timestamp, which isn't useful as it's a time with no date | ||
7 | strip: //span[@class='post-timestamp'] | ||
8 | |||
9 | # remove labels (tags) | ||
10 | strip: //span[@class='post-labels'] | ||
11 | test_url: http://mobileopportunity.blogspot.com/2010/12/rims-q3-financials-tale-of-two.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/modernghana.com.txt b/inc/3rdparty/site_config/standard/modernghana.com.txt new file mode 100644 index 00000000..4c93d0cf --- /dev/null +++ b/inc/3rdparty/site_config/standard/modernghana.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | author: //meta[@name="author"]/@content | ||
3 | date: //span[@class='date1'] | ||
4 | body: //div[@id='newsimage'] | //div[@id='bodytext'] | ||
5 | tidy: no | ||
6 | prune: no | ||
7 | |||
8 | test_url: http://www.modernghana.com/news/323765/1/039ghost039-teachers-removed-salaries-allowances-p.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/money.cnn.com.txt b/inc/3rdparty/site_config/standard/money.cnn.com.txt new file mode 100644 index 00000000..a0d1628a --- /dev/null +++ b/inc/3rdparty/site_config/standard/money.cnn.com.txt | |||
@@ -0,0 +1,24 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | title: //h1[@class='storyheadline'] | ||
3 | author: //meta[@name="AUTHOR"]/@content | ||
4 | date: //span[@class='cnnDateStamp'] | ||
5 | date: //meta[@name="DATE"]/@content | ||
6 | body: //div[@id='storytext' or @class='storytext'] | ||
7 | |||
8 | strip_id_or_class: ie_column | ||
9 | strip_id_or_class: sharewidgets | ||
10 | strip_image_src: bug.gif | ||
11 | |||
12 | strip: //div[@class="hed_side"] | ||
13 | strip: //span[@class="byline"] | ||
14 | strip: //a[@class="soc-twtname"] | ||
15 | strip: //span[@class="cnnDateStamp"] | ||
16 | strip: //div[@class="storytimestamp"] | ||
17 | strip: //div[@class="cnnCol_side"] | ||
18 | |||
19 | prune: no | ||
20 | tidy: no | ||
21 | |||
22 | test_url: http://money.cnn.com/2011/03/15/news/companies/steve_jobs_thought_process.fortune/index.htm?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29 | ||
23 | test_url: http://money.cnn.com/2012/01/27/markets/markets_newyork/index.htm | ||
24 | test_url: http://money.cnn.com/2012/05/13/technology/yahoo-ceo-out-rumor/index.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/monkeyzen.com.txt b/inc/3rdparty/site_config/standard/monkeyzen.com.txt new file mode 100644 index 00000000..f779c38e --- /dev/null +++ b/inc/3rdparty/site_config/standard/monkeyzen.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://monkeyzen.com/2011/09/siluetas-de-clasicos-a-modo-de-vinilos \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/moonsault.de.txt b/inc/3rdparty/site_config/standard/moonsault.de.txt new file mode 100644 index 00000000..061a8d5c --- /dev/null +++ b/inc/3rdparty/site_config/standard/moonsault.de.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | strip_image_src: menu | ||
2 | strip_image_src: templates | ||
3 | strip: //div/a | ||
4 | strip: //div/b | ||
5 | strip: //div/strong | ||
6 | strip: //td[@width='30%'] | ||
7 | strip: //br[1] | ||
8 | strip: //br[2] | ||
9 | strip: //br[3] | ||
10 | strip: //br[4] | ||
11 | strip: //a[@href='http://www.moonsault.de/newzboard/index.php?act=home'] | ||
12 | strip_id_or_class: cse-branding-right | ||
13 | test_url: http://www.moonsault.de/newzboard/index.php?news=22321&act=previous \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/moreintelligentlife.com.txt b/inc/3rdparty/site_config/standard/moreintelligentlife.com.txt new file mode 100644 index 00000000..a7e59c30 --- /dev/null +++ b/inc/3rdparty/site_config/standard/moreintelligentlife.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //h1[@class='print-title'] | ||
2 | body: //div[@class='print-submitted' or @class='print-created' or @class='print-content'] | ||
3 | prune: no | ||
4 | |||
5 | single_page_link: //li[@class='print']/a | ||
6 | |||
7 | test_url: http://moreintelligentlife.com/content/places/paul-markillie/they-trash-cars-dont-they \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/motherboard.vice.com.txt b/inc/3rdparty/site_config/standard/motherboard.vice.com.txt new file mode 100644 index 00000000..6faf1c9a --- /dev/null +++ b/inc/3rdparty/site_config/standard/motherboard.vice.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | author: //span[@class="author"]/a | ||
2 | date: //span[@class="date"] | ||
3 | body: //div[@class="story-content"] | ||
4 | strip: //aside | ||
5 | test_url: http://motherboard.vice.com/blog/you-can-carry-a-copy-of-the-pirate-bay-in-your-pocket \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/mothering.com.txt b/inc/3rdparty/site_config/standard/mothering.com.txt new file mode 100644 index 00000000..a9d9195f --- /dev/null +++ b/inc/3rdparty/site_config/standard/mothering.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //h2[contains(@class,'post_headline')] | ||
2 | body: //div[@class='entry'] | ||
3 | convert_double_br_tags: yes | ||
4 | strip_image_src: _selected.gif | ||
5 | strip_id_or_class: addthis_ | ||
6 | strip: //a[contains(@href,'feedburner.com')] | ||
7 | test_url: http://mothering.com/all-things-mothering/inspiration/motherhood-brings-me-down \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/motherjones.com.txt b/inc/3rdparty/site_config/standard/motherjones.com.txt new file mode 100644 index 00000000..d58c7d2c --- /dev/null +++ b/inc/3rdparty/site_config/standard/motherjones.com.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id = 'content-area'] | ||
3 | next_page_link: //div[@class='node-pager']/a[contains(@class, 'next')] | ||
4 | tidy: no | ||
5 | author: //p[contains(@class, 'byline')]/a | ||
6 | |||
7 | strip_id_or_class: node-header | ||
8 | strip_id_or_class: hdr-tools | ||
9 | strip_id_or_class: node-body-break | ||
10 | strip_id_or_class: pullquote | ||
11 | strip_id_or_class: node-pager | ||
12 | strip_id_or_class: author-bio | ||
13 | strip_id_or_class: node-footer | ||
14 | |||
15 | test_url: http://motherjones.com/politics/2012/02/mac-mcclelland-free-online-shipping-warehouses-labor \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/motorfull.com.txt b/inc/3rdparty/site_config/standard/motorfull.com.txt new file mode 100644 index 00000000..c6bec7e9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/motorfull.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://motorfull.com/2011/09/aparca-valeo-park4u-remote \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/msdn.microsoft.com.txt b/inc/3rdparty/site_config/standard/msdn.microsoft.com.txt new file mode 100644 index 00000000..f4f20450 --- /dev/null +++ b/inc/3rdparty/site_config/standard/msdn.microsoft.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@class="mainBody"] | ||
2 | footnotes: no | ||
3 | test_url: http://msdn.microsoft.com/en-us/library/hh542796(VS.103).aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/msnbc.msn.com.txt b/inc/3rdparty/site_config/standard/msnbc.msn.com.txt new file mode 100644 index 00000000..ad89cda8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/msnbc.msn.com.txt | |||
@@ -0,0 +1,21 @@ | |||
1 | title: //title | ||
2 | author: //div[@id='byline'] | ||
3 | |||
4 | date: //div[contains(@class,'timestamp')]/abbr/text() | ||
5 | |||
6 | body: //div[@id='intellitTXT'] | ||
7 | |||
8 | strip: //div[@id='byline'] | ||
9 | strip: //div[contains(@class,'timestamp')] | ||
10 | strip: //div[contains(@class, 'ad-label')] | ||
11 | strip: //div[contains(@class, 'ad-break')] | ||
12 | strip: //span[contains(@class, 'x-video')] | ||
13 | strip: //span[contains(@class, 'inline')] | ||
14 | strip: //div[contains(@class, 'video')] | ||
15 | strip: //div[contains(@class, 'discuss')] | ||
16 | strip: //div[@id='most-popular'] | ||
17 | strip: //div[contains(@class,'drawer')] | ||
18 | strip: //*[contains(@class, 'hide')] | ||
19 | |||
20 | footnotes: no | ||
21 | test_url: http://www.msnbc.msn.com/id/44748412/ns/business-world_business/#.TolUv-vfDbE \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/myfoxboston.com.txt b/inc/3rdparty/site_config/standard/myfoxboston.com.txt new file mode 100644 index 00000000..1a35b4fc --- /dev/null +++ b/inc/3rdparty/site_config/standard/myfoxboston.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //div[@class="col1"]//div[@class="photo"] | //div[@class="detail"]/p[@class="fontStyle21"] | //div[@class="story last"] | ||
2 | tidy: no | ||
3 | |||
4 | test_url: http://www.myfoxboston.com/dpp/news/local/transit-police-say-woman-spat-on-mbta-bus-driver-2010611 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/myrecipes.com.txt b/inc/3rdparty/site_config/standard/myrecipes.com.txt new file mode 100644 index 00000000..8b99d22d --- /dev/null +++ b/inc/3rdparty/site_config/standard/myrecipes.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | title: //h2[contains(@class, 'name')] | ||
2 | body: //div[@class='printFullPageContentContainer']//div[contains(@class, 'recipe')] | ||
3 | |||
4 | strip_id_or_class: photoBy | ||
5 | strip_id_or_class: link | ||
6 | |||
7 | single_page_link: //li[@class='print']/a[contains(@href, '/print/')] | ||
8 | |||
9 | prune: no | ||
10 | tidy: no | ||
11 | |||
12 | test_url: http://www.myrecipes.com/recipe/hummingbird-cake-10000000387218/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/narenji.ir.txt b/inc/3rdparty/site_config/standard/narenji.ir.txt new file mode 100644 index 00000000..6c3d0c24 --- /dev/null +++ b/inc/3rdparty/site_config/standard/narenji.ir.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //div[@class='node'] | ||
2 | test_url: http://www.narenji.ir/2806 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nasa.gov.txt b/inc/3rdparty/site_config/standard/nasa.gov.txt new file mode 100644 index 00000000..d95530f3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/nasa.gov.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //div[@class='address']/span | ||
2 | author: substring-before(//span[@class='credits'],',') | ||
3 | date: //div[@class='promodatepress']/span | ||
4 | body: //div[@class='default_style_wrap'] | ||
5 | strip: //div[@class='text_adjust'] | ||
6 | strip: //div[@class='skiplink'] | ||
7 | strip: //h2 | ||
8 | test_url: http://www.nasa.gov/mission_pages/kepler/news/kepler-21b.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nbweekly.com.txt b/inc/3rdparty/site_config/standard/nbweekly.com.txt new file mode 100644 index 00000000..0b722d33 --- /dev/null +++ b/inc/3rdparty/site_config/standard/nbweekly.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | date://span[contains(@class,'date')] | ||
2 | |||
3 | body://div[contains(@class,'contWarp')] | ||
4 | |||
5 | strip://div[contains(@class,'keyWord')] | ||
6 | strip://div[contains(@class,'submitComt')] | ||
7 | strip://div[contains(@class,'cmts')] | ||
8 | strip://div[contains(@class,'notice')] | ||
9 | strip://div[contains(@class,'part pt-second')] | ||
10 | test_url: http://www.nbweekly.com/news/china/201203/29316.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/neh.gov.txt b/inc/3rdparty/site_config/standard/neh.gov.txt new file mode 100644 index 00000000..45136a2b --- /dev/null +++ b/inc/3rdparty/site_config/standard/neh.gov.txt | |||
@@ -0,0 +1,17 @@ | |||
1 | #host configuration should be http://www.neh.gov/news/humanities/ | ||
2 | |||
3 | |||
4 | #meta data | ||
5 | title:substring-after(substring-after(//title,':'),':') | ||
6 | author:substring-after(//h2[@class = 'subHead'],'By') | ||
7 | date:substring-before(substring-after(//title,':'),':') | ||
8 | |||
9 | #img and caption handling | ||
10 | wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text() | ||
11 | wrap_in(fieldset)://div[@id = 'mainContent']/table | ||
12 | |||
13 | # clean up | ||
14 | strip: //table[@class = 'marginpaddingTop'] | ||
15 | strip: //h2[@class = 'subHead'] | ||
16 | |||
17 | test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/neomoney.co.txt b/inc/3rdparty/site_config/standard/neomoney.co.txt new file mode 100644 index 00000000..564d5492 --- /dev/null +++ b/inc/3rdparty/site_config/standard/neomoney.co.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: //*[@class="header_title"]/h1 | ||
2 | body: //div[contains(@class, 'content')] | ||
3 | test_url: http://neomoney.co/personal/expatriate-and-migrant-loans/expatriate-loans/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/net-security.org.txt b/inc/3rdparty/site_config/standard/net-security.org.txt new file mode 100644 index 00000000..4e6d66d4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/net-security.org.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //div[@class='content-title'] | ||
2 | #date: substring-after(//div[@class='dernek-text-under'],'Posted on') | ||
3 | body: //div[@class='content-item'] | ||
4 | next_page_link: //li[@class='next']/a | ||
5 | convert_double_br_tags: yes | ||
6 | |||
7 | test_url: http://www.net-security.org/article.php?id=1732 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/netmagazine.com.txt b/inc/3rdparty/site_config/standard/netmagazine.com.txt new file mode 100644 index 00000000..86885445 --- /dev/null +++ b/inc/3rdparty/site_config/standard/netmagazine.com.txt | |||
@@ -0,0 +1,16 @@ | |||
1 | title: //h1 | ||
2 | author: //div[@class="submitted"]/span | ||
3 | |||
4 | # seems like this should work, but nothing is returned. Issue with xpath parser? | ||
5 | date: //div[@class="submitted"]/time | ||
6 | |||
7 | body: //div[@id="main-content"] | ||
8 | |||
9 | strip_comments: no | ||
10 | |||
11 | strip: //h1 | ||
12 | strip: //div[@class="submitted"] | ||
13 | strip: //dd[@class="profile-avatar"] | ||
14 | strip: //div[@class="author-profile"]/dl/dt[1] | ||
15 | strip: //div[@id="right-col"] | ||
16 | test_url: http://www.netmagazine.com/opinions/nielsen-wrong-mobile \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/netzpolitik.org.txt b/inc/3rdparty/site_config/standard/netzpolitik.org.txt new file mode 100644 index 00000000..87dc3cdf --- /dev/null +++ b/inc/3rdparty/site_config/standard/netzpolitik.org.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h1[@class='entry-title'] | ||
2 | author: //a[@rel='author'] | ||
3 | date: //span[@class='entry-date'] | ||
4 | body: //div[@class='entry-content'] | ||
5 | |||
6 | test_url: http://netzpolitik.org/2011/buch-generation-facebook/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/newmatilda.com.txt b/inc/3rdparty/site_config/standard/newmatilda.com.txt new file mode 100644 index 00000000..ab766847 --- /dev/null +++ b/inc/3rdparty/site_config/standard/newmatilda.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //div[@id="maincontent"]/h1 | ||
2 | body: //div[@id="maincontent"] | ||
3 | date: //div[@id="maincontent"]/p[2] | ||
4 | author: //ul[@id="contributors"]/li/p/b | ||
5 | |||
6 | strip: //p[@*] | ||
7 | strip: //h1 | ||
8 | strip: //div[@id="maincontent"]/div | ||
9 | test_url: http://newmatilda.com/2011/07/22/turnbull-makes-sense-climate \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news-gazette.com.txt b/inc/3rdparty/site_config/standard/news-gazette.com.txt new file mode 100644 index 00000000..1f1e5d3a --- /dev/null +++ b/inc/3rdparty/site_config/standard/news-gazette.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //div[@id="main-content"]//h2 | ||
2 | |||
3 | author: //div[@id="main-content"]//span[@class="authors"] | ||
4 | |||
5 | date: //div[@id="main-content"]//span[@class="timestamp"] | ||
6 | |||
7 | body: //div[@id="main-content"]//div[@class="content"] | ||
8 | test_url: http://www.news-gazette.com/news/business/economy/2011-08-08/ibm-drops-out-blue-waters-project.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.cnet.com.txt b/inc/3rdparty/site_config/standard/news.cnet.com.txt new file mode 100644 index 00000000..b7ab224a --- /dev/null +++ b/inc/3rdparty/site_config/standard/news.cnet.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | #This should apply to *.cnet.com. Not just news.cnet.com. | ||
2 | title: //h1 | ||
3 | author: //img[@class="mugshot"]/@alt | ||
4 | strip: //h1 | ||
5 | strip_id_or_class: breadcrumb | ||
6 | strip: //p[@id="introP"] | ||
7 | strip: //div[@class="postByline"] | ||
8 | strip: //div[@class="editorBio"] | ||
9 | strip: //div[@class="inline-slideshow"] | ||
10 | strip: //div[@class="related"] | ||
11 | body: //div[@class="postBody txtWrap"] | ||
12 | test_url: http://news.cnet.com/8301-27076_3-57405303-248/apple-ipad-charging-fine-keep-it-plugged-in/?tag=mncol;posts \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.detik.com.txt b/inc/3rdparty/site_config/standard/news.detik.com.txt new file mode 100644 index 00000000..3ed1dc85 --- /dev/null +++ b/inc/3rdparty/site_config/standard/news.detik.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title://div[@class="content_detail"]/h1 | ||
2 | |||
3 | author://div[@class="author"]/strong | ||
4 | |||
5 | date:substring-before(substring-after(//div[@class="content_detail"]/span[@class="date"], ','), ' WIB') | ||
6 | |||
7 | body://div[@class="text_detail"] | ||
8 | test_url: http://news.detik.com/read/2012/05/22/225531/1922307/10/menkeu-cek-soal-lolosnya-315-kg-sabu-dari-bea-cukai \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.kanaloco.jp.txt b/inc/3rdparty/site_config/standard/news.kanaloco.jp.txt new file mode 100644 index 00000000..6fc86137 --- /dev/null +++ b/inc/3rdparty/site_config/standard/news.kanaloco.jp.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | body: //div[@id='main'] | ||
2 | strip: //div[@id='sbs'] | ||
3 | strip: //div[@id='fsizeSwitch'] | ||
4 | strip: //div[@id='googleAd'] | ||
5 | strip: //div[@id='detailFoot'] | ||
6 | strip_image_src: counter?key | ||
7 | convert_double_br_tags: yes | ||
8 | |||
9 | test_url: http://news.kanaloco.jp/localnews/article/1105200018/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.mynavi.jp.txt b/inc/3rdparty/site_config/standard/news.mynavi.jp.txt new file mode 100644 index 00000000..ded680f1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/news.mynavi.jp.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //h2[@class="lyt-hdg-02-04"] | ||
2 | |||
3 | author: //div[@class="lyt-namearea"]/a | ||
4 | |||
5 | date: //div[@class="lyt-namearea"]/text() | ||
6 | |||
7 | body: //div[@class="articleContent"] | ||
8 | |||
9 | strip: //div[@id="tab-aside"] | ||
10 | |||
11 | test_url: http://news.mynavi.jp/articles/2011/12/07/nico/index.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.orf.at.txt b/inc/3rdparty/site_config/standard/news.orf.at.txt new file mode 100644 index 00000000..b60deea4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/news.orf.at.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | single_page_link: //div[@id='content']//p[@class='readMore']/a | ||
2 | |||
3 | title: //div[@class='hidden offscreen']/h2 | ||
4 | body: //div[@id="storyText"] | ||
5 | move_into(//div[@id='storyText']): //div[@class='fact'] | ||
6 | strip: //small[@class='credit'] | ||
7 | strip: //small[@class='caption'] | ||
8 | date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am') | ||
9 | strip: //p[@class='toplink'] | ||
10 | |||
11 | test_url: http://news.orf.at/stories/2084731/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.rambler.ru.txt b/inc/3rdparty/site_config/standard/news.rambler.ru.txt new file mode 100644 index 00000000..743245f8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/news.rambler.ru.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | body: //article | ||
2 | title: //h1 | ||
3 | author: //span[@class='b-article-source-dropdown'] | ||
4 | strip: //span[@class='b-article-photo-incut__source'] | ||
5 | strip: //a[@class='b-read-more b-read-more_bottom'] | ||
6 | |||
7 | |||
8 | tidy:no | ||
9 | test_url: http://news.rambler.ru/12972208/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.techmeme.com.txt b/inc/3rdparty/site_config/standard/news.techmeme.com.txt new file mode 100644 index 00000000..c80c3327 --- /dev/null +++ b/inc/3rdparty/site_config/standard/news.techmeme.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //div[@class='main']/div[@class='item'] | ||
2 | strip: //div[@class='right'] | ||
3 | |||
4 | test_url: http://news.techmeme.com/110516/fh-rip \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.yahoo.com.txt b/inc/3rdparty/site_config/standard/news.yahoo.com.txt new file mode 100644 index 00000000..5ee04049 --- /dev/null +++ b/inc/3rdparty/site_config/standard/news.yahoo.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | title: //h1[@class='headline'] | ||
3 | author: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//span[@class='fn'] | ||
4 | date: //cite[contains(@class, 'byline') and contains(@class, 'vcard')]//abbr/@title | ||
5 | body: //div[@id='mediaarticlelead']//a[@class='media'] | //div[contains(@class,'yom-art-content')] | ||
6 | #strip: //cite/abbr | ||
7 | strip_id_or_class: action | ||
8 | strip_id_or_class: prefetch | ||
9 | tidy: no | ||
10 | prune: no | ||
11 | |||
12 | test_url: http://news.yahoo.com/cold-la-nina-winter-forecast-west-coast-183535067.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/news.ycombinator.com.txt b/inc/3rdparty/site_config/standard/news.ycombinator.com.txt new file mode 100644 index 00000000..0b01f8a1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/news.ycombinator.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | strip_comments: no | ||
2 | strip: //a[. = 'reply'] | ||
3 | test_url: http://news.ycombinator.com/item?id=1516461 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/newsbomb.gr.txt b/inc/3rdparty/site_config/standard/newsbomb.gr.txt new file mode 100644 index 00000000..0500890f --- /dev/null +++ b/inc/3rdparty/site_config/standard/newsbomb.gr.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | date: //meta[@name='og:article:published_time']/@value | ||
2 | |||
3 | body: //div[@class='itemIntroText' or @class='itemImageBlock' or @class='itemFullText'] | ||
4 | |||
5 | strip_id_or_class: itemImageGallery | ||
6 | |||
7 | prune: no | ||
8 | |||
9 | test_url: http://www.newsbomb.gr/gossip/story/257234/i-proin-moy-protimoyse-na-serfarei-apo-to-na-kanoyme-sex \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/newsle.com.txt b/inc/3rdparty/site_config/standard/newsle.com.txt new file mode 100644 index 00000000..e500ddcc --- /dev/null +++ b/inc/3rdparty/site_config/standard/newsle.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | single_page_link: //iframe/@src | ||
2 | test_url: http://newsle.com/article/0/15831103/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/newsmill.se.txt b/inc/3rdparty/site_config/standard/newsmill.se.txt new file mode 100644 index 00000000..eb7d3350 --- /dev/null +++ b/inc/3rdparty/site_config/standard/newsmill.se.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | title: //h1 | ||
2 | body: (//div[@class='articleImg']//img)[1] | //p[contains(@class, 'commentTextArticle') or contains(@class, 'articlePublished')] | //div[@id='articleLeftContent'] | ||
3 | author: //div[@class='byline']//a[contains(@href, '/user/')] | ||
4 | |||
5 | strip_id_or_class: facts | ||
6 | strip_id_or_class: articleBlogsHolder | ||
7 | strip_id_or_class: byline | ||
8 | |||
9 | prune: no | ||
10 | tidy: no | ||
11 | |||
12 | test_url: http://www.newsmill.se/artikel/2012/05/06/medielogiken-v-ger-tyngre-n-reportrarnas-sikter \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/newsunspun.org.txt b/inc/3rdparty/site_config/standard/newsunspun.org.txt new file mode 100644 index 00000000..860ad66b --- /dev/null +++ b/inc/3rdparty/site_config/standard/newsunspun.org.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | body: //div[@class='right']//div[@class='articles'] | ||
2 | author: //div[@id='artinfo']//a[contains(@href, '/author/')] | ||
3 | strip: //div[@id='artinfo'] | ||
4 | strip: //table[//a[contains(@href, 'twitter.com')]] | ||
5 | strip_id_or_class: twitter | ||
6 | |||
7 | prune: no | ||
8 | tidy: no | ||
9 | |||
10 | test_url: http://www.newsunspun.org/eotn/bbc-headline-change-iran-goes-from-not-building-to-undecided-on-nuclear-bomb \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/newyorker.com.txt b/inc/3rdparty/site_config/standard/newyorker.com.txt new file mode 100644 index 00000000..5624aa8c --- /dev/null +++ b/inc/3rdparty/site_config/standard/newyorker.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //h1[@id='articlehed'] | //h2[@id="articleintro"] | ||
2 | body: //div[@id='articletext'] | ||
3 | |||
4 | strip: //ul[@id="bc"] | //div[@id="yrail"] | //div[@class="entry-keywords"] | //div[@class="entry-categories"] | //div[@class="socialUtils"] | //div[@id="footer"] | ||
5 | |||
6 | date: //h4[@id='articleauthor']/span[@class='dd dds'] | ||
7 | date: //div[@id="pagebody"]/div[@class='hentry entry']/div[@class='published'] | ||
8 | |||
9 | single_page_link: //div[@class='paginationViewSinglePage']/a | ||
10 | test_url: http://www.newyorker.com/online/blogs/culture/2012/06/mug-shot-web-sites.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/next-gen.biz.txt b/inc/3rdparty/site_config/standard/next-gen.biz.txt new file mode 100644 index 00000000..806a3dfd --- /dev/null +++ b/inc/3rdparty/site_config/standard/next-gen.biz.txt | |||
@@ -0,0 +1,16 @@ | |||
1 | # 2011-08-22 [carlo@...] initial version | ||
2 | # 2011-08-22 [carlo@...] removed comments & social links | ||
3 | |||
4 | tidy: no | ||
5 | |||
6 | single_page_link: //a[@class="single active"] | ||
7 | |||
8 | body: //div[@id="main"]//div[@class="content-region"]/article | ||
9 | author: //span[@class="author-name"] | ||
10 | date: //time/text() | ||
11 | |||
12 | strip: //aside[@id="related"] | ||
13 | strip: //footer | ||
14 | |||
15 | title: //h1 | ||
16 | test_url: http://www.next-gen.biz/reviews/deus-ex-human-revolution-review \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nfl.com.txt b/inc/3rdparty/site_config/standard/nfl.com.txt new file mode 100644 index 00000000..70f92473 --- /dev/null +++ b/inc/3rdparty/site_config/standard/nfl.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | # doesn't look like selecting an attribute value works? | ||
2 | # author: //meta[@id="authorName"]@value | ||
3 | |||
4 | author: substring-after(//li[@id="article-hdr-meta-author"]/text(), "By ") | ||
5 | date: //abbr[@id="article-time"] | ||
6 | title: //div[@id="article-hdr"]/h1 | ||
7 | body: //div[@class="articleText"] | ||
8 | |||
9 | # strip miscellaneous teasers & etc | ||
10 | strip: //div[@class="removeformobile"] | ||
11 | test_url: http://www.nfl.com/news/story/09000d5d82388707/article/close-shave-chiefs-haley-perseveres-through-rough-start?module=HP11_content_stream \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt b/inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt new file mode 100644 index 00000000..60834862 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ngm.nationalgeographic.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | next_page_link: //div[@class='nextpage_continue']/a | ||
2 | strip: //div[@class='nextpage_continue'] | ||
3 | strip_id_or_class: nextpage | ||
4 | title: //div[@class='article_title']//h1 | ||
5 | body: //div[@class='article_title']/.. | ||
6 | body: //div[@class='content'] | ||
7 | test_url: http://ngm.nationalgeographic.com/2012/02/tsunami/folger-text \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nhk.or.jp.txt b/inc/3rdparty/site_config/standard/nhk.or.jp.txt new file mode 100644 index 00000000..0a3bb913 --- /dev/null +++ b/inc/3rdparty/site_config/standard/nhk.or.jp.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //div[@id = 'news_right'] | ||
2 | test_url: http://www.nhk.or.jp/news/html/20110309/t10014559982000.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nintendoworldreport.com.txt b/inc/3rdparty/site_config/standard/nintendoworldreport.com.txt new file mode 100644 index 00000000..409a8977 --- /dev/null +++ b/inc/3rdparty/site_config/standard/nintendoworldreport.com.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | body: //div[@id="main"] | ||
2 | title: //div[@id="main"]/h3 | ||
3 | |||
4 | # Remove ‘Review’ and ‘Wii’. | ||
5 | strip: //div[@class="badge"] | ||
6 | |||
7 | # Remove duplicate title and country flag. | ||
8 | strip: //h3 | ||
9 | |||
10 | # Commented out below are attempts to extract the author and date, which did not work. | ||
11 | # author: //p[@class="extra "]/a | ||
12 | # date: //p[@class="extra "]/span[@class="when"] | ||
13 | test_url: http://www.nintendoworldreport.com/review/28400 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nojesguiden.se.txt b/inc/3rdparty/site_config/standard/nojesguiden.se.txt new file mode 100644 index 00000000..ae2d7e41 --- /dev/null +++ b/inc/3rdparty/site_config/standard/nojesguiden.se.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | author: //span[@class='meta']/span[@class='username'] | ||
2 | body: //div[@class='article-content'] | ||
3 | |||
4 | strip_id_or_class: 'article-actions' | ||
5 | test_url: http://nojesguiden.se/blogg/maja-bredberg/maja-laser-tidningen-en-helt-vanlig-lordag-i \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/northumberlandview.ca.txt b/inc/3rdparty/site_config/standard/northumberlandview.ca.txt new file mode 100644 index 00000000..04a0a34d --- /dev/null +++ b/inc/3rdparty/site_config/standard/northumberlandview.ca.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id='pn-maincontent'] | ||
3 | strip_id_or_class: z-menu | ||
4 | strip_id_or_class: news_category | ||
5 | strip_id_or_class: news_title | ||
6 | strip_id_or_class: news_modify | ||
7 | strip_id_or_class: news_morearticlesincat | ||
8 | strip_id_or_class: ezc_comments | ||
9 | strip_comments: yes | ||
10 | |||
11 | test_url: http://www.northumberlandview.ca/index.php?module=news&func=display&sid=5972 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nplusonemag.com.txt b/inc/3rdparty/site_config/standard/nplusonemag.com.txt new file mode 100644 index 00000000..205b1af4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/nplusonemag.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: /html/body/div[3]/div/div/h1 | ||
2 | |||
3 | body: //*[@id="article-body"] | ||
4 | |||
5 | |||
6 | test_url: http://nplusonemag.com/the-outskirts-of-progress \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/npr.org.txt b/inc/3rdparty/site_config/standard/npr.org.txt new file mode 100644 index 00000000..afab0eb3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/npr.org.txt | |||
@@ -0,0 +1,32 @@ | |||
1 | title: //div[contains(@class, 'storytitle')]//h1 | ||
2 | author: //p[@class="byline"]/span | ||
3 | body: //div[@id='storyspan02']//*[@class='duration' or @class='download' or contains(@class, 'photo')] | //div[@id='storytext'] | //div[@class='transcript'] | ||
4 | date: //meta[@name="date"]/@content | ||
5 | |||
6 | strip: //div[@class='enlarge_measure'] | ||
7 | strip: //div[@class='enlarge_html'] | ||
8 | strip: //a[@class='enlargeicon'] | ||
9 | strip: //div[contains(@class, 'bookedition')] | ||
10 | strip: //div[@class='textsize'] | ||
11 | strip: //ul[@class='genres'] | ||
12 | strip: //span[@class='bull'] | ||
13 | strip_id_or_class: secondary | ||
14 | strip_id_or_class: con1col | ||
15 | strip: //h3[@class='conheader'] | ||
16 | |||
17 | replace_string(<a name="more"> </a>): <!-- no more --> | ||
18 | replace_string(<div class="transcript">): <div class="transcript"><h2>Transcript</h2> | ||
19 | |||
20 | prune: no | ||
21 | strip://div[@class="ecommercepop"] | ||
22 | strip://span[@class="bull"] | ||
23 | strip://span[@class="purchaseLink"] | ||
24 | strip://div[@class="enlarge_html"] | ||
25 | strip://div[@class="enlarge_measure"] | ||
26 | strip://div[@class="container con1col small"] | ||
27 | strip://a[contains(@class, "enlargebtn")] | ||
28 | strip://div[contains(@class, "bucketwrap internallink")] | ||
29 | |||
30 | test_url: http://www.npr.org/blogs/thetwo-way/2011/07/12/137799301/sports-loses-its-escapist-gleam-in-a-summer-of-court-dates | ||
31 | test_url: http://www.npr.org/2012/07/04/156190948/feeling-under-siege-catholic-leadership-shifts-right | ||
32 | test_url: http://www.npr.org/2012/12/13/166480907/the-years-best-sci-fi-crosses-galaxies-and-genres \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nybooks.com.txt b/inc/3rdparty/site_config/standard/nybooks.com.txt new file mode 100644 index 00000000..8ecb8961 --- /dev/null +++ b/inc/3rdparty/site_config/standard/nybooks.com.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | strip_id_or_class: sIFR-alternate | ||
2 | title: //div[@id='page-title-wrapper']/div[@id='page-title']/h2 | ||
3 | single_page_link: //a[contains(@href, 'pagination=false') and not(contains(@href, 'printpage=true'))] | ||
4 | |||
5 | body: //div[@id = 'article-body'] | ||
6 | strip_id_or_class:article-tools | ||
7 | strip_id_or_class:js_target | ||
8 | strip_id_or_class:marker | ||
9 | author://div[@id = 'page-title']/h3 | ||
10 | date://div[@id = 'page-title']/h5/a[starts-with(@href,'/issues/')] | ||
11 | |||
12 | |||
13 | test_url: http://www.nybooks.com/articles/archives/2012/feb/23/were-more-unequal-you-think/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nymag.com.txt b/inc/3rdparty/site_config/standard/nymag.com.txt new file mode 100644 index 00000000..f664c93d --- /dev/null +++ b/inc/3rdparty/site_config/standard/nymag.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //h2[contains(@class, 'primary')] | ||
2 | body: //div[@id='story'] | ||
3 | author: //*[@class='by']/a | ||
4 | date: substring-after(//*[@class='date'], 'Published') | ||
5 | |||
6 | next_page_link: //div[@class='page-navigation']//li[@class='next']/a | ||
7 | |||
8 | test_url: http://nymag.com/news/features/wall-street-2012-2/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nyteknik.se.txt b/inc/3rdparty/site_config/standard/nyteknik.se.txt new file mode 100644 index 00000000..8c9e37f4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/nyteknik.se.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //div[@class="article default-article"]/h1 | ||
2 | author: //p[@class="author"]/a[2] | ||
3 | |||
4 | # Article introduction: | ||
5 | #move_into(//div[@class="article-bread"]): //p[@class="lead"] | ||
6 | |||
7 | body: //div[@class="article-bread"] | ||
8 | test_url: http://www.nyteknik.se/nyheter/energi_miljo/energi/article3391426.ece \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nytimes.com.txt b/inc/3rdparty/site_config/standard/nytimes.com.txt new file mode 100644 index 00000000..8d9a794a --- /dev/null +++ b/inc/3rdparty/site_config/standard/nytimes.com.txt | |||
@@ -0,0 +1,36 @@ | |||
1 | title://h1[@class="articleHeadline"] | ||
2 | body://div[@id="article"] | ||
3 | strip_id_or_class:articleTools | ||
4 | strip_id_or_class:readerscomment | ||
5 | #strip://div[contains(@class, "articleInline runaroundLeft")] | ||
6 | strip: //div[contains(@class, "doubleRule")] | ||
7 | # strip image credit - appears as a bold heading | ||
8 | strip: //div[contains(@class, "articleInline")]//h6 | ||
9 | strip_id_or_class:enlargeThis | ||
10 | strip_id_or_class:pageLinks | ||
11 | strip_id_or_class:memberTools | ||
12 | strip_id_or_class:articleExtras | ||
13 | strip_id_or_class:singleAd | ||
14 | strip_id_or_class:byline | ||
15 | strip_id_or_class:dateline | ||
16 | strip_id_or_class:articleheadline | ||
17 | strip_id_or_class:articleBottomExtra | ||
18 | strip://a[contains(@href, 'nytimes.com/adx/')] | ||
19 | strip: //nyt_byline | ||
20 | strip: //span[contains(@class, 'slideshow') or contains(@class, 'video')] | ||
21 | strip: //p[@class='caption']//a[contains(., 'More Photos')] | ||
22 | |||
23 | prune: no | ||
24 | tidy: no | ||
25 | |||
26 | date: substring-after(//*[contains(@class, 'dateline')], 'Published:') | ||
27 | |||
28 | single_page_link: //link[contains(@href, 'pagewanted=all')] | ||
29 | #single_page_link: //a[contains(@href, 'pagewanted=all') and not(contains(@href, 'login'))] | ||
30 | |||
31 | strip://ul[@id = 'toolsList'] | ||
32 | strip://h6[@class = 'kicker'] | ||
33 | author:substring-after(//h6[@class='byline'],'By ') | ||
34 | |||
35 | test_url: http://www.nytimes.com/2011/07/24/books/review/an-academic-authors-unintentional-masterpiece.html | ||
36 | test_url: http://www.nytimes.com/2012/06/10/arts/television/the-newsroom-aaron-sorkins-return-to-tv.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/nzz.ch.txt b/inc/3rdparty/site_config/standard/nzz.ch.txt new file mode 100644 index 00000000..81faabae --- /dev/null +++ b/inc/3rdparty/site_config/standard/nzz.ch.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | body: //*[@class='article-full'] | ||
2 | title: //h3 | ||
3 | strip: //header[@class='group'] | ||
4 | #body: //p[@class='lead'] | ||
5 | #move_into(//p[@class='lead']): //*[@class='article-full']/figure | ||
6 | #move_into(//p[@class='lead']): //div[@id='articleBodyText'] | ||
7 | strip: //div[@id='social-media-floater'] | ||
8 | strip: //div[@class='advertisement'] | ||
9 | strip: //div[@class='infobox'] | ||
10 | strip: //div[@id='articleComments'] | ||
11 | |||
12 | test_url: http://www.nzz.ch/wissen/wissenschaft/sonnenschutz-fuer-die-erde-1.17282213 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/observer.com.txt b/inc/3rdparty/site_config/standard/observer.com.txt new file mode 100644 index 00000000..e409ca2e --- /dev/null +++ b/inc/3rdparty/site_config/standard/observer.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | body: //article[contains(@class, 'instapaper_body')] | ||
2 | |||
3 | prune: no | ||
4 | |||
5 | single_page_link: //a[@id='print-button'] | ||
6 | |||
7 | test_url: http://www.observer.com/2008/would-you-take-tumblr-man \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/off.net.mk.txt b/inc/3rdparty/site_config/standard/off.net.mk.txt new file mode 100644 index 00000000..a2fb5f21 --- /dev/null +++ b/inc/3rdparty/site_config/standard/off.net.mk.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | body: //div[(@id = "content")] | ||
2 | strip: //div[(@class = "links-bar")] | ||
3 | strip: //div[(@class = "povrzani")] | ||
4 | strip: //div[(@class = "povrzani-dolu")] | ||
5 | strip: //div[(@class = "tags")] | ||
6 | strip: //h1[(@id = "page-title")] | ||
7 | test_url: http://off.net.mk/zhivot-i-zabava/gadzheti/dzhabe-raboti-dzhabe-ne-dishi \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/omaha.com.txt b/inc/3rdparty/site_config/standard/omaha.com.txt new file mode 100644 index 00000000..53db061d --- /dev/null +++ b/inc/3rdparty/site_config/standard/omaha.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //div[@class='story'] | ||
2 | test_url: http://www.omaha.com/article/20111031/BIGRED/111039984#pelini-tremendous-challenge-ahead-for-huskers \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/omiliya.org.txt b/inc/3rdparty/site_config/standard/omiliya.org.txt new file mode 100644 index 00000000..1b39b625 --- /dev/null +++ b/inc/3rdparty/site_config/standard/omiliya.org.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //div[@id='squeeze']/h1 | ||
2 | strip: //div[@id='squeeze']/h1 | ||
3 | author: //div[@class='submitted']/a | ||
4 | strip: //div[@class='submitted']/a | ||
5 | convert_double_br_tags: yes | ||
6 | |||
7 | |||
8 | |||
9 | test_url: http://omiliya.org/content/predchuvstvie.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/on.net.mk.txt b/inc/3rdparty/site_config/standard/on.net.mk.txt new file mode 100644 index 00000000..be7a17ef --- /dev/null +++ b/inc/3rdparty/site_config/standard/on.net.mk.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //div[(@class = "statija")] | ||
2 | strip: //div[(@class = "relatedBlock")] | ||
3 | strip: //div[(@class = "swftools")] | ||
4 | strip: //table[(@class = "links")] | ||
5 | test_url: http://on.net.mk/video/na-trkala/lamborghini-aventador-avionot-shto-ne-leta \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/online.wsj.com.txt b/inc/3rdparty/site_config/standard/online.wsj.com.txt new file mode 100644 index 00000000..edb52855 --- /dev/null +++ b/inc/3rdparty/site_config/standard/online.wsj.com.txt | |||
@@ -0,0 +1,23 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | body: //div[@id='article_story_body'] | ||
3 | |||
4 | author: //h3[@class='byline']/a | ||
5 | # for slide show content | ||
6 | body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1] | ||
7 | date: //li[@class='dateStamp']/small | ||
8 | |||
9 | strip_id_or_class: insetFullBracket | ||
10 | strip_id_or_class: insettipBox | ||
11 | #strip_id_or_class: legacyInset | ||
12 | strip_id_or_class: recipeACShopAndBuyText | ||
13 | |||
14 | strip: //div[contains(@class, 'insetContent')]//cite | ||
15 | strip: //*[contains(@style, 'visibility: hidden;')] | ||
16 | strip: //div[contains(@class, 'insetContent') and not(contains(@class, 'image'))] | ||
17 | |||
18 | prune: no | ||
19 | tidy: no | ||
20 | |||
21 | test_url: http://online.wsj.com/article/SB10001424052970203363504577185322849515102.html | ||
22 | # slide show | ||
23 | test_url: http://online.wsj.com/article/SB10001424052970204791104577110550376458164.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/onlinewelten.com.txt b/inc/3rdparty/site_config/standard/onlinewelten.com.txt new file mode 100644 index 00000000..1609fa83 --- /dev/null +++ b/inc/3rdparty/site_config/standard/onlinewelten.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //div[@id='news_detail']//div[@class='contents clearfix'] | ||
2 | test_url: http://www.onlinewelten.com/games/aliens-colonial-marines/news/offizielle-spiel-ankuendigung-nintendos-wii-u-103690/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/onstartups.com.txt b/inc/3rdparty/site_config/standard/onstartups.com.txt new file mode 100644 index 00000000..cccce8cd --- /dev/null +++ b/inc/3rdparty/site_config/standard/onstartups.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | strip: //div[@id="dnn_LeftPane"] | //div[@id="dnn_ContentPane"]//h1 | //div[@id="dnn_ContentPane"]//p[@class="Normal"] | //div[@class="Submissions"] | //div[@id="listing"]//h3 | //div[@id="listing"][2] | //div[@id="emart-fail"] | //div[@id="emart-success"] | //div[@id="emart-form"] | ||
2 | test_url: http://onstartups.com/tabid/3339/bid/37737/Secrets-Of-Freemium-Pricing-Make-The-Cheapskates-Pay.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/opensource.org.txt b/inc/3rdparty/site_config/standard/opensource.org.txt new file mode 100644 index 00000000..2bd3ccdb --- /dev/null +++ b/inc/3rdparty/site_config/standard/opensource.org.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //div[@class='content clear-block'] | ||
2 | test_url: http://opensource.org/node/537 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/openthemagazine.com.txt b/inc/3rdparty/site_config/standard/openthemagazine.com.txt new file mode 100644 index 00000000..510eb252 --- /dev/null +++ b/inc/3rdparty/site_config/standard/openthemagazine.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //div[@id = 'content-inner'] | ||
2 | strip: //div[@id = 'content-bottom'] | ||
3 | strip_id_or_class: print_sharebutton | ||
4 | test_url: http://openthemagazine.com/article/nation/sania-vs-saina \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/openwebx.org.txt b/inc/3rdparty/site_config/standard/openwebx.org.txt new file mode 100644 index 00000000..b7663540 --- /dev/null +++ b/inc/3rdparty/site_config/standard/openwebx.org.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //div[@class="chapter"] | ||
2 | prune: no | ||
3 | tidy: no | ||
4 | test_url: http://openwebx.org/docs/springext.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/orf.at.txt b/inc/3rdparty/site_config/standard/orf.at.txt new file mode 100644 index 00000000..ff16ca79 --- /dev/null +++ b/inc/3rdparty/site_config/standard/orf.at.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | single_page_link: //div[@id='content']//p[@class='readMore']/a | ||
2 | |||
3 | title: //div[@class='hidden offscreen']/h2 | ||
4 | body: //div[@id="storyText"] | ||
5 | move_into(//div[@id='storyText']): //div[@class='fact'] | ||
6 | strip: //small[@class='credit'] | ||
7 | strip: //small[@class='caption'] | ||
8 | date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am') | ||
9 | strip: //p[@class='toplink'] | ||
10 | |||
11 | test_url: http://orf.at/stories/2084731/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/origo.hu.txt b/inc/3rdparty/site_config/standard/origo.hu.txt new file mode 100644 index 00000000..0dedac3d --- /dev/null +++ b/inc/3rdparty/site_config/standard/origo.hu.txt | |||
@@ -0,0 +1,18 @@ | |||
1 | title: /html/body/div[5]/div[2]/h1 | ||
2 | body: /html/body/div[5]/div[2]/div[6]/div/div | ||
3 | body: //*[@id="cikk"] | ||
4 | strip: /html/body/div[5]/div[2]/h1 | ||
5 | strip: /html/body/div[5]/div[2]/div[4] | ||
6 | strip: //*[@id="multidoboz"] | ||
7 | strip: /html/body/div[5]/div[2]/div[6]/div[2] | ||
8 | strip: //*[@id="comments"] | ||
9 | strip: //*[@id="rating-doboz"] | ||
10 | strip: /html/body/div[5]/div[2]/div[10] | ||
11 | strip: /html/body/div[5]/div[2]/a | ||
12 | strip: /html/body/div[5]/div[2]/span | ||
13 | strip: /html/body/div[5]/div[2]/span[2] | ||
14 | strip: /html/body/div[5]/div[2]/span[3] | ||
15 | strip: /html/body/div[5]/div[2]/span[4] | ||
16 | strip: /html/body/div[5]/div[2]/span[5] | ||
17 | strip: //*[@id="kommentszam"] | ||
18 | test_url: http://www.origo.hu/itthon/20110119-lemondott-a-kulturaert-felelos-helyettes-allamtitkar.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pakistantvdekho.com.txt b/inc/3rdparty/site_config/standard/pakistantvdekho.com.txt new file mode 100644 index 00000000..f03c9551 --- /dev/null +++ b/inc/3rdparty/site_config/standard/pakistantvdekho.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | #body: (//div[@class='ftr-yt-vid'])[1] | ||
2 | body: (//blockquote[contains(@class, 'postcontent')])[1] | ||
3 | body: (//div[starts-with(@id, 'post_message')])[1] | ||
4 | |||
5 | prune: no | ||
6 | tidy: no | ||
7 | |||
8 | #replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" | ||
9 | #replace_string(</iframe>): </iframe> </div> | ||
10 | |||
11 | test_url: http://pakistantvdekho.com/showthread.php?647741-Sitam-Gar-by-HUM-TV-Episode-07&p=659080#post659080 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pandagon.net.txt b/inc/3rdparty/site_config/standard/pandagon.net.txt new file mode 100644 index 00000000..d0d2a5d0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/pandagon.net.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title://h2 | ||
2 | author://div[@class="posted"]/a | ||
3 | date://div[@class="date"] | ||
4 | body://div[@class="entry"] | ||
5 | test_url: http://pandagon.net/index.php/site/its-okay-to-admit-that-mass-hysteria-is-real \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pandodaily.com.txt b/inc/3rdparty/site_config/standard/pandodaily.com.txt new file mode 100644 index 00000000..7d1c2183 --- /dev/null +++ b/inc/3rdparty/site_config/standard/pandodaily.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | tidy: no | ||
2 | body: //article | ||
3 | date: //time/@datetime | ||
4 | strip_id_or_class: sharedaddy | ||
5 | test_url: http://pandodaily.com/2012/01/19/ibooks-author-is-not-going-to-hurt-publishers-it-might-even-help-them/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/panic.com.txt b/inc/3rdparty/site_config/standard/panic.com.txt new file mode 100644 index 00000000..0361f06d --- /dev/null +++ b/inc/3rdparty/site_config/standard/panic.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@class='entry'] | ||
2 | date: //h3[@class='postDate'] | ||
3 | test_url: http://www.panic.com/blog/2011/07/panic-is-ready-for-lion/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/parislemon.com.txt b/inc/3rdparty/site_config/standard/parislemon.com.txt new file mode 100644 index 00000000..a3bd4b0f --- /dev/null +++ b/inc/3rdparty/site_config/standard/parislemon.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h2[@class="post-title"] | ||
2 | author: substring-after(//div[@class="description"],'Words by ') | ||
3 | date: //li[@class="date"] | ||
4 | strip: //h2[@class="post-title"] | ||
5 | body: //div[@class="copy"] | ||
6 | test_url: http://parislemon.com/post/13462682469/the-15-inch-air \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/parliament.uk.txt b/inc/3rdparty/site_config/standard/parliament.uk.txt new file mode 100644 index 00000000..478a669f --- /dev/null +++ b/inc/3rdparty/site_config/standard/parliament.uk.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id='news-article'] | ||
3 | test_url: http://www.parliament.uk/business/committees/committees-a-z/commons-select/backbench-business-committee/news/guidance-for-e-petitioners/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pastebin.com.txt b/inc/3rdparty/site_config/standard/pastebin.com.txt new file mode 100644 index 00000000..89d13b2a --- /dev/null +++ b/inc/3rdparty/site_config/standard/pastebin.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title://div[@class="paste_box_line1"]/h1 | ||
2 | author://div[@class="paste_box_line2"]/a | ||
3 | body://div[@class="text"] | ||
4 | date:substring-before(substring-after(//div[@class="paste_box_line2"],'|'),'|') | ||
5 | dissolve://li | ||
6 | test_url: http://pastebin.com/LAykd1es \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt b/inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt new file mode 100644 index 00000000..40a049e0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/pastepad.fivefilters.org.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id='ff-pastepad-content'] | ||
3 | prune: no | ||
4 | # todo: add test file | ||
5 | test_url: http://pastepad.fivefilters.org/test.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pathawks.com.txt b/inc/3rdparty/site_config/standard/pathawks.com.txt new file mode 100644 index 00000000..1a4cd25b --- /dev/null +++ b/inc/3rdparty/site_config/standard/pathawks.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title://*[contains(@class,'post-title')] | ||
2 | body://div[contains(@class,'post-body')] | ||
3 | body://div[contains(@class,'entry-content')] | ||
4 | strip_comments:no | ||
5 | prune:no | ||
6 | convert_double_br_tags:yes | ||
7 | tidy:yes | ||
8 | test_url: http://www.pathawks.com/2011/06/crazyawesomecoloradotrip.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pcast.me.txt b/inc/3rdparty/site_config/standard/pcast.me.txt new file mode 100644 index 00000000..ae38e8e1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/pcast.me.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | prune: no | ||
2 | test_url: http://pcast.me/shownotes/get/16t \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pcmag.com.txt b/inc/3rdparty/site_config/standard/pcmag.com.txt new file mode 100644 index 00000000..cebea4d7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/pcmag.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | prune:yes | ||
2 | |||
3 | date://*[contains(@class,'date')] | ||
4 | |||
5 | body://div[contains(@id,'content')] | ||
6 | |||
7 | next_page_link://a[contains(.,'Next >')] | ||
8 | |||
9 | strip_id_or_class:sponsors | ||
10 | test_url: http://www.pcmag.com/article2/0,2817,2401676,00.asp \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pcworld.com.txt b/inc/3rdparty/site_config/standard/pcworld.com.txt new file mode 100644 index 00000000..30ccbb5f --- /dev/null +++ b/inc/3rdparty/site_config/standard/pcworld.com.txt | |||
@@ -0,0 +1,19 @@ | |||
1 | title: //div[@class='articleHead']//h1 | ||
2 | author: //div[@class="author-name"]/a[1] | ||
3 | body: //div[@class="main"] | ||
4 | |||
5 | # remove 'From the Lab' and 'Recent posts' text | ||
6 | strip: //div[@class='blogLabel'] | ||
7 | |||
8 | # remove byline and meta info | ||
9 | strip: //h1 | ||
10 | strip: //div[@class="article-meta"] | ||
11 | strip: //div[@class="author-info"] | ||
12 | |||
13 | #strip tags and categories | ||
14 | strip: //div[@class="department"] | ||
15 | |||
16 | #strip product cap links | ||
17 | strip: //div[@class="cap-main"] | ||
18 | strip: //div[@id="compare-lede"] | ||
19 | test_url: http://www.pcworld.com/article/262034/are-printer-companies-gouging-us-on-laser-toner-pricing.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/penny-arcade.com.txt b/inc/3rdparty/site_config/standard/penny-arcade.com.txt new file mode 100644 index 00000000..f97615f1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/penny-arcade.com.txt | |||
@@ -0,0 +1,23 @@ | |||
1 | # 2012-01-14 carlo@... - fixed title, body; added author, date | ||
2 | |||
3 | title: //div[@class="title"]/h2/a | ||
4 | # body: //div[@class="post"] | ||
5 | # author: //p[@class="iconEmail"]/a | ||
6 | # date: //p[@class="iconDate"] | ||
7 | |||
8 | # 1/24/2013 yosoyju - fixed author, date, and body, added support for PA Report | ||
9 | |||
10 | # Penny Arcade | ||
11 | |||
12 | author: //li[@class="iconEmail"]/a | ||
13 | date: //li[@class="iconDate"] | ||
14 | body: //div[@class="body"] | ||
15 | |||
16 | # PA Report | ||
17 | |||
18 | author: //div[@class="meta"]/p/a | ||
19 | date: substring-after(//div[@class="meta"]/p, '/ ') | ||
20 | title: substring-after(//title, '- ') | ||
21 | |||
22 | test_url: http://penny-arcade.com/2012/01/13/i-put-some-news-in-your-news | ||
23 | test_url: http://penny-arcade.com/report/editorial-article/the-dystopian-future-of-casual-games-personalized-targeted-pricing-and-mech \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pentaxforums.com.txt b/inc/3rdparty/site_config/standard/pentaxforums.com.txt new file mode 100644 index 00000000..00f61a48 --- /dev/null +++ b/inc/3rdparty/site_config/standard/pentaxforums.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | next_page_link: //a[contains(., 'Next:')] | ||
2 | test_url: http://www.pentaxforums.com/reviews/long-exposure-handhelds/introduction.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt b/inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt new file mode 100644 index 00000000..a369fd65 --- /dev/null +++ b/inc/3rdparty/site_config/standard/philadelphiaeagles.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | prune: no | ||
2 | tidy: no | ||
3 | body: //div[@class='article-content'] | ||
4 | dissolve: //nobr/a | ||
5 | dissolve: //nobr | ||
6 | test_url: http://www.philadelphiaeagles.com/news/article-1/Jacksons-Light-Shined-On-Sunday-Night/51a862de-42b4-40f1-a5a8-ba0fb8a435b7 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/philly.com.txt b/inc/3rdparty/site_config/standard/philly.com.txt new file mode 100644 index 00000000..41318f63 --- /dev/null +++ b/inc/3rdparty/site_config/standard/philly.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //h1[@class='entry-title'] | ||
2 | author: //p[@class='byline']/span | ||
3 | body: //*[@id='body-content'] | ||
4 | date: //div[@class='article_timestamp']/span | ||
5 | |||
6 | strip: //*[@class='b-group'] | ||
7 | strip: //*[contains(@style, 'none')] | ||
8 | strip: //a[contains(@href, 'comments')] | ||
9 | strip: //*[contains(@class, 'comment')] | ||
10 | test_url: http://www.philly.com/philly/sports/eagles/20120127_Ohio_State_s_Posey_didn_t_waste_time_lost_to_suspension.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/photo.tutsplus.com.txt b/inc/3rdparty/site_config/standard/photo.tutsplus.com.txt new file mode 100644 index 00000000..4e2ccb01 --- /dev/null +++ b/inc/3rdparty/site_config/standard/photo.tutsplus.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | author: substring-before(//div[@class='post_meta'],' on') | ||
2 | date: substring-after(substring-before(//div[@class='post_meta'],'with'),' on') | ||
3 | title: //h1[@class='post_title'] | ||
4 | body: //div[@class='article'] | ||
5 | |||
6 | test_url: http://photo.tutsplus.com/articles/news/a-brilliant-beginners-guide-to-architectural-photography/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/php.net.txt b/inc/3rdparty/site_config/standard/php.net.txt new file mode 100644 index 00000000..7c57a84d --- /dev/null +++ b/inc/3rdparty/site_config/standard/php.net.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //div[@id='content'] | ||
2 | strip_id_or_class: manualnavbar | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | test_url: http://www.php.net/manual/en/migration5.incompatible.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/physicstoday.org.txt b/inc/3rdparty/site_config/standard/physicstoday.org.txt new file mode 100644 index 00000000..a8163995 --- /dev/null +++ b/inc/3rdparty/site_config/standard/physicstoday.org.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //div[@class='abstitle']//h1 | ||
2 | author: //div[@class='authorList'] | ||
3 | body: //div[@id='fulltext_body'] | ||
4 | |||
5 | prune: no | ||
6 | |||
7 | test_url: http://www.physicstoday.org/resource/1/phtoad/v64/i10/p48_s1?bypassSSO=1 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pitchfork.com.txt b/inc/3rdparty/site_config/standard/pitchfork.com.txt new file mode 100644 index 00000000..3decc538 --- /dev/null +++ b/inc/3rdparty/site_config/standard/pitchfork.com.txt | |||
@@ -0,0 +1,16 @@ | |||
1 | title:concat(//h1,' - ',//h2,' - ',//h3) | ||
2 | author://address | ||
3 | date://span[@class='pub-date'] | ||
4 | body://div[@id='main'] | ||
5 | single_page_link://link[@rel='canonical'] | ||
6 | strip://div[@class='info'] | ||
7 | strip_id_or_class:'object-grid related-content' | ||
8 | strip_id_or_class:'object-prevnext' | ||
9 | strip_id_or_class:'object-header' | ||
10 | strip_id_or_class:'source' | ||
11 | strip_id_or_class:'label' | ||
12 | strip_id_or_class:'title' | ||
13 | dissolve://ul | ||
14 | strip://li[@class='next'] | ||
15 | strip://li[@class='prev'] | ||
16 | test_url: http://pitchfork.com/features/why-we-fight/8796-on-the-far-slope-of-the-uncanny-valley/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pittnews.com.txt b/inc/3rdparty/site_config/standard/pittnews.com.txt new file mode 100644 index 00000000..92777073 --- /dev/null +++ b/inc/3rdparty/site_config/standard/pittnews.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //h2[@class='post-title'] | ||
2 | author: substring-before(substring-after(//h3[@class='post-byline'],'By:'),'/') | ||
3 | date: substring-before(substring-after(//p[@class='post-details'],'Posted on '),'in') | ||
4 | strip: //h2[@class='post-title'] | ||
5 | strip: //p[@class='post-details'] | ||
6 | strip: //h3[@class='post-byline'] | ||
7 | body: //div[@id='content'] | ||
8 | test_url: http://pittnews.com/newsstory/mens-basketball-pitt-recruit-robinson-to-bring-leadership/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt b/inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt new file mode 100644 index 00000000..824cb064 --- /dev/null +++ b/inc/3rdparty/site_config/standard/pittsburgh.pirates.mlb.com.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | title: substring-before(//title,'pirates.com') | ||
2 | date: //span[@class='timeStamp'] | ||
3 | author: substring-before(substring-after(//div[@class='byLine'],'By'),'/') | ||
4 | body: //div[@id='article'] | ||
5 | #strip: //div[@class='inner'] | ||
6 | strip: //div[@id='article_head'] | ||
7 | strip: //p[@class='tagLine'] | ||
8 | strip: //div[@id='article_related_links'] | ||
9 | strip: //div[@id='article_related_mlb'] | ||
10 | strip: //div[@id='article_related_club'] | ||
11 | strip: //span[@class='more'] | ||
12 | strip: //div[@class='article_component'] | ||
13 | strip: //span[@class='screen_reader'] | ||
14 | strip: //ul[@class='columnists_blurb'] | ||
15 | test_url: http://pittsburgh.pirates.mlb.com/news/article.jsp?ymd=20120330&content_id=27759040&vkey=news_pit&c_id=pit \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pittsburghlive.com.txt b/inc/3rdparty/site_config/standard/pittsburghlive.com.txt new file mode 100644 index 00000000..b3e66166 --- /dev/null +++ b/inc/3rdparty/site_config/standard/pittsburghlive.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: substring-before(//title,'- Pittsburgh Tribune') | ||
2 | author: substring-before(substring-after(//div[@class='byline'],'By '),',') | ||
3 | date: substring-after(substring-after(//div[@class='byline'],','),',') | ||
4 | body: //div[@id='storyBody'] | ||
5 | strip: //div[@class='morestories'] | ||
6 | dissolve: //p[@class='subheader'] | ||
7 | test_url: http://www.pittsburghlive.com/x/pittsburghtrib/sports/columnists/s_785654.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt b/inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt new file mode 100644 index 00000000..dd715d8f --- /dev/null +++ b/inc/3rdparty/site_config/standard/pittsburghmagazine.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //title | ||
2 | author: substring-after(//div[@class='by-line'],'BY') | ||
3 | |||
4 | body: //div[@id='article-body'] | ||
5 | |||
6 | strip: //div[@class='by-line'] | ||
7 | strip: //div[@id='article-body']/h1 | ||
8 | test_url: http://www.pittsburghmagazine.com/Pittsburgh-Magazine/May-2012/Verde-Lights-the-Night/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt b/inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt new file mode 100644 index 00000000..6113b96e --- /dev/null +++ b/inc/3rdparty/site_config/standard/pittsburghpanthers.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: //span[@class='StoryHeadline'] | ||
2 | strip: //div[@class='fivevert'] | ||
3 | body: //div[@id='Content'] | ||
4 | test_url: http://www.pittsburghpanthers.com/sports/m-baskbl/recaps/031412aaa.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pittscriptblog.com.txt b/inc/3rdparty/site_config/standard/pittscriptblog.com.txt new file mode 100644 index 00000000..3936310d --- /dev/null +++ b/inc/3rdparty/site_config/standard/pittscriptblog.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //h1[@class='articletitle'] | ||
2 | author: substring-after(//span[@class='author'],'by') | ||
3 | date: //span[@class='created'] | ||
4 | body: //div[@class='article'] | ||
5 | strip: //div[@class='headline'] | ||
6 | strip: //p[@class='articleinfo'] | ||
7 | #dissolve: //p[@class='subheader'] | ||
8 | test_url: http://www.pittscriptblog.com/2012-articles/march/2012-football-opponents-set-and-the-attendance-dilemma.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/playboy.com.txt b/inc/3rdparty/site_config/standard/playboy.com.txt new file mode 100644 index 00000000..07b347a0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/playboy.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | author: //article//*[@class="author"] | ||
2 | date: //article//*[@class="publication-date"] | ||
3 | body: //article | ||
4 | strip: //article/header | ||
5 | strip: //article/section | ||
6 | test_url: http://www.playboy.com/playground/view/playboy-interview-jon-hamm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/plus.google.com.txt b/inc/3rdparty/site_config/standard/plus.google.com.txt new file mode 100644 index 00000000..50a5dbf5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/plus.google.com.txt | |||
@@ -0,0 +1,17 @@ | |||
1 | body: //div[@id='contentPane']//div[@class='vg'] | ||
2 | body: //div[@id='contentPane'] | ||
3 | |||
4 | # Grab the author by finding the first profile pic, then backing up a node and getting the title of <a> tag which will be the author hopefully. Sorry can't test this due to parser errors, thanks google :( | ||
5 | |||
6 | author: //div[@id='contentPane']//img[contains(@alt, 'profile photo')][1]/../@title | ||
7 | |||
8 | |||
9 | strip: //*[@title="People who +1'd this"]/../.. | ||
10 | strip: //*[contains(@class, 'a-b-f-i-Hg-Uf')] | ||
11 | strip: //*[@role='menu'] | ||
12 | strip: //img[contains(@alt, 'profile photo')] | ||
13 | strip: //*[@class='a-f-i-Ad'] | ||
14 | |||
15 | tidy: no | ||
16 | |||
17 | test_url: http://plus.google.com/u/0/117840649766034848455/posts/FddaP6jeCqp \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/plzkthxbai.com.txt b/inc/3rdparty/site_config/standard/plzkthxbai.com.txt new file mode 100644 index 00000000..bb9be0a9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/plzkthxbai.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: //h2[@class='jcw-pagetitle'] | ||
2 | date: //p[@class='postinfo'] | ||
3 | body: //div[@class='contenttext'] | ||
4 | test_url: http://plzkthxbai.com/blog/2011/06/28/1password-and-internet-security/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt b/inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt new file mode 100644 index 00000000..880311d3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/pogue.blogs.nytimes.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //div[@id="content"]/div[1] | ||
2 | |||
3 | title: //h1[@class="entry-title"] | ||
4 | test_url: http://pogue.blogs.nytimes.com/2011/05/12/the-future-of-skype/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/politico.com.txt b/inc/3rdparty/site_config/standard/politico.com.txt new file mode 100644 index 00000000..121fd5b9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/politico.com.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | title://div[contains(@class, "article")]/h1 | ||
2 | body://div[contains(@class,"story-text")] | ||
3 | |||
4 | # Why doesn't this work? next_page_link://ul[contains(@class,"pagination")]/li/a[@rel="next"] | ||
5 | |||
6 | next_page_link://ul[contains(@class,"pagination")]/li[contains(@class, "current")]/following-sibling::node()/a | ||
7 | date://meta[@name="publish_date"]/@content | ||
8 | |||
9 | strip://div[contains(@class, "breadcrumbs")] | ||
10 | strip://a[contains(@class, "hidden")] | ||
11 | strip://div[contains(@class, "story-embed")] | ||
12 | strip://div[contains(@class, "story-text")]//p/a[contains(text(), "Also on POLITICO:")]/.. | ||
13 | test_url: http://www.politico.com/news/stories/0712/78105.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/politifact.com.txt b/inc/3rdparty/site_config/standard/politifact.com.txt new file mode 100644 index 00000000..fd247b5b --- /dev/null +++ b/inc/3rdparty/site_config/standard/politifact.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //div[@id="content"] | ||
2 | |||
3 | strip: //div[@class="pfcontentmid"]/div[position()>4]|//div[@class="pfad"] | ||
4 | test_url: http://www.politifact.com/truth-o-meter/statements/2011/may/30/barbara-boxer/barbara-boxer-says-medicare-overhead-far-lower-pri/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/politiken.dk.txt b/inc/3rdparty/site_config/standard/politiken.dk.txt new file mode 100644 index 00000000..8deecbca --- /dev/null +++ b/inc/3rdparty/site_config/standard/politiken.dk.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | # 21/10-2011: | ||
2 | # Added Author+Date | ||
3 | # Remove fakta-boks if found | ||
4 | # Deleted 'Læs også...' filter | ||
5 | # - Change in markup caused it to strip too much. | ||
6 | |||
7 | author://span[@class='autor-name'] | ||
8 | date:substring-after(//div[@class='art-created'], ' ') | ||
9 | title: //h1[contains(@class, 'stor-type')] | ||
10 | body: //div[@id='art-body'] | ||
11 | strip: //div[@class='art-fakta article-box'] | ||
12 | |||
13 | test_url: http://politiken.dk/kultur/boger/skonlitteratur_boger/ECE1426386/makabre-tegneserie-zombier-aeder-alt-levende/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/popularmechanics.com.txt b/inc/3rdparty/site_config/standard/popularmechanics.com.txt new file mode 100644 index 00000000..85b7656b --- /dev/null +++ b/inc/3rdparty/site_config/standard/popularmechanics.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | next_page_link: //div[@id='longPagination']/a[@class='next'] | ||
2 | |||
3 | title: //div[@id='contentHeader']//h1 | ||
4 | |||
5 | body: //div[@id='articleBody'] | ||
6 | # this is so sad | ||
7 | body: //div[@id='intelliTXT'] | ||
8 | test_url: http://www.popularmechanics.com/technology/aviation/crashes/what-really-happened-aboard-air-france-447-6611877 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/positioningmag.com.txt b/inc/3rdparty/site_config/standard/positioningmag.com.txt new file mode 100644 index 00000000..21cd833c --- /dev/null +++ b/inc/3rdparty/site_config/standard/positioningmag.com.txt | |||
@@ -0,0 +1,19 @@ | |||
1 | title: //div[@id="newsDetailTitle"] | ||
2 | author: //span[@id="showAuthor"] | ||
3 | date: //span[@id="showRefDate"] | ||
4 | |||
5 | strip: //div[@id="breadcrumbs"] | ||
6 | strip: //span[@id="PageTitle"] | ||
7 | strip: //div[@id="newsDetailAuthorPublish"] | ||
8 | |||
9 | strip: //div[@class="leadPix"] | ||
10 | |||
11 | strip: //span[@id="ctl00_PageTitle"] | ||
12 | strip: //div[@id="newsDetailTitle"] | ||
13 | convert_double_br_tags:yes | ||
14 | |||
15 | strip: //div[@id="newsDetailCredential"] | ||
16 | strip: //div[@id="sidebar2"] | ||
17 | strip: //div[@id="footer"] | ||
18 | |||
19 | test_url: http://www.positioningmag.com/magazine/details.aspx?id=41083 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/post-gazette.com.txt b/inc/3rdparty/site_config/standard/post-gazette.com.txt new file mode 100644 index 00000000..1ea945a0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/post-gazette.com.txt | |||
@@ -0,0 +1,26 @@ | |||
1 | title: //div[@class='story_headline'] | ||
2 | author: substring-before(substring-after(//div[@class='story_byline'],'By'),'/') | ||
3 | date: //div[@class='story_lastupdate'] | ||
4 | body: //div[@id='story'] | ||
5 | strip: //div[@class='story_byline'] | ||
6 | strip: //div[@class='story_lastupdate'] | ||
7 | strip: //div[@class='story_headline'] | ||
8 | strip: //div[@id='abuse'] | ||
9 | strip: //h2 | ||
10 | strip: //div[@class='pagenumbers_wrap'] | ||
11 | strip: //ul[@class='pagenumbers'] | ||
12 | strip: //div[starts-with(., 'To report inappropriate comments')] | ||
13 | |||
14 | strip_id_or_class: story_share | ||
15 | strip_id_or_class: OUTBRAIN | ||
16 | strip_id_or_class: story_box_right | ||
17 | strip: //div[a[@href='http://www.post-gazette.com/pg/12062/1213990-42.stm']] | ||
18 | strip: //ul[@id='pikame']/li[position()>1] | ||
19 | |||
20 | prune: no | ||
21 | tidy: no | ||
22 | |||
23 | single_page_link: //a[contains(@href, '?p=0')] | ||
24 | |||
25 | test_url: http://www.post-gazette.com/stories/sports/penguins/pens-crosby-expects-to-return-thursday-226648/ | ||
26 | test_url: http://www.post-gazette.com/stories/sports/pirates/pirates-fork-over-changes-for-fans-at-pnc-park-629789 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/posta.com.tr.txt b/inc/3rdparty/site_config/standard/posta.com.tr.txt new file mode 100644 index 00000000..86cb5d0b --- /dev/null +++ b/inc/3rdparty/site_config/standard/posta.com.tr.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | title: //div[@id='divAdnetKeyword']/h1 | ||
2 | body: //div[@id='_middle_content_bottom'] | ||
3 | |||
4 | wrap_in(fieldset)://div[@id='_middle_content_bottom_child2']/img | ||
5 | |||
6 | strip: //div[@id='_middle_content_bottom_child1'] | ||
7 | strip: //div[@id='_middle_content_bottom_child4'] | ||
8 | strip: //div[@class='cls'] | ||
9 | strip: //div[@class='iphoneBox'] | ||
10 | strip: //ul[@class='ilgiliHaber'] | ||
11 | strip: //div[@class='yorumlar'] | ||
12 | strip: //div[@class='kategoriler'] | ||
13 | strip: //div[@class='textSize'] | ||
14 | strip: //span[@class='tarih'] | ||
15 | test_url: http://www.posta.com.tr/yasam/teknoloji/HaberDetay/Fedailer_Istanbul_da.htm?ArticleID=101044 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/prb.org.txt b/inc/3rdparty/site_config/standard/prb.org.txt new file mode 100644 index 00000000..7f7a5031 --- /dev/null +++ b/inc/3rdparty/site_config/standard/prb.org.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //h1 | ||
2 | date: /html/head/meta[@name="date"]/@content | ||
3 | body: //div[@id="featuredlinksbox"] | ||
4 | strip: //div[@class="relatedbox"] | ||
5 | strip: //h1 | ||
6 | strip: //br | ||
7 | strip_image_src: "/images" | ||
8 | test_url: http://www.prb.org/Journalists/Webcasts/2011/military-families.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/prog21.dadgum.com.txt b/inc/3rdparty/site_config/standard/prog21.dadgum.com.txt new file mode 100644 index 00000000..906c27a0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/prog21.dadgum.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id='left'] | ||
3 | strip: //h1 | ||
4 | convert_double_br_tags: yes | ||
5 | strip_id_or_class: entry-footer | ||
6 | strip: //h1[. = 'Previously']/following::* | ||
7 | author: string('James Hague') | ||
8 | date: //div[@class = 'entry-footer']/text() | ||
9 | test_url: http://prog21.dadgum.com/105.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/prolost.com.txt b/inc/3rdparty/site_config/standard/prolost.com.txt new file mode 100644 index 00000000..cef811d4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/prolost.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //div[@class='body'] | ||
2 | title: //h2[@class='title'] | ||
3 | date: //span[@class='posted-on'] | ||
4 | test_url: http://prolost.com/blog/2011/10/13/real-men-comp-with-film.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/propublica.org.txt b/inc/3rdparty/site_config/standard/propublica.org.txt new file mode 100644 index 00000000..11e63bd0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/propublica.org.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //h1[@class="article-title"] | ||
2 | author: //meta[@name="author"]/@content | ||
3 | body: //div[@class="article-full"] | ||
4 | strip_id_or_class: sidebar_inject | ||
5 | strip_id_or_class: callout | ||
6 | strip_id_or_class: content-inset | ||
7 | strip_id_or_class: byline-block | ||
8 | strip_id_or_class: photo-caption | ||
9 | strip_id_or_class: foot-tools | ||
10 | |||
11 | test_url: http://www.propublica.org/article/pardon-applicants-benefit-from-friends-in-high-places \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/prosa.dk.txt b/inc/3rdparty/site_config/standard/prosa.dk.txt new file mode 100644 index 00000000..dedd33d3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/prosa.dk.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | author: //p[@class='name'] | ||
2 | date: substring-before(//p[@class='date'], ' | ') | ||
3 | body: //div[@class='news_single_item'] | ||
4 | test_url: http://www.prosa.dk/aktuelt/nyhed/artikel/internetaktivisten-uden-maske/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt b/inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt new file mode 100644 index 00000000..19059c4a --- /dev/null +++ b/inc/3rdparty/site_config/standard/prospectmagazine.co.uk.txt | |||
@@ -0,0 +1,26 @@ | |||
1 | #basics | ||
2 | author: (//div[contains(@class,'author')])[1] | ||
3 | date: substring-before(//a[@class='issue'], '—') | ||
4 | #body://div[@class = 'entry'] | ||
5 | # use this until move_into support is ready | ||
6 | body: //div[@class = 'entry' or @class='standfirst' or @class='lead_image'] | ||
7 | |||
8 | #moves header image and tagline into body | ||
9 | move_into(//div[@class='entry']/div)://div[@class = 'lead_image'] | ||
10 | move_into(//div[@class='entry']/div)://div[@class = 'standfirst'] | ||
11 | |||
12 | |||
13 | # moves author info to end of text | ||
14 | move_into(//p[strong[string(.) = 'Follow Prospect on Twitter']])://div[@id='sidebar_content']/p/em | ||
15 | |||
16 | prune: no | ||
17 | |||
18 | # strips social links | ||
19 | strip_id_or_class:login-status | ||
20 | strip_id_or_class:shareinpost | ||
21 | strip_id_or_class:content_subscribe | ||
22 | strip_id_or_class:postinfo | ||
23 | strip_id_or_class:postutils | ||
24 | strip_id_or_class:comments | ||
25 | strip://strong[string(.) = 'Follow Prospect on Twitter'] | ||
26 | test_url: http://www.prospectmagazine.co.uk/2011/07/postmodernism-is-dead-va-exhibition-age-of-authenticism/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/psychologytoday.com.txt b/inc/3rdparty/site_config/standard/psychologytoday.com.txt new file mode 100644 index 00000000..3da3cea3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/psychologytoday.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //div[@class="page-title"]/h1 | ||
2 | author: //a[@title="View Bio"] | ||
3 | date: substring-before(substring-after(//span[@class="submitted"], 'Published on '), ' by') | ||
4 | strip://div[@class="page-title"]/h1 | ||
5 | strip://div[@class="article-abstract"] | ||
6 | strip://div[@class="article-meta"] | ||
7 | strip://div[@id="rightColumn"] | ||
8 | strip://div[@id="inline-content-bottom-left"] | ||
9 | test_url: http://www.psychologytoday.com/blog/how-happiness/201205/my-quibble-facebook \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/publications.parliament.uk.txt b/inc/3rdparty/site_config/standard/publications.parliament.uk.txt new file mode 100644 index 00000000..fa099473 --- /dev/null +++ b/inc/3rdparty/site_config/standard/publications.parliament.uk.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | author: //meta[@name="Author"]/@content | ||
2 | date: //meta[@name="Date"]/@content | ||
3 | strip: //h5 | ||
4 | test_url: http://www.publications.parliament.uk/pa/ld201011/ldhansrd/text/111109-0003.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt b/inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt new file mode 100644 index 00000000..126f9e27 --- /dev/null +++ b/inc/3rdparty/site_config/standard/purpleplanetmedia.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: //div[@class='title'] | ||
2 | body: //div[@class='body'] | ||
3 | next_page_link: //div[@class='source']/text()[contains(., 'page')]/following-sibling::a | ||
4 | test_url: http://purpleplanetmedia.com/eye/inte/ngaiman.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/quantumdiaries.org.txt b/inc/3rdparty/site_config/standard/quantumdiaries.org.txt new file mode 100644 index 00000000..a366c1b3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/quantumdiaries.org.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | title: //div[contains(@class, "hentry")]/h3 | ||
2 | |||
3 | author: //div[contains(@class, "hentry")]/h2[contains(@class, "author_bio")] | ||
4 | |||
5 | date: substring-before(substring-after(normalize-space(//p[contains(@class, "postmetadata")]/small), "was posted on "), " and is filed under") | ||
6 | |||
7 | body: //div[contains(@class, "entry")] | ||
8 | |||
9 | strip_id_or_class: addtoany_share_save_container | ||
10 | strip_id_or_class: postmetadata | ||
11 | strip_id_or_class: author_bio | ||
12 | strip_id_or_class: author_bio_2 | ||
13 | strip: //div[contains(@class, "hentry")]/h3 | ||
14 | test_url: http://www.quantumdiaries.org/2011/10/25/piling-up/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/queerty.com.txt b/inc/3rdparty/site_config/standard/queerty.com.txt new file mode 100644 index 00000000..655f8b80 --- /dev/null +++ b/inc/3rdparty/site_config/standard/queerty.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@class='copy'] | ||
2 | title: //h1[@class='hed'] | ||
3 | test_url: http://www.queerty.com/rawhide-radicals-meet-five-heroes-from-the-leather-community-20120302/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/quepasa.cl.txt b/inc/3rdparty/site_config/standard/quepasa.cl.txt new file mode 100644 index 00000000..fae4e6a3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/quepasa.cl.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h1 | ||
2 | |||
3 | body: //div[@class="cuerpoArticulo"] | ||
4 | |||
5 | |||
6 | test_url: http://www.quepasa.cl/magazine/articulo/print.html?id=5299 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/quora.com.txt b/inc/3rdparty/site_config/standard/quora.com.txt new file mode 100644 index 00000000..3d34f2f8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/quora.com.txt | |||
@@ -0,0 +1,17 @@ | |||
1 | tidy: no | ||
2 | prune: no | ||
3 | body: //div[contains(@class, 'main_col')] | ||
4 | title: //h1 | ||
5 | |||
6 | strip_id_or_class: hidden | ||
7 | strip_id_or_class: item_action_bar | ||
8 | strip_id_or_class: answer_voters | ||
9 | strip_id_or_class: question_topics | ||
10 | strip_id_or_class: answer_header_text | ||
11 | strip_id_or_class: editor_link | ||
12 | strip_id_or_class: view_tag | ||
13 | strip_id_or_class: include_details | ||
14 | strip_id_or_class: sig_edit | ||
15 | strip_id_or_class: profile_photo_img | ||
16 | |||
17 | test_url: http://www.quora.com/What-everyday-habit-do-you-wish-you-had-developed-earlier-in-life \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/radar.oreilly.com.txt b/inc/3rdparty/site_config/standard/radar.oreilly.com.txt new file mode 100644 index 00000000..99ab4bb1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/radar.oreilly.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | date://span[@class='date'] | ||
2 | body://div[@class='entry-body'] | ||
3 | test_url: http://radar.oreilly.com/2012/01/genome-cloud-digital-humanities-hadoop-world-strata.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/radionz.co.nz.txt b/inc/3rdparty/site_config/standard/radionz.co.nz.txt new file mode 100644 index 00000000..e2617dc5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/radionz.co.nz.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@class='body'] | ||
2 | title: //div[@class='newsstory']/h2 | ||
3 | test_url: http://www.radionz.co.nz/news/stories/2010/07/18/12481029a86d \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/randsinrepose.com.txt b/inc/3rdparty/site_config/standard/randsinrepose.com.txt new file mode 100644 index 00000000..f0c91c51 --- /dev/null +++ b/inc/3rdparty/site_config/standard/randsinrepose.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //div[@id='center-col']/h4 | ||
2 | author: substring-before(//title,'In') | ||
3 | date: substring-after(//div[@class='commenttext']/span,'#') | ||
4 | body: //div[@id='center-col'] | ||
5 | strip: //div[@id='center-col']/h4 | ||
6 | strip: //div[@class='graytext'] | ||
7 | |||
8 | # Anthony Perez-Sanz 2012.3.14 | ||
9 | # Removed long gif from the end | ||
10 | strip: //img[@src='http://www.randsinrepose.com/spreader.gif'] | ||
11 | test_url: http://www.randsinrepose.com/archives/2012/03/13/hacking_is_important.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/readability.com.txt b/inc/3rdparty/site_config/standard/readability.com.txt new file mode 100644 index 00000000..80337291 --- /dev/null +++ b/inc/3rdparty/site_config/standard/readability.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | single_page_link: //link[@rel='canonical']/@href | ||
2 | |||
3 | test_url: http://www.readability.com/read?url=http://feeds.gawker.com/~r/lifehacker/full/~3/jaxAjSay_Rw/add-a-rain-gutter-to-a-picnic-table-for-a-built+in-drink-cooler \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/readwriteweb.com.txt b/inc/3rdparty/site_config/standard/readwriteweb.com.txt new file mode 100644 index 00000000..ff799aa0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/readwriteweb.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //h1[@class="titlelink"] | ||
2 | date: //span[@class="timestamp"]/@data-published | ||
3 | body: //div[@class="asset-content"] | ||
4 | strip_id_or_class: related-entries | ||
5 | strip_id_or_class: like-and-retweet | ||
6 | |||
7 | author: //div[@id="submeta"]/a[1] | ||
8 | test_url: http://www.readwriteweb.com/archives/why_facebook_terrifies_google.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/real.gr.txt b/inc/3rdparty/site_config/standard/real.gr.txt new file mode 100644 index 00000000..fe5ab672 --- /dev/null +++ b/inc/3rdparty/site_config/standard/real.gr.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@id='_ctl12__ctl0_Article'] | ||
2 | prune: no | ||
3 | autodetect_on_failure: no \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/recipe.com.txt b/inc/3rdparty/site_config/standard/recipe.com.txt new file mode 100644 index 00000000..8c8f0e0c --- /dev/null +++ b/inc/3rdparty/site_config/standard/recipe.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | body: //div[@class='recipedetailsleft' or @id='recipePrepAndServe' or @id='recipeingredients'] | ||
2 | |||
3 | strip_id_or_class: location | ||
4 | strip_id_or_class: savings | ||
5 | strip_id_or_class: recipeDetailDescButton | ||
6 | |||
7 | prune: no | ||
8 | tidy: no | ||
9 | |||
10 | test_url: http://www.recipe.com/avocado-basil-pasta/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/red-hot-girls.com.txt b/inc/3rdparty/site_config/standard/red-hot-girls.com.txt new file mode 100644 index 00000000..3ae959b1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/red-hot-girls.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //div[@class='short-text' or starts-with(@id, 'news-id-')] | ||
2 | prune: no | ||
3 | tidy: no | ||
4 | |||
5 | test_url: http://red-hot-girls.com/2011/06/10/the_red_hot_natalia_maria_53_pics.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/reddit.com.txt b/inc/3rdparty/site_config/standard/reddit.com.txt new file mode 100644 index 00000000..58ca9ece --- /dev/null +++ b/inc/3rdparty/site_config/standard/reddit.com.txt | |||
@@ -0,0 +1,16 @@ | |||
1 | # This setup grabs the text from a Reddit self post. It ignores all comments etc. | ||
2 | |||
3 | title: //p[@class="title"]/a/text() | ||
4 | |||
5 | author: //p[@class="tagline"]/a | ||
6 | |||
7 | # this doesn't work for some reason...? | ||
8 | date: //p[@class="tagline"]//@datetime | ||
9 | |||
10 | body: //div[@class="expando"]//div[@class="usertext-body"] | ||
11 | |||
12 | strip_id_or_class: tagline | ||
13 | strip_id_or_class: unvotable-message | ||
14 | strip_id_or_class: buttons | ||
15 | |||
16 | test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/redmondpie.com.txt b/inc/3rdparty/site_config/standard/redmondpie.com.txt new file mode 100644 index 00000000..12a96187 --- /dev/null +++ b/inc/3rdparty/site_config/standard/redmondpie.com.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | title: //div[@class='posthead']//h2 | ||
2 | body: //div[contains(@class, 'postcontent') or @class='posthead'] | ||
3 | author: //div[@class='posthead']//a[@rel='author'] | ||
4 | |||
5 | strip: //div[@class='posthead']//h2 | ||
6 | replace_string(>Advertisements</div>): ></div> | ||
7 | replace_string(<p>You can follow us on): <p style="display:none;"> | ||
8 | strip_id_or_class: likeThisPost | ||
9 | |||
10 | prune: no | ||
11 | tidy: no | ||
12 | |||
13 | test_url: http://www.redmondpie.com/how-to-play-music-directly-from-home-screen-folders-on-iphone/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt b/inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt new file mode 100644 index 00000000..4f195a06 --- /dev/null +++ b/inc/3rdparty/site_config/standard/redtape.msnbc.msn.com.txt | |||
@@ -0,0 +1,20 @@ | |||
1 | # Think there might be something up with your parser that it strips out 'print' from the title :) | ||
2 | |||
3 | title: //meta[@name='title']/@content | ||
4 | author: //meta[@name='author']/@content | ||
5 | date: //meta[@name='date']/@content | ||
6 | |||
7 | body: //div[@class='articleText'] | ||
8 | |||
9 | strip: //div[contains(@class, 'day')] | ||
10 | strip: //div[contains(@class, 'month')] | ||
11 | strip: //div[contains(@class, 'year')] | ||
12 | strip: //div[contains(@class, 'time')] | ||
13 | strip: //h1[@class='gl_headline'] | ||
14 | strip: //div[@class='byline'] | ||
15 | strip: //div[@id='left_ear'] | ||
16 | strip: //div[@id='right_ear'] | ||
17 | strip: //div[contains(@class, 'PopularPosts')] | ||
18 | strip ://div[@class='discuss_page_break'] | ||
19 | strip ://div[contains(@class, 'p-content_TagList')] | ||
20 | test_url: http://redtape.msnbc.msn.com/_news/2011/09/28/8020661-sprint-raises-fee-but-wont-free-users-from-two-year-contracts?preview=true \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/reflets.info.txt b/inc/3rdparty/site_config/standard/reflets.info.txt new file mode 100644 index 00000000..4a9fab67 --- /dev/null +++ b/inc/3rdparty/site_config/standard/reflets.info.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body://div[@class='storycontent'] | ||
2 | date://div[@class='date'] | ||
3 | strip://li[@class='sharing_label'] | ||
4 | strip://a[@class='FlattrButton'] | ||
5 | test_url: http://reflets.info/orange-nokia-siemens-deep-packet-inspection/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/renenekuda.cz.txt b/inc/3rdparty/site_config/standard/renenekuda.cz.txt new file mode 100644 index 00000000..0b3dee1d --- /dev/null +++ b/inc/3rdparty/site_config/standard/renenekuda.cz.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: //*[@class='entry-title'] | ||
2 | body: //div[@class='entry-content'] | ||
3 | test_url: http://www.renenekuda.cz/recept-na-produktivitu/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/retrieverweekly.com.txt b/inc/3rdparty/site_config/standard/retrieverweekly.com.txt new file mode 100644 index 00000000..1264ee3f --- /dev/null +++ b/inc/3rdparty/site_config/standard/retrieverweekly.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | single_page_link://a[contains(@href, 'print')] | ||
2 | |||
3 | # Grab metadata from the "printer-friendly" page, after specifying single_page_link | ||
4 | title://h2 | ||
5 | date://cite | ||
6 | test_url: http://www.retrieverweekly.com/?cmd=displaystory&story_id=7548&format=html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/reuters.com.txt b/inc/3rdparty/site_config/standard/reuters.com.txt new file mode 100644 index 00000000..c5c94a4f --- /dev/null +++ b/inc/3rdparty/site_config/standard/reuters.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //h1[@class='headline3'] | ||
2 | author: substring-after(//p[@class="byline"], 'By ') | ||
3 | date: //meta[@name="REVISION_DATE"]/@content | ||
4 | body: //div[@id='articleImage' or @id='frame_fd1fade'] | //span[@id='articleText'] | //div[@class='pageNavigation'] | ||
5 | strip: //li[@class='next'] | ||
6 | strip: //span[@class='articleLocation'] | ||
7 | prune: no | ||
8 | tidy: no | ||
9 | |||
10 | test_url: http://www.reuters.com/article/2011/04/08/us-ivorycoast-killings-idUSTRE73732A20110408 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt b/inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt new file mode 100644 index 00000000..dbe42932 --- /dev/null +++ b/inc/3rdparty/site_config/standard/revistapiaui.estadao.com.br.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //div[@class="article_header"]/h3 | ||
2 | author: //div[@class="autor"]/p/* | ||
3 | date: substring-after(substring-after(//div[@class="flt-left"],"> "), "> ") | ||
4 | |||
5 | move_into(//div[@class="new_article"]): //div[@class="img_article"]/img | ||
6 | |||
7 | body: //div[@class="article_content"] | ||
8 | convert_double_br_tags: yes | ||
9 | |||
10 | test_url: http://revistapiaui.estadao.com.br/edicao-68/questoes-latino-americanas/filhos-da-guerra-suja \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt b/inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt new file mode 100644 index 00000000..904a11dd --- /dev/null +++ b/inc/3rdparty/site_config/standard/richardmuscat.wordpress.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //div[@id="post"] | ||
2 | strip: //div[@id="author-description"] | ||
3 | date: //span[@class="entry-date"] | ||
4 | author: //span[@class="author vcard"] | ||
5 | test_url: http://richardmuscat.wordpress.com/2011/06/20/the-price-of-free/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+TheBrooksReview+%28The+Brooks+Review%29 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt b/inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt new file mode 100644 index 00000000..82cfaf27 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ritemail.blogspot.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //div[@class='post-body entry-content'] | ||
2 | strip: //div[@id='lws_0'] | ||
3 | prune: no | ||
4 | |||
5 | test_url: http://ritemail.blogspot.com/2011/06/hayden-panettiere-candids-in-los.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt b/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt new file mode 100644 index 00000000..3035527c --- /dev/null +++ b/inc/3rdparty/site_config/standard/rockpapershotgun.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //h2 | ||
2 | |||
3 | strip: //div[ contains(@class, 'respond') ] | //h2 | //h1 | ||
4 | |||
5 | date: substring-after(//p[@class='info'], ' on ') | ||
6 | |||
7 | author: //p[@class='info']//a | ||
8 | test_url: http://www.rockpapershotgun.com/2010/07/29/rps-half-verdict-starcraft-2/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt b/inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt new file mode 100644 index 00000000..abe70351 --- /dev/null +++ b/inc/3rdparty/site_config/standard/rodrigo.sharpcube.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | author: //article/header/span[@class='author'] | ||
2 | title://article/header/h1 | ||
3 | body: //article | ||
4 | strip: //article/header | ||
5 | strip: //article/p[@class='metadata'] | ||
6 | footnotes: yes | ||
7 | test_url: http://rodrigo.sharpcube.com/2010/06/20/using-and-sharing-a-vpn-connection-on-your-mac/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/rogerebert.com.txt b/inc/3rdparty/site_config/standard/rogerebert.com.txt new file mode 100644 index 00000000..26792330 --- /dev/null +++ b/inc/3rdparty/site_config/standard/rogerebert.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: substring-before(//title,':') | ||
2 | author: substring-after(substring-before(//div[@class='text']/b,'/'),'BY') | ||
3 | |||
4 | body: //div[@class='text'] | ||
5 | |||
6 | strip: //a[contains(@href,'printart')] | ||
7 | strip_id_or_class: enlarge_photo | ||
8 | test_url: http://rogerebert.com/apps/pbcs.dll/article?AID=/20120411/REVIEWS/120419998/1005/GLOSSARY \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/rolfinjapan.nl.txt b/inc/3rdparty/site_config/standard/rolfinjapan.nl.txt new file mode 100644 index 00000000..d618c23f --- /dev/null +++ b/inc/3rdparty/site_config/standard/rolfinjapan.nl.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //div[contains(@class, 'inhoud')] | ||
2 | date: //span[@class ='published'] | ||
3 | author: //span[@class ='author'] | ||
4 | strip: //div[@class = 'grid_2'] | ||
5 | strip: //div[@class = 'block-citation-text'] | ||
6 | test_url: http://www.rolfinjapan.nl/2011/06/duizend-kraanvogels/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/rollingstone.com.txt b/inc/3rdparty/site_config/standard/rollingstone.com.txt new file mode 100644 index 00000000..9a10a69e --- /dev/null +++ b/inc/3rdparty/site_config/standard/rollingstone.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //h1 | ||
2 | author: //h3[@class="byline"]/strong | ||
3 | |||
4 | body: //div[@id='main']/h2 | //div[@id='main']//div[@class='body'] | ||
5 | |||
6 | prune: no | ||
7 | |||
8 | single_page_link: //a[@class='print-page'] | ||
9 | |||
10 | test_url: http://www.rollingstone.com/politics/news/the-plastic-bag-wars-20110725 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/rottentomatoes.com.txt b/inc/3rdparty/site_config/standard/rottentomatoes.com.txt new file mode 100644 index 00000000..b5b29fe4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/rottentomatoes.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | body: //div[@class='movie_content_area'] | ||
2 | strip_id_or_class: tomatometer_bar_help | ||
3 | strip_id_or_class: critic-links | ||
4 | strip_id_or_class: top-critics-numbers | ||
5 | strip_id_or_class: fan_side | ||
6 | strip_id_or_class: fblike | ||
7 | strip_id_or_class: rating_widget | ||
8 | strip_id_or_class: friend_reviews | ||
9 | prune: no | ||
10 | |||
11 | test_url: http://www.rottentomatoes.com/m/thor/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/roughtype.com.txt b/inc/3rdparty/site_config/standard/roughtype.com.txt new file mode 100644 index 00000000..f2f00392 --- /dev/null +++ b/inc/3rdparty/site_config/standard/roughtype.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //div[@class='content'] | ||
2 | strip: //p[@class='postmeta']/following::* | ||
3 | strip: //p[@class='postmeta'] | ||
4 | strip: //p[@align='left'] | ||
5 | test_url: http://www.roughtype.com/archives/2012/01/power_to_the_da.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/roy.gbiv.com.txt b/inc/3rdparty/site_config/standard/roy.gbiv.com.txt new file mode 100644 index 00000000..6ff03de8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/roy.gbiv.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | strip_comments: no | ||
2 | test_url: http://roy.gbiv.com/untangled/2008/rest-apis-must-be-hypertext-driven \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/rpgsite.net.txt b/inc/3rdparty/site_config/standard/rpgsite.net.txt new file mode 100644 index 00000000..e7f29bbe --- /dev/null +++ b/inc/3rdparty/site_config/standard/rpgsite.net.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //div[@id='news-text'] | ||
2 | prune: no | ||
3 | test_url: http://www.rpgsite.net/news/1964-tetsuya-nomura-says-hell-soon-show-the-future-of-final-fantasy | ||
4 | test_url: http://www.rpgsite.net/news/1965-new-atelier-totori-plus-screens-and-artwork \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/rubysfera.pl.txt b/inc/3rdparty/site_config/standard/rubysfera.pl.txt new file mode 100644 index 00000000..d9df7684 --- /dev/null +++ b/inc/3rdparty/site_config/standard/rubysfera.pl.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | author: //div[contains(@class, 'author_text')]/h4/text() | ||
2 | date: //li[@class='date'] | ||
3 | |||
4 | # stripping excessive tags | ||
5 | strip: //div[contains(@class, 'entry_meta')] | ||
6 | strip: //div[contains(@class, 'single_meta')] | ||
7 | strip: //br[contains(@class, 'clear')] | ||
8 | strip: //h3[contains(., 'Komentarz')] | ||
9 | test_url: http://rubysfera.pl/2011/09/10-porad-o-rvm/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ruhlman.com.txt b/inc/3rdparty/site_config/standard/ruhlman.com.txt new file mode 100644 index 00000000..7a21c4af --- /dev/null +++ b/inc/3rdparty/site_config/standard/ruhlman.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h1[@class='entry-title'] | ||
2 | author: //span[@class='author vcard'] | ||
3 | date: //abbr[@class='published'] | ||
4 | body: //div[@class='entry-content'] | ||
5 | |||
6 | test_url: http://ruhlman.com/2009/05/cookbooks-that-teach/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ruttloff.org.txt b/inc/3rdparty/site_config/standard/ruttloff.org.txt new file mode 100644 index 00000000..c036dcf8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ruttloff.org.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | author: //a[@class='author'] | ||
2 | tidy: no | ||
3 | test_url: http://ruttloff.org/2012/06/13/intervention \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/salon.com.txt b/inc/3rdparty/site_config/standard/salon.com.txt new file mode 100644 index 00000000..04f8afd5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/salon.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | author: (//span[@class="byline"]/a)[1] | ||
3 | date: //span[contains(@class, "toLocalTime")] | ||
4 | body: (//div[contains(@class, "articleInner")]//img[contains(@src, 'media.salon.com') and contains(@src, '460x')])[1] | //div[contains(@class, "articleContent") or contains(@class, "writerMeta")] | ||
5 | |||
6 | prune: no | ||
7 | |||
8 | # deal with singleton links | ||
9 | single_page_link: (//h1/a[contains(@href, '/singleton')])[1] | ||
10 | |||
11 | test_url: http://www.salon.com/2011/10/25/occupying_the_rust_belt/singleton/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/salzburg.com.txt b/inc/3rdparty/site_config/standard/salzburg.com.txt new file mode 100644 index 00000000..31067481 --- /dev/null +++ b/inc/3rdparty/site_config/standard/salzburg.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //p[@class='teaser1 darkgrey myriad'] | ||
2 | move_into(//p[@class='teaser1 darkgrey myriad']): //div[@class='artikel clear'] | ||
3 | strip: //div[@class='hidden'] | ||
4 | strip: //div[@id='article_related_source'] | ||
5 | |||
6 | test_url: http://www.salzburg.com/nachrichten/oesterreich/politik/sn/artikel/deutliche-nachbesserungen-bei-lehrerdienstrecht-19469/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/saveyourself.ca.txt b/inc/3rdparty/site_config/standard/saveyourself.ca.txt new file mode 100644 index 00000000..354f5911 --- /dev/null +++ b/inc/3rdparty/site_config/standard/saveyourself.ca.txt | |||
@@ -0,0 +1,25 @@ | |||
1 | title://h1 | ||
2 | |||
3 | # my section divs seem to interfere with the Instapaper parser, so I ditch 'em | ||
4 | dissolve://div[contains(@class, 'section')] | ||
5 | |||
6 | #these don't seem to be necessary, but just in case | ||
7 | strip_id_or_class:'masthead' | ||
8 | strip_id_or_class:'footer' | ||
9 | |||
10 | #again, Instapaper seems to understand where my content is, but just in case | ||
11 | body://div[@id='content'] | ||
12 | |||
13 | # in general, I want the Instapaper view to look like my print CSS, so I remove things specified for the screen or non-printing | ||
14 | strip_id_or_class:'screen-only' | ||
15 | strip_id_or_class:'no-print' | ||
16 | |||
17 | #other misc removals and simplifications | ||
18 | strip_id_or_class:'popup' | ||
19 | strip_id_or_class:'ZoomSpin' | ||
20 | |||
21 | #I have a lot of content in sidebars and "meta" asides that can work inline just fine, but has to be distinguished somehow with some minimal formatting, so I put them in blockquotes | ||
22 | wrap_in(blockquote)://div[contains(@class, 'sidebar')] | ||
23 | wrap_in(blockquote)://div[contains(@class, 'meta')] | ||
24 | wrap_in(blockquote)://p[contains(@class, 'meta')] | ||
25 | test_url: http://saveyourself.ca/tutorials/low-back-pain.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sbnation.com.txt b/inc/3rdparty/site_config/standard/sbnation.com.txt new file mode 100644 index 00000000..c213843c --- /dev/null +++ b/inc/3rdparty/site_config/standard/sbnation.com.txt | |||
@@ -0,0 +1,28 @@ | |||
1 | title: //h1[@id='stream_title'] | ||
2 | |||
3 | # Author and date don't work | ||
4 | author: //div[@class='byline'] | ||
5 | date: //div[@class='date-stamp'] | ||
6 | |||
7 | body: //div[@class='node-article'] | ||
8 | |||
9 | strip_id_or_class: fb-like-box | ||
10 | strip_id_or_class: stream-fb-like | ||
11 | strip_id_or_class: social-meta | ||
12 | strip_id_or_class: social-spoken | ||
13 | strip_id_or_class: twitter-share-button | ||
14 | strip_id_or_class: twitter-follow-button | ||
15 | strip_id_or_class: spinner_node_list | ||
16 | strip_id_or_class: node-sort-link | ||
17 | strip_id_or_class: stream_title | ||
18 | strip_id_or_class: stream_summary | ||
19 | strip_id_or_class: update-count-container | ||
20 | strip_id_or_class: major-updates | ||
21 | strip_id_or_class: newsletter-slide | ||
22 | strip_id_or_class: author-mini-profile | ||
23 | strip_id_or_class: byline | ||
24 | strip_id_or_class: header | ||
25 | strip_id_or_class: footer | ||
26 | |||
27 | # Works, but "no text" errors on: http://www.sbnation.com/nba/2012/3/9/2856780/nba-scores-dwight-howard-bulls-magic-mavs-suns | ||
28 | test_url: http://www.sbnation.com/nba/2012/3/13/2867226/dwight-howard-trade-rumors-2012-faq-orlando-magic \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/schneier.com.txt b/inc/3rdparty/site_config/standard/schneier.com.txt new file mode 100644 index 00000000..67181b65 --- /dev/null +++ b/inc/3rdparty/site_config/standard/schneier.com.txt | |||
@@ -0,0 +1,25 @@ | |||
1 | author: //p[@class='mastname'] | ||
2 | |||
3 | body: //div[@class='indivbody'] | ||
4 | date: //div[@class='indivbody']/h2[1] | ||
5 | |||
6 | # Remove blog title. Specify first occurrence in case h1 is used in article | ||
7 | strip: //div[@class='indivbody']/h1[1] | ||
8 | |||
9 | # Remove blog description (the first p element) | ||
10 | strip: //div[@class='indivbody']/p[1] | ||
11 | |||
12 | # Remove navigation (second p element) | ||
13 | strip: //div[@class='indivbody']/p[2] | ||
14 | |||
15 | # Remove duplicate of article title. Specify first occurrence in case h3 is used in article | ||
16 | strip: //div[@class='indivbody']/h3[1] | ||
17 | |||
18 | # Remove publishing date, it's extracted by rule above | ||
19 | strip: //div[@class='indivbody']/h2[1] | ||
20 | |||
21 | # Remove duplicate of date at end, and newsletter signup | ||
22 | strip: //p[@class='posted'] | ||
23 | |||
24 | # Leave date at top | ||
25 | test_url: http://www.schneier.com/blog/archives/2010/12/security_in_202.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/science.orf.at.txt b/inc/3rdparty/site_config/standard/science.orf.at.txt new file mode 100644 index 00000000..89ebfe08 --- /dev/null +++ b/inc/3rdparty/site_config/standard/science.orf.at.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | body: //div[@class="storybox"] | ||
2 | title: //div[@class="storybox"]//h1 | ||
3 | strip: //p[@class='metaline'] | ||
4 | date: substring-after(//*[@class='time'],'Erstellt am') | ||
5 | strip: //div[@class='fact'] | ||
6 | strip: //p[@class='backlink'] | ||
7 | strip: //div[@class='mailto'] | ||
8 | strip: //div[@id='forumDisclaimer'] | ||
9 | strip: //div[@class='forum'] | ||
10 | |||
11 | test_url: http://science.orf.at/stories/1700900/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/scienceblogs.de.txt b/inc/3rdparty/site_config/standard/scienceblogs.de.txt new file mode 100644 index 00000000..08c16842 --- /dev/null +++ b/inc/3rdparty/site_config/standard/scienceblogs.de.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | single_page_link: //div[@class='c2c1']/div[@class='toptheme further line']//ul//li/a | ||
2 | |||
3 | author: //div[@class='details clear']//a[@class='hi'] | ||
4 | body: //div[@class='title'] | ||
5 | strip: //p[@class='entrypagination'] | ||
6 | strip: //p[@class='details_top'] | ||
7 | date: //p[@class='details_top'] | ||
8 | title: //div[@class='title']/h1 | ||
9 | strip: //p[@class='details'] | ||
10 | strip: //p[@class='details_bottom'] | ||
11 | |||
12 | test_url: http://www.scienceblogs.de/astrodicticum-simplex/2011/10/weltuntergang-reloaded-das-jungste-gericht-findet-am-21-oktober-statt.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/scienceticker.info.txt b/inc/3rdparty/site_config/standard/scienceticker.info.txt new file mode 100644 index 00000000..75a52824 --- /dev/null +++ b/inc/3rdparty/site_config/standard/scienceticker.info.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | body: //div[@class='post'] | ||
2 | title: //h1[@id='singlePageTitle'] | ||
3 | date: substring-before(//small,'• Rubrik') | ||
4 | |||
5 | strip: //div[@class='post-ratings'] | ||
6 | strip: //div[@class='post-ratings-loading'] | ||
7 | strip: //a[@title='Empfehlen Sie den Text weiter!'] | ||
8 | strip: //a[@title='Drucken'] | ||
9 | strip: //div[@class='share'] | ||
10 | |||
11 | test_url: http://www.scienceticker.info/2011/11/24/forscher-finden-gedachtnismolekul/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/scientificamerican.com.txt b/inc/3rdparty/site_config/standard/scientificamerican.com.txt new file mode 100644 index 00000000..d510407d --- /dev/null +++ b/inc/3rdparty/site_config/standard/scientificamerican.com.txt | |||
@@ -0,0 +1,25 @@ | |||
1 | # | ||
2 | # After site revisions at SciAm, this configuration does | ||
3 | # not work, especially for multi-page articles. For | ||
4 | # every article there is now a "Print" link which | ||
5 | # is far more reliable. So this configuration should be | ||
6 | # removed or disabled. | ||
7 | # 2/3/13 | ||
8 | # | ||
9 | |||
10 | # meta data | ||
11 | title://h1[@class = 'articleTitle'] | ||
12 | author:substring-after(//span[@class = 'byline'],'By ') | ||
13 | date:substring-before(//span[@class = 'datestamp'],'|') | ||
14 | |||
15 | #body content | ||
16 | body://div[@id = 'articleContent'] | ||
17 | #next_page_link://li[@id = 'flairPagination']/a[last()] | ||
18 | |||
19 | single_page_link: //a[contains(@href, 'print=true')] | ||
20 | |||
21 | #cleanup | ||
22 | strip://div[@class = 'fsgBooks'] | ||
23 | |||
24 | test_url: http://www.scientificamerican.com/article.cfm?id=do-brain-scans-comatose-patients-reveal-conscious-state | ||
25 | test_url: http://www.scientificamerican.com/article.cfm?id=solar-wind-transforms-venus-into-shape-of-comet \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/scotusblog.com.txt b/inc/3rdparty/site_config/standard/scotusblog.com.txt new file mode 100644 index 00000000..f29e37f9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/scotusblog.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //title | ||
2 | author: //p[@id='author-name-role']/a | ||
3 | date: substring-after(//p[@class='time'],'Posted') | ||
4 | body: //div[@id='main'] | ||
5 | strip: //div[@id='author-info'] | ||
6 | strip: //div[@id='author-links'] | ||
7 | strip: //h1 | ||
8 | test_url: http://www.scotusblog.com/2012/04/shaken-baby-case-an-update/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/scraplab.net.txt b/inc/3rdparty/site_config/standard/scraplab.net.txt new file mode 100644 index 00000000..84be27f9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/scraplab.net.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: //h2 | ||
2 | body: //div[@class='body'] | ||
3 | test_url: http://scraplab.net/2010/10/26/please-keep-your-belongings-with-you-at-all-times/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/scripting.com.txt b/inc/3rdparty/site_config/standard/scripting.com.txt new file mode 100644 index 00000000..d8b969b1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/scripting.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | strip: //a[starts-with(@href, '#')] | ||
2 | strip: //*[@class='storyByline'] | ||
3 | body: //*[@class='storyPageText']/.. | ||
4 | author: string('Dave Winer') | ||
5 | date: substring-before(substring-after(//*[@class='storyByline'], 'on'), 'at') | ||
6 | title: //h1 | ||
7 | footnotes: no | ||
8 | test_url: http://scripting.com/stories/2011/07/08/yeahImStillYawning.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sct.temple.edu.txt b/inc/3rdparty/site_config/standard/sct.temple.edu.txt new file mode 100644 index 00000000..9927675b --- /dev/null +++ b/inc/3rdparty/site_config/standard/sct.temple.edu.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //*[@class="entry-content"] | ||
2 | title: //h1[@class="entry-title"] | ||
3 | date: //*[@class="entry-date"] | ||
4 | author: //*[@class="author vcard"] | ||
5 | test_url: http://sct.temple.edu/blogs/news-events/2011/05/congratulations-sct-class-of-2011/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/searchengineland.com.txt b/inc/3rdparty/site_config/standard/searchengineland.com.txt new file mode 100644 index 00000000..f176d7c7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/searchengineland.com.txt | |||
@@ -0,0 +1,20 @@ | |||
1 | body: //div[@class="storyBox"] | ||
2 | title: //div[@class="storyBox"]/h1 | ||
3 | author: //a[@rel="author"] | ||
4 | date: substring-before(//span[@class="dateline"], 'by') | ||
5 | |||
6 | #Removes related content but cleans up article text | ||
7 | strip: //h1 | ||
8 | strip: //p[@class="homeStory tdmSideInfo"] | ||
9 | strip: //div[@id="bylineShare"] | ||
10 | strip: //script | ||
11 | strip: //hr | ||
12 | |||
13 | strip_id_or_class: homeStory | ||
14 | strip_id_or_class: authorpic | ||
15 | strip_id_or_class: insideComments | ||
16 | strip_id_or_class: authorbio | ||
17 | strip_id_or_class: gpt-ad-sel-cube | ||
18 | strip_id_or_class: smxTextAd | ||
19 | |||
20 | test_url: http://searchengineland.com/googles-jaw-dropping-sponsored-post-campaign-for-chrome-106348 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/seattletransitblog.com.txt b/inc/3rdparty/site_config/standard/seattletransitblog.com.txt new file mode 100644 index 00000000..5129c069 --- /dev/null +++ b/inc/3rdparty/site_config/standard/seattletransitblog.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //h3[@class="storytitle"] | ||
2 | date: //div[@class='meta'] | ||
3 | body: //div[@class='storycontent'] | ||
4 | |||
5 | test_url: http://seattletransitblog.com/2012/06/19/times-st-louis-interested-in-buying-waterfront-streetcars/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sebbo.net.txt b/inc/3rdparty/site_config/standard/sebbo.net.txt new file mode 100644 index 00000000..3e800a16 --- /dev/null +++ b/inc/3rdparty/site_config/standard/sebbo.net.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: substring-before(//title, '«') | ||
2 | body: //div[@class = 'entry'] | ||
3 | strip_id_or_class: 'postmetabox' | ||
4 | test_url: http://sebbo.net/2010/12/akkus/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/seriouseats.com.txt b/inc/3rdparty/site_config/standard/seriouseats.com.txt new file mode 100644 index 00000000..d7b4788c --- /dev/null +++ b/inc/3rdparty/site_config/standard/seriouseats.com.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | body: //div[@id='content'] | ||
2 | |||
3 | # clean up recipe pages | ||
4 | strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3'] | ||
5 | |||
6 | #recipe pages | ||
7 | strip_id_or_class: "recipe-feedback" | ||
8 | strip_id_or_class: "comments" | ||
9 | strip_id_or_class: "procedure-number" | ||
10 | strip_id_or_class: "more-with-author" | ||
11 | |||
12 | #slice | ||
13 | strip_id_or_class: "inner" | ||
14 | |||
15 | test_url: http://www.seriouseats.com/recipes/2010/09/peking-duck-mandarin-pancakes-plum-sauce-recipe.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sf.curbed.com.txt b/inc/3rdparty/site_config/standard/sf.curbed.com.txt new file mode 100644 index 00000000..9f443d5c --- /dev/null +++ b/inc/3rdparty/site_config/standard/sf.curbed.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //h1[@class='post-title'] | ||
2 | author: //div[@class='post-byline']/a | ||
3 | date: substring-before(//div[@class='post-byline'], ', by') | ||
4 | |||
5 | body: //div[@class='post-body'] | ||
6 | dissolve: //noscript | ||
7 | test_url: http://sf.curbed.com/archives/2011/10/17/lower_haight_loft_would_really_really_really_like_a_buyer.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sf.eater.com.txt b/inc/3rdparty/site_config/standard/sf.eater.com.txt new file mode 100644 index 00000000..fca656d2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/sf.eater.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //h1[@class="post-title"] | ||
2 | author: //div[@class="post-byline"]/a | ||
3 | date: substring-before(//div[@class='post-byline'], ', by') | ||
4 | |||
5 | body: //div[@class='post-body'] | ||
6 | strip_id_or_class: post-kicker | ||
7 | test_url: http://sf.eater.com/archives/2012/05/22/nate_pollack_talks_about_the_american_grilled_cheese_kitchen_moving_into_the_mission.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sfgate.com.txt b/inc/3rdparty/site_config/standard/sfgate.com.txt new file mode 100644 index 00000000..5f73fbcb --- /dev/null +++ b/inc/3rdparty/site_config/standard/sfgate.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | title: /html/head/title | ||
2 | |||
3 | body: //div[@id = 'articlecontent']/div[contains(@class, 'bodytext')] | ||
4 | author: //div[@class = 'articleheadings']/p[contains(@class,'author')]/span[@class = 'fn'] | ||
5 | date: //div[@class = 'articleheadings']/span[@class = 'updated'] | ||
6 | strip: //div[div[contains(@class, 'imgbox')]] | ||
7 | |||
8 | body: //div[@class = 'blogitem'] | ||
9 | author: //p[@class="credit"]/span[@class="author"]/a[position() = 1] | ||
10 | date: //span[@class = 'pubdate'] | ||
11 | |||
12 | test_url: http://www.sfgate.com/columnists/garchik/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sfweekly.com.txt b/inc/3rdparty/site_config/standard/sfweekly.com.txt new file mode 100644 index 00000000..a11fe4cb --- /dev/null +++ b/inc/3rdparty/site_config/standard/sfweekly.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[contains(@class, 'content_body')] | ||
2 | strip_id_or_class: det_rel | ||
3 | test_url: http://www.sfweekly.com/2012-03-14/news/cia-lsd-wayne-ritchie-george-h-white-mk-ultra/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/shabayek.com.txt b/inc/3rdparty/site_config/standard/shabayek.com.txt new file mode 100644 index 00000000..b175720e --- /dev/null +++ b/inc/3rdparty/site_config/standard/shabayek.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | date: //span[@class='date'] | ||
2 | body: //div[@class='post_content'] | ||
3 | test_url: http://www.shabayek.com/blog/2011/10/16/%D8%AF%D8%B1%D9%88%D8%B3-%D9%85%D9%86-%D9%82%D8%B5%D8%A9-%D8%AA%D8%A3%D8%B3%D9%8A%D8%B3-%D8%AA%D9%88%D9%8A%D8%AA%D8%B1-%E2%80%93%D8%AC3/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/shawnblanc.net.txt b/inc/3rdparty/site_config/standard/shawnblanc.net.txt new file mode 100644 index 00000000..b536fc3a --- /dev/null +++ b/inc/3rdparty/site_config/standard/shawnblanc.net.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title://*[@class='primary']/h1 | ||
2 | date: //*[@class='articledate'] | ||
3 | author: substring-before(substring-after(//*[@class='block first']/p,'2012 '),'.') | ||
4 | body: //div[@class='primary'] | ||
5 | footnotes: yes | ||
6 | strip: //*[@class='primary']/h1 | ||
7 | strip: //*[@class='articledate'] | ||
8 | strip: //*[@class='detailsarticle'] | ||
9 | strip: //*[@class='endnav'] | ||
10 | strip: //*[@class='endmeta'] | ||
11 | test_url: http://shawnblanc.net/2011/11/kindle-touch-review/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/shifteleven.com.txt b/inc/3rdparty/site_config/standard/shifteleven.com.txt new file mode 100644 index 00000000..68059ae1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/shifteleven.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //div[ @class='entry-content' ] | ||
2 | |||
3 | strip: //div[ contains(@class, 'sharing') ] | ||
4 | |||
5 | date: //div[ @class='entry-meta' ]/a | ||
6 | test_url: http://shifteleven.com/articles/2008/05/10/issue-tracking-git-ticgit \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/siasat.pk.txt b/inc/3rdparty/site_config/standard/siasat.pk.txt new file mode 100644 index 00000000..a82ce69c --- /dev/null +++ b/inc/3rdparty/site_config/standard/siasat.pk.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | #body: (//div[@class='ftr-yt-vid'])[1] | ||
2 | body: (//blockquote[contains(@class, 'postcontent')])[1] | ||
3 | body: (//div[starts-with(@id, 'post_message')])[1] | ||
4 | |||
5 | prune: no | ||
6 | tidy: no | ||
7 | |||
8 | #replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" | ||
9 | #replace_string(</iframe>): </iframe> </div> | ||
10 | |||
11 | test_url: http://www.siasat.pk/forum/showthread.php?107668-Policy-Matters-17th-March-2012-Dr-Shahid-Masood-Gen-Hameed-gul-amp-Fawad-Chudhary-Pak-US-Relationship&p=787733 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/simonwillison.net.txt b/inc/3rdparty/site_config/standard/simonwillison.net.txt new file mode 100644 index 00000000..e3ad6e41 --- /dev/null +++ b/inc/3rdparty/site_config/standard/simonwillison.net.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //div[contains(@class, "entry")] | ||
2 | |||
3 | date: //div[contains(@class, "entryFooter")]/a | ||
4 | |||
5 | test_url: http://simonwillison.net/2009/Oct/22/redis/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt b/inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt new file mode 100644 index 00000000..a1b6b673 --- /dev/null +++ b/inc/3rdparty/site_config/standard/singaporeanstocksinvestor.blogspot.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //div[@class='post-body'] | ||
2 | strip: //div[@id='lws_0'] | ||
3 | prune: no | ||
4 | |||
5 | test_url: http://singaporeanstocksinvestor.blogspot.com/2011/04/aims-amp-capital-industrial-reit.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/singularityhub.com.txt b/inc/3rdparty/site_config/standard/singularityhub.com.txt new file mode 100644 index 00000000..3999d4d4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/singularityhub.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body://div[contains(@class,"entry-content")] | ||
2 | test_url: http://singularityhub.com/2011/05/21/google-invades-your-home-android-phones-control-your-appliances-and-accessories-video/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sintagoulis.gr.txt b/inc/3rdparty/site_config/standard/sintagoulis.gr.txt new file mode 100644 index 00000000..822bbeb0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/sintagoulis.gr.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //div[@class='headline']//h2 | ||
2 | body: //div[contains(@class, 'storycontent')] | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | test_url: http://sintagoulis.gr/sokolatenia/sokolatenia-mpompa-me-amaretti- \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/slashfilm.com.txt b/inc/3rdparty/site_config/standard/slashfilm.com.txt new file mode 100644 index 00000000..78d38ecf --- /dev/null +++ b/inc/3rdparty/site_config/standard/slashfilm.com.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | title: substring-before(//title,'| /Film') | ||
2 | date: substring-before(substring-after(//p[@class='post-info'],'Posted on '),'by') | ||
3 | strip: //div[@class='pm-left'] | ||
4 | strip: //div[@class='pm-right'] | ||
5 | strip: //h2/span | ||
6 | next_page_link: //h2/strong/a | ||
7 | strip: //h2/strong/a | ||
8 | strip: //p[contains(text(),'we have to split this post over')] | ||
9 | strip: //p[@class='post-info'] | ||
10 | strip: //h1/a | ||
11 | strip: //img[contains(@src,'siteimages/authors')] | ||
12 | strip: //div[@id='header'] | ||
13 | strip: //div[@class='topad-right'] | ||
14 | strip: //strong[contains(text(),'Cool Posts From Around the Web:')] | ||
15 | test_url: http://www.slashfilm.com/superhero-bits-206/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/slate.com.txt b/inc/3rdparty/site_config/standard/slate.com.txt new file mode 100644 index 00000000..e92f6a06 --- /dev/null +++ b/inc/3rdparty/site_config/standard/slate.com.txt | |||
@@ -0,0 +1,19 @@ | |||
1 | title: //h1[@class="sl-art-head-dek"] | ||
2 | body: //article//div[@class='sl-art-body']/div[contains(@class, 'body')] | ||
3 | strip: //div[@class="department_kicker"] | ||
4 | strip: //div[@id="insider_ad_wrapper" or @id="insider_ad_inner"] | ||
5 | strip: //div[@id="bottom_sponsored_links"] | ||
6 | strip: //div[@class="sl-art-ad-midflex"] | ||
7 | #strip: //dl | ||
8 | #strip: //p[em/a[contains(@href, 'facebook.com')]] | ||
9 | prune: no | ||
10 | |||
11 | author: //div[@id='author_bio']//a[contains(@href, '/author/')] | ||
12 | author: //a[contains(@href, '/authors.')] | ||
13 | |||
14 | date: substring-before(substring-after(//span[@class='sl-art-byline'], 'Posted '), ', at ') | ||
15 | |||
16 | single_page_link: //a[@class='sl-art-sinpage'] | ||
17 | |||
18 | test_url: http://www.slate.com/id/2274583/pagenum/all/ | ||
19 | test_url: http://www.slate.com/id/2293116/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/slice.seriouseats.com.txt b/inc/3rdparty/site_config/standard/slice.seriouseats.com.txt new file mode 100644 index 00000000..1a902b96 --- /dev/null +++ b/inc/3rdparty/site_config/standard/slice.seriouseats.com.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | body: //div[@id='content'] | ||
2 | |||
3 | # clean up recipe pages | ||
4 | strip: //h2[@class='fn'] | //h2[@class='double-lined'] | //h3 | //div[@id='threeColumn2'] | //div[@id='threeColumn3'] | ||
5 | |||
6 | #recipe pages | ||
7 | strip_id_or_class: "recipe-feedback" | ||
8 | strip_id_or_class: "comments" | ||
9 | strip_id_or_class: "procedure-number" | ||
10 | strip_id_or_class: "more-with-author" | ||
11 | |||
12 | #slice | ||
13 | strip_id_or_class: "inner" | ||
14 | |||
15 | test_url: http://slice.seriouseats.com/archives/2010/10/the-pizza-lab-how-to-make-great-new-york-style-pizza.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/slog.thestranger.com.txt b/inc/3rdparty/site_config/standard/slog.thestranger.com.txt new file mode 100644 index 00000000..daa5e31b --- /dev/null +++ b/inc/3rdparty/site_config/standard/slog.thestranger.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | strip_id_or_class: postCategory | ||
2 | title: //h3[@class='postTitle'] | ||
3 | body: //div[@class='postBody'] | ||
4 | test_url: http://slog.thestranger.com/slog/archives/2010/10/12/sl-letter-of-the-day-leave-it-alone \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/smartinvestor.de.txt b/inc/3rdparty/site_config/standard/smartinvestor.de.txt new file mode 100644 index 00000000..ec6c55c8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/smartinvestor.de.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //td[@class='hweissblau2'] | ||
2 | body: //p[@class='copy'] | //div[@class='Section1'] | ||
3 | prune: no | ||
4 | |||
5 | test_url: http://www.smartinvestor.de/news/smartinvestor/detail.hbs?itemid=item949496655&recnr=14593 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sme.sk.txt b/inc/3rdparty/site_config/standard/sme.sk.txt new file mode 100644 index 00000000..c3d01ffb --- /dev/null +++ b/inc/3rdparty/site_config/standard/sme.sk.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: //meta[@property='og:title']/@content | ||
2 | date: //p[@class='autor_line']/b/text() | ||
3 | test_url: http://www.sme.sk/c/6268206/lipsic-vidi-malcharkove-uplatky.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/smithsonianmag.com.txt b/inc/3rdparty/site_config/standard/smithsonianmag.com.txt new file mode 100644 index 00000000..10a3f717 --- /dev/null +++ b/inc/3rdparty/site_config/standard/smithsonianmag.com.txt | |||
@@ -0,0 +1,20 @@ | |||
1 | # meta data | ||
2 | title://h1[@id = 'articleTitle'] | ||
3 | author:substring-after(//ul[@id = 'byLine']/li[1],'By ') | ||
4 | date:substring-before(substring-after(//ul[@id = 'byLine']/li[last()],','),',') | ||
5 | body://div[@id = 'article-body'] | ||
6 | |||
7 | # full content | ||
8 | single_page_link://td/li[@class = 'article-singlepage']/a | ||
9 | |||
10 | # caption clean up | ||
11 | wrap_in(i)://span[@class='articleImageCaptionwide'] | ||
12 | move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p | ||
13 | |||
14 | |||
15 | # clean up | ||
16 | strip://p[@id = 'articlePaginationWrapper'] | ||
17 | strip://ul[contains(@class, 'cat-breadcrumb')] | ||
18 | strip://div [@class= 'viewMorePhotos'] | ||
19 | |||
20 | test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/smokingapples.com.txt b/inc/3rdparty/site_config/standard/smokingapples.com.txt new file mode 100644 index 00000000..e22af7a9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/smokingapples.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //h2[@class='custom-entry-title'] | ||
2 | author: substring-after(//span[@class='author vcard'],'by ') | ||
3 | date: substring-after(//span[@class='publ'],'Published on ') | ||
4 | body: //div[@class='postentry-content'] | ||
5 | test_url: http://smokingapples.com/software/popclip-for-mac/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sourcebooks.com.txt b/inc/3rdparty/site_config/standard/sourcebooks.com.txt new file mode 100644 index 00000000..668fc44a --- /dev/null +++ b/inc/3rdparty/site_config/standard/sourcebooks.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | #grab the actual content div | ||
2 | body: //div[@class='rt-article'] | ||
3 | |||
4 | test_url: http://www.sourcebooks.com/next/sourcebooks-next-our-blog/1601-another-piece-of-the-e-puzzle-or-when-good-ebook-promotions-go-bad.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/spectator.co.uk.txt b/inc/3rdparty/site_config/standard/spectator.co.uk.txt new file mode 100644 index 00000000..a05c8395 --- /dev/null +++ b/inc/3rdparty/site_config/standard/spectator.co.uk.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | author: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link']/child::text() | ||
2 | |||
3 | body: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body'] | ||
4 | |||
5 | # Not very helpful: the title and author are contained in the same element that contains the body | ||
6 | strip: /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/h2 | /html/body/div[@id='wrapper']/div[@id='main-content']/div[@class='article_body']/a[@class='author-link'] | ||
7 | test_url: http://www.spectator.co.uk/arts-and-culture/night-and-day/7449683/spotify-sunday-my-personal-soundtrack.thtml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/spectrum.ieee.org.txt b/inc/3rdparty/site_config/standard/spectrum.ieee.org.txt new file mode 100644 index 00000000..4b0704a8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/spectrum.ieee.org.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body://div[@class="articleBody"] | ||
2 | author://p[@class="articleBodyTtl"] | ||
3 | test_url: http://spectrum.ieee.org/semiconductors/processors/behind-intels-new-randomnumber-generator/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/speirs.org.txt b/inc/3rdparty/site_config/standard/speirs.org.txt new file mode 100644 index 00000000..3bf859e3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/speirs.org.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body://div[@class="body"] | ||
2 | test_url: http://speirs.org/blog/2011/5/5/ipad-trials-at-oklahoma-state.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/spiegel.de.txt b/inc/3rdparty/site_config/standard/spiegel.de.txt new file mode 100644 index 00000000..390c075c --- /dev/null +++ b/inc/3rdparty/site_config/standard/spiegel.de.txt | |||
@@ -0,0 +1,75 @@ | |||
1 | # A. Niepel, narya.de@... | ||
2 | # - added single_page_link | ||
3 | # - added author for default and single page view | ||
4 | # - added date for single page view | ||
5 | # fforst@... | ||
6 | # - Fixed it | ||
7 | # bode2104@... | ||
8 | # - Fixed single_page_link | ||
9 | # - Included intro text in single page view | ||
10 | # - Added body in default view | ||
11 | |||
12 | # set body | ||
13 | tidy: no | ||
14 | # body in single page view | ||
15 | body: //div[@id="spArticleContent"] | ||
16 | # body in default view | ||
17 | body: //div[@id="spArticleSection"] | ||
18 | # body in "Fotostrecke" | ||
19 | body: //div[@id="spBigaContent"] | ||
20 | |||
21 | # set date in single page view | ||
22 | date: //div[@id="spArticleContent"]/h3 | ||
23 | # strip date | ||
24 | strip: //div[@id="spArticleContent"]/h3 | ||
25 | # set date in "Fotostrecke" | ||
26 | date: //div[@id="spBigaDatum"] | ||
27 | |||
28 | #set title in single page view | ||
29 | title: //div[@id='spArticleContent']/h2 | ||
30 | # strip title | ||
31 | strip: //div[@id='spArticleContent']/h1 | ||
32 | strip: //div[@id='spArticleContent']/h2 | ||
33 | #set title in "Fotostrecke" | ||
34 | title: //div[@class='spBigaHeadline'] | ||
35 | |||
36 | # set author | ||
37 | author: //p[@class="spAuthor"]/a | ||
38 | author: substring-after(//p[@class="spAuthor"], 'Von ') | ||
39 | # strip author | ||
40 | strip: //p[@class='spAuthor'] | ||
41 | |||
42 | # remove captions | ||
43 | strip: //*/span[@class='spPicLayerText'] | ||
44 | strip: //*/div[@class='spPanoPlayerPaneControl'] | ||
45 | strip: //*/div[@class='spCredit'] | ||
46 | strip: //*/div[@class='spCredit']/following-sibling::p | ||
47 | |||
48 | # remove ads | ||
49 | strip: //div[@class='spMInline'] | ||
50 | |||
51 | # remove photogalleries and extras | ||
52 | strip: //div[@class='spPhotoGallery'] | ||
53 | strip: //div[@class='spPhotoGallery']/following-sibling::br | ||
54 | strip: //div[@class='spAssetAlignleft'] | ||
55 | strip: //div[contains(@class,'spAsset')] | ||
56 | strip: //br[@clear='all'] | ||
57 | |||
58 | # remove community functions | ||
59 | strip: //div[@id='spSocialBookmark'] | ||
60 | strip: //div[contains(@class, 'spCommunityBox')] | ||
61 | strip: //div[contains(@class, 'spArticleNewsfeedBox')] | ||
62 | strip: //div[@class='spArticleCredit'] | ||
63 | |||
64 | # remove clutter in "Fotostrecke" | ||
65 | strip: //div[@id='spBreadcrumb'] | ||
66 | strip: //div[@id='spBigaLatestEntries'] | ||
67 | strip: //div[contains(@class, 'spBigaNavi')] | ||
68 | strip: //div[@class='spDottedLine'] | ||
69 | |||
70 | # Use link to print article for single page view | ||
71 | single_page_link: //a[contains(@href, '-druck')] | ||
72 | |||
73 | # use next link in "Fotostrecke" | ||
74 | next_page_link: //a[@class='spBigaControlForw'] | ||
75 | test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/spin.com.txt b/inc/3rdparty/site_config/standard/spin.com.txt new file mode 100644 index 00000000..66f6192b --- /dev/null +++ b/inc/3rdparty/site_config/standard/spin.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | tidy: no | ||
2 | body: //section[contains(@class, 'main')] | ||
3 | strip: //footer | ||
4 | strip: //a[@class='paginated'] | ||
5 | test_url: http://www.spin.com/articles/bathlands-deep-heart-americas-new-drug-nightmare \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/splatf.com.txt b/inc/3rdparty/site_config/standard/splatf.com.txt new file mode 100644 index 00000000..d5671652 --- /dev/null +++ b/inc/3rdparty/site_config/standard/splatf.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | author:string('Dan Frommer/SplatF') | ||
2 | date://div[@class='postdate'] | ||
3 | body://div[@class='entry'] | ||
4 | title://div[@class='post']/h1 | ||
5 | test_url: http://www.splatf.com/2012/02/month-six/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/splitsider.com.txt b/inc/3rdparty/site_config/standard/splitsider.com.txt new file mode 100644 index 00000000..d1d392e7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/splitsider.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | author: //div[@class='byline']/a | ||
2 | date: //div[@id='date'] | ||
3 | body: //div[@class='entry'] | ||
4 | test_url: http://splitsider.com/2011/10/saturday-nights-children-rob-riggle-2004-2005/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sport.detik.com.txt b/inc/3rdparty/site_config/standard/sport.detik.com.txt new file mode 100644 index 00000000..b404b829 --- /dev/null +++ b/inc/3rdparty/site_config/standard/sport.detik.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title://div[@class="content_detail"]/h1 | ||
2 | |||
3 | author://div[@class="author"]/strong | ||
4 | |||
5 | date:substring-before(substring-after(//div[@class="content_detail"]/*[@class="date"], ','), ' WIB') | ||
6 | |||
7 | body://div[@class='text_detail'] | ||
8 | test_url: http://sport.detik.com/sepakbola/read/2012/05/23/065011/1922350/71/agen-silva-ingin-bertahan-di-milan?b99220270 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sport.orf.at.txt b/inc/3rdparty/site_config/standard/sport.orf.at.txt new file mode 100644 index 00000000..a794ded9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/sport.orf.at.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | single_page_link: //div[@id='content']//p[@class='readMore']/a | ||
2 | |||
3 | title: //div[@class='hidden offscreen']/h2 | ||
4 | body: //div[@id="storyText"] | ||
5 | move_into(//div[@id='storyText']): //div[@class='fact'] | ||
6 | strip: //small[@class='credit'] | ||
7 | strip: //small[@class='caption'] | ||
8 | date: substring-after(//div[@id='storyMeta']//p[@class='date'],'Publiziert am') | ||
9 | strip: //p[@class='toplink'] | ||
10 | |||
11 | test_url: http://sport.orf.at/stories/2084851/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sports.espn.go.com.txt b/inc/3rdparty/site_config/standard/sports.espn.go.com.txt new file mode 100644 index 00000000..e0f8223c --- /dev/null +++ b/inc/3rdparty/site_config/standard/sports.espn.go.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | title: //div[@class='headline'] | //div[@class='mod-header']/h3 | ||
2 | body: //div[contains(@class, 'article')] | ||
3 | strip: //div[contains(@class, 'mod-inline')] | ||
4 | strip: //*/span[@class='page-actions']/a | ||
5 | strip: //*/span[@class='page-actions']/a | ||
6 | strip: //div[@class='page-actions']/* | ||
7 | strip: //div[@class='headline'] | //div[@class='mod-header']/h3 | ||
8 | strip: //div[@class='mod-blog-navigation'] | ||
9 | strip: //div[@class='monthday'] | ||
10 | strip: //div[@class='time'] | ||
11 | strip: //div[@class='timeofday'] | ||
12 | test_url: http://sports.espn.go.com/espn/page2/story?page=simmonsnfl2010/lebron_james_return_clevelend&sportCat=nba \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sports.yahoo.com.txt b/inc/3rdparty/site_config/standard/sports.yahoo.com.txt new file mode 100644 index 00000000..96a3bb71 --- /dev/null +++ b/inc/3rdparty/site_config/standard/sports.yahoo.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //div[@id='article']/div[@class='hd']/h1 | ||
2 | body: //p[@id='byline'] | //div[@id='article']//div[@class='body_copy 0'] | ||
3 | strip: //div[@class='foot'] | ||
4 | strip: //div[@id='sidebar']//div[@class='ft'] | ||
5 | strip: //p[@id='byline']//em | ||
6 | tidy: no | ||
7 | prune: no | ||
8 | |||
9 | test_url: http://sports.yahoo.com/nba/news?slug=ap-nbafinals \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sportschau.de.txt b/inc/3rdparty/site_config/standard/sportschau.de.txt new file mode 100644 index 00000000..6500e75c --- /dev/null +++ b/inc/3rdparty/site_config/standard/sportschau.de.txt | |||
@@ -0,0 +1,22 @@ | |||
1 | title://div[@id='ardContent']/h1 | ||
2 | |||
3 | author://p[@id='ardAutor'] | ||
4 | author://span[@id='ardQuelle'] | ||
5 | author:string('sportschau.de') | ||
6 | |||
7 | date:substring-after(//span[@id='ardStand'], 'Stand: ') | ||
8 | |||
9 | body://div[@id='ardContent'] | ||
10 | |||
11 | strip://div[@id='ardContent']/h1 | ||
12 | strip://p[@id='ardAutor'] | ||
13 | strip: //div[@class='embeddedPlayer_clipinfo'] | ||
14 | strip: //div[@class='ardMehrZumThemaRechts'] | ||
15 | strip: //*[contains(@class, 'inv')] | ||
16 | |||
17 | strip: //p[@id='ardAbbinder'] | ||
18 | strip: //div[@class='socialBookmarks'] | ||
19 | strip: //div[@id='ardContentEnd'] | ||
20 | strip: //div[@id='ardDisclaimer'] | ||
21 | strip: //div[@id='ardRechteSpalte'] | ||
22 | test_url: http://www.sportschau.de/sp/fussball/news201203/17/analyse_leverkusen_gladbach.jsp \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt b/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt new file mode 100644 index 00000000..afc5879f --- /dev/null +++ b/inc/3rdparty/site_config/standard/sportsillustrated.cnn.com.txt | |||
@@ -0,0 +1,26 @@ | |||
1 | # main sportsillustrated.com articles | ||
2 | # | ||
3 | body: //div[@id="cnnStoryContent"] | ||
4 | title: //div[@id="cnnStoryHeadline"]//h1 | ||
5 | author: //div[@id="cnnSubBanner"]//strong | ||
6 | date: substring-after(//div[@id="cnnTimeStamp"], "Updated: ") | ||
7 | date: substring-after(//div[@id="cnnTimeStamp"], "Posted: ") | ||
8 | |||
9 | # kill ugly font buttons | ||
10 | strip: //div[@id="cnnSCFontButtons"] | ||
11 | |||
12 | # kill misc filler videos, etc. | ||
13 | strip: //div[@class="cnnDivideContent"] | ||
14 | strip: //*[@class="cnnTMbox"] | ||
15 | |||
16 | # si vault articles | ||
17 | # ------------- | ||
18 | body: //div[@class="siv_artPara"] | ||
19 | title: //div[@class="siv_artHeader"]//h1 | ||
20 | author: //div[@class="byline"] | ||
21 | date: //div[@class="date"] | ||
22 | |||
23 | next_page_link: //div[@id='cnnStoryContinue']/a | ||
24 | strip_id_or_class: cnnstorypagination | ||
25 | |||
26 | test_url: http://sportsillustrated.cnn.com/2012/writers/peter_king/02/27/combine/index.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sprengsatz.de.txt b/inc/3rdparty/site_config/standard/sprengsatz.de.txt new file mode 100644 index 00000000..16636bc5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/sprengsatz.de.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //h2 | ||
2 | author: string('Michael Spreng') | ||
3 | date: //div[@class='date'] | ||
4 | body: //div[@class='entry'] | ||
5 | test_url: http://www.sprengsatz.de/?p=3691 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sqlite.org.txt b/inc/3rdparty/site_config/standard/sqlite.org.txt new file mode 100644 index 00000000..4872519a --- /dev/null +++ b/inc/3rdparty/site_config/standard/sqlite.org.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | body: //div[@id='ff-body'] | ||
2 | |||
3 | replace_string(<h1 align=center>): <div id="ff-body"><h1 align=center> | ||
4 | |||
5 | prune: no | ||
6 | |||
7 | test_url: http://www.sqlite.org/fileformat2.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/squashed.tumblr.com.txt b/inc/3rdparty/site_config/standard/squashed.tumblr.com.txt new file mode 100644 index 00000000..388209a9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/squashed.tumblr.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //div[@class='content'] | ||
2 | date: substring-before( //div[@class='unit dateAndNotes'], 'with') | ||
3 | title: //h3 | ||
4 | test_url: http://squashed.tumblr.com/post/17613522228/lets-stop-blaming-the-victims-of-predatory-lending \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/stackoverflow.com.txt b/inc/3rdparty/site_config/standard/stackoverflow.com.txt new file mode 100644 index 00000000..e5317bac --- /dev/null +++ b/inc/3rdparty/site_config/standard/stackoverflow.com.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | body: //div[@class='post-text' or @class='user-action-time' or @class='user-details' or @class='vote'] | //div[@id='answers-header']//h2 | ||
2 | |||
3 | replace_string(<div class="user-details"><br></div>): <!-- nothing --> | ||
4 | replace_string(<div class="vote">): <div class="vote"><h3>Vote count: | ||
5 | |||
6 | strip_id_or_class: vote-up | ||
7 | strip_id_or_class: vote-down | ||
8 | strip_id_or_class: star-off | ||
9 | strip_id_or_class: favoritecount | ||
10 | strip_id_or_class: -share | ||
11 | strip_id_or_class: badgecount | ||
12 | |||
13 | |||
14 | test_url: http://stackoverflow.com/questions/4484289/id-like-to-understand-the-jquery-plugin-syntax \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt b/inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt new file mode 100644 index 00000000..bde14217 --- /dev/null +++ b/inc/3rdparty/site_config/standard/stalbansreview.co.uk.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | title: //div[@class='articleLeft']/h3 | ||
2 | |||
3 | author: substring-after(//span[@class='articleAuthor']/a,'By ') | ||
4 | |||
5 | date: substring-before(//span[@class='articleDateTime'],'in ') | ||
6 | |||
7 | body: //div[@class='articleLeft'] | ||
8 | strip: //div[@class='articleMoreNews'] | ||
9 | strip: //div[@class='articleLeft']/h3 | ||
10 | strip: //div[@class='articleLeft']/p[@class='articleInfo clearfix'] | ||
11 | |||
12 | # Remove duplicate title from text | ||
13 | strip: //div[@id='site']/div[5][@class='holder']/div[1][@class='hBlock']/div[1][@class='sglCol article']/h3 | ||
14 | test_url: http://www.stalbansreview.co.uk/news/9581446.New_roundabout_in_King_Harry_Lane/r/?ref=rss \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/standard.co.uk.txt b/inc/3rdparty/site_config/standard/standard.co.uk.txt new file mode 100644 index 00000000..22a33484 --- /dev/null +++ b/inc/3rdparty/site_config/standard/standard.co.uk.txt | |||
@@ -0,0 +1,16 @@ | |||
1 | autodetect_next_page: no | ||
2 | footnotes: no | ||
3 | dissolve: //div[@class="column-2"]//div[@class="widget"] | ||
4 | dissolve: //div[@class="column-2"]//div | ||
5 | |||
6 | author: //div[@class="innerbyline"]/a | ||
7 | strip: //div[@class="innerbyline"]/a | ||
8 | |||
9 | strip: //p[@class="dateline"] | ||
10 | date: //p[@class="dateline"] | ||
11 | |||
12 | title: //h1[@class="title"] | ||
13 | author: //div[@class="innerbyline"]/a | ||
14 | date: //p[@class="dateline"] | ||
15 | body: //div[@class="column-2"] | ||
16 | test_url: http://www.standard.co.uk/lifestyle/esmagazine/grace-and-flavour-pizarro-7938350.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/staradvertiser.com.txt b/inc/3rdparty/site_config/standard/staradvertiser.com.txt new file mode 100644 index 00000000..0579455f --- /dev/null +++ b/inc/3rdparty/site_config/standard/staradvertiser.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //h1[@id='storyTitle'] | ||
2 | author: substring-after(//span[@class='hsa_postCredit'], 'By ') | ||
3 | date://span[@class='hsa_dateStamp'] | ||
4 | body: //div[@class='storytext'] | ||
5 | strip_id_or_class: insideStoryAd | ||
6 | strip_id_or_class: printDesc | ||
7 | strip_id_or_class: sb_2010_story_tools | ||
8 | strip_id_or_class: FBConnectButton_Text | ||
9 | strip_id_or_class: breadcrumbs | ||
10 | prune: no | ||
11 | test_url: http://www.staradvertiser.com/news/20111112_World_leaders_step_onto_isle_stage.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/stephenfry.com.txt b/inc/3rdparty/site_config/standard/stephenfry.com.txt new file mode 100644 index 00000000..1169984f --- /dev/null +++ b/inc/3rdparty/site_config/standard/stephenfry.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: /html/head/meta[@name='title']/@content | ||
2 | author: //span[contains(concat(' ',normalize-space(@class),' '),' article_author ')]/a | ||
3 | date: //span[contains(concat(' ',normalize-space(@class),' '),' article_date ')] | ||
4 | |||
5 | body: //div[@class='entry-content'] | ||
6 | |||
7 | single_page_link: //p[@class='pagination']/a | ||
8 | test_url: http://www.stephenfry.com/2011/10/06/steve-jobs/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/stlbeacon.org.txt b/inc/3rdparty/site_config/standard/stlbeacon.org.txt new file mode 100644 index 00000000..d66fee9f --- /dev/null +++ b/inc/3rdparty/site_config/standard/stlbeacon.org.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: article/h1 | ||
2 | author: //p[@class='byline'] | ||
3 | date: //p[@class='date'] | ||
4 | body: //div[@class='body'] | ||
5 | test_url: https://www.stlbeacon.org/#!/content/23404/mogop_caucus_031712 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/stockholm.etc.se.txt b/inc/3rdparty/site_config/standard/stockholm.etc.se.txt new file mode 100644 index 00000000..073043d5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/stockholm.etc.se.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | strip_id_or_class: 'left' | ||
2 | strip_id_or_class: 'right' | ||
3 | strip_id_or_class: 'block-belowcontent' | ||
4 | |||
5 | test_url: http://stockholm.etc.se/reportage/bakom-stangda-dorrar-pa-fas-3-massa \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/streetsblog.net.txt b/inc/3rdparty/site_config/standard/streetsblog.net.txt new file mode 100644 index 00000000..0b62a3d6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/streetsblog.net.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //h2[@class="post-title"] | ||
2 | date: //span[@class="post-date"] | ||
3 | body: //div[@class="post-entry"] | ||
4 | |||
5 | #This is also good for *.streetsblog.org, for example: | ||
6 | #http://dc.streetsblog.org/2011/10/21/friday-job-market/ | ||
7 | test_url: http://streetsblog.net/2011/10/20/look-out-below-one-in-nine-bridges-structurally-deficient-reports-t4a/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/stuff.co.nz.txt b/inc/3rdparty/site_config/standard/stuff.co.nz.txt new file mode 100644 index 00000000..12fd0939 --- /dev/null +++ b/inc/3rdparty/site_config/standard/stuff.co.nz.txt | |||
@@ -0,0 +1,22 @@ | |||
1 | title://div[@id='left_col']/h1 | ||
2 | author:substring-after(//span[contains(@class,'storycredit')],'BY ') | ||
3 | author://span[contains(@class,'storycredit')] | ||
4 | date:substring-after(//div[contains(@class,'toolbox_date')],'Last updated ') | ||
5 | date://div[contains(@class,'toolbox_date')] | ||
6 | body://div[@id='left_col'] | ||
7 | |||
8 | strip_id_or_class: toolbox | ||
9 | strip_id_or_class: story_features | ||
10 | strip_id_or_class: sharebox_new | ||
11 | strip_id_or_class: related_box | ||
12 | strip_id_or_class: sponsored_links | ||
13 | strip_id_or_class: hidden_ad | ||
14 | strip_id_or_class: story_content_top | ||
15 | strip_id_or_class: total_number | ||
16 | strip_id_or_class: sort_order | ||
17 | strip_id_or_class: subscribe_order | ||
18 | |||
19 | strip://div[contains(@class,'ad_story')] | ||
20 | |||
21 | test_url: http://www.stuff.co.nz/national/politics/3930344/PM-issues-challenge | ||
22 | test_url: http://www.stuff.co.nz/entertainment/7045944/International-praise-for-Ladyhawke \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/stumbleupon.com.txt b/inc/3rdparty/site_config/standard/stumbleupon.com.txt new file mode 100644 index 00000000..85682166 --- /dev/null +++ b/inc/3rdparty/site_config/standard/stumbleupon.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | single_page_link: //iframe[@id='stumbleFrame']/@src | ||
2 | |||
3 | test_url: www.stumbleupon.com/su/35V0wB/zouchmagazine.com/poetry-violet/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/subtraction.com.txt b/inc/3rdparty/site_config/standard/subtraction.com.txt new file mode 100644 index 00000000..454e37b1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/subtraction.com.txt | |||
@@ -0,0 +1,17 @@ | |||
1 | title: //*[@id='posts']/div[1]/h2 | ||
2 | author: //*[@id='posts']/div[1]/div[2]/span[2]/a | ||
3 | date: //*[@class='date'] | ||
4 | body: //div[@class='body-lead'] | ||
5 | |||
6 | # take out the bit saying 'body' | ||
7 | strip: //div[@class='body-lead']/div[@class='info-label'] | ||
8 | |||
9 | |||
10 | |||
11 | |||
12 | |||
13 | |||
14 | |||
15 | |||
16 | |||
17 | test_url: http://www.subtraction.com/2011/02/01/unnecessary-explanations \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sueddeutsche.de.txt b/inc/3rdparty/site_config/standard/sueddeutsche.de.txt new file mode 100644 index 00000000..4aa9410c --- /dev/null +++ b/inc/3rdparty/site_config/standard/sueddeutsche.de.txt | |||
@@ -0,0 +1,18 @@ | |||
1 | # 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@... | ||
2 | |||
3 | single_page_link: //a[ contains( @href, "/2.220/" ) ] | ||
4 | |||
5 | body: //article[@id="sitecontent"]/section[@class="body"] | ||
6 | author: //address[@class="author"] | ||
7 | date: //div[@class="header"]//h1//span[@class="updated"] | ||
8 | wrap_in(small): //div[@class="footer"] | ||
9 | wrap_in(i): //figcaption/h3 | ||
10 | dissolve: //figcaption//h3 | ||
11 | dissolve: //figure/div[@class="body"] | ||
12 | dissolve: //figure/a | ||
13 | |||
14 | strip: //figure[ not( contains(@class, "zoomimage" ) ) ] | ||
15 | strip: //div[@data-onlineonly="true"] | ||
16 | strip: //address[@class="author"] | ||
17 | |||
18 | test_url: http://www.sueddeutsche.de/muenchen/mietshaus-am-gaertnerplatz-alles-muss-raus-1.1556693 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/summify.com.txt b/inc/3rdparty/site_config/standard/summify.com.txt new file mode 100644 index 00000000..1128e1bb --- /dev/null +++ b/inc/3rdparty/site_config/standard/summify.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | strip_id_or_class: toolbar | ||
2 | test_url: http://summify.com/story/Tmt1YQ0JBgKTAHwK/www.nybooks.com/articles/archives/2003/jan/16/fixed-opinions-or-the-hinge-of-history/?pagination=false \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/suntimes.com.txt b/inc/3rdparty/site_config/standard/suntimes.com.txt new file mode 100644 index 00000000..13390e4f --- /dev/null +++ b/inc/3rdparty/site_config/standard/suntimes.com.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | title: //div[@class='story-details']/h1 | ||
2 | date: //span[@class='date-time'] | ||
3 | Author: substring-after(//p[@class='by-line'], 'By ') | ||
4 | |||
5 | strip: //div[@class='videoThumbnails'] | ||
6 | strip: //div[@class='ad-square2-container'] | ||
7 | strip: //div[@class='homeDeliveryContainer5'] | ||
8 | |||
9 | strip: //div[@class='image-description'] | ||
10 | strip: //div[@id='internal-side-bar'] | ||
11 | |||
12 | strip: //span[@class='hide'] | ||
13 | strip: //div[@class='date'] | ||
14 | test_url: http://www.suntimes.com/technology/ihnatko/8816567-452/review-kindle-fire-is-no-ipad-killer-but-it-is-a-killer-device.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/svd.se.txt b/inc/3rdparty/site_config/standard/svd.se.txt new file mode 100644 index 00000000..02b5b8ca --- /dev/null +++ b/inc/3rdparty/site_config/standard/svd.se.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | # Ads | ||
2 | strip_id_or_class: articlead | ||
3 | |||
4 | test_url: http://www.svd.se/nyheter/inrikes/manga-huggormsbitna-golfare_5004031.svd \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sydsvenskan.se.txt b/inc/3rdparty/site_config/standard/sydsvenskan.se.txt new file mode 100644 index 00000000..da6772aa --- /dev/null +++ b/inc/3rdparty/site_config/standard/sydsvenskan.se.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //h1 | ||
2 | |||
3 | author: //a[contains(@href, '/sok/?')]/text() | ||
4 | |||
5 | date: substring-after(//span[@class='date'], 'Publicerad ') | ||
6 | |||
7 | body: //div[@class='two_column_left'] | ||
8 | strip_id_or_class: story | ||
9 | strip: //div[@class='leadText saplo:lead']/h5 | ||
10 | |||
11 | test_url: http://www.sydsvenskan.se/kultur-och-nojen/-jag-vill-garna--stanna-- \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/symmetrymagazine.org.txt b/inc/3rdparty/site_config/standard/symmetrymagazine.org.txt new file mode 100644 index 00000000..3109c0e7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/symmetrymagazine.org.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | title: //div[contains(@class, "post")]/h2 | ||
2 | |||
3 | author: //div[contains(@class, "post")]/p[position()=last()]/text()[1] | ||
4 | |||
5 | date: //div[contains(@class, "post")]/p[1] | ||
6 | |||
7 | body: //div[contains(@class, "post")] | ||
8 | |||
9 | strip: //div[contains(@class, "post")]/h2[1] | ||
10 | strip: //div[contains(@class, "post")]/p[1] | ||
11 | strip: //div[contains(@class, "post")]/p[position()=last()] | ||
12 | test_url: http://www.symmetrymagazine.org/breaking/?p=12784 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt b/inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt new file mode 100644 index 00000000..c3e34977 --- /dev/null +++ b/inc/3rdparty/site_config/standard/sz-magazin.sueddeutsche.de.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | title: //h1 | ||
2 | body://div[@class='drucken'] | ||
3 | author: substring-after(//span[@class='autor'], 'Von ') | ||
4 | author: //span[@class='autor'] | ||
5 | |||
6 | single_page_link://a[contains(@href, '/drucken/')] | ||
7 | convert_double_br_tags:yes | ||
8 | |||
9 | dissolve://div[@class='vorspann'] | ||
10 | |||
11 | strip://h1 | ||
12 | strip_id_or_class: klassifizierung | ||
13 | strip_id_or_class: source | ||
14 | strip_id_or_class: autor | ||
15 | test_url: http://sz-magazin.sueddeutsche.de/texte/anzeigen/37567 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tagesschau.de.txt b/inc/3rdparty/site_config/standard/tagesschau.de.txt new file mode 100644 index 00000000..8ce8a90e --- /dev/null +++ b/inc/3rdparty/site_config/standard/tagesschau.de.txt | |||
@@ -0,0 +1,23 @@ | |||
1 | title://h1[1] | ||
2 | |||
3 | author: substring-after(//em, 'Von ') | ||
4 | author:string('tagesschau.de') | ||
5 | |||
6 | date:substring-after(//div[@class='standDatum'], 'Stand: ') | ||
7 | |||
8 | body://div[contains(@class, 'article')] | //div[contains(@class, 'centerCol')] | ||
9 | |||
10 | strip://h1[1] | ||
11 | strip: //div[contains(@class, 'directLinks')] | ||
12 | strip: //div[contains(@class, 'zitatBox')] | ||
13 | strip: //div[contains(@class, 'teaserBox metaBlock')] | ||
14 | strip: //*[contains(@class, 'inv')] | ||
15 | strip: //span[@class='imgSubline'] | ||
16 | strip: //*[contains(@class, 'topline')][1] | ||
17 | strip: //div[@id='rightCol'][1] | ||
18 | strip: //div[@id="footer"][1] | ||
19 | strip: //div[@class="fPlayer"] | ||
20 | strip: //div[@id='seitenanfang'] | ||
21 | strip: //div[@class='standDatum'] | ||
22 | strip: //em | ||
23 | test_url: http://www.tagesschau.de/ausland/wahlkampffrankreich102.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tampabay.com.txt b/inc/3rdparty/site_config/standard/tampabay.com.txt new file mode 100644 index 00000000..bfe841c6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/tampabay.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //span[@class="entry-title"] | ||
2 | author: //*[contains(@class, 'item')]/p/a/text() | ||
3 | date: substring-after(//*[contains(@class, 'item')]/p/text()[3], 'Posted:') | ||
4 | body: //div[@class="entry-content"] | ||
5 | test_url: http://www.tampabay.com/news/salvador-dali-leaders-want-st-petersburg-city-council-to-put-brakes-on/1236349 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/taptaptap.com.txt b/inc/3rdparty/site_config/standard/taptaptap.com.txt new file mode 100644 index 00000000..13de70e9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/taptaptap.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: //h3[@class="storytitle"] | ||
2 | body: //div[@class="post"] | ||
3 | strip: //div[@class="blurbBox"] | ||
4 | test_url: http://taptaptap.com/blog/apples-precedents-vs-apples-guidelines/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tasteofhome.com.txt b/inc/3rdparty/site_config/standard/tasteofhome.com.txt new file mode 100644 index 00000000..77773363 --- /dev/null +++ b/inc/3rdparty/site_config/standard/tasteofhome.com.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | title: //span[@id='ctl00_ctl00_MainContent_MainContent_RecipeImage1_lblRecipeTitle'] | ||
2 | body: //div[@id='RDNEW']//*[@class='Recipe-imgCon' or @class='Recipe-Intro' or @class='recipeDetails'] | ||
3 | strip_id_or_class: rec-ExRightPanel | ||
4 | strip_id_or_class: divCarousel | ||
5 | strip_id_or_class: preptimeOuter | ||
6 | strip_id_or_class: cooktimeOuter | ||
7 | strip_id_or_class: durationOuter | ||
8 | strip_id_or_class: divImageFooter | ||
9 | strip_id_or_class: microFormatFnIngred | ||
10 | strip: //span[@class='Recipe-Intro']//*[@class='link' or @class='rating'] | ||
11 | |||
12 | prune: no | ||
13 | tidy: no | ||
14 | |||
15 | test_url: http://www.tasteofhome.com/recipes/Grinch-Punch \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/taz.de.txt b/inc/3rdparty/site_config/standard/taz.de.txt new file mode 100644 index 00000000..6e84527b --- /dev/null +++ b/inc/3rdparty/site_config/standard/taz.de.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | date: //div[@class='secthead'] | ||
2 | body: //div[@class='sectbody'] | ||
3 | title: concat(//div[@class='sectbody']/h4,': ',//div[@class='sectbody']/h1) | ||
4 | author: //span[@class='author'] | ||
5 | strip: //p[@class='caption'] | ||
6 | strip_id_or_class: rack | ||
7 | |||
8 | test_url: http://www.taz.de/Protestbewegung-Occupy/!80188/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tbray.org.txt b/inc/3rdparty/site_config/standard/tbray.org.txt new file mode 100644 index 00000000..fbe94fa4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/tbray.org.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | body: //div[@id='centercontent'] | ||
2 | strip: //div[@id='rightcontent'] | ||
3 | date: substring-before( //div[@id='cats'], '·') | ||
4 | title: //h1 | ||
5 | test_url: http://www.tbray.org/ongoing/When/201x/2012/03/04/Mobile-Money \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tcng.org.txt b/inc/3rdparty/site_config/standard/tcng.org.txt new file mode 100644 index 00000000..765224e4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/tcng.org.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: //div[@id='main-content']/h1 | ||
2 | body: //div[@id='main-content'] | ||
3 | strip: //div[@id='main-content']/h1 | ||
4 | test_url: http://www.tcng.org/index.php/blog/view/teaching-basic-health-cutting-down-costs \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt b/inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt new file mode 100644 index 00000000..b6d17da4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/tech.fortune.cnn.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: //h1[@class='storyheadline'] | ||
2 | body: //div[@class='storytext'] | ||
3 | strip: //strong | ||
4 | test_url: http://tech.fortune.cnn.com/2011/03/17/why-startups-dont-go-public-anymore/?section=money_topstories&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fmoney_topstories+%28Top+Stories%29 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tech.sina.com.cn.txt b/inc/3rdparty/site_config/standard/tech.sina.com.cn.txt new file mode 100644 index 00000000..f7228ebf --- /dev/null +++ b/inc/3rdparty/site_config/standard/tech.sina.com.cn.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title://h1[contains(@id,'artibodyTitle')] | ||
2 | |||
3 | date://span[contains(@id,'pub_date')] | ||
4 | |||
5 | body://div[contains(@id,'artibody')] | ||
6 | |||
7 | strip://div[contains(@class,'otherContent')] | ||
8 | |||
9 | next_page_link://p[@class='page']/a[contains(.,'下一页')] | ||
10 | |||
11 | test_url: http://tech.sina.com.cn/mobile/n/2012-03-22/07476863046.shtml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/techcrunch.com.txt b/inc/3rdparty/site_config/standard/techcrunch.com.txt new file mode 100644 index 00000000..f436acb5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/techcrunch.com.txt | |||
@@ -0,0 +1,18 @@ | |||
1 | body: //div[contains(@class, 'media-container') or contains(@class, 'body-copy')] | ||
2 | |||
3 | author: //a[@class="name"] | ||
4 | |||
5 | date: //div[@class="post-time"] | ||
6 | |||
7 | title: //h1[@class="headline"] | ||
8 | strip_id_or_class: module-crunchbase | ||
9 | |||
10 | # The following is for the mobile site | ||
11 | body: //div[@id="singlentry"] | ||
12 | author: substring-after(//span[@class="single-post-meta-top"],'rsaquo; ') | ||
13 | date: substring-before(//div[@class="single-post-meta-top"],' @') | ||
14 | title: //a[@class="sh2"] | ||
15 | |||
16 | prune: no | ||
17 | |||
18 | test_url: http://techcrunch.com/2011/10/18/apples-insanely-great-q1-2012/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/techdirt.com.txt b/inc/3rdparty/site_config/standard/techdirt.com.txt new file mode 100644 index 00000000..727f3701 --- /dev/null +++ b/inc/3rdparty/site_config/standard/techdirt.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | body: //div[@class='story'] | ||
2 | title: //div[@class='story']/h1 | ||
3 | strip: //div[@class='story']/h1 | ||
4 | |||
5 | author: //div[@class='details']/p[contains(., 'by ')]/a | ||
6 | date: //p[@class='storydate'] | ||
7 | |||
8 | strip: //p[a[contains(., 'Leave a Comment')]] | ||
9 | strip_id_or_class: share | ||
10 | strip_id_or_class: maincolumn_head | ||
11 | strip_id_or_class: maincolmod | ||
12 | test_url: http://www.techdirt.com/articles/20120112/17455117394/sega-gets-it-right-about-sopa-its-time-hard-reset-copyright-law-congress.shtml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/techmeme.com.txt b/inc/3rdparty/site_config/standard/techmeme.com.txt new file mode 100644 index 00000000..8644e00f --- /dev/null +++ b/inc/3rdparty/site_config/standard/techmeme.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | single_page_link_in_feed: //b/a | ||
2 | |||
3 | test_url_feed: http://www.techmeme.com/feed.xml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt b/inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt new file mode 100644 index 00000000..cc26ee4c --- /dev/null +++ b/inc/3rdparty/site_config/standard/technicallyjordan.tumblr.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //h2 | ||
2 | author: //meta[@name="author"]/@content | ||
3 | date: //h3 | ||
4 | body: //div[@class="postBody"] | ||
5 | strip: //h1 | ||
6 | strip: //h2 | ||
7 | strip: //h3 | ||
8 | test_url: http://technicallyjordan.tumblr.com/post/22914659822/facebook-to-launch-app-store-knock-off \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/technologyreview.com.txt b/inc/3rdparty/site_config/standard/technologyreview.com.txt new file mode 100644 index 00000000..41f21d46 --- /dev/null +++ b/inc/3rdparty/site_config/standard/technologyreview.com.txt | |||
@@ -0,0 +1,16 @@ | |||
1 | title: //header[@class='article-meta']/h1 | ||
2 | title: substring-before(//title, '|') | ||
3 | |||
4 | body: //section[contains(@class, 'body')] | ||
5 | |||
6 | # Author & Date for News and Featured Stories | ||
7 | author: //ul[@class='byline']/li/a | ||
8 | author: substring-before(substring-after(//ul[@class='byline']/li, 'By '), ' on') | ||
9 | date: substring-after(//ul[@class='byline']/li, 'on ') | ||
10 | |||
11 | # Author & Date for "Views" | ||
12 | author: //div[@class='view-byline']/div[@class='meta']/h2[1] | ||
13 | date: //div[@class='view-byline']/div[@class='meta']/h2[2] | ||
14 | |||
15 | next_page_link: //section[@class='pagination']/a[contains(@class, 'continue')] | ||
16 | test_url: http://www.technologyreview.com/news/427567/facebooks-telescope-on-human-behavior/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/techpinions.com.txt b/inc/3rdparty/site_config/standard/techpinions.com.txt new file mode 100644 index 00000000..89ed8349 --- /dev/null +++ b/inc/3rdparty/site_config/standard/techpinions.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | body: //div[@class="post"] | ||
2 | |||
3 | strip: //div[@class="post-meta"] | ||
4 | strip: //div[@id="socialicons"] | ||
5 | strip: //div[@id="authorbox"] | ||
6 | |||
7 | test_url: http://techpinions.com/why-google-and-microsoft-hate-siri/3572 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/techradar.com.txt b/inc/3rdparty/site_config/standard/techradar.com.txt new file mode 100644 index 00000000..ed92a974 --- /dev/null +++ b/inc/3rdparty/site_config/standard/techradar.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | # Title without news/reviews etc. appended | ||
2 | title: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/h1 | ||
3 | |||
4 | # Remove home link | ||
5 | strip: //div[@id='page_logo']/a | ||
6 | |||
7 | # Remove utilities | ||
8 | strip: //*[(@id = "utilities")] | ||
9 | |||
10 | # Remove comments link | ||
11 | strip: //div[@id='subColumn1Pad']/div[1][@class='article']/div[1][@class='articleHead']/p[@class='tiny'] | ||
12 | test_url: http://www.techradar.com/news/television/sky-to-rebrand-living-as-sky-living-903105 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/telegraaf.nl.txt b/inc/3rdparty/site_config/standard/telegraaf.nl.txt new file mode 100644 index 00000000..ff3cd06e --- /dev/null +++ b/inc/3rdparty/site_config/standard/telegraaf.nl.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | body: //div[@id='artikelKolom'] | ||
2 | strip: //div[@class='broodMediaBox']/div[@class='docbox' or @class='artBannerWrapper'] | ||
3 | strip: //div[@id='artikeltoolbar'] | ||
4 | strip: //div[@class='reactiebalk artspacer' or @class='bannercenter clearfix artspacer'] | ||
5 | strip: //div[@id='artikelKolomRechts' or @id='TMGTweetWidget'] | ||
6 | tidy: no | ||
7 | prune: no | ||
8 | |||
9 | test_url: http://www.telegraaf.nl/binnenland/10275097/__Identiteit_man_in_sloot_onbekend__.html?cid=rss \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/telegraph.co.uk.txt b/inc/3rdparty/site_config/standard/telegraph.co.uk.txt new file mode 100644 index 00000000..e1faf23b --- /dev/null +++ b/inc/3rdparty/site_config/standard/telegraph.co.uk.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | body: //div[@class='byline' or @id='storyEmbSlide' or @id='mainBodyArea'] | ||
2 | strip: //p[@class='comments'] | ||
3 | strip: //div[@id='storyEmbSlide']//div[contains(@class, "hide")] | ||
4 | strip: //div[@id='tmg-related-links' or @id='outbrain-related-links' or @id='onespot-related-links'] | ||
5 | strip: //p[@class='bbpTweet']/span[@class='timestamp'] | ||
6 | strip: //p[@class='bbpTweet']/span[@class='metadata']//img | ||
7 | tidy: no | ||
8 | prune: no | ||
9 | |||
10 | test_url: http://www.telegraph.co.uk/news/worldnews/europe/ireland/8663451/Is-Ireland-divorcing-from-the-Catholic-Church.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theappleblog.com.txt b/inc/3rdparty/site_config/standard/theappleblog.com.txt new file mode 100644 index 00000000..3bd555f1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/theappleblog.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | # Remove home link | ||
2 | strip: //div[@id='blog-title']/a | ||
3 | test_url: http://theappleblog.com/2010/10/21/the-new-macbook-air-is-underwhelming/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theatlantic.com.txt b/inc/3rdparty/site_config/standard/theatlantic.com.txt new file mode 100644 index 00000000..267fd39c --- /dev/null +++ b/inc/3rdparty/site_config/standard/theatlantic.com.txt | |||
@@ -0,0 +1,18 @@ | |||
1 | title: //div[@id='article']/h1 | ||
2 | title: //h1 | ||
3 | |||
4 | body: //div[@class='articleText'] | ||
5 | body: //div[@class='articleContent'] | ||
6 | body: //div[@id='article'] | ||
7 | date: //*[contains(@class, 'date')] | ||
8 | author: //div[@id='profile']//*[@class='authors']//a[1] | ||
9 | author: //*[@class='author']/span | ||
10 | prune: no | ||
11 | |||
12 | strip: //div[@class='moreOnBoxWithImages'] | ||
13 | |||
14 | single_page_link: //a[@class='print'] | ||
15 | |||
16 | test_url: http://www.theatlantic.com/technology/archive/2011/04/want-to-see-how-crazy-a-bot-run-market-can-be/237773/ | ||
17 | test_url: http://www.theatlantic.com/magazine/archive/2007/11/the-autumn-of-the-multitaskers/6342/ | ||
18 | test_url: http://www.theatlantic.com/entertainment/archive/2012/04/30-rock-live-a-funny-reminder-of-why-sitcoms-arent-shot-live-anymore/256447/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thebostonchannel.com.txt b/inc/3rdparty/site_config/standard/thebostonchannel.com.txt new file mode 100644 index 00000000..64df90c1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/thebostonchannel.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //meta[@name='og:title']/@content | ||
2 | date: //meta[@name='created']/@content | ||
3 | body: //div[@class="StoryBody" or @class="storyTeaser"] | ||
4 | |||
5 | replace_string(<p></p>): <br /><br /> | ||
6 | |||
7 | test_url: http://www.thebostonchannel.com/slideshow/news/28210648/detail.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thebrowser.com.txt b/inc/3rdparty/site_config/standard/thebrowser.com.txt new file mode 100644 index 00000000..c3c20504 --- /dev/null +++ b/inc/3rdparty/site_config/standard/thebrowser.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //h2[contains(@class, 'page-title')] | ||
2 | body: //div[@id='content']/div[contains(@id, 'node-')]/div[@class='content'] | ||
3 | |||
4 | prune: no | ||
5 | |||
6 | strip: //div[contains(@class, 'node-book')]//a[@class='button'] | ||
7 | |||
8 | single_page_link: //a[@class='tool-print'] | ||
9 | |||
10 | test_url: http://thebrowser.com/interviews/yotam-ottolenghi-on-his-favourite-cookery-books \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thecarton.net.txt b/inc/3rdparty/site_config/standard/thecarton.net.txt new file mode 100644 index 00000000..9ef4ed8b --- /dev/null +++ b/inc/3rdparty/site_config/standard/thecarton.net.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: substring-before(//title, ' – ') | ||
2 | author:string('Shawn') | ||
3 | date: //*/time/@pubdate | ||
4 | |||
5 | |||
6 | strip: //header | ||
7 | strip: //div[@id='prev_next'] | ||
8 | strip: //div[@id='masthead'] | ||
9 | |||
10 | test_url: http://thecarton.net/2012/12/20/imdb \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thedaily.com.txt b/inc/3rdparty/site_config/standard/thedaily.com.txt new file mode 100644 index 00000000..24ebbbac --- /dev/null +++ b/inc/3rdparty/site_config/standard/thedaily.com.txt | |||
@@ -0,0 +1,24 @@ | |||
1 | #keep all body text | ||
2 | prune: no | ||
3 | |||
4 | #title, body, metadata | ||
5 | title: //div[@class='story_header']/h1 | ||
6 | body: //div[@id='content'] | ||
7 | author: substring-after(//span[@class='byline'], "by ") | ||
8 | author: substring-after(//span[@class='byline'], "By ") | ||
9 | author: //span[@class='byline'] | ||
10 | date: //span[@class='date'] | ||
11 | |||
12 | #formatting | ||
13 | convert_double_br_tags: yes | ||
14 | dissolve: //div[@class='slides_full']/ul/li | ||
15 | |||
16 | # cleanup | ||
17 | strip: //a[@id='story_note'] | ||
18 | strip: //br | ||
19 | strip: //div[@class='intro'] | ||
20 | strip: //div[@class='share-block'] | ||
21 | strip: //div[@class='sidebar-social'] | ||
22 | strip: //div[@class='top-stories'] | ||
23 | strip: //div[@class='prevnext'] | ||
24 | test_url: http://www.thedaily.com/page/2012/01/09/010912-news-college-costs-1-5/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thedailybeast.com.txt b/inc/3rdparty/site_config/standard/thedailybeast.com.txt new file mode 100644 index 00000000..4781c65a --- /dev/null +++ b/inc/3rdparty/site_config/standard/thedailybeast.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //h1 | ||
2 | body: //article/div[contains(@class, 'article-body')] | ||
3 | #strip: //header/hgroup/h1 | ||
4 | strip: //footer[@class='storyFooter'] | ||
5 | single_page_link: //li[@class='print']/a | ||
6 | prune: no | ||
7 | test_url: http://www.thedailybeast.com/articles/2010/04/06/how-mastercard-predicts-divorce.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thedailymash.co.uk.txt b/inc/3rdparty/site_config/standard/thedailymash.co.uk.txt new file mode 100644 index 00000000..0f15558d --- /dev/null +++ b/inc/3rdparty/site_config/standard/thedailymash.co.uk.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | # Remove duplicated title | ||
2 | strip: //div[@id='content']/div[1][@class='full_intro']/h2 | ||
3 | |||
4 | # Remove links, ads etc. | ||
5 | strip: //*[(@class= "aside")] | ||
6 | |||
7 | # Remove the date and add it to the date published field in Instapaper | ||
8 | strip: //div[@class="date"] | ||
9 | date: //div[@class="date"] | ||
10 | |||
11 | # There is no byline on The Daily Mash. | ||
12 | |||
13 | convert_double_br_tags: yes | ||
14 | test_url: http://www.thedailymash.co.uk/index.php?option=com_content&task=view&id=4994&Itemid=81&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+thedailymash+%28The+Daily+Mash.+It%27s+news+to+us.%29 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thefilmexperience.net.txt b/inc/3rdparty/site_config/standard/thefilmexperience.net.txt new file mode 100644 index 00000000..e6b5115a --- /dev/null +++ b/inc/3rdparty/site_config/standard/thefilmexperience.net.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //div[@class='body'] | ||
2 | test_url: http://thefilmexperience.net/blog/2011/12/30/distant-relatives-2001-a-space-odyssey-and-the-tree-of-life.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theglobalmail.org.txt b/inc/3rdparty/site_config/standard/theglobalmail.org.txt new file mode 100644 index 00000000..fae0fb29 --- /dev/null +++ b/inc/3rdparty/site_config/standard/theglobalmail.org.txt | |||
@@ -0,0 +1,41 @@ | |||
1 | title: //h1[@id="headline"] | ||
2 | author: //div[contains(@class, "editorial-byline-author")]/a | ||
3 | date: substring-after(//div[contains(@class, "editorial-byline-meta")], " | ") | ||
4 | |||
5 | # The article body contains a mix of article and non-article elements, so a lot of manual tweaks are needed | ||
6 | body: //div[@id="template"] | ||
7 | strip_id_or_class: editorial-byline-pic | ||
8 | strip_id_or_class: editorial-byline | ||
9 | strip_id_or_class: headline | ||
10 | |||
11 | # Include the leadin paragraph in the body text, but remove quotes because they're out of context | ||
12 | dissolve: //div[contains(@id, "leadin")] | ||
13 | strip_id_or_class: pullquote | ||
14 | |||
15 | # Image captions removed because they're confusing in body text | ||
16 | strip_id_or_class: image-caption-content | ||
17 | |||
18 | # Remove header and footer | ||
19 | strip_id_or_class: header | ||
20 | strip_id_or_class: footer | ||
21 | |||
22 | # Remove the hidden logo that seems to be used to cause Facebook to show the logo instead of a random article image | ||
23 | strip: /html/body/span[contains(@style, "display: none")] | ||
24 | |||
25 | # Remove search box | ||
26 | strip_id_or_class: searchContainer | ||
27 | strip: //div[contains(@class, "searchInstruction")] | ||
28 | strip: //div[contains(@class, "searchResults")]/h4 | ||
29 | |||
30 | # Remove the 'Letters to the Editor' section | ||
31 | strip_id_or_class: letter-text | ||
32 | strip_id_or_class: letter-from | ||
33 | strip_id_or_class: letter-date | ||
34 | |||
35 | # Remove Like/Tweet links | ||
36 | strip_id_or_class: social-tab | ||
37 | |||
38 | # Remove 'divider' which causes an inexplicable slash to appear in the article body | ||
39 | strip_id_or_class: divider | ||
40 | |||
41 | test_url: http://www.theglobalmail.org/feature/tiramisu-time-in-pyongyang/88/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theglobeandmail.com.txt b/inc/3rdparty/site_config/standard/theglobeandmail.com.txt new file mode 100644 index 00000000..90634a08 --- /dev/null +++ b/inc/3rdparty/site_config/standard/theglobeandmail.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | single_page_link: //div[contains(@class, 'pagination')]//a[contains(@title, 'ingle page')] | ||
2 | tidy: no | ||
3 | prune: no | ||
4 | |||
5 | test_url: http://www.theglobeandmail.com/report-on-business/rob-magazine/how-a-novice-miner-survived-a-summer-in-the-klondike/article2345350/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theindychannel.com.txt b/inc/3rdparty/site_config/standard/theindychannel.com.txt new file mode 100644 index 00000000..3544f247 --- /dev/null +++ b/inc/3rdparty/site_config/standard/theindychannel.com.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | title: //h1[@class="Headline"] | ||
2 | date: substring-after(//div[@class="posted"], 'EDT ') | ||
3 | body: //div[@class="storyBody"] | ||
4 | |||
5 | strip: //td[@class="AssocContentTD"] | ||
6 | strip: //div[@id="pageTitle"] | ||
7 | strip: //div[@class="posted"] | ||
8 | strip: //div[@class="updated"] | ||
9 | strip: //div[@class="js-kit-disclaimer"] | ||
10 | strip: //table[@class="row3table"] | ||
11 | strip: //div[@class="container2"] | ||
12 | strip: //div[@id="delta"] | ||
13 | test_url: http://www.theindychannel.com/news/31050840/detail.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/themillions.com.txt b/inc/3rdparty/site_config/standard/themillions.com.txt new file mode 100644 index 00000000..e3e57fea --- /dev/null +++ b/inc/3rdparty/site_config/standard/themillions.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: /html/body/div/div[2]/div/div/div/h3 | ||
2 | |||
3 | body: /html/body/div/div[2]/div/div/div/div[2] | ||
4 | |||
5 | strip: /html/body/div/div[2]/div/div/div/div[6]/div[3]/div/div/div | ||
6 | |||
7 | tidy: no | ||
8 | |||
9 | # any way to get rid of this word character garbage? | ||
10 | test_url: http://www.themillions.com/2010/07/at-the-movies-with-david-mitchell-the-thousand-autumns-of-jacob-de-zoet.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt b/inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt new file mode 100644 index 00000000..518bff93 --- /dev/null +++ b/inc/3rdparty/site_config/standard/themuseumofinnocence.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | body: single-review | ||
2 | strip_id_or_class: featured-review | ||
3 | strip_id_or_class: resources | ||
4 | strip_id_or_class: rate-the-book | ||
5 | strip_id_or_class: write-review | ||
6 | |||
7 | test_url: http://themuseumofinnocence.com/review.php?id=1179 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thenation.com.txt b/inc/3rdparty/site_config/standard/thenation.com.txt new file mode 100644 index 00000000..d88bcdd6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/thenation.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //h1[@class='print-title'] | ||
2 | body: //div[@class='print-content'] | ||
3 | author: //a[contains(@href, '/authors')] | ||
4 | author: substring-before(//div[@class='print-created'], '|') | ||
5 | date: //span[@class='article-date'] | ||
6 | date: substring-after(//div[@class='print-created'], '|') | ||
7 | prune: no | ||
8 | |||
9 | single_page_link: //ul[contains(@class, 'article-actions-bar')]//a[contains(@href, '/print/article/')] | ||
10 | |||
11 | test_url: http://www.thenation.com/article/162331/hard-against-time-roy-fisher \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt b/inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt new file mode 100644 index 00000000..846b8a8a --- /dev/null +++ b/inc/3rdparty/site_config/standard/thenetworkgarden.blogs.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //div[@id="beta-inner"] | ||
2 | title: //h3[@class="entry-header"] | ||
3 | |||
4 | test_url: http://thenetworkgarden.blogs.com/weblog/2011/09/microsoft-metro-and-the-next-wave-in-computing.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thenextweb.com.txt b/inc/3rdparty/site_config/standard/thenextweb.com.txt new file mode 100644 index 00000000..fdc70005 --- /dev/null +++ b/inc/3rdparty/site_config/standard/thenextweb.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | body: //div[@class= 'article-body'] | ||
2 | author: //div[@class='featured mb-1']//a[starts-with(@href,'/author')] | ||
3 | |||
4 | strip: //div[@class = 'bargo'] | ||
5 | strip: //div[@class = 'tf'] | ||
6 | strip: //div[@class = 'article']/div[@class = 'blue-box'] | ||
7 | strip_id_or_class: respond | ||
8 | |||
9 | tidy: no | ||
10 | next_page_link: //div[@class='pages-wrapper']//span/following-sibling::a/@href | ||
11 | |||
12 | test_url: http://thenextweb.com/apple/2011/10/12/tnw-review-a-complete-guide-to-apples-ios-5-with-icloud-an-os-14-years-in-the-making/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theoaklandpress.com.txt b/inc/3rdparty/site_config/standard/theoaklandpress.com.txt new file mode 100644 index 00000000..c7132321 --- /dev/null +++ b/inc/3rdparty/site_config/standard/theoaklandpress.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@id='fullstory'] | ||
2 | strip: //div[@id='page_leftbar'] | ||
3 | test_url: http://theoaklandpress.com/articles/2011/04/25/news/doc4db5330e0bce9220005852.txt \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theonion.com.txt b/inc/3rdparty/site_config/standard/theonion.com.txt new file mode 100644 index 00000000..12918b88 --- /dev/null +++ b/inc/3rdparty/site_config/standard/theonion.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //h2[@class='title'] | ||
2 | date: substring-before(//p[@class='meta'], '|') | ||
3 | body: //div[@class='story'] | ||
4 | #body: //div[@class='article_body'] | ||
5 | |||
6 | strip: //h2[@class='title'] | ||
7 | strip: //p[@class='meta'] | ||
8 | strip: //div[@class='ga_section'] | ||
9 | strip: //div[@id='recent_slider'] | ||
10 | |||
11 | test_url: http://www.theonion.com/articles/pathetic-bobcats-owner-again-regaling-players-with,27572/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thepioneerwoman.com.txt b/inc/3rdparty/site_config/standard/thepioneerwoman.com.txt new file mode 100644 index 00000000..f89f3a87 --- /dev/null +++ b/inc/3rdparty/site_config/standard/thepioneerwoman.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //h1[@class='post-title'] | ||
2 | body: //div[@class='post'] | ||
3 | author: //p[@class='posted-by'] | ||
4 | date: //div[@class='sprite post-date'] | ||
5 | |||
6 | # The body of the post doesn't have its own div so we have to strip out the metadata | ||
7 | strip: //div[@class='author_avatar'] | ||
8 | strip: //div[@class='sprite post-date'] | ||
9 | strip: //h1[@class='post-title'] | ||
10 | strip: //p[@class='posted-by'] | ||
11 | test_url: http://thepioneerwoman.com/cooking/2011/08/pie-fats-a-comparison/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theregister.co.uk.txt b/inc/3rdparty/site_config/standard/theregister.co.uk.txt new file mode 100644 index 00000000..ebcc55d5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/theregister.co.uk.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //div[@id="article"]/h2 | ||
2 | author: //div[@id="article"]/p[@class="byline"]/a[1] | ||
3 | date: //div[@id="article"]/p[@class="dateline"]/a[2] | ||
4 | body: //div[@id="article"]/div[@id="body"] | ||
5 | test_url: http://www.theregister.co.uk/2011/10/06/gas_bill_shocker/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theroot.com.txt b/inc/3rdparty/site_config/standard/theroot.com.txt new file mode 100644 index 00000000..ebff662d --- /dev/null +++ b/inc/3rdparty/site_config/standard/theroot.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@id='node-content'] | ||
2 | strip_id_or_class: pager | ||
3 | test_url: http://www.theroot.com/views/why-i-am-male-feminist \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/therumpus.net.txt b/inc/3rdparty/site_config/standard/therumpus.net.txt new file mode 100644 index 00000000..d01a89bb --- /dev/null +++ b/inc/3rdparty/site_config/standard/therumpus.net.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: /html/body/div/div[2]/div/div/h1 | ||
2 | |||
3 | body: /html/body/div/div[2]/div/div/div[2] | ||
4 | test_url: http://therumpus.net/2010/07/the-rumpus-interview-with-david-means/?full=yes \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thesiasat.com.txt b/inc/3rdparty/site_config/standard/thesiasat.com.txt new file mode 100644 index 00000000..ab9a99e8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/thesiasat.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | #body: (//div[@class='ftr-yt-vid'])[1] | ||
2 | body: (//blockquote[contains(@class, 'postcontent')])[1] | ||
3 | body: (//div[starts-with(@id, 'post_message')])[1] | ||
4 | |||
5 | prune: no | ||
6 | tidy: no | ||
7 | |||
8 | #replace_string(<iframe title="YouTube video player"): <div class="ftr-yt-vid"><iframe title="YouTube video player" | ||
9 | #replace_string(</iframe>): </iframe> </div> | ||
10 | |||
11 | test_url: http://www.thesiasat.com/showthread.php?19220-Dunya-News-HASB-E-HAAL-16-06-2012-Part-1-5 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thesimpledollar.com.txt b/inc/3rdparty/site_config/standard/thesimpledollar.com.txt new file mode 100644 index 00000000..d5c6c9e0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/thesimpledollar.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: //h3[@class='post-title']/a[@class='post-title-link'] | ||
2 | body: //div[@class='post-content'] | ||
3 | author: //div[@class='post-meta-under-title']/a | ||
4 | test_url: http://www.thesimpledollar.com/2011/09/13/determining-the-size-of-your-emergency-fund/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thespoiler.co.uk.txt b/inc/3rdparty/site_config/standard/thespoiler.co.uk.txt new file mode 100644 index 00000000..e2ed1e63 --- /dev/null +++ b/inc/3rdparty/site_config/standard/thespoiler.co.uk.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | strip: //*[(@id = "content")]/h2 | ||
2 | strip: //*[(@class = "wp-notable-line")] | ||
3 | test_url: http://www.thespoiler.co.uk/index.php/2010/10/21/wayne-rooney-tells-man-utd-its-not-me-its-you \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thespoof.com.txt b/inc/3rdparty/site_config/standard/thespoof.com.txt new file mode 100644 index 00000000..409dc0c9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/thespoof.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //h1[contains(@class, 'cTitle')] | ||
2 | body: //div[contains(@class, 'KonaBody') or @id='articleimageright'] | ||
3 | author: //meta[@name='Author']/@content | ||
4 | date: //meta[@name='OriginalPublicationDate']/@content | ||
5 | |||
6 | prune: no | ||
7 | tidy: no | ||
8 | |||
9 | test_url: http://www.thespoof.com/news/spoof.cfm?headline=s8i108389 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thestranger.com.txt b/inc/3rdparty/site_config/standard/thestranger.com.txt new file mode 100644 index 00000000..0f9855c8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/thestranger.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | # savage* filtering is for Savage Love, such as: http://www.thestranger.com/seattle/SavageLove?oid=5135029 | ||
2 | |||
3 | # other filtering is for plain articles, such as: http://www.thestranger.com/seattle/the-stranger-election-control-board/Content?oid=5142885 | ||
4 | |||
5 | title: //div[@id='savageColumn_head']/h1 | ||
6 | title: //h1[@class="headlineLarge"] | ||
7 | |||
8 | strip: //div[@id='savage_right'] | //div[@id='savageColumn_head'] | //div[@id='savageArticleRight'] | //div[@id='articleRight'] | //div[@class='savAppBanner'] | ||
9 | |||
10 | body: //div[@id='savageColumn'] | ||
11 | body: //div[@id='story_text'] | ||
12 | test_url: http://www.thestranger.com/seattle/SavageLove?oid=5135029 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thestreet.com.txt b/inc/3rdparty/site_config/standard/thestreet.com.txt new file mode 100644 index 00000000..5de75637 --- /dev/null +++ b/inc/3rdparty/site_config/standard/thestreet.com.txt | |||
@@ -0,0 +1,25 @@ | |||
1 | title: //div[@id='storyHdr']/h1 | ||
2 | title: //div[@id='print']//h2 | ||
3 | body: //div[@class="virtualpage"] | ||
4 | body: //div[@id='print']//div[@id='bd'] | ||
5 | author: //meta[@name="AUTHOR"]/@content | ||
6 | author: (//div[@id='print']//div[@id='bd']/h4)[1] | ||
7 | date: //meta[@name="DATE"]/@content | ||
8 | date: //div[@id='print']//div[@id='dte'] | ||
9 | |||
10 | strip_id_or_class: articleFooter | ||
11 | strip_id_or_class: sidebar | ||
12 | strip_id_or_class: ie6PrintSubhead | ||
13 | strip_id_or_class: subHdr | ||
14 | |||
15 | |||
16 | replace_string(<P/>): </p><p> | ||
17 | |||
18 | prune: no | ||
19 | |||
20 | #TODO: redirects back - perhaps needs referer to work | ||
21 | single_page_link: //div[@id='storyDetail']//a[contains(@href, '/print/')] | ||
22 | |||
23 | test_url: http://www.thestreet.com/story/11386556/1/which-of-these-10-dividend-stocks-is-worth-the-risk.html | ||
24 | # multi page | ||
25 | test_url: http://www.thestreet.com/story/11387090/1/7-ubs-stock-picks-for-2012.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt b/inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt new file mode 100644 index 00000000..6b3277eb --- /dev/null +++ b/inc/3rdparty/site_config/standard/thethaovanhoa.vn.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | strip:/html/body/form/div[2]/div[3]/div/div/div/div/div/div/div/div/div/div[2]/div[3]/div[2]/div/p[2] | ||
2 | test_url: http://thethaovanhoa.vn/151N20110519085606745T129/levante-quyet-giu-caicedo.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theverge.com.txt b/inc/3rdparty/site_config/standard/theverge.com.txt new file mode 100644 index 00000000..11c5c153 --- /dev/null +++ b/inc/3rdparty/site_config/standard/theverge.com.txt | |||
@@ -0,0 +1,31 @@ | |||
1 | title: //h1[contains(@class, "headline")] | ||
2 | |||
3 | author: //p[contains(@class, "byline")]/a[contains(@class, "author")] | ||
4 | |||
5 | date: substring-after(normalize-space(//p[contains(@class, "byline")]/span[contains(@class, "publish-date")]), "on ") | ||
6 | |||
7 | body: //article[contains(@class, 'feature-entry')] | ||
8 | body: //article | ||
9 | prune: no | ||
10 | tidy: no | ||
11 | |||
12 | strip: //article/header | ||
13 | strip: //*[@id='sticky-menu'] | ||
14 | strip: //aside | ||
15 | strip: //nav | ||
16 | |||
17 | strip_id_or_class: gallery | ||
18 | strip_id_or_class: article-meta | ||
19 | strip_id_or_class: story-navigation | ||
20 | strip_id_or_class: slegend | ||
21 | strip_id_or_class: related-product-meta | ||
22 | strip_id_or_class: comments | ||
23 | strip_id_or_class: ui-jump-list | ||
24 | strip_id_or_class: pullquote | ||
25 | |||
26 | strip: //q | ||
27 | |||
28 | strip: //a[contains(@class, 'entry-section-title')] | ||
29 | |||
30 | test_url: http://www.theverge.com/2012/2/29/2821763/lytro-review | ||
31 | test_url: http://www.theverge.com/2011/11/3/2534861/nokia-lumia-800-review \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/theweek.com.txt b/inc/3rdparty/site_config/standard/theweek.com.txt new file mode 100644 index 00000000..27281ceb --- /dev/null +++ b/inc/3rdparty/site_config/standard/theweek.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | body: //div[@class="briefingEntry"] | ||
2 | prune: no | ||
3 | |||
4 | test_url: http://theweek.com/article/index/215763/insider-trading-on-capitol-hill \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thinkprogress.org.txt b/inc/3rdparty/site_config/standard/thinkprogress.org.txt new file mode 100644 index 00000000..8934b68e --- /dev/null +++ b/inc/3rdparty/site_config/standard/thinkprogress.org.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | author: //p[@class="byline"]/a | ||
2 | body: //div[@class="post"] | ||
3 | |||
4 | test_url: http://thinkprogress.org/special/2011/11/12/367040/harvard-law-professor-criticizes-homeland-security-feel-of-overreaction-to-occupy-harvard/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thisdaylive.com.txt b/inc/3rdparty/site_config/standard/thisdaylive.com.txt new file mode 100644 index 00000000..958d4b27 --- /dev/null +++ b/inc/3rdparty/site_config/standard/thisdaylive.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //div[@class='main-content-panel']/div[@class='img'] | //div[@id='page_content_Content9_oModuleContent_2_div_Body'] | ||
2 | test_url: http://www.thisdaylive.com/articles/australia-pm-talks-human-rights-with-chinas-wen/90394/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/thisismynext.com.txt b/inc/3rdparty/site_config/standard/thisismynext.com.txt new file mode 100644 index 00000000..6850b4be --- /dev/null +++ b/inc/3rdparty/site_config/standard/thisismynext.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | author: //div[@class='meta clearfix']/a | ||
2 | body: //div[@class='post'] | ||
3 | |||
4 | strip: //div[@class='metaCat'] | ||
5 | strip: //div[@class='post']/h1 | ||
6 | strip: //div[@class='post']/div[@class='meta clearfix'] | ||
7 | strip: //div[@class='post']/div[@class='social-bar clearfix'] | ||
8 | test_url: http://thisismynext.com/2011/10/18/galaxy-nexus-android-ice-cream-sandwich-pictures-video-hands-on/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tidbits.com.txt b/inc/3rdparty/site_config/standard/tidbits.com.txt new file mode 100644 index 00000000..8bcf2ec1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/tidbits.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | author: //span[@class='fn'] | ||
2 | date: substring-before(substring-after(//*[@id='center_ajax_sub']/div/div[3],'|'),'|') | ||
3 | test_url: http://tidbits.com/article/12651 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/time.com.txt b/inc/3rdparty/site_config/standard/time.com.txt new file mode 100644 index 00000000..fd3fe08c --- /dev/null +++ b/inc/3rdparty/site_config/standard/time.com.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | # 2011-10-25 - carlo@... - Initial setup. | ||
2 | |||
3 | single_page_link: //li[@class='print']/a/@href | ||
4 | |||
5 | title: //h1 | ||
6 | author: //meta[@name="byline"]/@content | ||
7 | date: //meta[@name="date"]/@content | ||
8 | |||
9 | strip: //span[@class="see"] | ||
10 | strip: //div[@class="byline"] | ||
11 | strip: //div[@id="date2"] | ||
12 | strip: //h1 | ||
13 | |||
14 | test_url: http://www.time.com/time/specials/packages/article/0,28804,2094921_2094923_2094924,00.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt b/inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt new file mode 100644 index 00000000..17297732 --- /dev/null +++ b/inc/3rdparty/site_config/standard/timeshighereducation.co.uk.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@class="storytext"] | ||
3 | strip: //div[@id="thelogin"] | ||
4 | strip: //*[@class="hide"] | ||
5 | strip: //div[@id="anchored"] | ||
6 | test_url: http://www.timeshighereducation.co.uk/story.asp?sectioncode=26&storycode=416124&c=1 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tipb.com.txt b/inc/3rdparty/site_config/standard/tipb.com.txt new file mode 100644 index 00000000..9533eb0f --- /dev/null +++ b/inc/3rdparty/site_config/standard/tipb.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | body: //div[@id='content'] | ||
2 | |||
3 | strip_id_or_class: featured-box | ||
4 | strip_id_or_class: postmeta | ||
5 | strip_id_or_class: respond | ||
6 | |||
7 | author: //a[contains(@href, '/author/') and contains(@title, 'Posts by')] | ||
8 | date: substring-before(//a[contains(@href, '/author/') and contains(@title, 'Posts by')]/.., ' by ') | ||
9 | test_url: http://www.tipb.com/2011/10/17/iphone-4s-review/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tnr.com.txt b/inc/3rdparty/site_config/standard/tnr.com.txt new file mode 100644 index 00000000..65a1899f --- /dev/null +++ b/inc/3rdparty/site_config/standard/tnr.com.txt | |||
@@ -0,0 +1,17 @@ | |||
1 | title: //div[contains(@class, 'article_detail')]/div[@class='entry_header']/h1 | ||
2 | title: //div[contains(@class, 'article_detail')]//h1 | ||
3 | title: //h1 | ||
4 | |||
5 | body: //div[contains(@class, 'article_detail')] | ||
6 | |||
7 | author: //div[@class='article_detail']/div[@class='entry_header']/li/div[@class='author']//h3 | ||
8 | author: div[@class='author']//h3 | ||
9 | strip: //div[contains(@class, 'field-field-book-cover')] | ||
10 | |||
11 | date: translate(//*[@class='post_date' and contains(., ' 20')], '|', '') | ||
12 | |||
13 | prune: no | ||
14 | |||
15 | single_page_link: //a[@class='print-page'] | ||
16 | |||
17 | test_url: http://www.tnr.com/blog/jonathan-chait/92991/did-obama-get-rolled \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tomdispatch.com.txt b/inc/3rdparty/site_config/standard/tomdispatch.com.txt new file mode 100644 index 00000000..d8548c78 --- /dev/null +++ b/inc/3rdparty/site_config/standard/tomdispatch.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //div[@id='maincontent']//div[@class='title'] | ||
2 | body: //div[@id='maincontent']//div[@class='byline'] | //div[@id='maincontent']//div[@class='meat'] | ||
3 | |||
4 | tidy: no | ||
5 | |||
6 | test_url: http://www.tomdispatch.com/post/175436/tomgram:_noam_chomsky%2C_the_imperial_mentality_and_9_11/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tomshardware.com.txt b/inc/3rdparty/site_config/standard/tomshardware.com.txt new file mode 100644 index 00000000..2bba6de8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/tomshardware.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | tidy: no | ||
2 | title: //title | ||
3 | author: //a[@itemprop = 'author'] | ||
4 | date: //time[@itemprop = 'datePublished'] | ||
5 | body: //div[@id = 'intelliTXT'] | ||
6 | |||
7 | next_page_link: //li[@class="pagin next"]/a | ||
8 | test_url: http://www.tomshardware.com/reviews/gaming-graphics-card-review,3107.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tomshardware.de.txt b/inc/3rdparty/site_config/standard/tomshardware.de.txt new file mode 100644 index 00000000..e910003c --- /dev/null +++ b/inc/3rdparty/site_config/standard/tomshardware.de.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | body://div[@id="news-content"]/div[@id="intelliTXT"][1] | ||
2 | |||
3 | author://div[@id="header-news-infos"]/a[1] | ||
4 | |||
5 | date: //div[@id="header-news-infos"]/span[1] | ||
6 | |||
7 | title://h1[@id="header-news-title" and @class="hardwareTitle"][1] | ||
8 | |||
9 | strip://div[@id="news-content"]/div[@id="intelliTXT"]/table | ||
10 | |||
11 | footnotes: no | ||
12 | test_url: http://www.tomshardware.de/DDR4-DDR3-ISSCC-Samsung-Hynix,news-247133.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/toolsandtoys.net.txt b/inc/3rdparty/site_config/standard/toolsandtoys.net.txt new file mode 100644 index 00000000..dbe60b15 --- /dev/null +++ b/inc/3rdparty/site_config/standard/toolsandtoys.net.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //div[@class='post'] | ||
2 | |||
3 | strip: //div[@class='social'] | ||
4 | strip: //span[@class='next'] | ||
5 | strip: //span[@class='previous'] | ||
6 | test_url: http://toolsandtoys.net/noble-tonic-02/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/trailer.web-view.net.txt b/inc/3rdparty/site_config/standard/trailer.web-view.net.txt new file mode 100644 index 00000000..e7a9c82d --- /dev/null +++ b/inc/3rdparty/site_config/standard/trailer.web-view.net.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | title: concat(substring-before(//title,':'),': ',//div[@class='Date2']) | ||
2 | test_url: http://trailer.web-view.net/Show/0XC4EFE5D648B716BA2E134BC7CE61B9CC001E04F11E9434438186735DBD637488.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/traningslara.se.txt b/inc/3rdparty/site_config/standard/traningslara.se.txt new file mode 100644 index 00000000..96e491fa --- /dev/null +++ b/inc/3rdparty/site_config/standard/traningslara.se.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //div[@class="Post-body"]//span[@class="PostHeader"] | ||
2 | author: //div[@class="PostHeaderIcons metadata"]/a[@title="Author"] | ||
3 | date: substring-before(//div[@class="PostHeaderIcons metadata"], '|') | ||
4 | body: //div[@class="Post-body"] | ||
5 | strip_id_or_class: print1 | ||
6 | strip_id_or_class: metadata | ||
7 | strip_id_or_class: authorbox | ||
8 | test_url: http://traningslara.se/skoinlagg-och-skador-finns-det-nagot-samband/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/triblive.com.txt b/inc/3rdparty/site_config/standard/triblive.com.txt new file mode 100644 index 00000000..82797db9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/triblive.com.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | title: //title | ||
2 | author: //span/a | ||
3 | date: substring-after(//small,'Published:') | ||
4 | |||
5 | strip: //h1[@class='vert_class'] | ||
6 | strip: //h1[@class='headline'] | ||
7 | strip: //img[contains(@src,'logo_triblive.gif')] | ||
8 | |||
9 | #strip: //h6 | ||
10 | #strip_img_src: logo_triblive.gif | ||
11 | |||
12 | single_page_link: //a[@class='stprint'] | ||
13 | test_url: http://triblive.com/sports/2819913-85/lemieux-deal-penguins-burkle-nhl-owners-team-mario-bettman-case \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/truthdig.com.txt b/inc/3rdparty/site_config/standard/truthdig.com.txt new file mode 100644 index 00000000..e7c1a4bc --- /dev/null +++ b/inc/3rdparty/site_config/standard/truthdig.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //div[@class='printbody']/h1 | ||
2 | body: //div[@class='printbody'] | ||
3 | prune: no | ||
4 | |||
5 | strip: //div[@class='printbody']/a[@href='http://www.truthdig.com/'] | ||
6 | strip: //table[@class='footer'] | ||
7 | |||
8 | single_page_link: //div[@class='article_tools']//a[contains(@href, '/print/')] | ||
9 | |||
10 | test_url: http://www.truthdig.com/report/item/the_election_march_of_the_trolls_20110829/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tthfanfic.org.txt b/inc/3rdparty/site_config/standard/tthfanfic.org.txt new file mode 100644 index 00000000..0dab5b0f --- /dev/null +++ b/inc/3rdparty/site_config/standard/tthfanfic.org.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: //h2 | ||
2 | author: //a[starts-with(@href, '/AuthorStories')] | ||
3 | body: //div[@id='storyinnerbody'] | ||
4 | test_url: http://www.tthfanfic.org/Story-6512/Kudra+Journeys.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tthor.com.txt b/inc/3rdparty/site_config/standard/tthor.com.txt new file mode 100644 index 00000000..902fcd13 --- /dev/null +++ b/inc/3rdparty/site_config/standard/tthor.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | prune: no | ||
2 | test_url: http://www.tthor.com/06/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tuaw.com.txt b/inc/3rdparty/site_config/standard/tuaw.com.txt new file mode 100644 index 00000000..b86f8ccb --- /dev/null +++ b/inc/3rdparty/site_config/standard/tuaw.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h1[@class='posttitle'] | ||
2 | author: //span[@class='author']/a | ||
3 | date: //span[@class='timestamp'] | ||
4 | body: //div[@class='body'] | ||
5 | |||
6 | test_url: http://www.tuaw.com/2011/10/19/apple-posts-fans-memories-of-steve-jobs/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tuckreview.com.txt b/inc/3rdparty/site_config/standard/tuckreview.com.txt new file mode 100644 index 00000000..a3946cbc --- /dev/null +++ b/inc/3rdparty/site_config/standard/tuckreview.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h1[@class='post-title'] | ||
2 | author: //div[@class='display-name'] | ||
3 | date: //div[@class='date'] | ||
4 | body: //div[@class='body'] | ||
5 | footnotes: no | ||
6 | test_url: http://tuckreview.com/2012/8/14/migrating-to-v6 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/tvtropes.org.txt b/inc/3rdparty/site_config/standard/tvtropes.org.txt new file mode 100644 index 00000000..08dbba59 --- /dev/null +++ b/inc/3rdparty/site_config/standard/tvtropes.org.txt | |||
@@ -0,0 +1,20 @@ | |||
1 | # Google Custom Search | ||
2 | strip_id_or_class: google_branding_style | ||
3 | |||
4 | # Avoid double title | ||
5 | strip_id_or_class: pagetitle | ||
6 | |||
7 | # external links are labelled | ||
8 | strip_image_src: http://static.mediatropes.info/pmwiki/pub/external_link.gif | ||
9 | |||
10 | title: //div[@class="pagetitle"] | ||
11 | body: //div[@id="wikitext"] | ||
12 | |||
13 | # don't get clever. | ||
14 | strip_comments: no | ||
15 | prune: no | ||
16 | |||
17 | # navigation in footer lives inside the wikitext div, annoyingly. | ||
18 | strip_id_or_class: pathholder | ||
19 | |||
20 | test_url: http://tvtropes.org/pmwiki/pmwiki.php/Main/WithinParameters \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/twitter.com.txt b/inc/3rdparty/site_config/standard/twitter.com.txt new file mode 100644 index 00000000..12ab1546 --- /dev/null +++ b/inc/3rdparty/site_config/standard/twitter.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //title | ||
2 | body: (//p[contains(@class, 'js-tweet-text')])[1] | ||
3 | author: (//strong[contains(@class, 'fullname')])[1] | ||
4 | date: //span[contains(@class, 'js-short-timestamp')]/@data-time | ||
5 | |||
6 | prune: no | ||
7 | tidy: no | ||
8 | |||
9 | test_url: https://twitter.com/medialens/status/216883678582804480 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/uefa.com.txt b/inc/3rdparty/site_config/standard/uefa.com.txt new file mode 100644 index 00000000..088d6586 --- /dev/null +++ b/inc/3rdparty/site_config/standard/uefa.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //div[@class='d3cmsCBody']//div[@class='pubText pubDate' or @class='newsComment' or contains(@class, 'newsPhoto') or @class='newsText'] | ||
2 | strip: //div[contains(@class, 'mpindex')] | ||
3 | prune: no | ||
4 | tidy: no | ||
5 | |||
6 | test_url: http://www.uefa.com/uefaeuropaleague/news/newsid=1617320.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt b/inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt new file mode 100644 index 00000000..29e19565 --- /dev/null +++ b/inc/3rdparty/site_config/standard/uk.xbox360.ign.com.txt | |||
@@ -0,0 +1,23 @@ | |||
1 | # applies to uk.ds.ign.com, uk.wii.ign.com etc. | ||
2 | # possibly to non-UK versions, but I can’t test that | ||
3 | |||
4 | title: //h1[@class="headline"] | ||
5 | author: //div[@class="hdr-sub byline"]/a | ||
6 | date: //h2[@class="publish-date"]/span | ||
7 | body: //div[@id="main-article-content"] | ||
8 | |||
9 | strip: //ul[@class="lnks-readmore"] | ||
10 | |||
11 | strip: //div[@class="inlineImageCaption"] | ||
12 | # can’t make the images appear, so remove the captions | ||
13 | |||
14 | strip: //div[@style="width:468px"] | ||
15 | # video caption links | ||
16 | |||
17 | convert_double_br_tags: yes | ||
18 | |||
19 | strip_comments: no | ||
20 | # otherwise the ‘Closing Comments’ are removed | ||
21 | |||
22 | # Ratings box could do with some rearranging, but it’s tricky | ||
23 | test_url: http://uk.xbox360.ign.com/articles/121/1210717p1.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/uni-watch.com.txt b/inc/3rdparty/site_config/standard/uni-watch.com.txt new file mode 100644 index 00000000..cbe87d19 --- /dev/null +++ b/inc/3rdparty/site_config/standard/uni-watch.com.txt | |||
@@ -0,0 +1,17 @@ | |||
1 | author: substring-before(substring-after(//div[@class='post-byline'], 'By '), ', on') | ||
2 | date: substring-after(//div[@class='post-byline'], ', on') | ||
3 | |||
4 | # for some reason, the following is producing a "no text [48]" error | ||
5 | #title: //div[@class='post-headline'] | ||
6 | |||
7 | # for some reason, the following doesn't appear to isolate just the body copy | ||
8 | body: //div[@class='post-bodycopy'] | ||
9 | |||
10 | # we solve the above issue by stripping out everything else we don't want | ||
11 | # these can probably all be removed if the body: command above worked | ||
12 | strip_id_or_class: reply | ||
13 | strip_id_or_class: left | ||
14 | strip_id_or_class: post-headline | ||
15 | strip_id_or_class: post-byline | ||
16 | strip_id_or_class: footer | ||
17 | test_url: http://www.uni-watch.com/2011/10/18/the-curious-case-of-steve-debergs-microphone-and-speaker/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/urbandictionary.com.txt b/inc/3rdparty/site_config/standard/urbandictionary.com.txt new file mode 100644 index 00000000..86061f77 --- /dev/null +++ b/inc/3rdparty/site_config/standard/urbandictionary.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: //title | ||
2 | body: //td[@id='content'] | ||
3 | test_url: http://www.urbandictionary.com/define.php?term=Grown-Ass \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/usccb.org.txt b/inc/3rdparty/site_config/standard/usccb.org.txt new file mode 100644 index 00000000..eb10a48f --- /dev/null +++ b/inc/3rdparty/site_config/standard/usccb.org.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //div[@id='CS_Element_maincontent'] | ||
2 | |||
3 | tidy: no | ||
4 | prune: no | ||
5 | |||
6 | test_url: http://www.usccb.org/bible/readings/072412.cfm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/useit.com.txt b/inc/3rdparty/site_config/standard/useit.com.txt new file mode 100644 index 00000000..f6be84c4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/useit.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //h1 | ||
2 | |||
3 | date: substring-after(//p[@class='overline']/strong, ',') | ||
4 | body: //div[@class="maintext"] | ||
5 | strip: //p[@class='overline'] | ||
6 | strip: //h1 | ||
7 | tidy: no | ||
8 | test_url: http://www.useit.com/alertbox/mobile-startup-screen.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/ux.artu.tv.txt b/inc/3rdparty/site_config/standard/ux.artu.tv.txt new file mode 100644 index 00000000..a893bda0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/ux.artu.tv.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | author: ("Arturo Toledo") | ||
2 | title: //div[@class="post"]/h2 | ||
3 | body: //div[@class="entry"] | ||
4 | |||
5 | # Remove Twitter button | ||
6 | strip: //div[@class="entry"]/p[2]/a/img | ||
7 | test_url: http://ux.artu.tv/?p=192 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt b/inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt new file mode 100644 index 00000000..3661b06a --- /dev/null +++ b/inc/3rdparty/site_config/standard/uzivatelsketestovani.cz.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | title:h1 | ||
2 | test_url: http://www.uzivatelsketestovani.cz/wiki/doku.php/skoleni-axure-rp \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vanityfair.com.txt b/inc/3rdparty/site_config/standard/vanityfair.com.txt new file mode 100644 index 00000000..bfc47d1f --- /dev/null +++ b/inc/3rdparty/site_config/standard/vanityfair.com.txt | |||
@@ -0,0 +1,30 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | author: //div[contains(@class, 'byline')]//span[contains(@class, 'name')] | ||
3 | date: //div[contains(@class, 'cn_date_time')] | ||
4 | body: //div[contains(@class, 'pageContainers')] | ||
5 | body: //article[@id='items-container'] | ||
6 | #body: //h2[@class='sub-header'] | //div[contains(@class, 'contributor-type') or @class='display-date' or @class='content-container'] | ||
7 | |||
8 | strip_id_or_class: bc | ||
9 | strip_id_or_class: utilities | ||
10 | strip_id_or_class: list-supporting | ||
11 | strip_id_or_class: yrail | ||
12 | strip_id_or_class: urail | ||
13 | |||
14 | prune: no | ||
15 | #tidy: no | ||
16 | |||
17 | strip_id_or_class: super-rubric-section | ||
18 | strip_id_or_class: cn_date_time | ||
19 | strip_id_or_class: cn_contributors | ||
20 | strip_id_or_class: cn_pagination_controls | ||
21 | strip_id_or_class: cn_features_container | ||
22 | strip_id_or_class: global-footer | ||
23 | strip_id_or_class: cn_ecom_placement | ||
24 | strip: //li[@class='blogNavPrev'] | ||
25 | |||
26 | single_page_link: //a[@title='Print this page'] | ||
27 | |||
28 | test_url: http://www.vanityfair.com/politics/features/2011/05/egypt-revolutionaries-201105 | ||
29 | test_url: http://www.vanityfair.com/politics/features/2008/08/hitchens200808 | ||
30 | test_url: http://www.vanityfair.com/style/2012/01/prisoners-of-style-201201 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/varingen.no.txt b/inc/3rdparty/site_config/standard/varingen.no.txt new file mode 100644 index 00000000..6b5e0ae0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/varingen.no.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //div[@class='ArticleHeadlineDetailedView'] | ||
2 | date: //span[@class='ArticlePublicationDateTimeDetailedView'] | ||
3 | author://span[@class='ArticleBylineDetailedView'] | ||
4 | body: //div[@class='ArticleTextDetailedView'] | ||
5 | test_url: http://www.varingen.no/Nyheter/tabid/392/Default.aspx?ModuleId=56651&articleView=true \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/varsity.co.uk.txt b/inc/3rdparty/site_config/standard/varsity.co.uk.txt new file mode 100644 index 00000000..b1db4c35 --- /dev/null +++ b/inc/3rdparty/site_config/standard/varsity.co.uk.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | # FB comments are inside an h2. Weird. Without this, the line 'Comments' is preserved by the text parser | ||
2 | |||
3 | strip: //h2 | ||
4 | test_url: http://www.varsity.co.uk/reviews/2662 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vedomosti.ru.txt b/inc/3rdparty/site_config/standard/vedomosti.ru.txt new file mode 100644 index 00000000..ba999171 --- /dev/null +++ b/inc/3rdparty/site_config/standard/vedomosti.ru.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: //td[@class='second_content']/h1 | ||
2 | body: //td[@class='second_content']/div[@class='article_text'] | ||
3 | test_url: http://www.vedomosti.ru/newspaper/article/259377/rasprodazha_mailru \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/veggbilder.no.txt b/inc/3rdparty/site_config/standard/veggbilder.no.txt new file mode 100644 index 00000000..14144c0f --- /dev/null +++ b/inc/3rdparty/site_config/standard/veggbilder.no.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | author: //div[@class="blogginnleggForfatter"] | ||
2 | date: concat(//div[@class='blogginnleggDatoDag'],' ',//div[@class='blogginnleggDatoMnd']) | ||
3 | strip: //div[contains(@id,"bloggDelingslenker")] | ||
4 | strip: //div[contains(@id,"bloggDelingslenker")] | ||
5 | test_url: http://veggbilder.no/blogginnlegg/fristelser \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vemedio.com.txt b/inc/3rdparty/site_config/standard/vemedio.com.txt new file mode 100644 index 00000000..294ace9c --- /dev/null +++ b/inc/3rdparty/site_config/standard/vemedio.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h2 | ||
2 | date: substring-before(//small," • Permalink") | ||
3 | author:string('Martin Hering') | ||
4 | |||
5 | Strip: //p/small | ||
6 | test_url: http://vemedio.com/blog/posts/state-of-support-and-icloud \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/venturebeat.com.txt b/inc/3rdparty/site_config/standard/venturebeat.com.txt new file mode 100644 index 00000000..41bfa8c5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/venturebeat.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h1[@class="entry-title"] | ||
2 | author: //div[@class="author-name"] | ||
3 | date: //span[@class="the-time"] | ||
4 | body: //div[@class="entry-content"] | ||
5 | strip: //div[@class="vb-gallery"] | ||
6 | test_url: http://venturebeat.com/2012/07/17/marissa-mayer-yahoo/#s:mayer-1 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/version.php b/inc/3rdparty/site_config/standard/version.php index e61807ed..34a87357 100644 --- a/inc/3rdparty/site_config/standard/version.php +++ b/inc/3rdparty/site_config/standard/version.php | |||
@@ -1,2 +1 @@ | |||
1 | <?php | <?php return 4; ?> \ No newline at end of file | |
2 | return 1; \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/version.txt b/inc/3rdparty/site_config/standard/version.txt new file mode 100644 index 00000000..bf0d87ab --- /dev/null +++ b/inc/3rdparty/site_config/standard/version.txt | |||
@@ -0,0 +1 @@ | |||
4 \ No newline at end of file | |||
diff --git a/inc/3rdparty/site_config/standard/version2.dk.txt b/inc/3rdparty/site_config/standard/version2.dk.txt new file mode 100644 index 00000000..74203cad --- /dev/null +++ b/inc/3rdparty/site_config/standard/version2.dk.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | title: //article/header/h1 | ||
2 | |||
3 | author: //article/header/section[@class='byline']/span[contains(@class, 'author')]/a | ||
4 | date: //article/header/section[@class='byline']/span[@class='published']/span | ||
5 | |||
6 | body: //article/section[@class='body'] | ||
7 | |||
8 | convert_double_br_tags: yes | ||
9 | |||
10 | # This is required, because Tidy chokes on the HTML5 tags... | ||
11 | tidy: no | ||
12 | test_url: http://www.version2.dk/artikel/17069-amerikansk-hit-investor-er-vild-med-danske-net-ivaerksaettere \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/verybestbaking.com.txt b/inc/3rdparty/site_config/standard/verybestbaking.com.txt new file mode 100644 index 00000000..4cdd0c0f --- /dev/null +++ b/inc/3rdparty/site_config/standard/verybestbaking.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | title: //title | ||
2 | body: //div[contains(@class, 'printRecipe')] | ||
3 | strip: //div[@class='recipeHeader'] | ||
4 | prune: no | ||
5 | tidy: no | ||
6 | single_page_link: //ul[@class='printOptions']//a[contains(@href, 'detail.aspx?p=1&showphoto=true')] | ||
7 | test_url: http://www.verybestbaking.com/recipes/143190/Penne-Pasta-with-Sun-dried-Tomato-Cream-Sauce/detail.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vg.no.txt b/inc/3rdparty/site_config/standard/vg.no.txt new file mode 100644 index 00000000..fceeea09 --- /dev/null +++ b/inc/3rdparty/site_config/standard/vg.no.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@id='artikkelspalte'] | ||
2 | strip_id_or_class: 'breadcrumb' | ||
3 | test_url: http://www.vg.no/spill/artikkel.php?artid=10003628 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/video.forbes.com.txt b/inc/3rdparty/site_config/standard/video.forbes.com.txt new file mode 100644 index 00000000..1dca55a3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/video.forbes.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: concat("Video: ", //div[@id='currentVideoTitleDivId']) | ||
2 | body: //div[@id='currentVideoDescriptionId'] | ||
3 | author: //meta[@name='author']/@content | ||
4 | |||
5 | replace_string(<div id="currentVideoDescriptionId" style="display): <div id="currentVideoDescriptionId" style="displayitplease | ||
6 | |||
7 | replace_string(<div id="currentVideoTitleDivId" style="display): <div id="currentVideoTitleDivId" style="displayitplease | ||
8 | |||
9 | test_url: http://video.forbes.com/fvn/business/wells-fargo-inside-the-bank-that-works \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/videogum.com.txt b/inc/3rdparty/site_config/standard/videogum.com.txt new file mode 100644 index 00000000..a1663813 --- /dev/null +++ b/inc/3rdparty/site_config/standard/videogum.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h2[@class='posttitle'] | ||
2 | date: substring-before(substring-after(//span[@class='postdate'], 'on '), ' by') | ||
3 | date: //span[@class='postdate'] | ||
4 | author: //span[@class='postdate']/a | ||
5 | body: //div[@class='entry line_top'] | ||
6 | test_url: http://videogum.com/395042/here-are-some-afternoon-links-92/list/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/villagevoice.com.txt b/inc/3rdparty/site_config/standard/villagevoice.com.txt new file mode 100644 index 00000000..df374602 --- /dev/null +++ b/inc/3rdparty/site_config/standard/villagevoice.com.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //h2[@class='headline'] | ||
2 | |||
3 | body: //div[@class='ContentPrint'] | ||
4 | |||
5 | prune: no | ||
6 | |||
7 | single_page_link: //a[contains(@href, '/printVersion/')] | ||
8 | |||
9 | test_url: http://www.villagevoice.com/2010-03-16/news/new-york-s-ten-worst-landlords/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vimeo.com.txt b/inc/3rdparty/site_config/standard/vimeo.com.txt new file mode 100644 index 00000000..d6c6701a --- /dev/null +++ b/inc/3rdparty/site_config/standard/vimeo.com.txt | |||
@@ -0,0 +1,17 @@ | |||
1 | title: //title | ||
2 | body: //iframe | ||
3 | |||
4 | find_string: <html><iframe | ||
5 | replace_string: <iframe id="video" | ||
6 | |||
7 | find_string: ></iframe></html> | ||
8 | replace_string: ></iframe> | ||
9 | |||
10 | replace_string("): " | ||
11 | |||
12 | single_page_link: //link[@type='text/xml+oembed'] | ||
13 | |||
14 | prune: no | ||
15 | tidy: no | ||
16 | |||
17 | test_url: http://vimeo.com/35941909 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/visir.is.txt b/inc/3rdparty/site_config/standard/visir.is.txt new file mode 100644 index 00000000..0f03198e --- /dev/null +++ b/inc/3rdparty/site_config/standard/visir.is.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | # Author's name, when present, has 'skrifar:' ('writes:') appended to it. | ||
2 | # In case of multiple authors, this would be 'skrifa:', hence only 7 characters | ||
3 | # are stripped off. | ||
4 | author: substring(//div[@class='paragraph']/div[@class='meta'], 0, string-length(//div[@class='paragraph']/div[@class='meta']) - 7) | ||
5 | |||
6 | date: //span[@class='date'] | ||
7 | title: //h1 | ||
8 | body: //div[@class='paragraph'] | ||
9 | |||
10 | # Strip out author string when present | ||
11 | strip: //div[@class='paragraph']/div[@class='meta'] | ||
12 | |||
13 | convert_double_br_tags: yes | ||
14 | test_url: http://visir.is/esb,-ipa,-bhm-og-bsrb/article/2012701319997 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vitispr.com.txt b/inc/3rdparty/site_config/standard/vitispr.com.txt new file mode 100644 index 00000000..8b2a300e --- /dev/null +++ b/inc/3rdparty/site_config/standard/vitispr.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | strip: //*[(@id = "ja-search")] | ||
2 | body: //*[(@id = "ja-mainbody")] | ||
3 | body: //*[(@id = "content-mass-bottom")] | ||
4 | strip://h3[contains(span,'Related Posts')] | ||
5 | strip://img | ||
6 | test_url: http://vitispr.com/blog/coventry-is-a-technology-hotspot \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vivirmexico.com.txt b/inc/3rdparty/site_config/standard/vivirmexico.com.txt new file mode 100644 index 00000000..e6a72700 --- /dev/null +++ b/inc/3rdparty/site_config/standard/vivirmexico.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | body: //*[(@class = "historia")] | ||
2 | test_url: http://vivirmexico.com/2011/09/en-veracruz-arrojan-35-cuerpos-a-plena-luz-del-dia-esta-si-es-una-alarma-social \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vnexpress.net.txt b/inc/3rdparty/site_config/standard/vnexpress.net.txt new file mode 100644 index 00000000..23c928bf --- /dev/null +++ b/inc/3rdparty/site_config/standard/vnexpress.net.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | body: //div[@cpms_content]//h2[@class='Lead'] | //div[@cpms_content]//p[@class='Normal'] | //div[@cpms_content]//table | ||
2 | strip://div[@class="box-item"] | ||
3 | strip://div[@id="ARTICLE_BANNER"] | ||
4 | strip://a | ||
5 | strip://div[@class="tag-parent"] | ||
6 | strip://div[@class="email-print txtr"] | ||
7 | |||
8 | test_url: http://vnexpress.net/gl/xa-hoi/2011/04/tim-thay-nan-nhan-cuoi-cung-vu-sap-mo-da-o-len-co/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt b/inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt new file mode 100644 index 00000000..6bd0e855 --- /dev/null +++ b/inc/3rdparty/site_config/standard/voices.washingtonpost.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@class='entrytext'] | ||
3 | test_url: http://voices.washingtonpost.com/ezra-klein/2010/10/why_isnt_monetary_policy_discr.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/vworker.com.txt b/inc/3rdparty/site_config/standard/vworker.com.txt new file mode 100644 index 00000000..a39c9f4e --- /dev/null +++ b/inc/3rdparty/site_config/standard/vworker.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[contains(@class, 'KonaBody')] | ||
2 | |||
3 | test_url: http://www.vworker.com/RentACoder/misc/BidRequests/ShowBidRequest.asp?lngBidRequestId=1634186 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/waffle.wootest.net.txt b/inc/3rdparty/site_config/standard/waffle.wootest.net.txt new file mode 100644 index 00000000..afcba0f3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/waffle.wootest.net.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: //h2[@class="title"] | ||
2 | body: //div[@class="post"] | ||
3 | |||
4 | test_url: http://waffle.wootest.net/2011/06/22/on-reading-news/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/walrusmagazine.com.txt b/inc/3rdparty/site_config/standard/walrusmagazine.com.txt new file mode 100644 index 00000000..3ab22172 --- /dev/null +++ b/inc/3rdparty/site_config/standard/walrusmagazine.com.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | title: //div[@id='pr']/h3 | ||
2 | author: //div[@class='dateline']//a[contains(@href, '/author/')] | ||
3 | |||
4 | # print page | ||
5 | body: //div[@id='prbody'] | ||
6 | # standard page | ||
7 | body: //div[@id='pgbody'] | ||
8 | |||
9 | # for multi-page articles | ||
10 | single_page_link: //div[@class='tipjar']//a[contains(@href, '/printerFriendly.php?')] | ||
11 | |||
12 | prune: no | ||
13 | |||
14 | test_url: http://www.walrusmagazine.com/articles/2011.12-memoir-kidnapped \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/warnerbros.fr.txt b/inc/3rdparty/site_config/standard/warnerbros.fr.txt new file mode 100644 index 00000000..a41a3511 --- /dev/null +++ b/inc/3rdparty/site_config/standard/warnerbros.fr.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: //h3 | ||
2 | body: //div[@class="content_wysiwyg"] | ||
3 | test_url: http://www.warnerbros.fr/game-of-thrones-un-junket-vu-de-l-interieur-268.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/washingtonmonthly.com.txt b/inc/3rdparty/site_config/standard/washingtonmonthly.com.txt new file mode 100644 index 00000000..edf16422 --- /dev/null +++ b/inc/3rdparty/site_config/standard/washingtonmonthly.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title://a[@class = 'headline-article'] | ||
2 | |||
3 | author: substring-after(//div[@class = 'article']/p[@class = 'author'], 'By ') | ||
4 | date://div[@class = 'article']/span[@class = 'date'] | ||
5 | body://div[@class = 'article'] | ||
6 | single_page_link://a[@class = 'print'] | ||
7 | strip://p[@class = 'author'] | ||
8 | strip://a[@class = 'headline-article'] | ||
9 | strip://span[@class = 'date'] | ||
10 | test_url: http://www.washingtonmonthly.com/magazine/julyaugust_2011/features/the_trinity_sisters030380.php \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/washingtonpost.com.txt b/inc/3rdparty/site_config/standard/washingtonpost.com.txt new file mode 100644 index 00000000..2931ca5f --- /dev/null +++ b/inc/3rdparty/site_config/standard/washingtonpost.com.txt | |||
@@ -0,0 +1,21 @@ | |||
1 | body: //div[@class="article_body"] | ||
2 | author://meta[@name='DC.creator']/@content | ||
3 | title://meta[@name='title']/@content | ||
4 | date://div[contains(@class,'byline')]//span[contains(@class,'published')]/@title | ||
5 | date://meta[@name="DC.date.issued"]/@content | ||
6 | strip://div[@class="relative primary-slot padding-top img-border gallery-container photo-wrapper"] | ||
7 | strip://div[@id="wp-column six end"] | ||
8 | strip://div[contains(@class,'hidden')] | ||
9 | strip://div[@id='article-side-rail'] | ||
10 | strip://div[@class="module component todays-paper-module curved"] | ||
11 | strip://div[@class="module component live-qa curved img-border"] | ||
12 | strip://div[@class="module component newsletter-signup curved"] | ||
13 | strip://div[@class="module featured-stories component curved img-border"] | ||
14 | |||
15 | strip_id_or_class: carousel | ||
16 | strip_id_or_class: toolbar | ||
17 | strip_id_or_class: module | ||
18 | |||
19 | test_url: http://www.washingtonpost.com/world/europe/in-europe-new-fears-of-german-might/2011/10/19/gIQA3baZ7L_story.html?hpid=z1 | ||
20 | test_url: http://www.washingtonpost.com/national/health-science/radical-theory-of-first-americans-places-stone-age-europeans-in-delmarva-20000-years-ago/2012/02/28/gIQA4mriiR_story.html | ||
21 | test_url: http://www.washingtonpost.com/lifestyle/magazine/the-sorry-fate-of-a-tech-pioneer-halsey-minor-and-historic-virginia-estate-carters-grove/2012/05/30/gJQAwdJG4U_story.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/web-libre.org.txt b/inc/3rdparty/site_config/standard/web-libre.org.txt new file mode 100644 index 00000000..dfcd0081 --- /dev/null +++ b/inc/3rdparty/site_config/standard/web-libre.org.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | body: //div[@id='template_article'] | ||
2 | |||
3 | strip_id_or_class: article_more | ||
4 | strip: //hr | ||
5 | |||
6 | test_url: http://www.web-libre.org/dossiers/jacuzzi-gonflable,8493.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt b/inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt new file mode 100644 index 00000000..9e75a8a8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/weblog.bignerdranch.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title://div[@class="post"]/h2 | ||
2 | author://p[@class="postinfo"]/a | ||
3 | date:substring-before(substring-after(//p[@class="postinfo"],' on '),' under ') | ||
4 | body://div[@class="contenttext"] | ||
5 | test_url: http://weblog.bignerdranch.com/?p=304 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/weblogs.asp.net.txt b/inc/3rdparty/site_config/standard/weblogs.asp.net.txt new file mode 100644 index 00000000..3fabda0b --- /dev/null +++ b/inc/3rdparty/site_config/standard/weblogs.asp.net.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //h2[@class="pageTitle"] | ||
2 | strip: //div[@class="postfoot"] | ||
3 | strip: //h2[@class="pageTitle"] | ||
4 | strip: //h3[@class="pageTitle"] | ||
5 | body: //div[@class="post"] | ||
6 | author: substring-before(substring-after(//div[@class="postfoot"], 'by'), 'Filed') | ||
7 | date: substring-before(substring-after(//div[@class="postfoot"], 'Published'), 'by') | ||
8 | |||
9 | test_url: http://weblogs.asp.net/scottgu/archive/2011/08/31/html-editor-smart-tasks-and-event-handler-generation-asp-net-vnext-series.aspx \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt b/inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt new file mode 100644 index 00000000..8922b02f --- /dev/null +++ b/inc/3rdparty/site_config/standard/webpaper.nzz.ch.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | tidy: no | ||
2 | dissolve: //div[@id="content"]/div/article/header | ||
3 | body: //div[@id="content"]/div/article | ||
4 | title: //div[@id="content"]/div/article/h1 | ||
5 | date: //div[@id="content"]/div/article/header/div[@id="issueSelectTrigger"] | ||
6 | strip: //div[@id="content"]/div/article/h1 | ||
7 | |||
8 | test_url: http://webpaper.nzz.ch/2012/06/23/front/JJKMS/aphrodite-und-die-kommunisten?guest_pass=24a3ca5b6d%3AJJKMS%3Ad30e1be8628c099669671d4da56cdce4187790ba \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/welt.de.txt b/inc/3rdparty/site_config/standard/welt.de.txt new file mode 100644 index 00000000..6e4f828f --- /dev/null +++ b/inc/3rdparty/site_config/standard/welt.de.txt | |||
@@ -0,0 +1,22 @@ | |||
1 | # set body | ||
2 | tidy: no | ||
3 | body: //div[contains(@class, 'articleContent')] | ||
4 | |||
5 | # remove clutter | ||
6 | strip: //div[@class='advertising'] | ||
7 | strip: //div[@class='themenalarm'] | ||
8 | strip: //div[contains(@class, 'inTextTeaser')] | ||
9 | |||
10 | # remove captions | ||
11 | strip: //span[@class='copyRight'] | ||
12 | |||
13 | # remove photo galleries and extras | ||
14 | strip: //div[contains(@class, 'textGallery')] | ||
15 | strip: //div[contains(@class, 'videoGallery')] | ||
16 | strip: //div[contains(@class, 'imageGallery')] | ||
17 | strip: //div[contains(@class, 'openContent')] | ||
18 | |||
19 | # remove comments | ||
20 | strip: //div[@id = 'writeComment'] | ||
21 | |||
22 | test_url: http://www.welt.de/vermischtes/weltgeschehen/article11050589/27-Bergleute-in-neuseelaendischer-Mine-vermisst.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/westhamtillidie.com.txt b/inc/3rdparty/site_config/standard/westhamtillidie.com.txt new file mode 100644 index 00000000..b9343029 --- /dev/null +++ b/inc/3rdparty/site_config/standard/westhamtillidie.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: substring-before(//title, '«') | ||
2 | |||
3 | body: //div[@class='entry'] | ||
4 | strip: //div[@class='sharing_label'] | ||
5 | strip: //div[@class='snap_nopreview sharing robots-nocontent'] | ||
6 | test_url: http://www.westhamtillidie.com/2012/03/11/twelve-things-we-learned-from-the-doncaster-game/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/what-if.xkcd.com.txt b/inc/3rdparty/site_config/standard/what-if.xkcd.com.txt new file mode 100644 index 00000000..a88a02c9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/what-if.xkcd.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | autodetect_next_page: no | ||
2 | test_url: http://what-if.xkcd.com/1/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/whatever.scalzi.com.txt b/inc/3rdparty/site_config/standard/whatever.scalzi.com.txt new file mode 100644 index 00000000..52c5cf1b --- /dev/null +++ b/inc/3rdparty/site_config/standard/whatever.scalzi.com.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | strip: //div[@class="navigation"] | ||
2 | strip: //div[@id="sidebar"] | ||
3 | strip: //div[@id="post-extra-content"] | ||
4 | strip: //div[@id="footer"] | ||
5 | strip: //div[contains(@class, "sharing")] | ||
6 | |||
7 | test_url: http://whatever.scalzi.com/2011/01/09/quick-giffords-follow-up/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wheelyric.com.txt b/inc/3rdparty/site_config/standard/wheelyric.com.txt new file mode 100644 index 00000000..aa9783cf --- /dev/null +++ b/inc/3rdparty/site_config/standard/wheelyric.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | body://div[contains(@class,'oAndtLyrics')] | ||
2 | strip://div[contains(@class,'info')] | ||
3 | strip://div[contains(@id,'romanization')] | ||
4 | strip://div[contains(@id,'youtube')] | ||
5 | strip://div[contains(@id,'romanizationSelector')] | ||
6 | strip://div[contains(@id,'langSelectWrap')] | ||
7 | strip://div[contains(@id,'requestTranslationWrap')] | ||
8 | strip://div[contains(@id,'viewMore')] | ||
9 | strip://div[contains(@class,'lyricsListInMainContent')] | ||
10 | strip://div[contains(@class,'descIpNoti')] | ||
11 | test_url: http://wheelyric.com/lyrics/121#2 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wiki.guildwars.com.txt b/inc/3rdparty/site_config/standard/wiki.guildwars.com.txt new file mode 100644 index 00000000..1f262a0a --- /dev/null +++ b/inc/3rdparty/site_config/standard/wiki.guildwars.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id='content'] | ||
3 | strip_id_or_class: editsection | ||
4 | strip_id_or_class: toc | ||
5 | strip: //div[@id='siteNotice'] | ||
6 | strip: //div[@id='content']//table[last()] | ||
7 | prune: no | ||
8 | test_url: http://wiki.guildwars.com/wiki/Monk \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt b/inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt new file mode 100644 index 00000000..e176907e --- /dev/null +++ b/inc/3rdparty/site_config/standard/wiki.guildwars2.com.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id='content'] | ||
3 | strip_id_or_class: editsection | ||
4 | strip_id_or_class: toc | ||
5 | strip: //div[@id='siteNotice'] | ||
6 | strip: //div[@id='content']//table[last()] | ||
7 | prune: no | ||
8 | test_url: http://wiki.guildwars2.com/wiki/Guardian \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wikitravel.org.txt b/inc/3rdparty/site_config/standard/wikitravel.org.txt new file mode 100644 index 00000000..da5bd0b5 --- /dev/null +++ b/inc/3rdparty/site_config/standard/wikitravel.org.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | # copied from .wikipedia.org.txt | ||
2 | title: //h1[@id='firstHeading' or @class='firstHeading'] | ||
3 | body: //div[@id = 'bodyContent'] | ||
4 | strip_id_or_class: editsection | ||
5 | #strip_id_or_class: toc | ||
6 | strip_id_or_class: vertical-navbox | ||
7 | strip: //table[@id='toc'] | //div[@id='p-toc'] | ||
8 | strip: //div[@id='catlinks' or @id='contentSub'] | ||
9 | strip: //div[@id='jump-to-nav'] | ||
10 | strip: //div[@class='thumbcaption']//div[@class='magnify'] | ||
11 | strip: //table[@class='navbox'] | ||
12 | prune: no | ||
13 | tidy: no | ||
14 | test_url: http://wikitravel.org/wiki/en/index.php?title=Bangkok&printable=yes \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/will-self.com.txt b/inc/3rdparty/site_config/standard/will-self.com.txt new file mode 100644 index 00000000..24467c22 --- /dev/null +++ b/inc/3rdparty/site_config/standard/will-self.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | strip: //div[@class="widget-area"] | ||
2 | title: //*[@class="entry-title"] | ||
3 | date: //time[@class="entry-date"] | ||
4 | test_url: http://will-self.com/2012/02/01/real-meals-dominos-pizza/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/williampfaff.com.txt b/inc/3rdparty/site_config/standard/williampfaff.com.txt new file mode 100644 index 00000000..fb5f92ed --- /dev/null +++ b/inc/3rdparty/site_config/standard/williampfaff.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: substring-after(//span[@class='itemTitle'], ':') | ||
2 | body: //div[@id='content'] | ||
3 | test_url: http://www.williampfaff.com/modules/news/article.php?storyid=491 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/winfuture.de.txt b/inc/3rdparty/site_config/standard/winfuture.de.txt new file mode 100644 index 00000000..bc936370 --- /dev/null +++ b/inc/3rdparty/site_config/standard/winfuture.de.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | title: //h1/span | ||
2 | |||
3 | body: //div[@id="news_content"] | ||
4 | |||
5 | author: //div[@class="bookmarks_btm"]/p[1]/a[1]/text() | ||
6 | |||
7 | date: //span[@class='date'] | ||
8 | |||
9 | # Rubrikenbild entfernen | ||
10 | strip: //div[@id="news_content"]/a[1] | ||
11 | |||
12 | test_url: http://winfuture.de/news,69672.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/winrumors.com.txt b/inc/3rdparty/site_config/standard/winrumors.com.txt new file mode 100644 index 00000000..cedb4390 --- /dev/null +++ b/inc/3rdparty/site_config/standard/winrumors.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h1[@class='page-heading'] | ||
2 | author: //small/strong/a | ||
3 | #their date string is relative, so if you save the page 2 hours after it is posted it may say 'two hours ago' instead of providing a useful date/time | |
4 | date: substring-before(substring-after(//small,'on'),'with') | ||
5 | body: //div[@class='entry'] | ||
6 | test_url: http://www.winrumors.com/chinese-windows-phone-launch-still-on-track-for-early-2012/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/winsupersite.com.txt b/inc/3rdparty/site_config/standard/winsupersite.com.txt new file mode 100644 index 00000000..db6a6fc9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/winsupersite.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | date: //*[@class='kicker'] | ||
2 | body: //*[@class='KonaBody'] | ||
3 | test_url: http://www.winsupersite.com/article/paul-thurrotts-wininfo/android-malware-surges-separate-studies-141364 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wired.com.txt b/inc/3rdparty/site_config/standard/wired.com.txt new file mode 100644 index 00000000..69bbf5b7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/wired.com.txt | |||
@@ -0,0 +1,22 @@ | |||
1 | title: //meta[@property="og:title"]/@content | ||
2 | title: //h1 | ||
3 | title: //*[@class='posttitle'] | ||
4 | author: //*[@class='entryAuthor']/a[1] | ||
5 | author://*[@class='member-title'] | ||
6 | author://li[@class='author']/a[contains(@href, '/author/')] | ||
7 | date: substring-after(//div[@class='entryAuthor'], '·') | ||
8 | date: substring-before(//*[@class='entryDate'], '|') | ||
9 | body: //div[@class='entry'] | ||
10 | strip: //span[contains(@class, 'nextprev')] | ||
11 | #strip_id_or_class: ngg-galleryoverview | ||
12 | # ngg-galleryoverview is the whole content sometimes, e.g. http://www.wired.com/underwire/2011/12/best-mixtapes-of-2011/?pid=5736&viewall=true | ||
13 | |||
14 | strip: //p[span[contains(@class, 'contentjump')]] | ||
15 | strip: //text()[contains(., 'nextpage')] | ||
16 | |||
17 | prune: no | ||
18 | |||
19 | single_page_link: //a[contains(@href, '/all/1') and contains(@class, 'contentjumpall')] | ||
20 | |||
21 | test_url: http://www.wired.com/cloudline/2011/10/meet-arms-cortex-a15-the-future-of-the-ipad-and-possibly-the-macbook-air/ | ||
22 | test_url: http://www.wired.com/threatlevel/2012/05/ff_counterfeiter/all/1 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wmnf.org.txt b/inc/3rdparty/site_config/standard/wmnf.org.txt new file mode 100644 index 00000000..ffb6b2d1 --- /dev/null +++ b/inc/3rdparty/site_config/standard/wmnf.org.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | title: //div[@class="bodyText"]/h1/text() | ||
2 | body: //div[@class="bodyText"] | ||
3 | |||
4 | # author and date are separated by only a newline | ||
5 | # can't figure out how to tokenize that yet | ||
6 | author: //div[@class="bodyText"]/span[@class="info"]/text() | ||
7 | date: //div[@class="bodyText"]/span[@class="info"]/text() | ||
8 | |||
9 | # strip metadata from body text | |
10 | strip: //div[@class="bodyText"]/h1/text() | ||
11 | strip: //div[@class="bodyText"]/span[@class="info"] | ||
12 | strip: //div[@class="bodyText"]/span[@class="info"] | ||
13 | test_url: http://www.wmnf.org/news_stories/light-rail-advocates-join-forces-to-combat-opposition-in-pinellas \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wmpoweruser.com.txt b/inc/3rdparty/site_config/standard/wmpoweruser.com.txt new file mode 100644 index 00000000..d9011d24 --- /dev/null +++ b/inc/3rdparty/site_config/standard/wmpoweruser.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | date://*[@class="entry-date"] | ||
2 | author://*[@class="author vcard"] | ||
3 | strip://*[@style="position:relative;left:72px;top:2px;"]|//*[@id="authorbox"] | ||
4 | test_url: http://wmpoweruser.com/breaking-nokia-announces-nfc-support-in-lumia-610-windows-phone-device/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/worldpoultry.net.txt b/inc/3rdparty/site_config/standard/worldpoultry.net.txt new file mode 100644 index 00000000..0e42ca5e --- /dev/null +++ b/inc/3rdparty/site_config/standard/worldpoultry.net.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title: //div[@class="content article"]/h1 | ||
2 | date: substring-after(//*[@class='date'], '//') | ||
3 | body: //*[@class='article-content'] | ||
4 | strip: //*[@id='nomodal'] | ||
5 | test_url: http://www.worldpoultry.net/news/kyrgyzstan-restricts-poultry-imports-from-russia-and-kazakhstan-9332.html \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/worldwidewords.org.txt b/inc/3rdparty/site_config/standard/worldwidewords.org.txt new file mode 100644 index 00000000..733d607f --- /dev/null +++ b/inc/3rdparty/site_config/standard/worldwidewords.org.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: //p[@id='content'] | ||
2 | |||
3 | body: //div[@class='contentblock'] | ||
4 | test_url: http://www.worldwidewords.org/weirdwords/ww-gro1.htm \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wow.joystiq.com.txt b/inc/3rdparty/site_config/standard/wow.joystiq.com.txt new file mode 100644 index 00000000..759fb81f --- /dev/null +++ b/inc/3rdparty/site_config/standard/wow.joystiq.com.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h2[@class="posttitle"] | ||
2 | body: //div[@class="post"] | ||
3 | strip: //h2[@class="posttitle"] | ||
4 | strip: //p[@class="filed-under"] | ||
5 | convert_double_br_tags: yes | ||
6 | test_url: http://wow.joystiq.com/2011/06/20/the-overachiever-guide-to-midsummer-festival-2011-achievements/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt b/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt new file mode 100644 index 00000000..0846be2c --- /dev/null +++ b/inc/3rdparty/site_config/standard/www1.folha.uol.com.br.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | body://div[@id='articleNew'] | ||
2 | strip://div[@id='articleBy'] | ||
3 | strip://div[@id='articleDate'] | ||
4 | strip://td[@class='articleGraphicCredit'] | ||
5 | strip://h1 | ||
6 | strip://div[@id='articleEnd'] | ||
7 | strip://p[@class='tagline'] | ||
8 | strip://div[@class='openBox adslibraryArticle'] | ||
9 | strip_id_or_class:ad-180x150-1 | ||
10 | |||
11 | |||
12 | title: //div[@id="articleNew"]/h1 | ||
13 | author: //div[@id="articleBy"]/p/b | ||
14 | date: substring-before(//div[@id="articleDate"], "-") | ||
15 | test_url: http://www1.folha.uol.com.br/mundo/1115805-ex-ditador-argentino-videla-e-condenado-a-50-anos-de-prisao.shtml \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt b/inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt new file mode 100644 index 00000000..71306af2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/www3.imperial.ac.uk.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | strip_id_or_class: hidelabel | ||
2 | test_url: http://www3.imperial.ac.uk/newsandeventspggrp/imperialcollege/newssummary/news_14-7-2010-15-53-18 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wyborcza.pl.txt b/inc/3rdparty/site_config/standard/wyborcza.pl.txt new file mode 100644 index 00000000..f99467c2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/wyborcza.pl.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title:h1 | ||
2 | author: //*[@class = 'author'] | ||
3 | date: //*[@class = 'date'] | ||
4 | body: //*[@id = 'art'] | ||
5 | next_page_link: //*[@id='Str']/a[contains(text(), 'nastepne')] | ||
6 | strip: //*[@class = 'rel_zdjTOP'] | ||
7 | strip: //*[@id = 'rel'] | ||
8 | strip: //*[@class = 'txt_upl'] | ||
9 | strip: //*[@id='Str'] | ||
10 | strip: //*[@id='source'] | ||
11 | test_url: http://wyborcza.pl/1,123455,11536088,Gdy_peknie_fejs__obryzga_wszystko.html?as=1&startsz=x \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wyctim.com.txt b/inc/3rdparty/site_config/standard/wyctim.com.txt new file mode 100644 index 00000000..d8c8713b --- /dev/null +++ b/inc/3rdparty/site_config/standard/wyctim.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | body: //div[@class='article-body'] | ||
2 | title: //h1 | ||
3 | test_url: http://wyctim.com/icloud-sync-regebbi-rendszereken/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/wz-newsline.de.txt b/inc/3rdparty/site_config/standard/wz-newsline.de.txt new file mode 100644 index 00000000..fbc1d3d2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/wz-newsline.de.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title://h1 | ||
2 | |||
3 | date://p[@class='articleDate'] | ||
4 | body://div[@class='articleBody wzStandardArticle'] | ||
5 | test_url: http://www.wz-newsline.de/home/sport/tennis/federer-zum-vierten-mal-sieger-in-indian-wells-1.938050 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/xoeb.us.txt b/inc/3rdparty/site_config/standard/xoeb.us.txt new file mode 100644 index 00000000..e02960e0 --- /dev/null +++ b/inc/3rdparty/site_config/standard/xoeb.us.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | title: //h1[@class="entry-title"] | ||
2 | author: //span[@class="fn"] | ||
3 | date: //p[@class="meta"] | ||
4 | test_url: http://xoeb.us/blog/2012/03/16/my-mistakes-with-our-first-release/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/yated.com.txt b/inc/3rdparty/site_config/standard/yated.com.txt new file mode 100644 index 00000000..13a3ea64 --- /dev/null +++ b/inc/3rdparty/site_config/standard/yated.com.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | title: //div[@class='pagetitle'] | ||
2 | test_url: http://www.yated.com/content.asp?categoryid=7&contentid=582 \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/yostivanich.com.txt b/inc/3rdparty/site_config/standard/yostivanich.com.txt new file mode 100644 index 00000000..9e24db3c --- /dev/null +++ b/inc/3rdparty/site_config/standard/yostivanich.com.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | title://div[@class='entry-title'] | ||
2 | body://div[@class='entry-content'] | ||
3 | strip_comments:yes | ||
4 | convert_double_br_tags:yes | ||
5 | test_url: http://www.yostivanich.com/2010/07/11/wired-com-with-world-watching-wikileaks-falls-into-disrepair/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/youtube.com.txt b/inc/3rdparty/site_config/standard/youtube.com.txt new file mode 100644 index 00000000..d52b7356 --- /dev/null +++ b/inc/3rdparty/site_config/standard/youtube.com.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | title: //title | ||
2 | body: //iframe | ||
3 | |||
4 | find_string: <html><iframe | ||
5 | replace_string: <iframe id="video" | ||
6 | |||
7 | find_string: ></iframe></html> | ||
8 | replace_string: ></iframe> | ||
9 | |||
10 | single_page_link: //link[@type='text/xml+oembed'] | ||
11 | |||
12 | prune: no | ||
13 | tidy: no | ||
14 | |||
15 | test_url: http://www.youtube.com/watch?v=F6gLH0r3iVU \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/zdnet.com.txt b/inc/3rdparty/site_config/standard/zdnet.com.txt new file mode 100644 index 00000000..b244b229 --- /dev/null +++ b/inc/3rdparty/site_config/standard/zdnet.com.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | title: //h1[@class="h s-1"] | ||
2 | author: substring-before(substring-after(//p[@class="meta s-10"], 'By'), '|') | ||
3 | author: substring-after(//div[@class="bio"]//h3, 'About ') | ||
4 | date: substring-after(//p[@class="meta s-10"], '|') | ||
5 | date: substring-after(//p[@class="meta"], '|') | ||
6 | body: //div[@class="content-1 entry space-1 clear"] | ||
7 | body: //div[@class="storyBody"] | ||
8 | |||
9 | test_url: http://www.zdnet.com/blog/microsoft/the-bing-back-end-more-on-cosmos-tiger-and-scope/10920 | ||
10 | test_url: http://www.zdnet.com/researchers-find-web-tracking-up-privacy-down-7000000358/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/zeit.de.txt b/inc/3rdparty/site_config/standard/zeit.de.txt new file mode 100644 index 00000000..66a7f1ac --- /dev/null +++ b/inc/3rdparty/site_config/standard/zeit.de.txt | |||
@@ -0,0 +1,44 @@ | |||
1 | # 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions | ||
2 | # 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section) | ||
3 | # 2011-12-09 [carlo@...] Removed "related articles" block | ||
4 | # 2011-08-23 [carlo@...] changed single page link to use print version: page works better, less ambiguity. Related cleanups and simplifications. | ||
5 | # 2011-08-20 [carlo@...] added author, fixed date | ||
6 | |||
7 | |||
8 | single_page_link: //a[@title='Druckversion'] | ||
9 | tidy: no | ||
10 | |||
11 | title: //title | ||
12 | date: substring-before( //li[@class="date"], " " ) | ||
13 | author: //li[@class="author"]/a/text() | //li[@class="author first"]/a/text() | ||
14 | author: substring-after(//li[@class='source first '], 'Quelle: ') | ||
15 | |||
16 | strip_id_or_class: articleheader | ||
17 | strip: //div[@id="comments"] | //div[@class="pagination block"] | //p[@class="ressortbacklink"] | //div[@id="relatedArticles"] | // div[@class="inline portrait"] | ||
18 | |||
19 | #Removes author and date from the start | ||
20 | strip: //ul[@class="tools"] | ||
21 | #Removes copyright statement - it often shows up as a distracting first line of the article | |
22 | strip: //p[@class="copyright"] | ||
23 | strip: //div[@class="copyright"] | ||
24 | #Removes pagination links at the end | ||
25 | strip: //div[@class="pagination"] | ||
26 | |||
27 | # Fix picture captions | ||
28 | wrap_in(small): //p[@class="caption"]/text() | ||
29 | |||
30 | # Fix sub-headlines | ||
31 | wrap_in(h2): //p/strong | ||
32 | dissolve: //h2/strong | ||
33 | |||
34 | #Sometimes things are embedded in the print version that are not displayed on the web, but will be displayed in the mobilized versions and can even cause problems. These sections are removed here. | |
35 | strip_id_or_class:"informatives" | ||
36 | strip_id_or_class:"bottom" | ||
37 | strip_id_or_class:"teasermosaic" | ||
38 | strip_id_or_class:"comments" | ||
39 | strip_id_or_class:"articlefooter af" | ||
40 | strip_id_or_class:"relateds" | ||
41 | strip_id_or_class:"pagination" | ||
42 | |||
43 | footnotes: no | ||
44 | test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/zerodistraction.com.txt b/inc/3rdparty/site_config/standard/zerodistraction.com.txt new file mode 100644 index 00000000..d3b60c7d --- /dev/null +++ b/inc/3rdparty/site_config/standard/zerodistraction.com.txt | |||
@@ -0,0 +1,4 @@ | |||
1 | author: //span[@class='author']//a | ||
2 | date: //span[@class='date'] | ||
3 | test_url: http://zerodistraction.com/blog/2012/3/11/retina-ipad-that-means-i-am-going-digital-only-for-comic-boo.html | ||
4 | test_url: http://zerodistraction.com/notes/unreasonably-grumpy \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/zerokspot.com.txt b/inc/3rdparty/site_config/standard/zerokspot.com.txt new file mode 100644 index 00000000..ea9132aa --- /dev/null +++ b/inc/3rdparty/site_config/standard/zerokspot.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: //h1 | ||
2 | body: //div[@id="primarycontent"] | ||
3 | test_url: http://zerokspot.com/weblog/2011/06/26/europython2011/ \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/zingtrain.com.txt b/inc/3rdparty/site_config/standard/zingtrain.com.txt new file mode 100644 index 00000000..2a2f58a8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/zingtrain.com.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | title: substring-after(id, 'post')/h2 | ||
2 | body://div[@class = 'entry'] | ||
3 | test_url: http://www.zingtrain.com/category/ontrack/january-2007/ \ No newline at end of file | ||
diff --git a/inc/poche/Database.class.php b/inc/poche/Database.class.php index bf67de2a..afe02a41 100644 --- a/inc/poche/Database.class.php +++ b/inc/poche/Database.class.php | |||
@@ -39,12 +39,79 @@ class Database { | |||
39 | public function isInstalled() { | 39 | public function isInstalled() { |
40 | $sql = "SELECT username FROM users"; | 40 | $sql = "SELECT username FROM users"; |
41 | $query = $this->executeQuery($sql, array()); | 41 | $query = $this->executeQuery($sql, array()); |
42 | if ($query == false) { | ||
43 | die(STORAGE . ' database looks empty. You have to create it (you can find database structure in install folder).'); | ||
44 | } | ||
42 | $hasAdmin = count($query->fetchAll()); | 45 | $hasAdmin = count($query->fetchAll()); |
43 | 46 | ||
44 | if ($hasAdmin == 0) | 47 | if ($hasAdmin == 0) |
45 | return FALSE; | 48 | return false; |
46 | 49 | ||
47 | return TRUE; | 50 | return true; |
51 | } | ||
52 | |||
53 | public function checkTags() { | ||
54 | |||
55 | if (STORAGE == 'sqlite') { | ||
56 | $sql = ' | ||
57 | CREATE TABLE IF NOT EXISTS tags ( | ||
58 | id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL UNIQUE, | ||
59 | value TEXT | ||
60 | )'; | ||
61 | } | ||
62 | elseif(STORAGE == 'mysql') { | ||
63 | $sql = ' | ||
64 | CREATE TABLE IF NOT EXISTS `tags` ( | ||
65 | `id` int(11) NOT NULL AUTO_INCREMENT, | ||
66 | `value` varchar(255) NOT NULL, | ||
67 | PRIMARY KEY (`id`) | ||
68 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; | ||
69 | '; | ||
70 | } | ||
71 | else { | ||
72 | $sql = ' | ||
73 | CREATE TABLE tags ( | ||
74 | id bigserial primary key, | ||
75 | value varchar(255) NOT NULL | ||
76 | ); | ||
77 | '; | ||
78 | } | ||
79 | |||
80 | $query = $this->executeQuery($sql, array()); | ||
81 | |||
82 | if (STORAGE == 'sqlite') { | ||
83 | $sql = ' | ||
84 | CREATE TABLE tags_entries ( | ||
85 | id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL UNIQUE, | ||
86 | entry_id INTEGER, | ||
87 | tag_id INTEGER, | ||
88 | FOREIGN KEY(entry_id) REFERENCES entries(id) ON DELETE CASCADE, | ||
89 | FOREIGN KEY(tag_id) REFERENCES tags(id) ON DELETE CASCADE | ||
90 | )'; | ||
91 | } | ||
92 | elseif(STORAGE == 'mysql') { | ||
93 | $sql = ' | ||
94 | CREATE TABLE IF NOT EXISTS `tags_entries` ( | ||
95 | `id` int(11) NOT NULL AUTO_INCREMENT, | ||
96 | `entry_id` int(11) NOT NULL, | ||
97 | `tag_id` int(11) NOT NULL, | ||
98 | FOREIGN KEY(entry_id) REFERENCES entries(id) ON DELETE CASCADE, | ||
99 | FOREIGN KEY(tag_id) REFERENCES tags(id) ON DELETE CASCADE, | ||
100 | PRIMARY KEY (`id`) | ||
101 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; | ||
102 | '; | ||
103 | } | ||
104 | else { | ||
105 | $sql = ' | ||
106 | CREATE TABLE tags_entries ( | ||
107 | id bigserial primary key, | ||
108 | entry_id integer NOT NULL, | ||
109 | tag_id integer NOT NULL | ||
110 | ) | ||
111 | '; | ||
112 | } | ||
113 | |||
114 | $query = $this->executeQuery($sql, array()); | ||
48 | } | 115 | } |
49 | 116 | ||
50 | public function install($login, $password) { | 117 | public function install($login, $password) { |
@@ -74,7 +141,7 @@ class Database { | |||
74 | return TRUE; | 141 | return TRUE; |
75 | } | 142 | } |
76 | 143 | ||
77 | private function getConfigUser($id) { | 144 | public function getConfigUser($id) { |
78 | $sql = "SELECT * FROM users_config WHERE user_id = ?"; | 145 | $sql = "SELECT * FROM users_config WHERE user_id = ?"; |
79 | $query = $this->executeQuery($sql, array($id)); | 146 | $query = $this->executeQuery($sql, array($id)); |
80 | $result = $query->fetchAll(); | 147 | $result = $query->fetchAll(); |
@@ -127,10 +194,10 @@ class Database { | |||
127 | $config = $this->getConfigUser($userId); | 194 | $config = $this->getConfigUser($userId); |
128 | 195 | ||
129 | if (!isset ($user_config[$key])) { | 196 | if (!isset ($user_config[$key])) { |
130 | $sql = "INSERT INTO users_config (`value`, `user_id`, `name`) VALUES (?, ?, ?)"; | 197 | $sql = "INSERT INTO users_config (value, user_id, name) VALUES (?, ?, ?)"; |
131 | } | 198 | } |
132 | else { | 199 | else { |
133 | $sql = "UPDATE users_config SET `value`=? WHERE `user_id`=? AND `name`=?"; | 200 | $sql = "UPDATE users_config SET value=? WHERE user_id=? AND name=?"; |
134 | } | 201 | } |
135 | 202 | ||
136 | $params = array($value, $userId, $key); | 203 | $params = array($value, $userId, $key); |
@@ -249,4 +316,75 @@ class Database { | |||
249 | public function getLastId($column = '') { | 316 | public function getLastId($column = '') { |
250 | return $this->getHandle()->lastInsertId($column); | 317 | return $this->getHandle()->lastInsertId($column); |
251 | } | 318 | } |
319 | |||
320 | public function retrieveAllTags() { | ||
321 | $sql = "SELECT * FROM tags"; | ||
322 | $query = $this->executeQuery($sql, array()); | ||
323 | $tags = $query->fetchAll(); | ||
324 | |||
325 | return $tags; | ||
326 | } | ||
327 | |||
328 | public function retrieveTag($id) { | ||
329 | $tag = NULL; | ||
330 | $sql = "SELECT * FROM tags WHERE id=?"; | ||
331 | $params = array(intval($id)); | ||
332 | $query = $this->executeQuery($sql, $params); | ||
333 | $tag = $query->fetchAll(); | ||
334 | |||
335 | return isset($tag[0]) ? $tag[0] : null; | ||
336 | } | ||
337 | |||
338 | public function retrieveEntriesByTag($tag_id) { | ||
339 | $sql = | ||
340 | "SELECT * FROM entries | ||
341 | LEFT JOIN tags_entries ON tags_entries.entry_id=entries.id | ||
342 | WHERE tags_entries.tag_id = ?"; | ||
343 | $query = $this->executeQuery($sql, array($tag_id)); | ||
344 | $entries = $query->fetchAll(); | ||
345 | |||
346 | return $entries; | ||
347 | } | ||
348 | |||
349 | public function retrieveTagsByEntry($entry_id) { | ||
350 | $sql = | ||
351 | "SELECT * FROM tags | ||
352 | LEFT JOIN tags_entries ON tags_entries.tag_id=tags.id | ||
353 | WHERE tags_entries.entry_id = ?"; | ||
354 | $query = $this->executeQuery($sql, array($entry_id)); | ||
355 | $tags = $query->fetchAll(); | ||
356 | |||
357 | return $tags; | ||
358 | } | ||
359 | |||
360 | public function removeTagForEntry($entry_id, $tag_id) { | ||
361 | $sql_action = "DELETE FROM tags_entries WHERE tag_id=? AND entry_id=?"; | ||
362 | $params_action = array($tag_id, $entry_id); | ||
363 | $query = $this->executeQuery($sql_action, $params_action); | ||
364 | return $query; | ||
365 | } | ||
366 | |||
367 | public function retrieveTagByValue($value) { | ||
368 | $tag = NULL; | ||
369 | $sql = "SELECT * FROM tags WHERE value=?"; | ||
370 | $params = array($value); | ||
371 | $query = $this->executeQuery($sql, $params); | ||
372 | $tag = $query->fetchAll(); | ||
373 | |||
374 | return isset($tag[0]) ? $tag[0] : null; | ||
375 | } | ||
376 | |||
377 | public function createTag($value) { | ||
378 | $sql_action = 'INSERT INTO tags ( value ) VALUES (?)'; | ||
379 | $params_action = array($value); | ||
380 | $query = $this->executeQuery($sql_action, $params_action); | ||
381 | return $query; | ||
382 | } | ||
383 | |||
384 | public function setTagToEntry($tag_id, $entry_id) { | ||
385 | $sql_action = 'INSERT INTO tags_entries ( tag_id, entry_id ) VALUES (?, ?)'; | ||
386 | $params_action = array($tag_id, $entry_id); | ||
387 | $query = $this->executeQuery($sql_action, $params_action); | ||
388 | return $query; | ||
389 | } | ||
252 | } | 390 | } |
diff --git a/inc/poche/Poche.class.php b/inc/poche/Poche.class.php index 3ecaf084..4f70afb7 100644 --- a/inc/poche/Poche.class.php +++ b/inc/poche/Poche.class.php | |||
@@ -49,6 +49,7 @@ class Poche | |||
49 | if (! $this->store->isInstalled()) { | 49 | if (! $this->store->isInstalled()) { |
50 | $this->install(); | 50 | $this->install(); |
51 | } | 51 | } |
52 | $this->store->checkTags(); | ||
52 | } | 53 | } |
53 | } | 54 | } |
54 | 55 | ||
@@ -332,9 +333,12 @@ class Poche | |||
332 | switch ($action) | 333 | switch ($action) |
333 | { | 334 | { |
334 | case 'add': | 335 | case 'add': |
335 | $content = $url->extract(); | 336 | $json = file_get_contents(Tools::getPocheUrl() . '/inc/3rdparty/makefulltextfeed.php?url='.urlencode($url->getUrl()).'&max=5&links=preserve&exc=&format=json&submit=Create+Feed'); |
337 | $content = json_decode($json, true); | ||
338 | $title = $content['rss']['channel']['item']['title']; | ||
339 | $body = $content['rss']['channel']['item']['description']; | ||
336 | 340 | ||
337 | if ($this->store->add($url->getUrl(), $content['title'], $content['body'], $this->user->getId())) { | 341 | if ($this->store->add($url->getUrl(), $title, $body, $this->user->getId())) { |
338 | Tools::logm('add link ' . $url->getUrl()); | 342 | Tools::logm('add link ' . $url->getUrl()); |
339 | $sequence = ''; | 343 | $sequence = ''; |
340 | if (STORAGE == 'postgres') { | 344 | if (STORAGE == 'postgres') { |
@@ -342,7 +346,7 @@ class Poche | |||
342 | } | 346 | } |
343 | $last_id = $this->store->getLastId($sequence); | 347 | $last_id = $this->store->getLastId($sequence); |
344 | if (DOWNLOAD_PICTURES) { | 348 | if (DOWNLOAD_PICTURES) { |
345 | $content = filtre_picture($content['body'], $url->getUrl(), $last_id); | 349 | $content = filtre_picture($body, $url->getUrl(), $last_id); |
346 | Tools::logm('updating content article'); | 350 | Tools::logm('updating content article'); |
347 | $this->store->updateContent($last_id, $content, $this->user->getId()); | 351 | $this->store->updateContent($last_id, $content, $this->user->getId()); |
348 | } | 352 | } |
@@ -394,6 +398,36 @@ class Poche | |||
394 | Tools::redirect(); | 398 | Tools::redirect(); |
395 | } | 399 | } |
396 | break; | 400 | break; |
401 | case 'add_tag' : | ||
402 | $tags = explode(',', $_POST['value']); | ||
403 | $entry_id = $_POST['entry_id']; | ||
404 | foreach($tags as $key => $tag_value) { | ||
405 | $value = trim($tag_value); | ||
406 | $tag = $this->store->retrieveTagByValue($value); | ||
407 | |||
408 | if (is_null($tag)) { | ||
409 | # we create the tag | ||
410 | $tag = $this->store->createTag($value); | ||
411 | $sequence = ''; | ||
412 | if (STORAGE == 'postgres') { | ||
413 | $sequence = 'tags_id_seq'; | ||
414 | } | ||
415 | $tag_id = $this->store->getLastId($sequence); | ||
416 | } | ||
417 | else { | ||
418 | $tag_id = $tag['id']; | ||
419 | } | ||
420 | |||
421 | # we assign the tag to the article | ||
422 | $this->store->setTagToEntry($tag_id, $entry_id); | ||
423 | } | ||
424 | Tools::redirect(); | ||
425 | break; | ||
426 | case 'remove_tag' : | ||
427 | $tag_id = $_GET['tag_id']; | ||
428 | $this->store->removeTagForEntry($id, $tag_id); | ||
429 | Tools::redirect(); | ||
430 | break; | ||
397 | default: | 431 | default: |
398 | break; | 432 | break; |
399 | } | 433 | } |
@@ -412,7 +446,8 @@ class Poche | |||
412 | $compare_prod = version_compare(POCHE, $prod); | 446 | $compare_prod = version_compare(POCHE, $prod); |
413 | $themes = $this->getInstalledThemes(); | 447 | $themes = $this->getInstalledThemes(); |
414 | $languages = $this->getInstalledLanguages(); | 448 | $languages = $this->getInstalledLanguages(); |
415 | $http_auth = (isset($_SERVER['PHP_AUTH_USER']))?true:false; | 449 | $token = $this->user->getConfigValue('token'); |
450 | $http_auth = (isset($_SERVER['PHP_AUTH_USER']) || isset($_SERVER['REMOTE_USER'])) ? true : false; | ||
416 | $tpl_vars = array( | 451 | $tpl_vars = array( |
417 | 'themes' => $themes, | 452 | 'themes' => $themes, |
418 | 'languages' => $languages, | 453 | 'languages' => $languages, |
@@ -420,10 +455,37 @@ class Poche | |||
420 | 'prod' => $prod, | 455 | 'prod' => $prod, |
421 | 'compare_dev' => $compare_dev, | 456 | 'compare_dev' => $compare_dev, |
422 | 'compare_prod' => $compare_prod, | 457 | 'compare_prod' => $compare_prod, |
458 | 'token' => $token, | ||
459 | 'user_id' => $this->user->getId(), | ||
423 | 'http_auth' => $http_auth, | 460 | 'http_auth' => $http_auth, |
424 | ); | 461 | ); |
425 | Tools::logm('config view'); | 462 | Tools::logm('config view'); |
426 | break; | 463 | break; |
464 | case 'edit-tags': | ||
465 | # tags | ||
466 | $tags = $this->store->retrieveTagsByEntry($id); | ||
467 | $tpl_vars = array( | ||
468 | 'entry_id' => $id, | ||
469 | 'tags' => $tags, | ||
470 | ); | ||
471 | break; | ||
472 | case 'tag': | ||
473 | $entries = $this->store->retrieveEntriesByTag($id); | ||
474 | $tag = $this->store->retrieveTag($id); | ||
475 | $tpl_vars = array( | ||
476 | 'tag' => $tag, | ||
477 | 'entries' => $entries, | ||
478 | ); | ||
479 | break; | ||
480 | case 'tags': | ||
481 | $token = $this->user->getConfigValue('token'); | ||
482 | $tags = $this->store->retrieveAllTags(); | ||
483 | $tpl_vars = array( | ||
484 | 'token' => $token, | ||
485 | 'user_id' => $this->user->getId(), | ||
486 | 'tags' => $tags, | ||
487 | ); | ||
488 | break; | ||
427 | case 'view': | 489 | case 'view': |
428 | $entry = $this->store->retrieveOneById($id, $this->user->getId()); | 490 | $entry = $this->store->retrieveOneById($id, $this->user->getId()); |
429 | if ($entry != NULL) { | 491 | if ($entry != NULL) { |
@@ -437,12 +499,16 @@ class Poche | |||
437 | 499 | ||
438 | # flattr checking | 500 | # flattr checking |
439 | $flattr = new FlattrItem(); | 501 | $flattr = new FlattrItem(); |
440 | $flattr->checkItem($entry['url'],$entry['id']); | 502 | $flattr->checkItem($entry['url'], $entry['id']); |
503 | |||
504 | # tags | ||
505 | $tags = $this->store->retrieveTagsByEntry($entry['id']); | ||
441 | 506 | ||
442 | $tpl_vars = array( | 507 | $tpl_vars = array( |
443 | 'entry' => $entry, | 508 | 'entry' => $entry, |
444 | 'content' => $content, | 509 | 'content' => $content, |
445 | 'flattr' => $flattr | 510 | 'flattr' => $flattr, |
511 | 'tags' => $tags | ||
446 | ); | 512 | ); |
447 | } | 513 | } |
448 | else { | 514 | else { |
@@ -584,14 +650,18 @@ class Poche | |||
584 | * it redirects the user to the $referer link | 650 | * it redirects the user to the $referer link |
585 | * @return array | 651 | * @return array |
586 | */ | 652 | */ |
587 | private function credentials() { | 653 | private function credentials() { |
588 | if(isset($_SERVER['PHP_AUTH_USER'])) { | 654 | if(isset($_SERVER['PHP_AUTH_USER'])) { |
589 | return array($_SERVER['PHP_AUTH_USER'],'php_auth'); | 655 | return array($_SERVER['PHP_AUTH_USER'],'php_auth'); |
590 | } | 656 | } |
591 | if(!empty($_POST['login']) && !empty($_POST['password'])) { | 657 | if(!empty($_POST['login']) && !empty($_POST['password'])) { |
592 | return array($_POST['login'],$_POST['password']); | 658 | return array($_POST['login'],$_POST['password']); |
593 | } | 659 | } |
594 | return array(false,false); | 660 | if(isset($_SERVER['REMOTE_USER'])) { |
661 | return array($_SERVER['REMOTE_USER'],'http_auth'); | ||
662 | } | ||
663 | |||
664 | return array(false,false); | ||
595 | } | 665 | } |
596 | 666 | ||
597 | /** | 667 | /** |
@@ -613,7 +683,8 @@ class Poche | |||
613 | $user = $this->store->login($login, Tools::encodeString($password . $login)); | 683 | $user = $this->store->login($login, Tools::encodeString($password . $login)); |
614 | if ($user != array()) { | 684 | if ($user != array()) { |
615 | # Save login into Session | 685 | # Save login into Session |
616 | Session::login($user['username'], $user['password'], $login, Tools::encodeString($password . $login), array('poche_user' => new User($user))); | 686 | $longlastingsession = isset($_POST['longlastingsession']); |
687 | Session::login($user['username'], $user['password'], $login, Tools::encodeString($password . $login), $longlastingsession, array('poche_user' => new User($user))); | ||
617 | $this->messages->add('s', _('welcome to your poche')); | 688 | $this->messages->add('s', _('welcome to your poche')); |
618 | Tools::logm('login successful'); | 689 | Tools::logm('login successful'); |
619 | Tools::redirect($referer); | 690 | Tools::redirect($referer); |
@@ -837,4 +908,58 @@ class Poche | |||
837 | } | 908 | } |
838 | return $version; | 909 | return $version; |
839 | } | 910 | } |
911 | |||
912 | public function generateToken() | ||
913 | { | ||
914 | if (ini_get('open_basedir') === '') { | ||
915 | $token = substr(base64_encode(file_get_contents('/dev/urandom', false, null, 0, 20)), 0, 15); | ||
916 | } | ||
917 | else { | ||
918 | $token = substr(base64_encode(uniqid(mt_rand(), true)), 0, 20); | ||
919 | } | ||
920 | |||
921 | $this->store->updateUserConfig($this->user->getId(), 'token', $token); | ||
922 | $currentConfig = $_SESSION['poche_user']->config; | ||
923 | $currentConfig['token'] = $token; | ||
924 | $_SESSION['poche_user']->setConfig($currentConfig); | ||
925 | } | ||
926 | |||
927 | public function generateFeeds($token, $user_id, $tag_id, $type = 'home') | ||
928 | { | ||
929 | $allowed_types = array('home', 'fav', 'archive', 'tag'); | ||
930 | $config = $this->store->getConfigUser($user_id); | ||
931 | |||
932 | if (!in_array($type, $allowed_types) || | ||
933 | $token != $config['token']) { | ||
934 | die(_('Uh, there is a problem while generating feeds.')); | ||
935 | } | ||
936 | // Check the token | ||
937 | |||
938 | $feed = new FeedWriter(RSS2); | ||
939 | $feed->setTitle('poche - ' . $type . ' feed'); | ||
940 | $feed->setLink(Tools::getPocheUrl()); | ||
941 | $feed->setChannelElement('updated', date(DATE_RSS , time())); | ||
942 | $feed->setChannelElement('author', 'poche'); | ||
943 | |||
944 | if ($type == 'tag') { | ||
945 | $entries = $this->store->retrieveEntriesByTag($tag_id); | ||
946 | } | ||
947 | else { | ||
948 | $entries = $this->store->getEntriesByView($type, $user_id); | ||
949 | } | ||
950 | |||
951 | if (count($entries) > 0) { | ||
952 | foreach ($entries as $entry) { | ||
953 | $newItem = $feed->createNewItem(); | ||
954 | $newItem->setTitle(htmlentities($entry['title'])); | ||
955 | $newItem->setLink(Tools::getPocheUrl() . '?view=view&id=' . $entry['id']); | ||
956 | $newItem->setDate(time()); | ||
957 | $newItem->setDescription($entry['content']); | ||
958 | $feed->addItem($newItem); | ||
959 | } | ||
960 | } | ||
961 | |||
962 | $feed->genarateFeed(); | ||
963 | exit; | ||
964 | } | ||
840 | } | 965 | } |
diff --git a/inc/poche/PocheReadability.php b/inc/poche/PocheReadability.php deleted file mode 100644 index 48ae90d0..00000000 --- a/inc/poche/PocheReadability.php +++ /dev/null | |||
@@ -1,46 +0,0 @@ | |||
1 | <?php | ||
2 | |||
3 | class PocheReadability extends Readability | ||
4 | { | ||
5 | /** | ||
6 | * Get the article title as an H1. | ||
7 | * | ||
8 | * @return DOMElement | ||
9 | */ | ||
10 | protected function getArticleTitle() { | ||
11 | $curTitle = ''; | ||
12 | $origTitle = ''; | ||
13 | |||
14 | try { | ||
15 | $curTitle = $origTitle = $this->getInnerText($this->dom->getElementsByTagName('title')->item(0)); | ||
16 | } catch(Exception $e) {} | ||
17 | |||
18 | if (preg_match('/ [\|\-] /', $curTitle)) | ||
19 | { | ||
20 | $curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle); | ||
21 | |||
22 | if (count(explode(' ', $curTitle)) < 3) { | ||
23 | $curTitle = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $origTitle); | ||
24 | } | ||
25 | } | ||
26 | else if(strlen($curTitle) > 150 || strlen($curTitle) < 15) | ||
27 | { | ||
28 | $hOnes = $this->dom->getElementsByTagName('h1'); | ||
29 | if($hOnes->length == 1) | ||
30 | { | ||
31 | $curTitle = $this->getInnerText($hOnes->item(0)); | ||
32 | } | ||
33 | } | ||
34 | |||
35 | $curTitle = trim($curTitle); | ||
36 | |||
37 | if (count(explode(' ', $curTitle)) <= 4) { | ||
38 | $curTitle = $origTitle; | ||
39 | } | ||
40 | |||
41 | $articleTitle = $this->dom->createElement('h1'); | ||
42 | $articleTitle->innerHTML = $curTitle; | ||
43 | |||
44 | return $articleTitle; | ||
45 | } | ||
46 | } \ No newline at end of file | ||
diff --git a/inc/poche/Tools.class.php b/inc/poche/Tools.class.php index 750553f1..63916582 100644 --- a/inc/poche/Tools.class.php +++ b/inc/poche/Tools.class.php | |||
@@ -88,39 +88,16 @@ class Tools | |||
88 | 88 | ||
89 | public static function getTplFile($view) | 89 | public static function getTplFile($view) |
90 | { | 90 | { |
91 | $default_tpl = 'home.twig'; | 91 | $views = array( |
92 | 92 | 'install', 'import', 'export', 'config', 'tags', | |
93 | switch ($view) { | 93 | 'edit-tags', 'view', 'login', 'error', 'tag' |
94 | case 'install': | 94 | ); |
95 | $tpl_file = 'install.twig'; | 95 | |
96 | break; | 96 | if (in_array($view, $views)) { |
97 | case 'import'; | 97 | return $view . '.twig'; |
98 | $tpl_file = 'import.twig'; | ||
99 | break; | ||
100 | case 'export': | ||
101 | $tpl_file = 'export.twig'; | ||
102 | break; | ||
103 | case 'config': | ||
104 | $tpl_file = 'config.twig'; | ||
105 | break; | ||
106 | case 'view': | ||
107 | $tpl_file = 'view.twig'; | ||
108 | break; | ||
109 | |||
110 | case 'login': | ||
111 | $tpl_file = 'login.twig'; | ||
112 | break; | ||
113 | |||
114 | case 'error': | ||
115 | $tpl_file = 'error.twig'; | ||
116 | break; | ||
117 | |||
118 | default: | ||
119 | $tpl_file = $default_tpl; | ||
120 | break; | ||
121 | } | 98 | } |
122 | 99 | ||
123 | return $tpl_file; | 100 | return 'home.twig'; |
124 | } | 101 | } |
125 | 102 | ||
126 | public static function getFile($url) | 103 | public static function getFile($url) |
@@ -249,4 +226,28 @@ class Tools | |||
249 | $lang = explode('.', $userlanguage); | 226 | $lang = explode('.', $userlanguage); |
250 | return str_replace('_', '-', $lang[0]); | 227 | return str_replace('_', '-', $lang[0]); |
251 | } | 228 | } |
229 | |||
230 | public static function status($status_code) | ||
231 | { | ||
232 | if (strpos(php_sapi_name(), 'apache') !== false) { | ||
233 | |||
234 | header('HTTP/1.0 '.$status_code); | ||
235 | } | ||
236 | else { | ||
237 | |||
238 | header('Status: '.$status_code); | ||
239 | } | ||
240 | } | ||
241 | |||
242 | |||
243 | public static function download_db() { | ||
244 | header('Content-Disposition: attachment; filename="poche.sqlite.gz"'); | ||
245 | self::status(200); | ||
246 | |||
247 | header('Content-Transfer-Encoding: binary'); | ||
248 | header('Content-Type: application/octet-stream'); | ||
249 | echo gzencode(file_get_contents(STORAGE_SQLITE)); | ||
250 | |||
251 | exit; | ||
252 | } | ||
252 | } | 253 | } |
diff --git a/inc/poche/Url.class.php b/inc/poche/Url.class.php index 600a2166..8b3468c3 100644 --- a/inc/poche/Url.class.php +++ b/inc/poche/Url.class.php | |||
@@ -12,45 +12,6 @@ class Url | |||
12 | { | 12 | { |
13 | public $url; | 13 | public $url; |
14 | 14 | ||
15 | private $fingerprints = array( | ||
16 | // Posterous | ||
17 | '<meta name="generator" content="Posterous"' => array('hostname'=>'fingerprint.posterous.com', 'head'=>true), | ||
18 | // Blogger | ||
19 | '<meta content=\'blogger\' name=\'generator\'' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true), | ||
20 | '<meta name="generator" content="Blogger"' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true), | ||
21 | // WordPress (self-hosted and hosted) | ||
22 | '<meta name="generator" content="WordPress' => array('hostname'=>'fingerprint.wordpress.com', 'head'=>true) | ||
23 | ); | ||
24 | |||
25 | private $user_agents = array( 'lifehacker.com' => 'PHP/5.2', | ||
26 | 'gawker.com' => 'PHP/5.2', | ||
27 | 'deadspin.com' => 'PHP/5.2', | ||
28 | 'kotaku.com' => 'PHP/5.2', | ||
29 | 'jezebel.com' => 'PHP/5.2', | ||
30 | 'io9.com' => 'PHP/5.2', | ||
31 | 'jalopnik.com' => 'PHP/5.2', | ||
32 | 'gizmodo.com' => 'PHP/5.2', | ||
33 | '.wikipedia.org' => 'Mozilla/5.2' | ||
34 | ); | ||
35 | |||
36 | private $content_type_exc = array( | ||
37 | 'application/pdf' => array('action'=>'link', 'name'=>'PDF'), | ||
38 | 'image' => array('action'=>'link', 'name'=>'Image'), | ||
39 | 'audio' => array('action'=>'link', 'name'=>'Audio'), | ||
40 | 'video' => array('action'=>'link', 'name'=>'Video') | ||
41 | ); | ||
42 | |||
43 | private $rewrite_url = array( | ||
44 | // Rewrite public Google Docs URLs to point to HTML view: | ||
45 | // if a URL contains docs.google.com, replace /Doc? with /View? | ||
46 | 'docs.google.com' => array('/Doc?' => '/View?'), | ||
47 | 'tnr.com' => array('tnr.com/article/' => 'tnr.com/print/article/'), | ||
48 | '.m.wikipedia.org' => array('.m.wikipedia.org' => '.wikipedia.org') | ||
49 | ); | ||
50 | |||
51 | private $rewrite_relative_urls = true; | ||
52 | private $error_message = '[unable to retrieve full-text content]'; | ||
53 | |||
54 | function __construct($url) | 15 | function __construct($url) |
55 | { | 16 | { |
56 | $this->url = base64_decode($url); | 17 | $this->url = base64_decode($url); |
@@ -67,329 +28,4 @@ class Url | |||
67 | public function isCorrect() { | 28 | public function isCorrect() { |
68 | return filter_var($this->url, FILTER_VALIDATE_URL) !== FALSE; | 29 | return filter_var($this->url, FILTER_VALIDATE_URL) !== FALSE; |
69 | } | 30 | } |
70 | |||
71 | public function extract() { | ||
72 | global $http, $extractor; | ||
73 | $extractor = new ContentExtractor(dirname(__FILE__).'/../3rdparty/site_config/custom', dirname(__FILE__).'/../3rdparty/site_config/standard'); | ||
74 | $extractor->fingerprints = $this->fingerprints; | ||
75 | |||
76 | $http = new HumbleHttpAgent(); | ||
77 | $http->userAgentMap = $this->user_agents; | ||
78 | $http->headerOnlyTypes = array_keys($this->content_type_exc); | ||
79 | $http->rewriteUrls = $this->rewrite_url; | ||
80 | $http->userAgentDefault = HumbleHttpAgent::UA_PHP; | ||
81 | // configure SimplePie HTTP extension class to use our HumbleHttpAgent instance | ||
82 | SimplePie_HumbleHttpAgent::set_agent($http); | ||
83 | $feed = new SimplePie(); | ||
84 | // some feeds use the text/html content type - force_feed tells SimplePie to process anyway | ||
85 | $feed->force_feed(true); | ||
86 | $feed->set_file_class('SimplePie_HumbleHttpAgent'); | ||
87 | $feed->feed_url = $this->url; | ||
88 | $feed->set_autodiscovery_level(SIMPLEPIE_LOCATOR_NONE); | ||
89 | $feed->set_timeout(20); | ||
90 | $feed->enable_cache(false); | ||
91 | $feed->set_stupidly_fast(true); | ||
92 | $feed->enable_order_by_date(false); // we don't want to do anything to the feed | ||
93 | $feed->set_url_replacements(array()); | ||
94 | // initialise the feed | ||
95 | // the @ suppresses notices which on some servers causes a 500 internal server error | ||
96 | $result = @$feed->init(); | ||
97 | if ($result && (!is_array($feed->data) || count($feed->data) == 0)) { | ||
98 | die('Sorry, no feed items found'); | ||
99 | } | ||
100 | // from now on, we'll identify ourselves as a browser | ||
101 | $http->userAgentDefault = HumbleHttpAgent::UA_BROWSER; | ||
102 | unset($feed, $result); | ||
103 | |||
104 | $feed = new DummySingleItemFeed($this->url); | ||
105 | |||
106 | $items = $feed->get_items(0, 1); | ||
107 | // Request all feed items in parallel (if supported) | ||
108 | $urls_sanitized = array(); | ||
109 | $urls = array(); | ||
110 | foreach ($items as $key => $item) { | ||
111 | $permalink = htmlspecialchars_decode($item->get_permalink()); | ||
112 | // Colons in URL path segments get encoded by SimplePie, yet some sites expect them unencoded | ||
113 | $permalink = str_replace('%3A', ':', $permalink); | ||
114 | if ($permalink) { | ||
115 | $urls_sanitized[] = $permalink; | ||
116 | } | ||
117 | $urls[$key] = $permalink; | ||
118 | } | ||
119 | $http->fetchAll($urls_sanitized); | ||
120 | |||
121 | foreach ($items as $key => $item) { | ||
122 | $do_content_extraction = true; | ||
123 | $extract_result = false; | ||
124 | $permalink = $urls[$key]; | ||
125 | |||
126 | // TODO: Allow error codes - some sites return correct content with error status | ||
127 | // e.g. prospectmagazine.co.uk returns 403 | ||
128 | |||
129 | if ($permalink && ($response = $http->get($permalink, true)) && ($response['status_code'] < 300 || $response['status_code'] > 400)) { | ||
130 | $effective_url = $response['effective_url']; | ||
131 | // check if action defined for returned Content-Type | ||
132 | $type = null; | ||
133 | if (preg_match('!^Content-Type:\s*(([-\w]+)/([-\w\+]+))!im', $response['headers'], $match)) { | ||
134 | // look for full mime type (e.g. image/jpeg) or just type (e.g. image) | ||
135 | $match[1] = strtolower(trim($match[1])); | ||
136 | $match[2] = strtolower(trim($match[2])); | ||
137 | foreach (array($match[1], $match[2]) as $_mime) { | ||
138 | if (isset($this->content_type_exc[$_mime])) { | ||
139 | $type = $match[1]; | ||
140 | $_act = $this->content_type_exc[$_mime]['action']; | ||
141 | $_name = $this->content_type_exc[$_mime]['name']; | ||
142 | if ($_act == 'exclude') { | ||
143 | continue 2; // skip this feed item entry | ||
144 | } elseif ($_act == 'link') { | ||
145 | if ($match[2] == 'image') { | ||
146 | $html = "<a href=\"$effective_url\"><img src=\"$effective_url\" alt=\"$_name\" /></a>"; | ||
147 | } else { | ||
148 | $html = "<a href=\"$effective_url\">Download $_name</a>"; | ||
149 | } | ||
150 | $title = $_name; | ||
151 | $do_content_extraction = false; | ||
152 | break; | ||
153 | } | ||
154 | } | ||
155 | } | ||
156 | unset($_mime, $_act, $_name, $match); | ||
157 | } | ||
158 | if ($do_content_extraction) { | ||
159 | $html = $response['body']; | ||
160 | // remove strange things | ||
161 | $html = str_replace('</[>', '', $html); | ||
162 | $html = $this->convert_to_utf8($html, $response['headers']); | ||
163 | |||
164 | // check site config for single page URL - fetch it if found | ||
165 | if ($single_page_response = $this->getSinglePage($item, $html, $effective_url)) { | ||
166 | $html = $single_page_response['body']; | ||
167 | // remove strange things | ||
168 | $html = str_replace('</[>', '', $html); | ||
169 | $html = $this->convert_to_utf8($html, $single_page_response['headers']); | ||
170 | $effective_url = $single_page_response['effective_url']; | ||
171 | unset($single_page_response); | ||
172 | } | ||
173 | $extract_result = $extractor->process($html, $effective_url); | ||
174 | $readability = $extractor->readability; | ||
175 | $content_block = ($extract_result) ? $extractor->getContent() : null; | ||
176 | } | ||
177 | } | ||
178 | if ($do_content_extraction) { | ||
179 | // if we failed to extract content... | ||
180 | if (!$extract_result) { | ||
181 | $html = $this->error_message; | ||
182 | // keep the original item description | ||
183 | $html .= $item->get_description(); | ||
184 | } else { | ||
185 | $readability->clean($content_block, 'select'); | ||
186 | if ($this->rewrite_relative_urls) $this->makeAbsolute($effective_url, $content_block); | ||
187 | if ($content_block->childNodes->length == 1 && $content_block->firstChild->nodeType === XML_ELEMENT_NODE) { | ||
188 | $html = $content_block->firstChild->innerHTML; | ||
189 | } else { | ||
190 | $html = $content_block->innerHTML; | ||
191 | } | ||
192 | // post-processing cleanup | ||
193 | $html = preg_replace('!<p>[\s\h\v]*</p>!u', '', $html); | ||
194 | } | ||
195 | } | ||
196 | } | ||
197 | |||
198 | $title = ($extractor->getTitle() != '' ? $extractor->getTitle() : _('Untitled')); | ||
199 | $content = array ('title' => $title, 'body' => $html); | ||
200 | |||
201 | return $content; | ||
202 | } | ||
203 | |||
204 | private function convert_to_utf8($html, $header=null) | ||
205 | { | ||
206 | $encoding = null; | ||
207 | if ($html || $header) { | ||
208 | if (is_array($header)) $header = implode("\n", $header); | ||
209 | if (!$header || !preg_match_all('/^Content-Type:\s+([^;]+)(?:;\s*charset=["\']?([^;"\'\n]*))?/im', $header, $match, PREG_SET_ORDER)) { | ||
210 | // error parsing the response | ||
211 | } else { | ||
212 | $match = end($match); // get last matched element (in case of redirects) | ||
213 | if (isset($match[2])) $encoding = trim($match[2], "\"' \r\n\0\x0B\t"); | ||
214 | } | ||
215 | // TODO: check to see if encoding is supported (can we convert it?) | ||
216 | // If it's not, result will be empty string. | ||
217 | // For now we'll check for invalid encoding types returned by some sites, e.g. 'none' | ||
218 | // Problem URL: http://facta.co.jp/blog/archives/20111026001026.html | ||
219 | if (!$encoding || $encoding == 'none') { | ||
220 | // search for encoding in HTML - only look at the first 35000 characters | ||
221 | $html_head = substr($html, 0, 40000); | ||
222 | if (preg_match('/^<\?xml\s+version=(?:"[^"]*"|\'[^\']*\')\s+encoding=("[^"]*"|\'[^\']*\')/s', $html_head, $match)) { | ||
223 | $encoding = trim($match[1], '"\''); | ||
224 | } elseif (preg_match('/<meta\s+http-equiv=["\']?Content-Type["\']? content=["\'][^;]+;\s*charset=["\']?([^;"\'>]+)/i', $html_head, $match)) { | ||
225 | $encoding = trim($match[1]); | ||
226 | } elseif (preg_match_all('/<meta\s+([^>]+)>/i', $html_head, $match)) { | ||
227 | foreach ($match[1] as $_test) { | ||
228 | if (preg_match('/charset=["\']?([^"\']+)/i', $_test, $_m)) { | ||
229 | $encoding = trim($_m[1]); | ||
230 | break; | ||
231 | } | ||
232 | } | ||
233 | } | ||
234 | } | ||
235 | if (isset($encoding)) $encoding = trim($encoding); | ||
236 | // trim is important here! | ||
237 | if (!$encoding || (strtolower($encoding) == 'iso-8859-1')) { | ||
238 | // replace MS Word smart qutoes | ||
239 | $trans = array(); | ||
240 | $trans[chr(130)] = '‚'; // Single Low-9 Quotation Mark | ||
241 | $trans[chr(131)] = 'ƒ'; // Latin Small Letter F With Hook | ||
242 | $trans[chr(132)] = '„'; // Double Low-9 Quotation Mark | ||
243 | $trans[chr(133)] = '…'; // Horizontal Ellipsis | ||
244 | $trans[chr(134)] = '†'; // Dagger | ||
245 | $trans[chr(135)] = '‡'; // Double Dagger | ||
246 | $trans[chr(136)] = 'ˆ'; // Modifier Letter Circumflex Accent | ||
247 | $trans[chr(137)] = '‰'; // Per Mille Sign | ||
248 | $trans[chr(138)] = 'Š'; // Latin Capital Letter S With Caron | ||
249 | $trans[chr(139)] = '‹'; // Single Left-Pointing Angle Quotation Mark | ||
250 | $trans[chr(140)] = 'Œ'; // Latin Capital Ligature OE | ||
251 | $trans[chr(145)] = '‘'; // Left Single Quotation Mark | ||
252 | $trans[chr(146)] = '’'; // Right Single Quotation Mark | ||
253 | $trans[chr(147)] = '“'; // Left Double Quotation Mark | ||
254 | $trans[chr(148)] = '”'; // Right Double Quotation Mark | ||
255 | $trans[chr(149)] = '•'; // Bullet | ||
256 | $trans[chr(150)] = '–'; // En Dash | ||
257 | $trans[chr(151)] = '—'; // Em Dash | ||
258 | $trans[chr(152)] = '˜'; // Small Tilde | ||
259 | $trans[chr(153)] = '™'; // Trade Mark Sign | ||
260 | $trans[chr(154)] = 'š'; // Latin Small Letter S With Caron | ||
261 | $trans[chr(155)] = '›'; // Single Right-Pointing Angle Quotation Mark | ||
262 | $trans[chr(156)] = 'œ'; // Latin Small Ligature OE | ||
263 | $trans[chr(159)] = 'Ÿ'; // Latin Capital Letter Y With Diaeresis | ||
264 | $html = strtr($html, $trans); | ||
265 | } | ||
266 | if (!$encoding) { | ||
267 | $encoding = 'utf-8'; | ||
268 | } else { | ||
269 | if (strtolower($encoding) != 'utf-8') { | ||
270 | $html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8'); | ||
271 | /* | ||
272 | if (function_exists('iconv')) { | ||
273 | // iconv appears to handle certain character encodings better than mb_convert_encoding | ||
274 | $html = iconv($encoding, 'utf-8', $html); | ||
275 | } else { | ||
276 | $html = mb_convert_encoding($html, 'utf-8', $encoding); | ||
277 | } | ||
278 | */ | ||
279 | } | ||
280 | } | ||
281 | } | ||
282 | return $html; | ||
283 | } | ||
284 | |||
285 | private function makeAbsolute($base, $elem) { | ||
286 | $base = new SimplePie_IRI($base); | ||
287 | // remove '//' in URL path (used to prevent URLs from resolving properly) | ||
288 | // TODO: check if this is still the case | ||
289 | if (isset($base->path)) $base->path = preg_replace('!//+!', '/', $base->path); | ||
290 | foreach(array('a'=>'href', 'img'=>'src') as $tag => $attr) { | ||
291 | $elems = $elem->getElementsByTagName($tag); | ||
292 | for ($i = $elems->length-1; $i >= 0; $i--) { | ||
293 | $e = $elems->item($i); | ||
294 | //$e->parentNode->replaceChild($articleContent->ownerDocument->createTextNode($e->textContent), $e); | ||
295 | $this->makeAbsoluteAttr($base, $e, $attr); | ||
296 | } | ||
297 | if (strtolower($elem->tagName) == $tag) $this->makeAbsoluteAttr($base, $elem, $attr); | ||
298 | } | ||
299 | } | ||
300 | |||
301 | private function makeAbsoluteAttr($base, $e, $attr) { | ||
302 | if ($e->hasAttribute($attr)) { | ||
303 | // Trim leading and trailing white space. I don't really like this but | ||
304 | // unfortunately it does appear on some sites. e.g. <img src=" /path/to/image.jpg" /> | ||
305 | $url = trim(str_replace('%20', ' ', $e->getAttribute($attr))); | ||
306 | $url = str_replace(' ', '%20', $url); | ||
307 | if (!preg_match('!https?://!i', $url)) { | ||
308 | if ($absolute = SimplePie_IRI::absolutize($base, $url)) { | ||
309 | $e->setAttribute($attr, $absolute); | ||
310 | } | ||
311 | } | ||
312 | } | ||
313 | } | ||
314 | |||
315 | private function makeAbsoluteStr($base, $url) { | ||
316 | $base = new SimplePie_IRI($base); | ||
317 | // remove '//' in URL path (causes URLs not to resolve properly) | ||
318 | if (isset($base->path)) $base->path = preg_replace('!//+!', '/', $base->path); | ||
319 | if (preg_match('!^https?://!i', $url)) { | ||
320 | // already absolute | ||
321 | return $url; | ||
322 | } else { | ||
323 | if ($absolute = SimplePie_IRI::absolutize($base, $url)) { | ||
324 | return $absolute; | ||
325 | } | ||
326 | return false; | ||
327 | } | ||
328 | } | ||
329 | |||
330 | // returns single page response, or false if not found | ||
331 | private function getSinglePage($item, $html, $url) { | ||
332 | global $http, $extractor; | ||
333 | $host = @parse_url($url, PHP_URL_HOST); | ||
334 | $site_config = SiteConfig::build($host); | ||
335 | if ($site_config === false) { | ||
336 | // check for fingerprints | ||
337 | if (!empty($extractor->fingerprints) && ($_fphost = $extractor->findHostUsingFingerprints($html))) { | ||
338 | $site_config = SiteConfig::build($_fphost); | ||
339 | } | ||
340 | if ($site_config === false) $site_config = new SiteConfig(); | ||
341 | SiteConfig::add_to_cache($host, $site_config); | ||
342 | return false; | ||
343 | } else { | ||
344 | SiteConfig::add_to_cache($host, $site_config); | ||
345 | } | ||
346 | $splink = null; | ||
347 | if (!empty($site_config->single_page_link)) { | ||
348 | $splink = $site_config->single_page_link; | ||
349 | } elseif (!empty($site_config->single_page_link_in_feed)) { | ||
350 | // single page link xpath is targeted at feed | ||
351 | $splink = $site_config->single_page_link_in_feed; | ||
352 | // so let's replace HTML with feed item description | ||
353 | $html = $item->get_description(); | ||
354 | } | ||
355 | if (isset($splink)) { | ||
356 | // Build DOM tree from HTML | ||
357 | $readability = new PocheReadability($html, $url); | ||
358 | $xpath = new DOMXPath($readability->dom); | ||
359 | // Loop through single_page_link xpath expressions | ||
360 | $single_page_url = null; | ||
361 | foreach ($splink as $pattern) { | ||
362 | $elems = @$xpath->evaluate($pattern, $readability->dom); | ||
363 | if (is_string($elems)) { | ||
364 | $single_page_url = trim($elems); | ||
365 | break; | ||
366 | } elseif ($elems instanceof DOMNodeList && $elems->length > 0) { | ||
367 | foreach ($elems as $item) { | ||
368 | if ($item instanceof DOMElement && $item->hasAttribute('href')) { | ||
369 | $single_page_url = $item->getAttribute('href'); | ||
370 | break; | ||
371 | } elseif ($item instanceof DOMAttr && $item->value) { | ||
372 | $single_page_url = $item->value; | ||
373 | break; | ||
374 | } | ||
375 | } | ||
376 | } | ||
377 | } | ||
378 | // If we've got URL, resolve against $url | ||
379 | if (isset($single_page_url) && ($single_page_url = $this->makeAbsoluteStr($url, $single_page_url))) { | ||
380 | // check it's not what we have already! | ||
381 | if ($single_page_url != $url) { | ||
382 | // it's not, so let's try to fetch it... | ||
383 | $_prev_ref = $http->referer; | ||
384 | $http->referer = $single_page_url; | ||
385 | if (($response = $http->get($single_page_url, true)) && $response['status_code'] < 300) { | ||
386 | $http->referer = $_prev_ref; | ||
387 | return $response; | ||
388 | } | ||
389 | $http->referer = $_prev_ref; | ||
390 | } | ||
391 | } | ||
392 | } | ||
393 | return false; | ||
394 | } | ||
395 | } \ No newline at end of file | 31 | } \ No newline at end of file |
diff --git a/inc/poche/global.inc.php b/inc/poche/global.inc.php index 65a026a7..846699d3 100644 --- a/inc/poche/global.inc.php +++ b/inc/poche/global.inc.php | |||
@@ -20,25 +20,13 @@ require_once INCLUDES . '/poche/Url.class.php'; | |||
20 | require_once INCLUDES . '/3rdparty/class.messages.php'; | 20 | require_once INCLUDES . '/3rdparty/class.messages.php'; |
21 | require_once INCLUDES . '/poche/Poche.class.php'; | 21 | require_once INCLUDES . '/poche/Poche.class.php'; |
22 | 22 | ||
23 | require_once INCLUDES . '/3rdparty/Readability.php'; | ||
24 | require_once INCLUDES . '/poche/PocheReadability.php'; | ||
25 | |||
26 | require_once INCLUDES . '/3rdparty/Encoding.php'; | ||
27 | require_once INCLUDES . '/poche/Database.class.php'; | 23 | require_once INCLUDES . '/poche/Database.class.php'; |
28 | require_once INCLUDES . '/3rdparty/simple_html_dom.php'; | 24 | require_once INCLUDES . '/3rdparty/simple_html_dom.php'; |
29 | require_once INCLUDES . '/3rdparty/paginator.php'; | 25 | require_once INCLUDES . '/3rdparty/paginator.php'; |
30 | require_once INCLUDES . '/3rdparty/Session.class.php'; | 26 | require_once INCLUDES . '/3rdparty/Session.class.php'; |
31 | 27 | ||
32 | require_once INCLUDES . '/3rdparty/simplepie/SimplePieAutoloader.php'; | 28 | require_once INCLUDES . '/3rdparty/libraries/feedwriter/FeedItem.php'; |
33 | require_once INCLUDES . '/3rdparty/simplepie/SimplePie/Core.php'; | 29 | require_once INCLUDES . '/3rdparty/libraries/feedwriter/FeedWriter.php'; |
34 | require_once INCLUDES . '/3rdparty/content-extractor/ContentExtractor.php'; | ||
35 | require_once INCLUDES . '/3rdparty/content-extractor/SiteConfig.php'; | ||
36 | require_once INCLUDES . '/3rdparty/humble-http-agent/HumbleHttpAgent.php'; | ||
37 | require_once INCLUDES . '/3rdparty/humble-http-agent/SimplePie_HumbleHttpAgent.php'; | ||
38 | require_once INCLUDES . '/3rdparty/humble-http-agent/CookieJar.php'; | ||
39 | require_once INCLUDES . '/3rdparty/feedwriter/FeedItem.php'; | ||
40 | require_once INCLUDES . '/3rdparty/feedwriter/FeedWriter.php'; | ||
41 | require_once INCLUDES . '/3rdparty/feedwriter/DummySingleItemFeed.php'; | ||
42 | require_once INCLUDES . '/3rdparty/FlattrItem.class.php'; | 30 | require_once INCLUDES . '/3rdparty/FlattrItem.class.php'; |
43 | 31 | ||
44 | # Composer its autoloader for automatically loading Twig | 32 | # Composer its autoloader for automatically loading Twig |