]>
Commit | Line | Data |
---|---|---|
ec397236 NL |
1 | <?php |
2 | /** | |
3 | * SimplePie | |
4 | * | |
5 | * A PHP-Based RSS and Atom Feed Framework. | |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | |
7 | * | |
42c80841 | 8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors |
ec397236 NL |
9 | * All rights reserved. |
10 | * | |
11 | * Redistribution and use in source and binary forms, with or without modification, are | |
12 | * permitted provided that the following conditions are met: | |
13 | * | |
14 | * * Redistributions of source code must retain the above copyright notice, this list of | |
15 | * conditions and the following disclaimer. | |
16 | * | |
17 | * * Redistributions in binary form must reproduce the above copyright notice, this list | |
18 | * of conditions and the following disclaimer in the documentation and/or other materials | |
19 | * provided with the distribution. | |
20 | * | |
21 | * * Neither the name of the SimplePie Team nor the names of its contributors may be used | |
22 | * to endorse or promote products derived from this software without specific prior | |
23 | * written permission. | |
24 | * | |
25 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS | |
26 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | |
27 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS | |
28 | * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
29 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
30 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
31 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | |
32 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
33 | * POSSIBILITY OF SUCH DAMAGE. | |
34 | * | |
35 | * @package SimplePie | |
42c80841 NL |
36 | * @version 1.3.1 |
37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue | |
ec397236 NL |
38 | * @author Ryan Parman |
39 | * @author Geoffrey Sneddon | |
40 | * @author Ryan McCue | |
41 | * @link http://simplepie.org/ SimplePie | |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | |
ec397236 NL |
43 | */ |
44 | ||
42c80841 NL |
/**
 * Used for feed auto-discovery
 *
 * Starting from an already fetched file, this class looks for a feed: first
 * the file itself, then feeds advertised through `rel` attributes of
 * link/a/area elements, and finally plain anchors whose URL looks feed-like.
 *
 * This class can be overloaded with {@see SimplePie::set_locator_class()}
 *
 * @package SimplePie
 */
class SimplePie_Locator
{
	/** User agent handed to every File created while probing candidates */
	public $useragent;

	/** Timeout handed to every File created while probing candidates */
	public $timeout;

	/** The file the search starts from */
	public $file;

	/** Absolute URLs of anchors that share the authority of the starting file */
	public $local = array();

	/** Absolute URLs of anchors that point to a different authority */
	public $elsewhere = array();

	/** Not used by this class itself; kept for backwards compatibility */
	public $cached_entities = array();

	/** URL of the starting file; used for elements located before a base element */
	public $http_base;

	/** Base URL for elements located after the base element (defaults to $http_base) */
	public $base;

	/** Line number of the base element that set $base, 0 when there is none */
	public $base_location = 0;

	/** Number of candidate URLs fetched so far */
	public $checked_feeds = 0;

	/** The search strategies stop once $checked_feeds reaches this value */
	public $max_checked_feeds = 10;

	/** DOMDocument built from the body of the starting file, null without the DOM extension */
	public $dom;

	/** Registry used to create File/sniffer objects and to call Misc helpers */
	protected $registry;

	/**
	 * Store the settings and parse the body of the starting file.
	 *
	 * @param SimplePie_File $file File to search for feeds
	 * @param int $timeout Timeout used when fetching candidates
	 * @param string|null $useragent User agent used when fetching candidates
	 * @param int $max_checked_feeds Upper bound for fetched candidates
	 */
	public function __construct(SimplePie_File $file, $timeout = 10, $useragent = null, $max_checked_feeds = 10)
	{
		$this->file = $file;
		$this->useragent = $useragent;
		$this->timeout = $timeout;
		$this->max_checked_feeds = $max_checked_feeds;

		if (!class_exists('DOMDocument'))
		{
			$this->dom = null;
			return;
		}

		$this->dom = new DOMDocument();

		// loadHTML() rejects an empty source (ValueError on PHP 8). An empty
		// body has nothing to parse anyway, so keep the empty document.
		if (is_string($this->file->body) && $this->file->body !== '')
		{
			// Markup found in the wild is rarely valid; silence parser warnings.
			set_error_handler(array('SimplePie_Misc', 'silence_errors'));
			$this->dom->loadHTML($this->file->body);
			restore_error_handler();
		}
	}

	/**
	 * Inject the registry used to create objects and call helpers.
	 *
	 * @param SimplePie_Registry $registry
	 */
	public function set_registry(SimplePie_Registry $registry)
	{
		$this->registry = $registry;
	}

	/**
	 * Find a feed, starting from the file given to the constructor.
	 *
	 * The starting file is returned as-is when it already is a feed. A remote
	 * file that is not sniffed as text/html is not searched. Otherwise the
	 * strategies enabled in $type are tried in order: autodiscovery, local
	 * links by extension, local links by URL content, remote links by
	 * extension, remote links by URL content.
	 *
	 * @param int $type Bitmask of SIMPLEPIE_LOCATOR_* constants
	 * @param mixed $working Receives the array of all autodiscovered feeds, or
	 *                       the single file found by one of the link strategies
	 * @return object|null The feed file, or null when nothing was found
	 * @throws SimplePie_Exception When the page has to be inspected but no DOM is available
	 */
	public function find($type = SIMPLEPIE_LOCATOR_ALL, &$working = null)
	{
		if ($this->is_feed($this->file))
		{
			return $this->file;
		}

		if ($this->file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
		{
			$sniffer = $this->registry->create('Content_Type_Sniffer', array($this->file));
			if ($sniffer->get_type() !== 'text/html')
			{
				return null;
			}
		}

		if ($type & ~SIMPLEPIE_LOCATOR_NONE)
		{
			$this->get_base();
		}

		if ($type & SIMPLEPIE_LOCATOR_AUTODISCOVERY && $working = $this->autodiscovery())
		{
			return $working[0];
		}

		$link_strategies = SIMPLEPIE_LOCATOR_LOCAL_EXTENSION | SIMPLEPIE_LOCATOR_LOCAL_BODY | SIMPLEPIE_LOCATOR_REMOTE_EXTENSION | SIMPLEPIE_LOCATOR_REMOTE_BODY;
		if ($type & $link_strategies && $this->get_links())
		{
			if ($type & SIMPLEPIE_LOCATOR_LOCAL_EXTENSION && $working = $this->extension($this->local))
			{
				return $working;
			}

			if ($type & SIMPLEPIE_LOCATOR_LOCAL_BODY && $working = $this->body($this->local))
			{
				return $working;
			}

			if ($type & SIMPLEPIE_LOCATOR_REMOTE_EXTENSION && $working = $this->extension($this->elsewhere))
			{
				return $working;
			}

			if ($type & SIMPLEPIE_LOCATOR_REMOTE_BODY && $working = $this->body($this->elsewhere))
			{
				return $working;
			}
		}

		return null;
	}

	/**
	 * Decide whether a file is a feed.
	 *
	 * Remote files are judged by their sniffed content type; local files are
	 * always accepted; anything else is rejected.
	 *
	 * @param object $file File exposing a `method` bitmask
	 * @return bool
	 */
	public function is_feed($file)
	{
		if ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
		{
			$sniffer = $this->registry->create('Content_Type_Sniffer', array($file));
			$sniffed = $sniffer->get_type();
			$feed_types = array('application/rss+xml', 'application/rdf+xml', 'text/rdf', 'application/atom+xml', 'text/xml', 'application/xml');

			return in_array($sniffed, $feed_types, true);
		}

		return (bool) ($file->method & SIMPLEPIE_FILE_SOURCE_LOCAL);
	}

	/**
	 * Work out the base URLs used to resolve relative links.
	 *
	 * $http_base is always the URL of the starting file. $base starts out the
	 * same and is replaced by the first base element whose href can be
	 * resolved; the line of that element is remembered in $base_location.
	 *
	 * @throws SimplePie_Exception When no DOM is available
	 */
	public function get_base()
	{
		if ($this->dom === null)
		{
			throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
		}

		$this->http_base = $this->file->url;
		$this->base = $this->http_base;

		foreach ($this->dom->getElementsByTagName('base') as $element)
		{
			if (!$element->hasAttribute('href'))
			{
				continue;
			}

			$base = $this->registry->call('Misc', 'absolutize_url', array(trim($element->getAttribute('href')), $this->http_base));
			if ($base === false)
			{
				continue;
			}

			$this->base = $base;
			$this->base_location = method_exists($element, 'getLineNo') ? $element->getLineNo() : 0;
			break;
		}
	}

	/**
	 * Collect the feeds advertised by link, a and area elements.
	 *
	 * @return array|null List of feed files, or null when there are none
	 * @throws SimplePie_Exception When no DOM is available
	 */
	public function autodiscovery()
	{
		$done = array();
		$feeds = array();
		$feeds = array_merge($feeds, $this->search_elements_by_tag('link', $done, $feeds));
		$feeds = array_merge($feeds, $this->search_elements_by_tag('a', $done, $feeds));
		$feeds = array_merge($feeds, $this->search_elements_by_tag('area', $done, $feeds));

		if (!empty($feeds))
		{
			return array_values($feeds);
		}

		return null;
	}

	/**
	 * Fetch the feeds advertised by elements with the given tag name.
	 *
	 * An element is a candidate when its rel contains "feed", or contains
	 * "alternate" but not "stylesheet" and its type is an RSS or Atom MIME
	 * type. Each candidate URL is fetched at most once.
	 *
	 * @param string $name Tag name to inspect
	 * @param array $done URLs already fetched; fetched URLs are appended
	 * @param array $feeds Feeds found so far, keyed by URL
	 * @return array $feeds plus the feeds found here, keyed by URL
	 * @throws SimplePie_Exception When no DOM is available
	 */
	protected function search_elements_by_tag($name, &$done, $feeds)
	{
		if ($this->dom === null)
		{
			throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
		}

		foreach ($this->dom->getElementsByTagName($name) as $link)
		{
			if ($this->checked_feeds === $this->max_checked_feeds)
			{
				break;
			}

			if (!$link->hasAttribute('href') || !$link->hasAttribute('rel'))
			{
				continue;
			}

			$rel = array_unique($this->registry->call('Misc', 'space_seperated_tokens', array(strtolower($link->getAttribute('rel')))));
			$href = $this->resolve_href($link);
			if ($href === false)
			{
				continue;
			}

			// Never fetch the same URL twice, whichever rel advertised it.
			if (in_array($href, $done, true) || isset($feeds[$href]))
			{
				continue;
			}

			$is_candidate = in_array('feed', $rel, true)
				|| (in_array('alternate', $rel, true)
					&& !in_array('stylesheet', $rel, true)
					&& $link->hasAttribute('type')
					&& in_array(strtolower($this->registry->call('Misc', 'parse_mime', array($link->getAttribute('type')))), array('application/rss+xml', 'application/atom+xml'), true));
			if (!$is_candidate)
			{
				continue;
			}

			$done[] = $href;
			$feed = $this->fetch_feed($href);
			if ($feed !== null)
			{
				$feeds[$href] = $feed;
			}
		}

		return $feeds;
	}

	/**
	 * Sort the anchors of the page into $local and $elsewhere.
	 *
	 * Only anchors without a scheme, or with an http, https or feed scheme,
	 * are considered. An anchor is local when it has no authority or the same
	 * authority as the starting file.
	 *
	 * @return true|null True when at least one link was collected
	 * @throws SimplePie_Exception When no DOM is available
	 */
	public function get_links()
	{
		if ($this->dom === null)
		{
			throw new SimplePie_Exception('DOMDocument not found, unable to use locator');
		}

		// The URL of the starting file does not change while iterating.
		$current = $this->registry->call('Misc', 'parse_url', array($this->file->url));

		foreach ($this->dom->getElementsByTagName('a') as $link)
		{
			if (!$link->hasAttribute('href'))
			{
				continue;
			}

			$parsed = $this->registry->call('Misc', 'parse_url', array(trim($link->getAttribute('href'))));
			if ($parsed['scheme'] !== '' && !preg_match('/^(https?|feed)$/i', $parsed['scheme']))
			{
				continue;
			}

			$href = $this->resolve_href($link);
			if ($href === false)
			{
				continue;
			}

			if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority'])
			{
				$this->local[] = $href;
			}
			else
			{
				$this->elsewhere[] = $href;
			}
		}

		$this->local = array_unique($this->local);
		$this->elsewhere = array_unique($this->elsewhere);
		if (!empty($this->local) || !empty($this->elsewhere))
		{
			return true;
		}

		return null;
	}

	/**
	 * Try the URLs that end in a feed-like extension.
	 *
	 * URLs that were fetched but are not a feed are removed from $array.
	 *
	 * @param array $array URLs to try
	 * @return object|null First file that is a feed, or null
	 */
	public function extension(&$array)
	{
		foreach ($array as $key => $value)
		{
			if ($this->checked_feeds === $this->max_checked_feeds)
			{
				break;
			}

			if (in_array(strtolower(strrchr($value, '.')), array('.rss', '.rdf', '.atom', '.xml'), true))
			{
				$feed = $this->fetch_feed($value);
				if ($feed !== null)
				{
					return $feed;
				}

				unset($array[$key]);
			}
		}

		return null;
	}

	/**
	 * Try the URLs that contain "rss", "rdf", "atom" or "xml" anywhere.
	 *
	 * URLs that were fetched but are not a feed are removed from $array.
	 *
	 * @param array $array URLs to try
	 * @return object|null First file that is a feed, or null
	 */
	public function body(&$array)
	{
		foreach ($array as $key => $value)
		{
			if ($this->checked_feeds === $this->max_checked_feeds)
			{
				break;
			}

			if (preg_match('/(rss|rdf|atom|xml)/i', $value))
			{
				$feed = $this->fetch_feed($value);
				if ($feed !== null)
				{
					return $feed;
				}

				unset($array[$key]);
			}
		}

		return null;
	}

	/**
	 * Resolve the href of an element against the base that applies to it.
	 *
	 * Elements located after the base element use $base, all others use
	 * $http_base. Without line information the element is treated as being
	 * on line 1.
	 *
	 * @param DOMElement $element Element carrying an href attribute
	 * @return string|false Absolute URL, or false when it cannot be resolved
	 */
	private function resolve_href($element)
	{
		$line = method_exists($element, 'getLineNo') ? $element->getLineNo() : 1;
		$relative_to = ($this->base_location < $line) ? $this->base : $this->http_base;

		return $this->registry->call('Misc', 'absolutize_url', array(trim($element->getAttribute('href')), $relative_to));
	}

	/**
	 * Fetch a candidate URL and keep it only when it is a usable feed.
	 *
	 * Counts towards $checked_feeds. A non-remote file only has to succeed;
	 * a remote file must also answer with status 200 or 207-299.
	 *
	 * @param string $url Absolute URL of the candidate
	 * @return object|null The fetched file when it is a feed, null otherwise
	 */
	private function fetch_feed($url)
	{
		$this->checked_feeds++;

		$headers = array(
			'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
		);
		$feed = $this->registry->create('File', array($url, $this->timeout, 5, $headers, $this->useragent));

		if (!$feed->success)
		{
			return null;
		}

		// The parentheses matter: `===` binds tighter than `&`.
		if (($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE) !== 0)
		{
			$status_ok = $feed->status_code === 200 || ($feed->status_code > 206 && $feed->status_code < 300);
			if (!$status_ok)
			{
				return null;
			}
		}

		return $this->is_feed($feed) ? $feed : null;
	}
}
372 |