]>
Commit | Line | Data |
---|---|---|
1 | <?php | |
2 | /** | |
3 | * SimplePie | |
4 | * | |
5 | * A PHP-Based RSS and Atom Feed Framework. | |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | |
7 | * | |
8 | * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | |
9 | * All rights reserved. | |
10 | * | |
11 | * Redistribution and use in source and binary forms, with or without modification, are | |
12 | * permitted provided that the following conditions are met: | |
13 | * | |
14 | * * Redistributions of source code must retain the above copyright notice, this list of | |
15 | * conditions and the following disclaimer. | |
16 | * | |
17 | * * Redistributions in binary form must reproduce the above copyright notice, this list | |
18 | * of conditions and the following disclaimer in the documentation and/or other materials | |
19 | * provided with the distribution. | |
20 | * | |
21 | * * Neither the name of the SimplePie Team nor the names of its contributors may be used | |
22 | * to endorse or promote products derived from this software without specific prior | |
23 | * written permission. | |
24 | * | |
25 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS | |
26 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | |
27 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS | |
28 | * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
29 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
30 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
31 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | |
32 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
33 | * POSSIBILITY OF SUCH DAMAGE. | |
34 | * | |
35 | * @package SimplePie | |
36 | * @version 1.3-dev | |
37 | * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue | |
38 | * @author Ryan Parman | |
39 | * @author Geoffrey Sneddon | |
40 | * @author Ryan McCue | |
41 | * @link http://simplepie.org/ SimplePie | |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | |
43 | * @todo phpDoc comments | |
44 | */ | |
45 | ||
46 | ||
/**
 * HTTP Response Parser
 *
 * A small state machine that splits a raw HTTP response into its status
 * line (version, status code, reason phrase), its headers and its body.
 * Each state is the name of the protected method that handles it; parse()
 * keeps dispatching to the current state until the machine emits, fails,
 * or runs out of input.
 *
 * @package SimplePie
 */
class SimplePie_HTTP_Parser
{
    /**
     * HTTP Version
     *
     * @var float
     */
    public $http_version = 0.0;

    /**
     * Status code
     *
     * @var int
     */
    public $status_code = 0;

    /**
     * Reason phrase
     *
     * @var string
     */
    public $reason = '';

    /**
     * Key/value pairs of the headers (names are lower-cased)
     *
     * @var array
     */
    public $headers = array();

    /**
     * Body of the response
     *
     * @var string
     */
    public $body = '';

    /**
     * Current state of the state machine (a method name, or false on failure)
     *
     * @var string
     */
    protected $state = 'http_version';

    /**
     * Input data
     *
     * @var string
     */
    protected $data = '';

    /**
     * Input data length (to avoid calling strlen() every time this is needed)
     *
     * @var int
     */
    protected $data_length = 0;

    /**
     * Current position of the pointer
     *
     * @var int
     */
    protected $position = 0;

    /**
     * Name of the header currently being parsed
     *
     * @var string
     */
    protected $name = '';

    /**
     * Value of the header currently being parsed
     *
     * @var string
     */
    protected $value = '';

    /**
     * Create an instance of the class with the input data
     *
     * @param string $data Input data
     */
    public function __construct($data)
    {
        $this->data = $data;
        $this->data_length = strlen($this->data);
    }

    /**
     * Parse the input data
     *
     * On failure every public property is reset. Note that http_version and
     * status_code are reset to an empty string, not to 0.0 / 0.
     *
     * @return bool true on success, false on failure
     */
    public function parse()
    {
        // The state name doubles as the name of the method handling it.
        while ($this->state && $this->state !== 'emit' && $this->has_data())
        {
            $state = $this->state;
            $this->$state();
        }
        $this->data = '';

        // Stopping in 'body' means the input ended right after the headers,
        // i.e. a valid response with an empty body.
        if ($this->state === 'emit' || $this->state === 'body')
        {
            return true;
        }

        $this->http_version = '';
        $this->status_code = '';
        $this->reason = '';
        $this->headers = array();
        $this->body = '';
        return false;
    }

    /**
     * Check whether there is data beyond the pointer
     *
     * @return bool true if there is further data, false if not
     */
    protected function has_data()
    {
        return (bool) ($this->position < $this->data_length);
    }

    /**
     * See if the next character is LWS
     *
     * A tab or space counts as LWS, as does a line feed that is immediately
     * followed by a tab or space (a folded header line).
     *
     * @return bool true if the next character is LWS, false if not
     */
    protected function is_linear_whitespace()
    {
        return (bool) ($this->data[$this->position] === "\x09"
            || $this->data[$this->position] === "\x20"
            || ($this->data[$this->position] === "\x0A"
                && isset($this->data[$this->position + 1])
                && ($this->data[$this->position + 1] === "\x09" || $this->data[$this->position + 1] === "\x20")));
    }

    /**
     * Parse the HTTP version
     */
    protected function http_version()
    {
        if (strpos($this->data, "\x0A") !== false && strtoupper(substr($this->data, 0, 5)) === 'HTTP/')
        {
            $len = strspn($this->data, '0123456789.', 5);
            $this->http_version = substr($this->data, 5, $len);
            $this->position += 5 + $len;
            // At most one dot is allowed in the version number.
            if (substr_count($this->http_version, '.') <= 1)
            {
                $this->http_version = (float) $this->http_version;
                $this->position += strspn($this->data, "\x09\x20", $this->position);
                $this->state = 'status';
            }
            else
            {
                $this->state = false;
            }
        }
        else
        {
            $this->state = false;
        }
    }

    /**
     * Parse the status code
     */
    protected function status()
    {
        if ($len = strspn($this->data, '0123456789', $this->position))
        {
            $this->status_code = (int) substr($this->data, $this->position, $len);
            $this->position += $len;
            $this->state = 'reason';
        }
        else
        {
            $this->state = false;
        }
    }

    /**
     * Parse the reason phrase
     */
    protected function reason()
    {
        $len = strcspn($this->data, "\x0A", $this->position);
        $this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
        // Skip the phrase and the line feed that terminates the status line.
        $this->position += $len + 1;
        $this->state = 'new_line';
    }

    /**
     * Deal with a new line, shifting data around as needed
     *
     * Stores the header collected so far (if any), then decides whether the
     * next line is the blank line that ends the headers or another header.
     */
    protected function new_line()
    {
        $this->value = trim($this->value, "\x0D\x20");
        if ($this->name !== '' && $this->value !== '')
        {
            $this->name = strtolower($this->name);
            // We should only use the last Content-Type header. c.f. issue #1
            if (isset($this->headers[$this->name]) && $this->name !== 'content-type')
            {
                $this->headers[$this->name] .= ', ' . $this->value;
            }
            else
            {
                $this->headers[$this->name] = $this->value;
            }
        }
        $this->name = '';
        $this->value = '';

        // Look at the two bytes at the pointer, not at a single character:
        // a substring of one character can never equal CRLF.
        if (substr($this->data, $this->position, 2) === "\x0D\x0A")
        {
            $this->position += 2;
            $this->state = 'body';
        }
        elseif ($this->data[$this->position] === "\x0A")
        {
            $this->position++;
            $this->state = 'body';
        }
        else
        {
            $this->state = 'name';
        }
    }

    /**
     * Parse a header name
     */
    protected function name()
    {
        $len = strcspn($this->data, "\x0A:", $this->position);
        if (isset($this->data[$this->position + $len]))
        {
            if ($this->data[$this->position + $len] === "\x0A")
            {
                // Line without a colon: leave the pointer on the line feed
                // and let new_line() deal with it.
                $this->position += $len;
                $this->state = 'new_line';
            }
            else
            {
                $this->name = substr($this->data, $this->position, $len);
                $this->position += $len + 1;
                $this->state = 'value';
            }
        }
        else
        {
            // Ran off the end of the input without finding a terminator.
            $this->state = false;
        }
    }

    /**
     * Parse LWS, replacing consecutive LWS characters with a single space
     */
    protected function linear_whitespace()
    {
        do
        {
            if (substr($this->data, $this->position, 2) === "\x0D\x0A")
            {
                $this->position += 2;
            }
            elseif ($this->data[$this->position] === "\x0A")
            {
                $this->position++;
            }
            $this->position += strspn($this->data, "\x09\x20", $this->position);
        } while ($this->has_data() && $this->is_linear_whitespace());
        $this->value .= "\x20";
    }

    /**
     * See what state to move to while within non-quoted header values
     */
    protected function value()
    {
        if ($this->is_linear_whitespace())
        {
            $this->linear_whitespace();
        }
        else
        {
            switch ($this->data[$this->position])
            {
                case '"':
                    // Workaround for ETags: we have to include the quotes as
                    // part of the tag.
                    if (strtolower($this->name) === 'etag')
                    {
                        $this->value .= '"';
                        $this->position++;
                        $this->state = 'value_char';
                        break;
                    }
                    $this->position++;
                    $this->state = 'quote';
                    break;

                case "\x0A":
                    $this->position++;
                    $this->state = 'new_line';
                    break;

                default:
                    $this->state = 'value_char';
                    break;
            }
        }
    }

    /**
     * Parse a header value while outside quotes
     */
    protected function value_char()
    {
        $len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
        $this->value .= substr($this->data, $this->position, $len);
        $this->position += $len;
        $this->state = 'value';
    }

    /**
     * See what state to move to while within quoted header values
     */
    protected function quote()
    {
        if ($this->is_linear_whitespace())
        {
            $this->linear_whitespace();
        }
        else
        {
            switch ($this->data[$this->position])
            {
                case '"':
                    $this->position++;
                    $this->state = 'value';
                    break;

                case "\x0A":
                    $this->position++;
                    $this->state = 'new_line';
                    break;

                case '\\':
                    $this->position++;
                    $this->state = 'quote_escaped';
                    break;

                default:
                    $this->state = 'quote_char';
                    break;
            }
        }
    }

    /**
     * Parse a header value while within quotes
     */
    protected function quote_char()
    {
        $len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
        $this->value .= substr($this->data, $this->position, $len);
        $this->position += $len;
        // We are still inside the quoted string, so hand control back to
        // quote(): it is the state that understands the closing quote and
        // backslash escapes. Returning to 'value' here would copy a later
        // backslash literally and mistake the closing quote for an opening one.
        $this->state = 'quote';
    }

    /**
     * Parse an escaped character within quotes
     */
    protected function quote_escaped()
    {
        $this->value .= $this->data[$this->position];
        $this->position++;
        $this->state = 'quote';
    }

    /**
     * Parse the body
     *
     * Any non-empty Transfer-Encoding header sends the body through
     * chunked(), which falls back to the raw body if it does not look
     * chunked. The header itself is removed either way.
     */
    protected function body()
    {
        $this->body = substr($this->data, $this->position);
        if (!empty($this->headers['transfer-encoding']))
        {
            unset($this->headers['transfer-encoding']);
            $this->state = 'chunked';
        }
        else
        {
            $this->state = 'emit';
        }
    }

    /**
     * Parse a "Transfer-Encoding: chunked" body
     *
     * Decodes the chunks into $this->body. If the data stops looking like
     * chunked encoding before a terminating zero-size chunk is reached, the
     * body is left exactly as received.
     */
    protected function chunked()
    {
        // A chunk header is: hex size, optional whitespace, optional
        // ";extension", then the end of the line. It is anchored to the
        // start of the remaining data (no multi-line flag) and stops at the
        // first line ending, so it can never swallow whitespace that belongs
        // to the chunk data itself.
        $chunk_header = '/^([0-9a-f]+)[\t ]*(?:;[^\n]*)?\r?\n/i';

        $decoded = '';
        $encoded = $this->body;

        while (true)
        {
            if (!preg_match($chunk_header, $encoded, $matches))
            {
                // Looks like it's not chunked after all
                $this->state = 'emit';
                return;
            }

            // hexdec() may return a float for very large values, hence the
            // loose comparison below.
            $length = hexdec($matches[1]);
            if ($length == 0)
            {
                // Last chunk; anything after it (trailer headers) is ignored.
                $this->state = 'emit';
                $this->body = $decoded;
                return;
            }

            $chunk_length = strlen($matches[0]);
            $decoded .= substr($encoded, $chunk_length, $length);
            // Drop the chunk and the line ending that follows its data.
            // The cast covers substr() returning false before PHP 8.
            $encoded = ltrim((string) substr($encoded, $chunk_length + $length), "\r\n");

            if (trim($encoded) === '0')
            {
                $this->state = 'emit';
                $this->body = $decoded;
                return;
            }
        }
    }
}