]> git.immae.eu Git - github/wallabag/wallabag.git/blob - inc/3rdparty/simplepie/SimplePie/HTTP/Parser.php
Merge pull request #181 from inthepoche/dev
[github/wallabag/wallabag.git] / inc / 3rdparty / simplepie / SimplePie / HTTP / Parser.php
1 <?php
2 /**
3 * SimplePie
4 *
5 * A PHP-Based RSS and Atom Feed Framework.
6 * Takes the hard work out of managing a complete RSS/Atom solution.
7 *
8 * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without modification, are
12 * permitted provided that the following conditions are met:
13 *
14 * * Redistributions of source code must retain the above copyright notice, this list of
15 * conditions and the following disclaimer.
16 *
17 * * Redistributions in binary form must reproduce the above copyright notice, this list
18 * of conditions and the following disclaimer in the documentation and/or other materials
19 * provided with the distribution.
20 *
21 * * Neither the name of the SimplePie Team nor the names of its contributors may be used
22 * to endorse or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
26 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
27 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
28 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 *
35 * @package SimplePie
36 * @version 1.3-dev
37 * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
38 * @author Ryan Parman
39 * @author Geoffrey Sneddon
40 * @author Ryan McCue
41 * @link http://simplepie.org/ SimplePie
42 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
43 * @todo phpDoc comments
44 */
45
46
47 /**
48 * HTTP Response Parser
49 *
50 * @package SimplePie
51 */
class SimplePie_HTTP_Parser
{
	/**
	 * HTTP Version
	 *
	 * Holds a float after a successful parse; reset to '' when parse() fails.
	 *
	 * @var float
	 */
	public $http_version = 0.0;

	/**
	 * Status code
	 *
	 * Holds an int after a successful parse; reset to '' when parse() fails.
	 *
	 * @var int
	 */
	public $status_code = 0;

	/**
	 * Reason phrase
	 *
	 * @var string
	 */
	public $reason = '';

	/**
	 * Key/value pairs of the headers
	 *
	 * Keys are lower-cased header names. Repeated headers are joined with
	 * ', ', except Content-Type where only the last occurrence is kept.
	 *
	 * @var array
	 */
	public $headers = array();

	/**
	 * Body of the response
	 *
	 * @var string
	 */
	public $body = '';

	/**
	 * Current state of the state machine
	 *
	 * Holds the name of the method to run next, 'emit' once parsing is
	 * complete, or false once the input has been rejected.
	 *
	 * @var string
	 */
	protected $state = 'http_version';

	/**
	 * Input data
	 *
	 * @var string
	 */
	protected $data = '';

	/**
	 * Input data length (to avoid calling strlen() everytime this is needed)
	 *
	 * @var int
	 */
	protected $data_length = 0;

	/**
	 * Current position of the pointer
	 *
	 * @var int
	 */
	protected $position = 0;

	/**
	 * Name of the header currently being parsed
	 *
	 * @var string
	 */
	protected $name = '';

	/**
	 * Value of the header currently being parsed
	 *
	 * @var string
	 */
	protected $value = '';

	/**
	 * Create an instance of the class with the input data
	 *
	 * @param string $data Input data
	 */
	public function __construct($data)
	{
		$this->data = $data;
		$this->data_length = strlen($this->data);
	}

	/**
	 * Parse the input data
	 *
	 * Runs the state machine until it reaches 'emit', fails (state becomes
	 * false) or runs out of input. The input buffer is released afterwards.
	 * On failure every public property is reset ('' for scalars, an empty
	 * array for the headers).
	 *
	 * @return bool true on success, false on failure
	 */
	public function parse()
	{
		while ($this->state && $this->state !== 'emit' && $this->has_data())
		{
			$state = $this->state;
			$this->$state();
		}
		$this->data = '';

		// Stopping in 'body' means the headers were complete but no bytes
		// were left for a body, which is still a successful parse.
		if ($this->state === 'emit' || $this->state === 'body')
		{
			return true;
		}

		$this->http_version = '';
		$this->status_code = '';
		$this->reason = '';
		$this->headers = array();
		$this->body = '';
		return false;
	}

	/**
	 * Check whether there is data beyond the pointer
	 *
	 * @return bool true if there is further data, false if not
	 */
	protected function has_data()
	{
		return $this->position < $this->data_length;
	}

	/**
	 * See if the next character is LWS
	 *
	 * A tab or a space is LWS, and so is a line feed that is immediately
	 * followed by a tab or a space (a folded header line). The caller must
	 * make sure the pointer is inside the data.
	 *
	 * @return bool true if the next character is LWS, false if not
	 */
	protected function is_linear_whitespace()
	{
		$current = $this->data[$this->position];
		if ($current === "\x09" || $current === "\x20")
		{
			return true;
		}
		if ($current !== "\x0A" || !isset($this->data[$this->position + 1]))
		{
			return false;
		}
		$next = $this->data[$this->position + 1];
		return $next === "\x09" || $next === "\x20";
	}

	/**
	 * Parse the HTTP version
	 *
	 * Requires the data to contain a line feed and to start with "HTTP/"
	 * (case-insensitive). A version containing more than one dot is rejected.
	 */
	protected function http_version()
	{
		if (strpos($this->data, "\x0A") === false || strtoupper(substr($this->data, 0, 5)) !== 'HTTP/')
		{
			$this->state = false;
			return;
		}

		$len = strspn($this->data, '0123456789.', 5);
		$this->http_version = substr($this->data, 5, $len);
		$this->position += 5 + $len;

		if (substr_count($this->http_version, '.') > 1)
		{
			$this->state = false;
			return;
		}

		$this->http_version = (float) $this->http_version;
		// Skip the blanks separating the version from the status code.
		$this->position += strspn($this->data, "\x09\x20", $this->position);
		$this->state = 'status';
	}

	/**
	 * Parse the status code
	 *
	 * Fails when no digit is found at the pointer.
	 */
	protected function status()
	{
		$len = strspn($this->data, '0123456789', $this->position);
		if ($len)
		{
			$this->status_code = (int) substr($this->data, $this->position, $len);
			$this->position += $len;
			$this->state = 'reason';
		}
		else
		{
			$this->state = false;
		}
	}

	/**
	 * Parse the reason phrase
	 *
	 * Takes everything up to the next line feed, stripped of surrounding
	 * tabs, spaces and carriage returns, and moves past that line feed.
	 */
	protected function reason()
	{
		$len = strcspn($this->data, "\x0A", $this->position);
		$this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
		$this->position += $len + 1;
		$this->state = 'new_line';
	}

	/**
	 * Deal with a new line, shifting data around as needed
	 *
	 * Stores the header collected so far (if both its name and value are
	 * non-empty), then decides whether the next line is the blank line that
	 * ends the header section or another header.
	 */
	protected function new_line()
	{
		$this->value = trim($this->value, "\x0D\x20");
		if ($this->name !== '' && $this->value !== '')
		{
			$this->name = strtolower($this->name);
			// We should only use the last Content-Type header. c.f. issue #1
			if (isset($this->headers[$this->name]) && $this->name !== 'content-type')
			{
				$this->headers[$this->name] .= ', ' . $this->value;
			}
			else
			{
				$this->headers[$this->name] = $this->value;
			}
		}
		$this->name = '';
		$this->value = '';

		// Look at the two bytes at the pointer. Taking substr() of the single
		// character $this->data[$this->position] could never equal CRLF.
		if (substr($this->data, $this->position, 2) === "\x0D\x0A")
		{
			$this->position += 2;
			$this->state = 'body';
		}
		elseif ($this->data[$this->position] === "\x0A")
		{
			$this->position++;
			$this->state = 'body';
		}
		else
		{
			$this->state = 'name';
		}
	}

	/**
	 * Parse a header name
	 *
	 * Reads up to the next colon or line feed. A line without a colon is
	 * skipped; running out of data before either delimiter is a failure.
	 */
	protected function name()
	{
		$len = strcspn($this->data, "\x0A:", $this->position);
		if (!isset($this->data[$this->position + $len]))
		{
			$this->state = false;
			return;
		}

		if ($this->data[$this->position + $len] === "\x0A")
		{
			// No colon on this line: leave the pointer on the line feed and
			// let new_line() deal with it.
			$this->position += $len;
			$this->state = 'new_line';
		}
		else
		{
			$this->name = substr($this->data, $this->position, $len);
			$this->position += $len + 1;
			$this->state = 'value';
		}
	}

	/**
	 * Parse LWS, replacing consecutive LWS characters with a single space
	 *
	 * Does not change the state, so the calling state resumes afterwards.
	 */
	protected function linear_whitespace()
	{
		do
		{
			if (substr($this->data, $this->position, 2) === "\x0D\x0A")
			{
				$this->position += 2;
			}
			elseif ($this->data[$this->position] === "\x0A")
			{
				$this->position++;
			}
			$this->position += strspn($this->data, "\x09\x20", $this->position);
		} while ($this->has_data() && $this->is_linear_whitespace());
		$this->value .= "\x20";
	}

	/**
	 * See what state to move to while within non-quoted header values
	 */
	protected function value()
	{
		if ($this->is_linear_whitespace())
		{
			$this->linear_whitespace();
			return;
		}

		switch ($this->data[$this->position])
		{
			case '"':
				$this->position++;
				// Workaround for ETags: we have to include the quotes as
				// part of the tag.
				if (strtolower($this->name) === 'etag')
				{
					$this->value .= '"';
					$this->state = 'value_char';
				}
				else
				{
					$this->state = 'quote';
				}
				break;

			case "\x0A":
				$this->position++;
				$this->state = 'new_line';
				break;

			default:
				$this->state = 'value_char';
				break;
		}
	}

	/**
	 * Parse a header value while outside quotes
	 *
	 * Appends everything up to the next tab, space, line feed or double
	 * quote to the current value.
	 */
	protected function value_char()
	{
		$len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
		$this->value .= substr($this->data, $this->position, $len);
		$this->position += $len;
		$this->state = 'value';
	}

	/**
	 * See what state to move to while within quoted header values
	 */
	protected function quote()
	{
		if ($this->is_linear_whitespace())
		{
			$this->linear_whitespace();
			return;
		}

		switch ($this->data[$this->position])
		{
			case '"':
				$this->position++;
				$this->state = 'value';
				break;

			case "\x0A":
				$this->position++;
				$this->state = 'new_line';
				break;

			case '\\':
				$this->position++;
				$this->state = 'quote_escaped';
				break;

			default:
				$this->state = 'quote_char';
				break;
		}
	}

	/**
	 * Parse a header value while within quotes
	 *
	 * Appends everything up to the next tab, space, line feed, double quote
	 * or backslash to the current value.
	 */
	protected function quote_char()
	{
		$len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
		$this->value .= substr($this->data, $this->position, $len);
		$this->position += $len;
		// Stay inside the quoted string. Returning to 'value' here would make
		// the rest of the quoted text be parsed as unquoted, so backslash
		// escapes after the first token would be copied literally.
		$this->state = 'quote';
	}

	/**
	 * Parse an escaped character within quotes
	 *
	 * Appends the character following the backslash verbatim.
	 */
	protected function quote_escaped()
	{
		$this->value .= $this->data[$this->position];
		$this->position++;
		$this->state = 'quote';
	}

	/**
	 * Parse the body
	 *
	 * Everything from the pointer onwards is the body. Any non-empty
	 * Transfer-Encoding header is removed and sends the body through
	 * chunked().
	 */
	protected function body()
	{
		$this->body = substr($this->data, $this->position);
		if (!empty($this->headers['transfer-encoding']))
		{
			unset($this->headers['transfer-encoding']);
			$this->state = 'chunked';
		}
		else
		{
			$this->state = 'emit';
		}
	}

	/**
	 * Parse a "Transfer-Encoding: chunked" body
	 *
	 * The body is replaced by the decoded data only when every chunk parses
	 * and the terminating "0" chunk is reached. Otherwise the body is left
	 * untouched, since it may already have been decoded upstream. Trailer
	 * headers after the last chunk are not supported.
	 */
	protected function chunked()
	{
		// One chunk-size line, anchored at the very start of the remaining
		// data: hex size, optional blanks, optional ";extension", line end.
		// It must not consume anything past the line end, as leading
		// whitespace after it belongs to the chunk data.
		$size_line = '/^([0-9a-f]+)[\x09\x20]*(?:;[^\r\n]*)?\r?\n/i';

		$decoded = '';
		$encoded = $this->body;

		while (true)
		{
			if (!preg_match($size_line, $encoded, $matches))
			{
				// Looks like it's not chunked after all
				$this->state = 'emit';
				return;
			}

			$length = hexdec($matches[1]);
			if (!is_int($length))
			{
				// hexdec() overflowed to a float: no real chunk is that big,
				// and substr() would reject the value.
				$this->state = 'emit';
				return;
			}

			$chunk_length = strlen($matches[0]);
			$decoded .= substr($encoded, $chunk_length, $length);
			// Drop the line break that terminates the chunk data. The cast
			// covers substr() returning false on PHP < 8.
			$encoded = ltrim((string) substr($encoded, $chunk_length + $length), "\r\n");

			if (trim($encoded) === '0')
			{
				$this->state = 'emit';
				$this->body = $decoded;
				return;
			}
		}
	}
}