]>
git.immae.eu Git - github/wallabag/wallabag.git/blob - inc/3rdparty/simplepie/SimplePie/HTTP/Parser.php
5 * A PHP-Based RSS and Atom Feed Framework.
6 * Takes the hard work out of managing a complete RSS/Atom solution.
8 * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
11 * Redistribution and use in source and binary forms, with or without modification, are
12 * permitted provided that the following conditions are met:
14 * * Redistributions of source code must retain the above copyright notice, this list of
15 * conditions and the following disclaimer.
17 * * Redistributions in binary form must reproduce the above copyright notice, this list
18 * of conditions and the following disclaimer in the documentation and/or other materials
19 * provided with the distribution.
21 * * Neither the name of the SimplePie Team nor the names of its contributors may be used
22 * to endorse or promote products derived from this software without specific prior
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
26 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
27 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
28 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
37 * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
39 * @author Geoffrey Sneddon
41 * @link http://simplepie.org/ SimplePie
42 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
43 * @todo phpDoc comments
48 * HTTP Response Parser
52 class SimplePie_HTTP_Parser
59 public $http_version = 0.0;
66 public $status_code = 0;
76 * Key/value pairs of the headers
80 public $headers = array();
83 * Body of the response
90 * Current state of the state machine
94 protected $state = 'http_version';
101 protected $data = '';
104 * Input data length (to avoid calling strlen() every time this is needed)
108 protected $data_length = 0;
111 * Current position of the pointer
115 protected $position = 0;
118 * Name of the header currently being parsed
122 protected $name = '';
125 * Value of the header currently being parsed
129 protected $value = '';
132 * Create an instance of the class with the input data
134 * @param string $data Input data
/**
 * Create an instance of the class with the input data
 *
 * @param string $data Input data
 */
public function __construct($data)
{
	$this->data = $data;
	// Cache the length once so has_data() never has to call strlen() again.
	$this->data_length = strlen($this->data);
}
143 * Parse the input data
145 * @return bool true on success, false on failure
/**
 * Parse the input data
 *
 * Drives the state machine: every state is named after the method that
 * handles it, so the loop simply calls the method whose name is currently
 * stored in $this->state. The loop stops when a handler sets the state to
 * false (failure), when the 'emit' state is reached, or when the pointer
 * runs off the end of the input.
 *
 * @return bool true on success, false on failure
 */
public function parse()
{
	while ($this->state && $this->state !== 'emit' && $this->has_data())
	{
		$state = $this->state;
		$this->$state();
	}

	// The state machine has stopped; the raw input is no longer needed.
	$this->data = '';

	// Stopping in 'body' means the headers were complete but no body bytes
	// followed them, which still counts as a successfully parsed response.
	if ($this->state === 'emit' || $this->state === 'body')
	{
		return true;
	}

	// Failure: do not leave half-parsed results on the public properties.
	$this->http_version = '';
	$this->status_code = '';
	$this->reason = '';
	$this->headers = array();
	$this->body = '';
	return false;
}
171 * Check whether there is data beyond the pointer
173 * @return bool true if there is further data, false if not
/**
 * Check whether there is data beyond the pointer
 *
 * @return bool true if there is further data, false if not
 */
protected function has_data()
{
	// A comparison already yields a boolean, so no cast is required.
	return $this->position < $this->data_length;
}
181 * See if the next character is LWS
183 * @return bool true if the next character is LWS, false if not
/**
 * See if the next character is LWS
 *
 * The character at the pointer counts as linear whitespace when it is a
 * tab or a space, or when it is a line feed that is immediately followed
 * by a tab or a space (a folded header line).
 *
 * @return bool true if the next character is LWS, false if not
 */
protected function is_linear_whitespace()
{
	$current = $this->data[$this->position];
	if ($current === "\x09" || $current === "\x20")
	{
		return true;
	}

	// Only a line feed with at least one more character after it can
	// still qualify, as the start of a folded continuation line.
	if ($current !== "\x0A" || !isset($this->data[$this->position + 1]))
	{
		return false;
	}

	$next = $this->data[$this->position + 1];
	return $next === "\x09" || $next === "\x20";
}
195 * Parse the HTTP version
/**
 * Parse the HTTP version
 *
 * Requires the input to contain at least one line feed and to start with
 * "HTTP/" (case-insensitive). The run of digits and dots that follows is
 * taken as the version; a version containing more than one dot is rejected.
 * On success the version is stored as a float, any tabs/spaces after it are
 * skipped and the state moves to 'status'. On failure the state becomes
 * false, which makes parse() stop and report failure.
 */
protected function http_version()
{
	$has_line_feed = strpos($this->data, "\x0A") !== false;
	$has_prefix = strtoupper(substr($this->data, 0, 5)) === 'HTTP/';
	if (!$has_line_feed || !$has_prefix)
	{
		$this->state = false;
		return;
	}

	// Offset 5 is the first byte after the "HTTP/" prefix.
	$len = strspn($this->data, '0123456789.', 5);
	$this->http_version = substr($this->data, 5, $len);
	$this->position += 5 + $len;

	if (substr_count($this->http_version, '.') > 1)
	{
		$this->state = false;
		return;
	}

	$this->http_version = (float) $this->http_version;
	// Skip the whitespace separating the version from the status code.
	$this->position += strspn($this->data, "\x09\x20", $this->position);
	$this->state = 'status';
}
222 * Parse the status code
/**
 * Parse the status code
 *
 * Reads the run of decimal digits at the pointer, stores it as an integer
 * and moves on to the 'reason' state. If there is no digit at the pointer
 * the state becomes false, which makes parse() stop and report failure.
 */
protected function status()
{
	$len = strspn($this->data, '0123456789', $this->position);
	if ($len === 0)
	{
		$this->state = false;
		return;
	}

	$this->status_code = (int) substr($this->data, $this->position, $len);
	$this->position += $len;
	$this->state = 'reason';
}
239 * Parse the reason phrase
/**
 * Parse the reason phrase
 *
 * Takes everything from the pointer up to the next line feed, strips
 * surrounding tabs, carriage returns and spaces, and stores the result.
 * The pointer is then moved past the line feed and the state changes to
 * 'new_line'.
 */
protected function reason()
{
	$len = strcspn($this->data, "\x0A", $this->position);
	$this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
	// The extra 1 steps over the line feed that ended the status line.
	$this->position += $len + 1;
	$this->state = 'new_line';
}
250 * Deal with a new line, shifting data around as needed
/**
 * Deal with a new line, shifting data around as needed
 *
 * Commits the header collected so far (if both its name and value are
 * non-empty) into $this->headers under a lower-cased key, then decides
 * whether the line starting at the pointer is the blank line that ends the
 * header section (state 'body') or another header (state 'name').
 */
protected function new_line()
{
	$this->value = trim($this->value, "\x0D\x20");
	if ($this->name !== '' && $this->value !== '')
	{
		$this->name = strtolower($this->name);
		// We should only use the last Content-Type header. c.f. issue #1
		if (isset($this->headers[$this->name]) && $this->name !== 'content-type')
		{
			// Repeated headers are folded into one comma-separated value.
			$this->headers[$this->name] .= ', ' . $this->value;
		}
		else
		{
			$this->headers[$this->name] = $this->value;
		}
	}

	// Start the next header from a clean slate; value() and friends append
	// to $this->value, so stale content would leak into the next header.
	$this->name = '';
	$this->value = '';

	// substr() has to slice the buffer itself: slicing the single character
	// at the pointer can never produce the two-byte CRLF sequence.
	if (substr($this->data, $this->position, 2) === "\x0D\x0A")
	{
		$this->position += 2;
		$this->state = 'body';
	}
	elseif ($this->data[$this->position] === "\x0A")
	{
		$this->position++;
		$this->state = 'body';
	}
	else
	{
		$this->state = 'name';
	}
}
287 * Parse a header name
/**
 * Parse a header name
 *
 * Scans from the pointer to the next colon or line feed:
 *  - if the input ends before either is found, the state becomes false and
 *    parse() reports failure;
 *  - if a line feed comes first (a line without a colon), the pointer is
 *    left on that line feed and control returns to 'new_line';
 *  - otherwise the text before the colon becomes the header name, the
 *    pointer moves past the colon and the state changes to 'value'.
 */
protected function name()
{
	$len = strcspn($this->data, "\x0A:", $this->position);
	$end = $this->position + $len;

	if (!isset($this->data[$end]))
	{
		$this->state = false;
		return;
	}

	if ($this->data[$end] === "\x0A")
	{
		$this->position += $len;
		$this->state = 'new_line';
		return;
	}

	$this->name = substr($this->data, $this->position, $len);
	// The extra 1 steps over the colon separating name and value.
	$this->position += $len + 1;
	$this->state = 'value';
}
313 * Parse LWS, replacing consecutive LWS characters with a single space
/**
 * Parse LWS, replacing consecutive LWS characters with a single space
 *
 * Each pass consumes an optional line break (CRLF or a bare LF) followed by
 * any run of tabs and spaces, and passes repeat for as long as more linear
 * whitespace follows. Whatever was consumed is represented in the header
 * value by exactly one space.
 */
protected function linear_whitespace()
{
	do
	{
		if (substr($this->data, $this->position, 2) === "\x0D\x0A")
		{
			$this->position += 2;
		}
		elseif ($this->data[$this->position] === "\x0A")
		{
			$this->position++;
		}
		$this->position += strspn($this->data, "\x09\x20", $this->position);
	} while ($this->has_data() && $this->is_linear_whitespace());

	$this->value .= "\x20";
}
333 * See what state to move to while within non-quoted header values
/**
 * See what state to move to while within non-quoted header values
 *
 * Linear whitespace is collapsed via linear_whitespace(). Otherwise the
 * character at the pointer selects the next state: a double quote opens a
 * quoted section, a line feed ends the header line, and anything else is
 * ordinary value text handled by value_char().
 */
protected function value()
{
	if ($this->is_linear_whitespace())
	{
		$this->linear_whitespace();
		return;
	}

	switch ($this->data[$this->position])
	{
		case '"':
			// Workaround for ETags: we have to include the quotes as
			// part of the tag.
			if (strtolower($this->name) === 'etag')
			{
				$this->value .= '"';
				$this->position++;
				$this->state = 'value_char';
				break;
			}
			$this->position++;
			$this->state = 'quote';
			break;

		case "\x0A":
			$this->position++;
			$this->state = 'new_line';
			break;

		default:
			$this->state = 'value_char';
			break;
	}
}
372 * Parse a header value while outside quotes
/**
 * Parse a header value while outside quotes
 *
 * Appends everything up to (but not including) the next tab, space, line
 * feed or double quote to the current value, then returns to the 'value'
 * state so that delimiter can be interpreted.
 */
protected function value_char()
{
	$len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
	$this->value .= substr($this->data, $this->position, $len);
	$this->position += $len;
	$this->state = 'value';
}
383 * See what state to move to while within quoted header values
/**
 * See what state to move to while within quoted header values
 *
 * Linear whitespace is collapsed via linear_whitespace(). Otherwise the
 * character at the pointer selects the next state: a double quote closes
 * the quoted section, a line feed ends the header line, a backslash
 * introduces an escaped character, and anything else is quoted text
 * handled by quote_char().
 */
protected function quote()
{
	if ($this->is_linear_whitespace())
	{
		$this->linear_whitespace();
		return;
	}

	switch ($this->data[$this->position])
	{
		case '"':
			$this->position++;
			$this->state = 'value';
			break;

		case "\x0A":
			$this->position++;
			$this->state = 'new_line';
			break;

		case '\\':
			$this->position++;
			$this->state = 'quote_escaped';
			break;

		default:
			$this->state = 'quote_char';
			break;
	}
}
418 * Parse a header value while within quotes
/**
 * Parse a header value while within quotes
 *
 * Appends everything up to (but not including) the next tab, space, line
 * feed, double quote or backslash to the current value.
 */
protected function quote_char()
{
	$len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
	$this->value .= substr($this->data, $this->position, $len);
	$this->position += $len;
	// Stay inside the quoted section. Returning to 'value' here would drop
	// the quote context, so a following backslash escape would be copied
	// verbatim instead of being decoded and the closing quote would be
	// mistaken for an opening one.
	$this->state = 'quote';
}
429 * Parse an escaped character within quotes
/**
 * Parse an escaped character within quotes
 *
 * The character at the pointer is copied into the value literally,
 * whatever it is, and parsing continues inside the quoted section.
 */
protected function quote_escaped()
{
	$this->value .= $this->data[$this->position];
	// Step over the escaped character so it is not interpreted again.
	$this->position++;
	$this->state = 'quote';
}
/**
 * Parse the body
 *
 * Everything from the pointer to the end of the input becomes the body.
 * If a non-empty Transfer-Encoding header was collected, that header is
 * removed and the body is handed to the 'chunked' state for decoding;
 * otherwise parsing is finished and the state becomes 'emit'.
 */
protected function body()
{
	$this->body = substr($this->data, $this->position);

	if (empty($this->headers['transfer-encoding']))
	{
		$this->state = 'emit';
		return;
	}

	unset($this->headers['transfer-encoding']);
	$this->state = 'chunked';
}
456 * Parse a "Transfer-Encoding: chunked" body
458 protected function chunked()
460 if (!preg_match('/^[0-9a-f]+(\s|\r|\n)+/mi', trim($this->body
)))
462 $this->state
= 'emit';
467 $encoded = $this->body
;
471 $is_chunked = (bool) preg_match( '/^([0-9a-f]+)(\s|\r|\n)+/mi', $encoded, $matches );
474 // Looks like it's not chunked after all
475 $this->state
= 'emit';
479 $length = hexdec($matches[1]);
480 $chunk_length = strlen($matches[0]);
481 $decoded .= $part = substr($encoded, $chunk_length, $length);
482 $encoded = ltrim(substr($encoded, $chunk_length +
$length), "\r\n");
484 if (trim($encoded) === '0')
486 $this->state
= 'emit';
487 $this->body
= $decoded;