]> git.immae.eu Git - github/wallabag/wallabag.git/blob - inc/3rdparty/simplepie/SimplePie/Content/Type/Sniffer.php
poche now uses Full Text RSS to fetch content
[github/wallabag/wallabag.git] / inc / 3rdparty / simplepie / SimplePie / Content / Type / Sniffer.php
1 <?php
2 /**
3 * SimplePie
4 *
5 * A PHP-Based RSS and Atom Feed Framework.
6 * Takes the hard work out of managing a complete RSS/Atom solution.
7 *
8 * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without modification, are
12 * permitted provided that the following conditions are met:
13 *
14 * * Redistributions of source code must retain the above copyright notice, this list of
15 * conditions and the following disclaimer.
16 *
17 * * Redistributions in binary form must reproduce the above copyright notice, this list
18 * of conditions and the following disclaimer in the documentation and/or other materials
19 * provided with the distribution.
20 *
21 * * Neither the name of the SimplePie Team nor the names of its contributors may be used
22 * to endorse or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
26 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
27 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
28 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 *
35 * @package SimplePie
36 * @version 1.3-dev
37 * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
38 * @author Ryan Parman
39 * @author Geoffrey Sneddon
40 * @author Ryan McCue
41 * @link http://simplepie.org/ SimplePie
42 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
43 * @todo phpDoc comments
44 */
45
46
47 /**
48 * Content-type sniffing
49 *
50 * Based on the rules in http://tools.ietf.org/html/draft-abarth-mime-sniff-06
51 * @package SimplePie
52 */
class SimplePie_Content_Type_Sniffer
{
    /**
     * File object whose `headers` and `body` are inspected
     *
     * @var SimplePie_File
     */
    public $file;

    /**
     * Create an instance of the class with the input file
     *
     * @param SimplePie_File $file Input file; this class reads its
     *                             `headers` array and `body` string
     */
    public function __construct($file)
    {
        $this->file = $file;
    }

    /**
     * Get the Content-Type of the specified file
     *
     * Starts from the declared Content-Type header (if any) and decides
     * whether to trust it or to sniff the body instead:
     *
     * - no header, `unknown/unknown` or `application/unknown`: sniff via unknown()
     * - one of a few exact `text/plain` header values without a
     *   Content-Encoding header: sniff via text_or_binary()
     * - XML types (`text/xml`, `application/xml`, `*+xml`): trusted as-is
     * - `image/*`: body signature wins, declared type is the fallback
     * - `text/html`: checked for a feed root element via feed_or_html()
     * - anything else: the declared type, lower-cased, parameters stripped
     *
     * @return string Actual Content-Type
     */
    public function get_type()
    {
        if (!isset($this->file->headers['content-type']))
        {
            return $this->unknown();
        }

        $content_type = $this->file->headers['content-type'];

        // Only these exact header values trigger text/binary sniffing; the
        // comparison is deliberately strict and case-sensitive.
        $sniffable_plain = array(
            'text/plain',
            'text/plain; charset=ISO-8859-1',
            'text/plain; charset=iso-8859-1',
            'text/plain; charset=UTF-8',
        );
        if (!isset($this->file->headers['content-encoding'])
            && in_array($content_type, $sniffable_plain, true))
        {
            return $this->text_or_binary();
        }

        // Drop any parameters (e.g. "; charset=...") and normalise the type.
        if (($pos = strpos($content_type, ';')) !== false)
        {
            $official = substr($content_type, 0, $pos);
        }
        else
        {
            $official = $content_type;
        }
        $official = trim(strtolower($official));

        if ($official === 'unknown/unknown'
            || $official === 'application/unknown')
        {
            return $this->unknown();
        }
        elseif (substr($official, -4) === '+xml'
            || $official === 'text/xml'
            || $official === 'application/xml')
        {
            return $official;
        }
        elseif (substr($official, 0, 6) === 'image/')
        {
            // Prefer the type implied by the body's signature; fall back to
            // the declared type when no known signature matches.
            if ($return = $this->image())
            {
                return $return;
            }
            else
            {
                return $official;
            }
        }
        elseif ($official === 'text/html')
        {
            return $this->feed_or_html();
        }
        else
        {
            return $official;
        }
    }

    /**
     * Sniff text or binary
     *
     * A body starting with a UTF-16 BE/LE, UTF-32 BE or UTF-8 byte order
     * mark is treated as text. Otherwise the body is binary if it contains
     * any byte in the ranges 0x00-0x08, 0x0E-0x1A or 0x1C-0x1F.
     *
     * @return string 'text/plain' or 'application/octet-stream'
     */
    public function text_or_binary()
    {
        if (substr($this->file->body, 0, 2) === "\xFE\xFF"
            || substr($this->file->body, 0, 2) === "\xFF\xFE"
            || substr($this->file->body, 0, 4) === "\x00\x00\xFE\xFF"
            || substr($this->file->body, 0, 3) === "\xEF\xBB\xBF")
        {
            return 'text/plain';
        }
        elseif (preg_match('/[\x00-\x08\x0E-\x1A\x1C-\x1F]/', $this->file->body))
        {
            // Fixed: previously returned the misspelled 'application/octect-stream'.
            return 'application/octet-stream';
        }
        else
        {
            return 'text/plain';
        }
    }

    /**
     * Sniff unknown
     *
     * Used when there is no usable declared type. Checks, in order: an HTML
     * opening (after leading whitespace), PDF and PostScript signatures,
     * the image signatures known to image(), and finally falls back to
     * text_or_binary().
     *
     * @return string Actual Content-Type
     */
    public function unknown()
    {
        // HTML markers may be preceded by whitespace; the binary signatures
        // below must appear at the very start of the body.
        $ws = strspn($this->file->body, "\x09\x0A\x0B\x0C\x0D\x20");
        if (strtolower(substr($this->file->body, $ws, 14)) === '<!doctype html'
            || strtolower(substr($this->file->body, $ws, 5)) === '<html'
            || strtolower(substr($this->file->body, $ws, 7)) === '<script')
        {
            return 'text/html';
        }
        elseif (substr($this->file->body, 0, 5) === '%PDF-')
        {
            return 'application/pdf';
        }
        elseif (substr($this->file->body, 0, 11) === '%!PS-Adobe-')
        {
            return 'application/postscript';
        }
        elseif ($image = $this->image())
        {
            // Same signature table as image(), reused instead of duplicated.
            return $image;
        }
        else
        {
            return $this->text_or_binary();
        }
    }

    /**
     * Sniff images
     *
     * Compares the first bytes of the body against the GIF, PNG, JPEG, BMP
     * and ICO signatures.
     *
     * @return string|false Image Content-Type, or false when no signature matches
     */
    public function image()
    {
        if (substr($this->file->body, 0, 6) === 'GIF87a'
            || substr($this->file->body, 0, 6) === 'GIF89a')
        {
            return 'image/gif';
        }
        elseif (substr($this->file->body, 0, 8) === "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A")
        {
            return 'image/png';
        }
        elseif (substr($this->file->body, 0, 3) === "\xFF\xD8\xFF")
        {
            return 'image/jpeg';
        }
        elseif (substr($this->file->body, 0, 2) === "\x42\x4D")
        {
            return 'image/bmp';
        }
        elseif (substr($this->file->body, 0, 4) === "\x00\x00\x01\x00")
        {
            return 'image/vnd.microsoft.icon';
        }
        else
        {
            return false;
        }
    }

    /**
     * Sniff HTML
     *
     * Scans the start of a body declared as text/html, skipping whitespace,
     * comments (`<!-- -->`), other `<!...>` declarations and processing
     * instructions (`<?...?>`). The first remaining tag decides the result:
     * `<rss` or `<rdf:RDF` means RSS, `<feed` means Atom, anything else
     * (including unterminated constructs and non-tag text) means HTML.
     *
     * @return string 'application/rss+xml', 'application/atom+xml' or 'text/html'
     */
    public function feed_or_html()
    {
        $body = $this->file->body;
        $len = strlen($body);
        $pos = strspn($body, "\x09\x0A\x0D\x20");

        while ($pos < $len)
        {
            switch ($body[$pos])
            {
                case "\x09":
                case "\x0A":
                case "\x0D":
                case "\x20":
                    // Skip the whole whitespace run, then re-enter the while
                    // loop ("continue 2" because switch counts as a loop level).
                    $pos += strspn($body, "\x09\x0A\x0D\x20", $pos);
                    continue 2;

                case '<':
                    // Step past '<' and classify what follows below.
                    $pos++;
                    break;

                default:
                    // Text outside of any tag: not a feed.
                    return 'text/html';
            }

            if (substr($body, $pos, 3) === '!--')
            {
                // Comment: resume scanning after the closing '-->'.
                $pos += 3;
                if ($pos < $len && ($pos = strpos($body, '-->', $pos)) !== false)
                {
                    $pos += 3;
                }
                else
                {
                    return 'text/html';
                }
            }
            elseif (substr($body, $pos, 1) === '!')
            {
                // Other '<!...' declaration: resume after the next '>'.
                if ($pos < $len && ($pos = strpos($body, '>', $pos)) !== false)
                {
                    $pos++;
                }
                else
                {
                    return 'text/html';
                }
            }
            elseif (substr($body, $pos, 1) === '?')
            {
                // Processing instruction: resume after the closing '?>'.
                if ($pos < $len && ($pos = strpos($body, '?>', $pos)) !== false)
                {
                    $pos += 2;
                }
                else
                {
                    return 'text/html';
                }
            }
            elseif (substr($body, $pos, 3) === 'rss'
                || substr($body, $pos, 7) === 'rdf:RDF')
            {
                return 'application/rss+xml';
            }
            elseif (substr($body, $pos, 4) === 'feed')
            {
                return 'application/atom+xml';
            }
            else
            {
                return 'text/html';
            }
        }

        // Ran out of input without seeing a feed root element.
        return 'text/html';
    }
}
325