diff options
author | ArthurHoaro <arthur@hoa.ro> | 2017-09-30 11:04:13 +0200 |
---|---|---|
committer | ArthurHoaro <arthur@hoa.ro> | 2017-10-28 14:35:49 +0200 |
commit | d65342e304f92643ba922200953cfebc51e1e482 (patch) | |
tree | 3097c77bb4dd0590c4644422b5dc4369a4186eb7 /tests | |
parent | a59bbf50d7530d7e82a91896a210b9da49cb1568 (diff) | |
download | Shaarli-d65342e304f92643ba922200953cfebc51e1e482.tar.gz Shaarli-d65342e304f92643ba922200953cfebc51e1e482.tar.zst Shaarli-d65342e304f92643ba922200953cfebc51e1e482.zip |
Extract the title/charset during page download, and check content type
Use CURLOPT_WRITEFUNCTION to check the response code and content type (only allow HTML).
Also extract the title and charset while downloading chunks of data, and stop the download once everything has been extracted.
Closes #579
Diffstat (limited to 'tests')
-rw-r--r-- | tests/LinkUtilsTest.php | 244 |
1 files changed, 224 insertions, 20 deletions
diff --git a/tests/LinkUtilsTest.php b/tests/LinkUtilsTest.php index 7c0d4b0b..ef650f44 100644 --- a/tests/LinkUtilsTest.php +++ b/tests/LinkUtilsTest.php | |||
@@ -29,27 +29,13 @@ class LinkUtilsTest extends PHPUnit_Framework_TestCase | |||
29 | } | 29 | } |
30 | 30 | ||
31 | /** | 31 | /** |
32 | * Test get_charset() with all priorities. | ||
33 | */ | ||
34 | public function testGetCharset() | ||
35 | { | ||
36 | $headers = array('Content-Type' => 'text/html; charset=Headers'); | ||
37 | $html = '<html><meta>stuff</meta><meta charset="Html"/></html>'; | ||
38 | $default = 'default'; | ||
39 | $this->assertEquals('headers', get_charset($headers, $html, $default)); | ||
40 | $this->assertEquals('html', get_charset(array(), $html, $default)); | ||
41 | $this->assertEquals($default, get_charset(array(), '', $default)); | ||
42 | $this->assertEquals('utf-8', get_charset(array(), '')); | ||
43 | } | ||
44 | |||
45 | /** | ||
46 | * Test headers_extract_charset() when the charset is found. | 32 | * Test headers_extract_charset() when the charset is found. |
47 | */ | 33 | */ |
48 | public function testHeadersExtractExistentCharset() | 34 | public function testHeadersExtractExistentCharset() |
49 | { | 35 | { |
50 | $charset = 'x-MacCroatian'; | 36 | $charset = 'x-MacCroatian'; |
51 | $headers = array('Content-Type' => 'text/html; charset='. $charset); | 37 | $headers = 'text/html; charset='. $charset; |
52 | $this->assertEquals(strtolower($charset), headers_extract_charset($headers)); | 38 | $this->assertEquals(strtolower($charset), header_extract_charset($headers)); |
53 | } | 39 | } |
54 | 40 | ||
55 | /** | 41 | /** |
@@ -57,11 +43,11 @@ class LinkUtilsTest extends PHPUnit_Framework_TestCase | |||
57 | */ | 43 | */ |
58 | public function testHeadersExtractNonExistentCharset() | 44 | public function testHeadersExtractNonExistentCharset() |
59 | { | 45 | { |
60 | $headers = array(); | 46 | $headers = ''; |
61 | $this->assertFalse(headers_extract_charset($headers)); | 47 | $this->assertFalse(header_extract_charset($headers)); |
62 | 48 | ||
63 | $headers = array('Content-Type' => 'text/html'); | 49 | $headers = 'text/html'; |
64 | $this->assertFalse(headers_extract_charset($headers)); | 50 | $this->assertFalse(header_extract_charset($headers)); |
65 | } | 51 | } |
66 | 52 | ||
67 | /** | 53 | /** |
@@ -86,6 +72,131 @@ class LinkUtilsTest extends PHPUnit_Framework_TestCase | |||
86 | } | 72 | } |
87 | 73 | ||
88 | /** | 74 | /** |
75 | * Test the download callback with valid values | ||
76 | */ | ||
77 | public function testCurlDownloadCallbackOk() | ||
78 | { | ||
79 | $callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ok'); | ||
80 | $data = [ | ||
81 | 'HTTP/1.1 200 OK', | ||
82 | 'Server: GitHub.com', | ||
83 | 'Date: Sat, 28 Oct 2017 12:01:33 GMT', | ||
84 | 'Content-Type: text/html; charset=utf-8', | ||
85 | 'Status: 200 OK', | ||
86 | 'end' => 'th=device-width"><title>Refactoring · GitHub</title><link rel="search" type="application/opensea', | ||
87 | '<title>ignored</title>', | ||
88 | ]; | ||
89 | foreach ($data as $key => $line) { | ||
90 | $ignore = null; | ||
91 | $expected = $key !== 'end' ? strlen($line) : false; | ||
92 | $this->assertEquals($expected, $callback($ignore, $line)); | ||
93 | if ($expected === false) { | ||
94 | break; | ||
95 | } | ||
96 | } | ||
97 | $this->assertEquals('utf-8', $charset); | ||
98 | $this->assertEquals('Refactoring · GitHub', $title); | ||
99 | } | ||
100 | |||
101 | /** | ||
102 | * Test the download callback with valid values and no charset | ||
103 | */ | ||
104 | public function testCurlDownloadCallbackOkNoCharset() | ||
105 | { | ||
106 | $callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_no_charset'); | ||
107 | $data = [ | ||
108 | 'HTTP/1.1 200 OK', | ||
109 | 'end' => 'th=device-width"><title>Refactoring · GitHub</title><link rel="search" type="application/opensea', | ||
110 | '<title>ignored</title>', | ||
111 | ]; | ||
112 | foreach ($data as $key => $line) { | ||
113 | $ignore = null; | ||
114 | $this->assertEquals(strlen($line), $callback($ignore, $line)); | ||
115 | } | ||
116 | $this->assertEmpty($charset); | ||
117 | $this->assertEquals('Refactoring · GitHub', $title); | ||
118 | } | ||
119 | |||
120 | /** | ||
121 | * Test the download callback with valid values and the charset declared in an HTML meta tag | ||
122 | */ | ||
123 | public function testCurlDownloadCallbackOkHtmlCharset() | ||
124 | { | ||
125 | $callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_no_charset'); | ||
126 | $data = [ | ||
127 | 'HTTP/1.1 200 OK', | ||
128 | '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />', | ||
129 | 'end' => 'th=device-width"><title>Refactoring · GitHub</title><link rel="search" type="application/opensea', | ||
130 | '<title>ignored</title>', | ||
131 | ]; | ||
132 | foreach ($data as $key => $line) { | ||
133 | $ignore = null; | ||
134 | $expected = $key !== 'end' ? strlen($line) : false; | ||
135 | $this->assertEquals($expected, $callback($ignore, $line)); | ||
136 | if ($expected === false) { | ||
137 | break; | ||
138 | } | ||
139 | } | ||
140 | $this->assertEquals('utf-8', $charset); | ||
141 | $this->assertEquals('Refactoring · GitHub', $title); | ||
142 | } | ||
143 | |||
144 | /** | ||
145 | * Test the download callback with valid values and no title | ||
146 | */ | ||
147 | public function testCurlDownloadCallbackOkNoTitle() | ||
148 | { | ||
149 | $callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ok'); | ||
150 | $data = [ | ||
151 | 'HTTP/1.1 200 OK', | ||
152 | 'end' => 'th=device-width">Refactoring · GitHub<link rel="search" type="application/opensea', | ||
153 | 'ignored', | ||
154 | ]; | ||
155 | foreach ($data as $key => $line) { | ||
156 | $ignore = null; | ||
157 | $this->assertEquals(strlen($line), $callback($ignore, $line)); | ||
158 | } | ||
159 | $this->assertEquals('utf-8', $charset); | ||
160 | $this->assertEmpty($title); | ||
161 | } | ||
162 | |||
163 | /** | ||
164 | * Test the download callback with an invalid content type. | ||
165 | */ | ||
166 | public function testCurlDownloadCallbackInvalidContentType() | ||
167 | { | ||
168 | $callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ct_ko'); | ||
169 | $ignore = null; | ||
170 | $this->assertFalse($callback($ignore, '')); | ||
171 | $this->assertEmpty($charset); | ||
172 | $this->assertEmpty($title); | ||
173 | } | ||
174 | |||
175 | /** | ||
176 | * Test the download callback with an invalid response code. | ||
177 | */ | ||
178 | public function testCurlDownloadCallbackInvalidResponseCode() | ||
179 | { | ||
180 | $callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_rc_ko'); | ||
181 | $ignore = null; | ||
182 | $this->assertFalse($callback($ignore, '')); | ||
183 | $this->assertEmpty($charset); | ||
184 | $this->assertEmpty($title); | ||
185 | } | ||
186 | |||
187 | /** | ||
188 | * Test the download callback with an invalid content type and response code. | ||
189 | */ | ||
190 | public function testCurlDownloadCallbackInvalidContentTypeAndResponseCode() | ||
191 | { | ||
192 | $callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_rs_ct_ko'); | ||
193 | $ignore = null; | ||
194 | $this->assertFalse($callback($ignore, '')); | ||
195 | $this->assertEmpty($charset); | ||
196 | $this->assertEmpty($title); | ||
197 | } | ||
198 | |||
199 | /** | ||
89 | * Test count_private. | 200 | * Test count_private. |
90 | */ | 201 | */ |
91 | public function testCountPrivateLinks() | 202 | public function testCountPrivateLinks() |
@@ -182,3 +293,96 @@ class LinkUtilsTest extends PHPUnit_Framework_TestCase | |||
182 | return str_replace('$1', $hashtag, $hashtagLink); | 293 | return str_replace('$1', $hashtag, $hashtagLink); |
183 | } | 294 | } |
184 | } | 295 | } |
296 | |||
297 | // Old-style mocks: PHPUnit doesn't allow mocking functions | ||
298 | |||
299 | /** | ||
300 | * Returns code 200 or html content type. | ||
301 | * | ||
302 | * @param resource $ch cURL resource | ||
303 | * @param int $type cURL info type | ||
304 | * | ||
305 | * @return int|string 200 or 'text/html; charset=utf-8' | ||
306 | */ | ||
307 | function ut_curl_getinfo_ok($ch, $type) | ||
308 | { | ||
309 | switch ($type) { | ||
310 | case CURLINFO_RESPONSE_CODE: | ||
311 | return 200; | ||
312 | case CURLINFO_CONTENT_TYPE: | ||
313 | return 'text/html; charset=utf-8'; | ||
314 | } | ||
315 | } | ||
316 | |||
317 | /** | ||
318 | * Returns code 200 or html content type without charset. | ||
319 | * | ||
320 | * @param resource $ch cURL resource | ||
321 | * @param int $type cURL info type | ||
322 | * | ||
323 | * @return int|string 200 or 'text/html' | ||
324 | */ | ||
325 | function ut_curl_getinfo_no_charset($ch, $type) | ||
326 | { | ||
327 | switch ($type) { | ||
328 | case CURLINFO_RESPONSE_CODE: | ||
329 | return 200; | ||
330 | case CURLINFO_CONTENT_TYPE: | ||
331 | return 'text/html'; | ||
332 | } | ||
333 | } | ||
334 | |||
335 | /** | ||
336 | * Invalid response code. | ||
337 | * | ||
338 | * @param resource $ch cURL resource | ||
339 | * @param int $type cURL info type | ||
340 | * | ||
341 | * @return int|string 404 or 'text/html; charset=utf-8' | ||
342 | */ | ||
343 | function ut_curl_getinfo_rc_ko($ch, $type) | ||
344 | { | ||
345 | switch ($type) { | ||
346 | case CURLINFO_RESPONSE_CODE: | ||
347 | return 404; | ||
348 | case CURLINFO_CONTENT_TYPE: | ||
349 | return 'text/html; charset=utf-8'; | ||
350 | } | ||
351 | } | ||
352 | |||
353 | /** | ||
354 | * Invalid content type. | ||
355 | * | ||
356 | * @param resource $ch cURL resource | ||
357 | * @param int $type cURL info type | ||
358 | * | ||
359 | * @return int|string 200 or 'text/plain' | ||
360 | */ | ||
361 | function ut_curl_getinfo_ct_ko($ch, $type) | ||
362 | { | ||
363 | switch ($type) { | ||
364 | case CURLINFO_RESPONSE_CODE: | ||
365 | return 200; | ||
366 | case CURLINFO_CONTENT_TYPE: | ||
367 | return 'text/plain'; | ||
368 | } | ||
369 | } | ||
370 | |||
371 | /** | ||
372 | * Invalid response code and content type. | ||
373 | * | ||
374 | * @param resource $ch cURL resource | ||
375 | * @param int $type cURL info type | ||
376 | * | ||
377 | * @return int|string 404 or 'text/plain' | ||
378 | */ | ||
379 | function ut_curl_getinfo_rs_ct_ko($ch, $type) | ||
380 | { | ||
381 | switch ($type) { | ||
382 | case CURLINFO_RESPONSE_CODE: | ||
383 | return 404; | ||
384 | case CURLINFO_CONTENT_TYPE: | ||
385 | return 'text/plain'; | ||
386 | } | ||
387 | } | ||
388 | |||