diff options
-rw-r--r-- | application/http/HttpAccess.php | 22 | ||||
-rw-r--r-- | application/http/HttpUtils.php | 123 | ||||
-rw-r--r-- | application/http/MetadataRetriever.php | 1 | ||||
-rw-r--r-- | tests/bookmark/LinkUtilsTest.php | 223 | ||||
-rw-r--r-- | tests/http/MetadataRetrieverTest.php | 45 |
5 files changed, 237 insertions, 177 deletions
diff --git a/application/http/HttpAccess.php b/application/http/HttpAccess.php index 81d9e076..646a5264 100644 --- a/application/http/HttpAccess.php +++ b/application/http/HttpAccess.php | |||
@@ -14,9 +14,14 @@ namespace Shaarli\Http; | |||
14 | */ | 14 | */ |
15 | class HttpAccess | 15 | class HttpAccess |
16 | { | 16 | { |
17 | public function getHttpResponse($url, $timeout = 30, $maxBytes = 4194304, $curlWriteFunction = null) | 17 | public function getHttpResponse( |
18 | { | 18 | $url, |
19 | return get_http_response($url, $timeout, $maxBytes, $curlWriteFunction); | 19 | $timeout = 30, |
20 | $maxBytes = 4194304, | ||
21 | $curlHeaderFunction = null, | ||
22 | $curlWriteFunction = null | ||
23 | ) { | ||
24 | return get_http_response($url, $timeout, $maxBytes, $curlHeaderFunction, $curlWriteFunction); | ||
20 | } | 25 | } |
21 | 26 | ||
22 | public function getCurlDownloadCallback( | 27 | public function getCurlDownloadCallback( |
@@ -24,16 +29,19 @@ class HttpAccess | |||
24 | &$title, | 29 | &$title, |
25 | &$description, | 30 | &$description, |
26 | &$keywords, | 31 | &$keywords, |
27 | $retrieveDescription, | 32 | $retrieveDescription |
28 | $curlGetInfo = 'curl_getinfo' | ||
29 | ) { | 33 | ) { |
30 | return get_curl_download_callback( | 34 | return get_curl_download_callback( |
31 | $charset, | 35 | $charset, |
32 | $title, | 36 | $title, |
33 | $description, | 37 | $description, |
34 | $keywords, | 38 | $keywords, |
35 | $retrieveDescription, | 39 | $retrieveDescription |
36 | $curlGetInfo | ||
37 | ); | 40 | ); |
38 | } | 41 | } |
42 | |||
43 | public function getCurlHeaderCallback(&$charset, $curlGetInfo = 'curl_getinfo') | ||
44 | { | ||
45 | return get_curl_header_callback($charset, $curlGetInfo); | ||
46 | } | ||
39 | } | 47 | } |
diff --git a/application/http/HttpUtils.php b/application/http/HttpUtils.php index 9f414073..28c12969 100644 --- a/application/http/HttpUtils.php +++ b/application/http/HttpUtils.php | |||
@@ -6,12 +6,14 @@ use Shaarli\Http\Url; | |||
6 | * GET an HTTP URL to retrieve its content | 6 | * GET an HTTP URL to retrieve its content |
7 | * Uses the cURL library or a fallback method | 7 | * Uses the cURL library or a fallback method |
8 | * | 8 | * |
9 | * @param string $url URL to get (http://...) | 9 | * @param string $url URL to get (http://...) |
10 | * @param int $timeout network timeout (in seconds) | 10 | * @param int $timeout network timeout (in seconds) |
11 | * @param int $maxBytes maximum downloaded bytes (default: 4 MiB) | 11 | * @param int $maxBytes maximum downloaded bytes (default: 4 MiB) |
12 | * @param callable|string $curlWriteFunction Optional callback called during the download (cURL CURLOPT_WRITEFUNCTION). | 12 | * @param callable|string $curlHeaderFunction Optional callback called during the download of headers |
13 | * Can be used to add download conditions on the | 13 | * (CURLOPT_HEADERFUNCTION) |
14 | * headers (response code, content type, etc.). | 14 | * @param callable|string $curlWriteFunction Optional callback called during the download (cURL CURLOPT_WRITEFUNCTION). |
15 | * Can be used to add download conditions on the | ||
16 | * headers (response code, content type, etc.). | ||
15 | * | 17 | * |
16 | * @return array HTTP response headers, downloaded content | 18 | * @return array HTTP response headers, downloaded content |
17 | * | 19 | * |
@@ -35,8 +37,13 @@ use Shaarli\Http\Url; | |||
35 | * @see http://stackoverflow.com/q/9183178 | 37 | * @see http://stackoverflow.com/q/9183178 |
36 | * @see http://stackoverflow.com/q/1462720 | 38 | * @see http://stackoverflow.com/q/1462720 |
37 | */ | 39 | */ |
38 | function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteFunction = null) | 40 | function get_http_response( |
39 | { | 41 | $url, |
42 | $timeout = 30, | ||
43 | $maxBytes = 4194304, | ||
44 | $curlHeaderFunction = null, | ||
45 | $curlWriteFunction = null | ||
46 | ) { | ||
40 | $urlObj = new Url($url); | 47 | $urlObj = new Url($url); |
41 | $cleanUrl = $urlObj->idnToAscii(); | 48 | $cleanUrl = $urlObj->idnToAscii(); |
42 | 49 | ||
@@ -70,7 +77,8 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteF | |||
70 | // General cURL settings | 77 | // General cURL settings |
71 | curl_setopt($ch, CURLOPT_AUTOREFERER, true); | 78 | curl_setopt($ch, CURLOPT_AUTOREFERER, true); |
72 | curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); | 79 | curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); |
73 | curl_setopt($ch, CURLOPT_HEADER, true); | 80 | // Default header download if the $curlHeaderFunction is not defined |
81 | curl_setopt($ch, CURLOPT_HEADER, !is_callable($curlHeaderFunction)); | ||
74 | curl_setopt( | 82 | curl_setopt( |
75 | $ch, | 83 | $ch, |
76 | CURLOPT_HTTPHEADER, | 84 | CURLOPT_HTTPHEADER, |
@@ -81,25 +89,21 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteF | |||
81 | curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); | 89 | curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); |
82 | curl_setopt($ch, CURLOPT_USERAGENT, $userAgent); | 90 | curl_setopt($ch, CURLOPT_USERAGENT, $userAgent); |
83 | 91 | ||
84 | if (is_callable($curlWriteFunction)) { | ||
85 | curl_setopt($ch, CURLOPT_WRITEFUNCTION, $curlWriteFunction); | ||
86 | } | ||
87 | |||
88 | // Max download size management | 92 | // Max download size management |
89 | curl_setopt($ch, CURLOPT_BUFFERSIZE, 1024*16); | 93 | curl_setopt($ch, CURLOPT_BUFFERSIZE, 1024*16); |
90 | curl_setopt($ch, CURLOPT_NOPROGRESS, false); | 94 | curl_setopt($ch, CURLOPT_NOPROGRESS, false); |
95 | if (is_callable($curlHeaderFunction)) { | ||
96 | curl_setopt($ch, CURLOPT_HEADERFUNCTION, $curlHeaderFunction); | ||
97 | } | ||
98 | if (is_callable($curlWriteFunction)) { | ||
99 | curl_setopt($ch, CURLOPT_WRITEFUNCTION, $curlWriteFunction); | ||
100 | } | ||
91 | curl_setopt( | 101 | curl_setopt( |
92 | $ch, | 102 | $ch, |
93 | CURLOPT_PROGRESSFUNCTION, | 103 | CURLOPT_PROGRESSFUNCTION, |
94 | function ($arg0, $arg1, $arg2, $arg3, $arg4 = 0) use ($maxBytes) { | 104 | function ($arg0, $arg1, $arg2, $arg3, $arg4) use ($maxBytes) { |
95 | if (version_compare(phpversion(), '5.5', '<')) { | 105 | $downloaded = $arg2; |
96 | // PHP version lower than 5.5 | 106 | |
97 | // Callback has 4 arguments | ||
98 | $downloaded = $arg1; | ||
99 | } else { | ||
100 | // Callback has 5 arguments | ||
101 | $downloaded = $arg2; | ||
102 | } | ||
103 | // Non-zero return stops downloading | 107 | // Non-zero return stops downloading |
104 | return ($downloaded > $maxBytes) ? 1 : 0; | 108 | return ($downloaded > $maxBytes) ? 1 : 0; |
105 | } | 109 | } |
@@ -493,6 +497,46 @@ function is_https($server) | |||
493 | * Get cURL callback function for CURLOPT_WRITEFUNCTION | 497 | * Get cURL callback function for CURLOPT_WRITEFUNCTION |
494 | * | 498 | * |
495 | * @param string $charset to extract from the downloaded page (reference) | 499 | * @param string $charset to extract from the downloaded page (reference) |
500 | * @param string $curlGetInfo Optionally overrides curl_getinfo function | ||
501 | * | ||
502 | * @return Closure | ||
503 | */ | ||
504 | function get_curl_header_callback( | ||
505 | &$charset, | ||
506 | $curlGetInfo = 'curl_getinfo' | ||
507 | ) { | ||
508 | $isRedirected = false; | ||
509 | |||
510 | return function ($ch, $data) use ($curlGetInfo, &$charset, &$isRedirected) { | ||
511 | $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE); | ||
512 | $chunkLength = strlen($data); | ||
513 | if (!empty($responseCode) && in_array($responseCode, [301, 302])) { | ||
514 | $isRedirected = true; | ||
515 | return $chunkLength; | ||
516 | } | ||
517 | if (!empty($responseCode) && $responseCode !== 200) { | ||
518 | return false; | ||
519 | } | ||
520 | // After a redirection, the content type will keep the previous request value | ||
521 | // until it finds the next content-type header. | ||
522 | if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) { | ||
523 | $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE); | ||
524 | } | ||
525 | if (!empty($contentType) && strpos($contentType, 'text/html') === false) { | ||
526 | return false; | ||
527 | } | ||
528 | if (!empty($contentType) && empty($charset)) { | ||
529 | $charset = header_extract_charset($contentType); | ||
530 | } | ||
531 | |||
532 | return $chunkLength; | ||
533 | }; | ||
534 | } | ||
535 | |||
536 | /** | ||
537 | * Get cURL callback function for CURLOPT_WRITEFUNCTION | ||
538 | * | ||
539 | * @param string $charset to extract from the downloaded page (reference) | ||
496 | * @param string $title to extract from the downloaded page (reference) | 540 | * @param string $title to extract from the downloaded page (reference) |
497 | * @param string $description to extract from the downloaded page (reference) | 541 | * @param string $description to extract from the downloaded page (reference) |
498 | * @param string $keywords to extract from the downloaded page (reference) | 542 | * @param string $keywords to extract from the downloaded page (reference) |
@@ -506,10 +550,8 @@ function get_curl_download_callback( | |||
506 | &$title, | 550 | &$title, |
507 | &$description, | 551 | &$description, |
508 | &$keywords, | 552 | &$keywords, |
509 | $retrieveDescription, | 553 | $retrieveDescription |
510 | $curlGetInfo = 'curl_getinfo' | ||
511 | ) { | 554 | ) { |
512 | $isRedirected = false; | ||
513 | $currentChunk = 0; | 555 | $currentChunk = 0; |
514 | $foundChunk = null; | 556 | $foundChunk = null; |
515 | 557 | ||
@@ -524,37 +566,18 @@ function get_curl_download_callback( | |||
524 | * | 566 | * |
525 | * @return int|bool length of $data or false if we need to stop the download | 567 | * @return int|bool length of $data or false if we need to stop the download |
526 | */ | 568 | */ |
527 | return function (&$ch, $data) use ( | 569 | return function ($ch, $data) use ( |
528 | $retrieveDescription, | 570 | $retrieveDescription, |
529 | $curlGetInfo, | ||
530 | &$charset, | 571 | &$charset, |
531 | &$title, | 572 | &$title, |
532 | &$description, | 573 | &$description, |
533 | &$keywords, | 574 | &$keywords, |
534 | &$isRedirected, | ||
535 | &$currentChunk, | 575 | &$currentChunk, |
536 | &$foundChunk | 576 | &$foundChunk |
537 | ) { | 577 | ) { |
578 | $chunkLength = strlen($data); | ||
538 | $currentChunk++; | 579 | $currentChunk++; |
539 | $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE); | 580 | |
540 | if (!empty($responseCode) && in_array($responseCode, [301, 302])) { | ||
541 | $isRedirected = true; | ||
542 | return strlen($data); | ||
543 | } | ||
544 | if (!empty($responseCode) && $responseCode !== 200) { | ||
545 | return false; | ||
546 | } | ||
547 | // After a redirection, the content type will keep the previous request value | ||
548 | // until it finds the next content-type header. | ||
549 | if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) { | ||
550 | $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE); | ||
551 | } | ||
552 | if (!empty($contentType) && strpos($contentType, 'text/html') === false) { | ||
553 | return false; | ||
554 | } | ||
555 | if (!empty($contentType) && empty($charset)) { | ||
556 | $charset = header_extract_charset($contentType); | ||
557 | } | ||
558 | if (empty($charset)) { | 581 | if (empty($charset)) { |
559 | $charset = html_extract_charset($data); | 582 | $charset = html_extract_charset($data); |
560 | } | 583 | } |
@@ -562,6 +585,10 @@ function get_curl_download_callback( | |||
562 | $title = html_extract_title($data); | 585 | $title = html_extract_title($data); |
563 | $foundChunk = ! empty($title) ? $currentChunk : $foundChunk; | 586 | $foundChunk = ! empty($title) ? $currentChunk : $foundChunk; |
564 | } | 587 | } |
588 | if (empty($title)) { | ||
589 | $title = html_extract_tag('title', $data); | ||
590 | $foundChunk = ! empty($title) ? $currentChunk : $foundChunk; | ||
591 | } | ||
565 | if ($retrieveDescription && empty($description)) { | 592 | if ($retrieveDescription && empty($description)) { |
566 | $description = html_extract_tag('description', $data); | 593 | $description = html_extract_tag('description', $data); |
567 | $foundChunk = ! empty($description) ? $currentChunk : $foundChunk; | 594 | $foundChunk = ! empty($description) ? $currentChunk : $foundChunk; |
@@ -591,6 +618,6 @@ function get_curl_download_callback( | |||
591 | return false; | 618 | return false; |
592 | } | 619 | } |
593 | 620 | ||
594 | return strlen($data); | 621 | return $chunkLength; |
595 | }; | 622 | }; |
596 | } | 623 | } |
diff --git a/application/http/MetadataRetriever.php b/application/http/MetadataRetriever.php index 2ca982e2..ba9bd40c 100644 --- a/application/http/MetadataRetriever.php +++ b/application/http/MetadataRetriever.php | |||
@@ -46,6 +46,7 @@ class MetadataRetriever | |||
46 | $url, | 46 | $url, |
47 | $this->conf->get('general.download_timeout', 30), | 47 | $this->conf->get('general.download_timeout', 30), |
48 | $this->conf->get('general.download_max_size', 4194304), | 48 | $this->conf->get('general.download_max_size', 4194304), |
49 | $this->httpAccess->getCurlHeaderCallback($charset), | ||
49 | $this->httpAccess->getCurlDownloadCallback( | 50 | $this->httpAccess->getCurlDownloadCallback( |
50 | $charset, | 51 | $charset, |
51 | $title, | 52 | $title, |
diff --git a/tests/bookmark/LinkUtilsTest.php b/tests/bookmark/LinkUtilsTest.php index 29941c8c..3321242f 100644 --- a/tests/bookmark/LinkUtilsTest.php +++ b/tests/bookmark/LinkUtilsTest.php | |||
@@ -216,60 +216,91 @@ class LinkUtilsTest extends TestCase | |||
216 | } | 216 | } |
217 | 217 | ||
218 | /** | 218 | /** |
219 | * Test the header callback with valid value | ||
220 | */ | ||
221 | public function testCurlHeaderCallbackOk(): void | ||
222 | { | ||
223 | $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_ok'); | ||
224 | $data = [ | ||
225 | 'HTTP/1.1 200 OK', | ||
226 | 'Server: GitHub.com', | ||
227 | 'Date: Sat, 28 Oct 2017 12:01:33 GMT', | ||
228 | 'Content-Type: text/html; charset=utf-8', | ||
229 | 'Status: 200 OK', | ||
230 | ]; | ||
231 | |||
232 | foreach ($data as $chunk) { | ||
233 | static::assertIsInt($callback(null, $chunk)); | ||
234 | } | ||
235 | |||
236 | static::assertSame('utf-8', $charset); | ||
237 | } | ||
238 | |||
239 | /** | ||
219 | * Test the download callback with valid value | 240 | * Test the download callback with valid value |
220 | */ | 241 | */ |
221 | public function testCurlDownloadCallbackOk() | 242 | public function testCurlDownloadCallbackOk(): void |
222 | { | 243 | { |
244 | $charset = 'utf-8'; | ||
223 | $callback = get_curl_download_callback( | 245 | $callback = get_curl_download_callback( |
224 | $charset, | 246 | $charset, |
225 | $title, | 247 | $title, |
226 | $desc, | 248 | $desc, |
227 | $keywords, | 249 | $keywords, |
228 | false, | 250 | false |
229 | 'ut_curl_getinfo_ok' | ||
230 | ); | 251 | ); |
252 | |||
231 | $data = [ | 253 | $data = [ |
232 | 'HTTP/1.1 200 OK', | 254 | 'th=device-width">' |
233 | 'Server: GitHub.com', | ||
234 | 'Date: Sat, 28 Oct 2017 12:01:33 GMT', | ||
235 | 'Content-Type: text/html; charset=utf-8', | ||
236 | 'Status: 200 OK', | ||
237 | 'end' => 'th=device-width">' | ||
238 | . '<title>Refactoring · GitHub</title>' | 255 | . '<title>Refactoring · GitHub</title>' |
239 | . '<link rel="search" type="application/opensea', | 256 | . '<link rel="search" type="application/opensea', |
240 | '<title>ignored</title>' | 257 | '<title>ignored</title>' |
241 | . '<meta name="description" content="desc" />' | 258 | . '<meta name="description" content="desc" />' |
242 | . '<meta name="keywords" content="key1,key2" />', | 259 | . '<meta name="keywords" content="key1,key2" />', |
243 | ]; | 260 | ]; |
244 | foreach ($data as $key => $line) { | 261 | |
245 | $ignore = null; | 262 | foreach ($data as $chunk) { |
246 | $expected = $key !== 'end' ? strlen($line) : false; | 263 | static::assertSame(strlen($chunk), $callback(null, $chunk)); |
247 | $this->assertEquals($expected, $callback($ignore, $line)); | ||
248 | if ($expected === false) { | ||
249 | break; | ||
250 | } | ||
251 | } | 264 | } |
252 | $this->assertEquals('utf-8', $charset); | 265 | |
253 | $this->assertEquals('Refactoring · GitHub', $title); | 266 | static::assertSame('utf-8', $charset); |
254 | $this->assertEmpty($desc); | 267 | static::assertSame('Refactoring · GitHub', $title); |
255 | $this->assertEmpty($keywords); | 268 | static::assertEmpty($desc); |
269 | static::assertEmpty($keywords); | ||
270 | } | ||
271 | |||
272 | /** | ||
273 | * Test the header callback with valid value | ||
274 | */ | ||
275 | public function testCurlHeaderCallbackNoCharset(): void | ||
276 | { | ||
277 | $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_no_charset'); | ||
278 | $data = [ | ||
279 | 'HTTP/1.1 200 OK', | ||
280 | ]; | ||
281 | |||
282 | foreach ($data as $chunk) { | ||
283 | static::assertSame(strlen($chunk), $callback(null, $chunk)); | ||
284 | } | ||
285 | |||
286 | static::assertFalse($charset); | ||
256 | } | 287 | } |
257 | 288 | ||
258 | /** | 289 | /** |
259 | * Test the download callback with valid values and no charset | 290 | * Test the download callback with valid values and no charset |
260 | */ | 291 | */ |
261 | public function testCurlDownloadCallbackOkNoCharset() | 292 | public function testCurlDownloadCallbackOkNoCharset(): void |
262 | { | 293 | { |
294 | $charset = null; | ||
263 | $callback = get_curl_download_callback( | 295 | $callback = get_curl_download_callback( |
264 | $charset, | 296 | $charset, |
265 | $title, | 297 | $title, |
266 | $desc, | 298 | $desc, |
267 | $keywords, | 299 | $keywords, |
268 | false, | 300 | false |
269 | 'ut_curl_getinfo_no_charset' | ||
270 | ); | 301 | ); |
302 | |||
271 | $data = [ | 303 | $data = [ |
272 | 'HTTP/1.1 200 OK', | ||
273 | 'end' => 'th=device-width">' | 304 | 'end' => 'th=device-width">' |
274 | . '<title>Refactoring · GitHub</title>' | 305 | . '<title>Refactoring · GitHub</title>' |
275 | . '<link rel="search" type="application/opensea', | 306 | . '<link rel="search" type="application/opensea', |
@@ -277,10 +308,11 @@ class LinkUtilsTest extends TestCase | |||
277 | . '<meta name="description" content="desc" />' | 308 | . '<meta name="description" content="desc" />' |
278 | . '<meta name="keywords" content="key1,key2" />', | 309 | . '<meta name="keywords" content="key1,key2" />', |
279 | ]; | 310 | ]; |
280 | foreach ($data as $key => $line) { | 311 | |
281 | $ignore = null; | 312 | foreach ($data as $chunk) { |
282 | $this->assertEquals(strlen($line), $callback($ignore, $line)); | 313 | static::assertSame(strlen($chunk), $callback(null, $chunk)); |
283 | } | 314 | } |
315 | |||
284 | $this->assertEmpty($charset); | 316 | $this->assertEmpty($charset); |
285 | $this->assertEquals('Refactoring · GitHub', $title); | 317 | $this->assertEquals('Refactoring · GitHub', $title); |
286 | $this->assertEmpty($desc); | 318 | $this->assertEmpty($desc); |
@@ -290,18 +322,18 @@ class LinkUtilsTest extends TestCase | |||
290 | /** | 322 | /** |
291 | * Test the download callback with valid values and no charset | 323 | * Test the download callback with valid values and no charset |
292 | */ | 324 | */ |
293 | public function testCurlDownloadCallbackOkHtmlCharset() | 325 | public function testCurlDownloadCallbackOkHtmlCharset(): void |
294 | { | 326 | { |
327 | $charset = null; | ||
295 | $callback = get_curl_download_callback( | 328 | $callback = get_curl_download_callback( |
296 | $charset, | 329 | $charset, |
297 | $title, | 330 | $title, |
298 | $desc, | 331 | $desc, |
299 | $keywords, | 332 | $keywords, |
300 | false, | 333 | false |
301 | 'ut_curl_getinfo_no_charset' | ||
302 | ); | 334 | ); |
335 | |||
303 | $data = [ | 336 | $data = [ |
304 | 'HTTP/1.1 200 OK', | ||
305 | '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />', | 337 | '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />', |
306 | 'end' => 'th=device-width">' | 338 | 'end' => 'th=device-width">' |
307 | . '<title>Refactoring · GitHub</title>' | 339 | . '<title>Refactoring · GitHub</title>' |
@@ -310,14 +342,10 @@ class LinkUtilsTest extends TestCase | |||
310 | . '<meta name="description" content="desc" />' | 342 | . '<meta name="description" content="desc" />' |
311 | . '<meta name="keywords" content="key1,key2" />', | 343 | . '<meta name="keywords" content="key1,key2" />', |
312 | ]; | 344 | ]; |
313 | foreach ($data as $key => $line) { | 345 | foreach ($data as $chunk) { |
314 | $ignore = null; | 346 | static::assertSame(strlen($chunk), $callback(null, $chunk)); |
315 | $expected = $key !== 'end' ? strlen($line) : false; | ||
316 | $this->assertEquals($expected, $callback($ignore, $line)); | ||
317 | if ($expected === false) { | ||
318 | break; | ||
319 | } | ||
320 | } | 347 | } |
348 | |||
321 | $this->assertEquals('utf-8', $charset); | 349 | $this->assertEquals('utf-8', $charset); |
322 | $this->assertEquals('Refactoring · GitHub', $title); | 350 | $this->assertEquals('Refactoring · GitHub', $title); |
323 | $this->assertEmpty($desc); | 351 | $this->assertEmpty($desc); |
@@ -327,25 +355,26 @@ class LinkUtilsTest extends TestCase | |||
327 | /** | 355 | /** |
328 | * Test the download callback with valid values and no title | 356 | * Test the download callback with valid values and no title |
329 | */ | 357 | */ |
330 | public function testCurlDownloadCallbackOkNoTitle() | 358 | public function testCurlDownloadCallbackOkNoTitle(): void |
331 | { | 359 | { |
360 | $charset = 'utf-8'; | ||
332 | $callback = get_curl_download_callback( | 361 | $callback = get_curl_download_callback( |
333 | $charset, | 362 | $charset, |
334 | $title, | 363 | $title, |
335 | $desc, | 364 | $desc, |
336 | $keywords, | 365 | $keywords, |
337 | false, | 366 | false |
338 | 'ut_curl_getinfo_ok' | ||
339 | ); | 367 | ); |
368 | |||
340 | $data = [ | 369 | $data = [ |
341 | 'HTTP/1.1 200 OK', | ||
342 | 'end' => 'th=device-width">Refactoring · GitHub<link rel="search" type="application/opensea', | 370 | 'end' => 'th=device-width">Refactoring · GitHub<link rel="search" type="application/opensea', |
343 | 'ignored', | 371 | 'ignored', |
344 | ]; | 372 | ]; |
345 | foreach ($data as $key => $line) { | 373 | |
346 | $ignore = null; | 374 | foreach ($data as $chunk) { |
347 | $this->assertEquals(strlen($line), $callback($ignore, $line)); | 375 | static::assertSame(strlen($chunk), $callback(null, $chunk)); |
348 | } | 376 | } |
377 | |||
349 | $this->assertEquals('utf-8', $charset); | 378 | $this->assertEquals('utf-8', $charset); |
350 | $this->assertEmpty($title); | 379 | $this->assertEmpty($title); |
351 | $this->assertEmpty($desc); | 380 | $this->assertEmpty($desc); |
@@ -353,81 +382,55 @@ class LinkUtilsTest extends TestCase | |||
353 | } | 382 | } |
354 | 383 | ||
355 | /** | 384 | /** |
356 | * Test the download callback with an invalid content type. | 385 | * Test the header callback with an invalid content type. |
357 | */ | 386 | */ |
358 | public function testCurlDownloadCallbackInvalidContentType() | 387 | public function testCurlHeaderCallbackInvalidContentType(): void |
359 | { | 388 | { |
360 | $callback = get_curl_download_callback( | 389 | $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_ct_ko'); |
361 | $charset, | 390 | $data = [ |
362 | $title, | 391 | 'HTTP/1.1 200 OK', |
363 | $desc, | 392 | ]; |
364 | $keywords, | 393 | |
365 | false, | 394 | static::assertFalse($callback(null, $data[0])); |
366 | 'ut_curl_getinfo_ct_ko' | 395 | static::assertNull($charset); |
367 | ); | ||
368 | $ignore = null; | ||
369 | $this->assertFalse($callback($ignore, '')); | ||
370 | $this->assertEmpty($charset); | ||
371 | $this->assertEmpty($title); | ||
372 | } | 396 | } |
373 | 397 | ||
374 | /** | 398 | /** |
375 | * Test the download callback with an invalid response code. | 399 | * Test the header callback with an invalid response code. |
376 | */ | 400 | */ |
377 | public function testCurlDownloadCallbackInvalidResponseCode() | 401 | public function testCurlHeaderCallbackInvalidResponseCode(): void |
378 | { | 402 | { |
379 | $callback = $callback = get_curl_download_callback( | 403 | $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_rc_ko'); |
380 | $charset, | 404 | |
381 | $title, | 405 | static::assertFalse($callback(null, '')); |
382 | $desc, | 406 | static::assertNull($charset); |
383 | $keywords, | ||
384 | false, | ||
385 | 'ut_curl_getinfo_rc_ko' | ||
386 | ); | ||
387 | $ignore = null; | ||
388 | $this->assertFalse($callback($ignore, '')); | ||
389 | $this->assertEmpty($charset); | ||
390 | $this->assertEmpty($title); | ||
391 | } | 407 | } |
392 | 408 | ||
393 | /** | 409 | /** |
394 | * Test the download callback with an invalid content type and response code. | 410 | * Test the header callback with an invalid content type and response code. |
395 | */ | 411 | */ |
396 | public function testCurlDownloadCallbackInvalidContentTypeAndResponseCode() | 412 | public function testCurlHeaderCallbackInvalidContentTypeAndResponseCode(): void |
397 | { | 413 | { |
398 | $callback = $callback = get_curl_download_callback( | 414 | $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_rs_ct_ko'); |
399 | $charset, | 415 | |
400 | $title, | 416 | static::assertFalse($callback(null, '')); |
401 | $desc, | 417 | static::assertNull($charset); |
402 | $keywords, | ||
403 | false, | ||
404 | 'ut_curl_getinfo_rs_ct_ko' | ||
405 | ); | ||
406 | $ignore = null; | ||
407 | $this->assertFalse($callback($ignore, '')); | ||
408 | $this->assertEmpty($charset); | ||
409 | $this->assertEmpty($title); | ||
410 | } | 418 | } |
411 | 419 | ||
412 | /** | 420 | /** |
413 | * Test the download callback with valid value, and retrieve_description option enabled. | 421 | * Test the download callback with valid value, and retrieve_description option enabled. |
414 | */ | 422 | */ |
415 | public function testCurlDownloadCallbackOkWithDesc() | 423 | public function testCurlDownloadCallbackOkWithDesc(): void |
416 | { | 424 | { |
425 | $charset = 'utf-8'; | ||
417 | $callback = get_curl_download_callback( | 426 | $callback = get_curl_download_callback( |
418 | $charset, | 427 | $charset, |
419 | $title, | 428 | $title, |
420 | $desc, | 429 | $desc, |
421 | $keywords, | 430 | $keywords, |
422 | true, | 431 | true |
423 | 'ut_curl_getinfo_ok' | ||
424 | ); | 432 | ); |
425 | $data = [ | 433 | $data = [ |
426 | 'HTTP/1.1 200 OK', | ||
427 | 'Server: GitHub.com', | ||
428 | 'Date: Sat, 28 Oct 2017 12:01:33 GMT', | ||
429 | 'Content-Type: text/html; charset=utf-8', | ||
430 | 'Status: 200 OK', | ||
431 | 'th=device-width">' | 434 | 'th=device-width">' |
432 | . '<title>Refactoring · GitHub</title>' | 435 | . '<title>Refactoring · GitHub</title>' |
433 | . '<link rel="search" type="application/opensea', | 436 | . '<link rel="search" type="application/opensea', |
@@ -435,14 +438,11 @@ class LinkUtilsTest extends TestCase | |||
435 | . '<meta name="description" content="link desc" />' | 438 | . '<meta name="description" content="link desc" />' |
436 | . '<meta name="keywords" content="key1,key2" />', | 439 | . '<meta name="keywords" content="key1,key2" />', |
437 | ]; | 440 | ]; |
438 | foreach ($data as $key => $line) { | 441 | |
439 | $ignore = null; | 442 | foreach ($data as $chunk) { |
440 | $expected = $key !== 'end' ? strlen($line) : false; | 443 | static::assertSame(strlen($chunk), $callback(null, $chunk)); |
441 | $this->assertEquals($expected, $callback($ignore, $line)); | ||
442 | if ($expected === false) { | ||
443 | break; | ||
444 | } | ||
445 | } | 444 | } |
445 | |||
446 | $this->assertEquals('utf-8', $charset); | 446 | $this->assertEquals('utf-8', $charset); |
447 | $this->assertEquals('Refactoring · GitHub', $title); | 447 | $this->assertEquals('Refactoring · GitHub', $title); |
448 | $this->assertEquals('link desc', $desc); | 448 | $this->assertEquals('link desc', $desc); |
@@ -453,8 +453,9 @@ class LinkUtilsTest extends TestCase | |||
453 | * Test the download callback with valid value, and retrieve_description option enabled, | 453 | * Test the download callback with valid value, and retrieve_description option enabled, |
454 | * but no desc or keyword defined in the page. | 454 | * but no desc or keyword defined in the page. |
455 | */ | 455 | */ |
456 | public function testCurlDownloadCallbackOkWithDescNotFound() | 456 | public function testCurlDownloadCallbackOkWithDescNotFound(): void |
457 | { | 457 | { |
458 | $charset = 'utf-8'; | ||
458 | $callback = get_curl_download_callback( | 459 | $callback = get_curl_download_callback( |
459 | $charset, | 460 | $charset, |
460 | $title, | 461 | $title, |
@@ -464,24 +465,16 @@ class LinkUtilsTest extends TestCase | |||
464 | 'ut_curl_getinfo_ok' | 465 | 'ut_curl_getinfo_ok' |
465 | ); | 466 | ); |
466 | $data = [ | 467 | $data = [ |
467 | 'HTTP/1.1 200 OK', | ||
468 | 'Server: GitHub.com', | ||
469 | 'Date: Sat, 28 Oct 2017 12:01:33 GMT', | ||
470 | 'Content-Type: text/html; charset=utf-8', | ||
471 | 'Status: 200 OK', | ||
472 | 'th=device-width">' | 468 | 'th=device-width">' |
473 | . '<title>Refactoring · GitHub</title>' | 469 | . '<title>Refactoring · GitHub</title>' |
474 | . '<link rel="search" type="application/opensea', | 470 | . '<link rel="search" type="application/opensea', |
475 | 'end' => '<title>ignored</title>', | 471 | 'end' => '<title>ignored</title>', |
476 | ]; | 472 | ]; |
477 | foreach ($data as $key => $line) { | 473 | |
478 | $ignore = null; | 474 | foreach ($data as $chunk) { |
479 | $expected = $key !== 'end' ? strlen($line) : false; | 475 | static::assertSame(strlen($chunk), $callback(null, $chunk)); |
480 | $this->assertEquals($expected, $callback($ignore, $line)); | ||
481 | if ($expected === false) { | ||
482 | break; | ||
483 | } | ||
484 | } | 476 | } |
477 | |||
485 | $this->assertEquals('utf-8', $charset); | 478 | $this->assertEquals('utf-8', $charset); |
486 | $this->assertEquals('Refactoring · GitHub', $title); | 479 | $this->assertEquals('Refactoring · GitHub', $title); |
487 | $this->assertEmpty($desc); | 480 | $this->assertEmpty($desc); |
diff --git a/tests/http/MetadataRetrieverTest.php b/tests/http/MetadataRetrieverTest.php index 2a1838e8..3c9eaa0e 100644 --- a/tests/http/MetadataRetrieverTest.php +++ b/tests/http/MetadataRetrieverTest.php | |||
@@ -38,6 +38,7 @@ class MetadataRetrieverTest extends TestCase | |||
38 | $remoteTitle = 'Remote Title '; | 38 | $remoteTitle = 'Remote Title '; |
39 | $remoteDesc = 'Sometimes the meta description is relevant.'; | 39 | $remoteDesc = 'Sometimes the meta description is relevant.'; |
40 | $remoteTags = 'abc def'; | 40 | $remoteTags = 'abc def'; |
41 | $remoteCharset = 'utf-8'; | ||
41 | 42 | ||
42 | $expectedResult = [ | 43 | $expectedResult = [ |
43 | 'title' => $remoteTitle, | 44 | 'title' => $remoteTitle, |
@@ -47,9 +48,26 @@ class MetadataRetrieverTest extends TestCase | |||
47 | 48 | ||
48 | $this->httpAccess | 49 | $this->httpAccess |
49 | ->expects(static::once()) | 50 | ->expects(static::once()) |
51 | ->method('getCurlHeaderCallback') | ||
52 | ->willReturnCallback( | ||
53 | function (&$charset) use ( | ||
54 | $remoteCharset | ||
55 | ): callable { | ||
56 | return function () use ( | ||
57 | &$charset, | ||
58 | $remoteCharset | ||
59 | ): void { | ||
60 | $charset = $remoteCharset; | ||
61 | }; | ||
62 | } | ||
63 | ) | ||
64 | ; | ||
65 | $this->httpAccess | ||
66 | ->expects(static::once()) | ||
50 | ->method('getCurlDownloadCallback') | 67 | ->method('getCurlDownloadCallback') |
51 | ->willReturnCallback( | 68 | ->willReturnCallback( |
52 | function (&$charset, &$title, &$description, &$tags) use ( | 69 | function (&$charset, &$title, &$description, &$tags) use ( |
70 | $remoteCharset, | ||
53 | $remoteTitle, | 71 | $remoteTitle, |
54 | $remoteDesc, | 72 | $remoteDesc, |
55 | $remoteTags | 73 | $remoteTags |
@@ -59,11 +77,13 @@ class MetadataRetrieverTest extends TestCase | |||
59 | &$title, | 77 | &$title, |
60 | &$description, | 78 | &$description, |
61 | &$tags, | 79 | &$tags, |
80 | $remoteCharset, | ||
62 | $remoteTitle, | 81 | $remoteTitle, |
63 | $remoteDesc, | 82 | $remoteDesc, |
64 | $remoteTags | 83 | $remoteTags |
65 | ): void { | 84 | ): void { |
66 | $charset = 'ISO-8859-1'; | 85 | static::assertSame($remoteCharset, $charset); |
86 | |||
67 | $title = $remoteTitle; | 87 | $title = $remoteTitle; |
68 | $description = $remoteDesc; | 88 | $description = $remoteDesc; |
69 | $tags = $remoteTags; | 89 | $tags = $remoteTags; |
@@ -75,8 +95,9 @@ class MetadataRetrieverTest extends TestCase | |||
75 | ->expects(static::once()) | 95 | ->expects(static::once()) |
76 | ->method('getHttpResponse') | 96 | ->method('getHttpResponse') |
77 | ->with($url, 30, 4194304) | 97 | ->with($url, 30, 4194304) |
78 | ->willReturnCallback(function($url, $timeout, $maxBytes, $callback): void { | 98 | ->willReturnCallback(function($url, $timeout, $maxBytes, $headerCallback, $dlCallback): void { |
79 | $callback(); | 99 | $headerCallback(); |
100 | $dlCallback(); | ||
80 | }) | 101 | }) |
81 | ; | 102 | ; |
82 | 103 | ||
@@ -102,8 +123,17 @@ class MetadataRetrieverTest extends TestCase | |||
102 | ->expects(static::once()) | 123 | ->expects(static::once()) |
103 | ->method('getCurlDownloadCallback') | 124 | ->method('getCurlDownloadCallback') |
104 | ->willReturnCallback( | 125 | ->willReturnCallback( |
105 | function (&$charset, &$title, &$description, &$tags): callable { | 126 | function (): callable { |
106 | return function () use (&$charset, &$title, &$description, &$tags): void {}; | 127 | return function (): void {}; |
128 | } | ||
129 | ) | ||
130 | ; | ||
131 | $this->httpAccess | ||
132 | ->expects(static::once()) | ||
133 | ->method('getCurlHeaderCallback') | ||
134 | ->willReturnCallback( | ||
135 | function (): callable { | ||
136 | return function (): void {}; | ||
107 | } | 137 | } |
108 | ) | 138 | ) |
109 | ; | 139 | ; |
@@ -111,8 +141,9 @@ class MetadataRetrieverTest extends TestCase | |||
111 | ->expects(static::once()) | 141 | ->expects(static::once()) |
112 | ->method('getHttpResponse') | 142 | ->method('getHttpResponse') |
113 | ->with($url, 30, 4194304) | 143 | ->with($url, 30, 4194304) |
114 | ->willReturnCallback(function($url, $timeout, $maxBytes, $callback): void { | 144 | ->willReturnCallback(function($url, $timeout, $maxBytes, $headerCallback, $dlCallback): void { |
115 | $callback(); | 145 | $headerCallback(); |
146 | $dlCallback(); | ||
116 | }) | 147 | }) |
117 | ; | 148 | ; |
118 | 149 | ||