aboutsummaryrefslogtreecommitdiffhomepage
path: root/tests
diff options
context:
space:
mode:
authorArthurHoaro <arthur@hoa.ro>2020-10-15 11:20:33 +0200
committerArthurHoaro <arthur@hoa.ro>2020-10-15 11:36:56 +0200
commit5334090be04e66da5cb5c3ad487604b3733c5cac (patch)
tree419217a587c2a15bc97b943acee11fcf7b559937 /tests
parent4cf3564d28dc8e4d08a3e64f09ad045ffbde97ae (diff)
downloadShaarli-5334090be04e66da5cb5c3ad487604b3733c5cac.tar.gz
Shaarli-5334090be04e66da5cb5c3ad487604b3733c5cac.tar.zst
Shaarli-5334090be04e66da5cb5c3ad487604b3733c5cac.zip
Improve metadata retrieval (performance and accuracy)
- Use a dedicated function to download headers, to avoid applying multiple regexps to the headers - Also try to extract the title from meta tags
Diffstat (limited to 'tests')
-rw-r--r--tests/bookmark/LinkUtilsTest.php223
-rw-r--r--tests/http/MetadataRetrieverTest.php45
2 files changed, 146 insertions, 122 deletions
diff --git a/tests/bookmark/LinkUtilsTest.php b/tests/bookmark/LinkUtilsTest.php
index 29941c8c..3321242f 100644
--- a/tests/bookmark/LinkUtilsTest.php
+++ b/tests/bookmark/LinkUtilsTest.php
@@ -216,60 +216,91 @@ class LinkUtilsTest extends TestCase
216 } 216 }
217 217
218 /** 218 /**
219 * Test the header callback with valid value
220 */
221 public function testCurlHeaderCallbackOk(): void
222 {
223 $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_ok');
224 $data = [
225 'HTTP/1.1 200 OK',
226 'Server: GitHub.com',
227 'Date: Sat, 28 Oct 2017 12:01:33 GMT',
228 'Content-Type: text/html; charset=utf-8',
229 'Status: 200 OK',
230 ];
231
232 foreach ($data as $chunk) {
233 static::assertIsInt($callback(null, $chunk));
234 }
235
236 static::assertSame('utf-8', $charset);
237 }
238
239 /**
219 * Test the download callback with valid value 240 * Test the download callback with valid value
220 */ 241 */
221 public function testCurlDownloadCallbackOk() 242 public function testCurlDownloadCallbackOk(): void
222 { 243 {
244 $charset = 'utf-8';
223 $callback = get_curl_download_callback( 245 $callback = get_curl_download_callback(
224 $charset, 246 $charset,
225 $title, 247 $title,
226 $desc, 248 $desc,
227 $keywords, 249 $keywords,
228 false, 250 false
229 'ut_curl_getinfo_ok'
230 ); 251 );
252
231 $data = [ 253 $data = [
232 'HTTP/1.1 200 OK', 254 'th=device-width">'
233 'Server: GitHub.com',
234 'Date: Sat, 28 Oct 2017 12:01:33 GMT',
235 'Content-Type: text/html; charset=utf-8',
236 'Status: 200 OK',
237 'end' => 'th=device-width">'
238 . '<title>Refactoring · GitHub</title>' 255 . '<title>Refactoring · GitHub</title>'
239 . '<link rel="search" type="application/opensea', 256 . '<link rel="search" type="application/opensea',
240 '<title>ignored</title>' 257 '<title>ignored</title>'
241 . '<meta name="description" content="desc" />' 258 . '<meta name="description" content="desc" />'
242 . '<meta name="keywords" content="key1,key2" />', 259 . '<meta name="keywords" content="key1,key2" />',
243 ]; 260 ];
244 foreach ($data as $key => $line) { 261
245 $ignore = null; 262 foreach ($data as $chunk) {
246 $expected = $key !== 'end' ? strlen($line) : false; 263 static::assertSame(strlen($chunk), $callback(null, $chunk));
247 $this->assertEquals($expected, $callback($ignore, $line));
248 if ($expected === false) {
249 break;
250 }
251 } 264 }
252 $this->assertEquals('utf-8', $charset); 265
253 $this->assertEquals('Refactoring · GitHub', $title); 266 static::assertSame('utf-8', $charset);
254 $this->assertEmpty($desc); 267 static::assertSame('Refactoring · GitHub', $title);
255 $this->assertEmpty($keywords); 268 static::assertEmpty($desc);
269 static::assertEmpty($keywords);
270 }
271
272 /**
273 * Test the header callback with valid value
274 */
275 public function testCurlHeaderCallbackNoCharset(): void
276 {
277 $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_no_charset');
278 $data = [
279 'HTTP/1.1 200 OK',
280 ];
281
282 foreach ($data as $chunk) {
283 static::assertSame(strlen($chunk), $callback(null, $chunk));
284 }
285
286 static::assertFalse($charset);
256 } 287 }
257 288
258 /** 289 /**
259 * Test the download callback with valid values and no charset 290 * Test the download callback with valid values and no charset
260 */ 291 */
261 public function testCurlDownloadCallbackOkNoCharset() 292 public function testCurlDownloadCallbackOkNoCharset(): void
262 { 293 {
294 $charset = null;
263 $callback = get_curl_download_callback( 295 $callback = get_curl_download_callback(
264 $charset, 296 $charset,
265 $title, 297 $title,
266 $desc, 298 $desc,
267 $keywords, 299 $keywords,
268 false, 300 false
269 'ut_curl_getinfo_no_charset'
270 ); 301 );
302
271 $data = [ 303 $data = [
272 'HTTP/1.1 200 OK',
273 'end' => 'th=device-width">' 304 'end' => 'th=device-width">'
274 . '<title>Refactoring · GitHub</title>' 305 . '<title>Refactoring · GitHub</title>'
275 . '<link rel="search" type="application/opensea', 306 . '<link rel="search" type="application/opensea',
@@ -277,10 +308,11 @@ class LinkUtilsTest extends TestCase
277 . '<meta name="description" content="desc" />' 308 . '<meta name="description" content="desc" />'
278 . '<meta name="keywords" content="key1,key2" />', 309 . '<meta name="keywords" content="key1,key2" />',
279 ]; 310 ];
280 foreach ($data as $key => $line) { 311
281 $ignore = null; 312 foreach ($data as $chunk) {
282 $this->assertEquals(strlen($line), $callback($ignore, $line)); 313 static::assertSame(strlen($chunk), $callback(null, $chunk));
283 } 314 }
315
284 $this->assertEmpty($charset); 316 $this->assertEmpty($charset);
285 $this->assertEquals('Refactoring · GitHub', $title); 317 $this->assertEquals('Refactoring · GitHub', $title);
286 $this->assertEmpty($desc); 318 $this->assertEmpty($desc);
@@ -290,18 +322,18 @@ class LinkUtilsTest extends TestCase
290 /** 322 /**
291 * Test the download callback with valid values and no charset 323 * Test the download callback with valid values and no charset
292 */ 324 */
293 public function testCurlDownloadCallbackOkHtmlCharset() 325 public function testCurlDownloadCallbackOkHtmlCharset(): void
294 { 326 {
327 $charset = null;
295 $callback = get_curl_download_callback( 328 $callback = get_curl_download_callback(
296 $charset, 329 $charset,
297 $title, 330 $title,
298 $desc, 331 $desc,
299 $keywords, 332 $keywords,
300 false, 333 false
301 'ut_curl_getinfo_no_charset'
302 ); 334 );
335
303 $data = [ 336 $data = [
304 'HTTP/1.1 200 OK',
305 '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />', 337 '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
306 'end' => 'th=device-width">' 338 'end' => 'th=device-width">'
307 . '<title>Refactoring · GitHub</title>' 339 . '<title>Refactoring · GitHub</title>'
@@ -310,14 +342,10 @@ class LinkUtilsTest extends TestCase
310 . '<meta name="description" content="desc" />' 342 . '<meta name="description" content="desc" />'
311 . '<meta name="keywords" content="key1,key2" />', 343 . '<meta name="keywords" content="key1,key2" />',
312 ]; 344 ];
313 foreach ($data as $key => $line) { 345 foreach ($data as $chunk) {
314 $ignore = null; 346 static::assertSame(strlen($chunk), $callback(null, $chunk));
315 $expected = $key !== 'end' ? strlen($line) : false;
316 $this->assertEquals($expected, $callback($ignore, $line));
317 if ($expected === false) {
318 break;
319 }
320 } 347 }
348
321 $this->assertEquals('utf-8', $charset); 349 $this->assertEquals('utf-8', $charset);
322 $this->assertEquals('Refactoring · GitHub', $title); 350 $this->assertEquals('Refactoring · GitHub', $title);
323 $this->assertEmpty($desc); 351 $this->assertEmpty($desc);
@@ -327,25 +355,26 @@ class LinkUtilsTest extends TestCase
327 /** 355 /**
328 * Test the download callback with valid values and no title 356 * Test the download callback with valid values and no title
329 */ 357 */
330 public function testCurlDownloadCallbackOkNoTitle() 358 public function testCurlDownloadCallbackOkNoTitle(): void
331 { 359 {
360 $charset = 'utf-8';
332 $callback = get_curl_download_callback( 361 $callback = get_curl_download_callback(
333 $charset, 362 $charset,
334 $title, 363 $title,
335 $desc, 364 $desc,
336 $keywords, 365 $keywords,
337 false, 366 false
338 'ut_curl_getinfo_ok'
339 ); 367 );
368
340 $data = [ 369 $data = [
341 'HTTP/1.1 200 OK',
342 'end' => 'th=device-width">Refactoring · GitHub<link rel="search" type="application/opensea', 370 'end' => 'th=device-width">Refactoring · GitHub<link rel="search" type="application/opensea',
343 'ignored', 371 'ignored',
344 ]; 372 ];
345 foreach ($data as $key => $line) { 373
346 $ignore = null; 374 foreach ($data as $chunk) {
347 $this->assertEquals(strlen($line), $callback($ignore, $line)); 375 static::assertSame(strlen($chunk), $callback(null, $chunk));
348 } 376 }
377
349 $this->assertEquals('utf-8', $charset); 378 $this->assertEquals('utf-8', $charset);
350 $this->assertEmpty($title); 379 $this->assertEmpty($title);
351 $this->assertEmpty($desc); 380 $this->assertEmpty($desc);
@@ -353,81 +382,55 @@ class LinkUtilsTest extends TestCase
353 } 382 }
354 383
355 /** 384 /**
356 * Test the download callback with an invalid content type. 385 * Test the header callback with an invalid content type.
357 */ 386 */
358 public function testCurlDownloadCallbackInvalidContentType() 387 public function testCurlHeaderCallbackInvalidContentType(): void
359 { 388 {
360 $callback = get_curl_download_callback( 389 $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_ct_ko');
361 $charset, 390 $data = [
362 $title, 391 'HTTP/1.1 200 OK',
363 $desc, 392 ];
364 $keywords, 393
365 false, 394 static::assertFalse($callback(null, $data[0]));
366 'ut_curl_getinfo_ct_ko' 395 static::assertNull($charset);
367 );
368 $ignore = null;
369 $this->assertFalse($callback($ignore, ''));
370 $this->assertEmpty($charset);
371 $this->assertEmpty($title);
372 } 396 }
373 397
374 /** 398 /**
375 * Test the download callback with an invalid response code. 399 * Test the header callback with an invalid response code.
376 */ 400 */
377 public function testCurlDownloadCallbackInvalidResponseCode() 401 public function testCurlHeaderCallbackInvalidResponseCode(): void
378 { 402 {
379 $callback = $callback = get_curl_download_callback( 403 $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_rc_ko');
380 $charset, 404
381 $title, 405 static::assertFalse($callback(null, ''));
382 $desc, 406 static::assertNull($charset);
383 $keywords,
384 false,
385 'ut_curl_getinfo_rc_ko'
386 );
387 $ignore = null;
388 $this->assertFalse($callback($ignore, ''));
389 $this->assertEmpty($charset);
390 $this->assertEmpty($title);
391 } 407 }
392 408
393 /** 409 /**
394 * Test the download callback with an invalid content type and response code. 410 * Test the header callback with an invalid content type and response code.
395 */ 411 */
396 public function testCurlDownloadCallbackInvalidContentTypeAndResponseCode() 412 public function testCurlHeaderCallbackInvalidContentTypeAndResponseCode(): void
397 { 413 {
398 $callback = $callback = get_curl_download_callback( 414 $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_rs_ct_ko');
399 $charset, 415
400 $title, 416 static::assertFalse($callback(null, ''));
401 $desc, 417 static::assertNull($charset);
402 $keywords,
403 false,
404 'ut_curl_getinfo_rs_ct_ko'
405 );
406 $ignore = null;
407 $this->assertFalse($callback($ignore, ''));
408 $this->assertEmpty($charset);
409 $this->assertEmpty($title);
410 } 418 }
411 419
412 /** 420 /**
413 * Test the download callback with valid value, and retrieve_description option enabled. 421 * Test the download callback with valid value, and retrieve_description option enabled.
414 */ 422 */
415 public function testCurlDownloadCallbackOkWithDesc() 423 public function testCurlDownloadCallbackOkWithDesc(): void
416 { 424 {
425 $charset = 'utf-8';
417 $callback = get_curl_download_callback( 426 $callback = get_curl_download_callback(
418 $charset, 427 $charset,
419 $title, 428 $title,
420 $desc, 429 $desc,
421 $keywords, 430 $keywords,
422 true, 431 true
423 'ut_curl_getinfo_ok'
424 ); 432 );
425 $data = [ 433 $data = [
426 'HTTP/1.1 200 OK',
427 'Server: GitHub.com',
428 'Date: Sat, 28 Oct 2017 12:01:33 GMT',
429 'Content-Type: text/html; charset=utf-8',
430 'Status: 200 OK',
431 'th=device-width">' 434 'th=device-width">'
432 . '<title>Refactoring · GitHub</title>' 435 . '<title>Refactoring · GitHub</title>'
433 . '<link rel="search" type="application/opensea', 436 . '<link rel="search" type="application/opensea',
@@ -435,14 +438,11 @@ class LinkUtilsTest extends TestCase
435 . '<meta name="description" content="link desc" />' 438 . '<meta name="description" content="link desc" />'
436 . '<meta name="keywords" content="key1,key2" />', 439 . '<meta name="keywords" content="key1,key2" />',
437 ]; 440 ];
438 foreach ($data as $key => $line) { 441
439 $ignore = null; 442 foreach ($data as $chunk) {
440 $expected = $key !== 'end' ? strlen($line) : false; 443 static::assertSame(strlen($chunk), $callback(null, $chunk));
441 $this->assertEquals($expected, $callback($ignore, $line));
442 if ($expected === false) {
443 break;
444 }
445 } 444 }
445
446 $this->assertEquals('utf-8', $charset); 446 $this->assertEquals('utf-8', $charset);
447 $this->assertEquals('Refactoring · GitHub', $title); 447 $this->assertEquals('Refactoring · GitHub', $title);
448 $this->assertEquals('link desc', $desc); 448 $this->assertEquals('link desc', $desc);
@@ -453,8 +453,9 @@ class LinkUtilsTest extends TestCase
453 * Test the download callback with valid value, and retrieve_description option enabled, 453 * Test the download callback with valid value, and retrieve_description option enabled,
454 * but no desc or keyword defined in the page. 454 * but no desc or keyword defined in the page.
455 */ 455 */
456 public function testCurlDownloadCallbackOkWithDescNotFound() 456 public function testCurlDownloadCallbackOkWithDescNotFound(): void
457 { 457 {
458 $charset = 'utf-8';
458 $callback = get_curl_download_callback( 459 $callback = get_curl_download_callback(
459 $charset, 460 $charset,
460 $title, 461 $title,
@@ -464,24 +465,16 @@ class LinkUtilsTest extends TestCase
464 'ut_curl_getinfo_ok' 465 'ut_curl_getinfo_ok'
465 ); 466 );
466 $data = [ 467 $data = [
467 'HTTP/1.1 200 OK',
468 'Server: GitHub.com',
469 'Date: Sat, 28 Oct 2017 12:01:33 GMT',
470 'Content-Type: text/html; charset=utf-8',
471 'Status: 200 OK',
472 'th=device-width">' 468 'th=device-width">'
473 . '<title>Refactoring · GitHub</title>' 469 . '<title>Refactoring · GitHub</title>'
474 . '<link rel="search" type="application/opensea', 470 . '<link rel="search" type="application/opensea',
475 'end' => '<title>ignored</title>', 471 'end' => '<title>ignored</title>',
476 ]; 472 ];
477 foreach ($data as $key => $line) { 473
478 $ignore = null; 474 foreach ($data as $chunk) {
479 $expected = $key !== 'end' ? strlen($line) : false; 475 static::assertSame(strlen($chunk), $callback(null, $chunk));
480 $this->assertEquals($expected, $callback($ignore, $line));
481 if ($expected === false) {
482 break;
483 }
484 } 476 }
477
485 $this->assertEquals('utf-8', $charset); 478 $this->assertEquals('utf-8', $charset);
486 $this->assertEquals('Refactoring · GitHub', $title); 479 $this->assertEquals('Refactoring · GitHub', $title);
487 $this->assertEmpty($desc); 480 $this->assertEmpty($desc);
diff --git a/tests/http/MetadataRetrieverTest.php b/tests/http/MetadataRetrieverTest.php
index 2a1838e8..3c9eaa0e 100644
--- a/tests/http/MetadataRetrieverTest.php
+++ b/tests/http/MetadataRetrieverTest.php
@@ -38,6 +38,7 @@ class MetadataRetrieverTest extends TestCase
38 $remoteTitle = 'Remote Title '; 38 $remoteTitle = 'Remote Title ';
39 $remoteDesc = 'Sometimes the meta description is relevant.'; 39 $remoteDesc = 'Sometimes the meta description is relevant.';
40 $remoteTags = 'abc def'; 40 $remoteTags = 'abc def';
41 $remoteCharset = 'utf-8';
41 42
42 $expectedResult = [ 43 $expectedResult = [
43 'title' => $remoteTitle, 44 'title' => $remoteTitle,
@@ -47,9 +48,26 @@ class MetadataRetrieverTest extends TestCase
47 48
48 $this->httpAccess 49 $this->httpAccess
49 ->expects(static::once()) 50 ->expects(static::once())
51 ->method('getCurlHeaderCallback')
52 ->willReturnCallback(
53 function (&$charset) use (
54 $remoteCharset
55 ): callable {
56 return function () use (
57 &$charset,
58 $remoteCharset
59 ): void {
60 $charset = $remoteCharset;
61 };
62 }
63 )
64 ;
65 $this->httpAccess
66 ->expects(static::once())
50 ->method('getCurlDownloadCallback') 67 ->method('getCurlDownloadCallback')
51 ->willReturnCallback( 68 ->willReturnCallback(
52 function (&$charset, &$title, &$description, &$tags) use ( 69 function (&$charset, &$title, &$description, &$tags) use (
70 $remoteCharset,
53 $remoteTitle, 71 $remoteTitle,
54 $remoteDesc, 72 $remoteDesc,
55 $remoteTags 73 $remoteTags
@@ -59,11 +77,13 @@ class MetadataRetrieverTest extends TestCase
59 &$title, 77 &$title,
60 &$description, 78 &$description,
61 &$tags, 79 &$tags,
80 $remoteCharset,
62 $remoteTitle, 81 $remoteTitle,
63 $remoteDesc, 82 $remoteDesc,
64 $remoteTags 83 $remoteTags
65 ): void { 84 ): void {
66 $charset = 'ISO-8859-1'; 85 static::assertSame($remoteCharset, $charset);
86
67 $title = $remoteTitle; 87 $title = $remoteTitle;
68 $description = $remoteDesc; 88 $description = $remoteDesc;
69 $tags = $remoteTags; 89 $tags = $remoteTags;
@@ -75,8 +95,9 @@ class MetadataRetrieverTest extends TestCase
75 ->expects(static::once()) 95 ->expects(static::once())
76 ->method('getHttpResponse') 96 ->method('getHttpResponse')
77 ->with($url, 30, 4194304) 97 ->with($url, 30, 4194304)
78 ->willReturnCallback(function($url, $timeout, $maxBytes, $callback): void { 98 ->willReturnCallback(function($url, $timeout, $maxBytes, $headerCallback, $dlCallback): void {
79 $callback(); 99 $headerCallback();
100 $dlCallback();
80 }) 101 })
81 ; 102 ;
82 103
@@ -102,8 +123,17 @@ class MetadataRetrieverTest extends TestCase
102 ->expects(static::once()) 123 ->expects(static::once())
103 ->method('getCurlDownloadCallback') 124 ->method('getCurlDownloadCallback')
104 ->willReturnCallback( 125 ->willReturnCallback(
105 function (&$charset, &$title, &$description, &$tags): callable { 126 function (): callable {
106 return function () use (&$charset, &$title, &$description, &$tags): void {}; 127 return function (): void {};
128 }
129 )
130 ;
131 $this->httpAccess
132 ->expects(static::once())
133 ->method('getCurlHeaderCallback')
134 ->willReturnCallback(
135 function (): callable {
136 return function (): void {};
107 } 137 }
108 ) 138 )
109 ; 139 ;
@@ -111,8 +141,9 @@ class MetadataRetrieverTest extends TestCase
111 ->expects(static::once()) 141 ->expects(static::once())
112 ->method('getHttpResponse') 142 ->method('getHttpResponse')
113 ->with($url, 30, 4194304) 143 ->with($url, 30, 4194304)
114 ->willReturnCallback(function($url, $timeout, $maxBytes, $callback): void { 144 ->willReturnCallback(function($url, $timeout, $maxBytes, $headerCallback, $dlCallback): void {
115 $callback(); 145 $headerCallback();
146 $dlCallback();
116 }) 147 })
117 ; 148 ;
118 149