diff options
author | ArthurHoaro <arthur@hoa.ro> | 2020-10-15 11:20:33 +0200 |
---|---|---|
committer | ArthurHoaro <arthur@hoa.ro> | 2020-10-15 11:36:56 +0200 |
commit | 5334090be04e66da5cb5c3ad487604b3733c5cac (patch) | |
tree | 419217a587c2a15bc97b943acee11fcf7b559937 /tests/bookmark/LinkUtilsTest.php | |
parent | 4cf3564d28dc8e4d08a3e64f09ad045ffbde97ae (diff) | |
download | Shaarli-5334090be04e66da5cb5c3ad487604b3733c5cac.tar.gz Shaarli-5334090be04e66da5cb5c3ad487604b3733c5cac.tar.zst Shaarli-5334090be04e66da5cb5c3ad487604b3733c5cac.zip |
Improve metadata retrieval (performance and accuracy)
- Use a dedicated function to download headers, to avoid applying multiple regexps to the headers
- Also try to extract title from meta tags
Diffstat (limited to 'tests/bookmark/LinkUtilsTest.php')
-rw-r--r-- | tests/bookmark/LinkUtilsTest.php | 223 |
1 files changed, 108 insertions, 115 deletions
diff --git a/tests/bookmark/LinkUtilsTest.php b/tests/bookmark/LinkUtilsTest.php index 29941c8c..3321242f 100644 --- a/tests/bookmark/LinkUtilsTest.php +++ b/tests/bookmark/LinkUtilsTest.php | |||
@@ -216,60 +216,91 @@ class LinkUtilsTest extends TestCase | |||
216 | } | 216 | } |
217 | 217 | ||
218 | /** | 218 | /** |
219 | * Test the header callback with valid value | ||
220 | */ | ||
221 | public function testCurlHeaderCallbackOk(): void | ||
222 | { | ||
223 | $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_ok'); | ||
224 | $data = [ | ||
225 | 'HTTP/1.1 200 OK', | ||
226 | 'Server: GitHub.com', | ||
227 | 'Date: Sat, 28 Oct 2017 12:01:33 GMT', | ||
228 | 'Content-Type: text/html; charset=utf-8', | ||
229 | 'Status: 200 OK', | ||
230 | ]; | ||
231 | |||
232 | foreach ($data as $chunk) { | ||
233 | static::assertIsInt($callback(null, $chunk)); | ||
234 | } | ||
235 | |||
236 | static::assertSame('utf-8', $charset); | ||
237 | } | ||
238 | |||
239 | /** | ||
219 | * Test the download callback with valid value | 240 | * Test the download callback with valid value |
220 | */ | 241 | */ |
221 | public function testCurlDownloadCallbackOk() | 242 | public function testCurlDownloadCallbackOk(): void |
222 | { | 243 | { |
244 | $charset = 'utf-8'; | ||
223 | $callback = get_curl_download_callback( | 245 | $callback = get_curl_download_callback( |
224 | $charset, | 246 | $charset, |
225 | $title, | 247 | $title, |
226 | $desc, | 248 | $desc, |
227 | $keywords, | 249 | $keywords, |
228 | false, | 250 | false |
229 | 'ut_curl_getinfo_ok' | ||
230 | ); | 251 | ); |
252 | |||
231 | $data = [ | 253 | $data = [ |
232 | 'HTTP/1.1 200 OK', | 254 | 'th=device-width">' |
233 | 'Server: GitHub.com', | ||
234 | 'Date: Sat, 28 Oct 2017 12:01:33 GMT', | ||
235 | 'Content-Type: text/html; charset=utf-8', | ||
236 | 'Status: 200 OK', | ||
237 | 'end' => 'th=device-width">' | ||
238 | . '<title>Refactoring · GitHub</title>' | 255 | . '<title>Refactoring · GitHub</title>' |
239 | . '<link rel="search" type="application/opensea', | 256 | . '<link rel="search" type="application/opensea', |
240 | '<title>ignored</title>' | 257 | '<title>ignored</title>' |
241 | . '<meta name="description" content="desc" />' | 258 | . '<meta name="description" content="desc" />' |
242 | . '<meta name="keywords" content="key1,key2" />', | 259 | . '<meta name="keywords" content="key1,key2" />', |
243 | ]; | 260 | ]; |
244 | foreach ($data as $key => $line) { | 261 | |
245 | $ignore = null; | 262 | foreach ($data as $chunk) { |
246 | $expected = $key !== 'end' ? strlen($line) : false; | 263 | static::assertSame(strlen($chunk), $callback(null, $chunk)); |
247 | $this->assertEquals($expected, $callback($ignore, $line)); | ||
248 | if ($expected === false) { | ||
249 | break; | ||
250 | } | ||
251 | } | 264 | } |
252 | $this->assertEquals('utf-8', $charset); | 265 | |
253 | $this->assertEquals('Refactoring · GitHub', $title); | 266 | static::assertSame('utf-8', $charset); |
254 | $this->assertEmpty($desc); | 267 | static::assertSame('Refactoring · GitHub', $title); |
255 | $this->assertEmpty($keywords); | 268 | static::assertEmpty($desc); |
269 | static::assertEmpty($keywords); | ||
270 | } | ||
271 | |||
272 | /** | ||
273 | * Test the header callback with valid value | ||
274 | */ | ||
275 | public function testCurlHeaderCallbackNoCharset(): void | ||
276 | { | ||
277 | $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_no_charset'); | ||
278 | $data = [ | ||
279 | 'HTTP/1.1 200 OK', | ||
280 | ]; | ||
281 | |||
282 | foreach ($data as $chunk) { | ||
283 | static::assertSame(strlen($chunk), $callback(null, $chunk)); | ||
284 | } | ||
285 | |||
286 | static::assertFalse($charset); | ||
256 | } | 287 | } |
257 | 288 | ||
258 | /** | 289 | /** |
259 | * Test the download callback with valid values and no charset | 290 | * Test the download callback with valid values and no charset |
260 | */ | 291 | */ |
261 | public function testCurlDownloadCallbackOkNoCharset() | 292 | public function testCurlDownloadCallbackOkNoCharset(): void |
262 | { | 293 | { |
294 | $charset = null; | ||
263 | $callback = get_curl_download_callback( | 295 | $callback = get_curl_download_callback( |
264 | $charset, | 296 | $charset, |
265 | $title, | 297 | $title, |
266 | $desc, | 298 | $desc, |
267 | $keywords, | 299 | $keywords, |
268 | false, | 300 | false |
269 | 'ut_curl_getinfo_no_charset' | ||
270 | ); | 301 | ); |
302 | |||
271 | $data = [ | 303 | $data = [ |
272 | 'HTTP/1.1 200 OK', | ||
273 | 'end' => 'th=device-width">' | 304 | 'end' => 'th=device-width">' |
274 | . '<title>Refactoring · GitHub</title>' | 305 | . '<title>Refactoring · GitHub</title>' |
275 | . '<link rel="search" type="application/opensea', | 306 | . '<link rel="search" type="application/opensea', |
@@ -277,10 +308,11 @@ class LinkUtilsTest extends TestCase | |||
277 | . '<meta name="description" content="desc" />' | 308 | . '<meta name="description" content="desc" />' |
278 | . '<meta name="keywords" content="key1,key2" />', | 309 | . '<meta name="keywords" content="key1,key2" />', |
279 | ]; | 310 | ]; |
280 | foreach ($data as $key => $line) { | 311 | |
281 | $ignore = null; | 312 | foreach ($data as $chunk) { |
282 | $this->assertEquals(strlen($line), $callback($ignore, $line)); | 313 | static::assertSame(strlen($chunk), $callback(null, $chunk)); |
283 | } | 314 | } |
315 | |||
284 | $this->assertEmpty($charset); | 316 | $this->assertEmpty($charset); |
285 | $this->assertEquals('Refactoring · GitHub', $title); | 317 | $this->assertEquals('Refactoring · GitHub', $title); |
286 | $this->assertEmpty($desc); | 318 | $this->assertEmpty($desc); |
@@ -290,18 +322,18 @@ class LinkUtilsTest extends TestCase | |||
290 | /** | 322 | /** |
291 | * Test the download callback with valid values and no charset | 323 | * Test the download callback with valid values and no charset |
292 | */ | 324 | */ |
293 | public function testCurlDownloadCallbackOkHtmlCharset() | 325 | public function testCurlDownloadCallbackOkHtmlCharset(): void |
294 | { | 326 | { |
327 | $charset = null; | ||
295 | $callback = get_curl_download_callback( | 328 | $callback = get_curl_download_callback( |
296 | $charset, | 329 | $charset, |
297 | $title, | 330 | $title, |
298 | $desc, | 331 | $desc, |
299 | $keywords, | 332 | $keywords, |
300 | false, | 333 | false |
301 | 'ut_curl_getinfo_no_charset' | ||
302 | ); | 334 | ); |
335 | |||
303 | $data = [ | 336 | $data = [ |
304 | 'HTTP/1.1 200 OK', | ||
305 | '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />', | 337 | '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />', |
306 | 'end' => 'th=device-width">' | 338 | 'end' => 'th=device-width">' |
307 | . '<title>Refactoring · GitHub</title>' | 339 | . '<title>Refactoring · GitHub</title>' |
@@ -310,14 +342,10 @@ class LinkUtilsTest extends TestCase | |||
310 | . '<meta name="description" content="desc" />' | 342 | . '<meta name="description" content="desc" />' |
311 | . '<meta name="keywords" content="key1,key2" />', | 343 | . '<meta name="keywords" content="key1,key2" />', |
312 | ]; | 344 | ]; |
313 | foreach ($data as $key => $line) { | 345 | foreach ($data as $chunk) { |
314 | $ignore = null; | 346 | static::assertSame(strlen($chunk), $callback(null, $chunk)); |
315 | $expected = $key !== 'end' ? strlen($line) : false; | ||
316 | $this->assertEquals($expected, $callback($ignore, $line)); | ||
317 | if ($expected === false) { | ||
318 | break; | ||
319 | } | ||
320 | } | 347 | } |
348 | |||
321 | $this->assertEquals('utf-8', $charset); | 349 | $this->assertEquals('utf-8', $charset); |
322 | $this->assertEquals('Refactoring · GitHub', $title); | 350 | $this->assertEquals('Refactoring · GitHub', $title); |
323 | $this->assertEmpty($desc); | 351 | $this->assertEmpty($desc); |
@@ -327,25 +355,26 @@ class LinkUtilsTest extends TestCase | |||
327 | /** | 355 | /** |
328 | * Test the download callback with valid values and no title | 356 | * Test the download callback with valid values and no title |
329 | */ | 357 | */ |
330 | public function testCurlDownloadCallbackOkNoTitle() | 358 | public function testCurlDownloadCallbackOkNoTitle(): void |
331 | { | 359 | { |
360 | $charset = 'utf-8'; | ||
332 | $callback = get_curl_download_callback( | 361 | $callback = get_curl_download_callback( |
333 | $charset, | 362 | $charset, |
334 | $title, | 363 | $title, |
335 | $desc, | 364 | $desc, |
336 | $keywords, | 365 | $keywords, |
337 | false, | 366 | false |
338 | 'ut_curl_getinfo_ok' | ||
339 | ); | 367 | ); |
368 | |||
340 | $data = [ | 369 | $data = [ |
341 | 'HTTP/1.1 200 OK', | ||
342 | 'end' => 'th=device-width">Refactoring · GitHub<link rel="search" type="application/opensea', | 370 | 'end' => 'th=device-width">Refactoring · GitHub<link rel="search" type="application/opensea', |
343 | 'ignored', | 371 | 'ignored', |
344 | ]; | 372 | ]; |
345 | foreach ($data as $key => $line) { | 373 | |
346 | $ignore = null; | 374 | foreach ($data as $chunk) { |
347 | $this->assertEquals(strlen($line), $callback($ignore, $line)); | 375 | static::assertSame(strlen($chunk), $callback(null, $chunk)); |
348 | } | 376 | } |
377 | |||
349 | $this->assertEquals('utf-8', $charset); | 378 | $this->assertEquals('utf-8', $charset); |
350 | $this->assertEmpty($title); | 379 | $this->assertEmpty($title); |
351 | $this->assertEmpty($desc); | 380 | $this->assertEmpty($desc); |
@@ -353,81 +382,55 @@ class LinkUtilsTest extends TestCase | |||
353 | } | 382 | } |
354 | 383 | ||
355 | /** | 384 | /** |
356 | * Test the download callback with an invalid content type. | 385 | * Test the header callback with an invalid content type. |
357 | */ | 386 | */ |
358 | public function testCurlDownloadCallbackInvalidContentType() | 387 | public function testCurlHeaderCallbackInvalidContentType(): void |
359 | { | 388 | { |
360 | $callback = get_curl_download_callback( | 389 | $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_ct_ko'); |
361 | $charset, | 390 | $data = [ |
362 | $title, | 391 | 'HTTP/1.1 200 OK', |
363 | $desc, | 392 | ]; |
364 | $keywords, | 393 | |
365 | false, | 394 | static::assertFalse($callback(null, $data[0])); |
366 | 'ut_curl_getinfo_ct_ko' | 395 | static::assertNull($charset); |
367 | ); | ||
368 | $ignore = null; | ||
369 | $this->assertFalse($callback($ignore, '')); | ||
370 | $this->assertEmpty($charset); | ||
371 | $this->assertEmpty($title); | ||
372 | } | 396 | } |
373 | 397 | ||
374 | /** | 398 | /** |
375 | * Test the download callback with an invalid response code. | 399 | * Test the header callback with an invalid response code. |
376 | */ | 400 | */ |
377 | public function testCurlDownloadCallbackInvalidResponseCode() | 401 | public function testCurlHeaderCallbackInvalidResponseCode(): void |
378 | { | 402 | { |
379 | $callback = $callback = get_curl_download_callback( | 403 | $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_rc_ko'); |
380 | $charset, | 404 | |
381 | $title, | 405 | static::assertFalse($callback(null, '')); |
382 | $desc, | 406 | static::assertNull($charset); |
383 | $keywords, | ||
384 | false, | ||
385 | 'ut_curl_getinfo_rc_ko' | ||
386 | ); | ||
387 | $ignore = null; | ||
388 | $this->assertFalse($callback($ignore, '')); | ||
389 | $this->assertEmpty($charset); | ||
390 | $this->assertEmpty($title); | ||
391 | } | 407 | } |
392 | 408 | ||
393 | /** | 409 | /** |
394 | * Test the download callback with an invalid content type and response code. | 410 | * Test the header callback with an invalid content type and response code. |
395 | */ | 411 | */ |
396 | public function testCurlDownloadCallbackInvalidContentTypeAndResponseCode() | 412 | public function testCurlHeaderCallbackInvalidContentTypeAndResponseCode(): void |
397 | { | 413 | { |
398 | $callback = $callback = get_curl_download_callback( | 414 | $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_rs_ct_ko'); |
399 | $charset, | 415 | |
400 | $title, | 416 | static::assertFalse($callback(null, '')); |
401 | $desc, | 417 | static::assertNull($charset); |
402 | $keywords, | ||
403 | false, | ||
404 | 'ut_curl_getinfo_rs_ct_ko' | ||
405 | ); | ||
406 | $ignore = null; | ||
407 | $this->assertFalse($callback($ignore, '')); | ||
408 | $this->assertEmpty($charset); | ||
409 | $this->assertEmpty($title); | ||
410 | } | 418 | } |
411 | 419 | ||
412 | /** | 420 | /** |
413 | * Test the download callback with valid value, and retrieve_description option enabled. | 421 | * Test the download callback with valid value, and retrieve_description option enabled. |
414 | */ | 422 | */ |
415 | public function testCurlDownloadCallbackOkWithDesc() | 423 | public function testCurlDownloadCallbackOkWithDesc(): void |
416 | { | 424 | { |
425 | $charset = 'utf-8'; | ||
417 | $callback = get_curl_download_callback( | 426 | $callback = get_curl_download_callback( |
418 | $charset, | 427 | $charset, |
419 | $title, | 428 | $title, |
420 | $desc, | 429 | $desc, |
421 | $keywords, | 430 | $keywords, |
422 | true, | 431 | true |
423 | 'ut_curl_getinfo_ok' | ||
424 | ); | 432 | ); |
425 | $data = [ | 433 | $data = [ |
426 | 'HTTP/1.1 200 OK', | ||
427 | 'Server: GitHub.com', | ||
428 | 'Date: Sat, 28 Oct 2017 12:01:33 GMT', | ||
429 | 'Content-Type: text/html; charset=utf-8', | ||
430 | 'Status: 200 OK', | ||
431 | 'th=device-width">' | 434 | 'th=device-width">' |
432 | . '<title>Refactoring · GitHub</title>' | 435 | . '<title>Refactoring · GitHub</title>' |
433 | . '<link rel="search" type="application/opensea', | 436 | . '<link rel="search" type="application/opensea', |
@@ -435,14 +438,11 @@ class LinkUtilsTest extends TestCase | |||
435 | . '<meta name="description" content="link desc" />' | 438 | . '<meta name="description" content="link desc" />' |
436 | . '<meta name="keywords" content="key1,key2" />', | 439 | . '<meta name="keywords" content="key1,key2" />', |
437 | ]; | 440 | ]; |
438 | foreach ($data as $key => $line) { | 441 | |
439 | $ignore = null; | 442 | foreach ($data as $chunk) { |
440 | $expected = $key !== 'end' ? strlen($line) : false; | 443 | static::assertSame(strlen($chunk), $callback(null, $chunk)); |
441 | $this->assertEquals($expected, $callback($ignore, $line)); | ||
442 | if ($expected === false) { | ||
443 | break; | ||
444 | } | ||
445 | } | 444 | } |
445 | |||
446 | $this->assertEquals('utf-8', $charset); | 446 | $this->assertEquals('utf-8', $charset); |
447 | $this->assertEquals('Refactoring · GitHub', $title); | 447 | $this->assertEquals('Refactoring · GitHub', $title); |
448 | $this->assertEquals('link desc', $desc); | 448 | $this->assertEquals('link desc', $desc); |
@@ -453,8 +453,9 @@ class LinkUtilsTest extends TestCase | |||
453 | * Test the download callback with valid value, and retrieve_description option enabled, | 453 | * Test the download callback with valid value, and retrieve_description option enabled, |
454 | * but no desc or keyword defined in the page. | 454 | * but no desc or keyword defined in the page. |
455 | */ | 455 | */ |
456 | public function testCurlDownloadCallbackOkWithDescNotFound() | 456 | public function testCurlDownloadCallbackOkWithDescNotFound(): void |
457 | { | 457 | { |
458 | $charset = 'utf-8'; | ||
458 | $callback = get_curl_download_callback( | 459 | $callback = get_curl_download_callback( |
459 | $charset, | 460 | $charset, |
460 | $title, | 461 | $title, |
@@ -464,24 +465,16 @@ class LinkUtilsTest extends TestCase | |||
464 | 'ut_curl_getinfo_ok' | 465 | 'ut_curl_getinfo_ok' |
465 | ); | 466 | ); |
466 | $data = [ | 467 | $data = [ |
467 | 'HTTP/1.1 200 OK', | ||
468 | 'Server: GitHub.com', | ||
469 | 'Date: Sat, 28 Oct 2017 12:01:33 GMT', | ||
470 | 'Content-Type: text/html; charset=utf-8', | ||
471 | 'Status: 200 OK', | ||
472 | 'th=device-width">' | 468 | 'th=device-width">' |
473 | . '<title>Refactoring · GitHub</title>' | 469 | . '<title>Refactoring · GitHub</title>' |
474 | . '<link rel="search" type="application/opensea', | 470 | . '<link rel="search" type="application/opensea', |
475 | 'end' => '<title>ignored</title>', | 471 | 'end' => '<title>ignored</title>', |
476 | ]; | 472 | ]; |
477 | foreach ($data as $key => $line) { | 473 | |
478 | $ignore = null; | 474 | foreach ($data as $chunk) { |
479 | $expected = $key !== 'end' ? strlen($line) : false; | 475 | static::assertSame(strlen($chunk), $callback(null, $chunk)); |
480 | $this->assertEquals($expected, $callback($ignore, $line)); | ||
481 | if ($expected === false) { | ||
482 | break; | ||
483 | } | ||
484 | } | 476 | } |
477 | |||
485 | $this->assertEquals('utf-8', $charset); | 478 | $this->assertEquals('utf-8', $charset); |
486 | $this->assertEquals('Refactoring · GitHub', $title); | 479 | $this->assertEquals('Refactoring · GitHub', $title); |
487 | $this->assertEmpty($desc); | 480 | $this->assertEmpty($desc); |