From 5334090be04e66da5cb5c3ad487604b3733c5cac Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Thu, 15 Oct 2020 11:20:33 +0200 Subject: Improve metadata retrieval (performances and accuracy) - Use dedicated function to download headers to avoid apply multiple regexps on headers - Also try to extract title from meta tags --- tests/bookmark/LinkUtilsTest.php | 223 +++++++++++++++++------------------ tests/http/MetadataRetrieverTest.php | 45 +++++-- 2 files changed, 146 insertions(+), 122 deletions(-) (limited to 'tests') diff --git a/tests/bookmark/LinkUtilsTest.php b/tests/bookmark/LinkUtilsTest.php index 29941c8c..3321242f 100644 --- a/tests/bookmark/LinkUtilsTest.php +++ b/tests/bookmark/LinkUtilsTest.php @@ -215,61 +215,92 @@ class LinkUtilsTest extends TestCase $this->assertFalse(html_extract_tag('description', $html)); } + /** + * Test the header callback with valid value + */ + public function testCurlHeaderCallbackOk(): void + { + $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_ok'); + $data = [ + 'HTTP/1.1 200 OK', + 'Server: GitHub.com', + 'Date: Sat, 28 Oct 2017 12:01:33 GMT', + 'Content-Type: text/html; charset=utf-8', + 'Status: 200 OK', + ]; + + foreach ($data as $chunk) { + static::assertIsInt($callback(null, $chunk)); + } + + static::assertSame('utf-8', $charset); + } + /** * Test the download callback with valid value */ - public function testCurlDownloadCallbackOk() + public function testCurlDownloadCallbackOk(): void { + $charset = 'utf-8'; $callback = get_curl_download_callback( $charset, $title, $desc, $keywords, - false, - 'ut_curl_getinfo_ok' + false ); + $data = [ - 'HTTP/1.1 200 OK', - 'Server: GitHub.com', - 'Date: Sat, 28 Oct 2017 12:01:33 GMT', - 'Content-Type: text/html; charset=utf-8', - 'Status: 200 OK', - 'end' => 'th=device-width">' + 'th=device-width">' . 'Refactoring · GitHub' . '' . '', ]; - foreach ($data as $key => $line) { - $ignore = null; - $expected = $key !== 'end' ? strlen($line) : false; - $this->assertEquals($expected, $callback($ignore, $line)); - if ($expected === false) { - break; - } + + foreach ($data as $chunk) { + static::assertSame(strlen($chunk), $callback(null, $chunk)); } - $this->assertEquals('utf-8', $charset); - $this->assertEquals('Refactoring · GitHub', $title); - $this->assertEmpty($desc); - $this->assertEmpty($keywords); + + static::assertSame('utf-8', $charset); + static::assertSame('Refactoring · GitHub', $title); + static::assertEmpty($desc); + static::assertEmpty($keywords); + } + + /** + * Test the header callback with valid value + */ + public function testCurlHeaderCallbackNoCharset(): void + { + $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_no_charset'); + $data = [ + 'HTTP/1.1 200 OK', + ]; + + foreach ($data as $chunk) { + static::assertSame(strlen($chunk), $callback(null, $chunk)); + } + + static::assertFalse($charset); } /** * Test the download callback with valid values and no charset */ - public function testCurlDownloadCallbackOkNoCharset() + public function testCurlDownloadCallbackOkNoCharset(): void { + $charset = null; $callback = get_curl_download_callback( $charset, $title, $desc, $keywords, - false, - 'ut_curl_getinfo_no_charset' + false ); + $data = [ - 'HTTP/1.1 200 OK', 'end' => 'th=device-width">' . 'Refactoring · GitHub' . '' . '', ]; - foreach ($data as $key => $line) { - $ignore = null; - $this->assertEquals(strlen($line), $callback($ignore, $line)); + + foreach ($data as $chunk) { + static::assertSame(strlen($chunk), $callback(null, $chunk)); } + $this->assertEmpty($charset); $this->assertEquals('Refactoring · GitHub', $title); $this->assertEmpty($desc); @@ -290,18 +322,18 @@ class LinkUtilsTest extends TestCase /** * Test the download callback with valid values and no charset */ - public function testCurlDownloadCallbackOkHtmlCharset() + public function testCurlDownloadCallbackOkHtmlCharset(): void { + $charset = null; $callback = get_curl_download_callback( $charset, $title, $desc, $keywords, - false, - 'ut_curl_getinfo_no_charset' + false ); + $data = [ - 'HTTP/1.1 200 OK', '', 'end' => 'th=device-width">' . 'Refactoring · GitHub' @@ -310,14 +342,10 @@ class LinkUtilsTest extends TestCase . '' . '', ]; - foreach ($data as $key => $line) { - $ignore = null; - $expected = $key !== 'end' ? strlen($line) : false; - $this->assertEquals($expected, $callback($ignore, $line)); - if ($expected === false) { - break; - } + foreach ($data as $chunk) { + static::assertSame(strlen($chunk), $callback(null, $chunk)); } + $this->assertEquals('utf-8', $charset); $this->assertEquals('Refactoring · GitHub', $title); $this->assertEmpty($desc); @@ -327,25 +355,26 @@ class LinkUtilsTest extends TestCase /** * Test the download callback with valid values and no title */ - public function testCurlDownloadCallbackOkNoTitle() + public function testCurlDownloadCallbackOkNoTitle(): void { + $charset = 'utf-8'; $callback = get_curl_download_callback( $charset, $title, $desc, $keywords, - false, - 'ut_curl_getinfo_ok' + false ); + $data = [ - 'HTTP/1.1 200 OK', 'end' => 'th=device-width">Refactoring · GitHub' . 'Refactoring · GitHub' . '' . '', ]; - foreach ($data as $key => $line) { - $ignore = null; - $expected = $key !== 'end' ? strlen($line) : false; - $this->assertEquals($expected, $callback($ignore, $line)); - if ($expected === false) { - break; - } + + foreach ($data as $chunk) { + static::assertSame(strlen($chunk), $callback(null, $chunk)); } + $this->assertEquals('utf-8', $charset); $this->assertEquals('Refactoring · GitHub', $title); $this->assertEquals('link desc', $desc); @@ -453,8 +453,9 @@ class LinkUtilsTest extends TestCase * Test the download callback with valid value, and retrieve_description option enabled, * but no desc or keyword defined in the page. */ - public function testCurlDownloadCallbackOkWithDescNotFound() + public function testCurlDownloadCallbackOkWithDescNotFound(): void { + $charset = 'utf-8'; $callback = get_curl_download_callback( $charset, $title, @@ -464,24 +465,16 @@ class LinkUtilsTest extends TestCase 'ut_curl_getinfo_ok' ); $data = [ - 'HTTP/1.1 200 OK', - 'Server: GitHub.com', - 'Date: Sat, 28 Oct 2017 12:01:33 GMT', - 'Content-Type: text/html; charset=utf-8', - 'Status: 200 OK', 'th=device-width">' . 'Refactoring · GitHub' . '