From 1557cefbd76257ceb830f65806831b490faf0acc Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Mon, 4 Jan 2016 10:45:54 +0100 Subject: Fixes #410 - Retrieve title fails in multiple cases * `get_http_url()` renamed to `get_http_response()`. * Use the same HTTP context to retrieve response headers and content. * Follow HTTP 301 and 302 redirections to retrieve the title (default max 3 redirections). * Add `LinkUtils` to extract titles and charset. * Try to retrieve charset from HTTP headers first (new), then HTML content. * Use mb_string to re-encode title if necessary. --- tests/HttpUtils/GetHttpUrlTest.php | 26 ++++++------ tests/LinkUtilsTest.php | 85 ++++++++++++++++++++++++++++++++++++++ tests/Url/UrlTest.php | 18 ++++++++ 3 files changed, 116 insertions(+), 13 deletions(-) create mode 100644 tests/LinkUtilsTest.php (limited to 'tests') diff --git a/tests/HttpUtils/GetHttpUrlTest.php b/tests/HttpUtils/GetHttpUrlTest.php index 76092b80..fd293505 100644 --- a/tests/HttpUtils/GetHttpUrlTest.php +++ b/tests/HttpUtils/GetHttpUrlTest.php @@ -6,7 +6,7 @@ require_once 'application/HttpUtils.php'; /** - * Unitary tests for get_http_url() + * Unitary tests for get_http_response() */ class GetHttpUrlTest extends PHPUnit_Framework_TestCase { @@ -15,12 +15,15 @@ class GetHttpUrlTest extends PHPUnit_Framework_TestCase */ public function testGetInvalidLocalUrl() { - list($headers, $content) = get_http_url('/non/existent', 1); - $this->assertEquals('HTTP Error', $headers[0]); - $this->assertRegexp( - '/failed to open stream: No such file or directory/', - $content - ); + // Local + list($headers, $content) = get_http_response('/non/existent', 1); + $this->assertEquals('Invalid HTTP Url', $headers[0]); + $this->assertFalse($content); + + // Non HTTP + list($headers, $content) = get_http_response('ftp://save.tld/mysave', 1); + $this->assertEquals('Invalid HTTP Url', $headers[0]); + $this->assertFalse($content); } /** @@ -28,11 +31,8 @@ class GetHttpUrlTest extends PHPUnit_Framework_TestCase */ public function testGetInvalidRemoteUrl() { - list($headers, $content) = get_http_url('http://non.existent', 1); - $this->assertEquals('HTTP Error', $headers[0]); - $this->assertRegexp( - '/Name or service not known/', - $content - ); + list($headers, $content) = @get_http_response('http://non.existent', 1); + $this->assertFalse($headers); + $this->assertFalse($content); } } diff --git a/tests/LinkUtilsTest.php b/tests/LinkUtilsTest.php new file mode 100644 index 00000000..c2257590 --- /dev/null +++ b/tests/LinkUtilsTest.php @@ -0,0 +1,85 @@ +stuff'. $title .''; + $this->assertEquals($title, html_extract_title($html)); + } + + /** + * Test html_extract_title() when the title is not found. + */ + public function testHtmlExtractNonExistentTitle() + { + $html = 'stuff'; + $this->assertFalse(html_extract_title($html)); + } + + /** + * Test get_charset() with all priorities. + */ + public function testGetCharset() + { + $headers = array('Content-Type' => 'text/html; charset=Headers'); + $html = 'stuff'; + $default = 'default'; + $this->assertEquals('headers', get_charset($headers, $html, $default)); + $this->assertEquals('html', get_charset(array(), $html, $default)); + $this->assertEquals($default, get_charset(array(), '', $default)); + $this->assertEquals('utf-8', get_charset(array(), '')); + } + + /** + * Test headers_extract_charset() when the charset is found. + */ + public function testHeadersExtractExistentCharset() + { + $charset = 'x-MacCroatian'; + $headers = array('Content-Type' => 'text/html; charset='. $charset); + $this->assertEquals(strtolower($charset), headers_extract_charset($headers)); + } + + /** + * Test headers_extract_charset() when the charset is not found. + */ + public function testHeadersExtractNonExistentCharset() + { + $headers = array(); + $this->assertFalse(headers_extract_charset($headers)); + + $headers = array('Content-Type' => 'text/html'); + $this->assertFalse(headers_extract_charset($headers)); + } + + /** + * Test html_extract_charset() when the charset is found. + */ + public function testHtmlExtractExistentCharset() + { + $charset = 'x-MacCroatian'; + $html = 'stuff2'; + $this->assertEquals(strtolower($charset), html_extract_charset($html)); + } + + /** + * Test html_extract_charset() when the charset is not found. + */ + public function testHtmlExtractNonExistentCharset() + { + $html = 'stuff'; + $this->assertFalse(html_extract_charset($html)); + $html = 'stuff'; + $this->assertFalse(html_extract_charset($html)); + } +} diff --git a/tests/Url/UrlTest.php b/tests/Url/UrlTest.php index af6daaa4..425327ed 100644 --- a/tests/Url/UrlTest.php +++ b/tests/Url/UrlTest.php @@ -156,4 +156,22 @@ class UrlTest extends PHPUnit_Framework_TestCase $this->assertEquals($strOn, add_trailing_slash($strOn)); $this->assertEquals($strOn, add_trailing_slash($strOff)); } + + /** + * Test valid HTTP url. + */ + function testUrlIsHttp() + { + $url = new Url(self::$baseUrl); + $this->assertTrue($url->isHttp()); + } + + /** + * Test non HTTP url. + */ + function testUrlIsNotHttp() + { + $url = new Url('ftp://save.tld/mysave'); + $this->assertFalse($url->isHttp()); + } } -- cgit v1.2.3