From 1557cefbd76257ceb830f65806831b490faf0acc Mon Sep 17 00:00:00 2001
From: ArthurHoaro
Date: Mon, 4 Jan 2016 10:45:54 +0100
Subject: Fixes #410 - Retrieve title fails in multiple cases

* `get_http_url()` renamed to `get_http_response()`.
* Use the same HTTP context to retrieve response headers and content.
* Follow HTTP 301 and 302 redirections to retrieve the title (default max 3 redirections).
* Add `LinkUtils` to extract titles and charset.
* Try to retrieve charset from HTTP headers first (new), then HTML content.
* Use mb_string to re-encode title if necessary.
---
 tests/LinkUtilsTest.php | 85 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100644 tests/LinkUtilsTest.php

(limited to 'tests/LinkUtilsTest.php')

diff --git a/tests/LinkUtilsTest.php b/tests/LinkUtilsTest.php
new file mode 100644
index 00000000..c2257590
--- /dev/null
+++ b/tests/LinkUtilsTest.php
@@ -0,0 +1,85 @@
+stuff'. $title .'';
+        $this->assertEquals($title, html_extract_title($html));
+    }
+
+    /**
+     * Test html_extract_title() when the title is not found.
+     */
+    public function testHtmlExtractNonExistentTitle()
+    {
+        $html = 'stuff';
+        $this->assertFalse(html_extract_title($html));
+    }
+
+    /**
+     * Test get_charset() with all priorities.
+     */
+    public function testGetCharset()
+    {
+        $headers = array('Content-Type' => 'text/html; charset=Headers');
+        $html = 'stuff';
+        $default = 'default';
+        $this->assertEquals('headers', get_charset($headers, $html, $default));
+        $this->assertEquals('html', get_charset(array(), $html, $default));
+        $this->assertEquals($default, get_charset(array(), '', $default));
+        $this->assertEquals('utf-8', get_charset(array(), ''));
+    }
+
+    /**
+     * Test headers_extract_charset() when the charset is found.
+     */
+    public function testHeadersExtractExistentCharset()
+    {
+        $charset = 'x-MacCroatian';
+        $headers = array('Content-Type' => 'text/html; charset='. $charset);
+        $this->assertEquals(strtolower($charset), headers_extract_charset($headers));
+    }
+
+    /**
+     * Test headers_extract_charset() when the charset is not found.
+     */
+    public function testHeadersExtractNonExistentCharset()
+    {
+        $headers = array();
+        $this->assertFalse(headers_extract_charset($headers));
+
+        $headers = array('Content-Type' => 'text/html');
+        $this->assertFalse(headers_extract_charset($headers));
+    }
+
+    /**
+     * Test html_extract_charset() when the charset is found.
+     */
+    public function testHtmlExtractExistentCharset()
+    {
+        $charset = 'x-MacCroatian';
+        $html = 'stuff2';
+        $this->assertEquals(strtolower($charset), html_extract_charset($html));
+    }
+
+    /**
+     * Test html_extract_charset() when the charset is not found.
+     */
+    public function testHtmlExtractNonExistentCharset()
+    {
+        $html = 'stuff';
+        $this->assertFalse(html_extract_charset($html));
+        $html = 'stuff';
+        $this->assertFalse(html_extract_charset($html));
+    }
+}
--
cgit v1.2.3