From 1557cefbd76257ceb830f65806831b490faf0acc Mon Sep 17 00:00:00 2001
From: ArthurHoaro <arthur@hoa.ro>
Date: Mon, 4 Jan 2016 10:45:54 +0100
Subject: Fixes #410 - Retrieve title fails in multiple cases

  * `get_http_url()` renamed to `get_http_response()`.
  * Use the same HTTP context to retrieve response headers and content.
  * Follow HTTP 301 and 302 redirects when retrieving the title (at most 3 redirects by default).
  * Add `LinkUtils` to extract titles and charset.
  * Try to retrieve the charset from the HTTP headers first (new), then fall back to the HTML content.
  * Use the mbstring extension to re-encode the title if necessary.
---
 tests/HttpUtils/GetHttpUrlTest.php | 26 ++++++------
 tests/LinkUtilsTest.php            | 85 ++++++++++++++++++++++++++++++++++++++
 tests/Url/UrlTest.php              | 18 ++++++++
 3 files changed, 116 insertions(+), 13 deletions(-)
 create mode 100644 tests/LinkUtilsTest.php

(limited to 'tests')

diff --git a/tests/HttpUtils/GetHttpUrlTest.php b/tests/HttpUtils/GetHttpUrlTest.php
index 76092b80..fd293505 100644
--- a/tests/HttpUtils/GetHttpUrlTest.php
+++ b/tests/HttpUtils/GetHttpUrlTest.php
@@ -6,7 +6,7 @@
 require_once 'application/HttpUtils.php';
 
 /**
- * Unitary tests for get_http_url()
+ * Unit tests for get_http_response()
  */
 class GetHttpUrlTest extends PHPUnit_Framework_TestCase
 {
@@ -15,12 +15,15 @@ class GetHttpUrlTest extends PHPUnit_Framework_TestCase
      */
     public function testGetInvalidLocalUrl()
     {
-        list($headers, $content) = get_http_url('/non/existent', 1);
-        $this->assertEquals('HTTP Error', $headers[0]);
-        $this->assertRegexp(
-            '/failed to open stream: No such file or directory/',
-            $content
-        );
+        // Local
+        list($headers, $content) = get_http_response('/non/existent', 1);
+        $this->assertEquals('Invalid HTTP Url', $headers[0]);
+        $this->assertFalse($content);
+
+        // Non HTTP
+        list($headers, $content) = get_http_response('ftp://save.tld/mysave', 1);
+        $this->assertEquals('Invalid HTTP Url', $headers[0]);
+        $this->assertFalse($content);
     }
 
     /**
@@ -28,11 +31,8 @@ class GetHttpUrlTest extends PHPUnit_Framework_TestCase
      */
     public function testGetInvalidRemoteUrl()
     {
-        list($headers, $content) = get_http_url('http://non.existent', 1);
-        $this->assertEquals('HTTP Error', $headers[0]);
-        $this->assertRegexp(
-            '/Name or service not known/',
-            $content
-        );
+        list($headers, $content) = @get_http_response('http://non.existent', 1);
+        $this->assertFalse($headers);
+        $this->assertFalse($content);
     }
 }
diff --git a/tests/LinkUtilsTest.php b/tests/LinkUtilsTest.php
new file mode 100644
index 00000000..c2257590
--- /dev/null
+++ b/tests/LinkUtilsTest.php
@@ -0,0 +1,85 @@
+<?php
+
+require_once 'application/LinkUtils.php';
+
+/**
+ * Class LinkUtilsTest.
+ */
+class LinkUtilsTest extends PHPUnit_Framework_TestCase
+{
+    /**
+     * Test html_extract_title() when the title is found.
+     */
+    public function testHtmlExtractExistentTitle()
+    {
+        $title = 'Read me please.';
+        $html = '<html><meta>stuff</meta><title>'. $title .'</title></html>';
+        $this->assertEquals($title, html_extract_title($html));
+    }
+
+    /**
+     * Test html_extract_title() when the title is not found.
+     */
+    public function testHtmlExtractNonExistentTitle()
+    {
+        $html = '<html><meta>stuff</meta></html>';
+        $this->assertFalse(html_extract_title($html));
+    }
+
+    /**
+     * Test get_charset() with all priorities.
+     */
+    public function testGetCharset()
+    {
+        $headers = array('Content-Type' => 'text/html; charset=Headers');
+        $html = '<html><meta>stuff</meta><meta charset="Html"/></html>';
+        $default = 'default';
+        $this->assertEquals('headers', get_charset($headers, $html, $default));
+        $this->assertEquals('html', get_charset(array(), $html, $default));
+        $this->assertEquals($default, get_charset(array(), '', $default));
+        $this->assertEquals('utf-8', get_charset(array(), ''));
+    }
+
+    /**
+     * Test headers_extract_charset() when the charset is found.
+     */
+    public function testHeadersExtractExistentCharset()
+    {
+        $charset = 'x-MacCroatian';
+        $headers = array('Content-Type' => 'text/html; charset='. $charset);
+        $this->assertEquals(strtolower($charset), headers_extract_charset($headers));
+    }
+
+    /**
+     * Test headers_extract_charset() when the charset is not found.
+     */
+    public function testHeadersExtractNonExistentCharset()
+    {
+        $headers = array();
+        $this->assertFalse(headers_extract_charset($headers));
+
+        $headers = array('Content-Type' => 'text/html');
+        $this->assertFalse(headers_extract_charset($headers));
+    }
+
+    /**
+     * Test html_extract_charset() when the charset is found.
+     */
+    public function testHtmlExtractExistentCharset()
+    {
+        $charset = 'x-MacCroatian';
+        $html = '<html><meta>stuff2</meta><meta charset="'. $charset .'"/></html>';
+        $this->assertEquals(strtolower($charset), html_extract_charset($html));
+    }
+
+    /**
+     * Test html_extract_charset() when the charset is not found.
+     */
+    public function testHtmlExtractNonExistentCharset()
+    {
+        $html = '<html><meta>stuff</meta></html>';
+        $this->assertFalse(html_extract_charset($html));
+        $html = '<html><meta>stuff</meta><meta charset=""/></html>';
+        $this->assertFalse(html_extract_charset($html));
+    }
+}
diff --git a/tests/Url/UrlTest.php b/tests/Url/UrlTest.php
index af6daaa4..425327ed 100644
--- a/tests/Url/UrlTest.php
+++ b/tests/Url/UrlTest.php
@@ -156,4 +156,22 @@ class UrlTest extends PHPUnit_Framework_TestCase
         $this->assertEquals($strOn, add_trailing_slash($strOn));
         $this->assertEquals($strOn, add_trailing_slash($strOff));
     }
+
+    /**
+     * Test valid HTTP url.
+     */
+    function testUrlIsHttp()
+    {
+        $url = new Url(self::$baseUrl);
+        $this->assertTrue($url->isHttp());
+    }
+
+    /**
+     * Test non HTTP url.
+     */
+    function testUrlIsNotHttp()
+    {
+        $url = new Url('ftp://save.tld/mysave');
+        $this->assertFalse($url->isHttp());
+    }
 }
-- 
cgit v1.2.3