]> git.immae.eu Git - github/shaarli/Shaarli.git/blobdiff - tests/bookmark/LinkUtilsTest.php
Fix an issue truncating extracted metadata content
[github/shaarli/Shaarli.git] / tests / bookmark / LinkUtilsTest.php
index cc7819bcd767825318d0b7ce7dd5e8f69883ecfe..9bddf84b3f02939ec9afb479948eaee368d4a1ff 100644 (file)
@@ -2,7 +2,7 @@
 
 namespace Shaarli\Bookmark;
 
-use PHPUnit\Framework\TestCase;
+use Shaarli\TestCase;
 
 require_once 'tests/utils/CurlUtils.php';
 
@@ -42,6 +42,19 @@ class LinkUtilsTest extends TestCase
         $this->assertEquals(strtolower($charset), header_extract_charset($headers));
     }
 
+    /**
+     * Test header_extract_charset() when the charset value is wrapped in double or single quotes.
+     */
+    public function testHeadersExtractExistentCharsetWithQuotes()
+    {
+        $charset = 'x-MacCroatian';
+        $headers = 'text/html; charset="' . $charset . '"otherstuff="test"';
+        $this->assertEquals(strtolower($charset), header_extract_charset($headers));
+
+        $headers = 'text/html; charset=\'' . $charset . '\'otherstuff="test"';
+        $this->assertEquals(strtolower($charset), header_extract_charset($headers));
+    }
+
     /**
      * Test headers_extract_charset() when the charset is not found.
      */
@@ -155,6 +168,36 @@ class LinkUtilsTest extends TestCase
         $this->assertEquals($description, html_extract_tag('description', $html));
     }
 
+    /**
+     * Test html_extract_tag() with double-quoted content containing a single quote, and vice versa.
+     */
+    public function testHtmlExtractExistentNameTagWithMixedQuotes(): void
+    {
+        $description = 'Bob and Alice share M&M\'s.';
+
+        $html = '<meta property="og:description" content="' . $description . '">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        $html = '<meta tag1="content1" property="og:unrelated1 og:description og:unrelated2" '.
+            'tag2="content2" content="' . $description . '" tag3="content3">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        $html = '<meta property="og:description" name="description" content="' . $description . '">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        $description = 'Bob and Alice share "cookies".';
+
+        $html = '<meta property="og:description" content=\'' . $description . '\'>';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        $html = '<meta tag1="content1" property="og:unrelated1 og:description og:unrelated2" '.
+            'tag2="content2" content=\'' . $description . '\' tag3="content3">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        $html = '<meta property="og:description" name="description" content=\'' . $description . '\'>';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+    }
+
     /**
      * Test html_extract_tag() when the tag <meta name= is not found.
      */
@@ -202,61 +245,92 @@ class LinkUtilsTest extends TestCase
         $this->assertFalse(html_extract_tag('description', $html));
     }
 
+    /**
+     * Test the header callback with valid headers: every chunk is fully consumed and the charset is extracted.
+     */
+    public function testCurlHeaderCallbackOk(): void
+    {
+        $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_ok');
+        $data = [
+            'HTTP/1.1 200 OK',
+            'Server: GitHub.com',
+            'Date: Sat, 28 Oct 2017 12:01:33 GMT',
+            'Content-Type: text/html; charset=utf-8',
+            'Status: 200 OK',
+        ];
+
+        foreach ($data as $chunk) {
+            static::assertSame(strlen($chunk), $callback(null, $chunk));
+        }
+
+        static::assertSame('utf-8', $charset);
+    }
+
     /**
      * Test the download callback with valid value
      */
-    public function testCurlDownloadCallbackOk()
+    public function testCurlDownloadCallbackOk(): void
     {
+        $charset = 'utf-8';
         $callback = get_curl_download_callback(
             $charset,
             $title,
             $desc,
             $keywords,
-            false,
-            'ut_curl_getinfo_ok'
+            false
         );
+
         $data = [
-            'HTTP/1.1 200 OK',
-            'Server: GitHub.com',
-            'Date: Sat, 28 Oct 2017 12:01:33 GMT',
-            'Content-Type: text/html; charset=utf-8',
-            'Status: 200 OK',
-            'end' => 'th=device-width">'
+            'th=device-width">'
                 . '<title>Refactoring · GitHub</title>'
                 . '<link rel="search" type="application/opensea',
             '<title>ignored</title>'
                 . '<meta name="description" content="desc" />'
                 . '<meta name="keywords" content="key1,key2" />',
         ];
-        foreach ($data as $key => $line) {
-            $ignore = null;
-            $expected = $key !== 'end' ? strlen($line) : false;
-            $this->assertEquals($expected, $callback($ignore, $line));
-            if ($expected === false) {
-                break;
-            }
+
+        foreach ($data as $chunk) {
+            static::assertSame(strlen($chunk), $callback(null, $chunk));
         }
-        $this->assertEquals('utf-8', $charset);
-        $this->assertEquals('Refactoring · GitHub', $title);
-        $this->assertEmpty($desc);
-        $this->assertEmpty($keywords);
+
+        static::assertSame('utf-8', $charset);
+        static::assertSame('Refactoring · GitHub', $title);
+        static::assertEmpty($desc);
+        static::assertEmpty($keywords);
+    }
+
+    /**
+     * Test the header callback when no charset can be extracted from the response
+     */
+    public function testCurlHeaderCallbackNoCharset(): void
+    {
+        $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_no_charset');
+        $data = [
+            'HTTP/1.1 200 OK',
+        ];
+
+        foreach ($data as $chunk) {
+            static::assertSame(strlen($chunk), $callback(null, $chunk));
+        }
+
+        static::assertFalse($charset);
     }
 
     /**
      * Test the download callback with valid values and no charset
      */
-    public function testCurlDownloadCallbackOkNoCharset()
+    public function testCurlDownloadCallbackOkNoCharset(): void
     {
+        $charset = null;
         $callback = get_curl_download_callback(
             $charset,
             $title,
             $desc,
             $keywords,
-            false,
-            'ut_curl_getinfo_no_charset'
+            false
         );
+
         $data = [
-            'HTTP/1.1 200 OK',
             'end' => 'th=device-width">'
                 . '<title>Refactoring · GitHub</title>'
                 . '<link rel="search" type="application/opensea',
@@ -264,10 +338,11 @@ class LinkUtilsTest extends TestCase
             . '<meta name="description" content="desc" />'
             . '<meta name="keywords" content="key1,key2" />',
         ];
-        foreach ($data as $key => $line) {
-            $ignore = null;
-            $this->assertEquals(strlen($line), $callback($ignore, $line));
+
+        foreach ($data as $chunk) {
+            static::assertSame(strlen($chunk), $callback(null, $chunk));
         }
+
         $this->assertEmpty($charset);
         $this->assertEquals('Refactoring · GitHub', $title);
         $this->assertEmpty($desc);
@@ -277,18 +352,18 @@ class LinkUtilsTest extends TestCase
     /**
      * Test the download callback with valid values and no charset
      */
-    public function testCurlDownloadCallbackOkHtmlCharset()
+    public function testCurlDownloadCallbackOkHtmlCharset(): void
     {
+        $charset = null;
         $callback = get_curl_download_callback(
             $charset,
             $title,
             $desc,
             $keywords,
-            false,
-            'ut_curl_getinfo_no_charset'
+            false
         );
+
         $data = [
-            'HTTP/1.1 200 OK',
             '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
             'end' => 'th=device-width">'
                 . '<title>Refactoring · GitHub</title>'
@@ -297,14 +372,10 @@ class LinkUtilsTest extends TestCase
             . '<meta name="description" content="desc" />'
             . '<meta name="keywords" content="key1,key2" />',
         ];
-        foreach ($data as $key => $line) {
-            $ignore = null;
-            $expected = $key !== 'end' ? strlen($line) : false;
-            $this->assertEquals($expected, $callback($ignore, $line));
-            if ($expected === false) {
-                break;
-            }
+        foreach ($data as $chunk) {
+            static::assertSame(strlen($chunk), $callback(null, $chunk));
         }
+
         $this->assertEquals('utf-8', $charset);
         $this->assertEquals('Refactoring · GitHub', $title);
         $this->assertEmpty($desc);
@@ -314,25 +385,26 @@ class LinkUtilsTest extends TestCase
     /**
      * Test the download callback with valid values and no title
      */
-    public function testCurlDownloadCallbackOkNoTitle()
+    public function testCurlDownloadCallbackOkNoTitle(): void
     {
+        $charset = 'utf-8';
         $callback = get_curl_download_callback(
             $charset,
             $title,
             $desc,
             $keywords,
-            false,
-            'ut_curl_getinfo_ok'
+            false
         );
+
         $data = [
-            'HTTP/1.1 200 OK',
             'end' => 'th=device-width">Refactoring · GitHub<link rel="search" type="application/opensea',
             'ignored',
         ];
-        foreach ($data as $key => $line) {
-            $ignore = null;
-            $this->assertEquals(strlen($line), $callback($ignore, $line));
+
+        foreach ($data as $chunk) {
+            static::assertSame(strlen($chunk), $callback(null, $chunk));
         }
+
         $this->assertEquals('utf-8', $charset);
         $this->assertEmpty($title);
         $this->assertEmpty($desc);
@@ -340,81 +412,55 @@ class LinkUtilsTest extends TestCase
     }
 
     /**
-     * Test the download callback with an invalid content type.
+     * Test the header callback with an invalid content type.
      */
-    public function testCurlDownloadCallbackInvalidContentType()
+    public function testCurlHeaderCallbackInvalidContentType(): void
     {
-        $callback = get_curl_download_callback(
-            $charset,
-            $title,
-            $desc,
-            $keywords,
-            false,
-            'ut_curl_getinfo_ct_ko'
-        );
-        $ignore = null;
-        $this->assertFalse($callback($ignore, ''));
-        $this->assertEmpty($charset);
-        $this->assertEmpty($title);
+        $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_ct_ko');
+        $data = [
+            'HTTP/1.1 200 OK',
+        ];
+
+        static::assertFalse($callback(null, $data[0]));
+        static::assertNull($charset);
     }
 
     /**
-     * Test the download callback with an invalid response code.
+     * Test the header callback with an invalid response code.
      */
-    public function testCurlDownloadCallbackInvalidResponseCode()
+    public function testCurlHeaderCallbackInvalidResponseCode(): void
     {
-        $callback = $callback = get_curl_download_callback(
-            $charset,
-            $title,
-            $desc,
-            $keywords,
-            false,
-            'ut_curl_getinfo_rc_ko'
-        );
-        $ignore = null;
-        $this->assertFalse($callback($ignore, ''));
-        $this->assertEmpty($charset);
-        $this->assertEmpty($title);
+        $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_rc_ko');
+
+        static::assertFalse($callback(null, ''));
+        static::assertNull($charset);
     }
 
     /**
-     * Test the download callback with an invalid content type and response code.
+     * Test the header callback with an invalid content type and response code.
      */
-    public function testCurlDownloadCallbackInvalidContentTypeAndResponseCode()
+    public function testCurlHeaderCallbackInvalidContentTypeAndResponseCode(): void
     {
-        $callback = $callback = get_curl_download_callback(
-            $charset,
-            $title,
-            $desc,
-            $keywords,
-            false,
-            'ut_curl_getinfo_rs_ct_ko'
-        );
-        $ignore = null;
-        $this->assertFalse($callback($ignore, ''));
-        $this->assertEmpty($charset);
-        $this->assertEmpty($title);
+        $callback = get_curl_header_callback($charset, 'ut_curl_getinfo_rs_ct_ko');
+
+        static::assertFalse($callback(null, ''));
+        static::assertNull($charset);
     }
 
     /**
      * Test the download callback with valid value, and retrieve_description option enabled.
      */
-    public function testCurlDownloadCallbackOkWithDesc()
+    public function testCurlDownloadCallbackOkWithDesc(): void
     {
+        $charset = 'utf-8';
         $callback = get_curl_download_callback(
             $charset,
             $title,
             $desc,
             $keywords,
-            true,
-            'ut_curl_getinfo_ok'
+            true
         );
         $data = [
-            'HTTP/1.1 200 OK',
-            'Server: GitHub.com',
-            'Date: Sat, 28 Oct 2017 12:01:33 GMT',
-            'Content-Type: text/html; charset=utf-8',
-            'Status: 200 OK',
             'th=device-width">'
                 . '<title>Refactoring · GitHub</title>'
                 . '<link rel="search" type="application/opensea',
@@ -422,14 +468,11 @@ class LinkUtilsTest extends TestCase
             . '<meta name="description" content="link desc" />'
             . '<meta name="keywords" content="key1,key2" />',
         ];
-        foreach ($data as $key => $line) {
-            $ignore = null;
-            $expected = $key !== 'end' ? strlen($line) : false;
-            $this->assertEquals($expected, $callback($ignore, $line));
-            if ($expected === false) {
-                break;
-            }
+
+        foreach ($data as $chunk) {
+            static::assertSame(strlen($chunk), $callback(null, $chunk));
         }
+
         $this->assertEquals('utf-8', $charset);
         $this->assertEquals('Refactoring · GitHub', $title);
         $this->assertEquals('link desc', $desc);
@@ -440,8 +483,9 @@ class LinkUtilsTest extends TestCase
      * Test the download callback with valid value, and retrieve_description option enabled,
      * but no desc or keyword defined in the page.
      */
-    public function testCurlDownloadCallbackOkWithDescNotFound()
+    public function testCurlDownloadCallbackOkWithDescNotFound(): void
     {
+        $charset = 'utf-8';
         $callback = get_curl_download_callback(
             $charset,
             $title,
@@ -451,24 +495,16 @@ class LinkUtilsTest extends TestCase
             'ut_curl_getinfo_ok'
         );
         $data = [
-            'HTTP/1.1 200 OK',
-            'Server: GitHub.com',
-            'Date: Sat, 28 Oct 2017 12:01:33 GMT',
-            'Content-Type: text/html; charset=utf-8',
-            'Status: 200 OK',
             'th=device-width">'
                 . '<title>Refactoring · GitHub</title>'
                 . '<link rel="search" type="application/opensea',
             'end' => '<title>ignored</title>',
         ];
-        foreach ($data as $key => $line) {
-            $ignore = null;
-            $expected = $key !== 'end' ? strlen($line) : false;
-            $this->assertEquals($expected, $callback($ignore, $line));
-            if ($expected === false) {
-                break;
-            }
+
+        foreach ($data as $chunk) {
+            static::assertSame(strlen($chunk), $callback(null, $chunk));
         }
+
         $this->assertEquals('utf-8', $charset);
         $this->assertEquals('Refactoring · GitHub', $title);
         $this->assertEmpty($desc);
@@ -526,13 +562,13 @@ class LinkUtilsTest extends TestCase
             カタカナ #カタカナ」カタカナ\n';
         $autolinkedDescription = hashtag_autolink($rawDescription, $index);
 
-        $this->assertContains($this->getHashtagLink('hashtag', $index), $autolinkedDescription);
-        $this->assertNotContains(' #hashtag', $autolinkedDescription);
-        $this->assertNotContains('>#nothashtag', $autolinkedDescription);
-        $this->assertContains($this->getHashtagLink('ашок', $index), $autolinkedDescription);
-        $this->assertContains($this->getHashtagLink('カタカナ', $index), $autolinkedDescription);
-        $this->assertContains($this->getHashtagLink('hashtag_hashtag', $index), $autolinkedDescription);
-        $this->assertNotContains($this->getHashtagLink('hashtag-nothashtag', $index), $autolinkedDescription);
+        $this->assertContainsPolyfill($this->getHashtagLink('hashtag', $index), $autolinkedDescription);
+        $this->assertNotContainsPolyfill(' #hashtag', $autolinkedDescription);
+        $this->assertNotContainsPolyfill('>#nothashtag', $autolinkedDescription);
+        $this->assertContainsPolyfill($this->getHashtagLink('ашок', $index), $autolinkedDescription);
+        $this->assertContainsPolyfill($this->getHashtagLink('カタカナ', $index), $autolinkedDescription);
+        $this->assertContainsPolyfill($this->getHashtagLink('hashtag_hashtag', $index), $autolinkedDescription);
+        $this->assertNotContainsPolyfill($this->getHashtagLink('hashtag-nothashtag', $index), $autolinkedDescription);
     }
 
     /**
@@ -543,9 +579,9 @@ class LinkUtilsTest extends TestCase
         $rawDescription = 'blabla #hashtag x#nothashtag';
         $autolinkedDescription = hashtag_autolink($rawDescription);
 
-        $this->assertContains($this->getHashtagLink('hashtag'), $autolinkedDescription);
-        $this->assertNotContains(' #hashtag', $autolinkedDescription);
-        $this->assertNotContains('>#nothashtag', $autolinkedDescription);
+        $this->assertContainsPolyfill($this->getHashtagLink('hashtag'), $autolinkedDescription);
+        $this->assertNotContainsPolyfill(' #hashtag', $autolinkedDescription);
+        $this->assertNotContainsPolyfill('>#nothashtag', $autolinkedDescription);
     }
 
     /**