]> git.immae.eu Git - github/shaarli/Shaarli.git/blobdiff - tests/bookmark/LinkUtilsTest.php
Merge pull request #1540 from ArthurHoaro/fix/metadata-regexes
[github/shaarli/Shaarli.git] / tests / bookmark / LinkUtilsTest.php
index 5b31115bf50aa570937c62123922205fda1b7950..29941c8cd0ed32307faa0eea5bde9b99b3d77967 100644 (file)
@@ -2,14 +2,14 @@
 
 namespace Shaarli\Bookmark;
 
-use ReferenceLinkDB;
+use Shaarli\TestCase;
 
 require_once 'tests/utils/CurlUtils.php';
 
 /**
  * Class LinkUtilsTest.
  */
-class LinkUtilsTest extends \PHPUnit\Framework\TestCase
+class LinkUtilsTest extends TestCase
 {
     /**
      * Test html_extract_title() when the title is found.
@@ -42,6 +42,19 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
         $this->assertEquals(strtolower($charset), header_extract_charset($headers));
     }
 
+    /**
+     * Test header_extract_charset() when the charset value is wrapped in double or single quotes.
+     */
+    public function testHeadersExtractExistentCharsetWithQuotes()
+    {
+        $charset = 'x-MacCroatian';
+        $expected = strtolower($charset);
+        $doubleQuoted = 'text/html; charset="' . $charset . '"otherstuff="test"';
+        $singleQuoted = 'text/html; charset=\'' . $charset . '\'otherstuff="test"';
+        $this->assertEquals($expected, header_extract_charset($doubleQuoted));
+        $this->assertEquals($expected, header_extract_charset($singleQuoted));
+    }
+
     /**
      * Test headers_extract_charset() when the charset is not found.
      */
@@ -75,12 +88,146 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
         $this->assertFalse(html_extract_charset($html));
     }
 
+    /**
+     * Test html_extract_tag() when the value is found in a name, property (og:) or itemprop meta tag.
+     */
+    public function testHtmlExtractExistentNameTag()
+    {
+        $description = 'Bob and Alice share cookies.';
+
+        // Plain name="description" tag inside a one-line document
+        $html = '<html><meta>stuff2</meta><meta name="description" content="' . $description . '"/></html>';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        // OpenGraph: property attribute before content
+        $html = '<meta property="og:description" content="' . $description . '">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        // OpenGraph reversed: content attribute before property
+        $html = '<meta content="' . $description . '" property="og:description">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        // OpenGraph name carried by an itemprop attribute
+        $html = '<meta itemprop="og:description" content="' . $description . '">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        // OpenGraph with an unquoted property value
+        $html = '<meta property=og:description content="' . $description . '">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        // OpenGraph reversed with an unquoted property value
+        $html = '<meta content="' . $description . '" property=og:description>';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        // OpenGraph with unrelated attributes around property and content
+        $html = '<meta tag1="content1" property="og:description" tag2="content2" content="' .
+            $description . '" tag3="content3">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        // OpenGraph reversed with unrelated attributes around content and property
+        $html = '<meta tag1="content1" content="' . $description . '" ' .
+            'tag3="content3" tag2="content2" property="og:description">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        // OpenGraph: an unrelated value precedes the target inside property
+        $html = '<meta property="unrelated og:description" content="' . $description . '">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        // OpenGraph: an unrelated value follows the target inside property
+        $html = '<meta property="og:description unrelated" content="' . $description . '">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        // OpenGraph: the target sits between unrelated values inside property
+        $html = '<meta property="og:unrelated1 og:description og:unrelated2" content="' . $description . '">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        // OpenGraph: target between unrelated values, plus unrelated attributes
+        $html = '<meta tag1="content1" property="og:unrelated1 og:description og:unrelated2" '.
+            'tag2="content2" content="' . $description . '" tag3="content3">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        // OpenGraph reversed: an unrelated value precedes the target inside property
+        $html = '<meta content="' . $description . '" property="unrelated og:description">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        // OpenGraph reversed: an unrelated value follows the target inside property
+        $html = '<meta content="' . $description . '" property="og:description unrelated">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        // OpenGraph reversed: the target sits between unrelated values inside property
+        $html = '<meta content="' . $description . '" property="og:unrelated1 og:description og:unrelated2">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        // OpenGraph reversed: target between unrelated values, plus unrelated attributes
+        $html = '<meta tag1="content1" content="' . $description . '" tag2="content2" '.
+            'property="og:unrelated1 og:description og:unrelated2" tag3="content3">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+
+        // Tag carrying both property and name attributes (suggestion from #1375)
+        $html = '<meta property="og:description" name="description" content="' . $description . '">';
+        $this->assertEquals($description, html_extract_tag('description', $html));
+    }
+
+    /**
+     * Test html_extract_tag() returns false when no complete matching meta tag is present.
+     */
+    public function testHtmlExtractNonExistentNameTag()
+    {
+        $html = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>';
+        $this->assertFalse(html_extract_tag('description', $html));
+
+        // Partial meta tags: content without a name/property, or a property without content
+        $html = '<meta content="Brief description">';
+        $this->assertFalse(html_extract_tag('description', $html));
+
+        $html = '<meta property="og:description">';
+        $this->assertFalse(html_extract_tag('description', $html));
+
+        $html = '<meta tag1="content1" property="og:description">';
+        $this->assertFalse(html_extract_tag('description', $html));
+
+        $html = '<meta property="og:description" tag1="content1">';
+        $this->assertFalse(html_extract_tag('description', $html));
+
+        $html = '<meta tag1="content1" content="Brief description">';
+        $this->assertFalse(html_extract_tag('description', $html));
+
+        $html = '<meta content="Brief description" tag1="content1">';
+        $this->assertFalse(html_extract_tag('description', $html));
+    }
+
+    /**
+     * html_extract_tag() must return the content of an existing <meta property="og:..."> tag.
+     */
+    public function testHtmlExtractExistentOgTag()
+    {
+        $expected = 'Bob and Alice share cookies.';
+        $page = '<html><meta>stuff2</meta><meta property="og:description" content="' . $expected . '"/></html>';
+        $this->assertEquals($expected, html_extract_tag('description', $page));
+    }
+
+    /**
+     * html_extract_tag() must return false when no <meta property="og:..."> tag matches.
+     */
+    public function testHtmlExtractNonExistentOgTag()
+    {
+        $page = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>';
+        $this->assertFalse(html_extract_tag('description', $page));
+    }
+
     /**
      * Test the download callback with valid value
      */
     public function testCurlDownloadCallbackOk()
     {
-        $callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ok');
+        $callback = get_curl_download_callback(
+            $charset,
+            $title,
+            $desc,
+            $keywords,
+            false,
+            'ut_curl_getinfo_ok'
+        );
         $data = [
             'HTTP/1.1 200 OK',
             'Server: GitHub.com',
@@ -90,7 +237,9 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
             'end' => 'th=device-width">'
                 . '<title>Refactoring · GitHub</title>'
                 . '<link rel="search" type="application/opensea',
-            '<title>ignored</title>',
+            '<title>ignored</title>'
+                . '<meta name="description" content="desc" />'
+                . '<meta name="keywords" content="key1,key2" />',
         ];
         foreach ($data as $key => $line) {
             $ignore = null;
@@ -102,6 +251,8 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
         }
         $this->assertEquals('utf-8', $charset);
         $this->assertEquals('Refactoring · GitHub', $title);
+        $this->assertEmpty($desc);
+        $this->assertEmpty($keywords);
     }
 
     /**
@@ -109,13 +260,22 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
      */
     public function testCurlDownloadCallbackOkNoCharset()
     {
-        $callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_no_charset');
+        $callback = get_curl_download_callback(
+            $charset,
+            $title,
+            $desc,
+            $keywords,
+            false,
+            'ut_curl_getinfo_no_charset'
+        );
         $data = [
             'HTTP/1.1 200 OK',
             'end' => 'th=device-width">'
                 . '<title>Refactoring · GitHub</title>'
                 . '<link rel="search" type="application/opensea',
-            '<title>ignored</title>',
+            '<title>ignored</title>'
+            . '<meta name="description" content="desc" />'
+            . '<meta name="keywords" content="key1,key2" />',
         ];
         foreach ($data as $key => $line) {
             $ignore = null;
@@ -123,6 +283,8 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
         }
         $this->assertEmpty($charset);
         $this->assertEquals('Refactoring · GitHub', $title);
+        $this->assertEmpty($desc);
+        $this->assertEmpty($keywords);
     }
 
     /**
@@ -130,14 +292,23 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
      */
     public function testCurlDownloadCallbackOkHtmlCharset()
     {
-        $callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_no_charset');
+        $callback = get_curl_download_callback(
+            $charset,
+            $title,
+            $desc,
+            $keywords,
+            false,
+            'ut_curl_getinfo_no_charset'
+        );
         $data = [
             'HTTP/1.1 200 OK',
             '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
             'end' => 'th=device-width">'
                 . '<title>Refactoring · GitHub</title>'
                 . '<link rel="search" type="application/opensea',
-            '<title>ignored</title>',
+            '<title>ignored</title>'
+            . '<meta name="description" content="desc" />'
+            . '<meta name="keywords" content="key1,key2" />',
         ];
         foreach ($data as $key => $line) {
             $ignore = null;
@@ -149,6 +320,8 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
         }
         $this->assertEquals('utf-8', $charset);
         $this->assertEquals('Refactoring · GitHub', $title);
+        $this->assertEmpty($desc);
+        $this->assertEmpty($keywords);
     }
 
     /**
@@ -156,7 +329,14 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
      */
     public function testCurlDownloadCallbackOkNoTitle()
     {
-        $callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ok');
+        $callback = get_curl_download_callback(
+            $charset,
+            $title,
+            $desc,
+            $keywords,
+            false,
+            'ut_curl_getinfo_ok'
+        );
         $data = [
             'HTTP/1.1 200 OK',
             'end' => 'th=device-width">Refactoring · GitHub<link rel="search" type="application/opensea',
@@ -168,6 +348,8 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
         }
         $this->assertEquals('utf-8', $charset);
         $this->assertEmpty($title);
+        $this->assertEmpty($desc);
+        $this->assertEmpty($keywords);
     }
 
     /**
@@ -175,7 +357,14 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
      */
     public function testCurlDownloadCallbackInvalidContentType()
     {
-        $callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ct_ko');
+        $callback = get_curl_download_callback(
+            $charset,
+            $title,
+            $desc,
+            $keywords,
+            false,
+            'ut_curl_getinfo_ct_ko'
+        );
         $ignore = null;
         $this->assertFalse($callback($ignore, ''));
         $this->assertEmpty($charset);
@@ -187,7 +376,14 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
      */
     public function testCurlDownloadCallbackInvalidResponseCode()
     {
-        $callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_rc_ko');
+        $callback = get_curl_download_callback(
+            $charset,
+            $title,
+            $desc,
+            $keywords,
+            false,
+            'ut_curl_getinfo_rc_ko'
+        );
         $ignore = null;
         $this->assertFalse($callback($ignore, ''));
         $this->assertEmpty($charset);
@@ -199,7 +395,14 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
      */
     public function testCurlDownloadCallbackInvalidContentTypeAndResponseCode()
     {
-        $callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_rs_ct_ko');
+        $callback = get_curl_download_callback(
+            $charset,
+            $title,
+            $desc,
+            $keywords,
+            false,
+            'ut_curl_getinfo_rs_ct_ko'
+        );
         $ignore = null;
         $this->assertFalse($callback($ignore, ''));
         $this->assertEmpty($charset);
@@ -207,65 +410,106 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
     }
 
     /**
-     * Test count_private.
+     * Test the download callback with valid value and retrieve_description option enabled: desc and keywords are set.
      */
-    public function testCountPrivateLinks()
+    public function testCurlDownloadCallbackOkWithDesc()
     {
-        $refDB = new ReferenceLinkDB();
-        $this->assertEquals($refDB->countPrivateLinks(), count_private($refDB->getLinks()));
+        $callback = get_curl_download_callback(
+            $charset,
+            $title,
+            $desc,
+            $keywords,
+            true,
+            'ut_curl_getinfo_ok'
+        );
+        $data = [
+            'HTTP/1.1 200 OK',
+            'Server: GitHub.com',
+            'Date: Sat, 28 Oct 2017 12:01:33 GMT',
+            'Content-Type: text/html; charset=utf-8',
+            'Status: 200 OK',
+            'th=device-width">'
+                . '<title>Refactoring · GitHub</title>'
+                . '<link rel="search" type="application/opensea',
+            'end' => '<title>ignored</title>'
+            . '<meta name="description" content="link desc" />'
+            . '<meta name="keywords" content="key1,key2" />',
+        ];
+        foreach ($data as $key => $line) {
+            $ignore = null;
+            $expected = $key !== 'end' ? strlen($line) : false;
+            $this->assertEquals($expected, $callback($ignore, $line));
+            if ($expected === false) {
+                break;
+            }
+        }
+        $this->assertEquals('utf-8', $charset);
+        $this->assertEquals('Refactoring · GitHub', $title);
+        $this->assertEquals('link desc', $desc);
+        $this->assertEquals('key1 key2', $keywords);
+    }
+
+    /**
+     * Test the download callback with valid value and retrieve_description option enabled, when the page
+     * defines no description or keywords meta tag: both values are expected to stay empty.
+     */
+    public function testCurlDownloadCallbackOkWithDescNotFound()
+    {
+        $callback = get_curl_download_callback(
+            $charset,
+            $title,
+            $desc,
+            $keywords,
+            true,
+            'ut_curl_getinfo_ok'
+        );
+        $data = [
+            'HTTP/1.1 200 OK',
+            'Server: GitHub.com',
+            'Date: Sat, 28 Oct 2017 12:01:33 GMT',
+            'Content-Type: text/html; charset=utf-8',
+            'Status: 200 OK',
+            'th=device-width">'
+                . '<title>Refactoring · GitHub</title>'
+                . '<link rel="search" type="application/opensea',
+            'end' => '<title>ignored</title>',
+        ];
+        foreach ($data as $key => $line) {
+            $ignore = null;
+            $expected = $key !== 'end' ? strlen($line) : false;
+            $this->assertEquals($expected, $callback($ignore, $line));
+            if ($expected === false) {
+                break;
+            }
+        }
+        $this->assertEquals('utf-8', $charset);
+        $this->assertEquals('Refactoring · GitHub', $title);
+        $this->assertEmpty($desc);
+        $this->assertEmpty($keywords);
     }
 
     /**
-     * Test text2clickable without a redirector being set.
+     * Test text2clickable.
      */
-    public function testText2clickableWithoutRedirector()
+    public function testText2clickable()
     {
         $text = 'stuff http://hello.there/is=someone#here otherstuff';
         $expectedText = 'stuff <a href="http://hello.there/is=someone#here">'
             . 'http://hello.there/is=someone#here</a> otherstuff';
-        $processedText = text2clickable($text, '');
+        $processedText = text2clickable($text);
         $this->assertEquals($expectedText, $processedText);
 
         $text = 'stuff http://hello.there/is=someone#here(please) otherstuff';
         $expectedText = 'stuff <a href="http://hello.there/is=someone#here(please)">'
             . 'http://hello.there/is=someone#here(please)</a> otherstuff';
-        $processedText = text2clickable($text, '');
+        $processedText = text2clickable($text);
         $this->assertEquals($expectedText, $processedText);
 
+        // The "&no" suffix after the closing parenthesis must remain part of the link.
         $text = 'stuff http://hello.there/is=someone#here(please)&no otherstuff';
         $expectedText = 'stuff <a href="http://hello.there/is=someone#here(please)&no">'
             . 'http://hello.there/is=someone#here(please)&no</a> otherstuff';
-        $processedText = text2clickable($text, '');
-        $this->assertEquals($expectedText, $processedText);
-    }
-
-    /**
-     * Test text2clickable with a redirector set.
-     */
-    public function testText2clickableWithRedirector()
-    {
-        $text = 'stuff http://hello.there/is=someone#here otherstuff';
-        $redirector = 'http://redirector.to';
-        $expectedText = 'stuff <a href="' .
-            $redirector .
-            urlencode('http://hello.there/is=someone#here') .
-            '">http://hello.there/is=someone#here</a> otherstuff';
-        $processedText = text2clickable($text, $redirector);
-        $this->assertEquals($expectedText, $processedText);
-    }
-
-    /**
-     * Test text2clickable a redirector set and without URL encode.
-     */
-    public function testText2clickableWithRedirectorDontEncode()
-    {
-        $text = 'stuff http://hello.there/?is=someone&or=something#here otherstuff';
-        $redirector = 'http://redirector.to';
-        $expectedText = 'stuff <a href="' .
-            $redirector .
-            'http://hello.there/?is=someone&or=something#here' .
-            '">http://hello.there/?is=someone&or=something#here</a> otherstuff';
-        $processedText = text2clickable($text, $redirector, false);
+        $processedText = text2clickable($text);
         $this->assertEquals($expectedText, $processedText);
     }
 
@@ -295,13 +539,13 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
             カタカナ #カタカナ」カタカナ\n';
         $autolinkedDescription = hashtag_autolink($rawDescription, $index);
 
-        $this->assertContains($this->getHashtagLink('hashtag', $index), $autolinkedDescription);
-        $this->assertNotContains(' #hashtag', $autolinkedDescription);
-        $this->assertNotContains('>#nothashtag', $autolinkedDescription);
-        $this->assertContains($this->getHashtagLink('ашок', $index), $autolinkedDescription);
-        $this->assertContains($this->getHashtagLink('カタカナ', $index), $autolinkedDescription);
-        $this->assertContains($this->getHashtagLink('hashtag_hashtag', $index), $autolinkedDescription);
-        $this->assertNotContains($this->getHashtagLink('hashtag-nothashtag', $index), $autolinkedDescription);
+        $this->assertContainsPolyfill($this->getHashtagLink('hashtag', $index), $autolinkedDescription);
+        $this->assertNotContainsPolyfill(' #hashtag', $autolinkedDescription);
+        $this->assertNotContainsPolyfill('>#nothashtag', $autolinkedDescription);
+        $this->assertContainsPolyfill($this->getHashtagLink('ашок', $index), $autolinkedDescription);
+        $this->assertContainsPolyfill($this->getHashtagLink('カタカナ', $index), $autolinkedDescription);
+        $this->assertContainsPolyfill($this->getHashtagLink('hashtag_hashtag', $index), $autolinkedDescription);
+        $this->assertNotContainsPolyfill($this->getHashtagLink('hashtag-nothashtag', $index), $autolinkedDescription);
     }
 
     /**
@@ -312,9 +556,9 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
         $rawDescription = 'blabla #hashtag x#nothashtag';
         $autolinkedDescription = hashtag_autolink($rawDescription);
 
-        $this->assertContains($this->getHashtagLink('hashtag'), $autolinkedDescription);
-        $this->assertNotContains(' #hashtag', $autolinkedDescription);
-        $this->assertNotContains('>#nothashtag', $autolinkedDescription);
+        $this->assertContainsPolyfill($this->getHashtagLink('hashtag'), $autolinkedDescription);
+        $this->assertNotContainsPolyfill(' #hashtag', $autolinkedDescription);
+        $this->assertNotContainsPolyfill('>#nothashtag', $autolinkedDescription);
     }
 
     /**
@@ -347,7 +591,7 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
      */
     private function getHashtagLink($hashtag, $index = '')
     {
-        $hashtagLink = '<a href="' . $index . '?addtag=$1" title="Hashtag $1">#$1</a>';
+        $hashtagLink = '<a href="' . $index . './add-tag/$1" title="Hashtag $1">#$1</a>';
         return str_replace('$1', $hashtag, $hashtagLink);
     }
 }