]> git.immae.eu Git - github/wallabag/wallabag.git/commitdiff
Remove htmlawed and use graby instead
author Jeremy Benoist <jeremy.benoist@gmail.com>
Tue, 16 May 2017 21:11:20 +0000 (23:11 +0200)
committer Jeremy Benoist <jeremy.benoist@gmail.com>
Wed, 31 May 2017 12:00:15 +0000 (14:00 +0200)
Instead of using htmlawed (which is already used in graby), use graby directly (which requires some refactoring on the graby side).
Still needs some tests.

composer.json
src/Wallabag/CoreBundle/Helper/ContentProxy.php
tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php

index 31cfb6a140f7084e65dfd1d92480384d05bf26b1..a3d40050d7ffd298310132e36266571bd4a380f6 100644 (file)
@@ -61,7 +61,6 @@
         "wallabag/tcpdf": "^6.2",
         "simplepie/simplepie": "~1.3.1",
         "willdurand/hateoas-bundle": "~1.0",
-        "htmlawed/htmlawed": "~1.1.19",
         "liip/theme-bundle": "~1.1",
         "lexik/form-filter-bundle": "~5.0",
         "j0k3r/graby": "dev-extractor",
index a1df16d8cc4184534c856e5c8124b5a5228368a6..66d72fe61f5444b9258a06e829d3c49a7607e17c 100644 (file)
@@ -47,24 +47,7 @@ class ContentProxy
     {
         // ensure content is a bit cleaned up
         if (!empty($content['html'])) {
-            $extractor = $this->graby->getExtractor();
-            $contentExtracted = $extractor->process($content['html'], $url);
-
-            if ($contentExtracted) {
-                $contentBlock = $extractor->getContent();
-                $contentBlock->normalize();
-
-                $content['html'] = trim($contentBlock->innerHTML);
-            }
-
-            $content['html'] = htmLawed($content['html'], [
-                'safe' => 1,
-                // which means: do not remove iframe elements
-                'elements' => '*+iframe',
-                'deny_attribute' => 'style',
-                'comment' => 1,
-                'cdata' => 1,
-            ]);
+            $content['html'] = $this->graby->cleanupHtml($content['html'], $url);
         }
 
         // do we have to fetch the content or the provided one is ok?
index 7a50b3737284f002d1a2b81d2767f0798ff1ae1b..11f1d410fb8a39a207697cb06488e74965e12c18 100644 (file)
@@ -210,16 +210,18 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
         $tagger->expects($this->once())
             ->method('tag');
 
-        $graby = $this->getMockBuilder('Graby\Graby')->getMock();
-
-        $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
-        $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0', [
-            'html' => str_repeat('this is my content', 325),
-            'title' => 'this is my title',
-            'url' => 'http://1.1.1.1',
-            'content_type' => 'text/html',
-            'language' => 'fr',
-        ]);
+        $proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage);
+        $entry = $proxy->updateEntry(
+            new Entry(new User()),
+            'http://0.0.0.0',
+            [
+                'html' => str_repeat('this is my content', 325),
+                'title' => 'this is my title',
+                'url' => 'http://1.1.1.1',
+                'content_type' => 'text/html',
+                'language' => 'fr',
+            ]
+        );
 
         $this->assertEquals('http://1.1.1.1', $entry->getUrl());
         $this->assertEquals('this is my title', $entry->getTitle());
@@ -277,9 +279,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
         $tagger->expects($this->once())
             ->method('tag');
 
-        $graby = new Graby();
-
-        $proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger(), $this->fetchingErrorMessage);
+        $proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage);
         $entry = $proxy->updateEntry(
             new Entry(new User()),
             'http://1.1.1.1',