From 0d6cfb884c8ef75e4dc5fd667fb9d29702523a2a Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Tue, 16 May 2017 23:11:20 +0200 Subject: [PATCH] Remove htmlawed and use graby instead Instead of using htmlawed (which is already used in graby), use graby directly (which requires some refactoring on the graby side). Still needs some tests. --- composer.json | 1 - .../CoreBundle/Helper/ContentProxy.php | 19 +------------- .../CoreBundle/Helper/ContentProxyTest.php | 26 +++++++++---------- 3 files changed, 14 insertions(+), 32 deletions(-) diff --git a/composer.json b/composer.json index 31cfb6a1..a3d40050 100644 --- a/composer.json +++ b/composer.json @@ -61,7 +61,6 @@ "wallabag/tcpdf": "^6.2", "simplepie/simplepie": "~1.3.1", "willdurand/hateoas-bundle": "~1.0", - "htmlawed/htmlawed": "~1.1.19", "liip/theme-bundle": "~1.1", "lexik/form-filter-bundle": "~5.0", "j0k3r/graby": "dev-extractor", diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index a1df16d8..66d72fe6 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -47,24 +47,7 @@ class ContentProxy { // ensure content is a bit cleaned up if (!empty($content['html'])) { - $extractor = $this->graby->getExtractor(); - $contentExtracted = $extractor->process($content['html'], $url); - - if ($contentExtracted) { - $contentBlock = $extractor->getContent(); - $contentBlock->normalize(); - - $content['html'] = trim($contentBlock->innerHTML); - } - - $content['html'] = htmLawed($content['html'], [ - 'safe' => 1, - // which means: do not remove iframe elements - 'elements' => '*+iframe', - 'deny_attribute' => 'style', - 'comment' => 1, - 'cdata' => 1, - ]); + $content['html'] = $this->graby->cleanupHtml($content['html'], $url); } // do we have to fetch the content or the provided one is ok? 
diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index 7a50b373..11f1d410 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php @@ -210,16 +210,18 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase $tagger->expects($this->once()) ->method('tag'); - $graby = $this->getMockBuilder('Graby\Graby')->getMock(); - - $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage); - $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0', [ - 'html' => str_repeat('this is my content', 325), - 'title' => 'this is my title', - 'url' => 'http://1.1.1.1', - 'content_type' => 'text/html', - 'language' => 'fr', - ]); + $proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage); + $entry = $proxy->updateEntry( + new Entry(new User()), + 'http://0.0.0.0', + [ + 'html' => str_repeat('this is my content', 325), + 'title' => 'this is my title', + 'url' => 'http://1.1.1.1', + 'content_type' => 'text/html', + 'language' => 'fr', + ] + ); $this->assertEquals('http://1.1.1.1', $entry->getUrl()); $this->assertEquals('this is my title', $entry->getTitle()); @@ -277,9 +279,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase $tagger->expects($this->once()) ->method('tag'); - $graby = new Graby(); - - $proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger(), $this->fetchingErrorMessage); + $proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage); $entry = $proxy->updateEntry( new Entry(new User()), 'http://1.1.1.1', -- 2.41.0