From: Jeremy Benoist Date: Tue, 16 May 2017 21:11:20 +0000 (+0200) Subject: Remove htmlawed and use graby instead X-Git-Tag: 2.3.0~31^2~79^2~5 X-Git-Url: https://git.immae.eu/?p=github%2Fwallabag%2Fwallabag.git;a=commitdiff_plain;h=0d6cfb884c8ef75e4dc5fd667fb9d29702523a2a Remove htmlawed and use graby instead Instead of using htmlawed (which is already used in graby), use graby directly (which requires some refactoring on the graby side). Still needs some tests. --- diff --git a/composer.json b/composer.json index 31cfb6a1..a3d40050 100644 --- a/composer.json +++ b/composer.json @@ -61,7 +61,6 @@ "wallabag/tcpdf": "^6.2", "simplepie/simplepie": "~1.3.1", "willdurand/hateoas-bundle": "~1.0", - "htmlawed/htmlawed": "~1.1.19", "liip/theme-bundle": "~1.1", "lexik/form-filter-bundle": "~5.0", "j0k3r/graby": "dev-extractor", diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index a1df16d8..66d72fe6 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -47,24 +47,7 @@ class ContentProxy { // ensure content is a bit cleaned up if (!empty($content['html'])) { - $extractor = $this->graby->getExtractor(); - $contentExtracted = $extractor->process($content['html'], $url); - - if ($contentExtracted) { - $contentBlock = $extractor->getContent(); - $contentBlock->normalize(); - - $content['html'] = trim($contentBlock->innerHTML); - } - - $content['html'] = htmLawed($content['html'], [ - 'safe' => 1, - // which means: do not remove iframe elements - 'elements' => '*+iframe', - 'deny_attribute' => 'style', - 'comment' => 1, - 'cdata' => 1, - ]); + $content['html'] = $this->graby->cleanupHtml($content['html'], $url); } // do we have to fetch the content or the provided one is ok? 
diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index 7a50b373..11f1d410 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php @@ -210,16 +210,18 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase $tagger->expects($this->once()) ->method('tag'); - $graby = $this->getMockBuilder('Graby\Graby')->getMock(); - - $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage); - $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0', [ - 'html' => str_repeat('this is my content', 325), - 'title' => 'this is my title', - 'url' => 'http://1.1.1.1', - 'content_type' => 'text/html', - 'language' => 'fr', - ]); + $proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage); + $entry = $proxy->updateEntry( + new Entry(new User()), + 'http://0.0.0.0', + [ + 'html' => str_repeat('this is my content', 325), + 'title' => 'this is my title', + 'url' => 'http://1.1.1.1', + 'content_type' => 'text/html', + 'language' => 'fr', + ] + ); $this->assertEquals('http://1.1.1.1', $entry->getUrl()); $this->assertEquals('this is my title', $entry->getTitle()); @@ -277,9 +279,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase $tagger->expects($this->once()) ->method('tag'); - $graby = new Graby(); - - $proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger(), $this->fetchingErrorMessage); + $proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage); $entry = $proxy->updateEntry( new Entry(new User()), 'http://1.1.1.1',