aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Jeremy Benoist <jeremy.benoist@gmail.com> 2017-05-16 23:11:20 +0200
committer: Jeremy Benoist <jeremy.benoist@gmail.com> 2017-05-31 14:00:15 +0200
commit: 0d6cfb884c8ef75e4dc5fd667fb9d29702523a2a (patch)
tree: 7a6b882ce812fdea3de5675e3bcad185727dfe6c
parent: cf05a1ae342b8f59ee0944eeba0f75aa0a1a2816 (diff)
downloadwallabag-0d6cfb884c8ef75e4dc5fd667fb9d29702523a2a.tar.gz
wallabag-0d6cfb884c8ef75e4dc5fd667fb9d29702523a2a.tar.zst
wallabag-0d6cfb884c8ef75e4dc5fd667fb9d29702523a2a.zip
Remove htmlawed and use graby instead
Instead of using htmlawed (which is already used in graby), use graby directly (which requires some refactoring on graby's side). Still needs some tests.
-rw-r--r-- composer.json 1
-rw-r--r-- src/Wallabag/CoreBundle/Helper/ContentProxy.php 19
-rw-r--r-- tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php 26
3 files changed, 14 insertions, 32 deletions
diff --git a/composer.json b/composer.json
index 31cfb6a1..a3d40050 100644
--- a/composer.json
+++ b/composer.json
@@ -61,7 +61,6 @@
61 "wallabag/tcpdf": "^6.2", 61 "wallabag/tcpdf": "^6.2",
62 "simplepie/simplepie": "~1.3.1", 62 "simplepie/simplepie": "~1.3.1",
63 "willdurand/hateoas-bundle": "~1.0", 63 "willdurand/hateoas-bundle": "~1.0",
64 "htmlawed/htmlawed": "~1.1.19",
65 "liip/theme-bundle": "~1.1", 64 "liip/theme-bundle": "~1.1",
66 "lexik/form-filter-bundle": "~5.0", 65 "lexik/form-filter-bundle": "~5.0",
67 "j0k3r/graby": "dev-extractor", 66 "j0k3r/graby": "dev-extractor",
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index a1df16d8..66d72fe6 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -47,24 +47,7 @@ class ContentProxy
47 { 47 {
48 // ensure content is a bit cleaned up 48 // ensure content is a bit cleaned up
49 if (!empty($content['html'])) { 49 if (!empty($content['html'])) {
50 $extractor = $this->graby->getExtractor(); 50 $content['html'] = $this->graby->cleanupHtml($content['html'], $url);
51 $contentExtracted = $extractor->process($content['html'], $url);
52
53 if ($contentExtracted) {
54 $contentBlock = $extractor->getContent();
55 $contentBlock->normalize();
56
57 $content['html'] = trim($contentBlock->innerHTML);
58 }
59
60 $content['html'] = htmLawed($content['html'], [
61 'safe' => 1,
62 // which means: do not remove iframe elements
63 'elements' => '*+iframe',
64 'deny_attribute' => 'style',
65 'comment' => 1,
66 'cdata' => 1,
67 ]);
68 } 51 }
69 52
70 // do we have to fetch the content or the provided one is ok? 53 // do we have to fetch the content or the provided one is ok?
diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
index 7a50b373..11f1d410 100644
--- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
+++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
@@ -210,16 +210,18 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
210 $tagger->expects($this->once()) 210 $tagger->expects($this->once())
211 ->method('tag'); 211 ->method('tag');
212 212
213 $graby = $this->getMockBuilder('Graby\Graby')->getMock(); 213 $proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage);
214 214 $entry = $proxy->updateEntry(
215 $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage); 215 new Entry(new User()),
216 $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0', [ 216 'http://0.0.0.0',
217 'html' => str_repeat('this is my content', 325), 217 [
218 'title' => 'this is my title', 218 'html' => str_repeat('this is my content', 325),
219 'url' => 'http://1.1.1.1', 219 'title' => 'this is my title',
220 'content_type' => 'text/html', 220 'url' => 'http://1.1.1.1',
221 'language' => 'fr', 221 'content_type' => 'text/html',
222 ]); 222 'language' => 'fr',
223 ]
224 );
223 225
224 $this->assertEquals('http://1.1.1.1', $entry->getUrl()); 226 $this->assertEquals('http://1.1.1.1', $entry->getUrl());
225 $this->assertEquals('this is my title', $entry->getTitle()); 227 $this->assertEquals('this is my title', $entry->getTitle());
@@ -277,9 +279,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
277 $tagger->expects($this->once()) 279 $tagger->expects($this->once())
278 ->method('tag'); 280 ->method('tag');
279 281
280 $graby = new Graby(); 282 $proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage);
281
282 $proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger(), $this->fetchingErrorMessage);
283 $entry = $proxy->updateEntry( 283 $entry = $proxy->updateEntry(
284 new Entry(new User()), 284 new Entry(new User()),
285 'http://1.1.1.1', 285 'http://1.1.1.1',