From 74a75f7d430eb7a69cd377194e52012db34d39b4 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Fri, 12 May 2017 07:53:21 +0200 Subject: Use graby ContentExtractor to clean html It might be better to re-use some graby functionalities to clean html instead of building a new system. --- .../CoreBundle/Helper/ContentProxyTest.php | 55 ++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'tests/Wallabag') diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index 44fca073..7a50b373 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php @@ -8,6 +8,7 @@ use Wallabag\CoreBundle\Entity\Entry; use Wallabag\CoreBundle\Entity\Tag; use Wallabag\UserBundle\Entity\User; use Wallabag\CoreBundle\Helper\RuleBasedTagger; +use Graby\Graby; class ContentProxyTest extends \PHPUnit_Framework_TestCase { @@ -253,6 +254,60 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase $this->assertCount(0, $entry->getTags()); } + public function dataForCrazyHtml() + { + return [ + 'script and comment' => [ + '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />', + 'lol' + ], + 'script' => [ + '<strong>Script inside:</strong><script>alert(\'lol\');</script>', + 'script' + ], + ]; + } + + /** + * @dataProvider dataForCrazyHtml + */ + public function testWithCrazyHtmlContent($html, $escapedString) + { + $tagger = $this->getTaggerMock(); + $tagger->expects($this->once()) + ->method('tag'); + + $graby = new Graby(); + + $proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger(), $this->fetchingErrorMessage); + $entry = $proxy->updateEntry( + new Entry(new User()), + 'http://1.1.1.1', + [ + 'html' => $html, + 'title' => 'this is my title', + 'url' => 'http://1.1.1.1', + 'content_type' => 'text/html', + 'language' => 'fr', + 'status' => '200', + 'open_graph' => [ + 'og_title' => 'my OG title', + 'og_description' => 'OG desc', + 'og_image' => 'http://3.3.3.3/cover.jpg', + ], + ] + ); + + $this->assertEquals('http://1.1.1.1', $entry->getUrl()); + $this->assertEquals('this is my title', $entry->getTitle()); + $this->assertNotContains($escapedString, $entry->getContent()); + $this->assertEquals('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture()); + $this->assertEquals('text/html', $entry->getMimetype()); + $this->assertEquals('fr', $entry->getLanguage()); + $this->assertEquals('200', $entry->getHttpStatus()); + $this->assertEquals('1.1.1.1', $entry->getDomainName()); + } + private function getTaggerMock() { return $this->getMockBuilder(RuleBasedTagger::class) -- cgit v1.2.3