about | summary | refs | log | tree | commit | diff | homepage
path: root/src
diff options
context:
space:
mode:
author: Jeremy Benoist <jeremy.benoist@gmail.com> 2017-05-12 07:53:21 +0200
committer: Jeremy Benoist <jeremy.benoist@gmail.com> 2017-05-31 14:00:15 +0200
commit: 74a75f7d430eb7a69cd377194e52012db34d39b4 (patch)
tree: bb85741afe742e24351167699c434a955ab4a9fa /src
parent: fb436e8ca0c7468b9698050df0b78447e2d0854f (diff)
download: wallabag-74a75f7d430eb7a69cd377194e52012db34d39b4.tar.gz
wallabag-74a75f7d430eb7a69cd377194e52012db34d39b4.tar.zst
wallabag-74a75f7d430eb7a69cd377194e52012db34d39b4.zip
Use graby ContentExtractor to clean html
It might be better to reuse some of graby's functionality to clean HTML instead of building a new system.
Diffstat (limited to 'src')
-rw-r--r-- src/Wallabag/ApiBundle/Controller/EntryRestController.php | 1
-rw-r--r-- src/Wallabag/CoreBundle/Helper/ContentProxy.php | 10
2 files changed, 10 insertions, 1 deletions
diff --git a/src/Wallabag/ApiBundle/Controller/EntryRestController.php b/src/Wallabag/ApiBundle/Controller/EntryRestController.php
index e6bbe552..0930c109 100644
--- a/src/Wallabag/ApiBundle/Controller/EntryRestController.php
+++ b/src/Wallabag/ApiBundle/Controller/EntryRestController.php
@@ -336,7 +336,6 @@ class EntryRestController extends WallabagRestController
336 $entry->setUrl($url); 336 $entry->setUrl($url);
337 } 337 }
338 338
339
340 if (!empty($tags)) { 339 if (!empty($tags)) {
341 $this->get('wallabag_core.tags_assigner')->assignTagsToEntry($entry, $tags); 340 $this->get('wallabag_core.tags_assigner')->assignTagsToEntry($entry, $tags);
342 } 341 }
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index e06ad3d6..a1df16d8 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -47,6 +47,16 @@ class ContentProxy
47 { 47 {
48 // ensure content is a bit cleaned up 48 // ensure content is a bit cleaned up
49 if (!empty($content['html'])) { 49 if (!empty($content['html'])) {
50 $extractor = $this->graby->getExtractor();
51 $contentExtracted = $extractor->process($content['html'], $url);
52
53 if ($contentExtracted) {
54 $contentBlock = $extractor->getContent();
55 $contentBlock->normalize();
56
57 $content['html'] = trim($contentBlock->innerHTML);
58 }
59
50 $content['html'] = htmLawed($content['html'], [ 60 $content['html'] = htmLawed($content['html'], [
51 'safe' => 1, 61 'safe' => 1,
52 // which means: do not remove iframe elements 62 // which means: do not remove iframe elements