diff options
author | Jeremy Benoist <jeremy.benoist@gmail.com> | 2017-05-12 07:53:21 +0200 |
---|---|---|
committer | Jeremy Benoist <jeremy.benoist@gmail.com> | 2017-05-31 14:00:15 +0200 |
commit | 74a75f7d430eb7a69cd377194e52012db34d39b4 (patch) | |
tree | bb85741afe742e24351167699c434a955ab4a9fa /src/Wallabag | |
parent | fb436e8ca0c7468b9698050df0b78447e2d0854f (diff) | |
download | wallabag-74a75f7d430eb7a69cd377194e52012db34d39b4.tar.gz wallabag-74a75f7d430eb7a69cd377194e52012db34d39b4.tar.zst wallabag-74a75f7d430eb7a69cd377194e52012db34d39b4.zip |
Use Graby's ContentExtractor to clean HTML
It might be better to reuse some of Graby's existing functionality to clean HTML instead of building a new system.
Diffstat (limited to 'src/Wallabag')
-rw-r--r-- | src/Wallabag/ApiBundle/Controller/EntryRestController.php | 1 | ||||
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/ContentProxy.php | 10 |
2 files changed, 10 insertions, 1 deletions
diff --git a/src/Wallabag/ApiBundle/Controller/EntryRestController.php b/src/Wallabag/ApiBundle/Controller/EntryRestController.php index e6bbe552..0930c109 100644 --- a/src/Wallabag/ApiBundle/Controller/EntryRestController.php +++ b/src/Wallabag/ApiBundle/Controller/EntryRestController.php | |||
@@ -336,7 +336,6 @@ class EntryRestController extends WallabagRestController | |||
336 | $entry->setUrl($url); | 336 | $entry->setUrl($url); |
337 | } | 337 | } |
338 | 338 | ||
339 | |||
340 | if (!empty($tags)) { | 339 | if (!empty($tags)) { |
341 | $this->get('wallabag_core.tags_assigner')->assignTagsToEntry($entry, $tags); | 340 | $this->get('wallabag_core.tags_assigner')->assignTagsToEntry($entry, $tags); |
342 | } | 341 | } |
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index e06ad3d6..a1df16d8 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php | |||
@@ -47,6 +47,16 @@ class ContentProxy | |||
47 | { | 47 | { |
48 | // ensure content is a bit cleaned up | 48 | // ensure content is a bit cleaned up |
49 | if (!empty($content['html'])) { | 49 | if (!empty($content['html'])) { |
50 | $extractor = $this->graby->getExtractor(); | ||
51 | $contentExtracted = $extractor->process($content['html'], $url); | ||
52 | |||
53 | if ($contentExtracted) { | ||
54 | $contentBlock = $extractor->getContent(); | ||
55 | $contentBlock->normalize(); | ||
56 | |||
57 | $content['html'] = trim($contentBlock->innerHTML); | ||
58 | } | ||
59 | |||
50 | $content['html'] = htmLawed($content['html'], [ | 60 | $content['html'] = htmLawed($content['html'], [ |
51 | 'safe' => 1, | 61 | 'safe' => 1, |
52 | // which means: do not remove iframe elements | 62 | // which means: do not remove iframe elements |