From e668a8124c46d47add4248963d77f3b29b37b3ce Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Thu, 11 May 2017 08:14:29 +0200 Subject: Allow other fields to be sent using API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Entry API can now have these new fields: - content - language - preview_picture - published_at Re-use the ContentProxy to be able to do the same using the web UI (in the future). htmLawed is used to clean stuff from content, I hope it’ll be enough to avoid security breaches. Lower content validation when we want to update an entry with content already defined. Before, language & content_type were required. If they weren’t provided, we re-fetched the content using graby. I think these fields aren’t required for an entry to be created. So I removed them. Which means some imports from the v1 export won’t be re-fetched since they provide content, url & title. Also, remove liberation link from Readability import to avoid overlapping imports (from wallabag v1, which had the same link) --- src/Wallabag/CoreBundle/Helper/ContentProxy.php | 30 +++++++++++++++++++------ 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'src/Wallabag/CoreBundle') diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 4b3e6fbb..e06ad3d6 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -45,6 +45,18 @@ class ContentProxy */ public function updateEntry(Entry $entry, $url, array $content = []) { + // ensure content is a bit cleaned up + if (!empty($content['html'])) { + $content['html'] = htmLawed($content['html'], [ + 'safe' => 1, + // which means: do not remove iframe elements + 'elements' => '*+iframe', + 'deny_attribute' => 'style', + 'comment' => 1, + 'cdata' => 1, + ]); + } + // do we have to fetch the content or the provided one is ok? 
if (empty($content) || false === $this->validateContent($content)) { $fetchedContent = $this->graby->fetchContent($url); @@ -57,7 +69,7 @@ class ContentProxy } $title = $content['title']; - if (!$title && isset($content['open_graph']['og_title'])) { + if (!$title && !empty($content['open_graph']['og_title'])) { $title = $content['open_graph']['og_title']; } @@ -65,7 +77,7 @@ class ContentProxy if (false === $html) { $html = $this->fetchingErrorMessage; - if (isset($content['open_graph']['og_description'])) { + if (!empty($content['open_graph']['og_description'])) { $html .= '

But we found a short description:

'; $html .= $content['open_graph']['og_description']; } @@ -76,8 +88,12 @@ class ContentProxy $entry->setContent($html); $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); - if (isset($content['date']) && null !== $content['date'] && '' !== $content['date']) { - $entry->setPublishedAt(new \DateTime($content['date'])); + if (!empty($content['date'])) { + try { + $entry->setPublishedAt(new \DateTime($content['date'])); + } catch (\Exception $e) { + $this->logger->warn('Error while defining date', ['e' => $e, 'url' => $url, 'date' => $content['date']]); + } } if (!empty($content['authors'])) { @@ -97,12 +113,12 @@ class ContentProxy $entry->setDomainName($domainName); } - if (isset($content['open_graph']['og_image']) && $content['open_graph']['og_image']) { + if (!empty($content['open_graph']['og_image'])) { $entry->setPreviewPicture($content['open_graph']['og_image']); } // if content is an image define as a preview too - if (isset($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { + if (!empty($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { $entry->setPreviewPicture($content['url']); } @@ -128,6 +144,6 @@ class ContentProxy */ private function validateContent(array $content) { - return isset($content['title']) && isset($content['html']) && isset($content['url']) && isset($content['language']) && isset($content['content_type']); + return !empty($content['title']) && !empty($content['html']) && !empty($content['url']); } } -- cgit v1.2.3