From e668a8124c46d47add4248963d77f3b29b37b3ce Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Thu, 11 May 2017 08:14:29 +0200 Subject: Allow other fields to be sent using API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Entry API can now have these new fields: - content - language - preview_picture - published_at Re-use the ContentProxy to be able to do the same using the web UI (in the future). htmLawed is used to clean stuff from content, I hope it’ll be enough to avoid security breaches. Lower content validation when we want to update an entry with content already defined. Before, language & content_type were required. If they weren’t provided, we re-fetched the content using graby. I think these fields aren’t required for an entry to be created. So I removed them. Which means some imports from the v1 export won’t be re-fetched since they provide content, url & title. Also, remove liberation link from Readability import to avoid overlapping imports (from wallabag v1, which had the same link) --- .../ApiBundle/Controller/EntryRestController.php | 46 +++++++++++++++------- src/Wallabag/CoreBundle/Helper/ContentProxy.php | 30 ++++++++++---- 2 files changed, 54 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/Wallabag/ApiBundle/Controller/EntryRestController.php b/src/Wallabag/ApiBundle/Controller/EntryRestController.php index 31bb67fd..dfd04fb4 100644 --- a/src/Wallabag/ApiBundle/Controller/EntryRestController.php +++ b/src/Wallabag/ApiBundle/Controller/EntryRestController.php @@ -280,6 +280,10 @@ class EntryRestController extends WallabagRestController * {"name"="tags", "dataType"="string", "required"=false, "format"="tag1,tag2,tag3", "description"="a comma-separated list of tags."}, * {"name"="starred", "dataType"="integer", "required"=false, "format"="1 or 0", "description"="entry already starred"}, * {"name"="archive", "dataType"="integer", "required"=false, "format"="1 or 0", "description"="entry 
already archived"}, + * {"name"="content", "dataType"="string", "required"=false, "description"="Content of the entry"}, + * {"name"="language", "dataType"="string", "required"=false, "description"="Language of the entry"}, + * {"name"="preview_picture", "dataType"="string", "required"=false, "description"="Preview picture of the entry"}, + * {"name"="published_at", "dataType"="datetime", "format"="YYYY-MM-DDTHH:II:SS+TZ", "required"=false, "description"="Published date of the entry"}, * } * ) * @@ -293,30 +297,42 @@ class EntryRestController extends WallabagRestController $title = $request->request->get('title'); $isArchived = $request->request->get('archive'); $isStarred = $request->request->get('starred'); + $content = $request->request->get('content'); + $language = $request->request->get('language'); + $picture = $request->request->get('preview_picture'); + $publishedAt = $request->request->get('published_at'); $entry = $this->get('wallabag_core.entry_repository')->findByUrlAndUserId($url, $this->getUser()->getId()); if (false === $entry) { $entry = new Entry($this->getUser()); - try { - $entry = $this->get('wallabag_core.content_proxy')->updateEntry( - $entry, - $url - ); - } catch (\Exception $e) { - $this->get('logger')->error('Error while saving an entry', [ - 'exception' => $e, - 'entry' => $entry, - ]); - $entry->setUrl($url); - } } - if (!is_null($title)) { - $entry->setTitle($title); + try { + $entry = $this->get('wallabag_core.content_proxy')->updateEntry( + $entry, + $url, + [ + 'title' => $title, + 'html' => $content, + 'url' => $url, + 'language' => $language, + 'date' => $publishedAt, + // faking the preview picture + 'open_graph' => [ + 'og_image' => $picture, + ], + ] + ); + } catch (\Exception $e) { + $this->get('logger')->error('Error while saving an entry', [ + 'exception' => $e, + 'entry' => $entry, + ]); + $entry->setUrl($url); } - $tags = $request->request->get('tags', ''); + $tags = $request->request->get('tags', []); if (!empty($tags)) { 
$this->get('wallabag_core.tags_assigner')->assignTagsToEntry($entry, $tags); } diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 4b3e6fbb..e06ad3d6 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -45,6 +45,18 @@ class ContentProxy */ public function updateEntry(Entry $entry, $url, array $content = []) { + // ensure content is a bit cleaned up + if (!empty($content['html'])) { + $content['html'] = htmLawed($content['html'], [ + 'safe' => 1, + // which means: do not remove iframe elements + 'elements' => '*+iframe', + 'deny_attribute' => 'style', + 'comment' => 1, + 'cdata' => 1, + ]); + } + // do we have to fetch the content or the provided one is ok? if (empty($content) || false === $this->validateContent($content)) { $fetchedContent = $this->graby->fetchContent($url); @@ -57,7 +69,7 @@ class ContentProxy } $title = $content['title']; - if (!$title && isset($content['open_graph']['og_title'])) { + if (!$title && !empty($content['open_graph']['og_title'])) { $title = $content['open_graph']['og_title']; } @@ -65,7 +77,7 @@ class ContentProxy if (false === $html) { $html = $this->fetchingErrorMessage; - if (isset($content['open_graph']['og_description'])) { + if (!empty($content['open_graph']['og_description'])) { $html .= '

But we found a short description:

'; $html .= $content['open_graph']['og_description']; } @@ -76,8 +88,12 @@ class ContentProxy $entry->setContent($html); $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); - if (isset($content['date']) && null !== $content['date'] && '' !== $content['date']) { - $entry->setPublishedAt(new \DateTime($content['date'])); + if (!empty($content['date'])) { + try { + $entry->setPublishedAt(new \DateTime($content['date'])); + } catch (\Exception $e) { + $this->logger->warn('Error while defining date', ['e' => $e, 'url' => $url, 'date' => $content['date']]); + } } if (!empty($content['authors'])) { @@ -97,12 +113,12 @@ class ContentProxy $entry->setDomainName($domainName); } - if (isset($content['open_graph']['og_image']) && $content['open_graph']['og_image']) { + if (!empty($content['open_graph']['og_image'])) { $entry->setPreviewPicture($content['open_graph']['og_image']); } // if content is an image define as a preview too - if (isset($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { + if (!empty($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { $entry->setPreviewPicture($content['url']); } @@ -128,6 +144,6 @@ class ContentProxy */ private function validateContent(array $content) { - return isset($content['title']) && isset($content['html']) && isset($content['url']) && isset($content['language']) && isset($content['content_type']); + return !empty($content['title']) && !empty($content['html']) && !empty($content['url']); } } -- cgit v1.2.3 From fb436e8ca0c7468b9698050df0b78447e2d0854f Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Thu, 11 May 2017 20:10:22 +0200 Subject: Add support for authors --- src/Wallabag/ApiBundle/Controller/EntryRestController.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git 
a/src/Wallabag/ApiBundle/Controller/EntryRestController.php b/src/Wallabag/ApiBundle/Controller/EntryRestController.php index dfd04fb4..e6bbe552 100644 --- a/src/Wallabag/ApiBundle/Controller/EntryRestController.php +++ b/src/Wallabag/ApiBundle/Controller/EntryRestController.php @@ -284,6 +284,7 @@ class EntryRestController extends WallabagRestController * {"name"="language", "dataType"="string", "required"=false, "description"="Language of the entry"}, * {"name"="preview_picture", "dataType"="string", "required"=false, "description"="Preview picture of the entry"}, * {"name"="published_at", "dataType"="datetime", "format"="YYYY-MM-DDTHH:II:SS+TZ", "required"=false, "description"="Published date of the entry"}, + * {"name"="authors", "dataType"="string", "format"="Name Firstname,author2,author3", "required"=false, "description"="Authors of the entry"}, * } * ) * @@ -295,12 +296,14 @@ class EntryRestController extends WallabagRestController $url = $request->request->get('url'); $title = $request->request->get('title'); + $tags = $request->request->get('tags', []); $isArchived = $request->request->get('archive'); $isStarred = $request->request->get('starred'); $content = $request->request->get('content'); $language = $request->request->get('language'); $picture = $request->request->get('preview_picture'); $publishedAt = $request->request->get('published_at'); + $authors = $request->request->get('authors', ''); $entry = $this->get('wallabag_core.entry_repository')->findByUrlAndUserId($url, $this->getUser()->getId()); @@ -322,6 +325,7 @@ class EntryRestController extends WallabagRestController 'open_graph' => [ 'og_image' => $picture, ], + 'authors' => explode(',', $authors), ] ); } catch (\Exception $e) { @@ -332,7 +336,7 @@ class EntryRestController extends WallabagRestController $entry->setUrl($url); } - $tags = $request->request->get('tags', []); + if (!empty($tags)) { $this->get('wallabag_core.tags_assigner')->assignTagsToEntry($entry, $tags); } -- cgit v1.2.3 
From 74a75f7d430eb7a69cd377194e52012db34d39b4 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Fri, 12 May 2017 07:53:21 +0200 Subject: Use graby ContentExtractor to clean html It might be better to re-use some graby functionalities to clean html instead of building a new system. --- src/Wallabag/ApiBundle/Controller/EntryRestController.php | 1 - src/Wallabag/CoreBundle/Helper/ContentProxy.php | 10 ++++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Wallabag/ApiBundle/Controller/EntryRestController.php b/src/Wallabag/ApiBundle/Controller/EntryRestController.php index e6bbe552..0930c109 100644 --- a/src/Wallabag/ApiBundle/Controller/EntryRestController.php +++ b/src/Wallabag/ApiBundle/Controller/EntryRestController.php @@ -336,7 +336,6 @@ class EntryRestController extends WallabagRestController $entry->setUrl($url); } - if (!empty($tags)) { $this->get('wallabag_core.tags_assigner')->assignTagsToEntry($entry, $tags); } diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index e06ad3d6..a1df16d8 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -47,6 +47,16 @@ class ContentProxy { // ensure content is a bit cleaned up if (!empty($content['html'])) { + $extractor = $this->graby->getExtractor(); + $contentExtracted = $extractor->process($content['html'], $url); + + if ($contentExtracted) { + $contentBlock = $extractor->getContent(); + $contentBlock->normalize(); + + $content['html'] = trim($contentBlock->innerHTML); + } + $content['html'] = htmLawed($content['html'], [ 'safe' => 1, // which means: do not remove iframe elements -- cgit v1.2.3 From 0d6cfb884c8ef75e4dc5fd667fb9d29702523a2a Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Tue, 16 May 2017 23:11:20 +0200 Subject: Remove htmlawed and use graby instead Instead of using htmlawed (which is already used in graby) use graby directly (which 
requires some refactoring on graby side). Still needs some tests. --- src/Wallabag/CoreBundle/Helper/ContentProxy.php | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) (limited to 'src') diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index a1df16d8..66d72fe6 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -47,24 +47,7 @@ class ContentProxy { // ensure content is a bit cleaned up if (!empty($content['html'])) { - $extractor = $this->graby->getExtractor(); - $contentExtracted = $extractor->process($content['html'], $url); - - if ($contentExtracted) { - $contentBlock = $extractor->getContent(); - $contentBlock->normalize(); - - $content['html'] = trim($contentBlock->innerHTML); - } - - $content['html'] = htmLawed($content['html'], [ - 'safe' => 1, - // which means: do not remove iframe elements - 'elements' => '*+iframe', - 'deny_attribute' => 'style', - 'comment' => 1, - 'cdata' => 1, - ]); + $content['html'] = $this->graby->cleanupHtml($content['html'], $url); } // do we have to fetch the content or the provided one is ok? -- cgit v1.2.3 From 9e349f08a651c43c6d5dd890303ed529c38c4fde Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Wed, 24 May 2017 16:02:49 +0200 Subject: Improve docs --- src/Wallabag/ApiBundle/Controller/EntryRestController.php | 3 +++ src/Wallabag/CoreBundle/Helper/ContentProxy.php | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Wallabag/ApiBundle/Controller/EntryRestController.php b/src/Wallabag/ApiBundle/Controller/EntryRestController.php index 0930c109..cc2cca64 100644 --- a/src/Wallabag/ApiBundle/Controller/EntryRestController.php +++ b/src/Wallabag/ApiBundle/Controller/EntryRestController.php @@ -273,6 +273,9 @@ class EntryRestController extends WallabagRestController /** * Create an entry. 
* + * If you want to provide the HTML content (which means wallabag won't fetch it from the url), you must provide `content`, `title` & `url` fields **non-empty**. + * Otherwise, content will be fetched as normal from the url and values will be overwritten. + * * @ApiDoc( * parameters={ * {"name"="url", "dataType"="string", "required"=true, "format"="http://www.test.com/article.html", "description"="Url for the entry."}, diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 66d72fe6..90d0c50d 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -31,7 +31,7 @@ class ContentProxy } /** - * Fetch content using graby and hydrate given entry with results information. + * Fetch content using graby and hydrate given $entry with results information. * In case we couldn't find content, we'll try to use Open Graph data. * * We can also force the content, in case of an import from the v1 for example, so the function won't @@ -39,7 +39,7 @@ class ContentProxy * * @param Entry $entry Entry to update * @param string $url Url to grab content for - * @param array $content An array with AT LEAST keys title, html, url, language & content_type to skip the fetchContent from the url + * @param array $content An array with AT LEAST keys title, html, url to skip the fetchContent from the url * * @return Entry */ @@ -85,7 +85,7 @@ class ContentProxy try { $entry->setPublishedAt(new \DateTime($content['date'])); } catch (\Exception $e) { - $this->logger->warn('Error while defining date', ['e' => $e, 'url' => $url, 'date' => $content['date']]); + $this->logger->warning('Error while defining date', ['e' => $e, 'url' => $url, 'date' => $content['date']]); } } -- cgit v1.2.3 From f0378b4d7c7b8c971239445f3a2a1535abab7d00 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Wed, 24 May 2017 16:44:03 +0200 Subject: Forced date can now be a timestamp too Also adding more 
tests for forced content --- src/Wallabag/ApiBundle/Controller/EntryRestController.php | 2 +- src/Wallabag/CoreBundle/Helper/ContentProxy.php | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Wallabag/ApiBundle/Controller/EntryRestController.php b/src/Wallabag/ApiBundle/Controller/EntryRestController.php index cc2cca64..c3ba1858 100644 --- a/src/Wallabag/ApiBundle/Controller/EntryRestController.php +++ b/src/Wallabag/ApiBundle/Controller/EntryRestController.php @@ -286,7 +286,7 @@ class EntryRestController extends WallabagRestController * {"name"="content", "dataType"="string", "required"=false, "description"="Content of the entry"}, * {"name"="language", "dataType"="string", "required"=false, "description"="Language of the entry"}, * {"name"="preview_picture", "dataType"="string", "required"=false, "description"="Preview picture of the entry"}, - * {"name"="published_at", "dataType"="datetime", "format"="YYYY-MM-DDTHH:II:SS+TZ", "required"=false, "description"="Published date of the entry"}, + * {"name"="published_at", "dataType"="datetime|integer", "format"="YYYY-MM-DDTHH:II:SS+TZ or a timestamp", "required"=false, "description"="Published date of the entry"}, * {"name"="authors", "dataType"="string", "format"="Name Firstname,author2,author3", "required"=false, "description"="Authors of the entry"}, * } * ) diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 90d0c50d..8ba77ca9 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -82,8 +82,15 @@ class ContentProxy $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); if (!empty($content['date'])) { + $date = $content['date']; + + // is it a timestamp? 
+ if (filter_var($date, FILTER_VALIDATE_INT) !== false) { + $date = '@'.$content['date']; + } + try { - $entry->setPublishedAt(new \DateTime($content['date'])); + $entry->setPublishedAt(new \DateTime($date)); } catch (\Exception $e) { $this->logger->warning('Error while defining date', ['e' => $e, 'url' => $url, 'date' => $content['date']]); } -- cgit v1.2.3