X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=src%2FWallabag%2FCoreBundle%2FHelper%2FContentProxy.php;h=9c6fa8db142a9a9bd3a75a981e833a450ad31010;hb=48b0163d247554d7e2f1ec63b717c8216ea9ec59;hp=0534d27b22ca58fe48c88a3d34a8835bac864bfe;hpb=bfe02a0b481055bb4e799200c8daa9a0ad987c71;p=github%2Fwallabag%2Fwallabag.git diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 0534d27b..9c6fa8db 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry; use Wallabag\CoreBundle\Tools\Utils; /** - * This kind of proxy class take care of getting the content from an url - * and update the entry with what it found. + * This kind of proxy class takes care of getting the content from an url + * and updates the entry with what it found. */ class ContentProxy { @@ -47,13 +47,18 @@ class ContentProxy */ public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false) { + $this->graby->toggleImgNoReferrer(true); if (!empty($content['html'])) { $content['html'] = $this->graby->cleanupHtml($content['html'], $url); } if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) { $fetchedContent = $this->graby->fetchContent($url); - $fetchedContent['title'] = $this->sanitizeContentTitle($fetchedContent['title'], $fetchedContent['content_type']); + + $fetchedContent['title'] = $this->sanitizeContentTitle( + $fetchedContent['title'], + isset($fetchedContent['headers']['content-type']) ? $fetchedContent['headers']['content-type'] : '' + ); // when content is imported, we have information in $content // in case fetching content goes bad, we'll keep the imported information instead of overriding them @@ -73,13 +78,14 @@ class ContentProxy $entry->setUrl($url); } + $entry->setGivenUrl($url); + $this->stockEntry($entry, $content); } /** * Use a Symfony validator to ensure the language is well formatted. * - * @param Entry $entry * @param string $value Language to validate and save */ public function updateLanguage(Entry $entry, $value) @@ -105,7 +111,6 @@ class ContentProxy /** * Use a Symfony validator to ensure the preview picture is a real url. * - * @param Entry $entry * @param string $value URL to validate and save */ public function updatePreviewPicture(Entry $entry, $value) @@ -127,7 +132,6 @@ class ContentProxy /** * Update date. * - * @param Entry $entry * @param string $value Date to validate and save */ public function updatePublishedAt(Entry $entry, $value) @@ -154,8 +158,6 @@ class ContentProxy /** * Helper to extract and save host from entry url. - * - * @param Entry $entry */ public function setEntryDomainName(Entry $entry) { @@ -169,8 +171,6 @@ class ContentProxy * Helper to set a default title using: * - url basename, if applicable * - hostname. - * - * @param Entry $entry */ public function setDefaultEntryTitle(Entry $entry) { @@ -187,8 +187,8 @@ class ContentProxy /** * Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 character. * - * @param $title - * @param $contentType + * @param string $title + * @param string $contentType * * @return string */ @@ -248,22 +248,18 @@ class ContentProxy { $this->updateOriginUrl($entry, $content['url']); - $entry->setHashedUrl(hash('md5', $entry->getUrl())); - $this->setEntryDomainName($entry); if (!empty($content['title'])) { $entry->setTitle($content['title']); - } elseif (!empty($content['open_graph']['og_title'])) { - $entry->setTitle($content['open_graph']['og_title']); } if (empty($content['html'])) { $content['html'] = $this->fetchingErrorMessage; - if (!empty($content['open_graph']['og_description'])) { + if (!empty($content['description'])) { $content['html'] .= '

But we found a short description:

'; - $content['html'] .= $content['open_graph']['og_description']; + $content['html'] .= $content['description']; } } @@ -278,8 +274,8 @@ class ContentProxy $entry->setPublishedBy($content['authors']); } - if (!empty($content['all_headers']) && $this->storeArticleHeaders) { - $entry->setHeaders($content['all_headers']); + if (!empty($content['headers'])) { + $entry->setHeaders($content['headers']); } if (!empty($content['date'])) { @@ -290,17 +286,30 @@ class ContentProxy $this->updateLanguage($entry, $content['language']); } - if (!empty($content['open_graph']['og_image'])) { - $this->updatePreviewPicture($entry, $content['open_graph']['og_image']); + $previewPictureUrl = ''; + if (!empty($content['image'])) { + $previewPictureUrl = $content['image']; } // if content is an image, define it as a preview too - if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { - $this->updatePreviewPicture($entry, $content['url']); + if (!empty($content['headers']['content-type']) && \in_array($this->mimeGuesser->guess($content['headers']['content-type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { + $previewPictureUrl = $content['url']; + } elseif (empty($previewPictureUrl)) { + $this->logger->debug('Extracting images from content to provide a default preview picture'); + $imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']); + $this->logger->debug(\count($imagesUrls) . ' pictures found'); + + if (!empty($imagesUrls)) { + $previewPictureUrl = $imagesUrls[0]; + } } - if (!empty($content['content_type'])) { - $entry->setMimetype($content['content_type']); + if (!empty($content['headers']['content-type'])) { + $entry->setMimetype($content['headers']['content-type']); + } + + if (!empty($previewPictureUrl)) { + $this->updatePreviewPicture($entry, $previewPictureUrl); } try { @@ -317,7 +326,6 @@ class ContentProxy * Update the origin_url field when a redirection occurs * This field is set if it is empty and new url does not match ignore list. * - * @param Entry $entry * @param string $url */ private function updateOriginUrl(Entry $entry, $url) @@ -425,8 +433,6 @@ class ContentProxy /** * Validate that the given content has at least a title, an html and a url. * - * @param array $content - * * @return bool true if valid otherwise false */ private function validateContent(array $content)