diff options
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper')
5 files changed, 64 insertions, 35 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 4b3e6fbb..bfaa1976 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php | |||
@@ -31,22 +31,20 @@ class ContentProxy | |||
31 | } | 31 | } |
32 | 32 | ||
33 | /** | 33 | /** |
34 | * Fetch content using graby and hydrate given entry with results information. | 34 | * Update entry using either fetched or provided content. |
35 | * In case we couldn't find content, we'll try to use Open Graph data. | ||
36 | * | 35 | * |
37 | * We can also force the content, in case of an import from the v1 for example, so the function won't | 36 | * @param Entry $entry Entry to update |
38 | * fetch the content from the website but rather use information given with the $content parameter. | 37 | * @param string $url Url of the content |
39 | * | 38 | * @param array $content Array with content provided for import with AT LEAST keys title, html, url to skip the fetchContent from the url |
40 | * @param Entry $entry Entry to update | 39 | * @param bool $disableContentUpdate Whether to skip trying to fetch content using Graby |
41 | * @param string $url Url to grab content for | ||
42 | * @param array $content An array with AT LEAST keys title, html, url, language & content_type to skip the fetchContent from the url | ||
43 | * | ||
44 | * @return Entry | ||
45 | */ | 40 | */ |
46 | public function updateEntry(Entry $entry, $url, array $content = []) | 41 | public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false) |
47 | { | 42 | { |
48 | // do we have to fetch the content or the provided one is ok? | 43 | if (!empty($content['html'])) { |
49 | if (empty($content) || false === $this->validateContent($content)) { | 44 | $content['html'] = $this->graby->cleanupHtml($content['html'], $url); |
45 | } | ||
46 | |||
47 | if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) { | ||
50 | $fetchedContent = $this->graby->fetchContent($url); | 48 | $fetchedContent = $this->graby->fetchContent($url); |
51 | 49 | ||
52 | // when content is imported, we have information in $content | 50 | // when content is imported, we have information in $content |
@@ -56,8 +54,24 @@ class ContentProxy | |||
56 | } | 54 | } |
57 | } | 55 | } |
58 | 56 | ||
57 | // be sure to keep the url in case of error | ||
58 | // so we'll be able to refetch it in the future | ||
59 | $content['url'] = !empty($content['url']) ? $content['url'] : $url; | ||
60 | |||
61 | $this->stockEntry($entry, $content); | ||
62 | } | ||
63 | |||
64 | /** | ||
65 | * Stock entry with fetched or imported content. | ||
66 | * Will fall back to OpenGraph data if available. | ||
67 | * | ||
68 | * @param Entry $entry Entry to stock | ||
69 | * @param array $content Array with at least title, url & html | ||
70 | */ | ||
71 | private function stockEntry(Entry $entry, array $content) | ||
72 | { | ||
59 | $title = $content['title']; | 73 | $title = $content['title']; |
60 | if (!$title && isset($content['open_graph']['og_title'])) { | 74 | if (!$title && !empty($content['open_graph']['og_title'])) { |
61 | $title = $content['open_graph']['og_title']; | 75 | $title = $content['open_graph']['og_title']; |
62 | } | 76 | } |
63 | 77 | ||
@@ -65,19 +79,30 @@ class ContentProxy | |||
65 | if (false === $html) { | 79 | if (false === $html) { |
66 | $html = $this->fetchingErrorMessage; | 80 | $html = $this->fetchingErrorMessage; |
67 | 81 | ||
68 | if (isset($content['open_graph']['og_description'])) { | 82 | if (!empty($content['open_graph']['og_description'])) { |
69 | $html .= '<p><i>But we found a short description: </i></p>'; | 83 | $html .= '<p><i>But we found a short description: </i></p>'; |
70 | $html .= $content['open_graph']['og_description']; | 84 | $html .= $content['open_graph']['og_description']; |
71 | } | 85 | } |
72 | } | 86 | } |
73 | 87 | ||
74 | $entry->setUrl($content['url'] ?: $url); | 88 | $entry->setUrl($content['url']); |
75 | $entry->setTitle($title); | 89 | $entry->setTitle($title); |
76 | $entry->setContent($html); | 90 | $entry->setContent($html); |
77 | $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); | 91 | $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); |
78 | 92 | ||
79 | if (isset($content['date']) && null !== $content['date'] && '' !== $content['date']) { | 93 | if (!empty($content['date'])) { |
80 | $entry->setPublishedAt(new \DateTime($content['date'])); | 94 | $date = $content['date']; |
95 | |||
96 | // is it a timestamp? | ||
97 | if (filter_var($date, FILTER_VALIDATE_INT) !== false) { | ||
98 | $date = '@'.$content['date']; | ||
99 | } | ||
100 | |||
101 | try { | ||
102 | $entry->setPublishedAt(new \DateTime($date)); | ||
103 | } catch (\Exception $e) { | ||
104 | $this->logger->warning('Error while defining date', ['e' => $e, 'url' => $content['url'], 'date' => $content['date']]); | ||
105 | } | ||
81 | } | 106 | } |
82 | 107 | ||
83 | if (!empty($content['authors'])) { | 108 | if (!empty($content['authors'])) { |
@@ -97,12 +122,12 @@ class ContentProxy | |||
97 | $entry->setDomainName($domainName); | 122 | $entry->setDomainName($domainName); |
98 | } | 123 | } |
99 | 124 | ||
100 | if (isset($content['open_graph']['og_image']) && $content['open_graph']['og_image']) { | 125 | if (!empty($content['open_graph']['og_image'])) { |
101 | $entry->setPreviewPicture($content['open_graph']['og_image']); | 126 | $entry->setPreviewPicture($content['open_graph']['og_image']); |
102 | } | 127 | } |
103 | 128 | ||
104 | // if content is an image define as a preview too | 129 | // if content is an image define as a preview too |
105 | if (isset($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { | 130 | if (!empty($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { |
106 | $entry->setPreviewPicture($content['url']); | 131 | $entry->setPreviewPicture($content['url']); |
107 | } | 132 | } |
108 | 133 | ||
@@ -110,17 +135,14 @@ class ContentProxy | |||
110 | $this->tagger->tag($entry); | 135 | $this->tagger->tag($entry); |
111 | } catch (\Exception $e) { | 136 | } catch (\Exception $e) { |
112 | $this->logger->error('Error while trying to automatically tag an entry.', [ | 137 | $this->logger->error('Error while trying to automatically tag an entry.', [ |
113 | 'entry_url' => $url, | 138 | 'entry_url' => $content['url'], |
114 | 'error_msg' => $e->getMessage(), | 139 | 'error_msg' => $e->getMessage(), |
115 | ]); | 140 | ]); |
116 | } | 141 | } |
117 | |||
118 | return $entry; | ||
119 | } | 142 | } |
120 | 143 | ||
121 | /** | 144 | /** |
122 | * Validate that the given content as enough value to be used | 145 | * Validate that the given content has at least a title, an html and a url. |
123 | * instead of fetch the content from the url. | ||
124 | * | 146 | * |
125 | * @param array $content | 147 | * @param array $content |
126 | * | 148 | * |
@@ -128,6 +150,6 @@ class ContentProxy | |||
128 | */ | 150 | */ |
129 | private function validateContent(array $content) | 151 | private function validateContent(array $content) |
130 | { | 152 | { |
131 | return isset($content['title']) && isset($content['html']) && isset($content['url']) && isset($content['language']) && isset($content['content_type']); | 153 | return !empty($content['title']) && !empty($content['html']) && !empty($content['url']); |
132 | } | 154 | } |
133 | } | 155 | } |
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 0d330d2a..54e23a05 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php | |||
@@ -54,7 +54,7 @@ class DownloadImages | |||
54 | $crawler = new Crawler($html); | 54 | $crawler = new Crawler($html); |
55 | $result = $crawler | 55 | $result = $crawler |
56 | ->filterXpath('//img') | 56 | ->filterXpath('//img') |
57 | ->extract(array('src')); | 57 | ->extract(['src']); |
58 | 58 | ||
59 | $relativePath = $this->getRelativePath($entryId); | 59 | $relativePath = $this->getRelativePath($entryId); |
60 | 60 | ||
@@ -66,6 +66,11 @@ class DownloadImages | |||
66 | continue; | 66 | continue; |
67 | } | 67 | } |
68 | 68 | ||
69 | // if image contains "&" and we can't find it in the html it might be because it's encoded as &amp; | ||
70 | if (false !== stripos($image, '&') && false === stripos($html, $image)) { | ||
71 | $image = str_replace('&', '&amp;', $image); | ||
72 | } | ||
73 | |||
69 | $html = str_replace($image, $imagePath, $html); | 74 | $html = str_replace($image, $imagePath, $html); |
70 | } | 75 | } |
71 | 76 | ||
@@ -114,7 +119,7 @@ class DownloadImages | |||
114 | $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); | 119 | $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); |
115 | $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); | 120 | $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); |
116 | if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { | 121 | if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { |
117 | $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping '.$imagePath); | 122 | $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath); |
118 | 123 | ||
119 | return false; | 124 | return false; |
120 | } | 125 | } |
diff --git a/src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php b/src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php index 7d3798b9..df579ebd 100644 --- a/src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php +++ b/src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php | |||
@@ -20,16 +20,15 @@ class PreparePagerForEntries | |||
20 | 20 | ||
21 | /** | 21 | /** |
22 | * @param AdapterInterface $adapter | 22 | * @param AdapterInterface $adapter |
23 | * @param int $page | ||
24 | * | 23 | * |
25 | * @return null|Pagerfanta | 24 | * @return null|Pagerfanta |
26 | */ | 25 | */ |
27 | public function prepare(AdapterInterface $adapter, $page = 1) | 26 | public function prepare(AdapterInterface $adapter) |
28 | { | 27 | { |
29 | $user = $this->tokenStorage->getToken() ? $this->tokenStorage->getToken()->getUser() : null; | 28 | $user = $this->tokenStorage->getToken() ? $this->tokenStorage->getToken()->getUser() : null; |
30 | 29 | ||
31 | if (null === $user || !is_object($user)) { | 30 | if (null === $user || !is_object($user)) { |
32 | return null; | 31 | return; |
33 | } | 32 | } |
34 | 33 | ||
35 | $entries = new Pagerfanta($adapter); | 34 | $entries = new Pagerfanta($adapter); |
diff --git a/src/Wallabag/CoreBundle/Helper/Redirect.php b/src/Wallabag/CoreBundle/Helper/Redirect.php index f78b7fe0..abc84d08 100644 --- a/src/Wallabag/CoreBundle/Helper/Redirect.php +++ b/src/Wallabag/CoreBundle/Helper/Redirect.php | |||
@@ -21,12 +21,13 @@ class Redirect | |||
21 | } | 21 | } |
22 | 22 | ||
23 | /** | 23 | /** |
24 | * @param string $url URL to redirect | 24 | * @param string $url URL to redirect |
25 | * @param string $fallback Fallback URL if $url is null | 25 | * @param string $fallback Fallback URL if $url is null |
26 | * @param bool $ignoreActionMarkAsRead Ignore configured action when mark as read | ||
26 | * | 27 | * |
27 | * @return string | 28 | * @return string |
28 | */ | 29 | */ |
29 | public function to($url, $fallback = '') | 30 | public function to($url, $fallback = '', $ignoreActionMarkAsRead = false) |
30 | { | 31 | { |
31 | $user = $this->tokenStorage->getToken() ? $this->tokenStorage->getToken()->getUser() : null; | 32 | $user = $this->tokenStorage->getToken() ? $this->tokenStorage->getToken()->getUser() : null; |
32 | 33 | ||
@@ -34,7 +35,8 @@ class Redirect | |||
34 | return $url; | 35 | return $url; |
35 | } | 36 | } |
36 | 37 | ||
37 | if (Config::REDIRECT_TO_HOMEPAGE === $user->getConfig()->getActionMarkAsRead()) { | 38 | if (!$ignoreActionMarkAsRead && |
39 | Config::REDIRECT_TO_HOMEPAGE === $user->getConfig()->getActionMarkAsRead()) { | ||
38 | return $this->router->generate('homepage'); | 40 | return $this->router->generate('homepage'); |
39 | } | 41 | } |
40 | 42 | ||
diff --git a/src/Wallabag/CoreBundle/Helper/RuleBasedTagger.php b/src/Wallabag/CoreBundle/Helper/RuleBasedTagger.php index add27db2..509d0dec 100644 --- a/src/Wallabag/CoreBundle/Helper/RuleBasedTagger.php +++ b/src/Wallabag/CoreBundle/Helper/RuleBasedTagger.php | |||
@@ -15,6 +15,7 @@ class RuleBasedTagger | |||
15 | private $rulerz; | 15 | private $rulerz; |
16 | private $tagRepository; | 16 | private $tagRepository; |
17 | private $entryRepository; | 17 | private $entryRepository; |
18 | private $logger; | ||
18 | 19 | ||
19 | public function __construct(RulerZ $rulerz, TagRepository $tagRepository, EntryRepository $entryRepository, LoggerInterface $logger) | 20 | public function __construct(RulerZ $rulerz, TagRepository $tagRepository, EntryRepository $entryRepository, LoggerInterface $logger) |
20 | { | 21 | { |