aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/Wallabag/CoreBundle/Helper/ContentProxy.php
diff options
context:
space:
mode:
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/ContentProxy.php')
-rw-r--r--src/Wallabag/CoreBundle/Helper/ContentProxy.php200
1 files changed, 122 insertions, 78 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index f222dd88..51bb2ca2 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -5,10 +5,11 @@ namespace Wallabag\CoreBundle\Helper;
5use Graby\Graby; 5use Graby\Graby;
6use Psr\Log\LoggerInterface; 6use Psr\Log\LoggerInterface;
7use Wallabag\CoreBundle\Entity\Entry; 7use Wallabag\CoreBundle\Entity\Entry;
8use Wallabag\CoreBundle\Entity\Tag;
9use Wallabag\CoreBundle\Tools\Utils; 8use Wallabag\CoreBundle\Tools\Utils;
10use Wallabag\CoreBundle\Repository\TagRepository;
11use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; 9use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
10use Symfony\Component\Validator\Constraints\Locale as LocaleConstraint;
11use Symfony\Component\Validator\Constraints\Url as UrlConstraint;
12use Symfony\Component\Validator\Validator\ValidatorInterface;
12 13
13/** 14/**
14 * This kind of proxy class take care of getting the content from an url 15 * This kind of proxy class take care of getting the content from an url
@@ -18,38 +19,37 @@ class ContentProxy
18{ 19{
19 protected $graby; 20 protected $graby;
20 protected $tagger; 21 protected $tagger;
22 protected $validator;
21 protected $logger; 23 protected $logger;
22 protected $tagRepository;
23 protected $mimeGuesser; 24 protected $mimeGuesser;
24 protected $fetchingErrorMessage; 25 protected $fetchingErrorMessage;
26 protected $eventDispatcher;
25 27
26 public function __construct(Graby $graby, RuleBasedTagger $tagger, TagRepository $tagRepository, LoggerInterface $logger, $fetchingErrorMessage) 28 public function __construct(Graby $graby, RuleBasedTagger $tagger, ValidatorInterface $validator, LoggerInterface $logger, $fetchingErrorMessage)
27 { 29 {
28 $this->graby = $graby; 30 $this->graby = $graby;
29 $this->tagger = $tagger; 31 $this->tagger = $tagger;
32 $this->validator = $validator;
30 $this->logger = $logger; 33 $this->logger = $logger;
31 $this->tagRepository = $tagRepository;
32 $this->mimeGuesser = new MimeTypeExtensionGuesser(); 34 $this->mimeGuesser = new MimeTypeExtensionGuesser();
33 $this->fetchingErrorMessage = $fetchingErrorMessage; 35 $this->fetchingErrorMessage = $fetchingErrorMessage;
34 } 36 }
35 37
36 /** 38 /**
37 * Fetch content using graby and hydrate given entry with results information. 39 * Update entry using either fetched or provided content.
38 * In case we couldn't find content, we'll try to use Open Graph data.
39 * 40 *
40 * We can also force the content, in case of an import from the v1 for example, so the function won't 41 * @param Entry $entry Entry to update
41 * fetch the content from the website but rather use information given with the $content parameter. 42 * @param string $url Url of the content
42 * 43 * @param array $content Array with content provided for import with AT LEAST keys title, html, url to skip the fetchContent from the url
43 * @param Entry $entry Entry to update 44 * @param bool $disableContentUpdate Whether to skip trying to fetch content using Graby
44 * @param string $url Url to grab content for
45 * @param array $content An array with AT LEAST keys title, html, url, language & content_type to skip the fetchContent from the url
46 *
47 * @return Entry
48 */ 45 */
49 public function updateEntry(Entry $entry, $url, array $content = []) 46 public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false)
50 { 47 {
51 // do we have to fetch the content or the provided one is ok? 48 if (!empty($content['html'])) {
52 if (empty($content) || false === $this->validateContent($content)) { 49 $content['html'] = $this->graby->cleanupHtml($content['html'], $url);
50 }
51
52 if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) {
53 $fetchedContent = $this->graby->fetchContent($url); 53 $fetchedContent = $this->graby->fetchContent($url);
54 54
55 // when content is imported, we have information in $content 55 // when content is imported, we have information in $content
@@ -59,8 +59,24 @@ class ContentProxy
59 } 59 }
60 } 60 }
61 61
62 // be sure to keep the url in case of error
63 // so we'll be able to refetch it in the future
64 $content['url'] = !empty($content['url']) ? $content['url'] : $url;
65
66 $this->stockEntry($entry, $content);
67 }
68
69 /**
70 * Stock entry with fetched or imported content.
71 * Will fall back to OpenGraph data if available.
72 *
73 * @param Entry $entry Entry to stock
74 * @param array $content Array with at least title, url & html
75 */
76 private function stockEntry(Entry $entry, array $content)
77 {
62 $title = $content['title']; 78 $title = $content['title'];
63 if (!$title && isset($content['open_graph']['og_title'])) { 79 if (!$title && !empty($content['open_graph']['og_title'])) {
64 $title = $content['open_graph']['og_title']; 80 $title = $content['open_graph']['og_title'];
65 } 81 }
66 82
@@ -68,18 +84,58 @@ class ContentProxy
68 if (false === $html) { 84 if (false === $html) {
69 $html = $this->fetchingErrorMessage; 85 $html = $this->fetchingErrorMessage;
70 86
71 if (isset($content['open_graph']['og_description'])) { 87 if (!empty($content['open_graph']['og_description'])) {
72 $html .= '<p><i>But we found a short description: </i></p>'; 88 $html .= '<p><i>But we found a short description: </i></p>';
73 $html .= $content['open_graph']['og_description']; 89 $html .= $content['open_graph']['og_description'];
74 } 90 }
75 } 91 }
76 92
77 $entry->setUrl($content['url'] ?: $url); 93 $entry->setUrl($content['url']);
78 $entry->setTitle($title); 94 $entry->setTitle($title);
79 $entry->setContent($html); 95 $entry->setContent($html);
80 $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); 96 $entry->setHttpStatus(isset($content['status']) ? $content['status'] : '');
81 97
82 $entry->setLanguage(isset($content['language']) ? $content['language'] : ''); 98 if (!empty($content['date'])) {
99 $date = $content['date'];
100
101 // is it a timestamp?
102 if (filter_var($date, FILTER_VALIDATE_INT) !== false) {
103 $date = '@'.$content['date'];
104 }
105
106 try {
107 $entry->setPublishedAt(new \DateTime($date));
108 } catch (\Exception $e) {
109 $this->logger->warning('Error while defining date', ['e' => $e, 'url' => $content['url'], 'date' => $content['date']]);
110 }
111 }
112
113 if (!empty($content['authors']) && is_array($content['authors'])) {
114 $entry->setPublishedBy($content['authors']);
115 }
116
117 if (!empty($content['all_headers'])) {
118 $entry->setHeaders($content['all_headers']);
119 }
120
121 $this->validateAndSetLanguage(
122 $entry,
123 isset($content['language']) ? $content['language'] : null
124 );
125
126 $this->validateAndSetPreviewPicture(
127 $entry,
128 isset($content['open_graph']['og_image']) ? $content['open_graph']['og_image'] : null
129 );
130
131 // if content is an image, define it as a preview too
132 if (!empty($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
133 $this->validateAndSetPreviewPicture(
134 $entry,
135 $content['url']
136 );
137 }
138
83 $entry->setMimetype(isset($content['content_type']) ? $content['content_type'] : ''); 139 $entry->setMimetype(isset($content['content_type']) ? $content['content_type'] : '');
84 $entry->setReadingTime(Utils::getReadingTime($html)); 140 $entry->setReadingTime(Utils::getReadingTime($html));
85 141
@@ -88,85 +144,73 @@ class ContentProxy
88 $entry->setDomainName($domainName); 144 $entry->setDomainName($domainName);
89 } 145 }
90 146
91 if (isset($content['open_graph']['og_image']) && $content['open_graph']['og_image']) {
92 $entry->setPreviewPicture($content['open_graph']['og_image']);
93 }
94
95 // if content is an image define as a preview too
96 if (isset($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
97 $entry->setPreviewPicture($content['url']);
98 }
99
100 try { 147 try {
101 $this->tagger->tag($entry); 148 $this->tagger->tag($entry);
102 } catch (\Exception $e) { 149 } catch (\Exception $e) {
103 $this->logger->error('Error while trying to automatically tag an entry.', [ 150 $this->logger->error('Error while trying to automatically tag an entry.', [
104 'entry_url' => $url, 151 'entry_url' => $content['url'],
105 'error_msg' => $e->getMessage(), 152 'error_msg' => $e->getMessage(),
106 ]); 153 ]);
107 } 154 }
108
109 return $entry;
110 } 155 }
111 156
112 /** 157 /**
113 * Assign some tags to an entry. 158 * Validate that the given content has at least a title, an html and a url.
159 *
160 * @param array $content
114 * 161 *
115 * @param Entry $entry 162 * @return bool true if valid otherwise false
116 * @param array|string $tags An array of tag or a string coma separated of tag
117 * @param array $entitiesReady Entities from the EntityManager which are persisted but not yet flushed
118 * It is mostly to fix duplicate tag on import @see http://stackoverflow.com/a/7879164/569101
119 */ 163 */
120 public function assignTagsToEntry(Entry $entry, $tags, array $entitiesReady = []) 164 private function validateContent(array $content)
121 { 165 {
122 if (!is_array($tags)) { 166 return !empty($content['title']) && !empty($content['html']) && !empty($content['url']);
123 $tags = explode(',', $tags); 167 }
124 }
125
126 // keeps only Tag entity from the "not yet flushed entities"
127 $tagsNotYetFlushed = [];
128 foreach ($entitiesReady as $entity) {
129 if ($entity instanceof Tag) {
130 $tagsNotYetFlushed[$entity->getLabel()] = $entity;
131 }
132 }
133
134 foreach ($tags as $label) {
135 $label = trim($label);
136 168
137 // avoid empty tag 169 /**
138 if (0 === strlen($label)) { 170 * Use a Symfony validator to ensure the language is well formatted.
139 continue; 171 *
140 } 172 * @param Entry $entry
173 * @param string $value Language to validate
174 */
175 private function validateAndSetLanguage($entry, $value)
176 {
177 // some lang are defined as fr-FR, es-ES.
178 // replacing - by _ might increase language support
179 $value = str_replace('-', '_', $value);
141 180
142 if (isset($tagsNotYetFlushed[$label])) { 181 $errors = $this->validator->validate(
143 $tagEntity = $tagsNotYetFlushed[$label]; 182 $value,
144 } else { 183 (new LocaleConstraint())
145 $tagEntity = $this->tagRepository->findOneByLabel($label); 184 );
146 185
147 if (is_null($tagEntity)) { 186 if (0 === count($errors)) {
148 $tagEntity = new Tag(); 187 $entry->setLanguage($value);
149 $tagEntity->setLabel($label);
150 }
151 }
152 188
153 // only add the tag on the entry if the relation doesn't exist 189 return;
154 if (false === $entry->getTags()->contains($tagEntity)) {
155 $entry->addTag($tagEntity);
156 }
157 } 190 }
191
192 $this->logger->warning('Language validation failed. '.(string) $errors);
158 } 193 }
159 194
160 /** 195 /**
161 * Validate that the given content as enough value to be used 196 * Use a Symfony validator to ensure the preview picture is a real url.
162 * instead of fetch the content from the url.
163 *
164 * @param array $content
165 * 197 *
166 * @return bool true if valid otherwise false 198 * @param Entry $entry
199 * @param string $value URL to validate
167 */ 200 */
168 private function validateContent(array $content) 201 private function validateAndSetPreviewPicture($entry, $value)
169 { 202 {
170 return isset($content['title']) && isset($content['html']) && isset($content['url']) && isset($content['language']) && isset($content['content_type']); 203 $errors = $this->validator->validate(
204 $value,
205 (new UrlConstraint())
206 );
207
208 if (0 === count($errors)) {
209 $entry->setPreviewPicture($value);
210
211 return;
212 }
213
214 $this->logger->warning('PreviewPicture validation failed. '.(string) $errors);
171 } 215 }
172} 216}