aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/Wallabag/CoreBundle/Helper/ContentProxy.php
diff options
context:
space:
mode:
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/ContentProxy.php')
-rw-r--r--src/Wallabag/CoreBundle/Helper/ContentProxy.php137
1 files changed, 60 insertions, 77 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index f222dd88..bfaa1976 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -5,9 +5,7 @@ namespace Wallabag\CoreBundle\Helper;
5use Graby\Graby; 5use Graby\Graby;
6use Psr\Log\LoggerInterface; 6use Psr\Log\LoggerInterface;
7use Wallabag\CoreBundle\Entity\Entry; 7use Wallabag\CoreBundle\Entity\Entry;
8use Wallabag\CoreBundle\Entity\Tag;
9use Wallabag\CoreBundle\Tools\Utils; 8use Wallabag\CoreBundle\Tools\Utils;
10use Wallabag\CoreBundle\Repository\TagRepository;
11use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; 9use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
12 10
13/** 11/**
@@ -19,37 +17,34 @@ class ContentProxy
19 protected $graby; 17 protected $graby;
20 protected $tagger; 18 protected $tagger;
21 protected $logger; 19 protected $logger;
22 protected $tagRepository;
23 protected $mimeGuesser; 20 protected $mimeGuesser;
24 protected $fetchingErrorMessage; 21 protected $fetchingErrorMessage;
22 protected $eventDispatcher;
25 23
26 public function __construct(Graby $graby, RuleBasedTagger $tagger, TagRepository $tagRepository, LoggerInterface $logger, $fetchingErrorMessage) 24 public function __construct(Graby $graby, RuleBasedTagger $tagger, LoggerInterface $logger, $fetchingErrorMessage)
27 { 25 {
28 $this->graby = $graby; 26 $this->graby = $graby;
29 $this->tagger = $tagger; 27 $this->tagger = $tagger;
30 $this->logger = $logger; 28 $this->logger = $logger;
31 $this->tagRepository = $tagRepository;
32 $this->mimeGuesser = new MimeTypeExtensionGuesser(); 29 $this->mimeGuesser = new MimeTypeExtensionGuesser();
33 $this->fetchingErrorMessage = $fetchingErrorMessage; 30 $this->fetchingErrorMessage = $fetchingErrorMessage;
34 } 31 }
35 32
36 /** 33 /**
37 * Fetch content using graby and hydrate given entry with results information. 34 * Update entry using either fetched or provided content.
38 * In case we couldn't find content, we'll try to use Open Graph data.
39 * 35 *
40 * We can also force the content, in case of an import from the v1 for example, so the function won't 36 * @param Entry $entry Entry to update
41 * fetch the content from the website but rather use information given with the $content parameter. 37 * @param string $url Url of the content
42 * 38 * @param array $content Array with content provided for import with AT LEAST keys title, html, url to skip the fetchContent from the url
43 * @param Entry $entry Entry to update 39 * @param bool $disableContentUpdate Whether to skip trying to fetch content using Graby
44 * @param string $url Url to grab content for
45 * @param array $content An array with AT LEAST keys title, html, url, language & content_type to skip the fetchContent from the url
46 *
47 * @return Entry
48 */ 40 */
49 public function updateEntry(Entry $entry, $url, array $content = []) 41 public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false)
50 { 42 {
51 // do we have to fetch the content or the provided one is ok? 43 if (!empty($content['html'])) {
52 if (empty($content) || false === $this->validateContent($content)) { 44 $content['html'] = $this->graby->cleanupHtml($content['html'], $url);
45 }
46
47 if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) {
53 $fetchedContent = $this->graby->fetchContent($url); 48 $fetchedContent = $this->graby->fetchContent($url);
54 49
55 // when content is imported, we have information in $content 50 // when content is imported, we have information in $content
@@ -59,8 +54,24 @@ class ContentProxy
59 } 54 }
60 } 55 }
61 56
57 // be sure to keep the url in case of error
58 // so we'll be able to refetch it in the future
59 $content['url'] = !empty($content['url']) ? $content['url'] : $url;
60
61 $this->stockEntry($entry, $content);
62 }
63
64 /**
65 * Stock entry with fetched or imported content.
66 * Will fall back to OpenGraph data if available.
67 *
68 * @param Entry $entry Entry to stock
69 * @param array $content Array with at least title, url & html
70 */
71 private function stockEntry(Entry $entry, array $content)
72 {
62 $title = $content['title']; 73 $title = $content['title'];
63 if (!$title && isset($content['open_graph']['og_title'])) { 74 if (!$title && !empty($content['open_graph']['og_title'])) {
64 $title = $content['open_graph']['og_title']; 75 $title = $content['open_graph']['og_title'];
65 } 76 }
66 77
@@ -68,17 +79,40 @@ class ContentProxy
68 if (false === $html) { 79 if (false === $html) {
69 $html = $this->fetchingErrorMessage; 80 $html = $this->fetchingErrorMessage;
70 81
71 if (isset($content['open_graph']['og_description'])) { 82 if (!empty($content['open_graph']['og_description'])) {
72 $html .= '<p><i>But we found a short description: </i></p>'; 83 $html .= '<p><i>But we found a short description: </i></p>';
73 $html .= $content['open_graph']['og_description']; 84 $html .= $content['open_graph']['og_description'];
74 } 85 }
75 } 86 }
76 87
77 $entry->setUrl($content['url'] ?: $url); 88 $entry->setUrl($content['url']);
78 $entry->setTitle($title); 89 $entry->setTitle($title);
79 $entry->setContent($html); 90 $entry->setContent($html);
80 $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); 91 $entry->setHttpStatus(isset($content['status']) ? $content['status'] : '');
81 92
93 if (!empty($content['date'])) {
94 $date = $content['date'];
95
96 // is it a timestamp?
97 if (filter_var($date, FILTER_VALIDATE_INT) !== false) {
98 $date = '@'.$content['date'];
99 }
100
101 try {
102 $entry->setPublishedAt(new \DateTime($date));
103 } catch (\Exception $e) {
104 $this->logger->warning('Error while defining date', ['e' => $e, 'url' => $content['url'], 'date' => $content['date']]);
105 }
106 }
107
108 if (!empty($content['authors'])) {
109 $entry->setPublishedBy($content['authors']);
110 }
111
112 if (!empty($content['all_headers'])) {
113 $entry->setHeaders($content['all_headers']);
114 }
115
82 $entry->setLanguage(isset($content['language']) ? $content['language'] : ''); 116 $entry->setLanguage(isset($content['language']) ? $content['language'] : '');
83 $entry->setMimetype(isset($content['content_type']) ? $content['content_type'] : ''); 117 $entry->setMimetype(isset($content['content_type']) ? $content['content_type'] : '');
84 $entry->setReadingTime(Utils::getReadingTime($html)); 118 $entry->setReadingTime(Utils::getReadingTime($html));
@@ -88,12 +122,12 @@ class ContentProxy
88 $entry->setDomainName($domainName); 122 $entry->setDomainName($domainName);
89 } 123 }
90 124
91 if (isset($content['open_graph']['og_image']) && $content['open_graph']['og_image']) { 125 if (!empty($content['open_graph']['og_image'])) {
92 $entry->setPreviewPicture($content['open_graph']['og_image']); 126 $entry->setPreviewPicture($content['open_graph']['og_image']);
93 } 127 }
94 128
95 // if content is an image define as a preview too 129 // if content is an image define as a preview too
96 if (isset($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { 130 if (!empty($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
97 $entry->setPreviewPicture($content['url']); 131 $entry->setPreviewPicture($content['url']);
98 } 132 }
99 133
@@ -101,65 +135,14 @@ class ContentProxy
101 $this->tagger->tag($entry); 135 $this->tagger->tag($entry);
102 } catch (\Exception $e) { 136 } catch (\Exception $e) {
103 $this->logger->error('Error while trying to automatically tag an entry.', [ 137 $this->logger->error('Error while trying to automatically tag an entry.', [
104 'entry_url' => $url, 138 'entry_url' => $content['url'],
105 'error_msg' => $e->getMessage(), 139 'error_msg' => $e->getMessage(),
106 ]); 140 ]);
107 } 141 }
108
109 return $entry;
110 }
111
112 /**
113 * Assign some tags to an entry.
114 *
115 * @param Entry $entry
116 * @param array|string $tags An array of tag or a string coma separated of tag
117 * @param array $entitiesReady Entities from the EntityManager which are persisted but not yet flushed
118 * It is mostly to fix duplicate tag on import @see http://stackoverflow.com/a/7879164/569101
119 */
120 public function assignTagsToEntry(Entry $entry, $tags, array $entitiesReady = [])
121 {
122 if (!is_array($tags)) {
123 $tags = explode(',', $tags);
124 }
125
126 // keeps only Tag entity from the "not yet flushed entities"
127 $tagsNotYetFlushed = [];
128 foreach ($entitiesReady as $entity) {
129 if ($entity instanceof Tag) {
130 $tagsNotYetFlushed[$entity->getLabel()] = $entity;
131 }
132 }
133
134 foreach ($tags as $label) {
135 $label = trim($label);
136
137 // avoid empty tag
138 if (0 === strlen($label)) {
139 continue;
140 }
141
142 if (isset($tagsNotYetFlushed[$label])) {
143 $tagEntity = $tagsNotYetFlushed[$label];
144 } else {
145 $tagEntity = $this->tagRepository->findOneByLabel($label);
146
147 if (is_null($tagEntity)) {
148 $tagEntity = new Tag();
149 $tagEntity->setLabel($label);
150 }
151 }
152
153 // only add the tag on the entry if the relation doesn't exist
154 if (false === $entry->getTags()->contains($tagEntity)) {
155 $entry->addTag($tagEntity);
156 }
157 }
158 } 142 }
159 143
160 /** 144 /**
161 * Validate that the given content as enough value to be used 145 * Validate that the given content has at least a title, an html and a url.
162 * instead of fetch the content from the url.
163 * 146 *
164 * @param array $content 147 * @param array $content
165 * 148 *
@@ -167,6 +150,6 @@ class ContentProxy
167 */ 150 */
168 private function validateContent(array $content) 151 private function validateContent(array $content)
169 { 152 {
170 return isset($content['title']) && isset($content['html']) && isset($content['url']) && isset($content['language']) && isset($content['content_type']); 153 return !empty($content['title']) && !empty($content['html']) && !empty($content['url']);
171 } 154 }
172} 155}