diff options
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/ContentProxy.php')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/ContentProxy.php | 137 |
1 files changed, 60 insertions, 77 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index f222dd88..bfaa1976 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php | |||
@@ -5,9 +5,7 @@ namespace Wallabag\CoreBundle\Helper; | |||
5 | use Graby\Graby; | 5 | use Graby\Graby; |
6 | use Psr\Log\LoggerInterface; | 6 | use Psr\Log\LoggerInterface; |
7 | use Wallabag\CoreBundle\Entity\Entry; | 7 | use Wallabag\CoreBundle\Entity\Entry; |
8 | use Wallabag\CoreBundle\Entity\Tag; | ||
9 | use Wallabag\CoreBundle\Tools\Utils; | 8 | use Wallabag\CoreBundle\Tools\Utils; |
10 | use Wallabag\CoreBundle\Repository\TagRepository; | ||
11 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; | 9 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; |
12 | 10 | ||
13 | /** | 11 | /** |
@@ -19,37 +17,34 @@ class ContentProxy | |||
19 | protected $graby; | 17 | protected $graby; |
20 | protected $tagger; | 18 | protected $tagger; |
21 | protected $logger; | 19 | protected $logger; |
22 | protected $tagRepository; | ||
23 | protected $mimeGuesser; | 20 | protected $mimeGuesser; |
24 | protected $fetchingErrorMessage; | 21 | protected $fetchingErrorMessage; |
22 | protected $eventDispatcher; | ||
25 | 23 | ||
26 | public function __construct(Graby $graby, RuleBasedTagger $tagger, TagRepository $tagRepository, LoggerInterface $logger, $fetchingErrorMessage) | 24 | public function __construct(Graby $graby, RuleBasedTagger $tagger, LoggerInterface $logger, $fetchingErrorMessage) |
27 | { | 25 | { |
28 | $this->graby = $graby; | 26 | $this->graby = $graby; |
29 | $this->tagger = $tagger; | 27 | $this->tagger = $tagger; |
30 | $this->logger = $logger; | 28 | $this->logger = $logger; |
31 | $this->tagRepository = $tagRepository; | ||
32 | $this->mimeGuesser = new MimeTypeExtensionGuesser(); | 29 | $this->mimeGuesser = new MimeTypeExtensionGuesser(); |
33 | $this->fetchingErrorMessage = $fetchingErrorMessage; | 30 | $this->fetchingErrorMessage = $fetchingErrorMessage; |
34 | } | 31 | } |
35 | 32 | ||
36 | /** | 33 | /** |
37 | * Fetch content using graby and hydrate given entry with results information. | 34 | * Update entry using either fetched or provided content. |
38 | * In case we couldn't find content, we'll try to use Open Graph data. | ||
39 | * | 35 | * |
40 | * We can also force the content, in case of an import from the v1 for example, so the function won't | 36 | * @param Entry $entry Entry to update |
41 | * fetch the content from the website but rather use information given with the $content parameter. | 37 | * @param string $url Url of the content |
42 | * | 38 | * @param array $content Array with content provided for import with AT LEAST keys title, html, url to skip the fetchContent from the url |
43 | * @param Entry $entry Entry to update | 39 | * @param bool $disableContentUpdate Whether to skip trying to fetch content using Graby |
44 | * @param string $url Url to grab content for | ||
45 | * @param array $content An array with AT LEAST keys title, html, url, language & content_type to skip the fetchContent from the url | ||
46 | * | ||
47 | * @return Entry | ||
48 | */ | 40 | */ |
49 | public function updateEntry(Entry $entry, $url, array $content = []) | 41 | public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false) |
50 | { | 42 | { |
51 | // do we have to fetch the content or the provided one is ok? | 43 | if (!empty($content['html'])) { |
52 | if (empty($content) || false === $this->validateContent($content)) { | 44 | $content['html'] = $this->graby->cleanupHtml($content['html'], $url); |
45 | } | ||
46 | |||
47 | if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) { | ||
53 | $fetchedContent = $this->graby->fetchContent($url); | 48 | $fetchedContent = $this->graby->fetchContent($url); |
54 | 49 | ||
55 | // when content is imported, we have information in $content | 50 | // when content is imported, we have information in $content |
@@ -59,8 +54,24 @@ class ContentProxy | |||
59 | } | 54 | } |
60 | } | 55 | } |
61 | 56 | ||
57 | // be sure to keep the url in case of error | ||
58 | // so we'll be able to refetch it in the future | ||
59 | $content['url'] = !empty($content['url']) ? $content['url'] : $url; | ||
60 | |||
61 | $this->stockEntry($entry, $content); | ||
62 | } | ||
63 | |||
64 | /** | ||
65 | * Stock entry with fetched or imported content. | ||
66 | * Will fall back to OpenGraph data if available. | ||
67 | * | ||
68 | * @param Entry $entry Entry to stock | ||
69 | * @param array $content Array with at least title, url & html | ||
70 | */ | ||
71 | private function stockEntry(Entry $entry, array $content) | ||
72 | { | ||
62 | $title = $content['title']; | 73 | $title = $content['title']; |
63 | if (!$title && isset($content['open_graph']['og_title'])) { | 74 | if (!$title && !empty($content['open_graph']['og_title'])) { |
64 | $title = $content['open_graph']['og_title']; | 75 | $title = $content['open_graph']['og_title']; |
65 | } | 76 | } |
66 | 77 | ||
@@ -68,17 +79,40 @@ class ContentProxy | |||
68 | if (false === $html) { | 79 | if (false === $html) { |
69 | $html = $this->fetchingErrorMessage; | 80 | $html = $this->fetchingErrorMessage; |
70 | 81 | ||
71 | if (isset($content['open_graph']['og_description'])) { | 82 | if (!empty($content['open_graph']['og_description'])) { |
72 | $html .= '<p><i>But we found a short description: </i></p>'; | 83 | $html .= '<p><i>But we found a short description: </i></p>'; |
73 | $html .= $content['open_graph']['og_description']; | 84 | $html .= $content['open_graph']['og_description']; |
74 | } | 85 | } |
75 | } | 86 | } |
76 | 87 | ||
77 | $entry->setUrl($content['url'] ?: $url); | 88 | $entry->setUrl($content['url']); |
78 | $entry->setTitle($title); | 89 | $entry->setTitle($title); |
79 | $entry->setContent($html); | 90 | $entry->setContent($html); |
80 | $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); | 91 | $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); |
81 | 92 | ||
93 | if (!empty($content['date'])) { | ||
94 | $date = $content['date']; | ||
95 | |||
96 | // is it a timestamp? | ||
97 | if (filter_var($date, FILTER_VALIDATE_INT) !== false) { | ||
98 | $date = '@'.$content['date']; | ||
99 | } | ||
100 | |||
101 | try { | ||
102 | $entry->setPublishedAt(new \DateTime($date)); | ||
103 | } catch (\Exception $e) { | ||
104 | $this->logger->warning('Error while defining date', ['e' => $e, 'url' => $content['url'], 'date' => $content['date']]); | ||
105 | } | ||
106 | } | ||
107 | |||
108 | if (!empty($content['authors'])) { | ||
109 | $entry->setPublishedBy($content['authors']); | ||
110 | } | ||
111 | |||
112 | if (!empty($content['all_headers'])) { | ||
113 | $entry->setHeaders($content['all_headers']); | ||
114 | } | ||
115 | |||
82 | $entry->setLanguage(isset($content['language']) ? $content['language'] : ''); | 116 | $entry->setLanguage(isset($content['language']) ? $content['language'] : ''); |
83 | $entry->setMimetype(isset($content['content_type']) ? $content['content_type'] : ''); | 117 | $entry->setMimetype(isset($content['content_type']) ? $content['content_type'] : ''); |
84 | $entry->setReadingTime(Utils::getReadingTime($html)); | 118 | $entry->setReadingTime(Utils::getReadingTime($html)); |
@@ -88,12 +122,12 @@ class ContentProxy | |||
88 | $entry->setDomainName($domainName); | 122 | $entry->setDomainName($domainName); |
89 | } | 123 | } |
90 | 124 | ||
91 | if (isset($content['open_graph']['og_image']) && $content['open_graph']['og_image']) { | 125 | if (!empty($content['open_graph']['og_image'])) { |
92 | $entry->setPreviewPicture($content['open_graph']['og_image']); | 126 | $entry->setPreviewPicture($content['open_graph']['og_image']); |
93 | } | 127 | } |
94 | 128 | ||
95 | // if content is an image define as a preview too | 129 | // if content is an image define as a preview too |
96 | if (isset($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { | 130 | if (!empty($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { |
97 | $entry->setPreviewPicture($content['url']); | 131 | $entry->setPreviewPicture($content['url']); |
98 | } | 132 | } |
99 | 133 | ||
@@ -101,65 +135,14 @@ class ContentProxy | |||
101 | $this->tagger->tag($entry); | 135 | $this->tagger->tag($entry); |
102 | } catch (\Exception $e) { | 136 | } catch (\Exception $e) { |
103 | $this->logger->error('Error while trying to automatically tag an entry.', [ | 137 | $this->logger->error('Error while trying to automatically tag an entry.', [ |
104 | 'entry_url' => $url, | 138 | 'entry_url' => $content['url'], |
105 | 'error_msg' => $e->getMessage(), | 139 | 'error_msg' => $e->getMessage(), |
106 | ]); | 140 | ]); |
107 | } | 141 | } |
108 | |||
109 | return $entry; | ||
110 | } | ||
111 | |||
112 | /** | ||
113 | * Assign some tags to an entry. | ||
114 | * | ||
115 | * @param Entry $entry | ||
116 | * @param array|string $tags An array of tag or a string coma separated of tag | ||
117 | * @param array $entitiesReady Entities from the EntityManager which are persisted but not yet flushed | ||
118 | * It is mostly to fix duplicate tag on import @see http://stackoverflow.com/a/7879164/569101 | ||
119 | */ | ||
120 | public function assignTagsToEntry(Entry $entry, $tags, array $entitiesReady = []) | ||
121 | { | ||
122 | if (!is_array($tags)) { | ||
123 | $tags = explode(',', $tags); | ||
124 | } | ||
125 | |||
126 | // keeps only Tag entity from the "not yet flushed entities" | ||
127 | $tagsNotYetFlushed = []; | ||
128 | foreach ($entitiesReady as $entity) { | ||
129 | if ($entity instanceof Tag) { | ||
130 | $tagsNotYetFlushed[$entity->getLabel()] = $entity; | ||
131 | } | ||
132 | } | ||
133 | |||
134 | foreach ($tags as $label) { | ||
135 | $label = trim($label); | ||
136 | |||
137 | // avoid empty tag | ||
138 | if (0 === strlen($label)) { | ||
139 | continue; | ||
140 | } | ||
141 | |||
142 | if (isset($tagsNotYetFlushed[$label])) { | ||
143 | $tagEntity = $tagsNotYetFlushed[$label]; | ||
144 | } else { | ||
145 | $tagEntity = $this->tagRepository->findOneByLabel($label); | ||
146 | |||
147 | if (is_null($tagEntity)) { | ||
148 | $tagEntity = new Tag(); | ||
149 | $tagEntity->setLabel($label); | ||
150 | } | ||
151 | } | ||
152 | |||
153 | // only add the tag on the entry if the relation doesn't exist | ||
154 | if (false === $entry->getTags()->contains($tagEntity)) { | ||
155 | $entry->addTag($tagEntity); | ||
156 | } | ||
157 | } | ||
158 | } | 142 | } |
159 | 143 | ||
160 | /** | 144 | /** |
161 | * Validate that the given content as enough value to be used | 145 | * Validate that the given content has at least a title, an html and a url. |
162 | * instead of fetch the content from the url. | ||
163 | * | 146 | * |
164 | * @param array $content | 147 | * @param array $content |
165 | * | 148 | * |
@@ -167,6 +150,6 @@ class ContentProxy | |||
167 | */ | 150 | */ |
168 | private function validateContent(array $content) | 151 | private function validateContent(array $content) |
169 | { | 152 | { |
170 | return isset($content['title']) && isset($content['html']) && isset($content['url']) && isset($content['language']) && isset($content['content_type']); | 153 | return !empty($content['title']) && !empty($content['html']) && !empty($content['url']); |
171 | } | 154 | } |
172 | } | 155 | } |