about | summary | refs | log | tree | commit | diff | homepage
path: root/src/Wallabag/CoreBundle/Helper
diff options
context:
space:
mode:
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper')
-rw-r--r-- src/Wallabag/CoreBundle/Helper/ContentProxy.php 73
-rw-r--r-- src/Wallabag/CoreBundle/Helper/DownloadImages.php 93
-rw-r--r-- src/Wallabag/CoreBundle/Helper/EntriesExport.php 114
-rw-r--r-- src/Wallabag/CoreBundle/Helper/FileCookieJar.php 91
-rw-r--r-- src/Wallabag/CoreBundle/Helper/HttpClientFactory.php 51
-rw-r--r-- src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php 5
-rw-r--r-- src/Wallabag/CoreBundle/Helper/RuleBasedTagger.php 5
-rw-r--r-- src/Wallabag/CoreBundle/Helper/TagsAssigner.php 1
-rw-r--r-- src/Wallabag/CoreBundle/Helper/UrlHasher.php 22
9 files changed, 324 insertions, 131 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index d38811a2..9c6fa8db 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry;
12use Wallabag\CoreBundle\Tools\Utils; 12use Wallabag\CoreBundle\Tools\Utils;
13 13
14/** 14/**
15 * This kind of proxy class take care of getting the content from an url 15 * This kind of proxy class takes care of getting the content from an url
16 * and update the entry with what it found. 16 * and updates the entry with what it found.
17 */ 17 */
18class ContentProxy 18class ContentProxy
19{ 19{
@@ -47,13 +47,18 @@ class ContentProxy
47 */ 47 */
48 public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false) 48 public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false)
49 { 49 {
50 $this->graby->toggleImgNoReferrer(true);
50 if (!empty($content['html'])) { 51 if (!empty($content['html'])) {
51 $content['html'] = $this->graby->cleanupHtml($content['html'], $url); 52 $content['html'] = $this->graby->cleanupHtml($content['html'], $url);
52 } 53 }
53 54
54 if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) { 55 if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) {
55 $fetchedContent = $this->graby->fetchContent($url); 56 $fetchedContent = $this->graby->fetchContent($url);
56 $fetchedContent['title'] = $this->sanitizeContentTitle($fetchedContent['title'], $fetchedContent['content_type']); 57
58 $fetchedContent['title'] = $this->sanitizeContentTitle(
59 $fetchedContent['title'],
60 isset($fetchedContent['headers']['content-type']) ? $fetchedContent['headers']['content-type'] : ''
61 );
57 62
58 // when content is imported, we have information in $content 63 // when content is imported, we have information in $content
59 // in case fetching content goes bad, we'll keep the imported information instead of overriding them 64 // in case fetching content goes bad, we'll keep the imported information instead of overriding them
@@ -73,13 +78,14 @@ class ContentProxy
73 $entry->setUrl($url); 78 $entry->setUrl($url);
74 } 79 }
75 80
81 $entry->setGivenUrl($url);
82
76 $this->stockEntry($entry, $content); 83 $this->stockEntry($entry, $content);
77 } 84 }
78 85
79 /** 86 /**
80 * Use a Symfony validator to ensure the language is well formatted. 87 * Use a Symfony validator to ensure the language is well formatted.
81 * 88 *
82 * @param Entry $entry
83 * @param string $value Language to validate and save 89 * @param string $value Language to validate and save
84 */ 90 */
85 public function updateLanguage(Entry $entry, $value) 91 public function updateLanguage(Entry $entry, $value)
@@ -105,7 +111,6 @@ class ContentProxy
105 /** 111 /**
106 * Use a Symfony validator to ensure the preview picture is a real url. 112 * Use a Symfony validator to ensure the preview picture is a real url.
107 * 113 *
108 * @param Entry $entry
109 * @param string $value URL to validate and save 114 * @param string $value URL to validate and save
110 */ 115 */
111 public function updatePreviewPicture(Entry $entry, $value) 116 public function updatePreviewPicture(Entry $entry, $value)
@@ -127,7 +132,6 @@ class ContentProxy
127 /** 132 /**
128 * Update date. 133 * Update date.
129 * 134 *
130 * @param Entry $entry
131 * @param string $value Date to validate and save 135 * @param string $value Date to validate and save
132 */ 136 */
133 public function updatePublishedAt(Entry $entry, $value) 137 public function updatePublishedAt(Entry $entry, $value)
@@ -154,8 +158,6 @@ class ContentProxy
154 158
155 /** 159 /**
156 * Helper to extract and save host from entry url. 160 * Helper to extract and save host from entry url.
157 *
158 * @param Entry $entry
159 */ 161 */
160 public function setEntryDomainName(Entry $entry) 162 public function setEntryDomainName(Entry $entry)
161 { 163 {
@@ -169,8 +171,6 @@ class ContentProxy
169 * Helper to set a default title using: 171 * Helper to set a default title using:
170 * - url basename, if applicable 172 * - url basename, if applicable
171 * - hostname. 173 * - hostname.
172 *
173 * @param Entry $entry
174 */ 174 */
175 public function setDefaultEntryTitle(Entry $entry) 175 public function setDefaultEntryTitle(Entry $entry)
176 { 176 {
@@ -187,8 +187,8 @@ class ContentProxy
187 /** 187 /**
188 * Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 character. 188 * Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 character.
189 * 189 *
190 * @param $title 190 * @param string $title
191 * @param $contentType 191 * @param string $contentType
192 * 192 *
193 * @return string 193 * @return string
194 */ 194 */
@@ -252,22 +252,19 @@ class ContentProxy
252 252
253 if (!empty($content['title'])) { 253 if (!empty($content['title'])) {
254 $entry->setTitle($content['title']); 254 $entry->setTitle($content['title']);
255 } elseif (!empty($content['open_graph']['og_title'])) {
256 $entry->setTitle($content['open_graph']['og_title']);
257 } 255 }
258 256
259 $html = $content['html']; 257 if (empty($content['html'])) {
260 if (false === $html) { 258 $content['html'] = $this->fetchingErrorMessage;
261 $html = $this->fetchingErrorMessage;
262 259
263 if (!empty($content['open_graph']['og_description'])) { 260 if (!empty($content['description'])) {
264 $html .= '<p><i>But we found a short description: </i></p>'; 261 $content['html'] .= '<p><i>But we found a short description: </i></p>';
265 $html .= $content['open_graph']['og_description']; 262 $content['html'] .= $content['description'];
266 } 263 }
267 } 264 }
268 265
269 $entry->setContent($html); 266 $entry->setContent($content['html']);
270 $entry->setReadingTime(Utils::getReadingTime($html)); 267 $entry->setReadingTime(Utils::getReadingTime($content['html']));
271 268
272 if (!empty($content['status'])) { 269 if (!empty($content['status'])) {
273 $entry->setHttpStatus($content['status']); 270 $entry->setHttpStatus($content['status']);
@@ -277,8 +274,8 @@ class ContentProxy
277 $entry->setPublishedBy($content['authors']); 274 $entry->setPublishedBy($content['authors']);
278 } 275 }
279 276
280 if (!empty($content['all_headers']) && $this->storeArticleHeaders) { 277 if (!empty($content['headers'])) {
281 $entry->setHeaders($content['all_headers']); 278 $entry->setHeaders($content['headers']);
282 } 279 }
283 280
284 if (!empty($content['date'])) { 281 if (!empty($content['date'])) {
@@ -289,17 +286,30 @@ class ContentProxy
289 $this->updateLanguage($entry, $content['language']); 286 $this->updateLanguage($entry, $content['language']);
290 } 287 }
291 288
292 if (!empty($content['open_graph']['og_image'])) { 289 $previewPictureUrl = '';
293 $this->updatePreviewPicture($entry, $content['open_graph']['og_image']); 290 if (!empty($content['image'])) {
291 $previewPictureUrl = $content['image'];
294 } 292 }
295 293
296 // if content is an image, define it as a preview too 294 // if content is an image, define it as a preview too
297 if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { 295 if (!empty($content['headers']['content-type']) && \in_array($this->mimeGuesser->guess($content['headers']['content-type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
298 $this->updatePreviewPicture($entry, $content['url']); 296 $previewPictureUrl = $content['url'];
297 } elseif (empty($previewPictureUrl)) {
298 $this->logger->debug('Extracting images from content to provide a default preview picture');
299 $imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']);
300 $this->logger->debug(\count($imagesUrls) . ' pictures found');
301
302 if (!empty($imagesUrls)) {
303 $previewPictureUrl = $imagesUrls[0];
304 }
305 }
306
307 if (!empty($content['headers']['content-type'])) {
308 $entry->setMimetype($content['headers']['content-type']);
299 } 309 }
300 310
301 if (!empty($content['content_type'])) { 311 if (!empty($previewPictureUrl)) {
302 $entry->setMimetype($content['content_type']); 312 $this->updatePreviewPicture($entry, $previewPictureUrl);
303 } 313 }
304 314
305 try { 315 try {
@@ -316,7 +326,6 @@ class ContentProxy
316 * Update the origin_url field when a redirection occurs 326 * Update the origin_url field when a redirection occurs
317 * This field is set if it is empty and new url does not match ignore list. 327 * This field is set if it is empty and new url does not match ignore list.
318 * 328 *
319 * @param Entry $entry
320 * @param string $url 329 * @param string $url
321 */ 330 */
322 private function updateOriginUrl(Entry $entry, $url) 331 private function updateOriginUrl(Entry $entry, $url)
@@ -424,8 +433,6 @@ class ContentProxy
424 /** 433 /**
425 * Validate that the given content has at least a title, an html and a url. 434 * Validate that the given content has at least a title, an html and a url.
426 * 435 *
427 * @param array $content
428 *
429 * @return bool true if valid otherwise false 436 * @return bool true if valid otherwise false
430 */ 437 */
431 private function validateContent(array $content) 438 private function validateContent(array $content)
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index cc3dcfce..1d98fd1a 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -2,8 +2,15 @@
2 2
3namespace Wallabag\CoreBundle\Helper; 3namespace Wallabag\CoreBundle\Helper;
4 4
5use GuzzleHttp\Client; 5use GuzzleHttp\Psr7\Uri;
6use GuzzleHttp\Message\Response; 6use GuzzleHttp\Psr7\UriResolver;
7use Http\Client\Common\HttpMethodsClient;
8use Http\Client\Common\Plugin\ErrorPlugin;
9use Http\Client\Common\PluginClient;
10use Http\Client\HttpClient;
11use Http\Discovery\MessageFactoryDiscovery;
12use Http\Message\MessageFactory;
13use Psr\Http\Message\ResponseInterface;
7use Psr\Log\LoggerInterface; 14use Psr\Log\LoggerInterface;
8use Symfony\Component\DomCrawler\Crawler; 15use Symfony\Component\DomCrawler\Crawler;
9use Symfony\Component\Finder\Finder; 16use Symfony\Component\Finder\Finder;
@@ -19,9 +26,9 @@ class DownloadImages
19 private $mimeGuesser; 26 private $mimeGuesser;
20 private $wallabagUrl; 27 private $wallabagUrl;
21 28
22 public function __construct(Client $client, $baseFolder, $wallabagUrl, LoggerInterface $logger) 29 public function __construct(HttpClient $client, $baseFolder, $wallabagUrl, LoggerInterface $logger, MessageFactory $messageFactory = null)
23 { 30 {
24 $this->client = $client; 31 $this->client = new HttpMethodsClient(new PluginClient($client, [new ErrorPlugin()]), $messageFactory ?: MessageFactoryDiscovery::find());
25 $this->baseFolder = $baseFolder; 32 $this->baseFolder = $baseFolder;
26 $this->wallabagUrl = rtrim($wallabagUrl, '/'); 33 $this->wallabagUrl = rtrim($wallabagUrl, '/');
27 $this->logger = $logger; 34 $this->logger = $logger;
@@ -31,6 +38,23 @@ class DownloadImages
31 } 38 }
32 39
33 /** 40 /**
41 * Process the html and extract images URLs from it.
42 *
43 * @param string $html
44 *
45 * @return string[]
46 */
47 public static function extractImagesUrlsFromHtml($html)
48 {
49 $crawler = new Crawler($html);
50 $imagesCrawler = $crawler->filterXpath('//img');
51 $imagesUrls = $imagesCrawler->extract(['src']);
52 $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler);
53
54 return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
55 }
56
57 /**
34 * Process the html and extract image from it, save them to local and return the updated html. 58 * Process the html and extract image from it, save them to local and return the updated html.
35 * 59 *
36 * @param int $entryId ID of the entry 60 * @param int $entryId ID of the entry
@@ -41,13 +65,7 @@ class DownloadImages
41 */ 65 */
42 public function processHtml($entryId, $html, $url) 66 public function processHtml($entryId, $html, $url)
43 { 67 {
44 $crawler = new Crawler($html); 68 $imagesUrls = self::extractImagesUrlsFromHtml($html);
45 $imagesCrawler = $crawler
46 ->filterXpath('//img');
47 $imagesUrls = $imagesCrawler
48 ->extract(['src']);
49 $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler);
50 $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
51 69
52 $relativePath = $this->getRelativePath($entryId); 70 $relativePath = $this->getRelativePath($entryId);
53 71
@@ -122,7 +140,7 @@ class DownloadImages
122 $localPath = $folderPath . '/' . $hashImage . '.' . $ext; 140 $localPath = $folderPath . '/' . $hashImage . '.' . $ext;
123 141
124 try { 142 try {
125 $im = imagecreatefromstring($res->getBody()); 143 $im = imagecreatefromstring((string) $res->getBody());
126 } catch (\Exception $e) { 144 } catch (\Exception $e) {
127 $im = false; 145 $im = false;
128 } 146 }
@@ -135,7 +153,21 @@ class DownloadImages
135 153
136 switch ($ext) { 154 switch ($ext) {
137 case 'gif': 155 case 'gif':
138 imagegif($im, $localPath); 156 // use Imagick if available to keep GIF animation
157 if (class_exists('\\Imagick')) {
158 try {
159 $imagick = new \Imagick();
160 $imagick->readImageBlob($res->getBody());
161 $imagick->setImageFormat('gif');
162 $imagick->writeImages($localPath, true);
163 } catch (\Exception $e) {
164 // if Imagick fails, fall back to the default solution
165 imagegif($im, $localPath);
166 }
167 } else {
168 imagegif($im, $localPath);
169 }
170
139 $this->logger->debug('DownloadImages: Re-creating gif'); 171 $this->logger->debug('DownloadImages: Re-creating gif');
140 break; 172 break;
141 case 'jpeg': 173 case 'jpeg':
@@ -181,29 +213,30 @@ class DownloadImages
181 /** 213 /**
182 * Get images urls from the srcset image attribute. 214 * Get images urls from the srcset image attribute.
183 * 215 *
184 * @param Crawler $imagesCrawler
185 *
186 * @return array An array of urls 216 * @return array An array of urls
187 */ 217 */
188 private function getSrcsetUrls(Crawler $imagesCrawler) 218 private static function getSrcsetUrls(Crawler $imagesCrawler)
189 { 219 {
190 $urls = []; 220 $urls = [];
191 $iterator = $imagesCrawler 221 $iterator = $imagesCrawler->getIterator();
192 ->getIterator(); 222
193 while ($iterator->valid()) { 223 while ($iterator->valid()) {
194 $srcsetAttribute = $iterator->current()->getAttribute('srcset'); 224 $srcsetAttribute = $iterator->current()->getAttribute('srcset');
225
195 if ('' !== $srcsetAttribute) { 226 if ('' !== $srcsetAttribute) {
196 // Couldn't start with " OR ' OR a white space 227 // Couldn't start with " OR ' OR a white space
197 // Could be one or more white space 228 // Could be one or more white space
198 // Must be one or more digits followed by w OR x 229 // Must be one or more digits followed by w OR x
199 $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/"; 230 $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/";
200 preg_match_all($pattern, $srcsetAttribute, $matches); 231 preg_match_all($pattern, $srcsetAttribute, $matches);
232
201 $srcset = \call_user_func_array('array_merge', $matches); 233 $srcset = \call_user_func_array('array_merge', $matches);
202 $srcsetUrls = array_map(function ($src) { 234 $srcsetUrls = array_map(function ($src) {
203 return trim(explode(' ', $src, 2)[0]); 235 return trim(explode(' ', $src, 2)[0]);
204 }, $srcset); 236 }, $srcset);
205 $urls = array_merge($srcsetUrls, $urls); 237 $urls = array_merge($srcsetUrls, $urls);
206 } 238 }
239
207 $iterator->next(); 240 $iterator->next();
208 } 241 }
209 242
@@ -260,33 +293,29 @@ class DownloadImages
260 return $url; 293 return $url;
261 } 294 }
262 295
263 $base = new \SimplePie_IRI($base); 296 $base = new Uri($base);
264 297
265 // remove '//' in URL path (causes URLs not to resolve properly) 298 // in case the url has no scheme & host
266 if (isset($base->ipath)) { 299 if ('' === $base->getAuthority() || '' === $base->getScheme()) {
267 $base->ipath = preg_replace('!//+!', '/', $base->ipath); 300 $this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $base, 'url' => $url]);
268 }
269 301
270 if ($absolute = \SimplePie_IRI::absolutize($base, $url)) { 302 return false;
271 return $absolute->get_uri();
272 } 303 }
273 304
274 $this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $base, 'url' => $url]); 305 return (string) UriResolver::resolve($base, new Uri($url));
275
276 return false;
277 } 306 }
278 307
279 /** 308 /**
280 * Retrieve and validate the extension from the response of the url of the image. 309 * Retrieve and validate the extension from the response of the url of the image.
281 * 310 *
282 * @param Response $res Guzzle Response 311 * @param ResponseInterface $res Http Response
283 * @param string $imagePath Path from the src image from the content (used for log only) 312 * @param string $imagePath Path from the src image from the content (used for log only)
284 * 313 *
285 * @return string|false Extension name or false if validation failed 314 * @return string|false Extension name or false if validation failed
286 */ 315 */
287 private function getExtensionFromResponse(Response $res, $imagePath) 316 private function getExtensionFromResponse(ResponseInterface $res, $imagePath)
288 { 317 {
289 $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); 318 $ext = $this->mimeGuesser->guess(current($res->getHeader('content-type')));
290 $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); 319 $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]);
291 320
292 // ok header doesn't have the extension, try a different way 321 // ok header doesn't have the extension, try a different way
diff --git a/src/Wallabag/CoreBundle/Helper/EntriesExport.php b/src/Wallabag/CoreBundle/Helper/EntriesExport.php
index cbf1037b..f981ee50 100644
--- a/src/Wallabag/CoreBundle/Helper/EntriesExport.php
+++ b/src/Wallabag/CoreBundle/Helper/EntriesExport.php
@@ -85,7 +85,7 @@ class EntriesExport
85 public function updateAuthor($method) 85 public function updateAuthor($method)
86 { 86 {
87 if ('entry' !== $method) { 87 if ('entry' !== $method) {
88 $this->author = $method . ' authors'; 88 $this->author = 'Various authors';
89 89
90 return $this; 90 return $this;
91 } 91 }
@@ -150,8 +150,6 @@ class EntriesExport
150 */ 150 */
151 151
152 $book->setTitle($this->title); 152 $book->setTitle($this->title);
153 // Could also be the ISBN number, prefered for published books, or a UUID.
154 $book->setIdentifier($this->title, EPub::IDENTIFIER_URI);
155 // Not needed, but included for the example, Language is mandatory, but EPub defaults to "en". Use RFC3066 Language codes, such as "en", "da", "fr" etc. 153 // Not needed, but included for the example, Language is mandatory, but EPub defaults to "en". Use RFC3066 Language codes, such as "en", "da", "fr" etc.
156 $book->setLanguage($this->language); 154 $book->setLanguage($this->language);
157 $book->setDescription('Some articles saved on my wallabag'); 155 $book->setDescription('Some articles saved on my wallabag');
@@ -167,12 +165,9 @@ class EntriesExport
167 $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, 'PHP'); 165 $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, 'PHP');
168 $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, 'wallabag'); 166 $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, 'wallabag');
169 167
170 /* 168 $entryIds = [];
171 * Front page 169 $entryCount = \count($this->entries);
172 */ 170 $i = 0;
173 if (file_exists($this->logoPath)) {
174 $book->setCoverImage('Cover.png', file_get_contents($this->logoPath), 'image/png');
175 }
176 171
177 /* 172 /*
178 * Adding actual entries 173 * Adding actual entries
@@ -180,21 +175,48 @@ class EntriesExport
180 175
181 // set tags as subjects 176 // set tags as subjects
182 foreach ($this->entries as $entry) { 177 foreach ($this->entries as $entry) {
178 ++$i;
179
180 /*
181 * Front page
182 * Set if there's only one entry in the given set
183 */
184 if (1 === $entryCount && null !== $entry->getPreviewPicture()) {
185 $book->setCoverImage($entry->getPreviewPicture());
186 }
187
183 foreach ($entry->getTags() as $tag) { 188 foreach ($entry->getTags() as $tag) {
184 $book->setSubject($tag->getLabel()); 189 $book->setSubject($tag->getLabel());
185 } 190 }
191 $filename = sha1(sprintf('%s:%s', $entry->getUrl(), $entry->getTitle()));
186 192
187 // the reader in Kobo Devices doesn't likes special caracters 193 $publishedBy = $entry->getPublishedBy();
188 // in filenames, we limit to A-z/0-9 194 $authors = $this->translator->trans('export.unknown');
189 $filename = preg_replace('/[^A-Za-z0-9\-]/', '', $entry->getTitle()); 195 if (!empty($publishedBy)) {
196 $authors = implode(',', $publishedBy);
197 }
190 198
191 $titlepage = $content_start . '<h1>' . $entry->getTitle() . '</h1>' . $this->getExportInformation('PHPePub') . $bookEnd; 199 $titlepage = $content_start .
192 $book->addChapter('Title', 'Title.html', $titlepage, true, EPub::EXTERNAL_REF_ADD); 200 '<h1>' . $entry->getTitle() . '</h1>' .
201 '<dl>' .
202 '<dt>' . $this->translator->trans('entry.view.published_by') . '</dt><dd>' . $authors . '</dd>' .
203 '<dt>' . $this->translator->trans('entry.metadata.reading_time') . '</dt><dd>' . $this->translator->trans('entry.metadata.reading_time_minutes_short', ['%readingTime%' => $entry->getReadingTime()]) . '</dd>' .
204 '<dt>' . $this->translator->trans('entry.metadata.added_on') . '</dt><dd>' . $entry->getCreatedAt()->format('Y-m-d') . '</dd>' .
205 '<dt>' . $this->translator->trans('entry.metadata.address') . '</dt><dd><a href="' . $entry->getUrl() . '">' . $entry->getUrl() . '</a></dd>' .
206 '</dl>' .
207 $bookEnd;
208 $book->addChapter("Entry {$i} of {$entryCount}", "{$filename}_cover.html", $titlepage, true, EPub::EXTERNAL_REF_ADD);
193 $chapter = $content_start . $entry->getContent() . $bookEnd; 209 $chapter = $content_start . $entry->getContent() . $bookEnd;
194 $book->addChapter($entry->getTitle(), htmlspecialchars($filename) . '.html', $chapter, true, EPub::EXTERNAL_REF_ADD); 210
211 $entryIds[] = $entry->getId();
212 $book->addChapter($entry->getTitle(), "{$filename}.html", $chapter, true, EPub::EXTERNAL_REF_ADD);
195 } 213 }
196 214
197 $book->buildTOC(); 215 $book->addChapter('Notices', 'Cover2.html', $content_start . $this->getExportInformation('PHPePub') . $bookEnd);
216
217 // Could also be the ISBN number, preferred for published books, or a UUID.
218 $hash = sha1(sprintf('%s:%s', $this->wallabagUrl, implode(',', $entryIds)));
219 $book->setIdentifier(sprintf('urn:wallabag:%s', $hash), EPub::IDENTIFIER_URI);
198 220
199 return Response::create( 221 return Response::create(
200 $book->getBook(), 222 $book->getBook(),
@@ -202,7 +224,7 @@ class EntriesExport
202 [ 224 [
203 'Content-Description' => 'File Transfer', 225 'Content-Description' => 'File Transfer',
204 'Content-type' => 'application/epub+zip', 226 'Content-type' => 'application/epub+zip',
205 'Content-Disposition' => 'attachment; filename="' . $this->title . '.epub"', 227 'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.epub"',
206 'Content-Transfer-Encoding' => 'binary', 228 'Content-Transfer-Encoding' => 'binary',
207 ] 229 ]
208 ); 230 );
@@ -244,9 +266,6 @@ class EntriesExport
244 } 266 }
245 $mobi->setContentProvider($content); 267 $mobi->setContentProvider($content);
246 268
247 // the browser inside Kindle Devices doesn't likes special caracters either, we limit to A-z/0-9
248 $this->title = preg_replace('/[^A-Za-z0-9\-]/', '', $this->title);
249
250 return Response::create( 269 return Response::create(
251 $mobi->toString(), 270 $mobi->toString(),
252 200, 271 200,
@@ -254,7 +273,7 @@ class EntriesExport
254 'Accept-Ranges' => 'bytes', 273 'Accept-Ranges' => 'bytes',
255 'Content-Description' => 'File Transfer', 274 'Content-Description' => 'File Transfer',
256 'Content-type' => 'application/x-mobipocket-ebook', 275 'Content-type' => 'application/x-mobipocket-ebook',
257 'Content-Disposition' => 'attachment; filename="' . $this->title . '.mobi"', 276 'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.mobi"',
258 'Content-Transfer-Encoding' => 'binary', 277 'Content-Transfer-Encoding' => 'binary',
259 ] 278 ]
260 ); 279 );
@@ -279,14 +298,6 @@ class EntriesExport
279 $pdf->SetKeywords('wallabag'); 298 $pdf->SetKeywords('wallabag');
280 299
281 /* 300 /*
282 * Front page
283 */
284 $pdf->AddPage();
285 $intro = '<h1>' . $this->title . '</h1>' . $this->getExportInformation('tcpdf');
286
287 $pdf->writeHTMLCell(0, 0, '', '', $intro, 0, 1, 0, true, '', true);
288
289 /*
290 * Adding actual entries 301 * Adding actual entries
291 */ 302 */
292 foreach ($this->entries as $entry) { 303 foreach ($this->entries as $entry) {
@@ -294,6 +305,22 @@ class EntriesExport
294 $pdf->SetKeywords($tag->getLabel()); 305 $pdf->SetKeywords($tag->getLabel());
295 } 306 }
296 307
308 $publishedBy = $entry->getPublishedBy();
309 $authors = $this->translator->trans('export.unknown');
310 if (!empty($publishedBy)) {
311 $authors = implode(',', $publishedBy);
312 }
313
314 $pdf->addPage();
315 $html = '<h1>' . $entry->getTitle() . '</h1>' .
316 '<dl>' .
317 '<dt>' . $this->translator->trans('entry.view.published_by') . '</dt><dd>' . $authors . '</dd>' .
318 '<dt>' . $this->translator->trans('entry.metadata.reading_time') . '</dt><dd>' . $this->translator->trans('entry.metadata.reading_time_minutes_short', ['%readingTime%' => $entry->getReadingTime()]) . '</dd>' .
319 '<dt>' . $this->translator->trans('entry.metadata.added_on') . '</dt><dd>' . $entry->getCreatedAt()->format('Y-m-d') . '</dd>' .
320 '<dt>' . $this->translator->trans('entry.metadata.address') . '</dt><dd><a href="' . $entry->getUrl() . '">' . $entry->getUrl() . '</a></dd>' .
321 '</dl>';
322 $pdf->writeHTMLCell(0, 0, '', '', $html, 0, 1, 0, true, '', true);
323
297 $pdf->AddPage(); 324 $pdf->AddPage();
298 $html = '<h1>' . $entry->getTitle() . '</h1>'; 325 $html = '<h1>' . $entry->getTitle() . '</h1>';
299 $html .= $entry->getContent(); 326 $html .= $entry->getContent();
@@ -301,6 +328,14 @@ class EntriesExport
301 $pdf->writeHTMLCell(0, 0, '', '', $html, 0, 1, 0, true, '', true); 328 $pdf->writeHTMLCell(0, 0, '', '', $html, 0, 1, 0, true, '', true);
302 } 329 }
303 330
331 /*
332 * Last page
333 */
334 $pdf->AddPage();
335 $html = $this->getExportInformation('tcpdf');
336
337 $pdf->writeHTMLCell(0, 0, '', '', $html, 0, 1, 0, true, '', true);
338
304 // set image scale factor 339 // set image scale factor
305 $pdf->setImageScale(PDF_IMAGE_SCALE_RATIO); 340 $pdf->setImageScale(PDF_IMAGE_SCALE_RATIO);
306 341
@@ -310,7 +345,7 @@ class EntriesExport
310 [ 345 [
311 'Content-Description' => 'File Transfer', 346 'Content-Description' => 'File Transfer',
312 'Content-type' => 'application/pdf', 347 'Content-type' => 'application/pdf',
313 'Content-Disposition' => 'attachment; filename="' . $this->title . '.pdf"', 348 'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.pdf"',
314 'Content-Transfer-Encoding' => 'binary', 349 'Content-Transfer-Encoding' => 'binary',
315 ] 350 ]
316 ); 351 );
@@ -356,7 +391,7 @@ class EntriesExport
356 200, 391 200,
357 [ 392 [
358 'Content-type' => 'application/csv', 393 'Content-type' => 'application/csv',
359 'Content-Disposition' => 'attachment; filename="' . $this->title . '.csv"', 394 'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.csv"',
360 'Content-Transfer-Encoding' => 'UTF-8', 395 'Content-Transfer-Encoding' => 'UTF-8',
361 ] 396 ]
362 ); 397 );
@@ -374,7 +409,7 @@ class EntriesExport
374 200, 409 200,
375 [ 410 [
376 'Content-type' => 'application/json', 411 'Content-type' => 'application/json',
377 'Content-Disposition' => 'attachment; filename="' . $this->title . '.json"', 412 'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.json"',
378 'Content-Transfer-Encoding' => 'UTF-8', 413 'Content-Transfer-Encoding' => 'UTF-8',
379 ] 414 ]
380 ); 415 );
@@ -392,7 +427,7 @@ class EntriesExport
392 200, 427 200,
393 [ 428 [
394 'Content-type' => 'application/xml', 429 'Content-type' => 'application/xml',
395 'Content-Disposition' => 'attachment; filename="' . $this->title . '.xml"', 430 'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.xml"',
396 'Content-Transfer-Encoding' => 'UTF-8', 431 'Content-Transfer-Encoding' => 'UTF-8',
397 ] 432 ]
398 ); 433 );
@@ -418,7 +453,7 @@ class EntriesExport
418 200, 453 200,
419 [ 454 [
420 'Content-type' => 'text/plain', 455 'Content-type' => 'text/plain',
421 'Content-Disposition' => 'attachment; filename="' . $this->title . '.txt"', 456 'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.txt"',
422 'Content-Transfer-Encoding' => 'UTF-8', 457 'Content-Transfer-Encoding' => 'UTF-8',
423 ] 458 ]
424 ); 459 );
@@ -461,4 +496,15 @@ class EntriesExport
461 496
462 return str_replace('%IMAGE%', '', $info); 497 return str_replace('%IMAGE%', '', $info);
463 } 498 }
499
500 /**
501 * Return a sanitized version of the title by applying translit iconv
502 * and removing every character that is not alphanumeric, a hyphen, a space or an apostrophe.
503 *
504 * @return string Sanitized filename
505 */
506 private function getSanitizedFilename()
507 {
508 return preg_replace('/[^A-Za-z0-9\- \']/', '', iconv('utf-8', 'us-ascii//TRANSLIT', $this->title));
509 }
464} 510}
diff --git a/src/Wallabag/CoreBundle/Helper/FileCookieJar.php b/src/Wallabag/CoreBundle/Helper/FileCookieJar.php
new file mode 100644
index 00000000..9a63e949
--- /dev/null
+++ b/src/Wallabag/CoreBundle/Helper/FileCookieJar.php
@@ -0,0 +1,91 @@
1<?php
2
3namespace Wallabag\CoreBundle\Helper;
4
5use GuzzleHttp\Cookie\FileCookieJar as BaseFileCookieJar;
6use GuzzleHttp\Cookie\SetCookie;
7use GuzzleHttp\Utils;
8use Psr\Log\LoggerInterface;
9
/**
 * Overridden cookie jar behavior to:
 * - fix multiple concurrent writes (see https://github.com/guzzle/guzzle/pull/1884)
 * - ignore errors when the cookie file is malformed (resulting in clearing it).
 */
class FileCookieJar extends BaseFileCookieJar
{
    /** @var LoggerInterface */
    private $logger;

    /**
     * @param LoggerInterface $logger     Only used to log info when something goes wrong
     * @param string          $cookieFile File to store the cookie data
     */
    public function __construct(LoggerInterface $logger, $cookieFile)
    {
        // The logger MUST be assigned before calling the parent constructor:
        // Guzzle's FileCookieJar constructor loads the cookie file when it exists,
        // and load() below logs through $this->logger when that file is malformed.
        $this->logger = $logger;

        parent::__construct($cookieFile);
    }

    /**
     * Saves the cookies to a file.
     *
     * Only cookies that have an expiration and are not flagged as "discard" are persisted.
     * The file is written under an exclusive lock to avoid corruption by concurrent writers.
     *
     * @param string $filename File to save
     *
     * @throws \RuntimeException if the file cannot be found or created
     */
    public function save($filename)
    {
        $json = [];
        foreach ($this as $cookie) {
            if ($cookie->getExpires() && !$cookie->getDiscard()) {
                $json[] = $cookie->toArray();
            }
        }

        if (false === file_put_contents($filename, json_encode($json), LOCK_EX)) {
            // @codeCoverageIgnoreStart
            throw new \RuntimeException("Unable to save file {$filename}");
            // @codeCoverageIgnoreEnd
        }
    }

    /**
     * Load cookies from a JSON formatted file.
     *
     * Old cookies are kept unless overwritten by newly loaded ones.
     * A file containing broken JSON is logged and then ignored (no cookie is loaded from it).
     *
     * @param string $filename cookie file to load
     *
     * @throws \RuntimeException if the file cannot be loaded, or if it contains valid JSON
     *                           which is neither a list of cookies nor an empty value
     */
    public function load($filename)
    {
        $json = file_get_contents($filename);
        if (false === $json) {
            // @codeCoverageIgnoreStart
            throw new \RuntimeException("Unable to load file {$filename}");
            // @codeCoverageIgnoreEnd
        }

        try {
            $data = Utils::jsonDecode($json, true);
        } catch (\InvalidArgumentException $e) {
            $this->logger->error('JSON inside the cookie is broken', [
                'json' => $json,
                'error_msg' => $e->getMessage(),
            ]);

            // cookie file is invalid, just ignore the exception and it'll reset the whole cookie file
            $data = '';
        }

        if (\is_array($data)) {
            // reuse the already decoded payload instead of decoding the JSON a second time
            foreach ($data as $cookie) {
                $this->setCookie(new SetCookie($cookie));
            }
        } elseif ('' !== (string) $data) {
            // decoded value is a non-empty scalar: this is not a cookie file
            // (the string cast mirrors strlen() without passing null / non-strings to it)
            throw new \RuntimeException("Invalid cookie file: {$filename}");
        }
    }
}
diff --git a/src/Wallabag/CoreBundle/Helper/HttpClientFactory.php b/src/Wallabag/CoreBundle/Helper/HttpClientFactory.php
index 4602a684..ea864acb 100644
--- a/src/Wallabag/CoreBundle/Helper/HttpClientFactory.php
+++ b/src/Wallabag/CoreBundle/Helper/HttpClientFactory.php
@@ -2,16 +2,18 @@
2 2
3namespace Wallabag\CoreBundle\Helper; 3namespace Wallabag\CoreBundle\Helper;
4 4
5use Graby\Ring\Client\SafeCurlHandler; 5use GuzzleHttp\Client as GuzzleClient;
6use GuzzleHttp\Client;
7use GuzzleHttp\Cookie\CookieJar; 6use GuzzleHttp\Cookie\CookieJar;
8use GuzzleHttp\Event\SubscriberInterface; 7use GuzzleHttp\Event\SubscriberInterface;
8use Http\Adapter\Guzzle5\Client as GuzzleAdapter;
9use Http\Client\HttpClient;
10use Http\HttplugBundle\ClientFactory\ClientFactory;
9use Psr\Log\LoggerInterface; 11use Psr\Log\LoggerInterface;
10 12
11/** 13/**
12 * Builds and configures the Guzzle HTTP client. 14 * Builds and configures the HTTP client.
13 */ 15 */
14class HttpClientFactory 16class HttpClientFactory implements ClientFactory
15{ 17{
16 /** @var [\GuzzleHttp\Event\SubscriberInterface] */ 18 /** @var [\GuzzleHttp\Event\SubscriberInterface] */
17 private $subscribers = []; 19 private $subscribers = [];
@@ -25,9 +27,7 @@ class HttpClientFactory
25 /** 27 /**
26 * HttpClientFactory constructor. 28 * HttpClientFactory constructor.
27 * 29 *
28 * @param \GuzzleHttp\Cookie\CookieJar $cookieJar 30 * @param string $restrictedAccess This param is a kind of boolean. Values: 0 or 1
29 * @param string $restrictedAccess This param is a kind of boolean. Values: 0 or 1
30 * @param LoggerInterface $logger
31 */ 31 */
32 public function __construct(CookieJar $cookieJar, $restrictedAccess, LoggerInterface $logger) 32 public function __construct(CookieJar $cookieJar, $restrictedAccess, LoggerInterface $logger)
33 { 33 {
@@ -37,35 +37,38 @@ class HttpClientFactory
37 } 37 }
38 38
39 /** 39 /**
40 * @return \GuzzleHttp\Client|null 40 * Adds a subscriber to the HTTP client.
41 */
42 public function addSubscriber(SubscriberInterface $subscriber)
43 {
44 $this->subscribers[] = $subscriber;
45 }
46
47 /**
48 * Input an array of configuration to be able to create a HttpClient.
49 *
50 * @return HttpClient
41 */ 51 */
42 public function buildHttpClient() 52 public function createClient(array $config = [])
43 { 53 {
44 $this->logger->log('debug', 'Restricted access config enabled?', ['enabled' => (int) $this->restrictedAccess]); 54 $this->logger->log('debug', 'Restricted access config enabled?', ['enabled' => (int) $this->restrictedAccess]);
45 55
46 if (0 === (int) $this->restrictedAccess) { 56 if (0 === (int) $this->restrictedAccess) {
47 return; 57 return new GuzzleAdapter(new GuzzleClient($config));
48 } 58 }
49 59
50 // we clear the cookie to avoid websites who use cookies for analytics 60 // we clear the cookie to avoid websites who use cookies for analytics
51 $this->cookieJar->clear(); 61 $this->cookieJar->clear();
52 // need to set the (shared) cookie jar 62 if (!isset($config['defaults']['cookies'])) {
53 $client = new Client(['handler' => new SafeCurlHandler(), 'defaults' => ['cookies' => $this->cookieJar]]); 63 // need to set the (shared) cookie jar
64 $config['defaults']['cookies'] = $this->cookieJar;
65 }
54 66
67 $guzzle = new GuzzleClient($config);
55 foreach ($this->subscribers as $subscriber) { 68 foreach ($this->subscribers as $subscriber) {
56 $client->getEmitter()->attach($subscriber); 69 $guzzle->getEmitter()->attach($subscriber);
57 } 70 }
58 71
59 return $client; 72 return new GuzzleAdapter($guzzle);
60 }
61
62 /**
63 * Adds a subscriber to the HTTP client.
64 *
65 * @param SubscriberInterface $subscriber
66 */
67 public function addSubscriber(SubscriberInterface $subscriber)
68 {
69 $this->subscribers[] = $subscriber;
70 } 73 }
71} 74}
diff --git a/src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php b/src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php
index 1c2c5093..3d56a6d8 100644
--- a/src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php
+++ b/src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php
@@ -20,10 +20,9 @@ class PreparePagerForEntries
20 } 20 }
21 21
22 /** 22 /**
23 * @param AdapterInterface $adapter 23 * @param User $user If user isn't logged in, we can force it (like for feed)
24 * @param User $user If user isn't logged in, we can force it (like for rss)
25 * 24 *
26 * @return null|Pagerfanta 25 * @return Pagerfanta|null
27 */ 26 */
28 public function prepare(AdapterInterface $adapter, User $user = null) 27 public function prepare(AdapterInterface $adapter, User $user = null)
29 { 28 {
diff --git a/src/Wallabag/CoreBundle/Helper/RuleBasedTagger.php b/src/Wallabag/CoreBundle/Helper/RuleBasedTagger.php
index 63f65067..d48e2469 100644
--- a/src/Wallabag/CoreBundle/Helper/RuleBasedTagger.php
+++ b/src/Wallabag/CoreBundle/Helper/RuleBasedTagger.php
@@ -6,6 +6,7 @@ use Psr\Log\LoggerInterface;
6use RulerZ\RulerZ; 6use RulerZ\RulerZ;
7use Wallabag\CoreBundle\Entity\Entry; 7use Wallabag\CoreBundle\Entity\Entry;
8use Wallabag\CoreBundle\Entity\Tag; 8use Wallabag\CoreBundle\Entity\Tag;
9use Wallabag\CoreBundle\Entity\TaggingRule;
9use Wallabag\CoreBundle\Repository\EntryRepository; 10use Wallabag\CoreBundle\Repository\EntryRepository;
10use Wallabag\CoreBundle\Repository\TagRepository; 11use Wallabag\CoreBundle\Repository\TagRepository;
11use Wallabag\UserBundle\Entity\User; 12use Wallabag\UserBundle\Entity\User;
@@ -55,8 +56,6 @@ class RuleBasedTagger
55 /** 56 /**
56 * Apply all the tagging rules defined by a user on its entries. 57 * Apply all the tagging rules defined by a user on its entries.
57 * 58 *
58 * @param User $user
59 *
60 * @return array<Entry> A list of modified entries 59 * @return array<Entry> A list of modified entries
61 */ 60 */
62 public function tagAllForUser(User $user) 61 public function tagAllForUser(User $user)
@@ -108,8 +107,6 @@ class RuleBasedTagger
108 /** 107 /**
109 * Retrieves the tagging rules for a given user. 108 * Retrieves the tagging rules for a given user.
110 * 109 *
111 * @param User $user
112 *
113 * @return array<TaggingRule> 110 * @return array<TaggingRule>
114 */ 111 */
115 private function getRulesForUser(User $user) 112 private function getRulesForUser(User $user)
diff --git a/src/Wallabag/CoreBundle/Helper/TagsAssigner.php b/src/Wallabag/CoreBundle/Helper/TagsAssigner.php
index e6b4989f..433b09fe 100644
--- a/src/Wallabag/CoreBundle/Helper/TagsAssigner.php
+++ b/src/Wallabag/CoreBundle/Helper/TagsAssigner.php
@@ -21,7 +21,6 @@ class TagsAssigner
21 /** 21 /**
22 * Assign some tags to an entry. 22 * Assign some tags to an entry.
23 * 23 *
24 * @param Entry $entry
25 * @param array|string $tags An array of tag or a string coma separated of tag 24 * @param array|string $tags An array of tag or a string coma separated of tag
26 * @param array $entitiesReady Entities from the EntityManager which are persisted but not yet flushed 25 * @param array $entitiesReady Entities from the EntityManager which are persisted but not yet flushed
27 * It is mostly to fix duplicate tag on import @see http://stackoverflow.com/a/7879164/569101 26 * It is mostly to fix duplicate tag on import @see http://stackoverflow.com/a/7879164/569101
diff --git a/src/Wallabag/CoreBundle/Helper/UrlHasher.php b/src/Wallabag/CoreBundle/Helper/UrlHasher.php
new file mode 100644
index 00000000..6753745f
--- /dev/null
+++ b/src/Wallabag/CoreBundle/Helper/UrlHasher.php
@@ -0,0 +1,22 @@
1<?php
2
3namespace Wallabag\CoreBundle\Helper;
4
/**
 * Turns URLs into fixed-size digests, for privacy and performance.
 */
class UrlHasher
{
    /**
     * Compute the digest of an url with the requested hashing algorithm.
     *
     * The url is url-decoded before being hashed.
     * Hashed url are faster to be retrieved in the database than the real url.
     *
     * @param string $url       Url to hash
     * @param string $algorithm Name of an algorithm accepted by hash() (sha1 by default)
     *
     * @return string Hexadecimal digest of the decoded url
     */
    public static function hashUrl(string $url, $algorithm = 'sha1')
    {
        $decodedUrl = urldecode($url);

        return hash($algorithm, $decodedUrl);
    }
}