]>
Commit | Line | Data |
---|---|---|
1 | <?php | |
2 | ||
3 | namespace Wallabag\CoreBundle\Helper; | |
4 | ||
5 | use Html2Text\Html2Text; | |
6 | use JMS\Serializer\SerializationContext; | |
7 | use JMS\Serializer\SerializerBuilder; | |
8 | use PHPePub\Core\EPub; | |
9 | use PHPePub\Core\Structure\OPF\DublinCore; | |
10 | use Symfony\Component\HttpFoundation\Response; | |
11 | use Symfony\Component\Translation\TranslatorInterface; | |
12 | use Wallabag\CoreBundle\Entity\Entry; | |
13 | ||
/**
 * Builds downloadable exports (EPUB, MOBI, PDF, CSV, JSON, XML, TXT) from one or several entries.
 *
 * Typical usage chains the fluent setters before exporting:
 * setEntries() -> updateTitle() -> updateAuthor() -> exportAs().
 *
 * This class doesn't have unit test BUT it's fully covered by a functional test with ExportControllerTest.
 */
class EntriesExport
{
    private $wallabagUrl;
    private $logoPath;
    private $translator;
    private $title = '';
    private $entries = [];
    private $author = 'wallabag';
    private $language = '';

    /**
     * @param TranslatorInterface $translator  Translator service
     * @param string              $wallabagUrl Wallabag instance url
     * @param string              $logoPath    Path to the logo FROM THE BUNDLE SCOPE
     */
    public function __construct(TranslatorInterface $translator, $wallabagUrl, $logoPath)
    {
        $this->translator = $translator;
        $this->wallabagUrl = $wallabagUrl;
        $this->logoPath = $logoPath;
    }

    /**
     * Define entries.
     *
     * When a single entry is given, its language becomes the language of the export.
     * When an array is given, the language is reset so that a value stored by an
     * earlier call on the same instance cannot leak into this export.
     *
     * @param array|Entry $entries An array of entries or one entry
     *
     * @return EntriesExport
     */
    public function setEntries($entries)
    {
        if (\is_array($entries)) {
            $this->language = '';
        } else {
            // the getter may return null; keep the property a string
            $this->language = (string) $entries->getLanguage();
            $entries = [$entries];
        }

        $this->entries = $entries;

        return $this;
    }

    /**
     * Sets the title of the export: the title of the entry when exporting a single
     * entry, "<method> articles" otherwise (or when there is no entry at all).
     *
     * @param string $method Method to get articles
     *
     * @return EntriesExport
     */
    public function updateTitle($method)
    {
        $this->title = $method . ' articles';

        // reset() yields false on an empty set instead of failing on a missing index
        $firstEntry = reset($this->entries);
        if ('entry' === $method && false !== $firstEntry) {
            $this->title = $firstEntry->getTitle();
        }

        return $this;
    }

    /**
     * Sets the author for one entry or category.
     *
     * The publishers are used, or the domain name if empty.
     * The author is left untouched when a single entry is requested but none is set.
     *
     * @param string $method Method to get articles
     *
     * @return EntriesExport
     */
    public function updateAuthor($method)
    {
        if ('entry' !== $method) {
            $this->author = 'Various authors';

            return $this;
        }

        $firstEntry = reset($this->entries);
        if (false === $firstEntry) {
            return $this;
        }

        $publishedBy = $firstEntry->getPublishedBy();
        $this->author = empty($publishedBy)
            ? $firstEntry->getDomainName()
            : implode(', ', $publishedBy);

        return $this;
    }

    /**
     * Produces the export in the requested format by dispatching to the
     * matching produce<Format>() method.
     *
     * @param string $format Export format (epub, mobi, pdf, csv, json, xml, txt)
     *
     * @throws \InvalidArgumentException When no producer exists for the format
     *
     * @return Response
     */
    public function exportAs($format)
    {
        $functionName = 'produce' . ucfirst($format);
        if (!method_exists($this, $functionName)) {
            throw new \InvalidArgumentException(sprintf('The format "%s" is not yet supported.', $format));
        }

        return $this->$functionName();
    }

    /**
     * Serializes the entries as JSON without wrapping them in a Response.
     *
     * @return string
     */
    public function exportJsonData()
    {
        return $this->prepareSerializingContent('json');
    }

    /**
     * Use PHPePub to dump a .epub file.
     *
     * Each entry produces two chapters: a metadata title page and the content itself.
     *
     * @return Response
     */
    private function produceEpub()
    {
        /*
         * Start and End of the book
         */
        $bookStart =
            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
            . "<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\">\n"
            . '<head>'
            . "<meta http-equiv=\"Default-Style\" content=\"text/html; charset=utf-8\" />\n"
            . "<title>wallabag articles book</title>\n"
            . "</head>\n"
            . "<body>\n";

        $bookEnd = "</body>\n</html>\n";

        $book = new EPub(EPub::BOOK_VERSION_EPUB3);

        /*
         * Book metadata
         */
        $book->setTitle($this->title);
        // Use RFC3066 language codes, such as "en", "da", "fr" etc.
        $book->setLanguage($this->language);
        $book->setDescription('Some articles saved on my wallabag');
        $book->setAuthor($this->author, $this->author);
        $book->setPublisher('wallabag', 'wallabag');
        // Strictly not needed as the book date defaults to time().
        $book->setDate(time());
        $book->setSourceURL($this->wallabagUrl);

        $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, 'PHP');
        $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, 'wallabag');

        $entryIds = [];
        $entryCount = \count($this->entries);
        $unknown = $this->translator->trans('export.unknown');
        $i = 0;

        /*
         * Adding actual entries
         */
        foreach ($this->entries as $entry) {
            ++$i;

            /*
             * Front page
             * Set if there's only one entry in the given set
             */
            if (1 === $entryCount && null !== $entry->getPreviewPicture()) {
                $book->setCoverImage($entry->getPreviewPicture());
            }

            // set tags as subjects
            foreach ($entry->getTags() as $tag) {
                $book->setSubject($tag->getLabel());
            }

            // the file name is derived from the raw (unescaped) url and title
            $filename = sha1(sprintf('%s:%s', $entry->getUrl(), $entry->getTitle()));

            $publishedAt = $entry->getPublishedAt();
            $publishedDate = empty($publishedAt) ? $unknown : $publishedAt->format('Y-m-d');

            // entry data is plain text: escape it so "&", "<" or quotes keep the XHTML well-formed
            $url = $this->escape($entry->getUrl());

            $titlepage = $bookStart .
                '<h1>' . $this->escape($entry->getTitle()) . '</h1>' .
                '<dl>' .
                '<dt>' . $this->translator->trans('entry.view.published_by') . '</dt><dd>' . $this->getAuthorsLabel($entry) . '</dd>' .
                '<dt>' . $this->translator->trans('entry.metadata.published_on') . '</dt><dd>' . $publishedDate . '</dd>' .
                '<dt>' . $this->translator->trans('entry.metadata.reading_time') . '</dt><dd>' . $this->translator->trans('entry.metadata.reading_time_minutes_short', ['%readingTime%' => $entry->getReadingTime()]) . '</dd>' .
                '<dt>' . $this->translator->trans('entry.metadata.added_on') . '</dt><dd>' . $entry->getCreatedAt()->format('Y-m-d') . '</dd>' .
                '<dt>' . $this->translator->trans('entry.metadata.address') . '</dt><dd><a href="' . $url . '">' . $url . '</a></dd>' .
                '</dl>' .
                $bookEnd;
            $book->addChapter("Entry {$i} of {$entryCount}", "{$filename}_cover.html", $titlepage, true, EPub::EXTERNAL_REF_ADD);

            // the entry content is already HTML and is embedded as is
            $chapter = $bookStart . $entry->getContent() . $bookEnd;
            $book->addChapter($entry->getTitle(), "{$filename}.html", $chapter, true, EPub::EXTERNAL_REF_ADD);

            $entryIds[] = $entry->getId();
        }

        $book->addChapter('Notices', 'Cover2.html', $bookStart . $this->getExportInformation('PHPePub') . $bookEnd);

        // Could also be the ISBN number, preferred for published books, or a UUID.
        $hash = sha1(sprintf('%s:%s', $this->wallabagUrl, implode(',', $entryIds)));
        $book->setIdentifier(sprintf('urn:wallabag:%s', $hash), EPub::IDENTIFIER_URI);

        return Response::create(
            $book->getBook(),
            200,
            [
                'Content-Description' => 'File Transfer',
                'Content-type' => 'application/epub+zip',
                'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.epub"',
                'Content-Transfer-Encoding' => 'binary',
            ]
        );
    }

    /**
     * Use PHPMobi to dump a .mobi file.
     *
     * @return Response
     */
    private function produceMobi()
    {
        $mobi = new \MOBI();
        $content = new \MOBIFile();

        /*
         * Book metadata
         */
        $content->set('title', $this->title);
        $content->set('author', $this->author);
        $content->set('subject', $this->title);

        /*
         * Front page
         */
        $content->appendParagraph($this->getExportInformation('PHPMobi'));
        if (file_exists($this->logoPath)) {
            // imagecreatefrompng() returns false for an unreadable/invalid image: skip the logo then
            $logo = imagecreatefrompng($this->logoPath);
            if (false !== $logo) {
                $content->appendImage($logo);
            }
        }
        $content->appendPageBreak();

        /*
         * Adding actual entries
         */
        foreach ($this->entries as $entry) {
            $content->appendChapterTitle($entry->getTitle());
            $content->appendParagraph($entry->getContent());
            $content->appendPageBreak();
        }
        $mobi->setContentProvider($content);

        return Response::create(
            $mobi->toString(),
            200,
            [
                'Accept-Ranges' => 'bytes',
                'Content-Description' => 'File Transfer',
                'Content-type' => 'application/x-mobipocket-ebook',
                'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.mobi"',
                'Content-Transfer-Encoding' => 'binary',
            ]
        );
    }

    /**
     * Use TCPDF to dump a .pdf file.
     *
     * Each entry produces two pages: a metadata page and the content itself.
     *
     * @return Response
     */
    private function producePdf()
    {
        $pdf = new \TCPDF(PDF_PAGE_ORIENTATION, PDF_UNIT, PDF_PAGE_FORMAT, true, 'UTF-8', false);

        // SetKeywords() overwrites the previous value, so gather every tag first
        // and set the whole list once instead of calling it per tag.
        $keywords = ['wallabag'];
        foreach ($this->entries as $entry) {
            foreach ($entry->getTags() as $tag) {
                $keywords[] = $tag->getLabel();
            }
        }

        /*
         * Book metadata
         */
        $pdf->SetCreator(PDF_CREATOR);
        $pdf->SetAuthor($this->author);
        $pdf->SetTitle($this->title);
        $pdf->SetSubject('Articles via wallabag');
        $pdf->SetKeywords(implode(', ', array_unique($keywords)));

        /*
         * Adding actual entries
         */
        foreach ($this->entries as $entry) {
            // entry data is plain text: escape it before embedding it into HTML
            $title = $this->escape($entry->getTitle());
            $url = $this->escape($entry->getUrl());

            $pdf->AddPage();
            $html = '<h1>' . $title . '</h1>' .
                '<dl>' .
                '<dt>' . $this->translator->trans('entry.view.published_by') . '</dt><dd>' . $this->getAuthorsLabel($entry) . '</dd>' .
                '<dt>' . $this->translator->trans('entry.metadata.reading_time') . '</dt><dd>' . $this->translator->trans('entry.metadata.reading_time_minutes_short', ['%readingTime%' => $entry->getReadingTime()]) . '</dd>' .
                '<dt>' . $this->translator->trans('entry.metadata.added_on') . '</dt><dd>' . $entry->getCreatedAt()->format('Y-m-d') . '</dd>' .
                '<dt>' . $this->translator->trans('entry.metadata.address') . '</dt><dd><a href="' . $url . '">' . $url . '</a></dd>' .
                '</dl>';
            $pdf->writeHTMLCell(0, 0, '', '', $html, 0, 1, 0, true, '', true);

            $pdf->AddPage();
            // the entry content is already HTML and is embedded as is
            $html = '<h1>' . $title . '</h1>' . $entry->getContent();

            $pdf->writeHTMLCell(0, 0, '', '', $html, 0, 1, 0, true, '', true);
        }

        /*
         * Last page
         */
        $pdf->AddPage();
        $html = $this->getExportInformation('tcpdf');

        $pdf->writeHTMLCell(0, 0, '', '', $html, 0, 1, 0, true, '', true);

        // set image scale factor
        $pdf->setImageScale(PDF_IMAGE_SCALE_RATIO);

        return Response::create(
            $pdf->Output('', 'S'),
            200,
            [
                'Content-Description' => 'File Transfer',
                'Content-type' => 'application/pdf',
                'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.pdf"',
                'Content-Transfer-Encoding' => 'binary',
            ]
        );
    }

    /**
     * Dump a CSV file (";" delimited, one row per entry after a header row).
     *
     * Inspired from CsvFileDumper.
     *
     * @return Response
     */
    private function produceCsv()
    {
        $delimiter = ';';
        $enclosure = '"';
        $handle = fopen('php://memory', 'r+b');

        fputcsv($handle, ['Title', 'URL', 'Content', 'Tags', 'MIME Type', 'Language', 'Creation date'], $delimiter, $enclosure);

        foreach ($this->entries as $entry) {
            fputcsv(
                $handle,
                [
                    $entry->getTitle(),
                    $entry->getUrl(),
                    // remove new line to avoid crazy results
                    str_replace(["\r\n", "\r", "\n"], '', $entry->getContent()),
                    implode(', ', $entry->getTags()->toArray()),
                    $entry->getMimetype(),
                    $entry->getLanguage(),
                    // NOTE(review): "h" is the 12-hour clock and no meridiem is printed, so AM and PM
                    // times are indistinguishable. Kept as is because consumers may rely on this exact
                    // format; consider switching to "H".
                    $entry->getCreatedAt()->format('d/m/Y h:i:s'),
                ],
                $delimiter,
                $enclosure
            );
        }

        rewind($handle);
        $output = stream_get_contents($handle);
        fclose($handle);

        return Response::create(
            $output,
            200,
            [
                'Content-type' => 'application/csv',
                'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.csv"',
                'Content-Transfer-Encoding' => 'UTF-8',
            ]
        );
    }

    /**
     * Dump a JSON file.
     *
     * @return Response
     */
    private function produceJson()
    {
        return Response::create(
            $this->prepareSerializingContent('json'),
            200,
            [
                'Content-type' => 'application/json',
                'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.json"',
                'Content-Transfer-Encoding' => 'UTF-8',
            ]
        );
    }

    /**
     * Dump a XML file.
     *
     * @return Response
     */
    private function produceXml()
    {
        return Response::create(
            $this->prepareSerializingContent('xml'),
            200,
            [
                'Content-type' => 'application/xml',
                'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.xml"',
                'Content-Transfer-Encoding' => 'UTF-8',
            ]
        );
    }

    /**
     * Dump a TXT file: each entry is rendered as its title framed by two
     * separator lines, followed by its content converted to plain text.
     *
     * @return Response
     */
    private function produceTxt()
    {
        $content = '';
        $bar = str_repeat('=', 100);
        foreach ($this->entries as $entry) {
            $content .= "\n\n" . $bar . "\n\n" . $entry->getTitle() . "\n\n" . $bar . "\n\n";
            $html = new Html2Text($entry->getContent(), ['do_links' => 'none', 'width' => 100]);
            $content .= $html->getText();
        }

        return Response::create(
            $content,
            200,
            [
                'Content-type' => 'text/plain',
                'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.txt"',
                'Content-Transfer-Encoding' => 'UTF-8',
            ]
        );
    }

    /**
     * Serialize the entries for the producing processes that need it (JSON & XML),
     * restricted to the "entries_for_user" serialization group.
     *
     * @param string $format Serializer format ("json" or "xml")
     *
     * @return string The serialized entries
     */
    private function prepareSerializingContent($format)
    {
        $serializer = SerializerBuilder::create()->build();

        return $serializer->serialize(
            $this->entries,
            $format,
            SerializationContext::create()->setGroups(['entries_for_user'])
        );
    }

    /**
     * Return a kind of footer / information for the export.
     *
     * The %IMAGE% placeholder of the translated template is replaced by the logo
     * for tcpdf and removed for every other generator.
     *
     * @param string $type Generator of the export, can be: tcpdf, PHPePub, PHPMobi
     *
     * @return string
     */
    private function getExportInformation($type)
    {
        $info = $this->translator->trans('export.footer_template', [
            '%method%' => $type,
        ]);

        $image = 'tcpdf' === $type ? '<img src="' . $this->logoPath . '" />' : '';

        return str_replace('%IMAGE%', $image, $info);
    }

    /**
     * Return the "published by" value of an entry, ready to be embedded into HTML:
     * the escaped, comma separated publishers, or the translated "unknown" label.
     *
     * @param Entry $entry
     *
     * @return string
     */
    private function getAuthorsLabel($entry)
    {
        $publishedBy = $entry->getPublishedBy();
        if (empty($publishedBy)) {
            return $this->translator->trans('export.unknown');
        }

        return $this->escape(implode(',', $publishedBy));
    }

    /**
     * Escape a plain text value for use in (X)HTML text and attribute values.
     *
     * @param string|null $text
     *
     * @return string
     */
    private function escape($text)
    {
        return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }

    /**
     * Return a sanitized version of the title by applying translit iconv
     * and removing everything but alphanumeric characters, "-", "'" and space.
     *
     * @return string Sanitized filename
     */
    private function getSanitizedFilename()
    {
        $title = (string) $this->title;

        // iconv() returns false when the conversion fails; fall back to the raw
        // title, the pattern below still strips every non-ASCII character
        $ascii = iconv('utf-8', 'us-ascii//TRANSLIT', $title);
        if (false === $ascii) {
            $ascii = $title;
        }

        return preg_replace('/[^A-Za-z0-9\- \']/', '', $ascii);
    }
}