aboutsummaryrefslogtreecommitdiffhomepage
path: root/application
diff options
context:
space:
mode:
authorArthurHoaro <arthur@hoa.ro>2020-06-06 14:01:03 +0200
committerArthurHoaro <arthur@hoa.ro>2020-07-23 21:19:21 +0200
commitc22fa57a5505fe95fd01860e3d3dfbb089f869cd (patch)
treea72b57e49b7b2b995ace278bad00fc47d5b6d61d /application
parent8eac2e54882d8adae8cbb45386dca1b465242632 (diff)
downloadShaarli-c22fa57a5505fe95fd01860e3d3dfbb089f869cd.tar.gz
Shaarli-c22fa57a5505fe95fd01860e3d3dfbb089f869cd.tar.zst
Shaarli-c22fa57a5505fe95fd01860e3d3dfbb089f869cd.zip
Handle shaare creation/edition/deletion through Slim controllers
Diffstat (limited to 'application')
-rw-r--r--application/Utils.php4
-rw-r--r--application/bookmark/LinkUtils.php106
-rw-r--r--application/container/ContainerBuilder.php10
-rw-r--r--application/container/ShaarliContainer.php4
-rw-r--r--application/front/controller/admin/PostBookmarkController.php258
-rw-r--r--application/front/controller/admin/ToolsController.php2
-rw-r--r--application/front/controller/visitor/DailyController.php2
-rw-r--r--application/front/controller/visitor/FeedController.php2
-rw-r--r--application/front/controller/visitor/ShaarliVisitorController.php14
-rw-r--r--application/http/HttpAccess.php39
-rw-r--r--application/http/HttpUtils.php106
11 files changed, 432 insertions, 115 deletions
diff --git a/application/Utils.php b/application/Utils.php
index 72c90049..9c9eaaa2 100644
--- a/application/Utils.php
+++ b/application/Utils.php
@@ -91,6 +91,10 @@ function endsWith($haystack, $needle, $case = true)
91 */ 91 */
92function escape($input) 92function escape($input)
93{ 93{
94 if (null === $input) {
95 return null;
96 }
97
94 if (is_bool($input)) { 98 if (is_bool($input)) {
95 return $input; 99 return $input;
96 } 100 }
diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php
index 98d9038a..68914fca 100644
--- a/application/bookmark/LinkUtils.php
+++ b/application/bookmark/LinkUtils.php
@@ -3,112 +3,6 @@
3use Shaarli\Bookmark\Bookmark; 3use Shaarli\Bookmark\Bookmark;
4 4
5/** 5/**
6 * Get cURL callback function for CURLOPT_WRITEFUNCTION
7 *
8 * @param string $charset to extract from the downloaded page (reference)
9 * @param string $title to extract from the downloaded page (reference)
10 * @param string $description to extract from the downloaded page (reference)
11 * @param string $keywords to extract from the downloaded page (reference)
12 * @param bool $retrieveDescription Automatically tries to retrieve description and keywords from HTML content
13 * @param string $curlGetInfo Optionally overrides curl_getinfo function
14 *
15 * @return Closure
16 */
17function get_curl_download_callback(
18 &$charset,
19 &$title,
20 &$description,
21 &$keywords,
22 $retrieveDescription,
23 $curlGetInfo = 'curl_getinfo'
24) {
25 $isRedirected = false;
26 $currentChunk = 0;
27 $foundChunk = null;
28
29 /**
30 * cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
31 *
32 * While downloading the remote page, we check that the HTTP code is 200 and content type is 'html/text'
33 * Then we extract the title and the charset and stop the download when it's done.
34 *
35 * @param resource $ch cURL resource
36 * @param string $data chunk of data being downloaded
37 *
38 * @return int|bool length of $data or false if we need to stop the download
39 */
40 return function (&$ch, $data) use (
41 $retrieveDescription,
42 $curlGetInfo,
43 &$charset,
44 &$title,
45 &$description,
46 &$keywords,
47 &$isRedirected,
48 &$currentChunk,
49 &$foundChunk
50 ) {
51 $currentChunk++;
52 $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
53 if (!empty($responseCode) && in_array($responseCode, [301, 302])) {
54 $isRedirected = true;
55 return strlen($data);
56 }
57 if (!empty($responseCode) && $responseCode !== 200) {
58 return false;
59 }
60 // After a redirection, the content type will keep the previous request value
61 // until it finds the next content-type header.
62 if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) {
63 $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
64 }
65 if (!empty($contentType) && strpos($contentType, 'text/html') === false) {
66 return false;
67 }
68 if (!empty($contentType) && empty($charset)) {
69 $charset = header_extract_charset($contentType);
70 }
71 if (empty($charset)) {
72 $charset = html_extract_charset($data);
73 }
74 if (empty($title)) {
75 $title = html_extract_title($data);
76 $foundChunk = ! empty($title) ? $currentChunk : $foundChunk;
77 }
78 if ($retrieveDescription && empty($description)) {
79 $description = html_extract_tag('description', $data);
80 $foundChunk = ! empty($description) ? $currentChunk : $foundChunk;
81 }
82 if ($retrieveDescription && empty($keywords)) {
83 $keywords = html_extract_tag('keywords', $data);
84 if (! empty($keywords)) {
85 $foundChunk = $currentChunk;
86 // Keywords use the format tag1, tag2 multiple words, tag
87 // So we format them to match Shaarli's separator and glue multiple words with '-'
88 $keywords = implode(' ', array_map(function($keyword) {
89 return implode('-', preg_split('/\s+/', trim($keyword)));
90 }, explode(',', $keywords)));
91 }
92 }
93
94 // We got everything we want, stop the download.
95 // If we already found either the title, description or keywords,
96 // it's highly unlikely that we'll found the other metas further than
97 // in the same chunk of data or the next one. So we also stop the download after that.
98 if ((!empty($responseCode) && !empty($contentType) && !empty($charset)) && $foundChunk !== null
99 && (! $retrieveDescription
100 || $foundChunk < $currentChunk
101 || (!empty($title) && !empty($description) && !empty($keywords))
102 )
103 ) {
104 return false;
105 }
106
107 return strlen($data);
108 };
109}
110
111/**
112 * Extract title from an HTML document. 6 * Extract title from an HTML document.
113 * 7 *
114 * @param string $html HTML content where to look for a title. 8 * @param string $html HTML content where to look for a title.
diff --git a/application/container/ContainerBuilder.php b/application/container/ContainerBuilder.php
index 84406979..85126246 100644
--- a/application/container/ContainerBuilder.php
+++ b/application/container/ContainerBuilder.php
@@ -10,11 +10,13 @@ use Shaarli\Config\ConfigManager;
10use Shaarli\Feed\FeedBuilder; 10use Shaarli\Feed\FeedBuilder;
11use Shaarli\Formatter\FormatterFactory; 11use Shaarli\Formatter\FormatterFactory;
12use Shaarli\History; 12use Shaarli\History;
13use Shaarli\Http\HttpAccess;
13use Shaarli\Plugin\PluginManager; 14use Shaarli\Plugin\PluginManager;
14use Shaarli\Render\PageBuilder; 15use Shaarli\Render\PageBuilder;
15use Shaarli\Render\PageCacheManager; 16use Shaarli\Render\PageCacheManager;
16use Shaarli\Security\LoginManager; 17use Shaarli\Security\LoginManager;
17use Shaarli\Security\SessionManager; 18use Shaarli\Security\SessionManager;
19use Shaarli\Thumbnailer;
18 20
19/** 21/**
20 * Class ContainerBuilder 22 * Class ContainerBuilder
@@ -110,6 +112,14 @@ class ContainerBuilder
110 ); 112 );
111 }; 113 };
112 114
115 $container['thumbnailer'] = function (ShaarliContainer $container): Thumbnailer {
116 return new Thumbnailer($container->conf);
117 };
118
119 $container['httpAccess'] = function (): HttpAccess {
120 return new HttpAccess();
121 };
122
113 return $container; 123 return $container;
114 } 124 }
115} 125}
diff --git a/application/container/ShaarliContainer.php b/application/container/ShaarliContainer.php
index deb07197..fec398d0 100644
--- a/application/container/ShaarliContainer.php
+++ b/application/container/ShaarliContainer.php
@@ -9,11 +9,13 @@ use Shaarli\Config\ConfigManager;
9use Shaarli\Feed\FeedBuilder; 9use Shaarli\Feed\FeedBuilder;
10use Shaarli\Formatter\FormatterFactory; 10use Shaarli\Formatter\FormatterFactory;
11use Shaarli\History; 11use Shaarli\History;
12use Shaarli\Http\HttpAccess;
12use Shaarli\Plugin\PluginManager; 13use Shaarli\Plugin\PluginManager;
13use Shaarli\Render\PageBuilder; 14use Shaarli\Render\PageBuilder;
14use Shaarli\Render\PageCacheManager; 15use Shaarli\Render\PageCacheManager;
15use Shaarli\Security\LoginManager; 16use Shaarli\Security\LoginManager;
16use Shaarli\Security\SessionManager; 17use Shaarli\Security\SessionManager;
18use Shaarli\Thumbnailer;
17use Slim\Container; 19use Slim\Container;
18 20
19/** 21/**
@@ -31,6 +33,8 @@ use Slim\Container;
31 * @property FormatterFactory $formatterFactory 33 * @property FormatterFactory $formatterFactory
32 * @property PageCacheManager $pageCacheManager 34 * @property PageCacheManager $pageCacheManager
33 * @property FeedBuilder $feedBuilder 35 * @property FeedBuilder $feedBuilder
36 * @property Thumbnailer $thumbnailer
37 * @property HttpAccess $httpAccess
34 */ 38 */
35class ShaarliContainer extends Container 39class ShaarliContainer extends Container
36{ 40{
diff --git a/application/front/controller/admin/PostBookmarkController.php b/application/front/controller/admin/PostBookmarkController.php
new file mode 100644
index 00000000..dbe570e2
--- /dev/null
+++ b/application/front/controller/admin/PostBookmarkController.php
@@ -0,0 +1,258 @@
1<?php
2
3declare(strict_types=1);
4
5namespace Shaarli\Front\Controller\Admin;
6
7use Shaarli\Bookmark\Bookmark;
8use Shaarli\Bookmark\Exception\BookmarkNotFoundException;
9use Shaarli\Formatter\BookmarkMarkdownFormatter;
10use Shaarli\Thumbnailer;
11use Slim\Http\Request;
12use Slim\Http\Response;
13
14/**
15 * Class PostBookmarkController
16 *
17 * Slim controller used to handle Shaarli create or edit bookmarks.
18 */
class PostBookmarkController extends ShaarliAdminController
{
    /**
     * GET /add-shaare - Displays the form used to create a new bookmark from an URL
     */
    public function addShaare(Request $request, Response $response): Response
    {
        $this->assignView(
            'pagetitle',
            t('Shaare a new link') .' - '. $this->container->conf->get('general.title', 'Shaarli')
        );

        return $response->write($this->render('addlink'));
    }

    /**
     * GET /shaare - Displays the bookmark form for creation.
     * Note that if the URL is found in existing bookmarks, then it will be in edit mode.
     *
     * Query parameters read: `post` (URL), `title`, `description`, `tags`, `private`.
     */
    public function displayCreateForm(Request $request, Response $response): Response
    {
        // `post` is optional (e.g. when creating a note): never hand null to the URL helper.
        $url = cleanup_url($request->getParam('post') ?? '');

        $linkIsNew = false;
        // Check if URL is not already in database (in this case, we will edit the existing link)
        $bookmark = $this->container->bookmarkService->findByUrl($url);
        if (null === $bookmark) {
            $linkIsNew = true;
            // Get shaare data if it was provided in URL (e.g.: by the bookmarklet).
            $title = $request->getParam('title');
            $description = $request->getParam('description');
            $tags = $request->getParam('tags');
            $private = filter_var($request->getParam('private'), FILTER_VALIDATE_BOOLEAN);

            // If this is an HTTP(S) link, we try to get the page to extract
            // the title (otherwise we will go straight to the edit form.)
            if (empty($title) && strpos(get_url_scheme($url) ?: '', 'http') !== false) {
                $retrieveDescription = $this->container->conf->get('general.retrieve_description');
                // Filled by reference by the download callback below.
                $charset = null;
                // Bounded timeout to keep the application responsive.
                // The callback will fill $charset, $title, $description and $tags
                // with data from the downloaded page.
                $this->container->httpAccess->getHttpResponse(
                    $url,
                    $this->container->conf->get('general.download_timeout', 30),
                    $this->container->conf->get('general.download_max_size', 4194304),
                    $this->container->httpAccess->getCurlDownloadCallback(
                        $charset,
                        $title,
                        $description,
                        $tags,
                        $retrieveDescription
                    )
                );
                // The charset may not have been detected (null/false/''): with strict_types,
                // strtolower() would throw a TypeError, and there is nothing to convert from anyway.
                if (! empty($title) && ! empty($charset) && strtolower($charset) !== 'utf-8') {
                    $title = mb_convert_encoding($title, 'utf-8', $charset);
                }
            }

            if (empty($url) && empty($title)) {
                $title = $this->container->conf->get('general.default_note_title', t('Note: '));
            }

            $link = escape([
                'title' => $title,
                'url' => $url ?? '',
                'description' => $description ?? '',
                'tags' => $tags ?? '',
                'private' => $private,
            ]);
        } else {
            $formatter = $this->container->formatterFactory->getFormatter('raw');
            $link = $formatter->format($bookmark);
        }

        return $this->displayForm($link, $linkIsNew, $request, $response);
    }

    /**
     * GET /shaare-{id} - Displays the bookmark form in edition mode.
     *
     * Redirects to the index with an error message if the ID is not numeric
     * or if no bookmark matches it.
     */
    public function displayEditForm(Request $request, Response $response, array $args): Response
    {
        $id = $args['id'] ?? '';
        try {
            if (false === ctype_digit($id)) {
                throw new BookmarkNotFoundException();
            }
            // Cast to int for consistency with save() and deleteBookmark().
            $bookmark = $this->container->bookmarkService->get((int) $id); // Read database
        } catch (BookmarkNotFoundException $e) {
            $this->saveErrorMessage(t('Bookmark not found'));

            return $response->withRedirect('./');
        }

        $formatter = $this->container->formatterFactory->getFormatter('raw');
        $link = $formatter->format($bookmark);

        return $this->displayForm($link, false, $request, $response);
    }

    /**
     * POST /shaare - Creates a new bookmark, or updates the existing one designated by `lf_id`.
     *
     * The bookmark is stored once before the `save_link` hook is executed, then stored
     * again with the data returned by the plugins.
     */
    public function save(Request $request, Response $response): Response
    {
        $this->checkToken($request);

        // lf_id should only be present if the link exists.
        // A truthiness test must not be used here: "0" is a valid bookmark ID.
        // An empty or non numeric value is ignored instead of being cast to ID 0.
        $rawId = $request->getParam('lf_id');
        $id = (is_int($rawId) || is_string($rawId)) && ctype_digit((string) $rawId)
            ? (int) $rawId
            : null;
        if (null !== $id && true === $this->container->bookmarkService->exists($id)) {
            // Edit
            $bookmark = $this->container->bookmarkService->get($id);
        } else {
            // New link
            $bookmark = new Bookmark();
        }

        $bookmark->setTitle($request->getParam('lf_title'));
        $bookmark->setDescription($request->getParam('lf_description'));
        $bookmark->setUrl($request->getParam('lf_url'), $this->container->conf->get('security.allowed_protocols', []));
        $bookmark->setPrivate(filter_var($request->getParam('lf_private'), FILTER_VALIDATE_BOOLEAN));
        $bookmark->setTagsString($request->getParam('lf_tags'));

        if ($this->container->conf->get('thumbnails.mode', Thumbnailer::MODE_NONE) !== Thumbnailer::MODE_NONE
            && false === $bookmark->isNote()
        ) {
            $bookmark->setThumbnail($this->container->thumbnailer->get($bookmark->getUrl()));
        }
        $this->container->bookmarkService->addOrSet($bookmark, false);

        // To preserve backward compatibility with 3rd parties, plugins still use arrays
        $formatter = $this->container->formatterFactory->getFormatter('raw');
        $data = $formatter->format($bookmark);
        $data = $this->executeHooks('save_link', $data);

        $bookmark->fromArray($data);
        $this->container->bookmarkService->set($bookmark);

        // If we are called from the bookmarklet, we must close the popup:
        if ($request->getParam('source') === 'bookmarklet') {
            return $response->write('<script>self.close();</script>');
        }

        if (!empty($request->getParam('returnurl'))) {
            $this->container->environment['HTTP_REFERER'] = escape($request->getParam('returnurl'));
        }

        return $this->redirectFromReferer(
            $request,
            $response,
            ['add-shaare', 'shaare'],
            ['addlink', 'post', 'edit_link'],
            $bookmark->getShortUrl()
        );
    }

    /**
     * Deletes one or multiple bookmarks.
     *
     * The `lf_linkdate` parameter holds one bookmark ID, or several IDs separated by whitespace.
     * Only purely numeric IDs are considered; IDs which do not match an existing bookmark
     * are reported through an error message and skipped.
     */
    public function deleteBookmark(Request $request, Response $response): Response
    {
        $this->checkToken($request);

        $rawIds = $request->getParam('lf_linkdate');
        $rawIds = is_scalar($rawIds) ? trim((string) $rawIds) : '';
        // Keep numeric IDs only: casting arbitrary input to int would silently target bookmark #0.
        $ids = array_values(array_unique(array_filter(preg_split('/\s+/', $rawIds), 'ctype_digit')));

        // assert at least one id is given
        if (0 === count($ids)) {
            $this->saveErrorMessage(t('Invalid bookmark ID provided.'));

            return $this->redirectFromReferer($request, $response, [], ['delete-shaare']);
        }

        $formatter = $this->container->formatterFactory->getFormatter('raw');
        foreach ($ids as $id) {
            try {
                $bookmark = $this->container->bookmarkService->get((int) $id);
            } catch (BookmarkNotFoundException $e) {
                $this->saveErrorMessage(sprintf(
                    t('Bookmark with identifier %s could not be found.'),
                    $id
                ));

                continue;
            }

            $data = $formatter->format($bookmark);
            $this->container->pluginManager->executeHooks('delete_link', $data);
            $this->container->bookmarkService->remove($bookmark, false);
        }

        $this->container->bookmarkService->save();

        // If we are called from the bookmarklet, we must close the popup:
        if ($request->getParam('source') === 'bookmarklet') {
            return $response->write('<script>self.close();</script>');
        }

        // Don't redirect to where we were previously because the datastore has changed.
        return $response->withRedirect('./');
    }

    /**
     * Assigns the template variables of the bookmark form and renders the `editlink` template.
     *
     * @param array $link  Bookmark data displayed in the form
     * @param bool  $isNew True when the form is used to create a bookmark, false when editing
     */
    protected function displayForm(array $link, bool $isNew, Request $request, Response $response): Response
    {
        $tags = $this->container->bookmarkService->bookmarksCountPerTag();
        if ($this->container->conf->get('formatter') === 'markdown') {
            $tags[BookmarkMarkdownFormatter::NO_MD_TAG] = 1;
        }

        $data = [
            'link' => $link,
            'link_is_new' => $isNew,
            'http_referer' => escape($this->container->environment['HTTP_REFERER'] ?? ''),
            // Request supplied value: escaped like the referer before reaching the template.
            'source' => escape($request->getParam('source') ?? ''),
            'tags' => $tags,
            'default_private_links' => $this->container->conf->get('privacy.default_private_links', false),
        ];

        $data = $this->executeHooks('render_editlink', $data);

        foreach ($data as $key => $value) {
            $this->assignView($key, $value);
        }

        $editLabel = false === $isNew ? t('Edit') .' ' : '';
        $this->assignView(
            'pagetitle',
            $editLabel . t('Shaare') .' - '. $this->container->conf->get('general.title', 'Shaarli')
        );

        return $response->write($this->render('editlink'));
    }

    /**
     * Runs the given plugin hook on the provided data.
     *
     * @param string  $hook Name of the hook to execute (e.g. `save_link`, `render_editlink`)
     * @param mixed[] $data Data handed over to the plugins
     *
     * @return mixed[] Data after the active plugins hook execution.
     */
    protected function executeHooks(string $hook, array $data): array
    {
        $this->container->pluginManager->executeHooks(
            $hook,
            $data
        );

        return $data;
    }
}
diff --git a/application/front/controller/admin/ToolsController.php b/application/front/controller/admin/ToolsController.php
index 66db5ad9..d087f2cd 100644
--- a/application/front/controller/admin/ToolsController.php
+++ b/application/front/controller/admin/ToolsController.php
@@ -21,7 +21,7 @@ class ToolsController extends ShaarliAdminController
21 'sslenabled' => is_https($this->container->environment), 21 'sslenabled' => is_https($this->container->environment),
22 ]; 22 ];
23 23
24 $this->executeHooks($data); 24 $data = $this->executeHooks($data);
25 25
26 foreach ($data as $key => $value) { 26 foreach ($data as $key => $value) {
27 $this->assignView($key, $value); 27 $this->assignView($key, $value);
diff --git a/application/front/controller/visitor/DailyController.php b/application/front/controller/visitor/DailyController.php
index 47e2503a..e5c9ddac 100644
--- a/application/front/controller/visitor/DailyController.php
+++ b/application/front/controller/visitor/DailyController.php
@@ -71,7 +71,7 @@ class DailyController extends ShaarliVisitorController
71 ]; 71 ];
72 72
73 // Hooks are called before column construction so that plugins don't have to deal with columns. 73 // Hooks are called before column construction so that plugins don't have to deal with columns.
74 $this->executeHooks($data); 74 $data = $this->executeHooks($data);
75 75
76 $data['cols'] = $this->calculateColumns($data['linksToDisplay']); 76 $data['cols'] = $this->calculateColumns($data['linksToDisplay']);
77 77
diff --git a/application/front/controller/visitor/FeedController.php b/application/front/controller/visitor/FeedController.php
index 70664635..f76f55fd 100644
--- a/application/front/controller/visitor/FeedController.php
+++ b/application/front/controller/visitor/FeedController.php
@@ -46,7 +46,7 @@ class FeedController extends ShaarliVisitorController
46 46
47 $data = $this->container->feedBuilder->buildData($feedType, $request->getParams()); 47 $data = $this->container->feedBuilder->buildData($feedType, $request->getParams());
48 48
49 $this->executeHooks($data, $feedType); 49 $data = $this->executeHooks($data, $feedType);
50 $this->assignAllView($data); 50 $this->assignAllView($data);
51 51
52 $content = $this->render('feed.'. $feedType); 52 $content = $this->render('feed.'. $feedType);
diff --git a/application/front/controller/visitor/ShaarliVisitorController.php b/application/front/controller/visitor/ShaarliVisitorController.php
index f12915c1..98423d90 100644
--- a/application/front/controller/visitor/ShaarliVisitorController.php
+++ b/application/front/controller/visitor/ShaarliVisitorController.php
@@ -78,16 +78,16 @@ abstract class ShaarliVisitorController
78 ]; 78 ];
79 79
80 foreach ($common_hooks as $name) { 80 foreach ($common_hooks as $name) {
81 $plugin_data = []; 81 $pluginData = [];
82 $this->container->pluginManager->executeHooks( 82 $this->container->pluginManager->executeHooks(
83 'render_' . $name, 83 'render_' . $name,
84 $plugin_data, 84 $pluginData,
85 [ 85 [
86 'target' => $template, 86 'target' => $template,
87 'loggedin' => $this->container->loginManager->isLoggedIn() 87 'loggedin' => $this->container->loginManager->isLoggedIn()
88 ] 88 ]
89 ); 89 );
90 $this->assignView('plugins_' . $name, $plugin_data); 90 $this->assignView('plugins_' . $name, $pluginData);
91 } 91 }
92 } 92 }
93 93
@@ -102,9 +102,10 @@ abstract class ShaarliVisitorController
102 Request $request, 102 Request $request,
103 Response $response, 103 Response $response,
104 array $loopTerms = [], 104 array $loopTerms = [],
105 array $clearParams = [] 105 array $clearParams = [],
106 string $anchor = null
106 ): Response { 107 ): Response {
107 $defaultPath = $request->getUri()->getBasePath(); 108 $defaultPath = rtrim($request->getUri()->getBasePath(), '/') . '/';
108 $referer = $this->container->environment['HTTP_REFERER'] ?? null; 109 $referer = $this->container->environment['HTTP_REFERER'] ?? null;
109 110
110 if (null !== $referer) { 111 if (null !== $referer) {
@@ -133,7 +134,8 @@ abstract class ShaarliVisitorController
133 } 134 }
134 135
135 $queryString = count($params) > 0 ? '?'. http_build_query($params) : ''; 136 $queryString = count($params) > 0 ? '?'. http_build_query($params) : '';
137 $anchor = $anchor ? '#' . $anchor : '';
136 138
137 return $response->withRedirect($path . $queryString); 139 return $response->withRedirect($path . $queryString . $anchor);
138 } 140 }
139} 141}
diff --git a/application/http/HttpAccess.php b/application/http/HttpAccess.php
new file mode 100644
index 00000000..81d9e076
--- /dev/null
+++ b/application/http/HttpAccess.php
@@ -0,0 +1,39 @@
1<?php
2
3declare(strict_types=1);
4
5namespace Shaarli\Http;
6
7/**
8 * Class HttpAccess
9 *
10 * This is mostly an OOP wrapper for HTTP functions defined in `HttpUtils`.
11 * It is used as dependency injection in Shaarli's container.
12 *
13 * @package Shaarli\Http
14 */
class HttpAccess
{
    /**
     * Object-oriented entry point for the procedural `get_http_response()` function.
     *
     * All arguments are forwarded unchanged, and the function result is returned as is.
     *
     * @param mixed $url               forwarded as first argument
     * @param mixed $timeout           forwarded as second argument (defaults to 30)
     * @param mixed $maxBytes          forwarded as third argument (defaults to 4194304)
     * @param mixed $curlWriteFunction forwarded as fourth argument (defaults to null)
     *
     * @return mixed whatever `get_http_response()` returns
     */
    public function getHttpResponse($url, $timeout = 30, $maxBytes = 4194304, $curlWriteFunction = null)
    {
        $httpResponse = get_http_response($url, $timeout, $maxBytes, $curlWriteFunction);

        return $httpResponse;
    }

    /**
     * Object-oriented entry point for the procedural `get_curl_download_callback()` function.
     *
     * The first four arguments are received by reference and handed over by reference,
     * so the caller's variables are the ones the underlying function is given.
     *
     * @param mixed $charset             passed by reference
     * @param mixed $title               passed by reference
     * @param mixed $description         passed by reference
     * @param mixed $keywords            passed by reference
     * @param mixed $retrieveDescription forwarded unchanged
     * @param mixed $curlGetInfo         forwarded unchanged (defaults to 'curl_getinfo')
     *
     * @return mixed whatever `get_curl_download_callback()` returns
     */
    public function getCurlDownloadCallback(
        &$charset,
        &$title,
        &$description,
        &$keywords,
        $retrieveDescription,
        $curlGetInfo = 'curl_getinfo'
    ) {
        $downloadCallback = get_curl_download_callback(
            $charset,
            $title,
            $description,
            $keywords,
            $retrieveDescription,
            $curlGetInfo
        );

        return $downloadCallback;
    }
}
diff --git a/application/http/HttpUtils.php b/application/http/HttpUtils.php
index f00c4336..4fc4e3dc 100644
--- a/application/http/HttpUtils.php
+++ b/application/http/HttpUtils.php
@@ -484,3 +484,109 @@ function is_https($server)
484 484
485 return ! empty($server['HTTPS']); 485 return ! empty($server['HTTPS']);
486} 486}
487
/**
 * Get cURL callback function for CURLOPT_WRITEFUNCTION
 *
 * @param string $charset             to extract from the downloaded page (reference)
 * @param string $title               to extract from the downloaded page (reference)
 * @param string $description         to extract from the downloaded page (reference)
 * @param string $keywords            to extract from the downloaded page (reference)
 * @param bool   $retrieveDescription Automatically tries to retrieve description and keywords from HTML content
 * @param string $curlGetInfo         Optionally overrides curl_getinfo function
 *
 * @return Closure
 */
function get_curl_download_callback(
    &$charset,
    &$title,
    &$description,
    &$keywords,
    $retrieveDescription,
    $curlGetInfo = 'curl_getinfo'
) {
    $isRedirected = false;
    $currentChunk = 0;
    $foundChunk = null;

    /**
     * cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
     *
     * While downloading the remote page, we check that the HTTP code is 200 and content type is 'text/html'
     * Then we extract the title and the charset and stop the download when it's done.
     *
     * @param resource $ch   cURL resource
     * @param string   $data chunk of data being downloaded
     *
     * @return int|bool length of $data or false if we need to stop the download
     */
    return function (&$ch, $data) use (
        $retrieveDescription,
        $curlGetInfo,
        &$charset,
        &$title,
        &$description,
        &$keywords,
        &$isRedirected,
        &$currentChunk,
        &$foundChunk
    ) {
        $currentChunk++;
        $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
        // Every redirection status must let the transfer continue (not only 301/302),
        // otherwise the download is aborted before the final page is reached.
        if (!empty($responseCode) && in_array((int) $responseCode, [301, 302, 303, 307, 308], true)) {
            $isRedirected = true;
            return strlen($data);
        }
        if (!empty($responseCode) && $responseCode !== 200) {
            return false;
        }
        // After a redirection, the content type will keep the previous request value
        // until it finds the next content-type header.
        if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) {
            $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
        }
        if (!empty($contentType) && strpos($contentType, 'text/html') === false) {
            return false;
        }
        if (!empty($contentType) && empty($charset)) {
            $charset = header_extract_charset($contentType);
        }
        if (empty($charset)) {
            $charset = html_extract_charset($data);
        }
        if (empty($title)) {
            $title = html_extract_title($data);
            $foundChunk = ! empty($title) ? $currentChunk : $foundChunk;
        }
        if ($retrieveDescription && empty($description)) {
            $description = html_extract_tag('description', $data);
            $foundChunk = ! empty($description) ? $currentChunk : $foundChunk;
        }
        if ($retrieveDescription && empty($keywords)) {
            $keywords = html_extract_tag('keywords', $data);
            if (! empty($keywords)) {
                $foundChunk = $currentChunk;
                // Keywords use the format tag1, tag2 multiple words, tag
                // So we format them to match Shaarli's separator and glue multiple words with '-'
                $formatted = array_map(function ($keyword) {
                    return implode('-', preg_split('/\s+/', trim($keyword)));
                }, explode(',', $keywords));
                // Trailing or duplicated commas must not produce empty tags.
                $keywords = implode(' ', array_filter($formatted, 'strlen'));
            }
        }

        // We got everything we want, stop the download.
        // If we already found either the title, description or keywords,
        // it's highly unlikely that we'll find the other metas further than
        // in the same chunk of data or the next one. So we also stop the download after that.
        if ((!empty($responseCode) && !empty($contentType) && !empty($charset)) && $foundChunk !== null
            && (! $retrieveDescription
                || $foundChunk < $currentChunk
                || (!empty($title) && !empty($description) && !empty($keywords))
            )
        ) {
            return false;
        }

        return strlen($data);
    };
}