aboutsummaryrefslogtreecommitdiffhomepage
path: root/application
diff options
context:
space:
mode:
Diffstat (limited to 'application')
-rw-r--r--application/config/ConfigManager.php3
-rw-r--r--application/container/ContainerBuilder.php5
-rw-r--r--application/container/ShaarliContainer.php2
-rw-r--r--application/front/controller/admin/ManageShaareController.php36
-rw-r--r--application/front/controller/admin/MetadataController.php29
-rw-r--r--application/http/HttpAccess.php22
-rw-r--r--application/http/HttpUtils.php123
-rw-r--r--application/http/MetadataRetriever.php69
8 files changed, 209 insertions, 80 deletions
diff --git a/application/config/ConfigManager.php b/application/config/ConfigManager.php
index 4c98be30..fb085023 100644
--- a/application/config/ConfigManager.php
+++ b/application/config/ConfigManager.php
@@ -366,7 +366,8 @@ class ConfigManager
366 $this->setEmpty('general.links_per_page', 20); 366 $this->setEmpty('general.links_per_page', 20);
367 $this->setEmpty('general.enabled_plugins', self::$DEFAULT_PLUGINS); 367 $this->setEmpty('general.enabled_plugins', self::$DEFAULT_PLUGINS);
368 $this->setEmpty('general.default_note_title', 'Note: '); 368 $this->setEmpty('general.default_note_title', 'Note: ');
369 $this->setEmpty('general.retrieve_description', false); 369 $this->setEmpty('general.retrieve_description', true);
370 $this->setEmpty('general.enable_async_metadata', true);
370 371
371 $this->setEmpty('updates.check_updates', false); 372 $this->setEmpty('updates.check_updates', false);
372 $this->setEmpty('updates.check_updates_branch', 'stable'); 373 $this->setEmpty('updates.check_updates_branch', 'stable');
diff --git a/application/container/ContainerBuilder.php b/application/container/ContainerBuilder.php
index c21d58dd..fd94a1c3 100644
--- a/application/container/ContainerBuilder.php
+++ b/application/container/ContainerBuilder.php
@@ -14,6 +14,7 @@ use Shaarli\Front\Controller\Visitor\ErrorController;
14use Shaarli\Front\Controller\Visitor\ErrorNotFoundController; 14use Shaarli\Front\Controller\Visitor\ErrorNotFoundController;
15use Shaarli\History; 15use Shaarli\History;
16use Shaarli\Http\HttpAccess; 16use Shaarli\Http\HttpAccess;
17use Shaarli\Http\MetadataRetriever;
17use Shaarli\Netscape\NetscapeBookmarkUtils; 18use Shaarli\Netscape\NetscapeBookmarkUtils;
18use Shaarli\Plugin\PluginManager; 19use Shaarli\Plugin\PluginManager;
19use Shaarli\Render\PageBuilder; 20use Shaarli\Render\PageBuilder;
@@ -90,6 +91,10 @@ class ContainerBuilder
90 ); 91 );
91 }; 92 };
92 93
94 $container['metadataRetriever'] = function (ShaarliContainer $container): MetadataRetriever {
95 return new MetadataRetriever($container->conf, $container->httpAccess);
96 };
97
93 $container['pageBuilder'] = function (ShaarliContainer $container): PageBuilder { 98 $container['pageBuilder'] = function (ShaarliContainer $container): PageBuilder {
94 return new PageBuilder( 99 return new PageBuilder(
95 $container->conf, 100 $container->conf,
diff --git a/application/container/ShaarliContainer.php b/application/container/ShaarliContainer.php
index 66e669aa..3a7c238f 100644
--- a/application/container/ShaarliContainer.php
+++ b/application/container/ShaarliContainer.php
@@ -10,6 +10,7 @@ use Shaarli\Feed\FeedBuilder;
10use Shaarli\Formatter\FormatterFactory; 10use Shaarli\Formatter\FormatterFactory;
11use Shaarli\History; 11use Shaarli\History;
12use Shaarli\Http\HttpAccess; 12use Shaarli\Http\HttpAccess;
13use Shaarli\Http\MetadataRetriever;
13use Shaarli\Netscape\NetscapeBookmarkUtils; 14use Shaarli\Netscape\NetscapeBookmarkUtils;
14use Shaarli\Plugin\PluginManager; 15use Shaarli\Plugin\PluginManager;
15use Shaarli\Render\PageBuilder; 16use Shaarli\Render\PageBuilder;
@@ -35,6 +36,7 @@ use Slim\Container;
35 * @property History $history 36 * @property History $history
36 * @property HttpAccess $httpAccess 37 * @property HttpAccess $httpAccess
37 * @property LoginManager $loginManager 38 * @property LoginManager $loginManager
39 * @property MetadataRetriever $metadataRetriever
38 * @property NetscapeBookmarkUtils $netscapeBookmarkUtils 40 * @property NetscapeBookmarkUtils $netscapeBookmarkUtils
39 * @property callable $notFoundHandler Overrides default Slim exception display 41 * @property callable $notFoundHandler Overrides default Slim exception display
40 * @property PageBuilder $pageBuilder 42 * @property PageBuilder $pageBuilder
diff --git a/application/front/controller/admin/ManageShaareController.php b/application/front/controller/admin/ManageShaareController.php
index bb083486..df2f1631 100644
--- a/application/front/controller/admin/ManageShaareController.php
+++ b/application/front/controller/admin/ManageShaareController.php
@@ -53,36 +53,22 @@ class ManageShaareController extends ShaarliAdminController
53 53
54 // If this is an HTTP(S) link, we try go get the page to extract 54 // If this is an HTTP(S) link, we try go get the page to extract
55 // the title (otherwise we will to straight to the edit form.) 55 // the title (otherwise we will to straight to the edit form.)
56 if (empty($title) && strpos(get_url_scheme($url) ?: '', 'http') !== false) { 56 if (true !== $this->container->conf->get('general.enable_async_metadata', true)
57 $retrieveDescription = $this->container->conf->get('general.retrieve_description'); 57 && empty($title)
58 // Short timeout to keep the application responsive 58 && strpos(get_url_scheme($url) ?: '', 'http') !== false
59 // The callback will fill $charset and $title with data from the downloaded page. 59 ) {
60 $this->container->httpAccess->getHttpResponse( 60 $metadata = $this->container->metadataRetriever->retrieve($url);
61 $url,
62 $this->container->conf->get('general.download_timeout', 30),
63 $this->container->conf->get('general.download_max_size', 4194304),
64 $this->container->httpAccess->getCurlDownloadCallback(
65 $charset,
66 $title,
67 $description,
68 $tags,
69 $retrieveDescription
70 )
71 );
72 if (! empty($title) && strtolower($charset) !== 'utf-8' && mb_check_encoding($charset)) {
73 $title = mb_convert_encoding($title, 'utf-8', $charset);
74 }
75 } 61 }
76 62
77 if (empty($url) && empty($title)) { 63 if (empty($url)) {
78 $title = $this->container->conf->get('general.default_note_title', t('Note: ')); 64 $metadata['title'] = $this->container->conf->get('general.default_note_title', t('Note: '));
79 } 65 }
80 66
81 $link = [ 67 $link = [
82 'title' => $title, 68 'title' => $title ?? $metadata['title'] ?? '',
83 'url' => $url ?? '', 69 'url' => $url ?? '',
84 'description' => $description ?? '', 70 'description' => $description ?? $metadata['description'] ?? '',
85 'tags' => $tags ?? '', 71 'tags' => $tags ?? $metadata['tags'] ?? '',
86 'private' => $private, 72 'private' => $private,
87 ]; 73 ];
88 } else { 74 } else {
@@ -352,6 +338,8 @@ class ManageShaareController extends ShaarliAdminController
352 'source' => $request->getParam('source') ?? '', 338 'source' => $request->getParam('source') ?? '',
353 'tags' => $tags, 339 'tags' => $tags,
354 'default_private_links' => $this->container->conf->get('privacy.default_private_links', false), 340 'default_private_links' => $this->container->conf->get('privacy.default_private_links', false),
341 'async_metadata' => $this->container->conf->get('general.enable_async_metadata', true),
342 'retrieve_description' => $this->container->conf->get('general.retrieve_description', false),
355 ]); 343 ]);
356 344
357 $this->executePageHooks('render_editlink', $data, TemplatePage::EDIT_LINK); 345 $this->executePageHooks('render_editlink', $data, TemplatePage::EDIT_LINK);
diff --git a/application/front/controller/admin/MetadataController.php b/application/front/controller/admin/MetadataController.php
new file mode 100644
index 00000000..ff845944
--- /dev/null
+++ b/application/front/controller/admin/MetadataController.php
@@ -0,0 +1,29 @@
1<?php
2
3declare(strict_types=1);
4
5namespace Shaarli\Front\Controller\Admin;
6
7use Slim\Http\Request;
8use Slim\Http\Response;
9
/**
 * Controller used to retrieve/update bookmark's metadata.
 */
class MetadataController extends ShaarliAdminController
{
    /**
     * Attempt to retrieve a bookmark's remote metadata and return it as JSON.
     *
     * The target URL is read from the `url` request parameter.
     * NOTE(review): the previous comment documented the route as
     * `GET /admin/metadata/{url}`; confirm against the route definition.
     *
     * @param Request  $request  Request carrying the `url` parameter.
     * @param Response $response Response used to build the JSON payload.
     *
     * @return Response JSON body holding whatever the metadata retriever returned,
     *                  or an empty JSON array when the URL is missing, is not a
     *                  string, or does not have an HTTP(S)-like scheme.
     */
    public function ajaxRetrieveTitle(Request $request, Response $response): Response
    {
        $url = $request->getParam('url');

        // Request parameters are user-controlled and may be arrays (e.g. `?url[]=x`).
        // This file runs with strict_types, so a non-string value would raise a
        // TypeError once passed to MetadataRetriever::retrieve(string $url).
        if (!is_string($url) || empty($url)) {
            return $response->withJson([]);
        }

        // Only try to extract metadata from URL with HTTP(s) scheme
        if (strpos(get_url_scheme($url) ?: '', 'http') === false) {
            return $response->withJson([]);
        }

        return $response->withJson($this->container->metadataRetriever->retrieve($url));
    }
}
diff --git a/application/http/HttpAccess.php b/application/http/HttpAccess.php
index 81d9e076..646a5264 100644
--- a/application/http/HttpAccess.php
+++ b/application/http/HttpAccess.php
@@ -14,9 +14,14 @@ namespace Shaarli\Http;
14 */ 14 */
15class HttpAccess 15class HttpAccess
16{ 16{
17 public function getHttpResponse($url, $timeout = 30, $maxBytes = 4194304, $curlWriteFunction = null) 17 public function getHttpResponse(
18 { 18 $url,
19 return get_http_response($url, $timeout, $maxBytes, $curlWriteFunction); 19 $timeout = 30,
20 $maxBytes = 4194304,
21 $curlHeaderFunction = null,
22 $curlWriteFunction = null
23 ) {
24 return get_http_response($url, $timeout, $maxBytes, $curlHeaderFunction, $curlWriteFunction);
20 } 25 }
21 26
22 public function getCurlDownloadCallback( 27 public function getCurlDownloadCallback(
@@ -24,16 +29,19 @@ class HttpAccess
24 &$title, 29 &$title,
25 &$description, 30 &$description,
26 &$keywords, 31 &$keywords,
27 $retrieveDescription, 32 $retrieveDescription
28 $curlGetInfo = 'curl_getinfo'
29 ) { 33 ) {
30 return get_curl_download_callback( 34 return get_curl_download_callback(
31 $charset, 35 $charset,
32 $title, 36 $title,
33 $description, 37 $description,
34 $keywords, 38 $keywords,
35 $retrieveDescription, 39 $retrieveDescription
36 $curlGetInfo
37 ); 40 );
38 } 41 }
42
/**
 * Object-oriented wrapper around the global get_curl_header_callback() function.
 *
 * @param mixed           $charset     Passed by reference to the underlying function.
 * @param callable|string $curlGetInfo Function used in place of curl_getinfo.
 *
 * @return mixed Whatever get_curl_header_callback() returns.
 */
public function getCurlHeaderCallback(&$charset, $curlGetInfo = 'curl_getinfo')
{
    $headerCallback = get_curl_header_callback($charset, $curlGetInfo);

    return $headerCallback;
}
39} 47}
diff --git a/application/http/HttpUtils.php b/application/http/HttpUtils.php
index 9f414073..28c12969 100644
--- a/application/http/HttpUtils.php
+++ b/application/http/HttpUtils.php
@@ -6,12 +6,14 @@ use Shaarli\Http\Url;
6 * GET an HTTP URL to retrieve its content 6 * GET an HTTP URL to retrieve its content
7 * Uses the cURL library or a fallback method 7 * Uses the cURL library or a fallback method
8 * 8 *
9 * @param string $url URL to get (http://...) 9 * @param string $url URL to get (http://...)
10 * @param int $timeout network timeout (in seconds) 10 * @param int $timeout network timeout (in seconds)
11 * @param int $maxBytes maximum downloaded bytes (default: 4 MiB) 11 * @param int $maxBytes maximum downloaded bytes (default: 4 MiB)
12 * @param callable|string $curlWriteFunction Optional callback called during the download (cURL CURLOPT_WRITEFUNCTION). 12 * @param callable|string $curlHeaderFunction Optional callback called during the download of headers
13 * Can be used to add download conditions on the 13 * (CURLOPT_HEADERFUNCTION)
14 * headers (response code, content type, etc.). 14 * @param callable|string $curlWriteFunction Optional callback called during the download (cURL CURLOPT_WRITEFUNCTION).
15 * Can be used to add download conditions on the
16 * headers (response code, content type, etc.).
15 * 17 *
16 * @return array HTTP response headers, downloaded content 18 * @return array HTTP response headers, downloaded content
17 * 19 *
@@ -35,8 +37,13 @@ use Shaarli\Http\Url;
35 * @see http://stackoverflow.com/q/9183178 37 * @see http://stackoverflow.com/q/9183178
36 * @see http://stackoverflow.com/q/1462720 38 * @see http://stackoverflow.com/q/1462720
37 */ 39 */
38function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteFunction = null) 40function get_http_response(
39{ 41 $url,
42 $timeout = 30,
43 $maxBytes = 4194304,
44 $curlHeaderFunction = null,
45 $curlWriteFunction = null
46) {
40 $urlObj = new Url($url); 47 $urlObj = new Url($url);
41 $cleanUrl = $urlObj->idnToAscii(); 48 $cleanUrl = $urlObj->idnToAscii();
42 49
@@ -70,7 +77,8 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteF
70 // General cURL settings 77 // General cURL settings
71 curl_setopt($ch, CURLOPT_AUTOREFERER, true); 78 curl_setopt($ch, CURLOPT_AUTOREFERER, true);
72 curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); 79 curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
73 curl_setopt($ch, CURLOPT_HEADER, true); 80 // Default header download if the $curlHeaderFunction is not defined
81 curl_setopt($ch, CURLOPT_HEADER, !is_callable($curlHeaderFunction));
74 curl_setopt( 82 curl_setopt(
75 $ch, 83 $ch,
76 CURLOPT_HTTPHEADER, 84 CURLOPT_HTTPHEADER,
@@ -81,25 +89,21 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteF
81 curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); 89 curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
82 curl_setopt($ch, CURLOPT_USERAGENT, $userAgent); 90 curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
83 91
84 if (is_callable($curlWriteFunction)) {
85 curl_setopt($ch, CURLOPT_WRITEFUNCTION, $curlWriteFunction);
86 }
87
88 // Max download size management 92 // Max download size management
89 curl_setopt($ch, CURLOPT_BUFFERSIZE, 1024*16); 93 curl_setopt($ch, CURLOPT_BUFFERSIZE, 1024*16);
90 curl_setopt($ch, CURLOPT_NOPROGRESS, false); 94 curl_setopt($ch, CURLOPT_NOPROGRESS, false);
95 if (is_callable($curlHeaderFunction)) {
96 curl_setopt($ch, CURLOPT_HEADERFUNCTION, $curlHeaderFunction);
97 }
98 if (is_callable($curlWriteFunction)) {
99 curl_setopt($ch, CURLOPT_WRITEFUNCTION, $curlWriteFunction);
100 }
91 curl_setopt( 101 curl_setopt(
92 $ch, 102 $ch,
93 CURLOPT_PROGRESSFUNCTION, 103 CURLOPT_PROGRESSFUNCTION,
94 function ($arg0, $arg1, $arg2, $arg3, $arg4 = 0) use ($maxBytes) { 104 function ($arg0, $arg1, $arg2, $arg3, $arg4) use ($maxBytes) {
95 if (version_compare(phpversion(), '5.5', '<')) { 105 $downloaded = $arg2;
96 // PHP version lower than 5.5 106
97 // Callback has 4 arguments
98 $downloaded = $arg1;
99 } else {
100 // Callback has 5 arguments
101 $downloaded = $arg2;
102 }
103 // Non-zero return stops downloading 107 // Non-zero return stops downloading
104 return ($downloaded > $maxBytes) ? 1 : 0; 108 return ($downloaded > $maxBytes) ? 1 : 0;
105 } 109 }
@@ -493,6 +497,46 @@ function is_https($server)
493 * Get cURL callback function for CURLOPT_WRITEFUNCTION 497 * Get cURL callback function for CURLOPT_WRITEFUNCTION
494 * 498 *
495 * @param string $charset to extract from the downloaded page (reference) 499 * @param string $charset to extract from the downloaded page (reference)
500 * @param string $curlGetInfo Optionally overrides curl_getinfo function
501 *
502 * @return Closure
503 */
function get_curl_header_callback(
    &$charset,
    $curlGetInfo = 'curl_getinfo'
) {
    // Builds a closure with the ($ch, $data) signature used for CURLOPT_HEADERFUNCTION.
    // The closure receives one chunk of header data per call and returns either the
    // chunk length (keep going) or false (stop the transfer). While doing so it fills
    // $charset (by reference) from the Content-Type header when it is still empty.
    $isRedirected = false;

    // Every status code that makes cURL follow a Location header. Previously only
    // 301/302 were listed, so a 303/307/308 hop fell through to the "not 200"
    // check below and aborted the transfer before the final page was reached.
    $redirectCodes = [301, 302, 303, 307, 308];

    return function ($ch, $data) use ($curlGetInfo, $redirectCodes, &$charset, &$isRedirected) {
        $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
        $chunkLength = strlen($data);

        if (!empty($responseCode) && in_array($responseCode, $redirectCodes, true)) {
            $isRedirected = true;
            return $chunkLength;
        }
        if (!empty($responseCode) && $responseCode !== 200) {
            return false;
        }

        // After a redirection, the content type will keep the previous request value
        // until it finds the next content-type header.
        $contentType = null;
        if (! $isRedirected || stripos($data, 'content-type') !== false) {
            $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
        }

        // Media types are case-insensitive, hence stripos() rather than strpos().
        if (!empty($contentType) && stripos($contentType, 'text/html') === false) {
            return false;
        }
        if (!empty($contentType) && empty($charset)) {
            $charset = header_extract_charset($contentType);
        }

        return $chunkLength;
    };
}
535
536/**
537 * Get cURL callback function for CURLOPT_WRITEFUNCTION
538 *
539 * @param string $charset to extract from the downloaded page (reference)
496 * @param string $title to extract from the downloaded page (reference) 540 * @param string $title to extract from the downloaded page (reference)
497 * @param string $description to extract from the downloaded page (reference) 541 * @param string $description to extract from the downloaded page (reference)
498 * @param string $keywords to extract from the downloaded page (reference) 542 * @param string $keywords to extract from the downloaded page (reference)
@@ -506,10 +550,8 @@ function get_curl_download_callback(
506 &$title, 550 &$title,
507 &$description, 551 &$description,
508 &$keywords, 552 &$keywords,
509 $retrieveDescription, 553 $retrieveDescription
510 $curlGetInfo = 'curl_getinfo'
511) { 554) {
512 $isRedirected = false;
513 $currentChunk = 0; 555 $currentChunk = 0;
514 $foundChunk = null; 556 $foundChunk = null;
515 557
@@ -524,37 +566,18 @@ function get_curl_download_callback(
524 * 566 *
525 * @return int|bool length of $data or false if we need to stop the download 567 * @return int|bool length of $data or false if we need to stop the download
526 */ 568 */
527 return function (&$ch, $data) use ( 569 return function ($ch, $data) use (
528 $retrieveDescription, 570 $retrieveDescription,
529 $curlGetInfo,
530 &$charset, 571 &$charset,
531 &$title, 572 &$title,
532 &$description, 573 &$description,
533 &$keywords, 574 &$keywords,
534 &$isRedirected,
535 &$currentChunk, 575 &$currentChunk,
536 &$foundChunk 576 &$foundChunk
537 ) { 577 ) {
578 $chunkLength = strlen($data);
538 $currentChunk++; 579 $currentChunk++;
539 $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE); 580
540 if (!empty($responseCode) && in_array($responseCode, [301, 302])) {
541 $isRedirected = true;
542 return strlen($data);
543 }
544 if (!empty($responseCode) && $responseCode !== 200) {
545 return false;
546 }
547 // After a redirection, the content type will keep the previous request value
548 // until it finds the next content-type header.
549 if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) {
550 $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
551 }
552 if (!empty($contentType) && strpos($contentType, 'text/html') === false) {
553 return false;
554 }
555 if (!empty($contentType) && empty($charset)) {
556 $charset = header_extract_charset($contentType);
557 }
558 if (empty($charset)) { 581 if (empty($charset)) {
559 $charset = html_extract_charset($data); 582 $charset = html_extract_charset($data);
560 } 583 }
@@ -562,6 +585,10 @@ function get_curl_download_callback(
562 $title = html_extract_title($data); 585 $title = html_extract_title($data);
563 $foundChunk = ! empty($title) ? $currentChunk : $foundChunk; 586 $foundChunk = ! empty($title) ? $currentChunk : $foundChunk;
564 } 587 }
588 if (empty($title)) {
589 $title = html_extract_tag('title', $data);
590 $foundChunk = ! empty($title) ? $currentChunk : $foundChunk;
591 }
565 if ($retrieveDescription && empty($description)) { 592 if ($retrieveDescription && empty($description)) {
566 $description = html_extract_tag('description', $data); 593 $description = html_extract_tag('description', $data);
567 $foundChunk = ! empty($description) ? $currentChunk : $foundChunk; 594 $foundChunk = ! empty($description) ? $currentChunk : $foundChunk;
@@ -591,6 +618,6 @@ function get_curl_download_callback(
591 return false; 618 return false;
592 } 619 }
593 620
594 return strlen($data); 621 return $chunkLength;
595 }; 622 };
596} 623}
diff --git a/application/http/MetadataRetriever.php b/application/http/MetadataRetriever.php
new file mode 100644
index 00000000..ba9bd40c
--- /dev/null
+++ b/application/http/MetadataRetriever.php
@@ -0,0 +1,69 @@
1<?php
2
3declare(strict_types=1);
4
5namespace Shaarli\Http;
6
7use Shaarli\Config\ConfigManager;
8
/**
 * HTTP Tool used to extract metadata from external URL (title, description, etc.).
 */
class MetadataRetriever
{
    /** @var ConfigManager */
    protected $conf;

    /** @var HttpAccess */
    protected $httpAccess;

    public function __construct(ConfigManager $conf, HttpAccess $httpAccess)
    {
        $this->conf = $conf;
        $this->httpAccess = $httpAccess;
    }

    /**
     * Retrieve metadata for given URL.
     *
     * The title is converted to UTF-8 when a different charset was detected.
     * Description and tags are returned exactly as the download callback set them.
     *
     * @param string $url Remote URL to download.
     *
     * @return array [
     *     'title'       => <remote title>,
     *     'description' => <remote description>,
     *     'tags'        => <remote keywords>,
     * ]
     *     Entries stay null when the callbacks did not set them.
     */
    public function retrieve(string $url): array
    {
        $charset = null;
        $title = null;
        $description = null;
        $tags = null;
        $retrieveDescription = $this->conf->get('general.retrieve_description');

        // Timeout and maximum download size come from the configuration.
        // Both callbacks receive the variables above by reference and fill them
        // while the page is being downloaded.
        $this->httpAccess->getHttpResponse(
            $url,
            $this->conf->get('general.download_timeout', 30),
            $this->conf->get('general.download_max_size', 4194304),
            $this->httpAccess->getCurlHeaderCallback($charset),
            $this->httpAccess->getCurlDownloadCallback(
                $charset,
                $title,
                $description,
                $tags,
                $retrieveDescription
            )
        );

        // $charset is still null when nothing was detected. This file runs with
        // strict_types, so strtolower(null) would raise a TypeError: only convert
        // when an actual charset name is available.
        $hasCharset = is_string($charset) && $charset !== '';
        if (!empty($title) && $hasCharset && strtolower($charset) !== 'utf-8') {
            $title = $this->convertToUtf8((string) $title, $charset);
        }

        return [
            'title' => $title,
            'description' => $description,
            'tags' => $tags,
        ];
    }

    /**
     * Convert a string to UTF-8 from a charset announced by a remote page.
     *
     * The charset name is untrusted input: an unsupported name makes
     * mb_convert_encoding() throw a ValueError (PHP 8) or return false (PHP 7).
     * Both cases are reported as null instead of failing the request.
     *
     * @param string $text    Text to convert.
     * @param string $charset Source charset name.
     *
     * @return string|null Converted text, or null when the conversion failed.
     */
    private function convertToUtf8(string $text, string $charset): ?string
    {
        try {
            $converted = mb_convert_encoding($text, 'utf-8', $charset);
        } catch (\ValueError $e) {
            return null;
        }

        return is_string($converted) ? $converted : null;
    }
}