3 declare(strict_types
=1);
5 namespace Shaarli\Http
;
7 use Shaarli\Config\ConfigManager
;
 * HTTP tool used to extract metadata (title, description, etc.) from an external URL.
12 class MetadataRetriever
14 /** @var ConfigManager */
17 /** @var HttpAccess */
18 protected $httpAccess;
/**
 * Build a retriever from its two collaborators.
 *
 * @param ConfigManager $conf       Configuration store; retrieve() reads its download and parsing settings from it.
 * @param HttpAccess    $httpAccess HTTP wrapper that retrieve() uses to fetch the remote page.
 */
public function __construct(ConfigManager $conf, HttpAccess $httpAccess)
{
    // retrieve() reads settings through $this->conf, so the configuration
    // must be stored alongside the HTTP wrapper.
    $this->conf = $conf;
    $this->httpAccess = $httpAccess;
}
27 * Retrieve metadata for given URL.
30 * 'title' => <remote title>,
31 * 'description' => <remote description>,
32 * 'tags' => <remote keywords>,
35 public function retrieve(string $url): array
42 // Short timeout to keep the application responsive
43 // The callback will fill $charset and $title with data from the downloaded page.
44 $this->httpAccess
->getHttpResponse(
46 $this->conf
->get('general.download_timeout', 30),
47 $this->conf
->get('general.download_max_size', 4194304),
48 $this->httpAccess
->getCurlHeaderCallback($charset),
49 $this->httpAccess
->getCurlDownloadCallback(
54 $this->conf
->get('general.retrieve_description'),
55 $this->conf
->get('general.tags_separator', ' ')
59 if (!empty($title) && strtolower($charset) !== 'utf-8') {
60 $title = mb_convert_encoding($title, 'utf-8', $charset);
65 'description' => $description,