]> git.immae.eu Git - github/shaarli/Shaarli.git/blame - application/http/MetadataRetriever.php
Fix: synchronous metadata retrieval is failing in strict mode
[github/shaarli/Shaarli.git] / application / http / MetadataRetriever.php
CommitLineData
4cf3564d
A
1<?php
2
3declare(strict_types=1);
4
5namespace Shaarli\Http;
6
7use Shaarli\Config\ConfigManager;
8
/**
 * HTTP tool used to extract metadata (title, description, tags) from an external URL.
 */
class MetadataRetriever
{
    /** @var ConfigManager */
    protected $conf;

    /** @var HttpAccess */
    protected $httpAccess;

    public function __construct(ConfigManager $conf, HttpAccess $httpAccess)
    {
        $this->conf = $conf;
        $this->httpAccess = $httpAccess;
    }

    /**
     * Retrieve metadata for given URL.
     *
     * Each value is trimmed; a value that is missing, not a string, or blank is returned as null.
     *
     * @param string $url URL of the remote page to inspect.
     *
     * @return array [
     *     'title'       => <remote title>,
     *     'description' => <remote description>,
     *     'tags'        => <remote keywords>,
     * ]
     */
    public function retrieve(string $url): array
    {
        $charset = null;
        $title = null;
        $description = null;
        $tags = null;

        // Short timeout to keep the application responsive
        // The callbacks will fill $charset, $title, $description and $tags
        // with data from the downloaded page.
        $this->httpAccess->getHttpResponse(
            $url,
            $this->conf->get('general.download_timeout', 30),
            $this->conf->get('general.download_max_size', 4194304),
            $this->httpAccess->getCurlHeaderCallback($charset),
            $this->httpAccess->getCurlDownloadCallback(
                $charset,
                $title,
                $description,
                $tags,
                $this->conf->get('general.retrieve_description'),
                $this->conf->get('general.tags_separator', ' ')
            )
        );

        // $charset is still null when no charset could be detected. With strict_types enabled,
        // strtolower(null) raises a TypeError, so only convert when a usable charset is known
        // and the title is not already UTF-8.
        $hasTitle = is_string($title) && $title !== '';
        $hasCharset = is_string($charset) && $charset !== '';
        if ($hasTitle && $hasCharset && strtolower($charset) !== 'utf-8') {
            $title = mb_convert_encoding($title, 'utf-8', $charset);
        }

        return array_map([$this, 'cleanMetadata'], [
            'title' => $title,
            'description' => $description,
            'tags' => $tags,
        ]);
    }

    /**
     * Normalize a single metadata value.
     *
     * @param mixed $data Raw value produced by the download callbacks.
     *
     * @return string|null The trimmed string, or null if $data is not a string
     *                     or contains only whitespace.
     */
    protected function cleanMetadata($data): ?string
    {
        if (!is_string($data)) {
            return null;
        }

        // Compare against '' rather than using empty(): empty('0') is true,
        // which would discard a legitimate "0" value.
        $trimmed = trim($data);

        return $trimmed === '' ? null : $trimmed;
    }
}