]>
Commit | Line | Data |
---|---|---|
1 | <?php | |
2 | ||
3 | declare(strict_types=1); | |
4 | ||
5 | namespace Shaarli\Http; | |
6 | ||
7 | use Shaarli\Config\ConfigManager; | |
8 | ||
/**
 * HTTP tool used to extract metadata (title, description, keywords) from an external URL.
 */
class MetadataRetriever
{
    /** @var ConfigManager */
    protected $conf;

    /** @var HttpAccess */
    protected $httpAccess;

    public function __construct(ConfigManager $conf, HttpAccess $httpAccess)
    {
        $this->conf = $conf;
        $this->httpAccess = $httpAccess;
    }

    /**
     * Retrieve metadata for given URL.
     *
     * The download is bounded by the `general.download_timeout` and
     * `general.download_max_size` settings. The title is converted to UTF-8 when a
     * different charset was detected for the page; every value is then trimmed, and
     * anything missing or blank is returned as null.
     *
     * @param string $url URL of the remote page.
     *
     * @return array [
     *     'title' => <remote title>,
     *     'description' => <remote description>,
     *     'tags' => <remote keywords>,
     * ]
     */
    public function retrieve(string $url): array
    {
        $charset = null;
        $title = null;
        $description = null;
        $tags = null;

        // The timeout and the maximum download size keep the application responsive.
        // The callbacks are expected to fill $charset, $title, $description and $tags
        // with data from the downloaded page (presumably by reference - confirm
        // against HttpAccess).
        $this->httpAccess->getHttpResponse(
            $url,
            $this->conf->get('general.download_timeout', 30),
            $this->conf->get('general.download_max_size', 4194304),
            $this->httpAccess->getCurlHeaderCallback($charset),
            $this->httpAccess->getCurlDownloadCallback(
                $charset,
                $title,
                $description,
                $tags,
                $this->conf->get('general.retrieve_description'),
                $this->conf->get('general.tags_separator', ' ')
            )
        );

        // Only convert when both a title and a usable charset name are available:
        // with strict_types enabled, strtolower(null) throws a TypeError, so an
        // undetected charset must never reach the string functions below.
        $needsConversion = is_string($title)
            && $title !== ''
            && is_string($charset)
            && $charset !== ''
            && strtolower($charset) !== 'utf-8';

        if ($needsConversion) {
            try {
                $title = mb_convert_encoding($title, 'utf-8', $charset);
            } catch (\ValueError $e) {
                // PHP 8+ throws when the charset announced by the remote page is not
                // known to mbstring. Metadata retrieval is best effort: keep the raw
                // title, exactly as when no charset was detected at all.
            }
        }

        return array_map([$this, 'cleanMetadata'], [
            'title' => $title,
            'description' => $description,
            'tags' => $tags,
        ]);
    }

    /**
     * Normalize a single metadata value.
     *
     * @param mixed $data Raw value collected for one metadata field.
     *
     * @return string|null The trimmed string, or null when $data is not a string
     *                     or contains only whitespace.
     */
    protected function cleanMetadata($data): ?string
    {
        if (!is_string($data)) {
            return null;
        }

        $trimmed = trim($data);

        // Compare against '' rather than using empty(): empty('0') is true, which
        // would silently discard a legitimate value of "0".
        return $trimmed === '' ? null : $trimmed;
    }
}