1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
|
<?php
declare(strict_types=1);
namespace Shaarli\Http;
use Shaarli\Config\ConfigManager;
/**
 * HTTP tool used to extract metadata from an external URL (title, description, etc.).
 *
 * The download is delegated to the injected HttpAccess instance; the timeout, the maximum
 * download size and the extraction options are read from the injected ConfigManager.
 */
class MetadataRetriever
{
    /** @var ConfigManager */
    protected $conf;

    /** @var HttpAccess */
    protected $httpAccess;

    public function __construct(ConfigManager $conf, HttpAccess $httpAccess)
    {
        $this->conf = $conf;
        $this->httpAccess = $httpAccess;
    }

    /**
     * Retrieve metadata for given URL.
     *
     * @param string $url URL of the remote page to inspect.
     *
     * @return array [
     *     'title' => <remote title>,
     *     'description' => <remote description>,
     *     'tags' => <remote keywords>,
     * ]
     * Every value is either a trimmed, non-empty string or null.
     */
    public function retrieve(string $url): array
    {
        $charset = null;
        $title = null;
        $description = null;
        $tags = null;

        // Timeout and maximum size are configurable to keep the application responsive.
        // The callbacks are expected to fill $charset, $title, $description and $tags
        // (handed over by reference) with data found in the downloaded page.
        $this->httpAccess->getHttpResponse(
            $url,
            $this->conf->get('general.download_timeout', 30),
            $this->conf->get('general.download_max_size', 4194304),
            $this->httpAccess->getCurlHeaderCallback($charset),
            $this->httpAccess->getCurlDownloadCallback(
                $charset,
                $title,
                $description,
                $tags,
                $this->conf->get('general.retrieve_description'),
                $this->conf->get('general.tags_separator', ' ')
            )
        );

        // The charset stays null when nothing was detected: calling strtolower() on it would
        // raise a TypeError under strict_types, so normalise it to a plain string first.
        $charset = is_string($charset) ? trim($charset) : '';

        if (is_string($title) && $title !== '' && $charset !== '' && strtolower($charset) !== 'utf-8') {
            $title = $this->convertToUtf8($title, $charset);
        }

        return array_map([$this, 'cleanMetadata'], [
            'title' => $title,
            'description' => $description,
            'tags' => $tags,
        ]);
    }

    /**
     * Normalise a single metadata value.
     *
     * @param mixed $data Raw value produced by the download callback.
     *
     * @return string|null The trimmed string, or null if $data is not a string
     *                     or contains only whitespace.
     */
    protected function cleanMetadata($data): ?string
    {
        if (!is_string($data)) {
            return null;
        }

        $trimmed = trim($data);

        // Compare against '' rather than using empty(), which would also discard "0".
        return $trimmed === '' ? null : $trimmed;
    }

    /**
     * Convert text from the given charset to UTF-8.
     *
     * The charset name is reported by the remote page, so it may not be an encoding
     * supported by mbstring. In that case the text is returned unchanged instead of
     * failing the whole retrieval.
     *
     * @param string $text    Text to convert.
     * @param string $charset Source encoding name (non-empty).
     *
     * @return string Converted text, or $text unchanged if the conversion is not possible.
     */
    private function convertToUtf8(string $text, string $charset): string
    {
        try {
            $converted = mb_convert_encoding($text, 'utf-8', $charset);
        } catch (\ValueError $e) {
            // PHP 8+ throws on an unsupported encoding name.
            return $text;
        }

        // PHP 7 reports an unsupported encoding with a warning and a false return value.
        return is_string($converted) ? $converted : $text;
    }
}
|