diff options
author | ArthurHoaro <arthur@hoa.ro> | 2020-11-12 13:11:07 +0100 |
---|---|---|
committer | ArthurHoaro <arthur@hoa.ro> | 2020-11-12 13:11:07 +0100 |
commit | af50eba28a7bd286de4c8c9ee6dc5216b915d149 (patch) | |
tree | ffa30a9358e82d27be75d8fc5e57f3c8820dc6d3 /application/http/MetadataRetriever.php | |
parent | b6f678a5a1d15acf284ebcec16c905e976671ce1 (diff) | |
parent | 1409f1c89a7ca01456ae2dcd6357d296e2b99f5a (diff) | |
download | Shaarli-af50eba28a7bd286de4c8c9ee6dc5216b915d149.tar.gz Shaarli-af50eba28a7bd286de4c8c9ee6dc5216b915d149.tar.zst Shaarli-af50eba28a7bd286de4c8c9ee6dc5216b915d149.zip |
Merge tag 'v0.12.1' into latest
v0.12.1
Diffstat (limited to 'application/http/MetadataRetriever.php')
-rw-r--r-- | application/http/MetadataRetriever.php | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/application/http/MetadataRetriever.php b/application/http/MetadataRetriever.php new file mode 100644 index 00000000..2e1401ec --- /dev/null +++ b/application/http/MetadataRetriever.php | |||
@@ -0,0 +1,69 @@ | |||
1 | <?php | ||
2 | |||
3 | declare(strict_types=1); | ||
4 | |||
5 | namespace Shaarli\Http; | ||
6 | |||
7 | use Shaarli\Config\ConfigManager; | ||
8 | |||
9 | /** | ||
10 | * HTTP Tool used to extract metadata from external URL (title, description, etc.). | ||
11 | */ | ||
class MetadataRetriever
{
    /** @var ConfigManager Configuration source for the download timeout, size limit and retrieval options. */
    protected $conf;

    /** @var HttpAccess HTTP helper used to perform the request and to build the cURL callbacks. */
    protected $httpAccess;

    public function __construct(ConfigManager $conf, HttpAccess $httpAccess)
    {
        $this->conf = $conf;
        $this->httpAccess = $httpAccess;
    }

    /**
     * Retrieve metadata for given URL.
     *
     * The request is delegated to HttpAccess, using the `general.download_timeout`
     * (default: 30) and `general.download_max_size` (default: 4194304) settings.
     * A non-empty title is converted to UTF-8 when a different charset was detected.
     *
     * @param string $url URL of the remote page to inspect.
     *
     * @return array [
     *     'title' => <remote title>,
     *     'description' => <remote description>,
     *     'tags' => <remote keywords>,
     * ]
     * Each value is left to null unless the download callbacks set it.
     */
    public function retrieve(string $url): array
    {
        $charset = null;
        $title = null;
        $description = null;
        $tags = null;

        // Short timeout to keep the application responsive.
        // The callbacks are expected to fill $charset, $title, $description and $tags
        // with data from the downloaded page (presumably by reference; see HttpAccess).
        $this->httpAccess->getHttpResponse(
            $url,
            $this->conf->get('general.download_timeout', 30),
            $this->conf->get('general.download_max_size', 4194304),
            $this->httpAccess->getCurlHeaderCallback($charset),
            $this->httpAccess->getCurlDownloadCallback(
                $charset,
                $title,
                $description,
                $tags,
                $this->conf->get('general.retrieve_description'),
                $this->conf->get('general.tags_separator', ' ')
            )
        );

        // NOTE(review): only the title is re-encoded here; description and tags are
        // returned as provided by the callback. Confirm this is intended.
        if (is_string($title) && !empty($title)) {
            $title = $this->convertToUtf8($title, $charset);
        }

        return [
            'title' => $title,
            'description' => $description,
            'tags' => $tags,
        ];
    }

    /**
     * Convert a text to UTF-8 from the detected charset, without ever failing.
     *
     * The charset comes from the remote page, so it may be missing (null/false/empty)
     * or may be a label that mbstring does not know. In all those cases the text is
     * returned unchanged rather than raising an error under strict types.
     *
     * @param string $text    Text to convert.
     * @param mixed  $charset Detected charset label; anything but a non-empty string is ignored.
     *
     * @return string Converted text, or the original text when no conversion is possible or needed.
     */
    private function convertToUtf8(string $text, $charset): string
    {
        // No usable charset detected: nothing to convert from.
        if (!is_string($charset) || $charset === '') {
            return $text;
        }

        // Already UTF-8 (labels are case-insensitive).
        if (strcasecmp($charset, 'utf-8') === 0) {
            return $text;
        }

        try {
            $converted = mb_convert_encoding($text, 'utf-8', $charset);
        } catch (\ValueError $e) {
            // PHP 8+: unknown encoding label advertised by the remote page.
            return $text;
        }

        // PHP 7 reports an unknown encoding by returning false instead of throwing.
        return is_string($converted) ? $converted : $text;
    }
}