diff options
Diffstat (limited to 'application/http/MetadataRetriever.php')
-rw-r--r-- | application/http/MetadataRetriever.php | 68 |
1 files changed, 68 insertions, 0 deletions
diff --git a/application/http/MetadataRetriever.php b/application/http/MetadataRetriever.php new file mode 100644 index 00000000..2ca982e2 --- /dev/null +++ b/application/http/MetadataRetriever.php | |||
@@ -0,0 +1,68 @@ | |||
1 | <?php | ||
2 | |||
3 | declare(strict_types=1); | ||
4 | |||
5 | namespace Shaarli\Http; | ||
6 | |||
7 | use Shaarli\Config\ConfigManager; | ||
8 | |||
9 | /** | ||
10 | * HTTP Tool used to extract metadata from external URL (title, description, etc.). | ||
11 | */ | ||
12 | class MetadataRetriever | ||
13 | { | ||
14 | /** @var ConfigManager */ | ||
15 | protected $conf; | ||
16 | |||
17 | /** @var HttpAccess */ | ||
18 | protected $httpAccess; | ||
19 | |||
20 | public function __construct(ConfigManager $conf, HttpAccess $httpAccess) | ||
21 | { | ||
22 | $this->conf = $conf; | ||
23 | $this->httpAccess = $httpAccess; | ||
24 | } | ||
25 | |||
26 | /** | ||
27 | * Retrieve metadata for given URL. | ||
28 | * | ||
29 | * @return array [ | ||
30 | * 'title' => <remote title>, | ||
31 | * 'description' => <remote description>, | ||
32 | * 'tags' => <remote keywords>, | ||
33 | * ] | ||
34 | */ | ||
35 | public function retrieve(string $url): array | ||
36 | { | ||
37 | $charset = null; | ||
38 | $title = null; | ||
39 | $description = null; | ||
40 | $tags = null; | ||
41 | $retrieveDescription = $this->conf->get('general.retrieve_description'); | ||
42 | |||
43 | // Short timeout to keep the application responsive | ||
44 | // The callback will fill $charset and $title with data from the downloaded page. | ||
45 | $this->httpAccess->getHttpResponse( | ||
46 | $url, | ||
47 | $this->conf->get('general.download_timeout', 30), | ||
48 | $this->conf->get('general.download_max_size', 4194304), | ||
49 | $this->httpAccess->getCurlDownloadCallback( | ||
50 | $charset, | ||
51 | $title, | ||
52 | $description, | ||
53 | $tags, | ||
54 | $retrieveDescription | ||
55 | ) | ||
56 | ); | ||
57 | |||
58 | if (!empty($title) && strtolower($charset) !== 'utf-8') { | ||
59 | $title = mb_convert_encoding($title, 'utf-8', $charset); | ||
60 | } | ||
61 | |||
62 | return [ | ||
63 | 'title' => $title, | ||
64 | 'description' => $description, | ||
65 | 'tags' => $tags, | ||
66 | ]; | ||
67 | } | ||
68 | } | ||