diff options
author | ArthurHoaro <arthur@hoa.ro> | 2020-10-20 10:14:28 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-20 10:14:28 +0200 |
commit | 9b3c1270bcbe4f8e30e0160da8badd43dd94871a (patch) | |
tree | f1d87ed084970cf0c3ef99fef8e4ad6dcea423ce /application/http/MetadataRetriever.php | |
parent | 552c3b942afe565b780785eab5b2e29c1e800c2e (diff) | |
parent | 5334090be04e66da5cb5c3ad487604b3733c5cac (diff) | |
download | Shaarli-9b3c1270bcbe4f8e30e0160da8badd43dd94871a.tar.gz Shaarli-9b3c1270bcbe4f8e30e0160da8badd43dd94871a.tar.zst Shaarli-9b3c1270bcbe4f8e30e0160da8badd43dd94871a.zip |
Merge pull request #1567 from ArthurHoaro/feature/async-title-retrieval
Diffstat (limited to 'application/http/MetadataRetriever.php')
-rw-r--r-- | application/http/MetadataRetriever.php | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/application/http/MetadataRetriever.php b/application/http/MetadataRetriever.php new file mode 100644 index 00000000..ba9bd40c --- /dev/null +++ b/application/http/MetadataRetriever.php | |||
@@ -0,0 +1,69 @@ | |||
1 | <?php | ||
2 | |||
3 | declare(strict_types=1); | ||
4 | |||
5 | namespace Shaarli\Http; | ||
6 | |||
7 | use Shaarli\Config\ConfigManager; | ||
8 | |||
/**
 * HTTP tool used to extract metadata (title, description, keywords) from an external URL.
 */
class MetadataRetriever
{
    /** @var ConfigManager Configuration source for the download settings read in retrieve(). */
    protected $conf;

    /** @var HttpAccess HTTP layer used to perform the request and to build the cURL callbacks. */
    protected $httpAccess;

    /**
     * @param ConfigManager $conf       Configuration source ('general.*' keys are read in retrieve()).
     * @param HttpAccess    $httpAccess HTTP layer performing the actual download.
     */
    public function __construct(ConfigManager $conf, HttpAccess $httpAccess)
    {
        $this->conf = $conf;
        $this->httpAccess = $httpAccess;
    }

    /**
     * Retrieve metadata for given URL.
     *
     * The title is converted to UTF-8 when the remote charset is known and is not already UTF-8.
     * Each entry keeps its initial value (null) when the download callbacks did not provide it.
     *
     * @param string $url Remote URL to download and inspect.
     *
     * @return array [
     *     'title' => <remote title>,
     *     'description' => <remote description>,
     *     'tags' => <remote keywords>,
     * ]
     */
    public function retrieve(string $url): array
    {
        $charset = null;
        $title = null;
        $description = null;
        $tags = null;
        $retrieveDescription = $this->conf->get('general.retrieve_description');

        // Short timeout to keep the application responsive.
        // The callbacks will fill $charset, $title, $description and $tags with data from the
        // downloaded page (presumably these variables are taken by reference by the callback
        // factories -- confirm against HttpAccess).
        $this->httpAccess->getHttpResponse(
            $url,
            $this->conf->get('general.download_timeout', 30),
            $this->conf->get('general.download_max_size', 4194304),
            $this->httpAccess->getCurlHeaderCallback($charset),
            $this->httpAccess->getCurlDownloadCallback(
                $charset,
                $title,
                $description,
                $tags,
                $retrieveDescription
            )
        );

        return [
            'title' => $this->convertTitleToUtf8($title, $charset),
            'description' => $description,
            'tags' => $tags,
        ];
    }

    /**
     * Convert the retrieved title to UTF-8, leaving it untouched whenever conversion is not possible.
     *
     * @param mixed $title   Title provided by the download callback (null when none was found).
     * @param mixed $charset Charset provided by the callbacks (null when none was detected).
     *
     * @return mixed The UTF-8 title, or the original value when no conversion was applied.
     */
    private function convertTitleToUtf8($title, $charset)
    {
        // Nothing to convert: no usable title.
        if (empty($title) || !is_string($title)) {
            return $title;
        }

        // The charset may still be null when the remote page did not declare one. Under
        // strict_types, passing null to strtolower() raises a TypeError, so it is checked first.
        if (!is_string($charset) || $charset === '' || strtolower($charset) === 'utf-8') {
            return $title;
        }

        try {
            $converted = mb_convert_encoding($title, 'utf-8', $charset);
        } catch (\ValueError $e) {
            // PHP 8 throws on an unknown encoding name; the charset comes from a remote page,
            // so keep the raw title rather than failing the whole retrieval.
            return $title;
        }

        // PHP 7 returns false instead of throwing when the conversion fails.
        return is_string($converted) ? $converted : $title;
    }
}