diff options
author | yude <yudesleepy@gmail.com> | 2021-01-04 18:51:10 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-04 18:51:10 +0900 |
commit | e6754f2154a79abd8e5e64bd923f6984aa9ad44b (patch) | |
tree | f074119530bb59ef155938ea367f719f1e4b70f1 /application/http/MetadataRetriever.php | |
parent | 5256b4287021342a9f8868967b2a77e481314331 (diff) | |
parent | ed4ee8f0297941ac83300389b7de6a293312d20e (diff) | |
download | Shaarli-e6754f2154a79abd8e5e64bd923f6984aa9ad44b.tar.gz Shaarli-e6754f2154a79abd8e5e64bd923f6984aa9ad44b.tar.zst Shaarli-e6754f2154a79abd8e5e64bd923f6984aa9ad44b.zip |
Merge pull request #2 from shaarli/master
Merge fork source
Diffstat (limited to 'application/http/MetadataRetriever.php')
-rw-r--r-- | application/http/MetadataRetriever.php | 74 |
1 files changed, 74 insertions, 0 deletions
diff --git a/application/http/MetadataRetriever.php b/application/http/MetadataRetriever.php new file mode 100644 index 00000000..cfc72583 --- /dev/null +++ b/application/http/MetadataRetriever.php | |||
@@ -0,0 +1,74 @@ | |||
1 | <?php | ||
2 | |||
3 | declare(strict_types=1); | ||
4 | |||
5 | namespace Shaarli\Http; | ||
6 | |||
7 | use Shaarli\Config\ConfigManager; | ||
8 | |||
9 | /** | ||
10 | * HTTP Tool used to extract metadata from external URL (title, description, etc.). | ||
11 | */ | ||
12 | class MetadataRetriever | ||
13 | { | ||
14 | /** @var ConfigManager */ | ||
15 | protected $conf; | ||
16 | |||
17 | /** @var HttpAccess */ | ||
18 | protected $httpAccess; | ||
19 | |||
20 | public function __construct(ConfigManager $conf, HttpAccess $httpAccess) | ||
21 | { | ||
22 | $this->conf = $conf; | ||
23 | $this->httpAccess = $httpAccess; | ||
24 | } | ||
25 | |||
26 | /** | ||
27 | * Retrieve metadata for given URL. | ||
28 | * | ||
29 | * @return array [ | ||
30 | * 'title' => <remote title>, | ||
31 | * 'description' => <remote description>, | ||
32 | * 'tags' => <remote keywords>, | ||
33 | * ] | ||
34 | */ | ||
35 | public function retrieve(string $url): array | ||
36 | { | ||
37 | $charset = null; | ||
38 | $title = null; | ||
39 | $description = null; | ||
40 | $tags = null; | ||
41 | |||
42 | // Short timeout to keep the application responsive | ||
43 | // The callback will fill $charset and $title with data from the downloaded page. | ||
44 | $this->httpAccess->getHttpResponse( | ||
45 | $url, | ||
46 | $this->conf->get('general.download_timeout', 30), | ||
47 | $this->conf->get('general.download_max_size', 4194304), | ||
48 | $this->httpAccess->getCurlHeaderCallback($charset), | ||
49 | $this->httpAccess->getCurlDownloadCallback( | ||
50 | $charset, | ||
51 | $title, | ||
52 | $description, | ||
53 | $tags, | ||
54 | $this->conf->get('general.retrieve_description'), | ||
55 | $this->conf->get('general.tags_separator', ' ') | ||
56 | ) | ||
57 | ); | ||
58 | |||
59 | if (!empty($title) && strtolower($charset) !== 'utf-8') { | ||
60 | $title = mb_convert_encoding($title, 'utf-8', $charset); | ||
61 | } | ||
62 | |||
63 | return array_map([$this, 'cleanMetadata'], [ | ||
64 | 'title' => $title, | ||
65 | 'description' => $description, | ||
66 | 'tags' => $tags, | ||
67 | ]); | ||
68 | } | ||
69 | |||
70 | protected function cleanMetadata($data): ?string | ||
71 | { | ||
72 | return !is_string($data) || empty(trim($data)) ? null : trim($data); | ||
73 | } | ||
74 | } | ||