X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=application%2Fhttp%2FMetadataRetriever.php;fp=application%2Fhttp%2FMetadataRetriever.php;h=2e1401eca74f0c9857d7d31c34934a14b6811066;hb=af50eba28a7bd286de4c8c9ee6dc5216b915d149;hp=0000000000000000000000000000000000000000;hpb=b6f678a5a1d15acf284ebcec16c905e976671ce1;p=github%2Fshaarli%2FShaarli.git

diff --git a/application/http/MetadataRetriever.php b/application/http/MetadataRetriever.php
new file mode 100644
index 00000000..2e1401ec
--- /dev/null
+++ b/application/http/MetadataRetriever.php
@@ -0,0 +1,69 @@
+conf = $conf;
+        $this->httpAccess = $httpAccess;
+    }
+
+    /**
+     * Retrieve metadata for given URL.
+     *
+     * The page is downloaded through HttpAccess. Its header and download callbacks are
+     * handed $charset, $title, $description and $tags and are expected to fill them in
+     * (presumably by reference - confirm against HttpAccess). A value stays null when
+     * the callbacks leave it untouched.
+     *
+     * The title is converted to UTF-8 only when a charset other than UTF-8 was reported.
+     * If no charset was reported, or the conversion fails, the title is returned exactly
+     * as it was downloaded.
+     *
+     * @param string $url URL of the page to inspect.
+     *
+     * @return array [
+     *     'title'       => title set by the download callback, or null,
+     *     'description' => description set by the download callback, or null,
+     *     'tags'        => tags set by the download callback, or null,
+     * ]
+     */
+    public function retrieve(string $url): array
+    {
+        $charset = null;
+        $title = null;
+        $description = null;
+        $tags = null;
+
+        // Timeout and maximum download size are read from the configuration
+        // to keep the application responsive.
+        // The callbacks are given $charset, $title, $description and $tags to fill in.
+        $this->httpAccess->getHttpResponse(
+            $url,
+            $this->conf->get('general.download_timeout', 30),
+            $this->conf->get('general.download_max_size', 4194304),
+            $this->httpAccess->getCurlHeaderCallback($charset),
+            $this->httpAccess->getCurlDownloadCallback(
+                $charset,
+                $title,
+                $description,
+                $tags,
+                $this->conf->get('general.retrieve_description'),
+                $this->conf->get('general.tags_separator', ' ')
+            )
+        );
+
+        // $charset is still null when no callback reported one. Passing null to
+        // strtolower() is deprecated since PHP 8.1 (and a TypeError under strict_types),
+        // so the charset is checked before it is used.
+        $needsConversion = !empty($title)
+            && is_string($charset)
+            && $charset !== ''
+            && strtolower($charset) !== 'utf-8';
+
+        if ($needsConversion) {
+            try {
+                $converted = mb_convert_encoding($title, 'utf-8', $charset);
+            } catch (\ValueError $e) {
+                // PHP 8 throws when mbstring does not know the charset name.
+                $converted = false;
+            }
+
+            // PHP 7 returns false for an unknown charset: keep the downloaded title
+            // rather than replacing it with false.
+            if (is_string($converted)) {
+                $title = $converted;
+            }
+        }
+
+        return [
+            'title' => $title,
+            'description' => $description,
+            'tags' => $tags,
+        ];
+    }
+}