git.immae.eu Git - github/shaarli/Shaarli.git/blobdiff - application/http/MetadataRetriever.php
Merge tag 'v0.12.1' into latest
[github/shaarli/Shaarli.git] / application / http / MetadataRetriever.php
diff --git a/application/http/MetadataRetriever.php b/application/http/MetadataRetriever.php
new file mode 100644 (file)
index 0000000..2e1401e
--- /dev/null
@@ -0,0 +1,69 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Shaarli\Http;
+
+use Shaarli\Config\ConfigManager;
+
+/**
+ * HTTP Tool used to extract metadata from external URL (title, description, etc.).
+ */
+class MetadataRetriever
+{
+    /** @var ConfigManager Configuration source for the `general.*` settings read in retrieve(). */
+    protected $conf;
+
+    /** @var HttpAccess HTTP helper that performs the request and supplies the cURL callbacks. */
+    protected $httpAccess;
+
+    /**
+     * @param ConfigManager $conf       Configuration used for download limits and metadata options.
+     * @param HttpAccess    $httpAccess HTTP helper used to fetch the remote page.
+     */
+    public function __construct(ConfigManager $conf, HttpAccess $httpAccess)
+    {
+        $this->conf = $conf;
+        $this->httpAccess = $httpAccess;
+    }
+
+    /**
+     * Retrieve metadata for given URL.
+     *
+     * The title is converted to UTF-8 only when a charset other than UTF-8 was detected
+     * and the conversion succeeds; otherwise it is returned as received. Description and
+     * tags are returned exactly as the download callback left them.
+     *
+     * @param string $url URL of the remote page to inspect.
+     *
+     * @return array [
+     *                  'title' => <remote title>,
+     *                  'description' => <remote description>,
+     *                  'tags' => <remote keywords>,
+     *               ]
+     *               Each value stays null when the callbacks did not set it.
+     */
+    public function retrieve(string $url): array
+    {
+        $charset = null;
+        $title = null;
+        $description = null;
+        $tags = null;
+
+        // Timeout and maximum download size come from the configuration (defaults: 30 and 4194304).
+        // The callbacks receive $charset, $title, $description and $tags and are expected to fill
+        // them from the downloaded page (presumably by reference; see HttpAccess).
+        $this->httpAccess->getHttpResponse(
+            $url,
+            $this->conf->get('general.download_timeout', 30),
+            $this->conf->get('general.download_max_size', 4194304),
+            $this->httpAccess->getCurlHeaderCallback($charset),
+            $this->httpAccess->getCurlDownloadCallback(
+                $charset,
+                $title,
+                $description,
+                $tags,
+                $this->conf->get('general.retrieve_description'),
+                $this->conf->get('general.tags_separator', ' ')
+            )
+        );
+
+        // Only convert when a source charset is actually known: with strict_types enabled,
+        // strtolower(null) throws a TypeError, and there is nothing to convert from anyway.
+        $hasCharset = is_string($charset) && $charset !== '';
+
+        if (!empty($title) && $hasCharset && strtolower($charset) !== 'utf-8') {
+            try {
+                $converted = mb_convert_encoding($title, 'utf-8', $charset);
+            } catch (\ValueError $e) {
+                // PHP 8+ throws when the remote page advertises a charset mbstring does not know.
+                $converted = false;
+            }
+
+            // Older PHP versions return false for an unknown charset: keep the original title
+            // instead of replacing it with false.
+            if (is_string($converted)) {
+                $title = $converted;
+            }
+        }
+
+        return [
+            'title' => $title,
+            'description' => $description,
+            'tags' => $tags,
+        ];
+    }
+}