Handle shaare creation/edition/deletion through Slim controllers

author: ArthurHoaro <arthur@hoa.ro> 2020-06-06 14:01:03 +0200
committer: ArthurHoaro <arthur@hoa.ro> 2020-07-23 21:19:21 +0200
commit: c22fa57a5505fe95fd01860e3d3dfbb089f869cd (patch)
tree: a72b57e49b7b2b995ace278bad00fc47d5b6d61d /application/http/HttpUtils.php
parent: 8eac2e54882d8adae8cbb45386dca1b465242632 (diff)
download: Shaarli-c22fa57a5505fe95fd01860e3d3dfbb089f869cd.tar.gz
Shaarli-c22fa57a5505fe95fd01860e3d3dfbb089f869cd.tar.zst
Shaarli-c22fa57a5505fe95fd01860e3d3dfbb089f869cd.zip
1 files changed, 106 insertions, 0 deletions
diff --git a/application/http/HttpUtils.php b/application/http/HttpUtils.php
index f00c4336..4fc4e3dc 100644
--- a/application/http/HttpUtils.php
+++ b/application/http/HttpUtils.php
@@ -484,3 +484,109 @@ function is_https($server)
    return ! empty($server['HTTPS']);
 }
+/**
+ * Get cURL callback function for CURLOPT_WRITEFUNCTION
+ *
+ * The returned closure keeps its own state between calls (redirection flag and chunk counters)
+ * and writes the extracted values into the by-reference arguments below.
+ *
+ * @param string          $charset             to extract from the downloaded page (reference)
+ * @param string          $title               to extract from the downloaded page (reference)
+ * @param string          $description         to extract from the downloaded page (reference)
+ * @param string          $keywords            to extract from the downloaded page (reference)
+ * @param bool            $retrieveDescription Automatically tries to retrieve description and keywords
+ *                                             from HTML content
+ * @param callable|string $curlGetInfo         Optionally overrides curl_getinfo function
+ *
+ * @return Closure
+ */
+function get_curl_download_callback(
+    &$charset,
+    &$title,
+    &$description,
+    &$keywords,
+    $retrieveDescription,
+    $curlGetInfo = 'curl_getinfo'
+) {
+    $isRedirected = false;
+    $currentChunk = 0;
+    $foundChunk = null;
+    /**
+     * cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
+     *
+     * While downloading the remote page, we check that the HTTP code is 200 and content type is 'text/html'
+     * Then we extract the title and the charset and stop the download when it's done.
+     *
+     * The handle is taken by value: cURL passes it by value, and a by-reference parameter
+     * triggers a "must be passed by reference, value given" warning on PHP 8.
+     *
+     * @param resource $ch   cURL resource
+     * @param string   $data chunk of data being downloaded
+     *
+     * @return int|bool length of $data or false if we need to stop the download
+     */
+    return function ($ch, $data) use (
+        $retrieveDescription,
+        $curlGetInfo,
+        &$charset,
+        &$title,
+        &$description,
+        &$keywords,
+        &$isRedirected,
+        &$currentChunk,
+        &$foundChunk
+    ) {
+        $currentChunk++;
+        $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
+        // Every redirection status that cURL follows must let the download continue,
+        // otherwise the non-200 check below aborts before the final page is reached.
+        if (!empty($responseCode) && in_array($responseCode, [301, 302, 303, 307, 308])) {
+            $isRedirected = true;
+            return strlen($data);
+        }
+        if (!empty($responseCode) && $responseCode !== 200) {
+            return false;
+        }
+        // After a redirection, the content type will keep the previous request value
+        // until it finds the next content-type header.
+        $contentType = null;
+        if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) {
+            $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
+        }
+        if (!empty($contentType) && strpos($contentType, 'text/html') === false) {
+            return false;
+        }
+        // The charset from the HTTP header is tried first, the one from the HTML content is the fallback.
+        if (!empty($contentType) && empty($charset)) {
+            $charset = header_extract_charset($contentType);
+        }
+        if (empty($charset)) {
+            $charset = html_extract_charset($data);
+        }
+        // $foundChunk records the chunk number of the most recent successful extraction.
+        if (empty($title)) {
+            $title = html_extract_title($data);
+            $foundChunk = ! empty($title) ? $currentChunk : $foundChunk;
+        }
+        if ($retrieveDescription && empty($description)) {
+            $description = html_extract_tag('description', $data);
+            $foundChunk = ! empty($description) ? $currentChunk : $foundChunk;
+        }
+        if ($retrieveDescription && empty($keywords)) {
+            $keywords = html_extract_tag('keywords', $data);
+            if (! empty($keywords)) {
+                $foundChunk = $currentChunk;
+                // Keywords use the format tag1, tag2 multiple words, tag
+                // So we format them to match Shaarli's separator and glue multiple words with '-'
+                $keywords = implode(' ', array_map(function ($keyword) {
+                    return implode('-', preg_split('/\s+/', trim($keyword)));
+                }, explode(',', $keywords)));
+            }
+        }
+        // We got everything we want, stop the download.
+        // If we already found either the title, description or keywords,
+        // it's highly unlikely that we'll find the other metas further than
+        // in the same chunk of data or the next one. So we also stop the download after that.
+        if ((!empty($responseCode) && !empty($contentType) && !empty($charset)) && $foundChunk !== null
+            && (! $retrieveDescription
+                || $foundChunk < $currentChunk
+                || (!empty($title) && !empty($description) && !empty($keywords))
+            )
+        ) {
+            return false;
+        }
+        return strlen($data);
+    };
+}
author	ArthurHoaro <arthur@hoa.ro>	2020-06-06 14:01:03 +0200
committer	ArthurHoaro <arthur@hoa.ro>	2020-07-23 21:19:21 +0200
commit	c22fa57a5505fe95fd01860e3d3dfbb089f869cd (patch)
tree	a72b57e49b7b2b995ace278bad00fc47d5b6d61d /application/http/HttpUtils.php
parent	8eac2e54882d8adae8cbb45386dca1b465242632 (diff)
download	Shaarli-c22fa57a5505fe95fd01860e3d3dfbb089f869cd.tar.gz Shaarli-c22fa57a5505fe95fd01860e3d3dfbb089f869cd.tar.zst Shaarli-c22fa57a5505fe95fd01860e3d3dfbb089f869cd.zip

diff --git a/application/http/HttpUtils.php b/application/http/HttpUtils.php index f00c4336..4fc4e3dc 100644 --- a/application/http/HttpUtils.php +++ b/application/http/HttpUtils.php
@@ -484,3 +484,109 @@ function is_https($server)
484		484
485	return ! empty($server['HTTPS']);	485	return ! empty($server['HTTPS']);
486	}	486	}
		487
		488	/**
		489	* Get cURL callback function for CURLOPT_WRITEFUNCTION
		490	*
		491	* @param string $charset to extract from the downloaded page (reference)
		492	* @param string $title to extract from the downloaded page (reference)
		493	* @param string $description to extract from the downloaded page (reference)
		494	* @param string $keywords to extract from the downloaded page (reference)
		495	* @param bool $retrieveDescription Automatically tries to retrieve description and keywords from HTML content
		496	* @param string $curlGetInfo Optionally overrides curl_getinfo function
		497	*
		498	* @return Closure
		499	*/
		500	function get_curl_download_callback(
		501	&$charset,
		502	&$title,
		503	&$description,
		504	&$keywords,
		505	$retrieveDescription,
		506	$curlGetInfo = 'curl_getinfo'
		507	) {
		508	$isRedirected = false;
		509	$currentChunk = 0;
		510	$foundChunk = null;
		511
		512	/**
		513	* cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
		514	*
		515	* While downloading the remote page, we check that the HTTP code is 200 and content type is 'text/html'
		516	* Then we extract the title and the charset and stop the download when it's done.
		517	*
		518	* @param resource $ch cURL resource
		519	* @param string $data chunk of data being downloaded
		520	*
		521	* @return int\|bool length of $data or false if we need to stop the download
		522	*/
		523	return function (&$ch, $data) use (
		524	$retrieveDescription,
		525	$curlGetInfo,
		526	&$charset,
		527	&$title,
		528	&$description,
		529	&$keywords,
		530	&$isRedirected,
		531	&$currentChunk,
		532	&$foundChunk
		533	) {
		534	$currentChunk++;
		535	$responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
		536	if (!empty($responseCode) && in_array($responseCode, [301, 302])) {
		537	$isRedirected = true;
		538	return strlen($data);
		539	}
		540	if (!empty($responseCode) && $responseCode !== 200) {
		541	return false;
		542	}
		543	// After a redirection, the content type will keep the previous request value
		544	// until it finds the next content-type header.
		545	if (! $isRedirected \|\| strpos(strtolower($data), 'content-type') !== false) {
		546	$contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
		547	}
		548	if (!empty($contentType) && strpos($contentType, 'text/html') === false) {
		549	return false;
		550	}
		551	if (!empty($contentType) && empty($charset)) {
		552	$charset = header_extract_charset($contentType);
		553	}
		554	if (empty($charset)) {
		555	$charset = html_extract_charset($data);
		556	}
		557	if (empty($title)) {
		558	$title = html_extract_title($data);
		559	$foundChunk = ! empty($title) ? $currentChunk : $foundChunk;
		560	}
		561	if ($retrieveDescription && empty($description)) {
		562	$description = html_extract_tag('description', $data);
		563	$foundChunk = ! empty($description) ? $currentChunk : $foundChunk;
		564	}
		565	if ($retrieveDescription && empty($keywords)) {
		566	$keywords = html_extract_tag('keywords', $data);
		567	if (! empty($keywords)) {
		568	$foundChunk = $currentChunk;
		569	// Keywords use the format tag1, tag2 multiple words, tag
		570	// So we format them to match Shaarli's separator and glue multiple words with '-'
		571	$keywords = implode(' ', array_map(function($keyword) {
		572	return implode('-', preg_split('/\s+/', trim($keyword)));
		573	}, explode(',', $keywords)));
		574	}
		575	}
		576
		577	// We got everything we want, stop the download.
		578	// If we already found either the title, description or keywords,
		579	// it's highly unlikely that we'll find the other metas further than
		580	// in the same chunk of data or the next one. So we also stop the download after that.
		581	if ((!empty($responseCode) && !empty($contentType) && !empty($charset)) && $foundChunk !== null
		582	&& (! $retrieveDescription
		583	\|\| $foundChunk < $currentChunk
		584	\|\| (!empty($title) && !empty($description) && !empty($keywords))
		585	)
		586	) {
		587	return false;
		588	}
		589
		590	return strlen($data);
		591	};
		592	}