Merge pull request #1248 from virtualtam/refactor/namespacing

Ensure all PHP classes are properly namespaced
author: Aurélien Tamisier <virtualtam+github@flibidi.net> 2019-01-18 21:26:03 +0100
committer: GitHub <noreply@github.com> 2019-01-18 21:26:03 +0100
commit: ff3b5dc5542ec150f0d9b447394364a15e9156d0 (patch)
tree: 5e926e36816d510e3b3a10e20b94c23f43b55092 /application/bookmark/LinkUtils.php
parent: 1826e383ecf501302974132fd443cf1ca06e10f6 (diff)
parent: dea72c711ff740b3b829d238fcf85648465143a0 (diff)
download: Shaarli-ff3b5dc5542ec150f0d9b447394364a15e9156d0.tar.gz
Shaarli-ff3b5dc5542ec150f0d9b447394364a15e9156d0.tar.zst
Shaarli-ff3b5dc5542ec150f0d9b447394364a15e9156d0.zip
1 files changed, 222 insertions, 0 deletions
diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php
new file mode 100644
index 00000000..de5b61cb
--- /dev/null
+++ b/application/bookmark/LinkUtils.php
@@ -0,0 +1,222 @@
+<?php
+use Shaarli\Bookmark\LinkDB;
+/**
+ * Build the cURL write callback (CURLOPT_WRITEFUNCTION) used while downloading a page.
+ *
+ * The returned closure captures $charset and $title by reference and assigns them
+ * while chunks are received, so the caller reads the results from its own variables.
+ *
+ * @param string $charset     to extract from the downloaded page (reference)
+ * @param string $title       to extract from the downloaded page (reference)
+ * @param string $curlGetInfo Optionally overrides curl_getinfo function; it is invoked as
+ *                            $curlGetInfo($ch, CURLINFO_RESPONSE_CODE) and
+ *                            $curlGetInfo($ch, CURLINFO_CONTENT_TYPE)
+ *
+ * @return Closure callback taking the cURL resource and a chunk of downloaded data
+ */
+function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_getinfo')
+{
+    $isRedirected = false;
+    /**
+     * cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
+     *
+     * While downloading the remote page, we check that the HTTP code is 200 and that the
+     * content type contains 'text/html'. Then we extract the title and the charset and
+     * stop the download when it's done.
+     *
+     * Decision order for each chunk:
+     *  - response code 301 or 302: remember the redirection and keep downloading;
+     *  - any other non-empty response code different from 200: stop;
+     *  - non-empty content type that does not contain 'text/html': stop;
+     *  - charset: taken from the content type first, then from the chunk itself;
+     *  - title: taken from the chunk itself;
+     *  - response code, content type, charset and title all non-empty: stop.
+     *
+     * Each chunk is inspected on its own, so a title or meta tag split across two chunks
+     * is not detected here.
+     *
+     * NOTE(review): searching $data for 'content-type' presumably relies on response
+     * headers being part of the written data - confirm against the cURL options set
+     * by the caller.
+     *
+     * @param resource $ch   cURL resource
+     * @param string   $data chunk of data being downloaded
+     *
+     * @return int|bool length of $data or false if we need to stop the download
+     */
+    return function (&$ch, $data) use ($curlGetInfo, &$charset, &$title, &$isRedirected) {
+        $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
+        if (!empty($responseCode) && in_array($responseCode, [301, 302])) {
+            $isRedirected = true;
+            return strlen($data);
+        }
+        if (!empty($responseCode) && $responseCode !== 200) {
+            return false;
+        }
+        // After a redirection, the content type will keep the previous request value
+        // until it finds the next content-type header.
+        if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) {
+            $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
+        }
+        if (!empty($contentType) && strpos($contentType, 'text/html') === false) {
+            return false;
+        }
+        if (!empty($contentType) && empty($charset)) {
+            $charset = header_extract_charset($contentType);
+        }
+        if (empty($charset)) {
+            $charset = html_extract_charset($data);
+        }
+        if (empty($title)) {
+            $title = html_extract_title($data);
+        }
+        // We got everything we want, stop the download.
+        if (!empty($responseCode) && !empty($contentType) && !empty($charset) && !empty($title)) {
+            return false;
+        }
+        return strlen($data);
+    };
+}
+/**
+ * Extract title from an HTML document.
+ *
+ * Returns the content of the first title element found. The match is case-insensitive,
+ * tolerates attributes on the opening tag and may span several lines; line feeds are
+ * removed from the result and surrounding whitespace is trimmed.
+ *
+ * @param string $html HTML content where to look for a title.
+ *
+ * @return bool|string Extracted title if found, false otherwise.
+ */
+function html_extract_title($html)
+{
+    if (preg_match('!<title.*?>(.*?)</title>!is', $html, $matches)) {
+        return trim(str_replace("\n", '', $matches[1]));
+    }
+    return false;
+}
+/**
+ * Extract charset from HTTP header if it's defined.
+ *
+ * The value may be bare or wrapped in single or double quotes (charset=utf-8,
+ * charset="utf-8"); the surrounding quotes are never part of the returned string.
+ *
+ * @param string $header HTTP header Content-Type line.
+ *
+ * @return bool|string Charset string if found (lowercase), false otherwise.
+ */
+function header_extract_charset($header)
+{
+    // Stop at ';', a space or a quote so that a quoted value does not keep its closing quote.
+    preg_match('/charset=["\']?([^; "\']+)/i', $header, $match);
+    if (! empty($match[1])) {
+        return strtolower(trim($match[1]));
+    }
+    return false;
+}
+/**
+ * Extract charset HTML content (tag <meta charset>).
+ *
+ * Looks for a meta tag containing "charset=" followed by an optionally quoted value and
+ * the end of the tag. The match is case-insensitive and may span several lines.
+ *
+ * @param string $html HTML content where to look for charset.
+ *
+ * @return bool|string Charset string if found (lowercase), false otherwise.
+ */
+function html_extract_charset($html)
+{
+    // Get encoding specified in HTML header.
+    preg_match('#<meta .*charset=["\']?([^";\'>/]+)["\']? */?>#Usi', $html, $enc);
+    if (!empty($enc[1])) {
+        return strtolower($enc[1]);
+    }
+    return false;
+}
+/**
+ * Count private links in given linklist.
+ *
+ * The list is walked with foreach and every element whose 'private' entry is truthy
+ * is counted.
+ *
+ * @param array|Countable $links Linklist; each element is read as $link['private'].
+ *
+ * @return int Number of private links.
+ */
+function count_private($links)
+{
+    $cpt = 0;
+    foreach ($links as $link) {
+        if ($link['private']) {
+            $cpt += 1;
+        }
+    }
+    return $cpt;
+}
+/**
+ * In a string, converts URLs to clickable links.
+ *
+ * A URL starts with http://, https://, ftp://, file://, apt: or magnet: (case-insensitive),
+ * continues with non-whitespace characters and must end with a letter, a digit or a
+ * parenthesis, optionally followed by a slash.
+ *
+ * Without a redirector, the matched URL is used both as link target and link text.
+ * With a redirector, the link target is the redirector followed by the URL (passed through
+ * urlencode() when $urlEncode is true), and the link text stays the matched URL.
+ *
+ * NOTE(review): no HTML escaping is done here; presumably $text is already escaped by
+ * the caller - confirm.
+ *
+ * @param string $text       input string.
+ * @param string $redirector if a redirector is set, use it to generate links.
+ * @param bool   $urlEncode  Use `urlencode()` on the URL after the redirector or not.
+ *
+ * @return string returns $text with all links converted to HTML links.
+ *
+ * @see Function inspired from http://www.php.net/manual/en/function.preg-replace.php#85722
+ */
+function text2clickable($text, $redirector = '', $urlEncode = true)
+{
+    $regex = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[a-z0-9\(\)]/?)!si';
+    if (empty($redirector)) {
+        return preg_replace($regex, '<a href="$1">$1</a>', $text);
+    }
+    // Redirector is set, urlencode the final URL.
+    return preg_replace_callback(
+        $regex,
+        function ($matches) use ($redirector, $urlEncode) {
+            $url = $urlEncode ? urlencode($matches[1]) : $matches[1];
+            return '<a href="' . $redirector . $url .'">'. $matches[1] .'</a>';
+        },
+        $text
+    );
+}
+/**
+ * Auto-link hashtags.
+ *
+ * A hashtag is a '#' located at the beginning of a line or right after a whitespace
+ * character, followed by one or more tag characters. Each one is replaced by a link
+ * to "<indexUrl>?addtag=<tag>" whose text is the hashtag itself.
+ *
+ * @param string $description Given description.
+ * @param string $indexUrl    Root URL.
+ *
+ * @return string Description with auto-linked hashtags.
+ */
+function hashtag_autolink($description, $indexUrl = '')
+{
+    /*
+     * To support unicode: http://stackoverflow.com/a/35498078/1484919
+     * \p{Pc} - connector punctuation, to match underscore
+     * \p{N} - numeric character in any script
+     * \p{L} - letter from any language
+     * \p{Mn} - any non-spacing mark (accents, umlauts, etc)
+     */
+    $regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui';
+    $replacement = '$1<a href="'. $indexUrl .'?addtag=$2" title="Hashtag $2">#$2</a>';
+    return preg_replace($regex, $replacement, $description);
+}
+/**
+ * This function inserts &nbsp; where relevant so that multiple spaces are properly displayed in HTML
+ * even in the absence of <pre>  (This is used in description to keep text formatting).
+ *
+ * A space is replaced by &nbsp; when it sits at the beginning of a line or directly follows
+ * another space; the character before it is kept unchanged.
+ *
+ * @param string $text input text.
+ *
+ * @return string formatted text.
+ */
+function space2nbsp($text)
+{
+    return preg_replace('/(^| ) /m', '$1&nbsp;', $text);
+}
+/**
+ * Format Shaarli's description
+ *
+ * Applies, in this order: text2clickable(), hashtag_autolink(), space2nbsp() and nl2br().
+ *
+ * @param string $description shaare's description.
+ * @param string $redirector  if a redirector is set, use it to generate links.
+ * @param bool   $urlEncode   Use `urlencode()` on the URL after the redirector or not.
+ * @param string $indexUrl    URL to Shaarli's index.
+ *
+ * @return string formatted description.
+ */
+function format_description($description, $redirector = '', $urlEncode = true, $indexUrl = '')
+{
+    return nl2br(space2nbsp(hashtag_autolink(text2clickable($description, $redirector, $urlEncode), $indexUrl)));
+}
+/**
+ * Generate a small hash for a link.
+ *
+ * The hash input is the creation date formatted with LinkDB::LINK_DATE_FORMAT, immediately
+ * followed by the link ID; the result comes from smallHash(), which is defined outside
+ * this file.
+ *
+ * @param DateTime $date Link creation date.
+ * @param int      $id   Link ID.
+ *
+ * @return string the small hash generated from link data.
+ */
+function link_small_hash($date, $id)
+{
+    return smallHash($date->format(LinkDB::LINK_DATE_FORMAT) . $id);
+}
author	Aurélien Tamisier <virtualtam+github@flibidi.net>	2019-01-18 21:26:03 +0100
committer	GitHub <noreply@github.com>	2019-01-18 21:26:03 +0100
commit	ff3b5dc5542ec150f0d9b447394364a15e9156d0 (patch)
tree	5e926e36816d510e3b3a10e20b94c23f43b55092 /application/bookmark/LinkUtils.php
parent	1826e383ecf501302974132fd443cf1ca06e10f6 (diff)
parent	dea72c711ff740b3b829d238fcf85648465143a0 (diff)
download	Shaarli-ff3b5dc5542ec150f0d9b447394364a15e9156d0.tar.gz Shaarli-ff3b5dc5542ec150f0d9b447394364a15e9156d0.tar.zst Shaarli-ff3b5dc5542ec150f0d9b447394364a15e9156d0.zip

diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php new file mode 100644 index 00000000..de5b61cb --- /dev/null +++ b/application/bookmark/LinkUtils.php
@@ -0,0 +1,222 @@
	1	<?php
	2
	3	use Shaarli\Bookmark\LinkDB;
	4
	5	/**
	6	* Get cURL callback function for CURLOPT_WRITEFUNCTION
	7	*
	8	* @param string $charset to extract from the downloaded page (reference)
	9	* @param string $title to extract from the downloaded page (reference)
	10	* @param string $curlGetInfo Optionally overrides curl_getinfo function
	11	*
	12	* @return Closure
	13	*/
	14	function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_getinfo')
	15	{
	16	$isRedirected = false;
	17	/**
	18	* cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
	19	*
	20	* While downloading the remote page, we check that the HTTP code is 200 and content type is 'html/text'
	21	* Then we extract the title and the charset and stop the download when it's done.
	22	*
	23	* @param resource $ch cURL resource
	24	* @param string $data chunk of data being downloaded
	25	*
	26	* @return int|bool length of $data or false if we need to stop the download
	27	*/
	28	return function (&$ch, $data) use ($curlGetInfo, &$charset, &$title, &$isRedirected) {
	29	$responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
	30	if (!empty($responseCode) && in_array($responseCode, [301, 302])) {
	31	$isRedirected = true;
	32	return strlen($data);
	33	}
	34	if (!empty($responseCode) && $responseCode !== 200) {
	35	return false;
	36	}
	37	// After a redirection, the content type will keep the previous request value
	38	// until it finds the next content-type header.
	39	if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) {
	40	$contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
	41	}
	42	if (!empty($contentType) && strpos($contentType, 'text/html') === false) {
	43	return false;
	44	}
	45	if (!empty($contentType) && empty($charset)) {
	46	$charset = header_extract_charset($contentType);
	47	}
	48	if (empty($charset)) {
	49	$charset = html_extract_charset($data);
	50	}
	51	if (empty($title)) {
	52	$title = html_extract_title($data);
	53	}
	54	// We got everything we want, stop the download.
	55	if (!empty($responseCode) && !empty($contentType) && !empty($charset) && !empty($title)) {
	56	return false;
	57	}
	58
	59	return strlen($data);
	60	};
	61	}
	62
	63	/**
	64	* Extract title from an HTML document.
	65	*
	66	* @param string $html HTML content where to look for a title.
	67	*
	68	* @return bool|string Extracted title if found, false otherwise.
	69	*/
	70	function html_extract_title($html)
	71	{
	72	if (preg_match('!<title.*?>(.*?)</title>!is', $html, $matches)) {
	73	return trim(str_replace("\n", '', $matches[1]));
	74	}
	75	return false;
	76	}
	77
	78	/**
	79	* Extract charset from HTTP header if it's defined.
	80	*
	81	* @param string $header HTTP header Content-Type line.
	82	*
	83	* @return bool|string Charset string if found (lowercase), false otherwise.
	84	*/
	85	function header_extract_charset($header)
	86	{
	87	preg_match('/charset="?([^; ]+)/i', $header, $match);
	88	if (! empty($match[1])) {
	89	return strtolower(trim($match[1]));
	90	}
	91
	92	return false;
	93	}
	94
	95	/**
	96	* Extract charset HTML content (tag <meta charset>).
	97	*
	98	* @param string $html HTML content where to look for charset.
	99	*
	100	* @return bool|string Charset string if found, false otherwise.
	101	*/
	102	function html_extract_charset($html)
	103	{
	104	// Get encoding specified in HTML header.
	105	preg_match('#<meta .*charset=["\']?([^";\'>/]+)["\']? */?>#Usi', $html, $enc);
	106	if (!empty($enc[1])) {
	107	return strtolower($enc[1]);
	108	}
	109
	110	return false;
	111	}
	112
	113	/**
	114	* Count private links in given linklist.
	115	*
	116	* @param array|Countable $links Linklist.
	117	*
	118	* @return int Number of private links.
	119	*/
	120	function count_private($links)
	121	{
	122	$cpt = 0;
	123	foreach ($links as $link) {
	124	if ($link['private']) {
	125	$cpt += 1;
	126	}
	127	}
	128
	129	return $cpt;
	130	}
	131
	132	/**
	133	* In a string, converts URLs to clickable links.
	134	*
	135	* @param string $text input string.
	136	* @param string $redirector if a redirector is set, use it to generate links.
	137	* @param bool $urlEncode Use `urlencode()` on the URL after the redirector or not.
	138	*
	139	* @return string returns $text with all links converted to HTML links.
	140	*
	141	* @see Function inspired from http://www.php.net/manual/en/function.preg-replace.php#85722
	142	*/
	143	function text2clickable($text, $redirector = '', $urlEncode = true)
	144	{
	145	$regex = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[a-z0-9\(\)]/?)!si';
	146
	147	if (empty($redirector)) {
	148	return preg_replace($regex, '<a href="$1">$1</a>', $text);
	149	}
	150	// Redirector is set, urlencode the final URL.
	151	return preg_replace_callback(
	152	$regex,
	153	function ($matches) use ($redirector, $urlEncode) {
	154	$url = $urlEncode ? urlencode($matches[1]) : $matches[1];
	155	return '<a href="' . $redirector . $url .'">'. $matches[1] .'</a>';
	156	},
	157	$text
	158	);
	159	}
	160
	161	/**
	162	* Auto-link hashtags.
	163	*
	164	* @param string $description Given description.
	165	* @param string $indexUrl Root URL.
	166	*
	167	* @return string Description with auto-linked hashtags.
	168	*/
	169	function hashtag_autolink($description, $indexUrl = '')
	170	{
	171	/*
	172	* To support unicode: http://stackoverflow.com/a/35498078/1484919
	173	* \p{Pc} - to match underscore
	174	* \p{N} - numeric character in any script
	175	* \p{L} - letter from any language
	176	* \p{Mn} - any non marking space (accents, umlauts, etc)
	177	*/
	178	$regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui';
	179	$replacement = '$1<a href="'. $indexUrl .'?addtag=$2" title="Hashtag $2">#$2</a>';
	180	return preg_replace($regex, $replacement, $description);
	181	}
	182
	183	/**
	184	* This function inserts &nbsp; where relevant so that multiple spaces are properly displayed in HTML
	185	* even in the absence of <pre> (This is used in description to keep text formatting).
	186	*
	187	* @param string $text input text.
	188	*
	189	* @return string formatted text.
	190	*/
	191	function space2nbsp($text)
	192	{
	193	return preg_replace('/(^| ) /m', '$1&nbsp;', $text);
	194	}
	195
	196	/**
	197	* Format Shaarli's description
	198	*
	199	* @param string $description shaare's description.
	200	* @param string $redirector if a redirector is set, use it to generate links.
	201	* @param bool $urlEncode Use `urlencode()` on the URL after the redirector or not.
	202	* @param string $indexUrl URL to Shaarli's index.
	203
	204	* @return string formatted description.
	205	*/
	206	function format_description($description, $redirector = '', $urlEncode = true, $indexUrl = '')
	207	{
	208	return nl2br(space2nbsp(hashtag_autolink(text2clickable($description, $redirector, $urlEncode), $indexUrl)));
	209	}
	210
	211	/**
	212	* Generate a small hash for a link.
	213	*
	214	* @param DateTime $date Link creation date.
	215	* @param int $id Link ID.
	216	*
	217	* @return string the small hash generated from link data.
	218	*/
	219	function link_small_hash($date, $id)
	220	{
	221	return smallHash($date->format(LinkDB::LINK_DATE_FORMAT) . $id);
	222	}