]> git.immae.eu Git - github/shaarli/Shaarli.git/commitdiff
Made tag/title search unicode aware, fixes #75 82/head
authorFlorian Eula <eula.florian@gmail.com>
Mon, 22 Dec 2014 15:43:37 +0000 (16:43 +0100)
committerFlorian Eula <eula.florian@gmail.com>
Mon, 22 Dec 2014 15:43:37 +0000 (16:43 +0100)
index.php

index 35e83eb54498613497c9f926939e38dc2eab6edd..e2c68869f5112b55d5b81a4e962ccab148aa7c3f 100644 (file)
--- a/index.php
+++ b/index.php
@@ -794,14 +794,16 @@ class linkdb implements Iterator, Countable, ArrayAccess
     {
         // FIXME: explode(' ',$searchterms) and perform a AND search.
         // FIXME: accept double-quotes to search for a string "as is"?
+        // Using mb_convert_case($val, MB_CASE_LOWER, 'UTF-8') allows us to perform searches on
+        // Unicode text. See https://github.com/shaarli/Shaarli/issues/75 for examples.
         $filtered=array();
-        $s = strtolower($searchterms);
+        $s = mb_convert_case($searchterms, MB_CASE_LOWER, 'UTF-8');
         foreach($this->links as $l)
         {
-            $found=   (strpos(strtolower($l['title']),$s)!==false)
-                   || (strpos(strtolower($l['description']),$s)!==false)
-                   || (strpos(strtolower($l['url']),$s)!==false)
-                   || (strpos(strtolower($l['tags']),$s)!==false);
+            $found=   (strpos(mb_convert_case($l['title'],       MB_CASE_LOWER, 'UTF-8'),$s) !== false)
+                   || (strpos(mb_convert_case($l['description'], MB_CASE_LOWER, 'UTF-8'),$s) !== false)
+                   || (strpos(mb_convert_case($l['url'],         MB_CASE_LOWER, 'UTF-8'),$s) !== false)
+                   || (strpos(mb_convert_case($l['tags'],        MB_CASE_LOWER, 'UTF-8'),$s) !== false);
             if ($found) $filtered[$l['linkdate']] = $l;
         }
         krsort($filtered);
@@ -813,12 +815,14 @@ class linkdb implements Iterator, Countable, ArrayAccess
     // e.g. print_r($mydb->filterTags('linux programming'));
     public function filterTags($tags,$casesensitive=false)
     {
-        $t = str_replace(',',' ',($casesensitive?$tags:strtolower($tags)));
+        // Same as above, we use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek)
+        // TODO: is $casesensitive ever true ?
+        $t = str_replace(',',' ',($casesensitive?$tags:mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8')));
         $searchtags=explode(' ',$t);
         $filtered=array();
         foreach($this->links as $l)
         {
-            $linktags = explode(' ',($casesensitive?$l['tags']:strtolower($l['tags'])));
+            $linktags = explode(' ',($casesensitive?$l['tags']:mb_convert_case($l['tags'], MB_CASE_LOWER, 'UTF-8')));
             if (count(array_intersect($linktags,$searchtags)) == count($searchtags))
                 $filtered[$l['linkdate']] = $l;
         }