aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorArthur <arthur@hoa.ro>2016-02-15 21:43:07 +0100
committerArthur <arthur@hoa.ro>2016-02-15 21:43:07 +0100
commitbfec695df1205864b46ca7175e1598b184602687 (patch)
tree9d64988c49fd53978c58c64bbd013a363c5b2d78
parent07c2f73543b358d39b3751c8542966794f28db03 (diff)
parent522b278b03280ed809025ebbeb3eac284b68bf81 (diff)
downloadShaarli-bfec695df1205864b46ca7175e1598b184602687.tar.gz
Shaarli-bfec695df1205864b46ca7175e1598b184602687.tar.zst
Shaarli-bfec695df1205864b46ca7175e1598b184602687.zip
Merge pull request #455 from ArthurHoaro/improved-search-454
Improved search: combine AND, exact terms and exclude search.
-rw-r--r--application/LinkFilter.php88
-rw-r--r--tests/LinkDBTest.php2
-rw-r--r--tests/LinkFilterTest.php79
-rw-r--r--tests/utils/ReferenceLinkDB.php15
4 files changed, 138 insertions, 46 deletions
diff --git a/application/LinkFilter.php b/application/LinkFilter.php
index ceb47d16..17594e8f 100644
--- a/application/LinkFilter.php
+++ b/application/LinkFilter.php
@@ -120,7 +120,9 @@ class LinkFilter
120 * 120 *
121 * Searches: 121 * Searches:
122 * - in the URLs, title and description; 122 * - in the URLs, title and description;
123 * - are case-insensitive. 123 * - are case-insensitive;
124 * - terms surrounded by quotes " are exact terms search.
125 * - terms starting with a dash - are excluded (except exact terms).
124 * 126 *
125 * Example: 127 * Example:
126 * print_r($mydb->filterFulltext('hollandais')); 128 * print_r($mydb->filterFulltext('hollandais'));
@@ -136,19 +138,30 @@ class LinkFilter
136 */ 138 */
137 private function filterFulltext($searchterms, $privateonly = false) 139 private function filterFulltext($searchterms, $privateonly = false)
138 { 140 {
141 $filtered = array();
139 $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); 142 $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8');
140 $explodedSearch = explode(' ', trim($search)); 143 $exactRegex = '/"([^"]+)"/';
141 $keys = array('title', 'description', 'url', 'tags'); 144 // Retrieve exact search terms.
142 $found = true; 145 preg_match_all($exactRegex, $search, $exactSearch);
143 $searchExactPhrase = false; 146 $exactSearch = array_values(array_filter($exactSearch[1]));
144 147
145 // Check if we're using double-quotes to search for the exact string 148 // Remove exact search terms to get AND terms search.
146 if ($search[0] == '"' && $search[strlen($search) - 1] == '"') { 149 $explodedSearchAnd = explode(' ', trim(preg_replace($exactRegex, '', $search)));
147 $searchExactPhrase = true; 150 $explodedSearchAnd = array_values(array_filter($explodedSearchAnd));
148 151
149 // Remove the double-quotes as they are not what we search for 152 // Filter excluding terms and update andSearch.
150 $search = substr($search, 1, -1); 153 $excludeSearch = array();
154 $andSearch = array();
155 foreach ($explodedSearchAnd as $needle) {
156 if ($needle[0] == '-' && strlen($needle) > 1) {
157 $excludeSearch[] = substr($needle, 1);
158 } else {
159 $andSearch[] = $needle;
160 }
151 } 161 }
162
163 $keys = array('title', 'description', 'url', 'tags');
164
152 // Iterate over every stored link. 165 // Iterate over every stored link.
153 foreach ($this->links as $link) { 166 foreach ($this->links as $link) {
154 167
@@ -157,35 +170,32 @@ class LinkFilter
157 continue; 170 continue;
158 } 171 }
159 172
160 // Iterate over searchable link fields. 173 // Concatenate link fields to search across fields.
174 // Adds a '\' separator for exact search terms.
175 $content = '';
161 foreach ($keys as $key) { 176 foreach ($keys as $key) {
162 // Be optimistic 177 $content .= mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8') . '\\';
163 $found = true; 178 }
164 179
165 // FIXME: Find a better word for where you're searching in 180 // Be optimistic
166 $haystack = mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8'); 181 $found = true;
167 182
168 // When searching for the phrase, check if it's in the haystack... 183 // First, we look for exact term search
169 if ( $searchExactPhrase && strpos($haystack, $search) !== false) { 184 for ($i = 0; $i < count($exactSearch) && $found; $i++) {
170 break; 185 $found = strpos($content, $exactSearch[$i]) !== false;
171 }
172 else {
173 // Iterate over keywords, if keyword is not found,
174 // no need to check for the others. We want all or nothing.
175 foreach($explodedSearch as $keyword) {
176 if(strpos($haystack, $keyword) === false) {
177 $found = false;
178 break;
179 }
180 }
181 }
182
183 // One of the fields of the link matches, no need to check the other.
184 if ($found) {
185 break;
186 }
187 } 186 }
188 187
188 // Iterate over keywords, if keyword is not found,
189 // no need to check for the others. We want all or nothing.
190 for ($i = 0; $i < count($andSearch) && $found; $i++) {
191 $found = strpos($content, $andSearch[$i]) !== false;
192 }
193
194 // Exclude terms.
195 for ($i = 0; $i < count($excludeSearch) && $found; $i++) {
196 $found = strpos($content, $excludeSearch[$i]) === false;
197 }
198
189 if ($found) { 199 if ($found) {
190 $filtered[$link['linkdate']] = $link; 200 $filtered[$link['linkdate']] = $link;
191 } 201 }
diff --git a/tests/LinkDBTest.php b/tests/LinkDBTest.php
index 765f771e..b6a273b3 100644
--- a/tests/LinkDBTest.php
+++ b/tests/LinkDBTest.php
@@ -278,6 +278,7 @@ class LinkDBTest extends PHPUnit_Framework_TestCase
278 'stallman' => 1, 278 'stallman' => 1,
279 'free' => 1, 279 'free' => 1,
280 '-exclude' => 1, 280 '-exclude' => 1,
281 'stuff' => 2,
281 ), 282 ),
282 self::$publicLinkDB->allTags() 283 self::$publicLinkDB->allTags()
283 ); 284 );
@@ -297,6 +298,7 @@ class LinkDBTest extends PHPUnit_Framework_TestCase
297 'w3c' => 1, 298 'w3c' => 1,
298 'css' => 1, 299 'css' => 1,
299 'Mercurial' => 1, 300 'Mercurial' => 1,
301 'stuff' => 2,
300 '-exclude' => 1, 302 '-exclude' => 1,
301 '.hidden' => 1, 303 '.hidden' => 1,
302 ), 304 ),
diff --git a/tests/LinkFilterTest.php b/tests/LinkFilterTest.php
index 164af0d4..31fd4cf4 100644
--- a/tests/LinkFilterTest.php
+++ b/tests/LinkFilterTest.php
@@ -27,7 +27,7 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase
27 public function testFilter() 27 public function testFilter()
28 { 28 {
29 $this->assertEquals( 29 $this->assertEquals(
30 6, 30 7,
31 count(self::$linkFilter->filter('', '')) 31 count(self::$linkFilter->filter('', ''))
32 ); 32 );
33 33
@@ -165,6 +165,17 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase
165 } 165 }
166 166
167 /** 167 /**
168 * Full-text search - no result found.
169 */
170 public function testFilterFullTextNoResult()
171 {
172 $this->assertEquals(
173 0,
174 count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'azertyuiop'))
175 );
176 }
177
178 /**
168 * Full-text search - result from a link's URL 179 * Full-text search - result from a link's URL
169 */ 180 */
170 public function testFilterFullTextURL() 181 public function testFilterFullTextURL()
@@ -222,7 +233,7 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase
222 ); 233 );
223 234
224 $this->assertEquals( 235 $this->assertEquals(
225 2, 236 3,
226 count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '"free software"')) 237 count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '"free software"'))
227 ); 238 );
228 } 239 }
@@ -250,12 +261,72 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase
250 public function testFilterFullTextMixed() 261 public function testFilterFullTextMixed()
251 { 262 {
252 $this->assertEquals( 263 $this->assertEquals(
253 2, 264 3,
254 count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'free software')) 265 count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'free software'))
255 ); 266 );
256 } 267 }
257 268
258 /** 269 /**
270 * Full-text search - test exclusion with '-'.
271 */
272 public function testExcludeSearch()
273 {
274 $this->assertEquals(
275 1,
276 count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'free -gnu'))
277 );
278
279 $this->assertEquals(
280 6,
281 count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '-revolution'))
282 );
283 }
284
285 /**
286 * Full-text search - test AND, exact terms and exclusion combined, across fields.
287 */
288 public function testMultiSearch()
289 {
290 $this->assertEquals(
291 2,
292 count(self::$linkFilter->filter(
293 LinkFilter::$FILTER_TEXT,
294 '"Free Software " stallman "read this" @website stuff'
295 ))
296 );
297
298 $this->assertEquals(
299 1,
300 count(self::$linkFilter->filter(
301 LinkFilter::$FILTER_TEXT,
302 '"free software " stallman "read this" -beard @website stuff'
303 ))
304 );
305 }
306
307 /**
308 * Full-text search - make sure that exact search won't work across fields.
309 */
310 public function testSearchExactTermMultiFieldsKo()
311 {
312 $this->assertEquals(
313 0,
314 count(self::$linkFilter->filter(
315 LinkFilter::$FILTER_TEXT,
316 '"designer naming"'
317 ))
318 );
319
320 $this->assertEquals(
321 0,
322 count(self::$linkFilter->filter(
323 LinkFilter::$FILTER_TEXT,
324 '"designernaming"'
325 ))
326 );
327 }
328
329 /**
259 * Tag search with exclusion. 330 * Tag search with exclusion.
260 */ 331 */
261 public function testTagFilterWithExclusion() 332 public function testTagFilterWithExclusion()
@@ -266,7 +337,7 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase
266 ); 337 );
267 338
268 $this->assertEquals( 339 $this->assertEquals(
269 5, 340 6,
270 count(self::$linkFilter->filter(LinkFilter::$FILTER_TAG, '-free')) 341 count(self::$linkFilter->filter(LinkFilter::$FILTER_TAG, '-free'))
271 ); 342 );
272 } 343 }
diff --git a/tests/utils/ReferenceLinkDB.php b/tests/utils/ReferenceLinkDB.php
index da3e8c65..61faef05 100644
--- a/tests/utils/ReferenceLinkDB.php
+++ b/tests/utils/ReferenceLinkDB.php
@@ -14,12 +14,21 @@ class ReferenceLinkDB
14 function __construct() 14 function __construct()
15 { 15 {
16 $this->addLink( 16 $this->addLink(
17 'Free as in Freedom 2.0', 17 'Free as in Freedom 2.0 @website',
18 'https://static.fsf.org/nosvn/faif-2.0.pdf', 18 'https://static.fsf.org/nosvn/faif-2.0.pdf',
19 'Richard Stallman and the Free Software Revolution', 19 'Richard Stallman and the Free Software Revolution. Read this.',
20 0, 20 0,
21 '20150310_114633', 21 '20150310_114633',
22 'free gnu software stallman -exclude' 22 'free gnu software stallman -exclude stuff'
23 );
24
25 $this->addLink(
26 'Link title: @website',
27 'local',
28 'Stallman has a beard and is part of the Free Software Foundation (or not). Seriously, read this.',
29 0,
30 '20150310_114651',
31 'stuff'
23 ); 32 );
24 33
25 $this->addLink( 34 $this->addLink(