diff options
author | Arthur <arthur@hoa.ro> | 2016-02-15 21:43:07 +0100 |
---|---|---|
committer | Arthur <arthur@hoa.ro> | 2016-02-15 21:43:07 +0100 |
commit | bfec695df1205864b46ca7175e1598b184602687 (patch) | |
tree | 9d64988c49fd53978c58c64bbd013a363c5b2d78 | |
parent | 07c2f73543b358d39b3751c8542966794f28db03 (diff) | |
parent | 522b278b03280ed809025ebbeb3eac284b68bf81 (diff) | |
download | Shaarli-bfec695df1205864b46ca7175e1598b184602687.tar.gz Shaarli-bfec695df1205864b46ca7175e1598b184602687.tar.zst Shaarli-bfec695df1205864b46ca7175e1598b184602687.zip |
Merge pull request #455 from ArthurHoaro/improved-search-454
Improved search: combine AND, exact terms and exclude search.
-rw-r--r-- | application/LinkFilter.php | 88 | ||||
-rw-r--r-- | tests/LinkDBTest.php | 2 | ||||
-rw-r--r-- | tests/LinkFilterTest.php | 79 | ||||
-rw-r--r-- | tests/utils/ReferenceLinkDB.php | 15 |
4 files changed, 138 insertions, 46 deletions
diff --git a/application/LinkFilter.php b/application/LinkFilter.php index ceb47d16..17594e8f 100644 --- a/application/LinkFilter.php +++ b/application/LinkFilter.php | |||
@@ -120,7 +120,9 @@ class LinkFilter | |||
120 | * | 120 | * |
121 | * Searches: | 121 | * Searches: |
122 | * - in the URLs, title and description; | 122 | * - in the URLs, title and description; |
123 | * - are case-insensitive. | 123 | * - are case-insensitive; |
124 | * - terms surrounded by double quotes (") are searched as exact terms; | ||
125 | * - terms starting with a dash (-) are excluded (except exact terms). | ||
124 | * | 126 | * |
125 | * Example: | 127 | * Example: |
126 | * print_r($mydb->filterFulltext('hollandais')); | 128 | * print_r($mydb->filterFulltext('hollandais')); |
@@ -136,19 +138,30 @@ class LinkFilter | |||
136 | */ | 138 | */ |
137 | private function filterFulltext($searchterms, $privateonly = false) | 139 | private function filterFulltext($searchterms, $privateonly = false) |
138 | { | 140 | { |
141 | $filtered = array(); | ||
139 | $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); | 142 | $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); |
140 | $explodedSearch = explode(' ', trim($search)); | 143 | $exactRegex = '/"([^"]+)"/'; |
141 | $keys = array('title', 'description', 'url', 'tags'); | 144 | // Retrieve exact search terms. |
142 | $found = true; | 145 | preg_match_all($exactRegex, $search, $exactSearch); |
143 | $searchExactPhrase = false; | 146 | $exactSearch = array_values(array_filter($exactSearch[1])); |
144 | 147 | ||
145 | // Check if we're using double-quotes to search for the exact string | 148 | // Remove exact search terms to get AND terms search. |
146 | if ($search[0] == '"' && $search[strlen($search) - 1] == '"') { | 149 | $explodedSearchAnd = explode(' ', trim(preg_replace($exactRegex, '', $search))); |
147 | $searchExactPhrase = true; | 150 | $explodedSearchAnd = array_values(array_filter($explodedSearchAnd)); |
148 | 151 | ||
149 | // Remove the double-quotes as they are not what we search for | 152 | // Filter excluding terms and update andSearch. |
150 | $search = substr($search, 1, -1); | 153 | $excludeSearch = array(); |
154 | $andSearch = array(); | ||
155 | foreach ($explodedSearchAnd as $needle) { | ||
156 | if ($needle[0] == '-' && strlen($needle) > 1) { | ||
157 | $excludeSearch[] = substr($needle, 1); | ||
158 | } else { | ||
159 | $andSearch[] = $needle; | ||
160 | } | ||
151 | } | 161 | } |
162 | |||
163 | $keys = array('title', 'description', 'url', 'tags'); | ||
164 | |||
152 | // Iterate over every stored link. | 165 | // Iterate over every stored link. |
153 | foreach ($this->links as $link) { | 166 | foreach ($this->links as $link) { |
154 | 167 | ||
@@ -157,35 +170,32 @@ class LinkFilter | |||
157 | continue; | 170 | continue; |
158 | } | 171 | } |
159 | 172 | ||
160 | // Iterate over searchable link fields. | 173 | // Concatenate link fields to search across fields. |
174 | // A '\' separator is appended after each field so that exact search terms cannot match across fields. | ||
175 | $content = ''; | ||
161 | foreach ($keys as $key) { | 176 | foreach ($keys as $key) { |
162 | // Be optimistic | 177 | $content .= mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8') . '\\'; |
163 | $found = true; | 178 | } |
164 | 179 | ||
165 | // FIXME: Find a better word for where you're searching in | 180 | // Be optimistic |
166 | $haystack = mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8'); | 181 | $found = true; |
167 | 182 | ||
168 | // When searching for the phrase, check if it's in the haystack... | 183 | // First, we look for exact term search |
169 | if ( $searchExactPhrase && strpos($haystack, $search) !== false) { | 184 | for ($i = 0; $i < count($exactSearch) && $found; $i++) { |
170 | break; | 185 | $found = strpos($content, $exactSearch[$i]) !== false; |
171 | } | ||
172 | else { | ||
173 | // Iterate over keywords, if keyword is not found, | ||
174 | // no need to check for the others. We want all or nothing. | ||
175 | foreach($explodedSearch as $keyword) { | ||
176 | if(strpos($haystack, $keyword) === false) { | ||
177 | $found = false; | ||
178 | break; | ||
179 | } | ||
180 | } | ||
181 | } | ||
182 | |||
183 | // One of the fields of the link matches, no need to check the other. | ||
184 | if ($found) { | ||
185 | break; | ||
186 | } | ||
187 | } | 186 | } |
188 | 187 | ||
188 | // Iterate over keywords, if keyword is not found, | ||
189 | // no need to check for the others. We want all or nothing. | ||
190 | for ($i = 0; $i < count($andSearch) && $found; $i++) { | ||
191 | $found = strpos($content, $andSearch[$i]) !== false; | ||
192 | } | ||
193 | |||
194 | // Exclude terms. | ||
195 | for ($i = 0; $i < count($excludeSearch) && $found; $i++) { | ||
196 | $found = strpos($content, $excludeSearch[$i]) === false; | ||
197 | } | ||
198 | |||
189 | if ($found) { | 199 | if ($found) { |
190 | $filtered[$link['linkdate']] = $link; | 200 | $filtered[$link['linkdate']] = $link; |
191 | } | 201 | } |
diff --git a/tests/LinkDBTest.php b/tests/LinkDBTest.php index 765f771e..b6a273b3 100644 --- a/tests/LinkDBTest.php +++ b/tests/LinkDBTest.php | |||
@@ -278,6 +278,7 @@ class LinkDBTest extends PHPUnit_Framework_TestCase | |||
278 | 'stallman' => 1, | 278 | 'stallman' => 1, |
279 | 'free' => 1, | 279 | 'free' => 1, |
280 | '-exclude' => 1, | 280 | '-exclude' => 1, |
281 | 'stuff' => 2, | ||
281 | ), | 282 | ), |
282 | self::$publicLinkDB->allTags() | 283 | self::$publicLinkDB->allTags() |
283 | ); | 284 | ); |
@@ -297,6 +298,7 @@ class LinkDBTest extends PHPUnit_Framework_TestCase | |||
297 | 'w3c' => 1, | 298 | 'w3c' => 1, |
298 | 'css' => 1, | 299 | 'css' => 1, |
299 | 'Mercurial' => 1, | 300 | 'Mercurial' => 1, |
301 | 'stuff' => 2, | ||
300 | '-exclude' => 1, | 302 | '-exclude' => 1, |
301 | '.hidden' => 1, | 303 | '.hidden' => 1, |
302 | ), | 304 | ), |
diff --git a/tests/LinkFilterTest.php b/tests/LinkFilterTest.php index 164af0d4..31fd4cf4 100644 --- a/tests/LinkFilterTest.php +++ b/tests/LinkFilterTest.php | |||
@@ -27,7 +27,7 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase | |||
27 | public function testFilter() | 27 | public function testFilter() |
28 | { | 28 | { |
29 | $this->assertEquals( | 29 | $this->assertEquals( |
30 | 6, | 30 | 7, |
31 | count(self::$linkFilter->filter('', '')) | 31 | count(self::$linkFilter->filter('', '')) |
32 | ); | 32 | ); |
33 | 33 | ||
@@ -165,6 +165,17 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase | |||
165 | } | 165 | } |
166 | 166 | ||
167 | /** | 167 | /** |
168 | * Full-text search - no result found. | ||
169 | */ | ||
170 | public function testFilterFullTextNoResult() | ||
171 | { | ||
172 | $this->assertEquals( | ||
173 | 0, | ||
174 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'azertyuiop')) | ||
175 | ); | ||
176 | } | ||
177 | |||
178 | /** | ||
168 | * Full-text search - result from a link's URL | 179 | * Full-text search - result from a link's URL |
169 | */ | 180 | */ |
170 | public function testFilterFullTextURL() | 181 | public function testFilterFullTextURL() |
@@ -222,7 +233,7 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase | |||
222 | ); | 233 | ); |
223 | 234 | ||
224 | $this->assertEquals( | 235 | $this->assertEquals( |
225 | 2, | 236 | 3, |
226 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '"free software"')) | 237 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '"free software"')) |
227 | ); | 238 | ); |
228 | } | 239 | } |
@@ -250,12 +261,72 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase | |||
250 | public function testFilterFullTextMixed() | 261 | public function testFilterFullTextMixed() |
251 | { | 262 | { |
252 | $this->assertEquals( | 263 | $this->assertEquals( |
253 | 2, | 264 | 3, |
254 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'free software')) | 265 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'free software')) |
255 | ); | 266 | ); |
256 | } | 267 | } |
257 | 268 | ||
258 | /** | 269 | /** |
270 | * Full-text search - test exclusion with '-'. | ||
271 | */ | ||
272 | public function testExcludeSearch() | ||
273 | { | ||
274 | $this->assertEquals( | ||
275 | 1, | ||
276 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'free -gnu')) | ||
277 | ); | ||
278 | |||
279 | $this->assertEquals( | ||
280 | 6, | ||
281 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '-revolution')) | ||
282 | ); | ||
283 | } | ||
284 | |||
285 | /** | ||
286 | * Full-text search - test AND, exact terms and exclusion combined, across fields. | ||
287 | */ | ||
288 | public function testMultiSearch() | ||
289 | { | ||
290 | $this->assertEquals( | ||
291 | 2, | ||
292 | count(self::$linkFilter->filter( | ||
293 | LinkFilter::$FILTER_TEXT, | ||
294 | '"Free Software " stallman "read this" @website stuff' | ||
295 | )) | ||
296 | ); | ||
297 | |||
298 | $this->assertEquals( | ||
299 | 1, | ||
300 | count(self::$linkFilter->filter( | ||
301 | LinkFilter::$FILTER_TEXT, | ||
302 | '"free software " stallman "read this" -beard @website stuff' | ||
303 | )) | ||
304 | ); | ||
305 | } | ||
306 | |||
307 | /** | ||
308 | * Full-text search - make sure that exact search won't work across fields. | ||
309 | */ | ||
310 | public function testSearchExactTermMultiFieldsKo() | ||
311 | { | ||
312 | $this->assertEquals( | ||
313 | 0, | ||
314 | count(self::$linkFilter->filter( | ||
315 | LinkFilter::$FILTER_TEXT, | ||
316 | '"designer naming"' | ||
317 | )) | ||
318 | ); | ||
319 | |||
320 | $this->assertEquals( | ||
321 | 0, | ||
322 | count(self::$linkFilter->filter( | ||
323 | LinkFilter::$FILTER_TEXT, | ||
324 | '"designernaming"' | ||
325 | )) | ||
326 | ); | ||
327 | } | ||
328 | |||
329 | /** | ||
259 | * Tag search with exclusion. | 330 | * Tag search with exclusion. |
260 | */ | 331 | */ |
261 | public function testTagFilterWithExclusion() | 332 | public function testTagFilterWithExclusion() |
@@ -266,7 +337,7 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase | |||
266 | ); | 337 | ); |
267 | 338 | ||
268 | $this->assertEquals( | 339 | $this->assertEquals( |
269 | 5, | 340 | 6, |
270 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TAG, '-free')) | 341 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TAG, '-free')) |
271 | ); | 342 | ); |
272 | } | 343 | } |
diff --git a/tests/utils/ReferenceLinkDB.php b/tests/utils/ReferenceLinkDB.php index da3e8c65..61faef05 100644 --- a/tests/utils/ReferenceLinkDB.php +++ b/tests/utils/ReferenceLinkDB.php | |||
@@ -14,12 +14,21 @@ class ReferenceLinkDB | |||
14 | function __construct() | 14 | function __construct() |
15 | { | 15 | { |
16 | $this->addLink( | 16 | $this->addLink( |
17 | 'Free as in Freedom 2.0', | 17 | 'Free as in Freedom 2.0 @website', |
18 | 'https://static.fsf.org/nosvn/faif-2.0.pdf', | 18 | 'https://static.fsf.org/nosvn/faif-2.0.pdf', |
19 | 'Richard Stallman and the Free Software Revolution', | 19 | 'Richard Stallman and the Free Software Revolution. Read this.', |
20 | 0, | 20 | 0, |
21 | '20150310_114633', | 21 | '20150310_114633', |
22 | 'free gnu software stallman -exclude' | 22 | 'free gnu software stallman -exclude stuff' |
23 | ); | ||
24 | |||
25 | $this->addLink( | ||
26 | 'Link title: @website', | ||
27 | 'local', | ||
28 | 'Stallman has a beard and is part of the Free Software Foundation (or not). Seriously, read this.', | ||
29 | 0, | ||
30 | '20150310_114651', | ||
31 | 'stuff' | ||
23 | ); | 32 | ); |
24 | 33 | ||
25 | $this->addLink( | 34 | $this->addLink( |