diff options
author | ArthurHoaro <arthur@hoa.ro> | 2016-02-02 19:42:48 +0100 |
---|---|---|
committer | ArthurHoaro <arthur@hoa.ro> | 2016-02-15 21:38:45 +0100 |
commit | 522b278b03280ed809025ebbeb3eac284b68bf81 (patch) | |
tree | 9d64988c49fd53978c58c64bbd013a363c5b2d78 | |
parent | bedd176a5406003631da42366736fd5ebae29135 (diff) | |
download | Shaarli-522b278b03280ed809025ebbeb3eac284b68bf81.tar.gz Shaarli-522b278b03280ed809025ebbeb3eac284b68bf81.tar.zst Shaarli-522b278b03280ed809025ebbeb3eac284b68bf81.zip |
Support text search across link fields.
-rw-r--r-- | application/LinkFilter.php | 48 | ||||
-rw-r--r-- | tests/LinkDBTest.php | 1 | ||||
-rw-r--r-- | tests/LinkFilterTest.php | 51 | ||||
-rw-r--r-- | tests/utils/ReferenceLinkDB.php | 8 |
4 files changed, 73 insertions, 35 deletions
diff --git a/application/LinkFilter.php b/application/LinkFilter.php index e2ef94ea..17594e8f 100644 --- a/application/LinkFilter.php +++ b/application/LinkFilter.php | |||
@@ -138,6 +138,7 @@ class LinkFilter | |||
138 | */ | 138 | */ |
139 | private function filterFulltext($searchterms, $privateonly = false) | 139 | private function filterFulltext($searchterms, $privateonly = false) |
140 | { | 140 | { |
141 | $filtered = array(); | ||
141 | $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); | 142 | $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); |
142 | $exactRegex = '/"([^"]+)"/'; | 143 | $exactRegex = '/"([^"]+)"/'; |
143 | // Retrieve exact search terms. | 144 | // Retrieve exact search terms. |
@@ -169,35 +170,32 @@ class LinkFilter | |||
169 | continue; | 170 | continue; |
170 | } | 171 | } |
171 | 172 | ||
172 | // Iterate over searchable link fields. | 173 | // Concatenate link fields to search across fields. |
174 | // Adds a '\' separator for exact search terms. | ||
175 | $content = ''; | ||
173 | foreach ($keys as $key) { | 176 | foreach ($keys as $key) { |
174 | // Be optimistic | 177 | $content .= mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8') . '\\'; |
175 | $found = true; | 178 | } |
176 | |||
177 | $haystack = mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8'); | ||
178 | |||
179 | // First, we look for exact term search | ||
180 | for ($i = 0; $i < count($exactSearch) && $found; $i++) { | ||
181 | $found = strpos($haystack, $exactSearch[$i]) !== false; | ||
182 | } | ||
183 | 179 | ||
184 | // Iterate over keywords, if keyword is not found, | 180 | // Be optimistic |
185 | // no need to check for the others. We want all or nothing. | 181 | $found = true; |
186 | for ($i = 0; $i < count($andSearch) && $found; $i++) { | ||
187 | $found = strpos($haystack, $andSearch[$i]) !== false; | ||
188 | } | ||
189 | 182 | ||
190 | // Exclude terms. | 183 | // First, we look for exact term search |
191 | for ($i = 0; $i < count($excludeSearch) && $found; $i++) { | 184 | for ($i = 0; $i < count($exactSearch) && $found; $i++) { |
192 | $found = strpos($haystack, $excludeSearch[$i]) === false; | 185 | $found = strpos($content, $exactSearch[$i]) !== false; |
193 | } | 186 | } |
194 | 187 | ||
195 | // One of the fields of the link matches, no need to check the other. | 188 | // Iterate over keywords, if keyword is not found, |
196 | if ($found) { | 189 | // no need to check for the others. We want all or nothing. |
197 | break; | 190 | for ($i = 0; $i < count($andSearch) && $found; $i++) { |
198 | } | 191 | $found = strpos($content, $andSearch[$i]) !== false; |
199 | } | 192 | } |
200 | 193 | ||
194 | // Exclude terms. | ||
195 | for ($i = 0; $i < count($excludeSearch) && $found; $i++) { | ||
196 | $found = strpos($content, $excludeSearch[$i]) === false; | ||
197 | } | ||
198 | |||
201 | if ($found) { | 199 | if ($found) { |
202 | $filtered[$link['linkdate']] = $link; | 200 | $filtered[$link['linkdate']] = $link; |
203 | } | 201 | } |
diff --git a/tests/LinkDBTest.php b/tests/LinkDBTest.php index 78f42e56..b6a273b3 100644 --- a/tests/LinkDBTest.php +++ b/tests/LinkDBTest.php | |||
@@ -298,6 +298,7 @@ class LinkDBTest extends PHPUnit_Framework_TestCase | |||
298 | 'w3c' => 1, | 298 | 'w3c' => 1, |
299 | 'css' => 1, | 299 | 'css' => 1, |
300 | 'Mercurial' => 1, | 300 | 'Mercurial' => 1, |
301 | 'stuff' => 2, | ||
301 | '-exclude' => 1, | 302 | '-exclude' => 1, |
302 | '.hidden' => 1, | 303 | '.hidden' => 1, |
303 | ), | 304 | ), |
diff --git a/tests/LinkFilterTest.php b/tests/LinkFilterTest.php index 4d754d25..31fd4cf4 100644 --- a/tests/LinkFilterTest.php +++ b/tests/LinkFilterTest.php | |||
@@ -165,6 +165,17 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase | |||
165 | } | 165 | } |
166 | 166 | ||
167 | /** | 167 | /** |
168 | * Full-text search - no result found. | ||
169 | */ | ||
170 | public function testFilterFullTextNoResult() | ||
171 | { | ||
172 | $this->assertEquals( | ||
173 | 0, | ||
174 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'azertyuiop')) | ||
175 | ); | ||
176 | } | ||
177 | |||
178 | /** | ||
168 | * Full-text search - result from a link's URL | 179 | * Full-text search - result from a link's URL |
169 | */ | 180 | */ |
170 | public function testFilterFullTextURL() | 181 | public function testFilterFullTextURL() |
@@ -262,28 +273,56 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase | |||
262 | { | 273 | { |
263 | $this->assertEquals( | 274 | $this->assertEquals( |
264 | 1, | 275 | 1, |
265 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'free -software')) | 276 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'free -gnu')) |
266 | ); | 277 | ); |
267 | 278 | ||
268 | $this->assertEquals( | 279 | $this->assertEquals( |
269 | 7, | 280 | 6, |
270 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '-software')) | 281 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '-revolution')) |
271 | ); | 282 | ); |
272 | } | 283 | } |
273 | 284 | ||
274 | /** | 285 | /** |
275 | * Full-text search - test AND, exact terms and exclusion combined. | 286 | * Full-text search - test AND, exact terms and exclusion combined, across fields. |
276 | */ | 287 | */ |
277 | public function testMultiSearch() | 288 | public function testMultiSearch() |
278 | { | 289 | { |
279 | $this->assertEquals( | 290 | $this->assertEquals( |
280 | 2, | 291 | 2, |
281 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '"Free Software " stallman "read this"')) | 292 | count(self::$linkFilter->filter( |
293 | LinkFilter::$FILTER_TEXT, | ||
294 | '"Free Software " stallman "read this" @website stuff' | ||
295 | )) | ||
282 | ); | 296 | ); |
283 | 297 | ||
284 | $this->assertEquals( | 298 | $this->assertEquals( |
285 | 1, | 299 | 1, |
286 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '"free software " stallman "read this" -beard')) | 300 | count(self::$linkFilter->filter( |
301 | LinkFilter::$FILTER_TEXT, | ||
302 | '"free software " stallman "read this" -beard @website stuff' | ||
303 | )) | ||
304 | ); | ||
305 | } | ||
306 | |||
307 | /** | ||
308 | * Full-text search - make sure that exact search won't work across fields. | ||
309 | */ | ||
310 | public function testSearchExactTermMultiFieldsKo() | ||
311 | { | ||
312 | $this->assertEquals( | ||
313 | 0, | ||
314 | count(self::$linkFilter->filter( | ||
315 | LinkFilter::$FILTER_TEXT, | ||
316 | '"designer naming"' | ||
317 | )) | ||
318 | ); | ||
319 | |||
320 | $this->assertEquals( | ||
321 | 0, | ||
322 | count(self::$linkFilter->filter( | ||
323 | LinkFilter::$FILTER_TEXT, | ||
324 | '"designernaming"' | ||
325 | )) | ||
287 | ); | 326 | ); |
288 | } | 327 | } |
289 | 328 | ||
diff --git a/tests/utils/ReferenceLinkDB.php b/tests/utils/ReferenceLinkDB.php index b64b58bf..61faef05 100644 --- a/tests/utils/ReferenceLinkDB.php +++ b/tests/utils/ReferenceLinkDB.php | |||
@@ -14,21 +14,21 @@ class ReferenceLinkDB | |||
14 | function __construct() | 14 | function __construct() |
15 | { | 15 | { |
16 | $this->addLink( | 16 | $this->addLink( |
17 | 'Free as in Freedom 2.0', | 17 | 'Free as in Freedom 2.0 @website', |
18 | 'https://static.fsf.org/nosvn/faif-2.0.pdf', | 18 | 'https://static.fsf.org/nosvn/faif-2.0.pdf', |
19 | 'Richard Stallman and the Free Software Revolution. Read this.', | 19 | 'Richard Stallman and the Free Software Revolution. Read this.', |
20 | 0, | 20 | 0, |
21 | '20150310_114633', | 21 | '20150310_114633', |
22 | 'free gnu software stallman -exclude' | 22 | 'free gnu software stallman -exclude stuff' |
23 | ); | 23 | ); |
24 | 24 | ||
25 | $this->addLink( | 25 | $this->addLink( |
26 | 'Note:', | 26 | 'Link title: @website', |
27 | 'local', | 27 | 'local', |
28 | 'Stallman has a beard and is part of the Free Software Foundation (or not). Seriously, read this.', | 28 | 'Stallman has a beard and is part of the Free Software Foundation (or not). Seriously, read this.', |
29 | 0, | 29 | 0, |
30 | '20150310_114651', | 30 | '20150310_114651', |
31 | '' | 31 | 'stuff' |
32 | ); | 32 | ); |
33 | 33 | ||
34 | $this->addLink( | 34 | $this->addLink( |