about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: ArthurHoaro <arthur@hoa.ro> 2016-02-01 20:33:58 +0100
committer: ArthurHoaro <arthur@hoa.ro> 2016-02-15 21:38:40 +0100
commit: bedd176a5406003631da42366736fd5ebae29135 (patch)
tree: b7577d5f5fcb4534d293a4a6c59d84e8b0f48d42
parent: 07c2f73543b358d39b3751c8542966794f28db03 (diff)
download: Shaarli-bedd176a5406003631da42366736fd5ebae29135.tar.gz
Shaarli-bedd176a5406003631da42366736fd5ebae29135.tar.zst
Shaarli-bedd176a5406003631da42366736fd5ebae29135.zip
Improved search: combine AND, exact terms and exclude search.
-rw-r--r-- application/LinkFilter.php 60
-rw-r--r-- tests/LinkDBTest.php 1
-rw-r--r-- tests/LinkFilterTest.php 40
-rw-r--r-- tests/utils/ReferenceLinkDB.php 11
4 files changed, 83 insertions, 29 deletions
diff --git a/application/LinkFilter.php b/application/LinkFilter.php
index ceb47d16..e2ef94ea 100644
--- a/application/LinkFilter.php
+++ b/application/LinkFilter.php
@@ -120,7 +120,9 @@ class LinkFilter
120 * 120 *
121 * Searches: 121 * Searches:
122 * - in the URLs, title and description; 122 * - in the URLs, title and description;
123 * - are case-insensitive. 123 * - are case-insensitive;
124 * - terms surrounded by quotes " are exact terms search.
125 * - terms starting with a dash - are excluded (except exact terms).
124 * 126 *
125 * Example: 127 * Example:
126 * print_r($mydb->filterFulltext('hollandais')); 128 * print_r($mydb->filterFulltext('hollandais'));
@@ -137,18 +139,28 @@ class LinkFilter
137 private function filterFulltext($searchterms, $privateonly = false) 139 private function filterFulltext($searchterms, $privateonly = false)
138 { 140 {
139 $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); 141 $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8');
140 $explodedSearch = explode(' ', trim($search)); 142 $exactRegex = '/"([^"]+)"/';
141 $keys = array('title', 'description', 'url', 'tags'); 143 // Retrieve exact search terms.
142 $found = true; 144 preg_match_all($exactRegex, $search, $exactSearch);
143 $searchExactPhrase = false; 145 $exactSearch = array_values(array_filter($exactSearch[1]));
144 146
145 // Check if we're using double-quotes to search for the exact string 147 // Remove exact search terms to get AND terms search.
146 if ($search[0] == '"' && $search[strlen($search) - 1] == '"') { 148 $explodedSearchAnd = explode(' ', trim(preg_replace($exactRegex, '', $search)));
147 $searchExactPhrase = true; 149 $explodedSearchAnd = array_values(array_filter($explodedSearchAnd));
148 150
149 // Remove the double-quotes as they are not what we search for 151 // Filter excluding terms and update andSearch.
150 $search = substr($search, 1, -1); 152 $excludeSearch = array();
153 $andSearch = array();
154 foreach ($explodedSearchAnd as $needle) {
155 if ($needle[0] == '-' && strlen($needle) > 1) {
156 $excludeSearch[] = substr($needle, 1);
157 } else {
158 $andSearch[] = $needle;
159 }
151 } 160 }
161
162 $keys = array('title', 'description', 'url', 'tags');
163
152 // Iterate over every stored link. 164 // Iterate over every stored link.
153 foreach ($this->links as $link) { 165 foreach ($this->links as $link) {
154 166
@@ -162,22 +174,22 @@ class LinkFilter
162 // Be optimistic 174 // Be optimistic
163 $found = true; 175 $found = true;
164 176
165 // FIXME: Find a better word for where you're searching in
166 $haystack = mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8'); 177 $haystack = mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8');
167 178
168 // When searching for the phrase, check if it's in the haystack... 179 // First, we look for exact term search
169 if ( $searchExactPhrase && strpos($haystack, $search) !== false) { 180 for ($i = 0; $i < count($exactSearch) && $found; $i++) {
170 break; 181 $found = strpos($haystack, $exactSearch[$i]) !== false;
171 } 182 }
172 else { 183
173 // Iterate over keywords, if keyword is not found, 184 // Iterate over keywords, if keyword is not found,
174 // no need to check for the others. We want all or nothing. 185 // no need to check for the others. We want all or nothing.
175 foreach($explodedSearch as $keyword) { 186 for ($i = 0; $i < count($andSearch) && $found; $i++) {
176 if(strpos($haystack, $keyword) === false) { 187 $found = strpos($haystack, $andSearch[$i]) !== false;
177 $found = false; 188 }
178 break; 189
179 } 190 // Exclude terms.
180 } 191 for ($i = 0; $i < count($excludeSearch) && $found; $i++) {
192 $found = strpos($haystack, $excludeSearch[$i]) === false;
181 } 193 }
182 194
183 // One of the fields of the link matches, no need to check the other. 195 // One of the fields of the link matches, no need to check the other.
diff --git a/tests/LinkDBTest.php b/tests/LinkDBTest.php
index 765f771e..78f42e56 100644
--- a/tests/LinkDBTest.php
+++ b/tests/LinkDBTest.php
@@ -278,6 +278,7 @@ class LinkDBTest extends PHPUnit_Framework_TestCase
278 'stallman' => 1, 278 'stallman' => 1,
279 'free' => 1, 279 'free' => 1,
280 '-exclude' => 1, 280 '-exclude' => 1,
281 'stuff' => 2,
281 ), 282 ),
282 self::$publicLinkDB->allTags() 283 self::$publicLinkDB->allTags()
283 ); 284 );
diff --git a/tests/LinkFilterTest.php b/tests/LinkFilterTest.php
index 164af0d4..4d754d25 100644
--- a/tests/LinkFilterTest.php
+++ b/tests/LinkFilterTest.php
@@ -27,7 +27,7 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase
27 public function testFilter() 27 public function testFilter()
28 { 28 {
29 $this->assertEquals( 29 $this->assertEquals(
30 6, 30 7,
31 count(self::$linkFilter->filter('', '')) 31 count(self::$linkFilter->filter('', ''))
32 ); 32 );
33 33
@@ -222,7 +222,7 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase
222 ); 222 );
223 223
224 $this->assertEquals( 224 $this->assertEquals(
225 2, 225 3,
226 count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '"free software"')) 226 count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '"free software"'))
227 ); 227 );
228 } 228 }
@@ -250,12 +250,44 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase
250 public function testFilterFullTextMixed() 250 public function testFilterFullTextMixed()
251 { 251 {
252 $this->assertEquals( 252 $this->assertEquals(
253 2, 253 3,
254 count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'free software')) 254 count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'free software'))
255 ); 255 );
256 } 256 }
257 257
258 /** 258 /**
259 * Full-text search - test exclusion with '-'.
260 */
261 public function testExcludeSearch()
262 {
263 $this->assertEquals(
264 1,
265 count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'free -software'))
266 );
267
268 $this->assertEquals(
269 7,
270 count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '-software'))
271 );
272 }
273
274 /**
275 * Full-text search - test AND, exact terms and exclusion combined.
276 */
277 public function testMultiSearch()
278 {
279 $this->assertEquals(
280 2,
281 count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '"Free Software " stallman "read this"'))
282 );
283
284 $this->assertEquals(
285 1,
286 count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '"free software " stallman "read this" -beard'))
287 );
288 }
289
290 /**
259 * Tag search with exclusion. 291 * Tag search with exclusion.
260 */ 292 */
261 public function testTagFilterWithExclusion() 293 public function testTagFilterWithExclusion()
@@ -266,7 +298,7 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase
266 ); 298 );
267 299
268 $this->assertEquals( 300 $this->assertEquals(
269 5, 301 6,
270 count(self::$linkFilter->filter(LinkFilter::$FILTER_TAG, '-free')) 302 count(self::$linkFilter->filter(LinkFilter::$FILTER_TAG, '-free'))
271 ); 303 );
272 } 304 }
diff --git a/tests/utils/ReferenceLinkDB.php b/tests/utils/ReferenceLinkDB.php
index da3e8c65..b64b58bf 100644
--- a/tests/utils/ReferenceLinkDB.php
+++ b/tests/utils/ReferenceLinkDB.php
@@ -16,13 +16,22 @@ class ReferenceLinkDB
16 $this->addLink( 16 $this->addLink(
17 'Free as in Freedom 2.0', 17 'Free as in Freedom 2.0',
18 'https://static.fsf.org/nosvn/faif-2.0.pdf', 18 'https://static.fsf.org/nosvn/faif-2.0.pdf',
19 'Richard Stallman and the Free Software Revolution', 19 'Richard Stallman and the Free Software Revolution. Read this.',
20 0, 20 0,
21 '20150310_114633', 21 '20150310_114633',
22 'free gnu software stallman -exclude' 22 'free gnu software stallman -exclude'
23 ); 23 );
24 24
25 $this->addLink( 25 $this->addLink(
26 'Note:',
27 'local',
28 'Stallman has a beard and is part of the Free Software Foundation (or not). Seriously, read this.',
29 0,
30 '20150310_114651',
31 ''
32 );
33
34 $this->addLink(
26 'MediaGoblin', 35 'MediaGoblin',
27 'http://mediagoblin.org/', 36 'http://mediagoblin.org/',
28 'A free software media publishing platform', 37 'A free software media publishing platform',