diff options
author | ArthurHoaro <arthur@hoa.ro> | 2016-02-01 20:33:58 +0100 |
---|---|---|
committer | ArthurHoaro <arthur@hoa.ro> | 2016-02-15 21:38:40 +0100 |
commit | bedd176a5406003631da42366736fd5ebae29135 (patch) | |
tree | b7577d5f5fcb4534d293a4a6c59d84e8b0f48d42 | |
parent | 07c2f73543b358d39b3751c8542966794f28db03 (diff) | |
download | Shaarli-bedd176a5406003631da42366736fd5ebae29135.tar.gz Shaarli-bedd176a5406003631da42366736fd5ebae29135.tar.zst Shaarli-bedd176a5406003631da42366736fd5ebae29135.zip |
Improved search: combine AND, exact terms and exclude search.
-rw-r--r-- | application/LinkFilter.php | 60 | ||||
-rw-r--r-- | tests/LinkDBTest.php | 1 | ||||
-rw-r--r-- | tests/LinkFilterTest.php | 40 | ||||
-rw-r--r-- | tests/utils/ReferenceLinkDB.php | 11 |
4 files changed, 83 insertions, 29 deletions
diff --git a/application/LinkFilter.php b/application/LinkFilter.php index ceb47d16..e2ef94ea 100644 --- a/application/LinkFilter.php +++ b/application/LinkFilter.php | |||
@@ -120,7 +120,9 @@ class LinkFilter | |||
120 | * | 120 | * |
121 | * Searches: | 121 | * Searches: |
122 | * - in the URLs, title and description; | 122 | * - in the URLs, title and description; |
123 | * - are case-insensitive. | 123 | * - are case-insensitive; |
124 | * - terms surrounded by quotes " are exact terms search. | ||
125 | * - terms starting with a dash - are excluded (except exact terms). | ||
124 | * | 126 | * |
125 | * Example: | 127 | * Example: |
126 | * print_r($mydb->filterFulltext('hollandais')); | 128 | * print_r($mydb->filterFulltext('hollandais')); |
@@ -137,18 +139,28 @@ class LinkFilter | |||
137 | private function filterFulltext($searchterms, $privateonly = false) | 139 | private function filterFulltext($searchterms, $privateonly = false) |
138 | { | 140 | { |
139 | $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); | 141 | $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); |
140 | $explodedSearch = explode(' ', trim($search)); | 142 | $exactRegex = '/"([^"]+)"/'; |
141 | $keys = array('title', 'description', 'url', 'tags'); | 143 | // Retrieve exact search terms. |
142 | $found = true; | 144 | preg_match_all($exactRegex, $search, $exactSearch); |
143 | $searchExactPhrase = false; | 145 | $exactSearch = array_values(array_filter($exactSearch[1])); |
144 | 146 | ||
145 | // Check if we're using double-quotes to search for the exact string | 147 | // Remove exact search terms to get AND terms search. |
146 | if ($search[0] == '"' && $search[strlen($search) - 1] == '"') { | 148 | $explodedSearchAnd = explode(' ', trim(preg_replace($exactRegex, '', $search))); |
147 | $searchExactPhrase = true; | 149 | $explodedSearchAnd = array_values(array_filter($explodedSearchAnd)); |
148 | 150 | ||
149 | // Remove the double-quotes as they are not what we search for | 151 | // Filter excluding terms and update andSearch. |
150 | $search = substr($search, 1, -1); | 152 | $excludeSearch = array(); |
153 | $andSearch = array(); | ||
154 | foreach ($explodedSearchAnd as $needle) { | ||
155 | if ($needle[0] == '-' && strlen($needle) > 1) { | ||
156 | $excludeSearch[] = substr($needle, 1); | ||
157 | } else { | ||
158 | $andSearch[] = $needle; | ||
159 | } | ||
151 | } | 160 | } |
161 | |||
162 | $keys = array('title', 'description', 'url', 'tags'); | ||
163 | |||
152 | // Iterate over every stored link. | 164 | // Iterate over every stored link. |
153 | foreach ($this->links as $link) { | 165 | foreach ($this->links as $link) { |
154 | 166 | ||
@@ -162,22 +174,22 @@ class LinkFilter | |||
162 | // Be optimistic | 174 | // Be optimistic |
163 | $found = true; | 175 | $found = true; |
164 | 176 | ||
165 | // FIXME: Find a better word for where you're searching in | ||
166 | $haystack = mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8'); | 177 | $haystack = mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8'); |
167 | 178 | ||
168 | // When searching for the phrase, check if it's in the haystack... | 179 | // First, we look for exact term search |
169 | if ( $searchExactPhrase && strpos($haystack, $search) !== false) { | 180 | for ($i = 0; $i < count($exactSearch) && $found; $i++) { |
170 | break; | 181 | $found = strpos($haystack, $exactSearch[$i]) !== false; |
171 | } | 182 | } |
172 | else { | 183 | |
173 | // Iterate over keywords, if keyword is not found, | 184 | // Iterate over keywords, if keyword is not found, |
174 | // no need to check for the others. We want all or nothing. | 185 | // no need to check for the others. We want all or nothing. |
175 | foreach($explodedSearch as $keyword) { | 186 | for ($i = 0; $i < count($andSearch) && $found; $i++) { |
176 | if(strpos($haystack, $keyword) === false) { | 187 | $found = strpos($haystack, $andSearch[$i]) !== false; |
177 | $found = false; | 188 | } |
178 | break; | 189 | |
179 | } | 190 | // Exclude terms. |
180 | } | 191 | for ($i = 0; $i < count($excludeSearch) && $found; $i++) { |
192 | $found = strpos($haystack, $excludeSearch[$i]) === false; | ||
181 | } | 193 | } |
182 | 194 | ||
183 | // One of the fields of the link matches, no need to check the other. | 195 | // One of the fields of the link matches, no need to check the other. |
diff --git a/tests/LinkDBTest.php b/tests/LinkDBTest.php index 765f771e..78f42e56 100644 --- a/tests/LinkDBTest.php +++ b/tests/LinkDBTest.php | |||
@@ -278,6 +278,7 @@ class LinkDBTest extends PHPUnit_Framework_TestCase | |||
278 | 'stallman' => 1, | 278 | 'stallman' => 1, |
279 | 'free' => 1, | 279 | 'free' => 1, |
280 | '-exclude' => 1, | 280 | '-exclude' => 1, |
281 | 'stuff' => 2, | ||
281 | ), | 282 | ), |
282 | self::$publicLinkDB->allTags() | 283 | self::$publicLinkDB->allTags() |
283 | ); | 284 | ); |
diff --git a/tests/LinkFilterTest.php b/tests/LinkFilterTest.php index 164af0d4..4d754d25 100644 --- a/tests/LinkFilterTest.php +++ b/tests/LinkFilterTest.php | |||
@@ -27,7 +27,7 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase | |||
27 | public function testFilter() | 27 | public function testFilter() |
28 | { | 28 | { |
29 | $this->assertEquals( | 29 | $this->assertEquals( |
30 | 6, | 30 | 7, |
31 | count(self::$linkFilter->filter('', '')) | 31 | count(self::$linkFilter->filter('', '')) |
32 | ); | 32 | ); |
33 | 33 | ||
@@ -222,7 +222,7 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase | |||
222 | ); | 222 | ); |
223 | 223 | ||
224 | $this->assertEquals( | 224 | $this->assertEquals( |
225 | 2, | 225 | 3, |
226 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '"free software"')) | 226 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '"free software"')) |
227 | ); | 227 | ); |
228 | } | 228 | } |
@@ -250,12 +250,44 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase | |||
250 | public function testFilterFullTextMixed() | 250 | public function testFilterFullTextMixed() |
251 | { | 251 | { |
252 | $this->assertEquals( | 252 | $this->assertEquals( |
253 | 2, | 253 | 3, |
254 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'free software')) | 254 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'free software')) |
255 | ); | 255 | ); |
256 | } | 256 | } |
257 | 257 | ||
258 | /** | 258 | /** |
259 | * Full-text search - test exclusion with '-'. | ||
260 | */ | ||
261 | public function testExcludeSearch() | ||
262 | { | ||
263 | $this->assertEquals( | ||
264 | 1, | ||
265 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'free -software')) | ||
266 | ); | ||
267 | |||
268 | $this->assertEquals( | ||
269 | 7, | ||
270 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '-software')) | ||
271 | ); | ||
272 | } | ||
273 | |||
274 | /** | ||
275 | * Full-text search - test AND, exact terms and exclusion combined. | ||
276 | */ | ||
277 | public function testMultiSearch() | ||
278 | { | ||
279 | $this->assertEquals( | ||
280 | 2, | ||
281 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '"Free Software " stallman "read this"')) | ||
282 | ); | ||
283 | |||
284 | $this->assertEquals( | ||
285 | 1, | ||
286 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '"free software " stallman "read this" -beard')) | ||
287 | ); | ||
288 | } | ||
289 | |||
290 | /** | ||
259 | * Tag search with exclusion. | 291 | * Tag search with exclusion. |
260 | */ | 292 | */ |
261 | public function testTagFilterWithExclusion() | 293 | public function testTagFilterWithExclusion() |
@@ -266,7 +298,7 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase | |||
266 | ); | 298 | ); |
267 | 299 | ||
268 | $this->assertEquals( | 300 | $this->assertEquals( |
269 | 5, | 301 | 6, |
270 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TAG, '-free')) | 302 | count(self::$linkFilter->filter(LinkFilter::$FILTER_TAG, '-free')) |
271 | ); | 303 | ); |
272 | } | 304 | } |
diff --git a/tests/utils/ReferenceLinkDB.php b/tests/utils/ReferenceLinkDB.php index da3e8c65..b64b58bf 100644 --- a/tests/utils/ReferenceLinkDB.php +++ b/tests/utils/ReferenceLinkDB.php | |||
@@ -16,13 +16,22 @@ class ReferenceLinkDB | |||
16 | $this->addLink( | 16 | $this->addLink( |
17 | 'Free as in Freedom 2.0', | 17 | 'Free as in Freedom 2.0', |
18 | 'https://static.fsf.org/nosvn/faif-2.0.pdf', | 18 | 'https://static.fsf.org/nosvn/faif-2.0.pdf', |
19 | 'Richard Stallman and the Free Software Revolution', | 19 | 'Richard Stallman and the Free Software Revolution. Read this.', |
20 | 0, | 20 | 0, |
21 | '20150310_114633', | 21 | '20150310_114633', |
22 | 'free gnu software stallman -exclude' | 22 | 'free gnu software stallman -exclude' |
23 | ); | 23 | ); |
24 | 24 | ||
25 | $this->addLink( | 25 | $this->addLink( |
26 | 'Note:', | ||
27 | 'local', | ||
28 | 'Stallman has a beard and is part of the Free Software Foundation (or not). Seriously, read this.', | ||
29 | 0, | ||
30 | '20150310_114651', | ||
31 | '' | ||
32 | ); | ||
33 | |||
34 | $this->addLink( | ||
26 | 'MediaGoblin', | 35 | 'MediaGoblin', |
27 | 'http://mediagoblin.org/', | 36 | 'http://mediagoblin.org/', |
28 | 'A free software media publishing platform', | 37 | 'A free software media publishing platform', |