diff options
author | Arthur <arthur@hoa.ro> | 2016-02-15 21:43:07 +0100 |
---|---|---|
committer | Arthur <arthur@hoa.ro> | 2016-02-15 21:43:07 +0100 |
commit | bfec695df1205864b46ca7175e1598b184602687 (patch) | |
tree | 9d64988c49fd53978c58c64bbd013a363c5b2d78 /application | |
parent | 07c2f73543b358d39b3751c8542966794f28db03 (diff) | |
parent | 522b278b03280ed809025ebbeb3eac284b68bf81 (diff) | |
download | Shaarli-bfec695df1205864b46ca7175e1598b184602687.tar.gz Shaarli-bfec695df1205864b46ca7175e1598b184602687.tar.zst Shaarli-bfec695df1205864b46ca7175e1598b184602687.zip |
Merge pull request #455 from ArthurHoaro/improved-search-454
Improved search: combine AND, exact terms and exclude search.
Diffstat (limited to 'application')
-rw-r--r-- | application/LinkFilter.php | 88 |
1 files changed, 49 insertions, 39 deletions
diff --git a/application/LinkFilter.php b/application/LinkFilter.php index ceb47d16..17594e8f 100644 --- a/application/LinkFilter.php +++ b/application/LinkFilter.php | |||
@@ -120,7 +120,9 @@ class LinkFilter | |||
120 | * | 120 | * |
121 | * Searches: | 121 | * Searches: |
122 | * - in the URLs, title and description; | 122 | * - in the URLs, title and description; |
123 | * - are case-insensitive. | 123 | * - are case-insensitive; |
124 | * - terms surrounded by quotes " are exact terms search. | ||
125 | * - terms starting with a dash - are excluded (except exact terms). | ||
124 | * | 126 | * |
125 | * Example: | 127 | * Example: |
126 | * print_r($mydb->filterFulltext('hollandais')); | 128 | * print_r($mydb->filterFulltext('hollandais')); |
@@ -136,19 +138,30 @@ class LinkFilter | |||
136 | */ | 138 | */ |
137 | private function filterFulltext($searchterms, $privateonly = false) | 139 | private function filterFulltext($searchterms, $privateonly = false) |
138 | { | 140 | { |
141 | $filtered = array(); | ||
139 | $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); | 142 | $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); |
140 | $explodedSearch = explode(' ', trim($search)); | 143 | $exactRegex = '/"([^"]+)"/'; |
141 | $keys = array('title', 'description', 'url', 'tags'); | 144 | // Retrieve exact search terms. |
142 | $found = true; | 145 | preg_match_all($exactRegex, $search, $exactSearch); |
143 | $searchExactPhrase = false; | 146 | $exactSearch = array_values(array_filter($exactSearch[1])); |
144 | 147 | ||
145 | // Check if we're using double-quotes to search for the exact string | 148 | // Remove exact search terms to get AND terms search. |
146 | if ($search[0] == '"' && $search[strlen($search) - 1] == '"') { | 149 | $explodedSearchAnd = explode(' ', trim(preg_replace($exactRegex, '', $search))); |
147 | $searchExactPhrase = true; | 150 | $explodedSearchAnd = array_values(array_filter($explodedSearchAnd)); |
148 | 151 | ||
149 | // Remove the double-quotes as they are not what we search for | 152 | // Filter excluding terms and update andSearch. |
150 | $search = substr($search, 1, -1); | 153 | $excludeSearch = array(); |
154 | $andSearch = array(); | ||
155 | foreach ($explodedSearchAnd as $needle) { | ||
156 | if ($needle[0] == '-' && strlen($needle) > 1) { | ||
157 | $excludeSearch[] = substr($needle, 1); | ||
158 | } else { | ||
159 | $andSearch[] = $needle; | ||
160 | } | ||
151 | } | 161 | } |
162 | |||
163 | $keys = array('title', 'description', 'url', 'tags'); | ||
164 | |||
152 | // Iterate over every stored link. | 165 | // Iterate over every stored link. |
153 | foreach ($this->links as $link) { | 166 | foreach ($this->links as $link) { |
154 | 167 | ||
@@ -157,35 +170,32 @@ class LinkFilter | |||
157 | continue; | 170 | continue; |
158 | } | 171 | } |
159 | 172 | ||
160 | // Iterate over searchable link fields. | 173 | // Concatenate link fields to search across fields. |
174 | // Adds a '\' separator for exact search terms. | ||
175 | $content = ''; | ||
161 | foreach ($keys as $key) { | 176 | foreach ($keys as $key) { |
162 | // Be optimistic | 177 | $content .= mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8') . '\\'; |
163 | $found = true; | 178 | } |
164 | 179 | ||
165 | // FIXME: Find a better word for where you're searching in | 180 | // Be optimistic |
166 | $haystack = mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8'); | 181 | $found = true; |
167 | 182 | ||
168 | // When searching for the phrase, check if it's in the haystack... | 183 | // First, we look for exact term search |
169 | if ( $searchExactPhrase && strpos($haystack, $search) !== false) { | 184 | for ($i = 0; $i < count($exactSearch) && $found; $i++) { |
170 | break; | 185 | $found = strpos($content, $exactSearch[$i]) !== false; |
171 | } | ||
172 | else { | ||
173 | // Iterate over keywords, if keyword is not found, | ||
174 | // no need to check for the others. We want all or nothing. | ||
175 | foreach($explodedSearch as $keyword) { | ||
176 | if(strpos($haystack, $keyword) === false) { | ||
177 | $found = false; | ||
178 | break; | ||
179 | } | ||
180 | } | ||
181 | } | ||
182 | |||
183 | // One of the fields of the link matches, no need to check the other. | ||
184 | if ($found) { | ||
185 | break; | ||
186 | } | ||
187 | } | 186 | } |
188 | 187 | ||
188 | // Iterate over keywords, if keyword is not found, | ||
189 | // no need to check for the others. We want all or nothing. | ||
190 | for ($i = 0; $i < count($andSearch) && $found; $i++) { | ||
191 | $found = strpos($content, $andSearch[$i]) !== false; | ||
192 | } | ||
193 | |||
194 | // Exclude terms. | ||
195 | for ($i = 0; $i < count($excludeSearch) && $found; $i++) { | ||
196 | $found = strpos($content, $excludeSearch[$i]) === false; | ||
197 | } | ||
198 | |||
189 | if ($found) { | 199 | if ($found) { |
190 | $filtered[$link['linkdate']] = $link; | 200 | $filtered[$link['linkdate']] = $link; |
191 | } | 201 | } |