diff options
Diffstat (limited to 'src/js')
-rw-r--r-- | src/js/levenshtein.js | 223 |
1 files changed, 75 insertions, 148 deletions
diff --git a/src/js/levenshtein.js b/src/js/levenshtein.js index 3051ec5..a57b8fb 100644 --- a/src/js/levenshtein.js +++ b/src/js/levenshtein.js | |||
@@ -1,38 +1,17 @@ | |||
1 | // source | ||
2 | // https://github.com/hiddentao/fast-levenshtein/blob/2.0.6/levenshtein.js | ||
1 | (function() { | 3 | (function() { |
2 | 'use strict'; | 4 | 'use strict'; |
3 | 5 | ||
4 | /** | 6 | var collator; |
5 | * Extend an Object with another Object's properties. | 7 | try { |
6 | * | 8 | collator = (typeof Intl !== "undefined" && typeof Intl.Collator !== "undefined") ? Intl.Collator("generic", { sensitivity: "base" }) : null; |
7 | * The source objects are specified as additional arguments. | 9 | } catch (err){ |
8 | * | 10 | console.log("Collator could not be initialized and wouldn't be used"); |
9 | * @param dst Object the object to extend. | 11 | } |
10 | * | 12 | // arrays to re-use |
11 | * @return Object the final object. | 13 | var prevRow = [], |
12 | */ | 14 | str2Char = []; |
13 | var _extend = function(dst) { | ||
14 | var sources = Array.prototype.slice.call(arguments, 1); | ||
15 | for (var i=0; i<sources.length; ++i) { | ||
16 | var src = sources[i]; | ||
17 | for (var p in src) { | ||
18 | if (src.hasOwnProperty(p)) dst[p] = src[p]; | ||
19 | } | ||
20 | } | ||
21 | return dst; | ||
22 | }; | ||
23 | |||
24 | |||
25 | /** | ||
26 | * Defer execution of given function. | ||
27 | * @param {Function} func | ||
28 | */ | ||
29 | var _defer = function(func) { | ||
30 | if (typeof setImmediate === 'function') { | ||
31 | return setImmediate(func); | ||
32 | } else { | ||
33 | return setTimeout(func, 0); | ||
34 | } | ||
35 | }; | ||
36 | 15 | ||
37 | /** | 16 | /** |
38 | * Based on the algorithm at http://en.wikipedia.org/wiki/Levenshtein_distance. | 17 | * Based on the algorithm at http://en.wikipedia.org/wiki/Levenshtein_distance. |
@@ -43,148 +22,96 @@ | |||
43 | * | 22 | * |
44 | * @param str1 String the first string. | 23 | * @param str1 String the first string. |
45 | * @param str2 String the second string. | 24 | * @param str2 String the second string. |
25 | * @param [options] Additional options. | ||
26 | * @param [options.useCollator] Use `Intl.Collator` for locale-sensitive string comparison. | ||
46 | * @return Integer the levenshtein distance (0 and above). | 27 | * @return Integer the levenshtein distance (0 and above). |
47 | */ | 28 | */ |
48 | get: function(str1, str2) { | 29 | get: function(str1, str2, options) { |
30 | var useCollator = (options && collator && options.useCollator); | ||
31 | |||
32 | var str1Len = str1.length, | ||
33 | str2Len = str2.length; | ||
34 | |||
49 | // base cases | 35 | // base cases |
50 | if (str1 === str2) return 0; | 36 | if (str1Len === 0) return str2Len; |
51 | if (str1.length === 0) return str2.length; | 37 | if (str2Len === 0) return str1Len; |
52 | if (str2.length === 0) return str1.length; | ||
53 | 38 | ||
54 | // two rows | 39 | // two rows |
55 | var prevRow = new Array(str2.length + 1), | 40 | var curCol, nextCol, i, j, tmp; |
56 | curCol, nextCol, i, j, tmp; | ||
57 | 41 | ||
58 | // initialise previous row | 42 | // initialise previous row |
59 | for (i=0; i<prevRow.length; ++i) { | 43 | for (i=0; i<str2Len; ++i) { |
60 | prevRow[i] = i; | 44 | prevRow[i] = i; |
45 | str2Char[i] = str2.charCodeAt(i); | ||
61 | } | 46 | } |
47 | prevRow[str2Len] = str2Len; | ||
62 | 48 | ||
63 | // calculate current row distance from previous row | 49 | var strCmp; |
64 | for (i=0; i<str1.length; ++i) { | 50 | if (useCollator) { |
65 | nextCol = i + 1; | 51 | // calculate current row distance from previous row using collator |
52 | for (i = 0; i < str1Len; ++i) { | ||
53 | nextCol = i + 1; | ||
66 | 54 | ||
67 | for (j=0; j<str2.length; ++j) { | 55 | for (j = 0; j < str2Len; ++j) { |
68 | curCol = nextCol; | 56 | curCol = nextCol; |
69 | 57 | ||
70 | // substution | 58 | // substution |
71 | nextCol = prevRow[j] + ( (str1.charAt(i) === str2.charAt(j)) ? 0 : 1 ); | 59 | strCmp = 0 === collator.compare(str1.charAt(i), String.fromCharCode(str2Char[j])); |
72 | // insertion | ||
73 | tmp = curCol + 1; | ||
74 | if (nextCol > tmp) { | ||
75 | nextCol = tmp; | ||
76 | } | ||
77 | // deletion | ||
78 | tmp = prevRow[j + 1] + 1; | ||
79 | if (nextCol > tmp) { | ||
80 | nextCol = tmp; | ||
81 | } | ||
82 | 60 | ||
83 | // copy current col value into previous (in preparation for next iteration) | 61 | nextCol = prevRow[j] + (strCmp ? 0 : 1); |
84 | prevRow[j] = curCol; | ||
85 | } | ||
86 | 62 | ||
87 | // copy last col value into previous (in preparation for next iteration) | 63 | // insertion |
88 | prevRow[j] = nextCol; | 64 | tmp = curCol + 1; |
89 | } | 65 | if (nextCol > tmp) { |
66 | nextCol = tmp; | ||
67 | } | ||
68 | // deletion | ||
69 | tmp = prevRow[j + 1] + 1; | ||
70 | if (nextCol > tmp) { | ||
71 | nextCol = tmp; | ||
72 | } | ||
90 | 73 | ||
91 | return nextCol; | 74 | // copy current col value into previous (in preparation for next iteration) |
92 | }, | 75 | prevRow[j] = curCol; |
76 | } | ||
93 | 77 | ||
94 | /** | 78 | // copy last col value into previous (in preparation for next iteration) |
95 | * Asynchronously calculate levenshtein distance of the two strings. | 79 | prevRow[j] = nextCol; |
96 | * | 80 | } |
97 | * @param str1 String the first string. | 81 | } |
98 | * @param str2 String the second string. | 82 | else { |
99 | * @param cb Function callback function with signature: function(Error err, int distance) | 83 | // calculate current row distance from previous row without collator |
100 | * @param [options] Object additional options. | 84 | for (i = 0; i < str1Len; ++i) { |
101 | * @param [options.progress] Function progress callback with signature: function(percentComplete) | 85 | nextCol = i + 1; |
102 | */ | ||
103 | getAsync: function(str1, str2, cb, options) { | ||
104 | options = _extend({}, { | ||
105 | progress: null | ||
106 | }, options); | ||
107 | 86 | ||
108 | // base cases | 87 | for (j = 0; j < str2Len; ++j) { |
109 | if (str1 === str2) return cb(null, 0); | 88 | curCol = nextCol; |
110 | if (str1.length === 0) return cb(null, str2.length); | ||
111 | if (str2.length === 0) return cb(null, str1.length); | ||
112 | 89 | ||
113 | // two rows | 90 | // substution |
114 | var prevRow = new Array(str2.length + 1), | 91 | strCmp = str1.charCodeAt(i) === str2Char[j]; |
115 | curCol, nextCol, | ||
116 | i, j, tmp, | ||
117 | startTime, currentTime; | ||
118 | 92 | ||
119 | // initialise previous row | 93 | nextCol = prevRow[j] + (strCmp ? 0 : 1); |
120 | for (i=0; i<prevRow.length; ++i) { | ||
121 | prevRow[i] = i; | ||
122 | } | ||
123 | 94 | ||
124 | nextCol = 1; | 95 | // insertion |
125 | i = 0; | 96 | tmp = curCol + 1; |
126 | j = -1; | 97 | if (nextCol > tmp) { |
127 | 98 | nextCol = tmp; | |
128 | var __calculate = function() { | ||
129 | // reset timer | ||
130 | startTime = new Date().valueOf(); | ||
131 | currentTime = startTime; | ||
132 | |||
133 | // keep going until one second has elapsed | ||
134 | while (currentTime - startTime < 1000) { | ||
135 | // reached end of current row? | ||
136 | if (str2.length <= (++j)) { | ||
137 | // copy current into previous (in preparation for next iteration) | ||
138 | prevRow[j] = nextCol; | ||
139 | |||
140 | // if already done all chars | ||
141 | if (str1.length <= (++i)) { | ||
142 | return cb(null, nextCol); | ||
143 | } | 99 | } |
144 | // else if we have more left to do | 100 | // deletion |
145 | else { | 101 | tmp = prevRow[j + 1] + 1; |
146 | nextCol = i + 1; | 102 | if (nextCol > tmp) { |
147 | j = 0; | 103 | nextCol = tmp; |
148 | } | 104 | } |
149 | } | ||
150 | |||
151 | // calculation | ||
152 | curCol = nextCol; | ||
153 | 105 | ||
154 | // substution | 106 | // copy current col value into previous (in preparation for next iteration) |
155 | nextCol = prevRow[j] + ( (str1.charAt(i) === str2.charAt(j)) ? 0 : 1 ); | 107 | prevRow[j] = curCol; |
156 | // insertion | ||
157 | tmp = curCol + 1; | ||
158 | if (nextCol > tmp) { | ||
159 | nextCol = tmp; | ||
160 | } | ||
161 | // deletion | ||
162 | tmp = prevRow[j + 1] + 1; | ||
163 | if (nextCol > tmp) { | ||
164 | nextCol = tmp; | ||
165 | } | 108 | } |
166 | 109 | ||
167 | // copy current into previous (in preparation for next iteration) | 110 | // copy last col value into previous (in preparation for next iteration) |
168 | prevRow[j] = curCol; | 111 | prevRow[j] = nextCol; |
169 | |||
170 | // get current time | ||
171 | currentTime = new Date().valueOf(); | ||
172 | } | 112 | } |
173 | 113 | } | |
174 | // send a progress update? | 114 | return nextCol; |
175 | if (null !== options.progress) { | ||
176 | try { | ||
177 | options.progress.call(null, (i * 100.0/ str1.length)); | ||
178 | } catch (err) { | ||
179 | return cb('Progress callback: ' + err.toString()); | ||
180 | } | ||
181 | } | ||
182 | |||
183 | // next iteration | ||
184 | _defer(__calculate); | ||
185 | }; | ||
186 | |||
187 | __calculate(); | ||
188 | } | 115 | } |
189 | 116 | ||
190 | }; | 117 | }; |