aboutsummaryrefslogtreecommitdiffhomepage
path: root/vendor/github.com/agext
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/agext')
-rw-r--r--vendor/github.com/agext/levenshtein/.gitignore2
-rw-r--r--vendor/github.com/agext/levenshtein/.travis.yml70
-rw-r--r--vendor/github.com/agext/levenshtein/DCO36
-rw-r--r--vendor/github.com/agext/levenshtein/LICENSE201
-rw-r--r--vendor/github.com/agext/levenshtein/MAINTAINERS1
-rw-r--r--vendor/github.com/agext/levenshtein/NOTICE5
-rw-r--r--vendor/github.com/agext/levenshtein/README.md38
-rw-r--r--vendor/github.com/agext/levenshtein/levenshtein.go290
-rw-r--r--vendor/github.com/agext/levenshtein/params.go152
9 files changed, 795 insertions, 0 deletions
diff --git a/vendor/github.com/agext/levenshtein/.gitignore b/vendor/github.com/agext/levenshtein/.gitignore
new file mode 100644
index 0000000..404365f
--- /dev/null
+++ b/vendor/github.com/agext/levenshtein/.gitignore
@@ -0,0 +1,2 @@
1README.html
2coverage.out
diff --git a/vendor/github.com/agext/levenshtein/.travis.yml b/vendor/github.com/agext/levenshtein/.travis.yml
new file mode 100644
index 0000000..95be94a
--- /dev/null
+++ b/vendor/github.com/agext/levenshtein/.travis.yml
@@ -0,0 +1,70 @@
1language: go
2sudo: false
3go:
4 - 1.8
5 - 1.7.5
6 - 1.7.4
7 - 1.7.3
8 - 1.7.2
9 - 1.7.1
10 - 1.7
11 - tip
12 - 1.6.4
13 - 1.6.3
14 - 1.6.2
15 - 1.6.1
16 - 1.6
17 - 1.5.4
18 - 1.5.3
19 - 1.5.2
20 - 1.5.1
21 - 1.5
22 - 1.4.3
23 - 1.4.2
24 - 1.4.1
25 - 1.4
26 - 1.3.3
27 - 1.3.2
28 - 1.3.1
29 - 1.3
30 - 1.2.2
31 - 1.2.1
32 - 1.2
33 - 1.1.2
34 - 1.1.1
35 - 1.1
36before_install:
37 - go get github.com/mattn/goveralls
38script:
39 - $HOME/gopath/bin/goveralls -service=travis-ci
40notifications:
41 email:
42 on_success: never
43matrix:
44 fast_finish: true
45 allow_failures:
46 - go: tip
47 - go: 1.6.4
48 - go: 1.6.3
49 - go: 1.6.2
50 - go: 1.6.1
51 - go: 1.6
52 - go: 1.5.4
53 - go: 1.5.3
54 - go: 1.5.2
55 - go: 1.5.1
56 - go: 1.5
57 - go: 1.4.3
58 - go: 1.4.2
59 - go: 1.4.1
60 - go: 1.4
61 - go: 1.3.3
62 - go: 1.3.2
63 - go: 1.3.1
64 - go: 1.3
65 - go: 1.2.2
66 - go: 1.2.1
67 - go: 1.2
68 - go: 1.1.2
69 - go: 1.1.1
70 - go: 1.1
diff --git a/vendor/github.com/agext/levenshtein/DCO b/vendor/github.com/agext/levenshtein/DCO
new file mode 100644
index 0000000..716561d
--- /dev/null
+++ b/vendor/github.com/agext/levenshtein/DCO
@@ -0,0 +1,36 @@
1Developer Certificate of Origin
2Version 1.1
3
4Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
5660 York Street, Suite 102,
6San Francisco, CA 94110 USA
7
8Everyone is permitted to copy and distribute verbatim copies of this
9license document, but changing it is not allowed.
10
11
12Developer's Certificate of Origin 1.1
13
14By making a contribution to this project, I certify that:
15
16(a) The contribution was created in whole or in part by me and I
17 have the right to submit it under the open source license
18 indicated in the file; or
19
20(b) The contribution is based upon previous work that, to the best
21 of my knowledge, is covered under an appropriate open source
22 license and I have the right under that license to submit that
23 work with modifications, whether created in whole or in part
24 by me, under the same open source license (unless I am
25 permitted to submit under a different license), as indicated
26 in the file; or
27
28(c) The contribution was provided directly to me by some other
29 person who certified (a), (b) or (c) and I have not modified
30 it.
31
32(d) I understand and agree that this project and the contribution
33 are public and that a record of the contribution (including all
34 personal information I submit with it, including my sign-off) is
35 maintained indefinitely and may be redistributed consistent with
36 this project or the open source license(s) involved.
diff --git a/vendor/github.com/agext/levenshtein/LICENSE b/vendor/github.com/agext/levenshtein/LICENSE
new file mode 100644
index 0000000..261eeb9
--- /dev/null
+++ b/vendor/github.com/agext/levenshtein/LICENSE
@@ -0,0 +1,201 @@
1 Apache License
2 Version 2.0, January 2004
3 http://www.apache.org/licenses/
4
5 TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
7 1. Definitions.
8
9 "License" shall mean the terms and conditions for use, reproduction,
10 and distribution as defined by Sections 1 through 9 of this document.
11
12 "Licensor" shall mean the copyright owner or entity authorized by
13 the copyright owner that is granting the License.
14
15 "Legal Entity" shall mean the union of the acting entity and all
16 other entities that control, are controlled by, or are under common
17 control with that entity. For the purposes of this definition,
18 "control" means (i) the power, direct or indirect, to cause the
19 direction or management of such entity, whether by contract or
20 otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 outstanding shares, or (iii) beneficial ownership of such entity.
22
23 "You" (or "Your") shall mean an individual or Legal Entity
24 exercising permissions granted by this License.
25
26 "Source" form shall mean the preferred form for making modifications,
27 including but not limited to software source code, documentation
28 source, and configuration files.
29
30 "Object" form shall mean any form resulting from mechanical
31 transformation or translation of a Source form, including but
32 not limited to compiled object code, generated documentation,
33 and conversions to other media types.
34
35 "Work" shall mean the work of authorship, whether in Source or
36 Object form, made available under the License, as indicated by a
37 copyright notice that is included in or attached to the work
38 (an example is provided in the Appendix below).
39
40 "Derivative Works" shall mean any work, whether in Source or Object
41 form, that is based on (or derived from) the Work and for which the
42 editorial revisions, annotations, elaborations, or other modifications
43 represent, as a whole, an original work of authorship. For the purposes
44 of this License, Derivative Works shall not include works that remain
45 separable from, or merely link (or bind by name) to the interfaces of,
46 the Work and Derivative Works thereof.
47
48 "Contribution" shall mean any work of authorship, including
49 the original version of the Work and any modifications or additions
50 to that Work or Derivative Works thereof, that is intentionally
51 submitted to Licensor for inclusion in the Work by the copyright owner
52 or by an individual or Legal Entity authorized to submit on behalf of
53 the copyright owner. For the purposes of this definition, "submitted"
54 means any form of electronic, verbal, or written communication sent
55 to the Licensor or its representatives, including but not limited to
56 communication on electronic mailing lists, source code control systems,
57 and issue tracking systems that are managed by, or on behalf of, the
58 Licensor for the purpose of discussing and improving the Work, but
59 excluding communication that is conspicuously marked or otherwise
60 designated in writing by the copyright owner as "Not a Contribution."
61
62 "Contributor" shall mean Licensor and any individual or Legal Entity
63 on behalf of whom a Contribution has been received by Licensor and
64 subsequently incorporated within the Work.
65
66 2. Grant of Copyright License. Subject to the terms and conditions of
67 this License, each Contributor hereby grants to You a perpetual,
68 worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 copyright license to reproduce, prepare Derivative Works of,
70 publicly display, publicly perform, sublicense, and distribute the
71 Work and such Derivative Works in Source or Object form.
72
73 3. Grant of Patent License. Subject to the terms and conditions of
74 this License, each Contributor hereby grants to You a perpetual,
75 worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 (except as stated in this section) patent license to make, have made,
77 use, offer to sell, sell, import, and otherwise transfer the Work,
78 where such license applies only to those patent claims licensable
79 by such Contributor that are necessarily infringed by their
80 Contribution(s) alone or by combination of their Contribution(s)
81 with the Work to which such Contribution(s) was submitted. If You
82 institute patent litigation against any entity (including a
83 cross-claim or counterclaim in a lawsuit) alleging that the Work
84 or a Contribution incorporated within the Work constitutes direct
85 or contributory patent infringement, then any patent licenses
86 granted to You under this License for that Work shall terminate
87 as of the date such litigation is filed.
88
89 4. Redistribution. You may reproduce and distribute copies of the
90 Work or Derivative Works thereof in any medium, with or without
91 modifications, and in Source or Object form, provided that You
92 meet the following conditions:
93
94 (a) You must give any other recipients of the Work or
95 Derivative Works a copy of this License; and
96
97 (b) You must cause any modified files to carry prominent notices
98 stating that You changed the files; and
99
100 (c) You must retain, in the Source form of any Derivative Works
101 that You distribute, all copyright, patent, trademark, and
102 attribution notices from the Source form of the Work,
103 excluding those notices that do not pertain to any part of
104 the Derivative Works; and
105
106 (d) If the Work includes a "NOTICE" text file as part of its
107 distribution, then any Derivative Works that You distribute must
108 include a readable copy of the attribution notices contained
109 within such NOTICE file, excluding those notices that do not
110 pertain to any part of the Derivative Works, in at least one
111 of the following places: within a NOTICE text file distributed
112 as part of the Derivative Works; within the Source form or
113 documentation, if provided along with the Derivative Works; or,
114 within a display generated by the Derivative Works, if and
115 wherever such third-party notices normally appear. The contents
116 of the NOTICE file are for informational purposes only and
117 do not modify the License. You may add Your own attribution
118 notices within Derivative Works that You distribute, alongside
119 or as an addendum to the NOTICE text from the Work, provided
120 that such additional attribution notices cannot be construed
121 as modifying the License.
122
123 You may add Your own copyright statement to Your modifications and
124 may provide additional or different license terms and conditions
125 for use, reproduction, or distribution of Your modifications, or
126 for any such Derivative Works as a whole, provided Your use,
127 reproduction, and distribution of the Work otherwise complies with
128 the conditions stated in this License.
129
130 5. Submission of Contributions. Unless You explicitly state otherwise,
131 any Contribution intentionally submitted for inclusion in the Work
132 by You to the Licensor shall be under the terms and conditions of
133 this License, without any additional terms or conditions.
134 Notwithstanding the above, nothing herein shall supersede or modify
135 the terms of any separate license agreement you may have executed
136 with Licensor regarding such Contributions.
137
138 6. Trademarks. This License does not grant permission to use the trade
139 names, trademarks, service marks, or product names of the Licensor,
140 except as required for reasonable and customary use in describing the
141 origin of the Work and reproducing the content of the NOTICE file.
142
143 7. Disclaimer of Warranty. Unless required by applicable law or
144 agreed to in writing, Licensor provides the Work (and each
145 Contributor provides its Contributions) on an "AS IS" BASIS,
146 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 implied, including, without limitation, any warranties or conditions
148 of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 PARTICULAR PURPOSE. You are solely responsible for determining the
150 appropriateness of using or redistributing the Work and assume any
151 risks associated with Your exercise of permissions under this License.
152
153 8. Limitation of Liability. In no event and under no legal theory,
154 whether in tort (including negligence), contract, or otherwise,
155 unless required by applicable law (such as deliberate and grossly
156 negligent acts) or agreed to in writing, shall any Contributor be
157 liable to You for damages, including any direct, indirect, special,
158 incidental, or consequential damages of any character arising as a
159 result of this License or out of the use or inability to use the
160 Work (including but not limited to damages for loss of goodwill,
161 work stoppage, computer failure or malfunction, or any and all
162 other commercial damages or losses), even if such Contributor
163 has been advised of the possibility of such damages.
164
165 9. Accepting Warranty or Additional Liability. While redistributing
166 the Work or Derivative Works thereof, You may choose to offer,
167 and charge a fee for, acceptance of support, warranty, indemnity,
168 or other liability obligations and/or rights consistent with this
169 License. However, in accepting such obligations, You may act only
170 on Your own behalf and on Your sole responsibility, not on behalf
171 of any other Contributor, and only if You agree to indemnify,
172 defend, and hold each Contributor harmless for any liability
173 incurred by, or claims asserted against, such Contributor by reason
174 of your accepting any such warranty or additional liability.
175
176 END OF TERMS AND CONDITIONS
177
178 APPENDIX: How to apply the Apache License to your work.
179
180 To apply the Apache License to your work, attach the following
181 boilerplate notice, with the fields enclosed by brackets "[]"
182 replaced with your own identifying information. (Don't include
183 the brackets!) The text should be enclosed in the appropriate
184 comment syntax for the file format. We also recommend that a
185 file or class name and description of purpose be included on the
186 same "printed page" as the copyright notice for easier
187 identification within third-party archives.
188
189 Copyright [yyyy] [name of copyright owner]
190
191 Licensed under the Apache License, Version 2.0 (the "License");
192 you may not use this file except in compliance with the License.
193 You may obtain a copy of the License at
194
195 http://www.apache.org/licenses/LICENSE-2.0
196
197 Unless required by applicable law or agreed to in writing, software
198 distributed under the License is distributed on an "AS IS" BASIS,
199 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 See the License for the specific language governing permissions and
201 limitations under the License.
diff --git a/vendor/github.com/agext/levenshtein/MAINTAINERS b/vendor/github.com/agext/levenshtein/MAINTAINERS
new file mode 100644
index 0000000..726c2af
--- /dev/null
+++ b/vendor/github.com/agext/levenshtein/MAINTAINERS
@@ -0,0 +1 @@
Alex Bucataru <alex@alrux.com> (@AlexBucataru)
diff --git a/vendor/github.com/agext/levenshtein/NOTICE b/vendor/github.com/agext/levenshtein/NOTICE
new file mode 100644
index 0000000..eaffaab
--- /dev/null
+++ b/vendor/github.com/agext/levenshtein/NOTICE
@@ -0,0 +1,5 @@
1Alrux Go EXTensions (AGExt) - package levenshtein
2Copyright 2016 ALRUX Inc.
3
4This product includes software developed at ALRUX Inc.
5(http://www.alrux.com/).
diff --git a/vendor/github.com/agext/levenshtein/README.md b/vendor/github.com/agext/levenshtein/README.md
new file mode 100644
index 0000000..90509c2
--- /dev/null
+++ b/vendor/github.com/agext/levenshtein/README.md
@@ -0,0 +1,38 @@
1# A Go package for calculating the Levenshtein distance between two strings
2
3[![Release](https://img.shields.io/github/release/agext/levenshtein.svg?style=flat)](https://github.com/agext/levenshtein/releases/latest)
4[![GoDoc](https://img.shields.io/badge/godoc-reference-blue.svg?style=flat)](https://godoc.org/github.com/agext/levenshtein) 
5[![Build Status](https://travis-ci.org/agext/levenshtein.svg?branch=master&style=flat)](https://travis-ci.org/agext/levenshtein)
6[![Coverage Status](https://coveralls.io/repos/github/agext/levenshtein/badge.svg?style=flat)](https://coveralls.io/github/agext/levenshtein)
7[![Go Report Card](https://goreportcard.com/badge/github.com/agext/levenshtein?style=flat)](https://goreportcard.com/report/github.com/agext/levenshtein)
8
9
10This package implements distance and similarity metrics for strings, based on the Levenshtein measure, in [Go](http://golang.org).
11
12## Project Status
13
14v1.2.1 Stable: Guaranteed no breaking changes to the API in future v1.x releases. Probably safe to use in production, though provided on "AS IS" basis.
15
16This package is being actively maintained. If you encounter any problems or have any suggestions for improvement, please [open an issue](https://github.com/agext/levenshtein/issues). Pull requests are welcome.
17
18## Overview
19
20The Levenshtein `Distance` between two strings is the minimum total cost of edits that would convert the first string into the second. The allowed edit operations are insertions, deletions, and substitutions, all at character (one UTF-8 code point) level. Each operation has a default cost of 1, but each can be assigned its own cost equal to or greater than 0.
21
22A `Distance` of 0 means the two strings are identical, and the higher the value the more different the strings. Since in practice we are interested in finding if the two strings are "close enough", it often does not make sense to continue the calculation once the result is mathematically guaranteed to exceed a desired threshold. Providing this value to the `Distance` function allows it to take a shortcut and return a lower bound instead of an exact cost when the threshold is exceeded.
23
24The `Similarity` function calculates the distance, then converts it into a normalized metric within the range 0..1, with 1 meaning the strings are identical, and 0 that they have nothing in common. A minimum similarity threshold can be provided to speed up the calculation of the metric for strings that are far too dissimilar for the purpose at hand. All values under this threshold are rounded down to 0.
25
26The `Match` function provides a similarity metric, with the same range and meaning as `Similarity`, but with a bonus for string pairs that share a common prefix and have a similarity above a "bonus threshold". It uses the same method as proposed by Winkler for the Jaro distance, and the reasoning behind it is that these string pairs are very likely spelling variations or errors, and they are more closely linked than the edit distance alone would suggest.
27
28The underlying `Calculate` function is also exported, to allow the building of other derivative metrics, if needed.
29
30## Installation
31
32```
33go get github.com/agext/levenshtein
34```
35
36## License
37
38Package levenshtein is released under the Apache 2.0 license. See the [LICENSE](LICENSE) file for details.
diff --git a/vendor/github.com/agext/levenshtein/levenshtein.go b/vendor/github.com/agext/levenshtein/levenshtein.go
new file mode 100644
index 0000000..df69ce7
--- /dev/null
+++ b/vendor/github.com/agext/levenshtein/levenshtein.go
@@ -0,0 +1,290 @@
1// Copyright 2016 ALRUX Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15/*
16Package levenshtein implements distance and similarity metrics for strings, based on the Levenshtein measure.
17
18The Levenshtein `Distance` between two strings is the minimum total cost of edits that would convert the first string into the second. The allowed edit operations are insertions, deletions, and substitutions, all at character (one UTF-8 code point) level. Each operation has a default cost of 1, but each can be assigned its own cost equal to or greater than 0.
19
20A `Distance` of 0 means the two strings are identical, and the higher the value the more different the strings. Since in practice we are interested in finding if the two strings are "close enough", it often does not make sense to continue the calculation once the result is mathematically guaranteed to exceed a desired threshold. Providing this value to the `Distance` function allows it to take a shortcut and return a lower bound instead of an exact cost when the threshold is exceeded.
21
22The `Similarity` function calculates the distance, then converts it into a normalized metric within the range 0..1, with 1 meaning the strings are identical, and 0 that they have nothing in common. A minimum similarity threshold can be provided to speed up the calculation of the metric for strings that are far too dissimilar for the purpose at hand. All values under this threshold are rounded down to 0.
23
24The `Match` function provides a similarity metric, with the same range and meaning as `Similarity`, but with a bonus for string pairs that share a common prefix and have a similarity above a "bonus threshold". It uses the same method as proposed by Winkler for the Jaro distance, and the reasoning behind it is that these string pairs are very likely spelling variations or errors, and they are more closely linked than the edit distance alone would suggest.
25
26The underlying `Calculate` function is also exported, to allow the building of other derivative metrics, if needed.
27*/
28package levenshtein
29
30// Calculate determines the Levenshtein distance between two strings, using
31// the given costs for each edit operation. It returns the distance along with
32// the lengths of the longest common prefix and suffix.
33//
34// If maxCost is non-zero, the calculation stops as soon as the distance is determined
35// to be greater than maxCost. Therefore, any return value higher than maxCost is a
36// lower bound for the actual distance.
37func Calculate(str1, str2 []rune, maxCost, insCost, subCost, delCost int) (dist, prefixLen, suffixLen int) {
38 l1, l2 := len(str1), len(str2)
39 // trim common prefix, if any, as it doesn't affect the distance
40 for ; prefixLen < l1 && prefixLen < l2; prefixLen++ {
41 if str1[prefixLen] != str2[prefixLen] {
42 break
43 }
44 }
45 str1, str2 = str1[prefixLen:], str2[prefixLen:]
46 l1 -= prefixLen
47 l2 -= prefixLen
48 // trim common suffix, if any, as it doesn't affect the distance
49 for 0 < l1 && 0 < l2 {
50 if str1[l1-1] != str2[l2-1] {
51 str1, str2 = str1[:l1], str2[:l2]
52 break
53 }
54 l1--
55 l2--
56 suffixLen++
57 }
58 // if the first string is empty, the distance is the length of the second string times the cost of insertion
59 if l1 == 0 {
60 dist = l2 * insCost
61 return
62 }
63 // if the second string is empty, the distance is the length of the first string times the cost of deletion
64 if l2 == 0 {
65 dist = l1 * delCost
66 return
67 }
68
69 // variables used in inner "for" loops
70 var y, dy, c, l int
71
72 // if maxCost is greater than or equal to the maximum possible distance, it's equivalent to 'unlimited'
73 if maxCost > 0 {
74 if subCost < delCost+insCost {
75 if maxCost >= l1*subCost+(l2-l1)*insCost {
76 maxCost = 0
77 }
78 } else {
79 if maxCost >= l1*delCost+l2*insCost {
80 maxCost = 0
81 }
82 }
83 }
84
85 if maxCost > 0 {
86 // prefer the longer string first, to minimize time;
87 // a swap also transposes the meanings of insertion and deletion.
88 if l1 < l2 {
89 str1, str2, l1, l2, insCost, delCost = str2, str1, l2, l1, delCost, insCost
90 }
91
92 // the length differential times cost of deletion is a lower bound for the cost;
93 // if it is higher than the maxCost, there is no point going into the main calculation.
94 if dist = (l1 - l2) * delCost; dist > maxCost {
95 return
96 }
97
98 d := make([]int, l1+1)
99
100 // offset and length of d in the current row
101 doff, dlen := 0, 1
102 for y, dy = 1, delCost; y <= l1 && dy <= maxCost; dlen++ {
103 d[y] = dy
104 y++
105 dy = y * delCost
106 }
107 // fmt.Printf("%q -> %q: init doff=%d dlen=%d d[%d:%d]=%v\n", str1, str2, doff, dlen, doff, doff+dlen, d[doff:doff+dlen])
108
109 for x := 0; x < l2; x++ {
110 dy, d[doff] = d[doff], d[doff]+insCost
111 for d[doff] > maxCost && dlen > 0 {
112 if str1[doff] != str2[x] {
113 dy += subCost
114 }
115 doff++
116 dlen--
117 if c = d[doff] + insCost; c < dy {
118 dy = c
119 }
120 dy, d[doff] = d[doff], dy
121 }
122 for y, l = doff, doff+dlen-1; y < l; dy, d[y] = d[y], dy {
123 if str1[y] != str2[x] {
124 dy += subCost
125 }
126 if c = d[y] + delCost; c < dy {
127 dy = c
128 }
129 y++
130 if c = d[y] + insCost; c < dy {
131 dy = c
132 }
133 }
134 if y < l1 {
135 if str1[y] != str2[x] {
136 dy += subCost
137 }
138 if c = d[y] + delCost; c < dy {
139 dy = c
140 }
141 for ; dy <= maxCost && y < l1; dy, d[y] = dy+delCost, dy {
142 y++
143 dlen++
144 }
145 }
146 // fmt.Printf("%q -> %q: x=%d doff=%d dlen=%d d[%d:%d]=%v\n", str1, str2, x, doff, dlen, doff, doff+dlen, d[doff:doff+dlen])
147 if dlen == 0 {
148 dist = maxCost + 1
149 return
150 }
151 }
152 if doff+dlen-1 < l1 {
153 dist = maxCost + 1
154 return
155 }
156 dist = d[l1]
157 } else {
158 // ToDo: This is O(l1*l2) time and O(min(l1,l2)) space; investigate if it is
159 // worth to implement diagonal approach - O(l1*(1+dist)) time, up to O(l1*l2) space
160 // http://www.csse.monash.edu.au/~lloyd/tildeStrings/Alignment/92.IPL.html
161
162 // prefer the shorter string first, to minimize space; time is O(l1*l2) anyway;
163 // a swap also transposes the meanings of insertion and deletion.
164 if l1 > l2 {
165 str1, str2, l1, l2, insCost, delCost = str2, str1, l2, l1, delCost, insCost
166 }
167 d := make([]int, l1+1)
168
169 for y = 1; y <= l1; y++ {
170 d[y] = y * delCost
171 }
172 for x := 0; x < l2; x++ {
173 dy, d[0] = d[0], d[0]+insCost
174 for y = 0; y < l1; dy, d[y] = d[y], dy {
175 if str1[y] != str2[x] {
176 dy += subCost
177 }
178 if c = d[y] + delCost; c < dy {
179 dy = c
180 }
181 y++
182 if c = d[y] + insCost; c < dy {
183 dy = c
184 }
185 }
186 }
187 dist = d[l1]
188 }
189
190 return
191}
192
193// Distance returns the Levenshtein distance between str1 and str2, using the
194// default or provided cost values. Pass nil for the third argument to use the
195// default cost of 1 for all three operations, with no maximum.
196func Distance(str1, str2 string, p *Params) int {
197 if p == nil {
198 p = defaultParams
199 }
200 dist, _, _ := Calculate([]rune(str1), []rune(str2), p.maxCost, p.insCost, p.subCost, p.delCost)
201 return dist
202}
203
204// Similarity returns a score in the range of 0..1 for how similar the two strings are.
205// A score of 1 means the strings are identical, and 0 means they have nothing in common.
206//
207// A nil third argument uses the default cost of 1 for all three operations.
208//
209// If a non-zero MinScore value is provided in the parameters, scores lower than it
210// will be returned as 0.
211func Similarity(str1, str2 string, p *Params) float64 {
212 return Match(str1, str2, p.Clone().BonusThreshold(1.1)) // guaranteed no bonus
213}
214
215// Match returns a similarity score adjusted by the same method as proposed by Winkler for
216// the Jaro distance - giving a bonus to string pairs that share a common prefix, only if their
217// similarity score is already over a threshold.
218//
219// The score is in the range of 0..1, with 1 meaning the strings are identical,
220// and 0 meaning they have nothing in common.
221//
222// A nil third argument uses the default cost of 1 for all three operations, maximum length of
223// common prefix to consider for bonus of 4, scaling factor of 0.1, and bonus threshold of 0.7.
224//
225// If a non-zero MinScore value is provided in the parameters, scores lower than it
226// will be returned as 0.
227func Match(str1, str2 string, p *Params) float64 {
228 s1, s2 := []rune(str1), []rune(str2)
229 l1, l2 := len(s1), len(s2)
230 // two empty strings are identical; shortcut also avoids divByZero issues later on.
231 if l1 == 0 && l2 == 0 {
232 return 1
233 }
234
235 if p == nil {
236 p = defaultParams
237 }
238
239 // a min over 1 can never be satisfied, so the score is 0.
240 if p.minScore > 1 {
241 return 0
242 }
243
244 insCost, delCost, maxDist, max := p.insCost, p.delCost, 0, 0
245 if l1 > l2 {
246 l1, l2, insCost, delCost = l2, l1, delCost, insCost
247 }
248
249 if p.subCost < delCost+insCost {
250 maxDist = l1*p.subCost + (l2-l1)*insCost
251 } else {
252 maxDist = l1*delCost + l2*insCost
253 }
254
255 // a zero min is always satisfied, so no need to set a max cost.
256 if p.minScore > 0 {
257 // if p.minScore is lower than p.bonusThreshold, we can use a simplified formula
258 // for the max cost, because a sim score below min cannot receive a bonus.
259 if p.minScore < p.bonusThreshold {
260 // round down the max - a cost equal to a rounded up max would already be under min.
261 max = int((1 - p.minScore) * float64(maxDist))
262 } else {
263 // p.minScore <= sim + p.bonusPrefix*p.bonusScale*(1-sim)
264 // p.minScore <= (1-dist/maxDist) + p.bonusPrefix*p.bonusScale*(1-(1-dist/maxDist))
265 // p.minScore <= 1 - dist/maxDist + p.bonusPrefix*p.bonusScale*dist/maxDist
266 // 1 - p.minScore >= dist/maxDist - p.bonusPrefix*p.bonusScale*dist/maxDist
267 // (1-p.minScore)*maxDist/(1-p.bonusPrefix*p.bonusScale) >= dist
268 max = int((1 - p.minScore) * float64(maxDist) / (1 - float64(p.bonusPrefix)*p.bonusScale))
269 }
270 }
271
272 dist, pl, _ := Calculate(s1, s2, max, p.insCost, p.subCost, p.delCost)
273 if max > 0 && dist > max {
274 return 0
275 }
276 sim := 1 - float64(dist)/float64(maxDist)
277
278 if sim >= p.bonusThreshold && sim < 1 && p.bonusPrefix > 0 && p.bonusScale > 0 {
279 if pl > p.bonusPrefix {
280 pl = p.bonusPrefix
281 }
282 sim += float64(pl) * p.bonusScale * (1 - sim)
283 }
284
285 if sim < p.minScore {
286 return 0
287 }
288
289 return sim
290}
diff --git a/vendor/github.com/agext/levenshtein/params.go b/vendor/github.com/agext/levenshtein/params.go
new file mode 100644
index 0000000..a85727b
--- /dev/null
+++ b/vendor/github.com/agext/levenshtein/params.go
@@ -0,0 +1,152 @@
1// Copyright 2016 ALRUX Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package levenshtein
16
17// Params represents a set of parameter values for the various formulas involved
18// in the calculation of the Levenshtein string metrics.
19type Params struct {
20 insCost int
21 subCost int
22 delCost int
23 maxCost int
24 minScore float64
25 bonusPrefix int
26 bonusScale float64
27 bonusThreshold float64
28}
29
30var (
31 defaultParams = NewParams()
32)
33
34// NewParams creates a new set of parameters and initializes it with the default values.
35func NewParams() *Params {
36 return &Params{
37 insCost: 1,
38 subCost: 1,
39 delCost: 1,
40 maxCost: 0,
41 minScore: 0,
42 bonusPrefix: 4,
43 bonusScale: .1,
44 bonusThreshold: .7,
45 }
46}
47
48// Clone returns a pointer to a copy of the receiver parameter set, or of a new
49// default parameter set if the receiver is nil.
50func (p *Params) Clone() *Params {
51 if p == nil {
52 return NewParams()
53 }
54 return &Params{
55 insCost: p.insCost,
56 subCost: p.subCost,
57 delCost: p.delCost,
58 maxCost: p.maxCost,
59 minScore: p.minScore,
60 bonusPrefix: p.bonusPrefix,
61 bonusScale: p.bonusScale,
62 bonusThreshold: p.bonusThreshold,
63 }
64}
65
66// InsCost overrides the default value of 1 for the cost of insertion.
67// The new value must be zero or positive.
68func (p *Params) InsCost(v int) *Params {
69 if v >= 0 {
70 p.insCost = v
71 }
72 return p
73}
74
75// SubCost overrides the default value of 1 for the cost of substitution.
76// The new value must be zero or positive.
77func (p *Params) SubCost(v int) *Params {
78 if v >= 0 {
79 p.subCost = v
80 }
81 return p
82}
83
84// DelCost overrides the default value of 1 for the cost of deletion.
85// The new value must be zero or positive.
86func (p *Params) DelCost(v int) *Params {
87 if v >= 0 {
88 p.delCost = v
89 }
90 return p
91}
92
93// MaxCost overrides the default value of 0 (meaning unlimited) for the maximum cost.
94// The calculation of Distance() stops when the result is guaranteed to exceed
95// this maximum, returning a lower-bound rather than exact value.
96// The new value must be zero or positive.
97func (p *Params) MaxCost(v int) *Params {
98 if v >= 0 {
99 p.maxCost = v
100 }
101 return p
102}
103
104// MinScore overrides the default value of 0 for the minimum similarity score.
105// Scores below this threshold are returned as 0 by Similarity() and Match().
106// The new value must be zero or positive. Note that a minimum greater than 1
107// can never be satisfied, resulting in a score of 0 for any pair of strings.
108func (p *Params) MinScore(v float64) *Params {
109 if v >= 0 {
110 p.minScore = v
111 }
112 return p
113}
114
115// BonusPrefix overrides the default value for the maximum length of
116// common prefix to be considered for bonus by Match().
117// The new value must be zero or positive.
118func (p *Params) BonusPrefix(v int) *Params {
119 if v >= 0 {
120 p.bonusPrefix = v
121 }
122 return p
123}
124
125// BonusScale overrides the default value for the scaling factor used by Match()
126// in calculating the bonus.
127// The new value must be zero or positive. To guarantee that the similarity score
128// remains in the interval 0..1, this scaling factor is not allowed to exceed
129// 1 / BonusPrefix.
130func (p *Params) BonusScale(v float64) *Params {
131 if v >= 0 {
132 p.bonusScale = v
133 }
134
135 // the bonus cannot exceed (1-sim), or the score may become greater than 1.
136 if float64(p.bonusPrefix)*p.bonusScale > 1 {
137 p.bonusScale = 1 / float64(p.bonusPrefix)
138 }
139
140 return p
141}
142
143// BonusThreshold overrides the default value for the minimum similarity score
144// for which Match() can assign a bonus.
145// The new value must be zero or positive. Note that a threshold greater than 1
146// effectively makes Match() become the equivalent of Similarity().
147func (p *Params) BonusThreshold(v float64) *Params {
148 if v >= 0 {
149 p.bonusThreshold = v
150 }
151 return p
152}