From 15c0b25d011f37e7c20aeca9eaf461f78285b8d9 Mon Sep 17 00:00:00 2001 From: Alex Pilon Date: Fri, 22 Feb 2019 18:24:37 -0500 Subject: deps: github.com/hashicorp/terraform@sdk-v0.11-with-go-modules Updated via: go get github.com/hashicorp/terraform@sdk-v0.11-with-go-modules and go mod tidy --- vendor/github.com/agext/levenshtein/.gitignore | 2 + vendor/github.com/agext/levenshtein/.travis.yml | 70 +++++ vendor/github.com/agext/levenshtein/DCO | 36 +++ vendor/github.com/agext/levenshtein/LICENSE | 201 ++++++++++++++ vendor/github.com/agext/levenshtein/MAINTAINERS | 1 + vendor/github.com/agext/levenshtein/NOTICE | 5 + vendor/github.com/agext/levenshtein/README.md | 38 +++ vendor/github.com/agext/levenshtein/levenshtein.go | 290 +++++++++++++++++++++ vendor/github.com/agext/levenshtein/params.go | 152 +++++++++++ 9 files changed, 795 insertions(+) create mode 100644 vendor/github.com/agext/levenshtein/.gitignore create mode 100644 vendor/github.com/agext/levenshtein/.travis.yml create mode 100644 vendor/github.com/agext/levenshtein/DCO create mode 100644 vendor/github.com/agext/levenshtein/LICENSE create mode 100644 vendor/github.com/agext/levenshtein/MAINTAINERS create mode 100644 vendor/github.com/agext/levenshtein/NOTICE create mode 100644 vendor/github.com/agext/levenshtein/README.md create mode 100644 vendor/github.com/agext/levenshtein/levenshtein.go create mode 100644 vendor/github.com/agext/levenshtein/params.go (limited to 'vendor/github.com/agext/levenshtein') diff --git a/vendor/github.com/agext/levenshtein/.gitignore b/vendor/github.com/agext/levenshtein/.gitignore new file mode 100644 index 0000000..404365f --- /dev/null +++ b/vendor/github.com/agext/levenshtein/.gitignore @@ -0,0 +1,2 @@ +README.html +coverage.out diff --git a/vendor/github.com/agext/levenshtein/.travis.yml b/vendor/github.com/agext/levenshtein/.travis.yml new file mode 100644 index 0000000..95be94a --- /dev/null +++ b/vendor/github.com/agext/levenshtein/.travis.yml @@ -0,0 +1,70 @@ +language: go +sudo: false +go: + - 1.8 + - 1.7.5 + - 1.7.4 + - 1.7.3 + - 1.7.2 + - 1.7.1 + - 1.7 + - tip + - 1.6.4 + - 1.6.3 + - 1.6.2 + - 1.6.1 + - 1.6 + - 1.5.4 + - 1.5.3 + - 1.5.2 + - 1.5.1 + - 1.5 + - 1.4.3 + - 1.4.2 + - 1.4.1 + - 1.4 + - 1.3.3 + - 1.3.2 + - 1.3.1 + - 1.3 + - 1.2.2 + - 1.2.1 + - 1.2 + - 1.1.2 + - 1.1.1 + - 1.1 +before_install: + - go get github.com/mattn/goveralls +script: + - $HOME/gopath/bin/goveralls -service=travis-ci +notifications: + email: + on_success: never +matrix: + fast_finish: true + allow_failures: + - go: tip + - go: 1.6.4 + - go: 1.6.3 + - go: 1.6.2 + - go: 1.6.1 + - go: 1.6 + - go: 1.5.4 + - go: 1.5.3 + - go: 1.5.2 + - go: 1.5.1 + - go: 1.5 + - go: 1.4.3 + - go: 1.4.2 + - go: 1.4.1 + - go: 1.4 + - go: 1.3.3 + - go: 1.3.2 + - go: 1.3.1 + - go: 1.3 + - go: 1.2.2 + - go: 1.2.1 + - go: 1.2 + - go: 1.1.2 + - go: 1.1.1 + - go: 1.1 diff --git a/vendor/github.com/agext/levenshtein/DCO b/vendor/github.com/agext/levenshtein/DCO new file mode 100644 index 0000000..716561d --- /dev/null +++ b/vendor/github.com/agext/levenshtein/DCO @@ -0,0 +1,36 @@ +Developer Certificate of Origin +Version 1.1 + +Copyright (C) 2004, 2006 The Linux Foundation and its contributors. +660 York Street, Suite 102, +San Francisco, CA 94110 USA + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + + +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. diff --git a/vendor/github.com/agext/levenshtein/LICENSE b/vendor/github.com/agext/levenshtein/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/vendor/github.com/agext/levenshtein/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/agext/levenshtein/MAINTAINERS b/vendor/github.com/agext/levenshtein/MAINTAINERS new file mode 100644 index 0000000..726c2af --- /dev/null +++ b/vendor/github.com/agext/levenshtein/MAINTAINERS @@ -0,0 +1 @@ +Alex Bucataru (@AlexBucataru) diff --git a/vendor/github.com/agext/levenshtein/NOTICE b/vendor/github.com/agext/levenshtein/NOTICE new file mode 100644 index 0000000..eaffaab --- /dev/null +++ b/vendor/github.com/agext/levenshtein/NOTICE @@ -0,0 +1,5 @@ +Alrux Go EXTensions (AGExt) - package levenshtein +Copyright 2016 ALRUX Inc. + +This product includes software developed at ALRUX Inc. +(http://www.alrux.com/). diff --git a/vendor/github.com/agext/levenshtein/README.md b/vendor/github.com/agext/levenshtein/README.md new file mode 100644 index 0000000..90509c2 --- /dev/null +++ b/vendor/github.com/agext/levenshtein/README.md @@ -0,0 +1,38 @@ +# A Go package for calculating the Levenshtein distance between two strings + +[![Release](https://img.shields.io/github/release/agext/levenshtein.svg?style=flat)](https://github.com/agext/levenshtein/releases/latest) +[![GoDoc](https://img.shields.io/badge/godoc-reference-blue.svg?style=flat)](https://godoc.org/github.com/agext/levenshtein)  +[![Build Status](https://travis-ci.org/agext/levenshtein.svg?branch=master&style=flat)](https://travis-ci.org/agext/levenshtein) +[![Coverage Status](https://coveralls.io/repos/github/agext/levenshtein/badge.svg?style=flat)](https://coveralls.io/github/agext/levenshtein) +[![Go Report Card](https://goreportcard.com/badge/github.com/agext/levenshtein?style=flat)](https://goreportcard.com/report/github.com/agext/levenshtein) + + +This package implements distance and similarity metrics for strings, based on the Levenshtein measure, in [Go](http://golang.org). + +## Project Status + +v1.2.1 Stable: Guaranteed no breaking changes to the API in future v1.x releases. Probably safe to use in production, though provided on "AS IS" basis. + +This package is being actively maintained. If you encounter any problems or have any suggestions for improvement, please [open an issue](https://github.com/agext/levenshtein/issues). Pull requests are welcome. + +## Overview + +The Levenshtein `Distance` between two strings is the minimum total cost of edits that would convert the first string into the second. The allowed edit operations are insertions, deletions, and substitutions, all at character (one UTF-8 code point) level. Each operation has a default cost of 1, but each can be assigned its own cost equal to or greater than 0. + +A `Distance` of 0 means the two strings are identical, and the higher the value the more different the strings. Since in practice we are interested in finding if the two strings are "close enough", it often does not make sense to continue the calculation once the result is mathematically guaranteed to exceed a desired threshold. Providing this value to the `Distance` function allows it to take a shortcut and return a lower bound instead of an exact cost when the threshold is exceeded. + +The `Similarity` function calculates the distance, then converts it into a normalized metric within the range 0..1, with 1 meaning the strings are identical, and 0 that they have nothing in common. A minimum similarity threshold can be provided to speed up the calculation of the metric for strings that are far too dissimilar for the purpose at hand. All values under this threshold are rounded down to 0. + +The `Match` function provides a similarity metric, with the same range and meaning as `Similarity`, but with a bonus for string pairs that share a common prefix and have a similarity above a "bonus threshold". It uses the same method as proposed by Winkler for the Jaro distance, and the reasoning behind it is that these string pairs are very likely spelling variations or errors, and they are more closely linked than the edit distance alone would suggest. + +The underlying `Calculate` function is also exported, to allow the building of other derivative metrics, if needed. + +## Installation + +``` +go get github.com/agext/levenshtein +``` + +## License + +Package levenshtein is released under the Apache 2.0 license. See the [LICENSE](LICENSE) file for details. diff --git a/vendor/github.com/agext/levenshtein/levenshtein.go b/vendor/github.com/agext/levenshtein/levenshtein.go new file mode 100644 index 0000000..df69ce7 --- /dev/null +++ b/vendor/github.com/agext/levenshtein/levenshtein.go @@ -0,0 +1,290 @@ +// Copyright 2016 ALRUX Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* +Package levenshtein implements distance and similarity metrics for strings, based on the Levenshtein measure. + +The Levenshtein `Distance` between two strings is the minimum total cost of edits that would convert the first string into the second. The allowed edit operations are insertions, deletions, and substitutions, all at character (one UTF-8 code point) level. Each operation has a default cost of 1, but each can be assigned its own cost equal to or greater than 0. + +A `Distance` of 0 means the two strings are identical, and the higher the value the more different the strings. Since in practice we are interested in finding if the two strings are "close enough", it often does not make sense to continue the calculation once the result is mathematically guaranteed to exceed a desired threshold. Providing this value to the `Distance` function allows it to take a shortcut and return a lower bound instead of an exact cost when the threshold is exceeded. + +The `Similarity` function calculates the distance, then converts it into a normalized metric within the range 0..1, with 1 meaning the strings are identical, and 0 that they have nothing in common. A minimum similarity threshold can be provided to speed up the calculation of the metric for strings that are far too dissimilar for the purpose at hand. All values under this threshold are rounded down to 0. + +The `Match` function provides a similarity metric, with the same range and meaning as `Similarity`, but with a bonus for string pairs that share a common prefix and have a similarity above a "bonus threshold". It uses the same method as proposed by Winkler for the Jaro distance, and the reasoning behind it is that these string pairs are very likely spelling variations or errors, and they are more closely linked than the edit distance alone would suggest. + +The underlying `Calculate` function is also exported, to allow the building of other derivative metrics, if needed. +*/ +package levenshtein + +// Calculate determines the Levenshtein distance between two strings, using +// the given costs for each edit operation. It returns the distance along with +// the lengths of the longest common prefix and suffix. +// +// If maxCost is non-zero, the calculation stops as soon as the distance is determined +// to be greater than maxCost. Therefore, any return value higher than maxCost is a +// lower bound for the actual distance. +func Calculate(str1, str2 []rune, maxCost, insCost, subCost, delCost int) (dist, prefixLen, suffixLen int) { + l1, l2 := len(str1), len(str2) + // trim common prefix, if any, as it doesn't affect the distance + for ; prefixLen < l1 && prefixLen < l2; prefixLen++ { + if str1[prefixLen] != str2[prefixLen] { + break + } + } + str1, str2 = str1[prefixLen:], str2[prefixLen:] + l1 -= prefixLen + l2 -= prefixLen + // trim common suffix, if any, as it doesn't affect the distance + for 0 < l1 && 0 < l2 { + if str1[l1-1] != str2[l2-1] { + str1, str2 = str1[:l1], str2[:l2] + break + } + l1-- + l2-- + suffixLen++ + } + // if the first string is empty, the distance is the length of the second string times the cost of insertion + if l1 == 0 { + dist = l2 * insCost + return + } + // if the second string is empty, the distance is the length of the first string times the cost of deletion + if l2 == 0 { + dist = l1 * delCost + return + } + + // variables used in inner "for" loops + var y, dy, c, l int + + // if maxCost is greater than or equal to the maximum possible distance, it's equivalent to 'unlimited' + if maxCost > 0 { + if subCost < delCost+insCost { + if maxCost >= l1*subCost+(l2-l1)*insCost { + maxCost = 0 + } + } else { + if maxCost >= l1*delCost+l2*insCost { + maxCost = 0 + } + } + } + + if maxCost > 0 { + // prefer the longer string first, to minimize time; + // a swap also transposes the meanings of insertion and deletion. + if l1 < l2 { + str1, str2, l1, l2, insCost, delCost = str2, str1, l2, l1, delCost, insCost + } + + // the length differential times cost of deletion is a lower bound for the cost; + // if it is higher than the maxCost, there is no point going into the main calculation. + if dist = (l1 - l2) * delCost; dist > maxCost { + return + } + + d := make([]int, l1+1) + + // offset and length of d in the current row + doff, dlen := 0, 1 + for y, dy = 1, delCost; y <= l1 && dy <= maxCost; dlen++ { + d[y] = dy + y++ + dy = y * delCost + } + // fmt.Printf("%q -> %q: init doff=%d dlen=%d d[%d:%d]=%v\n", str1, str2, doff, dlen, doff, doff+dlen, d[doff:doff+dlen]) + + for x := 0; x < l2; x++ { + dy, d[doff] = d[doff], d[doff]+insCost + for d[doff] > maxCost && dlen > 0 { + if str1[doff] != str2[x] { + dy += subCost + } + doff++ + dlen-- + if c = d[doff] + insCost; c < dy { + dy = c + } + dy, d[doff] = d[doff], dy + } + for y, l = doff, doff+dlen-1; y < l; dy, d[y] = d[y], dy { + if str1[y] != str2[x] { + dy += subCost + } + if c = d[y] + delCost; c < dy { + dy = c + } + y++ + if c = d[y] + insCost; c < dy { + dy = c + } + } + if y < l1 { + if str1[y] != str2[x] { + dy += subCost + } + if c = d[y] + delCost; c < dy { + dy = c + } + for ; dy <= maxCost && y < l1; dy, d[y] = dy+delCost, dy { + y++ + dlen++ + } + } + // fmt.Printf("%q -> %q: x=%d doff=%d dlen=%d d[%d:%d]=%v\n", str1, str2, x, doff, dlen, doff, doff+dlen, d[doff:doff+dlen]) + if dlen == 0 { + dist = maxCost + 1 + return + } + } + if doff+dlen-1 < l1 { + dist = maxCost + 1 + return + } + dist = d[l1] + } else { + // ToDo: This is O(l1*l2) time and O(min(l1,l2)) space; investigate if it is + // worth to implement diagonal approach - O(l1*(1+dist)) time, up to O(l1*l2) space + // http://www.csse.monash.edu.au/~lloyd/tildeStrings/Alignment/92.IPL.html + + // prefer the shorter string first, to minimize space; time is O(l1*l2) anyway; + // a swap also transposes the meanings of insertion and deletion. + if l1 > l2 { + str1, str2, l1, l2, insCost, delCost = str2, str1, l2, l1, delCost, insCost + } + d := make([]int, l1+1) + + for y = 1; y <= l1; y++ { + d[y] = y * delCost + } + for x := 0; x < l2; x++ { + dy, d[0] = d[0], d[0]+insCost + for y = 0; y < l1; dy, d[y] = d[y], dy { + if str1[y] != str2[x] { + dy += subCost + } + if c = d[y] + delCost; c < dy { + dy = c + } + y++ + if c = d[y] + insCost; c < dy { + dy = c + } + } + } + dist = d[l1] + } + + return +} + +// Distance returns the Levenshtein distance between str1 and str2, using the +// default or provided cost values. Pass nil for the third argument to use the +// default cost of 1 for all three operations, with no maximum. +func Distance(str1, str2 string, p *Params) int { + if p == nil { + p = defaultParams + } + dist, _, _ := Calculate([]rune(str1), []rune(str2), p.maxCost, p.insCost, p.subCost, p.delCost) + return dist +} + +// Similarity returns a score in the range of 0..1 for how similar the two strings are. +// A score of 1 means the strings are identical, and 0 means they have nothing in common. +// +// A nil third argument uses the default cost of 1 for all three operations. +// +// If a non-zero MinScore value is provided in the parameters, scores lower than it +// will be returned as 0. +func Similarity(str1, str2 string, p *Params) float64 { + return Match(str1, str2, p.Clone().BonusThreshold(1.1)) // guaranteed no bonus +} + +// Match returns a similarity score adjusted by the same method as proposed by Winkler for +// the Jaro distance - giving a bonus to string pairs that share a common prefix, only if their +// similarity score is already over a threshold. +// +// The score is in the range of 0..1, with 1 meaning the strings are identical, +// and 0 meaning they have nothing in common. +// +// A nil third argument uses the default cost of 1 for all three operations, maximum length of +// common prefix to consider for bonus of 4, scaling factor of 0.1, and bonus threshold of 0.7. +// +// If a non-zero MinScore value is provided in the parameters, scores lower than it +// will be returned as 0. +func Match(str1, str2 string, p *Params) float64 { + s1, s2 := []rune(str1), []rune(str2) + l1, l2 := len(s1), len(s2) + // two empty strings are identical; shortcut also avoids divByZero issues later on. + if l1 == 0 && l2 == 0 { + return 1 + } + + if p == nil { + p = defaultParams + } + + // a min over 1 can never be satisfied, so the score is 0. + if p.minScore > 1 { + return 0 + } + + insCost, delCost, maxDist, max := p.insCost, p.delCost, 0, 0 + if l1 > l2 { + l1, l2, insCost, delCost = l2, l1, delCost, insCost + } + + if p.subCost < delCost+insCost { + maxDist = l1*p.subCost + (l2-l1)*insCost + } else { + maxDist = l1*delCost + l2*insCost + } + + // a zero min is always satisfied, so no need to set a max cost. + if p.minScore > 0 { + // if p.minScore is lower than p.bonusThreshold, we can use a simplified formula + // for the max cost, because a sim score below min cannot receive a bonus. + if p.minScore < p.bonusThreshold { + // round down the max - a cost equal to a rounded up max would already be under min. + max = int((1 - p.minScore) * float64(maxDist)) + } else { + // p.minScore <= sim + p.bonusPrefix*p.bonusScale*(1-sim) + // p.minScore <= (1-dist/maxDist) + p.bonusPrefix*p.bonusScale*(1-(1-dist/maxDist)) + // p.minScore <= 1 - dist/maxDist + p.bonusPrefix*p.bonusScale*dist/maxDist + // 1 - p.minScore >= dist/maxDist - p.bonusPrefix*p.bonusScale*dist/maxDist + // (1-p.minScore)*maxDist/(1-p.bonusPrefix*p.bonusScale) >= dist + max = int((1 - p.minScore) * float64(maxDist) / (1 - float64(p.bonusPrefix)*p.bonusScale)) + } + } + + dist, pl, _ := Calculate(s1, s2, max, p.insCost, p.subCost, p.delCost) + if max > 0 && dist > max { + return 0 + } + sim := 1 - float64(dist)/float64(maxDist) + + if sim >= p.bonusThreshold && sim < 1 && p.bonusPrefix > 0 && p.bonusScale > 0 { + if pl > p.bonusPrefix { + pl = p.bonusPrefix + } + sim += float64(pl) * p.bonusScale * (1 - sim) + } + + if sim < p.minScore { + return 0 + } + + return sim +} diff --git a/vendor/github.com/agext/levenshtein/params.go b/vendor/github.com/agext/levenshtein/params.go new file mode 100644 index 0000000..a85727b --- /dev/null +++ b/vendor/github.com/agext/levenshtein/params.go @@ -0,0 +1,152 @@ +// Copyright 2016 ALRUX Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package levenshtein + +// Params represents a set of parameter values for the various formulas involved +// in the calculation of the Levenshtein string metrics. +type Params struct { + insCost int + subCost int + delCost int + maxCost int + minScore float64 + bonusPrefix int + bonusScale float64 + bonusThreshold float64 +} + +var ( + defaultParams = NewParams() +) + +// NewParams creates a new set of parameters and initializes it with the default values. +func NewParams() *Params { + return &Params{ + insCost: 1, + subCost: 1, + delCost: 1, + maxCost: 0, + minScore: 0, + bonusPrefix: 4, + bonusScale: .1, + bonusThreshold: .7, + } +} + +// Clone returns a pointer to a copy of the receiver parameter set, or of a new +// default parameter set if the receiver is nil. +func (p *Params) Clone() *Params { + if p == nil { + return NewParams() + } + return &Params{ + insCost: p.insCost, + subCost: p.subCost, + delCost: p.delCost, + maxCost: p.maxCost, + minScore: p.minScore, + bonusPrefix: p.bonusPrefix, + bonusScale: p.bonusScale, + bonusThreshold: p.bonusThreshold, + } +} + +// InsCost overrides the default value of 1 for the cost of insertion. +// The new value must be zero or positive. +func (p *Params) InsCost(v int) *Params { + if v >= 0 { + p.insCost = v + } + return p +} + +// SubCost overrides the default value of 1 for the cost of substitution. +// The new value must be zero or positive. +func (p *Params) SubCost(v int) *Params { + if v >= 0 { + p.subCost = v + } + return p +} + +// DelCost overrides the default value of 1 for the cost of deletion. +// The new value must be zero or positive. +func (p *Params) DelCost(v int) *Params { + if v >= 0 { + p.delCost = v + } + return p +} + +// MaxCost overrides the default value of 0 (meaning unlimited) for the maximum cost. +// The calculation of Distance() stops when the result is guaranteed to exceed +// this maximum, returning a lower-bound rather than exact value. +// The new value must be zero or positive. +func (p *Params) MaxCost(v int) *Params { + if v >= 0 { + p.maxCost = v + } + return p +} + +// MinScore overrides the default value of 0 for the minimum similarity score. +// Scores below this threshold are returned as 0 by Similarity() and Match(). +// The new value must be zero or positive. Note that a minimum greater than 1 +// can never be satisfied, resulting in a score of 0 for any pair of strings. +func (p *Params) MinScore(v float64) *Params { + if v >= 0 { + p.minScore = v + } + return p +} + +// BonusPrefix overrides the default value for the maximum length of +// common prefix to be considered for bonus by Match(). +// The new value must be zero or positive. +func (p *Params) BonusPrefix(v int) *Params { + if v >= 0 { + p.bonusPrefix = v + } + return p +} + +// BonusScale overrides the default value for the scaling factor used by Match() +// in calculating the bonus. +// The new value must be zero or positive. To guarantee that the similarity score +// remains in the interval 0..1, this scaling factor is not allowed to exceed +// 1 / BonusPrefix. +func (p *Params) BonusScale(v float64) *Params { + if v >= 0 { + p.bonusScale = v + } + + // the bonus cannot exceed (1-sim), or the score may become greater than 1. + if float64(p.bonusPrefix)*p.bonusScale > 1 { + p.bonusScale = 1 / float64(p.bonusPrefix) + } + + return p +} + +// BonusThreshold overrides the default value for the minimum similarity score +// for which Match() can assign a bonus. +// The new value must be zero or positive. Note that a threshold greater than 1 +// effectively makes Match() become the equivalent of Similarity(). +func (p *Params) BonusThreshold(v float64) *Params { + if v >= 0 { + p.bonusThreshold = v + } + return p +} -- cgit v1.2.3