]>
Commit | Line | Data |
---|---|---|
107c1cdb ND |
1 | package stdlib |
2 | ||
3 | import ( | |
4 | "fmt" | |
5 | "regexp" | |
6 | resyntax "regexp/syntax" | |
7 | ||
8 | "github.com/zclconf/go-cty/cty" | |
9 | "github.com/zclconf/go-cty/cty/function" | |
10 | ) | |
11 | ||
12 | var RegexFunc = function.New(&function.Spec{ | |
13 | Params: []function.Parameter{ | |
14 | { | |
15 | Name: "pattern", | |
16 | Type: cty.String, | |
17 | }, | |
18 | { | |
19 | Name: "string", | |
20 | Type: cty.String, | |
21 | }, | |
22 | }, | |
23 | Type: func(args []cty.Value) (cty.Type, error) { | |
24 | if !args[0].IsKnown() { | |
25 | // We can't predict our type without seeing our pattern | |
26 | return cty.DynamicPseudoType, nil | |
27 | } | |
28 | ||
29 | retTy, err := regexPatternResultType(args[0].AsString()) | |
30 | if err != nil { | |
31 | err = function.NewArgError(0, err) | |
32 | } | |
33 | return retTy, err | |
34 | }, | |
35 | Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) { | |
36 | if retType == cty.DynamicPseudoType { | |
37 | return cty.DynamicVal, nil | |
38 | } | |
39 | ||
40 | re, err := regexp.Compile(args[0].AsString()) | |
41 | if err != nil { | |
42 | // Should never happen, since we checked this in the Type function above. | |
43 | return cty.NilVal, function.NewArgErrorf(0, "error parsing pattern: %s", err) | |
44 | } | |
45 | str := args[1].AsString() | |
46 | ||
47 | captureIdxs := re.FindStringSubmatchIndex(str) | |
48 | if captureIdxs == nil { | |
49 | return cty.NilVal, fmt.Errorf("pattern did not match any part of the given string") | |
50 | } | |
51 | ||
52 | return regexPatternResult(re, str, captureIdxs, retType), nil | |
53 | }, | |
54 | }) | |
55 | ||
56 | var RegexAllFunc = function.New(&function.Spec{ | |
57 | Params: []function.Parameter{ | |
58 | { | |
59 | Name: "pattern", | |
60 | Type: cty.String, | |
61 | }, | |
62 | { | |
63 | Name: "string", | |
64 | Type: cty.String, | |
65 | }, | |
66 | }, | |
67 | Type: func(args []cty.Value) (cty.Type, error) { | |
68 | if !args[0].IsKnown() { | |
69 | // We can't predict our type without seeing our pattern, | |
70 | // but we do know it'll always be a list of something. | |
71 | return cty.List(cty.DynamicPseudoType), nil | |
72 | } | |
73 | ||
74 | retTy, err := regexPatternResultType(args[0].AsString()) | |
75 | if err != nil { | |
76 | err = function.NewArgError(0, err) | |
77 | } | |
78 | return cty.List(retTy), err | |
79 | }, | |
80 | Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) { | |
81 | ety := retType.ElementType() | |
82 | if ety == cty.DynamicPseudoType { | |
83 | return cty.DynamicVal, nil | |
84 | } | |
85 | ||
86 | re, err := regexp.Compile(args[0].AsString()) | |
87 | if err != nil { | |
88 | // Should never happen, since we checked this in the Type function above. | |
89 | return cty.NilVal, function.NewArgErrorf(0, "error parsing pattern: %s", err) | |
90 | } | |
91 | str := args[1].AsString() | |
92 | ||
93 | captureIdxsEach := re.FindAllStringSubmatchIndex(str, -1) | |
94 | if len(captureIdxsEach) == 0 { | |
95 | return cty.ListValEmpty(ety), nil | |
96 | } | |
97 | ||
98 | elems := make([]cty.Value, len(captureIdxsEach)) | |
99 | for i, captureIdxs := range captureIdxsEach { | |
100 | elems[i] = regexPatternResult(re, str, captureIdxs, ety) | |
101 | } | |
102 | return cty.ListVal(elems), nil | |
103 | }, | |
104 | }) | |
105 | ||
106 | // Regex is a function that extracts one or more substrings from a given | |
107 | // string by applying a regular expression pattern, describing the first | |
108 | // match. | |
109 | // | |
110 | // The return type depends on the composition of the capture groups (if any) | |
111 | // in the pattern: | |
112 | // | |
113 | // - If there are no capture groups at all, the result is a single string | |
114 | // representing the entire matched pattern. | |
115 | // - If all of the capture groups are named, the result is an object whose | |
116 | // keys are the named groups and whose values are their sub-matches, or | |
117 | // null if a particular sub-group was inside another group that didn't | |
118 | // match. | |
119 | // - If none of the capture groups are named, the result is a tuple whose | |
120 | // elements are the sub-groups in order and whose values are their | |
121 | // sub-matches, or null if a particular sub-group was inside another group | |
122 | // that didn't match. | |
123 | // - It is invalid to use both named and un-named capture groups together in | |
124 | // the same pattern. | |
125 | // | |
126 | // If the pattern doesn't match, this function returns an error. To test for | |
127 | // a match, call RegexAll and check if the length of the result is greater | |
128 | // than zero. | |
129 | func Regex(pattern, str cty.Value) (cty.Value, error) { | |
130 | return RegexFunc.Call([]cty.Value{pattern, str}) | |
131 | } | |
132 | ||
133 | // RegexAll is similar to Regex but it finds all of the non-overlapping matches | |
134 | // in the given string and returns a list of them. | |
135 | // | |
136 | // The result type is always a list, whose element type is deduced from the | |
137 | // pattern in the same way as the return type for Regex is decided. | |
138 | // | |
139 | // If the pattern doesn't match at all, this function returns an empty list. | |
140 | func RegexAll(pattern, str cty.Value) (cty.Value, error) { | |
141 | return RegexAllFunc.Call([]cty.Value{pattern, str}) | |
142 | } | |
143 | ||
144 | // regexPatternResultType parses the given regular expression pattern and | |
145 | // returns the structural type that would be returned to represent its | |
146 | // capture groups. | |
147 | // | |
148 | // Returns an error if parsing fails or if the pattern uses a mixture of | |
149 | // named and unnamed capture groups, which is not permitted. | |
150 | func regexPatternResultType(pattern string) (cty.Type, error) { | |
151 | re, rawErr := regexp.Compile(pattern) | |
152 | switch err := rawErr.(type) { | |
153 | case *resyntax.Error: | |
154 | return cty.NilType, fmt.Errorf("invalid regexp pattern: %s in %s", err.Code, err.Expr) | |
155 | case error: | |
156 | // Should never happen, since all regexp compile errors should | |
157 | // be resyntax.Error, but just in case... | |
158 | return cty.NilType, fmt.Errorf("error parsing pattern: %s", err) | |
159 | } | |
160 | ||
161 | allNames := re.SubexpNames()[1:] | |
162 | var names []string | |
163 | unnamed := 0 | |
164 | for _, name := range allNames { | |
165 | if name == "" { | |
166 | unnamed++ | |
167 | } else { | |
168 | if names == nil { | |
169 | names = make([]string, 0, len(allNames)) | |
170 | } | |
171 | names = append(names, name) | |
172 | } | |
173 | } | |
174 | switch { | |
175 | case unnamed == 0 && len(names) == 0: | |
176 | // If there are no capture groups at all then we'll return just a | |
177 | // single string for the whole match. | |
178 | return cty.String, nil | |
179 | case unnamed > 0 && len(names) > 0: | |
180 | return cty.NilType, fmt.Errorf("invalid regexp pattern: cannot mix both named and unnamed capture groups") | |
181 | case unnamed > 0: | |
182 | // For unnamed captures, we return a tuple of them all in order. | |
183 | etys := make([]cty.Type, unnamed) | |
184 | for i := range etys { | |
185 | etys[i] = cty.String | |
186 | } | |
187 | return cty.Tuple(etys), nil | |
188 | default: | |
189 | // For named captures, we return an object using the capture names | |
190 | // as keys. | |
191 | atys := make(map[string]cty.Type, len(names)) | |
192 | for _, name := range names { | |
193 | atys[name] = cty.String | |
194 | } | |
195 | return cty.Object(atys), nil | |
196 | } | |
197 | } | |
198 | ||
199 | func regexPatternResult(re *regexp.Regexp, str string, captureIdxs []int, retType cty.Type) cty.Value { | |
200 | switch { | |
201 | case retType == cty.String: | |
202 | start, end := captureIdxs[0], captureIdxs[1] | |
203 | return cty.StringVal(str[start:end]) | |
204 | case retType.IsTupleType(): | |
205 | captureIdxs = captureIdxs[2:] // index 0 is the whole pattern span, which we ignore by skipping one pair | |
206 | vals := make([]cty.Value, len(captureIdxs)/2) | |
207 | for i := range vals { | |
208 | start, end := captureIdxs[i*2], captureIdxs[i*2+1] | |
209 | if start < 0 || end < 0 { | |
210 | vals[i] = cty.NullVal(cty.String) // Did not match anything because containing group didn't match | |
211 | continue | |
212 | } | |
213 | vals[i] = cty.StringVal(str[start:end]) | |
214 | } | |
215 | return cty.TupleVal(vals) | |
216 | case retType.IsObjectType(): | |
217 | captureIdxs = captureIdxs[2:] // index 0 is the whole pattern span, which we ignore by skipping one pair | |
218 | vals := make(map[string]cty.Value, len(captureIdxs)/2) | |
219 | names := re.SubexpNames()[1:] | |
220 | for i, name := range names { | |
221 | start, end := captureIdxs[i*2], captureIdxs[i*2+1] | |
222 | if start < 0 || end < 0 { | |
223 | vals[name] = cty.NullVal(cty.String) // Did not match anything because containing group didn't match | |
224 | continue | |
225 | } | |
226 | vals[name] = cty.StringVal(str[start:end]) | |
227 | } | |
228 | return cty.ObjectVal(vals) | |
229 | default: | |
230 | // Should never happen | |
231 | panic(fmt.Sprintf("invalid return type %#v", retType)) | |
232 | } | |
233 | } |