aboutsummaryrefslogtreecommitdiffhomepage
path: root/vendor/github.com/hashicorp/hcl2/hclwrite/format.go
blob: ded7fb4269aeeed8fb38ab764cd0ba7803e174a6 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
package hclwrite

import (
	"github.com/hashicorp/hcl2/hcl/hclsyntax"
)

var inKeyword = hclsyntax.Keyword([]byte{'i', 'n'})

// placeholder token used when we don't have a token but we don't want
// to pass a real "nil" and complicate things with nil pointer checks
var nilToken = &Token{
	Type:         hclsyntax.TokenNil,
	Bytes:        []byte{},
	SpacesBefore: 0,
}

// format rewrites tokens within the given sequence, in-place, to adjust the
// whitespace around their content to achieve canonical formatting.
func format(tokens Tokens) {
	// Formatting is a multi-pass process. More details on the passes below,
	// but this is the overview:
	// - adjust the leading space on each line to create appropriate
	//   indentation
	// - adjust spaces between tokens in a single cell using a set of rules
	// - adjust the leading space in the "assign" and "comment" cells on each
	//   line to vertically align with neighboring lines.
	// All of these steps operate in-place on the given tokens, so a caller
	// may collect a flat sequence of all of the tokens underlying an AST
	// and pass it here and we will then indirectly modify the AST itself.
	// Formatting must change only whitespace. Specifically, that means
	// changing the SpacesBefore attribute on a token while leaving the
	// other token attributes unchanged.

	lines := linesForFormat(tokens)
	formatIndent(lines)
	formatSpaces(lines)
	formatCells(lines)
}

func formatIndent(lines []formatLine) {
	// Our methodology for indents is to take the input one line at a time
	// and count the bracketing delimiters on each line. If a line has a net
	// increase in open brackets, we increase the indent level by one and
	// remember how many new openers we had. If the line has a net _decrease_,
	// we'll compare it to the most recent number of openers and decrease the
	// dedent level by one each time we pass an indent level remembered
	// earlier.
	// The "indent stack" used here allows for us to recognize degenerate
	// input where brackets are not symmetrical within lines and avoid
	// pushing things too far left or right, creating confusion.

	// We'll start our indent stack at a reasonable capacity to minimize the
	// chance of us needing to grow it; 10 here means 10 levels of indent,
	// which should be more than enough for reasonable HCL uses.
	indents := make([]int, 0, 10)

	for i := range lines {
		line := &lines[i]
		if len(line.lead) == 0 {
			continue
		}

		if line.lead[0].Type == hclsyntax.TokenNewline {
			// Never place spaces before a newline
			line.lead[0].SpacesBefore = 0
			continue
		}

		netBrackets := 0
		for _, token := range line.lead {
			netBrackets += tokenBracketChange(token)
			if token.Type == hclsyntax.TokenOHeredoc {
				break
			}
		}

		for _, token := range line.assign {
			netBrackets += tokenBracketChange(token)
		}

		switch {
		case netBrackets > 0:
			line.lead[0].SpacesBefore = 2 * len(indents)
			indents = append(indents, netBrackets)
		case netBrackets < 0:
			closed := -netBrackets
			for closed > 0 && len(indents) > 0 {
				switch {

				case closed > indents[len(indents)-1]:
					closed -= indents[len(indents)-1]
					indents = indents[:len(indents)-1]

				case closed < indents[len(indents)-1]:
					indents[len(indents)-1] -= closed
					closed = 0

				default:
					indents = indents[:len(indents)-1]
					closed = 0
				}
			}
			line.lead[0].SpacesBefore = 2 * len(indents)
		default:
			line.lead[0].SpacesBefore = 2 * len(indents)
		}
	}
}

func formatSpaces(lines []formatLine) {
	for _, line := range lines {
		for i, token := range line.lead {
			var before, after *Token
			if i > 0 {
				before = line.lead[i-1]
			} else {
				before = nilToken
			}
			if i < (len(line.lead) - 1) {
				after = line.lead[i+1]
			} else {
				after = nilToken
			}
			if spaceAfterToken(token, before, after) {
				after.SpacesBefore = 1
			} else {
				after.SpacesBefore = 0
			}
		}
		for i, token := range line.assign {
			if i == 0 {
				// first token in "assign" always has one space before to
				// separate the equals sign from what it's assigning.
				token.SpacesBefore = 1
			}

			var before, after *Token
			if i > 0 {
				before = line.assign[i-1]
			} else {
				before = nilToken
			}
			if i < (len(line.assign) - 1) {
				after = line.assign[i+1]
			} else {
				after = nilToken
			}
			if spaceAfterToken(token, before, after) {
				after.SpacesBefore = 1
			} else {
				after.SpacesBefore = 0
			}
		}

	}
}

func formatCells(lines []formatLine) {

	chainStart := -1
	maxColumns := 0

	// We'll deal with the "assign" cell first, since moving that will
	// also impact the "comment" cell.
	closeAssignChain := func(i int) {
		for _, chainLine := range lines[chainStart:i] {
			columns := chainLine.lead.Columns()
			spaces := (maxColumns - columns) + 1
			chainLine.assign[0].SpacesBefore = spaces
		}
		chainStart = -1
		maxColumns = 0
	}
	for i, line := range lines {
		if line.assign == nil {
			if chainStart != -1 {
				closeAssignChain(i)
			}
		} else {
			if chainStart == -1 {
				chainStart = i
			}
			columns := line.lead.Columns()
			if columns > maxColumns {
				maxColumns = columns
			}
		}
	}
	if chainStart != -1 {
		closeAssignChain(len(lines))
	}

	// Now we'll deal with the comments
	closeCommentChain := func(i int) {
		for _, chainLine := range lines[chainStart:i] {
			columns := chainLine.lead.Columns() + chainLine.assign.Columns()
			spaces := (maxColumns - columns) + 1
			chainLine.comment[0].SpacesBefore = spaces
		}
		chainStart = -1
		maxColumns = 0
	}
	for i, line := range lines {
		if line.comment == nil {
			if chainStart != -1 {
				closeCommentChain(i)
			}
		} else {
			if chainStart == -1 {
				chainStart = i
			}
			columns := line.lead.Columns() + line.assign.Columns()
			if columns > maxColumns {
				maxColumns = columns
			}
		}
	}
	if chainStart != -1 {
		closeCommentChain(len(lines))
	}

}

// spaceAfterToken decides whether a particular subject token should have a
// space after it when surrounded by the given before and after tokens.
// "before" can be TokenNil, if the subject token is at the start of a sequence.
func spaceAfterToken(subject, before, after *Token) bool {
	switch {

	case after.Type == hclsyntax.TokenNewline || after.Type == hclsyntax.TokenNil:
		// Never add spaces before a newline
		return false

	case subject.Type == hclsyntax.TokenIdent && after.Type == hclsyntax.TokenOParen:
		// Don't split a function name from open paren in a call
		return false

	case subject.Type == hclsyntax.TokenDot || after.Type == hclsyntax.TokenDot:
		// Don't use spaces around attribute access dots
		return false

	case after.Type == hclsyntax.TokenComma || after.Type == hclsyntax.TokenEllipsis:
		// No space right before a comma or ... in an argument list
		return false

	case subject.Type == hclsyntax.TokenComma:
		// Always a space after a comma
		return true

	case subject.Type == hclsyntax.TokenQuotedLit || subject.Type == hclsyntax.TokenStringLit || subject.Type == hclsyntax.TokenOQuote || subject.Type == hclsyntax.TokenOHeredoc || after.Type == hclsyntax.TokenQuotedLit || after.Type == hclsyntax.TokenStringLit || after.Type == hclsyntax.TokenCQuote || after.Type == hclsyntax.TokenCHeredoc:
		// No extra spaces within templates
		return false

	case inKeyword.TokenMatches(subject.asHCLSyntax()) && before.Type == hclsyntax.TokenIdent:
		// This is a special case for inside for expressions where a user
		// might want to use a literal tuple constructor:
		// [for x in [foo]: x]
		// ... in that case, we would normally produce in[foo] thinking that
		// in is a reference, but we'll recognize it as a keyword here instead
		// to make the result less confusing.
		return true

	case after.Type == hclsyntax.TokenOBrack && (subject.Type == hclsyntax.TokenIdent || subject.Type == hclsyntax.TokenNumberLit || tokenBracketChange(subject) < 0):
		return false

	case subject.Type == hclsyntax.TokenMinus:
		// Since a minus can either be subtraction or negation, and the latter
		// should _not_ have a space after it, we need to use some heuristics
		// to decide which case this is.
		// We guess that we have a negation if the token before doesn't look
		// like it could be the end of an expression.

		switch before.Type {

		case hclsyntax.TokenNil:
			// Minus at the start of input must be a negation
			return false

		case hclsyntax.TokenOParen, hclsyntax.TokenOBrace, hclsyntax.TokenOBrack, hclsyntax.TokenEqual, hclsyntax.TokenColon, hclsyntax.TokenComma, hclsyntax.TokenQuestion:
			// Minus immediately after an opening bracket or separator must be a negation.
			return false

		case hclsyntax.TokenPlus, hclsyntax.TokenStar, hclsyntax.TokenSlash, hclsyntax.TokenPercent, hclsyntax.TokenMinus:
			// Minus immediately after another arithmetic operator must be negation.
			return false

		case hclsyntax.TokenEqualOp, hclsyntax.TokenNotEqual, hclsyntax.TokenGreaterThan, hclsyntax.TokenGreaterThanEq, hclsyntax.TokenLessThan, hclsyntax.TokenLessThanEq:
			// Minus immediately after another comparison operator must be negation.
			return false

		case hclsyntax.TokenAnd, hclsyntax.TokenOr, hclsyntax.TokenBang:
			// Minus immediately after logical operator doesn't make sense but probably intended as negation.
			return false

		default:
			return true
		}

	case subject.Type == hclsyntax.TokenOBrace || after.Type == hclsyntax.TokenCBrace:
		// Unlike other bracket types, braces have spaces on both sides of them,
		// both in single-line nested blocks foo { bar = baz } and in object
		// constructor expressions foo = { bar = baz }.
		if subject.Type == hclsyntax.TokenOBrace && after.Type == hclsyntax.TokenCBrace {
			// An open brace followed by a close brace is an exception, however.
			// e.g. foo {} rather than foo { }
			return false
		}
		return true

	// In the unlikely event that an interpolation expression is just
	// a single object constructor, we'll put a space between the ${ and
	// the following { to make this more obvious, and then the same
	// thing for the two braces at the end.
	case (subject.Type == hclsyntax.TokenTemplateInterp || subject.Type == hclsyntax.TokenTemplateControl) && after.Type == hclsyntax.TokenOBrace:
		return true
	case subject.Type == hclsyntax.TokenCBrace && after.Type == hclsyntax.TokenTemplateSeqEnd:
		return true

	// Don't add spaces between interpolated items
	case subject.Type == hclsyntax.TokenTemplateSeqEnd && after.Type == hclsyntax.TokenTemplateInterp:
		return false

	case tokenBracketChange(subject) > 0:
		// No spaces after open brackets
		return false

	case tokenBracketChange(after) < 0:
		// No spaces before close brackets
		return false

	default:
		// Most tokens are space-separated
		return true

	}
}

func linesForFormat(tokens Tokens) []formatLine {
	if len(tokens) == 0 {
		return make([]formatLine, 0)
	}

	// first we'll count our lines, so we can allocate the array for them in
	// a single block. (We want to minimize memory pressure in this codepath,
	// so it can be run somewhat-frequently by editor integrations.)
	lineCount := 1 // if there are zero newlines then there is one line
	for _, tok := range tokens {
		if tokenIsNewline(tok) {
			lineCount++
		}
	}

	// To start, we'll just put everything in the "lead" cell on each line,
	// and then do another pass over the lines afterwards to adjust.
	lines := make([]formatLine, lineCount)
	li := 0
	lineStart := 0
	for i, tok := range tokens {
		if tok.Type == hclsyntax.TokenEOF {
			// The EOF token doesn't belong to any line, and terminates the
			// token sequence.
			lines[li].lead = tokens[lineStart:i]
			break
		}

		if tokenIsNewline(tok) {
			lines[li].lead = tokens[lineStart : i+1]
			lineStart = i + 1
			li++
		}
	}

	// If a set of tokens doesn't end in TokenEOF (e.g. because it's a
	// fragment of tokens from the middle of a file) then we might fall
	// out here with a line still pending.
	if lineStart < len(tokens) {
		lines[li].lead = tokens[lineStart:]
		if lines[li].lead[len(lines[li].lead)-1].Type == hclsyntax.TokenEOF {
			lines[li].lead = lines[li].lead[:len(lines[li].lead)-1]
		}
	}

	// Now we'll pick off any trailing comments and attribute assignments
	// to shuffle off into the "comment" and "assign" cells.
	for i := range lines {
		line := &lines[i]

		if len(line.lead) == 0 {
			// if the line is empty then there's nothing for us to do
			// (this should happen only for the final line, because all other
			// lines would have a newline token of some kind)
			continue
		}

		if len(line.lead) > 1 && line.lead[len(line.lead)-1].Type == hclsyntax.TokenComment {
			line.comment = line.lead[len(line.lead)-1:]
			line.lead = line.lead[:len(line.lead)-1]
		}

		for i, tok := range line.lead {
			if i > 0 && tok.Type == hclsyntax.TokenEqual {
				// We only move the tokens into "assign" if the RHS seems to
				// be a whole expression, which we determine by counting
				// brackets. If there's a net positive number of brackets
				// then that suggests we're introducing a multi-line expression.
				netBrackets := 0
				for _, token := range line.lead[i:] {
					netBrackets += tokenBracketChange(token)
				}

				if netBrackets == 0 {
					line.assign = line.lead[i:]
					line.lead = line.lead[:i]
				}
				break
			}
		}
	}

	return lines
}

func tokenIsNewline(tok *Token) bool {
	if tok.Type == hclsyntax.TokenNewline {
		return true
	} else if tok.Type == hclsyntax.TokenComment {
		// Single line tokens (# and //) consume their terminating newline,
		// so we need to treat them as newline tokens as well.
		if len(tok.Bytes) > 0 && tok.Bytes[len(tok.Bytes)-1] == '\n' {
			return true
		}
	}
	return false
}

func tokenBracketChange(tok *Token) int {
	switch tok.Type {
	case hclsyntax.TokenOBrace, hclsyntax.TokenOBrack, hclsyntax.TokenOParen, hclsyntax.TokenTemplateControl, hclsyntax.TokenTemplateInterp:
		return 1
	case hclsyntax.TokenCBrace, hclsyntax.TokenCBrack, hclsyntax.TokenCParen, hclsyntax.TokenTemplateSeqEnd:
		return -1
	default:
		return 0
	}
}

// formatLine represents a single line of source code for formatting purposes,
// splitting its tokens into up to three "cells":
//
// lead: always present, representing everything up to one of the others
// assign: if line contains an attribute assignment, represents the tokens
//    starting at (and including) the equals symbol
// comment: if line contains any non-comment tokens and ends with a
//    single-line comment token, represents the comment.
//
// When formatting, the leading spaces of the first tokens in each of these
// cells is adjusted to align vertically their occurences on consecutive
// rows.
type formatLine struct {
	lead    Tokens
	assign  Tokens
	comment Tokens
}