]>
Commit | Line | Data |
---|---|---|
1 | // Copyright 2015 The Go Authors. All rights reserved. | |
2 | // Use of this source code is governed by a BSD-style | |
3 | // license that can be found in the LICENSE file. | |
4 | ||
5 | package trace | |
6 | ||
7 | // This file implements histogramming for RPC statistics collection. | |
8 | ||
9 | import ( | |
10 | "bytes" | |
11 | "fmt" | |
12 | "html/template" | |
13 | "log" | |
14 | "math" | |
15 | "sync" | |
16 | ||
17 | "golang.org/x/net/internal/timeseries" | |
18 | ) | |
19 | ||
20 | const ( | |
21 | bucketCount = 38 | |
22 | ) | |
23 | ||
24 | // histogram keeps counts of values in buckets that are spaced | |
25 | // out in powers of 2: 0-1, 2-3, 4-7... | |
26 | // histogram implements timeseries.Observable | |
27 | type histogram struct { | |
28 | sum int64 // running total of measurements | |
29 | sumOfSquares float64 // square of running total | |
30 | buckets []int64 // bucketed values for histogram | |
31 | value int // holds a single value as an optimization | |
32 | valueCount int64 // number of values recorded for single value | |
33 | } | |
34 | ||
35 | // AddMeasurement records a value measurement observation to the histogram. | |
36 | func (h *histogram) addMeasurement(value int64) { | |
37 | // TODO: assert invariant | |
38 | h.sum += value | |
39 | h.sumOfSquares += float64(value) * float64(value) | |
40 | ||
41 | bucketIndex := getBucket(value) | |
42 | ||
43 | if h.valueCount == 0 || (h.valueCount > 0 && h.value == bucketIndex) { | |
44 | h.value = bucketIndex | |
45 | h.valueCount++ | |
46 | } else { | |
47 | h.allocateBuckets() | |
48 | h.buckets[bucketIndex]++ | |
49 | } | |
50 | } | |
51 | ||
52 | func (h *histogram) allocateBuckets() { | |
53 | if h.buckets == nil { | |
54 | h.buckets = make([]int64, bucketCount) | |
55 | h.buckets[h.value] = h.valueCount | |
56 | h.value = 0 | |
57 | h.valueCount = -1 | |
58 | } | |
59 | } | |
60 | ||
61 | func log2(i int64) int { | |
62 | n := 0 | |
63 | for ; i >= 0x100; i >>= 8 { | |
64 | n += 8 | |
65 | } | |
66 | for ; i > 0; i >>= 1 { | |
67 | n += 1 | |
68 | } | |
69 | return n | |
70 | } | |
71 | ||
72 | func getBucket(i int64) (index int) { | |
73 | index = log2(i) - 1 | |
74 | if index < 0 { | |
75 | index = 0 | |
76 | } | |
77 | if index >= bucketCount { | |
78 | index = bucketCount - 1 | |
79 | } | |
80 | return | |
81 | } | |
82 | ||
83 | // Total returns the number of recorded observations. | |
84 | func (h *histogram) total() (total int64) { | |
85 | if h.valueCount >= 0 { | |
86 | total = h.valueCount | |
87 | } | |
88 | for _, val := range h.buckets { | |
89 | total += int64(val) | |
90 | } | |
91 | return | |
92 | } | |
93 | ||
94 | // Average returns the average value of recorded observations. | |
95 | func (h *histogram) average() float64 { | |
96 | t := h.total() | |
97 | if t == 0 { | |
98 | return 0 | |
99 | } | |
100 | return float64(h.sum) / float64(t) | |
101 | } | |
102 | ||
103 | // Variance returns the variance of recorded observations. | |
104 | func (h *histogram) variance() float64 { | |
105 | t := float64(h.total()) | |
106 | if t == 0 { | |
107 | return 0 | |
108 | } | |
109 | s := float64(h.sum) / t | |
110 | return h.sumOfSquares/t - s*s | |
111 | } | |
112 | ||
113 | // StandardDeviation returns the standard deviation of recorded observations. | |
114 | func (h *histogram) standardDeviation() float64 { | |
115 | return math.Sqrt(h.variance()) | |
116 | } | |
117 | ||
118 | // PercentileBoundary estimates the value that the given fraction of recorded | |
119 | // observations are less than. | |
120 | func (h *histogram) percentileBoundary(percentile float64) int64 { | |
121 | total := h.total() | |
122 | ||
123 | // Corner cases (make sure result is strictly less than Total()) | |
124 | if total == 0 { | |
125 | return 0 | |
126 | } else if total == 1 { | |
127 | return int64(h.average()) | |
128 | } | |
129 | ||
130 | percentOfTotal := round(float64(total) * percentile) | |
131 | var runningTotal int64 | |
132 | ||
133 | for i := range h.buckets { | |
134 | value := h.buckets[i] | |
135 | runningTotal += value | |
136 | if runningTotal == percentOfTotal { | |
137 | // We hit an exact bucket boundary. If the next bucket has data, it is a | |
138 | // good estimate of the value. If the bucket is empty, we interpolate the | |
139 | // midpoint between the next bucket's boundary and the next non-zero | |
140 | // bucket. If the remaining buckets are all empty, then we use the | |
141 | // boundary for the next bucket as the estimate. | |
142 | j := uint8(i + 1) | |
143 | min := bucketBoundary(j) | |
144 | if runningTotal < total { | |
145 | for h.buckets[j] == 0 { | |
146 | j++ | |
147 | } | |
148 | } | |
149 | max := bucketBoundary(j) | |
150 | return min + round(float64(max-min)/2) | |
151 | } else if runningTotal > percentOfTotal { | |
152 | // The value is in this bucket. Interpolate the value. | |
153 | delta := runningTotal - percentOfTotal | |
154 | percentBucket := float64(value-delta) / float64(value) | |
155 | bucketMin := bucketBoundary(uint8(i)) | |
156 | nextBucketMin := bucketBoundary(uint8(i + 1)) | |
157 | bucketSize := nextBucketMin - bucketMin | |
158 | return bucketMin + round(percentBucket*float64(bucketSize)) | |
159 | } | |
160 | } | |
161 | return bucketBoundary(bucketCount - 1) | |
162 | } | |
163 | ||
164 | // Median returns the estimated median of the observed values. | |
165 | func (h *histogram) median() int64 { | |
166 | return h.percentileBoundary(0.5) | |
167 | } | |
168 | ||
169 | // Add adds other to h. | |
170 | func (h *histogram) Add(other timeseries.Observable) { | |
171 | o := other.(*histogram) | |
172 | if o.valueCount == 0 { | |
173 | // Other histogram is empty | |
174 | } else if h.valueCount >= 0 && o.valueCount > 0 && h.value == o.value { | |
175 | // Both have a single bucketed value, aggregate them | |
176 | h.valueCount += o.valueCount | |
177 | } else { | |
178 | // Two different values necessitate buckets in this histogram | |
179 | h.allocateBuckets() | |
180 | if o.valueCount >= 0 { | |
181 | h.buckets[o.value] += o.valueCount | |
182 | } else { | |
183 | for i := range h.buckets { | |
184 | h.buckets[i] += o.buckets[i] | |
185 | } | |
186 | } | |
187 | } | |
188 | h.sumOfSquares += o.sumOfSquares | |
189 | h.sum += o.sum | |
190 | } | |
191 | ||
192 | // Clear resets the histogram to an empty state, removing all observed values. | |
193 | func (h *histogram) Clear() { | |
194 | h.buckets = nil | |
195 | h.value = 0 | |
196 | h.valueCount = 0 | |
197 | h.sum = 0 | |
198 | h.sumOfSquares = 0 | |
199 | } | |
200 | ||
201 | // CopyFrom copies from other, which must be a *histogram, into h. | |
202 | func (h *histogram) CopyFrom(other timeseries.Observable) { | |
203 | o := other.(*histogram) | |
204 | if o.valueCount == -1 { | |
205 | h.allocateBuckets() | |
206 | copy(h.buckets, o.buckets) | |
207 | } | |
208 | h.sum = o.sum | |
209 | h.sumOfSquares = o.sumOfSquares | |
210 | h.value = o.value | |
211 | h.valueCount = o.valueCount | |
212 | } | |
213 | ||
214 | // Multiply scales the histogram by the specified ratio. | |
215 | func (h *histogram) Multiply(ratio float64) { | |
216 | if h.valueCount == -1 { | |
217 | for i := range h.buckets { | |
218 | h.buckets[i] = int64(float64(h.buckets[i]) * ratio) | |
219 | } | |
220 | } else { | |
221 | h.valueCount = int64(float64(h.valueCount) * ratio) | |
222 | } | |
223 | h.sum = int64(float64(h.sum) * ratio) | |
224 | h.sumOfSquares = h.sumOfSquares * ratio | |
225 | } | |
226 | ||
227 | // New creates a new histogram. | |
228 | func (h *histogram) New() timeseries.Observable { | |
229 | r := new(histogram) | |
230 | r.Clear() | |
231 | return r | |
232 | } | |
233 | ||
234 | func (h *histogram) String() string { | |
235 | return fmt.Sprintf("%d, %f, %d, %d, %v", | |
236 | h.sum, h.sumOfSquares, h.value, h.valueCount, h.buckets) | |
237 | } | |
238 | ||
239 | // round returns the closest int64 to the argument | |
240 | func round(in float64) int64 { | |
241 | return int64(math.Floor(in + 0.5)) | |
242 | } | |
243 | ||
244 | // bucketBoundary returns the first value in the bucket. | |
245 | func bucketBoundary(bucket uint8) int64 { | |
246 | if bucket == 0 { | |
247 | return 0 | |
248 | } | |
249 | return 1 << bucket | |
250 | } | |
251 | ||
252 | // bucketData holds data about a specific bucket for use in distTmpl. | |
253 | type bucketData struct { | |
254 | Lower, Upper int64 | |
255 | N int64 | |
256 | Pct, CumulativePct float64 | |
257 | GraphWidth int | |
258 | } | |
259 | ||
260 | // data holds data about a Distribution for use in distTmpl. | |
261 | type data struct { | |
262 | Buckets []*bucketData | |
263 | Count, Median int64 | |
264 | Mean, StandardDeviation float64 | |
265 | } | |
266 | ||
267 | // maxHTMLBarWidth is the maximum width of the HTML bar for visualizing buckets. | |
268 | const maxHTMLBarWidth = 350.0 | |
269 | ||
270 | // newData returns data representing h for use in distTmpl. | |
271 | func (h *histogram) newData() *data { | |
272 | // Force the allocation of buckets to simplify the rendering implementation | |
273 | h.allocateBuckets() | |
274 | // We scale the bars on the right so that the largest bar is | |
275 | // maxHTMLBarWidth pixels in width. | |
276 | maxBucket := int64(0) | |
277 | for _, n := range h.buckets { | |
278 | if n > maxBucket { | |
279 | maxBucket = n | |
280 | } | |
281 | } | |
282 | total := h.total() | |
283 | barsizeMult := maxHTMLBarWidth / float64(maxBucket) | |
284 | var pctMult float64 | |
285 | if total == 0 { | |
286 | pctMult = 1.0 | |
287 | } else { | |
288 | pctMult = 100.0 / float64(total) | |
289 | } | |
290 | ||
291 | buckets := make([]*bucketData, len(h.buckets)) | |
292 | runningTotal := int64(0) | |
293 | for i, n := range h.buckets { | |
294 | if n == 0 { | |
295 | continue | |
296 | } | |
297 | runningTotal += n | |
298 | var upperBound int64 | |
299 | if i < bucketCount-1 { | |
300 | upperBound = bucketBoundary(uint8(i + 1)) | |
301 | } else { | |
302 | upperBound = math.MaxInt64 | |
303 | } | |
304 | buckets[i] = &bucketData{ | |
305 | Lower: bucketBoundary(uint8(i)), | |
306 | Upper: upperBound, | |
307 | N: n, | |
308 | Pct: float64(n) * pctMult, | |
309 | CumulativePct: float64(runningTotal) * pctMult, | |
310 | GraphWidth: int(float64(n) * barsizeMult), | |
311 | } | |
312 | } | |
313 | return &data{ | |
314 | Buckets: buckets, | |
315 | Count: total, | |
316 | Median: h.median(), | |
317 | Mean: h.average(), | |
318 | StandardDeviation: h.standardDeviation(), | |
319 | } | |
320 | } | |
321 | ||
322 | func (h *histogram) html() template.HTML { | |
323 | buf := new(bytes.Buffer) | |
324 | if err := distTmpl().Execute(buf, h.newData()); err != nil { | |
325 | buf.Reset() | |
326 | log.Printf("net/trace: couldn't execute template: %v", err) | |
327 | } | |
328 | return template.HTML(buf.String()) | |
329 | } | |
330 | ||
331 | var distTmplCache *template.Template | |
332 | var distTmplOnce sync.Once | |
333 | ||
334 | func distTmpl() *template.Template { | |
335 | distTmplOnce.Do(func() { | |
336 | // Input: data | |
337 | distTmplCache = template.Must(template.New("distTmpl").Parse(` | |
338 | <table> | |
339 | <tr> | |
340 | <td style="padding:0.25em">Count: {{.Count}}</td> | |
341 | <td style="padding:0.25em">Mean: {{printf "%.0f" .Mean}}</td> | |
342 | <td style="padding:0.25em">StdDev: {{printf "%.0f" .StandardDeviation}}</td> | |
343 | <td style="padding:0.25em">Median: {{.Median}}</td> | |
344 | </tr> | |
345 | </table> | |
346 | <hr> | |
347 | <table> | |
348 | {{range $b := .Buckets}} | |
349 | {{if $b}} | |
350 | <tr> | |
351 | <td style="padding:0 0 0 0.25em">[</td> | |
352 | <td style="text-align:right;padding:0 0.25em">{{.Lower}},</td> | |
353 | <td style="text-align:right;padding:0 0.25em">{{.Upper}})</td> | |
354 | <td style="text-align:right;padding:0 0.25em">{{.N}}</td> | |
355 | <td style="text-align:right;padding:0 0.25em">{{printf "%#.3f" .Pct}}%</td> | |
356 | <td style="text-align:right;padding:0 0.25em">{{printf "%#.3f" .CumulativePct}}%</td> | |
357 | <td><div style="background-color: blue; height: 1em; width: {{.GraphWidth}};"></div></td> | |
358 | </tr> | |
359 | {{end}} | |
360 | {{end}} | |
361 | </table> | |
362 | `)) | |
363 | }) | |
364 | return distTmplCache | |
365 | } |