]>
Commit | Line | Data |
---|---|---|
cd1b8e9a C |
1 | import { readdir } from 'fs-extra' |
2 | import { constants, PerformanceObserver } from 'perf_hooks' | |
3 | import * as process from 'process' | |
4 | import { Meter, ObservableResult } from '@opentelemetry/api-metrics' | |
5 | import { ExplicitBucketHistogramAggregation, MeterProvider } from '@opentelemetry/sdk-metrics-base' | |
6 | import { View } from '@opentelemetry/sdk-metrics-base/build/src/view/View' | |
7 | import { logger } from '@server/helpers/logger' | |
8 | ||
9 | // Thanks to https://github.com/siimon/prom-client | |
10 | // We took their logic and adapted it for OpenTelemetry | |
11 | // Try to keep consistency with their metric name/description so it's easier to process (grafana dashboard template etc) | |
12 | ||
/**
 * Registers Node.js process and runtime metrics (CPU, memory, handles, file descriptors,
 * GC, event loop lag, libuv requests, active resources) on an OpenTelemetry meter.
 *
 * Metric names and descriptions follow prom-client so existing dashboards keep working.
 */
export class NodeJSObserversBuilder {

  /**
   * @param meter - meter used to create every instrument
   * @param meterProvider - provider on which the GC histogram view (custom buckets) is registered
   */
  constructor (private readonly meter: Meter, private readonly meterProvider: MeterProvider) {
  }

  /**
   * Registers all observers. Builders that rely on optional `process` APIs
   * return without registering anything when the API is missing.
   */
  buildObservers () {
    this.buildCPUObserver()
    this.buildMemoryObserver()

    this.buildHandlesObserver()
    this.buildFileDescriptorsObserver()

    this.buildGCObserver()
    this.buildEventLoopLagObserver()

    this.buildLibUVActiveRequestsObserver()
    this.buildActiveResourcesObserver()
  }

  /**
   * Exposes total, user and system CPU time of the process, in seconds.
   *
   * Observable counters must report the absolute cumulative value on each collection
   * (not the increment since the previous one), so the totals returned by
   * `process.cpuUsage()` are observed directly.
   */
  private buildCPUObserver () {
    const cpuTotal = this.meter.createObservableCounter('process_cpu_seconds_total', {
      description: 'Total user and system CPU time spent in seconds.'
    })
    const cpuUser = this.meter.createObservableCounter('process_cpu_user_seconds_total', {
      description: 'Total user CPU time spent in seconds.'
    })
    const cpuSystem = this.meter.createObservableCounter('process_cpu_system_seconds_total', {
      description: 'Total system CPU time spent in seconds.'
    })

    this.meter.addBatchObservableCallback(observableResult => {
      // process.cpuUsage() returns cumulative microseconds
      const { user, system } = process.cpuUsage()

      observableResult.observe(cpuTotal, (user + system) / 1e6)
      observableResult.observe(cpuUser, user / 1e6)
      observableResult.observe(cpuSystem, system / 1e6)
    }, [ cpuTotal, cpuUser, cpuSystem ])
  }

  /**
   * Exposes the fields of `process.memoryUsage()` as one gauge,
   * distinguished by the `memoryType` attribute.
   */
  private buildMemoryObserver () {
    this.meter.createObservableGauge('nodejs_memory_usage_bytes', {
      description: 'Memory'
    }).addCallback(observableResult => {
      const current = process.memoryUsage()

      observableResult.observe(current.heapTotal, { memoryType: 'heapTotal' })
      observableResult.observe(current.heapUsed, { memoryType: 'heapUsed' })
      observableResult.observe(current.arrayBuffers, { memoryType: 'arrayBuffers' })
      observableResult.observe(current.external, { memoryType: 'external' })
      observableResult.observe(current.rss, { memoryType: 'rss' })
    })
  }

  /**
   * Exposes the number of active handles.
   * Skipped when the undocumented `process._getActiveHandles()` is not available.
   */
  private buildHandlesObserver () {
    if (typeof (process as any)._getActiveHandles !== 'function') return

    this.meter.createObservableGauge('nodejs_active_handles_total', {
      description: 'Total number of active handles.'
    }).addCallback(observableResult => {
      const handles = (process as any)._getActiveHandles()

      observableResult.observe(handles.length)
    })
  }

  /**
   * Records garbage collection durations (in seconds) in a histogram, labelled by GC kind.
   */
  private buildGCObserver () {
    const kinds: Record<number, string> = {
      [constants.NODE_PERFORMANCE_GC_MAJOR]: 'major',
      [constants.NODE_PERFORMANCE_GC_MINOR]: 'minor',
      [constants.NODE_PERFORMANCE_GC_INCREMENTAL]: 'incremental',
      [constants.NODE_PERFORMANCE_GC_WEAKCB]: 'weakcb'
    }

    // Custom bucket boundaries for the GC histogram
    this.meterProvider.addView(
      new View({ aggregation: new ExplicitBucketHistogramAggregation([ 0.001, 0.01, 0.1, 1, 2, 5 ]) }),
      { instrument: { name: 'nodejs_gc_duration_seconds' } }
    )

    const histogram = this.meter.createHistogram('nodejs_gc_duration_seconds', {
      description: 'Garbage collection duration by kind, one of major, minor, incremental or weakcb'
    })

    const obs = new PerformanceObserver(list => {
      // A single notification may carry several GC entries: record all of them
      for (const entry of list.getEntries()) {
        // Node < 16 uses entry.kind
        // Node >= 16 uses entry.detail.kind
        // See: https://nodejs.org/docs/latest-v16.x/api/deprecations.html#deprecations_dep0152_extension_performanceentry_properties
        const detail = (entry as any).detail
        const kind = detail
          ? kinds[detail.kind]
          : kinds[(entry as any).kind]

        // Convert duration from milliseconds to seconds
        histogram.record(entry.duration / 1000, { kind })
      }
    })

    obs.observe({ entryTypes: [ 'gc' ] })
  }

  /**
   * Exposes the event loop lag: the time elapsed between scheduling a `setImmediate`
   * callback during collection and its execution.
   */
  private buildEventLoopLagObserver () {
    const reportEventloopLag = (start: [ number, number ], observableResult: ObservableResult, res: () => void) => {
      const delta = process.hrtime(start)
      const nanosec = delta[0] * 1e9 + delta[1]
      const seconds = nanosec / 1e9

      observableResult.observe(seconds)

      // Resolve the promise returned by the observable callback
      res()
    }

    this.meter.createObservableGauge('nodejs_eventloop_lag_seconds', {
      description: 'Lag of event loop in seconds.'
    }).addCallback(observableResult => {
      return new Promise<void>(res => {
        const start = process.hrtime()

        setImmediate(reportEventloopLag, start, observableResult, res)
      })
    })
  }

  /**
   * Exposes the number of open file descriptors by listing `/proc/self/fd`.
   * When the directory cannot be read, nothing is observed and the error is logged at debug level.
   */
  private buildFileDescriptorsObserver () {
    this.meter.createObservableGauge('process_open_fds', {
      description: 'Number of open file descriptors.'
    }).addCallback(async observableResult => {
      try {
        const fds = await readdir('/proc/self/fd')

        // Presumably excludes the descriptor opened by readdir itself (same as prom-client)
        observableResult.observe(fds.length - 1)
      } catch (err) {
        logger.debug('Cannot list file descriptors of current process for OpenTelemetry.', { err })
      }
    })
  }

  /**
   * Exposes the number of active libuv requests.
   * Skipped when the undocumented `process._getActiveRequests()` is not available.
   */
  private buildLibUVActiveRequestsObserver () {
    if (typeof (process as any)._getActiveRequests !== 'function') return

    this.meter.createObservableGauge('nodejs_active_requests_total', {
      description: 'Total number of active libuv requests.'
    }).addCallback(observableResult => {
      const requests = (process as any)._getActiveRequests()

      observableResult.observe(requests.length)
    })
  }

  /**
   * Exposes the number of active resources, both grouped by resource type and in total.
   * Skipped when `process.getActiveResourcesInfo()` is not available.
   *
   * These values go up and down, so they are gauges rather than (monotonic) counters.
   */
  private buildActiveResourcesObserver () {
    if (typeof (process as any).getActiveResourcesInfo !== 'function') return

    const grouped = this.meter.createObservableGauge('nodejs_active_resources', {
      description: 'Number of active resources that are currently keeping the event loop alive, grouped by async resource type.'
    })
    const total = this.meter.createObservableGauge('nodejs_active_resources_total', {
      description: 'Total number of active resources.'
    })

    this.meter.addBatchObservableCallback(observableResult => {
      const resources: string[] = (process as any).getActiveResourcesInfo()

      // Count occurrences of each resource type
      const countByType = new Map<string, number>()
      for (const resource of resources) {
        countByType.set(resource, (countByType.get(resource) ?? 0) + 1)
      }

      for (const [ type, count ] of countByType) {
        observableResult.observe(grouped, count, { type })
      }

      observableResult.observe(total, resources.length)
    }, [ grouped, total ])
  }
}