]>
Commit | Line | Data |
---|---|---|
cd1b8e9a | 1 | import { readdir } from 'fs-extra' |
82e3ae15 | 2 | import { constants, NodeGCPerformanceDetail, PerformanceObserver } from 'perf_hooks' |
cd1b8e9a | 3 | import * as process from 'process' |
077600e0 | 4 | import { Meter, ObservableResult } from '@opentelemetry/api' |
3d2e4f03 C |
5 | import { ExplicitBucketHistogramAggregation } from '@opentelemetry/sdk-metrics' |
6 | import { View } from '@opentelemetry/sdk-metrics/build/src/view/View' | |
cd1b8e9a C |
7 | import { logger } from '@server/helpers/logger' |
8 | ||
9 | // Thanks to https://github.com/siimon/prom-client | |
10 | // We took their logic and adapted it for opentelemetry | |
11 | // Try to keep consistency with their metric name/description so it's easier to process (grafana dashboard template etc) | |
12 | ||
/**
 * Registers Node.js process/runtime metrics (CPU, memory, handles, file descriptors,
 * garbage collection, event loop lag, libuv requests, active resources) on an OpenTelemetry meter.
 *
 * Metric names and descriptions follow the ones used by prom-client.
 */
export class NodeJSObserversBuilder {

  /**
   * @param meter meter on which every instrument and callback is registered
   */
  constructor (private readonly meter: Meter) {
  }

  /**
   * @returns the views needed by the instruments created by this builder:
   * explicit histogram buckets (in seconds) for `nodejs_gc_duration_seconds`
   */
  static getViews () {
    return [
      new View({
        aggregation: new ExplicitBucketHistogramAggregation([ 0.001, 0.01, 0.1, 1, 2, 5 ]),
        instrumentName: 'nodejs_gc_duration_seconds'
      })
    ]
  }

  /**
   * Registers all the observers of this builder on the meter.
   */
  buildObservers () {
    this.buildCPUObserver()
    this.buildMemoryObserver()

    this.buildHandlesObserver()
    this.buildFileDescriptorsObserver()

    this.buildGCObserver()
    this.buildEventLoopLagObserver()

    this.buildLibUVActiveRequestsObserver()
    this.buildActiveResourcesObserver()
  }

  /**
   * Reports user, system and total CPU time of the current process, in seconds.
   */
  private buildCPUObserver () {
    const cpuTotal = this.meter.createObservableCounter('process_cpu_seconds_total', {
      description: 'Total user and system CPU time spent in seconds.'
    })
    const cpuUser = this.meter.createObservableCounter('process_cpu_user_seconds_total', {
      description: 'Total user CPU time spent in seconds.'
    })
    const cpuSystem = this.meter.createObservableCounter('process_cpu_system_seconds_total', {
      description: 'Total system CPU time spent in seconds.'
    })

    this.meter.addBatchObservableCallback(observableResult => {
      // An observable counter must be given the cumulative value, not the increment since the last collection.
      // process.cpuUsage() already returns cumulative microseconds for the current process.
      const { user, system } = process.cpuUsage()

      // Convert microseconds to seconds
      observableResult.observe(cpuTotal, (user + system) / 1e6)
      observableResult.observe(cpuUser, user / 1e6)
      observableResult.observe(cpuSystem, system / 1e6)
    }, [ cpuTotal, cpuUser, cpuSystem ])
  }

  /**
   * Reports the values of process.memoryUsage(), one observation per memory type
   * distinguished by the `memoryType` attribute.
   */
  private buildMemoryObserver () {
    this.meter.createObservableGauge('nodejs_memory_usage_bytes', {
      description: 'Memory'
    }).addCallback(observableResult => {
      const current = process.memoryUsage()

      observableResult.observe(current.heapTotal, { memoryType: 'heapTotal' })
      observableResult.observe(current.heapUsed, { memoryType: 'heapUsed' })
      observableResult.observe(current.arrayBuffers, { memoryType: 'arrayBuffers' })
      observableResult.observe(current.external, { memoryType: 'external' })
      observableResult.observe(current.rss, { memoryType: 'rss' })
    })
  }

  /**
   * Reports the number of active handles.
   * Does nothing if the runtime does not expose process._getActiveHandles().
   */
  private buildHandlesObserver () {
    if (typeof (process as any)._getActiveHandles !== 'function') return

    this.meter.createObservableGauge('nodejs_active_handles_total', {
      description: 'Total number of active handles.'
    }).addCallback(observableResult => {
      const handles = (process as any)._getActiveHandles()

      observableResult.observe(handles.length)
    })
  }

  /**
   * Records the duration (in seconds) of garbage collections in a histogram,
   * with the GC kind as attribute.
   */
  private buildGCObserver () {
    const kinds: Record<number, string> = {
      [constants.NODE_PERFORMANCE_GC_MAJOR]: 'major',
      [constants.NODE_PERFORMANCE_GC_MINOR]: 'minor',
      [constants.NODE_PERFORMANCE_GC_INCREMENTAL]: 'incremental',
      [constants.NODE_PERFORMANCE_GC_WEAKCB]: 'weakcb'
    }

    const histogram = this.meter.createHistogram('nodejs_gc_duration_seconds', {
      description: 'Garbage collection duration by kind, one of major, minor, incremental or weakcb'
    })

    const obs = new PerformanceObserver(list => {
      // Several GC entries can be delivered in a single callback: record all of them
      for (const entry of list.getEntries()) {
        // Node < 16 uses entry.kind
        // Node >= 16 uses entry.detail.kind
        // See: https://nodejs.org/docs/latest-v16.x/api/deprecations.html#deprecations_dep0152_extension_performanceentry_properties
        const kind = entry.detail
          ? kinds[(entry.detail as NodeGCPerformanceDetail).kind]
          : kinds[(entry as any).kind]

        // Convert duration from milliseconds to seconds
        histogram.record(entry.duration / 1000, {
          kind
        })
      }
    })

    obs.observe({ entryTypes: [ 'gc' ] })
  }

  /**
   * Reports the event loop lag: the time elapsed between scheduling a setImmediate()
   * callback and its execution, in seconds.
   */
  private buildEventLoopLagObserver () {
    const reportEventloopLag = (start: [ number, number ], observableResult: ObservableResult, res: () => void) => {
      const delta = process.hrtime(start)
      const nanosec = delta[0] * 1e9 + delta[1]
      const seconds = nanosec / 1e9

      observableResult.observe(seconds)

      // Resolve the promise returned by the gauge callback, now that the value has been observed
      res()
    }

    this.meter.createObservableGauge('nodejs_eventloop_lag_seconds', {
      description: 'Lag of event loop in seconds.'
    }).addCallback(observableResult => {
      return new Promise<void>(res => {
        const start = process.hrtime()

        setImmediate(reportEventloopLag, start, observableResult, res)
      })
    })
  }

  /**
   * Reports the number of entries found in /proc/self/fd.
   * If the directory cannot be listed, nothing is observed and a debug message is logged.
   */
  private buildFileDescriptorsObserver () {
    this.meter.createObservableGauge('process_open_fds', {
      description: 'Number of open file descriptors.'
    }).addCallback(async observableResult => {
      try {
        const fds = await readdir('/proc/self/fd')

        // NOTE(review): "- 1" presumably excludes the descriptor opened by the listing itself (prom-client does the same)
        observableResult.observe(fds.length - 1)
      } catch (err) {
        logger.debug('Cannot list file descriptors of current process for OpenTelemetry.', { err })
      }
    })
  }

  /**
   * Reports the number of active libuv requests.
   * Does nothing if the runtime does not expose process._getActiveRequests().
   */
  private buildLibUVActiveRequestsObserver () {
    if (typeof (process as any)._getActiveRequests !== 'function') return

    this.meter.createObservableGauge('nodejs_active_requests_total', {
      description: 'Total number of active libuv requests.'
    }).addCallback(observableResult => {
      const requests = (process as any)._getActiveRequests()

      observableResult.observe(requests.length)
    })
  }

  /**
   * Reports the number of active resources, in total and grouped by resource type.
   * Does nothing if the runtime does not expose process.getActiveResourcesInfo().
   */
  private buildActiveResourcesObserver () {
    if (typeof (process as any).getActiveResourcesInfo !== 'function') return

    // The number of active resources goes up and down: it has to be a gauge, not a (monotonic) counter
    const grouped = this.meter.createObservableGauge('nodejs_active_resources', {
      description: 'Number of active resources that are currently keeping the event loop alive, grouped by async resource type.'
    })
    const total = this.meter.createObservableGauge('nodejs_active_resources_total', {
      description: 'Total number of active resources.'
    })

    this.meter.addBatchObservableCallback(observableResult => {
      const resources: string[] = (process as any).getActiveResourcesInfo()

      // Resource type => number of active resources of this type
      const countByType = new Map<string, number>()

      for (const resource of resources) {
        countByType.set(resource, (countByType.get(resource) ?? 0) + 1)
      }

      for (const [ type, count ] of countByType) {
        observableResult.observe(grouped, count, { type })
      }

      observableResult.observe(total, resources.length)
    }, [ grouped, total ])
  }
}