aboutsummaryrefslogtreecommitdiffhomepage
path: root/server/lib/opentelemetry/metric-helpers/nodejs-observers-builder.ts
diff options
context:
space:
mode:
Diffstat (limited to 'server/lib/opentelemetry/metric-helpers/nodejs-observers-builder.ts')
-rw-r--r--server/lib/opentelemetry/metric-helpers/nodejs-observers-builder.ts198
1 files changed, 198 insertions, 0 deletions
diff --git a/server/lib/opentelemetry/metric-helpers/nodejs-observers-builder.ts b/server/lib/opentelemetry/metric-helpers/nodejs-observers-builder.ts
new file mode 100644
index 000000000..c51a50ebb
--- /dev/null
+++ b/server/lib/opentelemetry/metric-helpers/nodejs-observers-builder.ts
@@ -0,0 +1,198 @@
1import { readdir } from 'fs-extra'
2import { constants, PerformanceObserver } from 'perf_hooks'
3import * as process from 'process'
4import { Meter, ObservableResult } from '@opentelemetry/api-metrics'
5import { ExplicitBucketHistogramAggregation, MeterProvider } from '@opentelemetry/sdk-metrics-base'
6import { View } from '@opentelemetry/sdk-metrics-base/build/src/view/View'
7import { logger } from '@server/helpers/logger'
8
9// Thanks to https://github.com/siimon/prom-client
10// We took their logic and adapted it for opentelemetry
11// Try to keep consistency with their metric name/description so it's easier to process (grafana dashboard template etc)
12
/**
 * Registers OpenTelemetry instruments that expose Node.js runtime metrics:
 * CPU time, memory usage, active handles, open file descriptors, garbage collection
 * durations, event loop lag, libuv requests and active resources.
 *
 * Metric names and descriptions follow prom-client so they are easier to process
 * with existing tooling (grafana dashboard templates etc).
 */
export class NodeJSObserversBuilder {

  /**
   * @param meter - meter used to create instruments and register their callbacks
   * @param meterProvider - provider on which the GC histogram view is registered
   */
  constructor (private readonly meter: Meter, private readonly meterProvider: MeterProvider) {
  }

  /** Registers all the observers defined in this class. */
  buildObservers () {
    this.buildCPUObserver()
    this.buildMemoryObserver()

    this.buildHandlesObserver()
    this.buildFileDescriptorsObserver()

    this.buildGCObserver()
    this.buildEventLoopLagObserver()

    this.buildLibUVActiveRequestsObserver()
    this.buildActiveResourcesObserver()
  }

  /**
   * Exposes total/user/system CPU time of the current process, in seconds.
   *
   * Observable counters must report the absolute (cumulative) value on each collection,
   * so the values returned by `process.cpuUsage()` are observed as is instead of
   * a delta since the previous collection.
   */
  private buildCPUObserver () {
    const cpuTotal = this.meter.createObservableCounter('process_cpu_seconds_total', {
      description: 'Total user and system CPU time spent in seconds.'
    })
    const cpuUser = this.meter.createObservableCounter('process_cpu_user_seconds_total', {
      description: 'Total user CPU time spent in seconds.'
    })
    const cpuSystem = this.meter.createObservableCounter('process_cpu_system_seconds_total', {
      description: 'Total system CPU time spent in seconds.'
    })

    this.meter.addBatchObservableCallback(observableResult => {
      // Without argument, cpuUsage() returns cumulative microseconds
      const { user, system } = process.cpuUsage()

      observableResult.observe(cpuTotal, (user + system) / 1e6)
      observableResult.observe(cpuUser, user / 1e6)
      observableResult.observe(cpuSystem, system / 1e6)
    }, [ cpuTotal, cpuUser, cpuSystem ])
  }

  /** Exposes `process.memoryUsage()` fields through a single gauge, one `memoryType` attribute per field. */
  private buildMemoryObserver () {
    this.meter.createObservableGauge('nodejs_memory_usage_bytes', {
      description: 'Memory'
    }).addCallback(observableResult => {
      const current = process.memoryUsage()

      observableResult.observe(current.heapTotal, { memoryType: 'heapTotal' })
      observableResult.observe(current.heapUsed, { memoryType: 'heapUsed' })
      observableResult.observe(current.arrayBuffers, { memoryType: 'arrayBuffers' })
      observableResult.observe(current.external, { memoryType: 'external' })
      observableResult.observe(current.rss, { memoryType: 'rss' })
    })
  }

  /** Exposes the number of active handles. Does nothing if `process._getActiveHandles` is not available. */
  private buildHandlesObserver () {
    if (typeof (process as any)._getActiveHandles !== 'function') return

    this.meter.createObservableGauge('nodejs_active_handles_total', {
      description: 'Total number of active handles.'
    }).addCallback(observableResult => {
      const handles = (process as any)._getActiveHandles()

      observableResult.observe(handles.length)
    })
  }

  /**
   * Records the duration (in seconds) of every garbage collection reported by a
   * `PerformanceObserver`, with the GC kind as attribute.
   */
  private buildGCObserver () {
    const kinds = {
      [constants.NODE_PERFORMANCE_GC_MAJOR]: 'major',
      [constants.NODE_PERFORMANCE_GC_MINOR]: 'minor',
      [constants.NODE_PERFORMANCE_GC_INCREMENTAL]: 'incremental',
      [constants.NODE_PERFORMANCE_GC_WEAKCB]: 'weakcb'
    }

    // Custom bucket boundaries for this histogram
    this.meterProvider.addView(
      new View({ aggregation: new ExplicitBucketHistogramAggregation([ 0.001, 0.01, 0.1, 1, 2, 5 ]) }),
      { instrument: { name: 'nodejs_gc_duration_seconds' } }
    )

    const histogram = this.meter.createHistogram('nodejs_gc_duration_seconds', {
      description: 'Garbage collection duration by kind, one of major, minor, incremental or weakcb'
    })

    const obs = new PerformanceObserver(list => {
      // A single notification can carry several entries: record all of them
      for (const entry of list.getEntries()) {
        // Node < 16 uses entry.kind
        // Node >= 16 uses entry.detail.kind
        // See: https://nodejs.org/docs/latest-v16.x/api/deprecations.html#deprecations_dep0152_extension_performanceentry_properties
        const kind = (entry as any).detail
          ? kinds[(entry as any).detail.kind]
          : kinds[(entry as any).kind]

        // Convert duration from milliseconds to seconds
        histogram.record(entry.duration / 1000, {
          kind
        })
      }
    })

    obs.observe({ entryTypes: [ 'gc' ] })
  }

  /**
   * Exposes the event loop lag: the time elapsed between scheduling a `setImmediate`
   * callback (when the metric is collected) and its execution.
   */
  private buildEventLoopLagObserver () {
    const reportEventloopLag = (start: [ number, number ], observableResult: ObservableResult, res: () => void) => {
      const delta = process.hrtime(start)
      const nanosec = delta[0] * 1e9 + delta[1]
      const seconds = nanosec / 1e9

      observableResult.observe(seconds)

      // Resolve the promise returned to the collector
      res()
    }

    this.meter.createObservableGauge('nodejs_eventloop_lag_seconds', {
      description: 'Lag of event loop in seconds.'
    }).addCallback(observableResult => {
      return new Promise<void>(res => {
        const start = process.hrtime()

        setImmediate(reportEventloopLag, start, observableResult, res)
      })
    })
  }

  /**
   * Exposes the number of open file descriptors by listing `/proc/self/fd`.
   * If the directory cannot be read, nothing is observed and the error is logged at debug level.
   */
  private buildFileDescriptorsObserver () {
    this.meter.createObservableGauge('process_open_fds', {
      description: 'Number of open file descriptors.'
    }).addCallback(async observableResult => {
      try {
        const fds = await readdir('/proc/self/fd')
        // NOTE(review): the "- 1" presumably discounts the descriptor opened by readdir itself, as prom-client does
        observableResult.observe(fds.length - 1)
      } catch (err) {
        logger.debug('Cannot list file descriptors of current process for OpenTelemetry.', { err })
      }
    })
  }

  /** Exposes the number of active libuv requests. Does nothing if `process._getActiveRequests` is not available. */
  private buildLibUVActiveRequestsObserver () {
    if (typeof (process as any)._getActiveRequests !== 'function') return

    this.meter.createObservableGauge('nodejs_active_requests_total', {
      description: 'Total number of active libuv requests.'
    }).addCallback(observableResult => {
      const requests = (process as any)._getActiveRequests()

      observableResult.observe(requests.length)
    })
  }

  /**
   * Exposes the number of active resources, in total and grouped by resource type.
   * Does nothing if `process.getActiveResourcesInfo` is not available.
   *
   * These values can decrease between two collections, so gauges are used instead of
   * (monotonic) counters.
   */
  private buildActiveResourcesObserver () {
    if (typeof (process as any).getActiveResourcesInfo !== 'function') return

    const grouped = this.meter.createObservableGauge('nodejs_active_resources', {
      description: 'Number of active resources that are currently keeping the event loop alive, grouped by async resource type.'
    })
    const total = this.meter.createObservableGauge('nodejs_active_resources_total', {
      description: 'Total number of active resources.'
    })

    this.meter.addBatchObservableCallback(observableResult => {
      const resources: string[] = (process as any).getActiveResourcesInfo()

      // A Map avoids collisions between resource types and Object.prototype keys
      const countByType = new Map<string, number>()

      for (const resource of resources) {
        countByType.set(resource, (countByType.get(resource) ?? 0) + 1)
      }

      for (const [ type, count ] of countByType) {
        observableResult.observe(grouped, count, { type })
      }

      observableResult.observe(total, resources.length)
    }, [ grouped, total ])
  }
}