1 import { readdir } from 'fs-extra'
2 import { constants, NodeGCPerformanceDetail, PerformanceObserver } from 'perf_hooks'
3 import * as process from 'process'
4 import { Meter, ObservableResult } from '@opentelemetry/api'
5 import { ExplicitBucketHistogramAggregation } from '@opentelemetry/sdk-metrics'
6 import { View } from '@opentelemetry/sdk-metrics/build/src/view/View'
7 import { logger } from '@server/helpers/logger'
9 // Thanks to https://github.com/siimon/prom-client
10 // We took their logic and adapted it for opentelemetry
11 // Try to keep consistency with their metric name/description so it's easier to process (grafana dashboard template etc)
13 export class NodeJSObserversBuilder {
// Keeps the OpenTelemetry meter: the private build*Observer methods of this class use it to create their instruments.
15 constructor (private readonly meter: Meter) {
// NOTE(review): the lines between the constructor header and the two options below are missing from this excerpt.
// Presumably they belong to a View definition (View is imported at the top of the file) — confirm against the full file.
// Explicit histogram bucket boundaries, from 0.001 up to 5
21 aggregation: new ExplicitBucketHistogramAggregation([ 0.001, 0.01, 0.1, 1, 2, 5 ]),
// Same instrument name as the GC duration histogram created in buildGCObserver, which records values in seconds
22 instrumentName: 'nodejs_gc_duration_seconds'
// NOTE(review): the header of the method wrapping the calls below is missing from this excerpt.
// CPU time and memory usage of the process
28 this.buildCPUObserver()
29 this.buildMemoryObserver()
// Active handles and open file descriptors
31 this.buildHandlesObserver()
32 this.buildFileDescriptorsObserver()
// Garbage collection durations and event loop lag
34 this.buildGCObserver()
35 this.buildEventLoopLagObserver()
// Active libuv requests and active resources (each of these two returns early when the runtime lacks the API)
37 this.buildLibUVActiveRequestsObserver()
38 this.buildActiveResourcesObserver()
/**
 * Registers three observable counters exposing the CPU time consumed by the current process, in seconds:
 * total (user + system), user only and system only.
 *
 * The callback of an observable counter has to report the absolute, ever-increasing value of the counter,
 * not the increment since the previous collection. So every collection observes the cumulative figures
 * returned by process.cpuUsage() instead of the difference with the previous reading.
 */
private buildCPUObserver () {
  const cpuTotal = this.meter.createObservableCounter('process_cpu_seconds_total', {
    description: 'Total user and system CPU time spent in seconds.'
  })
  const cpuUser = this.meter.createObservableCounter('process_cpu_user_seconds_total', {
    description: 'Total user CPU time spent in seconds.'
  })
  const cpuSystem = this.meter.createObservableCounter('process_cpu_system_seconds_total', {
    description: 'Total system CPU time spent in seconds.'
  })

  // process.cpuUsage() reports microseconds, the metrics are expressed in seconds
  const microsToSeconds = (micros: number) => micros / 1e6

  // A batch callback is used so the three counters are fed from the same cpuUsage() reading
  this.meter.addBatchObservableCallback(observableResult => {
    // Called without argument, cpuUsage() returns the CPU time spent by the process so far (cumulative)
    const { user, system } = process.cpuUsage()

    observableResult.observe(cpuTotal, microsToSeconds(user + system))
    observableResult.observe(cpuUser, microsToSeconds(user))
    observableResult.observe(cpuSystem, microsToSeconds(system))
  }, [ cpuTotal, cpuUser, cpuSystem ])
}
// Registers the 'nodejs_memory_usage_bytes' observable gauge.
// Each collection reports five process.memoryUsage() figures on that single instrument,
// distinguished by their memoryType attribute.
69 private buildMemoryObserver () {
70 this.meter.createObservableGauge('nodejs_memory_usage_bytes', {
// NOTE(review): the gauge options given here are not visible in this excerpt
72 }).addCallback(observableResult => {
// Single snapshot, so the five values below come from the same process.memoryUsage() call
73 const current = process.memoryUsage()
// The memoryType attribute value is the name of the process.memoryUsage() field it comes from
75 observableResult.observe(current.heapTotal, { memoryType: 'heapTotal' })
76 observableResult.observe(current.heapUsed, { memoryType: 'heapUsed' })
77 observableResult.observe(current.arrayBuffers, { memoryType: 'arrayBuffers' })
78 observableResult.observe(current.external, { memoryType: 'external' })
79 observableResult.observe(current.rss, { memoryType: 'rss' })
/**
 * Registers the 'nodejs_active_handles_total' observable gauge, which reports how many entries
 * process._getActiveHandles() returns at collection time.
 *
 * _getActiveHandles is not part of the typed process API (hence the cast),
 * so nothing is registered when the runtime does not expose it as a function.
 */
private buildHandlesObserver () {
  const untypedProcess = process as any

  const isSupported = typeof untypedProcess._getActiveHandles === 'function'
  if (!isSupported) return

  const gauge = this.meter.createObservableGauge('nodejs_active_handles_total', {
    description: 'Total number of active handles.'
  })

  gauge.addCallback(result => {
    result.observe(untypedProcess._getActiveHandles().length)
  })
}
/**
 * Records the duration of garbage collections in the 'nodejs_gc_duration_seconds' histogram,
 * with a `kind` attribute (major, minor, incremental or weakcb).
 *
 * Durations come from 'gc' performance entries delivered to a PerformanceObserver.
 */
private buildGCObserver () {
  // Maps the numeric GC kind of a performance entry to the attribute value exposed on the metric
  const kinds = {
    [constants.NODE_PERFORMANCE_GC_MAJOR]: 'major',
    [constants.NODE_PERFORMANCE_GC_MINOR]: 'minor',
    [constants.NODE_PERFORMANCE_GC_INCREMENTAL]: 'incremental',
    [constants.NODE_PERFORMANCE_GC_WEAKCB]: 'weakcb'
  }

  const histogram = this.meter.createHistogram('nodejs_gc_duration_seconds', {
    description: 'Garbage collection duration by kind, one of major, minor, incremental or weakcb'
  })

  const obs = new PerformanceObserver(list => {
    // A single notification can carry several entries: record all of them, not only the first one
    for (const entry of list.getEntries()) {
      // Node < 16 uses entry.kind
      // Node >= 16 uses entry.detail.kind
      // See: https://nodejs.org/docs/latest-v16.x/api/deprecations.html#deprecations_dep0152_extension_performanceentry_properties
      const kind = entry.detail
        ? kinds[(entry.detail as NodeGCPerformanceDetail).kind]
        : kinds[(entry as any).kind]

      // Convert duration from milliseconds to seconds
      histogram.record(entry.duration / 1000, {
        kind
      })
    }
  })

  obs.observe({ entryTypes: [ 'gc' ] })
}
/**
 * Registers the 'nodejs_eventloop_lag_seconds' observable gauge.
 *
 * On each collection a setImmediate callback is scheduled, and the time elapsed until it actually runs
 * is observed (in seconds). The gauge callback returns a promise that is settled once the value has been observed.
 */
private buildEventLoopLagObserver () {
  const gauge = this.meter.createObservableGauge('nodejs_eventloop_lag_seconds', {
    description: 'Lag of event loop in seconds.'
  })

  gauge.addCallback(observableResult => {
    return new Promise<void>(resolve => {
      const scheduledAt = process.hrtime()

      setImmediate(() => {
        // hrtime(previous) returns the elapsed time as a [ seconds, nanoseconds ] tuple
        const [ elapsedSeconds, elapsedNanos ] = process.hrtime(scheduledAt)
        const totalNanos = elapsedSeconds * 1e9 + elapsedNanos

        observableResult.observe(totalNanos / 1e9)

        resolve()
      })
    })
  })
}
/**
 * Registers the 'process_open_fds' observable gauge, fed by listing the '/proc/self/fd' directory.
 *
 * When the directory cannot be listed, nothing is observed and the error is only logged at debug level.
 */
private buildFileDescriptorsObserver () {
  const gauge = this.meter.createObservableGauge('process_open_fds', {
    description: 'Number of open file descriptors.'
  })

  gauge.addCallback(async observableResult => {
    try {
      const entries = await readdir('/proc/self/fd')

      // NOTE(review): presumably the -1 discounts the descriptor opened to list the directory itself — confirm
      const openFds = entries.length - 1

      observableResult.observe(openFds)
    } catch (err) {
      logger.debug('Cannot list file descriptors of current process for OpenTelemetry.', { err })
    }
  })
}
/**
 * Registers the 'nodejs_active_requests_total' observable gauge, which reports how many entries
 * process._getActiveRequests() returns at collection time.
 *
 * _getActiveRequests is not part of the typed process API (hence the cast),
 * so nothing is registered when the runtime does not expose it as a function.
 */
private buildLibUVActiveRequestsObserver () {
  const untypedProcess = process as any

  const isSupported = typeof untypedProcess._getActiveRequests === 'function'
  if (!isSupported) return

  const gauge = this.meter.createObservableGauge('nodejs_active_requests_total', {
    description: 'Total number of active libuv requests.'
  })

  gauge.addCallback(result => {
    result.observe(untypedProcess._getActiveRequests().length)
  })
}
/**
 * Registers two observable instruments built from process.getActiveResourcesInfo():
 *  - 'nodejs_active_resources': number of active resources per resource type (`type` attribute)
 *  - 'nodejs_active_resources_total': number of active resources, all types together
 *
 * Both values describe the current state and can decrease between two collections, so they are exposed
 * as gauges (like the active handles/requests observers of this class) rather than as counters.
 *
 * Nothing is registered when the runtime does not provide process.getActiveResourcesInfo.
 */
private buildActiveResourcesObserver () {
  if (typeof (process as any).getActiveResourcesInfo !== 'function') return

  const grouped = this.meter.createObservableGauge('nodejs_active_resources', {
    description: 'Number of active resources that are currently keeping the event loop alive, grouped by async resource type.'
  })
  const total = this.meter.createObservableGauge('nodejs_active_resources_total', {
    description: 'Total number of active resources.'
  })

  // A batch callback is used so both instruments are fed from the same getActiveResourcesInfo() reading
  this.meter.addBatchObservableCallback(observableResult => {
    // One entry per active resource, the entry being the resource type
    const resources: string[] = (process as any).getActiveResourcesInfo()

    // A Map (instead of a plain object) so a resource type can never collide with an Object.prototype key
    const countByType = new Map<string, number>()
    for (const type of resources) {
      countByType.set(type, (countByType.get(type) || 0) + 1)
    }

    countByType.forEach((count, type) => {
      observableResult.observe(grouped, count, { type })
    })

    observableResult.observe(total, resources.length)
  }, [ grouped, total ])
}