3 * Copyright 2018 gRPC authors.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
27 "google.golang.org/grpc/connectivity"
28 "google.golang.org/grpc/credentials"
29 "google.golang.org/grpc/grpclog"
32 // entry represents a node in the channelz database.
33 type entry interface {
34 // addChild adds a child e, whose channelz id is id to child list
35 addChild(id int64, e entry)
36 // deleteChild deletes the child whose channelz id is id from the child list
38 // triggerDelete tries to delete self from channelz database. However, if child
39 // list is not empty, then deletion from the database is on hold until the last
40 // child is deleted from database.
42 // deleteSelfIfReady checks whether triggerDelete() has been called before, and whether child
43 // list is now empty. If both conditions are met, then delete self from database.
45 // getParentID returns parent ID of the entry. 0 value parent ID means no parent.
49 // dummyEntry is a fake entry to handle entry not found case.
50 type dummyEntry struct {
54 func (d *dummyEntry) addChild(id int64, e entry) {
55 // Note: It is possible for a normal program to reach here under a race condition.
56 // For example, there could be a race between ClientConn.Close() info being propagated
57 // to addrConn and http2Client. ClientConn.Close() cancels the context, which causes
58 // http2Client to error. The error info is then caught by the transport monitor
59 // before addrConn.tearDown() is called inside ClientConn.Close(). Therefore,
60 // the addrConn will create a new transport. When registering the new transport in
61 // channelz, its parent addrConn could have already been torn down and deleted
62 // from channelz tracking, and thus the code here is reached.
63 grpclog.Infof("attempt to add child of type %T with id %d to a parent (id=%d) that doesn't currently exist", e, id, d.idNotFound)
66 func (d *dummyEntry) deleteChild(id int64) {
67 // It is possible for a normal program to reach here under a race condition.
68 // Refer to the example described in addChild().
69 grpclog.Infof("attempt to delete child with id %d from a parent (id=%d) that doesn't currently exist", id, d.idNotFound)
72 func (d *dummyEntry) triggerDelete() {
73 grpclog.Warningf("attempt to delete an entry (id=%d) that doesn't currently exist", d.idNotFound)
76 func (*dummyEntry) deleteSelfIfReady() {
77 // code should not reach here. deleteSelfIfReady is always called on an existing entry.
80 func (*dummyEntry) getParentID() int64 {
84 // ChannelMetric defines the info channelz provides for a specific Channel, which
85 // includes ChannelInternalMetric and channelz-specific data, such as channelz id,
87 type ChannelMetric struct {
88 // ID is the channelz id of this channel.
90 // RefName is the human readable reference string of this channel.
92 // ChannelData contains channel internal metric reported by the channel through
94 ChannelData *ChannelInternalMetric
95 // NestedChans tracks the nested channel type children of this channel in the format of
96 // a map from nested channel channelz id to corresponding reference string.
97 NestedChans map[int64]string
98 // SubChans tracks the subchannel type children of this channel in the format of a
99 // map from subchannel channelz id to corresponding reference string.
100 SubChans map[int64]string
101 // Sockets tracks the socket type children of this channel in the format of a map
102 // from socket channelz id to corresponding reference string.
103 // Note that the current grpc implementation doesn't allow a channel to have sockets directly;
104 // therefore, this field is unused.
105 Sockets map[int64]string
106 // Trace contains the most recent traced events.
110 // SubChannelMetric defines the info channelz provides for a specific SubChannel,
111 // which includes ChannelInternalMetric and channelz-specific data, such as
112 // channelz id, child list, etc.
113 type SubChannelMetric struct {
114 // ID is the channelz id of this subchannel.
116 // RefName is the human readable reference string of this subchannel.
118 // ChannelData contains subchannel internal metric reported by the subchannel
119 // through ChannelzMetric().
120 ChannelData *ChannelInternalMetric
121 // NestedChans tracks the nested channel type children of this subchannel in the format of
122 // a map from nested channel channelz id to corresponding reference string.
123 // Note current grpc implementation doesn't allow subchannel to have nested channels
124 // as children, therefore, this field is unused.
125 NestedChans map[int64]string
126 // SubChans tracks the subchannel type children of this subchannel in the format of a
127 // map from subchannel channelz id to corresponding reference string.
128 // Note current grpc implementation doesn't allow subchannel to have subchannels
129 // as children, therefore, this field is unused.
130 SubChans map[int64]string
131 // Sockets tracks the socket type children of this subchannel in the format of a map
132 // from socket channelz id to corresponding reference string.
133 Sockets map[int64]string
134 // Trace contains the most recent traced events.
138 // ChannelInternalMetric defines the struct that the implementor of Channel interface
139 // should return from ChannelzMetric().
140 type ChannelInternalMetric struct {
141 // current connectivity state of the channel.
142 State connectivity.State
143 // The target this channel originally tried to connect to. May be absent
145 // The number of calls started on the channel.
147 // The number of calls that have completed with an OK status.
149 // The number of calls that have completed with a non-OK status.
151 // The last time a call was started on the channel.
152 LastCallStartedTimestamp time.Time
155 // ChannelTrace stores traced events on a channel/subchannel and related info.
156 type ChannelTrace struct {
157 // EventNum is the number of events that ever got traced (i.e. including those that have been deleted)
159 // CreationTime is the creation time of the trace.
160 CreationTime time.Time
161 // Events stores the most recent trace events (up to $maxTraceEntry, newer event will overwrite the
166 // TraceEvent represents a single trace event.
167 type TraceEvent struct {
168 // Desc is a simple description of the trace event.
170 // Severity states the severity of this trace event.
172 // Timestamp is the event time.
174 // RefID is the id of the entity that gets referenced in the event. RefID is 0 if no other entity is
175 // involved in this event.
176 // e.g. SubChannel (id: 4[]) Created. --> RefID = 4, RefName = "" (inside [])
178 // RefName is the reference name for the entity that gets referenced in the event.
180 // RefType indicates the referenced entity type, i.e. Channel or SubChannel.
181 RefType RefChannelType
184 // Channel is the interface that should be satisfied in order to be tracked by
185 // channelz as Channel or SubChannel.
186 type Channel interface {
187 ChannelzMetric() *ChannelInternalMetric
190 type dummyChannel struct{}
192 func (d *dummyChannel) ChannelzMetric() *ChannelInternalMetric {
193 return &ChannelInternalMetric{}
196 type channel struct {
200 nestedChans map[int64]string
201 subChans map[int64]string
206 // traceRefCount is the number of trace events that reference this channel.
207 // Non-zero traceRefCount means the trace of this channel cannot be deleted.
211 func (c *channel) addChild(id int64, e entry) {
212 switch v := e.(type) {
214 c.subChans[id] = v.refName
216 c.nestedChans[id] = v.refName
218 grpclog.Errorf("cannot add a child (id = %d) of type %T to a channel", id, e)
222 func (c *channel) deleteChild(id int64) {
223 delete(c.subChans, id)
224 delete(c.nestedChans, id)
225 c.deleteSelfIfReady()
228 func (c *channel) triggerDelete() {
230 c.deleteSelfIfReady()
233 func (c *channel) getParentID() int64 {
237 // deleteSelfFromTree tries to delete the channel from the channelz entry relation tree, which means
238 // deleting the channel reference from its parent's child list.
240 // In order for a channel to be deleted from the tree, it must meet the criteria that, removal of the
241 // corresponding grpc object has been invoked, and the channel does not have any children left.
243 // The returned boolean value indicates whether the channel has been successfully deleted from tree.
244 func (c *channel) deleteSelfFromTree() (deleted bool) {
245 if !c.closeCalled || len(c.subChans)+len(c.nestedChans) != 0 {
250 c.cm.findEntry(c.pid).deleteChild(c.id)
255 // deleteSelfFromMap checks whether it is valid to delete the channel from the map, which means
256 // deleting the channel from channelz's tracking entirely. Users can no longer use id to query the
257 // channel, and its memory will be garbage collected.
259 // The trace reference count of the channel must be 0 in order to be deleted from the map. This is
260 // specified in the channel tracing gRFC that as long as some other trace has reference to an entity,
261 // the trace of the referenced entity must not be deleted. In order to release the resource allocated
262 // by grpc, the reference to the grpc object is reset to a dummy object.
264 // deleteSelfFromMap must be called after deleteSelfFromTree returns true.
266 // It returns a bool to indicate whether the channel can be safely deleted from map.
267 func (c *channel) deleteSelfFromMap() (delete bool) {
268 if c.getTraceRefCount() != 0 {
269 c.c = &dummyChannel{}
275 // deleteSelfIfReady tries to delete the channel itself from the channelz database.
276 // The delete process includes two steps:
277 // 1. delete the channel from the entry relation tree, i.e. delete the channel reference from its
278 // parent's child list.
279 // 2. delete the channel from the map, i.e. delete the channel entirely from channelz. Lookup by id
280 // will return entry not found error.
281 func (c *channel) deleteSelfIfReady() {
282 if !c.deleteSelfFromTree() {
285 if !c.deleteSelfFromMap() {
288 c.cm.deleteEntry(c.id)
292 func (c *channel) getChannelTrace() *channelTrace {
296 func (c *channel) incrTraceRefCount() {
297 atomic.AddInt32(&c.traceRefCount, 1)
300 func (c *channel) decrTraceRefCount() {
301 atomic.AddInt32(&c.traceRefCount, -1)
304 func (c *channel) getTraceRefCount() int {
305 i := atomic.LoadInt32(&c.traceRefCount)
309 func (c *channel) getRefName() string {
313 type subChannel struct {
317 sockets map[int64]string
325 func (sc *subChannel) addChild(id int64, e entry) {
326 if v, ok := e.(*normalSocket); ok {
327 sc.sockets[id] = v.refName
329 grpclog.Errorf("cannot add a child (id = %d) of type %T to a subChannel", id, e)
333 func (sc *subChannel) deleteChild(id int64) {
334 delete(sc.sockets, id)
335 sc.deleteSelfIfReady()
338 func (sc *subChannel) triggerDelete() {
339 sc.closeCalled = true
340 sc.deleteSelfIfReady()
343 func (sc *subChannel) getParentID() int64 {
347 // deleteSelfFromTree tries to delete the subchannel from the channelz entry relation tree, which
348 // means deleting the subchannel reference from its parent's child list.
350 // In order for a subchannel to be deleted from the tree, it must meet the criteria that, removal of
351 // the corresponding grpc object has been invoked, and the subchannel does not have any children left.
353 // The returned boolean value indicates whether the subchannel has been successfully deleted from tree.
354 func (sc *subChannel) deleteSelfFromTree() (deleted bool) {
355 if !sc.closeCalled || len(sc.sockets) != 0 {
358 sc.cm.findEntry(sc.pid).deleteChild(sc.id)
362 // deleteSelfFromMap checks whether it is valid to delete the subchannel from the map, which means
363 // deleting the subchannel from channelz's tracking entirely. Users can no longer use id to query
364 // the subchannel, and its memory will be garbage collected.
366 // The trace reference count of the subchannel must be 0 in order to be deleted from the map. This is
367 // specified in the channel tracing gRFC that as long as some other trace has reference to an entity,
368 // the trace of the referenced entity must not be deleted. In order to release the resource allocated
369 // by grpc, the reference to the grpc object is reset to a dummy object.
371 // deleteSelfFromMap must be called after deleteSelfFromTree returns true.
373 // It returns a bool to indicate whether the subchannel can be safely deleted from map.
374 func (sc *subChannel) deleteSelfFromMap() (delete bool) {
375 if sc.getTraceRefCount() != 0 {
376 // free the grpc struct (i.e. addrConn)
377 sc.c = &dummyChannel{}
383 // deleteSelfIfReady tries to delete the subchannel itself from the channelz database.
384 // The delete process includes two steps:
385 // 1. delete the subchannel from the entry relation tree, i.e. delete the subchannel reference from
386 // its parent's child list.
387 // 2. delete the subchannel from the map, i.e. delete the subchannel entirely from channelz. Lookup
388 // by id will return entry not found error.
389 func (sc *subChannel) deleteSelfIfReady() {
390 if !sc.deleteSelfFromTree() {
393 if !sc.deleteSelfFromMap() {
396 sc.cm.deleteEntry(sc.id)
400 func (sc *subChannel) getChannelTrace() *channelTrace {
404 func (sc *subChannel) incrTraceRefCount() {
405 atomic.AddInt32(&sc.traceRefCount, 1)
408 func (sc *subChannel) decrTraceRefCount() {
409 atomic.AddInt32(&sc.traceRefCount, -1)
412 func (sc *subChannel) getTraceRefCount() int {
413 i := atomic.LoadInt32(&sc.traceRefCount)
417 func (sc *subChannel) getRefName() string {
421 // SocketMetric defines the info channelz provides for a specific Socket, which
422 // includes SocketInternalMetric and channelz-specific data, such as channelz id, etc.
423 type SocketMetric struct {
424 // ID is the channelz id of this socket.
426 // RefName is the human readable reference string of this socket.
428 // SocketData contains socket internal metric reported by the socket through
430 SocketData *SocketInternalMetric
433 // SocketInternalMetric defines the struct that the implementor of Socket interface
434 // should return from ChannelzMetric().
435 type SocketInternalMetric struct {
436 // The number of streams that have been started.
438 // The number of streams that have ended successfully:
439 // On client side, receiving frame with eos bit set.
440 // On server side, sending frame with eos bit set.
441 StreamsSucceeded int64
442 // The number of streams that have ended unsuccessfully:
443 // On client side, termination without receiving frame with eos bit set.
444 // On server side, termination without sending frame with eos bit set.
446 // The number of messages successfully sent on this socket.
448 MessagesReceived int64
449 // The number of keep alives sent. This is typically implemented with HTTP/2
452 // The last time a stream was created by this endpoint. Usually unset for
454 LastLocalStreamCreatedTimestamp time.Time
455 // The last time a stream was created by the remote endpoint. Usually unset
457 LastRemoteStreamCreatedTimestamp time.Time
458 // The last time a message was sent by this endpoint.
459 LastMessageSentTimestamp time.Time
460 // The last time a message was received by this endpoint.
461 LastMessageReceivedTimestamp time.Time
462 // The amount of window, granted to the local endpoint by the remote endpoint.
463 // This may be slightly out of date due to network latency. This does NOT
464 // include stream level or TCP level flow control info.
465 LocalFlowControlWindow int64
466 // The amount of window, granted to the remote endpoint by the local endpoint.
467 // This may be slightly out of date due to network latency. This does NOT
468 // include stream level or TCP level flow control info.
469 RemoteFlowControlWindow int64
470 // The locally bound address.
472 // The remote bound address. May be absent.
474 // Optional, represents the name of the remote endpoint, if different than
475 // the original target name.
477 SocketOptions *SocketOptionData
478 Security credentials.ChannelzSecurityValue
481 // Socket is the interface that should be satisfied in order to be tracked by
482 // channelz as Socket.
483 type Socket interface {
484 ChannelzMetric() *SocketInternalMetric
487 type listenSocket struct {
495 func (ls *listenSocket) addChild(id int64, e entry) {
496 grpclog.Errorf("cannot add a child (id = %d) of type %T to a listen socket", id, e)
499 func (ls *listenSocket) deleteChild(id int64) {
500 grpclog.Errorf("cannot delete a child (id = %d) from a listen socket", id)
503 func (ls *listenSocket) triggerDelete() {
504 ls.cm.deleteEntry(ls.id)
505 ls.cm.findEntry(ls.pid).deleteChild(ls.id)
508 func (ls *listenSocket) deleteSelfIfReady() {
509 grpclog.Errorf("cannot call deleteSelfIfReady on a listen socket")
512 func (ls *listenSocket) getParentID() int64 {
516 type normalSocket struct {
524 func (ns *normalSocket) addChild(id int64, e entry) {
525 grpclog.Errorf("cannot add a child (id = %d) of type %T to a normal socket", id, e)
528 func (ns *normalSocket) deleteChild(id int64) {
529 grpclog.Errorf("cannot delete a child (id = %d) from a normal socket", id)
532 func (ns *normalSocket) triggerDelete() {
533 ns.cm.deleteEntry(ns.id)
534 ns.cm.findEntry(ns.pid).deleteChild(ns.id)
537 func (ns *normalSocket) deleteSelfIfReady() {
538 grpclog.Errorf("cannot call deleteSelfIfReady on a normal socket")
541 func (ns *normalSocket) getParentID() int64 {
545 // ServerMetric defines the info channelz provides for a specific Server, which
546 // includes ServerInternalMetric and channelz-specific data, such as channelz id,
548 type ServerMetric struct {
549 // ID is the channelz id of this server.
551 // RefName is the human readable reference string of this server.
553 // ServerData contains server internal metric reported by the server through
555 ServerData *ServerInternalMetric
556 // ListenSockets tracks the listener socket type children of this server in the
557 // format of a map from socket channelz id to corresponding reference string.
558 ListenSockets map[int64]string
561 // ServerInternalMetric defines the struct that the implementor of Server interface
562 // should return from ChannelzMetric().
563 type ServerInternalMetric struct {
564 // The number of incoming calls started on the server.
566 // The number of incoming calls that have completed with an OK status.
568 // The number of incoming calls that have completed with a non-OK status.
570 // The last time a call was started on the server.
571 LastCallStartedTimestamp time.Time
574 // Server is the interface to be satisfied in order to be tracked by channelz as
576 type Server interface {
577 ChannelzMetric() *ServerInternalMetric
584 sockets map[int64]string
585 listenSockets map[int64]string
590 func (s *server) addChild(id int64, e entry) {
591 switch v := e.(type) {
593 s.sockets[id] = v.refName
595 s.listenSockets[id] = v.refName
597 grpclog.Errorf("cannot add a child (id = %d) of type %T to a server", id, e)
601 func (s *server) deleteChild(id int64) {
602 delete(s.sockets, id)
603 delete(s.listenSockets, id)
604 s.deleteSelfIfReady()
607 func (s *server) triggerDelete() {
609 s.deleteSelfIfReady()
612 func (s *server) deleteSelfIfReady() {
613 if !s.closeCalled || len(s.sockets)+len(s.listenSockets) != 0 {
616 s.cm.deleteEntry(s.id)
619 func (s *server) getParentID() int64 {
623 type tracedChannel interface {
624 getChannelTrace() *channelTrace
630 type channelTrace struct {
632 createdTime time.Time
638 func (c *channelTrace) append(e *TraceEvent) {
640 if len(c.events) == getMaxTraceEntry() {
642 c.events = c.events[1:]
644 // start recursive cleanup in a goroutine to not block the call originated from grpc.
646 // need to acquire c.cm.mu lock to call the unlocked attemptCleanup func.
648 c.cm.decrTraceRefCount(del.RefID)
653 e.Timestamp = time.Now()
654 c.events = append(c.events, e)
659 func (c *channelTrace) clear() {
661 for _, e := range c.events {
663 // caller should have already held the c.cm.mu lock.
664 c.cm.decrTraceRefCount(e.RefID)
670 // Severity is the severity level of a trace event.
671 // The canonical enumeration of all valid values is here:
672 // https://github.com/grpc/grpc-proto/blob/9b13d199cc0d4703c7ea26c9c330ba695866eb23/grpc/channelz/v1/channelz.proto#L126.
676 // CtUNKNOWN indicates unknown severity of a trace event.
677 CtUNKNOWN Severity = iota
678 // CtINFO indicates info level severity of a trace event.
680 // CtWarning indicates warning level severity of a trace event.
682 // CtError indicates error level severity of a trace event.
686 // RefChannelType is the type of the entity being referenced in a trace event.
687 type RefChannelType int
690 // RefChannel indicates the referenced entity is a Channel.
691 RefChannel RefChannelType = iota
692 // RefSubChannel indicates the referenced entity is a SubChannel.
696 func (c *channelTrace) dumpData() *ChannelTrace {
698 ct := &ChannelTrace{EventNum: c.eventCount, CreationTime: c.createdTime}
699 ct.Events = c.events[:len(c.events)]