25 hclog "github.com/hashicorp/go-hclog"
28 // If this is 1, then we've called CleanupClients. This can be used
29 // by plugin RPC implementations to change error behavior since you
30 // can expect network connection errors at this point. This should be
31 // read by using sync/atomic.
34 // This is a slice of the "managed" clients which are cleaned up when
36 var managedClients = make([]*Client, 0, 5)
37 var managedClientsLock sync.Mutex
41 // ErrProcessNotFound is returned when a client is instantiated to
42 // reattach to an existing process and it isn't found.
43 ErrProcessNotFound = errors.New("Reattachment process not found")
45 // ErrChecksumsDoNotMatch is returned when binary's checksum doesn't match
46 // the one provided in the SecureConfig.
47 ErrChecksumsDoNotMatch = errors.New("checksums did not match")
49 // ErrSecureConfigNoChecksum is returned when an empty checksum is provided to the
51 ErrSecureConfigNoChecksum = errors.New("no checksum provided")
53 // ErrSecureConfigNoHash is returned when a nil Hash object is provided to the
55 ErrSecureConfigNoHash = errors.New("no hash implementation provided")
57 // ErrSecureConfigAndReattach is returned when both Reattach and
58 // SecureConfig are set.
59 ErrSecureConfigAndReattach = errors.New("only one of Reattach or SecureConfig can be set")
62 // Client handles the lifecycle of a plugin application. It launches
63 // plugins, connects to them, dispenses interface implementations, and handles
64 // killing the process.
66 // Plugin hosts should use one Client for each plugin executable. To
67 // dispense a plugin type, use the `Client.Client` function, and then
68 // call `Dispense`. This awkward API is mostly historical but is used to split
69 // the client that deals with subprocess management and the client that
70 // does RPC management.
72 // See NewClient and ClientConfig for using a Client.
82 doneCtx context.Context
83 ctxCancel context.CancelFunc
86 // clientWaitGroup is used to manage the lifecycle of the plugin management
88 clientWaitGroup sync.WaitGroup
90 // processKilled is used for testing only, to flag when the process was
95 // NegotiatedVersion returns the protocol version negotiated with the server.
96 // This is only valid after Start() is called.
97 func (c *Client) NegotiatedVersion() int {
98 return c.negotiatedVersion
101 // ClientConfig is the configuration used to initialize a new
102 // plugin client. After being used to initialize a plugin client,
103 // that configuration must not be modified again.
104 type ClientConfig struct {
105 // HandshakeConfig is the configuration that must match servers.
108 // Plugins are the plugins that can be consumed.
109 // The implied version of this PluginSet is the Handshake.ProtocolVersion.
112 // VersionedPlugins is a map of PluginSets for specific protocol versions.
113 // These can be used to negotiate a compatible version between client and
114 // server. If this is set, Handshake.ProtocolVersion is not required.
115 VersionedPlugins map[int]PluginSet
117 // One of the following must be set, but not both.
119 // Cmd is the unstarted subprocess for starting the plugin. If this is
120 // set, then the Client starts the plugin process on its own and connects
123 // Reattach is configuration for reattaching to an existing plugin process
124 // that is already running. This isn't common.
126 Reattach *ReattachConfig
128 // SecureConfig is configuration for verifying the integrity of the
129 // executable. It can not be used with Reattach.
130 SecureConfig *SecureConfig
132 // TLSConfig is used to enable TLS on the RPC client.
133 TLSConfig *tls.Config
135 // Managed represents if the client should be managed by the
136 // plugin package or not. If true, then by calling CleanupClients,
137 // it will automatically be cleaned up. Otherwise, the client
138 // user is fully responsible for making sure to Kill all plugin
139 // clients. By default the client is _not_ managed.
142 // The minimum and maximum port to use for communicating with
143 // the subprocess. If not set, this defaults to 10,000 and 25,000
145 MinPort, MaxPort uint
147 // StartTimeout is the timeout to wait for the plugin to say it
148 // has started successfully.
149 StartTimeout time.Duration
151 // If non-nil, then the stderr of the client will be written to here
152 // (as well as the log). This is the original os.Stderr of the subprocess.
153 // This isn't the output of synced stderr.
156 // SyncStdout, SyncStderr can be set to override the
157 // respective os.Std* values in the plugin. Care should be taken to
158 // avoid races here. If these are nil, then this will automatically be
159 // hooked up to os.Stdin, Stdout, and Stderr, respectively.
161 // If the default values (nil) are used, then this package will not
162 // sync any of these streams.
166 // AllowedProtocols is a list of allowed protocols. If this isn't set,
167 // then only netrpc is allowed. This is so that older go-plugin systems
168 // can show friendly errors if they see a plugin with an unknown
171 // By setting this, you can cause an error immediately on plugin start
172 // if an unsupported protocol is used with a good error message.
174 // If this isn't set at all (nil value), then only net/rpc is accepted.
175 // This is done for legacy reasons. You must explicitly opt-in to
177 AllowedProtocols []Protocol
179 // Logger is the logger that the client will use. If none is provided,
180 // it will default to hclog's default logger.
183 // AutoMTLS has the client and server automatically negotiate mTLS for
184 // transport authentication. This ensures that only the original client will
185 // be allowed to connect to the server, and all other connections will be
186 // rejected. The client will also refuse to connect to any server that isn't
187 // the original instance started by the client.
189 // In this mode of operation, the client generates a one-time use tls
190 // certificate, sends the public x.509 certificate to the new server, and
191 // the server generates a one-time use tls certificate, and sends the public
192 // x.509 certificate back to the client. These are used to authenticate all
193 // rpc connections between the client and server.
195 // Setting AutoMTLS to true implies that the server must support the
196 // protocol, and correctly negotiate the tls certificates, or a connection
197 // failure will result.
199 // The client should not set TLSConfig, nor should the server set a
200 // TLSProvider, because AutoMTLS implies that a new certificate and tls
201 // configuration will be generated at startup.
203 // You cannot Reattach to a server with this option enabled.
207 // ReattachConfig is used to configure a client to reattach to an
208 // already-running plugin process. You can retrieve this information by
209 // calling ReattachConfig on Client.
210 type ReattachConfig struct {
216 // SecureConfig is used to configure a client to verify the integrity of an
217 // executable before running. It does this by verifying the checksum is
218 // expected. Hash is used to specify the hashing method to use when checksumming
219 // the file. The configuration is verified by the client by calling the
220 // SecureConfig.Check() function.
222 // The host process should ensure the checksum was provided by a trusted and
223 // authoritative source. The binary should be installed in such a way that it
224 // can not be modified by an unauthorized user between the time of this check
225 // and the time of execution.
226 type SecureConfig struct {
231 // Check takes the filepath to an executable and returns true if the checksum of
232 // the file matches the checksum provided in the SecureConfig.
233 func (s *SecureConfig) Check(filePath string) (bool, error) {
234 if len(s.Checksum) == 0 {
235 return false, ErrSecureConfigNoChecksum
239 return false, ErrSecureConfigNoHash
242 file, err := os.Open(filePath)
248 _, err = io.Copy(s.Hash, file)
253 sum := s.Hash.Sum(nil)
255 return subtle.ConstantTimeCompare(sum, s.Checksum) == 1, nil
258 // This makes sure all the managed subprocesses are killed and properly
259 // logged. This should be called before the parent process running the
262 // This must only be called _once_.
263 func CleanupClients() {
264 // Set Killed to 1 so that we don't get unexpected panics
265 atomic.StoreUint32(&Killed, 1)
267 // Kill all the managed clients in parallel and use a WaitGroup
268 // to wait for them all to finish up.
269 var wg sync.WaitGroup
270 managedClientsLock.Lock()
271 for _, client := range managedClients {
274 go func(client *Client) {
279 managedClientsLock.Unlock()
284 // Creates a new plugin client which manages the lifecycle of an external
285 // plugin and gets the address for the RPC connection.
287 // The client must be cleaned up at some point by calling Kill(). If
288 // the client is a managed client (created with NewManagedClient) you
289 // can just call CleanupClients at the end of your program and they will
290 // be properly cleaned.
291 func NewClient(config *ClientConfig) (c *Client) {
292 if config.MinPort == 0 && config.MaxPort == 0 {
293 config.MinPort = 10000
294 config.MaxPort = 25000
297 if config.StartTimeout == 0 {
298 config.StartTimeout = 1 * time.Minute
301 if config.Stderr == nil {
302 config.Stderr = ioutil.Discard
305 if config.SyncStdout == nil {
306 config.SyncStdout = ioutil.Discard
308 if config.SyncStderr == nil {
309 config.SyncStderr = ioutil.Discard
312 if config.AllowedProtocols == nil {
313 config.AllowedProtocols = []Protocol{ProtocolNetRPC}
316 if config.Logger == nil {
317 config.Logger = hclog.New(&hclog.LoggerOptions{
318 Output: hclog.DefaultOutput,
326 logger: config.Logger,
329 managedClientsLock.Lock()
330 managedClients = append(managedClients, c)
331 managedClientsLock.Unlock()
337 // Client returns the protocol client for this connection.
339 // Subsequent calls to this will return the same client.
340 func (c *Client) Client() (ClientProtocol, error) {
355 c.client, err = newRPCClient(c)
358 c.client, err = newGRPCClient(c.doneCtx, c)
361 return nil, fmt.Errorf("unknown server protocol: %s", c.protocol)
372 // Tells whether or not the underlying process has exited.
373 func (c *Client) Exited() bool {
379 // killed is used in tests to check if a process failed to exit gracefully, and
380 // needed to be killed.
381 func (c *Client) killed() bool {
384 return c.processKilled
387 // End the executing subprocess (if it is running) and perform any cleanup
388 // tasks necessary such as capturing any remaining logs and so on.
390 // This method blocks until the process successfully exits.
392 // This method can safely be called multiple times.
393 func (c *Client) Kill() {
394 // Grab a lock to read some private fields.
400 // If there is no process, there is nothing to kill.
406 // Wait for all the client goroutines to finish.
407 c.clientWaitGroup.Wait()
409 // Make sure there is no reference to the old process after it has been
416 // We need to check for address here. It is possible that the plugin
417 // started (process != nil) but has no address (addr == nil) if the
418 // plugin failed at startup. If we do have an address, we need to close
419 // the plugin net connections.
422 // Close the client to cleanly exit the process.
423 client, err := c.Client()
427 // If there is no error, then we attempt to wait for a graceful
428 // exit. If there was an error, we assume that graceful cleanup
429 // won't happen and just force kill.
430 graceful = err == nil
432 // If there was an error just log it. We're going to force
433 // kill in a moment anyways.
434 c.logger.Warn("error closing client during Kill", "err", err)
437 c.logger.Error("client", "error", err)
441 // If we're attempting a graceful exit, then we wait for a short period
442 // of time to allow that to happen. To wait for this we just wait on the
443 // doneCtx, which is cancelled once the process exits.
446 case <-c.doneCtx.Done():
447 c.logger.Debug("plugin exited")
449 case <-time.After(2 * time.Second):
453 // If graceful exiting failed, just kill it
454 c.logger.Warn("plugin failed to exit gracefully")
458 c.processKilled = true
462 // Starts the underlying subprocess, communicating with it to negotiate
463 // a port for RPC connections, and returning the address to connect via RPC.
465 // This method is safe to call multiple times. Subsequent calls have no effect.
466 // Once a client has been started once, it cannot be started again, even if
468 func (c *Client) Start() (addr net.Addr, err error) {
472 if c.address != nil {
473 return c.address, nil
476 // If one of cmd or reattach isn't set, then it is an error. We wrap
477 // this in a {} for scoping reasons, and in the hope that the escape
478 // analysis will pop the stack here.
480 cmdSet := c.config.Cmd != nil
481 attachSet := c.config.Reattach != nil
482 secureSet := c.config.SecureConfig != nil
483 if cmdSet == attachSet {
484 return nil, fmt.Errorf("Only one of Cmd or Reattach must be set")
487 if secureSet && attachSet {
488 return nil, ErrSecureConfigAndReattach
492 if c.config.Reattach != nil {
496 if c.config.VersionedPlugins == nil {
497 c.config.VersionedPlugins = make(map[int]PluginSet)
500 // handle all plugins as versioned, using the handshake config as the default.
501 version := int(c.config.ProtocolVersion)
503 // Make sure we're not overwriting a real version 0. If ProtocolVersion was
504 // non-zero, then we have to just assume the user made sure that
505 // VersionedPlugins doesn't conflict.
506 if _, ok := c.config.VersionedPlugins[version]; !ok && c.config.Plugins != nil {
507 c.config.VersionedPlugins[version] = c.config.Plugins
510 var versionStrings []string
511 for v := range c.config.VersionedPlugins {
512 versionStrings = append(versionStrings, strconv.Itoa(v))
516 fmt.Sprintf("%s=%s", c.config.MagicCookieKey, c.config.MagicCookieValue),
517 fmt.Sprintf("PLUGIN_MIN_PORT=%d", c.config.MinPort),
518 fmt.Sprintf("PLUGIN_MAX_PORT=%d", c.config.MaxPort),
519 fmt.Sprintf("PLUGIN_PROTOCOL_VERSIONS=%s", strings.Join(versionStrings, ",")),
523 cmd.Env = append(cmd.Env, os.Environ()...)
524 cmd.Env = append(cmd.Env, env...)
527 cmdStdout, err := cmd.StdoutPipe()
531 cmdStderr, err := cmd.StderrPipe()
536 if c.config.SecureConfig != nil {
537 if ok, err := c.config.SecureConfig.Check(cmd.Path); err != nil {
538 return nil, fmt.Errorf("error verifying checksum: %s", err)
540 return nil, ErrChecksumsDoNotMatch
544 // Setup a temporary certificate for client/server mtls, and send the public
545 // certificate to the plugin.
546 if c.config.AutoMTLS {
547 c.logger.Info("configuring client automatic mTLS")
548 certPEM, keyPEM, err := generateCert()
550 c.logger.Error("failed to generate client certificate", "error", err)
553 cert, err := tls.X509KeyPair(certPEM, keyPEM)
555 c.logger.Error("failed to parse client certificate", "error", err)
559 cmd.Env = append(cmd.Env, fmt.Sprintf("PLUGIN_CLIENT_CERT=%s", certPEM))
561 c.config.TLSConfig = &tls.Config{
562 Certificates: []tls.Certificate{cert},
563 ServerName: "localhost",
567 c.logger.Debug("starting plugin", "path", cmd.Path, "args", cmd.Args)
574 c.process = cmd.Process
575 c.logger.Debug("plugin started", "path", cmd.Path, "pid", c.process.Pid)
577 // Make sure the command is properly cleaned up if there is an error
581 if err != nil || r != nil {
590 // Create a context for when we kill
591 c.doneCtx, c.ctxCancel = context.WithCancel(context.Background())
593 c.clientWaitGroup.Add(1)
595 // ensure the context is cancelled when we're done
598 defer c.clientWaitGroup.Done()
600 // get the cmd info early, since the process information will be removed
605 // Wait for the command to end.
608 debugMsgArgs := []interface{}{
613 debugMsgArgs = append(debugMsgArgs,
614 []interface{}{"error", err.Error()}...)
617 // Log and make sure to flush the logs right away
618 c.logger.Debug("plugin process exited", debugMsgArgs...)
621 // Set that we exited, which takes a lock
627 // Start goroutine that logs the stderr
628 c.clientWaitGroup.Add(1)
629 // logStderr calls Done()
630 go c.logStderr(cmdStderr)
632 // Start a goroutine that is going to be reading the lines
634 linesCh := make(chan string)
635 c.clientWaitGroup.Add(1)
637 defer c.clientWaitGroup.Done()
640 scanner := bufio.NewScanner(cmdStdout)
642 linesCh <- scanner.Text()
646 // Make sure after we exit we read the lines from stdout forever
647 // so they don't block since it is a pipe.
648 // The scanner goroutine above will close this, but track it with a wait
649 // group for completeness.
650 c.clientWaitGroup.Add(1)
653 defer c.clientWaitGroup.Done()
659 // Some channels for the next step
660 timeout := time.After(c.config.StartTimeout)
662 // Start looking for the address
663 c.logger.Debug("waiting for RPC address", "path", cmd.Path)
666 err = errors.New("timeout while waiting for plugin to start")
667 case <-c.doneCtx.Done():
668 err = errors.New("plugin exited before we could connect")
669 case line := <-linesCh:
670 // Trim the line and split by "|" in order to get the parts of
672 line = strings.TrimSpace(line)
673 parts := strings.SplitN(line, "|", 6)
676 "Unrecognized remote plugin message: %s\n\n"+
677 "This usually means that the plugin is either invalid or simply\n"+
678 "needs to be recompiled to support the latest protocol.", line)
682 // Check the core protocol. Wrapped in a {} for scoping.
684 var coreProtocol int64
685 coreProtocol, err = strconv.ParseInt(parts[0], 10, 0)
687 err = fmt.Errorf("Error parsing core protocol version: %s", err)
691 if int(coreProtocol) != CoreProtocolVersion {
692 err = fmt.Errorf("Incompatible core API version with plugin. "+
693 "Plugin version: %s, Core version: %d\n\n"+
694 "To fix this, the plugin usually only needs to be recompiled.\n"+
695 "Please report this to the plugin author.", parts[0], CoreProtocolVersion)
700 // Test the API version
701 version, pluginSet, err := c.checkProtoVersion(parts[1])
706 // set the Plugins value to the compatible set, so the version
707 // doesn't need to be passed through to the ClientProtocol
709 c.config.Plugins = pluginSet
710 c.negotiatedVersion = version
711 c.logger.Debug("using plugin", "version", version)
715 addr, err = net.ResolveTCPAddr("tcp", parts[3])
717 addr, err = net.ResolveUnixAddr("unix", parts[3])
719 err = fmt.Errorf("Unknown address type: %s", parts[3])
722 // If we have a server type, then record that. We default to net/rpc
723 // for backwards compatibility.
724 c.protocol = ProtocolNetRPC
726 c.protocol = Protocol(parts[4])
730 for _, p := range c.config.AllowedProtocols {
737 err = fmt.Errorf("Unsupported plugin protocol %q. Supported: %v",
738 c.protocol, c.config.AllowedProtocols)
742 // See if we have a TLS certificate from the server.
743 // Checking if the length is > 50 rules out catching the unused "extra"
744 // data returned from some older implementations.
745 if len(parts) >= 6 && len(parts[5]) > 50 {
746 err := c.loadServerCert(parts[5])
748 return nil, fmt.Errorf("error parsing server cert: %s", err)
757 // loadServerCert is used by AutoMTLS to read an x.509 cert returned by the
758 // server, and load it as the RootCA for the client TLSConfig.
759 func (c *Client) loadServerCert(cert string) error {
760 certPool := x509.NewCertPool()
762 asn1, err := base64.RawStdEncoding.DecodeString(cert)
767 x509Cert, err := x509.ParseCertificate([]byte(asn1))
772 certPool.AddCert(x509Cert)
774 c.config.TLSConfig.RootCAs = certPool
778 func (c *Client) reattach() (net.Addr, error) {
779 // Verify the process still exists. If not, then it is an error
780 p, err := os.FindProcess(c.config.Reattach.Pid)
785 // Attempt to connect to the addr since on Unix systems FindProcess
786 // doesn't actually return an error if it can't find the process.
787 conn, err := net.Dial(
788 c.config.Reattach.Addr.Network(),
789 c.config.Reattach.Addr.String())
792 return nil, ErrProcessNotFound
796 // Create a context for when we kill
797 c.doneCtx, c.ctxCancel = context.WithCancel(context.Background())
799 c.clientWaitGroup.Add(1)
800 // Goroutine to mark exit status
802 defer c.clientWaitGroup.Done()
804 // ensure the context is cancelled when we're done
807 // Wait for the process to die
810 // Log so we can see it
811 c.logger.Debug("reattached plugin process exited")
819 // Set the address and process
820 c.address = c.config.Reattach.Addr
822 c.protocol = c.config.Reattach.Protocol
823 if c.protocol == "" {
824 // Default the protocol to net/rpc for backwards compatibility
825 c.protocol = ProtocolNetRPC
828 return c.address, nil
831 // checkProtoVersion returns the negotiated version and PluginSet.
832 // This returns an error if the server returned an incompatible protocol
833 // version, or an invalid handshake response.
834 func (c *Client) checkProtoVersion(protoVersion string) (int, PluginSet, error) {
835 serverVersion, err := strconv.Atoi(protoVersion)
837 return 0, nil, fmt.Errorf("Error parsing protocol version %q: %s", protoVersion, err)
840 // record these for the error message
841 var clientVersions []int
843 // all versions, including the legacy ProtocolVersion have been added to
845 for version, plugins := range c.config.VersionedPlugins {
846 clientVersions = append(clientVersions, version)
848 if serverVersion != version {
851 return version, plugins, nil
854 return 0, nil, fmt.Errorf("Incompatible API version with plugin. "+
855 "Plugin version: %d, Client versions: %d", serverVersion, clientVersions)
858 // ReattachConfig returns the information that must be provided to NewClient
859 // to reattach to the plugin process that this client started. This is
860 // useful for plugins that detach from their parent process.
862 // If this returns nil then the process hasn't been started yet. Please
863 // call Start or Client before calling this.
864 func (c *Client) ReattachConfig() *ReattachConfig {
868 if c.address == nil {
872 if c.config.Cmd != nil && c.config.Cmd.Process == nil {
876 // If we connected via reattach, just return the information as-is
877 if c.config.Reattach != nil {
878 return c.config.Reattach
881 return &ReattachConfig{
882 Protocol: c.protocol,
884 Pid: c.config.Cmd.Process.Pid,
888 // Protocol returns the protocol of server on the remote end. This will
889 // start the plugin process if it isn't already started. Errors from
890 // starting the plugin are suppressed and ProtocolInvalid is returned. It
891 // is recommended you call Start explicitly before calling Protocol to ensure
893 func (c *Client) Protocol() Protocol {
896 return ProtocolInvalid
902 func netAddrDialer(addr net.Addr) func(string, time.Duration) (net.Conn, error) {
903 return func(_ string, _ time.Duration) (net.Conn, error) {
904 // Connect to the client
905 conn, err := net.Dial(addr.Network(), addr.String())
909 if tcpConn, ok := conn.(*net.TCPConn); ok {
910 // Make sure to set keep alive so that the connection doesn't die
911 tcpConn.SetKeepAlive(true)
918 // dialer is compatible with grpc.WithDialer and creates the connection
920 func (c *Client) dialer(_ string, timeout time.Duration) (net.Conn, error) {
921 conn, err := netAddrDialer(c.address)("", timeout)
926 // If we have a TLS config we wrap our connection. We only do this
927 // for net/rpc since gRPC uses its own mechanism for TLS.
928 if c.protocol == ProtocolNetRPC && c.config.TLSConfig != nil {
929 conn = tls.Client(conn, c.config.TLSConfig)
935 var stdErrBufferSize = 64 * 1024
937 func (c *Client) logStderr(r io.Reader) {
938 defer c.clientWaitGroup.Done()
939 l := c.logger.Named(filepath.Base(c.config.Cmd.Path))
941 reader := bufio.NewReaderSize(r, stdErrBufferSize)
942 // continuation indicates the previous line was a prefix
943 continuation := false
946 line, isPrefix, err := reader.ReadLine()
951 l.Error("reading plugin stderr", "error", err)
955 c.config.Stderr.Write(line)
957 // The line was longer than our max token size, so it's likely
958 // incomplete and won't unmarshal.
959 if isPrefix || continuation {
960 l.Debug(string(line))
962 // if we're finishing a continued line, add the newline back in
964 c.config.Stderr.Write([]byte{'\n'})
967 continuation = isPrefix
971 c.config.Stderr.Write([]byte{'\n'})
973 entry, err := parseJSON(line)
974 // If output is not JSON format, print directly to Debug
976 // Attempt to infer the desired log level from the commonly used
978 switch line := string(line); {
979 case strings.HasPrefix(line, "[TRACE]"):
981 case strings.HasPrefix(line, "[DEBUG]"):
983 case strings.HasPrefix(line, "[INFO]"):
985 case strings.HasPrefix(line, "[WARN]"):
987 case strings.HasPrefix(line, "[ERROR]"):
993 out := flattenKVPairs(entry.KVPairs)
995 out = append(out, "timestamp", entry.Timestamp.Format(hclog.TimeFormat))
996 switch hclog.LevelFromString(entry.Level) {
998 l.Trace(entry.Message, out...)
1000 l.Debug(entry.Message, out...)
1002 l.Info(entry.Message, out...)
1004 l.Warn(entry.Message, out...)
1006 l.Error(entry.Message, out...)
1008 // if there was no log level, it's likely this is unexpected
1009 // json from something other than hclog, and we should output
1011 l.Debug(string(line))