// Package daemon exposes the functions that occur on the host server // that the Docker daemon is running. // // In implementing the various functions of the daemon, there is often // a method-specific struct for configuring the runtime behavior. package daemon import ( "encoding/json" "fmt" "io" "io/ioutil" "net" "os" "path" "path/filepath" "runtime" "strings" "sync" "syscall" "time" "github.com/Sirupsen/logrus" containerd "github.com/docker/containerd/api/grpc/types" "github.com/docker/docker/api" "github.com/docker/docker/container" "github.com/docker/docker/daemon/events" "github.com/docker/docker/daemon/exec" "github.com/docker/engine-api/types" containertypes "github.com/docker/engine-api/types/container" "github.com/docker/libnetwork/cluster" // register graph drivers _ "github.com/docker/docker/daemon/graphdriver/register" dmetadata "github.com/docker/docker/distribution/metadata" "github.com/docker/docker/distribution/xfer" "github.com/docker/docker/image" "github.com/docker/docker/layer" "github.com/docker/docker/libcontainerd" "github.com/docker/docker/migrate/v1" "github.com/docker/docker/pkg/fileutils" "github.com/docker/docker/pkg/graphdb" "github.com/docker/docker/pkg/idtools" "github.com/docker/docker/pkg/progress" "github.com/docker/docker/pkg/registrar" "github.com/docker/docker/pkg/signal" "github.com/docker/docker/pkg/streamformatter" "github.com/docker/docker/pkg/sysinfo" "github.com/docker/docker/pkg/system" "github.com/docker/docker/pkg/truncindex" "github.com/docker/docker/reference" "github.com/docker/docker/registry" "github.com/docker/docker/runconfig" "github.com/docker/docker/utils" volumedrivers "github.com/docker/docker/volume/drivers" "github.com/docker/docker/volume/local" "github.com/docker/docker/volume/store" "github.com/docker/libnetwork" nwconfig "github.com/docker/libnetwork/config" "github.com/docker/libtrust" ) var ( // DefaultRuntimeBinary is the default runtime to be used by // containerd if none is specified DefaultRuntimeBinary = "docker-runc" errSystemNotSupported = fmt.Errorf("The Docker daemon is not supported on this platform.") ) // Daemon holds information about the Docker daemon. type Daemon struct { ID string repository string containers container.Store execCommands *exec.Store referenceStore reference.Store downloadManager *xfer.LayerDownloadManager uploadManager *xfer.LayerUploadManager distributionMetadataStore dmetadata.Store trustKey libtrust.PrivateKey idIndex *truncindex.TruncIndex configStore *Config statsCollector *statsCollector defaultLogConfig containertypes.LogConfig RegistryService registry.Service EventsService *events.Events netController libnetwork.NetworkController volumes *store.VolumeStore discoveryWatcher discoveryReloader root string seccompEnabled bool shutdown bool uidMaps []idtools.IDMap gidMaps []idtools.IDMap layerStore layer.Store imageStore image.Store nameIndex *registrar.Registrar linkIndex *linkIndex containerd libcontainerd.Client containerdRemote libcontainerd.Remote defaultIsolation containertypes.Isolation // Default isolation mode on Windows clusterProvider cluster.Provider } func (daemon *Daemon) restore() error { var ( debug = utils.IsDebugEnabled() currentDriver = daemon.GraphDriverName() containers = make(map[string]*container.Container) ) if !debug { logrus.Info("Loading containers: start.") } dir, err := ioutil.ReadDir(daemon.repository) if err != nil { return err } containerCount := 0 for _, v := range dir { id := v.Name() container, err := daemon.load(id) if !debug && logrus.GetLevel() == logrus.InfoLevel { fmt.Print(".") containerCount++ } if err != nil { logrus.Errorf("Failed to load container %v: %v", id, err) continue } // Ignore the container if it does not support the current driver being used by the graph if (container.Driver == "" && currentDriver == "aufs") || container.Driver == currentDriver { rwlayer, err := daemon.layerStore.GetRWLayer(container.ID) if err != nil { logrus.Errorf("Failed to load container mount %v: %v", id, err) continue } container.RWLayer = rwlayer logrus.Debugf("Loaded container %v", container.ID) containers[container.ID] = container } else { logrus.Debugf("Cannot load container %s because it was created with another graph driver.", container.ID) } } var migrateLegacyLinks bool restartContainers := make(map[*container.Container]chan struct{}) activeSandboxes := make(map[string]interface{}) for _, c := range containers { if err := daemon.registerName(c); err != nil { logrus.Errorf("Failed to register container %s: %s", c.ID, err) continue } if err := daemon.Register(c); err != nil { logrus.Errorf("Failed to register container %s: %s", c.ID, err) continue } // The LogConfig.Type is empty if the container was created before docker 1.12 with default log driver. // We should rewrite it to use the daemon defaults. // Fixes https://github.com/docker/docker/issues/22536 if c.HostConfig.LogConfig.Type == "" { if err := daemon.mergeAndVerifyLogConfig(&c.HostConfig.LogConfig); err != nil { logrus.Errorf("Failed to verify log config for container %s: %q", c.ID, err) continue } } } var wg sync.WaitGroup var mapLock sync.Mutex for _, c := range containers { wg.Add(1) go func(c *container.Container) { defer wg.Done() rm := c.RestartManager(false) if c.IsRunning() || c.IsPaused() { if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(rm)); err != nil { logrus.Errorf("Failed to restore with containerd: %q", err) return } if !c.HostConfig.NetworkMode.IsContainer() && c.IsRunning() { options, err := daemon.buildSandboxOptions(c) if err != nil { logrus.Warnf("Failed build sandbox option to restore container %s: %v", c.ID, err) } mapLock.Lock() activeSandboxes[c.NetworkSettings.SandboxID] = options mapLock.Unlock() } } // fixme: only if not running // get list of containers we need to restart if daemon.configStore.AutoRestart && !c.IsRunning() && !c.IsPaused() && c.ShouldRestart() { mapLock.Lock() restartContainers[c] = make(chan struct{}) mapLock.Unlock() } if c.RemovalInProgress { // We probably crashed in the middle of a removal, reset // the flag. // // We DO NOT remove the container here as we do not // know if the user had requested for either the // associated volumes, network links or both to also // be removed. So we put the container in the "dead" // state and leave further processing up to them. logrus.Debugf("Resetting RemovalInProgress flag from %v", c.ID) c.ResetRemovalInProgress() c.SetDead() c.ToDisk() } // if c.hostConfig.Links is nil (not just empty), then it is using the old sqlite links and needs to be migrated if c.HostConfig != nil && c.HostConfig.Links == nil { migrateLegacyLinks = true } }(c) } wg.Wait() daemon.netController, err = daemon.initNetworkController(daemon.configStore, activeSandboxes) if err != nil { return fmt.Errorf("Error initializing network controller: %v", err) } // migrate any legacy links from sqlite linkdbFile := filepath.Join(daemon.root, "linkgraph.db") var legacyLinkDB *graphdb.Database if migrateLegacyLinks { legacyLinkDB, err = graphdb.NewSqliteConn(linkdbFile) if err != nil { return fmt.Errorf("error connecting to legacy link graph DB %s, container links may be lost: %v", linkdbFile, err) } defer legacyLinkDB.Close() } // Now that all the containers are registered, register the links for _, c := range containers { if migrateLegacyLinks { if err := daemon.migrateLegacySqliteLinks(legacyLinkDB, c); err != nil { return err } } if err := daemon.registerLinks(c, c.HostConfig); err != nil { logrus.Errorf("failed to register link for container %s: %v", c.ID, err) } } group := sync.WaitGroup{} for c, notifier := range restartContainers { group.Add(1) go func(c *container.Container, chNotify chan struct{}) { defer group.Done() logrus.Debugf("Starting container %s", c.ID) // ignore errors here as this is a best effort to wait for children to be // running before we try to start the container children := daemon.children(c) timeout := time.After(5 * time.Second) for _, child := range children { if notifier, exists := restartContainers[child]; exists { select { case <-notifier: case <-timeout: } } } // Make sure networks are available before starting daemon.waitForNetworks(c) if err := daemon.containerStart(c); err != nil { logrus.Errorf("Failed to start container %s: %s", c.ID, err) } close(chNotify) }(c, notifier) } group.Wait() // any containers that were started above would already have had this done, // however we need to now prepare the mountpoints for the rest of the containers as well. // This shouldn't cause any issue running on the containers that already had this run. // This must be run after any containers with a restart policy so that containerized plugins // can have a chance to be running before we try to initialize them. for _, c := range containers { // if the container has restart policy, do not // prepare the mountpoints since it has been done on restarting. // This is to speed up the daemon start when a restart container // has a volume and the volume dirver is not available. if _, ok := restartContainers[c]; ok { continue } group.Add(1) go func(c *container.Container) { defer group.Done() if err := daemon.prepareMountPoints(c); err != nil { logrus.Error(err) } }(c) } group.Wait() if !debug { if logrus.GetLevel() == logrus.InfoLevel && containerCount > 0 { fmt.Println() } logrus.Info("Loading containers: done.") } return nil } // waitForNetworks is used during daemon initialization when starting up containers // It ensures that all of a container's networks are available before the daemon tries to start the container. // In practice it just makes sure the discovery service is available for containers which use a network that require discovery. func (daemon *Daemon) waitForNetworks(c *container.Container) { if daemon.discoveryWatcher == nil { return } // Make sure if the container has a network that requires discovery that the discovery service is available before starting for netName := range c.NetworkSettings.Networks { // If we get `ErrNoSuchNetwork` here, we can assume that it is due to discovery not being ready // Most likely this is because the K/V store used for discovery is in a container and needs to be started if _, err := daemon.netController.NetworkByName(netName); err != nil { if _, ok := err.(libnetwork.ErrNoSuchNetwork); !ok { continue } // use a longish timeout here due to some slowdowns in libnetwork if the k/v store is on anything other than --net=host // FIXME: why is this slow??? logrus.Debugf("Container %s waiting for network to be ready", c.Name) select { case <-daemon.discoveryWatcher.ReadyCh(): case <-time.After(60 * time.Second): } return } } } func (daemon *Daemon) children(c *container.Container) map[string]*container.Container { return daemon.linkIndex.children(c) } // parents returns the names of the parent containers of the container // with the given name. func (daemon *Daemon) parents(c *container.Container) map[string]*container.Container { return daemon.linkIndex.parents(c) } func (daemon *Daemon) registerLink(parent, child *container.Container, alias string) error { fullName := path.Join(parent.Name, alias) if err := daemon.nameIndex.Reserve(fullName, child.ID); err != nil { if err == registrar.ErrNameReserved { logrus.Warnf("error registering link for %s, to %s, as alias %s, ignoring: %v", parent.ID, child.ID, alias, err) return nil } return err } daemon.linkIndex.link(parent, child, fullName) return nil } // SetClusterProvider sets a component for querying the current cluster state. func (daemon *Daemon) SetClusterProvider(clusterProvider cluster.Provider) { daemon.clusterProvider = clusterProvider daemon.netController.SetClusterProvider(clusterProvider) } // IsSwarmCompatible verifies if the current daemon // configuration is compatible with the swarm mode func (daemon *Daemon) IsSwarmCompatible() error { if daemon.configStore == nil { return nil } return daemon.configStore.isSwarmCompatible() } // NewDaemon sets up everything for the daemon to be able to service // requests from the webserver. func NewDaemon(config *Config, registryService registry.Service, containerdRemote libcontainerd.Remote) (daemon *Daemon, err error) { setDefaultMtu(config) // Ensure that we have a correct root key limit for launching containers. if err := ModifyRootKeyLimit(); err != nil { logrus.Warnf("unable to modify root key limit, number of containers could be limitied by this quota: %v", err) } // Ensure we have compatible and valid configuration options if err := verifyDaemonSettings(config); err != nil { return nil, err } // Do we have a disabled network? config.DisableBridge = isBridgeNetworkDisabled(config) // Verify the platform is supported as a daemon if !platformSupported { return nil, errSystemNotSupported } // Validate platform-specific requirements if err := checkSystem(); err != nil { return nil, err } // set up SIGUSR1 handler on Unix-like systems, or a Win32 global event // on Windows to dump Go routine stacks setupDumpStackTrap() uidMaps, gidMaps, err := setupRemappedRoot(config) if err != nil { return nil, err } rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps) if err != nil { return nil, err } // get the canonical path to the Docker root directory var realRoot string if _, err := os.Stat(config.Root); err != nil && os.IsNotExist(err) { realRoot = config.Root } else { realRoot, err = fileutils.ReadSymlinkedDirectory(config.Root) if err != nil { return nil, fmt.Errorf("Unable to get the full path to root (%s): %s", config.Root, err) } } if err := setupDaemonRoot(config, realRoot, rootUID, rootGID); err != nil { return nil, err } if err := setupDaemonProcess(config); err != nil { return nil, err } // set up the tmpDir to use a canonical path tmp, err := tempDir(config.Root, rootUID, rootGID) if err != nil { return nil, fmt.Errorf("Unable to get the TempDir under %s: %s", config.Root, err) } realTmp, err := fileutils.ReadSymlinkedDirectory(tmp) if err != nil { return nil, fmt.Errorf("Unable to get the full path to the TempDir (%s): %s", tmp, err) } os.Setenv("TMPDIR", realTmp) d := &Daemon{configStore: config} // Ensure the daemon is properly shutdown if there is a failure during // initialization defer func() { if err != nil { if err := d.Shutdown(); err != nil { logrus.Error(err) } } }() // Set the default isolation mode (only applicable on Windows) if err := d.setDefaultIsolation(); err != nil { return nil, fmt.Errorf("error setting default isolation mode: %v", err) } logrus.Debugf("Using default logging driver %s", config.LogConfig.Type) if err := configureMaxThreads(config); err != nil { logrus.Warnf("Failed to configure golang's threads limit: %v", err) } installDefaultAppArmorProfile() daemonRepo := filepath.Join(config.Root, "containers") if err := idtools.MkdirAllAs(daemonRepo, 0700, rootUID, rootGID); err != nil && !os.IsExist(err) { return nil, err } driverName := os.Getenv("DOCKER_DRIVER") if driverName == "" { driverName = config.GraphDriver } d.layerStore, err = layer.NewStoreFromOptions(layer.StoreOptions{ StorePath: config.Root, MetadataStorePathTemplate: filepath.Join(config.Root, "image", "%s", "layerdb"), GraphDriver: driverName, GraphDriverOptions: config.GraphOptions, UIDMaps: uidMaps, GIDMaps: gidMaps, }) if err != nil { return nil, err } graphDriver := d.layerStore.DriverName() imageRoot := filepath.Join(config.Root, "image", graphDriver) // Configure and validate the kernels security support if err := configureKernelSecuritySupport(config, graphDriver); err != nil { return nil, err } logrus.Debugf("Max Concurrent Downloads: %d", *config.MaxConcurrentDownloads) d.downloadManager = xfer.NewLayerDownloadManager(d.layerStore, *config.MaxConcurrentDownloads) logrus.Debugf("Max Concurrent Uploads: %d", *config.MaxConcurrentUploads) d.uploadManager = xfer.NewLayerUploadManager(*config.MaxConcurrentUploads) ifs, err := image.NewFSStoreBackend(filepath.Join(imageRoot, "imagedb")) if err != nil { return nil, err } d.imageStore, err = image.NewImageStore(ifs, d.layerStore) if err != nil { return nil, err } // Configure the volumes driver volStore, err := d.configureVolumes(rootUID, rootGID) if err != nil { return nil, err } trustKey, err := api.LoadOrCreateTrustKey(config.TrustKeyPath) if err != nil { return nil, err } trustDir := filepath.Join(config.Root, "trust") if err := system.MkdirAll(trustDir, 0700); err != nil { return nil, err } distributionMetadataStore, err := dmetadata.NewFSMetadataStore(filepath.Join(imageRoot, "distribution")) if err != nil { return nil, err } eventsService := events.New() referenceStore, err := reference.NewReferenceStore(filepath.Join(imageRoot, "repositories.json")) if err != nil { return nil, fmt.Errorf("Couldn't create Tag store repositories: %s", err) } migrationStart := time.Now() if err := v1.Migrate(config.Root, graphDriver, d.layerStore, d.imageStore, referenceStore, distributionMetadataStore); err != nil { logrus.Errorf("Graph migration failed: %q. Your old graph data was found to be too inconsistent for upgrading to content-addressable storage. Some of the old data was probably not upgraded. We recommend starting over with a clean storage directory if possible.", err) } logrus.Infof("Graph migration to content-addressability took %.2f seconds", time.Since(migrationStart).Seconds()) // Discovery is only enabled when the daemon is launched with an address to advertise. When // initialized, the daemon is registered and we can store the discovery backend as its read-only if err := d.initDiscovery(config); err != nil { return nil, err } sysInfo := sysinfo.New(false) // Check if Devices cgroup is mounted, it is hard requirement for container security, // on Linux. if runtime.GOOS == "linux" && !sysInfo.CgroupDevicesEnabled { return nil, fmt.Errorf("Devices cgroup isn't mounted") } d.ID = trustKey.PublicKey().KeyID() d.repository = daemonRepo d.containers = container.NewMemoryStore() d.execCommands = exec.NewStore() d.referenceStore = referenceStore d.distributionMetadataStore = distributionMetadataStore d.trustKey = trustKey d.idIndex = truncindex.NewTruncIndex([]string{}) d.statsCollector = d.newStatsCollector(1 * time.Second) d.defaultLogConfig = containertypes.LogConfig{ Type: config.LogConfig.Type, Config: config.LogConfig.Config, } d.RegistryService = registryService d.EventsService = eventsService d.volumes = volStore d.root = config.Root d.uidMaps = uidMaps d.gidMaps = gidMaps d.seccompEnabled = sysInfo.Seccomp d.nameIndex = registrar.NewRegistrar() d.linkIndex = newLinkIndex() d.containerdRemote = containerdRemote go d.execCommandGC() d.containerd, err = containerdRemote.Client(d) if err != nil { return nil, err } if err := d.restore(); err != nil { return nil, err } return d, nil } func (daemon *Daemon) shutdownContainer(c *container.Container) error { // TODO(windows): Handle docker restart with paused containers if c.IsPaused() { // To terminate a process in freezer cgroup, we should send // SIGTERM to this process then unfreeze it, and the process will // force to terminate immediately. logrus.Debugf("Found container %s is paused, sending SIGTERM before unpausing it", c.ID) sig, ok := signal.SignalMap["TERM"] if !ok { return fmt.Errorf("System does not support SIGTERM") } if err := daemon.kill(c, int(sig)); err != nil { return fmt.Errorf("sending SIGTERM to container %s with error: %v", c.ID, err) } if err := daemon.containerUnpause(c); err != nil { return fmt.Errorf("Failed to unpause container %s with error: %v", c.ID, err) } if _, err := c.WaitStop(10 * time.Second); err != nil { logrus.Debugf("container %s failed to exit in 10 seconds of SIGTERM, sending SIGKILL to force", c.ID) sig, ok := signal.SignalMap["KILL"] if !ok { return fmt.Errorf("System does not support SIGKILL") } if err := daemon.kill(c, int(sig)); err != nil { logrus.Errorf("Failed to SIGKILL container %s", c.ID) } c.WaitStop(-1 * time.Second) return err } } // If container failed to exit in 10 seconds of SIGTERM, then using the force if err := daemon.containerStop(c, 10); err != nil { return fmt.Errorf("Failed to stop container %s with error: %v", c.ID, err) } c.WaitStop(-1 * time.Second) return nil } // Shutdown stops the daemon. func (daemon *Daemon) Shutdown() error { daemon.shutdown = true // Keep mounts and networking running on daemon shutdown if // we are to keep containers running and restore them. if daemon.configStore.LiveRestore && daemon.containers != nil { // check if there are any running containers, if none we should do some cleanup if ls, err := daemon.Containers(&types.ContainerListOptions{}); len(ls) != 0 || err != nil { return nil } } if daemon.containers != nil { logrus.Debug("starting clean shutdown of all containers...") daemon.containers.ApplyAll(func(c *container.Container) { if !c.IsRunning() { return } logrus.Debugf("stopping %s", c.ID) if err := daemon.shutdownContainer(c); err != nil { logrus.Errorf("Stop container error: %v", err) return } if mountid, err := daemon.layerStore.GetMountID(c.ID); err == nil { daemon.cleanupMountsByID(mountid) } logrus.Debugf("container stopped %s", c.ID) }) } // trigger libnetwork Stop only if it's initialized if daemon.netController != nil { daemon.netController.Stop() } if daemon.layerStore != nil { if err := daemon.layerStore.Cleanup(); err != nil { logrus.Errorf("Error during layer Store.Cleanup(): %v", err) } } pluginShutdown() if err := daemon.cleanupMounts(); err != nil { return err } return nil } // Mount sets container.BaseFS // (is it not set coming in? why is it unset?) func (daemon *Daemon) Mount(container *container.Container) error { dir, err := container.RWLayer.Mount(container.GetMountLabel()) if err != nil { return err } logrus.Debugf("container mounted via layerStore: %v", dir) if container.BaseFS != dir { // The mount path reported by the graph driver should always be trusted on Windows, since the // volume path for a given mounted layer may change over time. This should only be an error // on non-Windows operating systems. if container.BaseFS != "" && runtime.GOOS != "windows" { daemon.Unmount(container) return fmt.Errorf("Error: driver %s is returning inconsistent paths for container %s ('%s' then '%s')", daemon.GraphDriverName(), container.ID, container.BaseFS, dir) } } container.BaseFS = dir // TODO: combine these fields return nil } // Unmount unsets the container base filesystem func (daemon *Daemon) Unmount(container *container.Container) error { if err := container.RWLayer.Unmount(); err != nil { logrus.Errorf("Error unmounting container %s: %s", container.ID, err) return err } return nil } func writeDistributionProgress(cancelFunc func(), outStream io.Writer, progressChan <-chan progress.Progress) { progressOutput := streamformatter.NewJSONStreamFormatter().NewProgressOutput(outStream, false) operationCancelled := false for prog := range progressChan { if err := progressOutput.WriteProgress(prog); err != nil && !operationCancelled { // don't log broken pipe errors as this is the normal case when a client aborts if isBrokenPipe(err) { logrus.Info("Pull session cancelled") } else { logrus.Errorf("error writing progress to client: %v", err) } cancelFunc() operationCancelled = true // Don't return, because we need to continue draining // progressChan until it's closed to avoid a deadlock. } } } func isBrokenPipe(e error) bool { if netErr, ok := e.(*net.OpError); ok { e = netErr.Err if sysErr, ok := netErr.Err.(*os.SyscallError); ok { e = sysErr.Err } } return e == syscall.EPIPE } // GraphDriverName returns the name of the graph driver used by the layer.Store func (daemon *Daemon) GraphDriverName() string { return daemon.layerStore.DriverName() } // GetUIDGIDMaps returns the current daemon's user namespace settings // for the full uid and gid maps which will be applied to containers // started in this instance. func (daemon *Daemon) GetUIDGIDMaps() ([]idtools.IDMap, []idtools.IDMap) { return daemon.uidMaps, daemon.gidMaps } // GetRemappedUIDGID returns the current daemon's uid and gid values // if user namespaces are in use for this daemon instance. If not // this function will return "real" root values of 0, 0. func (daemon *Daemon) GetRemappedUIDGID() (int, int) { uid, gid, _ := idtools.GetRootUIDGID(daemon.uidMaps, daemon.gidMaps) return uid, gid } // tempDir returns the default directory to use for temporary files. func tempDir(rootDir string, rootUID, rootGID int) (string, error) { var tmpDir string if tmpDir = os.Getenv("DOCKER_TMPDIR"); tmpDir == "" { tmpDir = filepath.Join(rootDir, "tmp") } return tmpDir, idtools.MkdirAllAs(tmpDir, 0700, rootUID, rootGID) } func (daemon *Daemon) setupInitLayer(initPath string) error { rootUID, rootGID := daemon.GetRemappedUIDGID() return setupInitLayer(initPath, rootUID, rootGID) } func setDefaultMtu(config *Config) { // do nothing if the config does not have the default 0 value. if config.Mtu != 0 { return } config.Mtu = defaultNetworkMtu } func (daemon *Daemon) configureVolumes(rootUID, rootGID int) (*store.VolumeStore, error) { volumesDriver, err := local.New(daemon.configStore.Root, rootUID, rootGID) if err != nil { return nil, err } if !volumedrivers.Register(volumesDriver, volumesDriver.Name()) { return nil, fmt.Errorf("local volume driver could not be registered") } return store.New(daemon.configStore.Root) } // IsShuttingDown tells whether the daemon is shutting down or not func (daemon *Daemon) IsShuttingDown() bool { return daemon.shutdown } // initDiscovery initializes the discovery watcher for this daemon. func (daemon *Daemon) initDiscovery(config *Config) error { advertise, err := parseClusterAdvertiseSettings(config.ClusterStore, config.ClusterAdvertise) if err != nil { if err == errDiscoveryDisabled { return nil } return err } config.ClusterAdvertise = advertise discoveryWatcher, err := initDiscovery(config.ClusterStore, config.ClusterAdvertise, config.ClusterOpts) if err != nil { return fmt.Errorf("discovery initialization failed (%v)", err) } daemon.discoveryWatcher = discoveryWatcher return nil } // Reload reads configuration changes and modifies the // daemon according to those changes. // These are the settings that Reload changes: // - Daemon labels. // - Daemon debug log level. // - Daemon max concurrent downloads // - Daemon max concurrent uploads // - Cluster discovery (reconfigure and restart). // - Daemon live restore func (daemon *Daemon) Reload(config *Config) error { var err error // used to hold reloaded changes attributes := map[string]string{} // We need defer here to ensure the lock is released as // daemon.SystemInfo() will try to get it too defer func() { if err == nil { daemon.LogDaemonEventWithAttributes("reload", attributes) } }() daemon.configStore.reloadLock.Lock() defer daemon.configStore.reloadLock.Unlock() daemon.platformReload(config, &attributes) if err = daemon.reloadClusterDiscovery(config); err != nil { return err } if config.IsValueSet("labels") { daemon.configStore.Labels = config.Labels } if config.IsValueSet("debug") { daemon.configStore.Debug = config.Debug } if config.IsValueSet("live-restore") { daemon.configStore.LiveRestore = config.LiveRestore if err := daemon.containerdRemote.UpdateOptions(libcontainerd.WithLiveRestore(config.LiveRestore)); err != nil { return err } } // If no value is set for max-concurrent-downloads we assume it is the default value // We always "reset" as the cost is lightweight and easy to maintain. if config.IsValueSet("max-concurrent-downloads") && config.MaxConcurrentDownloads != nil { *daemon.configStore.MaxConcurrentDownloads = *config.MaxConcurrentDownloads } else { maxConcurrentDownloads := defaultMaxConcurrentDownloads daemon.configStore.MaxConcurrentDownloads = &maxConcurrentDownloads } logrus.Debugf("Reset Max Concurrent Downloads: %d", *daemon.configStore.MaxConcurrentDownloads) if daemon.downloadManager != nil { daemon.downloadManager.SetConcurrency(*daemon.configStore.MaxConcurrentDownloads) } // If no value is set for max-concurrent-upload we assume it is the default value // We always "reset" as the cost is lightweight and easy to maintain. if config.IsValueSet("max-concurrent-uploads") && config.MaxConcurrentUploads != nil { *daemon.configStore.MaxConcurrentUploads = *config.MaxConcurrentUploads } else { maxConcurrentUploads := defaultMaxConcurrentUploads daemon.configStore.MaxConcurrentUploads = &maxConcurrentUploads } logrus.Debugf("Reset Max Concurrent Uploads: %d", *daemon.configStore.MaxConcurrentUploads) if daemon.uploadManager != nil { daemon.uploadManager.SetConcurrency(*daemon.configStore.MaxConcurrentUploads) } // We emit daemon reload event here with updatable configurations attributes["debug"] = fmt.Sprintf("%t", daemon.configStore.Debug) attributes["cluster-store"] = daemon.configStore.ClusterStore if daemon.configStore.ClusterOpts != nil { opts, _ := json.Marshal(daemon.configStore.ClusterOpts) attributes["cluster-store-opts"] = string(opts) } else { attributes["cluster-store-opts"] = "{}" } attributes["cluster-advertise"] = daemon.configStore.ClusterAdvertise if daemon.configStore.Labels != nil { labels, _ := json.Marshal(daemon.configStore.Labels) attributes["labels"] = string(labels) } else { attributes["labels"] = "[]" } attributes["max-concurrent-downloads"] = fmt.Sprintf("%d", *daemon.configStore.MaxConcurrentDownloads) attributes["max-concurrent-uploads"] = fmt.Sprintf("%d", *daemon.configStore.MaxConcurrentUploads) return nil } func (daemon *Daemon) reloadClusterDiscovery(config *Config) error { var err error newAdvertise := daemon.configStore.ClusterAdvertise newClusterStore := daemon.configStore.ClusterStore if config.IsValueSet("cluster-advertise") { if config.IsValueSet("cluster-store") { newClusterStore = config.ClusterStore } newAdvertise, err = parseClusterAdvertiseSettings(newClusterStore, config.ClusterAdvertise) if err != nil && err != errDiscoveryDisabled { return err } } if daemon.clusterProvider != nil { if err := config.isSwarmCompatible(); err != nil { return err } } // check discovery modifications if !modifiedDiscoverySettings(daemon.configStore, newAdvertise, newClusterStore, config.ClusterOpts) { return nil } // enable discovery for the first time if it was not previously enabled if daemon.discoveryWatcher == nil { discoveryWatcher, err := initDiscovery(newClusterStore, newAdvertise, config.ClusterOpts) if err != nil { return fmt.Errorf("discovery initialization failed (%v)", err) } daemon.discoveryWatcher = discoveryWatcher } else { if err == errDiscoveryDisabled { // disable discovery if it was previously enabled and it's disabled now daemon.discoveryWatcher.Stop() } else { // reload discovery if err = daemon.discoveryWatcher.Reload(config.ClusterStore, newAdvertise, config.ClusterOpts); err != nil { return err } } } daemon.configStore.ClusterStore = newClusterStore daemon.configStore.ClusterOpts = config.ClusterOpts daemon.configStore.ClusterAdvertise = newAdvertise if daemon.netController == nil { return nil } netOptions, err := daemon.networkOptions(daemon.configStore, nil) if err != nil { logrus.Warnf("Failed to reload configuration with network controller: %v", err) return nil } err = daemon.netController.ReloadConfiguration(netOptions...) if err != nil { logrus.Warnf("Failed to reload configuration with network controller: %v", err) } return nil } func isBridgeNetworkDisabled(config *Config) bool { return config.bridgeConfig.Iface == disableNetworkBridge } func (daemon *Daemon) networkOptions(dconfig *Config, activeSandboxes map[string]interface{}) ([]nwconfig.Option, error) { options := []nwconfig.Option{} if dconfig == nil { return options, nil } options = append(options, nwconfig.OptionDataDir(dconfig.Root)) dd := runconfig.DefaultDaemonNetworkMode() dn := runconfig.DefaultDaemonNetworkMode().NetworkName() options = append(options, nwconfig.OptionDefaultDriver(string(dd))) options = append(options, nwconfig.OptionDefaultNetwork(dn)) if strings.TrimSpace(dconfig.ClusterStore) != "" { kv := strings.Split(dconfig.ClusterStore, "://") if len(kv) != 2 { return nil, fmt.Errorf("kv store daemon config must be of the form KV-PROVIDER://KV-URL") } options = append(options, nwconfig.OptionKVProvider(kv[0])) options = append(options, nwconfig.OptionKVProviderURL(kv[1])) } if len(dconfig.ClusterOpts) > 0 { options = append(options, nwconfig.OptionKVOpts(dconfig.ClusterOpts)) } if daemon.discoveryWatcher != nil { options = append(options, nwconfig.OptionDiscoveryWatcher(daemon.discoveryWatcher)) } if dconfig.ClusterAdvertise != "" { options = append(options, nwconfig.OptionDiscoveryAddress(dconfig.ClusterAdvertise)) } options = append(options, nwconfig.OptionLabels(dconfig.Labels)) options = append(options, driverOptions(dconfig)...) if daemon.configStore != nil && daemon.configStore.LiveRestore && len(activeSandboxes) != 0 { options = append(options, nwconfig.OptionActiveSandboxes(activeSandboxes)) } return options, nil } func copyBlkioEntry(entries []*containerd.BlkioStatsEntry) []types.BlkioStatEntry { out := make([]types.BlkioStatEntry, len(entries)) for i, re := range entries { out[i] = types.BlkioStatEntry{ Major: re.Major, Minor: re.Minor, Op: re.Op, Value: re.Value, } } return out }