1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00
moby--moby/daemon/container_operations_unix.go

431 lines
14 KiB
Go
Raw Normal View History

// +build linux freebsd
package daemon // import "github.com/docker/docker/daemon"
import (
"context"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"time"
"github.com/docker/docker/container"
"github.com/docker/docker/daemon/links"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/stringid"
"github.com/docker/docker/pkg/system"
"github.com/docker/docker/runconfig"
"github.com/docker/libnetwork"
"github.com/moby/sys/mount"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]string, error) {
var env []string
children := daemon.children(container)
bridgeSettings := container.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
if bridgeSettings == nil || bridgeSettings.EndpointSettings == nil {
return nil, nil
}
for linkAlias, child := range children {
if !child.IsRunning() {
Remove static errors from errors package. Moving all strings to the errors package wasn't a good idea after all. Our custom implementation of Go errors predates everything that's nice and good about working with errors in Go. Take as an example what we have to do to get an error message: ```go func GetErrorMessage(err error) string { switch err.(type) { case errcode.Error: e, _ := err.(errcode.Error) return e.Message case errcode.ErrorCode: ec, _ := err.(errcode.ErrorCode) return ec.Message() default: return err.Error() } } ``` This goes against every good practice for Go development. The language already provides a simple, intuitive and standard way to get error messages, that is calling the `Error()` method from an error. Reinventing the error interface is a mistake. Our custom implementation also makes very hard to reason about errors, another nice thing about Go. I found several (>10) error declarations that we don't use anywhere. This is a clear sign about how little we know about the errors we return. I also found several error usages where the number of arguments was different than the parameters declared in the error, another clear example of how difficult is to reason about errors. Moreover, our custom implementation didn't really make easier for people to return custom HTTP status code depending on the errors. Again, it's hard to reason about when to set custom codes and how. Take an example what we have to do to extract the message and status code from an error before returning a response from the API: ```go switch err.(type) { case errcode.ErrorCode: daError, _ := err.(errcode.ErrorCode) statusCode = daError.Descriptor().HTTPStatusCode errMsg = daError.Message() case errcode.Error: // For reference, if you're looking for a particular error // then you can do something like : // import ( derr "github.com/docker/docker/errors" ) // if daError.ErrorCode() == derr.ErrorCodeNoSuchContainer { ... } daError, _ := err.(errcode.Error) statusCode = daError.ErrorCode().Descriptor().HTTPStatusCode errMsg = daError.Message default: // This part of will be removed once we've // converted everything over to use the errcode package // FIXME: this is brittle and should not be necessary. // If we need to differentiate between different possible error types, // we should create appropriate error types with clearly defined meaning errStr := strings.ToLower(err.Error()) for keyword, status := range map[string]int{ "not found": http.StatusNotFound, "no such": http.StatusNotFound, "bad parameter": http.StatusBadRequest, "conflict": http.StatusConflict, "impossible": http.StatusNotAcceptable, "wrong login/password": http.StatusUnauthorized, "hasn't been activated": http.StatusForbidden, } { if strings.Contains(errStr, keyword) { statusCode = status break } } } ``` You can notice two things in that code: 1. We have to explain how errors work, because our implementation goes against how easy to use Go errors are. 2. At no moment we arrived to remove that `switch` statement that was the original reason to use our custom implementation. This change removes all our status errors from the errors package and puts them back in their specific contexts. IT puts the messages back with their contexts. That way, we know right away when errors used and how to generate their messages. It uses custom interfaces to reason about errors. Errors that need to response with a custom status code MUST implementent this simple interface: ```go type errorWithStatus interface { HTTPErrorStatusCode() int } ``` This interface is very straightforward to implement. It also preserves Go errors real behavior, getting the message is as simple as using the `Error()` method. I included helper functions to generate errors that use custom status code in `errors/errors.go`. By doing this, we remove the hard dependency we have eeverywhere to our custom errors package. Yes, you can use it as a helper to generate error, but it's still very easy to generate errors without it. Please, read this fantastic blog post about errors in Go: http://dave.cheney.net/2014/12/24/inspecting-errors Signed-off-by: David Calavera <david.calavera@gmail.com>
2016-02-25 10:53:35 -05:00
return nil, fmt.Errorf("Cannot link to a non running container: %s AS %s", child.Name, linkAlias)
}
childBridgeSettings := child.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
if childBridgeSettings == nil || childBridgeSettings.EndpointSettings == nil {
return nil, fmt.Errorf("container %s not attached to default bridge network", child.ID)
}
link := links.NewLink(
bridgeSettings.IPAddress,
childBridgeSettings.IPAddress,
linkAlias,
child.Config.Env,
child.Config.ExposedPorts,
)
env = append(env, link.ToEnv()...)
}
return env, nil
}
Implement none, private, and shareable ipc modes Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and /dev/mqueue between containers") container's /dev/shm is mounted on the host first, then bind-mounted inside the container. This is done that way in order to be able to share this container's IPC namespace (and the /dev/shm mount point) with another container. Unfortunately, this functionality breaks container checkpoint/restore (even if IPC is not shared). Since /dev/shm is an external mount, its contents is not saved by `criu checkpoint`, and so upon restore any application that tries to access data under /dev/shm is severily disappointed (which usually results in a fatal crash). This commit solves the issue by introducing new IPC modes for containers (in addition to 'host' and 'container:ID'). The new modes are: - 'shareable': enables sharing this container's IPC with others (this used to be the implicit default); - 'private': disables sharing this container's IPC. In 'private' mode, container's /dev/shm is truly mounted inside the container, without any bind-mounting from the host, which solves the issue. While at it, let's also implement 'none' mode. The motivation, as eloquently put by Justin Cormack, is: > I wondered a while back about having a none shm mode, as currently it is > not possible to have a totally unwriteable container as there is always > a /dev/shm writeable mount. It is a bit of a niche case (and clearly > should never be allowed to be daemon default) but it would be trivial to > add now so maybe we should... ...so here's yet yet another mode: - 'none': no /dev/shm mount inside the container (though it still has its own private IPC namespace). Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd need to make 'private' the default mode, but unfortunately it breaks the backward compatibility. So, let's make the default container IPC mode per-daemon configurable (with the built-in default set to 'shareable' for now). The default can be changed either via a daemon CLI option (--default-shm-mode) or a daemon.json configuration file parameter of the same name. Note one can only set either 'shareable' or 'private' IPC modes as a daemon default (i.e. in this context 'host', 'container', or 'none' do not make much sense). Some other changes this patch introduces are: 1. A mount for /dev/shm is added to default OCI Linux spec. 2. IpcMode.Valid() is simplified to remove duplicated code that parsed 'container:ID' form. Note the old version used to check that ID does not contain a semicolon -- this is no longer the case (tests are modified accordingly). The motivation is we should either do a proper check for container ID validity, or don't check it at all (since it is checked in other places anyway). I chose the latter. 3. IpcMode.Container() is modified to not return container ID if the mode value does not start with "container:", unifying the check to be the same as in IpcMode.IsContainer(). 3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified to add checks for newly added values. [v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997] [v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833] [v4: addressed the case of upgrading from older daemon, in this case container.HostConfig.IpcMode is unset and this is valid] [v5: document old and new IpcMode values in api/swagger.yaml] [v6: add the 'none' mode, changelog entry to docs/api/version-history.md] Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
func (daemon *Daemon) getIpcContainer(id string) (*container.Container, error) {
errMsg := "can't join IPC of container " + id
// Check the container exists
ctr, err := daemon.GetContainer(id)
if err != nil {
Implement none, private, and shareable ipc modes Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and /dev/mqueue between containers") container's /dev/shm is mounted on the host first, then bind-mounted inside the container. This is done that way in order to be able to share this container's IPC namespace (and the /dev/shm mount point) with another container. Unfortunately, this functionality breaks container checkpoint/restore (even if IPC is not shared). Since /dev/shm is an external mount, its contents is not saved by `criu checkpoint`, and so upon restore any application that tries to access data under /dev/shm is severily disappointed (which usually results in a fatal crash). This commit solves the issue by introducing new IPC modes for containers (in addition to 'host' and 'container:ID'). The new modes are: - 'shareable': enables sharing this container's IPC with others (this used to be the implicit default); - 'private': disables sharing this container's IPC. In 'private' mode, container's /dev/shm is truly mounted inside the container, without any bind-mounting from the host, which solves the issue. While at it, let's also implement 'none' mode. The motivation, as eloquently put by Justin Cormack, is: > I wondered a while back about having a none shm mode, as currently it is > not possible to have a totally unwriteable container as there is always > a /dev/shm writeable mount. It is a bit of a niche case (and clearly > should never be allowed to be daemon default) but it would be trivial to > add now so maybe we should... ...so here's yet yet another mode: - 'none': no /dev/shm mount inside the container (though it still has its own private IPC namespace). Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd need to make 'private' the default mode, but unfortunately it breaks the backward compatibility. So, let's make the default container IPC mode per-daemon configurable (with the built-in default set to 'shareable' for now). The default can be changed either via a daemon CLI option (--default-shm-mode) or a daemon.json configuration file parameter of the same name. Note one can only set either 'shareable' or 'private' IPC modes as a daemon default (i.e. in this context 'host', 'container', or 'none' do not make much sense). Some other changes this patch introduces are: 1. A mount for /dev/shm is added to default OCI Linux spec. 2. IpcMode.Valid() is simplified to remove duplicated code that parsed 'container:ID' form. Note the old version used to check that ID does not contain a semicolon -- this is no longer the case (tests are modified accordingly). The motivation is we should either do a proper check for container ID validity, or don't check it at all (since it is checked in other places anyway). I chose the latter. 3. IpcMode.Container() is modified to not return container ID if the mode value does not start with "container:", unifying the check to be the same as in IpcMode.IsContainer(). 3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified to add checks for newly added values. [v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997] [v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833] [v4: addressed the case of upgrading from older daemon, in this case container.HostConfig.IpcMode is unset and this is valid] [v5: document old and new IpcMode values in api/swagger.yaml] [v6: add the 'none' mode, changelog entry to docs/api/version-history.md] Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
return nil, errors.Wrap(err, errMsg)
}
Implement none, private, and shareable ipc modes Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and /dev/mqueue between containers") container's /dev/shm is mounted on the host first, then bind-mounted inside the container. This is done that way in order to be able to share this container's IPC namespace (and the /dev/shm mount point) with another container. Unfortunately, this functionality breaks container checkpoint/restore (even if IPC is not shared). Since /dev/shm is an external mount, its contents is not saved by `criu checkpoint`, and so upon restore any application that tries to access data under /dev/shm is severily disappointed (which usually results in a fatal crash). This commit solves the issue by introducing new IPC modes for containers (in addition to 'host' and 'container:ID'). The new modes are: - 'shareable': enables sharing this container's IPC with others (this used to be the implicit default); - 'private': disables sharing this container's IPC. In 'private' mode, container's /dev/shm is truly mounted inside the container, without any bind-mounting from the host, which solves the issue. While at it, let's also implement 'none' mode. The motivation, as eloquently put by Justin Cormack, is: > I wondered a while back about having a none shm mode, as currently it is > not possible to have a totally unwriteable container as there is always > a /dev/shm writeable mount. It is a bit of a niche case (and clearly > should never be allowed to be daemon default) but it would be trivial to > add now so maybe we should... ...so here's yet yet another mode: - 'none': no /dev/shm mount inside the container (though it still has its own private IPC namespace). Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd need to make 'private' the default mode, but unfortunately it breaks the backward compatibility. So, let's make the default container IPC mode per-daemon configurable (with the built-in default set to 'shareable' for now). The default can be changed either via a daemon CLI option (--default-shm-mode) or a daemon.json configuration file parameter of the same name. Note one can only set either 'shareable' or 'private' IPC modes as a daemon default (i.e. in this context 'host', 'container', or 'none' do not make much sense). Some other changes this patch introduces are: 1. A mount for /dev/shm is added to default OCI Linux spec. 2. IpcMode.Valid() is simplified to remove duplicated code that parsed 'container:ID' form. Note the old version used to check that ID does not contain a semicolon -- this is no longer the case (tests are modified accordingly). The motivation is we should either do a proper check for container ID validity, or don't check it at all (since it is checked in other places anyway). I chose the latter. 3. IpcMode.Container() is modified to not return container ID if the mode value does not start with "container:", unifying the check to be the same as in IpcMode.IsContainer(). 3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified to add checks for newly added values. [v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997] [v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833] [v4: addressed the case of upgrading from older daemon, in this case container.HostConfig.IpcMode is unset and this is valid] [v5: document old and new IpcMode values in api/swagger.yaml] [v6: add the 'none' mode, changelog entry to docs/api/version-history.md] Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
// Check the container is running and not restarting
if err := daemon.checkContainer(ctr, containerIsRunning, containerIsNotRestarting); err != nil {
Implement none, private, and shareable ipc modes Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and /dev/mqueue between containers") container's /dev/shm is mounted on the host first, then bind-mounted inside the container. This is done that way in order to be able to share this container's IPC namespace (and the /dev/shm mount point) with another container. Unfortunately, this functionality breaks container checkpoint/restore (even if IPC is not shared). Since /dev/shm is an external mount, its contents is not saved by `criu checkpoint`, and so upon restore any application that tries to access data under /dev/shm is severily disappointed (which usually results in a fatal crash). This commit solves the issue by introducing new IPC modes for containers (in addition to 'host' and 'container:ID'). The new modes are: - 'shareable': enables sharing this container's IPC with others (this used to be the implicit default); - 'private': disables sharing this container's IPC. In 'private' mode, container's /dev/shm is truly mounted inside the container, without any bind-mounting from the host, which solves the issue. While at it, let's also implement 'none' mode. The motivation, as eloquently put by Justin Cormack, is: > I wondered a while back about having a none shm mode, as currently it is > not possible to have a totally unwriteable container as there is always > a /dev/shm writeable mount. It is a bit of a niche case (and clearly > should never be allowed to be daemon default) but it would be trivial to > add now so maybe we should... ...so here's yet yet another mode: - 'none': no /dev/shm mount inside the container (though it still has its own private IPC namespace). Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd need to make 'private' the default mode, but unfortunately it breaks the backward compatibility. So, let's make the default container IPC mode per-daemon configurable (with the built-in default set to 'shareable' for now). The default can be changed either via a daemon CLI option (--default-shm-mode) or a daemon.json configuration file parameter of the same name. Note one can only set either 'shareable' or 'private' IPC modes as a daemon default (i.e. in this context 'host', 'container', or 'none' do not make much sense). Some other changes this patch introduces are: 1. A mount for /dev/shm is added to default OCI Linux spec. 2. IpcMode.Valid() is simplified to remove duplicated code that parsed 'container:ID' form. Note the old version used to check that ID does not contain a semicolon -- this is no longer the case (tests are modified accordingly). The motivation is we should either do a proper check for container ID validity, or don't check it at all (since it is checked in other places anyway). I chose the latter. 3. IpcMode.Container() is modified to not return container ID if the mode value does not start with "container:", unifying the check to be the same as in IpcMode.IsContainer(). 3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified to add checks for newly added values. [v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997] [v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833] [v4: addressed the case of upgrading from older daemon, in this case container.HostConfig.IpcMode is unset and this is valid] [v5: document old and new IpcMode values in api/swagger.yaml] [v6: add the 'none' mode, changelog entry to docs/api/version-history.md] Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
return nil, errors.Wrap(err, errMsg)
}
// Check the container ipc is shareable
if st, err := os.Stat(ctr.ShmPath); err != nil || !st.IsDir() {
Implement none, private, and shareable ipc modes Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and /dev/mqueue between containers") container's /dev/shm is mounted on the host first, then bind-mounted inside the container. This is done that way in order to be able to share this container's IPC namespace (and the /dev/shm mount point) with another container. Unfortunately, this functionality breaks container checkpoint/restore (even if IPC is not shared). Since /dev/shm is an external mount, its contents is not saved by `criu checkpoint`, and so upon restore any application that tries to access data under /dev/shm is severily disappointed (which usually results in a fatal crash). This commit solves the issue by introducing new IPC modes for containers (in addition to 'host' and 'container:ID'). The new modes are: - 'shareable': enables sharing this container's IPC with others (this used to be the implicit default); - 'private': disables sharing this container's IPC. In 'private' mode, container's /dev/shm is truly mounted inside the container, without any bind-mounting from the host, which solves the issue. While at it, let's also implement 'none' mode. The motivation, as eloquently put by Justin Cormack, is: > I wondered a while back about having a none shm mode, as currently it is > not possible to have a totally unwriteable container as there is always > a /dev/shm writeable mount. It is a bit of a niche case (and clearly > should never be allowed to be daemon default) but it would be trivial to > add now so maybe we should... ...so here's yet yet another mode: - 'none': no /dev/shm mount inside the container (though it still has its own private IPC namespace). Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd need to make 'private' the default mode, but unfortunately it breaks the backward compatibility. So, let's make the default container IPC mode per-daemon configurable (with the built-in default set to 'shareable' for now). The default can be changed either via a daemon CLI option (--default-shm-mode) or a daemon.json configuration file parameter of the same name. Note one can only set either 'shareable' or 'private' IPC modes as a daemon default (i.e. in this context 'host', 'container', or 'none' do not make much sense). Some other changes this patch introduces are: 1. A mount for /dev/shm is added to default OCI Linux spec. 2. IpcMode.Valid() is simplified to remove duplicated code that parsed 'container:ID' form. Note the old version used to check that ID does not contain a semicolon -- this is no longer the case (tests are modified accordingly). The motivation is we should either do a proper check for container ID validity, or don't check it at all (since it is checked in other places anyway). I chose the latter. 3. IpcMode.Container() is modified to not return container ID if the mode value does not start with "container:", unifying the check to be the same as in IpcMode.IsContainer(). 3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified to add checks for newly added values. [v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997] [v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833] [v4: addressed the case of upgrading from older daemon, in this case container.HostConfig.IpcMode is unset and this is valid] [v5: document old and new IpcMode values in api/swagger.yaml] [v6: add the 'none' mode, changelog entry to docs/api/version-history.md] Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
if err == nil || os.IsNotExist(err) {
daemon: use 'private' ipc mode by default This changes the default ipc mode of daemon/engine to be private, meaning the containers will not have their /dev/shm bind-mounted from the host by default. The benefits of doing this are: 1. No leaked mounts. Eliminate a possibility to leak mounts into other namespaces (and therefore unfortunate errors like "Unable to remove filesystem for <ID>: remove /var/lib/docker/containers/<ID>/shm: device or resource busy"). 2. Working checkpoint/restore. Make `docker checkpoint` not lose the contents of `/dev/shm`, but save it to the dump, and be restored back upon `docker start --checkpoint` (currently it is lost -- while CRIU handles tmpfs mounts, the "shareable" mount is seen as external to container, and thus rightfully ignored). 3. Better security. Currently any container is opened to share its /dev/shm with any other container. Obviously, this change will break the following usage scenario: $ docker run -d --name donor busybox top $ docker run --rm -it --ipc container:donor busybox sh Error response from daemon: linux spec namespaces: can't join IPC of container <ID>: non-shareable IPC (hint: use IpcMode:shareable for the donor container) The soution, as hinted by the (amended) error message, is to explicitly enable donor sharing by using --ipc shareable: $ docker run -d --name donor --ipc shareable busybox top Compatibility notes: 1. This only applies to containers created _after_ this change. Existing containers are not affected and will work fine as their ipc mode is stored in HostConfig. 2. Old backward compatible behavior ("shareable" containers by default) can be enabled by either using `--default-ipc-mode shareable` daemon command line option, or by adding a `"default-ipc-mode": "shareable"` line in `/etc/docker/daemon.json` configuration file. 3. If an older client (API < 1.40) is used, a "shareable" container is created. A test to check that is added. Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-11-27 19:10:44 -05:00
return nil, errors.New(errMsg + ": non-shareable IPC (hint: use IpcMode:shareable for the donor container)")
Implement none, private, and shareable ipc modes Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and /dev/mqueue between containers") container's /dev/shm is mounted on the host first, then bind-mounted inside the container. This is done that way in order to be able to share this container's IPC namespace (and the /dev/shm mount point) with another container. Unfortunately, this functionality breaks container checkpoint/restore (even if IPC is not shared). Since /dev/shm is an external mount, its contents is not saved by `criu checkpoint`, and so upon restore any application that tries to access data under /dev/shm is severily disappointed (which usually results in a fatal crash). This commit solves the issue by introducing new IPC modes for containers (in addition to 'host' and 'container:ID'). The new modes are: - 'shareable': enables sharing this container's IPC with others (this used to be the implicit default); - 'private': disables sharing this container's IPC. In 'private' mode, container's /dev/shm is truly mounted inside the container, without any bind-mounting from the host, which solves the issue. While at it, let's also implement 'none' mode. The motivation, as eloquently put by Justin Cormack, is: > I wondered a while back about having a none shm mode, as currently it is > not possible to have a totally unwriteable container as there is always > a /dev/shm writeable mount. It is a bit of a niche case (and clearly > should never be allowed to be daemon default) but it would be trivial to > add now so maybe we should... ...so here's yet yet another mode: - 'none': no /dev/shm mount inside the container (though it still has its own private IPC namespace). Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd need to make 'private' the default mode, but unfortunately it breaks the backward compatibility. So, let's make the default container IPC mode per-daemon configurable (with the built-in default set to 'shareable' for now). The default can be changed either via a daemon CLI option (--default-shm-mode) or a daemon.json configuration file parameter of the same name. Note one can only set either 'shareable' or 'private' IPC modes as a daemon default (i.e. in this context 'host', 'container', or 'none' do not make much sense). Some other changes this patch introduces are: 1. A mount for /dev/shm is added to default OCI Linux spec. 2. IpcMode.Valid() is simplified to remove duplicated code that parsed 'container:ID' form. Note the old version used to check that ID does not contain a semicolon -- this is no longer the case (tests are modified accordingly). The motivation is we should either do a proper check for container ID validity, or don't check it at all (since it is checked in other places anyway). I chose the latter. 3. IpcMode.Container() is modified to not return container ID if the mode value does not start with "container:", unifying the check to be the same as in IpcMode.IsContainer(). 3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified to add checks for newly added values. [v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997] [v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833] [v4: addressed the case of upgrading from older daemon, in this case container.HostConfig.IpcMode is unset and this is valid] [v5: document old and new IpcMode values in api/swagger.yaml] [v6: add the 'none' mode, changelog entry to docs/api/version-history.md] Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
}
// stat() failed?
return nil, errors.Wrap(err, errMsg+": unexpected error from stat "+ctr.ShmPath)
Implement none, private, and shareable ipc modes Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and /dev/mqueue between containers") container's /dev/shm is mounted on the host first, then bind-mounted inside the container. This is done that way in order to be able to share this container's IPC namespace (and the /dev/shm mount point) with another container. Unfortunately, this functionality breaks container checkpoint/restore (even if IPC is not shared). Since /dev/shm is an external mount, its contents is not saved by `criu checkpoint`, and so upon restore any application that tries to access data under /dev/shm is severily disappointed (which usually results in a fatal crash). This commit solves the issue by introducing new IPC modes for containers (in addition to 'host' and 'container:ID'). The new modes are: - 'shareable': enables sharing this container's IPC with others (this used to be the implicit default); - 'private': disables sharing this container's IPC. In 'private' mode, container's /dev/shm is truly mounted inside the container, without any bind-mounting from the host, which solves the issue. While at it, let's also implement 'none' mode. The motivation, as eloquently put by Justin Cormack, is: > I wondered a while back about having a none shm mode, as currently it is > not possible to have a totally unwriteable container as there is always > a /dev/shm writeable mount. It is a bit of a niche case (and clearly > should never be allowed to be daemon default) but it would be trivial to > add now so maybe we should... ...so here's yet yet another mode: - 'none': no /dev/shm mount inside the container (though it still has its own private IPC namespace). Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd need to make 'private' the default mode, but unfortunately it breaks the backward compatibility. So, let's make the default container IPC mode per-daemon configurable (with the built-in default set to 'shareable' for now). The default can be changed either via a daemon CLI option (--default-shm-mode) or a daemon.json configuration file parameter of the same name. Note one can only set either 'shareable' or 'private' IPC modes as a daemon default (i.e. in this context 'host', 'container', or 'none' do not make much sense). Some other changes this patch introduces are: 1. A mount for /dev/shm is added to default OCI Linux spec. 2. IpcMode.Valid() is simplified to remove duplicated code that parsed 'container:ID' form. Note the old version used to check that ID does not contain a semicolon -- this is no longer the case (tests are modified accordingly). The motivation is we should either do a proper check for container ID validity, or don't check it at all (since it is checked in other places anyway). I chose the latter. 3. IpcMode.Container() is modified to not return container ID if the mode value does not start with "container:", unifying the check to be the same as in IpcMode.IsContainer(). 3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified to add checks for newly added values. [v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997] [v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833] [v4: addressed the case of upgrading from older daemon, in this case container.HostConfig.IpcMode is unset and this is valid] [v5: document old and new IpcMode values in api/swagger.yaml] [v6: add the 'none' mode, changelog entry to docs/api/version-history.md] Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
}
return ctr, nil
}
func (daemon *Daemon) getPidContainer(ctr *container.Container) (*container.Container, error) {
containerID := ctr.HostConfig.PidMode.Container()
ctr, err := daemon.GetContainer(containerID)
if err != nil {
return nil, errors.Wrapf(err, "cannot join PID of a non running container: %s", containerID)
}
return ctr, daemon.checkContainer(ctr, containerIsRunning, containerIsNotRestarting)
}
func containerIsRunning(c *container.Container) error {
if !c.IsRunning() {
return errdefs.Conflict(errors.Errorf("container %s is not running", c.ID))
}
return nil
}
func containerIsNotRestarting(c *container.Container) error {
if c.IsRestarting() {
return errContainerIsRestarting(c.ID)
}
return nil
}
func (daemon *Daemon) setupIpcDirs(c *container.Container) error {
Implement none, private, and shareable ipc modes Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and /dev/mqueue between containers") container's /dev/shm is mounted on the host first, then bind-mounted inside the container. This is done that way in order to be able to share this container's IPC namespace (and the /dev/shm mount point) with another container. Unfortunately, this functionality breaks container checkpoint/restore (even if IPC is not shared). Since /dev/shm is an external mount, its contents is not saved by `criu checkpoint`, and so upon restore any application that tries to access data under /dev/shm is severily disappointed (which usually results in a fatal crash). This commit solves the issue by introducing new IPC modes for containers (in addition to 'host' and 'container:ID'). The new modes are: - 'shareable': enables sharing this container's IPC with others (this used to be the implicit default); - 'private': disables sharing this container's IPC. In 'private' mode, container's /dev/shm is truly mounted inside the container, without any bind-mounting from the host, which solves the issue. While at it, let's also implement 'none' mode. The motivation, as eloquently put by Justin Cormack, is: > I wondered a while back about having a none shm mode, as currently it is > not possible to have a totally unwriteable container as there is always > a /dev/shm writeable mount. It is a bit of a niche case (and clearly > should never be allowed to be daemon default) but it would be trivial to > add now so maybe we should... ...so here's yet yet another mode: - 'none': no /dev/shm mount inside the container (though it still has its own private IPC namespace). Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd need to make 'private' the default mode, but unfortunately it breaks the backward compatibility. So, let's make the default container IPC mode per-daemon configurable (with the built-in default set to 'shareable' for now). The default can be changed either via a daemon CLI option (--default-shm-mode) or a daemon.json configuration file parameter of the same name. Note one can only set either 'shareable' or 'private' IPC modes as a daemon default (i.e. in this context 'host', 'container', or 'none' do not make much sense). Some other changes this patch introduces are: 1. A mount for /dev/shm is added to default OCI Linux spec. 2. IpcMode.Valid() is simplified to remove duplicated code that parsed 'container:ID' form. Note the old version used to check that ID does not contain a semicolon -- this is no longer the case (tests are modified accordingly). The motivation is we should either do a proper check for container ID validity, or don't check it at all (since it is checked in other places anyway). I chose the latter. 3. IpcMode.Container() is modified to not return container ID if the mode value does not start with "container:", unifying the check to be the same as in IpcMode.IsContainer(). 3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified to add checks for newly added values. [v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997] [v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833] [v4: addressed the case of upgrading from older daemon, in this case container.HostConfig.IpcMode is unset and this is valid] [v5: document old and new IpcMode values in api/swagger.yaml] [v6: add the 'none' mode, changelog entry to docs/api/version-history.md] Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
ipcMode := c.HostConfig.IpcMode
Implement none, private, and shareable ipc modes Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and /dev/mqueue between containers") container's /dev/shm is mounted on the host first, then bind-mounted inside the container. This is done that way in order to be able to share this container's IPC namespace (and the /dev/shm mount point) with another container. Unfortunately, this functionality breaks container checkpoint/restore (even if IPC is not shared). Since /dev/shm is an external mount, its contents is not saved by `criu checkpoint`, and so upon restore any application that tries to access data under /dev/shm is severily disappointed (which usually results in a fatal crash). This commit solves the issue by introducing new IPC modes for containers (in addition to 'host' and 'container:ID'). The new modes are: - 'shareable': enables sharing this container's IPC with others (this used to be the implicit default); - 'private': disables sharing this container's IPC. In 'private' mode, container's /dev/shm is truly mounted inside the container, without any bind-mounting from the host, which solves the issue. While at it, let's also implement 'none' mode. The motivation, as eloquently put by Justin Cormack, is: > I wondered a while back about having a none shm mode, as currently it is > not possible to have a totally unwriteable container as there is always > a /dev/shm writeable mount. It is a bit of a niche case (and clearly > should never be allowed to be daemon default) but it would be trivial to > add now so maybe we should... ...so here's yet yet another mode: - 'none': no /dev/shm mount inside the container (though it still has its own private IPC namespace). Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd need to make 'private' the default mode, but unfortunately it breaks the backward compatibility. So, let's make the default container IPC mode per-daemon configurable (with the built-in default set to 'shareable' for now). The default can be changed either via a daemon CLI option (--default-shm-mode) or a daemon.json configuration file parameter of the same name. Note one can only set either 'shareable' or 'private' IPC modes as a daemon default (i.e. in this context 'host', 'container', or 'none' do not make much sense). Some other changes this patch introduces are: 1. A mount for /dev/shm is added to default OCI Linux spec. 2. IpcMode.Valid() is simplified to remove duplicated code that parsed 'container:ID' form. Note the old version used to check that ID does not contain a semicolon -- this is no longer the case (tests are modified accordingly). The motivation is we should either do a proper check for container ID validity, or don't check it at all (since it is checked in other places anyway). I chose the latter. 3. IpcMode.Container() is modified to not return container ID if the mode value does not start with "container:", unifying the check to be the same as in IpcMode.IsContainer(). 3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified to add checks for newly added values. [v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997] [v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833] [v4: addressed the case of upgrading from older daemon, in this case container.HostConfig.IpcMode is unset and this is valid] [v5: document old and new IpcMode values in api/swagger.yaml] [v6: add the 'none' mode, changelog entry to docs/api/version-history.md] Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
switch {
case ipcMode.IsContainer():
ic, err := daemon.getIpcContainer(ipcMode.Container())
if err != nil {
return err
}
c.ShmPath = ic.ShmPath
Implement none, private, and shareable ipc modes Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and /dev/mqueue between containers") container's /dev/shm is mounted on the host first, then bind-mounted inside the container. This is done that way in order to be able to share this container's IPC namespace (and the /dev/shm mount point) with another container. Unfortunately, this functionality breaks container checkpoint/restore (even if IPC is not shared). Since /dev/shm is an external mount, its contents is not saved by `criu checkpoint`, and so upon restore any application that tries to access data under /dev/shm is severily disappointed (which usually results in a fatal crash). This commit solves the issue by introducing new IPC modes for containers (in addition to 'host' and 'container:ID'). The new modes are: - 'shareable': enables sharing this container's IPC with others (this used to be the implicit default); - 'private': disables sharing this container's IPC. In 'private' mode, container's /dev/shm is truly mounted inside the container, without any bind-mounting from the host, which solves the issue. While at it, let's also implement 'none' mode. The motivation, as eloquently put by Justin Cormack, is: > I wondered a while back about having a none shm mode, as currently it is > not possible to have a totally unwriteable container as there is always > a /dev/shm writeable mount. It is a bit of a niche case (and clearly > should never be allowed to be daemon default) but it would be trivial to > add now so maybe we should... ...so here's yet yet another mode: - 'none': no /dev/shm mount inside the container (though it still has its own private IPC namespace). Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd need to make 'private' the default mode, but unfortunately it breaks the backward compatibility. So, let's make the default container IPC mode per-daemon configurable (with the built-in default set to 'shareable' for now). The default can be changed either via a daemon CLI option (--default-shm-mode) or a daemon.json configuration file parameter of the same name. Note one can only set either 'shareable' or 'private' IPC modes as a daemon default (i.e. in this context 'host', 'container', or 'none' do not make much sense). Some other changes this patch introduces are: 1. A mount for /dev/shm is added to default OCI Linux spec. 2. IpcMode.Valid() is simplified to remove duplicated code that parsed 'container:ID' form. Note the old version used to check that ID does not contain a semicolon -- this is no longer the case (tests are modified accordingly). The motivation is we should either do a proper check for container ID validity, or don't check it at all (since it is checked in other places anyway). I chose the latter. 3. IpcMode.Container() is modified to not return container ID if the mode value does not start with "container:", unifying the check to be the same as in IpcMode.IsContainer(). 3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified to add checks for newly added values. [v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997] [v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833] [v4: addressed the case of upgrading from older daemon, in this case container.HostConfig.IpcMode is unset and this is valid] [v5: document old and new IpcMode values in api/swagger.yaml] [v6: add the 'none' mode, changelog entry to docs/api/version-history.md] Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
case ipcMode.IsHost():
if _, err := os.Stat("/dev/shm"); err != nil {
return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host")
}
c.ShmPath = "/dev/shm"
Implement none, private, and shareable ipc modes Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and /dev/mqueue between containers") container's /dev/shm is mounted on the host first, then bind-mounted inside the container. This is done that way in order to be able to share this container's IPC namespace (and the /dev/shm mount point) with another container. Unfortunately, this functionality breaks container checkpoint/restore (even if IPC is not shared). Since /dev/shm is an external mount, its contents is not saved by `criu checkpoint`, and so upon restore any application that tries to access data under /dev/shm is severily disappointed (which usually results in a fatal crash). This commit solves the issue by introducing new IPC modes for containers (in addition to 'host' and 'container:ID'). The new modes are: - 'shareable': enables sharing this container's IPC with others (this used to be the implicit default); - 'private': disables sharing this container's IPC. In 'private' mode, container's /dev/shm is truly mounted inside the container, without any bind-mounting from the host, which solves the issue. While at it, let's also implement 'none' mode. The motivation, as eloquently put by Justin Cormack, is: > I wondered a while back about having a none shm mode, as currently it is > not possible to have a totally unwriteable container as there is always > a /dev/shm writeable mount. It is a bit of a niche case (and clearly > should never be allowed to be daemon default) but it would be trivial to > add now so maybe we should... ...so here's yet yet another mode: - 'none': no /dev/shm mount inside the container (though it still has its own private IPC namespace). Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd need to make 'private' the default mode, but unfortunately it breaks the backward compatibility. So, let's make the default container IPC mode per-daemon configurable (with the built-in default set to 'shareable' for now). The default can be changed either via a daemon CLI option (--default-shm-mode) or a daemon.json configuration file parameter of the same name. Note one can only set either 'shareable' or 'private' IPC modes as a daemon default (i.e. in this context 'host', 'container', or 'none' do not make much sense). Some other changes this patch introduces are: 1. A mount for /dev/shm is added to default OCI Linux spec. 2. IpcMode.Valid() is simplified to remove duplicated code that parsed 'container:ID' form. Note the old version used to check that ID does not contain a semicolon -- this is no longer the case (tests are modified accordingly). The motivation is we should either do a proper check for container ID validity, or don't check it at all (since it is checked in other places anyway). I chose the latter. 3. IpcMode.Container() is modified to not return container ID if the mode value does not start with "container:", unifying the check to be the same as in IpcMode.IsContainer(). 3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified to add checks for newly added values. [v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997] [v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833] [v4: addressed the case of upgrading from older daemon, in this case container.HostConfig.IpcMode is unset and this is valid] [v5: document old and new IpcMode values in api/swagger.yaml] [v6: add the 'none' mode, changelog entry to docs/api/version-history.md] Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
case ipcMode.IsPrivate(), ipcMode.IsNone():
// c.ShmPath will/should not be used, so make it empty.
// Container's /dev/shm mount comes from OCI spec.
c.ShmPath = ""
case ipcMode.IsEmpty():
// A container was created by an older version of the daemon.
// The default behavior used to be what is now called "shareable".
fallthrough
case ipcMode.IsShareable():
rootIDs := daemon.idMapping.RootPair()
if !c.HasMountFor("/dev/shm") {
shmPath, err := c.ShmResourcePath()
if err != nil {
return err
}
if err := idtools.MkdirAllAndChown(shmPath, 0700, rootIDs); err != nil {
return err
}
shmproperty := "mode=1777,size=" + strconv.FormatInt(c.HostConfig.ShmSize, 10)
if err := unix.Mount("shm", shmPath, "tmpfs", uintptr(unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV), label.FormatMountLabel(shmproperty, c.GetMountLabel())); err != nil {
return fmt.Errorf("mounting shm tmpfs: %s", err)
}
if err := os.Chown(shmPath, rootIDs.UID, rootIDs.GID); err != nil {
return err
}
Implement none, private, and shareable ipc modes Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and /dev/mqueue between containers") container's /dev/shm is mounted on the host first, then bind-mounted inside the container. This is done that way in order to be able to share this container's IPC namespace (and the /dev/shm mount point) with another container. Unfortunately, this functionality breaks container checkpoint/restore (even if IPC is not shared). Since /dev/shm is an external mount, its contents is not saved by `criu checkpoint`, and so upon restore any application that tries to access data under /dev/shm is severily disappointed (which usually results in a fatal crash). This commit solves the issue by introducing new IPC modes for containers (in addition to 'host' and 'container:ID'). The new modes are: - 'shareable': enables sharing this container's IPC with others (this used to be the implicit default); - 'private': disables sharing this container's IPC. In 'private' mode, container's /dev/shm is truly mounted inside the container, without any bind-mounting from the host, which solves the issue. While at it, let's also implement 'none' mode. The motivation, as eloquently put by Justin Cormack, is: > I wondered a while back about having a none shm mode, as currently it is > not possible to have a totally unwriteable container as there is always > a /dev/shm writeable mount. It is a bit of a niche case (and clearly > should never be allowed to be daemon default) but it would be trivial to > add now so maybe we should... ...so here's yet yet another mode: - 'none': no /dev/shm mount inside the container (though it still has its own private IPC namespace). Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd need to make 'private' the default mode, but unfortunately it breaks the backward compatibility. So, let's make the default container IPC mode per-daemon configurable (with the built-in default set to 'shareable' for now). The default can be changed either via a daemon CLI option (--default-shm-mode) or a daemon.json configuration file parameter of the same name. Note one can only set either 'shareable' or 'private' IPC modes as a daemon default (i.e. in this context 'host', 'container', or 'none' do not make much sense). Some other changes this patch introduces are: 1. A mount for /dev/shm is added to default OCI Linux spec. 2. IpcMode.Valid() is simplified to remove duplicated code that parsed 'container:ID' form. Note the old version used to check that ID does not contain a semicolon -- this is no longer the case (tests are modified accordingly). The motivation is we should either do a proper check for container ID validity, or don't check it at all (since it is checked in other places anyway). I chose the latter. 3. IpcMode.Container() is modified to not return container ID if the mode value does not start with "container:", unifying the check to be the same as in IpcMode.IsContainer(). 3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified to add checks for newly added values. [v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997] [v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833] [v4: addressed the case of upgrading from older daemon, in this case container.HostConfig.IpcMode is unset and this is valid] [v5: document old and new IpcMode values in api/swagger.yaml] [v6: add the 'none' mode, changelog entry to docs/api/version-history.md] Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
c.ShmPath = shmPath
}
Implement none, private, and shareable ipc modes Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and /dev/mqueue between containers") container's /dev/shm is mounted on the host first, then bind-mounted inside the container. This is done that way in order to be able to share this container's IPC namespace (and the /dev/shm mount point) with another container. Unfortunately, this functionality breaks container checkpoint/restore (even if IPC is not shared). Since /dev/shm is an external mount, its contents is not saved by `criu checkpoint`, and so upon restore any application that tries to access data under /dev/shm is severily disappointed (which usually results in a fatal crash). This commit solves the issue by introducing new IPC modes for containers (in addition to 'host' and 'container:ID'). The new modes are: - 'shareable': enables sharing this container's IPC with others (this used to be the implicit default); - 'private': disables sharing this container's IPC. In 'private' mode, container's /dev/shm is truly mounted inside the container, without any bind-mounting from the host, which solves the issue. While at it, let's also implement 'none' mode. The motivation, as eloquently put by Justin Cormack, is: > I wondered a while back about having a none shm mode, as currently it is > not possible to have a totally unwriteable container as there is always > a /dev/shm writeable mount. It is a bit of a niche case (and clearly > should never be allowed to be daemon default) but it would be trivial to > add now so maybe we should... ...so here's yet yet another mode: - 'none': no /dev/shm mount inside the container (though it still has its own private IPC namespace). Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd need to make 'private' the default mode, but unfortunately it breaks the backward compatibility. So, let's make the default container IPC mode per-daemon configurable (with the built-in default set to 'shareable' for now). The default can be changed either via a daemon CLI option (--default-shm-mode) or a daemon.json configuration file parameter of the same name. Note one can only set either 'shareable' or 'private' IPC modes as a daemon default (i.e. in this context 'host', 'container', or 'none' do not make much sense). Some other changes this patch introduces are: 1. A mount for /dev/shm is added to default OCI Linux spec. 2. IpcMode.Valid() is simplified to remove duplicated code that parsed 'container:ID' form. Note the old version used to check that ID does not contain a semicolon -- this is no longer the case (tests are modified accordingly). The motivation is we should either do a proper check for container ID validity, or don't check it at all (since it is checked in other places anyway). I chose the latter. 3. IpcMode.Container() is modified to not return container ID if the mode value does not start with "container:", unifying the check to be the same as in IpcMode.IsContainer(). 3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified to add checks for newly added values. [v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997] [v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833] [v4: addressed the case of upgrading from older daemon, in this case container.HostConfig.IpcMode is unset and this is valid] [v5: document old and new IpcMode values in api/swagger.yaml] [v6: add the 'none' mode, changelog entry to docs/api/version-history.md] Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
default:
return fmt.Errorf("invalid IPC mode: %v", ipcMode)
}
return nil
}
func (daemon *Daemon) setupSecretDir(c *container.Container) (setupErr error) {
if len(c.SecretReferences) == 0 && len(c.ConfigReferences) == 0 {
return nil
}
if err := daemon.createSecretsDir(c); err != nil {
return err
}
defer func() {
if setupErr != nil {
daemon.cleanupSecretDir(c)
}
}()
if c.DependencyStore == nil {
return fmt.Errorf("secret store is not initialized")
}
// retrieve possible remapped range start for root UID, GID
rootIDs := daemon.idMapping.RootPair()
for _, s := range c.SecretReferences {
// TODO (ehazlett): use type switch when more are supported
if s.File == nil {
logrus.Error("secret target type is not a file target")
continue
}
// secrets are created in the SecretMountPath on the host, at a
// single level
fPath, err := c.SecretFilePath(*s)
if err != nil {
return errors.Wrap(err, "error getting secret file path")
}
if err := idtools.MkdirAllAndChown(filepath.Dir(fPath), 0700, rootIDs); err != nil {
return errors.Wrap(err, "error creating secret mount path")
}
logrus.WithFields(logrus.Fields{
"name": s.File.Name,
"path": fPath,
}).Debug("injecting secret")
secret, err := c.DependencyStore.Secrets().Get(s.SecretID)
if err != nil {
return errors.Wrap(err, "unable to get secret from secret store")
}
if err := ioutil.WriteFile(fPath, secret.Spec.Data, s.File.Mode); err != nil {
return errors.Wrap(err, "error injecting secret")
}
uid, err := strconv.Atoi(s.File.UID)
if err != nil {
return err
}
gid, err := strconv.Atoi(s.File.GID)
if err != nil {
return err
}
if err := os.Chown(fPath, rootIDs.UID+uid, rootIDs.GID+gid); err != nil {
return errors.Wrap(err, "error setting ownership for secret")
}
if err := os.Chmod(fPath, s.File.Mode); err != nil {
return errors.Wrap(err, "error setting file mode for secret")
}
}
for _, configRef := range c.ConfigReferences {
// TODO (ehazlett): use type switch when more are supported
if configRef.File == nil {
// Runtime configs are not mounted into the container, but they're
// a valid type of config so we should not error when we encounter
// one.
if configRef.Runtime == nil {
logrus.Error("config target type is not a file or runtime target")
}
// However, in any case, this isn't a file config, so we have no
// further work to do
continue
}
fPath, err := c.ConfigFilePath(*configRef)
if err != nil {
return errors.Wrap(err, "error getting config file path for container")
}
if err := idtools.MkdirAllAndChown(filepath.Dir(fPath), 0700, rootIDs); err != nil {
return errors.Wrap(err, "error creating config mount path")
}
logrus.WithFields(logrus.Fields{
"name": configRef.File.Name,
"path": fPath,
}).Debug("injecting config")
config, err := c.DependencyStore.Configs().Get(configRef.ConfigID)
if err != nil {
return errors.Wrap(err, "unable to get config from config store")
}
if err := ioutil.WriteFile(fPath, config.Spec.Data, configRef.File.Mode); err != nil {
return errors.Wrap(err, "error injecting config")
}
uid, err := strconv.Atoi(configRef.File.UID)
if err != nil {
return err
}
gid, err := strconv.Atoi(configRef.File.GID)
if err != nil {
return err
}
if err := os.Chown(fPath, rootIDs.UID+uid, rootIDs.GID+gid); err != nil {
return errors.Wrap(err, "error setting ownership for config")
}
if err := os.Chmod(fPath, configRef.File.Mode); err != nil {
return errors.Wrap(err, "error setting file mode for config")
}
}
return daemon.remountSecretDir(c)
}
// createSecretsDir is used to create a dir suitable for storing container secrets.
// In practice this is using a tmpfs mount and is used for both "configs" and "secrets"
func (daemon *Daemon) createSecretsDir(c *container.Container) error {
// retrieve possible remapped range start for root UID, GID
rootIDs := daemon.idMapping.RootPair()
dir, err := c.SecretMountPath()
if err != nil {
return errors.Wrap(err, "error getting container secrets dir")
}
// create tmpfs
if err := idtools.MkdirAllAndChown(dir, 0700, rootIDs); err != nil {
return errors.Wrap(err, "error creating secret local mount path")
}
tmpfsOwnership := fmt.Sprintf("uid=%d,gid=%d", rootIDs.UID, rootIDs.GID)
if err := mount.Mount("tmpfs", dir, "tmpfs", "nodev,nosuid,noexec,"+tmpfsOwnership); err != nil {
return errors.Wrap(err, "unable to setup secret mount")
}
return nil
}
func (daemon *Daemon) remountSecretDir(c *container.Container) error {
dir, err := c.SecretMountPath()
if err != nil {
return errors.Wrap(err, "error getting container secrets path")
}
if err := label.Relabel(dir, c.MountLabel, false); err != nil {
logrus.WithError(err).WithField("dir", dir).Warn("Error while attempting to set selinux label")
}
rootIDs := daemon.idMapping.RootPair()
tmpfsOwnership := fmt.Sprintf("uid=%d,gid=%d", rootIDs.UID, rootIDs.GID)
// remount secrets ro
if err := mount.Mount("tmpfs", dir, "tmpfs", "remount,ro,"+tmpfsOwnership); err != nil {
return errors.Wrap(err, "unable to remount dir as readonly")
}
return nil
}
func (daemon *Daemon) cleanupSecretDir(c *container.Container) {
dir, err := c.SecretMountPath()
if err != nil {
logrus.WithError(err).WithField("container", c.ID).Warn("error getting secrets mount path for container")
}
if err := mount.RecursiveUnmount(dir); err != nil {
logrus.WithField("dir", dir).WithError(err).Warn("Error while attempting to unmount dir, this may prevent removal of container.")
}
if err := os.RemoveAll(dir); err != nil && !os.IsNotExist(err) {
logrus.WithField("dir", dir).WithError(err).Error("Error removing dir.")
}
}
Update ContainerWait API This patch adds the untilRemoved option to the ContainerWait API which allows the client to wait until the container is not only exited but also removed. This patch also adds some more CLI integration tests for waiting for a created container and waiting with the new --until-removed flag. Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Handle detach sequence in CLI Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Update Container Wait Conditions Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Apply container wait changes to API 1.30 The set of changes to the containerWait API missed the cut for the Docker 17.05 release (API version 1.29). This patch bumps the version checks to use 1.30 instead. This patch also makes a minor update to a testfile which was added to the builder/dockerfile package. Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Remove wait changes from CLI Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Address minor nits on wait changes - Changed the name of the tty Proxy wrapper to `escapeProxy` - Removed the unnecessary Error() method on container.State - Fixes a typo in comment (repeated word) Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Use router.WithCancel in the containerWait handler This handler previously added this functionality manually but now uses the existing wrapper which does it for us. Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Add WaitCondition constants to api/types/container Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Address more ContainerWait review comments - Update ContainerWait backend interface to not return pointer values for container.StateStatus type. - Updated container state's Wait() method comments to clarify that a context MUST be used for cancelling the request, setting timeouts, and to avoid goroutine leaks. - Removed unnecessary buffering when making channels in the client's ContainerWait methods. - Renamed result and error channels in client's ContainerWait methods to clarify that only a single result or error value would be sent on the channel. Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Move container.WaitCondition type to separate file ... to avoid conflict with swagger-generated code for API response Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Address more ContainerWait review comments Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn)
2017-03-30 23:01:41 -04:00
func killProcessDirectly(cntr *container.Container) error {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
// Block until the container to stops or timeout.
Update ContainerWait API This patch adds the untilRemoved option to the ContainerWait API which allows the client to wait until the container is not only exited but also removed. This patch also adds some more CLI integration tests for waiting for a created container and waiting with the new --until-removed flag. Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Handle detach sequence in CLI Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Update Container Wait Conditions Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Apply container wait changes to API 1.30 The set of changes to the containerWait API missed the cut for the Docker 17.05 release (API version 1.29). This patch bumps the version checks to use 1.30 instead. This patch also makes a minor update to a testfile which was added to the builder/dockerfile package. Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Remove wait changes from CLI Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Address minor nits on wait changes - Changed the name of the tty Proxy wrapper to `escapeProxy` - Removed the unnecessary Error() method on container.State - Fixes a typo in comment (repeated word) Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Use router.WithCancel in the containerWait handler This handler previously added this functionality manually but now uses the existing wrapper which does it for us. Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Add WaitCondition constants to api/types/container Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Address more ContainerWait review comments - Update ContainerWait backend interface to not return pointer values for container.StateStatus type. - Updated container state's Wait() method comments to clarify that a context MUST be used for cancelling the request, setting timeouts, and to avoid goroutine leaks. - Removed unnecessary buffering when making channels in the client's ContainerWait methods. - Renamed result and error channels in client's ContainerWait methods to clarify that only a single result or error value would be sent on the channel. Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Move container.WaitCondition type to separate file ... to avoid conflict with swagger-generated code for API response Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Address more ContainerWait review comments Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn)
2017-03-30 23:01:41 -04:00
status := <-cntr.Wait(ctx, container.WaitConditionNotRunning)
if status.Err() != nil {
// Ensure that we don't kill ourselves
Update ContainerWait API This patch adds the untilRemoved option to the ContainerWait API which allows the client to wait until the container is not only exited but also removed. This patch also adds some more CLI integration tests for waiting for a created container and waiting with the new --until-removed flag. Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Handle detach sequence in CLI Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Update Container Wait Conditions Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Apply container wait changes to API 1.30 The set of changes to the containerWait API missed the cut for the Docker 17.05 release (API version 1.29). This patch bumps the version checks to use 1.30 instead. This patch also makes a minor update to a testfile which was added to the builder/dockerfile package. Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Remove wait changes from CLI Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Address minor nits on wait changes - Changed the name of the tty Proxy wrapper to `escapeProxy` - Removed the unnecessary Error() method on container.State - Fixes a typo in comment (repeated word) Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Use router.WithCancel in the containerWait handler This handler previously added this functionality manually but now uses the existing wrapper which does it for us. Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Add WaitCondition constants to api/types/container Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Address more ContainerWait review comments - Update ContainerWait backend interface to not return pointer values for container.StateStatus type. - Updated container state's Wait() method comments to clarify that a context MUST be used for cancelling the request, setting timeouts, and to avoid goroutine leaks. - Removed unnecessary buffering when making channels in the client's ContainerWait methods. - Renamed result and error channels in client's ContainerWait methods to clarify that only a single result or error value would be sent on the channel. Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Move container.WaitCondition type to separate file ... to avoid conflict with swagger-generated code for API response Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn) Address more ContainerWait review comments Docker-DCO-1.1-Signed-off-by: Josh Hawn <josh.hawn@docker.com> (github: jlhawn)
2017-03-30 23:01:41 -04:00
if pid := cntr.GetPID(); pid != 0 {
logrus.Infof("Container %s failed to exit within 10 seconds of kill - trying direct SIGKILL", stringid.TruncateID(cntr.ID))
if err := unix.Kill(pid, 9); err != nil {
if err != unix.ESRCH {
return err
}
e := errNoSuchProcess{pid, 9}
logrus.Debug(e)
return e
}
// In case there were some exceptions(e.g., state of zombie and D)
if system.IsProcessAlive(pid) {
// Since we can not kill a zombie pid, add zombie check here
isZombie, err := system.IsProcessZombie(pid)
if err != nil {
logrus.Warnf("Container %s state is invalid", stringid.TruncateID(cntr.ID))
return err
}
if isZombie {
return errdefs.System(errors.Errorf("container %s PID %d is zombie and can not be killed. Use the --init option when creating containers to run an init inside the container that forwards signals and reaps processes", stringid.TruncateID(cntr.ID), pid))
}
}
}
}
return nil
}
func isLinkable(child *container.Container) bool {
// A container is linkable only if it belongs to the default network
_, ok := child.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
return ok
}
Remove static errors from errors package. Moving all strings to the errors package wasn't a good idea after all. Our custom implementation of Go errors predates everything that's nice and good about working with errors in Go. Take as an example what we have to do to get an error message: ```go func GetErrorMessage(err error) string { switch err.(type) { case errcode.Error: e, _ := err.(errcode.Error) return e.Message case errcode.ErrorCode: ec, _ := err.(errcode.ErrorCode) return ec.Message() default: return err.Error() } } ``` This goes against every good practice for Go development. The language already provides a simple, intuitive and standard way to get error messages, that is calling the `Error()` method from an error. Reinventing the error interface is a mistake. Our custom implementation also makes very hard to reason about errors, another nice thing about Go. I found several (>10) error declarations that we don't use anywhere. This is a clear sign about how little we know about the errors we return. I also found several error usages where the number of arguments was different than the parameters declared in the error, another clear example of how difficult is to reason about errors. Moreover, our custom implementation didn't really make easier for people to return custom HTTP status code depending on the errors. Again, it's hard to reason about when to set custom codes and how. Take an example what we have to do to extract the message and status code from an error before returning a response from the API: ```go switch err.(type) { case errcode.ErrorCode: daError, _ := err.(errcode.ErrorCode) statusCode = daError.Descriptor().HTTPStatusCode errMsg = daError.Message() case errcode.Error: // For reference, if you're looking for a particular error // then you can do something like : // import ( derr "github.com/docker/docker/errors" ) // if daError.ErrorCode() == derr.ErrorCodeNoSuchContainer { ... } daError, _ := err.(errcode.Error) statusCode = daError.ErrorCode().Descriptor().HTTPStatusCode errMsg = daError.Message default: // This part of will be removed once we've // converted everything over to use the errcode package // FIXME: this is brittle and should not be necessary. // If we need to differentiate between different possible error types, // we should create appropriate error types with clearly defined meaning errStr := strings.ToLower(err.Error()) for keyword, status := range map[string]int{ "not found": http.StatusNotFound, "no such": http.StatusNotFound, "bad parameter": http.StatusBadRequest, "conflict": http.StatusConflict, "impossible": http.StatusNotAcceptable, "wrong login/password": http.StatusUnauthorized, "hasn't been activated": http.StatusForbidden, } { if strings.Contains(errStr, keyword) { statusCode = status break } } } ``` You can notice two things in that code: 1. We have to explain how errors work, because our implementation goes against how easy to use Go errors are. 2. At no moment we arrived to remove that `switch` statement that was the original reason to use our custom implementation. This change removes all our status errors from the errors package and puts them back in their specific contexts. IT puts the messages back with their contexts. That way, we know right away when errors used and how to generate their messages. It uses custom interfaces to reason about errors. Errors that need to response with a custom status code MUST implementent this simple interface: ```go type errorWithStatus interface { HTTPErrorStatusCode() int } ``` This interface is very straightforward to implement. It also preserves Go errors real behavior, getting the message is as simple as using the `Error()` method. I included helper functions to generate errors that use custom status code in `errors/errors.go`. By doing this, we remove the hard dependency we have eeverywhere to our custom errors package. Yes, you can use it as a helper to generate error, but it's still very easy to generate errors without it. Please, read this fantastic blog post about errors in Go: http://dave.cheney.net/2014/12/24/inspecting-errors Signed-off-by: David Calavera <david.calavera@gmail.com>
2016-02-25 10:53:35 -05:00
func enableIPOnPredefinedNetwork() bool {
return false
}
// serviceDiscoveryOnDefaultNetwork indicates if service discovery is supported on the default network
func serviceDiscoveryOnDefaultNetwork() bool {
return false
}
func (daemon *Daemon) setupPathsAndSandboxOptions(container *container.Container, sboxOptions *[]libnetwork.SandboxOption) error {
var err error
if container.HostConfig.NetworkMode.IsHost() {
// Point to the host files, so that will be copied into the container running in host mode
*sboxOptions = append(*sboxOptions, libnetwork.OptionOriginHostsPath("/etc/hosts"))
}
// Copy the host's resolv.conf for the container (/etc/resolv.conf or /run/systemd/resolve/resolv.conf)
*sboxOptions = append(*sboxOptions, libnetwork.OptionOriginResolvConfPath(daemon.configStore.GetResolvConf()))
container.HostsPath, err = container.GetRootResourcePath("hosts")
if err != nil {
return err
}
*sboxOptions = append(*sboxOptions, libnetwork.OptionHostsPath(container.HostsPath))
container.ResolvConfPath, err = container.GetRootResourcePath("resolv.conf")
if err != nil {
return err
}
*sboxOptions = append(*sboxOptions, libnetwork.OptionResolvConfPath(container.ResolvConfPath))
return nil
}
func (daemon *Daemon) initializeNetworkingPaths(container *container.Container, nc *container.Container) error {
container.HostnamePath = nc.HostnamePath
container.HostsPath = nc.HostsPath
container.ResolvConfPath = nc.ResolvConfPath
return nil
}
func (daemon *Daemon) setupContainerMountsRoot(c *container.Container) error {
// get the root mount path so we can make it unbindable
p, err := c.MountsResourcePath("")
if err != nil {
return err
}
return idtools.MkdirAllAndChown(p, 0700, daemon.idMapping.RootPair())
}