2015-11-12 14:55:17 -05:00
|
|
|
// +build linux freebsd
|
|
|
|
|
2018-02-05 16:05:59 -05:00
|
|
|
package daemon // import "github.com/docker/docker/daemon"
|
2015-11-12 14:55:17 -05:00
|
|
|
|
|
|
|
import (
|
2017-03-30 16:52:40 -04:00
|
|
|
"context"
|
2015-11-12 14:55:17 -05:00
|
|
|
"fmt"
|
2016-10-19 12:22:02 -04:00
|
|
|
"io/ioutil"
|
2015-11-12 14:55:17 -05:00
|
|
|
"os"
|
|
|
|
"path/filepath"
|
|
|
|
"strconv"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/docker/docker/container"
|
|
|
|
"github.com/docker/docker/daemon/links"
|
2018-01-11 14:53:06 -05:00
|
|
|
"github.com/docker/docker/errdefs"
|
2015-11-12 14:55:17 -05:00
|
|
|
"github.com/docker/docker/pkg/idtools"
|
|
|
|
"github.com/docker/docker/pkg/stringid"
|
|
|
|
"github.com/docker/docker/runconfig"
|
2016-10-13 17:51:10 -04:00
|
|
|
"github.com/docker/libnetwork"
|
2020-03-13 19:38:24 -04:00
|
|
|
"github.com/moby/sys/mount"
|
2017-04-18 09:26:36 -04:00
|
|
|
"github.com/opencontainers/selinux/go-selinux/label"
|
2016-10-26 16:30:53 -04:00
|
|
|
"github.com/pkg/errors"
|
2017-07-26 17:42:13 -04:00
|
|
|
"github.com/sirupsen/logrus"
|
2017-05-23 10:22:32 -04:00
|
|
|
"golang.org/x/sys/unix"
|
2015-11-12 14:55:17 -05:00
|
|
|
)
|
|
|
|
|
|
|
|
func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]string, error) {
|
|
|
|
var env []string
|
2015-09-03 20:51:04 -04:00
|
|
|
children := daemon.children(container)
|
2015-11-12 14:55:17 -05:00
|
|
|
|
2016-03-09 23:33:21 -05:00
|
|
|
bridgeSettings := container.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
|
2016-08-23 19:50:15 -04:00
|
|
|
if bridgeSettings == nil || bridgeSettings.EndpointSettings == nil {
|
2015-11-12 14:55:17 -05:00
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
2015-09-03 20:51:04 -04:00
|
|
|
for linkAlias, child := range children {
|
|
|
|
if !child.IsRunning() {
|
Remove static errors from errors package.
Moving all strings to the errors package wasn't a good idea after all.
Our custom implementation of Go errors predates everything that's nice
and good about working with errors in Go. Take as an example what we
have to do to get an error message:
```go
func GetErrorMessage(err error) string {
switch err.(type) {
case errcode.Error:
e, _ := err.(errcode.Error)
return e.Message
case errcode.ErrorCode:
ec, _ := err.(errcode.ErrorCode)
return ec.Message()
default:
return err.Error()
}
}
```
This goes against every good practice for Go development. The language already provides a simple, intuitive and standard way to get error messages, that is calling the `Error()` method from an error. Reinventing the error interface is a mistake.
Our custom implementation also makes very hard to reason about errors, another nice thing about Go. I found several (>10) error declarations that we don't use anywhere. This is a clear sign about how little we know about the errors we return. I also found several error usages where the number of arguments was different than the parameters declared in the error, another clear example of how difficult is to reason about errors.
Moreover, our custom implementation didn't really make easier for people to return custom HTTP status code depending on the errors. Again, it's hard to reason about when to set custom codes and how. Take an example what we have to do to extract the message and status code from an error before returning a response from the API:
```go
switch err.(type) {
case errcode.ErrorCode:
daError, _ := err.(errcode.ErrorCode)
statusCode = daError.Descriptor().HTTPStatusCode
errMsg = daError.Message()
case errcode.Error:
// For reference, if you're looking for a particular error
// then you can do something like :
// import ( derr "github.com/docker/docker/errors" )
// if daError.ErrorCode() == derr.ErrorCodeNoSuchContainer { ... }
daError, _ := err.(errcode.Error)
statusCode = daError.ErrorCode().Descriptor().HTTPStatusCode
errMsg = daError.Message
default:
// This part of will be removed once we've
// converted everything over to use the errcode package
// FIXME: this is brittle and should not be necessary.
// If we need to differentiate between different possible error types,
// we should create appropriate error types with clearly defined meaning
errStr := strings.ToLower(err.Error())
for keyword, status := range map[string]int{
"not found": http.StatusNotFound,
"no such": http.StatusNotFound,
"bad parameter": http.StatusBadRequest,
"conflict": http.StatusConflict,
"impossible": http.StatusNotAcceptable,
"wrong login/password": http.StatusUnauthorized,
"hasn't been activated": http.StatusForbidden,
} {
if strings.Contains(errStr, keyword) {
statusCode = status
break
}
}
}
```
You can notice two things in that code:
1. We have to explain how errors work, because our implementation goes against how easy to use Go errors are.
2. At no moment we arrived to remove that `switch` statement that was the original reason to use our custom implementation.
This change removes all our status errors from the errors package and puts them back in their specific contexts.
IT puts the messages back with their contexts. That way, we know right away when errors used and how to generate their messages.
It uses custom interfaces to reason about errors. Errors that need to response with a custom status code MUST implementent this simple interface:
```go
type errorWithStatus interface {
HTTPErrorStatusCode() int
}
```
This interface is very straightforward to implement. It also preserves Go errors real behavior, getting the message is as simple as using the `Error()` method.
I included helper functions to generate errors that use custom status code in `errors/errors.go`.
By doing this, we remove the hard dependency we have eeverywhere to our custom errors package. Yes, you can use it as a helper to generate error, but it's still very easy to generate errors without it.
Please, read this fantastic blog post about errors in Go: http://dave.cheney.net/2014/12/24/inspecting-errors
Signed-off-by: David Calavera <david.calavera@gmail.com>
2016-02-25 10:53:35 -05:00
|
|
|
return nil, fmt.Errorf("Cannot link to a non running container: %s AS %s", child.Name, linkAlias)
|
2015-09-03 20:51:04 -04:00
|
|
|
}
|
2015-11-12 14:55:17 -05:00
|
|
|
|
2016-03-09 23:33:21 -05:00
|
|
|
childBridgeSettings := child.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
|
2016-08-23 19:50:15 -04:00
|
|
|
if childBridgeSettings == nil || childBridgeSettings.EndpointSettings == nil {
|
2015-09-03 20:51:04 -04:00
|
|
|
return nil, fmt.Errorf("container %s not attached to default bridge network", child.ID)
|
|
|
|
}
|
2015-11-12 14:55:17 -05:00
|
|
|
|
2015-09-03 20:51:04 -04:00
|
|
|
link := links.NewLink(
|
|
|
|
bridgeSettings.IPAddress,
|
|
|
|
childBridgeSettings.IPAddress,
|
|
|
|
linkAlias,
|
|
|
|
child.Config.Env,
|
|
|
|
child.Config.ExposedPorts,
|
|
|
|
)
|
2015-11-12 14:55:17 -05:00
|
|
|
|
2016-10-13 12:34:19 -04:00
|
|
|
env = append(env, link.ToEnv()...)
|
2015-11-12 14:55:17 -05:00
|
|
|
}
|
2015-09-03 20:51:04 -04:00
|
|
|
|
2015-11-12 14:55:17 -05:00
|
|
|
return env, nil
|
|
|
|
}
|
|
|
|
|
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents are not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severely
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
4. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
|
|
|
func (daemon *Daemon) getIpcContainer(id string) (*container.Container, error) {
|
|
|
|
errMsg := "can't join IPC of container " + id
|
|
|
|
// Check the container exists
|
|
|
|
container, err := daemon.GetContainer(id)
|
2015-11-12 14:55:17 -05:00
|
|
|
if err != nil {
|
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
|
|
|
return nil, errors.Wrap(err, errMsg)
|
2015-11-12 14:55:17 -05:00
|
|
|
}
|
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
|
|
|
// Check the container is running and not restarting
|
|
|
|
if err := daemon.checkContainer(container, containerIsRunning, containerIsNotRestarting); err != nil {
|
|
|
|
return nil, errors.Wrap(err, errMsg)
|
|
|
|
}
|
|
|
|
// Check the container ipc is shareable
|
|
|
|
if st, err := os.Stat(container.ShmPath); err != nil || !st.IsDir() {
|
|
|
|
if err == nil || os.IsNotExist(err) {
|
daemon: use 'private' ipc mode by default
This changes the default ipc mode of daemon/engine to be private,
meaning the containers will not have their /dev/shm bind-mounted
from the host by default. The benefits of doing this are:
1. No leaked mounts. Eliminate a possibility to leak mounts into
other namespaces (and therefore unfortunate errors like "Unable to
remove filesystem for <ID>: remove /var/lib/docker/containers/<ID>/shm:
device or resource busy").
2. Working checkpoint/restore. Make `docker checkpoint`
not lose the contents of `/dev/shm`, but save it to
the dump, and be restored back upon `docker start --checkpoint`
(currently it is lost -- while CRIU handles tmpfs mounts,
the "shareable" mount is seen as external to container,
and thus rightfully ignored).
3. Better security. Currently any container is opened to share
its /dev/shm with any other container.
Obviously, this change will break the following usage scenario:
$ docker run -d --name donor busybox top
$ docker run --rm -it --ipc container:donor busybox sh
Error response from daemon: linux spec namespaces: can't join IPC
of container <ID>: non-shareable IPC (hint: use IpcMode:shareable
for the donor container)
The soution, as hinted by the (amended) error message, is to
explicitly enable donor sharing by using --ipc shareable:
$ docker run -d --name donor --ipc shareable busybox top
Compatibility notes:
1. This only applies to containers created _after_ this change.
Existing containers are not affected and will work fine
as their ipc mode is stored in HostConfig.
2. Old backward compatible behavior ("shareable" containers
by default) can be enabled by either using
`--default-ipc-mode shareable` daemon command line option,
or by adding a `"default-ipc-mode": "shareable"`
line in `/etc/docker/daemon.json` configuration file.
3. If an older client (API < 1.40) is used, a "shareable" container
is created. A test to check that is added.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-11-27 19:10:44 -05:00
|
|
|
return nil, errors.New(errMsg + ": non-shareable IPC (hint: use IpcMode:shareable for the donor container)")
|
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
|
|
|
}
|
|
|
|
// stat() failed?
|
|
|
|
return nil, errors.Wrap(err, errMsg+": unexpected error from stat "+container.ShmPath)
|
|
|
|
}
|
|
|
|
|
|
|
|
return container, nil
|
2015-11-12 14:55:17 -05:00
|
|
|
}
|
|
|
|
|
2016-05-06 14:56:03 -04:00
|
|
|
func (daemon *Daemon) getPidContainer(container *container.Container) (*container.Container, error) {
|
|
|
|
containerID := container.HostConfig.PidMode.Container()
|
2017-02-28 05:11:48 -05:00
|
|
|
container, err := daemon.GetContainer(containerID)
|
2016-05-06 14:56:03 -04:00
|
|
|
if err != nil {
|
2017-10-10 10:45:26 -04:00
|
|
|
return nil, errors.Wrapf(err, "cannot join PID of a non running container: %s", containerID)
|
2016-05-06 14:56:03 -04:00
|
|
|
}
|
2017-02-28 05:11:48 -05:00
|
|
|
return container, daemon.checkContainer(container, containerIsRunning, containerIsNotRestarting)
|
|
|
|
}
|
|
|
|
|
|
|
|
func containerIsRunning(c *container.Container) error {
|
2016-05-06 14:56:03 -04:00
|
|
|
if !c.IsRunning() {
|
2017-11-28 23:09:37 -05:00
|
|
|
return errdefs.Conflict(errors.Errorf("container %s is not running", c.ID))
|
2016-05-06 14:56:03 -04:00
|
|
|
}
|
2017-02-28 05:11:48 -05:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func containerIsNotRestarting(c *container.Container) error {
|
2016-05-06 14:56:03 -04:00
|
|
|
if c.IsRestarting() {
|
2017-02-28 05:11:48 -05:00
|
|
|
return errContainerIsRestarting(c.ID)
|
2016-05-06 14:56:03 -04:00
|
|
|
}
|
2017-02-28 05:11:48 -05:00
|
|
|
return nil
|
2016-05-06 14:56:03 -04:00
|
|
|
}
|
|
|
|
|
2015-11-12 14:55:17 -05:00
|
|
|
func (daemon *Daemon) setupIpcDirs(c *container.Container) error {
|
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
|
|
|
ipcMode := c.HostConfig.IpcMode
|
2016-03-18 14:50:19 -04:00
|
|
|
|
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
|
|
|
switch {
|
|
|
|
case ipcMode.IsContainer():
|
|
|
|
ic, err := daemon.getIpcContainer(ipcMode.Container())
|
2015-11-12 14:55:17 -05:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-03-18 14:50:19 -04:00
|
|
|
c.ShmPath = ic.ShmPath
|
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
|
|
|
|
|
|
|
case ipcMode.IsHost():
|
2016-03-18 14:50:19 -04:00
|
|
|
if _, err := os.Stat("/dev/shm"); err != nil {
|
|
|
|
return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host")
|
2015-11-12 14:55:17 -05:00
|
|
|
}
|
2016-03-18 14:50:19 -04:00
|
|
|
c.ShmPath = "/dev/shm"
|
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
|
|
|
|
|
|
|
case ipcMode.IsPrivate(), ipcMode.IsNone():
|
|
|
|
// c.ShmPath will/should not be used, so make it empty.
|
|
|
|
// Container's /dev/shm mount comes from OCI spec.
|
|
|
|
c.ShmPath = ""
|
|
|
|
|
|
|
|
case ipcMode.IsEmpty():
|
|
|
|
// A container was created by an older version of the daemon.
|
|
|
|
// The default behavior used to be what is now called "shareable".
|
|
|
|
fallthrough
|
|
|
|
|
|
|
|
case ipcMode.IsShareable():
|
2017-11-16 01:20:33 -05:00
|
|
|
rootIDs := daemon.idMapping.RootPair()
|
2016-03-18 14:50:19 -04:00
|
|
|
if !c.HasMountFor("/dev/shm") {
|
|
|
|
shmPath, err := c.ShmResourcePath()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2015-11-12 14:55:17 -05:00
|
|
|
|
2017-05-19 18:06:46 -04:00
|
|
|
if err := idtools.MkdirAllAndChown(shmPath, 0700, rootIDs); err != nil {
|
2016-03-18 14:50:19 -04:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2017-07-24 13:16:38 -04:00
|
|
|
shmproperty := "mode=1777,size=" + strconv.FormatInt(c.HostConfig.ShmSize, 10)
|
2017-05-23 10:22:32 -04:00
|
|
|
if err := unix.Mount("shm", shmPath, "tmpfs", uintptr(unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV), label.FormatMountLabel(shmproperty, c.GetMountLabel())); err != nil {
|
2016-03-18 14:50:19 -04:00
|
|
|
return fmt.Errorf("mounting shm tmpfs: %s", err)
|
|
|
|
}
|
2017-05-19 18:06:46 -04:00
|
|
|
if err := os.Chown(shmPath, rootIDs.UID, rootIDs.GID); err != nil {
|
2016-03-18 14:50:19 -04:00
|
|
|
return err
|
|
|
|
}
|
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
|
|
|
c.ShmPath = shmPath
|
2015-11-12 14:55:17 -05:00
|
|
|
}
|
2016-03-18 14:50:19 -04:00
|
|
|
|
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 17:58:50 -04:00
|
|
|
default:
|
|
|
|
return fmt.Errorf("invalid IPC mode: %v", ipcMode)
|
2015-11-12 14:55:17 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
2016-10-19 12:22:02 -04:00
|
|
|
|
2018-01-11 17:28:56 -05:00
|
|
|
func (daemon *Daemon) setupSecretDir(c *container.Container) (setupErr error) {
|
2018-01-17 10:49:58 -05:00
|
|
|
if len(c.SecretReferences) == 0 && len(c.ConfigReferences) == 0 {
|
2016-10-28 20:16:07 -04:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-01-17 10:49:58 -05:00
|
|
|
if err := daemon.createSecretsDir(c); err != nil {
|
2018-01-11 17:28:56 -05:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
if setupErr != nil {
|
2018-01-17 10:49:58 -05:00
|
|
|
daemon.cleanupSecretDir(c)
|
2018-01-11 17:28:56 -05:00
|
|
|
}
|
|
|
|
}()
|
2016-10-19 12:22:02 -04:00
|
|
|
|
2017-03-16 17:23:33 -04:00
|
|
|
if c.DependencyStore == nil {
|
|
|
|
return fmt.Errorf("secret store is not initialized")
|
|
|
|
}
|
2016-11-15 10:04:36 -05:00
|
|
|
|
2017-06-26 21:46:30 -04:00
|
|
|
// retrieve possible remapped range start for root UID, GID
|
2017-11-16 01:20:33 -05:00
|
|
|
rootIDs := daemon.idMapping.RootPair()
|
2017-06-26 21:46:30 -04:00
|
|
|
|
2017-03-16 17:23:33 -04:00
|
|
|
for _, s := range c.SecretReferences {
|
2016-11-15 10:04:36 -05:00
|
|
|
// TODO (ehazlett): use type switch when more are supported
|
|
|
|
if s.File == nil {
|
2017-03-16 17:23:33 -04:00
|
|
|
logrus.Error("secret target type is not a file target")
|
|
|
|
continue
|
2016-11-15 10:04:36 -05:00
|
|
|
}
|
|
|
|
|
2017-04-28 14:48:52 -04:00
|
|
|
// secrets are created in the SecretMountPath on the host, at a
|
|
|
|
// single level
|
2017-12-18 16:02:23 -05:00
|
|
|
fPath, err := c.SecretFilePath(*s)
|
|
|
|
if err != nil {
|
|
|
|
return errors.Wrap(err, "error getting secret file path")
|
|
|
|
}
|
2017-05-19 18:06:46 -04:00
|
|
|
if err := idtools.MkdirAllAndChown(filepath.Dir(fPath), 0700, rootIDs); err != nil {
|
2016-10-27 14:29:51 -04:00
|
|
|
return errors.Wrap(err, "error creating secret mount path")
|
2016-10-19 12:22:02 -04:00
|
|
|
}
|
|
|
|
|
2016-10-26 16:30:53 -04:00
|
|
|
logrus.WithFields(logrus.Fields{
|
2016-11-15 10:04:36 -05:00
|
|
|
"name": s.File.Name,
|
2016-10-26 16:30:53 -04:00
|
|
|
"path": fPath,
|
|
|
|
}).Debug("injecting secret")
|
2017-06-15 14:11:48 -04:00
|
|
|
secret, err := c.DependencyStore.Secrets().Get(s.SecretID)
|
|
|
|
if err != nil {
|
|
|
|
return errors.Wrap(err, "unable to get secret from secret store")
|
2016-11-15 10:04:36 -05:00
|
|
|
}
|
|
|
|
if err := ioutil.WriteFile(fPath, secret.Spec.Data, s.File.Mode); err != nil {
|
2016-10-27 14:29:51 -04:00
|
|
|
return errors.Wrap(err, "error injecting secret")
|
2016-10-19 12:22:02 -04:00
|
|
|
}
|
|
|
|
|
2016-11-15 10:04:36 -05:00
|
|
|
uid, err := strconv.Atoi(s.File.UID)
|
2016-11-03 14:09:13 -04:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-11-15 10:04:36 -05:00
|
|
|
gid, err := strconv.Atoi(s.File.GID)
|
2016-11-03 14:09:13 -04:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2017-05-19 18:06:46 -04:00
|
|
|
if err := os.Chown(fPath, rootIDs.UID+uid, rootIDs.GID+gid); err != nil {
|
2016-10-27 14:29:51 -04:00
|
|
|
return errors.Wrap(err, "error setting ownership for secret")
|
2016-10-19 12:22:02 -04:00
|
|
|
}
|
2018-01-28 02:13:48 -05:00
|
|
|
if err := os.Chmod(fPath, s.File.Mode); err != nil {
|
|
|
|
return errors.Wrap(err, "error setting file mode for secret")
|
|
|
|
}
|
2016-10-19 12:22:02 -04:00
|
|
|
}
|
|
|
|
|
2019-08-09 09:01:40 -04:00
|
|
|
for _, configRef := range c.ConfigReferences {
|
2017-03-16 17:23:33 -04:00
|
|
|
// TODO (ehazlett): use type switch when more are supported
|
2019-08-09 09:01:40 -04:00
|
|
|
if configRef.File == nil {
|
2019-02-07 15:27:08 -05:00
|
|
|
// Runtime configs are not mounted into the container, but they're
|
|
|
|
// a valid type of config so we should not error when we encounter
|
|
|
|
// one.
|
2019-08-09 09:01:40 -04:00
|
|
|
if configRef.Runtime == nil {
|
2019-02-07 15:27:08 -05:00
|
|
|
logrus.Error("config target type is not a file or runtime target")
|
|
|
|
}
|
|
|
|
// However, in any case, this isn't a file config, so we have no
|
|
|
|
// further work to do
|
2017-03-16 17:23:33 -04:00
|
|
|
continue
|
|
|
|
}
|
2018-01-17 10:49:58 -05:00
|
|
|
|
2019-08-09 09:01:40 -04:00
|
|
|
fPath, err := c.ConfigFilePath(*configRef)
|
2017-06-26 21:46:30 -04:00
|
|
|
if err != nil {
|
2018-01-17 10:49:58 -05:00
|
|
|
return errors.Wrap(err, "error getting config file path for container")
|
2017-12-18 16:02:23 -05:00
|
|
|
}
|
2017-05-19 18:06:46 -04:00
|
|
|
if err := idtools.MkdirAllAndChown(filepath.Dir(fPath), 0700, rootIDs); err != nil {
|
2018-01-11 17:28:56 -05:00
|
|
|
return errors.Wrap(err, "error creating config mount path")
|
2017-03-16 17:23:33 -04:00
|
|
|
}
|
|
|
|
|
2018-01-11 17:28:56 -05:00
|
|
|
logrus.WithFields(logrus.Fields{
|
2019-08-09 09:01:40 -04:00
|
|
|
"name": configRef.File.Name,
|
2018-01-11 17:28:56 -05:00
|
|
|
"path": fPath,
|
|
|
|
}).Debug("injecting config")
|
2019-08-09 09:01:40 -04:00
|
|
|
config, err := c.DependencyStore.Configs().Get(configRef.ConfigID)
|
2018-01-11 17:28:56 -05:00
|
|
|
if err != nil {
|
|
|
|
return errors.Wrap(err, "unable to get config from config store")
|
|
|
|
}
|
2019-08-09 09:01:40 -04:00
|
|
|
if err := ioutil.WriteFile(fPath, config.Spec.Data, configRef.File.Mode); err != nil {
|
2017-03-16 17:23:33 -04:00
|
|
|
return errors.Wrap(err, "error injecting config")
|
|
|
|
}
|
|
|
|
|
2019-08-09 09:01:40 -04:00
|
|
|
uid, err := strconv.Atoi(configRef.File.UID)
|
2017-03-16 17:23:33 -04:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2019-08-09 09:01:40 -04:00
|
|
|
gid, err := strconv.Atoi(configRef.File.GID)
|
2017-03-16 17:23:33 -04:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2017-05-19 18:06:46 -04:00
|
|
|
if err := os.Chown(fPath, rootIDs.UID+uid, rootIDs.GID+gid); err != nil {
|
2017-03-16 17:23:33 -04:00
|
|
|
return errors.Wrap(err, "error setting ownership for config")
|
|
|
|
}
|
2019-08-09 09:01:40 -04:00
|
|
|
if err := os.Chmod(fPath, configRef.File.Mode); err != nil {
|
2018-01-28 02:13:48 -05:00
|
|
|
return errors.Wrap(err, "error setting file mode for config")
|
|
|
|
}
|
2018-01-17 10:49:58 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
return daemon.remountSecretDir(c)
|
|
|
|
}
|
|
|
|
|
|
|
|
// createSecretsDir is used to create a dir suitable for storing container secrets.
|
|
|
|
// In practice this is using a tmpfs mount and is used for both "configs" and "secrets"
|
|
|
|
func (daemon *Daemon) createSecretsDir(c *container.Container) error {
|
|
|
|
// retrieve possible remapped range start for root UID, GID
|
2017-11-16 01:20:33 -05:00
|
|
|
rootIDs := daemon.idMapping.RootPair()
|
2018-01-17 10:49:58 -05:00
|
|
|
dir, err := c.SecretMountPath()
|
|
|
|
if err != nil {
|
|
|
|
return errors.Wrap(err, "error getting container secrets dir")
|
|
|
|
}
|
|
|
|
|
|
|
|
// create tmpfs
|
|
|
|
if err := idtools.MkdirAllAndChown(dir, 0700, rootIDs); err != nil {
|
|
|
|
return errors.Wrap(err, "error creating secret local mount path")
|
|
|
|
}
|
|
|
|
|
|
|
|
tmpfsOwnership := fmt.Sprintf("uid=%d,gid=%d", rootIDs.UID, rootIDs.GID)
|
|
|
|
if err := mount.Mount("tmpfs", dir, "tmpfs", "nodev,nosuid,noexec,"+tmpfsOwnership); err != nil {
|
|
|
|
return errors.Wrap(err, "unable to setup secret mount")
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (daemon *Daemon) remountSecretDir(c *container.Container) error {
|
|
|
|
dir, err := c.SecretMountPath()
|
|
|
|
if err != nil {
|
|
|
|
return errors.Wrap(err, "error getting container secrets path")
|
2017-03-16 17:23:33 -04:00
|
|
|
}
|
2018-01-17 10:49:58 -05:00
|
|
|
if err := label.Relabel(dir, c.MountLabel, false); err != nil {
|
|
|
|
logrus.WithError(err).WithField("dir", dir).Warn("Error while attempting to set selinux label")
|
|
|
|
}
|
2017-11-16 01:20:33 -05:00
|
|
|
rootIDs := daemon.idMapping.RootPair()
|
2018-01-17 10:49:58 -05:00
|
|
|
tmpfsOwnership := fmt.Sprintf("uid=%d,gid=%d", rootIDs.UID, rootIDs.GID)
|
2017-03-16 17:23:33 -04:00
|
|
|
|
2018-01-17 10:49:58 -05:00
|
|
|
// remount secrets ro
|
|
|
|
if err := mount.Mount("tmpfs", dir, "tmpfs", "remount,ro,"+tmpfsOwnership); err != nil {
|
|
|
|
return errors.Wrap(err, "unable to remount dir as readonly")
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (daemon *Daemon) cleanupSecretDir(c *container.Container) {
|
|
|
|
dir, err := c.SecretMountPath()
|
|
|
|
if err != nil {
|
|
|
|
logrus.WithError(err).WithField("container", c.ID).Warn("error getting secrets mount path for container")
|
|
|
|
}
|
|
|
|
if err := mount.RecursiveUnmount(dir); err != nil {
|
2018-08-08 11:45:00 -04:00
|
|
|
logrus.WithField("dir", dir).WithError(err).Warn("Error while attempting to unmount dir, this may prevent removal of container.")
|
2018-01-17 10:49:58 -05:00
|
|
|
}
|
|
|
|
if err := os.RemoveAll(dir); err != nil && !os.IsNotExist(err) {
|
|
|
|
logrus.WithField("dir", dir).WithError(err).Error("Error removing dir.")
|
|
|
|
}
|
2017-03-16 17:23:33 -04:00
|
|
|
}
|
|
|
|
|
2017-03-30 23:01:41 -04:00
|
|
|
func killProcessDirectly(cntr *container.Container) error {
|
2017-03-30 16:52:40 -04:00
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
|
|
|
defer cancel()
|
|
|
|
|
|
|
|
// Block until the container to stops or timeout.
|
2017-03-30 23:01:41 -04:00
|
|
|
status := <-cntr.Wait(ctx, container.WaitConditionNotRunning)
|
2017-03-30 16:52:40 -04:00
|
|
|
if status.Err() != nil {
|
2015-11-12 14:55:17 -05:00
|
|
|
// Ensure that we don't kill ourselves
|
2017-03-30 23:01:41 -04:00
|
|
|
if pid := cntr.GetPID(); pid != 0 {
|
|
|
|
logrus.Infof("Container %s failed to exit within 10 seconds of kill - trying direct SIGKILL", stringid.TruncateID(cntr.ID))
|
2017-05-23 10:22:32 -04:00
|
|
|
if err := unix.Kill(pid, 9); err != nil {
|
|
|
|
if err != unix.ESRCH {
|
2015-11-12 14:55:17 -05:00
|
|
|
return err
|
|
|
|
}
|
2016-03-04 15:41:06 -05:00
|
|
|
e := errNoSuchProcess{pid, 9}
|
|
|
|
logrus.Debug(e)
|
|
|
|
return e
|
2015-11-12 14:55:17 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func isLinkable(child *container.Container) bool {
|
|
|
|
// A container is linkable only if it belongs to the default network
|
2016-03-09 23:33:21 -05:00
|
|
|
_, ok := child.NetworkSettings.Networks[runconfig.DefaultDaemonNetworkMode().NetworkName()]
|
2015-11-12 14:55:17 -05:00
|
|
|
return ok
|
|
|
|
}
|
Remove static errors from errors package.
Moving all strings to the errors package wasn't a good idea after all.
Our custom implementation of Go errors predates everything that's nice
and good about working with errors in Go. Take as an example what we
have to do to get an error message:
```go
func GetErrorMessage(err error) string {
switch err.(type) {
case errcode.Error:
e, _ := err.(errcode.Error)
return e.Message
case errcode.ErrorCode:
ec, _ := err.(errcode.ErrorCode)
return ec.Message()
default:
return err.Error()
}
}
```
This goes against every good practice for Go development. The language already provides a simple, intuitive and standard way to get error messages, that is calling the `Error()` method from an error. Reinventing the error interface is a mistake.
Our custom implementation also makes it very hard to reason about errors, another nice thing about Go. I found several (>10) error declarations that we don't use anywhere. This is a clear sign of how little we know about the errors we return. I also found several error usages where the number of arguments was different from the parameters declared in the error, another clear example of how difficult it is to reason about errors.
Moreover, our custom implementation didn't really make it easier for people to return custom HTTP status codes depending on the errors. Again, it's hard to reason about when to set custom codes and how. Take as an example what we have to do to extract the message and status code from an error before returning a response from the API:
```go
switch err.(type) {
case errcode.ErrorCode:
daError, _ := err.(errcode.ErrorCode)
statusCode = daError.Descriptor().HTTPStatusCode
errMsg = daError.Message()
case errcode.Error:
// For reference, if you're looking for a particular error
// then you can do something like :
// import ( derr "github.com/docker/docker/errors" )
// if daError.ErrorCode() == derr.ErrorCodeNoSuchContainer { ... }
daError, _ := err.(errcode.Error)
statusCode = daError.ErrorCode().Descriptor().HTTPStatusCode
errMsg = daError.Message
default:
// This part of will be removed once we've
// converted everything over to use the errcode package
// FIXME: this is brittle and should not be necessary.
// If we need to differentiate between different possible error types,
// we should create appropriate error types with clearly defined meaning
errStr := strings.ToLower(err.Error())
for keyword, status := range map[string]int{
"not found": http.StatusNotFound,
"no such": http.StatusNotFound,
"bad parameter": http.StatusBadRequest,
"conflict": http.StatusConflict,
"impossible": http.StatusNotAcceptable,
"wrong login/password": http.StatusUnauthorized,
"hasn't been activated": http.StatusForbidden,
} {
if strings.Contains(errStr, keyword) {
statusCode = status
break
}
}
}
```
You can notice two things in that code:
1. We have to explain how errors work, because our implementation goes against how easy to use Go errors are.
2. At no point did we manage to remove that `switch` statement that was the original reason to use our custom implementation.
This change removes all our status errors from the errors package and puts them back in their specific contexts.
It puts the messages back with their contexts. That way, we know right away when errors are used and how to generate their messages.
It uses custom interfaces to reason about errors. Errors that need to respond with a custom status code MUST implement this simple interface:
```go
type errorWithStatus interface {
HTTPErrorStatusCode() int
}
```
This interface is very straightforward to implement. It also preserves Go errors real behavior, getting the message is as simple as using the `Error()` method.
I included helper functions to generate errors that use custom status code in `errors/errors.go`.
By doing this, we remove the hard dependency we have everywhere on our custom errors package. Yes, you can use it as a helper to generate errors, but it's still very easy to generate errors without it.
Please, read this fantastic blog post about errors in Go: http://dave.cheney.net/2014/12/24/inspecting-errors
Signed-off-by: David Calavera <david.calavera@gmail.com>
2016-02-25 10:53:35 -05:00
|
|
|
|
2016-04-20 18:35:11 -04:00
|
|
|
// enableIPOnPredefinedNetwork always reports false in this (linux/freebsd)
// build of the daemon.
// NOTE(review): judging by the name, this gates user-specified IP addresses
// on predefined networks — confirm against the callers.
func enableIPOnPredefinedNetwork() bool {
	const enabled = false
	return enabled
}
|
2016-09-21 15:02:20 -04:00
|
|
|
|
2019-03-20 05:26:43 -04:00
|
|
|
// serviceDiscoveryOnDefaultNetwork indicates whether service discovery is
// supported on the default network. In this (linux/freebsd) build it is not,
// so the function always reports false.
func serviceDiscoveryOnDefaultNetwork() bool {
	const supported = false
	return supported
}
|
|
|
|
|
2018-07-17 15:11:38 -04:00
|
|
|
func (daemon *Daemon) setupPathsAndSandboxOptions(container *container.Container, sboxOptions *[]libnetwork.SandboxOption) error {
|
2016-10-13 17:51:10 -04:00
|
|
|
var err error
|
|
|
|
|
2018-07-17 15:11:38 -04:00
|
|
|
if container.HostConfig.NetworkMode.IsHost() {
|
|
|
|
// Point to the host files, so that will be copied into the container running in host mode
|
|
|
|
*sboxOptions = append(*sboxOptions, libnetwork.OptionOriginHostsPath("/etc/hosts"))
|
|
|
|
}
|
|
|
|
|
2019-01-10 16:58:55 -05:00
|
|
|
// Copy the host's resolv.conf for the container (/etc/resolv.conf or /run/systemd/resolve/resolv.conf)
|
|
|
|
*sboxOptions = append(*sboxOptions, libnetwork.OptionOriginResolvConfPath(daemon.configStore.GetResolvConf()))
|
|
|
|
|
2016-10-13 17:51:10 -04:00
|
|
|
container.HostsPath, err = container.GetRootResourcePath("hosts")
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
*sboxOptions = append(*sboxOptions, libnetwork.OptionHostsPath(container.HostsPath))
|
|
|
|
|
|
|
|
container.ResolvConfPath, err = container.GetRootResourcePath("resolv.conf")
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
*sboxOptions = append(*sboxOptions, libnetwork.OptionResolvConfPath(container.ResolvConfPath))
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2017-06-12 18:20:23 -04:00
|
|
|
func (daemon *Daemon) initializeNetworkingPaths(container *container.Container, nc *container.Container) error {
|
2016-10-13 17:51:10 -04:00
|
|
|
container.HostnamePath = nc.HostnamePath
|
|
|
|
container.HostsPath = nc.HostsPath
|
|
|
|
container.ResolvConfPath = nc.ResolvConfPath
|
2017-06-12 18:20:23 -04:00
|
|
|
return nil
|
2016-10-13 17:51:10 -04:00
|
|
|
}
|
2017-12-18 16:02:23 -05:00
|
|
|
|
|
|
|
func (daemon *Daemon) setupContainerMountsRoot(c *container.Container) error {
|
|
|
|
// get the root mount path so we can make it unbindable
|
|
|
|
p, err := c.MountsResourcePath("")
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2017-11-16 01:20:33 -05:00
|
|
|
return idtools.MkdirAllAndChown(p, 0700, daemon.idMapping.RootPair())
|
2017-12-18 16:02:23 -05:00
|
|
|
}
|