mirror of https://github.com/moby/moby.git

merge master

Docker-DCO-1.1-Signed-off-by: Victor Vieux <victor.vieux@docker.com> (github: vieux)

commit 51d280f944

39 changed files with 1774 additions and 322 deletions
@@ -6,4 +6,4 @@ Michael Crosby <michael@crosbymichael.com> (@crosbymichael)
 api.go: Victor Vieux <victor@dotcloud.com> (@vieux)
 Dockerfile: Tianon Gravi <admwiggin@gmail.com> (@tianon)
 Makefile: Tianon Gravi <admwiggin@gmail.com> (@tianon)
-Vagrantfile: Daniel Mizyrycki <daniel@dotcloud.com> (@mzdaniel)
+Vagrantfile: Cristian Staretu <cristian.staretu@gmail.com> (@unclejack)
5 Makefile

@@ -1,4 +1,4 @@
-.PHONY: all binary build cross default docs docs-build docs-shell shell test
+.PHONY: all binary build cross default docs docs-build docs-shell shell test test-integration
 
 GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD)
 DOCKER_IMAGE := docker:$(GIT_BRANCH)
@@ -25,6 +25,9 @@ docs-shell: docs-build
 test: build
 	$(DOCKER_RUN_DOCKER) hack/make.sh test test-integration
 
+test-integration: build
+	$(DOCKER_RUN_DOCKER) hack/make.sh test-integration
+
 shell: build
 	$(DOCKER_RUN_DOCKER) bash
 
16 Vagrantfile (vendored)

@@ -8,10 +8,9 @@ AWS_BOX_URI = ENV['BOX_URI'] || "https://github.com/mitchellh/vagrant-aws/raw/ma
 AWS_REGION = ENV['AWS_REGION'] || "us-east-1"
 AWS_AMI = ENV['AWS_AMI'] || "ami-69f5a900"
 AWS_INSTANCE_TYPE = ENV['AWS_INSTANCE_TYPE'] || 't1.micro'
 
 FORWARD_DOCKER_PORTS = ENV['FORWARD_DOCKER_PORTS']
-
-SSH_PRIVKEY_PATH = ENV["SSH_PRIVKEY_PATH"]
+SSH_PRIVKEY_PATH = ENV['SSH_PRIVKEY_PATH']
+PRIVATE_NETWORK = ENV['PRIVATE_NETWORK']
 
 # A script to upgrade from the 12.04 kernel to the raring backport kernel (3.8)
 # and install docker.
@@ -174,3 +173,14 @@ if !FORWARD_DOCKER_PORTS.nil?
     end
   end
 end
+
+if !PRIVATE_NETWORK.nil?
+  Vagrant::VERSION < "1.1.0" and Vagrant::Config.run do |config|
+    config.vm.network :hostonly, PRIVATE_NETWORK
+  end
+
+  Vagrant::VERSION >= "1.1.0" and Vagrant.configure("2") do |config|
+    config.vm.network "private_network", ip: PRIVATE_NETWORK
+  end
+end
@@ -1678,7 +1678,7 @@ func (cli *DockerCli) CmdSearch(args ...string) error {
 	v := url.Values{}
 	v.Set("term", cmd.Arg(0))
 
-	body, _, err := readBody(cli.call("GET", "/images/search?"+v.Encode(), nil, false))
+	body, _, err := readBody(cli.call("GET", "/images/search?"+v.Encode(), nil, true))
 
 	if err != nil {
 		return err
28 config.go

@@ -23,29 +23,31 @@ type DaemonConfig struct {
 
 // ConfigFromJob creates and returns a new DaemonConfig object
 // by parsing the contents of a job's environment.
-func ConfigFromJob(job *engine.Job) *DaemonConfig {
-	var config DaemonConfig
-	config.Pidfile = job.Getenv("Pidfile")
-	config.Root = job.Getenv("Root")
-	config.AutoRestart = job.GetenvBool("AutoRestart")
+func DaemonConfigFromJob(job *engine.Job) *DaemonConfig {
+	config := &DaemonConfig{
+		Pidfile:                     job.Getenv("Pidfile"),
+		Root:                        job.Getenv("Root"),
+		AutoRestart:                 job.GetenvBool("AutoRestart"),
+		EnableIptables:              job.GetenvBool("EnableIptables"),
+		EnableIpForward:             job.GetenvBool("EnableIpForward"),
+		BridgeIp:                    job.Getenv("BridgeIp"),
+		DefaultIp:                   net.ParseIP(job.Getenv("DefaultIp")),
+		InterContainerCommunication: job.GetenvBool("InterContainerCommunication"),
+		GraphDriver:                 job.Getenv("GraphDriver"),
+	}
 	if dns := job.GetenvList("Dns"); dns != nil {
 		config.Dns = dns
 	}
-	config.EnableIptables = job.GetenvBool("EnableIptables")
-	config.EnableIpForward = job.GetenvBool("EnableIpForward")
 	if br := job.Getenv("BridgeIface"); br != "" {
 		config.BridgeIface = br
 	} else {
 		config.BridgeIface = DefaultNetworkBridge
 	}
-	config.BridgeIp = job.Getenv("BridgeIp")
-	config.DefaultIp = net.ParseIP(job.Getenv("DefaultIp"))
-	config.InterContainerCommunication = job.GetenvBool("InterContainerCommunication")
-	config.GraphDriver = job.Getenv("GraphDriver")
-	if mtu := job.GetenvInt("Mtu"); mtu != -1 {
+	if mtu := job.GetenvInt("Mtu"); mtu != 0 {
 		config.Mtu = mtu
 	} else {
 		config.Mtu = DefaultNetworkMtu
 	}
-	return &config
+
+	return config
 }
58 container.go

@@ -104,6 +104,46 @@ type Config struct {
 	NetworkDisabled bool
 }
 
+func ContainerConfigFromJob(job *engine.Job) *Config {
+	config := &Config{
+		Hostname:        job.Getenv("Hostname"),
+		Domainname:      job.Getenv("Domainname"),
+		User:            job.Getenv("User"),
+		Memory:          job.GetenvInt64("Memory"),
+		MemorySwap:      job.GetenvInt64("MemorySwap"),
+		CpuShares:       job.GetenvInt64("CpuShares"),
+		AttachStdin:     job.GetenvBool("AttachStdin"),
+		AttachStdout:    job.GetenvBool("AttachStdout"),
+		AttachStderr:    job.GetenvBool("AttachStderr"),
+		Tty:             job.GetenvBool("Tty"),
+		OpenStdin:       job.GetenvBool("OpenStdin"),
+		StdinOnce:       job.GetenvBool("StdinOnce"),
+		Image:           job.Getenv("Image"),
+		VolumesFrom:     job.Getenv("VolumesFrom"),
+		WorkingDir:      job.Getenv("WorkingDir"),
+		NetworkDisabled: job.GetenvBool("NetworkDisabled"),
+	}
+	job.GetenvJson("ExposedPorts", &config.ExposedPorts)
+	job.GetenvJson("Volumes", &config.Volumes)
+	if PortSpecs := job.GetenvList("PortSpecs"); PortSpecs != nil {
+		config.PortSpecs = PortSpecs
+	}
+	if Env := job.GetenvList("Env"); Env != nil {
+		config.Env = Env
+	}
+	if Cmd := job.GetenvList("Cmd"); Cmd != nil {
+		config.Cmd = Cmd
+	}
+	if Dns := job.GetenvList("Dns"); Dns != nil {
+		config.Dns = Dns
+	}
+	if Entrypoint := job.GetenvList("Entrypoint"); Entrypoint != nil {
+		config.Entrypoint = Entrypoint
+	}
+
+	return config
+}
+
 type HostConfig struct {
 	Binds           []string
 	ContainerIDFile string
@@ -114,6 +154,24 @@ type HostConfig struct {
 	PublishAllPorts bool
 }
 
+func ContainerHostConfigFromJob(job *engine.Job) *HostConfig {
+	hostConfig := &HostConfig{
+		ContainerIDFile: job.Getenv("ContainerIDFile"),
+		Privileged:      job.GetenvBool("Privileged"),
+		PublishAllPorts: job.GetenvBool("PublishAllPorts"),
+	}
+	job.GetenvJson("LxcConf", &hostConfig.LxcConf)
+	job.GetenvJson("PortBindings", &hostConfig.PortBindings)
+	if Binds := job.GetenvList("Binds"); Binds != nil {
+		hostConfig.Binds = Binds
+	}
+	if Links := job.GetenvList("Links"); Links != nil {
+		hostConfig.Links = Links
+	}
+
+	return hostConfig
+}
+
 type BindMap struct {
 	SrcPath string
 	DstPath string
@@ -1,11 +1,11 @@
 [Unit]
-Description=Docker Application Container Engine
+Description=Docker Application Container Engine
+Documentation=http://docs.docker.io
 After=network.target
 
 [Service]
-ExecStartPre=/bin/mount --make-rprivate /
 ExecStart=/usr/bin/docker -d
 Restart=on-failure
 
 [Install]
 WantedBy=multi-user.target
@@ -5,6 +5,7 @@ After=network.target
 
 [Service]
 ExecStart=/usr/bin/docker -d -H fd://
+Restart=on-failure
 
 [Install]
 WantedBy=multi-user.target
@@ -1,4 +1,11 @@
 #!/usr/bin/env bash
+#
+# Create a base CentOS Docker image.
+#
+# This script is useful on systems with rinse available (e.g.,
+# building a CentOS image on Debian). See contrib/mkimage-yum.sh for
+# a way to build CentOS images on systems with yum installed.
+
 set -e
 
 repo="$1"
90 contrib/mkimage-yum.sh (new executable file)

@@ -0,0 +1,90 @@
#!/usr/bin/env bash
#
# Create a base CentOS Docker image.
#
# This script is useful on systems with yum installed (e.g., building
# a CentOS image on CentOS). See contrib/mkimage-rinse.sh for a way
# to build CentOS images on other systems.

usage() {
    cat <<EOOPTS
$(basename $0) [OPTIONS] <name>
OPTIONS:
  -y <yumconf>  The path to the yum config to install packages from. The
                default is /etc/yum.conf.
EOOPTS
    exit 1
}

# option defaults
yum_config=/etc/yum.conf
while getopts ":y:h" opt; do
    case $opt in
        y)
            yum_config=$OPTARG
            ;;
        h)
            usage
            ;;
        \?)
            echo "Invalid option: -$OPTARG"
            usage
            ;;
    esac
done
shift $((OPTIND - 1))
name=$1

if [[ -z $name ]]; then
    usage
fi

#--------------------

target=$(mktemp -d --tmpdir $(basename $0).XXXXXX)

set -x

for dev in console null zero urandom; do
    /sbin/MAKEDEV -d "$target"/dev -x $dev
done

yum -c "$yum_config" --installroot="$target" --setopt=tsflags=nodocs \
    --setopt=group_package_types=mandatory -y groupinstall Core
yum -c "$yum_config" --installroot="$target" -y clean all

cat > "$target"/etc/sysconfig/network <<EOF
NETWORKING=yes
HOSTNAME=localhost.localdomain
EOF

# effectively: febootstrap-minimize --keep-zoneinfo --keep-rpmdb
# --keep-services "$target". Stolen from mkimage-rinse.sh
# locales
rm -rf "$target"/usr/{{lib,share}/locale,{lib,lib64}/gconv,bin/localedef,sbin/build-locale-archive}
# docs
rm -rf "$target"/usr/share/{man,doc,info,gnome/help}
# cracklib
rm -rf "$target"/usr/share/cracklib
# i18n
rm -rf "$target"/usr/share/i18n
# sln
rm -rf "$target"/sbin/sln
# ldconfig
rm -rf "$target"/etc/ld.so.cache
rm -rf "$target"/var/cache/ldconfig/*

version=
if [ -r "$target"/etc/redhat-release ]; then
    version="$(sed 's/^[^0-9\]*\([0-9.]\+\).*$/\1/' "$target"/etc/redhat-release)"
fi

if [ -z "$version" ]; then
    echo >&2 "warning: cannot autodetect OS version, using '$name' as tag"
    version=$name
fi

tar --numeric-owner -c -C "$target" . | docker import - $name:$version

docker run -i -t $name:$version echo success

rm -rf "$target"
@@ -37,7 +37,10 @@ There are more example scripts for creating base images in the
 Docker GitHub Repo:
 
 * `BusyBox <https://github.com/dotcloud/docker/blob/master/contrib/mkimage-busybox.sh>`_
-* `CentOS / Scientific Linux CERN (SLC)
+* CentOS / Scientific Linux CERN (SLC) `on Debian/Ubuntu
   <https://github.com/dotcloud/docker/blob/master/contrib/mkimage-rinse.sh>`_
+  or
+  `on CentOS/RHEL/SLC/etc.
+  <https://github.com/dotcloud/docker/blob/master/contrib/mkimage-yum.sh>`_
 * `Debian / Ubuntu
   <https://github.com/dotcloud/docker/blob/master/contrib/mkimage-debootstrap.sh>`_
@@ -12,3 +12,4 @@ Articles
 
    security
    baseimages
+   runmetrics
463 docs/sources/articles/runmetrics.rst (new file)

@@ -0,0 +1,463 @@
:title: Runtime Metrics
:description: Measure the behavior of running containers
:keywords: docker, metrics, CPU, memory, disk, IO, run, runtime

.. _run_metrics:


Runtime Metrics
===============

Linux Containers rely on `control groups
<https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt>`_ which
not only track groups of processes, but also expose metrics about CPU,
memory, and block I/O usage. You can access those metrics and obtain
network usage metrics as well. This is relevant for "pure" LXC
containers, as well as for Docker containers.

Control Groups
--------------

Control groups are exposed through a pseudo-filesystem. In recent
distros, you should find this filesystem under
``/sys/fs/cgroup``. Under that directory, you will see multiple
sub-directories, called devices, freezer, blkio, etc.; each
sub-directory actually corresponds to a different cgroup hierarchy.

On older systems, the control groups might be mounted on ``/cgroup``,
without distinct hierarchies. In that case, instead of seeing the
sub-directories, you will see a bunch of files in that directory, and
possibly some directories corresponding to existing containers.

To figure out where your control groups are mounted, you can run:

::

    grep cgroup /proc/mounts

.. _run_findpid:

Enumerating Cgroups
-------------------

You can look into ``/proc/cgroups`` to see the different control group
subsystems known to the system, the hierarchy they belong to, and how
many groups they contain.

You can also look at ``/proc/<pid>/cgroup`` to see which control
groups a process belongs to. The control group will be shown as a path
relative to the root of the hierarchy mountpoint; e.g. ``/`` means
“this process has not been assigned into a particular group”, while
``/lxc/pumpkin`` means that the process is likely to be a member of a
container named ``pumpkin``.
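
If you are automating this lookup, the format of ``/proc/<pid>/cgroup``
is simple to parse. Here is a minimal sketch in Go (not part of Docker
itself; error handling is deliberately terse) that prints the subsystem
and cgroup path for each line::

    package main

    import (
        "bufio"
        "fmt"
        "os"
        "strings"
    )

    func main() {
        // Each line of /proc/<pid>/cgroup looks like
        // "3:memory:/lxc/pumpkin" (hierarchy-id:subsystems:path).
        f, err := os.Open("/proc/self/cgroup") // or /proc/<pid>/cgroup
        if err != nil {
            panic(err)
        }
        defer f.Close()
        scanner := bufio.NewScanner(f)
        for scanner.Scan() {
            parts := strings.SplitN(scanner.Text(), ":", 3)
            if len(parts) == 3 {
                fmt.Printf("%-20s %s\n", parts[1], parts[2])
            }
        }
    }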

Finding the Cgroup for a Given Container
----------------------------------------

For each container, one cgroup will be created in each hierarchy. On
older systems with older versions of the LXC userland tools, the name
of the cgroup will be the name of the container. With more recent
versions of the LXC tools, the cgroup will be ``lxc/<container_name>``.

For Docker containers using cgroups, the container name will be the
full ID or long ID of the container. If a container shows up as
ae836c95b4c3 in ``docker ps``, its long ID might be something like
``ae836c95b4c3c9e9179e0e91015512da89fdec91612f63cebae57df9a5444c79``. You
can look it up with ``docker inspect`` or ``docker ps -notrunc``.

Putting everything together to look at the memory metrics for a Docker
container, take a look at ``/sys/fs/cgroup/memory/lxc/<longid>/``.

Metrics from Cgroups: Memory, CPU, Block I/O
--------------------------------------------

For each subsystem (memory, CPU, and block I/O), you will find one or
more pseudo-files containing statistics.

Memory Metrics: ``memory.stat``
...............................

Memory metrics are found in the "memory" cgroup. Note that the memory
control group adds a little overhead, because it does very
fine-grained accounting of the memory usage on your host. Therefore,
many distros chose to not enable it by default. Generally, to enable
it, all you have to do is to add some kernel command-line parameters:
``cgroup_enable=memory swapaccount=1``.

The metrics are in the pseudo-file ``memory.stat``. Here is what it
will look like:

::

    cache 11492564992
    rss 1930993664
    mapped_file 306728960
    pgpgin 406632648
    pgpgout 403355412
    swap 0
    pgfault 728281223
    pgmajfault 1724
    inactive_anon 46608384
    active_anon 1884520448
    inactive_file 7003344896
    active_file 4489052160
    unevictable 32768
    hierarchical_memory_limit 9223372036854775807
    hierarchical_memsw_limit 9223372036854775807
    total_cache 11492564992
    total_rss 1930993664
    total_mapped_file 306728960
    total_pgpgin 406632648
    total_pgpgout 403355412
    total_swap 0
    total_pgfault 728281223
    total_pgmajfault 1724
    total_inactive_anon 46608384
    total_active_anon 1884520448
    total_inactive_file 7003344896
    total_active_file 4489052160
    total_unevictable 32768

The first half (without the ``total_`` prefix) contains statistics
relevant to the processes within the cgroup, excluding
sub-cgroups. The second half (with the ``total_`` prefix) includes
sub-cgroups as well.

Some metrics are "gauges", i.e. values that can increase or decrease
(e.g. swap, the amount of swap space used by the members of the
cgroup). Some others are "counters", i.e. values that can only go up,
because they represent occurrences of a specific event (e.g. pgfault,
which indicates the number of page faults which happened since the
creation of the cgroup; this number can never decrease).
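
For programmatic collection, those pseudo-files are trivial to parse:
one key and one integer value per line. A small Go sketch (assuming the
lxc cgroup layout shown above; adjust the path for your system)::

    package main

    import (
        "bufio"
        "fmt"
        "os"
        "strconv"
        "strings"
    )

    // readMemoryStat parses a memory.stat pseudo-file into a map.
    func readMemoryStat(path string) (map[string]uint64, error) {
        f, err := os.Open(path)
        if err != nil {
            return nil, err
        }
        defer f.Close()
        stats := make(map[string]uint64)
        scanner := bufio.NewScanner(f)
        for scanner.Scan() {
            // Each line is "<key> <value>", e.g. "rss 1930993664".
            fields := strings.Fields(scanner.Text())
            if len(fields) != 2 {
                continue
            }
            v, err := strconv.ParseUint(fields[1], 10, 64)
            if err != nil {
                continue
            }
            stats[fields[0]] = v
        }
        return stats, scanner.Err()
    }

    func main() {
        longid := os.Args[1] // full container ID
        stats, err := readMemoryStat("/sys/fs/cgroup/memory/lxc/" + longid + "/memory.stat")
        if err != nil {
            panic(err)
        }
        fmt.Println("rss bytes:", stats["rss"])
    }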

cache
  the amount of memory used by the processes of this control group
  that can be associated precisely with a block on a block
  device. When you read from and write to files on disk, this amount
  will increase. This will be the case if you use "conventional" I/O
  (``open``, ``read``, ``write`` syscalls) as well as mapped files
  (with ``mmap``). It also accounts for the memory used by ``tmpfs``
  mounts, though the reasons are unclear.

rss
  the amount of memory that *doesn't* correspond to anything on
  disk: stacks, heaps, and anonymous memory maps.

mapped_file
  indicates the amount of memory mapped by the processes in the
  control group. It doesn't give you information about *how much*
  memory is used; it rather tells you *how* it is used.

pgfault and pgmajfault
  indicate the number of times that a process of the cgroup triggered
  a "page fault" and a "major fault", respectively. A page fault
  happens when a process accesses a part of its virtual memory space
  which is nonexistent or protected. The former can happen if the
  process is buggy and tries to access an invalid address (it will
  then be sent a ``SIGSEGV`` signal, typically killing it with the
  famous ``Segmentation fault`` message). The latter can happen when
  the process reads from a memory zone which has been swapped out, or
  which corresponds to a mapped file: in that case, the kernel will
  load the page from disk, and let the CPU complete the memory
  access. It can also happen when the process writes to a
  copy-on-write memory zone: likewise, the kernel will preempt the
  process, duplicate the memory page, and resume the write operation
  on the process' own copy of the page. "Major" faults happen when the
  kernel actually has to read the data from disk. When it just has to
  duplicate an existing page, or allocate an empty page, it's a
  regular (or "minor") fault.

swap
  the amount of swap currently used by the processes in this cgroup.

active_anon and inactive_anon
  the amount of *anonymous* memory that has been identified as
  respectively *active* and *inactive* by the kernel. "Anonymous"
  memory is the memory that is *not* linked to disk pages. In other
  words, that's the equivalent of the rss counter described above. In
  fact, the very definition of the rss counter is **active_anon** +
  **inactive_anon** - **tmpfs** (where tmpfs is the amount of memory
  used up by ``tmpfs`` filesystems mounted by this control
  group). Now, what's the difference between "active" and "inactive"?
  Pages are initially "active"; and at regular intervals, the kernel
  sweeps over the memory, and tags some pages as "inactive". Whenever
  they are accessed again, they are immediately retagged
  "active". When the kernel is almost out of memory, and time comes to
  swap out to disk, the kernel will swap "inactive" pages.

active_file and inactive_file
  cache memory, with *active* and *inactive* similar to the *anon*
  memory above. The exact formula is cache = **active_file** +
  **inactive_file** + **tmpfs**. The exact rules used by the kernel to
  move memory pages between active and inactive sets are different
  from the ones used for anonymous memory, but the general principle
  is the same. Note that when the kernel needs to reclaim memory, it
  is cheaper to reclaim a clean (i.e. non-modified) page from this pool,
  since it can be reclaimed immediately (while anonymous pages and
  dirty/modified pages have to be written to disk first).

unevictable
  the amount of memory that cannot be reclaimed; generally, it will
  account for memory that has been "locked" with ``mlock``. It is
  often used by crypto frameworks to make sure that secret keys and
  other sensitive material never get swapped out to disk.

memory and memsw limits
  These are not really metrics, but a reminder of the limits applied
  to this cgroup. The first one indicates the maximum amount of
  physical memory that can be used by the processes of this control
  group; the second one indicates the maximum amount of RAM+swap.

Accounting for memory in the page cache is very complex. If two
processes in different control groups both read the same file
(ultimately relying on the same blocks on disk), the corresponding
memory charge will be split between the control groups. It's nice, but
it also means that when a cgroup is terminated, it could increase the
memory usage of another cgroup, because they are not splitting the
cost anymore for those memory pages.

CPU metrics: ``cpuacct.stat``
.............................

Now that we've covered memory metrics, everything else will look very
simple in comparison. CPU metrics will be found in the ``cpuacct``
controller.

For each container, you will find a pseudo-file ``cpuacct.stat``,
containing the CPU usage accumulated by the processes of the
container, broken down between ``user`` and ``system`` time. If you're
not familiar with the distinction, ``user`` is the time during which
the processes were in direct control of the CPU (i.e. executing
process code), and ``system`` is the time during which the CPU was
executing system calls on behalf of those processes.

Those times are expressed in ticks of 1/100th of a second. Actually,
they are expressed in "user jiffies". There are ``USER_HZ``
*"jiffies"* per second, and on x86 systems, ``USER_HZ`` is 100. This
used to map exactly to the number of scheduler "ticks" per second; but
with the advent of higher frequency scheduling, as well as `tickless
kernels <http://lwn.net/Articles/549580/>`_, the number of kernel
ticks wasn't relevant anymore. It stuck around anyway, mainly for
legacy and compatibility reasons.
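
To turn those jiffies into seconds, divide by ``USER_HZ``. A small Go
sketch (assuming ``USER_HZ`` is 100, as on x86, and the lxc cgroup
layout described earlier)::

    package main

    import (
        "fmt"
        "os"
        "strconv"
        "strings"
    )

    const userHz = 100 // USER_HZ; 100 on x86 systems

    func main() {
        longid := os.Args[1]
        data, err := os.ReadFile("/sys/fs/cgroup/cpuacct/lxc/" + longid + "/cpuacct.stat")
        if err != nil {
            panic(err)
        }
        // The file contains two lines: "user <ticks>" and "system <ticks>".
        for _, line := range strings.Split(strings.TrimSpace(string(data)), "\n") {
            fields := strings.Fields(line)
            if len(fields) != 2 {
                continue
            }
            ticks, _ := strconv.ParseFloat(fields[1], 64)
            fmt.Printf("%s: %.2f seconds\n", fields[0], ticks/userHz)
        }
    }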

Block I/O metrics
.................

Block I/O is accounted in the ``blkio`` controller. Different metrics
are scattered across different files. While you can find in-depth
details in the `blkio-controller
<https://www.kernel.org/doc/Documentation/cgroups/blkio-controller.txt>`_
file in the kernel documentation, here is a short list of the most
relevant ones:

blkio.sectors
  contains the number of 512-byte sectors read and written by the
  processes that are members of the cgroup, device by device. Reads
  and writes are merged in a single counter.

blkio.io_service_bytes
  indicates the number of bytes read and written by the cgroup. It has
  4 counters per device, because for each device, it differentiates
  between synchronous vs. asynchronous I/O, and reads vs. writes.

blkio.io_serviced
  the number of I/O operations performed, regardless of their size. It
  also has 4 counters per device.

blkio.io_queued
  indicates the number of I/O operations currently queued for this
  cgroup. In other words, if the cgroup isn't doing any I/O, this will
  be zero. Note that the opposite is not true. In other words, if
  there is no I/O queued, it does not mean that the cgroup is idle
  (I/O-wise). It could be doing purely synchronous reads on an
  otherwise quiescent device, which is therefore able to handle them
  immediately, without queuing. Also, while it is helpful to figure
  out which cgroup is putting stress on the I/O subsystem, keep in
  mind that it is a relative quantity. Even if a process group does
  not perform more I/O, its queue size can increase just because the
  device load increases because of other devices.
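
These files share a simple ``major:minor operation value`` layout. A Go
sketch for ``blkio.io_service_bytes`` (using the same lxc path
convention as the earlier examples)::

    package main

    import (
        "bufio"
        "fmt"
        "os"
        "strings"
    )

    func main() {
        longid := os.Args[1]
        f, err := os.Open("/sys/fs/cgroup/blkio/lxc/" + longid + "/blkio.io_service_bytes")
        if err != nil {
            panic(err)
        }
        defer f.Close()
        scanner := bufio.NewScanner(f)
        for scanner.Scan() {
            // Lines look like "8:0 Read 429391872"; a final "Total"
            // line aggregates all devices.
            fields := strings.Fields(scanner.Text())
            if len(fields) == 3 {
                fmt.Printf("device=%s op=%s bytes=%s\n", fields[0], fields[1], fields[2])
            }
        }
    }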

Network Metrics
---------------

Network metrics are not exposed directly by control groups. There is a
good explanation for that: network interfaces exist within the context
of *network namespaces*. The kernel could probably accumulate metrics
about packets and bytes sent and received by a group of processes, but
those metrics wouldn't be very useful. You want per-interface metrics
(because traffic happening on the local ``lo`` interface doesn't
really count). But since processes in a single cgroup can belong to
multiple network namespaces, those metrics would be harder to
interpret: multiple network namespaces means multiple ``lo``
interfaces, potentially multiple ``eth0`` interfaces, etc.; so this is
why there is no easy way to gather network metrics with control
groups.

Instead we can gather network metrics from other sources:

IPtables
........

IPtables (or rather, the netfilter framework for which iptables is
just an interface) can do some serious accounting.

For instance, you can set up a rule to account for the outbound HTTP
traffic on a web server:

::

    iptables -I OUTPUT -p tcp --sport 80

There is no ``-j`` or ``-g`` flag, so the rule will just count matched
packets and go to the following rule.

Later, you can check the values of the counters, with:

::

    iptables -nxvL OUTPUT

Technically, ``-n`` is not required, but it will prevent iptables from
doing DNS reverse lookups, which are probably useless in this
scenario.

Counters include packets and bytes. If you want to set up metrics for
container traffic like this, you could execute a ``for`` loop to add
two ``iptables`` rules per container IP address (one in each
direction), in the ``FORWARD`` chain. This will only meter traffic
going through the NAT layer; you will also have to add traffic going
through the userland proxy.

Then, you will need to check those counters on a regular basis. If you
happen to use ``collectd``, there is a nice plugin to automate
iptables counters collection.
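
A hedged Go sketch of that per-container loop, shelling out to
``iptables`` (the container IPs here are placeholders; in practice you
would obtain them from ``docker inspect`` or your own bookkeeping)::

    package main

    import (
        "log"
        "os/exec"
    )

    func main() {
        // Hypothetical list of container IP addresses.
        containerIPs := []string{"172.17.0.2", "172.17.0.3"}
        for _, ip := range containerIPs {
            // One counting rule per direction, in the FORWARD chain.
            for _, args := range [][]string{
                {"-I", "FORWARD", "-s", ip},
                {"-I", "FORWARD", "-d", ip},
            } {
                if out, err := exec.Command("iptables", args...).CombinedOutput(); err != nil {
                    log.Fatalf("iptables %v: %v (%s)", args, err, out)
                }
            }
        }
    }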

Interface-level counters
........................

Since each container has a virtual Ethernet interface, you might want
to check directly the TX and RX counters of this interface. You will
notice that each container is associated to a virtual Ethernet
interface in your host, with a name like ``vethKk8Zqi``. Figuring out
which interface corresponds to which container is, unfortunately,
difficult.

But for now, the best way is to check the metrics *from within the
containers*. To accomplish this, you can run an executable from the
host environment within the network namespace of a container using
**ip-netns magic**.

The ``ip-netns exec`` command will let you execute any program
(present in the host system) within any network namespace visible to
the current process. This means that your host will be able to enter
the network namespace of your containers, but your containers won't be
able to access the host, nor their sibling containers. Containers will
be able to “see” and affect their sub-containers, though.

The exact format of the command is::

    ip netns exec <nsname> <command...>

For example::

    ip netns exec mycontainer netstat -i

``ip netns`` finds the "mycontainer" container by using namespaces
pseudo-files. Each process belongs to one network namespace, one PID
namespace, one ``mnt`` namespace, etc., and those namespaces are
materialized under ``/proc/<pid>/ns/``. For example, the network
namespace of PID 42 is materialized by the pseudo-file
``/proc/42/ns/net``.

When you run ``ip netns exec mycontainer ...``, it expects
``/var/run/netns/mycontainer`` to be one of those
pseudo-files. (Symlinks are accepted.)

In other words, to execute a command within the network namespace of a
container, we need to:

* Find out the PID of any process within the container that we want to
  investigate;
* Create a symlink from ``/var/run/netns/<somename>`` to
  ``/proc/<thepid>/ns/net``
* Execute ``ip netns exec <somename> ...``

Please review :ref:`run_findpid` to learn how to find the cgroup of a
process running in the container whose network usage you want to
measure. From there, you can examine the pseudo-file named ``tasks``,
which contains the PIDs that are in the control group (i.e. in the
container). Pick any one of them.

Putting everything together, if the "short ID" of a container is held
in the environment variable ``$CID``, then you can do this::

    TASKS=/sys/fs/cgroup/devices/$CID*/tasks
    PID=$(head -n 1 $TASKS)
    mkdir -p /var/run/netns
    ln -sf /proc/$PID/ns/net /var/run/netns/$CID
    ip netns exec $CID netstat -i


Tips for high-performance metric collection
-------------------------------------------

Note that running a new process each time you want to update metrics
is (relatively) expensive. If you want to collect metrics at high
resolutions, and/or over a large number of containers (think 1000
containers on a single host), you do not want to fork a new process
each time.

Here is how to collect metrics from a single process. You will have to
write your metric collector in C (or any language that lets you do
low-level system calls). You need to use a special system call,
``setns()``, which lets the current process enter any arbitrary
namespace. It requires, however, an open file descriptor to the
namespace pseudo-file (remember: that’s the pseudo-file in
``/proc/<pid>/ns/net``).

However, there is a catch: you must not keep this file descriptor
open. If you do, when the last process of the control group exits, the
namespace will not be destroyed, and its network resources (like the
virtual interface of the container) will stay around forever (or
until you close that file descriptor).

The right approach would be to keep track of the first PID of each
container, and re-open the namespace pseudo-file each time.
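
For illustration, here is a minimal sketch of that pattern in Go rather
than C, using the ``Setns`` wrapper from ``golang.org/x/sys/unix`` (an
assumption on our part; note that namespaces are per-thread, so the OS
thread must be locked, and a production collector would also re-enter
the host namespace afterwards)::

    package main

    import (
        "fmt"
        "os"
        "runtime"

        "golang.org/x/sys/unix"
    )

    // inNetNS runs fn inside the network namespace of the given PID,
    // opening and closing the pseudo-file on every call so that the
    // namespace can still be destroyed when the container exits.
    func inNetNS(pid int, fn func() error) error {
        runtime.LockOSThread() // setns only affects the calling thread
        defer runtime.UnlockOSThread()

        f, err := os.Open(fmt.Sprintf("/proc/%d/ns/net", pid))
        if err != nil {
            return err
        }
        defer f.Close() // do not keep the descriptor open

        if err := unix.Setns(int(f.Fd()), unix.CLONE_NEWNET); err != nil {
            return err
        }
        return fn()
    }

    func main() {
        err := inNetNS(42, func() error {
            // /proc/net/dev is per-namespace: this reads the
            // container's interface counters, not the host's.
            data, err := os.ReadFile("/proc/net/dev")
            if err != nil {
                return err
            }
            fmt.Print(string(data))
            return nil
        })
        if err != nil {
            panic(err)
        }
    }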

Collecting metrics when a container exits
-----------------------------------------

Sometimes, you do not care about real time metric collection, but when
a container exits, you want to know how much CPU, memory, etc. it has
used.

Docker makes this difficult because it relies on ``lxc-start``, which
carefully cleans up after itself, but it is still possible. It is
usually easier to collect metrics at regular intervals (e.g. every
minute, with the collectd LXC plugin) and rely on that instead.

But, if you'd still like to gather the stats when a container stops,
here is how:

For each container, start a collection process, and move it to the
control groups that you want to monitor by writing its PID to the
tasks file of the cgroup. The collection process should periodically
re-read the tasks file to check if it's the last process of the
control group. (If you also want to collect network statistics as
explained in the previous section, you should also move the process to
the appropriate network namespace.)

When the container exits, ``lxc-start`` will try to delete the control
groups. It will fail, since the control group is still in use; but
that’s fine. Your process should now detect that it is the only one
remaining in the group. Now is the right time to collect all the
metrics you need!

Finally, your process should move itself back to the root control
group, and remove the container control group. To remove a control
group, just ``rmdir`` its directory. It's counter-intuitive to
``rmdir`` a directory as it still contains files; but remember that
this is a pseudo-filesystem, so usual rules don't apply. After the
cleanup is done, the collection process can exit safely.
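
A sketch of that lifecycle in Go (paths assume the cgroup layout used
throughout this article; the actual metric reads are elided)::

    package main

    import (
        "os"
        "strconv"
        "strings"
        "time"
    )

    func main() {
        cgroup := os.Args[1] // e.g. /sys/fs/cgroup/memory/lxc/<longid>
        self := strconv.Itoa(os.Getpid())

        // Join the container's control group by writing our PID to "tasks".
        if err := os.WriteFile(cgroup+"/tasks", []byte(self), 0644); err != nil {
            panic(err)
        }

        // Poll until we are the last process left in the group.
        for {
            data, err := os.ReadFile(cgroup + "/tasks")
            if err != nil {
                panic(err)
            }
            pids := strings.Fields(string(data))
            if len(pids) == 1 && pids[0] == self {
                break
            }
            time.Sleep(time.Second)
        }

        // ... collect the final metrics here (memory.stat, cpuacct.stat, ...) ...

        // Move back to the root control group, then remove the empty one.
        if err := os.WriteFile("/sys/fs/cgroup/memory/tasks", []byte(self), 0644); err != nil {
            panic(err)
        }
        if err := os.Remove(cgroup); err != nil { // equivalent to rmdir
            panic(err)
        }
    }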

@@ -217,6 +217,15 @@ To install the latest version of docker, use the standard ``apt-get`` method:
 
     # install the latest
     sudo apt-get install lxc-docker
 
+Troubleshooting
+^^^^^^^^^^^^^^^
+
+On Linux Mint, the ``cgroups-lite`` package is not installed by default.
+Before Docker will work correctly, you will need to install this via:
+
+.. code-block:: bash
+
+    sudo apt-get update && sudo apt-get install cgroups-lite
 
 .. _ufw:
 
@@ -224,7 +233,7 @@ Docker and UFW
 ^^^^^^^^^^^^^^
 
 Docker uses a bridge to manage container networking. By default, UFW drops all
-`forwarding` traffic. As a result will you need to enable UFW forwarding:
+`forwarding` traffic. As a result you will need to enable UFW forwarding:
 
 .. code-block:: bash
 
@@ -1,12 +1,12 @@
-:title: Build Images (Dockerfile Reference)
+:title: Dockerfile Reference
 :description: Dockerfiles use a simple DSL which allows you to automate the steps you would normally manually take to create an image.
 :keywords: builder, docker, Dockerfile, automation, image creation
 
 .. _dockerbuilder:
 
-===================================
-Build Images (Dockerfile Reference)
-===================================
+====================
+Dockerfile Reference
+====================
 
 **Docker can act as a builder** and read instructions from a text
 ``Dockerfile`` to automate the steps you would otherwise take manually
@@ -18,6 +18,45 @@ To list available commands, either run ``docker`` with no parameters or execute
 
     ...
 
+.. _cli_options:
+
+Types of Options
+----------------
+
+Boolean
+~~~~~~~
+
+Boolean options look like ``-d=false``. The value you see is the
+default value which gets set if you do **not** use the boolean
+flag. If you do call ``run -d``, that sets the opposite boolean value,
+so in this case, ``true``, and so ``docker run -d`` **will** run in
+"detached" mode, in the background. Other boolean options are similar
+-- specifying them will set the value to the opposite of the default
+value.
+
+Multi
+~~~~~
+
+Options like ``-a=[]`` indicate they can be specified multiple times::
+
+    docker run -a stdin -a stdout -a stderr -i -t ubuntu /bin/bash
+
+Sometimes this can use a more complex value string, as for ``-v``::
+
+    docker run -v /host:/container example/mysql
+
+Strings and Integers
+~~~~~~~~~~~~~~~~~~~~
+
+Options like ``-name=""`` expect a string, and they can only be
+specified once. Options like ``-c=0`` expect an integer, and they can
+only be specified once.
+
+----
+
+Commands
+--------
+
 .. _cli_daemon:
 
 ``daemon``
@@ -14,4 +14,5 @@ Contents:
 
    commandline/index
    builder
+   run
    api/index
419 docs/sources/reference/run.rst (new file)

@@ -0,0 +1,419 @@
:title: Docker Run Reference
:description: Configure containers at runtime
:keywords: docker, run, configure, runtime

.. _run_docker:

====================
Docker Run Reference
====================

**Docker runs processes in isolated containers**. When an operator
executes ``docker run``, she starts a process with its own file
system, its own networking, and its own isolated process tree. The
:ref:`image_def` which starts the process may define defaults related
to the binary to run, the networking to expose, and more, but ``docker
run`` gives final control to the operator who starts the container
from the image. That's the main reason :ref:`cli_run` has more options
than any other ``docker`` command.

Every one of the :ref:`example_list` shows running containers, and so
here we try to give more in-depth guidance.

.. contents:: Table of Contents
   :depth: 2

.. _run_running:

General Form
============

As you've seen in the :ref:`example_list`, the basic `run` command
takes this form::

    docker run [OPTIONS] IMAGE[:TAG] [COMMAND] [ARG...]

To learn how to interpret the types of ``[OPTIONS]``, see
:ref:`cli_options`.

The list of ``[OPTIONS]`` breaks down into two groups:

1. Settings exclusive to operators, including:

   * Detached or Foreground running,
   * Container Identification,
   * Network settings, and
   * Runtime Constraints on CPU and Memory
   * Privileges and LXC Configuration

2. Settings shared between operators and developers, where operators
   can override defaults developers set in images at build time.

Together, the ``docker run [OPTIONS]`` give complete control over
runtime behavior to the operator, allowing them to override all
defaults set by the developer during ``docker build`` and nearly all
the defaults set by the Docker runtime itself.

Operator Exclusive Options
==========================

Only the operator (the person executing ``docker run``) can set the
following options.

.. contents::
   :local:

Detached vs Foreground
----------------------

When starting a Docker container, you must first decide if you want to
run the container in the background in a "detached" mode or in the
default foreground mode::

    -d=false: Detached mode: Run container in the background, print new container id

Detached (-d)
.............

In detached mode (``-d=true`` or just ``-d``), all I/O should be done
through network connections or shared volumes because the container is
no longer listening to the commandline where you executed ``docker
run``. You can reattach to a detached container with ``docker``
:ref:`cli_attach`. If you choose to run a container in the detached
mode, then you cannot use the ``-rm`` option.

Foreground
..........

In foreground mode (the default when ``-d`` is not specified),
``docker run`` can start the process in the container and attach the
console to the process's standard input, output, and standard
error. It can even pretend to be a TTY (this is what most commandline
executables expect) and pass along signals. All of that is
configurable::

    -a=[]           : Attach to ``stdin``, ``stdout`` and/or ``stderr``
    -t=false        : Allocate a pseudo-tty
    -sig-proxy=true : Proxify all received signals to the process (even in non-tty mode)
    -i=false        : Keep STDIN open even if not attached

If you do not specify ``-a`` then Docker will `attach everything
(stdin,stdout,stderr)
<https://github.com/dotcloud/docker/blob/75a7f4d90cde0295bcfb7213004abce8d4779b75/commands.go#L1797>`_. You
can specify to which of the three standard streams (``stdin``, ``stdout``,
``stderr``) you'd like to connect instead, as in::

    docker run -a stdin -a stdout -i -t ubuntu /bin/bash

For interactive processes (like a shell) you will typically want a tty
as well as persistent standard input (``stdin``), so you'll use ``-i
-t`` together in most interactive cases.

Container Identification
------------------------

Name (-name)
............

The operator can identify a container in three ways:

* UUID long identifier ("f78375b1c487e03c9438c729345e54db9d20cfa2ac1fc3494b6eb60872e74778")
* UUID short identifier ("f78375b1c487")
* Name ("evil_ptolemy")

The UUID identifiers come from the Docker daemon, and if you do not
assign a name to the container with ``-name`` then the daemon will
also generate a random string name too. The name can become a handy
way to add meaning to a container since you can use this name when
defining :ref:`links <working_with_links_names>` (or any other place
you need to identify a container). This works for both background and
foreground Docker containers.

PID Equivalent
..............

And finally, to help with automation, you can have Docker write the
container ID out to a file of your choosing. This is similar to how
some programs might write out their process ID to a file (you've seen
them as PID files)::

    -cidfile="": Write the container ID to the file

Network Settings
----------------

::

    -n=true  : Enable networking for this container
    -dns=[]  : Set custom dns servers for the container

By default, all containers have networking enabled and they can make
any outgoing connections. The operator can completely disable
networking with ``docker run -n`` which disables all incoming and outgoing
networking. In cases like this, you would perform I/O through files or
STDIN/STDOUT only.

Your container will use the same DNS servers as the host by default,
but you can override this with ``-dns``.

Clean Up (-rm)
--------------

By default a container's file system persists even after the container
exits. This makes debugging a lot easier (since you can inspect the
final state) and you retain all your data by default. But if you are
running short-term **foreground** processes, these container file
systems can really pile up. If instead you'd like Docker to
**automatically clean up the container and remove the file system when
the container exits**, you can add the ``-rm`` flag::

    -rm=false: Automatically remove the container when it exits (incompatible with -d)


Runtime Constraints on CPU and Memory
-------------------------------------

The operator can also adjust the performance parameters of the container::

    -m="": Memory limit (format: <number><optional unit>, where unit = b, k, m or g)
    -c=0 : CPU shares (relative weight)

The operator can constrain the memory available to a container easily
with ``docker run -m``. If the host supports swap memory, then the
``-m`` memory setting can be larger than physical RAM.

Similarly the operator can increase the priority of this container
with the ``-c`` option. By default, all containers run at the same
priority and get the same proportion of CPU cycles, but you can tell
the kernel to give more shares of CPU time to one or more containers
when you start them via Docker.

Runtime Privilege and LXC Configuration
---------------------------------------

::

    -privileged=false: Give extended privileges to this container
    -lxc-conf=[]: Add custom lxc options -lxc-conf="lxc.cgroup.cpuset.cpus = 0,1"

By default, Docker containers are "unprivileged" and cannot, for
example, run a Docker daemon inside a Docker container. This is
because by default a container is not allowed to access any devices,
but a "privileged" container is given access to all devices (see
lxc-template.go_ and documentation on `cgroups devices
<https://www.kernel.org/doc/Documentation/cgroups/devices.txt>`_).

When the operator executes ``docker run -privileged``, Docker will
enable access to all devices on the host as well as set some
configuration in AppArmor to allow the container nearly all the same
access to the host as processes running outside containers on the
host. Additional information about running with ``-privileged`` is
available on the `Docker Blog
<http://blog.docker.io/2013/09/docker-can-now-run-within-docker/>`_.

An operator can also specify LXC options using one or more
``-lxc-conf`` parameters. These can be new parameters or override
existing parameters from the lxc-template.go_. Note that in the
future, a given host's Docker daemon may not use LXC, so this is an
implementation-specific configuration meant for operators already
familiar with using LXC directly.

.. _lxc-template.go: https://github.com/dotcloud/docker/blob/master/execdriver/lxc/lxc_template.go


Overriding ``Dockerfile`` Image Defaults
========================================

When a developer builds an image from a :ref:`Dockerfile
<dockerbuilder>` or when she commits it, the developer can set a
number of default parameters that take effect when the image starts up
as a container.

Four of the ``Dockerfile`` commands cannot be overridden at runtime:
``FROM, MAINTAINER, RUN``, and ``ADD``. Everything else has a
corresponding override in ``docker run``. We'll go through what the
developer might have set in each ``Dockerfile`` instruction and how the
operator can override that setting.

.. contents::
   :local:

CMD (Default Command or Options)
--------------------------------

Recall the optional ``COMMAND`` in the Docker commandline::

    docker run [OPTIONS] IMAGE[:TAG] [COMMAND] [ARG...]

This command is optional because the person who created the ``IMAGE``
may have already provided a default ``COMMAND`` using the ``Dockerfile``
``CMD``. As the operator (the person running a container from the
image), you can override that ``CMD`` just by specifying a new
``COMMAND``.

If the image also specifies an ``ENTRYPOINT`` then the ``CMD`` or
``COMMAND`` get appended as arguments to the ``ENTRYPOINT``.


ENTRYPOINT (Default Command to Execute at Runtime)
--------------------------------------------------

::

    -entrypoint="": Overwrite the default entrypoint set by the image

The ENTRYPOINT of an image is similar to a ``COMMAND`` because it
specifies what executable to run when the container starts, but it is
(purposely) more difficult to override. The ``ENTRYPOINT`` gives a
container its default nature or behavior, so that when you set an
``ENTRYPOINT`` you can run the container *as if it were that binary*,
complete with default options, and you can pass in more options via
the ``COMMAND``. But, sometimes an operator may want to run something else
inside the container, so you can override the default ``ENTRYPOINT`` at
runtime by using a string to specify the new ``ENTRYPOINT``. Here is an
example of how to run a shell in a container that has been set up to
automatically run something else (like ``/usr/bin/redis-server``)::

    docker run -i -t -entrypoint /bin/bash example/redis

or two examples of how to pass more parameters to that ENTRYPOINT::

    docker run -i -t -entrypoint /bin/bash example/redis -c ls -l
    docker run -i -t -entrypoint /usr/bin/redis-cli example/redis --help


EXPOSE (Incoming Ports)
-----------------------

The ``Dockerfile`` doesn't give much control over networking, only
providing the ``EXPOSE`` instruction to give a hint to the operator
about what incoming ports might provide services. The following
options work with or override the ``Dockerfile``'s exposed defaults::

    -expose=[]: Expose a port from the container
                without publishing it to your host
    -P=false  : Publish all exposed ports to the host interfaces
    -p=[]     : Publish a container's port to the host (format:
                ip:hostPort:containerPort | ip::containerPort |
                hostPort:containerPort)
                (use 'docker port' to see the actual mapping)
    -link=""  : Add link to another container (name:alias)

As mentioned previously, ``EXPOSE`` (and ``-expose``) make a port
available **in** a container for incoming connections. The port number
on the inside of the container (where the service listens) does not
need to be the same number as the port exposed on the outside of the
container (where clients connect), so inside the container you might
have an HTTP service listening on port 80 (and so you ``EXPOSE 80`` in
the ``Dockerfile``), but outside the container the port might be 42800.

To help a new client container reach the server container's internal
port ``-expose``'d by the operator or ``EXPOSE``'d by the developer,
the operator has three choices: start the server container with ``-P``
or ``-p``, or start the client container with ``-link``.

If the operator uses ``-P`` or ``-p`` then Docker will make the
exposed port accessible on the host and the ports will be available to
any client that can reach the host. To find the map between the host
ports and the exposed ports, use ``docker port``.

If the operator uses ``-link`` when starting the new client container,
then the client container can access the exposed port via a private
networking interface. Docker will set some environment variables in
the client container to help indicate which interface and port to use.

ENV (Environment Variables)
---------------------------

The operator can **set any environment variable** in the container by
using one or more ``-e`` flags, even overriding those already defined by the
developer with a ``Dockerfile`` ``ENV``::

    $ docker run -e "deep=purple" -rm ubuntu /bin/bash -c export
    declare -x HOME="/"
    declare -x HOSTNAME="85bc26a0e200"
    declare -x OLDPWD
    declare -x PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
    declare -x PWD="/"
    declare -x SHLVL="1"
    declare -x container="lxc"
    declare -x deep="purple"

Similarly the operator can set the **hostname** with ``-h``.

``-link name:alias`` also sets environment variables, using the
*alias* string to define environment variables within the container
that give the IP and PORT information for connecting to the service
container. Let's imagine we have a container running Redis::

    # Start the service container, named redis-name
    $ docker run -d -name redis-name dockerfiles/redis
    4241164edf6f5aca5b0e9e4c9eccd899b0b8080c64c0cd26efe02166c73208f3

    # The redis-name container exposed port 6379
    $ docker ps
    CONTAINER ID        IMAGE                        COMMAND                CREATED             STATUS              PORTS               NAMES
    4241164edf6f        dockerfiles/redis:latest     /redis-stable/src/re   5 seconds ago       Up 4 seconds        6379/tcp            redis-name

    # Note that there are no public ports exposed since we didn't use -p or -P
    $ docker port 4241164edf6f 6379
    2014/01/25 00:55:38 Error: No public port '6379' published for 4241164edf6f


Yet we can get information about the Redis container's exposed ports
with ``-link``. Choose an alias that will form a valid environment
variable!

::

    $ docker run -rm -link redis-name:redis_alias -entrypoint /bin/bash dockerfiles/redis -c export
    declare -x HOME="/"
    declare -x HOSTNAME="acda7f7b1cdc"
    declare -x OLDPWD
    declare -x PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
    declare -x PWD="/"
    declare -x REDIS_ALIAS_NAME="/distracted_wright/redis"
    declare -x REDIS_ALIAS_PORT="tcp://172.17.0.32:6379"
    declare -x REDIS_ALIAS_PORT_6379_TCP="tcp://172.17.0.32:6379"
    declare -x REDIS_ALIAS_PORT_6379_TCP_ADDR="172.17.0.32"
    declare -x REDIS_ALIAS_PORT_6379_TCP_PORT="6379"
    declare -x REDIS_ALIAS_PORT_6379_TCP_PROTO="tcp"
    declare -x SHLVL="1"
    declare -x container="lxc"

And we can use that information to connect from another container as a client::

    $ docker run -i -t -rm -link redis-name:redis_alias -entrypoint /bin/bash dockerfiles/redis -c '/redis-stable/src/redis-cli -h $REDIS_ALIAS_PORT_6379_TCP_ADDR -p $REDIS_ALIAS_PORT_6379_TCP_PORT'
    172.17.0.32:6379>

VOLUME (Shared Filesystems)
---------------------------

::

    -v=[]: Create a bind mount with: [host-dir]:[container-dir]:[rw|ro].
           If "container-dir" is missing, then docker creates a new volume.
    -volumes-from="": Mount all volumes from the given container(s)

The volumes commands are complex enough to have their own
documentation in section :ref:`volume_def`. A developer can define one
or more ``VOLUME``\s associated with an image, but only the operator can
give access from one container to another (or from a container to a
volume mounted on the host).

USER
----

The default user within a container is ``root`` (id = 0), but if the
developer created additional users, those are accessible too. The
developer can set a default user to run the first process with the
``Dockerfile USER`` command, but the operator can override it::

    -u="": Username or UID

WORKDIR
-------

The default working directory for running binaries within a container is the root directory (``/``), but the developer can set a different default with the ``Dockerfile WORKDIR`` command. The operator can override this with::

    -w="": Working directory inside the container
@ -60,7 +60,7 @@ func (env *Env) GetInt64(key string) int64 {
|
|||
s := strings.Trim(env.Get(key), " \t")
|
||||
val, err := strconv.ParseInt(s, 10, 64)
|
||||
if err != nil {
|
||||
return -1
|
||||
return 0
|
||||
}
|
||||
return val
|
||||
}
|
||||
|
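In other words, a missing or malformed value now reads as 0 rather than -1,
and callers that need a different default must check for the key explicitly
(see the EnvExists helper added further below). A minimal, self-contained
sketch of the parsing behaviour (the function name here is illustrative,
not the engine API):

	package main

	import (
		"fmt"
		"strconv"
		"strings"
	)

	// getInt64 mirrors the hunk above: trim the raw value and fall
	// back to 0 (not -1) when it does not parse as an integer.
	func getInt64(raw string) int64 {
		s := strings.Trim(raw, " \t")
		val, err := strconv.ParseInt(s, 10, 64)
		if err != nil {
			return 0
		}
		return val
	}

	func main() {
		fmt.Println(getInt64(" 42\t")) // 42
		fmt.Println(getInt64("oops"))  // 0
	}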
@@ -213,24 +213,6 @@ func (env *Env) WriteTo(dst io.Writer) (n int64, err error) {
 	return 0, env.Encode(dst)
 }
 
+func (env *Env) Export(dst interface{}) (err error) {
+	defer func() {
+		if err != nil {
+			err = fmt.Errorf("ExportEnv %s", err)
+		}
+	}()
+	var buf bytes.Buffer
+	// step 1: encode/marshal the env to an intermediary json representation
+	if err := env.Encode(&buf); err != nil {
+		return err
+	}
+	// step 2: decode/unmarshal the intermediary json into the destination object
+	if err := json.NewDecoder(&buf).Decode(dst); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (env *Env) Import(src interface{}) (err error) {
+	defer func() {
+		if err != nil {
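Export is thus a JSON round trip: encode the env to an intermediary JSON
document, then decode that into the caller's struct. A self-contained
sketch of the same technique using only the standard library (the dummy
type mirrors the TestImportEnv diff further below; this is not the engine
API itself):

	package main

	import (
		"bytes"
		"encoding/json"
		"fmt"
	)

	type dummy struct {
		DummyInt         int
		DummyStringArray []string
	}

	func main() {
		// step 1: encode the source value to an intermediary JSON buffer
		var buf bytes.Buffer
		if err := json.NewEncoder(&buf).Encode(dummy{42, []string{"foo", "bar"}}); err != nil {
			panic(err)
		}
		// step 2: decode the intermediary JSON into the destination
		var dst dummy
		if err := json.NewDecoder(&buf).Decode(&dst); err != nil {
			panic(err)
		}
		fmt.Printf("%+v\n", dst) // {DummyInt:42 DummyStringArray:[foo bar]}
	}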
@@ -62,7 +62,7 @@ func TestSetenvInt(t *testing.T) {
 	if val := job.GetenvInt("bar"); val != 42 {
 		t.Fatalf("GetenvInt returns incorrect value: %d", val)
 	}
-	if val := job.GetenvInt("nonexistent"); val != -1 {
+	if val := job.GetenvInt("nonexistent"); val != 0 {
 		t.Fatalf("GetenvInt returns incorrect value: %d", val)
 	}
 }
@@ -84,32 +84,6 @@ func TestSetenvList(t *testing.T) {
 	}
 }
 
-func TestImportEnv(t *testing.T) {
-	type dummy struct {
-		DummyInt         int
-		DummyStringArray []string
-	}
-
-	job := mkJob(t, "dummy")
-	if err := job.ImportEnv(&dummy{42, []string{"foo", "bar"}}); err != nil {
-		t.Fatal(err)
-	}
-
-	dmy := dummy{}
-	if err := job.ExportEnv(&dmy); err != nil {
-		t.Fatal(err)
-	}
-
-	if dmy.DummyInt != 42 {
-		t.Fatalf("Expected 42, got %d", dmy.DummyInt)
-	}
-
-	if len(dmy.DummyStringArray) != 2 || dmy.DummyStringArray[0] != "foo" || dmy.DummyStringArray[1] != "bar" {
-		t.Fatalf("Expected {foo, bar}, got %v", dmy.DummyStringArray)
-	}
-
-}
-
 func TestEnviron(t *testing.T) {
 	job := mkJob(t, "dummy")
 	job.Setenv("foo", "bar")
@@ -102,6 +102,10 @@ func (job *Job) String() string {
 	return fmt.Sprintf("%s.%s%s", job.Eng, job.CallString(), job.StatusString())
 }
 
+func (job *Job) EnvExists(key string) (value bool) {
+	return job.env.Exists(key)
+}
+
 func (job *Job) Getenv(key string) (value string) {
 	return job.env.Get(key)
 }
 
@@ -172,10 +176,6 @@ func (job *Job) EncodeEnv(dst io.Writer) error {
 	return job.env.Encode(dst)
 }
 
 func (job *Job) ExportEnv(dst interface{}) (err error) {
 	return job.env.Export(dst)
 }
 
 func (job *Job) ImportEnv(src interface{}) (err error) {
 	return job.env.Import(src)
 }
@@ -155,7 +155,9 @@ func (d *driver) Run(c *execdriver.Command, startCallback execdriver.StartCallback
 	)
 	go func() {
 		if err := c.Wait(); err != nil {
-			waitErr = err
+			if _, ok := err.(*exec.ExitError); !ok { // Do not propagate the error if it's simply a status code != 0
+				waitErr = err
+			}
 		}
 		close(waitLock)
 	}()
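The new check relies on Wait returning *exec.ExitError for a non-zero exit
status, so only genuine infrastructure failures are propagated. A
self-contained sketch of that discrimination (assuming a Unix shell at
/bin/sh is available):

	package main

	import (
		"fmt"
		"os/exec"
	)

	func main() {
		err := exec.Command("/bin/sh", "-c", "exit 3").Run()
		if err != nil {
			if _, ok := err.(*exec.ExitError); !ok {
				// not a plain non-zero exit: report it
				fmt.Println("real error:", err)
				return
			}
			// a status code != 0 is expected and not propagated
			fmt.Println("non-zero exit, ignored:", err)
		}
	}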
217 graphdriver/btrfs/btrfs.go Normal file
@@ -0,0 +1,217 @@
+// +build linux
+
+package btrfs
+
+/*
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <linux/btrfs.h>
+
+*/
+import "C"
+import (
+	"fmt"
+	"github.com/dotcloud/docker/graphdriver"
+	"os"
+	"path"
+	"syscall"
+	"unsafe"
+)
+
+func init() {
+	graphdriver.Register("btrfs", Init)
+}
+
+func Init(home string) (graphdriver.Driver, error) {
+	rootdir := path.Dir(home)
+
+	var buf syscall.Statfs_t
+	if err := syscall.Statfs(rootdir, &buf); err != nil {
+		return nil, err
+	}
+
+	if buf.Type != 0x9123683E {
+		return nil, fmt.Errorf("%s is not a btrfs filesystem", rootdir)
+	}
+
+	return &Driver{
+		home: home,
+	}, nil
+}
+
+type Driver struct {
+	home string
+}
+
+func (d *Driver) String() string {
+	return "btrfs"
+}
+
+func (d *Driver) Status() [][2]string {
+	return nil
+}
+
+func (d *Driver) Cleanup() error {
+	return nil
+}
+
+func free(p *C.char) {
+	C.free(unsafe.Pointer(p))
+}
+
+func openDir(path string) (*C.DIR, error) {
+	Cpath := C.CString(path)
+	defer free(Cpath)
+
+	dir := C.opendir(Cpath)
+	if dir == nil {
+		return nil, fmt.Errorf("Can't open dir")
+	}
+	return dir, nil
+}
+
+func closeDir(dir *C.DIR) {
+	if dir != nil {
+		C.closedir(dir)
+	}
+}
+
+func getDirFd(dir *C.DIR) uintptr {
+	return uintptr(C.dirfd(dir))
+}
+
+func subvolCreate(path, name string) error {
+	dir, err := openDir(path)
+	if err != nil {
+		return err
+	}
+	defer closeDir(dir)
+
+	var args C.struct_btrfs_ioctl_vol_args
+	for i, c := range []byte(name) {
+		args.name[i] = C.char(c)
+	}
+
+	_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SUBVOL_CREATE,
+		uintptr(unsafe.Pointer(&args)))
+	if errno != 0 {
+		return fmt.Errorf("Can't create subvolume")
+	}
+	return nil
+}
+
+func subvolSnapshot(src, dest, name string) error {
+	srcDir, err := openDir(src)
+	if err != nil {
+		return err
+	}
+	defer closeDir(srcDir)
+
+	destDir, err := openDir(dest)
+	if err != nil {
+		return err
+	}
+	defer closeDir(destDir)
+
+	var args C.struct_btrfs_ioctl_vol_args_v2
+	args.fd = C.__s64(getDirFd(srcDir))
+	for i, c := range []byte(name) {
+		args.name[i] = C.char(c)
+	}
+
+	_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(destDir), C.BTRFS_IOC_SNAP_CREATE_V2,
+		uintptr(unsafe.Pointer(&args)))
+	if errno != 0 {
+		return fmt.Errorf("Can't create snapshot")
+	}
+	return nil
+}
+
+func subvolDelete(path, name string) error {
+	dir, err := openDir(path)
+	if err != nil {
+		return err
+	}
+	defer closeDir(dir)
+
+	var args C.struct_btrfs_ioctl_vol_args
+	for i, c := range []byte(name) {
+		args.name[i] = C.char(c)
+	}
+
+	_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_SNAP_DESTROY,
+		uintptr(unsafe.Pointer(&args)))
+	if errno != 0 {
+		return fmt.Errorf("Can't destroy subvolume")
+	}
+	return nil
+}
+
+func (d *Driver) subvolumesDir() string {
+	return path.Join(d.home, "subvolumes")
+}
+
+func (d *Driver) subvolumesDirId(id string) string {
+	return path.Join(d.subvolumesDir(), id)
+}
+
+func (d *Driver) Create(id string, parent string) error {
+	subvolumes := path.Join(d.home, "subvolumes")
+	if err := os.MkdirAll(subvolumes, 0700); err != nil {
+		return err
+	}
+	if parent == "" {
+		if err := subvolCreate(subvolumes, id); err != nil {
+			return err
+		}
+	} else {
+		parentDir, err := d.Get(parent)
+		if err != nil {
+			return err
+		}
+		if err := subvolSnapshot(parentDir, subvolumes, id); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (d *Driver) Remove(id string) error {
+	dir := d.subvolumesDirId(id)
+	if _, err := os.Stat(dir); err != nil {
+		return err
+	}
+	if err := subvolDelete(d.subvolumesDir(), id); err != nil {
+		return err
+	}
+	return os.RemoveAll(dir)
+}
+
+func (d *Driver) Get(id string) (string, error) {
+	dir := d.subvolumesDirId(id)
+	st, err := os.Stat(dir)
+	if err != nil {
+		return "", err
+	}
+
+	if !st.IsDir() {
+		return "", fmt.Errorf("%s: not a directory", dir)
+	}
+
+	return dir, nil
+}
+
+func (d *Driver) Put(id string) {
+	// Get() creates no runtime resources (like e.g. mounts)
+	// so this doesn't need to do anything.
+}
+
+func (d *Driver) Exists(id string) bool {
+	dir := d.subvolumesDirId(id)
+	_, err := os.Stat(dir)
+	return err == nil
+}
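Init's 0x9123683E literal is the btrfs superblock magic number: the driver
refuses to start unless the parent of its home directory sits on btrfs. A
self-contained sketch of that Statfs probe (Linux only; the helper name is
illustrative, not part of the driver):

	package main

	import (
		"fmt"
		"syscall"
	)

	const btrfsSuperMagic = 0x9123683E // same literal as Init above

	// isBtrfs reports whether the filesystem backing path is btrfs,
	// following the syscall.Statfs check in the driver's Init.
	func isBtrfs(path string) (bool, error) {
		var buf syscall.Statfs_t
		if err := syscall.Statfs(path, &buf); err != nil {
			return false, err
		}
		return buf.Type == btrfsSuperMagic, nil
	}

	func main() {
		ok, err := isBtrfs("/")
		fmt.Println(ok, err)
	}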
3 graphdriver/btrfs/dummy_unsupported.go Normal file

@@ -0,0 +1,3 @@
+// +build !linux
+
+package btrfs
@@ -41,6 +41,8 @@ var (
 		"aufs",
 		"devicemapper",
 		"vfs",
+		// experimental, has to be enabled manually for now
+		"btrfs",
 	}
 )
 
@@ -37,8 +37,10 @@ if command_exists docker || command_exists lxc-docker; then
 	( set -x; sleep 20 )
 fi
 
+user="$(id -un 2>/dev/null || true)"
+
 sh_c='sh -c'
-if [ "$(whoami 2>/dev/null || true)" != 'root' ]; then
+if [ "$user" != 'root' ]; then
 	if command_exists sudo; then
 		sh_c='sudo sh -c'
 	elif command_exists su; then

@@ -124,6 +126,16 @@ case "$lsb_dist" in
 			$sh_c 'docker run busybox echo "Docker has been successfully installed!"'
 		) || true
 	fi
+	your_user=your-user
+	[ "$user" != 'root' ] && your_user="$user"
+	echo
+	echo 'If you would like to use Docker as a non-root user, you should now consider'
+	echo 'adding your user to the "docker" group with something like:'
+	echo
+	echo '  sudo usermod -aG docker' $your_user
+	echo
+	echo 'Remember that you will have to log out and back in for this to take effect!'
+	echo
 	exit 0
 	;;
18 hack/make.sh
@@ -25,12 +25,18 @@ set -o pipefail
 
 # We're a nice, sexy, little shell script, and people might try to run us;
 # but really, they shouldn't. We want to be in a container!
-RESOLVCONF=$(readlink --canonicalize /etc/resolv.conf)
-grep -q "$RESOLVCONF" /proc/mounts || {
-	echo >&2 "# WARNING! I don't seem to be running in a docker container."
-	echo >&2 "# The result of this command might be an incorrect build, and will not be officially supported."
-	echo >&2 "# Try this: 'make all'"
-}
+if [ "$(pwd)" != '/go/src/github.com/dotcloud/docker' ] || [ -z "$DOCKER_CROSSPLATFORMS" ]; then
+	{
+		echo "# WARNING! I don't seem to be running in the Docker container."
+		echo "# The result of this command might be an incorrect build, and will not be"
+		echo "# officially supported."
+		echo "#"
+		echo "# Try this instead: make all"
+		echo "#"
+	} >&2
+fi
 
 echo
 
 # List of bundles to create when no argument is passed
 DEFAULT_BUNDLES=(
@@ -5,7 +5,7 @@ import yaml
 
 from env import commit_range
 
-commit_format = '-%n hash: "%h"%n author: %aN <%aE>%n message: |%n%w(0,2,2)%B'
+commit_format = '-%n hash: "%h"%n author: %aN <%aE>%n message: |%n%w(0,2,2).%B'
 
 gitlog = subprocess.check_output([
 	'git', 'log', '--reverse',

@@ -24,6 +24,11 @@ p = re.compile(r'^{0} ([^<]+) <([^<>@]+@[^<>]+)> \(github: (\S+)\)$'.format(re.e
 failed_commits = 0
 
 for commit in commits:
+	commit['message'] = commit['message'][1:]
+	# trim off our '.' that exists just to prevent fun YAML parsing issues
+	# see https://github.com/dotcloud/docker/pull/3836#issuecomment-33723094
+	# and https://travis-ci.org/dotcloud/docker/builds/17926783
+
 	commit['stat'] = subprocess.check_output([
 		'git', 'log', '--format=format:', '--max-count=1',
 		'--name-status', commit['hash'], '--',
@@ -114,6 +114,30 @@ func TestCreateRm(t *testing.T) {
 
 }
 
+func TestCreateNumberHostname(t *testing.T) {
+	eng := NewTestEngine(t)
+	defer mkRuntimeFromEngine(eng, t).Nuke()
+
+	config, _, _, err := docker.ParseRun([]string{"-h", "web.0", unitTestImageID, "echo test"}, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	createTestContainer(eng, config, t)
+}
+
+func TestCreateNumberUsername(t *testing.T) {
+	eng := NewTestEngine(t)
+	defer mkRuntimeFromEngine(eng, t).Nuke()
+
+	config, _, _, err := docker.ParseRun([]string{"-u", "1002", unitTestImageID, "echo test"}, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	createTestContainer(eng, config, t)
+}
+
 func TestCreateRmVolumes(t *testing.T) {
 	eng := NewTestEngine(t)
 	defer mkRuntimeFromEngine(eng, t).Nuke()
170 network.go
@@ -5,9 +5,9 @@ import (
 	"github.com/dotcloud/docker/networkdriver"
 	"github.com/dotcloud/docker/networkdriver/ipallocator"
 	"github.com/dotcloud/docker/networkdriver/portallocator"
+	"github.com/dotcloud/docker/networkdriver/portmapper"
 	"github.com/dotcloud/docker/pkg/iptables"
 	"github.com/dotcloud/docker/pkg/netlink"
-	"github.com/dotcloud/docker/proxy"
 	"github.com/dotcloud/docker/utils"
 	"io/ioutil"
 	"log"
@@ -159,129 +159,6 @@ func getIfaceAddr(name string) (net.Addr, error) {
 	return addrs4[0], nil
 }
 
-// Port mapper takes care of mapping external ports to containers by setting
-// up iptables rules.
-// It keeps track of all mappings and is able to unmap at will
-type PortMapper struct {
-	tcpMapping map[string]*net.TCPAddr
-	tcpProxies map[string]proxy.Proxy
-	udpMapping map[string]*net.UDPAddr
-	udpProxies map[string]proxy.Proxy
-
-	iptables         *iptables.Chain
-	defaultIp        net.IP
-	proxyFactoryFunc func(net.Addr, net.Addr) (proxy.Proxy, error)
-}
-
-func (mapper *PortMapper) Map(ip net.IP, port int, backendAddr net.Addr) error {
-
-	if _, isTCP := backendAddr.(*net.TCPAddr); isTCP {
-		mapKey := (&net.TCPAddr{Port: port, IP: ip}).String()
-		if _, exists := mapper.tcpProxies[mapKey]; exists {
-			return fmt.Errorf("TCP Port %s is already in use", mapKey)
-		}
-		backendPort := backendAddr.(*net.TCPAddr).Port
-		backendIP := backendAddr.(*net.TCPAddr).IP
-		if mapper.iptables != nil {
-			if err := mapper.iptables.Forward(iptables.Add, ip, port, "tcp", backendIP.String(), backendPort); err != nil {
-				return err
-			}
-		}
-		mapper.tcpMapping[mapKey] = backendAddr.(*net.TCPAddr)
-		proxy, err := mapper.proxyFactoryFunc(&net.TCPAddr{IP: ip, Port: port}, backendAddr)
-		if err != nil {
-			mapper.Unmap(ip, port, "tcp")
-			return err
-		}
-		mapper.tcpProxies[mapKey] = proxy
-		go proxy.Run()
-	} else {
-		mapKey := (&net.UDPAddr{Port: port, IP: ip}).String()
-		if _, exists := mapper.udpProxies[mapKey]; exists {
-			return fmt.Errorf("UDP: Port %s is already in use", mapKey)
-		}
-		backendPort := backendAddr.(*net.UDPAddr).Port
-		backendIP := backendAddr.(*net.UDPAddr).IP
-		if mapper.iptables != nil {
-			if err := mapper.iptables.Forward(iptables.Add, ip, port, "udp", backendIP.String(), backendPort); err != nil {
-				return err
-			}
-		}
-		mapper.udpMapping[mapKey] = backendAddr.(*net.UDPAddr)
-		proxy, err := mapper.proxyFactoryFunc(&net.UDPAddr{IP: ip, Port: port}, backendAddr)
-		if err != nil {
-			mapper.Unmap(ip, port, "udp")
-			return err
-		}
-		mapper.udpProxies[mapKey] = proxy
-		go proxy.Run()
-	}
-	return nil
-}
-
-func (mapper *PortMapper) Unmap(ip net.IP, port int, proto string) error {
-	if proto == "tcp" {
-		mapKey := (&net.TCPAddr{Port: port, IP: ip}).String()
-		backendAddr, ok := mapper.tcpMapping[mapKey]
-		if !ok {
-			return fmt.Errorf("Port tcp/%s is not mapped", mapKey)
-		}
-		if proxy, exists := mapper.tcpProxies[mapKey]; exists {
-			proxy.Close()
-			delete(mapper.tcpProxies, mapKey)
-		}
-		if mapper.iptables != nil {
-			if err := mapper.iptables.Forward(iptables.Delete, ip, port, proto, backendAddr.IP.String(), backendAddr.Port); err != nil {
-				return err
-			}
-		}
-		delete(mapper.tcpMapping, mapKey)
-	} else {
-		mapKey := (&net.UDPAddr{Port: port, IP: ip}).String()
-		backendAddr, ok := mapper.udpMapping[mapKey]
-		if !ok {
-			return fmt.Errorf("Port udp/%s is not mapped", mapKey)
-		}
-		if proxy, exists := mapper.udpProxies[mapKey]; exists {
-			proxy.Close()
-			delete(mapper.udpProxies, mapKey)
-		}
-		if mapper.iptables != nil {
-			if err := mapper.iptables.Forward(iptables.Delete, ip, port, proto, backendAddr.IP.String(), backendAddr.Port); err != nil {
-				return err
-			}
-		}
-		delete(mapper.udpMapping, mapKey)
-	}
-	return nil
-}
-
-func newPortMapper(config *DaemonConfig) (*PortMapper, error) {
-	// We can always try removing the iptables
-	if err := iptables.RemoveExistingChain("DOCKER"); err != nil {
-		return nil, err
-	}
-	var chain *iptables.Chain
-	if config.EnableIptables {
-		var err error
-		chain, err = iptables.NewChain("DOCKER", config.BridgeIface)
-		if err != nil {
-			return nil, fmt.Errorf("Failed to create DOCKER chain: %s", err)
-		}
-	}
-
-	mapper := &PortMapper{
-		tcpMapping:       make(map[string]*net.TCPAddr),
-		tcpProxies:       make(map[string]proxy.Proxy),
-		udpMapping:       make(map[string]*net.UDPAddr),
-		udpProxies:       make(map[string]proxy.Proxy),
-		iptables:         chain,
-		defaultIp:        config.DefaultIp,
-		proxyFactoryFunc: proxy.NewProxy,
-	}
-	return mapper, nil
-}
-
 // Network interface represents the networking stack of a container
 type NetworkInterface struct {
 	IPNet net.IPNet
@@ -299,7 +176,7 @@ func (iface *NetworkInterface) AllocatePort(port Port, binding PortBinding) (*Na
 		return nil, fmt.Errorf("Trying to allocate port for interface %v, which is disabled", iface) // FIXME
 	}
 
-	ip := iface.manager.portMapper.defaultIp
+	ip := iface.manager.defaultBindingIP
 
 	if binding.HostIp != "" {
 		ip = net.ParseIP(binding.HostIp)
@@ -331,7 +208,7 @@ func (iface *NetworkInterface) AllocatePort(port Port, binding PortBinding) (*Na
 		backend = &net.UDPAddr{IP: iface.IPNet.IP, Port: containerPort}
 	}
 
-	if err := iface.manager.portMapper.Map(ip, extPort, backend); err != nil {
+	if err := portmapper.Map(backend, ip, extPort); err != nil {
 		portallocator.ReleasePort(ip, nat.Port.Proto(), extPort)
 		return nil, err
 	}
@@ -365,7 +242,15 @@ func (iface *NetworkInterface) Release() {
 		}
 		ip := net.ParseIP(nat.Binding.HostIp)
 		utils.Debugf("Unmaping %s/%s:%s", nat.Port.Proto, ip.String(), nat.Binding.HostPort)
-		if err := iface.manager.portMapper.Unmap(ip, hostPort, nat.Port.Proto()); err != nil {
+
+		var host net.Addr
+		if nat.Port.Proto() == "tcp" {
+			host = &net.TCPAddr{IP: ip, Port: hostPort}
+		} else {
+			host = &net.UDPAddr{IP: ip, Port: hostPort}
+		}
+
+		if err := portmapper.Unmap(host); err != nil {
 			log.Printf("Unable to unmap port %s: %s", nat, err)
 		}
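Since portmapper.Unmap keys mappings by the concrete host address, the
caller must first rebuild a *net.TCPAddr or *net.UDPAddr of the matching
protocol. A self-contained sketch of that dispatch (the helper name is
illustrative, not part of the codebase):

	package main

	import (
		"fmt"
		"net"
	)

	// hostAddr mirrors the Release() change above: pick the concrete
	// net.Addr type that matches the mapping's protocol.
	func hostAddr(proto string, ip net.IP, port int) net.Addr {
		if proto == "tcp" {
			return &net.TCPAddr{IP: ip, Port: port}
		}
		return &net.UDPAddr{IP: ip, Port: port}
	}

	func main() {
		fmt.Println(hostAddr("tcp", net.ParseIP("0.0.0.0"), 49153)) // 0.0.0.0:49153
		fmt.Println(hostAddr("udp", net.ParseIP("0.0.0.0"), 53))    // 0.0.0.0:53
	}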
@@ -382,12 +267,10 @@ func (iface *NetworkInterface) Release() {
 // Network Manager manages a set of network interfaces
 // Only *one* manager per host machine should be used
 type NetworkManager struct {
-	bridgeIface   string
-	bridgeNetwork *net.IPNet
-
-	portMapper *PortMapper
-
-	disabled bool
+	bridgeIface      string
+	bridgeNetwork    *net.IPNet
+	defaultBindingIP net.IP
+	disabled         bool
 }
 
 // Allocate a network interface
@@ -444,7 +327,7 @@ func newNetworkManager(config *DaemonConfig) (*NetworkManager, error) {
 	natArgs := []string{"POSTROUTING", "-t", "nat", "-s", addr.String(), "!", "-d", addr.String(), "-j", "MASQUERADE"}
 
 	if !iptables.Exists(natArgs...) {
-		if output, err := iptables.Raw(append([]string{"-A"}, natArgs...)...); err != nil {
+		if output, err := iptables.Raw(append([]string{"-I"}, natArgs...)...); err != nil {
 			return nil, fmt.Errorf("Unable to enable network bridge NAT: %s", err)
 		} else if len(output) != 0 {
 			return nil, fmt.Errorf("Error iptables postrouting: %s", output)
@@ -508,16 +391,23 @@ func newNetworkManager(config *DaemonConfig) (*NetworkManager, error) {
 		}
 	}
 
-	portMapper, err := newPortMapper(config)
-	if err != nil {
+	// We can always try removing the iptables
+	if err := iptables.RemoveExistingChain("DOCKER"); err != nil {
 		return nil, err
 	}
 
-	manager := &NetworkManager{
-		bridgeIface:   config.BridgeIface,
-		bridgeNetwork: network,
-		portMapper:    portMapper,
+	if config.EnableIptables {
+		chain, err := iptables.NewChain("DOCKER", config.BridgeIface)
+		if err != nil {
+			return nil, err
+		}
+		portmapper.SetIptablesChain(chain)
+	}
+
+	manager := &NetworkManager{
+		bridgeIface:      config.BridgeIface,
+		bridgeNetwork:    network,
+		defaultBindingIP: config.DefaultIp,
 	}
 	return manager, nil
 }
@@ -1,72 +0,0 @@
-package docker
-
-import (
-	"github.com/dotcloud/docker/pkg/iptables"
-	"github.com/dotcloud/docker/proxy"
-	"net"
-	"testing"
-)
-
-type StubProxy struct {
-	frontendAddr *net.Addr
-	backendAddr  *net.Addr
-}
-
-func (proxy *StubProxy) Run()                   {}
-func (proxy *StubProxy) Close()                 {}
-func (proxy *StubProxy) FrontendAddr() net.Addr { return *proxy.frontendAddr }
-func (proxy *StubProxy) BackendAddr() net.Addr  { return *proxy.backendAddr }
-
-func NewStubProxy(frontendAddr, backendAddr net.Addr) (proxy.Proxy, error) {
-	return &StubProxy{
-		frontendAddr: &frontendAddr,
-		backendAddr:  &backendAddr,
-	}, nil
-}
-
-func TestPortMapper(t *testing.T) {
-	// FIXME: is this iptables chain still used anywhere?
-	var chain *iptables.Chain
-	mapper := &PortMapper{
-		tcpMapping:       make(map[string]*net.TCPAddr),
-		tcpProxies:       make(map[string]proxy.Proxy),
-		udpMapping:       make(map[string]*net.UDPAddr),
-		udpProxies:       make(map[string]proxy.Proxy),
-		iptables:         chain,
-		defaultIp:        net.IP("0.0.0.0"),
-		proxyFactoryFunc: NewStubProxy,
-	}
-
-	dstIp1 := net.ParseIP("192.168.0.1")
-	dstIp2 := net.ParseIP("192.168.0.2")
-	srcAddr1 := &net.TCPAddr{Port: 1080, IP: net.ParseIP("172.16.0.1")}
-	srcAddr2 := &net.TCPAddr{Port: 1080, IP: net.ParseIP("172.16.0.2")}
-
-	if err := mapper.Map(dstIp1, 80, srcAddr1); err != nil {
-		t.Fatalf("Failed to allocate port: %s", err)
-	}
-
-	if mapper.Map(dstIp1, 80, srcAddr1) == nil {
-		t.Fatalf("Port is in use - mapping should have failed")
-	}
-
-	if mapper.Map(dstIp1, 80, srcAddr2) == nil {
-		t.Fatalf("Port is in use - mapping should have failed")
-	}
-
-	if err := mapper.Map(dstIp2, 80, srcAddr2); err != nil {
-		t.Fatalf("Failed to allocate port: %s", err)
-	}
-
-	if mapper.Unmap(dstIp1, 80, "tcp") != nil {
-		t.Fatalf("Failed to release port")
-	}
-
-	if mapper.Unmap(dstIp2, 80, "tcp") != nil {
-		t.Fatalf("Failed to release port")
-	}
-
-	if mapper.Unmap(dstIp2, 80, "tcp") == nil {
-		t.Fatalf("Port already released, but no error reported")
-	}
-}
@@ -99,12 +99,17 @@ func getNextIp(address *net.IPNet) (*net.IP, error) {
 		return ip, nil
 	}
 
+	var (
+		firstNetIP = address.IP.To4().Mask(address.Mask)
+		firstAsInt = ipToInt(&firstNetIP) + 1
+	)
+
 	pos = int32(allocated.PullBack())
 	for i := int32(0); i < max; i++ {
 		pos = pos%max + 1
 		next := int32(base + pos)
 
-		if next == ownIP {
+		if next == ownIP || next == firstAsInt {
			continue
		}
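The new firstAsInt guard keeps the allocator from ever handing out the
first IP of the network (e.g. 192.168.0.1), in addition to skipping the
interface's own address. ipToInt itself is not shown in this diff; a
self-contained sketch assuming a big-endian 4-byte view of an IPv4
address, matching the int32 arithmetic above:

	package main

	import (
		"encoding/binary"
		"fmt"
		"net"
	)

	// ipToInt: assumed big-endian integer view of an IPv4 address.
	func ipToInt(ip net.IP) int32 {
		return int32(binary.BigEndian.Uint32(ip.To4()))
	}

	func main() {
		network := &net.IPNet{
			IP:   net.IPv4(192, 168, 0, 0),
			Mask: net.CIDRMask(24, 32),
		}
		firstNetIP := network.IP.To4().Mask(network.Mask)
		firstAsInt := ipToInt(firstNetIP) + 1
		// int32 form of 192.168.0.1, the value the allocation loop skips
		fmt.Println(firstAsInt)
	}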
@@ -213,6 +213,27 @@ func TestIPAllocator(t *testing.T) {
 	}
 }
 
+func TestAllocateFirstIP(t *testing.T) {
+	defer reset()
+	network := &net.IPNet{
+		IP:   []byte{192, 168, 0, 0},
+		Mask: []byte{255, 255, 255, 0},
+	}
+
+	firstIP := network.IP.To4().Mask(network.Mask)
+	first := ipToInt(&firstIP) + 1
+
+	ip, err := RequestIP(network, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	allocated := ipToInt(ip)
+
+	if allocated == first {
+		t.Fatalf("allocated ip should not equal first ip: %d == %d", first, allocated)
+	}
+}
+
 func assertIPEquals(t *testing.T, ip1, ip2 *net.IP) {
 	if !ip1.Equal(*ip2) {
 		t.Fatalf("Expected IP %s, got %s", ip1, ip2)
131 networkdriver/portmapper/mapper.go Normal file
@@ -0,0 +1,131 @@
+package portmapper
+
+import (
+	"errors"
+	"fmt"
+	"github.com/dotcloud/docker/pkg/iptables"
+	"github.com/dotcloud/docker/proxy"
+	"net"
+	"sync"
+)
+
+type mapping struct {
+	proto         string
+	userlandProxy proxy.Proxy
+	host          net.Addr
+	container     net.Addr
+}
+
+var (
+	chain *iptables.Chain
+	lock  sync.Mutex
+
+	// udp:ip:port
+	currentMappings = make(map[string]*mapping)
+	newProxy        = proxy.NewProxy
+)
+
+var (
+	ErrUnknownBackendAddressType = errors.New("unknown container address type not supported")
+	ErrPortMappedForIP           = errors.New("port is already mapped to ip")
+	ErrPortNotMapped             = errors.New("port is not mapped")
+)
+
+func SetIptablesChain(c *iptables.Chain) {
+	chain = c
+}
+
+func Map(container net.Addr, hostIP net.IP, hostPort int) error {
+	lock.Lock()
+	defer lock.Unlock()
+
+	var m *mapping
+	switch container.(type) {
+	case *net.TCPAddr:
+		m = &mapping{
+			proto:     "tcp",
+			host:      &net.TCPAddr{IP: hostIP, Port: hostPort},
+			container: container,
+		}
+	case *net.UDPAddr:
+		m = &mapping{
+			proto:     "udp",
+			host:      &net.UDPAddr{IP: hostIP, Port: hostPort},
+			container: container,
+		}
+	default:
+		return ErrUnknownBackendAddressType
+	}
+
+	key := getKey(m.host)
+	if _, exists := currentMappings[key]; exists {
+		return ErrPortMappedForIP
+	}
+
+	containerIP, containerPort := getIPAndPort(m.container)
+	if err := forward(iptables.Add, m.proto, hostIP, hostPort, containerIP.String(), containerPort); err != nil {
+		return err
+	}
+
+	p, err := newProxy(m.host, m.container)
+	if err != nil {
+		// need to undo the iptables rules before we return
+		forward(iptables.Delete, m.proto, hostIP, hostPort, containerIP.String(), containerPort)
+		return err
+	}
+
+	m.userlandProxy = p
+	currentMappings[key] = m
+
+	go p.Run()
+
+	return nil
+}
+
+func Unmap(host net.Addr) error {
+	lock.Lock()
+	defer lock.Unlock()
+
+	key := getKey(host)
+	data, exists := currentMappings[key]
+	if !exists {
+		return ErrPortNotMapped
+	}
+
+	data.userlandProxy.Close()
+	delete(currentMappings, key)
+
+	containerIP, containerPort := getIPAndPort(data.container)
+	hostIP, hostPort := getIPAndPort(data.host)
+	if err := forward(iptables.Delete, data.proto, hostIP, hostPort, containerIP.String(), containerPort); err != nil {
+		return err
+	}
+	return nil
+}
+
+func getKey(a net.Addr) string {
+	switch t := a.(type) {
+	case *net.TCPAddr:
+		return fmt.Sprintf("%s:%d/%s", t.IP.String(), t.Port, "tcp")
+	case *net.UDPAddr:
+		return fmt.Sprintf("%s:%d/%s", t.IP.String(), t.Port, "udp")
+	}
+	return ""
+}
+
+func getIPAndPort(a net.Addr) (net.IP, int) {
+	switch t := a.(type) {
+	case *net.TCPAddr:
+		return t.IP, t.Port
+	case *net.UDPAddr:
+		return t.IP, t.Port
+	}
+	return nil, 0
+}
+
+func forward(action iptables.Action, proto string, sourceIP net.IP, sourcePort int, containerIP string, containerPort int) error {
+	if chain == nil {
+		return nil
+	}
+	return chain.Forward(action, sourceIP, sourcePort, proto, containerIP, containerPort)
+}
107 networkdriver/portmapper/mapper_test.go Normal file
@@ -0,0 +1,107 @@
+package portmapper
+
+import (
+	"github.com/dotcloud/docker/pkg/iptables"
+	"github.com/dotcloud/docker/proxy"
+	"net"
+	"testing"
+)
+
+func init() {
+	// override this func to mock out the proxy server
+	newProxy = proxy.NewStubProxy
+}
+
+func reset() {
+	chain = nil
+	currentMappings = make(map[string]*mapping)
+}
+
+func TestSetIptablesChain(t *testing.T) {
+	defer reset()
+
+	c := &iptables.Chain{
+		Name:   "TEST",
+		Bridge: "192.168.1.1",
+	}
+
+	if chain != nil {
+		t.Fatal("chain should be nil at init")
+	}
+
+	SetIptablesChain(c)
+	if chain == nil {
+		t.Fatal("chain should not be nil after set")
+	}
+}
+
+func TestMapPorts(t *testing.T) {
+	dstIp1 := net.ParseIP("192.168.0.1")
+	dstIp2 := net.ParseIP("192.168.0.2")
+	dstAddr1 := &net.TCPAddr{IP: dstIp1, Port: 80}
+	dstAddr2 := &net.TCPAddr{IP: dstIp2, Port: 80}
+
+	srcAddr1 := &net.TCPAddr{Port: 1080, IP: net.ParseIP("172.16.0.1")}
+	srcAddr2 := &net.TCPAddr{Port: 1080, IP: net.ParseIP("172.16.0.2")}
+
+	if err := Map(srcAddr1, dstIp1, 80); err != nil {
+		t.Fatalf("Failed to allocate port: %s", err)
+	}
+
+	if Map(srcAddr1, dstIp1, 80) == nil {
+		t.Fatalf("Port is in use - mapping should have failed")
+	}
+
+	if Map(srcAddr2, dstIp1, 80) == nil {
+		t.Fatalf("Port is in use - mapping should have failed")
+	}
+
+	if err := Map(srcAddr2, dstIp2, 80); err != nil {
+		t.Fatalf("Failed to allocate port: %s", err)
+	}
+
+	if Unmap(dstAddr1) != nil {
+		t.Fatalf("Failed to release port")
+	}
+
+	if Unmap(dstAddr2) != nil {
+		t.Fatalf("Failed to release port")
+	}
+
+	if Unmap(dstAddr2) == nil {
+		t.Fatalf("Port already released, but no error reported")
+	}
+}
+
+func TestGetUDPKey(t *testing.T) {
+	addr := &net.UDPAddr{IP: net.ParseIP("192.168.1.5"), Port: 53}
+
+	key := getKey(addr)
+
+	if expected := "192.168.1.5:53/udp"; key != expected {
+		t.Fatalf("expected key %s got %s", expected, key)
+	}
+}
+
+func TestGetTCPKey(t *testing.T) {
+	addr := &net.TCPAddr{IP: net.ParseIP("192.168.1.5"), Port: 80}
+
+	key := getKey(addr)
+
+	if expected := "192.168.1.5:80/tcp"; key != expected {
+		t.Fatalf("expected key %s got %s", expected, key)
+	}
+}
+
+func TestGetUDPIPAndPort(t *testing.T) {
+	addr := &net.UDPAddr{IP: net.ParseIP("192.168.1.5"), Port: 53}
+
+	ip, port := getIPAndPort(addr)
+	if expected := "192.168.1.5"; ip.String() != expected {
+		t.Fatalf("expected ip %s got %s", expected, ip)
+	}
+
+	if ep := 53; port != ep {
+		t.Fatalf("expected port %d got %d", ep, port)
+	}
+}
@ -73,6 +73,23 @@ func (c *Chain) Forward(action Action, ip net.IP, port int, proto, dest_addr str
|
|||
} else if len(output) != 0 {
|
||||
return fmt.Errorf("Error iptables forward: %s", output)
|
||||
}
|
||||
|
||||
fAction := action
|
||||
if fAction == Add {
|
||||
fAction = "-I"
|
||||
}
|
||||
if output, err := Raw(string(fAction), "FORWARD",
|
||||
"!", "-i", c.Bridge,
|
||||
"-o", c.Bridge,
|
||||
"-p", proto,
|
||||
"-d", daddr,
|
||||
"--dport", strconv.Itoa(port),
|
||||
"-j", "ACCEPT"); err != nil {
|
||||
return err
|
||||
} else if len(output) != 0 {
|
||||
return fmt.Errorf("Error iptables forward: %s", output)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
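With Add translated to "-I", the ACCEPT rule is inserted at the top of the
FORWARD chain instead of appended, so it takes precedence over any
restrictive rules already present. A self-contained sketch of the argument
list this builds (the helper name is illustrative; nothing is executed):

	package main

	import (
		"fmt"
		"strconv"
	)

	// forwardArgs assembles the FORWARD rule arguments shown in the
	// hunk above: traffic arriving from outside the bridge, headed to
	// the given destination, protocol and port, is accepted.
	func forwardArgs(action, bridge, proto, daddr string, port int) []string {
		return []string{
			action, "FORWARD",
			"!", "-i", bridge,
			"-o", bridge,
			"-p", proto,
			"-d", daddr,
			"--dport", strconv.Itoa(port),
			"-j", "ACCEPT",
		}
	}

	func main() {
		fmt.Println(forwardArgs("-I", "docker0", "tcp", "172.17.0.2", 80))
	}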
22 proxy/stub_proxy.go Normal file
@@ -0,0 +1,22 @@
+package proxy
+
+import (
+	"net"
+)
+
+type StubProxy struct {
+	frontendAddr net.Addr
+	backendAddr  net.Addr
+}
+
+func (p *StubProxy) Run()                   {}
+func (p *StubProxy) Close()                 {}
+func (p *StubProxy) FrontendAddr() net.Addr { return p.frontendAddr }
+func (p *StubProxy) BackendAddr() net.Addr  { return p.backendAddr }
+
+func NewStubProxy(frontendAddr, backendAddr net.Addr) (Proxy, error) {
+	return &StubProxy{
+		frontendAddr: frontendAddr,
+		backendAddr:  backendAddr,
+	}, nil
+}
@@ -9,6 +9,7 @@ import (
 	"github.com/dotcloud/docker/execdriver/lxc"
 	"github.com/dotcloud/docker/graphdriver"
 	"github.com/dotcloud/docker/graphdriver/aufs"
+	_ "github.com/dotcloud/docker/graphdriver/btrfs"
 	_ "github.com/dotcloud/docker/graphdriver/devmapper"
 	_ "github.com/dotcloud/docker/graphdriver/vfs"
 	"github.com/dotcloud/docker/networkdriver/portallocator"
43 server.go
@@ -43,8 +43,7 @@ func init() {
 // The signals SIGINT, SIGQUIT and SIGTERM are intercepted for cleanup.
 func jobInitApi(job *engine.Job) engine.Status {
 	job.Logf("Creating server")
-	// FIXME: ImportEnv deprecates ConfigFromJob
-	srv, err := NewServer(job.Eng, ConfigFromJob(job))
+	srv, err := NewServer(job.Eng, DaemonConfigFromJob(job))
 	if err != nil {
 		return job.Error(err)
 	}
@@ -1012,7 +1011,7 @@ func (srv *Server) Containers(job *engine.Job) engine.Status {
 	}, -1)
 
 	for _, container := range srv.runtime.List() {
-		if !container.State.IsRunning() && !all && n == -1 && since == "" && before == "" {
+		if !container.State.IsRunning() && !all && n <= 0 && since == "" && before == "" {
 			continue
 		}
 		if before != "" && !foundBefore {

@@ -1021,7 +1020,7 @@ func (srv *Server) Containers(job *engine.Job) engine.Status {
 			}
 			continue
 		}
-		if displayed == n {
+		if n > 0 && displayed == n {
 			break
 		}
 		if container.ID == since || utils.TruncateID(container.ID) == since {
@@ -1644,10 +1643,7 @@ func (srv *Server) ContainerCreate(job *engine.Job) engine.Status {
 	} else if len(job.Args) > 1 {
 		return job.Errorf("Usage: %s", job.Name)
 	}
-	var config Config
-	if err := job.ExportEnv(&config); err != nil {
-		return job.Error(err)
-	}
+	config := ContainerConfigFromJob(job)
 	if config.Memory != 0 && config.Memory < 524288 {
 		return job.Errorf("Minimum memory limit allowed is 512k")
 	}
@@ -1668,7 +1664,7 @@ func (srv *Server) ContainerCreate(job *engine.Job) engine.Status {
 		config.Dns = defaultDns
 	}
 
-	container, buildWarnings, err := srv.runtime.Create(&config, name)
+	container, buildWarnings, err := srv.runtime.Create(config, name)
 	if err != nil {
 		if srv.runtime.graph.IsNotExist(err) {
 			_, tag := utils.ParseRepositoryTag(config.Image)
@@ -1699,10 +1695,12 @@ func (srv *Server) ContainerRestart(job *engine.Job) engine.Status {
 	if len(job.Args) != 1 {
 		return job.Errorf("Usage: %s CONTAINER\n", job.Name)
 	}
-	name := job.Args[0]
-	t := job.GetenvInt("t")
-	if t == -1 {
-		t = 10
+	var (
+		name = job.Args[0]
+		t    = 10
+	)
+	if job.EnvExists("t") {
+		t = job.GetenvInt("t")
 	}
 	if container := srv.runtime.Get(name); container != nil {
 		if err := container.Restart(int(t)); err != nil {
@@ -2073,10 +2071,7 @@ func (srv *Server) ContainerStart(job *engine.Job) engine.Status {
 	}
 	// If no environment was set, then no hostconfig was passed.
 	if len(job.Environ()) > 0 {
-		var hostConfig HostConfig
-		if err := job.ExportEnv(&hostConfig); err != nil {
-			return job.Error(err)
-		}
+		hostConfig := ContainerHostConfigFromJob(job)
 		// Validate the HostConfig binds. Make sure that:
 		// 1) the source of a bind mount isn't /
 		//    The bind mount "/:/foo" isn't allowed.
@@ -2101,10 +2096,10 @@ func (srv *Server) ContainerStart(job *engine.Job) engine.Status {
 			}
 		}
 		// Register any links from the host config before starting the container
-		if err := srv.RegisterLinks(container, &hostConfig); err != nil {
+		if err := srv.RegisterLinks(container, hostConfig); err != nil {
 			return job.Error(err)
 		}
-		container.hostConfig = &hostConfig
+		container.hostConfig = hostConfig
 		container.ToDisk()
 	}
 	if err := container.Start(); err != nil {
@@ -2119,10 +2114,12 @@ func (srv *Server) ContainerStop(job *engine.Job) engine.Status {
 	if len(job.Args) != 1 {
 		return job.Errorf("Usage: %s CONTAINER\n", job.Name)
 	}
-	name := job.Args[0]
-	t := job.GetenvInt("t")
-	if t == -1 {
-		t = 10
+	var (
+		name = job.Args[0]
+		t    = 10
+	)
+	if job.EnvExists("t") {
+		t = job.GetenvInt("t")
 	}
 	if container := srv.runtime.Get(name); container != nil {
 		if err := container.Stop(int(t)); err != nil {
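ContainerRestart and ContainerStop now distinguish "t was never set" from
"t is 0" via EnvExists, which the old GetenvInt == -1 test can no longer
express after the default changed to 0. A self-contained sketch of the
pattern (the job type here is a stand-in, not the engine's):

	package main

	import "fmt"

	type job struct{ env map[string]int }

	// EnvExists reports whether the key was set at all.
	func (j *job) EnvExists(key string) bool {
		_, ok := j.env[key]
		return ok
	}

	// GetenvInt returns the value, or 0 when the key is missing.
	func (j *job) GetenvInt(key string) int {
		return j.env[key]
	}

	func main() {
		j := &job{env: map[string]int{}}
		t := 10 // default stop timeout, as in ContainerStop above
		if j.EnvExists("t") {
			t = j.GetenvInt("t")
		}
		fmt.Println(t) // prints 10; an explicit t=0 would be honoured
	}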