1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

read seccomp profile locally then pass to daemon

Signed-off-by: Jessica Frazelle <acidburn@docker.com>
This commit is contained in:
Jessica Frazelle 2016-01-04 09:59:26 -08:00
parent c1582f20cc
commit 062d0b3921
No known key found for this signature in database
GPG key ID: 18F3685C0022BFF3
24 changed files with 37 additions and 1942 deletions

View file

@ -5,32 +5,26 @@ package native
import (
"encoding/json"
"fmt"
"io/ioutil"
"github.com/docker/engine-api/types"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/specs"
)
func getDefaultSeccompProfile() *configs.Seccomp {
return defaultSeccompProfile
}
func loadSeccompProfile(path string) (*configs.Seccomp, error) {
f, err := ioutil.ReadFile(path)
if err != nil {
return nil, fmt.Errorf("Opening seccomp profile failed: %v", err)
}
var config specs.Seccomp
if err := json.Unmarshal(f, &config); err != nil {
func loadSeccompProfile(body string) (*configs.Seccomp, error) {
var config types.Seccomp
if err := json.Unmarshal([]byte(body), &config); err != nil {
return nil, fmt.Errorf("Decoding seccomp profile failed: %v", err)
}
return setupSeccomp(&config)
}
func setupSeccomp(config *specs.Seccomp) (newConfig *configs.Seccomp, err error) {
func setupSeccomp(config *types.Seccomp) (newConfig *configs.Seccomp, err error) {
if config == nil {
return nil, nil
}

View file

@ -57,7 +57,6 @@ clone git github.com/jfrazelle/go v1.5.1-1
clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c
clone git github.com/opencontainers/runc d97d5e8b007e4657316eed76ea30bc0f690230cf # libcontainer
clone git github.com/opencontainers/specs 46d949ea81080c5f60dfb72ee91468b1e9fb2998 # specs
clone git github.com/seccomp/libseccomp-golang 1b506fc7c24eec5a3693cdcbed40d9c226cfc6a1
# libcontainer deps (see src/github.com/opencontainers/runc/Godeps/Godeps.json)
clone git github.com/coreos/go-systemd v4

View file

@ -1,7 +1,10 @@
package opts
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"path"
"strconv"
"strings"
@ -320,6 +323,11 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
return nil, nil, nil, cmd, err
}
securityOpts, err := parseSecurityOpts(flSecurityOpt.GetAll())
if err != nil {
return nil, nil, nil, cmd, err
}
resources := container.Resources{
CgroupParent: *flCgroupParent,
Memory: flMemory,
@ -394,7 +402,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
CapDrop: strslice.New(flCapDrop.GetAll()...),
GroupAdd: flGroupAdd.GetAll(),
RestartPolicy: restartPolicy,
SecurityOpt: flSecurityOpt.GetAll(),
SecurityOpt: securityOpts,
ReadonlyRootfs: *flReadonlyRootfs,
LogConfig: container.LogConfig{Type: *flLoggingDriver, Config: loggingOpts},
VolumeDriver: *flVolumeDriver,
@ -464,6 +472,29 @@ func parseLoggingOpts(loggingDriver string, loggingOpts []string) (map[string]st
return loggingOptsMap, nil
}
// takes a local seccomp daemon, reads the file contents for sending to the daemon
func parseSecurityOpts(securityOpts []string) ([]string, error) {
for key, opt := range securityOpts {
con := strings.SplitN(opt, ":", 2)
if len(con) == 1 {
return securityOpts, fmt.Errorf("Invalid --security-opt: %q", opt)
}
if con[0] == "seccomp" && con[1] != "unconfined" {
f, err := ioutil.ReadFile(con[1])
if err != nil {
return securityOpts, fmt.Errorf("Opening seccomp profile (%s) failed: %v", con[1], err)
}
b := bytes.NewBuffer(nil)
if err := json.Compact(b, f); err != nil {
return securityOpts, fmt.Errorf("Compacting json for seccomp profile (%s) failed: %v", con[1], err)
}
securityOpts[key] = fmt.Sprintf("seccomp:%s", b.Bytes())
}
}
return securityOpts, nil
}
// ParseRestartPolicy returns the parsed policy or an error indicating what is incorrect
func ParseRestartPolicy(policy string) (container.RestartPolicy, error) {
p := container.RestartPolicy{}

View file

@ -1,20 +0,0 @@
language: go
go:
- 1.5.1
- 1.4.3
- 1.3.3
sudo: false
before_install:
- go get golang.org/x/tools/cmd/vet
- go get github.com/golang/lint/golint
- go get github.com/vbatts/git-validation
install: true
script:
- go vet -x ./...
- $HOME/gopath/bin/golint ./...
- $HOME/gopath/bin/git-validation -run DCO,short-subject -v -range ${TRAVIS_COMMIT_RANGE}

View file

@ -1,191 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Copyright 2015 The Linux Foundation.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View file

@ -1,8 +0,0 @@
Michael Crosby <michael@docker.com> (@crosbymichael)
Alexander Morozov <lk4d4@docker.com> (@LK4D4)
Vishnu Kannan <vishnuk@google.com> (@vishnuk)
Mrunal Patel <mpatel@redhat.com> (@mrunalp)
Vincent Batts <vbatts@redhat.com> (@vbatts)
Daniel, Dao Quang Minh <dqminh89@gmail.com> (@dqminh)
Brandon Philips <brandon.philips@coreos.com> (@philips)
Tianon Gravi <admwiggin@gmail.com> (@tianon)

View file

@ -1,144 +0,0 @@
# Open Container Specifications
[Open Container Initiative](http://www.opencontainers.org/) Specifications for standards on Operating System process and application containers.
Table of Contents
- [Container Principles](principles.md)
- [Filesystem Bundle](bundle.md)
- Configuration
- [Container Configuration](config.md)
- [Container Configuration (Linux-specific)](config-linux.md)
- [Runtime Configuration](runtime-config.md)
- [Runtime Configuration (Linux-specific)](runtime-config-linux.md)
- [Runtime and Lifecycle](runtime.md)
- [Linux Specific Runtime](runtime-linux.md)
- [Implementations](implementations.md)
# Use Cases
To provide context for users the following section gives example use cases for each part of the spec.
## Filesystem Bundle & Configuration
- A user can create a root filesystem and configuration, with low-level OS and host specific details, and launch it as a container under an Open Container runtime.
# Releases
There is a loose [Road Map](https://github.com/opencontainers/specs/wiki/RoadMap:) on the wiki.
During the `0.x` series of OCI releases we make no backwards compatibility guarantees and intend to break the schema during this series.
# Contributing
Development happens on GitHub for the spec.
Issues are used for bugs and actionable items and longer discussions can happen on the [mailing list](#mailing-list).
The specification and code is licensed under the Apache 2.0 license found in the `LICENSE` file of this repository.
## Code of Conduct
Participation in the OpenContainers community is governed by [OpenContainer's Code of Conduct](code-of-conduct.md).
## Discuss your design
The project welcomes submissions, but please let everyone know what you are working on.
Before undertaking a nontrivial change to this specification, send mail to the [mailing list](#mailing-list) to discuss what you plan to do.
This gives everyone a chance to validate the design, helps prevent duplication of effort, and ensures that the idea fits.
It also guarantees that the design is sound before code is written; a GitHub pull-request is not the place for high-level discussions.
Typos and grammatical errors can go straight to a pull-request.
When in doubt, start on the [mailing-list](#mailing-list).
## Weekly Call
The contributors and maintainers of the project have a weekly meeting Wednesdays at 10:00 AM PST.
Everyone is welcome to participate in the [BlueJeans call][BlueJeans].
An initial agenda will be posted to the [mailing list](#mailing-list) earlier in the week, and everyone is welcome to propose additional topics or suggest other agenda alterations there.
Minutes are posted to the [mailing list](#mailing-list) and minutes from past calls are archived to the [wiki](https://github.com/opencontainers/specs/wiki) for those who are unable to join the call.
## Mailing List
You can subscribe and join the mailing list on [Google Groups](https://groups.google.com/a/opencontainers.org/forum/#!forum/dev).
## IRC
OCI discussion happens on #opencontainers on Freenode.
## Markdown style
To keep consistency throughout the Markdown files in the Open Container spec all files should be formatted one sentence per line.
This fixes two things: it makes diffing easier with git and it resolves fights about line wrapping length.
For example, this paragraph will span three lines in the Markdown source.
## Git commit
### Sign your work
The sign-off is a simple line at the end of the explanation for the patch, which certifies that you wrote it or otherwise have the right to pass it on as an open-source patch.
The rules are pretty simple: if you can certify the below (from [developercertificate.org](http://developercertificate.org/)):
```
Developer Certificate of Origin
Version 1.1
Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
660 York Street, Suite 102,
San Francisco, CA 94110 USA
Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.
Developer's Certificate of Origin 1.1
By making a contribution to this project, I certify that:
(a) The contribution was created in whole or in part by me and I
have the right to submit it under the open source license
indicated in the file; or
(b) The contribution is based upon previous work that, to the best
of my knowledge, is covered under an appropriate open source
license and I have the right under that license to submit that
work with modifications, whether created in whole or in part
by me, under the same open source license (unless I am
permitted to submit under a different license), as indicated
in the file; or
(c) The contribution was provided directly to me by some other
person who certified (a), (b) or (c) and I have not modified
it.
(d) I understand and agree that this project and the contribution
are public and that a record of the contribution (including all
personal information I submit with it, including my sign-off) is
maintained indefinitely and may be redistributed consistent with
this project or the open source license(s) involved.
```
then you just add a line to every git commit message:
Signed-off-by: Joe Smith <joe@gmail.com>
using your real name (sorry, no pseudonyms or anonymous contributions.)
You can add the sign off when creating the git commit via `git commit -s`.
### Commit Style
Simple house-keeping for clean git history.
Read more on [How to Write a Git Commit Message](http://chris.beams.io/posts/git-commit/) or the Discussion section of [`git-commit(1)`](http://git-scm.com/docs/git-commit).
1. Separate the subject from body with a blank line
2. Limit the subject line to 50 characters
3. Capitalize the subject line
4. Do not end the subject line with a period
5. Use the imperative mood in the subject line
6. Wrap the body at 72 characters
7. Use the body to explain what and why vs. how
* If there was important/useful/essential conversation or information, copy or include a reference
8. When possible, one keyword to scope the change in the subject (i.e. "README: ...", "runtime: ...")
[BlueJeans]: https://bluejeans.com/1771332256/

View file

@ -1,96 +0,0 @@
# OCI Specs Roadmap
This document serves to provide a long term roadmap on our quest to a 1.0 version of the OCI container specification.
Its goal is to help both maintainers and contributors find meaningful tasks to focus on and create a low noise environment.
The items in the 1.0 roadmap can be broken down into smaller milestones that are easy to accomplish.
The topics below are broad and small working groups will be needed for each to define scope and requirements or if the feature is required at all for the OCI level.
Topics listed in the roadmap do not mean that they will be implemented or added but are areas that need discussion to see if they fit in to the goals of the OCI.
## 1.0
### Digest and Hashing
A bundle is designed to be moved between hosts.
Although OCI doesn't define a transport method we should have a cryptographic digest of the on-disk bundle that can be used to verify that a bundle is not corrupted and in an expected configuration.
*Owner:* philips
### Review the need for runtime.json
There are some discussions about having `runtime.json` being optional for containers and specifying defaults.
Runtimes would use this standard set of defaults for containers and `runtime.json` would provide overrides for fine tuning of these extra host or platform specific settings.
*Owner:*
### Define Container Lifecycle
Containers have a lifecycle and being able to identify and document the lifecycle of a container is very helpful for implementations of the spec.
The lifecycle events of a container also help identify areas to implement hooks that are portable across various implementations and platforms.
*Owner:* mrunalp
### Define Standard Container Actions
Define what type of actions a runtime can perform on a container without imposing hardships on authors of platforms that do not support advanced options.
*Owner:*
### Clarify rootfs requirement in base spec
Is the rootfs needed or should it just be expected in the bundle without having a field in the spec?
*Owner:*
### Container Definition
Define what a software container is and its attributes in a cross platform way.
*Owner:*
### Live Container Updates
Should we allow dynamic container updates to runtime options?
*Owner:* vishh
### Protobuf Config
We currently have only one language binding for the spec and that is Go.
If we change the specs format in the respository to be something like protobuf then the generation for multiple language bindings become effortless.
*Owner:* vbatts
### Validation Tooling
Provide validation tooling for compliance with OCI spec and runtime environment.
*Owner:* mrunalp
### Version Schema
Decide on a robust versioning schema for the spec as it evolves.
*Owner:*
### Printable/Compiled Spec
Reguardless of how the spec is written, ensure that it is easy to read and follow for first time users.
*Owner:* vbatts
### Base Config Compatibility
Ensure that the base configuration format is viable for various platforms.
Systems:
* Solaris
* Windows
* Linux
*Owner:*
### Full Lifecycle Hooks
Ensure that we have lifecycle hooks in the correct places with full coverage over the container lifecycle.
*Owner:*

View file

@ -1,30 +0,0 @@
# Filesystem Bundle
## Container Format
This section defines a format for encoding a container as a *filesystem bundle* - a set of files organized in a certain way, and containing all the necessary data and metadata for any compliant runtime to perform all standard operations against it.
See also [OS X application bundles](http://en.wikipedia.org/wiki/Bundle_%28OS_X%29) for a similar use of the term *bundle*.
The definition of a bundle is only concerned with how a container, and its configuration data, are stored on a local file system so that it can be consumed by a compliant runtime.
A Standard Container bundle contains all the information needed to load and run a container.
This includes the following three artifacts which MUST all reside in the same directory on the local filesystem:
1. `config.json` : contains host independent configuration data.
This REQUIRED file, which MUST be named `config.json`, contains settings that are host independent and application specific such as security permissions, environment variables and arguments.
When the bundle is packaged up for distribution, this file MUST be included.
See [`config.json`](config.md) for more details.
2. `runtime.json` : contains host-specific configuration data.
This REQUIRED file, which MUST be named `runtime.json`, contains settings that are host specific such as mount sources and hooks.
The goal is that the bundle can be moved as a unit to another runtime and run the same application once a host-specific `runtime.json` is defined.
When the bundle is packaged up for distribution, this file MUST NOT be included.
See [`runtime.json`](runtime-config.md) for more details.
3. A directory representing the root filesystem of the container.
While the name of this REQUIRED directory may be arbitrary, users should consider using a conventional name, such as `rootfs`.
When the bundle is packaged up for distribution, this directory MUST be included.
This directory MUST be referenced from within the `config.json` file.
While these three artifacts MUST all be present in a single directory on the local filesytem, that directory itself is not part of the bundle.
In other words, a tar archive of a *bundle* will have these artifacts at the root of the archive, not nested within a top-level directory.

View file

@ -1,37 +0,0 @@
# OpenContainers Code of Conduct
Behave as a community member, follow the code of conduct.
## Code of Conduct
The OpenContainers community is made up of a mixture of professionals and volunteers from all over the world.
When we disagree, we try to understand why.
Disagreements, both social and technical, happen all the time and OpenContainers is no exception.
It is important that we resolve disagreements and differing views constructively.
This code of conduct applies both within project spaces and in public spaces when an individual is representing the project or its community.
Participants should be aware of these concerns.
We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, religion, or nationality.
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery
* Personal attacks
* Trolling or insulting/derogatory comments
* Public or private harassment
* Publishing other's private information, such as physical or electronic addresses, without explicit permission
* Other unethical or unprofessional conduct
The OpenContainers team does not condone any statements by speakers contrary to these standards.
The OpenContainers team reserves the right to deny participation any individual found to be engaging in discriminatory or harassing actions.
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct.
By adopting this Code of Conduct, project maintainers commit themselves to fairly and consistently applying these principles to every aspect of managing this project.
## Thanks
Thanks to the [Fedora Code of Conduct](https://getfedora.org/code-of-conduct) and [Contributor Covenant](http://contributor-covenant.org) for inspiration and ideas.
Portions of this Code of Conduct are adapted from the Contributor Covenant, version 1.2.0, available at http://contributor-covenant.org/version/1/2/0/

View file

@ -1,63 +0,0 @@
# Linux-specific Container Configuration
The Linux container specification uses various kernel features like namespaces, cgroups, capabilities, LSM, and file system jails to fulfill the spec.
Additional information is needed for Linux over the [default spec configuration](config.md) in order to configure these various kernel features.
## Capabilities
Capabilities is an array that specifies Linux capabilities that can be provided to the process inside the container.
Valid values are the strings for capabilities defined in [the man page](http://man7.org/linux/man-pages/man7/capabilities.7.html)
```json
"capabilities": [
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE"
]
```
## User namespace mappings
```json
"uidMappings": [
{
"hostID": 1000,
"containerID": 0,
"size": 10
}
],
"gidMappings": [
{
"hostID": 1000,
"containerID": 0,
"size": 10
}
]
```
uid/gid mappings describe the user namespace mappings from the host to the container.
The mappings represent how the bundle `rootfs` expects the user namespace to be setup and the runtime SHOULD NOT modify the permissions on the rootfs to realize the mapping.
*hostID* is the starting uid/gid on the host to be mapped to *containerID* which is the starting uid/gid in the container and *size* refers to the number of ids to be mapped.
There is a limit of 5 mappings which is the Linux kernel hard limit.
## Default Devices and File Systems
The Linux ABI includes both syscalls and several special file paths.
Applications expecting a Linux environment will very likely expect these files paths to be setup correctly.
The following devices and filesystems MUST be made available in each application's filesystem
| Path | Type | Notes |
| ------------ | ------ | ------- |
| /proc | [procfs](https://www.kernel.org/doc/Documentation/filesystems/proc.txt) | |
| /sys | [sysfs](https://www.kernel.org/doc/Documentation/filesystems/sysfs.txt) | |
| /dev/null | [device](http://man7.org/linux/man-pages/man4/null.4.html) | |
| /dev/zero | [device](http://man7.org/linux/man-pages/man4/zero.4.html) | |
| /dev/full | [device](http://man7.org/linux/man-pages/man4/full.4.html) | |
| /dev/random | [device](http://man7.org/linux/man-pages/man4/random.4.html) | |
| /dev/urandom | [device](http://man7.org/linux/man-pages/man4/random.4.html) | |
| /dev/tty | [device](http://man7.org/linux/man-pages/man4/tty.4.html) | |
| /dev/console | [device](http://man7.org/linux/man-pages/man4/console.4.html) | |
| /dev/pts | [devpts](https://www.kernel.org/doc/Documentation/filesystems/devpts.txt) | |
| /dev/ptmx | [device](https://www.kernel.org/doc/Documentation/filesystems/devpts.txt) | Bind-mount or symlink of /dev/pts/ptmx |
| /dev/shm | [tmpfs](https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt) | |

View file

@ -1,59 +0,0 @@
package specs
// Spec is the base configuration for the container. It specifies platform
// independent configuration. This information must be included when the
// bundle is packaged for distribution.
type Spec struct {
// Version is the version of the specification that is supported.
Version string `json:"version"`
// Platform is the host information for OS and Arch.
Platform Platform `json:"platform"`
// Process is the container's main process.
Process Process `json:"process"`
// Root is the root information for the container's filesystem.
Root Root `json:"root"`
// Hostname is the container's host name.
Hostname string `json:"hostname"`
// Mounts profile configuration for adding mounts to the container's filesystem.
Mounts []MountPoint `json:"mounts"`
}
// Process contains information to start a specific application inside the container.
type Process struct {
// Terminal creates an interactive terminal for the container.
Terminal bool `json:"terminal"`
// User specifies user information for the process.
User User `json:"user"`
// Args specifies the binary and arguments for the application to execute.
Args []string `json:"args"`
// Env populates the process environment for the process.
Env []string `json:"env"`
// Cwd is the current working directory for the process and must be
// relative to the container's root.
Cwd string `json:"cwd"`
}
// Root contains information about the container's root filesystem on the host.
type Root struct {
// Path is the absolute path to the container's root filesystem.
Path string `json:"path"`
// Readonly makes the root filesystem for the container readonly before the process is executed.
Readonly bool `json:"readonly"`
}
// Platform specifies OS and arch information for the host system that the container
// is created for.
type Platform struct {
// OS is the operating system.
OS string `json:"os"`
// Arch is the architecture
Arch string `json:"arch"`
}
// MountPoint describes a directory that may be fullfilled by a mount in the runtime.json.
type MountPoint struct {
// Name is a unique descriptive identifier for this mount point.
Name string `json:"name"`
// Path specifies the path of the mount. The path and child directories MUST exist, a runtime MUST NOT create directories automatically to a mount point.
Path string `json:"path"`
}

View file

@ -1,128 +0,0 @@
# Container Configuration file
The container's top-level directory MUST contain a configuration file called `config.json`.
For now the canonical schema is defined in [config.go](config.go) and [config_linux.go](config_linux.go), but this will be moved to a formal JSON schema over time.
The configuration file contains metadata necessary to implement standard operations against the container.
This includes the process to run, environment variables to inject, sandboxing features to use, etc.
Below is a detailed description of each field defined in the configuration format.
## Manifest version
* **`version`** (string, required) must be in [SemVer v2.0.0](http://semver.org/spec/v2.0.0.html) format and specifies the version of the OCF specification with which the container bundle complies. The Open Container spec follows semantic versioning and retains forward and backward compatibility within major versions. For example, if an implementation is compliant with version 1.0.1 of the spec, it is compatible with the complete 1.x series.
*Example*
```json
"version": "0.1.0"
```
## Root Configuration
Each container has exactly one *root filesystem*, specified in the *root* object:
* **`path`** (string, required) Specifies the path to the root filesystem for the container, relative to the path where the manifest is. A directory MUST exist at the relative path declared by the field.
* **`readonly`** (bool, optional) If true then the root filesystem MUST be read-only inside the container. Defaults to false.
*Example*
```json
"root": {
"path": "rootfs",
"readonly": true
}
```
## Mount Points
You can add array of mount points inside container as `mounts`.
Each record in this array must have configuration in [runtime config](runtime-config.md#mount-configuration).
The runtime MUST mount entries in the listed order.
* **`name`** (string, required) Name of mount point. Used for config lookup.
* **`path`** (string, required) Destination of mount point: path inside container.
*Example*
```json
"mounts": [
{
"name": "proc",
"path": "/proc"
},
{
"name": "dev",
"path": "/dev"
},
{
"name": "devpts",
"path": "/dev/pts"
},
{
"name": "data",
"path": "/data"
}
]
```
## Process configuration
* **`terminal`** (bool, optional) specifies whether you want a terminal attached to that process. Defaults to false.
* **`cwd`** (string, optional) is the working directory that will be set for the executable.
* **`env`** (array of strings, optional) contains a list of variables that will be set in the process's environment prior to execution. Elements in the array are specified as Strings in the form "KEY=value". The left hand side must consist solely of letters, digits, and underscores `_` as outlined in [IEEE Std 1003.1-2001](http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap08.html).
* **`args`** (string, required) executable to launch and any flags as an array. The executable is the first element and must be available at the given path inside of the rootfs. If the executable path is not an absolute path then the search $PATH is interpreted to find the executable.
The user for the process is a platform-specific structure that allows specific control over which user the process runs as.
For Linux-based systems the user structure has the following fields:
* **`uid`** (int, required) specifies the user id.
* **`gid`** (int, required) specifies the group id.
* **`additionalGids`** (array of ints, optional) specifies additional group ids to be added to the process.
*Example (Linux)*
```json
"process": {
"terminal": true,
"user": {
"uid": 1,
"gid": 1,
"additionalGids": [5, 6]
},
"env": [
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"TERM=xterm"
],
"cwd": "/root",
"args": [
"sh"
]
}
```
## Hostname
* **`hostname`** (string, optional) as it is accessible to processes running inside.
*Example*
```json
"hostname": "mrsdalloway"
```
## Platform-specific configuration
* **`os`** (string, required) specifies the operating system family this image must run on. Values for os must be in the list specified by the Go Language document for [`$GOOS`](https://golang.org/doc/install/source#environment).
* **`arch`** (string, required) specifies the instruction set for which the binaries in the image have been compiled. Values for arch must be in the list specified by the Go Language document for [`$GOARCH`](https://golang.org/doc/install/source#environment).
```json
"platform": {
"os": "linux",
"arch": "amd64"
}
```
Interpretation of the platform section of the JSON file is used to find which platform-specific sections may be available in the document.
For example, if `os` is set to `linux`, then a JSON object conforming to the [Linux-specific schema](config-linux.md) SHOULD be found at the key `linux` in the `config.json`.

View file

@ -1,25 +0,0 @@
package specs
// LinuxSpec is the full specification for linux containers.
type LinuxSpec struct {
Spec
// Linux is platform specific configuration for linux based containers.
Linux Linux `json:"linux"`
}
// Linux contains platform specific configuration for linux based containers.
type Linux struct {
// Capabilities are linux capabilities that are kept for the container.
Capabilities []string `json:"capabilities"`
}
// User specifies linux specific user and group information for the container's
// main process.
type User struct {
// UID is the user id.
UID uint32 `json:"uid"`
// GID is the group id.
GID uint32 `json:"gid"`
// AdditionalGids are additional group ids set for the container's process.
AdditionalGids []uint32 `json:"additionalGids"`
}

View file

@ -1,21 +0,0 @@
# Implementations
The following sections link to associated projects, some of which are maintained by the OCI and some of which are maintained by external organizations.
If you know of any associated projects that are not listed here, please file a pull request adding a link to that project.
## Runtime (Container)
* [opencontainers/runc](https://github.com/opencontainers/runc) - Reference implementation of OCI runtime
## Runtime (Virtual Machine)
* [hyperhq/runv](https://github.com/hyperhq/runv) - Hypervisor-based runtime for OCI
## Bundle authoring
* [kunalkushwaha/octool](https://github.com/kunalkushwaha/octool) - A config linter and validator.
* [mrunalp/ocitools](https://github.com/mrunalp/ocitools) - A config generator.
## Testing
* [huawei-openlab/oct](https://github.com/huawei-openlab/oct) - Open Container Testing framework for OCI configuration and runtime

View file

@ -1,46 +0,0 @@
# The 5 principles of Standard Containers
Define a unit of software delivery called a Standard Container.
The goal of a Standard Container is to encapsulate a software component and all its dependencies in a format that is self-describing and portable, so that any compliant runtime can run it without extra dependencies, regardless of the underlying machine and the contents of the container.
The specification for Standard Containers defines:
1. configuration file formats
2. a set of standard operations
3. an execution environment.
A great analogy for this is the physical shipping container used by the transportation industry.
Shipping containers are a fundamental unit of delivery, they can be lifted, stacked, locked, loaded, unloaded and labelled.
Irrespective of their contents, by standardizing the container itself it allowed for a consistent, more streamlined and efficient set of processes to be defined.
For software Standard Containers offer similar functionality by being the fundamental, standardized, unit of delivery for a software package.
## 1. Standard operations
Standard Containers define a set of STANDARD OPERATIONS.
They can be created, started, and stopped using standard container tools; copied and snapshotted using standard filesystem tools; and downloaded and uploaded using standard network tools.
## 2. Content-agnostic
Standard Containers are CONTENT-AGNOSTIC: all standard operations have the same effect regardless of the contents.
They are started in the same way whether they contain a postgres database, a php application with its dependencies and application server, or Java build artifacts.
## 3. Infrastructure-agnostic
Standard Containers are INFRASTRUCTURE-AGNOSTIC: they can be run in any OCI supported infrastructure.
For example, a standard container can be bundled on a laptop, uploaded to cloud storage, downloaded, run and snapshotted by a build server at a fiber hotel in Virginia, uploaded to 10 staging servers in a home-made private cloud cluster, then sent to 30 production instances across 3 public cloud regions.
## 4. Designed for automation
Standard Containers are DESIGNED FOR AUTOMATION: because they offer the same standard operations regardless of content and infrastructure, Standard Containers, are extremely well-suited for automation.
In fact, you could say automation is their secret weapon.
Many things that once required time-consuming and error-prone human effort can now be programmed.
Before Standard Containers, by the time a software component ran in production, it had been individually built, configured, bundled, documented, patched, vendored, templated, tweaked and instrumented by 10 different people on 10 different computers.
Builds failed, libraries conflicted, mirrors crashed, post-it notes were lost, logs were misplaced, cluster updates were half-broken.
The process was slow, inefficient and cost a fortune - and was entirely different depending on the language and infrastructure provider.
## 5. Industrial-grade delivery
Standard Containers make INDUSTRIAL-GRADE DELIVERY of software a reality.
Leveraging all of the properties listed above, Standard Containers are enabling large and small enterprises to streamline and automate their software delivery pipelines.
Whether it is in-house devOps flows, or external customer-based software delivery mechanisms, Standard Containers are changing the way the community thinks about software packaging and delivery.

View file

@ -1,502 +0,0 @@
# Linux-specific Runtime Configuration
## Namespaces
A namespace wraps a global system resource in an abstraction that makes it appear to the processes within the namespace that they have their own isolated instance of the global resource.
Changes to the global resource are visible to other processes that are members of the namespace, but are invisible to other processes.
For more information, see [the man page](http://man7.org/linux/man-pages/man7/namespaces.7.html).
Namespaces are specified as an array of entries inside the `namespaces` root field.
The following parameters can be specified to setup namespaces:
* **`type`** *(string, required)* - namespace type. The following namespaces types are supported:
* **`pid`** processes inside the container will only be able to see other processes inside the same container
* **`network`** the container will have its own network stack
* **`mount`** the container will have an isolated mount table
* **`ipc`** processes inside the container will only be able to communicate to other processes inside the same container via system level IPC
* **`uts`** the container will be able to have its own hostname and domain name
* **`user`** the container will be able to remap user and group IDs from the host to local users and groups within the container
* **`path`** *(string, optional)* - path to namespace file
If a path is specified, that particular file is used to join that type of namespace.
Also, when a path is specified, a runtime MUST assume that the setup for that particular namespace has already been done and error out if the config specifies anything else related to that namespace.
###### Example
```json
"namespaces": [
{
"type": "pid",
"path": "/proc/1234/ns/pid"
},
{
"type": "network",
"path": "/var/run/netns/neta"
},
{
"type": "mount"
},
{
"type": "ipc"
},
{
"type": "uts"
},
{
"type": "user"
}
]
```
## Devices
`devices` is an array specifying the list of devices to be created in the container.
The following parameters can be specified:
* **`type`** *(char, required)* - type of device: `c`, `b`, `u` or `p`. More info in `man mknod`.
* **`path`** *(string, optional)* - full path to device inside container
* **`major, minor`** *(int64, required)* - major, minor numbers for device. More info in `man mknod`. There is a special value: `-1`, which means `*` for `device` cgroup setup.
* **`permissions`** *(string, optional)* - cgroup permissions for device. A composition of `r` (*read*), `w` (*write*), and `m` (*mknod*).
* **`fileMode`** *(uint32, optional)* - file mode for device file
* **`uid`** *(uint32, optional)* - uid of device owner
* **`gid`** *(uint32, optional)* - gid of device owner
**`fileMode`**, **`uid`** and **`gid`** are required if **`path`** is given and are otherwise not allowed.
###### Example
```json
"devices": [
{
"path": "/dev/random",
"type": "c",
"major": 1,
"minor": 8,
"permissions": "rwm",
"fileMode": 0666,
"uid": 0,
"gid": 0
},
{
"path": "/dev/urandom",
"type": "c",
"major": 1,
"minor": 9,
"permissions": "rwm",
"fileMode": 0666,
"uid": 0,
"gid": 0
},
{
"path": "/dev/null",
"type": "c",
"major": 1,
"minor": 3,
"permissions": "rwm",
"fileMode": 0666,
"uid": 0,
"gid": 0
},
{
"path": "/dev/zero",
"type": "c",
"major": 1,
"minor": 5,
"permissions": "rwm",
"fileMode": 0666,
"uid": 0,
"gid": 0
},
{
"path": "/dev/tty",
"type": "c",
"major": 5,
"minor": 0,
"permissions": "rwm",
"fileMode": 0666,
"uid": 0,
"gid": 0
},
{
"path": "/dev/full",
"type": "c",
"major": 1,
"minor": 7,
"permissions": "rwm",
"fileMode": 0666,
"uid": 0,
"gid": 0
}
]
```
## Control groups
Also known as cgroups, they are used to restrict resource usage for a container and handle device access.
cgroups provide controls to restrict cpu, memory, IO, pids and network for the container.
For more information, see the [kernel cgroups documentation](https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt).
The path to the cgroups can be specified in the Spec via `cgroupsPath`.
`cgroupsPath` is expected to be relative to the cgroups mount point.
If not specified, cgroups will be created under '/'.
Implementations of the Spec can choose to name cgroups in any manner.
The Spec does not include naming schema for cgroups.
The Spec does not support [split hierarchy](https://www.kernel.org/doc/Documentation/cgroups/unified-hierarchy.txt).
The cgroups will be created if they don't exist.
```json
"cgroupsPath": "/myRuntime/myContainer"
```
`cgroupsPath` can be used to either control the cgroups hierarchy for containers or to run a new process in an existing container.
You can configure a container's cgroups via the `resources` field of the Linux configuration.
Do not specify `resources` unless limits have to be updated.
For example, to run a new process in an existing container without updating limits, `resources` need not be specified.
#### Disable out-of-memory killer
`disableOOMKiller` contains a boolean (`true` or `false`) that enables or disables the Out of Memory killer for a cgroup.
If enabled (`false`), tasks that attempt to consume more memory than they are allowed are immediately killed by the OOM killer.
The OOM killer is enabled by default in every cgroup using the `memory` subsystem.
To disable it, specify a value of `true`.
For more information, see [the memory cgroup man page](https://www.kernel.org/doc/Documentation/cgroups/memory.txt).
* **`disableOOMKiller`** *(bool, optional)* - enables or disables the OOM killer
###### Example
```json
"disableOOMKiller": false
```
#### Set oom_score_adj
More information on `oom_score_adj` available [here](https://www.kernel.org/doc/Documentation/filesystems/proc.txt).
```json
"oomScoreAdj": 0
```
#### Memory
`memory` represents the cgroup subsystem `memory` and it's used to set limits on the container's memory usage.
For more information, see [the memory cgroup man page](https://www.kernel.org/doc/Documentation/cgroups/memory.txt).
The following parameters can be specified to setup the controller:
* **`limit`** *(uint64, optional)* - sets limit of memory usage
* **`reservation`** *(uint64, optional)* - sets soft limit of memory usage
* **`swap`** *(uint64, optional)* - sets limit of memory+Swap usage
* **`kernel`** *(uint64, optional)* - sets hard limit for kernel memory
* **`swappiness`** *(uint64, optional)* - sets swappiness parameter of vmscan (See sysctl's vm.swappiness)
###### Example
```json
"memory": {
"limit": 0,
"reservation": 0,
"swap": 0,
"kernel": 0,
"swappiness": -1
}
```
#### CPU
`cpu` represents the cgroup subsystems `cpu` and `cpusets`.
For more information, see [the cpusets cgroup man page](https://www.kernel.org/doc/Documentation/cgroups/cpusets.txt).
The following parameters can be specified to setup the controller:
* **`shares`** *(uint64, optional)* - specifies a relative share of CPU time available to the tasks in a cgroup
* **`quota`** *(uint64, optional)* - specifies the total amount of time in microseconds for which all tasks in a cgroup can run during one period (as defined by **`period`** below)
* **`period`** *(uint64, optional)* - specifies a period of time in microseconds for how regularly a cgroup's access to CPU resources should be reallocated (CFS scheduler only)
* **`realtimeRuntime`** *(uint64, optional)* - specifies a period of time in microseconds for the longest continuous period in which the tasks in a cgroup have access to CPU resources
* **`realtimePeriod`** *(uint64, optional)* - same as **`period`** but applies to realtime scheduler only
* **`cpus`** *(cpus, optional)* - list of CPUs the container will run in
* **`mems`** *(mems, optional)* - list of Memory Nodes the container will run in
###### Example
```json
"cpu": {
"shares": 0,
"quota": 0,
"period": 0,
"realtimeRuntime": 0,
"realtimePeriod": 0,
"cpus": "",
"mems": ""
}
```
#### Block IO Controller
`blockIO` represents the cgroup subsystem `blkio` which implements the block io controller.
For more information, see [the kernel cgroups documentation about blkio](https://www.kernel.org/doc/Documentation/cgroups/blkio-controller.txt).
The following parameters can be specified to setup the controller:
* **`blkioWeight`** *(uint16, optional)* - specifies per-cgroup weight. This is default weight of the group on all devices until and unless overridden by per-device rules. The range is from 10 to 1000.
* **`blkioLeafWeight`** *(uint16, optional)* - equivalents of `blkioWeight` for the purpose of deciding how much weight tasks in the given cgroup has while competing with the cgroup's child cgroups. The range is from 10 to 1000.
* **`blkioWeightDevice`** *(array, optional)* - specifies the list of devices which will be bandwidth rate limited. The following parameters can be specified per-device:
* **`major, minor`** *(int64, required)* - major, minor numbers for device. More info in `man mknod`.
* **`weight`** *(uint16, optional)* - bandwidth rate for the device, range is from 10 to 1000
* **`leafWeight`** *(uint16, optional)* - bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only
You must specify at least one of `weight` or `leafWeight` in a given entry, and can specify both.
* **`blkioThrottleReadBpsDevice`**, **`blkioThrottleWriteBpsDevice`**, **`blkioThrottleReadIOPSDevice`**, **`blkioThrottleWriteIOPSDevice`** *(array, optional)* - specify the list of devices which will be IO rate limited. The following parameters can be specified per-device:
* **`major, minor`** *(int64, required)* - major, minor numbers for device. More info in `man mknod`.
* **`rate`** *(uint64, required)* - IO rate limit for the device
###### Example
```json
"blockIO": {
"blkioWeight": 0,
"blkioLeafWeight": 0,
"blkioWeightDevice": [
{
"major": 8,
"minor": 0,
"weight": 500,
"leafWeight": 300
},
{
"major": 8,
"minor": 16,
"weight": 500
}
],
"blkioThrottleReadBpsDevice": [
{
"major": 8,
"minor": 0,
"rate": 600
}
],
"blkioThrottleWriteIOPSDevice": [
{
"major": 8,
"minor": 16,
"rate": 300
}
]
}
```
#### Huge page limits
`hugepageLimits` represents the `hugetlb` controller which allows to limit the
HugeTLB usage per control group and enforces the controller limit during page fault.
For more information, see the [kernel cgroups documentation about HugeTLB](https://www.kernel.org/doc/Documentation/cgroups/hugetlb.txt).
`hugepageLimits` is an array of entries, each having the following structure:
* **`pageSize`** *(string, required)* - hugepage size
* **`limit`** *(uint64, required)* - limit in bytes of *hugepagesize* HugeTLB usage
###### Example
```json
"hugepageLimits": [
{
"pageSize": "2MB",
"limit": 9223372036854771712
}
]
```
#### Network
`network` represents the cgroup subsystems `net_cls` and `net_prio`.
For more information, see [the net\_cls cgroup man page](https://www.kernel.org/doc/Documentation/cgroups/net_cls.txt) and [the net\_prio cgroup man page](https://www.kernel.org/doc/Documentation/cgroups/net_prio.txt).
The following parameters can be specified to setup these cgroup controllers:
* **`classID`** *(string, optional)* - is the network class identifier the cgroup's network packets will be tagged with
* **`priorities`** *(array, optional)* - specifies a list of objects of the priorities assigned to traffic originating from
processes in the group and egressing the system on various interfaces. The following parameters can be specified per-priority:
* **`name`** *(string, required)* - interface name
* **`priority`** *(uint32, required)* - priority applied to the interface
###### Example
```json
"network": {
"classID": "0x100001",
"priorities": [
{
"name": "eth0",
"priority": 500
},
{
"name": "eth1",
"priority": 1000
}
]
}
```
#### PIDs
`pids` represents the cgroup subsystem `pids`.
For more information, see [the pids cgroup man page](https://www.kernel.org/doc/Documentation/cgroups/pids.txt
).
The following paramters can be specified to setup the controller:
* **`limit`** *(int64, required)* - specifies the maximum number of tasks in the cgroup
###### Example
```json
"pids": {
"limit": 32771
}
```
## Sysctl
sysctl allows kernel parameters to be modified at runtime for the container.
For more information, see [the man page](http://man7.org/linux/man-pages/man8/sysctl.8.html)
###### Example
```json
"sysctl": {
"net.ipv4.ip_forward": "1",
"net.core.somaxconn": "256"
}
```
## Rlimits
rlimits allow setting resource limits.
`type` is a string with a value from those defined in [the man page](http://man7.org/linux/man-pages/man2/setrlimit.2.html).
The kernel enforces the `soft` limit for a resource while the `hard` limit acts as a ceiling for that value that could be set by an unprivileged process.
###### Example
```json
"rlimits": [
{
"type": "RLIMIT_NPROC",
"soft": 1024,
"hard": 102400
}
]
```
## SELinux process label
SELinux process label specifies the label with which the processes in a container are run.
For more information about SELinux, see [Selinux documentation](http://selinuxproject.org/page/Main_Page)
###### Example
```json
"selinuxProcessLabel": "system_u:system_r:svirt_lxc_net_t:s0:c124,c675"
```
## Apparmor profile
Apparmor profile specifies the name of the apparmor profile that will be used for the container.
For more information about Apparmor, see [Apparmor documentation](https://wiki.ubuntu.com/AppArmor)
###### Example
```json
"apparmorProfile": "acme_secure_profile"
```
## seccomp
Seccomp provides application sandboxing mechanism in the Linux kernel.
Seccomp configuration allows one to configure actions to take for matched syscalls and furthermore also allows matching on values passed as arguments to syscalls.
For more information about Seccomp, see [Seccomp kernel documentation](https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt)
The actions, architectures, and operators are strings that match the definitions in seccomp.h from [libseccomp](https://github.com/seccomp/libseccomp) and are translated to corresponding values.
A valid list of constants as of Libseccomp v2.2.3 is contained below.
Architecture Constants
* `SCMP_ARCH_X86`
* `SCMP_ARCH_X86_64`
* `SCMP_ARCH_X32`
* `SCMP_ARCH_ARM`
* `SCMP_ARCH_AARCH64`
* `SCMP_ARCH_MIPS`
* `SCMP_ARCH_MIPS64`
* `SCMP_ARCH_MIPS64N32`
* `SCMP_ARCH_MIPSEL`
* `SCMP_ARCH_MIPSEL64`
* `SCMP_ARCH_MIPSEL64N32`
Action Constants:
* `SCMP_ACT_KILL`
* `SCMP_ACT_TRAP`
* `SCMP_ACT_ERRNO`
* `SCMP_ACT_TRACE`
* `SCMP_ACT_ALLOW`
Operator Constants:
* `SCMP_CMP_NE`
* `SCMP_CMP_LT`
* `SCMP_CMP_LE`
* `SCMP_CMP_EQ`
* `SCMP_CMP_GE`
* `SCMP_CMP_GT`
* `SCMP_CMP_MASKED_EQ`
###### Example
```json
"seccomp": {
"defaultAction": "SCMP_ACT_ALLOW",
"architectures": [
"SCMP_ARCH_X86"
],
"syscalls": [
{
"name": "getcwd",
"action": "SCMP_ACT_ERRNO"
}
]
}
```
## Rootfs Mount Propagation
rootfsPropagation sets the rootfs's mount propagation.
Its value is either slave, private, or shared.
[The kernel doc](https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt) has more information about mount propagation.
###### Example
```json
"rootfsPropagation": "slave",
```

View file

@ -1,121 +0,0 @@
# Runtime Configuration
## Hooks
Lifecycle hooks allow custom events for different points in a container's runtime.
Presently there are `Prestart`, `Poststart` and `Poststop`.
* [`Prestart`](#prestart) is a list of hooks to be run before the container process is executed
* [`Poststart`](#poststart) is a list of hooks to be run immediately after the container process is started
* [`Poststop`](#poststop) is a list of hooks to be run after the container process exits
Hooks allow one to run code before/after various lifecycle events of the container.
Hooks MUST be called in the listed order.
The state of the container is passed to the hooks over stdin, so the hooks could get the information they need to do their work.
Hook paths are absolute and are executed from the host's filesystem.
### Prestart
The pre-start hooks are called after the container process is spawned, but before the user supplied command is executed.
They are called after the container namespaces are created on Linux, so they provide an opportunity to customize the container.
In Linux, for e.g., the network namespace could be configured in this hook.
If a hook returns a non-zero exit code, then an error including the exit code and the stderr is returned to the caller and the container is torn down.
### Poststart
The post-start hooks are called after the user process is started.
For example this hook can notify user that real process is spawned.
If a hook returns a non-zero exit code, then an error is logged and the remaining hooks are executed.
### Poststop
The post-stop hooks are called after the container process is stopped.
Cleanup or debugging could be performed in such a hook.
If a hook returns a non-zero exit code, then an error is logged and the remaining hooks are executed.
*Example*
```json
"hooks" : {
"prestart": [
{
"path": "/usr/bin/fix-mounts",
"args": ["arg1", "arg2"],
"env": [ "key1=value1"]
},
{
"path": "/usr/bin/setup-network"
}
],
"poststart": [
{
"path": "/usr/bin/notify-start"
}
],
"poststop": [
{
"path": "/usr/sbin/cleanup.sh",
"args": ["-f"]
}
]
}
```
`path` is required for a hook.
`args` and `env` are optional.
## Mount Configuration
Additional filesystems can be declared as "mounts", specified in the *mounts* object.
Keys in this object are names of mount points from portable config.
Values are objects with configuration of mount points.
The parameters are similar to the ones in [the Linux mount system call](http://man7.org/linux/man-pages/man2/mount.2.html).
Only [mounts from the portable config](config.md#mount-points) will be mounted.
* **`type`** (string, required) Linux, *filesystemtype* argument supported by the kernel are listed in */proc/filesystems* (e.g., "minix", "ext2", "ext3", "jfs", "xfs", "reiserfs", "msdos", "proc", "nfs", "iso9660"). Windows: ntfs
* **`source`** (string, required) a device name, but can also be a directory name or a dummy. Windows, the volume name that is the target of the mount point. \\?\Volume\{GUID}\ (on Windows source is called target)
* **`options`** (list of strings, optional) in the fstab format [https://wiki.archlinux.org/index.php/Fstab](https://wiki.archlinux.org/index.php/Fstab).
*Example (Linux)*
```json
"mounts": {
"proc": {
"type": "proc",
"source": "proc",
"options": []
},
"dev": {
"type": "tmpfs",
"source": "tmpfs",
"options": ["nosuid","strictatime","mode=755","size=65536k"]
},
"devpts": {
"type": "devpts",
"source": "devpts",
"options": ["nosuid","noexec","newinstance","ptmxmode=0666","mode=0620","gid=5"]
},
"data": {
"type": "bind",
"source": "/volumes/testing",
"options": ["rbind","rw"]
}
}
```
*Example (Windows)*
```json
"mounts": {
"myfancymountpoint": {
"type": "ntfs",
"source": "\\\\?\\Volume\\{2eca078d-5cbc-43d3-aff8-7e8511f60d0e}\\",
"options": []
}
}
```
See links for details about [mountvol](http://ss64.com/nt/mountvol.html) and [SetVolumeMountPoint](https://msdn.microsoft.com/en-us/library/windows/desktop/aa365561(v=vs.85).aspx) in Windows.

View file

@ -1,8 +0,0 @@
# Linux Runtime
## File descriptors
By default, only the `stdin`, `stdout` and `stderr` file descriptors are kept open for the application by the runtime.
The runtime may pass additional file descriptors to the application to support features such as [socket activation](http://0pointer.de/blog/projects/socket-activated-containers.html).
Some of the file descriptors may be redirected to `/dev/null` even though they are open.

View file

@ -1,56 +0,0 @@
# Runtime and Lifecycle
## State
Runtime MUST store container metadata on disk so that external tools can consume and act on this information.
It is recommended that this data be stored in a temporary filesystem so that it can be removed on a system reboot.
On Linux/Unix based systems the metadata MUST be stored under `/run/opencontainer/containers`.
For non-Linux/Unix based systems the location of the root metadata directory is currently undefined.
Within that directory there MUST be one directory for each container created, where the name of the directory MUST be the ID of the container.
For example: for a Linux container with an ID of `173975398351`, there will be a corresponding directory: `/run/opencontainer/containers/173975398351`.
Within each container's directory, there MUST be a JSON encoded file called `state.json` that contains the runtime state of the container.
For example: `/run/opencontainer/containers/173975398351/state.json`.
The `state.json` file MUST contain all of the following properties:
* **`version`**: (string) is the OCF specification version used when creating the container.
* **`id`**: (string) is the container's ID.
This MUST be unique across all containers on this host.
There is no requirement that it be unique across hosts.
The ID is provided in the state because hooks will be executed with the state as the payload.
This allows the hooks to perform cleanup and teardown logic after the runtime destroys its own state.
* **`pid`**: (int) is the ID of the main process within the container, as seen by the host.
* **`bundlePath`**: (string) is the absolute path to the container's bundle directory.
This is provided so that consumers can find the container's configuration and root filesystem on the host.
*Example*
```json
{
"id": "oc-container",
"pid": 4422,
"root": "/containers/redis"
}
```
## Lifecycle
### Create
Creates the container: file system, namespaces, cgroups, capabilities.
### Start (process)
Runs a process in a container.
Can be invoked several times.
### Stop (process)
Not sure we need that from runc cli.
Process is killed from the outside.
This event needs to be captured by runc to run onstop event handlers.
## Hooks
See [runtime configuration for hooks](./runtime-config.md)

View file

@ -1,42 +0,0 @@
package specs
// RuntimeSpec contains host-specific configuration information for
// a container. This information must not be included when the bundle
// is packaged for distribution.
type RuntimeSpec struct {
// Mounts is a mapping of names to mount configurations.
// Which mounts will be mounted and where should be chosen with MountPoints
// in Spec.
Mounts map[string]Mount `json:"mounts"`
// Hooks are the commands run at various lifecycle events of the container.
Hooks Hooks `json:"hooks"`
}
// Hook specifies a command that is run at a particular event in the lifecycle of a container
type Hook struct {
Path string `json:"path"`
Args []string `json:"args"`
Env []string `json:"env"`
}
// Hooks for container setup and teardown
type Hooks struct {
// Prestart is a list of hooks to be run before the container process is executed.
// On Linux, they are run after the container namespaces are created.
Prestart []Hook `json:"prestart"`
// Poststart is a list of hooks to be run after the container process is started.
Poststart []Hook `json:"poststart"`
// Poststop is a list of hooks to be run after the container process exits.
Poststop []Hook `json:"poststop"`
}
// Mount specifies a mount for a container
type Mount struct {
// Type specifies the mount kind.
Type string `json:"type"`
// Source specifies the source path of the mount. In the case of bind mounts on
// linux based systems this would be the file on the host.
Source string `json:"source"`
// Options are fstab style mount options.
Options []string `json:"options"`
}

View file

@ -1,301 +0,0 @@
package specs
import "os"
// LinuxStateDirectory holds the container's state information
const LinuxStateDirectory = "/run/opencontainer/containers"
// LinuxRuntimeSpec is the full specification for linux containers.
type LinuxRuntimeSpec struct {
RuntimeSpec
// LinuxRuntime is platform specific configuration for linux based containers.
Linux LinuxRuntime `json:"linux"`
}
// LinuxRuntime hosts the Linux-only runtime information
type LinuxRuntime struct {
// UIDMapping specifies user mappings for supporting user namespaces on linux.
UIDMappings []IDMapping `json:"uidMappings"`
// GIDMapping specifies group mappings for supporting user namespaces on linux.
GIDMappings []IDMapping `json:"gidMappings"`
// Rlimits specifies rlimit options to apply to the container's process.
Rlimits []Rlimit `json:"rlimits"`
// Sysctl are a set of key value pairs that are set for the container on start
Sysctl map[string]string `json:"sysctl"`
// Resources contain cgroup information for handling resource constraints
// for the container
Resources *Resources `json:"resources"`
// CgroupsPath specifies the path to cgroups that are created and/or joined by the container.
// The path is expected to be relative to the cgroups mountpoint.
// If resources are specified, the cgroups at CgroupsPath will be updated based on resources.
CgroupsPath string `json:"cgroupsPath"`
// Namespaces contains the namespaces that are created and/or joined by the container
Namespaces []Namespace `json:"namespaces"`
// Devices are a list of device nodes that are created and enabled for the container
Devices []Device `json:"devices"`
// ApparmorProfile specified the apparmor profile for the container.
ApparmorProfile string `json:"apparmorProfile"`
// SelinuxProcessLabel specifies the selinux context that the container process is run as.
SelinuxProcessLabel string `json:"selinuxProcessLabel"`
// Seccomp specifies the seccomp security settings for the container.
Seccomp Seccomp `json:"seccomp"`
// RootfsPropagation is the rootfs mount propagation mode for the container
RootfsPropagation string `json:"rootfsPropagation"`
}
// Namespace is the configuration for a linux namespace
type Namespace struct {
// Type is the type of Linux namespace
Type NamespaceType `json:"type"`
// Path is a path to an existing namespace persisted on disk that can be joined
// and is of the same type
Path string `json:"path"`
}
// NamespaceType is one of the linux namespaces
type NamespaceType string
const (
// PIDNamespace for isolating process IDs
PIDNamespace NamespaceType = "pid"
// NetworkNamespace for isolating network devices, stacks, ports, etc
NetworkNamespace = "network"
// MountNamespace for isolating mount points
MountNamespace = "mount"
// IPCNamespace for isolating System V IPC, POSIX message queues
IPCNamespace = "ipc"
// UTSNamespace for isolating hostname and NIS domain name
UTSNamespace = "uts"
// UserNamespace for isolating user and group IDs
UserNamespace = "user"
)
// IDMapping specifies UID/GID mappings
type IDMapping struct {
// HostID is the UID/GID of the host user or group
HostID uint32 `json:"hostID"`
// ContainerID is the UID/GID of the container's user or group
ContainerID uint32 `json:"containerID"`
// Size is the length of the range of IDs mapped between the two namespaces
Size uint32 `json:"size"`
}
// Rlimit type and restrictions
type Rlimit struct {
// Type of the rlimit to set
Type string `json:"type"`
// Hard is the hard limit for the specified type
Hard uint64 `json:"hard"`
// Soft is the soft limit for the specified type
Soft uint64 `json:"soft"`
}
// HugepageLimit structure corresponds to limiting kernel hugepages
type HugepageLimit struct {
// Pagesize is the hugepage size
Pagesize string `json:"pageSize"`
// Limit is the limit of "hugepagesize" hugetlb usage
Limit uint64 `json:"limit"`
}
// InterfacePriority for network interfaces
type InterfacePriority struct {
// Name is the name of the network interface
Name string `json:"name"`
// Priority for the interface
Priority uint32 `json:"priority"`
}
// blockIODevice holds major:minor format supported in blkio cgroup
type blockIODevice struct {
// Major is the device's major number.
Major int64 `json:"major"`
// Minor is the device's minor number.
Minor int64 `json:"minor"`
}
// WeightDevice struct holds a `major:minor weight` pair for blkioWeightDevice
type WeightDevice struct {
blockIODevice
// Weight is the bandwidth rate for the device, range is from 10 to 1000
Weight uint16 `json:"weight"`
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only
LeafWeight uint16 `json:"leafWeight"`
}
// ThrottleDevice struct holds a `major:minor rate_per_second` pair
type ThrottleDevice struct {
blockIODevice
// Rate is the IO rate limit per cgroup per device
Rate uint64 `json:"rate"`
}
// BlockIO for Linux cgroup 'blkio' resource management
type BlockIO struct {
// Specifies per cgroup weight, range is from 10 to 1000
Weight uint16 `json:"blkioWeight"`
// Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only
LeafWeight uint16 `json:"blkioLeafWeight"`
// Weight per cgroup per device, can override BlkioWeight
WeightDevice []*WeightDevice `json:"blkioWeightDevice"`
// IO read rate limit per cgroup per device, bytes per second
ThrottleReadBpsDevice []*ThrottleDevice `json:"blkioThrottleReadBpsDevice"`
// IO write rate limit per cgroup per device, bytes per second
ThrottleWriteBpsDevice []*ThrottleDevice `json:"blkioThrottleWriteBpsDevice"`
// IO read rate limit per cgroup per device, IO per second
ThrottleReadIOPSDevice []*ThrottleDevice `json:"blkioThrottleReadIOPSDevice"`
// IO write rate limit per cgroup per device, IO per second
ThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkioThrottleWriteIOPSDevice"`
}
// Memory for Linux cgroup 'memory' resource management
type Memory struct {
// Memory limit (in bytes)
Limit uint64 `json:"limit"`
// Memory reservation or soft_limit (in bytes)
Reservation uint64 `json:"reservation"`
// Total memory usage (memory + swap); set `-1' to disable swap
Swap uint64 `json:"swap"`
// Kernel memory limit (in bytes)
Kernel uint64 `json:"kernel"`
// How aggressive the kernel will swap memory pages. Range from 0 to 100. Set -1 to use system default
Swappiness uint64 `json:"swappiness"`
}
// CPU for Linux cgroup 'cpu' resource management
type CPU struct {
// CPU shares (relative weight vs. other cgroups with cpu shares)
Shares uint64 `json:"shares"`
// CPU hardcap limit (in usecs). Allowed cpu time in a given period
Quota uint64 `json:"quota"`
// CPU period to be used for hardcapping (in usecs). 0 to use system default
Period uint64 `json:"period"`
// How many time CPU will use in realtime scheduling (in usecs)
RealtimeRuntime uint64 `json:"realtimeRuntime"`
// CPU period to be used for realtime scheduling (in usecs)
RealtimePeriod uint64 `json:"realtimePeriod"`
// CPU to use within the cpuset
Cpus string `json:"cpus"`
// MEM to use within the cpuset
Mems string `json:"mems"`
}
// Pids for Linux cgroup 'pids' resource management (Linux 4.3)
type Pids struct {
// Maximum number of PIDs. A value <= 0 indicates "no limit".
Limit int64 `json:"limit"`
}
// Network identification and priority configuration
type Network struct {
// Set class identifier for container's network packets
// this is actually a string instead of a uint64 to overcome the json
// limitation of specifying hex numbers
ClassID string `json:"classID"`
// Set priority of network traffic for container
Priorities []InterfacePriority `json:"priorities"`
}
// Resources has container runtime resource constraints
type Resources struct {
// DisableOOMKiller disables the OOM killer for out of memory conditions
DisableOOMKiller bool `json:"disableOOMKiller"`
// Specify an oom_score_adj for the container. Optional.
OOMScoreAdj int `json:"oomScoreAdj"`
// Memory restriction configuration
Memory Memory `json:"memory"`
// CPU resource restriction configuration
CPU CPU `json:"cpu"`
// Task resource restriction configuration.
Pids Pids `json:"pids"`
// BlockIO restriction configuration
BlockIO BlockIO `json:"blockIO"`
// Hugetlb limit (in bytes)
HugepageLimits []HugepageLimit `json:"hugepageLimits"`
// Network restriction configuration
Network Network `json:"network"`
}
// Device represents the information on a Linux special device file
type Device struct {
// Path to the device.
Path string `json:"path"`
// Device type, block, char, etc.
Type rune `json:"type"`
// Major is the device's major number.
Major int64 `json:"major"`
// Minor is the device's minor number.
Minor int64 `json:"minor"`
// Cgroup permissions format, rwm.
Permissions string `json:"permissions"`
// FileMode permission bits for the device.
FileMode os.FileMode `json:"fileMode"`
// UID of the device.
UID uint32 `json:"uid"`
// Gid of the device.
GID uint32 `json:"gid"`
}
// Seccomp represents syscall restrictions
type Seccomp struct {
DefaultAction Action `json:"defaultAction"`
Architectures []Arch `json:"architectures"`
Syscalls []*Syscall `json:"syscalls"`
}
// Additional architectures permitted to be used for system calls
// By default only the native architecture of the kernel is permitted
type Arch string
const (
ArchX86 Arch = "SCMP_ARCH_X86"
ArchX86_64 Arch = "SCMP_ARCH_X86_64"
ArchX32 Arch = "SCMP_ARCH_X32"
ArchARM Arch = "SCMP_ARCH_ARM"
ArchAARCH64 Arch = "SCMP_ARCH_AARCH64"
ArchMIPS Arch = "SCMP_ARCH_MIPS"
ArchMIPS64 Arch = "SCMP_ARCH_MIPS64"
ArchMIPS64N32 Arch = "SCMP_ARCH_MIPS64N32"
ArchMIPSEL Arch = "SCMP_ARCH_MIPSEL"
ArchMIPSEL64 Arch = "SCMP_ARCH_MIPSEL64"
ArchMIPSEL64N32 Arch = "SCMP_ARCH_MIPSEL64N32"
)
// Action taken upon Seccomp rule match
type Action string
const (
ActKill Action = "SCMP_ACT_KILL"
ActTrap Action = "SCMP_ACT_TRAP"
ActErrno Action = "SCMP_ACT_ERRNO"
ActTrace Action = "SCMP_ACT_TRACE"
ActAllow Action = "SCMP_ACT_ALLOW"
)
// Operator used to match syscall arguments in Seccomp
type Operator string
const (
OpNotEqual Operator = "SCMP_CMP_NE"
OpLessThan Operator = "SCMP_CMP_LT"
OpLessEqual Operator = "SCMP_CMP_LE"
OpEqualTo Operator = "SCMP_CMP_EQ"
OpGreaterEqual Operator = "SCMP_CMP_GE"
OpGreaterThan Operator = "SCMP_CMP_GT"
OpMaskedEqual Operator = "SCMP_CMP_MASKED_EQ"
)
// Arg used for matching specific syscall arguments in Seccomp
type Arg struct {
Index uint `json:"index"`
Value uint64 `json:"value"`
ValueTwo uint64 `json:"valueTwo"`
Op Operator `json:"op"`
}
// Syscall is used to match a syscall in Seccomp
type Syscall struct {
Name string `json:"name"`
Action Action `json:"action"`
Args []*Arg `json:"args"`
}

View file

@ -1,16 +0,0 @@
package specs
// State holds information about the runtime state of the container.
// This information will be stored in a file called `state.json`.
// The location of this file will be operating system specific. On Linux
// it will be in `/run/opencontainers/runc/<containerID>/state.json`
type State struct {
// Version is the version of the specification that is supported.
Version string `json:"version"`
// ID is the container ID
ID string `json:"id"`
// Pid is the process id for the container's main process.
Pid int `json:"pid"`
// BundlePath is the path to the container's bundle directory.
BundlePath string `json:"bundlePath"`
}

View file

@ -1,15 +0,0 @@
package specs
import "fmt"
const (
// VersionMajor is for an API incompatible changes
VersionMajor = 0
// VersionMinor is for functionality in a backwards-compatible manner
VersionMinor = 2
// VersionPatch is for backwards-compatible bug fixes
VersionPatch = 0
)
// Version is the specification version that the package types support.
var Version = fmt.Sprintf("%d.%d.%d", VersionMajor, VersionMinor, VersionPatch)