1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

Vendor containerd, specs, runc

Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>
This commit is contained in:
Tonis Tiigi 2016-03-18 11:43:13 -07:00
parent 14299daaca
commit cc83031ade
37 changed files with 3386 additions and 546 deletions

View file

@ -59,7 +59,8 @@ clone git github.com/miekg/pkcs11 df8ae6ca730422dba20c768ff38ef7d79077a59f
clone git github.com/docker/go v1.5.1-1-1-gbaf439e clone git github.com/docker/go v1.5.1-1-1-gbaf439e
clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c
clone git github.com/opencontainers/runc 2c3115481ee1782ad687a9e0b4834f89533c2acf # libcontainer clone git github.com/opencontainers/runc 7b6c4c418d5090f4f11eee949fdf49afd15838c9 # libcontainer
clone git github.com/opencontainers/specs 3ce138b1934bf227a418e241ead496c383eaba1c # specs
clone git github.com/seccomp/libseccomp-golang 1b506fc7c24eec5a3693cdcbed40d9c226cfc6a1 clone git github.com/seccomp/libseccomp-golang 1b506fc7c24eec5a3693cdcbed40d9c226cfc6a1
# libcontainer deps (see src/github.com/opencontainers/runc/Godeps/Godeps.json) # libcontainer deps (see src/github.com/opencontainers/runc/Godeps/Godeps.json)
clone git github.com/coreos/go-systemd v4 clone git github.com/coreos/go-systemd v4
@ -87,4 +88,6 @@ clone git golang.org/x/oauth2 2baa8a1b9338cf13d9eeb27696d761155fa480be https://g
clone git google.golang.org/api dc6d2353af16e2a2b0ff6986af051d473a4ed468 https://code.googlesource.com/google-api-go-client clone git google.golang.org/api dc6d2353af16e2a2b0ff6986af051d473a4ed468 https://code.googlesource.com/google-api-go-client
clone git google.golang.org/cloud dae7e3d993bc3812a2185af60552bb6b847e52a0 https://code.googlesource.com/gocloud clone git google.golang.org/cloud dae7e3d993bc3812a2185af60552bb6b847e52a0 https://code.googlesource.com/gocloud
# containerd
clone git github.com/docker/containerd ab5eae56bf3a800e062c6d63fb94f766a732813f
clean clean

View file

@ -0,0 +1,191 @@
Apache License
Version 2.0, January 2004
https://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Copyright 2013-2015 Docker, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View file

@ -0,0 +1,425 @@
Attribution-ShareAlike 4.0 International
=======================================================================
Creative Commons Corporation ("Creative Commons") is not a law firm and
does not provide legal services or legal advice. Distribution of
Creative Commons public licenses does not create a lawyer-client or
other relationship. Creative Commons makes its licenses and related
information available on an "as-is" basis. Creative Commons gives no
warranties regarding its licenses, any material licensed under their
terms and conditions, or any related information. Creative Commons
disclaims all liability for damages resulting from their use to the
fullest extent possible.
Using Creative Commons Public Licenses
Creative Commons public licenses provide a standard set of terms and
conditions that creators and other rights holders may use to share
original works of authorship and other material subject to copyright
and certain other rights specified in the public license below. The
following considerations are for informational purposes only, are not
exhaustive, and do not form part of our licenses.
Considerations for licensors: Our public licenses are
intended for use by those authorized to give the public
permission to use material in ways otherwise restricted by
copyright and certain other rights. Our licenses are
irrevocable. Licensors should read and understand the terms
and conditions of the license they choose before applying it.
Licensors should also secure all rights necessary before
applying our licenses so that the public can reuse the
material as expected. Licensors should clearly mark any
material not subject to the license. This includes other CC-
licensed material, or material used under an exception or
limitation to copyright. More considerations for licensors:
wiki.creativecommons.org/Considerations_for_licensors
Considerations for the public: By using one of our public
licenses, a licensor grants the public permission to use the
licensed material under specified terms and conditions. If
the licensor's permission is not necessary for any reason--for
example, because of any applicable exception or limitation to
copyright--then that use is not regulated by the license. Our
licenses grant only permissions under copyright and certain
other rights that a licensor has authority to grant. Use of
the licensed material may still be restricted for other
reasons, including because others have copyright or other
rights in the material. A licensor may make special requests,
such as asking that all changes be marked or described.
Although not required by our licenses, you are encouraged to
respect those requests where reasonable. More_considerations
for the public:
wiki.creativecommons.org/Considerations_for_licensees
=======================================================================
Creative Commons Attribution-ShareAlike 4.0 International Public
License
By exercising the Licensed Rights (defined below), You accept and agree
to be bound by the terms and conditions of this Creative Commons
Attribution-ShareAlike 4.0 International Public License ("Public
License"). To the extent this Public License may be interpreted as a
contract, You are granted the Licensed Rights in consideration of Your
acceptance of these terms and conditions, and the Licensor grants You
such rights in consideration of benefits the Licensor receives from
making the Licensed Material available under these terms and
conditions.
Section 1 -- Definitions.
a. Adapted Material means material subject to Copyright and Similar
Rights that is derived from or based upon the Licensed Material
and in which the Licensed Material is translated, altered,
arranged, transformed, or otherwise modified in a manner requiring
permission under the Copyright and Similar Rights held by the
Licensor. For purposes of this Public License, where the Licensed
Material is a musical work, performance, or sound recording,
Adapted Material is always produced where the Licensed Material is
synched in timed relation with a moving image.
b. Adapter's License means the license You apply to Your Copyright
and Similar Rights in Your contributions to Adapted Material in
accordance with the terms and conditions of this Public License.
c. BY-SA Compatible License means a license listed at
creativecommons.org/compatiblelicenses, approved by Creative
Commons as essentially the equivalent of this Public License.
d. Copyright and Similar Rights means copyright and/or similar rights
closely related to copyright including, without limitation,
performance, broadcast, sound recording, and Sui Generis Database
Rights, without regard to how the rights are labeled or
categorized. For purposes of this Public License, the rights
specified in Section 2(b)(1)-(2) are not Copyright and Similar
Rights.
e. Effective Technological Measures means those measures that, in the
absence of proper authority, may not be circumvented under laws
fulfilling obligations under Article 11 of the WIPO Copyright
Treaty adopted on December 20, 1996, and/or similar international
agreements.
f. Exceptions and Limitations means fair use, fair dealing, and/or
any other exception or limitation to Copyright and Similar Rights
that applies to Your use of the Licensed Material.
g. License Elements means the license attributes listed in the name
of a Creative Commons Public License. The License Elements of this
Public License are Attribution and ShareAlike.
h. Licensed Material means the artistic or literary work, database,
or other material to which the Licensor applied this Public
License.
i. Licensed Rights means the rights granted to You subject to the
terms and conditions of this Public License, which are limited to
all Copyright and Similar Rights that apply to Your use of the
Licensed Material and that the Licensor has authority to license.
j. Licensor means the individual(s) or entity(ies) granting rights
under this Public License.
k. Share means to provide material to the public by any means or
process that requires permission under the Licensed Rights, such
as reproduction, public display, public performance, distribution,
dissemination, communication, or importation, and to make material
available to the public including in ways that members of the
public may access the material from a place and at a time
individually chosen by them.
l. Sui Generis Database Rights means rights other than copyright
resulting from Directive 96/9/EC of the European Parliament and of
the Council of 11 March 1996 on the legal protection of databases,
as amended and/or succeeded, as well as other essentially
equivalent rights anywhere in the world.
m. You means the individual or entity exercising the Licensed Rights
under this Public License. Your has a corresponding meaning.
Section 2 -- Scope.
a. License grant.
1. Subject to the terms and conditions of this Public License,
the Licensor hereby grants You a worldwide, royalty-free,
non-sublicensable, non-exclusive, irrevocable license to
exercise the Licensed Rights in the Licensed Material to:
a. reproduce and Share the Licensed Material, in whole or
in part; and
b. produce, reproduce, and Share Adapted Material.
2. Exceptions and Limitations. For the avoidance of doubt, where
Exceptions and Limitations apply to Your use, this Public
License does not apply, and You do not need to comply with
its terms and conditions.
3. Term. The term of this Public License is specified in Section
6(a).
4. Media and formats; technical modifications allowed. The
Licensor authorizes You to exercise the Licensed Rights in
all media and formats whether now known or hereafter created,
and to make technical modifications necessary to do so. The
Licensor waives and/or agrees not to assert any right or
authority to forbid You from making technical modifications
necessary to exercise the Licensed Rights, including
technical modifications necessary to circumvent Effective
Technological Measures. For purposes of this Public License,
simply making modifications authorized by this Section 2(a)
(4) never produces Adapted Material.
5. Downstream recipients.
a. Offer from the Licensor -- Licensed Material. Every
recipient of the Licensed Material automatically
receives an offer from the Licensor to exercise the
Licensed Rights under the terms and conditions of this
Public License.
b. Additional offer from the Licensor -- Adapted Material.
Every recipient of Adapted Material from You
automatically receives an offer from the Licensor to
exercise the Licensed Rights in the Adapted Material
under the conditions of the Adapter's License You apply.
c. No downstream restrictions. You may not offer or impose
any additional or different terms or conditions on, or
apply any Effective Technological Measures to, the
Licensed Material if doing so restricts exercise of the
Licensed Rights by any recipient of the Licensed
Material.
6. No endorsement. Nothing in this Public License constitutes or
may be construed as permission to assert or imply that You
are, or that Your use of the Licensed Material is, connected
with, or sponsored, endorsed, or granted official status by,
the Licensor or others designated to receive attribution as
provided in Section 3(a)(1)(A)(i).
b. Other rights.
1. Moral rights, such as the right of integrity, are not
licensed under this Public License, nor are publicity,
privacy, and/or other similar personality rights; however, to
the extent possible, the Licensor waives and/or agrees not to
assert any such rights held by the Licensor to the limited
extent necessary to allow You to exercise the Licensed
Rights, but not otherwise.
2. Patent and trademark rights are not licensed under this
Public License.
3. To the extent possible, the Licensor waives any right to
collect royalties from You for the exercise of the Licensed
Rights, whether directly or through a collecting society
under any voluntary or waivable statutory or compulsory
licensing scheme. In all other cases the Licensor expressly
reserves any right to collect such royalties.
Section 3 -- License Conditions.
Your exercise of the Licensed Rights is expressly made subject to the
following conditions.
a. Attribution.
1. If You Share the Licensed Material (including in modified
form), You must:
a. retain the following if it is supplied by the Licensor
with the Licensed Material:
i. identification of the creator(s) of the Licensed
Material and any others designated to receive
attribution, in any reasonable manner requested by
the Licensor (including by pseudonym if
designated);
ii. a copyright notice;
iii. a notice that refers to this Public License;
iv. a notice that refers to the disclaimer of
warranties;
v. a URI or hyperlink to the Licensed Material to the
extent reasonably practicable;
b. indicate if You modified the Licensed Material and
retain an indication of any previous modifications; and
c. indicate the Licensed Material is licensed under this
Public License, and include the text of, or the URI or
hyperlink to, this Public License.
2. You may satisfy the conditions in Section 3(a)(1) in any
reasonable manner based on the medium, means, and context in
which You Share the Licensed Material. For example, it may be
reasonable to satisfy the conditions by providing a URI or
hyperlink to a resource that includes the required
information.
3. If requested by the Licensor, You must remove any of the
information required by Section 3(a)(1)(A) to the extent
reasonably practicable.
b. ShareAlike.
In addition to the conditions in Section 3(a), if You Share
Adapted Material You produce, the following conditions also apply.
1. The Adapter's License You apply must be a Creative Commons
license with the same License Elements, this version or
later, or a BY-SA Compatible License.
2. You must include the text of, or the URI or hyperlink to, the
Adapter's License You apply. You may satisfy this condition
in any reasonable manner based on the medium, means, and
context in which You Share Adapted Material.
3. You may not offer or impose any additional or different terms
or conditions on, or apply any Effective Technological
Measures to, Adapted Material that restrict exercise of the
rights granted under the Adapter's License You apply.
Section 4 -- Sui Generis Database Rights.
Where the Licensed Rights include Sui Generis Database Rights that
apply to Your use of the Licensed Material:
a. for the avoidance of doubt, Section 2(a)(1) grants You the right
to extract, reuse, reproduce, and Share all or a substantial
portion of the contents of the database;
b. if You include all or a substantial portion of the database
contents in a database in which You have Sui Generis Database
Rights, then the database in which You have Sui Generis Database
Rights (but not its individual contents) is Adapted Material,
including for purposes of Section 3(b); and
c. You must comply with the conditions in Section 3(a) if You Share
all or a substantial portion of the contents of the database.
For the avoidance of doubt, this Section 4 supplements and does not
replace Your obligations under this Public License where the Licensed
Rights include other Copyright and Similar Rights.
Section 5 -- Disclaimer of Warranties and Limitation of Liability.
a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
c. The disclaimer of warranties and limitation of liability provided
above shall be interpreted in a manner that, to the extent
possible, most closely approximates an absolute disclaimer and
waiver of all liability.
Section 6 -- Term and Termination.
a. This Public License applies for the term of the Copyright and
Similar Rights licensed here. However, if You fail to comply with
this Public License, then Your rights under this Public License
terminate automatically.
b. Where Your right to use the Licensed Material has terminated under
Section 6(a), it reinstates:
1. automatically as of the date the violation is cured, provided
it is cured within 30 days of Your discovery of the
violation; or
2. upon express reinstatement by the Licensor.
For the avoidance of doubt, this Section 6(b) does not affect any
right the Licensor may have to seek remedies for Your violations
of this Public License.
c. For the avoidance of doubt, the Licensor may also offer the
Licensed Material under separate terms or conditions or stop
distributing the Licensed Material at any time; however, doing so
will not terminate this Public License.
d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
License.
Section 7 -- Other Terms and Conditions.
a. The Licensor shall not be bound by any additional or different
terms or conditions communicated by You unless expressly agreed.
b. Any arrangements, understandings, or agreements regarding the
Licensed Material not stated herein are separate from and
independent of the terms and conditions of this Public License.
Section 8 -- Interpretation.
a. For the avoidance of doubt, this Public License does not, and
shall not be interpreted to, reduce, limit, restrict, or impose
conditions on any use of the Licensed Material that could lawfully
be made without permission under this Public License.
b. To the extent possible, if any provision of this Public License is
deemed unenforceable, it shall be automatically reformed to the
minimum extent necessary to make it enforceable. If the provision
cannot be reformed, it shall be severed from this Public License
without affecting the enforceability of the remaining terms and
conditions.
c. No term or condition of this Public License will be waived and no
failure to comply consented to unless expressly agreed to by the
Licensor.
d. Nothing in this Public License constitutes or may be interpreted
as a limitation upon, or waiver of, any privileges and immunities
that apply to the Licensor or You, including from the legal
processes of any jurisdiction or authority.
=======================================================================
Creative Commons is not a party to its public licenses.
Notwithstanding, Creative Commons may elect to apply one of its public
licenses to material it publishes and in those instances will be
considered the "Licensor." Except for the limited purpose of indicating
that material is shared under a Creative Commons public license or as
otherwise permitted by the Creative Commons policies published at
creativecommons.org/policies, Creative Commons does not authorize the
use of the trademark "Creative Commons" or any other trademark or logo
of Creative Commons without its prior written consent including,
without limitation, in connection with any unauthorized modifications
to any of its public licenses or any other arrangements,
understandings, or agreements concerning use of licensed material. For
the avoidance of doubt, this paragraph does not form part of the public
licenses.
Creative Commons may be contacted at creativecommons.org.

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,284 @@
syntax = "proto3";
package types;
// API is the RPC service declared by this schema. Every method takes its own
// request message and returns its own response message; Events is the only
// method whose response is declared as a stream.
// NOTE(review): several types referenced here (e.g. UpdateContainerRequest,
// StateResponse, Event, StatsRequest) are not declared in the part of the file
// visible to this review — presumably they are defined further down.
service API {
// Container-level calls.
rpc CreateContainer(CreateContainerRequest) returns (CreateContainerResponse) {}
rpc UpdateContainer(UpdateContainerRequest) returns (UpdateContainerResponse) {}
// Process-level calls: send a signal, update a process, add a process.
rpc Signal(SignalRequest) returns (SignalResponse) {}
rpc UpdateProcess(UpdateProcessRequest) returns (UpdateProcessResponse) {}
rpc AddProcess(AddProcessRequest) returns (AddProcessResponse) {}
// Checkpoint calls: create, delete and list.
rpc CreateCheckpoint(CreateCheckpointRequest) returns (CreateCheckpointResponse) {}
rpc DeleteCheckpoint(DeleteCheckpointRequest) returns (DeleteCheckpointResponse) {}
rpc ListCheckpoint(ListCheckpointRequest) returns (ListCheckpointResponse) {}
// Introspection calls. State and Stats are plain request/response; Events
// returns a stream of Event messages rather than a single response.
rpc State(StateRequest) returns (StateResponse) {}
rpc Events(EventsRequest) returns (stream Event) {}
rpc Stats(StatsRequest) returns (StatsResponse) {}
}
// UpdateProcessRequest is the request message of API.UpdateProcess.
// NOTE(review): id and pid are undocumented in the schema; by analogy with
// SignalRequest they presumably name the container and the process inside
// it — confirm against the server implementation.
message UpdateProcessRequest {
string id = 1;
string pid = 2;
bool closeStdin = 3; // Close stdin of the container
// NOTE(review): width/height look like a terminal resize request — confirm
// the units and whether zero means "leave unchanged".
uint32 width = 4;
uint32 height = 5;
}
// UpdateProcessResponse is the response message of API.UpdateProcess.
// It declares no fields.
message UpdateProcessResponse {
}
// CreateContainerRequest is the request message of API.CreateContainer.
// Only id and bundlePath are not marked optional by the field comments below.
message CreateContainerRequest {
string id = 1; // ID of container
string bundlePath = 2; // path to OCI bundle
string checkpoint = 3; // checkpoint name if you want to create immediate checkpoint (optional)
string stdin = 4; // path to the file where stdin will be read (optional)
string stdout = 5; // path to file where stdout will be written (optional)
string stderr = 6; // path to file where stderr will be written (optional)
// NOTE(review): the format and meaning of labels is not specified in this
// schema — presumably free-form strings attached to the container; confirm.
repeated string labels = 7;
}
// CreateContainerResponse is the response message of API.CreateContainer.
// It carries a single Container message.
// NOTE(review): the Container type is not declared in the visible part of
// the file — presumably it describes the container that was created.
message CreateContainerResponse {
Container container = 1;
}
// SignalRequest is the request message of API.Signal: it names a container,
// a process inside it, and the signal number to deliver.
message SignalRequest {
string id = 1; // ID of container
// Note that pid is declared as a string, not a numeric type.
string pid = 2; // PID of process inside container
uint32 signal = 3; // Signal which will be sent, you can find value in "man 7 signal"
}
// SignalResponse is the response message of API.Signal.
// It declares no fields.
message SignalResponse {
}
// AddProcessRequest is the request message of API.AddProcess. It describes a
// process (command line, environment, user, stdio paths and security-related
// settings) together with the ID of the container it refers to.
message AddProcessRequest {
string id = 1; // ID of container
bool terminal = 2; // Use tty for container stdio
User user = 3; // User under which process will be run
repeated string args = 4; // Arguments for process, first is binary path itself
repeated string env = 5; // List of environment variables for process
string cwd = 6; // Working directory of process
// Note that pid is declared as a string, not a numeric type.
string pid = 7; // Process ID
string stdin = 8; // path to the file where stdin will be read (optional)
string stdout = 9; // path to file where stdout will be written (optional)
string stderr = 10; // path to file where stderr will be written (optional)
// NOTE(review): the four fields below are undocumented in the schema. From
// their names they presumably carry the process's capability list, AppArmor
// profile name, SELinux label and no_new_privs flag — confirm the expected
// formats against the server implementation.
repeated string capabilities = 11;
string apparmorProfile = 12;
string selinuxLabel = 13;
bool noNewPrivileges = 14;
}
message User {
uint32 uid = 1; // UID of user
uint32 gid = 2; // GID of user
repeated uint32 additionalGids = 3; // Additional groups to which user will be added
}
message AddProcessResponse {
}
message CreateCheckpointRequest {
string id = 1; // ID of container
Checkpoint checkpoint = 2; // Checkpoint configuration
}
message CreateCheckpointResponse {
}
message DeleteCheckpointRequest {
string id = 1; // ID of container
string name = 2; // Name of checkpoint
}
message DeleteCheckpointResponse {
}
message ListCheckpointRequest {
string id = 1; // ID of container
}
message Checkpoint {
string name = 1; // Name of checkpoint
bool exit = 2; // checkpoint configuration: should container exit on checkpoint or not
bool tcp = 3; // allow open tcp connections
bool unixSockets = 4; // allow external unix sockets
bool shell = 5; // allow shell-jobs
}
message ListCheckpointResponse {
repeated Checkpoint checkpoints = 1; // List of checkpoints
}
message StateRequest {
string id = 1; // container id for a single container
}
message ContainerState {
string status = 1;
}
// Process carries the same process fields as AddProcessRequest, except that
// field 1 is the string pid and field 7 is a numeric systemPid.
message Process {
string pid = 1;
bool terminal = 2; // Use tty for container stdio
User user = 3; // User under which process will be run
repeated string args = 4; // Arguments for process, first is binary path itself
repeated string env = 5; // List of environment variables for process
string cwd = 6; // Working directory of process
uint32 systemPid = 7;
string stdin = 8; // path to the file where stdin will be read (optional)
string stdout = 9; // path to file where stdout will be written (optional)
string stderr = 10; // path to file where stderr will be written (optional)
repeated string capabilities = 11;
string apparmorProfile = 12;
string selinuxLabel = 13;
bool noNewPrivileges = 14;
}
message Container {
string id = 1; // ID of container
string bundlePath = 2; // Path to OCI bundle
repeated Process processes = 3; // List of processes which run in container
string status = 4; // Container status ("running", "paused", etc.)
repeated string labels = 5;
repeated uint32 pids = 6;
string runtime = 7; // runtime used to execute the container
}
// Machine is information about machine on which containerd is run
message Machine {
uint32 cpus = 1; // number of cpus
uint64 memory = 2; // amount of memory
}
// StateResponse is information about containerd daemon
message StateResponse {
repeated Container containers = 1;
Machine machine = 2;
}
// UpdateContainerRequest is the request message of the UpdateContainer RPC.
message UpdateContainerRequest {
string id = 1; // ID of container
string pid = 2;
string status = 3; // Status to which containerd will try to change
UpdateResource resources =4;
}
message UpdateResource {
uint32 blkioWeight =1;
uint32 cpuShares = 2;
uint32 cpuPeriod = 3;
uint32 cpuQuota = 4;
string cpusetCpus = 5;
string cpusetMems = 6;
uint32 memoryLimit = 7;
uint32 memorySwap = 8;
uint32 memoryReservation = 9;
uint32 kernelMemoryLimit = 10;
}
message UpdateContainerResponse {
}
message EventsRequest {
uint64 timestamp = 1;
}
message Event {
string type = 1;
string id = 2;
uint32 status = 3;
string pid = 4;
uint64 timestamp = 5;
}
// NetworkStats holds receive and transmit counters for one network interface.
// NOTE(review): the field names mix casing styles (rx_bytes, rx_Packets,
// Rx_errors, ...). Confirm whether generated code or JSON consumers depend on
// the current spelling before normalising them.
message NetworkStats {
string name = 1; // name of network interface
uint64 rx_bytes = 2;
uint64 rx_Packets = 3;
uint64 Rx_errors = 4;
uint64 Rx_dropped = 5;
uint64 Tx_bytes = 6;
uint64 Tx_packets = 7;
uint64 Tx_errors = 8;
uint64 Tx_dropped = 9;
}
message CpuUsage {
uint64 total_usage = 1;
repeated uint64 percpu_usage = 2;
uint64 usage_in_kernelmode = 3;
uint64 usage_in_usermode = 4;
}
message ThrottlingData {
uint64 periods = 1;
uint64 throttled_periods = 2;
uint64 throttled_time = 3;
}
message CpuStats {
CpuUsage cpu_usage = 1;
ThrottlingData throttling_data = 2;
uint64 system_usage = 3;
}
message PidsStats {
uint64 current = 1;
}
message MemoryData {
uint64 usage = 1;
uint64 max_usage = 2;
uint64 failcnt = 3;
uint64 limit = 4;
}
message MemoryStats {
uint64 cache = 1;
MemoryData usage = 2;
MemoryData swap_usage = 3;
MemoryData kernel_usage = 4;
map<string, uint64> stats = 5;
}
message BlkioStatsEntry {
uint64 major = 1;
uint64 minor = 2;
string op = 3;
uint64 value = 4;
}
// BlkioStats groups eight lists of BlkioStatsEntry values, one list per
// block I/O statistic.
message BlkioStats {
repeated BlkioStatsEntry io_service_bytes_recursive = 1; // number of bytes transferred to and from the block device
repeated BlkioStatsEntry io_serviced_recursive = 2;
repeated BlkioStatsEntry io_queued_recursive = 3;
repeated BlkioStatsEntry io_service_time_recursive = 4;
repeated BlkioStatsEntry io_wait_time_recursive = 5;
repeated BlkioStatsEntry io_merged_recursive = 6;
repeated BlkioStatsEntry io_time_recursive = 7;
repeated BlkioStatsEntry sectors_recursive = 8;
}
message HugetlbStats {
uint64 usage = 1;
uint64 max_usage = 2;
uint64 failcnt = 3;
uint64 limit = 4;
}
message CgroupStats {
CpuStats cpu_stats = 1;
MemoryStats memory_stats = 2;
BlkioStats blkio_stats = 3;
map<string, HugetlbStats> hugetlb_stats = 4; // the map is in the format "size of hugepage: stats of the hugepage"
PidsStats pids_stats = 5;
}
// StatsResponse is the response message of the Stats RPC. It carries the
// per-interface network statistics, the cgroup statistics and a timestamp.
message StatsResponse {
repeated NetworkStats network_stats = 1;
CgroupStats cgroup_stats = 2;
uint64 timestamp = 3;
}
message StatsRequest {
string id = 1;
}

View file

@ -76,7 +76,7 @@ config := &configs.Config{
Name: "test-container", Name: "test-container",
Parent: "system", Parent: "system",
Resources: &configs.Resources{ Resources: &configs.Resources{
MemorySwappiness: -1, MemorySwappiness: nil,
AllowAllDevices: false, AllowAllDevices: false,
AllowedDevices: configs.DefaultAllowedDevices, AllowedDevices: configs.DefaultAllowedDevices,
}, },

View file

@ -130,6 +130,8 @@ func (m *Manager) Apply(pid int) (err error) {
return cgroups.EnterPid(m.Paths, pid) return cgroups.EnterPid(m.Paths, pid)
} }
m.mu.Lock()
defer m.mu.Unlock()
paths := make(map[string]string) paths := make(map[string]string)
for _, sys := range subsystems { for _, sys := range subsystems {
if err := sys.Apply(d); err != nil { if err := sys.Apply(d); err != nil {

View file

@ -86,14 +86,14 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
return err return err
} }
} }
if cgroup.Resources.MemorySwappiness >= 0 && cgroup.Resources.MemorySwappiness <= 100 { if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
if err := writeFile(path, "memory.swappiness", strconv.FormatInt(cgroup.Resources.MemorySwappiness, 10)); err != nil { return nil
} else if int64(*cgroup.Resources.MemorySwappiness) >= 0 && int64(*cgroup.Resources.MemorySwappiness) <= 100 {
if err := writeFile(path, "memory.swappiness", strconv.FormatInt(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
return err return err
} }
} else if cgroup.Resources.MemorySwappiness == -1 {
return nil
} else { } else {
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", cgroup.Resources.MemorySwappiness) return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", int64(*cgroup.Resources.MemorySwappiness))
} }
return nil return nil
@ -149,7 +149,7 @@ func memoryAssigned(cgroup *configs.Cgroup) bool {
cgroup.Resources.MemorySwap > 0 || cgroup.Resources.MemorySwap > 0 ||
cgroup.Resources.KernelMemory > 0 || cgroup.Resources.KernelMemory > 0 ||
cgroup.Resources.OomKillDisable || cgroup.Resources.OomKillDisable ||
cgroup.Resources.MemorySwappiness != -1 (cgroup.Resources.MemorySwappiness != nil && *cgroup.Resources.MemorySwappiness != -1)
} }
func getMemoryData(path, name string) (cgroups.MemoryData, error) { func getMemoryData(path, name string) (cgroups.MemoryData, error) {

View file

@ -46,7 +46,7 @@ type MemoryStats struct {
Usage MemoryData `json:"usage,omitempty"` Usage MemoryData `json:"usage,omitempty"`
// usage of memory + swap // usage of memory + swap
SwapUsage MemoryData `json:"swap_usage,omitempty"` SwapUsage MemoryData `json:"swap_usage,omitempty"`
// usafe of kernel memory // usage of kernel memory
KernelUsage MemoryData `json:"kernel_usage,omitempty"` KernelUsage MemoryData `json:"kernel_usage,omitempty"`
Stats map[string]uint64 `json:"stats,omitempty"` Stats map[string]uint64 `json:"stats,omitempty"`
} }
@ -80,7 +80,7 @@ type HugetlbStats struct {
Usage uint64 `json:"usage,omitempty"` Usage uint64 `json:"usage,omitempty"`
// maximum usage ever recorded. // maximum usage ever recorded.
MaxUsage uint64 `json:"max_usage,omitempty"` MaxUsage uint64 `json:"max_usage,omitempty"`
// number of times htgetlb usage allocation failure. // number of times hugetlb usage allocation failure.
Failcnt uint64 `json:"failcnt"` Failcnt uint64 `json:"failcnt"`
} }

View file

@ -111,7 +111,7 @@ type Resources struct {
OomKillDisable bool `json:"oom_kill_disable"` OomKillDisable bool `json:"oom_kill_disable"`
// Tuning swappiness behaviour per cgroup // Tuning swappiness behaviour per cgroup
MemorySwappiness int64 `json:"memory_swappiness"` MemorySwappiness *int64 `json:"memory_swappiness"`
// Set priority of network traffic for container // Set priority of network traffic for container
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"` NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`

View file

@ -128,11 +128,11 @@ type Config struct {
// AppArmorProfile specifies the profile to apply to the process running in the container and is // AppArmorProfile specifies the profile to apply to the process running in the container and is
// change at the time the process is execed // change at the time the process is execed
AppArmorProfile string `json:"apparmor_profile"` AppArmorProfile string `json:"apparmor_profile,omitempty"`
// ProcessLabel specifies the label to apply to the process running in the container. It is // ProcessLabel specifies the label to apply to the process running in the container. It is
// commonly used by selinux // commonly used by selinux
ProcessLabel string `json:"process_label"` ProcessLabel string `json:"process_label,omitempty"`
// Rlimits specifies the resource limits, such as max open files, to set in the container // Rlimits specifies the resource limits, such as max open files, to set in the container
// If Rlimits are not set, the container will inherit rlimits from the parent process // If Rlimits are not set, the container will inherit rlimits from the parent process
@ -172,7 +172,7 @@ type Config struct {
Seccomp *Seccomp `json:"seccomp"` Seccomp *Seccomp `json:"seccomp"`
// NoNewPrivileges controls whether processes in the container can gain additional privileges. // NoNewPrivileges controls whether processes in the container can gain additional privileges.
NoNewPrivileges bool `json:"no_new_privileges"` NoNewPrivileges bool `json:"no_new_privileges,omitempty"`
// Hooks are a collection of actions to perform at various container lifecycle events. // Hooks are a collection of actions to perform at various container lifecycle events.
// Hooks are not able to be marshaled to json but they are also not needed to. // Hooks are not able to be marshaled to json but they are also not needed to.
@ -180,6 +180,9 @@ type Config struct {
// Version is the version of opencontainer specification that is supported. // Version is the version of opencontainer specification that is supported.
Version string `json:"version"` Version string `json:"version"`
// Labels are user defined metadata that is stored in the config and populated on the state
Labels []string `json:"labels"`
} }
type Hooks struct { type Hooks struct {

View file

@ -2,7 +2,11 @@
package configs package configs
import "fmt" import (
"fmt"
"os"
"sync"
)
const ( const (
NEWNET NamespaceType = "NEWNET" NEWNET NamespaceType = "NEWNET"
@ -13,6 +17,51 @@ const (
NEWUSER NamespaceType = "NEWUSER" NEWUSER NamespaceType = "NEWUSER"
) )
var (
nsLock sync.Mutex
supportedNamespaces = make(map[NamespaceType]bool)
)
// nsToFile maps a namespace type to the file name used for it in the
// /proc/<pid>/ns/ paths built by this package. An unrecognised type yields
// the empty string.
func nsToFile(ns NamespaceType) string {
	name := ""
	switch ns {
	case NEWNET:
		name = "net"
	case NEWNS:
		name = "mnt"
	case NEWPID:
		name = "pid"
	case NEWIPC:
		name = "ipc"
	case NEWUSER:
		name = "user"
	case NEWUTS:
		name = "uts"
	}
	return name
}
// IsNamespaceSupported reports whether the given namespace type is available
// to the current process. The answer for each known type is computed once by
// stat'ing /proc/self/ns/<file> and then served from supportedNamespaces;
// nsLock guards that map.
func IsNamespaceSupported(ns NamespaceType) bool {
	nsLock.Lock()
	defer nsLock.Unlock()

	if cached, found := supportedNamespaces[ns]; found {
		return cached
	}

	name := nsToFile(ns)
	if name == "" {
		// unknown namespace type: report unsupported and do not cache it
		return false
	}

	// the namespace counts as supported when its file exists and we are
	// allowed to stat it
	_, statErr := os.Stat(fmt.Sprintf("/proc/self/ns/%s", name))
	available := statErr == nil
	supportedNamespaces[ns] = available
	return available
}
func NamespaceTypes() []NamespaceType { func NamespaceTypes() []NamespaceType {
return []NamespaceType{ return []NamespaceType{
NEWNET, NEWNET,
@ -35,26 +84,7 @@ func (n *Namespace) GetPath(pid int) string {
if n.Path != "" { if n.Path != "" {
return n.Path return n.Path
} }
return fmt.Sprintf("/proc/%d/ns/%s", pid, n.file()) return fmt.Sprintf("/proc/%d/ns/%s", pid, nsToFile(n.Type))
}
func (n *Namespace) file() string {
file := ""
switch n.Type {
case NEWNET:
file = "net"
case NEWNS:
file = "mnt"
case NEWPID:
file = "pid"
case NEWIPC:
file = "ipc"
case NEWUSER:
file = "user"
case NEWUTS:
file = "uts"
}
return file
} }
func (n *Namespaces) Remove(t NamespaceType) bool { func (n *Namespaces) Remove(t NamespaceType) bool {
@ -87,3 +117,11 @@ func (n *Namespaces) index(t NamespaceType) int {
func (n *Namespaces) Contains(t NamespaceType) bool { func (n *Namespaces) Contains(t NamespaceType) bool {
return n.index(t) != -1 return n.index(t) != -1
} }
// PathOf returns the Path of the namespace entry that index locates for
// type t, or the empty string when index reports that there is none.
func (n *Namespaces) PathOf(t NamespaceType) string {
	if pos := n.index(t); pos != -1 {
		return (*n)[pos].Path
	}
	return ""
}

View file

@ -4,6 +4,7 @@ import (
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"strings"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
) )
@ -35,6 +36,9 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
if err := v.usernamespace(config); err != nil { if err := v.usernamespace(config); err != nil {
return err return err
} }
if err := v.sysctl(config); err != nil {
return err
}
return nil return nil
} }
@ -91,3 +95,44 @@ func (v *ConfigValidator) usernamespace(config *configs.Config) error {
} }
return nil return nil
} }
// sysctl checks every key of config.Sysctl against the sysctls permitted for
// the namespaces the config requests.
// /proc/sys isn't completely namespaced, so keys with the "net." prefix are
// accepted only when the config contains NEWNET, and keys with the
// "fs.mqueue." prefix plus a fixed list of kernel IPC keys only when it
// contains NEWIPC. The first key that matches neither produces an error.
func (v *ConfigValidator) sysctl(config *configs.Config) error {
	var (
		allowedPrefixes = []string{}
		allowedKeys     = map[string]bool{}
	)

	if config.Namespaces.Contains(configs.NEWNET) {
		allowedPrefixes = append(allowedPrefixes, "net.")
	}
	if config.Namespaces.Contains(configs.NEWIPC) {
		allowedPrefixes = append(allowedPrefixes, "fs.mqueue.")
		for _, key := range []string{
			"kernel.msgmax",
			"kernel.msgmnb",
			"kernel.msgmni",
			"kernel.sem",
			"kernel.shmall",
			"kernel.shmmax",
			"kernel.shmmni",
			"kernel.shm_rmid_forced",
		} {
			allowedKeys[key] = true
		}
	}

nextKey:
	for key := range config.Sysctl {
		// exact-match keys first, then prefix matches
		if allowedKeys[key] {
			continue
		}
		for _, prefix := range allowedPrefixes {
			if strings.HasPrefix(key, prefix) {
				continue nextKey
			}
		}
		return fmt.Errorf("sysctl %q is not permitted in the config", key)
	}
	return nil
}

View file

@ -27,9 +27,6 @@ const (
// The container exists, but all its processes are paused. // The container exists, but all its processes are paused.
Paused Paused
// The container exists, but its state is saved on disk
Checkpointed
// The container does not exist. // The container does not exist.
Destroyed Destroyed
) )
@ -44,8 +41,6 @@ func (s Status) String() string {
return "pausing" return "pausing"
case Paused: case Paused:
return "paused" return "paused"
case Checkpointed:
return "checkpointed"
case Destroyed: case Destroyed:
return "destroyed" return "destroyed"
default: default:

View file

@ -23,6 +23,7 @@ import (
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/criurpc" "github.com/opencontainers/runc/libcontainer/criurpc"
"github.com/opencontainers/runc/libcontainer/utils" "github.com/opencontainers/runc/libcontainer/utils"
"github.com/syndtr/gocapability/capability"
"github.com/vishvananda/netlink/nl" "github.com/vishvananda/netlink/nl"
) )
@ -268,37 +269,40 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
} }
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) { func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
t := "_LIBCONTAINER_INITTYPE=" + string(initStandard) cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
cloneFlags := c.config.Namespaces.CloneFlags() nsMaps := make(map[configs.NamespaceType]string)
if cloneFlags&syscall.CLONE_NEWUSER != 0 { for _, ns := range c.config.Namespaces {
if err := c.addUidGidMappings(cmd.SysProcAttr); err != nil { if ns.Path != "" {
// user mappings are not supported nsMaps[ns.Type] = ns.Path
return nil, err
}
enableSetgroups(cmd.SysProcAttr)
// Default to root user when user namespaces are enabled.
if cmd.SysProcAttr.Credential == nil {
cmd.SysProcAttr.Credential = &syscall.Credential{}
} }
} }
cmd.Env = append(cmd.Env, t) _, sharePidns := nsMaps[configs.NEWPID]
cmd.SysProcAttr.Cloneflags = cloneFlags data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, "")
if err != nil {
return nil, err
}
return &initProcess{ return &initProcess{
cmd: cmd, cmd: cmd,
childPipe: childPipe, childPipe: childPipe,
parentPipe: parentPipe, parentPipe: parentPipe,
manager: c.cgroupManager, manager: c.cgroupManager,
config: c.newInitConfig(p), config: c.newInitConfig(p),
container: c, container: c,
process: p, process: p,
bootstrapData: data,
sharePidns: sharePidns,
}, nil }, nil
} }
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) { func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns)) cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
state, err := c.currentState()
if err != nil {
return nil, newSystemError(err)
}
// for setns process, we dont have to set cloneflags as the process namespaces // for setns process, we dont have to set cloneflags as the process namespaces
// will only be set via setns syscall // will only be set via setns syscall
data, err := c.bootstrapData(0, c.initProcess.pid(), p.consolePath) data, err := c.bootstrapData(0, state.NamespacePaths, p.consolePath)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -315,7 +319,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
} }
func (c *linuxContainer) newInitConfig(process *Process) *initConfig { func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
return &initConfig{ cfg := &initConfig{
Config: c.config, Config: c.config,
Args: process.Args, Args: process.Args,
Env: process.Env, Env: process.Env,
@ -324,7 +328,21 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
Console: process.consolePath, Console: process.consolePath,
Capabilities: process.Capabilities, Capabilities: process.Capabilities,
PassedFilesCount: len(process.ExtraFiles), PassedFilesCount: len(process.ExtraFiles),
ContainerId: c.ID(),
NoNewPrivileges: c.config.NoNewPrivileges,
AppArmorProfile: c.config.AppArmorProfile,
ProcessLabel: c.config.ProcessLabel,
} }
if process.NoNewPrivileges != nil {
cfg.NoNewPrivileges = *process.NoNewPrivileges
}
if process.AppArmorProfile != "" {
cfg.AppArmorProfile = process.AppArmorProfile
}
if process.Label != "" {
cfg.ProcessLabel = process.Label
}
return cfg
} }
func newPipe() (parent *os.File, child *os.File, err error) { func newPipe() (parent *os.File, child *os.File, err error) {
@ -1059,6 +1077,9 @@ func (c *linuxContainer) currentState() (*State, error) {
state.NamespacePaths[ns.Type] = ns.GetPath(pid) state.NamespacePaths[ns.Type] = ns.GetPath(pid)
} }
for _, nsType := range configs.NamespaceTypes() { for _, nsType := range configs.NamespaceTypes() {
if !configs.IsNamespaceSupported(nsType) {
continue
}
if _, ok := state.NamespacePaths[nsType]; !ok { if _, ok := state.NamespacePaths[nsType]; !ok {
ns := configs.Namespace{Type: nsType} ns := configs.Namespace{Type: nsType}
state.NamespacePaths[ns.Type] = ns.GetPath(pid) state.NamespacePaths[ns.Type] = ns.GetPath(pid)
@ -1068,18 +1089,69 @@ func (c *linuxContainer) currentState() (*State, error) {
return state, nil return state, nil
} }
// bootstrapData encodes the necessary data in netlink binary format as a io.Reader. // orderNamespacePaths sorts namespace paths into a list of paths that we
// Consumer can write the data to a bootstrap program such as one that uses // can setns in order.
// nsenter package to bootstrap the container's init process correctly, i.e. with func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) {
// correct namespaces, uid/gid mapping etc. paths := []string{}
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, pid int, consolePath string) (io.Reader, error) { nsTypes := []configs.NamespaceType{
configs.NEWIPC,
configs.NEWUTS,
configs.NEWNET,
configs.NEWPID,
configs.NEWNS,
}
// join userns if the init process explicitly requires NEWUSER
if c.config.Namespaces.Contains(configs.NEWUSER) {
nsTypes = append(nsTypes, configs.NEWUSER)
}
for _, nsType := range nsTypes {
if p, ok := namespaces[nsType]; ok && p != "" {
// check if the requested namespace is supported
if !configs.IsNamespaceSupported(nsType) {
return nil, newSystemError(fmt.Errorf("namespace %s is not supported", nsType))
}
// only set to join this namespace if it exists
if _, err := os.Lstat(p); err != nil {
return nil, newSystemError(err)
}
// do not allow namespace path with comma as we use it to separate
// the namespace paths
if strings.ContainsRune(p, ',') {
return nil, newSystemError(fmt.Errorf("invalid path %s", p))
}
paths = append(paths, p)
}
}
return paths, nil
}
// encodeIDMapping renders idMap as text, one "ContainerID HostID Size" line
// per entry in slice order, each terminated by a newline, and returns the
// resulting bytes.
func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) {
	var buf bytes.Buffer
	for _, entry := range idMap {
		if _, err := fmt.Fprintf(&buf, "%d %d %d\n", entry.ContainerID, entry.HostID, entry.Size); err != nil {
			return nil, err
		}
	}
	return buf.Bytes(), nil
}
// bootstrapData encodes the necessary data in netlink binary format
// as a io.Reader.
// Consumer can write the data to a bootstrap program
// such as one that uses nsenter package to bootstrap the container's
// init process correctly, i.e. with correct namespaces, uid/gid
// mapping etc.
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string, consolePath string) (io.Reader, error) {
// create the netlink message // create the netlink message
r := nl.NewNetlinkRequest(int(InitMsg), 0) r := nl.NewNetlinkRequest(int(InitMsg), 0)
// write pid
// write cloneFlags
r.AddData(&Int32msg{ r.AddData(&Int32msg{
Type: PidAttr, Type: CloneFlagsAttr,
Value: uint32(pid), Value: uint32(cloneFlags),
}) })
// write console path // write console path
if consolePath != "" { if consolePath != "" {
r.AddData(&Bytemsg{ r.AddData(&Bytemsg{
@ -1087,5 +1159,57 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, pid int, consolePath
Value: []byte(consolePath), Value: []byte(consolePath),
}) })
} }
// write custom namespace paths
if len(nsMaps) > 0 {
nsPaths, err := c.orderNamespacePaths(nsMaps)
if err != nil {
return nil, err
}
r.AddData(&Bytemsg{
Type: NsPathsAttr,
Value: []byte(strings.Join(nsPaths, ",")),
})
}
// write uid/gid mappings only when we are not joining an existing user ns
_, joinExistingUser := nsMaps[configs.NEWUSER]
if !joinExistingUser {
// write uid mappings
if len(c.config.UidMappings) > 0 {
b, err := encodeIDMapping(c.config.UidMappings)
if err != nil {
return nil, err
}
r.AddData(&Bytemsg{
Type: UidmapAttr,
Value: b,
})
}
// write gid mappings
if len(c.config.GidMappings) > 0 {
	// The GidmapAttr payload must be built from the gid table; encoding
	// UidMappings here would send the uid table under the gid attribute.
	b, err := encodeIDMapping(c.config.GidMappings)
	if err != nil {
		return nil, err
	}
	r.AddData(&Bytemsg{
		Type:  GidmapAttr,
		Value: b,
	})
	// check if we have CAP_SETGID to setgroup properly; SetgroupAttr is
	// only added when the current process lacks that effective capability
	pid, err := capability.NewPid(os.Getpid())
	if err != nil {
		return nil, err
	}
	if !pid.Get(capability.EFFECTIVE, capability.CAP_SETGID) {
		r.AddData(&Boolmsg{
			Type:  SetgroupAttr,
			Value: true,
		})
	}
}
}
return bytes.NewReader(r.Serialize()), nil return bytes.NewReader(r.Serialize()), nil
} }

View file

@ -1,13 +0,0 @@
// +build !go1.4
package libcontainer
import (
"fmt"
"syscall"
)
// addUidGidMappings is the variant built for toolchains older than Go 1.4
// (see the "!go1.4" build tag on this file). User namespace mappings are
// not available there, so it always returns an error and leaves sys
// untouched.
func (c *linuxContainer) addUidGidMappings(sys *syscall.SysProcAttr) error {
return fmt.Errorf("User namespace is not supported in golang < 1.4")
}

View file

@ -1,26 +0,0 @@
// +build go1.4
package libcontainer
import "syscall"
// addUidGidMappings copies the container's configured uid and gid mappings
// into sys as syscall.SysProcIDMap values. A mapping list that is nil in the
// config leaves the corresponding field of sys unchanged. It always returns
// nil.
func (c *linuxContainer) addUidGidMappings(sys *syscall.SysProcAttr) error {
	if uids := c.config.UidMappings; uids != nil {
		converted := make([]syscall.SysProcIDMap, len(uids))
		for idx := range uids {
			converted[idx] = syscall.SysProcIDMap{
				ContainerID: uids[idx].ContainerID,
				HostID:      uids[idx].HostID,
				Size:        uids[idx].Size,
			}
		}
		sys.UidMappings = converted
	}
	if gids := c.config.GidMappings; gids != nil {
		converted := make([]syscall.SysProcIDMap, len(gids))
		for idx := range gids {
			converted[idx] = syscall.SysProcIDMap{
				ContainerID: gids[idx].ContainerID,
				HostID:      gids[idx].HostID,
				Size:        gids[idx].Size,
			}
		}
		sys.GidMappings = converted
	}
	return nil
}

View file

@ -227,32 +227,40 @@ func (l *LinuxFactory) StartInitialization() (err error) {
pipe = os.NewFile(uintptr(pipefd), "pipe") pipe = os.NewFile(uintptr(pipefd), "pipe")
it = initType(os.Getenv("_LIBCONTAINER_INITTYPE")) it = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
) )
defer pipe.Close()
// clear the current process's environment to clean any libcontainer // clear the current process's environment to clean any libcontainer
// specific env vars. // specific env vars.
os.Clearenv() os.Clearenv()
var i initer i, err := newContainerInit(it, pipe)
defer func() { if err != nil {
// We have an error during the initialization of the container's init, l.sendError(nil, pipe, err)
// send it back to the parent process in the form of an initError. return err
// If container's init successed, syscall.Exec will not return, hence }
// this defer function will never be called. if err := i.Init(); err != nil {
if !isExecError(err) {
l.sendError(i, pipe, err)
}
return err
}
return nil
}
func (l *LinuxFactory) sendError(i initer, pipe *os.File, err error) {
// We have an error during the initialization of the container's init,
// send it back to the parent process in the form of an initError.
// If the container's init succeeded, syscall.Exec does not return, hence
// this function is never called in that case.
if i != nil {
if _, ok := i.(*linuxStandardInit); ok { if _, ok := i.(*linuxStandardInit); ok {
// Synchronisation only necessary for standard init. // Synchronisation only necessary for standard init.
if err := utils.WriteJSON(pipe, syncT{procError}); err != nil { if err := utils.WriteJSON(pipe, syncT{procError}); err != nil {
panic(err) panic(err)
} }
} }
if err := utils.WriteJSON(pipe, newSystemError(err)); err != nil {
panic(err)
}
// ensure that this pipe is always closed
pipe.Close()
}()
i, err = newContainerInit(it, pipe)
if err != nil {
return err
} }
return i.Init() if err := utils.WriteJSON(pipe, newSystemError(err)); err != nil {
panic(err)
}
} }
func (l *LinuxFactory) loadState(root string) (*State, error) { func (l *LinuxFactory) loadState(root string) (*State, error) {
@ -280,3 +288,8 @@ func (l *LinuxFactory) validateID(id string) error {
} }
return nil return nil
} }
// isExecError reports whether err's dynamic type is *exec.Error. Wrapped
// errors are not unwrapped, and a nil err yields false.
func isExecError(err error) bool {
	switch err.(type) {
	case *exec.Error:
		return true
	default:
		return false
	}
}

View file

@ -15,6 +15,8 @@ const (
procReady syncType = iota procReady syncType = iota
procError procError
procRun procRun
procHooks
procResume
) )
type syncT struct { type syncT struct {

View file

@ -48,11 +48,15 @@ type initConfig struct {
Env []string `json:"env"` Env []string `json:"env"`
Cwd string `json:"cwd"` Cwd string `json:"cwd"`
Capabilities []string `json:"capabilities"` Capabilities []string `json:"capabilities"`
ProcessLabel string `json:"process_label"`
AppArmorProfile string `json:"apparmor_profile"`
NoNewPrivileges bool `json:"no_new_privileges"`
User string `json:"user"` User string `json:"user"`
Config *configs.Config `json:"config"` Config *configs.Config `json:"config"`
Console string `json:"console"` Console string `json:"console"`
Networks []*network `json:"network"` Networks []*network `json:"network"`
PassedFilesCount int `json:"passed_files_count"` PassedFilesCount int `json:"passed_files_count"`
ContainerId string `json:"containerid"`
} }
type initer interface { type initer interface {
@ -163,20 +167,22 @@ func syncParentReady(pipe io.ReadWriter) error {
return nil return nil
} }
// joinExistingNamespaces gets all the namespace paths specified for the container and // syncParentHooks sends to the given pipe a JSON payload which indicates that
// does a setns on the namespace fd so that the current process joins the namespace. // the parent should execute pre-start hooks. It then waits for the parent to
func joinExistingNamespaces(namespaces []configs.Namespace) error { // indicate that it is cleared to resume.
for _, ns := range namespaces { func syncParentHooks(pipe io.ReadWriter) error {
if ns.Path != "" { // Tell parent.
f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0) if err := utils.WriteJSON(pipe, syncT{procHooks}); err != nil {
if err != nil { return err
return err }
} // Wait for parent to give the all-clear.
err = system.Setns(f.Fd(), uintptr(ns.Syscall())) var procSync syncT
f.Close() if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
if err != nil { if err == io.EOF {
return err return fmt.Errorf("parent closed synchronisation channel")
} }
if procSync.Type != procResume {
return fmt.Errorf("invalid synchronisation flag from parent")
} }
} }
return nil return nil
@ -319,9 +325,10 @@ func setupRlimits(config *configs.Config) error {
return nil return nil
} }
func setOomScoreAdj(oomScoreAdj int) error { func setOomScoreAdj(oomScoreAdj int, pid int) error {
path := "/proc/self/oom_score_adj" path := fmt.Sprintf("/proc/%d/oom_score_adj", pid)
return ioutil.WriteFile(path, []byte(strconv.Itoa(oomScoreAdj)), 0700)
return ioutil.WriteFile(path, []byte(strconv.Itoa(oomScoreAdj)), 0600)
} }
// killCgroupProcesses freezes then iterates over all the processes inside the // killCgroupProcesses freezes then iterates over all the processes inside the

View file

@ -12,8 +12,12 @@ import (
// The number is randomly chosen to not conflict with known netlink types // The number is randomly chosen to not conflict with known netlink types
const ( const (
InitMsg uint16 = 62000 InitMsg uint16 = 62000
PidAttr uint16 = 27281 CloneFlagsAttr uint16 = 27281
ConsolePathAttr uint16 = 27282 ConsolePathAttr uint16 = 27282
NsPathsAttr uint16 = 27283
UidmapAttr uint16 = 27284
GidmapAttr uint16 = 27285
SetgroupAttr uint16 = 27286
// When syscall.NLA_HDRLEN is in gccgo, take this out. // When syscall.NLA_HDRLEN is in gccgo, take this out.
syscall_NLA_HDRLEN = (syscall.SizeofNlAttr + syscall.NLA_ALIGNTO - 1) & ^(syscall.NLA_ALIGNTO - 1) syscall_NLA_HDRLEN = (syscall.SizeofNlAttr + syscall.NLA_ALIGNTO - 1) & ^(syscall.NLA_ALIGNTO - 1)
) )
@ -60,3 +64,25 @@ func (msg *Bytemsg) Serialize() []byte {
func (msg *Bytemsg) Len() int { func (msg *Bytemsg) Len() int {
return syscall_NLA_HDRLEN + len(msg.Value) + 1 // null-terminated return syscall_NLA_HDRLEN + len(msg.Value) + 1 // null-terminated
} }
// Boolmsg is a netlink attribute that carries a single boolean value.
// Serialize encodes Value as one payload byte (1 for true, 0 for false)
// following the attribute header.
type Boolmsg struct {
// Type is the netlink attribute type written into the attribute header.
Type uint16
// Value is the boolean payload of the attribute.
Value bool
}
// Serialize encodes the message as a netlink attribute in native byte order:
// a 16-bit total length, the 16-bit attribute type, and then one payload byte
// that is 1 when Value is true and 0 otherwise.
func (msg *Boolmsg) Serialize() []byte {
	size := msg.Len()
	out := make([]byte, size)
	order := nl.NativeEndian()
	order.PutUint16(out[:2], uint16(size))
	order.PutUint16(out[2:4], msg.Type)
	// make() zero-fills the buffer, so only the true case needs a store.
	if msg.Value {
		out[4] = 1
	}
	return out
}
// Len reports the serialized size of the attribute: the netlink attribute
// header followed by the single byte that holds the boolean.
func (msg *Boolmsg) Len() int {
	const payloadSize = 1 // a bool is sent as one byte
	return syscall_NLA_HDRLEN + payloadSize
}

View file

@ -1,25 +0,0 @@
## nsenter
The `nsenter` package registers a special init constructor that is called before
the Go runtime has a chance to boot. This provides us the ability to `setns` on
existing namespaces and avoid the issues that the Go runtime has with multiple
threads. This constructor will be called if this package is registered,
imported, in your go application.
The `nsenter` package does `import "C"`, which means it is built with [cgo](https://golang.org/cmd/cgo/).
In cgo, if the import of "C" is immediately preceded by a comment, that comment,
called the preamble, is used as a header when compiling the C parts of the package.
So whenever package `nsenter` is imported, the C function `nsexec()` will be
called. Package `nsenter` is currently only imported by the Docker execdriver, so that C code
runs every time before we call `execdriver.Exec()`.
`nsexec()` will first check the environment variable `_LIBCONTAINER_INITPID`,
which gives the pid of the container process whose namespaces should be joined. The namespace fds are
found under `/proc/[pid]/ns` and entered with the `setns` syscall.
It then gets the pipe number from `_LIBCONTAINER_INITPIPE`; error messages can
be transferred through this pipe. If a tty is requested, `_LIBCONTAINER_CONSOLE_PATH` will
have a value and a console is started for output.
Finally, `nsexec()` will clone a child process, exit the parent process, and let
the Go runtime take over.

View file

@ -1,12 +0,0 @@
// +build linux,!gccgo
package nsenter
/*
#cgo CFLAGS: -Wall
extern void nsexec();
void __attribute__((constructor)) init(void) {
nsexec();
}
*/
import "C"

View file

@ -1,25 +0,0 @@
// +build linux,gccgo
package nsenter
/*
#cgo CFLAGS: -Wall
extern void nsexec();
void __attribute__((constructor)) init(void) {
nsexec();
}
*/
import "C"
// AlwaysFalse is here to stay false
// (and be exported so the compiler doesn't optimize out its reference)
var AlwaysFalse bool
// init exists only to keep a reference to the C constructor from Go code.
// AlwaysFalse is never set, so the call below does not execute at runtime.
func init() {
if AlwaysFalse {
// Referencing the C init() from this never-taken branch makes sure the
// compiler links in the C function (and with it the constructor).
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65134
C.init()
}
}

View file

@ -1,5 +0,0 @@
// +build !linux !cgo
package nsenter
import "C"

View file

@ -1,261 +0,0 @@
#define _GNU_SOURCE
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <linux/limits.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <signal.h>
#include <setjmp.h>
#include <sched.h>
#include <signal.h>
#include <bits/sockaddr.h>
#include <linux/netlink.h>
#include <linux/types.h>
#include <stdint.h>
#include <sys/socket.h>
/*
 * Argument block handed to clone(). The child's stack lives inside this
 * struct; every other member must be placed above (after) the stack area,
 * because the stack grows down from stack_ptr.
 */
struct clone_arg {
/*
* Reserve some space for clone() to locate arguments
* and retcode in this place
*/
char stack[4096] __attribute__ ((aligned(16)));
/* Zero-length marker for the top of the stack; passed to clone() as the child stack pointer. */
char stack_ptr[0];
/* Jump buffer that child_func() longjmp()s to once the child is running. */
jmp_buf *env;
};
/*
 * Print a formatted error message to stderr, prefixed with "nsenter: " and
 * followed by the description of the current errno (%m is a glibc printf
 * extension). The format must be a string literal.
 */
#define pr_perror(fmt, ...) fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__)
/*
 * Entry point of the cloned child. It never returns: it jumps straight back
 * to the setjmp() point recorded in the clone_arg, which then sees the
 * value 1.
 */
static int child_func(void *_arg)
{
	jmp_buf *saved = ((struct clone_arg *)_arg)->env;

	longjmp(*saved, 1);
}
// Use raw setns syscall for versions of glibc that don't include it (namely glibc-2.12)
// The wrapper below is only compiled for glibc 2.x with minor version < 14.
#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14
#define _GNU_SOURCE
#include "syscall.h"
// Derive SYS_setns from the kernel's __NR_setns when only the latter is defined.
#if defined(__NR_setns) && !defined(SYS_setns)
#define SYS_setns __NR_setns
#endif
// Only provide the wrapper when a syscall number for setns is known.
#ifdef SYS_setns
// setns forwards fd and nstype unchanged to the raw setns system call and
// returns its result.
int setns(int fd, int nstype)
{
return syscall(SYS_setns, fd, nstype);
}
#endif
#endif
/*
 * clone_parent creates a child with CLONE_PARENT | SIGCHLD that starts in
 * child_func() on the stack embedded in a local clone_arg and immediately
 * longjmp()s to *env. Returns whatever clone() returns (the child's pid, or
 * a negative value on failure). Kept out of line (noinline) so that the
 * clone_arg lives in this function's own stack frame.
 */
static int clone_parent(jmp_buf *env) __attribute__ ((noinline));
static int clone_parent(jmp_buf *env)
{
	struct clone_arg ca;

	ca.env = env;
	return clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca);
}
/*
 * readint32 returns the 32-bit value stored in host byte order at buf.
 * buf must point to at least sizeof(uint32_t) readable bytes.
 *
 * The bytes are copied out with memcpy() instead of dereferencing a cast
 * pointer: buf points into a plain char buffer, so it carries no alignment
 * guarantee and reading it through a uint32_t lvalue would also break the
 * strict-aliasing rules.
 */
static uint32_t readint32(char *buf)
{
	uint32_t value;

	memcpy(&value, buf, sizeof(value));
	return value;
}
// list of known message types we want to send to bootstrap program
// These are defined in libcontainer/message_linux.go and must be kept in
// sync with the values there.
// INIT_MSG: netlink message type expected in the header read from the init pipe.
#define INIT_MSG 62000
// PID_ATTR: attribute whose payload is read as a 32-bit pid.
#define PID_ATTR 27281
// CONSOLE_PATH_ATTR: attribute whose payload is the console path to open.
#define CONSOLE_PATH_ATTR 27282
/*
 * nsexec is the bootstrap step that runs from the package constructor,
 * before the Go runtime starts.
 *
 * It does nothing unless _LIBCONTAINER_INITTYPE is "setns". In that case it:
 *   1. reads one INIT_MSG netlink message from the fd named by
 *      _LIBCONTAINER_INITPIPE and extracts the target pid and, optionally,
 *      the console path;
 *   2. joins every namespace of the target pid that differs from its own;
 *   3. clones a child (CLONE_PARENT), reports the child's pid as JSON on the
 *      pipe and exits, while the child sets up its session/console and
 *      returns so that the Go runtime can take over.
 *
 * Any failure prints a message to stderr and exits with status 1.
 */
void nsexec()
{
	char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt", "user" };
	const int num = sizeof(namespaces) / sizeof(char *);
	jmp_buf env;
	char buf[PATH_MAX], *val;
	int i, tfd, self_tfd, child, n, len, pipenum, consolefd = -1;
	pid_t pid = 0;

	// if we dont have INITTYPE or this is the init process, skip the bootstrap process
	val = getenv("_LIBCONTAINER_INITTYPE");
	if (val == NULL || strcmp(val, "standard") == 0) {
		return;
	}
	if (strcmp(val, "setns") != 0) {
		pr_perror("Invalid inittype %s", val);
		exit(1);
	}

	val = getenv("_LIBCONTAINER_INITPIPE");
	if (val == NULL) {
		pr_perror("Child pipe not found");
		exit(1);
	}
	// Round-trip through snprintf() to reject values atoi() only partly parsed.
	pipenum = atoi(val);
	snprintf(buf, sizeof(buf), "%d", pipenum);
	if (strcmp(val, buf)) {
		pr_perror("Unable to parse _LIBCONTAINER_INITPIPE");
		exit(1);
	}

	char nlbuf[NLMSG_HDRLEN];
	struct nlmsghdr *nh;
	if ((n = read(pipenum, nlbuf, NLMSG_HDRLEN)) != NLMSG_HDRLEN) {
		pr_perror("Failed to read netlink header, got %d", n);
		exit(1);
	}

	nh = (struct nlmsghdr *)nlbuf;
	if (nh->nlmsg_type == NLMSG_ERROR) {
		pr_perror("Invalid netlink header message");
		exit(1);
	}
	if (nh->nlmsg_type != INIT_MSG) {
		pr_perror("Unexpected netlink message type %d", nh->nlmsg_type);
		exit(1);
	}

	// read the netlink payload
	len = NLMSG_PAYLOAD(nh, 0);
	// A header length smaller than the header itself (or one that does not
	// fit in an int) shows up here as a non-positive payload length.
	if (len <= 0) {
		pr_perror("Invalid netlink payload length %d", len);
		exit(1);
	}
	// Heap-allocate the payload: its size comes from the message, so it must
	// not be used to size a stack array, and malloc() also returns memory
	// suitably aligned for the struct nlattr accesses below.
	char *data = malloc(len);
	if (data == NULL) {
		pr_perror("Failed to allocate %d bytes for netlink payload", len);
		exit(1);
	}
	if ((n = read(pipenum, data, len)) != len) {
		pr_perror("Failed to read netlink payload, got %d", n);
		exit(1);
	}

	int start = 0;
	struct nlattr *attr;
	while (start < len) {
		int payload_len;

		// The attribute header itself must fit in what is left.
		if (len - start < NLA_HDRLEN) {
			pr_perror("Truncated netlink attribute header");
			exit(1);
		}
		attr = (struct nlattr *)(data + start);
		start += NLA_HDRLEN;
		// Reject lengths that would move backwards or run past the payload.
		if (attr->nla_len < NLA_HDRLEN || attr->nla_len - NLA_HDRLEN > len - start) {
			pr_perror("Invalid netlink attribute length %d", attr->nla_len);
			exit(1);
		}
		payload_len = attr->nla_len - NLA_HDRLEN;
		switch (attr->nla_type) {
		case PID_ATTR:
			if (payload_len < (int)sizeof(uint32_t)) {
				pr_perror("Invalid pid attribute length %d", payload_len);
				exit(1);
			}
			pid = (pid_t) readint32(data + start);
			break;
		case CONSOLE_PATH_ATTR:
			// The path is handed to open() as a C string, so it has to be
			// NUL-terminated inside its own payload.
			if (payload_len == 0 || data[start + payload_len - 1] != '\0') {
				pr_perror("Console path is not null-terminated");
				exit(1);
			}
			consolefd = open(data + start, O_RDWR);
			if (consolefd < 0) {
				pr_perror("Failed to open console %s", data + start);
				exit(1);
			}
			break;
		}
		start += NLA_ALIGN(payload_len);
	}
	free(data);

	// required pid to be passed
	if (pid == 0) {
		pr_perror("missing pid");
		exit(1);
	}

	/* Check that the specified process exists */
	snprintf(buf, PATH_MAX - 1, "/proc/%d/ns", pid);
	tfd = open(buf, O_DIRECTORY | O_RDONLY);
	if (tfd == -1) {
		pr_perror("Failed to open \"%s\"", buf);
		exit(1);
	}

	self_tfd = open("/proc/self/ns", O_DIRECTORY | O_RDONLY);
	if (self_tfd == -1) {
		pr_perror("Failed to open /proc/self/ns");
		exit(1);
	}

	for (i = 0; i < num; i++) {
		struct stat st;
		struct stat self_st;
		int fd;
		int have_st = 1;

		/* Symlinks on all namespaces exist for dead processes, but they can't be opened */
		if (fstatat(tfd, namespaces[i], &st, 0) == -1) {
			// Ignore nonexistent namespaces.
			if (errno == ENOENT)
				continue;
			// st was not filled in: skip the inode comparison and let
			// openat() below report the real failure.
			have_st = 0;
		}

		/* Skip namespaces we're already part of */
		if (have_st && fstatat(self_tfd, namespaces[i], &self_st, 0) != -1 && st.st_ino == self_st.st_ino) {
			continue;
		}

		fd = openat(tfd, namespaces[i], O_RDONLY);
		if (fd == -1) {
			pr_perror("Failed to open ns file %s for ns %s", buf, namespaces[i]);
			exit(1);
		}
		// Set the namespace.
		if (setns(fd, 0) == -1) {
			pr_perror("Failed to setns for %s", namespaces[i]);
			exit(1);
		}
		close(fd);
	}

	close(self_tfd);
	close(tfd);

	if (setjmp(env) == 1) {
		// Child

		if (setsid() == -1) {
			pr_perror("setsid failed");
			exit(1);
		}
		if (consolefd != -1) {
			if (ioctl(consolefd, TIOCSCTTY, 0) == -1) {
				pr_perror("ioctl TIOCSCTTY failed");
				exit(1);
			}
			if (dup3(consolefd, STDIN_FILENO, 0) != STDIN_FILENO) {
				pr_perror("Failed to dup 0");
				exit(1);
			}
			if (dup3(consolefd, STDOUT_FILENO, 0) != STDOUT_FILENO) {
				pr_perror("Failed to dup 1");
				exit(1);
			}
			if (dup3(consolefd, STDERR_FILENO, 0) != STDERR_FILENO) {
				pr_perror("Failed to dup 2");
				exit(1);
			}
			// The console now lives on fds 0-2; drop the extra descriptor
			// so it does not stay open in the process that follows.
			if (consolefd > STDERR_FILENO) {
				close(consolefd);
			}
		}

		// Finish executing, let the Go runtime take over.
		return;
	}
	// Parent

	// We must fork to actually enter the PID namespace, use CLONE_PARENT
	// so the child can have the right parent, and we don't need to forward
	// the child's exit code or resend its death signal.
	child = clone_parent(&env);
	if (child < 0) {
		pr_perror("Unable to fork");
		exit(1);
	}

	len = snprintf(buf, sizeof(buf), "{ \"pid\" : %d }\n", child);

	if (write(pipenum, buf, len) != len) {
		pr_perror("Unable to send a child pid");
		kill(child, SIGKILL);
		exit(1);
	}

	exit(0);
}

View file

@ -48,6 +48,16 @@ type Process struct {
// All capabilities not specified will be dropped from the processes capability mask // All capabilities not specified will be dropped from the processes capability mask
Capabilities []string Capabilities []string
// AppArmorProfile specifies the profile to apply to the process and is
// changed at the time the process is execed
AppArmorProfile string
// Label specifies the label to apply to the process. It is commonly used by selinux
Label string
// NoNewPrivileges controls whether processes can gain additional privileges.
NoNewPrivileges *bool
ops processOperations ops processOperations
} }

View file

@ -88,6 +88,10 @@ func (p *setnsProcess) start() (err error) {
if err := utils.WriteJSON(p.parentPipe, p.config); err != nil { if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
return newSystemError(err) return newSystemError(err)
} }
// set oom_score_adj
if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
return newSystemError(err)
}
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil { if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
return newSystemError(err) return newSystemError(err)
@ -167,14 +171,16 @@ func (p *setnsProcess) setExternalDescriptors(newFds []string) {
} }
type initProcess struct { type initProcess struct {
cmd *exec.Cmd cmd *exec.Cmd
parentPipe *os.File parentPipe *os.File
childPipe *os.File childPipe *os.File
config *initConfig config *initConfig
manager cgroups.Manager manager cgroups.Manager
container *linuxContainer container *linuxContainer
fds []string fds []string
process *Process process *Process
bootstrapData io.Reader
sharePidns bool
} }
func (p *initProcess) pid() int { func (p *initProcess) pid() int {
@ -185,15 +191,49 @@ func (p *initProcess) externalDescriptors() []string {
return p.fds return p.fds
} }
func (p *initProcess) start() (err error) { // execSetns runs the process that executes C code to perform the setns calls
// because setns support requires the C process to fork off a child and perform the setns
// before the go runtime boots, we wait on the process to die and receive the child's pid
// over the provided pipe.
// This is called by initProcess.start function
// On success p.cmd.Process is replaced with the child reported by the
// bootstrap process. An error is returned if the bootstrap process fails, or
// if the pid it reports cannot be decoded or is not a positive process id.
func (p *initProcess) execSetns() error {
	status, err := p.cmd.Process.Wait()
	if err != nil {
		p.cmd.Wait()
		return err
	}
	if !status.Success() {
		p.cmd.Wait()
		return &exec.ExitError{ProcessState: status}
	}
	// Decode into a value rather than a pointer: a JSON "null" on the pipe
	// would otherwise leave the pointer nil and panic on the field access.
	var child pid
	if err := json.NewDecoder(p.parentPipe).Decode(&child); err != nil {
		p.cmd.Wait()
		return err
	}
	// A missing or non-positive pid must never be adopted as the container
	// process.
	if child.Pid <= 0 {
		p.cmd.Wait()
		return fmt.Errorf("invalid child pid %d received from bootstrap process", child.Pid)
	}
	process, err := os.FindProcess(child.Pid)
	if err != nil {
		return err
	}
	// From here on the cloned child is the process this initProcess tracks.
	p.cmd.Process = process
	return nil
}
func (p *initProcess) start() error {
defer p.parentPipe.Close() defer p.parentPipe.Close()
err = p.cmd.Start() err := p.cmd.Start()
p.process.ops = p p.process.ops = p
p.childPipe.Close() p.childPipe.Close()
if err != nil { if err != nil {
p.process.ops = nil p.process.ops = nil
return newSystemError(err) return newSystemError(err)
} }
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
return err
}
if err := p.execSetns(); err != nil {
return newSystemError(err)
}
// Save the standard descriptor names before the container process // Save the standard descriptor names before the container process
// can potentially move them (e.g., via dup2()). If we don't do this now, // can potentially move them (e.g., via dup2()). If we don't do this now,
// we won't know at checkpoint time which file descriptor to look up. // we won't know at checkpoint time which file descriptor to look up.
@ -213,19 +253,6 @@ func (p *initProcess) start() (err error) {
p.manager.Destroy() p.manager.Destroy()
} }
}() }()
if p.config.Config.Hooks != nil {
s := configs.HookState{
Version: p.container.config.Version,
ID: p.container.id,
Pid: p.pid(),
Root: p.config.Config.Rootfs,
}
for _, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemError(err)
}
}
}
if err := p.createNetworkInterfaces(); err != nil { if err := p.createNetworkInterfaces(); err != nil {
return newSystemError(err) return newSystemError(err)
} }
@ -233,14 +260,16 @@ func (p *initProcess) start() (err error) {
return newSystemError(err) return newSystemError(err)
} }
var ( var (
procSync syncT procSync syncT
sentRun bool sentRun bool
ierr *genericError sentResume bool
ierr *genericError
) )
dec := json.NewDecoder(p.parentPipe)
loop: loop:
for { for {
if err := json.NewDecoder(p.parentPipe).Decode(&procSync); err != nil { if err := dec.Decode(&procSync); err != nil {
if err == io.EOF { if err == io.EOF {
break loop break loop
} }
@ -251,15 +280,54 @@ loop:
if err := p.manager.Set(p.config.Config); err != nil { if err := p.manager.Set(p.config.Config); err != nil {
return newSystemError(err) return newSystemError(err)
} }
// set oom_score_adj
if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
return newSystemError(err)
}
// call prestart hooks
if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
if p.config.Config.Hooks != nil {
s := configs.HookState{
Version: p.container.config.Version,
ID: p.container.id,
Pid: p.pid(),
Root: p.config.Config.Rootfs,
}
for _, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemError(err)
}
}
}
}
// Sync with child. // Sync with child.
if err := utils.WriteJSON(p.parentPipe, syncT{procRun}); err != nil { if err := utils.WriteJSON(p.parentPipe, syncT{procRun}); err != nil {
return newSystemError(err) return newSystemError(err)
} }
sentRun = true sentRun = true
case procHooks:
if p.config.Config.Hooks != nil {
s := configs.HookState{
Version: p.container.config.Version,
ID: p.container.id,
Pid: p.pid(),
Root: p.config.Config.Rootfs,
}
for _, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemError(err)
}
}
}
// Sync with child.
if err := utils.WriteJSON(p.parentPipe, syncT{procResume}); err != nil {
return newSystemError(err)
}
sentResume = true
case procError: case procError:
// wait for the child process to fully complete and receive an error message // wait for the child process to fully complete and receive an error message
// if one was encoutered // if one was encoutered
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF { if err := dec.Decode(&ierr); err != nil && err != io.EOF {
return newSystemError(err) return newSystemError(err)
} }
if ierr != nil { if ierr != nil {
@ -274,6 +342,9 @@ loop:
if !sentRun { if !sentRun {
return newSystemError(fmt.Errorf("could not synchronise with container process")) return newSystemError(fmt.Errorf("could not synchronise with container process"))
} }
if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process"))
}
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil { if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
return newSystemError(err) return newSystemError(err)
} }
@ -291,7 +362,7 @@ func (p *initProcess) wait() (*os.ProcessState, error) {
return p.cmd.ProcessState, err return p.cmd.ProcessState, err
} }
// we should kill all processes in cgroup when init is died if we use host PID namespace // we should kill all processes in cgroup when init is died if we use host PID namespace
if p.cmd.SysProcAttr.Cloneflags&syscall.CLONE_NEWPID == 0 { if p.sharePidns {
killCgroupProcesses(p.manager) killCgroupProcesses(p.manager)
} }
return p.cmd.ProcessState, nil return p.cmd.ProcessState, nil

View file

@ -4,6 +4,7 @@ package libcontainer
import ( import (
"fmt" "fmt"
"io"
"io/ioutil" "io/ioutil"
"os" "os"
"os/exec" "os/exec"
@ -26,7 +27,7 @@ const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NOD
// setupRootfs sets up the devices, mount points, and filesystems for use inside a // setupRootfs sets up the devices, mount points, and filesystems for use inside a
// new mount namespace. // new mount namespace.
func setupRootfs(config *configs.Config, console *linuxConsole) (err error) { func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWriter) (err error) {
if err := prepareRoot(config); err != nil { if err := prepareRoot(config); err != nil {
return newSystemError(err) return newSystemError(err)
} }
@ -59,6 +60,13 @@ func setupRootfs(config *configs.Config, console *linuxConsole) (err error) {
return newSystemError(err) return newSystemError(err)
} }
} }
// Signal the parent to run the pre-start hooks.
// The hooks are run after the mounts are setup, but before we switch to the new
// root, so that the old root is still available in the hooks for any mount
// manipulations.
if err := syncParentHooks(pipe); err != nil {
return err
}
if err := syscall.Chdir(config.Rootfs); err != nil { if err := syscall.Chdir(config.Rootfs); err != nil {
return newSystemError(err) return newSystemError(err)
} }
@ -75,6 +83,18 @@ func setupRootfs(config *configs.Config, console *linuxConsole) (err error) {
return newSystemError(err) return newSystemError(err)
} }
} }
// remount dev as ro if specifed
for _, m := range config.Mounts {
if m.Destination == "/dev" {
if m.Flags&syscall.MS_RDONLY != 0 {
if err := remountReadonly(m.Destination); err != nil {
return newSystemError(err)
}
}
break
}
}
// set rootfs ( / ) as readonly
if config.Readonlyfs { if config.Readonlyfs {
if err := setReadonly(); err != nil { if err := setReadonly(); err != nil {
return newSystemError(err) return newSystemError(err)
@ -138,16 +158,6 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
} }
} }
return nil return nil
case "devpts":
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
return mountPropagate(m, rootfs, mountLabel)
case "securityfs":
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
return mountPropagate(m, rootfs, mountLabel)
case "bind": case "bind":
stat, err := os.Stat(m.Source) stat, err := os.Stat(m.Source)
if err != nil { if err != nil {
@ -253,7 +263,10 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
} }
} }
default: default:
return fmt.Errorf("unknown mount device %q to %q", m.Device, m.Destination) if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
return mountPropagate(m, rootfs, mountLabel)
} }
return nil return nil
} }
@ -552,7 +565,7 @@ func setupPtmx(config *configs.Config, console *linuxConsole) error {
return nil return nil
} }
func pivotRoot(rootfs, pivotBaseDir string) error { func pivotRoot(rootfs, pivotBaseDir string) (err error) {
if pivotBaseDir == "" { if pivotBaseDir == "" {
pivotBaseDir = "/" pivotBaseDir = "/"
} }
@ -564,6 +577,12 @@ func pivotRoot(rootfs, pivotBaseDir string) error {
if err != nil { if err != nil {
return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err) return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err)
} }
defer func() {
errVal := os.Remove(pivotDir)
if err == nil {
err = errVal
}
}()
if err := syscall.PivotRoot(rootfs, pivotDir); err != nil { if err := syscall.PivotRoot(rootfs, pivotDir); err != nil {
return fmt.Errorf("pivot_root %s", err) return fmt.Errorf("pivot_root %s", err)
} }
@ -582,7 +601,7 @@ func pivotRoot(rootfs, pivotBaseDir string) error {
if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil { if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
return fmt.Errorf("unmount pivot_root dir %s", err) return fmt.Errorf("unmount pivot_root dir %s", err)
} }
return os.Remove(pivotDir) return nil
} }
func msMoveRoot(rootfs string) error { func msMoveRoot(rootfs string) error {
@ -671,14 +690,18 @@ func remount(m *configs.Mount, rootfs string) error {
// of propagation flags. // of propagation flags.
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error { func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
var ( var (
dest = m.Destination dest = m.Destination
data = label.FormatMountLabel(m.Data, mountLabel) data = label.FormatMountLabel(m.Data, mountLabel)
flags = m.Flags
) )
if dest == "/dev" {
flags &= ^syscall.MS_RDONLY
}
if !strings.HasPrefix(dest, rootfs) { if !strings.HasPrefix(dest, rootfs) {
dest = filepath.Join(rootfs, dest) dest = filepath.Join(rootfs, dest)
} }
if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil { if err := syscall.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil {
return err return err
} }

View file

@ -158,12 +158,14 @@ func Setfilecon(path string, scon string) error {
// Getfilecon returns the SELinux label for this path or returns an error. // Getfilecon returns the SELinux label for this path or returns an error.
func Getfilecon(path string) (string, error) { func Getfilecon(path string) (string, error) {
con, err := system.Lgetxattr(path, xattrNameSelinux) con, err := system.Lgetxattr(path, xattrNameSelinux)
if err != nil {
return "", err
}
// Trim the NUL byte at the end of the byte buffer, if present. // Trim the NUL byte at the end of the byte buffer, if present.
if con[len(con)-1] == '\x00' { if len(con) > 0 && con[len(con)-1] == '\x00' {
con = con[:len(con)-1] con = con[:len(con)-1]
} }
return string(con), err return string(con), nil
} }
func Setfscreatecon(scon string) error { func Setfscreatecon(scon string) error {

View file

@ -3,6 +3,7 @@
package libcontainer package libcontainer
import ( import (
"fmt"
"os" "os"
"github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runc/libcontainer/apparmor"
@ -18,18 +19,19 @@ type linuxSetnsInit struct {
config *initConfig config *initConfig
} }
// getSessionRingName returns the name of the session keyring for this
// container: the container id prefixed with "_ses.".
func (l *linuxSetnsInit) getSessionRingName() string {
	id := l.config.ContainerId
	return fmt.Sprintf("_ses.%s", id)
}
func (l *linuxSetnsInit) Init() error { func (l *linuxSetnsInit) Init() error {
// do not inherit the parent's session keyring // do not inherit the parent's session keyring
if _, err := keyctl.JoinSessionKeyring("_ses"); err != nil { if _, err := keyctl.JoinSessionKeyring(l.getSessionRingName()); err != nil {
return err return err
} }
if err := setupRlimits(l.config.Config); err != nil { if err := setupRlimits(l.config.Config); err != nil {
return err return err
} }
if err := setOomScoreAdj(l.config.Config.OomScoreAdj); err != nil { if l.config.NoNewPrivileges {
return err
}
if l.config.Config.NoNewPrivileges {
if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
return err return err
} }
@ -42,11 +44,11 @@ func (l *linuxSetnsInit) Init() error {
if err := finalizeNamespace(l.config); err != nil { if err := finalizeNamespace(l.config); err != nil {
return err return err
} }
if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil { if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
return err return err
} }
if l.config.Config.ProcessLabel != "" { if l.config.ProcessLabel != "" {
if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil { if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
return err return err
} }
} }

View file

@ -3,6 +3,7 @@
package libcontainer package libcontainer
import ( import (
"fmt"
"io" "io"
"os" "os"
"syscall" "syscall"
@ -21,27 +22,39 @@ type linuxStandardInit struct {
config *initConfig config *initConfig
} }
// getSessionRingParams returns, in order, the session keyring name for this
// container, the mask of permission bits to keep, and the permission bits to
// add.
func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
	const keepAll uint32 = 0xffffffff

	// without user ns we need 'UID' search permissions
	perms := uint32(0x80000)
	if l.config.Config.Namespaces.Contains(configs.NEWUSER) {
		// with user ns we need 'other' search permissions
		perms = 0x8
	}

	// create a unique per session container name that we can
	// join in setns; however, other containers can also join it
	name := fmt.Sprintf("_ses.%s", l.config.ContainerId)
	return name, keepAll, perms
}
// PR_SET_NO_NEW_PRIVS isn't exposed in Golang so we define it ourselves copying the value // PR_SET_NO_NEW_PRIVS isn't exposed in Golang so we define it ourselves copying the value
// the kernel // the kernel
const PR_SET_NO_NEW_PRIVS = 0x26 const PR_SET_NO_NEW_PRIVS = 0x26
func (l *linuxStandardInit) Init() error { func (l *linuxStandardInit) Init() error {
ringname, keepperms, newperms := l.getSessionRingParams()
// do not inherit the parent's session keyring // do not inherit the parent's session keyring
sessKeyId, err := keyctl.JoinSessionKeyring("") sessKeyId, err := keyctl.JoinSessionKeyring(ringname)
if err != nil { if err != nil {
return err return err
} }
// make session keyring searcheable // make session keyring searcheable
// without user ns we need 'UID' search permissions if err := keyctl.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
// with user ns we need 'other' search permissions
if err := keyctl.ModKeyringPerm(sessKeyId, 0xffffffff, 0x080008); err != nil {
return err return err
} }
// join any namespaces via a path to the namespace fd if provided
if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil {
return err
}
var console *linuxConsole var console *linuxConsole
if l.config.Console != "" { if l.config.Console != "" {
console = newConsoleFromPath(l.config.Console) console = newConsoleFromPath(l.config.Console)
@ -49,9 +62,6 @@ func (l *linuxStandardInit) Init() error {
return err return err
} }
} }
if _, err := syscall.Setsid(); err != nil {
return err
}
if console != nil { if console != nil {
if err := system.Setctty(); err != nil { if err := system.Setctty(); err != nil {
return err return err
@ -66,13 +76,11 @@ func (l *linuxStandardInit) Init() error {
if err := setupRlimits(l.config.Config); err != nil { if err := setupRlimits(l.config.Config); err != nil {
return err return err
} }
if err := setOomScoreAdj(l.config.Config.OomScoreAdj); err != nil {
return err
}
label.Init() label.Init()
// InitializeMountNamespace() can be executed only for a new mount namespace // InitializeMountNamespace() can be executed only for a new mount namespace
if l.config.Config.Namespaces.Contains(configs.NEWNS) { if l.config.Config.Namespaces.Contains(configs.NEWNS) {
if err := setupRootfs(l.config.Config, console); err != nil { if err := setupRootfs(l.config.Config, console, l.pipe); err != nil {
return err return err
} }
} }
@ -81,10 +89,10 @@ func (l *linuxStandardInit) Init() error {
return err return err
} }
} }
if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil { if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
return err return err
} }
if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil { if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
return err return err
} }
@ -107,7 +115,7 @@ func (l *linuxStandardInit) Init() error {
if err != nil { if err != nil {
return err return err
} }
if l.config.Config.NoNewPrivileges { if l.config.NoNewPrivileges {
if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
return err return err
} }

View file

@ -0,0 +1,9 @@
// +build !linux
package system
// RunningInUserNS is the stub used on non-Linux builds of this package.
// It unconditionally reports false.
func RunningInUserNS() bool {
	return false
}

View file

@ -0,0 +1,191 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Copyright 2015 The Linux Foundation.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View file

@ -0,0 +1,412 @@
package specs
import "os"
// Spec is the base configuration for the container. It specifies platform
// independent configuration. This information must be included when the
// bundle is packaged for distribution.
type Spec struct {
// Version is the version of the specification that is supported.
// It is serialized under the JSON key "ociVersion".
Version string `json:"ociVersion"`
// Platform is the host information for OS and Arch.
Platform Platform `json:"platform"`
// Process is the container's main process.
Process Process `json:"process"`
// Root is the root information for the container's filesystem.
Root Root `json:"root"`
// Hostname is the container's host name. It is omitted from the JSON when empty.
Hostname string `json:"hostname,omitempty"`
// Mounts is the list of mounts to add to the container's filesystem.
Mounts []Mount `json:"mounts"`
// Hooks are the commands run at various lifecycle events of the container.
Hooks Hooks `json:"hooks"`
// Annotations is an unstructured key value map that may be set by external tools to store and retrieve arbitrary metadata.
Annotations map[string]string `json:"annotations,omitempty"`
// Linux is platform specific configuration for Linux based containers.
// The field carries the struct tag platform:"linux".
Linux Linux `json:"linux" platform:"linux"`
}
// Process contains information to start a specific application inside the container.
type Process struct {
// Terminal creates an interactive terminal for the container.
Terminal bool `json:"terminal"`
// User specifies user information for the process.
User User `json:"user"`
// Args specifies the binary and arguments for the application to execute.
Args []string `json:"args"`
// Env populates the process environment for the process.
Env []string `json:"env,omitempty"`
// Cwd is the current working directory for the process and must be
// relative to the container's root.
Cwd string `json:"cwd"`
// Capabilities are Linux capabilities that are kept for the container.
Capabilities []string `json:"capabilities,omitempty" platform:"linux"`
// Rlimits specifies rlimit options to apply to the process.
Rlimits []Rlimit `json:"rlimits,omitempty"`
// NoNewPrivileges controls whether additional privileges could be gained by processes in the container.
NoNewPrivileges bool `json:"noNewPrivileges,omitempty"`
// ApparmorProfile specifies the apparmor profile for the container. (this field is platform dependent)
ApparmorProfile string `json:"apparmorProfile,omitempty" platform:"linux"`
// SelinuxLabel specifies the selinux context that the container process is run as. (this field is platform dependent)
SelinuxLabel string `json:"selinuxLabel,omitempty" platform:"linux"`
}
// User specifies Linux specific user and group information for the container's
// main process. All fields are tagged omitempty, so zero values (including
// id 0) are left out of the encoded JSON.
type User struct {
// UID is the user id. (this field is platform dependent)
UID uint32 `json:"uid,omitempty" platform:"linux"`
// GID is the group id. (this field is platform dependent)
GID uint32 `json:"gid,omitempty" platform:"linux"`
// AdditionalGids are additional group ids set for the container's process. (this field is platform dependent)
AdditionalGids []uint32 `json:"additionalGids,omitempty" platform:"linux"`
}
// Root contains information about the container's root filesystem on the host.
// Both fields are always present in the encoded JSON (no omitempty).
type Root struct {
// Path is the absolute path to the container's root filesystem.
Path string `json:"path"`
// Readonly makes the root filesystem for the container readonly before the process is executed.
Readonly bool `json:"readonly"`
}
// Platform specifies OS and arch information for the host system that the container
// is created for.
type Platform struct {
// OS is the operating system.
OS string `json:"os"`
// Arch is the architecture.
Arch string `json:"arch"`
}
// Mount specifies a mount for a container.
type Mount struct {
// Destination is the path where the mount will be placed relative to the
// container's root. The path and child directories MUST exist; a runtime
// MUST NOT create directories automatically for a mount point.
Destination string `json:"destination"`
// Type specifies the mount kind.
Type string `json:"type"`
// Source specifies the source path of the mount. In the case of bind mounts on
// Linux based systems this would be the file on the host.
Source string `json:"source"`
// Options are fstab style mount options. Omitted from the JSON when empty.
Options []string `json:"options,omitempty"`
}
// Hook specifies a command that is run at a particular event in the lifecycle of a container.
type Hook struct {
// Path identifies the command to run.
// NOTE(review): presumably the path of the executable on the host; confirm against the runtime.
Path string `json:"path"`
// Args are the arguments for the command; omitted from the JSON when empty.
// NOTE(review): whether Args[0] is the program name is not visible here; confirm.
Args []string `json:"args,omitempty"`
// Env is the environment for the command; omitted from the JSON when empty.
Env []string `json:"env,omitempty"`
}
// Hooks groups the hook lists for container setup and teardown.
// Each list is omitted from the encoded JSON when it is empty.
type Hooks struct {
// Prestart is a list of hooks to be run before the container process is executed.
// On Linux, they are run after the container namespaces are created.
Prestart []Hook `json:"prestart,omitempty"`
// Poststart is a list of hooks to be run after the container process is started.
Poststart []Hook `json:"poststart,omitempty"`
// Poststop is a list of hooks to be run after the container process exits.
Poststop []Hook `json:"poststop,omitempty"`
}
// Linux contains platform specific configuration for Linux based containers.
type Linux struct {
// UIDMappings specifies user mappings for supporting user namespaces on Linux.
UIDMappings []IDMapping `json:"uidMappings,omitempty"`
// GIDMappings specifies group mappings for supporting user namespaces on Linux.
GIDMappings []IDMapping `json:"gidMappings,omitempty"`
// Sysctl are a set of key value pairs that are set for the container on start.
Sysctl map[string]string `json:"sysctl,omitempty"`
// Resources contain cgroup information for handling resource constraints
// for the container. A nil pointer is omitted from the JSON.
Resources *Resources `json:"resources,omitempty"`
// CgroupsPath specifies the path to cgroups that are created and/or joined by the container.
// The path is expected to be relative to the cgroups mountpoint.
// If resources are specified, the cgroups at CgroupsPath will be updated based on resources.
CgroupsPath *string `json:"cgroupsPath,omitempty"`
// Namespaces contains the namespaces that are created and/or joined by the container.
Namespaces []Namespace `json:"namespaces"`
// Devices are a list of device nodes that are created for the container.
Devices []Device `json:"devices"`
// Seccomp specifies the seccomp security settings for the container.
Seccomp *Seccomp `json:"seccomp,omitempty"`
// RootfsPropagation is the rootfs mount propagation mode for the container.
RootfsPropagation string `json:"rootfsPropagation,omitempty"`
}
// Namespace is the configuration for a Linux namespace.
type Namespace struct {
// Type is the type of Linux namespace.
Type NamespaceType `json:"type"`
// Path is a path to an existing namespace persisted on disk that can be joined
// and is of the same type. It is omitted from the JSON when empty.
Path string `json:"path,omitempty"`
}
// NamespaceType is one of the Linux namespaces.
type NamespaceType string

// Namespace kinds usable as Namespace.Type.
//
// Every constant is declared with an explicit NamespaceType. Inside a const
// block, a line that has its own "= value" clause does not inherit the type
// of the previous line, so leaving the type off would make the constant an
// untyped string instead of a NamespaceType.
const (
	// PIDNamespace for isolating process IDs
	PIDNamespace NamespaceType = "pid"
	// NetworkNamespace for isolating network devices, stacks, ports, etc
	NetworkNamespace NamespaceType = "network"
	// MountNamespace for isolating mount points
	MountNamespace NamespaceType = "mount"
	// IPCNamespace for isolating System V IPC, POSIX message queues
	IPCNamespace NamespaceType = "ipc"
	// UTSNamespace for isolating hostname and NIS domain name
	UTSNamespace NamespaceType = "uts"
	// UserNamespace for isolating user and group IDs
	UserNamespace NamespaceType = "user"
)
// IDMapping specifies UID/GID mappings. One value describes a contiguous
// range of Size ids starting at HostID on the host and at ContainerID in the
// container.
type IDMapping struct {
// HostID is the UID/GID of the host user or group.
HostID uint32 `json:"hostID"`
// ContainerID is the UID/GID of the container's user or group.
ContainerID uint32 `json:"containerID"`
// Size is the length of the range of IDs mapped between the two namespaces.
Size uint32 `json:"size"`
}
// Rlimit describes one resource limit: its type and its hard and soft values.
type Rlimit struct {
// Type of the rlimit to set.
// NOTE(review): the accepted strings (e.g. RLIMIT_* names) are not defined here; confirm against the runtime.
Type string `json:"type"`
// Hard is the hard limit for the specified type.
Hard uint64 `json:"hard"`
// Soft is the soft limit for the specified type.
Soft uint64 `json:"soft"`
}
// HugepageLimit structure corresponds to limiting kernel hugepages.
// Both fields are optional pointers that are omitted from the JSON when nil.
type HugepageLimit struct {
// Pagesize is the hugepage size. Note the JSON key is "pageSize".
Pagesize *string `json:"pageSize,omitempty"`
// Limit is the limit of "hugepagesize" hugetlb usage.
Limit *uint64 `json:"limit,omitempty"`
}
// InterfacePriority pairs a network interface with a traffic priority.
type InterfacePriority struct {
// Name is the name of the network interface.
Name string `json:"name"`
// Priority for the interface.
Priority uint32 `json:"priority"`
}
// blockIODevice holds major:minor format supported in blkio cgroup.
// It is unexported and is embedded by WeightDevice and ThrottleDevice.
type blockIODevice struct {
// Major is the device's major number.
Major int64 `json:"major"`
// Minor is the device's minor number.
Minor int64 `json:"minor"`
}
// WeightDevice struct holds a `major:minor weight` pair for blkioWeightDevice.
type WeightDevice struct {
// blockIODevice is embedded, so its Major and Minor fields are promoted
// onto WeightDevice.
blockIODevice
// Weight is the bandwidth rate for the device, range is from 10 to 1000
Weight *uint16 `json:"weight,omitempty"`
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only
LeafWeight *uint16 `json:"leafWeight,omitempty"`
}
// ThrottleDevice struct holds a `major:minor rate_per_second` pair.
type ThrottleDevice struct {
// blockIODevice is embedded, so its Major and Minor fields are promoted
// onto ThrottleDevice.
blockIODevice
// Rate is the IO rate limit per cgroup per device. Omitted from the JSON when nil.
Rate *uint64 `json:"rate,omitempty"`
}
// BlockIO for Linux cgroup 'blkio' resource management.
// Every field is optional and omitted from the JSON when unset.
type BlockIO struct {
// Weight specifies per cgroup weight, range is from 10 to 1000
Weight *uint16 `json:"blkioWeight,omitempty"`
// LeafWeight specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only
LeafWeight *uint16 `json:"blkioLeafWeight,omitempty"`
// WeightDevice is the weight per cgroup per device, can override Weight
WeightDevice []WeightDevice `json:"blkioWeightDevice,omitempty"`
// ThrottleReadBpsDevice is the IO read rate limit per cgroup per device, bytes per second
ThrottleReadBpsDevice []ThrottleDevice `json:"blkioThrottleReadBpsDevice,omitempty"`
// ThrottleWriteBpsDevice is the IO write rate limit per cgroup per device, bytes per second
ThrottleWriteBpsDevice []ThrottleDevice `json:"blkioThrottleWriteBpsDevice,omitempty"`
// ThrottleReadIOPSDevice is the IO read rate limit per cgroup per device, IO per second
ThrottleReadIOPSDevice []ThrottleDevice `json:"blkioThrottleReadIOPSDevice,omitempty"`
// ThrottleWriteIOPSDevice is the IO write rate limit per cgroup per device, IO per second
ThrottleWriteIOPSDevice []ThrottleDevice `json:"blkioThrottleWriteIOPSDevice,omitempty"`
}
// Memory for Linux cgroup 'memory' resource management.
//
// Every field is an optional pointer tagged omitempty, so a limit that is
// not set is left out of the encoded JSON instead of being written as null.
type Memory struct {
	// Limit is the memory limit (in bytes).
	Limit *uint64 `json:"limit,omitempty"`
	// Reservation is the memory reservation or soft_limit (in bytes).
	Reservation *uint64 `json:"reservation,omitempty"`
	// Swap is the total memory limit (memory + swap).
	Swap *uint64 `json:"swap,omitempty"`
	// Kernel is the kernel memory limit (in bytes).
	Kernel *uint64 `json:"kernel,omitempty"`
	// KernelTCP is the kernel memory limit for tcp (in bytes).
	// It is tagged omitempty like its siblings so that a nil value is not
	// serialized as "kernelTCP":null.
	KernelTCP *uint64 `json:"kernelTCP,omitempty"`
	// Swappiness is how aggressive the kernel will swap memory pages. Range from 0 to 100.
	Swappiness *uint64 `json:"swappiness,omitempty"`
}
// CPU for Linux cgroup 'cpu' resource management.
// Every field is an optional pointer that is omitted from the JSON when nil.
type CPU struct {
// Shares is the CPU shares (relative weight (ratio) vs. other cgroups with cpu shares).
Shares *uint64 `json:"shares,omitempty"`
// Quota is the CPU hardcap limit (in usecs). Allowed cpu time in a given period.
Quota *uint64 `json:"quota,omitempty"`
// Period is the CPU period to be used for hardcapping (in usecs).
Period *uint64 `json:"period,omitempty"`
// RealtimeRuntime is how much time realtime scheduling may use (in usecs).
RealtimeRuntime *uint64 `json:"realtimeRuntime,omitempty"`
// RealtimePeriod is the CPU period to be used for realtime scheduling (in usecs).
RealtimePeriod *uint64 `json:"realtimePeriod,omitempty"`
// Cpus lists the CPUs to use within the cpuset. Default is to use any CPU available.
Cpus *string `json:"cpus,omitempty"`
// Mems lists the memory nodes in the cpuset. Default is to use any available memory node.
Mems *string `json:"mems,omitempty"`
}
// Pids for Linux cgroup 'pids' resource management (Linux 4.3).
type Pids struct {
// Limit is the maximum number of PIDs. Default is "no limit".
// A nil pointer is omitted from the JSON.
Limit *int64 `json:"limit,omitempty"`
}
// Network identification and priority configuration.
type Network struct {
	// ClassID sets the class identifier for the container's network packets.
	// It is an optional pointer and is tagged omitempty, like the other
	// optional pointers in this file, so that a nil value is left out of the
	// encoded JSON instead of being written as "classID":null.
	ClassID *uint32 `json:"classID,omitempty"`
	// Priorities sets the priority of network traffic for the container.
	Priorities []InterfacePriority `json:"priorities,omitempty"`
}
// Resources has container runtime resource constraints.
// Apart from Devices, every field is optional and omitted from the JSON when unset.
type Resources struct {
// Devices are a list of device rules for the whitelist controller.
// This field has no omitempty tag, so it is always present in the JSON.
Devices []DeviceCgroup `json:"devices"`
// DisableOOMKiller disables the OOM killer for out of memory conditions
DisableOOMKiller *bool `json:"disableOOMKiller,omitempty"`
// OOMScoreAdj specifies an oom_score_adj for the container.
OOMScoreAdj *int `json:"oomScoreAdj,omitempty"`
// Memory restriction configuration
Memory *Memory `json:"memory,omitempty"`
// CPU resource restriction configuration
CPU *CPU `json:"cpu,omitempty"`
// Pids is the task resource restriction configuration.
Pids *Pids `json:"pids,omitempty"`
// BlockIO restriction configuration
BlockIO *BlockIO `json:"blockIO,omitempty"`
// HugepageLimits are the hugetlb limits (in bytes)
HugepageLimits []HugepageLimit `json:"hugepageLimits,omitempty"`
// Network restriction configuration
Network *Network `json:"network,omitempty"`
}
// Device represents the mknod information for a Linux special device file.
type Device struct {
// Path to the device.
Path string `json:"path"`
// Type is the device type, block, char, etc.
Type string `json:"type"`
// Major is the device's major number.
Major int64 `json:"major"`
// Minor is the device's minor number.
Minor int64 `json:"minor"`
// FileMode permission bits for the device. Omitted from the JSON when nil.
FileMode *os.FileMode `json:"fileMode,omitempty"`
// UID of the device. Omitted from the JSON when nil.
UID *uint32 `json:"uid,omitempty"`
// GID of the device. Omitted from the JSON when nil.
GID *uint32 `json:"gid,omitempty"`
}
// DeviceCgroup represents a device rule for the whitelist controller.
// Only Allow is always encoded; the pointer fields are omitted when nil.
// NOTE(review): a nil Type/Major/Minor presumably acts as a wildcard; confirm against the runtime.
type DeviceCgroup struct {
// Allow or deny
Allow bool `json:"allow"`
// Type is the device type, block, char, etc.
Type *string `json:"type,omitempty"`
// Major is the device's major number.
Major *int64 `json:"major,omitempty"`
// Minor is the device's minor number.
Minor *int64 `json:"minor,omitempty"`
// Access is the cgroup access permissions format, rwm.
Access *string `json:"access,omitempty"`
}
// Seccomp represents syscall restrictions.
type Seccomp struct {
// DefaultAction is an Action value.
// NOTE(review): presumably applied to syscalls not matched by any entry in Syscalls; confirm.
DefaultAction Action `json:"defaultAction"`
// Architectures lists Arch values; this field is always encoded (no omitempty).
Architectures []Arch `json:"architectures"`
// Syscalls lists the per-syscall rules; omitted from the JSON when empty.
Syscalls []Syscall `json:"syscalls,omitempty"`
}
// Arch used for additional architectures. The values are the SCMP_ARCH_*
// names as strings.
type Arch string
// Additional architectures permitted to be used for system calls
// By default only the native architecture of the kernel is permitted
const (
ArchX86 Arch = "SCMP_ARCH_X86"
// ArchX86_64 keeps the underscore in its Go identifier.
ArchX86_64 Arch = "SCMP_ARCH_X86_64"
ArchX32 Arch = "SCMP_ARCH_X32"
ArchARM Arch = "SCMP_ARCH_ARM"
ArchAARCH64 Arch = "SCMP_ARCH_AARCH64"
ArchMIPS Arch = "SCMP_ARCH_MIPS"
ArchMIPS64 Arch = "SCMP_ARCH_MIPS64"
ArchMIPS64N32 Arch = "SCMP_ARCH_MIPS64N32"
ArchMIPSEL Arch = "SCMP_ARCH_MIPSEL"
ArchMIPSEL64 Arch = "SCMP_ARCH_MIPSEL64"
ArchMIPSEL64N32 Arch = "SCMP_ARCH_MIPSEL64N32"
)
// Action taken upon Seccomp rule match. The values are the SCMP_ACT_* names
// as strings.
type Action string
// Define actions for Seccomp rules
const (
ActKill Action = "SCMP_ACT_KILL"
ActTrap Action = "SCMP_ACT_TRAP"
ActErrno Action = "SCMP_ACT_ERRNO"
ActTrace Action = "SCMP_ACT_TRACE"
ActAllow Action = "SCMP_ACT_ALLOW"
)
// Operator used to match syscall arguments in Seccomp. The values are the
// SCMP_CMP_* names as strings.
type Operator string
// Define operators for syscall arguments in Seccomp
const (
OpNotEqual Operator = "SCMP_CMP_NE"
OpLessThan Operator = "SCMP_CMP_LT"
OpLessEqual Operator = "SCMP_CMP_LE"
OpEqualTo Operator = "SCMP_CMP_EQ"
OpGreaterEqual Operator = "SCMP_CMP_GE"
OpGreaterThan Operator = "SCMP_CMP_GT"
OpMaskedEqual Operator = "SCMP_CMP_MASKED_EQ"
)
// Arg used for matching specific syscall arguments in Seccomp.
// All four fields are always encoded (no omitempty).
type Arg struct {
// Index selects the syscall argument.
// NOTE(review): presumably zero-based; confirm against libseccomp.
Index uint `json:"index"`
// Value is the operand used with Op.
Value uint64 `json:"value"`
// ValueTwo is a second operand.
// NOTE(review): presumably only meaningful for OpMaskedEqual; confirm.
ValueTwo uint64 `json:"valueTwo"`
// Op is the comparison operator.
Op Operator `json:"op"`
}
// Syscall is used to match a syscall in Seccomp.
type Syscall struct {
// Name is the name of the syscall.
Name string `json:"name"`
// Action is the Action associated with this syscall entry.
Action Action `json:"action"`
// Args are the argument conditions for the entry; omitted from the JSON when empty.
Args []Arg `json:"args,omitempty"`
}

View file

@ -0,0 +1,13 @@
package specs
// State holds information about the runtime state of the container.
// All fields are always present in the encoded JSON (no omitempty).
type State struct {
// Version is the version of the specification that is supported.
// Note the JSON key here is "version".
Version string `json:"version"`
// ID is the container ID.
ID string `json:"id"`
// Pid is the process id for the container's main process.
Pid int `json:"pid"`
// BundlePath is the path to the container's bundle directory.
BundlePath string `json:"bundlePath"`
}

View file

@ -0,0 +1,18 @@
package specs
import "fmt"
// Components of the specification version implemented by this package.
const (
	// VersionMajor changes with API-incompatible changes.
	VersionMajor = 0
	// VersionMinor changes when functionality is added in a
	// backwards-compatible manner.
	VersionMinor = 4
	// VersionPatch changes with backwards-compatible bug fixes.
	VersionPatch = 0
	// VersionDev marks a development branch; it is the empty string for
	// releases.
	VersionDev = ""
)

// Version is the specification version that the package types support,
// rendered as "MAJOR.MINOR.PATCH" immediately followed by VersionDev.
var Version = fmt.Sprintf("%d.%d.%d", VersionMajor, VersionMinor, VersionPatch) + VersionDev