From 3f5f6b038f56bc89d39a74fbb9c765daf98ae4d8 Mon Sep 17 00:00:00 2001 From: Erik Hollensbe Date: Wed, 6 Aug 2014 22:56:44 -0700 Subject: [PATCH] builder: comments / documentation Docker-DCO-1.1-Signed-off-by: Erik Hollensbe (github: erikh) --- builder/evaluator/dispatchers.go | 81 +++++++++++++++++++++++- builder/evaluator/evaluator.go | 102 +++++++++++++++++++++++-------- builder/evaluator/internals.go | 3 + builder/evaluator/support.go | 1 + builder/parser/line_parsers.go | 11 ++++ builder/parser/parser.go | 32 +++------- 6 files changed, 181 insertions(+), 49 deletions(-) diff --git a/builder/evaluator/dispatchers.go b/builder/evaluator/dispatchers.go index eefcb629be..e7db2f008b 100644 --- a/builder/evaluator/dispatchers.go +++ b/builder/evaluator/dispatchers.go @@ -1,5 +1,12 @@ package evaluator +// This file contains the dispatchers for each command. Note that +// `nullDispatch` is not actually a command, but support for commands we parse +// but do nothing with. +// +// See evaluator.go for a higher level discussion of the whole evaluator +// package. + import ( "fmt" "path/filepath" @@ -10,11 +17,16 @@ import ( "github.com/docker/docker/utils" ) -// dispatch with no layer / parsing. +// dispatch with no layer / parsing. This is effectively not a command. func nullDispatch(b *buildFile, args []string) error { return nil } +// ENV foo bar +// +// Sets the environment variable foo to bar, also makes interpolation +// in the dockerfile available from the next statement on via ${foo}. +// func env(b *buildFile, args []string) error { if len(args) != 2 { return fmt.Errorf("ENV accepts two arguments") @@ -29,6 +41,9 @@ func env(b *buildFile, args []string) error { return b.commit("", b.config.Cmd, fmt.Sprintf("ENV %s=%s", key, b.env[key])) } +// MAINTAINER some text +// +// Sets the maintainer metadata. 
func maintainer(b *buildFile, args []string) error { if len(args) != 1 { return fmt.Errorf("MAINTAINER requires only one argument") @@ -38,6 +53,11 @@ func maintainer(b *buildFile, args []string) error { return b.commit("", b.config.Cmd, fmt.Sprintf("MAINTAINER %s", b.maintainer)) } +// ADD foo /path +// +// Add the file 'foo' to '/path'. Tarball and Remote URL (git, http) handling +// exist here. If you do not wish to have this automatic handling, use COPY. +// func add(b *buildFile, args []string) error { if len(args) != 2 { return fmt.Errorf("ADD requires two arguments") @@ -46,6 +66,10 @@ func add(b *buildFile, args []string) error { return b.runContextCommand(args, true, true, "ADD") } +// COPY foo /path +// +// Same as 'ADD' but without the tar and remote url handling. +// func dispatchCopy(b *buildFile, args []string) error { if len(args) != 2 { return fmt.Errorf("COPY requires two arguments") @@ -54,6 +78,10 @@ func dispatchCopy(b *buildFile, args []string) error { return b.runContextCommand(args, false, false, "COPY") } +// FROM imagename +// +// This sets the image the dockerfile will build on top of. +// func from(b *buildFile, args []string) error { if len(args) != 1 { return fmt.Errorf("FROM requires one argument") @@ -77,6 +105,15 @@ func from(b *buildFile, args []string) error { return b.processImageFrom(image) } +// ONBUILD RUN echo yo +// +// ONBUILD triggers run when the image is used in a FROM statement. +// +// ONBUILD handling has a lot of special-case functionality, the heading in +// evaluator.go and comments around dispatch() in the same file explain the +// special cases. search for 'OnBuild' in internals.go for additional special +// cases. 
+// func onbuild(b *buildFile, args []string) error { triggerInstruction := strings.ToUpper(strings.TrimSpace(args[0])) switch triggerInstruction { @@ -92,6 +129,10 @@ func onbuild(b *buildFile, args []string) error { return b.commit("", b.config.Cmd, fmt.Sprintf("ONBUILD %s", trigger)) } +// WORKDIR /tmp +// +// Set the working directory for future RUN/CMD/etc statements. +// func workdir(b *buildFile, args []string) error { if len(args) != 1 { return fmt.Errorf("WORKDIR requires exactly one argument") @@ -111,6 +152,15 @@ func workdir(b *buildFile, args []string) error { return b.commit("", b.config.Cmd, fmt.Sprintf("WORKDIR %v", workdir)) } +// RUN some command yo +// +// run a command and commit the image. Args are automatically prepended with +// 'sh -c' in the event there is only one argument. The difference in +// processing: +// +// RUN echo hi # sh -c echo hi +// RUN [ "echo", "hi" ] # echo hi +// func run(b *buildFile, args []string) error { if len(args) == 1 { // literal string command, not an exec array args = append([]string{"/bin/sh", "-c"}, args[0]) @@ -162,6 +212,11 @@ func run(b *buildFile, args []string) error { return nil } +// CMD foo +// +// Set the default command to run in the container (which may be empty). +// Argument handling is the same as RUN. +// func cmd(b *buildFile, args []string) error { if len(args) < 2 { args = append([]string{"/bin/sh", "-c"}, args...) @@ -176,6 +231,14 @@ func cmd(b *buildFile, args []string) error { return nil } +// ENTRYPOINT /usr/sbin/nginx +// +// Set the entrypoint (which defaults to sh -c) to /usr/sbin/nginx. Will +// accept the CMD as the arguments to /usr/sbin/nginx. +// +// Handles command processing similar to CMD and RUN, only b.config.Entrypoint +// is initialized at NewBuilder time instead of through argument parsing. 
+// func entrypoint(b *buildFile, args []string) error { b.config.Entrypoint = args @@ -189,6 +252,11 @@ func entrypoint(b *buildFile, args []string) error { return nil } +// EXPOSE 6667/tcp 7000/tcp +// +// Expose ports for links and port mappings. This all ends up in +// b.config.ExposedPorts for runconfig. +// func expose(b *buildFile, args []string) error { portsTab := args @@ -211,6 +279,11 @@ func expose(b *buildFile, args []string) error { return b.commit("", b.config.Cmd, fmt.Sprintf("EXPOSE %v", ports)) } +// USER foo +// +// Set the user to 'foo' for future commands and when running the +// ENTRYPOINT/CMD at container run time. +// func user(b *buildFile, args []string) error { if len(args) != 1 { return fmt.Errorf("USER requires exactly one argument") @@ -220,6 +293,11 @@ func user(b *buildFile, args []string) error { return b.commit("", b.config.Cmd, fmt.Sprintf("USER %v", args)) } +// VOLUME /foo +// +// Expose the volume /foo for use. Will also accept the JSON form, but either +// way requires exactly one argument. +// func volume(b *buildFile, args []string) error { if len(args) != 1 { return fmt.Errorf("Volume cannot be empty") @@ -239,6 +317,7 @@ func volume(b *buildFile, args []string) error { return nil } +// INSERT is no longer accepted, but we still parse it. func insert(b *buildFile, args []string) error { return fmt.Errorf("INSERT has been deprecated. Please use ADD instead") } diff --git a/builder/evaluator/evaluator.go b/builder/evaluator/evaluator.go index 7b74db1d86..2b22d47317 100644 --- a/builder/evaluator/evaluator.go +++ b/builder/evaluator/evaluator.go @@ -1,3 +1,22 @@ +// evaluator is the evaluation step in the Dockerfile parse/evaluate pipeline. +// +// It incorporates a dispatch table based on the parser.Node values (see the +// parser package for more information) that are yielded from the parser itself. +// Calling NewBuilder with the BuildOpts struct can be used to customize the +// experience for execution purposes only. 
Parsing is controlled in the parser +// package, and this division of responsibility should be respected. +// +// Please see the jump table targets for the actual invocations, most of which +// will call out to the functions in internals.go to deal with their tasks. +// +// ONBUILD is a special case, which is covered in the onbuild() func in +// dispatchers.go. +// +// The evaluator uses the concept of "steps", which are usually each processable +// line in the Dockerfile. Each step is numbered and certain actions are taken +// before and after each step, such as creating an image ID and removing temporary +// containers and images. Note that ONBUILD creates a kinda-sorta "sub run" which +// includes its own set of steps (usually only one of them). package evaluator import ( @@ -49,32 +68,40 @@ func init() { type envMap map[string]string type uniqueMap map[string]struct{} +// internal struct, used to maintain configuration of the Dockerfile's +// processing as it evaluates the parsing result. type buildFile struct { - dockerfile *parser.Node - env envMap - image string - config *runconfig.Config - options *BuildOpts - maintainer string + dockerfile *parser.Node // the syntax tree of the dockerfile + env envMap // map of environment variables + image string // image name for commit processing + config *runconfig.Config // runconfig for cmd, run, entrypoint etc. + options *BuildOpts // see below + maintainer string // maintainer name. could probably be removed.
+ cmdSet bool // indicates if CMD was set in current Dockerfile + context *tarsum.TarSum // the context is a tarball that is uploaded by the client + contextPath string // the path of the temporary directory the local context is unpacked to (server side) - // cmdSet indicates is CMD was set in current Dockerfile - cmdSet bool - - context *tarsum.TarSum - contextPath string - tmpContainers uniqueMap - tmpImages uniqueMap + // both of these are controlled by the Remove and ForceRemove options in BuildOpts + tmpContainers uniqueMap // a map of containers used for removes + tmpImages uniqueMap // a map of images used for removes } type BuildOpts struct { - Daemon *daemon.Daemon - Engine *engine.Engine - OutStream io.Writer - ErrStream io.Writer - Verbose bool - UtilizeCache bool - Remove bool - ForceRemove bool + Daemon *daemon.Daemon + Engine *engine.Engine + + // effectively stdio for the run. Because it is not stdio, I said + // "Effectively". Do not use stdio anywhere in this package for any reason. + OutStream io.Writer + ErrStream io.Writer + + Verbose bool + UtilizeCache bool + + // controls how images and containers are handled between steps. + Remove bool + ForceRemove bool + AuthConfig *registry.AuthConfig AuthConfigFile *registry.ConfigFile @@ -83,6 +110,7 @@ type BuildOpts struct { StreamFormatter *utils.StreamFormatter } +// Create a new builder. func NewBuilder(opts *BuildOpts) (*buildFile, error) { return &buildFile{ dockerfile: nil, @@ -94,10 +122,20 @@ func NewBuilder(opts *BuildOpts) (*buildFile, error) { }, nil } +// Run the builder with the context. This is the lynchpin of this package. This +// will (barring errors): +// +// * call readContext() which will set up the temporary directory and unpack +// the context into it. +// * read the dockerfile +// * parse the dockerfile +// * walk the parse tree and execute it by dispatching to handlers. If Remove +// or ForceRemove is set, additional cleanup around containers happens after +// processing.
+// * Print a happy message and return the image ID. +// func (b *buildFile) Run(context io.Reader) (string, error) { - err := b.readContext(context) - - if err != nil { + if err := b.readContext(context); err != nil { return "", err } @@ -131,7 +169,7 @@ func (b *buildFile) Run(context io.Reader) (string, error) { } if b.image == "" { - return "", fmt.Errorf("No image was generated. This may be because the Dockerfile does not, like, do anything.\n") + return "", fmt.Errorf("No image was generated. Is your Dockerfile empty?\n") } fmt.Fprintf(b.options.OutStream, "Successfully built %s\n", utils.TruncateID(b.image)) @@ -153,6 +191,20 @@ func initRunConfig() *runconfig.Config { } } +// This method is the entrypoint to all statement handling routines. +// +// Almost all nodes will have this structure: +// Child[Node, Node, Node] where Child is from parser.Node.Children and each +// node comes from parser.Node.Next. This forms a "line" with a statement and +// arguments and we process them in this normalized form by hitting +// evaluateTable with the leaf nodes of the command and the buildFile object. +// +// ONBUILD is a special case; in this case the parser will emit: +// Child[Node, Child[Node, Node...]] where the first node is the literal +// "onbuild" and the child entrypoint is the command of the ONBUILD statement, +// such as `RUN` in ONBUILD RUN foo. There is special case logic in here to +// deal with that, at least until it becomes more of a general concern with new +// features. func (b *buildFile) dispatch(stepN int, ast *parser.Node) error { cmd := ast.Value strs := []string{} diff --git a/builder/evaluator/internals.go b/builder/evaluator/internals.go index 719a6d3639..b55b0b967c 100644 --- a/builder/evaluator/internals.go +++ b/builder/evaluator/internals.go @@ -1,5 +1,8 @@ package evaluator +// internals for handling commands. Covers many areas and a lot of +// non-contiguous functionality. Please read the comments.
+ import ( "crypto/sha256" "encoding/hex" diff --git a/builder/evaluator/support.go b/builder/evaluator/support.go index da9f64695e..21dd7ccd3c 100644 --- a/builder/evaluator/support.go +++ b/builder/evaluator/support.go @@ -9,6 +9,7 @@ var ( TOKEN_ENV_INTERPOLATION = regexp.MustCompile("(\\\\\\\\+|[^\\\\]|\\b|\\A)\\$({?)([[:alnum:]_]+)(}?)") ) +// handle environment replacement. Used in dispatcher. func replaceEnv(b *buildFile, str string) string { for _, match := range TOKEN_ENV_INTERPOLATION.FindAllString(str, -1) { match = match[strings.Index(match, "$"):] diff --git a/builder/parser/line_parsers.go b/builder/parser/line_parsers.go index 9ae2a3191f..71e704791f 100644 --- a/builder/parser/line_parsers.go +++ b/builder/parser/line_parsers.go @@ -1,5 +1,11 @@ package parser +// line parsers are dispatch calls that parse a single unit of text into a +// Node object which contains the whole statement. Dockerfiles have varied +// (but not usually unique, see ONBUILD for a unique example) parsing rules +// per-command, and these unify the processing in a way that makes it +// manageable. + import ( "encoding/json" "strconv" @@ -12,6 +18,11 @@ func parseIgnore(rest string) (*Node, error) { return blankNode(), nil } +// used for onbuild. Could potentially be used for anything that represents a +// statement with sub-statements. +// +// ONBUILD RUN foo bar -> (onbuild (run foo bar)) +// func parseSubCommand(rest string) (*Node, error) { _, child, err := parseLine(rest) if err != nil { diff --git a/builder/parser/parser.go b/builder/parser/parser.go index 08f67dbb2c..03196c7da9 100644 --- a/builder/parser/parser.go +++ b/builder/parser/parser.go @@ -8,32 +8,17 @@ import ( "strings" ) -// Node is the building block of the AST this package will create. +// Node is a structure used to represent a parse tree. // -// Nodes are structured to have a value, next, and child, the latter two of -// which are Nodes themselves. 
+// In the node there are three fields, Value, Next, and Children. Value is the +// current token's string value. Next is always the next non-child token, and +// children contains all the children. Here's an example: // -// This terminology is unfortunately rather confusing, so here's a diagram. -// Anything after the ; is a comment. +// (value next (child child-next child-next-next) next-next) // -// ( -// (run "foo") ; value run, and next is a value foo. -// (run "1" "2" "3") ; -// (something (really cool)) -// ) -// -// Will give you something like this: -// -// &Node{ -// Value:"", -// Child: &Node{Value: "run", Next: &Node{Value: "foo"}, Child: nil}, -// Next: &Node{Value:"", Child: &Node{Value:"run", Next: &Node{Value:`"1"`.... -// -// ... and so on. -// -// The short and fast rule is that anything that starts with ( is a child of -// something. Anything which follows a previous statement is a next of -// something. +// This data structure is frankly pretty lousy for handling complex languages, +// but lucky for us the Dockerfile isn't very complicated. This structure +// works a little more effectively than a "proper" parse tree for our needs. // type Node struct { Value string // actual content @@ -79,6 +64,7 @@ func blankNode() *Node { return &Node{"", nil, []*Node{}} } +// parse a line and return the remainder. func parseLine(line string) (string, *Node, error) { if line = stripComments(line); line == "" { return "", nil, nil