2015-07-22 01:29:03 -04:00
|
|
|
// Package parser implements a parser and parse tree dumper for Dockerfiles.
|
2014-08-05 16:17:40 -04:00
|
|
|
package parser
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
2016-06-03 08:32:37 -04:00
|
|
|
"bytes"
|
2016-04-22 18:04:46 -04:00
|
|
|
"fmt"
|
2014-08-05 16:17:40 -04:00
|
|
|
"io"
|
|
|
|
"regexp"
|
2017-05-17 20:08:01 -04:00
|
|
|
"runtime"
|
2017-03-10 16:23:46 -05:00
|
|
|
"strconv"
|
2014-08-05 16:17:40 -04:00
|
|
|
"strings"
|
2014-10-02 19:58:53 -04:00
|
|
|
"unicode"
|
2015-02-12 02:54:41 -05:00
|
|
|
|
2015-09-05 15:49:06 -04:00
|
|
|
"github.com/docker/docker/builder/dockerfile/command"
|
2017-05-17 20:08:01 -04:00
|
|
|
"github.com/docker/docker/pkg/system"
|
2017-04-12 15:40:16 -04:00
|
|
|
"github.com/pkg/errors"
|
2014-08-05 16:17:40 -04:00
|
|
|
)
|
|
|
|
|
2014-08-07 01:56:44 -04:00
|
|
|
// Node is a single element of the parse tree built from a Dockerfile.
//
// Value holds the token's string value, Next points at the next token in the
// same s-expression, and Children holds the nested nodes. For example:
//
//	(value next (child child-next child-next-next) next-next)
//
// This shape is a poor fit for complex languages, but the Dockerfile format
// is simple enough that it serves better than a "proper" parse tree.
type Node struct {
	Value      string          // actual content
	Next       *Node           // the next item in the current sexp
	Children   []*Node         // the children of this sexp
	Attributes map[string]bool // special attributes for this node
	Original   string          // original line used before parsing
	Flags      []string        // only top Node should have this set
	StartLine  int             // the line in the original dockerfile where the node begins
	endLine    int             // the line in the original dockerfile where the node ends
}
|
|
|
|
|
2017-03-10 16:23:46 -05:00
|
|
|
// Dump dumps the AST defined by `node` as a list of sexps.
|
|
|
|
// Returns a string suitable for printing.
|
|
|
|
func (node *Node) Dump() string {
|
|
|
|
str := ""
|
|
|
|
str += node.Value
|
|
|
|
|
|
|
|
if len(node.Flags) > 0 {
|
|
|
|
str += fmt.Sprintf(" %q", node.Flags)
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, n := range node.Children {
|
|
|
|
str += "(" + n.Dump() + ")\n"
|
|
|
|
}
|
|
|
|
|
|
|
|
for n := node.Next; n != nil; n = n.Next {
|
|
|
|
if len(n.Children) > 0 {
|
|
|
|
str += " " + n.Dump()
|
|
|
|
} else {
|
|
|
|
str += " " + strconv.Quote(n.Value)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return strings.TrimSpace(str)
|
|
|
|
}
|
|
|
|
|
2017-04-12 18:00:55 -04:00
|
|
|
func (node *Node) lines(start, end int) {
|
|
|
|
node.StartLine = start
|
|
|
|
node.endLine = end
|
|
|
|
}
|
|
|
|
|
|
|
|
// AddChild adds a new child node, and updates line information
|
|
|
|
func (node *Node) AddChild(child *Node, startLine, endLine int) {
|
|
|
|
child.lines(startLine, endLine)
|
|
|
|
if node.StartLine < 0 {
|
|
|
|
node.StartLine = startLine
|
|
|
|
}
|
|
|
|
node.endLine = endLine
|
|
|
|
node.Children = append(node.Children, child)
|
|
|
|
}
|
|
|
|
|
2014-08-05 16:17:40 -04:00
|
|
|
var (
|
2017-05-17 20:08:01 -04:00
|
|
|
dispatch map[string]func(string, *Directive) (*Node, map[string]bool, error)
|
|
|
|
tokenWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`)
|
|
|
|
tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`)
|
|
|
|
tokenPlatformCommand = regexp.MustCompile(`^#[ \t]*platform[ \t]*=[ \t]*(?P<platform>.*)$`)
|
|
|
|
tokenComment = regexp.MustCompile(`^#.*$`)
|
2014-08-05 16:17:40 -04:00
|
|
|
)
|
|
|
|
|
2016-06-27 16:20:47 -04:00
|
|
|
// DefaultEscapeToken is the escape character (a backslash) that a Directive
// created by NewDefaultDirective starts out with.
const DefaultEscapeToken = '\\'
|
2016-06-27 16:20:47 -04:00
|
|
|
|
2017-04-11 15:07:02 -04:00
|
|
|
// Directive holds the parser-directive state that is built up while a
// Dockerfile is being parsed.
type Directive struct {
	escapeToken           rune           // Current escape token
	platformToken         string         // Current platform token
	lineContinuationRegex *regexp.Regexp // Current line continuation regex
	processingComplete    bool           // Whether we are done looking for directives
	escapeSeen            bool           // Whether the escape directive has been seen
	platformSeen          bool           // Whether the platform directive has been seen
}
|
|
|
|
|
2017-04-12 13:47:19 -04:00
|
|
|
// setEscapeToken sets the default token for escaping characters in a Dockerfile.
|
|
|
|
func (d *Directive) setEscapeToken(s string) error {
|
2016-04-22 18:04:46 -04:00
|
|
|
if s != "`" && s != "\\" {
|
|
|
|
return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s)
|
|
|
|
}
|
2017-04-11 15:07:02 -04:00
|
|
|
d.escapeToken = rune(s[0])
|
|
|
|
d.lineContinuationRegex = regexp.MustCompile(`\` + s + `[ \t]*$`)
|
2016-04-22 18:04:46 -04:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2017-05-17 20:08:01 -04:00
|
|
|
// setPlatformToken sets the default platform for pulling images in a Dockerfile.
|
|
|
|
func (d *Directive) setPlatformToken(s string) error {
|
|
|
|
s = strings.ToLower(s)
|
|
|
|
valid := []string{runtime.GOOS}
|
2017-06-26 12:11:54 -04:00
|
|
|
if system.LCOWSupported() {
|
2017-05-17 20:08:01 -04:00
|
|
|
valid = append(valid, "linux")
|
|
|
|
}
|
|
|
|
for _, item := range valid {
|
|
|
|
if s == item {
|
|
|
|
d.platformToken = s
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return fmt.Errorf("invalid PLATFORM '%s'. Must be one of %v", s, valid)
|
|
|
|
}
|
|
|
|
|
2017-05-30 12:45:27 -04:00
|
|
|
// possibleParserDirective looks for one or more parser directives '# escapeToken=<char>' and
|
|
|
|
// '# platform=<string>'. Parser directives must precede any builder instruction
|
|
|
|
// or other comments, and cannot be repeated.
|
|
|
|
func (d *Directive) possibleParserDirective(line string) error {
|
2017-04-12 15:40:16 -04:00
|
|
|
if d.processingComplete {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line))
|
2017-05-30 12:45:27 -04:00
|
|
|
if len(tecMatch) != 0 {
|
|
|
|
for i, n := range tokenEscapeCommand.SubexpNames() {
|
|
|
|
if n == "escapechar" {
|
2017-09-11 14:55:05 -04:00
|
|
|
if d.escapeSeen {
|
2017-05-30 12:45:27 -04:00
|
|
|
return errors.New("only one escape parser directive can be used")
|
|
|
|
}
|
|
|
|
d.escapeSeen = true
|
|
|
|
return d.setEscapeToken(tecMatch[i])
|
|
|
|
}
|
2017-04-12 15:40:16 -04:00
|
|
|
}
|
|
|
|
}
|
2017-05-30 12:45:27 -04:00
|
|
|
|
2017-08-08 15:43:48 -04:00
|
|
|
// Only recognise a platform token if LCOW is supported
|
2017-06-26 12:11:54 -04:00
|
|
|
if system.LCOWSupported() {
|
2017-05-17 20:08:01 -04:00
|
|
|
tpcMatch := tokenPlatformCommand.FindStringSubmatch(strings.ToLower(line))
|
|
|
|
if len(tpcMatch) != 0 {
|
|
|
|
for i, n := range tokenPlatformCommand.SubexpNames() {
|
|
|
|
if n == "platform" {
|
2017-09-11 14:55:05 -04:00
|
|
|
if d.platformSeen {
|
2017-05-17 20:08:01 -04:00
|
|
|
return errors.New("only one platform parser directive can be used")
|
|
|
|
}
|
|
|
|
d.platformSeen = true
|
|
|
|
return d.setPlatformToken(tpcMatch[i])
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-30 12:45:27 -04:00
|
|
|
d.processingComplete = true
|
2017-04-12 15:40:16 -04:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2017-04-11 15:07:02 -04:00
|
|
|
// NewDefaultDirective returns a new Directive with the default escapeToken token
|
|
|
|
func NewDefaultDirective() *Directive {
|
2017-04-12 15:40:16 -04:00
|
|
|
directive := Directive{}
|
2017-04-12 13:47:19 -04:00
|
|
|
directive.setEscapeToken(string(DefaultEscapeToken))
|
2017-04-11 15:07:02 -04:00
|
|
|
return &directive
|
|
|
|
}
|
|
|
|
|
2014-08-05 16:17:40 -04:00
|
|
|
func init() {
|
|
|
|
// Dispatch Table. see line_parsers.go for the parse functions.
|
|
|
|
// The command is parsed and mapped to the line parser. The line parser
|
2015-08-07 18:24:18 -04:00
|
|
|
// receives the arguments but not the command, and returns an AST after
|
2014-08-05 16:17:40 -04:00
|
|
|
// reformulating the arguments according to the rules in the parser
|
2015-02-03 21:47:37 -05:00
|
|
|
// functions. Errors are propagated up by Parse() and the resulting AST can
|
2014-08-05 16:17:40 -04:00
|
|
|
// be incorporated directly into the existing AST as a next.
|
2016-06-27 16:20:47 -04:00
|
|
|
dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){
|
2016-04-18 05:48:13 -04:00
|
|
|
command.Add: parseMaybeJSONToList,
|
2016-05-03 16:56:59 -04:00
|
|
|
command.Arg: parseNameOrNameVal,
|
2016-04-18 05:48:13 -04:00
|
|
|
command.Cmd: parseMaybeJSON,
|
2016-05-03 16:56:59 -04:00
|
|
|
command.Copy: parseMaybeJSONToList,
|
2016-04-18 05:48:13 -04:00
|
|
|
command.Entrypoint: parseMaybeJSON,
|
2016-05-03 16:56:59 -04:00
|
|
|
command.Env: parseEnv,
|
2016-04-18 05:48:13 -04:00
|
|
|
command.Expose: parseStringsWhitespaceDelimited,
|
2017-03-20 13:28:21 -04:00
|
|
|
command.From: parseStringsWhitespaceDelimited,
|
2016-04-18 05:48:13 -04:00
|
|
|
command.Healthcheck: parseHealthConfig,
|
2016-05-03 16:56:59 -04:00
|
|
|
command.Label: parseLabel,
|
|
|
|
command.Maintainer: parseString,
|
|
|
|
command.Onbuild: parseSubCommand,
|
|
|
|
command.Run: parseMaybeJSON,
|
|
|
|
command.Shell: parseMaybeJSON,
|
|
|
|
command.StopSignal: parseString,
|
|
|
|
command.User: parseString,
|
|
|
|
command.Volume: parseMaybeJSONToList,
|
|
|
|
command.Workdir: parseString,
|
2014-08-05 16:17:40 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-03-10 16:23:46 -05:00
|
|
|
// newNodeFromLine splits the line into parts, and dispatches to a function
|
|
|
|
// based on the command and command arguments. A Node is created from the
|
|
|
|
// result of the dispatch.
|
|
|
|
func newNodeFromLine(line string, directive *Directive) (*Node, error) {
|
2015-01-27 10:57:34 -05:00
|
|
|
cmd, flags, args, err := splitCommand(line)
|
2014-10-14 23:33:11 -04:00
|
|
|
if err != nil {
|
2017-03-10 16:23:46 -05:00
|
|
|
return nil, err
|
2014-10-14 23:33:11 -04:00
|
|
|
}
|
2014-08-05 16:17:40 -04:00
|
|
|
|
2017-03-10 16:23:46 -05:00
|
|
|
fn := dispatch[cmd]
|
|
|
|
// Ignore invalid Dockerfile instructions
|
|
|
|
if fn == nil {
|
|
|
|
fn = parseIgnore
|
|
|
|
}
|
|
|
|
next, attrs, err := fn(args, directive)
|
2014-08-05 16:17:40 -04:00
|
|
|
if err != nil {
|
2017-03-10 16:23:46 -05:00
|
|
|
return nil, err
|
2014-08-05 16:17:40 -04:00
|
|
|
}
|
|
|
|
|
2017-03-10 16:23:46 -05:00
|
|
|
return &Node{
|
|
|
|
Value: cmd,
|
|
|
|
Original: line,
|
|
|
|
Flags: flags,
|
|
|
|
Next: next,
|
|
|
|
Attributes: attrs,
|
|
|
|
}, nil
|
|
|
|
}
|
2014-10-23 20:23:25 -04:00
|
|
|
|
2017-04-12 13:47:19 -04:00
|
|
|
// Result is the result of parsing a Dockerfile
|
|
|
|
type Result struct {
|
|
|
|
AST *Node
|
|
|
|
EscapeToken rune
|
2017-08-08 15:43:48 -04:00
|
|
|
// TODO @jhowardmsft - see https://github.com/moby/moby/issues/34617
|
|
|
|
// This next field will be removed in a future update for LCOW support.
|
|
|
|
OS string
|
|
|
|
Warnings []string
|
2017-04-12 13:47:19 -04:00
|
|
|
}
|
|
|
|
|
2017-06-16 18:05:30 -04:00
|
|
|
// PrintWarnings to the writer
|
|
|
|
func (r *Result) PrintWarnings(out io.Writer) {
|
|
|
|
if len(r.Warnings) == 0 {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
fmt.Fprintf(out, strings.Join(r.Warnings, "\n")+"\n")
|
|
|
|
}
|
|
|
|
|
2017-04-12 13:47:19 -04:00
|
|
|
// Parse reads lines from a Reader, parses the lines into an AST and returns
|
|
|
|
// the AST and escape token
|
|
|
|
func Parse(rwc io.Reader) (*Result, error) {
|
|
|
|
d := NewDefaultDirective()
|
2015-11-01 16:28:30 -05:00
|
|
|
currentLine := 0
|
2017-04-12 15:40:16 -04:00
|
|
|
root := &Node{StartLine: -1}
|
2014-08-05 16:17:40 -04:00
|
|
|
scanner := bufio.NewScanner(rwc)
|
2016-12-05 21:55:07 -05:00
|
|
|
warnings := []string{}
|
2014-08-05 16:17:40 -04:00
|
|
|
|
2017-04-12 18:00:55 -04:00
|
|
|
var err error
|
2014-08-05 16:17:40 -04:00
|
|
|
for scanner.Scan() {
|
2017-05-30 12:45:27 -04:00
|
|
|
bytesRead := scanner.Bytes()
|
|
|
|
if currentLine == 0 {
|
|
|
|
// First line, strip the byte-order-marker if present
|
|
|
|
bytesRead = bytes.TrimPrefix(bytesRead, utf8bom)
|
|
|
|
}
|
|
|
|
bytesRead, err = processLine(d, bytesRead, true)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2016-06-03 08:32:37 -04:00
|
|
|
}
|
2015-11-01 16:28:30 -05:00
|
|
|
currentLine++
|
2014-08-05 16:17:40 -04:00
|
|
|
|
2017-04-12 18:00:55 -04:00
|
|
|
startLine := currentLine
|
2017-05-30 12:45:27 -04:00
|
|
|
line, isEndOfLine := trimContinuationCharacter(string(bytesRead), d)
|
2017-04-12 18:00:55 -04:00
|
|
|
if isEndOfLine && line == "" {
|
|
|
|
continue
|
|
|
|
}
|
2014-08-05 16:17:40 -04:00
|
|
|
|
2016-12-05 21:55:07 -05:00
|
|
|
var hasEmptyContinuationLine bool
|
2017-04-12 18:00:55 -04:00
|
|
|
for !isEndOfLine && scanner.Scan() {
|
2017-05-30 12:45:27 -04:00
|
|
|
bytesRead, err := processLine(d, scanner.Bytes(), false)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2017-04-12 18:00:55 -04:00
|
|
|
currentLine++
|
2014-08-05 16:17:40 -04:00
|
|
|
|
2017-09-27 16:18:24 -04:00
|
|
|
if isComment(scanner.Bytes()) {
|
|
|
|
// original line was a comment (processLine strips comments)
|
|
|
|
continue
|
|
|
|
}
|
2017-05-30 12:45:27 -04:00
|
|
|
if isEmptyContinuationLine(bytesRead) {
|
2016-12-05 21:55:07 -05:00
|
|
|
hasEmptyContinuationLine = true
|
2017-04-12 18:00:55 -04:00
|
|
|
continue
|
2014-09-25 22:28:24 -04:00
|
|
|
}
|
2017-04-12 18:00:55 -04:00
|
|
|
|
2017-05-30 12:45:27 -04:00
|
|
|
continuationLine := string(bytesRead)
|
2017-04-12 18:00:55 -04:00
|
|
|
continuationLine, isEndOfLine = trimContinuationCharacter(continuationLine, d)
|
|
|
|
line += continuationLine
|
2014-08-05 16:17:40 -04:00
|
|
|
}
|
|
|
|
|
2016-12-05 21:55:07 -05:00
|
|
|
if hasEmptyContinuationLine {
|
|
|
|
warning := "[WARNING]: Empty continuation line found in:\n " + line
|
|
|
|
warnings = append(warnings, warning)
|
|
|
|
}
|
|
|
|
|
2017-04-12 18:00:55 -04:00
|
|
|
child, err := newNodeFromLine(line, d)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2014-08-05 16:17:40 -04:00
|
|
|
}
|
2017-04-12 18:00:55 -04:00
|
|
|
root.AddChild(child, startLine, currentLine)
|
2014-08-05 16:17:40 -04:00
|
|
|
}
|
2016-12-05 21:55:07 -05:00
|
|
|
|
|
|
|
if len(warnings) > 0 {
|
|
|
|
warnings = append(warnings, "[WARNING]: Empty continuation lines will become errors in a future release.")
|
|
|
|
}
|
|
|
|
return &Result{
|
|
|
|
AST: root,
|
|
|
|
Warnings: warnings,
|
|
|
|
EscapeToken: d.escapeToken,
|
2017-08-08 15:43:48 -04:00
|
|
|
OS: d.platformToken,
|
2016-12-05 21:55:07 -05:00
|
|
|
}, nil
|
2014-08-05 16:17:40 -04:00
|
|
|
}
|
2017-03-10 16:23:46 -05:00
|
|
|
|
2017-04-12 18:00:55 -04:00
|
|
|
func trimComments(src []byte) []byte {
|
|
|
|
return tokenComment.ReplaceAll(src, []byte{})
|
|
|
|
}
|
|
|
|
|
|
|
|
// trimWhitespace returns src without its leading Unicode whitespace.
// Trailing whitespace is kept.
func trimWhitespace(src []byte) []byte {
	trimmed := bytes.TrimLeftFunc(src, unicode.IsSpace)
	return trimmed
}
|
|
|
|
|
2017-09-27 16:18:24 -04:00
|
|
|
func isComment(line []byte) bool {
|
|
|
|
return tokenComment.Match(trimWhitespace(line))
|
|
|
|
}
|
|
|
|
|
2017-04-12 18:00:55 -04:00
|
|
|
func isEmptyContinuationLine(line []byte) bool {
|
2017-09-27 16:18:24 -04:00
|
|
|
return len(trimWhitespace(line)) == 0
|
2017-04-12 18:00:55 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// utf8bom is the three-byte UTF-8 byte-order mark.
var utf8bom = []byte("\xEF\xBB\xBF")
|
|
|
|
|
|
|
|
func trimContinuationCharacter(line string, d *Directive) (string, bool) {
|
|
|
|
if d.lineContinuationRegex.MatchString(line) {
|
|
|
|
line = d.lineContinuationRegex.ReplaceAllString(line, "")
|
|
|
|
return line, false
|
2017-03-10 16:23:46 -05:00
|
|
|
}
|
2017-04-12 18:00:55 -04:00
|
|
|
return line, true
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: remove stripLeftWhitespace after deprecation period. It seems silly
|
|
|
|
// to preserve whitespace on continuation lines. Why is that done?
|
2017-05-30 12:45:27 -04:00
|
|
|
func processLine(d *Directive, token []byte, stripLeftWhitespace bool) ([]byte, error) {
|
2017-04-12 18:00:55 -04:00
|
|
|
if stripLeftWhitespace {
|
|
|
|
token = trimWhitespace(token)
|
|
|
|
}
|
2016-12-05 21:55:07 -05:00
|
|
|
return trimComments(token), d.possibleParserDirective(string(token))
|
2017-03-10 16:23:46 -05:00
|
|
|
}
|