// Package parser implements a parser and parse tree dumper for Dockerfiles.
package parser

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"regexp"
	"strings"
	"unicode"

	"github.com/docker/docker/builder/dockerfile/command"
)

// Node is a structure used to represent a parse tree.
//
// In the node there are three fields, Value, Next, and Children. Value is the
// current token's string value. Next is always the next non-child token, and
// children contains all the children. Here's an example:
//
// (value next (child child-next child-next-next) next-next)
//
// This data structure is frankly pretty lousy for handling complex languages,
// but lucky for us the Dockerfile isn't very complicated. This structure
// works a little more effectively than a "proper" parse tree for our needs.
//
type Node struct {
	Value      string          // actual content
	Next       *Node           // the next item in the current sexp
	Children   []*Node         // the children of this sexp
	Attributes map[string]bool // special attributes for this node
	Original   string          // original line used before parsing
	Flags      []string        // only top Node should have this set
	StartLine  int             // the line in the original dockerfile where the node begins
	EndLine    int             // the line in the original dockerfile where the node ends
}
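
// As a rough illustration of the shape described above, an instruction such as
//
//	ENV foo bar
//
// should parse into something like (env foo bar): a top-level Node whose
// Value is "env" and whose Next chain holds the argument tokens, with no
// Children. The Children field is used for nesting, e.g. the sub-instruction
// of ONBUILD and the per-instruction nodes collected under the root in Parse.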

// Directive is the structure used during a build run to hold the state of
// parsing directives.
type Directive struct {
	EscapeToken           rune           // Current escape token
	LineContinuationRegex *regexp.Regexp // Current line continuation regex
	LookingForDirectives  bool           // Whether we are currently looking for directives
	EscapeSeen            bool           // Whether the escape directive has been seen
}

var (
	dispatch           map[string]func(string, *Directive) (*Node, map[string]bool, error)
	tokenWhitespace    = regexp.MustCompile(`[\t\v\f\r ]+`)
	tokenEscapeCommand = regexp.MustCompile(`^#[ \t]*escape[ \t]*=[ \t]*(?P<escapechar>.).*$`)
	tokenComment       = regexp.MustCompile(`^#.*$`)
)

// DefaultEscapeToken is the default escape token
const DefaultEscapeToken = "\\"

// SetEscapeToken sets the default token for escaping characters in a Dockerfile.
func SetEscapeToken(s string, d *Directive) error {
	if s != "`" && s != "\\" {
		return fmt.Errorf("invalid ESCAPE '%s'. Must be ` or \\", s)
	}
	d.EscapeToken = rune(s[0])
	d.LineContinuationRegex = regexp.MustCompile(`\` + s + `[ \t]*$`)
	return nil
}
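
// An illustrative sketch of the effect: switching the escape token to a
// backtick,
//
//	d := &Directive{}
//	if err := SetEscapeToken("`", d); err != nil {
//		// handle the error
//	}
//
// leaves d.EscapeToken set to '`' and d.LineContinuationRegex matching a
// trailing backtick (plus optional trailing spaces or tabs), which is what
// the `# escape=` parser directive ultimately toggles via ParseLine.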

func init() {
	// Dispatch Table. see line_parsers.go for the parse functions.
	// The command is parsed and mapped to the line parser. The line parser
	// receives the arguments but not the command, and returns an AST after
	// reformulating the arguments according to the rules in the parser
	// functions. Errors are propagated up by Parse() and the resulting AST can
	// be incorporated directly into the existing AST as a next.
	dispatch = map[string]func(string, *Directive) (*Node, map[string]bool, error){
		command.Add:         parseMaybeJSONToList,
		command.Arg:         parseNameOrNameVal,
		command.Cmd:         parseMaybeJSON,
		command.Copy:        parseMaybeJSONToList,
		command.Entrypoint:  parseMaybeJSON,
		command.Env:         parseEnv,
		command.Expose:      parseStringsWhitespaceDelimited,
		command.From:        parseString,
		command.Healthcheck: parseHealthConfig,
		command.Label:       parseLabel,
		command.Maintainer:  parseString,
		command.Onbuild:     parseSubCommand,
		command.Run:         parseMaybeJSON,
		command.Shell:       parseMaybeJSON,
		command.StopSignal:  parseString,
		command.User:        parseString,
		command.Volume:      parseMaybeJSONToList,
		command.Workdir:     parseString,
	}
}
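
// A rough sketch of how the table above is consumed (the actual lookup lives
// in fullDispatch, elsewhere in this package): the lowercased instruction
// keyword selects a line parser, and that parser only ever sees the
// arguments, e.g.
//
//	fn := dispatch["run"] // parseMaybeJSON
//	sexp, attrs, err := fn("echo hi", d)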

// ParseLine parses a single line. It returns the remaining text when the line
// ends with a continuation, a parsed Node when the line holds a complete
// instruction, and neither for blank lines, comments, and parser directives.
func ParseLine(line string, d *Directive, ignoreCont bool) (string, *Node, error) {
	// Handle the parser directive '# escape=<char>'. Parser directives must precede
	// any builder instruction or other comments, and cannot be repeated.
	if d.LookingForDirectives {
		tecMatch := tokenEscapeCommand.FindStringSubmatch(strings.ToLower(line))
		if len(tecMatch) > 0 {
			if d.EscapeSeen {
				return "", nil, fmt.Errorf("only one escape parser directive can be used")
			}
			for i, n := range tokenEscapeCommand.SubexpNames() {
				if n == "escapechar" {
					if err := SetEscapeToken(tecMatch[i], d); err != nil {
						return "", nil, err
					}
					d.EscapeSeen = true
					return "", nil, nil
				}
			}
		}
	}

	d.LookingForDirectives = false

	if line = stripComments(line); line == "" {
		return "", nil, nil
	}

	if !ignoreCont && d.LineContinuationRegex.MatchString(line) {
		line = d.LineContinuationRegex.ReplaceAllString(line, "")
		return line, nil, nil
	}

	cmd, flags, args, err := splitCommand(line)
	if err != nil {
		return "", nil, err
	}

	node := &Node{}
	node.Value = cmd

	sexp, attrs, err := fullDispatch(cmd, args, d)
	if err != nil {
		return "", nil, err
	}

	node.Next = sexp
	node.Attributes = attrs
	node.Original = line
	node.Flags = flags

	return "", node, nil
}
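
// Illustrative expectations for ParseLine, assuming d has been initialized
// with SetEscapeToken(DefaultEscapeToken, d):
//
//	ParseLine(`RUN echo hi \`, d, false) // -> ("RUN echo hi ", nil, nil): continued line
//	ParseLine("RUN echo hi", d, false)   // -> ("", node, nil): complete instruction
//	ParseLine("# a comment", d, false)   // -> ("", nil, nil): stripped entirely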

// Parse is the main parse routine.
// It reads a Dockerfile from the given io.Reader and returns the root of the AST.
func Parse(rwc io.Reader, d *Directive) (*Node, error) {
	currentLine := 0
	root := &Node{}
	root.StartLine = -1
	scanner := bufio.NewScanner(rwc)

	utf8bom := []byte{0xEF, 0xBB, 0xBF}
	for scanner.Scan() {
		scannedBytes := scanner.Bytes()
		// We trim UTF8 BOM
		if currentLine == 0 {
			scannedBytes = bytes.TrimPrefix(scannedBytes, utf8bom)
		}
		scannedLine := strings.TrimLeftFunc(string(scannedBytes), unicode.IsSpace)
		currentLine++
		line, child, err := ParseLine(scannedLine, d, false)
		if err != nil {
			return nil, err
		}
		startLine := currentLine

		if line != "" && child == nil {
			for scanner.Scan() {
				newline := scanner.Text()
				currentLine++

				if stripComments(strings.TrimSpace(newline)) == "" {
					continue
				}

				line, child, err = ParseLine(line+newline, d, false)
				if err != nil {
					return nil, err
				}

				if child != nil {
					break
				}
			}
			if child == nil && line != "" {
				// When we call ParseLine we'll pass in 'true' for
				// the ignoreCont param if we're at the EOF. This will
				// prevent the func from returning immediately w/o
				// parsing the line thinking that there's more input
				// to come.

				_, child, err = ParseLine(line, d, scanner.Err() == nil)
				if err != nil {
					return nil, err
				}
			}
		}

		if child != nil {
			// Update the line information for the current child.
			child.StartLine = startLine
			child.EndLine = currentLine
			// Update the line information for the root. The starting line of the root is always the
			// starting line of the first child and the ending line is the ending line of the last child.
			if root.StartLine < 0 {
				root.StartLine = currentLine
			}
			root.EndLine = currentLine
			root.Children = append(root.Children, child)
		}
	}

	return root, nil
}
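
// A minimal usage sketch, assuming the caller initializes the Directive the
// way the builder does (directives enabled and the default escape token set):
//
//	d := &Directive{LookingForDirectives: true}
//	if err := SetEscapeToken(DefaultEscapeToken, d); err != nil {
//		// handle error
//	}
//	ast, err := Parse(strings.NewReader("FROM busybox\nRUN echo hi\n"), d)
//	if err != nil {
//		// handle error
//	}
//	_ = ast // ast.Children holds one Node per instruction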