mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Merge pull request #17055 from daehyeok/parse_utf8
ProcessWord support UTF-8
This commit is contained in:
commit
ab22fe0f7c
3 changed files with 99 additions and 51 deletions
|
@ -9,13 +9,15 @@ package dockerfile
|
|||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"text/scanner"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
type shellWord struct {
|
||||
word string
|
||||
envs []string
|
||||
pos int
|
||||
word string
|
||||
scanner scanner.Scanner
|
||||
envs []string
|
||||
pos int
|
||||
}
|
||||
|
||||
// ProcessWord will use the 'env' list of environment variables,
|
||||
|
@ -26,11 +28,12 @@ func ProcessWord(word string, env []string) (string, error) {
|
|||
envs: env,
|
||||
pos: 0,
|
||||
}
|
||||
sw.scanner.Init(strings.NewReader(word))
|
||||
return sw.process()
|
||||
}
|
||||
|
||||
func (sw *shellWord) process() (string, error) {
|
||||
return sw.processStopOn('\000')
|
||||
return sw.processStopOn(scanner.EOF)
|
||||
}
|
||||
|
||||
// Process the word, starting at 'pos', and stop when we get to the
|
||||
|
@ -43,10 +46,11 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, error) {
|
|||
'$': sw.processDollar,
|
||||
}
|
||||
|
||||
for sw.pos < len(sw.word) {
|
||||
ch := sw.peek()
|
||||
if stopChar != '\000' && ch == stopChar {
|
||||
sw.next()
|
||||
for sw.scanner.Peek() != scanner.EOF {
|
||||
ch := sw.scanner.Peek()
|
||||
|
||||
if stopChar != scanner.EOF && ch == stopChar {
|
||||
sw.scanner.Next()
|
||||
break
|
||||
}
|
||||
if fn, ok := charFuncMapping[ch]; ok {
|
||||
|
@ -58,14 +62,19 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, error) {
|
|||
result += tmp
|
||||
} else {
|
||||
// Not special, just add it to the result
|
||||
ch = sw.next()
|
||||
ch = sw.scanner.Next()
|
||||
|
||||
if ch == '\\' {
|
||||
// '\' escapes, except end of line
|
||||
ch = sw.next()
|
||||
if ch == '\000' {
|
||||
continue
|
||||
|
||||
ch = sw.scanner.Next()
|
||||
|
||||
if ch == scanner.EOF {
|
||||
break
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
result += string(ch)
|
||||
}
|
||||
}
|
||||
|
@ -73,36 +82,21 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, error) {
|
|||
return result, nil
|
||||
}
|
||||
|
||||
func (sw *shellWord) peek() rune {
|
||||
if sw.pos == len(sw.word) {
|
||||
return '\000'
|
||||
}
|
||||
return rune(sw.word[sw.pos])
|
||||
}
|
||||
|
||||
func (sw *shellWord) next() rune {
|
||||
if sw.pos == len(sw.word) {
|
||||
return '\000'
|
||||
}
|
||||
ch := rune(sw.word[sw.pos])
|
||||
sw.pos++
|
||||
return ch
|
||||
}
|
||||
|
||||
func (sw *shellWord) processSingleQuote() (string, error) {
|
||||
// All chars between single quotes are taken as-is
|
||||
// Note, you can't escape '
|
||||
var result string
|
||||
|
||||
sw.next()
|
||||
sw.scanner.Next()
|
||||
|
||||
for {
|
||||
ch := sw.next()
|
||||
if ch == '\000' || ch == '\'' {
|
||||
ch := sw.scanner.Next()
|
||||
if ch == '\'' || ch == scanner.EOF {
|
||||
break
|
||||
}
|
||||
result += string(ch)
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
|
@ -111,12 +105,12 @@ func (sw *shellWord) processDoubleQuote() (string, error) {
|
|||
// But you can escape " with a \
|
||||
var result string
|
||||
|
||||
sw.next()
|
||||
sw.scanner.Next()
|
||||
|
||||
for sw.pos < len(sw.word) {
|
||||
ch := sw.peek()
|
||||
for sw.scanner.Peek() != scanner.EOF {
|
||||
ch := sw.scanner.Peek()
|
||||
if ch == '"' {
|
||||
sw.next()
|
||||
sw.scanner.Next()
|
||||
break
|
||||
}
|
||||
if ch == '$' {
|
||||
|
@ -126,18 +120,18 @@ func (sw *shellWord) processDoubleQuote() (string, error) {
|
|||
}
|
||||
result += tmp
|
||||
} else {
|
||||
ch = sw.next()
|
||||
ch = sw.scanner.Next()
|
||||
if ch == '\\' {
|
||||
chNext := sw.peek()
|
||||
chNext := sw.scanner.Peek()
|
||||
|
||||
if chNext == '\000' {
|
||||
if chNext == scanner.EOF {
|
||||
// Ignore \ at end of word
|
||||
continue
|
||||
}
|
||||
|
||||
if chNext == '"' || chNext == '$' {
|
||||
// \" and \$ can be escaped, all other \'s are left as-is
|
||||
ch = sw.next()
|
||||
ch = sw.scanner.Next()
|
||||
}
|
||||
}
|
||||
result += string(ch)
|
||||
|
@ -148,23 +142,23 @@ func (sw *shellWord) processDoubleQuote() (string, error) {
|
|||
}
|
||||
|
||||
func (sw *shellWord) processDollar() (string, error) {
|
||||
sw.next()
|
||||
ch := sw.peek()
|
||||
sw.scanner.Next()
|
||||
ch := sw.scanner.Peek()
|
||||
if ch == '{' {
|
||||
sw.next()
|
||||
sw.scanner.Next()
|
||||
name := sw.processName()
|
||||
ch = sw.peek()
|
||||
ch = sw.scanner.Peek()
|
||||
if ch == '}' {
|
||||
// Normal ${xx} case
|
||||
sw.next()
|
||||
sw.scanner.Next()
|
||||
return sw.getEnv(name), nil
|
||||
}
|
||||
if ch == ':' {
|
||||
// Special ${xx:...} format processing
|
||||
// Yes it allows for recursive $'s in the ... spot
|
||||
|
||||
sw.next() // skip over :
|
||||
modifier := sw.next()
|
||||
sw.scanner.Next() // skip over :
|
||||
modifier := sw.scanner.Next()
|
||||
|
||||
word, err := sw.processStopOn('}')
|
||||
if err != nil {
|
||||
|
@ -207,16 +201,16 @@ func (sw *shellWord) processName() string {
|
|||
// If it starts with a numeric then just return $#
|
||||
var name string
|
||||
|
||||
for sw.pos < len(sw.word) {
|
||||
ch := sw.peek()
|
||||
for sw.scanner.Peek() != scanner.EOF {
|
||||
ch := sw.scanner.Peek()
|
||||
if len(name) == 0 && unicode.IsDigit(ch) {
|
||||
ch = sw.next()
|
||||
ch = sw.scanner.Next()
|
||||
return string(ch)
|
||||
}
|
||||
if !unicode.IsLetter(ch) && !unicode.IsDigit(ch) && ch != '_' {
|
||||
break
|
||||
}
|
||||
ch = sw.next()
|
||||
ch = sw.scanner.Next()
|
||||
name += string(ch)
|
||||
}
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ func TestShellParser(t *testing.T) {
|
|||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
envs := []string{"PWD=/home", "SHELL=bash"}
|
||||
envs := []string{"PWD=/home", "SHELL=bash", "KOREAN=한국어"}
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
|
||||
|
|
|
@ -56,3 +56,57 @@ he${PWD:=000}xx | error
|
|||
he${PWD:+${PWD}:}xx | he/home:xx
|
||||
he${XXX:-\$PWD:}xx | he$PWD:xx
|
||||
he${XXX:-\${PWD}z}xx | he${PWDz}xx
|
||||
안녕하세요 | 안녕하세요
|
||||
안'녕'하세요 | 안녕하세요
|
||||
안'녕하세요 | 안녕하세요
|
||||
안녕\'하세요 | 안녕'하세요
|
||||
안\\'녕하세요 | 안\녕하세요
|
||||
안녕\t하세요 | 안녕t하세요
|
||||
"안녕\t하세요" | 안녕\t하세요
|
||||
'안녕\t하세요 | 안녕\t하세요
|
||||
안녕하세요\ | 안녕하세요
|
||||
안녕하세요\\ | 안녕하세요\
|
||||
"안녕하세요 | 안녕하세요
|
||||
"안녕하세요\" | 안녕하세요"
|
||||
"안녕'하세요" | 안녕'하세요
|
||||
'안녕하세요 | 안녕하세요
|
||||
'안녕하세요\' | 안녕하세요\
|
||||
안녕$1x | 안녕x
|
||||
안녕$.x | 안녕$.x
|
||||
안녕$pwd. | 안녕.
|
||||
안녕$PWD | 안녕/home
|
||||
안녕\$PWD | 안녕$PWD
|
||||
안녕\\$PWD | 안녕\/home
|
||||
안녕\${} | 안녕${}
|
||||
안녕\${}xx | 안녕${}xx
|
||||
안녕${} | 안녕
|
||||
안녕${}xx | 안녕xx
|
||||
안녕${hi} | 안녕
|
||||
안녕${hi}xx | 안녕xx
|
||||
안녕${PWD} | 안녕/home
|
||||
안녕${.} | error
|
||||
안녕${XXX:-000}xx | 안녕000xx
|
||||
안녕${PWD:-000}xx | 안녕/homexx
|
||||
안녕${XXX:-$PWD}xx | 안녕/homexx
|
||||
안녕${XXX:-${PWD:-yyy}}xx | 안녕/homexx
|
||||
안녕${XXX:-${YYY:-yyy}}xx | 안녕yyyxx
|
||||
안녕${XXX:YYY} | error
|
||||
안녕${XXX:+${PWD}}xx | 안녕xx
|
||||
안녕${PWD:+${XXX}}xx | 안녕xx
|
||||
안녕${PWD:+${SHELL}}xx | 안녕bashxx
|
||||
안녕${XXX:+000}xx | 안녕xx
|
||||
안녕${PWD:+000}xx | 안녕000xx
|
||||
'안녕${XX}' | 안녕${XX}
|
||||
"안녕${PWD}" | 안녕/home
|
||||
"안녕'$PWD'" | 안녕'/home'
|
||||
'"안녕"' | "안녕"
|
||||
안녕\$PWD | 안녕$PWD
|
||||
"안녕\$PWD" | 안녕$PWD
|
||||
'안녕\$PWD' | 안녕\$PWD
|
||||
안녕${PWD | error
|
||||
안녕${PWD:=000}xx | error
|
||||
안녕${PWD:+${PWD}:}xx | 안녕/home:xx
|
||||
안녕${XXX:-\$PWD:}xx | 안녕$PWD:xx
|
||||
안녕${XXX:-\${PWD}z}xx | 안녕${PWDz}xx
|
||||
$KOREAN | 한국어
|
||||
안녕$KOREAN | 안녕한국어
|
||||
|
|
Loading…
Reference in a new issue