mirror of https://github.com/moby/moby.git
ProcessWord support UTF-8

Modified ProcessWord to work correctly with UTF-8 strings and added test cases.

Signed-off-by: Daehyeok Mun <daehyeok@gmail.com>

parent  bb5551746b
commit  bb79b7eb9e

3 changed files with 99 additions and 51 deletions
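The old parser walked sw.word with a byte index, so a multi-byte UTF-8 character such as '안' was split into individual bytes. The fix swaps that for rune-wise reads via the standard text/scanner package, whose Next/Peek return runes and report scanner.EOF at end of input. A minimal standalone sketch (not part of the commit) illustrating the difference:

package main

import (
	"fmt"
	"strings"
	"text/scanner"
)

func main() {
	word := "안녕$PWD"

	// Byte indexing yields only the first byte of the 3-byte rune '안',
	// which is how the pre-patch pos/len(word) walk corrupted UTF-8 input.
	fmt.Println(string(rune(word[0]))) // "ì", not "안"

	// text/scanner advances one rune at a time and returns scanner.EOF
	// when the input is exhausted, the idiom the patched shellWord uses.
	var s scanner.Scanner
	s.Init(strings.NewReader(word))
	for ch := s.Next(); ch != scanner.EOF; ch = s.Next() {
		fmt.Printf("%c ", ch) // 안 녕 $ P W D
	}
	fmt.Println()
}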
Changes to the shell-word parser (package dockerfile):

@@ -9,13 +9,15 @@ package dockerfile
 import (
 	"fmt"
 	"strings"
+	"text/scanner"
 	"unicode"
 )
 
 type shellWord struct {
 	word string
-	envs []string
-	pos  int
+	scanner scanner.Scanner
+	envs []string
+	pos  int
 }
 
 // ProcessWord will use the 'env' list of environment variables,
@@ -26,11 +28,12 @@ func ProcessWord(word string, env []string) (string, error) {
 		envs: env,
 		pos:  0,
 	}
+	sw.scanner.Init(strings.NewReader(word))
 	return sw.process()
 }
 
 func (sw *shellWord) process() (string, error) {
-	return sw.processStopOn('\000')
+	return sw.processStopOn(scanner.EOF)
 }
 
 // Process the word, starting at 'pos', and stop when we get to the
@@ -43,10 +46,11 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, error) {
 		'$':  sw.processDollar,
 	}
 
-	for sw.pos < len(sw.word) {
-		ch := sw.peek()
-		if stopChar != '\000' && ch == stopChar {
-			sw.next()
+	for sw.scanner.Peek() != scanner.EOF {
+		ch := sw.scanner.Peek()
+
+		if stopChar != scanner.EOF && ch == stopChar {
+			sw.scanner.Next()
 			break
 		}
 		if fn, ok := charFuncMapping[ch]; ok {
@@ -58,14 +62,19 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, error) {
 			result += tmp
 		} else {
 			// Not special, just add it to the result
-			ch = sw.next()
+			ch = sw.scanner.Next()
+
 			if ch == '\\' {
 				// '\' escapes, except end of line
-				ch = sw.next()
-				if ch == '\000' {
-					continue
+
+				ch = sw.scanner.Next()
+
+				if ch == scanner.EOF {
+					break
 				}
+
 			}
+
 			result += string(ch)
 		}
 	}
@@ -73,36 +82,21 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, error) {
 	return result, nil
 }
 
-func (sw *shellWord) peek() rune {
-	if sw.pos == len(sw.word) {
-		return '\000'
-	}
-	return rune(sw.word[sw.pos])
-}
-
-func (sw *shellWord) next() rune {
-	if sw.pos == len(sw.word) {
-		return '\000'
-	}
-	ch := rune(sw.word[sw.pos])
-	sw.pos++
-	return ch
-}
-
 func (sw *shellWord) processSingleQuote() (string, error) {
 	// All chars between single quotes are taken as-is
 	// Note, you can't escape '
 	var result string
 
-	sw.next()
+	sw.scanner.Next()
 
 	for {
-		ch := sw.next()
-		if ch == '\000' || ch == '\'' {
+		ch := sw.scanner.Next()
+		if ch == '\'' || ch == scanner.EOF {
 			break
 		}
 		result += string(ch)
 	}
 
 	return result, nil
 }
@@ -111,12 +105,12 @@ func (sw *shellWord) processDoubleQuote() (string, error) {
 	// But you can escape " with a \
 	var result string
 
-	sw.next()
+	sw.scanner.Next()
 
-	for sw.pos < len(sw.word) {
-		ch := sw.peek()
+	for sw.scanner.Peek() != scanner.EOF {
+		ch := sw.scanner.Peek()
 		if ch == '"' {
-			sw.next()
+			sw.scanner.Next()
 			break
 		}
 		if ch == '$' {
@@ -126,18 +120,18 @@ func (sw *shellWord) processDoubleQuote() (string, error) {
 			}
 			result += tmp
 		} else {
-			ch = sw.next()
+			ch = sw.scanner.Next()
 			if ch == '\\' {
-				chNext := sw.peek()
+				chNext := sw.scanner.Peek()
 
-				if chNext == '\000' {
+				if chNext == scanner.EOF {
 					// Ignore \ at end of word
 					continue
 				}
 
 				if chNext == '"' || chNext == '$' {
 					// \" and \$ can be escaped, all other \'s are left as-is
-					ch = sw.next()
+					ch = sw.scanner.Next()
 				}
 			}
 			result += string(ch)
@@ -148,23 +142,23 @@ func (sw *shellWord) processDoubleQuote() (string, error) {
 }
 
 func (sw *shellWord) processDollar() (string, error) {
-	sw.next()
-	ch := sw.peek()
+	sw.scanner.Next()
+	ch := sw.scanner.Peek()
 	if ch == '{' {
-		sw.next()
+		sw.scanner.Next()
 		name := sw.processName()
-		ch = sw.peek()
+		ch = sw.scanner.Peek()
 		if ch == '}' {
 			// Normal ${xx} case
-			sw.next()
+			sw.scanner.Next()
 			return sw.getEnv(name), nil
 		}
 		if ch == ':' {
 			// Special ${xx:...} format processing
 			// Yes it allows for recursive $'s in the ... spot
 
-			sw.next() // skip over :
-			modifier := sw.next()
+			sw.scanner.Next() // skip over :
+			modifier := sw.scanner.Next()
 
 			word, err := sw.processStopOn('}')
 			if err != nil {
@@ -207,16 +201,16 @@ func (sw *shellWord) processName() string {
 	// If it starts with a numeric then just return $#
 	var name string
 
-	for sw.pos < len(sw.word) {
-		ch := sw.peek()
+	for sw.scanner.Peek() != scanner.EOF {
+		ch := sw.scanner.Peek()
 		if len(name) == 0 && unicode.IsDigit(ch) {
-			ch = sw.next()
+			ch = sw.scanner.Next()
 			return string(ch)
 		}
 		if !unicode.IsLetter(ch) && !unicode.IsDigit(ch) && ch != '_' {
 			break
 		}
-		ch = sw.next()
+		ch = sw.scanner.Next()
 		name += string(ch)
 	}
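Every helper above now follows the same Peek/Next/scanner.EOF loop shape. Below is a trimmed-down, self-contained analogue of the patched processSingleQuote (illustrative only; the real method lives on shellWord and shares its scanner field):

package main

import (
	"fmt"
	"strings"
	"text/scanner"
)

// readSingleQuoted consumes a leading ' and returns everything up to the
// closing quote (or end of input), one rune at a time. The name and the
// free-standing signature are illustrative, not the commit's code.
func readSingleQuoted(s *scanner.Scanner) string {
	var result string
	s.Next() // skip the opening quote
	for {
		ch := s.Next()
		if ch == '\'' || ch == scanner.EOF {
			break
		}
		result += string(ch)
	}
	return result
}

func main() {
	var s scanner.Scanner
	s.Init(strings.NewReader("'안녕하세요' rest"))
	fmt.Println(readSingleQuoted(&s)) // 안녕하세요
}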
Changes to the parser test (TestShellParser):

@@ -15,7 +15,7 @@ func TestShellParser(t *testing.T) {
 	defer file.Close()
 
 	scanner := bufio.NewScanner(file)
-	envs := []string{"PWD=/home", "SHELL=bash"}
+	envs := []string{"PWD=/home", "SHELL=bash", "KOREAN=한국어"}
 	for scanner.Scan() {
 		line := scanner.Text()
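The new cases below follow the existing "input | expected" layout of the word-list file, with "error" marking inputs that must fail. The harness itself is not part of this diff; a hedged sketch of how such a line might be checked (helper name and exact splitting are assumptions):

package dockerfile

import (
	"strings"
	"testing"
)

// checkLine is a hypothetical helper mirroring the word-list layout; the
// real TestShellParser loop is not shown in this diff.
func checkLine(t *testing.T, line string, envs []string) {
	parts := strings.SplitN(line, "|", 2)
	if len(parts) != 2 {
		return // skip blank or malformed lines
	}
	input := strings.TrimSpace(parts[0])
	expected := strings.TrimSpace(parts[1])

	got, err := ProcessWord(input, envs)
	if expected == "error" {
		if err == nil {
			t.Errorf("%q: expected an error, got %q", input, got)
		}
		return
	}
	if err != nil || got != expected {
		t.Errorf("%q: got %q (err %v), want %q", input, got, err, expected)
	}
}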
Additions to the test-data word list (format: input | expected):

@@ -56,3 +56,57 @@ he${PWD:=000}xx | error
 he${PWD:+${PWD}:}xx | he/home:xx
 he${XXX:-\$PWD:}xx | he$PWD:xx
 he${XXX:-\${PWD}z}xx | he${PWDz}xx
+안녕하세요 | 안녕하세요
+안'녕'하세요 | 안녕하세요
+안'녕하세요 | 안녕하세요
+안녕\'하세요 | 안녕'하세요
+안\\'녕하세요 | 안\녕하세요
+안녕\t하세요 | 안녕t하세요
+"안녕\t하세요" | 안녕\t하세요
+'안녕\t하세요 | 안녕\t하세요
+안녕하세요\ | 안녕하세요
+안녕하세요\\ | 안녕하세요\
+"안녕하세요 | 안녕하세요
+"안녕하세요\" | 안녕하세요"
+"안녕'하세요" | 안녕'하세요
+'안녕하세요 | 안녕하세요
+'안녕하세요\' | 안녕하세요\
+안녕$1x | 안녕x
+안녕$.x | 안녕$.x
+안녕$pwd. | 안녕.
+안녕$PWD | 안녕/home
+안녕\$PWD | 안녕$PWD
+안녕\\$PWD | 안녕\/home
+안녕\${} | 안녕${}
+안녕\${}xx | 안녕${}xx
+안녕${} | 안녕
+안녕${}xx | 안녕xx
+안녕${hi} | 안녕
+안녕${hi}xx | 안녕xx
+안녕${PWD} | 안녕/home
+안녕${.} | error
+안녕${XXX:-000}xx | 안녕000xx
+안녕${PWD:-000}xx | 안녕/homexx
+안녕${XXX:-$PWD}xx | 안녕/homexx
+안녕${XXX:-${PWD:-yyy}}xx | 안녕/homexx
+안녕${XXX:-${YYY:-yyy}}xx | 안녕yyyxx
+안녕${XXX:YYY} | error
+안녕${XXX:+${PWD}}xx | 안녕xx
+안녕${PWD:+${XXX}}xx | 안녕xx
+안녕${PWD:+${SHELL}}xx | 안녕bashxx
+안녕${XXX:+000}xx | 안녕xx
+안녕${PWD:+000}xx | 안녕000xx
+'안녕${XX}' | 안녕${XX}
+"안녕${PWD}" | 안녕/home
+"안녕'$PWD'" | 안녕'/home'
+'"안녕"' | "안녕"
+안녕\$PWD | 안녕$PWD
+"안녕\$PWD" | 안녕$PWD
+'안녕\$PWD' | 안녕\$PWD
+안녕${PWD | error
+안녕${PWD:=000}xx | error
+안녕${PWD:+${PWD}:}xx | 안녕/home:xx
+안녕${XXX:-\$PWD:}xx | 안녕$PWD:xx
+안녕${XXX:-\${PWD}z}xx | 안녕${PWDz}xx
+$KOREAN | 한국어
+안녕$KOREAN | 안녕한국어