1
0
Fork 0
miniflux/vendor/github.com/tdewolff/minify/html/buffer.go
Frédéric Guillot 8ffb773f43 First commit
2017-11-19 22:01:46 -08:00

131 lines
2.9 KiB
Go

package html // import "github.com/tdewolff/minify/html"
import (
"github.com/tdewolff/parse"
"github.com/tdewolff/parse/html"
)
// Token is a single token unit with an attribute value (if given) and hash of the data.
type Token struct {
html.TokenType
Hash html.Hash
Data []byte
Text []byte
AttrVal []byte
Traits traits
}
// TokenBuffer is a buffer that allows for token look-ahead.
type TokenBuffer struct {
l *html.Lexer
buf []Token
pos int
attrBuffer []*Token
}
// NewTokenBuffer returns a new TokenBuffer.
func NewTokenBuffer(l *html.Lexer) *TokenBuffer {
return &TokenBuffer{
l: l,
buf: make([]Token, 0, 8),
}
}
func (z *TokenBuffer) read(t *Token) {
t.TokenType, t.Data = z.l.Next()
t.Text = z.l.Text()
if t.TokenType == html.AttributeToken {
t.AttrVal = z.l.AttrVal()
if len(t.AttrVal) > 1 && (t.AttrVal[0] == '"' || t.AttrVal[0] == '\'') {
t.AttrVal = parse.TrimWhitespace(t.AttrVal[1 : len(t.AttrVal)-1]) // quotes will be readded in attribute loop if necessary
}
t.Hash = html.ToHash(t.Text)
t.Traits = attrMap[t.Hash]
} else if t.TokenType == html.StartTagToken || t.TokenType == html.EndTagToken {
t.AttrVal = nil
t.Hash = html.ToHash(t.Text)
t.Traits = tagMap[t.Hash]
} else {
t.AttrVal = nil
t.Hash = 0
t.Traits = 0
}
}
// Peek returns the ith element and possibly does an allocation.
// Peeking past an error will panic.
func (z *TokenBuffer) Peek(pos int) *Token {
pos += z.pos
if pos >= len(z.buf) {
if len(z.buf) > 0 && z.buf[len(z.buf)-1].TokenType == html.ErrorToken {
return &z.buf[len(z.buf)-1]
}
c := cap(z.buf)
d := len(z.buf) - z.pos
p := pos - z.pos + 1 // required peek length
var buf []Token
if 2*p > c {
buf = make([]Token, 0, 2*c+p)
} else {
buf = z.buf
}
copy(buf[:d], z.buf[z.pos:])
buf = buf[:p]
pos -= z.pos
for i := d; i < p; i++ {
z.read(&buf[i])
if buf[i].TokenType == html.ErrorToken {
buf = buf[:i+1]
pos = i
break
}
}
z.pos, z.buf = 0, buf
}
return &z.buf[pos]
}
// Shift returns the first element and advances position.
func (z *TokenBuffer) Shift() *Token {
if z.pos >= len(z.buf) {
t := &z.buf[:1][0]
z.read(t)
return t
}
t := &z.buf[z.pos]
z.pos++
return t
}
// Attributes extracts the gives attribute hashes from a tag.
// It returns in the same order pointers to the requested token data or nil.
func (z *TokenBuffer) Attributes(hashes ...html.Hash) []*Token {
n := 0
for {
if t := z.Peek(n); t.TokenType != html.AttributeToken {
break
}
n++
}
if len(hashes) > cap(z.attrBuffer) {
z.attrBuffer = make([]*Token, len(hashes))
} else {
z.attrBuffer = z.attrBuffer[:len(hashes)]
for i := range z.attrBuffer {
z.attrBuffer[i] = nil
}
}
for i := z.pos; i < z.pos+n; i++ {
attr := &z.buf[i]
for j, hash := range hashes {
if hash == attr.Hash {
z.attrBuffer[j] = attr
}
}
}
return z.attrBuffer
}