diff --git a/hack/vendor.sh b/hack/vendor.sh index af575f0f82..444bd9075f 100755 --- a/hack/vendor.sh +++ b/hack/vendor.sh @@ -92,7 +92,7 @@ clone git github.com/miekg/dns 75e6e86cc601825c5dbcd4e0c209eab180997cd7 # get graph and distribution packages clone git github.com/docker/distribution 77b9d2997abcded79a5314970fe69a44c93c25fb -clone git github.com/vbatts/tar-split v0.9.11 +clone git github.com/vbatts/tar-split v0.10.1 # get go-zfs packages clone git github.com/mistifyio/go-zfs 22c9b32c84eb0d0c6f4043b6e90fc94073de92fa diff --git a/vendor/src/github.com/vbatts/tar-split/LICENSE b/vendor/src/github.com/vbatts/tar-split/LICENSE index 8ba549196e..ca03685b15 100644 --- a/vendor/src/github.com/vbatts/tar-split/LICENSE +++ b/vendor/src/github.com/vbatts/tar-split/LICENSE @@ -1,19 +1,28 @@ Copyright (c) 2015 Vincent Batts, Raleigh, NC, USA -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: +All rights reserved. -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/common.go b/vendor/src/github.com/vbatts/tar-split/archive/tar/common.go index c31df062f7..36f4e23980 100644 --- a/vendor/src/github.com/vbatts/tar-split/archive/tar/common.go +++ b/vendor/src/github.com/vbatts/tar-split/archive/tar/common.go @@ -327,3 +327,14 @@ func toASCII(s string) string { } return buf.String() } + +// isHeaderOnlyType checks if the given type flag is of the type that has no +// data section even if a size is specified. +func isHeaderOnlyType(flag byte) bool { + switch flag { + case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo: + return true + default: + return false + } +} diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/reader.go b/vendor/src/github.com/vbatts/tar-split/archive/tar/reader.go index 4168ea2cc7..adf32122e1 100644 --- a/vendor/src/github.com/vbatts/tar-split/archive/tar/reader.go +++ b/vendor/src/github.com/vbatts/tar-split/archive/tar/reader.go @@ -12,6 +12,7 @@ import ( "errors" "io" "io/ioutil" + "math" "os" "strconv" "strings" @@ -39,6 +40,10 @@ type Reader struct { rawBytes *bytes.Buffer // last raw bits } +type parser struct { + err error // Last error seen +} + // RawBytes accesses the raw bytes of the archive, apart from the file payload itself. // This includes the header and padding. // @@ -70,12 +75,36 @@ type regFileReader struct { nb int64 // number of unread bytes for current file entry } -// A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive. +// A sparseFileReader is a numBytesReader for reading sparse file data from a +// tar archive. type sparseFileReader struct { - rfr *regFileReader // reads the sparse-encoded file data - sp []sparseEntry // the sparse map for the file - pos int64 // keeps track of file position - tot int64 // total size of the file + rfr numBytesReader // Reads the sparse-encoded file data + sp []sparseEntry // The sparse map for the file + pos int64 // Keeps track of file position + total int64 // Total size of the file +} + +// A sparseEntry holds a single entry in a sparse file's sparse map. +// +// Sparse files are represented using a series of sparseEntrys. +// Despite the name, a sparseEntry represents an actual data fragment that +// references data found in the underlying archive stream. All regions not +// covered by a sparseEntry are logically filled with zeros. +// +// For example, if the underlying raw file contains the 10-byte data: +// var compactData = "abcdefgh" +// +// And the sparse map has the following entries: +// var sp = []sparseEntry{ +// {offset: 2, numBytes: 5} // Data fragment for [2..7] +// {offset: 18, numBytes: 3} // Data fragment for [18..21] +// } +// +// Then the content of the resulting sparse file with a "real" size of 25 is: +// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 +type sparseEntry struct { + offset int64 // Starting position of the fragment + numBytes int64 // Length of the fragment } // Keywords for GNU sparse files in a PAX extended header @@ -109,7 +138,6 @@ func NewReader(r io.Reader) *Reader { return &Reader{r: r} } // // io.EOF is returned at the end of the input. func (tr *Reader) Next() (*Header, error) { - var hdr *Header if tr.RawAccounting { if tr.rawBytes == nil { tr.rawBytes = bytes.NewBuffer(nil) @@ -117,98 +145,88 @@ func (tr *Reader) Next() (*Header, error) { tr.rawBytes.Reset() } } - if tr.err == nil { - tr.skipUnread() - } + if tr.err != nil { - return hdr, tr.err + return nil, tr.err } - hdr = tr.readHeader() - if hdr == nil { - return hdr, tr.err - } - // Check for PAX/GNU header. - switch hdr.Typeflag { - case TypeXHeader: - // PAX extended header - headers, err := parsePAX(tr) - if err != nil { - return nil, err - } - // We actually read the whole file, - // but this skips alignment padding - tr.skipUnread() + + var hdr *Header + var extHdrs map[string]string + + // Externally, Next iterates through the tar archive as if it is a series of + // files. Internally, the tar format often uses fake "files" to add meta + // data that describes the next file. These meta data "files" should not + // normally be visible to the outside. As such, this loop iterates through + // one or more "header files" until it finds a "normal file". +loop: + for { + tr.err = tr.skipUnread() if tr.err != nil { return nil, tr.err } + hdr = tr.readHeader() - if hdr == nil { + if tr.err != nil { return nil, tr.err } - mergePAX(hdr, headers) + // Check for PAX/GNU special headers and files. + switch hdr.Typeflag { + case TypeXHeader: + extHdrs, tr.err = parsePAX(tr) + if tr.err != nil { + return nil, tr.err + } + continue loop // This is a meta header affecting the next header + case TypeGNULongName, TypeGNULongLink: + var realname []byte + realname, tr.err = ioutil.ReadAll(tr) + if tr.err != nil { + return nil, tr.err + } - // Check for a PAX format sparse file - sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers) - if err != nil { - tr.err = err - return nil, err - } - if sp != nil { - // Current file is a PAX format GNU sparse file. - // Set the current file reader to a sparse file reader. - tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} - } - return hdr, nil - case TypeGNULongName: - // We have a GNU long name header. Its contents are the real file name. - realname, err := ioutil.ReadAll(tr) - if err != nil { - return nil, err - } - var buf []byte - if tr.RawAccounting { - if _, err = tr.rawBytes.Write(realname); err != nil { + if tr.RawAccounting { + if _, tr.err = tr.rawBytes.Write(realname); tr.err != nil { + return nil, tr.err + } + } + + // Convert GNU extensions to use PAX headers. + if extHdrs == nil { + extHdrs = make(map[string]string) + } + var p parser + switch hdr.Typeflag { + case TypeGNULongName: + extHdrs[paxPath] = p.parseString(realname) + case TypeGNULongLink: + extHdrs[paxLinkpath] = p.parseString(realname) + } + if p.err != nil { + tr.err = p.err + return nil, tr.err + } + continue loop // This is a meta header affecting the next header + default: + mergePAX(hdr, extHdrs) + + // Check for a PAX format sparse file + sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) + if err != nil { + tr.err = err return nil, err } - buf = make([]byte, tr.rawBytes.Len()) - copy(buf[:], tr.RawBytes()) - } - hdr, err := tr.Next() - // since the above call to Next() resets the buffer, we need to throw the bytes over - if tr.RawAccounting { - buf = append(buf, tr.RawBytes()...) - if _, err = tr.rawBytes.Write(buf); err != nil { - return nil, err + if sp != nil { + // Current file is a PAX format GNU sparse file. + // Set the current file reader to a sparse file reader. + tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) + if tr.err != nil { + return nil, tr.err + } } + break loop // This is a file, so stop } - hdr.Name = cString(realname) - return hdr, err - case TypeGNULongLink: - // We have a GNU long link header. - realname, err := ioutil.ReadAll(tr) - if err != nil { - return nil, err - } - var buf []byte - if tr.RawAccounting { - if _, err = tr.rawBytes.Write(realname); err != nil { - return nil, err - } - buf = make([]byte, tr.rawBytes.Len()) - copy(buf[:], tr.RawBytes()) - } - hdr, err := tr.Next() - // since the above call to Next() resets the buffer, we need to throw the bytes over - if tr.RawAccounting { - buf = append(buf, tr.RawBytes()...) - if _, err = tr.rawBytes.Write(buf); err != nil { - return nil, err - } - } - hdr.Linkname = cString(realname) - return hdr, err } - return hdr, tr.err + return hdr, nil } // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then @@ -385,6 +403,7 @@ func parsePAX(r io.Reader) (map[string]string, error) { return nil, err } } + sbuf := string(buf) // For GNU PAX sparse format 0.0 support. // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers. @@ -393,35 +412,17 @@ func parsePAX(r io.Reader) (map[string]string, error) { headers := make(map[string]string) // Each record is constructed as // "%d %s=%s\n", length, keyword, value - for len(buf) > 0 { - // or the header was empty to start with. - var sp int - // The size field ends at the first space. - sp = bytes.IndexByte(buf, ' ') - if sp == -1 { + for len(sbuf) > 0 { + key, value, residual, err := parsePAXRecord(sbuf) + if err != nil { return nil, ErrHeader } - // Parse the first token as a decimal integer. - n, err := strconv.ParseInt(string(buf[:sp]), 10, 0) - if err != nil || n < 5 || int64(len(buf)) < n { - return nil, ErrHeader - } - // Extract everything between the decimal and the n -1 on the - // beginning to eat the ' ', -1 on the end to skip the newline. - var record []byte - record, buf = buf[sp+1:n-1], buf[n:] - // The first equals is guaranteed to mark the end of the key. - // Everything else is value. - eq := bytes.IndexByte(record, '=') - if eq == -1 { - return nil, ErrHeader - } - key, value := record[:eq], record[eq+1:] + sbuf = residual keyStr := string(key) if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes { // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map. - sparseMap.Write(value) + sparseMap.WriteString(value) sparseMap.Write([]byte{','}) } else { // Normal key. Set the value in the headers map. @@ -436,9 +437,42 @@ func parsePAX(r io.Reader) (map[string]string, error) { return headers, nil } -// cString parses bytes as a NUL-terminated C-style string. +// parsePAXRecord parses the input PAX record string into a key-value pair. +// If parsing is successful, it will slice off the currently read record and +// return the remainder as r. +// +// A PAX record is of the following form: +// "%d %s=%s\n" % (size, key, value) +func parsePAXRecord(s string) (k, v, r string, err error) { + // The size field ends at the first space. + sp := strings.IndexByte(s, ' ') + if sp == -1 { + return "", "", s, ErrHeader + } + + // Parse the first token as a decimal integer. + n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int + if perr != nil || n < 5 || int64(len(s)) < n { + return "", "", s, ErrHeader + } + + // Extract everything between the space and the final newline. + rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] + if nl != "\n" { + return "", "", s, ErrHeader + } + + // The first equals separates the key from the value. + eq := strings.IndexByte(rec, '=') + if eq == -1 { + return "", "", s, ErrHeader + } + return rec[:eq], rec[eq+1:], rem, nil +} + +// parseString parses bytes as a NUL-terminated C-style string. // If a NUL byte is not found then the whole slice is returned as a string. -func cString(b []byte) string { +func (*parser) parseString(b []byte) string { n := 0 for n < len(b) && b[n] != 0 { n++ @@ -446,19 +480,51 @@ func cString(b []byte) string { return string(b[0:n]) } -func (tr *Reader) octal(b []byte) int64 { - // Check for binary format first. +// parseNumeric parses the input as being encoded in either base-256 or octal. +// This function may return negative numbers. +// If parsing fails or an integer overflow occurs, err will be set. +func (p *parser) parseNumeric(b []byte) int64 { + // Check for base-256 (binary) format first. + // If the first bit is set, then all following bits constitute a two's + // complement encoded number in big-endian byte order. if len(b) > 0 && b[0]&0x80 != 0 { - var x int64 - for i, c := range b { - if i == 0 { - c &= 0x7f // ignore signal bit in first byte - } - x = x<<8 | int64(c) + // Handling negative numbers relies on the following identity: + // -a-1 == ^a + // + // If the number is negative, we use an inversion mask to invert the + // data bytes and treat the value as an unsigned number. + var inv byte // 0x00 if positive or zero, 0xff if negative + if b[0]&0x40 != 0 { + inv = 0xff } - return x + + var x uint64 + for i, c := range b { + c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing + if i == 0 { + c &= 0x7f // Ignore signal bit in first byte + } + if (x >> 56) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + x = x<<8 | uint64(c) + } + if (x >> 63) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + if inv == 0xff { + return ^int64(x) + } + return int64(x) } + // Normal case is base-8 (octal) format. + return p.parseOctal(b) +} + +func (p *parser) parseOctal(b []byte) int64 { // Because unused fields are filled with NULs, we need // to skip leading NULs. Fields may also be padded with // spaces or NULs. @@ -469,27 +535,55 @@ func (tr *Reader) octal(b []byte) int64 { if len(b) == 0 { return 0 } - x, err := strconv.ParseUint(cString(b), 8, 64) - if err != nil { - tr.err = err + x, perr := strconv.ParseUint(p.parseString(b), 8, 64) + if perr != nil { + p.err = ErrHeader } return int64(x) } -// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding. -func (tr *Reader) skipUnread() { - nr := tr.numBytes() + tr.pad // number of bytes to skip +// skipUnread skips any unread bytes in the existing file entry, as well as any +// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is +// encountered in the data portion; it is okay to hit io.EOF in the padding. +// +// Note that this function still works properly even when sparse files are being +// used since numBytes returns the bytes remaining in the underlying io.Reader. +func (tr *Reader) skipUnread() error { + dataSkip := tr.numBytes() // Number of data bytes to skip + totalSkip := dataSkip + tr.pad // Total number of bytes to skip tr.curr, tr.pad = nil, 0 if tr.RawAccounting { - _, tr.err = io.CopyN(tr.rawBytes, tr.r, nr) - return + _, tr.err = io.CopyN(tr.rawBytes, tr.r, totalSkip) + return tr.err } - if sr, ok := tr.r.(io.Seeker); ok { - if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil { - return + // If possible, Seek to the last byte before the end of the data section. + // Do this because Seek is often lazy about reporting errors; this will mask + // the fact that the tar stream may be truncated. We can rely on the + // io.CopyN done shortly afterwards to trigger any IO errors. + var seekSkipped int64 // Number of bytes skipped via Seek + if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 { + // Not all io.Seeker can actually Seek. For example, os.Stdin implements + // io.Seeker, but calling Seek always returns an error and performs + // no action. Thus, we try an innocent seek to the current position + // to see if Seek is really supported. + pos1, err := sr.Seek(0, os.SEEK_CUR) + if err == nil { + // Seek seems supported, so perform the real Seek. + pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR) + if err != nil { + tr.err = err + return tr.err + } + seekSkipped = pos2 - pos1 } } - _, tr.err = io.CopyN(ioutil.Discard, tr.r, nr) + + var copySkipped int64 // Number of bytes skipped via CopyN + copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) + if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip { + tr.err = io.ErrUnexpectedEOF + } + return tr.err } func (tr *Reader) verifyChecksum(header []byte) bool { @@ -497,23 +591,32 @@ func (tr *Reader) verifyChecksum(header []byte) bool { return false } - given := tr.octal(header[148:156]) + var p parser + given := p.parseOctal(header[148:156]) unsigned, signed := checksum(header) - return given == unsigned || given == signed + return p.err == nil && (given == unsigned || given == signed) } +// readHeader reads the next block header and assumes that the underlying reader +// is already aligned to a block boundary. +// +// The err will be set to io.EOF only when one of the following occurs: +// * Exactly 0 bytes are read and EOF is hit. +// * Exactly 1 block of zeros is read and EOF is hit. +// * At least 2 blocks of zeros are read. func (tr *Reader) readHeader() *Header { header := tr.hdrBuff[:] copy(header, zeroBlock) - if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { + if n, err := io.ReadFull(tr.r, header); err != nil { + tr.err = err // because it could read some of the block, but reach EOF first if tr.err == io.EOF && tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { - return nil + if _, err := tr.rawBytes.Write(header[:n]); err != nil { + tr.err = err } } - return nil + return nil // io.EOF is okay here } if tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { @@ -523,14 +626,15 @@ func (tr *Reader) readHeader() *Header { // Two blocks of zero bytes marks the end of the archive. if bytes.Equal(header, zeroBlock[0:blockSize]) { - if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { + if n, err := io.ReadFull(tr.r, header); err != nil { + tr.err = err // because it could read some of the block, but reach EOF first if tr.err == io.EOF && tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { - return nil + if _, err := tr.rawBytes.Write(header[:n]); err != nil { + tr.err = err } } - return nil + return nil // io.EOF is okay here } if tr.RawAccounting { if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { @@ -551,22 +655,19 @@ func (tr *Reader) readHeader() *Header { } // Unpack + var p parser hdr := new(Header) s := slicer(header) - hdr.Name = cString(s.next(100)) - hdr.Mode = tr.octal(s.next(8)) - hdr.Uid = int(tr.octal(s.next(8))) - hdr.Gid = int(tr.octal(s.next(8))) - hdr.Size = tr.octal(s.next(12)) - if hdr.Size < 0 { - tr.err = ErrHeader - return nil - } - hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0) + hdr.Name = p.parseString(s.next(100)) + hdr.Mode = p.parseNumeric(s.next(8)) + hdr.Uid = int(p.parseNumeric(s.next(8))) + hdr.Gid = int(p.parseNumeric(s.next(8))) + hdr.Size = p.parseNumeric(s.next(12)) + hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0) s.next(8) // chksum hdr.Typeflag = s.next(1)[0] - hdr.Linkname = cString(s.next(100)) + hdr.Linkname = p.parseString(s.next(100)) // The remainder of the header depends on the value of magic. // The original (v7) version of tar had no explicit magic field, @@ -586,70 +687,76 @@ func (tr *Reader) readHeader() *Header { switch format { case "posix", "gnu", "star": - hdr.Uname = cString(s.next(32)) - hdr.Gname = cString(s.next(32)) + hdr.Uname = p.parseString(s.next(32)) + hdr.Gname = p.parseString(s.next(32)) devmajor := s.next(8) devminor := s.next(8) if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { - hdr.Devmajor = tr.octal(devmajor) - hdr.Devminor = tr.octal(devminor) + hdr.Devmajor = p.parseNumeric(devmajor) + hdr.Devminor = p.parseNumeric(devminor) } var prefix string switch format { case "posix", "gnu": - prefix = cString(s.next(155)) + prefix = p.parseString(s.next(155)) case "star": - prefix = cString(s.next(131)) - hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0) - hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0) + prefix = p.parseString(s.next(131)) + hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0) + hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0) } if len(prefix) > 0 { hdr.Name = prefix + "/" + hdr.Name } } - if tr.err != nil { + if p.err != nil { + tr.err = p.err + return nil + } + + nb := hdr.Size + if isHeaderOnlyType(hdr.Typeflag) { + nb = 0 + } + if nb < 0 { tr.err = ErrHeader return nil } - // Maximum value of hdr.Size is 64 GB (12 octal digits), - // so there's no risk of int64 overflowing. - nb := int64(hdr.Size) - tr.pad = -nb & (blockSize - 1) // blockSize is a power of two - // Set the current file reader. + tr.pad = -nb & (blockSize - 1) // blockSize is a power of two tr.curr = ®FileReader{r: tr.r, nb: nb} // Check for old GNU sparse format entry. if hdr.Typeflag == TypeGNUSparse { // Get the real size of the file. - hdr.Size = tr.octal(header[483:495]) + hdr.Size = p.parseNumeric(header[483:495]) + if p.err != nil { + tr.err = p.err + return nil + } // Read the sparse map. sp := tr.readOldGNUSparseMap(header) if tr.err != nil { return nil } + // Current file is a GNU sparse file. Update the current file reader. - tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} + tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) + if tr.err != nil { + return nil + } } return hdr } -// A sparseEntry holds a single entry in a sparse file's sparse map. -// A sparse entry indicates the offset and size in a sparse file of a -// block of data. -type sparseEntry struct { - offset int64 - numBytes int64 -} - // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, // then one or more extension headers are used to store the rest of the sparse map. func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { + var p parser isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0 spCap := oldGNUSparseMainHeaderNumEntries if isExtended { @@ -660,10 +767,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { // Read the four entries from the main tar header for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ { - offset := tr.octal(s.next(oldGNUSparseOffsetSize)) - numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) - if tr.err != nil { - tr.err = ErrHeader + offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) + numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) + if p.err != nil { + tr.err = p.err return nil } if offset == 0 && numBytes == 0 { @@ -687,10 +794,10 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0 s = slicer(sparseHeader) for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ { - offset := tr.octal(s.next(oldGNUSparseOffsetSize)) - numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) - if tr.err != nil { - tr.err = ErrHeader + offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) + numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) + if p.err != nil { + tr.err = p.err return nil } if offset == 0 && numBytes == 0 { @@ -702,134 +809,111 @@ func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { return sp } -// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0. -// The sparse map is stored just before the file data and padded out to the nearest block boundary. +// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format +// version 1.0. The format of the sparse map consists of a series of +// newline-terminated numeric fields. The first field is the number of entries +// and is always present. Following this are the entries, consisting of two +// fields (offset, numBytes). This function must stop reading at the end +// boundary of the block containing the last newline. +// +// Note that the GNU manual says that numeric values should be encoded in octal +// format. However, the GNU tar utility itself outputs these values in decimal. +// As such, this library treats values as being encoded in decimal. func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { - buf := make([]byte, 2*blockSize) - sparseHeader := buf[:blockSize] + var cntNewline int64 + var buf bytes.Buffer + var blk = make([]byte, blockSize) - // readDecimal is a helper function to read a decimal integer from the sparse map - // while making sure to read from the file in blocks of size blockSize - readDecimal := func() (int64, error) { - // Look for newline - nl := bytes.IndexByte(sparseHeader, '\n') - if nl == -1 { - if len(sparseHeader) >= blockSize { - // This is an error - return 0, ErrHeader + // feedTokens copies data in numBlock chunks from r into buf until there are + // at least cnt newlines in buf. It will not read more blocks than needed. + var feedTokens = func(cnt int64) error { + for cntNewline < cnt { + if _, err := io.ReadFull(r, blk); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return err } - oldLen := len(sparseHeader) - newLen := oldLen + blockSize - if cap(sparseHeader) < newLen { - // There's more header, but we need to make room for the next block - copy(buf, sparseHeader) - sparseHeader = buf[:newLen] - } else { - // There's more header, and we can just reslice - sparseHeader = sparseHeader[:newLen] - } - - // Now that sparseHeader is large enough, read next block - if _, err := io.ReadFull(r, sparseHeader[oldLen:newLen]); err != nil { - return 0, err - } - // leaving this function for io.Reader makes it more testable - if tr, ok := r.(*Reader); ok && tr.RawAccounting { - if _, err := tr.rawBytes.Write(sparseHeader[oldLen:newLen]); err != nil { - return 0, err + buf.Write(blk) + for _, c := range blk { + if c == '\n' { + cntNewline++ } } - - // Look for a newline in the new data - nl = bytes.IndexByte(sparseHeader[oldLen:newLen], '\n') - if nl == -1 { - // This is an error - return 0, ErrHeader - } - nl += oldLen // We want the position from the beginning } - // Now that we've found a newline, read a number - n, err := strconv.ParseInt(string(sparseHeader[:nl]), 10, 0) - if err != nil { - return 0, ErrHeader - } - - // Update sparseHeader to consume this number - sparseHeader = sparseHeader[nl+1:] - return n, nil + return nil } - // Read the first block - if _, err := io.ReadFull(r, sparseHeader); err != nil { + // nextToken gets the next token delimited by a newline. This assumes that + // at least one newline exists in the buffer. + var nextToken = func() string { + cntNewline-- + tok, _ := buf.ReadString('\n') + return tok[:len(tok)-1] // Cut off newline + } + + // Parse for the number of entries. + // Use integer overflow resistant math to check this. + if err := feedTokens(1); err != nil { return nil, err } - // leaving this function for io.Reader makes it more testable - if tr, ok := r.(*Reader); ok && tr.RawAccounting { - if _, err := tr.rawBytes.Write(sparseHeader); err != nil { - return nil, err - } + numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int + if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { + return nil, ErrHeader } - // The first line contains the number of entries - numEntries, err := readDecimal() - if err != nil { + // Parse for all member entries. + // numEntries is trusted after this since a potential attacker must have + // committed resources proportional to what this library used. + if err := feedTokens(2 * numEntries); err != nil { return nil, err } - - // Read all the entries sp := make([]sparseEntry, 0, numEntries) for i := int64(0); i < numEntries; i++ { - // Read the offset - offset, err := readDecimal() + offset, err := strconv.ParseInt(nextToken(), 10, 64) if err != nil { - return nil, err + return nil, ErrHeader } - // Read numBytes - numBytes, err := readDecimal() + numBytes, err := strconv.ParseInt(nextToken(), 10, 64) if err != nil { - return nil, err + return nil, ErrHeader } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } - return sp, nil } -// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format version 0.1. -// The sparse map is stored in the PAX headers. -func readGNUSparseMap0x1(headers map[string]string) ([]sparseEntry, error) { - // Get number of entries - numEntriesStr, ok := headers[paxGNUSparseNumBlocks] - if !ok { - return nil, ErrHeader - } - numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) - if err != nil { +// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format +// version 0.1. The sparse map is stored in the PAX headers. +func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { + // Get number of entries. + // Use integer overflow resistant math to check this. + numEntriesStr := extHdrs[paxGNUSparseNumBlocks] + numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int + if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { return nil, ErrHeader } - sparseMap := strings.Split(headers[paxGNUSparseMap], ",") - - // There should be two numbers in sparseMap for each entry + // There should be two numbers in sparseMap for each entry. + sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",") if int64(len(sparseMap)) != 2*numEntries { return nil, ErrHeader } - // Loop through the entries in the sparse map + // Loop through the entries in the sparse map. + // numEntries is trusted now. sp := make([]sparseEntry, 0, numEntries) for i := int64(0); i < numEntries; i++ { - offset, err := strconv.ParseInt(sparseMap[2*i], 10, 0) + offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) if err != nil { return nil, ErrHeader } - numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 0) + numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) if err != nil { return nil, ErrHeader } sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) } - return sp, nil } @@ -846,10 +930,18 @@ func (tr *Reader) numBytes() int64 { // Read reads from the current entry in the tar archive. // It returns 0, io.EOF when it reaches the end of that entry, // until Next is called to advance to the next entry. +// +// Calling Read on special types like TypeLink, TypeSymLink, TypeChar, +// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what +// the Header.Size claims. func (tr *Reader) Read(b []byte) (n int, err error) { + if tr.err != nil { + return 0, tr.err + } if tr.curr == nil { return 0, io.EOF } + n, err = tr.curr.Read(b) if err != nil && err != io.EOF { tr.err = err @@ -879,9 +971,33 @@ func (rfr *regFileReader) numBytes() int64 { return rfr.nb } -// readHole reads a sparse file hole ending at offset toOffset -func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int { - n64 := toOffset - sfr.pos +// newSparseFileReader creates a new sparseFileReader, but validates all of the +// sparse entries before doing so. +func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { + if total < 0 { + return nil, ErrHeader // Total size cannot be negative + } + + // Validate all sparse entries. These are the same checks as performed by + // the BSD tar utility. + for i, s := range sp { + switch { + case s.offset < 0 || s.numBytes < 0: + return nil, ErrHeader // Negative values are never okay + case s.offset > math.MaxInt64-s.numBytes: + return nil, ErrHeader // Integer overflow with large length + case s.offset+s.numBytes > total: + return nil, ErrHeader // Region extends beyond the "real" size + case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset: + return nil, ErrHeader // Regions can't overlap and must be in order + } + } + return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil +} + +// readHole reads a sparse hole ending at endOffset. +func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { + n64 := endOffset - sfr.pos if n64 > int64(len(b)) { n64 = int64(len(b)) } @@ -895,49 +1011,54 @@ func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int { // Read reads the sparse file data in expanded form. func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { - if len(sfr.sp) == 0 { - // No more data fragments to read from. - if sfr.pos < sfr.tot { - // We're in the last hole - n = sfr.readHole(b, sfr.tot) - return - } - // Otherwise, we're at the end of the file - return 0, io.EOF - } - if sfr.tot < sfr.sp[0].offset { - return 0, io.ErrUnexpectedEOF - } - if sfr.pos < sfr.sp[0].offset { - // We're in a hole - n = sfr.readHole(b, sfr.sp[0].offset) - return + // Skip past all empty fragments. + for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 { + sfr.sp = sfr.sp[1:] } - // We're not in a hole, so we'll read from the next data fragment - posInFragment := sfr.pos - sfr.sp[0].offset - bytesLeft := sfr.sp[0].numBytes - posInFragment + // If there are no more fragments, then it is possible that there + // is one last sparse hole. + if len(sfr.sp) == 0 { + // This behavior matches the BSD tar utility. + // However, GNU tar stops returning data even if sfr.total is unmet. + if sfr.pos < sfr.total { + return sfr.readHole(b, sfr.total), nil + } + return 0, io.EOF + } + + // In front of a data fragment, so read a hole. + if sfr.pos < sfr.sp[0].offset { + return sfr.readHole(b, sfr.sp[0].offset), nil + } + + // In a data fragment, so read from it. + // This math is overflow free since we verify that offset and numBytes can + // be safely added when creating the sparseFileReader. + endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment + bytesLeft := endPos - sfr.pos // Bytes left in fragment if int64(len(b)) > bytesLeft { - b = b[0:bytesLeft] + b = b[:bytesLeft] } n, err = sfr.rfr.Read(b) sfr.pos += int64(n) - - if int64(n) == bytesLeft { - // We're done with this fragment - sfr.sp = sfr.sp[1:] + if err == io.EOF { + if sfr.pos < endPos { + err = io.ErrUnexpectedEOF // There was supposed to be more data + } else if sfr.pos < sfr.total { + err = nil // There is still an implicit sparse hole at the end + } } - if err == io.EOF && sfr.pos < sfr.tot { - // We reached the end of the last fragment's data, but there's a final hole - err = nil + if sfr.pos == endPos { + sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it } - return + return n, err } // numBytes returns the number of bytes left to read in the sparse file's // sparse-encoded data in the tar archive. func (sfr *sparseFileReader) numBytes() int64 { - return sfr.rfr.nb + return sfr.rfr.numBytes() } diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/writer.go b/vendor/src/github.com/vbatts/tar-split/archive/tar/writer.go index 9dbc01a2ff..042638175c 100644 --- a/vendor/src/github.com/vbatts/tar-split/archive/tar/writer.go +++ b/vendor/src/github.com/vbatts/tar-split/archive/tar/writer.go @@ -12,8 +12,8 @@ import ( "errors" "fmt" "io" - "os" "path" + "sort" "strconv" "strings" "time" @@ -23,7 +23,6 @@ var ( ErrWriteTooLong = errors.New("archive/tar: write too long") ErrFieldTooLong = errors.New("archive/tar: header field too long") ErrWriteAfterClose = errors.New("archive/tar: write after close") - errNameTooLong = errors.New("archive/tar: name too long") errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values") ) @@ -43,6 +42,10 @@ type Writer struct { paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header } +type formatter struct { + err error // Last error seen +} + // NewWriter creates a new Writer writing to w. func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } @@ -69,17 +72,9 @@ func (tw *Writer) Flush() error { } // Write s into b, terminating it with a NUL if there is room. -// If the value is too long for the field and allowPax is true add a paxheader record instead -func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, paxHeaders map[string]string) { - needsPaxHeader := allowPax && len(s) > len(b) || !isASCII(s) - if needsPaxHeader { - paxHeaders[paxKeyword] = s - return - } +func (f *formatter) formatString(b []byte, s string) { if len(s) > len(b) { - if tw.err == nil { - tw.err = ErrFieldTooLong - } + f.err = ErrFieldTooLong return } ascii := toASCII(s) @@ -90,40 +85,40 @@ func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, } // Encode x as an octal ASCII string and write it into b with leading zeros. -func (tw *Writer) octal(b []byte, x int64) { +func (f *formatter) formatOctal(b []byte, x int64) { s := strconv.FormatInt(x, 8) // leading zeros, but leave room for a NUL. for len(s)+1 < len(b) { s = "0" + s } - tw.cString(b, s, false, paxNone, nil) + f.formatString(b, s) } -// Write x into b, either as octal or as binary (GNUtar/star extension). -// If the value is too long for the field and writingPax is enabled both for the field and the add a paxheader record instead -func (tw *Writer) numeric(b []byte, x int64, allowPax bool, paxKeyword string, paxHeaders map[string]string) { - // Try octal first. - s := strconv.FormatInt(x, 8) - if len(s) < len(b) { - tw.octal(b, x) +// fitsInBase256 reports whether x can be encoded into n bytes using base-256 +// encoding. Unlike octal encoding, base-256 encoding does not require that the +// string ends with a NUL character. Thus, all n bytes are available for output. +// +// If operating in binary mode, this assumes strict GNU binary mode; which means +// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is +// equivalent to the sign bit in two's complement form. +func fitsInBase256(n int, x int64) bool { + var binBits = uint(n-1) * 8 + return n >= 9 || (x >= -1<= 0; i-- { + b[i] = byte(x) + x >>= 8 + } + b[0] |= 0x80 // Highest bit indicates binary format return } - // If it is too long for octal, and pax is preferred, use a pax header - if allowPax && tw.preferPax { - tw.octal(b, 0) - s := strconv.FormatInt(x, 10) - paxHeaders[paxKeyword] = s - return - } - - // Too big: use binary (big-endian). - tw.usedBinary = true - for i := len(b) - 1; x > 0 && i >= 0; i-- { - b[i] = byte(x) - x >>= 8 - } - b[0] |= 0x80 // highest bit indicates binary format + f.formatOctal(b, 0) // Last resort, just write zero + f.err = ErrFieldTooLong } var ( @@ -162,6 +157,7 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { // subsecond time resolution, but for now let's just capture // too long fields or non ascii characters + var f formatter var header []byte // We need to select which scratch buffer to use carefully, @@ -176,10 +172,40 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { copy(header, zeroBlock) s := slicer(header) + // Wrappers around formatter that automatically sets paxHeaders if the + // argument extends beyond the capacity of the input byte slice. + var formatString = func(b []byte, s string, paxKeyword string) { + needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s) + if needsPaxHeader { + paxHeaders[paxKeyword] = s + return + } + f.formatString(b, s) + } + var formatNumeric = func(b []byte, x int64, paxKeyword string) { + // Try octal first. + s := strconv.FormatInt(x, 8) + if len(s) < len(b) { + f.formatOctal(b, x) + return + } + + // If it is too long for octal, and PAX is preferred, use a PAX header. + if paxKeyword != paxNone && tw.preferPax { + f.formatOctal(b, 0) + s := strconv.FormatInt(x, 10) + paxHeaders[paxKeyword] = s + return + } + + tw.usedBinary = true + f.formatNumeric(b, x) + } + // keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax pathHeaderBytes := s.next(fileNameSize) - tw.cString(pathHeaderBytes, hdr.Name, true, paxPath, paxHeaders) + formatString(pathHeaderBytes, hdr.Name, paxPath) // Handle out of range ModTime carefully. var modTime int64 @@ -187,25 +213,25 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { modTime = hdr.ModTime.Unix() } - tw.octal(s.next(8), hdr.Mode) // 100:108 - tw.numeric(s.next(8), int64(hdr.Uid), true, paxUid, paxHeaders) // 108:116 - tw.numeric(s.next(8), int64(hdr.Gid), true, paxGid, paxHeaders) // 116:124 - tw.numeric(s.next(12), hdr.Size, true, paxSize, paxHeaders) // 124:136 - tw.numeric(s.next(12), modTime, false, paxNone, nil) // 136:148 --- consider using pax for finer granularity - s.next(8) // chksum (148:156) - s.next(1)[0] = hdr.Typeflag // 156:157 + f.formatOctal(s.next(8), hdr.Mode) // 100:108 + formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116 + formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124 + formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136 + formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity + s.next(8) // chksum (148:156) + s.next(1)[0] = hdr.Typeflag // 156:157 - tw.cString(s.next(100), hdr.Linkname, true, paxLinkpath, paxHeaders) + formatString(s.next(100), hdr.Linkname, paxLinkpath) - copy(s.next(8), []byte("ustar\x0000")) // 257:265 - tw.cString(s.next(32), hdr.Uname, true, paxUname, paxHeaders) // 265:297 - tw.cString(s.next(32), hdr.Gname, true, paxGname, paxHeaders) // 297:329 - tw.numeric(s.next(8), hdr.Devmajor, false, paxNone, nil) // 329:337 - tw.numeric(s.next(8), hdr.Devminor, false, paxNone, nil) // 337:345 + copy(s.next(8), []byte("ustar\x0000")) // 257:265 + formatString(s.next(32), hdr.Uname, paxUname) // 265:297 + formatString(s.next(32), hdr.Gname, paxGname) // 297:329 + formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337 + formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345 // keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax prefixHeaderBytes := s.next(155) - tw.cString(prefixHeaderBytes, "", false, paxNone, nil) // 345:500 prefix + formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix // Use the GNU magic instead of POSIX magic if we used any GNU extensions. if tw.usedBinary { @@ -215,37 +241,26 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { _, paxPathUsed := paxHeaders[paxPath] // try to use a ustar header when only the name is too long if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { - suffix := hdr.Name - prefix := "" - if len(hdr.Name) > fileNameSize && isASCII(hdr.Name) { - var err error - prefix, suffix, err = tw.splitUSTARLongName(hdr.Name) - if err == nil { - // ok we can use a ustar long name instead of pax, now correct the fields + prefix, suffix, ok := splitUSTARPath(hdr.Name) + if ok { + // Since we can encode in USTAR format, disable PAX header. + delete(paxHeaders, paxPath) - // remove the path field from the pax header. this will suppress the pax header - delete(paxHeaders, paxPath) - - // update the path fields - tw.cString(pathHeaderBytes, suffix, false, paxNone, nil) - tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil) - - // Use the ustar magic if we used ustar long names. - if len(prefix) > 0 && !tw.usedBinary { - copy(header[257:265], []byte("ustar\x00")) - } - } + // Update the path fields + formatString(pathHeaderBytes, suffix, paxNone) + formatString(prefixHeaderBytes, prefix, paxNone) } } // The chksum field is terminated by a NUL and a space. // This is different from the other octal fields. chksum, _ := checksum(header) - tw.octal(header[148:155], chksum) + f.formatOctal(header[148:155], chksum) // Never fails header[155] = ' ' - if tw.err != nil { - // problem with header; probably integer too big for a field. + // Check if there were any formatting errors. + if f.err != nil { + tw.err = f.err return tw.err } @@ -270,28 +285,25 @@ func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { return tw.err } -// writeUSTARLongName splits a USTAR long name hdr.Name. -// name must be < 256 characters. errNameTooLong is returned -// if hdr.Name can't be split. The splitting heuristic -// is compatible with gnu tar. -func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) { +// splitUSTARPath splits a path according to USTAR prefix and suffix rules. +// If the path is not splittable, then it will return ("", "", false). +func splitUSTARPath(name string) (prefix, suffix string, ok bool) { length := len(name) - if length > fileNamePrefixSize+1 { + if length <= fileNameSize || !isASCII(name) { + return "", "", false + } else if length > fileNamePrefixSize+1 { length = fileNamePrefixSize + 1 } else if name[length-1] == '/' { length-- } + i := strings.LastIndex(name[:length], "/") - // nlen contains the resulting length in the name field. - // plen contains the resulting length in the prefix field. - nlen := len(name) - i - 1 - plen := i + nlen := len(name) - i - 1 // nlen is length of suffix + plen := i // plen is length of prefix if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize { - err = errNameTooLong - return + return "", "", false } - prefix, suffix = name[:i], name[i+1:] - return + return name[:i], name[i+1:], true } // writePaxHeader writes an extended pax header to the @@ -304,11 +316,11 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro // succeed, and seems harmless enough. ext.ModTime = hdr.ModTime // The spec asks that we namespace our pseudo files - // with the current pid. - pid := os.Getpid() + // with the current pid. However, this results in differing outputs + // for identical inputs. As such, the constant 0 is now used instead. + // golang.org/issue/12358 dir, file := path.Split(hdr.Name) - fullName := path.Join(dir, - fmt.Sprintf("PaxHeaders.%d", pid), file) + fullName := path.Join(dir, "PaxHeaders.0", file) ascii := toASCII(fullName) if len(ascii) > 100 { @@ -318,8 +330,15 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro // Construct the body var buf bytes.Buffer - for k, v := range paxHeaders { - fmt.Fprint(&buf, paxHeader(k+"="+v)) + // Keys are sorted before writing to body to allow deterministic output. + var keys []string + for k := range paxHeaders { + keys = append(keys, k) + } + sort.Strings(keys) + + for _, k := range keys { + fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k])) } ext.Size = int64(len(buf.Bytes())) @@ -335,17 +354,18 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) erro return nil } -// paxHeader formats a single pax record, prefixing it with the appropriate length -func paxHeader(msg string) string { - const padding = 2 // Extra padding for space and newline - size := len(msg) + padding +// formatPAXRecord formats a single PAX record, prefixing it with the +// appropriate length. +func formatPAXRecord(k, v string) string { + const padding = 3 // Extra padding for ' ', '=', and '\n' + size := len(k) + len(v) + padding size += len(strconv.Itoa(size)) - record := fmt.Sprintf("%d %s\n", size, msg) + record := fmt.Sprintf("%d %s=%s\n", size, k, v) + + // Final adjustment if adding size field increased the record size. if len(record) != size { - // Final adjustment if adding size increased - // the number of digits in size size = len(record) - record = fmt.Sprintf("%d %s\n", size, msg) + record = fmt.Sprintf("%d %s=%s\n", size, k, v) } return record }