diff --git a/Dockerfile b/Dockerfile index a5b35e0ec7..33e88dce26 100644 --- a/Dockerfile +++ b/Dockerfile @@ -107,6 +107,7 @@ RUN set -x \ # IMPORTANT: If the version of Go is updated, the Windows to Linux CI machines # will need updating, to avoid errors. Ping #docker-maintainers on IRC # with a heads-up. +# IMPORTANT: When updating this please note that stdlib archive/tar pkg is vendored ENV GO_VERSION 1.8.3 RUN curl -fsSL "https://golang.org/dl/go${GO_VERSION}.linux-amd64.tar.gz" \ | tar -xzC /usr/local diff --git a/Dockerfile.aarch64 b/Dockerfile.aarch64 index 7a8f5f793c..cabcda28bd 100644 --- a/Dockerfile.aarch64 +++ b/Dockerfile.aarch64 @@ -97,6 +97,7 @@ RUN set -x \ # bootstrap, so we use golang-go (1.6) as bootstrap to build Go from source code. # We don't use the official ARMv6 released binaries as a GOROOT_BOOTSTRAP, because # not all ARM64 platforms support 32-bit mode. 32-bit mode is optional for ARMv8. +# IMPORTANT: When updating this please note that stdlib archive/tar pkg is vendored ENV GO_VERSION 1.8.3 RUN mkdir /usr/src/go && curl -fsSL https://golang.org/dl/go${GO_VERSION}.src.tar.gz | tar -v -C /usr/src/go -xz --strip-components=1 \ && cd /usr/src/go/src \ diff --git a/Dockerfile.armhf b/Dockerfile.armhf index 6103c5a3ad..dd1f536191 100644 --- a/Dockerfile.armhf +++ b/Dockerfile.armhf @@ -70,6 +70,7 @@ RUN cd /usr/local/lvm2 \ # See https://git.fedorahosted.org/cgit/lvm2.git/tree/INSTALL # Install Go +# IMPORTANT: When updating this please note that stdlib archive/tar pkg is vendored ENV GO_VERSION 1.8.3 RUN curl -fsSL "https://golang.org/dl/go${GO_VERSION}.linux-armv6l.tar.gz" \ | tar -xzC /usr/local diff --git a/Dockerfile.ppc64le b/Dockerfile.ppc64le index e64153800c..43b84e4501 100644 --- a/Dockerfile.ppc64le +++ b/Dockerfile.ppc64le @@ -94,6 +94,7 @@ RUN set -x \ # Install Go # NOTE: official ppc64le go binaries weren't available until go 1.6.4 and 1.7.4 +# IMPORTANT: When updating this please note that stdlib archive/tar pkg is vendored ENV GO_VERSION 1.8.3 RUN curl -fsSL "https://golang.org/dl/go${GO_VERSION}.linux-ppc64le.tar.gz" \ | tar -xzC /usr/local diff --git a/Dockerfile.s390x b/Dockerfile.s390x index c69da3c969..35ec683739 100644 --- a/Dockerfile.s390x +++ b/Dockerfile.s390x @@ -87,6 +87,7 @@ RUN cd /usr/local/lvm2 \ && make install_device-mapper # See https://git.fedorahosted.org/cgit/lvm2.git/tree/INSTALL +# IMPORTANT: When updating this please note that stdlib archive/tar pkg is vendored ENV GO_VERSION 1.8.3 RUN curl -fsSL "https://golang.org/dl/go${GO_VERSION}.linux-s390x.tar.gz" \ | tar -xzC /usr/local diff --git a/Dockerfile.simple b/Dockerfile.simple index 4edc08f9ed..b4682d4cbc 100644 --- a/Dockerfile.simple +++ b/Dockerfile.simple @@ -53,6 +53,7 @@ RUN set -x \ # IMPORTANT: If the version of Go is updated, the Windows to Linux CI machines # will need updating, to avoid errors. Ping #docker-maintainers on IRC # with a heads-up. +# IMPORTANT: When updating this please note that stdlib archive/tar pkg is vendored ENV GO_VERSION 1.8.3 RUN curl -fsSL "https://golang.org/dl/go${GO_VERSION}.linux-amd64.tar.gz" \ | tar -xzC /usr/local diff --git a/pkg/archive/archive_test.go b/pkg/archive/archive_test.go index e85878fd0f..1371b8ab12 100644 --- a/pkg/archive/archive_test.go +++ b/pkg/archive/archive_test.go @@ -1203,6 +1203,25 @@ func TestReplaceFileTarWrapper(t *testing.T) { } } +// TestPrefixHeaderReadable tests that files that could be created with the +// version of this package that was built with <=go17 are still readable. +func TestPrefixHeaderReadable(t *testing.T) { + // https://gist.github.com/stevvooe/e2a790ad4e97425896206c0816e1a882#file-out-go + var testFile = []byte("\x1f\x8b\x08\x08\x44\x21\x68\x59\x00\x03\x74\x2e\x74\x61\x72\x00\x4b\xcb\xcf\x67\xa0\x35\x30\x80\x00\x86\x06\x10\x47\x01\xc1\x37\x40\x00\x54\xb6\xb1\xa1\xa9\x99\x09\x48\x25\x1d\x40\x69\x71\x49\x62\x91\x02\xe5\x76\xa1\x79\x84\x21\x91\xd6\x80\x72\xaf\x8f\x82\x51\x30\x0a\x46\x36\x00\x00\xf0\x1c\x1e\x95\x00\x06\x00\x00") + + tmpDir, err := ioutil.TempDir("", "prefix-test") + require.NoError(t, err) + defer os.RemoveAll(tmpDir) + err = Untar(bytes.NewReader(testFile), tmpDir, nil) + require.NoError(t, err) + + baseName := "foo" + pth := strings.Repeat("a", 100-len(baseName)) + "/" + baseName + + _, err = os.Lstat(filepath.Join(tmpDir, pth)) + require.NoError(t, err) +} + func buildSourceArchive(t *testing.T, numberOfFiles int) (io.ReadCloser, func()) { srcDir, err := ioutil.TempDir("", "docker-test-srcDir") require.NoError(t, err) diff --git a/vendor.conf b/vendor.conf index eed5fe3f74..7be4a60e70 100644 --- a/vendor.conf +++ b/vendor.conf @@ -136,3 +136,11 @@ github.com/Nvveen/Gotty a8b993ba6abdb0e0c12b0125c603323a71c7790c https://github. github.com/docker/go-metrics d466d4f6fd960e01820085bd7e1a24426ee7ef18 github.com/opencontainers/selinux v1.0.0-rc1 + +# archive/tar +# mkdir -p ./vendor/archive +# git clone git://github.com/tonistiigi/go-1.git ./go +# git --git-dir ./go/.git --work-tree ./go checkout revert-prefix-ignore +# cp -a go/src/archive/tar ./vendor/archive/tar +# rm -rf ./go +# vndr \ No newline at end of file diff --git a/vendor/archive/tar/common.go b/vendor/archive/tar/common.go new file mode 100644 index 0000000000..d2ae66d554 --- /dev/null +++ b/vendor/archive/tar/common.go @@ -0,0 +1,286 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package tar implements access to tar archives. +// It aims to cover most of the variations, including those produced +// by GNU and BSD tars. +// +// References: +// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 +// http://www.gnu.org/software/tar/manual/html_node/Standard.html +// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html +package tar + +import ( + "errors" + "fmt" + "os" + "path" + "time" +) + +// BUG: Use of the Uid and Gid fields in Header could overflow on 32-bit +// architectures. If a large value is encountered when decoding, the result +// stored in Header will be the truncated version. + +// Header type flags. +const ( + TypeReg = '0' // regular file + TypeRegA = '\x00' // regular file + TypeLink = '1' // hard link + TypeSymlink = '2' // symbolic link + TypeChar = '3' // character device node + TypeBlock = '4' // block device node + TypeDir = '5' // directory + TypeFifo = '6' // fifo node + TypeCont = '7' // reserved + TypeXHeader = 'x' // extended header + TypeXGlobalHeader = 'g' // global extended header + TypeGNULongName = 'L' // Next file has a long name + TypeGNULongLink = 'K' // Next file symlinks to a file w/ a long name + TypeGNUSparse = 'S' // sparse file +) + +// A Header represents a single header in a tar archive. +// Some fields may not be populated. +type Header struct { + Name string // name of header file entry + Mode int64 // permission and mode bits + Uid int // user id of owner + Gid int // group id of owner + Size int64 // length in bytes + ModTime time.Time // modified time + Typeflag byte // type of header entry + Linkname string // target name of link + Uname string // user name of owner + Gname string // group name of owner + Devmajor int64 // major number of character or block device + Devminor int64 // minor number of character or block device + AccessTime time.Time // access time + ChangeTime time.Time // status change time + Xattrs map[string]string +} + +// FileInfo returns an os.FileInfo for the Header. +func (h *Header) FileInfo() os.FileInfo { + return headerFileInfo{h} +} + +// headerFileInfo implements os.FileInfo. +type headerFileInfo struct { + h *Header +} + +func (fi headerFileInfo) Size() int64 { return fi.h.Size } +func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() } +func (fi headerFileInfo) ModTime() time.Time { return fi.h.ModTime } +func (fi headerFileInfo) Sys() interface{} { return fi.h } + +// Name returns the base name of the file. +func (fi headerFileInfo) Name() string { + if fi.IsDir() { + return path.Base(path.Clean(fi.h.Name)) + } + return path.Base(fi.h.Name) +} + +// Mode returns the permission and mode bits for the headerFileInfo. +func (fi headerFileInfo) Mode() (mode os.FileMode) { + // Set file permission bits. + mode = os.FileMode(fi.h.Mode).Perm() + + // Set setuid, setgid and sticky bits. + if fi.h.Mode&c_ISUID != 0 { + // setuid + mode |= os.ModeSetuid + } + if fi.h.Mode&c_ISGID != 0 { + // setgid + mode |= os.ModeSetgid + } + if fi.h.Mode&c_ISVTX != 0 { + // sticky + mode |= os.ModeSticky + } + + // Set file mode bits. + // clear perm, setuid, setgid and sticky bits. + m := os.FileMode(fi.h.Mode) &^ 07777 + if m == c_ISDIR { + // directory + mode |= os.ModeDir + } + if m == c_ISFIFO { + // named pipe (FIFO) + mode |= os.ModeNamedPipe + } + if m == c_ISLNK { + // symbolic link + mode |= os.ModeSymlink + } + if m == c_ISBLK { + // device file + mode |= os.ModeDevice + } + if m == c_ISCHR { + // Unix character device + mode |= os.ModeDevice + mode |= os.ModeCharDevice + } + if m == c_ISSOCK { + // Unix domain socket + mode |= os.ModeSocket + } + + switch fi.h.Typeflag { + case TypeSymlink: + // symbolic link + mode |= os.ModeSymlink + case TypeChar: + // character device node + mode |= os.ModeDevice + mode |= os.ModeCharDevice + case TypeBlock: + // block device node + mode |= os.ModeDevice + case TypeDir: + // directory + mode |= os.ModeDir + case TypeFifo: + // fifo node + mode |= os.ModeNamedPipe + } + + return mode +} + +// sysStat, if non-nil, populates h from system-dependent fields of fi. +var sysStat func(fi os.FileInfo, h *Header) error + +// Mode constants from the tar spec. +const ( + c_ISUID = 04000 // Set uid + c_ISGID = 02000 // Set gid + c_ISVTX = 01000 // Save text (sticky bit) + c_ISDIR = 040000 // Directory + c_ISFIFO = 010000 // FIFO + c_ISREG = 0100000 // Regular file + c_ISLNK = 0120000 // Symbolic link + c_ISBLK = 060000 // Block special file + c_ISCHR = 020000 // Character special file + c_ISSOCK = 0140000 // Socket +) + +// Keywords for the PAX Extended Header +const ( + paxAtime = "atime" + paxCharset = "charset" + paxComment = "comment" + paxCtime = "ctime" // please note that ctime is not a valid pax header. + paxGid = "gid" + paxGname = "gname" + paxLinkpath = "linkpath" + paxMtime = "mtime" + paxPath = "path" + paxSize = "size" + paxUid = "uid" + paxUname = "uname" + paxXattr = "SCHILY.xattr." + paxNone = "" +) + +// FileInfoHeader creates a partially-populated Header from fi. +// If fi describes a symlink, FileInfoHeader records link as the link target. +// If fi describes a directory, a slash is appended to the name. +// Because os.FileInfo's Name method returns only the base name of +// the file it describes, it may be necessary to modify the Name field +// of the returned header to provide the full path name of the file. +func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { + if fi == nil { + return nil, errors.New("tar: FileInfo is nil") + } + fm := fi.Mode() + h := &Header{ + Name: fi.Name(), + ModTime: fi.ModTime(), + Mode: int64(fm.Perm()), // or'd with c_IS* constants later + } + switch { + case fm.IsRegular(): + h.Mode |= c_ISREG + h.Typeflag = TypeReg + h.Size = fi.Size() + case fi.IsDir(): + h.Typeflag = TypeDir + h.Mode |= c_ISDIR + h.Name += "/" + case fm&os.ModeSymlink != 0: + h.Typeflag = TypeSymlink + h.Mode |= c_ISLNK + h.Linkname = link + case fm&os.ModeDevice != 0: + if fm&os.ModeCharDevice != 0 { + h.Mode |= c_ISCHR + h.Typeflag = TypeChar + } else { + h.Mode |= c_ISBLK + h.Typeflag = TypeBlock + } + case fm&os.ModeNamedPipe != 0: + h.Typeflag = TypeFifo + h.Mode |= c_ISFIFO + case fm&os.ModeSocket != 0: + h.Mode |= c_ISSOCK + default: + return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm) + } + if fm&os.ModeSetuid != 0 { + h.Mode |= c_ISUID + } + if fm&os.ModeSetgid != 0 { + h.Mode |= c_ISGID + } + if fm&os.ModeSticky != 0 { + h.Mode |= c_ISVTX + } + // If possible, populate additional fields from OS-specific + // FileInfo fields. + if sys, ok := fi.Sys().(*Header); ok { + // This FileInfo came from a Header (not the OS). Use the + // original Header to populate all remaining fields. + h.Uid = sys.Uid + h.Gid = sys.Gid + h.Uname = sys.Uname + h.Gname = sys.Gname + h.AccessTime = sys.AccessTime + h.ChangeTime = sys.ChangeTime + if sys.Xattrs != nil { + h.Xattrs = make(map[string]string) + for k, v := range sys.Xattrs { + h.Xattrs[k] = v + } + } + if sys.Typeflag == TypeLink { + // hard link + h.Typeflag = TypeLink + h.Size = 0 + h.Linkname = sys.Linkname + } + } + if sysStat != nil { + return h, sysStat(fi, h) + } + return h, nil +} + +// isHeaderOnlyType checks if the given type flag is of the type that has no +// data section even if a size is specified. +func isHeaderOnlyType(flag byte) bool { + switch flag { + case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo: + return true + default: + return false + } +} diff --git a/vendor/archive/tar/format.go b/vendor/archive/tar/format.go new file mode 100644 index 0000000000..c2c9910d00 --- /dev/null +++ b/vendor/archive/tar/format.go @@ -0,0 +1,197 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +// Constants to identify various tar formats. +const ( + // The format is unknown. + formatUnknown = (1 << iota) / 2 // Sequence of 0, 1, 2, 4, 8, etc... + + // The format of the original Unix V7 tar tool prior to standardization. + formatV7 + + // The old and new GNU formats, which are incompatible with USTAR. + // This does cover the old GNU sparse extension. + // This does not cover the GNU sparse extensions using PAX headers, + // versions 0.0, 0.1, and 1.0; these fall under the PAX format. + formatGNU + + // Schily's tar format, which is incompatible with USTAR. + // This does not cover STAR extensions to the PAX format; these fall under + // the PAX format. + formatSTAR + + // USTAR is the former standardization of tar defined in POSIX.1-1988. + // This is incompatible with the GNU and STAR formats. + formatUSTAR + + // PAX is the latest standardization of tar defined in POSIX.1-2001. + // This is an extension of USTAR and is "backwards compatible" with it. + // + // Some newer formats add their own extensions to PAX, such as GNU sparse + // files and SCHILY extended attributes. Since they are backwards compatible + // with PAX, they will be labelled as "PAX". + formatPAX +) + +// Magics used to identify various formats. +const ( + magicGNU, versionGNU = "ustar ", " \x00" + magicUSTAR, versionUSTAR = "ustar\x00", "00" + trailerSTAR = "tar\x00" +) + +// Size constants from various tar specifications. +const ( + blockSize = 512 // Size of each block in a tar stream + nameSize = 100 // Max length of the name field in USTAR format + prefixSize = 155 // Max length of the prefix field in USTAR format +) + +var zeroBlock block + +type block [blockSize]byte + +// Convert block to any number of formats. +func (b *block) V7() *headerV7 { return (*headerV7)(b) } +func (b *block) GNU() *headerGNU { return (*headerGNU)(b) } +func (b *block) STAR() *headerSTAR { return (*headerSTAR)(b) } +func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) } +func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) } + +// GetFormat checks that the block is a valid tar header based on the checksum. +// It then attempts to guess the specific format based on magic values. +// If the checksum fails, then formatUnknown is returned. +func (b *block) GetFormat() (format int) { + // Verify checksum. + var p parser + value := p.parseOctal(b.V7().Chksum()) + chksum1, chksum2 := b.ComputeChecksum() + if p.err != nil || (value != chksum1 && value != chksum2) { + return formatUnknown + } + + // Guess the magic values. + magic := string(b.USTAR().Magic()) + version := string(b.USTAR().Version()) + trailer := string(b.STAR().Trailer()) + switch { + case magic == magicUSTAR && trailer == trailerSTAR: + return formatSTAR + case magic == magicUSTAR: + return formatUSTAR + case magic == magicGNU && version == versionGNU: + return formatGNU + default: + return formatV7 + } +} + +// SetFormat writes the magic values necessary for specified format +// and then updates the checksum accordingly. +func (b *block) SetFormat(format int) { + // Set the magic values. + switch format { + case formatV7: + // Do nothing. + case formatGNU: + copy(b.GNU().Magic(), magicGNU) + copy(b.GNU().Version(), versionGNU) + case formatSTAR: + copy(b.STAR().Magic(), magicUSTAR) + copy(b.STAR().Version(), versionUSTAR) + copy(b.STAR().Trailer(), trailerSTAR) + case formatUSTAR, formatPAX: + copy(b.USTAR().Magic(), magicUSTAR) + copy(b.USTAR().Version(), versionUSTAR) + default: + panic("invalid format") + } + + // Update checksum. + // This field is special in that it is terminated by a NULL then space. + var f formatter + field := b.V7().Chksum() + chksum, _ := b.ComputeChecksum() // Possible values are 256..128776 + f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143 + field[7] = ' ' +} + +// ComputeChecksum computes the checksum for the header block. +// POSIX specifies a sum of the unsigned byte values, but the Sun tar used +// signed byte values. +// We compute and return both. +func (b *block) ComputeChecksum() (unsigned, signed int64) { + for i, c := range b { + if 148 <= i && i < 156 { + c = ' ' // Treat the checksum field itself as all spaces. + } + unsigned += int64(uint8(c)) + signed += int64(int8(c)) + } + return unsigned, signed +} + +type headerV7 [blockSize]byte + +func (h *headerV7) Name() []byte { return h[000:][:100] } +func (h *headerV7) Mode() []byte { return h[100:][:8] } +func (h *headerV7) UID() []byte { return h[108:][:8] } +func (h *headerV7) GID() []byte { return h[116:][:8] } +func (h *headerV7) Size() []byte { return h[124:][:12] } +func (h *headerV7) ModTime() []byte { return h[136:][:12] } +func (h *headerV7) Chksum() []byte { return h[148:][:8] } +func (h *headerV7) TypeFlag() []byte { return h[156:][:1] } +func (h *headerV7) LinkName() []byte { return h[157:][:100] } + +type headerGNU [blockSize]byte + +func (h *headerGNU) V7() *headerV7 { return (*headerV7)(h) } +func (h *headerGNU) Magic() []byte { return h[257:][:6] } +func (h *headerGNU) Version() []byte { return h[263:][:2] } +func (h *headerGNU) UserName() []byte { return h[265:][:32] } +func (h *headerGNU) GroupName() []byte { return h[297:][:32] } +func (h *headerGNU) DevMajor() []byte { return h[329:][:8] } +func (h *headerGNU) DevMinor() []byte { return h[337:][:8] } +func (h *headerGNU) AccessTime() []byte { return h[345:][:12] } +func (h *headerGNU) ChangeTime() []byte { return h[357:][:12] } +func (h *headerGNU) Sparse() sparseArray { return (sparseArray)(h[386:][:24*4+1]) } +func (h *headerGNU) RealSize() []byte { return h[483:][:12] } + +type headerSTAR [blockSize]byte + +func (h *headerSTAR) V7() *headerV7 { return (*headerV7)(h) } +func (h *headerSTAR) Magic() []byte { return h[257:][:6] } +func (h *headerSTAR) Version() []byte { return h[263:][:2] } +func (h *headerSTAR) UserName() []byte { return h[265:][:32] } +func (h *headerSTAR) GroupName() []byte { return h[297:][:32] } +func (h *headerSTAR) DevMajor() []byte { return h[329:][:8] } +func (h *headerSTAR) DevMinor() []byte { return h[337:][:8] } +func (h *headerSTAR) Prefix() []byte { return h[345:][:131] } +func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] } +func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] } +func (h *headerSTAR) Trailer() []byte { return h[508:][:4] } + +type headerUSTAR [blockSize]byte + +func (h *headerUSTAR) V7() *headerV7 { return (*headerV7)(h) } +func (h *headerUSTAR) Magic() []byte { return h[257:][:6] } +func (h *headerUSTAR) Version() []byte { return h[263:][:2] } +func (h *headerUSTAR) UserName() []byte { return h[265:][:32] } +func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] } +func (h *headerUSTAR) DevMajor() []byte { return h[329:][:8] } +func (h *headerUSTAR) DevMinor() []byte { return h[337:][:8] } +func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] } + +type sparseArray []byte + +func (s sparseArray) Entry(i int) sparseNode { return (sparseNode)(s[i*24:]) } +func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] } +func (s sparseArray) MaxEntries() int { return len(s) / 24 } + +type sparseNode []byte + +func (s sparseNode) Offset() []byte { return s[00:][:12] } +func (s sparseNode) NumBytes() []byte { return s[12:][:12] } diff --git a/vendor/archive/tar/reader.go b/vendor/archive/tar/reader.go new file mode 100644 index 0000000000..a6142c6b86 --- /dev/null +++ b/vendor/archive/tar/reader.go @@ -0,0 +1,800 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +// TODO(dsymonds): +// - pax extensions + +import ( + "bytes" + "errors" + "io" + "io/ioutil" + "math" + "strconv" + "strings" + "time" +) + +var ( + ErrHeader = errors.New("archive/tar: invalid tar header") +) + +// A Reader provides sequential access to the contents of a tar archive. +// A tar archive consists of a sequence of files. +// The Next method advances to the next file in the archive (including the first), +// and then it can be treated as an io.Reader to access the file's data. +type Reader struct { + r io.Reader + pad int64 // amount of padding (ignored) after current file entry + curr numBytesReader // reader for current file entry + blk block // buffer to use as temporary local storage + + // err is a persistent error. + // It is only the responsibility of every exported method of Reader to + // ensure that this error is sticky. + err error +} + +// A numBytesReader is an io.Reader with a numBytes method, returning the number +// of bytes remaining in the underlying encoded data. +type numBytesReader interface { + io.Reader + numBytes() int64 +} + +// A regFileReader is a numBytesReader for reading file data from a tar archive. +type regFileReader struct { + r io.Reader // underlying reader + nb int64 // number of unread bytes for current file entry +} + +// A sparseFileReader is a numBytesReader for reading sparse file data from a +// tar archive. +type sparseFileReader struct { + rfr numBytesReader // Reads the sparse-encoded file data + sp []sparseEntry // The sparse map for the file + pos int64 // Keeps track of file position + total int64 // Total size of the file +} + +// A sparseEntry holds a single entry in a sparse file's sparse map. +// +// Sparse files are represented using a series of sparseEntrys. +// Despite the name, a sparseEntry represents an actual data fragment that +// references data found in the underlying archive stream. All regions not +// covered by a sparseEntry are logically filled with zeros. +// +// For example, if the underlying raw file contains the 10-byte data: +// var compactData = "abcdefgh" +// +// And the sparse map has the following entries: +// var sp = []sparseEntry{ +// {offset: 2, numBytes: 5} // Data fragment for [2..7] +// {offset: 18, numBytes: 3} // Data fragment for [18..21] +// } +// +// Then the content of the resulting sparse file with a "real" size of 25 is: +// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 +type sparseEntry struct { + offset int64 // Starting position of the fragment + numBytes int64 // Length of the fragment +} + +// Keywords for GNU sparse files in a PAX extended header +const ( + paxGNUSparseNumBlocks = "GNU.sparse.numblocks" + paxGNUSparseOffset = "GNU.sparse.offset" + paxGNUSparseNumBytes = "GNU.sparse.numbytes" + paxGNUSparseMap = "GNU.sparse.map" + paxGNUSparseName = "GNU.sparse.name" + paxGNUSparseMajor = "GNU.sparse.major" + paxGNUSparseMinor = "GNU.sparse.minor" + paxGNUSparseSize = "GNU.sparse.size" + paxGNUSparseRealSize = "GNU.sparse.realsize" +) + +// NewReader creates a new Reader reading from r. +func NewReader(r io.Reader) *Reader { return &Reader{r: r} } + +// Next advances to the next entry in the tar archive. +// +// io.EOF is returned at the end of the input. +func (tr *Reader) Next() (*Header, error) { + if tr.err != nil { + return nil, tr.err + } + hdr, err := tr.next() + tr.err = err + return hdr, err +} + +func (tr *Reader) next() (*Header, error) { + var extHdrs map[string]string + + // Externally, Next iterates through the tar archive as if it is a series of + // files. Internally, the tar format often uses fake "files" to add meta + // data that describes the next file. These meta data "files" should not + // normally be visible to the outside. As such, this loop iterates through + // one or more "header files" until it finds a "normal file". +loop: + for { + if err := tr.skipUnread(); err != nil { + return nil, err + } + hdr, rawHdr, err := tr.readHeader() + if err != nil { + return nil, err + } + if err := tr.handleRegularFile(hdr); err != nil { + return nil, err + } + + // Check for PAX/GNU special headers and files. + switch hdr.Typeflag { + case TypeXHeader: + extHdrs, err = parsePAX(tr) + if err != nil { + return nil, err + } + continue loop // This is a meta header affecting the next header + case TypeGNULongName, TypeGNULongLink: + realname, err := ioutil.ReadAll(tr) + if err != nil { + return nil, err + } + + // Convert GNU extensions to use PAX headers. + if extHdrs == nil { + extHdrs = make(map[string]string) + } + var p parser + switch hdr.Typeflag { + case TypeGNULongName: + extHdrs[paxPath] = p.parseString(realname) + case TypeGNULongLink: + extHdrs[paxLinkpath] = p.parseString(realname) + } + if p.err != nil { + return nil, p.err + } + continue loop // This is a meta header affecting the next header + default: + // The old GNU sparse format is handled here since it is technically + // just a regular file with additional attributes. + + if err := mergePAX(hdr, extHdrs); err != nil { + return nil, err + } + + // The extended headers may have updated the size. + // Thus, setup the regFileReader again after merging PAX headers. + if err := tr.handleRegularFile(hdr); err != nil { + return nil, err + } + + // Sparse formats rely on being able to read from the logical data + // section; there must be a preceding call to handleRegularFile. + if err := tr.handleSparseFile(hdr, rawHdr, extHdrs); err != nil { + return nil, err + } + return hdr, nil // This is a file, so stop + } + } +} + +// handleRegularFile sets up the current file reader and padding such that it +// can only read the following logical data section. It will properly handle +// special headers that contain no data section. +func (tr *Reader) handleRegularFile(hdr *Header) error { + nb := hdr.Size + if isHeaderOnlyType(hdr.Typeflag) { + nb = 0 + } + if nb < 0 { + return ErrHeader + } + + tr.pad = -nb & (blockSize - 1) // blockSize is a power of two + tr.curr = ®FileReader{r: tr.r, nb: nb} + return nil +} + +// handleSparseFile checks if the current file is a sparse format of any type +// and sets the curr reader appropriately. +func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block, extHdrs map[string]string) error { + var sp []sparseEntry + var err error + if hdr.Typeflag == TypeGNUSparse { + sp, err = tr.readOldGNUSparseMap(hdr, rawHdr) + if err != nil { + return err + } + } else { + sp, err = tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) + if err != nil { + return err + } + } + + // If sp is non-nil, then this is a sparse file. + // Note that it is possible for len(sp) to be zero. + if sp != nil { + tr.curr, err = newSparseFileReader(tr.curr, sp, hdr.Size) + } + return err +} + +// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then +// this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to +// be treated as a regular file. +func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) { + var sparseFormat string + + // Check for sparse format indicators + major, majorOk := headers[paxGNUSparseMajor] + minor, minorOk := headers[paxGNUSparseMinor] + sparseName, sparseNameOk := headers[paxGNUSparseName] + _, sparseMapOk := headers[paxGNUSparseMap] + sparseSize, sparseSizeOk := headers[paxGNUSparseSize] + sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize] + + // Identify which, if any, sparse format applies from which PAX headers are set + if majorOk && minorOk { + sparseFormat = major + "." + minor + } else if sparseNameOk && sparseMapOk { + sparseFormat = "0.1" + } else if sparseSizeOk { + sparseFormat = "0.0" + } else { + // Not a PAX format GNU sparse file. + return nil, nil + } + + // Check for unknown sparse format + if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" { + return nil, nil + } + + // Update hdr from GNU sparse PAX headers + if sparseNameOk { + hdr.Name = sparseName + } + if sparseSizeOk { + realSize, err := strconv.ParseInt(sparseSize, 10, 64) + if err != nil { + return nil, ErrHeader + } + hdr.Size = realSize + } else if sparseRealSizeOk { + realSize, err := strconv.ParseInt(sparseRealSize, 10, 64) + if err != nil { + return nil, ErrHeader + } + hdr.Size = realSize + } + + // Set up the sparse map, according to the particular sparse format in use + var sp []sparseEntry + var err error + switch sparseFormat { + case "0.0", "0.1": + sp, err = readGNUSparseMap0x1(headers) + case "1.0": + sp, err = readGNUSparseMap1x0(tr.curr) + } + return sp, err +} + +// mergePAX merges well known headers according to PAX standard. +// In general headers with the same name as those found +// in the header struct overwrite those found in the header +// struct with higher precision or longer values. Esp. useful +// for name and linkname fields. +func mergePAX(hdr *Header, headers map[string]string) (err error) { + var id64 int64 + for k, v := range headers { + switch k { + case paxPath: + hdr.Name = v + case paxLinkpath: + hdr.Linkname = v + case paxUname: + hdr.Uname = v + case paxGname: + hdr.Gname = v + case paxUid: + id64, err = strconv.ParseInt(v, 10, 64) + hdr.Uid = int(id64) // Integer overflow possible + case paxGid: + id64, err = strconv.ParseInt(v, 10, 64) + hdr.Gid = int(id64) // Integer overflow possible + case paxAtime: + hdr.AccessTime, err = parsePAXTime(v) + case paxMtime: + hdr.ModTime, err = parsePAXTime(v) + case paxCtime: + hdr.ChangeTime, err = parsePAXTime(v) + case paxSize: + hdr.Size, err = strconv.ParseInt(v, 10, 64) + default: + if strings.HasPrefix(k, paxXattr) { + if hdr.Xattrs == nil { + hdr.Xattrs = make(map[string]string) + } + hdr.Xattrs[k[len(paxXattr):]] = v + } + } + if err != nil { + return ErrHeader + } + } + return nil +} + +// parsePAX parses PAX headers. +// If an extended header (type 'x') is invalid, ErrHeader is returned +func parsePAX(r io.Reader) (map[string]string, error) { + buf, err := ioutil.ReadAll(r) + if err != nil { + return nil, err + } + sbuf := string(buf) + + // For GNU PAX sparse format 0.0 support. + // This function transforms the sparse format 0.0 headers into format 0.1 + // headers since 0.0 headers were not PAX compliant. + var sparseMap []string + + extHdrs := make(map[string]string) + for len(sbuf) > 0 { + key, value, residual, err := parsePAXRecord(sbuf) + if err != nil { + return nil, ErrHeader + } + sbuf = residual + + switch key { + case paxGNUSparseOffset, paxGNUSparseNumBytes: + // Validate sparse header order and value. + if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) || + (len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) || + strings.Contains(value, ",") { + return nil, ErrHeader + } + sparseMap = append(sparseMap, value) + default: + // According to PAX specification, a value is stored only if it is + // non-empty. Otherwise, the key is deleted. + if len(value) > 0 { + extHdrs[key] = value + } else { + delete(extHdrs, key) + } + } + } + if len(sparseMap) > 0 { + extHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",") + } + return extHdrs, nil +} + +// skipUnread skips any unread bytes in the existing file entry, as well as any +// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is +// encountered in the data portion; it is okay to hit io.EOF in the padding. +// +// Note that this function still works properly even when sparse files are being +// used since numBytes returns the bytes remaining in the underlying io.Reader. +func (tr *Reader) skipUnread() error { + dataSkip := tr.numBytes() // Number of data bytes to skip + totalSkip := dataSkip + tr.pad // Total number of bytes to skip + tr.curr, tr.pad = nil, 0 + + // If possible, Seek to the last byte before the end of the data section. + // Do this because Seek is often lazy about reporting errors; this will mask + // the fact that the tar stream may be truncated. We can rely on the + // io.CopyN done shortly afterwards to trigger any IO errors. + var seekSkipped int64 // Number of bytes skipped via Seek + if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 { + // Not all io.Seeker can actually Seek. For example, os.Stdin implements + // io.Seeker, but calling Seek always returns an error and performs + // no action. Thus, we try an innocent seek to the current position + // to see if Seek is really supported. + pos1, err := sr.Seek(0, io.SeekCurrent) + if err == nil { + // Seek seems supported, so perform the real Seek. + pos2, err := sr.Seek(dataSkip-1, io.SeekCurrent) + if err != nil { + return err + } + seekSkipped = pos2 - pos1 + } + } + + copySkipped, err := io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) + if err == io.EOF && seekSkipped+copySkipped < dataSkip { + err = io.ErrUnexpectedEOF + } + return err +} + +// readHeader reads the next block header and assumes that the underlying reader +// is already aligned to a block boundary. It returns the raw block of the +// header in case further processing is required. +// +// The err will be set to io.EOF only when one of the following occurs: +// * Exactly 0 bytes are read and EOF is hit. +// * Exactly 1 block of zeros is read and EOF is hit. +// * At least 2 blocks of zeros are read. +func (tr *Reader) readHeader() (*Header, *block, error) { + // Two blocks of zero bytes marks the end of the archive. + if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil { + return nil, nil, err // EOF is okay here; exactly 0 bytes read + } + if bytes.Equal(tr.blk[:], zeroBlock[:]) { + if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil { + return nil, nil, err // EOF is okay here; exactly 1 block of zeros read + } + if bytes.Equal(tr.blk[:], zeroBlock[:]) { + return nil, nil, io.EOF // normal EOF; exactly 2 block of zeros read + } + return nil, nil, ErrHeader // Zero block and then non-zero block + } + + // Verify the header matches a known format. + format := tr.blk.GetFormat() + if format == formatUnknown { + return nil, nil, ErrHeader + } + + var p parser + hdr := new(Header) + + // Unpack the V7 header. + v7 := tr.blk.V7() + hdr.Name = p.parseString(v7.Name()) + hdr.Mode = p.parseNumeric(v7.Mode()) + hdr.Uid = int(p.parseNumeric(v7.UID())) + hdr.Gid = int(p.parseNumeric(v7.GID())) + hdr.Size = p.parseNumeric(v7.Size()) + hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0) + hdr.Typeflag = v7.TypeFlag()[0] + hdr.Linkname = p.parseString(v7.LinkName()) + + // Unpack format specific fields. + if format > formatV7 { + ustar := tr.blk.USTAR() + hdr.Uname = p.parseString(ustar.UserName()) + hdr.Gname = p.parseString(ustar.GroupName()) + if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { + hdr.Devmajor = p.parseNumeric(ustar.DevMajor()) + hdr.Devminor = p.parseNumeric(ustar.DevMinor()) + } + + var prefix string + switch format { + case formatUSTAR, formatGNU: + // TODO(dsnet): Do not use the prefix field for the GNU format! + // See golang.org/issues/12594 + ustar := tr.blk.USTAR() + prefix = p.parseString(ustar.Prefix()) + case formatSTAR: + star := tr.blk.STAR() + prefix = p.parseString(star.Prefix()) + hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0) + hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0) + } + if len(prefix) > 0 { + hdr.Name = prefix + "/" + hdr.Name + } + } + return hdr, &tr.blk, p.err +} + +// readOldGNUSparseMap reads the sparse map from the old GNU sparse format. +// The sparse map is stored in the tar header if it's small enough. +// If it's larger than four entries, then one or more extension headers are used +// to store the rest of the sparse map. +// +// The Header.Size does not reflect the size of any extended headers used. +// Thus, this function will read from the raw io.Reader to fetch extra headers. +// This method mutates blk in the process. +func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, error) { + // Make sure that the input format is GNU. + // Unfortunately, the STAR format also has a sparse header format that uses + // the same type flag but has a completely different layout. + if blk.GetFormat() != formatGNU { + return nil, ErrHeader + } + + var p parser + hdr.Size = p.parseNumeric(blk.GNU().RealSize()) + if p.err != nil { + return nil, p.err + } + var s sparseArray = blk.GNU().Sparse() + var sp = make([]sparseEntry, 0, s.MaxEntries()) + for { + for i := 0; i < s.MaxEntries(); i++ { + // This termination condition is identical to GNU and BSD tar. + if s.Entry(i).Offset()[0] == 0x00 { + break // Don't return, need to process extended headers (even if empty) + } + offset := p.parseNumeric(s.Entry(i).Offset()) + numBytes := p.parseNumeric(s.Entry(i).NumBytes()) + if p.err != nil { + return nil, p.err + } + sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + } + + if s.IsExtended()[0] > 0 { + // There are more entries. Read an extension header and parse its entries. + if _, err := io.ReadFull(tr.r, blk[:]); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return nil, err + } + s = blk.Sparse() + continue + } + return sp, nil // Done + } +} + +// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format +// version 1.0. The format of the sparse map consists of a series of +// newline-terminated numeric fields. The first field is the number of entries +// and is always present. Following this are the entries, consisting of two +// fields (offset, numBytes). This function must stop reading at the end +// boundary of the block containing the last newline. +// +// Note that the GNU manual says that numeric values should be encoded in octal +// format. However, the GNU tar utility itself outputs these values in decimal. +// As such, this library treats values as being encoded in decimal. +func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { + var cntNewline int64 + var buf bytes.Buffer + var blk = make([]byte, blockSize) + + // feedTokens copies data in numBlock chunks from r into buf until there are + // at least cnt newlines in buf. It will not read more blocks than needed. + var feedTokens = func(cnt int64) error { + for cntNewline < cnt { + if _, err := io.ReadFull(r, blk); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return err + } + buf.Write(blk) + for _, c := range blk { + if c == '\n' { + cntNewline++ + } + } + } + return nil + } + + // nextToken gets the next token delimited by a newline. This assumes that + // at least one newline exists in the buffer. + var nextToken = func() string { + cntNewline-- + tok, _ := buf.ReadString('\n') + return tok[:len(tok)-1] // Cut off newline + } + + // Parse for the number of entries. + // Use integer overflow resistant math to check this. + if err := feedTokens(1); err != nil { + return nil, err + } + numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int + if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { + return nil, ErrHeader + } + + // Parse for all member entries. + // numEntries is trusted after this since a potential attacker must have + // committed resources proportional to what this library used. + if err := feedTokens(2 * numEntries); err != nil { + return nil, err + } + sp := make([]sparseEntry, 0, numEntries) + for i := int64(0); i < numEntries; i++ { + offset, err := strconv.ParseInt(nextToken(), 10, 64) + if err != nil { + return nil, ErrHeader + } + numBytes, err := strconv.ParseInt(nextToken(), 10, 64) + if err != nil { + return nil, ErrHeader + } + sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + } + return sp, nil +} + +// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format +// version 0.1. The sparse map is stored in the PAX headers. +func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { + // Get number of entries. + // Use integer overflow resistant math to check this. + numEntriesStr := extHdrs[paxGNUSparseNumBlocks] + numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int + if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { + return nil, ErrHeader + } + + // There should be two numbers in sparseMap for each entry. + sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",") + if int64(len(sparseMap)) != 2*numEntries { + return nil, ErrHeader + } + + // Loop through the entries in the sparse map. + // numEntries is trusted now. + sp := make([]sparseEntry, 0, numEntries) + for i := int64(0); i < numEntries; i++ { + offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) + if err != nil { + return nil, ErrHeader + } + numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) + if err != nil { + return nil, ErrHeader + } + sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + } + return sp, nil +} + +// numBytes returns the number of bytes left to read in the current file's entry +// in the tar archive, or 0 if there is no current file. +func (tr *Reader) numBytes() int64 { + if tr.curr == nil { + // No current file, so no bytes + return 0 + } + return tr.curr.numBytes() +} + +// Read reads from the current entry in the tar archive. +// It returns 0, io.EOF when it reaches the end of that entry, +// until Next is called to advance to the next entry. +// +// Calling Read on special types like TypeLink, TypeSymLink, TypeChar, +// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what +// the Header.Size claims. +func (tr *Reader) Read(b []byte) (int, error) { + if tr.err != nil { + return 0, tr.err + } + if tr.curr == nil { + return 0, io.EOF + } + + n, err := tr.curr.Read(b) + if err != nil && err != io.EOF { + tr.err = err + } + return n, err +} + +func (rfr *regFileReader) Read(b []byte) (n int, err error) { + if rfr.nb == 0 { + // file consumed + return 0, io.EOF + } + if int64(len(b)) > rfr.nb { + b = b[0:rfr.nb] + } + n, err = rfr.r.Read(b) + rfr.nb -= int64(n) + + if err == io.EOF && rfr.nb > 0 { + err = io.ErrUnexpectedEOF + } + return +} + +// numBytes returns the number of bytes left to read in the file's data in the tar archive. +func (rfr *regFileReader) numBytes() int64 { + return rfr.nb +} + +// newSparseFileReader creates a new sparseFileReader, but validates all of the +// sparse entries before doing so. +func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { + if total < 0 { + return nil, ErrHeader // Total size cannot be negative + } + + // Validate all sparse entries. These are the same checks as performed by + // the BSD tar utility. + for i, s := range sp { + switch { + case s.offset < 0 || s.numBytes < 0: + return nil, ErrHeader // Negative values are never okay + case s.offset > math.MaxInt64-s.numBytes: + return nil, ErrHeader // Integer overflow with large length + case s.offset+s.numBytes > total: + return nil, ErrHeader // Region extends beyond the "real" size + case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset: + return nil, ErrHeader // Regions can't overlap and must be in order + } + } + return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil +} + +// readHole reads a sparse hole ending at endOffset. +func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { + n64 := endOffset - sfr.pos + if n64 > int64(len(b)) { + n64 = int64(len(b)) + } + n := int(n64) + for i := 0; i < n; i++ { + b[i] = 0 + } + sfr.pos += n64 + return n +} + +// Read reads the sparse file data in expanded form. +func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { + // Skip past all empty fragments. + for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 { + sfr.sp = sfr.sp[1:] + } + + // If there are no more fragments, then it is possible that there + // is one last sparse hole. + if len(sfr.sp) == 0 { + // This behavior matches the BSD tar utility. + // However, GNU tar stops returning data even if sfr.total is unmet. + if sfr.pos < sfr.total { + return sfr.readHole(b, sfr.total), nil + } + return 0, io.EOF + } + + // In front of a data fragment, so read a hole. + if sfr.pos < sfr.sp[0].offset { + return sfr.readHole(b, sfr.sp[0].offset), nil + } + + // In a data fragment, so read from it. + // This math is overflow free since we verify that offset and numBytes can + // be safely added when creating the sparseFileReader. + endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment + bytesLeft := endPos - sfr.pos // Bytes left in fragment + if int64(len(b)) > bytesLeft { + b = b[:bytesLeft] + } + + n, err = sfr.rfr.Read(b) + sfr.pos += int64(n) + if err == io.EOF { + if sfr.pos < endPos { + err = io.ErrUnexpectedEOF // There was supposed to be more data + } else if sfr.pos < sfr.total { + err = nil // There is still an implicit sparse hole at the end + } + } + + if sfr.pos == endPos { + sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it + } + return n, err +} + +// numBytes returns the number of bytes left to read in the sparse file's +// sparse-encoded data in the tar archive. +func (sfr *sparseFileReader) numBytes() int64 { + return sfr.rfr.numBytes() +} diff --git a/vendor/archive/tar/stat_atim.go b/vendor/archive/tar/stat_atim.go new file mode 100644 index 0000000000..cf9cc79c59 --- /dev/null +++ b/vendor/archive/tar/stat_atim.go @@ -0,0 +1,20 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux dragonfly openbsd solaris + +package tar + +import ( + "syscall" + "time" +) + +func statAtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Atim.Unix()) +} + +func statCtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Ctim.Unix()) +} diff --git a/vendor/archive/tar/stat_atimespec.go b/vendor/archive/tar/stat_atimespec.go new file mode 100644 index 0000000000..6f17dbe307 --- /dev/null +++ b/vendor/archive/tar/stat_atimespec.go @@ -0,0 +1,20 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build darwin freebsd netbsd + +package tar + +import ( + "syscall" + "time" +) + +func statAtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Atimespec.Unix()) +} + +func statCtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Ctimespec.Unix()) +} diff --git a/vendor/archive/tar/stat_unix.go b/vendor/archive/tar/stat_unix.go new file mode 100644 index 0000000000..cb843db4cf --- /dev/null +++ b/vendor/archive/tar/stat_unix.go @@ -0,0 +1,32 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux darwin dragonfly freebsd openbsd netbsd solaris + +package tar + +import ( + "os" + "syscall" +) + +func init() { + sysStat = statUnix +} + +func statUnix(fi os.FileInfo, h *Header) error { + sys, ok := fi.Sys().(*syscall.Stat_t) + if !ok { + return nil + } + h.Uid = int(sys.Uid) + h.Gid = int(sys.Gid) + // TODO(bradfitz): populate username & group. os/user + // doesn't cache LookupId lookups, and lacks group + // lookup functions. + h.AccessTime = statAtime(sys) + h.ChangeTime = statCtime(sys) + // TODO(bradfitz): major/minor device numbers? + return nil +} diff --git a/vendor/archive/tar/strconv.go b/vendor/archive/tar/strconv.go new file mode 100644 index 0000000000..bb5b51c02d --- /dev/null +++ b/vendor/archive/tar/strconv.go @@ -0,0 +1,252 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "fmt" + "strconv" + "strings" + "time" +) + +func isASCII(s string) bool { + for _, c := range s { + if c >= 0x80 { + return false + } + } + return true +} + +func toASCII(s string) string { + if isASCII(s) { + return s + } + var buf bytes.Buffer + for _, c := range s { + if c < 0x80 { + buf.WriteByte(byte(c)) + } + } + return buf.String() +} + +type parser struct { + err error // Last error seen +} + +type formatter struct { + err error // Last error seen +} + +// parseString parses bytes as a NUL-terminated C-style string. +// If a NUL byte is not found then the whole slice is returned as a string. +func (*parser) parseString(b []byte) string { + n := 0 + for n < len(b) && b[n] != 0 { + n++ + } + return string(b[0:n]) +} + +// Write s into b, terminating it with a NUL if there is room. +func (f *formatter) formatString(b []byte, s string) { + if len(s) > len(b) { + f.err = ErrFieldTooLong + return + } + ascii := toASCII(s) + copy(b, ascii) + if len(ascii) < len(b) { + b[len(ascii)] = 0 + } +} + +// fitsInBase256 reports whether x can be encoded into n bytes using base-256 +// encoding. Unlike octal encoding, base-256 encoding does not require that the +// string ends with a NUL character. Thus, all n bytes are available for output. +// +// If operating in binary mode, this assumes strict GNU binary mode; which means +// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is +// equivalent to the sign bit in two's complement form. +func fitsInBase256(n int, x int64) bool { + var binBits = uint(n-1) * 8 + return n >= 9 || (x >= -1< 0 && b[0]&0x80 != 0 { + // Handling negative numbers relies on the following identity: + // -a-1 == ^a + // + // If the number is negative, we use an inversion mask to invert the + // data bytes and treat the value as an unsigned number. + var inv byte // 0x00 if positive or zero, 0xff if negative + if b[0]&0x40 != 0 { + inv = 0xff + } + + var x uint64 + for i, c := range b { + c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing + if i == 0 { + c &= 0x7f // Ignore signal bit in first byte + } + if (x >> 56) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + x = x<<8 | uint64(c) + } + if (x >> 63) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + if inv == 0xff { + return ^int64(x) + } + return int64(x) + } + + // Normal case is base-8 (octal) format. + return p.parseOctal(b) +} + +// Write x into b, as binary (GNUtar/star extension). +func (f *formatter) formatNumeric(b []byte, x int64) { + if fitsInBase256(len(b), x) { + for i := len(b) - 1; i >= 0; i-- { + b[i] = byte(x) + x >>= 8 + } + b[0] |= 0x80 // Highest bit indicates binary format + return + } + + f.formatOctal(b, 0) // Last resort, just write zero + f.err = ErrFieldTooLong +} + +func (p *parser) parseOctal(b []byte) int64 { + // Because unused fields are filled with NULs, we need + // to skip leading NULs. Fields may also be padded with + // spaces or NULs. + // So we remove leading and trailing NULs and spaces to + // be sure. + b = bytes.Trim(b, " \x00") + + if len(b) == 0 { + return 0 + } + x, perr := strconv.ParseUint(p.parseString(b), 8, 64) + if perr != nil { + p.err = ErrHeader + } + return int64(x) +} + +func (f *formatter) formatOctal(b []byte, x int64) { + s := strconv.FormatInt(x, 8) + // Add leading zeros, but leave room for a NUL. + if n := len(b) - len(s) - 1; n > 0 { + s = strings.Repeat("0", n) + s + } + f.formatString(b, s) +} + +// parsePAXTime takes a string of the form %d.%d as described in the PAX +// specification. Note that this implementation allows for negative timestamps, +// which is allowed for by the PAX specification, but not always portable. +func parsePAXTime(s string) (time.Time, error) { + const maxNanoSecondDigits = 9 + + // Split string into seconds and sub-seconds parts. + ss, sn := s, "" + if pos := strings.IndexByte(s, '.'); pos >= 0 { + ss, sn = s[:pos], s[pos+1:] + } + + // Parse the seconds. + secs, err := strconv.ParseInt(ss, 10, 64) + if err != nil { + return time.Time{}, ErrHeader + } + if len(sn) == 0 { + return time.Unix(secs, 0), nil // No sub-second values + } + + // Parse the nanoseconds. + if strings.Trim(sn, "0123456789") != "" { + return time.Time{}, ErrHeader + } + if len(sn) < maxNanoSecondDigits { + sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad + } else { + sn = sn[:maxNanoSecondDigits] // Right truncate + } + nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed + if len(ss) > 0 && ss[0] == '-' { + return time.Unix(secs, -1*int64(nsecs)), nil // Negative correction + } + return time.Unix(secs, int64(nsecs)), nil +} + +// TODO(dsnet): Implement formatPAXTime. + +// parsePAXRecord parses the input PAX record string into a key-value pair. +// If parsing is successful, it will slice off the currently read record and +// return the remainder as r. +// +// A PAX record is of the following form: +// "%d %s=%s\n" % (size, key, value) +func parsePAXRecord(s string) (k, v, r string, err error) { + // The size field ends at the first space. + sp := strings.IndexByte(s, ' ') + if sp == -1 { + return "", "", s, ErrHeader + } + + // Parse the first token as a decimal integer. + n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int + if perr != nil || n < 5 || int64(len(s)) < n { + return "", "", s, ErrHeader + } + + // Extract everything between the space and the final newline. + rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] + if nl != "\n" { + return "", "", s, ErrHeader + } + + // The first equals separates the key from the value. + eq := strings.IndexByte(rec, '=') + if eq == -1 { + return "", "", s, ErrHeader + } + return rec[:eq], rec[eq+1:], rem, nil +} + +// formatPAXRecord formats a single PAX record, prefixing it with the +// appropriate length. +func formatPAXRecord(k, v string) string { + const padding = 3 // Extra padding for ' ', '=', and '\n' + size := len(k) + len(v) + padding + size += len(strconv.Itoa(size)) + record := fmt.Sprintf("%d %s=%s\n", size, k, v) + + // Final adjustment if adding size field increased the record size. + if len(record) != size { + size = len(record) + record = fmt.Sprintf("%d %s=%s\n", size, k, v) + } + return record +} diff --git a/vendor/archive/tar/writer.go b/vendor/archive/tar/writer.go new file mode 100644 index 0000000000..596fb8b9e1 --- /dev/null +++ b/vendor/archive/tar/writer.go @@ -0,0 +1,364 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +// TODO(dsymonds): +// - catch more errors (no first header, etc.) + +import ( + "bytes" + "errors" + "fmt" + "io" + "path" + "sort" + "strconv" + "strings" + "time" +) + +var ( + ErrWriteTooLong = errors.New("archive/tar: write too long") + ErrFieldTooLong = errors.New("archive/tar: header field too long") + ErrWriteAfterClose = errors.New("archive/tar: write after close") + errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values") +) + +// A Writer provides sequential writing of a tar archive in POSIX.1 format. +// A tar archive consists of a sequence of files. +// Call WriteHeader to begin a new file, and then call Write to supply that file's data, +// writing at most hdr.Size bytes in total. +type Writer struct { + w io.Writer + err error + nb int64 // number of unwritten bytes for current file entry + pad int64 // amount of padding to write after current file entry + closed bool + usedBinary bool // whether the binary numeric field extension was used + preferPax bool // use PAX header instead of binary numeric header + hdrBuff block // buffer to use in writeHeader when writing a regular header + paxHdrBuff block // buffer to use in writeHeader when writing a PAX header +} + +// NewWriter creates a new Writer writing to w. +func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } + +// Flush finishes writing the current file (optional). +func (tw *Writer) Flush() error { + if tw.nb > 0 { + tw.err = fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb) + return tw.err + } + + n := tw.nb + tw.pad + for n > 0 && tw.err == nil { + nr := n + if nr > blockSize { + nr = blockSize + } + var nw int + nw, tw.err = tw.w.Write(zeroBlock[0:nr]) + n -= int64(nw) + } + tw.nb = 0 + tw.pad = 0 + return tw.err +} + +var ( + minTime = time.Unix(0, 0) + // There is room for 11 octal digits (33 bits) of mtime. + maxTime = minTime.Add((1<<33 - 1) * time.Second) +) + +// WriteHeader writes hdr and prepares to accept the file's contents. +// WriteHeader calls Flush if it is not the first header. +// Calling after a Close will return ErrWriteAfterClose. +func (tw *Writer) WriteHeader(hdr *Header) error { + return tw.writeHeader(hdr, true) +} + +// WriteHeader writes hdr and prepares to accept the file's contents. +// WriteHeader calls Flush if it is not the first header. +// Calling after a Close will return ErrWriteAfterClose. +// As this method is called internally by writePax header to allow it to +// suppress writing the pax header. +func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { + if tw.closed { + return ErrWriteAfterClose + } + if tw.err == nil { + tw.Flush() + } + if tw.err != nil { + return tw.err + } + + // a map to hold pax header records, if any are needed + paxHeaders := make(map[string]string) + + // TODO(dsnet): we might want to use PAX headers for + // subsecond time resolution, but for now let's just capture + // too long fields or non ascii characters + + // We need to select which scratch buffer to use carefully, + // since this method is called recursively to write PAX headers. + // If allowPax is true, this is the non-recursive call, and we will use hdrBuff. + // If allowPax is false, we are being called by writePAXHeader, and hdrBuff is + // already being used by the non-recursive call, so we must use paxHdrBuff. + header := &tw.hdrBuff + if !allowPax { + header = &tw.paxHdrBuff + } + copy(header[:], zeroBlock[:]) + + // Wrappers around formatter that automatically sets paxHeaders if the + // argument extends beyond the capacity of the input byte slice. + var f formatter + var formatString = func(b []byte, s string, paxKeyword string) { + needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s) + if needsPaxHeader { + paxHeaders[paxKeyword] = s + return + } + f.formatString(b, s) + } + var formatNumeric = func(b []byte, x int64, paxKeyword string) { + // Try octal first. + s := strconv.FormatInt(x, 8) + if len(s) < len(b) { + f.formatOctal(b, x) + return + } + + // If it is too long for octal, and PAX is preferred, use a PAX header. + if paxKeyword != paxNone && tw.preferPax { + f.formatOctal(b, 0) + s := strconv.FormatInt(x, 10) + paxHeaders[paxKeyword] = s + return + } + + tw.usedBinary = true + f.formatNumeric(b, x) + } + + // Handle out of range ModTime carefully. + var modTime int64 + if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) { + modTime = hdr.ModTime.Unix() + } + + v7 := header.V7() + formatString(v7.Name(), hdr.Name, paxPath) + // TODO(dsnet): The GNU format permits the mode field to be encoded in + // base-256 format. Thus, we can use formatNumeric instead of formatOctal. + f.formatOctal(v7.Mode(), hdr.Mode) + formatNumeric(v7.UID(), int64(hdr.Uid), paxUid) + formatNumeric(v7.GID(), int64(hdr.Gid), paxGid) + formatNumeric(v7.Size(), hdr.Size, paxSize) + // TODO(dsnet): Consider using PAX for finer time granularity. + formatNumeric(v7.ModTime(), modTime, paxNone) + v7.TypeFlag()[0] = hdr.Typeflag + formatString(v7.LinkName(), hdr.Linkname, paxLinkpath) + + ustar := header.USTAR() + formatString(ustar.UserName(), hdr.Uname, paxUname) + formatString(ustar.GroupName(), hdr.Gname, paxGname) + formatNumeric(ustar.DevMajor(), hdr.Devmajor, paxNone) + formatNumeric(ustar.DevMinor(), hdr.Devminor, paxNone) + + // TODO(dsnet): The logic surrounding the prefix field is broken when trying + // to encode the header as GNU format. The challenge with the current logic + // is that we are unsure what format we are using at any given moment until + // we have processed *all* of the fields. The problem is that by the time + // all fields have been processed, some work has already been done to handle + // each field under the assumption that it is for one given format or + // another. In some situations, this causes the Writer to be confused and + // encode a prefix field when the format being used is GNU. Thus, producing + // an invalid tar file. + // + // As a short-term fix, we disable the logic to use the prefix field, which + // will force the badly generated GNU files to become encoded as being + // the PAX format. + // + // As an alternative fix, we could hard-code preferPax to be true. However, + // this is problematic for the following reasons: + // * The preferPax functionality is not tested at all. + // * This can result in headers that try to use both the GNU and PAX + // features at the same time, which is also wrong. + // + // The proper fix for this is to use a two-pass method: + // * The first pass simply determines what set of formats can possibly + // encode the given header. + // * The second pass actually encodes the header as that given format + // without worrying about violating the format. + // + // See the following: + // https://golang.org/issue/12594 + // https://golang.org/issue/17630 + // https://golang.org/issue/9683 + const usePrefix = false + + // try to use a ustar header when only the name is too long + _, paxPathUsed := paxHeaders[paxPath] + if usePrefix && !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { + prefix, suffix, ok := splitUSTARPath(hdr.Name) + if ok { + // Since we can encode in USTAR format, disable PAX header. + delete(paxHeaders, paxPath) + + // Update the path fields + formatString(v7.Name(), suffix, paxNone) + formatString(ustar.Prefix(), prefix, paxNone) + } + } + + if tw.usedBinary { + header.SetFormat(formatGNU) + } else { + header.SetFormat(formatUSTAR) + } + + // Check if there were any formatting errors. + if f.err != nil { + tw.err = f.err + return tw.err + } + + if allowPax { + for k, v := range hdr.Xattrs { + paxHeaders[paxXattr+k] = v + } + } + + if len(paxHeaders) > 0 { + if !allowPax { + return errInvalidHeader + } + if err := tw.writePAXHeader(hdr, paxHeaders); err != nil { + return err + } + } + tw.nb = hdr.Size + tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize + + _, tw.err = tw.w.Write(header[:]) + return tw.err +} + +// splitUSTARPath splits a path according to USTAR prefix and suffix rules. +// If the path is not splittable, then it will return ("", "", false). +func splitUSTARPath(name string) (prefix, suffix string, ok bool) { + length := len(name) + if length <= nameSize || !isASCII(name) { + return "", "", false + } else if length > prefixSize+1 { + length = prefixSize + 1 + } else if name[length-1] == '/' { + length-- + } + + i := strings.LastIndex(name[:length], "/") + nlen := len(name) - i - 1 // nlen is length of suffix + plen := i // plen is length of prefix + if i <= 0 || nlen > nameSize || nlen == 0 || plen > prefixSize { + return "", "", false + } + return name[:i], name[i+1:], true +} + +// writePaxHeader writes an extended pax header to the +// archive. +func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) error { + // Prepare extended header + ext := new(Header) + ext.Typeflag = TypeXHeader + // Setting ModTime is required for reader parsing to + // succeed, and seems harmless enough. + ext.ModTime = hdr.ModTime + // The spec asks that we namespace our pseudo files + // with the current pid. However, this results in differing outputs + // for identical inputs. As such, the constant 0 is now used instead. + // golang.org/issue/12358 + dir, file := path.Split(hdr.Name) + fullName := path.Join(dir, "PaxHeaders.0", file) + + ascii := toASCII(fullName) + if len(ascii) > nameSize { + ascii = ascii[:nameSize] + } + ext.Name = ascii + // Construct the body + var buf bytes.Buffer + + // Keys are sorted before writing to body to allow deterministic output. + keys := make([]string, 0, len(paxHeaders)) + for k := range paxHeaders { + keys = append(keys, k) + } + sort.Strings(keys) + + for _, k := range keys { + fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k])) + } + + ext.Size = int64(len(buf.Bytes())) + if err := tw.writeHeader(ext, false); err != nil { + return err + } + if _, err := tw.Write(buf.Bytes()); err != nil { + return err + } + if err := tw.Flush(); err != nil { + return err + } + return nil +} + +// Write writes to the current entry in the tar archive. +// Write returns the error ErrWriteTooLong if more than +// hdr.Size bytes are written after WriteHeader. +func (tw *Writer) Write(b []byte) (n int, err error) { + if tw.closed { + err = ErrWriteAfterClose + return + } + overwrite := false + if int64(len(b)) > tw.nb { + b = b[0:tw.nb] + overwrite = true + } + n, err = tw.w.Write(b) + tw.nb -= int64(n) + if err == nil && overwrite { + err = ErrWriteTooLong + return + } + tw.err = err + return +} + +// Close closes the tar archive, flushing any unwritten +// data to the underlying writer. +func (tw *Writer) Close() error { + if tw.err != nil || tw.closed { + return tw.err + } + tw.Flush() + tw.closed = true + if tw.err != nil { + return tw.err + } + + // trailer: two zero blocks + for i := 0; i < 2; i++ { + _, tw.err = tw.w.Write(zeroBlock[:]) + if tw.err != nil { + break + } + } + return tw.err +}