Merge pull request #37999 from Microsoft/jjh/tar2vhd

LCOW: ApplyDiff() use tar2ext4, not SVM
This commit is contained in:
Sebastiaan van Stijn 2018-10-24 22:35:34 +02:00 committed by GitHub
commit 1527a67212
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 1999 additions and 28 deletions

View File

@ -71,20 +71,33 @@ import (
"time"
"github.com/Microsoft/hcsshim"
"github.com/Microsoft/hcsshim/ext4/tar2ext4"
"github.com/Microsoft/opengcs/client"
"github.com/docker/docker/daemon/graphdriver"
"github.com/docker/docker/pkg/archive"
"github.com/docker/docker/pkg/containerfs"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/ioutils"
"github.com/docker/docker/pkg/reexec"
"github.com/docker/docker/pkg/system"
"github.com/sirupsen/logrus"
)
// noreexec disables the re-exec code path when true. It stays false unless
// switched on for debugging purposes.
var noreexec bool
// init adds the "lcow" driver to the graphdriver registry, with InitDriver as
// its initialiser, and then selects how layer conversion will be run.
func init() {
	graphdriver.Register("lcow", InitDriver)

	// Normal operation: make the tar2ext4 re-exec entry point available.
	if os.Getenv("DOCKER_LCOW_NOREEXEC") == "" {
		reexec.Register("docker-lcow-tar2ext4", tar2ext4Reexec)
		return
	}

	// Any non-empty DOCKER_LCOW_NOREEXEC selects inline processing, which
	// makes issues in the re-exec code path significantly easier to debug.
	logrus.Warnf("LCOW Graphdriver is set to not re-exec. This is intended for debugging purposes only.")
	noreexec = true
}
const (
@ -846,32 +859,72 @@ func (d *Driver) Diff(id, parent string) (io.ReadCloser, error) {
// ApplyDiff converts the tar stream diff into the layer file at
// <dataRoot>/<id>/<layerFilename> and returns the size reported by the
// conversion. Unless noreexec is set, the conversion runs in a re-exec'd
// child registered as "docker-lcow-tar2ext4"; otherwise tar2ext4Actual is
// called inline. The parent argument is not used by the visible body.
func (d *Driver) ApplyDiff(id, parent string, diff io.Reader) (int64, error) {
logrus.Debugf("lcowdriver: applydiff: id %s", id)
// NOTE(review): svm is never used in the rest of this function as shown.
// This line looks like the removed side of the diff hunk (the change is
// titled "use tar2ext4, not SVM") - confirm against the committed file.
svm, err := d.startServiceVMIfNotRunning(id, nil, fmt.Sprintf("applydiff %s", id))
// Log failures here as it's undiagnosable sometimes, due to a possible panic.
// See https://github.com/moby/moby/issues/37955 for more information.
dest := filepath.Join(d.dataRoot, id, layerFilename)
if !noreexec {
// Re-exec path: the child receives dest as its only argument.
cmd := reexec.Command([]string{"docker-lcow-tar2ext4", dest}...)
stdout := bytes.NewBuffer(nil)
stderr := bytes.NewBuffer(nil)
// The child reads the tar stream from stdin; its stdout and stderr are
// captured in memory.
cmd.Stdin = diff
cmd.Stdout = stdout
cmd.Stderr = stderr
if err := cmd.Start(); err != nil {
logrus.Warnf("lcowdriver: applydiff: id %s failed to start re-exec: %s", id, err)
return 0, err
}
if err := cmd.Wait(); err != nil {
logrus.Warnf("lcowdriver: applydiff: id %s failed %s", id, err)
// Include the captured stderr so the child's own error text is not lost.
return 0, fmt.Errorf("re-exec error: %v: stderr: %s", err, stderr)
}
// The captured stdout is parsed as a base-10 size (tar2ext4Reexec
// prints the size with no surrounding text).
return strconv.ParseInt(stdout.String(), 10, 64)
}
// The inline case
size, err := tar2ext4Actual(dest, diff)
if err != nil {
logrus.Warnf("lcowdriver: applydiff: id %s failed %s", id, err)
}
return size, err
}
// tar2ext4Reexec is the re-exec entry point for writing a layer from a tar
// file. It expects the destination path in os.Args[1] and the tar stream on
// stdin. On success the resulting size is printed to stdout with nothing
// around it, so the parent can parse it directly; on failure the error is
// printed to stderr and the process exits with status 1.
func tar2ext4Reexec() {
	// Guard against a missing destination argument so that the parent gets a
	// readable message on stderr instead of an index-out-of-range panic.
	if len(os.Args) < 2 {
		fmt.Fprint(os.Stderr, "docker-lcow-tar2ext4: missing destination path argument")
		os.Exit(1)
	}

	size, err := tar2ext4Actual(os.Args[1], os.Stdin)
	if err != nil {
		fmt.Fprint(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Fprint(os.Stdout, size)
}
// tar2ext4Actual is the implementation of tar2ext4 to write a layer from a tar file.
// It can be called through re-exec (default), or inline for debugging.
//
// It creates dest, converts the tar stream diff into it with tar2ext4.Convert
// (appending a VHD footer, converting whiteouts and capping the disk size at
// maxDiskSize), and returns the size of the resulting file.
//
// NOTE(review): the body below also contains statements that reference d, id
// and svm, none of which are declared in this function. They look like the
// removed side of the diff hunk interleaved with the new code - confirm
// against the committed file before relying on this listing.
func tar2ext4Actual(dest string, diff io.Reader) (int64, error) {
// maxDiskSize is not relating to the sandbox size - this is the
// maximum possible size a layer VHD generated can be from an EXT4
// layout perspective.
const maxDiskSize = 128 * 1024 * 1024 * 1024 // 128GB
out, err := os.Create(dest)
if err != nil {
return 0, err
}
// NOTE(review): the next three lines use d, id and svm (undeclared here);
// presumably leftovers from the previous service-VM implementation.
defer d.terminateServiceVM(id, fmt.Sprintf("applydiff %s", id), false)
logrus.Debugf("lcowdriver: applydiff: waiting for svm to finish booting")
err = svm.getStartError()
defer out.Close()
if err := tar2ext4.Convert(
diff,
out,
tar2ext4.AppendVhdFooter,
tar2ext4.ConvertWhiteout,
tar2ext4.MaximumDiskSize(maxDiskSize)); err != nil {
return 0, err
}
// The returned size is taken from the file on disk after conversion.
fi, err := os.Stat(dest)
if err != nil {
// NOTE(review): this message talks about an svm failing to boot although
// the error comes from os.Stat; from here down to the loop's closing
// brace the text again references svm/d/id - presumably removed lines.
return 0, fmt.Errorf("lcowdriver: applydiff: svm failed to boot: %s", err)
}
// TODO @jhowardmsft - the retries are temporary to overcome platform reliability issues.
// Obviously this will be removed as platform bugs are fixed.
retries := 0
for {
retries++
size, err := svm.config.TarToVhd(filepath.Join(d.dataRoot, id, layerFilename), diff)
if err != nil {
if retries <= 10 {
continue
}
return 0, err
}
return size, err
return 0, err
}
return fi.Size(), nil
}
// Changes produces a list of changes between the specified layer

View File

@ -1,6 +1,6 @@
# the following lines are in sorted order, FYI
github.com/Azure/go-ansiterm d6e3b3328b783f23731bc4d058875b0371ff8109
github.com/Microsoft/hcsshim v0.7.6
github.com/Microsoft/hcsshim v0.7.9
github.com/Microsoft/go-winio v0.4.11
github.com/docker/libtrust 9cbd2a1374f46905c68a4eb3694a130610adc62a
github.com/go-check/check 4ed411733c5785b40214c70bce814c3a3a689609 https://github.com/cpuguy83/check.git

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,411 @@
package format
// SuperBlock is a fixed-width record made up only of fixed-size integers and
// arrays; its field widths add up to 1024 bytes when packed without padding.
// Field order and widths define the layout, so fields must not be reordered.
//
// NOTE(review): presumably this mirrors the ext4 on-disk superblock (see
// SuperBlockMagic) - confirm against the ext4 disk layout documentation.
type SuperBlock struct {
InodesCount uint32
BlocksCountLow uint32
RootBlocksCountLow uint32
FreeBlocksCountLow uint32
FreeInodesCount uint32
FirstDataBlock uint32
LogBlockSize uint32
LogClusterSize uint32
BlocksPerGroup uint32
ClustersPerGroup uint32
InodesPerGroup uint32
Mtime uint32
Wtime uint32
MountCount uint16
MaxMountCount uint16
// Magic is compared against / set to SuperBlockMagic - presumably; verify
// against the users of this type.
Magic uint16
State uint16
Errors uint16
MinorRevisionLevel uint16
LastCheck uint32
CheckInterval uint32
CreatorOS uint32
RevisionLevel uint32
DefaultReservedUid uint16
DefaultReservedGid uint16
FirstInode uint32
InodeSize uint16
BlockGroupNr uint16
// Feature bit sets; the flag values are the CompatFeature,
// IncompatFeature and RoCompatFeature constants declared in this package.
FeatureCompat CompatFeature
FeatureIncompat IncompatFeature
FeatureRoCompat RoCompatFeature
UUID [16]uint8
VolumeName [16]byte
LastMounted [64]byte
AlgorithmUsageBitmap uint32
PreallocBlocks uint8
PreallocDirBlocks uint8
ReservedGdtBlocks uint16
JournalUUID [16]uint8
JournalInum uint32
JournalDev uint32
LastOrphan uint32
HashSeed [4]uint32
DefHashVersion uint8
JournalBackupType uint8
DescSize uint16
DefaultMountOpts uint32
FirstMetaBg uint32
MkfsTime uint32
JournalBlocks [17]uint32
// High halves of the *Low block counters declared near the top of the
// struct (per their names).
BlocksCountHigh uint32
RBlocksCountHigh uint32
FreeBlocksCountHigh uint32
MinExtraIsize uint16
WantExtraIsize uint16
Flags uint32
RaidStride uint16
MmpInterval uint16
MmpBlock uint64
RaidStripeWidth uint32
LogGroupsPerFlex uint8
ChecksumType uint8
ReservedPad uint16
KbytesWritten uint64
SnapshotInum uint32
SnapshotID uint32
SnapshotRBlocksCount uint64
SnapshotList uint32
ErrorCount uint32
FirstErrorTime uint32
FirstErrorInode uint32
FirstErrorBlock uint64
FirstErrorFunc [32]uint8
FirstErrorLine uint32
LastErrorTime uint32
LastErrorInode uint32
LastErrorLine uint32
LastErrorBlock uint64
LastErrorFunc [32]uint8
MountOpts [64]uint8
UserQuotaInum uint32
GroupQuotaInum uint32
OverheadBlocks uint32
BackupBgs [2]uint32
EncryptAlgos [4]uint8
EncryptPwSalt [16]uint8
LpfInode uint32
ProjectQuotaInum uint32
ChecksumSeed uint32
// One-byte *High companions of the 32-bit time fields above (per their
// names).
WtimeHigh uint8
MtimeHigh uint8
MkfsTimeHigh uint8
LastcheckHigh uint8
FirstErrorTimeHigh uint8
LastErrorTimeHigh uint8
Pad [2]uint8
Reserved [96]uint32
// Checksum is the last field of the record.
Checksum uint32
}
// SuperBlockMagic is a 16-bit magic constant, presumably the value expected
// in SuperBlock.Magic - confirm against the code that fills in the superblock.
const SuperBlockMagic uint16 = 0xef53
// CompatFeature, IncompatFeature and RoCompatFeature are the bit-flag types
// of the SuperBlock fields FeatureCompat, FeatureIncompat and FeatureRoCompat.
type (
	CompatFeature   uint32
	IncompatFeature uint32
	RoCompatFeature uint32
)

// Flags of type CompatFeature, one bit each.
const (
	CompatDirPrealloc   CompatFeature = 1 << 0
	CompatImagicInodes  CompatFeature = 1 << 1
	CompatHasJournal    CompatFeature = 1 << 2
	CompatExtAttr       CompatFeature = 1 << 3
	CompatResizeInode   CompatFeature = 1 << 4
	CompatDirIndex      CompatFeature = 1 << 5
	CompatLazyBg        CompatFeature = 1 << 6
	CompatExcludeInode  CompatFeature = 1 << 7
	CompatExcludeBitmap CompatFeature = 1 << 8
	CompatSparseSuper2  CompatFeature = 1 << 9
)

// Flags of type IncompatFeature, one bit each. Bits 5 and 11 have no named
// flag here.
const (
	IncompatCompression IncompatFeature = 1 << 0
	IncompatFiletype    IncompatFeature = 1 << 1
	IncompatRecover     IncompatFeature = 1 << 2
	IncompatJournalDev  IncompatFeature = 1 << 3
	IncompatMetaBg      IncompatFeature = 1 << 4
	IncompatExtents     IncompatFeature = 1 << 6
	Incompat_64Bit      IncompatFeature = 1 << 7
	IncompatMmp         IncompatFeature = 1 << 8
	IncompatFlexBg      IncompatFeature = 1 << 9
	IncompatEaInode     IncompatFeature = 1 << 10
	IncompatDirdata     IncompatFeature = 1 << 12
	IncompatCsumSeed    IncompatFeature = 1 << 13
	IncompatLargedir    IncompatFeature = 1 << 14
	IncompatInlineData  IncompatFeature = 1 << 15
	IncompatEncrypt     IncompatFeature = 1 << 16
)

// Flags of type RoCompatFeature, one bit each.
const (
	RoCompatSparseSuper  RoCompatFeature = 1 << 0
	RoCompatLargeFile    RoCompatFeature = 1 << 1
	RoCompatBtreeDir     RoCompatFeature = 1 << 2
	RoCompatHugeFile     RoCompatFeature = 1 << 3
	RoCompatGdtCsum      RoCompatFeature = 1 << 4
	RoCompatDirNlink     RoCompatFeature = 1 << 5
	RoCompatExtraIsize   RoCompatFeature = 1 << 6
	RoCompatHasSnapshot  RoCompatFeature = 1 << 7
	RoCompatQuota        RoCompatFeature = 1 << 8
	RoCompatBigalloc     RoCompatFeature = 1 << 9
	RoCompatMetadataCsum RoCompatFeature = 1 << 10
	RoCompatReplica      RoCompatFeature = 1 << 11
	RoCompatReadonly     RoCompatFeature = 1 << 12
	RoCompatProject      RoCompatFeature = 1 << 13
)
// BlockGroupFlag is the 16-bit flag set stored in GroupDescriptor.Flags.
type BlockGroupFlag uint16

// Flags of type BlockGroupFlag, occupying the three lowest bits.
const (
	BlockGroupInodeUninit BlockGroupFlag = 1 << iota
	BlockGroupBlockUninit
	BlockGroupInodeZeroed
)
// GroupDescriptor is a fixed-width record whose field widths add up to 32
// bytes (BlockGroupFlag is a uint16). Field order defines the layout.
//
// NOTE(review): presumably the 32-bit form of the ext4 block group
// descriptor; GroupDescriptor64 embeds it and adds the *High halves.
type GroupDescriptor struct {
BlockBitmapLow uint32
InodeBitmapLow uint32
InodeTableLow uint32
FreeBlocksCountLow uint16
FreeInodesCountLow uint16
UsedDirsCountLow uint16
Flags BlockGroupFlag
ExcludeBitmapLow uint32
BlockBitmapCsumLow uint16
InodeBitmapCsumLow uint16
ItableUnusedLow uint16
Checksum uint16
}
// GroupDescriptor64 embeds GroupDescriptor and appends the *High companions
// of its *Low fields plus a reserved word. The appended fields add 32 bytes,
// for 64 bytes in total.
type GroupDescriptor64 struct {
// The embedded record comes first, so its fields keep their offsets.
GroupDescriptor
BlockBitmapHigh uint32
InodeBitmapHigh uint32
InodeTableHigh uint32
FreeBlocksCountHigh uint16
FreeInodesCountHigh uint16
UsedDirsCountHigh uint16
ItableUnusedHigh uint16
ExcludeBitmapHigh uint32
BlockBitmapCsumHigh uint16
InodeBitmapCsumHigh uint16
Reserved uint32
}
// File mode bits, written in octal. The S_I* names are untyped constants;
// TypeMask is a uint16 covering the top four bits, where the S_IF* values
// live.
const (
	// Permission bits: other, group, user.
	S_IXOTH = 0001
	S_IWOTH = 0002
	S_IROTH = 0004
	S_IXGRP = 0010
	S_IWGRP = 0020
	S_IRGRP = 0040
	S_IXUSR = 0100
	S_IWUSR = 0200
	S_IRUSR = 0400

	// Sticky, set-group-ID and set-user-ID bits.
	S_ISVTX = 01000
	S_ISGID = 02000
	S_ISUID = 04000

	// File type values.
	S_IFIFO  = 0010000
	S_IFCHR  = 0020000
	S_IFDIR  = 0040000
	S_IFBLK  = 0060000
	S_IFREG  = 0100000
	S_IFLNK  = 0120000
	S_IFSOCK = 0140000

	TypeMask uint16 = 0170000
)
// InodeNumber is a 32-bit inode number.
type InodeNumber uint32

// InodeRoot is an untyped constant with value 2 (by its name, the inode
// number of the root directory).
const InodeRoot = 2
// Inode is a fixed-width record. The fields up to and including Reserved add
// up to 128 bytes; the fields from ExtraIsize onwards add a further 32 bytes
// (InodeFlag is a uint32). Field order defines the layout.
//
// NOTE(review): presumably mirrors the ext4 on-disk inode - confirm against
// the ext4 disk layout documentation.
type Inode struct {
Mode uint16
Uid uint16
SizeLow uint32
Atime uint32
Ctime uint32
Mtime uint32
Dtime uint32
Gid uint16
LinksCount uint16
BlocksLow uint32
Flags InodeFlag
Version uint32
// Block is an opaque 60-byte area in this declaration.
Block [60]byte
Generation uint32
XattrBlockLow uint32
SizeHigh uint32
ObsoleteFragmentAddr uint32
BlocksHigh uint16
XattrBlockHigh uint16
UidHigh uint16
GidHigh uint16
ChecksumLow uint16
Reserved uint16
// Fields below here lie beyond the first 128 bytes of the record.
ExtraIsize uint16
ChecksumHigh uint16
CtimeExtra uint32
MtimeExtra uint32
AtimeExtra uint32
Crtime uint32
CrtimeExtra uint32
VersionHigh uint32
Projid uint32
}
// InodeFlag is the 32-bit flag set stored in Inode.Flags.
type InodeFlag uint32

// Flags of type InodeFlag, one bit each. Bits 20, 23, 25 and 30 have no named
// flag here.
const (
	InodeFlagSecRm              InodeFlag = 1 << 0
	InodeFlagUnRm               InodeFlag = 1 << 1
	InodeFlagCompressed         InodeFlag = 1 << 2
	InodeFlagSync               InodeFlag = 1 << 3
	InodeFlagImmutable          InodeFlag = 1 << 4
	InodeFlagAppend             InodeFlag = 1 << 5
	InodeFlagNoDump             InodeFlag = 1 << 6
	InodeFlagNoAtime            InodeFlag = 1 << 7
	InodeFlagDirtyCompressed    InodeFlag = 1 << 8
	InodeFlagCompressedClusters InodeFlag = 1 << 9
	InodeFlagNoCompress         InodeFlag = 1 << 10
	InodeFlagEncrypted          InodeFlag = 1 << 11
	InodeFlagHashedIndex        InodeFlag = 1 << 12
	InodeFlagMagic              InodeFlag = 1 << 13
	InodeFlagJournalData        InodeFlag = 1 << 14
	InodeFlagNoTail             InodeFlag = 1 << 15
	InodeFlagDirSync            InodeFlag = 1 << 16
	InodeFlagTopDir             InodeFlag = 1 << 17
	InodeFlagHugeFile           InodeFlag = 1 << 18
	InodeFlagExtents            InodeFlag = 1 << 19
	InodeFlagEaInode            InodeFlag = 1 << 21
	InodeFlagEOFBlocks          InodeFlag = 1 << 22
	InodeFlagSnapfile           InodeFlag = 1 << 24
	InodeFlagSnapfileDeleted    InodeFlag = 1 << 26
	InodeFlagSnapfileShrunk     InodeFlag = 1 << 27
	InodeFlagInlineData         InodeFlag = 1 << 28
	InodeFlagProjectIDInherit   InodeFlag = 1 << 29
	InodeFlagReserved           InodeFlag = 1 << 31
)
// MaxLinks is an untyped constant with value 65000 (by its name, an upper
// bound on link counts).
const MaxLinks = 65000
// ExtentHeader is a fixed-width 12-byte record: four 16-bit fields followed
// by a 32-bit generation.
type ExtentHeader struct {
	Magic, Entries, Max, Depth uint16
	Generation                 uint32
}

// ExtentHeaderMagic is a 16-bit magic constant (by its name, the value
// carried in ExtentHeader.Magic).
const ExtentHeaderMagic uint16 = 0xf30a
// ExtentIndexNode is a fixed-width 12-byte record: a 32-bit block, a leaf
// value split into 32-bit low and 16-bit high parts, and 16 unused bits.
type ExtentIndexNode struct {
	Block, LeafLow   uint32
	LeafHigh, Unused uint16
}
// ExtentLeafNode is a fixed-width 12-byte record: a 32-bit block, a 16-bit
// length, and a start value split into 16-bit high and 32-bit low parts.
type ExtentLeafNode struct {
	Block             uint32
	Length, StartHigh uint16
	StartLow          uint32
}
// ExtentTail is a 4-byte record holding a single 32-bit checksum.
type ExtentTail struct{ Checksum uint32 }
// DirectoryEntry is the fixed-width (8-byte) leading part of a directory
// entry: inode number, record length, name length and file type.
type DirectoryEntry struct {
Inode InodeNumber
RecordLength uint16
NameLength uint8
FileType FileType
// The name itself is not part of this struct; the field is kept only as a
// commented-out reminder. Presumably NameLength bytes of name follow the
// header - confirm against the code that writes directory blocks.
//Name []byte
}
// FileType is the 8-bit file type code stored in DirectoryEntry.FileType.
type FileType uint8

// FileType values, numbered consecutively from 0.
const (
	FileTypeUnknown FileType = iota
	FileTypeRegular
	FileTypeDirectory
	FileTypeCharacter
	FileTypeBlock
	FileTypeFIFO
	FileTypeSocket
	FileTypeSymbolicLink
)
// DirectoryEntryTail is a fixed-width 12-byte record with the same field
// widths as a DirectoryEntry header (32, 16, 8 and 8 bits) followed by a
// 32-bit checksum.
type DirectoryEntryTail struct {
	ReservedZero1           uint32
	RecordLength            uint16
	ReservedZero2, FileType uint8
	Checksum                uint32
}
// DirectoryTreeRoot is a fixed-width record that starts with two directory
// entry headers (Dot and DotDot), each followed by a 4-byte name area, and
// continues with tree bookkeeping fields.
type DirectoryTreeRoot struct {
	Dot        DirectoryEntry
	DotName    [4]byte
	DotDot     DirectoryEntry
	DotDotName [4]byte

	ReservedZero                                         uint32
	HashVersion, InfoLength, IndirectLevels, UnusedFlags uint8
	Limit, Count                                         uint16
	Block                                                uint32
	// Entries []DirectoryTreeEntry is deliberately not declared as a field.
}
// DirectoryTreeNode is a fixed-width 16-byte record. Its first four fields
// have the widths of a directory entry header (32, 16, 8 and 8 bits); they
// are followed by Limit, Count and Block.
type DirectoryTreeNode struct {
	FakeInode            uint32
	FakeRecordLength     uint16
	NameLength, FileType uint8
	Limit, Count         uint16
	Block                uint32
	// Entries []DirectoryTreeEntry is deliberately not declared as a field.
}
// DirectoryTreeEntry is an 8-byte record pairing a 32-bit hash with a 32-bit
// block.
type DirectoryTreeEntry struct {
	Hash, Block uint32
}
// DirectoryTreeTail is an 8-byte record: a reserved 32-bit word followed by a
// 32-bit checksum.
type DirectoryTreeTail struct {
	Reserved, Checksum uint32
}
// XAttrInodeBodyHeader is a 4-byte record holding a single 32-bit magic.
type XAttrInodeBodyHeader struct{ Magic uint32 }
// XAttrHeader is a fixed-width 32-byte record: five 32-bit fields followed by
// three reserved 32-bit words.
type XAttrHeader struct {
	Magic, ReferenceCount, Blocks, Hash, Checksum uint32
	Reserved                                      [3]uint32
}

// XAttrHeaderMagic is a 32-bit magic constant (by its name, the value carried
// in XAttrHeader.Magic).
const XAttrHeaderMagic uint32 = 0xea020000
// XAttrEntry is the fixed-width (16-byte) leading part of an extended
// attribute entry; the name is not part of the struct.
type XAttrEntry struct {
	NameLength, NameIndex      uint8
	ValueOffset                uint16
	ValueInum, ValueSize, Hash uint32
	// Name []byte is deliberately not declared as a field.
}

View File

@ -0,0 +1,174 @@
package tar2ext4
import (
"archive/tar"
"bufio"
"encoding/binary"
"io"
"path"
"strings"
"github.com/Microsoft/hcsshim/ext4/internal/compactext4"
)
// params collects the settings chosen through the Options given to Convert.
type params struct {
	convertWhiteout, appendVhdFooter bool
	ext4opts                         []compactext4.Option
}

// Option is the type for optional parameters to Convert. Each Option is
// applied to the conversion's params before any input is read.
type Option func(*params)
// ConvertWhiteout instructs the converter to convert OCI-style whiteouts
// (beginning with .wh.) to overlay-style whiteouts.
//
// ConvertWhiteout has the signature of an Option, so it is passed to Convert
// as a value (without calling it).
func ConvertWhiteout(p *params) {
p.convertWhiteout = true
}
// AppendVhdFooter instructs the converter to add a fixed VHD footer to the
// file.
//
// AppendVhdFooter has the signature of an Option, so it is passed to Convert
// as a value (without calling it).
func AppendVhdFooter(p *params) {
p.appendVhdFooter = true
}
// InlineData instructs the converter to write small files into the inode
// structures directly. This creates smaller images but currently is not
// compatible with DAX.
//
// InlineData has the signature of an Option, so it is passed to Convert as a
// value (without calling it). It works by forwarding compactext4.InlineData
// to the ext4 writer.
func InlineData(p *params) {
p.ext4opts = append(p.ext4opts, compactext4.InlineData)
}
// MaximumDiskSize instructs the writer to limit the disk size to the specified
// value. This also reserves enough metadata space for the specified disk size.
// If not provided, then 16GB is the default.
//
// Unlike the flag-style options, MaximumDiskSize must be called with a size
// to obtain the Option to pass to Convert.
func MaximumDiskSize(size int64) Option {
return func(p *params) {
// The limit is enforced by the ext4 writer; it is simply forwarded here.
p.ext4opts = append(p.ext4opts, compactext4.MaximumDiskSize(size))
}
}
const (
	// whiteoutPrefix marks a tar entry name as a whiteout.
	whiteoutPrefix = ".wh."
	// opaqueWhiteout is the entry name that marks its directory as opaque:
	// the prefix twice, followed by ".opq".
	opaqueWhiteout = whiteoutPrefix + whiteoutPrefix + ".opq"
)
// Convert reads the tar stream r and writes a compact ext4 file system image
// containing its files to w.
//
// The options adjust the conversion: with ConvertWhiteout, entries whose base
// name starts with ".wh." are turned into overlay-style whiteouts instead of
// being stored; with AppendVhdFooter, a fixed VHD footer is written after the
// image; any ext4 options are handed to the ext4 writer. The first error
// encountered is returned as is.
func Convert(r io.Reader, w io.ReadWriteSeeker, options ...Option) error {
	const xattrPrefix = "SCHILY.xattr."

	var p params
	for _, apply := range options {
		apply(&p)
	}

	tr := tar.NewReader(bufio.NewReader(r))
	fs := compactext4.NewWriter(w, p.ext4opts...)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}

		if p.convertWhiteout {
			dir, name := path.Split(hdr.Name)
			if strings.HasPrefix(name, whiteoutPrefix) {
				if name != opaqueWhiteout {
					// Ordinary whiteout: a 0/0 character device named after
					// the hidden entry.
					target := path.Join(dir, name[len(whiteoutPrefix):])
					whiteout := &compactext4.File{
						Mode:     compactext4.S_IFCHR,
						Devmajor: 0,
						Devminor: 0,
					}
					if err := fs.Create(target, whiteout); err != nil {
						return err
					}
					continue
				}

				// Opaque marker: re-create the containing directory with the
				// overlay "opaque" xattr added to what is already recorded.
				parent, err := fs.Stat(dir)
				if err != nil {
					return err
				}
				parent.Xattrs["trusted.overlay.opaque"] = []byte("y")
				if err := fs.Create(dir, parent); err != nil {
					return err
				}
				continue
			}
		}

		// Hard links carry no content of their own.
		if hdr.Typeflag == tar.TypeLink {
			if err := fs.Link(hdr.Linkname, hdr.Name); err != nil {
				return err
			}
			continue
		}

		// Map the tar entry type to the file type bits; entry types not
		// listed leave the type bits at zero.
		var typ uint16
		switch hdr.Typeflag {
		case tar.TypeReg, tar.TypeRegA:
			typ = compactext4.S_IFREG
		case tar.TypeSymlink:
			typ = compactext4.S_IFLNK
		case tar.TypeChar:
			typ = compactext4.S_IFCHR
		case tar.TypeBlock:
			typ = compactext4.S_IFBLK
		case tar.TypeDir:
			typ = compactext4.S_IFDIR
		case tar.TypeFifo:
			typ = compactext4.S_IFIFO
		}

		// Extended attributes arrive as PAX records under a fixed prefix.
		xattrs := make(map[string][]byte)
		for key, value := range hdr.PAXRecords {
			if strings.HasPrefix(key, xattrPrefix) {
				xattrs[key[len(xattrPrefix):]] = []byte(value)
			}
		}

		entry := &compactext4.File{
			// Keep the permission bits from the header, but take the type
			// bits from the mapping above.
			Mode:     (uint16(hdr.Mode) &^ compactext4.TypeMask) | typ,
			Atime:    hdr.AccessTime,
			Mtime:    hdr.ModTime,
			Ctime:    hdr.ChangeTime,
			Crtime:   hdr.ModTime,
			Size:     hdr.Size,
			Uid:      uint32(hdr.Uid),
			Gid:      uint32(hdr.Gid),
			Linkname: hdr.Linkname,
			Devmajor: uint32(hdr.Devmajor),
			Devminor: uint32(hdr.Devminor),
			Xattrs:   xattrs,
		}
		if err := fs.Create(hdr.Name, entry); err != nil {
			return err
		}
		if _, err := io.Copy(fs, tr); err != nil {
			return err
		}
	}

	if err := fs.Close(); err != nil {
		return err
	}
	if !p.appendVhdFooter {
		return nil
	}

	// The footer goes at the very end of the output and records the size of
	// everything written before it.
	size, err := w.Seek(0, io.SeekEnd)
	if err != nil {
		return err
	}
	return binary.Write(w, binary.BigEndian, makeFixedVHDFooter(size))
}

View File

@ -0,0 +1,76 @@
package tar2ext4
import (
"bytes"
"crypto/rand"
"encoding/binary"
)
// Fixed values placed in every VHD footer built by makeFixedVHDFooter.
const (
	cookieMagic            = "conectix"
	featureMask            = 1 << 1
	fileFormatVersionMagic = 1 << 16
	fixedDataOffset        = -1
	creatorVersionMagic    = 10 << 16
	diskTypeFixed          = 2
)
// vhdFooter is the footer record appended after the disk image. It contains
// only fixed-size fields whose widths add up to 512 bytes, and this package
// serialises it big-endian with encoding/binary. Field order defines the
// layout, so fields must not be reordered.
type vhdFooter struct {
Cookie [8]byte
Features uint32
FileFormatVersion uint32
DataOffset int64
TimeStamp uint32
CreatorApplication [4]byte
CreatorVersion uint32
CreatorHostOS [4]byte
OriginalSize int64
CurrentSize int64
DiskGeometry uint32
DiskType uint32
// Checksum is computed by calculateCheckSum over the whole footer with
// this field taken as zero.
Checksum uint32
UniqueID [16]uint8
SavedState uint8
// Reserved pads the record out to its full size.
Reserved [427]uint8
}
// makeFixedVHDFooter builds the footer for a fixed-type VHD of the given
// size. Both size fields are set to size, a fresh random UniqueID is
// generated, and the checksum is filled in last so that it covers every other
// field. TimeStamp, CreatorApplication, CreatorHostOS, DiskGeometry,
// SavedState and Reserved are left at their zero values.
func makeFixedVHDFooter(size int64) *vhdFooter {
	var f vhdFooter
	copy(f.Cookie[:], cookieMagic)
	f.Features = featureMask
	f.FileFormatVersion = fileFormatVersionMagic
	f.DataOffset = fixedDataOffset
	f.CreatorVersion = creatorVersionMagic
	f.OriginalSize = size
	f.CurrentSize = size
	f.DiskType = diskTypeFixed
	f.UniqueID = generateUUID()

	f.Checksum = calculateCheckSum(&f)
	return &f
}
// calculateCheckSum returns the one's complement of the byte-wise sum of the
// footer, serialised big-endian with its Checksum field taken as zero. The
// footer's Checksum field holds its previous value again on return.
func calculateCheckSum(footer *vhdFooter) uint32 {
	saved := footer.Checksum
	footer.Checksum = 0

	// The footer is a fixed-size struct written to an in-memory buffer; the
	// error result of binary.Write is not checked.
	var encoded bytes.Buffer
	binary.Write(&encoded, binary.BigEndian, footer)
	footer.Checksum = saved

	var sum uint32
	for _, b := range encoded.Bytes() {
		sum += uint32(b)
	}
	return ^sum
}
// generateUUID returns 16 bytes read from crypto/rand. It panics if the
// random source fails.
func generateUUID() [16]byte {
	var id [16]byte
	_, err := rand.Read(id[:])
	if err != nil {
		panic(err)
	}
	return id
}

View File

@ -1,6 +0,0 @@
package hcsshim
// IsTP4 always returns false.
//
// NOTE(review): the earlier comment said this reports whether the currently
// running Windows build is at least TP4, but the body performs no check.
func IsTP4() bool {
return false
}