package compactext4
import (
"bufio"
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"path"
"sort"
"strings"
"time"
"github.com/Microsoft/hcsshim/ext4/internal/format"
)
// Writer writes a compact ext4 file system.
type Writer struct {
f io.ReadWriteSeeker
bw *bufio.Writer
inodes []*inode
curName string
curInode *inode
pos int64
dataWritten, dataMax int64
err error
initialized bool
supportInlineData bool
maxDiskSize int64
gdBlocks uint32
}
// Mode flags for Linux files.
const (
S_IXOTH = format.S_IXOTH
S_IWOTH = format.S_IWOTH
S_IROTH = format.S_IROTH
S_IXGRP = format.S_IXGRP
S_IWGRP = format.S_IWGRP
S_IRGRP = format.S_IRGRP
S_IXUSR = format.S_IXUSR
S_IWUSR = format.S_IWUSR
S_IRUSR = format.S_IRUSR
S_ISVTX = format.S_ISVTX
S_ISGID = format.S_ISGID
S_ISUID = format.S_ISUID
S_IFIFO = format.S_IFIFO
S_IFCHR = format.S_IFCHR
S_IFDIR = format.S_IFDIR
S_IFBLK = format.S_IFBLK
S_IFREG = format.S_IFREG
S_IFLNK = format.S_IFLNK
S_IFSOCK = format.S_IFSOCK
TypeMask = format.TypeMask
)
type inode struct {
Size int64
Atime, Ctime, Mtime, Crtime uint64
Number format.InodeNumber
Mode uint16
Uid, Gid uint32
LinkCount uint32
XattrBlock uint32
BlockCount uint32
Devmajor, Devminor uint32
Flags format.InodeFlag
Data []byte
XattrInline []byte
Children directory
}
func (node *inode) FileType() uint16 {
return node.Mode & format.TypeMask
}
func (node *inode) IsDir() bool {
return node.FileType() == S_IFDIR
}
// A File represents a file to be added to an ext4 file system.
type File struct {
Linkname string
Size int64
Mode uint16
Uid, Gid uint32
Atime, Ctime, Mtime, Crtime time.Time
Devmajor, Devminor uint32
Xattrs map[string][]byte
}
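// A File describes metadata only; the contents of a regular file are supplied
// through Writer.Write after Writer.Create returns, and must total exactly
// Size bytes. An illustrative sketch (field values and the payload variable
// are placeholders, not part of this package):
//
//	regular := &compactext4.File{
//		Mode:  compactext4.S_IFREG | 0644,
//		Size:  int64(len(payload)),
//		Mtime: time.Now(),
//	}
//	symlink := &compactext4.File{
//		Mode:     compactext4.S_IFLNK, // 0777 permission bits are added automatically
//		Linkname: "target/path",
//	}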
const (
inodeFirst = 11
inodeLostAndFound = inodeFirst
blockSize = 4096
blocksPerGroup = blockSize * 8
inodeSize = 256
maxInodesPerGroup = blockSize * 8 // Limited by the inode bitmap
inodesPerGroupIncrement = blockSize / inodeSize
defaultMaxDiskSize = 16 * 1024 * 1024 * 1024 // 16GB
maxMaxDiskSize = 16 * 1024 * 1024 * 1024 * 1024 // 16TB
groupDescriptorSize = 32 // Use the small group descriptor
groupsPerDescriptorBlock = blockSize / groupDescriptorSize
maxFileSize = 128 * 1024 * 1024 * 1024 // 128GB file size maximum for now
smallSymlinkSize = 59 // max symlink size that goes directly in the inode
maxBlocksPerExtent = 0x8000 // maximum number of blocks in an extent
inodeDataSize = 60
inodeUsedSize = 152 // fields through CrtimeExtra
inodeExtraSize = inodeSize - inodeUsedSize
xattrInodeOverhead = 4 + 4 // magic number + empty next entry value
xattrBlockOverhead = 32 + 4 // header + empty next entry value
inlineDataXattrOverhead = xattrInodeOverhead + 16 + 4 // entry + "data"
inlineDataSize = inodeDataSize + inodeExtraSize - inlineDataXattrOverhead
)
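// With the values above, the largest file that can be stored inline is
// inlineDataSize = 60 + (256 - 152) - 28 = 136 bytes: the 60-byte block
// pointer area plus the unused tail of the 256-byte inode, minus the
// overhead of the "system.data" xattr entry that holds the spilled bytes.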
type exceededMaxSizeError struct {
Size int64
}
func (err exceededMaxSizeError) Error() string {
return fmt.Sprintf("disk exceeded maximum size of %d bytes", err.Size)
}
var directoryEntrySize = binary.Size(format.DirectoryEntry{})
var extraIsize = uint16(inodeUsedSize - 128)
type directory map[string]*inode
func splitFirst(p string) (string, string) {
n := strings.IndexByte(p, '/')
if n >= 0 {
return p[:n], p[n+1:]
}
return p, ""
}
func (w *Writer) findPath(root *inode, p string) *inode {
inode := root
for inode != nil && len(p) != 0 {
name, rest := splitFirst(p)
p = rest
inode = inode.Children[name]
}
return inode
}
func timeToFsTime(t time.Time) uint64 {
if t.IsZero() {
return 0
}
s := t.Unix()
if s < -0x80000000 {
return 0x80000000
}
if s > 0x37fffffff {
return 0x37fffffff
}
return uint64(s) | uint64(t.Nanosecond())<<34
}
func fsTimeToTime(t uint64) time.Time {
if t == 0 {
return time.Time{}
}
s := int64(t & 0x3ffffffff)
if s > 0x7fffffff && s < 0x100000000 {
s = int64(int32(uint32(s)))
}
return time.Unix(s, int64(t>>34))
}
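// The packed layout mirrors how writeInodeTable later splits the value:
// uint32(v) becomes the classic 32-bit seconds field, and uint32(v >> 32)
// becomes the "extra" field, whose low 2 bits extend the seconds past 2038
// and whose remaining bits carry the nanoseconds (v >> 34).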
func (w *Writer) getInode(i format.InodeNumber) *inode {
if i == 0 || int(i) > len(w.inodes) {
return nil
}
return w.inodes[i-1]
}
var xattrPrefixes = []struct {
Index uint8
Prefix string
}{
{2, "system.posix_acl_access"},
{3, "system.posix_acl_default"},
{8, "system.richacl"},
{7, "system."},
{1, "user."},
{4, "trusted."},
{6, "security."},
}
func compressXattrName(name string) (uint8, string) {
for _, p := range xattrPrefixes {
if strings.HasPrefix(name, p.Prefix) {
return p.Index, name[len(p.Prefix):]
}
}
return 0, name
}
func decompressXattrName(index uint8, name string) string {
for _, p := range xattrPrefixes {
if index == p.Index {
return p.Prefix + name
}
}
return name
}
func hashXattrEntry(name string, value []byte) uint32 {
var hash uint32
for i := 0; i < len(name); i++ {
hash = (hash << 5) ^ (hash >> 27) ^ uint32(name[i])
}
for i := 0; i+3 < len(value); i += 4 {
hash = (hash << 16) ^ (hash >> 16) ^ binary.LittleEndian.Uint32(value[i:i+4])
}
if len(value)%4 != 0 {
var last [4]byte
copy(last[:], value[len(value)&^3:])
hash = (hash << 16) ^ (hash >> 16) ^ binary.LittleEndian.Uint32(last[:])
}
return hash
}
type xattr struct {
Name string
Index uint8
Value []byte
}
func (x *xattr) EntryLen() int {
return (len(x.Name)+3)&^3 + 16
}
func (x *xattr) ValueLen() int {
return (len(x.Value) + 3) &^ 3
}
type xattrState struct {
inode, block []xattr
inodeLeft, blockLeft int
}
func (s *xattrState) init() {
s.inodeLeft = inodeExtraSize - xattrInodeOverhead
s.blockLeft = blockSize - xattrBlockOverhead
}
func (s *xattrState) addXattr(name string, value []byte) bool {
index, name := compressXattrName(name)
x := xattr{
Index: index,
Name: name,
Value: value,
}
length := x.EntryLen() + x.ValueLen()
if s.inodeLeft >= length {
s.inode = append(s.inode, x)
s.inodeLeft -= length
} else if s.blockLeft >= length {
s.block = append(s.block, x)
s.blockLeft -= length
} else {
return false
}
return true
}
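// With the constants above, an inode can absorb xattrs into its unused tail
// (104 - 8 = 96 bytes of entries plus values) before they spill into a
// dedicated xattr block, which offers 4096 - 36 = 4060 bytes.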
func putXattrs(xattrs []xattr, b []byte, offsetDelta uint16) {
offset := uint16(len(b)) + offsetDelta
eb := b
db := b
for _, xattr := range xattrs {
vl := xattr.ValueLen()
offset -= uint16(vl)
eb[0] = uint8(len(xattr.Name))
eb[1] = xattr.Index
binary.LittleEndian.PutUint16(eb[2:], offset)
binary.LittleEndian.PutUint32(eb[8:], uint32(len(xattr.Value)))
binary.LittleEndian.PutUint32(eb[12:], hashXattrEntry(xattr.Name, xattr.Value))
copy(eb[16:], xattr.Name)
eb = eb[xattr.EntryLen():]
copy(db[len(db)-vl:], xattr.Value)
db = db[:len(db)-vl]
}
}
func getXattrs(b []byte, xattrs map[string][]byte, offsetDelta uint16) {
eb := b
for len(eb) != 0 {
nameLen := eb[0]
if nameLen == 0 {
break
}
index := eb[1]
offset := binary.LittleEndian.Uint16(eb[2:]) - offsetDelta
valueLen := binary.LittleEndian.Uint32(eb[8:])
attr := xattr{
Index: index,
Name: string(eb[16 : 16+nameLen]),
Value: b[offset : uint32(offset)+valueLen],
}
xattrs[decompressXattrName(index, attr.Name)] = attr.Value
eb = eb[attr.EntryLen():]
}
}
func (w *Writer) writeXattrs(inode *inode, state *xattrState) error {
// Write the inline attributes.
if len(state.inode) != 0 {
inode.XattrInline = make([]byte, inodeExtraSize)
binary.LittleEndian.PutUint32(inode.XattrInline[0:], format.XAttrHeaderMagic) // Magic
putXattrs(state.inode, inode.XattrInline[4:], 0)
}
// Write the block attributes. If there was previously an xattr block, then
// rewrite it even if it is now empty.
if len(state.block) != 0 || inode.XattrBlock != 0 {
sort.Slice(state.block, func(i, j int) bool {
	xi, xj := &state.block[i], &state.block[j]
	if xi.Index != xj.Index {
		return xi.Index < xj.Index
	}
	if len(xi.Name) != len(xj.Name) {
		return len(xi.Name) < len(xj.Name)
	}
	return xi.Name < xj.Name
})
var b [blockSize]byte
binary.LittleEndian.PutUint32(b[0:], format.XAttrHeaderMagic) // Magic
binary.LittleEndian.PutUint32(b[4:], 1) // ReferenceCount
binary.LittleEndian.PutUint32(b[8:], 1) // Blocks
putXattrs(state.block, b[32:], 32)
orig := w.block()
if inode.XattrBlock == 0 {
inode.XattrBlock = orig
inode.BlockCount++
} else {
// Reuse the original block.
w.seekBlock(inode.XattrBlock)
defer w.seekBlock(orig)
}
if _, err := w.write(b[:]); err != nil {
return err
}
}
return nil
}
func (w *Writer) write(b []byte) (int, error) {
if w.err != nil {
return 0, w.err
}
if w.pos+int64(len(b)) > w.maxDiskSize {
w.err = exceededMaxSizeError{w.maxDiskSize}
return 0, w.err
}
n, err := w.bw.Write(b)
w.pos += int64(n)
w.err = err
return n, err
}
func (w *Writer) zero(n int64) (int64, error) {
if w.err != nil {
return 0, w.err
}
if w.pos+int64(n) > w.maxDiskSize {
w.err = exceededMaxSizeError{w.maxDiskSize}
return 0, w.err
}
n, err := io.CopyN(w.bw, zero, n)
w.pos += n
w.err = err
return n, err
}
func (w *Writer) makeInode(f *File, node *inode) (*inode, error) {
mode := f.Mode
if mode&format.TypeMask == 0 {
mode |= format.S_IFREG
}
typ := mode & format.TypeMask
ino := format.InodeNumber(len(w.inodes) + 1)
if node == nil {
node = &inode{
Number: ino,
}
if typ == S_IFDIR {
node.Children = make(directory)
node.LinkCount = 1 // A directory is linked to itself.
}
} else if node.Flags&format.InodeFlagExtents != 0 {
// Since we cannot deallocate or reuse blocks, don't allow updates that
// would invalidate data that has already been written.
return nil, errors.New("cannot overwrite file with non-inline data")
}
node.Mode = mode
node.Uid = f.Uid
node.Gid = f.Gid
node.Flags = format.InodeFlagHugeFile
node.Atime = timeToFsTime(f.Atime)
node.Ctime = timeToFsTime(f.Ctime)
node.Mtime = timeToFsTime(f.Mtime)
node.Crtime = timeToFsTime(f.Crtime)
node.Devmajor = f.Devmajor
node.Devminor = f.Devminor
node.Data = nil
node.XattrInline = nil
var xstate xattrState
xstate.init()
var size int64
switch typ {
case format.S_IFREG:
size = f.Size
if f.Size > maxFileSize {
return nil, fmt.Errorf("file too big: %d > %d", f.Size, int64(maxFileSize))
}
if f.Size <= inlineDataSize && w.supportInlineData {
node.Data = make([]byte, f.Size)
extra := 0
if f.Size > inodeDataSize {
extra = int(f.Size - inodeDataSize)
}
// Add a dummy entry for now.
if !xstate.addXattr("system.data", node.Data[:extra]) {
panic("not enough room for inline data")
}
node.Flags |= format.InodeFlagInlineData
}
case format.S_IFLNK:
node.Mode |= 0777 // Symlinks should appear as user/group/other rwx
size = int64(len(f.Linkname))
if size <= smallSymlinkSize {
// Special case: small symlinks go directly in Block without setting
// an inline data flag.
node.Data = make([]byte, len(f.Linkname))
copy(node.Data, f.Linkname)
}
case format.S_IFDIR, format.S_IFIFO, format.S_IFSOCK, format.S_IFCHR, format.S_IFBLK:
default:
return nil, fmt.Errorf("invalid mode %o", mode)
}
// Accumulate the extended attributes.
if len(f.Xattrs) != 0 {
// Sort the xattrs to avoid non-determinism in map iteration.
var xattrs []string
for name := range f.Xattrs {
xattrs = append(xattrs, name)
}
sort.Strings(xattrs)
for _, name := range xattrs {
if !xstate.addXattr(name, f.Xattrs[name]) {
return nil, fmt.Errorf("could not fit xattr %s", name)
}
}
}
if err := w.writeXattrs(node, &xstate); err != nil {
return nil, err
}
node.Size = size
if typ == format.S_IFLNK && size > smallSymlinkSize {
// Write the link name as data.
w.startInode("", node, size)
if _, err := w.Write([]byte(f.Linkname)); err != nil {
return nil, err
}
if err := w.finishInode(); err != nil {
return nil, err
}
}
if int(node.Number-1) >= len(w.inodes) {
w.inodes = append(w.inodes, node)
}
return node, nil
}
func (w *Writer) root() *inode {
return w.getInode(format.InodeRoot)
}
func (w *Writer) lookup(name string, mustExist bool) (*inode, *inode, string, error) {
root := w.root()
cleanname := path.Clean("/" + name)[1:]
if len(cleanname) == 0 {
return root, root, "", nil
}
dirname, childname := path.Split(cleanname)
if len(childname) == 0 || len(childname) > 0xff {
return nil, nil, "", fmt.Errorf("%s: invalid name", name)
}
dir := w.findPath(root, dirname)
if dir == nil || !dir.IsDir() {
return nil, nil, "", fmt.Errorf("%s: path not found", name)
}
child := dir.Children[childname]
if child == nil && mustExist {
return nil, nil, "", fmt.Errorf("%s: file not found", name)
}
return dir, child, childname, nil
}
// Create adds a file to the file system.
func (w *Writer) Create(name string, f *File) error {
if err := w.finishInode(); err != nil {
return err
}
dir, existing, childname, err := w.lookup(name, false)
if err != nil {
return err
}
var reuse *inode
if existing != nil {
if existing.IsDir() {
if f.Mode&TypeMask != S_IFDIR {
return fmt.Errorf("%s: cannot replace a directory with a file", name)
}
reuse = existing
} else if f.Mode&TypeMask == S_IFDIR {
return fmt.Errorf("%s: cannot replace a file with a directory", name)
} else if existing.LinkCount < 2 {
reuse = existing
}
} else {
if f.Mode&TypeMask == S_IFDIR && dir.LinkCount >= format.MaxLinks {
return fmt.Errorf("%s: exceeded parent directory maximum link count", name)
}
}
child, err := w.makeInode(f, reuse)
if err != nil {
return fmt.Errorf("%s: %s", name, err)
}
if existing != child {
if existing != nil {
existing.LinkCount--
}
dir.Children[childname] = child
child.LinkCount++
if child.IsDir() {
dir.LinkCount++
}
}
if child.Mode&format.TypeMask == format.S_IFREG {
w.startInode(name, child, f.Size)
}
return nil
}
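// An illustrative call sequence (parent directories must be created before
// their children, and exactly Size bytes must be written before the next
// Create, Link, Stat or Close call; data here is a placeholder []byte):
//
//	_ = w.Create("dir", &compactext4.File{Mode: compactext4.S_IFDIR | 0755})
//	_ = w.Create("dir/hello.txt", &compactext4.File{
//		Mode: compactext4.S_IFREG | 0644,
//		Size: int64(len(data)),
//	})
//	_, _ = w.Write(data)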
// Link adds a hard link to the file system.
func (w *Writer) Link(oldname, newname string) error {
if err := w.finishInode(); err != nil {
return err
}
newdir, existing, newchildname, err := w.lookup(newname, false)
if err != nil {
return err
}
if existing != nil && (existing.IsDir() || existing.LinkCount < 2) {
return fmt.Errorf("%s: cannot orphan existing file or directory", newname)
}
_, oldfile, _, err := w.lookup(oldname, true)
if err != nil {
return err
}
switch oldfile.Mode & format.TypeMask {
case format.S_IFDIR, format.S_IFLNK:
return fmt.Errorf("%s: link target cannot be a directory or symlink: %s", newname, oldname)
}
if existing != oldfile && oldfile.LinkCount >= format.MaxLinks {
return fmt.Errorf("%s: link target would exceed maximum link count: %s", newname, oldname)
}
if existing != nil {
existing.LinkCount--
}
oldfile.LinkCount++
newdir.Children[newchildname] = oldfile
return nil
}
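// Illustrative sketch: once "dir/hello.txt" exists, another name can refer to
// the same inode. Directories and symlinks cannot be link targets:
//
//	_ = w.Link("dir/hello.txt", "dir/hello-again.txt")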
// Stat returns information about a file that has been written.
func (w *Writer) Stat(name string) (*File, error) {
if err := w.finishInode(); err != nil {
return nil, err
}
_, node, _, err := w.lookup(name, true)
if err != nil {
return nil, err
}
f := &File{
Size: node.Size,
Mode: node.Mode,
Uid: node.Uid,
Gid: node.Gid,
Atime: fsTimeToTime(node.Atime),
Ctime: fsTimeToTime(node.Ctime),
Mtime: fsTimeToTime(node.Mtime),
Crtime: fsTimeToTime(node.Crtime),
Devmajor: node.Devmajor,
Devminor: node.Devminor,
}
f.Xattrs = make(map[string][]byte)
if node.XattrBlock != 0 || len(node.XattrInline) != 0 {
if node.XattrBlock != 0 {
orig := w.block()
w.seekBlock(node.XattrBlock)
if w.err != nil {
return nil, w.err
}
var b [blockSize]byte
_, err := w.f.Read(b[:])
w.seekBlock(orig)
if err != nil {
return nil, err
}
getXattrs(b[32:], f.Xattrs, 32)
}
if len(node.XattrInline) != 0 {
getXattrs(node.XattrInline[4:], f.Xattrs, 0)
delete(f.Xattrs, "system.data")
}
}
if node.FileType() == S_IFLNK {
if node.Size > smallSymlinkSize {
return nil, fmt.Errorf("%s: cannot retrieve link information", name)
}
f.Linkname = string(node.Data)
}
return f, nil
}
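// Illustrative sketch: Stat first finishes any inode currently being written,
// then reports the metadata recorded so far, including xattrs:
//
//	f, err := w.Stat("dir/hello.txt")
//	if err == nil && f.Mode&compactext4.TypeMask == compactext4.S_IFREG {
//		// f.Size, f.Uid, f.Gid, f.Xattrs, ... describe the written file.
//	}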
func (w *Writer) Write(b []byte) (int, error) {
if len(b) == 0 {
return 0, nil
}
if w.dataWritten+int64(len(b)) > w.dataMax {
return 0, fmt.Errorf("%s: wrote too much: %d > %d", w.curName, w.dataWritten+int64(len(b)), w.dataMax)
}
if w.curInode.Flags&format.InodeFlagInlineData != 0 {
copy(w.curInode.Data[w.dataWritten:], b)
w.dataWritten += int64(len(b))
return len(b), nil
}
n, err := w.write(b)
w.dataWritten += int64(n)
return n, err
}
func (w *Writer) startInode(name string, inode *inode, size int64) {
if w.curInode != nil {
panic("inode already in progress")
}
w.curName = name
w.curInode = inode
w.dataWritten = 0
w.dataMax = size
}
func (w *Writer) block() uint32 {
return uint32(w.pos / blockSize)
}
func (w *Writer) seekBlock(block uint32) {
w.pos = int64(block) * blockSize
if w.err != nil {
return
}
w.err = w.bw.Flush()
if w.err != nil {
return
}
_, w.err = w.f.Seek(w.pos, io.SeekStart)
}
func (w *Writer) nextBlock() {
if w.pos%blockSize != 0 {
// Simplify callers; w.err is updated on failure.
w.zero(blockSize - w.pos%blockSize)
}
}
func fillExtents(hdr *format.ExtentHeader, extents []format.ExtentLeafNode, startBlock, offset, inodeSize uint32) {
*hdr = format.ExtentHeader{
Magic: format.ExtentHeaderMagic,
Entries: uint16(len(extents)),
Max: uint16(cap(extents)),
Depth: 0,
}
for i := range extents {
block := offset + uint32(i)*maxBlocksPerExtent
length := inodeSize - block
if length > maxBlocksPerExtent {
length = maxBlocksPerExtent
}
start := startBlock + block
extents[i] = format.ExtentLeafNode{
Block: block,
Length: uint16(length),
StartLow: start,
}
}
}
func (w *Writer) writeExtents(inode *inode) error {
start := w.pos - w.dataWritten
if start%blockSize != 0 {
panic("unaligned")
}
w.nextBlock()
startBlock := uint32(start / blockSize)
blocks := w.block() - startBlock
usedBlocks := blocks
const extentNodeSize = 12
const extentsPerBlock = blockSize/extentNodeSize - 1
extents := (blocks + maxBlocksPerExtent - 1) / maxBlocksPerExtent
var b bytes.Buffer
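// With blockSize 4096 and 12-byte extent nodes, each index block holds 340
// leaf extents and each extent spans at most 0x8000 blocks (128MB), so the
// four slots in the inode cover files up to 512MB and a single level of
// index blocks covers up to 4*340*128MB, comfortably above maxFileSize.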
if extents == 0 {
// Nothing to do.
} else if extents <= 4 {
var root struct {
hdr format.ExtentHeader
extents [4]format.ExtentLeafNode
}
fillExtents(&root.hdr, root.extents[:extents], startBlock, 0, blocks)
binary.Write(&b, binary.LittleEndian, root)
} else if extents <= 4*extentsPerBlock {
const extentsPerBlock = blockSize/extentNodeSize - 1
extentBlocks := extents/extentsPerBlock + 1
usedBlocks += extentBlocks
var b2 bytes.Buffer
var root struct {
hdr format.ExtentHeader
nodes [4]format.ExtentIndexNode
}
root.hdr = format.ExtentHeader{
Magic: format.ExtentHeaderMagic,
Entries: uint16(extentBlocks),
Max: 4,
Depth: 1,
}
for i := uint32(0); i < extentBlocks; i++ {
root.nodes[i] = format.ExtentIndexNode{
Block: i * extentsPerBlock * maxBlocksPerExtent,
LeafLow: w.block(),
}
extentsInBlock := extents - i*extentsPerBlock
if extentsInBlock > extentsPerBlock {
extentsInBlock = extentsPerBlock
}
var node struct {
hdr format.ExtentHeader
extents [extentsPerBlock]format.ExtentLeafNode
_ [blockSize - (extentsPerBlock+1)*extentNodeSize]byte
}
offset := i * extentsPerBlock * maxBlocksPerExtent
fillExtents(&node.hdr, node.extents[:extentsInBlock], startBlock+offset, offset, blocks)
binary.Write(&b2, binary.LittleEndian, node)
if _, err := w.write(b2.Next(blockSize)); err != nil {
return err
}
}
binary.Write(&b, binary.LittleEndian, root)
} else {
panic("file too big")
}
inode.Data = b.Bytes()
inode.Flags |= format.InodeFlagExtents
inode.BlockCount += usedBlocks
return w.err
}
func (w *Writer) finishInode() error {
if !w.initialized {
if err := w.init(); err != nil {
return err
}
}
if w.curInode == nil {
return nil
}
if w.dataWritten != w.dataMax {
return fmt.Errorf("did not write the right amount: %d != %d", w.dataWritten, w.dataMax)
}
if w.dataMax != 0 && w.curInode.Flags&format.InodeFlagInlineData == 0 {
if err := w.writeExtents(w.curInode); err != nil {
return err
}
}
w.dataWritten = 0
w.dataMax = 0
w.curInode = nil
return w.err
}
func modeToFileType(mode uint16) format.FileType {
switch mode & format.TypeMask {
default:
return format.FileTypeUnknown
case format.S_IFREG:
return format.FileTypeRegular
case format.S_IFDIR:
return format.FileTypeDirectory
case format.S_IFCHR:
return format.FileTypeCharacter
case format.S_IFBLK:
return format.FileTypeBlock
case format.S_IFIFO:
return format.FileTypeFIFO
case format.S_IFSOCK:
return format.FileTypeSocket
case format.S_IFLNK:
return format.FileTypeSymbolicLink
}
}
type constReader byte
var zero = constReader(0)
func (r constReader) Read(b []byte) (int, error) {
for i := range b {
b[i] = byte(r)
}
return len(b), nil
}
func (w *Writer) writeDirectory(dir, parent *inode) error {
if err := w.finishInode(); err != nil {
return err
}
// The size of the directory is not known yet.
w.startInode("", dir, 0x7fffffffffffffff)
left := blockSize
finishBlock := func() error {
if left > 0 {
e := format.DirectoryEntry{
RecordLength: uint16(left),
}
err := binary.Write(w, binary.LittleEndian, e)
if err != nil {
return err
}
left -= directoryEntrySize
if left < 4 {
panic("not enough space for trailing entry")
}
_, err = io.CopyN(w, zero, int64(left))
if err != nil {
return err
}
}
left = blockSize
return nil
}
writeEntry := func(ino format.InodeNumber, name string) error {
rlb := directoryEntrySize + len(name)
rl := (rlb + 3) & ^3
if left < rl+12 {
if err := finishBlock(); err != nil {
return err
}
}
e := format.DirectoryEntry{
Inode: ino,
RecordLength: uint16(rl),
NameLength: uint8(len(name)),
FileType: modeToFileType(w.getInode(ino).Mode),
}
err := binary.Write(w, binary.LittleEndian, e)
if err != nil {
return err
}
_, err = w.Write([]byte(name))
if err != nil {
return err
}
var zero [4]byte
_, err = w.Write(zero[:rl-rlb])
if err != nil {
return err
}
left -= rl
return nil
}
if err := writeEntry(dir.Number, "."); err != nil {
return err
}
if err := writeEntry(parent.Number, ".."); err != nil {
return err
}
// Follow e2fsck's convention and sort the children by inode number.
var children []string
for name := range dir.Children {
children = append(children, name)
}
sort.Slice(children, func(i, j int) bool {
return dir.Children[children[i]].Number < dir.Children[children[j]].Number
})
for _, name := range children {
child := dir.Children[name]
if err := writeEntry(child.Number, name); err != nil {
return err
}
}
if err := finishBlock(); err != nil {
return err
}
w.curInode.Size = w.dataWritten
w.dataMax = w.dataWritten
return nil
}
func (w *Writer) writeDirectoryRecursive(dir, parent *inode) error {
if err := w.writeDirectory(dir, parent); err != nil {
return err
}
for _, child := range dir.Children {
if child.IsDir() {
if err := w.writeDirectoryRecursive(child, dir); err != nil {
return err
}
}
}
return nil
}
func (w *Writer) writeInodeTable(tableSize uint32) error {
var b bytes.Buffer
for _, inode := range w.inodes {
if inode != nil {
binode := format.Inode{
Mode: inode.Mode,
Uid: uint16(inode.Uid & 0xffff),
Gid: uint16(inode.Gid & 0xffff),
SizeLow: uint32(inode.Size & 0xffffffff),
SizeHigh: uint32(inode.Size >> 32),
LinksCount: uint16(inode.LinkCount),
BlocksLow: inode.BlockCount,
Flags: inode.Flags,
XattrBlockLow: inode.XattrBlock,
UidHigh: uint16(inode.Uid >> 16),
GidHigh: uint16(inode.Gid >> 16),
ExtraIsize: uint16(inodeUsedSize - 128),
Atime: uint32(inode.Atime),
AtimeExtra: uint32(inode.Atime >> 32),
Ctime: uint32(inode.Ctime),
CtimeExtra: uint32(inode.Ctime >> 32),
Mtime: uint32(inode.Mtime),
MtimeExtra: uint32(inode.Mtime >> 32),
Crtime: uint32(inode.Crtime),
CrtimeExtra: uint32(inode.Crtime >> 32),
}
switch inode.Mode & format.TypeMask {
case format.S_IFDIR, format.S_IFREG, format.S_IFLNK:
n := copy(binode.Block[:], inode.Data)
if n < len(inode.Data) {
// Rewrite the first xattr with the data.
xattr := [1]xattr{{
Name: "data",
Index: 7, // "system."
Value: inode.Data[n:],
}}
putXattrs(xattr[:], inode.XattrInline[4:], 0)
}
case format.S_IFBLK, format.S_IFCHR:
dev := inode.Devminor&0xff | inode.Devmajor<<8 | (inode.Devminor&0xffffff00)<<12
binary.LittleEndian.PutUint32(binode.Block[4:], dev)
}
binary.Write(&b, binary.LittleEndian, binode)
b.Truncate(inodeUsedSize)
n, _ := b.Write(inode.XattrInline)
io.CopyN(&b, zero, int64(inodeExtraSize-n))
} else {
io.CopyN(&b, zero, inodeSize)
}
if _, err := w.write(b.Next(inodeSize)); err != nil {
return err
}
}
rest := tableSize - uint32(len(w.inodes)*inodeSize)
if _, err := w.zero(int64(rest)); err != nil {
return err
}
return nil
}
// NewWriter returns a Writer that writes an ext4 file system to the provided
// ReadWriteSeeker.
func NewWriter(f io.ReadWriteSeeker, opts ...Option) *Writer {
w := &Writer{
f: f,
bw: bufio.NewWriterSize(f, 65536*8),
maxDiskSize: defaultMaxDiskSize,
}
for _, opt := range opts {
opt(w)
}
return w
}
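// A minimal end-to-end sketch (illustrative only: it assumes an *os.File as
// the backing store, which satisfies io.ReadWriteSeeker, and elides error
// handling):
//
//	f, _ := os.Create("layer.ext4")
//	w := compactext4.NewWriter(f, compactext4.InlineData,
//		compactext4.MaximumDiskSize(64*1024*1024*1024))
//	_ = w.Create("etc", &compactext4.File{Mode: compactext4.S_IFDIR | 0755})
//	_ = w.Create("etc/hostname", &compactext4.File{
//		Mode: compactext4.S_IFREG | 0644,
//		Size: int64(len("example\n")),
//	})
//	_, _ = w.Write([]byte("example\n"))
//	_ = w.Close() // writes the inode table, bitmaps, group descriptors and superblock
//	_ = f.Close()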
// An Option provides extra options to NewWriter.
type Option func(*Writer)
// InlineData instructs the Writer to write small files into the inode
// structures directly. This creates smaller images but currently is not
// compatible with DAX.
func InlineData(w *Writer) {
w.supportInlineData = true
}
// MaximumDiskSize instructs the writer to reserve enough metadata space for the
// specified disk size. If not provided, then 16GB is the default.
func MaximumDiskSize(size int64) Option {
return func(w *Writer) {
if size < 0 || size > maxMaxDiskSize {
w.maxDiskSize = maxMaxDiskSize
} else if size == 0 {
w.maxDiskSize = defaultMaxDiskSize
} else {
w.maxDiskSize = (size + blockSize - 1) &^ (blockSize - 1)
}
}
}
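// The size is rounded up to a whole number of 4096-byte blocks, so, for
// example, MaximumDiskSize(10*1024*1024*1024 + 1) reserves metadata space for
// 10GB plus one extra block; MaximumDiskSize(0) keeps the 16GB default, and
// anything negative or above 16TB is clamped to the 16TB maximum.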
func (w *Writer) init() error {
// Skip the defective block inode.
w.inodes = make([]*inode, 1, 32)
// Create the root directory.
root, _ := w.makeInode(&File{
Mode: format.S_IFDIR | 0755,
}, nil)
root.LinkCount++ // The root is linked to itself.
// Skip until the first non-reserved inode.
w.inodes = append(w.inodes, make([]*inode, inodeFirst-len(w.inodes)-1)...)
maxBlocks := (w.maxDiskSize-1)/blockSize + 1
maxGroups := (maxBlocks-1)/blocksPerGroup + 1
w.gdBlocks = uint32((maxGroups-1)/groupsPerDescriptorBlock + 1)
// Skip past the superblock and the block group descriptor table.
w.seekBlock(1 + w.gdBlocks)
w.initialized = true
// The lost+found directory is required to exist for e2fsck to pass.
if err := w.Create("lost+found", &File{Mode: format.S_IFDIR | 0700}); err != nil {
return err
}
return w.err
}
func groupCount(blocks uint32, inodes uint32, inodesPerGroup uint32) uint32 {
inodeBlocksPerGroup := inodesPerGroup * inodeSize / blockSize
dataBlocksPerGroup := blocksPerGroup - inodeBlocksPerGroup - 2 // save room for the bitmaps
// Increase the block count to ensure there are enough groups for all the
// inodes.
minBlocks := (inodes-1)/inodesPerGroup*dataBlocksPerGroup + 1
if blocks < minBlocks {
blocks = minBlocks
}
return (blocks + dataBlocksPerGroup - 1) / dataBlocksPerGroup
}
func bestGroupCount(blocks uint32, inodes uint32) (groups uint32, inodesPerGroup uint32) {
groups = 0xffffffff
for ipg := uint32(inodesPerGroupIncrement); ipg <= maxInodesPerGroup; ipg += inodesPerGroupIncrement {
g := groupCount(blocks, inodes, ipg)
if g < groups {
groups = g
inodesPerGroup = ipg
}
}
return
}
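// Worked example: with 2048 inodes per group, each group spends
// 2048*256/4096 = 128 blocks on inode tables and 2 on bitmaps, leaving
// 32638 data blocks; 40000 data blocks and 3000 inodes therefore need
// groupCount(40000, 3000, 2048) = (40000+32637)/32638 = 2 groups.
// bestGroupCount scans every multiple of inodesPerGroupIncrement to pick the
// inodesPerGroup that minimizes that count.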
func (w *Writer) Close() error {
if err := w.finishInode(); err != nil {
return err
}
root := w.root()
if err := w.writeDirectoryRecursive(root, root); err != nil {
return err
}
// Finish the last inode (probably a directory).
if err := w.finishInode(); err != nil {
return err
}
// Write the inode table
inodeTableOffset := w.block()
groups, inodesPerGroup := bestGroupCount(inodeTableOffset, uint32(len(w.inodes)))
err := w.writeInodeTable(groups * inodesPerGroup * inodeSize)
if err != nil {
return err
}
// Write the bitmaps.
bitmapOffset := w.block()
bitmapSize := groups * 2
validDataSize := bitmapOffset + bitmapSize
diskSize := validDataSize
minSize := (groups-1)*blocksPerGroup + 1
if diskSize < minSize {
diskSize = minSize
}
usedGdBlocks := (groups-1)/groupDescriptorSize + 1
if usedGdBlocks > w.gdBlocks {
return exceededMaxSizeError{w.maxDiskSize}
}
gds := make([]format.GroupDescriptor, w.gdBlocks*groupsPerDescriptorBlock)
inodeTableSizePerGroup := inodesPerGroup * inodeSize / blockSize
var totalUsedBlocks, totalUsedInodes uint32
for g := uint32(0); g < groups; g++ {
var b [blockSize * 2]byte
var dirCount, usedInodeCount, usedBlockCount uint16
// Block bitmap
if (g+1)*blocksPerGroup <= validDataSize {
// This group is fully allocated.
for j := range b[:blockSize] {
b[j] = 0xff
}
usedBlockCount = blocksPerGroup
} else if g*blocksPerGroup < validDataSize {
for j := uint32(0); j < validDataSize-g*blocksPerGroup; j++ {
b[j/8] |= 1 << (j % 8)
usedBlockCount++
}
}
if g == 0 {
// Unused group descriptor blocks should be cleared.
for j := 1 + usedGdBlocks; j < 1+w.gdBlocks; j++ {
b[j/8] &^= 1 << (j % 8)
usedBlockCount--
}
}
if g == groups-1 && diskSize%blocksPerGroup != 0 {
// Blocks that aren't present in the disk should be marked as
// allocated.
for j := diskSize % blocksPerGroup; j < blocksPerGroup; j++ {
b[j/8] |= 1 << (j % 8)
usedBlockCount++
}
}
// Inode bitmap
for j := uint32(0); j < inodesPerGroup; j++ {
ino := format.InodeNumber(1 + g*inodesPerGroup + j)
inode := w.getInode(ino)
if ino < inodeFirst || inode != nil {
b[blockSize+j/8] |= 1 << (j % 8)
usedInodeCount++
}
if inode != nil && inode.Mode&format.TypeMask == format.S_IFDIR {
dirCount++
}
}
_, err := w.write(b[:])
if err != nil {
return err
}
gds[g] = format.GroupDescriptor{
BlockBitmapLow: bitmapOffset + 2*g,
InodeBitmapLow: bitmapOffset + 2*g + 1,
InodeTableLow: inodeTableOffset + g*inodeTableSizePerGroup,
UsedDirsCountLow: dirCount,
FreeInodesCountLow: uint16(inodesPerGroup) - usedInodeCount,
FreeBlocksCountLow: blocksPerGroup - usedBlockCount,
}
totalUsedBlocks += uint32(usedBlockCount)
totalUsedInodes += uint32(usedInodeCount)
}
// Zero up to the disk size.
_, err = w.zero(int64(diskSize-bitmapOffset-bitmapSize) * blockSize)
if err != nil {
return err
}
// Write the block group descriptors.
w.seekBlock(1)
if w.err != nil {
return w.err
}
err = binary.Write(w.bw, binary.LittleEndian, gds)
if err != nil {
return err
}
// Write the super block
var blk [blockSize]byte
b := bytes.NewBuffer(blk[:1024])
sb := &format.SuperBlock{
InodesCount: inodesPerGroup * groups,
BlocksCountLow: diskSize,
FreeBlocksCountLow: blocksPerGroup*groups - totalUsedBlocks,
FreeInodesCount: inodesPerGroup*groups - totalUsedInodes,
FirstDataBlock: 0,
LogBlockSize: 2, // 2^(10 + 2)
LogClusterSize: 2,
BlocksPerGroup: blocksPerGroup,
ClustersPerGroup: blocksPerGroup,
InodesPerGroup: inodesPerGroup,
Magic: format.SuperBlockMagic,
State: 1, // cleanly unmounted
Errors: 1, // continue on error (EXT4_ERRORS_CONTINUE)
CreatorOS: 0, // Linux
RevisionLevel: 1, // dynamic inode sizes
FirstInode: inodeFirst,
LpfInode: inodeLostAndFound,
InodeSize: inodeSize,
FeatureCompat: format.CompatSparseSuper2 | format.CompatExtAttr,
FeatureIncompat: format.IncompatFiletype | format.IncompatExtents | format.IncompatFlexBg,
FeatureRoCompat: format.RoCompatLargeFile | format.RoCompatHugeFile | format.RoCompatExtraIsize | format.RoCompatReadonly,
MinExtraIsize: extraIsize,
WantExtraIsize: extraIsize,
LogGroupsPerFlex: 31,
}
if w.supportInlineData {
sb.FeatureIncompat |= format.IncompatInlineData
}
binary.Write(b, binary.LittleEndian, sb)
w.seekBlock(0)
if _, err := w.write(blk[:]); err != nil {
return err
}
w.seekBlock(diskSize)
return w.err
}