1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00
moby--moby/daemon/graphdriver/copy/copy.go
Sargun Dhillon 3ec4ec2857 Add zero-copy support to copy module
This changeset allows Docker's VFS, and Overlay to take advantage of
Linux's zerocopy APIs.

The copy function first tries to use the ficlone ioctl. Reason being:
 - they do not allow partial success (aka short writes)
 - clones are expected to be a fast metadata operation
See: http://oss.sgi.com/archives/xfs/2015-12/msg00356.html

If the clone fails, we fall back to copy_file_range, which internally
may fall back to splice, which has an upper limit on the size
of copy it can perform. Given that, we have to loop until the copy
is done.

For a given dirCopy operation, if the clone fails, we will not try
it again during any other file copy. Same is true with copy_file_range.

If all else fails, we fall back to traditional copy.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
2017-10-24 13:14:40 -07:00

234 lines
5.5 KiB
Go

// +build linux
package copy
/*
#include <linux/fs.h>
#ifndef FICLONE
#define FICLONE _IOW(0x94, 9, int)
#endif
*/
import "C"
import (
"fmt"
"io"
"os"
"path/filepath"
"syscall"
"time"
"github.com/docker/docker/pkg/pools"
"github.com/docker/docker/pkg/system"
rsystem "github.com/opencontainers/runc/libcontainer/system"
"golang.org/x/sys/unix"
)
// Mode indicates whether to use hardlink or copy content
type Mode int
const (
// Content creates a new file, and copies the content of the file
Content Mode = iota
// Hardlink creates a new hardlink to the existing file
Hardlink
)
func copyRegular(srcPath, dstPath string, fileinfo os.FileInfo, copyWithFileRange, copyWithFileClone *bool) error {
srcFile, err := os.Open(srcPath)
if err != nil {
return err
}
defer srcFile.Close()
// If the destination file already exists, we shouldn't blow it away
dstFile, err := os.OpenFile(dstPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, fileinfo.Mode())
if err != nil {
return err
}
defer dstFile.Close()
if *copyWithFileClone {
_, _, err = unix.Syscall(unix.SYS_IOCTL, dstFile.Fd(), C.FICLONE, srcFile.Fd())
if err == nil {
return nil
}
*copyWithFileClone = false
if err == unix.EXDEV {
*copyWithFileRange = false
}
}
if *copyWithFileRange {
err = doCopyWithFileRange(srcFile, dstFile, fileinfo)
// Trying the file_clone may not have caught the exdev case
// as the ioctl may not have been available (therefore EINVAL)
if err == unix.EXDEV || err == unix.ENOSYS {
*copyWithFileRange = false
} else if err != nil {
return err
}
}
return legacyCopy(srcFile, dstFile)
}
func doCopyWithFileRange(srcFile, dstFile *os.File, fileinfo os.FileInfo) error {
amountLeftToCopy := fileinfo.Size()
for amountLeftToCopy > 0 {
n, err := unix.CopyFileRange(int(srcFile.Fd()), nil, int(dstFile.Fd()), nil, int(amountLeftToCopy), 0)
if err != nil {
return err
}
amountLeftToCopy = amountLeftToCopy - int64(n)
}
return nil
}
func legacyCopy(srcFile io.Reader, dstFile io.Writer) error {
_, err := pools.Copy(dstFile, srcFile)
return err
}
func copyXattr(srcPath, dstPath, attr string) error {
data, err := system.Lgetxattr(srcPath, attr)
if err != nil {
return err
}
if data != nil {
if err := system.Lsetxattr(dstPath, attr, data, 0); err != nil {
return err
}
}
return nil
}
// DirCopy copies or hardlinks the contents of one directory to another,
// properly handling xattrs, and soft links
func DirCopy(srcDir, dstDir string, copyMode Mode) error {
copyWithFileRange := true
copyWithFileClone := true
err := filepath.Walk(srcDir, func(srcPath string, f os.FileInfo, err error) error {
if err != nil {
return err
}
// Rebase path
relPath, err := filepath.Rel(srcDir, srcPath)
if err != nil {
return err
}
dstPath := filepath.Join(dstDir, relPath)
if err != nil {
return err
}
stat, ok := f.Sys().(*syscall.Stat_t)
if !ok {
return fmt.Errorf("Unable to get raw syscall.Stat_t data for %s", srcPath)
}
isHardlink := false
switch f.Mode() & os.ModeType {
case 0: // Regular file
if copyMode == Hardlink {
isHardlink = true
if err2 := os.Link(srcPath, dstPath); err2 != nil {
return err2
}
} else {
if err2 := copyRegular(srcPath, dstPath, f, &copyWithFileRange, &copyWithFileClone); err2 != nil {
return err2
}
}
case os.ModeDir:
if err := os.Mkdir(dstPath, f.Mode()); err != nil && !os.IsExist(err) {
return err
}
case os.ModeSymlink:
link, err := os.Readlink(srcPath)
if err != nil {
return err
}
if err := os.Symlink(link, dstPath); err != nil {
return err
}
case os.ModeNamedPipe:
fallthrough
case os.ModeSocket:
if rsystem.RunningInUserNS() {
// cannot create a device if running in user namespace
return nil
}
if err := unix.Mkfifo(dstPath, stat.Mode); err != nil {
return err
}
case os.ModeDevice:
if err := unix.Mknod(dstPath, stat.Mode, int(stat.Rdev)); err != nil {
return err
}
default:
return fmt.Errorf("unknown file type for %s", srcPath)
}
// Everything below is copying metadata from src to dst. All this metadata
// already shares an inode for hardlinks.
if isHardlink {
return nil
}
if err := os.Lchown(dstPath, int(stat.Uid), int(stat.Gid)); err != nil {
return err
}
if err := copyXattr(srcPath, dstPath, "security.capability"); err != nil {
return err
}
// We need to copy this attribute if it appears in an overlay upper layer, as
// this function is used to copy those. It is set by overlay if a directory
// is removed and then re-created and should not inherit anything from the
// same dir in the lower dir.
if err := copyXattr(srcPath, dstPath, "trusted.overlay.opaque"); err != nil {
return err
}
isSymlink := f.Mode()&os.ModeSymlink != 0
// There is no LChmod, so ignore mode for symlink. Also, this
// must happen after chown, as that can modify the file mode
if !isSymlink {
if err := os.Chmod(dstPath, f.Mode()); err != nil {
return err
}
}
// system.Chtimes doesn't support a NOFOLLOW flag atm
// nolint: unconvert
if !isSymlink {
aTime := time.Unix(int64(stat.Atim.Sec), int64(stat.Atim.Nsec))
mTime := time.Unix(int64(stat.Mtim.Sec), int64(stat.Mtim.Nsec))
if err := system.Chtimes(dstPath, aTime, mTime); err != nil {
return err
}
} else {
ts := []syscall.Timespec{stat.Atim, stat.Mtim}
if err := system.LUtimesNano(dstPath, ts); err != nil {
return err
}
}
return nil
})
return err
}