1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00
moby--moby/daemon/logger/loggerutils/logfile_test.go

370 lines
9.4 KiB
Go
Raw Normal View History

package loggerutils // import "github.com/docker/docker/daemon/logger/loggerutils"
import (
"bufio"
"bytes"
"context"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"strings"
"sync/atomic"
"testing"
"text/tabwriter"
"time"
"github.com/docker/docker/daemon/logger"
"github.com/docker/docker/pkg/pubsub"
"github.com/docker/docker/pkg/tailfile"
"gotest.tools/v3/assert"
"gotest.tools/v3/poll"
)
type testDecoder struct {
rdr io.Reader
scanner *bufio.Scanner
}
func (d *testDecoder) Decode() (*logger.Message, error) {
if d.scanner == nil {
d.scanner = bufio.NewScanner(d.rdr)
}
if !d.scanner.Scan() {
return nil, d.scanner.Err()
}
// some comment
return &logger.Message{Line: d.scanner.Bytes(), Timestamp: time.Now()}, nil
}
func (d *testDecoder) Reset(rdr io.Reader) {
d.rdr = rdr
d.scanner = bufio.NewScanner(rdr)
}
func (d *testDecoder) Close() {
d.rdr = nil
d.scanner = nil
}
func TestTailFiles(t *testing.T) {
s1 := strings.NewReader("Hello.\nMy name is Inigo Montoya.\n")
s2 := strings.NewReader("I'm serious.\nDon't call me Shirley!\n")
s3 := strings.NewReader("Roads?\nWhere we're going we don't need roads.\n")
files := []SizeReaderAt{s1, s2, s3}
watcher := logger.NewLogWatcher()
defer watcher.ConsumerGone()
tailReader := func(ctx context.Context, r SizeReaderAt, lines int) (io.Reader, int, error) {
return tailfile.NewTailReader(ctx, r, lines)
}
dec := &testDecoder{}
for desc, config := range map[string]logger.ReadConfig{} {
t.Run(desc, func(t *testing.T) {
started := make(chan struct{})
go func() {
close(started)
tailFiles(files, watcher, dec, tailReader, config, make(chan interface{}))
}()
<-started
})
}
config := logger.ReadConfig{Tail: 2}
started := make(chan struct{})
go func() {
close(started)
tailFiles(files, watcher, dec, tailReader, config, make(chan interface{}))
}()
<-started
select {
case <-time.After(60 * time.Second):
t.Fatal("timeout waiting for tail line")
case err := <-watcher.Err:
assert.NilError(t, err)
case msg := <-watcher.Msg:
assert.Assert(t, msg != nil)
assert.Assert(t, string(msg.Line) == "Roads?", string(msg.Line))
}
select {
case <-time.After(60 * time.Second):
t.Fatal("timeout waiting for tail line")
case err := <-watcher.Err:
assert.NilError(t, err)
case msg := <-watcher.Msg:
assert.Assert(t, msg != nil)
assert.Assert(t, string(msg.Line) == "Where we're going we don't need roads.", string(msg.Line))
}
}
type dummyDecoder struct{}
func (dummyDecoder) Decode() (*logger.Message, error) {
return &logger.Message{}, nil
}
func (dummyDecoder) Close() {}
func (dummyDecoder) Reset(io.Reader) {}
daemon.ContainerLogs(): fix resource leak on follow When daemon.ContainerLogs() is called with options.follow=true (as in "docker logs --follow"), the "loggerutils.followLogs()" function never returns (even then the logs consumer is gone). As a result, all the resources associated with it (including an opened file descriptor for the log file being read, two FDs for a pipe, and two FDs for inotify watch) are never released. If this is repeated (such as by running "docker logs --follow" and pressing Ctrl-C a few times), this results in DoS caused by either hitting the limit of inotify watches, or the limit of opened files. The only cure is daemon restart. Apparently, what happens is: 1. logs producer (a container) is gone, calling (*LogWatcher).Close() for all its readers (daemon/logger/jsonfilelog/jsonfilelog.go:175). 2. WatchClose() is properly handled by a dedicated goroutine in followLogs(), cancelling the context. 3. Upon receiving the ctx.Done(), the code in followLogs() (daemon/logger/loggerutils/logfile.go#L626-L638) keeps to send messages _synchronously_ (which is OK for now). 4. Logs consumer is gone (Ctrl-C is pressed on a terminal running "docker logs --follow"). Method (*LogWatcher).Close() is properly called (see daemon/logs.go:114). Since it was called before and due to to once.Do(), nothing happens (which is kinda good, as otherwise it will panic on closing a closed channel). 5. A goroutine (see item 3 above) keeps sending log messages synchronously to the logWatcher.Msg channel. Since the channel reader is gone, the channel send operation blocks forever, and resource cleanup set up in defer statements at the beginning of followLogs() never happens. Alas, the fix is somewhat complicated: 1. Distinguish between close from logs producer and logs consumer. To that effect, - yet another channel is added to LogWatcher(); - {Watch,}Close() are renamed to {Watch,}ProducerGone(); - {Watch,}ConsumerGone() are added; *NOTE* that ProducerGone()/WatchProducerGone() pair is ONLY needed in order to stop ConsumerLogs(follow=true) when a container is stopped; otherwise we're not interested in it. In other words, we're only using it in followLogs(). 2. Code that was doing (logWatcher*).Close() is modified to either call ProducerGone() or ConsumerGone(), depending on the context. 3. Code that was waiting for WatchClose() is modified to wait for either ConsumerGone() or ProducerGone(), or both, depending on the context. 4. followLogs() are modified accordingly: - context cancellation is happening on WatchProducerGone(), and once it's received the FileWatcher is closed and waitRead() returns errDone on EOF (i.e. log rotation handling logic is disabled); - due to this, code that was writing synchronously to logWatcher.Msg can be and is removed as the code above it handles this case; - function returns once ConsumerGone is received, freeing all the resources -- this is the bugfix itself. While at it, 1. Let's also remove the ctx usage to simplify the code a bit. It was introduced by commit a69a59ffc7e3d ("Decouple removing the fileWatcher from reading") in order to fix a bug. The bug was actually a deadlock in fsnotify, and the fix was just a workaround. Since then the fsnofify bug has been fixed, and a new fsnotify was vendored in. For more details, please see https://github.com/moby/moby/pull/27782#issuecomment-416794490 2. Since `(*filePoller).Close()` is fixed to remove all the files being watched, there is no need to explicitly call fileWatcher.Remove(name) anymore, so get rid of the extra code. Should fix https://github.com/moby/moby/issues/37391 Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2018-08-01 00:03:55 -04:00
func TestFollowLogsConsumerGone(t *testing.T) {
lw := logger.NewLogWatcher()
f, err := ioutil.TempFile("", t.Name())
assert.NilError(t, err)
defer func() {
f.Close()
os.Remove(f.Name())
}()
dec := dummyDecoder{}
followLogsDone := make(chan struct{})
var since, until time.Time
go func() {
followLogs(f, lw, make(chan interface{}), make(chan interface{}), dec, since, until)
close(followLogsDone)
}()
select {
case <-lw.Msg:
case err := <-lw.Err:
assert.NilError(t, err)
case <-followLogsDone:
t.Fatal("follow logs finished unexpectedly")
case <-time.After(10 * time.Second):
t.Fatal("timeout waiting for log message")
}
daemon.ContainerLogs(): fix resource leak on follow When daemon.ContainerLogs() is called with options.follow=true (as in "docker logs --follow"), the "loggerutils.followLogs()" function never returns (even then the logs consumer is gone). As a result, all the resources associated with it (including an opened file descriptor for the log file being read, two FDs for a pipe, and two FDs for inotify watch) are never released. If this is repeated (such as by running "docker logs --follow" and pressing Ctrl-C a few times), this results in DoS caused by either hitting the limit of inotify watches, or the limit of opened files. The only cure is daemon restart. Apparently, what happens is: 1. logs producer (a container) is gone, calling (*LogWatcher).Close() for all its readers (daemon/logger/jsonfilelog/jsonfilelog.go:175). 2. WatchClose() is properly handled by a dedicated goroutine in followLogs(), cancelling the context. 3. Upon receiving the ctx.Done(), the code in followLogs() (daemon/logger/loggerutils/logfile.go#L626-L638) keeps to send messages _synchronously_ (which is OK for now). 4. Logs consumer is gone (Ctrl-C is pressed on a terminal running "docker logs --follow"). Method (*LogWatcher).Close() is properly called (see daemon/logs.go:114). Since it was called before and due to to once.Do(), nothing happens (which is kinda good, as otherwise it will panic on closing a closed channel). 5. A goroutine (see item 3 above) keeps sending log messages synchronously to the logWatcher.Msg channel. Since the channel reader is gone, the channel send operation blocks forever, and resource cleanup set up in defer statements at the beginning of followLogs() never happens. Alas, the fix is somewhat complicated: 1. Distinguish between close from logs producer and logs consumer. To that effect, - yet another channel is added to LogWatcher(); - {Watch,}Close() are renamed to {Watch,}ProducerGone(); - {Watch,}ConsumerGone() are added; *NOTE* that ProducerGone()/WatchProducerGone() pair is ONLY needed in order to stop ConsumerLogs(follow=true) when a container is stopped; otherwise we're not interested in it. In other words, we're only using it in followLogs(). 2. Code that was doing (logWatcher*).Close() is modified to either call ProducerGone() or ConsumerGone(), depending on the context. 3. Code that was waiting for WatchClose() is modified to wait for either ConsumerGone() or ProducerGone(), or both, depending on the context. 4. followLogs() are modified accordingly: - context cancellation is happening on WatchProducerGone(), and once it's received the FileWatcher is closed and waitRead() returns errDone on EOF (i.e. log rotation handling logic is disabled); - due to this, code that was writing synchronously to logWatcher.Msg can be and is removed as the code above it handles this case; - function returns once ConsumerGone is received, freeing all the resources -- this is the bugfix itself. While at it, 1. Let's also remove the ctx usage to simplify the code a bit. It was introduced by commit a69a59ffc7e3d ("Decouple removing the fileWatcher from reading") in order to fix a bug. The bug was actually a deadlock in fsnotify, and the fix was just a workaround. Since then the fsnofify bug has been fixed, and a new fsnotify was vendored in. For more details, please see https://github.com/moby/moby/pull/27782#issuecomment-416794490 2. Since `(*filePoller).Close()` is fixed to remove all the files being watched, there is no need to explicitly call fileWatcher.Remove(name) anymore, so get rid of the extra code. Should fix https://github.com/moby/moby/issues/37391 Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2018-08-01 00:03:55 -04:00
lw.ConsumerGone()
select {
case <-followLogsDone:
case <-time.After(20 * time.Second):
t.Fatal("timeout waiting for followLogs() to finish")
}
}
type dummyWrapper struct {
dummyDecoder
fn func() error
}
func (d *dummyWrapper) Decode() (*logger.Message, error) {
if err := d.fn(); err != nil {
return nil, err
}
return d.dummyDecoder.Decode()
}
func TestFollowLogsProducerGone(t *testing.T) {
lw := logger.NewLogWatcher()
defer lw.ConsumerGone()
f, err := ioutil.TempFile("", t.Name())
assert.NilError(t, err)
defer os.Remove(f.Name())
var sent, received, closed int32
dec := &dummyWrapper{fn: func() error {
switch atomic.LoadInt32(&closed) {
case 0:
atomic.AddInt32(&sent, 1)
return nil
case 1:
atomic.AddInt32(&closed, 1)
t.Logf("logDecode() closed after sending %d messages\n", sent)
return io.EOF
default:
t.Fatal("logDecode() called after closing!")
return io.EOF
}
}}
var since, until time.Time
followLogsDone := make(chan struct{})
go func() {
followLogs(f, lw, make(chan interface{}), make(chan interface{}), dec, since, until)
close(followLogsDone)
}()
// read 1 message
select {
case <-lw.Msg:
received++
case err := <-lw.Err:
assert.NilError(t, err)
case <-followLogsDone:
t.Fatal("followLogs() finished unexpectedly")
case <-time.After(10 * time.Second):
t.Fatal("timeout waiting for log message")
}
// "stop" the "container"
atomic.StoreInt32(&closed, 1)
lw.ProducerGone()
// should receive all the messages sent
readDone := make(chan struct{})
go func() {
defer close(readDone)
for {
select {
case <-lw.Msg:
received++
if received == atomic.LoadInt32(&sent) {
return
}
case err := <-lw.Err:
assert.NilError(t, err)
}
}
}()
select {
case <-readDone:
case <-time.After(30 * time.Second):
t.Fatalf("timeout waiting for log messages to be read (sent: %d, received: %d", sent, received)
}
t.Logf("messages sent: %d, received: %d", atomic.LoadInt32(&sent), received)
// followLogs() should be done by now
select {
case <-followLogsDone:
case <-time.After(30 * time.Second):
t.Fatal("timeout waiting for followLogs() to finish")
}
select {
case <-lw.WatchConsumerGone():
t.Fatal("consumer should not have exited")
default:
}
}
func TestCheckCapacityAndRotate(t *testing.T) {
dir, err := ioutil.TempDir("", t.Name())
assert.NilError(t, err)
defer os.RemoveAll(dir)
f, err := ioutil.TempFile(dir, "log")
assert.NilError(t, err)
l := &LogFile{
f: f,
capacity: 5,
maxFiles: 3,
compress: true,
notifyReaders: pubsub.NewPublisher(0, 1),
perms: 0600,
filesRefCounter: refCounter{counter: make(map[string]int)},
getTailReader: func(ctx context.Context, r SizeReaderAt, lines int) (io.Reader, int, error) {
return tailfile.NewTailReader(ctx, r, lines)
},
createDecoder: func(io.Reader) Decoder {
return dummyDecoder{}
},
marshal: func(msg *logger.Message) ([]byte, error) {
return msg.Line, nil
},
}
defer l.Close()
ls := dirStringer{dir}
assert.NilError(t, l.WriteLogEntry(&logger.Message{Line: []byte("hello world!")}))
_, err = os.Stat(f.Name() + ".1")
assert.Assert(t, os.IsNotExist(err), ls)
assert.NilError(t, l.WriteLogEntry(&logger.Message{Line: []byte("hello world!")}))
poll.WaitOn(t, checkFileExists(f.Name()+".1.gz"), poll.WithDelay(time.Millisecond), poll.WithTimeout(30*time.Second))
assert.NilError(t, l.WriteLogEntry(&logger.Message{Line: []byte("hello world!")}))
poll.WaitOn(t, checkFileExists(f.Name()+".1.gz"), poll.WithDelay(time.Millisecond), poll.WithTimeout(30*time.Second))
poll.WaitOn(t, checkFileExists(f.Name()+".2.gz"), poll.WithDelay(time.Millisecond), poll.WithTimeout(30*time.Second))
t.Run("closed log file", func(t *testing.T) {
// Now let's simulate a failed rotation where the file was able to be closed but something else happened elsewhere
// down the line.
// We want to make sure that we can recover in the case that `l.f` was closed while attempting a rotation.
l.f.Close()
assert.NilError(t, l.WriteLogEntry(&logger.Message{Line: []byte("hello world!")}))
assert.NilError(t, os.Remove(f.Name()+".2.gz"))
})
t.Run("with log reader", func(t *testing.T) {
// Make sure rotate works with an active reader
lw := logger.NewLogWatcher()
defer lw.ConsumerGone()
go l.ReadLogs(logger.ReadConfig{Follow: true, Tail: 1000}, lw)
assert.NilError(t, l.WriteLogEntry(&logger.Message{Line: []byte("hello world 0!")}), ls)
// make sure the log reader is primed
waitForMsg(t, lw, 30*time.Second)
assert.NilError(t, l.WriteLogEntry(&logger.Message{Line: []byte("hello world 1!")}), ls)
assert.NilError(t, l.WriteLogEntry(&logger.Message{Line: []byte("hello world 2!")}), ls)
assert.NilError(t, l.WriteLogEntry(&logger.Message{Line: []byte("hello world 3!")}), ls)
assert.NilError(t, l.WriteLogEntry(&logger.Message{Line: []byte("hello world 4!")}), ls)
poll.WaitOn(t, checkFileExists(f.Name()+".2.gz"), poll.WithDelay(time.Millisecond), poll.WithTimeout(30*time.Second))
})
}
func waitForMsg(t *testing.T, lw *logger.LogWatcher, timeout time.Duration) {
t.Helper()
timer := time.NewTimer(timeout)
defer timer.Stop()
select {
case <-lw.Msg:
case <-lw.WatchProducerGone():
t.Fatal("log producer gone before log message arrived")
case err := <-lw.Err:
assert.NilError(t, err)
case <-timer.C:
t.Fatal("timeout waiting for log message")
}
}
type dirStringer struct {
d string
}
func (d dirStringer) String() string {
ls, err := ioutil.ReadDir(d.d)
if err != nil {
return ""
}
buf := bytes.NewBuffer(nil)
tw := tabwriter.NewWriter(buf, 1, 8, 1, '\t', 0)
buf.WriteString("\n")
btw := bufio.NewWriter(tw)
for _, fi := range ls {
btw.WriteString(fmt.Sprintf("%s\t%s\t%dB\t%s\n", fi.Name(), fi.Mode(), fi.Size(), fi.ModTime()))
}
btw.Flush()
tw.Flush()
return buf.String()
}
func checkFileExists(name string) poll.Check {
return func(t poll.LogT) poll.Result {
_, err := os.Stat(name)
switch {
case err == nil:
return poll.Success()
case os.IsNotExist(err):
return poll.Continue("waiting for %s to exist", name)
default:
t.Logf("waiting for %s: %v: %s", name, err, dirStringer{filepath.Dir(name)})
return poll.Error(err)
}
}
}