1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

Set socket timeout on netlink sockets

In case the file descriptor of the netlink socket is closed
the recvfrom is not returning. This may create deadlock conditions.
The current solution is to make sure that all the netlink socket used
have a proper timeout set on them to have the possibility to return

Added test to emulate the watchMiss condition

Signed-off-by: Flavio Crisciani <flavio.crisciani@docker.com>
This commit is contained in:
Flavio Crisciani 2017-10-10 11:36:16 -07:00
parent a41f623b10
commit 6736b223ec
4 changed files with 70 additions and 4 deletions

View file

@ -696,6 +696,12 @@ func (n *network) initSandbox(restore bool) error {
var nlSock *nl.NetlinkSocket
sbox.InvokeFunc(func() {
nlSock, err = nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH)
if err != nil {
return
}
// set the receive timeout to not remain stuck on the RecvFrom if the fd gets closed
tv := syscall.NsecToTimeval(soTimeout.Nanoseconds())
err = nlSock.SetReceiveTimeout(&tv)
})
n.setNetlinkSocket(nlSock)
@ -721,6 +727,11 @@ func (n *network) watchMiss(nlSock *nl.NetlinkSocket) {
// The netlink socket got closed, simply exit to not leak this goroutine
return
}
// When the receive timeout expires the receive will return EAGAIN
if err == syscall.EAGAIN {
// we continue here to avoid spam for timeouts
continue
}
logrus.Errorf("Failed to receive from netlink: %v ", err)
continue
}

View file

@ -1,7 +1,9 @@
package overlay
import (
"context"
"net"
"syscall"
"testing"
"time"
@ -12,6 +14,7 @@ import (
"github.com/docker/libnetwork/driverapi"
"github.com/docker/libnetwork/netlabel"
_ "github.com/docker/libnetwork/testutils"
"github.com/vishvananda/netlink/nl"
)
func init() {
@ -135,3 +138,36 @@ func TestOverlayType(t *testing.T) {
dt.d.Type())
}
}
// Test that the netlink socket close unblock the watchMiss to avoid deadlock
func TestNetlinkSocket(t *testing.T) {
// This is the same code used by the overlay driver to create the netlink interface
// for the watch miss
nlSock, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH)
if err != nil {
t.Fatal()
}
// set the receive timeout to not remain stuck on the RecvFrom if the fd gets closed
tv := syscall.NsecToTimeval(soTimeout.Nanoseconds())
err = nlSock.SetReceiveTimeout(&tv)
if err != nil {
t.Fatal()
}
n := &network{id: "testnetid"}
ch := make(chan error)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
go func() {
n.watchMiss(nlSock)
ch <- nil
}()
time.Sleep(5 * time.Second)
nlSock.Close()
select {
case <-ch:
case <-ctx.Done():
{
t.Fatalf("Timeout expired")
}
}
}

View file

@ -5,12 +5,19 @@ package ipvs
import (
"net"
"syscall"
"time"
"fmt"
"github.com/vishvananda/netlink/nl"
"github.com/vishvananda/netns"
)
const (
netlinkRecvSocketsTimeout = 3 * time.Second
netlinkSendSocketTimeout = 30 * time.Second
)
// Service defines an IPVS service in its entirety.
type Service struct {
// Virtual service address.
@ -82,6 +89,15 @@ func New(path string) (*Handle, error) {
if err != nil {
return nil, err
}
// Add operation timeout to avoid deadlocks
tv := syscall.NsecToTimeval(netlinkSendSocketTimeout.Nanoseconds())
if err := sock.SetSendTimeout(&tv); err != nil {
return nil, err
}
tv = syscall.NsecToTimeval(netlinkRecvSocketsTimeout.Nanoseconds())
if err := sock.SetReceiveTimeout(&tv); err != nil {
return nil, err
}
return &Handle{sock: sock}, nil
}

View file

@ -203,10 +203,6 @@ func newGenlRequest(familyID int, cmd uint8) *nl.NetlinkRequest {
}
func execute(s *nl.NetlinkSocket, req *nl.NetlinkRequest, resType uint16) ([][]byte, error) {
var (
err error
)
if err := s.Send(req); err != nil {
return nil, err
}
@ -222,6 +218,13 @@ done:
for {
msgs, err := s.Receive()
if err != nil {
if s.GetFd() == -1 {
return nil, fmt.Errorf("Socket got closed on receive")
}
if err == syscall.EAGAIN {
// timeout fired
continue
}
return nil, err
}
for _, m := range msgs {