2016-06-07 14:28:28 -07:00
package scheduler
2016-10-26 06:35:48 -07:00
import (
"time"
"github.com/docker/swarmkit/api"
"github.com/docker/swarmkit/log"
"golang.org/x/net/context"
)
2016-06-07 14:28:28 -07:00
// NodeInfo contains a node and some additional metadata.
type NodeInfo struct {
* api . Node
2016-09-13 09:28:01 -07:00
Tasks map [ string ] * api . Task
DesiredRunningTasksCount int
DesiredRunningTasksCountByService map [ string ] int
AvailableResources api . Resources
2016-10-26 06:35:48 -07:00
// recentFailures is a map from service ID to the timestamps of the
// most recent failures the node has experienced from replicas of that
// service.
// TODO(aaronl): When spec versioning is supported, this should track
// the version of the spec that failed.
recentFailures map [ string ] [ ] time . Time
2016-06-07 14:28:28 -07:00
}
2016-06-15 22:41:30 -07:00
func newNodeInfo ( n * api . Node , tasks map [ string ] * api . Task , availableResources api . Resources ) NodeInfo {
2016-06-07 14:28:28 -07:00
nodeInfo := NodeInfo {
2016-09-13 09:28:01 -07:00
Node : n ,
Tasks : make ( map [ string ] * api . Task ) ,
DesiredRunningTasksCountByService : make ( map [ string ] int ) ,
AvailableResources : availableResources ,
2016-10-26 06:35:48 -07:00
recentFailures : make ( map [ string ] [ ] time . Time ) ,
2016-06-07 14:28:28 -07:00
}
for _ , t := range tasks {
nodeInfo . addTask ( t )
}
return nodeInfo
}
2016-09-13 09:28:01 -07:00
// addTask removes a task from nodeInfo if it's tracked there, and returns true
// if nodeInfo was modified.
2016-06-07 14:28:28 -07:00
func ( nodeInfo * NodeInfo ) removeTask ( t * api . Task ) bool {
2016-09-13 09:28:01 -07:00
oldTask , ok := nodeInfo . Tasks [ t . ID ]
if ! ok {
2016-06-07 14:28:28 -07:00
return false
}
delete ( nodeInfo . Tasks , t . ID )
2016-09-13 09:28:01 -07:00
if oldTask . DesiredState == api . TaskStateRunning {
nodeInfo . DesiredRunningTasksCount --
nodeInfo . DesiredRunningTasksCountByService [ t . ServiceID ] --
}
2016-06-15 22:41:30 -07:00
reservations := taskReservations ( t . Spec )
nodeInfo . AvailableResources . MemoryBytes += reservations . MemoryBytes
nodeInfo . AvailableResources . NanoCPUs += reservations . NanoCPUs
2016-06-07 14:28:28 -07:00
return true
}
2016-09-13 09:28:01 -07:00
// addTask adds or updates a task on nodeInfo, and returns true if nodeInfo was
// modified.
2016-06-07 14:28:28 -07:00
func ( nodeInfo * NodeInfo ) addTask ( t * api . Task ) bool {
2016-09-13 09:28:01 -07:00
oldTask , ok := nodeInfo . Tasks [ t . ID ]
if ok {
if t . DesiredState == api . TaskStateRunning && oldTask . DesiredState != api . TaskStateRunning {
nodeInfo . Tasks [ t . ID ] = t
nodeInfo . DesiredRunningTasksCount ++
nodeInfo . DesiredRunningTasksCountByService [ t . ServiceID ] ++
return true
} else if t . DesiredState != api . TaskStateRunning && oldTask . DesiredState == api . TaskStateRunning {
nodeInfo . Tasks [ t . ID ] = t
nodeInfo . DesiredRunningTasksCount --
nodeInfo . DesiredRunningTasksCountByService [ t . ServiceID ] --
return true
}
return false
}
nodeInfo . Tasks [ t . ID ] = t
reservations := taskReservations ( t . Spec )
nodeInfo . AvailableResources . MemoryBytes -= reservations . MemoryBytes
nodeInfo . AvailableResources . NanoCPUs -= reservations . NanoCPUs
if t . DesiredState == api . TaskStateRunning {
nodeInfo . DesiredRunningTasksCount ++
nodeInfo . DesiredRunningTasksCountByService [ t . ServiceID ] ++
2016-06-07 14:28:28 -07:00
}
2016-09-13 09:28:01 -07:00
return true
2016-06-07 14:28:28 -07:00
}
// taskReservations returns a copy of the resource reservations declared in
// spec. When spec has no Resources, or its Resources carry no Reservations,
// the zero api.Resources value is returned.
func taskReservations(spec api.TaskSpec) (reservations api.Resources) {
	requirements := spec.Resources
	if requirements == nil || requirements.Reservations == nil {
		return reservations
	}
	return *requirements.Reservations
}
2016-10-26 06:35:48 -07:00
// taskFailed records, at the current time, a failure on this node of a task
// belonging to the service serviceID. Leading entries that are at least
// monitorFailures old are dropped from the service's history before the new
// timestamp is appended. A warning is logged when this failure brings the
// number of retained failures to exactly maxFailures.
func (nodeInfo *NodeInfo) taskFailed(ctx context.Context, serviceID string) {
	now := time.Now()
	history := nodeInfo.recentFailures[serviceID]

	// Entries are appended in chronological order, so the stale ones form a
	// prefix of the slice; stop at the first entry still inside the window.
	stale := 0
	for stale < len(history) && now.Sub(history[stale]) >= monitorFailures {
		stale++
	}
	retained := history[stale:]

	// Warn only on the failure that reaches the threshold, not on every
	// failure beyond it.
	if len(retained) == maxFailures-1 {
		log.G(ctx).Warnf("underweighting node %s for service %s because it experienced %d failures or rejections within %s", nodeInfo.ID, serviceID, maxFailures, monitorFailures.String())
	}

	nodeInfo.recentFailures[serviceID] = append(retained, now)
}
// countRecentFailures reports how many failures recorded for serviceID on this
// node are no more than monitorFailures older than now.
//
// The history is scanned from the newest entry backwards. Everything up to
// and including the first entry found to be older than monitorFailures is
// excluded from the count.
func (nodeInfo *NodeInfo) countRecentFailures(now time.Time, serviceID string) int {
	history := nodeInfo.recentFailures[serviceID]
	for idx := len(history) - 1; idx >= 0; idx-- {
		if now.Sub(history[idx]) > monitorFailures {
			// Entries 0..idx are outside the window; only those after idx count.
			return len(history) - (idx + 1)
		}
	}
	return len(history)
}