diff --git a/hack/vendor.sh b/hack/vendor.sh index 0fe174c105..3cf528c2f7 100755 --- a/hack/vendor.sh +++ b/hack/vendor.sh @@ -9,7 +9,7 @@ source 'hack/.vendor-helpers.sh' clone git github.com/Azure/go-ansiterm 388960b655244e76e24c75f48631564eaefade62 clone git github.com/Microsoft/hcsshim v0.2.2 clone git github.com/Microsoft/go-winio v0.3.4 -clone git github.com/Sirupsen/logrus v0.9.0 # logrus is a common dependency among multiple deps +clone git github.com/Sirupsen/logrus v0.10.0 # logrus is a common dependency among multiple deps clone git github.com/docker/libtrust 9cbd2a1374f46905c68a4eb3694a130610adc62a clone git github.com/go-check/check 03a4d9dcf2f92eae8e90ed42aa2656f63fdd0b14 https://github.com/cpuguy83/check.git clone git github.com/gorilla/context 14f550f51a @@ -30,11 +30,14 @@ clone git github.com/imdario/mergo 0.2.1 #get libnetwork packages clone git github.com/docker/libnetwork v0.8.0-dev.1 +clone git github.com/docker/go-events 2e7d352816128aa84f4d29b2a21d400133701a0d +clone git github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80 clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec clone git github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b -clone git github.com/hashicorp/memberlist 9a1e242e454d2443df330bdd51a436d5a9058fc4 -clone git github.com/hashicorp/serf 7151adcef72687bf95f451a2e0ba15cb19412bf2 -clone git github.com/docker/libkv c2aac5dbbaa5c872211edea7c0f32b3bd67e7410 +clone git github.com/hashicorp/memberlist 88ac4de0d1a0ca6def284b571342db3b777a4c37 +clone git github.com/hashicorp/go-multierror fcdddc395df1ddf4247c69bd436e84cfa0733f7e +clone git github.com/hashicorp/serf 598c54895cc5a7b1a24a398d635e8c0ea0959870 +clone git github.com/docker/libkv 7283ef27ed32fe267388510a91709b307bb9942c clone git github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25 clone git github.com/vishvananda/netlink 631962935bff4f3d20ff32a72e8944f6d2836a26 clone git 
github.com/BurntSushi/toml f706d00e3de6abe700c994cdd545a1a4915af060 diff --git a/vendor/src/github.com/Sirupsen/logrus/CHANGELOG.md b/vendor/src/github.com/Sirupsen/logrus/CHANGELOG.md index 9e9e6009a0..f2c2bc2111 100644 --- a/vendor/src/github.com/Sirupsen/logrus/CHANGELOG.md +++ b/vendor/src/github.com/Sirupsen/logrus/CHANGELOG.md @@ -1,3 +1,10 @@ +# 0.10.0 + +* feature: Add a test hook (#180) +* feature: `ParseLevel` is now case-insensitive (#326) +* feature: `FieldLogger` interface that generalizes `Logger` and `Entry` (#308) +* performance: avoid re-allocations on `WithFields` (#335) + # 0.9.0 * logrus/text_formatter: don't emit empty msg diff --git a/vendor/src/github.com/Sirupsen/logrus/README.md b/vendor/src/github.com/Sirupsen/logrus/README.md index f8720c9a74..6e1721a743 100644 --- a/vendor/src/github.com/Sirupsen/logrus/README.md +++ b/vendor/src/github.com/Sirupsen/logrus/README.md @@ -1,4 +1,4 @@ -# Logrus :walrus: [![Build Status](https://travis-ci.org/Sirupsen/logrus.svg?branch=master)](https://travis-ci.org/Sirupsen/logrus) [![godoc reference](https://godoc.org/github.com/Sirupsen/logrus?status.png)][godoc] +# Logrus :walrus: [![Build Status](https://travis-ci.org/Sirupsen/logrus.svg?branch=master)](https://travis-ci.org/Sirupsen/logrus) [![GoDoc](https://godoc.org/github.com/Sirupsen/logrus?status.svg)](https://godoc.org/github.com/Sirupsen/logrus) Logrus is a structured logger for Go (golang), completely API compatible with the standard library logger. [Godoc][godoc]. 
**Please note the Logrus API is not @@ -12,7 +12,7 @@ plain text): ![Colored](http://i.imgur.com/PY7qMwd.png) -With `log.Formatter = new(logrus.JSONFormatter)`, for easy parsing by logstash +With `log.SetFormatter(&log.JSONFormatter{})`, for easy parsing by logstash or Splunk: ```json @@ -32,7 +32,7 @@ ocean","size":10,"time":"2014-03-10 19:57:38.562264131 -0400 EDT"} "time":"2014-03-10 19:57:38.562543128 -0400 EDT"} ``` -With the default `log.Formatter = new(&log.TextFormatter{})` when a TTY is not +With the default `log.SetFormatter(&log.TextFormatter{})` when a TTY is not attached, the output is compatible with the [logfmt](http://godoc.org/github.com/kr/logfmt) format: @@ -222,6 +222,11 @@ Note: Syslog hook also support connecting to local syslog (Ex. "/dev/log" or "/v | [Octokit](https://github.com/dorajistyle/logrus-octokit-hook) | Hook for logging to github via octokit | | [DeferPanic](https://github.com/deferpanic/dp-logrus) | Hook for logging to DeferPanic | | [Redis-Hook](https://github.com/rogierlommers/logrus-redis-hook) | Hook for logging to a ELK stack (through Redis) | +| [Amqp-Hook](https://github.com/vladoatanasov/logrus_amqp) | Hook for logging to Amqp broker (Like RabbitMQ) | +| [KafkaLogrus](https://github.com/goibibo/KafkaLogrus) | Hook for logging to kafka | +| [Typetalk](https://github.com/dragon3/logrus-typetalk-hook) | Hook for logging to [Typetalk](https://www.typetalk.in/) | +| [ElasticSearch](https://github.com/sohlich/elogrus) | Hook for logging to ElasticSearch| + #### Level logging @@ -363,4 +368,21 @@ entries. It should not be a feature of the application-level logger. 
| ---- | ----------- | |[Logrus Mate](https://github.com/gogap/logrus_mate)|Logrus mate is a tool for Logrus to manage loggers, you can initial logger's level, hook and formatter by config file, the logger will generated with different config at different environment.| -[godoc]: https://godoc.org/github.com/Sirupsen/logrus +#### Testing + +Logrus has a built in facility for asserting the presence of log messages. This is implemented through the `test` hook and provides: + +* decorators for existing logger (`test.NewLocal` and `test.NewGlobal`) which basically just add the `test` hook +* a test logger (`test.NewNullLogger`) that just records log messages (and does not output any): + +```go +logger, hook := NewNullLogger() +logger.Error("Hello error") + +assert.Equal(1, len(hook.Entries)) +assert.Equal(logrus.ErrorLevel, hook.LastEntry().Level) +assert.Equal("Hello error", hook.LastEntry().Message) + +hook.Reset() +assert.Nil(hook.LastEntry()) +``` diff --git a/vendor/src/github.com/Sirupsen/logrus/entry.go b/vendor/src/github.com/Sirupsen/logrus/entry.go index 9ae900bc5e..89e966e7bf 100644 --- a/vendor/src/github.com/Sirupsen/logrus/entry.go +++ b/vendor/src/github.com/Sirupsen/logrus/entry.go @@ -68,7 +68,7 @@ func (entry *Entry) WithField(key string, value interface{}) *Entry { // Add a map of fields to the Entry. func (entry *Entry) WithFields(fields Fields) *Entry { - data := Fields{} + data := make(Fields, len(entry.Data)+len(fields)) for k, v := range entry.Data { data[k] = v } diff --git a/vendor/src/github.com/Sirupsen/logrus/logrus.go b/vendor/src/github.com/Sirupsen/logrus/logrus.go index 0c09fbc264..e596691116 100644 --- a/vendor/src/github.com/Sirupsen/logrus/logrus.go +++ b/vendor/src/github.com/Sirupsen/logrus/logrus.go @@ -3,6 +3,7 @@ package logrus import ( "fmt" "log" + "strings" ) // Fields type, used to pass to `WithFields`. 
@@ -33,7 +34,7 @@ func (level Level) String() string { // ParseLevel takes a string level and returns the Logrus log level constant. func ParseLevel(lvl string) (Level, error) { - switch lvl { + switch strings.ToLower(lvl) { case "panic": return PanicLevel, nil case "fatal": @@ -52,6 +53,16 @@ func ParseLevel(lvl string) (Level, error) { return l, fmt.Errorf("not a valid logrus Level: %q", lvl) } +// A constant exposing all logging levels +var AllLevels = []Level{ + PanicLevel, + FatalLevel, + ErrorLevel, + WarnLevel, + InfoLevel, + DebugLevel, +} + // These are the different logging levels. You can set the logging level to log // on your instance of logger, obtained with `logrus.New()`. const ( @@ -96,3 +107,37 @@ type StdLogger interface { Panicf(string, ...interface{}) Panicln(...interface{}) } + +// The FieldLogger interface generalizes the Entry and Logger types +type FieldLogger interface { + WithField(key string, value interface{}) *Entry + WithFields(fields Fields) *Entry + WithError(err error) *Entry + + Debugf(format string, args ...interface{}) + Infof(format string, args ...interface{}) + Printf(format string, args ...interface{}) + Warnf(format string, args ...interface{}) + Warningf(format string, args ...interface{}) + Errorf(format string, args ...interface{}) + Fatalf(format string, args ...interface{}) + Panicf(format string, args ...interface{}) + + Debug(args ...interface{}) + Info(args ...interface{}) + Print(args ...interface{}) + Warn(args ...interface{}) + Warning(args ...interface{}) + Error(args ...interface{}) + Fatal(args ...interface{}) + Panic(args ...interface{}) + + Debugln(args ...interface{}) + Infoln(args ...interface{}) + Println(args ...interface{}) + Warnln(args ...interface{}) + Warningln(args ...interface{}) + Errorln(args ...interface{}) + Fatalln(args ...interface{}) + Panicln(args ...interface{}) +} diff --git a/vendor/src/github.com/armon/go-radix/.gitignore b/vendor/src/github.com/armon/go-radix/.gitignore new file mode 
100644 index 0000000000..00268614f0 --- /dev/null +++ b/vendor/src/github.com/armon/go-radix/.gitignore @@ -0,0 +1,22 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe diff --git a/vendor/src/github.com/armon/go-radix/.travis.yml b/vendor/src/github.com/armon/go-radix/.travis.yml new file mode 100644 index 0000000000..1a0bbea6c7 --- /dev/null +++ b/vendor/src/github.com/armon/go-radix/.travis.yml @@ -0,0 +1,3 @@ +language: go +go: + - tip diff --git a/vendor/src/github.com/armon/go-radix/LICENSE b/vendor/src/github.com/armon/go-radix/LICENSE new file mode 100644 index 0000000000..a5df10e675 --- /dev/null +++ b/vendor/src/github.com/armon/go-radix/LICENSE @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Copyright (c) 2014 Armon Dadgar + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/src/github.com/armon/go-radix/README.md b/vendor/src/github.com/armon/go-radix/README.md new file mode 100644 index 0000000000..c054fe86c0 --- /dev/null +++ b/vendor/src/github.com/armon/go-radix/README.md @@ -0,0 +1,36 @@ +go-radix [![Build Status](https://travis-ci.org/armon/go-radix.png)](https://travis-ci.org/armon/go-radix) +========= + +Provides the `radix` package that implements a [radix tree](http://en.wikipedia.org/wiki/Radix_tree). +The package only provides a single `Tree` implementation, optimized for sparse nodes. + +As a radix tree, it provides the following: + * O(k) operations. In many cases, this can be faster than a hash table since + the hash function is an O(k) operation, and hash tables have very poor cache locality. + * Minimum / Maximum value lookups + * Ordered iteration + +Documentation +============= + +The full documentation is available on [Godoc](http://godoc.org/github.com/armon/go-radix). + +Example +======= + +Below is a simple example of usage + +```go +// Create a tree +r := radix.New() +r.Insert("foo", 1) +r.Insert("bar", 2) +r.Insert("foobar", 2) + +// Find the longest prefix match +m, _, _ := r.LongestPrefix("foozip") +if m != "foo" { + panic("should be foo") +} +``` + diff --git a/vendor/src/github.com/armon/go-radix/radix.go b/vendor/src/github.com/armon/go-radix/radix.go new file mode 100644 index 0000000000..8c963c914a --- /dev/null +++ b/vendor/src/github.com/armon/go-radix/radix.go @@ -0,0 +1,467 @@ +package radix + +import ( + "sort" + "strings" +) + +// WalkFn is used when walking the tree. Takes a +// key and value, returning if iteration should +// be terminated. 
+type WalkFn func(s string, v interface{}) bool + +// leafNode is used to represent a value +type leafNode struct { + key string + val interface{} +} + +// edge is used to represent an edge node +type edge struct { + label byte + node *node +} + +type node struct { + // leaf is used to store possible leaf + leaf *leafNode + + // prefix is the common prefix we ignore + prefix string + + // Edges should be stored in-order for iteration. + // We avoid a fully materialized slice to save memory, + // since in most cases we expect to be sparse + edges edges +} + +func (n *node) isLeaf() bool { + return n.leaf != nil +} + +func (n *node) addEdge(e edge) { + n.edges = append(n.edges, e) + n.edges.Sort() +} + +func (n *node) replaceEdge(e edge) { + num := len(n.edges) + idx := sort.Search(num, func(i int) bool { + return n.edges[i].label >= e.label + }) + if idx < num && n.edges[idx].label == e.label { + n.edges[idx].node = e.node + return + } + panic("replacing missing edge") +} + +func (n *node) getEdge(label byte) *node { + num := len(n.edges) + idx := sort.Search(num, func(i int) bool { + return n.edges[i].label >= label + }) + if idx < num && n.edges[idx].label == label { + return n.edges[idx].node + } + return nil +} + +type edges []edge + +func (e edges) Len() int { + return len(e) +} + +func (e edges) Less(i, j int) bool { + return e[i].label < e[j].label +} + +func (e edges) Swap(i, j int) { + e[i], e[j] = e[j], e[i] +} + +func (e edges) Sort() { + sort.Sort(e) +} + +// Tree implements a radix tree. This can be treated as a +// Dictionary abstract data type. 
The main advantage over +// a standard hash map is prefix-based lookups and +// ordered iteration, +type Tree struct { + root *node + size int +} + +// New returns an empty Tree +func New() *Tree { + return NewFromMap(nil) +} + +// NewFromMap returns a new tree containing the keys +// from an existing map +func NewFromMap(m map[string]interface{}) *Tree { + t := &Tree{root: &node{}} + for k, v := range m { + t.Insert(k, v) + } + return t +} + +// Len is used to return the number of elements in the tree +func (t *Tree) Len() int { + return t.size +} + +// longestPrefix finds the length of the shared prefix +// of two strings +func longestPrefix(k1, k2 string) int { + max := len(k1) + if l := len(k2); l < max { + max = l + } + var i int + for i = 0; i < max; i++ { + if k1[i] != k2[i] { + break + } + } + return i +} + +// Insert is used to add a newentry or update +// an existing entry. Returns if updated. +func (t *Tree) Insert(s string, v interface{}) (interface{}, bool) { + var parent *node + n := t.root + search := s + for { + // Handle key exhaution + if len(search) == 0 { + if n.isLeaf() { + old := n.leaf.val + n.leaf.val = v + return old, true + } else { + n.leaf = &leafNode{ + key: s, + val: v, + } + t.size++ + return nil, false + } + } + + // Look for the edge + parent = n + n = n.getEdge(search[0]) + + // No edge, create one + if n == nil { + e := edge{ + label: search[0], + node: &node{ + leaf: &leafNode{ + key: s, + val: v, + }, + prefix: search, + }, + } + parent.addEdge(e) + t.size++ + return nil, false + } + + // Determine longest prefix of the search key on match + commonPrefix := longestPrefix(search, n.prefix) + if commonPrefix == len(n.prefix) { + search = search[commonPrefix:] + continue + } + + // Split the node + t.size++ + child := &node{ + prefix: search[:commonPrefix], + } + parent.replaceEdge(edge{ + label: search[0], + node: child, + }) + + // Restore the existing node + child.addEdge(edge{ + label: n.prefix[commonPrefix], + node: n, + }) + 
n.prefix = n.prefix[commonPrefix:] + + // Create a new leaf node + leaf := &leafNode{ + key: s, + val: v, + } + + // If the new key is a subset, add to to this node + search = search[commonPrefix:] + if len(search) == 0 { + child.leaf = leaf + return nil, false + } + + // Create a new edge for the node + child.addEdge(edge{ + label: search[0], + node: &node{ + leaf: leaf, + prefix: search, + }, + }) + return nil, false + } + return nil, false +} + +// Delete is used to delete a key, returning the previous +// value and if it was deleted +func (t *Tree) Delete(s string) (interface{}, bool) { + n := t.root + search := s + for { + // Check for key exhaution + if len(search) == 0 { + if !n.isLeaf() { + break + } + goto DELETE + } + + // Look for an edge + n = n.getEdge(search[0]) + if n == nil { + break + } + + // Consume the search prefix + if strings.HasPrefix(search, n.prefix) { + search = search[len(n.prefix):] + } else { + break + } + } + return nil, false + +DELETE: + // Delete the leaf + leaf := n.leaf + n.leaf = nil + t.size-- + + // Check if we should merge this node + if len(n.edges) == 1 { + e := n.edges[0] + child := e.node + n.prefix = n.prefix + child.prefix + n.leaf = child.leaf + n.edges = child.edges + } + return leaf.val, true +} + +// Get is used to lookup a specific key, returning +// the value and if it was found +func (t *Tree) Get(s string) (interface{}, bool) { + n := t.root + search := s + for { + // Check for key exhaution + if len(search) == 0 { + if n.isLeaf() { + return n.leaf.val, true + } + break + } + + // Look for an edge + n = n.getEdge(search[0]) + if n == nil { + break + } + + // Consume the search prefix + if strings.HasPrefix(search, n.prefix) { + search = search[len(n.prefix):] + } else { + break + } + } + return nil, false +} + +// LongestPrefix is like Get, but instead of an +// exact match, it will return the longest prefix match. 
+func (t *Tree) LongestPrefix(s string) (string, interface{}, bool) { + var last *leafNode + n := t.root + search := s + for { + // Look for a leaf node + if n.isLeaf() { + last = n.leaf + } + + // Check for key exhaution + if len(search) == 0 { + break + } + + // Look for an edge + n = n.getEdge(search[0]) + if n == nil { + break + } + + // Consume the search prefix + if strings.HasPrefix(search, n.prefix) { + search = search[len(n.prefix):] + } else { + break + } + } + if last != nil { + return last.key, last.val, true + } + return "", nil, false +} + +// Minimum is used to return the minimum value in the tree +func (t *Tree) Minimum() (string, interface{}, bool) { + n := t.root + for { + if n.isLeaf() { + return n.leaf.key, n.leaf.val, true + } + if len(n.edges) > 0 { + n = n.edges[0].node + } else { + break + } + } + return "", nil, false +} + +// Maximum is used to return the maximum value in the tree +func (t *Tree) Maximum() (string, interface{}, bool) { + n := t.root + for { + if num := len(n.edges); num > 0 { + n = n.edges[num-1].node + continue + } + if n.isLeaf() { + return n.leaf.key, n.leaf.val, true + } else { + break + } + } + return "", nil, false +} + +// Walk is used to walk the tree +func (t *Tree) Walk(fn WalkFn) { + recursiveWalk(t.root, fn) +} + +// WalkPrefix is used to walk the tree under a prefix +func (t *Tree) WalkPrefix(prefix string, fn WalkFn) { + n := t.root + search := prefix + for { + // Check for key exhaution + if len(search) == 0 { + recursiveWalk(n, fn) + return + } + + // Look for an edge + n = n.getEdge(search[0]) + if n == nil { + break + } + + // Consume the search prefix + if strings.HasPrefix(search, n.prefix) { + search = search[len(n.prefix):] + + } else if strings.HasPrefix(n.prefix, search) { + // Child may be under our search prefix + recursiveWalk(n, fn) + return + } else { + break + } + } + +} + +// WalkPath is used to walk the tree, but only visiting nodes +// from the root down to a given leaf. 
Where WalkPrefix walks +// all the entries *under* the given prefix, this walks the +// entries *above* the given prefix. +func (t *Tree) WalkPath(path string, fn WalkFn) { + n := t.root + search := path + for { + // Visit the leaf values if any + if n.leaf != nil && fn(n.leaf.key, n.leaf.val) { + return + } + + // Check for key exhaution + if len(search) == 0 { + return + } + + // Look for an edge + n = n.getEdge(search[0]) + if n == nil { + return + } + + // Consume the search prefix + if strings.HasPrefix(search, n.prefix) { + search = search[len(n.prefix):] + } else { + break + } + } +} + +// recursiveWalk is used to do a pre-order walk of a node +// recursively. Returns true if the walk should be aborted +func recursiveWalk(n *node, fn WalkFn) bool { + // Visit the leaf values if any + if n.leaf != nil && fn(n.leaf.key, n.leaf.val) { + return true + } + + // Recurse on the children + for _, e := range n.edges { + if recursiveWalk(e.node, fn) { + return true + } + } + return false +} + +// ToMap is used to walk the tree and convert it into a map +func (t *Tree) ToMap() map[string]interface{} { + out := make(map[string]interface{}, t.size) + t.Walk(func(k string, v interface{}) bool { + out[k] = v + return false + }) + return out +} diff --git a/vendor/src/github.com/docker/go-events/.gitignore b/vendor/src/github.com/docker/go-events/.gitignore new file mode 100644 index 0000000000..daf913b1b3 --- /dev/null +++ b/vendor/src/github.com/docker/go-events/.gitignore @@ -0,0 +1,24 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe +*.test +*.prof diff --git a/vendor/src/github.com/docker/go-events/LICENSE b/vendor/src/github.com/docker/go-events/LICENSE new file mode 100644 index 0000000000..8dada3edaf --- /dev/null +++ 
b/vendor/src/github.com/docker/go-events/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/src/github.com/docker/go-events/README.md b/vendor/src/github.com/docker/go-events/README.md new file mode 100644 index 0000000000..9e3625da9e --- /dev/null +++ b/vendor/src/github.com/docker/go-events/README.md @@ -0,0 +1,112 @@ +# Docker Events Package + +[![GoDoc](https://godoc.org/github.com/docker/go-events?status.svg)](https://godoc.org/github.com/docker/go-events) +[![Circle CI](https://circleci.com/gh/docker/go-events.svg?style=shield)](https://circleci.com/gh/docker/go-events) + +The Docker `events` package implements a composable event distribution package +for Go. + +Originally created to implement the [notifications in Docker Registry +2](https://github.com/docker/distribution/blob/master/docs/notifications.md), +we've found the pattern to be useful in other applications. 
This package is +most of the same code with slightly updated interfaces. Much of the internals +have been made available. + +## Usage + +The `events` package centers around a `Sink` type. Events are written with +calls to `Sink.Write(event Event)`. Sinks can be wired up in various +configurations to achieve interesting behavior. + +The canonical example is that employed by the +[docker/distribution/notifications](https://godoc.org/github.com/docker/distribution/notifications) +package. Let's say we have a type `httpSink` where we'd like to queue +notifications. As a rule, it should send a single http request and return an +error if it fails: + +```go +func (h *httpSink) Write(event Event) error { + p, err := json.Marshal(event) + if err != nil { + return err + } + body := bytes.NewReader(p) + resp, err := h.client.Post(h.url, "application/json", body) + if err != nil { + return err + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return errors.New("unexpected status") + } + + return nil +} + +// implement (*httpSink).Close() +``` + +With just that, we can start using components from this package. One can call +`(*httpSink).Write` to send events as the body of a post request to a +configured URL. + +### Retries + +HTTP can be unreliable. The first feature we'd like is to have some retry: + +```go +hs := newHTTPSink(/*...*/) +retry := NewRetryingSink(hs, NewBreaker(5, time.Second)) +``` + +We now have a sink that will retry events against the `httpSink` until they +succeed. The retry will backoff for one second after 5 consecutive failures +using the breaker strategy. + +### Queues + +This isn't quite enough. We want a sink that doesn't block while we are +waiting for events to be sent. Let's add a `Queue`: + +```go +queue := NewQueue(retry) +``` + +Now, we have an unbounded queue that will work through all events sent with +`(*Queue).Write`. Events can be added asynchronously to the queue without +blocking the current execution path.
This is ideal for use in an http request. + +### Broadcast + +It usually turns out that you want to send to more than one listener. We can +use `Broadcaster` to support this: + +```go +var broadcast = NewBroadcaster() // make it available somewhere in your application. +broadcast.Add(queue) // add your queue! +broadcast.Add(queue2) // and another! +``` + +With the above, we can now call `broadcast.Write` in our http handlers and have +all the events distributed to each queue. Because the events are queued, no +listener blocks another. + +### Extending + +For the most part, the above is sufficient for a lot of applications. However, +extending the above functionality can be done by implementing your own `Sink`. The +behavior and semantics of the sink can be completely dependent on the +application requirements. The interface is provided below for reference: + +```go +type Sink interface { + Write(Event) error + Close() error +} +``` + +Application behavior can be controlled by how `Write` behaves. The examples +above are designed to queue the message and return as quickly as possible. +Other implementations may block until the event is committed to durable +storage. diff --git a/vendor/src/github.com/docker/go-events/broadcast.go b/vendor/src/github.com/docker/go-events/broadcast.go new file mode 100644 index 0000000000..e73d758bcf --- /dev/null +++ b/vendor/src/github.com/docker/go-events/broadcast.go @@ -0,0 +1,158 @@ +package events + +import "github.com/Sirupsen/logrus" + +// Broadcaster sends events to multiple, reliable Sinks. The goal of this +// component is to dispatch events to configured endpoints. Reliability can be +// provided by wrapping incoming sinks. +type Broadcaster struct { + sinks []Sink + events chan Event + adds chan configureRequest + removes chan configureRequest + closed chan chan struct{} +} + +// NewBroadcaster appends one or more sinks to the list of sinks. The +// broadcaster behavior will be affected by the properties of the sink.
+// Generally, the sink should accept all messages and deal with reliability on +// its own. Use of EventQueue and RetryingSink should be used here. +func NewBroadcaster(sinks ...Sink) *Broadcaster { + b := Broadcaster{ + sinks: sinks, + events: make(chan Event), + adds: make(chan configureRequest), + removes: make(chan configureRequest), + closed: make(chan chan struct{}), + } + + // Start the broadcaster + go b.run() + + return &b +} + +// Write accepts an event to be dispatched to all sinks. This method will never +// fail and should never block (hopefully!). The caller cedes the memory to the +// broadcaster and should not modify it after calling write. +func (b *Broadcaster) Write(event Event) error { + select { + case b.events <- event: + case <-b.closed: + return ErrSinkClosed + } + return nil +} + +// Add the sink to the broadcaster. +// +// The provided sink must be comparable with equality. Typically, this just +// works with a regular pointer type. +func (b *Broadcaster) Add(sink Sink) error { + return b.configure(b.adds, sink) +} + +// Remove the provided sink. +func (b *Broadcaster) Remove(sink Sink) error { + return b.configure(b.removes, sink) +} + +type configureRequest struct { + sink Sink + response chan error +} + +func (b *Broadcaster) configure(ch chan configureRequest, sink Sink) error { + response := make(chan error, 1) + + for { + select { + case ch <- configureRequest{ + sink: sink, + response: response}: + ch = nil + case err := <-response: + return err + case <-b.closed: + return ErrSinkClosed + } + } +} + +// Close the broadcaster, ensuring that all messages are flushed to the +// underlying sink before returning. 
+func (b *Broadcaster) Close() error { + select { + case <-b.closed: + // already closed + return ErrSinkClosed + default: + // do a little chan handoff dance to synchronize closing + closed := make(chan struct{}) + b.closed <- closed + close(b.closed) + <-closed + return nil + } +} + +// run is the main broadcast loop, started when the broadcaster is created. +// Under normal conditions, it waits for events on the event channel. After +// Close is called, this goroutine will exit. +func (b *Broadcaster) run() { + remove := func(target Sink) { + for i, sink := range b.sinks { + if sink == target { + b.sinks = append(b.sinks[:i], b.sinks[i+1:]...) + break + } + } + } + + for { + select { + case event := <-b.events: + for _, sink := range b.sinks { + if err := sink.Write(event); err != nil { + if err == ErrSinkClosed { + // remove closed sinks + remove(sink) + continue + } + logrus.WithField("event", event).WithField("events.sink", sink).WithError(err). + Errorf("broadcaster: dropping event") + } + } + case request := <-b.adds: + // while we have to iterate for add/remove, common iteration for + // send is faster against slice. + + var found bool + for _, sink := range b.sinks { + if request.sink == sink { + found = true + break + } + } + + if !found { + b.sinks = append(b.sinks, request.sink) + } + // b.sinks[request.sink] = struct{}{} + request.response <- nil + case request := <-b.removes: + remove(request.sink) + request.response <- nil + case closing := <-b.closed: + // close all the underlying sinks + for _, sink := range b.sinks { + if err := sink.Close(); err != nil && err != ErrSinkClosed { + logrus.WithField("events.sink", sink).WithError(err). 
+ Errorf("broadcaster: closing sink failed") + } + } + closing <- struct{}{} + return + } + } +} diff --git a/vendor/src/github.com/docker/go-events/channel.go b/vendor/src/github.com/docker/go-events/channel.go new file mode 100644 index 0000000000..7ee7ea5e22 --- /dev/null +++ b/vendor/src/github.com/docker/go-events/channel.go @@ -0,0 +1,47 @@ +package events + +// Channel provides a sink that can be listened on. The writer and channel +// listener must operate in separate goroutines. +// +// Consumers should listen on Channel.C until Done is closed. +type Channel struct { + C chan Event + + closed chan struct{} +} + +// NewChannel returns a channel. If buffer is zero, the channel is +// unbuffered. +func NewChannel(buffer int) *Channel { + return &Channel{ + C: make(chan Event, buffer), + closed: make(chan struct{}), + } +} + +// Done returns a channel that will always proceed once the sink is closed. +func (ch *Channel) Done() chan struct{} { + return ch.closed +} + +// Write the event to the channel. Must be called in a separate goroutine from +// the listener. +func (ch *Channel) Write(event Event) error { + select { + case ch.C <- event: + return nil + case <-ch.closed: + return ErrSinkClosed + } +} + +// Close the channel sink. +func (ch *Channel) Close() error { + select { + case <-ch.closed: + return ErrSinkClosed + default: + close(ch.closed) + return nil + } +} diff --git a/vendor/src/github.com/docker/go-events/errors.go b/vendor/src/github.com/docker/go-events/errors.go new file mode 100644 index 0000000000..56db7c2510 --- /dev/null +++ b/vendor/src/github.com/docker/go-events/errors.go @@ -0,0 +1,10 @@ +package events + +import "fmt" + +var ( + // ErrSinkClosed is returned if a write is issued to a sink that has been + // closed. If encountered, the error should be considered terminal and + // retries will not be successful.
+ ErrSinkClosed = fmt.Errorf("events: sink closed") +) diff --git a/vendor/src/github.com/docker/go-events/event.go b/vendor/src/github.com/docker/go-events/event.go new file mode 100644 index 0000000000..f0f1d9ea5f --- /dev/null +++ b/vendor/src/github.com/docker/go-events/event.go @@ -0,0 +1,15 @@ +package events + +// Event marks items that can be sent as events. +type Event interface{} + +// Sink accepts and sends events. +type Sink interface { + // Write an event to the Sink. If no error is returned, the caller will + // assume that all events have been committed to the sink. If an error is + // received, the caller may retry sending the event. + Write(event Event) error + + // Close the sink, possibly waiting for pending events to flush. + Close() error +} diff --git a/vendor/src/github.com/docker/go-events/filter.go b/vendor/src/github.com/docker/go-events/filter.go new file mode 100644 index 0000000000..f2765cfe6b --- /dev/null +++ b/vendor/src/github.com/docker/go-events/filter.go @@ -0,0 +1,52 @@ +package events + +// Matcher matches events. +type Matcher interface { + Match(event Event) bool +} + +// MatcherFunc implements matcher with just a function. +type MatcherFunc func(event Event) bool + +// Match calls the wrapped function. +func (fn MatcherFunc) Match(event Event) bool { + return fn(event) +} + +// Filter provides an event sink that sends only events that are accepted by a +// Matcher. No methods on filter are goroutine safe. +type Filter struct { + dst Sink + matcher Matcher + closed bool +} + +// NewFilter returns a new filter that will send to events to dst that return +// true for Matcher. +func NewFilter(dst Sink, matcher Matcher) Sink { + return &Filter{dst: dst, matcher: matcher} +} + +// Write an event to the filter. 
+func (f *Filter) Write(event Event) error { + if f.closed { + return ErrSinkClosed + } + + if f.matcher.Match(event) { + return f.dst.Write(event) + } + + return nil +} + +// Close the filter and allow no more events to pass through. +func (f *Filter) Close() error { + // TODO(stevvooe): Not all sinks should have Close. + if f.closed { + return ErrSinkClosed + } + + f.closed = true + return f.dst.Close() +} diff --git a/vendor/src/github.com/docker/go-events/queue.go b/vendor/src/github.com/docker/go-events/queue.go new file mode 100644 index 0000000000..7c5fc8150e --- /dev/null +++ b/vendor/src/github.com/docker/go-events/queue.go @@ -0,0 +1,104 @@ +package events + +import ( + "container/list" + "sync" + + "github.com/Sirupsen/logrus" +) + +// Queue accepts all messages into a queue for asynchronous consumption +// by a sink. It is unbounded and thread safe but the sink must be reliable or +// events will be dropped. +type Queue struct { + dst Sink + events *list.List + cond *sync.Cond + mu sync.Mutex + closed bool +} + +// NewQueue returns a queue to the provided Sink dst. +func NewQueue(dst Sink) *Queue { + eq := Queue{ + dst: dst, + events: list.New(), + } + + eq.cond = sync.NewCond(&eq.mu) + go eq.run() + return &eq +} + +// Write accepts the events into the queue, only failing if the queue has +// been closed. +func (eq *Queue) Write(event Event) error { + eq.mu.Lock() + defer eq.mu.Unlock() + + if eq.closed { + return ErrSinkClosed + } + + eq.events.PushBack(event) + eq.cond.Signal() // signal waiters + + return nil +} + +// Close shuts down the event queue, flushing pending events to the sink. +func (eq *Queue) Close() error { + eq.mu.Lock() + defer eq.mu.Unlock() + + if eq.closed { + return ErrSinkClosed + } + + // set closed flag + eq.closed = true + eq.cond.Signal() // signal flushes queue + eq.cond.Wait() // wait for signal from last flush + return eq.dst.Close() +} + +// run is the main goroutine to flush events to the target sink.
+func (eq *Queue) run() { + for { + event := eq.next() + + if event == nil { + return // nil block means event queue is closed. + } + + if err := eq.dst.Write(event); err != nil { + logrus.WithFields(logrus.Fields{ + "event": event, + "sink": eq.dst, + }).WithError(err).Warnf("eventqueue: dropped event") + } + } +} + +// next encompasses the critical section of the run loop. When the queue is +// empty, it will block on the condition. If new data arrives, it will wake +// and return a block. When closed, a nil slice will be returned. +func (eq *Queue) next() Event { + eq.mu.Lock() + defer eq.mu.Unlock() + + for eq.events.Len() < 1 { + if eq.closed { + eq.cond.Broadcast() + return nil + } + + eq.cond.Wait() + } + + front := eq.events.Front() + block := front.Value.(Event) + eq.events.Remove(front) + + return block +} diff --git a/vendor/src/github.com/docker/go-events/retry.go b/vendor/src/github.com/docker/go-events/retry.go new file mode 100644 index 0000000000..501deeb55f --- /dev/null +++ b/vendor/src/github.com/docker/go-events/retry.go @@ -0,0 +1,168 @@ +package events + +import ( + "sync" + "time" + + "github.com/Sirupsen/logrus" +) + +// RetryingSink retries the write until success or an ErrSinkClosed is +// returned. Underlying sink must have p > 0 of succeeding or the sink will +// block. Retry is configured with a RetryStrategy. Concurrent calls to a +// retrying sink are serialized through the sink, meaning that if one is +// in-flight, another will not proceed. +type RetryingSink struct { + sink Sink + strategy RetryStrategy + closed chan struct{} +} + +// NewRetryingSink returns a sink that will retry writes to a sink, backing +// off on failure. Parameters threshold and backoff adjust the behavior of the +// circuit breaker. 
+func NewRetryingSink(sink Sink, strategy RetryStrategy) *RetryingSink { + rs := &RetryingSink{ + sink: sink, + strategy: strategy, + closed: make(chan struct{}), + } + + return rs +} + +// Write attempts to flush the events to the downstream sink until it succeeds +// or the sink is closed. +func (rs *RetryingSink) Write(event Event) error { + logger := logrus.WithField("event", event) + var timer *time.Timer + +retry: + select { + case <-rs.closed: + return ErrSinkClosed + default: + } + + if backoff := rs.strategy.Proceed(event); backoff > 0 { + if timer == nil { + timer = time.NewTimer(backoff) + defer timer.Stop() + } else { + timer.Reset(backoff) + } + + select { + case <-timer.C: + goto retry + case <-rs.closed: + return ErrSinkClosed + } + } + + if err := rs.sink.Write(event); err != nil { + if err == ErrSinkClosed { + // terminal! + return err + } + + logger := logger.WithError(err) // shadow!! + + if rs.strategy.Failure(event, err) { + logger.Errorf("retryingsink: dropped event") + return nil + } + + logger.Errorf("retryingsink: error writing event, retrying") + goto retry + } + + rs.strategy.Success(event) + return nil +} + +// Close closes the sink and the underlying sink. +func (rs *RetryingSink) Close() error { + select { + case <-rs.closed: + return ErrSinkClosed + default: + close(rs.closed) + return rs.sink.Close() + } +} + +// RetryStrategy defines a strategy for retrying event sink writes. +// +// All methods should be goroutine safe. +type RetryStrategy interface { + // Proceed is called before every event send. If proceed returns a + // positive, non-zero integer, the retryer will back off by the provided + // duration. + // + // An event is provided, but may be ignored. + Proceed(event Event) time.Duration + + // Failure reports a failure to the strategy. If this method returns true, + // the event should be dropped. + Failure(event Event, err error) bool + + // Success should be called when an event is sent successfully.
+ Success(event Event) +} + +// TODO(stevvooe): We are using circuit breaker here. May want to provide +// bounded exponential backoff, as well. + +// Breaker implements a circuit breaker retry strategy. +// +// The current implementation never drops events. +type Breaker struct { + threshold int + recent int + last time.Time + backoff time.Duration // time after which we retry after failure. + mu sync.Mutex +} + +var _ RetryStrategy = &Breaker{} + +// NewBreaker returns a breaker that will backoff after the threshold has been +// tripped. A Breaker is thread safe and may be shared by many goroutines. +func NewBreaker(threshold int, backoff time.Duration) *Breaker { + return &Breaker{ + threshold: threshold, + backoff: backoff, + } +} + +// Proceed checks the failures against the threshold. +func (b *Breaker) Proceed(event Event) time.Duration { + b.mu.Lock() + defer b.mu.Unlock() + + if b.recent < b.threshold { + return 0 + } + + return b.last.Add(b.backoff).Sub(time.Now()) +} + +// Success resets the breaker. +func (b *Breaker) Success(event Event) { + b.mu.Lock() + defer b.mu.Unlock() + + b.recent = 0 + b.last = time.Time{} +} + +// Failure records the failure and latest failure time. +func (b *Breaker) Failure(event Event, err error) bool { + b.mu.Lock() + defer b.mu.Unlock() + + b.recent++ + b.last = time.Now().UTC() + return false // never drop events. 
+} diff --git a/vendor/src/github.com/docker/libkv/.travis.yml b/vendor/src/github.com/docker/libkv/.travis.yml index a9ef7c82bf..f7cecbdf9c 100644 --- a/vendor/src/github.com/docker/libkv/.travis.yml +++ b/vendor/src/github.com/docker/libkv/.travis.yml @@ -1,9 +1,7 @@ language: go go: - - 1.3 -# - 1.4 -# see https://github.com/moovweb/gvm/pull/116 for why Go 1.4 is currently disabled + - 1.5.3 # let us have speedy Docker-based Travis workers sudo: false @@ -11,19 +9,18 @@ sudo: false before_install: # Symlink below is needed for Travis CI to work correctly on personal forks of libkv - ln -s $HOME/gopath/src/github.com/${TRAVIS_REPO_SLUG///libkv/} $HOME/gopath/src/github.com/docker - - go get golang.org/x/tools/cmd/vet - go get golang.org/x/tools/cmd/cover - go get github.com/mattn/goveralls - go get github.com/golang/lint/golint - go get github.com/GeertJohan/fgt before_script: - - script/travis_consul.sh 0.5.2 - - script/travis_etcd.sh 2.2.0 - - script/travis_zk.sh 3.4.6 + - script/travis_consul.sh 0.6.3 + - script/travis_etcd.sh 2.2.5 + - script/travis_zk.sh 3.5.1-alpha script: - - ./consul agent -server -bootstrap-expect 1 -data-dir /tmp/consul -config-file=./config.json 1>/dev/null & + - ./consul agent -server -bootstrap -advertise=127.0.0.1 -data-dir /tmp/consul -config-file=./config.json 1>/dev/null & - ./etcd/etcd --listen-client-urls 'http://0.0.0.0:4001' --advertise-client-urls 'http://127.0.0.1:4001' >/dev/null 2>&1 & - ./zk/bin/zkServer.sh start ./zk/conf/zoo.cfg 1> /dev/null - script/validate-gofmt diff --git a/vendor/src/github.com/docker/libkv/LICENSE.code b/vendor/src/github.com/docker/libkv/LICENSE.code index 9e4bd4dbee..34c4ea7c50 100644 --- a/vendor/src/github.com/docker/libkv/LICENSE.code +++ b/vendor/src/github.com/docker/libkv/LICENSE.code @@ -176,7 +176,7 @@ END OF TERMS AND CONDITIONS - Copyright 2014-2015 Docker, Inc. + Copyright 2014-2016 Docker, Inc. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/vendor/src/github.com/docker/libkv/MAINTAINERS b/vendor/src/github.com/docker/libkv/MAINTAINERS new file mode 100644 index 0000000000..4dd59c7e27 --- /dev/null +++ b/vendor/src/github.com/docker/libkv/MAINTAINERS @@ -0,0 +1,46 @@ +# Libkv maintainers file +# +# This file describes who runs the docker/libkv project and how. +# This is a living document - if you see something out of date or missing, speak up! +# +# It is structured to be consumable by both humans and programs. +# To extract its contents programmatically, use any TOML-compliant parser. +# +# This file is compiled into the MAINTAINERS file in docker/opensource. +# +[Org] + [Org."Core maintainers"] + people = [ + "abronan", + "aluzzardi", + "sanimej", + "vieux", + ] + +[people] + +# A reference list of all people associated with the project. +# All other sections should refer to people by their canonical key +# in the people section. 
+ + # ADD YOURSELF HERE IN ALPHABETICAL ORDER + + [people.abronan] + Name = "Alexandre Beslic" + Email = "abronan@docker.com" + GitHub = "abronan" + + [people.aluzzardi] + Name = "Andrea Luzzardi" + Email = "al@docker.com" + GitHub = "aluzzardi" + + [people.sanimej] + Name = "Santhosh Manohar" + Email = "santhosh@docker.com" + GitHub = "sanimej" + + [people.vieux] + Name = "Victor Vieux" + Email = "vieux@docker.com" + GitHub = "vieux" diff --git a/vendor/src/github.com/docker/libkv/README.md b/vendor/src/github.com/docker/libkv/README.md index 02cf3fd0ea..baad81b4f8 100644 --- a/vendor/src/github.com/docker/libkv/README.md +++ b/vendor/src/github.com/docker/libkv/README.md @@ -3,6 +3,7 @@ [![GoDoc](https://godoc.org/github.com/docker/libkv?status.png)](https://godoc.org/github.com/docker/libkv) [![Build Status](https://travis-ci.org/docker/libkv.svg?branch=master)](https://travis-ci.org/docker/libkv) [![Coverage Status](https://coveralls.io/repos/docker/libkv/badge.svg)](https://coveralls.io/r/docker/libkv) +[![Go Report Card](https://goreportcard.com/badge/github.com/docker/libkv)](https://goreportcard.com/report/github.com/docker/libkv) `libkv` provides a `Go` native library to store metadata. @@ -10,7 +11,7 @@ The goal of `libkv` is to abstract common store operations for multiple distribu For example, you can use it to store your metadata or for service discovery to register machines and endpoints inside your cluster. -You can also easily implement a generic *Leader Election* on top of it (see the [swarm/leadership](https://github.com/docker/swarm/tree/master/leadership) package). +You can also easily implement a generic *Leader Election* on top of it (see the [docker/leadership](https://github.com/docker/leadership) repository). As of now, `libkv` offers support for `Consul`, `Etcd`, `Zookeeper` (**Distributed** store) and `BoltDB` (**Local** store). @@ -30,7 +31,7 @@ You can find examples of usage for `libkv` under in `docs/examples.go`. 
Optional `libkv` supports: - Consul versions >= `0.5.1` because it uses Sessions with `Delete` behavior for the use of `TTLs` (mimics zookeeper's Ephemeral node support), If you don't plan to use `TTLs`: you can use Consul version `0.4.0+`. -- Etcd versions >= `2.0` because it uses the new `coreos/etcd/client`, this might change in the future as the support for `APIv3` comes along and adds mor capabilities. +- Etcd versions >= `2.0` because it uses the new `coreos/etcd/client`, this might change in the future as the support for `APIv3` comes along and adds more capabilities. - Zookeeper versions >= `3.4.5`. Although this might work with previous version but this remains untested as of now. - Boltdb, which shouldn't be subject to any version dependencies. @@ -83,7 +84,7 @@ Please refer to the `docs/compatibility.md` to see what are the special cases fo Other than those special cases, you should expect the same experience for basic operations like `Get`/`Put`, etc. -Calls like `WatchTree` may return different events (or number of events) depending on the backend (for now, `Etcd` and `Consul` will likely return more events than `Zookeeper` that you should triage properly). Although you should be able to use it successfully to watch on events in an interchangeable way (see the **swarm/leadership** or **swarm/discovery** packages in **docker/swarm**). +Calls like `WatchTree` may return different events (or number of events) depending on the backend (for now, `Etcd` and `Consul` will likely return more events than `Zookeeper` that you should triage properly). Although you should be able to use it successfully to watch on events in an interchangeable way (see the **docker/leadership** repository or the **pkg/discovery/kv** package in **docker/docker**). ## TLS @@ -103,4 +104,4 @@ Want to hack on libkv? [Docker's contributions guidelines](https://github.com/do ##Copyright and license -Copyright © 2014-2015 Docker, Inc. All rights reserved, except as follows. 
Code is released under the Apache 2.0 license. The README.md file, and files in the "docs" folder are licensed under the Creative Commons Attribution 4.0 International License under the terms and conditions set forth in the file "LICENSE.docs". You may obtain a duplicate copy of the same license, titled CC-BY-SA-4.0, at http://creativecommons.org/licenses/by/4.0/. +Copyright © 2014-2016 Docker, Inc. All rights reserved, except as follows. Code is released under the Apache 2.0 license. The README.md file, and files in the "docs" folder are licensed under the Creative Commons Attribution 4.0 International License under the terms and conditions set forth in the file "LICENSE.docs". You may obtain a duplicate copy of the same license, titled CC-BY-SA-4.0, at http://creativecommons.org/licenses/by/4.0/. diff --git a/vendor/src/github.com/docker/libkv/libkv.go b/vendor/src/github.com/docker/libkv/libkv.go index 2192cd14e8..bdb8c7529f 100644 --- a/vendor/src/github.com/docker/libkv/libkv.go +++ b/vendor/src/github.com/docker/libkv/libkv.go @@ -25,7 +25,7 @@ var ( }() ) -// NewStore creates a an instance of store +// NewStore creates an instance of store func NewStore(backend store.Backend, addrs []string, options *store.Config) (store.Store, error) { if init, exists := initializers[backend]; exists { return init(addrs, options) diff --git a/vendor/src/github.com/docker/libkv/store/boltdb/boltdb.go b/vendor/src/github.com/docker/libkv/store/boltdb/boltdb.go index 94d01b4b09..4026e0a20c 100644 --- a/vendor/src/github.com/docker/libkv/store/boltdb/boltdb.go +++ b/vendor/src/github.com/docker/libkv/store/boltdb/boltdb.go @@ -19,8 +19,6 @@ var ( // ErrMultipleEndpointsUnsupported is thrown when multiple endpoints specified for // BoltDB. 
Endpoint has to be a local file path ErrMultipleEndpointsUnsupported = errors.New("boltdb supports one endpoint and should be a file path") - // ErrBoltBucketNotFound is thrown when specified BoltBD bucket doesn't exist in the DB - ErrBoltBucketNotFound = errors.New("boltdb bucket doesn't exist") // ErrBoltBucketOptionMissing is thrown when boltBcuket config option is missing ErrBoltBucketOptionMissing = errors.New("boltBucket config option missing") ) @@ -141,7 +139,7 @@ func (b *BoltDB) Get(key string) (*store.KVPair, error) { err = db.View(func(tx *bolt.Tx) error { bucket := tx.Bucket(b.boltBucket) if bucket == nil { - return ErrBoltBucketNotFound + return store.ErrKeyNotFound } v := bucket.Get([]byte(key)) @@ -217,7 +215,7 @@ func (b *BoltDB) Delete(key string) error { err = db.Update(func(tx *bolt.Tx) error { bucket := tx.Bucket(b.boltBucket) if bucket == nil { - return ErrBoltBucketNotFound + return store.ErrKeyNotFound } err := bucket.Delete([]byte(key)) return err @@ -243,7 +241,7 @@ func (b *BoltDB) Exists(key string) (bool, error) { err = db.View(func(tx *bolt.Tx) error { bucket := tx.Bucket(b.boltBucket) if bucket == nil { - return ErrBoltBucketNotFound + return store.ErrKeyNotFound } val = bucket.Get([]byte(key)) @@ -276,7 +274,7 @@ func (b *BoltDB) List(keyPrefix string) ([]*store.KVPair, error) { err = db.View(func(tx *bolt.Tx) error { bucket := tx.Bucket(b.boltBucket) if bucket == nil { - return ErrBoltBucketNotFound + return store.ErrKeyNotFound } cursor := bucket.Cursor() @@ -326,7 +324,7 @@ func (b *BoltDB) AtomicDelete(key string, previous *store.KVPair) (bool, error) err = db.Update(func(tx *bolt.Tx) error { bucket := tx.Bucket(b.boltBucket) if bucket == nil { - return ErrBoltBucketNotFound + return store.ErrKeyNotFound } val = bucket.Get([]byte(key)) @@ -370,7 +368,7 @@ func (b *BoltDB) AtomicPut(key string, value []byte, previous *store.KVPair, opt bucket := tx.Bucket(b.boltBucket) if bucket == nil { if previous != nil { - return 
ErrBoltBucketNotFound + return store.ErrKeyNotFound } bucket, err = tx.CreateBucket(b.boltBucket) if err != nil { @@ -381,7 +379,7 @@ func (b *BoltDB) AtomicPut(key string, value []byte, previous *store.KVPair, opt // doesn't exist in the DB. val = bucket.Get([]byte(key)) if previous == nil && len(val) != 0 { - return store.ErrKeyModified + return store.ErrKeyExists } if previous != nil { if len(val) == 0 { @@ -440,7 +438,7 @@ func (b *BoltDB) DeleteTree(keyPrefix string) error { err = db.Update(func(tx *bolt.Tx) error { bucket := tx.Bucket(b.boltBucket) if bucket == nil { - return ErrBoltBucketNotFound + return store.ErrKeyNotFound } cursor := bucket.Cursor() diff --git a/vendor/src/github.com/docker/libkv/store/consul/consul.go b/vendor/src/github.com/docker/libkv/store/consul/consul.go index c7693ca441..cb64be72d6 100644 --- a/vendor/src/github.com/docker/libkv/store/consul/consul.go +++ b/vendor/src/github.com/docker/libkv/store/consul/consul.go @@ -22,6 +22,14 @@ const ( // RenewSessionRetryMax is the number of time we should try // to renew the session before giving up and throwing an error RenewSessionRetryMax = 5 + + // MaxSessionDestroyAttempts is the maximum times we will try + // to explicitely destroy the session attached to a lock after + // the connectivity to the store has been lost + MaxSessionDestroyAttempts = 5 + + // defaultLockTTL is the default ttl for the consul lock + defaultLockTTL = 20 * time.Second ) var ( @@ -186,6 +194,7 @@ func (s *Consul) Put(key string, value []byte, opts *store.WriteOptions) error { p := &api.KVPair{ Key: key, Value: value, + Flags: api.LockFlagValue, } if opts != nil && opts.TTL > 0 { @@ -378,44 +387,99 @@ func (s *Consul) NewLock(key string, options *store.LockOptions) (store.Locker, lock := &consulLock{} + ttl := defaultLockTTL + if options != nil { // Set optional TTL on Lock if options.TTL != 0 { - entry := &api.SessionEntry{ - Behavior: api.SessionBehaviorRelease, // Release the lock when the session expires - 
TTL: (options.TTL / 2).String(), // Consul multiplies the TTL by 2x - LockDelay: 1 * time.Millisecond, // Virtually disable lock delay - } - - // Create the key session - session, _, err := s.client.Session().Create(entry, nil) - if err != nil { - return nil, err - } - - // Place the session on lock - lockOpts.Session = session - - // Renew the session ttl lock periodically - go s.client.Session().RenewPeriodic(entry.TTL, session, nil, options.RenewLock) - lock.renewCh = options.RenewLock + ttl = options.TTL } - // Set optional value on Lock if options.Value != nil { lockOpts.Value = options.Value } } + entry := &api.SessionEntry{ + Behavior: api.SessionBehaviorRelease, // Release the lock when the session expires + TTL: (ttl / 2).String(), // Consul multiplies the TTL by 2x + LockDelay: 1 * time.Millisecond, // Virtually disable lock delay + } + + // Create the key session + session, _, err := s.client.Session().Create(entry, nil) + if err != nil { + return nil, err + } + + // Place the session and renew chan on lock + lockOpts.Session = session + lock.renewCh = options.RenewLock + l, err := s.client.LockOpts(lockOpts) if err != nil { return nil, err } + // Renew the session ttl lock periodically + s.renewLockSession(entry.TTL, session, options.RenewLock) + lock.lock = l return lock, nil } +// renewLockSession is used to renew a session Lock, it takes +// a stopRenew chan which is used to explicitely stop the session +// renew process. The renew routine never stops until a signal is +// sent to this channel. If deleting the session fails because the +// connection to the store is lost, it keeps trying to delete the +// session periodically until it can contact the store, this ensures +// that the lock is not maintained indefinitely which ensures liveness +// over safety for the lock when the store becomes unavailable. 
+func (s *Consul) renewLockSession(initialTTL string, id string, stopRenew chan struct{}) { + sessionDestroyAttempts := 0 + ttl, err := time.ParseDuration(initialTTL) + if err != nil { + return + } + go func() { + for { + select { + case <-time.After(ttl / 2): + entry, _, err := s.client.Session().Renew(id, nil) + if err != nil { + // If an error occurs, continue until the + // session gets destroyed explicitely or + // the session ttl times out + continue + } + if entry == nil { + return + } + + // Handle the server updating the TTL + ttl, _ = time.ParseDuration(entry.TTL) + + case <-stopRenew: + // Attempt a session destroy + _, err := s.client.Session().Destroy(id, nil) + if err == nil { + return + } + + if sessionDestroyAttempts >= MaxSessionDestroyAttempts { + return + } + + // We can't destroy the session because the store + // is unavailable, wait for the session renew period + sessionDestroyAttempts++ + time.Sleep(ttl / 2) + } + } + }() +} + // Lock attempts to acquire the lock and blocks while // doing so. It returns a channel that is closed if our // lock is lost or if an error occurs @@ -436,7 +500,7 @@ func (l *consulLock) Unlock() error { // modified in the meantime, throws an error if this is the case func (s *Consul) AtomicPut(key string, value []byte, previous *store.KVPair, options *store.WriteOptions) (bool, *store.KVPair, error) { - p := &api.KVPair{Key: s.normalize(key), Value: value} + p := &api.KVPair{Key: s.normalize(key), Value: value, Flags: api.LockFlagValue} if previous == nil { // Consul interprets ModifyIndex = 0 as new key. 
@@ -445,9 +509,14 @@ func (s *Consul) AtomicPut(key string, value []byte, previous *store.KVPair, opt p.ModifyIndex = previous.LastIndex } - if work, _, err := s.client.KV().CAS(p, nil); err != nil { + ok, _, err := s.client.KV().CAS(p, nil) + if err != nil { return false, nil, err - } else if !work { + } + if !ok { + if previous == nil { + return false, nil, store.ErrKeyExists + } return false, nil, store.ErrKeyModified } @@ -466,7 +535,7 @@ func (s *Consul) AtomicDelete(key string, previous *store.KVPair) (bool, error) return false, store.ErrPreviousNotSpecified } - p := &api.KVPair{Key: s.normalize(key), ModifyIndex: previous.LastIndex} + p := &api.KVPair{Key: s.normalize(key), ModifyIndex: previous.LastIndex, Flags: api.LockFlagValue} // Extra Get operation to check on the key _, err := s.Get(key) diff --git a/vendor/src/github.com/docker/libkv/store/etcd/etcd.go b/vendor/src/github.com/docker/libkv/store/etcd/etcd.go index 312bb0b65a..c932ca665e 100644 --- a/vendor/src/github.com/docker/libkv/store/etcd/etcd.go +++ b/vendor/src/github.com/docker/libkv/store/etcd/etcd.go @@ -75,6 +75,9 @@ func New(addrs []string, options *store.Config) (store.Store, error) { if options.ConnectionTimeout != 0 { setTimeout(cfg, options.ConnectionTimeout) } + if options.Username != "" { + setCredentials(cfg, options.Username, options.Password) + } } c, err := etcd.New(*cfg) @@ -119,6 +122,12 @@ func setTimeout(cfg *etcd.Config, time time.Duration) { cfg.HeaderTimeoutPerRequest = time } +// setCredentials sets the username/password credentials for connecting to Etcd +func setCredentials(cfg *etcd.Config, username, password string) { + cfg.Username = username + cfg.Password = password +} + // Normalize the key for usage in Etcd func (s *Etcd) normalize(key string) string { key = store.Normalize(key) @@ -335,6 +344,10 @@ func (s *Etcd) AtomicPut(key string, value []byte, previous *store.KVPair, opts if etcdError.Code == etcd.ErrorCodeTestFailed { return false, nil, 
store.ErrKeyModified } + // Node exists error (when PrevNoExist) + if etcdError.Code == etcd.ErrorCodeNodeExist { + return false, nil, store.ErrKeyExists + } } return false, nil, err } @@ -508,15 +521,15 @@ func (l *etcdLock) Lock(stopChan chan struct{}) (<-chan struct{}, error) { // Wait for the key to be available or for // a signal to stop trying to lock the key select { - case _ = <-free: + case <-free: break case err := <-errorCh: return nil, err - case _ = <-stopChan: + case <-stopChan: return nil, ErrAbortTryLock } - // Delete or Expire event occured + // Delete or Expire event occurred // Retry } } diff --git a/vendor/src/github.com/docker/libkv/store/store.go b/vendor/src/github.com/docker/libkv/store/store.go index 0df01b6c83..7a4850c019 100644 --- a/vendor/src/github.com/docker/libkv/store/store.go +++ b/vendor/src/github.com/docker/libkv/store/store.go @@ -35,6 +35,8 @@ var ( ErrKeyNotFound = errors.New("Key not found in store") // ErrPreviousNotSpecified is thrown when the previous value is not specified for an atomic operation ErrPreviousNotSpecified = errors.New("Previous K/V pair should be provided for the Atomic operation") + // ErrKeyExists is thrown when the previous value exists in the case of an AtomicPut + ErrKeyExists = errors.New("Previous K/V pair exists, cannot complete Atomic operation") ) // Config contains the options for a storage client @@ -44,6 +46,8 @@ type Config struct { ConnectionTimeout time.Duration Bucket string PersistConnection bool + Username string + Password string } // ClientTLSConfig contains data for a Client TLS configuration in the form diff --git a/vendor/src/github.com/docker/libkv/store/zookeeper/zookeeper.go b/vendor/src/github.com/docker/libkv/store/zookeeper/zookeeper.go index 502b1c6e85..8a44ad318a 100644 --- a/vendor/src/github.com/docker/libkv/store/zookeeper/zookeeper.go +++ b/vendor/src/github.com/docker/libkv/store/zookeeper/zookeeper.go @@ -291,8 +291,8 @@ func (s *Zookeeper) DeleteTree(directory string) 
error { // AtomicPut put a value at "key" if the key has not been // modified in the meantime, throws an error if this is the case func (s *Zookeeper) AtomicPut(key string, value []byte, previous *store.KVPair, _ *store.WriteOptions) (bool, *store.KVPair, error) { - var lastIndex uint64 + if previous != nil { meta, err := s.client.Set(s.normalize(key), value, int32(previous.LastIndex)) if err != nil { @@ -307,8 +307,9 @@ func (s *Zookeeper) AtomicPut(key string, value []byte, previous *store.KVPair, // Interpret previous == nil as create operation. _, err := s.client.Create(s.normalize(key), value, 0, zk.WorldACL(zk.PermAll)) if err != nil { - // Zookeeper will complain if the directory doesn't exist. + // Directory does not exist if err == zk.ErrNoNode { + // Create the directory parts := store.SplitKey(strings.TrimSuffix(key, "/")) parts = parts[:len(parts)-1] @@ -316,11 +317,22 @@ func (s *Zookeeper) AtomicPut(key string, value []byte, previous *store.KVPair, // Failed to create the directory. return false, nil, err } + + // Create the node if _, err := s.client.Create(s.normalize(key), value, 0, zk.WorldACL(zk.PermAll)); err != nil { + // Node exist error (when previous nil) + if err == zk.ErrNodeExists { + return false, nil, store.ErrKeyExists + } return false, nil, err } } else { + // Node Exists error (when previous nil) + if err == zk.ErrNodeExists { + return false, nil, store.ErrKeyExists + } + // Unhandled error return false, nil, err } diff --git a/vendor/src/github.com/hashicorp/go-multierror/LICENSE b/vendor/src/github.com/hashicorp/go-multierror/LICENSE new file mode 100644 index 0000000000..82b4de97c7 --- /dev/null +++ b/vendor/src/github.com/hashicorp/go-multierror/LICENSE @@ -0,0 +1,353 @@ +Mozilla Public License, version 2.0 + +1. Definitions + +1.1. “Contributor” + + means each individual or legal entity that creates, contributes to the + creation of, or owns Covered Software. + +1.2. 
“Contributor Version” + + means the combination of the Contributions of others (if any) used by a + Contributor and that particular Contributor’s Contribution. + +1.3. “Contribution” + + means Covered Software of a particular Contributor. + +1.4. “Covered Software” + + means Source Code Form to which the initial Contributor has attached the + notice in Exhibit A, the Executable Form of such Source Code Form, and + Modifications of such Source Code Form, in each case including portions + thereof. + +1.5. “Incompatible With Secondary Licenses” + means + + a. that the initial Contributor has attached the notice described in + Exhibit B to the Covered Software; or + + b. that the Covered Software was made available under the terms of version + 1.1 or earlier of the License, but not also under the terms of a + Secondary License. + +1.6. “Executable Form” + + means any form of the work other than Source Code Form. + +1.7. “Larger Work” + + means a work that combines Covered Software with other material, in a separate + file or files, that is not Covered Software. + +1.8. “License” + + means this document. + +1.9. “Licensable” + + means having the right to grant, to the maximum extent possible, whether at the + time of the initial grant or subsequently, any and all of the rights conveyed by + this License. + +1.10. “Modifications” + + means any of the following: + + a. any file in Source Code Form that results from an addition to, deletion + from, or modification of the contents of Covered Software; or + + b. any new file in Source Code Form that contains any Covered Software. + +1.11. “Patent Claims” of a Contributor + + means any patent claim(s), including without limitation, method, process, + and apparatus claims, in any patent Licensable by such Contributor that + would be infringed, but for the grant of the License, by the making, + using, selling, offering for sale, having made, import, or transfer of + either its Contributions or its Contributor Version. + +1.12. 
“Secondary License” + + means either the GNU General Public License, Version 2.0, the GNU Lesser + General Public License, Version 2.1, the GNU Affero General Public + License, Version 3.0, or any later versions of those licenses. + +1.13. “Source Code Form” + + means the form of the work preferred for making modifications. + +1.14. “You” (or “Your”) + + means an individual or a legal entity exercising rights under this + License. For legal entities, “You” includes any entity that controls, is + controlled by, or is under common control with You. For purposes of this + definition, “control” means (a) the power, direct or indirect, to cause + the direction or management of such entity, whether by contract or + otherwise, or (b) ownership of more than fifty percent (50%) of the + outstanding shares or beneficial ownership of such entity. + + +2. License Grants and Conditions + +2.1. Grants + + Each Contributor hereby grants You a world-wide, royalty-free, + non-exclusive license: + + a. under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or as + part of a Larger Work; and + + b. under Patent Claims of such Contributor to make, use, sell, offer for + sale, have made, import, and otherwise transfer either its Contributions + or its Contributor Version. + +2.2. Effective Date + + The licenses granted in Section 2.1 with respect to any Contribution become + effective for each Contribution on the date the Contributor first distributes + such Contribution. + +2.3. Limitations on Grant Scope + + The licenses granted in this Section 2 are the only rights granted under this + License. No additional rights or licenses will be implied from the distribution + or licensing of Covered Software under this License. 
Notwithstanding Section + 2.1(b) above, no patent license is granted by a Contributor: + + a. for any code that a Contributor has removed from Covered Software; or + + b. for infringements caused by: (i) Your and any other third party’s + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + + c. under Patent Claims infringed by Covered Software in the absence of its + Contributions. + + This License does not grant any rights in the trademarks, service marks, or + logos of any Contributor (except as may be necessary to comply with the + notice requirements in Section 3.4). + +2.4. Subsequent Licenses + + No Contributor makes additional grants as a result of Your choice to + distribute the Covered Software under a subsequent version of this License + (see Section 10.2) or under the terms of a Secondary License (if permitted + under the terms of Section 3.3). + +2.5. Representation + + Each Contributor represents that the Contributor believes its Contributions + are its original creation(s) or it has sufficient rights to grant the + rights to its Contributions conveyed by this License. + +2.6. Fair Use + + This License is not intended to limit any rights You have under applicable + copyright doctrines of fair use, fair dealing, or other equivalents. + +2.7. Conditions + + Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in + Section 2.1. + + +3. Responsibilities + +3.1. Distribution of Source Form + + All distribution of Covered Software in Source Code Form, including any + Modifications that You create or to which You contribute, must be under the + terms of this License. You must inform recipients that the Source Code Form + of the Covered Software is governed by the terms of this License, and how + they can obtain a copy of this License. You may not attempt to alter or + restrict the recipients’ rights in the Source Code Form. + +3.2. 
Distribution of Executable Form + + If You distribute Covered Software in Executable Form then: + + a. such Covered Software must also be made available in Source Code Form, + as described in Section 3.1, and You must inform recipients of the + Executable Form how they can obtain a copy of such Source Code Form by + reasonable means in a timely manner, at a charge no more than the cost + of distribution to the recipient; and + + b. You may distribute such Executable Form under the terms of this License, + or sublicense it under different terms, provided that the license for + the Executable Form does not attempt to limit or alter the recipients’ + rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + + You may create and distribute a Larger Work under terms of Your choice, + provided that You also comply with the requirements of this License for the + Covered Software. If the Larger Work is a combination of Covered Software + with a work governed by one or more Secondary Licenses, and the Covered + Software is not Incompatible With Secondary Licenses, this License permits + You to additionally distribute such Covered Software under the terms of + such Secondary License(s), so that the recipient of the Larger Work may, at + their option, further distribute the Covered Software under the terms of + either this License or such Secondary License(s). + +3.4. Notices + + You may not remove or alter the substance of any license notices (including + copyright notices, patent notices, disclaimers of warranty, or limitations + of liability) contained within the Source Code Form of the Covered + Software, except that You may alter any license notices to the extent + required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + + You may choose to offer, and to charge a fee for, warranty, support, + indemnity or liability obligations to one or more recipients of Covered + Software. 
However, You may do so only on Your own behalf, and not on behalf + of any Contributor. You must make it absolutely clear that any such + warranty, support, indemnity, or liability obligation is offered by You + alone, and You hereby agree to indemnify every Contributor for any + liability incurred by such Contributor as a result of warranty, support, + indemnity or liability terms You offer. You may include additional + disclaimers of warranty and limitations of liability specific to any + jurisdiction. + +4. Inability to Comply Due to Statute or Regulation + + If it is impossible for You to comply with any of the terms of this License + with respect to some or all of the Covered Software due to statute, judicial + order, or regulation then You must: (a) comply with the terms of this License + to the maximum extent possible; and (b) describe the limitations and the code + they affect. Such description must be placed in a text file included with all + distributions of the Covered Software under this License. Except to the + extent prohibited by statute or regulation, such description must be + sufficiently detailed for a recipient of ordinary skill to be able to + understand it. + +5. Termination + +5.1. The rights granted under this License will terminate automatically if You + fail to comply with any of its terms. However, if You become compliant, + then the rights granted under this License from a particular Contributor + are reinstated (a) provisionally, unless and until such Contributor + explicitly and finally terminates Your grants, and (b) on an ongoing basis, + if such Contributor fails to notify You of the non-compliance by some + reasonable means prior to 60 days after You have come back into compliance. 
+ Moreover, Your grants from a particular Contributor are reinstated on an + ongoing basis if such Contributor notifies You of the non-compliance by + some reasonable means, this is the first time You have received notice of + non-compliance with this License from such Contributor, and You become + compliant prior to 30 days after Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent + infringement claim (excluding declaratory judgment actions, counter-claims, + and cross-claims) alleging that a Contributor Version directly or + indirectly infringes any patent, then the rights granted to You by any and + all Contributors for the Covered Software under Section 2.1 of this License + shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user + license agreements (excluding distributors and resellers) which have been + validly granted by You or Your distributors under this License prior to + termination shall survive termination. + +6. Disclaimer of Warranty + + Covered Software is provided under this License on an “as is” basis, without + warranty of any kind, either expressed, implied, or statutory, including, + without limitation, warranties that the Covered Software is free of defects, + merchantable, fit for a particular purpose or non-infringing. The entire + risk as to the quality and performance of the Covered Software is with You. + Should any Covered Software prove defective in any respect, You (not any + Contributor) assume the cost of any necessary servicing, repair, or + correction. This disclaimer of warranty constitutes an essential part of this + License. No use of any Covered Software is authorized under this License + except under this disclaimer. + +7. 
Limitation of Liability + + Under no circumstances and under no legal theory, whether tort (including + negligence), contract, or otherwise, shall any Contributor, or anyone who + distributes Covered Software as permitted above, be liable to You for any + direct, indirect, special, incidental, or consequential damages of any + character including, without limitation, damages for lost profits, loss of + goodwill, work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses, even if such party shall have been + informed of the possibility of such damages. This limitation of liability + shall not apply to liability for death or personal injury resulting from such + party’s negligence to the extent applicable law prohibits such limitation. + Some jurisdictions do not allow the exclusion or limitation of incidental or + consequential damages, so this exclusion and limitation may not apply to You. + +8. Litigation + + Any litigation relating to this License may be brought only in the courts of + a jurisdiction where the defendant maintains its principal place of business + and such litigation shall be governed by laws of that jurisdiction, without + reference to its conflict-of-law provisions. Nothing in this Section shall + prevent a party’s ability to bring cross-claims or counter-claims. + +9. Miscellaneous + + This License represents the complete agreement concerning the subject matter + hereof. If any provision of this License is held to be unenforceable, such + provision shall be reformed only to the extent necessary to make it + enforceable. Any law or regulation which provides that the language of a + contract shall be construed against the drafter shall not be used to construe + this License against a Contributor. + + +10. Versions of the License + +10.1. New Versions + + Mozilla Foundation is the license steward. 
Except as provided in Section + 10.3, no one other than the license steward has the right to modify or + publish new versions of this License. Each version will be given a + distinguishing version number. + +10.2. Effect of New Versions + + You may distribute the Covered Software under the terms of the version of + the License under which You originally received the Covered Software, or + under the terms of any subsequent version published by the license + steward. + +10.3. Modified Versions + + If you create software not governed by this License, and you want to + create a new license for such software, you may create and use a modified + version of this License if you rename the license and remove any + references to the name of the license steward (except to note that such + modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses + If You choose to distribute Source Code Form that is Incompatible With + Secondary Licenses under the terms of this version of the License, the + notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice + + This Source Code Form is subject to the + terms of the Mozilla Public License, v. + 2.0. If a copy of the MPL was not + distributed with this file, You can + obtain one at + http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular file, then +You may include the notice in a location (such as a LICENSE file in a relevant +directory) where a recipient would be likely to look for such a notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - “Incompatible With Secondary Licenses” Notice + + This Source Code Form is “Incompatible + With Secondary Licenses”, as defined by + the Mozilla Public License, v. 2.0. 
diff --git a/vendor/src/github.com/hashicorp/go-multierror/README.md b/vendor/src/github.com/hashicorp/go-multierror/README.md new file mode 100644 index 0000000000..e81be50e0d --- /dev/null +++ b/vendor/src/github.com/hashicorp/go-multierror/README.md @@ -0,0 +1,91 @@ +# go-multierror + +`go-multierror` is a package for Go that provides a mechanism for +representing a list of `error` values as a single `error`. + +This allows a function in Go to return an `error` that might actually +be a list of errors. If the caller knows this, they can unwrap the +list and access the errors. If the caller doesn't know, the error +formats to a nice human-readable format. + +`go-multierror` implements the +[errwrap](https://github.com/hashicorp/errwrap) interface so that it can +be used with that library, as well. + +## Installation and Docs + +Install using `go get github.com/hashicorp/go-multierror`. + +Full documentation is available at +http://godoc.org/github.com/hashicorp/go-multierror + +## Usage + +go-multierror is easy to use and purposely built to be unobtrusive in +existing Go applications/libraries that may not be aware of it. + +**Building a list of errors** + +The `Append` function is used to create a list of errors. This function +behaves a lot like the Go built-in `append` function: it doesn't matter +if the first argument is nil, a `multierror.Error`, or any other `error`, +the function behaves as you would expect. + +```go +var result error + +if err := step1(); err != nil { + result = multierror.Append(result, err) +} +if err := step2(); err != nil { + result = multierror.Append(result, err) +} + +return result +``` + +**Customizing the formatting of the errors** + +By specifying a custom `ErrorFormat`, you can customize the format +of the `Error() string` function: + +```go +var result *multierror.Error + +// ... accumulate errors here, maybe using Append + +if result != nil { + result.ErrorFormat = func([]error) string { + return "errors!" 
+ } +} +``` + +**Accessing the list of errors** + +`multierror.Error` implements `error` so if the caller doesn't know about +multierror, it will work just fine. But if you're aware a multierror might +be returned, you can use type switches to access the list of errors: + +```go +if err := something(); err != nil { + if merr, ok := err.(*multierror.Error); ok { + // Use merr.Errors + } +} +``` + +**Returning a multierror only if there are errors** + +If you build a `multierror.Error`, you can use the `ErrorOrNil` function +to return an `error` implementation only if there are errors to return: + +```go +var result *multierror.Error + +// ... accumulate errors here + +// Return the `error` only if errors were added to the multierror, otherwise +// return nil since there are no errors. +return result.ErrorOrNil() +``` diff --git a/vendor/src/github.com/hashicorp/go-multierror/append.go b/vendor/src/github.com/hashicorp/go-multierror/append.go new file mode 100644 index 0000000000..8d22ee7a0e --- /dev/null +++ b/vendor/src/github.com/hashicorp/go-multierror/append.go @@ -0,0 +1,30 @@ +package multierror + +// Append is a helper function that will append more errors +// onto an Error in order to create a larger multi-error. +// +// If err is not a multierror.Error, then it will be turned into +// one. If any of the errs are multierr.Error, they will be flattened +// one level into err. +func Append(err error, errs ...error) *Error { + switch err := err.(type) { + case *Error: + // Typed nils can reach here, so initialize if we are nil + if err == nil { + err = new(Error) + } + + err.Errors = append(err.Errors, errs...) + return err + default: + newErrs := make([]error, 0, len(errs)+1) + if err != nil { + newErrs = append(newErrs, err) + } + newErrs = append(newErrs, errs...) 
+ + return &Error{ + Errors: newErrs, + } + } +} diff --git a/vendor/src/github.com/hashicorp/go-multierror/format.go b/vendor/src/github.com/hashicorp/go-multierror/format.go new file mode 100644 index 0000000000..bb65a12e74 --- /dev/null +++ b/vendor/src/github.com/hashicorp/go-multierror/format.go @@ -0,0 +1,23 @@ +package multierror + +import ( + "fmt" + "strings" +) + +// ErrorFormatFunc is a function callback that is called by Error to +// turn the list of errors into a string. +type ErrorFormatFunc func([]error) string + +// ListFormatFunc is a basic formatter that outputs the number of errors +// that occurred along with a bullet point list of the errors. +func ListFormatFunc(es []error) string { + points := make([]string, len(es)) + for i, err := range es { + points[i] = fmt.Sprintf("* %s", err) + } + + return fmt.Sprintf( + "%d error(s) occurred:\n\n%s", + len(es), strings.Join(points, "\n")) +} diff --git a/vendor/src/github.com/hashicorp/go-multierror/multierror.go b/vendor/src/github.com/hashicorp/go-multierror/multierror.go new file mode 100644 index 0000000000..2ea0827329 --- /dev/null +++ b/vendor/src/github.com/hashicorp/go-multierror/multierror.go @@ -0,0 +1,51 @@ +package multierror + +import ( + "fmt" +) + +// Error is an error type to track multiple errors. This is used to +// accumulate errors in cases and return them as a single "error". +type Error struct { + Errors []error + ErrorFormat ErrorFormatFunc +} + +func (e *Error) Error() string { + fn := e.ErrorFormat + if fn == nil { + fn = ListFormatFunc + } + + return fn(e.Errors) +} + +// ErrorOrNil returns an error interface if this Error represents +// a list of errors, or returns nil if the list of errors is empty. This +// function is useful at the end of accumulation to make sure that the value +// returned represents the existence of errors. 
+func (e *Error) ErrorOrNil() error { + if e == nil { + return nil + } + if len(e.Errors) == 0 { + return nil + } + + return e +} + +func (e *Error) GoString() string { + return fmt.Sprintf("*%#v", *e) +} + +// WrappedErrors returns the list of errors that this Error is wrapping. +// It is an implementatin of the errwrap.Wrapper interface so that +// multierror.Error can be used with that library. +// +// This method is not safe to be called concurrently and is no different +// than accessing the Errors field directly. It is implementd only to +// satisfy the errwrap.Wrapper interface. +func (e *Error) WrappedErrors() []error { + return e.Errors +} diff --git a/vendor/src/github.com/hashicorp/memberlist/README.md b/vendor/src/github.com/hashicorp/memberlist/README.md index d55befac6e..c8a125f2ca 100644 --- a/vendor/src/github.com/hashicorp/memberlist/README.md +++ b/vendor/src/github.com/hashicorp/memberlist/README.md @@ -1,4 +1,4 @@ -# memberlist +# memberlist [![GoDoc](https://godoc.org/github.com/hashicorp/memberlist?status.png)](https://godoc.org/github.com/hashicorp/memberlist) memberlist is a [Go](http://www.golang.org) library that manages cluster membership and member failure detection using a gossip based protocol. @@ -64,7 +64,7 @@ For complete documentation, see the associated [Godoc](http://godoc.org/github.c ## Protocol memberlist is based on ["SWIM: Scalable Weakly-consistent Infection-style Process Group Membership Protocol"](http://www.cs.cornell.edu/~asdas/research/dsn02-swim.pdf), -with a few minor adaptations, mostly to increase propogation speed and +with a few minor adaptations, mostly to increase propagation speed and convergence rate. A high level overview of the memberlist protocol (based on SWIM) is @@ -93,15 +93,22 @@ be disabled entirely. Failure detection is done by periodic random probing using a configurable interval. 
If the node fails to ack within a reasonable time (typically some multiple -of RTT), then an indirect probe is attempted. An indirect probe asks a -configurable number of random nodes to probe the same node, in case there -are network issues causing our own node to fail the probe. If both our -probe and the indirect probes fail within a reasonable time, then the -node is marked "suspicious" and this knowledge is gossiped to the cluster. -A suspicious node is still considered a member of cluster. If the suspect member -of the cluster does not disputes the suspicion within a configurable period of -time, the node is finally considered dead, and this state is then gossiped -to the cluster. +of RTT), then an indirect probe as well as a direct TCP probe are attempted. An +indirect probe asks a configurable number of random nodes to probe the same node, +in case there are network issues causing our own node to fail the probe. The direct +TCP probe is used to help identify the common situation where networking is +misconfigured to allow TCP but not UDP. Without the TCP probe, a UDP-isolated node +would think all other nodes were suspect and could cause churn in the cluster when +it attempts a TCP-based state exchange with another node. It is not desirable to +operate with only TCP connectivity because convergence will be much slower, but it +is enabled so that memberlist can detect this situation and alert operators. + +If both our probe, the indirect probes, and the direct TCP probe fail within a +configurable time, then the node is marked "suspicious" and this knowledge is +gossiped to the cluster. A suspicious node is still considered a member of +cluster. If the suspect member of the cluster does not dispute the suspicion +within a configurable period of time, the node is finally considered dead, +and this state is then gossiped to the cluster. This is a brief and incomplete description of the protocol. 
For a better idea, please read the @@ -111,7 +118,7 @@ in its entirety, along with the memberlist source code. ### Changes from SWIM As mentioned earlier, the memberlist protocol is based on SWIM but includes -minor changes, mostly to increase propogation speed and convergence rates. +minor changes, mostly to increase propagation speed and convergence rates. The changes from SWIM are noted here: @@ -127,7 +134,7 @@ The changes from SWIM are noted here: also will periodically send out dedicated gossip messages on their own. This feature lets you have a higher gossip rate (for example once per 200ms) and a slower failure detection rate (such as once per second), resulting - in overall faster convergence rates and data propogation speeds. This feature + in overall faster convergence rates and data propagation speeds. This feature can be totally disabed as well, if you wish. * memberlist stores around the state of dead nodes for a set amount of time, diff --git a/vendor/src/github.com/hashicorp/memberlist/alive_delegate.go b/vendor/src/github.com/hashicorp/memberlist/alive_delegate.go new file mode 100644 index 0000000000..51a0ba9054 --- /dev/null +++ b/vendor/src/github.com/hashicorp/memberlist/alive_delegate.go @@ -0,0 +1,14 @@ +package memberlist + +// AliveDelegate is used to involve a client in processing +// a node "alive" message. When a node joins, either through +// a UDP gossip or TCP push/pull, we update the state of +// that node via an alive message. This can be used to filter +// a node out and prevent it from being considered a peer +// using application specific logic. +type AliveDelegate interface { + // NotifyMerge is invoked when a merge could take place. + // Provides a list of the nodes known by the peer. If + // the return value is non-nil, the merge is canceled. 
+ NotifyAlive(peer *Node) error +} diff --git a/vendor/src/github.com/hashicorp/memberlist/config.go b/vendor/src/github.com/hashicorp/memberlist/config.go index bd15047273..16bebacaaa 100644 --- a/vendor/src/github.com/hashicorp/memberlist/config.go +++ b/vendor/src/github.com/hashicorp/memberlist/config.go @@ -2,6 +2,7 @@ package memberlist import ( "io" + "log" "os" "time" ) @@ -85,6 +86,11 @@ type Config struct { ProbeInterval time.Duration ProbeTimeout time.Duration + // DisableTcpPings will turn off the fallback TCP pings that are attempted + // if the direct UDP ping fails. These get pipelined along with the + // indirect UDP pings. + DisableTcpPings bool + // GossipInterval and GossipNodes are used to configure the gossip // behavior of memberlist. // @@ -111,6 +117,8 @@ type Config struct { // the first key used while attempting to decrypt messages. Providing a // value for this primary key will enable message-level encryption and // verification, and automatically install the key onto the keyring. + // The value should be either 16, 24, or 32 bytes to select AES-128, + // AES-192, or AES-256. SecretKey []byte // The keyring holds all of the encryption keys used internally. It is @@ -132,16 +140,29 @@ type Config struct { Events EventDelegate Conflict ConflictDelegate Merge MergeDelegate + Ping PingDelegate + Alive AliveDelegate + + // DNSConfigPath points to the system's DNS config file, usually located + // at /etc/resolv.conf. It can be overridden via config for easier testing. + DNSConfigPath string // LogOutput is the writer where logs should be sent. If this is not - // set, logging will go to stderr by default. + // set, logging will go to stderr by default. You cannot specify both LogOutput + // and Logger at the same time. LogOutput io.Writer + + // Logger is a custom logger which you provide. If Logger is set, it will use + // this for the internal logger. If Logger is not set, it will fall back to the + // behavior for using LogOutput. 
You cannot specify both LogOutput and Logger + // at the same time. + Logger *log.Logger } // DefaultLANConfig returns a sane set of configurations for Memberlist. // It uses the hostname as the node name, and otherwise sets very conservative // values that are sane for most LAN environments. The default configuration -// errs on the side on the side of caution, choosing values that are optimized +// errs on the side of caution, choosing values that are optimized // for higher convergence at the cost of higher bandwidth usage. Regardless, // these values are a good starting point when getting started with memberlist. func DefaultLANConfig() *Config { @@ -152,7 +173,7 @@ func DefaultLANConfig() *Config { BindPort: 7946, AdvertiseAddr: "", AdvertisePort: 7946, - ProtocolVersion: ProtocolVersionMax, + ProtocolVersion: ProtocolVersion2Compatible, TCPTimeout: 10 * time.Second, // Timeout after 10 seconds IndirectChecks: 3, // Use 3 nodes for the indirect ping RetransmitMult: 4, // Retransmit a message 4 * log(N+1) nodes @@ -160,6 +181,7 @@ func DefaultLANConfig() *Config { PushPullInterval: 30 * time.Second, // Low frequency ProbeTimeout: 500 * time.Millisecond, // Reasonable RTT time for LAN ProbeInterval: 1 * time.Second, // Failure check every second + DisableTcpPings: false, // TCP pings are safe, even with mixed versions GossipNodes: 3, // Gossip to 3 nodes GossipInterval: 200 * time.Millisecond, // Gossip more rapidly @@ -167,8 +189,9 @@ func DefaultLANConfig() *Config { EnableCompression: true, // Enable compression by default SecretKey: nil, + Keyring: nil, - Keyring: nil, + DNSConfigPath: "/etc/resolv.conf", } } diff --git a/vendor/src/github.com/hashicorp/memberlist/delegate.go b/vendor/src/github.com/hashicorp/memberlist/delegate.go index b1204a41e8..66aa2da796 100644 --- a/vendor/src/github.com/hashicorp/memberlist/delegate.go +++ b/vendor/src/github.com/hashicorp/memberlist/delegate.go @@ -19,7 +19,8 @@ type Delegate interface { // It can return a list of 
buffers to send. Each buffer should assume an // overhead as provided with a limit on the total byte size allowed. // The total byte size of the resulting data to send must not exceed - // the limit. + // the limit. Care should be taken that this method does not block, + // since doing so would block the entire UDP packet receive loop. GetBroadcasts(overhead, limit int) [][]byte // LocalState is used for a TCP Push/Pull. This is sent to diff --git a/vendor/src/github.com/hashicorp/memberlist/keyring.go b/vendor/src/github.com/hashicorp/memberlist/keyring.go index ebcd2f2002..be2201d488 100644 --- a/vendor/src/github.com/hashicorp/memberlist/keyring.go +++ b/vendor/src/github.com/hashicorp/memberlist/keyring.go @@ -34,6 +34,9 @@ func (k *Keyring) init() { // keyring. If creating a keyring with multiple keys, one key must be designated // primary by passing it as the primaryKey. If the primaryKey does not exist in // the list of secondary keys, it will be automatically added at position 0. +// +// A key should be either 16, 24, or 32 bytes to select AES-128, +// AES-192, or AES-256. func NewKeyring(keys [][]byte, primaryKey []byte) (*Keyring, error) { keyring := &Keyring{} keyring.init() @@ -58,10 +61,12 @@ func NewKeyring(keys [][]byte, primaryKey []byte) (*Keyring, error) { // AddKey will install a new key on the ring. Adding a key to the ring will make // it available for use in decryption. If the key already exists on the ring, // this function will just return noop. +// +// key should be either 16, 24, or 32 bytes to select AES-128, +// AES-192, or AES-256. 
func (k *Keyring) AddKey(key []byte) error { - // Encorce 16-byte key size - if len(key) != 16 { - return fmt.Errorf("key size must be 16 bytes") + if l := len(key); l != 16 && l != 24 && l != 32 { + return fmt.Errorf("key size must be 16, 24 or 32 bytes") } // No-op if key is already installed diff --git a/vendor/src/github.com/hashicorp/memberlist/logging.go b/vendor/src/github.com/hashicorp/memberlist/logging.go new file mode 100644 index 0000000000..f31acfb2fa --- /dev/null +++ b/vendor/src/github.com/hashicorp/memberlist/logging.go @@ -0,0 +1,22 @@ +package memberlist + +import ( + "fmt" + "net" +) + +func LogAddress(addr net.Addr) string { + if addr == nil { + return "from=" + } + + return fmt.Sprintf("from=%s", addr.String()) +} + +func LogConn(conn net.Conn) string { + if conn == nil { + return LogAddress(nil) + } + + return LogAddress(conn.RemoteAddr()) +} diff --git a/vendor/src/github.com/hashicorp/memberlist/memberlist.go b/vendor/src/github.com/hashicorp/memberlist/memberlist.go index 01807cfe43..593fc17265 100644 --- a/vendor/src/github.com/hashicorp/memberlist/memberlist.go +++ b/vendor/src/github.com/hashicorp/memberlist/memberlist.go @@ -20,11 +20,19 @@ import ( "net" "os" "strconv" + "strings" "sync" "time" + + "github.com/hashicorp/go-multierror" + "github.com/miekg/dns" ) type Memberlist struct { + sequenceNum uint32 // Local sequence number + incarnation uint32 // Local incarnation number + numNodes uint32 // Number of known nodes (estimate) + config *Config shutdown bool shutdownCh chan struct{} @@ -35,9 +43,6 @@ type Memberlist struct { tcpListener *net.TCPListener handoff chan msgHandoff - sequenceNum uint32 // Local sequence number - incarnation uint32 // Local incarnation number - nodeLock sync.RWMutex nodes []*nodeState // Known nodes nodeMap map[string]*nodeState // Maps Addr.String() -> NodeState @@ -52,8 +57,6 @@ type Memberlist struct { broadcasts *TransmitLimitedQueue - startStopLock sync.Mutex - logger *log.Logger } @@ -90,6 +93,9 
@@ func newMemberlist(conf *Config) (*Memberlist, error) { if err != nil { return nil, fmt.Errorf("Failed to start TCP listener. Err: %s", err) } + if conf.BindPort == 0 { + conf.BindPort = tcpLn.Addr().(*net.TCPAddr).Port + } udpAddr := &net.UDPAddr{IP: net.ParseIP(conf.BindAddr), Port: conf.BindPort} udpLn, err := net.ListenUDP("udp", udpAddr) @@ -101,10 +107,19 @@ func newMemberlist(conf *Config) (*Memberlist, error) { // Set the UDP receive window size setUDPRecvBuf(udpLn) - if conf.LogOutput == nil { - conf.LogOutput = os.Stderr + if conf.LogOutput != nil && conf.Logger != nil { + return nil, fmt.Errorf("Cannot specify both LogOutput and Logger. Please choose a single log configuration setting.") + } + + logDest := conf.LogOutput + if logDest == nil { + logDest = os.Stderr + } + + logger := conf.Logger + if logger == nil { + logger = log.New(logDest, "", log.LstdFlags) } - logger := log.New(conf.LogOutput, "", log.LstdFlags) m := &Memberlist{ config: conf, @@ -118,7 +133,9 @@ func newMemberlist(conf *Config) (*Memberlist, error) { broadcasts: &TransmitLimitedQueue{RetransmitMult: conf.RetransmitMult}, logger: logger, } - m.broadcasts.NumNodes = func() int { return len(m.nodes) } + m.broadcasts.NumNodes = func() int { + return m.estNumNodes() + } go m.tcpListen() go m.udpListen() go m.udpHandler() @@ -153,79 +170,158 @@ func Create(conf *Config) (*Memberlist, error) { // none could be reached. If an error is returned, the node did not successfully // join the cluster. 
func (m *Memberlist) Join(existing []string) (int, error) { - // Attempt to join any of them numSuccess := 0 - var retErr error + var errs error for _, exist := range existing { - addrs, port, err := m.resolveAddr(exist) + addrs, err := m.resolveAddr(exist) if err != nil { - m.logger.Printf("[WARN] memberlist: Failed to resolve %s: %v", exist, err) - retErr = err + err = fmt.Errorf("Failed to resolve %s: %v", exist, err) + errs = multierror.Append(errs, err) + m.logger.Printf("[WARN] memberlist: %v", err) continue } for _, addr := range addrs { - if err := m.pushPullNode(addr, port, true); err != nil { - retErr = err + if err := m.pushPullNode(addr.ip, addr.port, true); err != nil { + err = fmt.Errorf("Failed to join %s: %v", addr.ip, err) + errs = multierror.Append(errs, err) + m.logger.Printf("[DEBUG] memberlist: %v", err) continue } numSuccess++ } } - if numSuccess > 0 { - retErr = nil + errs = nil + } + return numSuccess, errs +} + +// ipPort holds information about a node we want to try to join. +type ipPort struct { + ip net.IP + port uint16 +} + +// tcpLookupIP is a helper to initiate a TCP-based DNS lookup for the given host. +// The built-in Go resolver will do a UDP lookup first, and will only use TCP if +// the response has the truncate bit set, which isn't common on DNS servers like +// Consul's. By doing the TCP lookup directly, we get the best chance for the +// largest list of hosts to join. Since joins are relatively rare events, it's ok +// to do this rather expensive operation. +func (m *Memberlist) tcpLookupIP(host string, defaultPort uint16) ([]ipPort, error) { + // Don't attempt any TCP lookups against non-fully qualified domain + // names, since those will likely come from the resolv.conf file. + if !strings.Contains(host, ".") { + return nil, nil } - return numSuccess, retErr + // Make sure the domain name is terminated with a dot (we know there's + // at least one character at this point). + dn := host + if dn[len(dn)-1] != '.' 
{ + dn = dn + "." + } + + // See if we can find a server to try. + cc, err := dns.ClientConfigFromFile(m.config.DNSConfigPath) + if err != nil { + return nil, err + } + if len(cc.Servers) > 0 { + // We support host:port in the DNS config, but need to add the + // default port if one is not supplied. + server := cc.Servers[0] + if !hasPort(server) { + server = net.JoinHostPort(server, cc.Port) + } + + // Do the lookup. + c := new(dns.Client) + c.Net = "tcp" + msg := new(dns.Msg) + msg.SetQuestion(dn, dns.TypeANY) + in, _, err := c.Exchange(msg, server) + if err != nil { + return nil, err + } + + // Handle any IPs we get back that we can attempt to join. + var ips []ipPort + for _, r := range in.Answer { + switch rr := r.(type) { + case (*dns.A): + ips = append(ips, ipPort{rr.A, defaultPort}) + case (*dns.AAAA): + ips = append(ips, ipPort{rr.AAAA, defaultPort}) + case (*dns.CNAME): + m.logger.Printf("[DEBUG] memberlist: Ignoring CNAME RR in TCP-first answer for '%s'", host) + } + } + return ips, nil + } + + return nil, nil } // resolveAddr is used to resolve the address into an address, // port, and error. If no port is given, use the default -func (m *Memberlist) resolveAddr(hostStr string) ([][]byte, uint16, error) { - ips := make([][]byte, 0) +func (m *Memberlist) resolveAddr(hostStr string) ([]ipPort, error) { + // Normalize the incoming string to host:port so we can apply Go's + // parser to it. 
port := uint16(0) + if !hasPort(hostStr) { + hostStr += ":" + strconv.Itoa(m.config.BindPort) + } host, sport, err := net.SplitHostPort(hostStr) - if ae, ok := err.(*net.AddrError); ok && ae.Err == "missing port in address" { - // error, port missing - we can solve this - port = uint16(m.config.BindPort) - host = hostStr - } else if err != nil { - // error, but not missing port - return ips, port, err - } else if lport, err := strconv.ParseUint(sport, 10, 16); err != nil { - // error, when parsing port - return ips, port, err - } else { - // no error - port = uint16(lport) + if err != nil { + return nil, err } - // Get the addresses that hostPort might resolve to - // ResolveTcpAddr requres ipv6 brackets to separate - // port numbers whereas ParseIP doesn't, but luckily - // SplitHostPort takes care of the brackets - if ip := net.ParseIP(host); ip == nil { - if pre, err := net.LookupIP(host); err == nil { - for _, ip := range pre { - ips = append(ips, ip) - } - } else { - return ips, port, err - } - } else { - ips = append(ips, ip) + // This will capture the supplied port, or the default one added above. + lport, err := strconv.ParseUint(sport, 10, 16) + if err != nil { + return nil, err + } + port = uint16(lport) + + // If it looks like an IP address we are done. The SplitHostPort() above + // will make sure the host part is in good shape for parsing, even for + // IPv6 addresses. + if ip := net.ParseIP(host); ip != nil { + return []ipPort{ipPort{ip, port}}, nil } - return ips, port, nil + // First try TCP so we have the best chance for the largest list of + // hosts to join. If this fails it's not fatal since this isn't a standard + // way to query DNS, and we have a fallback below. 
+ ips, err := m.tcpLookupIP(host, port) + if err != nil { + m.logger.Printf("[DEBUG] memberlist: TCP-first lookup failed for '%s', falling back to UDP: %s", hostStr, err) + } + if len(ips) > 0 { + return ips, nil + } + + // If TCP didn't yield anything then use the normal Go resolver which + // will try UDP, then might possibly try TCP again if the UDP response + // indicates it was truncated. + ans, err := net.LookupIP(host) + if err != nil { + return nil, err + } + ips = make([]ipPort, 0, len(ans)) + for _, ip := range ans { + ips = append(ips, ipPort{ip, port}) + } + return ips, nil } // setAlive is used to mark this node as being alive. This is the same // as if we received an alive notification our own network channel for // ourself. func (m *Memberlist) setAlive() error { - var advertiseAddr []byte var advertisePort int if m.config.AdvertiseAddr != "" { @@ -268,7 +364,7 @@ func (m *Memberlist) setAlive() error { if ip.To4() == nil { continue } - if !isPrivateIP(ip.String()) { + if !IsPrivateIP(ip.String()) { continue } @@ -286,12 +382,14 @@ func (m *Memberlist) setAlive() error { addr := m.tcpListener.Addr().(*net.TCPAddr) advertiseAddr = addr.IP } - advertisePort = m.config.BindPort + + // Use the port we are bound to. 
+ advertisePort = m.tcpListener.Addr().(*net.TCPAddr).Port } // Check if this is a public address without encryption addrStr := net.IP(advertiseAddr).String() - if !isPrivateIP(addrStr) && !isLoopbackIP(addrStr) && !m.config.EncryptionEnabled() { + if !IsPrivateIP(addrStr) && !isLoopbackIP(addrStr) && !m.config.EncryptionEnabled() { m.logger.Printf("[WARN] memberlist: Binding to public address without encryption!") } @@ -385,7 +483,8 @@ func (m *Memberlist) UpdateNode(timeout time.Duration) error { // user-data message, which a delegate will receive through NotifyMsg // The actual data is transmitted over UDP, which means this is a // best-effort transmission mechanism, and the maximum size of the -// message is the size of a single UDP datagram, after compression +// message is the size of a single UDP datagram, after compression. +// This method is DEPRECATED in favor of SendToUDP func (m *Memberlist) SendTo(to net.Addr, msg []byte) error { // Encode as a user message buf := make([]byte, 1, len(msg)+1) @@ -393,7 +492,36 @@ func (m *Memberlist) SendTo(to net.Addr, msg []byte) error { buf = append(buf, msg...) // Send the message - return m.rawSendMsg(to, buf) + return m.rawSendMsgUDP(to, buf) +} + +// SendToUDP is used to directly send a message to another node, without +// the use of the gossip mechanism. This will encode the message as a +// user-data message, which a delegate will receive through NotifyMsg +// The actual data is transmitted over UDP, which means this is a +// best-effort transmission mechanism, and the maximum size of the +// message is the size of a single UDP datagram, after compression +func (m *Memberlist) SendToUDP(to *Node, msg []byte) error { + // Encode as a user message + buf := make([]byte, 1, len(msg)+1) + buf[0] = byte(userMsg) + buf = append(buf, msg...)
+ + // Send the message + destAddr := &net.UDPAddr{IP: to.Addr, Port: int(to.Port)} + return m.rawSendMsgUDP(destAddr, buf) +} + +// SendToTCP is used to directly send a message to another node, without +// the use of the gossip mechanism. This will encode the message as a +// user-data message, which a delegate will receive through NotifyMsg +// The actual data is transmitted over TCP, which means delivery +// is guaranteed if no error is returned. There is no limit +// to the size of the message +func (m *Memberlist) SendToTCP(to *Node, msg []byte) error { + // Send the message + destAddr := &net.TCPAddr{IP: to.Addr, Port: int(to.Port)} + return m.sendTCPUserMsg(destAddr, msg) } // Members returns a list of all known live nodes. The node structures @@ -441,10 +569,12 @@ func (m *Memberlist) NumMembers() (alive int) { // This method is safe to call multiple times, but must not be called // after the cluster is already shut down. func (m *Memberlist) Leave(timeout time.Duration) error { - m.startStopLock.Lock() - defer m.startStopLock.Unlock() + m.nodeLock.Lock() + // We can't defer m.nodeLock.Unlock() because m.deadNode will also try to + // acquire a lock so we need to Unlock before that. if m.shutdown { + m.nodeLock.Unlock() panic("leave after shutdown") } @@ -452,6 +582,7 @@ func (m *Memberlist) Leave(timeout time.Duration) error { m.leave = true state, ok := m.nodeMap[m.config.Name] + m.nodeLock.Unlock() if !ok { m.logger.Printf("[WARN] memberlist: Leave but we're not in the node map.") return nil @@ -475,6 +606,8 @@ func (m *Memberlist) Leave(timeout time.Duration) error { return fmt.Errorf("timeout waiting for leave broadcast") } } + } else { + m.nodeLock.Unlock() } return nil @@ -509,8 +642,8 @@ func (m *Memberlist) ProtocolVersion() uint8 { // // This method is safe to call multiple times. 
func (m *Memberlist) Shutdown() error { - m.startStopLock.Lock() - defer m.startStopLock.Unlock() + m.nodeLock.Lock() + defer m.nodeLock.Unlock() if m.shutdown { return nil diff --git a/vendor/src/github.com/hashicorp/memberlist/merge_delegate.go b/vendor/src/github.com/hashicorp/memberlist/merge_delegate.go index dc27250021..89afb59f20 100644 --- a/vendor/src/github.com/hashicorp/memberlist/merge_delegate.go +++ b/vendor/src/github.com/hashicorp/memberlist/merge_delegate.go @@ -8,6 +8,7 @@ package memberlist // as part of the push-pull anti-entropy. type MergeDelegate interface { // NotifyMerge is invoked when a merge could take place. - // Provides a list of the nodes known by the peer. - NotifyMerge(peers []*Node) (cancel bool) + // Provides a list of the nodes known by the peer. If + // the return value is non-nil, the merge is canceled. + NotifyMerge(peers []*Node) error } diff --git a/vendor/src/github.com/hashicorp/memberlist/net.go b/vendor/src/github.com/hashicorp/memberlist/net.go index 4e8319c6e8..b92dccb101 100644 --- a/vendor/src/github.com/hashicorp/memberlist/net.go +++ b/vendor/src/github.com/hashicorp/memberlist/net.go @@ -18,7 +18,15 @@ import ( // range. This range is inclusive. const ( ProtocolVersionMin uint8 = 1 - ProtocolVersionMax = 2 + + // Version 3 added support for TCP pings but we kept the default + // protocol version at 2 to ease transition to this new feature. + // A memberlist speaking version 2 of the protocol will attempt + // to TCP ping another memberlist who understands version 3 or + // greater. 
+ ProtocolVersion2Compatible = 2 + + ProtocolVersionMax = 3 ) // messageType is an integer ID of a type of message that can be received @@ -79,7 +87,8 @@ type indirectPingReq struct { // ack response is sent for a ping type ackResp struct { - SeqNo uint32 + SeqNo uint32 + Payload []byte } // suspect is broadcast when we suspect a node is dead @@ -119,6 +128,11 @@ type pushPullHeader struct { Join bool // Is this a join request or a anti-entropy run } +// userMsgHeader is used to encapsulate a userMsg +type userMsgHeader struct { + UserMsgLen int // Encodes the byte lengh of user state +} + // pushNodeState is used for pushPullReq when we are // transfering out node states type pushNodeState struct { @@ -185,54 +199,65 @@ func (m *Memberlist) tcpListen() { // handleConn handles a single incoming TCP connection func (m *Memberlist) handleConn(conn *net.TCPConn) { - m.logger.Printf("[DEBUG] memberlist: Responding to push/pull sync with: %s", conn.RemoteAddr()) + m.logger.Printf("[DEBUG] memberlist: TCP connection %s", LogConn(conn)) + defer conn.Close() metrics.IncrCounter([]string{"memberlist", "tcp", "accept"}, 1) - join, remoteNodes, userState, err := m.readRemoteState(conn) + conn.SetDeadline(time.Now().Add(m.config.TCPTimeout)) + msgType, bufConn, dec, err := m.readTCP(conn) if err != nil { - m.logger.Printf("[ERR] memberlist: Failed to receive remote state: %s", err) + m.logger.Printf("[ERR] memberlist: failed to receive: %s %s", err, LogConn(conn)) return } - if err := m.sendLocalState(conn, join); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to push local state: %s", err) - } - - if err := m.verifyProtocol(remoteNodes); err != nil { - m.logger.Printf("[ERR] memberlist: Push/pull verification failed: %s", err) - return - } - - // Invoke the merge delegate if any - if join && m.config.Merge != nil { - nodes := make([]*Node, len(remoteNodes)) - for idx, n := range remoteNodes { - nodes[idx] = &Node{ - Name: n.Name, - Addr: n.Addr, - Port: n.Port, - 
Meta: n.Meta, - PMin: n.Vsn[0], - PMax: n.Vsn[1], - PCur: n.Vsn[2], - DMin: n.Vsn[3], - DMax: n.Vsn[4], - DCur: n.Vsn[5], - } + switch msgType { + case userMsg: + if err := m.readUserMsg(bufConn, dec); err != nil { + m.logger.Printf("[ERR] memberlist: Failed to receive user message: %s %s", err, LogConn(conn)) } - if m.config.Merge.NotifyMerge(nodes) { - m.logger.Printf("[WARN] memberlist: Cluster merge canceled") + case pushPullMsg: + join, remoteNodes, userState, err := m.readRemoteState(bufConn, dec) + if err != nil { + m.logger.Printf("[ERR] memberlist: Failed to read remote state: %s %s", err, LogConn(conn)) return } - } - // Merge the membership state - m.mergeState(remoteNodes) + if err := m.sendLocalState(conn, join); err != nil { + m.logger.Printf("[ERR] memberlist: Failed to push local state: %s %s", err, LogConn(conn)) + return + } - // Invoke the delegate for user state - if m.config.Delegate != nil { - m.config.Delegate.MergeRemoteState(userState, join) + if err := m.mergeRemoteState(join, remoteNodes, userState); err != nil { + m.logger.Printf("[ERR] memberlist: Failed push/pull merge: %s %s", err, LogConn(conn)) + return + } + case pingMsg: + var p ping + if err := dec.Decode(&p); err != nil { + m.logger.Printf("[ERR] memberlist: Failed to decode TCP ping: %s %s", err, LogConn(conn)) + return + } + + if p.Node != "" && p.Node != m.config.Name { + m.logger.Printf("[WARN] memberlist: Got ping for unexpected node %s %s", p.Node, LogConn(conn)) + return + } + + ack := ackResp{p.SeqNo, nil} + out, err := encode(ackRespMsg, &ack) + if err != nil { + m.logger.Printf("[ERR] memberlist: Failed to encode TCP ack: %s", err) + return + } + + err = m.rawSendMsgTCP(conn, out.Bytes()) + if err != nil { + m.logger.Printf("[ERR] memberlist: Failed to send TCP ack: %s %s", err, LogConn(conn)) + return + } + default: + m.logger.Printf("[ERR] memberlist: Received invalid msgType (%d) %s", msgType, LogConn(conn)) } } @@ -265,29 +290,30 @@ func (m *Memberlist) udpListen() 
{ continue } + // Capture the reception time of the packet as close to the + // system calls as possible. + lastPacket = time.Now() + // Check the length if n < 1 { - m.logger.Printf("[ERR] memberlist: UDP packet too short (%d bytes). From: %s", - len(buf), addr) + m.logger.Printf("[ERR] memberlist: UDP packet too short (%d bytes) %s", + len(buf), LogAddress(addr)) continue } - // Capture the current time - lastPacket = time.Now() - // Ingest this packet metrics.IncrCounter([]string{"memberlist", "udp", "received"}, float32(n)) - m.ingestPacket(buf[:n], addr) + m.ingestPacket(buf[:n], addr, lastPacket) } } -func (m *Memberlist) ingestPacket(buf []byte, from net.Addr) { +func (m *Memberlist) ingestPacket(buf []byte, from net.Addr, timestamp time.Time) { // Check if encryption is enabled if m.config.EncryptionEnabled() { // Decrypt the payload plain, err := decryptPayload(m.config.Keyring.GetKeys(), buf, nil) if err != nil { - m.logger.Printf("[ERR] memberlist: Decrypt packet failed: %v", err) + m.logger.Printf("[ERR] memberlist: Decrypt packet failed: %v %s", err, LogAddress(from)) return } @@ -296,10 +322,10 @@ func (m *Memberlist) ingestPacket(buf []byte, from net.Addr) { } // Handle the command - m.handleCommand(buf, from) + m.handleCommand(buf, from, timestamp) } -func (m *Memberlist) handleCommand(buf []byte, from net.Addr) { +func (m *Memberlist) handleCommand(buf []byte, from net.Addr, timestamp time.Time) { // Decode the message type msgType := messageType(buf[0]) buf = buf[1:] @@ -307,16 +333,16 @@ func (m *Memberlist) handleCommand(buf []byte, from net.Addr) { // Switch on the msgType switch msgType { case compoundMsg: - m.handleCompound(buf, from) + m.handleCompound(buf, from, timestamp) case compressMsg: - m.handleCompressed(buf, from) + m.handleCompressed(buf, from, timestamp) case pingMsg: m.handlePing(buf, from) case indirectPingMsg: m.handleIndirectPing(buf, from) case ackRespMsg: - m.handleAck(buf, from) + m.handleAck(buf, from, timestamp) case 
suspectMsg: fallthrough @@ -328,11 +354,11 @@ func (m *Memberlist) handleCommand(buf []byte, from net.Addr) { select { case m.handoff <- msgHandoff{msgType, buf, from}: default: - m.logger.Printf("[WARN] memberlist: UDP handler queue full, dropping message (%d)", msgType) + m.logger.Printf("[WARN] memberlist: UDP handler queue full, dropping message (%d) %s", msgType, LogAddress(from)) } default: - m.logger.Printf("[ERR] memberlist: UDP msg type (%d) not supported. From: %s", msgType, from) + m.logger.Printf("[ERR] memberlist: UDP msg type (%d) not supported %s", msgType, LogAddress(from)) } } @@ -357,7 +383,7 @@ func (m *Memberlist) udpHandler() { case userMsg: m.handleUser(buf, from) default: - m.logger.Printf("[ERR] memberlist: UDP msg type (%d) not supported. From: %s (handler)", msgType, from) + m.logger.Printf("[ERR] memberlist: UDP msg type (%d) not supported %s (handler)", msgType, LogAddress(from)) } case <-m.shutdownCh: @@ -366,46 +392,50 @@ func (m *Memberlist) udpHandler() { } } -func (m *Memberlist) handleCompound(buf []byte, from net.Addr) { +func (m *Memberlist) handleCompound(buf []byte, from net.Addr, timestamp time.Time) { // Decode the parts trunc, parts, err := decodeCompoundMessage(buf) if err != nil { - m.logger.Printf("[ERR] memberlist: Failed to decode compound request: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to decode compound request: %s %s", err, LogAddress(from)) return } // Log any truncation if trunc > 0 { - m.logger.Printf("[WARN] memberlist: Compound request had %d truncated messages", trunc) + m.logger.Printf("[WARN] memberlist: Compound request had %d truncated messages %s", trunc, LogAddress(from)) } // Handle each message for _, part := range parts { - m.handleCommand(part, from) + m.handleCommand(part, from, timestamp) } } func (m *Memberlist) handlePing(buf []byte, from net.Addr) { var p ping if err := decode(buf, &p); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to decode ping request: %s", err) + 
m.logger.Printf("[ERR] memberlist: Failed to decode ping request: %s %s", err, LogAddress(from)) return } // If node is provided, verify that it is for us if p.Node != "" && p.Node != m.config.Name { - m.logger.Printf("[WARN] memberlist: Got ping for unexpected node '%s'", p.Node) + m.logger.Printf("[WARN] memberlist: Got ping for unexpected node '%s' %s", p.Node, LogAddress(from)) return } - ack := ackResp{p.SeqNo} + var ack ackResp + ack.SeqNo = p.SeqNo + if m.config.Ping != nil { + ack.Payload = m.config.Ping.AckPayload() + } if err := m.encodeAndSendMsg(from, ackRespMsg, &ack); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to send ack: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to send ack: %s %s", err, LogAddress(from)) } } func (m *Memberlist) handleIndirectPing(buf []byte, from net.Addr) { var ind indirectPingReq if err := decode(buf, &ind); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to decode indirect ping request: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to decode indirect ping request: %s %s", err, LogAddress(from)) return } @@ -421,33 +451,33 @@ func (m *Memberlist) handleIndirectPing(buf []byte, from net.Addr) { destAddr := &net.UDPAddr{IP: ind.Target, Port: int(ind.Port)} // Setup a response handler to relay the ack - respHandler := func() { - ack := ackResp{ind.SeqNo} + respHandler := func(payload []byte, timestamp time.Time) { + ack := ackResp{ind.SeqNo, nil} if err := m.encodeAndSendMsg(from, ackRespMsg, &ack); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to forward ack: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to forward ack: %s %s", err, LogAddress(from)) } } m.setAckHandler(localSeqNo, respHandler, m.config.ProbeTimeout) // Send the ping if err := m.encodeAndSendMsg(destAddr, pingMsg, &ping); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to send ping: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to send ping: %s %s", err, LogAddress(from)) } } -func (m 
*Memberlist) handleAck(buf []byte, from net.Addr) { +func (m *Memberlist) handleAck(buf []byte, from net.Addr, timestamp time.Time) { var ack ackResp if err := decode(buf, &ack); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to decode ack response: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to decode ack response: %s %s", err, LogAddress(from)) return } - m.invokeAckHandler(ack.SeqNo) + m.invokeAckHandler(ack, timestamp) } func (m *Memberlist) handleSuspect(buf []byte, from net.Addr) { var sus suspect if err := decode(buf, &sus); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to decode suspect message: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to decode suspect message: %s %s", err, LogAddress(from)) return } m.suspectNode(&sus) @@ -456,7 +486,7 @@ func (m *Memberlist) handleSuspect(buf []byte, from net.Addr) { func (m *Memberlist) handleAlive(buf []byte, from net.Addr) { var live alive if err := decode(buf, &live); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to decode alive message: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to decode alive message: %s %s", err, LogAddress(from)) return } @@ -472,7 +502,7 @@ func (m *Memberlist) handleAlive(buf []byte, from net.Addr) { func (m *Memberlist) handleDead(buf []byte, from net.Addr) { var d dead if err := decode(buf, &d); err != nil { - m.logger.Printf("[ERR] memberlist: Failed to decode dead message: %s", err) + m.logger.Printf("[ERR] memberlist: Failed to decode dead message: %s %s", err, LogAddress(from)) return } m.deadNode(&d) @@ -487,16 +517,16 @@ func (m *Memberlist) handleUser(buf []byte, from net.Addr) { } // handleCompressed is used to unpack a compressed message -func (m *Memberlist) handleCompressed(buf []byte, from net.Addr) { +func (m *Memberlist) handleCompressed(buf []byte, from net.Addr, timestamp time.Time) { // Try to decode the payload payload, err := decompressPayload(buf) if err != nil { - m.logger.Printf("[ERR] memberlist: Failed 
to decompress payload: %v", err) + m.logger.Printf("[ERR] memberlist: Failed to decompress payload: %v %s", err, LogAddress(from)) return } // Recursively handle the payload - m.handleCommand(payload, from) + m.handleCommand(payload, from, timestamp) } // encodeAndSendMsg is used to combine the encoding and sending steps @@ -523,7 +553,7 @@ func (m *Memberlist) sendMsg(to net.Addr, msg []byte) error { // Fast path if nothing to piggypack if len(extra) == 0 { - return m.rawSendMsg(to, msg) + return m.rawSendMsgUDP(to, msg) } // Join all the messages @@ -535,11 +565,11 @@ func (m *Memberlist) sendMsg(to net.Addr, msg []byte) error { compound := makeCompoundMessage(msgs) // Send the message - return m.rawSendMsg(to, compound.Bytes()) + return m.rawSendMsgUDP(to, compound.Bytes()) } -// rawSendMsg is used to send a UDP message to another host without modification -func (m *Memberlist) rawSendMsg(to net.Addr, msg []byte) error { +// rawSendMsgUDP is used to send a UDP message to another host without modification +func (m *Memberlist) rawSendMsgUDP(to net.Addr, msg []byte) error { // Check if we have compression enabled if m.config.EnableCompression { buf, err := compressPayload(msg) @@ -571,7 +601,72 @@ func (m *Memberlist) rawSendMsg(to net.Addr, msg []byte) error { return err } -// sendState is used to initiate a push/pull over TCP with a remote node +// rawSendMsgTCP is used to send a TCP message to another host without modification +func (m *Memberlist) rawSendMsgTCP(conn net.Conn, sendBuf []byte) error { + // Check if compresion is enabled + if m.config.EnableCompression { + compBuf, err := compressPayload(sendBuf) + if err != nil { + m.logger.Printf("[ERROR] memberlist: Failed to compress payload: %v", err) + } else { + sendBuf = compBuf.Bytes() + } + } + + // Check if encryption is enabled + if m.config.EncryptionEnabled() { + crypt, err := m.encryptLocalState(sendBuf) + if err != nil { + m.logger.Printf("[ERROR] memberlist: Failed to encrypt local state: %v", 
err) + return err + } + sendBuf = crypt + } + + // Write out the entire send buffer + metrics.IncrCounter([]string{"memberlist", "tcp", "sent"}, float32(len(sendBuf))) + + if n, err := conn.Write(sendBuf); err != nil { + return err + } else if n != len(sendBuf) { + return fmt.Errorf("only %d of %d bytes written", n, len(sendBuf)) + } + + return nil +} + +// sendTCPUserMsg is used to send a TCP userMsg to another host +func (m *Memberlist) sendTCPUserMsg(to net.Addr, sendBuf []byte) error { + dialer := net.Dialer{Timeout: m.config.TCPTimeout} + conn, err := dialer.Dial("tcp", to.String()) + if err != nil { + return err + } + defer conn.Close() + + bufConn := bytes.NewBuffer(nil) + + if err := bufConn.WriteByte(byte(userMsg)); err != nil { + return err + } + + // Send our node state + header := userMsgHeader{UserMsgLen: len(sendBuf)} + hd := codec.MsgpackHandle{} + enc := codec.NewEncoder(bufConn, &hd) + + if err := enc.Encode(&header); err != nil { + return err + } + + if _, err := bufConn.Write(sendBuf); err != nil { + return err + } + + return m.rawSendMsgTCP(conn, bufConn.Bytes()) +} + +// sendAndReceiveState is used to initiate a push/pull over TCP with a remote node func (m *Memberlist) sendAndReceiveState(addr []byte, port uint16, join bool) ([]pushNodeState, []byte, error) { // Attempt to connect dialer := net.Dialer{Timeout: m.config.TCPTimeout} @@ -589,15 +684,21 @@ func (m *Memberlist) sendAndReceiveState(addr []byte, port uint16, join bool) ([ return nil, nil, err } - // Read remote state - _, remote, userState, err := m.readRemoteState(conn) + conn.SetDeadline(time.Now().Add(m.config.TCPTimeout)) + msgType, bufConn, dec, err := m.readTCP(conn) if err != nil { - err := fmt.Errorf("Reading remote state failed: %v", err) return nil, nil, err } - // Return the remote state - return remote, userState, nil + // Quit if not push/pull + if msgType != pushPullMsg { + err := fmt.Errorf("received invalid msgType (%d), expected pushPullMsg (%d) %s", msgType, 
pushPullMsg, LogConn(conn)) + return nil, nil, err + } + + // Read remote state + _, remoteNodes, userState, err := m.readRemoteState(bufConn, dec) + return remoteNodes, userState, err } // sendLocalState is invoked to send our local state over a tcp connection @@ -658,34 +759,7 @@ func (m *Memberlist) sendLocalState(conn net.Conn, join bool) error { } // Get the send buffer - sendBuf := bufConn.Bytes() - - // Check if compresion is enabled - if m.config.EnableCompression { - compBuf, err := compressPayload(bufConn.Bytes()) - if err != nil { - m.logger.Printf("[ERROR] memberlist: Failed to compress local state: %v", err) - } else { - sendBuf = compBuf.Bytes() - } - } - - // Check if encryption is enabled - if m.config.EncryptionEnabled() { - crypt, err := m.encryptLocalState(sendBuf) - if err != nil { - m.logger.Printf("[ERROR] memberlist: Failed to encrypt local state: %v", err) - return err - } - sendBuf = crypt - } - - // Write out the entire send buffer - metrics.IncrCounter([]string{"memberlist", "tcp", "sent"}, float32(len(sendBuf))) - if _, err := conn.Write(sendBuf); err != nil { - return err - } - return nil + return m.rawSendMsgTCP(conn, bufConn.Bytes()) } // encryptLocalState is used to help encrypt local state before sending @@ -743,38 +817,36 @@ func (m *Memberlist) decryptRemoteState(bufConn io.Reader) ([]byte, error) { return decryptPayload(keys, cipherBytes, dataBytes) } -// recvRemoteState is used to read the remote state from a connection -func (m *Memberlist) readRemoteState(conn net.Conn) (bool, []pushNodeState, []byte, error) { - // Setup a deadline - conn.SetDeadline(time.Now().Add(m.config.TCPTimeout)) - +// readTCP is used to read the start of a TCP stream. 
+// it decrypts and decompresses the stream if necessary +func (m *Memberlist) readTCP(conn net.Conn) (messageType, io.Reader, *codec.Decoder, error) { // Created a buffered reader var bufConn io.Reader = bufio.NewReader(conn) // Read the message type buf := [1]byte{0} if _, err := bufConn.Read(buf[:]); err != nil { - return false, nil, nil, err + return 0, nil, nil, err } msgType := messageType(buf[0]) // Check if the message is encrypted if msgType == encryptMsg { if !m.config.EncryptionEnabled() { - return false, nil, nil, + return 0, nil, nil, fmt.Errorf("Remote state is encrypted and encryption is not configured") } plain, err := m.decryptRemoteState(bufConn) if err != nil { - return false, nil, nil, err + return 0, nil, nil, err } // Reset message type and bufConn msgType = messageType(plain[0]) bufConn = bytes.NewReader(plain[1:]) } else if m.config.EncryptionEnabled() { - return false, nil, nil, + return 0, nil, nil, fmt.Errorf("Encryption is configured but remote state is not encrypted") } @@ -786,11 +858,11 @@ func (m *Memberlist) readRemoteState(conn net.Conn) (bool, []pushNodeState, []by if msgType == compressMsg { var c compress if err := dec.Decode(&c); err != nil { - return false, nil, nil, err + return 0, nil, nil, err } decomp, err := decompressBuffer(&c) if err != nil { - return false, nil, nil, err + return 0, nil, nil, err } // Reset the message type @@ -803,12 +875,11 @@ func (m *Memberlist) readRemoteState(conn net.Conn) (bool, []pushNodeState, []by dec = codec.NewDecoder(bufConn, &hd) } - // Quit if not push/pull - if msgType != pushPullMsg { - err := fmt.Errorf("received invalid msgType (%d)", msgType) - return false, nil, nil, err - } + return msgType, bufConn, dec, nil +} +// readRemoteState is used to read the remote state from a connection +func (m *Memberlist) readRemoteState(bufConn io.Reader, dec *codec.Decoder) (bool, []pushNodeState, []byte, error) { // Read the push/pull header var header pushPullHeader if err := 
dec.Decode(&header); err != nil { @@ -821,7 +892,7 @@ func (m *Memberlist) readRemoteState(conn net.Conn) (bool, []pushNodeState, []by // Try to decode all the states for i := 0; i < header.Nodes; i++ { if err := dec.Decode(&remoteNodes[i]); err != nil { - return false, remoteNodes, nil, err + return false, nil, nil, err } } @@ -836,7 +907,7 @@ func (m *Memberlist) readRemoteState(conn net.Conn) (bool, []pushNodeState, []by bytes, header.UserStateLen) } if err != nil { - return false, remoteNodes, nil, err + return false, nil, nil, err } } @@ -850,3 +921,119 @@ func (m *Memberlist) readRemoteState(conn net.Conn) (bool, []pushNodeState, []by return header.Join, remoteNodes, userBuf, nil } + +// mergeRemoteState is used to merge the remote state with our local state +func (m *Memberlist) mergeRemoteState(join bool, remoteNodes []pushNodeState, userBuf []byte) error { + if err := m.verifyProtocol(remoteNodes); err != nil { + return err + } + + // Invoke the merge delegate if any + if join && m.config.Merge != nil { + nodes := make([]*Node, len(remoteNodes)) + for idx, n := range remoteNodes { + nodes[idx] = &Node{ + Name: n.Name, + Addr: n.Addr, + Port: n.Port, + Meta: n.Meta, + PMin: n.Vsn[0], + PMax: n.Vsn[1], + PCur: n.Vsn[2], + DMin: n.Vsn[3], + DMax: n.Vsn[4], + DCur: n.Vsn[5], + } + } + if err := m.config.Merge.NotifyMerge(nodes); err != nil { + return err + } + } + + // Merge the membership state + m.mergeState(remoteNodes) + + // Invoke the delegate for user state + if userBuf != nil && m.config.Delegate != nil { + m.config.Delegate.MergeRemoteState(userBuf, join) + } + return nil +} + +// readUserMsg is used to decode a userMsg from a TCP stream +func (m *Memberlist) readUserMsg(bufConn io.Reader, dec *codec.Decoder) error { + // Read the user message header + var header userMsgHeader + if err := dec.Decode(&header); err != nil { + return err + } + + // Read the user message into a buffer + var userBuf []byte + if header.UserMsgLen > 0 { + userBuf = 
make([]byte, header.UserMsgLen) + bytes, err := io.ReadAtLeast(bufConn, userBuf, header.UserMsgLen) + if err == nil && bytes != header.UserMsgLen { + err = fmt.Errorf( + "Failed to read full user message (%d / %d)", + bytes, header.UserMsgLen) + } + if err != nil { + return err + } + + d := m.config.Delegate + if d != nil { + d.NotifyMsg(userBuf) + } + } + + return nil +} + +// sendPingAndWaitForAck makes a TCP connection to the given address, sends +// a ping, and waits for an ack. All of this is done as a series of blocking +// operations, given the deadline. The bool return parameter is true if we +// we able to round trip a ping to the other node. +func (m *Memberlist) sendPingAndWaitForAck(destAddr net.Addr, ping ping, deadline time.Time) (bool, error) { + dialer := net.Dialer{Deadline: deadline} + conn, err := dialer.Dial("tcp", destAddr.String()) + if err != nil { + // If the node is actually dead we expect this to fail, so we + // shouldn't spam the logs with it. After this point, errors + // with the connection are real, unexpected errors and should + // get propagated up. 
+ return false, nil + } + defer conn.Close() + conn.SetDeadline(deadline) + + out, err := encode(pingMsg, &ping) + if err != nil { + return false, err + } + + if err = m.rawSendMsgTCP(conn, out.Bytes()); err != nil { + return false, err + } + + msgType, _, dec, err := m.readTCP(conn) + if err != nil { + return false, err + } + + if msgType != ackRespMsg { + return false, fmt.Errorf("Unexpected msgType (%d) from TCP ping %s", msgType, LogConn(conn)) + } + + var ack ackResp + if err = dec.Decode(&ack); err != nil { + return false, err + } + + if ack.SeqNo != ping.SeqNo { + return false, fmt.Errorf("Sequence number from ack (%d) doesn't match ping (%d) from TCP ping %s", ack.SeqNo, ping.SeqNo, LogConn(conn)) + } + + return true, nil +} diff --git a/vendor/src/github.com/hashicorp/memberlist/ping_delegate.go b/vendor/src/github.com/hashicorp/memberlist/ping_delegate.go new file mode 100644 index 0000000000..1566c8b3d5 --- /dev/null +++ b/vendor/src/github.com/hashicorp/memberlist/ping_delegate.go @@ -0,0 +1,14 @@ +package memberlist + +import "time" + +// PingDelegate is used to notify an observer how long it took for a ping message to +// complete a round trip. It can also be used for writing arbitrary byte slices +// into ack messages. Note that in order to be meaningful for RTT estimates, this +// delegate does not apply to indirect pings, nor fallback pings sent over TCP. 
+type PingDelegate interface { + // AckPayload is invoked when an ack is being sent; the returned bytes will be appended to the ack + AckPayload() []byte + // NotifyPing is invoked when an ack for a ping is received + NotifyPingComplete(other *Node, rtt time.Duration, payload []byte) +} diff --git a/vendor/src/github.com/hashicorp/memberlist/state.go b/vendor/src/github.com/hashicorp/memberlist/state.go index 3fc1d02e19..d0339bd158 100644 --- a/vendor/src/github.com/hashicorp/memberlist/state.go +++ b/vendor/src/github.com/hashicorp/memberlist/state.go @@ -44,10 +44,20 @@ type nodeState struct { // ackHandler is used to register handlers for incoming acks type ackHandler struct { - handler func() + handler func([]byte, time.Time) timer *time.Timer } +// NoPingResponseError is used to indicate a 'ping' packet was +// successfully issued but no response was received +type NoPingResponseError struct { + node string +} + +func (f NoPingResponseError) Error() string { + return fmt.Sprintf("No response from node %s", f.node) +} + // Schedule is used to ensure the Tick is performed periodically. This // function is safe to call multiple times. If the memberlist is already // scheduled, then it won't do anything. @@ -128,9 +138,7 @@ func (m *Memberlist) pushPullTrigger(stop <-chan struct{}) { // Tick using a dynamic timer for { - m.nodeLock.RLock() - tickTime := pushPullScale(interval, len(m.nodes)) - m.nodeLock.RUnlock() + tickTime := pushPullScale(interval, m.estNumNodes()) select { case <-time.After(tickTime): m.pushPull() @@ -207,46 +215,55 @@ START: m.probeNode(&node) } -// probeNode handles a single round of failure checking on a node +// probeNode handles a single round of failure checking on a node. func (m *Memberlist) probeNode(node *nodeState) { defer metrics.MeasureSince([]string{"memberlist", "probeNode"}, time.Now()) - // Send a ping to the node + // Prepare a ping message and setup an ack handler. 
ping := ping{SeqNo: m.nextSeqNo(), Node: node.Name} - destAddr := &net.UDPAddr{IP: node.Addr, Port: int(node.Port)} - - // Setup an ack handler - ackCh := make(chan bool, m.config.IndirectChecks+1) + ackCh := make(chan ackMessage, m.config.IndirectChecks+1) m.setAckChannel(ping.SeqNo, ackCh, m.config.ProbeInterval) - // Send the ping message + // Send a ping to the node. + deadline := time.Now().Add(m.config.ProbeInterval) + destAddr := &net.UDPAddr{IP: node.Addr, Port: int(node.Port)} if err := m.encodeAndSendMsg(destAddr, pingMsg, &ping); err != nil { m.logger.Printf("[ERR] memberlist: Failed to send ping: %s", err) return } - // Wait for response or round-trip-time + // Mark the sent time here, which should be after any pre-processing and + // system calls to do the actual send. This probably under-reports a bit, + // but it's the best we can do. + sent := time.Now() + + // Wait for response or round-trip-time. select { case v := <-ackCh: - if v == true { + if v.Complete == true { + if m.config.Ping != nil { + rtt := v.Timestamp.Sub(sent) + m.config.Ping.NotifyPingComplete(&node.Node, rtt, v.Payload) + } return } // As an edge case, if we get a timeout, we need to re-enqueue it - // here to break out of the select below - if v == false { + // here to break out of the select below. + if v.Complete == false { ackCh <- v } case <-time.After(m.config.ProbeTimeout): + m.logger.Printf("[DEBUG] memberlist: Failed UDP ping: %v (timeout reached)", node.Name) } - // Get some random live nodes + // Get some random live nodes. m.nodeLock.RLock() excludes := []string{m.config.Name, node.Name} kNodes := kRandomNodes(m.config.IndirectChecks, excludes, m.nodes) m.nodeLock.RUnlock() - // Attempt an indirect ping + // Attempt an indirect ping. 
ind := indirectPingReq{SeqNo: ping.SeqNo, Target: node.Addr, Port: node.Port, Node: node.Name} for _, peer := range kNodes { destAddr := &net.UDPAddr{IP: peer.Addr, Port: int(peer.Port)} @@ -255,10 +272,49 @@ func (m *Memberlist) probeNode(node *nodeState) { } } - // Wait for the acks or timeout + // Also make an attempt to contact the node directly over TCP. This + // helps prevent confused clients who get isolated from UDP traffic + // but can still speak TCP (which also means they can possibly report + // misinformation to other nodes via anti-entropy), avoiding flapping in + // the cluster. + // + // This is a little unusual because we will attempt a TCP ping to any + // member who understands version 3 of the protocol, regardless of + // which protocol version we are speaking. That's why we've included a + // config option to turn this off if desired. + fallbackCh := make(chan bool, 1) + if (!m.config.DisableTcpPings) && (node.PMax >= 3) { + destAddr := &net.TCPAddr{IP: node.Addr, Port: int(node.Port)} + go func() { + defer close(fallbackCh) + didContact, err := m.sendPingAndWaitForAck(destAddr, ping, deadline) + if err != nil { + m.logger.Printf("[ERR] memberlist: Failed TCP fallback ping: %s", err) + } else { + fallbackCh <- didContact + } + }() + } else { + close(fallbackCh) + } + + // Wait for the acks or timeout. Note that we don't check the fallback + // channel here because we want to issue a warning below if that's the + // *only* way we hear back from the peer, so we have to let this time + // out first to allow the normal UDP-based acks to come in. select { case v := <-ackCh: - if v == true { + if v.Complete == true { + return + } + } + + // Finally, poll the fallback channel. The timeouts are set such that + // the channel will have something or be closed without having to wait + // any additional time here. 
+ for didContact := range fallbackCh { + if didContact { + m.logger.Printf("[WARN] memberlist: Was able to reach %s via TCP but not UDP, network may be misconfigured and not allowing bidirectional UDP", node.Name) return } } @@ -269,6 +325,37 @@ func (m *Memberlist) probeNode(node *nodeState) { m.suspectNode(&s) } +// Ping initiates a ping to the node with the specified name. +func (m *Memberlist) Ping(node string, addr net.Addr) (time.Duration, error) { + // Prepare a ping message and setup an ack handler. + ping := ping{SeqNo: m.nextSeqNo(), Node: node} + ackCh := make(chan ackMessage, m.config.IndirectChecks+1) + m.setAckChannel(ping.SeqNo, ackCh, m.config.ProbeInterval) + + // Send a ping to the node. + if err := m.encodeAndSendMsg(addr, pingMsg, &ping); err != nil { + return 0, err + } + + // Mark the sent time here, which should be after any pre-processing and + // system calls to do the actual send. This probably under-reports a bit, + // but it's the best we can do. + sent := time.Now() + + // Wait for response or timeout. + select { + case v := <-ackCh: + if v.Complete == true { + return v.Timestamp.Sub(sent), nil + } + case <-time.After(m.config.ProbeTimeout): + // Timeout, return an error below. + } + + m.logger.Printf("[DEBUG] memberlist: Failed UDP ping: %v (timeout reached)", node) + return 0, NoPingResponseError{ping.Node} +} + // resetNodes is used when the tick wraps around. It will reap the // dead nodes and shuffle the node list. 
func (m *Memberlist) resetNodes() { @@ -287,6 +374,9 @@ func (m *Memberlist) resetNodes() { // Trim the nodes to exclude the dead nodes m.nodes = m.nodes[0:deadIdx] + // Update numNodes after we've trimmed the dead nodes + atomic.StoreUint32(&m.numNodes, uint32(deadIdx)) + // Shuffle live nodes shuffleNodes(m.nodes) } @@ -320,7 +410,7 @@ func (m *Memberlist) gossip() { // Send the compound message destAddr := &net.UDPAddr{IP: node.Addr, Port: int(node.Port)} - if err := m.rawSendMsg(destAddr, compound.Bytes()); err != nil { + if err := m.rawSendMsgUDP(destAddr, compound.Bytes()); err != nil { m.logger.Printf("[ERR] memberlist: Failed to send gossip to %s: %s", destAddr, err) } } @@ -359,40 +449,9 @@ func (m *Memberlist) pushPullNode(addr []byte, port uint16, join bool) error { return err } - if err := m.verifyProtocol(remote); err != nil { + if err := m.mergeRemoteState(join, remote, userState); err != nil { return err } - - // Invoke the merge delegate if any - if join && m.config.Merge != nil { - nodes := make([]*Node, len(remote)) - for idx, n := range remote { - nodes[idx] = &Node{ - Name: n.Name, - Addr: n.Addr, - Port: n.Port, - Meta: n.Meta, - PMin: n.Vsn[0], - PMax: n.Vsn[1], - PCur: n.Vsn[2], - DMin: n.Vsn[3], - DMax: n.Vsn[4], - DCur: n.Vsn[5], - } - } - if m.config.Merge.NotifyMerge(nodes) { - m.logger.Printf("[WARN] memberlist: Cluster merge canceled") - return fmt.Errorf("Merge canceled") - } - } - - // Merge the state - m.mergeState(remote) - - // Invoke the delegate - if m.config.Delegate != nil { - m.config.Delegate.MergeRemoteState(userState, join) - } return nil } @@ -525,14 +584,24 @@ func (m *Memberlist) nextIncarnation() uint32 { return atomic.AddUint32(&m.incarnation, 1) } -// setAckChannel is used to attach a channel to receive a message when -// an ack with a given sequence number is received. 
The channel gets sent -// false on timeout -func (m *Memberlist) setAckChannel(seqNo uint32, ch chan bool, timeout time.Duration) { +// estNumNodes is used to get the current estimate of the number of nodes +func (m *Memberlist) estNumNodes() int { + return int(atomic.LoadUint32(&m.numNodes)) +} + +type ackMessage struct { + Complete bool + Payload []byte + Timestamp time.Time +} + +// setAckChannel is used to attach a channel to receive a message when an ack with a given +// sequence number is received. The `complete` field of the message will be false on timeout +func (m *Memberlist) setAckChannel(seqNo uint32, ch chan ackMessage, timeout time.Duration) { // Create a handler function - handler := func() { + handler := func(payload []byte, timestamp time.Time) { select { - case ch <- true: + case ch <- ackMessage{true, payload, timestamp}: default: } } @@ -549,7 +618,7 @@ func (m *Memberlist) setAckChannel(seqNo uint32, ch chan bool, timeout time.Dura delete(m.ackHandlers, seqNo) m.ackLock.Unlock() select { - case ch <- false: + case ch <- ackMessage{false, nil, time.Now()}: default: } }) @@ -558,7 +627,7 @@ func (m *Memberlist) setAckChannel(seqNo uint32, ch chan bool, timeout time.Dura // setAckHandler is used to attach a handler to be invoked when an // ack with a given sequence number is received. 
If a timeout is reached, // the handler is deleted -func (m *Memberlist) setAckHandler(seqNo uint32, handler func(), timeout time.Duration) { +func (m *Memberlist) setAckHandler(seqNo uint32, handler func([]byte, time.Time), timeout time.Duration) { // Add the handler ah := &ackHandler{handler, nil} m.ackLock.Lock() @@ -574,16 +643,16 @@ func (m *Memberlist) setAckHandler(seqNo uint32, handler func(), timeout time.Du } // Invokes an Ack handler if any is associated, and reaps the handler immediately -func (m *Memberlist) invokeAckHandler(seqNo uint32) { +func (m *Memberlist) invokeAckHandler(ack ackResp, timestamp time.Time) { m.ackLock.Lock() - ah, ok := m.ackHandlers[seqNo] - delete(m.ackHandlers, seqNo) + ah, ok := m.ackHandlers[ack.SeqNo] + delete(m.ackHandlers, ack.SeqNo) m.ackLock.Unlock() if !ok { return } ah.timer.Stop() - ah.handler() + ah.handler(ack.Payload, timestamp) } // aliveNode is invoked by the network layer when we get a message about a @@ -601,6 +670,30 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) { return } + // Invoke the Alive delegate if any. This can be used to filter out + // alive messages based on custom logic. For example, using a cluster name. + // Using a merge delegate is not enough, as it is possible for passive + // cluster merging to still occur. + if m.config.Alive != nil { + node := &Node{ + Name: a.Node, + Addr: a.Addr, + Port: a.Port, + Meta: a.Meta, + PMin: a.Vsn[0], + PMax: a.Vsn[1], + PCur: a.Vsn[2], + DMin: a.Vsn[3], + DMax: a.Vsn[4], + DCur: a.Vsn[5], + } + if err := m.config.Alive.NotifyAlive(node); err != nil { + m.logger.Printf("[WARN] memberlist: ignoring alive message for '%s': %s", + a.Node, err) + return + } + } + // Check if we've never seen this node before, and if not, then // store this node in our node map. 
if !ok { @@ -627,6 +720,9 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) { // Add at the end and swap with the node at the offset m.nodes = append(m.nodes, state) m.nodes[offset], m.nodes[n] = m.nodes[n], m.nodes[offset] + + // Update numNodes after we've added a new node + atomic.AddUint32(&m.numNodes, 1) } // Check if this address is different than the existing node @@ -658,9 +754,6 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) { return } - // Update metrics - metrics.IncrCounter([]string{"memberlist", "msg", "alive"}, 1) - // Store the old state and meta data oldState := state.State oldMeta := state.Meta @@ -728,6 +821,9 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) { } } + // Update metrics + metrics.IncrCounter([]string{"memberlist", "msg", "alive"}, 1) + // Notify the delegate of any relevant updates if m.config.Events != nil { if oldState == stateDead { @@ -799,7 +895,7 @@ func (m *Memberlist) suspectNode(s *suspect) { state.StateChange = changeTime // Setup a timeout for this - timeout := suspicionTimeout(m.config.SuspicionMult, len(m.nodes), m.config.ProbeInterval) + timeout := suspicionTimeout(m.config.SuspicionMult, m.estNumNodes(), m.config.ProbeInterval) time.AfterFunc(timeout, func() { m.nodeLock.Lock() state, ok := m.nodeMap[s.Node] diff --git a/vendor/src/github.com/hashicorp/memberlist/util.go b/vendor/src/github.com/hashicorp/memberlist/util.go index 27f9f2a3ca..7a59e3b370 100644 --- a/vendor/src/github.com/hashicorp/memberlist/util.go +++ b/vendor/src/github.com/hashicorp/memberlist/util.go @@ -5,12 +5,14 @@ import ( "compress/lzw" "encoding/binary" "fmt" - "github.com/hashicorp/go-msgpack/codec" "io" "math" "math/rand" "net" + "strings" "time" + + "github.com/hashicorp/go-msgpack/codec" ) // pushPullScale is the minimum number of nodes @@ -23,8 +25,11 @@ const pushPullScaleThreshold = 32 /* * Contains an entry for each private block: * 
10.0.0.0/8 + * 100.64.0.0/10 + * 127.0.0.0/8 + * 169.254.0.0/16 * 172.16.0.0/12 - * 192.168/16 + * 192.168.0.0/16 */ var privateBlocks []*net.IPNet @@ -40,25 +45,44 @@ func init() { rand.Seed(time.Now().UnixNano()) // Add each private block - privateBlocks = make([]*net.IPNet, 3) + privateBlocks = make([]*net.IPNet, 6) + _, block, err := net.ParseCIDR("10.0.0.0/8") if err != nil { panic(fmt.Sprintf("Bad cidr. Got %v", err)) } privateBlocks[0] = block - _, block, err = net.ParseCIDR("172.16.0.0/12") + _, block, err = net.ParseCIDR("100.64.0.0/10") if err != nil { panic(fmt.Sprintf("Bad cidr. Got %v", err)) } privateBlocks[1] = block - _, block, err = net.ParseCIDR("192.168.0.0/16") + _, block, err = net.ParseCIDR("127.0.0.0/8") if err != nil { panic(fmt.Sprintf("Bad cidr. Got %v", err)) } privateBlocks[2] = block + _, block, err = net.ParseCIDR("169.254.0.0/16") + if err != nil { + panic(fmt.Sprintf("Bad cidr. Got %v", err)) + } + privateBlocks[3] = block + + _, block, err = net.ParseCIDR("172.16.0.0/12") + if err != nil { + panic(fmt.Sprintf("Bad cidr. Got %v", err)) + } + privateBlocks[4] = block + + _, block, err = net.ParseCIDR("192.168.0.0/16") + if err != nil { + panic(fmt.Sprintf("Bad cidr. Got %v", err)) + } + privateBlocks[5] = block + _, block, err = net.ParseCIDR("127.0.0.0/8") if err != nil { panic(fmt.Sprintf("Bad cidr. Got %v", err)) @@ -84,6 +108,42 @@ func encode(msgType messageType, in interface{}) (*bytes.Buffer, error) { return buf, err } +// GetPrivateIP returns the first private IP address found in a list of +// addresses. 
+func GetPrivateIP(addresses []net.Addr) (net.IP, error) { + var candidates []net.IP + + // Find private IPv4 address + for _, rawAddr := range addresses { + var ip net.IP + switch addr := rawAddr.(type) { + case *net.IPAddr: + ip = addr.IP + case *net.IPNet: + ip = addr.IP + default: + continue + } + + if ip.To4() == nil { + continue + } + if !IsPrivateIP(ip.String()) { + continue + } + candidates = append(candidates, ip) + } + numIps := len(candidates) + switch numIps { + case 0: + return nil, fmt.Errorf("No private IP address found") + case 1: + return candidates[0], nil + default: + return nil, fmt.Errorf("Multiple private IPs found. Please configure one.") + } +} + // Returns a random offset between 0 and n func randomOffset(n int) int { if n == 0 { @@ -107,9 +167,10 @@ func retransmitLimit(retransmitMult, n int) int { return limit } -// shuffleNodes randomly shuffles the input nodes +// shuffleNodes randomly shuffles the input nodes using the Fisher-Yates shuffle func shuffleNodes(nodes []*nodeState) { - for i := range nodes { + n := len(nodes) + for i := n - 1; i > 0; i-- { j := rand.Intn(i + 1) nodes[i], nodes[j] = nodes[j], nodes[i] } @@ -250,7 +311,7 @@ func decodeCompoundMessage(buf []byte) (trunc int, parts [][]byte, err error) { } // Returns if the given IP is in a private block -func isPrivateIP(ip_str string) bool { +func IsPrivateIP(ip_str string) bool { ip := net.ParseIP(ip_str) for _, priv := range privateBlocks { if priv.Contains(ip) { @@ -266,6 +327,12 @@ func isLoopbackIP(ip_str string) bool { return loopbackBlock.Contains(ip) } +// Given a string of the form "host", "host:port", or "[ipv6::address]:port", +// return true if the string includes a port. +func hasPort(s string) bool { + return strings.LastIndex(s, ":") > strings.LastIndex(s, "]") +} + // compressPayload takes an opaque input buffer, compresses it // and wraps it in a compress{} message that is encoded. 
func compressPayload(inp []byte) (*bytes.Buffer, error) { diff --git a/vendor/src/github.com/hashicorp/serf/coordinate/client.go b/vendor/src/github.com/hashicorp/serf/coordinate/client.go new file mode 100644 index 0000000000..613bfff89e --- /dev/null +++ b/vendor/src/github.com/hashicorp/serf/coordinate/client.go @@ -0,0 +1,180 @@ +package coordinate + +import ( + "fmt" + "math" + "sort" + "sync" + "time" +) + +// Client manages the estimated network coordinate for a given node, and adjusts +// it as the node observes round trip times and estimated coordinates from other +// nodes. The core algorithm is based on Vivaldi, see the documentation for Config +// for more details. +type Client struct { + // coord is the current estimate of the client's network coordinate. + coord *Coordinate + + // origin is a coordinate sitting at the origin. + origin *Coordinate + + // config contains the tuning parameters that govern the performance of + // the algorithm. + config *Config + + // adjustmentIndex is the current index into the adjustmentSamples slice. + adjustmentIndex uint + + // adjustment is used to store samples for the adjustment calculation. + adjustmentSamples []float64 + + // latencyFilterSamples is used to store the last several RTT samples, + // keyed by node name. We will use the config's LatencyFilterSamples + // value to determine how many samples we keep, per node. + latencyFilterSamples map[string][]float64 + + // mutex enables safe concurrent access to the client. + mutex sync.RWMutex +} + +// NewClient creates a new Client and verifies the configuration is valid. 
+func NewClient(config *Config) (*Client, error) { + if !(config.Dimensionality > 0) { + return nil, fmt.Errorf("dimensionality must be >0") + } + + return &Client{ + coord: NewCoordinate(config), + origin: NewCoordinate(config), + config: config, + adjustmentIndex: 0, + adjustmentSamples: make([]float64, config.AdjustmentWindowSize), + latencyFilterSamples: make(map[string][]float64), + }, nil +} + +// GetCoordinate returns a copy of the coordinate for this client. +func (c *Client) GetCoordinate() *Coordinate { + c.mutex.RLock() + defer c.mutex.RUnlock() + + return c.coord.Clone() +} + +// SetCoordinate forces the client's coordinate to a known state. +func (c *Client) SetCoordinate(coord *Coordinate) { + c.mutex.Lock() + defer c.mutex.Unlock() + + c.coord = coord.Clone() +} + +// ForgetNode removes any client state for the given node. +func (c *Client) ForgetNode(node string) { + c.mutex.Lock() + defer c.mutex.Unlock() + + delete(c.latencyFilterSamples, node) +} + +// latencyFilter applies a simple moving median filter with a new sample for +// a node. This assumes that the mutex has been locked already. +func (c *Client) latencyFilter(node string, rttSeconds float64) float64 { + samples, ok := c.latencyFilterSamples[node] + if !ok { + samples = make([]float64, 0, c.config.LatencyFilterSize) + } + + // Add the new sample and trim the list, if needed. + samples = append(samples, rttSeconds) + if len(samples) > int(c.config.LatencyFilterSize) { + samples = samples[1:] + } + c.latencyFilterSamples[node] = samples + + // Sort a copy of the samples and return the median. + sorted := make([]float64, len(samples)) + copy(sorted, samples) + sort.Float64s(sorted) + return sorted[len(sorted)/2] +} + +// updateVivialdi updates the Vivaldi portion of the client's coordinate. This +// assumes that the mutex has been locked already. 
+func (c *Client) updateVivaldi(other *Coordinate, rttSeconds float64) { + const zeroThreshold = 1.0e-6 + + dist := c.coord.DistanceTo(other).Seconds() + if rttSeconds < zeroThreshold { + rttSeconds = zeroThreshold + } + wrongness := math.Abs(dist-rttSeconds) / rttSeconds + + totalError := c.coord.Error + other.Error + if totalError < zeroThreshold { + totalError = zeroThreshold + } + weight := c.coord.Error / totalError + + c.coord.Error = c.config.VivaldiCE*weight*wrongness + c.coord.Error*(1.0-c.config.VivaldiCE*weight) + if c.coord.Error > c.config.VivaldiErrorMax { + c.coord.Error = c.config.VivaldiErrorMax + } + + delta := c.config.VivaldiCC * weight + force := delta * (rttSeconds - dist) + c.coord = c.coord.ApplyForce(c.config, force, other) +} + +// updateAdjustment updates the adjustment portion of the client's coordinate, if +// the feature is enabled. This assumes that the mutex has been locked already. +func (c *Client) updateAdjustment(other *Coordinate, rttSeconds float64) { + if c.config.AdjustmentWindowSize == 0 { + return + } + + // Note that the existing adjustment factors don't figure in to this + // calculation so we use the raw distance here. + dist := c.coord.rawDistanceTo(other) + c.adjustmentSamples[c.adjustmentIndex] = rttSeconds - dist + c.adjustmentIndex = (c.adjustmentIndex + 1) % c.config.AdjustmentWindowSize + + sum := 0.0 + for _, sample := range c.adjustmentSamples { + sum += sample + } + c.coord.Adjustment = sum / (2.0 * float64(c.config.AdjustmentWindowSize)) +} + +// updateGravity applies a small amount of gravity to pull coordinates towards +// the center of the coordinate system to combat drift. This assumes that the +// mutex is locked already. 
+func (c *Client) updateGravity() { + dist := c.origin.DistanceTo(c.coord).Seconds() + force := -1.0 * math.Pow(dist/c.config.GravityRho, 2.0) + c.coord = c.coord.ApplyForce(c.config, force, c.origin) +} + +// Update takes other, a coordinate for another node, and rtt, a round trip +// time observation for a ping to that node, and updates the estimated position of +// the client's coordinate. Returns the updated coordinate. +func (c *Client) Update(node string, other *Coordinate, rtt time.Duration) *Coordinate { + c.mutex.Lock() + defer c.mutex.Unlock() + + rttSeconds := c.latencyFilter(node, rtt.Seconds()) + c.updateVivaldi(other, rttSeconds) + c.updateAdjustment(other, rttSeconds) + c.updateGravity() + return c.coord.Clone() +} + +// DistanceTo returns the estimated RTT from the client's coordinate to other, the +// coordinate for another node. +func (c *Client) DistanceTo(other *Coordinate) time.Duration { + c.mutex.RLock() + defer c.mutex.RUnlock() + + return c.coord.DistanceTo(other) +} diff --git a/vendor/src/github.com/hashicorp/serf/coordinate/config.go b/vendor/src/github.com/hashicorp/serf/coordinate/config.go new file mode 100644 index 0000000000..a5b3aadfe4 --- /dev/null +++ b/vendor/src/github.com/hashicorp/serf/coordinate/config.go @@ -0,0 +1,70 @@ +package coordinate + +// Config is used to set the parameters of the Vivaldi-based coordinate mapping +// algorithm. +// +// The following references are called out at various points in the documentation +// here: +// +// [1] Dabek, Frank, et al. "Vivaldi: A decentralized network coordinate system." +// ACM SIGCOMM Computer Communication Review. Vol. 34. No. 4. ACM, 2004. +// [2] Ledlie, Jonathan, Paul Gardner, and Margo I. Seltzer. "Network Coordinates +// in the Wild." NSDI. Vol. 7. 2007. +// [3] Lee, Sanghwan, et al. "On suitability of Euclidean embedding for +// host-based network coordinate systems." Networking, IEEE/ACM Transactions +// on 18.1 (2010): 27-40. 
+type Config struct { + // The dimensionality of the coordinate system. As discussed in [2], more + // dimensions improves the accuracy of the estimates up to a point. Per [2] + // we chose 4 dimensions plus a non-Euclidean height. + Dimensionality uint + + // VivaldiErrorMax is the default error value when a node hasn't yet made + // any observations. It also serves as an upper limit on the error value in + // case observations cause the error value to increase without bound. + VivaldiErrorMax float64 + + // VivaldiCE is a tuning factor that controls the maximum impact an + // observation can have on a node's confidence. See [1] for more details. + VivaldiCE float64 + + // VivaldiCC is a tuning factor that controls the maximum impact an + // observation can have on a node's coordinate. See [1] for more details. + VivaldiCC float64 + + // AdjustmentWindowSize is a tuning factor that determines how many samples + // we retain to calculate the adjustment factor as discussed in [3]. Setting + // this to zero disables this feature. + AdjustmentWindowSize uint + + // HeightMin is the minimum value of the height parameter. Since this + // always must be positive, it will introduce a small amount error, so + // the chosen value should be relatively small compared to "normal" + // coordinates. + HeightMin float64 + + // LatencyFilterSamples is the maximum number of samples that are retained + // per node, in order to compute a median. The intent is to ride out blips + // but still keep the delay low, since our time to probe any given node is + // pretty infrequent. See [2] for more details. + LatencyFilterSize uint + + // GravityRho is a tuning factor that sets how much gravity has an effect + // to try to re-center coordinates. See [2] for more details. + GravityRho float64 +} + +// DefaultConfig returns a Config that has some default values suitable for +// basic testing of the algorithm, but not tuned to any particular type of cluster. 
+func DefaultConfig() *Config { + return &Config{ + Dimensionality: 8, + VivaldiErrorMax: 1.5, + VivaldiCE: 0.25, + VivaldiCC: 0.25, + AdjustmentWindowSize: 20, + HeightMin: 10.0e-6, + LatencyFilterSize: 3, + GravityRho: 150.0, + } +} diff --git a/vendor/src/github.com/hashicorp/serf/coordinate/coordinate.go b/vendor/src/github.com/hashicorp/serf/coordinate/coordinate.go new file mode 100644 index 0000000000..c9194e048b --- /dev/null +++ b/vendor/src/github.com/hashicorp/serf/coordinate/coordinate.go @@ -0,0 +1,183 @@ +package coordinate + +import ( + "math" + "math/rand" + "time" +) + +// Coordinate is a specialized structure for holding network coordinates for the +// Vivaldi-based coordinate mapping algorithm. All of the fields should be public +// to enable this to be serialized. All values in here are in units of seconds. +type Coordinate struct { + // Vec is the Euclidean portion of the coordinate. This is used along + // with the other fields to provide an overall distance estimate. The + // units here are seconds. + Vec []float64 + + // Err reflects the confidence in the given coordinate and is updated + // dynamically by the Vivaldi Client. This is dimensionless. + Error float64 + + // Adjustment is a distance offset computed based on a calculation over + // observations from all other nodes over a fixed window and is updated + // dynamically by the Vivaldi Client. The units here are seconds. + Adjustment float64 + + // Height is a distance offset that accounts for non-Euclidean effects + // which model the access links from nodes to the core Internet. The access + // links are usually set by bandwidth and congestion, and the core links + // usually follow distance based on geography. + Height float64 +} + +const ( + // secondsToNanoseconds is used to convert float seconds to nanoseconds. + secondsToNanoseconds = 1.0e9 + + // zeroThreshold is used to decide if two coordinates are on top of each + // other. 
+ zeroThreshold = 1.0e-6 +) + +// ErrDimensionalityConflict will be panic-d if you try to perform operations +// with incompatible dimensions. +type DimensionalityConflictError struct{} + +// Adds the error interface. +func (e DimensionalityConflictError) Error() string { + return "coordinate dimensionality does not match" +} + +// NewCoordinate creates a new coordinate at the origin, using the given config +// to supply key initial values. +func NewCoordinate(config *Config) *Coordinate { + return &Coordinate{ + Vec: make([]float64, config.Dimensionality), + Error: config.VivaldiErrorMax, + Adjustment: 0.0, + Height: config.HeightMin, + } +} + +// Clone creates an independent copy of this coordinate. +func (c *Coordinate) Clone() *Coordinate { + vec := make([]float64, len(c.Vec)) + copy(vec, c.Vec) + return &Coordinate{ + Vec: vec, + Error: c.Error, + Adjustment: c.Adjustment, + Height: c.Height, + } +} + +// IsCompatibleWith checks to see if the two coordinates are compatible +// dimensionally. If this returns true then you are guaranteed to not get +// any runtime errors operating on them. +func (c *Coordinate) IsCompatibleWith(other *Coordinate) bool { + return len(c.Vec) == len(other.Vec) +} + +// ApplyForce returns the result of applying the force from the direction of the +// other coordinate. +func (c *Coordinate) ApplyForce(config *Config, force float64, other *Coordinate) *Coordinate { + if !c.IsCompatibleWith(other) { + panic(DimensionalityConflictError{}) + } + + ret := c.Clone() + unit, mag := unitVectorAt(c.Vec, other.Vec) + ret.Vec = add(ret.Vec, mul(unit, force)) + if mag > zeroThreshold { + ret.Height = (ret.Height+other.Height)*force/mag + ret.Height + ret.Height = math.Max(ret.Height, config.HeightMin) + } + return ret +} + +// DistanceTo returns the distance between this coordinate and the other +// coordinate, including adjustments. 
+func (c *Coordinate) DistanceTo(other *Coordinate) time.Duration { + if !c.IsCompatibleWith(other) { + panic(DimensionalityConflictError{}) + } + + dist := c.rawDistanceTo(other) + adjustedDist := dist + c.Adjustment + other.Adjustment + if adjustedDist > 0.0 { + dist = adjustedDist + } + return time.Duration(dist * secondsToNanoseconds) +} + +// rawDistanceTo returns the Vivaldi distance between this coordinate and the +// other coordinate in seconds, not including adjustments. This assumes the +// dimensions have already been checked to be compatible. +func (c *Coordinate) rawDistanceTo(other *Coordinate) float64 { + return magnitude(diff(c.Vec, other.Vec)) + c.Height + other.Height +} + +// add returns the sum of vec1 and vec2. This assumes the dimensions have +// already been checked to be compatible. +func add(vec1 []float64, vec2 []float64) []float64 { + ret := make([]float64, len(vec1)) + for i, _ := range ret { + ret[i] = vec1[i] + vec2[i] + } + return ret +} + +// diff returns the difference between the vec1 and vec2. This assumes the +// dimensions have already been checked to be compatible. +func diff(vec1 []float64, vec2 []float64) []float64 { + ret := make([]float64, len(vec1)) + for i, _ := range ret { + ret[i] = vec1[i] - vec2[i] + } + return ret +} + +// mul returns vec multiplied by a scalar factor. +func mul(vec []float64, factor float64) []float64 { + ret := make([]float64, len(vec)) + for i, _ := range vec { + ret[i] = vec[i] * factor + } + return ret +} + +// magnitude computes the magnitude of the vec. +func magnitude(vec []float64) float64 { + sum := 0.0 + for i, _ := range vec { + sum += vec[i] * vec[i] + } + return math.Sqrt(sum) +} + +// unitVectorAt returns a unit vector pointing at vec1 from vec2. If the two +// positions are the same then a random unit vector is returned. We also return +// the distance between the points for use in the later height calculation. 
+func unitVectorAt(vec1 []float64, vec2 []float64) ([]float64, float64) { + ret := diff(vec1, vec2) + + // If the coordinates aren't on top of each other we can normalize. + if mag := magnitude(ret); mag > zeroThreshold { + return mul(ret, 1.0/mag), mag + } + + // Otherwise, just return a random unit vector. + for i, _ := range ret { + ret[i] = rand.Float64() - 0.5 + } + if mag := magnitude(ret); mag > zeroThreshold { + return mul(ret, 1.0/mag), 0.0 + } + + // And finally just give up and make a unit vector along the first + // dimension. This should be exceedingly rare. + ret = make([]float64, len(ret)) + ret[0] = 1.0 + return ret, 0.0 +} diff --git a/vendor/src/github.com/hashicorp/serf/coordinate/phantom.go b/vendor/src/github.com/hashicorp/serf/coordinate/phantom.go new file mode 100644 index 0000000000..6fb033c0cd --- /dev/null +++ b/vendor/src/github.com/hashicorp/serf/coordinate/phantom.go @@ -0,0 +1,187 @@ +package coordinate + +import ( + "fmt" + "math" + "math/rand" + "time" +) + +// GenerateClients returns a slice with nodes number of clients, all with the +// given config. +func GenerateClients(nodes int, config *Config) ([]*Client, error) { + clients := make([]*Client, nodes) + for i, _ := range clients { + client, err := NewClient(config) + if err != nil { + return nil, err + } + + clients[i] = client + } + return clients, nil +} + +// GenerateLine returns a truth matrix as if all the nodes are in a straight linke +// with the given spacing between them. +func GenerateLine(nodes int, spacing time.Duration) [][]time.Duration { + truth := make([][]time.Duration, nodes) + for i := range truth { + truth[i] = make([]time.Duration, nodes) + } + + for i := 0; i < nodes; i++ { + for j := i + 1; j < nodes; j++ { + rtt := time.Duration(j-i) * spacing + truth[i][j], truth[j][i] = rtt, rtt + } + } + return truth +} + +// GenerateGrid returns a truth matrix as if all the nodes are in a two dimensional +// grid with the given spacing between them. 
+func GenerateGrid(nodes int, spacing time.Duration) [][]time.Duration { + truth := make([][]time.Duration, nodes) + for i := range truth { + truth[i] = make([]time.Duration, nodes) + } + + n := int(math.Sqrt(float64(nodes))) + for i := 0; i < nodes; i++ { + for j := i + 1; j < nodes; j++ { + x1, y1 := float64(i%n), float64(i/n) + x2, y2 := float64(j%n), float64(j/n) + dx, dy := x2-x1, y2-y1 + dist := math.Sqrt(dx*dx + dy*dy) + rtt := time.Duration(dist * float64(spacing)) + truth[i][j], truth[j][i] = rtt, rtt + } + } + return truth +} + +// GenerateSplit returns a truth matrix as if half the nodes are close together in +// one location and half the nodes are close together in another. The lan factor +// is used to separate the nodes locally and the wan factor represents the split +// between the two sides. +func GenerateSplit(nodes int, lan time.Duration, wan time.Duration) [][]time.Duration { + truth := make([][]time.Duration, nodes) + for i := range truth { + truth[i] = make([]time.Duration, nodes) + } + + split := nodes / 2 + for i := 0; i < nodes; i++ { + for j := i + 1; j < nodes; j++ { + rtt := lan + if (i <= split && j > split) || (i > split && j <= split) { + rtt += wan + } + truth[i][j], truth[j][i] = rtt, rtt + } + } + return truth +} + +// GenerateCircle returns a truth matrix for a set of nodes, evenly distributed +// around a circle with the given radius. The first node is at the "center" of the +// circle because it's equidistant from all the other nodes, but we place it at +// double the radius, so it should show up above all the other nodes in height. 
+func GenerateCircle(nodes int, radius time.Duration) [][]time.Duration { + truth := make([][]time.Duration, nodes) + for i := range truth { + truth[i] = make([]time.Duration, nodes) + } + + for i := 0; i < nodes; i++ { + for j := i + 1; j < nodes; j++ { + var rtt time.Duration + if i == 0 { + rtt = 2 * radius + } else { + t1 := 2.0 * math.Pi * float64(i) / float64(nodes) + x1, y1 := math.Cos(t1), math.Sin(t1) + t2 := 2.0 * math.Pi * float64(j) / float64(nodes) + x2, y2 := math.Cos(t2), math.Sin(t2) + dx, dy := x2-x1, y2-y1 + dist := math.Sqrt(dx*dx + dy*dy) + rtt = time.Duration(dist * float64(radius)) + } + truth[i][j], truth[j][i] = rtt, rtt + } + } + return truth +} + +// GenerateRandom returns a truth matrix for a set of nodes with normally +// distributed delays, with the given mean and deviation. The RNG is re-seeded +// so you always get the same matrix for a given size. +func GenerateRandom(nodes int, mean time.Duration, deviation time.Duration) [][]time.Duration { + rand.Seed(1) + + truth := make([][]time.Duration, nodes) + for i := range truth { + truth[i] = make([]time.Duration, nodes) + } + + for i := 0; i < nodes; i++ { + for j := i + 1; j < nodes; j++ { + rttSeconds := rand.NormFloat64()*deviation.Seconds() + mean.Seconds() + rtt := time.Duration(rttSeconds * secondsToNanoseconds) + truth[i][j], truth[j][i] = rtt, rtt + } + } + return truth +} + +// Simulate runs the given number of cycles using the given list of clients and +// truth matrix. On each cycle, each client will pick a random node and observe +// the truth RTT, updating its coordinate estimate. The RNG is re-seeded for +// each simulation run to get deterministic results (for this algorithm and the +// underlying algorithm which will use random numbers for position vectors when +// starting out with everything at the origin). 
+func Simulate(clients []*Client, truth [][]time.Duration, cycles int) { + rand.Seed(1) + + nodes := len(clients) + for cycle := 0; cycle < cycles; cycle++ { + for i, _ := range clients { + if j := rand.Intn(nodes); j != i { + c := clients[j].GetCoordinate() + rtt := truth[i][j] + node := fmt.Sprintf("node_%d", j) + clients[i].Update(node, c, rtt) + } + } + } +} + +// Stats is returned from the Evaluate function with a summary of the algorithm +// performance. +type Stats struct { + ErrorMax float64 + ErrorAvg float64 +} + +// Evaluate uses the coordinates of the given clients to calculate estimated +// distances and compares them with the given truth matrix, returning summary +// stats. +func Evaluate(clients []*Client, truth [][]time.Duration) (stats Stats) { + nodes := len(clients) + count := 0 + for i := 0; i < nodes; i++ { + for j := i + 1; j < nodes; j++ { + est := clients[i].DistanceTo(clients[j].GetCoordinate()).Seconds() + actual := truth[i][j].Seconds() + error := math.Abs(est-actual) / actual + stats.ErrorMax = math.Max(stats.ErrorMax, error) + stats.ErrorAvg += error + count += 1 + } + } + + stats.ErrorAvg /= float64(count) + fmt.Printf("Error avg=%9.6f max=%9.6f\n", stats.ErrorAvg, stats.ErrorMax) + return +} diff --git a/vendor/src/github.com/hashicorp/serf/serf/config.go b/vendor/src/github.com/hashicorp/serf/serf/config.go index e2a43f549f..87cba9f7ca 100644 --- a/vendor/src/github.com/hashicorp/serf/serf/config.go +++ b/vendor/src/github.com/hashicorp/serf/serf/config.go @@ -149,6 +149,14 @@ type Config struct { // QueryTimeoutMult int + // QueryResponseSizeLimit and QuerySizeLimit limit the inbound and + // outbound payload sizes for queries, respectively. These must fit + // in a UDP packet with some additional overhead, so tuning these + // past the default values of 1024 will depend on your network + // configuration. 
+ QueryResponseSizeLimit int + QuerySizeLimit int + // MemberlistConfig is the memberlist configuration that Serf will // use to do the underlying membership management and gossip. Some // fields in the MemberlistConfig will be overwritten by Serf no @@ -189,6 +197,12 @@ type Config struct { // node stays while the other node will leave the cluster and exit. EnableNameConflictResolution bool + // DisableCoordinates controls if Serf will maintain an estimate of this + // node's network coordinate internally. A network coordinate is useful + // for estimating the network distance (i.e. round trip time) between + // two nodes. Enabling this option adds some overhead to ping messages. + DisableCoordinates bool + // KeyringFile provides the location of a writable file where Serf can // persist changes to the encryption keyring. KeyringFile string @@ -229,6 +243,9 @@ func DefaultConfig() *Config { TombstoneTimeout: 24 * time.Hour, MemberlistConfig: memberlist.DefaultLANConfig(), QueryTimeoutMult: 16, + QueryResponseSizeLimit: 1024, + QuerySizeLimit: 1024, EnableNameConflictResolution: true, + DisableCoordinates: false, } } diff --git a/vendor/src/github.com/hashicorp/serf/serf/delegate.go b/vendor/src/github.com/hashicorp/serf/serf/delegate.go index 4ffedbd3d0..d19ca3090f 100644 --- a/vendor/src/github.com/hashicorp/serf/serf/delegate.go +++ b/vendor/src/github.com/hashicorp/serf/serf/delegate.go @@ -2,6 +2,7 @@ package serf import ( "fmt" + "github.com/armon/go-metrics" ) @@ -170,6 +171,12 @@ func (d *delegate) LocalState(join bool) []byte { } func (d *delegate) MergeRemoteState(buf []byte, isJoin bool) { + // Ensure we have a message + if len(buf) == 0 { + d.serf.logger.Printf("[ERR] serf: Remote state is zero bytes") + return + } + // Check the message type if messageType(buf[0]) != messagePushPullType { d.serf.logger.Printf("[ERR] serf: Remote state has bad type prefix: %v", buf[0]) diff --git a/vendor/src/github.com/hashicorp/serf/serf/event.go 
b/vendor/src/github.com/hashicorp/serf/serf/event.go index 5c6ff740e2..8337e95ead 100644 --- a/vendor/src/github.com/hashicorp/serf/serf/event.go +++ b/vendor/src/github.com/hashicorp/serf/serf/event.go @@ -152,8 +152,8 @@ func (q *Query) Respond(buf []byte) error { } // Check the size limit - if len(raw) > QueryResponseSizeLimit { - return fmt.Errorf("response exceeds limit of %d bytes", QueryResponseSizeLimit) + if len(raw) > q.serf.config.QueryResponseSizeLimit { + return fmt.Errorf("response exceeds limit of %d bytes", q.serf.config.QueryResponseSizeLimit) } // Send the response diff --git a/vendor/src/github.com/hashicorp/serf/serf/merge_delegate.go b/vendor/src/github.com/hashicorp/serf/serf/merge_delegate.go index ece3e9767f..7fdc732887 100644 --- a/vendor/src/github.com/hashicorp/serf/serf/merge_delegate.go +++ b/vendor/src/github.com/hashicorp/serf/serf/merge_delegate.go @@ -7,29 +7,38 @@ import ( ) type MergeDelegate interface { - NotifyMerge([]*Member) (cancel bool) + NotifyMerge([]*Member) error } type mergeDelegate struct { serf *Serf } -func (m *mergeDelegate) NotifyMerge(nodes []*memberlist.Node) (cancel bool) { +func (m *mergeDelegate) NotifyMerge(nodes []*memberlist.Node) error { members := make([]*Member, len(nodes)) for idx, n := range nodes { - members[idx] = &Member{ - Name: n.Name, - Addr: net.IP(n.Addr), - Port: n.Port, - Tags: m.serf.decodeTags(n.Meta), - Status: StatusNone, - ProtocolMin: n.PMin, - ProtocolMax: n.PMax, - ProtocolCur: n.PCur, - DelegateMin: n.DMin, - DelegateMax: n.DMax, - DelegateCur: n.DCur, - } + members[idx] = m.nodeToMember(n) } return m.serf.config.Merge.NotifyMerge(members) } + +func (m *mergeDelegate) NotifyAlive(peer *memberlist.Node) error { + member := m.nodeToMember(peer) + return m.serf.config.Merge.NotifyMerge([]*Member{member}) +} + +func (m *mergeDelegate) nodeToMember(n *memberlist.Node) *Member { + return &Member{ + Name: n.Name, + Addr: net.IP(n.Addr), + Port: n.Port, + Tags: m.serf.decodeTags(n.Meta), + 
Status: StatusNone, + ProtocolMin: n.PMin, + ProtocolMax: n.PMax, + ProtocolCur: n.PCur, + DelegateMin: n.DMin, + DelegateMax: n.DMax, + DelegateCur: n.DCur, + } +} diff --git a/vendor/src/github.com/hashicorp/serf/serf/ping_delegate.go b/vendor/src/github.com/hashicorp/serf/serf/ping_delegate.go new file mode 100644 index 0000000000..a482685a20 --- /dev/null +++ b/vendor/src/github.com/hashicorp/serf/serf/ping_delegate.go @@ -0,0 +1,89 @@ +package serf + +import ( + "bytes" + "log" + "time" + + "github.com/armon/go-metrics" + "github.com/hashicorp/go-msgpack/codec" + "github.com/hashicorp/memberlist" + "github.com/hashicorp/serf/coordinate" +) + +// pingDelegate is notified when memberlist successfully completes a direct ping +// of a peer node. We use this to update our estimated network coordinate, as +// well as cache the coordinate of the peer. +type pingDelegate struct { + serf *Serf +} + +const ( + // PingVersion is an internal version for the ping message, above the normal + // versioning we get from the protocol version. This enables small updates + // to the ping message without a full protocol bump. + PingVersion = 1 +) + +// AckPayload is called to produce a payload to send back in response to a ping +// request. +func (p *pingDelegate) AckPayload() []byte { + var buf bytes.Buffer + + // The first byte is the version number, forming a simple header. + version := []byte{PingVersion} + buf.Write(version) + + // The rest of the message is the serialized coordinate. + enc := codec.NewEncoder(&buf, &codec.MsgpackHandle{}) + if err := enc.Encode(p.serf.coordClient.GetCoordinate()); err != nil { + log.Printf("[ERR] serf: Failed to encode coordinate: %v\n", err) + } + return buf.Bytes() +} + +// NotifyPingComplete is called when this node successfully completes a direct ping +// of a peer node. 
+func (p *pingDelegate) NotifyPingComplete(other *memberlist.Node, rtt time.Duration, payload []byte) { + if payload == nil || len(payload) == 0 { + return + } + + // Verify ping version in the header. + version := payload[0] + if version != PingVersion { + log.Printf("[ERR] serf: Unsupported ping version: %v", version) + return + } + + // Process the remainder of the message as a coordinate. + r := bytes.NewReader(payload[1:]) + dec := codec.NewDecoder(r, &codec.MsgpackHandle{}) + var coord coordinate.Coordinate + if err := dec.Decode(&coord); err != nil { + log.Printf("[ERR] serf: Failed to decode coordinate from ping: %v", err) + } + + // Apply the update. Since this is a coordinate coming from some place + // else we harden this and look for dimensionality problems proactively. + before := p.serf.coordClient.GetCoordinate() + if before.IsCompatibleWith(&coord) { + after := p.serf.coordClient.Update(other.Name, &coord, rtt) + + // Publish some metrics to give us an idea of how much we are + // adjusting each time we update. + d := float32(before.DistanceTo(after).Seconds() * 1.0e3) + metrics.AddSample([]string{"serf", "coordinate", "adjustment-ms"}, d) + + // Cache the coordinate for the other node, and add our own + // to the cache as well since it just got updated. This lets + // users call GetCachedCoordinate with our node name, which is + // more friendly. 
+ p.serf.coordCacheLock.Lock() + p.serf.coordCache[other.Name] = &coord + p.serf.coordCache[p.serf.config.NodeName] = p.serf.coordClient.GetCoordinate() + p.serf.coordCacheLock.Unlock() + } else { + log.Printf("[ERR] serf: Rejected bad coordinate: %v\n", coord) + } +} diff --git a/vendor/src/github.com/hashicorp/serf/serf/serf.go b/vendor/src/github.com/hashicorp/serf/serf/serf.go index a40ad06fbc..613b915dc4 100644 --- a/vendor/src/github.com/hashicorp/serf/serf/serf.go +++ b/vendor/src/github.com/hashicorp/serf/serf/serf.go @@ -17,6 +17,7 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/go-msgpack/codec" "github.com/hashicorp/memberlist" + "github.com/hashicorp/serf/coordinate" ) // These are the protocol versions that Serf can _understand_. These are @@ -91,6 +92,10 @@ type Serf struct { snapshotter *Snapshotter keyManager *KeyManager + + coordClient *coordinate.Client + coordCache map[string]*coordinate.Coordinate + coordCacheLock sync.RWMutex } // SerfState is the state of the Serf instance. @@ -209,10 +214,8 @@ type queries struct { } const ( - UserEventSizeLimit = 512 // Maximum byte size for event name and payload - QuerySizeLimit = 1024 // Maximum byte size for query - QueryResponseSizeLimit = 1024 // Maximum bytes size for response - snapshotSizeLimit = 128 * 1024 // Maximum 128 KB snapshot + UserEventSizeLimit = 512 // Maximum byte size for event name and payload + snapshotSizeLimit = 128 * 1024 // Maximum 128 KB snapshot ) // Create creates a new Serf instance, starting all the background tasks @@ -274,15 +277,25 @@ func Create(conf *Config) (*Serf, error) { } conf.EventCh = outCh + // Set up network coordinate client. 
+ if !conf.DisableCoordinates { + serf.coordClient, err = coordinate.NewClient(coordinate.DefaultConfig()) + if err != nil { + return nil, fmt.Errorf("Failed to create coordinate client: %v", err) + } + } + // Try access the snapshot var oldClock, oldEventClock, oldQueryClock LamportTime var prev []*PreviousNode if conf.SnapshotPath != "" { - eventCh, snap, err := NewSnapshotter(conf.SnapshotPath, + eventCh, snap, err := NewSnapshotter( + conf.SnapshotPath, snapshotSizeLimit, conf.RejoinAfterLeave, serf.logger, &serf.clock, + serf.coordClient, conf.EventCh, serf.shutdownCh) if err != nil { @@ -298,6 +311,13 @@ func Create(conf *Config) (*Serf, error) { serf.queryMinTime = oldQueryClock + 1 } + // Set up the coordinate cache. We do this after we read the snapshot to + // make sure we get a good initial value from there, if we got one. + if !conf.DisableCoordinates { + serf.coordCache = make(map[string]*coordinate.Coordinate) + serf.coordCache[conf.NodeName] = serf.coordClient.GetCoordinate() + } + // Setup the various broadcast queues, which we use to send our own // custom broadcasts along the gossip channel. serf.broadcasts = &memberlist.TransmitLimitedQueue{ @@ -347,17 +367,22 @@ func Create(conf *Config) (*Serf, error) { conf.MemberlistConfig.DelegateProtocolMax = ProtocolVersionMax conf.MemberlistConfig.Name = conf.NodeName conf.MemberlistConfig.ProtocolVersion = ProtocolVersionMap[conf.ProtocolVersion] + if !conf.DisableCoordinates { + conf.MemberlistConfig.Ping = &pingDelegate{serf: serf} + } // Setup a merge delegate if necessary if conf.Merge != nil { - conf.MemberlistConfig.Merge = &mergeDelegate{serf: serf} + md := &mergeDelegate{serf: serf} + conf.MemberlistConfig.Merge = md + conf.MemberlistConfig.Alive = md } // Create the underlying memberlist that will manage membership // and failure detection for the Serf instance. 
memberlist, err := memberlist.Create(conf.MemberlistConfig) if err != nil { - return nil, err + return nil, fmt.Errorf("Failed to create memberlist: %v", err) } serf.memberlist = memberlist @@ -486,8 +511,8 @@ func (s *Serf) Query(name string, payload []byte, params *QueryParam) (*QueryRes } // Check the size - if len(raw) > QuerySizeLimit { - return nil, fmt.Errorf("query exceeds limit of %d bytes", QuerySizeLimit) + if len(raw) > s.config.QuerySizeLimit { + return nil, fmt.Errorf("query exceeds limit of %d bytes", s.config.QuerySizeLimit) } // Register QueryResponse to track acks and responses @@ -950,6 +975,19 @@ func (s *Serf) handleNodeUpdate(n *memberlist.Node) { member.Port = n.Port member.Tags = s.decodeTags(n.Meta) + // Snag the latest versions. NOTE - the current memberlist code will NOT + // fire an update event if the metadata (for Serf, tags) stays the same + // and only the protocol versions change. If we make any Serf-level + // protocol changes where we want to get this event under those + // circumstances, we will need to update memberlist to do a check of + // versions as well as the metadata. + member.ProtocolMin = n.PMin + member.ProtocolMax = n.PMax + member.ProtocolCur = n.PCur + member.DelegateMin = n.DMin + member.DelegateMax = n.DMax + member.DelegateCur = n.DCur + // Update some metrics metrics.IncrCounter([]string{"serf", "member", "update"}, 1) @@ -1016,6 +1054,17 @@ func (s *Serf) handleNodeLeaveIntent(leaveMsg *messageLeave) bool { s.failedMembers = removeOldMember(s.failedMembers, member.Name) s.leftMembers = append(s.leftMembers, member) + // We must push a message indicating the node has now + // left to allow higher-level applications to handle the + // graceful leave.
+ s.logger.Printf("[INFO] serf: EventMemberLeave (forced): %s %s", + member.Member.Name, member.Member.Addr) + if s.config.EventCh != nil { + s.config.EventCh <- MemberEvent{ + Type: EventMemberLeave, + Members: []Member{member.Member}, + } + } return true default: return false @@ -1384,6 +1433,16 @@ func (s *Serf) reap(old []*memberState, timeout time.Duration) []*memberState { // Delete from members delete(s.members, m.Name) + // Tell the coordinate client the node has gone away and delete + // its cached coordinates. + if !s.config.DisableCoordinates { + s.coordClient.ForgetNode(m.Name) + + s.coordCacheLock.Lock() + delete(s.coordCache, m.Name) + s.coordCacheLock.Unlock() + } + // Send an event along s.logger.Printf("[INFO] serf: EventMemberReap: %s", m.Name) if s.config.EventCh != nil { @@ -1596,3 +1655,38 @@ func (s *Serf) writeKeyringFile() error { // Success! return nil } + +// GetCoordinate returns the network coordinate of the local node. +func (s *Serf) GetCoordinate() (*coordinate.Coordinate, error) { + if !s.config.DisableCoordinates { + return s.coordClient.GetCoordinate(), nil + } + + return nil, fmt.Errorf("Coordinates are disabled") +} + +// GetCachedCoordinate returns the network coordinate for the node with the given +// name. This will only be valid if DisableCoordinates is set to false. +func (s *Serf) GetCachedCoordinate(name string) (coord *coordinate.Coordinate, ok bool) { + if !s.config.DisableCoordinates { + s.coordCacheLock.RLock() + defer s.coordCacheLock.RUnlock() + if coord, ok = s.coordCache[name]; ok { + return coord, true + } + + return nil, false + } + + return nil, false +} + +// NumNodes returns the number of nodes in the serf cluster, regardless of +// their health or status. 
+func (s *Serf) NumNodes() (numNodes int) { + s.memberLock.RLock() + numNodes = len(s.members) + s.memberLock.RUnlock() + + return numNodes +} diff --git a/vendor/src/github.com/hashicorp/serf/serf/snapshot.go b/vendor/src/github.com/hashicorp/serf/serf/snapshot.go index 3a1eb92ce2..44f8a5175a 100644 --- a/vendor/src/github.com/hashicorp/serf/serf/snapshot.go +++ b/vendor/src/github.com/hashicorp/serf/serf/snapshot.go @@ -2,6 +2,7 @@ package serf import ( "bufio" + "encoding/json" "fmt" "log" "math/rand" @@ -12,6 +13,7 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/hashicorp/serf/coordinate" ) /* @@ -27,6 +29,7 @@ old events. const flushInterval = 500 * time.Millisecond const clockUpdateInterval = 500 * time.Millisecond +const coordinateUpdateInterval = 60 * time.Second const tmpExt = ".compact" // Snapshotter is responsible for ingesting events and persisting @@ -34,6 +37,7 @@ const tmpExt = ".compact" type Snapshotter struct { aliveNodes map[string]string clock *LamportClock + coordClient *coordinate.Client fh *os.File buffered *bufio.Writer inCh <-chan Event @@ -74,6 +78,7 @@ func NewSnapshotter(path string, rejoinAfterLeave bool, logger *log.Logger, clock *LamportClock, + coordClient *coordinate.Client, outCh chan<- Event, shutdownCh <-chan struct{}) (chan<- Event, *Snapshotter, error) { inCh := make(chan Event, 1024) @@ -96,6 +101,7 @@ func NewSnapshotter(path string, snap := &Snapshotter{ aliveNodes: make(map[string]string), clock: clock, + coordClient: coordClient, fh: fh, buffered: bufio.NewWriter(fh), inCh: inCh, @@ -171,6 +177,12 @@ func (s *Snapshotter) Leave() { // stream is a long running routine that is used to handle events func (s *Snapshotter) stream() { + clockTicker := time.NewTicker(clockUpdateInterval) + defer clockTicker.Stop() + + coordinateTicker := time.NewTicker(coordinateUpdateInterval) + defer coordinateTicker.Stop() + for { select { case <-s.leaveCh: @@ -209,9 +221,12 @@ func (s *Snapshotter) stream() { 
s.logger.Printf("[ERR] serf: Unknown event to snapshot: %#v", e) } - case <-time.After(clockUpdateInterval): + case <-clockTicker.C: s.updateClock() + case <-coordinateTicker.C: + s.updateCoordinate() + case <-s.shutdownCh: if err := s.buffered.Flush(); err != nil { s.logger.Printf("[ERR] serf: failed to flush snapshot: %v", err) @@ -258,6 +273,20 @@ func (s *Snapshotter) updateClock() { } } +// updateCoordinate is called periodically to write out the current local +// coordinate. It's safe to call this if coordinates aren't enabled (nil +// client) and it will be a no-op. +func (s *Snapshotter) updateCoordinate() { + if s.coordClient != nil { + encoded, err := json.Marshal(s.coordClient.GetCoordinate()) + if err != nil { + s.logger.Printf("[ERR] serf: Failed to encode coordinate: %v", err) + } else { + s.tryAppend(fmt.Sprintf("coordinate: %s\n", encoded)) + } + } +} + // processUserEvent is used to handle a single user event func (s *Snapshotter) processUserEvent(e UserEvent) { // Ignore old clocks @@ -362,6 +391,23 @@ func (s *Snapshotter) compact() error { } offset += int64(n) + // Write out the coordinate. 
+ if s.coordClient != nil { + encoded, err := json.Marshal(s.coordClient.GetCoordinate()) + if err != nil { + fh.Close() + return err + } + + line = fmt.Sprintf("coordinate: %s\n", encoded) + n, err = buf.WriteString(line) + if err != nil { + fh.Close() + return err + } + offset += int64(n) + } + // Flush the new snapshot err = buf.Flush() fh.Close() @@ -473,6 +519,20 @@ func (s *Snapshotter) replay() error { } s.lastQueryClock = LamportTime(timeInt) + } else if strings.HasPrefix(line, "coordinate: ") { + if s.coordClient == nil { + s.logger.Printf("[WARN] serf: Ignoring snapshot coordinates since they are disabled") + continue + } + + coordStr := strings.TrimPrefix(line, "coordinate: ") + var coord coordinate.Coordinate + err := json.Unmarshal([]byte(coordStr), &coord) + if err != nil { + s.logger.Printf("[WARN] serf: Failed to decode coordinate: %v", err) + continue + } + s.coordClient.SetCoordinate(&coord) } else if line == "leave" { // Ignore a leave if we plan on re-joining if s.rejoinAfterLeave { diff --git a/vendor/src/github.com/hashicorp/serf/website/LICENSE.md b/vendor/src/github.com/hashicorp/serf/website/source/LICENSE similarity index 100% rename from vendor/src/github.com/hashicorp/serf/website/LICENSE.md rename to vendor/src/github.com/hashicorp/serf/website/source/LICENSE