syntax = "proto2";

package raftpb;

import "gogoproto/gogo.proto";

// File-wide gogoproto generator switches.
option (gogoproto.marshaler_all) = true;
option (gogoproto.sizer_all) = true;
option (gogoproto.unmarshaler_all) = true;
option (gogoproto.goproto_getters_all) = false;
option (gogoproto.goproto_enum_prefix_all) = false;
option (gogoproto.goproto_unkeyed_all) = false;
option (gogoproto.goproto_unrecognized_all) = false;
option (gogoproto.goproto_sizecache_all) = false;

// EntryType tags the kind of payload carried by an Entry.
enum EntryType {
  EntryNormal = 0;
  // Corresponds to pb.ConfChange.
  EntryConfChange = 1;
  // Corresponds to pb.ConfChangeV2.
  EntryConfChangeV2 = 2;
}

// Entry is a single log entry.
//
// Declaration order intentionally differs from field-number order: Term and
// Index are declared first because they must be 64-bit aligned for atomic
// operations. Do not reorder the declarations.
message Entry {
  // Must be 64-bit aligned for atomic operations.
  optional uint64 Term = 2 [(gogoproto.nullable) = false];
  // Must be 64-bit aligned for atomic operations.
  optional uint64 Index = 3 [(gogoproto.nullable) = false];
  optional EntryType Type = 1 [(gogoproto.nullable) = false];
  optional bytes Data = 4;
}

// SnapshotMetadata carries the ConfState, index and term attached to a
// Snapshot.
message SnapshotMetadata {
  optional ConfState conf_state = 1 [(gogoproto.nullable) = false];
  optional uint64 index = 2 [(gogoproto.nullable) = false];
  optional uint64 term = 3 [(gogoproto.nullable) = false];
}

// Snapshot pairs an opaque data payload with its SnapshotMetadata.
message Snapshot {
  optional bytes data = 1;
  optional SnapshotMetadata metadata = 2 [(gogoproto.nullable) = false];
}

// For a description of the individual message types, see:
// https://pkg.go.dev/go.etcd.io/etcd/raft/v3#hdr-MessageType
enum MessageType {
  MsgHup = 0;
  MsgBeat = 1;
  MsgProp = 2;
  MsgApp = 3;
  MsgAppResp = 4;
  MsgVote = 5;
  MsgVoteResp = 6;
  MsgSnap = 7;
  MsgHeartbeat = 8;
  MsgHeartbeatResp = 9;
  MsgUnreachable = 10;
  MsgSnapStatus = 11;
  MsgCheckQuorum = 12;
  MsgTransferLeader = 13;
  MsgTimeoutNow = 14;
  MsgReadIndex = 15;
  MsgReadIndexResp = 16;
  MsgPreVote = 17;
  MsgPreVoteResp = 18;
}

// Message is the envelope for every MessageType listed above.
message Message {
  optional MessageType type = 1 [(gogoproto.nullable) = false];
  optional uint64 to = 2 [(gogoproto.nullable) = false];
  optional uint64 from = 3 [(gogoproto.nullable) = false];
  optional uint64 term = 4 [(gogoproto.nullable) = false];
  // logTerm is generally used when appending Raft logs to followers.
  // Two examples:
  //   (type=MsgApp,index=100,logTerm=5)
  //     the leader appends entries starting at index=101, and the term of
  //     the entry at index 100 is 5.
  //   (type=MsgAppResp,reject=true,index=100,logTerm=5)
  //     the follower rejects some entries from its leader because it already
  //     has an entry with term 5 at index 100.
  optional uint64 logTerm = 5 [(gogoproto.nullable) = false];
  optional uint64 index = 6 [(gogoproto.nullable) = false];
  repeated Entry entries = 7 [(gogoproto.nullable) = false];
  optional uint64 commit = 8 [(gogoproto.nullable) = false];
  optional Snapshot snapshot = 9 [(gogoproto.nullable) = false];
  optional bool reject = 10 [(gogoproto.nullable) = false];
  optional uint64 rejectHint = 11 [(gogoproto.nullable) = false];
  optional bytes context = 12;
}

// HardState groups the term, vote and commit values.
message HardState {
  optional uint64 term = 1 [(gogoproto.nullable) = false];
  optional uint64 vote = 2 [(gogoproto.nullable) = false];
  optional uint64 commit = 3 [(gogoproto.nullable) = false];
}

// ConfChangeTransition selects how a configuration change behaves with
// respect to joint consensus.
enum ConfChangeTransition {
  // Use the simple protocol automatically when possible; otherwise fall back
  // to ConfChangeTransitionJointImplicit. Most applications will want this.
  ConfChangeTransitionAuto = 0;

  // Always use joint consensus, and transition out of it automatically (by
  // proposing a zero configuration change).
  //
  // Suitable for applications that want to minimize the time spent in the
  // joint configuration and that do not store the joint configuration in the
  // state machine (outside of InitialState).
  ConfChangeTransitionJointImplicit = 1;

  // Use joint consensus and stay in the joint configuration until the
  // application proposes a no-op configuration change. Suitable for
  // applications that want explicit control over the transitions, for
  // example to use a custom payload (via the Context field).
  ConfChangeTransitionJointExplicit = 2;
}

// ConfState lists the members of the incoming and, when the configuration is
// joint, the outgoing config.
message ConfState {
  // Voters in the incoming config. (When the configuration is not joint, the
  // outgoing config is empty.)
  repeated uint64 voters = 1;

  // Learners in the incoming config.
  repeated uint64 learners = 2;

  // Voters in the outgoing config.
  repeated uint64 voters_outgoing = 3;

  // Nodes that will become learners once the outgoing config is removed.
  // These nodes are necessarily currently in nodes_joint (or they would have
  // been added to the incoming config right away).
  // NOTE(review): no field named nodes_joint exists in this message;
  // presumably this refers to the outgoing voters -- confirm.
  repeated uint64 learners_next = 4;

  // If set, the config is joint and Raft will automatically transition into
  // the final config (i.e. remove the outgoing config) when this is safe.
  optional bool auto_leave = 5 [(gogoproto.nullable) = false];
}

// ConfChangeType enumerates the operations a configuration change can apply
// to a node.
enum ConfChangeType {
  ConfChangeAddNode = 0;
  ConfChangeRemoveNode = 1;
  ConfChangeUpdateNode = 2;
  ConfChangeAddLearnerNode = 3;
}

// ConfChange describes a single configuration change. Field numbers are not
// in declaration order; id (number 1) is declared last.
message ConfChange {
  optional ConfChangeType type = 2 [(gogoproto.nullable) = false];
  optional uint64 node_id = 3 [
    (gogoproto.nullable) = false,
    (gogoproto.customname) = "NodeID"
  ];
  optional bytes context = 4;

  // NB: used only by etcd to thread through a unique identifier. Ideally it
  // should use the Context instead. ConfChangeV2 has no counterpart to this
  // field.
  optional uint64 id = 1 [
    (gogoproto.nullable) = false,
    (gogoproto.customname) = "ID"
  ];
}

// ConfChangeSingle is one individual configuration change operation. Several
// such operations can be carried out atomically via a ConfChangeV2.
message ConfChangeSingle {
  optional ConfChangeType type = 1 [(gogoproto.nullable) = false];
  optional uint64 node_id = 2 [
    (gogoproto.nullable) = false,
    (gogoproto.customname) = "NodeID"
  ];
}

// ConfChangeV2 messages initiate configuration changes. They support both the
// simple "one at a time" membership change protocol and full Joint Consensus,
// which allows arbitrary changes in membership.
//
// The supplied context is treated as an opaque payload and can be used to
// attach an action on the state machine to the application of the config
// change proposal. Note that, contrary to Joint Consensus as outlined in the
// Raft paper[1], configuration changes become active when they are *applied*
// to the state machine (not when they are appended to the log).
//
// The simple protocol can be used whenever only a single change is made.
//
// Non-simple changes require Joint Consensus, for which two configuration
// changes are run. The first one specifies the desired changes and moves the
// Raft group into the joint configuration, in which quorum requires a
// majority of both the pre-change and post-change configurations. Joint
// Consensus avoids entering fragile intermediate configurations that could
// compromise survivability. For example, without Joint Consensus, when
// running across three availability zones with a replication factor of
// three, it is not possible to replace a voter without entering an
// intermediate configuration that does not survive the outage of one
// availability zone.
//
// The provided ConfChangeTransition specifies how (and whether) Joint
// Consensus is used, and assigns the task of leaving the joint configuration
// either to Raft or to the application. Leaving the joint configuration is
// accomplished by proposing a ConfChangeV2 in which only the Context field
// is (optionally) populated.
//
// For details on Raft membership changes, see:
//
// [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf
message ConfChangeV2 {
  optional ConfChangeTransition transition = 1 [(gogoproto.nullable) = false];
  repeated ConfChangeSingle changes = 2 [(gogoproto.nullable) = false];
  optional bytes context = 3;
}