mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Bump libkv to 458977154600b9f23984d9f4b82e79570b5ae12b
Signed-off-by: John Howard <jhoward@microsoft.com> As well as bumping, libkv now requires go.etcd.io/bolt rather than boltdb/bolt. Hence removed bolt from vendor.conf, vendored go.etcd.io/bbot @ v1.3.1-etcd.8 and rerun vndr.
This commit is contained in:
parent
a906968a3f
commit
9ca55c7a60
31 changed files with 764 additions and 417 deletions
|
@ -4,18 +4,18 @@ github.com/Microsoft/go-winio v0.4.11
|
|||
github.com/Microsoft/hcsshim v0.7.3
|
||||
github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
|
||||
github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80
|
||||
github.com/boltdb/bolt fff57c100f4dea1905678da7e90d92429dff2904
|
||||
github.com/codegangsta/cli a65b733b303f0055f8d324d805f393cd3e7a7904
|
||||
github.com/containerd/continuity d3c23511c1bf5851696cba83143d9cbcd666869b
|
||||
github.com/coreos/etcd v3.2.1
|
||||
github.com/coreos/go-semver v0.2.0
|
||||
github.com/deckarep/golang-set ef32fa3046d9f249d399f98ebaf9be944430fd1d
|
||||
go.etcd.io/bbolt v1.3.1-etcd.8
|
||||
|
||||
github.com/docker/docker 162ba6016def672690ee4a1f3978368853a1e149
|
||||
github.com/docker/go-connections 7beb39f0b969b075d1325fecb092faf27fd357b6
|
||||
github.com/docker/go-events 9461782956ad83b30282bf90e31fa6a70c255ba9
|
||||
github.com/docker/go-units 9e638d38cf6977a37a8ea0078f3ee75a7cdb2dd1
|
||||
github.com/docker/libkv 1d8431073ae03cdaedb198a89722f3aab6d418ef
|
||||
github.com/docker/libkv 458977154600b9f23984d9f4b82e79570b5ae12b
|
||||
|
||||
github.com/godbus/dbus v4.0.0
|
||||
github.com/gogo/protobuf v1.0.0
|
||||
|
|
6
libnetwork/vendor/github.com/docker/libkv/README.md
generated
vendored
6
libnetwork/vendor/github.com/docker/libkv/README.md
generated
vendored
|
@ -90,7 +90,7 @@ Calls like `WatchTree` may return different events (or number of events) dependi
|
|||
|
||||
Only `Consul` and `etcd` have support for TLS and you should build and provide your own `config.TLS` object to feed the client. Support is planned for `zookeeper`.
|
||||
|
||||
##Roadmap
|
||||
## Roadmap
|
||||
|
||||
- Make the API nicer to use (using `options`)
|
||||
- Provide more options (`consistency` for example)
|
||||
|
@ -98,10 +98,10 @@ Only `Consul` and `etcd` have support for TLS and you should build and provide y
|
|||
- Better key formatting
|
||||
- New backends?
|
||||
|
||||
##Contributing
|
||||
## Contributing
|
||||
|
||||
Want to hack on libkv? [Docker's contributions guidelines](https://github.com/docker/docker/blob/master/CONTRIBUTING.md) apply.
|
||||
|
||||
##Copyright and license
|
||||
## Copyright and license
|
||||
|
||||
Copyright © 2014-2016 Docker, Inc. All rights reserved, except as follows. Code is released under the Apache 2.0 license. The README.md file, and files in the "docs" folder are licensed under the Creative Commons Attribution 4.0 International License under the terms and conditions set forth in the file "LICENSE.docs". You may obtain a duplicate copy of the same license, titled CC-BY-SA-4.0, at http://creativecommons.org/licenses/by/4.0/.
|
||||
|
|
2
libnetwork/vendor/github.com/docker/libkv/store/boltdb/boltdb.go
generated
vendored
2
libnetwork/vendor/github.com/docker/libkv/store/boltdb/boltdb.go
generated
vendored
|
@ -10,9 +10,9 @@ import (
|
|||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/boltdb/bolt"
|
||||
"github.com/docker/libkv"
|
||||
"github.com/docker/libkv/store"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
184
libnetwork/vendor/github.com/docker/libkv/store/etcd/etcd.go
generated
vendored
184
libnetwork/vendor/github.com/docker/libkv/store/etcd/etcd.go
generated
vendored
|
@ -1,6 +1,7 @@
|
|||
package etcd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"log"
|
||||
|
@ -9,8 +10,6 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
|
||||
etcd "github.com/coreos/etcd/client"
|
||||
"github.com/docker/libkv"
|
||||
"github.com/docker/libkv/store"
|
||||
|
@ -30,13 +29,29 @@ type Etcd struct {
|
|||
}
|
||||
|
||||
type etcdLock struct {
|
||||
client etcd.KeysAPI
|
||||
stopLock chan struct{}
|
||||
client etcd.KeysAPI
|
||||
key string
|
||||
value string
|
||||
ttl time.Duration
|
||||
|
||||
// Closed when the caller wants to stop renewing the lock. I'm not sure
|
||||
// why this is even used - you could just call the Unlock() method.
|
||||
stopRenew chan struct{}
|
||||
key string
|
||||
value string
|
||||
last *etcd.Response
|
||||
ttl time.Duration
|
||||
// When the lock is held, this is the last modified index of the key.
|
||||
// Used for conditional updates when extending the lock TTL and when
|
||||
// conditionall deleteing when Unlock() is called.
|
||||
lastIndex uint64
|
||||
// When the lock is held, this function will cancel the locked context.
|
||||
// This is called both by the Unlock() method in order to stop the
|
||||
// background holding goroutine and in a deferred call in that background
|
||||
// holding goroutine in case the lock is lost due to an error or the
|
||||
// stopRenew channel is closed. Calling this function also closes the chan
|
||||
// returned by the Lock() method.
|
||||
cancel context.CancelFunc
|
||||
// Used to sync the Unlock() call with the background holding goroutine.
|
||||
// This channel is closed when that background goroutine exits, signalling
|
||||
// that it is okay to conditionally delete the key.
|
||||
doneHolding chan struct{}
|
||||
}
|
||||
|
||||
const (
|
||||
|
@ -472,112 +487,97 @@ func (s *Etcd) NewLock(key string, options *store.LockOptions) (lock store.Locke
|
|||
// doing so. It returns a channel that is closed if our
|
||||
// lock is lost or if an error occurs
|
||||
func (l *etcdLock) Lock(stopChan chan struct{}) (<-chan struct{}, error) {
|
||||
|
||||
// Lock holder channel
|
||||
lockHeld := make(chan struct{})
|
||||
stopLocking := l.stopRenew
|
||||
|
||||
// Conditional Set - only if the key does not exist.
|
||||
setOpts := &etcd.SetOptions{
|
||||
TTL: l.ttl,
|
||||
TTL: l.ttl,
|
||||
PrevExist: etcd.PrevNoExist,
|
||||
}
|
||||
|
||||
for {
|
||||
setOpts.PrevExist = etcd.PrevNoExist
|
||||
resp, err := l.client.Set(context.Background(), l.key, l.value, setOpts)
|
||||
if err != nil {
|
||||
if etcdError, ok := err.(etcd.Error); ok {
|
||||
if etcdError.Code != etcd.ErrorCodeNodeExist {
|
||||
return nil, err
|
||||
}
|
||||
setOpts.PrevIndex = ^uint64(0)
|
||||
}
|
||||
} else {
|
||||
setOpts.PrevIndex = resp.Node.ModifiedIndex
|
||||
}
|
||||
|
||||
setOpts.PrevExist = etcd.PrevExist
|
||||
l.last, err = l.client.Set(context.Background(), l.key, l.value, setOpts)
|
||||
|
||||
if err == nil {
|
||||
// Leader section
|
||||
l.stopLock = stopLocking
|
||||
go l.holdLock(l.key, lockHeld, stopLocking)
|
||||
break
|
||||
} else {
|
||||
// If this is a legitimate error, return
|
||||
if etcdError, ok := err.(etcd.Error); ok {
|
||||
if etcdError.Code != etcd.ErrorCodeTestFailed {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
// Acquired the lock!
|
||||
l.lastIndex = resp.Node.ModifiedIndex
|
||||
lockedCtx, cancel := context.WithCancel(context.Background())
|
||||
l.cancel = cancel
|
||||
l.doneHolding = make(chan struct{})
|
||||
|
||||
// Seeker section
|
||||
errorCh := make(chan error)
|
||||
chWStop := make(chan bool)
|
||||
free := make(chan bool)
|
||||
go l.holdLock(lockedCtx)
|
||||
|
||||
go l.waitLock(l.key, errorCh, chWStop, free)
|
||||
|
||||
// Wait for the key to be available or for
|
||||
// a signal to stop trying to lock the key
|
||||
select {
|
||||
case <-free:
|
||||
break
|
||||
case err := <-errorCh:
|
||||
return nil, err
|
||||
case <-stopChan:
|
||||
return nil, ErrAbortTryLock
|
||||
}
|
||||
|
||||
// Delete or Expire event occurred
|
||||
// Retry
|
||||
return lockedCtx.Done(), nil
|
||||
}
|
||||
}
|
||||
|
||||
return lockHeld, nil
|
||||
etcdErr, ok := err.(etcd.Error)
|
||||
if !ok || etcdErr.Code != etcd.ErrorCodeNodeExist {
|
||||
return nil, err // Unexpected error.
|
||||
}
|
||||
|
||||
// Need to wait for the lock key to expire or be deleted.
|
||||
if err := l.waitLock(stopChan, etcdErr.Index); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Delete or Expire event occurred.
|
||||
// Retry
|
||||
}
|
||||
}
|
||||
|
||||
// Hold the lock as long as we can
|
||||
// Hold the lock as long as we can.
|
||||
// Updates the key ttl periodically until we receive
|
||||
// an explicit stop signal from the Unlock method
|
||||
func (l *etcdLock) holdLock(key string, lockHeld chan struct{}, stopLocking <-chan struct{}) {
|
||||
defer close(lockHeld)
|
||||
// an explicit stop signal from the Unlock method OR
|
||||
// the stopRenew channel is closed.
|
||||
func (l *etcdLock) holdLock(ctx context.Context) {
|
||||
defer close(l.doneHolding)
|
||||
defer l.cancel()
|
||||
|
||||
update := time.NewTicker(l.ttl / 3)
|
||||
defer update.Stop()
|
||||
|
||||
var err error
|
||||
setOpts := &etcd.SetOptions{TTL: l.ttl}
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-update.C:
|
||||
setOpts.PrevIndex = l.last.Node.ModifiedIndex
|
||||
l.last, err = l.client.Set(context.Background(), key, l.value, setOpts)
|
||||
setOpts.PrevIndex = l.lastIndex
|
||||
resp, err := l.client.Set(ctx, l.key, l.value, setOpts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
case <-stopLocking:
|
||||
l.lastIndex = resp.Node.ModifiedIndex
|
||||
case <-l.stopRenew:
|
||||
return
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// WaitLock simply waits for the key to be available for creation
|
||||
func (l *etcdLock) waitLock(key string, errorCh chan error, stopWatchCh chan bool, free chan<- bool) {
|
||||
opts := &etcd.WatcherOptions{Recursive: false}
|
||||
watcher := l.client.Watcher(key, opts)
|
||||
|
||||
for {
|
||||
event, err := watcher.Next(context.Background())
|
||||
if err != nil {
|
||||
errorCh <- err
|
||||
return
|
||||
// WaitLock simply waits for the key to be available for creation.
|
||||
func (l *etcdLock) waitLock(stopWait <-chan struct{}, afterIndex uint64) error {
|
||||
waitCtx, waitCancel := context.WithCancel(context.Background())
|
||||
defer waitCancel()
|
||||
go func() {
|
||||
select {
|
||||
case <-stopWait:
|
||||
// If the caller closes the stopWait, cancel the wait context.
|
||||
waitCancel()
|
||||
case <-waitCtx.Done():
|
||||
// No longer waiting.
|
||||
}
|
||||
if event.Action == "delete" || event.Action == "expire" {
|
||||
free <- true
|
||||
return
|
||||
}()
|
||||
|
||||
watcher := l.client.Watcher(l.key, &etcd.WatcherOptions{AfterIndex: afterIndex})
|
||||
for {
|
||||
event, err := watcher.Next(waitCtx)
|
||||
if err != nil {
|
||||
if err == context.Canceled {
|
||||
return ErrAbortTryLock
|
||||
}
|
||||
return err
|
||||
}
|
||||
switch event.Action {
|
||||
case "delete", "compareAndDelete", "expire":
|
||||
return nil // The key has been deleted or expired.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -585,19 +585,17 @@ func (l *etcdLock) waitLock(key string, errorCh chan error, stopWatchCh chan boo
|
|||
// Unlock the "key". Calling unlock while
|
||||
// not holding the lock will throw an error
|
||||
func (l *etcdLock) Unlock() error {
|
||||
if l.stopLock != nil {
|
||||
l.stopLock <- struct{}{}
|
||||
}
|
||||
if l.last != nil {
|
||||
l.cancel() // Will signal the holdLock goroutine to exit.
|
||||
<-l.doneHolding // Wait for the holdLock goroutine to exit.
|
||||
|
||||
var err error
|
||||
if l.lastIndex != 0 {
|
||||
delOpts := &etcd.DeleteOptions{
|
||||
PrevIndex: l.last.Node.ModifiedIndex,
|
||||
}
|
||||
_, err := l.client.Delete(context.Background(), l.key, delOpts)
|
||||
if err != nil {
|
||||
return err
|
||||
PrevIndex: l.lastIndex,
|
||||
}
|
||||
_, err = l.client.Delete(context.Background(), l.key, delOpts)
|
||||
}
|
||||
return nil
|
||||
return err
|
||||
}
|
||||
|
||||
// Close closes the client connection
|
||||
|
|
|
@ -1,5 +1,18 @@
|
|||
Bolt [](https://coveralls.io/r/boltdb/bolt?branch=master) [](https://godoc.org/github.com/boltdb/bolt) 
|
||||
====
|
||||
bbolt
|
||||
=====
|
||||
|
||||
[](https://goreportcard.com/report/github.com/etcd-io/bbolt)
|
||||
[](https://codecov.io/gh/etcd-io/bbolt)
|
||||
[](https://travis-ci.com/etcd-io/bbolt)
|
||||
[](https://godoc.org/github.com/etcd-io/bbolt)
|
||||
[](https://github.com/etcd-io/bbolt/releases)
|
||||
[](https://github.com/etcd-io/bbolt/blob/master/LICENSE)
|
||||
|
||||
bbolt is a fork of [Ben Johnson's][gh_ben] [Bolt][bolt] key/value
|
||||
store. The purpose of this fork is to provide the Go community with an active
|
||||
maintenance and development target for Bolt; the goal is improved reliability
|
||||
and stability. bbolt includes bug fixes, performance enhancements, and features
|
||||
not found in Bolt while preserving backwards compatibility with the Bolt API.
|
||||
|
||||
Bolt is a pure Go key/value store inspired by [Howard Chu's][hyc_symas]
|
||||
[LMDB project][lmdb]. The goal of the project is to provide a simple,
|
||||
|
@ -10,6 +23,8 @@ Since Bolt is meant to be used as such a low-level piece of functionality,
|
|||
simplicity is key. The API will be small and only focus on getting values
|
||||
and setting values. That's it.
|
||||
|
||||
[gh_ben]: https://github.com/benbjohnson
|
||||
[bolt]: https://github.com/boltdb/bolt
|
||||
[hyc_symas]: https://twitter.com/hyc_symas
|
||||
[lmdb]: http://symas.com/mdb/
|
||||
|
||||
|
@ -17,40 +32,46 @@ and setting values. That's it.
|
|||
|
||||
Bolt is stable, the API is fixed, and the file format is fixed. Full unit
|
||||
test coverage and randomized black box testing are used to ensure database
|
||||
consistency and thread safety. Bolt is currently in high-load production
|
||||
consistency and thread safety. Bolt is currently used in high-load production
|
||||
environments serving databases as large as 1TB. Many companies such as
|
||||
Shopify and Heroku use Bolt-backed services every day.
|
||||
|
||||
## Project versioning
|
||||
|
||||
bbolt uses [semantic versioning](http://semver.org).
|
||||
API should not change between patch and minor releases.
|
||||
New minor versions may add additional features to the API.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Getting Started](#getting-started)
|
||||
- [Installing](#installing)
|
||||
- [Opening a database](#opening-a-database)
|
||||
- [Transactions](#transactions)
|
||||
- [Read-write transactions](#read-write-transactions)
|
||||
- [Read-only transactions](#read-only-transactions)
|
||||
- [Batch read-write transactions](#batch-read-write-transactions)
|
||||
- [Managing transactions manually](#managing-transactions-manually)
|
||||
- [Using buckets](#using-buckets)
|
||||
- [Using key/value pairs](#using-keyvalue-pairs)
|
||||
- [Autoincrementing integer for the bucket](#autoincrementing-integer-for-the-bucket)
|
||||
- [Iterating over keys](#iterating-over-keys)
|
||||
- [Prefix scans](#prefix-scans)
|
||||
- [Range scans](#range-scans)
|
||||
- [ForEach()](#foreach)
|
||||
- [Nested buckets](#nested-buckets)
|
||||
- [Database backups](#database-backups)
|
||||
- [Statistics](#statistics)
|
||||
- [Read-Only Mode](#read-only-mode)
|
||||
- [Mobile Use (iOS/Android)](#mobile-use-iosandroid)
|
||||
- [Resources](#resources)
|
||||
- [Comparison with other databases](#comparison-with-other-databases)
|
||||
- [Postgres, MySQL, & other relational databases](#postgres-mysql--other-relational-databases)
|
||||
- [LevelDB, RocksDB](#leveldb-rocksdb)
|
||||
- [LMDB](#lmdb)
|
||||
- [Caveats & Limitations](#caveats--limitations)
|
||||
- [Reading the Source](#reading-the-source)
|
||||
- [Other Projects Using Bolt](#other-projects-using-bolt)
|
||||
- [Getting Started](#getting-started)
|
||||
- [Installing](#installing)
|
||||
- [Opening a database](#opening-a-database)
|
||||
- [Transactions](#transactions)
|
||||
- [Read-write transactions](#read-write-transactions)
|
||||
- [Read-only transactions](#read-only-transactions)
|
||||
- [Batch read-write transactions](#batch-read-write-transactions)
|
||||
- [Managing transactions manually](#managing-transactions-manually)
|
||||
- [Using buckets](#using-buckets)
|
||||
- [Using key/value pairs](#using-keyvalue-pairs)
|
||||
- [Autoincrementing integer for the bucket](#autoincrementing-integer-for-the-bucket)
|
||||
- [Iterating over keys](#iterating-over-keys)
|
||||
- [Prefix scans](#prefix-scans)
|
||||
- [Range scans](#range-scans)
|
||||
- [ForEach()](#foreach)
|
||||
- [Nested buckets](#nested-buckets)
|
||||
- [Database backups](#database-backups)
|
||||
- [Statistics](#statistics)
|
||||
- [Read-Only Mode](#read-only-mode)
|
||||
- [Mobile Use (iOS/Android)](#mobile-use-iosandroid)
|
||||
- [Resources](#resources)
|
||||
- [Comparison with other databases](#comparison-with-other-databases)
|
||||
- [Postgres, MySQL, & other relational databases](#postgres-mysql--other-relational-databases)
|
||||
- [LevelDB, RocksDB](#leveldb-rocksdb)
|
||||
- [LMDB](#lmdb)
|
||||
- [Caveats & Limitations](#caveats--limitations)
|
||||
- [Reading the Source](#reading-the-source)
|
||||
- [Other Projects Using Bolt](#other-projects-using-bolt)
|
||||
|
||||
## Getting Started
|
||||
|
||||
|
@ -59,13 +80,28 @@ Shopify and Heroku use Bolt-backed services every day.
|
|||
To start using Bolt, install Go and run `go get`:
|
||||
|
||||
```sh
|
||||
$ go get github.com/boltdb/bolt/...
|
||||
$ go get go.etcd.io/bbolt/...
|
||||
```
|
||||
|
||||
This will retrieve the library and install the `bolt` command line utility into
|
||||
your `$GOBIN` path.
|
||||
|
||||
|
||||
### Importing bbolt
|
||||
|
||||
To use bbolt as an embedded key-value store, import as:
|
||||
|
||||
```go
|
||||
import bolt "go.etcd.io/bbolt"
|
||||
|
||||
db, err := bolt.Open(path, 0666, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer db.Close()
|
||||
```
|
||||
|
||||
|
||||
### Opening a database
|
||||
|
||||
The top-level object in Bolt is a `DB`. It is represented as a single file on
|
||||
|
@ -79,7 +115,7 @@ package main
|
|||
import (
|
||||
"log"
|
||||
|
||||
"github.com/boltdb/bolt"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
@ -395,7 +431,7 @@ db.View(func(tx *bolt.Tx) error {
|
|||
c := tx.Bucket([]byte("MyBucket")).Cursor()
|
||||
|
||||
prefix := []byte("1234")
|
||||
for k, v := c.Seek(prefix); bytes.HasPrefix(k, prefix); k, v = c.Next() {
|
||||
for k, v := c.Seek(prefix); k != nil && bytes.HasPrefix(k, prefix); k, v = c.Next() {
|
||||
fmt.Printf("key=%s, value=%s\n", k, v)
|
||||
}
|
||||
|
||||
|
@ -464,6 +500,55 @@ func (*Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error)
|
|||
func (*Bucket) DeleteBucket(key []byte) error
|
||||
```
|
||||
|
||||
Say you had a multi-tenant application where the root level bucket was the account bucket. Inside of this bucket was a sequence of accounts which themselves are buckets. And inside the sequence bucket you could have many buckets pertaining to the Account itself (Users, Notes, etc) isolating the information into logical groupings.
|
||||
|
||||
```go
|
||||
|
||||
// createUser creates a new user in the given account.
|
||||
func createUser(accountID int, u *User) error {
|
||||
// Start the transaction.
|
||||
tx, err := db.Begin(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer tx.Rollback()
|
||||
|
||||
// Retrieve the root bucket for the account.
|
||||
// Assume this has already been created when the account was set up.
|
||||
root := tx.Bucket([]byte(strconv.FormatUint(accountID, 10)))
|
||||
|
||||
// Setup the users bucket.
|
||||
bkt, err := root.CreateBucketIfNotExists([]byte("USERS"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Generate an ID for the new user.
|
||||
userID, err := bkt.NextSequence()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
u.ID = userID
|
||||
|
||||
// Marshal and save the encoded user.
|
||||
if buf, err := json.Marshal(u); err != nil {
|
||||
return err
|
||||
} else if err := bkt.Put([]byte(strconv.FormatUint(u.ID, 10)), buf); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Commit the transaction.
|
||||
if err := tx.Commit(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
### Database backups
|
||||
|
||||
|
@ -473,7 +558,7 @@ this from a read-only transaction, it will perform a hot backup and not block
|
|||
your other database reads and writes.
|
||||
|
||||
By default, it will use a regular file handle which will utilize the operating
|
||||
system's page cache. See the [`Tx`](https://godoc.org/github.com/boltdb/bolt#Tx)
|
||||
system's page cache. See the [`Tx`](https://godoc.org/go.etcd.io/bbolt#Tx)
|
||||
documentation for information about optimizing for larger-than-RAM datasets.
|
||||
|
||||
One common use case is to backup over HTTP so you can use tools like `cURL` to
|
||||
|
@ -719,6 +804,9 @@ Here are a few things to note when evaluating and using Bolt:
|
|||
can be reused by a new page or can be unmapped from virtual memory and you'll
|
||||
see an `unexpected fault address` panic when accessing it.
|
||||
|
||||
* Bolt uses an exclusive write lock on the database file so it cannot be
|
||||
shared by multiple processes.
|
||||
|
||||
* Be careful when using `Bucket.FillPercent`. Setting a high fill percent for
|
||||
buckets that have random inserts will cause your database to have very poor
|
||||
page utilization.
|
||||
|
@ -759,7 +847,7 @@ Here are a few things to note when evaluating and using Bolt:
|
|||
|
||||
## Reading the Source
|
||||
|
||||
Bolt is a relatively small code base (<3KLOC) for an embedded, serializable,
|
||||
Bolt is a relatively small code base (<5KLOC) for an embedded, serializable,
|
||||
transactional key/value database so it can be a good starting point for people
|
||||
interested in how databases work.
|
||||
|
||||
|
@ -811,47 +899,55 @@ them via pull request.
|
|||
|
||||
Below is a list of public, open source projects that use Bolt:
|
||||
|
||||
* [BoltDbWeb](https://github.com/evnix/boltdbweb) - A web based GUI for BoltDB files.
|
||||
* [Operation Go: A Routine Mission](http://gocode.io) - An online programming game for Golang using Bolt for user accounts and a leaderboard.
|
||||
* [Algernon](https://github.com/xyproto/algernon) - A HTTP/2 web server with built-in support for Lua. Uses BoltDB as the default database backend.
|
||||
* [Bazil](https://bazil.org/) - A file system that lets your data reside where it is most convenient for it to reside.
|
||||
* [DVID](https://github.com/janelia-flyem/dvid) - Added Bolt as optional storage engine and testing it against Basho-tuned leveldb.
|
||||
* [Skybox Analytics](https://github.com/skybox/skybox) - A standalone funnel analysis tool for web analytics.
|
||||
* [Scuttlebutt](https://github.com/benbjohnson/scuttlebutt) - Uses Bolt to store and process all Twitter mentions of GitHub projects.
|
||||
* [Wiki](https://github.com/peterhellberg/wiki) - A tiny wiki using Goji, BoltDB and Blackfriday.
|
||||
* [ChainStore](https://github.com/pressly/chainstore) - Simple key-value interface to a variety of storage engines organized as a chain of operations.
|
||||
* [MetricBase](https://github.com/msiebuhr/MetricBase) - Single-binary version of Graphite.
|
||||
* [Gitchain](https://github.com/gitchain/gitchain) - Decentralized, peer-to-peer Git repositories aka "Git meets Bitcoin".
|
||||
* [event-shuttle](https://github.com/sclasen/event-shuttle) - A Unix system service to collect and reliably deliver messages to Kafka.
|
||||
* [ipxed](https://github.com/kelseyhightower/ipxed) - Web interface and api for ipxed.
|
||||
* [bolter](https://github.com/hasit/bolter) - Command-line app for viewing BoltDB file in your terminal.
|
||||
* [boltcli](https://github.com/spacewander/boltcli) - the redis-cli for boltdb with Lua script support.
|
||||
* [BoltHold](https://github.com/timshannon/bolthold) - An embeddable NoSQL store for Go types built on BoltDB
|
||||
* [BoltStore](https://github.com/yosssi/boltstore) - Session store using Bolt.
|
||||
* [photosite/session](https://godoc.org/bitbucket.org/kardianos/photosite/session) - Sessions for a photo viewing site.
|
||||
* [LedisDB](https://github.com/siddontang/ledisdb) - A high performance NoSQL, using Bolt as optional storage.
|
||||
* [ipLocator](https://github.com/AndreasBriese/ipLocator) - A fast ip-geo-location-server using bolt with bloom filters.
|
||||
* [cayley](https://github.com/google/cayley) - Cayley is an open-source graph database using Bolt as optional backend.
|
||||
* [Boltdb Boilerplate](https://github.com/bobintornado/boltdb-boilerplate) - Boilerplate wrapper around bolt aiming to make simple calls one-liners.
|
||||
* [BoltDbWeb](https://github.com/evnix/boltdbweb) - A web based GUI for BoltDB files.
|
||||
* [bleve](http://www.blevesearch.com/) - A pure Go search engine similar to ElasticSearch that uses Bolt as the default storage backend.
|
||||
* [tentacool](https://github.com/optiflows/tentacool) - REST api server to manage system stuff (IP, DNS, Gateway...) on a linux server.
|
||||
* [Seaweed File System](https://github.com/chrislusf/seaweedfs) - Highly scalable distributed key~file system with O(1) disk read.
|
||||
* [InfluxDB](https://influxdata.com) - Scalable datastore for metrics, events, and real-time analytics.
|
||||
* [Freehold](http://tshannon.bitbucket.org/freehold/) - An open, secure, and lightweight platform for your files and data.
|
||||
* [Prometheus Annotation Server](https://github.com/oliver006/prom_annotation_server) - Annotation server for PromDash & Prometheus service monitoring system.
|
||||
* [Consul](https://github.com/hashicorp/consul) - Consul is service discovery and configuration made easy. Distributed, highly available, and datacenter-aware.
|
||||
* [Kala](https://github.com/ajvb/kala) - Kala is a modern job scheduler optimized to run on a single node. It is persistent, JSON over HTTP API, ISO 8601 duration notation, and dependent jobs.
|
||||
* [drive](https://github.com/odeke-em/drive) - drive is an unofficial Google Drive command line client for \*NIX operating systems.
|
||||
* [stow](https://github.com/djherbis/stow) - a persistence manager for objects
|
||||
backed by boltdb.
|
||||
* [btcwallet](https://github.com/btcsuite/btcwallet) - A bitcoin wallet.
|
||||
* [buckets](https://github.com/joyrexus/buckets) - a bolt wrapper streamlining
|
||||
simple tx and key scans.
|
||||
* [mbuckets](https://github.com/abhigupta912/mbuckets) - A Bolt wrapper that allows easy operations on multi level (nested) buckets.
|
||||
* [Request Baskets](https://github.com/darklynx/request-baskets) - A web service to collect arbitrary HTTP requests and inspect them via REST API or simple web UI, similar to [RequestBin](http://requestb.in/) service
|
||||
* [cayley](https://github.com/google/cayley) - Cayley is an open-source graph database using Bolt as optional backend.
|
||||
* [ChainStore](https://github.com/pressly/chainstore) - Simple key-value interface to a variety of storage engines organized as a chain of operations.
|
||||
* [Consul](https://github.com/hashicorp/consul) - Consul is service discovery and configuration made easy. Distributed, highly available, and datacenter-aware.
|
||||
* [DVID](https://github.com/janelia-flyem/dvid) - Added Bolt as optional storage engine and testing it against Basho-tuned leveldb.
|
||||
* [dcrwallet](https://github.com/decred/dcrwallet) - A wallet for the Decred cryptocurrency.
|
||||
* [drive](https://github.com/odeke-em/drive) - drive is an unofficial Google Drive command line client for \*NIX operating systems.
|
||||
* [event-shuttle](https://github.com/sclasen/event-shuttle) - A Unix system service to collect and reliably deliver messages to Kafka.
|
||||
* [Freehold](http://tshannon.bitbucket.org/freehold/) - An open, secure, and lightweight platform for your files and data.
|
||||
* [Go Report Card](https://goreportcard.com/) - Go code quality report cards as a (free and open source) service.
|
||||
* [Boltdb Boilerplate](https://github.com/bobintornado/boltdb-boilerplate) - Boilerplate wrapper around bolt aiming to make simple calls one-liners.
|
||||
* [lru](https://github.com/crowdriff/lru) - Easy to use Bolt-backed Least-Recently-Used (LRU) read-through cache with chainable remote stores.
|
||||
* [Storm](https://github.com/asdine/storm) - Simple and powerful ORM for BoltDB.
|
||||
* [GoWebApp](https://github.com/josephspurrier/gowebapp) - A basic MVC web application in Go using BoltDB.
|
||||
* [SimpleBolt](https://github.com/xyproto/simplebolt) - A simple way to use BoltDB. Deals mainly with strings.
|
||||
* [Algernon](https://github.com/xyproto/algernon) - A HTTP/2 web server with built-in support for Lua. Uses BoltDB as the default database backend.
|
||||
* [MuLiFS](https://github.com/dankomiocevic/mulifs) - Music Library Filesystem creates a filesystem to organise your music files.
|
||||
* [GoShort](https://github.com/pankajkhairnar/goShort) - GoShort is a URL shortener written in Golang and BoltDB for persistent key/value storage and for routing it's using high performent HTTPRouter.
|
||||
* [gopherpit](https://github.com/gopherpit/gopherpit) - A web service to manage Go remote import paths with custom domains
|
||||
* [Gitchain](https://github.com/gitchain/gitchain) - Decentralized, peer-to-peer Git repositories aka "Git meets Bitcoin".
|
||||
* [InfluxDB](https://influxdata.com) - Scalable datastore for metrics, events, and real-time analytics.
|
||||
* [ipLocator](https://github.com/AndreasBriese/ipLocator) - A fast ip-geo-location-server using bolt with bloom filters.
|
||||
* [ipxed](https://github.com/kelseyhightower/ipxed) - Web interface and api for ipxed.
|
||||
* [Ironsmith](https://github.com/timshannon/ironsmith) - A simple, script-driven continuous integration (build - > test -> release) tool, with no external dependencies
|
||||
* [Kala](https://github.com/ajvb/kala) - Kala is a modern job scheduler optimized to run on a single node. It is persistent, JSON over HTTP API, ISO 8601 duration notation, and dependent jobs.
|
||||
* [LedisDB](https://github.com/siddontang/ledisdb) - A high performance NoSQL, using Bolt as optional storage.
|
||||
* [lru](https://github.com/crowdriff/lru) - Easy to use Bolt-backed Least-Recently-Used (LRU) read-through cache with chainable remote stores.
|
||||
* [mbuckets](https://github.com/abhigupta912/mbuckets) - A Bolt wrapper that allows easy operations on multi level (nested) buckets.
|
||||
* [MetricBase](https://github.com/msiebuhr/MetricBase) - Single-binary version of Graphite.
|
||||
* [MuLiFS](https://github.com/dankomiocevic/mulifs) - Music Library Filesystem creates a filesystem to organise your music files.
|
||||
* [Operation Go: A Routine Mission](http://gocode.io) - An online programming game for Golang using Bolt for user accounts and a leaderboard.
|
||||
* [photosite/session](https://godoc.org/bitbucket.org/kardianos/photosite/session) - Sessions for a photo viewing site.
|
||||
* [Prometheus Annotation Server](https://github.com/oliver006/prom_annotation_server) - Annotation server for PromDash & Prometheus service monitoring system.
|
||||
* [reef-pi](https://github.com/reef-pi/reef-pi) - reef-pi is an award winning, modular, DIY reef tank controller using easy to learn electronics based on a Raspberry Pi.
|
||||
* [Request Baskets](https://github.com/darklynx/request-baskets) - A web service to collect arbitrary HTTP requests and inspect them via REST API or simple web UI, similar to [RequestBin](http://requestb.in/) service
|
||||
* [Seaweed File System](https://github.com/chrislusf/seaweedfs) - Highly scalable distributed key~file system with O(1) disk read.
|
||||
* [stow](https://github.com/djherbis/stow) - a persistence manager for objects
|
||||
backed by boltdb.
|
||||
* [Storm](https://github.com/asdine/storm) - Simple and powerful ORM for BoltDB.
|
||||
* [SimpleBolt](https://github.com/xyproto/simplebolt) - A simple way to use BoltDB. Deals mainly with strings.
|
||||
* [Skybox Analytics](https://github.com/skybox/skybox) - A standalone funnel analysis tool for web analytics.
|
||||
* [Scuttlebutt](https://github.com/benbjohnson/scuttlebutt) - Uses Bolt to store and process all Twitter mentions of GitHub projects.
|
||||
* [tentacool](https://github.com/optiflows/tentacool) - REST api server to manage system stuff (IP, DNS, Gateway...) on a linux server.
|
||||
* [torrent](https://github.com/anacrolix/torrent) - Full-featured BitTorrent client package and utilities in Go. BoltDB is a storage backend in development.
|
||||
* [Wiki](https://github.com/peterhellberg/wiki) - A tiny wiki using Goji, BoltDB and Blackfriday.
|
||||
|
||||
If you are using Bolt in a project please send a pull request to add it to the list.
|
|
@ -1,4 +1,4 @@
|
|||
package bolt
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0x7FFFFFFF // 2GB
|
|
@ -1,4 +1,4 @@
|
|||
package bolt
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
|
|
@ -1,4 +1,4 @@
|
|||
package bolt
|
||||
package bbolt
|
||||
|
||||
import "unsafe"
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
// +build arm64
|
||||
|
||||
package bolt
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
|
|
@ -1,4 +1,4 @@
|
|||
package bolt
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"syscall"
|
12
libnetwork/vendor/go.etcd.io/bbolt/bolt_mips64x.go
generated
vendored
Normal file
12
libnetwork/vendor/go.etcd.io/bbolt/bolt_mips64x.go
generated
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
// +build mips64 mips64le
|
||||
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0x8000000000 // 512GB
|
||||
|
||||
// maxAllocSize is the size used when creating array pointers.
|
||||
const maxAllocSize = 0x7FFFFFFF
|
||||
|
||||
// Are unaligned load/stores broken on this arch?
|
||||
var brokenUnaligned = false
|
12
libnetwork/vendor/go.etcd.io/bbolt/bolt_mipsx.go
generated
vendored
Normal file
12
libnetwork/vendor/go.etcd.io/bbolt/bolt_mipsx.go
generated
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
// +build mips mipsle
|
||||
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0x40000000 // 1GB
|
||||
|
||||
// maxAllocSize is the size used when creating array pointers.
|
||||
const maxAllocSize = 0xFFFFFFF
|
||||
|
||||
// Are unaligned load/stores broken on this arch?
|
||||
var brokenUnaligned = false
|
|
@ -1,4 +1,4 @@
|
|||
package bolt
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"syscall"
|
|
@ -1,9 +1,12 @@
|
|||
// +build ppc
|
||||
|
||||
package bolt
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0x7FFFFFFF // 2GB
|
||||
|
||||
// maxAllocSize is the size used when creating array pointers.
|
||||
const maxAllocSize = 0xFFFFFFF
|
||||
|
||||
// Are unaligned load/stores broken on this arch?
|
||||
var brokenUnaligned = false
|
|
@ -1,9 +1,12 @@
|
|||
// +build ppc64
|
||||
|
||||
package bolt
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
|
||||
|
||||
// maxAllocSize is the size used when creating array pointers.
|
||||
const maxAllocSize = 0x7FFFFFFF
|
||||
|
||||
// Are unaligned load/stores broken on this arch?
|
||||
var brokenUnaligned = false
|
|
@ -1,6 +1,6 @@
|
|||
// +build ppc64le
|
||||
|
||||
package bolt
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
|
|
@ -1,6 +1,6 @@
|
|||
// +build s390x
|
||||
|
||||
package bolt
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
|
|
@ -1,41 +1,43 @@
|
|||
// +build !windows,!plan9,!solaris
|
||||
|
||||
package bolt
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"syscall"
|
||||
"time"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// flock acquires an advisory lock on a file descriptor.
|
||||
func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
|
||||
func flock(db *DB, exclusive bool, timeout time.Duration) error {
|
||||
var t time.Time
|
||||
if timeout != 0 {
|
||||
t = time.Now()
|
||||
}
|
||||
fd := db.file.Fd()
|
||||
flag := syscall.LOCK_NB
|
||||
if exclusive {
|
||||
flag |= syscall.LOCK_EX
|
||||
} else {
|
||||
flag |= syscall.LOCK_SH
|
||||
}
|
||||
for {
|
||||
// If we're beyond our timeout then return an error.
|
||||
// This can only occur after we've attempted a flock once.
|
||||
if t.IsZero() {
|
||||
t = time.Now()
|
||||
} else if timeout > 0 && time.Since(t) > timeout {
|
||||
return ErrTimeout
|
||||
}
|
||||
flag := syscall.LOCK_SH
|
||||
if exclusive {
|
||||
flag = syscall.LOCK_EX
|
||||
}
|
||||
|
||||
// Otherwise attempt to obtain an exclusive lock.
|
||||
err := syscall.Flock(int(db.file.Fd()), flag|syscall.LOCK_NB)
|
||||
// Attempt to obtain an exclusive lock.
|
||||
err := syscall.Flock(int(fd), flag)
|
||||
if err == nil {
|
||||
return nil
|
||||
} else if err != syscall.EWOULDBLOCK {
|
||||
return err
|
||||
}
|
||||
|
||||
// If we timed out then return an error.
|
||||
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
|
||||
return ErrTimeout
|
||||
}
|
||||
|
||||
// Wait for a bit and try again.
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
time.Sleep(flockRetryTimeout)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -53,7 +55,9 @@ func mmap(db *DB, sz int) error {
|
|||
}
|
||||
|
||||
// Advise the kernel that the mmap is accessed randomly.
|
||||
if err := madvise(b, syscall.MADV_RANDOM); err != nil {
|
||||
err = madvise(b, syscall.MADV_RANDOM)
|
||||
if err != nil && err != syscall.ENOSYS {
|
||||
// Ignore not implemented error in kernel because it still works.
|
||||
return fmt.Errorf("madvise: %s", err)
|
||||
}
|
||||
|
|
@ -1,8 +1,7 @@
|
|||
package bolt
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"syscall"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
@ -11,36 +10,35 @@ import (
|
|||
)
|
||||
|
||||
// flock acquires an advisory lock on a file descriptor.
|
||||
func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
|
||||
func flock(db *DB, exclusive bool, timeout time.Duration) error {
|
||||
var t time.Time
|
||||
if timeout != 0 {
|
||||
t = time.Now()
|
||||
}
|
||||
fd := db.file.Fd()
|
||||
var lockType int16
|
||||
if exclusive {
|
||||
lockType = syscall.F_WRLCK
|
||||
} else {
|
||||
lockType = syscall.F_RDLCK
|
||||
}
|
||||
for {
|
||||
// If we're beyond our timeout then return an error.
|
||||
// This can only occur after we've attempted a flock once.
|
||||
if t.IsZero() {
|
||||
t = time.Now()
|
||||
} else if timeout > 0 && time.Since(t) > timeout {
|
||||
return ErrTimeout
|
||||
}
|
||||
var lock syscall.Flock_t
|
||||
lock.Start = 0
|
||||
lock.Len = 0
|
||||
lock.Pid = 0
|
||||
lock.Whence = 0
|
||||
lock.Pid = 0
|
||||
if exclusive {
|
||||
lock.Type = syscall.F_WRLCK
|
||||
} else {
|
||||
lock.Type = syscall.F_RDLCK
|
||||
}
|
||||
err := syscall.FcntlFlock(db.file.Fd(), syscall.F_SETLK, &lock)
|
||||
// Attempt to obtain an exclusive lock.
|
||||
lock := syscall.Flock_t{Type: lockType}
|
||||
err := syscall.FcntlFlock(fd, syscall.F_SETLK, &lock)
|
||||
if err == nil {
|
||||
return nil
|
||||
} else if err != syscall.EAGAIN {
|
||||
return err
|
||||
}
|
||||
|
||||
// If we timed out then return an error.
|
||||
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
|
||||
return ErrTimeout
|
||||
}
|
||||
|
||||
// Wait for a bit and try again.
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
time.Sleep(flockRetryTimeout)
|
||||
}
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package bolt
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
@ -16,8 +16,6 @@ var (
|
|||
)
|
||||
|
||||
const (
|
||||
lockExt = ".lock"
|
||||
|
||||
// see https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx
|
||||
flagLockExclusive = 2
|
||||
flagLockFailImmediately = 1
|
||||
|
@ -48,48 +46,47 @@ func fdatasync(db *DB) error {
|
|||
}
|
||||
|
||||
// flock acquires an advisory lock on a file descriptor.
|
||||
func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
|
||||
// Create a separate lock file on windows because a process
|
||||
// cannot share an exclusive lock on the same file. This is
|
||||
// needed during Tx.WriteTo().
|
||||
f, err := os.OpenFile(db.path+lockExt, os.O_CREATE, mode)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
db.lockfile = f
|
||||
|
||||
func flock(db *DB, exclusive bool, timeout time.Duration) error {
|
||||
var t time.Time
|
||||
if timeout != 0 {
|
||||
t = time.Now()
|
||||
}
|
||||
var flag uint32 = flagLockFailImmediately
|
||||
if exclusive {
|
||||
flag |= flagLockExclusive
|
||||
}
|
||||
for {
|
||||
// If we're beyond our timeout then return an error.
|
||||
// This can only occur after we've attempted a flock once.
|
||||
if t.IsZero() {
|
||||
t = time.Now()
|
||||
} else if timeout > 0 && time.Since(t) > timeout {
|
||||
return ErrTimeout
|
||||
}
|
||||
// Fix for https://github.com/etcd-io/bbolt/issues/121. Use byte-range
|
||||
// -1..0 as the lock on the database file.
|
||||
var m1 uint32 = (1 << 32) - 1 // -1 in a uint32
|
||||
err := lockFileEx(syscall.Handle(db.file.Fd()), flag, 0, 1, 0, &syscall.Overlapped{
|
||||
Offset: m1,
|
||||
OffsetHigh: m1,
|
||||
})
|
||||
|
||||
var flag uint32 = flagLockFailImmediately
|
||||
if exclusive {
|
||||
flag |= flagLockExclusive
|
||||
}
|
||||
|
||||
err := lockFileEx(syscall.Handle(db.lockfile.Fd()), flag, 0, 1, 0, &syscall.Overlapped{})
|
||||
if err == nil {
|
||||
return nil
|
||||
} else if err != errLockViolation {
|
||||
return err
|
||||
}
|
||||
|
||||
// If we timed oumercit then return an error.
|
||||
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
|
||||
return ErrTimeout
|
||||
}
|
||||
|
||||
// Wait for a bit and try again.
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
time.Sleep(flockRetryTimeout)
|
||||
}
|
||||
}
|
||||
|
||||
// funlock releases an advisory lock on a file descriptor.
|
||||
func funlock(db *DB) error {
|
||||
err := unlockFileEx(syscall.Handle(db.lockfile.Fd()), 0, 1, 0, &syscall.Overlapped{})
|
||||
db.lockfile.Close()
|
||||
os.Remove(db.path+lockExt)
|
||||
var m1 uint32 = (1 << 32) - 1 // -1 in a uint32
|
||||
err := unlockFileEx(syscall.Handle(db.file.Fd()), 0, 1, 0, &syscall.Overlapped{
|
||||
Offset: m1,
|
||||
OffsetHigh: m1,
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
// +build !windows,!plan9,!linux,!openbsd
|
||||
|
||||
package bolt
|
||||
package bbolt
|
||||
|
||||
// fdatasync flushes written data to a file descriptor.
|
||||
func fdatasync(db *DB) error {
|
|
@ -1,4 +1,4 @@
|
|||
package bolt
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
@ -14,13 +14,6 @@ const (
|
|||
MaxValueSize = (1 << 31) - 2
|
||||
)
|
||||
|
||||
const (
|
||||
maxUint = ^uint(0)
|
||||
minUint = 0
|
||||
maxInt = int(^uint(0) >> 1)
|
||||
minInt = -maxInt - 1
|
||||
)
|
||||
|
||||
const bucketHeaderSize = int(unsafe.Sizeof(bucket{}))
|
||||
|
||||
const (
|
||||
|
@ -175,9 +168,8 @@ func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
|
|||
if bytes.Equal(key, k) {
|
||||
if (flags & bucketLeafFlag) != 0 {
|
||||
return nil, ErrBucketExists
|
||||
} else {
|
||||
return nil, ErrIncompatibleValue
|
||||
}
|
||||
return nil, ErrIncompatibleValue
|
||||
}
|
||||
|
||||
// Create empty, inline bucket.
|
||||
|
@ -324,7 +316,12 @@ func (b *Bucket) Delete(key []byte) error {
|
|||
|
||||
// Move cursor to correct position.
|
||||
c := b.Cursor()
|
||||
_, _, flags := c.seek(key)
|
||||
k, _, flags := c.seek(key)
|
||||
|
||||
// Return nil if the key doesn't exist.
|
||||
if !bytes.Equal(key, k) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Return an error if there is already existing bucket value.
|
||||
if (flags & bucketLeafFlag) != 0 {
|
|
@ -1,4 +1,4 @@
|
|||
package bolt
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
@ -157,12 +157,6 @@ func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) {
|
|||
// Start from root page/node and traverse to correct page.
|
||||
c.stack = c.stack[:0]
|
||||
c.search(seek, c.bucket.root)
|
||||
ref := &c.stack[len(c.stack)-1]
|
||||
|
||||
// If the cursor is pointing to the end of page/node then return nil.
|
||||
if ref.index >= ref.count() {
|
||||
return nil, nil, 0
|
||||
}
|
||||
|
||||
// If this is a bucket then return a nil value.
|
||||
return c.keyValue()
|
||||
|
@ -339,6 +333,8 @@ func (c *Cursor) nsearch(key []byte) {
|
|||
// keyValue returns the key and value of the current leaf element.
|
||||
func (c *Cursor) keyValue() ([]byte, []byte, uint32) {
|
||||
ref := &c.stack[len(c.stack)-1]
|
||||
|
||||
// If the cursor is pointing to the end of page/node then return nil.
|
||||
if ref.count() == 0 || ref.index >= ref.count() {
|
||||
return nil, nil, 0
|
||||
}
|
228
libnetwork/vendor/github.com/boltdb/bolt/db.go → libnetwork/vendor/go.etcd.io/bbolt/db.go
generated
vendored
228
libnetwork/vendor/github.com/boltdb/bolt/db.go → libnetwork/vendor/go.etcd.io/bbolt/db.go
generated
vendored
|
@ -1,4 +1,4 @@
|
|||
package bolt
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
@ -7,8 +7,7 @@ import (
|
|||
"log"
|
||||
"os"
|
||||
"runtime"
|
||||
"runtime/debug"
|
||||
"strings"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
@ -23,6 +22,8 @@ const version = 2
|
|||
// Represents a marker value to indicate that a file is a Bolt DB.
|
||||
const magic uint32 = 0xED0CDAED
|
||||
|
||||
const pgidNoFreelist pgid = 0xffffffffffffffff
|
||||
|
||||
// IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
|
||||
// syncing changes to a file. This is required as some operating systems,
|
||||
// such as OpenBSD, do not have a unified buffer cache (UBC) and writes
|
||||
|
@ -39,6 +40,9 @@ const (
|
|||
// default page size for db is set to the OS page size.
|
||||
var defaultPageSize = os.Getpagesize()
|
||||
|
||||
// The time elapsed between consecutive file locking attempts.
|
||||
const flockRetryTimeout = 50 * time.Millisecond
|
||||
|
||||
// DB represents a collection of buckets persisted to a file on disk.
|
||||
// All data access is performed through transactions which can be obtained through the DB.
|
||||
// All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called.
|
||||
|
@ -61,6 +65,11 @@ type DB struct {
|
|||
// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
|
||||
NoSync bool
|
||||
|
||||
// When true, skips syncing freelist to disk. This improves the database
|
||||
// write performance under normal operation, but requires a full database
|
||||
// re-sync during recovery.
|
||||
NoFreelistSync bool
|
||||
|
||||
// When true, skips the truncate call when growing the database.
|
||||
// Setting this to true is only safe on non-ext3/ext4 systems.
|
||||
// Skipping truncation avoids preallocation of hard drive space and
|
||||
|
@ -96,8 +105,7 @@ type DB struct {
|
|||
|
||||
path string
|
||||
file *os.File
|
||||
lockfile *os.File // windows only
|
||||
dataref []byte // mmap'ed readonly, write throws SEGV
|
||||
dataref []byte // mmap'ed readonly, write throws SEGV
|
||||
data *[maxMapSize]byte
|
||||
datasz int
|
||||
filesz int // current on disk file size
|
||||
|
@ -107,9 +115,11 @@ type DB struct {
|
|||
opened bool
|
||||
rwtx *Tx
|
||||
txs []*Tx
|
||||
freelist *freelist
|
||||
stats Stats
|
||||
|
||||
freelist *freelist
|
||||
freelistLoad sync.Once
|
||||
|
||||
pagePool sync.Pool
|
||||
|
||||
batchMu sync.Mutex
|
||||
|
@ -148,14 +158,17 @@ func (db *DB) String() string {
|
|||
// If the file does not exist then it will be created automatically.
|
||||
// Passing in nil options will cause Bolt to open the database with the default options.
|
||||
func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
|
||||
var db = &DB{opened: true}
|
||||
|
||||
db := &DB{
|
||||
opened: true,
|
||||
}
|
||||
// Set default options if no options are provided.
|
||||
if options == nil {
|
||||
options = DefaultOptions
|
||||
}
|
||||
db.NoSync = options.NoSync
|
||||
db.NoGrowSync = options.NoGrowSync
|
||||
db.MmapFlags = options.MmapFlags
|
||||
db.NoFreelistSync = options.NoFreelistSync
|
||||
|
||||
// Set default values for later DB operations.
|
||||
db.MaxBatchSize = DefaultMaxBatchSize
|
||||
|
@ -183,7 +196,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
|
|||
// if !options.ReadOnly.
|
||||
// The database file is locked using the shared lock (more than one process may
|
||||
// hold a lock at the same time) otherwise (options.ReadOnly is set).
|
||||
if err := flock(db, mode, !db.readOnly, options.Timeout); err != nil {
|
||||
if err := flock(db, !db.readOnly, options.Timeout); err != nil {
|
||||
_ = db.close()
|
||||
return nil, err
|
||||
}
|
||||
|
@ -191,31 +204,41 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
|
|||
// Default values for test hooks
|
||||
db.ops.writeAt = db.file.WriteAt
|
||||
|
||||
if db.pageSize = options.PageSize; db.pageSize == 0 {
|
||||
// Set the default page size to the OS page size.
|
||||
db.pageSize = defaultPageSize
|
||||
}
|
||||
|
||||
// Initialize the database if it doesn't exist.
|
||||
if info, err := db.file.Stat(); err != nil {
|
||||
_ = db.close()
|
||||
return nil, err
|
||||
} else if info.Size() == 0 {
|
||||
// Initialize new files with meta pages.
|
||||
if err := db.init(); err != nil {
|
||||
// clean up file descriptor on initialization fail
|
||||
_ = db.close()
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
// Read the first meta page to determine the page size.
|
||||
var buf [0x1000]byte
|
||||
if _, err := db.file.ReadAt(buf[:], 0); err == nil {
|
||||
m := db.pageInBuffer(buf[:], 0).meta()
|
||||
if err := m.validate(); err != nil {
|
||||
// If we can't read the page size, we can assume it's the same
|
||||
// as the OS -- since that's how the page size was chosen in the
|
||||
// first place.
|
||||
//
|
||||
// If the first page is invalid and this OS uses a different
|
||||
// page size than what the database was created with then we
|
||||
// are out of luck and cannot access the database.
|
||||
db.pageSize = os.Getpagesize()
|
||||
} else {
|
||||
// If we can't read the page size, but can read a page, assume
|
||||
// it's the same as the OS or one given -- since that's how the
|
||||
// page size was chosen in the first place.
|
||||
//
|
||||
// If the first page is invalid and this OS uses a different
|
||||
// page size than what the database was created with then we
|
||||
// are out of luck and cannot access the database.
|
||||
//
|
||||
// TODO: scan for next page
|
||||
if bw, err := db.file.ReadAt(buf[:], 0); err == nil && bw == len(buf) {
|
||||
if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil {
|
||||
db.pageSize = int(m.pageSize)
|
||||
}
|
||||
} else {
|
||||
_ = db.close()
|
||||
return nil, ErrInvalid
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -232,14 +255,50 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
|
|||
return nil, err
|
||||
}
|
||||
|
||||
// Read in the freelist.
|
||||
db.freelist = newFreelist()
|
||||
db.freelist.read(db.page(db.meta().freelist))
|
||||
if db.readOnly {
|
||||
return db, nil
|
||||
}
|
||||
|
||||
db.loadFreelist()
|
||||
|
||||
// Flush freelist when transitioning from no sync to sync so
|
||||
// NoFreelistSync unaware boltdb can open the db later.
|
||||
if !db.NoFreelistSync && !db.hasSyncedFreelist() {
|
||||
tx, err := db.Begin(true)
|
||||
if tx != nil {
|
||||
err = tx.Commit()
|
||||
}
|
||||
if err != nil {
|
||||
_ = db.close()
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// Mark the database as opened and return.
|
||||
return db, nil
|
||||
}
|
||||
|
||||
// loadFreelist reads the freelist if it is synced, or reconstructs it
|
||||
// by scanning the DB if it is not synced. It assumes there are no
|
||||
// concurrent accesses being made to the freelist.
|
||||
func (db *DB) loadFreelist() {
|
||||
db.freelistLoad.Do(func() {
|
||||
db.freelist = newFreelist()
|
||||
if !db.hasSyncedFreelist() {
|
||||
// Reconstruct free list by scanning the DB.
|
||||
db.freelist.readIDs(db.freepages())
|
||||
} else {
|
||||
// Read free list from freelist page.
|
||||
db.freelist.read(db.page(db.meta().freelist))
|
||||
}
|
||||
db.stats.FreePageN = len(db.freelist.ids)
|
||||
})
|
||||
}
|
||||
|
||||
func (db *DB) hasSyncedFreelist() bool {
|
||||
return db.meta().freelist != pgidNoFreelist
|
||||
}
|
||||
|
||||
// mmap opens the underlying memory-mapped file and initializes the meta references.
|
||||
// minsz is the minimum size that the new mmap can be.
|
||||
func (db *DB) mmap(minsz int) error {
|
||||
|
@ -341,9 +400,6 @@ func (db *DB) mmapSize(size int) (int, error) {
|
|||
|
||||
// init creates a new database file and initializes its meta pages.
|
||||
func (db *DB) init() error {
|
||||
// Set the page size to the OS page size.
|
||||
db.pageSize = os.Getpagesize()
|
||||
|
||||
// Create two meta pages on a buffer.
|
||||
buf := make([]byte, db.pageSize*4)
|
||||
for i := 0; i < 2; i++ {
|
||||
|
@ -387,7 +443,8 @@ func (db *DB) init() error {
|
|||
}
|
||||
|
||||
// Close releases all database resources.
|
||||
// All transactions must be closed before closing the database.
|
||||
// It will block waiting for any open transactions to finish
|
||||
// before closing the database and returning.
|
||||
func (db *DB) Close() error {
|
||||
db.rwlock.Lock()
|
||||
defer db.rwlock.Unlock()
|
||||
|
@ -395,8 +452,8 @@ func (db *DB) Close() error {
|
|||
db.metalock.Lock()
|
||||
defer db.metalock.Unlock()
|
||||
|
||||
db.mmaplock.RLock()
|
||||
defer db.mmaplock.RUnlock()
|
||||
db.mmaplock.Lock()
|
||||
defer db.mmaplock.Unlock()
|
||||
|
||||
return db.close()
|
||||
}
|
||||
|
@ -526,21 +583,36 @@ func (db *DB) beginRWTx() (*Tx, error) {
|
|||
t := &Tx{writable: true}
|
||||
t.init(db)
|
||||
db.rwtx = t
|
||||
db.freePages()
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// Free any pages associated with closed read-only transactions.
|
||||
var minid txid = 0xFFFFFFFFFFFFFFFF
|
||||
for _, t := range db.txs {
|
||||
if t.meta.txid < minid {
|
||||
minid = t.meta.txid
|
||||
}
|
||||
// freePages releases any pages associated with closed read-only transactions.
|
||||
func (db *DB) freePages() {
|
||||
// Free all pending pages prior to earliest open transaction.
|
||||
sort.Sort(txsById(db.txs))
|
||||
minid := txid(0xFFFFFFFFFFFFFFFF)
|
||||
if len(db.txs) > 0 {
|
||||
minid = db.txs[0].meta.txid
|
||||
}
|
||||
if minid > 0 {
|
||||
db.freelist.release(minid - 1)
|
||||
}
|
||||
|
||||
return t, nil
|
||||
// Release unused txid extents.
|
||||
for _, t := range db.txs {
|
||||
db.freelist.releaseRange(minid, t.meta.txid-1)
|
||||
minid = t.meta.txid + 1
|
||||
}
|
||||
db.freelist.releaseRange(minid, txid(0xFFFFFFFFFFFFFFFF))
|
||||
// Any page both allocated and freed in an extent is safe to release.
|
||||
}
|
||||
|
||||
type txsById []*Tx
|
||||
|
||||
func (t txsById) Len() int { return len(t) }
|
||||
func (t txsById) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
|
||||
func (t txsById) Less(i, j int) bool { return t[i].meta.txid < t[j].meta.txid }
|
||||
|
||||
// removeTx removes a transaction from the database.
|
||||
func (db *DB) removeTx(tx *Tx) {
|
||||
// Release the read lock on the mmap.
|
||||
|
@ -552,7 +624,10 @@ func (db *DB) removeTx(tx *Tx) {
|
|||
// Remove the transaction.
|
||||
for i, t := range db.txs {
|
||||
if t == tx {
|
||||
db.txs = append(db.txs[:i], db.txs[i+1:]...)
|
||||
last := len(db.txs) - 1
|
||||
db.txs[i] = db.txs[last]
|
||||
db.txs[last] = nil
|
||||
db.txs = db.txs[:last]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
@ -630,11 +705,7 @@ func (db *DB) View(fn func(*Tx) error) error {
|
|||
return err
|
||||
}
|
||||
|
||||
if err := t.Rollback(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
return t.Rollback()
|
||||
}
|
||||
|
||||
// Batch calls fn as part of a batch. It behaves similar to Update,
|
||||
|
@ -734,9 +805,7 @@ retry:
|
|||
|
||||
// pass success, or bolt internal errors, to all callers
|
||||
for _, c := range b.calls {
|
||||
if c.err != nil {
|
||||
c.err <- err
|
||||
}
|
||||
c.err <- err
|
||||
}
|
||||
break retry
|
||||
}
|
||||
|
@ -823,7 +892,7 @@ func (db *DB) meta() *meta {
|
|||
}
|
||||
|
||||
// allocate returns a contiguous block of memory starting at a given page.
|
||||
func (db *DB) allocate(count int) (*page, error) {
|
||||
func (db *DB) allocate(txid txid, count int) (*page, error) {
|
||||
// Allocate a temporary buffer for the page.
|
||||
var buf []byte
|
||||
if count == 1 {
|
||||
|
@ -835,7 +904,7 @@ func (db *DB) allocate(count int) (*page, error) {
|
|||
p.overflow = uint32(count - 1)
|
||||
|
||||
// Use pages from the freelist if they are available.
|
||||
if p.id = db.freelist.allocate(count); p.id != 0 {
|
||||
if p.id = db.freelist.allocate(txid, count); p.id != 0 {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
|
@ -890,6 +959,38 @@ func (db *DB) IsReadOnly() bool {
|
|||
return db.readOnly
|
||||
}
|
||||
|
||||
func (db *DB) freepages() []pgid {
|
||||
tx, err := db.beginTx()
|
||||
defer func() {
|
||||
err = tx.Rollback()
|
||||
if err != nil {
|
||||
panic("freepages: failed to rollback tx")
|
||||
}
|
||||
}()
|
||||
if err != nil {
|
||||
panic("freepages: failed to open read only tx")
|
||||
}
|
||||
|
||||
reachable := make(map[pgid]*page)
|
||||
nofreed := make(map[pgid]bool)
|
||||
ech := make(chan error)
|
||||
go func() {
|
||||
for e := range ech {
|
||||
panic(fmt.Sprintf("freepages: failed to get all reachable pages (%v)", e))
|
||||
}
|
||||
}()
|
||||
tx.checkBucket(&tx.root, reachable, nofreed, ech)
|
||||
close(ech)
|
||||
|
||||
var fids []pgid
|
||||
for i := pgid(2); i < db.meta().pgid; i++ {
|
||||
if _, ok := reachable[i]; !ok {
|
||||
fids = append(fids, i)
|
||||
}
|
||||
}
|
||||
return fids
|
||||
}
|
||||
|
||||
// Options represents the options that can be set when opening a database.
|
||||
type Options struct {
|
||||
// Timeout is the amount of time to wait to obtain a file lock.
|
||||
|
@ -900,6 +1001,10 @@ type Options struct {
|
|||
// Sets the DB.NoGrowSync flag before memory mapping the file.
|
||||
NoGrowSync bool
|
||||
|
||||
// Do not sync freelist to disk. This improves the database write performance
|
||||
// under normal operation, but requires a full database re-sync during recovery.
|
||||
NoFreelistSync bool
|
||||
|
||||
// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
|
||||
// grab a shared lock (UNIX).
|
||||
ReadOnly bool
|
||||
|
@ -916,6 +1021,14 @@ type Options struct {
|
|||
// If initialMmapSize is smaller than the previous database size,
|
||||
// it takes no effect.
|
||||
InitialMmapSize int
|
||||
|
||||
// PageSize overrides the default OS page size.
|
||||
PageSize int
|
||||
|
||||
// NoSync sets the initial value of DB.NoSync. Normally this can just be
|
||||
// set directly on the DB itself when returned from Open(), but this option
|
||||
// is useful in APIs which expose Options but not the underlying DB.
|
||||
NoSync bool
|
||||
}
|
||||
|
||||
// DefaultOptions represent the options used if nil options are passed into Open().
|
||||
|
@ -952,15 +1065,11 @@ func (s *Stats) Sub(other *Stats) Stats {
|
|||
diff.PendingPageN = s.PendingPageN
|
||||
diff.FreeAlloc = s.FreeAlloc
|
||||
diff.FreelistInuse = s.FreelistInuse
|
||||
diff.TxN = other.TxN - s.TxN
|
||||
diff.TxN = s.TxN - other.TxN
|
||||
diff.TxStats = s.TxStats.Sub(&other.TxStats)
|
||||
return diff
|
||||
}
|
||||
|
||||
func (s *Stats) add(other *Stats) {
|
||||
s.TxStats.add(&other.TxStats)
|
||||
}
|
||||
|
||||
type Info struct {
|
||||
Data uintptr
|
||||
PageSize int
|
||||
|
@ -999,7 +1108,8 @@ func (m *meta) copy(dest *meta) {
|
|||
func (m *meta) write(p *page) {
|
||||
if m.root.root >= m.pgid {
|
||||
panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
|
||||
} else if m.freelist >= m.pgid {
|
||||
} else if m.freelist >= m.pgid && m.freelist != pgidNoFreelist {
|
||||
// TODO: reject pgidNoFreeList if !NoFreelistSync
|
||||
panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
|
||||
}
|
||||
|
||||
|
@ -1026,11 +1136,3 @@ func _assert(condition bool, msg string, v ...interface{}) {
|
|||
panic(fmt.Sprintf("assertion failed: "+msg, v...))
|
||||
}
|
||||
}
|
||||
|
||||
func warn(v ...interface{}) { fmt.Fprintln(os.Stderr, v...) }
|
||||
func warnf(msg string, v ...interface{}) { fmt.Fprintf(os.Stderr, msg+"\n", v...) }
|
||||
|
||||
func printstack() {
|
||||
stack := strings.Join(strings.Split(string(debug.Stack()), "\n")[2:], "\n")
|
||||
fmt.Fprintln(os.Stderr, stack)
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
Package bolt implements a low-level key/value store in pure Go. It supports
|
||||
package bbolt implements a low-level key/value store in pure Go. It supports
|
||||
fully serializable transactions, ACID semantics, and lock-free MVCC with
|
||||
multiple readers and a single writer. Bolt can be used for projects that
|
||||
want a simple data store without the need to add large dependencies such as
|
||||
|
@ -41,4 +41,4 @@ point to different data or can point to invalid memory which will cause a panic.
|
|||
|
||||
|
||||
*/
|
||||
package bolt
|
||||
package bbolt
|
|
@ -1,4 +1,4 @@
|
|||
package bolt
|
||||
package bbolt
|
||||
|
||||
import "errors"
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package bolt
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
@ -6,25 +6,40 @@ import (
|
|||
"unsafe"
|
||||
)
|
||||
|
||||
// txPending holds a list of pgids and corresponding allocation txns
|
||||
// that are pending to be freed.
|
||||
type txPending struct {
|
||||
ids []pgid
|
||||
alloctx []txid // txids allocating the ids
|
||||
lastReleaseBegin txid // beginning txid of last matching releaseRange
|
||||
}
|
||||
|
||||
// freelist represents a list of all pages that are available for allocation.
|
||||
// It also tracks pages that have been freed but are still in use by open transactions.
|
||||
type freelist struct {
|
||||
ids []pgid // all free and available free page ids.
|
||||
pending map[txid][]pgid // mapping of soon-to-be free page ids by tx.
|
||||
cache map[pgid]bool // fast lookup of all free and pending page ids.
|
||||
ids []pgid // all free and available free page ids.
|
||||
allocs map[pgid]txid // mapping of txid that allocated a pgid.
|
||||
pending map[txid]*txPending // mapping of soon-to-be free page ids by tx.
|
||||
cache map[pgid]bool // fast lookup of all free and pending page ids.
|
||||
}
|
||||
|
||||
// newFreelist returns an empty, initialized freelist.
|
||||
func newFreelist() *freelist {
|
||||
return &freelist{
|
||||
pending: make(map[txid][]pgid),
|
||||
allocs: make(map[pgid]txid),
|
||||
pending: make(map[txid]*txPending),
|
||||
cache: make(map[pgid]bool),
|
||||
}
|
||||
}
|
||||
|
||||
// size returns the size of the page after serialization.
|
||||
func (f *freelist) size() int {
|
||||
return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * f.count())
|
||||
n := f.count()
|
||||
if n >= 0xFFFF {
|
||||
// The first element will be used to store the count. See freelist.write.
|
||||
n++
|
||||
}
|
||||
return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * n)
|
||||
}
|
||||
|
||||
// count returns count of pages on the freelist
|
||||
|
@ -40,27 +55,26 @@ func (f *freelist) free_count() int {
|
|||
// pending_count returns count of pending pages
|
||||
func (f *freelist) pending_count() int {
|
||||
var count int
|
||||
for _, list := range f.pending {
|
||||
count += len(list)
|
||||
for _, txp := range f.pending {
|
||||
count += len(txp.ids)
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
// all returns a list of all free ids and all pending ids in one sorted list.
|
||||
func (f *freelist) all() []pgid {
|
||||
m := make(pgids, 0)
|
||||
|
||||
for _, list := range f.pending {
|
||||
m = append(m, list...)
|
||||
// copyall copies into dst a list of all free ids and all pending ids in one sorted list.
|
||||
// f.count returns the minimum length required for dst.
|
||||
func (f *freelist) copyall(dst []pgid) {
|
||||
m := make(pgids, 0, f.pending_count())
|
||||
for _, txp := range f.pending {
|
||||
m = append(m, txp.ids...)
|
||||
}
|
||||
|
||||
sort.Sort(m)
|
||||
return pgids(f.ids).merge(m)
|
||||
mergepgids(dst, f.ids, m)
|
||||
}
|
||||
|
||||
// allocate returns the starting page id of a contiguous list of pages of a given size.
|
||||
// If a contiguous block cannot be found then 0 is returned.
|
||||
func (f *freelist) allocate(n int) pgid {
|
||||
func (f *freelist) allocate(txid txid, n int) pgid {
|
||||
if len(f.ids) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
@ -93,7 +107,7 @@ func (f *freelist) allocate(n int) pgid {
|
|||
for i := pgid(0); i < pgid(n); i++ {
|
||||
delete(f.cache, initial+i)
|
||||
}
|
||||
|
||||
f.allocs[initial] = txid
|
||||
return initial
|
||||
}
|
||||
|
||||
|
@ -110,28 +124,73 @@ func (f *freelist) free(txid txid, p *page) {
|
|||
}
|
||||
|
||||
// Free page and all its overflow pages.
|
||||
var ids = f.pending[txid]
|
||||
txp := f.pending[txid]
|
||||
if txp == nil {
|
||||
txp = &txPending{}
|
||||
f.pending[txid] = txp
|
||||
}
|
||||
allocTxid, ok := f.allocs[p.id]
|
||||
if ok {
|
||||
delete(f.allocs, p.id)
|
||||
} else if (p.flags & freelistPageFlag) != 0 {
|
||||
// Freelist is always allocated by prior tx.
|
||||
allocTxid = txid - 1
|
||||
}
|
||||
|
||||
for id := p.id; id <= p.id+pgid(p.overflow); id++ {
|
||||
// Verify that page is not already free.
|
||||
if f.cache[id] {
|
||||
panic(fmt.Sprintf("page %d already freed", id))
|
||||
}
|
||||
|
||||
// Add to the freelist and cache.
|
||||
ids = append(ids, id)
|
||||
txp.ids = append(txp.ids, id)
|
||||
txp.alloctx = append(txp.alloctx, allocTxid)
|
||||
f.cache[id] = true
|
||||
}
|
||||
f.pending[txid] = ids
|
||||
}
|
||||
|
||||
// release moves all page ids for a transaction id (or older) to the freelist.
|
||||
func (f *freelist) release(txid txid) {
|
||||
m := make(pgids, 0)
|
||||
for tid, ids := range f.pending {
|
||||
for tid, txp := range f.pending {
|
||||
if tid <= txid {
|
||||
// Move transaction's pending pages to the available freelist.
|
||||
// Don't remove from the cache since the page is still free.
|
||||
m = append(m, ids...)
|
||||
m = append(m, txp.ids...)
|
||||
delete(f.pending, tid)
|
||||
}
|
||||
}
|
||||
sort.Sort(m)
|
||||
f.ids = pgids(f.ids).merge(m)
|
||||
}
|
||||
|
||||
// releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
|
||||
func (f *freelist) releaseRange(begin, end txid) {
|
||||
if begin > end {
|
||||
return
|
||||
}
|
||||
var m pgids
|
||||
for tid, txp := range f.pending {
|
||||
if tid < begin || tid > end {
|
||||
continue
|
||||
}
|
||||
// Don't recompute freed pages if ranges haven't updated.
|
||||
if txp.lastReleaseBegin == begin {
|
||||
continue
|
||||
}
|
||||
for i := 0; i < len(txp.ids); i++ {
|
||||
if atx := txp.alloctx[i]; atx < begin || atx > end {
|
||||
continue
|
||||
}
|
||||
m = append(m, txp.ids[i])
|
||||
txp.ids[i] = txp.ids[len(txp.ids)-1]
|
||||
txp.ids = txp.ids[:len(txp.ids)-1]
|
||||
txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1]
|
||||
txp.alloctx = txp.alloctx[:len(txp.alloctx)-1]
|
||||
i--
|
||||
}
|
||||
txp.lastReleaseBegin = begin
|
||||
if len(txp.ids) == 0 {
|
||||
delete(f.pending, tid)
|
||||
}
|
||||
}
|
||||
|
@ -142,12 +201,29 @@ func (f *freelist) release(txid txid) {
|
|||
// rollback removes the pages from a given pending tx.
|
||||
func (f *freelist) rollback(txid txid) {
|
||||
// Remove page ids from cache.
|
||||
for _, id := range f.pending[txid] {
|
||||
delete(f.cache, id)
|
||||
txp := f.pending[txid]
|
||||
if txp == nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Remove pages from pending list.
|
||||
var m pgids
|
||||
for i, pgid := range txp.ids {
|
||||
delete(f.cache, pgid)
|
||||
tx := txp.alloctx[i]
|
||||
if tx == 0 {
|
||||
continue
|
||||
}
|
||||
if tx != txid {
|
||||
// Pending free aborted; restore page back to alloc list.
|
||||
f.allocs[pgid] = tx
|
||||
} else {
|
||||
// Freed page was allocated by this txn; OK to throw away.
|
||||
m = append(m, pgid)
|
||||
}
|
||||
}
|
||||
// Remove pages from pending list and mark as free if allocated by txid.
|
||||
delete(f.pending, txid)
|
||||
sort.Sort(m)
|
||||
f.ids = pgids(f.ids).merge(m)
|
||||
}
|
||||
|
||||
// freed returns whether a given page is in the free list.
|
||||
|
@ -157,6 +233,9 @@ func (f *freelist) freed(pgid pgid) bool {
|
|||
|
||||
// read initializes the freelist from a freelist page.
|
||||
func (f *freelist) read(p *page) {
|
||||
if (p.flags & freelistPageFlag) == 0 {
|
||||
panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ()))
|
||||
}
|
||||
// If the page.count is at the max uint16 value (64k) then it's considered
|
||||
// an overflow and the size of the freelist is stored as the first element.
|
||||
idx, count := 0, int(p.count)
|
||||
|
@ -169,7 +248,7 @@ func (f *freelist) read(p *page) {
|
|||
if count == 0 {
|
||||
f.ids = nil
|
||||
} else {
|
||||
ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx:count]
|
||||
ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx : idx+count]
|
||||
f.ids = make([]pgid, len(ids))
|
||||
copy(f.ids, ids)
|
||||
|
||||
|
@ -181,27 +260,33 @@ func (f *freelist) read(p *page) {
|
|||
f.reindex()
|
||||
}
|
||||
|
||||
// read initializes the freelist from a given list of ids.
|
||||
func (f *freelist) readIDs(ids []pgid) {
|
||||
f.ids = ids
|
||||
f.reindex()
|
||||
}
|
||||
|
||||
// write writes the page ids onto a freelist page. All free and pending ids are
|
||||
// saved to disk since in the event of a program crash, all pending ids will
|
||||
// become free.
|
||||
func (f *freelist) write(p *page) error {
|
||||
// Combine the old free pgids and pgids waiting on an open transaction.
|
||||
ids := f.all()
|
||||
|
||||
// Update the header flag.
|
||||
p.flags |= freelistPageFlag
|
||||
|
||||
// The page.count can only hold up to 64k elements so if we overflow that
|
||||
// number then we handle it by putting the size in the first element.
|
||||
if len(ids) == 0 {
|
||||
p.count = uint16(len(ids))
|
||||
} else if len(ids) < 0xFFFF {
|
||||
p.count = uint16(len(ids))
|
||||
copy(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:], ids)
|
||||
lenids := f.count()
|
||||
if lenids == 0 {
|
||||
p.count = uint16(lenids)
|
||||
} else if lenids < 0xFFFF {
|
||||
p.count = uint16(lenids)
|
||||
f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:])
|
||||
} else {
|
||||
p.count = 0xFFFF
|
||||
((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(len(ids))
|
||||
copy(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:], ids)
|
||||
((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(lenids)
|
||||
f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:])
|
||||
}
|
||||
|
||||
return nil
|
||||
|
@ -213,8 +298,8 @@ func (f *freelist) reload(p *page) {
|
|||
|
||||
// Build a cache of only pending pages.
|
||||
pcache := make(map[pgid]bool)
|
||||
for _, pendingIDs := range f.pending {
|
||||
for _, pendingID := range pendingIDs {
|
||||
for _, txp := range f.pending {
|
||||
for _, pendingID := range txp.ids {
|
||||
pcache[pendingID] = true
|
||||
}
|
||||
}
|
||||
|
@ -240,8 +325,8 @@ func (f *freelist) reindex() {
|
|||
for _, id := range f.ids {
|
||||
f.cache[id] = true
|
||||
}
|
||||
for _, pendingIDs := range f.pending {
|
||||
for _, pendingID := range pendingIDs {
|
||||
for _, txp := range f.pending {
|
||||
for _, pendingID := range txp.ids {
|
||||
f.cache[pendingID] = true
|
||||
}
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
package bolt
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
@ -365,7 +365,7 @@ func (n *node) spill() error {
|
|||
}
|
||||
|
||||
// Allocate contiguous space for the node.
|
||||
p, err := tx.allocate((node.size() / tx.db.pageSize) + 1)
|
||||
p, err := tx.allocate((node.size() + tx.db.pageSize - 1) / tx.db.pageSize)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
package bolt
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
@ -145,12 +145,33 @@ func (a pgids) merge(b pgids) pgids {
|
|||
// Return the opposite slice if one is nil.
|
||||
if len(a) == 0 {
|
||||
return b
|
||||
} else if len(b) == 0 {
|
||||
}
|
||||
if len(b) == 0 {
|
||||
return a
|
||||
}
|
||||
merged := make(pgids, len(a)+len(b))
|
||||
mergepgids(merged, a, b)
|
||||
return merged
|
||||
}
|
||||
|
||||
// Create a list to hold all elements from both lists.
|
||||
merged := make(pgids, 0, len(a)+len(b))
|
||||
// mergepgids copies the sorted union of a and b into dst.
|
||||
// If dst is too small, it panics.
|
||||
func mergepgids(dst, a, b pgids) {
|
||||
if len(dst) < len(a)+len(b) {
|
||||
panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b)))
|
||||
}
|
||||
// Copy in the opposite slice if one is nil.
|
||||
if len(a) == 0 {
|
||||
copy(dst, b)
|
||||
return
|
||||
}
|
||||
if len(b) == 0 {
|
||||
copy(dst, a)
|
||||
return
|
||||
}
|
||||
|
||||
// Merged will hold all elements from both lists.
|
||||
merged := dst[:0]
|
||||
|
||||
// Assign lead to the slice with a lower starting value, follow to the higher value.
|
||||
lead, follow := a, b
|
||||
|
@ -172,7 +193,5 @@ func (a pgids) merge(b pgids) pgids {
|
|||
}
|
||||
|
||||
// Append what's left in follow.
|
||||
merged = append(merged, follow...)
|
||||
|
||||
return merged
|
||||
_ = append(merged, follow...)
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
package bolt
|
||||
package bbolt
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
@ -126,10 +126,7 @@ func (tx *Tx) DeleteBucket(name []byte) error {
|
|||
// the error is returned to the caller.
|
||||
func (tx *Tx) ForEach(fn func(name []byte, b *Bucket) error) error {
|
||||
return tx.root.ForEach(func(k, v []byte) error {
|
||||
if err := fn(k, tx.root.Bucket(k)); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
return fn(k, tx.root.Bucket(k))
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -169,28 +166,18 @@ func (tx *Tx) Commit() error {
|
|||
// Free the old root bucket.
|
||||
tx.meta.root.root = tx.root.root
|
||||
|
||||
opgid := tx.meta.pgid
|
||||
|
||||
// Free the freelist and allocate new pages for it. This will overestimate
|
||||
// the size of the freelist but not underestimate the size (which would be bad).
|
||||
tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist))
|
||||
p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1)
|
||||
if err != nil {
|
||||
tx.rollback()
|
||||
return err
|
||||
// Free the old freelist because commit writes out a fresh freelist.
|
||||
if tx.meta.freelist != pgidNoFreelist {
|
||||
tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist))
|
||||
}
|
||||
if err := tx.db.freelist.write(p); err != nil {
|
||||
tx.rollback()
|
||||
return err
|
||||
}
|
||||
tx.meta.freelist = p.id
|
||||
|
||||
// If the high water mark has moved up then attempt to grow the database.
|
||||
if tx.meta.pgid > opgid {
|
||||
if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
|
||||
tx.rollback()
|
||||
if !tx.db.NoFreelistSync {
|
||||
err := tx.commitFreelist()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
tx.meta.freelist = pgidNoFreelist
|
||||
}
|
||||
|
||||
// Write dirty pages to disk.
|
||||
|
@ -235,6 +222,31 @@ func (tx *Tx) Commit() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (tx *Tx) commitFreelist() error {
|
||||
// Allocate new pages for the new free list. This will overestimate
|
||||
// the size of the freelist but not underestimate the size (which would be bad).
|
||||
opgid := tx.meta.pgid
|
||||
p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1)
|
||||
if err != nil {
|
||||
tx.rollback()
|
||||
return err
|
||||
}
|
||||
if err := tx.db.freelist.write(p); err != nil {
|
||||
tx.rollback()
|
||||
return err
|
||||
}
|
||||
tx.meta.freelist = p.id
|
||||
// If the high water mark has moved up then attempt to grow the database.
|
||||
if tx.meta.pgid > opgid {
|
||||
if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
|
||||
tx.rollback()
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Rollback closes the transaction and ignores all previous updates. Read-only
|
||||
// transactions must be rolled back and not committed.
|
||||
func (tx *Tx) Rollback() error {
|
||||
|
@ -291,7 +303,9 @@ func (tx *Tx) close() {
|
|||
}
|
||||
|
||||
// Copy writes the entire database to a writer.
|
||||
// This function exists for backwards compatibility. Use WriteTo() instead.
|
||||
// This function exists for backwards compatibility.
|
||||
//
|
||||
// Deprecated; Use WriteTo() instead.
|
||||
func (tx *Tx) Copy(w io.Writer) error {
|
||||
_, err := tx.WriteTo(w)
|
||||
return err
|
||||
|
@ -305,7 +319,11 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
|
|||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer func() { _ = f.Close() }()
|
||||
defer func() {
|
||||
if cerr := f.Close(); err == nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
// Generate a meta page. We use the same page data for both meta pages.
|
||||
buf := make([]byte, tx.db.pageSize)
|
||||
|
@ -333,7 +351,7 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
|
|||
}
|
||||
|
||||
// Move past the meta pages in the file.
|
||||
if _, err := f.Seek(int64(tx.db.pageSize*2), os.SEEK_SET); err != nil {
|
||||
if _, err := f.Seek(int64(tx.db.pageSize*2), io.SeekStart); err != nil {
|
||||
return n, fmt.Errorf("seek: %s", err)
|
||||
}
|
||||
|
||||
|
@ -344,7 +362,7 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
|
|||
return n, err
|
||||
}
|
||||
|
||||
return n, f.Close()
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// CopyFile copies the entire database to file at the given path.
|
||||
|
@ -379,9 +397,14 @@ func (tx *Tx) Check() <-chan error {
|
|||
}
|
||||
|
||||
func (tx *Tx) check(ch chan error) {
|
||||
// Force loading free list if opened in ReadOnly mode.
|
||||
tx.db.loadFreelist()
|
||||
|
||||
// Check if any pages are double freed.
|
||||
freed := make(map[pgid]bool)
|
||||
for _, id := range tx.db.freelist.all() {
|
||||
all := make([]pgid, tx.db.freelist.count())
|
||||
tx.db.freelist.copyall(all)
|
||||
for _, id := range all {
|
||||
if freed[id] {
|
||||
ch <- fmt.Errorf("page %d: already freed", id)
|
||||
}
|
||||
|
@ -392,8 +415,10 @@ func (tx *Tx) check(ch chan error) {
|
|||
reachable := make(map[pgid]*page)
|
||||
reachable[0] = tx.page(0) // meta0
|
||||
reachable[1] = tx.page(1) // meta1
|
||||
for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ {
|
||||
reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist)
|
||||
if tx.meta.freelist != pgidNoFreelist {
|
||||
for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ {
|
||||
reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist)
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively check buckets.
|
||||
|
@ -451,7 +476,7 @@ func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bo
|
|||
|
||||
// allocate returns a contiguous block of memory starting at a given page.
|
||||
func (tx *Tx) allocate(count int) (*page, error) {
|
||||
p, err := tx.db.allocate(count)
|
||||
p, err := tx.db.allocate(tx.meta.txid, count)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -460,7 +485,7 @@ func (tx *Tx) allocate(count int) (*page, error) {
|
|||
tx.pages[p.id] = p
|
||||
|
||||
// Update statistics.
|
||||
tx.stats.PageCount++
|
||||
tx.stats.PageCount += count
|
||||
tx.stats.PageAlloc += count * tx.db.pageSize
|
||||
|
||||
return p, nil
|
Loading…
Add table
Reference in a new issue