1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00
moby--moby/vendor/github.com/klauspost/compress/compressible.go
Sebastiaan van Stijn 0809bd6859
vendor: github.com/klauspost/compress v1.14.2
full diff: https://github.com/klauspost/compress/compare/v1.12.3...v1.14.2

Relevant changes affecting vendor:

- docs: Add combined LICENSE file
- Add snappy replacement package
- tests: Remove snappy dependency for tests
- huff0: Add size estimation function
- huff0: Improve 4X decompression speed
- huff0: Improve 4X decompression speed 5-10%
- huff0: Faster 1X Decompression
- zstd: Spawn decoder goroutine only if needed
- zstd: Detect short invalid signatures
- zstd: Add configurable Decoder window size
- zstd: Add stream content size
- zstd: Simplify hashing functions
- zstd: use SpeedBestCompression for level >= 10
- zstd: Fix WriteTo error forwarding
- zstd: Improve Best compression
- zstd: Fix incorrect encoding in best mode
- zstd: pooledZipWriter should return Writers to the same pool
- zstd: Upgrade xxhash
- zstd: Improve block encoding speed
- zstd: add arm64 xxhash assembly
- zstd: Minor decoder improvements
- zstd: Minor performance tweaks
- zstd: Add bigger default blocks
- zstd: Remove unused decompression buffer
- zstd: fix logically dead code
- zstd: Add noasm tag for xxhash
- zstd: improve header decoder

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2022-02-23 19:41:14 +01:00

85 lines
1.8 KiB
Go

package compress
import "math"
// Estimate scores how compressible block b appears to be, on a normalized scale.
// A score near zero suggests the data is unlikely to compress.
// A score over 0.1 suggests the data is probably compressible.
// A score over 0.5 suggests the data is very compressible.
// Blocks shorter than 16 bytes always score 0.
func Estimate(b []byte) float64 {
	if len(b) < 16 {
		return 0
	}
	size := float64(len(b))

	// Single pass over the input: run an order-1 predictor (guess each byte
	// from whatever last followed the preceding byte) and count byte values.
	var (
		follow [256]byte // follow[p] is the byte most recently seen after p
		counts [256]int  // number of occurrences of each byte value
		prev   byte      // preceding byte; starts at zero
		streak bool      // whether the preceding byte was predicted correctly
		hits   int       // predictions that were correct twice in a row
	)
	for i := 0; i < len(b); i++ {
		cur := b[i]
		predicted := follow[prev] == cur
		// Only two consecutive correct predictions count as a hit.
		if predicted && streak {
			hits++
		}
		streak = predicted
		follow[prev] = cur
		prev = cur
		counts[cur]++
	}

	// The hit ratio is raised to 0.6 to spread the values out.
	prediction := math.Pow(float64(hits)/size, 0.6)

	// Sum the squared distance of every bucket from a perfectly flat histogram.
	mean := size / 256
	sumSq := float64(0)
	for i := range counts {
		d := float64(counts[i]) - mean
		sumSq += d * d
	}
	spread := math.Sqrt(sumSq) / size

	// Subtract the expected standard deviation, clamp at zero, then rescale.
	expected := math.Sqrt(1 / size)
	spread -= expected
	if spread < 0 {
		spread = 0
	}
	spread *= 1 + expected

	// The histogram term is raised to 0.4 to spread the values out.
	entropy := math.Pow(spread, 0.4)

	// Prediction and histogram distribution are weighted 50/50.
	return math.Pow((prediction+entropy)/2, 0.9)
}
// ShannonEntropyBits returns the number of bits minimum required to represent
// an entropy encoding of the input bytes, based on the frequency of each byte
// value in b.
// The cost of each distinct byte value is rounded up to a whole number of bits
// before summing, so the result can exceed the unrounded Shannon entropy.
// An empty input returns 0.
// https://en.wiktionary.org/wiki/Shannon_entropy
func ShannonEntropyBits(b []byte) int {
	if len(b) == 0 {
		return 0
	}
	var hist [256]int
	for _, c := range b {
		hist[c]++
	}
	shannon := float64(0)
	total := float64(len(b))
	for _, v := range hist[:] {
		if v > 0 {
			n := float64(v)
			// Divide by the total instead of multiplying by a precomputed
			// reciprocal: n*(1/total) can round just below the true
			// probability (for example 49*(1/49) < 1), and the Ceil below
			// would then turn that tiny error into a whole extra bit.
			shannon += math.Ceil(-math.Log2(n/total) * n)
		}
	}
	return int(math.Ceil(shannon))
}