From 8f6d58915d9359cded2ec49ccea42e2cc756d0e8 Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Mon, 14 Feb 2022 15:58:01 +0100 Subject: [PATCH] vendor: github.com/klauspost/compress v1.12.3 Remove the replace rule, and use the version as specified by (indirect) dependencies: full diff: https://github.com/klauspost/compress/compare/v1.11.13...v1.12.3 Signed-off-by: Sebastiaan van Stijn --- vendor.mod | 2 +- vendor.sum | 5 +- .../compress => golang}/snappy/.gitignore | 0 .../compress => golang}/snappy/AUTHORS | 2 + .../compress => golang}/snappy/CONTRIBUTORS | 2 + .../compress => golang}/snappy/LICENSE | 0 .../compress => golang}/snappy/README | 0 .../compress => golang}/snappy/decode.go | 4 + .../compress => golang}/snappy/decode_amd64.s | 16 +- .../github.com/golang/snappy/decode_arm64.s | 494 ++++++++++++ .../snappy/decode_asm.go} | 1 + .../snappy/decode_other.go | 4 +- .../compress => golang}/snappy/encode.go | 4 + .../compress => golang}/snappy/encode_amd64.s | 0 .../github.com/golang/snappy/encode_arm64.s | 722 ++++++++++++++++++ .../snappy/encode_asm.go} | 1 + .../snappy/encode_other.go | 2 +- .../compress => golang}/snappy/snappy.go | 4 +- .../klauspost/compress/fse/compress.go | 1 - .../klauspost/compress/huff0/compress.go | 1 - .../klauspost/compress/snappy/runbench.cmd | 2 - .../klauspost/compress/zstd/README.md | 46 +- .../klauspost/compress/zstd/blockdec.go | 11 +- .../klauspost/compress/zstd/blockenc.go | 12 +- .../klauspost/compress/zstd/bytebuf.go | 21 +- .../klauspost/compress/zstd/decodeheader.go | 4 +- .../klauspost/compress/zstd/decoder.go | 26 +- .../compress/zstd/decoder_options.go | 5 +- .../klauspost/compress/zstd/enc_base.go | 13 +- .../klauspost/compress/zstd/enc_best.go | 14 + .../klauspost/compress/zstd/enc_better.go | 73 +- .../klauspost/compress/zstd/encoder.go | 2 +- .../compress/zstd/encoder_options.go | 2 +- .../klauspost/compress/zstd/framedec.go | 48 +- .../klauspost/compress/zstd/fse_encoder.go | 1 - .../klauspost/compress/zstd/fse_predefined.go | 2 +- .../klauspost/compress/zstd/seqenc.go | 1 - .../klauspost/compress/zstd/snappy.go | 3 +- .../github.com/klauspost/compress/zstd/zip.go | 121 +++ .../klauspost/compress/zstd/zstd.go | 18 +- vendor/modules.txt | 7 +- 41 files changed, 1568 insertions(+), 129 deletions(-) rename vendor/github.com/{klauspost/compress => golang}/snappy/.gitignore (100%) rename vendor/github.com/{klauspost/compress => golang}/snappy/AUTHORS (90%) rename vendor/github.com/{klauspost/compress => golang}/snappy/CONTRIBUTORS (95%) rename vendor/github.com/{klauspost/compress => golang}/snappy/LICENSE (100%) rename vendor/github.com/{klauspost/compress => golang}/snappy/README (100%) rename vendor/github.com/{klauspost/compress => golang}/snappy/decode.go (97%) rename vendor/github.com/{klauspost/compress => golang}/snappy/decode_amd64.s (98%) create mode 100644 vendor/github.com/golang/snappy/decode_arm64.s rename vendor/github.com/{klauspost/compress/snappy/decode_amd64.go => golang/snappy/decode_asm.go} (93%) rename vendor/github.com/{klauspost/compress => golang}/snappy/decode_other.go (98%) rename vendor/github.com/{klauspost/compress => golang}/snappy/encode.go (98%) rename vendor/github.com/{klauspost/compress => golang}/snappy/encode_amd64.s (100%) create mode 100644 vendor/github.com/golang/snappy/encode_arm64.s rename vendor/github.com/{klauspost/compress/snappy/encode_amd64.go => golang/snappy/encode_asm.go} (97%) rename vendor/github.com/{klauspost/compress => golang}/snappy/encode_other.go (99%) rename vendor/github.com/{klauspost/compress => golang}/snappy/snappy.go (97%) delete mode 100644 vendor/github.com/klauspost/compress/snappy/runbench.cmd create mode 100644 vendor/github.com/klauspost/compress/zstd/zip.go diff --git a/vendor.mod b/vendor.mod index bbba8dfec8..931ba503bc 100644 --- a/vendor.mod +++ b/vendor.mod @@ -106,6 +106,7 @@ require ( github.com/gogo/googleapis v1.4.0 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.2 // indirect + github.com/golang/snappy v0.0.3 // indirect github.com/google/btree v1.0.1 // indirect github.com/google/certificate-transparency-go v1.0.20 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect @@ -166,7 +167,6 @@ replace ( github.com/hashicorp/go-multierror => github.com/hashicorp/go-multierror v1.0.0 github.com/hashicorp/serf => github.com/hashicorp/serf v0.7.1-0.20160317193612-598c54895cc5 github.com/hpcloud/tail => github.com/hpcloud/tail v1.0.0 - github.com/klauspost/compress => github.com/klauspost/compress v1.11.13 github.com/matttproud/golang_protobuf_extensions => github.com/matttproud/golang_protobuf_extensions v1.0.1 github.com/onsi/ginkgo => github.com/onsi/ginkgo v1.8.0 github.com/onsi/gomega => github.com/onsi/gomega v1.5.0 diff --git a/vendor.sum b/vendor.sum index 024e207c0b..249e7d1d04 100644 --- a/vendor.sum +++ b/vendor.sum @@ -245,6 +245,8 @@ github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfb github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/protobuf v1.3.5 h1:F768QJ1E9tib+q5Sc8MkdJi1RxLTbRcTf8LJV56aRls= github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= +github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= +github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.1 h1:gK4Kx5IaGY9CD5sPJ36FHiBJ6ZXl0kilRiiCj+jdYp4= @@ -335,8 +337,9 @@ github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvW github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.11.13 h1:eSvu8Tmq6j2psUJqJrLcWH6K3w5Dwc+qipbaA6eVEN4= github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.12.3 h1:G5AfA94pHPysR56qqrkO2pxEexdDzrpFJ6yt/VqWxVU= +github.com/klauspost/compress v1.12.3/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= diff --git a/vendor/github.com/klauspost/compress/snappy/.gitignore b/vendor/github.com/golang/snappy/.gitignore similarity index 100% rename from vendor/github.com/klauspost/compress/snappy/.gitignore rename to vendor/github.com/golang/snappy/.gitignore diff --git a/vendor/github.com/klauspost/compress/snappy/AUTHORS b/vendor/github.com/golang/snappy/AUTHORS similarity index 90% rename from vendor/github.com/klauspost/compress/snappy/AUTHORS rename to vendor/github.com/golang/snappy/AUTHORS index bcfa19520a..203e84ebab 100644 --- a/vendor/github.com/klauspost/compress/snappy/AUTHORS +++ b/vendor/github.com/golang/snappy/AUTHORS @@ -8,8 +8,10 @@ # Please keep the list sorted. +Amazon.com, Inc Damian Gryski Google Inc. Jan Mercl <0xjnml@gmail.com> +Klaus Post Rodolfo Carvalho Sebastien Binet diff --git a/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS b/vendor/github.com/golang/snappy/CONTRIBUTORS similarity index 95% rename from vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS rename to vendor/github.com/golang/snappy/CONTRIBUTORS index 931ae31606..d9914732b5 100644 --- a/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS +++ b/vendor/github.com/golang/snappy/CONTRIBUTORS @@ -28,7 +28,9 @@ Damian Gryski Jan Mercl <0xjnml@gmail.com> +Jonathan Swinney Kai Backman +Klaus Post Marc-Antoine Ruel Nigel Tao Rob Pike diff --git a/vendor/github.com/klauspost/compress/snappy/LICENSE b/vendor/github.com/golang/snappy/LICENSE similarity index 100% rename from vendor/github.com/klauspost/compress/snappy/LICENSE rename to vendor/github.com/golang/snappy/LICENSE diff --git a/vendor/github.com/klauspost/compress/snappy/README b/vendor/github.com/golang/snappy/README similarity index 100% rename from vendor/github.com/klauspost/compress/snappy/README rename to vendor/github.com/golang/snappy/README diff --git a/vendor/github.com/klauspost/compress/snappy/decode.go b/vendor/github.com/golang/snappy/decode.go similarity index 97% rename from vendor/github.com/klauspost/compress/snappy/decode.go rename to vendor/github.com/golang/snappy/decode.go index 72efb0353d..f1e04b172c 100644 --- a/vendor/github.com/klauspost/compress/snappy/decode.go +++ b/vendor/github.com/golang/snappy/decode.go @@ -52,6 +52,8 @@ const ( // Otherwise, a newly allocated slice will be returned. // // The dst and src must not overlap. It is valid to pass a nil dst. +// +// Decode handles the Snappy block format, not the Snappy stream format. func Decode(dst, src []byte) ([]byte, error) { dLen, s, err := decodedLen(src) if err != nil { @@ -83,6 +85,8 @@ func NewReader(r io.Reader) *Reader { } // Reader is an io.Reader that can read Snappy-compressed bytes. +// +// Reader handles the Snappy stream format, not the Snappy block format. type Reader struct { r io.Reader err error diff --git a/vendor/github.com/klauspost/compress/snappy/decode_amd64.s b/vendor/github.com/golang/snappy/decode_amd64.s similarity index 98% rename from vendor/github.com/klauspost/compress/snappy/decode_amd64.s rename to vendor/github.com/golang/snappy/decode_amd64.s index 1c66e37234..e6179f65e3 100644 --- a/vendor/github.com/klauspost/compress/snappy/decode_amd64.s +++ b/vendor/github.com/golang/snappy/decode_amd64.s @@ -184,7 +184,9 @@ tagLit60Plus: // checks. In the asm version, we code it once instead of once per switch case. ADDQ CX, SI SUBQ $58, SI - CMPQ SI, R13 + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 JA errCorrupt // case x == 60: @@ -230,7 +232,9 @@ tagCopy4: ADDQ $5, SI // if uint(s) > uint(len(src)) { etc } - CMPQ SI, R13 + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 JA errCorrupt // length = 1 + int(src[s-5])>>2 @@ -247,7 +251,9 @@ tagCopy2: ADDQ $3, SI // if uint(s) > uint(len(src)) { etc } - CMPQ SI, R13 + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 JA errCorrupt // length = 1 + int(src[s-3])>>2 @@ -271,7 +277,9 @@ tagCopy: ADDQ $2, SI // if uint(s) > uint(len(src)) { etc } - CMPQ SI, R13 + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 JA errCorrupt // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) diff --git a/vendor/github.com/golang/snappy/decode_arm64.s b/vendor/github.com/golang/snappy/decode_arm64.s new file mode 100644 index 0000000000..7a3ead17ea --- /dev/null +++ b/vendor/github.com/golang/snappy/decode_arm64.s @@ -0,0 +1,494 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The asm code generally follows the pure Go code in decode_other.go, except +// where marked with a "!!!". + +// func decode(dst, src []byte) int +// +// All local variables fit into registers. The non-zero stack size is only to +// spill registers and push args when issuing a CALL. The register allocation: +// - R2 scratch +// - R3 scratch +// - R4 length or x +// - R5 offset +// - R6 &src[s] +// - R7 &dst[d] +// + R8 dst_base +// + R9 dst_len +// + R10 dst_base + dst_len +// + R11 src_base +// + R12 src_len +// + R13 src_base + src_len +// - R14 used by doCopy +// - R15 used by doCopy +// +// The registers R8-R13 (marked with a "+") are set at the start of the +// function, and after a CALL returns, and are not otherwise modified. +// +// The d variable is implicitly R7 - R8, and len(dst)-d is R10 - R7. +// The s variable is implicitly R6 - R11, and len(src)-s is R13 - R6. +TEXT ·decode(SB), NOSPLIT, $56-56 + // Initialize R6, R7 and R8-R13. + MOVD dst_base+0(FP), R8 + MOVD dst_len+8(FP), R9 + MOVD R8, R7 + MOVD R8, R10 + ADD R9, R10, R10 + MOVD src_base+24(FP), R11 + MOVD src_len+32(FP), R12 + MOVD R11, R6 + MOVD R11, R13 + ADD R12, R13, R13 + +loop: + // for s < len(src) + CMP R13, R6 + BEQ end + + // R4 = uint32(src[s]) + // + // switch src[s] & 0x03 + MOVBU (R6), R4 + MOVW R4, R3 + ANDW $3, R3 + MOVW $1, R1 + CMPW R1, R3 + BGE tagCopy + + // ---------------------------------------- + // The code below handles literal tags. + + // case tagLiteral: + // x := uint32(src[s] >> 2) + // switch + MOVW $60, R1 + LSRW $2, R4, R4 + CMPW R4, R1 + BLS tagLit60Plus + + // case x < 60: + // s++ + ADD $1, R6, R6 + +doLit: + // This is the end of the inner "switch", when we have a literal tag. + // + // We assume that R4 == x and x fits in a uint32, where x is the variable + // used in the pure Go decode_other.go code. + + // length = int(x) + 1 + // + // Unlike the pure Go code, we don't need to check if length <= 0 because + // R4 can hold 64 bits, so the increment cannot overflow. + ADD $1, R4, R4 + + // Prepare to check if copying length bytes will run past the end of dst or + // src. + // + // R2 = len(dst) - d + // R3 = len(src) - s + MOVD R10, R2 + SUB R7, R2, R2 + MOVD R13, R3 + SUB R6, R3, R3 + + // !!! Try a faster technique for short (16 or fewer bytes) copies. + // + // if length > 16 || len(dst)-d < 16 || len(src)-s < 16 { + // goto callMemmove // Fall back on calling runtime·memmove. + // } + // + // The C++ snappy code calls this TryFastAppend. It also checks len(src)-s + // against 21 instead of 16, because it cannot assume that all of its input + // is contiguous in memory and so it needs to leave enough source bytes to + // read the next tag without refilling buffers, but Go's Decode assumes + // contiguousness (the src argument is a []byte). + CMP $16, R4 + BGT callMemmove + CMP $16, R2 + BLT callMemmove + CMP $16, R3 + BLT callMemmove + + // !!! Implement the copy from src to dst as a 16-byte load and store. + // (Decode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only length bytes, but that's + // OK. If the input is a valid Snappy encoding then subsequent iterations + // will fix up the overrun. Otherwise, Decode returns a nil []byte (and a + // non-nil error), so the overrun will be ignored. + // + // Note that on arm64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. + LDP 0(R6), (R14, R15) + STP (R14, R15), 0(R7) + + // d += length + // s += length + ADD R4, R7, R7 + ADD R4, R6, R6 + B loop + +callMemmove: + // if length > len(dst)-d || length > len(src)-s { etc } + CMP R2, R4 + BGT errCorrupt + CMP R3, R4 + BGT errCorrupt + + // copy(dst[d:], src[s:s+length]) + // + // This means calling runtime·memmove(&dst[d], &src[s], length), so we push + // R7, R6 and R4 as arguments. Coincidentally, we also need to spill those + // three registers to the stack, to save local variables across the CALL. + MOVD R7, 8(RSP) + MOVD R6, 16(RSP) + MOVD R4, 24(RSP) + MOVD R7, 32(RSP) + MOVD R6, 40(RSP) + MOVD R4, 48(RSP) + CALL runtime·memmove(SB) + + // Restore local variables: unspill registers from the stack and + // re-calculate R8-R13. + MOVD 32(RSP), R7 + MOVD 40(RSP), R6 + MOVD 48(RSP), R4 + MOVD dst_base+0(FP), R8 + MOVD dst_len+8(FP), R9 + MOVD R8, R10 + ADD R9, R10, R10 + MOVD src_base+24(FP), R11 + MOVD src_len+32(FP), R12 + MOVD R11, R13 + ADD R12, R13, R13 + + // d += length + // s += length + ADD R4, R7, R7 + ADD R4, R6, R6 + B loop + +tagLit60Plus: + // !!! This fragment does the + // + // s += x - 58; if uint(s) > uint(len(src)) { etc } + // + // checks. In the asm version, we code it once instead of once per switch case. + ADD R4, R6, R6 + SUB $58, R6, R6 + MOVD R6, R3 + SUB R11, R3, R3 + CMP R12, R3 + BGT errCorrupt + + // case x == 60: + MOVW $61, R1 + CMPW R1, R4 + BEQ tagLit61 + BGT tagLit62Plus + + // x = uint32(src[s-1]) + MOVBU -1(R6), R4 + B doLit + +tagLit61: + // case x == 61: + // x = uint32(src[s-2]) | uint32(src[s-1])<<8 + MOVHU -2(R6), R4 + B doLit + +tagLit62Plus: + CMPW $62, R4 + BHI tagLit63 + + // case x == 62: + // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + MOVHU -3(R6), R4 + MOVBU -1(R6), R3 + ORR R3<<16, R4 + B doLit + +tagLit63: + // case x == 63: + // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + MOVWU -4(R6), R4 + B doLit + + // The code above handles literal tags. + // ---------------------------------------- + // The code below handles copy tags. + +tagCopy4: + // case tagCopy4: + // s += 5 + ADD $5, R6, R6 + + // if uint(s) > uint(len(src)) { etc } + MOVD R6, R3 + SUB R11, R3, R3 + CMP R12, R3 + BGT errCorrupt + + // length = 1 + int(src[s-5])>>2 + MOVD $1, R1 + ADD R4>>2, R1, R4 + + // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) + MOVWU -4(R6), R5 + B doCopy + +tagCopy2: + // case tagCopy2: + // s += 3 + ADD $3, R6, R6 + + // if uint(s) > uint(len(src)) { etc } + MOVD R6, R3 + SUB R11, R3, R3 + CMP R12, R3 + BGT errCorrupt + + // length = 1 + int(src[s-3])>>2 + MOVD $1, R1 + ADD R4>>2, R1, R4 + + // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) + MOVHU -2(R6), R5 + B doCopy + +tagCopy: + // We have a copy tag. We assume that: + // - R3 == src[s] & 0x03 + // - R4 == src[s] + CMP $2, R3 + BEQ tagCopy2 + BGT tagCopy4 + + // case tagCopy1: + // s += 2 + ADD $2, R6, R6 + + // if uint(s) > uint(len(src)) { etc } + MOVD R6, R3 + SUB R11, R3, R3 + CMP R12, R3 + BGT errCorrupt + + // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) + MOVD R4, R5 + AND $0xe0, R5 + MOVBU -1(R6), R3 + ORR R5<<3, R3, R5 + + // length = 4 + int(src[s-2])>>2&0x7 + MOVD $7, R1 + AND R4>>2, R1, R4 + ADD $4, R4, R4 + +doCopy: + // This is the end of the outer "switch", when we have a copy tag. + // + // We assume that: + // - R4 == length && R4 > 0 + // - R5 == offset + + // if offset <= 0 { etc } + MOVD $0, R1 + CMP R1, R5 + BLE errCorrupt + + // if d < offset { etc } + MOVD R7, R3 + SUB R8, R3, R3 + CMP R5, R3 + BLT errCorrupt + + // if length > len(dst)-d { etc } + MOVD R10, R3 + SUB R7, R3, R3 + CMP R3, R4 + BGT errCorrupt + + // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length + // + // Set: + // - R14 = len(dst)-d + // - R15 = &dst[d-offset] + MOVD R10, R14 + SUB R7, R14, R14 + MOVD R7, R15 + SUB R5, R15, R15 + + // !!! Try a faster technique for short (16 or fewer bytes) forward copies. + // + // First, try using two 8-byte load/stores, similar to the doLit technique + // above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is + // still OK if offset >= 8. Note that this has to be two 8-byte load/stores + // and not one 16-byte load/store, and the first store has to be before the + // second load, due to the overlap if offset is in the range [8, 16). + // + // if length > 16 || offset < 8 || len(dst)-d < 16 { + // goto slowForwardCopy + // } + // copy 16 bytes + // d += length + CMP $16, R4 + BGT slowForwardCopy + CMP $8, R5 + BLT slowForwardCopy + CMP $16, R14 + BLT slowForwardCopy + MOVD 0(R15), R2 + MOVD R2, 0(R7) + MOVD 8(R15), R3 + MOVD R3, 8(R7) + ADD R4, R7, R7 + B loop + +slowForwardCopy: + // !!! If the forward copy is longer than 16 bytes, or if offset < 8, we + // can still try 8-byte load stores, provided we can overrun up to 10 extra + // bytes. As above, the overrun will be fixed up by subsequent iterations + // of the outermost loop. + // + // The C++ snappy code calls this technique IncrementalCopyFastPath. Its + // commentary says: + // + // ---- + // + // The main part of this loop is a simple copy of eight bytes at a time + // until we've copied (at least) the requested amount of bytes. However, + // if d and d-offset are less than eight bytes apart (indicating a + // repeating pattern of length < 8), we first need to expand the pattern in + // order to get the correct results. For instance, if the buffer looks like + // this, with the eight-byte and patterns marked as + // intervals: + // + // abxxxxxxxxxxxx + // [------] d-offset + // [------] d + // + // a single eight-byte copy from to will repeat the pattern + // once, after which we can move two bytes without moving : + // + // ababxxxxxxxxxx + // [------] d-offset + // [------] d + // + // and repeat the exercise until the two no longer overlap. + // + // This allows us to do very well in the special case of one single byte + // repeated many times, without taking a big hit for more general cases. + // + // The worst case of extra writing past the end of the match occurs when + // offset == 1 and length == 1; the last copy will read from byte positions + // [0..7] and write to [4..11], whereas it was only supposed to write to + // position 1. Thus, ten excess bytes. + // + // ---- + // + // That "10 byte overrun" worst case is confirmed by Go's + // TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy + // and finishSlowForwardCopy algorithm. + // + // if length > len(dst)-d-10 { + // goto verySlowForwardCopy + // } + SUB $10, R14, R14 + CMP R14, R4 + BGT verySlowForwardCopy + +makeOffsetAtLeast8: + // !!! As above, expand the pattern so that offset >= 8 and we can use + // 8-byte load/stores. + // + // for offset < 8 { + // copy 8 bytes from dst[d-offset:] to dst[d:] + // length -= offset + // d += offset + // offset += offset + // // The two previous lines together means that d-offset, and therefore + // // R15, is unchanged. + // } + CMP $8, R5 + BGE fixUpSlowForwardCopy + MOVD (R15), R3 + MOVD R3, (R7) + SUB R5, R4, R4 + ADD R5, R7, R7 + ADD R5, R5, R5 + B makeOffsetAtLeast8 + +fixUpSlowForwardCopy: + // !!! Add length (which might be negative now) to d (implied by R7 being + // &dst[d]) so that d ends up at the right place when we jump back to the + // top of the loop. Before we do that, though, we save R7 to R2 so that, if + // length is positive, copying the remaining length bytes will write to the + // right place. + MOVD R7, R2 + ADD R4, R7, R7 + +finishSlowForwardCopy: + // !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative + // length means that we overrun, but as above, that will be fixed up by + // subsequent iterations of the outermost loop. + MOVD $0, R1 + CMP R1, R4 + BLE loop + MOVD (R15), R3 + MOVD R3, (R2) + ADD $8, R15, R15 + ADD $8, R2, R2 + SUB $8, R4, R4 + B finishSlowForwardCopy + +verySlowForwardCopy: + // verySlowForwardCopy is a simple implementation of forward copy. In C + // parlance, this is a do/while loop instead of a while loop, since we know + // that length > 0. In Go syntax: + // + // for { + // dst[d] = dst[d - offset] + // d++ + // length-- + // if length == 0 { + // break + // } + // } + MOVB (R15), R3 + MOVB R3, (R7) + ADD $1, R15, R15 + ADD $1, R7, R7 + SUB $1, R4, R4 + CBNZ R4, verySlowForwardCopy + B loop + + // The code above handles copy tags. + // ---------------------------------------- + +end: + // This is the end of the "for s < len(src)". + // + // if d != len(dst) { etc } + CMP R10, R7 + BNE errCorrupt + + // return 0 + MOVD $0, ret+48(FP) + RET + +errCorrupt: + // return decodeErrCodeCorrupt + MOVD $1, R2 + MOVD R2, ret+48(FP) + RET diff --git a/vendor/github.com/klauspost/compress/snappy/decode_amd64.go b/vendor/github.com/golang/snappy/decode_asm.go similarity index 93% rename from vendor/github.com/klauspost/compress/snappy/decode_amd64.go rename to vendor/github.com/golang/snappy/decode_asm.go index fcd192b849..7082b34919 100644 --- a/vendor/github.com/klauspost/compress/snappy/decode_amd64.go +++ b/vendor/github.com/golang/snappy/decode_asm.go @@ -5,6 +5,7 @@ // +build !appengine // +build gc // +build !noasm +// +build amd64 arm64 package snappy diff --git a/vendor/github.com/klauspost/compress/snappy/decode_other.go b/vendor/github.com/golang/snappy/decode_other.go similarity index 98% rename from vendor/github.com/klauspost/compress/snappy/decode_other.go rename to vendor/github.com/golang/snappy/decode_other.go index 94a96c5d7b..2f672be557 100644 --- a/vendor/github.com/klauspost/compress/snappy/decode_other.go +++ b/vendor/github.com/golang/snappy/decode_other.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !amd64 appengine !gc noasm +// +build !amd64,!arm64 appengine !gc noasm package snappy @@ -87,7 +87,7 @@ func decode(dst, src []byte) int { } // Copy from an earlier sub-slice of dst to a later sub-slice. // If no overlap, use the built-in copy: - if offset > length { + if offset >= length { copy(dst[d:d+length], dst[d-offset:]) d += length continue diff --git a/vendor/github.com/klauspost/compress/snappy/encode.go b/vendor/github.com/golang/snappy/encode.go similarity index 98% rename from vendor/github.com/klauspost/compress/snappy/encode.go rename to vendor/github.com/golang/snappy/encode.go index 8d393e904b..7f23657076 100644 --- a/vendor/github.com/klauspost/compress/snappy/encode.go +++ b/vendor/github.com/golang/snappy/encode.go @@ -15,6 +15,8 @@ import ( // Otherwise, a newly allocated slice will be returned. // // The dst and src must not overlap. It is valid to pass a nil dst. +// +// Encode handles the Snappy block format, not the Snappy stream format. func Encode(dst, src []byte) []byte { if n := MaxEncodedLen(len(src)); n < 0 { panic(ErrTooLarge) @@ -139,6 +141,8 @@ func NewBufferedWriter(w io.Writer) *Writer { } // Writer is an io.Writer that can write Snappy-compressed bytes. +// +// Writer handles the Snappy stream format, not the Snappy block format. type Writer struct { w io.Writer err error diff --git a/vendor/github.com/klauspost/compress/snappy/encode_amd64.s b/vendor/github.com/golang/snappy/encode_amd64.s similarity index 100% rename from vendor/github.com/klauspost/compress/snappy/encode_amd64.s rename to vendor/github.com/golang/snappy/encode_amd64.s diff --git a/vendor/github.com/golang/snappy/encode_arm64.s b/vendor/github.com/golang/snappy/encode_arm64.s new file mode 100644 index 0000000000..bf83667d71 --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_arm64.s @@ -0,0 +1,722 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The asm code generally follows the pure Go code in encode_other.go, except +// where marked with a "!!!". + +// ---------------------------------------------------------------------------- + +// func emitLiteral(dst, lit []byte) int +// +// All local variables fit into registers. The register allocation: +// - R3 len(lit) +// - R4 n +// - R6 return value +// - R8 &dst[i] +// - R10 &lit[0] +// +// The 32 bytes of stack space is to call runtime·memmove. +// +// The unusual register allocation of local variables, such as R10 for the +// source pointer, matches the allocation used at the call site in encodeBlock, +// which makes it easier to manually inline this function. +TEXT ·emitLiteral(SB), NOSPLIT, $32-56 + MOVD dst_base+0(FP), R8 + MOVD lit_base+24(FP), R10 + MOVD lit_len+32(FP), R3 + MOVD R3, R6 + MOVW R3, R4 + SUBW $1, R4, R4 + + CMPW $60, R4 + BLT oneByte + CMPW $256, R4 + BLT twoBytes + +threeBytes: + MOVD $0xf4, R2 + MOVB R2, 0(R8) + MOVW R4, 1(R8) + ADD $3, R8, R8 + ADD $3, R6, R6 + B memmove + +twoBytes: + MOVD $0xf0, R2 + MOVB R2, 0(R8) + MOVB R4, 1(R8) + ADD $2, R8, R8 + ADD $2, R6, R6 + B memmove + +oneByte: + LSLW $2, R4, R4 + MOVB R4, 0(R8) + ADD $1, R8, R8 + ADD $1, R6, R6 + +memmove: + MOVD R6, ret+48(FP) + + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // R8, R10 and R3 as arguments. + MOVD R8, 8(RSP) + MOVD R10, 16(RSP) + MOVD R3, 24(RSP) + CALL runtime·memmove(SB) + RET + +// ---------------------------------------------------------------------------- + +// func emitCopy(dst []byte, offset, length int) int +// +// All local variables fit into registers. The register allocation: +// - R3 length +// - R7 &dst[0] +// - R8 &dst[i] +// - R11 offset +// +// The unusual register allocation of local variables, such as R11 for the +// offset, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. +TEXT ·emitCopy(SB), NOSPLIT, $0-48 + MOVD dst_base+0(FP), R8 + MOVD R8, R7 + MOVD offset+24(FP), R11 + MOVD length+32(FP), R3 + +loop0: + // for length >= 68 { etc } + CMPW $68, R3 + BLT step1 + + // Emit a length 64 copy, encoded as 3 bytes. + MOVD $0xfe, R2 + MOVB R2, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUB $64, R3, R3 + B loop0 + +step1: + // if length > 64 { etc } + CMP $64, R3 + BLE step2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVD $0xee, R2 + MOVB R2, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUB $60, R3, R3 + +step2: + // if length >= 12 || offset >= 2048 { goto step3 } + CMP $12, R3 + BGE step3 + CMPW $2048, R11 + BGE step3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(R8) + LSRW $3, R11, R11 + AND $0xe0, R11, R11 + SUB $4, R3, R3 + LSLW $2, R3 + AND $0xff, R3, R3 + ORRW R3, R11, R11 + ORRW $1, R11, R11 + MOVB R11, 0(R8) + ADD $2, R8, R8 + + // Return the number of bytes written. + SUB R7, R8, R8 + MOVD R8, ret+40(FP) + RET + +step3: + // Emit the remaining copy, encoded as 3 bytes. + SUB $1, R3, R3 + AND $0xff, R3, R3 + LSLW $2, R3, R3 + ORRW $2, R3, R3 + MOVB R3, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + + // Return the number of bytes written. + SUB R7, R8, R8 + MOVD R8, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func extendMatch(src []byte, i, j int) int +// +// All local variables fit into registers. The register allocation: +// - R6 &src[0] +// - R7 &src[j] +// - R13 &src[len(src) - 8] +// - R14 &src[len(src)] +// - R15 &src[i] +// +// The unusual register allocation of local variables, such as R15 for a source +// pointer, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. +TEXT ·extendMatch(SB), NOSPLIT, $0-48 + MOVD src_base+0(FP), R6 + MOVD src_len+8(FP), R14 + MOVD i+24(FP), R15 + MOVD j+32(FP), R7 + ADD R6, R14, R14 + ADD R6, R15, R15 + ADD R6, R7, R7 + MOVD R14, R13 + SUB $8, R13, R13 + +cmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMP R13, R7 + BHI cmp1 + MOVD (R15), R3 + MOVD (R7), R4 + CMP R4, R3 + BNE bsf + ADD $8, R15, R15 + ADD $8, R7, R7 + B cmp8 + +bsf: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. + // RBIT reverses the bit order, then CLZ counts the leading zeros, the + // combination of which finds the least significant bit which is set. + // The arm64 architecture is little-endian, and the shift by 3 converts + // a bit index to a byte index. + EOR R3, R4, R4 + RBIT R4, R4 + CLZ R4, R4 + ADD R4>>3, R7, R7 + + // Convert from &src[ret] to ret. + SUB R6, R7, R7 + MOVD R7, ret+40(FP) + RET + +cmp1: + // In src's tail, compare 1 byte at a time. + CMP R7, R14 + BLS extendMatchEnd + MOVB (R15), R3 + MOVB (R7), R4 + CMP R4, R3 + BNE extendMatchEnd + ADD $1, R15, R15 + ADD $1, R7, R7 + B cmp1 + +extendMatchEnd: + // Convert from &src[ret] to ret. + SUB R6, R7, R7 + MOVD R7, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func encodeBlock(dst, src []byte) (d int) +// +// All local variables fit into registers, other than "var table". The register +// allocation: +// - R3 . . +// - R4 . . +// - R5 64 shift +// - R6 72 &src[0], tableSize +// - R7 80 &src[s] +// - R8 88 &dst[d] +// - R9 96 sLimit +// - R10 . &src[nextEmit] +// - R11 104 prevHash, currHash, nextHash, offset +// - R12 112 &src[base], skip +// - R13 . &src[nextS], &src[len(src) - 8] +// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x +// - R15 120 candidate +// - R16 . hash constant, 0x1e35a7bd +// - R17 . &table +// - . 128 table +// +// The second column (64, 72, etc) is the stack offset to spill the registers +// when calling other functions. We could pack this slightly tighter, but it's +// simpler to have a dedicated spill map independent of the function called. +// +// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An +// extra 64 bytes, to call other functions, and an extra 64 bytes, to spill +// local variables (registers) during calls gives 32768 + 64 + 64 = 32896. +TEXT ·encodeBlock(SB), 0, $32896-56 + MOVD dst_base+0(FP), R8 + MOVD src_base+24(FP), R7 + MOVD src_len+32(FP), R14 + + // shift, tableSize := uint32(32-8), 1<<8 + MOVD $24, R5 + MOVD $256, R6 + MOVW $0xa7bd, R16 + MOVKW $(0x1e35<<16), R16 + +calcShift: + // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { + // shift-- + // } + MOVD $16384, R2 + CMP R2, R6 + BGE varTable + CMP R14, R6 + BGE varTable + SUB $1, R5, R5 + LSL $1, R6, R6 + B calcShift + +varTable: + // var table [maxTableSize]uint16 + // + // In the asm code, unlike the Go code, we can zero-initialize only the + // first tableSize elements. Each uint16 element is 2 bytes and each + // iterations writes 64 bytes, so we can do only tableSize/32 writes + // instead of the 2048 writes that would zero-initialize all of table's + // 32768 bytes. This clear could overrun the first tableSize elements, but + // it won't overrun the allocated stack size. + ADD $128, RSP, R17 + MOVD R17, R4 + + // !!! R6 = &src[tableSize] + ADD R6<<1, R17, R6 + +memclr: + STP.P (ZR, ZR), 64(R4) + STP (ZR, ZR), -48(R4) + STP (ZR, ZR), -32(R4) + STP (ZR, ZR), -16(R4) + CMP R4, R6 + BHI memclr + + // !!! R6 = &src[0] + MOVD R7, R6 + + // sLimit := len(src) - inputMargin + MOVD R14, R9 + SUB $15, R9, R9 + + // !!! Pre-emptively spill R5, R6 and R9 to the stack. Their values don't + // change for the rest of the function. + MOVD R5, 64(RSP) + MOVD R6, 72(RSP) + MOVD R9, 96(RSP) + + // nextEmit := 0 + MOVD R6, R10 + + // s := 1 + ADD $1, R7, R7 + + // nextHash := hash(load32(src, s), shift) + MOVW 0(R7), R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + +outer: + // for { etc } + + // skip := 32 + MOVD $32, R12 + + // nextS := s + MOVD R7, R13 + + // candidate := 0 + MOVD $0, R15 + +inner0: + // for { etc } + + // s := nextS + MOVD R13, R7 + + // bytesBetweenHashLookups := skip >> 5 + MOVD R12, R14 + LSR $5, R14, R14 + + // nextS = s + bytesBetweenHashLookups + ADD R14, R13, R13 + + // skip += bytesBetweenHashLookups + ADD R14, R12, R12 + + // if nextS > sLimit { goto emitRemainder } + MOVD R13, R3 + SUB R6, R3, R3 + CMP R9, R3 + BHI emitRemainder + + // candidate = int(table[nextHash]) + MOVHU 0(R17)(R11<<1), R15 + + // table[nextHash] = uint16(s) + MOVD R7, R3 + SUB R6, R3, R3 + + MOVH R3, 0(R17)(R11<<1) + + // nextHash = hash(load32(src, nextS), shift) + MOVW 0(R13), R11 + MULW R16, R11 + LSRW R5, R11, R11 + + // if load32(src, s) != load32(src, candidate) { continue } break + MOVW 0(R7), R3 + MOVW (R6)(R15*1), R4 + CMPW R4, R3 + BNE inner0 + +fourByteMatch: + // As per the encode_other.go code: + // + // A 4-byte match has been found. We'll later see etc. + + // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment + // on inputMargin in encode.go. + MOVD R7, R3 + SUB R10, R3, R3 + CMP $16, R3 + BLE emitLiteralFastPath + + // ---------------------------------------- + // Begin inline of the emitLiteral call. + // + // d += emitLiteral(dst[d:], src[nextEmit:s]) + + MOVW R3, R4 + SUBW $1, R4, R4 + + MOVW $60, R2 + CMPW R2, R4 + BLT inlineEmitLiteralOneByte + MOVW $256, R2 + CMPW R2, R4 + BLT inlineEmitLiteralTwoBytes + +inlineEmitLiteralThreeBytes: + MOVD $0xf4, R1 + MOVB R1, 0(R8) + MOVW R4, 1(R8) + ADD $3, R8, R8 + B inlineEmitLiteralMemmove + +inlineEmitLiteralTwoBytes: + MOVD $0xf0, R1 + MOVB R1, 0(R8) + MOVB R4, 1(R8) + ADD $2, R8, R8 + B inlineEmitLiteralMemmove + +inlineEmitLiteralOneByte: + LSLW $2, R4, R4 + MOVB R4, 0(R8) + ADD $1, R8, R8 + +inlineEmitLiteralMemmove: + // Spill local variables (registers) onto the stack; call; unspill. + // + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // R8, R10 and R3 as arguments. + MOVD R8, 8(RSP) + MOVD R10, 16(RSP) + MOVD R3, 24(RSP) + + // Finish the "d +=" part of "d += emitLiteral(etc)". + ADD R3, R8, R8 + MOVD R7, 80(RSP) + MOVD R8, 88(RSP) + MOVD R15, 120(RSP) + CALL runtime·memmove(SB) + MOVD 64(RSP), R5 + MOVD 72(RSP), R6 + MOVD 80(RSP), R7 + MOVD 88(RSP), R8 + MOVD 96(RSP), R9 + MOVD 120(RSP), R15 + ADD $128, RSP, R17 + MOVW $0xa7bd, R16 + MOVKW $(0x1e35<<16), R16 + B inner1 + +inlineEmitLiteralEnd: + // End inline of the emitLiteral call. + // ---------------------------------------- + +emitLiteralFastPath: + // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". + MOVB R3, R4 + SUBW $1, R4, R4 + AND $0xff, R4, R4 + LSLW $2, R4, R4 + MOVB R4, (R8) + ADD $1, R8, R8 + + // !!! Implement the copy from lit to dst as a 16-byte load and store. + // (Encode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only len(lit) bytes, but that's + // OK. Subsequent iterations will fix up the overrun. + // + // Note that on arm64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. + LDP 0(R10), (R0, R1) + STP (R0, R1), 0(R8) + ADD R3, R8, R8 + +inner1: + // for { etc } + + // base := s + MOVD R7, R12 + + // !!! offset := base - candidate + MOVD R12, R11 + SUB R15, R11, R11 + SUB R6, R11, R11 + + // ---------------------------------------- + // Begin inline of the extendMatch call. + // + // s = extendMatch(src, candidate+4, s+4) + + // !!! R14 = &src[len(src)] + MOVD src_len+32(FP), R14 + ADD R6, R14, R14 + + // !!! R13 = &src[len(src) - 8] + MOVD R14, R13 + SUB $8, R13, R13 + + // !!! R15 = &src[candidate + 4] + ADD $4, R15, R15 + ADD R6, R15, R15 + + // !!! s += 4 + ADD $4, R7, R7 + +inlineExtendMatchCmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMP R13, R7 + BHI inlineExtendMatchCmp1 + MOVD (R15), R3 + MOVD (R7), R4 + CMP R4, R3 + BNE inlineExtendMatchBSF + ADD $8, R15, R15 + ADD $8, R7, R7 + B inlineExtendMatchCmp8 + +inlineExtendMatchBSF: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. + // RBIT reverses the bit order, then CLZ counts the leading zeros, the + // combination of which finds the least significant bit which is set. + // The arm64 architecture is little-endian, and the shift by 3 converts + // a bit index to a byte index. + EOR R3, R4, R4 + RBIT R4, R4 + CLZ R4, R4 + ADD R4>>3, R7, R7 + B inlineExtendMatchEnd + +inlineExtendMatchCmp1: + // In src's tail, compare 1 byte at a time. + CMP R7, R14 + BLS inlineExtendMatchEnd + MOVB (R15), R3 + MOVB (R7), R4 + CMP R4, R3 + BNE inlineExtendMatchEnd + ADD $1, R15, R15 + ADD $1, R7, R7 + B inlineExtendMatchCmp1 + +inlineExtendMatchEnd: + // End inline of the extendMatch call. + // ---------------------------------------- + + // ---------------------------------------- + // Begin inline of the emitCopy call. + // + // d += emitCopy(dst[d:], base-candidate, s-base) + + // !!! length := s - base + MOVD R7, R3 + SUB R12, R3, R3 + +inlineEmitCopyLoop0: + // for length >= 68 { etc } + MOVW $68, R2 + CMPW R2, R3 + BLT inlineEmitCopyStep1 + + // Emit a length 64 copy, encoded as 3 bytes. + MOVD $0xfe, R1 + MOVB R1, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUBW $64, R3, R3 + B inlineEmitCopyLoop0 + +inlineEmitCopyStep1: + // if length > 64 { etc } + MOVW $64, R2 + CMPW R2, R3 + BLE inlineEmitCopyStep2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVD $0xee, R1 + MOVB R1, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUBW $60, R3, R3 + +inlineEmitCopyStep2: + // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } + MOVW $12, R2 + CMPW R2, R3 + BGE inlineEmitCopyStep3 + MOVW $2048, R2 + CMPW R2, R11 + BGE inlineEmitCopyStep3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(R8) + LSRW $8, R11, R11 + LSLW $5, R11, R11 + SUBW $4, R3, R3 + AND $0xff, R3, R3 + LSLW $2, R3, R3 + ORRW R3, R11, R11 + ORRW $1, R11, R11 + MOVB R11, 0(R8) + ADD $2, R8, R8 + B inlineEmitCopyEnd + +inlineEmitCopyStep3: + // Emit the remaining copy, encoded as 3 bytes. + SUBW $1, R3, R3 + LSLW $2, R3, R3 + ORRW $2, R3, R3 + MOVB R3, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + +inlineEmitCopyEnd: + // End inline of the emitCopy call. + // ---------------------------------------- + + // nextEmit = s + MOVD R7, R10 + + // if s >= sLimit { goto emitRemainder } + MOVD R7, R3 + SUB R6, R3, R3 + CMP R3, R9 + BLS emitRemainder + + // As per the encode_other.go code: + // + // We could immediately etc. + + // x := load64(src, s-1) + MOVD -1(R7), R14 + + // prevHash := hash(uint32(x>>0), shift) + MOVW R14, R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + + // table[prevHash] = uint16(s-1) + MOVD R7, R3 + SUB R6, R3, R3 + SUB $1, R3, R3 + + MOVHU R3, 0(R17)(R11<<1) + + // currHash := hash(uint32(x>>8), shift) + LSR $8, R14, R14 + MOVW R14, R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + + // candidate = int(table[currHash]) + MOVHU 0(R17)(R11<<1), R15 + + // table[currHash] = uint16(s) + ADD $1, R3, R3 + MOVHU R3, 0(R17)(R11<<1) + + // if uint32(x>>8) == load32(src, candidate) { continue } + MOVW (R6)(R15*1), R4 + CMPW R4, R14 + BEQ inner1 + + // nextHash = hash(uint32(x>>16), shift) + LSR $8, R14, R14 + MOVW R14, R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + + // s++ + ADD $1, R7, R7 + + // break out of the inner1 for loop, i.e. continue the outer loop. + B outer + +emitRemainder: + // if nextEmit < len(src) { etc } + MOVD src_len+32(FP), R3 + ADD R6, R3, R3 + CMP R3, R10 + BEQ encodeBlockEnd + + // d += emitLiteral(dst[d:], src[nextEmit:]) + // + // Push args. + MOVD R8, 8(RSP) + MOVD $0, 16(RSP) // Unnecessary, as the callee ignores it, but conservative. + MOVD $0, 24(RSP) // Unnecessary, as the callee ignores it, but conservative. + MOVD R10, 32(RSP) + SUB R10, R3, R3 + MOVD R3, 40(RSP) + MOVD R3, 48(RSP) // Unnecessary, as the callee ignores it, but conservative. + + // Spill local variables (registers) onto the stack; call; unspill. + MOVD R8, 88(RSP) + CALL ·emitLiteral(SB) + MOVD 88(RSP), R8 + + // Finish the "d +=" part of "d += emitLiteral(etc)". + MOVD 56(RSP), R1 + ADD R1, R8, R8 + +encodeBlockEnd: + MOVD dst_base+0(FP), R3 + SUB R3, R8, R8 + MOVD R8, d+48(FP) + RET diff --git a/vendor/github.com/klauspost/compress/snappy/encode_amd64.go b/vendor/github.com/golang/snappy/encode_asm.go similarity index 97% rename from vendor/github.com/klauspost/compress/snappy/encode_amd64.go rename to vendor/github.com/golang/snappy/encode_asm.go index 150d91bc8b..107c1e7141 100644 --- a/vendor/github.com/klauspost/compress/snappy/encode_amd64.go +++ b/vendor/github.com/golang/snappy/encode_asm.go @@ -5,6 +5,7 @@ // +build !appengine // +build gc // +build !noasm +// +build amd64 arm64 package snappy diff --git a/vendor/github.com/klauspost/compress/snappy/encode_other.go b/vendor/github.com/golang/snappy/encode_other.go similarity index 99% rename from vendor/github.com/klauspost/compress/snappy/encode_other.go rename to vendor/github.com/golang/snappy/encode_other.go index dbcae905e6..296d7f0beb 100644 --- a/vendor/github.com/klauspost/compress/snappy/encode_other.go +++ b/vendor/github.com/golang/snappy/encode_other.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !amd64 appengine !gc noasm +// +build !amd64,!arm64 appengine !gc noasm package snappy diff --git a/vendor/github.com/klauspost/compress/snappy/snappy.go b/vendor/github.com/golang/snappy/snappy.go similarity index 97% rename from vendor/github.com/klauspost/compress/snappy/snappy.go rename to vendor/github.com/golang/snappy/snappy.go index ea58ced882..ece692ea46 100644 --- a/vendor/github.com/klauspost/compress/snappy/snappy.go +++ b/vendor/github.com/golang/snappy/snappy.go @@ -17,7 +17,7 @@ // // The canonical, C++ implementation is at https://github.com/google/snappy and // it only implements the block format. -package snappy +package snappy // import "github.com/golang/snappy" import ( "hash/crc32" @@ -94,5 +94,5 @@ var crcTable = crc32.MakeTable(crc32.Castagnoli) // https://github.com/google/snappy/blob/master/framing_format.txt func crc(b []byte) uint32 { c := crc32.Update(0, crcTable, b) - return c>>15 | c<<17 + 0xa282ead8 + return uint32(c>>15|c<<17) + 0xa282ead8 } diff --git a/vendor/github.com/klauspost/compress/fse/compress.go b/vendor/github.com/klauspost/compress/fse/compress.go index 0d31f5ebc9..6f341914c6 100644 --- a/vendor/github.com/klauspost/compress/fse/compress.go +++ b/vendor/github.com/klauspost/compress/fse/compress.go @@ -92,7 +92,6 @@ func (c *cState) init(bw *bitWriter, ct *cTable, tableLog uint8, first symbolTra im := int32((nbBitsOut << 16) - first.deltaNbBits) lu := (im >> nbBitsOut) + first.deltaFindState c.state = c.stateTable[lu] - return } // encode the output symbol provided and write it to the bitstream. diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go index dea115b334..0823c928ce 100644 --- a/vendor/github.com/klauspost/compress/huff0/compress.go +++ b/vendor/github.com/klauspost/compress/huff0/compress.go @@ -536,7 +536,6 @@ func (s *Scratch) huffSort() { } nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)} } - return } func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { diff --git a/vendor/github.com/klauspost/compress/snappy/runbench.cmd b/vendor/github.com/klauspost/compress/snappy/runbench.cmd deleted file mode 100644 index d24eb4b47c..0000000000 --- a/vendor/github.com/klauspost/compress/snappy/runbench.cmd +++ /dev/null @@ -1,2 +0,0 @@ -del old.txt -go test -bench=. >>old.txt && go test -bench=. >>old.txt && go test -bench=. >>old.txt && benchstat -delta-test=ttest old.txt new.txt diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md index 7680bfe1dd..787813fa9e 100644 --- a/vendor/github.com/klauspost/compress/zstd/README.md +++ b/vendor/github.com/klauspost/compress/zstd/README.md @@ -16,8 +16,7 @@ Currently the package is heavily optimized for 64 bit processors and will be sig Install using `go get -u github.com/klauspost/compress`. The package is located in `github.com/klauspost/compress/zstd`. -Godoc Documentation: https://godoc.org/github.com/klauspost/compress/zstd - +[![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/compress/zstd.svg)](https://pkg.go.dev/github.com/klauspost/compress/zstd) ## Compressor @@ -152,8 +151,8 @@ This package: file out level insize outsize millis mb/s silesia.tar zskp 1 211947520 73101992 643 313.87 silesia.tar zskp 2 211947520 67504318 969 208.38 -silesia.tar zskp 3 211947520 65177448 1899 106.44 -silesia.tar zskp 4 211947520 61381950 8115 24.91 +silesia.tar zskp 3 211947520 64595893 2007 100.68 +silesia.tar zskp 4 211947520 60995370 7691 26.28 cgo zstd: silesia.tar zstd 1 211947520 73605392 543 371.56 @@ -171,8 +170,8 @@ https://files.klauspost.com/compress/gob-stream.7z file out level insize outsize millis mb/s gob-stream zskp 1 1911399616 235022249 3088 590.30 gob-stream zskp 2 1911399616 205669791 3786 481.34 -gob-stream zskp 3 1911399616 185792019 9324 195.48 -gob-stream zskp 4 1911399616 171537212 32113 56.76 +gob-stream zskp 3 1911399616 175034659 9636 189.17 +gob-stream zskp 4 1911399616 167273881 29337 62.13 gob-stream zstd 1 1911399616 249810424 2637 691.26 gob-stream zstd 3 1911399616 208192146 3490 522.31 gob-stream zstd 6 1911399616 193632038 6687 272.56 @@ -187,8 +186,8 @@ http://mattmahoney.net/dc/textdata.html file out level insize outsize millis mb/s enwik9 zskp 1 1000000000 343848582 3609 264.18 enwik9 zskp 2 1000000000 317276632 5746 165.97 -enwik9 zskp 3 1000000000 294540704 11725 81.34 -enwik9 zskp 4 1000000000 276609671 44029 21.66 +enwik9 zskp 3 1000000000 292243069 12162 78.41 +enwik9 zskp 4 1000000000 275241169 36430 26.18 enwik9 zstd 1 1000000000 358072021 3110 306.65 enwik9 zstd 3 1000000000 313734672 4784 199.35 enwik9 zstd 6 1000000000 295138875 10290 92.68 @@ -202,8 +201,8 @@ https://files.klauspost.com/compress/github-june-2days-2019.json.zst file out level insize outsize millis mb/s github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40 github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96 -github-june-2days-2019.json zskp 3 6273951764 537511906 29252 204.54 -github-june-2days-2019.json zskp 4 6273951764 512796117 97791 61.18 +github-june-2days-2019.json zskp 3 6273951764 524340691 34043 175.75 +github-june-2days-2019.json zskp 4 6273951764 503314661 93811 63.78 github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00 github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57 github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18 @@ -217,8 +216,8 @@ https://files.klauspost.com/compress/rawstudio-mint14.7z file out level insize outsize millis mb/s rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84 rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07 -rawstudio-mint14.tar zskp 3 8558382592 3224594213 71751 113.75 -rawstudio-mint14.tar zskp 4 8558382592 3027332295 486243 16.79 +rawstudio-mint14.tar zskp 3 8558382592 3158085214 77675 105.08 +rawstudio-mint14.tar zskp 4 8558382592 3020370044 404956 20.16 rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27 rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92 rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77 @@ -232,8 +231,8 @@ https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst file out level insize outsize millis mb/s nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35 nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44 -nyc-taxi-data-10M.csv zskp 3 3325605752 538490114 19880 159.53 -nyc-taxi-data-10M.csv zskp 4 3325605752 495986829 89368 35.49 +nyc-taxi-data-10M.csv zskp 3 3325605752 530289687 25239 125.66 +nyc-taxi-data-10M.csv zskp 4 3325605752 490907191 65939 48.10 nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18 nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07 nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27 @@ -405,13 +404,28 @@ BenchmarkDecoder_DecodeAllParallelCgo/comp-data.bin.zst-16 749938 This reflects the performance around May 2020, but this may be out of date. +## Zstd inside ZIP files + +It is possible to use zstandard to compress individual files inside zip archives. +While this isn't widely supported it can be useful for internal files. + +To support the compression and decompression of these files you must register a compressor and decompressor. + +It is highly recommended registering the (de)compressors on individual zip Reader/Writer and NOT +use the global registration functions. The main reason for this is that 2 registrations from +different packages will result in a panic. + +It is a good idea to only have a single compressor and decompressor, since they can be used for multiple zip +files concurrently, and using a single instance will allow reusing some resources. + +See [this example](https://pkg.go.dev/github.com/klauspost/compress/zstd#example-ZipCompressor) for +how to compress and decompress files inside zip archives. + # Contributions Contributions are always welcome. For new features/fixes, remember to add tests and for performance enhancements include benchmarks. -For sending files for reproducing errors use a service like [goobox](https://goobox.io/#/upload) or similar to share your files. - For general feedback and experience reports, feel free to open an issue or write me on [Twitter](https://twitter.com/sh0dan). This package includes the excellent [`github.com/cespare/xxhash`](https://github.com/cespare/xxhash) package Copyright (c) 2016 Caleb Spare. diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go index b51d922bda..6cea054d2d 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -123,12 +123,10 @@ func newBlockDec(lowMem bool) *blockDec { // Input must be a start of a block and will be at the end of the block when returned. func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { b.WindowSize = windowSize - tmp := br.readSmall(3) - if tmp == nil { - if debug { - println("Reading block header:", io.ErrUnexpectedEOF) - } - return io.ErrUnexpectedEOF + tmp, err := br.readSmall(3) + if err != nil { + println("Reading block header:", err) + return err } bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16) b.Last = bh&1 != 0 @@ -179,7 +177,6 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { if cap(b.dst) <= maxSize { b.dst = make([]byte, 0, maxSize+1) } - var err error b.data, err = br.readBig(cSize, b.dataStorage) if err != nil { if debug { diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go index 9647c64e53..e1be092f32 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockenc.go +++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go @@ -386,9 +386,9 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error { b.output = bh.appendTo(b.output) b.output = append(b.output, lits[0]) return nil + case nil: default: return err - case nil: } // Compressed... // Now, allow reuse @@ -528,11 +528,6 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { if debug { println("Adding literals RLE") } - default: - if debug { - println("Adding literals ERROR:", err) - } - return err case nil: // Compressed litLen... if reUsed { @@ -563,6 +558,11 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { if debug { println("Adding literals compressed") } + default: + if debug { + println("Adding literals ERROR:", err) + } + return err } // Sequence compression diff --git a/vendor/github.com/klauspost/compress/zstd/bytebuf.go b/vendor/github.com/klauspost/compress/zstd/bytebuf.go index 658ef78380..17e820a6a6 100644 --- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go +++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go @@ -12,8 +12,8 @@ import ( type byteBuffer interface { // Read up to 8 bytes. - // Returns nil if no more input is available. - readSmall(n int) []byte + // Returns io.ErrUnexpectedEOF if this cannot be satisfied. + readSmall(n int) ([]byte, error) // Read >8 bytes. // MAY use the destination slice. @@ -29,17 +29,17 @@ type byteBuffer interface { // in-memory buffer type byteBuf []byte -func (b *byteBuf) readSmall(n int) []byte { +func (b *byteBuf) readSmall(n int) ([]byte, error) { if debugAsserts && n > 8 { panic(fmt.Errorf("small read > 8 (%d). use readBig", n)) } bb := *b if len(bb) < n { - return nil + return nil, io.ErrUnexpectedEOF } r := bb[:n] *b = bb[n:] - return r + return r, nil } func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) { @@ -81,19 +81,22 @@ type readerWrapper struct { tmp [8]byte } -func (r *readerWrapper) readSmall(n int) []byte { +func (r *readerWrapper) readSmall(n int) ([]byte, error) { if debugAsserts && n > 8 { panic(fmt.Errorf("small read > 8 (%d). use readBig", n)) } n2, err := io.ReadFull(r.r, r.tmp[:n]) // We only really care about the actual bytes read. - if n2 != n { + if err != nil { + if err == io.EOF { + return nil, io.ErrUnexpectedEOF + } if debug { println("readSmall: got", n2, "want", n, "err", err) } - return nil + return nil, err } - return r.tmp[:n] + return r.tmp[:n], nil } func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) { diff --git a/vendor/github.com/klauspost/compress/zstd/decodeheader.go b/vendor/github.com/klauspost/compress/zstd/decodeheader.go index 87896c5eaa..69736e8d4b 100644 --- a/vendor/github.com/klauspost/compress/zstd/decodeheader.go +++ b/vendor/github.com/klauspost/compress/zstd/decodeheader.go @@ -93,7 +93,7 @@ func (h *Header) Decode(in []byte) error { h.HasCheckSum = fhd&(1<<2) != 0 if fhd&(1<<3) != 0 { - return errors.New("Reserved bit set on frame header") + return errors.New("reserved bit set on frame header") } // Read Window_Descriptor @@ -174,7 +174,7 @@ func (h *Header) Decode(in []byte) error { if len(in) < 3 { return nil } - tmp, in := in[:3], in[3:] + tmp := in[:3] bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16) h.FirstBlock.Last = bh&1 != 0 blockType := blockType((bh >> 1) & 3) diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index 1d41c25d29..f593e464b6 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -179,8 +179,7 @@ func (d *Decoder) Reset(r io.Reader) error { // If bytes buffer and < 1MB, do sync decoding anyway. if bb, ok := r.(byter); ok && bb.Len() < 1<<20 { - var bb2 byter - bb2 = bb + bb2 := bb if debug { println("*bytes.Buffer detected, doing sync decode, len:", bb.Len()) } @@ -237,20 +236,17 @@ func (d *Decoder) drainOutput() { println("current already flushed") return } - for { - select { - case v := <-d.current.output: - if v.d != nil { - if debug { - printf("re-adding decoder %p", v.d) - } - d.decoders <- v.d - } - if v.err == errEndOfStream { - println("current flushed") - d.current.flushed = true - return + for v := range d.current.output { + if v.d != nil { + if debug { + printf("re-adding decoder %p", v.d) } + d.decoders <- v.d + } + if v.err == errEndOfStream { + println("current flushed") + d.current.flushed = true + return } } } diff --git a/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/vendor/github.com/klauspost/compress/zstd/decoder_options.go index 284d384492..c0fd058c28 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go @@ -6,7 +6,6 @@ package zstd import ( "errors" - "fmt" "runtime" ) @@ -43,7 +42,7 @@ func WithDecoderLowmem(b bool) DOption { func WithDecoderConcurrency(n int) DOption { return func(o *decoderOptions) error { if n <= 0 { - return fmt.Errorf("Concurrency must be at least 1") + return errors.New("concurrency must be at least 1") } o.concurrent = n return nil @@ -61,7 +60,7 @@ func WithDecoderMaxMemory(n uint64) DOption { return errors.New("WithDecoderMaxMemory must be at least 1") } if n > 1<<63 { - return fmt.Errorf("WithDecoderMaxmemory must be less than 1 << 63") + return errors.New("WithDecoderMaxmemory must be less than 1 << 63") } o.maxDecodedSize = n return nil diff --git a/vendor/github.com/klauspost/compress/zstd/enc_base.go b/vendor/github.com/klauspost/compress/zstd/enc_base.go index 2d4d893eb9..60f2986486 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_base.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_base.go @@ -150,14 +150,15 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) { } else { e.crc.Reset() } - if (!singleBlock || d.DictContentSize() > 0) && cap(e.hist) < int(e.maxMatchOff*2)+d.DictContentSize() { - l := e.maxMatchOff*2 + int32(d.DictContentSize()) - // Make it at least 1MB. - if l < 1<<20 { - l = 1 << 20 + if d != nil { + low := e.lowMem + if singleBlock { + e.lowMem = true } - e.hist = make([]byte, 0, l) + e.ensureHist(d.DictContentSize() + maxCompressedBlockSize) + e.lowMem = low } + // We offset current position so everything will be out of reach. // If above reset line, history will be purged. if e.cur < bufferReset { diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go index fe3625c5f5..dc1eed5f00 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_best.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go @@ -220,6 +220,20 @@ encodeLoop: best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)) best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1)) best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1)) + + // See if we can find a better match by checking where the current best ends. + // Use that offset to see if we can find a better full match. + if sAt := best.s + best.length; sAt < sLimit { + nextHashL := hash8(load6432(src, sAt), bestLongTableBits) + candidateEnd := e.longTable[nextHashL] + if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 { + bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1)) + if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 { + bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1)) + } + best = bestEnd + } + } } // We have a match, we can store the forward value diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go index c2ce4a2bac..604954290e 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_better.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -412,8 +412,41 @@ encodeLoop: cv = load6432(src, s) } - // A 4-byte match has been found. Update recent offsets. - // We'll later see if more than 4 bytes. + // Try to find a better match by searching for a long match at the end of the current best match + if true && s+matched < sLimit { + nextHashL := hash8(load6432(src, s+matched), betterLongTableBits) + cv := load3232(src, s) + candidateL := e.longTable[nextHashL] + coffsetL := candidateL.offset - e.cur - matched + if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + // Found a long match, at least 4 bytes. + matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + if matchedNext > matched { + t = coffsetL + matched = matchedNext + if debugMatches { + println("long match at end-of-match") + } + } + } + + // Check prev long... + if true { + coffsetL = candidateL.prev - e.cur - matched + if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + // Found a long match, at least 4 bytes. + matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + if matchedNext > matched { + t = coffsetL + matched = matchedNext + if debugMatches { + println("prev long match at end-of-match") + } + } + } + } + } + // A match has been found. Update recent offsets. offset2 = offset1 offset1 = s - t @@ -905,9 +938,41 @@ encodeLoop: } cv = load6432(src, s) } + // Try to find a better match by searching for a long match at the end of the current best match + if s+matched < sLimit { + nextHashL := hash8(load6432(src, s+matched), betterLongTableBits) + cv := load3232(src, s) + candidateL := e.longTable[nextHashL] + coffsetL := candidateL.offset - e.cur - matched + if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + // Found a long match, at least 4 bytes. + matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + if matchedNext > matched { + t = coffsetL + matched = matchedNext + if debugMatches { + println("long match at end-of-match") + } + } + } - // A 4-byte match has been found. Update recent offsets. - // We'll later see if more than 4 bytes. + // Check prev long... + if true { + coffsetL = candidateL.prev - e.cur - matched + if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + // Found a long match, at least 4 bytes. + matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + if matchedNext > matched { + t = coffsetL + matched = matchedNext + if debugMatches { + println("prev long match at end-of-match") + } + } + } + } + } + // A match has been found. Update recent offsets. offset2 = offset1 offset1 = s - t diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index 6f0265099e..4871dd03af 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -340,13 +340,13 @@ func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) { println("ReadFrom: got EOF final block:", len(e.state.filling)) } return n, nil + case nil: default: if debug { println("ReadFrom: got error:", err) } e.state.err = err return n, err - case nil: } if len(src) > 0 { if debug { diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go index 18a47eb03e..16d4ab63c1 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go @@ -73,7 +73,7 @@ func WithEncoderCRC(b bool) EOption { } // WithEncoderConcurrency will set the concurrency, -// meaning the maximum number of decoders to run concurrently. +// meaning the maximum number of encoders to run concurrently. // The value supplied must be at least 1. // By default this will be set to GOMAXPROCS. func WithEncoderConcurrency(n int) EOption { diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go index fc4a566d39..4dc1512139 100644 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -80,9 +80,14 @@ func (d *frameDec) reset(br byteBuffer) error { d.WindowSize = 0 var b []byte for { - b = br.readSmall(4) - if b == nil { + var err error + b, err = br.readSmall(4) + switch err { + case io.EOF, io.ErrUnexpectedEOF: return io.EOF + default: + return err + case nil: } if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 { if debug { @@ -92,14 +97,14 @@ func (d *frameDec) reset(br byteBuffer) error { break } // Read size to skip - b = br.readSmall(4) - if b == nil { - println("Reading Frame Size EOF") - return io.ErrUnexpectedEOF + b, err = br.readSmall(4) + if err != nil { + println("Reading Frame Size", err) + return err } n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) println("Skipping frame with", n, "bytes.") - err := br.skipN(int(n)) + err = br.skipN(int(n)) if err != nil { if debug { println("Reading discarded frame", err) @@ -121,7 +126,7 @@ func (d *frameDec) reset(br byteBuffer) error { d.SingleSegment = fhd&(1<<5) != 0 if fhd&(1<<3) != 0 { - return errors.New("Reserved bit set on frame header") + return errors.New("reserved bit set on frame header") } // Read Window_Descriptor @@ -147,12 +152,11 @@ func (d *frameDec) reset(br byteBuffer) error { if size == 3 { size = 4 } - b = br.readSmall(int(size)) - if b == nil { - if debug { - println("Reading Dictionary_ID", io.ErrUnexpectedEOF) - } - return io.ErrUnexpectedEOF + + b, err = br.readSmall(int(size)) + if err != nil { + println("Reading Dictionary_ID", err) + return err } var id uint32 switch size { @@ -187,10 +191,10 @@ func (d *frameDec) reset(br byteBuffer) error { } d.FrameContentSize = 0 if fcsSize > 0 { - b := br.readSmall(fcsSize) - if b == nil { - println("Reading Frame content", io.ErrUnexpectedEOF) - return io.ErrUnexpectedEOF + b, err = br.readSmall(fcsSize) + if err != nil { + println("Reading Frame content", err) + return err } switch fcsSize { case 1: @@ -307,10 +311,10 @@ func (d *frameDec) checkCRC() error { tmp[3] = byte(got >> 24) // We can overwrite upper tmp now - want := d.rawInput.readSmall(4) - if want == nil { - println("CRC missing?") - return io.ErrUnexpectedEOF + want, err := d.rawInput.readSmall(4) + if err != nil { + println("CRC missing?", err) + return err } if !bytes.Equal(tmp[:], want) { diff --git a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go index b80709d5ea..c74681b999 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go @@ -708,7 +708,6 @@ func (c *cState) init(bw *bitWriter, ct *cTable, first symbolTransform) { im := int32((nbBitsOut << 16) - first.deltaNbBits) lu := (im >> nbBitsOut) + int32(first.deltaFindState) c.state = c.stateTable[lu] - return } // encode the output symbol provided and write it to the bitstream. diff --git a/vendor/github.com/klauspost/compress/zstd/fse_predefined.go b/vendor/github.com/klauspost/compress/zstd/fse_predefined.go index 6c17dc17f4..474cb77d2b 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_predefined.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_predefined.go @@ -59,7 +59,7 @@ func fillBase(dst []baseOffset, base uint32, bits ...uint8) { } for i, bit := range bits { if base > math.MaxInt32 { - panic(fmt.Sprintf("invalid decoding table, base overflows int32")) + panic("invalid decoding table, base overflows int32") } dst[i] = baseOffset{ diff --git a/vendor/github.com/klauspost/compress/zstd/seqenc.go b/vendor/github.com/klauspost/compress/zstd/seqenc.go index 36bcc3cc02..8014174a77 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqenc.go +++ b/vendor/github.com/klauspost/compress/zstd/seqenc.go @@ -35,7 +35,6 @@ func (s *seqCoders) setPrev(ll, ml, of *fseEncoder) { // Ensure we cannot reuse by accident prevEnc := *prev prevEnc.symbolLen = 0 - return } compareSwap(ll, &s.llEnc, &s.llPrev) compareSwap(ml, &s.mlEnc, &s.mlPrev) diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go index c95fe5111c..9d9d1d567e 100644 --- a/vendor/github.com/klauspost/compress/zstd/snappy.go +++ b/vendor/github.com/klauspost/compress/zstd/snappy.go @@ -10,8 +10,8 @@ import ( "hash/crc32" "io" + "github.com/golang/snappy" "github.com/klauspost/compress/huff0" - "github.com/klauspost/compress/snappy" ) const ( @@ -185,7 +185,6 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) { r.block.reset(nil) r.block.literals, err = snappy.Decode(r.block.literals[:n], r.buf[snappyChecksumSize:chunkLen]) if err != nil { - println("snappy.Decode:", err) return written, err } err = r.block.encodeLits(r.block.literals, false) diff --git a/vendor/github.com/klauspost/compress/zstd/zip.go b/vendor/github.com/klauspost/compress/zstd/zip.go new file mode 100644 index 0000000000..9325b928ae --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/zip.go @@ -0,0 +1,121 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. + +package zstd + +import ( + "errors" + "io" + "sync" +) + +// ZipMethodWinZip is the method for Zstandard compressed data inside Zip files for WinZip. +// See https://www.winzip.com/win/en/comp_info.html +const ZipMethodWinZip = 93 + +// ZipMethodPKWare is the original method number used by PKWARE to indicate Zstandard compression. +// Deprecated: This has been deprecated by PKWARE, use ZipMethodWinZip instead for compression. +// See https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.9.TXT +const ZipMethodPKWare = 20 + +var zipReaderPool sync.Pool + +// newZipReader cannot be used since we would leak goroutines... +func newZipReader(r io.Reader) io.ReadCloser { + dec, ok := zipReaderPool.Get().(*Decoder) + if ok { + dec.Reset(r) + } else { + d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true)) + if err != nil { + panic(err) + } + dec = d + } + return &pooledZipReader{dec: dec} +} + +type pooledZipReader struct { + mu sync.Mutex // guards Close and Read + dec *Decoder +} + +func (r *pooledZipReader) Read(p []byte) (n int, err error) { + r.mu.Lock() + defer r.mu.Unlock() + if r.dec == nil { + return 0, errors.New("Read after Close") + } + dec, err := r.dec.Read(p) + + return dec, err +} + +func (r *pooledZipReader) Close() error { + r.mu.Lock() + defer r.mu.Unlock() + var err error + if r.dec != nil { + err = r.dec.Reset(nil) + zipReaderPool.Put(r.dec) + r.dec = nil + } + return err +} + +type pooledZipWriter struct { + mu sync.Mutex // guards Close and Read + enc *Encoder +} + +func (w *pooledZipWriter) Write(p []byte) (n int, err error) { + w.mu.Lock() + defer w.mu.Unlock() + if w.enc == nil { + return 0, errors.New("Write after Close") + } + return w.enc.Write(p) +} + +func (w *pooledZipWriter) Close() error { + w.mu.Lock() + defer w.mu.Unlock() + var err error + if w.enc != nil { + err = w.enc.Close() + zipReaderPool.Put(w.enc) + w.enc = nil + } + return err +} + +// ZipCompressor returns a compressor that can be registered with zip libraries. +// The provided encoder options will be used on all encodes. +func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) { + var pool sync.Pool + return func(w io.Writer) (io.WriteCloser, error) { + enc, ok := pool.Get().(*Encoder) + if ok { + enc.Reset(w) + } else { + var err error + enc, err = NewWriter(w, opts...) + if err != nil { + return nil, err + } + } + return &pooledZipWriter{enc: enc}, nil + } +} + +// ZipDecompressor returns a decompressor that can be registered with zip libraries. +// See ZipCompressor for example. +func ZipDecompressor() func(r io.Reader) io.ReadCloser { + return func(r io.Reader) io.ReadCloser { + d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true)) + if err != nil { + panic(err) + } + return d.IOReadCloser() + } +} diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index 9056beef27..1ba308c8bf 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -5,6 +5,7 @@ package zstd import ( "bytes" + "encoding/binary" "errors" "log" "math" @@ -126,26 +127,15 @@ func matchLen(a, b []byte) int { } func load3232(b []byte, i int32) uint32 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:4] - return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 + return binary.LittleEndian.Uint32(b[i:]) } func load6432(b []byte, i int32) uint64 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:8] - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 + return binary.LittleEndian.Uint64(b[i:]) } func load64(b []byte, i int) uint64 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:8] - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 + return binary.LittleEndian.Uint64(b[i:]) } type byter interface { diff --git a/vendor/modules.txt b/vendor/modules.txt index 1413087c2c..b8d2779e98 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -429,6 +429,9 @@ github.com/golang/protobuf/ptypes/duration github.com/golang/protobuf/ptypes/empty github.com/golang/protobuf/ptypes/struct github.com/golang/protobuf/ptypes/timestamp +# github.com/golang/snappy v0.0.3 +## explicit +github.com/golang/snappy # github.com/google/btree v1.0.1 ## explicit; go 1.12 github.com/google/btree @@ -515,11 +518,10 @@ github.com/jmespath/go-jmespath github.com/jstemmer/go-junit-report github.com/jstemmer/go-junit-report/formatter github.com/jstemmer/go-junit-report/parser -# github.com/klauspost/compress v1.12.3 => github.com/klauspost/compress v1.11.13 +# github.com/klauspost/compress v1.12.3 ## explicit; go 1.13 github.com/klauspost/compress/fse github.com/klauspost/compress/huff0 -github.com/klauspost/compress/snappy github.com/klauspost/compress/zstd github.com/klauspost/compress/zstd/internal/xxhash # github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 => github.com/matttproud/golang_protobuf_extensions v1.0.1 @@ -1001,7 +1003,6 @@ gotest.tools/v3/skip # github.com/hashicorp/go-multierror => github.com/hashicorp/go-multierror v1.0.0 # github.com/hashicorp/serf => github.com/hashicorp/serf v0.7.1-0.20160317193612-598c54895cc5 # github.com/hpcloud/tail => github.com/hpcloud/tail v1.0.0 -# github.com/klauspost/compress => github.com/klauspost/compress v1.11.13 # github.com/matttproud/golang_protobuf_extensions => github.com/matttproud/golang_protobuf_extensions v1.0.1 # github.com/onsi/ginkgo => github.com/onsi/ginkgo v1.8.0 # github.com/onsi/gomega => github.com/onsi/gomega v1.5.0