mirror of https://github.com/mjl-/mox.git, synced 2025-04-21 21:40:01 +03:00
implement IMAP extension COMPRESS=DEFLATE, rfc 4978
to compress the entire IMAP connection. tested with thunderbird, meli, k9 and ios mail. the initial implementation had interoperability issues with some of these clients: if they write the deflate stream and flush in "partial flush" mode, the go stdlib flate reader does not return any data (until there is an explicit zero-length "sync flush" block, or until the history/sliding window is full), blocking progress and causing clients to close the seemingly stuck connection once they consider it timed out. this commit includes a copy of the flate package with a new reader that returns partially flushed blocks earlier. it also adds imap trace logging to imapclient.Conn, which was useful for debugging.
parent 3f6c45a41f, commit f40f94670e
25 changed files with 3623 additions and 69 deletions
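For context, a minimal sketch (not part of this commit) of how the updated imapclient API could be used to enable compression on a connection. The server address, credentials and the cid value 1 are placeholders, and error handling is kept short:

	package main

	import (
		"log"
		"net"

		"github.com/mjl-/mox/imapclient"
	)

	func main() {
		// Placeholder address; any IMAP server announcing COMPRESS=DEFLATE works.
		conn, err := net.Dial("tcp", "mail.example:143")
		if err != nil {
			log.Fatalf("dial: %v", err)
		}
		defer conn.Close()

		// New now takes a connection id (cid) as its first parameter, used for the
		// new "CR:"/"CW:" trace logging. With xpanic false, errors are returned.
		client, err := imapclient.New(1, conn, false)
		if err != nil {
			log.Fatalf("new imapclient: %v", err)
		}
		if _, _, err := client.Login("user@example.com", "password"); err != nil {
			log.Fatalf("login: %v", err)
		}
		// Enable COMPRESS=DEFLATE; the call fails if the server did not announce the capability.
		if _, _, err := client.CompressDeflate(); err != nil {
			log.Fatalf("compress deflate: %v", err)
		}
		// All further commands, like SELECT and LOGOUT, now travel over the deflate-compressed stream.
		client.Select("inbox")
		client.Logout()
	}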
@@ -522,7 +522,7 @@ func TestCtl(t *testing.T) {
 	testctl(func(ctl *ctl) {
 		a, b := net.Pipe()
 		go func() {
-			client, err := imapclient.New(a, true)
+			client, err := imapclient.New(mox.Cid(), a, true)
 			tcheck(t, err, "new imapclient")
 			client.Select("inbox")
 			client.Logout()
go.mod (3 changed lines)
@@ -6,6 +6,7 @@ require (
 	github.com/mjl-/adns v0.0.0-20240509092456-2dc8715bf4af
 	github.com/mjl-/autocert v0.0.0-20231214125928-31b7400acb05
 	github.com/mjl-/bstore v0.0.6
+	github.com/mjl-/flate v0.0.0-20250221133712-6372d09eb978
 	github.com/mjl-/sconf v0.0.7
 	github.com/mjl-/sherpa v0.6.7
 	github.com/mjl-/sherpadoc v0.0.16
@@ -17,6 +18,7 @@ require (
 	golang.org/x/crypto v0.32.0
 	golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f
 	golang.org/x/net v0.34.0
+	golang.org/x/sys v0.29.0
 	golang.org/x/text v0.21.0
 	rsc.io/qr v0.2.0
 )
@@ -31,7 +33,6 @@ require (
 	github.com/prometheus/procfs v0.12.0 // indirect
 	golang.org/x/mod v0.22.0 // indirect
 	golang.org/x/sync v0.10.0 // indirect
-	golang.org/x/sys v0.29.0 // indirect
 	golang.org/x/tools v0.29.0 // indirect
 	google.golang.org/protobuf v1.31.0 // indirect
 )
go.sum (2 changed lines)
@@ -30,6 +30,8 @@ github.com/mjl-/autocert v0.0.0-20231214125928-31b7400acb05 h1:s6ay4bh4tmpPLdxjy
 github.com/mjl-/autocert v0.0.0-20231214125928-31b7400acb05/go.mod h1:taMFU86abMxKLPV4Bynhv8enbYmS67b8LG80qZv2Qus=
 github.com/mjl-/bstore v0.0.6 h1:ntlu9MkfCkpm2XfBY4+Ws4KK9YzXzewr3+lCueFB+9c=
 github.com/mjl-/bstore v0.0.6/go.mod h1:/cD25FNBaDfvL/plFRxI3Ba3E+wcB0XVOS8nJDqndg0=
+github.com/mjl-/flate v0.0.0-20250221133712-6372d09eb978 h1:Eg5DfI3/00URzGErujKus6a3O0kyXzF8vjoDZzH/gig=
+github.com/mjl-/flate v0.0.0-20250221133712-6372d09eb978/go.mod h1:QBkFtjai3AiQQuUu7pVh6PA06Vd3oa68E+vddf/UBOs=
 github.com/mjl-/sconf v0.0.7 h1:bdBcSFZCDFMm/UdBsgNCsjkYmKrSgYwp7rAOoufwHe4=
 github.com/mjl-/sconf v0.0.7/go.mod h1:uF8OdWtLT8La3i4ln176i1pB0ps9pXGCaABEU55ZkE0=
 github.com/mjl-/sherpa v0.6.7 h1:C5F8XQdV5nCuS4fvB+ye/ziUQrajEhOoj/t2w5T14BY=
@@ -21,12 +21,26 @@ import (
 	"net"
 	"reflect"
 	"strings"
+
+	"github.com/mjl-/flate"
+
+	"github.com/mjl-/mox/mlog"
+	"github.com/mjl-/mox/moxio"
 )
 
 // Conn is an IMAP connection to a server.
 type Conn struct {
-	conn net.Conn
-	r    *bufio.Reader
+	// Connection, may be original TCP or TLS connection. Reads go through c.br, and
+	// writes through c.bw. It wraps a tracing reading/writer and may wrap flate
+	// compression.
+	conn net.Conn
+	br   *bufio.Reader
+	bw   *bufio.Writer
+	compress    bool // If compression is enabled, we must flush flateWriter and its target original bufio writer.
+	flateWriter *flate.Writer
+	flateBW     *bufio.Writer
+
+	log    mlog.Log
 	panic  bool
 	tagGen int
 	record bool // If true, bytes read are added to recordBuf. recorded() resets.
@@ -57,14 +71,18 @@ func (e Error) Unwrap() error {
 // The initial untagged greeting response is read and must be "OK" or
 // "PREAUTH". If preauth, the connection is already in authenticated state,
 // typically through TLS client certificate. This is indicated in Conn.Preauth.
-func New(conn net.Conn, xpanic bool) (client *Conn, rerr error) {
+func New(cid int64, conn net.Conn, xpanic bool) (client *Conn, rerr error) {
+	log := mlog.New("imapclient", nil).WithCid(cid)
 	c := Conn{
 		conn: conn,
-		r:    bufio.NewReader(conn),
+		br:   bufio.NewReader(moxio.NewTraceReader(log, "CR: ", conn)),
+		log:  log,
 		panic: xpanic,
 		CapAvailable: map[Capability]struct{}{},
 		CapEnabled:   map[Capability]struct{}{},
 	}
+	// Writes are buffered and write to Conn, which may panic.
+	c.bw = bufio.NewWriter(moxio.NewTraceWriter(log, "CW: ", &c))
 
 	defer c.recover(&rerr)
 	tag := c.xnonspace()
@@ -122,6 +140,53 @@ func (c *Conn) xcheck(err error) {
 	}
 }
 
+// Write writes directly to the connection. Write errors do take the connection's
+// panic mode into account, i.e. Write can panic.
+func (c *Conn) Write(buf []byte) (n int, rerr error) {
+	defer c.recover(&rerr)
+
+	n, rerr = c.conn.Write(buf)
+	c.xcheckf(rerr, "write")
+	return n, nil
+}
+
+func (c *Conn) xflush() {
+	err := c.bw.Flush()
+	c.xcheckf(err, "flush")
+
+	// If compression is active, we need to flush the deflate stream.
+	if c.compress {
+		err := c.flateWriter.Flush()
+		c.xcheckf(err, "flush deflate")
+		err = c.flateBW.Flush()
+		c.xcheckf(err, "flush deflate buffer")
+	}
+}
+
+// Close closes the connection, flushing and closing any compression and TLS layer.
+//
+// You may want to call Logout first. Closing a connection with a mailbox with
+// deleted messages not yet expunged will not expunge those messages.
+func (c *Conn) Close() (rerr error) {
+	defer c.recover(&rerr)
+
+	if c.conn == nil {
+		return nil
+	}
+	if c.flateWriter != nil {
+		err := c.flateWriter.Close()
+		c.xcheckf(err, "close deflate writer")
+		err = c.flateBW.Flush()
+		c.xcheckf(err, "flush deflate buffer")
+		c.flateWriter = nil
+		c.flateBW = nil
+	}
+	err := c.conn.Close()
+	c.xcheckf(err, "close connection")
+	c.conn = nil
+	return
+}
+
 // TLSConnectionState returns the TLS connection state if the connection uses TLS.
 func (c *Conn) TLSConnectionState() *tls.ConnectionState {
 	if conn, ok := c.conn.(*tls.Conn); ok {
@@ -141,8 +206,9 @@ func (c *Conn) Commandf(tag string, format string, args ...any) (rerr error) {
 	}
 	c.LastTag = tag
 
-	_, err := fmt.Fprintf(c.conn, "%s %s\r\n", tag, fmt.Sprintf(format, args...))
+	_, err := fmt.Fprintf(c.bw, "%s %s\r\n", tag, fmt.Sprintf(format, args...))
 	c.xcheckf(err, "write command")
+	c.xflush()
 	return
 }
@@ -196,7 +262,7 @@ func (c *Conn) ReadUntagged() (untagged Untagged, rerr error) {
 func (c *Conn) Readline() (line string, rerr error) {
 	defer c.recover(&rerr)
 
-	line, err := c.r.ReadString('\n')
+	line, err := c.br.ReadString('\n')
 	c.xcheckf(err, "read line")
 	return line, nil
 }
@@ -225,37 +291,30 @@ func (c *Conn) Writelinef(format string, args ...any) (rerr error) {
 	defer c.recover(&rerr)
 
 	s := fmt.Sprintf(format, args...)
-	_, err := fmt.Fprintf(c.conn, "%s\r\n", s)
+	_, err := fmt.Fprintf(c.bw, "%s\r\n", s)
 	c.xcheckf(err, "writeline")
+	c.xflush()
 	return nil
 }
 
-// Write writes directly to the connection. Write errors do take the connections
-// panic mode into account, i.e. Write can panic.
-func (c *Conn) Write(buf []byte) (n int, rerr error) {
-	defer c.recover(&rerr)
-
-	n, rerr = c.conn.Write(buf)
-	c.xcheckf(rerr, "write")
-	return n, nil
-}
-
-// WriteSyncLiteral first writes the synchronous literal size, then read the
+// WriteSyncLiteral first writes the synchronous literal size, then reads the
 // continuation "+" and finally writes the data.
 func (c *Conn) WriteSyncLiteral(s string) (untagged []Untagged, rerr error) {
 	defer c.recover(&rerr)
 
-	_, err := fmt.Fprintf(c.conn, "{%d}\r\n", len(s))
+	_, err := fmt.Fprintf(c.bw, "{%d}\r\n", len(s))
 	c.xcheckf(err, "write sync literal size")
+	c.xflush()
 
-	plus, err := c.r.Peek(1)
+	plus, err := c.br.Peek(1)
 	c.xcheckf(err, "read continuation")
 	if plus[0] == '+' {
 		_, err = c.Readline()
 		c.xcheckf(err, "read continuation line")
 
-		_, err = c.conn.Write([]byte(s))
+		_, err = c.bw.Write([]byte(s))
 		c.xcheckf(err, "write literal data")
+		c.xflush()
 		return nil, nil
 	}
 	untagged, result, err := c.Response()
@@ -301,15 +360,3 @@ func (c *Conn) xgetUntagged(l []Untagged, dst any) {
 	}
 	dstv.Elem().Set(gotv)
 }
-
-// Close closes the connection without writing anything to the server.
-// You may want to call Logout. Closing a connection with a mailbox with deleted
-// message not yet expunged will not expunge those messages.
-func (c *Conn) Close() error {
-	var err error
-	if c.conn != nil {
-		err = c.conn.Close()
-		c.conn = nil
-	}
-	return err
-}
@ -9,6 +9,10 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/mjl-/flate"
|
||||
|
||||
"github.com/mjl-/mox/mlog"
|
||||
"github.com/mjl-/mox/moxio"
|
||||
"github.com/mjl-/mox/scram"
|
||||
)
|
||||
|
||||
|
@ -39,11 +43,14 @@ func (c *Conn) Starttls(config *tls.Config) (untagged []Untagged, result Result,
|
|||
defer c.recover(&rerr)
|
||||
untagged, result, rerr = c.Transactf("starttls")
|
||||
c.xcheckf(rerr, "starttls command")
|
||||
conn := tls.Client(c.conn, config)
|
||||
err := conn.Handshake()
|
||||
|
||||
conn := c.xprefixConn()
|
||||
tlsConn := tls.Client(conn, config)
|
||||
err := tlsConn.Handshake()
|
||||
c.xcheckf(err, "tls handshake")
|
||||
c.conn = conn
|
||||
c.r = bufio.NewReader(conn)
|
||||
c.conn = tlsConn
|
||||
c.br = bufio.NewReader(moxio.NewTraceReader(c.log, "CR: ", tlsConn))
|
||||
c.bw = bufio.NewWriter(moxio.NewTraceWriter(c.log, "CW: ", c))
|
||||
return untagged, result, nil
|
||||
}
|
||||
|
||||
|
@@ -116,6 +123,38 @@ func (c *Conn) AuthenticateSCRAM(method string, h func() hash.Hash, username, pa
 	return c.ResponseOK()
 }
 
+// CompressDeflate enables compression with deflate on the connection.
+//
+// Only possible when server has announced the COMPRESS=DEFLATE capability.
+//
+// State: Authenticated or selected.
+func (c *Conn) CompressDeflate() (untagged []Untagged, result Result, rerr error) {
+	defer c.recover(&rerr)
+
+	if _, ok := c.CapAvailable[CapCompressDeflate]; !ok {
+		c.xerrorf("server does not implement capability %s", CapCompressDeflate)
+	}
+
+	untagged, result, rerr = c.Transactf("compress deflate")
+	c.xcheck(rerr)
+
+	c.flateBW = bufio.NewWriter(c)
+	fw, err := flate.NewWriter(c.flateBW, flate.DefaultCompression)
+	c.xcheckf(err, "deflate") // Cannot happen.
+
+	c.compress = true
+	c.flateWriter = fw
+	tw := moxio.NewTraceWriter(mlog.New("imapclient", nil), "CW: ", fw)
+	c.bw = bufio.NewWriter(tw)
+
+	rc := c.xprefixConn()
+	fr := flate.NewReaderPartial(rc)
+	tr := moxio.NewTraceReader(mlog.New("imapclient", nil), "CR: ", fr)
+	c.br = bufio.NewReader(tr)
+
+	return
+}
+
 // Enable enables capabilities for use with the connection, verifying the server has indeed enabled them.
 func (c *Conn) Enable(capabilities ...string) (untagged []Untagged, result Result, rerr error) {
 	defer c.recover(&rerr)
||||
|
|
|
@ -23,7 +23,7 @@ func (c *Conn) recordAdd(buf []byte) {
|
|||
|
||||
func (c *Conn) xtake(s string) {
|
||||
buf := make([]byte, len(s))
|
||||
_, err := io.ReadFull(c.r, buf)
|
||||
_, err := io.ReadFull(c.br, buf)
|
||||
c.xcheckf(err, "taking %q", s)
|
||||
if !strings.EqualFold(string(buf), s) {
|
||||
c.xerrorf("got %q, expected %q", buf, s)
|
||||
|
@ -32,7 +32,7 @@ func (c *Conn) xtake(s string) {
|
|||
}
|
||||
|
||||
func (c *Conn) readbyte() (byte, error) {
|
||||
b, err := c.r.ReadByte()
|
||||
b, err := c.br.ReadByte()
|
||||
if err == nil {
|
||||
c.recordAdd([]byte{b})
|
||||
}
|
||||
|
@ -43,12 +43,12 @@ func (c *Conn) unreadbyte() {
|
|||
if c.record {
|
||||
c.recordBuf = c.recordBuf[:len(c.recordBuf)-1]
|
||||
}
|
||||
err := c.r.UnreadByte()
|
||||
err := c.br.UnreadByte()
|
||||
c.xcheckf(err, "unread byte")
|
||||
}
|
||||
|
||||
func (c *Conn) readrune() (rune, error) {
|
||||
x, _, err := c.r.ReadRune()
|
||||
x, _, err := c.br.ReadRune()
|
||||
if err == nil {
|
||||
c.recordAdd([]byte(string(x)))
|
||||
}
|
||||
|
@ -126,7 +126,8 @@ func (c *Conn) xrespText() RespText {
|
|||
var knownCodes = stringMap(
|
||||
// Without parameters.
|
||||
"ALERT", "PARSE", "READ-ONLY", "READ-WRITE", "TRYCREATE", "UIDNOTSTICKY", "UNAVAILABLE", "AUTHENTICATIONFAILED", "AUTHORIZATIONFAILED", "EXPIRED", "PRIVACYREQUIRED", "CONTACTADMIN", "NOPERM", "INUSE", "EXPUNGEISSUED", "CORRUPTION", "SERVERBUG", "CLIENTBUG", "CANNOT", "LIMIT", "OVERQUOTA", "ALREADYEXISTS", "NONEXISTENT", "NOTSAVED", "HASCHILDREN", "CLOSED", "UNKNOWN-CTE",
|
||||
"OVERQUOTA", // ../rfc/9208:472
|
||||
"OVERQUOTA", // ../rfc/9208:472
|
||||
"COMPRESSIONACTIVE", // ../rfc/4978:143
|
||||
// With parameters.
|
||||
"BADCHARSET", "CAPABILITY", "PERMANENTFLAGS", "UIDNEXT", "UIDVALIDITY", "UNSEEN", "APPENDUID", "COPYUID",
|
||||
"HIGHESTMODSEQ", "MODIFIED",
|
||||
|
@ -810,7 +811,7 @@ func (c *Conn) xatom() string {
|
|||
b, err := c.readbyte()
|
||||
c.xcheckf(err, "read byte for atom")
|
||||
if b <= ' ' || strings.IndexByte("(){%*\"\\]", b) >= 0 {
|
||||
c.r.UnreadByte()
|
||||
c.br.UnreadByte()
|
||||
if s == "" {
|
||||
c.xerrorf("expected atom")
|
||||
}
|
||||
|
@ -850,11 +851,12 @@ func (c *Conn) xliteral() []byte {
|
|||
c.xerrorf("refusing to read more than 1MB: %d", size)
|
||||
}
|
||||
if sync {
|
||||
_, err := fmt.Fprintf(c.conn, "+ ok\r\n")
|
||||
_, err := fmt.Fprintf(c.bw, "+ ok\r\n")
|
||||
c.xcheckf(err, "write continuation")
|
||||
c.xflush()
|
||||
}
|
||||
buf := make([]byte, int(size))
|
||||
_, err := io.ReadFull(c.r, buf)
|
||||
_, err := io.ReadFull(c.br, buf)
|
||||
c.xcheckf(err, "reading data for literal")
|
||||
return buf
|
||||
}
|
||||
|
|
44
imapclient/prefixconn.go
Normal file
44
imapclient/prefixconn.go
Normal file
|
@ -0,0 +1,44 @@
|
|||
package imapclient
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net"
|
||||
)
|
||||
|
||||
// prefixConn is a net.Conn with a buffer from which the first reads are satisfied.
|
||||
// used for STARTTLS where already did a buffered read of initial TLS data.
|
||||
type prefixConn struct {
|
||||
prefix []byte
|
||||
net.Conn
|
||||
}
|
||||
|
||||
func (c *prefixConn) Read(buf []byte) (int, error) {
|
||||
if len(c.prefix) > 0 {
|
||||
n := len(buf)
|
||||
if n > len(c.prefix) {
|
||||
n = len(c.prefix)
|
||||
}
|
||||
copy(buf[:n], c.prefix[:n])
|
||||
c.prefix = c.prefix[n:]
|
||||
if len(c.prefix) == 0 {
|
||||
c.prefix = nil
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
return c.Conn.Read(buf)
|
||||
}
|
||||
|
||||
// xprefixConn checks if there are any buffered unconsumed reads. If not, it
|
||||
// returns c.conn. Otherwise, it returns a *prefixConn from which the buffered data
|
||||
// can be read followed by data from c.conn.
|
||||
func (c *Conn) xprefixConn() net.Conn {
|
||||
n := c.br.Buffered()
|
||||
if n == 0 {
|
||||
return c.conn
|
||||
}
|
||||
|
||||
buf := make([]byte, n)
|
||||
_, err := io.ReadFull(c.br, buf)
|
||||
c.xcheckf(err, "get buffered data")
|
||||
return &prefixConn{buf, c.conn}
|
||||
}
|
|
@ -36,6 +36,7 @@ const (
|
|||
CapMetadataServer Capability = "METADATA-SERVER" // ../rfc/5464:124
|
||||
CapSaveDate Capability = "SAVEDATE" // ../rfc/8514
|
||||
CapCreateSpecialUse Capability = "CREATE-SPECIAL-USE" // ../rfc/6154:296
|
||||
CapCompressDeflate Capability = "COMPRESS=DEFLATE" // ../rfc/4978:65
|
||||
)
|
||||
|
||||
// Status is the tagged final result of a command.
|
||||
|
@ -381,7 +382,7 @@ func (ns NumSet) String() string {
|
|||
}
|
||||
|
||||
func ParseNumSet(s string) (ns NumSet, rerr error) {
|
||||
c := Conn{r: bufio.NewReader(strings.NewReader(s))}
|
||||
c := Conn{br: bufio.NewReader(strings.NewReader(s))}
|
||||
defer c.recover(&rerr)
|
||||
ns = c.xsequenceSet()
|
||||
return
|
||||
|
|
imapserver/compress_test.go (new file, 39 lines)
@@ -0,0 +1,39 @@
+package imapserver
+
+import (
+	"crypto/tls"
+	"testing"
+)
+
+func TestCompress(t *testing.T) {
+	tc := start(t)
+	defer tc.close()
+
+	tc.client.Login("mjl@mox.example", password0)
+
+	tc.transactf("bad", "compress")
+	tc.transactf("bad", "compress bogus ")
+	tc.transactf("no", "compress bogus")
+
+	tc.client.CompressDeflate()
+	tc.transactf("no", "compress deflate") // Cannot have multiple.
+	tc.xcode("COMPRESSIONACTIVE")
+
+	tc.client.Select("inbox")
+	tc.transactf("ok", "append inbox (\\seen) {%d+}\r\n%s", len(exampleMsg), exampleMsg)
+	tc.transactf("ok", "noop")
+	tc.transactf("ok", "fetch 1 body.peek[1]")
+}
+
+func TestCompressStartTLS(t *testing.T) {
+	tc := start(t)
+	defer tc.close()
+
+	tc.client.Starttls(&tls.Config{InsecureSkipVerify: true})
+	tc.client.Login("mjl@mox.example", password0)
+	tc.client.CompressDeflate()
+	tc.client.Select("inbox")
+	tc.transactf("ok", "append inbox (\\seen) {%d+}\r\n%s", len(exampleMsg), exampleMsg)
+	tc.transactf("ok", "noop")
+	tc.transactf("ok", "fetch 1 body.peek[1]")
+}
|
@ -120,7 +120,7 @@ func FuzzServer(f *testing.F) {
|
|||
|
||||
err := clientConn.SetDeadline(time.Now().Add(time.Second))
|
||||
flog(err, "set client deadline")
|
||||
client, _ := imapclient.New(clientConn, true)
|
||||
client, _ := imapclient.New(mox.Cid(), clientConn, true)
|
||||
|
||||
for _, cmd := range cmds {
|
||||
client.Commandf("", "%s", cmd)
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
package imapserver
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"net"
|
||||
)
|
||||
|
||||
|
@ -26,3 +28,18 @@ func (c *prefixConn) Read(buf []byte) (int, error) {
|
|||
}
|
||||
return c.Conn.Read(buf)
|
||||
}
|
||||
|
||||
// xprefixConn returns either the original net.Conn passed as parameter, or returns
|
||||
// a *prefixConn returning the buffered data available in br followed data from the
|
||||
// net.Conn passed in.
|
||||
func xprefixConn(c net.Conn, br *bufio.Reader) net.Conn {
|
||||
n := br.Buffered()
|
||||
if n == 0 {
|
||||
return c
|
||||
}
|
||||
|
||||
buf := make([]byte, n)
|
||||
_, err := io.ReadFull(c, buf)
|
||||
xcheckf(err, "get buffered data")
|
||||
return &prefixConn{buf, c}
|
||||
}
|
||||
|
|
|
@ -66,6 +66,7 @@ import (
|
|||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
|
||||
"github.com/mjl-/bstore"
|
||||
"github.com/mjl-/flate"
|
||||
|
||||
"github.com/mjl-/mox/config"
|
||||
"github.com/mjl-/mox/message"
|
||||
|
@ -162,12 +163,13 @@ var authFailDelay = time.Second // After authentication failure.
|
|||
// SAVEDATE: ../rfc/8514
|
||||
// WITHIN: ../rfc/5032
|
||||
// NAMESPACE: ../rfc/2342
|
||||
// COMPRESS=DEFLATE: ../rfc/4978
|
||||
//
|
||||
// We always announce support for SCRAM PLUS-variants, also on connections without
|
||||
// TLS. The client should not be selecting PLUS variants on non-TLS connections,
|
||||
// instead opting to do the bare SCRAM variant without indicating the server claims
|
||||
// to support the PLUS variant (skipping the server downgrade detection check).
|
||||
const serverCapabilities = "IMAP4rev2 IMAP4rev1 ENABLE LITERAL+ IDLE SASL-IR BINARY UNSELECT UIDPLUS ESEARCH SEARCHRES MOVE UTF8=ACCEPT LIST-EXTENDED SPECIAL-USE CREATE-SPECIAL-USE LIST-STATUS AUTH=SCRAM-SHA-256-PLUS AUTH=SCRAM-SHA-256 AUTH=SCRAM-SHA-1-PLUS AUTH=SCRAM-SHA-1 AUTH=CRAM-MD5 ID APPENDLIMIT=9223372036854775807 CONDSTORE QRESYNC STATUS=SIZE QUOTA QUOTA=RES-STORAGE METADATA SAVEDATE WITHIN NAMESPACE"
|
||||
const serverCapabilities = "IMAP4rev2 IMAP4rev1 ENABLE LITERAL+ IDLE SASL-IR BINARY UNSELECT UIDPLUS ESEARCH SEARCHRES MOVE UTF8=ACCEPT LIST-EXTENDED SPECIAL-USE CREATE-SPECIAL-USE LIST-STATUS AUTH=SCRAM-SHA-256-PLUS AUTH=SCRAM-SHA-256 AUTH=SCRAM-SHA-1-PLUS AUTH=SCRAM-SHA-1 AUTH=CRAM-MD5 ID APPENDLIMIT=9223372036854775807 CONDSTORE QRESYNC STATUS=SIZE QUOTA QUOTA=RES-STORAGE METADATA SAVEDATE WITHIN NAMESPACE COMPRESS=DEFLATE"
|
||||
|
||||
type conn struct {
|
||||
cid int64
|
||||
|
@ -175,10 +177,10 @@ type conn struct {
|
|||
conn net.Conn
|
||||
tls bool // Whether TLS has been initialized.
|
||||
viaHTTPS bool // Whether this connection came in via HTTPS (using TLS ALPN).
|
||||
br *bufio.Reader // From remote, with TLS unwrapped in case of TLS.
|
||||
br *bufio.Reader // From remote, with TLS unwrapped in case of TLS, and possibly wrapping inflate.
|
||||
line chan lineErr // If set, instead of reading from br, a line is read from this channel. For reading a line in IDLE while also waiting for mailbox/account updates.
|
||||
lastLine string // For detecting if syntax error is fatal, i.e. if this ends with a literal. Without crlf.
|
||||
bw *bufio.Writer // To remote, with TLS added in case of TLS.
|
||||
bw *bufio.Writer // To remote, with TLS added in case of TLS, and possibly wrapping deflate, see conn.flateWriter.
|
||||
tr *moxio.TraceReader // Kept to change trace level when reading/writing cmd/auth/data.
|
||||
tw *moxio.TraceWriter
|
||||
slow bool // If set, reads are done with a 1 second sleep, and writes are done 1 byte at a time, to keep spammers busy.
|
||||
|
@ -192,6 +194,9 @@ type conn struct {
|
|||
ncmds int // Number of commands processed. Used to abort connection when first incoming command is unknown/invalid.
|
||||
log mlog.Log // Used for all synchronous logging on this connection, see logbg for logging in a separate goroutine.
|
||||
enabled map[capability]bool // All upper-case.
|
||||
compress bool // Whether compression is enabled, via compress command.
|
||||
flateWriter *flate.Writer // For flushing output after flushing conn.bw, and for closing.
|
||||
flateBW *bufio.Writer // Wraps raw connection writes, flateWriter writes here, also needs flushing.
|
||||
|
||||
// Set by SEARCH with SAVE. Can be used by commands accepting a sequence-set with
|
||||
// value "$". When used, UIDs must be verified to still exist, because they may
|
||||
|
@ -258,7 +263,7 @@ func stateCommands(cmds ...string) map[string]struct{} {
|
|||
var (
|
||||
commandsStateAny = stateCommands("capability", "noop", "logout", "id")
|
||||
commandsStateNotAuthenticated = stateCommands("starttls", "authenticate", "login")
|
||||
commandsStateAuthenticated = stateCommands("enable", "select", "examine", "create", "delete", "rename", "subscribe", "unsubscribe", "list", "namespace", "status", "append", "idle", "lsub", "getquotaroot", "getquota", "getmetadata", "setmetadata")
|
||||
commandsStateAuthenticated = stateCommands("enable", "select", "examine", "create", "delete", "rename", "subscribe", "unsubscribe", "list", "namespace", "status", "append", "idle", "lsub", "getquotaroot", "getquota", "getmetadata", "setmetadata", "compress")
|
||||
commandsStateSelected = stateCommands("close", "unselect", "expunge", "search", "fetch", "store", "copy", "move", "uid expunge", "uid search", "uid fetch", "uid store", "uid copy", "uid move")
|
||||
)
|
||||
|
||||
|
@ -293,6 +298,7 @@ var commands = map[string]func(c *conn, tag, cmd string, p *parser){
|
|||
"getquota": (*conn).cmdGetquota,
|
||||
"getmetadata": (*conn).cmdGetmetadata,
|
||||
"setmetadata": (*conn).cmdSetmetadata,
|
||||
"compress": (*conn).cmdCompress,
|
||||
|
||||
// Selected.
|
||||
"check": (*conn).cmdCheck,
|
||||
|
@ -623,6 +629,19 @@ func (c *conn) bwritelinef(format string, args ...any) {
|
|||
func (c *conn) xflush() {
|
||||
err := c.bw.Flush()
|
||||
xcheckf(err, "flush") // Should never happen, the Write caused by the Flush should panic on i/o error.
|
||||
|
||||
// If compression is enabled, we need to flush its stream.
|
||||
if c.compress {
|
||||
// Note: Flush writes a sync message if there is nothing to flush. Ideally we
|
||||
// wouldn't send that, but we would have to keep track of whether data needs to be
|
||||
// flushed.
|
||||
err := c.flateWriter.Flush()
|
||||
xcheckf(err, "flush deflate")
|
||||
|
||||
// The flate writer writes to a bufio.Writer, we must also flush that.
|
||||
err = c.flateBW.Flush()
|
||||
xcheckf(err, "flush deflate writer")
|
||||
}
|
||||
}
|
||||
|
||||
func (c *conn) readCommand(tag *string) (cmd string, p *parser) {
|
||||
|
@ -689,7 +708,7 @@ func serve(listenerName string, cid int64, tlsConfig *tls.Config, nc net.Conn, x
|
|||
if a, ok := nc.RemoteAddr().(*net.TCPAddr); ok {
|
||||
remoteIP = a.IP
|
||||
} else {
|
||||
// For net.Pipe, during tests and for imapserve.
|
||||
// For tests and for imapserve.
|
||||
remoteIP = net.ParseIP("127.0.0.10")
|
||||
}
|
||||
|
||||
|
@ -751,7 +770,10 @@ func serve(listenerName string, cid int64, tlsConfig *tls.Config, nc net.Conn, x
|
|||
slog.String("listener", listenerName))
|
||||
|
||||
defer func() {
|
||||
c.conn.Close()
|
||||
err := c.conn.Close()
|
||||
if err != nil {
|
||||
c.log.Debugx("closing connection", err)
|
||||
}
|
||||
|
||||
if c.account != nil {
|
||||
c.comm.Unregister()
|
||||
|
@ -842,7 +864,7 @@ func serve(listenerName string, cid int64, tlsConfig *tls.Config, nc net.Conn, x
|
|||
var storeLoginAttempt bool
|
||||
for {
|
||||
c.command()
|
||||
c.xflush() // For flushing errors, or possibly commands that did not flush explicitly.
|
||||
c.xflush() // For flushing errors, or commands that did not flush explicitly.
|
||||
|
||||
// After an authentication command, we will have a c.loginAttempt. We typically get
|
||||
// an "ID" command with the user-agent immediately after. So we wait for one more
|
||||
|
@ -1117,6 +1139,16 @@ func (c *conn) command() {
|
|||
c.log.Debug("imap command done", logFields...)
|
||||
result = "ok"
|
||||
if x == cleanClose {
|
||||
// If compression was enabled, we flush & close the deflate stream.
|
||||
if c.compress {
|
||||
// Note: Close and flush can Write and may panic with an i/o error.
|
||||
if err := c.flateWriter.Close(); err != nil {
|
||||
c.log.Debugx("close deflate writer", err)
|
||||
} else if err := c.flateBW.Flush(); err != nil {
|
||||
c.log.Debugx("flush deflate buffer", err)
|
||||
}
|
||||
}
|
||||
|
||||
panic(x)
|
||||
}
|
||||
return
|
||||
|
@@ -1803,6 +1835,56 @@ func (c *conn) cmdID(tag, cmd string, p *parser) {
 	c.ok(tag, cmd)
 }
 
+// Compress enables compression on the connection. Deflate is the only algorithm
+// specified. TLS doesn't do compression nowadays, so we don't have to check for that.
+//
+// Status: Authenticated. The RFC doesn't mention this in prose, but the command is
+// added to ABNF production rule "command-auth".
+func (c *conn) cmdCompress(tag, cmd string, p *parser) {
+	// Command: ../rfc/4978:122
+
+	// Request syntax: ../rfc/4978:310
+	p.xspace()
+	alg := p.xatom()
+	p.xempty()
+
+	// Will do compression only once.
+	if c.compress {
+		// ../rfc/4978:143
+		xusercodeErrorf("COMPRESSIONACTIVE", "compression already active with previous compress command")
+	}
+	// ../rfc/4978:134
+	if !strings.EqualFold(alg, "deflate") {
+		xuserErrorf("compression algorithm not supported")
+	}
+
+	// We must flush now, before we initialize flate.
+	c.log.Debug("compression enabled")
+	c.ok(tag, cmd)
+
+	c.flateBW = bufio.NewWriter(c)
+	fw, err := flate.NewWriter(c.flateBW, flate.DefaultCompression)
+	xcheckf(err, "deflate") // Cannot happen.
+
+	c.compress = true
+	c.flateWriter = fw
+	c.tw = moxio.NewTraceWriter(c.log, "S: ", c.flateWriter)
+	c.bw = bufio.NewWriter(c.tw) // The previous c.bw will not have buffered data.
+
+	rc := xprefixConn(c.conn, c.br) // c.br may contain buffered data.
+	// We use the special partial reader. Some clients write commands and flush the
+	// buffer in "partial flush" mode instead of "sync flush" mode. The "sync flush"
+	// mode emits an explicit zero-length data block that triggers the Go stdlib flate
+	// reader to return data to us. It wouldn't for blocks written in "partial flush"
+	// mode, and it would block us indefinitely while trying to read another flate
+	// block. The partial reader returns data earlier, but still eagerly consumes all
+	// blocks in its buffer.
+	// todo: also _write_ in partial mode since it uses fewer bytes than a sync flush (which needs an additional 4 bytes for the zero-length data block). we need a writer that can flush in partial mode first. writing with sync flush will work with clients that themselves write with partial flush.
+	fr := flate.NewReaderPartial(rc)
+	c.tr = moxio.NewTraceReader(c.log, "C: ", fr)
+	c.br = bufio.NewReader(c.tr)
+}
+
 // STARTTLS enables TLS on the connection, after a plain text start.
 // Only allowed if TLS isn't already enabled, either through connecting to a
 // TLS-enabled TCP port, or a previous STARTTLS command.
||||
|
@ -1822,13 +1904,7 @@ func (c *conn) cmdStarttls(tag, cmd string, p *parser) {
|
|||
xsyntaxErrorf("starttls not announced")
|
||||
}
|
||||
|
||||
conn := c.conn
|
||||
if n := c.br.Buffered(); n > 0 {
|
||||
buf := make([]byte, n)
|
||||
_, err := io.ReadFull(c.br, buf)
|
||||
xcheckf(err, "reading buffered data for tls handshake")
|
||||
conn = &prefixConn{buf, conn}
|
||||
}
|
||||
conn := xprefixConn(c.conn, c.br)
|
||||
// We add the cid to facilitate debugging in case of TLS connection failure.
|
||||
c.ok(tag, cmd+" ("+mox.ReceivedID(c.cid)+")")
|
||||
|
||||
|
|
|
@ -16,6 +16,8 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/mjl-/mox/imapclient"
|
||||
"github.com/mjl-/mox/mlog"
|
||||
"github.com/mjl-/mox/mox-"
|
||||
|
@ -376,7 +378,23 @@ func startArgsMore(t *testing.T, first, immediateTLS bool, serverConfig, clientC
|
|||
tcheck(t, err, "after init")
|
||||
}
|
||||
|
||||
serverConn, clientConn := net.Pipe()
|
||||
// We get actual sockets for their buffering behaviour. net.Pipe is synchronous,
|
||||
// and the implementation of the compress extension can write a sync message to an
|
||||
// imap client when that client isn't reading but is trying to write. In the real
|
||||
// world, network buffer will take up those few bytes, so assume the buffer in the
|
||||
// test too.
|
||||
fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM, 0)
|
||||
tcheck(t, err, "socketpair")
|
||||
xfdconn := func(fd int, name string) net.Conn {
|
||||
f := os.NewFile(uintptr(fd), name)
|
||||
fc, err := net.FileConn(f)
|
||||
tcheck(t, err, "fileconn")
|
||||
err = f.Close()
|
||||
tcheck(t, err, "close file for conn")
|
||||
return fc
|
||||
}
|
||||
serverConn := xfdconn(fds[0], "server")
|
||||
clientConn := xfdconn(fds[1], "client")
|
||||
|
||||
if serverConfig == nil {
|
||||
serverConfig = &tls.Config{
|
||||
|
@ -391,8 +409,8 @@ func startArgsMore(t *testing.T, first, immediateTLS bool, serverConfig, clientC
|
|||
}
|
||||
|
||||
done := make(chan struct{})
|
||||
connCounter++
|
||||
cid := connCounter
|
||||
connCounter += 2
|
||||
cid := connCounter - 1
|
||||
go func() {
|
||||
const viaHTTPS = false
|
||||
serve("test", cid, serverConfig, serverConn, immediateTLS, allowLoginWithoutTLS, viaHTTPS, "")
|
||||
|
@ -401,7 +419,7 @@ func startArgsMore(t *testing.T, first, immediateTLS bool, serverConfig, clientC
|
|||
}
|
||||
close(done)
|
||||
}()
|
||||
client, err := imapclient.New(clientConn, true)
|
||||
client, err := imapclient.New(connCounter, clientConn, true)
|
||||
tcheck(t, err, "new client")
|
||||
tc := &testconn{t: t, conn: clientConn, client: client, done: done, serverConn: serverConn, account: acc}
|
||||
if first && noCloseSwitchboard {
|
||||
|
|
|
@ -64,7 +64,7 @@ func TestDeliver(t *testing.T) {
|
|||
tcheck(t, err, "dial imap")
|
||||
defer imapconn.Close()
|
||||
|
||||
imapc, err := imapclient.New(imapconn, false)
|
||||
imapc, err := imapclient.New(mox.Cid(), imapconn, false)
|
||||
tcheck(t, err, "new imapclient")
|
||||
|
||||
_, _, err = imapc.Login(imapuser, imappassword)
|
||||
|
|
|
@@ -189,7 +189,7 @@ https://www.iana.org/assignments/message-headers/message-headers.xhtml
 4551 Yes Obs (RFC 7162) IMAP Extension for Conditional STORE Operation or Quick Flag Changes Resynchronization
 4731 Yes - IMAP4 Extension to SEARCH Command for Controlling What Kind of Information Is Returned
 4959 Yes - IMAP Extension for Simple Authentication and Security Layer (SASL) Initial Client Response
-4978 Roadmap - The IMAP COMPRESS Extension
+4978 Yes - The IMAP COMPRESS Extension
 5032 Yes - WITHIN Search Extension to the IMAP Protocol
 5092 Roadmap - IMAP URL Scheme
 5161 Yes - The IMAP ENABLE Extension
|
27
vendor/github.com/mjl-/flate/LICENSE
generated
vendored
Normal file
27
vendor/github.com/mjl-/flate/LICENSE
generated
vendored
Normal file
|
@ -0,0 +1,27 @@
|
|||
Copyright 2009 The Go Authors.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google LLC nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
vendor/github.com/mjl-/flate/README.md (generated, vendored, new file, 11 lines)
@@ -0,0 +1,11 @@
+https://pkg.go.dev/compress/flate from go1.24.0, with flate.NewReaderPartial
+added: a Reader that returns data from blocks flushed with mode "partial
+flush", without blocking on reading the next flate block. Without
+NewReaderPartial, protocols that expect a response after writing a short
+compressed request that was flushed in "partial flush" mode can get stuck.
+
+Writes/flushes in "partial flush" mode are not implemented.
+
+https://pkg.go.dev/github.com/mjl-/flate#NewReaderPartial
+
+Also see https://github.com/golang/go/issues/31514
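As an illustration of the README above, a minimal sketch (not part of this commit) of layering deflate over a connection the way the imapclient and imapserver code in this diff does. It round-trips one line over a net.Pipe and assumes only the NewWriter/NewReaderPartial API shown in the diff; the "a001 noop" line is an arbitrary example:

	package main

	import (
		"bufio"
		"fmt"
		"log"
		"net"

		"github.com/mjl-/flate"
	)

	func main() {
		a, b := net.Pipe()

		// One side writes a compressed line and flushes, like xflush in the diff:
		// first the bufio.Writer, then the flate.Writer, then its target bufio.Writer.
		go func() {
			flateBW := bufio.NewWriter(a)
			fw, err := flate.NewWriter(flateBW, flate.DefaultCompression)
			if err != nil {
				log.Fatalf("new flate writer: %v", err)
			}
			bw := bufio.NewWriter(fw)
			fmt.Fprintf(bw, "a001 noop\r\n")
			if err := bw.Flush(); err != nil {
				log.Fatalf("flush bufio: %v", err)
			}
			if err := fw.Flush(); err != nil {
				log.Fatalf("flush flate: %v", err)
			}
			if err := flateBW.Flush(); err != nil {
				log.Fatalf("flush flate bufio: %v", err)
			}
		}()

		// The other side reads through NewReaderPartial, which also returns data
		// from blocks flushed in "partial flush" mode instead of blocking on the
		// next flate block.
		br := bufio.NewReader(flate.NewReaderPartial(b))
		line, err := br.ReadString('\n')
		if err != nil {
			log.Fatalf("read line: %v", err)
		}
		fmt.Printf("got: %q\n", line)
	}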
743
vendor/github.com/mjl-/flate/deflate.go
generated
vendored
Normal file
743
vendor/github.com/mjl-/flate/deflate.go
generated
vendored
Normal file
|
@ -0,0 +1,743 @@
|
|||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package flate
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
)
|
||||
|
||||
const (
|
||||
NoCompression = 0
|
||||
BestSpeed = 1
|
||||
BestCompression = 9
|
||||
DefaultCompression = -1
|
||||
|
||||
// HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman
|
||||
// entropy encoding. This mode is useful in compressing data that has
|
||||
// already been compressed with an LZ style algorithm (e.g. Snappy or LZ4)
|
||||
// that lacks an entropy encoder. Compression gains are achieved when
|
||||
// certain bytes in the input stream occur more frequently than others.
|
||||
//
|
||||
// Note that HuffmanOnly produces a compressed output that is
|
||||
// RFC 1951 compliant. That is, any valid DEFLATE decompressor will
|
||||
// continue to be able to decompress this output.
|
||||
HuffmanOnly = -2
|
||||
)
|
||||
|
||||
const (
|
||||
logWindowSize = 15
|
||||
windowSize = 1 << logWindowSize
|
||||
windowMask = windowSize - 1
|
||||
|
||||
// The LZ77 step produces a sequence of literal tokens and <length, offset>
|
||||
// pair tokens. The offset is also known as distance. The underlying wire
|
||||
// format limits the range of lengths and offsets. For example, there are
|
||||
// 256 legitimate lengths: those in the range [3, 258]. This package's
|
||||
// compressor uses a higher minimum match length, enabling optimizations
|
||||
// such as finding matches via 32-bit loads and compares.
|
||||
baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5
|
||||
minMatchLength = 4 // The smallest match length that the compressor actually emits
|
||||
maxMatchLength = 258 // The largest match length
|
||||
baseMatchOffset = 1 // The smallest match offset
|
||||
maxMatchOffset = 1 << 15 // The largest match offset
|
||||
|
||||
// The maximum number of tokens we put into a single flate block, just to
|
||||
// stop things from getting too large.
|
||||
maxFlateBlockTokens = 1 << 14
|
||||
maxStoreBlockSize = 65535
|
||||
hashBits = 17 // After 17 performance degrades
|
||||
hashSize = 1 << hashBits
|
||||
hashMask = (1 << hashBits) - 1
|
||||
maxHashOffset = 1 << 24
|
||||
|
||||
skipNever = math.MaxInt32
|
||||
)
|
||||
|
||||
type compressionLevel struct {
|
||||
level, good, lazy, nice, chain, fastSkipHashing int
|
||||
}
|
||||
|
||||
var levels = []compressionLevel{
|
||||
{0, 0, 0, 0, 0, 0}, // NoCompression.
|
||||
{1, 0, 0, 0, 0, 0}, // BestSpeed uses a custom algorithm; see deflatefast.go.
|
||||
// For levels 2-3 we don't bother trying with lazy matches.
|
||||
{2, 4, 0, 16, 8, 5},
|
||||
{3, 4, 0, 32, 32, 6},
|
||||
// Levels 4-9 use increasingly more lazy matching
|
||||
// and increasingly stringent conditions for "good enough".
|
||||
{4, 4, 4, 16, 16, skipNever},
|
||||
{5, 8, 16, 32, 32, skipNever},
|
||||
{6, 8, 16, 128, 128, skipNever},
|
||||
{7, 8, 32, 128, 256, skipNever},
|
||||
{8, 32, 128, 258, 1024, skipNever},
|
||||
{9, 32, 258, 258, 4096, skipNever},
|
||||
}
|
||||
|
||||
type compressor struct {
|
||||
compressionLevel
|
||||
|
||||
w *huffmanBitWriter
|
||||
bulkHasher func([]byte, []uint32)
|
||||
|
||||
// compression algorithm
|
||||
fill func(*compressor, []byte) int // copy data to window
|
||||
step func(*compressor) // process window
|
||||
bestSpeed *deflateFast // Encoder for BestSpeed
|
||||
|
||||
// Input hash chains
|
||||
// hashHead[hashValue] contains the largest inputIndex with the specified hash value
|
||||
// If hashHead[hashValue] is within the current window, then
|
||||
// hashPrev[hashHead[hashValue] & windowMask] contains the previous index
|
||||
// with the same hash value.
|
||||
chainHead int
|
||||
hashHead [hashSize]uint32
|
||||
hashPrev [windowSize]uint32
|
||||
hashOffset int
|
||||
|
||||
// input window: unprocessed data is window[index:windowEnd]
|
||||
index int
|
||||
window []byte
|
||||
windowEnd int
|
||||
blockStart int // window index where current tokens start
|
||||
byteAvailable bool // if true, still need to process window[index-1].
|
||||
|
||||
sync bool // requesting flush
|
||||
|
||||
// queued output tokens
|
||||
tokens []token
|
||||
|
||||
// deflate state
|
||||
length int
|
||||
offset int
|
||||
maxInsertIndex int
|
||||
err error
|
||||
|
||||
// hashMatch must be able to contain hashes for the maximum match length.
|
||||
hashMatch [maxMatchLength - 1]uint32
|
||||
}
|
||||
|
||||
func (d *compressor) fillDeflate(b []byte) int {
|
||||
if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
|
||||
// shift the window by windowSize
|
||||
copy(d.window, d.window[windowSize:2*windowSize])
|
||||
d.index -= windowSize
|
||||
d.windowEnd -= windowSize
|
||||
if d.blockStart >= windowSize {
|
||||
d.blockStart -= windowSize
|
||||
} else {
|
||||
d.blockStart = math.MaxInt32
|
||||
}
|
||||
d.hashOffset += windowSize
|
||||
if d.hashOffset > maxHashOffset {
|
||||
delta := d.hashOffset - 1
|
||||
d.hashOffset -= delta
|
||||
d.chainHead -= delta
|
||||
|
||||
// Iterate over slices instead of arrays to avoid copying
|
||||
// the entire table onto the stack (Issue #18625).
|
||||
for i, v := range d.hashPrev[:] {
|
||||
if int(v) > delta {
|
||||
d.hashPrev[i] = uint32(int(v) - delta)
|
||||
} else {
|
||||
d.hashPrev[i] = 0
|
||||
}
|
||||
}
|
||||
for i, v := range d.hashHead[:] {
|
||||
if int(v) > delta {
|
||||
d.hashHead[i] = uint32(int(v) - delta)
|
||||
} else {
|
||||
d.hashHead[i] = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
n := copy(d.window[d.windowEnd:], b)
|
||||
d.windowEnd += n
|
||||
return n
|
||||
}
|
||||
|
||||
func (d *compressor) writeBlock(tokens []token, index int) error {
|
||||
if index > 0 {
|
||||
var window []byte
|
||||
if d.blockStart <= index {
|
||||
window = d.window[d.blockStart:index]
|
||||
}
|
||||
d.blockStart = index
|
||||
d.w.writeBlock(tokens, false, window)
|
||||
return d.w.err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// fillWindow will fill the current window with the supplied
|
||||
// dictionary and calculate all hashes.
|
||||
// This is much faster than doing a full encode.
|
||||
// Should only be used after a reset.
|
||||
func (d *compressor) fillWindow(b []byte) {
|
||||
// Do not fill window if we are in store-only mode.
|
||||
if d.compressionLevel.level < 2 {
|
||||
return
|
||||
}
|
||||
if d.index != 0 || d.windowEnd != 0 {
|
||||
panic("internal error: fillWindow called with stale data")
|
||||
}
|
||||
|
||||
// If we are given too much, cut it.
|
||||
if len(b) > windowSize {
|
||||
b = b[len(b)-windowSize:]
|
||||
}
|
||||
// Add all to window.
|
||||
n := copy(d.window, b)
|
||||
|
||||
// Calculate 256 hashes at the time (more L1 cache hits)
|
||||
loops := (n + 256 - minMatchLength) / 256
|
||||
for j := 0; j < loops; j++ {
|
||||
index := j * 256
|
||||
end := index + 256 + minMatchLength - 1
|
||||
if end > n {
|
||||
end = n
|
||||
}
|
||||
toCheck := d.window[index:end]
|
||||
dstSize := len(toCheck) - minMatchLength + 1
|
||||
|
||||
if dstSize <= 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
dst := d.hashMatch[:dstSize]
|
||||
d.bulkHasher(toCheck, dst)
|
||||
for i, val := range dst {
|
||||
di := i + index
|
||||
hh := &d.hashHead[val&hashMask]
|
||||
// Get previous value with the same hash.
|
||||
// Our chain should point to the previous value.
|
||||
d.hashPrev[di&windowMask] = *hh
|
||||
// Set the head of the hash chain to us.
|
||||
*hh = uint32(di + d.hashOffset)
|
||||
}
|
||||
}
|
||||
// Update window information.
|
||||
d.windowEnd = n
|
||||
d.index = n
|
||||
}
|
||||
|
||||
// Try to find a match starting at index whose length is greater than prevSize.
|
||||
// We only look at chainCount possibilities before giving up.
|
||||
func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
|
||||
minMatchLook := maxMatchLength
|
||||
if lookahead < minMatchLook {
|
||||
minMatchLook = lookahead
|
||||
}
|
||||
|
||||
win := d.window[0 : pos+minMatchLook]
|
||||
|
||||
// We quit when we get a match that's at least nice long
|
||||
nice := len(win) - pos
|
||||
if d.nice < nice {
|
||||
nice = d.nice
|
||||
}
|
||||
|
||||
// If we've got a match that's good enough, only look in 1/4 the chain.
|
||||
tries := d.chain
|
||||
length = prevLength
|
||||
if length >= d.good {
|
||||
tries >>= 2
|
||||
}
|
||||
|
||||
wEnd := win[pos+length]
|
||||
wPos := win[pos:]
|
||||
minIndex := pos - windowSize
|
||||
|
||||
for i := prevHead; tries > 0; tries-- {
|
||||
if wEnd == win[i+length] {
|
||||
n := matchLen(win[i:], wPos, minMatchLook)
|
||||
|
||||
if n > length && (n > minMatchLength || pos-i <= 4096) {
|
||||
length = n
|
||||
offset = pos - i
|
||||
ok = true
|
||||
if n >= nice {
|
||||
// The match is good enough that we don't try to find a better one.
|
||||
break
|
||||
}
|
||||
wEnd = win[pos+n]
|
||||
}
|
||||
}
|
||||
if i == minIndex {
|
||||
// hashPrev[i & windowMask] has already been overwritten, so stop now.
|
||||
break
|
||||
}
|
||||
i = int(d.hashPrev[i&windowMask]) - d.hashOffset
|
||||
if i < minIndex || i < 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (d *compressor) writeStoredBlock(buf []byte) error {
|
||||
if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
|
||||
return d.w.err
|
||||
}
|
||||
d.w.writeBytes(buf)
|
||||
return d.w.err
|
||||
}
|
||||
|
||||
const hashmul = 0x1e35a7bd
|
||||
|
||||
// hash4 returns a hash representation of the first 4 bytes
|
||||
// of the supplied slice.
|
||||
// The caller must ensure that len(b) >= 4.
|
||||
func hash4(b []byte) uint32 {
|
||||
return ((uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24) * hashmul) >> (32 - hashBits)
|
||||
}
|
||||
|
||||
// bulkHash4 will compute hashes using the same
|
||||
// algorithm as hash4.
|
||||
func bulkHash4(b []byte, dst []uint32) {
|
||||
if len(b) < minMatchLength {
|
||||
return
|
||||
}
|
||||
hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
|
||||
dst[0] = (hb * hashmul) >> (32 - hashBits)
|
||||
end := len(b) - minMatchLength + 1
|
||||
for i := 1; i < end; i++ {
|
||||
hb = (hb << 8) | uint32(b[i+3])
|
||||
dst[i] = (hb * hashmul) >> (32 - hashBits)
|
||||
}
|
||||
}
|
||||
|
||||
// matchLen returns the number of matching bytes in a and b
|
||||
// up to length 'max'. Both slices must be at least 'max'
|
||||
// bytes in size.
|
||||
func matchLen(a, b []byte, max int) int {
|
||||
a = a[:max]
|
||||
b = b[:len(a)]
|
||||
for i, av := range a {
|
||||
if b[i] != av {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return max
|
||||
}
|
||||
|
||||
// encSpeed will compress and store the currently added data,
|
||||
// if enough has been accumulated or we at the end of the stream.
|
||||
// Any error that occurred will be in d.err
|
||||
func (d *compressor) encSpeed() {
|
||||
// We only compress if we have maxStoreBlockSize.
|
||||
if d.windowEnd < maxStoreBlockSize {
|
||||
if !d.sync {
|
||||
return
|
||||
}
|
||||
|
||||
// Handle small sizes.
|
||||
if d.windowEnd < 128 {
|
||||
switch {
|
||||
case d.windowEnd == 0:
|
||||
return
|
||||
case d.windowEnd <= 16:
|
||||
d.err = d.writeStoredBlock(d.window[:d.windowEnd])
|
||||
default:
|
||||
d.w.writeBlockHuff(false, d.window[:d.windowEnd])
|
||||
d.err = d.w.err
|
||||
}
|
||||
d.windowEnd = 0
|
||||
d.bestSpeed.reset()
|
||||
return
|
||||
}
|
||||
|
||||
}
|
||||
// Encode the block.
|
||||
d.tokens = d.bestSpeed.encode(d.tokens[:0], d.window[:d.windowEnd])
|
||||
|
||||
// If we removed less than 1/16th, Huffman compress the block.
|
||||
if len(d.tokens) > d.windowEnd-(d.windowEnd>>4) {
|
||||
d.w.writeBlockHuff(false, d.window[:d.windowEnd])
|
||||
} else {
|
||||
d.w.writeBlockDynamic(d.tokens, false, d.window[:d.windowEnd])
|
||||
}
|
||||
d.err = d.w.err
|
||||
d.windowEnd = 0
|
||||
}
|
||||
|
||||
func (d *compressor) initDeflate() {
|
||||
d.window = make([]byte, 2*windowSize)
|
||||
d.hashOffset = 1
|
||||
d.tokens = make([]token, 0, maxFlateBlockTokens+1)
|
||||
d.length = minMatchLength - 1
|
||||
d.offset = 0
|
||||
d.byteAvailable = false
|
||||
d.index = 0
|
||||
d.chainHead = -1
|
||||
d.bulkHasher = bulkHash4
|
||||
}
|
||||
|
||||
func (d *compressor) deflate() {
|
||||
if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
|
||||
return
|
||||
}
|
||||
|
||||
d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
|
||||
|
||||
Loop:
|
||||
for {
|
||||
if d.index > d.windowEnd {
|
||||
panic("index > windowEnd")
|
||||
}
|
||||
lookahead := d.windowEnd - d.index
|
||||
if lookahead < minMatchLength+maxMatchLength {
|
||||
if !d.sync {
|
||||
break Loop
|
||||
}
|
||||
if d.index > d.windowEnd {
|
||||
panic("index > windowEnd")
|
||||
}
|
||||
if lookahead == 0 {
|
||||
// Flush current output block if any.
|
||||
if d.byteAvailable {
|
||||
// There is still one pending token that needs to be flushed
|
||||
d.tokens = append(d.tokens, literalToken(uint32(d.window[d.index-1])))
|
||||
d.byteAvailable = false
|
||||
}
|
||||
if len(d.tokens) > 0 {
|
||||
if d.err = d.writeBlock(d.tokens, d.index); d.err != nil {
|
||||
return
|
||||
}
|
||||
d.tokens = d.tokens[:0]
|
||||
}
|
||||
break Loop
|
||||
}
|
||||
}
|
||||
if d.index < d.maxInsertIndex {
|
||||
// Update the hash
|
||||
hash := hash4(d.window[d.index : d.index+minMatchLength])
|
||||
hh := &d.hashHead[hash&hashMask]
|
||||
d.chainHead = int(*hh)
|
||||
d.hashPrev[d.index&windowMask] = uint32(d.chainHead)
|
||||
*hh = uint32(d.index + d.hashOffset)
|
||||
}
|
||||
prevLength := d.length
|
||||
prevOffset := d.offset
|
||||
d.length = minMatchLength - 1
|
||||
d.offset = 0
|
||||
minIndex := d.index - windowSize
|
||||
if minIndex < 0 {
|
||||
minIndex = 0
|
||||
}
|
||||
|
||||
if d.chainHead-d.hashOffset >= minIndex &&
|
||||
(d.fastSkipHashing != skipNever && lookahead > minMatchLength-1 ||
|
||||
d.fastSkipHashing == skipNever && lookahead > prevLength && prevLength < d.lazy) {
|
||||
if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
|
||||
d.length = newLength
|
||||
d.offset = newOffset
|
||||
}
|
||||
}
|
||||
if d.fastSkipHashing != skipNever && d.length >= minMatchLength ||
|
||||
d.fastSkipHashing == skipNever && prevLength >= minMatchLength && d.length <= prevLength {
|
||||
// There was a match at the previous step, and the current match is
|
||||
// not better. Output the previous match.
|
||||
if d.fastSkipHashing != skipNever {
|
||||
d.tokens = append(d.tokens, matchToken(uint32(d.length-baseMatchLength), uint32(d.offset-baseMatchOffset)))
|
||||
} else {
|
||||
d.tokens = append(d.tokens, matchToken(uint32(prevLength-baseMatchLength), uint32(prevOffset-baseMatchOffset)))
|
||||
}
|
||||
// Insert in the hash table all strings up to the end of the match.
|
||||
// index and index-1 are already inserted. If there is not enough
|
||||
// lookahead, the last two strings are not inserted into the hash
|
||||
// table.
|
||||
if d.length <= d.fastSkipHashing {
|
||||
var newIndex int
|
||||
if d.fastSkipHashing != skipNever {
|
||||
newIndex = d.index + d.length
|
||||
} else {
|
||||
newIndex = d.index + prevLength - 1
|
||||
}
|
||||
index := d.index
|
||||
for index++; index < newIndex; index++ {
|
||||
if index < d.maxInsertIndex {
|
||||
hash := hash4(d.window[index : index+minMatchLength])
|
||||
// Get previous value with the same hash.
|
||||
// Our chain should point to the previous value.
|
||||
hh := &d.hashHead[hash&hashMask]
|
||||
d.hashPrev[index&windowMask] = *hh
|
||||
// Set the head of the hash chain to us.
|
||||
*hh = uint32(index + d.hashOffset)
|
||||
}
|
||||
}
|
||||
d.index = index
|
||||
|
||||
if d.fastSkipHashing == skipNever {
|
||||
d.byteAvailable = false
|
||||
d.length = minMatchLength - 1
|
||||
}
|
||||
} else {
|
||||
// For matches this long, we don't bother inserting each individual
|
||||
// item into the table.
|
||||
d.index += d.length
|
||||
}
|
||||
if len(d.tokens) == maxFlateBlockTokens {
|
||||
// The block includes the current character
|
||||
if d.err = d.writeBlock(d.tokens, d.index); d.err != nil {
|
||||
return
|
||||
}
|
||||
d.tokens = d.tokens[:0]
|
||||
}
|
||||
} else {
|
||||
if d.fastSkipHashing != skipNever || d.byteAvailable {
|
||||
i := d.index - 1
|
||||
if d.fastSkipHashing != skipNever {
|
||||
i = d.index
|
||||
}
|
||||
d.tokens = append(d.tokens, literalToken(uint32(d.window[i])))
|
||||
if len(d.tokens) == maxFlateBlockTokens {
|
||||
if d.err = d.writeBlock(d.tokens, i+1); d.err != nil {
|
||||
return
|
||||
}
|
||||
d.tokens = d.tokens[:0]
|
||||
}
|
||||
}
|
||||
d.index++
|
||||
if d.fastSkipHashing == skipNever {
|
||||
d.byteAvailable = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (d *compressor) fillStore(b []byte) int {
|
||||
n := copy(d.window[d.windowEnd:], b)
|
||||
d.windowEnd += n
|
||||
return n
|
||||
}
|
||||
|
||||
func (d *compressor) store() {
|
||||
if d.windowEnd > 0 && (d.windowEnd == maxStoreBlockSize || d.sync) {
|
||||
d.err = d.writeStoredBlock(d.window[:d.windowEnd])
|
||||
d.windowEnd = 0
|
||||
}
|
||||
}
|
||||
|
||||
// storeHuff compresses and stores the currently added data
|
||||
// when the d.window is full or we are at the end of the stream.
|
||||
// Any error that occurred will be in d.err
|
||||
func (d *compressor) storeHuff() {
|
||||
if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 {
|
||||
return
|
||||
}
|
||||
d.w.writeBlockHuff(false, d.window[:d.windowEnd])
|
||||
d.err = d.w.err
|
||||
d.windowEnd = 0
|
||||
}
|
||||
|
||||
func (d *compressor) write(b []byte) (n int, err error) {
|
||||
if d.err != nil {
|
||||
return 0, d.err
|
||||
}
|
||||
n = len(b)
|
||||
for len(b) > 0 {
|
||||
d.step(d)
|
||||
b = b[d.fill(d, b):]
|
||||
if d.err != nil {
|
||||
return 0, d.err
|
||||
}
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
|
||||
func (d *compressor) syncFlush() error {
|
||||
if d.err != nil {
|
||||
return d.err
|
||||
}
|
||||
d.sync = true
|
||||
d.step(d)
|
||||
if d.err == nil {
|
||||
d.w.writeStoredHeader(0, false)
|
||||
d.w.flush()
|
||||
d.err = d.w.err
|
||||
}
|
||||
d.sync = false
|
||||
return d.err
|
||||
}
|
||||
|
||||
func (d *compressor) init(w io.Writer, level int) (err error) {
|
||||
d.w = newHuffmanBitWriter(w)
|
||||
|
||||
switch {
|
||||
case level == NoCompression:
|
||||
d.window = make([]byte, maxStoreBlockSize)
|
||||
d.fill = (*compressor).fillStore
|
||||
d.step = (*compressor).store
|
||||
case level == HuffmanOnly:
|
||||
d.window = make([]byte, maxStoreBlockSize)
|
||||
d.fill = (*compressor).fillStore
|
||||
d.step = (*compressor).storeHuff
|
||||
case level == BestSpeed:
|
||||
d.compressionLevel = levels[level]
|
||||
d.window = make([]byte, maxStoreBlockSize)
|
||||
d.fill = (*compressor).fillStore
|
||||
d.step = (*compressor).encSpeed
|
||||
d.bestSpeed = newDeflateFast()
|
||||
d.tokens = make([]token, maxStoreBlockSize)
|
||||
case level == DefaultCompression:
|
||||
level = 6
|
||||
fallthrough
|
||||
case 2 <= level && level <= 9:
|
||||
d.compressionLevel = levels[level]
|
||||
d.initDeflate()
|
||||
d.fill = (*compressor).fillDeflate
|
||||
d.step = (*compressor).deflate
|
||||
default:
|
||||
return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *compressor) reset(w io.Writer) {
|
||||
d.w.reset(w)
|
||||
d.sync = false
|
||||
d.err = nil
|
||||
switch d.compressionLevel.level {
|
||||
case NoCompression:
|
||||
d.windowEnd = 0
|
||||
case BestSpeed:
|
||||
d.windowEnd = 0
|
||||
d.tokens = d.tokens[:0]
|
||||
d.bestSpeed.reset()
|
||||
default:
|
||||
d.chainHead = -1
|
||||
clear(d.hashHead[:])
|
||||
clear(d.hashPrev[:])
|
||||
d.hashOffset = 1
|
||||
d.index, d.windowEnd = 0, 0
|
||||
d.blockStart, d.byteAvailable = 0, false
|
||||
d.tokens = d.tokens[:0]
|
||||
d.length = minMatchLength - 1
|
||||
d.offset = 0
|
||||
d.maxInsertIndex = 0
|
||||
}
|
||||
}
|
||||
|
||||
func (d *compressor) close() error {
|
||||
if d.err == errWriterClosed {
|
||||
return nil
|
||||
}
|
||||
if d.err != nil {
|
||||
return d.err
|
||||
}
|
||||
d.sync = true
|
||||
d.step(d)
|
||||
if d.err != nil {
|
||||
return d.err
|
||||
}
|
||||
if d.w.writeStoredHeader(0, true); d.w.err != nil {
|
||||
return d.w.err
|
||||
}
|
||||
d.w.flush()
|
||||
if d.w.err != nil {
|
||||
return d.w.err
|
||||
}
|
||||
d.err = errWriterClosed
|
||||
return nil
|
||||
}
|
||||
|
||||
// NewWriter returns a new [Writer] compressing data at the given level.
|
||||
// Following zlib, levels range from 1 ([BestSpeed]) to 9 ([BestCompression]);
|
||||
// higher levels typically run slower but compress more. Level 0
|
||||
// ([NoCompression]) does not attempt any compression; it only adds the
|
||||
// necessary DEFLATE framing.
|
||||
// Level -1 ([DefaultCompression]) uses the default compression level.
|
||||
// Level -2 ([HuffmanOnly]) will use Huffman compression only, giving
|
||||
// a very fast compression for all types of input, but sacrificing considerable
|
||||
// compression efficiency.
|
||||
//
|
||||
// If level is in the range [-2, 9] then the error returned will be nil.
|
||||
// Otherwise the error returned will be non-nil.
|
||||
func NewWriter(w io.Writer, level int) (*Writer, error) {
|
||||
var dw Writer
|
||||
if err := dw.d.init(w, level); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &dw, nil
|
||||
}
|
||||
|
||||
// NewWriterDict is like [NewWriter] but initializes the new
|
||||
// [Writer] with a preset dictionary. The returned [Writer] behaves
|
||||
// as if the dictionary had been written to it without producing
|
||||
// any compressed output. The compressed data written to w
|
||||
// can only be decompressed by a [Reader] initialized with the
|
||||
// same dictionary.
|
||||
func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
|
||||
dw := &dictWriter{w}
|
||||
zw, err := NewWriter(dw, level)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
zw.d.fillWindow(dict)
|
||||
zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method.
|
||||
return zw, err
|
||||
}
|
||||
|
||||
type dictWriter struct {
|
||||
w io.Writer
|
||||
}
|
||||
|
||||
func (w *dictWriter) Write(b []byte) (n int, err error) {
|
||||
return w.w.Write(b)
|
||||
}
|
||||
|
||||
var errWriterClosed = errors.New("flate: closed writer")
|
||||
|
||||
// A Writer takes data written to it and writes the compressed
|
||||
// form of that data to an underlying writer (see [NewWriter]).
|
||||
type Writer struct {
|
||||
d compressor
|
||||
dict []byte
|
||||
}
|
||||
|
||||
// Write writes data to w, which will eventually write the
|
||||
// compressed form of data to its underlying writer.
|
||||
func (w *Writer) Write(data []byte) (n int, err error) {
|
||||
return w.d.write(data)
|
||||
}
|
||||
|
||||
// Flush flushes any pending data to the underlying writer.
|
||||
// It is useful mainly in compressed network protocols, to ensure that
|
||||
// a remote reader has enough data to reconstruct a packet.
|
||||
// Flush does not return until the data has been written.
|
||||
// Calling Flush when there is no pending data still causes the [Writer]
|
||||
// to emit a sync marker of at least 4 bytes.
|
||||
// If the underlying writer returns an error, Flush returns that error.
|
||||
//
|
||||
// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
|
||||
func (w *Writer) Flush() error {
|
||||
// For more about flushing:
|
||||
// https://www.bolet.org/~pornin/deflate-flush.html
|
||||
return w.d.syncFlush()
|
||||
}
|
||||
|
||||
// Close flushes and closes the writer.
|
||||
func (w *Writer) Close() error {
|
||||
return w.d.close()
|
||||
}
|
||||
|
||||
// Reset discards the writer's state and makes it equivalent to
|
||||
// the result of [NewWriter] or [NewWriterDict] called with dst
|
||||
// and w's level and dictionary.
|
||||
func (w *Writer) Reset(dst io.Writer) {
|
||||
if dw, ok := w.d.w.writer.(*dictWriter); ok {
|
||||
// w was created with NewWriterDict
|
||||
dw.w = dst
|
||||
w.d.reset(dw)
|
||||
w.d.fillWindow(w.dict)
|
||||
} else {
|
||||
// w was created with NewWriter
|
||||
w.d.reset(dst)
|
||||
}
|
||||
}
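
Not part of the diff, but for orientation: Flush above is the hook the IMAP COMPRESS code depends on. A sync flush ends the output so far with a zero-length stored block, so the peer can decompress everything written up to that point. A minimal sketch of that behaviour, assuming the package is imported by its module path (the trailing byte values are the standard zlib Z_SYNC_FLUSH marker, not something specific to this fork):

package main

import (
	"bytes"
	"fmt"
	"log"

	"github.com/mjl-/flate"
)

func main() {
	var buf bytes.Buffer
	w, err := flate.NewWriter(&buf, flate.BestSpeed)
	if err != nil {
		log.Fatal(err)
	}
	// Write one protocol "packet" and flush so the peer can decode it now.
	if _, err := w.Write([]byte("a1 NOOP\r\n")); err != nil {
		log.Fatal(err)
	}
	if err := w.Flush(); err != nil {
		log.Fatal(err)
	}
	out := buf.Bytes()
	// A sync flush ends in a zero-length stored block: 00 00 ff ff.
	fmt.Printf("last 4 bytes: % x\n", out[len(out)-4:])
}

Those four trailing bytes are what a standard reader keys on as a flush point.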
307 vendor/github.com/mjl-/flate/deflatefast.go generated vendored Normal file
@@ -0,0 +1,307 @@
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package flate
|
||||
|
||||
import "math"
|
||||
|
||||
// This encoding algorithm, which prioritizes speed over output size, is
|
||||
// based on Snappy's LZ77-style encoder: github.com/golang/snappy
|
||||
|
||||
const (
|
||||
tableBits = 14 // Bits used in the table.
|
||||
tableSize = 1 << tableBits // Size of the table.
|
||||
tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
|
||||
tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
|
||||
|
||||
// Reset the buffer offset when reaching this.
|
||||
// Offsets are stored between blocks as int32 values.
|
||||
// Since the offset we are checking against is at the beginning
|
||||
// of the buffer, we need to subtract the current and input
|
||||
// buffer to not risk overflowing the int32.
|
||||
bufferReset = math.MaxInt32 - maxStoreBlockSize*2
|
||||
)
|
||||
|
||||
func load32(b []byte, i int32) uint32 {
|
||||
b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
|
||||
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
|
||||
}
|
||||
|
||||
func load64(b []byte, i int32) uint64 {
|
||||
b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
|
||||
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
|
||||
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
|
||||
}
|
||||
|
||||
func hash(u uint32) uint32 {
|
||||
return (u * 0x1e35a7bd) >> tableShift
|
||||
}
|
||||
|
||||
// These constants are defined by the Snappy implementation so that its
|
||||
// assembly implementation can fast-path some 16-bytes-at-a-time copies. They
|
||||
// aren't necessary in the pure Go implementation, as we don't use those same
|
||||
// optimizations, but using the same thresholds doesn't really hurt.
|
||||
const (
|
||||
inputMargin = 16 - 1
|
||||
minNonLiteralBlockSize = 1 + 1 + inputMargin
|
||||
)
|
||||
|
||||
type tableEntry struct {
|
||||
val uint32 // Value at destination
|
||||
offset int32
|
||||
}
|
||||
|
||||
// deflateFast maintains the table for matches,
|
||||
// and the previous byte block for cross block matching.
|
||||
type deflateFast struct {
|
||||
table [tableSize]tableEntry
|
||||
prev []byte // Previous block, zero length if unknown.
|
||||
cur int32 // Current match offset.
|
||||
}
|
||||
|
||||
func newDeflateFast() *deflateFast {
|
||||
return &deflateFast{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}
|
||||
}
|
||||
|
||||
// encode encodes a block given in src and appends tokens
|
||||
// to dst and returns the result.
|
||||
func (e *deflateFast) encode(dst []token, src []byte) []token {
|
||||
// Ensure that e.cur doesn't wrap.
|
||||
if e.cur >= bufferReset {
|
||||
e.shiftOffsets()
|
||||
}
|
||||
|
||||
// This check isn't in the Snappy implementation, but there, the caller
|
||||
// instead of the callee handles this case.
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
e.cur += maxStoreBlockSize
|
||||
e.prev = e.prev[:0]
|
||||
return emitLiteral(dst, src)
|
||||
}
|
||||
|
||||
// sLimit is when to stop looking for offset/length copies. The inputMargin
|
||||
// lets us use a fast path for emitLiteral in the main loop, while we are
|
||||
// looking for copies.
|
||||
sLimit := int32(len(src) - inputMargin)
|
||||
|
||||
// nextEmit is where in src the next emitLiteral should start from.
|
||||
nextEmit := int32(0)
|
||||
s := int32(0)
|
||||
cv := load32(src, s)
|
||||
nextHash := hash(cv)
|
||||
|
||||
for {
|
||||
// Copied from the C++ snappy implementation:
|
||||
//
|
||||
// Heuristic match skipping: If 32 bytes are scanned with no matches
|
||||
// found, start looking only at every other byte. If 32 more bytes are
|
||||
// scanned (or skipped), look at every third byte, etc.. When a match
|
||||
// is found, immediately go back to looking at every byte. This is a
|
||||
// small loss (~5% performance, ~0.1% density) for compressible data
|
||||
// due to more bookkeeping, but for non-compressible data (such as
|
||||
// JPEG) it's a huge win since the compressor quickly "realizes" the
|
||||
// data is incompressible and doesn't bother looking for matches
|
||||
// everywhere.
|
||||
//
|
||||
// The "skip" variable keeps track of how many bytes there are since
|
||||
// the last match; dividing it by 32 (ie. right-shifting by five) gives
|
||||
// the number of bytes to move ahead for each iteration.
|
||||
skip := int32(32)
|
||||
|
||||
nextS := s
|
||||
var candidate tableEntry
|
||||
for {
|
||||
s = nextS
|
||||
bytesBetweenHashLookups := skip >> 5
|
||||
nextS = s + bytesBetweenHashLookups
|
||||
skip += bytesBetweenHashLookups
|
||||
if nextS > sLimit {
|
||||
goto emitRemainder
|
||||
}
|
||||
candidate = e.table[nextHash&tableMask]
|
||||
now := load32(src, nextS)
|
||||
e.table[nextHash&tableMask] = tableEntry{offset: s + e.cur, val: cv}
|
||||
nextHash = hash(now)
|
||||
|
||||
offset := s - (candidate.offset - e.cur)
|
||||
if offset > maxMatchOffset || cv != candidate.val {
|
||||
// Out of range or not matched.
|
||||
cv = now
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
// A 4-byte match has been found. We'll later see if more than 4 bytes
|
||||
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
|
||||
// them as literal bytes.
|
||||
dst = emitLiteral(dst, src[nextEmit:s])
|
||||
|
||||
// Call emitCopy, and then see if another emitCopy could be our next
|
||||
// move. Repeat until we find no match for the input immediately after
|
||||
// what was consumed by the last emitCopy call.
|
||||
//
|
||||
// If we exit this loop normally then we need to call emitLiteral next,
|
||||
// though we don't yet know how big the literal will be. We handle that
|
||||
// by proceeding to the next iteration of the main loop. We also can
|
||||
// exit this loop via goto if we get close to exhausting the input.
|
||||
for {
|
||||
// Invariant: we have a 4-byte match at s, and no need to emit any
|
||||
// literal bytes prior to s.
|
||||
|
||||
// Extend the 4-byte match as long as possible.
|
||||
//
|
||||
s += 4
|
||||
t := candidate.offset - e.cur + 4
|
||||
l := e.matchLen(s, t, src)
|
||||
|
||||
// matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
|
||||
dst = append(dst, matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset)))
|
||||
s += l
|
||||
nextEmit = s
|
||||
if s >= sLimit {
|
||||
goto emitRemainder
|
||||
}
|
||||
|
||||
// We could immediately start working at s now, but to improve
|
||||
// compression we first update the hash table at s-1 and at s. If
|
||||
// another emitCopy is not our next move, also calculate nextHash
|
||||
// at s+1. At least on GOARCH=amd64, these three hash calculations
|
||||
// are faster as one load64 call (with some shifts) instead of
|
||||
// three load32 calls.
|
||||
x := load64(src, s-1)
|
||||
prevHash := hash(uint32(x))
|
||||
e.table[prevHash&tableMask] = tableEntry{offset: e.cur + s - 1, val: uint32(x)}
|
||||
x >>= 8
|
||||
currHash := hash(uint32(x))
|
||||
candidate = e.table[currHash&tableMask]
|
||||
e.table[currHash&tableMask] = tableEntry{offset: e.cur + s, val: uint32(x)}
|
||||
|
||||
offset := s - (candidate.offset - e.cur)
|
||||
if offset > maxMatchOffset || uint32(x) != candidate.val {
|
||||
cv = uint32(x >> 8)
|
||||
nextHash = hash(cv)
|
||||
s++
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
emitRemainder:
|
||||
if int(nextEmit) < len(src) {
|
||||
dst = emitLiteral(dst, src[nextEmit:])
|
||||
}
|
||||
e.cur += int32(len(src))
|
||||
e.prev = e.prev[:len(src)]
|
||||
copy(e.prev, src)
|
||||
return dst
|
||||
}
|
||||
|
||||
func emitLiteral(dst []token, lit []byte) []token {
|
||||
for _, v := range lit {
|
||||
dst = append(dst, literalToken(uint32(v)))
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// matchLen returns the match length between src[s:] and src[t:].
|
||||
// t can be negative to indicate the match is starting in e.prev.
|
||||
// We assume that src[s-4:s] and src[t-4:t] already match.
|
||||
func (e *deflateFast) matchLen(s, t int32, src []byte) int32 {
|
||||
s1 := int(s) + maxMatchLength - 4
|
||||
if s1 > len(src) {
|
||||
s1 = len(src)
|
||||
}
|
||||
|
||||
// If we are inside the current block
|
||||
if t >= 0 {
|
||||
b := src[t:]
|
||||
a := src[s:s1]
|
||||
b = b[:len(a)]
|
||||
// Extend the match to be as long as possible.
|
||||
for i := range a {
|
||||
if a[i] != b[i] {
|
||||
return int32(i)
|
||||
}
|
||||
}
|
||||
return int32(len(a))
|
||||
}
|
||||
|
||||
// We found a match in the previous block.
|
||||
tp := int32(len(e.prev)) + t
|
||||
if tp < 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Extend the match to be as long as possible.
|
||||
a := src[s:s1]
|
||||
b := e.prev[tp:]
|
||||
if len(b) > len(a) {
|
||||
b = b[:len(a)]
|
||||
}
|
||||
a = a[:len(b)]
|
||||
for i := range b {
|
||||
if a[i] != b[i] {
|
||||
return int32(i)
|
||||
}
|
||||
}
|
||||
|
||||
// If we reached our limit, we matched everything we are
|
||||
// allowed to in the previous block and we return.
|
||||
n := int32(len(b))
|
||||
if int(s+n) == s1 {
|
||||
return n
|
||||
}
|
||||
|
||||
// Continue looking for more matches in the current block.
|
||||
a = src[s+n : s1]
|
||||
b = src[:len(a)]
|
||||
for i := range a {
|
||||
if a[i] != b[i] {
|
||||
return int32(i) + n
|
||||
}
|
||||
}
|
||||
return int32(len(a)) + n
|
||||
}
|
||||
|
||||
// Reset resets the encoding history.
|
||||
// This ensures that no matches are made to the previous block.
|
||||
func (e *deflateFast) reset() {
|
||||
e.prev = e.prev[:0]
|
||||
// Bump the offset, so all matches will fail distance check.
|
||||
// Nothing should be >= e.cur in the table.
|
||||
e.cur += maxMatchOffset
|
||||
|
||||
// Protect against e.cur wraparound.
|
||||
if e.cur >= bufferReset {
|
||||
e.shiftOffsets()
|
||||
}
|
||||
}
|
||||
|
||||
// shiftOffsets will shift down all match offsets.
// This is only called in rare situations to prevent integer overflow.
|
||||
//
|
||||
// See https://golang.org/issue/18636 and https://github.com/golang/go/issues/34121.
|
||||
func (e *deflateFast) shiftOffsets() {
|
||||
if len(e.prev) == 0 {
|
||||
// We have no history; just clear the table.
|
||||
clear(e.table[:])
|
||||
e.cur = maxMatchOffset + 1
|
||||
return
|
||||
}
|
||||
|
||||
// Shift down everything in the table that isn't already too far away.
|
||||
for i := range e.table[:] {
|
||||
v := e.table[i].offset - e.cur + maxMatchOffset + 1
|
||||
if v < 0 {
|
||||
// We want to reset e.cur to maxMatchOffset + 1, so we need to shift
|
||||
// all table entries down by (e.cur - (maxMatchOffset + 1)).
|
||||
// Because we ignore matches > maxMatchOffset, we can cap
|
||||
// any negative offsets at 0.
|
||||
v = 0
|
||||
}
|
||||
e.table[i].offset = v
|
||||
}
|
||||
e.cur = maxMatchOffset + 1
|
||||
}
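
An aside on the matcher above, not part of the diff: encode hashes every 4-byte window into a 14-bit table slot and compares only against the single position last stored in that slot, which is what makes it fast but lossy about matches. A toy, self-contained sketch of that lookup (re-implementing the multiplicative hash locally instead of calling the unexported helpers; the input string is just an illustration):

package main

import (
	"encoding/binary"
	"fmt"
)

const tableBits = 14

// hash maps a 4-byte little-endian window to one of 1<<tableBits slots,
// using the same multiplicative hash constant as deflatefast.
func hash(u uint32) uint32 { return (u * 0x1e35a7bd) >> (32 - tableBits) }

func main() {
	src := []byte("abcdXabcdYabcd")
	table := map[uint32]int{} // slot -> last position that hashed there

	for s := 0; s+4 <= len(src); s++ {
		cv := binary.LittleEndian.Uint32(src[s:])
		if prev, ok := table[hash(cv)]; ok && binary.LittleEndian.Uint32(src[prev:]) == cv {
			fmt.Printf("pos %d matches pos %d (%q)\n", s, prev, src[s:s+4])
		}
		table[hash(cv)] = s
	}
}

Collisions simply overwrite the slot, mirroring how e.table only remembers the most recent offset per hash.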
182 vendor/github.com/mjl-/flate/dict_decoder.go generated vendored Normal file
@@ -0,0 +1,182 @@
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package flate
|
||||
|
||||
// dictDecoder implements the LZ77 sliding dictionary as used in decompression.
|
||||
// LZ77 decompresses data through sequences of two forms of commands:
|
||||
//
|
||||
// - Literal insertions: Runs of one or more symbols are inserted into the data
|
||||
// stream as is. This is accomplished through the writeByte method for a
|
||||
// single symbol, or combinations of writeSlice/writeMark for multiple symbols.
|
||||
// Any valid stream must start with a literal insertion if no preset dictionary
|
||||
// is used.
|
||||
//
|
||||
// - Backward copies: Runs of one or more symbols are copied from previously
|
||||
// emitted data. Backward copies come as the tuple (dist, length) where dist
|
||||
// determines how far back in the stream to copy from and length determines how
|
||||
// many bytes to copy. Note that it is valid for the length to be greater than
|
||||
// the distance. Since LZ77 uses forward copies, that situation is used to
|
||||
// perform a form of run-length encoding on repeated runs of symbols.
|
||||
// The writeCopy and tryWriteCopy are used to implement this command.
|
||||
//
|
||||
// For performance reasons, this implementation performs little to no sanity
|
||||
// checks about the arguments. As such, the invariants documented for each
|
||||
// method call must be respected.
|
||||
type dictDecoder struct {
|
||||
hist []byte // Sliding window history
|
||||
|
||||
// Invariant: 0 <= rdPos <= wrPos <= len(hist)
|
||||
wrPos int // Current output position in buffer
|
||||
rdPos int // Have emitted hist[:rdPos] already
|
||||
full bool // Has a full window length been written yet?
|
||||
}
|
||||
|
||||
// init initializes dictDecoder to have a sliding window dictionary of the given
|
||||
// size. If a preset dict is provided, it will initialize the dictionary with
|
||||
// the contents of dict.
|
||||
func (dd *dictDecoder) init(size int, dict []byte) {
|
||||
*dd = dictDecoder{hist: dd.hist}
|
||||
|
||||
if cap(dd.hist) < size {
|
||||
dd.hist = make([]byte, size)
|
||||
}
|
||||
dd.hist = dd.hist[:size]
|
||||
|
||||
if len(dict) > len(dd.hist) {
|
||||
dict = dict[len(dict)-len(dd.hist):]
|
||||
}
|
||||
dd.wrPos = copy(dd.hist, dict)
|
||||
if dd.wrPos == len(dd.hist) {
|
||||
dd.wrPos = 0
|
||||
dd.full = true
|
||||
}
|
||||
dd.rdPos = dd.wrPos
|
||||
}
|
||||
|
||||
// histSize reports the total amount of historical data in the dictionary.
|
||||
func (dd *dictDecoder) histSize() int {
|
||||
if dd.full {
|
||||
return len(dd.hist)
|
||||
}
|
||||
return dd.wrPos
|
||||
}
|
||||
|
||||
// availRead reports the number of bytes that can be flushed by readFlush.
|
||||
func (dd *dictDecoder) availRead() int {
|
||||
return dd.wrPos - dd.rdPos
|
||||
}
|
||||
|
||||
// availWrite reports the available amount of output buffer space.
|
||||
func (dd *dictDecoder) availWrite() int {
|
||||
return len(dd.hist) - dd.wrPos
|
||||
}
|
||||
|
||||
// writeSlice returns a slice of the available buffer to write data to.
|
||||
//
|
||||
// This invariant will be kept: len(s) <= availWrite()
|
||||
func (dd *dictDecoder) writeSlice() []byte {
|
||||
return dd.hist[dd.wrPos:]
|
||||
}
|
||||
|
||||
// writeMark advances the writer pointer by cnt.
|
||||
//
|
||||
// This invariant must be kept: 0 <= cnt <= availWrite()
|
||||
func (dd *dictDecoder) writeMark(cnt int) {
|
||||
dd.wrPos += cnt
|
||||
}
|
||||
|
||||
// writeByte writes a single byte to the dictionary.
|
||||
//
|
||||
// This invariant must be kept: 0 < availWrite()
|
||||
func (dd *dictDecoder) writeByte(c byte) {
|
||||
dd.hist[dd.wrPos] = c
|
||||
dd.wrPos++
|
||||
}
|
||||
|
||||
// writeCopy copies a string at a given (dist, length) to the output.
|
||||
// This returns the number of bytes copied and may be less than the requested
|
||||
// length if the available space in the output buffer is too small.
|
||||
//
|
||||
// This invariant must be kept: 0 < dist <= histSize()
|
||||
func (dd *dictDecoder) writeCopy(dist, length int) int {
|
||||
dstBase := dd.wrPos
|
||||
dstPos := dstBase
|
||||
srcPos := dstPos - dist
|
||||
endPos := dstPos + length
|
||||
if endPos > len(dd.hist) {
|
||||
endPos = len(dd.hist)
|
||||
}
|
||||
|
||||
// Copy non-overlapping section after destination position.
|
||||
//
|
||||
// This section is non-overlapping in that the copy length for this section
|
||||
// is always less than or equal to the backwards distance. This can occur
|
||||
// if a distance refers to data that wraps-around in the buffer.
|
||||
// Thus, a backwards copy is performed here; that is, the exact bytes in
// the source prior to the copy are placed in the destination.
|
||||
if srcPos < 0 {
|
||||
srcPos += len(dd.hist)
|
||||
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
|
||||
srcPos = 0
|
||||
}
|
||||
|
||||
// Copy possibly overlapping section before destination position.
|
||||
//
|
||||
// This section can overlap if the copy length for this section is larger
|
||||
// than the backwards distance. This is allowed by LZ77 so that repeated
|
||||
// strings can be succinctly represented using (dist, length) pairs.
|
||||
// Thus, a forwards copy is performed here; that is, the bytes copied is
|
||||
// possibly dependent on the resulting bytes in the destination as the copy
|
||||
// progresses along. This is functionally equivalent to the following:
|
||||
//
|
||||
// for i := 0; i < endPos-dstPos; i++ {
|
||||
// dd.hist[dstPos+i] = dd.hist[srcPos+i]
|
||||
// }
|
||||
// dstPos = endPos
|
||||
//
|
||||
for dstPos < endPos {
|
||||
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
|
||||
}
|
||||
|
||||
dd.wrPos = dstPos
|
||||
return dstPos - dstBase
|
||||
}
|
||||
|
||||
// tryWriteCopy tries to copy a string at a given (distance, length) to the
|
||||
// output. This specialized version is optimized for short distances.
|
||||
//
|
||||
// This method is designed to be inlined for performance reasons.
|
||||
//
|
||||
// This invariant must be kept: 0 < dist <= histSize()
|
||||
func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
|
||||
dstPos := dd.wrPos
|
||||
endPos := dstPos + length
|
||||
if dstPos < dist || endPos > len(dd.hist) {
|
||||
return 0
|
||||
}
|
||||
dstBase := dstPos
|
||||
srcPos := dstPos - dist
|
||||
|
||||
// Copy possibly overlapping section before destination position.
|
||||
for dstPos < endPos {
|
||||
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
|
||||
}
|
||||
|
||||
dd.wrPos = dstPos
|
||||
return dstPos - dstBase
|
||||
}
|
||||
|
||||
// readFlush returns a slice of the historical buffer that is ready to be
|
||||
// emitted to the user. The data returned by readFlush must be fully consumed
|
||||
// before calling any other dictDecoder methods.
|
||||
func (dd *dictDecoder) readFlush() []byte {
|
||||
toRead := dd.hist[dd.rdPos:dd.wrPos]
|
||||
dd.rdPos = dd.wrPos
|
||||
if dd.wrPos == len(dd.hist) {
|
||||
dd.wrPos, dd.rdPos = 0, 0
|
||||
dd.full = true
|
||||
}
|
||||
return toRead
|
||||
}
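
A note on the overlapping-copy case handled by writeCopy and tryWriteCopy above, not part of the diff: allowing length to exceed dist is what turns a back-reference into run-length encoding. A standalone sketch of the same forward-copy loop on a plain byte slice:

package main

import "fmt"

// lz77Copy appends length bytes copied from dist bytes back in buf.
// When length > dist the copy overlaps itself, repeating the last dist bytes.
func lz77Copy(buf []byte, dist, length int) []byte {
	dstPos := len(buf)
	buf = append(buf, make([]byte, length)...)
	srcPos := dstPos - dist
	endPos := dstPos + length
	for dstPos < endPos {
		dstPos += copy(buf[dstPos:endPos], buf[srcPos:dstPos])
	}
	return buf
}

func main() {
	out := []byte("ab")
	out = lz77Copy(out, 2, 6) // copy 6 bytes from 2 back: "ab" repeated
	fmt.Println(string(out))  // abababab
}

The copyable source region doubles on every pass, which is why the loop uses copy instead of writing a byte at a time.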
693 vendor/github.com/mjl-/flate/huffman_bit_writer.go generated vendored Normal file
@@ -0,0 +1,693 @@
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package flate
|
||||
|
||||
import (
|
||||
"io"
|
||||
)
|
||||
|
||||
const (
|
||||
// The largest offset code.
|
||||
offsetCodeCount = 30
|
||||
|
||||
// The special code used to mark the end of a block.
|
||||
endBlockMarker = 256
|
||||
|
||||
// The first length code.
|
||||
lengthCodesStart = 257
|
||||
|
||||
// The number of codegen codes.
|
||||
codegenCodeCount = 19
|
||||
badCode = 255
|
||||
|
||||
// bufferFlushSize indicates the buffer size
|
||||
// after which bytes are flushed to the writer.
|
||||
// Should preferably be a multiple of 6, since
|
||||
// we accumulate 6 bytes between writes to the buffer.
|
||||
bufferFlushSize = 240
|
||||
|
||||
// bufferSize is the actual output byte buffer size.
|
||||
// It must have additional headroom for a flush
|
||||
// which can contain up to 8 bytes.
|
||||
bufferSize = bufferFlushSize + 8
|
||||
)
|
||||
|
||||
// The number of extra bits needed by length code X - LENGTH_CODES_START.
|
||||
var lengthExtraBits = []int8{
|
||||
/* 257 */ 0, 0, 0,
|
||||
/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
|
||||
/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
|
||||
/* 280 */ 4, 5, 5, 5, 5, 0,
|
||||
}
|
||||
|
||||
// The length indicated by length code X - LENGTH_CODES_START.
|
||||
var lengthBase = []uint32{
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
|
||||
12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
|
||||
64, 80, 96, 112, 128, 160, 192, 224, 255,
|
||||
}
|
||||
|
||||
// offset code word extra bits.
|
||||
var offsetExtraBits = []int8{
|
||||
0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
|
||||
4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
|
||||
9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
|
||||
}
|
||||
|
||||
var offsetBase = []uint32{
|
||||
0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
|
||||
0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
|
||||
0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
|
||||
0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
|
||||
0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
|
||||
0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
|
||||
}
|
||||
|
||||
// The odd order in which the codegen code sizes are written.
|
||||
var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
|
||||
|
||||
type huffmanBitWriter struct {
|
||||
// writer is the underlying writer.
|
||||
// Do not use it directly; use the write method, which ensures
|
||||
// that Write errors are sticky.
|
||||
writer io.Writer
|
||||
|
||||
// Data waiting to be written is bytes[0:nbytes]
|
||||
// and then the low nbits of bits. Data is always written
|
||||
// sequentially into the bytes array.
|
||||
bits uint64
|
||||
nbits uint
|
||||
bytes [bufferSize]byte
|
||||
codegenFreq [codegenCodeCount]int32
|
||||
nbytes int
|
||||
literalFreq []int32
|
||||
offsetFreq []int32
|
||||
codegen []uint8
|
||||
literalEncoding *huffmanEncoder
|
||||
offsetEncoding *huffmanEncoder
|
||||
codegenEncoding *huffmanEncoder
|
||||
err error
|
||||
}
|
||||
|
||||
func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
|
||||
return &huffmanBitWriter{
|
||||
writer: w,
|
||||
literalFreq: make([]int32, maxNumLit),
|
||||
offsetFreq: make([]int32, offsetCodeCount),
|
||||
codegen: make([]uint8, maxNumLit+offsetCodeCount+1),
|
||||
literalEncoding: newHuffmanEncoder(maxNumLit),
|
||||
codegenEncoding: newHuffmanEncoder(codegenCodeCount),
|
||||
offsetEncoding: newHuffmanEncoder(offsetCodeCount),
|
||||
}
|
||||
}
|
||||
|
||||
func (w *huffmanBitWriter) reset(writer io.Writer) {
|
||||
w.writer = writer
|
||||
w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
|
||||
}
|
||||
|
||||
func (w *huffmanBitWriter) flush() {
|
||||
if w.err != nil {
|
||||
w.nbits = 0
|
||||
return
|
||||
}
|
||||
n := w.nbytes
|
||||
for w.nbits != 0 {
|
||||
w.bytes[n] = byte(w.bits)
|
||||
w.bits >>= 8
|
||||
if w.nbits > 8 { // Avoid underflow
|
||||
w.nbits -= 8
|
||||
} else {
|
||||
w.nbits = 0
|
||||
}
|
||||
n++
|
||||
}
|
||||
w.bits = 0
|
||||
w.write(w.bytes[:n])
|
||||
w.nbytes = 0
|
||||
}
|
||||
|
||||
func (w *huffmanBitWriter) write(b []byte) {
|
||||
if w.err != nil {
|
||||
return
|
||||
}
|
||||
_, w.err = w.writer.Write(b)
|
||||
}
|
||||
|
||||
func (w *huffmanBitWriter) writeBits(b int32, nb uint) {
|
||||
if w.err != nil {
|
||||
return
|
||||
}
|
||||
w.bits |= uint64(b) << w.nbits
|
||||
w.nbits += nb
|
||||
if w.nbits >= 48 {
|
||||
bits := w.bits
|
||||
w.bits >>= 48
|
||||
w.nbits -= 48
|
||||
n := w.nbytes
|
||||
bytes := w.bytes[n : n+6]
|
||||
bytes[0] = byte(bits)
|
||||
bytes[1] = byte(bits >> 8)
|
||||
bytes[2] = byte(bits >> 16)
|
||||
bytes[3] = byte(bits >> 24)
|
||||
bytes[4] = byte(bits >> 32)
|
||||
bytes[5] = byte(bits >> 40)
|
||||
n += 6
|
||||
if n >= bufferFlushSize {
|
||||
w.write(w.bytes[:n])
|
||||
n = 0
|
||||
}
|
||||
w.nbytes = n
|
||||
}
|
||||
}
|
||||
|
||||
func (w *huffmanBitWriter) writeBytes(bytes []byte) {
|
||||
if w.err != nil {
|
||||
return
|
||||
}
|
||||
n := w.nbytes
|
||||
if w.nbits&7 != 0 {
|
||||
w.err = InternalError("writeBytes with unfinished bits")
|
||||
return
|
||||
}
|
||||
for w.nbits != 0 {
|
||||
w.bytes[n] = byte(w.bits)
|
||||
w.bits >>= 8
|
||||
w.nbits -= 8
|
||||
n++
|
||||
}
|
||||
if n != 0 {
|
||||
w.write(w.bytes[:n])
|
||||
}
|
||||
w.nbytes = 0
|
||||
w.write(bytes)
|
||||
}
|
||||
|
||||
// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
|
||||
// the literal and offset lengths arrays (which are concatenated into a single
|
||||
// array). This method generates that run-length encoding.
|
||||
//
|
||||
// The result is written into the codegen array, and the frequencies
// of each code are written into the codegenFreq array.
// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
// information. Code badCode is an end marker.
|
||||
//
|
||||
// numLiterals The number of literals in literalEncoding
|
||||
// numOffsets The number of offsets in offsetEncoding
|
||||
// litenc, offenc The literal and offset encoder to use
|
||||
func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) {
|
||||
clear(w.codegenFreq[:])
|
||||
// Note that we are using codegen both as a temporary variable for holding
|
||||
// a copy of the frequencies, and as the place where we put the result.
|
||||
// This is fine because the output is always shorter than the input used
|
||||
// so far.
|
||||
codegen := w.codegen // cache
|
||||
// Copy the concatenated code sizes to codegen. Put a marker at the end.
|
||||
cgnl := codegen[:numLiterals]
|
||||
for i := range cgnl {
|
||||
cgnl[i] = uint8(litEnc.codes[i].len)
|
||||
}
|
||||
|
||||
cgnl = codegen[numLiterals : numLiterals+numOffsets]
|
||||
for i := range cgnl {
|
||||
cgnl[i] = uint8(offEnc.codes[i].len)
|
||||
}
|
||||
codegen[numLiterals+numOffsets] = badCode
|
||||
|
||||
size := codegen[0]
|
||||
count := 1
|
||||
outIndex := 0
|
||||
for inIndex := 1; size != badCode; inIndex++ {
|
||||
// INVARIANT: We have seen "count" copies of size that have not yet
|
||||
// had output generated for them.
|
||||
nextSize := codegen[inIndex]
|
||||
if nextSize == size {
|
||||
count++
|
||||
continue
|
||||
}
|
||||
// We need to generate codegen indicating "count" of size.
|
||||
if size != 0 {
|
||||
codegen[outIndex] = size
|
||||
outIndex++
|
||||
w.codegenFreq[size]++
|
||||
count--
|
||||
for count >= 3 {
|
||||
n := 6
|
||||
if n > count {
|
||||
n = count
|
||||
}
|
||||
codegen[outIndex] = 16
|
||||
outIndex++
|
||||
codegen[outIndex] = uint8(n - 3)
|
||||
outIndex++
|
||||
w.codegenFreq[16]++
|
||||
count -= n
|
||||
}
|
||||
} else {
|
||||
for count >= 11 {
|
||||
n := 138
|
||||
if n > count {
|
||||
n = count
|
||||
}
|
||||
codegen[outIndex] = 18
|
||||
outIndex++
|
||||
codegen[outIndex] = uint8(n - 11)
|
||||
outIndex++
|
||||
w.codegenFreq[18]++
|
||||
count -= n
|
||||
}
|
||||
if count >= 3 {
|
||||
// count >= 3 && count <= 10
|
||||
codegen[outIndex] = 17
|
||||
outIndex++
|
||||
codegen[outIndex] = uint8(count - 3)
|
||||
outIndex++
|
||||
w.codegenFreq[17]++
|
||||
count = 0
|
||||
}
|
||||
}
|
||||
count--
|
||||
for ; count >= 0; count-- {
|
||||
codegen[outIndex] = size
|
||||
outIndex++
|
||||
w.codegenFreq[size]++
|
||||
}
|
||||
// Set up invariant for next time through the loop.
|
||||
size = nextSize
|
||||
count = 1
|
||||
}
|
||||
// Marker indicating the end of the codegen.
|
||||
codegen[outIndex] = badCode
|
||||
}
|
||||
|
||||
// dynamicSize returns the size of dynamically encoded data in bits.
|
||||
func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
|
||||
numCodegens = len(w.codegenFreq)
|
||||
for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
|
||||
numCodegens--
|
||||
}
|
||||
header := 3 + 5 + 5 + 4 + (3 * numCodegens) +
|
||||
w.codegenEncoding.bitLength(w.codegenFreq[:]) +
|
||||
int(w.codegenFreq[16])*2 +
|
||||
int(w.codegenFreq[17])*3 +
|
||||
int(w.codegenFreq[18])*7
|
||||
size = header +
|
||||
litEnc.bitLength(w.literalFreq) +
|
||||
offEnc.bitLength(w.offsetFreq) +
|
||||
extraBits
|
||||
|
||||
return size, numCodegens
|
||||
}
|
||||
|
||||
// fixedSize returns the size of fixed Huffman encoded data in bits.
|
||||
func (w *huffmanBitWriter) fixedSize(extraBits int) int {
|
||||
return 3 +
|
||||
fixedLiteralEncoding.bitLength(w.literalFreq) +
|
||||
fixedOffsetEncoding.bitLength(w.offsetFreq) +
|
||||
extraBits
|
||||
}
|
||||
|
||||
// storedSize calculates the stored size, including header.
|
||||
// The function returns the size in bits and whether the data
// fits inside a single stored block.
|
||||
func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
|
||||
if in == nil {
|
||||
return 0, false
|
||||
}
|
||||
if len(in) <= maxStoreBlockSize {
|
||||
return (len(in) + 5) * 8, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func (w *huffmanBitWriter) writeCode(c hcode) {
|
||||
if w.err != nil {
|
||||
return
|
||||
}
|
||||
w.bits |= uint64(c.code) << w.nbits
|
||||
w.nbits += uint(c.len)
|
||||
if w.nbits >= 48 {
|
||||
bits := w.bits
|
||||
w.bits >>= 48
|
||||
w.nbits -= 48
|
||||
n := w.nbytes
|
||||
bytes := w.bytes[n : n+6]
|
||||
bytes[0] = byte(bits)
|
||||
bytes[1] = byte(bits >> 8)
|
||||
bytes[2] = byte(bits >> 16)
|
||||
bytes[3] = byte(bits >> 24)
|
||||
bytes[4] = byte(bits >> 32)
|
||||
bytes[5] = byte(bits >> 40)
|
||||
n += 6
|
||||
if n >= bufferFlushSize {
|
||||
w.write(w.bytes[:n])
|
||||
n = 0
|
||||
}
|
||||
w.nbytes = n
|
||||
}
|
||||
}
|
||||
|
||||
// Write the header of a dynamic Huffman block to the output stream.
|
||||
//
|
||||
// numLiterals The number of literals specified in codegen
|
||||
// numOffsets The number of offsets specified in codegen
|
||||
// numCodegens The number of codegens used in codegen
|
||||
func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
|
||||
if w.err != nil {
|
||||
return
|
||||
}
|
||||
var firstBits int32 = 4
|
||||
if isEof {
|
||||
firstBits = 5
|
||||
}
|
||||
w.writeBits(firstBits, 3)
|
||||
w.writeBits(int32(numLiterals-257), 5)
|
||||
w.writeBits(int32(numOffsets-1), 5)
|
||||
w.writeBits(int32(numCodegens-4), 4)
|
||||
|
||||
for i := 0; i < numCodegens; i++ {
|
||||
value := uint(w.codegenEncoding.codes[codegenOrder[i]].len)
|
||||
w.writeBits(int32(value), 3)
|
||||
}
|
||||
|
||||
i := 0
|
||||
for {
|
||||
var codeWord int = int(w.codegen[i])
|
||||
i++
|
||||
if codeWord == badCode {
|
||||
break
|
||||
}
|
||||
w.writeCode(w.codegenEncoding.codes[uint32(codeWord)])
|
||||
|
||||
switch codeWord {
|
||||
case 16:
|
||||
w.writeBits(int32(w.codegen[i]), 2)
|
||||
i++
|
||||
case 17:
|
||||
w.writeBits(int32(w.codegen[i]), 3)
|
||||
i++
|
||||
case 18:
|
||||
w.writeBits(int32(w.codegen[i]), 7)
|
||||
i++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
|
||||
if w.err != nil {
|
||||
return
|
||||
}
|
||||
var flag int32
|
||||
if isEof {
|
||||
flag = 1
|
||||
}
|
||||
w.writeBits(flag, 3)
|
||||
w.flush()
|
||||
w.writeBits(int32(length), 16)
|
||||
w.writeBits(int32(^uint16(length)), 16)
|
||||
}
|
||||
|
||||
func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
|
||||
if w.err != nil {
|
||||
return
|
||||
}
|
||||
// Indicate that we are a fixed Huffman block
|
||||
var value int32 = 2
|
||||
if isEof {
|
||||
value = 3
|
||||
}
|
||||
w.writeBits(value, 3)
|
||||
}
|
||||
|
||||
// writeBlock will write a block of tokens with the smallest encoding.
|
||||
// The original input can be supplied, and if the huffman encoded data
|
||||
// is larger than the original bytes, the data will be written as a
|
||||
// stored block.
|
||||
// If the input is nil, the tokens will always be Huffman encoded.
|
||||
func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
|
||||
if w.err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
tokens = append(tokens, endBlockMarker)
|
||||
numLiterals, numOffsets := w.indexTokens(tokens)
|
||||
|
||||
var extraBits int
|
||||
storedSize, storable := w.storedSize(input)
|
||||
if storable {
|
||||
// We only bother calculating the costs of the extra bits required by
|
||||
// the length of offset fields (which will be the same for both fixed
|
||||
// and dynamic encoding), if we need to compare those two encodings
|
||||
// against stored encoding.
|
||||
for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ {
|
||||
// First eight length codes have extra size = 0.
|
||||
extraBits += int(w.literalFreq[lengthCode]) * int(lengthExtraBits[lengthCode-lengthCodesStart])
|
||||
}
|
||||
for offsetCode := 4; offsetCode < numOffsets; offsetCode++ {
|
||||
// First four offset codes have extra size = 0.
|
||||
extraBits += int(w.offsetFreq[offsetCode]) * int(offsetExtraBits[offsetCode])
|
||||
}
|
||||
}
|
||||
|
||||
// Figure out smallest code.
|
||||
// Fixed Huffman baseline.
|
||||
var literalEncoding = fixedLiteralEncoding
|
||||
var offsetEncoding = fixedOffsetEncoding
|
||||
var size = w.fixedSize(extraBits)
|
||||
|
||||
// Dynamic Huffman?
|
||||
var numCodegens int
|
||||
|
||||
// Generate codegen and codegenFrequencies, which indicates how to encode
|
||||
// the literalEncoding and the offsetEncoding.
|
||||
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
|
||||
w.codegenEncoding.generate(w.codegenFreq[:], 7)
|
||||
dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)
|
||||
|
||||
if dynamicSize < size {
|
||||
size = dynamicSize
|
||||
literalEncoding = w.literalEncoding
|
||||
offsetEncoding = w.offsetEncoding
|
||||
}
|
||||
|
||||
// Stored bytes?
|
||||
if storable && storedSize < size {
|
||||
w.writeStoredHeader(len(input), eof)
|
||||
w.writeBytes(input)
|
||||
return
|
||||
}
|
||||
|
||||
// Huffman.
|
||||
if literalEncoding == fixedLiteralEncoding {
|
||||
w.writeFixedHeader(eof)
|
||||
} else {
|
||||
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
|
||||
}
|
||||
|
||||
// Write the tokens.
|
||||
w.writeTokens(tokens, literalEncoding.codes, offsetEncoding.codes)
|
||||
}
|
||||
|
||||
// writeBlockDynamic encodes a block using a dynamic Huffman table.
|
||||
// This should be used if the symbols used have a disproportionate
|
||||
// histogram distribution.
|
||||
// If input is supplied and the compression savings are below 1/16th of the
|
||||
// input size the block is stored.
|
||||
func (w *huffmanBitWriter) writeBlockDynamic(tokens []token, eof bool, input []byte) {
|
||||
if w.err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
tokens = append(tokens, endBlockMarker)
|
||||
numLiterals, numOffsets := w.indexTokens(tokens)
|
||||
|
||||
// Generate codegen and codegenFrequencies, which indicates how to encode
|
||||
// the literalEncoding and the offsetEncoding.
|
||||
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
|
||||
w.codegenEncoding.generate(w.codegenFreq[:], 7)
|
||||
size, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, 0)
|
||||
|
||||
// Store bytes, if we don't get a reasonable improvement.
|
||||
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
|
||||
w.writeStoredHeader(len(input), eof)
|
||||
w.writeBytes(input)
|
||||
return
|
||||
}
|
||||
|
||||
// Write Huffman table.
|
||||
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
|
||||
|
||||
// Write the tokens.
|
||||
w.writeTokens(tokens, w.literalEncoding.codes, w.offsetEncoding.codes)
|
||||
}
|
||||
|
||||
// indexTokens indexes a slice of tokens, and updates
|
||||
// literalFreq and offsetFreq, and generates literalEncoding
|
||||
// and offsetEncoding.
|
||||
// The number of literal and offset tokens is returned.
|
||||
func (w *huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets int) {
|
||||
clear(w.literalFreq)
|
||||
clear(w.offsetFreq)
|
||||
|
||||
for _, t := range tokens {
|
||||
if t < matchType {
|
||||
w.literalFreq[t.literal()]++
|
||||
continue
|
||||
}
|
||||
length := t.length()
|
||||
offset := t.offset()
|
||||
w.literalFreq[lengthCodesStart+lengthCode(length)]++
|
||||
w.offsetFreq[offsetCode(offset)]++
|
||||
}
|
||||
|
||||
// get the number of literals
|
||||
numLiterals = len(w.literalFreq)
|
||||
for w.literalFreq[numLiterals-1] == 0 {
|
||||
numLiterals--
|
||||
}
|
||||
// get the number of offsets
|
||||
numOffsets = len(w.offsetFreq)
|
||||
for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 {
|
||||
numOffsets--
|
||||
}
|
||||
if numOffsets == 0 {
|
||||
// We haven't found a single match. If we want to go with the dynamic encoding,
|
||||
// we should count at least one offset to be sure that the offset huffman tree could be encoded.
|
||||
w.offsetFreq[0] = 1
|
||||
numOffsets = 1
|
||||
}
|
||||
w.literalEncoding.generate(w.literalFreq, 15)
|
||||
w.offsetEncoding.generate(w.offsetFreq, 15)
|
||||
return
|
||||
}
|
||||
|
||||
// writeTokens writes a slice of tokens to the output.
|
||||
// codes for literal and offset encoding must be supplied.
|
||||
func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) {
|
||||
if w.err != nil {
|
||||
return
|
||||
}
|
||||
for _, t := range tokens {
|
||||
if t < matchType {
|
||||
w.writeCode(leCodes[t.literal()])
|
||||
continue
|
||||
}
|
||||
// Write the length
|
||||
length := t.length()
|
||||
lengthCode := lengthCode(length)
|
||||
w.writeCode(leCodes[lengthCode+lengthCodesStart])
|
||||
extraLengthBits := uint(lengthExtraBits[lengthCode])
|
||||
if extraLengthBits > 0 {
|
||||
extraLength := int32(length - lengthBase[lengthCode])
|
||||
w.writeBits(extraLength, extraLengthBits)
|
||||
}
|
||||
// Write the offset
|
||||
offset := t.offset()
|
||||
offsetCode := offsetCode(offset)
|
||||
w.writeCode(oeCodes[offsetCode])
|
||||
extraOffsetBits := uint(offsetExtraBits[offsetCode])
|
||||
if extraOffsetBits > 0 {
|
||||
extraOffset := int32(offset - offsetBase[offsetCode])
|
||||
w.writeBits(extraOffset, extraOffsetBits)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// huffOffset is a static offset encoder used for huffman only encoding.
|
||||
// It can be reused since we will not be encoding offset values.
|
||||
var huffOffset *huffmanEncoder
|
||||
|
||||
func init() {
|
||||
offsetFreq := make([]int32, offsetCodeCount)
|
||||
offsetFreq[0] = 1
|
||||
huffOffset = newHuffmanEncoder(offsetCodeCount)
|
||||
huffOffset.generate(offsetFreq, 15)
|
||||
}
|
||||
|
||||
// writeBlockHuff encodes a block of bytes as either
|
||||
// Huffman encoded literals or uncompressed bytes if the
// result gains only very little from compression.
|
||||
func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
|
||||
if w.err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Clear histogram
|
||||
clear(w.literalFreq)
|
||||
|
||||
// Add everything as literals
|
||||
histogram(input, w.literalFreq)
|
||||
|
||||
w.literalFreq[endBlockMarker] = 1
|
||||
|
||||
const numLiterals = endBlockMarker + 1
|
||||
w.offsetFreq[0] = 1
|
||||
const numOffsets = 1
|
||||
|
||||
w.literalEncoding.generate(w.literalFreq, 15)
|
||||
|
||||
// Figure out smallest code.
|
||||
// Always use dynamic Huffman or Store
|
||||
var numCodegens int
|
||||
|
||||
// Generate codegen and codegenFrequencies, which indicates how to encode
|
||||
// the literalEncoding and the offsetEncoding.
|
||||
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
|
||||
w.codegenEncoding.generate(w.codegenFreq[:], 7)
|
||||
size, numCodegens := w.dynamicSize(w.literalEncoding, huffOffset, 0)
|
||||
|
||||
// Store bytes, if we don't get a reasonable improvement.
|
||||
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
|
||||
w.writeStoredHeader(len(input), eof)
|
||||
w.writeBytes(input)
|
||||
return
|
||||
}
|
||||
|
||||
// Huffman.
|
||||
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
|
||||
encoding := w.literalEncoding.codes[:257]
|
||||
n := w.nbytes
|
||||
for _, t := range input {
|
||||
// Bitwriting inlined, ~30% speedup
|
||||
c := encoding[t]
|
||||
w.bits |= uint64(c.code) << w.nbits
|
||||
w.nbits += uint(c.len)
|
||||
if w.nbits < 48 {
|
||||
continue
|
||||
}
|
||||
// Store 6 bytes
|
||||
bits := w.bits
|
||||
w.bits >>= 48
|
||||
w.nbits -= 48
|
||||
bytes := w.bytes[n : n+6]
|
||||
bytes[0] = byte(bits)
|
||||
bytes[1] = byte(bits >> 8)
|
||||
bytes[2] = byte(bits >> 16)
|
||||
bytes[3] = byte(bits >> 24)
|
||||
bytes[4] = byte(bits >> 32)
|
||||
bytes[5] = byte(bits >> 40)
|
||||
n += 6
|
||||
if n < bufferFlushSize {
|
||||
continue
|
||||
}
|
||||
w.write(w.bytes[:n])
|
||||
if w.err != nil {
|
||||
return // Return early in the event of write failures
|
||||
}
|
||||
n = 0
|
||||
}
|
||||
w.nbytes = n
|
||||
w.writeCode(encoding[endBlockMarker])
|
||||
}
|
||||
|
||||
// histogram accumulates a histogram of b in h.
|
||||
//
|
||||
// len(h) must be >= 256, and h's elements must be all zeroes.
|
||||
func histogram(b []byte, h []int32) {
|
||||
h = h[:256]
|
||||
for _, t := range b {
|
||||
h[t]++
|
||||
}
|
||||
}
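
For reference, not part of the diff: the stored-block framing produced by writeStoredHeader plus writeBytes follows RFC 1951 section 3.2.4. The three header bits are padded out to a byte boundary, then LEN and its one's complement NLEN follow as little-endian 16-bit values, then the raw bytes. A hand-rolled sketch of that framing for a single final stored block:

package main

import (
	"encoding/binary"
	"fmt"
)

// storedBlock frames data as one final stored (uncompressed) DEFLATE block.
// It assumes len(data) <= 65535 and that output starts on a byte boundary.
func storedBlock(data []byte) []byte {
	out := []byte{0x01} // BFINAL=1, BTYPE=00, remaining bits of the byte are padding
	var lenNlen [4]byte
	binary.LittleEndian.PutUint16(lenNlen[0:], uint16(len(data)))
	binary.LittleEndian.PutUint16(lenNlen[2:], ^uint16(len(data)))
	out = append(out, lenNlen[:]...)
	return append(out, data...)
}

func main() {
	fmt.Printf("% x\n", storedBlock([]byte("hello"))) // 01 05 00 fa ff 68 65 6c 6c 6f
}

Readers cross-check LEN against NLEN, which is why writeStoredHeader also emits ^uint16(length).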
345 vendor/github.com/mjl-/flate/huffman_code.go generated vendored Normal file
@@ -0,0 +1,345 @@
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package flate
|
||||
|
||||
import (
|
||||
"math"
|
||||
"math/bits"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// hcode is a huffman code with a bit code and bit length.
|
||||
type hcode struct {
|
||||
code, len uint16
|
||||
}
|
||||
|
||||
type huffmanEncoder struct {
|
||||
codes []hcode
|
||||
freqcache []literalNode
|
||||
bitCount [17]int32
|
||||
lns byLiteral // stored to avoid repeated allocation in generate
|
||||
lfs byFreq // stored to avoid repeated allocation in generate
|
||||
}
|
||||
|
||||
type literalNode struct {
|
||||
literal uint16
|
||||
freq int32
|
||||
}
|
||||
|
||||
// A levelInfo describes the state of the constructed tree for a given depth.
|
||||
type levelInfo struct {
|
||||
// Our level. for better printing
|
||||
level int32
|
||||
|
||||
// The frequency of the last node at this level
|
||||
lastFreq int32
|
||||
|
||||
// The frequency of the next character to add to this level
|
||||
nextCharFreq int32
|
||||
|
||||
// The frequency of the next pair (from level below) to add to this level.
|
||||
// Only valid if the "needed" value of the next lower level is 0.
|
||||
nextPairFreq int32
|
||||
|
||||
// The number of chains remaining to generate for this level before moving
|
||||
// up to the next level
|
||||
needed int32
|
||||
}
|
||||
|
||||
// set sets the code and length of an hcode.
|
||||
func (h *hcode) set(code uint16, length uint16) {
|
||||
h.len = length
|
||||
h.code = code
|
||||
}
|
||||
|
||||
func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
|
||||
|
||||
func newHuffmanEncoder(size int) *huffmanEncoder {
|
||||
return &huffmanEncoder{codes: make([]hcode, size)}
|
||||
}
|
||||
|
||||
// Generates a HuffmanCode corresponding to the fixed literal table.
|
||||
func generateFixedLiteralEncoding() *huffmanEncoder {
|
||||
h := newHuffmanEncoder(maxNumLit)
|
||||
codes := h.codes
|
||||
var ch uint16
|
||||
for ch = 0; ch < maxNumLit; ch++ {
|
||||
var bits uint16
|
||||
var size uint16
|
||||
switch {
|
||||
case ch < 144:
|
||||
// size 8, 00110000 .. 10111111
|
||||
bits = ch + 48
|
||||
size = 8
|
||||
case ch < 256:
|
||||
// size 9, 110010000 .. 111111111
|
||||
bits = ch + 400 - 144
|
||||
size = 9
|
||||
case ch < 280:
|
||||
// size 7, 0000000 .. 0010111
|
||||
bits = ch - 256
|
||||
size = 7
|
||||
default:
|
||||
// size 8, 11000000 .. 11000111
|
||||
bits = ch + 192 - 280
|
||||
size = 8
|
||||
}
|
||||
codes[ch] = hcode{code: reverseBits(bits, byte(size)), len: size}
|
||||
}
|
||||
return h
|
||||
}
|
||||
|
||||
func generateFixedOffsetEncoding() *huffmanEncoder {
|
||||
h := newHuffmanEncoder(30)
|
||||
codes := h.codes
|
||||
for ch := range codes {
|
||||
codes[ch] = hcode{code: reverseBits(uint16(ch), 5), len: 5}
|
||||
}
|
||||
return h
|
||||
}
|
||||
|
||||
var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
|
||||
var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
|
||||
|
||||
func (h *huffmanEncoder) bitLength(freq []int32) int {
|
||||
var total int
|
||||
for i, f := range freq {
|
||||
if f != 0 {
|
||||
total += int(f) * int(h.codes[i].len)
|
||||
}
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
const maxBitsLimit = 16
|
||||
|
||||
// bitCounts computes the number of literals assigned to each bit size in the Huffman encoding.
|
||||
// It is only called when list.length >= 3.
|
||||
// The cases of 0, 1, and 2 literals are handled by special case code.
|
||||
//
|
||||
// list is an array of the literals with non-zero frequencies
|
||||
// and their associated frequencies. The array is in order of increasing
|
||||
// frequency and has as its last element a special element with frequency
|
||||
// MaxInt32.
|
||||
//
|
||||
// maxBits is the maximum number of bits that should be used to encode any literal.
|
||||
// It must be less than 16.
|
||||
//
|
||||
// bitCounts returns an integer slice in which slice[i] indicates the number of literals
|
||||
// that should be encoded in i bits.
|
||||
func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
|
||||
if maxBits >= maxBitsLimit {
|
||||
panic("flate: maxBits too large")
|
||||
}
|
||||
n := int32(len(list))
|
||||
list = list[0 : n+1]
|
||||
list[n] = maxNode()
|
||||
|
||||
// The tree can't have greater depth than n - 1, no matter what. This
|
||||
// saves a little bit of work in some small cases
|
||||
if maxBits > n-1 {
|
||||
maxBits = n - 1
|
||||
}
|
||||
|
||||
// Create information about each of the levels.
|
||||
// A bogus "Level 0" whose sole purpose is so that
|
||||
// level1.prev.needed==0. This makes level1.nextPairFreq
|
||||
// be a legitimate value that never gets chosen.
|
||||
var levels [maxBitsLimit]levelInfo
|
||||
// leafCounts[i] counts the number of literals at the left
|
||||
// of ancestors of the rightmost node at level i.
|
||||
// leafCounts[i][j] is the number of literals at the left
|
||||
// of the level j ancestor.
|
||||
var leafCounts [maxBitsLimit][maxBitsLimit]int32
|
||||
|
||||
for level := int32(1); level <= maxBits; level++ {
|
||||
// For every level, the first two items are the first two characters.
|
||||
// We initialize the levels as if we had already figured this out.
|
||||
levels[level] = levelInfo{
|
||||
level: level,
|
||||
lastFreq: list[1].freq,
|
||||
nextCharFreq: list[2].freq,
|
||||
nextPairFreq: list[0].freq + list[1].freq,
|
||||
}
|
||||
leafCounts[level][level] = 2
|
||||
if level == 1 {
|
||||
levels[level].nextPairFreq = math.MaxInt32
|
||||
}
|
||||
}
|
||||
|
||||
// We need a total of 2*n - 2 items at top level and have already generated 2.
|
||||
levels[maxBits].needed = 2*n - 4
|
||||
|
||||
level := maxBits
|
||||
for {
|
||||
l := &levels[level]
|
||||
if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
|
||||
// We've run out of both leaves and pairs.
|
||||
// End all calculations for this level.
|
||||
// To make sure we never come back to this level or any lower level,
|
||||
// set nextPairFreq impossibly large.
|
||||
l.needed = 0
|
||||
levels[level+1].nextPairFreq = math.MaxInt32
|
||||
level++
|
||||
continue
|
||||
}
|
||||
|
||||
prevFreq := l.lastFreq
|
||||
if l.nextCharFreq < l.nextPairFreq {
|
||||
// The next item on this row is a leaf node.
|
||||
n := leafCounts[level][level] + 1
|
||||
l.lastFreq = l.nextCharFreq
|
||||
// Lower leafCounts are the same as for the previous node.
|
||||
leafCounts[level][level] = n
|
||||
l.nextCharFreq = list[n].freq
|
||||
} else {
|
||||
// The next item on this row is a pair from the previous row.
|
||||
// nextPairFreq isn't valid until we generate two
|
||||
// more values in the level below
|
||||
l.lastFreq = l.nextPairFreq
|
||||
// Take leaf counts from the lower level, except counts[level] remains the same.
|
||||
copy(leafCounts[level][:level], leafCounts[level-1][:level])
|
||||
levels[l.level-1].needed = 2
|
||||
}
|
||||
|
||||
if l.needed--; l.needed == 0 {
|
||||
// We've done everything we need to do for this level.
|
||||
// Continue calculating one level up. Fill in nextPairFreq
|
||||
// of that level with the sum of the two nodes we've just calculated on
|
||||
// this level.
|
||||
if l.level == maxBits {
|
||||
// All done!
|
||||
break
|
||||
}
|
||||
levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
|
||||
level++
|
||||
} else {
|
||||
// If we stole from below, move down temporarily to replenish it.
|
||||
for levels[level-1].needed > 0 {
|
||||
level--
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Something is wrong if, at the end, the top level is null or hasn't used
// all of the leaves.
|
||||
if leafCounts[maxBits][maxBits] != n {
|
||||
panic("leafCounts[maxBits][maxBits] != n")
|
||||
}
|
||||
|
||||
bitCount := h.bitCount[:maxBits+1]
|
||||
bits := 1
|
||||
counts := &leafCounts[maxBits]
|
||||
for level := maxBits; level > 0; level-- {
|
||||
// chain.leafCount gives the number of literals requiring at least "bits"
|
||||
// bits to encode.
|
||||
bitCount[bits] = counts[level] - counts[level-1]
|
||||
bits++
|
||||
}
|
||||
return bitCount
|
||||
}
|
||||
|
||||
// Look at the leaves and assign them a bit count and an encoding as specified
|
||||
// in RFC 1951 3.2.2
|
||||
func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
|
||||
code := uint16(0)
|
||||
for n, bits := range bitCount {
|
||||
code <<= 1
|
||||
if n == 0 || bits == 0 {
|
||||
continue
|
||||
}
|
||||
// The literals list[len(list)-bits] .. list[len(list)-1]
|
||||
// are encoded using "bits" bits, and get the values
|
||||
// code, code + 1, .... The code values are
|
||||
// assigned in literal order (not frequency order).
|
||||
chunk := list[len(list)-int(bits):]
|
||||
|
||||
h.lns.sort(chunk)
|
||||
for _, node := range chunk {
|
||||
h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)}
|
||||
code++
|
||||
}
|
||||
list = list[0 : len(list)-int(bits)]
|
||||
}
|
||||
}
|
||||
|
||||
// Update this Huffman Code object to be the minimum code for the specified frequency count.
|
||||
//
|
||||
// freq is an array of frequencies, in which freq[i] gives the frequency of literal i.
|
||||
// maxBits The maximum number of bits to use for any literal.
|
||||
func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
|
||||
if h.freqcache == nil {
|
||||
// Allocate a reusable buffer with the longest possible frequency table.
|
||||
// Possible lengths are codegenCodeCount, offsetCodeCount and maxNumLit.
|
||||
// The largest of these is maxNumLit, so we allocate for that case.
|
||||
h.freqcache = make([]literalNode, maxNumLit+1)
|
||||
}
|
||||
list := h.freqcache[:len(freq)+1]
|
||||
// Number of non-zero literals
|
||||
count := 0
|
||||
// Set list to be the set of all non-zero literals and their frequencies
|
||||
for i, f := range freq {
|
||||
if f != 0 {
|
||||
list[count] = literalNode{uint16(i), f}
|
||||
count++
|
||||
} else {
|
||||
h.codes[i].len = 0
|
||||
}
|
||||
}
|
||||
|
||||
list = list[:count]
|
||||
if count <= 2 {
|
||||
// Handle the small cases here, because they are awkward for the general case code. With
|
||||
// two or fewer literals, everything has bit length 1.
|
||||
for i, node := range list {
|
||||
// "list" is in order of increasing literal value.
|
||||
h.codes[node.literal].set(uint16(i), 1)
|
||||
}
|
||||
return
|
||||
}
|
||||
h.lfs.sort(list)
|
||||
|
||||
// Get the number of literals for each bit count
|
||||
bitCount := h.bitCounts(list, maxBits)
|
||||
// And do the assignment
|
||||
h.assignEncodingAndSize(bitCount, list)
|
||||
}
|
||||
|
||||
type byLiteral []literalNode
|
||||
|
||||
func (s *byLiteral) sort(a []literalNode) {
|
||||
*s = byLiteral(a)
|
||||
sort.Sort(s)
|
||||
}
|
||||
|
||||
func (s byLiteral) Len() int { return len(s) }
|
||||
|
||||
func (s byLiteral) Less(i, j int) bool {
|
||||
return s[i].literal < s[j].literal
|
||||
}
|
||||
|
||||
func (s byLiteral) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||
|
||||
type byFreq []literalNode
|
||||
|
||||
func (s *byFreq) sort(a []literalNode) {
|
||||
*s = byFreq(a)
|
||||
sort.Sort(s)
|
||||
}
|
||||
|
||||
func (s byFreq) Len() int { return len(s) }
|
||||
|
||||
func (s byFreq) Less(i, j int) bool {
|
||||
if s[i].freq == s[j].freq {
|
||||
return s[i].literal < s[j].literal
|
||||
}
|
||||
return s[i].freq < s[j].freq
|
||||
}
|
||||
|
||||
func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||
|
||||
func reverseBits(number uint16, bitLength byte) uint16 {
|
||||
return bits.Reverse16(number << (16 - bitLength))
|
||||
}
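For reference, the canonical code assignment that assignEncodingAndSize performs can be shown with a tiny standalone sketch. The literals and bit lengths below are made up for the example; the real code additionally bit-reverses each code with reverseBits because DEFLATE emits Huffman codes LSB-first.

// Minimal sketch of canonical Huffman code assignment, mirroring the loop in
// assignEncodingAndSize: codes of the same bit length get consecutive values
// in literal order, and the running code is shifted left when moving to the
// next (longer) bit length.
package main

import "fmt"

func main() {
	literals := []byte{'a', 'b', 'c', 'd'}                   // example alphabet, in literal order
	lengths := map[byte]int{'a': 1, 'b': 2, 'c': 3, 'd': 3}  // assumed bit lengths

	code := 0
	for bits := 1; bits <= 3; bits++ {
		code <<= 1
		for _, l := range literals {
			if lengths[l] != bits {
				continue
			}
			fmt.Printf("%c -> %0*b (len %d)\n", l, bits, code, bits)
			code++
		}
	}
	// Output:
	// a -> 0 (len 1)
	// b -> 10 (len 2)
	// c -> 110 (len 3)
	// d -> 111 (len 3)
}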
|
860
vendor/github.com/mjl-/flate/inflate.go
generated
vendored
Normal file
|
@ -0,0 +1,860 @@
|
|||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package flate implements the DEFLATE compressed data format, described in RFC
|
||||
// 1951. NewReaderPartial returns data flushed in "partial flush" mode without
|
||||
// requiring a full history/sliding window or an explicit empty data block (as
|
||||
// written in "sync flush" mode). The gzip and zlib packages implement access to
|
||||
// DEFLATE-based file formats.
|
||||
package flate
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"math/bits"
|
||||
"strconv"
|
||||
"sync"
|
||||
)
|
||||
|
||||
const (
|
||||
maxCodeLen = 16 // max length of Huffman code
|
||||
// The next three numbers come from the RFC section 3.2.7, with the
|
||||
// additional proviso in section 3.2.5 which implies that distance codes
|
||||
// 30 and 31 should never occur in compressed data.
|
||||
maxNumLit = 286
|
||||
maxNumDist = 30
|
||||
numCodes = 19 // number of codes in Huffman meta-code
|
||||
)
|
||||
|
||||
// Initialize the fixedHuffmanDecoder only once upon first use.
|
||||
var fixedOnce sync.Once
|
||||
var fixedHuffmanDecoder huffmanDecoder
|
||||
|
||||
// A CorruptInputError reports the presence of corrupt input at a given offset.
|
||||
type CorruptInputError int64
|
||||
|
||||
func (e CorruptInputError) Error() string {
|
||||
return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
|
||||
}
|
||||
|
||||
// An InternalError reports an error in the flate code itself.
|
||||
type InternalError string
|
||||
|
||||
func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
|
||||
|
||||
// A ReadError reports an error encountered while reading input.
|
||||
//
|
||||
// Deprecated: No longer returned.
|
||||
type ReadError struct {
|
||||
Offset int64 // byte offset where error occurred
|
||||
Err error // error returned by underlying Read
|
||||
}
|
||||
|
||||
func (e *ReadError) Error() string {
|
||||
return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
|
||||
}
|
||||
|
||||
// A WriteError reports an error encountered while writing output.
|
||||
//
|
||||
// Deprecated: No longer returned.
|
||||
type WriteError struct {
|
||||
Offset int64 // byte offset where error occurred
|
||||
Err error // error returned by underlying Write
|
||||
}
|
||||
|
||||
func (e *WriteError) Error() string {
|
||||
return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
|
||||
}
|
||||
|
||||
// Resetter resets a ReadCloser returned by [NewReader] or [NewReaderDict]
|
||||
// to switch to a new underlying [Reader]. This permits reusing a ReadCloser
|
||||
// instead of allocating a new one.
|
||||
type Resetter interface {
|
||||
// Reset discards any buffered data and resets the Resetter as if it was
|
||||
// newly initialized with the given reader.
|
||||
Reset(r io.Reader, dict []byte) error
|
||||
}
|
||||
|
||||
// The data structure for decoding Huffman tables is based on that of
|
||||
// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits).
|
||||
// For codes smaller than the table width, there are multiple entries
|
||||
// (each combination of trailing bits has the same value). For codes
|
||||
// larger than the table width, the table contains a link to an overflow
|
||||
// table. The width of each entry in the link table is the maximum code
|
||||
// size minus the chunk width.
|
||||
//
|
||||
// Note that you can do a lookup in the table even without all bits
|
||||
// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
|
||||
// have the property that shorter codes come before longer ones, the
|
||||
// bit length estimate in the result is a lower bound on the actual
|
||||
// number of bits.
|
||||
//
|
||||
// See the following:
|
||||
// https://github.com/madler/zlib/raw/master/doc/algorithm.txt
|
||||
|
||||
// chunk & 15 is number of bits
|
||||
// chunk >> 4 is value, including table link
|
||||
|
||||
const (
|
||||
huffmanChunkBits = 9
|
||||
huffmanNumChunks = 1 << huffmanChunkBits
|
||||
huffmanCountMask = 15
|
||||
huffmanValueShift = 4
|
||||
)
|
||||
|
||||
type huffmanDecoder struct {
|
||||
min int // the minimum code length
|
||||
chunks [huffmanNumChunks]uint32 // chunks as described above
|
||||
links [][]uint32 // overflow links
|
||||
linkMask uint32 // mask the width of the link table
|
||||
}
|
||||
|
||||
// Initialize Huffman decoding tables from array of code lengths.
|
||||
// Following this function, h is guaranteed to be initialized into a complete
|
||||
// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
|
||||
// degenerate case where the tree has only a single symbol with length 1. Empty
|
||||
// trees are permitted.
|
||||
func (h *huffmanDecoder) init(lengths []int) bool {
|
||||
// Sanity enables additional runtime tests during Huffman
|
||||
// table construction. It's intended to be used during
|
||||
// development to supplement the currently ad-hoc unit tests.
|
||||
const sanity = false
|
||||
|
||||
if h.min != 0 {
|
||||
*h = huffmanDecoder{}
|
||||
}
|
||||
|
||||
// Count number of codes of each length,
|
||||
// compute min and max length.
|
||||
var count [maxCodeLen]int
|
||||
var min, max int
|
||||
for _, n := range lengths {
|
||||
if n == 0 {
|
||||
continue
|
||||
}
|
||||
if min == 0 || n < min {
|
||||
min = n
|
||||
}
|
||||
if n > max {
|
||||
max = n
|
||||
}
|
||||
count[n]++
|
||||
}
|
||||
|
||||
// Empty tree. The decompressor.huffSym function will fail later if the tree
|
||||
// is used. Technically, an empty tree is only valid for the HDIST tree and
|
||||
// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
|
||||
// is guaranteed to fail since it will attempt to use the tree to decode the
|
||||
// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
|
||||
// guaranteed to fail later since the compressed data section must be
|
||||
// composed of at least one symbol (the end-of-block marker).
|
||||
if max == 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
code := 0
|
||||
var nextcode [maxCodeLen]int
|
||||
for i := min; i <= max; i++ {
|
||||
code <<= 1
|
||||
nextcode[i] = code
|
||||
code += count[i]
|
||||
}
|
||||
|
||||
// Check that the coding is complete (i.e., that we've
|
||||
// assigned all 2-to-the-max possible bit sequences).
|
||||
// Exception: To be compatible with zlib, we also need to
|
||||
// accept degenerate single-code codings. See also
|
||||
// TestDegenerateHuffmanCoding.
|
||||
if code != 1<<uint(max) && !(code == 1 && max == 1) {
|
||||
return false
|
||||
}
|
||||
|
||||
h.min = min
|
||||
if max > huffmanChunkBits {
|
||||
numLinks := 1 << (uint(max) - huffmanChunkBits)
|
||||
h.linkMask = uint32(numLinks - 1)
|
||||
|
||||
// create link tables
|
||||
link := nextcode[huffmanChunkBits+1] >> 1
|
||||
h.links = make([][]uint32, huffmanNumChunks-link)
|
||||
for j := uint(link); j < huffmanNumChunks; j++ {
|
||||
reverse := int(bits.Reverse16(uint16(j)))
|
||||
reverse >>= uint(16 - huffmanChunkBits)
|
||||
off := j - uint(link)
|
||||
if sanity && h.chunks[reverse] != 0 {
|
||||
panic("impossible: overwriting existing chunk")
|
||||
}
|
||||
h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
|
||||
h.links[off] = make([]uint32, numLinks)
|
||||
}
|
||||
}
|
||||
|
||||
for i, n := range lengths {
|
||||
if n == 0 {
|
||||
continue
|
||||
}
|
||||
code := nextcode[n]
|
||||
nextcode[n]++
|
||||
chunk := uint32(i<<huffmanValueShift | n)
|
||||
reverse := int(bits.Reverse16(uint16(code)))
|
||||
reverse >>= uint(16 - n)
|
||||
if n <= huffmanChunkBits {
|
||||
for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
|
||||
// We should never need to overwrite
|
||||
// an existing chunk. Also, 0 is
|
||||
// never a valid chunk, because the
|
||||
// lower 4 "count" bits should be
|
||||
// between 1 and 15.
|
||||
if sanity && h.chunks[off] != 0 {
|
||||
panic("impossible: overwriting existing chunk")
|
||||
}
|
||||
h.chunks[off] = chunk
|
||||
}
|
||||
} else {
|
||||
j := reverse & (huffmanNumChunks - 1)
|
||||
if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
|
||||
// Longer codes should have been
|
||||
// associated with a link table above.
|
||||
panic("impossible: not an indirect chunk")
|
||||
}
|
||||
value := h.chunks[j] >> huffmanValueShift
|
||||
linktab := h.links[value]
|
||||
reverse >>= huffmanChunkBits
|
||||
for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
|
||||
if sanity && linktab[off] != 0 {
|
||||
panic("impossible: overwriting existing chunk")
|
||||
}
|
||||
linktab[off] = chunk
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if sanity {
|
||||
// Above we've sanity checked that we never overwrote
|
||||
// an existing entry. Here we additionally check that
|
||||
// we filled the tables completely.
|
||||
for i, chunk := range h.chunks {
|
||||
if chunk == 0 {
|
||||
// As an exception, in the degenerate
|
||||
// single-code case, we allow odd
|
||||
// chunks to be missing.
|
||||
if code == 1 && i%2 == 1 {
|
||||
continue
|
||||
}
|
||||
panic("impossible: missing chunk")
|
||||
}
|
||||
}
|
||||
for _, linktab := range h.links {
|
||||
for _, chunk := range linktab {
|
||||
if chunk == 0 {
|
||||
panic("impossible: missing chunk")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// The actual read interface needed by [NewReader].
|
||||
// If the passed in io.Reader does not also have ReadByte,
|
||||
// [NewReader] will introduce its own buffering.
|
||||
type Reader interface {
|
||||
io.Reader
|
||||
io.ByteReader
|
||||
}
|
||||
|
||||
// Decompress state.
|
||||
type decompressor struct {
|
||||
// Input source.
|
||||
r Reader
|
||||
rBuf *bufio.Reader // created if provided io.Reader does not implement io.ByteReader
|
||||
roffset int64
|
||||
|
||||
// Input bits, in top of b.
|
||||
b uint32
|
||||
nb uint
|
||||
|
||||
// Huffman decoders for literal/length, distance.
|
||||
h1, h2 huffmanDecoder
|
||||
|
||||
// Length arrays used to define Huffman codes.
|
||||
bits *[maxNumLit + maxNumDist]int
|
||||
codebits *[numCodes]int
|
||||
|
||||
// Output history, buffer.
|
||||
dict dictDecoder
|
||||
|
||||
// Temporary buffer (avoids repeated allocation).
|
||||
buf [4]byte
|
||||
|
||||
// Next step in the decompression,
|
||||
// and decompression state.
|
||||
step func(*decompressor)
|
||||
stepState int
|
||||
final bool
|
||||
readPartial bool // If set, r is a *bufio.Reader.
|
||||
err error
|
||||
toRead []byte
|
||||
hl, hd *huffmanDecoder
|
||||
copyLen int
|
||||
copyDist int
|
||||
}
|
||||
|
||||
func (f *decompressor) nextBlock() {
|
||||
// Ensure we don't block readers of network streams written with "partial flush"
|
||||
// mode and small writes/blocks (shorter than the sliding window size). Such
|
||||
// streams don't explicitly signal that we should pass data to our reader, like the
|
||||
// "sync flush" mode does with its zero-length data block. If we would do a
|
||||
// blocking read for the next block, our reader's protocol may get stuck.
|
||||
if f.readPartial && f.dict.availRead() > 0 {
|
||||
if f.r.(*bufio.Reader).Buffered() == 0 {
|
||||
f.toRead = f.dict.readFlush()
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
for f.nb < 1+2 {
|
||||
if f.err = f.moreBits(); f.err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
f.final = f.b&1 == 1
|
||||
f.b >>= 1
|
||||
typ := f.b & 3
|
||||
f.b >>= 2
|
||||
f.nb -= 1 + 2
|
||||
switch typ {
|
||||
case 0:
|
||||
f.dataBlock()
|
||||
case 1:
|
||||
// compressed, fixed Huffman tables
|
||||
f.hl = &fixedHuffmanDecoder
|
||||
f.hd = nil
|
||||
f.huffmanBlock()
|
||||
case 2:
|
||||
// compressed, dynamic Huffman tables
|
||||
if f.err = f.readHuffman(); f.err != nil {
|
||||
break
|
||||
}
|
||||
f.hl = &f.h1
|
||||
f.hd = &f.h2
|
||||
f.huffmanBlock()
|
||||
default:
|
||||
// 3 is reserved.
|
||||
f.err = CorruptInputError(f.roffset)
|
||||
}
|
||||
}
|
||||
|
||||
func (f *decompressor) Read(b []byte) (int, error) {
|
||||
for {
|
||||
if len(f.toRead) > 0 {
|
||||
n := copy(b, f.toRead)
|
||||
f.toRead = f.toRead[n:]
|
||||
if len(f.toRead) == 0 {
|
||||
return n, f.err
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
if f.err != nil {
|
||||
return 0, f.err
|
||||
}
|
||||
f.step(f)
|
||||
if f.err != nil && len(f.toRead) == 0 {
|
||||
f.toRead = f.dict.readFlush() // Flush what's left in case of error
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (f *decompressor) Close() error {
|
||||
if f.err == io.EOF {
|
||||
return nil
|
||||
}
|
||||
return f.err
|
||||
}
|
||||
|
||||
// RFC 1951 section 3.2.7.
|
||||
// Compression with dynamic Huffman codes
|
||||
|
||||
var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
|
||||
|
||||
func (f *decompressor) readHuffman() error {
|
||||
// HLIT[5], HDIST[5], HCLEN[4].
|
||||
for f.nb < 5+5+4 {
|
||||
if err := f.moreBits(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
nlit := int(f.b&0x1F) + 257
|
||||
if nlit > maxNumLit {
|
||||
return CorruptInputError(f.roffset)
|
||||
}
|
||||
f.b >>= 5
|
||||
ndist := int(f.b&0x1F) + 1
|
||||
if ndist > maxNumDist {
|
||||
return CorruptInputError(f.roffset)
|
||||
}
|
||||
f.b >>= 5
|
||||
nclen := int(f.b&0xF) + 4
|
||||
// numCodes is 19, so nclen is always valid.
|
||||
f.b >>= 4
|
||||
f.nb -= 5 + 5 + 4
|
||||
|
||||
// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
|
||||
for i := 0; i < nclen; i++ {
|
||||
for f.nb < 3 {
|
||||
if err := f.moreBits(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
f.codebits[codeOrder[i]] = int(f.b & 0x7)
|
||||
f.b >>= 3
|
||||
f.nb -= 3
|
||||
}
|
||||
for i := nclen; i < len(codeOrder); i++ {
|
||||
f.codebits[codeOrder[i]] = 0
|
||||
}
|
||||
if !f.h1.init(f.codebits[0:]) {
|
||||
return CorruptInputError(f.roffset)
|
||||
}
|
||||
|
||||
// HLIT + 257 code lengths, HDIST + 1 code lengths,
|
||||
// using the code length Huffman code.
|
||||
for i, n := 0, nlit+ndist; i < n; {
|
||||
x, err := f.huffSym(&f.h1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if x < 16 {
|
||||
// Actual length.
|
||||
f.bits[i] = x
|
||||
i++
|
||||
continue
|
||||
}
|
||||
// Repeat previous length or zero.
|
||||
var rep int
|
||||
var nb uint
|
||||
var b int
|
||||
switch x {
|
||||
default:
|
||||
return InternalError("unexpected length code")
|
||||
case 16:
|
||||
rep = 3
|
||||
nb = 2
|
||||
if i == 0 {
|
||||
return CorruptInputError(f.roffset)
|
||||
}
|
||||
b = f.bits[i-1]
|
||||
case 17:
|
||||
rep = 3
|
||||
nb = 3
|
||||
b = 0
|
||||
case 18:
|
||||
rep = 11
|
||||
nb = 7
|
||||
b = 0
|
||||
}
|
||||
for f.nb < nb {
|
||||
if err := f.moreBits(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
rep += int(f.b & uint32(1<<nb-1))
|
||||
f.b >>= nb
|
||||
f.nb -= nb
|
||||
if i+rep > n {
|
||||
return CorruptInputError(f.roffset)
|
||||
}
|
||||
for j := 0; j < rep; j++ {
|
||||
f.bits[i] = b
|
||||
i++
|
||||
}
|
||||
}
|
||||
|
||||
if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
|
||||
return CorruptInputError(f.roffset)
|
||||
}
|
||||
|
||||
// As an optimization, we can initialize the min bits to read at a time
|
||||
// for the HLIT tree to the length of the EOB marker since we know that
|
||||
// every block must terminate with one. This preserves the property that
|
||||
// we never read any extra bytes after the end of the DEFLATE stream.
|
||||
if f.h1.min < f.bits[endBlockMarker] {
|
||||
f.h1.min = f.bits[endBlockMarker]
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Decode a single Huffman block from f.
|
||||
// hl and hd are the Huffman states for the lit/length values
|
||||
// and the distance values, respectively. If hd == nil, the
|
||||
// fixed distance encoding associated with fixed Huffman blocks is used.
|
||||
func (f *decompressor) huffmanBlock() {
|
||||
const (
|
||||
stateInit = iota // Zero value must be stateInit
|
||||
stateDict
|
||||
)
|
||||
|
||||
switch f.stepState {
|
||||
case stateInit:
|
||||
goto readLiteral
|
||||
case stateDict:
|
||||
goto copyHistory
|
||||
}
|
||||
|
||||
readLiteral:
|
||||
// Read literal and/or (length, distance) according to RFC section 3.2.3.
|
||||
{
|
||||
v, err := f.huffSym(f.hl)
|
||||
if err != nil {
|
||||
f.err = err
|
||||
return
|
||||
}
|
||||
var n uint // number of bits extra
|
||||
var length int
|
||||
switch {
|
||||
case v < 256:
|
||||
f.dict.writeByte(byte(v))
|
||||
if f.dict.availWrite() == 0 {
|
||||
f.toRead = f.dict.readFlush()
|
||||
f.step = (*decompressor).huffmanBlock
|
||||
f.stepState = stateInit
|
||||
return
|
||||
}
|
||||
goto readLiteral
|
||||
case v == 256:
|
||||
f.finishBlock()
|
||||
return
|
||||
// otherwise, reference to older data
|
||||
case v < 265:
|
||||
length = v - (257 - 3)
|
||||
n = 0
|
||||
case v < 269:
|
||||
length = v*2 - (265*2 - 11)
|
||||
n = 1
|
||||
case v < 273:
|
||||
length = v*4 - (269*4 - 19)
|
||||
n = 2
|
||||
case v < 277:
|
||||
length = v*8 - (273*8 - 35)
|
||||
n = 3
|
||||
case v < 281:
|
||||
length = v*16 - (277*16 - 67)
|
||||
n = 4
|
||||
case v < 285:
|
||||
length = v*32 - (281*32 - 131)
|
||||
n = 5
|
||||
case v < maxNumLit:
|
||||
length = 258
|
||||
n = 0
|
||||
default:
|
||||
f.err = CorruptInputError(f.roffset)
|
||||
return
|
||||
}
|
||||
if n > 0 {
|
||||
for f.nb < n {
|
||||
if err = f.moreBits(); err != nil {
|
||||
f.err = err
|
||||
return
|
||||
}
|
||||
}
|
||||
length += int(f.b & uint32(1<<n-1))
|
||||
f.b >>= n
|
||||
f.nb -= n
|
||||
}
|
||||
|
||||
var dist int
|
||||
if f.hd == nil {
|
||||
for f.nb < 5 {
|
||||
if err = f.moreBits(); err != nil {
|
||||
f.err = err
|
||||
return
|
||||
}
|
||||
}
|
||||
dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
|
||||
f.b >>= 5
|
||||
f.nb -= 5
|
||||
} else {
|
||||
if dist, err = f.huffSym(f.hd); err != nil {
|
||||
f.err = err
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
switch {
|
||||
case dist < 4:
|
||||
dist++
|
||||
case dist < maxNumDist:
|
||||
nb := uint(dist-2) >> 1
|
||||
// have 1 bit in bottom of dist, need nb more.
|
||||
extra := (dist & 1) << nb
|
||||
for f.nb < nb {
|
||||
if err = f.moreBits(); err != nil {
|
||||
f.err = err
|
||||
return
|
||||
}
|
||||
}
|
||||
extra |= int(f.b & uint32(1<<nb-1))
|
||||
f.b >>= nb
|
||||
f.nb -= nb
|
||||
dist = 1<<(nb+1) + 1 + extra
|
||||
default:
|
||||
f.err = CorruptInputError(f.roffset)
|
||||
return
|
||||
}
|
||||
|
||||
// No check on length; encoding can be prescient.
|
||||
if dist > f.dict.histSize() {
|
||||
f.err = CorruptInputError(f.roffset)
|
||||
return
|
||||
}
|
||||
|
||||
f.copyLen, f.copyDist = length, dist
|
||||
goto copyHistory
|
||||
}
|
||||
|
||||
copyHistory:
|
||||
// Perform a backwards copy according to RFC section 3.2.3.
|
||||
{
|
||||
cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
|
||||
if cnt == 0 {
|
||||
cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
|
||||
}
|
||||
f.copyLen -= cnt
|
||||
|
||||
if f.dict.availWrite() == 0 || f.copyLen > 0 {
|
||||
f.toRead = f.dict.readFlush()
|
||||
f.step = (*decompressor).huffmanBlock // We need to continue this work
|
||||
f.stepState = stateDict
|
||||
return
|
||||
}
|
||||
goto readLiteral
|
||||
}
|
||||
}
|
||||
|
||||
// Copy a single uncompressed data block from input to output.
|
||||
func (f *decompressor) dataBlock() {
|
||||
// Uncompressed.
|
||||
// Discard current half-byte.
|
||||
f.nb = 0
|
||||
f.b = 0
|
||||
|
||||
// Length then ones-complement of length.
|
||||
nr, err := io.ReadFull(f.r, f.buf[0:4])
|
||||
f.roffset += int64(nr)
|
||||
if err != nil {
|
||||
f.err = noEOF(err)
|
||||
return
|
||||
}
|
||||
n := int(f.buf[0]) | int(f.buf[1])<<8
|
||||
nn := int(f.buf[2]) | int(f.buf[3])<<8
|
||||
if uint16(nn) != uint16(^n) {
|
||||
f.err = CorruptInputError(f.roffset)
|
||||
return
|
||||
}
|
||||
|
||||
if n == 0 {
|
||||
f.toRead = f.dict.readFlush()
|
||||
f.finishBlock()
|
||||
return
|
||||
}
|
||||
|
||||
f.copyLen = n
|
||||
f.copyData()
|
||||
}
|
||||
|
||||
// copyData copies f.copyLen bytes from the underlying reader into f.hist.
|
||||
// It pauses for reads when f.hist is full.
|
||||
func (f *decompressor) copyData() {
|
||||
buf := f.dict.writeSlice()
|
||||
if len(buf) > f.copyLen {
|
||||
buf = buf[:f.copyLen]
|
||||
}
|
||||
|
||||
cnt, err := io.ReadFull(f.r, buf)
|
||||
f.roffset += int64(cnt)
|
||||
f.copyLen -= cnt
|
||||
f.dict.writeMark(cnt)
|
||||
if err != nil {
|
||||
f.err = noEOF(err)
|
||||
return
|
||||
}
|
||||
|
||||
if f.dict.availWrite() == 0 || f.copyLen > 0 {
|
||||
f.toRead = f.dict.readFlush()
|
||||
f.step = (*decompressor).copyData
|
||||
return
|
||||
}
|
||||
f.finishBlock()
|
||||
}
|
||||
|
||||
func (f *decompressor) finishBlock() {
|
||||
if f.final {
|
||||
if f.dict.availRead() > 0 {
|
||||
f.toRead = f.dict.readFlush()
|
||||
}
|
||||
f.err = io.EOF
|
||||
}
|
||||
f.step = (*decompressor).nextBlock
|
||||
}
|
||||
|
||||
// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
|
||||
func noEOF(e error) error {
|
||||
if e == io.EOF {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
func (f *decompressor) moreBits() error {
|
||||
c, err := f.r.ReadByte()
|
||||
if err != nil {
|
||||
return noEOF(err)
|
||||
}
|
||||
f.roffset++
|
||||
f.b |= uint32(c) << f.nb
|
||||
f.nb += 8
|
||||
return nil
|
||||
}
|
||||
|
||||
// Read the next Huffman-encoded symbol from f according to h.
|
||||
func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
|
||||
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
|
||||
// with a single element, huffSym must error on these two edge cases. In both
|
||||
// cases, the chunks slice will be 0 for the invalid sequence, leading it to
|
||||
// satisfy the n == 0 check below.
|
||||
n := uint(h.min)
|
||||
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
|
||||
// but is smart enough to keep local variables in registers, so use nb and b,
|
||||
// inline call to moreBits and reassign b,nb back to f on return.
|
||||
nb, b := f.nb, f.b
|
||||
for {
|
||||
for nb < n {
|
||||
c, err := f.r.ReadByte()
|
||||
if err != nil {
|
||||
f.b = b
|
||||
f.nb = nb
|
||||
return 0, noEOF(err)
|
||||
}
|
||||
f.roffset++
|
||||
b |= uint32(c) << (nb & 31)
|
||||
nb += 8
|
||||
}
|
||||
chunk := h.chunks[b&(huffmanNumChunks-1)]
|
||||
n = uint(chunk & huffmanCountMask)
|
||||
if n > huffmanChunkBits {
|
||||
chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
|
||||
n = uint(chunk & huffmanCountMask)
|
||||
}
|
||||
if n <= nb {
|
||||
if n == 0 {
|
||||
f.b = b
|
||||
f.nb = nb
|
||||
f.err = CorruptInputError(f.roffset)
|
||||
return 0, f.err
|
||||
}
|
||||
f.b = b >> (n & 31)
|
||||
f.nb = nb - n
|
||||
return int(chunk >> huffmanValueShift), nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (f *decompressor) makeReader(r io.Reader) {
|
||||
if rr, ok := r.(Reader); ok {
|
||||
f.rBuf = nil
|
||||
if f.readPartial {
|
||||
if _, ok := rr.(*bufio.Reader); !ok {
|
||||
rr = bufio.NewReader(rr)
|
||||
}
|
||||
}
|
||||
f.r = rr
|
||||
return
|
||||
}
|
||||
// Reuse rBuf if possible. Invariant: rBuf is always created (and owned) by decompressor.
|
||||
if f.rBuf != nil {
|
||||
f.rBuf.Reset(r)
|
||||
} else {
|
||||
// bufio.NewReader will not return r unchanged: r does not implement flate.Reader, so it cannot be a *bufio.Reader.
|
||||
f.rBuf = bufio.NewReader(r)
|
||||
}
|
||||
f.r = f.rBuf
|
||||
}
|
||||
|
||||
func fixedHuffmanDecoderInit() {
|
||||
fixedOnce.Do(func() {
|
||||
// These come from the RFC section 3.2.6.
|
||||
var bits [288]int
|
||||
for i := 0; i < 144; i++ {
|
||||
bits[i] = 8
|
||||
}
|
||||
for i := 144; i < 256; i++ {
|
||||
bits[i] = 9
|
||||
}
|
||||
for i := 256; i < 280; i++ {
|
||||
bits[i] = 7
|
||||
}
|
||||
for i := 280; i < 288; i++ {
|
||||
bits[i] = 8
|
||||
}
|
||||
fixedHuffmanDecoder.init(bits[:])
|
||||
})
|
||||
}
|
||||
|
||||
func (f *decompressor) Reset(r io.Reader, dict []byte) error {
|
||||
*f = decompressor{
|
||||
rBuf: f.rBuf,
|
||||
bits: f.bits,
|
||||
codebits: f.codebits,
|
||||
dict: f.dict,
|
||||
step: (*decompressor).nextBlock,
|
||||
readPartial: f.readPartial,
|
||||
}
|
||||
f.makeReader(r)
|
||||
f.dict.init(maxMatchOffset, dict)
|
||||
return nil
|
||||
}
|
||||
|
||||
// NewReader returns a new ReadCloser that can be used
|
||||
// to read the uncompressed version of r.
|
||||
// If r does not also implement [io.ByteReader],
|
||||
// the decompressor may read more data than necessary from r.
|
||||
// The reader returns [io.EOF] after the final block in the DEFLATE stream has
|
||||
// been encountered. Any trailing data after the final block is ignored.
|
||||
//
|
||||
// The [io.ReadCloser] returned by NewReader also implements [Resetter].
|
||||
func NewReader(r io.Reader) io.ReadCloser {
|
||||
return newReader(r, nil, false)
|
||||
}
|
||||
|
||||
// NewReaderDict is like [NewReader] but initializes the reader
|
||||
// with a preset dictionary. The returned [Reader] behaves as if
|
||||
// the uncompressed data stream started with the given dictionary,
|
||||
// which has already been read. NewReaderDict is typically used
|
||||
// to read data compressed by NewWriterDict.
|
||||
//
|
||||
// The ReadCloser returned by NewReaderDict also implements [Resetter].
|
||||
func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
|
||||
return newReader(r, dict, false)
|
||||
}
|
||||
|
||||
// NewReaderPartial is like [NewReader], but can return data smaller than the
|
||||
// history sliding window size when it was written in "partial flush" mode.
|
||||
func NewReaderPartial(r io.Reader) io.ReadCloser {
|
||||
return newReader(r, nil, true)
|
||||
}
|
||||
|
||||
func newReader(r io.Reader, dict []byte, readPartial bool) *decompressor {
|
||||
fixedHuffmanDecoderInit()
|
||||
|
||||
var f decompressor
|
||||
f.readPartial = readPartial
|
||||
f.makeReader(r)
|
||||
f.bits = new([maxNumLit + maxNumDist]int)
|
||||
f.codebits = new([numCodes]int)
|
||||
f.step = (*decompressor).nextBlock
|
||||
f.dict.init(maxMatchOffset, dict)
|
||||
return &f
|
||||
}
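As a rough usage sketch (not taken from the repository): a reader from NewReaderPartial can be fed from the same stream a flate writer flushes into, and flushed data becomes readable before the stream is finished. The writer side below uses the standard library's compress/flate purely for illustration, and the import alias mflate is an assumption.

// Rough sketch: reading flushed DEFLATE data before the stream ends.
// mflate is an assumed import alias for the vendored github.com/mjl-/flate.
package main

import (
	"bytes"
	"compress/flate" // stdlib writer, used here only to produce a stream
	"fmt"
	"io"

	mflate "github.com/mjl-/flate"
)

func main() {
	var stream bytes.Buffer

	w, err := flate.NewWriter(&stream, flate.DefaultCompression)
	if err != nil {
		panic(err)
	}
	w.Write([]byte("first chunk\n"))
	w.Flush() // make the data decodable without ending the stream

	r := mflate.NewReaderPartial(&stream)
	buf := make([]byte, 64)
	n, err := r.Read(buf)
	if err != nil {
		panic(err)
	}
	fmt.Printf("before close: %q\n", buf[:n]) // "first chunk\n"

	w.Write([]byte("second chunk\n"))
	w.Close() // writes the final block

	rest, err := io.ReadAll(r)
	if err != nil {
		panic(err)
	}
	fmt.Printf("after close: %q\n", rest) // "second chunk\n"
	r.Close()
}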
|
97
vendor/github.com/mjl-/flate/token.go
generated
vendored
Normal file
|
@ -0,0 +1,97 @@
|
|||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package flate
|
||||
|
||||
const (
|
||||
// 2 bits: type, 0 = literal, 1 = EOF, 2 = match, 3 = unused
|
||||
// 8 bits: xlength = length - MIN_MATCH_LENGTH
|
||||
// 22 bits: xoffset = offset - MIN_OFFSET_SIZE, or literal
|
||||
lengthShift = 22
|
||||
offsetMask = 1<<lengthShift - 1
|
||||
typeMask = 3 << 30
|
||||
literalType = 0 << 30
|
||||
matchType = 1 << 30
|
||||
)
|
||||
|
||||
// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
|
||||
// is lengthCodes[length - MIN_MATCH_LENGTH]
|
||||
var lengthCodes = [...]uint32{
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
|
||||
9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
|
||||
13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
|
||||
15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
|
||||
17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
|
||||
18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
|
||||
20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
|
||||
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
|
||||
22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
|
||||
23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
|
||||
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
|
||||
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
|
||||
26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
|
||||
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
|
||||
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
|
||||
27, 27, 27, 27, 27, 28,
|
||||
}
|
||||
|
||||
var offsetCodes = [...]uint32{
|
||||
0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
|
||||
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
|
||||
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
|
||||
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
|
||||
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
|
||||
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
|
||||
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
|
||||
}
|
||||
|
||||
type token uint32
|
||||
|
||||
// Convert a literal into a literal token.
|
||||
func literalToken(literal uint32) token { return token(literalType + literal) }
|
||||
|
||||
// Convert a < xlength, xoffset > pair into a match token.
|
||||
func matchToken(xlength uint32, xoffset uint32) token {
|
||||
return token(matchType + xlength<<lengthShift + xoffset)
|
||||
}
|
||||
|
||||
// Returns the literal of a literal token.
|
||||
func (t token) literal() uint32 { return uint32(t - literalType) }
|
||||
|
||||
// Returns the extra offset of a match token.
|
||||
func (t token) offset() uint32 { return uint32(t) & offsetMask }
|
||||
|
||||
func (t token) length() uint32 { return uint32((t - matchType) >> lengthShift) }
|
||||
|
||||
func lengthCode(len uint32) uint32 { return lengthCodes[len] }
|
||||
|
||||
// Returns the offset code corresponding to a specific offset.
|
||||
func offsetCode(off uint32) uint32 {
|
||||
if off < uint32(len(offsetCodes)) {
|
||||
return offsetCodes[off]
|
||||
}
|
||||
if off>>7 < uint32(len(offsetCodes)) {
|
||||
return offsetCodes[off>>7] + 14
|
||||
}
|
||||
return offsetCodes[off>>14] + 28
|
||||
}
|
3
vendor/modules.txt
vendored
|
@ -19,6 +19,9 @@ github.com/mjl-/autocert
|
|||
# github.com/mjl-/bstore v0.0.6
|
||||
## explicit; go 1.19
|
||||
github.com/mjl-/bstore
|
||||
# github.com/mjl-/flate v0.0.0-20250221133712-6372d09eb978
|
||||
## explicit; go 1.21
|
||||
github.com/mjl-/flate
|
||||
# github.com/mjl-/sconf v0.0.7
|
||||
## explicit; go 1.12
|
||||
github.com/mjl-/sconf
|
||||
|
|