
implement IMAP extension COMPRESS=DEFLATE, rfc 4978

to compress the entire IMAP connection. tested with thunderbird, meli, k9, ios
mail. the initial implementation had interoperability issues with some of these
clients: if they write the deflate stream and flush in "partial flush" mode, the
go stdlib flate reader does not return any data (until there is an explicit
zero-length "sync flush" block, or until the history/sliding window is full),
blocking progress and causing clients to consider the seemingly stuck connection
timed out and close it. this commit includes a copy of the flate package with a
new reader that returns partially flushed blocks earlier.

this also adds imap trace logging to imapclient.Conn, which was useful for
debugging.
Mechiel Lukkien 2025-02-20 19:33:09 +01:00
parent 3f6c45a41f
commit f40f94670e
25 changed files with 3623 additions and 69 deletions
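
For context, a minimal client-side sketch (not part of this commit; the address and credentials are placeholders, error handling is simplified) of how the updated imapclient API would be used: New now takes a connection id for the new trace logging, and CompressDeflate sends "compress deflate" and wraps the connection in a deflate reader/writer.

package main

import (
	"log"
	"net"

	"github.com/mjl-/mox/imapclient"
	"github.com/mjl-/mox/mox-"
)

func main() {
	// Placeholder address, for illustration only.
	conn, err := net.Dial("tcp", "mail.example.com:143")
	if err != nil {
		log.Fatal(err)
	}
	// New now takes a cid, used for the trace logging added in this commit.
	client, err := imapclient.New(mox.Cid(), conn, false)
	if err != nil {
		log.Fatal(err)
	}
	if _, _, err := client.Login("mjl@mox.example", "password"); err != nil {
		log.Fatal(err)
	}
	// After the tagged OK for "compress deflate", both directions of the
	// connection are deflate-compressed.
	if _, _, err := client.CompressDeflate(); err != nil {
		log.Fatal(err)
	}
	client.Select("inbox")
	client.Logout()
}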


@ -522,7 +522,7 @@ func TestCtl(t *testing.T) {
testctl(func(ctl *ctl) {
a, b := net.Pipe()
go func() {
client, err := imapclient.New(a, true)
client, err := imapclient.New(mox.Cid(), a, true)
tcheck(t, err, "new imapclient")
client.Select("inbox")
client.Logout()

3
go.mod

@ -6,6 +6,7 @@ require (
github.com/mjl-/adns v0.0.0-20240509092456-2dc8715bf4af
github.com/mjl-/autocert v0.0.0-20231214125928-31b7400acb05
github.com/mjl-/bstore v0.0.6
github.com/mjl-/flate v0.0.0-20250221133712-6372d09eb978
github.com/mjl-/sconf v0.0.7
github.com/mjl-/sherpa v0.6.7
github.com/mjl-/sherpadoc v0.0.16
@ -17,6 +18,7 @@ require (
golang.org/x/crypto v0.32.0
golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f
golang.org/x/net v0.34.0
golang.org/x/sys v0.29.0
golang.org/x/text v0.21.0
rsc.io/qr v0.2.0
)
@ -31,7 +33,6 @@ require (
github.com/prometheus/procfs v0.12.0 // indirect
golang.org/x/mod v0.22.0 // indirect
golang.org/x/sync v0.10.0 // indirect
golang.org/x/sys v0.29.0 // indirect
golang.org/x/tools v0.29.0 // indirect
google.golang.org/protobuf v1.31.0 // indirect
)

2
go.sum

@ -30,6 +30,8 @@ github.com/mjl-/autocert v0.0.0-20231214125928-31b7400acb05 h1:s6ay4bh4tmpPLdxjy
github.com/mjl-/autocert v0.0.0-20231214125928-31b7400acb05/go.mod h1:taMFU86abMxKLPV4Bynhv8enbYmS67b8LG80qZv2Qus=
github.com/mjl-/bstore v0.0.6 h1:ntlu9MkfCkpm2XfBY4+Ws4KK9YzXzewr3+lCueFB+9c=
github.com/mjl-/bstore v0.0.6/go.mod h1:/cD25FNBaDfvL/plFRxI3Ba3E+wcB0XVOS8nJDqndg0=
github.com/mjl-/flate v0.0.0-20250221133712-6372d09eb978 h1:Eg5DfI3/00URzGErujKus6a3O0kyXzF8vjoDZzH/gig=
github.com/mjl-/flate v0.0.0-20250221133712-6372d09eb978/go.mod h1:QBkFtjai3AiQQuUu7pVh6PA06Vd3oa68E+vddf/UBOs=
github.com/mjl-/sconf v0.0.7 h1:bdBcSFZCDFMm/UdBsgNCsjkYmKrSgYwp7rAOoufwHe4=
github.com/mjl-/sconf v0.0.7/go.mod h1:uF8OdWtLT8La3i4ln176i1pB0ps9pXGCaABEU55ZkE0=
github.com/mjl-/sherpa v0.6.7 h1:C5F8XQdV5nCuS4fvB+ye/ziUQrajEhOoj/t2w5T14BY=


@ -21,12 +21,26 @@ import (
"net"
"reflect"
"strings"
"github.com/mjl-/flate"
"github.com/mjl-/mox/mlog"
"github.com/mjl-/mox/moxio"
)
// Conn is an IMAP connection to a server.
type Conn struct {
conn net.Conn
r *bufio.Reader
// Connection, may be original TCP or TLS connection. Reads go through c.br, and
// writes through c.bw. It wraps a tracing reading/writer and may wrap flate
// compression.
conn net.Conn
br *bufio.Reader
bw *bufio.Writer
compress bool // If compression is enabled, we must flush flateWriter and its target original bufio writer.
flateWriter *flate.Writer
flateBW *bufio.Writer
log mlog.Log
panic bool
tagGen int
record bool // If true, bytes read are added to recordBuf. recorded() resets.
@ -57,14 +71,18 @@ func (e Error) Unwrap() error {
// The initial untagged greeting response is read and must be "OK" or
// "PREAUTH". If preauth, the connection is already in authenticated state,
// typically through TLS client certificate. This is indicated in Conn.Preauth.
func New(conn net.Conn, xpanic bool) (client *Conn, rerr error) {
func New(cid int64, conn net.Conn, xpanic bool) (client *Conn, rerr error) {
log := mlog.New("imapclient", nil).WithCid(cid)
c := Conn{
conn: conn,
r: bufio.NewReader(conn),
br: bufio.NewReader(moxio.NewTraceReader(log, "CR: ", conn)),
log: log,
panic: xpanic,
CapAvailable: map[Capability]struct{}{},
CapEnabled: map[Capability]struct{}{},
}
// Writes are buffered and write to Conn, which may panic.
c.bw = bufio.NewWriter(moxio.NewTraceWriter(log, "CW: ", &c))
defer c.recover(&rerr)
tag := c.xnonspace()
@ -122,6 +140,53 @@ func (c *Conn) xcheck(err error) {
}
}
// Write writes directly to the connection. Write errors do take the connection's
// panic mode into account, i.e. Write can panic.
func (c *Conn) Write(buf []byte) (n int, rerr error) {
defer c.recover(&rerr)
n, rerr = c.conn.Write(buf)
c.xcheckf(rerr, "write")
return n, nil
}
func (c *Conn) xflush() {
err := c.bw.Flush()
c.xcheckf(err, "flush")
// If compression is active, we need to flush the deflate stream.
if c.compress {
err := c.flateWriter.Flush()
c.xcheckf(err, "flush deflate")
err = c.flateBW.Flush()
c.xcheckf(err, "flush deflate buffer")
}
}
// Close closes the connection, flushing and closing any compression and TLS layer.
//
// You may want to call Logout first. Closing a connection with a mailbox with
// deleted messages not yet expunged will not expunge those messages.
func (c *Conn) Close() (rerr error) {
defer c.recover(&rerr)
if c.conn == nil {
return nil
}
if c.flateWriter != nil {
err := c.flateWriter.Close()
c.xcheckf(err, "close deflate writer")
err = c.flateBW.Flush()
c.xcheckf(err, "flush deflate buffer")
c.flateWriter = nil
c.flateBW = nil
}
err := c.conn.Close()
c.xcheckf(err, "close connection")
c.conn = nil
return
}
// TLSConnectionState returns the TLS connection state if the connection uses TLS.
func (c *Conn) TLSConnectionState() *tls.ConnectionState {
if conn, ok := c.conn.(*tls.Conn); ok {
@ -141,8 +206,9 @@ func (c *Conn) Commandf(tag string, format string, args ...any) (rerr error) {
}
c.LastTag = tag
_, err := fmt.Fprintf(c.conn, "%s %s\r\n", tag, fmt.Sprintf(format, args...))
_, err := fmt.Fprintf(c.bw, "%s %s\r\n", tag, fmt.Sprintf(format, args...))
c.xcheckf(err, "write command")
c.xflush()
return
}
@ -196,7 +262,7 @@ func (c *Conn) ReadUntagged() (untagged Untagged, rerr error) {
func (c *Conn) Readline() (line string, rerr error) {
defer c.recover(&rerr)
line, err := c.r.ReadString('\n')
line, err := c.br.ReadString('\n')
c.xcheckf(err, "read line")
return line, nil
}
@ -225,37 +291,30 @@ func (c *Conn) Writelinef(format string, args ...any) (rerr error) {
defer c.recover(&rerr)
s := fmt.Sprintf(format, args...)
_, err := fmt.Fprintf(c.conn, "%s\r\n", s)
_, err := fmt.Fprintf(c.bw, "%s\r\n", s)
c.xcheckf(err, "writeline")
c.xflush()
return nil
}
// Write writes directly to the connection. Write errors do take the connections
// panic mode into account, i.e. Write can panic.
func (c *Conn) Write(buf []byte) (n int, rerr error) {
defer c.recover(&rerr)
n, rerr = c.conn.Write(buf)
c.xcheckf(rerr, "write")
return n, nil
}
// WriteSyncLiteral first writes the synchronous literal size, then read the
// WriteSyncLiteral first writes the synchronous literal size, then reads the
// continuation "+" and finally writes the data.
func (c *Conn) WriteSyncLiteral(s string) (untagged []Untagged, rerr error) {
defer c.recover(&rerr)
_, err := fmt.Fprintf(c.conn, "{%d}\r\n", len(s))
_, err := fmt.Fprintf(c.bw, "{%d}\r\n", len(s))
c.xcheckf(err, "write sync literal size")
c.xflush()
plus, err := c.r.Peek(1)
plus, err := c.br.Peek(1)
c.xcheckf(err, "read continuation")
if plus[0] == '+' {
_, err = c.Readline()
c.xcheckf(err, "read continuation line")
_, err = c.conn.Write([]byte(s))
_, err = c.bw.Write([]byte(s))
c.xcheckf(err, "write literal data")
c.xflush()
return nil, nil
}
untagged, result, err := c.Response()
@ -301,15 +360,3 @@ func (c *Conn) xgetUntagged(l []Untagged, dst any) {
}
dstv.Elem().Set(gotv)
}
// Close closes the connection without writing anything to the server.
// You may want to call Logout. Closing a connection with a mailbox with deleted
// message not yet expunged will not expunge those messages.
func (c *Conn) Close() error {
var err error
if c.conn != nil {
err = c.conn.Close()
c.conn = nil
}
return err
}


@ -9,6 +9,10 @@ import (
"strings"
"time"
"github.com/mjl-/flate"
"github.com/mjl-/mox/mlog"
"github.com/mjl-/mox/moxio"
"github.com/mjl-/mox/scram"
)
@ -39,11 +43,14 @@ func (c *Conn) Starttls(config *tls.Config) (untagged []Untagged, result Result,
defer c.recover(&rerr)
untagged, result, rerr = c.Transactf("starttls")
c.xcheckf(rerr, "starttls command")
conn := tls.Client(c.conn, config)
err := conn.Handshake()
conn := c.xprefixConn()
tlsConn := tls.Client(conn, config)
err := tlsConn.Handshake()
c.xcheckf(err, "tls handshake")
c.conn = conn
c.r = bufio.NewReader(conn)
c.conn = tlsConn
c.br = bufio.NewReader(moxio.NewTraceReader(c.log, "CR: ", tlsConn))
c.bw = bufio.NewWriter(moxio.NewTraceWriter(c.log, "CW: ", c))
return untagged, result, nil
}
@ -116,6 +123,38 @@ func (c *Conn) AuthenticateSCRAM(method string, h func() hash.Hash, username, pa
return c.ResponseOK()
}
// CompressDeflate enables compression with deflate on the connection.
//
// Only possible when server has announced the COMPRESS=DEFLATE capability.
//
// State: Authenticated or selected.
func (c *Conn) CompressDeflate() (untagged []Untagged, result Result, rerr error) {
defer c.recover(&rerr)
if _, ok := c.CapAvailable[CapCompressDeflate]; !ok {
c.xerrorf("server does not implement capability %s", CapCompressDeflate)
}
untagged, result, rerr = c.Transactf("compress deflate")
c.xcheck(rerr)
c.flateBW = bufio.NewWriter(c)
fw, err := flate.NewWriter(c.flateBW, flate.DefaultCompression)
c.xcheckf(err, "deflate") // Cannot happen.
c.compress = true
c.flateWriter = fw
tw := moxio.NewTraceWriter(mlog.New("imapclient", nil), "CW: ", fw)
c.bw = bufio.NewWriter(tw)
rc := c.xprefixConn()
fr := flate.NewReaderPartial(rc)
tr := moxio.NewTraceReader(mlog.New("imapclient", nil), "CR: ", fr)
c.br = bufio.NewReader(tr)
return
}
// Enable enables capabilities for use with the connection, verifying the server has indeed enabled them.
func (c *Conn) Enable(capabilities ...string) (untagged []Untagged, result Result, rerr error) {
defer c.recover(&rerr)


@ -23,7 +23,7 @@ func (c *Conn) recordAdd(buf []byte) {
func (c *Conn) xtake(s string) {
buf := make([]byte, len(s))
_, err := io.ReadFull(c.r, buf)
_, err := io.ReadFull(c.br, buf)
c.xcheckf(err, "taking %q", s)
if !strings.EqualFold(string(buf), s) {
c.xerrorf("got %q, expected %q", buf, s)
@ -32,7 +32,7 @@ func (c *Conn) xtake(s string) {
}
func (c *Conn) readbyte() (byte, error) {
b, err := c.r.ReadByte()
b, err := c.br.ReadByte()
if err == nil {
c.recordAdd([]byte{b})
}
@ -43,12 +43,12 @@ func (c *Conn) unreadbyte() {
if c.record {
c.recordBuf = c.recordBuf[:len(c.recordBuf)-1]
}
err := c.r.UnreadByte()
err := c.br.UnreadByte()
c.xcheckf(err, "unread byte")
}
func (c *Conn) readrune() (rune, error) {
x, _, err := c.r.ReadRune()
x, _, err := c.br.ReadRune()
if err == nil {
c.recordAdd([]byte(string(x)))
}
@ -126,7 +126,8 @@ func (c *Conn) xrespText() RespText {
var knownCodes = stringMap(
// Without parameters.
"ALERT", "PARSE", "READ-ONLY", "READ-WRITE", "TRYCREATE", "UIDNOTSTICKY", "UNAVAILABLE", "AUTHENTICATIONFAILED", "AUTHORIZATIONFAILED", "EXPIRED", "PRIVACYREQUIRED", "CONTACTADMIN", "NOPERM", "INUSE", "EXPUNGEISSUED", "CORRUPTION", "SERVERBUG", "CLIENTBUG", "CANNOT", "LIMIT", "OVERQUOTA", "ALREADYEXISTS", "NONEXISTENT", "NOTSAVED", "HASCHILDREN", "CLOSED", "UNKNOWN-CTE",
"OVERQUOTA", // ../rfc/9208:472
"OVERQUOTA", // ../rfc/9208:472
"COMPRESSIONACTIVE", // ../rfc/4978:143
// With parameters.
"BADCHARSET", "CAPABILITY", "PERMANENTFLAGS", "UIDNEXT", "UIDVALIDITY", "UNSEEN", "APPENDUID", "COPYUID",
"HIGHESTMODSEQ", "MODIFIED",
@ -810,7 +811,7 @@ func (c *Conn) xatom() string {
b, err := c.readbyte()
c.xcheckf(err, "read byte for atom")
if b <= ' ' || strings.IndexByte("(){%*\"\\]", b) >= 0 {
c.r.UnreadByte()
c.br.UnreadByte()
if s == "" {
c.xerrorf("expected atom")
}
@ -850,11 +851,12 @@ func (c *Conn) xliteral() []byte {
c.xerrorf("refusing to read more than 1MB: %d", size)
}
if sync {
_, err := fmt.Fprintf(c.conn, "+ ok\r\n")
_, err := fmt.Fprintf(c.bw, "+ ok\r\n")
c.xcheckf(err, "write continuation")
c.xflush()
}
buf := make([]byte, int(size))
_, err := io.ReadFull(c.r, buf)
_, err := io.ReadFull(c.br, buf)
c.xcheckf(err, "reading data for literal")
return buf
}

44
imapclient/prefixconn.go Normal file

@ -0,0 +1,44 @@
package imapclient
import (
"io"
"net"
)
// prefixConn is a net.Conn with a buffer from which the first reads are satisfied.
// used for STARTTLS where already did a buffered read of initial TLS data.
type prefixConn struct {
prefix []byte
net.Conn
}
func (c *prefixConn) Read(buf []byte) (int, error) {
if len(c.prefix) > 0 {
n := len(buf)
if n > len(c.prefix) {
n = len(c.prefix)
}
copy(buf[:n], c.prefix[:n])
c.prefix = c.prefix[n:]
if len(c.prefix) == 0 {
c.prefix = nil
}
return n, nil
}
return c.Conn.Read(buf)
}
// xprefixConn checks if there are any buffered unconsumed reads. If not, it
// returns c.conn. Otherwise, it returns a *prefixConn from which the buffered data
// can be read followed by data from c.conn.
func (c *Conn) xprefixConn() net.Conn {
n := c.br.Buffered()
if n == 0 {
return c.conn
}
buf := make([]byte, n)
_, err := io.ReadFull(c.br, buf)
c.xcheckf(err, "get buffered data")
return &prefixConn{buf, c.conn}
}


@ -36,6 +36,7 @@ const (
CapMetadataServer Capability = "METADATA-SERVER" // ../rfc/5464:124
CapSaveDate Capability = "SAVEDATE" // ../rfc/8514
CapCreateSpecialUse Capability = "CREATE-SPECIAL-USE" // ../rfc/6154:296
CapCompressDeflate Capability = "COMPRESS=DEFLATE" // ../rfc/4978:65
)
// Status is the tagged final result of a command.
@ -381,7 +382,7 @@ func (ns NumSet) String() string {
}
func ParseNumSet(s string) (ns NumSet, rerr error) {
c := Conn{r: bufio.NewReader(strings.NewReader(s))}
c := Conn{br: bufio.NewReader(strings.NewReader(s))}
defer c.recover(&rerr)
ns = c.xsequenceSet()
return


@ -0,0 +1,39 @@
package imapserver
import (
"crypto/tls"
"testing"
)
func TestCompress(t *testing.T) {
tc := start(t)
defer tc.close()
tc.client.Login("mjl@mox.example", password0)
tc.transactf("bad", "compress")
tc.transactf("bad", "compress bogus ")
tc.transactf("no", "compress bogus")
tc.client.CompressDeflate()
tc.transactf("no", "compress deflate") // Cannot have multiple.
tc.xcode("COMPRESSIONACTIVE")
tc.client.Select("inbox")
tc.transactf("ok", "append inbox (\\seen) {%d+}\r\n%s", len(exampleMsg), exampleMsg)
tc.transactf("ok", "noop")
tc.transactf("ok", "fetch 1 body.peek[1]")
}
func TestCompressStartTLS(t *testing.T) {
tc := start(t)
defer tc.close()
tc.client.Starttls(&tls.Config{InsecureSkipVerify: true})
tc.client.Login("mjl@mox.example", password0)
tc.client.CompressDeflate()
tc.client.Select("inbox")
tc.transactf("ok", "append inbox (\\seen) {%d+}\r\n%s", len(exampleMsg), exampleMsg)
tc.transactf("ok", "noop")
tc.transactf("ok", "fetch 1 body.peek[1]")
}


@ -120,7 +120,7 @@ func FuzzServer(f *testing.F) {
err := clientConn.SetDeadline(time.Now().Add(time.Second))
flog(err, "set client deadline")
client, _ := imapclient.New(clientConn, true)
client, _ := imapclient.New(mox.Cid(), clientConn, true)
for _, cmd := range cmds {
client.Commandf("", "%s", cmd)


@ -1,6 +1,8 @@
package imapserver
import (
"bufio"
"io"
"net"
)
@ -26,3 +28,18 @@ func (c *prefixConn) Read(buf []byte) (int, error) {
}
return c.Conn.Read(buf)
}
// xprefixConn returns either the original net.Conn passed as parameter, or returns
// a *prefixConn returning the buffered data available in br followed data from the
// net.Conn passed in.
func xprefixConn(c net.Conn, br *bufio.Reader) net.Conn {
n := br.Buffered()
if n == 0 {
return c
}
buf := make([]byte, n)
_, err := io.ReadFull(br, buf)
xcheckf(err, "get buffered data")
return &prefixConn{buf, c}
}


@ -66,6 +66,7 @@ import (
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/mjl-/bstore"
"github.com/mjl-/flate"
"github.com/mjl-/mox/config"
"github.com/mjl-/mox/message"
@ -162,12 +163,13 @@ var authFailDelay = time.Second // After authentication failure.
// SAVEDATE: ../rfc/8514
// WITHIN: ../rfc/5032
// NAMESPACE: ../rfc/2342
// COMPRESS=DEFLATE: ../rfc/4978
//
// We always announce support for SCRAM PLUS-variants, also on connections without
// TLS. The client should not be selecting PLUS variants on non-TLS connections,
// instead opting to do the bare SCRAM variant without indicating the server claims
// to support the PLUS variant (skipping the server downgrade detection check).
const serverCapabilities = "IMAP4rev2 IMAP4rev1 ENABLE LITERAL+ IDLE SASL-IR BINARY UNSELECT UIDPLUS ESEARCH SEARCHRES MOVE UTF8=ACCEPT LIST-EXTENDED SPECIAL-USE CREATE-SPECIAL-USE LIST-STATUS AUTH=SCRAM-SHA-256-PLUS AUTH=SCRAM-SHA-256 AUTH=SCRAM-SHA-1-PLUS AUTH=SCRAM-SHA-1 AUTH=CRAM-MD5 ID APPENDLIMIT=9223372036854775807 CONDSTORE QRESYNC STATUS=SIZE QUOTA QUOTA=RES-STORAGE METADATA SAVEDATE WITHIN NAMESPACE"
const serverCapabilities = "IMAP4rev2 IMAP4rev1 ENABLE LITERAL+ IDLE SASL-IR BINARY UNSELECT UIDPLUS ESEARCH SEARCHRES MOVE UTF8=ACCEPT LIST-EXTENDED SPECIAL-USE CREATE-SPECIAL-USE LIST-STATUS AUTH=SCRAM-SHA-256-PLUS AUTH=SCRAM-SHA-256 AUTH=SCRAM-SHA-1-PLUS AUTH=SCRAM-SHA-1 AUTH=CRAM-MD5 ID APPENDLIMIT=9223372036854775807 CONDSTORE QRESYNC STATUS=SIZE QUOTA QUOTA=RES-STORAGE METADATA SAVEDATE WITHIN NAMESPACE COMPRESS=DEFLATE"
type conn struct {
cid int64
@ -175,10 +177,10 @@ type conn struct {
conn net.Conn
tls bool // Whether TLS has been initialized.
viaHTTPS bool // Whether this connection came in via HTTPS (using TLS ALPN).
br *bufio.Reader // From remote, with TLS unwrapped in case of TLS.
br *bufio.Reader // From remote, with TLS unwrapped in case of TLS, and possibly wrapping inflate.
line chan lineErr // If set, instead of reading from br, a line is read from this channel. For reading a line in IDLE while also waiting for mailbox/account updates.
lastLine string // For detecting if syntax error is fatal, i.e. if this ends with a literal. Without crlf.
bw *bufio.Writer // To remote, with TLS added in case of TLS.
bw *bufio.Writer // To remote, with TLS added in case of TLS, and possibly wrapping deflate, see conn.flateWriter.
tr *moxio.TraceReader // Kept to change trace level when reading/writing cmd/auth/data.
tw *moxio.TraceWriter
slow bool // If set, reads are done with a 1 second sleep, and writes are done 1 byte at a time, to keep spammers busy.
@ -192,6 +194,9 @@ type conn struct {
ncmds int // Number of commands processed. Used to abort connection when first incoming command is unknown/invalid.
log mlog.Log // Used for all synchronous logging on this connection, see logbg for logging in a separate goroutine.
enabled map[capability]bool // All upper-case.
compress bool // Whether compression is enabled, via compress command.
flateWriter *flate.Writer // For flushing output after flushing conn.bw, and for closing.
flateBW *bufio.Writer // Wraps raw connection writes, flateWriter writes here, also needs flushing.
// Set by SEARCH with SAVE. Can be used by commands accepting a sequence-set with
// value "$". When used, UIDs must be verified to still exist, because they may
@ -258,7 +263,7 @@ func stateCommands(cmds ...string) map[string]struct{} {
var (
commandsStateAny = stateCommands("capability", "noop", "logout", "id")
commandsStateNotAuthenticated = stateCommands("starttls", "authenticate", "login")
commandsStateAuthenticated = stateCommands("enable", "select", "examine", "create", "delete", "rename", "subscribe", "unsubscribe", "list", "namespace", "status", "append", "idle", "lsub", "getquotaroot", "getquota", "getmetadata", "setmetadata")
commandsStateAuthenticated = stateCommands("enable", "select", "examine", "create", "delete", "rename", "subscribe", "unsubscribe", "list", "namespace", "status", "append", "idle", "lsub", "getquotaroot", "getquota", "getmetadata", "setmetadata", "compress")
commandsStateSelected = stateCommands("close", "unselect", "expunge", "search", "fetch", "store", "copy", "move", "uid expunge", "uid search", "uid fetch", "uid store", "uid copy", "uid move")
)
@ -293,6 +298,7 @@ var commands = map[string]func(c *conn, tag, cmd string, p *parser){
"getquota": (*conn).cmdGetquota,
"getmetadata": (*conn).cmdGetmetadata,
"setmetadata": (*conn).cmdSetmetadata,
"compress": (*conn).cmdCompress,
// Selected.
"check": (*conn).cmdCheck,
@ -623,6 +629,19 @@ func (c *conn) bwritelinef(format string, args ...any) {
func (c *conn) xflush() {
err := c.bw.Flush()
xcheckf(err, "flush") // Should never happen, the Write caused by the Flush should panic on i/o error.
// If compression is enabled, we need to flush its stream.
if c.compress {
// Note: Flush writes a sync message if there is nothing to flush. Ideally we
// wouldn't send that, but we would have to keep track of whether data needs to be
// flushed.
err := c.flateWriter.Flush()
xcheckf(err, "flush deflate")
// The flate writer writes to a bufio.Writer, we must also flush that.
err = c.flateBW.Flush()
xcheckf(err, "flush deflate writer")
}
}
func (c *conn) readCommand(tag *string) (cmd string, p *parser) {
@ -689,7 +708,7 @@ func serve(listenerName string, cid int64, tlsConfig *tls.Config, nc net.Conn, x
if a, ok := nc.RemoteAddr().(*net.TCPAddr); ok {
remoteIP = a.IP
} else {
// For net.Pipe, during tests and for imapserve.
// For tests and for imapserve.
remoteIP = net.ParseIP("127.0.0.10")
}
@ -751,7 +770,10 @@ func serve(listenerName string, cid int64, tlsConfig *tls.Config, nc net.Conn, x
slog.String("listener", listenerName))
defer func() {
c.conn.Close()
err := c.conn.Close()
if err != nil {
c.log.Debugx("closing connection", err)
}
if c.account != nil {
c.comm.Unregister()
@ -842,7 +864,7 @@ func serve(listenerName string, cid int64, tlsConfig *tls.Config, nc net.Conn, x
var storeLoginAttempt bool
for {
c.command()
c.xflush() // For flushing errors, or possibly commands that did not flush explicitly.
c.xflush() // For flushing errors, or commands that did not flush explicitly.
// After an authentication command, we will have a c.loginAttempt. We typically get
// an "ID" command with the user-agent immediately after. So we wait for one more
@ -1117,6 +1139,16 @@ func (c *conn) command() {
c.log.Debug("imap command done", logFields...)
result = "ok"
if x == cleanClose {
// If compression was enabled, we flush & close the deflate stream.
if c.compress {
// Note: Close and flush can Write and may panic with an i/o error.
if err := c.flateWriter.Close(); err != nil {
c.log.Debugx("close deflate writer", err)
} else if err := c.flateBW.Flush(); err != nil {
c.log.Debugx("flush deflate buffer", err)
}
}
panic(x)
}
return
@ -1803,6 +1835,56 @@ func (c *conn) cmdID(tag, cmd string, p *parser) {
c.ok(tag, cmd)
}
// Compress enables compression on the connection. Deflate is the only algorithm
// specified. TLS doesn't do compression nowadays, so we don't have to check for that.
//
// Status: Authenticated. The RFC doesn't mention this in prose, but the command is
// added to ABNF production rule "command-auth".
func (c *conn) cmdCompress(tag, cmd string, p *parser) {
// Command: ../rfc/4978:122
// Request syntax: ../rfc/4978:310
p.xspace()
alg := p.xatom()
p.xempty()
// Will do compression only once.
if c.compress {
// ../rfc/4978:143
xusercodeErrorf("COMPRESSIONACTIVE", "compression already active with previous compress command")
}
// ../rfc/4978:134
if !strings.EqualFold(alg, "deflate") {
xuserErrorf("compression algorithm not supported")
}
// We must flush now, before we initialize flate.
c.log.Debug("compression enabled")
c.ok(tag, cmd)
c.flateBW = bufio.NewWriter(c)
fw, err := flate.NewWriter(c.flateBW, flate.DefaultCompression)
xcheckf(err, "deflate") // Cannot happen.
c.compress = true
c.flateWriter = fw
c.tw = moxio.NewTraceWriter(c.log, "S: ", c.flateWriter)
c.bw = bufio.NewWriter(c.tw) // The previous c.bw will not have buffered data.
rc := xprefixConn(c.conn, c.br) // c.br may contain buffered data.
// We use the special partial reader. Some clients write commands and flush the
// buffer in "partial flush" mode instead of "sync flush" mode. The "sync flush"
// mode emits an explicit zero-length data block that triggers the Go stdlib flate
// reader to return data to us. It wouldn't for blocks written in "partial flush"
// mode, and it would block us indefinitely while trying to read another flate
// block. The partial reader returns data earlier, but still eagerly consumes all
// blocks in its buffer.
// todo: also _write_ in partial mode since it uses fewer bytes than a sync flush (which needs an additional 4 bytes for the zero-length data block). we need a writer that can flush in partial mode first. writing with sync flush will work with clients that themselves write with partial flush.
fr := flate.NewReaderPartial(rc)
c.tr = moxio.NewTraceReader(c.log, "C: ", fr)
c.br = bufio.NewReader(c.tr)
}
// STARTTLS enables TLS on the connection, after a plain text start.
// Only allowed if TLS isn't already enabled, either through connecting to a
// TLS-enabled TCP port, or a previous STARTTLS command.
@ -1822,13 +1904,7 @@ func (c *conn) cmdStarttls(tag, cmd string, p *parser) {
xsyntaxErrorf("starttls not announced")
}
conn := c.conn
if n := c.br.Buffered(); n > 0 {
buf := make([]byte, n)
_, err := io.ReadFull(c.br, buf)
xcheckf(err, "reading buffered data for tls handshake")
conn = &prefixConn{buf, conn}
}
conn := xprefixConn(c.conn, c.br)
// We add the cid to facilitate debugging in case of TLS connection failure.
c.ok(tag, cmd+" ("+mox.ReceivedID(c.cid)+")")


@ -16,6 +16,8 @@ import (
"testing"
"time"
"golang.org/x/sys/unix"
"github.com/mjl-/mox/imapclient"
"github.com/mjl-/mox/mlog"
"github.com/mjl-/mox/mox-"
@ -376,7 +378,23 @@ func startArgsMore(t *testing.T, first, immediateTLS bool, serverConfig, clientC
tcheck(t, err, "after init")
}
serverConn, clientConn := net.Pipe()
// We get actual sockets for their buffering behaviour. net.Pipe is synchronous,
// and the implementation of the compress extension can write a sync message to an
// imap client when that client isn't reading but is trying to write. In the real
// world, network buffer will take up those few bytes, so assume the buffer in the
// test too.
fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM, 0)
tcheck(t, err, "socketpair")
xfdconn := func(fd int, name string) net.Conn {
f := os.NewFile(uintptr(fd), name)
fc, err := net.FileConn(f)
tcheck(t, err, "fileconn")
err = f.Close()
tcheck(t, err, "close file for conn")
return fc
}
serverConn := xfdconn(fds[0], "server")
clientConn := xfdconn(fds[1], "client")
if serverConfig == nil {
serverConfig = &tls.Config{
@ -391,8 +409,8 @@ func startArgsMore(t *testing.T, first, immediateTLS bool, serverConfig, clientC
}
done := make(chan struct{})
connCounter++
cid := connCounter
connCounter += 2
cid := connCounter - 1
go func() {
const viaHTTPS = false
serve("test", cid, serverConfig, serverConn, immediateTLS, allowLoginWithoutTLS, viaHTTPS, "")
@ -401,7 +419,7 @@ func startArgsMore(t *testing.T, first, immediateTLS bool, serverConfig, clientC
}
close(done)
}()
client, err := imapclient.New(clientConn, true)
client, err := imapclient.New(connCounter, clientConn, true)
tcheck(t, err, "new client")
tc := &testconn{t: t, conn: clientConn, client: client, done: done, serverConn: serverConn, account: acc}
if first && noCloseSwitchboard {


@ -64,7 +64,7 @@ func TestDeliver(t *testing.T) {
tcheck(t, err, "dial imap")
defer imapconn.Close()
imapc, err := imapclient.New(imapconn, false)
imapc, err := imapclient.New(mox.Cid(), imapconn, false)
tcheck(t, err, "new imapclient")
_, _, err = imapc.Login(imapuser, imappassword)


@ -189,7 +189,7 @@ https://www.iana.org/assignments/message-headers/message-headers.xhtml
4551 Yes Obs (RFC 7162) IMAP Extension for Conditional STORE Operation or Quick Flag Changes Resynchronization
4731 Yes - IMAP4 Extension to SEARCH Command for Controlling What Kind of Information Is Returned
4959 Yes - IMAP Extension for Simple Authentication and Security Layer (SASL) Initial Client Response
4978 Roadmap - The IMAP COMPRESS Extension
4978 Yes - The IMAP COMPRESS Extension
5032 Yes - WITHIN Search Extension to the IMAP Protocol
5092 Roadmap - IMAP URL Scheme
5161 Yes - The IMAP ENABLE Extension

27
vendor/github.com/mjl-/flate/LICENSE generated vendored Normal file

@ -0,0 +1,27 @@
Copyright 2009 The Go Authors.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google LLC nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

11
vendor/github.com/mjl-/flate/README.md generated vendored Normal file

@ -0,0 +1,11 @@
https://pkg.go.dev/compress/flate from go1.24.0, with flate.NewReaderPartial
added: a Reader that returns data from blocks flushed with mode "partial
flush", without blocking on reading the next flate block. Without
NewReaderPartial, protocols that expect a response after writing a short
compressed request that was flushed in "partial flush" mode can get stuck.
Writes/flushes in "partial flush" mode are not implemented.
https://pkg.go.dev/github.com/mjl-/flate#NewReaderPartial
Also see https://github.com/golang/go/issues/31514
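
A minimal usage sketch (assuming a net.Conn carrying a deflate stream from a peer that may flush in "partial flush" mode; the package and function names around it are illustrative): NewReaderPartial drops in where the stdlib flate.NewReader would otherwise be used, so reads return as soon as a partially flushed block is available instead of waiting for a sync flush or a full sliding window.

package deflateread

import (
	"bufio"
	"net"

	"github.com/mjl-/flate"
)

// readLine reads one line from conn, where the peer writes a deflate-compressed
// stream, for example an IMAP connection after COMPRESS=DEFLATE. With the stdlib
// flate.NewReader this read could block until the peer sends a sync flush or
// fills the sliding window; NewReaderPartial returns partially flushed data earlier.
func readLine(conn net.Conn) (string, error) {
	fr := flate.NewReaderPartial(conn)
	br := bufio.NewReader(fr)
	return br.ReadString('\n')
}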

743
vendor/github.com/mjl-/flate/deflate.go generated vendored Normal file

@ -0,0 +1,743 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"errors"
"fmt"
"io"
"math"
)
const (
NoCompression = 0
BestSpeed = 1
BestCompression = 9
DefaultCompression = -1
// HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman
// entropy encoding. This mode is useful in compressing data that has
// already been compressed with an LZ style algorithm (e.g. Snappy or LZ4)
// that lacks an entropy encoder. Compression gains are achieved when
// certain bytes in the input stream occur more frequently than others.
//
// Note that HuffmanOnly produces a compressed output that is
// RFC 1951 compliant. That is, any valid DEFLATE decompressor will
// continue to be able to decompress this output.
HuffmanOnly = -2
)
const (
logWindowSize = 15
windowSize = 1 << logWindowSize
windowMask = windowSize - 1
// The LZ77 step produces a sequence of literal tokens and <length, offset>
// pair tokens. The offset is also known as distance. The underlying wire
// format limits the range of lengths and offsets. For example, there are
// 256 legitimate lengths: those in the range [3, 258]. This package's
// compressor uses a higher minimum match length, enabling optimizations
// such as finding matches via 32-bit loads and compares.
baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5
minMatchLength = 4 // The smallest match length that the compressor actually emits
maxMatchLength = 258 // The largest match length
baseMatchOffset = 1 // The smallest match offset
maxMatchOffset = 1 << 15 // The largest match offset
// The maximum number of tokens we put into a single flate block, just to
// stop things from getting too large.
maxFlateBlockTokens = 1 << 14
maxStoreBlockSize = 65535
hashBits = 17 // After 17 performance degrades
hashSize = 1 << hashBits
hashMask = (1 << hashBits) - 1
maxHashOffset = 1 << 24
skipNever = math.MaxInt32
)
type compressionLevel struct {
level, good, lazy, nice, chain, fastSkipHashing int
}
var levels = []compressionLevel{
{0, 0, 0, 0, 0, 0}, // NoCompression.
{1, 0, 0, 0, 0, 0}, // BestSpeed uses a custom algorithm; see deflatefast.go.
// For levels 2-3 we don't bother trying with lazy matches.
{2, 4, 0, 16, 8, 5},
{3, 4, 0, 32, 32, 6},
// Levels 4-9 use increasingly more lazy matching
// and increasingly stringent conditions for "good enough".
{4, 4, 4, 16, 16, skipNever},
{5, 8, 16, 32, 32, skipNever},
{6, 8, 16, 128, 128, skipNever},
{7, 8, 32, 128, 256, skipNever},
{8, 32, 128, 258, 1024, skipNever},
{9, 32, 258, 258, 4096, skipNever},
}
type compressor struct {
compressionLevel
w *huffmanBitWriter
bulkHasher func([]byte, []uint32)
// compression algorithm
fill func(*compressor, []byte) int // copy data to window
step func(*compressor) // process window
bestSpeed *deflateFast // Encoder for BestSpeed
// Input hash chains
// hashHead[hashValue] contains the largest inputIndex with the specified hash value
// If hashHead[hashValue] is within the current window, then
// hashPrev[hashHead[hashValue] & windowMask] contains the previous index
// with the same hash value.
chainHead int
hashHead [hashSize]uint32
hashPrev [windowSize]uint32
hashOffset int
// input window: unprocessed data is window[index:windowEnd]
index int
window []byte
windowEnd int
blockStart int // window index where current tokens start
byteAvailable bool // if true, still need to process window[index-1].
sync bool // requesting flush
// queued output tokens
tokens []token
// deflate state
length int
offset int
maxInsertIndex int
err error
// hashMatch must be able to contain hashes for the maximum match length.
hashMatch [maxMatchLength - 1]uint32
}
func (d *compressor) fillDeflate(b []byte) int {
if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
// shift the window by windowSize
copy(d.window, d.window[windowSize:2*windowSize])
d.index -= windowSize
d.windowEnd -= windowSize
if d.blockStart >= windowSize {
d.blockStart -= windowSize
} else {
d.blockStart = math.MaxInt32
}
d.hashOffset += windowSize
if d.hashOffset > maxHashOffset {
delta := d.hashOffset - 1
d.hashOffset -= delta
d.chainHead -= delta
// Iterate over slices instead of arrays to avoid copying
// the entire table onto the stack (Issue #18625).
for i, v := range d.hashPrev[:] {
if int(v) > delta {
d.hashPrev[i] = uint32(int(v) - delta)
} else {
d.hashPrev[i] = 0
}
}
for i, v := range d.hashHead[:] {
if int(v) > delta {
d.hashHead[i] = uint32(int(v) - delta)
} else {
d.hashHead[i] = 0
}
}
}
}
n := copy(d.window[d.windowEnd:], b)
d.windowEnd += n
return n
}
func (d *compressor) writeBlock(tokens []token, index int) error {
if index > 0 {
var window []byte
if d.blockStart <= index {
window = d.window[d.blockStart:index]
}
d.blockStart = index
d.w.writeBlock(tokens, false, window)
return d.w.err
}
return nil
}
// fillWindow will fill the current window with the supplied
// dictionary and calculate all hashes.
// This is much faster than doing a full encode.
// Should only be used after a reset.
func (d *compressor) fillWindow(b []byte) {
// Do not fill window if we are in store-only mode.
if d.compressionLevel.level < 2 {
return
}
if d.index != 0 || d.windowEnd != 0 {
panic("internal error: fillWindow called with stale data")
}
// If we are given too much, cut it.
if len(b) > windowSize {
b = b[len(b)-windowSize:]
}
// Add all to window.
n := copy(d.window, b)
// Calculate 256 hashes at the time (more L1 cache hits)
loops := (n + 256 - minMatchLength) / 256
for j := 0; j < loops; j++ {
index := j * 256
end := index + 256 + minMatchLength - 1
if end > n {
end = n
}
toCheck := d.window[index:end]
dstSize := len(toCheck) - minMatchLength + 1
if dstSize <= 0 {
continue
}
dst := d.hashMatch[:dstSize]
d.bulkHasher(toCheck, dst)
for i, val := range dst {
di := i + index
hh := &d.hashHead[val&hashMask]
// Get previous value with the same hash.
// Our chain should point to the previous value.
d.hashPrev[di&windowMask] = *hh
// Set the head of the hash chain to us.
*hh = uint32(di + d.hashOffset)
}
}
// Update window information.
d.windowEnd = n
d.index = n
}
// Try to find a match starting at index whose length is greater than prevSize.
// We only look at chainCount possibilities before giving up.
func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
minMatchLook := maxMatchLength
if lookahead < minMatchLook {
minMatchLook = lookahead
}
win := d.window[0 : pos+minMatchLook]
// We quit when we get a match that's at least nice long
nice := len(win) - pos
if d.nice < nice {
nice = d.nice
}
// If we've got a match that's good enough, only look in 1/4 the chain.
tries := d.chain
length = prevLength
if length >= d.good {
tries >>= 2
}
wEnd := win[pos+length]
wPos := win[pos:]
minIndex := pos - windowSize
for i := prevHead; tries > 0; tries-- {
if wEnd == win[i+length] {
n := matchLen(win[i:], wPos, minMatchLook)
if n > length && (n > minMatchLength || pos-i <= 4096) {
length = n
offset = pos - i
ok = true
if n >= nice {
// The match is good enough that we don't try to find a better one.
break
}
wEnd = win[pos+n]
}
}
if i == minIndex {
// hashPrev[i & windowMask] has already been overwritten, so stop now.
break
}
i = int(d.hashPrev[i&windowMask]) - d.hashOffset
if i < minIndex || i < 0 {
break
}
}
return
}
func (d *compressor) writeStoredBlock(buf []byte) error {
if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
return d.w.err
}
d.w.writeBytes(buf)
return d.w.err
}
const hashmul = 0x1e35a7bd
// hash4 returns a hash representation of the first 4 bytes
// of the supplied slice.
// The caller must ensure that len(b) >= 4.
func hash4(b []byte) uint32 {
return ((uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24) * hashmul) >> (32 - hashBits)
}
// bulkHash4 will compute hashes using the same
// algorithm as hash4.
func bulkHash4(b []byte, dst []uint32) {
if len(b) < minMatchLength {
return
}
hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
dst[0] = (hb * hashmul) >> (32 - hashBits)
end := len(b) - minMatchLength + 1
for i := 1; i < end; i++ {
hb = (hb << 8) | uint32(b[i+3])
dst[i] = (hb * hashmul) >> (32 - hashBits)
}
}
// matchLen returns the number of matching bytes in a and b
// up to length 'max'. Both slices must be at least 'max'
// bytes in size.
func matchLen(a, b []byte, max int) int {
a = a[:max]
b = b[:len(a)]
for i, av := range a {
if b[i] != av {
return i
}
}
return max
}
// encSpeed will compress and store the currently added data,
// if enough has been accumulated or we at the end of the stream.
// Any error that occurred will be in d.err
func (d *compressor) encSpeed() {
// We only compress if we have maxStoreBlockSize.
if d.windowEnd < maxStoreBlockSize {
if !d.sync {
return
}
// Handle small sizes.
if d.windowEnd < 128 {
switch {
case d.windowEnd == 0:
return
case d.windowEnd <= 16:
d.err = d.writeStoredBlock(d.window[:d.windowEnd])
default:
d.w.writeBlockHuff(false, d.window[:d.windowEnd])
d.err = d.w.err
}
d.windowEnd = 0
d.bestSpeed.reset()
return
}
}
// Encode the block.
d.tokens = d.bestSpeed.encode(d.tokens[:0], d.window[:d.windowEnd])
// If we removed less than 1/16th, Huffman compress the block.
if len(d.tokens) > d.windowEnd-(d.windowEnd>>4) {
d.w.writeBlockHuff(false, d.window[:d.windowEnd])
} else {
d.w.writeBlockDynamic(d.tokens, false, d.window[:d.windowEnd])
}
d.err = d.w.err
d.windowEnd = 0
}
func (d *compressor) initDeflate() {
d.window = make([]byte, 2*windowSize)
d.hashOffset = 1
d.tokens = make([]token, 0, maxFlateBlockTokens+1)
d.length = minMatchLength - 1
d.offset = 0
d.byteAvailable = false
d.index = 0
d.chainHead = -1
d.bulkHasher = bulkHash4
}
func (d *compressor) deflate() {
if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
return
}
d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
Loop:
for {
if d.index > d.windowEnd {
panic("index > windowEnd")
}
lookahead := d.windowEnd - d.index
if lookahead < minMatchLength+maxMatchLength {
if !d.sync {
break Loop
}
if d.index > d.windowEnd {
panic("index > windowEnd")
}
if lookahead == 0 {
// Flush current output block if any.
if d.byteAvailable {
// There is still one pending token that needs to be flushed
d.tokens = append(d.tokens, literalToken(uint32(d.window[d.index-1])))
d.byteAvailable = false
}
if len(d.tokens) > 0 {
if d.err = d.writeBlock(d.tokens, d.index); d.err != nil {
return
}
d.tokens = d.tokens[:0]
}
break Loop
}
}
if d.index < d.maxInsertIndex {
// Update the hash
hash := hash4(d.window[d.index : d.index+minMatchLength])
hh := &d.hashHead[hash&hashMask]
d.chainHead = int(*hh)
d.hashPrev[d.index&windowMask] = uint32(d.chainHead)
*hh = uint32(d.index + d.hashOffset)
}
prevLength := d.length
prevOffset := d.offset
d.length = minMatchLength - 1
d.offset = 0
minIndex := d.index - windowSize
if minIndex < 0 {
minIndex = 0
}
if d.chainHead-d.hashOffset >= minIndex &&
(d.fastSkipHashing != skipNever && lookahead > minMatchLength-1 ||
d.fastSkipHashing == skipNever && lookahead > prevLength && prevLength < d.lazy) {
if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
d.length = newLength
d.offset = newOffset
}
}
if d.fastSkipHashing != skipNever && d.length >= minMatchLength ||
d.fastSkipHashing == skipNever && prevLength >= minMatchLength && d.length <= prevLength {
// There was a match at the previous step, and the current match is
// not better. Output the previous match.
if d.fastSkipHashing != skipNever {
d.tokens = append(d.tokens, matchToken(uint32(d.length-baseMatchLength), uint32(d.offset-baseMatchOffset)))
} else {
d.tokens = append(d.tokens, matchToken(uint32(prevLength-baseMatchLength), uint32(prevOffset-baseMatchOffset)))
}
// Insert in the hash table all strings up to the end of the match.
// index and index-1 are already inserted. If there is not enough
// lookahead, the last two strings are not inserted into the hash
// table.
if d.length <= d.fastSkipHashing {
var newIndex int
if d.fastSkipHashing != skipNever {
newIndex = d.index + d.length
} else {
newIndex = d.index + prevLength - 1
}
index := d.index
for index++; index < newIndex; index++ {
if index < d.maxInsertIndex {
hash := hash4(d.window[index : index+minMatchLength])
// Get previous value with the same hash.
// Our chain should point to the previous value.
hh := &d.hashHead[hash&hashMask]
d.hashPrev[index&windowMask] = *hh
// Set the head of the hash chain to us.
*hh = uint32(index + d.hashOffset)
}
}
d.index = index
if d.fastSkipHashing == skipNever {
d.byteAvailable = false
d.length = minMatchLength - 1
}
} else {
// For matches this long, we don't bother inserting each individual
// item into the table.
d.index += d.length
}
if len(d.tokens) == maxFlateBlockTokens {
// The block includes the current character
if d.err = d.writeBlock(d.tokens, d.index); d.err != nil {
return
}
d.tokens = d.tokens[:0]
}
} else {
if d.fastSkipHashing != skipNever || d.byteAvailable {
i := d.index - 1
if d.fastSkipHashing != skipNever {
i = d.index
}
d.tokens = append(d.tokens, literalToken(uint32(d.window[i])))
if len(d.tokens) == maxFlateBlockTokens {
if d.err = d.writeBlock(d.tokens, i+1); d.err != nil {
return
}
d.tokens = d.tokens[:0]
}
}
d.index++
if d.fastSkipHashing == skipNever {
d.byteAvailable = true
}
}
}
}
func (d *compressor) fillStore(b []byte) int {
n := copy(d.window[d.windowEnd:], b)
d.windowEnd += n
return n
}
func (d *compressor) store() {
if d.windowEnd > 0 && (d.windowEnd == maxStoreBlockSize || d.sync) {
d.err = d.writeStoredBlock(d.window[:d.windowEnd])
d.windowEnd = 0
}
}
// storeHuff compresses and stores the currently added data
// when the d.window is full or we are at the end of the stream.
// Any error that occurred will be in d.err
func (d *compressor) storeHuff() {
if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 {
return
}
d.w.writeBlockHuff(false, d.window[:d.windowEnd])
d.err = d.w.err
d.windowEnd = 0
}
func (d *compressor) write(b []byte) (n int, err error) {
if d.err != nil {
return 0, d.err
}
n = len(b)
for len(b) > 0 {
d.step(d)
b = b[d.fill(d, b):]
if d.err != nil {
return 0, d.err
}
}
return n, nil
}
func (d *compressor) syncFlush() error {
if d.err != nil {
return d.err
}
d.sync = true
d.step(d)
if d.err == nil {
d.w.writeStoredHeader(0, false)
d.w.flush()
d.err = d.w.err
}
d.sync = false
return d.err
}
func (d *compressor) init(w io.Writer, level int) (err error) {
d.w = newHuffmanBitWriter(w)
switch {
case level == NoCompression:
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillStore
d.step = (*compressor).store
case level == HuffmanOnly:
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillStore
d.step = (*compressor).storeHuff
case level == BestSpeed:
d.compressionLevel = levels[level]
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillStore
d.step = (*compressor).encSpeed
d.bestSpeed = newDeflateFast()
d.tokens = make([]token, maxStoreBlockSize)
case level == DefaultCompression:
level = 6
fallthrough
case 2 <= level && level <= 9:
d.compressionLevel = levels[level]
d.initDeflate()
d.fill = (*compressor).fillDeflate
d.step = (*compressor).deflate
default:
return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
}
return nil
}
func (d *compressor) reset(w io.Writer) {
d.w.reset(w)
d.sync = false
d.err = nil
switch d.compressionLevel.level {
case NoCompression:
d.windowEnd = 0
case BestSpeed:
d.windowEnd = 0
d.tokens = d.tokens[:0]
d.bestSpeed.reset()
default:
d.chainHead = -1
clear(d.hashHead[:])
clear(d.hashPrev[:])
d.hashOffset = 1
d.index, d.windowEnd = 0, 0
d.blockStart, d.byteAvailable = 0, false
d.tokens = d.tokens[:0]
d.length = minMatchLength - 1
d.offset = 0
d.maxInsertIndex = 0
}
}
func (d *compressor) close() error {
if d.err == errWriterClosed {
return nil
}
if d.err != nil {
return d.err
}
d.sync = true
d.step(d)
if d.err != nil {
return d.err
}
if d.w.writeStoredHeader(0, true); d.w.err != nil {
return d.w.err
}
d.w.flush()
if d.w.err != nil {
return d.w.err
}
d.err = errWriterClosed
return nil
}
// NewWriter returns a new [Writer] compressing data at the given level.
// Following zlib, levels range from 1 ([BestSpeed]) to 9 ([BestCompression]);
// higher levels typically run slower but compress more. Level 0
// ([NoCompression]) does not attempt any compression; it only adds the
// necessary DEFLATE framing.
// Level -1 ([DefaultCompression]) uses the default compression level.
// Level -2 ([HuffmanOnly]) will use Huffman compression only, giving
// a very fast compression for all types of input, but sacrificing considerable
// compression efficiency.
//
// If level is in the range [-2, 9] then the error returned will be nil.
// Otherwise the error returned will be non-nil.
func NewWriter(w io.Writer, level int) (*Writer, error) {
var dw Writer
if err := dw.d.init(w, level); err != nil {
return nil, err
}
return &dw, nil
}
// NewWriterDict is like [NewWriter] but initializes the new
// [Writer] with a preset dictionary. The returned [Writer] behaves
// as if the dictionary had been written to it without producing
// any compressed output. The compressed data written to w
// can only be decompressed by a [Reader] initialized with the
// same dictionary.
func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
dw := &dictWriter{w}
zw, err := NewWriter(dw, level)
if err != nil {
return nil, err
}
zw.d.fillWindow(dict)
zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method.
return zw, err
}
type dictWriter struct {
w io.Writer
}
func (w *dictWriter) Write(b []byte) (n int, err error) {
return w.w.Write(b)
}
var errWriterClosed = errors.New("flate: closed writer")
// A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see [NewWriter]).
type Writer struct {
d compressor
dict []byte
}
// Write writes data to w, which will eventually write the
// compressed form of data to its underlying writer.
func (w *Writer) Write(data []byte) (n int, err error) {
return w.d.write(data)
}
// Flush flushes any pending data to the underlying writer.
// It is useful mainly in compressed network protocols, to ensure that
// a remote reader has enough data to reconstruct a packet.
// Flush does not return until the data has been written.
// Calling Flush when there is no pending data still causes the [Writer]
// to emit a sync marker of at least 4 bytes.
// If the underlying writer returns an error, Flush returns that error.
//
// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
func (w *Writer) Flush() error {
// For more about flushing:
// https://www.bolet.org/~pornin/deflate-flush.html
return w.d.syncFlush()
}
// Close flushes and closes the writer.
func (w *Writer) Close() error {
return w.d.close()
}
// Reset discards the writer's state and makes it equivalent to
// the result of [NewWriter] or [NewWriterDict] called with dst
// and w's level and dictionary.
func (w *Writer) Reset(dst io.Writer) {
if dw, ok := w.d.w.writer.(*dictWriter); ok {
// w was created with NewWriterDict
dw.w = dst
w.d.reset(dw)
w.d.fillWindow(w.dict)
} else {
// w was created with NewWriter
w.d.reset(dst)
}
}

307
vendor/github.com/mjl-/flate/deflatefast.go generated vendored Normal file

@ -0,0 +1,307 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import "math"
// This encoding algorithm, which prioritizes speed over output size, is
// based on Snappy's LZ77-style encoder: github.com/golang/snappy
const (
tableBits = 14 // Bits used in the table.
tableSize = 1 << tableBits // Size of the table.
tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
// Reset the buffer offset when reaching this.
// Offsets are stored between blocks as int32 values.
// Since the offset we are checking against is at the beginning
// of the buffer, we need to subtract the current and input
// buffer to not risk overflowing the int32.
bufferReset = math.MaxInt32 - maxStoreBlockSize*2
)
func load32(b []byte, i int32) uint32 {
b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
func load64(b []byte, i int32) uint64 {
b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
func hash(u uint32) uint32 {
return (u * 0x1e35a7bd) >> tableShift
}
// These constants are defined by the Snappy implementation so that its
// assembly implementation can fast-path some 16-bytes-at-a-time copies. They
// aren't necessary in the pure Go implementation, as we don't use those same
// optimizations, but using the same thresholds doesn't really hurt.
const (
inputMargin = 16 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
type tableEntry struct {
val uint32 // Value at destination
offset int32
}
// deflateFast maintains the table for matches,
// and the previous byte block for cross block matching.
type deflateFast struct {
table [tableSize]tableEntry
prev []byte // Previous block, zero length if unknown.
cur int32 // Current match offset.
}
func newDeflateFast() *deflateFast {
return &deflateFast{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}
}
// encode encodes a block given in src and appends tokens
// to dst and returns the result.
func (e *deflateFast) encode(dst []token, src []byte) []token {
// Ensure that e.cur doesn't wrap.
if e.cur >= bufferReset {
e.shiftOffsets()
}
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
e.cur += maxStoreBlockSize
e.prev = e.prev[:0]
return emitLiteral(dst, src)
}
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := int32(0)
s := int32(0)
cv := load32(src, s)
nextHash := hash(cv)
for {
// Copied from the C++ snappy implementation:
//
// Heuristic match skipping: If 32 bytes are scanned with no matches
// found, start looking only at every other byte. If 32 more bytes are
// scanned (or skipped), look at every third byte, etc.. When a match
// is found, immediately go back to looking at every byte. This is a
// small loss (~5% performance, ~0.1% density) for compressible data
// due to more bookkeeping, but for non-compressible data (such as
// JPEG) it's a huge win since the compressor quickly "realizes" the
// data is incompressible and doesn't bother looking for matches
// everywhere.
//
// The "skip" variable keeps track of how many bytes there are since
// the last match; dividing it by 32 (ie. right-shifting by five) gives
// the number of bytes to move ahead for each iteration.
skip := int32(32)
nextS := s
var candidate tableEntry
for {
s = nextS
bytesBetweenHashLookups := skip >> 5
nextS = s + bytesBetweenHashLookups
skip += bytesBetweenHashLookups
if nextS > sLimit {
goto emitRemainder
}
candidate = e.table[nextHash&tableMask]
now := load32(src, nextS)
e.table[nextHash&tableMask] = tableEntry{offset: s + e.cur, val: cv}
nextHash = hash(now)
offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || cv != candidate.val {
// Out of range or not matched.
cv = now
continue
}
break
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
dst = emitLiteral(dst, src[nextEmit:s])
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
//
s += 4
t := candidate.offset - e.cur + 4
l := e.matchLen(s, t, src)
// matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
dst = append(dst, matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset)))
s += l
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s. If
// another emitCopy is not our next move, also calculate nextHash
// at s+1. At least on GOARCH=amd64, these three hash calculations
// are faster as one load64 call (with some shifts) instead of
// three load32 calls.
x := load64(src, s-1)
prevHash := hash(uint32(x))
e.table[prevHash&tableMask] = tableEntry{offset: e.cur + s - 1, val: uint32(x)}
x >>= 8
currHash := hash(uint32(x))
candidate = e.table[currHash&tableMask]
e.table[currHash&tableMask] = tableEntry{offset: e.cur + s, val: uint32(x)}
offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x) != candidate.val {
cv = uint32(x >> 8)
nextHash = hash(cv)
s++
break
}
}
}
emitRemainder:
if int(nextEmit) < len(src) {
dst = emitLiteral(dst, src[nextEmit:])
}
e.cur += int32(len(src))
e.prev = e.prev[:len(src)]
copy(e.prev, src)
return dst
}
func emitLiteral(dst []token, lit []byte) []token {
for _, v := range lit {
dst = append(dst, literalToken(uint32(v)))
}
return dst
}
// matchLen returns the match length between src[s:] and src[t:].
// t can be negative to indicate the match is starting in e.prev.
// We assume that src[s-4:s] and src[t-4:t] already match.
func (e *deflateFast) matchLen(s, t int32, src []byte) int32 {
s1 := int(s) + maxMatchLength - 4
if s1 > len(src) {
s1 = len(src)
}
// If we are inside the current block
if t >= 0 {
b := src[t:]
a := src[s:s1]
b = b[:len(a)]
// Extend the match to be as long as possible.
for i := range a {
if a[i] != b[i] {
return int32(i)
}
}
return int32(len(a))
}
// We found a match in the previous block.
tp := int32(len(e.prev)) + t
if tp < 0 {
return 0
}
// Extend the match to be as long as possible.
a := src[s:s1]
b := e.prev[tp:]
if len(b) > len(a) {
b = b[:len(a)]
}
a = a[:len(b)]
for i := range b {
if a[i] != b[i] {
return int32(i)
}
}
// If we reached our limit, we matched everything we are
// allowed to in the previous block and we return.
n := int32(len(b))
if int(s+n) == s1 {
return n
}
// Continue looking for more matches in the current block.
a = src[s+n : s1]
b = src[:len(a)]
for i := range a {
if a[i] != b[i] {
return int32(i) + n
}
}
return int32(len(a)) + n
}
// Reset resets the encoding history.
// This ensures that no matches are made to the previous block.
func (e *deflateFast) reset() {
e.prev = e.prev[:0]
// Bump the offset, so all matches will fail distance check.
// Nothing should be >= e.cur in the table.
e.cur += maxMatchOffset
// Protect against e.cur wraparound.
if e.cur >= bufferReset {
e.shiftOffsets()
}
}
// shiftOffsets will shift down all match offsets.
// This is only called in rare situations to prevent integer overflow.
//
// See https://golang.org/issue/18636 and https://github.com/golang/go/issues/34121.
func (e *deflateFast) shiftOffsets() {
if len(e.prev) == 0 {
// We have no history; just clear the table.
clear(e.table[:])
e.cur = maxMatchOffset + 1
return
}
// Shift down everything in the table that isn't already too far away.
for i := range e.table[:] {
v := e.table[i].offset - e.cur + maxMatchOffset + 1
if v < 0 {
// We want to reset e.cur to maxMatchOffset + 1, so we need to shift
// all table entries down by (e.cur - (maxMatchOffset + 1)).
// Because we ignore matches > maxMatchOffset, we can cap
// any negative offsets at 0.
v = 0
}
e.table[i].offset = v
}
e.cur = maxMatchOffset + 1
}

182
vendor/github.com/mjl-/flate/dict_decoder.go generated vendored Normal file
View file

@ -0,0 +1,182 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
// dictDecoder implements the LZ77 sliding dictionary as used in decompression.
// LZ77 decompresses data through sequences of two forms of commands:
//
// - Literal insertions: Runs of one or more symbols are inserted into the data
// stream as is. This is accomplished through the writeByte method for a
// single symbol, or combinations of writeSlice/writeMark for multiple symbols.
// Any valid stream must start with a literal insertion if no preset dictionary
// is used.
//
// - Backward copies: Runs of one or more symbols are copied from previously
// emitted data. Backward copies come as the tuple (dist, length) where dist
// determines how far back in the stream to copy from and length determines how
// many bytes to copy. Note that it is valid for the length to be greater than
// the distance. Since LZ77 uses forward copies, that situation is used to
// perform a form of run-length encoding on repeated runs of symbols.
// The writeCopy and tryWriteCopy are used to implement this command.
//
// For performance reasons, this implementation performs little to no sanity
// checks about the arguments. As such, the invariants documented for each
// method call must be respected.
type dictDecoder struct {
hist []byte // Sliding window history
// Invariant: 0 <= rdPos <= wrPos <= len(hist)
wrPos int // Current output position in buffer
rdPos int // Have emitted hist[:rdPos] already
full bool // Has a full window length been written yet?
}
// init initializes dictDecoder to have a sliding window dictionary of the given
// size. If a preset dict is provided, it will initialize the dictionary with
// the contents of dict.
func (dd *dictDecoder) init(size int, dict []byte) {
*dd = dictDecoder{hist: dd.hist}
if cap(dd.hist) < size {
dd.hist = make([]byte, size)
}
dd.hist = dd.hist[:size]
if len(dict) > len(dd.hist) {
dict = dict[len(dict)-len(dd.hist):]
}
dd.wrPos = copy(dd.hist, dict)
if dd.wrPos == len(dd.hist) {
dd.wrPos = 0
dd.full = true
}
dd.rdPos = dd.wrPos
}
// histSize reports the total amount of historical data in the dictionary.
func (dd *dictDecoder) histSize() int {
if dd.full {
return len(dd.hist)
}
return dd.wrPos
}
// availRead reports the number of bytes that can be flushed by readFlush.
func (dd *dictDecoder) availRead() int {
return dd.wrPos - dd.rdPos
}
// availWrite reports the available amount of output buffer space.
func (dd *dictDecoder) availWrite() int {
return len(dd.hist) - dd.wrPos
}
// writeSlice returns a slice of the available buffer to write data to.
//
// This invariant will be kept: len(s) <= availWrite()
func (dd *dictDecoder) writeSlice() []byte {
return dd.hist[dd.wrPos:]
}
// writeMark advances the writer pointer by cnt.
//
// This invariant must be kept: 0 <= cnt <= availWrite()
func (dd *dictDecoder) writeMark(cnt int) {
dd.wrPos += cnt
}
// writeByte writes a single byte to the dictionary.
//
// This invariant must be kept: 0 < availWrite()
func (dd *dictDecoder) writeByte(c byte) {
dd.hist[dd.wrPos] = c
dd.wrPos++
}
// writeCopy copies a string at a given (dist, length) to the output.
// This returns the number of bytes copied and may be less than the requested
// length if the available space in the output buffer is too small.
//
// This invariant must be kept: 0 < dist <= histSize()
func (dd *dictDecoder) writeCopy(dist, length int) int {
dstBase := dd.wrPos
dstPos := dstBase
srcPos := dstPos - dist
endPos := dstPos + length
if endPos > len(dd.hist) {
endPos = len(dd.hist)
}
// Copy non-overlapping section after destination position.
//
// This section is non-overlapping in that the copy length for this section
// is always less than or equal to the backwards distance. This can occur
// if a distance refers to data that wraps-around in the buffer.
// Thus, a backwards copy is performed here; that is, the exact bytes in
// the source prior to the copy are placed in the destination.
if srcPos < 0 {
srcPos += len(dd.hist)
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
srcPos = 0
}
// Copy possibly overlapping section before destination position.
//
// This section can overlap if the copy length for this section is larger
// than the backwards distance. This is allowed by LZ77 so that repeated
// strings can be succinctly represented using (dist, length) pairs.
// Thus, a forwards copy is performed here; that is, the bytes copied is
// possibly dependent on the resulting bytes in the destination as the copy
// progresses along. This is functionally equivalent to the following:
//
// for i := 0; i < endPos-dstPos; i++ {
// dd.hist[dstPos+i] = dd.hist[srcPos+i]
// }
// dstPos = endPos
//
for dstPos < endPos {
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
}
dd.wrPos = dstPos
return dstPos - dstBase
}
// tryWriteCopy tries to copy a string at a given (distance, length) to the
// output. This specialized version is optimized for short distances.
//
// This method is designed to be inlined for performance reasons.
//
// This invariant must be kept: 0 < dist <= histSize()
func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
dstPos := dd.wrPos
endPos := dstPos + length
if dstPos < dist || endPos > len(dd.hist) {
return 0
}
dstBase := dstPos
srcPos := dstPos - dist
// Copy possibly overlapping section before destination position.
for dstPos < endPos {
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
}
dd.wrPos = dstPos
return dstPos - dstBase
}
// readFlush returns a slice of the historical buffer that is ready to be
// emitted to the user. The data returned by readFlush must be fully consumed
// before calling any other dictDecoder methods.
func (dd *dictDecoder) readFlush() []byte {
toRead := dd.hist[dd.rdPos:dd.wrPos]
dd.rdPos = dd.wrPos
if dd.wrPos == len(dd.hist) {
dd.wrPos, dd.rdPos = 0, 0
dd.full = true
}
return toRead
}
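The dictDecoder comment above notes that a copy length may legitimately exceed its distance, which is how LZ77 run-length-encodes repeated data. A minimal sketch of that behaviour (not part of the vendored file; a hypothetical helper that could only live inside this package, since dictDecoder is unexported):
func exampleWriteCopy() {
	var dd dictDecoder
	dd.init(32768, nil)   // 32 KiB window, no preset dictionary
	dd.writeByte('a')
	dd.writeByte('b')
	dd.writeCopy(2, 6)    // dist=2, length=6: the copy overlaps its own output
	out := dd.readFlush() // string(out) == "abababab"
	_ = out
}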

693
vendor/github.com/mjl-/flate/huffman_bit_writer.go generated vendored Normal file
View file

@ -0,0 +1,693 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"io"
)
const (
// The largest offset code.
offsetCodeCount = 30
// The special code used to mark the end of a block.
endBlockMarker = 256
// The first length code.
lengthCodesStart = 257
// The number of codegen codes.
codegenCodeCount = 19
badCode = 255
// bufferFlushSize indicates the buffer size
// after which bytes are flushed to the writer.
// Should preferably be a multiple of 6, since
// we accumulate 6 bytes between writes to the buffer.
bufferFlushSize = 240
// bufferSize is the actual output byte buffer size.
// It must have additional headroom for a flush
// which can contain up to 8 bytes.
bufferSize = bufferFlushSize + 8
)
// The number of extra bits needed by length code X - LENGTH_CODES_START.
var lengthExtraBits = []int8{
/* 257 */ 0, 0, 0,
/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
/* 280 */ 4, 5, 5, 5, 5, 0,
}
// The length indicated by length code X - LENGTH_CODES_START.
var lengthBase = []uint32{
0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
64, 80, 96, 112, 128, 160, 192, 224, 255,
}
// offset code word extra bits.
var offsetExtraBits = []int8{
0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
}
var offsetBase = []uint32{
0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
}
// The odd order in which the codegen code sizes are written.
var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
type huffmanBitWriter struct {
// writer is the underlying writer.
// Do not use it directly; use the write method, which ensures
// that Write errors are sticky.
writer io.Writer
// Data waiting to be written is bytes[0:nbytes]
// and then the low nbits of bits. Data is always written
// sequentially into the bytes array.
bits uint64
nbits uint
bytes [bufferSize]byte
codegenFreq [codegenCodeCount]int32
nbytes int
literalFreq []int32
offsetFreq []int32
codegen []uint8
literalEncoding *huffmanEncoder
offsetEncoding *huffmanEncoder
codegenEncoding *huffmanEncoder
err error
}
func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
return &huffmanBitWriter{
writer: w,
literalFreq: make([]int32, maxNumLit),
offsetFreq: make([]int32, offsetCodeCount),
codegen: make([]uint8, maxNumLit+offsetCodeCount+1),
literalEncoding: newHuffmanEncoder(maxNumLit),
codegenEncoding: newHuffmanEncoder(codegenCodeCount),
offsetEncoding: newHuffmanEncoder(offsetCodeCount),
}
}
func (w *huffmanBitWriter) reset(writer io.Writer) {
w.writer = writer
w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
}
func (w *huffmanBitWriter) flush() {
if w.err != nil {
w.nbits = 0
return
}
n := w.nbytes
for w.nbits != 0 {
w.bytes[n] = byte(w.bits)
w.bits >>= 8
if w.nbits > 8 { // Avoid underflow
w.nbits -= 8
} else {
w.nbits = 0
}
n++
}
w.bits = 0
w.write(w.bytes[:n])
w.nbytes = 0
}
func (w *huffmanBitWriter) write(b []byte) {
if w.err != nil {
return
}
_, w.err = w.writer.Write(b)
}
func (w *huffmanBitWriter) writeBits(b int32, nb uint) {
if w.err != nil {
return
}
w.bits |= uint64(b) << w.nbits
w.nbits += nb
if w.nbits >= 48 {
bits := w.bits
w.bits >>= 48
w.nbits -= 48
n := w.nbytes
bytes := w.bytes[n : n+6]
bytes[0] = byte(bits)
bytes[1] = byte(bits >> 8)
bytes[2] = byte(bits >> 16)
bytes[3] = byte(bits >> 24)
bytes[4] = byte(bits >> 32)
bytes[5] = byte(bits >> 40)
n += 6
if n >= bufferFlushSize {
w.write(w.bytes[:n])
n = 0
}
w.nbytes = n
}
}
func (w *huffmanBitWriter) writeBytes(bytes []byte) {
if w.err != nil {
return
}
n := w.nbytes
if w.nbits&7 != 0 {
w.err = InternalError("writeBytes with unfinished bits")
return
}
for w.nbits != 0 {
w.bytes[n] = byte(w.bits)
w.bits >>= 8
w.nbits -= 8
n++
}
if n != 0 {
w.write(w.bytes[:n])
}
w.nbytes = 0
w.write(bytes)
}
// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
// the literal and offset lengths arrays (which are concatenated into a single
// array). This method generates that run-length encoding.
//
// The result is written into the codegen array, and the frequencies
// of each code is written into the codegenFreq array.
// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
// information. Code badCode is an end marker
//
// numLiterals The number of literals in literalEncoding
// numOffsets The number of offsets in offsetEncoding
// litenc, offenc The literal and offset encoder to use
func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) {
clear(w.codegenFreq[:])
// Note that we are using codegen both as a temporary variable for holding
// a copy of the frequencies, and as the place where we put the result.
// This is fine because the output is always shorter than the input used
// so far.
codegen := w.codegen // cache
// Copy the concatenated code sizes to codegen. Put a marker at the end.
cgnl := codegen[:numLiterals]
for i := range cgnl {
cgnl[i] = uint8(litEnc.codes[i].len)
}
cgnl = codegen[numLiterals : numLiterals+numOffsets]
for i := range cgnl {
cgnl[i] = uint8(offEnc.codes[i].len)
}
codegen[numLiterals+numOffsets] = badCode
size := codegen[0]
count := 1
outIndex := 0
for inIndex := 1; size != badCode; inIndex++ {
// INVARIANT: We have seen "count" copies of size that have not yet
// had output generated for them.
nextSize := codegen[inIndex]
if nextSize == size {
count++
continue
}
// We need to generate codegen indicating "count" of size.
if size != 0 {
codegen[outIndex] = size
outIndex++
w.codegenFreq[size]++
count--
for count >= 3 {
n := 6
if n > count {
n = count
}
codegen[outIndex] = 16
outIndex++
codegen[outIndex] = uint8(n - 3)
outIndex++
w.codegenFreq[16]++
count -= n
}
} else {
for count >= 11 {
n := 138
if n > count {
n = count
}
codegen[outIndex] = 18
outIndex++
codegen[outIndex] = uint8(n - 11)
outIndex++
w.codegenFreq[18]++
count -= n
}
if count >= 3 {
// count >= 3 && count <= 10
codegen[outIndex] = 17
outIndex++
codegen[outIndex] = uint8(count - 3)
outIndex++
w.codegenFreq[17]++
count = 0
}
}
count--
for ; count >= 0; count-- {
codegen[outIndex] = size
outIndex++
w.codegenFreq[size]++
}
// Set up invariant for next time through the loop.
size = nextSize
count = 1
}
// Marker indicating the end of the codegen.
codegen[outIndex] = badCode
}
// dynamicSize returns the size of dynamically encoded data in bits.
func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
numCodegens = len(w.codegenFreq)
for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
numCodegens--
}
header := 3 + 5 + 5 + 4 + (3 * numCodegens) +
w.codegenEncoding.bitLength(w.codegenFreq[:]) +
int(w.codegenFreq[16])*2 +
int(w.codegenFreq[17])*3 +
int(w.codegenFreq[18])*7
size = header +
litEnc.bitLength(w.literalFreq) +
offEnc.bitLength(w.offsetFreq) +
extraBits
return size, numCodegens
}
// fixedSize returns the size of dynamically encoded data in bits.
func (w *huffmanBitWriter) fixedSize(extraBits int) int {
return 3 +
fixedLiteralEncoding.bitLength(w.literalFreq) +
fixedOffsetEncoding.bitLength(w.offsetFreq) +
extraBits
}
// storedSize calculates the stored size, including header.
// The function returns the size in bits and whether the block
// fits inside a single block.
func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
if in == nil {
return 0, false
}
if len(in) <= maxStoreBlockSize {
return (len(in) + 5) * 8, true
}
return 0, false
}
func (w *huffmanBitWriter) writeCode(c hcode) {
if w.err != nil {
return
}
w.bits |= uint64(c.code) << w.nbits
w.nbits += uint(c.len)
if w.nbits >= 48 {
bits := w.bits
w.bits >>= 48
w.nbits -= 48
n := w.nbytes
bytes := w.bytes[n : n+6]
bytes[0] = byte(bits)
bytes[1] = byte(bits >> 8)
bytes[2] = byte(bits >> 16)
bytes[3] = byte(bits >> 24)
bytes[4] = byte(bits >> 32)
bytes[5] = byte(bits >> 40)
n += 6
if n >= bufferFlushSize {
w.write(w.bytes[:n])
n = 0
}
w.nbytes = n
}
}
// Write the header of a dynamic Huffman block to the output stream.
//
// numLiterals The number of literals specified in codegen
// numOffsets The number of offsets specified in codegen
// numCodegens The number of codegens used in codegen
func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
if w.err != nil {
return
}
var firstBits int32 = 4
if isEof {
firstBits = 5
}
w.writeBits(firstBits, 3)
w.writeBits(int32(numLiterals-257), 5)
w.writeBits(int32(numOffsets-1), 5)
w.writeBits(int32(numCodegens-4), 4)
for i := 0; i < numCodegens; i++ {
value := uint(w.codegenEncoding.codes[codegenOrder[i]].len)
w.writeBits(int32(value), 3)
}
i := 0
for {
var codeWord int = int(w.codegen[i])
i++
if codeWord == badCode {
break
}
w.writeCode(w.codegenEncoding.codes[uint32(codeWord)])
switch codeWord {
case 16:
w.writeBits(int32(w.codegen[i]), 2)
i++
case 17:
w.writeBits(int32(w.codegen[i]), 3)
i++
case 18:
w.writeBits(int32(w.codegen[i]), 7)
i++
}
}
}
func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
if w.err != nil {
return
}
var flag int32
if isEof {
flag = 1
}
w.writeBits(flag, 3)
w.flush()
w.writeBits(int32(length), 16)
w.writeBits(int32(^uint16(length)), 16)
}
func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
if w.err != nil {
return
}
// Indicate that we are a fixed Huffman block
var value int32 = 2
if isEof {
value = 3
}
w.writeBits(value, 3)
}
// writeBlock will write a block of tokens with the smallest encoding.
// The original input can be supplied, and if the huffman encoded data
// is larger than the original bytes, the data will be written as a
// stored block.
// If the input is nil, the tokens will always be Huffman encoded.
func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
if w.err != nil {
return
}
tokens = append(tokens, endBlockMarker)
numLiterals, numOffsets := w.indexTokens(tokens)
var extraBits int
storedSize, storable := w.storedSize(input)
if storable {
// We only bother calculating the costs of the extra bits required by
// the length of offset fields (which will be the same for both fixed
// and dynamic encoding), if we need to compare those two encodings
// against stored encoding.
for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ {
// First eight length codes have extra size = 0.
extraBits += int(w.literalFreq[lengthCode]) * int(lengthExtraBits[lengthCode-lengthCodesStart])
}
for offsetCode := 4; offsetCode < numOffsets; offsetCode++ {
// First four offset codes have extra size = 0.
extraBits += int(w.offsetFreq[offsetCode]) * int(offsetExtraBits[offsetCode])
}
}
// Figure out smallest code.
// Fixed Huffman baseline.
var literalEncoding = fixedLiteralEncoding
var offsetEncoding = fixedOffsetEncoding
var size = w.fixedSize(extraBits)
// Dynamic Huffman?
var numCodegens int
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
w.codegenEncoding.generate(w.codegenFreq[:], 7)
dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)
if dynamicSize < size {
size = dynamicSize
literalEncoding = w.literalEncoding
offsetEncoding = w.offsetEncoding
}
// Stored bytes?
if storable && storedSize < size {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
}
// Huffman.
if literalEncoding == fixedLiteralEncoding {
w.writeFixedHeader(eof)
} else {
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
}
// Write the tokens.
w.writeTokens(tokens, literalEncoding.codes, offsetEncoding.codes)
}
// writeBlockDynamic encodes a block using a dynamic Huffman table.
// This should be used if the symbols used have a disproportionate
// histogram distribution.
// If input is supplied and the compression savings are below 1/16th of the
// input size the block is stored.
func (w *huffmanBitWriter) writeBlockDynamic(tokens []token, eof bool, input []byte) {
if w.err != nil {
return
}
tokens = append(tokens, endBlockMarker)
numLiterals, numOffsets := w.indexTokens(tokens)
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
w.codegenEncoding.generate(w.codegenFreq[:], 7)
size, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, 0)
// Store bytes, if we don't get a reasonable improvement.
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
}
// Write Huffman table.
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
// Write the tokens.
w.writeTokens(tokens, w.literalEncoding.codes, w.offsetEncoding.codes)
}
// indexTokens indexes a slice of tokens, and updates
// literalFreq and offsetFreq, and generates literalEncoding
// and offsetEncoding.
// The number of literal and offset tokens is returned.
func (w *huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets int) {
clear(w.literalFreq)
clear(w.offsetFreq)
for _, t := range tokens {
if t < matchType {
w.literalFreq[t.literal()]++
continue
}
length := t.length()
offset := t.offset()
w.literalFreq[lengthCodesStart+lengthCode(length)]++
w.offsetFreq[offsetCode(offset)]++
}
// get the number of literals
numLiterals = len(w.literalFreq)
for w.literalFreq[numLiterals-1] == 0 {
numLiterals--
}
// get the number of offsets
numOffsets = len(w.offsetFreq)
for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 {
numOffsets--
}
if numOffsets == 0 {
// We haven't found a single match. If we want to go with the dynamic encoding,
// we should count at least one offset to be sure that the offset huffman tree could be encoded.
w.offsetFreq[0] = 1
numOffsets = 1
}
w.literalEncoding.generate(w.literalFreq, 15)
w.offsetEncoding.generate(w.offsetFreq, 15)
return
}
// writeTokens writes a slice of tokens to the output.
// codes for literal and offset encoding must be supplied.
func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) {
if w.err != nil {
return
}
for _, t := range tokens {
if t < matchType {
w.writeCode(leCodes[t.literal()])
continue
}
// Write the length
length := t.length()
lengthCode := lengthCode(length)
w.writeCode(leCodes[lengthCode+lengthCodesStart])
extraLengthBits := uint(lengthExtraBits[lengthCode])
if extraLengthBits > 0 {
extraLength := int32(length - lengthBase[lengthCode])
w.writeBits(extraLength, extraLengthBits)
}
// Write the offset
offset := t.offset()
offsetCode := offsetCode(offset)
w.writeCode(oeCodes[offsetCode])
extraOffsetBits := uint(offsetExtraBits[offsetCode])
if extraOffsetBits > 0 {
extraOffset := int32(offset - offsetBase[offsetCode])
w.writeBits(extraOffset, extraOffsetBits)
}
}
}
// huffOffset is a static offset encoder used for huffman only encoding.
// It can be reused since we will not be encoding offset values.
var huffOffset *huffmanEncoder
func init() {
offsetFreq := make([]int32, offsetCodeCount)
offsetFreq[0] = 1
huffOffset = newHuffmanEncoder(offsetCodeCount)
huffOffset.generate(offsetFreq, 15)
}
// writeBlockHuff encodes a block of bytes as either
// Huffman encoded literals or uncompressed bytes if the
// results only gains very little from compression.
func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
if w.err != nil {
return
}
// Clear histogram
clear(w.literalFreq)
// Add everything as literals
histogram(input, w.literalFreq)
w.literalFreq[endBlockMarker] = 1
const numLiterals = endBlockMarker + 1
w.offsetFreq[0] = 1
const numOffsets = 1
w.literalEncoding.generate(w.literalFreq, 15)
// Figure out smallest code.
// Always use dynamic Huffman or Store
var numCodegens int
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
w.codegenEncoding.generate(w.codegenFreq[:], 7)
size, numCodegens := w.dynamicSize(w.literalEncoding, huffOffset, 0)
// Store bytes, if we don't get a reasonable improvement.
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
}
// Huffman.
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
encoding := w.literalEncoding.codes[:257]
n := w.nbytes
for _, t := range input {
// Bitwriting inlined, ~30% speedup
c := encoding[t]
w.bits |= uint64(c.code) << w.nbits
w.nbits += uint(c.len)
if w.nbits < 48 {
continue
}
// Store 6 bytes
bits := w.bits
w.bits >>= 48
w.nbits -= 48
bytes := w.bytes[n : n+6]
bytes[0] = byte(bits)
bytes[1] = byte(bits >> 8)
bytes[2] = byte(bits >> 16)
bytes[3] = byte(bits >> 24)
bytes[4] = byte(bits >> 32)
bytes[5] = byte(bits >> 40)
n += 6
if n < bufferFlushSize {
continue
}
w.write(w.bytes[:n])
if w.err != nil {
return // Return early in the event of write failures
}
n = 0
}
w.nbytes = n
w.writeCode(encoding[endBlockMarker])
}
// histogram accumulates a histogram of b in h.
//
// len(h) must be >= 256, and h's elements must be all zeroes.
func histogram(b []byte, h []int32) {
h = h[:256]
for _, t := range b {
h[t]++
}
}
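The generateCodegen comment above describes the RFC 1951, section 3.2.7 run-length encoding of the concatenated code-length arrays; a concrete trace makes it easier to follow. An illustrative example (not part of the vendored file), worked by hand from the loop above:
// For the concatenated code lengths 3,3,3,3, nine 0s, then 2, generateCodegen emits:
//
//	3, 16, 0,    // literal "3", then code 16 with extra value 0 = repeat it 3 more times
//	17, 6,       // code 17 with extra value 6 = 6+3 = 9 zero lengths
//	2, badCode   // literal "2", then the end marker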

345
vendor/github.com/mjl-/flate/huffman_code.go generated vendored Normal file
View file

@ -0,0 +1,345 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"math"
"math/bits"
"sort"
)
// hcode is a huffman code with a bit code and bit length.
type hcode struct {
code, len uint16
}
type huffmanEncoder struct {
codes []hcode
freqcache []literalNode
bitCount [17]int32
lns byLiteral // stored to avoid repeated allocation in generate
lfs byFreq // stored to avoid repeated allocation in generate
}
type literalNode struct {
literal uint16
freq int32
}
// A levelInfo describes the state of the constructed tree for a given depth.
type levelInfo struct {
// Our level. for better printing
level int32
// The frequency of the last node at this level
lastFreq int32
// The frequency of the next character to add to this level
nextCharFreq int32
// The frequency of the next pair (from level below) to add to this level.
// Only valid if the "needed" value of the next lower level is 0.
nextPairFreq int32
// The number of chains remaining to generate for this level before moving
// up to the next level
needed int32
}
// set sets the code and length of an hcode.
func (h *hcode) set(code uint16, length uint16) {
h.len = length
h.code = code
}
func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
func newHuffmanEncoder(size int) *huffmanEncoder {
return &huffmanEncoder{codes: make([]hcode, size)}
}
// Generates a HuffmanCode corresponding to the fixed literal table.
func generateFixedLiteralEncoding() *huffmanEncoder {
h := newHuffmanEncoder(maxNumLit)
codes := h.codes
var ch uint16
for ch = 0; ch < maxNumLit; ch++ {
var bits uint16
var size uint16
switch {
case ch < 144:
// size 8, 00110000 .. 10111111
bits = ch + 48
size = 8
case ch < 256:
// size 9, 110010000 .. 111111111
bits = ch + 400 - 144
size = 9
case ch < 280:
// size 7, 0000000 .. 0010111
bits = ch - 256
size = 7
default:
// size 8, 11000000 .. 11000111
bits = ch + 192 - 280
size = 8
}
codes[ch] = hcode{code: reverseBits(bits, byte(size)), len: size}
}
return h
}
func generateFixedOffsetEncoding() *huffmanEncoder {
h := newHuffmanEncoder(30)
codes := h.codes
for ch := range codes {
codes[ch] = hcode{code: reverseBits(uint16(ch), 5), len: 5}
}
return h
}
var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
func (h *huffmanEncoder) bitLength(freq []int32) int {
var total int
for i, f := range freq {
if f != 0 {
total += int(f) * int(h.codes[i].len)
}
}
return total
}
const maxBitsLimit = 16
// bitCounts computes the number of literals assigned to each bit size in the Huffman encoding.
// It is only called when list.length >= 3.
// The cases of 0, 1, and 2 literals are handled by special case code.
//
// list is an array of the literals with non-zero frequencies
// and their associated frequencies. The array is in order of increasing
// frequency and has as its last element a special element with frequency
// MaxInt32.
//
// maxBits is the maximum number of bits that should be used to encode any literal.
// It must be less than 16.
//
// bitCounts returns an integer slice in which slice[i] indicates the number of literals
// that should be encoded in i bits.
func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
if maxBits >= maxBitsLimit {
panic("flate: maxBits too large")
}
n := int32(len(list))
list = list[0 : n+1]
list[n] = maxNode()
// The tree can't have greater depth than n - 1, no matter what. This
// saves a little bit of work in some small cases
if maxBits > n-1 {
maxBits = n - 1
}
// Create information about each of the levels.
// A bogus "Level 0" whose sole purpose is so that
// level1.prev.needed==0. This makes level1.nextPairFreq
// be a legitimate value that never gets chosen.
var levels [maxBitsLimit]levelInfo
// leafCounts[i] counts the number of literals at the left
// of ancestors of the rightmost node at level i.
// leafCounts[i][j] is the number of literals at the left
// of the level j ancestor.
var leafCounts [maxBitsLimit][maxBitsLimit]int32
for level := int32(1); level <= maxBits; level++ {
// For every level, the first two items are the first two characters.
// We initialize the levels as if we had already figured this out.
levels[level] = levelInfo{
level: level,
lastFreq: list[1].freq,
nextCharFreq: list[2].freq,
nextPairFreq: list[0].freq + list[1].freq,
}
leafCounts[level][level] = 2
if level == 1 {
levels[level].nextPairFreq = math.MaxInt32
}
}
// We need a total of 2*n - 2 items at top level and have already generated 2.
levels[maxBits].needed = 2*n - 4
level := maxBits
for {
l := &levels[level]
if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
// We've run out of both leaves and pairs.
// End all calculations for this level.
// To make sure we never come back to this level or any lower level,
// set nextPairFreq impossibly large.
l.needed = 0
levels[level+1].nextPairFreq = math.MaxInt32
level++
continue
}
prevFreq := l.lastFreq
if l.nextCharFreq < l.nextPairFreq {
// The next item on this row is a leaf node.
n := leafCounts[level][level] + 1
l.lastFreq = l.nextCharFreq
// Lower leafCounts are the same as the previous node's.
leafCounts[level][level] = n
l.nextCharFreq = list[n].freq
} else {
// The next item on this row is a pair from the previous row.
// nextPairFreq isn't valid until we generate two
// more values in the level below
l.lastFreq = l.nextPairFreq
// Take leaf counts from the lower level, except counts[level] remains the same.
copy(leafCounts[level][:level], leafCounts[level-1][:level])
levels[l.level-1].needed = 2
}
if l.needed--; l.needed == 0 {
// We've done everything we need to do for this level.
// Continue calculating one level up. Fill in nextPairFreq
// of that level with the sum of the two nodes we've just calculated on
// this level.
if l.level == maxBits {
// All done!
break
}
levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
level++
} else {
// If we stole from below, move down temporarily to replenish it.
for levels[level-1].needed > 0 {
level--
}
}
}
// Something is wrong if, at the end, the top level is null or hasn't used
// all of the leaves.
if leafCounts[maxBits][maxBits] != n {
panic("leafCounts[maxBits][maxBits] != n")
}
bitCount := h.bitCount[:maxBits+1]
bits := 1
counts := &leafCounts[maxBits]
for level := maxBits; level > 0; level-- {
// chain.leafCount gives the number of literals requiring at least "bits"
// bits to encode.
bitCount[bits] = counts[level] - counts[level-1]
bits++
}
return bitCount
}
// Look at the leaves and assign them a bit count and an encoding as specified
// in RFC 1951 3.2.2
func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
code := uint16(0)
for n, bits := range bitCount {
code <<= 1
if n == 0 || bits == 0 {
continue
}
// The literals list[len(list)-bits] .. list[len(list)-1]
// are encoded using "bits" bits, and get the values
// code, code + 1, .... The code values are
// assigned in literal order (not frequency order).
chunk := list[len(list)-int(bits):]
h.lns.sort(chunk)
for _, node := range chunk {
h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)}
code++
}
list = list[0 : len(list)-int(bits)]
}
}
// Update this Huffman Code object to be the minimum code for the specified frequency count.
//
// freq is an array of frequencies, in which freq[i] gives the frequency of literal i.
// maxBits The maximum number of bits to use for any literal.
func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
if h.freqcache == nil {
// Allocate a reusable buffer with the longest possible frequency table.
// Possible lengths are codegenCodeCount, offsetCodeCount and maxNumLit.
// The largest of these is maxNumLit, so we allocate for that case.
h.freqcache = make([]literalNode, maxNumLit+1)
}
list := h.freqcache[:len(freq)+1]
// Number of non-zero literals
count := 0
// Set list to be the set of all non-zero literals and their frequencies
for i, f := range freq {
if f != 0 {
list[count] = literalNode{uint16(i), f}
count++
} else {
h.codes[i].len = 0
}
}
list = list[:count]
if count <= 2 {
// Handle the small cases here, because they are awkward for the general case code. With
// two or fewer literals, everything has bit length 1.
for i, node := range list {
// "list" is in order of increasing literal value.
h.codes[node.literal].set(uint16(i), 1)
}
return
}
h.lfs.sort(list)
// Get the number of literals for each bit count
bitCount := h.bitCounts(list, maxBits)
// And do the assignment
h.assignEncodingAndSize(bitCount, list)
}
type byLiteral []literalNode
func (s *byLiteral) sort(a []literalNode) {
*s = byLiteral(a)
sort.Sort(s)
}
func (s byLiteral) Len() int { return len(s) }
func (s byLiteral) Less(i, j int) bool {
return s[i].literal < s[j].literal
}
func (s byLiteral) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
type byFreq []literalNode
func (s *byFreq) sort(a []literalNode) {
*s = byFreq(a)
sort.Sort(s)
}
func (s byFreq) Len() int { return len(s) }
func (s byFreq) Less(i, j int) bool {
if s[i].freq == s[j].freq {
return s[i].literal < s[j].literal
}
return s[i].freq < s[j].freq
}
func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func reverseBits(number uint16, bitLength byte) uint16 {
return bits.Reverse16(number << (16 - bitLength))
}

860
vendor/github.com/mjl-/flate/inflate.go generated vendored Normal file
View file

@ -0,0 +1,860 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package flate implements the DEFLATE compressed data format, described in RFC
// 1951. NewReaderPartial returns data flushed in "partial flush" mode without
// requiring a full history/sliding window or an explicit empty data block (as
// written in "sync flush" mode). The gzip and zlib packages implement access to
// DEFLATE-based file formats.
package flate
import (
"bufio"
"io"
"math/bits"
"strconv"
"sync"
)
const (
maxCodeLen = 16 // max length of Huffman code
// The next three numbers come from the RFC section 3.2.7, with the
// additional proviso in section 3.2.5 which implies that distance codes
// 30 and 31 should never occur in compressed data.
maxNumLit = 286
maxNumDist = 30
numCodes = 19 // number of codes in Huffman meta-code
)
// Initialize the fixedHuffmanDecoder only once upon first use.
var fixedOnce sync.Once
var fixedHuffmanDecoder huffmanDecoder
// A CorruptInputError reports the presence of corrupt input at a given offset.
type CorruptInputError int64
func (e CorruptInputError) Error() string {
return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
}
// An InternalError reports an error in the flate code itself.
type InternalError string
func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
// A ReadError reports an error encountered while reading input.
//
// Deprecated: No longer returned.
type ReadError struct {
Offset int64 // byte offset where error occurred
Err error // error returned by underlying Read
}
func (e *ReadError) Error() string {
return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
}
// A WriteError reports an error encountered while writing output.
//
// Deprecated: No longer returned.
type WriteError struct {
Offset int64 // byte offset where error occurred
Err error // error returned by underlying Write
}
func (e *WriteError) Error() string {
return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
}
// Resetter resets a ReadCloser returned by [NewReader] or [NewReaderDict]
// to switch to a new underlying [Reader]. This permits reusing a ReadCloser
// instead of allocating a new one.
type Resetter interface {
// Reset discards any buffered data and resets the Resetter as if it was
// newly initialized with the given reader.
Reset(r io.Reader, dict []byte) error
}
// The data structure for decoding Huffman tables is based on that of
// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
// For codes smaller than the table width, there are multiple entries
// (each combination of trailing bits has the same value). For codes
// larger than the table width, the table contains a link to an overflow
// table. The width of each entry in the link table is the maximum code
// size minus the chunk width.
//
// Note that you can do a lookup in the table even without all bits
// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
// have the property that shorter codes come before longer ones, the
// bit length estimate in the result is a lower bound on the actual
// number of bits.
//
// See the following:
// https://github.com/madler/zlib/raw/master/doc/algorithm.txt
// chunk & 15 is number of bits
// chunk >> 4 is value, including table link
const (
huffmanChunkBits = 9
huffmanNumChunks = 1 << huffmanChunkBits
huffmanCountMask = 15
huffmanValueShift = 4
)
type huffmanDecoder struct {
min int // the minimum code length
chunks [huffmanNumChunks]uint32 // chunks as described above
links [][]uint32 // overflow links
linkMask uint32 // mask the width of the link table
}
// Initialize Huffman decoding tables from array of code lengths.
// Following this function, h is guaranteed to be initialized into a complete
// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
// degenerate case where the tree has only a single symbol with length 1. Empty
// trees are permitted.
func (h *huffmanDecoder) init(lengths []int) bool {
// Sanity enables additional runtime tests during Huffman
// table construction. It's intended to be used during
// development to supplement the currently ad-hoc unit tests.
const sanity = false
if h.min != 0 {
*h = huffmanDecoder{}
}
// Count number of codes of each length,
// compute min and max length.
var count [maxCodeLen]int
var min, max int
for _, n := range lengths {
if n == 0 {
continue
}
if min == 0 || n < min {
min = n
}
if n > max {
max = n
}
count[n]++
}
// Empty tree. The decompressor.huffSym function will fail later if the tree
// is used. Technically, an empty tree is only valid for the HDIST tree and
// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
// is guaranteed to fail since it will attempt to use the tree to decode the
// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
// guaranteed to fail later since the compressed data section must be
// composed of at least one symbol (the end-of-block marker).
if max == 0 {
return true
}
code := 0
var nextcode [maxCodeLen]int
for i := min; i <= max; i++ {
code <<= 1
nextcode[i] = code
code += count[i]
}
// Check that the coding is complete (i.e., that we've
// assigned all 2-to-the-max possible bit sequences).
// Exception: To be compatible with zlib, we also need to
// accept degenerate single-code codings. See also
// TestDegenerateHuffmanCoding.
if code != 1<<uint(max) && !(code == 1 && max == 1) {
return false
}
h.min = min
if max > huffmanChunkBits {
numLinks := 1 << (uint(max) - huffmanChunkBits)
h.linkMask = uint32(numLinks - 1)
// create link tables
link := nextcode[huffmanChunkBits+1] >> 1
h.links = make([][]uint32, huffmanNumChunks-link)
for j := uint(link); j < huffmanNumChunks; j++ {
reverse := int(bits.Reverse16(uint16(j)))
reverse >>= uint(16 - huffmanChunkBits)
off := j - uint(link)
if sanity && h.chunks[reverse] != 0 {
panic("impossible: overwriting existing chunk")
}
h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
h.links[off] = make([]uint32, numLinks)
}
}
for i, n := range lengths {
if n == 0 {
continue
}
code := nextcode[n]
nextcode[n]++
chunk := uint32(i<<huffmanValueShift | n)
reverse := int(bits.Reverse16(uint16(code)))
reverse >>= uint(16 - n)
if n <= huffmanChunkBits {
for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
// We should never need to overwrite
// an existing chunk. Also, 0 is
// never a valid chunk, because the
// lower 4 "count" bits should be
// between 1 and 15.
if sanity && h.chunks[off] != 0 {
panic("impossible: overwriting existing chunk")
}
h.chunks[off] = chunk
}
} else {
j := reverse & (huffmanNumChunks - 1)
if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
// Longer codes should have been
// associated with a link table above.
panic("impossible: not an indirect chunk")
}
value := h.chunks[j] >> huffmanValueShift
linktab := h.links[value]
reverse >>= huffmanChunkBits
for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
if sanity && linktab[off] != 0 {
panic("impossible: overwriting existing chunk")
}
linktab[off] = chunk
}
}
}
if sanity {
// Above we've sanity checked that we never overwrote
// an existing entry. Here we additionally check that
// we filled the tables completely.
for i, chunk := range h.chunks {
if chunk == 0 {
// As an exception, in the degenerate
// single-code case, we allow odd
// chunks to be missing.
if code == 1 && i%2 == 1 {
continue
}
panic("impossible: missing chunk")
}
}
for _, linktab := range h.links {
for _, chunk := range linktab {
if chunk == 0 {
panic("impossible: missing chunk")
}
}
}
}
return true
}
// The actual read interface needed by [NewReader].
// If the passed in io.Reader does not also have ReadByte,
// the [NewReader] will introduce its own buffering.
type Reader interface {
io.Reader
io.ByteReader
}
// Decompress state.
type decompressor struct {
// Input source.
r Reader
rBuf *bufio.Reader // created if provided io.Reader does not implement io.ByteReader
roffset int64
// Input bits, in top of b.
b uint32
nb uint
// Huffman decoders for literal/length, distance.
h1, h2 huffmanDecoder
// Length arrays used to define Huffman codes.
bits *[maxNumLit + maxNumDist]int
codebits *[numCodes]int
// Output history, buffer.
dict dictDecoder
// Temporary buffer (avoids repeated allocation).
buf [4]byte
// Next step in the decompression,
// and decompression state.
step func(*decompressor)
stepState int
final bool
readPartial bool // If set, r is a *bufio.Reader.
err error
toRead []byte
hl, hd *huffmanDecoder
copyLen int
copyDist int
}
func (f *decompressor) nextBlock() {
// Ensure we don't block readers of network streams written with "partial flush"
// mode and small writes/blocks (shorter than the sliding window size). Such
// streams don't explicitly signal that we should pass data to our reader, like the
// "sync flush" mode does with its zero-length data block. If we would do a
// blocking read for the next block, our reader's protocol may get stuck.
if f.readPartial && f.dict.availRead() > 0 {
if f.r.(*bufio.Reader).Buffered() == 0 {
f.toRead = f.dict.readFlush()
return
}
}
for f.nb < 1+2 {
if f.err = f.moreBits(); f.err != nil {
return
}
}
f.final = f.b&1 == 1
f.b >>= 1
typ := f.b & 3
f.b >>= 2
f.nb -= 1 + 2
switch typ {
case 0:
f.dataBlock()
case 1:
// compressed, fixed Huffman tables
f.hl = &fixedHuffmanDecoder
f.hd = nil
f.huffmanBlock()
case 2:
// compressed, dynamic Huffman tables
if f.err = f.readHuffman(); f.err != nil {
break
}
f.hl = &f.h1
f.hd = &f.h2
f.huffmanBlock()
default:
// 3 is reserved.
f.err = CorruptInputError(f.roffset)
}
}
func (f *decompressor) Read(b []byte) (int, error) {
for {
if len(f.toRead) > 0 {
n := copy(b, f.toRead)
f.toRead = f.toRead[n:]
if len(f.toRead) == 0 {
return n, f.err
}
return n, nil
}
if f.err != nil {
return 0, f.err
}
f.step(f)
if f.err != nil && len(f.toRead) == 0 {
f.toRead = f.dict.readFlush() // Flush what's left in case of error
}
}
}
func (f *decompressor) Close() error {
if f.err == io.EOF {
return nil
}
return f.err
}
// RFC 1951 section 3.2.7.
// Compression with dynamic Huffman codes
var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
func (f *decompressor) readHuffman() error {
// HLIT[5], HDIST[5], HCLEN[4].
for f.nb < 5+5+4 {
if err := f.moreBits(); err != nil {
return err
}
}
nlit := int(f.b&0x1F) + 257
if nlit > maxNumLit {
return CorruptInputError(f.roffset)
}
f.b >>= 5
ndist := int(f.b&0x1F) + 1
if ndist > maxNumDist {
return CorruptInputError(f.roffset)
}
f.b >>= 5
nclen := int(f.b&0xF) + 4
// numCodes is 19, so nclen is always valid.
f.b >>= 4
f.nb -= 5 + 5 + 4
// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
for i := 0; i < nclen; i++ {
for f.nb < 3 {
if err := f.moreBits(); err != nil {
return err
}
}
f.codebits[codeOrder[i]] = int(f.b & 0x7)
f.b >>= 3
f.nb -= 3
}
for i := nclen; i < len(codeOrder); i++ {
f.codebits[codeOrder[i]] = 0
}
if !f.h1.init(f.codebits[0:]) {
return CorruptInputError(f.roffset)
}
// HLIT + 257 code lengths, HDIST + 1 code lengths,
// using the code length Huffman code.
for i, n := 0, nlit+ndist; i < n; {
x, err := f.huffSym(&f.h1)
if err != nil {
return err
}
if x < 16 {
// Actual length.
f.bits[i] = x
i++
continue
}
// Repeat previous length or zero.
var rep int
var nb uint
var b int
switch x {
default:
return InternalError("unexpected length code")
case 16:
rep = 3
nb = 2
if i == 0 {
return CorruptInputError(f.roffset)
}
b = f.bits[i-1]
case 17:
rep = 3
nb = 3
b = 0
case 18:
rep = 11
nb = 7
b = 0
}
for f.nb < nb {
if err := f.moreBits(); err != nil {
return err
}
}
rep += int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
if i+rep > n {
return CorruptInputError(f.roffset)
}
for j := 0; j < rep; j++ {
f.bits[i] = b
i++
}
}
if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
return CorruptInputError(f.roffset)
}
// As an optimization, we can initialize the min bits to read at a time
// for the HLIT tree to the length of the EOB marker since we know that
// every block must terminate with one. This preserves the property that
// we never read any extra bytes after the end of the DEFLATE stream.
if f.h1.min < f.bits[endBlockMarker] {
f.h1.min = f.bits[endBlockMarker]
}
return nil
}
// Decode a single Huffman block from f.
// hl and hd are the Huffman states for the lit/length values
// and the distance values, respectively. If hd == nil, using the
// fixed distance encoding associated with fixed Huffman blocks.
func (f *decompressor) huffmanBlock() {
const (
stateInit = iota // Zero value must be stateInit
stateDict
)
switch f.stepState {
case stateInit:
goto readLiteral
case stateDict:
goto copyHistory
}
readLiteral:
// Read literal and/or (length, distance) according to RFC section 3.2.3.
{
v, err := f.huffSym(f.hl)
if err != nil {
f.err = err
return
}
var n uint // number of bits extra
var length int
switch {
case v < 256:
f.dict.writeByte(byte(v))
if f.dict.availWrite() == 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBlock
f.stepState = stateInit
return
}
goto readLiteral
case v == 256:
f.finishBlock()
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
n = 0
case v < 269:
length = v*2 - (265*2 - 11)
n = 1
case v < 273:
length = v*4 - (269*4 - 19)
n = 2
case v < 277:
length = v*8 - (273*8 - 35)
n = 3
case v < 281:
length = v*16 - (277*16 - 67)
n = 4
case v < 285:
length = v*32 - (281*32 - 131)
n = 5
case v < maxNumLit:
length = 258
n = 0
default:
f.err = CorruptInputError(f.roffset)
return
}
if n > 0 {
for f.nb < n {
if err = f.moreBits(); err != nil {
f.err = err
return
}
}
length += int(f.b & uint32(1<<n-1))
f.b >>= n
f.nb -= n
}
var dist int
if f.hd == nil {
for f.nb < 5 {
if err = f.moreBits(); err != nil {
f.err = err
return
}
}
dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
f.b >>= 5
f.nb -= 5
} else {
if dist, err = f.huffSym(f.hd); err != nil {
f.err = err
return
}
}
switch {
case dist < 4:
dist++
case dist < maxNumDist:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << nb
for f.nb < nb {
if err = f.moreBits(); err != nil {
f.err = err
return
}
}
extra |= int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
dist = 1<<(nb+1) + 1 + extra
default:
f.err = CorruptInputError(f.roffset)
return
}
// No check on length; encoding can be prescient.
if dist > f.dict.histSize() {
f.err = CorruptInputError(f.roffset)
return
}
f.copyLen, f.copyDist = length, dist
goto copyHistory
}
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBlock // We need to continue this work
f.stepState = stateDict
return
}
goto readLiteral
}
}
// Copy a single uncompressed data block from input to output.
func (f *decompressor) dataBlock() {
// Uncompressed.
// Discard current half-byte.
f.nb = 0
f.b = 0
// Length then ones-complement of length.
nr, err := io.ReadFull(f.r, f.buf[0:4])
f.roffset += int64(nr)
if err != nil {
f.err = noEOF(err)
return
}
n := int(f.buf[0]) | int(f.buf[1])<<8
nn := int(f.buf[2]) | int(f.buf[3])<<8
if uint16(nn) != uint16(^n) {
f.err = CorruptInputError(f.roffset)
return
}
if n == 0 {
f.toRead = f.dict.readFlush()
f.finishBlock()
return
}
f.copyLen = n
f.copyData()
}
// copyData copies f.copyLen bytes from the underlying reader into f.hist.
// It pauses for reads when f.hist is full.
func (f *decompressor) copyData() {
buf := f.dict.writeSlice()
if len(buf) > f.copyLen {
buf = buf[:f.copyLen]
}
cnt, err := io.ReadFull(f.r, buf)
f.roffset += int64(cnt)
f.copyLen -= cnt
f.dict.writeMark(cnt)
if err != nil {
f.err = noEOF(err)
return
}
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).copyData
return
}
f.finishBlock()
}
func (f *decompressor) finishBlock() {
if f.final {
if f.dict.availRead() > 0 {
f.toRead = f.dict.readFlush()
}
f.err = io.EOF
}
f.step = (*decompressor).nextBlock
}
// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
func noEOF(e error) error {
if e == io.EOF {
return io.ErrUnexpectedEOF
}
return e
}
func (f *decompressor) moreBits() error {
c, err := f.r.ReadByte()
if err != nil {
return noEOF(err)
}
f.roffset++
f.b |= uint32(c) << f.nb
f.nb += 8
return nil
}
// Read the next Huffman-encoded symbol from f according to h.
func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it to
// satisfy the n == 0 check below.
n := uint(h.min)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
nb, b := f.nb, f.b
for {
for nb < n {
c, err := f.r.ReadByte()
if err != nil {
f.b = b
f.nb = nb
return 0, noEOF(err)
}
f.roffset++
b |= uint32(c) << (nb & 31)
nb += 8
}
chunk := h.chunks[b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= nb {
if n == 0 {
f.b = b
f.nb = nb
f.err = CorruptInputError(f.roffset)
return 0, f.err
}
f.b = b >> (n & 31)
f.nb = nb - n
return int(chunk >> huffmanValueShift), nil
}
}
}
func (f *decompressor) makeReader(r io.Reader) {
if rr, ok := r.(Reader); ok {
f.rBuf = nil
if f.readPartial {
if _, ok := rr.(*bufio.Reader); !ok {
rr = bufio.NewReader(rr)
}
}
f.r = rr
return
}
// Reuse rBuf if possible. Invariant: rBuf is always created (and owned) by decompressor.
if f.rBuf != nil {
f.rBuf.Reset(r)
} else {
// bufio.NewReader will not return r, as r does not implement flate.Reader, so it is not bufio.Reader.
f.rBuf = bufio.NewReader(r)
}
f.r = f.rBuf
}
func fixedHuffmanDecoderInit() {
fixedOnce.Do(func() {
// These come from the RFC section 3.2.6.
var bits [288]int
for i := 0; i < 144; i++ {
bits[i] = 8
}
for i := 144; i < 256; i++ {
bits[i] = 9
}
for i := 256; i < 280; i++ {
bits[i] = 7
}
for i := 280; i < 288; i++ {
bits[i] = 8
}
fixedHuffmanDecoder.init(bits[:])
})
}
func (f *decompressor) Reset(r io.Reader, dict []byte) error {
*f = decompressor{
rBuf: f.rBuf,
bits: f.bits,
codebits: f.codebits,
dict: f.dict,
step: (*decompressor).nextBlock,
readPartial: f.readPartial,
}
f.makeReader(r)
f.dict.init(maxMatchOffset, dict)
return nil
}
// NewReader returns a new ReadCloser that can be used
// to read the uncompressed version of r.
// If r does not also implement [io.ByteReader],
// the decompressor may read more data than necessary from r.
// The reader returns [io.EOF] after the final block in the DEFLATE stream has
// been encountered. Any trailing data after the final block is ignored.
//
// The [io.ReadCloser] returned by NewReader also implements [Resetter].
func NewReader(r io.Reader) io.ReadCloser {
return newReader(r, nil, false)
}
// NewReaderDict is like [NewReader] but initializes the reader
// with a preset dictionary. The returned [Reader] behaves as if
// the uncompressed data stream started with the given dictionary,
// which has already been read. NewReaderDict is typically used
// to read data compressed by NewWriterDict.
//
// The ReadCloser returned by NewReaderDict also implements [Resetter].
func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
return newReader(r, dict, false)
}
// NewReaderPartial is like [NewReader], but can return data smaller than the
// history sliding window size when it was written in "partial flush" mode.
func NewReaderPartial(r io.Reader) io.ReadCloser {
return newReader(r, nil, true)
}
func newReader(r io.Reader, dict []byte, readPartial bool) *decompressor {
fixedHuffmanDecoderInit()
var f decompressor
f.readPartial = readPartial
f.makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
f.dict.init(maxMatchOffset, dict)
return &f
}
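As the package comment and NewReaderPartial describe, a plain NewReader only returns buffered output once the sliding window fills or an explicit zero-length sync-flush block arrives, which can stall interactive protocols whose peer compresses with "partial flush". A minimal usage sketch (not part of the vendored file), importing this package as flate along with "io"; conn and handle are assumptions for the example:
// readCompressed decompresses conn until the final block, handing decompressed
// bytes to handle as soon as they become available.
func readCompressed(conn io.Reader, handle func([]byte)) error {
	fr := flate.NewReaderPartial(conn)
	defer fr.Close()
	buf := make([]byte, 4096)
	for {
		n, err := fr.Read(buf)
		if n > 0 {
			handle(buf[:n])
		}
		if err == io.EOF {
			return nil // final block seen; trailing data on conn is ignored
		} else if err != nil {
			return err // read error or CorruptInputError
		}
	}
}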

97
vendor/github.com/mjl-/flate/token.go generated vendored Normal file
View file

@ -0,0 +1,97 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
const (
// 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused
// 8 bits: xlength = length - MIN_MATCH_LENGTH
// 22 bits xoffset = offset - MIN_OFFSET_SIZE, or literal
lengthShift = 22
offsetMask = 1<<lengthShift - 1
typeMask = 3 << 30
literalType = 0 << 30
matchType = 1 << 30
)
// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
// is lengthCodes[length - MIN_MATCH_LENGTH]
var lengthCodes = [...]uint32{
0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 28,
}
var offsetCodes = [...]uint32{
0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
}
type token uint32
// Convert a literal into a literal token.
func literalToken(literal uint32) token { return token(literalType + literal) }
// Convert a < xlength, xoffset > pair into a match token.
func matchToken(xlength uint32, xoffset uint32) token {
return token(matchType + xlength<<lengthShift + xoffset)
}
// Returns the literal of a literal token.
func (t token) literal() uint32 { return uint32(t - literalType) }
// Returns the extra offset of a match token.
func (t token) offset() uint32 { return uint32(t) & offsetMask }
func (t token) length() uint32 { return uint32((t - matchType) >> lengthShift) }
func lengthCode(len uint32) uint32 { return lengthCodes[len] }
// Returns the offset code corresponding to a specific offset.
func offsetCode(off uint32) uint32 {
if off < uint32(len(offsetCodes)) {
return offsetCodes[off]
}
if off>>7 < uint32(len(offsetCodes)) {
return offsetCodes[off>>7] + 14
}
return offsetCodes[off>>14] + 28
}
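As an illustration of the token layout documented at the top of this file, a hypothetical test (not part of the vendored code, and only compilable inside the flate package, since the helpers are unexported; the concrete numbers are arbitrary):

package flate

import "testing"

// Sketch: a token is a uint32 with the type in the top 2 bits, xlength
// (length - MIN_MATCH_LENGTH) in bits 22..29, and xoffset
// (offset - MIN_OFFSET_SIZE) or the literal byte in the low 22 bits.
func TestTokenLayoutSketch(t *testing.T) {
	tok := matchToken(10, 300)
	if tok.length() != 10 || tok.offset() != 300 {
		t.Fatalf("got length %d offset %d", tok.length(), tok.offset())
	}
	lit := literalToken('a')
	if lit.literal() != 'a' {
		t.Fatalf("got literal %d", lit.literal())
	}
}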

3
vendor/modules.txt vendored
View file

@@ -19,6 +19,9 @@ github.com/mjl-/autocert
# github.com/mjl-/bstore v0.0.6
## explicit; go 1.19
github.com/mjl-/bstore
# github.com/mjl-/flate v0.0.0-20250221133712-6372d09eb978
## explicit; go 1.21
github.com/mjl-/flate
# github.com/mjl-/sconf v0.0.7
## explicit; go 1.12
github.com/mjl-/sconf