logging: Implement dial timeout for net writer (fix #4083) (#4172)

* logging: Implement dial timeout for net writer (fix #4083)

* Limit how often redials are attempted

This should cause dial blocking to occur only once every 10 seconds at most, but it also means the logger connection might be down for up to 10 seconds after it comes back online; oh well. We shouldn't block for DialTimeout at every single log emission.

* Clarify offline behavior
This commit is contained in:
Matt Holt 2021-05-19 15:14:03 -06:00 committed by GitHub
parent c052162203
commit e3c369d452
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -18,7 +18,9 @@ import (
"fmt"
"io"
"net"
"os"
"sync"
"time"
"github.com/caddyserver/caddy/v2"
"github.com/caddyserver/caddy/v2/caddyconfig/caddyfile"
@ -28,10 +30,16 @@ func init() {
caddy.RegisterModule(NetWriter{})
}
// NetWriter implements a log writer that outputs to a network socket.
// NetWriter implements a log writer that outputs to a network socket. If
// the socket goes down, it will dump logs to stderr while it attempts to
// reconnect.
type NetWriter struct {
// The address of the network socket to which to connect.
Address string `json:"address,omitempty"`
// The timeout to wait while connecting to the socket.
DialTimeout caddy.Duration `json:"dial_timeout,omitempty"`
addr caddy.NetworkAddress
}
@ -60,6 +68,10 @@ func (nw *NetWriter) Provision(ctx caddy.Context) error {
return fmt.Errorf("multiple ports not supported")
}
if nw.DialTimeout < 0 {
return fmt.Errorf("timeout cannot be less than 0")
}
return nil
}
@ -74,7 +86,10 @@ func (nw NetWriter) WriterKey() string {
// OpenWriter opens a new network connection.
func (nw NetWriter) OpenWriter() (io.WriteCloser, error) {
reconn := &redialerConn{nw: nw}
reconn := &redialerConn{
nw: nw,
timeout: time.Duration(nw.DialTimeout),
}
conn, err := reconn.dial()
if err != nil {
return nil, err
@ -87,7 +102,9 @@ func (nw NetWriter) OpenWriter() (io.WriteCloser, error) {
// UnmarshalCaddyfile sets up the handler from Caddyfile tokens. Syntax:
//
// net <address>
// net <address> {
// dial_timeout <duration>
// }
//
func (nw *NetWriter) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
for d.Next() {
@ -98,6 +115,22 @@ func (nw *NetWriter) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
if d.NextArg() {
return d.ArgErr()
}
for nesting := d.Nesting(); d.NextBlock(nesting); {
switch d.Val() {
case "dial_timeout":
if !d.NextArg() {
return d.ArgErr()
}
timeout, err := caddy.ParseDuration(d.Val())
if err != nil {
return d.Errf("invalid duration: %s", d.Val())
}
if d.NextArg() {
return d.ArgErr()
}
nw.DialTimeout = caddy.Duration(timeout)
}
}
}
return nil
}
@ -107,8 +140,10 @@ func (nw *NetWriter) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
// is retried.
type redialerConn struct {
net.Conn
connMu sync.RWMutex
nw NetWriter
connMu sync.RWMutex
nw NetWriter
timeout time.Duration
lastRedial time.Time
}
// Write wraps the underlying Conn.Write method, but if that fails,
@ -131,20 +166,32 @@ func (reconn *redialerConn) Write(b []byte) (n int, err error) {
return
}
// we're the lucky first goroutine to re-dial the connection
conn2, err2 := reconn.dial()
if err2 != nil {
return
}
if n, err = conn2.Write(b); err == nil {
reconn.Conn.Close()
reconn.Conn = conn2
// there's still a problem, so try to re-attempt dialing the socket
// if some time has passed in which the issue could have potentially
// been resolved - we don't want to block at every single log
// emission (!) - see discussion in #4111
if time.Since(reconn.lastRedial) > 10*time.Second {
reconn.lastRedial = time.Now()
conn2, err2 := reconn.dial()
if err2 != nil {
// logger socket still offline; instead of discarding the log, dump it to stderr
os.Stderr.Write(b)
return
}
if n, err = conn2.Write(b); err == nil {
reconn.Conn.Close()
reconn.Conn = conn2
}
} else {
// last redial attempt was too recent; just dump to stderr for now
os.Stderr.Write(b)
}
return
}
func (reconn *redialerConn) dial() (net.Conn, error) {
return net.Dial(reconn.nw.addr.Network, reconn.nw.addr.JoinHostPort(0))
return net.DialTimeout(reconn.nw.addr.Network, reconn.nw.addr.JoinHostPort(0), reconn.timeout)
}
// Interface guards