telemetry: Make http_user_agent a normalized field

This way we store a short 8-byte hash of the UA instead of the full
string; exactly the same way we store TLS ClientHello info.
This commit is contained in:
Matthew Holt 2018-05-10 08:57:25 -06:00
parent df7cdc3fae
commit 9160789b42
No known key found for this signature in database
GPG key ID: 2A349DD577D586A5
4 changed files with 17 additions and 3 deletions

View file

@ -65,9 +65,10 @@ func (h *tlsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
h.listener.helloInfosMu.RUnlock()
ua := r.Header.Get("User-Agent")
uaHash := telemetry.FastHash([]byte(ua))
// report this request's UA in connection with this ClientHello
go telemetry.AppendUnique("tls_client_hello_ua:"+caddytls.ClientHelloInfo(info).Key(), ua)
go telemetry.AppendUnique("tls_client_hello_ua:"+caddytls.ClientHelloInfo(info).Key(), uaHash)
var checked, mitm bool
if r.Header.Get("X-BlueCoat-Via") != "" || // Blue Coat (masks User-Agent header to generic values)

View file

@ -354,7 +354,9 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
if len(ua) > 512 {
ua = ua[:512]
}
go telemetry.AppendUnique("http_user_agent", ua)
uaHash := telemetry.FastHash([]byte(ua)) // this is a normalized field
go telemetry.SetNested("http_user_agent", uaHash, ua)
go telemetry.AppendUnique("http_user_agent_count", uaHash)
go telemetry.Increment("http_request_count")
// copy the original, unchanged URL into the context

View file

@ -516,7 +516,7 @@ func (info ClientHelloInfo) Key() string {
if !info.CompressionMethodsUnknown {
compressionMethods = fmt.Sprintf("%x", info.CompressionMethods)
}
return fastHash([]byte(fmt.Sprintf("%x-%x-%s-%s-%x-%x",
return telemetry.FastHash([]byte(fmt.Sprintf("%x-%x-%s-%s-%x-%x",
info.Version, info.CipherSuites, extensions,
compressionMethods, info.Curves, info.Points)))
}

View file

@ -15,6 +15,8 @@
package telemetry
import (
"fmt"
"hash/fnv"
"log"
"strings"
@ -276,6 +278,15 @@ func atomicAdd(key string, amount int) {
bufferMu.Unlock()
}
// FastHash hashes input using a 32-bit hashing algorithm
// that is fast, and returns the hash as a hex-encoded string.
// Do not use this for cryptographic purposes.
func FastHash(input []byte) string {
h := fnv.New32a()
h.Write(input)
return fmt.Sprintf("%x", h.Sum32())
}
// isDisabled returns whether key is
// a disabled metric key. ALL collection
// functions should call this and not