package bstore

import (
	"bytes"
	"fmt"
	"reflect"
	"sort"
)

// todo: cache query plans? perhaps explicitly through something like a prepared statement. the current plan includes values in keys,start,stop, which would need to be calculated for each execution. should benchmark time spent in planning first.
// todo optimize: combine multiple filter (not)in/equals calls for same field
// todo optimize: efficiently pack booleans in an index (eg for Message.Flags), and use it to query.
// todo optimize: do multiple range scans if necessary when we can use an index for an equal check with multiple values.

// Plan represents a plan to execute a query, possibly using a simple/quick
// bucket "get" or cursor scan (forward/backward) on either the records or an
// index.
type plan[T any] struct {
	// The index for this plan. If nil, we are using PKs, in which case
	// "keys" below can be nil for a range scan with start/stop (possibly empty
	// for a full scan), or non-nil for looking up specific keys.
	idx *index

	// Use full unique index to get specific values from keys. idx above can be
	// a unique index that we only use partially. In that case, this field is
	// false.
	unique bool

	// If not nil, used to fetch explicit keys when using the PK or a unique
	// index. Must be non-nil for unique.
	keys [][]byte

	desc bool // Direction of the range scan.

	// First key to scan. Filters below may still apply. If desc, this value is
	// greater than stop (if stop is set). If nil, we begin ranging at the first
	// or last (for desc) key.
	start []byte

	// Last key to scan. Can be nil independently of start.
	stop []byte

	// Whether the start and stop values are inclusive or exclusive.
	startInclusive bool
	stopInclusive  bool

	// Filters we need to apply after retrieving each record. If all original
	// filters from a query were handled by "keys" above, or by a range scan,
	// this field is empty.
	filters []filter[T]

	// Number of fields from the index used to group results before applying
	// in-memory ordering with "orders" below.
	norderidxuse int

	// Orders we need to apply after first retrieving all records with equal
	// values for the first norderidxuse fields. As with filters, if a range
	// scan takes care of all orderings from the query, this field is empty.
	orders []order
}

// selectPlan selects the best plan for this query.
func (q *Query[T]) selectPlan() (*plan[T], error) {
	// Simple case first: a list of known IDs. We can just fetch them from
	// the records bucket by their primary keys. This is common for a
	// "Get" query.
	if q.xfilterIDs != nil {
		orders := q.xorders
		keys := q.xfilterIDs.pks
		// If there is an ordering on the PK field, we do the ordering here.
		if len(orders) > 0 && orders[0].field.Name == q.st.Current.Fields[0].Name {
			asc := orders[0].asc
			sort.Slice(keys, func(i, j int) bool {
				cmp := bytes.Compare(keys[i], keys[j])
				return asc && cmp < 0 || !asc && cmp > 0
			})
			orders = orders[1:]
		}
		p := &plan[T]{
			keys:    keys,
			filters: q.xfilters,
			orders:  orders,
		}
		return p, nil
	}

	// Try using a fully matched unique index. We build a map with all fields
	// that have an equal or in filter, so we can easily look through our
	// unique indices and get a match. We only look at a single filter per
	// field. If there are multiple, we would use the last one. That's okay:
	// we'll filter records out when we execute the leftover filters. Probably
	// not common.
	// This is common for filterEqual and filterIn on fields that have a unique index.
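	// E.g. with a unique index on (Host, Port), an equals filter on Host and an
	// "in" filter on Port with two values resolve to two direct index key fetches.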
	equalsIn := map[string]*filter[T]{}
	for i := range q.xfilters {
		ff := &q.xfilters[i]
		switch f := (*ff).(type) {
		case filterEqual[T]:
			equalsIn[f.field.Name] = ff
		case filterIn[T]:
			equalsIn[f.field.Name] = ff
		}
	}
indices:
	for _, idx := range q.st.Current.Indices {
		// Direct fetches only for unique indices.
		if !idx.Unique {
			continue
		}
		for _, f := range idx.Fields {
			if _, ok := equalsIn[f.Name]; !ok {
				// At least one index field does not have a filter.
				continue indices
			}
		}

		// Calculate all keys that we need to retrieve from the index.
		// todo optimize: if there is a sort involving these fields, we could do the sorting before fetching data.
		// todo optimize: we can generate the keys on demand, which will help when a limit is in use: we would not have to generate all keys.
		var keys [][]byte
		var skipFilters []*filter[T] // Filters to remove from the full list because they are handled by querying the index.
		for i, f := range idx.Fields {
			var rvalues []reflect.Value
			ff := equalsIn[f.Name]
			skipFilters = append(skipFilters, ff)
			switch fi := (*ff).(type) {
			case filterEqual[T]:
				rvalues = []reflect.Value{fi.rvalue}
			case filterIn[T]:
				rvalues = fi.rvalues
			default:
				return nil, fmt.Errorf("internal error: bad filter %T", *ff)
			}
			fekeys := make([][]byte, len(rvalues))
			for j, fv := range rvalues {
				ikl, err := packIndexKeys([]reflect.Value{fv}, nil)
				if err != nil {
					q.error(err)
					return nil, err
				}
				if len(ikl) != 1 {
					return nil, fmt.Errorf("internal error: multiple index keys for unique index (%d)", len(ikl))
				}
				fekeys[j] = ikl[0].pre
			}
			if i == 0 {
				keys = fekeys
				continue
			}
			// Multiply current keys with the new values.
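			// E.g. keys {p, q} and fekeys {x, y} produce {px, py, qx, qy}.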
			nkeys := make([][]byte, 0, len(keys)*len(fekeys))
			for _, k := range keys {
				for _, fk := range fekeys {
					nk := append(append([]byte{}, k...), fk...)
					nkeys = append(nkeys, nk)
				}
			}
			keys = nkeys
		}

		p := &plan[T]{
			idx:     idx,
			unique:  true,
			keys:    keys,
			filters: dropFilters(q.xfilters, skipFilters),
			orders:  q.xorders,
		}
		return p, nil
	}

	// Try all other indices. We treat them all as non-unique indices now.
	// We want to use the one with as many "equal" or "inslice" field filters as
	// possible. Then we hope to use a scan on the remaining fields, either
	// because of a filterCompare, or for an ordering. If there is a limit,
	// orderings are preferred over compares.
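	// E.g. with an index on (A, B), an equals filter on A plus a greater-than
	// filter on B uses A as the exact prefix and B as the start of the range scan.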
	equals := map[string]*filter[T]{}
	inslices := map[string]*filter[T]{}
	for i := range q.xfilters {
		ff := &q.xfilters[i]
		switch f := (*ff).(type) {
		case filterEqual[T]:
			equals[f.field.Name] = ff
		case filterInSlice[T]:
			inslices[f.field.Name] = ff
		}
	}

	// We are going to generate new plans, and keep the new one if it is better
	// than what we have so far.
	var p *plan[T]
	var nexact int
	var nrange int
	var norder int
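
	// evaluatePKOrIndex evaluates a scan over the primary keys (idx nil) or over
	// the given index, and replaces plan p above when the candidate is better,
	// updating nexact, nrange and norder accordingly.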
	evaluatePKOrIndex := func(idx *index) error {
		var isPK bool
		var packKeys func([]reflect.Value) ([]byte, error)
		if idx == nil {
			// Make a pretend index for the primary key.
			isPK = true
			idx = &index{
				Fields: []field{q.st.Current.Fields[0]},
			}
			packKeys = func(l []reflect.Value) ([]byte, error) {
				return packPK(l[0])
			}
		} else {
			packKeys = func(l []reflect.Value) ([]byte, error) {
				ikl, err := packIndexKeys(l, nil)
				if err != nil {
					return nil, err
				}
				if len(ikl) != 1 {
					return nil, fmt.Errorf("internal error: multiple index keys for exact filters, %v", ikl)
				}
				return ikl[0].pre, nil
			}
		}

		nex := 0
		// log.Printf("evaluating idx %#v", idx)
		var skipFilters []*filter[T]
		for _, f := range idx.Fields {
			if equals[f.Name] != nil && f.Type.Kind != kindSlice {
				skipFilters = append(skipFilters, equals[f.Name])
			} else if inslices[f.Name] != nil && f.Type.Kind == kindSlice {
				skipFilters = append(skipFilters, inslices[f.Name])
			} else {
				break
			}
			nex++
		}

		// For ordering, skip leading orderings on fields that we already match
		// exactly: all results share the same value for those fields.
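		// E.g. with an equals filter on A and a sort on A then B, the order on A
		// is a no-op (every result has the same A), so only the order on B remains.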
		orders := q.xorders
		trim := 0
	TrimOrders:
		for _, o := range orders {
			for _, f := range idx.Fields[:nex] {
				if o.field.Name == f.Name {
					trim++
					continue TrimOrders
				}
			}
			break
		}
		orders = orders[trim:]

		// Fields from the index that we use for grouping before in-memory sorting.
		var norderidxuse int

		// See if the next index field can be used for a compare and for ordering.
		var gx, lx *filterCompare[T]
		var nrng int // for nrange
		if nex < len(idx.Fields) {
			nf := idx.Fields[nex]
			for i := range q.xfilters {
				ff := &q.xfilters[i]
				switch f := (*ff).(type) {
				case filterCompare[T]:
					if f.field.Name != nf.Name {
						continue
					}
					switch f.op {
					case opGreater, opGreaterEqual:
						if gx == nil {
							gx = &f
							skipFilters = append(skipFilters, ff)
							nrng++
						}
					case opLess, opLessEqual:
						if lx == nil {
							lx = &f
							skipFilters = append(skipFilters, ff)
							nrng++
						}
					}
				}
			}

			// We can use multiple orderings as long as the asc/desc direction stays the same.
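			// E.g. an index on (A, B) yields results ordered by A asc, B asc on a
			// forward scan and A desc, B desc on a backward scan, but never A asc, B desc.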
			nord := 0
			for i, o := range orders {
				if nex+i < len(idx.Fields) && o.field.Name == idx.Fields[nex+i].Name && (nord == 0 || o.asc == orders[0].asc) {
					nord++
					continue
				}
				break
			}
			norderidxuse = nex + nord
			prevorders := orders
			orders = orders[nord:]
			// The stored index key ends with the primary key, so if we're there, and the
			// next ordering key is the primary key, we use the index for it too.
			if norderidxuse == len(idx.Fields) && len(orders) > 0 && orders[0].field.Name == q.st.Current.Fields[0].Name && (nord == 0 || orders[0].asc == prevorders[nord-1].asc) {
				orders = orders[1:]
				norderidxuse++
			}
		} else if len(orders) > 0 && orders[0].field.Name == q.st.Current.Fields[0].Name {
			// We only had equals filters that used all of the index, but we're also
			// sorting by the primary key, so use the index for that too.
			orders = orders[1:]
			norderidxuse++
		}

		// Orders handled by the index, excluding the leading orderings already
		// fixed by exact match filters.
		idxorders := q.xorders[trim : len(q.xorders)-len(orders)]
		// log.Printf("index fields to match for index order: %d, orders for index %d, in-memory ordering %d, total orders %d", norderidxuse, len(idxorders), len(orders), len(q.xorders))

		// See if this plan is better than what we had.
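		// A candidate wins if it matches more fields exactly; on a tie, if it
		// covers more range filters, or if it handles more orderings via the index
		// while a limit is set or the range filter count is equal.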
		if !(nex > nexact || (nex == nexact && (nrng > nrange || len(idxorders) > norder && (q.xlimit > 0 || nrng == nrange)))) {
			// log.Printf("plan not better, nex %d, nrng %d, limit %d, nidxorders %v ordered %v", nex, nrng, q.xlimit, len(idxorders), norder)
			return nil
		}
		nexact = nex
		nrange = nrng
		norder = len(idxorders)

		// Calculate the prefix key.
		var kvalues []reflect.Value
		for i := 0; i < nex; i++ {
			f := idx.Fields[i]
			var v reflect.Value
			if f.Type.Kind != kindSlice {
				v = (*equals[f.Name]).(filterEqual[T]).rvalue
			} else {
				v = (*inslices[f.Name]).(filterInSlice[T]).rvalue
			}
			kvalues = append(kvalues, v)
		}
		var key []byte
		var err error
		if nex > 0 {
			key, err = packKeys(kvalues)
			if err != nil {
				return err
			}
		}
		start := key
		stop := key
		if gx != nil {
			k, err := packKeys([]reflect.Value{gx.value})
			if err != nil {
				return err
			}
			start = append(append([]byte{}, start...), k...)
		}
		if lx != nil {
			k, err := packKeys([]reflect.Value{lx.value})
			if err != nil {
				return err
			}
			stop = append(append([]byte{}, stop...), k...)
		}
		startInclusive := gx == nil || gx.op != opGreater
		stopInclusive := lx == nil || lx.op != opLess
		desc := len(idxorders) > 0 && !idxorders[0].asc
		if desc {
			start, stop = stop, start
			startInclusive, stopInclusive = stopInclusive, startInclusive
		}
		if isPK {
			idx = nil // Clear our fake index for the PK.
		}
		p = &plan[T]{
			idx:            idx,
			desc:           desc,
			start:          start,
			stop:           stop,
			startInclusive: startInclusive,
			stopInclusive:  stopInclusive,
			filters:        dropFilters(q.xfilters, skipFilters),
			norderidxuse:   norderidxuse,
			orders:         orders,
		}
		return nil
	}

	if err := evaluatePKOrIndex(nil); err != nil {
		q.error(err)
		return nil, q.err
	}
	for _, idx := range q.st.Current.Indices {
		if err := evaluatePKOrIndex(idx); err != nil {
			q.error(err)
			return nil, q.err
		}
	}
	if p != nil {
		// log.Printf("using index plan %v", p)
		return p, nil
	}

	// We'll just do a scan over all data.
	p = &plan[T]{
		filters: q.xfilters,
		orders:  q.xorders,
	}
	return p, nil
}
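
// dropFilters returns filters, minus the entries of skip, comparing by pointer
// identity: callers collect pointers into the original filters slice, so the
// addresses line up.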
func dropFilters[T any](filters []T, skip []*T) []T {
	n := make([]T, 0, len(filters)-len(skip))
next:
	for i := range filters {
		f := &filters[i]
		for _, s := range skip {
			if f == s {
				continue next
			}
		}
		n = append(n, *f)
	}
	return n
}