mirror of
https://github.com/mjl-/mox.git
synced 2024-12-27 08:53:48 +03:00
166 lines
3.4 KiB
Go
166 lines
3.4 KiB
Go
|
package junk
|
||
|
|
||
|
import (
|
||
|
"errors"
|
||
|
"os"
|
||
|
|
||
|
"golang.org/x/crypto/blake2b"
|
||
|
)
|
||
|
|
||
|
// see https://en.wikipedia.org/wiki/Bloom_filter
|
||
|
|
||
|
var errWidth = errors.New("k and width wider than 256 bits and width not more than 32")
|
||
|
var errPowerOfTwo = errors.New("data not a power of two")
|
||
|
|
||
|
// Bloom is a bloom filter.
|
||
|
type Bloom struct {
|
||
|
data []byte
|
||
|
k int // Number of bits we store/lookup in the bloom filter per value.
|
||
|
w int // Number of bits needed to address a single bit position.
|
||
|
modified bool
|
||
|
}
|
||
|
|
||
|
func bloomWidth(fileSize int) int {
|
||
|
w := 0
|
||
|
for bits := uint32(fileSize * 8); bits > 1; bits >>= 1 {
|
||
|
w++
|
||
|
}
|
||
|
return w
|
||
|
}
|
||
|
|
||
|
// BloomValid returns an error if the bloom file parameters are not correct.
|
||
|
func BloomValid(fileSize int, k int) error {
|
||
|
_, err := bloomValid(fileSize, k)
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
func bloomValid(fileSize, k int) (int, error) {
|
||
|
w := bloomWidth(fileSize)
|
||
|
if 1<<w != fileSize*8 {
|
||
|
return 0, errPowerOfTwo
|
||
|
}
|
||
|
if k*w > 256 || w > 32 {
|
||
|
return 0, errWidth
|
||
|
}
|
||
|
return w, nil
|
||
|
}
|
||
|
|
||
|
// NewBloom returns a bloom filter with given initial data.
|
||
|
//
|
||
|
// The number of bits in data must be a power of 2.
|
||
|
// K is the number of "hashes" (bits) to store/lookup for each value stored.
|
||
|
// Width is calculated as the number of bits needed to represent a single bit/hash
|
||
|
// position in the data.
|
||
|
//
|
||
|
// For each value stored/looked up, a hash over the value is calculated. The hash
|
||
|
// is split into "k" values that are "width" bits wide, each used to lookup a bit.
|
||
|
// K * width must not exceed 256.
|
||
|
func NewBloom(data []byte, k int) (*Bloom, error) {
|
||
|
w, err := bloomValid(len(data), k)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
return &Bloom{
|
||
|
data: data,
|
||
|
k: k,
|
||
|
w: w,
|
||
|
}, nil
|
||
|
}
|
||
|
|
||
|
func (b *Bloom) Add(s string) {
|
||
|
h := hash([]byte(s), b.w)
|
||
|
for i := 0; i < b.k; i++ {
|
||
|
b.set(h.nextPos())
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (b *Bloom) Has(s string) bool {
|
||
|
h := hash([]byte(s), b.w)
|
||
|
for i := 0; i < b.k; i++ {
|
||
|
if !b.has(h.nextPos()) {
|
||
|
return false
|
||
|
}
|
||
|
}
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
func (b *Bloom) Bytes() []byte {
|
||
|
return b.data
|
||
|
}
|
||
|
|
||
|
func (b *Bloom) Modified() bool {
|
||
|
return b.modified
|
||
|
}
|
||
|
|
||
|
// Ones returns the number of ones.
|
||
|
func (b *Bloom) Ones() (n int) {
|
||
|
for _, d := range b.data {
|
||
|
for i := 0; i < 8; i++ {
|
||
|
if d&1 != 0 {
|
||
|
n++
|
||
|
}
|
||
|
d >>= 1
|
||
|
}
|
||
|
}
|
||
|
return n
|
||
|
}
|
||
|
|
||
|
func (b *Bloom) Write(path string) error {
|
||
|
f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0660)
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
if _, err := f.Write(b.data); err != nil {
|
||
|
f.Close()
|
||
|
return err
|
||
|
}
|
||
|
if err := f.Close(); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
b.modified = false
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
func (b *Bloom) has(p int) bool {
|
||
|
v := b.data[p>>3] >> (7 - (p & 7))
|
||
|
return v&1 != 0
|
||
|
}
|
||
|
|
||
|
func (b *Bloom) set(p int) {
|
||
|
by := p >> 3
|
||
|
bi := p & 0x7
|
||
|
var v byte = 1 << (7 - bi)
|
||
|
if b.data[by]&v == 0 {
|
||
|
b.data[by] |= v
|
||
|
b.modified = true
|
||
|
}
|
||
|
}
|
||
|
|
||
|
type bits struct {
|
||
|
width int // Number of bits for each position.
|
||
|
buf []byte // Remaining bytes to use for next position.
|
||
|
cur uint64 // Bits to read next position from. Replenished from buf.
|
||
|
ncur int // Number of bits available in cur. We consume the highest bits first.
|
||
|
}
|
||
|
|
||
|
func hash(v []byte, width int) *bits {
|
||
|
buf := blake2b.Sum256(v)
|
||
|
return &bits{width: width, buf: buf[:]}
|
||
|
}
|
||
|
|
||
|
// nextPos returns the next bit position.
|
||
|
func (b *bits) nextPos() (v int) {
|
||
|
if b.width > b.ncur {
|
||
|
for len(b.buf) > 0 && b.ncur < 64-8 {
|
||
|
b.cur <<= 8
|
||
|
b.cur |= uint64(b.buf[0])
|
||
|
b.ncur += 8
|
||
|
b.buf = b.buf[1:]
|
||
|
}
|
||
|
}
|
||
|
v = int((b.cur >> (b.ncur - b.width)) & ((1 << b.width) - 1))
|
||
|
b.ncur -= b.width
|
||
|
return v
|
||
|
}
|