arche / internal/syncpkg/bloom.go

commit 154431fd
 1package syncpkg
 2
 3import (
 4	"encoding/binary"
 5	"fmt"
 6	"math"
 7)
 8
 9type BloomFilter struct {
10	k     int
11	m     uint64
12	n     uint64
13	words []uint64
14}
15
16func NewBloom(n int) *BloomFilter {
17	if n <= 0 {
18		n = 1
19	}
20	m := uint64(math.Ceil(-float64(n) * math.Log(0.01) / (math.Log(2) * math.Log(2))))
21	if m == 0 {
22		m = 64
23	}
24	m = ((m + 63) / 64) * 64
25	return &BloomFilter{
26		k:     7,
27		m:     m,
28		words: make([]uint64, m/64),
29	}
30}
31
32func (f *BloomFilter) Add(id [32]byte) {
33	h1, h2 := splitID(id)
34	for i := uint64(0); i < uint64(f.k); i++ {
35		bit := (h1 + i*h2) % f.m
36		f.words[bit/64] |= 1 << (bit % 64)
37	}
38	f.n++
39}
40
41func (f *BloomFilter) Test(id [32]byte) bool {
42	h1, h2 := splitID(id)
43	for i := uint64(0); i < uint64(f.k); i++ {
44		bit := (h1 + i*h2) % f.m
45		if f.words[bit/64]&(1<<(bit%64)) == 0 {
46			return false
47		}
48	}
49	return true
50}
51
52func (f *BloomFilter) Len() int { return int(f.n) }
53
54func (f *BloomFilter) Bytes() []byte {
55	buf := make([]byte, 4+8+8+len(f.words)*8)
56	binary.BigEndian.PutUint32(buf[0:4], uint32(f.k))
57	binary.BigEndian.PutUint64(buf[4:12], f.m)
58	binary.BigEndian.PutUint64(buf[12:20], f.n)
59	for i, w := range f.words {
60		binary.BigEndian.PutUint64(buf[20+i*8:], w)
61	}
62	return buf
63}
64
65func BloomFilterFrom(data []byte) (*BloomFilter, error) {
66	if len(data) < 20 {
67		return nil, fmt.Errorf("bloom: data too short (%d bytes)", len(data))
68	}
69	k := int(binary.BigEndian.Uint32(data[0:4]))
70	m := binary.BigEndian.Uint64(data[4:12])
71	n := binary.BigEndian.Uint64(data[12:20])
72	nWords := m / 64
73	expected := int(20 + nWords*8)
74	if len(data) < expected {
75		return nil, fmt.Errorf("bloom: expected %d bytes, got %d", expected, len(data))
76	}
77	words := make([]uint64, nWords)
78	for i := range words {
79		words[i] = binary.BigEndian.Uint64(data[20+i*8:])
80	}
81	return &BloomFilter{k: k, m: m, n: n, words: words}, nil
82}
83
84func splitID(id [32]byte) (h1, h2 uint64) {
85	h1 = binary.BigEndian.Uint64(id[0:8])
86	h2 = binary.BigEndian.Uint64(id[8:16])
87	if h2 == 0 {
88		h2 = 1
89	}
90	return
91}