diff --git a/bchain/coins/eth/ethparser.go b/bchain/coins/eth/ethparser.go index e3d310cc..424bdcf6 100644 --- a/bchain/coins/eth/ethparser.go +++ b/bchain/coins/eth/ethparser.go @@ -22,10 +22,19 @@ const EthereumTypeTxidLen = 32 // EtherAmountDecimalPoint defines number of decimal points in Ether amounts const EtherAmountDecimalPoint = 18 +const defaultHotAddressMinContracts = 192 +const defaultHotAddressLRUCacheSize = 20000 +const defaultHotAddressMinHits = 3 +const maxHotAddressLRUCacheSize = 100_000 +const maxHotAddressMinHits = 10 + // EthereumParser handle type EthereumParser struct { *bchain.BaseParser - EnsSuffix string + EnsSuffix string + HotAddressMinContracts int + HotAddressLRUCacheSize int + HotAddressMinHits int } // NewEthereumParser returns new EthereumParser instance @@ -36,10 +45,17 @@ func NewEthereumParser(b int, addressAliases bool) *EthereumParser { AmountDecimalPoint: EtherAmountDecimalPoint, AddressAliases: addressAliases, }, - EnsSuffix: ".eth", + EnsSuffix: ".eth", + HotAddressMinContracts: defaultHotAddressMinContracts, + HotAddressLRUCacheSize: defaultHotAddressLRUCacheSize, + HotAddressMinHits: defaultHotAddressMinHits, } } +func (p *EthereumParser) HotAddressConfig() (minContracts, lruSize, minHits int) { + return p.HotAddressMinContracts, p.HotAddressLRUCacheSize, p.HotAddressMinHits +} + type rpcHeader struct { Hash string `json:"hash"` ParentHash string `json:"parentHash"` diff --git a/bchain/coins/eth/ethrpc.go b/bchain/coins/eth/ethrpc.go index 69ce3340..2912893c 100644 --- a/bchain/coins/eth/ethrpc.go +++ b/bchain/coins/eth/ethrpc.go @@ -51,6 +51,9 @@ type Configuration struct { RPCTimeout int `json:"rpc_timeout"` Erc20BatchSize int `json:"erc20_batch_size,omitempty"` BlockAddressesToKeep int `json:"block_addresses_to_keep"` + HotAddressMinContracts int `json:"hot_address_min_contracts,omitempty"` + HotAddressLRUCacheSize int `json:"hot_address_lru_cache_size,omitempty"` + HotAddressMinHits int 
`json:"hot_address_min_hits,omitempty"` AddressAliases bool `json:"address_aliases,omitempty"` MempoolTxTimeoutHours int `json:"mempoolTxTimeoutHours"` QueryBackendOnMempoolResync bool `json:"queryBackendOnMempoolResync"` @@ -112,6 +115,21 @@ func NewEthereumRPC(config json.RawMessage, pushHandler func(bchain.Notification if c.Erc20BatchSize <= 0 { c.Erc20BatchSize = defaultErc20BatchSize } + if c.HotAddressMinContracts <= 0 { + c.HotAddressMinContracts = defaultHotAddressMinContracts + } + if c.HotAddressLRUCacheSize <= 0 { + c.HotAddressLRUCacheSize = defaultHotAddressLRUCacheSize + } else if c.HotAddressLRUCacheSize > maxHotAddressLRUCacheSize { + glog.Warningf("hot_address_lru_cache_size=%d is too large, clamping to %d", c.HotAddressLRUCacheSize, maxHotAddressLRUCacheSize) + c.HotAddressLRUCacheSize = maxHotAddressLRUCacheSize + } + if c.HotAddressMinHits <= 0 { + c.HotAddressMinHits = defaultHotAddressMinHits + } else if c.HotAddressMinHits > maxHotAddressMinHits { + glog.Warningf("hot_address_min_hits=%d is too large, clamping to %d", c.HotAddressMinHits, maxHotAddressMinHits) + c.HotAddressMinHits = maxHotAddressMinHits + } s := &EthereumRPC{ BaseChain: &bchain.BaseChain{}, @@ -124,6 +142,9 @@ func NewEthereumRPC(config json.RawMessage, pushHandler func(bchain.Notification // always create parser s.Parser = NewEthereumParser(c.BlockAddressesToKeep, c.AddressAliases) + s.Parser.HotAddressMinContracts = c.HotAddressMinContracts + s.Parser.HotAddressLRUCacheSize = c.HotAddressLRUCacheSize + s.Parser.HotAddressMinHits = c.HotAddressMinHits s.Timeout = time.Duration(c.RPCTimeout) * time.Second s.PushHandler = pushHandler diff --git a/db/address_hotness.go b/db/address_hotness.go new file mode 100644 index 00000000..fa750e35 --- /dev/null +++ b/db/address_hotness.go @@ -0,0 +1,173 @@ +package db + +import ( + "container/list" + "fmt" + + "github.com/trezor/blockbook/bchain" + "github.com/trezor/blockbook/bchain/coins/eth" +) + +type hotAddressConfigProvider 
interface { + HotAddressConfig() (minContracts, lruSize, minHits int) +} + +type addressHotnessKey [eth.EthereumTypeAddressDescriptorLen]byte + +func addressHotnessKeyFromDesc(addr bchain.AddressDescriptor) (addressHotnessKey, bool) { + var key addressHotnessKey + if len(addr) != len(key) { + return key, false + } + copy(key[:], addr) + return key, true +} + +type addressHotness struct { + minContracts int + minHits int + lru *hotAddressLRU + // hits tracks per-block lookup counts so we can decide when an address is hot. + // It is cleared at BeginBlock to avoid unbounded growth. + hits map[addressHotnessKey]uint16 + // block stats (reset after reporting) to keep logging cheap. + // blockEligibleLookups counts lookups with contractCount >= minContracts (i.e., eligible for hotness). + blockEligibleLookups uint64 + // blockLRUHits counts eligible lookups that hit an already-hot address in the LRU. + blockLRUHits uint64 + // blockPromotions counts addresses promoted to hot (minHits reached) in the current block. + blockPromotions uint64 + // blockEvictions counts LRU evictions triggered by promotions in the current block. + blockEvictions uint64 +} + +func newAddressHotness(minContracts, lruSize, minHits int) *addressHotness { + if minContracts <= 0 || lruSize <= 0 || minHits <= 0 { + return nil + } + return &addressHotness{ + minContracts: minContracts, + minHits: minHits, + lru: newHotAddressLRU(lruSize), + // Pre-size the per-block hit map to avoid reallocs on busy blocks. + hits: make(map[addressHotnessKey]uint16), + } +} + +func newAddressHotnessFromParser(parser bchain.BlockChainParser) *addressHotness { + cfg, ok := parser.(hotAddressConfigProvider) + if !ok { + return nil + } + minContracts, lruSize, minHits := cfg.HotAddressConfig() + return newAddressHotness(minContracts, lruSize, minHits) +} + +func (h *addressHotness) BeginBlock() { + if h == nil { + return + } + // Reset per-block hit counts; LRU survives across blocks. 
+ clear(h.hits) + // Reset per-block stats counters. + h.blockEligibleLookups = 0 + h.blockLRUHits = 0 + h.blockPromotions = 0 + h.blockEvictions = 0 +} + +func (h *addressHotness) ShouldUseIndex(addrKey addressHotnessKey, contractCount int) bool { + if h == nil || contractCount < h.minContracts { + return false + } + h.blockEligibleLookups++ + // Rule B: once an address is hot, reuse the index immediately. + if h.lru != nil && h.lru.touch(addrKey) { + h.blockLRUHits++ + return true + } + // Count hits within the current block; once minHits is reached, promote to LRU. + hits := h.hits[addrKey] + 1 + if hits < uint16(h.minHits) { + h.hits[addrKey] = hits + return false + } + delete(h.hits, addrKey) + if h.lru != nil { + // Promotion: once hot, an address stays hot until evicted by LRU capacity. + if h.lru.add(addrKey) { + h.blockEvictions++ + } + h.blockPromotions++ + } + return true +} + +func (h *addressHotness) LogSuffix() string { + if h == nil { + return "" + } + if h.blockEligibleLookups == 0 && h.blockLRUHits == 0 && h.blockPromotions == 0 && h.blockEvictions == 0 { + return "" + } + hitRate := 0.0 + if h.blockEligibleLookups > 0 { + hitRate = float64(h.blockLRUHits) / float64(h.blockEligibleLookups) + } + return fmt.Sprintf(", hotness[eligible_lookups=%d, lru_hits=%d, promotions=%d, evictions=%d, hit_rate=%.3f]", + h.blockEligibleLookups, h.blockLRUHits, h.blockPromotions, h.blockEvictions, hitRate) +} + +type hotAddressLRU struct { + capacity int + order *list.List + items map[addressHotnessKey]*list.Element +} + +func newHotAddressLRU(capacity int) *hotAddressLRU { + if capacity <= 0 { + return nil + } + return &hotAddressLRU{ + capacity: capacity, + order: list.New(), + // items maps address -> list element; the list order is MRU->LRU. 
+ items: make(map[addressHotnessKey]*list.Element, capacity), + } +} + +func (l *hotAddressLRU) touch(key addressHotnessKey) bool { + if l == nil { + return false + } + if el, ok := l.items[key]; ok { + // Hot: move to front so it won't be evicted soon. + l.order.MoveToFront(el) + return true + } + return false +} + +func (l *hotAddressLRU) add(key addressHotnessKey) bool { + if l == nil { + return false + } + if el, ok := l.items[key]; ok { + // Already hot; refresh recency. + l.order.MoveToFront(el) + return false + } + el := l.order.PushFront(key) + l.items[key] = el + if l.order.Len() <= l.capacity { + return false + } + // Evict the least-recently used hot address. + oldest := l.order.Back() + if oldest == nil { + return false + } + l.order.Remove(oldest) + delete(l.items, oldest.Value.(addressHotnessKey)) + return true +} diff --git a/db/address_hotness_test.go b/db/address_hotness_test.go new file mode 100644 index 00000000..e0f54d6e --- /dev/null +++ b/db/address_hotness_test.go @@ -0,0 +1,183 @@ +//go:build unittest + +package db + +import "testing" + +func makeHotKey(seed byte) addressHotnessKey { + var key addressHotnessKey + for i := range key { + key[i] = seed + } + return key +} + +func Test_newAddressHotness_Disabled(t *testing.T) { + if got := newAddressHotness(0, 1, 1); got != nil { + t.Fatal("expected nil when minContracts is disabled") + } + if got := newAddressHotness(1, 0, 1); got != nil { + t.Fatal("expected nil when lruSize is disabled") + } + if got := newAddressHotness(1, 1, 0); got != nil { + t.Fatal("expected nil when minHits is disabled") + } +} + +func Test_addressHotness_MinContractsGate(t *testing.T) { + hot := newAddressHotness(5, 4, 1) + if hot == nil { + t.Fatal("expected hotness tracker to be initialized") + } + key := makeHotKey(1) + + if hot.ShouldUseIndex(key, 4) { + t.Fatal("expected contractCount below minContracts to skip index") + } + if !hot.ShouldUseIndex(key, 5) { + t.Fatal("expected hot address to use index once 
minContracts is met") + } +} + +func Test_addressHotness_HitsPromotionAndBeginBlock(t *testing.T) { + hot := newAddressHotness(2, 4, 3) + if hot == nil { + t.Fatal("expected hotness tracker to be initialized") + } + key := makeHotKey(2) + hot.BeginBlock() + + if hot.ShouldUseIndex(key, 2) { + t.Fatal("expected first hit to stay cold") + } + if hot.ShouldUseIndex(key, 2) { + t.Fatal("expected second hit to stay cold") + } + if !hot.ShouldUseIndex(key, 2) { + t.Fatal("expected third hit to promote to hot") + } + + hot.BeginBlock() + if !hot.ShouldUseIndex(key, 2) { + t.Fatal("expected hot address to stay hot across blocks") + } +} + +func Test_addressHotness_LRUEviction(t *testing.T) { + hot := newAddressHotness(1, 2, 1) + if hot == nil { + t.Fatal("expected hotness tracker to be initialized") + } + a := makeHotKey(10) + b := makeHotKey(11) + c := makeHotKey(12) + hot.BeginBlock() + + if !hot.ShouldUseIndex(a, 1) || !hot.ShouldUseIndex(b, 1) { + t.Fatal("expected A and B to be promoted to hot") + } + // Touch A so B becomes the least-recently used. + if !hot.ShouldUseIndex(a, 1) { + t.Fatal("expected A to remain hot after touch") + } + // Promote C; should evict B. 
+ if !hot.ShouldUseIndex(c, 1) { + t.Fatal("expected C to be promoted to hot") + } + if _, ok := hot.lru.items[b]; ok { + t.Fatal("expected LRU eviction of B after promoting C") + } + if _, ok := hot.lru.items[a]; !ok { + t.Fatal("expected A to remain hot after eviction") + } + if _, ok := hot.lru.items[c]; !ok { + t.Fatal("expected C to be hot after promotion") + } +} + +func Test_addressHotness_Specs(t *testing.T) { + t.Run("it should reset per-block hits", func(t *testing.T) { + hot := newAddressHotness(1, 2, 2) + if hot == nil { + t.Fatal("expected hotness tracker to be initialized") + } + key := makeHotKey(20) + hot.BeginBlock() + if hot.ShouldUseIndex(key, 1) { + t.Fatal("expected first hit to stay cold") + } + hot.BeginBlock() + if hot.ShouldUseIndex(key, 1) { + t.Fatal("expected hit count to reset between blocks") + } + }) + + t.Run("it should report a non-empty log suffix after activity", func(t *testing.T) { + hot := newAddressHotness(1, 2, 1) + if hot == nil { + t.Fatal("expected hotness tracker to be initialized") + } + key := makeHotKey(24) + hot.BeginBlock() + if !hot.ShouldUseIndex(key, 1) { + t.Fatal("expected promotion to happen") + } + if got := hot.LogSuffix(); got == "" { + t.Fatal("expected log suffix to be non-empty after activity") + } + }) + + t.Run("it should not use index below minContracts even if hot", func(t *testing.T) { + hot := newAddressHotness(3, 2, 1) + if hot == nil { + t.Fatal("expected hotness tracker to be initialized") + } + key := makeHotKey(21) + hot.BeginBlock() + if !hot.ShouldUseIndex(key, 3) { + t.Fatal("expected address to become hot at minContracts") + } + if hot.ShouldUseIndex(key, 2) { + t.Fatal("expected address below minContracts to skip index") + } + }) + + t.Run("it should promote immediately when minHits is one", func(t *testing.T) { + hot := newAddressHotness(1, 2, 1) + if hot == nil { + t.Fatal("expected hotness tracker to be initialized") + } + key := makeHotKey(22) + hot.BeginBlock() + if 
!hot.ShouldUseIndex(key, 1) { + t.Fatal("expected immediate promotion when minHits is one") + } + if _, ok := hot.lru.items[key]; !ok { + t.Fatal("expected key to be present in LRU after promotion") + } + }) + + t.Run("it should not add to LRU before minHits", func(t *testing.T) { + hot := newAddressHotness(1, 2, 3) + if hot == nil { + t.Fatal("expected hotness tracker to be initialized") + } + key := makeHotKey(23) + hot.BeginBlock() + if hot.ShouldUseIndex(key, 1) { + t.Fatal("expected first hit to stay cold") + } + if len(hot.lru.items) != 0 { + t.Fatal("expected LRU to remain empty before promotion") + } + if hot.hits[key] != 1 { + t.Fatal("expected hit counter to increment before promotion") + } + }) + + t.Run("it should reject short address descriptors", func(t *testing.T) { + if _, ok := addressHotnessKeyFromDesc([]byte{1, 2}); ok { + t.Fatal("expected short address descriptor to be rejected") + } + }) +} diff --git a/db/bulkconnect.go b/db/bulkconnect.go index faa49632..6dc5c075 100644 --- a/db/bulkconnect.go +++ b/db/bulkconnect.go @@ -247,7 +247,11 @@ func (b *BulkConnect) connectBlockBitcoinType(block *bchain.Block, storeBlockTxs return err } if bac > b.bulkAddressesCount { - glog.Info("rocksdb: height ", b.height, ", stored ", bac, " addresses, done in ", time.Since(start)) + suffix := "" + if b.d.hotAddrTracker != nil { + suffix = b.d.hotAddrTracker.LogSuffix() + } + glog.Info("rocksdb: height ", b.height, ", stored ", bac, " addresses, done in ", time.Since(start), suffix) } } if storeAddressesChan != nil { @@ -355,7 +359,11 @@ func (b *BulkConnect) connectBlockEthereumType(block *bchain.Block, storeBlockTx return err } if bac > b.bulkAddressesCount { - glog.Info("rocksdb: height ", b.height, ", stored ", bac, " addresses, done in ", time.Since(start)) + suffix := "" + if b.d.hotAddrTracker != nil { + suffix = b.d.hotAddrTracker.LogSuffix() + } + glog.Info("rocksdb: height ", b.height, ", stored ", bac, " addresses, done in ", time.Since(start), 
suffix) } } else { // if there are blockSpecificData, store them @@ -422,7 +430,11 @@ func (b *BulkConnect) Close() error { if err := b.d.WriteBatch(wb); err != nil { return err } - glog.Info("rocksdb: height ", b.height, ", stored ", bac, " addresses, done in ", time.Since(start)) + suffix := "" + if b.d.hotAddrTracker != nil { + suffix = b.d.hotAddrTracker.LogSuffix() + } + glog.Info("rocksdb: height ", b.height, ", stored ", bac, " addresses, done in ", time.Since(start), suffix) if storeTxAddressesChan != nil { if err := <-storeTxAddressesChan; err != nil { return err diff --git a/db/rocksdb.go b/db/rocksdb.go index a64c31e8..e035279b 100644 --- a/db/rocksdb.go +++ b/db/rocksdb.go @@ -76,6 +76,7 @@ type RocksDB struct { connectBlockMux sync.Mutex addrContractsCacheMux sync.Mutex addrContractsCache map[string]*unpackedAddrContracts + hotAddrTracker *addressHotness } const ( @@ -154,8 +155,26 @@ func NewRocksDB(path string, cacheSize, maxOpenFiles int, parser bchain.BlockCha } wo := grocksdb.NewDefaultWriteOptions() ro := grocksdb.NewDefaultReadOptions() - r := &RocksDB{path, db, wo, ro, cfh, parser, nil, metrics, c, maxOpenFiles, connectBlockStats{}, extendedIndex, sync.Mutex{}, sync.Mutex{}, make(map[string]*unpackedAddrContracts)} + r := &RocksDB{ + path: path, + db: db, + wo: wo, + ro: ro, + cfh: cfh, + chainParser: parser, + is: nil, + metrics: metrics, + cache: c, + maxOpenFiles: maxOpenFiles, + cbs: connectBlockStats{}, + extendedIndex: extendedIndex, + connectBlockMux: sync.Mutex{}, + addrContractsCacheMux: sync.Mutex{}, + addrContractsCache: make(map[string]*unpackedAddrContracts), + hotAddrTracker: nil, + } if chainType == bchain.ChainEthereumType { + r.hotAddrTracker = newAddressHotnessFromParser(parser) go r.periodicStoreAddrContractsCache() } return r, nil diff --git a/db/rocksdb_ethereumtype.go b/db/rocksdb_ethereumtype.go index e3b83390..f1463109 100644 --- a/db/rocksdb_ethereumtype.go +++ b/db/rocksdb_ethereumtype.go @@ -468,7 +468,7 @@ func (d 
*RocksDB) addToAddressesAndContractsEthereumType(addrDesc bchain.Address // do not store contracts for 0x0000000000000000000000000000000000000000 address if !isZeroAddress(addrDesc) { // locate the contract and set i to the index in the array of contracts - contractIndex, found := ac.findContractIndex(contract) + contractIndex, found := ac.findContractIndex(addrDesc, contract, d.hotAddrTracker) if !found { contractIndex = len(ac.Contracts) ac.Contracts = append(ac.Contracts, unpackedAddrContract{ @@ -682,6 +682,9 @@ func (d *RocksDB) processContractTransfers(blockTx *ethBlockTx, tx *bchain.Tx, a } func (d *RocksDB) processAddressesEthereumType(block *bchain.Block, addresses addressesMap, addressContracts map[string]*unpackedAddrContracts) ([]ethBlockTx, error) { + if d.hotAddrTracker != nil { + d.hotAddrTracker.BeginBlock() + } blockTxs := make([]ethBlockTx, len(block.Txs)) for txi := range block.Txs { tx := &block.Txs[txi] @@ -719,6 +722,9 @@ func (d *RocksDB) ReconnectInternalDataToBlockEthereumType(block *bchain.Block) if d.chainParser.GetChainType() != bchain.ChainEthereumType { return errors.New("Unsupported chain type") } + if d.hotAddrTracker != nil { + d.hotAddrTracker.BeginBlock() + } addresses := make(addressesMap) addressContracts := make(map[string]*unpackedAddrContracts) @@ -1350,7 +1356,7 @@ func (d *RocksDB) disconnectAddress(btxID []byte, internal bool, addrDesc bchain } } } else { - contractIndex, found := addrContracts.findContractIndex(btxContract.contract) + contractIndex, found := addrContracts.findContractIndex(addrDesc, btxContract.contract, nil) if found { addrContract := &addrContracts.Contracts[contractIndex] if addrContract.Txs > 0 { @@ -1603,8 +1609,6 @@ type unpackedAddrContracts struct { contractIndexDirty bool } -const addrContractsIndexMinSize = 192 - type contractIndexKey [eth.EthereumTypeAddressDescriptorLen]byte func contractIndexKeyFromDesc(addr bchain.AddressDescriptor) (contractIndexKey, bool) { @@ -1617,11 +1621,6 @@ func 
contractIndexKeyFromDesc(addr bchain.AddressDescriptor) (contractIndexKey, } func (acs *unpackedAddrContracts) rebuildContractIndex() { - if len(acs.Contracts) < addrContractsIndexMinSize { - acs.contractIndex = nil - acs.contractIndexDirty = false - return - } m := make(map[contractIndexKey]int, len(acs.Contracts)) for i := range acs.Contracts { if key, ok := contractIndexKeyFromDesc(acs.Contracts[i].Contract); ok { @@ -1632,8 +1631,16 @@ func (acs *unpackedAddrContracts) rebuildContractIndex() { acs.contractIndexDirty = false } -func (acs *unpackedAddrContracts) findContractIndex(contract bchain.AddressDescriptor) (int, bool) { - if len(acs.Contracts) >= addrContractsIndexMinSize { +func (acs *unpackedAddrContracts) findContractIndex(addrDesc, contract bchain.AddressDescriptor, hot *addressHotness) (int, bool) { + useIndex := false + if hot != nil && len(acs.Contracts) >= hot.minContracts { + // Rule B: use the index only for addresses that are "hot" in this block, + // so mid-size lists stay on a cheap linear scan unless we see repeated lookups. 
+ if addrKey, ok := addressHotnessKeyFromDesc(addrDesc); ok { + useIndex = hot.ShouldUseIndex(addrKey, len(acs.Contracts)) + } + } + if useIndex { if acs.contractIndex == nil || acs.contractIndexDirty { acs.rebuildContractIndex() } diff --git a/db/rocksdb_ethereumtype_test.go b/db/rocksdb_ethereumtype_test.go index 67ae6c78..4a590401 100644 --- a/db/rocksdb_ethereumtype_test.go +++ b/db/rocksdb_ethereumtype_test.go @@ -45,42 +45,51 @@ func makeTestAddrDesc(seed int) bchain.AddressDescriptor { func Test_unpackedAddrContracts_findContractIndex_LazyMap(t *testing.T) { acs := &unpackedAddrContracts{} - for i := 0; i < addrContractsIndexMinSize+2; i++ { + minContracts := 192 + for i := 0; i < minContracts+2; i++ { acs.Contracts = append(acs.Contracts, unpackedAddrContract{ Contract: makeTestAddrDesc(i), }) } + addrDesc := makeTestAddrDesc(9999) - target := acs.Contracts[addrContractsIndexMinSize].Contract - idx, found := acs.findContractIndex(target) - if !found || idx != addrContractsIndexMinSize { - t.Fatalf("findContractIndex() = (%v, %v), want (%v, true)", idx, found, addrContractsIndexMinSize) + target := acs.Contracts[minContracts].Contract + idx, found := acs.findContractIndex(addrDesc, target, nil) + if !found || idx != minContracts { + t.Fatalf("findContractIndex() = (%v, %v), want (%v, true)", idx, found, minContracts) } - if acs.contractIndex == nil { - t.Fatal("expected contract index map to be built") + if acs.contractIndex != nil { + t.Fatal("did not expect contract index map to be built without hotness") } - missing := makeTestAddrDesc(addrContractsIndexMinSize + 1024) + missing := makeTestAddrDesc(minContracts + 1024) if _, found := findContractInAddressContracts(missing, acs.Contracts); found { - missing = makeTestAddrDesc(addrContractsIndexMinSize + 2048) + missing = makeTestAddrDesc(minContracts + 2048) if _, found := findContractInAddressContracts(missing, acs.Contracts); found { t.Fatal("failed to generate a missing contract for test") } } - if _, 
found := acs.findContractIndex(missing); found { + if _, found := acs.findContractIndex(addrDesc, missing, nil); found { t.Fatal("expected missing contract to be not found") } } func Test_unpackedAddrContracts_findContractIndex_DirtyRebuild(t *testing.T) { acs := &unpackedAddrContracts{} - for i := 0; i < addrContractsIndexMinSize+1; i++ { + minContracts := 192 + for i := 0; i < minContracts+1; i++ { acs.Contracts = append(acs.Contracts, unpackedAddrContract{ Contract: makeTestAddrDesc(i), }) } + addrDesc := makeTestAddrDesc(9998) + hot := newAddressHotness(minContracts, 4, 1) + if hot == nil { + t.Fatal("expected hotness tracker to be initialized") + } + hot.BeginBlock() - _, _ = acs.findContractIndex(acs.Contracts[0].Contract) + _, _ = acs.findContractIndex(addrDesc, acs.Contracts[0].Contract, hot) if acs.contractIndex == nil { t.Fatal("expected contract index map to be built") } @@ -90,35 +99,72 @@ func Test_unpackedAddrContracts_findContractIndex_DirtyRebuild(t *testing.T) { acs.Contracts = append(acs.Contracts[:1], acs.Contracts[2:]...) 
acs.markContractIndexDirty() - if _, found := acs.findContractIndex(removed); found { + if _, found := acs.findContractIndex(addrDesc, removed, hot); found { t.Fatal("expected removed contract to be not found after rebuild") } - if idx, found := acs.findContractIndex(acs.Contracts[1].Contract); !found || idx != 1 { + if idx, found := acs.findContractIndex(addrDesc, acs.Contracts[1].Contract, hot); !found || idx != 1 { t.Fatalf("findContractIndex() = (%v, %v), want (1, true)", idx, found) } } func Test_unpackedAddrContracts_findContractIndex_InvalidLenFallback(t *testing.T) { acs := &unpackedAddrContracts{} - for i := 0; i < addrContractsIndexMinSize; i++ { + minContracts := 192 + for i := 0; i < minContracts; i++ { acs.Contracts = append(acs.Contracts, unpackedAddrContract{ Contract: makeTestAddrDesc(i), }) } + addrDesc := makeTestAddrDesc(9997) + hot := newAddressHotness(minContracts, 4, 1) + if hot == nil { + t.Fatal("expected hotness tracker to be initialized") + } + hot.BeginBlock() invalid := bchain.AddressDescriptor([]byte{1, 2, 3}) acs.Contracts = append(acs.Contracts, unpackedAddrContract{Contract: invalid}) // Build index, which will skip the invalid entry. 
- _, _ = acs.findContractIndex(acs.Contracts[0].Contract) + _, _ = acs.findContractIndex(addrDesc, acs.Contracts[0].Contract, hot) if acs.contractIndex == nil { t.Fatal("expected contract index map to be built") } - if idx, found := acs.findContractIndex(invalid); !found || idx != len(acs.Contracts)-1 { + if idx, found := acs.findContractIndex(addrDesc, invalid, hot); !found || idx != len(acs.Contracts)-1 { t.Fatalf("findContractIndex() = (%v, %v), want (%v, true)", idx, found, len(acs.Contracts)-1) } } +func Test_unpackedAddrContracts_findContractIndex_HotnessTriggers(t *testing.T) { + hotMinContracts := 192 + hotMinHits := 3 + hot := newAddressHotness(hotMinContracts, 4, hotMinHits) + if hot == nil { + t.Fatal("expected hotness tracker to be initialized") + } + hot.BeginBlock() + + acs := &unpackedAddrContracts{} + for i := 0; i < hotMinContracts; i++ { + acs.Contracts = append(acs.Contracts, unpackedAddrContract{ + Contract: makeTestAddrDesc(i), + }) + } + addrDesc := makeTestAddrDesc(777) + target := acs.Contracts[hotMinContracts/2].Contract + + for i := 0; i < hotMinHits-1; i++ { + _, _ = acs.findContractIndex(addrDesc, target, hot) + if acs.contractIndex != nil { + t.Fatalf("unexpected index build before min hits, hit %d", i+1) + } + } + _, _ = acs.findContractIndex(addrDesc, target, hot) + if acs.contractIndex == nil { + t.Fatal("expected index to be built after reaching min hits") + } +} + func verifyAfterEthereumTypeBlock1(t *testing.T, d *RocksDB, afterDisconnect bool) { if err := checkColumn(d, cfHeight, []keyPair{ { @@ -1524,7 +1570,12 @@ func Benchmark_contractIndexLookup(b *testing.B) { for i := 0; i < n; i++ { contracts[i].Contract = makeTestAddrDesc(i) } + addrDesc := makeTestAddrDesc(1234) target := contracts[n/2].Contract + hot := newAddressHotness(192, 8, 1) + if hot != nil { + hot.BeginBlock() + } b.Run(fmt.Sprintf("ScanHit_%d", n), func(b *testing.B) { b.ReportAllocs() @@ -1537,11 +1588,11 @@ func Benchmark_contractIndexLookup(b *testing.B) { 
b.Run(fmt.Sprintf("MapHit_%d", n), func(b *testing.B) { acs := &unpackedAddrContracts{Contracts: contracts} // Build once to isolate lookup cost. - _, _ = acs.findContractIndex(target) + _, _ = acs.findContractIndex(addrDesc, target, hot) b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { - _, _ = acs.findContractIndex(target) + _, _ = acs.findContractIndex(addrDesc, target, hot) } }) @@ -1552,7 +1603,7 @@ func Benchmark_contractIndexLookup(b *testing.B) { for i := 0; i < b.N; i++ { acs.contractIndex = nil acs.contractIndexDirty = false - _, _ = acs.findContractIndex(target) + _, _ = acs.findContractIndex(addrDesc, target, hot) } }) } diff --git a/docs/config.md b/docs/config.md index d004d936..7eceba74 100644 --- a/docs/config.md +++ b/docs/config.md @@ -95,6 +95,10 @@ Good examples of coin configuration are * `mempool_sub_workers` – Number of subworkers for BitcoinType mempool. * `block_addresses_to_keep` – Number of blocks that are to be kept in blockaddresses column. * `additional_params` – Object of coin-specific params. + * Hot-address configuration (Blockbook, Ethereum-type indexing): + * `hot_address_min_contracts` – Minimum number of contracts before hotness tracking applies (default **192**). + * `hot_address_min_hits` – Lookups within the current block required to mark an address hot (default **3**, clamped to **10**). + * `hot_address_lru_cache_size` – Max hot addresses kept in the LRU (default **20000**, clamped to **100,000**). * `meta` – Common package metadata. * `package_maintainer` – Full name of package maintainer. diff --git a/docs/rocksdb.md b/docs/rocksdb.md index 3a230085..c7dff1ef 100644 --- a/docs/rocksdb.md +++ b/docs/rocksdb.md @@ -107,6 +107,13 @@ Column families used only by **Ethereum type** coins: <(nr_values vuint)+[]((id bigInt)+(value bigInt)) if ERC1155> ``` + - Contract ordering & hotness lookup + + Contract entries are appended in discovery order (they are not sorted). 
Lookups are normally a linear scan, but for + lists holding at least `hot_address_min_contracts` entries we lazily build an in-memory index map once the owning address becomes "hot" (looked up at least `hot_address_min_hits` times within the + current block). A size-limited LRU keeps hot addresses across blocks; once the cache is full, the least-recently used hot address is + evicted and its lookups fall back to linear scans until it becomes hot again. + - **internalData** (used only by Ethereum type coins) Maps _txid_ to _type (CALL 0 | CREATE 1)_, _addrDesc of created contract for CREATE type_, array of _type (CALL 0 | CREATE 1 | SELFDESTRUCT 2)_, _from addrDesc_, _to addrDesc_, _value bigInt_ and possible _error_.