BlockCache的结构已在《leveldb内部整体Cache结构说明》一文中介绍过。BlockCache中缓存的就是实际的KV数据,也就是DataBlock数据。为便于理解,可参考如下BlockCache结构图:
图1BlockCache:
Key:当前table对应的cache_id + DataBlock在ldb文件中的偏移量(offset)。Value:真实的DataBlock数据。注:所有已打开的ldb(即sst)文件的DataBlock都存放在全局唯一的一份BlockCache中,而不同ldb文件中DataBlock的offset可能相同。为了区分不同ldb文件中offset相同的DataBlock,需要给每个ldb文件分配一个唯一的cache_id,这样 key = cache_id + block_offset 的组合就是唯一的了。
BlockCache的使用主要封装在Table类中,下面解读Table.cc源文件:
namespace leveldb {

// Per-table state shared by all Table member functions. Owns the filter
// reader, the (optional) heap-allocated filter bytes and the index block;
// the destructor does not delete `file`.
struct Table::Rep {
  ~Rep() {
    delete filter;
    delete[] filter_data;
    delete index_block;
  }

  Options options;
  Status status;
  RandomAccessFile* file;  // Handle of the ldb (sstable) file being read
  // Id taken from options.block_cache->NewId() in Table::Open (0 when no
  // block cache is configured). BlockReader writes it into the first 8 bytes
  // of every block-cache key, so the id identifies this table, not a single
  // data block.
  uint64_t cache_id;
  FilterBlockReader* filter;  // Reader for the filter block; may stay nullptr
  // Filter block bytes; set by ReadFilter only when they were heap-allocated,
  // and released with delete[] in ~Rep.
  const char* filter_data;

  BlockHandle metaindex_handle;  // Handle to metaindex_block: saved from footer
  Block* index_block;  // Index block read in Table::Open
};

// Opens an sstable: reads the footer, then the index block, and on success
// stores a new Table in *table (which is nullptr on every failure path).
//
// Original author's note: the index block is loaded up front so that a later
// key lookup can first consult the in-memory index to decide whether the key
// can be in this sstable before reading any data block, which reduces I/O.
Status Table::Open(const Options& options, RandomAccessFile* file,
                   uint64_t size, Table** table) {
  *table = nullptr;
  // A file shorter than the fixed-size footer cannot be an sstable
  // (the original author notes the footer is 48 bytes).
  if (size < Footer::kEncodedLength) {
    return Status::Corruption("file is too short to be an sstable");
  }

  char footer_space[Footer::kEncodedLength];
  Slice footer_input;
  // Read the footer from the tail of the file; it yields the
  // metaindex block handle and the index block handle used below.
  Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength,
                        &footer_input, footer_space);
  if (!s.ok()) return s;

  // Decode the footer so that footer.metaindex_handle() and
  // footer.index_handle() become available.
  Footer footer;
  s = footer.DecodeFrom(&footer_input);
  if (!s.ok()) return s;

  // Read the index block
  BlockContents index_block_contents;
  if (s.ok()) {
    ReadOptions opt;
    // paranoid_checks turns on checksum verification for this read.
    // Original author's note: it defaults to false, and enabling it may make
    // the whole database unreadable when part of the data is damaged.
    if (options.paranoid_checks) {
      opt.verify_checksums = true;
    }
    // Load the index block. Original author's note on ReadBlock (defined
    // elsewhere): it reads at the handle's offset in the sstable, verifies
    // the checksum when enabled, and decompresses the data when compressed.
    s = ReadBlock(file, opt, footer.index_handle(), &index_block_contents);
  }

  if (s.ok()) {
    // We've successfully read the footer and the index block: we're
    // ready to serve requests.
    Block* index_block = new Block(index_block_contents);
    Rep* rep = new Table::Rep;
    rep->options = options;
    rep->file = file;
    rep->metaindex_handle = footer.metaindex_handle();
    rep->index_block = index_block;
    // Ask the block cache (if any) for an id for this table; BlockReader
    // combines it with a block offset to form the cache key.
    rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0);
    rep->filter_data = nullptr;
    rep->filter = nullptr;
    // Create the Table object used to read and parse this sstable.
    *table = new Table(rep);
    // Load the filter block (a no-op when no filter policy is configured).
    (*table)->ReadMeta(footer);
  }

  return s;
}

// Reads the metaindex block and, if it has an entry for the configured
// filter policy, loads the corresponding filter block. All failures are
// swallowed: the table simply ends up without a filter.
void Table::ReadMeta(const Footer& footer) {
  // Without a filter policy there is no reason to read the filter block.
  if (rep_->options.filter_policy == nullptr) {
    return;  // Do not need any metadata
  }

  // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
  // it is an empty block.
  ReadOptions opt;
  if (rep_->options.paranoid_checks) {
    opt.verify_checksums = true;
  }
  // Read the metaindex block located by footer.metaindex_handle().
  BlockContents contents;
  if (!ReadBlock(rep_->file, opt, footer.metaindex_handle(), &contents).ok()) {
    // Do not propagate errors since meta info is not needed for operation
    return;
  }
  // Look up the entry whose key is exactly "filter." + <policy name>; its
  // value is handed to ReadFilter, which decodes it as the BlockHandle of the
  // filter block.
  Block* meta = new Block(contents);

  Iterator* iter = meta->NewIterator(BytewiseComparator());
  std::string key = "filter.";
  key.append(rep_->options.filter_policy->Name());
  iter->Seek(key);
  if (iter->Valid() && iter->key() == Slice(key)) {
    // Read the filter block described by the metaindex entry's value.
    ReadFilter(iter->value());
  }
  delete iter;
  delete meta;
}

// Decodes `filter_handle_value` as a BlockHandle, reads that block and
// installs a FilterBlockReader over it in rep_->filter. Returns silently,
// leaving rep_->filter untouched, if decoding or reading fails.
void Table::ReadFilter(const Slice& filter_handle_value) {
  Slice v = filter_handle_value;
  BlockHandle filter_handle;
  if (!filter_handle.DecodeFrom(&v).ok()) {
    return;
  }

  // We might want to unify with ReadBlock() if we start
  // requiring checksum verification in Table::Open.
  ReadOptions opt;
  if (rep_->options.paranoid_checks) {
    opt.verify_checksums = true;
  }
  // Read the filter block data.
  BlockContents block;
  if (!ReadBlock(rep_->file, opt, filter_handle, &block).ok()) {
    return;
  }
  // heap_allocated == true means the block bytes were allocated while
  // reading, so remember the pointer and let ~Rep release it with delete[].
  if (block.heap_allocated) {
    rep_->filter_data = block.data.data();  // Will need to delete later
  }
  // Build the reader that answers filter queries against this block.
  rep_->filter = new FilterBlockReader(rep_->options.filter_policy, block.data);
}

Table::~Table() { delete rep_; }

// Iterator cleanup callback for a block that is not held through a cache
// handle: `arg` is the Block to delete; the second argument is unused.
static void DeleteBlock(void* arg, void* ignored) {
  delete reinterpret_cast<Block*>(arg);
}

// Deleter passed to Cache::Insert: `value` is the cached Block to delete;
// `key` is unused.
static void DeleteCachedBlock(const Slice& key, void* value) {
  Block* block = reinterpret_cast<Block*>(value);
  delete block;
}

// Iterator cleanup callback for a block held through a cache handle:
// `arg` is the Cache and `h` the Cache::Handle to release.
static void ReleaseBlock(void* arg, void* h) {
  Cache* cache = reinterpret_cast<Cache*>(arg);
  Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
  cache->Release(handle);
}

// Reads the block identified by index_value (an encoded offset + size).
// Convert an index iterator value (i.e., an encoded BlockHandle)
// into an iterator over the contents of the corresponding block.
// `arg` must point to the Table being read. On any failure an error iterator
// carrying the failing status is returned instead.
Iterator* Table::BlockReader(void* arg, const ReadOptions& options,
                             const Slice& index_value) {
  Table* table = reinterpret_cast<Table*>(arg);
  Cache* block_cache = table->rep_->options.block_cache;
  Block* block = nullptr;
  Cache::Handle* cache_handle = nullptr;

  BlockHandle handle;
  Slice input = index_value;
  Status s = handle.DecodeFrom(&input);
  // We intentionally allow extra stuff in index_value so that we
  // can add more features in the future.

  if (s.ok()) {
    BlockContents contents;
    if (block_cache != nullptr) {
      // A block cache is configured: look there first. The 16-byte key is the
      // table's cache_id followed by the block's offset in the file.
      char cache_key_buffer[16];
      EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
      EncodeFixed64(cache_key_buffer + 8, handle.offset());
      Slice key(cache_key_buffer, sizeof(cache_key_buffer));
      cache_handle = block_cache->Lookup(key);
      // 1. Cache hit: use the cached Block directly.
      // 2. Cache miss: read the block from the sstable file.
      if (cache_handle != nullptr) {
        block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
      } else {
        s = ReadBlock(table->rep_->file, options, handle, &contents);
        if (s.ok()) {
          block = new Block(contents);
          // Insert the freshly read block into the cache only when its
          // contents are marked cachable and the caller asked to fill the
          // cache.
          if (contents.cachable && options.fill_cache) {
            cache_handle = block_cache->Insert(key, block, block->size(),
                                               &DeleteCachedBlock);
          }
        }
      }
    } else {
      // 3. No block cache configured: read straight from the sstable file.
      s = ReadBlock(table->rep_->file, options, handle, &contents);
      if (s.ok()) {
        block = new Block(contents);
      }
    }
  }

  Iterator* iter;
  if (block != nullptr) {
    iter = block->NewIterator(table->rep_->options.comparator);
    // 1. cache_handle == nullptr: the block is not held through the cache, so
    //    the registered cleanup deletes the block itself.
    // 2. cache_handle != nullptr: the block is held through the cache, so the
    //    registered cleanup releases the handle via ReleaseBlock.
    // (Original author's note: the cleanup runs when the iterator is
    // destroyed, and releasing drops one reference on the cache entry.)
    if (cache_handle == nullptr) {
      iter->RegisterCleanup(&DeleteBlock, block, nullptr);
    } else {
      iter->RegisterCleanup(&ReleaseBlock, block_cache, cache_handle);
    }
  } else {
    // No block could be obtained: return an error iterator carrying `s`.
    iter = NewErrorIterator(s);
  }
  return iter;
}

// Returns a two-level iterator over the sstable: an iterator over the index
// block drives BlockReader, which opens the data block for each index entry.
Iterator* Table::NewIterator(const ReadOptions& options) const {
  return NewTwoLevelIterator(
      rep_->index_block->NewIterator(rep_->options.comparator),
      &Table::BlockReader, const_cast<Table*>(this), options);
}

// Looks up `k` in this table. If the seek inside the candidate data block
// lands on a valid entry, handle_result(arg, found_key, found_value) is
// invoked. Returns the first non-OK status of the block iterator or the
// index iterator, otherwise OK.
Status Table::InternalGet(const ReadOptions& options, const Slice& k, void* arg,
                          void (*handle_result)(void*, const Slice&,
                                                const Slice&)) {
  Status s;
  // Use the key to find the index-block entry of the candidate data block.
  Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
  iiter->Seek(k);
  if (iiter->Valid()) {
    // handle_value is the encoded offset + size of the candidate data block.
    Slice handle_value = iiter->value();
    FilterBlockReader* filter = rep_->filter;
    BlockHandle handle;
    // When a filter is loaded and the handle decodes, ask the filter (keyed
    // by the data block's offset) whether the key may be present.
    if (filter != nullptr && handle.DecodeFrom(&handle_value).ok() &&
        !filter->KeyMayMatch(handle.offset(), k)) {
      // Not found
    } else {
      // The filter did not rule the key out (the original author notes that a
      // filter match does not guarantee the key exists), so open an iterator
      // over the data block and search it.
      Iterator* block_iter = BlockReader(this, options, iiter->value());
      // Seek to the key being looked up.
      block_iter->Seek(k);
      if (block_iter->Valid()) {
        // An entry was found at the seek position: pass it to the callback.
        (*handle_result)(arg, block_iter->key(), block_iter->value());
      }
      s = block_iter->status();
      delete block_iter;
    }
  }
  if (s.ok()) {
    s = iiter->status();
  }
  delete iiter;
  return s;
}

// Estimates the file offset at which `key` would be stored:
// 1. If the index-block seek lands on a valid entry whose handle decodes,
//    return that data block's offset.
// 2. If the entry is valid but its handle cannot be decoded, fall back to
//    the metaindex block's offset.
// 3. If the seek finds no entry (the key is past the last key in the file),
//    also fall back to the metaindex block's offset.
uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
  Iterator* index_iter =
      rep_->index_block->NewIterator(rep_->options.comparator);
  index_iter->Seek(key);
  uint64_t result;
  if (index_iter->Valid()) {
    BlockHandle handle;
    Slice input = index_iter->value();
    Status s = handle.DecodeFrom(&input);
    if (s.ok()) {
      result = handle.offset();
    } else {
      // Strange: we can't decode the block handle in the index block.
      // We'll just return the offset of the metaindex block, which is
      // close to the whole file size for this case.
      result = rep_->metaindex_handle.offset();
    }
  } else {
    // key is past the last key in the file.  Approximate the offset
    // by returning the offset of the metaindex block (which is
    // right near the end of the file).
    result = rep_->metaindex_handle.offset();
  }
  delete index_iter;
  return result;
}

}  // namespace leveldb
通过levledb之十六、十七、十八,三篇文章的梳理,基本上把leveldb的Cache流程理清了后续如有需要会继续补充。