65 constexpr static size_t kWordSize = 64;
66 constexpr static size_t kSuperBlockRankIntSize = 64;
67 constexpr static size_t kBasicBlockRankIntSize = 16;
68 constexpr static size_t kBasicBlockSize = 512;
69 constexpr static size_t kWordsPerBlock = 8;
70 constexpr static size_t kSuperBlockSize = 65536;
71 constexpr static size_t kBlocksPerSuperBlock = 128;
72 constexpr static size_t kSelectSampleFrequency = 16384;
74 alignas(64) uint64_t delta_super[8];
75 alignas(64) uint16_t delta_basic[32];
82 size_t padded_size_{};
85 std::span<const uint64_t> bits_;
87 size_t logical_word_count()
const {
88 return (num_bits_ + kWordSize - 1) / kWordSize;
91 size_t logical_word_bits(
size_t word_index)
const {
92 const size_t begin = word_index * kWordSize;
93 if (begin >= num_bits_) {
96 return std::min(kWordSize, num_bits_ - begin);
99 uint64_t logical_word(
size_t word_index)
const {
100 if (word_index >= bits_.size()) {
103 const size_t bits = logical_word_bits(word_index);
107 if (bits == kWordSize) {
108 return bits_[word_index];
110 return bits_[word_index] & first_bits_mask(bits);
113 uint64_t rank_in_basic_block(
size_t basic_block,
size_t offset)
const {
117 const size_t first_word = basic_block * kWordsPerBlock;
118 if (first_word + kWordsPerBlock <= bits_.size()) {
119 return rank_512(&bits_[first_word], offset);
123 size_t word_index = first_word;
124 while (offset >= kWordSize) {
125 result += std::popcount(logical_word(word_index));
131 std::popcount(logical_word(word_index) & first_bits_mask(offset));
136 uint64_t select_in_words(
size_t first_word,
size_t rank,
bool value)
const {
137 const size_t first_bit = first_word * kWordSize;
138 if (first_bit + kBasicBlockSize <= num_bits_ &&
139 first_word + kWordsPerBlock <= bits_.size()) {
140 return value ? first_bit + select_512(&bits_[first_word],
rank - 1)
141 : first_bit + select0_512(&bits_[first_word],
rank - 1);
144 for (
size_t word_index = first_word; word_index < logical_word_count();
146 const uint64_t word = logical_word(word_index);
147 const uint64_t candidates =
149 : (~word & first_bits_mask(logical_word_bits(word_index)));
150 const size_t count = std::popcount(candidates);
155 return word_index * kWordSize + select_64(candidates,
rank - 1);
164 size_t num_superblocks =
165 8 + (padded_size_ == 0 ? 0 : (padded_size_ - 1) / kSuperBlockSize);
169 num_superblocks = ((num_superblocks + 7) / 8) * 8;
170 size_t num_basicblocks = num_superblocks * kBlocksPerSuperBlock;
171 super_block_rank_.resize(num_superblocks * 64);
172 basic_block_rank_.resize(num_basicblocks * 16);
174 auto super_block_rank = super_block_rank_.As64BitInts();
175 auto basic_block_rank = basic_block_rank_.As16BitInts();
177 uint64_t super_block_sum = 0;
178 uint64_t basic_block_sum = 0;
180 for (
size_t i = 0; i / kBasicBlockSize < basic_block_rank.size();
182 if (i % kSuperBlockSize == 0) {
183 super_block_sum += basic_block_sum;
184 super_block_rank[i / kSuperBlockSize] = super_block_sum;
187 if (i % kBasicBlockSize == 0) {
188 basic_block_rank[i / kBasicBlockSize] =
189 static_cast<uint16_t
>(basic_block_sum);
191 if (i / kWordSize < logical_word_count()) {
192 basic_block_sum += std::popcount(logical_word(i / kWordSize));
195 max_rank_ = super_block_sum + basic_block_sum;
201 void build_select() {
202 uint64_t milestone = kSelectSampleFrequency;
203 uint64_t milestone0 = kSelectSampleFrequency;
207 size_t num_one_samples =
208 1 + (max_rank_ + kSelectSampleFrequency - 1) / kSelectSampleFrequency;
209 size_t num_zero_samples =
210 1 + (num_bits_ - max_rank_ + kSelectSampleFrequency - 1) /
211 kSelectSampleFrequency;
213 select1_samples_.resize(num_one_samples * 64);
214 select0_samples_.resize(num_zero_samples * 64);
215 auto select1_samples = select1_samples_.As64BitInts();
216 auto select0_samples = select0_samples_.As64BitInts();
218 select1_samples[0] = 0;
219 select0_samples[0] = 0;
221 size_t num_zeros = 1, num_ones = 1;
223 for (
size_t i = 0; i < logical_word_count(); ++i) {
224 const uint64_t word = logical_word(i);
225 const auto ones = std::popcount(word);
226 const auto zeros = logical_word_bits(i) - ones;
227 if (
rank + ones >= milestone) {
228 auto pos = select_64(word, milestone -
rank - 1);
231 select1_samples[num_ones++] = (64 * i + pos) / kSuperBlockSize;
232 milestone += kSelectSampleFrequency;
234 if (
rank0 + zeros >= milestone0) {
235 const uint64_t zero_word =
236 ~word & first_bits_mask(logical_word_bits(i));
237 auto pos = select_64(zero_word, milestone0 -
rank0 - 1);
238 select0_samples[num_zeros++] = (64 * i + pos) / kSuperBlockSize;
239 milestone0 += kSelectSampleFrequency;
245 for (
size_t i = 0; i < 8; ++i) {
246 delta_super[i] = i * kSuperBlockSize;
248 for (
size_t i = 0; i < 32; ++i) {
249 delta_basic[i] = i * kBasicBlockSize;
258 uint64_t find_superblock(uint64_t
rank)
const {
259 auto select1_samples = select1_samples_.AsConst64BitInts();
260 auto super_block_rank = super_block_rank_.AsConst64BitInts();
262 uint64_t left = select1_samples[
rank / kSelectSampleFrequency];
264 while (left + 7 < super_block_rank.size()) {
265 auto len = lower_bound_8x64(&super_block_rank[left],
rank);
267 return left + len - 1;
271 if (left + 3 < super_block_rank.size()) {
272 auto len = lower_bound_4x64(&super_block_rank[left],
rank);
274 return left + len - 1;
278 while (left < super_block_rank.size() && super_block_rank[left] <
rank) {
289 uint64_t find_superblock_zeros(uint64_t
rank0)
const {
290 auto select0_samples = select0_samples_.AsConst64BitInts();
291 auto super_block_rank = super_block_rank_.AsConst64BitInts();
293 uint64_t left = select0_samples[
rank0 / kSelectSampleFrequency];
295 while (left + 7 < super_block_rank.size()) {
296 auto len = lower_bound_delta_8x64(&super_block_rank[left],
rank0,
297 delta_super, kSuperBlockSize * left);
299 return left + len - 1;
303 if (left + 3 < super_block_rank.size()) {
304 auto len = lower_bound_delta_4x64(&super_block_rank[left],
rank0,
305 delta_super, kSuperBlockSize * left);
307 return left + len - 1;
311 while (left < super_block_rank.size() &&
312 kSuperBlockSize * left - super_block_rank[left] <
rank0) {
329 uint64_t find_basicblock(uint16_t local_rank, uint64_t s_block)
const {
330 auto basic_block_rank = basic_block_rank_.AsConst16BitInts();
332 for (
size_t pos = 0; pos < kBlocksPerSuperBlock; pos += 32) {
333 auto count = lower_bound_32x16(
334 &basic_block_rank[kBlocksPerSuperBlock * s_block + pos], local_rank);
336 return kBlocksPerSuperBlock * s_block + pos + count - 1;
339 return kBlocksPerSuperBlock * s_block + kBlocksPerSuperBlock - 1;
353 uint64_t find_basicblock_zeros(uint16_t local_rank0, uint64_t s_block)
const {
354 auto basic_block_rank = basic_block_rank_.AsConst16BitInts();
355 for (
size_t pos = 0; pos < kBlocksPerSuperBlock; pos += 32) {
356 auto count = lower_bound_delta_32x16(
357 &basic_block_rank[kBlocksPerSuperBlock * s_block + pos], local_rank0,
358 delta_basic, kBasicBlockSize * pos);
360 return kBlocksPerSuperBlock * s_block + pos + count - 1;
363 return kBlocksPerSuperBlock * s_block + kBlocksPerSuperBlock - 1;
383 uint64_t find_basicblock_is(uint16_t local_rank, uint64_t s_block)
const {
384 auto super_block_rank = super_block_rank_.AsConst64BitInts();
385 auto basic_block_rank = basic_block_rank_.AsConst16BitInts();
387 auto lower = super_block_rank[s_block];
388 auto upper = super_block_rank[s_block + 1];
390 uint64_t pos = kBlocksPerSuperBlock * local_rank / (upper - lower);
391 pos = pos + 16 < 32 ? 0 : (pos - 16);
392 pos = pos > 96 ? 96 : pos;
394 auto count = lower_bound_32x16(
395 &basic_block_rank[kBlocksPerSuperBlock * s_block + pos], local_rank);
397 return find_basicblock(local_rank, s_block);
400 return kBlocksPerSuperBlock * s_block + pos + count - 1;
405 auto count = lower_bound_32x16(
406 &basic_block_rank[kBlocksPerSuperBlock * s_block + pos], local_rank);
408 return find_basicblock(local_rank, s_block);
410 return kBlocksPerSuperBlock * s_block + pos + count - 1;
431 uint64_t find_basicblock_is_zeros(uint16_t local_rank0,
432 uint64_t s_block)
const {
433 auto super_block_rank = super_block_rank_.AsConst64BitInts();
434 auto basic_block_rank = basic_block_rank_.AsConst16BitInts();
436 auto lower = kSuperBlockSize * s_block - super_block_rank[s_block];
438 kSuperBlockSize * (s_block + 1) - super_block_rank[s_block + 1];
440 uint64_t pos = kBlocksPerSuperBlock * local_rank0 / (upper - lower);
441 pos = pos + 16 < 32 ? 0 : (pos - 16);
442 pos = pos > 96 ? 96 : pos;
444 auto count = lower_bound_delta_32x16(
445 &basic_block_rank[kBlocksPerSuperBlock * s_block + pos], local_rank0,
446 delta_basic, kBasicBlockSize * pos);
448 return find_basicblock_zeros(local_rank0, s_block);
451 return kBlocksPerSuperBlock * s_block + pos + count - 1;
456 auto count = lower_bound_delta_32x16(
457 &basic_block_rank[kBlocksPerSuperBlock * s_block + pos], local_rank0,
458 delta_basic, kBasicBlockSize * pos);
460 return find_basicblock_zeros(local_rank0, s_block);
462 return kBlocksPerSuperBlock * s_block + pos + count - 1;
466 BitVector() =
default;
467 BitVector(
const BitVector&) =
default;
468 BitVector(BitVector&&)
noexcept =
default;
469 BitVector& operator=(
const BitVector&) =
default;
470 BitVector& operator=(BitVector&&)
noexcept =
default;
472#ifdef PIXIE_DIAGNOSTICS
473 struct DiagnosticsBytes {
474 size_t source_bitvector_bytes = 0;
475 size_t super_block_rank_bytes = 0;
476 size_t basic_block_rank_bytes = 0;
477 size_t select1_samples_bytes = 0;
478 size_t select0_samples_bytes = 0;
479 size_t total_bytes = 0;
485 DiagnosticsBytes diagnostics_bytes()
const {
486 DiagnosticsBytes result;
487 result.source_bitvector_bytes = (num_bits_ + 7) / 8;
488 result.super_block_rank_bytes = super_block_rank_.AsConstBytes().size();
489 result.basic_block_rank_bytes = basic_block_rank_.AsConstBytes().size();
490 result.select1_samples_bytes = select1_samples_.AsConstBytes().size();
491 result.select0_samples_bytes = select0_samples_.AsConstBytes().size();
493 result.super_block_rank_bytes + result.basic_block_rank_bytes +
494 result.select1_samples_bytes + result.select0_samples_bytes;
501 void memory_report()
const {
502 const auto diagnostics = diagnostics_bytes();
503 const double source_bytes =
504 static_cast<double>(diagnostics.source_bitvector_bytes);
505 const auto log_bytes = [&](std::string_view label,
size_t bytes) {
506 const double percentage =
507 source_bytes > 0.0 ? 100.0 *
static_cast<double>(bytes) / source_bytes
509 spdlog::info(
"BitVector {}: {} bytes ({:.2f}% of source)", label, bytes,
512 log_bytes(
"source_bitvector", diagnostics.source_bitvector_bytes);
513 log_bytes(
"super_block_rank", diagnostics.super_block_rank_bytes);
514 log_bytes(
"basic_block_rank", diagnostics.basic_block_rank_bytes);
515 log_bytes(
"select1_samples", diagnostics.select1_samples_bytes);
516 log_bytes(
"select0_samples", diagnostics.select0_samples_bytes);
517 log_bytes(
"total", diagnostics.total_bytes);
527 explicit BitVector(std::span<const uint64_t> bit_vector,
size_t num_bits)
528 : num_bits_(std::min(num_bits, bit_vector.
size() * kWordSize)),
529 padded_size_(((num_bits_ + kWordSize - 1) / kWordSize) * kWordSize),
538 size_t size()
const {
return num_bits_; }
546 size_t word_idx = pos / kWordSize;
547 size_t bit_off = pos % kWordSize;
549 return (bits_[word_idx] >> bit_off) & 1;
558 uint64_t
rank(
size_t pos)
const {
559 if (pos >= num_bits_) [[unlikely]] {
563 auto super_block_rank = super_block_rank_.AsConst64BitInts();
564 auto basic_block_rank = basic_block_rank_.AsConst16BitInts();
566 uint64_t b_block = pos / kBasicBlockSize;
567 uint64_t s_block = pos / kSuperBlockSize;
569 uint64_t result = super_block_rank[s_block] + basic_block_rank[b_block];
571 result += rank_in_basic_block(b_block, pos - (b_block * kBasicBlockSize));
582 if (pos >= num_bits_) [[unlikely]] {
583 return num_bits_ - max_rank_;
585 return pos -
rank(pos);
596 if (
rank > max_rank_) [[unlikely]] {
599 if (
rank == 0) [[unlikely]] {
602 auto super_block_rank = super_block_rank_.AsConst64BitInts();
603 auto basic_block_rank = basic_block_rank_.AsConst16BitInts();
605 uint64_t s_block = find_superblock(
rank);
606 rank -= super_block_rank[s_block];
607 auto pos = find_basicblock_is(
rank, s_block);
608 rank -= basic_block_rank[pos];
609 return select_in_words(pos * kWordsPerBlock,
rank,
true);
620 if (
rank0 > num_bits_ - max_rank_) [[unlikely]] {
623 if (
rank0 == 0) [[unlikely]] {
626 auto super_block_rank = super_block_rank_.AsConst64BitInts();
627 auto basic_block_rank = basic_block_rank_.AsConst16BitInts();
629 uint64_t s_block = find_superblock_zeros(
rank0);
630 rank0 -= kSuperBlockSize * s_block - super_block_rank[s_block];
631 auto pos = find_basicblock_is_zeros(
rank0, s_block);
632 auto pos_in_super_block = pos & (kBlocksPerSuperBlock - 1);
633 rank0 -= kBasicBlockSize * pos_in_super_block - basic_block_rank[pos];
634 return select_in_words(pos * kWordsPerBlock,
rank0,
false);
642 result.reserve(num_bits_);
644 for (
size_t i = 0; i < num_bits_; i++) {
645 result.push_back(
operator[](i) ?
'1' :
'0');