65 constexpr static size_t kWordSize = 64;
66 constexpr static size_t kSuperBlockRankIntSize = 64;
67 constexpr static size_t kBasicBlockRankIntSize = 16;
68 constexpr static size_t kBasicBlockSize = 512;
69 constexpr static size_t kWordsPerBlock = 8;
70 constexpr static size_t kSuperBlockSize = 65536;
71 constexpr static size_t kBlocksPerSuperBlock = 128;
72 constexpr static size_t kSelectSampleFrequency = 16384;
74 alignas(64) uint64_t delta_super[8];
75 alignas(64) uint16_t delta_basic[32];
81 const size_t num_bits_;
82 const size_t padded_size_;
85 std::span<const uint64_t> bits_;
91 size_t num_superblocks = 8 + (padded_size_ - 1) / kSuperBlockSize;
95 num_superblocks = ((num_superblocks + 7) / 8) * 8;
96 size_t num_basicblocks = num_superblocks * kBlocksPerSuperBlock;
97 super_block_rank_.
resize(num_superblocks * 64);
98 basic_block_rank_.resize(num_basicblocks * 16);
100 auto super_block_rank = super_block_rank_.As64BitInts();
101 auto basic_block_rank = basic_block_rank_.As16BitInts();
103 uint64_t super_block_sum = 0;
104 uint16_t basic_block_sum = 0;
106 for (
size_t i = 0; i / kBasicBlockSize < basic_block_rank.size();
108 if (i % kSuperBlockSize == 0) {
109 super_block_sum += basic_block_sum;
110 super_block_rank[i / kSuperBlockSize] = super_block_sum;
113 if (i % kBasicBlockSize == 0) {
114 basic_block_rank[i / kBasicBlockSize] = basic_block_sum;
116 if (i / kWordSize < bits_.size()) {
117 basic_block_sum += std::popcount(bits_[i / kWordSize]);
120 max_rank_ = super_block_sum + basic_block_sum;
126 void build_select() {
127 uint64_t milestone = kSelectSampleFrequency;
128 uint64_t milestone0 = kSelectSampleFrequency;
132 size_t num_one_samples =
133 1 + (max_rank_ + kSelectSampleFrequency - 1) / kSelectSampleFrequency;
134 size_t num_zero_samples =
135 1 + (num_bits_ - max_rank_ + kSelectSampleFrequency - 1) /
136 kSelectSampleFrequency;
138 select1_samples_.resize(num_one_samples * 64);
139 select0_samples_.resize(num_zero_samples * 64);
140 auto select1_samples = select1_samples_.As64BitInts();
141 auto select0_samples = select0_samples_.As64BitInts();
143 select1_samples[0] = 0;
144 select0_samples[0] = 0;
146 size_t num_zeros = 1, num_ones = 1;
148 for (
size_t i = 0; i < bits_.size(); ++i) {
149 auto ones = std::popcount(bits_[i]);
150 auto zeros = 64 - ones;
151 if (
rank + ones >= milestone) {
152 auto pos = select_64(bits_[i], milestone -
rank - 1);
155 select1_samples[num_ones++] = (64 * i + pos) / kSuperBlockSize;
156 milestone += kSelectSampleFrequency;
158 if (
rank0 + zeros >= milestone0) {
159 auto pos = select_64(~bits_[i], milestone0 -
rank0 - 1);
160 select0_samples[num_zeros++] = (64 * i + pos) / kSuperBlockSize;
161 milestone0 += kSelectSampleFrequency;
167 for (
size_t i = 0; i < 8; ++i) {
168 delta_super[i] = i * kSuperBlockSize;
170 for (
size_t i = 0; i < 32; ++i) {
171 delta_basic[i] = i * kBasicBlockSize;
180 uint64_t find_superblock(uint64_t
rank)
const {
181 auto select1_samples = select1_samples_.AsConst64BitInts();
182 auto super_block_rank = super_block_rank_.AsConst64BitInts();
184 uint64_t left = select1_samples[
rank / kSelectSampleFrequency];
186 while (left + 7 < super_block_rank.size()) {
187 auto len = lower_bound_8x64(&super_block_rank[left],
rank);
189 return left + len - 1;
193 if (left + 3 < super_block_rank.size()) {
194 auto len = lower_bound_4x64(&super_block_rank[left],
rank);
196 return left + len - 1;
200 while (left < super_block_rank.size() && super_block_rank[left] <
rank) {
211 uint64_t find_superblock_zeros(uint64_t
rank0)
const {
212 auto select0_samples = select0_samples_.AsConst64BitInts();
213 auto super_block_rank = super_block_rank_.AsConst64BitInts();
215 uint64_t left = select0_samples[
rank0 / kSelectSampleFrequency];
217 while (left + 7 < super_block_rank.size()) {
218 auto len = lower_bound_delta_8x64(&super_block_rank[left],
rank0,
219 delta_super, kSuperBlockSize * left);
221 return left + len - 1;
225 if (left + 3 < super_block_rank.size()) {
226 auto len = lower_bound_delta_4x64(&super_block_rank[left],
rank0,
227 delta_super, kSuperBlockSize * left);
229 return left + len - 1;
233 while (left < super_block_rank.size() &&
234 kSuperBlockSize * left - super_block_rank[left] <
rank0) {
251 uint64_t find_basicblock(uint16_t local_rank, uint64_t s_block)
const {
252 auto basic_block_rank = basic_block_rank_.AsConst16BitInts();
254 for (
size_t pos = 0; pos < kBlocksPerSuperBlock; pos += 32) {
255 auto count = lower_bound_32x16(
256 &basic_block_rank[kBlocksPerSuperBlock * s_block + pos], local_rank);
258 return kBlocksPerSuperBlock * s_block + pos + count - 1;
261 return kBlocksPerSuperBlock * s_block + kBlocksPerSuperBlock - 1;
275 uint64_t find_basicblock_zeros(uint16_t local_rank0, uint64_t s_block)
const {
276 auto basic_block_rank = basic_block_rank_.AsConst16BitInts();
277 for (
size_t pos = 0; pos < kBlocksPerSuperBlock; pos += 32) {
278 auto count = lower_bound_delta_32x16(
279 &basic_block_rank[kBlocksPerSuperBlock * s_block + pos], local_rank0,
280 delta_basic, kBasicBlockSize * pos);
282 return kBlocksPerSuperBlock * s_block + pos + count - 1;
285 return kBlocksPerSuperBlock * s_block + kBlocksPerSuperBlock - 1;
305 uint64_t find_basicblock_is(uint16_t local_rank, uint64_t s_block)
const {
306 auto super_block_rank = super_block_rank_.AsConst64BitInts();
307 auto basic_block_rank = basic_block_rank_.AsConst16BitInts();
309 auto lower = super_block_rank[s_block];
310 auto upper = super_block_rank[s_block + 1];
312 uint64_t pos = kBlocksPerSuperBlock * local_rank / (upper - lower);
313 pos = pos + 16 < 32 ? 0 : (pos - 16);
314 pos = pos > 96 ? 96 : pos;
316 auto count = lower_bound_32x16(
317 &basic_block_rank[kBlocksPerSuperBlock * s_block + pos], local_rank);
319 return find_basicblock(local_rank, s_block);
322 return kBlocksPerSuperBlock * s_block + pos + count - 1;
327 auto count = lower_bound_32x16(
328 &basic_block_rank[kBlocksPerSuperBlock * s_block + pos], local_rank);
330 return find_basicblock(local_rank, s_block);
332 return kBlocksPerSuperBlock * s_block + pos + count - 1;
353 uint64_t find_basicblock_is_zeros(uint16_t local_rank0,
354 uint64_t s_block)
const {
355 auto super_block_rank = super_block_rank_.AsConst64BitInts();
356 auto basic_block_rank = basic_block_rank_.AsConst16BitInts();
358 auto lower = kSuperBlockSize * s_block - super_block_rank[s_block];
360 kSuperBlockSize * (s_block + 1) - super_block_rank[s_block + 1];
362 uint64_t pos = kBlocksPerSuperBlock * local_rank0 / (upper - lower);
363 pos = pos + 16 < 32 ? 0 : (pos - 16);
364 pos = pos > 96 ? 96 : pos;
366 auto count = lower_bound_delta_32x16(
367 &basic_block_rank[kBlocksPerSuperBlock * s_block + pos], local_rank0,
368 delta_basic, kBasicBlockSize * pos);
370 return find_basicblock_zeros(local_rank0, s_block);
373 return kBlocksPerSuperBlock * s_block + pos + count - 1;
378 auto count = lower_bound_delta_32x16(
379 &basic_block_rank[kBlocksPerSuperBlock * s_block + pos], local_rank0,
380 delta_basic, kBasicBlockSize * pos);
382 return find_basicblock_zeros(local_rank0, s_block);
384 return kBlocksPerSuperBlock * s_block + pos + count - 1;
388#ifdef PIXIE_DIAGNOSTICS
389 struct DiagnosticsBytes {
390 size_t source_bitvector_bytes = 0;
391 size_t super_block_rank_bytes = 0;
392 size_t basic_block_rank_bytes = 0;
393 size_t select1_samples_bytes = 0;
394 size_t select0_samples_bytes = 0;
395 size_t total_bytes = 0;
401 DiagnosticsBytes diagnostics_bytes()
const {
402 DiagnosticsBytes result;
403 result.source_bitvector_bytes = (num_bits_ + 7) / 8;
404 result.super_block_rank_bytes = super_block_rank_.AsConstBytes().size();
405 result.basic_block_rank_bytes = basic_block_rank_.AsConstBytes().size();
406 result.select1_samples_bytes = select1_samples_.AsConstBytes().size();
407 result.select0_samples_bytes = select0_samples_.AsConstBytes().size();
409 result.super_block_rank_bytes + result.basic_block_rank_bytes +
410 result.select1_samples_bytes + result.select0_samples_bytes;
417 void memory_report()
const {
418 const auto diagnostics = diagnostics_bytes();
419 const double source_bytes =
420 static_cast<double>(diagnostics.source_bitvector_bytes);
421 const auto log_bytes = [&](std::string_view label,
size_t bytes) {
422 const double percentage =
423 source_bytes > 0.0 ? 100.0 *
static_cast<double>(bytes) / source_bytes
425 spdlog::info(
"BitVector {}: {} bytes ({:.2f}% of source)", label, bytes,
428 log_bytes(
"source_bitvector", diagnostics.source_bitvector_bytes);
429 log_bytes(
"super_block_rank", diagnostics.super_block_rank_bytes);
430 log_bytes(
"basic_block_rank", diagnostics.basic_block_rank_bytes);
431 log_bytes(
"select1_samples", diagnostics.select1_samples_bytes);
432 log_bytes(
"select0_samples", diagnostics.select0_samples_bytes);
433 log_bytes(
"total", diagnostics.total_bytes);
443 explicit BitVector(std::span<const uint64_t> bit_vector,
size_t num_bits)
444 : num_bits_(std::min(num_bits, bit_vector.
size() * kWordSize)),
445 padded_size_(((num_bits_ + kWordSize - 1) / kWordSize) * kWordSize),
454 size_t size()
const {
return num_bits_; }
462 size_t word_idx = pos / kWordSize;
463 size_t bit_off = pos % kWordSize;
465 return (bits_[word_idx] >> bit_off) & 1;
474 uint64_t
rank(
size_t pos)
const {
475 if (pos >= bits_.size() * kWordSize) [[unlikely]] {
479 auto super_block_rank = super_block_rank_.AsConst64BitInts();
480 auto basic_block_rank = basic_block_rank_.AsConst16BitInts();
482 uint64_t b_block = pos / kBasicBlockSize;
483 uint64_t s_block = pos / kSuperBlockSize;
485 uint64_t result = super_block_rank[s_block] + basic_block_rank[b_block];
487 result += rank_512(&bits_[b_block * kWordsPerBlock],
488 pos - (b_block * kBasicBlockSize));
499 if (pos >= bits_.size() * kWordSize) [[unlikely]] {
500 return bits_.size() * kWordSize - max_rank_;
502 return pos -
rank(pos);
513 if (
rank > max_rank_) [[unlikely]] {
516 if (
rank == 0) [[unlikely]] {
519 auto super_block_rank = super_block_rank_.AsConst64BitInts();
520 auto basic_block_rank = basic_block_rank_.AsConst16BitInts();
522 uint64_t s_block = find_superblock(
rank);
523 rank -= super_block_rank[s_block];
524 auto pos = find_basicblock_is(
rank, s_block);
525 rank -= basic_block_rank[pos];
526 pos *= kWordsPerBlock;
529 if (pos + kWordsPerBlock - 1 < kWordsPerBlock) [[unlikely]] {
530 size_t ones = std::popcount(bits_[pos]);
531 while (pos < bits_.size() && ones <
rank) {
533 ones = std::popcount(bits_[++pos]);
535 return kWordSize * pos + select_64(bits_[pos],
rank - 1);
537 return kWordSize * pos + select_512(&bits_[pos],
rank - 1);
548 if (
rank0 > num_bits_ - max_rank_) [[unlikely]] {
551 if (
rank0 == 0) [[unlikely]] {
554 auto super_block_rank = super_block_rank_.AsConst64BitInts();
555 auto basic_block_rank = basic_block_rank_.AsConst16BitInts();
557 uint64_t s_block = find_superblock_zeros(
rank0);
558 rank0 -= kSuperBlockSize * s_block - super_block_rank[s_block];
559 auto pos = find_basicblock_is_zeros(
rank0, s_block);
560 auto pos_in_super_block = pos & (kBlocksPerSuperBlock - 1);
561 rank0 -= kBasicBlockSize * pos_in_super_block - basic_block_rank[pos];
562 pos *= kWordsPerBlock;
565 if (pos + kWordsPerBlock - 1 < kWordsPerBlock) [[unlikely]] {
566 size_t zeros = std::popcount(~bits_[pos]);
567 while (pos < bits_.size() && zeros <
rank0) {
569 zeros = std::popcount(~bits_[++pos]);
571 return kWordSize * pos + select_64(~bits_[pos],
rank0 - 1);
573 return kWordSize * pos + select0_512(&bits_[pos],
rank0 - 1);
581 result.reserve(num_bits_);
583 for (
size_t i = 0; i < num_bits_; i++) {
584 result.push_back(
operator[](i) ?
'1' :
'0');