Sux
SimpleSelectZeroHalf.hpp
Go to the documentation of this file.
1 /*
2  * Sux: Succinct data structures
3  *
4  * Copyright (C) 2007-2020 Sebastiano Vigna
5  *
6  * This library is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published by the Free
8  * Software Foundation; either version 3 of the License, or (at your option)
9  * any later version.
10  *
11  * This library is free software; you can redistribute it and/or modify it under
12  * the terms of the GNU General Public License as published by the Free Software
13  * Foundation; either version 3, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful, but WITHOUT ANY
16  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
17  * PARTICULAR PURPOSE. See the GNU General Public License for more details.
18  *
19  * Under Section 7 of GPL version 3, you are granted additional permissions
20  * described in the GCC Runtime Library Exception, version 3.1, as published by
21  * the Free Software Foundation.
22  *
23  * You should have received a copy of the GNU General Public License and a copy of
24  * the GCC Runtime Library Exception along with this program; see the files
25  * COPYING3 and COPYING.RUNTIME respectively. If not, see
26  * <http://www.gnu.org/licenses/>.
27  */
28 
29 #pragma once
30 
31 #include "../support/common.hpp"
32 #include "../util/Vector.hpp"
33 #include "SelectZero.hpp"
34 #include <cstdint>
35 
36 namespace sux::bits {
37 
38 using namespace std;
39 using namespace sux;
40 
54 template <util::AllocType AT = util::AllocType::MALLOC> class SimpleSelectZeroHalf {
55  private:
56  static const int log2_zeros_per_inventory = 10;
57  static const int zeros_per_inventory = 1 << log2_zeros_per_inventory;
58  static const uint64_t zeros_per_inventory_mask = zeros_per_inventory - 1;
59  static const int log2_longwords_per_subinventory = 2;
60  static const int longwords_per_subinventory = 1 << log2_longwords_per_subinventory;
61  static const int log2_zeros_per_sub64 = log2_zeros_per_inventory - log2_longwords_per_subinventory;
62  static const int zeros_per_sub64 = 1 << log2_zeros_per_sub64;
63  static const uint64_t zeros_per_sub64_mask = zeros_per_sub64 - 1;
64  static const int log2_zeros_per_sub16 = log2_zeros_per_sub64 - 2;
65  static const int zeros_per_sub16 = 1 << log2_zeros_per_sub16;
66  static const uint64_t zeros_per_sub16_mask = zeros_per_sub16 - 1;
67 
68  const uint64_t *bits;
69  util::Vector<int64_t, AT> inventory;
70 
71  uint64_t num_words, inventory_size, num_zeros;
72 
73  public:
75 
82  SimpleSelectZeroHalf(const uint64_t *const bits, const uint64_t num_bits) : bits(bits) {
83  num_words = (num_bits + 63) / 64;
84 
85  // Init rank/select structure
86  uint64_t c = 0;
87  for (uint64_t i = 0; i < num_words; i++) c += __builtin_popcountll(~bits[i]);
88  num_zeros = c;
89 
90  if (num_bits % 64 != 0) c -= 64 - num_bits % 64;
91  assert(c <= num_bits);
92 
93  inventory_size = (c + zeros_per_inventory - 1) / zeros_per_inventory;
94 
95 #ifdef DEBUG
96  printf("Number of bits: %" PRId64 " Number of zeros: %" PRId64 " (%.2f%%)\n", num_bits, c, (c * 100.0) / num_bits);
97 
98  printf("Ones per inventory: %d Ones per sub 64: %d sub 16: %d\n", zeros_per_inventory, zeros_per_sub64, zeros_per_sub16);
99 #endif
100 
101  inventory.size(inventory_size * (longwords_per_subinventory + 1) + 1);
102 
103  uint64_t d = 0;
104  const uint64_t mask = zeros_per_inventory - 1;
105 
106  // First phase: we build an inventory for each one out of zeros_per_inventory.
107  for (uint64_t i = 0; i < num_words; i++)
108  for (int j = 0; j < 64; j++) {
109  if (i * 64 + j >= num_bits) break;
110  if (~bits[i] & 1ULL << j) {
111  if ((d & mask) == 0) inventory[(d >> log2_zeros_per_inventory) * (longwords_per_subinventory + 1)] = i * 64 + j;
112  d++;
113  }
114  }
115 
116  assert(c == d);
117  inventory[inventory_size * (longwords_per_subinventory + 1)] = num_bits;
118 
119 #ifdef DEBUG
120  printf("Inventory entries filled: %" PRId64 "\n", inventory_size + 1);
121 #endif
122 
123  uint16_t *p16;
124  int64_t *p64;
125 
126  d = 0;
127  uint64_t exact = 0, start, span, inventory_index;
128  int offset;
129 
130  for (uint64_t i = 0; i < num_words; i++)
131  for (int j = 0; j < 64; j++) {
132  if (i * 64 + j >= num_bits) break;
133  if (~bits[i] & 1ULL << j) {
134  if ((d & mask) == 0) {
135  inventory_index = (d >> log2_zeros_per_inventory) * (longwords_per_subinventory + 1);
136  start = inventory[inventory_index];
137  span = inventory[inventory_index + longwords_per_subinventory + 1] - start;
138  if (span > (1 << 16)) inventory[inventory_index] = -inventory[inventory_index] - 1;
139  offset = 0;
140  p64 = &inventory[inventory_index + 1];
141  p16 = (uint16_t *)p64;
142  }
143 
144  if (span < (1 << 16)) {
145  assert(i * 64 + j - start <= (1 << 16));
146  if ((d & zeros_per_sub16_mask) == 0) {
147  assert(offset < longwords_per_subinventory * 4);
148  p16[offset++] = i * 64 + j - start;
149  }
150  } else {
151  if ((d & zeros_per_sub64_mask) == 0) {
152  assert(offset < longwords_per_subinventory);
153  p64[offset++] = i * 64 + j - start;
154  exact++;
155  }
156  }
157 
158  d++;
159  }
160  }
161 
162 #ifdef DEBUG
163  // printf("Exact entries: %" PRId64 "\n", exact);
164  // printf("First inventories: %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 "\n", inventory[0], inventory[1], inventory[2],
165  // inventory[3]);
166 #endif
167  }
168 
169  uint64_t selectZero(const uint64_t rank) {
170 #ifdef DEBUG
171  printf("Selecting %" PRId64 "\n...", rank);
172 #endif
173  assert(rank < num_zeros);
174 
175  const uint64_t inventory_index = rank >> log2_zeros_per_inventory;
176  assert(inventory_index <= inventory_size);
177  const int64_t *inventory_start = &inventory + (inventory_index << log2_longwords_per_subinventory) + inventory_index;
178 
179  const int64_t inventory_rank = *inventory_start;
180  const int subrank = rank & zeros_per_inventory_mask;
181 #ifdef DEBUG
182  printf("Rank: %" PRId64 " inventory index: %" PRId64 " inventory rank: %" PRId64 " subrank: %d\n", rank, inventory_index, inventory_rank, subrank);
183 #endif
184 
185  uint64_t start;
186  int residual;
187 
188  if (inventory_rank >= 0) {
189  start = inventory_rank + ((uint16_t *)(inventory_start + 1))[subrank >> log2_zeros_per_sub16];
190  residual = subrank & zeros_per_sub16_mask;
191  } else {
192  assert((subrank >> log2_zeros_per_sub64) < longwords_per_subinventory);
193  start = -inventory_rank - 1 + *(inventory_start + 1 + (subrank >> log2_zeros_per_sub64));
194  residual = subrank & zeros_per_sub64_mask;
195  }
196 
197 #ifdef DEBUG
198  printf("Differential; start: %" PRId64 " residual: %d\n", start, residual);
199  if (residual == 0) puts("No residual; returning start");
200 #endif
201 
202  if (residual == 0) return start;
203 
204  uint64_t word_index = start / 64;
205  uint64_t word = ~bits[word_index] & -1ULL << start % 64;
206 
207  for (;;) {
208  const int bit_count = __builtin_popcountll(word);
209  if (residual < bit_count) break;
210  word = ~bits[++word_index];
211  residual -= bit_count;
212  }
213 
214  return word_index * 64 + select64(word, residual);
215  }
216 
217  uint64_t selectZero(const uint64_t rank, uint64_t *const next) {
218  const uint64_t s = selectZero(rank);
219  int curr = s / 64;
220 
221  uint64_t window = ~bits[curr] & -1ULL << s;
222  window &= window - 1;
223 
224  while (window == 0) window = ~bits[++curr];
225  *next = curr * 64 + __builtin_ctzll(window);
226 
227  return s;
228  }
229 
231  size_t bitCount() const { return inventory.bitCount() - sizeof(inventory) * 8 + sizeof(*this) * 8; };
232 };
233 
234 } // namespace sux::bits
sux::bits::SimpleSelectZeroHalf::SimpleSelectZeroHalf
SimpleSelectZeroHalf(const uint64_t *const bits, const uint64_t num_bits)
Definition: SimpleSelectZeroHalf.hpp:82
sux::bits::SimpleSelectZeroHalf
Definition: SimpleSelectZeroHalf.hpp:54
sux::bits::SimpleSelectZeroHalf::bitCount
size_t bitCount() const
Definition: SimpleSelectZeroHalf.hpp:231
sux::util::Vector::size
void size(size_t size)
Definition: Vector.hpp:178
sux
Definition: DynamicBitVector.hpp:31
sux::bits
Definition: DynamicBitVector.hpp:31
sux::util::Vector::bitCount
size_t bitCount() const
Definition: Vector.hpp:231
sux::bits::SimpleSelectZeroHalf::SimpleSelectZeroHalf
SimpleSelectZeroHalf()
Definition: SimpleSelectZeroHalf.hpp:74
sux::bits::SimpleSelectZeroHalf::selectZero
uint64_t selectZero(const uint64_t rank)
Definition: SimpleSelectZeroHalf.hpp:169
SelectZero.hpp
sux::bits::SimpleSelectZeroHalf::selectZero
uint64_t selectZero(const uint64_t rank, uint64_t *const next)
Definition: SimpleSelectZeroHalf.hpp:217
sux::util::Vector< int64_t, AT >
sux::select64
uint64_t select64(uint64_t x, uint64_t k)
Definition: common.hpp:372