Sux
SimpleSelectZero.hpp
Go to the documentation of this file.
1 /*
2  * Sux: Succinct data structures
3  *
4  * Copyright (C) 2007-2020 Sebastiano Vigna
5  *
6  * This library is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published by the Free
8  * Software Foundation; either version 3 of the License, or (at your option)
9  * any later version.
10  *
11  * This library is free software; you can redistribute it and/or modify it under
12  * the terms of the GNU General Public License as published by the Free Software
13  * Foundation; either version 3, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful, but WITHOUT ANY
16  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
17  * PARTICULAR PURPOSE. See the GNU General Public License for more details.
18  *
19  * Under Section 7 of GPL version 3, you are granted additional permissions
20  * described in the GCC Runtime Library Exception, version 3.1, as published by
21  * the Free Software Foundation.
22  *
23  * You should have received a copy of the GNU General Public License and a copy of
24  * the GCC Runtime Library Exception along with this program; see the files
25  * COPYING3 and COPYING.RUNTIME respectively. If not, see
26  * <http://www.gnu.org/licenses/>.
27  */
28 
29 #pragma once
30 
31 #include "../support/common.hpp"
32 #include "../util/Vector.hpp"
33 #include "SelectZero.hpp"
34 #include <cstdint>
35 
36 namespace sux::bits {
37 using namespace std;
38 
52 template <util::AllocType AT = util::AllocType::MALLOC> class SimpleSelectZero {
53  private:
54  static const int max_zeros_per_inventory = 8192;
55 
56  const uint64_t *bits;
57  util::Vector<int64_t, AT> inventory;
58  util::Vector<uint64_t, AT> exact_spill;
59  int log2_zeros_per_inventory, log2_zeros_per_sub16, log2_zeros_per_sub64, log2_longwords_per_subinventory, zeros_per_inventory, zeros_per_sub16, zeros_per_sub64, longwords_per_subinventory,
60  longwords_per_inventory, zeros_per_inventory_mask, zeros_per_sub16_mask, zeros_per_sub64_mask;
61 
62  uint64_t num_words, inventory_size, exact_spill_size, num_zeros;
63 
64  public:
66 
74  SimpleSelectZero(const uint64_t *const bits, const uint64_t num_bits, const int max_log2_longwords_per_subinventory) : bits(bits) {
75  num_words = (num_bits + 63) / 64;
76 
77  // Init rank/select structure
78  uint64_t c = 0;
79  for (uint64_t i = 0; i < num_words; i++) c += __builtin_popcountll(~bits[i]);
80  num_zeros = c;
81 
82  if (num_bits % 64 != 0) c -= 64 - num_bits % 64;
83  assert(c <= num_bits);
84 
85  zeros_per_inventory = num_bits == 0 ? 0 : (c * max_zeros_per_inventory + num_bits - 1) / num_bits;
86  // Make zeros_per_inventory into a power of 2
87  log2_zeros_per_inventory = max(0, lambda_safe(zeros_per_inventory));
88  zeros_per_inventory = 1ULL << log2_zeros_per_inventory;
89  zeros_per_inventory_mask = zeros_per_inventory - 1;
90  inventory_size = (c + zeros_per_inventory - 1) / zeros_per_inventory;
91 
92  log2_longwords_per_subinventory = min(max_log2_longwords_per_subinventory, max(0, log2_zeros_per_inventory - 2));
93  longwords_per_subinventory = 1 << log2_longwords_per_subinventory;
94  longwords_per_inventory = longwords_per_subinventory + 1;
95  log2_zeros_per_sub64 = max(0, log2_zeros_per_inventory - log2_longwords_per_subinventory);
96  log2_zeros_per_sub16 = max(0, log2_zeros_per_sub64 - 2);
97  zeros_per_sub64 = 1ULL << log2_zeros_per_sub64;
98  zeros_per_sub16 = 1ULL << log2_zeros_per_sub16;
99  zeros_per_sub64_mask = zeros_per_sub64 - 1;
100  zeros_per_sub16_mask = zeros_per_sub16 - 1;
101 
102 #ifdef DEBUG
103  printf("Number of zeros: %" PRId64 " Number of zeros per inventory item: %d\n", c, zeros_per_inventory);
104  printf("Longwords per subinventory: %d zeros per sub 64: %d sub 16: %d\n", longwords_per_subinventory, zeros_per_sub64, zeros_per_sub16);
105 #endif
106 
107  inventory.size(inventory_size * longwords_per_inventory + 1);
108  const int64_t *end_of_inventory = &inventory + inventory_size * longwords_per_inventory + 1;
109 
110  uint64_t d = 0;
111 
112  // First phase: we build an inventory for each one out of zeros_per_inventory.
113  for (uint64_t i = 0; i < num_words; i++)
114  for (int j = 0; j < 64; j++) {
115  if (i * 64 + j >= num_bits) break;
116  if (~bits[i] & 1ULL << j) {
117  if ((d & zeros_per_inventory_mask) == 0) inventory[(d >> log2_zeros_per_inventory) * longwords_per_inventory] = i * 64 + j;
118  d++;
119  }
120  }
121 
122  assert(c == d);
123  inventory[inventory_size * longwords_per_inventory] = num_bits;
124 
125 #ifdef DEBUG
126  printf("Inventory entries filled: %" PRId64 "\n", inventory_size + 1);
127 #endif
128 
129  if (zeros_per_inventory > 1) {
130  d = 0;
131  int zeros;
132  uint64_t spilled = 0, exact = 0, start, span, inventory_index;
133 
134  for (uint64_t i = 0; i < num_words; i++)
135  // We estimate the subinventory and exact spill size
136  for (int j = 0; j < 64; j++) {
137  if (i * 64 + j >= num_bits) break;
138  if (~bits[i] & 1ULL << j) {
139  if ((d & zeros_per_inventory_mask) == 0) {
140  inventory_index = d >> log2_zeros_per_inventory;
141  start = inventory[inventory_index * longwords_per_inventory];
142  span = inventory[(inventory_index + 1) * longwords_per_inventory] - start;
143  zeros = min(c - d, (uint64_t)zeros_per_inventory);
144 
145  assert(start + span == num_bits || zeros == zeros_per_inventory);
146 
147  // We accumulate space for exact pointers ONLY if necessary.
148  if (span >= (1 << 16)) {
149  exact += zeros;
150  if (zeros_per_sub64 > 1) spilled += zeros;
151  }
152  }
153  d++;
154  }
155  }
156 
157 #ifdef DEBUG
158  printf("Spilled entries: %" PRId64 " exact: %" PRId64 "\n", spilled, exact);
159 #endif
160 
161  exact_spill_size = spilled;
162  exact_spill.size(exact_spill_size);
163 
164  uint16_t *p16;
165  int64_t *p64;
166  int offset;
167  spilled = 0;
168  d = 0;
169 
170  for (uint64_t i = 0; i < num_words; i++)
171  for (int j = 0; j < 64; j++) {
172  if (i * 64 + j >= num_bits) break;
173  if (~bits[i] & 1ULL << j) {
174  if ((d & zeros_per_inventory_mask) == 0) {
175  inventory_index = d >> log2_zeros_per_inventory;
176  start = inventory[inventory_index * longwords_per_inventory];
177  span = inventory[(inventory_index + 1) * longwords_per_inventory] - start;
178  p64 = &inventory[inventory_index * longwords_per_inventory + 1];
179  p16 = (uint16_t *)p64;
180  offset = 0;
181  }
182 
183  if (span < (1 << 16)) {
184  assert(i * 64 + j - start <= (1 << 16));
185  if ((d & zeros_per_sub16_mask) == 0) {
186  assert(offset < longwords_per_subinventory * 4);
187  assert(p16 + offset < (uint16_t *)end_of_inventory);
188  p16[offset++] = i * 64 + j - start;
189  }
190  } else {
191  if (zeros_per_sub64 == 1) {
192  assert(p64 + offset < end_of_inventory);
193  p64[offset++] = i * 64 + j;
194  } else {
195  assert(p64 < end_of_inventory);
196  if ((d & zeros_per_inventory_mask) == 0) {
197  inventory[inventory_index * longwords_per_inventory] |= 1ULL << 63;
198  p64[0] = spilled;
199  }
200  assert(spilled < exact_spill_size);
201  exact_spill[spilled++] = i * 64 + j;
202  }
203  }
204 
205  d++;
206  }
207  }
208  }
209 
210 #ifdef DEBUG
211  printf("First inventories: %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 "\n", inventory[0], inventory[1], inventory[2], inventory[3]);
212  // if ( subinventory_size > 0 ) printf("First subinventories: %016" PRIx64 " %016" PRIx64 " %016"
213  // PRIx64 " %016" PRIx64 "\n", subinventory[ 0 ], subinventory[ 1 ], subinventory[ 2 ],
214  // subinventory[ 3 ] );
215  if (exact_spill_size > 0) printf("First spilled entries: %016" PRIx64 " %016" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n", exact_spill[0], exact_spill[1], exact_spill[2], exact_spill[3]);
216 #endif
217  }
218 
219  uint64_t selectZero(const uint64_t rank) {
220 #ifdef DEBUG
221  printf("Selecting %" PRId64 "\n...", rank);
222 #endif
223 
224  const uint64_t inventory_index = rank >> log2_zeros_per_inventory;
225  const int64_t *inventory_start = &inventory + (inventory_index << log2_longwords_per_subinventory) + inventory_index;
226  assert(inventory_index <= inventory_size);
227 
228  const int64_t inventory_rank = *inventory_start;
229  const int subrank = rank & zeros_per_inventory_mask;
230 #ifdef DEBUG
231  printf("Rank: %" PRId64 " inventory index: %" PRId64 " inventory rank: %" PRId64 " subrank: %d\n", rank, inventory_index, inventory_rank, subrank);
232 #endif
233 
234 #ifdef DEBUG
235  if (subrank == 0) puts("Exact hit (no subrank); returning inventory");
236 #endif
237  if (subrank == 0) return inventory_rank & ~(1ULL << 63);
238 
239  uint64_t start;
240  int residual;
241 
242  if (inventory_rank >= 0) {
243  start = inventory_rank + ((uint16_t *)(inventory_start + 1))[subrank >> log2_zeros_per_sub16];
244  residual = subrank & zeros_per_sub16_mask;
245  } else {
246  if (zeros_per_sub64 == 1) return *(inventory_start + 1 + subrank);
247  assert(*(inventory_start + 1) + subrank < (int64_t)exact_spill_size);
248  return exact_spill[*(inventory_start + 1) + subrank];
249  }
250 
251 #ifdef DEBUG
252  printf("Differential; start: %" PRId64 " residual: %d\n", start, residual);
253  if (residual == 0) puts("No residual; returning start");
254 #endif
255 
256  if (residual == 0) return start;
257 
258  uint64_t word_index = start / 64;
259  uint64_t word = ~bits[word_index] & -1ULL << start % 64;
260 
261  for (;;) {
262  const int bit_count = __builtin_popcountll(word);
263  if (residual < bit_count) break;
264  word = ~bits[++word_index];
265  residual -= bit_count;
266  }
267 
268  return word_index * 64 + select64(word, residual);
269  }
270 
272  size_t bitCount() const { return inventory.bitCount() - sizeof(inventory) * 8 + exact_spill.bitCount() - sizeof(exact_spill) * 8 + sizeof(*this) * 8; }
273 };
274 
275 } // namespace sux::bits
sux::bits::SimpleSelectZero::selectZero
uint64_t selectZero(const uint64_t rank)
Definition: SimpleSelectZero.hpp:219
sux::lambda_safe
int lambda_safe(uint64_t word)
Definition: common.hpp:188
sux::bits::SimpleSelectZero::bitCount
size_t bitCount() const
Definition: SimpleSelectZero.hpp:272
sux::bits::SimpleSelectZero
Definition: SimpleSelectZero.hpp:52
sux::util::Vector::size
void size(size_t size)
Definition: Vector.hpp:178
sux::bits::SimpleSelectZero::SimpleSelectZero
SimpleSelectZero()
Definition: SimpleSelectZero.hpp:65
sux::bits
Definition: DynamicBitVector.hpp:31
sux::util::Vector::bitCount
size_t bitCount() const
Definition: Vector.hpp:231
SelectZero.hpp
sux::bits::SimpleSelectZero::SimpleSelectZero
SimpleSelectZero(const uint64_t *const bits, const uint64_t num_bits, const int max_log2_longwords_per_subinventory)
Definition: SimpleSelectZero.hpp:74
sux::util::Vector< int64_t, AT >
sux::select64
uint64_t select64(uint64_t x, uint64_t k)
Definition: common.hpp:372