Sux
SimpleSelect.hpp
Go to the documentation of this file.
1 /*
2  * Sux: Succinct data structures
3  *
4  * Copyright (C) 2007-2020 Sebastiano Vigna
5  *
6  * This library is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published by the Free
8  * Software Foundation; either version 3 of the License, or (at your option)
9  * any later version.
10  *
11  * This library is free software; you can redistribute it and/or modify it under
12  * the terms of the GNU General Public License as published by the Free Software
13  * Foundation; either version 3, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful, but WITHOUT ANY
16  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
17  * PARTICULAR PURPOSE. See the GNU General Public License for more details.
18  *
19  * Under Section 7 of GPL version 3, you are granted additional permissions
20  * described in the GCC Runtime Library Exception, version 3.1, as published by
21  * the Free Software Foundation.
22  *
23  * You should have received a copy of the GNU General Public License and a copy of
24  * the GCC Runtime Library Exception along with this program; see the files
25  * COPYING3 and COPYING.RUNTIME respectively. If not, see
26  * <http://www.gnu.org/licenses/>.
27  */
28 
29 #pragma once
30 
31 #include "../support/common.hpp"
32 #include "../util/Vector.hpp"
33 #include "Select.hpp"
34 #include <cstdint>
35 
36 namespace sux::bits {
37 
38 using namespace std;
39 
53 template <util::AllocType AT = util::AllocType::MALLOC> class SimpleSelect : public Select {
54  private:
55  static const int max_ones_per_inventory = 8192;
56 
57  const uint64_t *bits;
58  util::Vector<int64_t, AT> inventory;
59  util::Vector<uint64_t, AT> exact_spill;
60  int log2_ones_per_inventory, log2_ones_per_sub16, log2_ones_per_sub64, log2_longwords_per_subinventory, ones_per_inventory, ones_per_sub16, ones_per_sub64, longwords_per_subinventory,
61  longwords_per_inventory, ones_per_inventory_mask, ones_per_sub16_mask, ones_per_sub64_mask;
62 
63  uint64_t num_words, inventory_size, exact_spill_size, num_ones;
64 
65  public:
67 
75  SimpleSelect(const uint64_t *const bits, const uint64_t num_bits, const int max_log2_longwords_per_subinventory) : bits(bits) {
76  num_words = (num_bits + 63) / 64;
77 
78  // Init rank/select structure
79  uint64_t c = 0;
80  for (uint64_t i = 0; i < num_words; i++) c += __builtin_popcountll(bits[i]);
81  num_ones = c;
82 
83  assert(c <= num_bits);
84 
85  ones_per_inventory = num_bits == 0 ? 0 : (c * max_ones_per_inventory + num_bits - 1) / num_bits;
86  // Make ones_per_inventory into a power of 2
87  log2_ones_per_inventory = max(0, lambda_safe(ones_per_inventory));
88  ones_per_inventory = 1ULL << log2_ones_per_inventory;
89  ones_per_inventory_mask = ones_per_inventory - 1;
90  inventory_size = (c + ones_per_inventory - 1) / ones_per_inventory;
91 
92 #ifdef DEBUG
93  printf("Number of ones: %" PRId64 " Number of ones per inventory item: %d\n", c, ones_per_inventory);
94 #endif
95 
96  log2_longwords_per_subinventory = min(max_log2_longwords_per_subinventory, max(0, log2_ones_per_inventory - 2));
97  longwords_per_subinventory = 1 << log2_longwords_per_subinventory;
98  longwords_per_inventory = longwords_per_subinventory + 1;
99  log2_ones_per_sub64 = max(0, log2_ones_per_inventory - log2_longwords_per_subinventory);
100  log2_ones_per_sub16 = max(0, log2_ones_per_sub64 - 2);
101  ones_per_sub64 = 1ULL << log2_ones_per_sub64;
102  ones_per_sub16 = 1ULL << log2_ones_per_sub16;
103  ones_per_sub64_mask = ones_per_sub64 - 1;
104  ones_per_sub16_mask = ones_per_sub16 - 1;
105 
106 #ifdef DEBUG
107  printf("Longwords per subinventory: %d Ones per sub 64: %d sub 16: %d\n", longwords_per_subinventory, ones_per_sub64, ones_per_sub16);
108 #endif
109 
110  inventory.size(inventory_size * longwords_per_inventory + 1);
111  const int64_t *end_of_inventory = &inventory + inventory_size * longwords_per_inventory + 1;
112 
113  uint64_t d = 0;
114 
115  // First phase: we build an inventory for each one out of ones_per_inventory.
116  for (uint64_t i = 0; i < num_words; i++)
117  for (int j = 0; j < 64; j++) {
118  if (i * 64 + j >= num_bits) break;
119  if (bits[i] & 1ULL << j) {
120  if ((d & ones_per_inventory_mask) == 0) inventory[(d >> log2_ones_per_inventory) * longwords_per_inventory] = i * 64 + j;
121  d++;
122  }
123  }
124 
125  assert(c == d);
126  inventory[inventory_size * longwords_per_inventory] = num_bits;
127 
128 #ifdef DEBUG
129  printf("Inventory entries filled: %" PRId64 "\n", inventory_size + 1);
130 #endif
131 
132  if (ones_per_inventory > 1) {
133  d = 0;
134  int ones;
135  uint64_t spilled = 0, exact = 0, start, span, inventory_index;
136 
137  for (uint64_t i = 0; i < num_words; i++)
138  // We estimate the subinventory and exact spill size
139  for (int j = 0; j < 64; j++) {
140  if (i * 64 + j >= num_bits) break;
141  if (bits[i] & 1ULL << j) {
142  if ((d & ones_per_inventory_mask) == 0) {
143  inventory_index = d >> log2_ones_per_inventory;
144  start = inventory[inventory_index * longwords_per_inventory];
145  span = inventory[(inventory_index + 1) * longwords_per_inventory] - start;
146  ones = min(c - d, (uint64_t)ones_per_inventory);
147 
148  assert(start + span == num_bits || ones == ones_per_inventory);
149 
150  // We accumulate space for exact pointers ONLY if necessary.
151  if (span >= (1 << 16)) {
152  exact += ones;
153  if (ones_per_sub64 > 1) spilled += ones;
154  }
155  }
156  d++;
157  }
158  }
159 
160 #ifdef DEBUG
161  printf("Spilled entries: %" PRId64 " exact: %" PRId64 "\n", spilled, exact);
162 #endif
163 
164  exact_spill_size = spilled;
165  exact_spill.size(exact_spill_size);
166 
167  uint16_t *p16;
168  int64_t *p64;
169  int offset;
170  spilled = 0;
171  d = 0;
172 
173  for (uint64_t i = 0; i < num_words; i++)
174  for (int j = 0; j < 64; j++) {
175  if (i * 64 + j >= num_bits) break;
176  if (bits[i] & 1ULL << j) {
177  if ((d & ones_per_inventory_mask) == 0) {
178  inventory_index = d >> log2_ones_per_inventory;
179  start = inventory[inventory_index * longwords_per_inventory];
180  span = inventory[(inventory_index + 1) * longwords_per_inventory] - start;
181  p64 = &inventory[inventory_index * longwords_per_inventory + 1];
182  p16 = (uint16_t *)p64;
183  offset = 0;
184  }
185 
186  if (span < (1 << 16)) {
187  assert(i * 64 + j - start <= (1 << 16));
188  if ((d & ones_per_sub16_mask) == 0) {
189  assert(offset < longwords_per_subinventory * 4);
190  assert(p16 + offset < (uint16_t *)end_of_inventory);
191  p16[offset++] = i * 64 + j - start;
192  }
193  } else {
194  if (ones_per_sub64 == 1) {
195  assert(p64 + offset < end_of_inventory);
196  p64[offset++] = i * 64 + j;
197  } else {
198  assert(p64 < end_of_inventory);
199  if ((d & ones_per_inventory_mask) == 0) {
200  inventory[inventory_index * longwords_per_inventory] |= 1ULL << 63;
201  p64[0] = spilled;
202  }
203  assert(spilled < exact_spill_size);
204  exact_spill[spilled++] = i * 64 + j;
205  }
206  }
207 
208  d++;
209  }
210  }
211  }
212 #ifdef DEBUG
213  // printf("First inventories: %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 "\n", inventory[0],
214  // inventory[1], inventory[2],
215  // inventory[3]);
216  // if ( subinventory_size > 0 ) printf("First subinventories: %016" PRIx64 " %016" PRIx64 " %016"
217  // PRIx64 " %016" PRIx64 "\n", subinventory[ 0 ], subinventory[ 1 ], subinventory[ 2 ],
218  // subinventory[ 3 ] );
219  // if (exact_spill_size > 0)
220  // printf("First spilled entries: %016" PRIx64 " %016" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n",
221  // exact_spill[0],
222  // exact_spill[1], exact_spill[2], exact_spill[3]);
223 #endif
224  }
225 
226  size_t select(const uint64_t rank) {
227 #ifdef DEBUG
228  printf("Selecting %" PRId64 "\n...", rank);
229 #endif
230 
231  const uint64_t inventory_index = rank >> log2_ones_per_inventory;
232  const int64_t *inventory_start = &inventory + (inventory_index << log2_longwords_per_subinventory) + inventory_index;
233  assert(inventory_index <= inventory_size);
234 
235  const int64_t inventory_rank = *inventory_start;
236  const int subrank = rank & ones_per_inventory_mask;
237 #ifdef DEBUG
238  printf("Rank: %" PRId64 " inventory index: %" PRId64 " inventory rank: %" PRId64 " subrank: %d\n", rank, inventory_index, inventory_rank, subrank);
239 #endif
240 
241 #ifdef DEBUG
242  if (subrank == 0) puts("Exact hit (no subrank); returning inventory");
243 #endif
244  if (subrank == 0) return inventory_rank & ~(1ULL << 63);
245 
246  uint64_t start;
247  int residual;
248 
249  if (inventory_rank >= 0) {
250  start = inventory_rank + ((uint16_t *)(inventory_start + 1))[subrank >> log2_ones_per_sub16];
251  residual = subrank & ones_per_sub16_mask;
252  } else {
253  if (ones_per_sub64 == 1) return *(inventory_start + 1 + subrank);
254  assert(*(inventory_start + 1) + subrank < (int64_t)exact_spill_size);
255  return exact_spill[*(inventory_start + 1) + subrank];
256  }
257 
258 #ifdef DEBUG
259  printf("Differential; start: %" PRId64 " residual: %d\n", start, residual);
260  if (residual == 0) puts("No residual; returning start");
261 #endif
262 
263  if (residual == 0) return start;
264 
265  uint64_t word_index = start / 64;
266  uint64_t word = bits[word_index] & -1ULL << start % 64;
267 
268  for (;;) {
269  const int bit_count = __builtin_popcountll(word);
270  if (residual < bit_count) break;
271  word = bits[++word_index];
272  residual -= bit_count;
273  }
274 
275  return word_index * 64 + select64(word, residual);
276  }
277 
279  size_t bitCount() const { return inventory.bitCount() - sizeof(inventory) * 8 + exact_spill.bitCount() - sizeof(exact_spill) * 8 + sizeof(*this) * 8; }
280 };
281 
282 } // namespace sux::bits
sux::bits::SimpleSelect
Definition: SimpleSelect.hpp:53
sux::lambda_safe
int lambda_safe(uint64_t word)
Definition: common.hpp:188
sux::bits::SimpleSelect::SimpleSelect
SimpleSelect()
Definition: SimpleSelect.hpp:66
sux::util::Vector::size
void size(size_t size)
Definition: Vector.hpp:178
sux::Select
Definition: Select.hpp:38
sux::bits
Definition: DynamicBitVector.hpp:31
sux::util::Vector::bitCount
size_t bitCount() const
Definition: Vector.hpp:231
Select.hpp
sux::bits::SimpleSelect::SimpleSelect
SimpleSelect(const uint64_t *const bits, const uint64_t num_bits, const int max_log2_longwords_per_subinventory)
Definition: SimpleSelect.hpp:75
sux::bits::SimpleSelect::bitCount
size_t bitCount() const
Definition: SimpleSelect.hpp:279
sux::bits::SimpleSelect::select
size_t select(const uint64_t rank)
Definition: SimpleSelect.hpp:226
sux::util::Vector< int64_t, AT >
sux::select64
uint64_t select64(uint64_t x, uint64_t k)
Definition: common.hpp:372