Sux
Rank9Sel.hpp
Go to the documentation of this file.
1 /*
2  * Sux: Succinct data structures
3  *
4  * Copyright (C) 2007-2020 Sebastiano Vigna
5  *
6  * This library is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published by the Free
8  * Software Foundation; either version 3 of the License, or (at your option)
9  * any later version.
10  *
11  * This library is free software; you can redistribute it and/or modify it under
12  * the terms of the GNU General Public License as published by the Free Software
13  * Foundation; either version 3, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful, but WITHOUT ANY
16  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
17  * PARTICULAR PURPOSE. See the GNU General Public License for more details.
18  *
19  * Under Section 7 of GPL version 3, you are granted additional permissions
20  * described in the GCC Runtime Library Exception, version 3.1, as published by
21  * the Free Software Foundation.
22  *
23  * You should have received a copy of the GNU General Public License and a copy of
24  * the GCC Runtime Library Exception along with this program; see the files
25  * COPYING3 and COPYING.RUNTIME respectively. If not, see
26  * <http://www.gnu.org/licenses/>.
27  */
28 
29 #pragma once
30 
31 #include "../support/common.hpp"
32 #include "Rank9.hpp"
33 #include "Select.hpp"
34 #include <cstdint>
35 
36 namespace sux::bits {
37 
50 template <util::AllocType AT = util::AllocType::MALLOC> class Rank9Sel : public Rank9<AT>, public Select {
51  private:
52  static const int log2_ones_per_inventory = 9;
53  static const int ones_per_inventory = 1 << log2_ones_per_inventory;
54  static const uint64_t inventory_mask = ones_per_inventory - 1;
55 
56  util::Vector<uint64_t, AT> inventory, subinventory;
57  uint64_t inventory_size;
58 
59  public:
74  Rank9Sel(const uint64_t *const bits, const uint64_t num_bits) : Rank9<AT>(bits, num_bits) {
75  const uint64_t num_words = (num_bits + 63) / 64;
76  inventory_size = (this->num_ones + ones_per_inventory - 1) / ones_per_inventory;
77 
78 #ifdef DEBUG
79  printf("Number of ones per inventory item: %d\n", ones_per_inventory);
80 #endif
81  assert(ones_per_inventory <= 8 * 64);
82 
83  inventory.size(inventory_size + 1);
84  subinventory.size((num_words + 3) / 4);
85 
86  uint64_t d = 0;
87  for (uint64_t i = 0; i < num_words; i++)
88  for (int j = 0; j < 64; j++)
89  if (bits[i] & 1ULL << j) {
90  if ((d & inventory_mask) == 0) {
91  inventory[d >> log2_ones_per_inventory] = i * 64 + j;
92  assert(this->counts[(i / 8) * 2] <= d);
93  assert(this->counts[(i / 8) * 2 + 2] > d);
94  }
95 
96  d++;
97  }
98 
99  assert(this->num_ones == d);
100  inventory[inventory_size] = ((num_words + 3) & ~3ULL) * 64;
101 
102 #ifdef DEBUG
103  printf("Inventory size: %" PRId64 "\n", inventory_size);
104  printf("Inventory entries filled: %" PRId64 "\n", d / ones_per_inventory + 1);
105  // printf("First inventories: %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 "\n", inventory[0],
106  // inventory[1], inventory[2],
107  // inventory[3]);
108 #endif
109 
110  d = 0;
111  int state;
112  uint64_t *s, first_bit, index, span, block_span, block_left, counts_at_start;
113 
114  for (uint64_t i = 0; i < num_words; i++)
115  for (int j = 0; j < 64; j++)
116  if (bits[i] & 1ULL << j) {
117  if ((d & inventory_mask) == 0) {
118  first_bit = i * 64 + j;
119  index = d >> log2_ones_per_inventory;
120  assert(inventory[index] == first_bit);
121  s = &subinventory[(inventory[index] / 64) / 4];
122  span = (inventory[index + 1] / 64) / 4 - (inventory[index] / 64) / 4;
123  state = -1;
124  counts_at_start = this->counts[((inventory[index] / 64) / 8) * 2];
125  block_span = (inventory[index + 1] / 64) / 8 - (inventory[index] / 64) / 8;
126  block_left = (inventory[index] / 64) / 8;
127 
128  if (span >= 512)
129  state = 0;
130  else if (span >= 256)
131  state = 1;
132  else if (span >= 128)
133  state = 2;
134  else if (span >= 16) {
135  assert(((block_span + 8) & -8LL) + 8 <= span * 4);
136 
137  uint64_t k;
138  for (k = 0; k < block_span; k++) {
139  assert(((uint16_t *)s)[k + 8] == 0);
140  ((uint16_t *)s)[k + 8] = this->counts[(block_left + k + 1) * 2] - counts_at_start;
141  }
142 
143  for (; k < ((block_span + 8) & -8LL); k++) {
144  assert(((uint16_t *)s)[k + 8] == 0);
145  ((uint16_t *)s)[k + 8] = 0xFFFFU;
146  }
147 
148  assert(block_span / 8 <= 8);
149 
150  for (k = 0; k < block_span / 8; k++) {
151  assert(((uint16_t *)s)[k] == 0);
152  ((uint16_t *)s)[k] = this->counts[(block_left + (k + 1) * 8) * 2] - counts_at_start;
153  }
154 
155  for (; k < 8; k++) {
156  assert(((uint16_t *)s)[k] == 0);
157  ((uint16_t *)s)[k] = 0xFFFFU;
158  }
159  } else if (span >= 2) {
160  assert(((block_span + 8) & -8LL) <= span * 4);
161 
162  uint64_t k;
163  for (k = 0; k < block_span; k++) {
164  assert(((uint16_t *)s)[k] == 0);
165  ((uint16_t *)s)[k] = this->counts[(block_left + k + 1) * 2] - counts_at_start;
166  }
167 
168  for (; k < ((block_span + 8) & -8LL); k++) {
169  assert(((uint16_t *)s)[k] == 0);
170  ((uint16_t *)s)[k] = 0xFFFFU;
171  }
172  }
173  }
174 
175  switch (state) {
176  case 0:
177  assert(s[d & inventory_mask] == 0);
178  s[d & inventory_mask] = i * 64 + j;
179  break;
180  case 1:
181  assert(((uint32_t *)s)[d & inventory_mask] == 0);
182  assert(i * 64 + j - first_bit < (1ULL << 32));
183  ((uint32_t *)s)[d & inventory_mask] = i * 64 + j - first_bit;
184  break;
185  case 2:
186  assert(((uint16_t *)s)[d & inventory_mask] == 0);
187  assert(i * 64 + j - first_bit < (1 << 16));
188  ((uint16_t *)s)[d & inventory_mask] = i * 64 + j - first_bit;
189  break;
190  }
191 
192  d++;
193  }
194  }
195 
196  size_t select(const uint64_t rank) {
197  const uint64_t inventory_index_left = rank >> log2_ones_per_inventory;
198  assert(inventory_index_left <= inventory_size);
199 
200  const uint64_t inventory_left = inventory[inventory_index_left];
201  const uint64_t block_right = inventory[inventory_index_left + 1] / 64;
202  uint64_t block_left = inventory_left / 64;
203  const uint64_t span = block_right / 4 - block_left / 4;
204  const uint64_t *const s = &subinventory[block_left / 4];
205  uint64_t count_left, rank_in_block;
206 
207 #ifdef DEBUG
208  printf("Initially, rank: %" PRId64 " block_left: %" PRId64 " block_right: %" PRId64 " span: %" PRId64 "\n", rank, block_left, block_right, span);
209 #endif
210 
211  if (span < 2) {
212  block_left &= ~7;
213  count_left = block_left / 4 & ~1;
214  assert(rank < this->counts[count_left + 2]);
215  rank_in_block = rank - this->counts[count_left];
216 #ifdef DEBUG
217  printf("Single span; rank_in_block: %" PRId64 " block_left: %" PRId64 "\n", rank_in_block, block_left);
218 #endif
219  } else if (span < 16) {
220  block_left &= ~7;
221  count_left = block_left / 4 & ~1;
222  const uint64_t rank_in_superblock = rank - this->counts[count_left];
223  const uint64_t rank_in_superblock_step_16 = rank_in_superblock * ONES_STEP_16;
224 
225  const uint64_t first = s[0], second = s[1];
226  const int where = (ULEQ_STEP_16(first, rank_in_superblock_step_16) + ULEQ_STEP_16(second, rank_in_superblock_step_16)) * ONES_STEP_16 >> 47;
227  assert(where >= 0);
228  assert(where <= 16);
229 #ifdef DEBUG
230  printf("rank_in_superblock: %" PRId64 " (%" PRIx64 ") %" PRIx64 " %" PRIx64 "\n", rank_in_superblock, rank_in_superblock, s[0], s[1]);
231 #endif
232 
233  block_left += where * 4;
234  count_left += where;
235  rank_in_block = rank - this->counts[count_left];
236  assert(rank_in_block < 512);
237 #ifdef DEBUG
238  printf("Found where (1): %d rank_in_block: %" PRId64 " block_left: %" PRId64 "\n", where, rank_in_block, block_left);
239  printf("superthis->counts: %016" PRIx64 " %016" PRIx64 "\n", s[0], s[1]);
240 #endif
241  } else if (span < 128) {
242  block_left &= ~7;
243  count_left = block_left / 4 & ~1;
244  const uint64_t rank_in_superblock = rank - this->counts[count_left];
245  const uint64_t rank_in_superblock_step_16 = rank_in_superblock * ONES_STEP_16;
246 
247  const uint64_t first = s[0], second = s[1];
248  const int where0 = (ULEQ_STEP_16(first, rank_in_superblock_step_16) + ULEQ_STEP_16(second, rank_in_superblock_step_16)) * ONES_STEP_16 >> 47;
249  assert(where0 <= 16);
250  const uint64_t first_bis = s[where0 + 2], second_bis = s[where0 + 2 + 1];
251  const int where1 = where0 * 8 + ((ULEQ_STEP_16(first_bis, rank_in_superblock_step_16) + ULEQ_STEP_16(second_bis, rank_in_superblock_step_16)) * ONES_STEP_16 >> 47);
252 
253  block_left += where1 * 4;
254  count_left += where1;
255  rank_in_block = rank - this->counts[count_left];
256  assert(rank_in_block < 512);
257 
258 #ifdef DEBUG
259  printf("Found where (2): %d rank_in_block: %" PRId64 " block_left: %" PRId64 "\n", where1, rank_in_block, block_left);
260 #endif
261  } else if (span < 256) {
262  return ((uint16_t *)s)[rank % ones_per_inventory] + inventory_left;
263  } else if (span < 512) {
264  return ((uint32_t *)s)[rank % ones_per_inventory] + inventory_left;
265  } else {
266  return s[rank % ones_per_inventory];
267  }
268 
269  const uint64_t rank_in_block_step_9 = rank_in_block * ONES_STEP_9;
270  const uint64_t subcounts = this->counts[count_left + 1];
271  const uint64_t offset_in_block = (ULEQ_STEP_9(subcounts, rank_in_block_step_9) * ONES_STEP_9 >> 54 & 0x7);
272 
273  const uint64_t word = block_left + offset_in_block;
274  const uint64_t rank_in_word = rank_in_block - (subcounts >> ((offset_in_block - 1) & 7) * 9 & 0x1FF);
275  assert(offset_in_block <= 7);
276 
277 #ifdef DEBUG
278  printf("rank_in_block: %" PRId64 " offset_in_block: %" PRId64 " rank_in_word: %" PRId64 "\n", rank_in_block, offset_in_block, rank_in_word);
279  printf("subcounts: ");
280  for (int i = 0; i < 7; i++) printf("%" PRId64 " ", subcounts >> i * 9 & 0x1FF);
281  printf("\n");
282  fflush(stdout);
283 #endif
284 
285  assert(rank_in_word < 64);
286 
287 #ifdef DEBUG
288  printf("Returning %" PRId64 "\n", word * UINT64_C(64) + select64(this->bits[word], rank_in_word));
289 #endif
290  return word * UINT64_C(64) + select64(this->bits[word], rank_in_word);
291  }
292 
293  size_t bitCount() const {
294  return this->counts.bitCount() - sizeof(this->counts) * 8 + inventory.bitCount() - sizeof(inventory) * 8 + subinventory.bitCount() - sizeof(subinventory) * 8 + sizeof(*this) * 8;
295  }
296 };
297 
298 } // namespace sux::bits
sux::bits::Rank9::num_bits
const size_t num_bits
Definition: Rank9.hpp:57
ONES_STEP_9
#define ONES_STEP_9
Definition: common.hpp:49
sux::bits::Rank9Sel
Definition: Rank9Sel.hpp:50
sux::bits::Rank9
Definition: Rank9.hpp:55
sux::util::Vector::size
void size(size_t size)
Definition: Vector.hpp:178
sux::Select
Definition: Select.hpp:38
sux::bits::Rank9Sel::Rank9Sel
Rank9Sel(const uint64_t *const bits, const uint64_t num_bits)
Definition: Rank9Sel.hpp:74
sux::bits::Rank9Sel::bitCount
size_t bitCount() const
Definition: Rank9Sel.hpp:293
sux::bits::Rank9::bits
const uint64_t * bits
Definition: Rank9.hpp:59
ULEQ_STEP_16
#define ULEQ_STEP_16(x, y)
Definition: common.hpp:58
sux::bits
Definition: DynamicBitVector.hpp:31
sux::util::Vector::bitCount
size_t bitCount() const
Definition: Vector.hpp:231
Select.hpp
sux::bits::Rank9Sel::select
size_t select(const uint64_t rank)
Definition: Rank9Sel.hpp:196
sux::bits::Rank9::counts
util::Vector< uint64_t, AT > counts
Definition: Rank9.hpp:60
Rank9.hpp
sux::bits::Rank9::num_ones
size_t num_ones
Definition: Rank9.hpp:58
sux::bits::Rank9::rank
uint64_t rank(const size_t k)
Definition: Rank9.hpp:100
ONES_STEP_16
#define ONES_STEP_16
Definition: common.hpp:50
ULEQ_STEP_9
#define ULEQ_STEP_9(x, y)
Definition: common.hpp:57
sux::util::Vector< uint64_t, AT >
sux::select64
uint64_t select64(uint64_t x, uint64_t k)
Definition: common.hpp:372