Sux
SimpleSelectHalf.hpp
Go to the documentation of this file.
1 /*
2  * Sux: Succinct data structures
3  *
4  * Copyright (C) 2007-2020 Sebastiano Vigna
5  *
6  * This library is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published by the Free
8  * Software Foundation; either version 3 of the License, or (at your option)
9  * any later version.
10  *
11  * This library is free software; you can redistribute it and/or modify it under
12  * the terms of the GNU General Public License as published by the Free Software
13  * Foundation; either version 3, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful, but WITHOUT ANY
16  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
17  * PARTICULAR PURPOSE. See the GNU General Public License for more details.
18  *
19  * Under Section 7 of GPL version 3, you are granted additional permissions
20  * described in the GCC Runtime Library Exception, version 3.1, as published by
21  * the Free Software Foundation.
22  *
23  * You should have received a copy of the GNU General Public License and a copy of
24  * the GCC Runtime Library Exception along with this program; see the files
25  * COPYING3 and COPYING.RUNTIME respectively. If not, see
26  * <http://www.gnu.org/licenses/>.
27  */
28 
29 #pragma once
30 
31 #include "../support/common.hpp"
32 #include "../util/Vector.hpp"
33 #include "Select.hpp"
34 #include <cstdint>
35 
36 namespace sux::bits {
37 
38 using namespace std;
39 using namespace sux;
40 
54 template <util::AllocType AT = util::AllocType::MALLOC> class SimpleSelectHalf {
55  private:
56  static const int log2_ones_per_inventory = 10;
57  static const int ones_per_inventory = 1 << log2_ones_per_inventory;
58  static const int ones_per_inventory_mask = ones_per_inventory - 1;
59  static const uint64_t log2_longwords_per_subinventory = 2;
60  static const int longwords_per_subinventory = 1 << log2_longwords_per_subinventory;
61  static const int log2_ones_per_sub64 = log2_ones_per_inventory - log2_longwords_per_subinventory;
62  static const int ones_per_sub64 = 1 << log2_ones_per_sub64;
63  static const uint64_t ones_per_sub64_mask = ones_per_sub64 - 1;
64  static const int log2_ones_per_sub16 = log2_ones_per_sub64 - 2;
65  static const int ones_per_sub16 = 1 << log2_ones_per_sub16;
66  static const uint64_t ones_per_sub16_mask = ones_per_sub16 - 1;
67 
68  const uint64_t *bits;
69  util::Vector<int64_t, AT> inventory;
70 
71  uint64_t num_words, inventory_size, num_ones;
72 
73  public:
75 
82  SimpleSelectHalf(const uint64_t *const bits, const uint64_t num_bits) : bits(bits) {
83  num_words = (num_bits + 63) / 64;
84 
85  // Init rank/select structure
86  uint64_t c = 0;
87  for (uint64_t i = 0; i < num_words; i++) c += __builtin_popcountll(bits[i]);
88  num_ones = c;
89 
90  assert(c <= num_bits);
91 
92  inventory_size = (c + ones_per_inventory - 1) / ones_per_inventory;
93 
94 #ifdef DEBUG
95  printf("Number of bits: %" PRId64 " Number of ones: %" PRId64 " (%.2f%%)\n", num_bits, c, (c * 100.0) / num_bits);
96 
97  printf("Ones per inventory: %d Ones per sub 64: %d sub 16: %d\n", ones_per_inventory, ones_per_sub64, ones_per_sub16);
98 #endif
99 
100  inventory.size(inventory_size * (longwords_per_subinventory + 1) + 1);
101 
102  uint64_t d = 0;
103  const uint64_t mask = ones_per_inventory - 1;
104 
105  // First phase: we build an inventory for each one out of ones_per_inventory.
106  for (uint64_t i = 0; i < num_words; i++)
107  for (int j = 0; j < 64; j++) {
108  if (i * 64 + j >= num_bits) break;
109  if (bits[i] & 1ULL << j) {
110  if ((d & mask) == 0) inventory[(d >> log2_ones_per_inventory) * (longwords_per_subinventory + 1)] = i * 64 + j;
111  d++;
112  }
113  }
114 
115  assert(c == d);
116  inventory[inventory_size * (longwords_per_subinventory + 1)] = num_bits;
117 
118 #ifdef DEBUG
119  printf("Inventory entries filled: %" PRId64 "\n", inventory_size + 1);
120 #endif
121 
122  uint16_t *p16;
123  int64_t *p64;
124 
125  d = 0;
126  uint64_t exact = 0, start, span, inventory_index;
127  int offset;
128 
129  for (uint64_t i = 0; i < num_words; i++)
130  for (int j = 0; j < 64; j++) {
131  if (i * 64 + j >= num_bits) break;
132  if (bits[i] & 1ULL << j) {
133  if ((d & mask) == 0) {
134  inventory_index = (d >> log2_ones_per_inventory) * (longwords_per_subinventory + 1);
135  start = inventory[inventory_index];
136  span = inventory[inventory_index + longwords_per_subinventory + 1] - start;
137  if (span > (1 << 16)) inventory[inventory_index] = -inventory[inventory_index] - 1;
138  offset = 0;
139  p64 = &inventory[inventory_index + 1];
140  p16 = (uint16_t *)p64;
141  }
142 
143  if (span < (1 << 16)) {
144  assert(i * 64 + j - start <= (1 << 16));
145  if ((d & ones_per_sub16_mask) == 0) {
146  assert(offset < longwords_per_subinventory * 4);
147  p16[offset++] = i * 64 + j - start;
148  }
149  } else {
150  if ((d & ones_per_sub64_mask) == 0) {
151  assert(offset < longwords_per_subinventory);
152  p64[offset++] = i * 64 + j - start;
153  exact++;
154  }
155  }
156 
157  d++;
158  }
159  }
160 
161 #ifdef DEBUG
162  // printf("Exact entries: %" PRId64 "\n", exact);
163  // printf("First inventories: %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 "\n", inventory[0],
164  // inventory[1], inventory[2],
165  // inventory[3]);
166 #endif
167  }
168 
169  uint64_t select(const uint64_t rank) {
170 #ifdef DEBUG
171  printf("Selecting %" PRId64 "\n...", rank);
172 #endif
173  assert(rank < num_ones);
174 
175  const uint64_t inventory_index = rank >> log2_ones_per_inventory;
176  assert(inventory_index <= inventory_size);
177  const int64_t *inventory_start = &inventory + (inventory_index << log2_longwords_per_subinventory) + inventory_index;
178 
179  const int64_t inventory_rank = *inventory_start;
180  const int subrank = rank & ones_per_inventory_mask;
181 #ifdef DEBUG
182  printf("Rank: %" PRId64 " inventory index: %" PRId64 " inventory rank: %" PRId64 " subrank: %d\n", rank, inventory_index, inventory_rank, subrank);
183 #endif
184 
185  uint64_t start;
186  int residual;
187 
188  if (inventory_rank >= 0) {
189  start = inventory_rank + ((uint16_t *)(inventory_start + 1))[subrank >> log2_ones_per_sub16];
190  residual = subrank & ones_per_sub16_mask;
191  } else {
192  assert((subrank >> log2_ones_per_sub64) < longwords_per_subinventory);
193  start = -inventory_rank - 1 + *(inventory_start + 1 + (subrank >> log2_ones_per_sub64));
194  residual = subrank & ones_per_sub64_mask;
195  }
196 
197 #ifdef DEBUG
198  printf("Differential; start: %" PRId64 " residual: %d\n", start, residual);
199  if (residual == 0) puts("No residual; returning start");
200 #endif
201 
202  if (residual == 0) return start;
203 
204  uint64_t word_index = start / 64;
205  uint64_t word = bits[word_index] & -1ULL << start % 64;
206 
207  for (;;) {
208  const int bit_count = __builtin_popcountll(word);
209  if (residual < bit_count) break;
210  word = bits[++word_index];
211  residual -= bit_count;
212  }
213 
214  return word_index * 64 + select64(word, residual);
215  }
216 
217  uint64_t select(const uint64_t rank, uint64_t *const next) {
218  const uint64_t s = select(rank);
219  int curr = s / 64;
220 
221  uint64_t window = bits[curr] & -1ULL << s;
222  window &= window - 1;
223 
224  while (window == 0) window = bits[++curr];
225  *next = curr * 64 + __builtin_ctzll(window);
226 
227  return s;
228  }
229 
231  size_t bitCount() const { return inventory.bitCount() - sizeof(inventory) * 8 + sizeof(*this) * 8; };
232 };
233 
234 } // namespace sux::bits
sux::util::Vector::size
void size(size_t size)
Definition: Vector.hpp:178
sux
Definition: DynamicBitVector.hpp:31
sux::bits::SimpleSelectHalf::select
uint64_t select(const uint64_t rank)
Definition: SimpleSelectHalf.hpp:169
sux::bits::SimpleSelectHalf
Definition: SimpleSelectHalf.hpp:54
sux::bits::SimpleSelectHalf::bitCount
size_t bitCount() const
Definition: SimpleSelectHalf.hpp:231
sux::bits::SimpleSelectHalf::SimpleSelectHalf
SimpleSelectHalf(const uint64_t *const bits, const uint64_t num_bits)
Definition: SimpleSelectHalf.hpp:82
sux::bits
Definition: DynamicBitVector.hpp:31
sux::util::Vector::bitCount
size_t bitCount() const
Definition: Vector.hpp:231
Select.hpp
sux::util::Vector< int64_t, AT >
sux::bits::SimpleSelectHalf::select
uint64_t select(const uint64_t rank, uint64_t *const next)
Definition: SimpleSelectHalf.hpp:217
sux::bits::SimpleSelectHalf::SimpleSelectHalf
SimpleSelectHalf()
Definition: SimpleSelectHalf.hpp:74
sux::select64
uint64_t select64(uint64_t x, uint64_t k)
Definition: common.hpp:372