Sux
EliasFano.hpp
Go to the documentation of this file.
1 /*
2  * Sux: Succinct data structures
3  *
4  * Copyright (C) 2007-2020 Sebastiano Vigna
5  *
6  * This library is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published by the Free
8  * Software Foundation; either version 3 of the License, or (at your option)
9  * any later version.
10  *
11  * This library is free software; you can redistribute it and/or modify it under
12  * the terms of the GNU General Public License as published by the Free Software
13  * Foundation; either version 3, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful, but WITHOUT ANY
16  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
17  * PARTICULAR PURPOSE. See the GNU General Public License for more details.
18  *
19  * Under Section 7 of GPL version 3, you are granted additional permissions
20  * described in the GCC Runtime Library Exception, version 3.1, as published by
21  * the Free Software Foundation.
22  *
23  * You should have received a copy of the GNU General Public License and a copy of
24  * the GCC Runtime Library Exception along with this program; see the files
25  * COPYING3 and COPYING.RUNTIME respectively. If not, see
26  * <http://www.gnu.org/licenses/>.
27  */
28 
29 #pragma once
30 
31 #include "Rank.hpp"
32 #include "SimpleSelectHalf.hpp"
33 #include "SimpleSelectZeroHalf.hpp"
34 #include <cstdint>
35 #include <vector>
36 
37 namespace sux::bits {
38 
39 using namespace std;
40 using namespace sux;
41 
52 template <util::AllocType AT = util::AllocType::MALLOC> class EliasFano : public Rank, public Select {
53  private:
54  util::Vector<uint64_t, AT> lower_bits, upper_bits;
55  SimpleSelectHalf<AT> select_upper;
56  SimpleSelectZeroHalf<AT> selectz_upper;
57  uint64_t num_bits, num_ones;
58  int l;
59  int block_size;
60  int block_length;
61  uint64_t block_size_mask;
62  uint64_t lower_l_bits_mask;
63  uint64_t ones_step_l;
64  uint64_t msbs_step_l;
65  uint64_t compressor;
66 
67  __inline static void set(util::Vector<uint64_t, AT> &bits, const uint64_t pos) { bits[pos / 64] |= 1ULL << pos % 64; }
68 
69  __inline static uint64_t get_bits(util::Vector<uint64_t, AT> &bits, const uint64_t start, const int width) {
70  const int start_word = start / 64;
71  const int start_bit = start % 64;
72  const int total_offset = start_bit + width;
73  const uint64_t result = bits[start_word] >> start_bit;
74  return (total_offset <= 64 ? result : result | bits[start_word + 1] << (64 - start_bit)) & ((1ULL << width) - 1);
75  }
76 
77  __inline static void set_bits(util::Vector<uint64_t, AT> &bits, const uint64_t start, const int width, const uint64_t value) {
78  const uint64_t start_word = start / 64;
79  const uint64_t end_word = (start + width - 1) / 64;
80  const uint64_t start_bit = start % 64;
81 
82  if (start_word == end_word) {
83  bits[start_word] &= ~(((1ULL << width) - 1) << start_bit);
84  bits[start_word] |= value << start_bit;
85  } else {
86  // Here start_bit > 0.
87  bits[start_word] &= (1ULL << start_bit) - 1;
88  bits[start_word] |= value << start_bit;
89  bits[end_word] &= -(1ULL << (width - 64 + start_bit));
90  bits[end_word] |= value >> (64 - start_bit);
91  }
92  }
93 
94  public:
102  EliasFano(const uint64_t *const bits, const uint64_t num_bits) {
103  const uint64_t num_words = (num_bits + 63) / 64;
104  uint64_t m = 0;
105  for (uint64_t i = num_words; i-- != 0;) m += __builtin_popcountll(bits[i]);
106  num_ones = m;
107  this->num_bits = num_bits;
108  l = num_ones == 0 ? 0 : max(0, lambda_safe(num_bits / num_ones));
109 
110 #ifdef DEBUG
111  printf("Number of ones: %lld l: %d\n", num_ones, l);
112  printf("Upper bits: %lld\n", num_ones + (num_bits >> l) + 1);
113  printf("Lower bits: %lld\n", num_ones * l);
114 #endif
115 
116  const uint64_t lower_bits_mask = (1ULL << l) - 1;
117 
118  lower_bits.size((num_ones * l + 63) / 64 + 2 * (l == 0));
119  upper_bits.size(((num_ones + (num_bits >> l) + 1) + 63) / 64);
120 
121  uint64_t pos = 0;
122  for (uint64_t i = 0; i < num_bits; i++) {
123  if (bits[i / 64] & (1ULL << i % 64)) {
124  if (l != 0) set_bits(lower_bits, pos * l, l, i & lower_bits_mask);
125  set(upper_bits, (i >> l) + pos);
126  pos++;
127  }
128  }
129 
130 #ifdef DEBUG
131  // printf("First lower: %016llx %016llx %016llx %016llx\n", lower_bits[0], lower_bits[1],
132  // lower_bits[2], lower_bits[3]);
133  // printf("First upper: %016llx %016llx %016llx %016llx\n", upper_bits[0], upper_bits[1],
134  // upper_bits[2], upper_bits[3]);
135 #endif
136 
137  select_upper = SimpleSelectHalf(&upper_bits, num_ones + (num_bits >> l));
138  selectz_upper = SimpleSelectZeroHalf(&upper_bits, num_ones + (num_bits >> l));
139 
140  block_size = 0;
141  do
142  ++block_size;
143  while (block_size * l + block_size <= 64 && block_size <= l);
144  block_size--;
145 
146 #ifdef DEBUG
147  printf("Block size: %d\n", block_size);
148 #endif
149 
150  block_size_mask = (1ULL << block_size) - 1;
151  block_length = block_size * l;
152 
153 #ifdef PARSEARCH
154  ones_step_l = 0;
155  for (int i = 0; i < block_size; i++) ones_step_l |= 1ULL << i * l;
156  msbs_step_l = ones_step_l << (l - 1);
157 
158  compressor = 0;
159  for (int i = 0; i < block_size; i++) compressor |= 1ULL << ((l - 1) * i + block_size);
160 #endif
161 
162  lower_l_bits_mask = (1ULL << l) - 1;
163  }
164 
178  EliasFano(const std::vector<uint64_t> ones, const uint64_t num_bits) {
179  num_ones = ones.size();
180  this->num_bits = num_bits;
181  l = num_ones == 0 ? 0 : max(0, lambda_safe(num_bits / num_ones));
182 
183 #ifdef DEBUG
184  printf("Number of ones: %lld l: %d\n", num_ones, l);
185  printf("Upper bits: %lld\n", num_ones + (num_bits >> l) + 1);
186  printf("Lower bits: %lld\n", num_ones * l);
187 #endif
188 
189  const uint64_t lower_bits_mask = (1ULL << l) - 1;
190 
191  lower_bits = new uint64_t[(num_ones * l + 63) / 64 + 2 * (l == 0)];
192  upper_bits = new uint64_t[((num_ones + (num_bits >> l) + 1) + 63) / 64]();
193 
194  for (uint64_t i = 0; i < num_ones; i++) {
195  if (l != 0) set_bits(lower_bits, i * l, l, ones[i] & lower_bits_mask);
196  set(upper_bits, (ones[i] >> l) + i);
197  }
198 
199 #ifdef DEBUG
200  printf("First lower: %016llx %016llx %016llx %016llx\n", lower_bits[0], lower_bits[1], lower_bits[2], lower_bits[3]);
201  printf("First upper: %016llx %016llx %016llx %016llx\n", upper_bits[0], upper_bits[1], upper_bits[2], upper_bits[3]);
202 #endif
203 
204  select_upper = SimpleSelectHalf(upper_bits, num_ones + (num_bits >> l));
205  selectz_upper = SimpleSelectZeroHalf(upper_bits, num_ones + (num_bits >> l));
206 
207  block_size = 0;
208  do
209  ++block_size;
210  while (block_size * l + block_size <= 64 && block_size <= l);
211  block_size--;
212 
213 #ifdef DEBUG
214  printf("Block size: %d\n", block_size);
215 #endif
216  block_size_mask = (1ULL << block_size) - 1;
217  block_length = block_size * l;
218 
219 #ifdef PARSEARCH
220  ones_step_l = 0;
221  for (int i = 0; i < block_size; i++) ones_step_l |= 1ULL << i * l;
222  msbs_step_l = ones_step_l << (l - 1);
223 
224  compressor = 0;
225  for (int i = 0; i < block_size; i++) compressor |= 1ULL << ((l - 1) * i + block_size);
226 #endif
227 
228  lower_l_bits_mask = (1ULL << l) - 1;
229  }
230 
231  uint64_t rank(const size_t k) {
232  if (num_ones == 0) return 0;
233  if (k >= num_bits) return num_ones;
234 #ifdef DEBUG
235  printf("Ranking %lld...\n", k);
236 #endif
237  const uint64_t k_shiftr_l = k >> l;
238 
239 #ifndef PARSEARCH
240  int64_t pos = selectz_upper.selectZero(k_shiftr_l);
241  uint64_t rank = pos - (k_shiftr_l);
242 
243 #ifdef DEBUG
244  printf("Position: %lld rank: %lld\n", pos, rank);
245 #endif
246  uint64_t rank_times_l = rank * l;
247  const uint64_t k_lower_bits = k & lower_l_bits_mask;
248 
249  do {
250  rank--;
251  rank_times_l -= l;
252  pos--;
253  } while (pos >= 0 && (upper_bits[pos / 64] & 1ULL << pos % 64) && get_bits(lower_bits, rank_times_l, l) >= k_lower_bits);
254 
255  return ++rank;
256 #else
257 
258  const uint64_t k_lower_bits = k & lower_l_bits_mask;
259 
260 #ifdef DEBUG
261  printf("k: %llx lower %d : %llx\n", k, l, k_lower_bits);
262 #endif
263 
264  const uint64_t k_lower_bits_step_l = k_lower_bits * ones_step_l;
265 
266  uint64_t pos = selectz_upper.selectZero(k_shiftr_l);
267  uint64_t rank = pos - (k_shiftr_l);
268  uint64_t rank_times_l = rank * l;
269 
270 #ifdef DEBUG
271  printf("pos: %lld rank: %lld\n", pos, rank);
272 #endif
273 
274  uint64_t block_upper_bits, block_lower_bits;
275 
276  while (rank > block_size) {
277  rank -= block_size;
278  rank_times_l -= block_length;
279  pos -= block_size;
280  block_upper_bits = get_bits(upper_bits, pos, block_size);
281  block_lower_bits = get_bits(lower_bits, rank_times_l, block_length);
282 
283  // printf( "block upper bits: %llx block lower bits: %llx\n", block_upper_bits, block_lower_bits
284  // );
285 
286  const uint64_t cmp =
287  ((((block_lower_bits | msbs_step_l) - (k_lower_bits_step_l & ~msbs_step_l)) | (k_lower_bits_step_l ^ block_lower_bits)) ^ (k_lower_bits_step_l & ~block_lower_bits)) & msbs_step_l;
288 
289  // printf( "Compare: %016llx compressed: %016llx shifted: %016llx\n", cmp, cmp * compressor, cmp
290  // * compressor >> block_size * l );
291 
292  const uint64_t cmp_compr = ~(cmp * compressor >> block_length & block_upper_bits) & block_size_mask;
293 
294  // printf( "Combined compare: %llx\n", ~t );
295 
296  if (cmp_compr) return rank + 1 + lambda_safe(cmp_compr);
297  }
298 
299  block_upper_bits = get_bits(upper_bits, pos - rank, rank);
300  block_lower_bits = get_bits(lower_bits, 0, rank_times_l);
301 
302  // printf( "\nTail (%lld bits)...\n", rank );
303 
304  // printf( "block upper bits: %llx block lower bits: %llx\n", block_upper_bits, block_lower_bits
305  // );
306 
307  const uint64_t cmp =
308  ((((block_lower_bits | msbs_step_l) - (k_lower_bits_step_l & ~msbs_step_l)) | (k_lower_bits_step_l ^ block_lower_bits)) ^ (k_lower_bits_step_l & ~block_lower_bits)) & msbs_step_l;
309 
310  // printf( "Compressor: %llx\n", compressor );
311  // printf( "Compare: %016llx compressed: %016llx shifted: %016llx\n", cmp, cmp * compressor, cmp *
312  // compressor >> block_size * l );
313 
314  const uint64_t cmp_compr = ~(cmp * compressor >> block_length & block_upper_bits) & (1ULL << rank) - 1;
315 
316  // printf( "Combined compare: %llx\n", ~t );
317 
318  return 1 + lambda_safe(cmp_compr);
319 
320 #endif
321  }
322 
323  size_t select(const uint64_t rank) {
324 #ifdef DEBUG
325  printf("Selecting %lld...\n", rank);
326 #endif
327 #ifdef DEBUG
328  printf("Returning %lld = %llx << %d | %llx\n", (select_upper.select(rank) - rank) << l | get_bits(lower_bits, rank * l, l), select_upper.select(rank) - rank, l,
329  get_bits(lower_bits, rank * l, l));
330 #endif
331  return (select_upper.select(rank) - rank) << l | get_bits(lower_bits, rank * l, l);
332  }
333 
334  uint64_t select(const uint64_t rank, uint64_t *const next) {
335  uint64_t s, t;
336  s = select_upper.select(rank, &t) - rank;
337  t -= rank + 1;
338 
339  const uint64_t position = rank * l;
340  *next = t << l | get_bits(lower_bits, position + l, l);
341  return s << l | get_bits(lower_bits, position, l);
342  }
343 
345  size_t size() const { return num_bits; }
346 
348  uint64_t bitCount() {
349  return upper_bits.bitCount() - sizeof(upper_bits) * 8 + lower_bits.bitCount() - sizeof(lower_bits) * 8 + select_upper.bitCount() - sizeof(select_upper) * 8 + selectz_upper.bitCount() -
350  sizeof(selectz_upper) * 8 + sizeof(*this) * 8;
351  }
352 };
353 
354 } // namespace sux::bits
sux::Rank
Definition: Rank.hpp:46
sux::bits::SimpleSelectZeroHalf
Definition: SimpleSelectZeroHalf.hpp:54
sux::bits::EliasFano::select
size_t select(const uint64_t rank)
Definition: EliasFano.hpp:323
sux::bits::SimpleSelectZeroHalf::bitCount
size_t bitCount() const
Definition: SimpleSelectZeroHalf.hpp:231
sux::lambda_safe
int lambda_safe(uint64_t word)
Definition: common.hpp:188
SimpleSelectHalf.hpp
sux::bits::EliasFano::rank
uint64_t rank(const size_t k)
Definition: EliasFano.hpp:231
Rank.hpp
sux::util::Vector::size
void size(size_t size)
Definition: Vector.hpp:178
sux
Definition: DynamicBitVector.hpp:31
sux::bits::SimpleSelectHalf::select
uint64_t select(const uint64_t rank)
Definition: SimpleSelectHalf.hpp:169
sux::bits::SimpleSelectHalf
Definition: SimpleSelectHalf.hpp:54
sux::bits::EliasFano::size
size_t size() const
Definition: EliasFano.hpp:345
sux::bits::SimpleSelectHalf::bitCount
size_t bitCount() const
Definition: SimpleSelectHalf.hpp:231
sux::Select
Definition: Select.hpp:38
sux::bits
Definition: DynamicBitVector.hpp:31
sux::util::Vector::bitCount
size_t bitCount() const
Definition: Vector.hpp:231
sux::bits::EliasFano
Definition: EliasFano.hpp:52
SimpleSelectZeroHalf.hpp
sux::bits::EliasFano::bitCount
uint64_t bitCount()
Definition: EliasFano.hpp:348
sux::bits::SimpleSelectZeroHalf::selectZero
uint64_t selectZero(const uint64_t rank)
Definition: SimpleSelectZeroHalf.hpp:169
sux::bits::EliasFano::EliasFano
EliasFano(const std::vector< uint64_t > ones, const uint64_t num_bits)
Definition: EliasFano.hpp:178
sux::bits::EliasFano::EliasFano
EliasFano(const uint64_t *const bits, const uint64_t num_bits)
Definition: EliasFano.hpp:102
sux::util::Vector< uint64_t, AT >
sux::bits::EliasFano::select
uint64_t select(const uint64_t rank, uint64_t *const next)
Definition: EliasFano.hpp:334