Sux
RiceBitVector.hpp
Go to the documentation of this file.
1 /*
2  * Sux: Succinct data structures
3  *
4  * Copyright (C) 2019-2020 Emmanuel Esposito and Sebastiano Vigna
5  *
6  * This library is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published by the Free
8  * Software Foundation; either version 3 of the License, or (at your option)
9  * any later version.
10  *
11  * This library is free software; you can redistribute it and/or modify it under
12  * the terms of the GNU General Public License as published by the Free Software
13  * Foundation; either version 3, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful, but WITHOUT ANY
16  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
17  * PARTICULAR PURPOSE. See the GNU General Public License for more details.
18  *
19  * Under Section 7 of GPL version 3, you are granted additional permissions
20  * described in the GCC Runtime Library Exception, version 3.1, as published by
21  * the Free Software Foundation.
22  *
23  * You should have received a copy of the GNU General Public License and a copy of
24  * the GCC Runtime Library Exception along with this program; see the files
25  * COPYING3 and COPYING.RUNTIME respectively. If not, see
26  * <http://www.gnu.org/licenses/>.
27  */
28 
29 #pragma once
30 
31 #include "../support/common.hpp"
32 #include "../util/Vector.hpp"
33 #include <cstdint>
34 #include <cstdio>
35 #include <iostream>
36 
37 namespace sux::function {
38 
39 using namespace std;
40 using namespace sux;
41 
48 template <util::AllocType AT = util::AllocType::MALLOC> class RiceBitVector {
49 
50  public:
51  class Builder {
53  size_t bit_count = 0;
54 
55  public:
56  Builder() : Builder(16) {}
57 
58  Builder(const size_t alloc_words) : data(alloc_words) {}
59 
60  void appendFixed(const uint64_t v, const int log2golomb) {
61  const uint64_t lower_bits = v & ((uint64_t(1) << log2golomb) - 1);
62  int used_bits = bit_count & 63;
63 
64  data.resize((((bit_count + log2golomb + 7) / 8) + 7 + 7) / 8);
65 
66  uint64_t *append_ptr = &data + bit_count / 64;
67  uint64_t cur_word = *append_ptr;
68 
69  cur_word |= lower_bits << used_bits;
70  if (used_bits + log2golomb > 64) {
71  *(append_ptr++) = cur_word;
72  cur_word = lower_bits >> (64 - used_bits);
73  used_bits += log2golomb - 64;
74  }
75  *append_ptr = cur_word;
76  bit_count += log2golomb;
77  }
78 
79  void appendUnaryAll(const std::vector<uint32_t> unary) {
80  size_t bit_inc = 0;
81  for (const auto &u : unary) {
82  bit_inc += u + 1;
83  }
84 
85  data.resize((((bit_count + bit_inc + 7) / 8) + 7 + 7) / 8);
86 
87  for (const auto &u : unary) {
88  bit_count += u;
89  uint64_t *append_ptr = &data + bit_count / 64;
90  *append_ptr |= uint64_t(1) << (bit_count & 63);
91  ++bit_count;
92  }
93  }
94 
95  uint64_t getBits() { return bit_count; }
96 
98  data.trimToFit();
99  return RiceBitVector(std::move(data));
100  }
101  };
102 
103  private:
105  size_t curr_fixed_offset = 0;
106  uint64_t curr_window_unary = 0;
107  uint64_t *curr_ptr_unary;
108  int valid_lower_bits_unary = 0;
109 
110  friend std::ostream &operator<<(std::ostream &os, const RiceBitVector<AT> &rbv) {
111  // os.write((char *)&rbv.bit_count, sizeof(rbv.bit_count));
112  os << rbv.data;
113  return os;
114  }
115 
116  friend std::istream &operator>>(std::istream &is, RiceBitVector<AT> &rbv) {
117  rbv.curr_fixed_offset = 0;
118  rbv.curr_window_unary = 0;
119  rbv.valid_lower_bits_unary = 0;
120  // is.read((char *)&rbv.bit_count, sizeof(rbv.bit_count));
121  is >> rbv.data;
122  return is;
123  }
124 
125  public:
127  RiceBitVector(util::Vector<uint64_t, AT> data) : data(std::move(data)) {}
128 
129  uint64_t readNext(const int log2golomb) {
130  uint64_t result = 0;
131 
132  if (curr_window_unary == 0) {
133  result += valid_lower_bits_unary;
134  curr_window_unary = *(curr_ptr_unary++);
135  valid_lower_bits_unary = 64;
136  while (__builtin_expect(curr_window_unary == 0, 0)) {
137  result += 64;
138  curr_window_unary = *(curr_ptr_unary++);
139  }
140  }
141 
142  const size_t pos = rho(curr_window_unary);
143 
144  curr_window_unary >>= pos;
145  curr_window_unary >>= 1;
146  valid_lower_bits_unary -= pos + 1;
147 
148  result += pos;
149  result <<= log2golomb;
150 
151  uint64_t fixed;
152  memcpy(&fixed, (uint8_t *)&data + curr_fixed_offset / 8, 8);
153  result |= (fixed >> curr_fixed_offset % 8) & ((uint64_t(1) << log2golomb) - 1);
154  curr_fixed_offset += log2golomb;
155  return result;
156  }
157 
158  void skipSubtree(const size_t nodes, const size_t fixed_len) {
159  assert(nodes > 0);
160  size_t missing = nodes, cnt;
161  while ((cnt = nu(curr_window_unary)) < missing) {
162  curr_window_unary = *(curr_ptr_unary++);
163  missing -= cnt;
164  valid_lower_bits_unary = 64;
165  }
166  cnt = select64(curr_window_unary, missing - 1);
167  curr_window_unary >>= cnt;
168  curr_window_unary >>= 1;
169  valid_lower_bits_unary -= cnt + 1;
170 
171  curr_fixed_offset += fixed_len;
172  }
173 
174  void readReset(const size_t bit_pos, const size_t unary_offset) {
175  // assert(bit_pos < bit_count);
176  curr_fixed_offset = bit_pos;
177  size_t unary_pos = bit_pos + unary_offset;
178  curr_ptr_unary = &data + unary_pos / 64;
179  curr_window_unary = *(curr_ptr_unary++) >> (unary_pos & 63);
180  valid_lower_bits_unary = 64 - (unary_pos & 63);
181  }
182 
183  size_t getBits() const { return data.size() * sizeof(uint64_t); }
184 };
185 
186 } // namespace sux::function
sux::function::RiceBitVector::Builder::Builder
Builder(const size_t alloc_words)
Definition: RiceBitVector.hpp:58
sux::function::RiceBitVector::Builder::build
RiceBitVector< AT > build()
Definition: RiceBitVector.hpp:97
sux::rho
int rho(uint64_t word)
Definition: common.hpp:168
sux::function::RiceBitVector::getBits
size_t getBits() const
Definition: RiceBitVector.hpp:183
sux::function::RiceBitVector::Builder
Definition: RiceBitVector.hpp:51
sux::function::RiceBitVector::operator<<
friend std::ostream & operator<<(std::ostream &os, const RiceBitVector< AT > &rbv)
Definition: RiceBitVector.hpp:110
sux::function::RiceBitVector::readNext
uint64_t readNext(const int log2golomb)
Definition: RiceBitVector.hpp:129
sux::util::Vector::size
void size(size_t size)
Definition: Vector.hpp:178
sux
Definition: DynamicBitVector.hpp:31
sux::function::RiceBitVector::Builder::getBits
uint64_t getBits()
Definition: RiceBitVector.hpp:95
sux::function::RiceBitVector::readReset
void readReset(const size_t bit_pos, const size_t unary_offset)
Definition: RiceBitVector.hpp:174
sux::function
Definition: DoubleEF.hpp:43
sux::function::RiceBitVector::operator>>
friend std::istream & operator>>(std::istream &is, RiceBitVector< AT > &rbv)
Definition: RiceBitVector.hpp:116
sux::function::RiceBitVector::RiceBitVector
RiceBitVector()
Definition: RiceBitVector.hpp:126
sux::util::Vector::trimToFit
void trimToFit()
Definition: Vector.hpp:132
sux::nu
int nu(uint64_t word)
Definition: common.hpp:339
sux::function::RiceBitVector::Builder::Builder
Builder()
Definition: RiceBitVector.hpp:56
sux::util::Vector::resize
void resize(size_t size)
Definition: Vector.hpp:166
sux::function::RiceBitVector::skipSubtree
void skipSubtree(const size_t nodes, const size_t fixed_len)
Definition: RiceBitVector.hpp:158
sux::function::RiceBitVector::Builder::appendUnaryAll
void appendUnaryAll(const std::vector< uint32_t > unary)
Definition: RiceBitVector.hpp:79
sux::function::RiceBitVector
Definition: RiceBitVector.hpp:48
sux::util::Vector< uint64_t, AT >
sux::function::RiceBitVector::Builder::appendFixed
void appendFixed(const uint64_t v, const int log2golomb)
Definition: RiceBitVector.hpp:60
sux::select64
uint64_t select64(uint64_t x, uint64_t k)
Definition: common.hpp:372
sux::function::RiceBitVector::RiceBitVector
RiceBitVector(util::Vector< uint64_t, AT > data)
Definition: RiceBitVector.hpp:127