00001 #ifndef BMSERIAL__H__INCLUDED__
00002 #define BMSERIAL__H__INCLUDED__
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036 #ifndef BM__H__INCLUDED__
00037 #define BM__H__INCLUDED__
00038
00039 #include "bm.h"
00040
00041 #endif
00042
00043 #ifdef _MSC_VER
00044 #pragma warning( push )
00045 #pragma warning( disable : 4311 4312 4127)
00046 #endif
00047
00048
00049
00050 #include "encoding.h"
00051 #include "bmdef.h"
00052 #include "bmfunc.h"
00053 #include "bmtrans.h"
00054 #include "bmalgo_impl.h"
00055 #include "bmutil.h"
00056
00057
00058
00059
00060 namespace bm
00061 {
00062
00063
00064
00065
00066
00067 const unsigned char set_block_end = 0;
00068 const unsigned char set_block_1zero = 1;
00069 const unsigned char set_block_1one = 2;
00070 const unsigned char set_block_8zero = 3;
00071 const unsigned char set_block_8one = 4;
00072 const unsigned char set_block_16zero = 5;
00073 const unsigned char set_block_16one = 6;
00074 const unsigned char set_block_32zero = 7;
00075 const unsigned char set_block_32one = 8;
00076 const unsigned char set_block_azero = 9;
00077 const unsigned char set_block_aone = 10;
00078 const unsigned char set_block_bit = 11;
00079 const unsigned char set_block_sgapbit = 12;
00080 const unsigned char set_block_sgapgap = 13;
00081 const unsigned char set_block_gap = 14;
00082 const unsigned char set_block_gapbit = 15;
00083 const unsigned char set_block_arrbit = 16;
00084 const unsigned char set_block_bit_interval = 17;
00085 const unsigned char set_block_arrgap = 18;
00086 const unsigned char set_block_bit_1bit = 19;
00087 const unsigned char set_block_gap_egamma = 20;
00088 const unsigned char set_block_arrgap_egamma = 21;
00089 const unsigned char set_block_bit_0runs = 22;
00090 const unsigned char set_block_arrgap_egamma_inv = 23;
00091 const unsigned char set_block_arrgap_inv = 24;
00092
00093
00094
00095
00096 enum serialization_header_mask {
00097 BM_HM_DEFAULT = 1,
00098 BM_HM_RESIZE = (1 << 1),
00099 BM_HM_ID_LIST = (1 << 2),
00100 BM_HM_NO_BO = (1 << 3),
00101 BM_HM_NO_GAPL = (1 << 4)
00102 };
00103
00104
00105
00106 #define SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO) \
00107 if (nb == 1) \
00108 enc.put_8(B_1ZERO); \
00109 else if (nb < 256) \
00110 { \
00111 enc.put_8(B_8ZERO); \
00112 enc.put_8((unsigned char)nb); \
00113 } \
00114 else if (nb < 65536) \
00115 { \
00116 enc.put_8(B_16ZERO); \
00117 enc.put_16((unsigned short)nb); \
00118 } \
00119 else \
00120 {\
00121 enc.put_8(B_32ZERO); \
00122 enc.put_32(nb); \
00123 }
00124
00125
00126 #define BM_SET_ONE_BLOCKS(x) \
00127 {\
00128 unsigned end_block = i + x; \
00129 for (;i < end_block; ++i) \
00130 bman.set_block_all_set(i); \
00131 } \
00132 --i
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145 template<class BV>
00146 class serializer
00147 {
00148 public:
00149 typedef typename BV::allocator_type allocator_type;
00150 typedef typename BV::blocks_manager_type blocks_manager_type;
00151 public:
00152 serializer(const allocator_type& alloc = allocator_type());
00153 ~serializer();
00154
00155
00156
00157
00158
00159
00160 void set_compression_level(unsigned clevel);
00161
00162
00163
00164
00165 unsigned get_compression_level() const;
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181 unsigned serialize(const BV& bv,
00182 unsigned char* buf, unsigned buf_size);
00183
00184
00185
00186
00187
00188
00189
00190 void gap_length_serialization(bool value);
00191
00192
00193
00194
00195
00196 void byte_order_serialization(bool value);
00197
00198 protected:
00199
00200
00201
00202 void encode_header(const BV& bv, bm::encoder& enc);
00203
00204
00205
00206
00207 void encode_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc);
00208
00209
00210
00211
00212 void gamma_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc);
00213
00214
00215
00216
00217 void gamma_gap_array(const bm::gap_word_t* gap_block,
00218 unsigned arr_len,
00219 bm::encoder& enc,
00220 bool inverted = false);
00221
00222
00223
00224
00225 void encode_bit_interval(const bm::word_t* blk,
00226 bm::encoder& enc,
00227 unsigned size_control);
00228
00229 private:
00230 serializer(const serializer&);
00231 serializer& operator=(const serializer&);
00232
00233 private:
00234
00235 typedef bm::bit_out<bm::encoder> bit_out_type;
00236 typedef bm::gamma_encoder<bm::gap_word_t, bit_out_type> gamma_encoder_func;
00237
00238 private:
00239 allocator_type alloc_;
00240 bool gap_serial_;
00241 bool byte_order_serial_;
00242 bm::word_t* temp_block_;
00243 unsigned compression_level_;
00244 };
00245
00246
00247
00248
00249
00250 template<class DEC>
00251 class deseriaizer_base
00252 {
00253 public:
00254 typedef DEC decoder_type;
00255 protected:
00256 deseriaizer_base(){}
00257
00258
00259
00260
00261
00262 unsigned read_gap_block(decoder_type& decoder,
00263 unsigned block_type,
00264 bm::gap_word_t* dst_block,
00265 bm::gap_word_t& gap_head);
00266
00267
00268
00269
00270 unsigned read_id_list(decoder_type& decoder,
00271 unsigned block_type,
00272 bm::gap_word_t* dst_arr);
00273
00274 protected:
00275 bm::gap_word_t id_array_[bm::gap_equiv_len * 2];
00276 };
00277
00278
00279
00280
00281
00282 template<class BV, class DEC>
00283 class deserializer : protected deseriaizer_base<DEC>
00284 {
00285 public:
00286 typedef BV bvector_type;
00287 typedef typename deseriaizer_base<DEC>::decoder_type decoder_type;
00288
00289 public:
00290 deserializer() : temp_block_(0) {}
00291
00292 unsigned deserialize(bvector_type& bv,
00293 const unsigned char* buf,
00294 bm::word_t* temp_block);
00295 protected:
00296 typedef typename BV::blocks_manager_type blocks_manager_type;
00297 typedef typename BV::allocator_type allocator_type;
00298
00299 protected:
00300 void deserialize_gap(unsigned char btype, decoder_type& dec,
00301 bvector_type& bv, blocks_manager_type& bman,
00302 unsigned i,
00303 bm::word_t* blk);
00304 protected:
00305 bm::gap_word_t gap_temp_block_[bm::gap_equiv_len * 4];
00306 bm::word_t* temp_block_;
00307 };
00308
00309
00310
00311
00312
00313
00314
00315
00316 template<class BV, class SerialIterator>
00317 class iterator_deserializer
00318 {
00319 public:
00320 typedef BV bvector_type;
00321 typedef SerialIterator serial_iterator_type;
00322 public:
00323 static
00324 unsigned deserialize(bvector_type& bv,
00325 serial_iterator_type& sit,
00326 bm::word_t* temp_block,
00327 set_operation op = bm::set_OR);
00328
00329
00330
00331
00332 static
00333 void deserialize(bvector_type& bv_target,
00334 const bvector_type& bv_mask,
00335 serial_iterator_type& sit,
00336 bm::word_t* temp_block,
00337 set_operation op);
00338
00339 private:
00340 typedef typename BV::blocks_manager_type blocks_manager_type;
00341
00342
00343 static
00344 void load_id_list(bvector_type& bv,
00345 serial_iterator_type& sit,
00346 unsigned id_count,
00347 bool set_clear);
00348
00349
00350 static
00351 unsigned finalize_target_vector(blocks_manager_type& bman,
00352 set_operation op,
00353 unsigned bv_block_idx);
00354
00355
00356 static
00357 unsigned process_id_list(bvector_type& bv,
00358 serial_iterator_type& sit,
00359 set_operation op);
00360
00361
00362 };
00363
00364
00365
00366
00367
00368
00369
00370 template<class DEC>
00371 class serial_stream_iterator : protected deseriaizer_base<DEC>
00372 {
00373 public:
00374 typedef typename deseriaizer_base<DEC>::decoder_type decoder_type;
00375 public:
00376 serial_stream_iterator(const unsigned char* buf);
00377
00378
00379 unsigned bv_size() const { return bv_size_; }
00380
00381
00382 bool is_eof() const { return end_of_stream_; }
00383
00384
00385 void next();
00386
00387
00388 void skip_mono_blocks();
00389
00390
00391 unsigned get_bit_block(bm::word_t* dst_block,
00392 bm::word_t* tmp_block,
00393 set_operation op);
00394
00395
00396
00397 void get_gap_block(bm::gap_word_t* dst_block);
00398
00399
00400 unsigned dec_size() const { return decoder_.size(); }
00401
00402
00403 decoder_type& decoder() { return decoder_; }
00404
00405
00406
00407
00408 enum iterator_state
00409 {
00410 e_unknown = 0,
00411 e_list_ids,
00412 e_blocks,
00413 e_zero_blocks,
00414 e_one_blocks,
00415 e_bit_block,
00416 e_gap_block
00417
00418 };
00419
00420
00421 iterator_state state() const { return this->state_; }
00422
00423 iterator_state get_state() const { return this->state_; }
00424
00425 unsigned get_id_count() const { return this->id_cnt_; }
00426
00427
00428 bm::id_t get_id() const { return this->last_id_; }
00429
00430
00431 unsigned block_idx() const { return this->block_idx_; }
00432
00433 public:
00434
00435
00436 typedef
00437 unsigned (serial_stream_iterator<DEC>::*get_bit_func_type)
00438 (bm::word_t*,bm::word_t*);
00439
00440 unsigned
00441 get_bit_block_ASSIGN(bm::word_t* dst_block, bm::word_t* tmp_block);
00442 unsigned
00443 get_bit_block_OR (bm::word_t* dst_block, bm::word_t* tmp_block);
00444 unsigned
00445 get_bit_block_AND (bm::word_t* dst_block, bm::word_t* tmp_block);
00446 unsigned
00447 get_bit_block_SUB (bm::word_t* dst_block, bm::word_t* tmp_block);
00448 unsigned
00449 get_bit_block_XOR (bm::word_t* dst_block, bm::word_t* tmp_block);
00450 unsigned
00451 get_bit_block_COUNT (bm::word_t* dst_block, bm::word_t* tmp_block);
00452 unsigned
00453 get_bit_block_COUNT_AND(bm::word_t* dst_block, bm::word_t* tmp_block);
00454 unsigned
00455 get_bit_block_COUNT_OR(bm::word_t* dst_block, bm::word_t* tmp_block);
00456 unsigned
00457 get_bit_block_COUNT_XOR(bm::word_t* dst_block, bm::word_t* tmp_block);
00458 unsigned
00459 get_bit_block_COUNT_SUB_AB(bm::word_t* dst_block, bm::word_t* tmp_block);
00460 unsigned
00461 get_bit_block_COUNT_SUB_BA(bm::word_t* dst_block, bm::word_t* tmp_block);
00462 unsigned
00463 get_bit_block_COUNT_A(bm::word_t* dst_block, bm::word_t* tmp_block);
00464 unsigned
00465 get_bit_block_COUNT_B(bm::word_t* dst_block, bm::word_t* tmp_block)
00466 {
00467 return get_bit_block_COUNT(dst_block, tmp_block);
00468 }
00469
00470
00471
00472
00473 unsigned get_arr_bit(bm::word_t* dst_block,
00474 bool clear_target=true);
00475
00476
00477 unsigned get_block_type() const { return block_type_; }
00478
00479 unsigned get_bit();
00480
00481 protected:
00482 get_bit_func_type bit_func_table_[bm::set_END];
00483
00484 decoder_type decoder_;
00485 bool end_of_stream_;
00486 unsigned bv_size_;
00487 iterator_state state_;
00488 unsigned id_cnt_;
00489 bm::id_t last_id_;
00490 gap_word_t glevels_[bm::gap_levels];
00491
00492 unsigned block_type_;
00493 unsigned block_idx_;
00494 unsigned mono_block_cnt_;
00495
00496 gap_word_t gap_head_;
00497 };
00498
00499
00500
00501
00502
00503
00504
00505 template<class BV>
00506 class operation_deserializer
00507 {
00508 public:
00509 typedef BV bvector_type;
00510 public:
00511 static
00512 unsigned deserialize(bvector_type& bv,
00513 const unsigned char* buf,
00514 bm::word_t* temp_block,
00515 set_operation op = bm::set_OR);
00516 private:
00517
00518 static
00519 void deserialize(bvector_type& bv_target,
00520 const bvector_type& bv_mask,
00521 const unsigned char* buf,
00522 bm::word_t* temp_block,
00523 set_operation op);
00524
00525 private:
00526 typedef
00527 typename BV::blocks_manager_type blocks_manager_type;
00528 typedef
00529 serial_stream_iterator<bm::decoder> serial_stream_current;
00530 typedef
00531 serial_stream_iterator<bm::decoder_big_endian> serial_stream_be;
00532 typedef
00533 serial_stream_iterator<bm::decoder_little_endian> serial_stream_le;
00534
00535 };
00536
00537
00538
00539
00540
00541
00542
00543 template<class BV>
00544 serializer<BV>::serializer(const allocator_type& alloc)
00545 : alloc_(alloc),
00546 gap_serial_(false),
00547 byte_order_serial_(true),
00548 temp_block_(0),
00549 compression_level_(3)
00550 {
00551 temp_block_ = alloc_.alloc_bit_block();
00552 }
00553
00554 template<class BV>
00555 void serializer<BV>::set_compression_level(unsigned clevel)
00556 {
00557 compression_level_ = clevel;
00558 }
00559
00560 template<class BV>
00561 unsigned serializer<BV>::get_compression_level() const
00562 {
00563 return compression_level_;
00564 }
00565
00566 template<class BV>
00567 serializer<BV>::~serializer()
00568 {
00569 alloc_.free_bit_block(temp_block_);
00570 }
00571
00572
00573 template<class BV>
00574 void serializer<BV>::gap_length_serialization(bool value)
00575 {
00576 gap_serial_ = value;
00577 }
00578
00579 template<class BV>
00580 void serializer<BV>::byte_order_serialization(bool value)
00581 {
00582 byte_order_serial_ = value;
00583 }
00584
00585 template<class BV>
00586 void serializer<BV>::encode_header(const BV& bv, bm::encoder& enc)
00587 {
00588 const blocks_manager_type& bman = bv.get_blocks_manager();
00589
00590 unsigned char header_flag = 0;
00591 if (bv.size() == bm::id_max)
00592 header_flag |= BM_HM_DEFAULT;
00593 else
00594 header_flag |= BM_HM_RESIZE;
00595
00596 if (!byte_order_serial_)
00597 header_flag |= BM_HM_NO_BO;
00598
00599 if (!gap_serial_)
00600 header_flag |= BM_HM_NO_GAPL;
00601
00602 enc.put_8(header_flag);
00603
00604 if (byte_order_serial_)
00605 {
00606 ByteOrder bo = globals<true>::byte_order();
00607 enc.put_8((unsigned char)bo);
00608 }
00609
00610
00611 if (gap_serial_)
00612 {
00613 enc.put_16(bman.glen(), bm::gap_levels);
00614 }
00615
00616
00617 if (header_flag & BM_HM_RESIZE)
00618 {
00619 enc.put_32(bv.size());
00620 }
00621
00622 }
00623
00624 template<class BV>
00625 void serializer<BV>::gamma_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc)
00626 {
00627 unsigned len = gap_length(gap_block);
00628
00629
00630 if (len > 6 && (compression_level_ > 3))
00631 {
00632 encoder::position_type enc_pos0 = enc.get_pos();
00633 {
00634 bit_out_type bout(enc);
00635 gamma_encoder_func gamma(bout);
00636
00637 enc.put_8(set_block_gap_egamma);
00638 enc.put_16(gap_block[0]);
00639
00640 for_each_dgap(gap_block, gamma);
00641 }
00642
00643
00644 encoder::position_type enc_pos1 = enc.get_pos();
00645 unsigned gamma_size = enc_pos1 - enc_pos0;
00646 if (gamma_size > (len-1)*sizeof(gap_word_t))
00647 {
00648 enc.set_pos(enc_pos0);
00649 }
00650 else
00651 {
00652 return;
00653 }
00654 }
00655
00656
00657 enc.put_8(set_block_gap);
00658 enc.put_16(gap_block, len-1);
00659 }
00660
00661 template<class BV>
00662 void serializer<BV>::gamma_gap_array(const bm::gap_word_t* gap_array,
00663 unsigned arr_len,
00664 bm::encoder& enc,
00665 bool inverted)
00666 {
00667 if (compression_level_ > 3 && arr_len > 25)
00668 {
00669 encoder::position_type enc_pos0 = enc.get_pos();
00670 {
00671 bit_out_type bout(enc);
00672
00673 enc.put_8(
00674 inverted ? set_block_arrgap_egamma_inv
00675 : set_block_arrgap_egamma);
00676
00677 bout.gamma(arr_len);
00678
00679 gap_word_t prev = gap_array[0];
00680 bout.gamma(prev + 1);
00681
00682 for (unsigned i = 1; i < arr_len; ++i)
00683 {
00684 gap_word_t curr = gap_array[i];
00685 bout.gamma(curr - prev);
00686 prev = curr;
00687 }
00688 }
00689
00690 encoder::position_type enc_pos1 = enc.get_pos();
00691 unsigned gamma_size = enc_pos1 - enc_pos0;
00692 if (gamma_size > (arr_len)*sizeof(gap_word_t))
00693 {
00694 enc.set_pos(enc_pos0);
00695 }
00696 else
00697 {
00698 return;
00699 }
00700 }
00701
00702
00703 enc.put_prefixed_array_16(inverted ? set_block_arrgap_inv : set_block_arrgap,
00704 gap_array, arr_len, true);
00705 }
00706
00707
00708 template<class BV>
00709 void serializer<BV>::encode_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc)
00710 {
00711 if (compression_level_ > 2)
00712 {
00713 gap_word_t* gap_temp_block = (gap_word_t*) temp_block_;
00714 gap_word_t arr_len;
00715
00716 unsigned bc = gap_bit_count(gap_block);
00717 if (bc == 1)
00718 {
00719 arr_len = gap_convert_to_arr(gap_temp_block,
00720 gap_block,
00721 bm::gap_equiv_len-10);
00722 BM_ASSERT(arr_len == 1);
00723 enc.put_8(set_block_bit_1bit);
00724 enc.put_16(gap_temp_block[0]);
00725 return;
00726 }
00727
00728 unsigned len = gap_length(gap_block);
00729 bool invert, use_array;
00730 invert = use_array = false;
00731
00732 if (bc < len-1)
00733 {
00734 use_array = true;
00735 }
00736 else
00737 {
00738 unsigned inverted_bc = bm::gap_max_bits - bc;
00739 if (inverted_bc < len-1)
00740 {
00741 use_array = invert = true;
00742 }
00743 }
00744 if (use_array)
00745 {
00746 arr_len = gap_convert_to_arr(gap_temp_block,
00747 gap_block,
00748 bm::gap_equiv_len-10,
00749 invert);
00750 if (arr_len)
00751 {
00752 gamma_gap_array(gap_temp_block, arr_len, enc, invert);
00753 return;
00754 }
00755 }
00756 }
00757
00758 gamma_gap_block(gap_block, enc);
00759 }
00760
00761 template<class BV>
00762 void serializer<BV>::encode_bit_interval(const bm::word_t* blk,
00763 bm::encoder& enc,
00764 unsigned
00765 )
00766 {
00767 enc.put_8(set_block_bit_0runs);
00768 enc.put_8((blk[0]==0) ? 0 : 1);
00769
00770 unsigned i,j;
00771
00772 for (i = 0; i < bm::set_block_size; ++i)
00773 {
00774 if (blk[i] == 0)
00775 {
00776
00777 for (j = i+1; j < bm::set_block_size; ++j)
00778 {
00779 if (blk[j] != 0)
00780 break;
00781 }
00782 enc.put_16((gap_word_t)(j-i));
00783 i = j - 1;
00784 }
00785 else
00786 {
00787
00788 for (j = i+1; j < bm::set_block_size; ++j)
00789 {
00790 if (blk[j] == 0)
00791 {
00792
00793 if (((j+1 < bm::set_block_size) && blk[j+1]) ||
00794 ((j+2 < bm::set_block_size) && blk[j+2])
00795 )
00796 {
00797 ++j;
00798 continue;
00799 }
00800 break;
00801 }
00802 }
00803 enc.put_16((gap_word_t)(j-i));
00804
00805 BM_ASSERT(i < j);
00806 enc.put_32(blk + i, j - i);
00807 i = j - 1;
00808 }
00809 }
00810 }
00811
00812
00813 template<class BV>
00814 unsigned serializer<BV>::serialize(const BV& bv,
00815 unsigned char* buf, unsigned buf_size)
00816 {
00817 BM_ASSERT(temp_block_);
00818
00819 const blocks_manager_type& bman = bv.get_blocks_manager();
00820
00821 gap_word_t* gap_temp_block = (gap_word_t*) temp_block_;
00822
00823 bm::encoder enc(buf, buf_size);
00824 encode_header(bv, enc);
00825
00826 unsigned i,j;
00827
00828
00829
00830 for (i = 0; i < bm::set_total_blocks; ++i)
00831 {
00832 bm::word_t* blk = bman.get_block(i);
00833
00834
00835
00836 bool flag;
00837 flag = bman.is_block_zero(i, blk, false);
00838 if (flag)
00839 {
00840 zero_block:
00841 unsigned next_nb = bman.find_next_nz_block(i+1, false);
00842 if (next_nb == bm::set_total_blocks)
00843 {
00844 enc.put_8(set_block_azero);
00845 return enc.size();
00846 }
00847 unsigned nb = next_nb - i;
00848
00849 if (nb > 1 && nb < 128)
00850 {
00851
00852 unsigned char c = (unsigned char)((1u << 7) | nb);
00853 enc.put_8(c);
00854 }
00855 else
00856 {
00857 SER_NEXT_GRP(enc, nb, set_block_1zero,
00858 set_block_8zero,
00859 set_block_16zero,
00860 set_block_32zero)
00861 }
00862 i = next_nb - 1;
00863 continue;
00864 }
00865 else
00866 {
00867 flag = bman.is_block_one(i, blk, false);
00868 if (flag)
00869 {
00870
00871 for(j = i+1; j < bm::set_total_blocks; ++j)
00872 {
00873 bm::word_t* blk_next = bman.get_block(j);
00874 if (flag != bman.is_block_one(j, blk_next, false))
00875 break;
00876 }
00877 if (j == bm::set_total_blocks)
00878 {
00879 enc.put_8(set_block_aone);
00880 break;
00881 }
00882 else
00883 {
00884 unsigned nb = j - i;
00885 SER_NEXT_GRP(enc, nb, set_block_1one,
00886 set_block_8one,
00887 set_block_16one,
00888 set_block_32one)
00889 }
00890 i = j - 1;
00891 continue;
00892 }
00893 }
00894
00895
00896
00897
00898 if (BM_IS_GAP(blk))
00899 {
00900 gap_word_t* gblk = BMGAP_PTR(blk);
00901 encode_gap_block(gblk, enc);
00902 continue;
00903 }
00904
00905
00906
00907
00908 {
00909 if (compression_level_ <= 1)
00910 {
00911 enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
00912 continue;
00913 }
00914
00915
00916 unsigned block_bc = 0;
00917 bm::id_t bit_gaps =
00918 bit_block_calc_count_change(blk, blk + bm::set_block_size,
00919 &block_bc);
00920 unsigned block_bc_inv = bm::gap_max_bits - block_bc;
00921 switch (block_bc)
00922 {
00923 case 1:
00924 {
00925 bm::id_t bit_idx = 0;
00926 bit_find_in_block(blk, bit_idx, &bit_idx);
00927 enc.put_8(set_block_bit_1bit); enc.put_16((short)bit_idx);
00928 continue;
00929 }
00930 case 0: goto zero_block;
00931 }
00932
00933
00934
00935
00936 unsigned arr_block_size = sizeof(gap_word_t) + (block_bc * sizeof(gap_word_t));
00937 unsigned arr_block_size_inv = sizeof(gap_word_t) + (block_bc_inv * sizeof(gap_word_t));
00938 unsigned gap_block_size = sizeof(gap_word_t) + ((bit_gaps+1) * sizeof(gap_word_t));
00939 unsigned interval_block_size;
00940 interval_block_size = bit_count_nonzero_size(blk, bm::set_block_size);
00941 bool inverted = false;
00942
00943 if (arr_block_size_inv < arr_block_size &&
00944 arr_block_size_inv < gap_block_size &&
00945 arr_block_size_inv < interval_block_size)
00946 {
00947 inverted = true;
00948 goto bit_as_array;
00949 }
00950
00951
00952 if ((interval_block_size > arr_block_size) ||
00953 (interval_block_size > gap_block_size))
00954 {
00955 if (gap_block_size < (bm::gap_equiv_len-64) &&
00956 gap_block_size < arr_block_size)
00957 {
00958 unsigned len = bit_convert_to_gap(gap_temp_block,
00959 blk,
00960 bm::gap_max_bits,
00961 bm::gap_equiv_len-64);
00962 if (len)
00963 {
00964 gamma_gap_block(gap_temp_block, enc);
00965 continue;
00966 }
00967 }
00968
00969 if (arr_block_size < ((bm::gap_equiv_len-64) * sizeof(gap_word_t)))
00970 {
00971 bit_as_array:
00972 gap_word_t arr_len;
00973 unsigned mask = inverted ? ~0 : 0;
00974 arr_len = bit_convert_to_arr(gap_temp_block,
00975 blk,
00976 bm::gap_max_bits,
00977 bm::gap_equiv_len-64,
00978 mask);
00979 if (arr_len)
00980 {
00981 gamma_gap_array(gap_temp_block, arr_len, enc, inverted);
00982 continue;
00983 }
00984
00985 }
00986
00987 enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
00988 continue;
00989 }
00990
00991
00992 if (interval_block_size < arr_block_size &&
00993 interval_block_size < gap_block_size)
00994 {
00995 encode_bit_interval(blk, enc, interval_block_size);
00996 continue;
00997 }
00998
00999 if (gap_block_size < bm::gap_equiv_len &&
01000 gap_block_size < arr_block_size)
01001 {
01002 unsigned len = bit_convert_to_gap(gap_temp_block,
01003 blk,
01004 bm::gap_max_bits,
01005 bm::gap_equiv_len-64);
01006 if (len)
01007 {
01008 gamma_gap_block(gap_temp_block, enc);
01009 continue;
01010 }
01011 }
01012
01013
01014
01015 if (arr_block_size < bm::gap_equiv_len-64)
01016 {
01017 goto bit_as_array;
01018 }
01019
01020
01021 enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
01022 continue;
01023 }
01024 }
01025
01026 enc.put_8(set_block_end);
01027
01028 unsigned encoded_size = enc.size();
01029 return encoded_size;
01030
01031 }
01032
01033
01034
01035
01036
01037 enum serialization_flags {
01038 BM_NO_BYTE_ORDER = 1,
01039 BM_NO_GAP_LENGTH = (1 << 1)
01040 };
01041
01042
01043
01044
01045
01046
01047
01048
01049
01050
01051
01052
01053
01054
01055
01056
01057
01058
01059
01060
01061
01062
01063
01064
01065
01066
01067
01068
01069
01070
01071
01072
01073
01074
01075
01076
01077
01078
01079
01080
01081
01082
01083 template<class BV>
01084 unsigned serialize(const BV& bv,
01085 unsigned char* buf,
01086 bm::word_t* temp_block,
01087 unsigned serialization_flags = 0)
01088 {
01089 bm::serializer<BV> bv_serial;
01090 if (serialization_flags & BM_NO_BYTE_ORDER)
01091 bv_serial.byte_order_serialization(false);
01092
01093 if (serialization_flags & BM_NO_GAP_LENGTH)
01094 bv_serial.gap_length_serialization(false);
01095 else
01096 bv_serial.gap_length_serialization(true);
01097
01098 bv_serial.set_compression_level(4);
01099
01100 return bv_serial.serialize(bv, buf, 0);
01101 }
01102
01103
01104
01105
01106
01107
01108
01109 template<class BV>
01110 unsigned serialize(BV& bv,
01111 unsigned char* buf,
01112 unsigned serialization_flags=0)
01113 {
01114 bm::serializer<BV> bv_serial;
01115 if (serialization_flags & BM_NO_BYTE_ORDER)
01116 bv_serial.byte_order_serialization(false);
01117
01118 if (serialization_flags & BM_NO_GAP_LENGTH)
01119 bv_serial.gap_length_serialization(false);
01120 else
01121 bv_serial.gap_length_serialization(true);
01122
01123 bv_serial.set_compression_level(4);
01124
01125 return bv_serial.serialize(bv, buf, 0);
01126 }
01127
01128
01129
01130
01131
01132
01133
01134
01135
01136
01137
01138
01139
01140
01141
01142
01143
01144
01145 template<class BV>
01146 unsigned deserialize(BV& bv,
01147 const unsigned char* buf,
01148 bm::word_t* temp_block=0)
01149 {
01150 ByteOrder bo_current = globals<true>::byte_order();
01151
01152 bm::decoder dec(buf);
01153 unsigned char header_flag = dec.get_8();
01154 ByteOrder bo = bo_current;
01155 if (!(header_flag & BM_HM_NO_BO))
01156 {
01157 bo = (bm::ByteOrder) dec.get_8();
01158 }
01159
01160 if (bo_current == bo)
01161 {
01162 deserializer<BV, bm::decoder> deserial;
01163 return deserial.deserialize(bv, buf, temp_block);
01164 }
01165 switch (bo_current)
01166 {
01167 case BigEndian:
01168 {
01169 deserializer<BV, bm::decoder_big_endian> deserial;
01170 return deserial.deserialize(bv, buf, temp_block);
01171 }
01172 case LittleEndian:
01173 {
01174 deserializer<BV, bm::decoder_little_endian> deserial;
01175 return deserial.deserialize(bv, buf, temp_block);
01176 }
01177 default:
01178 BM_ASSERT(0);
01179 };
01180 return 0;
01181 }
01182
01183 template<class DEC>
01184 unsigned deseriaizer_base<DEC>::read_id_list(decoder_type& decoder,
01185 unsigned block_type,
01186 bm::gap_word_t* dst_arr)
01187 {
01188 typedef bit_in<DEC> bit_in_type;
01189
01190 gap_word_t len = 0;
01191
01192 switch (block_type)
01193 {
01194 case set_block_bit_1bit:
01195 dst_arr[0] = decoder.get_16();
01196 len = 1;
01197 break;
01198 case set_block_arrgap:
01199 case set_block_arrgap_inv:
01200 len = decoder.get_16();
01201 decoder.get_16(dst_arr, len);
01202 break;
01203 case set_block_arrgap_egamma:
01204 case set_block_arrgap_egamma_inv:
01205 {
01206 bit_in_type bin(decoder);
01207 len = (gap_word_t)bin.gamma();
01208 gap_word_t prev, bit_idx;
01209 prev = 0;
01210 for (gap_word_t k = 0; k < len; ++k)
01211 {
01212 bit_idx = (gap_word_t)bin.gamma();
01213 bit_idx -= (k == 0);
01214 bit_idx += prev;
01215 dst_arr[k] = prev = bit_idx;
01216 }
01217 }
01218 break;
01219 default:
01220 BM_ASSERT(0);
01221 }
01222 return len;
01223 }
01224
01225
01226 template<class DEC>
01227 unsigned deseriaizer_base<DEC>::read_gap_block(decoder_type& decoder,
01228 unsigned block_type,
01229 bm::gap_word_t* dst_block,
01230 bm::gap_word_t& gap_head)
01231 {
01232 typedef bit_in<DEC> bit_in_type;
01233 unsigned gap_len = 0;
01234
01235 switch (block_type)
01236 {
01237 case set_block_gap:
01238 {
01239 gap_len = gap_length(&gap_head);
01240 --gap_len;
01241 *dst_block = gap_head;
01242 decoder.get_16(dst_block+1, gap_len - 1);
01243 dst_block[gap_len] = gap_max_bits - 1;
01244 ++gap_len;
01245 }
01246 break;
01247 case set_block_bit_1bit:
01248 {
01249 gap_set_all(dst_block, bm::gap_max_bits, 0);
01250 gap_word_t bit_idx = decoder.get_16();
01251 gap_len = gap_add_value(dst_block, bit_idx);
01252 ++gap_len;
01253 }
01254 break;
01255 case set_block_arrgap:
01256 case set_block_arrgap_inv:
01257 {
01258 gap_set_all(dst_block, bm::gap_max_bits, 0);
01259 gap_word_t len = decoder.get_16();
01260
01261 for (gap_word_t k = 0; k < len; ++k)
01262 {
01263 gap_word_t bit_idx = decoder.get_16();
01264 gap_len = gap_add_value(dst_block, bit_idx);
01265 }
01266 ++gap_len;
01267 }
01268 break;
01269 case set_block_arrgap_egamma:
01270 case set_block_arrgap_egamma_inv:
01271 {
01272 unsigned arr_len = read_id_list(decoder, block_type, id_array_);
01273 dst_block[0] = 0;
01274 gap_len = gap_set_array(dst_block, id_array_, arr_len);
01275 }
01276 break;
01277 case set_block_gap_egamma:
01278 {
01279 gap_len = (gap_head >> 3);
01280 --gap_len;
01281
01282 {
01283 *dst_block = gap_head;
01284 gap_word_t* gap_data_ptr = dst_block + 1;
01285
01286 bit_in_type bin(decoder);
01287 {
01288 gap_word_t v = (gap_word_t)bin.gamma();
01289 gap_word_t gap_sum = *gap_data_ptr = v - 1;
01290 for (unsigned i = 1; i < gap_len; ++i)
01291 {
01292 v = (gap_word_t)bin.gamma();
01293 *(++gap_data_ptr) = gap_sum += v;
01294 }
01295 dst_block[++gap_len] = bm::gap_max_bits - 1;
01296 }
01297 }
01298
01299 }
01300 break;
01301 default:
01302 BM_ASSERT(0);
01303 }
01304
01305 if (block_type == set_block_arrgap_egamma_inv ||
01306 block_type == set_block_arrgap_inv)
01307 {
01308 gap_invert(dst_block);
01309 }
01310 return gap_len;
01311 }
01312
01313
01314 template<class BV, class DEC>
01315 void
01316 deserializer<BV, DEC>::deserialize_gap(unsigned char btype, decoder_type& dec,
01317 bvector_type& bv, blocks_manager_type& bman,
01318 unsigned i,
01319 bm::word_t* blk)
01320 {
01321 typedef bit_in<DEC> bit_in_type;
01322 gap_word_t gap_head;
01323 gap_word_t gap_len = 0;
01324
01325 switch (btype)
01326 {
01327 case set_block_gap:
01328 case set_block_gapbit:
01329 {
01330 gap_word_t gap_head = (gap_word_t)
01331 (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32());
01332
01333 gap_len = gap_length(&gap_head);
01334 int level = gap_calc_level(gap_len, bman.glen());
01335 --gap_len;
01336 if (level == -1)
01337 {
01338 *gap_temp_block_ = gap_head;
01339 dec.get_16(gap_temp_block_+1, gap_len - 1);
01340 gap_temp_block_[gap_len] = gap_max_bits - 1;
01341
01342 if (blk == 0)
01343 {
01344 blk = bman.get_allocator().alloc_bit_block();
01345 bman.set_block(i, blk);
01346 gap_convert_to_bitset(blk, gap_temp_block_);
01347 }
01348 else
01349 {
01350 blk = bman.deoptimize_block(i);
01351 gap_add_to_bitset(blk, gap_temp_block_);
01352 }
01353 return;
01354 }
01355
01356 set_gap_level(&gap_head, level);
01357
01358 if (blk == 0)
01359 {
01360 gap_word_t* gap_blk =
01361 bman.get_allocator().alloc_gap_block(level, bman.glen());
01362 gap_word_t* gap_blk_ptr = BMGAP_PTR(gap_blk);
01363 *gap_blk_ptr = gap_head;
01364 set_gap_level(gap_blk_ptr, level);
01365 bman.set_block(i, (bm::word_t*)gap_blk);
01366 bman.set_block_gap(i);
01367 dec.get_16(gap_blk + 1, gap_len - 1);
01368 gap_blk[gap_len] = bm::gap_max_bits - 1;
01369 }
01370 else
01371 {
01372
01373 *gap_temp_block_ = gap_head;
01374 dec.get_16(gap_temp_block_ + 1, gap_len - 1);
01375 gap_temp_block_[gap_len] = bm::gap_max_bits - 1;
01376 ++gap_len;
01377 break;
01378 }
01379 return;
01380 }
01381 case set_block_arrgap:
01382 case set_block_arrgap_egamma:
01383 {
01384 unsigned arr_len = read_id_list(dec, btype, this->id_array_);
01385 gap_len = gap_set_array(gap_temp_block_, this->id_array_, arr_len);
01386 break;
01387 }
01388 case set_block_gap_egamma:
01389 gap_head = (gap_word_t)
01390 (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32());
01391 case set_block_arrgap_egamma_inv:
01392 case set_block_arrgap_inv:
01393 gap_len = read_gap_block(dec, btype, gap_temp_block_, gap_head);
01394 break;
01395 default:
01396 BM_ASSERT(0);
01397 }
01398
01399
01400
01401 if (gap_len > bm::gap_length_threashold)
01402 {
01403 blk = bman.deoptimize_block(i);
01404 if (!blk)
01405 {
01406 blk = bman.get_allocator().alloc_bit_block();
01407 bman.set_block(i, blk);
01408 bit_block_set(blk, 0);
01409 }
01410 gap_add_to_bitset_l(blk, gap_temp_block_, gap_len-1);
01411 }
01412 else
01413 {
01414 bv.combine_operation_with_block(i,
01415 (bm::word_t*)gap_temp_block_,
01416 1,
01417 BM_OR);
01418 }
01419
01420 }
01421
01422
01423 template<class BV, class DEC>
01424 unsigned deserializer<BV, DEC>::deserialize(bvector_type& bv,
01425 const unsigned char* buf,
01426 bm::word_t* temp_block)
01427 {
01428 blocks_manager_type& bman = bv.get_blocks_manager();
01429
01430 bm::wordop_t* tmp_buf =
01431 temp_block ? (bm::wordop_t*) temp_block
01432 : (bm::wordop_t*)bman.check_allocate_tempblock();
01433
01434 temp_block_ = temp_block = (word_t*)tmp_buf;
01435 bm::strategy strat = bv.get_new_blocks_strat();
01436 bv.set_new_blocks_strat(BM_GAP);
01437
01438 decoder_type dec(buf);
01439
01440 BM_SET_MMX_GUARD
01441
01442
01443
01444 unsigned char header_flag = dec.get_8();
01445 if (!(header_flag & BM_HM_NO_BO))
01446 {
01447 dec.get_8();
01448 }
01449
01450 if (header_flag & BM_HM_ID_LIST)
01451 {
01452
01453 if (header_flag & BM_HM_RESIZE)
01454 {
01455 unsigned bv_size = dec.get_32();
01456 if (bv_size > bv.size())
01457 {
01458 bv.resize(bv_size);
01459 }
01460 }
01461
01462
01463 for (unsigned cnt = dec.get_32(); cnt; --cnt) {
01464 bm::id_t id = dec.get_32();
01465 bv.set(id);
01466 }
01467
01468 return dec.size()-1;
01469 }
01470
01471 unsigned i;
01472
01473 if (!(header_flag & BM_HM_NO_GAPL))
01474 {
01475 gap_word_t glevels[bm::gap_levels];
01476
01477 for (i = 0; i < bm::gap_levels; ++i)
01478 {
01479 glevels[i] = dec.get_16();
01480 }
01481 }
01482
01483 if (header_flag & (1 << 1))
01484 {
01485 unsigned bv_size = dec.get_32();
01486 if (bv_size > bv.size())
01487 {
01488 bv.resize(bv_size);
01489 }
01490 }
01491
01492 unsigned char btype;
01493 unsigned nb;
01494
01495 for (i = 0; i < bm::set_total_blocks; ++i)
01496 {
01497 btype = dec.get_8();
01498 bm::word_t* blk = bman.get_block(i);
01499
01500
01501 if (btype & (1 << 7))
01502 {
01503 nb = btype & ~(1 << 7);
01504 i += nb-1;
01505 continue;
01506 }
01507
01508 switch (btype)
01509 {
01510 case set_block_azero:
01511 case set_block_end:
01512 i = bm::set_total_blocks;
01513 break;
01514 case set_block_1zero:
01515 continue;
01516 case set_block_8zero:
01517 nb = dec.get_8();
01518 i += nb-1;
01519 continue;
01520 case set_block_16zero:
01521 nb = dec.get_16();
01522 i += nb-1;
01523 continue;
01524 case set_block_32zero:
01525 nb = dec.get_32();
01526 i += nb-1;
01527 continue;
01528 case set_block_aone:
01529 for (;i < bm::set_total_blocks; ++i)
01530 {
01531 bman.set_block_all_set(i);
01532 }
01533 break;
01534 case set_block_1one:
01535 bman.set_block_all_set(i);
01536 continue;
01537 case set_block_8one:
01538 BM_SET_ONE_BLOCKS(dec.get_8());
01539 continue;
01540 case set_block_16one:
01541 BM_SET_ONE_BLOCKS(dec.get_16());
01542 continue;
01543 case set_block_32one:
01544 BM_SET_ONE_BLOCKS(dec.get_32());
01545 continue;
01546 case set_block_bit:
01547 {
01548 if (blk == 0)
01549 {
01550 blk = bman.get_allocator().alloc_bit_block();
01551 bman.set_block(i, blk);
01552 dec.get_32(blk, bm::set_block_size);
01553 continue;
01554 }
01555
01556 dec.get_32(temp_block, bm::set_block_size);
01557 bv.combine_operation_with_block(i,
01558 temp_block,
01559 0, BM_OR);
01560
01561 continue;
01562 }
01563 case set_block_bit_1bit:
01564 {
01565 unsigned bit_idx = dec.get_16();
01566 bit_idx += i * bm::bits_in_block;
01567 bv.set_bit(bit_idx);
01568 continue;
01569 }
01570 case set_block_bit_0runs:
01571 {
01572
01573 bit_block_set(temp_block, 0);
01574
01575 unsigned char run_type = dec.get_8();
01576 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
01577 {
01578 unsigned run_length = dec.get_16();
01579 if (run_type)
01580 {
01581 unsigned run_end = j + run_length;
01582 for (;j < run_end; ++j)
01583 {
01584 BM_ASSERT(j < bm::set_block_size);
01585 temp_block[j] = dec.get_32();
01586 }
01587 }
01588 else
01589 {
01590 j += run_length;
01591 }
01592 }
01593
01594 bv.combine_operation_with_block(i,
01595 temp_block,
01596 0, BM_OR);
01597 continue;
01598 }
01599 case set_block_bit_interval:
01600 {
01601 unsigned head_idx, tail_idx;
01602 head_idx = dec.get_16();
01603 tail_idx = dec.get_16();
01604
01605 if (blk == 0)
01606 {
01607 blk = bman.get_allocator().alloc_bit_block();
01608 bman.set_block(i, blk);
01609 for (unsigned i = 0; i < head_idx; ++i)
01610 {
01611 blk[i] = 0;
01612 }
01613 dec.get_32(blk + head_idx, tail_idx - head_idx + 1);
01614 for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
01615 {
01616 blk[j] = 0;
01617 }
01618 continue;
01619 }
01620 bit_block_set(temp_block, 0);
01621 dec.get_32(temp_block + head_idx, tail_idx - head_idx + 1);
01622
01623 bv.combine_operation_with_block(i,
01624 temp_block,
01625 0, BM_OR);
01626 continue;
01627 }
01628 case set_block_gap:
01629 case set_block_gapbit:
01630 case set_block_arrgap:
01631 case set_block_gap_egamma:
01632 case set_block_arrgap_egamma:
01633 case set_block_arrgap_egamma_inv:
01634 case set_block_arrgap_inv:
01635 deserialize_gap(btype, dec, bv, bman, i, blk);
01636 continue;
01637 case set_block_arrbit:
01638 {
01639 gap_word_t len = (gap_word_t)
01640 (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32());
01641
01642 if (bman.is_block_gap(i))
01643 {
01644
01645
01646
01647 blk = bman.convert_gap2bitset(i);
01648 }
01649 else
01650 {
01651 if (blk == 0)
01652 {
01653 blk = bman.get_allocator().alloc_bit_block();
01654 bit_block_set(blk, 0);
01655 bman.set_block(i, blk);
01656 }
01657 }
01658
01659
01660 for (unsigned k = 0; k < len; ++k)
01661 {
01662 gap_word_t bit_idx = dec.get_16();
01663 set_bit(blk, bit_idx);
01664 }
01665 continue;
01666 }
01667 default:
01668 BM_ASSERT(0);
01669 }
01670 }
01671
01672 bv.forget_count();
01673 bv.set_new_blocks_strat(strat);
01674
01675 return dec.size();
01676 }
01677
01678
01679
01680 template<class DEC>
01681 serial_stream_iterator<DEC>::serial_stream_iterator(const unsigned char* buf)
01682 : decoder_(buf),
01683 end_of_stream_(false),
01684 bv_size_(0),
01685 state_(e_unknown),
01686 id_cnt_(0),
01687 block_idx_(0),
01688 mono_block_cnt_(0)
01689 {
01690 ::memset(bit_func_table_, 0, sizeof(bit_func_table_));
01691
01692 bit_func_table_[bm::set_AND] =
01693 &serial_stream_iterator<DEC>::get_bit_block_AND;
01694 bit_func_table_[bm::set_ASSIGN] =
01695 &serial_stream_iterator<DEC>::get_bit_block_ASSIGN;
01696 bit_func_table_[bm::set_OR] =
01697 &serial_stream_iterator<DEC>::get_bit_block_OR;
01698 bit_func_table_[bm::set_SUB] =
01699 &serial_stream_iterator<DEC>::get_bit_block_SUB;
01700 bit_func_table_[bm::set_XOR] =
01701 &serial_stream_iterator<DEC>::get_bit_block_XOR;
01702 bit_func_table_[bm::set_COUNT] =
01703 &serial_stream_iterator<DEC>::get_bit_block_COUNT;
01704 bit_func_table_[bm::set_COUNT_AND] =
01705 &serial_stream_iterator<DEC>::get_bit_block_COUNT_AND;
01706 bit_func_table_[bm::set_COUNT_XOR] =
01707 &serial_stream_iterator<DEC>::get_bit_block_COUNT_XOR;
01708 bit_func_table_[bm::set_COUNT_OR] =
01709 &serial_stream_iterator<DEC>::get_bit_block_COUNT_OR;
01710 bit_func_table_[bm::set_COUNT_SUB_AB] =
01711 &serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_AB;
01712 bit_func_table_[bm::set_COUNT_SUB_BA] =
01713 &serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_BA;
01714 bit_func_table_[bm::set_COUNT_A] =
01715 &serial_stream_iterator<DEC>::get_bit_block_COUNT_A;
01716 bit_func_table_[bm::set_COUNT_B] =
01717 &serial_stream_iterator<DEC>::get_bit_block_COUNT;
01718
01719
01720
01721 unsigned char header_flag = decoder_.get_8();
01722 if (!(header_flag & BM_HM_NO_BO))
01723 {
01724 decoder_.get_8();
01725 }
01726
01727
01728
01729 if (header_flag & BM_HM_ID_LIST)
01730 {
01731
01732 if (header_flag & BM_HM_RESIZE)
01733 {
01734 bv_size_ = decoder_.get_32();
01735 }
01736
01737 state_ = e_list_ids;
01738 id_cnt_ = decoder_.get_32();
01739 next();
01740 }
01741 else
01742 {
01743 if (!(header_flag & BM_HM_NO_GAPL))
01744 {
01745 unsigned i;
01746
01747 for (i = 0; i < bm::gap_levels; ++i)
01748 {
01749 glevels_[i] = decoder_.get_16();
01750 }
01751 }
01752
01753 if (header_flag & (1 << 1))
01754 {
01755 bv_size_ = decoder_.get_32();
01756 }
01757 state_ = e_blocks;
01758 }
01759 }
01760
01761 template<class DEC>
01762 void serial_stream_iterator<DEC>::next()
01763 {
01764 if (is_eof())
01765 {
01766 ++block_idx_;
01767 return;
01768 }
01769
01770 switch (state_)
01771 {
01772 case e_list_ids:
01773
01774 if (id_cnt_ == 0)
01775 {
01776 end_of_stream_ = true;
01777 state_ = e_unknown;
01778 break;
01779 }
01780 last_id_ = decoder_.get_32();
01781 --id_cnt_;
01782 break;
01783
01784 case e_blocks:
01785 if (block_idx_ == bm::set_total_blocks)
01786 {
01787 end_of_stream_ = true;
01788 state_ = e_unknown;
01789 break;
01790 }
01791
01792 block_type_ = decoder_.get_8();
01793
01794
01795
01796 if (block_type_ & (1 << 7))
01797 {
01798 mono_block_cnt_ = (block_type_ & ~(1 << 7)) - 1;
01799 state_ = e_zero_blocks;
01800 break;
01801 }
01802
01803 switch (block_type_)
01804 {
01805 case set_block_azero:
01806 case set_block_end:
01807 end_of_stream_ = true; state_ = e_unknown;
01808 break;
01809 case set_block_1zero:
01810 state_ = e_zero_blocks;
01811 mono_block_cnt_ = 0;
01812 break;
01813 case set_block_8zero:
01814 state_ = e_zero_blocks;
01815 mono_block_cnt_ = decoder_.get_8()-1;
01816 break;
01817 case set_block_16zero:
01818 state_ = e_zero_blocks;
01819 mono_block_cnt_ = decoder_.get_16()-1;
01820 break;
01821 case set_block_32zero:
01822 state_ = e_zero_blocks;
01823 mono_block_cnt_ = decoder_.get_32()-1;
01824 break;
01825 case set_block_aone:
01826 state_ = e_one_blocks;
01827 mono_block_cnt_ = bm::set_total_blocks - block_idx_;
01828 break;
01829 case set_block_1one:
01830 state_ = e_one_blocks;
01831 mono_block_cnt_ = 0;
01832 break;
01833 case set_block_8one:
01834 state_ = e_one_blocks;
01835 mono_block_cnt_ = decoder_.get_8()-1;
01836 break;
01837 case set_block_16one:
01838 state_ = e_one_blocks;
01839 mono_block_cnt_ = decoder_.get_16()-1;
01840 break;
01841 case set_block_32one:
01842 state_ = e_one_blocks;
01843 mono_block_cnt_ = decoder_.get_32()-1;
01844 break;
01845
01846 case set_block_bit:
01847 case set_block_bit_interval:
01848 case set_block_bit_0runs:
01849 case set_block_arrbit:
01850 state_ = e_bit_block;
01851 break;
01852
01853 case set_block_gap:
01854 case set_block_gap_egamma:
01855 gap_head_ = (gap_word_t)
01856 (sizeof(gap_word_t) == 2 ?
01857 decoder_.get_16() : decoder_.get_32());
01858 case set_block_arrgap:
01859 case set_block_arrgap_egamma:
01860 case set_block_arrgap_egamma_inv:
01861 case set_block_arrgap_inv:
01862 case set_block_bit_1bit:
01863 state_ = e_gap_block;
01864 break;
01865 case set_block_gapbit:
01866 state_ = e_gap_block;
01867 break;
01868
01869 default:
01870 BM_ASSERT(0);
01871 }
01872
01873 break;
01874
01875 case e_zero_blocks:
01876 case e_one_blocks:
01877 ++block_idx_;
01878 if (!mono_block_cnt_)
01879 {
01880 state_ = e_blocks;
01881 break;
01882 }
01883 --mono_block_cnt_;
01884 break;
01885
01886 case e_unknown:
01887 default:
01888 BM_ASSERT(0);
01889 }
01890 }
01891
01892 template<class DEC>
01893 void serial_stream_iterator<DEC>::skip_mono_blocks()
01894 {
01895 BM_ASSERT(state_ == e_zero_blocks || state_ == e_one_blocks);
01896 if (!mono_block_cnt_)
01897 {
01898 ++block_idx_;
01899 }
01900 else
01901 {
01902 block_idx_ += mono_block_cnt_+1;
01903 mono_block_cnt_ = 0;
01904 }
01905 state_ = e_blocks;
01906 }
01907
01908 template<class DEC>
01909 unsigned
01910 serial_stream_iterator<DEC>::get_bit_block_ASSIGN(
01911 bm::word_t* dst_block,
01912 bm::word_t* )
01913 {
01914 BM_ASSERT(this->state_ == e_bit_block);
01915 unsigned count = 0;
01916
01917 switch (this->block_type_)
01918 {
01919 case set_block_bit:
01920 decoder_.get_32(dst_block, bm::set_block_size);
01921 break;
01922 case set_block_bit_0runs:
01923 {
01924 if (dst_block)
01925 bit_block_set(dst_block, 0);
01926 unsigned char run_type = decoder_.get_8();
01927 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
01928 {
01929 unsigned run_length = decoder_.get_16();
01930 if (run_type)
01931 {
01932 decoder_.get_32(dst_block ? dst_block + j : dst_block, run_length);
01933 }
01934 j += run_length;
01935 }
01936 }
01937 break;
01938 case set_block_bit_interval:
01939 {
01940 unsigned head_idx = decoder_.get_16();
01941 unsigned tail_idx = decoder_.get_16();
01942 if (dst_block)
01943 {
01944 for (unsigned i = 0; i < head_idx; ++i)
01945 dst_block[i] = 0;
01946 decoder_.get_32(dst_block + head_idx,
01947 tail_idx - head_idx + 1);
01948 for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
01949 dst_block[j] = 0;
01950 }
01951 else
01952 {
01953 decoder_.seek((tail_idx - head_idx + 1) * 4);
01954 }
01955 }
01956 break;
01957 case set_block_arrbit:
01958 case set_block_bit_1bit:
01959 get_arr_bit(dst_block, true );
01960 break;
01961 case set_block_gapbit:
01962 BM_ASSERT(0);
01963 break;
01964 default:
01965 BM_ASSERT(0);
01966 }
01967 return count;
01968 }
01969
01970 template<class DEC>
01971 unsigned
01972 serial_stream_iterator<DEC>::get_bit_block_OR(bm::word_t* dst_block,
01973 bm::word_t* )
01974 {
01975 BM_ASSERT(this->state_ == e_bit_block);
01976 unsigned count = 0;
01977 switch (block_type_)
01978 {
01979 case set_block_bit:
01980 {
01981 bitblock_get_adapter ga(dst_block);
01982 bit_OR<bm::word_t> func;
01983 bitblock_store_adapter sa(dst_block);
01984
01985 bit_recomb(ga,
01986 decoder_,
01987 func,
01988 sa
01989 );
01990 }
01991 break;
01992 case set_block_bit_interval:
01993 {
01994 unsigned head_idx = decoder_.get_16();
01995 unsigned tail_idx = decoder_.get_16();
01996 for (unsigned i = head_idx; i <= tail_idx; ++i)
01997 dst_block[i] |= decoder_.get_32();
01998 }
01999 break;
02000 case set_block_bit_0runs:
02001 {
02002 unsigned char run_type = decoder_.get_8();
02003 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02004 {
02005 unsigned run_length = decoder_.get_16();
02006 if (run_type)
02007 {
02008 unsigned run_end = j + run_length;
02009 for (;j < run_end; ++j)
02010 {
02011 BM_ASSERT(j < bm::set_block_size);
02012 dst_block[j] |= decoder_.get_32();
02013 }
02014 }
02015 else
02016 {
02017 j += run_length;
02018 }
02019 }
02020 }
02021 break;
02022 case set_block_bit_1bit:
02023 case set_block_arrbit:
02024 get_arr_bit(dst_block, false );
02025 break;
02026 default:
02027 BM_ASSERT(0);
02028 }
02029 return count;
02030 }
02031
02032 template<class DEC>
02033 unsigned
02034 serial_stream_iterator<DEC>::get_bit_block_AND(bm::word_t* dst_block,
02035 bm::word_t* tmp_block)
02036 {
02037 BM_ASSERT(this->state_ == e_bit_block);
02038 BM_ASSERT(dst_block != tmp_block);
02039
02040 unsigned count = 0;
02041 switch (block_type_)
02042 {
02043 case set_block_bit:
02044 for (unsigned i = 0; i < bm::set_block_size; i+=4)
02045 {
02046 dst_block[i] &= decoder_.get_32();
02047 dst_block[i+1] &= decoder_.get_32();
02048 dst_block[i+2] &= decoder_.get_32();
02049 dst_block[i+3] &= decoder_.get_32();
02050 }
02051 break;
02052 case set_block_bit_0runs:
02053 {
02054 unsigned char run_type = decoder_.get_8();
02055 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02056 {
02057 unsigned run_length = decoder_.get_16();
02058 unsigned run_end = j + run_length;
02059 if (run_type)
02060 {
02061 for (;j < run_end; ++j)
02062 {
02063 BM_ASSERT(j < bm::set_block_size);
02064 dst_block[j] &= decoder_.get_32();
02065 }
02066 }
02067 else
02068 {
02069 for (;j < run_end; ++j)
02070 {
02071 BM_ASSERT(j < bm::set_block_size);
02072 dst_block[j] = 0;
02073 }
02074 }
02075 }
02076 }
02077 break;
02078 case set_block_bit_interval:
02079 {
02080 unsigned head_idx = decoder_.get_16();
02081 unsigned tail_idx = decoder_.get_16();
02082 unsigned i;
02083 for ( i = 0; i < head_idx; ++i)
02084 dst_block[i] = 0;
02085 for ( i = head_idx; i <= tail_idx; ++i)
02086 dst_block[i] &= decoder_.get_32();
02087 for ( i = tail_idx + 1; i < bm::set_block_size; ++i)
02088 dst_block[i] = 0;
02089 }
02090 break;
02091 case set_block_bit_1bit:
02092 case set_block_arrbit:
02093 get_arr_bit(tmp_block, true );
02094 if (dst_block)
02095 bit_block_and(dst_block, tmp_block);
02096 break;
02097 default:
02098 BM_ASSERT(0);
02099 }
02100 return count;
02101 }
02102
02103 template<class DEC>
02104 unsigned
02105 serial_stream_iterator<DEC>::get_bit_block_XOR(bm::word_t* dst_block,
02106 bm::word_t* tmp_block)
02107 {
02108 BM_ASSERT(this->state_ == e_bit_block);
02109 BM_ASSERT(dst_block != tmp_block);
02110
02111 unsigned count = 0;
02112 switch (block_type_)
02113 {
02114 case set_block_bit:
02115 for (unsigned i = 0; i < bm::set_block_size; ++i)
02116 dst_block[i] ^= decoder_.get_32();
02117 break;
02118 case set_block_bit_0runs:
02119 {
02120 unsigned char run_type = decoder_.get_8();
02121 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02122 {
02123 unsigned run_length = decoder_.get_16();
02124 if (run_type)
02125 {
02126 unsigned run_end = j + run_length;
02127 for (;j < run_end; ++j)
02128 {
02129 BM_ASSERT(j < bm::set_block_size);
02130 dst_block[j] ^= decoder_.get_32();
02131 }
02132 }
02133 else
02134 {
02135 j += run_length;
02136 }
02137 }
02138 }
02139 break;
02140 case set_block_bit_interval:
02141 {
02142 unsigned head_idx = decoder_.get_16();
02143 unsigned tail_idx = decoder_.get_16();
02144 for (unsigned i = head_idx; i <= tail_idx; ++i)
02145 dst_block[i] ^= decoder_.get_32();
02146 }
02147 break;
02148 case set_block_bit_1bit:
02149
02150 case set_block_arrbit:
02151 get_arr_bit(tmp_block, true );
02152 if (dst_block)
02153 {
02154 bit_block_xor(dst_block, tmp_block);
02155 }
02156 break;
02157 default:
02158 BM_ASSERT(0);
02159 }
02160 return count;
02161 }
02162
02163 template<class DEC>
02164 unsigned
02165 serial_stream_iterator<DEC>::get_bit_block_SUB(bm::word_t* dst_block,
02166 bm::word_t* tmp_block)
02167 {
02168 BM_ASSERT(this->state_ == e_bit_block);
02169 BM_ASSERT(dst_block != tmp_block);
02170
02171 unsigned count = 0;
02172 switch (block_type_)
02173 {
02174 case set_block_bit:
02175 for (unsigned i = 0; i < bm::set_block_size; ++i)
02176 dst_block[i] &= ~decoder_.get_32();
02177 break;
02178 case set_block_bit_0runs:
02179 {
02180 unsigned char run_type = decoder_.get_8();
02181 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02182 {
02183 unsigned run_length = decoder_.get_16();
02184 if (run_type)
02185 {
02186 unsigned run_end = j + run_length;
02187 for (;j < run_end; ++j)
02188 {
02189 BM_ASSERT(j < bm::set_block_size);
02190 dst_block[j] &= ~decoder_.get_32();
02191 }
02192 }
02193 else
02194 {
02195 j += run_length;
02196 }
02197 }
02198 }
02199 break;
02200 case set_block_bit_interval:
02201 {
02202 unsigned head_idx = decoder_.get_16();
02203 unsigned tail_idx = decoder_.get_16();
02204 for (unsigned i = head_idx; i <= tail_idx; ++i)
02205 dst_block[i] &= ~decoder_.get_32();
02206 }
02207 break;
02208 case set_block_bit_1bit:
02209
02210 case set_block_arrbit:
02211 get_arr_bit(tmp_block, true );
02212 if (dst_block)
02213 bit_block_sub(dst_block, tmp_block);
02214 break;
02215 default:
02216 BM_ASSERT(0);
02217 }
02218 return count;
02219 }
02220
02221
02222 template<class DEC>
02223 unsigned
02224 serial_stream_iterator<DEC>::get_bit_block_COUNT(bm::word_t* ,
02225 bm::word_t* )
02226 {
02227 BM_ASSERT(this->state_ == e_bit_block);
02228
02229 unsigned count = 0;
02230 switch (block_type_)
02231 {
02232 case set_block_bit:
02233 for (unsigned i = 0; i < bm::set_block_size; ++i)
02234 count += word_bitcount(decoder_.get_32());
02235 break;
02236 case set_block_bit_0runs:
02237 {
02238 unsigned count = 0;
02239 unsigned char run_type = decoder_.get_8();
02240 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02241 {
02242 unsigned run_length = decoder_.get_16();
02243 if (run_type)
02244 {
02245 unsigned run_end = j + run_length;
02246 for (;j < run_end; ++j)
02247 {
02248 count += word_bitcount(decoder_.get_32());
02249 }
02250 }
02251 else
02252 {
02253 j += run_length;
02254 }
02255 }
02256 return count;
02257 }
02258 case set_block_bit_interval:
02259 {
02260 unsigned head_idx = decoder_.get_16();
02261 unsigned tail_idx = decoder_.get_16();
02262 for (unsigned i = head_idx; i <= tail_idx; ++i)
02263 count += word_bitcount(decoder_.get_32());
02264 }
02265 break;
02266 case set_block_arrbit:
02267 count += get_arr_bit(0);
02268 break;
02269 case set_block_bit_1bit:
02270 ++count;
02271 decoder_.get_16();
02272 break;
02273 default:
02274 BM_ASSERT(0);
02275 }
02276 return count;
02277 }
02278
02279 template<class DEC>
02280 unsigned
02281 serial_stream_iterator<DEC>::get_bit_block_COUNT_A(bm::word_t* dst_block,
02282 bm::word_t* )
02283 {
02284 BM_ASSERT(this->state_ == e_bit_block);
02285 unsigned count = 0;
02286 if (dst_block)
02287 {
02288
02289 count =
02290 bit_block_calc_count(dst_block,
02291 dst_block + bm::set_block_size);
02292 }
02293
02294 switch (block_type_)
02295 {
02296 case set_block_bit:
02297 decoder_.get_32(0, bm::set_block_size);
02298 break;
02299 case set_block_bit_0runs:
02300 {
02301 unsigned char run_type = decoder_.get_8();
02302 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02303 {
02304 unsigned run_length = decoder_.get_16();
02305 if (run_type)
02306 {
02307 unsigned run_end = j + run_length;
02308 for (;j < run_end; ++j)
02309 {
02310 decoder_.get_32();
02311 }
02312 }
02313 else
02314 {
02315 j += run_length;
02316 }
02317 }
02318 }
02319 break;
02320
02321 case set_block_bit_interval:
02322 {
02323 unsigned head_idx = decoder_.get_16();
02324 unsigned tail_idx = decoder_.get_16();
02325 for (unsigned i = head_idx; i <= tail_idx; ++i)
02326 decoder_.get_32();
02327 }
02328 break;
02329 case set_block_arrbit:
02330 get_arr_bit(0);
02331 break;
02332 case set_block_bit_1bit:
02333 decoder_.get_16();
02334 break;
02335 default:
02336 BM_ASSERT(0);
02337 }
02338 return count;
02339 }
02340
02341
02342 template<class DEC>
02343 unsigned
02344 serial_stream_iterator<DEC>::get_bit_block_COUNT_AND(bm::word_t* dst_block,
02345 bm::word_t* tmp_block)
02346 {
02347 BM_ASSERT(this->state_ == e_bit_block);
02348 BM_ASSERT(dst_block);
02349
02350 unsigned count = 0;
02351 switch (block_type_)
02352 {
02353 case set_block_bit:
02354 for (unsigned i = 0; i < bm::set_block_size; ++i)
02355 count += word_bitcount(dst_block[i] & decoder_.get_32());
02356 break;
02357 case set_block_bit_0runs:
02358 {
02359 unsigned count = 0;
02360 unsigned char run_type = decoder_.get_8();
02361 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02362 {
02363 unsigned run_length = decoder_.get_16();
02364 if (run_type)
02365 {
02366 unsigned run_end = j + run_length;
02367 for (;j < run_end; ++j)
02368 {
02369 count += word_bitcount(dst_block[j] & decoder_.get_32());
02370 }
02371 }
02372 else
02373 {
02374 j += run_length;
02375 }
02376 }
02377 return count;
02378 }
02379 case set_block_bit_interval:
02380 {
02381 unsigned head_idx = decoder_.get_16();
02382 unsigned tail_idx = decoder_.get_16();
02383 for (unsigned i = head_idx; i <= tail_idx; ++i)
02384 count += word_bitcount(dst_block[i] & decoder_.get_32());
02385 }
02386 break;
02387 case set_block_bit_1bit:
02388
02389 case set_block_arrbit:
02390 get_arr_bit(tmp_block, true );
02391 count +=
02392 bit_operation_and_count(dst_block, dst_block + bm::set_block_size,
02393 tmp_block);
02394 break;
02395 default:
02396 BM_ASSERT(0);
02397 }
02398 return count;
02399 }
02400
02401 template<class DEC>
02402 unsigned
02403 serial_stream_iterator<DEC>::get_bit_block_COUNT_OR(bm::word_t* dst_block,
02404 bm::word_t* tmp_block)
02405 {
02406 BM_ASSERT(this->state_ == e_bit_block);
02407 BM_ASSERT(dst_block);
02408
02409 bitblock_sum_adapter count_adapter;
02410 switch (block_type_)
02411 {
02412 case set_block_bit:
02413 {
02414 bitblock_get_adapter ga(dst_block);
02415 bit_COUNT_OR<bm::word_t> func;
02416
02417 bit_recomb(ga,
02418 decoder_,
02419 func,
02420 count_adapter
02421 );
02422 }
02423 break;
02424 case set_block_bit_0runs:
02425 {
02426 unsigned count = 0;
02427 unsigned char run_type = decoder_.get_8();
02428 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02429 {
02430 unsigned run_length = decoder_.get_16();
02431 unsigned run_end = j + run_length;
02432 if (run_type)
02433 {
02434 for (;j < run_end; ++j)
02435 {
02436 BM_ASSERT(j < bm::set_block_size);
02437 count += word_bitcount(dst_block[j] | decoder_.get_32());
02438 }
02439 }
02440 else
02441 {
02442 for (;j < run_end; ++j)
02443 {
02444 BM_ASSERT(j < bm::set_block_size);
02445 count += word_bitcount(dst_block[j]);
02446 }
02447 }
02448 }
02449 return count;
02450 }
02451 case set_block_bit_interval:
02452 {
02453 unsigned head_idx = decoder_.get_16();
02454 unsigned tail_idx = decoder_.get_16();
02455 unsigned count = 0;
02456 unsigned i;
02457 for (i = 0; i < head_idx; ++i)
02458 count += word_bitcount(dst_block[i]);
02459 for (i = head_idx; i <= tail_idx; ++i)
02460 count += word_bitcount(dst_block[i] | decoder_.get_32());
02461 for (i = tail_idx + 1; i < bm::set_block_size; ++i)
02462 count += word_bitcount(dst_block[i]);
02463 return count;
02464 }
02465 case set_block_bit_1bit:
02466
02467 case set_block_arrbit:
02468 get_arr_bit(tmp_block, true );
02469 return
02470 bit_operation_or_count(dst_block,
02471 dst_block + bm::set_block_size,
02472 tmp_block);
02473 default:
02474 BM_ASSERT(0);
02475 }
02476 return count_adapter.sum();
02477 }
02478
02479 template<class DEC>
02480 unsigned
02481 serial_stream_iterator<DEC>::get_bit_block_COUNT_XOR(bm::word_t* dst_block,
02482 bm::word_t* tmp_block)
02483 {
02484 BM_ASSERT(this->state_ == e_bit_block);
02485 BM_ASSERT(dst_block);
02486
02487 bitblock_sum_adapter count_adapter;
02488 switch (block_type_)
02489 {
02490 case set_block_bit:
02491 {
02492 bitblock_get_adapter ga(dst_block);
02493 bit_COUNT_XOR<bm::word_t> func;
02494
02495 bit_recomb(ga,
02496 decoder_,
02497 func,
02498 count_adapter
02499 );
02500 }
02501 break;
02502 case set_block_bit_0runs:
02503 {
02504 unsigned count = 0;
02505 unsigned char run_type = decoder_.get_8();
02506 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02507 {
02508 unsigned run_length = decoder_.get_16();
02509 unsigned run_end = j + run_length;
02510 if (run_type)
02511 {
02512 for (;j < run_end; ++j)
02513 {
02514 BM_ASSERT(j < bm::set_block_size);
02515 count += word_bitcount(dst_block[j] ^ decoder_.get_32());
02516 }
02517 }
02518 else
02519 {
02520 for (;j < run_end; ++j)
02521 {
02522 BM_ASSERT(j < bm::set_block_size);
02523 count += word_bitcount(dst_block[j]);
02524 }
02525 }
02526 }
02527 return count;
02528 }
02529 case set_block_bit_interval:
02530 {
02531 unsigned head_idx = decoder_.get_16();
02532 unsigned tail_idx = decoder_.get_16();
02533 unsigned count = 0;
02534 unsigned i;
02535 for (i = 0; i < head_idx; ++i)
02536 count += word_bitcount(dst_block[i]);
02537 for (i = head_idx; i <= tail_idx; ++i)
02538 count += word_bitcount(dst_block[i] ^ decoder_.get_32());
02539 for (i = tail_idx + 1; i < bm::set_block_size; ++i)
02540 count += word_bitcount(dst_block[i]);
02541 return count;
02542 }
02543 case set_block_bit_1bit:
02544
02545 case set_block_arrbit:
02546 get_arr_bit(tmp_block, true );
02547 return
02548 bit_operation_xor_count(dst_block,
02549 dst_block + bm::set_block_size,
02550 tmp_block);
02551 default:
02552 BM_ASSERT(0);
02553 }
02554 return count_adapter.sum();
02555 }
02556
02557 template<class DEC>
02558 unsigned
02559 serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_AB(bm::word_t* dst_block,
02560 bm::word_t* tmp_block)
02561 {
02562 BM_ASSERT(this->state_ == e_bit_block);
02563 BM_ASSERT(dst_block);
02564
02565 bitblock_sum_adapter count_adapter;
02566 switch (block_type_)
02567 {
02568 case set_block_bit:
02569 {
02570 bitblock_get_adapter ga(dst_block);
02571 bit_COUNT_SUB_AB<bm::word_t> func;
02572
02573 bit_recomb(ga,
02574 decoder_,
02575 func,
02576 count_adapter
02577 );
02578 }
02579 break;
02580 case set_block_bit_0runs:
02581 {
02582 unsigned count = 0;
02583 unsigned char run_type = decoder_.get_8();
02584 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02585 {
02586 unsigned run_length = decoder_.get_16();
02587 unsigned run_end = j + run_length;
02588 if (run_type)
02589 {
02590 for (;j < run_end; ++j)
02591 {
02592 BM_ASSERT(j < bm::set_block_size);
02593 count += word_bitcount(dst_block[j] & ~decoder_.get_32());
02594 }
02595 }
02596 else
02597 {
02598 for (;j < run_end; ++j)
02599 {
02600 BM_ASSERT(j < bm::set_block_size);
02601 count += word_bitcount(dst_block[j]);
02602 }
02603 }
02604 }
02605 return count;
02606 }
02607 case set_block_bit_interval:
02608 {
02609 unsigned head_idx = decoder_.get_16();
02610 unsigned tail_idx = decoder_.get_16();
02611 unsigned count = 0;
02612 unsigned i;
02613 for (i = 0; i < head_idx; ++i)
02614 count += word_bitcount(dst_block[i]);
02615 for (i = head_idx; i <= tail_idx; ++i)
02616 count += word_bitcount(dst_block[i] & (~decoder_.get_32()));
02617 for (i = tail_idx + 1; i < bm::set_block_size; ++i)
02618 count += word_bitcount(dst_block[i]);
02619 return count;
02620 }
02621 break;
02622 case set_block_bit_1bit:
02623
02624 case set_block_arrbit:
02625 get_arr_bit(tmp_block, true );
02626 return
02627 bit_operation_sub_count(dst_block,
02628 dst_block + bm::set_block_size,
02629 tmp_block);
02630 default:
02631 BM_ASSERT(0);
02632 }
02633 return count_adapter.sum();
02634 }
02635
02636 template<class DEC>
02637 unsigned
02638 serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_BA(bm::word_t* dst_block,
02639 bm::word_t* tmp_block)
02640 {
02641 BM_ASSERT(this->state_ == e_bit_block);
02642 BM_ASSERT(dst_block);
02643
02644 bitblock_sum_adapter count_adapter;
02645 switch (block_type_)
02646 {
02647 case set_block_bit:
02648 {
02649 bitblock_get_adapter ga(dst_block);
02650 bit_COUNT_SUB_BA<bm::word_t> func;
02651
02652 bit_recomb(ga,
02653 decoder_,
02654 func,
02655 count_adapter
02656 );
02657 }
02658 break;
02659 case set_block_bit_0runs:
02660 {
02661 unsigned count = 0;
02662 unsigned char run_type = decoder_.get_8();
02663 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02664 {
02665 unsigned run_length = decoder_.get_16();
02666 unsigned run_end = j + run_length;
02667 if (run_type)
02668 {
02669 for (;j < run_end; ++j)
02670 {
02671 BM_ASSERT(j < bm::set_block_size);
02672 count += word_bitcount(decoder_.get_32() & (~dst_block[j]));
02673 }
02674 }
02675 else
02676 {
02677 j += run_length;
02678 }
02679 }
02680 return count;
02681 }
02682
02683 case set_block_bit_interval:
02684 {
02685 unsigned head_idx = decoder_.get_16();
02686 unsigned tail_idx = decoder_.get_16();
02687 unsigned count = 0;
02688 unsigned i;
02689 for (i = head_idx; i <= tail_idx; ++i)
02690 count += word_bitcount(decoder_.get_32() & (~dst_block[i]));
02691 return count;
02692 }
02693 break;
02694 case set_block_bit_1bit:
02695
02696 case set_block_arrbit:
02697 get_arr_bit(tmp_block, true );
02698 return
02699 bit_operation_sub_count(tmp_block,
02700 tmp_block + bm::set_block_size,
02701 dst_block);
02702 default:
02703 BM_ASSERT(0);
02704 }
02705 return count_adapter.sum();
02706 }
02707
02708
02709
02710 template<class DEC>
02711 unsigned serial_stream_iterator<DEC>::get_arr_bit(bm::word_t* dst_block,
02712 bool clear_target)
02713 {
02714 BM_ASSERT(this->block_type_ == set_block_arrbit ||
02715 this->block_type_ == set_block_bit_1bit);
02716
02717 gap_word_t len = decoder_.get_16();
02718 if (dst_block)
02719 {
02720 if (clear_target)
02721 bit_block_set(dst_block, 0);
02722
02723 if (this->block_type_ == set_block_bit_1bit)
02724 {
02725
02726 set_bit(dst_block, len);
02727 return 1;
02728 }
02729
02730 for (unsigned k = 0; k < len; ++k)
02731 {
02732 gap_word_t bit_idx = decoder_.get_16();
02733 set_bit(dst_block, bit_idx);
02734 }
02735 }
02736 else
02737 {
02738 if (this->block_type_ == set_block_bit_1bit)
02739 {
02740 return 1;
02741 }
02742
02743 decoder_.seek(len * 2);
02744 }
02745 return len;
02746 }
02747
02748 template<class DEC>
02749 unsigned serial_stream_iterator<DEC>::get_bit()
02750 {
02751 BM_ASSERT(this->block_type_ == set_block_bit_1bit);
02752 ++(this->block_idx_);
02753 this->state_ = e_blocks;
02754
02755 return decoder_.get_16();
02756 }
02757
02758 template<class DEC>
02759 void
02760 serial_stream_iterator<DEC>::get_gap_block(bm::gap_word_t* dst_block)
02761 {
02762 BM_ASSERT(this->state_ == e_gap_block ||
02763 this->block_type_ == set_block_bit_1bit);
02764 BM_ASSERT(dst_block);
02765
02766 read_gap_block(this->decoder_,
02767 this->block_type_,
02768 dst_block,
02769 this->gap_head_);
02770
02771 ++(this->block_idx_);
02772 this->state_ = e_blocks;
02773 }
02774
02775
02776 template<class DEC>
02777 unsigned
02778 serial_stream_iterator<DEC>::get_bit_block(bm::word_t* dst_block,
02779 bm::word_t* tmp_block,
02780 set_operation op)
02781 {
02782 BM_ASSERT(this->state_ == e_bit_block);
02783 get_bit_func_type bit_func = bit_func_table_[op];
02784 BM_ASSERT(bit_func);
02785 unsigned cnt = ((*this).*(bit_func))(dst_block, tmp_block);
02786 this->state_ = e_blocks;
02787 ++(this->block_idx_);
02788 return cnt;
02789 }
02790
02791
02792
02793 template<class BV>
02794 unsigned operation_deserializer<BV>::deserialize(
02795 bvector_type& bv,
02796 const unsigned char* buf,
02797 bm::word_t* temp_block,
02798 set_operation op)
02799 {
02800 ByteOrder bo_current = globals<true>::byte_order();
02801
02802 bm::decoder dec(buf);
02803 unsigned char header_flag = dec.get_8();
02804 ByteOrder bo = bo_current;
02805 if (!(header_flag & BM_HM_NO_BO))
02806 {
02807 bo = (bm::ByteOrder) dec.get_8();
02808 }
02809
02810 blocks_manager_type& bman = bv.get_blocks_manager();
02811 bit_block_guard<blocks_manager_type> bg(bman);
02812 if (temp_block == 0)
02813 {
02814 temp_block = bg.allocate();
02815 }
02816
02817 if (bo_current == bo)
02818 {
02819 serial_stream_current ss(buf);
02820 return
02821 iterator_deserializer<BV, serial_stream_current>::
02822 deserialize(bv, ss, temp_block, op);
02823 }
02824 switch (bo_current)
02825 {
02826 case BigEndian:
02827 {
02828 serial_stream_be ss(buf);
02829 return
02830 iterator_deserializer<BV, serial_stream_be>::
02831 deserialize(bv, ss, temp_block, op);
02832 }
02833 case LittleEndian:
02834 {
02835 serial_stream_le ss(buf);
02836 return
02837 iterator_deserializer<BV, serial_stream_le>::
02838 deserialize(bv, ss, temp_block, op);
02839 }
02840 default:
02841 BM_ASSERT(0);
02842 };
02843 return 0;
02844 }
02845
02846
02847 template<class BV>
02848 void operation_deserializer<BV>::deserialize(
02849 bvector_type& bv_target,
02850 const bvector_type& bv_mask,
02851 const unsigned char* buf,
02852 bm::word_t* temp_block,
02853 set_operation op)
02854 {
02855 ByteOrder bo_current = globals<true>::byte_order();
02856
02857 bm::decoder dec(buf);
02858 unsigned char header_flag = dec.get_8();
02859 ByteOrder bo = bo_current;
02860 if (!(header_flag & BM_HM_NO_BO))
02861 {
02862 bo = (bm::ByteOrder) dec.get_8();
02863 }
02864
02865 blocks_manager_type& bman = bv_target.get_blocks_manager();
02866 bit_block_guard<blocks_manager_type> bg(bman);
02867 if (temp_block == 0)
02868 {
02869 temp_block = bg.allocate();
02870 }
02871
02872 if (bo_current == bo)
02873 {
02874 serial_stream_current ss(buf);
02875 iterator_deserializer<BV, serial_stream_current>::
02876 deserialize(bv_target, bv_mask, ss, temp_block, op);
02877 return;
02878 }
02879 switch (bo_current)
02880 {
02881 case BigEndian:
02882 {
02883 serial_stream_be ss(buf);
02884 iterator_deserializer<BV, serial_stream_be>::
02885 deserialize(bv_target, bv_mask, ss, temp_block, op);
02886 }
02887 case LittleEndian:
02888 {
02889 serial_stream_le ss(buf);
02890 iterator_deserializer<BV, serial_stream_le>::
02891 deserialize(bv_target, bv_mask, ss, temp_block, op);
02892 }
02893 default:
02894 BM_ASSERT(0);
02895 };
02896 }
02897
02898
02899 template<class BV, class SerialIterator>
02900 void iterator_deserializer<BV, SerialIterator>::load_id_list(
02901 bvector_type& bv,
02902 serial_iterator_type& sit,
02903 unsigned id_count,
02904 bool set_clear)
02905 {
02906 const unsigned win_size = 64;
02907 bm::id_t id_buffer[win_size+1];
02908
02909 if (set_clear)
02910 {
02911 for (unsigned i = 0; i <= id_count;)
02912 {
02913 unsigned j;
02914 for (j = 0; j < win_size && i <= id_count; ++j, ++i)
02915 {
02916 id_buffer[j] = sit.get_id();
02917 sit.next();
02918 }
02919 bm::combine_or(bv, id_buffer, id_buffer + j);
02920 }
02921 }
02922 else
02923 {
02924 for (unsigned i = 0; i <= id_count;)
02925 {
02926 unsigned j;
02927 for (j = 0; j < win_size && i <= id_count; ++j, ++i)
02928 {
02929 id_buffer[j] = sit.get_id();
02930 sit.next();
02931 }
02932 bm::combine_sub(bv, id_buffer, id_buffer + j);
02933 }
02934 }
02935 }
02936
02937 template<class BV, class SerialIterator>
02938 unsigned
02939 iterator_deserializer<BV, SerialIterator>::finalize_target_vector(
02940 blocks_manager_type& bman,
02941 set_operation op,
02942 unsigned bv_block_idx)
02943 {
02944 unsigned count = 0;
02945 switch (op)
02946 {
02947 case set_OR: case set_SUB: case set_XOR:
02948 case set_COUNT: case set_COUNT_B: case set_COUNT_AND:
02949 case set_COUNT_SUB_BA:
02950
02951 break;
02952 case set_AND: case set_ASSIGN:
02953
02954 {
02955 unsigned i, j;
02956 bman.get_block_coord(bv_block_idx, &i, &j);
02957 bm::word_t*** blk_root = bman.get_rootblock();
02958 unsigned effective_top_size =
02959 bman.effective_top_block_size();
02960 for (;i < effective_top_size; ++i)
02961 {
02962 bm::word_t** blk_blk = blk_root[i];
02963 if (blk_blk == 0)
02964 {
02965 bv_block_idx+=bm::set_array_size-j;
02966 j = 0;
02967 continue;
02968 }
02969 for (;j < bm::set_array_size; ++j, ++bv_block_idx)
02970 {
02971 if (blk_blk[j])
02972 bman.zero_block(bv_block_idx);
02973 }
02974 j = 0;
02975 }
02976
02977 }
02978 break;
02979 case set_COUNT_A: case set_COUNT_OR: case set_COUNT_XOR:
02980 case set_COUNT_SUB_AB:
02981
02982 {
02983 unsigned i, j;
02984 bman.get_block_coord(bv_block_idx, &i, &j);
02985 bm::word_t*** blk_root = bman.get_rootblock();
02986 unsigned effective_top_size =
02987 bman.effective_top_block_size();
02988 for (;i < effective_top_size; ++i)
02989 {
02990 bm::word_t** blk_blk = blk_root[i];
02991 if (blk_blk == 0)
02992 {
02993 bv_block_idx+=bm::set_array_size-j;
02994 j = 0;
02995 continue;
02996 }
02997 for (;j < bm::set_array_size; ++j, ++bv_block_idx)
02998 {
02999 if (blk_blk[j])
03000 count += bman.block_bitcount(blk_blk[j]);
03001 }
03002 j = 0;
03003 }
03004
03005 }
03006 break;
03007 default:
03008 BM_ASSERT(0);
03009 }
03010 return count;
03011 }
03012
03013 template<class BV, class SerialIterator>
03014 unsigned
03015 iterator_deserializer<BV, SerialIterator>::process_id_list(
03016 bvector_type& bv,
03017 serial_iterator_type& sit,
03018 set_operation op)
03019 {
03020 unsigned count = 0;
03021 unsigned id_count = sit.get_id_count();
03022 bool set_clear = true;
03023 switch (op)
03024 {
03025 case set_AND:
03026 {
03027
03028 BV bv_tmp(BM_GAP);
03029 load_id_list(bv_tmp, sit, id_count, true);
03030 bv &= bv_tmp;
03031 }
03032 break;
03033 case set_ASSIGN:
03034 bv.clear(true);
03035
03036 case set_OR:
03037 set_clear = true;
03038
03039 case set_SUB:
03040 load_id_list(bv, sit, id_count, set_clear);
03041 break;
03042 case set_XOR:
03043 for (unsigned i = 0; i < id_count; ++i)
03044 {
03045 bm::id_t id = sit.get_id();
03046 bv[id] ^= true;
03047 sit.next();
03048 }
03049 break;
03050 case set_COUNT: case set_COUNT_B:
03051 for (unsigned i = 0; i < id_count; ++i)
03052 {
03053 sit.get_id();
03054 ++count;
03055 sit.next();
03056 }
03057 break;
03058 case set_COUNT_A:
03059 return bv.count();
03060 case set_COUNT_AND:
03061 for (unsigned i = 0; i < id_count; ++i)
03062 {
03063 bm::id_t id = sit.get_id();
03064 count += bv.get_bit(id);
03065 sit.next();
03066 }
03067 break;
03068 case set_COUNT_XOR:
03069 {
03070
03071 BV bv_tmp(BM_GAP);
03072 load_id_list(bv_tmp, sit, id_count, true);
03073 count += count_xor(bv, bv_tmp);
03074 }
03075 break;
03076 case set_COUNT_OR:
03077 {
03078
03079 BV bv_tmp(BM_GAP);
03080 load_id_list(bv_tmp, sit, id_count, true);
03081 count += count_or(bv, bv_tmp);
03082 }
03083 break;
03084 case set_COUNT_SUB_AB:
03085 {
03086
03087 BV bv_tmp(bv);
03088 load_id_list(bv_tmp, sit, id_count, false);
03089 count += bv_tmp.count();
03090 }
03091 break;
03092 default:
03093 BM_ASSERT(0);
03094 }
03095
03096 return count;
03097 }
03098
03099
03100 template<class BV, class SerialIterator>
03101 void iterator_deserializer<BV, SerialIterator>::deserialize(
03102 bvector_type& bv_target,
03103 const bvector_type& bv_mask,
03104 serial_iterator_type& sit,
03105 bm::word_t* temp_block,
03106 set_operation op)
03107 {
03108 BM_ASSERT(temp_block);
03109 BM_ASSERT(op == bm::set_AND ||
03110 op == bm::set_OR || op == bm::set_XOR || op == bm::set_SUB);
03111
03112 gap_word_t gap_temp_block[bm::gap_equiv_len*3];
03113 gap_temp_block[0] = 0;
03114
03115 bv_target.clear(true);
03116
03117 const blocks_manager_type& bman_mask = bv_mask.get_blocks_manager();
03118 blocks_manager_type& bman_target = bv_target.get_blocks_manager();
03119
03120 unsigned bv_size = sit.bv_size();
03121 if (bv_mask.size() > bv_size)
03122 {
03123 bv_size = bv_mask.size();
03124 }
03125 if (bv_target.size() < bv_size)
03126 {
03127 bv_target.resize(bv_size);
03128 }
03129
03130 unsigned top_blocks = bman_mask.effective_top_block_size();
03131
03132 BM_SET_MMX_GUARD
03133
03134 typename serial_iterator_type::iterator_state state;
03135 state = sit.get_state();
03136 if (state == serial_iterator_type::e_list_ids)
03137 {
03138 bv_target = bv_mask;
03139 process_id_list(bv_target, sit, op);
03140 return;
03141 }
03142
03143 unsigned bv_block_idx = 0;
03144 for (;1;)
03145 {
03146 bv_block_idx = sit.block_idx();
03147
03148 {
03149 unsigned tb_idx = bv_block_idx >> bm::set_array_shift;
03150 if (tb_idx > top_blocks)
03151 {
03152 if (op == bm::set_AND)
03153 break;
03154 if (sit.is_eof())
03155 break;
03156 }
03157 }
03158
03159 if (sit.is_eof())
03160 {
03161 if (op == bm::set_AND)
03162 break;
03163
03164 state = serial_iterator_type::e_zero_blocks;
03165 }
03166 else
03167 {
03168 state = sit.state();
03169 }
03170
03171 switch (state)
03172 {
03173 case serial_iterator_type::e_blocks:
03174 sit.next();
03175 continue;
03176 case serial_iterator_type::e_bit_block:
03177 {
03178 bm::set_operation sop = op;
03179 const bm::word_t* blk_mask = bman_mask.get_block(bv_block_idx);
03180 bm::word_t* blk_target = 0;
03181 if (!blk_mask)
03182 {
03183 switch (op)
03184 {
03185 case set_AND: case set_SUB:
03186
03187
03188 sop = set_ASSIGN;
03189 break;
03190 case set_OR: case set_XOR:
03191 blk_target = bman_target.make_bit_block(bv_block_idx);
03192 break;
03193 default:
03194 BM_ASSERT(0);
03195 }
03196 }
03197 else
03198 {
03199 int is_gap = BM_IS_GAP(blk_mask);
03200 blk_target = bman_target.copy_bit_block(bv_block_idx, blk_mask, is_gap);
03201 }
03202
03203
03204 sit.get_bit_block(blk_target, temp_block, sop);
03205 }
03206 break;
03207
03208 case serial_iterator_type::e_zero_blocks:
03209 {
03210 if (op == set_AND)
03211 {
03212 sit.skip_mono_blocks();
03213 break;
03214 }
03215 sit.next();
03216
03217 bman_target.copy_block(bv_block_idx, bman_mask);
03218 }
03219 break;
03220
03221 case serial_iterator_type::e_one_blocks:
03222 {
03223 BM_ASSERT(bv_block_idx == sit.block_idx());
03224 const bm::word_t* blk_mask = bman_mask.get_block(bv_block_idx);
03225 sit.next();
03226
03227 switch (op)
03228 {
03229 case set_OR:
03230 bman_target.set_block_all_set(bv_block_idx);
03231 break;
03232 case set_SUB:
03233 break;
03234 case set_AND:
03235 bman_target.copy_block(bv_block_idx, bman_mask);
03236 break;
03237 case set_XOR:
03238 if (blk_mask)
03239 {
03240 int is_gap = BM_IS_GAP(blk_mask);
03241 bm::word_t* blk_target =
03242 bman_target.copy_bit_block(bv_block_idx, blk_mask, is_gap);
03243 bit_block_xor(blk_target, FULL_BLOCK_ADDR);
03244 }
03245 else
03246 {
03247
03248 bman_target.set_block_all_set(bv_block_idx);
03249 }
03250 break;
03251 default:
03252 BM_ASSERT(0);
03253 }
03254
03255 }
03256 break;
03257
03258 case serial_iterator_type::e_gap_block:
03259 {
03260
03261 if (sit.get_block_type() == set_block_bit_1bit)
03262 {
03263 if (op == set_AND)
03264 {
03265 unsigned bit_idx = sit.get_bit();
03266 unsigned bn = (bv_block_idx << bm::set_block_shift) | bit_idx;
03267 bool bval_mask = bv_mask.test(bn);
03268 bv_target.set_bit(bn, bval_mask);
03269 break;
03270 }
03271 }
03272
03273 const bm::word_t* blk_mask = bman_mask.get_block(bv_block_idx);
03274
03275 sit.get_gap_block(gap_temp_block);
03276
03277
03278 unsigned len = gap_length(gap_temp_block);
03279 int level = gap_calc_level(len, bman_target.glen());
03280 --len;
03281
03282
03283 if (!blk_mask)
03284 {
03285 if (op == set_OR || op == set_XOR)
03286 {
03287 bman_target.set_gap_block(bv_block_idx, gap_temp_block, level);
03288 }
03289 }
03290 else
03291 {
03292
03293 bm::operation bop = bm::setop2op(op);
03294 bman_target.copy_block(bv_block_idx, bman_mask);
03295 bv_target.combine_operation_with_block(
03296 bv_block_idx,
03297 (bm::word_t*)gap_temp_block,
03298 1,
03299 bop);
03300 }
03301
03302 }
03303 break;
03304
03305 default:
03306 BM_ASSERT(0);
03307 }
03308
03309
03310 }
03311
03312 bv_target.forget_count();
03313 }
03314
03315
03316
03317 template<class BV, class SerialIterator>
03318 unsigned
03319 iterator_deserializer<BV, SerialIterator>::deserialize(
03320 bvector_type& bv,
03321 serial_iterator_type& sit,
03322 bm::word_t* temp_block,
03323 set_operation op)
03324 {
03325 BM_ASSERT(temp_block);
03326
03327 unsigned count = 0;
03328 gap_word_t gap_temp_block[bm::gap_equiv_len*3];
03329 gap_temp_block[0] = 0;
03330
03331 blocks_manager_type& bman = bv.get_blocks_manager();
03332
03333 bv.forget_count();
03334 if (sit.bv_size() && (sit.bv_size() > bv.size()))
03335 {
03336 bv.resize(sit.bv_size());
03337 }
03338
03339 BM_SET_MMX_GUARD
03340
03341 typename serial_iterator_type::iterator_state state;
03342 state = sit.get_state();
03343 if (state == serial_iterator_type::e_list_ids)
03344 {
03345 count = process_id_list(bv, sit, op);
03346 return count;
03347 }
03348
03349 unsigned bv_block_idx = 0;
03350
03351 for (;1;)
03352 {
03353 bm::set_operation sop = op;
03354 if (sit.is_eof())
03355 {
03356 count += finalize_target_vector(bman, op, bv_block_idx);
03357 return count;
03358 }
03359
03360 state = sit.state();
03361 switch (state)
03362 {
03363 case serial_iterator_type::e_blocks:
03364 sit.next();
03365 continue;
03366 case serial_iterator_type::e_bit_block:
03367 {
03368 BM_ASSERT(sit.block_idx() == bv_block_idx);
03369 bm::word_t* blk = bman.get_block(bv_block_idx);
03370
03371 if (!blk)
03372 {
03373 switch (op)
03374 {
03375 case set_AND: case set_SUB: case set_COUNT_AND:
03376 case set_COUNT_SUB_AB: case set_COUNT_A:
03377
03378
03379 sop = set_ASSIGN;
03380 break;
03381
03382 case set_OR: case set_XOR: case set_ASSIGN:
03383 blk = bman.make_bit_block(bv_block_idx);
03384 break;
03385
03386 case set_COUNT: case set_COUNT_XOR: case set_COUNT_OR:
03387 case set_COUNT_SUB_BA: case set_COUNT_B:
03388
03389 sop = set_COUNT;
03390 break;
03391
03392 case set_END:
03393 default:
03394 BM_ASSERT(0);
03395 }
03396 }
03397 else
03398 {
03399 int is_gap = BM_IS_GAP(blk);
03400 if (is_gap || IS_FULL_BLOCK(blk))
03401 {
03402 if (IS_FULL_BLOCK(blk) && is_const_set_operation(op))
03403 {
03404 }
03405 else
03406 {
03407
03408
03409 blk = bman.deoptimize_block(bv_block_idx);
03410 }
03411 }
03412 }
03413
03414
03415 unsigned c = sit.get_bit_block(blk, temp_block, sop);
03416 count += c;
03417 }
03418 break;
03419
03420 case serial_iterator_type::e_zero_blocks:
03421 {
03422 BM_ASSERT(bv_block_idx == sit.block_idx());
03423 bm::word_t* blk = bman.get_block(bv_block_idx);
03424 sit.next();
03425
03426 if (blk)
03427 {
03428 switch (op)
03429 {
03430 case set_AND: case set_ASSIGN:
03431
03432 blk = bman.zero_block(bv_block_idx);
03433 break;
03434
03435 case set_SUB: case set_COUNT_AND: case set_OR:
03436 case set_XOR: case set_COUNT_SUB_BA: case set_COUNT_B:
03437
03438 break;
03439
03440 case set_COUNT_SUB_AB: case set_COUNT_A: case set_COUNT_OR:
03441 case set_COUNT: case set_COUNT_XOR:
03442
03443
03444
03445 {
03446 unsigned c = bman.block_bitcount(blk);
03447 count += c;
03448 }
03449 break;
03450 case set_END:
03451 default:
03452 BM_ASSERT(0);
03453 }
03454 }
03455 }
03456 break;
03457
03458 case serial_iterator_type::e_one_blocks:
03459 {
03460 BM_ASSERT(bv_block_idx == sit.block_idx());
03461 bm::word_t* blk = bman.get_block(bv_block_idx);
03462 sit.next();
03463
03464 switch (op)
03465 {
03466 case set_OR: case set_ASSIGN:
03467 bman.set_block_all_set(bv_block_idx);
03468 break;
03469 case set_COUNT_OR: case set_COUNT_B: case set_COUNT:
03470
03471 count += bm::bits_in_block;
03472 break;
03473 case set_SUB:
03474 blk = bman.zero_block(bv_block_idx);
03475 break;
03476 case set_COUNT_SUB_AB: case set_AND:
03477
03478 break;
03479 case set_COUNT_AND: case set_COUNT_A:
03480 count += bman.block_bitcount(blk);
03481 break;
03482 default:
03483 if (blk)
03484 {
03485 switch (op)
03486 {
03487 case set_XOR:
03488 blk = bman.deoptimize_block(bv_block_idx);
03489 bit_block_xor(blk, FULL_BLOCK_ADDR);
03490 break;
03491 case set_COUNT_XOR:
03492 {
03493 count +=
03494 combine_count_operation_with_block(
03495 blk,
03496 FULL_BLOCK_ADDR,
03497 bm::COUNT_XOR);
03498 }
03499 break;
03500 case set_COUNT_SUB_BA:
03501 {
03502 count +=
03503 combine_count_operation_with_block(
03504 blk,
03505 FULL_BLOCK_ADDR,
03506 bm::COUNT_SUB_BA);
03507 }
03508 break;
03509 default:
03510 BM_ASSERT(0);
03511 }
03512 }
03513 else
03514 {
03515 switch (op)
03516 {
03517 case set_XOR:
03518
03519 bman.set_block_all_set(bv_block_idx);
03520 break;
03521 case set_COUNT_XOR:
03522 count += bm::bits_in_block;
03523 break;
03524 case set_COUNT_SUB_BA:
03525
03526 count += bm::bits_in_block;
03527 break;
03528 default:
03529 break;
03530 }
03531 }
03532 }
03533
03534 }
03535 break;
03536
03537 case serial_iterator_type::e_gap_block:
03538 {
03539 BM_ASSERT(bv_block_idx == sit.block_idx());
03540 bm::word_t* blk = bman.get_block(bv_block_idx);
03541
03542 sit.get_gap_block(gap_temp_block);
03543
03544 unsigned len = gap_length(gap_temp_block);
03545 int level = gap_calc_level(len, bman.glen());
03546 --len;
03547
03548 bool const_op = is_const_set_operation(op);
03549 if (const_op)
03550 {
03551 distance_metric metric = operation2metric(op);
03552 bm::word_t* gptr = (bm::word_t*)gap_temp_block;
03553 BMSET_PTRGAP(gptr);
03554 unsigned c =
03555 combine_count_operation_with_block(
03556 blk,
03557 gptr,
03558 metric);
03559 count += c;
03560 }
03561 else
03562 {
03563 if ((sop == set_ASSIGN) && blk)
03564 {
03565 blk = bman.zero_block(bv_block_idx);
03566 sop = set_OR;
03567 }
03568 if (blk == 0)
03569 {
03570 switch (sop)
03571 {
03572 case set_AND: case set_SUB:
03573 break;
03574 case set_OR: case set_XOR: case set_ASSIGN:
03575 bman.set_gap_block(
03576 bv_block_idx, gap_temp_block, level);
03577 break;
03578 default:
03579 BM_ASSERT(0);
03580 }
03581 }
03582 else
03583 {
03584 bm::operation bop = bm::setop2op(op);
03585 if (level == -1)
03586 {
03587 gap_convert_to_bitset(temp_block, gap_temp_block);
03588 bv.combine_operation_with_block(bv_block_idx,
03589 temp_block,
03590 0,
03591 bop);
03592 }
03593 else
03594 {
03595 set_gap_level(gap_temp_block, level);
03596 bv.combine_operation_with_block(
03597 bv_block_idx,
03598 (bm::word_t*)gap_temp_block,
03599 1,
03600 bop);
03601 }
03602 }
03603 }
03604 }
03605 break;
03606
03607 default:
03608 BM_ASSERT(0);
03609 }
03610
03611 ++bv_block_idx;
03612
03613 }
03614
03615 return count;
03616 }
03617
03618
03619
03620
03621 }
03622
03623 #include "bmundef.h"
03624
03625 #ifdef _MSC_VER
03626 #pragma warning( pop )
03627 #endif
03628
03629
03630 #endif