00001 #ifndef BMALGO_IMPL__H__INCLUDED__
00002 #define BMALGO_IMPL__H__INCLUDED__
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #ifdef _MSC_VER
00030 #pragma warning( push )
00031 #pragma warning( disable : 4311 4312 4127)
00032 #endif
00033
00034 #include "bmdef.h"
00035 #include "bmutil.h"
00036
00037 namespace bm
00038 {
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055 enum distance_metric
00056 {
00057 COUNT_AND = set_COUNT_AND,
00058 COUNT_XOR = set_COUNT_XOR,
00059 COUNT_OR = set_COUNT_OR,
00060 COUNT_SUB_AB = set_COUNT_SUB_AB,
00061 COUNT_SUB_BA = set_COUNT_SUB_BA,
00062 COUNT_A = set_COUNT_A,
00063 COUNT_B = set_COUNT_B
00064 };
00065
00066
00067
00068
00069
00070 inline
00071 distance_metric operation2metric(set_operation op)
00072 {
00073 BM_ASSERT(is_const_set_operation(op));
00074 if (op == set_COUNT) op = set_COUNT_B;
00075
00076
00077 return (distance_metric) op;
00078 }
00079
00080
00081
00082
00083
00084
00085 struct distance_metric_descriptor
00086 {
00087 distance_metric metric;
00088 bm::id_t result;
00089
00090 distance_metric_descriptor(distance_metric m)
00091 : metric(m),
00092 result(0)
00093 {}
00094 distance_metric_descriptor()
00095 : metric(bm::COUNT_XOR),
00096 result(0)
00097 {}
00098
00099
00100
00101
00102 void reset()
00103 {
00104 result = 0;
00105 }
00106 };
00107
00108
00109
00110
00111
00112
00113
00114
00115 inline
00116 void combine_count_operation_with_block(const bm::word_t* blk,
00117 const bm::word_t* arg_blk,
00118 distance_metric_descriptor* dmit,
00119 distance_metric_descriptor* dmit_end)
00120
00121 {
00122 gap_word_t* g1 = BMGAP_PTR(blk);
00123 gap_word_t* g2 = BMGAP_PTR(arg_blk);
00124
00125 unsigned gap = BM_IS_GAP(blk);
00126 unsigned arg_gap = BM_IS_GAP(arg_blk);
00127
00128 if (gap)
00129 {
00130 if (arg_gap)
00131 {
00132 for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
00133 {
00134 distance_metric_descriptor& dmd = *it;
00135
00136 switch (dmd.metric)
00137 {
00138 case bm::COUNT_AND:
00139 dmd.result += gap_count_and(g1, g2);
00140 break;
00141 case bm::COUNT_OR:
00142 dmd.result += gap_count_or(g1, g2);
00143 break;
00144 case bm::COUNT_SUB_AB:
00145 dmd.result += gap_count_sub(g1, g2);
00146 break;
00147 case bm::COUNT_SUB_BA:
00148 dmd.result += gap_count_sub(g2, g1);
00149 break;
00150 case bm::COUNT_XOR:
00151 dmd.result += gap_count_xor(g1, g2);
00152 break;
00153 case bm::COUNT_A:
00154 dmd.result += gap_bit_count(g1);
00155 break;
00156 case bm::COUNT_B:
00157 dmd.result += gap_bit_count(g2);
00158 break;
00159 }
00160
00161 }
00162
00163 return;
00164
00165 }
00166 else
00167 {
00168 for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
00169 {
00170 distance_metric_descriptor& dmd = *it;
00171
00172 switch (dmd.metric)
00173 {
00174 case bm::COUNT_AND:
00175 if (arg_blk)
00176 dmd.result += gap_bitset_and_count(arg_blk, g1);
00177 break;
00178 case bm::COUNT_OR:
00179 if (!arg_blk)
00180 dmd.result += gap_bit_count(g1);
00181 else
00182 dmd.result += gap_bitset_or_count(arg_blk, g1);
00183 break;
00184 case bm::COUNT_SUB_AB:
00185 {
00186 bm::word_t BM_ALIGN16 temp_bit_blk[bm::set_block_size] BM_ALIGN16ATTR;
00187
00188 gap_convert_to_bitset((bm::word_t*) temp_bit_blk, g1);
00189 dmd.result +=
00190 bit_operation_sub_count((bm::word_t*)temp_bit_blk,
00191 ((bm::word_t*)temp_bit_blk) + bm::set_block_size,
00192 arg_blk);
00193 }
00194 break;
00195 case bm::COUNT_SUB_BA:
00196 dmd.metric = bm::COUNT_SUB_AB;
00197 combine_count_operation_with_block(arg_blk,
00198 blk,
00199 it, it+1);
00200 dmd.metric = bm::COUNT_SUB_BA;
00201 break;
00202 case bm::COUNT_XOR:
00203 if (!arg_blk)
00204 dmd.result += gap_bit_count(g1);
00205 else
00206 dmd.result += gap_bitset_xor_count(arg_blk, g1);
00207 break;
00208 case bm::COUNT_A:
00209 if (g1)
00210 dmd.result += gap_bit_count(g1);
00211 break;
00212 case bm::COUNT_B:
00213 if (arg_blk)
00214 {
00215 dmd.result +=
00216 bit_block_calc_count(arg_blk,
00217 arg_blk + bm::set_block_size);
00218 }
00219 break;
00220 }
00221
00222 }
00223
00224 return;
00225
00226 }
00227 }
00228 else
00229 {
00230 if (arg_gap)
00231 {
00232 for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
00233 {
00234 distance_metric_descriptor& dmd = *it;
00235
00236 switch (dmd.metric)
00237 {
00238 case bm::COUNT_AND:
00239 if (blk)
00240 dmd.result += gap_bitset_and_count(blk, g2);
00241 break;
00242 case bm::COUNT_OR:
00243 if (!blk)
00244 dmd.result += gap_bit_count(g2);
00245 else
00246 dmd.result += gap_bitset_or_count(blk, g2);
00247 break;
00248 case bm::COUNT_SUB_AB:
00249 if (blk)
00250 dmd.result += gap_bitset_sub_count(blk, g2);
00251 break;
00252 case bm::COUNT_SUB_BA:
00253 dmd.metric = bm::COUNT_SUB_AB;
00254 combine_count_operation_with_block(arg_blk,
00255
00256 blk,
00257
00258 it, it+1);
00259 dmd.metric = bm::COUNT_SUB_BA;
00260 break;
00261 case bm::COUNT_XOR:
00262 if (!blk)
00263 dmd.result += gap_bit_count(g2);
00264 else
00265 dmd.result += gap_bitset_xor_count(blk, g2);
00266 break;
00267 case bm::COUNT_A:
00268 if (blk)
00269 {
00270 dmd.result +=
00271 bit_block_calc_count(blk,
00272 blk + bm::set_block_size);
00273 }
00274 break;
00275 case bm::COUNT_B:
00276 if (g2)
00277 dmd.result += gap_bit_count(g2);
00278 break;
00279 }
00280
00281 }
00282
00283 return;
00284 }
00285 }
00286
00287
00288
00289
00290
00291
00292 for (distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
00293 {
00294 distance_metric_descriptor& dmd = *it;
00295 bit_operation_count_func_type gfunc =
00296 operation_functions<true>::bit_operation_count(dmd.metric);
00297 if (gfunc)
00298 {
00299 dmd.result += (*gfunc)(blk, blk + bm::set_block_size, arg_blk);
00300 }
00301 else
00302 {
00303 switch (dmd.metric)
00304 {
00305 case bm::COUNT_A:
00306 if (blk)
00307 dmd.result += bit_block_calc_count(blk, blk + bm::set_block_size);
00308 break;
00309 case bm::COUNT_B:
00310 if (arg_blk)
00311 dmd.result +=
00312 bit_block_calc_count(arg_blk,
00313 arg_blk + bm::set_block_size);
00314 break;
00315 default:
00316 BM_ASSERT(0);
00317 }
00318 }
00319
00320 }
00321 }
00322
00323
00324
00325
00326
00327
00328 inline
00329 unsigned combine_count_and_operation_with_block(const bm::word_t* blk,
00330 const bm::word_t* arg_blk)
00331 {
00332 unsigned gap = BM_IS_GAP(blk);
00333 unsigned arg_gap = BM_IS_GAP(arg_blk);
00334 if (gap)
00335 {
00336 if (arg_gap)
00337 {
00338 return gap_count_and(BMGAP_PTR(blk), BMGAP_PTR(arg_blk));
00339 }
00340 else
00341 {
00342 return gap_bitset_and_count(arg_blk, BMGAP_PTR(blk));
00343 }
00344 }
00345 else
00346 {
00347 if (arg_gap)
00348 {
00349 return gap_bitset_and_count(blk, BMGAP_PTR(arg_blk));
00350 }
00351 }
00352
00353
00354
00355
00356 return bit_operation_and_count(blk, blk + (bm::set_block_size), arg_blk);
00357 }
00358
00359
00360
00361
00362
00363
00364
00365 inline
00366 void combine_any_operation_with_block(const bm::word_t* blk,
00367 unsigned gap,
00368 const bm::word_t* arg_blk,
00369 int arg_gap,
00370 distance_metric_descriptor* dmit,
00371 distance_metric_descriptor* dmit_end)
00372
00373 {
00374 gap_word_t* res=0;
00375
00376 gap_word_t* g1 = BMGAP_PTR(blk);
00377 gap_word_t* g2 = BMGAP_PTR(arg_blk);
00378
00379 if (gap)
00380 {
00381 if (arg_gap)
00382 {
00383 gap_word_t tmp_buf[bm::gap_max_buff_len * 3];
00384
00385 for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
00386 {
00387 distance_metric_descriptor& dmd = *it;
00388 if (dmd.result)
00389 {
00390 continue;
00391 }
00392 res = 0;
00393 unsigned dsize = 0;
00394 switch (dmd.metric)
00395 {
00396 case bm::COUNT_AND:
00397 dmd.result += gap_operation_any_and(g1, g2);
00398 break;
00399 case bm::COUNT_OR:
00400 res = gap_operation_or(g1, g2, tmp_buf, dsize);
00401 break;
00402 case bm::COUNT_SUB_AB:
00403 dmd.result += gap_operation_any_sub(g1, g2);
00404 break;
00405 case bm::COUNT_SUB_BA:
00406 dmd.result += gap_operation_any_sub(g2, g1);
00407 break;
00408 case bm::COUNT_XOR:
00409 dmd.result += gap_operation_any_xor(g1, g2);
00410 break;
00411 case bm::COUNT_A:
00412 res = g1;
00413 break;
00414 case bm::COUNT_B:
00415 res = g2;
00416 break;
00417 }
00418 if (res)
00419 dmd.result += !gap_is_all_zero(res, bm::gap_max_bits);
00420
00421 }
00422
00423 return;
00424
00425 }
00426 else
00427 {
00428 for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
00429 {
00430 distance_metric_descriptor& dmd = *it;
00431 if (dmd.result)
00432 {
00433 continue;
00434 }
00435
00436 switch (dmd.metric)
00437 {
00438 case bm::COUNT_AND:
00439 if (arg_blk)
00440 dmd.result += gap_bitset_and_any(arg_blk, g1);
00441 break;
00442 case bm::COUNT_OR:
00443 if (!arg_blk)
00444 dmd.result += !gap_is_all_zero(g1, bm::gap_max_bits);
00445 else
00446 dmd.result += gap_bitset_or_any(arg_blk, g1);
00447 break;
00448 case bm::COUNT_SUB_AB:
00449 {
00450 bm::word_t BM_ALIGN16 temp_blk[bm::set_block_size] BM_ALIGN16ATTR;
00451 gap_convert_to_bitset((bm::word_t*) temp_blk, g1);
00452 dmd.result +=
00453 bit_operation_sub_any((bm::word_t*)temp_blk,
00454 ((bm::word_t*)temp_blk) + bm::set_block_size,
00455 arg_blk);
00456 }
00457 break;
00458 case bm::COUNT_SUB_BA:
00459 dmd.metric = bm::COUNT_SUB_AB;
00460 combine_any_operation_with_block(arg_blk,
00461 arg_gap,
00462 blk,
00463 gap,
00464 it, it+1);
00465 dmd.metric = bm::COUNT_SUB_BA;
00466 break;
00467 case bm::COUNT_XOR:
00468 if (!arg_blk)
00469 dmd.result += !gap_is_all_zero(g1, bm::gap_max_bits);
00470 else
00471 dmd.result += gap_bitset_xor_any(arg_blk, g1);
00472 break;
00473 case bm::COUNT_A:
00474 if (g1)
00475 dmd.result += !gap_is_all_zero(g1, bm::gap_max_bits);
00476 break;
00477 case bm::COUNT_B:
00478 if (arg_blk)
00479 {
00480 dmd.result +=
00481 !bit_is_all_zero((bm::wordop_t*)arg_blk,
00482 (bm::wordop_t*)(arg_blk + bm::set_block_size));
00483 }
00484 break;
00485 }
00486
00487 }
00488
00489 return;
00490
00491 }
00492 }
00493 else
00494 {
00495 if (arg_gap)
00496 {
00497 for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
00498 {
00499 distance_metric_descriptor& dmd = *it;
00500 if (dmd.result)
00501 {
00502 continue;
00503 }
00504
00505 switch (dmd.metric)
00506 {
00507 case bm::COUNT_AND:
00508 if (blk)
00509 dmd.result += gap_bitset_and_any(blk, g2);
00510 break;
00511 case bm::COUNT_OR:
00512 if (!blk)
00513 dmd.result += !gap_is_all_zero(g2, bm::gap_max_bits);
00514 else
00515 dmd.result += gap_bitset_or_any(blk, g2);
00516 break;
00517 case bm::COUNT_SUB_AB:
00518 if (blk)
00519 dmd.result += gap_bitset_sub_any(blk, g2);
00520 break;
00521 case bm::COUNT_SUB_BA:
00522 dmd.metric = bm::COUNT_SUB_AB;
00523 combine_any_operation_with_block(arg_blk,
00524 arg_gap,
00525 blk,
00526 gap,
00527 it, it+1);
00528 dmd.metric = bm::COUNT_SUB_BA;
00529 break;
00530 case bm::COUNT_XOR:
00531 if (!blk)
00532 dmd.result += !gap_is_all_zero(g2, bm::gap_max_bits);
00533 else
00534 dmd.result += gap_bitset_xor_any(blk, g2);
00535 break;
00536 case bm::COUNT_A:
00537 if (blk)
00538 {
00539 dmd.result+=
00540 !bit_is_all_zero((bm::wordop_t*)blk,
00541 (bm::wordop_t*)blk + bm::set_block_size);
00542 }
00543 break;
00544 case bm::COUNT_B:
00545 if (g2)
00546 dmd.result += !gap_is_all_zero(g2, bm::gap_max_bits);
00547 break;
00548 }
00549
00550 }
00551
00552 return;
00553 }
00554 }
00555
00556
00557
00558
00559
00560 const bm::word_t* blk_end;
00561 const bm::word_t* arg_end;
00562
00563 blk_end = blk + (bm::set_block_size);
00564 arg_end = arg_blk + (bm::set_block_size);
00565
00566 for (distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
00567 {
00568 distance_metric_descriptor& dmd = *it;
00569 if (dmd.result)
00570 {
00571 continue;
00572 }
00573
00574 switch (dmd.metric)
00575 {
00576 case bm::COUNT_AND:
00577 dmd.result +=
00578 bit_operation_and_any(blk, blk_end, arg_blk);
00579 break;
00580 case bm::COUNT_OR:
00581 dmd.result +=
00582 bit_operation_or_any(blk, blk_end, arg_blk);
00583 break;
00584 case bm::COUNT_SUB_AB:
00585 dmd.result +=
00586 bit_operation_sub_any(blk, blk_end, arg_blk);
00587 break;
00588 case bm::COUNT_SUB_BA:
00589 dmd.result +=
00590 bit_operation_sub_any(arg_blk, arg_end, blk);
00591 break;
00592 case bm::COUNT_XOR:
00593 dmd.result +=
00594 bit_operation_xor_any(blk, blk_end, arg_blk);
00595 break;
00596 case bm::COUNT_A:
00597 if (blk)
00598 dmd.result += !bit_is_all_zero((bm::wordop_t*)blk,
00599 (bm::wordop_t*)blk_end);
00600 break;
00601 case bm::COUNT_B:
00602 if (arg_blk)
00603 dmd.result += !bit_is_all_zero((bm::wordop_t*)arg_blk,
00604 (bm::wordop_t*)arg_end);
00605 break;
00606 }
00607
00608 }
00609 }
00610
00611
00612
00613
00614
00615
00616
00617
00618 inline
00619 unsigned combine_count_operation_with_block(const bm::word_t* blk,
00620 const bm::word_t* arg_blk,
00621 distance_metric metric)
00622 {
00623 distance_metric_descriptor dmd(metric);
00624 combine_count_operation_with_block(blk,
00625 arg_blk,
00626
00627 &dmd, &dmd+1);
00628 return dmd.result;
00629 }
00630
00631
00632
00633
00634
00635
00636
00637 inline
00638 unsigned combine_any_operation_with_block(const bm::word_t* blk,
00639 unsigned gap,
00640 const bm::word_t* arg_blk,
00641 int arg_gap,
00642 distance_metric metric)
00643 {
00644 distance_metric_descriptor dmd(metric);
00645 combine_any_operation_with_block(blk, gap,
00646 arg_blk, arg_gap,
00647 &dmd, &dmd+1);
00648 return dmd.result;
00649 }
00650
00651
00652
00653
00654
00655
00656
00657
00658 inline
00659 void distance_stage(const distance_metric_descriptor* dmit,
00660 const distance_metric_descriptor* dmit_end,
00661 bool* is_all_and)
00662 {
00663 for (const distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
00664 {
00665 if (it->metric != bm::COUNT_AND)
00666 {
00667 *is_all_and = false;
00668 }
00669 }
00670 }
00671
00672
00673
00674
00675
00676
00677
00678
00679
00680
00681
00682
00683
00684
00685
00686
00687
00688
00689
00690
00691
00692 template<class BV>
00693 void distance_operation(const BV& bv1,
00694 const BV& bv2,
00695 distance_metric_descriptor* dmit,
00696 distance_metric_descriptor* dmit_end)
00697 {
00698 const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
00699 const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
00700
00701 bool is_all_and = true;
00702 distance_stage(dmit, dmit_end, &is_all_and);
00703
00704 bm::word_t*** blk_root = bman1.get_rootblock();
00705 unsigned block_idx = 0;
00706 unsigned i, j;
00707
00708 const bm::word_t* blk;
00709 const bm::word_t* arg_blk;
00710
00711 BM_SET_MMX_GUARD
00712
00713 unsigned effective_top_block_size = bman1.effective_top_block_size();
00714 unsigned ebs2 = bman2.effective_top_block_size();
00715 if (ebs2 > effective_top_block_size)
00716 effective_top_block_size = ebs2;
00717
00718 for (i = 0; i < effective_top_block_size; ++i)
00719 {
00720 bm::word_t** blk_blk = blk_root[i];
00721
00722 if (blk_blk == 0)
00723 {
00724
00725 if (is_all_and)
00726 {
00727 block_idx += bm::set_array_size;
00728 continue;
00729 }
00730 const bm::word_t* const* bvbb = bman2.get_topblock(i);
00731 if (bvbb == 0)
00732 {
00733 block_idx += bm::set_array_size;
00734 continue;
00735 }
00736
00737 blk = 0;
00738 for (j = 0; j < bm::set_array_size; ++j,++block_idx)
00739 {
00740 arg_blk = bman2.get_block(i, j);
00741 if (!arg_blk)
00742 continue;
00743 combine_count_operation_with_block(blk,
00744 arg_blk,
00745 dmit, dmit_end);
00746 }
00747 continue;
00748 }
00749
00750 for (j = 0; j < bm::set_array_size; ++j, ++block_idx)
00751 {
00752 blk = blk_blk[j];
00753 if (blk == 0 && is_all_and)
00754 continue;
00755
00756 arg_blk = bman2.get_block(i, j);
00757
00758 if (blk == 0 && arg_blk == 0)
00759 continue;
00760
00761 combine_count_operation_with_block(blk,
00762 arg_blk,
00763 dmit, dmit_end);
00764
00765
00766 }
00767
00768 }
00769 }
00770
00771
00772
00773
00774
00775
00776
00777
00778
00779
00780
00781
00782 template<class BV>
00783 unsigned distance_and_operation(const BV& bv1,
00784 const BV& bv2)
00785 {
00786 const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
00787 const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
00788
00789 bm::word_t*** blk_root = bman1.get_rootblock();
00790 bm::word_t*** blk_root_arg = bman2.get_rootblock();
00791
00792 unsigned count = 0;
00793
00794 BM_SET_MMX_GUARD
00795
00796 unsigned effective_top_block_size =
00797 bm::min_value(bman1.effective_top_block_size(),
00798 bman2.effective_top_block_size());
00799
00800 for (unsigned i = 0; i < effective_top_block_size; ++i)
00801 {
00802 bm::word_t** blk_blk;
00803 bm::word_t** blk_blk_arg;
00804 if ((blk_blk = blk_root[i]) == 0 || (blk_blk_arg= blk_root_arg[i]) == 0)
00805 {
00806 continue;
00807 }
00808 for (unsigned j = 0; j < bm::set_array_size; j+=4)
00809 {
00810 (blk_blk[j] && blk_blk_arg[j]) ?
00811 count += combine_count_and_operation_with_block(blk_blk[j],blk_blk_arg[j])
00812 :0;
00813 (blk_blk[j+1] && blk_blk_arg[j+1]) ?
00814 count += combine_count_and_operation_with_block(blk_blk[j+1],blk_blk_arg[j+1])
00815 :0;
00816 (blk_blk[j+2] && blk_blk_arg[j+2]) ?
00817 count += combine_count_and_operation_with_block(blk_blk[j+2],blk_blk_arg[j+2])
00818 :0;
00819 (blk_blk[j+3] && blk_blk_arg[j+3]) ?
00820 count += combine_count_and_operation_with_block(blk_blk[j+3],blk_blk_arg[j+3])
00821 :0;
00822 }
00823
00824 }
00825 return count;
00826 }
00827
00828
00829
00830
00831
00832
00833
00834
00835
00836
00837
00838
00839
00840
00841
00842
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852 template<class BV>
00853 void distance_operation_any(const BV& bv1,
00854 const BV& bv2,
00855 distance_metric_descriptor* dmit,
00856 distance_metric_descriptor* dmit_end)
00857 {
00858 const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
00859 const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
00860
00861 bool is_all_and = true;
00862
00863 distance_stage(dmit, dmit_end, &is_all_and);
00864
00865 bm::word_t*** blk_root = bman1.get_rootblock();
00866 unsigned block_idx = 0;
00867 unsigned i, j;
00868
00869 const bm::word_t* blk;
00870 const bm::word_t* arg_blk;
00871 bool blk_gap;
00872 bool arg_gap;
00873
00874 BM_SET_MMX_GUARD
00875
00876 unsigned effective_top_block_size = bman1.effective_top_block_size();
00877 unsigned ebs2 = bman2.effective_top_block_size();
00878 if (ebs2 > effective_top_block_size)
00879 effective_top_block_size = ebs2;
00880
00881 for (i = 0; i < effective_top_block_size; ++i)
00882 {
00883 bm::word_t** blk_blk = blk_root[i];
00884
00885 if (blk_blk == 0)
00886 {
00887
00888 if (is_all_and)
00889 {
00890 block_idx += bm::set_array_size;
00891 continue;
00892 }
00893
00894 const bm::word_t* const* bvbb = bman2.get_topblock(i);
00895 if (bvbb == 0)
00896 {
00897 block_idx += bm::set_array_size;
00898 continue;
00899 }
00900
00901 blk = 0;
00902 blk_gap = false;
00903
00904 for (j = 0; j < bm::set_array_size; ++j,++block_idx)
00905 {
00906 arg_blk = bman2.get_block(i, j);
00907 arg_gap = BM_IS_GAP(arg_blk);
00908
00909 if (!arg_blk)
00910 continue;
00911 combine_any_operation_with_block(blk, blk_gap,
00912 arg_blk, arg_gap,
00913 dmit, dmit_end);
00914
00915
00916 bool all_resolved = false;
00917 distance_metric_descriptor* it=dmit;
00918 do
00919 {
00920 if (!it->result)
00921 {
00922 all_resolved = false;
00923 break;
00924 }
00925 ++it;
00926 } while (it < dmit_end);
00927 if (all_resolved)
00928 return;
00929 }
00930
00931 continue;
00932 }
00933
00934 for (j = 0; j < bm::set_array_size; ++j, ++block_idx)
00935 {
00936 blk = blk_blk[j];
00937 if (blk == 0 && is_all_and)
00938 continue;
00939
00940 arg_blk = bman2.get_block(i, j);
00941
00942 if (blk == 0 && arg_blk == 0)
00943 continue;
00944
00945 arg_gap = BM_IS_GAP(arg_blk);
00946 blk_gap = BM_IS_GAP(blk);
00947
00948 combine_any_operation_with_block(blk, blk_gap,
00949 arg_blk, arg_gap,
00950 dmit, dmit_end);
00951
00952
00953 bool all_resolved = false;
00954 distance_metric_descriptor* it=dmit;
00955 do
00956 {
00957 if (!it->result)
00958 {
00959 all_resolved = false;
00960 break;
00961 }
00962 ++it;
00963 } while (it < dmit_end);
00964 if (all_resolved)
00965 return;
00966
00967 }
00968
00969 }
00970 }
00971
00972
00973
00974
00975
00976
00977
00978
00979
00980
00981 template<class BV>
00982 bm::id_t count_and(const BV& bv1, const BV& bv2)
00983 {
00984 return distance_and_operation(bv1, bv2);
00985 }
00986
00987
00988
00989
00990
00991
00992
00993
00994 template<class BV>
00995 bm::id_t any_and(const BV& bv1, const BV& bv2)
00996 {
00997 distance_metric_descriptor dmd(bm::COUNT_AND);
00998
00999 distance_operation_any(bv1, bv2, &dmd, &dmd+1);
01000 return dmd.result;
01001 }
01002
01003
01004
01005
01006
01007
01008
01009
01010
01011
01012 template<class BV>
01013 bm::id_t count_xor(const BV& bv1, const BV& bv2)
01014 {
01015 distance_metric_descriptor dmd(bm::COUNT_XOR);
01016
01017 distance_operation(bv1, bv2, &dmd, &dmd+1);
01018 return dmd.result;
01019 }
01020
01021
01022
01023
01024
01025
01026
01027
01028 template<class BV>
01029 bm::id_t any_xor(const BV& bv1, const BV& bv2)
01030 {
01031 distance_metric_descriptor dmd(bm::COUNT_XOR);
01032
01033 distance_operation_any(bv1, bv2, &dmd, &dmd+1);
01034 return dmd.result;
01035 }
01036
01037
01038
01039
01040
01041
01042
01043
01044
01045
01046 template<class BV>
01047 bm::id_t count_sub(const BV& bv1, const BV& bv2)
01048 {
01049 distance_metric_descriptor dmd(bm::COUNT_SUB_AB);
01050
01051 distance_operation(bv1, bv2, &dmd, &dmd+1);
01052 return dmd.result;
01053 }
01054
01055
01056
01057
01058
01059
01060
01061
01062
01063 template<class BV>
01064 bm::id_t any_sub(const BV& bv1, const BV& bv2)
01065 {
01066 distance_metric_descriptor dmd(bm::COUNT_SUB_AB);
01067
01068 distance_operation_any(bv1, bv2, &dmd, &dmd+1);
01069 return dmd.result;
01070 }
01071
01072
01073
01074
01075
01076
01077
01078
01079
01080 template<class BV>
01081 bm::id_t count_or(const BV& bv1, const BV& bv2)
01082 {
01083 distance_metric_descriptor dmd(bm::COUNT_OR);
01084
01085 distance_operation(bv1, bv2, &dmd, &dmd+1);
01086 return dmd.result;
01087 }
01088
01089
01090
01091
01092
01093
01094
01095
01096 template<class BV>
01097 bm::id_t any_or(const BV& bv1, const BV& bv2)
01098 {
01099 distance_metric_descriptor dmd(bm::COUNT_OR);
01100
01101 distance_operation_any(bv1, bv2, &dmd, &dmd+1);
01102 return dmd.result;
01103 }
01104
01105
01106
01107
01108
01109
01110 template<class It>
01111 It block_range_scan(It first, It last, unsigned nblock, unsigned* max_id)
01112 {
01113 It right;
01114 for (right = first; right != last; ++right)
01115 {
01116 unsigned v = unsigned(*right);
01117 BM_ASSERT(v < bm::id_max);
01118 if (v >= *max_id)
01119 *max_id = v;
01120 unsigned nb = v >> bm::set_block_shift;
01121 if (nb != nblock)
01122 break;
01123 }
01124 return right;
01125 }
01126
01127
01128
01129
01130
01131
01132
01133
01134
01135
01136
01137
01138
01139
01140 template<class BV, class It>
01141 void combine_or(BV& bv, It first, It last)
01142 {
01143 typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
01144 unsigned max_id = 0;
01145
01146 while (first < last)
01147 {
01148 unsigned nblock = unsigned((*first) >> bm::set_block_shift);
01149 It right = block_range_scan(first, last, nblock, &max_id);
01150
01151 if (max_id >= bv.size())
01152 {
01153 BM_ASSERT(max_id < bm::id_max);
01154 bv.resize(max_id + 1);
01155 }
01156
01157
01158
01159 label1:
01160
01161 int block_type;
01162 bm::word_t* blk =
01163 bman.check_allocate_block(nblock,
01164 true,
01165 bv.get_new_blocks_strat(),
01166 &block_type);
01167 if (!blk)
01168 continue;
01169
01170 if (block_type == 1)
01171 {
01172 bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
01173 unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
01174
01175 for (; first < right; ++first)
01176 {
01177 unsigned is_set;
01178 unsigned nbit = (*first) & bm::set_block_mask;
01179
01180 unsigned new_block_len =
01181 gap_set_value(true, gap_blk, nbit, &is_set);
01182 if (new_block_len > threshold)
01183 {
01184 bman.extend_gap_block(nblock, gap_blk);
01185 ++first;
01186 goto label1;
01187 }
01188 }
01189 }
01190 else
01191 {
01192 for (; first < right; ++first)
01193 {
01194 unsigned nbit = unsigned(*first & bm::set_block_mask);
01195 unsigned nword = unsigned(nbit >> bm::set_word_shift);
01196 nbit &= bm::set_word_mask;
01197 blk[nword] |= (bm::word_t)1 << nbit;
01198 }
01199 }
01200 }
01201
01202 bv.forget_count();
01203 }
01204
01205
01206
01207
01208
01209
01210
01211
01212
01213
01214
01215
01216
01217
01218
01219 template<class BV, class It>
01220 void combine_xor(BV& bv, It first, It last)
01221 {
01222 typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
01223 unsigned max_id = 0;
01224
01225 while (first < last)
01226 {
01227 unsigned nblock = unsigned((*first) >> bm::set_block_shift);
01228 It right = block_range_scan(first, last, nblock, &max_id);
01229
01230 if (max_id >= bv.size())
01231 {
01232 BM_ASSERT(max_id < bm::id_max);
01233 bv.resize(max_id + 1);
01234 }
01235
01236
01237
01238 label1:
01239
01240 int block_type;
01241 bm::word_t* blk =
01242 bman.check_allocate_block(nblock,
01243 true,
01244 bv.get_new_blocks_strat(),
01245 &block_type,
01246 false );
01247 BM_ASSERT(blk);
01248
01249 if (block_type == 1)
01250 {
01251 bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
01252 unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
01253
01254 for (; first < right; ++first)
01255 {
01256 unsigned is_set;
01257 unsigned nbit = (*first) & bm::set_block_mask;
01258
01259 is_set = gap_test(gap_blk, nbit);
01260 BM_ASSERT(is_set <= 1);
01261 is_set ^= 1;
01262
01263 unsigned new_block_len =
01264 gap_set_value(is_set, gap_blk, nbit, &is_set);
01265 if (new_block_len > threshold)
01266 {
01267 bman.extend_gap_block(nblock, gap_blk);
01268 ++first;
01269 goto label1;
01270 }
01271 }
01272 }
01273 else
01274 {
01275 for (; first < right; ++first)
01276 {
01277 unsigned nbit = unsigned(*first & bm::set_block_mask);
01278 unsigned nword = unsigned(nbit >> bm::set_word_shift);
01279 nbit &= bm::set_word_mask;
01280 blk[nword] ^= (bm::word_t)1 << nbit;
01281 }
01282 }
01283 }
01284
01285 bv.forget_count();
01286 }
01287
01288
01289
01290
01291
01292
01293
01294
01295
01296
01297
01298
01299
01300
01301
01302
01303 template<class BV, class It>
01304 void combine_sub(BV& bv, It first, It last)
01305 {
01306 typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
01307 unsigned max_id = 0;
01308
01309 while (first < last)
01310 {
01311 unsigned nblock = unsigned((*first) >> bm::set_block_shift);
01312 It right = block_range_scan(first, last, nblock, &max_id);
01313
01314 if (max_id >= bv.size())
01315 {
01316 BM_ASSERT(max_id < bm::id_max);
01317 bv.resize(max_id + 1);
01318 }
01319
01320
01321
01322 label1:
01323
01324 int block_type;
01325 bm::word_t* blk =
01326 bman.check_allocate_block(nblock,
01327 false,
01328 bv.get_new_blocks_strat(),
01329 &block_type);
01330
01331 if (!blk)
01332 continue;
01333
01334 if (block_type == 1)
01335 {
01336 bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
01337 unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
01338
01339 for (; first < right; ++first)
01340 {
01341 unsigned is_set;
01342 unsigned nbit = (*first) & bm::set_block_mask;
01343
01344 is_set = gap_test(gap_blk, nbit);
01345 if (!is_set)
01346 continue;
01347
01348 unsigned new_block_len =
01349 gap_set_value(false, gap_blk, nbit, &is_set);
01350 if (new_block_len > threshold)
01351 {
01352 bman.extend_gap_block(nblock, gap_blk);
01353 ++first;
01354 goto label1;
01355 }
01356 }
01357 }
01358 else
01359 {
01360 for (; first < right; ++first)
01361 {
01362 unsigned nbit = unsigned(*first & bm::set_block_mask);
01363 unsigned nword = unsigned(nbit >> bm::set_word_shift);
01364 nbit &= bm::set_word_mask;
01365 blk[nword] &= ~((bm::word_t)1 << nbit);
01366 }
01367 }
01368 }
01369
01370 bv.forget_count();
01371 }
01372
01373
01374
01375
01376
01377
01378
01379
01380
01381
01382
01383
01384
01385 template<class BV, class It>
01386 void combine_and_sorted(BV& bv, It first, It last)
01387 {
01388 bm::id_t prev = 0;
01389 for ( ;first < last; ++first)
01390 {
01391 bm::id_t id = *first;
01392 BM_ASSERT(id >= prev);
01393 bv.set_bit_and(id, true);
01394 if (++prev < id)
01395 {
01396 bv.set_range(prev, id-1, false);
01397 }
01398 prev = id;
01399 }
01400 }
01401
01402
01403
01404
01405
01406
01407
01408
01409
01410
01411
01412
01413
01414
01415
01416
01417 template<class BV, class It>
01418 void combine_and(BV& bv, It first, It last)
01419 {
01420 BV bv_tmp;
01421 combine_or(bv_tmp, first, last);
01422 bv &= bv_tmp;
01423 }
01424
01425
01426
01427
01428
01429
01430
01431
01432
01433
01434
01435
01436
01437
01438
01439
01440 template<class BV>
01441 bm::id_t count_intervals(const BV& bv)
01442 {
01443 const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
01444
01445 bm::word_t*** blk_root = bman.get_rootblock();
01446 typename BV::blocks_manager_type::block_count_change_func func(bman);
01447 for_each_block(blk_root, bman.top_block_size(), func);
01448
01449 return func.count();
01450 }
01451
01452
01453
01454
01455
01456
01457
01458
01459
01460
01461
01462
01463
01464
01465
01466 template<class BV, class It>
01467 void export_array(BV& bv, It first, It last)
01468 {
01469 typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
01470 unsigned inp_word_size = sizeof(*first);
01471 size_t array_size = last - first;
01472 size_t bit_cnt = array_size * inp_word_size * 8;
01473 int block_type;
01474 bm::word_t tmp;
01475 unsigned b1, b2, b3, b4;
01476
01477 if (bit_cnt >= bv.size())
01478 bv.resize((bm::id_t)bit_cnt + 1);
01479 else
01480 bv.set_range((bm::id_t)bit_cnt, bv.size() - 1, false);
01481
01482 switch (inp_word_size)
01483 {
01484 case 1:
01485 {
01486 size_t word_cnt = array_size / 4;
01487 for (unsigned i = 0; i < bm::set_total_blocks; ++i)
01488 {
01489 bm::word_t* blk =
01490 bman.check_allocate_block(i,
01491 false,
01492 BM_BIT,
01493 &block_type,
01494 false );
01495 if (block_type == 1)
01496 {
01497 blk = bman.convert_gap2bitset(i, BMGAP_PTR(blk));
01498 }
01499
01500 bm::word_t* wrd_ptr = blk;
01501 if (word_cnt >= bm::set_block_size) {
01502 bm::word_t* wrd_end = blk + bm::set_block_size;
01503 do {
01504 b1 = *first++; b2 = *first++;
01505 b3 = *first++; b4 = *first++;
01506 tmp = b1 | (b2 << 8) | (b3 << 16) | (b4 << 24);
01507 *wrd_ptr++ = tmp;
01508 } while (wrd_ptr < wrd_end);
01509 word_cnt -= bm::set_block_size;
01510 }
01511 else
01512 {
01513 size_t to_convert = last - first;
01514 for (size_t j = 0; j < to_convert / 4; ++j)
01515 {
01516 b1 = *first++; b2 = *first++;
01517 b3 = *first++; b4 = *first++;
01518 tmp = b1 | (b2 << 8) | (b3 << 16) | (b4 << 24);
01519 *wrd_ptr++ = tmp;
01520 }
01521 while (1)
01522 {
01523 if (first == last) break;
01524 *wrd_ptr = *first++;
01525 if (first == last) break;
01526 *wrd_ptr |= (*first++) << 8;
01527 if (first == last) break;
01528 *wrd_ptr |= (*first++) << 16;
01529 if (first == last) break;
01530 *wrd_ptr |= (*first++) << 24;
01531 ++wrd_ptr;
01532 }
01533 }
01534 if (first == last) break;
01535 }
01536 }
01537 break;
01538 case 2:
01539 {
01540 size_t word_cnt = array_size / 2;
01541 for (unsigned i = 0; i < bm::set_total_blocks; ++i)
01542 {
01543 bm::word_t* blk =
01544 bman.check_allocate_block(i,
01545 false,
01546 BM_BIT,
01547 &block_type,
01548 false );
01549 if (block_type == 1)
01550 {
01551 blk = bman.convert_gap2bitset(i, BMGAP_PTR(blk));
01552 }
01553
01554 bm::word_t* wrd_ptr = blk;
01555 if (word_cnt >= bm::set_block_size) {
01556 bm::word_t* wrd_end = blk + bm::set_block_size;
01557 do {
01558 b1 = *first++; b2 = *first++;
01559 tmp = b1 | (b2 << 16);
01560 *wrd_ptr++ = tmp;
01561 } while (wrd_ptr < wrd_end);
01562 word_cnt -= bm::set_block_size;
01563 }
01564 else
01565 {
01566 size_t to_convert = last - first;
01567 for (unsigned j = 0; j < to_convert / 2; ++j)
01568 {
01569 b1 = *first++; b2 = *first++;
01570 tmp = b1 | (b2 << 16);
01571 *wrd_ptr++ = tmp;
01572 }
01573 while (1)
01574 {
01575 if (first == last) break;
01576 *wrd_ptr = *first++;
01577 if (first == last) break;
01578 *wrd_ptr |= (*first++) << 16;
01579 ++wrd_ptr;
01580 }
01581 }
01582 if (first == last) break;
01583 }
01584 }
01585 break;
01586 case 4:
01587 {
01588 size_t word_cnt = array_size;
01589 for (unsigned i = 0; i < bm::set_total_blocks; ++i)
01590 {
01591 bm::word_t* blk =
01592 bman.check_allocate_block(i,
01593 false,
01594 BM_BIT,
01595 &block_type,
01596 false );
01597 if (block_type == 1)
01598 {
01599 blk = bman.convert_gap2bitset(i, BMGAP_PTR(blk));
01600 }
01601
01602 bm::word_t* wrd_ptr = blk;
01603 if (word_cnt >= bm::set_block_size) {
01604 bm::word_t* wrd_end = blk + bm::set_block_size;
01605 do {
01606 *wrd_ptr++ = *first++;
01607 } while (wrd_ptr < wrd_end);
01608 word_cnt -= bm::set_block_size;
01609 }
01610 else
01611 {
01612 while (1)
01613 {
01614 if (first == last) break;
01615 *wrd_ptr = *first++;
01616 ++wrd_ptr;
01617 }
01618 }
01619 if (first == last) break;
01620 }
01621 }
01622 break;
01623 default:
01624 BM_ASSERT(0);
01625 }
01626
01627 }
01628
01629 }
01630
01631 #ifdef _MSC_VER
01632 #pragma warning( pop )
01633 #endif
01634
01635 #endif