1 #ifndef BMRANDOM__H__INCLUDED__ 
    2 #define BMRANDOM__H__INCLUDED__ 
   25 #ifndef BM__H__INCLUDED__ 
   28 # error missing include (bm.h or bm64.h) 
   76         typename BV::blocks_manager_type  blocks_manager_type;
 
   82     void simple_pick(BV& bv_out,
 
   88     void get_subset(BV& bv_out, 
 
  105                           unsigned          bit_list_size,
 
  108     unsigned compute_take_count(
unsigned bc,
 
  136     delete [] sub_block_;
 
  144     if (sample_count == 0)
 
  151     bv_in.build_rs_index(&rsi_, &bv_nb_);
 
  154     if (in_count <= sample_count)
 
  160     float pick_ratio = float(sample_count) / float(in_count);
 
  161     if (pick_ratio < 0.054f)
 
  164         bool b = bv_in.find_range(first, last);
 
  168         simple_pick(bv_out, bv_in, sample_count, first, last);
 
  172     if (sample_count > in_count/2)
 
  176         size_type delta_count = in_count - sample_count;
 
  178         get_subset(tmp_bv, bv_in, in_count, delta_count);
 
  183     get_subset(bv_out, bv_in, in_count, sample_count);
 
  189                                     size_type sample_count,
 
  195     std::random_device rd;
 
  197         std::mt19937_64 mt_rand(rd());
 
  199         std::mt19937 mt_rand(rd());
 
  201     std::uniform_int_distribution<size_type> dist(first, last);
 
  206         size_type ridx = dist(mt_rand); 
 
  208         BM_ASSERT(ridx >= first && ridx <= last);
 
  210         bool b = bv_in.find(ridx, fidx); 
 
  215             bool is_set = bv_out.set_bit_conditional(fidx, 
true, 
false);
 
  216             sample_count -= is_set;
 
  221                 b = bv_in.find(fidx, fidx);
 
  226                 is_set = bv_out.set_bit_conditional(fidx, 
true, 
false);
 
  227                 sample_count -= is_set;
 
  235 void random_subset<BV>::get_subset(BV&        bv_out,
 
  238                                    size_type  sample_count)
 
  241     bv_out.resize(bv_in.size());
 
  243     const blocks_manager_type& bman_in = bv_in.get_blocks_manager();
 
  244     blocks_manager_type& bman_out = bv_out.get_blocks_manager();
 
  246     bm::word_t* tmp_block = bman_out.check_allocate_tempblock();
 
  248     size_type first_nb, last_nb;
 
  249     bool b = bv_nb_.find_range(first_nb, last_nb);
 
  254     std::random_device rd;
 
  256         std::mt19937_64 mt_rand(rd());
 
  258         std::mt19937 mt_rand(rd());
 
  260     std::uniform_int_distribution<size_type> dist_nb(first_nb, last_nb);
 
  262     size_type curr_sample_count = sample_count;
 
  263     for (
unsigned take_count = 0; curr_sample_count; curr_sample_count -= take_count)
 
  268         size_type ridx = dist_nb(mt_rand); 
 
  269         BM_ASSERT(ridx >= first_nb && ridx <= last_nb);
 
  271         b = bv_nb_.find(ridx, nb); 
 
  274             b = bv_nb_.find(first_nb, nb);
 
  277                 b = bv_nb_.find(first_nb, nb);
 
  283         bv_nb_.clear_bit_no_check(nb); 
 
  287         unsigned bc = rsi_.count(nb);
 
  289         take_count = compute_take_count(bc, in_count, sample_count);
 
  290         if (take_count > curr_sample_count)
 
  291             take_count = unsigned(curr_sample_count);
 
  298             const bm::word_t* blk_src = bman_in.get_block(i0, j0);
 
  302             bm::word_t* blk_out = bman_out.get_block_ptr(i0, j0);
 
  306                 blk_out = bman_out.deoptimize_block(nb);
 
  310                 blk_out = bman_out.get_allocator().alloc_bit_block();
 
  311                 bman_out.set_block(nb, blk_out);
 
  313             if (take_count == bc) 
 
  342                 get_random_array(blk_out, bit_list_, arr_len, take_count);
 
  353                 get_block_subset(blk_out, blk_src, take_count);
 
  360 unsigned random_subset<BV>::compute_take_count(
 
  365     float block_percent = float(bc) / float(in_count);
 
  366     float bits_to_take = float(sample_count) * block_percent;
 
  367     bits_to_take += 0.99f;
 
  368     unsigned to_take = unsigned(bits_to_take);
 
  376 void random_subset<BV>::get_block_subset(
bm::word_t*       blk_out,
 
  380     for (
unsigned rounds = 0; take_count && rounds < 10; ++rounds)
 
  389             if (blk_src[i] && (blk_out[i] != blk_src[i]))
 
  391                 new_count = process_word(blk_out, blk_src, i, take_count);
 
  392                 take_count -= new_count;
 
  404             sub_block_[i] = blk_src[i] & ~blk_out[i];
 
  414         get_random_array(blk_out, bit_list_, arr_len, take_count);        
 
  419 unsigned random_subset<BV>::process_word(
bm::word_t*       blk_out, 
 
  424     unsigned new_bits, mask;
 
  427         mask = unsigned(rand());
 
  431     unsigned src_rand = blk_src[nword] & mask;
 
  432     new_bits = src_rand & ~blk_out[nword];
 
  438         if (new_count > take_count)
 
  442             unsigned char blist[64];
 
  445             std::random_shuffle(blist, blist + arr_size);
 
  447             for (
unsigned j = 0; j < take_count; ++j)
 
  449                 value |= (1u << blist[j]);
 
  452             new_count = take_count;
 
  455             BM_ASSERT((new_bits & ~blk_src[nword]) == 0);
 
  458         blk_out[nword] |= new_bits;
 
  466 void random_subset<BV>::get_random_array(
bm::word_t*       blk_out, 
 
  468                                          unsigned          bit_list_size,
 
  472     for (
unsigned i = 0; i < count; ++i)