Search for human mutation (SNP) within chr1. Benchmark comparison of different methods
 
 
 
 
 
 
 
#include <iostream>
#include <sstream>
#include <chrono>
#include <regex>
#include <time.h>
#include <stdio.h>
 
 
#include <vector>
#include <chrono>
#include <map>
#include <utility>
 
 
 
#include "bmdbg.h"
 
// Writes the command-line usage summary to std::cerr, one switch per line.
//
// The list covers every switch the argument parser in this file recognises,
// including -bench.
//
// NOTE(review): the declarator that should follow `static` (return type and
// function name) is not present in this view of the file and is kept exactly
// as found.
static
{
    std::cerr
        << "BitMagic SNP Search Sample Utility (c) 2018" << std::endl
        << "-isnp   file-name            -- input set file (SNP FASTA) to parse" << std::endl
        << "-svout  sparse vector output -- sparse vector name to save" << std::endl
        << "-rscout rs-compressed sparse vector output  -- name to save" << std::endl
        << "-svin   sparse vector input   -- sparse vector file name to load " << std::endl
        << "-rscin  rs-compressed sparse vector input   -- file name to load " << std::endl
        << "-diag                        -- run diagnostics"                  << std::endl
        << "-timing                      -- collect timings"                  << std::endl
        << "-bench                       -- run benchmarks"                   << std::endl
      ;
}
 
 
 
 
 
// Scans argv[1] .. argv[argc - 1] and matches each argument against the
// switches this utility knows about.
//
// Returns:
//   0 - when "-h" / "--help" is encountered, or after the whole argument
//       list has been scanned;
//   1 - when a switch that needs a file name (-svout, -rscout, -svin,
//       -rscin, -isnp) is the last argument; an error line is written to
//       std::cerr first.
//
// NOTE(review): the declarator that should follow `static` and the
// statements that act on each recognised switch are not present in this
// view of the file. The empty `{ }` bodies and the three bare `if` lines
// near the end are left exactly as found - confirm against the full source.
static
{
    // argv[0] is skipped.
    for (int i = 1; i < argc; ++i)
    {
        std::string arg = argv[i];
        if ((arg == "-h") || (arg == "--help"))
        {
            return 0;
        }
        
        // Sparse vector output file.
        if (arg == "-svout" || arg == "--svout")
        {
            if (i + 1 < argc)
            {
                // NOTE(review): empty in this view - presumably where the
                // following argument is taken as the file name; confirm.
            }
            else
            {
                std::cerr << "Error: -svout requires file name" << std::endl;
                return 1;
            }
            continue;
        }
        // RS-compressed sparse vector output file.
        if (arg == "-rscout" || arg == "--rscout")
        {
            if (i + 1 < argc)
            {
                // NOTE(review): empty in this view (see -svout above).
            }
            else
            {
                std::cerr << "Error: -rscout requires file name" << std::endl;
                return 1;
            }
            continue;
        }
 
        // Sparse vector input file.
        if (arg == "-svin" || arg == "--svin")
        {
            if (i + 1 < argc)
            {
                // NOTE(review): empty in this view (see -svout above).
            }
            else
            {
                std::cerr << "Error: -svin requires file name" << std::endl;
                return 1;
            }
            continue;
        }
 
        // RS-compressed sparse vector input file.
        if (arg == "-rscin" || arg == "--rscin")
        {
            if (i + 1 < argc)
            {
                // NOTE(review): empty in this view (see -svout above).
            }
            else
            {
                std::cerr << "Error: -rscin requires file name" << std::endl;
                return 1;
            }
            continue;
        }
 
        // SNP input file; "-snp" / "--snp" are accepted as aliases.
        if (arg == "-isnp" || arg == "--isnp" || arg == "-snp" || arg == "--snp")
        {
            if (i + 1 < argc)
            {
                // NOTE(review): empty in this view (see -svout above).
            }
            else
            {
                std::cerr << "Error: -isnp requires file name" << std::endl;
                return 1;
            }
            continue;
        }
 
        // NOTE(review): the three flag tests below have no statements
        // attached in this view; presumably each one sets a boolean option
        // (diagnostics / timing / benchmark) - confirm.
        if (arg == "-diag" || arg == "--diag" || arg == "-d" || arg == "--d")
        if (arg == "-timing" || arg == "--timing" || arg == "-t" || arg == "--t")
        if (arg == "-bench" || arg == "--bench" || arg == "-b" || arg == "--b")
 
    } 
    return 0;
}
 
 
// Growable sequence of (unsigned, unsigned) pairs.
using vector_pairs = std::vector<std::pair<unsigned, unsigned>>;
 
 
 
 
// Opens the text file named by `fname` and scans it line by line, splitting
// each candidate line on runs of whitespace and parsing field 0 as `rs_id`
// and field 11 as `rs_pos`.
//
// Returns -1 if the stream is not good after opening, 0 otherwise.
// Progress and the final "SNP count=" total are written to std::cout.
//
// NOTE(review): the declarator that should follow `static`, and whatever
// stores the parsed (rs_pos, rs_id) values, are not present in this view of
// the file - confirm against the full source.
static
{
 
    std::ifstream fin(fname.c_str(), std::ios::in);
    if (!fin.good())
        return -1;
 
    unsigned rs_id, rs_pos;
    size_t idx; // receives the "characters consumed" output of std::stoul; not read afterwards
 
    std::string line;
    std::string delim = " \t"; // not referenced again in the visible code
 
    // With submatch index -1 the token iterator yields the text BETWEEN
    // matches, i.e. it splits the line on whitespace runs.
    std::regex reg("\\s+");
    std::sregex_token_iterator it_end;
 
 
    unsigned rs_cnt = 0; // number of records counted so far
    for (unsigned i = 0; std::getline(fin, line); ++i) // i counts every line read
    {
        // Only lines that start with a digit are treated as data records.
        // NOTE(review): isdigit() is called with a plain char; verify that
        // non-ASCII input cannot reach this point.
        if (line.empty() ||
            !isdigit(line.front())
            )
            continue;
 
        
        std::sregex_token_iterator it(line.begin(), line.end(), reg, -1);
        std::vector<std::string> line_vec(it, it_end);
        if (line_vec.empty())
            continue; 
        
        
        // .at() and std::stoul both throw on bad input (too few fields,
        // non-numeric text, out-of-range value); such lines are skipped by
        // the handler below.
        try
        {
            rs_id = unsigned(std::stoul(line_vec.at(0), &idx));
            
            // NOTE(review): as it appears here this block is unconditional,
            // which makes the statements after it unreachable; a guarding
            // condition seems to be missing - confirm.
            {
                continue;
            }
            rs_pos = unsigned(std::stoul(line_vec.at(11), &idx));
 
 
            ++rs_cnt;
        }
        catch (std::exception& )
        {
            continue; 
            
            
            
            
        }
        // Progress line, rewritten in place via '\r' whenever rs_cnt is a
        // multiple of 4096 (this includes rs_cnt == 0).
        if (rs_cnt % (4 * 1024) == 0)
            std::cout << "\r" << rs_cnt << " / " << i; 
    } 
 
    std::cout << std::endl;
    std::cout << "SNP count=" << rs_cnt << std::endl;
    return 0;
}
 
// Calls `rand_sampler.sample(bv_sample, *bv_null, count)` and then reads
// `sv` at the index obtained by dereferencing `en`.
//
// NOTE(review): the declarator that should follow `static`, the
// declarations of `rand_sampler`, `bv_sample`, `bv_null` and `en`, the loop
// (if any) that drives the inner block, and whatever consumes `v` are not
// present in this view of the file. Presumably this draws `count` random
// positions and collects the values stored there - confirm against the
// full source.
static
{
 
    rand_sampler.
sample(bv_sample, *bv_null, count);
 
    {
        unsigned idx = *en;   // index taken from `en`
        unsigned v = sv[idx]; // value stored in `sv` at that index
    }
}
 
// Walks the range [it, it_end) and, for every element that is not reported
// as null, appends the pair (it.pos(), it.value()) to `vp`.
//
// NOTE(review): the declarator that should follow `static` and the
// declarations of `it` / `it_end` are not present in this view of the file;
// presumably they iterate over a sparse vector - confirm.
static
{
    
    for (; it != it_end; ++it)
    {
        if (!it.is_null()) // null elements are skipped
        {
            // .first = position, .second = value stored at that position
            std::pair<unsigned, unsigned> pos2rs = std::make_pair(it.pos(), it.value());
            vp.push_back(pos2rs);
        }
    }
}
 
// Linear lookup of `rs_id` among the `.second` members of `vp`.
//
// On the first match the paired `.first` member is written to `pos` and
// true is returned. When nothing matches, `pos` is left untouched and the
// result is false.
//
// NOTE(review): the declarator that should follow `static` is not present
// in this view of the file and is kept exactly as found.
static
{
    for (const auto& entry : vp)
    {
        if (entry.second != rs_id)
            continue;

        pos = entry.first;
        return true;
    }
    return false;
}
 
// Looks up a set of rs ids with three different search methods -
// `scanner.find_eq` on `sv`, `scanner.find_eq` on `csv`, and a third
// search guarded by `vp.size()` - and then cross-checks the results by
// comparing `bv_found1` against `bv_found2` and `bv_found3`.
// Ids that are not found and any discrepancy between the methods are
// reported on std::cout / std::cerr.
//
// NOTE(review): this block is heavily truncated in this view of the file.
// The declarator after `static`, the code that fills `rs_vect`, the
// declarations of `scanner`, `vp`, `bv_found1..3`, the `found` variable of
// the third loop, and the statements that record hits are not present.
// Confirm everything below against the full source.
static
{
    const unsigned rs_sample_count = 2000; // not referenced again in the visible code
 
    std::vector<unsigned> rs_vect;
    // NOTE(review): nothing visible fills rs_vect before this check, so as
    // shown it always takes the early return.
    if (rs_vect.empty())
    {
        std::cerr << "Benchmark subset empty!" << std::endl;
        return;
    }
    
    
    
    
    
 
 
    // Method 1: scanner.find_eq over `sv`.
    {
        
        
        
        
        
 
 
        for (unsigned i = 0; i < rs_vect.size(); ++i)
        {
            unsigned rs_id = rs_vect[i];
            unsigned rs_pos;
            bool found = scanner.
find_eq(sv, rs_id, rs_pos);
 
 
            if (found)
            {
                // NOTE(review): empty in this view.
            }
            else
            {
                std::cout << "Error: rs_id = " << rs_id << " not found!" << std::endl;
            }
        } 
    }
 
    // Method 2: scanner.find_eq over `csv`.
    {
 
 
        for (unsigned i = 0; i < rs_vect.size(); ++i)
        {
            unsigned rs_id = rs_vect[i];
            unsigned rs_pos;
            bool found = scanner.
find_eq(csv, rs_id, rs_pos);
 
 
            if (found)
            {
                // NOTE(review): empty in this view.
            }
            else
            {
                std::cout << "rs_id = " << rs_id << " not found!" << std::endl;
            }
        } 
    }
 
    // Method 3: only runs when `vp` is non-empty.
    if (vp.size())
    {
 
        for (unsigned i = 0; i < rs_vect.size(); ++i)
        {
            unsigned rs_id = rs_vect[i];
            unsigned rs_pos;
 
            // NOTE(review): `found` has no visible declaration in this
            // scope; the search call that produces it is missing here.
            if (found)
            {
            }
            else
            {
                std::cout << "rs_id = " << rs_id << " not found!" << std::endl;
            }
        } 
    }
 
    
    // Cross-check: a non-zero compare() result is reported as a discrepancy.
    int res = bv_found1.
compare(bv_found2);
 
    if (res != 0)
    {
        std::cerr << "Error: search discrepancy (sparse search) detected!" << std::endl;
    }
    res = bv_found1.
compare(bv_found3);
    if (res != 0)
    {
        std::cerr << "Error: search discrepancy (std::vector<>) detected!" << std::endl;
    }
 
}
 
 
// Program entry point.
//
// Returns 1 when fewer than two command-line arguments are supplied, when
// the rs-compressed vector check branch is taken, or when an exception
// derived from std::exception escapes the try block (its what() text is
// written to std::cerr). A non-zero `ret` or `res` is returned as-is.
// Returns 0 on normal completion.
//
// NOTE(review): most statements and conditions inside the try block are
// not present in this view of the file. `ret`, `res`, `sv` and `csv` are
// used without visible declarations, and several brace blocks are empty or
// have lost their guarding `if`. Confirm against the full source.
int main(
int argc, 
char *argv[])
 
{
    // At least one switch plus its value is required.
    if (argc < 3)
    {
        return 1;
    }
 
 
    try
    {
        // NOTE(review): `ret` is presumably the result of parsing the
        // command line - confirm.
        if (ret != 0)
            return ret;
 
        {
            // NOTE(review): `res` has no visible origin here.
            if (res != 0)
            {
                return res;
            }
        }
        {
        }
        
        
        {
        }
        
        
        {
            {
            }
            {
            }
            
            // NOTE(review): unconditional as shown; a guarding condition
            // seems to be missing.
            {
                std::cerr << "Error:  rs-compressed vector check failed!" << std::endl;
                return 1;
            }
        }
        
        
        {
        }
 
        
        {
        }
        
        // Statistics output for both containers.
        {
            {
                std::cout << std::endl
                          << "sparse vector statistics:"
                          << std::endl;
                bm::print_svector_stat(sv, true);
            }
            {
                std::cout << std::endl
                          << "RS compressed sparse vector statistics:"
                          << std::endl;
                bm::print_svector_stat(csv, true);
            }
        }
 
        {
        }
 
        {
            std::cout << std::endl << "Performance:" << std::endl;
        }
    }
    catch (std::exception& ex)
    {
        std::cerr << "Error:" << ex.what() << std::endl;
        return 1;
    }
 
    return 0;
}