kCsiExt[] =
".csi";
70 static const charkIndexMagicCSI[] =
"CSI\1";
84 size_t cnt=
in.gcount();
92 const char*
s_Read(
const char*& buffer_ptr,
const char* buffer_end,
size_t len)
94 const char* ret_ptr = buffer_ptr;
95 const char* ret_end = ret_ptr +
len;
96 if( ret_end > buffer_end ) {
129 if( memcmp(
buf, magic, 4) != 0 ) {
202 #ifdef BAM_SUPPORT_CSI 212 for(
int32_ti_chunk = 0; i_chunk < n_chunks; ++i_chunk ) {
222 #ifdef BAM_SUPPORT_CSI 224 const char* header =
s_Read(ptr, end, 16);
230 const char* header =
s_Read(ptr, end, 8);
237 const char*
data=
s_Read(ptr, end, n_chunks*16);
238 for(
size_t i= 0;
i< n_chunks; ++
i) {
252pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>
256pair<TBinsIter, TBinsIter> ret;
258ret.second =
m_Bins.end();
282 returnp1 < p2.second;
286 returnp1.second < p2;
304 "Bad unmapped bin format");
318 "No chunks in bin "<<bin.
m_Bin);
320 for(
size_t i= 0;
i< bin.
m_Chunks.size(); ++
i) {
322 if( range.first >= range.second ) {
324 "Empty BAM BGZF range in bin "<<bin.
m_Bin<<
325 ": "<<range.first<<
" - "<<range.second);
327 if(
i&& bin.
m_Chunks[
i-1].second >= range.first ) {
329 "Overlapping BAM BGZF ranges in bin "<<bin.
m_Bin<<
330 ": "<<bin.
m_Chunks[
i-1].second<<
" over "<<range.first);
349SBamIndexParams::operator=(params);
351 size_tbin_count = 0;
355 for(
int32_ti_bin = 0; i_bin < n_bin; ++i_bin ) {
357bin.
Read(
in, *
this);
364 m_Bins.resize(bin_count);
383SBamIndexParams::operator=(params);
385 size_tbin_count = 0;
389 for(
size_ti_bin = 0; i_bin < n_bin; ++i_bin ) {
391buffer_ptr = bin.
Read(buffer_ptr, buffer_end, *
this);
398 m_Bins.resize(bin_count);
404 const char*
data=
s_Read(buffer_ptr, buffer_end, n_intv*8);
405 for(
size_t i= 0;
i< n_intv; ++
i) {
418 constpair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>& iters)
420 if( iters.first == iters.second ) {
428 returniters.first->GetSeqRange(params);
435 s_GetOverlap(
constpair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>& iters)
437 if( iters.first == iters.second ) {
441 returniters.first->m_Overlap;
448 s_GetFilePos(
constpair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>& iters)
450 autoiter = iters.first;
451 if( iter == iters.second ) {
454 returniter->GetStartFilePos();
460 s_GetNextFilePos(
constpair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>& iters)
462 autoiter = iters.first;
463 if( iter == iters.second ) {
467 if( iter == iters.second ) {
470 returniter->GetStartFilePos();
502vector<TSeqPos> aln_over_starts(nBins);
511 autoit = lower_bound(
m_Bins.begin(),
m_Bins.end(), bin);
512 if( it !=
m_Bins.end() && it->m_Bin == bin ) {
513 for(
autoc : it->m_Chunks ) {
514 if( c.first >= min_fp ) {
517 if( c.first >= limit.second ) {
520 if( c.second <= limit.first ) {
523 if( c.first < limit.first ) {
524c.first = limit.first;
526 _ASSERT(c.first >= limit.first);
527 _ASSERT(c.first < limit.second);
529 if( c.first < min_fp ) {
538min_aln_start = ref_range.
GetFrom();
544 autoit = lower_bound(level_bins.first, level_bins.second, min_fp,
PByStartFilePos());
545 if( it == level_bins.first ) {
549min_aln_start =
max(min_aln_start, it->GetSeqRange(*this).GetFrom());
550 if( it->GetEndFilePos() > min_fp ) {
557aln_over_starts[
i] = min_aln_start;
559 returnaln_over_starts;
563vector<TSeqPos> aln_over_starts(nBins);
565 autobin_it_start =
GetLevelBins(0).first, next_bin_it = bin_it_start;
566 for(
size_t i= 0;
i< nBins; ++
i) {
571aln_over_starts[
i] = ref_pos;
575 while( next_bin_it !=
m_Bins.end() && next_bin_it->GetStartFilePos() <= min_fp ) {
578 TSeqPosmin_aln_start =
i? aln_over_starts[
i-1]: 0;
579 boolinside_min_bin =
false;
580 if( next_bin_it != bin_it_start ) {
581 auto& bin = next_bin_it[-1];
582 _ASSERT(bin.GetStartFilePos() <= min_fp);
583inside_min_bin = bin.GetEndFilePos() > min_fp;
586 if( min_aln_start+
GetMinBinSize() < ref_pos && !inside_min_bin ) {
590 autoit = upper_bound(level_bins.first, level_bins.second, min_fp,
PByStartFilePos());
591 if( it == level_bins.first ) {
595min_aln_start =
max(min_aln_start, it->GetSeqRange(*this).GetFrom());
596 if( it->GetEndFilePos() > min_fp ) {
603 if( min_aln_start > ref_pos ) {
605 "Inconsistent linear index at ref pos "<<ref_pos<<
606 ": align starts after end bin start "<<min_aln_start);
608aln_over_starts[
i] = min_aln_start;
610 returnaln_over_starts;
614vector<TSeqPos> aln_over_starts(nBins);
615vector<pair<TBinsIter, TBinsIter>> levelBins;
616vector<COpenRange<TSeqPos>> levelBinSeqRange;
617vector<CBGZFPos> levelPrevOverlap;
624levelBinSeqRange.push_back(
s_GetSeqRange(*
this, levelBins.back()));
629 returnaln_over_starts;
632 for(
auto& bin :
m_Bins) {
633 autosp = bin.GetSeqRange(*this).GetFrom();
634 auto fp= bin.GetStartFilePos();
635 autoins = sp2minfp.
insert(make_pair(sp,
fp));
638 auto& minfp = ins.first->second;
639minfp =
min(minfp,
fp);
643 for(
autop : sp2minfp ) {
644 autoins = fp2sp.
insert(make_pair(p.second, p.first));
646 autoiter = ins.first;
648 while( iter != fp2sp.
end() && iter->second < p.first ) {
649iter = fp2sp.
erase(iter);
662 while( levelBinSeqRange[level].GetToOpen() <= seqPos ) {
664levelPrevOverlap[level] =
s_GetOverlap(levelBins[level]);
666++(levelBins[level].first);
667levelBinSeqRange[level] =
s_GetSeqRange(*
this, levelBins[level]);
671 if( seqPos >= levelBinSeqRange[level].GetFrom() ) {
675overlap_fp = levelPrevOverlap[level];
677prev_overlap_fp =
max(prev_overlap_fp, overlap_fp);
684 while( levelBinSeqRange[level].GetToOpen() <= seqPos ) {
686levelPrevOverlap[level] =
s_GetOverlap(levelBins[level]);
688++(levelBins[level].first);
689levelBinSeqRange[level] =
s_GetSeqRange(*
this, levelBins[level]);
691 if( seqPos < levelBinSeqRange[level].GetFrom() ) {
697overlap_fp =
max(prev_overlap_fp, levelBins[level].
first->m_Overlap);
702 auto& chunks = levelBins[level].first->m_Chunks;
703 autoit = upper_bound(chunks.begin(), chunks.end(), overlap_fp,
PByEndFilePos());
704 if( it != chunks.end() && it->first <
min(found_fp, limit_fp) ) {
706found_fp =
max(it->first, overlap_fp);
707 if( found_fp <= overlap_fp ) {
714aln_over_starts[
b] = seqPos;
721 autoosp =
min(seqPos,
prev(iter)->second);
725osp =
max(osp, aln_over_starts[
b-1]);
727aln_over_starts[
b] = osp;
730 returnaln_over_starts;
740vector<TSeqPos> ends(
count);
742 for( ; ei <
count; ++ei ) {
743 while(
si*bin_size < starts[ei] ) {
744ends[
si++] = ei*bin_size-1;
748ends[
si++] = ei*bin_size-1;
764 if( file_pos1 >= file_pos2 ) {
791 if( ref_range.
Empty() ) {
803 #ifdef BAM_SUPPORT_CSI 809 autoit = lower_bound(bins.first, bins.second, bin_num);
810 if( it != bins.second && it->m_Bin <= bin_num_last ) {
811 if( it->m_Overlap ) {
812 if( !limit.first || it->m_Overlap < limit.first ) {
813limit.first = it->m_Overlap;
816 if( it->m_Bin == bin_num ) {
830 autoit = lower_bound(bins.first, bins.second, bin_num);
831 if( it != bins.first ) {
832limit.first =
max(limit.first,
prev(it)->GetEndFilePos());
841 autoit = lower_bound(bins.first, bins.second, bin_num);
842 if( it != bins.second ) {
843limit.second =
min(limit.second, it->GetStartFilePos());
850pair<SBamIndexRefIndex::TBin, SBamIndexRefIndex::TBin>
854pair<TBin, TBin> bin_range;
866bin_range.second = bin_range.first-1;
886pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>
889pair<TBin, TBin> bin_range)
const 893 for( ; it !=
m_Bins.end() && it->m_Bin <= bin_range.second; ++it ) {
894 for(
autoc : it->m_Chunks ) {
895 if( c.first < limit_file_range.first ) {
896c.first = limit_file_range.first;
898 if( limit_file_range.second && limit_file_range.second < c.second ) {
899c.second = limit_file_range.second;
901 if( c.first < c.second ) {
906 returnmake_pair(
first, it);
910pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>
915 returnmake_pair(
first, it);
935 size_tpage_count = end_pos - beg_pos + 1;
936 Uint8add_size = (file_size + page_count/2) / page_count;
938 for(
size_t i= beg_pos;
i<= end_pos; ++
i) {
945 for(
size_t i= beg_pos;
i<= end_pos; ++
i) {
979 _ASSERT(new_file_beg < new_file_end);
1006 constvector<SBamRangeBlock>&
bb,
size_tbb_beg,
size_tbb_end)
1008 for(
size_t i= bb_beg;
i<= bb_end; ++
i) {
1010 if( !
b.file_end ) {
1037 if( bins.first != bins.second ) {
1038 CBGZFPospos_beg = bins.first->GetStartFilePos();
1039 CBGZFPospos_end =
prev(bins.second)->GetEndFilePos();
1040 if( pos_beg < range.first ) {
1041range.first = pos_beg;
1043 if( pos_end > range.second ) {
1044range.second = pos_end;
1048 if( range.first.IsInvalid() ) {
1066vector<Uint8> vv(bin_count);
1068vector<SBamRangeBlock>
bb(bin_count);
1069 size_tbb_end = bin_count-1;
1070 for(
size_t i= 0;
i<= bb_end; ++
i) {
1077 for(
autobin_it = level_bins.first; bin_it != level_bins.second; ++bin_it ) {
1078 size_t i= bin_it->m_Bin - bin_number_base;
1080 bb[
i].InitData(vv, *bin_it);
1096 for(
autobin_it = level_bins.first; bin_it != level_bins.second; ++bin_it ) {
1097 size_t i= bin_it->m_Bin - bin_number_base;
1099 bb[
i].ExpandData(vv, *bin_it);
1111 for(
TIndexLevellevel = min_index_level; level <= max_index_level; ++level ) {
1113 uint32_tvv_bin_count = 1 << vv_bin_shift;
1116 for(
autoit = level_bins.first; it != level_bins.second; ++it ) {
1118 for(
auto& c : it->m_Chunks ) {
1124 uint32_tpos = (it->m_Bin - bin_base) << vv_bin_shift;
1130add = (add+
cnt/2)/
cnt;
1156 size_tfsz =
file.GetSize();
1157 data.reset(
new char[fsz]);
1158 file.ReadExactly(0,
data.get(), fsz);
1169: m_UnmappedCount(0),
1170m_TotalReadBytes(0),
1171m_TotalReadSeconds(0)
1177: m_UnmappedCount(0),
1178m_TotalReadBytes(0),
1179m_TotalReadSeconds(0)
1181 Read(index_file_name);
1202 LOG_POST(
"BAM: read index "<<
size/
double(1<<20)<<
" MB" 1220 size_t* bytes_read)
1233*bytes_read =
count;
1252 #ifdef BAM_SUPPORT_CSI 1263 #ifdef BAM_SUPPORT_CSI 1284 for(
int32_ti_ref = 0; i_ref < n_ref; ++i_ref ) {
1285 m_Refs[i_ref].Read(
in, *
this, i_ref);
1287streampos extra_pos =
in.tellg();
1288 in.seekg(0, ios::end);
1289streampos end_pos =
in.tellg();
1290 in.seekg(extra_pos);
1292 if( end_pos-extra_pos >= 8 ) {
1296 if( end_pos != extra_pos ) {
1298 "Extra "<<(end_pos-extra_pos)<<
" bytes in BAM index");
1308unique_ptr<CNcbiIstream> data_stream =
1309make_unique<CRStream>(
new CMemoryReader(buffer_ptr, buffer_size),
1311unique_ptr<CNcbiIstream>
z_stream=
1312make_unique<CCompressionIStream>(*data_stream,
1319 const char* buffer_end = buffer_ptr + buffer_size;
1321 #ifdef BAM_SUPPORT_CSI 1331 #ifdef BAM_SUPPORT_CSI 1335 const char* header =
s_Read(buffer_ptr, buffer_end, 12);
1339 s_Read(buffer_ptr, buffer_end, l_aux);
1346 const char* header =
s_Read(buffer_ptr, buffer_end, 4);
1350buffer_ptr =
m_Refs[
i].Read(buffer_ptr, buffer_end, *
this,
i);
1352 if( buffer_end - buffer_ptr >= 8 ) {
1356 if( buffer_ptr != buffer_end ) {
1358 "Extra "<<(buffer_end-buffer_ptr)<<
" bytes in BAM index");
1367 "Bad reference sequence index");
1369 return m_Refs[ref_index];
1377 "Wrong index ref count: "<<
1389 for(
auto&
b:
GetRef(ref_index).m_Bins ) {
1390 CBGZFPosstart_pos =
b.GetStartFilePos();
1391 if( start_pos < total_range.first )
1392total_range.first = start_pos;
1394 if( total_range.second < end_pos )
1395total_range.second = end_pos;
1402 stringtitle,
stringname)
1404 if( name.empty() ) {
1405name =
"BAM coverage";
1407 if( title.empty() ) {
1417 const string& ref_name,
1418 const string& seq_id,
1419 const string& annot_name,
1430 const string& ref_name,
1432 const string& annot_name,
1436 size_tref_index = header.
GetRefIndex(ref_name);
1437 if( ref_index ==
size_t(-1) ) {
1439 "Cannot find RefSeq: "<<ref_name);
1442header.
GetRefLength(ref_index), min_index_level, max_index_level);
1448 const string& seq_id,
1449 const string& annot_name,
1461 const string& seq_id,
1462 const string& annot_name,
1473 const string& annot_name,
1493 const string& annot_name,
1500 if( vv.empty() ) vv.push_back(0);
1503length =
count*bin_size;
1508annot->
SetData().SetGraph().push_back(graph);
1509 sx_SetTitle(*graph, *annot, annot_name, annot_name);
1511graph->
SetLoc().SetInt().SetId().Assign(seq_id);
1512graph->
SetLoc().SetInt().SetFrom(0);
1513graph->
SetLoc().SetInt().SetTo(length-1);
1518bvalues.resize(
count);
1520 uint64_tmax_value = *max_element(vv.begin(), vv.end());
1521 doublemul =
min(1., 255./max_value);
1522 for(
size_t i= 0;
i<
count; ++
i) {
1523 if(
autov = vv[
i] ) {
1537graph->
SetA(1/mul);
1555 Read(bam_file_name);
1568 m_Name.resize(l_name-1);
1590 for(
int32_ti_ref = 0; i_ref < n_ref; ++i_ref ) {
1591 m_Refs[i_ref].Read(stream);
1602 "Bad reference sequence index");
1604 return m_Refs[ref_index];
1614 returniter->second;
1621 boolstate_changed =
true;
1622 const char*p, *p0, *pend;
1624 for(p =
m_Text.data(), pend = p +
m_Text.size(); p < pend; ++p) {
1625 if(state_changed) {
1626state_changed =
false;
1627 for(; p < pend && iswspace(*p); ++p)
1635 else if(*p ==
':') {
1636 if(
state== eRecord) {
1637record.
assign(p0, p-p0);
1639state_changed =
true;
1643 else if( iswspace(*p) ) {
1644 if(
state== eTag) {
1647state_changed =
true;
1649 else if(
state== eValue) {
1650records.back().second[record] =
string(p0, p-p0);
1652state_changed =
true;
1656 if(
state== eValue) {
1657records.back().second[record] =
string(p0, p-p0);
1659 returnrecords.size();
1678 AddRanges(index, ref_index, ref_range, search_mode);
1688 AddRanges(index, ref_index, ref_range, min_level, max_level, search_mode);
1698 AddRanges(index, ref_index, ref_range, min_level, max_level, search_mode);
1710 for(
auto&
r: ranges ) {
1711cout <<
" ("<<
r.first<<
" "<<
r.second<<
")";
1713 returncout <<
" )";
1728 for(
autoiter = ranges.begin(); iter != ranges.end(); ) {
1730 for( ++iter; iter != ranges.end() && !(end < iter->
first); ++iter ) {
1731 if( end < iter->second ) {
1735 if( start < min_pos ) {
1737 if(
end<= min_pos ) {
1768vector<CBGZFRange> ranges;
1773 if( ref_range.
Empty() ) {
1784 TSeqPosset_limit_by_overlap_at = 0;
1787 autobin_range = index.
GetBinRange(ref_range, level);
1788pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter> iter_range;
1789 if( level >= min_index_level ) {
1801 autofirst_bin = iter_range.first;
1802 if( (first_bin == ref.
m_Bins.end() ||
1803first_bin->m_Bin != bin_range.first) &&
1804first_bin != ref.
m_Bins.begin() ) {
1807 if( first_bin != ref.
m_Bins.end() &&
1808first_bin->m_Bin <= bin_range.first &&
1809first_bin->m_Bin >= index.
GetFirstBin(level) ) {
1811 TSeqPospos = first_bin->GetSeqRange(index).GetFrom();
1812 if( pos > set_limit_by_overlap_at ) {
1814set_limit_by_overlap_at = pos;
1815limit.first =
max(limit.first, first_bin->m_Overlap);
1824limit.first =
max(limit.first, ref.
m_Overlaps[bin_index]);
1834 autofirst_bin = iter_range.first;
1835 if( first_bin != ref.
m_Bins.begin() ) {
1836 autoprev_bin =
prev(first_bin);
1837 _ASSERT(prev_bin->m_Bin < bin_range.first);
1838 if( prev_bin->m_Bin >= index.
GetFirstBin(level) ) {
1840limit.first =
max(limit.first, prev_bin->GetEndFilePos());
1846 autonext_bin = iter_range.second;
1847 if( next_bin != ref.
m_Bins.end() &&
1848next_bin->m_Bin < index.
GetFirstBin(level-1) ) {
1850limit.second =
min(limit.second, next_bin->GetStartFilePos());
1876 AddRanges(index, ref_index, ref_range, index_level, index_level, search_mode, file_pos);
1883 whole.first = file_pos;
1897 if( file_pos && *file_pos ) {
1915 AddRanges(index, ref_index, ref_range, min_index_level, max_index_level, search_mode, file_pos);
1936 SetRanges(index, ref_index, ref_range, index_level, index_level, search_mode, file_pos);
1975 const string& base_name,
1980dst.push_back(base_name.substr(0, base_name.size()-old_ext.
size())+new_ext);
1987vector<string> index_name_candidates;
1988 if( index_path.empty() || index_path == bam_path ) {
1989 #ifdef BAM_SUPPORT_CSI 1992index_name_candidates.push_back(bam_path+kCsiExt);
1996index_name_candidates.push_back(bam_path+
kBaiExt);
1998 #ifdef BAM_SUPPORT_CSI 1999 if( !prefer_csi ) {
2000index_name_candidates.push_back(bam_path+kCsiExt);
2006index_name_candidates.push_back(index_path);
2008 for(
size_t i= 0;
i< index_name_candidates.size(); ++
i) {
2014 if(
i< index_name_candidates.size()-1 &&
2035 const doubleindex_read_weight = 10;
2036 const Uint8add_read_bytes = 100000;
2037 const doubleadd_read_bytes_per_second = 80e6;
2038 const Uint8add_unzip_bytes = 100000;
2039 const doubleadd_unzip_bytes_per_second = 80e6;
2045 Uint8(index_read_stat.first*index_read_weight) +
2046data_read_stat.first +
2048 doubleread_seconds =
2049index_read_stat.second*index_read_weight +
2050data_read_stat.second +
2051add_read_bytes/add_read_bytes_per_second;
2053 Uint8unzip_bytes = data_unzip_stat.first + add_unzip_bytes;
2054 doubleunzip_seconds = data_unzip_stat.second + add_unzip_bytes/add_unzip_bytes_per_second;
2056 returnread_seconds/read_bytes + unzip_seconds/unzip_bytes;
2069 char* dst = &ret[0];
2093 char* dst =
str.data();
2119 switch( op & 0xf ) {
2142 switch( op & 0xf ) {
2168 switch( op & 0xf ) {
2194 switch( op & 0xf ) {
2237 switch( op & 0xf ) {
2257 switch( op & 0xf ) {
2278dst[0] =
'0'+(v/10);
2279dst[1] =
'0'+(v%10);
2289 char* dst =
str.data();
2291 for( ;
count--; ) {
2294 switch( op & 0xf ) {
2305 str.resize(dst-
str.data());
2365ptr =
static_cast<const char*
>(memchr(ptr, 0, end-ptr));
2382 size_telement_size;
2401 if( element_size == 0 ) {
2420 ERR_POST(
"BAM: Alignment aux tag parse error");
2430 "Conversion error: " 2431 "type "<<
GetDataType()<<
" cannot be converted to char");
2441 "Conversion error: " 2442 "type "<<
GetDataType()<<
" cannot be converted to string");
2452 "Conversion error: " 2453 "type "<<
GetDataType()<<
" cannot be converted to int");
2455 if( index >=
size() ) {
2457 "Index overflow: "<<index<<
" >= "<<
size());
2484 "Conversion error: " 2485 "type "<<
GetDataType()<<
" cannot be converted to float");
2487 if( index >=
size() ) {
2489 "Index overflow: "<<index<<
" >= "<<
size());
2499 if( iter->IsTag(c1, c2) ) {
2503 if( !allow_missing ) {
2505 "Tag "<<c1<<c2<<
" not found");
2514 return data.GetString();
2522 in.GetNextAvailableBytes();
2538 const string& ref_label,
2543: m_Reader(bam_db.GetFile())
2552 Select(bam_db, ref_label, ref_range, search_mode, file_pos);
2557 const string& ref_label,
2564: m_Reader(bam_db.GetFile())
2573 Select(bam_db, ref_label, ref_range, min_index_level, max_index_level, search_mode, file_pos);
2578 const string& ref_label,
2585: m_Reader(bam_db.GetFile())
2594 Select(bam_db, ref_label, ref_range, min_index_level, max_index_level, search_mode, file_pos);
2603 if( file_pos && *file_pos ) {
2626SBamIndexParams::operator=(index);
2629 m_Ranges.
SetRanges(index, ref_index, ref_range, min_index_level, max_index_level, search_mode, file_pos);
2702 if( index_level < m_MinIndexLevel || index_level >
m_MaxIndexLevel) {
2731 intrefstart, seqstart;
2732 switch( op & 0xf ) {
2760 "Bad CIGAR segment: "<< (op & 0xf) <<
" in "<<
GetCIGAR());
2762 if( seglen == 0 ) {
2764 "Zero CIGAR segment: in "<<
GetCIGAR());
2766starts.push_back(refstart);
2767starts.push_back(seqstart);
2768lens.push_back(seglen);
static void sx_SetTitle(CSeq_graph &graph, CSeq_annot &annot, string title, string name)
static char * s_format(char *dst, uint32_t v)
static const size_t kIndexMagicLength
NCBI_PARAM_DEF_EX(int, BAM, OVERLAP_MODE, 2, eParam_NoThread, BAM_OVERLAP_MODE)
static const char kBamExt[]
static const float kEstimatedCompression
static void s_ReadMagic(CBGZFStream &in, const char *magic)
static CBGZFPos s_ReadFilePos(CNcbiIstream &in)
static int s_GetRangesMode()
Uint8 s_EstimatedPos(CBGZFPos pos)
static size_t ReadVDBFile(AutoArray< char > &data, const string &path)
static int32_t s_ReadInt32(CNcbiIstream &in)
static int s_GetOverlapMode()
static CBGZFPos s_GetOverlap(const pair< SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter > &iters)
static void s_AddReplacedExt(vector< string > &dst, const string &base_name, CTempString old_ext, CTempString new_ext)
NCBI_PARAM_DECL(int, BAM, OVERLAP_MODE)
static CBGZFPos s_GetNextFilePos(const pair< SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter > &iters)
static CBGZFPos s_GetFilePos(const pair< SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter > &iters)
static uint32_t s_ReadUInt32(CNcbiIstream &in)
static const char kBaiExt[]
static const size_t kGZipMagicLength
static CBGZFRange s_ReadFileRange(CNcbiIstream &in)
static COpenRange< TSeqPos > s_GetSeqRange(SBamIndexParams params, const pair< SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter > &iters)
static const char kGZipMagic[]
static const char kIndexMagicBAI[]
static void s_ReadString(CBGZFStream &in, string &ret, size_t len)
ostream & operator<<(ostream &out, const CBamFileRangeSet &ranges)
static void s_Read(CNcbiIstream &in, char *dst, size_t len)
Uint8 s_EstimatedSize(CBGZFPos file_pos1, CBGZFPos file_pos2)
static uint64_t s_ReadUInt64(CNcbiIstream &in)
pair< CBGZFPos, CBGZFPos > CBGZFRange
void SetPreviousReadStatistics(const pair< Uint8, double > &stats)
pair< Uint8, double > GetReadStatistics() const
pair< Uint8, double > GetUncompressStatistics() const
TByteOffset GetByteOffset() const
TFileBlockPos GetFileBlockPos() const
static CBGZFPos GetInvalid()
CBGZFPos GetSeekPos() const
void Seek(CBGZFPos pos, CBGZFPos end_pos=CBGZFPos::GetInvalid())
static int GetDebugLevel()
@ eFileNotFound
File not found.
virtual TErrCode GetErrCode(void) const
void SetFrom(CBGZFPos file_pos)
void AddWhole(const CBamHeader &header)
const_iterator end() const
void AddFrom(CBGZFPos file_pos)
const_iterator begin() const
void AddSortedRanges(const vector< CBGZFRange > &ranges, const CBGZFPos *file_pos=nullptr)
void AddRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
void SetWhole(const CBamHeader &header)
Uint8 GetFileSize() const
void SetRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
const SBamIndexRefIndex & GetRef(size_t ref_index) const
double m_TotalReadSeconds
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index, TIndexLevel min_index_level, TIndexLevel max_index_level) const
void Read(const string &index_file_name)
size_t GetRefCount() const
pair< Uint8, double > GetReadStatistics() const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const string &seq_id, const string &annot_name, TIndexLevel min_index_level, TIndexLevel max_index_level) const
CBGZFRange GetTotalFileRange(size_t ref_index) const
void SetLengthFromHeader(const CBamHeader &header)
Uint2 GetCIGAROpsCount() const
TSeqPos GetRefSeqPos() const
SBamAlignInfo m_AlignInfo
CBamFileRangeSet::const_iterator m_NextRange
void GetSegments(vector< int > &starts, vector< TSeqPos > &lens) const
void Select(CBamRawDb &bam_db, const CBGZFPos *file_pos=nullptr)
TIndexLevel GetIndexLevel() const
void x_Select(const CBamHeader &header, const CBGZFPos *file_pos=nullptr)
TIndexLevel m_MaxIndexLevel
CBamFileRangeSet m_Ranges
TIndexLevel m_MinIndexLevel
COpenRange< TSeqPos > m_QueryRefRange
COpenRange< TSeqPos > m_AlignRefRange
CBGZFPos m_CurrentRangeEnd
COpenRange< TSeqPos > m_AlignReadRange
double GetEstimatedSecondsPerByte() const
void Open(const string &bam_path)
CMemoryReader(const char *ptr, size_t size)
ERW_Result Read(void *buf, size_t count, size_t *bytes_read)
Read as many as "count" bytes into a buffer pointed to by the "buf" argument.
ERW_Result PendingCount(size_t *count)
Via parameter "count" (which is guaranteed to be supplied non-NULL) return the number of bytes that a...
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
@ fOwnReader
Own the underlying reader.
void SetNameDesc(const string &name)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
CZipStreamDecompressor -- zlib based decompression stream processor.
A very basic data-read interface.
const_iterator begin() const
const_iterator end() const
iterator_bool insert(const value_type &val)
const_iterator upper_bound(const key_type &key) const
const_iterator find(const key_type &key) const
static const char si[8][64]
std::ofstream out("events_result.xml")
main entry point for tests
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * str(char *buf, int n)
unsigned int TSeqPos
Type for sequence locations and lengths.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
@ fGZip
Set of flags for gzip file support. See each flag description above.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
#define NCBI_THROW_FMT(exception_class, err_code, message)
The same as NCBI_THROW but with message processed as output to ostream.
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
@ eParam_NoThread
Do not use per-thread values.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
uint16_t Uint2
2-byte (16-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
int8_t Int1
1-byte (8-bit) signed integer
TThisType & SetFrom(position_type from)
position_type GetTo(void) const
TThisType & SetToOpen(position_type toOpen)
position_type GetToOpen(void) const
position_type GetFrom(void) const
static TThisType GetEmpty(void)
TThisType & SetLength(position_type length)
static TThisType GetWhole(void)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
ERW_Result
Result codes for I/O operations.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
@ eRW_Eof
End of data, should be considered permanent.
@ eRW_Success
Everything is okay, I/O completed.
static string PrintableString(const CTempString str, TPrintableMode mode=fNewLine_Quote|fNonAscii_Passthru)
Get a printable version of the specified string.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
CTempString & assign(const char *src_str, size_type len)
Assign new values to the content of the a string.
size_type size(void) const
Return the length of the represented array.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
@ eStart
Start timer immediately after creating.
void SetA(TA value)
Assign a value to A data member.
void SetMin(TMin value)
Assign a value to Min data member.
void SetTitle(const TTitle &value)
Assign a value to Title data member.
void SetNumval(TNumval value)
Assign a value to Numval data member.
void SetComp(TComp value)
Assign a value to Comp data member.
TValues & SetValues(void)
Assign a value to Values data member.
void SetGraph(TGraph &value)
Assign a value to Graph data member.
void SetMax(TMax value)
Assign a value to Max data member.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
void SetAxis(TAxis value)
Assign a value to Axis data member.
void SetData(TData &value)
Assign a value to Data data member.
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is whole
static constexpr streamsize bmax
void timsort(RandomAccessIterator const first, RandomAccessIterator const last)
Same as std::stable_sort(first, last).
const struct ncbi::grid::netcache::search::fields::SIZE size
const GenericPointer< typename T::ValueType > T2 value
std::istream & in(std::istream &in_, double &x_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static size_t read_size(CNcbiIstream &stream, const char *name)
Reader-writer based streams.
bool operator()(const CBGZFPos p1, const CBGZFRange &p2) const
bool operator()(const CBGZFRange &p1, const CBGZFPos p2) const
bool operator()(const CBGZFPos p1, const SBamIndexBinInfo &p2) const
bool operator()(const SBamIndexBinInfo &p1, const CBGZFPos p2) const
const char * get_cigar_ptr() const
uint32_t get_cigar_read_size() const
uint32_t get_cigar_pos() const
CTempString get_short_seq_accession_id() const
int32_t get_ref_pos() const
pair< COpenRange< uint32_t >, COpenRange< uint32_t > > get_cigar_alignment(void) const
uint8_t get_read_name_len() const
const char * get_read_ptr() const
const char * get_aux_data_end() const
bool has_ambiguous_match() const
uint32_t get_cigar_ref_size() const
const char * get_read_name_ptr() const
static const char kBaseSymbols[]
void Read(CBGZFStream &in)
const char * get_aux_data_ptr() const
uint16_t get_cigar_ops_count() const
static const char kCIGARSymbols[]
int32_t get_ref_index() const
uint32_t get_read_len() const
SBamAuxData get_aux_data(char c1, char c2, bool allow_missing=false) const
float GetFloat(size_t index=0) const
Int8 GetInt(size_t index=0) const
CTempString GetString() const
CBGZFPos GetEndFilePos() const
vector< CBGZFRange > m_Chunks
CBGZFPos GetStartFilePos() const
void Read(CNcbiIstream &in, SBamIndexParams params)
COpenRange< TSeqPos > GetSeqRange(SBamIndexParams params) const
static const TShift kLevelStepBinShift
static const TShift kBAI_min_shift
static const TIndexLevel kMinBinIndexLevel
static const TIndexLevel kBAI_depth
static const TBin kMaxBinNumber
constexpr TSeqPos GetBinSize(TIndexLevel level) const
constexpr TBin GetPseudoBin() const
constexpr TBin GetBinNumberBase(int level) const
constexpr TBin GetFirstBin(TIndexLevel level) const
pair< TBin, TBin > GetBinRange(COpenRange< TSeqPos > ref_range, TIndexLevel index_level) const
TBin GetBinNumber(TSeqPos pos, TIndexLevel level) const
TBin GetBinNumberOffset(TSeqPos pos, TIndexLevel level) const
bool IsOverflowBin(TBin bin, TIndexLevel level=0) const
constexpr TShift GetMinLevelBinShift() const
constexpr TSeqPos GetMinBinSize() const
constexpr TShift GetLevelBinShift(TIndexLevel level) const
constexpr TIndexLevel GetMaxIndexLevel() const
constexpr TBin GetLastBin(TIndexLevel level) const
vector< TSeqPos > GetAlnOverStarts(void) const
pair< TBinsIter, TBinsIter > GetBinsIterRange(pair< TBin, TBin > bin_range) const
vector< Uint8 > EstimateDataSizeByAlnStartPos(TSeqPos seqlen=kInvalidSeqPos) const
TSeqPos m_EstimatedLength
CBGZFRange GetFileRange() const
pair< TBinsIter, TBinsIter > GetLevelBins(TIndexLevel level) const
bool ProcessPseudoBin(SBamIndexBinInfo &bin)
TBins::const_iterator TBinsIter
CBGZFRange GetLimitRange(COpenRange< TSeqPos > &ref_range, ESearchMode search_mode) const
const char * Read(const char *buffer_ptr, const char *buffer_end, SBamIndexParams params, int32_t ref_index)
vector< uint64_t > CollectEstimatedCoverage(TIndexLevel min_index_level, TIndexLevel max_index_level) const
CBGZFRange m_UnmappedChunk
vector< CBGZFPos > m_Overlaps
void SetLengthFromHeader(TSeqPos length)
pair< TBinsIter, TBinsIter > AddLevelFileRanges(vector< CBGZFRange > &ranges, CBGZFRange limit_file_range, pair< TBin, TBin > bin_range) const
void ProcessBin(const SBamIndexBinInfo &bin)
vector< TSeqPos > GetAlnOverEnds(void) const
static void x_AddDataSize(vector< Uint8 > &vv, size_t beg_pos, size_t end_pos, CBGZFPos file_beg, CBGZFPos file_end)
void InitData(vector< Uint8 > &vv, const SBamIndexBinInfo &bin)
SBamRangeBlock(vector< Uint8 > &vv, const vector< SBamRangeBlock > &bb, size_t bb_beg, size_t bb_end)
void ExpandData(vector< Uint8 > &vv, const SBamIndexBinInfo &bin)
static Uint8 MakeUint8(const char *buf)
static Uint4 MakeUint4(const char *buf)
static Uint2 MakeUint2(const char *buf)
static float MakeFloat(const char *buf)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4