: m_MinMapQuality(-1),
63m_GraphType(eGraphType_linear),
64m_GraphValueType(eGraphValueType_byte),
65m_GraphBinSize(kDefaultGraphBinSize),
67m_OutlierDetails(
false),
178 const string& bam_file,
179 const string& bam_index)
187 CBamDbdb(mgr, bam_file, bam_index);
203 doublealign_cov = 0;
214TCovLevelChangeMap cov_level_change_map;
215 const TSeqPoskMapBinCountThreshold = 20;
217 const TSeqPoskWarnLongAlignThreshold = 10000;
218 const size_tkWarnLongAlignCount = 10;
219 size_tinvalid_align_count = 0;
220 size_tlong_align_count = 0;
222 if( min_qual > 0 && ait.GetMapQuality() < min_qual ) {
230 TSeqPospos = ait.GetRefSeqPos();
234 if( end > ref_length ) {
235 if( ++invalid_align_count <= kWarnLongAlignCount ) {
237 "alignment is out of refseq bounds "<<
239<<
", CIGAR: "<< ait.GetCIGAR());
241 else if( invalid_align_count == kWarnLongAlignCount+1 ) {
243 "there are more alignments out of refseq bounds...");
248 if( pos < min_pos ) {
251 if( end > max_pos ) {
254 if(
size> max_align_span ) {
255max_align_span =
size;
258 if(
size> kWarnLongAlignThreshold ) {
259 if( ++long_align_count <= kWarnLongAlignCount ) {
261 "alignment is too long at "<<
263<<
", CIGAR: "<< ait.GetCIGAR());
265 else if( long_align_count == kWarnLongAlignCount+1 ) {
267 "there are more very long alignments...");
271 TSeqPosend_bin = (end - 1) / bin_size;
272 if( end_bin >= bin_cnt ) {
273bin_cnt = end_bin + 1;
274 size_tcap = ret.capacity();
275 while( bin_cnt > cap ) {
277 "Cap "<<cap<<
" at "<<align_cnt<<
" aligns ");
283 TSeqPosbegin_bin = pos / bin_size;
284 if( begin_bin == end_bin ) {
285ret[begin_bin] +=
size;
288 TSeqPosbegin_bin_coverage = (begin_bin + 1) * bin_size - pos;
289ret[begin_bin] += begin_bin_coverage;
291 TSeqPosend_bin_coverage = end - end_bin * bin_size;
292ret[end_bin] += end_bin_coverage;
294 if( end_bin > begin_bin + kMapBinCountThreshold ) {
296cov_level_change_map[begin_bin] += bin_size;
297cov_level_change_map[end_bin] -= bin_size;
300 for(
TSeqPosbin = begin_bin; bin < end_bin; ++bin ) {
301ret[bin] += bin_size;
306 if( !cov_level_change_map.empty() ) {
307 TSeqPosbin = cov_level_change_map.begin()->first;
309 ITERATE( TCovLevelChangeMap, it, cov_level_change_map ) {
311 for( ; bin < next_bin; ++bin ) {
321 "Total aligns: "<<align_cnt<<
322 " total size: "<<align_cov<<
" "<<
323 " max align span: "<<max_align_span);
333 doublealign_cov = 0;
345TCovLevelChangeMap cov_level_change_map;
346 const TSeqPoskMapBinCountThreshold = 20;
348 const TSeqPoskWarnLongAlignThreshold = 10000;
349 const size_tkWarnLongAlignCount = 10;
350 size_tinvalid_align_count = 0;
351 size_tlong_align_count = 0;
353 if( min_qual > 0 && ait.GetMapQuality() < min_qual ) {
361 TSeqPospos = ait.GetRefSeqPos();
365 if( end > ref_length ) {
366 if( ++invalid_align_count <= kWarnLongAlignCount ) {
368 "alignment is out of refseq bounds "<<
370<<
", CIGAR: "<< ait.GetCIGAR());
372 else if( invalid_align_count == kWarnLongAlignCount+1 ) {
374 "there are more alignments out of refseq bounds...");
379 if( pos < min_pos ) {
382 if( end > max_pos ) {
385 if(
size> max_align_span ) {
386max_align_span =
size;
389 if(
size> kWarnLongAlignThreshold ) {
390 if( ++long_align_count <= kWarnLongAlignCount ) {
392 "alignment is too long at "<<
394<<
", CIGAR: "<< ait.GetCIGAR());
396 else if( long_align_count == kWarnLongAlignCount+1 ) {
398 "there are more very long alignments...");
402 TSeqPosend_bin = (end - 1) / bin_size;
403 if( end_bin >= bin_cnt ) {
404bin_cnt = end_bin + 1;
405 size_tcap = ret.capacity();
406 while( bin_cnt > cap ) {
408 "Cap "<<cap<<
" at "<<align_cnt<<
" aligns ");
414 TSeqPosbegin_bin = pos / bin_size;
415 if( begin_bin == end_bin ) {
416ret[begin_bin] +=
size;
419 TSeqPosbegin_bin_coverage = (begin_bin + 1) * bin_size - pos;
420ret[begin_bin] += begin_bin_coverage;
422 TSeqPosend_bin_coverage = end - end_bin * bin_size;
423ret[end_bin] += end_bin_coverage;
425 if( end_bin > begin_bin + kMapBinCountThreshold ) {
427cov_level_change_map[begin_bin] += bin_size;
428cov_level_change_map[end_bin] -= bin_size;
431 for(
TSeqPosbin = begin_bin; bin < end_bin; ++bin ) {
432ret[bin] += bin_size;
437 if( !cov_level_change_map.empty() ) {
438 TSeqPosbin = cov_level_change_map.begin()->first;
440 ITERATE( TCovLevelChangeMap, it, cov_level_change_map ) {
442 for( ; bin < next_bin; ++bin ) {
452 "Total aligns: "<<align_cnt<<
453 " total size: "<<align_cov<<
" "<<
454 " max align span: "<<max_align_span);
468length =
TSeqPos(ret.size())*bin_size;
484 const string& bam_ind)
487 CBamIndexbam_index(bam_ind.empty()? bam_file+
".bai": bam_ind);
502 const string& bam_ind)
504 CBamRawDbbam_raw_db(bam_file, bam_ind.empty()? bam_file+
".bai": bam_ind);
525 stringtitle,
stringname)
527 if( name.empty() ) {
528name =
"BAM coverage";
530 if( title.empty() ) {
536annot.
SetDesc().Set().push_back(desc);
541 const string& bam_file)
545annot->
SetData().SetGraph().push_back(graph);
550user_desc.
SetType().SetStr(
"BAM coverage");
551annot->
SetDesc().Set().push_back(desc);
553user_desc.
AddField(
"Estimated",
true);
558 size_tval_count = 0;
559 ITERATE( vector<Uint8>, it, cov ) {
573 if( val_count == 0 ) {
580avg_cov = double(sum_cov)/val_count;
583user_desc.
AddField(
"SourceFile", bam_file);
595user_desc.
AddField(
"MinCoverage", min_cov*cov_mul);
596user_desc.
AddField(
"MaxCoverage", max_cov*cov_mul);
597user_desc.
AddField(
"AvgCoverage", avg_cov*cov_mul);
602user_desc.
AddField(
"LimitCoverage", max_cov*cov_mul);
622vvb->reserve(cov.size());
631vvi->reserve(cov.size());
635user_desc.
AddField(
"Logarithmic",
true);
645 doublebase =
log(
double(min_cov));
646 doublebyte_mul = double(
MAX-1)/(
log(
double(max_cov))-base);
647graph->
SetA(1./byte_mul);
649 ITERATE( vector<Uint8>, it, cov ) {
655 else if( c > max_cov ) {
659 b= 1 +
int((
log(
double(c))-base)*byte_mul+.5);
677 doublebyte_mul = double(
MAX-1)/(max_cov-min_cov);
678graph->
SetA(cov_mul/byte_mul);
679graph->
SetB(cov_mul*(min_cov - 1./byte_mul));
680 ITERATE( vector<Uint8>, it, cov ) {
686 else if( c > max_cov ) {
690field->
SetLabel().SetId(
int(it-cov.begin()));
691field->
SetData().SetReal(
double(c));
692outliers->push_back(field);
696 b= 1 +
int((c-min_cov)*byte_mul+.5);
709 const string& bam_file,
710 const string& bam_index)
717 const string& bam_file)
724 const string& bam_file)
731 const string& bam_file)
744seq.
SetId().push_back(
id);
750seq.
SetId().push_back(
id);
767 const string& bam_file,
768 const string& bam_index)
775 const string& bam_file)
788 const string& bam_file)
795 const string& bam_file)
static void sx_SetTitle(CSeq_graph &graph, CSeq_annot &annot, string title, string name)
static const int kDefaultOutlierMax_Int
static const Uint8 kDefaultMinMapQuality
static const int kDefaultOutlierMax_Byte
NCBI_DEFINE_ERR_SUBCODE_X(6)
#define DEFAULT_BAI_SUFFIX
const string & GetRefLabel(void) const
Label of the reference sequence in the BAM file.
void SetGraphBinSize(TSeqPos bin_size)
void SetRawAccess(bool raw_access=true)
EGraphValueType m_GraphValueType
vector< Uint8 > CollectRawAccessCoverage(const CBamHeader &header, const CBamIndex &bam_index)
int GetMinMapQuality(void) const
Minimal map quality of alignments to include in graph.
void SetGraphTitle(const string &title)
bool GetEstimated(void) const
make estimated graph using BAM index only the bin size will be derived from index
bool GetOutlierDetails(void) const
void SetOutlierMax(double x)
void SetEstimated(bool estimated=true)
const string & GetAnnotName(void) const
Annot name of generated Seq-graph.
void SetGraphValueType(EGraphValueType type)
vector< Uint8 > CollectCoverage(CBamMgr &mgr, const string &bam_file, const string &bam_index)
Generate raw align coverage for BAM file using BAM file index.
bool GetRawAccess(void) const
try to use raw BAM file access for efficiency
double GetOutlierMax(void) const
Limit too big graph values by a multiple of their average.
void SetAnnotName(const string &name)
void SetOutlierDetails(bool details=true)
CRef< CSeq_annot > MakeSeq_annot(CBamMgr &mgr, const string &bam_file, const string &bam_index)
Generate Seq-annot for BAM file using BAM file index.
vector< Uint8 > CollectEstimatedCoverage(const CBamHeader &header, const CBamIndex &bam_index)
CRef< CSeq_entry > MakeSeq_entry(CBamMgr &mgr, const string &bam_file, const string &bam_index)
Generate Seq-entry for BAM file.
CRef< CSeq_inst > m_Seq_inst
void SetRefLabel(const string &ref_label)
void SetGraphType(EGraphType type)
void SetRefId(const CSeq_id &ref_id)
const string & GetGraphTitle(void) const
Title of generated Seq-graph.
EGraphType
Type of graph coverage axis - linear or logarithmic.
const CSeq_id & GetRefId(void) const
Seq-id for the reference sequence in generated entry.
EGraphType GetGraphType(void) const
void SetSeq_inst(CRef< CSeq_inst > inst)
Use specified Seq-inst object for the virtual sequence.
EGraphValueType GetGraphValueType(void) const
EGraphValueType
Type of graph values - byte (0-255) or int.
void SetMinMapQuality(int qual)
CRange< TSeqPos > m_TotalRange
TSeqPos GetGraphBinSize(void) const
bool UsesRawIndex() const
const string & GetDbName(void) const
TSeqPos GetRefSeqLength(const string &str) const
const string & GetIndexName(void) const
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index, TIndexLevel min_index_level, TIndexLevel max_index_level) const
size_t GetRefIndex(const string &ref_label) const
const CBamHeader & GetHeader() const
const CBamIndex & GetIndex() const
TSeqPos GetRefSeqLength(size_t ref_index) const
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
CRef< CUser_field > SetFieldRef(const string &str, const string &delim=".", const string &obj_subtype=kEmptyStr, NStr::ECase use_case=NStr::eCase)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
#define LOG_POST_X(err_subcode, message)
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
void Warning(CExceptionArgs_Base &args)
void Info(CExceptionArgs_Base &args)
C * SerialClone(const C &src)
Create on heap a clone of the source object.
uint64_t Uint8
8-byte (64-bit) unsigned integer
position_type GetToOpen(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static const char label[]
void SetFrom(TFrom value)
Assign a value to From data member.
vector< CRef< CUser_field > > TFields
void SetLabel(TLabel &value)
Assign a value to Label data member.
void SetType(TType &value)
Assign a value to Type data member.
void SetData(TData &value)
Assign a value to Data data member.
void SetA(TA value)
Assign a value to A data member.
void SetMin(TMin value)
Assign a value to Min data member.
void SetTitle(const TTitle &value)
Assign a value to Title data member.
void SetAxis(TAxis value)
Assign a value to Axis data member.
void SetNumval(TNumval value)
Assign a value to Numval data member.
void SetComp(TComp value)
Assign a value to Comp data member.
TValues & SetValues(void)
Assign a value to Values data member.
void SetGraph(TGraph &value)
Assign a value to Graph data member.
void SetB(TB value)
Assign a value to B data member.
void SetMax(TMax value)
Assign a value to Max data member.
void SetMax(TMax value)
Assign a value to Max data member.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
void SetAxis(TAxis value)
Assign a value to Axis data member.
void SetMin(TMin value)
Assign a value to Min data member.
TValues & SetValues(void)
Assign a value to Values data member.
TSeq & SetSeq(void)
Select the variant.
void SetData(TData &value)
Assign a value to Data data member.
TId & SetId(void)
Assign a value to Id data member.
void SetDesc(TDesc &value)
Assign a value to Desc data member.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
TName & SetName(void)
Select the variant.
TUser & SetUser(void)
Select the variant.
void SetInst(TInst &value)
Assign a value to Inst data member.
void SetRepr(TRepr value)
Assign a value to Repr data member.
void SetLength(TLength value)
Assign a value to Length data member.
void SetMol(TMol value)
Assign a value to Mol data member.
@ eRepr_virtual
no seq data
@ eMol_na
just a nucleic acid
unsigned int
A callback function used to compare two keys in a database.
Definition of all error codes used in SRA C++ support libraries.
const struct ncbi::grid::netcache::search::fields::SIZE size
static bool Equals(const CVariation::TPlacements &p1, const CVariation::TPlacements &p2)
#define MAX(a, b)
returns larger of a and b.
constexpr TSeqPos GetMinBinSize() const
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4