: m_ErrorMode(eError_Throw)
68, m_SubstMatrixName(
"BLOSUM62")
84length += it->IntersectionWith(range).GetLength();
97 if(
data.empty() ) {
100 size_trows =
data.size();
102 for(
size_t i= 1;
i< rows; ++
i) {
105 "Rows have different lengths");
108 for(
size_t a= 0;
a<
size; ++
a) {
109 boolis_mismatch =
false;
110 charc =
data[0][
a];
111 for(
size_t b= 1;
b< rows; ++
b) {
112 if(
data[
b][
a] != c) {
144 if( !prod_bsh || !genomic_bsh ) {
147 "Can't get sequence data for "+ failed_id.
AsFastaString() +
148 " in order to count identities/mismatches");
164 gen.GetSeqData(
r2.GetFrom(),
r2.GetTo() + 1, gen_data);
174 TSeqPosstart_offset = range_it->GetFrom() -
r1.GetFrom(),
175end_offset = range_it->GetToOpen() -
r1.GetFrom();
176string::const_iterator pit = prod_data.begin()
178string::const_iterator pit_end = prod_data.begin()
180string::const_iterator git = gen_data.begin()
182string::const_iterator git_end = gen_data.begin()
185 for( ; pit != pit_end && git != git_end; ++pit, ++git)
187 bool match= (*pit == *git);
188*identities +=
match;
189*mismatches += !
match;
208codon[0] = codon[1] = codon[2] =
'N';
216 if(last_r1.
GetTo() + 1 !=
r1.GetFrom()) {
217 size_t i= last_r1.
GetTo() + 1;
220codon[
i% 3 ] =
'N';
227 gen.GetSeqData(
r2.GetFrom(),
r2.GetTo() + 1, gen_data);
241 for(
size_t i= 0;
i< gen_data.size(); ++
i, ++prod_pos) {
242codon[ prod_pos % 3 ] = gen_data[
i];
245 if(prod_pos % 3 == 2) {
247 charresidue = (prod_pos == 2
254 if(residue == prod[prod_pos / 3] &&
255residue !=
'X'&& residue !=
'-') {
286 int* identities,
int* mismatches,
290 _ASSERT(identities && mismatches);
291 if(ranges.empty()) {
300 if(ranges.begin()->IsWhole() &&
304*identities += num_ident;
305*mismatches += (
len- num_ident);
314vector<string>
data;
317 for(
intseg = 0; seg < vec.
GetNumSegs(); ++seg) {
318 boolhas_gap =
false;
319 for(
int i= 0; !has_gap &&
i< vec.
GetNumRows(); ++
i) {
331seg_stop = vec.
GetStop(0, seg);
339range_it->GetFrom()+
offset,
340range_it->GetTo()+
offset);
341 data[
i] += seq_string;
354identities, mismatches, ranges);
361 "identity + mismatch function not implemented for std-seg");
366 intaln_identities = 0;
367 intaln_mismatches = 0;
368 boolhas_non_standard =
false;
380 switch(chunk.
Which()) {
385part_start+part_len-1));
392part_start+part_len-1));
399part_start+part_len-1)))
401has_non_standard =
true;
412part_start += part_len;
415has_non_standard =
true;
419 if( !has_non_standard ) {
420*identities += aln_identities;
421*mismatches += aln_mismatches;
431identities, mismatches);
434 "Can't calculate identities/mismatches for " 435 "alignment with genomic sequence "+
437 "; Loader can't load all required " 438 "components of sequence");
456 double* pct_identity,
461 size_tcount_aligned = 0;
479*pct_identity = 100.0f * double(*identities) / count_aligned;
497 "failed to retrieve sequence: "+
id.AsFastaString());
539 double* pct_coverage,
unsigned query= 0)
541 if(!ranges.
empty() && ranges.
begin()->IsWhole() &&
550 if(ranges.
empty() || !ranges.
begin()->IsWhole()){
559 constobjects::CBioseq_Handle& bsh_seq = scope.
GetBioseqHandle(query_id);
563 "Can't get sequence data for "+ query_id.AsFastaString() +
564 " in order to calculate coverage");
566seq_len = bsh_seq.GetBioseqLength();
584 if(is_protein_to_genomic) {
598*pct_coverage = 100.0f * double(covered_bases) / double(seq_len);
607 int* positives,
int* negatives)
614 "num_positives and num_negatives scores only defined " 615 "for protein alignment");
619 const string& dna = pro_text.
GetDNA();
621 for(string::size_type
i=0;
i<
match.size(); ++
i) {
627*positives += increment;
632*negatives += increment;
653 doublepct_identity = 0;
655&identities, &mismatches, &pct_identity,
type);
667 doublepct_identity = 0;
669&identities, &mismatches, &pct_identity,
type,
682 doublepct_identity = 0;
684&identities, &mismatches, &pct_identity,
type, ranges);
693 doublepct_coverage = 0;
706 doublepct_coverage = 0;
716 doublepct_coverage = 0;
740 int& identities,
int& mismatches)
772 int& identities,
int& mismatches)
803 int& identities,
int& mismatches)
830 int& positives,
int& negatives)
925 doublepct_identity = 0;
939 doublescore_value =
ComputeScore(scope, align, score);
954 switch(e.GetErrCode()) {
967<<
"CScoreBuilderBase::AddScore(): error computing score: " 975 string GetDonor(
constobjects::CSpliced_exon& exon) {
976 if( exon.CanGetDonor_after_exon() && exon.GetDonor_after_exon().CanGetBases() ) {
977 returnexon.GetDonor_after_exon().GetBases();
983 if( exon.CanGetAcceptor_before_exon() && exon.GetAcceptor_before_exon().CanGetBases() ) {
984 returnexon.GetAcceptor_before_exon().GetBases();
991 if(donor.length()<2 || acc.length()<2)
return false;
992 if(
toupper(
Uchar(acc.c_str()[0])) !=
'A')
return false;
1001 if(don2 ==
'T'|| don2 ==
'C')
return true;
1020 "CScoreBuilderBase::AddSplignScores(): Unsupported product type");
1025 typedefTSpliced::TExons TExons;
1026 constTExons & exons (spliced.GetExons());
1029aligned_query_bases (0),
1030aln_length_exons (0),
1031aln_length_gaps (0),
1033splices_consensus (0);
1035 const TSeqPosqlen (spliced.GetProduct_length());
1036 const TSeqPospolya (spliced.CanGetPoly_a()?
1037spliced.GetPoly_a(): (qstrand? qlen:
TSeqPos(-1)));
1038 const TSeqPosprod_length_no_polya (qstrand? polya: qlen - 1 - polya);
1043 ITERATE(TExons, ii2, exons) {
1045 constTExon & exon (**ii2);
1046 const TSeqPosqmin (exon.GetProduct_start().GetNucpos()),
1047qmax (exon.GetProduct_end().GetNucpos());
1049 const TSeqPosqgap (qstrand? qmin - qprev - 1: qprev - qmax - 1);
1052aln_length_gaps += qgap;
1055 else if(ii2 != exons.begin()) {
1060 typedefTExon::TParts TParts;
1061 constTParts & parts (exon.GetParts());
1063 ITERATE(TParts, ii3, parts) {
1071aligned_query_bases +=
len;
1075aligned_query_bases +=
len;
1079aligned_query_bases +=
len;
1085errmsg =
"Unexpected spliced exon chunk part: " 1089aln_length_exons +=
len;
1093qprev = qstrand? qmax: qmin;
1096 const TSeqPosqgap (qstrand? polya - qprev - 1: qprev - polya - 1);
1097aln_length_gaps += qgap;
1099 for(CSeq_align::TScore::iterator it = scores.begin(); it != scores.end(); )
1102 if((*it)->GetId().IsStr()) {
1105. find((*it)->GetId().GetStr());
1107score_type = score->second;
1113it = scores.erase(it);
1121score_matches->SetId().SetStr(
1123score_matches->SetValue().SetInt(matches);
1124scores.push_back(score_matches);
1128score_overall_identity->SetId().SetStr(
1130score_overall_identity->SetValue().
1131SetReal(
double(matches)/(aln_length_exons + aln_length_gaps));
1132scores.push_back(score_overall_identity);
1136score_splices->SetId().SetStr(
1138score_splices->SetValue().SetInt(splices_total);
1139scores.push_back(score_splices);
1143score_splices_consensus->SetId().SetStr(
1145score_splices_consensus->SetValue().SetInt(splices_consensus);
1146scores.push_back(score_splices_consensus);
1150score_coverage->SetId().SetStr(
1152score_coverage->SetValue().
1153SetReal(
double(aligned_query_bases) / prod_length_no_polya);
1154scores.push_back(score_coverage);
1158score_exon_identity->SetId().SetStr(
1160score_exon_identity->SetValue().
1161SetReal(
double(matches) / aln_length_exons);
1162scores.push_back(score_exon_identity);
1189 "CScoreBuilderBase::ComputeScore(): " 1190 "generic 'score' computation is undefined");
1200 "CScoreBuilderBase::ComputeScore(): " 1201 "BLAST scores are available in CScoreBuilder, " 1202 "not CScoreBuilderBase");
1209 if(ranges.
empty() || !ranges.
begin()->IsWhole()) {
1211 "positive-count score not supported within a range");
1216 if(ranges.
empty() || !ranges.
begin()->IsWhole()) {
1218 "positive-count score not supported within a range");
1235 doublepct_identity = 0;
1237&identities, &mismatches, &pct_identity,
1239 returnpct_identity;
1247 doublepct_identity = 0;
1249&identities, &mismatches, &pct_identity,
1251 returnpct_identity;
1259 doublepct_identity = 0;
1261&identities, &mismatches, &pct_identity,
1263 returnpct_identity;
1269 doublepct_coverage = 0;
1271 returnpct_coverage;
1280 "High-quality percent coverage not supported " 1281 "for standard seg representation");
1283 if(ranges.
empty() || !ranges.
begin()->IsWhole()) {
1285 "High-quality percent coverage not supported " 1294SetExcludeExternal());
1297 if(feat_it->GetData().GetRegion() ==
"alignable"&&
1298feat_it->GetAnnot().IsNamed() &&
1299feat_it->GetAnnot().GetName() ==
"NCBI_GPIPE")
1301alignable_range = feat_it->GetRange();
1305 doublepct_coverage = 0;
1309 returnpct_coverage;
1320 if(ranges.
empty() || !ranges.
begin()->IsWhole()) {
1322 "splign scores not supported within a range");
1329 if((*it)->GetValue().IsInt()) {
1330 return(*it)->GetValue().GetInt();
1332 return(*it)->GetValue().GetReal();
1336 NCBI_ASSERT(
false,
"Should never reach this point");
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void ConvertSeqAlignToPairwiseAln(CPairwiseAln &pairwise_aln, const objects::CSeq_align &sa, objects::CSeq_align::TDim row_1, objects::CSeq_align::TDim row_2, CAlnUserOptions::EDirection direction=CAlnUserOptions::eBothDirections, const TAlnSeqIdVec *ids=0)
Build pairwise alignment from the selected rows of a seq-align.
CAlignRange Represents an element of pairwise alignment of two sequences.
TSignedSeqPos GetStop(TNumrow row, TNumseg seg, int offset=0) const
TSignedSeqPos GetStart(TNumrow row, TNumseg seg, int offset=0) const
TDim GetNumRows(void) const
TNumseg GetNumSegs(void) const
Default IAlnSeqId implementation based on CSeq_id_Handle.
string & GetSeqString(string &buffer, TNumrow row, TSeqPos seq_from, TSeqPos seq_to) const
bool IsSetWidths(void) const
static const CTrans_table & GetTransTable(int id)
Data loader exceptions, used by GenBank loader.
A pairwise aln is a collection of ranges for a pair of rows.
Text representation of ProSplign alignment.
const string & GetDNA() const
const string & GetMatch() const
const string & GetProtein() const
TThisType & IntersectWith(const TRange &r)
const_iterator begin() const
position_type GetCoveredLength(void) const
Returns total length covered by ranges in this collection, i.e.
int GetPositiveCount(CScope &scope, const CSeq_align &align)
counts based on substitution matrix for protein alignments
TSeqPos GetAlignLength(const CSeq_align &align, bool ungapped=false)
Compute the length of the alignment (= length of all segments, gaps + aligned)
@ eError_Report
Print error messages, but do not fail.
@ eError_Throw
Throw exceptions on errors.
int GetIdentityCount(CScope &scope, const CSeq_align &align)
Compute the number of identities in the alignment.
void AddSplignScores(const CSeq_align &align, CSeq_align::TScore &scores)
Compute the six splign scores.
void AddScore(CScope &scope, CSeq_align &align, CSeq_align::EScoreType score)
int GetGapCount(const CSeq_align &align)
Compute the number of gaps in the alignment.
double ComputeScore(CScope &scope, const CSeq_align &align, CSeq_align::EScoreType score)
EPercentIdentityType
Compute percent identity (range 0-100)
int GetNegativeCount(CScope &scope, const CSeq_align &align)
double GetPercentCoverage(CScope &scope, const CSeq_align &align, unsigned query=0)
Compute percent coverage of the query (sequence 0) (range 0-100)
double GetPercentIdentity(CScope &scope, const CSeq_align &align, EPercentIdentityType type=eGapped)
virtual ~CScoreBuilderBase()
Destructor.
void x_GetMatrixCounts(CScope &scope, const CSeq_align &align, int *positives, int *negatives)
int GetMismatchCount(CScope &scope, const CSeq_align &align)
Compute the number of mismatches in the alignment.
CScoreBuilderBase()
Default constructor.
EErrorMode GetErrorMode(void) const
void GetMatrixCounts(CScope &scope, const CSeq_align &align, int &positives, int &negatives)
void SetSubstMatrix(const string &name)
int GetGapBaseCount(const CSeq_align &align)
Compute the number of gap bases in the alignment (= length of all gap segments)
static SIZE_TYPE ReverseComplement(const string &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst)
EScoreType
enum controlling known named scores
@ eScore_PercentIdentity_GapOpeningOnly
@ eScore_PercentIdentity_Gapped
@ eScore_ConsensusSplices
@ eScore_PercentIdentity_Ungapped
@ eScore_HighQualityPercentCoverage
TSeqPos GetNumGapOpeningsWithinRanges(const CRangeCollection< TSeqPos > &ranges, TDim row=-1) const
TSeqPos GetTotalGapCount(TDim row=-1) const
Retrieves the total number of gaps in the given row an alignment; all gaps by default.
TSeqPos GetNumGapOpeningsWithinRange(const TSeqRange &range, TDim row=-1) const
static string ScoreName(EScoreType score)
void SetNamedScore(const string &id, int score)
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
bool GetNamedScore(const string &id, int &score) const
Get score.
TSeqPos GetTotalGapCountWithinRanges(const CRangeCollection< TSeqPos > &ranges, TDim row=-1) const
void ResetNamedScore(const string &name)
TSeqPos GetAlignLengthWithinRange(const TSeqRange &range, bool include_gaps=true) const
Get the length of this alignment within a specified range By default, this function computes an align...
TSeqPos GetTotalGapCountWithinRange(const TSeqRange &range, TDim row=-1) const
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
TSeqPos GetAlignLengthWithinRanges(const CRangeCollection< TSeqPos > &ranges, bool include_gaps=true) const
Get the length of this alignment within a specified range By default, this function computes an align...
static const TScoreNameMap & ScoreNameMap()
static bool IsIntegerScore(EScoreType score)
TSeqPos GetNumGapOpenings(TDim row=-1) const
Retrieves the number of gap openings in a given row in an alignment (ignoring how many gaps are in th...
@ eUnsupported
Operation that is undefined for the given input Seq-align, and which is impossible to perform.
@ eNotImplemented
Attempt to use unimplemented funtionality.
char GetStartResidue(int state) const
char GetCodonResidue(int state) const
static int SetCodonState(unsigned char ch1, unsigned char ch2, unsigned char ch3)
container_type::const_iterator const_iterator
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define NCBI_ASSERT(expr, mess)
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
#define NCBI_RETHROW_SAME(prev_exception, message)
Generic macro to re-throw the same exception.
const string AsFastaString(void) const
const COrg_ref & GetOrg_ref(const CBioseq_Handle &handle)
Return the org-ref associated with a given sequence.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CSeq_inst::TMol GetSequenceType(const CSeq_id &id, TGetFlags flags=0)
Get molecular type of sequence (protein/dna/rna) Return CSeq_inst::eMol_not_set if sequence is not fo...
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
unsigned char Uchar
Alias for unsigned char.
position_type GetFirstTo(void) const
TThisType & Set(position_type from, position_type to)
position_type GetSecondFrom(void) const
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
position_type GetSecondTo(void) const
position_type GetFirstFrom(void) const
static TThisType GetWhole(void)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
TGcode GetGcode(void) const
Get the Gcode member data.
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
const TDenseg & GetDenseg(void) const
Get the variant data.
E_Choice
Choice variants.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetParts(void) const
basic seqments always are in biologic order Check if a value has been assigned to Parts data member.
vector< CRef< CScore > > TScore
TMatch GetMatch(void) const
Get the variant data.
bool IsSetProduct_strand(void) const
should be 'plus' or 'minus' Check if a value has been assigned to Product_strand data member.
static string SelectionName(E_Choice index)
Retrieve selection name (for diagnostic purposes).
TProduct_length GetProduct_length(void) const
Get the Product_length member data.
bool IsSetPoly_a(void) const
start of poly(A) tail on the transcript For sense transcripts: aligned product positions < poly-a <= ...
TDiag GetDiag(void) const
Get the variant data.
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
TMismatch GetMismatch(void) const
Get the variant data.
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
bool IsStd(void) const
Check if variant Std is selected.
list< CRef< CSpliced_exon_chunk > > TParts
bool IsSetProduct_length(void) const
length of the product, in bases/residues from this (or from poly-a if present), a 3' unaligned length...
TPoly_a GetPoly_a(void) const
Get the Poly_a member data.
bool IsSpliced(void) const
Check if variant Spliced is selected.
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
list< CRef< CSeq_align > > Tdata
TProduct_ins GetProduct_ins(void) const
Get the variant data.
const TDisc & GetDisc(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
bool IsDenseg(void) const
Check if variant Denseg is selected.
E_Choice Which(void) const
Which variant is currently selected.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Diag
both sequences are represented, there is sufficient similarity between product and genomic sequences....
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
@ eProduct_type_transcript
@ e_Region
named region (globin locus)
EMol
molecule class in living organism
@ eMol_not_set
> cdna = rna
const struct ncbi::grid::netcache::search::fields::SIZE size
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
static void s_GetCountIdentityMismatch(CScope &scope, const CSeq_align &align, int *identities, int *mismatches, const CRangeCollection< TSeqPos > &ranges=CRangeCollection< TSeqPos >(TSeqRange::GetWhole()))
static void s_GetNucIdentityMismatch(const vector< string > &data, int *identities, int *mismatches)
calculate mismatches and identities in a seq-align
static bool s_SequenceIsProtein(CScope &scope, const CSeq_id &id)
calculate the percent coverage
bool IsConsSplice(const string &donor, const string acc)
static TSeqPos s_IntersectionLength(const CRangeCollection< TSeqPos > &ranges, const TSeqRange &range)
Get length of intersection between a range and a range collection.
static void s_GetPercentIdentity(CScope &scope, const CSeq_align &align, int *identities, int *mismatches, double *pct_identity, CScoreBuilderBase::EPercentIdentityType type, const CRangeCollection< TSeqPos > &ranges=CRangeCollection< TSeqPos >(TSeqRange::GetWhole()))
calculate the percent identity we also return the count of identities and mismatches
static void s_GetPercentCoverage(CScope &scope, const CSeq_align &align, const CRangeCollection< TSeqPos > &ranges, double *pct_coverage, unsigned query=0)
string GetAcceptor(const objects::CSpliced_exon &exon)
static void s_GetSplicedSegIdentityMismatch(CScope &scope, const CSeq_align &align, const CRangeCollection< TSeqPos > &ranges, int *identities, int *mismatches)
string GetDonor(const objects::CSpliced_exon &exon)
static bool s_IsProteinToGenomic(CScope &scope, const CSeq_align &align)
static const sljit_gpr r1
static const sljit_gpr r2
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4