retval = -(pos + pos2advance - 1);
116 ENa_strandstrand,
booltranslate,
intlength,
intoriginal_length,
124retval = original_length -
170 unsigned int first,
unsigned intnsegs,
174 Int4query_length,
Int4subject_length,
175 booltranslate1,
booltranslate2)
183 intlength1 = query_length;
184 intlength2 = subject_length;
186lengths.reserve(nsegs);
198 for(
unsigned intesp_index =
first; esp_index< (
unsigned int)esp->
size&& esp_index < (
unsigned int)(
first+nsegs); esp_index++) {
199 switch(esp->
op_type[esp_index]) {
210strands.push_back(m_strand);
211strands.push_back(s_strand);
212starts.push_back(m_start);
213starts.push_back(s_start);
224strands.push_back(m_strand);
226starts.push_back(m_start);
227starts.push_back(s_start);
239strands.push_back(s_strand);
240starts.push_back(m_start);
241starts.push_back(s_start);
248lengths.push_back(esp->
num[esp_index]);
252 if(lengths.size() != nsegs)
253lengths.resize(nsegs);
255 if(starts.size() != nsegs*2)
256starts.resize(nsegs*2);
258 if(strands.size() != nsegs*2)
259strands.resize(nsegs*2);
287ids.push_back(master);
288ids.push_back(slave);
290dense_seg.
SetNumseg((
int) lengths.size());
291dense_seg.
SetLens().swap(lengths);
311 booltranslate_master,
312 booltranslate_slave)
318 intnsegs = (
int) lengths.size();
322 for(
int i= 0;
i< nsegs;
i++) {
330 if( (m_start = starts[2*
i]) !=
GAP_VALUE) {
331master_loc->
SetInt().SetId(*master);
332master_loc->
SetInt().SetFrom(m_start);
333 if(translate_master)
336m_stop = m_start + lengths[
i] - 1;
337master_loc->
SetInt().SetTo(m_stop);
338master_loc->
SetInt().SetStrand(strands[2*
i]);
344 if( (s_start = starts[2*
i+1]) !=
GAP_VALUE) {
345slave_loc->
SetInt().SetId(*slave);
346slave_loc->
SetInt().SetFrom(s_start);
350s_stop = s_start + lengths[
i] - 1;
351slave_loc->
SetInt().SetTo(s_stop);
352slave_loc->
SetInt().SetStrand(strands[2*
i+1]);
359std_seg->SetIds().push_back(master);
360std_seg->SetIds().push_back(slave);
361std_seg->SetLoc().push_back(master_loc);
362std_seg->SetLoc().push_back(slave_loc);
364retval.push_back(std_seg);
378 for(
intindex=0; index<esp->
size; index++)
386 inttemp_num = esp->
num[index];
389esp->
num[index] = esp->
num[index-1];
391esp->
num[index-1] = temp_num;
392esp->
op_type[index-1] = temp_op;
408 intproduct_length = product_end - product_start + 1;
409 intgenomic_length = genomic_end - genomic_start + 1;
412 for(
const auto& it : exon.
GetParts()) {
413 switch(it->Which()) {
420p += it->GetMismatch();
421 g+= it->GetMismatch();
425p += it->GetProduct_ins();
429 g+= it->GetGenomic_ins();
433cerr <<
"Urecognized exon part\t"<< product_id.
AsFastaString()
438 if(p != product_length) {
441<< product_length <<
"\t"<< p << endl;
444 if(
g!= genomic_length) {
447<< genomic_length <<
"\t"<<
g<< endl;
475 const Uint1kGap = 15;
483 while(last_h->
next&&
486last_h = last_h->
next;
496exon->SetProduct_start().SetNucpos(hsp->
query.
offset);
497exon->SetProduct_end().SetNucpos(last_hsp->
query.
end- 1);
499exon->SetGenomic_end(last_hsp->
subject.
end- 1);
501exon->SetProduct_strand(product_strand);
502exon->SetGenomic_strand(genomic_strand);
512exon->SetAcceptor_before_exon().SetBases(l_bases);
522exon->SetDonor_after_exon().SetBases(r_bases);
527 prev= hh, hh = hh->next) {
529 intquery_pos = hh->hsp->query.offset;
530 intsubject_pos = hh->hsp->subject.offset;
536 _ASSERT(hh->hsp->query.offset >=
prev->hsp->query.end);
537 _ASSERT(hh->hsp->subject.offset >=
prev->hsp->subject.end);
538 if(hh->hsp->query.offset >
prev->hsp->query.end) {
540chunk->SetProduct_ins(hh->hsp->query.offset -
541 prev->hsp->query.end);
543exon->SetParts().push_back(chunk);
546 if(hh->hsp->subject.offset >
prev->hsp->subject.end) {
548chunk->SetGenomic_ins(hh->hsp->subject.offset -
549 prev->hsp->subject.end);
551exon->SetParts().push_back(chunk);
558query_pos += num_matches;
559subject_pos += num_matches;
561 if(num_matches > 0) {
564chunk->SetMatch(num_matches);
565exon->SetParts().push_back(chunk);
574chunk->SetGenomic_ins(1);
578chunk->SetProduct_ins(1);
582chunk->SetMismatch(1);
587exon->SetParts().push_back(chunk);
590num_matches =
MAX(hh->hsp->query.end - query_pos, 0);
591 _ASSERT(hh->hsp->query.end - query_pos >= -1);
595 if(num_matches > 0) {
597chunk->SetMatch(num_matches);
598exon->SetParts().push_back(chunk);
607exons.push_back(exon);
632 booltranslate_master,
booltranslate_slave)
638 if(translate_master || translate_slave) {
641translate_master, translate_slave);
662 Int4query_length,
Int4subject_length)
669 booltranslate1, translate2;
670 boolis_disc_align =
false;
672 if(hsp->
score== 0) {
677 for(
int i=0;
i<
t->size;
i++) {
680is_disc_align =
true;
706 for(
intindex=0; index< esp->
size; index++)
708skip_region =
false;
711 for(index2=
first; index2<esp->
size; index2++, nsegs++){
730strands, query_length, subject_length,
731translate1, translate2);
735translate1, translate2);
739seqalign->
SetSegs().SetDisc().Set().push_back(sa_tmp);
748strands, query_length, subject_length,
749translate1, translate2);
771 Int4query_length,
Int4subject_length)
780 Int4original_length1, original_length2;
788 Int4from1, from2, to1, to2;
796original_length1 = subject_length;
797original_length2 = query_length;
798id1.
Reset(subject_id);
799id2.
Reset(query_id);
805original_length1 = query_length;
806original_length2 = subject_length;
807id1.
Reset(query_id);
808id2.
Reset(subject_id);
818first_shift =
false;
822 for(
intindex=0; index<esp->
size; index++)
827 switch(esp->
op_type[index]) {
830first_shift =
false;
833slp1->
SetInt().SetTo(
MIN(start1,original_length1) - 1);
834slp1->
SetInt().SetId(*id1);
835slp1->
SetInt().SetStrand(strand1);
853to1 =
MIN(start1,original_length1) - 1;
854slp1->
SetInt().SetTo(to1);
855slp1->
SetInt().SetId(*id1);
856slp1->
SetInt().SetStrand(strand1);
860to2 =
MIN(start2,original_length2) - 1;
861slp2->
SetInt().SetFrom(from2);
862slp2->
SetInt().SetTo(to2);
863 if(start2 > original_length2)
864slp1->
SetInt().SetTo(to1 - 1);
868slp2->
SetInt().SetTo(original_length2 - from2 - 1);
869slp2->
SetInt().SetFrom(original_length2 - to2 - 1);
872slp2->
SetInt().SetId(*id2);
873slp2->
SetInt().SetStrand(strand2);
883seg->SetLoc().push_back(slp2);
884seg->SetLoc().push_back(slp1);
888seg->SetLoc().push_back(slp1);
889seg->SetLoc().push_back(slp2);
893ids.resize(seg->GetDim());
895seqalign->
SetSegs().SetStd().push_back(seg);
898first_shift =
false;
905to2 =
MIN(start2,original_length2) - 1;
906slp2->
SetInt().SetFrom(from2);
907slp2->
SetInt().SetTo(to2);
911slp2->
SetInt().SetTo(original_length2 - from2 - 1);
912slp2->
SetInt().SetFrom(original_length2 - to2 - 1);
914slp2->
SetInt().SetId(*id2);
915slp2->
SetInt().SetStrand(strand2);
924first_shift =
false;
928to1 =
MIN(start1, original_length1) - 1;
936 while(to2 >= original_length2) {
944to2 = original_length2 - from2 - 1;
945from2 = original_length2 - tmp_int - 1;
948slp1->
SetInt().SetFrom(from1);
949slp1->
SetInt().SetTo(to1);
950slp1->
SetInt().SetId(*id1);
951slp1->
SetInt().SetStrand(strand1);
952slp2->
SetInt().SetFrom(from2);
953slp2->
SetInt().SetTo(to2);
954slp2->
SetInt().SetId(*id2);
955slp2->
SetInt().SetStrand(strand2);
970to1 =
MIN(start1,original_length1) - 1;
975 if(to2 >= original_length2) {
976to2 = original_length2 -1;
983to2 = original_length2 - from2 - 1;
984from2 = original_length2 - tmp_int - 1;
987slp1->
SetInt().SetFrom(from1);
988slp1->
SetInt().SetTo(to1);
989slp1->
SetInt().SetId(*id1);
990slp1->
SetInt().SetStrand(strand1);
991slp2->
SetInt().SetFrom(from2);
992slp2->
SetInt().SetTo(to2);
993slp2->
SetInt().SetId(*id2);
994slp2->
SetInt().SetStrand(strand2);
1002first_shift =
true;
1008 if(seq_int2_last) {
1011seq_int2_last->
SetTo(start2 - 1);
1014seq_int2_last->
SetFrom(original_length2 - start2);
1019 if(seq_int2_last->
GetFrom() > seq_int2_last->
GetTo()) {
1022seq_int2_last->
SetTo(seq_int2_last->
GetTo() + 3);
1028seq_int1_last->
GetTo() != 0)
1029seq_int1_last->
SetTo(seq_int1_last->
GetTo() + 1);
1038to2 =
MIN(start2,original_length2) - 1;
1044to2 = original_length2 - from2 - 1;
1045from2 = original_length2 - tmp_int - 1;
1047slp2->
SetInt().SetFrom(from2);
1048slp2->
SetInt().SetTo(to2);
1050slp2->
SetInt().SetId(*id2);
1073seg->SetLoc().push_back(slp2);
1074seg->SetLoc().push_back(slp1);
1078seg->SetLoc().push_back(slp1);
1079seg->SetLoc().push_back(slp2);
1083ids.resize(seg->GetDim());
1085seqalign->
SetSegs().SetStd().push_back(seg);
1099 s_MakeScore(
const string& ident_string,
doubled,
int i,
boolis_integer)
1102retval->SetId().SetStr(ident_string);
1105retval->SetValue().SetInt(
i);
1107retval->SetValue().SetReal(d);
1127 if(hsp->
num> 1) {
1132 if(evalue >= 0.0) {
1152 if( !seqid_list.empty() ) {
1153retval += seqid_list.size();
1165 constvector<string> & seqid_list,
1174 static const string kScore(
"score");
1176 static const stringkBlastScore(
"blast_score");
1180 if(hsp->
num> 1) {
1181 static const stringkSumN(
"sum_n");
1187 if(evalue >= 0.0) {
1188 stringscore_type = (hsp->
num<= 1) ?
"e_value":
"sum_e";
1189scores.push_back(
s_MakeScore(score_type, evalue, 0,
false));
1195 static const stringkBitScore(
"bit_score");
1201 static const stringkNumIdent(
"num_ident");
1206 static const stringkCompAdj(
"comp_adjustment_method");
1211 if( !seqid_list.empty() ) {
1212 ITERATE(vector<string>, sid, seqid_list) {
1213scores.push_back(
s_MakeScore(*sid, 0.0, 0,
true));
1218 static const stringkNumPositives(
"num_positives");
1222 if(query_length > 0) {
1223 static const stringkQueryCovHsp(
"hsp_percent_coverage");
1225scores.push_back(
s_MakeScore(kQueryCovHsp, hsp_coverage, 0,
false));
1235 constvector<string> & seqid_list)
1237 if(seqid_list.empty())
1241userObject->
SetType().SetStr(
"use_this_seqid");
1242userObject->
AddField(
"SEQIDS", seqid_list);
1243seqalign->
SetExt().push_back(userObject);
1255 constvector<string> & seqid_list,
1276 Int4query_length,
Int4subject_length,
1277 constvector<string> & seqid_list)
1286ids.push_back(query_id);
1287ids.push_back(subject_id);
1300starts.push_back(query_length - hsp->
query.
end);
1305starts.push_back(subject_length - hsp->
subject.
end);
1326 Int4query_length,
Int4subject_length,
1327 constvector<string> & seqid_list)
1336query_loc->
SetInt().SetId(*query_id);
1337subject_loc->
SetInt().SetId(*subject_id);
1342ids.push_back(query_id);
1343ids.push_back(subject_id);
1357query_loc->
SetInt().SetFrom(query_length -
1373subject_loc->
SetInt().SetFrom(subject_length -
1375subject_loc->
SetInt().SetTo(subject_length -
1379retval->SetLoc().push_back(query_loc);
1380retval->SetLoc().push_back(subject_loc);
1402 Int4subject_length,
1403 constvector<string> & seqid_list,
1416vector<string> emptyList;
1423 for(index=0; index<hsp_list->
hspcnt; index++) {
1425seqalign->
SetSegs().SetDendiag().push_back(
1434 for(index=0; index<hsp_list->
hspcnt; index++) {
1436seqalign->
SetSegs().SetStd().push_back(
1446sa_vector.push_back(seqalign);
1463 Int4query_length,
Int4subject_length,
boolis_ooframe,
1464 constvector<string> & seqid_list,
1473sa_vector.reserve(hsp_list->
hspcnt);
1474vector<string> emptyList;
1476 for(
intindex = 0; index < hsp_list->
hspcnt; index++) {
1483query_length, subject_length);
1487query_length, subject_length);
1490 if(seqalign.
Empty())
continue;
1506sa_vector.push_back(seqalign);
1515retval->
Set().clear();
1523 const intquery_row = 0;
1527 if(
query.IsInt()) {
1528q_shift =
query.GetInt().GetFrom();
1544 const intkSubjDimension = 1;
1569 for(
intindex = 0; index < hit_list->
hsplist_count; index++) {
1579 const Uint4kOid = hsp_list->
oid;
1583 if(subject_id.
Empty()){
1589vector <TSeqRange> ranges;
1590 for(
int i=0;
i<hsp_list->
hspcnt;
i++) {
1595ranges.push_back(rg);
1600 if(!ranges.empty() && seqinfo_src->
GetMasks(kOid, ranges, masks)) {
1601subj_masks.push_back(masks);
1605vector<string> seqid_list;
1608vector<CRef<CSeq_align > > hit_align;
1637seq_aligns->
Set().push_back(*iter);
1644seq_aligns->
Set().push_back(*iter);
1657vector<TSeqLocInfoVector>& subj_masks)
1671 for(
intpattern_index = 0; pattern_index <
pattern_info->num_patterns;
1675 if(one_phi_results) {
1686*
query.GetSeq_loc(0),
1691subj_masks[pattern_index]));
1693retval.push_back(seq_aligns);
1701*
query.GetSeq_loc(0),
1706subj_masks[pattern_index]));
1707retval.push_back(seq_aligns);
1711 sfree(phi_results);
1736 for(
intindex = 0; index < hsp_list->
hspcnt; index++) {
1764vector<TSeqLocInfoVector>& subj_masks)
1778vector<CRef<CSeq_align > > hit_align;
1779retval.reserve(
results->num_queries);
1782 for(
intqindex = 0; qindex <
results->num_queries; qindex++) {
1790 for(sindex = 0; sindex < hit_list->
hsplist_count; ++sindex) {
1792 if(hsp_list->
oid==
static_cast<Int4>(subj_idx))
1812vector<string> seqid_list;
1817vector <TSeqRange> ranges;
1818 for(
int i=0;
i<hsp_list->
hspcnt;
i++) {
1823ranges.push_back(rg);
1828 if(!ranges.empty() &&
1829seqinfo_src.
GetMasks(subj_idx, ranges, masks)) {
1830subj_masks[qindex].push_back(masks);
1860seq_aligns->
Set().push_back(*iter);
1865retval.push_back(seq_aligns);
1881 const size_tnum_queries,
1882 const size_tnum_subjects)
1885result_alnvec.reserve(alnvec.size());
1887 for(
size_tiQuery = 0; iQuery < num_queries; iQuery++)
1889 for(
size_tiSubject = 0; iSubject < num_subjects; iSubject++)
1891 size_tiLinearIndex = iSubject * num_queries + iQuery;
1893result_alnvec.push_back(aln_set);
1897 _ASSERT(result_alnvec.size() == alnvec.size());
1898 returnresult_alnvec;
1908vector<TSeqLocInfoVector>& subj_masks)
1911 size_tseqinfo_size = seqinfo_src->
Size();
1912retval.reserve(query_data.
GetNumQueries() * seqinfo_size);
1914 intnum_of_queries =
results->num_queries;
1919subj_masks.resize(num_of_queries *seqinfo_size);
1921 for(
Uint4index = 0; index < seqinfo_size; index++) {
1922vector<TSeqLocInfoVector> tmp_subj_masks(num_of_queries);
1925*seqinfo_src,
prog, index,
1926is_gapped, is_ooframe,
1934 for(TSeqAlignVector::size_type
i= 0;
i< seqalign.size(); ++
i) {
1935retval.push_back(seqalign[
i]);
1937subj_masks[ seqinfo_size *
i+ index] = tmp_subj_masks[
i];
1953vector<TSeqLocInfoVector>& subj_masks)
1961subj_masks.resize(
results->num_queries);
1962retval.reserve(
results->num_queries);
1965 for(
intindex = 0; index <
results->num_queries; index++) {
1971*
query.GetSeq_loc(index),
1976subj_masks[index]));
1978retval.push_back(seq_aligns);
1979 _TRACE(
"Query "<< index <<
": "<< seq_aligns->
Get().size()
1994vector<TSeqLocInfoVector>& subj_masks,
2020local_data, &seqinfo_src,
2027&seqinfo_src, gapped,
2028oof_mode, subj_masks);
2047 Int4query_length,
Int4subject_length,
2048 constvector<string> & seqid_list)
2057query_loc->
SetInt().SetId(*query_id);
2058subject_loc->
SetInt().SetId(*subject_id);
2063ids.push_back(query_id);
2064ids.push_back(subject_id);
2073query_loc->
SetInt().SetFrom(query_length - hsp->
query.
end);
2085retval->SetLoc().push_back(query_loc);
2086retval->SetLoc().push_back(subject_loc);
2111 constvector<string> & ) =
NULL;
2126 if(hsp_list->
hspcnt> 0)
2130vector<string> seqid_list;
2134 for(
intj = 0; j < hsp_list->
hspcnt; j++)
2141seg_list.push_back((*fun_ptr) (hsp, query_id, subject_id,
2142query_length, subject_length, seqid_list));
CRef< CSeq_align > RemapAlignToLoc(const CSeq_align &align, CSeq_align::TDim row, const CSeq_loc &loc)
Remap seq-align row to the seq-loc.
Contains C++ wrapper classes to structures in algo/blast/core as well as some auxiliary functions to ...
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
#define CODON_LENGTH
Codons are always of length 3.
void Blast_HSPListSortByEvalue(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by e-value, with scores and other criteria used to resolve ties.
BlastHSPResults ** PHIBlast_HSPResultsSplit(const BlastHSPResults *results, const SPHIQueryInfo *pattern_info)
Splits the BlastHSPResults structure for a PHI BLAST search into an array of BlastHSPResults structur...
double Blast_HSPGetQueryCoverage(const BlastHSP *hsp, Int4 query_length)
Calculate query coverage percentage of an hsp.
Boolean Blast_ProgramIsPhiBlast(EBlastProgramType p)
Returns true if program is PHI-BLAST (i.e.
Boolean Blast_QueryIsTranslated(EBlastProgramType p)
Returns true if the query is translated.
#define TRANSLATED_SUBJECT_MASK
This bit is on if the subject is translated.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
#define TRANSLATED_QUERY_MASK
This bit is on if the query is translated.
Utility function to convert internal BLAST result structures into objects::CSeq_align_set objects.
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
EResultType
Specifies the style of Seq-aligns that should be built from the internal BLAST data structures.
@ eSequenceComparison
Seq-aligns in the BLAST 2 Sequence style (one alignment per query-subject pair)
Wrapper class for BlastHSPResults .
void OffsetRow(TDim row, TSignedSeqPos offset)
Offset row's coords.
Seq-loc iterator class — iterates all intervals from a seq-loc in the correct order.
void Validate(bool full_test=false) const
Validators.
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
Abstract base class to encapsulate retrieval of sequence identifiers.
Provides access (not ownership) to the C structures used to configure local BLAST search class implem...
Collection of masked regions for a single query sequence.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
EGapAlignOpType
Operation types within the edit script.
@ eGapAlignDel2
Frame shift deletion of two nucleotides.
@ eGapAlignIns2
Frame shift insertion of two nucleotides.
@ eGapAlignIns1
Frame shift insertion of one nucleotide.
@ eGapAlignIns
Insertion: a gap in subject.
@ eGapAlignDel1
Frame shift deletion of one nucleotide.
@ eGapAlignDecline
Non-aligned region.
@ eGapAlignSub
Substitution.
@ eGapAlignDel
Deletion: a gap in query.
virtual CConstRef< objects::CSeq_loc > GetSeq_loc(size_t index)=0
Get the Seq_loc for the sequence indicated by index.
void BLASTPrelminSearchHitListToStdSeg(EBlastProgramType program, BlastHitList *hit_list, const CSeq_loc &query_loc, TSeqPos query_length, const IBlastSeqInfoSrc *subject_seqinfo, list< CRef< CStd_seg > > &seg_list)
CRef< CDense_diag > x_UngappedHSPToDenseDiag(BlastHSP *hsp, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, const vector< string > &seqid_list)
Creates a Dense-diag object from HSP information and sequence identifiers for a non-translated ungapp...
static CSeq_align::C_Segs::TStd s_CreateStdSegs(CRef< CSeq_id > master, CRef< CSeq_id > slave, CDense_seg::TStarts &starts, CDense_seg::TLens &lengths, CDense_seg::TStrands &strands, bool translate_master, bool translate_slave)
Creates a Std-seg object from the starts, lengths and strands vectors and two Seq-ids for a translate...
virtual CConstRef< objects::CSeq_loc > GetSeqLoc(Uint4 index) const =0
Method to retrieve the sequence location given its ordinal number.
static int s_GetCurrPos(int &pos, int pos2advance)
Advances position in a sequence, according to an edit script instruction.
static const TSeqPos kBlastAlignmentDim
BLAST alignments have always 2 dimensions (i.e.
static void s_CollectSeqAlignData(const BlastHSP *hsp, const GapEditScript *esp, unsigned int first, unsigned int nsegs, CDense_seg::TStarts &starts, CDense_seg::TLens &lengths, CDense_seg::TStrands &strands, Int4 query_length, Int4 subject_length, bool translate1, bool translate2)
Fills vectors of start positions, lengths and strands for all alignment segments.
static void s_CorrectUASequence(BlastHSP *hsp)
Checks if any decline-to-align segments immediately follow an insertion or deletion,...
#define SMALLEST_EVALUE
Threshold below which e-values are saved as 0.
static CRef< CSeq_align > s_BlastHSP2SeqAlign(EBlastProgramType program, BlastHSP *hsp, CRef< CSeq_id > id1, CRef< CSeq_id > id2, Int4 query_length, Int4 subject_length)
Converts a traceback editing block to a Seq-align, provided the 2 sequence identifiers.
static void s_CreateDenseg(CDense_seg &dense_seg, CRef< CSeq_id > master, CRef< CSeq_id > slave, CDense_seg::TStarts &starts, CDense_seg::TLens &lengths, CDense_seg::TStrands &strands)
Creates a Dense-seg object from the starts, lengths and strands vectors and two Seq-ids.
const char BLASTNA_TO_IUPACNA[]
Translates between blastna and iupacna.
void GetFilteredRedundantSeqids(const IBlastSeqInfoSrc &sisrc, int oid, vector< string > &seqids, bool use_gis=true)
Get Seqids for a sequence in a redundant database.
static size_t s_CalculateScoreVectorSize(const BlastHSP *hsp, const vector< string > &seqid_list)
Computes the exact size of a CSeq_align::TScore for a given HSP.
static TSeqPos s_GetAlignmentStart(int &curr_pos, int num, ENa_strand strand, bool translate, int length, int original_length, short frame)
Finds the starting position of a sequence segment in an alignment, given an editing script.
static CRef< CScore > s_MakeScore(const string &ident_string, double d, int i, bool is_integer)
Creates and initializes CScore with a given name, and with integer or double value.
CRef< CSeq_align_set > BlastHitList2SeqAlign_OMF(const BlastHitList *hit_list, EBlastProgramType prog, const CSeq_loc &query_loc, TSeqPos query_length, const IBlastSeqInfoSrc *seqinfo_src, bool is_gapped, bool is_ooframe, TSeqLocInfoVector &subj_masks)
static Int4 s_GetProteinFrameLength(Int4 nuc_length, Int2 frame)
Finds length of a protein frame given a nucleotide length and a frame number.
static TSeqAlignVector s_BLAST_OneSubjectResults2CSeqAlign(const BlastHSPResults *results, ILocalQueryData &query_data, const IBlastSeqInfoSrc &seqinfo_src, EBlastProgramType prog, Uint4 subj_idx, bool is_gapped, bool is_ooframe, vector< TSeqLocInfoVector > &subj_masks)
Extracts results from the BlastHSPResults structure for only one subject sequence,...
CRef< CStd_seg > x_NonTranslatedHSPToStdSeg(BlastHSP *hsp, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, const vector< string > &seqid_list)
Creates a Std-seg object from HSP information and sequence identifiers for a non-translated ungapped ...
static void s_AddUserObjectToSeqAlign(CRef< CSeq_align > &seqalign, const vector< string > &seqid_list)
Produce UserObject with Seq-ids to limit formatting to ("use_this_gi")
virtual BlastQueryInfo * GetQueryInfo()=0
Accessor for the BlastQueryInfo structure.
static ENa_strand s_Frame2Strand(short frame)
Converts a frame into the appropriate strand.
TSeqAlignVector PhiBlastResults2SeqAlign_OMF(const BlastHSPResults *results, EBlastProgramType prog, class ILocalQueryData &query, const IBlastSeqInfoSrc *seqinfo_src, const SPHIQueryInfo *pattern_info, vector< TSeqLocInfoVector > &subj_masks)
static TSeqAlignVector s_BlastResults2SeqAlignDatabaseSearch_OMF(const BlastHSPResults *results, EBlastProgramType prog, class ILocalQueryData &query, const IBlastSeqInfoSrc *seqinfo_src, bool is_gapped, bool is_ooframe, vector< TSeqLocInfoVector > &subj_masks)
#define GAP_VALUE
Value in the Dense-seg indicating a gap.
static CRef< CSeq_align > s_CreateSeqAlign(CRef< CSeq_id > master, CRef< CSeq_id > slave, CDense_seg::TStarts starts, CDense_seg::TLens lengths, CDense_seg::TStrands strands, bool translate_master, bool translate_slave)
Creates a Seq-align for a single HSP from precalculated vectors of start positions,...
static void s_ValidateExon(const CSpliced_exon &exon, const CSeq_id &product_id, const CSeq_id &genomic_id)
virtual size_t GetNumQueries()=0
Get the number of queries.
static void s_RemapToSubjectLoc(CRef< CSeq_align > &subj_aligns, const CSeq_loc &subj_loc)
Remap subject alignment if its location specified the reverse strand or a starting location other tha...
TSeqAlignVector LocalBlastResults2SeqAlign(BlastHSPResults *hsp_results, ILocalQueryData &local_data, const IBlastSeqInfoSrc &seqinfo_src, EBlastProgramType program, bool gapped, bool oof_mode, vector< TSeqLocInfoVector > &subj_masks, EResultType result_type)
Convert traceback output into Seq-align format.
CRef< CSeq_align_set > CreateEmptySeq_align_set()
Constructs an empty Seq-align-set containing an empty discontinuous seq-align, and appends it to a pr...
void BLASTHspListToSeqAlign(EBlastProgramType program, BlastHSPList *hsp_list, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, bool is_ooframe, const vector< string > &seqid_list, vector< CRef< CSeq_align > > &sa_vector)
This is called for each query and each subject in a BLAST search.
virtual size_t Size() const =0
Returns the size of the underlying container of sequences.
void MakeSplicedSeg(CSpliced_seg &spliced_seg, CRef< CSeq_id > product_id, CRef< CSeq_id > genomic_id, int product_length, const HSPChain *chain)
Convert a spliced alignment in BlastHSPChain into a Spliced_seg.
virtual size_t GetSeqLength(size_t index)=0
Get the length of the sequence indicated by index.
void BLASTUngappedHspListToSeqAlign(EBlastProgramType program, BlastHSPList *hsp_list, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, const vector< string > &seqid_list, vector< CRef< CSeq_align > > &sa_vector)
Creates a Seq-align from an HSP list for an ungapped search.
CRef< CStd_seg > x_UngappedHSPToStdSeg(BlastHSP *hsp, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, const vector< string > &seqid_list)
Creates a Std-seg object from HSP information and sequence identifiers for a translated ungapped sear...
void RemapToQueryLoc(CRef< CSeq_align > sar, const CSeq_loc &query)
Remaps Seq-align offsets relative to the query Seq-loc.
virtual bool CanReturnPartialSequence() const =0
Return true if the implementation can return anything besides a seq-loc for the entire sequence.
static TSeqAlignVector s_BlastResults2SeqAlignSequenceCmp_OMF(const BlastHSPResults *results, EBlastProgramType prog, class ILocalQueryData &query_data, const IBlastSeqInfoSrc *seqinfo_src, bool is_gapped, bool is_ooframe, vector< TSeqLocInfoVector > &subj_masks)
static void s_AddScoresToSeqAlign(CRef< CSeq_align > &seqalign, const BlastHSP *hsp, const vector< string > &seqid_list, Int4 query_length)
Given an HSP structure, creates a list of scores and inserts them into a Seq-align.
static void s_BuildScoreList(const BlastHSP *hsp, CSeq_align::TScore &scores, const vector< string > &seqid_list, Int4 query_length)
Creates a list of score objects for a Seq-align, given an HSP structure.
void GetSequenceLengthAndId(const IBlastSeqInfoSrc *seqinfo_src, int oid, CRef< objects::CSeq_id > &seqid, TSeqPos *length)
Retrieves subject sequence Seq-id and length.
static void s_AdjustNegativeSubjFrameInBlastn(ENa_strand subj_strand, EBlastProgramType program, BlastHSPList *hsp_list)
This function changes the subject frame for HSPs if the program is blastn and the subject was specifi...
virtual bool GetMasks(Uint4 index, const TSeqRange &target_range, TMaskedSubjRegions &retval) const =0
Retrieves the subject masks for the corresponding index.
static TSeqAlignVector s_TransposeSeqAlignVector(const TSeqAlignVector &alnvec, const size_t num_queries, const size_t num_subjects)
Transpose the (linearly organized) seqalign set matrix from (q1 s1 q2 s1 ...
static CRef< CSeq_align > s_OOFBlastHSP2SeqAlign(EBlastProgramType program, BlastHSP *hsp, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length)
This function is used for out-of-frame traceback conversion Converts an OOF editing script chain to a...
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
void Info(CExceptionArgs_Base &args)
C & SerialAssign(C &dest, const C &src, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const string AsFastaString(void) const
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
void Reset(void)
Reset reference object.
bool Empty(void) const THROWS_NONE
Check if CRef is empty — not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define USING_SCOPE(ns)
Use the specified namespace.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
void SetFrom(TFrom value)
Assign a value to From data member.
void SetTo(TTo value)
Assign a value to To data member.
void SetType(TType &value)
Assign a value to Type data member.
Tdata & Set(void)
Assign a value to data member.
TScore & SetScore(void)
Assign a value to Score data member.
TLens & SetLens(void)
Assign a value to Lens data member.
vector< CRef< CScore > > TScore
list< CRef< CStd_seg > > TStd
void SetProduct_id(TProduct_id &value)
Assign a value to Product_id data member.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
vector< ENa_strand > TStrands
TExons & SetExons(void)
Assign a value to Exons data member.
void SetProduct_length(TProduct_length value)
Assign a value to Product_length data member.
void SetDim(TDim value)
Assign a value to Dim data member.
vector< TSignedSeqPos > TStarts
void SetDim(TDim value)
Assign a value to Dim data member.
vector< TSeqPos > TStarts
void SetType(TType value)
Assign a value to Type data member.
vector< CRef< CSeq_id > > TIds
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
vector< CRef< CSeq_id > > TIds
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
vector< CRef< CSeq_id > > TIds
TExt & SetExt(void)
Assign a value to Ext data member.
TStarts & SetStarts(void)
Assign a value to Starts data member.
void SetProduct_type(TProduct_type value)
Assign a value to Product_type data member.
TStrands & SetStrands(void)
Assign a value to Strands data member.
list< CRef< CSpliced_exon > > TExons
vector< ENa_strand > TStrands
void SetGenomic_id(TGenomic_id &value)
Assign a value to Genomic_id data member.
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
TIds & SetIds(void)
Assign a value to Ids data member.
TNucpos GetNucpos(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
@ eType_partial
mapping pieces together
@ eType_diags
unbroken, but not ordered, diagonals
@ eProduct_type_transcript
void SetTo(TTo value)
Assign a value to To data member.
ENa_strand
strand of nucleic acid
TFrom GetFrom(void) const
Get the From member data.
void SetFrom(TFrom value)
Assign a value to From data member.
bool IsGi(void) const
Check if variant Gi is selected.
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
bool IsInt(void) const
Check if variant Int is selected.
unsigned int
A callback function used to compare two keys in a database.
#define MAPPER_SPLICE_SIGNAL
if(yy_accept[yy_current_state])
const struct ncbi::grid::netcache::search::fields::SIZE size
#define MIN(a, b)
returns smaller of a and b.
Uint1 Boolean
bool replacement for C
#define TRUE
bool replacement for C indicating true.
#define FALSE
bool replacement for C indicating false.
#define ABS(a)
returns absolute value of a (|a|)
#define MAX(a, b)
returns larger of a and b.
static int pattern_info(int what, void *where, BOOL unsetok)
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
vector< TMaskedQueryRegions > TSeqLocInfoVector
Collection of masked regions for all queries in a BLAST search.
static const char * kScore
The structure to hold all HSPs for a given sequence after the gapped alignment.
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Int4 hspcnt
Number of HSPs saved.
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Uint1 left_edge
Two subject bases before the alignment in the four least significant bits and flags in most significa...
The structure to contain all BLAST results, for multiple queries.
BlastHitList ** hitlist_array
Array of results for individual query sequences.
Structure holding all information about an HSP.
double evalue
This HSP's e-value.
Int4 num_ident
Number of identical base pairs in this HSP.
BlastSeg query
Query sequence info.
double bit_score
Bit score, calculated from score.
Int4 num
How many HSP's are linked together for sum statistics evaluation? If unset (0), this HSP is not part ...
BlastSeg subject
Subject sequence info.
GapEditScript * gap_info
ALL gapped alignment is here.
Int2 comp_adjustment_method
which mode of composition adjustment was used; relevant only for blastp and tblastn
Int4 score
This HSP's raw score.
BlastHSPMappingInfo * map_info
The structure to contain all BLAST results for one query sequence.
BlastHSPList ** hsplist_array
Array of HSP lists for individual database hits.
Int4 hsplist_count
Filled size of the HSP lists array.
The query related information.
struct SPHIQueryInfo * pattern_info
Counts of PHI BLAST pattern occurrences, used in PHI BLAST only.
Int2 frame
Translation frame.
Int4 offset
Start of hsp.
Edit script: linked list of correspondencies between two sequences.
Int4 * num
Array of number of operations.
Int4 size
Size of above arrays.
EGapAlignOpType * op_type
Array of type of operation.
A chain of HSPs: spliced alignment.
HSPContainer * hsps
A list of HSPs that belong to this chain.
struct HSPContainer * next
Uint1 query_base
Query base at this position.
Uint1 subject_base
Subject base at this position.
Int4 query_pos
Query position.
Alignment edit script for gapped alignment.
In PHI BLAST, structure containing information about all pattern occurrences in query.
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4