;
108 const unsigned intkQuerySize = 10;
109 const unsigned intkNumSeqs = 2;
110 const unsigned char kQuery[] = { 3, 9, 14, 20, 6, 23, 1, 7, 16, 5 };
112m_query =
new unsigned char[kQuerySize];
113memcpy((
void*) m_query, (
void*)
kQuery, kQuerySize*
sizeof(*
kQuery));
115m_dim.query_length = kQuerySize;
116m_dim.num_seqs = kNumSeqs;
120 for(
unsigned int i= 0;
i< m_dim.query_length;
i++) {
121 for(
unsigned intj = 0; j < m_dim.num_seqs+1; j++) {
122m_msa->data[j][
i].letter =
kQuery[
i];
123m_msa->data[j][
i].is_aligned =
true;
128m_msa->data[1][0].letter =
129m_msa->data[2][0].letter =
130m_msa->data[2][m_dim.query_length-1].letter =
137memset((
void*) &m_diag_request, 0,
sizeof(m_diag_request));
152 return&m_diag_request;
171 for(
unsigned int i= 0;
i< m_dim.query_length;
i++) {
172 for(
unsigned intj = 0; j < m_dim.num_seqs+1; j++) {
173m_msa->data[j][
i].letter = m_query[
i];
174m_msa->data[j][
i].is_aligned =
true;
180 intgap_position =
r.GetRand(0, GetQueryLength() - 1);
182m_msa->data[0][gap_position].letter = m_query[gap_position];
260memcpy((
void*)&m_options, (
void*)opts,
sizeof(
PSIBlastOptions));
272 caseeNearIdenticalHits:
273SetupNearIdenticalHits();
276 caseeMsaHasUnalignedRegion:
277SetupMsaHasUnalignedRegion();
280 caseeQueryAlignedWithInternalGaps:
281SetupQueryAlignedWithInternalGaps();
284 caseeHenikoffsPaper:
285SetupHenikoffsPositionBasedSequenceWeights();
289 throwstd::logic_error(
"Unsupported alignment test data");
303 static const size_tkQueryLength = 232;
307 const Uint4kNumAlignedSeqs = 1;
309m_dim.query_length = kQueryLength;
310m_dim.num_seqs = kNumAlignedSeqs;
312m_query =
new unsigned char[kQueryLength];
315 for(
unsigned int i= 0;
i< kQueryLength;
i++) {
316 for(
unsigned intseq_idx = 0; seq_idx < kNumAlignedSeqs + 1;
318m_msa->data[seq_idx][
i].letter = m_query[
i] =
kQuery[
i];
319m_msa->data[seq_idx][
i].is_aligned =
true;
327BOOST_REQUIRE(score_matrix);
338 if(score > max_score) {
348 const Uint4kNumAlignedSeqs = 2;
350m_dim.query_length = kQueryLength;
351m_dim.num_seqs = kNumAlignedSeqs;
353m_query =
new unsigned char[kQueryLength];
356 for(
unsigned int i= 0;
i< kQueryLength;
i++) {
357m_msa->data[0][
i].letter = m_query[
i] =
kQuery[
i];
358m_msa->data[0][
i].is_aligned =
true;
368 for(
unsigned int i= kFirstAlignment.first;
369 i< kFirstAlignment.second;
i++) {
370m_msa->data[1][
i].letter =
371FindNonIdenticalHighScoringResidue(
kQuery[
i], score_matrix);
372m_msa->data[1][
i].is_aligned =
true;
380 for(
unsigned int i= kSecondAlignment.first;
381 i< kSecondAlignment.second;
i++) {
382m_msa->data[2][
i].letter =
383FindNonIdenticalHighScoringResidue(
kQuery[
i], score_matrix);
384m_msa->data[2][
i].is_aligned =
true;
393 const Uint4kNumAlignedSeqs = 1;
394 const size_tkLocalQueryLength = 87;
396m_dim.query_length = kLocalQueryLength;
397m_dim.num_seqs = kNumAlignedSeqs;
399m_query =
new unsigned char[kLocalQueryLength];
401 stringquery_seq(
"MFKVYGYDSNIHKCGPCDNAKRLLTVKKQPFEFINIM");
402query_seq +=
string(
"PEKGVFDDEKIAELLTKLGRDTQIGLTMPQVFAPDGSHIGGFD");
403query_seq +=
string(
"QLREYFK");
405 typedefpair<TAlignedSegment, string> TAlignedSequence;
406vector<TAlignedSequence> aligned_sequence;
408TAlignedSequence region(make_pair(make_pair(0
U, 8U),
409 string(
"KVVVFIKP")));
410aligned_sequence.push_back(region);
412region = make_pair(make_pair(12U, 39U),
413 string(
"TCPFCRKTQELLSQLPFLLEFVDITAT"));
414aligned_sequence.push_back(region);
416region = make_pair(make_pair(41U, 57U),
string(
"SDTNEIQDYLQQLTGA"));
417aligned_sequence.push_back(region);
419region = make_pair(make_pair(62U, 71U),
string(
"RTVPRVFIG"));
420aligned_sequence.push_back(region);
422region = make_pair(make_pair(72U, 87U),
string(
"KECIGGCTDLESMHK"));
423aligned_sequence.push_back(region);
427 for(
Uint4 i= 0;
i< kLocalQueryLength;
i++) {
429query_seq.substr(
i, 1));
430m_msa->data[0][
i].letter = m_query[
i];
431m_msa->data[0][
i].is_aligned =
true;
434m_msa->data[1][
i].letter = kGapResidue;
435m_msa->data[1][
i].is_aligned =
true;
439 ITERATE(vector<TAlignedSequence>, itr, aligned_sequence) {
441 stringsequence_data = itr->second;
443 for(
Uint4 i= loc.first, j = 0;
i< loc.second;
i++, j++) {
444m_msa->data[1][
i].letter =
446sequence_data.substr(j, 1));
452 const Uint4kNumAlignedSeqs = 3;
453 const Uint1kQuerySequence[5] = { 7, 22, 19, 7, 17 };
454 const Uint1kSeq1[5] = { 7, 6, 4, 7, 6 };
455 const Uint1kSeq2[5] = { 7, 22, 4, 7, 6 };
456 const Uint1kSeq3[5] = { 7, 22, 15, 7, 7 };
458m_dim.query_length =
sizeof(
kQuery);
459m_dim.num_seqs = kNumAlignedSeqs;
461m_query =
new unsigned char[
sizeof(kQuerySequence)];
464 for(
Uint4s = 0; s < kNumAlignedSeqs; s++) {
468 case0: sequence = kSeq1;
break;
469 case1: sequence = kSeq2;
break;
470 case2: sequence = kSeq3;
break;
474 for(
Uint4 i= 0;
i<
sizeof(kQuerySequence);
i++) {
475m_query[
i] = kQuerySequence[
i];
476m_msa->data[s][
i].letter = sequence[
i];
477m_msa->data[s][
i].is_aligned =
true;
483 const Uint4kNumAlignedSeqs = 2;
487 const Uint1kGi_129296_[388] = {
48812, 4, 17, 9, 17, 19, 18, 13, 1, 10, 6, 3, 6, 4, 19,
4896, 13, 5, 12, 10, 19, 8, 8, 19, 13, 5, 13, 9, 11, 22,
4903, 14, 11, 17, 9, 11, 18, 1, 11, 1, 12, 19, 22, 11, 7,
4911, 16, 7, 13, 18, 5, 17, 15, 12, 10, 10, 19, 11, 8, 6,
4924, 17, 9, 18, 7, 1, 7, 17, 18, 18, 4, 17, 15, 3, 7,
49317, 17, 5, 22, 19, 8, 13, 11, 6, 10, 5, 11, 11, 17, 5,
4949, 18, 16, 14, 13, 1, 18, 22, 17, 11, 5, 9, 1, 4, 10,
49511, 22, 19, 4, 10, 18, 6, 17, 19, 11, 14, 5, 22, 11, 17,
4963, 1, 16, 10, 6, 22, 18, 7, 7, 19, 5, 5, 19, 13, 6,
49710, 18, 1, 1, 5, 5, 1, 16, 15, 11, 9, 13, 17, 20, 19,
4985, 10, 5, 18, 13, 7, 15, 9, 10, 4, 11, 11, 19, 17, 17,
49917, 9, 4, 6, 7, 18, 18, 12, 19, 6, 9, 13, 18, 9, 22,
5006, 10, 7, 9, 20, 10, 9, 1, 6, 13, 18, 5, 4, 18, 16,
5015, 12, 14, 6, 17, 12, 18, 10, 5, 5, 17, 10, 14, 19, 15,
50212, 12, 3, 12, 13, 13, 17, 6, 13, 19, 1, 18, 11, 14, 1,
5035, 10, 12, 10, 9, 11, 5, 11, 14, 22, 1, 17, 7, 4, 11,
50417, 12, 11, 19, 11, 11, 14, 4, 5, 19, 17, 7, 11, 5, 16,
5059, 5, 10, 18, 9, 13, 6, 4, 10, 11, 16, 5, 20, 18, 17,
50618, 13, 1, 12, 1, 10, 10, 17, 12, 10, 19, 22, 11, 14, 16,
50712, 10, 9, 5, 5, 10, 22, 13, 11, 18, 17, 9, 11, 12, 1,
50811, 7, 12, 18, 4, 11, 6, 17, 16, 17, 1, 13, 11, 18, 7,
5099, 17, 17, 19, 4, 13, 11, 12, 9, 17, 4, 1, 19, 8, 7,
51019, 6, 12, 5, 19, 13, 5, 5, 7, 18, 5, 1, 18, 7, 17,
51118, 7, 1, 9, 7, 13, 9, 10, 8, 17, 11, 5, 11, 5, 5,
5126, 16, 1, 4, 8, 14, 6, 11, 6, 6, 9, 16, 22, 13, 14,
51318, 13, 1, 9, 11, 6, 6, 7, 16, 22, 20, 17, 14};
515m_dim.query_length = kQueryLength;
516m_dim.num_seqs = kNumAlignedSeqs;
518m_query =
new unsigned char[kQueryLength];
520 for(
unsigned int i= 0;
i< kQueryLength;
i++) {
525 for(
unsigned int i= 1;
i< kNumAlignedSeqs + 1;
i++) {
526 for(
unsigned intj = 0; j < kQueryLength; j++) {
527m_msa->data[
i][j].letter = kGi_129296_[j];
528m_msa->data[
i][j].is_aligned =
true;
536 const Uint4kHitIndex = 2;
537 const Uint4kNumIdenticalResidues = (
Uint4) (GetQueryLength() *
540 for(
Uint4 i= kNumIdenticalResidues;
i< GetQueryLength();
i++) {
541 Uint1& residue = m_msa->data[kHitIndex][
i].letter;
55015, 9, 10, 4, 11, 11, 19, 17, 17, 17, 18, 4, 11, 4, 18,
55118, 11, 19, 11, 19, 13, 1, 9, 22, 6, 10, 7, 12, 20, 10,
55218, 1, 6, 13, 1, 5, 4, 18, 16, 5, 12, 14, 6, 8, 19,
55318, 10, 15, 5, 17, 10, 14, 19, 15, 12, 12, 3, 12, 13, 13,
55417, 6, 13, 19, 1, 18, 11, 14, 1, 5, 10, 12, 10, 9, 11,
5555, 11, 14, 6, 1, 17, 7, 4, 11, 17, 12, 11, 19, 11, 11,
55614, 4, 5, 19, 17, 4, 11, 5, 16, 9, 5, 10, 18, 9, 13,
5576, 5, 10, 11, 18, 5, 20, 18, 13, 14, 13, 18, 12, 5, 10,
55816, 16, 19, 10, 19, 22, 11, 14, 15, 12, 10, 9, 5, 5, 10,
55922, 13, 11, 18, 17, 19, 11, 12, 1, 11, 7, 12, 18, 4, 11,
5606, 9, 14, 17, 1, 13, 11, 18, 7, 9, 17, 17, 1, 5, 17,
56111, 10, 9, 17, 15, 1, 19, 8, 7, 1, 6, 12, 5, 11, 17,
5625, 4, 7, 9, 5, 12, 1, 7, 17, 18, 7, 19, 9, 5, 4,
5639, 10, 8, 17, 14, 5, 17, 5, 15, 6, 16, 1, 4, 8, 14,
5646, 11, 6, 11, 9, 10, 8, 13, 14, 18, 13, 18, 9, 19, 22,
5656, 7, 16, 22, 20, 17, 14};
573 const stringseqalign(
"data/nr-129295.new.asn.short");
574unique_ptr<CObjectIStream>
in 580 CSeq_idqid(
"gi|129295"), sid(
"gi|6");
588memset((
void*) &request, 0,
sizeof(request));
594request.
sigma=
true;
598 const string kTitle(
"Test defline");
620 if((*iter)->IsTitle()) {
621query_descr += (*iter)->GetTitle();
625BOOST_REQUIRE_EQUAL(query_descr,
kTitle);
628 const size_tkNumElements =
633BOOST_REQUIRE_EQUAL(kNumElements, res_freqs.size());
637BOOST_REQUIRE_EQUAL(kNumElements, wres_freqs.size());
641BOOST_REQUIRE_EQUAL(kNumElements, freq_ratios.size());
651 CSeq_idqid(
"gi|129295"), sid(
"gi|6");
656BOOST_REQUIRE(sasv.size() != 0);
668unique_ptr<CPsiBlastInputData> pssm_input(
671sasv[0], q->scope, *opts));
674pssm_input->Process();
692vector<PSIMsaCell> aligned_pos(pssm_input->GetQueryLength());
693fill(aligned_pos.begin(), aligned_pos.end(), kNullPSIMsaCell);
699 const CDense_seg& ds = (*hsp)->GetSegs().GetDenseg();
703 constvector<TSignedSeqPos>& starts = ds.
GetStarts();
704 constvector<TSeqPos>& lengths = ds.
GetLens();
710 #define GAP_IN_ALIGNMENT -1 712 for(
TSeqPospos = 0; pos < lengths[
i]; pos++) {
718s_index += lengths[
i];
721s_index = (
i== 0) ? 0 : (s_index - starts[1]);
722 for(
TSeqPospos = 0; pos < lengths[
i]; pos++) {
724pd.
letter= subj[s_index++];
733 for(
TSeqPos i= 0;
i< pssm_input->GetQueryLength();
i++) {
734BOOST_REQUIRE(seq_index < nseqs);
736pssm_input->GetData()->data[seq_index][
i];
738ss <<
"Sequence "<< seq_index <<
", position "<<
i 740BOOST_REQUIRE_MESSAGE(aligned_pos[
i].
letter== pos_desc.
letter&&
741aligned_pos[
i].is_aligned == pos_desc.
is_aligned, ss.str());
745}
catch(
constexception& e) {
746cerr << e.what() << endl;
747BOOST_REQUIRE(
false);
749cerr <<
"Unknown exception"<< endl;
750BOOST_REQUIRE(
false);
763unique_ptr<IPssmInputData> pssm_input
765pssm_input->Process();
769 const Uint4kSelfHitIndex = 1;
770BOOST_REQUIRE_EQUAL(
true, !!msa->use_sequence[
kQueryIndex]);
771BOOST_REQUIRE_EQUAL(
false, !!msa->use_sequence[kSelfHitIndex]);
775unique_ptr<IPssmInputData> pssm_input
777pssm_input->Process();
781 const Uint4kDuplicateHitIndex = 2;
782BOOST_REQUIRE_EQUAL(
false, !!msa->use_sequence[kDuplicateHitIndex]);
783BOOST_REQUIRE_EQUAL(
true, !!msa->use_sequence[
kQueryIndex]);
784BOOST_REQUIRE_EQUAL(
true, !!msa->use_sequence[
kQueryIndex+ 1]);
788unique_ptr<IPssmInputData> pssm_input
790pssm_input->Process();
794 const Uint4kRemovedHitIndex = 2;
795BOOST_REQUIRE_EQUAL(
false,
796!! msa->use_sequence[kRemovedHitIndex]);
797BOOST_REQUIRE_EQUAL(
true, !!msa->use_sequence[
kQueryIndex]);
798BOOST_REQUIRE_EQUAL(
true, !! msa->use_sequence[
kQueryIndex+ 1]);
802unique_ptr<IPssmInputData> pssm_input
805BOOST_REQUIRE_EQUAL(
string(
"BLOSUM62"),
806 string(pssm_input->GetMatrixName()));
810unique_ptr< CNcbiMatrix<int> > pssm
826(
size_t)pssm->GetCols());
828(
size_t)pssm->GetRows());
829 for(
int i= 0;
i< pssm_asn->
GetPssm().GetNumColumns();
i++) {
830 for(
intj = 0; j < pssm_asn->
GetPssm().GetNumRows(); j++) {
836 if(pssm_input->GetData()->data[1][
i].is_aligned
837&& pssm_input->GetData()->data[1][
i].letter != kGapResidue) {
842 if(j == kGapResidue || j == kBResidue || j == kZResidue
843|| j == kUResidue || j >= kOResidue) {
845ss <<
"Position "<<
i<<
" residue " 851pssm_input->GetQuery()[
i], j);
854ss <<
"Position "<<
i<<
" residue " 861BOOST_REQUIRE_MESSAGE (score - (*pssm)(j,
i) <= 3, ss.str());
868unique_ptr<IPssmInputData> pssm_input
871pssm_input->Process();
872BOOST_REQUIRE_EQUAL(
string(
"BLOSUM62"),
873 string(pssm_input->GetMatrixName()));
881BOOST_REQUIRE_EQUAL(
true,
883BOOST_REQUIRE_EQUAL(
true, !! packed_msa->use_sequence[1]);
884BOOST_REQUIRE_EQUAL(
true, !! packed_msa->use_sequence[2]);
894ss <<
"_PSIComputeAlignmentBlocks failed: " 896BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS== rv, ss.str());
899vector<CPssmInputTestData::TAlignedSegment> aligned_regions;
900aligned_regions.push_back(make_pair(0
U, 99U));
901aligned_regions.push_back(make_pair(200U,
902pssm_input->GetQueryLength()-1));
904 for(vector<CPssmInputTestData::TAlignedSegment>::const_iterator
i=
905aligned_regions.begin();
906 i!= aligned_regions.end(); ++
i) {
907 for(
TSeqPospos =
i->first; pos < i->second; pos++) {
909ss <<
"Alignment extents differ at position " 911BOOST_REQUIRE_MESSAGE((
int)
i->first == (
int)aligned_blocks->pos_extnt[pos].left, ss.str());
912BOOST_REQUIRE_MESSAGE((
int)
i->second == (
int)aligned_blocks->pos_extnt[pos].right, ss.str());
913BOOST_REQUIRE_MESSAGE( (
int)(
i->second -
i->first + 1) == (
int)aligned_blocks->size[pos], ss.str());
919 for(
size_t i= kUnalignedRange.first;
920 i< kUnalignedRange.second;
i++) {
922ss <<
"Alignment extents differ at position " 924BOOST_REQUIRE_MESSAGE((
int)-1 == (
int)aligned_blocks->pos_extnt[
i].left, ss.str());
925BOOST_REQUIRE_MESSAGE( (
int)pssm_input->GetQueryLength() == (
int)aligned_blocks->pos_extnt[
i].right, ss.str());
926BOOST_REQUIRE_MESSAGE(
927(
int)(aligned_blocks->pos_extnt[
i].right - aligned_blocks->pos_extnt[
i].left + 1) == (
int)aligned_blocks->size[
i],
934pssm_input->GetQueryLength()));;
938(query_with_sentinels.get(), pssm_input->GetQueryLength()));
944seq_weights.
get());
946ss <<
"_PSIComputeSequenceWeights failed: " 948BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS== rv, ss.str());
954 for(vector<CPssmInputTestData::TAlignedSegment>::const_iterator
i=
955aligned_regions.begin();
956 i!= aligned_regions.end(); ++
i) {
957 for(
TSeqPospos =
i->first; pos < i->second; pos++) {
958 doubletotal_sequence_weights_for_column = 0.0;
959 for(
size_tres = 0; res < msa->alphabet_size; res++) {
960 if(res == kXResidue)
continue;
961total_sequence_weights_for_column +=
962seq_weights->match_weights[pos][res];
964BOOST_REQUIRE(total_sequence_weights_for_column > 0.99 &&
965total_sequence_weights_for_column < 1.01);
969 for(
size_tpos = kUnalignedRange.first;
970pos < kUnalignedRange.second; pos++) {
971 doubletotal_sequence_weights_for_column = 0.0;
972 for(
size_tres = 0; res < msa->alphabet_size; res++) {
973 if(res == kXResidue)
continue;
974total_sequence_weights_for_column +=
975seq_weights->match_weights[pos][res];
977BOOST_REQUIRE(total_sequence_weights_for_column == 0.0);
987internal_pssm.
get());
989ss <<
"_PSIComputeResidueFrequencies failed: " 991BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS== rv, ss.str());
997seq_weights->std_prob);
999ss <<
"_PSIConvertResidueFreqsToPSSM failed: " 1001BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS== rv, ss.str());
1005seq_weights->std_prob,
1006internal_pssm.
get(),
1009ss <<
"_PSIScaleMatrix failed: " 1011BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS== rv, ss.str());
1013BOOST_REQUIRE_EQUAL(msa->dimensions->num_seqs, 3u);
1025 for(
Uint4 i= 0;
i< pssm_input->GetQueryLength();
i++) {
1029 if(msa->cell[1][
i].is_aligned || msa->cell[2][
i].is_aligned
1030|| msa->cell[3][
i].is_aligned) {
1036 if(j == kBResidue || j == kZResidue || j == kUResidue
1037|| j >= kOResidue) {
1042 if(j == kGapResidue) {
1044ss <<
"Position "<<
i<<
" residue " 1046BOOST_REQUIRE_MESSAGE(
BLAST_SCORE_MIN== internal_pssm->pssm[
i][j], ss.str());
1052ss <<
"Position "<<
i<<
" residue " 1057BOOST_REQUIRE_MESSAGE(score-1 <= internal_pssm->pssm[
i][j] && internal_pssm->pssm[
i][j] <= score+1, ss.str());
1067unique_ptr<IPssmInputData> pssm_input
1069pssm_input->Process();
1070BOOST_REQUIRE_EQUAL(
string(
"BLOSUM62"),
1071 string(pssm_input->GetMatrixName()));
1079 const Uint4kSelfHitIndex = 1;
1080BOOST_REQUIRE_EQUAL(
true,
1082BOOST_REQUIRE_EQUAL(
false,
1083!! packed_msa->use_sequence[kSelfHitIndex]);
1093ss <<
"_PSIComputeAlignmentBlocks failed: " 1095BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS== rv, ss.str());
1097 for(
size_t i= 0;
i< pssm_input->GetQueryLength();
i++) {
1098BOOST_REQUIRE_EQUAL((
int)-1,
1099(
int)aligned_blocks->pos_extnt[
i].left);
1100BOOST_REQUIRE_EQUAL((
int)pssm_input->GetQueryLength(),
1101(
int)aligned_blocks->pos_extnt[
i].right);
1102BOOST_REQUIRE_EQUAL((
int)pssm_input->GetQueryLength() + 2,
1103(
int)aligned_blocks->size[
i]);
1109pssm_input->GetQueryLength()));;
1113(query_with_sentinels.get(), pssm_input->GetQueryLength()));
1121seq_weights.
get());
1123ss <<
"_PSIComputeSequenceWeights failed: " 1125BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS== rv, ss.str());
1134internal_pssm.
get());
1136ss <<
"_PSIComputeResidueFrequencies failed: " 1138BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS== rv, ss.str());
1144seq_weights->std_prob);
1146ss <<
"_PSIConvertResidueFreqsToPSSM failed: " 1148BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS== rv, ss.str());
1152seq_weights->std_prob,
1153internal_pssm.
get(),
1156ss <<
"_PSIScaleMatrix failed: " 1158BOOST_REQUIRE_MESSAGE(
PSI_SUCCESS== rv, ss.str());
1170 for(
Uint4 i= 0;
i< pssm_input->GetQueryLength();
i++) {
1174 if(j == kGapResidue || j == kBResidue || j == kZResidue
1175|| j == kUResidue || j >= kOResidue) {
1177ss <<
"Position "<<
i<<
" residue " 1179BOOST_REQUIRE_MESSAGE(
BLAST_SCORE_MIN== internal_pssm->pssm[
i][j], ss.str());
1185ss <<
"Position "<<
i<<
" residue " 1190BOOST_REQUIRE_MESSAGE(score-1 <= internal_pssm->pssm[
i][j] && internal_pssm->pssm[
i][j] <= score+1, ss.str());
1219unique_ptr<IPssmInputData> bad_pssm_data(
new CNullPssmInput());
1224unique_ptr<IPssmInputData> bad_pssm_data(
new 1235BOOST_REQUIRE(msa ==
NULL);
1242BOOST_REQUIRE(freq_ratios !=
NULL);
1244BOOST_REQUIRE(freq_ratios ==
NULL);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Declares the CBl2Seq (BLAST 2 Sequences) class.
Contains C++ wrapper classes to structures in algo/blast/core as well as some auxiliary functions to ...
Declares the BLAST exception class.
Definitions which are dependent on the NCBI C++ Object Manager.
PSIBlastOptions * PSIBlastOptionsFree(PSIBlastOptions *psi_options)
Deallocate PSI BLAST options.
#define BLAST_EXPECT_VALUE
Default parameters for saving hits.
Int2 PSIBlastOptionsNew(PSIBlastOptions **psi_options)
Initialize default options for PSI BLAST.
Port of posit.h structures and impalaScaling for implementing composition based statistics for PSI-BL...
PSIMsa * PSIMsaFree(PSIMsa *msa)
Deallocates the PSIMsa structure.
PSIMsa * PSIMsaNew(const PSIMsaDimensions *dimensions)
Allocates and initializes the multiple sequence alignment data structure for use as input to the PSSM...
int _PSIComputeAlignmentBlocks(const _PSIMsa *msa, _PSIAlignedBlock *aligned_blocks)
Main function to compute aligned blocks' properties for each position within multiple alignment (stag...
int _PSIConvertFreqRatiosToPSSM(_PSIInternalPssmData *internal_pssm, const Uint1 *query, const BlastScoreBlk *sbp, const double *std_probs)
Converts the PSSM's frequency ratios obtained in the previous stage to a PSSM of scores.
int _PSIComputeFreqRatios(const _PSIMsa *msa, const _PSISequenceWeights *seq_weights, const BlastScoreBlk *sbp, const _PSIAlignedBlock *aligned_blocks, Int4 pseudo_count, Boolean nsg_compatibility_mode, _PSIInternalPssmData *internal_pssm)
Main function to compute the PSSM's frequency ratios (stage 5).
_PSISequenceWeights * _PSISequenceWeightsNew(const PSIMsaDimensions *dimensions, const BlastScoreBlk *sbp)
Allocates and initializes the _PSISequenceWeights structure.
_PSIInternalPssmData * _PSIInternalPssmDataNew(Uint4 query_length, Uint4 alphabet_size)
Allocates a new _PSIInternalPssmData structure.
_PSIAlignedBlock * _PSIAlignedBlockNew(Uint4 query_length)
Allocates and initializes the _PSIAlignedBlock structure.
int _PSIComputeSequenceWeights(const _PSIMsa *msa, const _PSIAlignedBlock *aligned_blocks, Boolean nsg_compatibility_mode, _PSISequenceWeights *seq_weights)
Main function to calculate the sequence weights.
int _PSIPurgeBiasedSegments(_PSIPackedMsa *msa)
Main function for keeping only those selected sequences for PSSM construction (stage 2).
_PSIMsa * _PSIMsaNew(const _PSIPackedMsa *msa, Uint4 alphabet_size)
Allocates and initializes the internal version of the PSIMsa structure (makes a deep copy) for intern...
const double kPSINearIdentical
Percent identity threshold for discarding near-identical matches.
const unsigned int kQueryIndex
Index into multiple sequence alignment structure for the query sequence.
int _PSIScaleMatrix(const Uint1 *query, const double *std_probs, _PSIInternalPssmData *internal_pssm, BlastScoreBlk *sbp)
Scales the PSSM (stage 7)
_PSIPackedMsa * _PSIPackedMsaNew(const PSIMsa *msa)
Allocates and initializes the compact version of the PSIMsa structure (makes a deep copy) for interna...
Private interface for Position Iterated BLAST API, contains the PSSM generation engine.
#define PSIERR_BADPARAM
Bad parameter used in function.
#define PSI_SUCCESS
Successful operation.
Utilities initialize/setup BLAST.
#define BLAST_SCORE_MIN
minimum allowed score (for one letter comparison).
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
@ eBlastp
Protein-Protein.
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
TSeqPos GetLength(void) const
Runs the BLAST algorithm between 2 sequences.
Defines BLAST error codes (user errors included)
Wrapper class for BlastScoreBlk .
Mock object for the PSSM input data which returns NULLs for all its methods.
PSIMsa * GetData()
Obtain the multiple sequence alignment structure.
unsigned int GetQueryLength()
Get the query's length.
unsigned char * GetQuery()
Get the query sequence used as master for the multiple sequence alignment in ncbistdaa encoding.
const PSIDiagnosticsRequest * GetDiagnosticsRequest()
Obtain the diagnostics data that is requested from the PSSM engine. Its results will be populated in t...
const PSIBlastOptions * GetOptions()
Obtain the options for the PSSM engine.
void Process()
Algorithm to produce multiple sequence alignment structure should be implemented in this method.
const char * GetMatrixName()
Obtain the name of the underlying matrix to use when building the PSSM.
Wrapper class for PSIBlastOptions .
This class is a concrete strategy for IPssmInputData, and it implements the traditional PSI-BLAST alg...
This class exists merely to call private methods in CPsiBlastInputData and CPssmEngine.
static unsigned int GetNumAlignedSequences(const CPsiBlastInputData &input)
Accesses CPsiBlastInputData private method.
static string x_ErrorCodeToString(int error_code)
Gets error strings from a CPssmEngine private method.
static unsigned char * x_GuardProteinQuery(const unsigned char *query, unsigned int query_length)
Accesses CPssmEngine private method.
static void x_GetSubjectSequence(const objects::CDense_seg &ds, objects::CScope &scope, string &sequence_data)
Gets Subject sequence from a CPsiBlastInputData private method.
Exception class for the CPssmEngine class.
Computes a PSSM as specified in PSI-BLAST.
Mock object for the PSSM input data which returns multiple sequence alignment data which has flanking...
const PSIBlastOptions * GetOptions()
Obtain the options for the PSSM engine.
void Process()
Algorithm to produce multiple sequence alignment structure should be implemented in this method.
PSIMsa * GetData()
Obtain the multiple sequence alignment structure.
PSIBlastOptions * m_options
PSIDiagnosticsRequest m_diag_request
const PSIDiagnosticsRequest * GetDiagnosticsRequest()
Obtain the diagnostics data that is requested from the PSSM engine. Its results will be populated in t...
virtual ~CPssmInputFlankingGaps()
unsigned int GetQueryLength()
Get the query's length.
unsigned char * GetQuery()
Get the query sequence used as master for the multiple sequence alignment in ncbistdaa encoding.
Mock object for the PSSM input data which returns a query sequence with a gap in it.
Mock object for the PSSM input data which returns a query sequence with a gap in it.
unsigned int GetQueryLength()
Get the query's length.
Mock object for the PSSM input data which can be configured to have different combinations of aligned...
CPssmInputTestData(EAlignmentType type, PSIBlastOptions *opts=NULL)
pair< TSeqPos, TSeqPos > TAlignedSegment
void SetupDuplicateHit(void)
@ eQueryAlignedWithInternalGaps
void SetupHenikoffsPositionBasedSequenceWeights(void)
Uint1 FindNonIdenticalHighScoringResidue(Uint1 res, const SNCBIPackedScoreMatrix *score_matrix)
void SetupNearIdenticalHits(void)
static const size_t kQueryLength
void SetupQueryAlignedWithInternalGaps()
static const Uint1 kQuery[kQueryLength]
void SetupMsaHasUnalignedRegion(void)
const char * GetMatrixName()
Obtain the name of the underlying matrix to use when building the PSSM.
static TIndex GetIndex(CSeq_data::E_Choice code_type, const string &code)
static CTestObjMgr & Instance()
TSeqPos length
Length of the buffer above (not necessarily sequence length!)
CRef< objects::CPssmWithParameters > Run()
Runs the PSSM engine to compute the PSSM.
static CNcbiMatrix< int > * GetScores(const objects::CPssmWithParameters &pssm)
Returns matrix of BLASTAA_SIZE by query size (dimensions are opposite of what is stored in the BlastS...
#define BLASTAA_SIZE
Size of aminoacid alphabet.
TAutoUint1Ptr data
Sequence data.
virtual TSeqAlignVector Run()
Perform BLAST search. Assuming N queries and M subjects, the structure of the returned vector is as fo...
const Uint1 AMINOACID_TO_NCBISTDAA[]
Translates between ncbieaa and ncbistdaa.
void Reset(BlastScoreBlk *p=NULL)
AutoPtr< Uint1, CDeleter< Uint1 > > TAutoUint1Ptr
Declares TAutoUint1Ptr (for Uint1 arrays allocated with malloc/calloc)
SBlastSequence GetSequence(const objects::CSeq_loc &sl, EBlastEncoding encoding, objects::CScope *scope, objects::ENa_strand strand=objects::eNa_strand_plus, ESentinelType sentinel=eSentinels, std::string *warnings=NULL)
Retrieves a sequence using the object manager.
@ eBlastEncodingProtein
NCBIstdaa.
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
element_type * get(void) const
Get pointer.
@ eSerial_AsnText
ASN.1 text.
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
uint8_t Uint1
1-byte (8-bit) unsigned integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
Uint4 TValue
Type of the generated integer value and/or the seed value.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
const TFreqRatios & GetFreqRatios(void) const
Get the FreqRatios member data.
TNumRows GetNumRows(void) const
Get the NumRows member data.
const TWeightedResFreqsPerPos & GetWeightedResFreqsPerPos(void) const
Get the WeightedResFreqsPerPos member data.
list< int > TResFreqsPerPos
TNumColumns GetNumColumns(void) const
Get the NumColumns member data.
list< double > TWeightedResFreqsPerPos
const TIntermediateData & GetIntermediateData(void) const
Get the IntermediateData member data.
list< double > TFreqRatios
const TResFreqsPerPos & GetResFreqsPerPos(void) const
Get the ResFreqsPerPos member data.
const TPssm & GetPssm(void) const
Get the Pssm member data.
const TStarts & GetStarts(void) const
Get the Starts member data.
const TLens & GetLens(void) const
Get the Lens member data.
TDim GetDim(void) const
Get the Dim member data.
TNumseg GetNumseg(void) const
Get the Numseg member data.
list< CRef< CSeq_align > > Tdata
list< CRef< CSeqdesc > > Tdata
const Tdata & Get(void) const
Get the member data.
bool IsSetDescr(void) const
descriptors Check if a value has been assigned to Descr data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
@ e_Ncbistdaa
consecutive codes for std aas
unsigned int
A callback function used to compare two keys in a database.
SFreqRatios * _PSIMatrixFrequencyRatiosFree(SFreqRatios *freq_ratios)
Deallocate the frequency ratios structure.
SFreqRatios * _PSIMatrixFrequencyRatiosNew(const char *matrix_name)
Retrieve the matrix's frequency ratios.
const TYPE & Get(const CNamedParameterList *param)
char GetResidue(unsigned int res)
Returns character representation of a residue from ncbistdaa.
Magic spell ;-) needed for some weird compilers... very empiric.
#define TRUE
bool replacement for C indicating true.
#define FALSE
bool replacement for C indicating false.
std::istream & in(std::istream &in_, double &x_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
Defines a concrete strategy to obtain PSSM input data for PSI-BLAST.
Declarations of auxiliary functions/classes for PSI-BLAST.
C++ API for the PSI-BLAST PSSM engine.
Defines interface for a sequence alignment processor that can populate a multiple alignment data stru...
BlastScoreBlk * InitializeBlastScoreBlk(const unsigned char *query, Uint4 query_size)
Utilities to develop and debug unit tests that deal with PSSM computation.
BOOST_AUTO_TEST_CASE(testFullPssmEngineRunWithDiagnosticsRequest)
const SNCBIPackedScoreMatrix NCBISM_Blosum62
TNCBIScore NCBISM_GetScore(const SNCBIPackedScoreMatrix *sm, int aa1, int aa2)
Look up an entry in a packed score matrix.
Int2 alphabet_size
size of alphabet.
Abstract base class to encapsulate the source(s) and pre-processing of PSSM input data as well as opt...
Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST) Some of these possibly should...
Boolean nsg_compatibility_mode
Compatibility option for the NCBI's structure group (note nsg_ prefix, stands for NCBI's structure gr...
double inclusion_ethresh
Minimum evalue for inclusion in PSSM calculation.
Int4 pseudo_count
Pseudocount constant.
Boolean use_best_alignment
If set to TRUE, use the best alignment when multiple HSPs are found in a query-subject alignment (i....
Structure to allow requesting various diagnostics data to be collected by PSSM engine.
Boolean information_content
request information content
Boolean frequency_ratios
request frequency ratios
Boolean weighted_residue_frequencies
request observed weighted residue frequencies
Boolean gapless_column_weights
request gapless column weights
Boolean num_matching_seqs
request number of matching sequences
Boolean sigma
request sigma
Boolean residue_frequencies
request observed residue frequencies
Boolean interval_sizes
request interval sizes
Structure to describe the characteristics of a position in the multiple sequence alignment data struc...
Boolean is_aligned
Is this letter part of the alignment?
Uint1 letter
Preferred letter at this position, in ncbistdaa encoding.
Structure representing the dimensions of the multiple sequence alignment data structure.
Multiple sequence alignment (msa) data structure containing the raw data needed by the PSSM engine to...
Structure to store sequence data and its length for use in the CORE of BLAST (it's a malloc'ed array ...
Stores the frequency ratios along with their bit scale factor.
Utility stuff for more convenient using of Boost.Test library.
static const string kTitle
CTraceGlyph inline method implementation.
static Uint4 letter(char c)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4