;
84 const intquery_offset[],
85 const intquery_end[],
86 const intsubject_offset[],
87 const intsubject_end[],
88 const intquery_gapped_start[],
89 const intsubject_gapped_start[],
91 const doubleevalue[] =
NULL,
92 const intnum_ident[] =
NULL)
94 const intkQueryContext = 0;
95 const intkSubjectFrame = 0;
96 const intkQueryFrame = 0;
103 for(
int i= 0;
i< num_hsps;
i++) {
106subject_offset[
i], subject_end[
i],
107query_gapped_start[
i],
108subject_gapped_start[
i],
109kQueryContext, kQueryFrame, kSubjectFrame, score[
i],
124 unsigned intquery_length)
126ifstream
in(filename.c_str());
128 throwruntime_error(filename +
" could not be found");
135 for(
unsigned int i= 0;
i< query_length;
i++) {
137 in>> retval[
i][j];
142 throwruntime_error(
"Error reading from "+ filename);
152 throwruntime_error(
"NULL BlastScoreBlk*!");
156loadPssmFromFile(
"data/aa.129295.pssm.txt",
175 Int8effective_searchsp,
177 booldoSmithWaterman,
178 doubleevalue_threshold =
189 Int8effective_searchsp,
191 booldoSmithWaterman,
192 doubleevalue_threshold =
203 Int8effective_searchsp,
205 booldoSmithWaterman,
206 doubleevalue_threshold,
213runRedoAlignmentCoreUnitTest(program, *qsl, *ssl, init_hsp_list,
214ending_hsp_list, effective_searchsp, compositonBasedStatsMode,
215doSmithWaterman, evalue_threshold, hit_list_size);
226 Int8effective_searchsp,
228 booldoSmithWaterman,
229 doubleevalue_threshold,
233 char* program_buffer =
NULL;
235BOOST_REQUIRE_MESSAGE(rv == (
Int2)0,
"BlastNumber2Program failed");
237 sfree(program_buffer);
249core_prog, strand_opt,&query_info);
251query_info, &query_blk, core_prog, strand_opt, blast_msg);
253BOOST_REQUIRE(m->empty());
258BOOST_REQUIRE(rv == 0);
269BOOST_REQUIRE(rv == 0);
294BOOST_REQUIRE(writer_info ==
NULL);
306 const doublek_rps_scale_factor = 1.0;
309program, &sbp, k_rps_scale_factor,
316setupPositionBasedBlastScoreBlk(sbp,
324&effective_searchsp, 1);
329eff_len_params, sbp, query_info,
NULL);
337BOOST_REQUIRE(eff_len_opts ==
NULL);
339BOOST_REQUIRE(eff_len_params ==
NULL);
345 const intkAvgSubjLen = 0;
348sbp, query_info, kAvgSubjLen,
362query_info, sbp,
NULL, seq_src,
365ext_params, hit_params, psi_options,
367BOOST_REQUIRE_MESSAGE(rv == (
Int2)0,
"Blast_RedoAlignmentCore failed!");
370BOOST_REQUIRE(hsp_stream ==
NULL);
372BOOST_REQUIRE(ext_params ==
NULL);
374BOOST_REQUIRE(ext_options ==
NULL);
376BOOST_REQUIRE(hit_params ==
NULL);
378BOOST_REQUIRE(scoring_params ==
NULL);
380BOOST_REQUIRE(psi_options ==
NULL);
382BOOST_REQUIRE(sbp ==
NULL);
386BOOST_REQUIRE(
results->hitlist_array[0]->hsplist_count > 0);
389BOOST_REQUIRE_EQUAL(ending_hsp_list->
hspcnt, hsp_list->
hspcnt);
393cout <<
"Expected num hsps="<< ending_hsp_list->
hspcnt;
394cout <<
" Actual num hsps="<< hsp_list->
hspcnt<< endl;
397 for(
intindex=0; index<hsp_list->
hspcnt; index++)
403cout << index <<
": query_offset=" 405cout << index <<
": query_end=" 406<< actual_hsp->
query.
end<< endl;
407cout << index <<
": subject_offset=" 409cout << index <<
": subject_end=" 411cout << index <<
": score=" 412<< actual_hsp->
score<< endl;
413cout << index <<
": bit_score=" 415cout << index <<
": evalue=" 416<< actual_hsp->
evalue<< endl;
417cout << index <<
": num_ident=" 420BOOST_REQUIRE_EQUAL(expected_hsp->
query.
offset,
422BOOST_REQUIRE_EQUAL(expected_hsp->
query.
end,
426BOOST_REQUIRE_EQUAL(expected_hsp->
subject.
end,
428BOOST_REQUIRE_EQUAL(expected_hsp->
score,
429actual_hsp->
score);
430BOOST_REQUIRE_EQUAL(expected_hsp->
num_ident,
434cerr <<
"Diff in evalues for "<< index <<
"="<< diff << endl;
436BOOST_REQUIRE_CLOSE(expected_hsp->
evalue, actual_hsp->
evalue, 10.0);
449 const intk_num_hsps_start = 3;
450 const intk_num_hsps_end = 2;
452 CSeq_idsubj_id(
"gi|402871");
458 const intquery_offset[k_num_hsps_start] = { 28, 46, 463};
459 const intquery_end[k_num_hsps_start] = { 485, 331, 488};
460 const intsubject_offset[k_num_hsps_start] = { 36, 327, 320};
461 const intsubject_end[k_num_hsps_start] = { 512, 604, 345};
462 const intscore[k_num_hsps_start] = { 554, 280, 28};
463 const intquery_gapped_start[k_num_hsps_start] = { 431, 186, 480};
464 const intsubject_gapped_start[k_num_hsps_start] = { 458, 458, 337};
468setUpHSPList(k_num_hsps_start,
469query_offset, query_end,
470subject_offset, subject_end,
472subject_gapped_start,
475 const intquery_offset_final[k_num_hsps_end] = { 2, 46};
476 const intquery_end_final[k_num_hsps_end] = { 485, 331};
477 const intsubject_offset_final[k_num_hsps_end] = { 9, 327};
478 const intsubject_end_final[k_num_hsps_end] = { 512, 604};
479 const intscore_final[k_num_hsps_end] = { 510, 282};
480 const doubleevalue_final[k_num_hsps_end] = {7.0065e-61, 1.6958e-30};
481 const intnum_idents_final[k_num_hsps_end] = { 171, 94 };
484setUpHSPList(k_num_hsps_end,
487subject_offset_final,
490subject_offset_final,
495 const Int8kEffSearchSp = 500000;
496 const boolkSmithWaterman =
false;
498runRedoAlignmentCoreUnitTest(kProgram, query_id, subj_id,
499init_hsp_list, ending_hsp_list,
504BOOST_REQUIRE(ending_hsp_list ==
NULL);
509 const intk_num_hsps_start = 3;
510 const intk_num_hsps_end = 2;
512 CSeq_idsubj_id(
"gi|402871");
518 const intquery_offset[k_num_hsps_start] = { 28, 46, 463};
519 const intquery_end[k_num_hsps_start] = { 485, 331, 488};
520 const intsubject_offset[k_num_hsps_start] = { 36, 327, 320};
521 const intsubject_end[k_num_hsps_start] = { 512, 604, 345};
522 const intscore[k_num_hsps_start] = { 554, 280, 28};
523 const intquery_gapped_start[k_num_hsps_start] = { 431, 186, 480};
524 const intsubject_gapped_start[k_num_hsps_start] = { 458, 458, 337};
528setUpHSPList(k_num_hsps_start,
529query_offset, query_end,
530subject_offset, subject_end,
532subject_gapped_start,
535 const intquery_offset_final[k_num_hsps_end] = { 2, 46};
536 const intquery_end_final[k_num_hsps_end] = { 517, 331};
537 const intsubject_offset_final[k_num_hsps_end] = { 9, 327};
538 const intsubject_end_final[k_num_hsps_end] = { 546, 604};
539 const intscore_final[k_num_hsps_end] = { 537, 298};
540 const doubleevalue_final[k_num_hsps_end] = {1.1954e-64, 1.5494e-32};
541 const intnum_idents_final[k_num_hsps_end] = { 177, 95 };
544setUpHSPList(k_num_hsps_end,
547subject_offset_final,
550subject_offset_final,
555 const Int8kEffSearchSp = 500000;
556 const boolkSmithWaterman =
false;
558runRedoAlignmentCoreUnitTest(kProgram, query_id, subj_id,
559init_hsp_list, ending_hsp_list,
564BOOST_REQUIRE(ending_hsp_list ==
NULL);
569 const intk_num_hsps_start = 6;
570 const intk_num_hsps_end = 2;
571 CSeq_idquery_id(
"gi|129295");
572 CSeq_idsubj_id(
"gi|7450545");
574 const intquery_offset[k_num_hsps_start] = { 24, 99, 16, 84, 6, 223 };
575 const intquery_end[k_num_hsps_start] = { 62, 128, 24, 114, 25, 231 };
576 const intsubject_offset[k_num_hsps_start] =
577{ 245, 0, 198, 86, 334, 151 };
578 const intsubject_end[k_num_hsps_start] =
579{ 287, 29, 206, 119, 353, 159 };
580 const intscore[k_num_hsps_start] = { 37, 26, 25, 25, 24, 24 };
581 const intquery_gapped_start[k_num_hsps_start] =
582{ 29, 104, 20, 91, 19, 227 };
583 const intsubject_gapped_start[k_num_hsps_start] =
584{ 250, 5, 202, 93, 347, 155 };
589setUpHSPList(k_num_hsps_start,
590query_offset, query_end,
591subject_offset, subject_end,
593subject_gapped_start,
596 const intquery_offset_final[k_num_hsps_end] = { 24, 18 };
597 const intquery_end_final[k_num_hsps_end] = { 30, 31 };
598 const intsubject_offset_final[k_num_hsps_end] = { 245, 200 };
599 const intsubject_end_final[k_num_hsps_end] = { 251, 210 };
600 const intscore_final[k_num_hsps_end] = { 29, 24 };
601 const doubleevalue_final[k_num_hsps_end] =
602{ 1.361074 , 6.425098 };
603 const intident_final[k_num_hsps_end] = { 3, 6};
607setUpHSPList(k_num_hsps_end,
610subject_offset_final,
613subject_offset_final,
619 const Int8kEffSearchSp = 84660;
620 const boolkSmithWaterman =
false;
622runRedoAlignmentCoreUnitTest(kProgram, query_id, subj_id,
623init_hsp_list, ending_hsp_list,
627BOOST_REQUIRE(ending_hsp_list ==
NULL);
632 const intk_num_hsps_start = 6;
633 const intk_num_hsps_end = 5;
635 CSeq_idsubj_id(
"gb|AAA22059|");
639 const boolis_protein(
true);
648 const intquery_offset[k_num_hsps_start] = { 3, 1, 4, 3, 0, 1 };
649 const intquery_end[k_num_hsps_start] = { 236, 232, 236, 235, 226, 233 };
650 const intsubject_offset[k_num_hsps_start] =
651{ 1, 1, 6, 6, 12, 22 };
652 const intsubject_end[k_num_hsps_start] =
653{ 238, 238, 238, 238, 238, 254 };
654 const intscore[k_num_hsps_start] = { 345, 344, 343, 339, 332, 320 };
655 const intquery_gapped_start[k_num_hsps_start] =
656{ 32, 194, 9, 8, 104, 9 };
657 const intsubject_gapped_start[k_num_hsps_start] =
658{ 30, 200, 11, 11, 116, 30 };
663setUpHSPList(k_num_hsps_start,
664query_offset, query_end,
665subject_offset, subject_end,
667subject_gapped_start,
670 const intquery_offset_final[k_num_hsps_end] = { 4, 3, 3, 0, 0};
671 const intquery_end_final[k_num_hsps_end] = { 236, 235, 220, 226, 232};
672 const intsubject_offset_final[k_num_hsps_end] = { 6, 6, 1, 12, 6};
673 const intsubject_end_final[k_num_hsps_end] = { 238, 238, 218, 238, 238};
674 const intscore_final[k_num_hsps_end] = { 73, 72, 69, 68, 66};
675 const doubleevalue_final[k_num_hsps_end] =
676{ 1.26e-05 , 1.7e-5 , 4.0e-5, 5.1e-5, 0.0000775};
677 const intnum_idents_final[k_num_hsps_end] = { 87, 85, 81, 84, 85 };
681setUpHSPList(k_num_hsps_end,
684subject_offset_final,
687subject_offset_final,
693 const Int8kEffSearchSp = 84660;
694 const boolkSmithWaterman =
false;
696runRedoAlignmentCoreUnitTest(kProgram, query_seqs[0], *ssl,
697init_hsp_list, ending_hsp_list,
701BOOST_REQUIRE(ending_hsp_list ==
NULL);
706 const intk_num_hsps_start = 3;
707 const intk_num_hsps_end = 5;
709 CSeq_idsubj_id(
"gi|402871");
715 const intquery_offset[k_num_hsps_start] = { 28, 46, 463 };
716 const intquery_end[k_num_hsps_start] = { 485, 331, 488 };
717 const intsubject_offset[k_num_hsps_start] = { 36, 327, 320 };
718 const intsubject_end[k_num_hsps_start] = { 512, 604, 345 };
719 const intscore[k_num_hsps_start] = { 554, 280, 28 };
720 const intquery_gapped_start[k_num_hsps_start] = { 431, 186, 480 };
721 const intsubject_gapped_start[k_num_hsps_start] = { 458, 458, 337 };
725setUpHSPList(k_num_hsps_start,
726query_offset, query_end,
727subject_offset, subject_end,
729subject_gapped_start,
732 const intquery_offset_final[k_num_hsps_end] = { 2, 250, 494, 67, 2 };
733 const intquery_end_final[k_num_hsps_end] = { 485, 331, 530, 86, 24 };
734 const intsubject_offset_final[k_num_hsps_end] = { 9, 523, 261, 585, 570 };
735 const intsubject_end_final[k_num_hsps_end] = { 512, 604, 297, 604, 592 };
736 const intscore_final[k_num_hsps_end] = { 591, 39, 37, 33, 32 };
737 const doubleevalue_final[k_num_hsps_end] = { 2.3451e-72, 0.387,
7380.6692, 1.9988, 2.6256 };
739 const intnum_idents_final[k_num_hsps_end] = { 172, 22, 9, 8, 7 };
742setUpHSPList(k_num_hsps_end,
745subject_offset_final,
748subject_offset_final,
753 const Int8kEffSearchSp = 500000;
754 const boolkSmithWaterman =
true;
756runRedoAlignmentCoreUnitTest(kProgram, query_id, subj_id,
757init_hsp_list, ending_hsp_list,
761BOOST_REQUIRE(ending_hsp_list ==
NULL);
766 const intk_num_hsps_start = 3;
767 const intk_num_hsps_end = 3;
769 CSeq_idsubj_id(
"gi|402871");
771 const intquery_offset[k_num_hsps_start] = { 28, 46, 463};
772 const intquery_end[k_num_hsps_start] = { 485, 331, 488};
773 const intsubject_offset[k_num_hsps_start] = { 36, 327, 320};
774 const intsubject_end[k_num_hsps_start] = { 512, 604, 345};
775 const intscore[k_num_hsps_start] = { 554, 280, 28};
776 const intquery_gapped_start[k_num_hsps_start] = { 431, 186, 480};
777 const intsubject_gapped_start[k_num_hsps_start] = { 458, 458, 337};
781setUpHSPList(k_num_hsps_start,
782query_offset, query_end,
783subject_offset, subject_end,
785subject_gapped_start,
788 const intquery_offset_final[k_num_hsps_end] = { 2, 250, 67 };
789 const intquery_end_final[k_num_hsps_end] = { 485, 331, 86};
790 const intsubject_offset_final[k_num_hsps_end] = { 9, 523, 585};
791 const intsubject_end_final[k_num_hsps_end] = { 512, 604, 604};
792 const intscore_final[k_num_hsps_end] = { 510, 34, 31};
793 const doubleevalue_final[k_num_hsps_end] = {7.0065e-61, 1.349, 3.7944};
794 const intnum_idents_final[k_num_hsps_end] = { 171, 22, 8 };
797setUpHSPList(k_num_hsps_end,
800subject_offset_final,
803subject_offset_final,
808 const Int8kEffSearchSp = 500000;
809 const boolkSmithWaterman =
true;
811runRedoAlignmentCoreUnitTest(kProgram, query_id, subj_id,
812init_hsp_list, ending_hsp_list,
817BOOST_REQUIRE(ending_hsp_list ==
NULL);
822 const intk_num_hsps_start = 6;
823 const intk_num_hsps_end = 8;
824 CSeq_idquery_id(
"gi|129295");
825 CSeq_idsubj_id(
"gi|7450545");
827 const intquery_offset[k_num_hsps_start] =
828{ 24, 99, 16, 84, 6, 223 };
829 const intquery_end[k_num_hsps_start] =
830{ 62, 128, 24, 114, 25, 231 };
831 const intsubject_offset[k_num_hsps_start] =
832{ 245, 0, 198, 86, 334, 151 };
833 const intsubject_end[k_num_hsps_start] =
834{ 287, 29, 206, 119, 353, 159 };
835 const intscore[k_num_hsps_start] =
836{ 37, 26, 25, 25, 24, 24 };
837 const intquery_gapped_start[k_num_hsps_start] =
838{ 29, 104, 20, 91, 19, 227 };
839 const intsubject_gapped_start[k_num_hsps_start] =
840{ 250, 5, 202, 93, 347, 155 };
846setUpHSPList(k_num_hsps_start,
847query_offset, query_end,
848subject_offset, subject_end,
850subject_gapped_start,
853 const intquery_offset_final[k_num_hsps_end] =
854{ 24, 140, 126, 10, 137, 198, 18, 137 };
855 const intquery_end_final[k_num_hsps_end] =
856{ 30, 171, 205, 35, 157, 208, 31, 152 };
857 const intsubject_offset_final[k_num_hsps_end] =
858{ 245, 408, 212, 130, 339, 388, 200, 186 };
859 const intsubject_end_final[k_num_hsps_end] =
860{ 251, 439, 287, 155, 359, 398, 210, 201 };
861 const intscore_final[k_num_hsps_end] =
862{ 29, 28, 28, 28, 25, 24, 24, 22 };
863 const doubleevalue_final[k_num_hsps_end] =
864{ 1.361074, 1.837947, 2.118044, 2.153685, 4.198304, 5.529096,
8656.425098, 8.532644 };
866 const intident_final[k_num_hsps_end] =
867{ 3, 8, 23, 10, 6, 5, 6, 5};
870setUpHSPList(k_num_hsps_end,
873subject_offset_final,
876subject_offset_final,
882 const Int8kEffSearchSp = 84660;
883 const boolkSmithWaterman =
true;
885runRedoAlignmentCoreUnitTest(kProgram, query_id, subj_id,
886init_hsp_list, ending_hsp_list,
891BOOST_REQUIRE(ending_hsp_list ==
NULL);
905 const intk_num_hsps_start = 4;
906 const intk_num_hsps_end = 4;
907 CSeq_idquery_id(
"gi|48100936");
908 CSeq_idsubj_id(
"gi|7301132");
910 const intquery_offset[k_num_hsps_start] = { 995, 1004, 995, 973};
911 const intquery_end[k_num_hsps_start] = { 1314, 1314, 1403, 1316};
912 const intsubject_offset[k_num_hsps_start] = { 61, 36, 61, 106};
913 const intsubject_end[k_num_hsps_start] = { 384, 384, 455, 420};
914 const intscore[k_num_hsps_start] = { 341, 327, 314, 301};
915 const intquery_gapped_start[k_num_hsps_start] = { 1233, 1017, 1310,
917 const intsubject_gapped_start[k_num_hsps_start] = { 303, 49, 347, 331};
921setUpHSPList(k_num_hsps_start,
922query_offset, query_end,
923subject_offset, subject_end,
925subject_gapped_start,
927 const intquery_offset_final[k_num_hsps_end] =
928{ 995, 1261, 1025, 1210};
929 const intquery_end_final[k_num_hsps_end] =
930{ 1314, 1341, 1125, 1243};
931 const intsubject_offset_final[k_num_hsps_end] =
933 const intsubject_end_final[k_num_hsps_end] =
934{ 384, 115, 482, 50};
935 const intscore_final[k_num_hsps_end] =
937 const doubleevalue_final[k_num_hsps_end] =
938{ 2.712e-34, 3.6003e-05, 0.00048334, 0.00441};
939 const intnum_idents_final[k_num_hsps_end] = { 108, 31, 30, 12 };
942setUpHSPList(k_num_hsps_end,
945subject_offset_final,
948subject_offset_final,
953 const Int8kEffSearchSp = 1000*1000;
954 const boolkSmithWaterman =
true;
956 const intkHitListSize = 1;
957 const doublekEvalueThreshold = 0.005;
959runRedoAlignmentCoreUnitTest(kProgram, query_id, subj_id,
960init_hsp_list, ending_hsp_list,
962kSmithWaterman, kEvalueThreshold,
966BOOST_REQUIRE(ending_hsp_list ==
NULL);
Declares the CBl2Seq (BLAST 2 Sequences) class.
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Interface for reading SRA sequences into blast input.
Structures and API used for saving BLAST hits.
BlastHSPResults * Blast_HSPResultsFree(BlastHSPResults *results)
Deallocate memory for BLAST results.
Int2 Blast_HSPInit(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 query_frame, Int2 subject_frame, Int4 score, GapEditScript **gap_edit, BlastHSP **ret_hsp)
Allocates BlastHSP and inits with information from input.
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
BlastHSPResults * Blast_HSPResultsNew(Int4 num_queries)
Initialize the results structure.
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)
Saves HSP information into a BlastHSPList structure.
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all its components.
void Blast_HSPListSortByScore(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by score.
BlastHSPWriter * BlastHSPWriterNew(BlastHSPWriterInfo **writer_info, BlastQueryInfo *query_info, BLAST_SequenceBlk *query)
A generic function to create writer.
Declaration of ADT to save and retrieve lists of HSPs in the BLAST engine.
int BlastHSPStreamWrite(BlastHSPStream *hsp_stream, BlastHSPList **hsp_list)
Invokes the user-specified write function for this BlastHSPStream implementation.
BlastHSPStream * BlastHSPStreamFree(BlastHSPStream *hsp_stream)
Frees the BlastHSPStream structure by invoking the destructor function set by the user-defined constr...
BlastHSPStream * BlastHSPStreamNew(EBlastProgramType program, const BlastExtensionOptions *extn_opts, Boolean sort_on_read, Int4 num_queries, BlastHSPWriter *writer)
Initialize the HSP stream.
Interface for converting sources of sequence data into blast sequence input.
Header file for composition-based statistics.
Int2 Blast_RedoAlignmentCore(EBlastProgramType program_number, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *query_info, BlastScoreBlk *sbp, BLAST_SequenceBlk *subjectBlk, const BlastSeqSrc *seqSrc, Int4 db_genetic_code, BlastHSPList *thisMatch, BlastHSPStream *hsp_stream, BlastScoringParameters *scoringParams, const BlastExtensionParameters *extendParams, const BlastHitSavingParameters *hitParams, const PSIBlastOptions *psiOptions, BlastHSPResults *results)
Top level routine to recompute alignments for each match found by the gapped BLAST algorithm (single-...
Definitions which are dependent on the NCBI C++ Object Manager.
PSIBlastOptions * PSIBlastOptionsFree(PSIBlastOptions *psi_options)
Deallocate PSI BLAST options.
#define BLAST_HITLIST_SIZE
Number of database sequences to save hits for.
Int2 BlastEffectiveLengthsOptionsNew(BlastEffectiveLengthsOptions **options)
Allocate memory for BlastEffectiveLengthsOptions* and fill with default values.
Int2 BLAST_FillEffectiveLengthsOptions(BlastEffectiveLengthsOptions *options, Int4 dbseq_num, Int8 db_length, Int8 *searchsp_eff, Int4 num_searchsp)
Fill the non-default values in the BlastEffectiveLengthsOptions structure.
Int2 BlastScoringOptionsNew(EBlastProgramType program, BlastScoringOptions **options)
Allocate memory for BlastScoringOptions and fill with default values.
BlastEffectiveLengthsOptions * BlastEffectiveLengthsOptionsFree(BlastEffectiveLengthsOptions *options)
Deallocate memory for BlastEffectiveLengthsOptions*.
BlastExtensionOptions * BlastExtensionOptionsFree(BlastExtensionOptions *options)
Deallocate memory for BlastExtensionOptions.
#define BLAST_EXPECT_VALUE
Default parameters for saving hits.
Int2 BlastHitSavingOptionsNew(EBlastProgramType program, BlastHitSavingOptions **options, Boolean gapped_calculation)
Allocate memory for BlastHitSavingOptions.
@ eSmithWatermanTbck
Smith-waterman finds optimal scores, then ALIGN_EX to find alignment.
Int2 PSIBlastOptionsNew(PSIBlastOptions **psi_options)
Initialize default options for PSI BLAST.
#define BLAST_GENETIC_CODE
Default genetic code for query and/or database.
Int2 BlastExtensionOptionsNew(EBlastProgramType program, BlastExtensionOptions **options, Boolean gapped)
Allocate memory for BlastExtensionOptions and fill with default values.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
BlastHitSavingParameters * BlastHitSavingParametersFree(BlastHitSavingParameters *parameters)
Deallocate memory for BlastHitSavingOptions*.
BlastEffectiveLengthsParameters * BlastEffectiveLengthsParametersFree(BlastEffectiveLengthsParameters *parameters)
Deallocate memory for BlastEffectiveLengthsParameters*.
Int2 BlastExtensionParametersNew(EBlastProgramType blast_program, const BlastExtensionOptions *options, BlastScoreBlk *sbp, BlastQueryInfo *query_info, BlastExtensionParameters **parameters)
Calculate the raw values for the X-dropoff parameters.
BlastExtensionParameters * BlastExtensionParametersFree(BlastExtensionParameters *parameters)
Deallocate memory for BlastExtensionParameters.
Int2 BlastScoringParametersNew(const BlastScoringOptions *options, BlastScoreBlk *sbp, BlastScoringParameters **parameters)
Calculate scaled cutoff scores and gap penalties.
Int2 BlastHitSavingParametersNew(EBlastProgramType program_number, const BlastHitSavingOptions *options, const BlastScoreBlk *sbp, const BlastQueryInfo *query_info, Int4 avg_subject_length, Int4 compositionBasedStats, BlastHitSavingParameters **parameters)
Allocate memory and initialize the BlastHitSavingParameters structure.
BlastScoringParameters * BlastScoringParametersFree(BlastScoringParameters *parameters)
Deallocate memory for BlastScoringParameters.
Int2 BlastEffectiveLengthsParametersNew(const BlastEffectiveLengthsOptions *options, Int8 db_length, Int4 num_seqs, BlastEffectiveLengthsParameters **parameters)
Allocate memory for BlastEffectiveLengthsParameters.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Declares the CBlastProteinOptionsHandle class.
void ** _PSIAllocateMatrix(unsigned int ncols, unsigned int nrows, unsigned int data_type_sz)
Generic 2 dimensional matrix allocator.
void ** _PSIDeallocateMatrix(void **matrix, unsigned int ncols)
Generic 2 dimensional matrix deallocator.
void _PSICopyMatrix_int(int **dest, int **src, unsigned int ncols, unsigned int nrows)
Copies src matrix into dest matrix, both of which must be int matrices with dimensions ncols by nrows...
Private interface for Position Iterated BLAST API, contains the PSSM generation engine.
Utilities to initialize/set up BLAST.
Int2 BlastSetup_ScoreBlkInit(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, const BlastScoringOptions *scoring_options, EBlastProgramType program_number, BlastScoreBlk **sbpp, double scale_factor, Blast_Message **blast_message, GET_MATRIX_PATH get_path)
Initializes the score block structure.
Int2 BLAST_CalcEffLengths(EBlastProgramType program_number, const BlastScoringOptions *scoring_options, const BlastEffectiveLengthsParameters *eff_len_params, const BlastScoreBlk *sbp, BlastQueryInfo *query_info, Blast_Message **blast_message)
Function to calculate effective query length and db length as well as effective search space.
BlastScoreBlk * BlastScoreBlkFree(BlastScoreBlk *sbp)
Deallocates BlastScoreBlk as well as all associated structures.
SPsiBlastScoreMatrix * SPsiBlastScoreMatrixNew(size_t ncols)
Allocates a new SPsiBlastScoreMatrix structure of dimensions ncols by BLASTAA_SIZE.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
Int2 BlastNumber2Program(EBlastProgramType number, char **program)
Return string name for program given a number.
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
Wrapper class for BLAST_SequenceBlk .
Runs the BLAST algorithm between 2 sequences.
Wrapper class for BlastExtensionOptions .
Class representing a text file containing sequences in fasta format.
Wrapper class for BlastHitSavingOptions .
Class that centralizes the configuration data for sequences to be converted.
Generalized converter from an abstract source of biological sequence data to collections of blast inp...
Encapsulates ALL the BLAST algorithm's options.
Wrapper class for BlastQueryInfo .
Class whose purpose is to create CScope objects which have data loaders added with different prioriti...
CRef< objects::CScope > NewScope()
Create a new, properly configured CScope.
Wrapper class for BlastScoringOptions .
Wrapper class for BlastSeqSrc .
static CTestObjMgr & Instance()
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
Constants used in compositional score matrix adjustment.
ECompoAdjustModes
A collection of constants that specify all permissible modes of composition adjustment.
@ eCompositionBasedStats
Composition-based statistics as in NAR 29:2994-3005, 2001.
@ eNoCompositionBasedStats
Don't use composition based statistics.
@ eCompositionMatrixAdjust
Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence pro...
const CBlastOptionsHandle & GetOptionsHandle() const
Retrieve the options handle.
#define BLASTAA_SIZE
Size of aminoacid alphabet.
void SetupQueries(TSeqLocVector &queries, BlastQueryInfo *qinfo, BLAST_SequenceBlk **seqblk, EBlastProgramType prog, objects::ENa_strand strand_opt, TSearchMessages &messages)
Populates BLAST_SequenceBlk with sequence data for use in CORE BLAST.
objects::ENa_strand GetStrandOption() const
BlastSeqSrc * MultiSeqBlastSeqSrcInit(TSeqLocVector &seq_vector, EBlastProgramType program, bool dbscan_mode=false)
Initialize the sequence source structure.
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
char * BlastFindMatrixPath(const char *matrix_name, Boolean is_prot)
Returns the path to a specified matrix.
const TSeqLocVector & GetQueries() const
Retrieve a vector of query sequences.
void SetupQueryInfo(TSeqLocVector &queries, EBlastProgramType prog, objects::ENa_strand strand_opt, BlastQueryInfo **qinfo)
Allocates the query information structure and fills the context offsets, in case of multiple queries,...
EProgram ProgramNameToEnum(const std::string &program_name)
Map a string into an element of the ncbi::blast::EProgram enumeration (except eBlastProgramMax).
const TSeqLocVector & GetSubjects() const
Retrieve a vector of subject sequences.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
int16_t Int2
2-byte (16-bit) signed integer
int64_t Int8
8-byte (64-bit) signed integer
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
ENa_strand
strand of nucleic acid
Implementation of a number of BlastHSPWriters to save hits from a BLAST search, and subsequently retu...
BlastHSPCollectorParams * BlastHSPCollectorParamsNew(const BlastHitSavingOptions *hit_options, Int4 compositionBasedStats, Boolean gapped_calculation)
Sets up parameter set for use by collector.
BlastHSPWriterInfo * BlastHSPCollectorInfoNew(BlastHSPCollectorParams *params)
WriterInfo to create a default writer: the collector.
Definitions used to get joint probabilities for a scoring matrix.
int Blast_FrequencyDataIsAvailable(const char *matrix_name)
Retrieve the background letter probabilities implicitly used in constructing the score matrix matrix_...
void CheckForBlastSeqSrcErrors(const BlastSeqSrc *seqsrc)
Magic spell ;-) needed for some weird compilers... very empiric.
#define FALSE
bool replacement for C indicating false.
Defines: CTimeFormat - storage class for time format.
std::istream & in(std::istream &in_, double &x_)
BOOST_AUTO_TEST_CASE(testRedoAlignmentWithCompBasedStats)
Implementation of the BlastSeqSrc interface for a vector of sequence locations.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Int8 eff_searchsp
Effective search space for this context.
Options for setting up effective lengths and search spaces.
Parameters for setting up effective lengths and search spaces.
Options used for gapped extension These include: a.
EBlastTbackExt eTbackExt
type of traceback extension.
Int4 compositionBasedStats
mode of compositional adjustment to use; if zero then compositional adjustment is not used
Computed values used as parameters for gapped alignments.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Int4 hspcnt
Number of HSPs saved.
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
The structure to contain all BLAST results, for multiple queries.
Default implementation of BlastHSPStream.
A wrap of data structure used to create a writer.
ADT definition of BlastHSPWriter.
Structure holding all information about an HSP.
double evalue
This HSP's e-value.
Int4 num_ident
Number of identical base pairs in this HSP.
BlastSeg query
Query sequence info.
double bit_score
Bit score, calculated from score.
BlastSeg subject
Subject sequence info.
Int4 score
This HSP's raw score.
double expect_value
The expect value cut-off threshold for an HSP, or a combined hit if sum statistics is used.
Int4 hitlist_size
Maximal number of database sequences to return results for.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
BlastContextInfo * contexts
Information per context.
int num_queries
Number of query sequences.
Structure used for scoring calculations.
Blast_KarlinBlk ** kbp
Karlin-Altschul parameters.
Blast_KarlinBlk ** kbp_psi
K-A parameters for position-based alignments.
Blast_KarlinBlk ** kbp_gap
K-A parameters for gapped alignments.
SPsiBlastScoreMatrix * psi_matrix
PSSM and associated data.
Blast_KarlinBlk ** kbp_gap_psi
K-A parameters for psi alignments.
Boolean gapped_calculation
gap-free search if FALSE
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
Int4 offset
Start of hsp.
Structure to hold a message from the core of the BLAST engine.
static int ** loadPssmFromFile(const string &filename, unsigned int query_length)
CRedoAlignmentTestFixture()
static BlastHSPList * setUpHSPList(int num_hsps, const int query_offset[], const int query_end[], const int subject_offset[], const int subject_end[], const int query_gapped_start[], const int subject_gapped_start[], const int score[], const double evalue[]=NULL, const int num_ident[]=NULL)
~CRedoAlignmentTestFixture()
static void runRedoAlignmentCoreUnitTest(EBlastProgramType program, CSeq_id &qid, CSeq_id &sid, BlastHSPList *init_hsp_list, const BlastHSPList *ending_hsp_list, Int8 effective_searchsp, ECompoAdjustModes compositonBasedStatsMode, bool doSmithWaterman, double evalue_threshold=BLAST_EXPECT_VALUE, int hit_list_size=BLAST_HITLIST_SIZE)
static void setupPositionBasedBlastScoreBlk(BlastScoreBlk *sbp, unsigned int qlen)
Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST) Some of these possibly should...
int ** data
actual scoring matrix data, stored in row-major form
SBlastScoreMatrix * pssm
position-specific score matrix
Structure to represent a single sequence to be fed to BLAST.
CConstRef< objects::CSeq_loc > seqloc
Seq-loc describing the sequence to use as query/subject to BLAST The types of Seq-loc currently suppo...
CRef< objects::CScope > scope
Scope where the sequence referenced can be found by the toolkit's object manager.
Utility stuff for more convenient using of Boost.Test library.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4