;
96 const Uint4kGi_130912_Length = 253;
97 const Uint1kGi_130912_[kGi_130912_Length] = {
9812, 1, 13, 11, 7, 3, 20, 12, 11, 19, 11, 6, 19, 1, 18,
9920, 17, 4, 11, 7, 11, 3, 10, 10, 16, 14, 10, 14, 7, 7,
10020, 13, 18, 7, 7, 17, 16, 22, 14, 7, 15, 7, 17, 14, 7,
1017, 13, 16, 22, 14, 14, 15, 7, 7, 7, 7, 20, 7, 15, 14,
1028, 7, 7, 7, 20, 7, 15, 14, 8, 7, 7, 7, 20, 7, 15,
10314, 8, 7, 7, 7, 20, 7, 15, 14, 8, 7, 7, 7, 20, 7,
10415, 7, 7, 7, 18, 8, 17, 15, 20, 13, 10, 14, 17, 10, 14,
10510, 18, 13, 12, 10, 8, 12, 1, 7, 1, 1, 1, 1, 7, 1,
10619, 19, 7, 7, 11, 7, 7, 22, 12, 11, 7, 17, 1, 12, 17,
10716, 14, 9, 9, 8, 6, 7, 17, 4, 22, 5, 4, 16, 22, 22,
10816, 5, 13, 12, 8, 16, 22, 14, 13, 15, 19, 22, 22, 16, 14,
10912, 4, 5, 22, 17, 13, 15, 13, 13, 6, 19, 8, 4, 3, 19,
11013, 9, 18, 9, 10, 15, 8, 18, 19, 18, 18, 18, 18, 10, 7,
1115, 13, 6, 18, 5, 18, 4, 19, 10, 12, 12, 5, 16, 19, 19,
1125, 15, 12, 3, 9, 18, 15, 22, 5, 16, 5, 17, 15, 1, 22,
11322, 15, 16, 7, 17, 17, 12, 19, 11, 6, 17, 17, 14, 14, 19,
1149, 11, 11, 9, 17, 6, 11, 9, 6, 11, 9, 19, 7};
119BOOST_REQUIRE_EQUAL((
int)kGi_130912_Length, seq_blk->
length);
121 for(
int i= 0;
i< seq_blk->
length;
i++) {
123os << prefix <<
": position "<<
i<< endl;
124BOOST_REQUIRE_MESSAGE((
int)seq_blk->
sequence[
i] == (
int)kGi_130912_[
i],
163BOOST_REQUIRE_EQUAL(1,
168BOOST_REQUIRE_EQUAL(1,
185 constvector<EBlastProgramType> programs =
187 ITERATE(vector<EBlastProgramType>, program, programs) {
193BOOST_REQUIRE_MESSAGE(reference ==
test,
"Failed on "+
prog);
198 constvector<EBlastProgramType> programs =
200 ITERATE(vector<EBlastProgramType>, program, programs) {
206BOOST_REQUIRE_MESSAGE(reference ==
test,
"Failed on "+
prog);
211 constvector<EBlastProgramType> programs =
213 ITERATE(vector<EBlastProgramType>, program, programs) {
221BOOST_REQUIRE_MESSAGE(reference ==
test,
"Failed on "+
prog);
226 constvector<EBlastProgramType> programs =
228 ITERATE(vector<EBlastProgramType>, program, programs) {
236BOOST_REQUIRE_MESSAGE(reference ==
test,
"Failed on "+
prog);
241 constvector<EBlastProgramType> programs =
243 ITERATE(vector<EBlastProgramType>, program, programs) {
249BOOST_REQUIRE_MESSAGE(reference ==
test,
"Failed on "+
prog);
259 strings.reserve(programs.size());
260 strings.push_back(
"blastp");
261 strings.push_back(
"blastn");
262 strings.push_back(
"blastx");
263 strings.push_back(
"tblastn");
264 strings.push_back(
"tblastx");
265 strings.push_back(
"psiblast");
266 strings.push_back(
"psitblastn");
267 strings.push_back(
"rpsblast");
268 strings.push_back(
"rpstblastn");
269 strings.push_back(
"phiblastp");
270 strings.push_back(
"phiblastn");
271 strings.push_back(
"unknown");
273BOOST_REQUIRE_EQUAL(programs.size(),
strings.size());
275 for(
size_t i= 0;
i< programs.size();
i++) {
334BOOST_REQUIRE((matrix_path !=
NULL) && (strlen(matrix_path) > 0));
341 stringignoreFile(
"BLOSUM30");
345BOOST_REQUIRE(matrix_path ==
NULL);
352 stringignoreFile(
input.get());
356BOOST_REQUIRE((matrix_path !=
NULL) && (strlen(matrix_path) > 0));
362BOOST_REQUIRE(matrix_path ==
NULL);
367BOOST_REQUIRE(matrix_path ==
NULL);
371 const size_tkNumPrograms = 4;
378 const char* program_strings[kNumPrograms] = {
384 stringerror_prefix(
"Conversion from frame to context failed for ");
386 for(
size_t i= 0;
i< kNumPrograms;
i++) {
390BOOST_REQUIRE_MESSAGE((
int)frame == (
int)0,
391error_prefix + program_strings[
i]);
393BOOST_REQUIRE_MESSAGE((
int)frame == (
int)0,
394error_prefix + program_strings[
i]);
397BOOST_REQUIRE_MESSAGE((
int)frame == (
int)0,
398error_prefix + program_strings[
i]);
400BOOST_REQUIRE_MESSAGE((
int)frame == (
int)0,
401error_prefix + program_strings[
i]);
404BOOST_REQUIRE_MESSAGE((
int)frame == (
int)0,
405error_prefix + program_strings[
i]);
407BOOST_REQUIRE_MESSAGE((
int)frame == (
int)0,
408error_prefix + program_strings[
i]);
411BOOST_REQUIRE_MESSAGE((
int)frame == (
int)0,
412error_prefix + program_strings[
i]);
414BOOST_REQUIRE_MESSAGE((
int)frame == (
int)0,
415error_prefix + program_strings[
i]);
422 string error(
"Conversion from frame to context failed for blastn");
425BOOST_REQUIRE_MESSAGE((
int)frame == (
int)1,
error);
427BOOST_REQUIRE_MESSAGE((
int)frame == (
int)-1,
error);
429BOOST_REQUIRE_MESSAGE((
int)frame == (
int)-1,
error);
431BOOST_REQUIRE_MESSAGE((
int)frame == (
int)1,
error);
435 const size_tkNumPrograms = 3;
441 const char* program_strings[kNumPrograms] = {
446 stringerror_prefix(
"Conversion from frame to context failed for ");
448 for(
size_t i= 0;
i< kNumPrograms;
i++) {
452BOOST_REQUIRE_MESSAGE((
int)frame == (
int)-1,
453error_prefix + program_strings[
i]);
455BOOST_REQUIRE_MESSAGE((
int)frame == (
int)-2,
456error_prefix + program_strings[
i]);
458BOOST_REQUIRE_MESSAGE((
int)frame == (
int)-3,
459error_prefix + program_strings[
i]);
462BOOST_REQUIRE_MESSAGE((
int)frame == (
int)1,
463error_prefix + program_strings[
i]);
465BOOST_REQUIRE_MESSAGE((
int)frame == (
int)2,
466error_prefix + program_strings[
i]);
468BOOST_REQUIRE_MESSAGE((
int)frame == (
int)3,
469error_prefix + program_strings[
i]);
478BOOST_REQUIRE_EQUAL((
int)127, (
int)frame);
480BOOST_REQUIRE_EQUAL((
int)127, (
int)frame);
488seqloc_v.push_back(*sseqloc);
490unique_ptr<CBlastOptionsHandle>
492vector<BLAST_SequenceBlk*> seqblk_v;
493 unsigned intmaxlen = 0;
495 SetupSubjects(seqloc_v, opts_handle->GetOptions().GetProgramType(),
499BOOST_REQUIRE(maxlen != 0);
500BOOST_REQUIRE(seqlen != 0);
501BOOST_REQUIRE(seqblk_v.size() == (
size_t)1);
502BOOST_REQUIRE_EQUAL(maxlen, seqlen);
504 ITERATE(vector<BLAST_SequenceBlk*>, itr, seqblk_v) {
505BOOST_REQUIRE((*itr)->lcase_mask ==
NULL);
508 ITERATE(vector<BLAST_SequenceBlk*>, itr, seqblk_v) {
515pair<TSeqPos, TSeqPos> range(50, 157);
516unique_ptr<SSeqLoc> sseqloc(
520seqloc_v.push_back(*sseqloc);
522unique_ptr<CBlastOptionsHandle>
524vector<BLAST_SequenceBlk*> seqblk_v;
525 unsigned intmaxlen = 0;
527 SetupSubjects(seqloc_v, opts_handle->GetOptions().GetProgramType(),
530 unsigned intseqlen = range.second - range.first + 1;
531BOOST_REQUIRE(maxlen != 0);
532BOOST_REQUIRE(seqlen != 0);
533BOOST_REQUIRE_EQUAL(maxlen, seqlen);
534BOOST_REQUIRE(seqblk_v.size() == (
size_t)1);
536 ITERATE(vector<BLAST_SequenceBlk*>, itr, seqblk_v) {
537BOOST_REQUIRE((*itr)->lcase_mask ==
NULL);
540 ITERATE(vector<BLAST_SequenceBlk*>, itr, seqblk_v) {
549unique_ptr<SSeqLoc> sl(m_Om->CreateSSeqLoc(
id, kRange,
kStrand));
553BOOST_REQUIRE(seqloc_v.begin()->mask.NotEmpty());
555vector<BLAST_SequenceBlk*> seqblk_v;
556 unsigned intmaxlen = 0;
561BOOST_REQUIRE(seqblk_v.size() == 1);
562 unsigned intseqlen = kRange.
GetLength();
563BOOST_REQUIRE_EQUAL(maxlen, seqlen);
565s_TestSingleSubjectNuclMask(*seqblk_v.begin());
567 ITERATE(vector<BLAST_SequenceBlk*>, itr, seqblk_v) {
576unique_ptr<SSeqLoc> sl(m_Om->CreateSSeqLoc(
id, kRange,
kStrand));
580BOOST_REQUIRE(seqloc_v.begin()->mask.NotEmpty());
582vector<BLAST_SequenceBlk*> seqblk_v;
583 unsigned intmaxlen = 0;
588BOOST_REQUIRE(seqblk_v.size() == 1);
589 unsigned intseqlen = kRange.
GetLength();
590BOOST_REQUIRE_EQUAL(maxlen, seqlen);
592s_TestSingleSubjectNuclMask(*seqblk_v.begin());
594 ITERATE(vector<BLAST_SequenceBlk*>, itr, seqblk_v) {
603unique_ptr<SSeqLoc> sl(m_Om->CreateSSeqLoc(
id, kRange,
kStrand));
607BOOST_REQUIRE(seqloc_v.begin()->mask.NotEmpty());
609vector<BLAST_SequenceBlk*> seqblk_v;
610 unsigned intmaxlen = 0;
615BOOST_REQUIRE(seqblk_v.size() == 1);
616 unsigned intseqlen = kRange.
GetLength();
617BOOST_REQUIRE_EQUAL(maxlen, seqlen);
619s_TestSingleSubjectNuclMask(*seqblk_v.begin());
621 ITERATE(vector<BLAST_SequenceBlk*>, itr, seqblk_v) {
632unique_ptr<SSeqLoc> sl(m_Om->CreateSSeqLoc(
id, range,
kStrand));
635unique_ptr<CBlastOptionsHandle>
637vector<BLAST_SequenceBlk*> seqblk_v;
638 unsigned intmaxlen = 0;
640 SetupSubjects(seqloc_v, opts_handle->GetOptions().GetProgramType(),
643 unsigned intseqlen = range.
GetLength();
644BOOST_REQUIRE_EQUAL(maxlen, seqlen);
646 ITERATE(vector<BLAST_SequenceBlk*>, itr, seqblk_v) {
647BOOST_REQUIRE((*itr)->lcase_mask ==
NULL);
649 ITERATE(vector<BLAST_SequenceBlk*>, itr, seqblk_v) {
659unique_ptr<SSeqLoc> sl(m_Om->CreateSSeqLoc(
id, range,
kStrand));
661unique_ptr<CBlastOptionsHandle>
663vector<BLAST_SequenceBlk*> seqblk_v;
664 unsigned intmaxlen = 0;
666 SetupSubjects(seqloc_v, opts_handle->GetOptions().GetProgramType(),
669 unsigned intseqlen = range.
GetLength();
670BOOST_REQUIRE_EQUAL(maxlen, seqlen);
672 ITERATE(vector<BLAST_SequenceBlk*>, itr, seqblk_v) {
673BOOST_REQUIRE((*itr)->lcase_mask ==
NULL);
675 ITERATE(vector<BLAST_SequenceBlk*>, itr, seqblk_v) {
685unique_ptr<SSeqLoc> sl(m_Om->CreateSSeqLoc(
id, range,
kStrand));
688unique_ptr<CBlastOptionsHandle>
690vector<BLAST_SequenceBlk*> seqblk_v;
691 unsigned intmaxlen = 0;
693 SetupSubjects(seqloc_v, opts_handle->GetOptions().GetProgramType(),
696 unsigned intseqlen = range.
GetLength();
697BOOST_REQUIRE_EQUAL(maxlen, seqlen);
699 ITERATE(vector<BLAST_SequenceBlk*>, itr, seqblk_v) {
700BOOST_REQUIRE((*itr)->lcase_mask ==
NULL);
702 ITERATE(vector<BLAST_SequenceBlk*>, itr, seqblk_v) {
711 const stringkFile(
"data/selenocysteines.fsa");
712 const stringkSeqIdString(
"lcl|seq1");
713ifstream
in(kFile.c_str());
715 throwruntime_error(
"Failed to open "+ kFile);
719 boolread_failed =
false;
722seq_entry = reader.
ReadSet();
729 if( read_failed || !seq_entry ) {
730 throwruntime_error(
"Failed to read sequence from "+ kFile);
742BOOST_REQUIRE(warnings.empty());
746 const TSeqPoskReplacedPositions[] = { 10+1, 15+1 };
749BOOST_REQUIRE_EQUAL((
int)kUresidue,
750(
int)seq.
data.
get()[kReplacedPositions[
i]]);
757pair<TSeqPos, TSeqPos> range(0, 19);
758unique_ptr<SSeqLoc> sl(
766BOOST_REQUIRE_EQUAL(expected_length, seq.
length);
772pair<TSeqPos, TSeqPos> range(0, 19);
773unique_ptr<SSeqLoc> sl(
781BOOST_REQUIRE_EQUAL(expected_length, seq.
length);
787pair<TSeqPos, TSeqPos> range(63999900,64000000);
788unique_ptr<SSeqLoc> sl(
800hash_value += *sequence;
804BOOST_REQUIRE_EQUAL(3285, hash_value);
810pair<TSeqPos, TSeqPos> range(3471240, 3686557);
811unique_ptr<SSeqLoc> sl(
823hash_value += *sequence;
827BOOST_REQUIRE_EQUAL(6940529, hash_value);
833 const intkNumPrograms = 7;
834 const EProgramkProgram[kNumPrograms] =
841 const Int8kNuclDbLength = (
Int8) 39855e+5;
842 const Int8kProtDbLength = (
Int8) 75867e+4;
843 const Int4kNuclNumDbSeqs = (
Int4) 1140e+3;
844 const Int4kProtNumDbSeqs = (
Int4) 2247e+3;
847 const doublekSearchSp[kNumPrograms] =
848{ 2333197e+6, 532990e+5, 423501e+5, 122988e+6, 192228e+6,
849532990e+5, 423501e+5 };
850 const intkLengthAdjustments[kNumPrograms] =
851{ 33, 122, 121, 128, 56, 122, 121 };
854 const doublekMaxRelativeError = 5e-6;
860 for(index = 0; index < kNumPrograms; ++index) {
872query_v.push_back(
SSeqLoc(loc, scope));
892 prog, strand_opt, msgs);
895BOOST_REQUIRE(m->empty());
903&sbp, 1.0, &blast_message,
906BOOST_REQUIRE(blast_message ==
NULL);
907BOOST_REQUIRE(status == 0);
9110, 0, &eff_len_params);
912 booldb_is_nucl = (kProgram[index] ==
eBlastn||
916(db_is_nucl ? kNuclDbLength : kProtDbLength);
918(db_is_nucl ? kNuclNumDbSeqs : kProtNumDbSeqs);
922eff_len_params, sbp, query_info,
NULL);
924 doublerelative_error =
925 fabs((kSearchSp[index] -
928BOOST_REQUIRE(relative_error < kMaxRelativeError);
929BOOST_REQUIRE_EQUAL(kLengthAdjustments[index],
940relative_error =
fabs((kSearchSp[index] -
943BOOST_REQUIRE(relative_error < kMaxRelativeError);
955query_v.push_back(*sseqloc);
957unique_ptr<CBlastOptionsHandle>
959opts_handle->SetOptions().SetGappedMode(
false);
962s_ValidateProtein130912(qf, opts_handle->GetOptions(),
963 "Before CEffectiveSearchSpaceCalculator");
970BOOST_REQUIRE(eff_searchsp > 0);
972s_ValidateProtein130912(qf, opts_handle->GetOptions(),
973 "After CEffectiveSearchSpaceCalculator");
981query_v.push_back(*sseqloc);
983unique_ptr<CBlastOptionsHandle>
987s_ValidateProtein130912(qf, opts_handle->GetOptions(),
988 "Before CEffectiveSearchSpaceCalculator");
995BOOST_REQUIRE(eff_searchsp > 0);
997s_ValidateProtein130912(qf, opts_handle->GetOptions(),
998 "After CEffectiveSearchSpaceCalculator");
1003 const Int8kSearchSp = (
Int8) 1e+9;
1005 const intkQueryLength = 1000;
1023(
Int8*)&kSearchSp, 1);
1030BOOST_REQUIRE_EQUAL(kSearchSp,
1049query_v.push_back(*sl);
1050BOOST_REQUIRE_EQUAL((
Uint4)1, query_v[0].genetic_code_id);
1060BOOST_REQUIRE_EQUAL((
Uint4)2, query_v[0].genetic_code_id);
1074query_v.push_back(*sl);
1100 short st=
BLAST_MainSetUp(kProgram, qsup_opts, score_opts, query_blk, query_info,
1103BOOST_REQUIRE_EQUAL(0, (
int)
st);
1105BOOST_REQUIRE_EQUAL(-3, sbp->
matrix->
data[1][11]);
1106BOOST_REQUIRE_EQUAL(-2, sbp->
matrix->
data[0][11]);
1109BOOST_REQUIRE(lookup_segments ==
NULL);
1113BOOST_REQUIRE(sbp ==
NULL);
1122BOOST_REQUIRE_EQUAL(0, (
int)
st);
1124BOOST_REQUIRE_EQUAL(-1, sbp->
matrix->
data[1][14]);
1125BOOST_REQUIRE_EQUAL(-1, sbp->
matrix->
data[1][11]);
1126BOOST_REQUIRE_EQUAL(0, sbp->
matrix->
data[0][11]);
1129BOOST_REQUIRE(lookup_segments ==
NULL);
1133BOOST_REQUIRE(sbp ==
NULL);
1144BOOST_REQUIRE_EQUAL(1, (
int)
st);
1147BOOST_REQUIRE(lookup_segments ==
NULL);
1151BOOST_REQUIRE(sbp ==
NULL);
1154score_opts->
reward= 3124;
1160BOOST_REQUIRE_EQUAL(-1, (
int)
st);
1163BOOST_REQUIRE(lookup_segments ==
NULL);
1167BOOST_REQUIRE(sbp ==
NULL);
1169BOOST_REQUIRE(blast_msg ==
NULL);
1171BOOST_REQUIRE(qsup_opts ==
NULL);
1173BOOST_REQUIRE(score_opts ==
NULL);
1183ifstream
in(
"data/delta_seq.asn");
1194query_v.push_back(
SSeqLoc(sl, scope));
1212s_GetScoringOpts(kOpts),
1214&sbp, 1.0, &blast_message,
1217BOOST_REQUIRE(blast_message ==
NULL);
1218BOOST_REQUIRE(status == 0);
1222BOOST_REQUIRE(sbp ==
NULL);
1223BOOST_REQUIRE(status == 0);
1241CSeq_loc* mask_seqloc =
newCSeq_loc(id3, 10, 20,
kStrand);
1242ssl3->mask.Reset(mask_seqloc);
1245query_v.push_back(*ssl1);
1246query_v.push_back(*ssl2);
1247query_v.push_back(*ssl3);
1255kProgram,
kStrand, blast_msg);
1259BOOST_REQUIRE(lcase_mask !=
NULL);
1270 msg+= (*qm)->GetMessage();
1273BOOST_REQUIRE(
msg.find(
"Sequence contains no data") != string::npos);
1292 intnucl_length = nucl_seq.
length;
1296&frame_offsets, &mixed_seq);
1297BOOST_REQUIRE_EQUAL(0, (
int) mixed_seq[0]);
1298BOOST_REQUIRE_EQUAL(0, (
int) mixed_seq[1]);
1299BOOST_REQUIRE_EQUAL(0, (
int) mixed_seq[2]);
1300BOOST_REQUIRE_EQUAL(0, (
int) mixed_seq[nucl_length+1]);
1301BOOST_REQUIRE_EQUAL(0, (
int) mixed_seq[nucl_length+2]);
1302BOOST_REQUIRE_EQUAL(0, (
int) mixed_seq[nucl_length+3]);
1303BOOST_REQUIRE_EQUAL(0, (
int) mixed_seq[2*nucl_length+2]);
1305 for(
intindex = 0; index <=
NUM_FRAMES; ++index) {
1306BOOST_REQUIRE_EQUAL(0,
1307(
int) translation_buffer[frame_offsets[index]]);
1309BOOST_REQUIRE_EQUAL(0, (
int)frame_offsets[0]);
1310BOOST_REQUIRE_EQUAL(nucl_length+1, (
int)frame_offsets[3]);
1311BOOST_REQUIRE_EQUAL(2*nucl_length+2, (
int)frame_offsets[
NUM_FRAMES]);
1313 sfree(translation_buffer);
1314 sfree(frame_offsets);
1326query_v.push_back(*sl);
1340 prog, strand_opt, blast_msg);
1342BOOST_REQUIRE(m->empty());
1365query_v.push_back(*sl);
1379 prog, strand_opt, blast_msg);
1381BOOST_REQUIRE(m->empty());
#define COMPRESSION_RATIO
Compression ratio of nucleotide bases (4 bases in 1 byte)
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
#define CODON_LENGTH
Codons are always of length 3.
#define NUM_STRANDS
Number of frames in a nucleotide sequence.
#define NUM_FRAMES
Number of frames to which we translate in translating searches.
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
Declares the BLAST exception class.
BlastMaskLoc * BlastMaskLocFree(BlastMaskLoc *mask_loc)
Deallocate memory for a BlastMaskLoc structure as well as the BlastSeqLoc's pointed to.
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
Blast_Message * Blast_MessageFree(Blast_Message *blast_msg)
Deallocates message memory.
Definitions which are dependent on the NCBI C++ Object Manager.
Int2 BlastQuerySetUpOptionsNew(QuerySetUpOptions **options)
Allocate memory for QuerySetUpOptions and fill with default values.
Int2 BlastEffectiveLengthsOptionsNew(BlastEffectiveLengthsOptions **options)
Allocate memory for BlastEffectiveLengthsOptions* and fill with default values.
Int2 BLAST_FillEffectiveLengthsOptions(BlastEffectiveLengthsOptions *options, Int4 dbseq_num, Int8 db_length, Int8 *searchsp_eff, Int4 num_searchsp)
Fill the non-default values in the BlastEffectiveLengthsOptions structure.
Int2 BlastScoringOptionsNew(EBlastProgramType program, BlastScoringOptions **options)
Allocate memory for BlastScoringOptions and fill with default values.
BlastEffectiveLengthsOptions * BlastEffectiveLengthsOptionsFree(BlastEffectiveLengthsOptions *options)
Deallocate memory for BlastEffectiveLengthsOptions*.
#define BLAST_DEFAULT_MATRIX
Default matrix name: BLOSUM62.
BlastScoringOptions * BlastScoringOptionsFree(BlastScoringOptions *options)
Deallocate memory for BlastScoringOptions.
QuerySetUpOptions * BlastQuerySetUpOptionsFree(QuerySetUpOptions *options)
Deallocate memory for QuerySetUpOptions.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
BlastEffectiveLengthsParameters * BlastEffectiveLengthsParametersFree(BlastEffectiveLengthsParameters *parameters)
Deallocate memory for BlastEffectiveLengthsParameters*.
Int2 BlastEffectiveLengthsParametersNew(const BlastEffectiveLengthsOptions *options, Int8 db_length, Int4 num_seqs, BlastEffectiveLengthsParameters **parameters)
Allocate memory for BlastEffectiveLengthsParameters.
Boolean Blast_ProgramIsPhiBlast(EBlastProgramType p)
Returns true if program is PHI-BLAST.
Boolean Blast_QueryIsTranslated(EBlastProgramType p)
Returns true if the query is translated.
Boolean Blast_SubjectIsProtein(EBlastProgramType p)
Returns true if the subject is protein.
Boolean Blast_SubjectIsNucleotide(EBlastProgramType p)
Returns true if the subject is nucleotide.
Boolean Blast_QueryIsNucleotide(EBlastProgramType p)
Returns true if the query is nucleotide.
Boolean Blast_ProgramIsRpsBlast(EBlastProgramType p)
Returns true if program is RPS-BLAST.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Boolean Blast_SubjectIsTranslated(EBlastProgramType p)
Returns true if the subject is translated.
BlastQueryInfo * BlastQueryInfoFree(BlastQueryInfo *query_info)
Deallocate memory for query information structure.
BlastQueryInfo * BlastQueryInfoNew(EBlastProgramType program, int num_queries)
Allocate memory for query information structure.
Utilities initialize/setup BLAST.
Int2 BlastSetup_ScoreBlkInit(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, const BlastScoringOptions *scoring_options, EBlastProgramType program_number, BlastScoreBlk **sbpp, double scale_factor, Blast_Message **blast_message, GET_MATRIX_PATH get_path)
Initializes the score block structure.
Int2 BLAST_MainSetUp(EBlastProgramType program_number, const QuerySetUpOptions *qsup_options, const BlastScoringOptions *scoring_options, BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, double scale_factor, BlastSeqLoc **lookup_segments, BlastMaskLoc **mask, BlastScoreBlk **sbpp, Blast_Message **blast_message, GET_MATRIX_PATH get_path)
"Main" setup routine for BLAST.
Int2 Blast_ScoreBlkMatrixInit(EBlastProgramType program_number, const BlastScoringOptions *scoring_options, BlastScoreBlk *sbp, GET_MATRIX_PATH get_path)
Initializes the substitution matrix in the BlastScoreBlk according to the scoring options specified.
Int2 BLAST_CalcEffLengths(EBlastProgramType program_number, const BlastScoringOptions *scoring_options, const BlastEffectiveLengthsParameters *eff_len_params, const BlastScoreBlk *sbp, BlastQueryInfo *query_info, Blast_Message **blast_message)
Function to calculate effective query length and db length as well as effective search space.
Definitions and prototypes used by blast_stat.c to calculate BLAST statistics.
BlastScoreBlk * BlastScoreBlkFree(BlastScoreBlk *sbp)
Deallocates BlastScoreBlk as well as all associated structures.
Blast_KarlinBlk * Blast_KarlinBlkNew(void)
Callocs a Blast_KarlinBlk.
Int2 Blast_KarlinBlkUngappedCalc(Blast_KarlinBlk *kbp, Blast_ScoreFreq *sfp)
Computes the parameters lambda, H, and K for use in calculating the statistical significance of high-scori...
Int2 Blast_ScoreBlkKbpIdealCalc(BlastScoreBlk *sbp)
Calculates the Karlin-Altschul parameters assuming standard residue compositions for the query and su...
Int2 Blast_KarlinBlkCopy(Blast_KarlinBlk *kbp_to, Blast_KarlinBlk *kbp_from)
Copies contents of one Karlin block to another.
BlastScoreBlk * BlastScoreBlkNew(Uint1 alphabet, Int4 number_of_contexts)
Allocates and initializes BlastScoreBlk.
Definitions of special type used in BLAST.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
@ eTblastx
Translated nucl-Translated nucl.
@ eBlastn
Nucl-Nucl (traditional blastn)
@ eRPSBlast
protein-pssm (reverse-position-specific BLAST)
@ eBlastp
Protein-Protein.
@ eTblastn
Protein-Translated nucl.
@ eMegablast
Nucl-Nucl (traditional megablast)
@ ePSITblastn
PSI Tblastn.
@ eDiscMegablast
Nucl-Nucl using discontiguous megablast.
@ eRPSTblastn
nucleotide-pssm (RPS blast with translated query)
@ eBlastx
Translated nucl-Protein.
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
Int2 BLAST_CreateMixedFrameDNATranslation(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info)
Initialize the mixed-frame sequence for out-of-frame gapped extension.
Int1 BLAST_ContextToFrame(EBlastProgramType prog_number, Uint4 context_number)
This function translates the context number of a context into the frame of the sequence.
Int2 BLAST_GetAllTranslations(const Uint1 *nucl_seq, EBlastEncoding encoding, Int4 nucl_length, const Uint1 *genetic_code, Uint1 **translation_buffer_ptr, Uint4 **frame_offsets_ptr, Uint1 **mixed_seq_ptr)
Translate nucleotide into 6 frames.
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
BOOST_AUTO_TEST_CASE(NumberOfContextsBlastp)
ncbi::TMaskedQueryRegions mask
CAutoEnvironmentVariable –.
Wrapper class for BLAST_SequenceBlk .
const CSeq_id * GetFirstId() const
Defines BLAST error codes (user errors included)
Handle to the nucleotide-nucleotide options to the BLAST algorithm.
Encapsulates ALL the BLAST algorithm's options.
Wrapper class for BlastQueryInfo .
~CBlastSetupTestFixture()
static BlastScoringOptions * s_GetScoringOpts(const CBlastOptions &opts)
static void s_ValidateProtein130912(CRef< IQueryFactory > query_factory, const CBlastOptions &opts, string prefix)
Auxiliary function to validate that the query sequence data hasn't been modified (filtering can chang...
static void s_TestSingleSubjectNuclMask(BLAST_SequenceBlk const *seqblk)
static BlastEffectiveLengthsOptions * s_GetEffLenOpts(const CBlastOptions &opts)
Auxiliary class to compute the effective search space.
Base class for reading FASTA sequences.
NCBI C++ Object Manager dependent implementation of IQueryFactory.
This class wraps the C++ Object Manager to control its lifetime and to facilitate the creation of SSe...
static CTestObjMgr & Instance()
Class for the messages for an individual query sequence.
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
Calls sym dust lib in algo/dustmask and returns CSeq_locs for use by BLAST.
Declares auxiliary class to calculate the effective search space.
Operators to edit gaps in sequences.
#define test(a, b, c, d, e)
static const char *const strings[]
Defines the interface to interact with the genetic code singleton object.
void GenCodeSingletonFini()
Uninitialize the genetic code singleton.
void GenCodeSingletonInit()
Initialize the genetic code singleton.
TSeqPos length
Length of the buffer above (not necessarily sequence length!)
void SetupSubjects(TSeqLocVector &subjects, EBlastProgramType program, vector< BLAST_SequenceBlk * > *seqblk_vec, unsigned int *max_subjlen)
Sets up internal subject data structure for the BLAST search.
EBlastEncoding
Different types of sequence encodings for sequence retrieval from the BLAST database.
TAutoUint1ArrayPtr FindGeneticCode(int genetic_code)
Retrieves the requested genetic code in Ncbistdaa format.
void SetGapOpeningCost(int g)
void SetupQueries(TSeqLocVector &queries, BlastQueryInfo *qinfo, BLAST_SequenceBlk **seqblk, EBlastProgramType prog, objects::ENa_strand strand_opt, TSearchMessages &messages)
Populates BLAST_SequenceBlk with sequence data for use in CORE BLAST.
objects::ENa_strand GetStrandOption() const
virtual BLAST_SequenceBlk * GetSequenceBlk()=0
Accessor for the BLAST_SequenceBlk structure.
CRef< ILocalQueryData > MakeLocalQueryData(const CBlastOptions *opts)
Creates and caches an ILocalQueryData.
static CBlastOptionsHandle * Create(EProgram program, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested program,...
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
void SetGapExtensionCost(int e)
#define BLASTAA_SEQ_CODE
== Seq_code_ncbistdaa
void SetMatrixName(const char *matrix)
BlastEffectiveLengthsOptions * GetEffLenOpts() const
Returns BlastEffectiveLengthsOptions for eLocal objects, NULL for eRemote.
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
TAutoUint1Ptr data
Sequence data.
BLAST_SequenceBlk * Get() const
const Uint1 AMINOACID_TO_NCBISTDAA[]
Translates between ncbieaa and ncbistdaa.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
BlastScoringOptions * GetScoringOpts() const
Returns BlastScoringOptions for eLocal objects, NULL for eRemote.
char * BlastFindMatrixPath(const char *matrix_name, Boolean is_prot)
Returns the path to a specified matrix.
unsigned int GetNumberOfContexts(EBlastProgramType p)
Returns the number of contexts for a given BLAST program.
void SetupQueryInfo(TSeqLocVector &queries, EBlastProgramType prog, objects::ENa_strand strand_opt, BlastQueryInfo **qinfo)
Allocates the query information structure and fills the context offsets, in case of multiple queries,...
void Blast_FindDustFilterLoc(TSeqLocVector &queries, const CBlastNucleotideOptionsHandle *nucl_handle)
Finds dust locations for a given set of sequences by calling the symmetric dust lib.
string Blast_ProgramNameFromType(EBlastProgramType program)
Returns a string program name, given a blast::EBlastProgramType enumeration.
EProgram ProgramNameToEnum(const std::string &program_name)
Map a string into an element of the ncbi::blast::EProgram enumeration (except eBlastProgramMax).
Int8 GetEffSearchSpace(size_t query_index=0) const
Retrieve the effective search space calculated for a given query.
SBlastSequence GetSequence(const objects::CSeq_loc &sl, EBlastEncoding encoding, objects::CScope *scope, objects::ENa_strand strand=objects::eNa_strand_plus, ESentinelType sentinel=eSentinels, std::string *warnings=NULL)
Retrieves a sequence using the object manager.
@ eBlastEncodingNcbi4na
NCBI4na.
@ eBlastEncodingProtein
NCBIstdaa.
@ eBlastEncodingNcbi2na
NCBI2na.
@ eNoSentinels
Do not use sentinel bytes.
@ eSentinels
Use sentinel bytes.
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
element_type * get(void) const
Get pointer.
#define MSerial_AsnText
I/O stream manipulators –.
CRef< CSeq_entry > ReadSet(int max_seqs=kMax_Int, ILineErrorListener *pMessageListener=nullptr)
Read multiple sequences (by default, as many as are available.)
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
int8_t Int1
1-byte (8-bit) signed integer
position_type GetLength(void) const
ENa_strand
strand of nucleic acid
@ eNa_strand_both
in forward orientation
@ e_Gi
GenInfo Integrated Database.
const TSeq & GetSeq(void) const
Get the variant data.
unsigned int
A callback function used to compare two keys in a database.
vector< EBlastProgramType > GetAllBlastProgramTypes()
Magic spell ;-) needed for some weird compilers... very empiric.
#define DIM(A)
dimension of an array.
#define FALSE
bool replacement for C indicating false.
std::istream & in(std::istream &in_, double &x_)
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
static SLJIT_INLINE sljit_ins st(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Structure to hold a sequence.
Uint4 num_seq_ranges
Number of elements in seq_ranges.
BlastMaskLoc * lcase_mask
Locations to be masked from operations on this sequence: lookup table for query; scanning for subject...
SSeqRange * seq_ranges
Ranges of the sequence to search.
Boolean lcase_mask_allocated
TRUE if memory has been allocated for lcase_mask.
Int4 length
Length of sequence.
Uint1 * sequence
Sequence used for search (could be translation).
Uint1 * oof_sequence
Mixed-frame protein representation of a nucleotide sequence for out-of-frame alignment.
Int4 query_length
Length of this query, strand or frame.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
Int4 length_adjustment
Length adjustment for boundary conditions.
Int8 eff_searchsp
Effective search space for this context.
Options for setting up effective lengths and search spaces.
Parameters for setting up effective lengths and search spaces.
Int8 real_db_length
Total database length to use in search space calculations.
Int4 real_num_seqs
Number of subject sequences to use for search space calculations.
Structure for keeping the query masking information.
BlastSeqLoc ** seqloc_array
Array of masked locations.
The query related information.
BlastContextInfo * contexts
Information per context.
Structure used for scoring calculations.
Blast_ScoreFreq ** sfp
score frequencies for scoring matrix.
Blast_KarlinBlk ** kbp_gap
K-A parameters for gapped alignments.
SBlastScoreMatrix * matrix
scoring matrix data
Blast_KarlinBlk * kbp_ideal
Ideal values (for query with average database composition).
Blast_KarlinBlk ** kbp_gap_std
K-A parameters for std (not position-based) alignments.
Blast_KarlinBlk ** kbp_std
K-A parameters for ungapped alignments.
Scoring options block. Used to produce the BlastScoreBlk structure. This structure may be needed for lo...
Int2 penalty
Penalty for a mismatch.
Int4 gap_open
Extra penalty for starting a gap.
Int4 gap_extend
Penalty for each gap residue.
Int2 reward
Reward for a match.
Used to hold a set of positions, mostly used for filtering.
SSeqRange * ssr
location data on the sequence.
Structure to hold the Karlin-Altschul parameters.
Structure to hold a message from the core of the BLAST engine.
Options required for setting up the query sequence.
int ** data
actual scoring matrix data, stored in row-major form
Structure to store sequence data and its length for use in the CORE of BLAST (it's a malloc'ed array ...
Structure to represent a single sequence to be fed to BLAST.
Int4 left
left endpoint of range (zero based)
Int4 right
right endpoint of range (zero based)
Utility stuff for more convenient using of Boost.Test library.
void g_IgnoreDataFile(const string &pattern, bool do_ignore=true)
Ignore (or stop ignoring, depending on do_ignore) NCBI application data files matching the given patt...
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4