;
95query_loc->
SetWhole().SetGi(query_gi);
98m_vQuery.push_back(
SSeqLoc(query_loc, query_scope));
101subject_loc->
SetWhole().SetGi(subject_gi);
104m_vSubject.push_back(
SSeqLoc(subject_loc, subject_scope));
113diagnostics->ungapped_stat;
115diagnostics->gapped_stat;
117BOOST_REQUIRE_EQUAL(22670293, (
int)ungapped_stats->
lookup_hits);
118BOOST_REQUIRE_EQUAL(296326, ungapped_stats->
init_extends);
120BOOST_REQUIRE_EQUAL(1254, gapped_stats->
extensions);
131BOOST_REQUIRE_EQUAL(1152, (
int)ungapped_stats->
lookup_hits);
132BOOST_REQUIRE_EQUAL(24, ungapped_stats->
init_extends);
134BOOST_REQUIRE_EQUAL(8, gapped_stats->
extensions);
149 const Int4kNumHspsEnd=23;
159setupQueryAndSubject(kQueryGi, kSubjectGi);
172 const intkQueryOffsetFinal[kNumHspsEnd] =
173{ 407, 486, 421, 569, 265, 320, 266, 321, 727, 659,
17492, 1, 1, 727, 422, 216, 167, 825, 167, 831, 216, 369, 49 };
175 const intkQueryLengthFinal[kNumHspsEnd] =
176{ 164, 85, 62, 67, 58, 74, 56, 69, 56, 66, 147, 69,
17773, 61, 40, 26, 35, 54, 35, 48, 21, 69, 22 };
178 const intkScoreFinal[kNumHspsEnd] =
179{ 368, 199, 160, 104, 99, 95, 94, 92, 94, 89, 108,
180101, 97, 95, 89, 86, 84, 84, 83, 79, 75, 74, 74};
181 const doublekEvalueFinal[kNumHspsEnd] =
182{1.84467e-35, 4.47098e-34, 4.47098e-34, 4.47098e-34,
1834.23245e-08, 4.23245e-08, 3.29958e-07, 3.29958e-07,
1847.11395e-07, 7.11395e-07, 8.64076e-05, 0.000570668,
1850.001678, 0.00287725, 0.0145032, 0.0325588,
1860.0558201, 0.0558201, 0.0730883, 0.214807, 0.631249,
1870.826482, 0.826482 };
193BOOST_REQUIRE_EQUAL(1,
results->num_queries);
194BOOST_REQUIRE_EQUAL(1,
results->hitlist_array[0]->hsplist_count);
197BOOST_REQUIRE_EQUAL(kNumHspsEnd, hsplist->
hspcnt);
204 for(
intindex=0; index<kNumHspsEnd; index++) {
206BOOST_REQUIRE_EQUAL(kQueryOffsetFinal[index], tmp_hsp->
query.
offset);
207BOOST_REQUIRE_EQUAL(kQueryLengthFinal[index],
209BOOST_REQUIRE_EQUAL(kScoreFinal[index], tmp_hsp->
score);
210BOOST_REQUIRE(
fabs((kEvalueFinal[index]-tmp_hsp->
evalue) /
211kEvalueFinal[index]) < 0.001);
221 const Int4kNumHspsEnd=8;
228setupQueryAndSubject(kQueryGi, kSubjectGi);
245 const intkQueryOffsetFinal[kNumHspsEnd] = { 98, 425, 320, 340, 823, 675, 247, 103};
246 const intkQueryLengthFinal[kNumHspsEnd] = { 223,211, 35, 13, 46, 19, 25, 24};
247 const intkScoreFinal[kNumHspsEnd] = {1138, 173, 72, 46, 40, 36, 32, 30};
248 const doublekEvalueFinal[kNumHspsEnd] =
249{2.52769e-153, 4.98722e-18, 2.4525e-05, 0.0352845, 0.187202, 0.568557, 1.72476, 3.0028};
255BOOST_REQUIRE_EQUAL(1,
results->num_queries);
256BOOST_REQUIRE_EQUAL(1,
results->hitlist_array[0]->hsplist_count);
259BOOST_REQUIRE_EQUAL(kNumHspsEnd, hsplist->
hspcnt);
266 for(
intindex=0; index<kNumHspsEnd; index++)
269BOOST_REQUIRE_EQUAL(kQueryOffsetFinal[index],
271BOOST_REQUIRE_EQUAL(kQueryLengthFinal[index],
273BOOST_REQUIRE_EQUAL(kScoreFinal[index], tmp_hsp->
score);
274BOOST_REQUIRE(
fabs(kEvalueFinal[index]-tmp_hsp->
evalue) < 1.0e-10 ||
275 fabs((kEvalueFinal[index]-tmp_hsp->
evalue) /
276kEvalueFinal[index]) < 0.01);
287 const intkNumHsps = 330;
290setupQueryAndSubject(kQueryGi, kSubjectGi);
307BOOST_REQUIRE_EQUAL(1,
results->num_queries);
308BOOST_REQUIRE_EQUAL(1,
results->hitlist_array[0]->hsplist_count);
311BOOST_REQUIRE_EQUAL(kNumHsps, hsplist->
hspcnt);
321 const intkNumHsps = 3;
322 const intkMaskedLength = 389;
325setupQueryAndSubject(kQueryGi, kSubjectGi);
335 Uint4masked_length = m_vQuery[0].mask->GetPacked_int().GetLength();
336BOOST_REQUIRE_EQUAL(kMaskedLength, (
int) masked_length);
348BOOST_REQUIRE_EQUAL(1,
results->num_queries);
349BOOST_REQUIRE_EQUAL(1,
results->hitlist_array[0]->hsplist_count);
352BOOST_REQUIRE_EQUAL(kNumHsps, hsplist->
hspcnt);
359 const stringkDbName(
"data/seqn");
360 const size_tkNumHits = 2;
362 const intkScores[kNumHits] = { 1024, 944 };
363 const intkNumIdent[kNumHits] = { 458, 423 };
366query_loc->
SetWhole().SetGi(kQueryGi);
369m_vQuery.push_back(
SSeqLoc(query_loc, query_scope));
384 CLocalBlastblaster(query_factory, options, dbinfo);
388BOOST_REQUIRE_EQUAL((
int)1, (
int)
results.GetNumResults());
390BOOST_REQUIRE_EQUAL(kNumHits, alignment->
Get().size());
397BOOST_REQUIRE_EQUAL(kScores[0], score);
398BOOST_REQUIRE(second_hit->
GetNamedScore(
"score", score));
399BOOST_REQUIRE_EQUAL(kScores[1], score);
402BOOST_REQUIRE(first_hit->
GetNamedScore(
"num_ident", num_ident));
403BOOST_REQUIRE_EQUAL(kNumIdent[0], num_ident);
404BOOST_REQUIRE(second_hit->
GetNamedScore(
"num_ident", num_ident));
405BOOST_REQUIRE_EQUAL(kNumIdent[1], num_ident);
409BOOST_REQUIRE_EQUAL(kGis[0], first_hit->
GetSeq_id(1).
GetGi());
411BOOST_REQUIRE_EQUAL(kGis[1], second_hit->
GetSeq_id(1).
GetGi());
425 const stringkDbName(
"data/seqp");
428 const intkNumHits = 31;
429 const intkNumHitsToCheck = 3;
430 const intkIndices[kNumHitsToCheck] = { 1, 4, 8 };
431 const intkScores[kNumHitsToCheck] = { 519, 56, 54 };
432 const intkOids[kNumHitsToCheck] = { 74, 971, 45 };
433 const intkQueryLengths[kNumHitsToCheck] = { 297, 46, 63 };
434 const intkSubjectLengths[kNumHitsToCheck] = { 298, 48, 55 };
438query_loc1->
SetWhole().SetGi(kQueryGi1);
441m_vQuery.push_back(
SSeqLoc(query_loc1, query_scope1));
443query_loc2->
SetWhole().SetGi(kQueryGi2);
446m_vQuery.push_back(
SSeqLoc(query_loc2, query_scope2));
463BOOST_REQUIRE_EQUAL(2,
results->num_queries);
464BOOST_REQUIRE_EQUAL(kNumHits,
results->hitlist_array[0]->hsplist_count);
466BOOST_REQUIRE_CLOSE(
results->hitlist_array[0]->hsplist_array[0]->best_evalue,
467 results->hitlist_array[0]->hsplist_array[0]->hsp_array[0]->evalue,
468 results->hitlist_array[0]->hsplist_array[0]->hsp_array[0]->evalue/2);
470 for(
intindex = 0; index < kNumHitsToCheck;
472 const intkHitIndex = kIndices[index];
474 results->hitlist_array[0]->hsplist_array[kHitIndex];
475BOOST_REQUIRE_EQUAL(kOids[index], hsp_list->
oid);
478BOOST_REQUIRE_EQUAL(kScores[index], hsp->
score);
479BOOST_REQUIRE_EQUAL(0, hsp->
num_ident);
480BOOST_REQUIRE_EQUAL(kQueryLengths[index],
482BOOST_REQUIRE_EQUAL(kSubjectLengths[index],
489 const unsigned char query[] = {
'\016',
'\007',
'\014',
'\024',
'\004',
'\015',
'\011',
490 '\022',
'\012',
'\016',
'\001',
'\010',
'\007',
'\005',
'\014',
'\023',
491 '\021',
'\003',
'\016',
'\005',
'\013',
'\006',
'\020',
'\011',
'\006',
492 '\015',
'\016',
'\004',
'\017'};
494 const unsigned char subject[] = {
'\000',
'\000',
'\000',
495 '\004',
'\015',
'\011',
'\022',
'\012',
'\016',
'\001',
'\010',
'\007',
496 '\005',
'\014',
'\023',
'\021',
'\003',
'\016',
'\005',
'\013',
'\006',
497 '\020',
'\011',
'\006',
'\015',
'\016',
'\004',
'\017'};
515 Int4q_offset, s_offset;
527BOOST_REQUIRE_EQUAL(q_offset, 22);
528BOOST_REQUIRE_EQUAL(s_offset, 21);
529BOOST_REQUIRE_EQUAL(retval,
true);
540BOOST_REQUIRE_EQUAL(q_offset, 18);
541BOOST_REQUIRE_EQUAL(s_offset, 17);
542BOOST_REQUIRE_EQUAL(retval,
true);
552BOOST_REQUIRE_EQUAL(retval,
false);
User-defined methods of the data storage class.
static const Int8 kEffectiveSearchSpace
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
Function calls to actually perform a BLAST search (high level).
Structures and functions prototypes used for BLAST gapped extension.
Boolean BlastGetOffsetsForGappedAlignment(const Uint1 *query, const Uint1 *subject, const BlastScoreBlk *sbp, BlastHSP *hsp, Int4 *q_retval, Int4 *s_retval)
Function to look for the highest scoring window (of size HSP_MAX_WINDOW) in an HSP and return the mid...
void Blast_HSPListSortByEvalue(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by e-value, with scores and other criteria used to resolve ties.
BlastHSP * Blast_HSPNew(void)
Allocates and zeros out memory for an HSP structure.
Boolean Blast_HSPListIsSortedByScore(const BlastHSPList *hsp_list)
Check if HSP list is sorted by score.
Declaration of ADT to save and retrieve lists of HSPs in the BLAST engine.
Declares the CBlastNucleotideOptionsHandle class.
Definitions which are dependent on the NCBI C++ Object Manager.
The structures and functions in blast_options.
#define BLAST_GAP_OPEN_PROT
Protein gap costs are the defaults for the BLOSUM62 scoring matrix.
Int2 BLAST_FillScoringOptions(BlastScoringOptions *options, EBlastProgramType program, Boolean greedy_extension, Int4 penalty, Int4 reward, const char *matrix, Int4 gap_open, Int4 gap_extend)
Fill non-default values in the BlastScoringOptions structure.
#define BLAST_GAP_EXTN_PROT
cost to extend a gap.
Int2 BlastScoringOptionsNew(EBlastProgramType program, BlastScoringOptions **options)
Allocate memory for BlastScoringOptions and fill with default values.
BlastScoringOptions * BlastScoringOptionsFree(BlastScoringOptions *options)
Deallocate memory for BlastScoringOptions.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
BlastSeqSrc * BlastSeqSrcFree(BlastSeqSrc *seq_src)
Frees the BlastSeqSrc structure by invoking the destructor function set by the user-defined construct...
Utilities initialize/setup BLAST.
Int2 Blast_ScoreBlkMatrixInit(EBlastProgramType program_number, const BlastScoringOptions *scoring_options, BlastScoreBlk *sbp, GET_MATRIX_PATH get_path)
Initializes the substitution matrix in the BlastScoreBlk according to the scoring options specified.
BlastScoreBlk * BlastScoreBlkNew(Uint1 alphabet, Int4 number_of_contexts)
Allocates and initializes BlastScoreBlk.
Functions to do gapped alignment with traceback.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
@ eBlastp
Protein-Protein.
@ eTblastn
Protein-Translated nucl.
void testLongMatchDiagnostics(BlastDiagnostics *diagnostics)
void testShortMatchDiagnostics(BlastDiagnostics *diagnostics)
BOOST_AUTO_TEST_CASE(testTBLASTNLongMatchBlastEngine)
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
Wrapper class for BlastHSPResults .
Handle to the nucleotide-nucleotide options to the BLAST algorithm.
Search class to perform the preliminary stage of the BLAST search.
Handle to the nucleotide-nucleotide options to the discontiguous BLAST algorithm.
Class to perform a BLAST search on local BLAST databases. Note that PHI-BLAST can be run using this cl...
NCBI C++ Object Manager-dependent implementation of IQueryFactory.
Search Results for All Queries.
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
bool GetNamedScore(const string &id, int &score) const
Get score.
static CTestObjMgr & Instance()
Declares the CDiscNucleotideOptionsHandle class.
void SetEvalueThreshold(double eval)
Sets EvalueThreshold.
void SetMatchReward(int r)
Sets MatchReward.
CRef< SInternalData > Run()
Borrow the internal data and results.
BlastSeqSrc * SeqDbBlastSeqSrcInit(const string &dbname, bool is_prot, Uint4 first_seq=0, Uint4 last_seq=0, Int4 mask_algo_id=-1, ESubjectMaskingType mask_type=eNoSubjMasking)
Initialize the sequence source structure.
void SetEffectiveSearchSpace(Int8 eff)
Sets EffectiveSearchSpace.
CRef< CSearchResultSet > Run()
Executes the search.
static CBlastOptionsHandle * Create(EProgram program, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested program,...
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
void SetTraditionalBlastnDefaults()
Sets TraditionalBlastnDefaults.
void SetMismatchPenalty(int p)
Sets MismatchPenalty.
BlastSeqSrc * MultiSeqBlastSeqSrcInit(TSeqLocVector &seq_vector, EBlastProgramType program, bool dbscan_mode=false)
Initialize the sequence source structure.
#define BLASTAA_SEQ_CODE
== Seq_code_ncbistdaa
void SetWindowSize(int ws)
Sets WindowSize.
void SetRepeatFiltering(bool val)
Enable repeat filtering.
void SetFilterString(const char *f, bool clear=true)
void Blast_FindRepeatFilterLoc(TSeqLocVector &query_loc, const CBlastOptionsHandle *opts_handle)
Finds repeats locations for a given set of sequences.
void SetGapExtensionCost(int e)
Sets GapExtensionCost.
void SetMaskAtHash(bool m=true)
Sets MaskAtHash.
void SetFilterString(const char *f, bool clear=true)
Sets FilterString.
void SetWordSize(int ws)
Sets WordSize.
CRef< TBlastDiagnostics > m_Diagnostics
Diagnostic output from preliminary and traceback stages.
void SetPercentIdentity(double p)
Sets PercentIdentity.
void SetGapOpeningCost(int g)
Sets GapOpeningCost.
CRef< TBlastHSPStream > m_HspStream
HSP output of the preliminary stage goes here.
BlastHSPResults * ComputeBlastHSPResults(BlastHSPStream *stream, Uint4 max_num_hsps=0, bool *rm_hsps=NULL, vector< bool > *rm_hsps_info=NULL) const
Return HSPs in a structure other than the HSPStream? Provide conversion? How to combine this with CBl...
@ eBlastDbIsNucleotide
nucleotide
static bool PreferAccessionOverGi(void)
Check if the option to prefer accession.version over GI is enabled (SeqId/PreferAccessionOverGi or SE...
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
const Tdata & Get(void) const
Get the member data.
E_Choice Which(void) const
Which variant is currently selected.
TGi GetGi(void) const
Get the variant data.
TVersion GetVersion(void) const
Get the Version member data.
const TDdbj & GetDdbj(void) const
Get the variant data.
const TAccession & GetAccession(void) const
Get the Accession member data.
@ e_Gi
GenInfo Integrated Database.
Main class to perform a BLAST search on the local machine.
Magic spell ;-) needed for some weird compilers... very empiric.
Uint1 Boolean
bool replacement for C
#define FALSE
bool replacement for C indicating false.
Defines: CTimeFormat - storage class for time format.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
C++ implementation of repeats filtering for C++ BLAST.
Implementation of the BlastSeqSrc interface for a vector of sequence locations.
Implementation of the BlastSeqSrc interface using the C++ BLAST databases API.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Return statistics from the BLAST search.
BlastUngappedStats * ungapped_stat
Ungapped extension counts.
BlastGappedStats * gapped_stat
Gapped extension counts.
void setupQueryAndSubject(TGi query_gi, TGi subject_gi)
~BlastEngineTestFixture()
Structure containing hit counts from the gapped stage of a BLAST search.
Int4 good_extensions
Number of HSPs below the e-value threshold after gapped extension.
Int4 extensions
Total number of gapped extensions performed.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Int4 hspcnt
Number of HSPs saved.
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Structure holding all information about an HSP.
double evalue
This HSP's e-value.
Int4 num_ident
Number of identical base pairs in this HSP.
BlastSeg query
Query sequence info.
BlastSeg subject
Subject sequence info.
Int4 score
This HSP's raw score.
Structure used for scoring calculations.
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Int4 offset
Start of hsp.
Complete type definition of Blast Sequence Source ADT.
Structure containing hit counts from the ungapped stage of a BLAST search.
Int8 lookup_hits
Number of successful lookup table hits.
Int4 init_extends
Number of initial words found and extended.
Int4 good_init_extends
Number of successful initial extensions, i.e.
Structure to represent a single sequence to be fed to BLAST.
Utility stuff for more convenient using of Boost.Test library.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4