(kbp->
Lambda> 0 && kbp->
K> 0 && kbp->
H> 0);
69 ASSERT(kbp_in && query_info && kbp_ret);
71 for(
i=query_info->
first_context; i<=query_info->last_context;
i++) {
75*kbp_ret = kbp_in[
i];
97 doublemin_lambda = (double)
INT4_MAX;
99 ASSERT(kbp_in && query_info);
101 for(
i=query_info->
first_context; i<=query_info->last_context;
i++) {
105 if(min_lambda > kbp_in[
i]->Lambda)
107min_lambda = kbp_in[
i]->
Lambda;
109*kbp_out = kbp_in[
i];
114 ASSERT(min_lambda > 0.0);
165 Uint4subject_length,
172 const intkQueryLenForHashTable = 8000;
202context <= query_info->last_context; ++
context) {
234hit_params, sbp, query_info, subject_length, p);
249 for(
i= 0;
i< 256;
i++) {
255 if(
i& 3) score += penalty;
elsescore += reward;
256 if((
i>> 2) & 3) score += penalty;
elsescore += reward;
257 if((
i>> 4) & 3) score += penalty;
elsescore += reward;
258 if(
i>> 6) score += penalty;
elsescore += reward;
288 doublegap_decay_rate = 0.0;
302gapped_calculation =
FALSE;
317context <= query_info->last_context; ++
context) {
353 ASSERT(query_length > 0);
362(
Uint8)query_length)*((
Uint8)subj_length),
363 TRUE, gap_decay_rate);
367new_cutoff =
MIN(new_cutoff, gap_trigger);
369new_cutoff = gap_trigger;
372new_cutoff =
MIN(new_cutoff,
401 if(new_cutoff < cutoff_min) {
402cutoff_min = new_cutoff;
405 if(xdrop_max < curr_cutoffs->x_dropoff)
509 if( params ==
NULL)
511printf(
"parameters{ null }\n");
514printf(
"BlastScoringParameters:\n");
517printf(
" options = NULL\n");
520printf(
" options:\n");
521printf(
" matrix = %s\n", options->
matrix);
522printf(
" matrix_path = %s\n", options->
matrix_path);
523printf(
" reward = %d\n", options->
reward);
524printf(
" penalty = %d\n", options->
penalty);
527printf(
" gap_open = %d\n", options->
gap_open);
528printf(
" gap_extend = %d\n", options->
gap_extend);
529printf(
" is_ooframe = %d\n", options->
is_ooframe);
530printf(
" shift_pen = %d\n", options->
shift_pen);
533printf(
" reward = %d\n", params->
reward);
534printf(
" penalty = %d\n", params->
penalty);
535printf(
" gap_open = %d\n", params->
gap_open);
536printf(
" gap_extend = %d\n", params->
gap_extend);
537printf(
" shift_pen = %d\n", params->
shift_pen);
538printf(
" scale_factor = %f\n\n", params->
scale_factor);
549 if(score_options ==
NULL)
586(*parameters)->real_db_length = db_length;
587(*parameters)->real_num_seqs = num_seqs;
604 if(!link_hsp_params)
620*link_hsp_params = params;
629 if(!word_params || !hit_params)
634 if(gapped_calculation) {
673evalue = pattern_space*paramC*(1+Lambda*score)*
695 const intkMaxIter=20;
696 inteffNumPatterns = 0;
702 for(iteration=0; iteration<kMaxIter; iteration++)
704 inttargetScore = (lowScore+highScore)/2;
707lowScore = targetScore;
709highScore = targetScore;
711 if((highScore-lowScore) <= 1)
738 Int4avg_subj_length,
739 Int4compositionBasedStats,
757gapped_calculation =
FALSE;
759 if(options->
do_sum_stats&& gapped_calculation && avg_subj_length <= 0)
790 Int4max_protein_gap;
794 if(gapped_calculation) {
801}
else if(max_protein_gap <= 0) {
814 MAX(max_protein_gap, 0);
825avg_subj_length, compositionBasedStats, params);
833 Int4avg_subject_length,
Int4compositionBasedStats,
862}
else if(sbp->
kbp) {
863kbp_array = sbp->
kbp;
864gapped_calculation =
FALSE;
879context <= query_info->last_context; ++
context) {
903context <= query_info->last_context; ++
context) {
914context <= query_info->last_context; ++
context) {
937 intcbs_stretch = (compositionBasedStats > 1) ? 5 : 1;
953 doubleevalue_hsp = 1.0;
958 Int8searchsp = (
Int8)
MIN(avg_qlen, avg_subject_length) *
959(
Int8)avg_subject_length;
964context <= query_info->last_context; ++
context) {
981context <= query_info->last_context; ++
context) {
986cutoff_min =
MIN(cutoff_min,
1001 Int8db_length,
Int4subject_length)
1004 doublegap_prob, gap_decay_rate, x_variable, y_variable;
1009 if(!link_hsp_params)
1037((double) subject_length))/(kbp->
H));
1038query_length = query_length - expected_length;
1040subject_length = subject_length - expected_length;
1041query_length =
MAX(query_length, 1);
1042subject_length =
MAX(subject_length, 1);
1046 if(db_length > subject_length) {
1047y_variable =
log((
double) (db_length)/(
double) subject_length)*(kbp->
K)/
1050y_variable =
log((
double) (subject_length + expected_length)/
1051(
double) subject_length)*(kbp->
K)/(gap_decay_rate);
1054search_sp = ((
Int8) query_length)* ((
Int8) subject_length);
1055x_variable = 0.25*y_variable*((double) search_sp);
1063x_variable /= (1.0 - gap_prob +
kEpsilon);
1067x_variable /= (gap_prob +
kEpsilon);
1088printf(
"BlastInitialWordParamters:\n");
1089printf(
" x_dropoff_max = %d\n", word_params->
x_dropoff_max);
1091printf(
" cutoffs:\n");
1093context <= query_info->last_context; ++
context)
1107printf(
"BlastExtensionParameters:\n");
1108printf(
" gap_x_dropoff = %d\n", ext_params->
gap_x_dropoff);
1117printf(
"BlastHitSavingParameters:\n");
1120context <= query_info->last_context; ++
context)
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
#define CODON_LENGTH
Codons are always of length 3.
#define NUM_FRAMES
Number of frames to which we translate in translating searches.
#define DEFAULT_LONGEST_INTRON
For translated gapped searches, this is the default value in nucleotides of longest_intron (for ungap...
Structures and API used for saving BLAST hits.
Int4 PhiBlastGetEffectiveNumberOfPatterns(const BlastQueryInfo *query_info)
Count the number of occurrences of pattern in sequence, which do not overlap by more than half the pa...
#define BLASTERR_NOVALIDKARLINALTSCHUL
Could not calculate Karlin-Altschul statistics for any context.
Routines for creating nucleotide BLAST lookup tables.
Int2 BlastHitSavingParametersUpdate(EBlastProgramType program_number, const BlastScoreBlk *sbp, const BlastQueryInfo *query_info, Int4 avg_subject_length, Int4 compositionBasedStats, BlastHitSavingParameters *params)
Updates cutoff scores in hit saving parameters.
BlastHitSavingParameters * BlastHitSavingParametersFree(BlastHitSavingParameters *parameters)
Deallocate memory for BlastHitSavingParameters*.
Int2 BlastScoringParametersNew(const BlastScoringOptions *score_options, BlastScoreBlk *sbp, BlastScoringParameters **parameters)
Calculate scaled cutoff scores and gap penalties.
Int2 BlastExtensionParametersNew(EBlastProgramType program_number, const BlastExtensionOptions *options, BlastScoreBlk *sbp, BlastQueryInfo *query_info, BlastExtensionParameters **parameters)
Calculate the raw values for the X-dropoff parameters.
Int2 BlastLinkHSPParametersNew(EBlastProgramType program_number, Boolean gapped_calculation, BlastLinkHSPParameters **link_hsp_params)
Initialize the linking HSPs parameters with default values.
BlastEffectiveLengthsParameters * BlastEffectiveLengthsParametersFree(BlastEffectiveLengthsParameters *parameters)
Deallocate memory for BlastEffectiveLengthsParameters*.
void printBlastScoringParameters(BlastScoringParameters *params)
static double s_BlastFindSmallestLambda(Blast_KarlinBlk **kbp_in, const BlastQueryInfo *query_info, Blast_KarlinBlk **kbp_out)
Returns the smallest lambda value from a collection of Karlin-Altschul blocks.
Int2 BlastHitSavingParametersNew(EBlastProgramType program_number, const BlastHitSavingOptions *options, const BlastScoreBlk *sbp, const BlastQueryInfo *query_info, Int4 avg_subj_length, Int4 compositionBasedStats, BlastHitSavingParameters **parameters)
Allocate memory and initialize the BlastHitSavingParameters structure.
static Int2 s_BlastFindValidKarlinBlk(Blast_KarlinBlk **kbp_in, const BlastQueryInfo *query_info, Blast_KarlinBlk **kbp_ret)
Returns the first valid Karlin-Altschul block from the list of blocks.
BlastInitialWordParameters * BlastInitialWordParametersFree(BlastInitialWordParameters *parameters)
Deallocate memory for BlastInitialWordParameters.
BlastExtensionParameters * BlastExtensionParametersFree(BlastExtensionParameters *parameters)
Deallocate memory for BlastExtensionParameters.
void printBlastHitSavingParameters(BlastHitSavingParameters *hit_params, BlastQueryInfo *query_info)
static double s_GetEstimatedPhiExpect(int score, const BlastQueryInfo *query_info, const BlastScoreBlk *sbp, int effNumPatterns)
Returns the estimated expect value for the pattern match with a given scoring alignment.
Int2 BlastInitialWordParametersNew(EBlastProgramType program_number, const BlastInitialWordOptions *word_options, const BlastHitSavingParameters *hit_params, const LookupTableWrap *lookup_wrap, const BlastScoreBlk *sbp, BlastQueryInfo *query_info, Uint4 subject_length, BlastInitialWordParameters **parameters)
Allocate memory for BlastInitialWordParameters and set x_dropoff.
void printAllParameters(BlastHitSavingParameters *hit_params, BlastExtensionParameters *ext_params, BlastInitialWordParameters *word_params, BlastQueryInfo *query_info)
static double s_GetCutoffEvalue(EBlastProgramType program)
Compute the default cutoff expect value for ungapped extensions.
BlastLinkHSPParameters * BlastLinkHSPParametersFree(BlastLinkHSPParameters *parameters)
Deallocate memory for BlastLinkHSPParameters.
static Boolean s_BlastKarlinBlkIsValid(const Blast_KarlinBlk *kbp)
Returns true if the Karlin-Altschul block doesn't have its lambda, K, and H fields set to negative va...
static Int4 s_PhiBlastCutoffScore(double ethresh, const BlastQueryInfo *query_info, const BlastScoreBlk *sbp)
Estimates a cutoff score for use in preliminary gapped stage of phiblast.
Int2 BlastLinkHSPParametersUpdate(const BlastInitialWordParameters *word_params, const BlastHitSavingParameters *hit_params, Boolean gapped_calculation)
Update BlastLinkHSPParameters, using calculated values of other parameters.
void printBlastInitialWordParamters(BlastInitialWordParameters *word_params, BlastQueryInfo *query_info)
void CalculateLinkHSPCutoffs(EBlastProgramType program, BlastQueryInfo *query_info, const BlastScoreBlk *sbp, BlastLinkHSPParameters *link_hsp_params, const BlastInitialWordParameters *word_params, Int8 db_length, Int4 subject_length)
Calculates cutoff scores and returns them.
Int2 BlastInitialWordParametersUpdate(EBlastProgramType program_number, const BlastHitSavingParameters *hit_params, const BlastScoreBlk *sbp, BlastQueryInfo *query_info, Uint4 subj_length, BlastInitialWordParameters *parameters)
Update cutoff scores in BlastInitialWordParameters structure.
void printBlastExtensionParameters(BlastExtensionParameters *ext_params)
BlastScoringParameters * BlastScoringParametersFree(BlastScoringParameters *parameters)
Deallocate memory for BlastScoringParameters.
Int2 BlastEffectiveLengthsParametersNew(const BlastEffectiveLengthsOptions *options, Int8 db_length, Int4 num_seqs, BlastEffectiveLengthsParameters **parameters)
Allocate memory for BlastEffectiveLengthsParameters.
Structure and function definitions for BLAST parameter structures, which are internal to the CORE of ...
#define RESTRICTED_ALIGNMENT_WORST_EVALUE
Because approximate gapped alignment adds extra overhead, it should be avoided if there is no perform...
#define CUTOFF_E_BLASTP
default evalue (ungapped blastp)
#define CUTOFF_E_BLASTN
Expect values corresponding to the default cutoff scores for all ungapped and gapped blastn alignment...
#define BLAST_GAP_SIZE
Default gap size.
@ eDiagHash
use hash table (blastn only)
@ eDiagArray
use diagonal structures with array of last hits and levels.
#define BLAST_GAP_PROB
Default parameters for linking HSPs.
#define BLAST_GAP_DECAY_RATE_GAPPED
Gap decay rate for gapped search.
#define CUTOFF_E_BLASTX
default evalue (ungapped blastx)
#define BLAST_GAP_PROB_GAPPED
Gap probability for gapped search.
#define BLAST_GAP_DECAY_RATE
Gap decay rate for ungapped search.
#define CUTOFF_E_TBLASTX
default evalue (tblastx)
#define BLAST_OVERLAP_SIZE
Default overlap size.
#define CUTOFF_E_TBLASTN
default evalue (ungapped tblastn)
Boolean Blast_ProgramIsPhiBlast(EBlastProgramType p)
Returns true if program is PHI-BLAST (i.e.
Boolean Blast_QueryIsTranslated(EBlastProgramType p)
Returns true if the query is translated.
Boolean Blast_ProgramIsNucleotide(EBlastProgramType p)
Boolean Blast_QueryIsPattern(EBlastProgramType p)
Boolean Blast_ProgramIsRpsBlast(EBlastProgramType p)
Returns true if program is RPS-BLAST (i.e.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Boolean Blast_SubjectIsTranslated(EBlastProgramType p)
Returns true if the subject is translated.
const double kEpsilon
Small constant to test against 0.
Int4 BLAST_SpougeEtoS(double E, Blast_KarlinBlk *kbp, Blast_GumbelBlk *gbp, Int4 qlen, Int4 slen)
Estimate the score for a specified expect value.
Int2 BLAST_Cutoffs(Int4 *S, double *E, Blast_KarlinBlk *kbp, Int8 searchsp, Boolean dodecay, double gap_decay_rate)
Calculate the cutoff score from the expected number of HSPs or vice versa.
static const char * expect
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
for(len=0;yy_str[len];++len)
Prototypes for portable math library (ported from C Toolkit)
#define NCBIMATH_LN2
Natural log(2)
long BLAST_Nint(double x)
Nearest integer.
#define MIN(a, b)
returns smaller of a and b.
#define INT4_MAX
largest number represented by signed int
Uint1 Boolean
bool replacement for C
#define TRUE
bool replacement for C indicating true.
#define FALSE
bool replacement for C indicating false.
#define ASSERT
macro for assert.
#define MAX(a, b)
returns larger of a and b.
Int4 query_length
Length of this query, strand or frame.
Boolean is_valid
Determine if this context is valid or not.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
Int8 eff_searchsp
Effective search space for this context.
Options for setting up effective lengths and search spaces.
Parameters for setting up effective lengths and search spaces.
Options used for gapped extension These include: a.
double gap_x_dropoff_final
X-dropoff value for the final gapped extension (in bits)
double gap_x_dropoff
X-dropoff value for gapped extension (in bits)
Computed values used as parameters for gapped alignments.
BlastExtensionOptions * options
The original (unparsed) options.
Int4 gap_x_dropoff_final
X-dropoff value for the final gapped extension (raw)
Int4 gap_x_dropoff
X-dropoff value for gapped extension (raw)
All the gapped cutoff values that can change from context to context.
Int4 cutoff_score
Raw cutoff score corresponding to the e-value provided by the user if no sum stats,...
Int4 cutoff_score_max
Raw cutoff score corresponding to the e-value provided by user, cutoff_score must be <= this.
Options used when evaluating and saving hits These include: a.
Int4 longest_intron
The longest distance between HSPs allowed for combining via sum statistics with uneven gaps.
double low_score_perc
Low-score option.
double expect_value
The expect value cut-off threshold for an HSP, or a combined hit if sum statistics is used.
Int4 cutoff_score
The (raw) score cut-off threshold.
Int4 mask_level
Only keep the highest scoring HSP when more than one HSP overlaps the same region of the query by mor...
Boolean do_sum_stats
Force sum statistics to be used to combine HSPs, TRUE by default for all ungapped searches and transl...
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
BlastGappedCutoffs * cutoffs
per-context gapped cutoff information
Int4 mask_level
Only keep the highest scoring HSP when more than one HSP overlaps the same region of the query by mor...
Boolean restricted_align
TRUE if approximate score-only gapped alignment is used.
Int4 cutoff_score_min
smallest cutoff score across all contexts
Boolean do_sum_stats
TRUE if sum stats will be used.
Int4 * low_score
lowest ungapped score that can trigger a gapped alignment if the hitlist is already full.
double prelim_evalue
evalue for preliminary search (may be higher for CBS).
BlastLinkHSPParameters * link_hsp_params
Parameters for linking HSPs with sum statistics; linking is not done if NULL.
BlastHitSavingOptions * options
The original (unparsed) options.
Options needed for initial word finding and processing.
double gap_trigger
Score in bits for starting gapped extension.
double x_dropoff
X-dropoff value (in bits) for the ungapped extension.
Parameter block that contains a pointer to BlastInitialWordOptions and the values derived from it.
BlastUngappedCutoffs * cutoffs
cutoff values (one per context)
Boolean ungapped_extension
Should an ungapped extension be performed?
BlastInitialWordOptions * options
The original (unparsed) options.
Boolean matrix_only_scoring
Use the scoring matrix ( not table ) to score ungapped and gapped alignments -RMH-.
Int4 cutoff_score_min
smallest cutoff score across all contexts
Int4 x_dropoff_max
largest X-drop cutoff across all contexts
ESeedContainerType container_type
How to store offset pairs for initial seeds?
Int4 nucl_score_table[256]
the combined score of all match/mismatch combinations for aligning four bases
Parameter block for linking HSPs with sum statistics.
double gap_decay_rate
Decay rate for linking HSPs and calculating cutoff scores.
double gap_prob
Probability of decay for linking HSPs.
Int4 cutoff_big_gap
Cutoff sum score for linked HSPs with big gaps.
Int4 cutoff_small_gap
Cutoff sum score for linked HSPs with small gaps.
Int4 overlap_size
Maximal overlap allowed in successive linked HSPs.
Int4 longest_intron
Length of a longest intron for uneven gap linking of HSPs.
Int4 gap_size
Small gap size for linking HSPs.
The query related information.
Int4 first_context
Index of the first element of the context array.
BlastContextInfo * contexts
Information per context.
int num_queries
Number of query sequences.
struct SPHIQueryInfo * pattern_info
Counts of PHI BLAST pattern occurrences, used in PHI BLAST only.
Int4 last_context
Index of the last element of the context array.
Structure used for scoring calculations.
Blast_KarlinBlk ** kbp
Karlin-Altschul parameters.
Boolean matrix_only_scoring
Score ungapped/gapped alignment only using the matrix parameters and with raw scores.
double scale_factor
multiplier for all cutoff and dropoff scores
Blast_KarlinBlk ** kbp_gap
K-A parameters for gapped alignments.
Int4 penalty
penalty for mismatch in blastn.
Blast_KarlinBlk ** kbp_std
K-A parameters for ungapped alignments.
Int4 reward
reward for match in blastn.
Blast_GumbelBlk * gbp
Gumbel parameters for FSC.
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Int2 penalty
Penalty for a mismatch.
EBlastProgramType program_number
indicates blastn, blastp, etc.
Int4 gap_open
Extra penalty for starting a gap.
Int4 gap_extend
Penalty for each gap residue.
Int2 reward
Reward for a match.
Boolean gapped_calculation
gap-free search if FALSE
char * matrix_path
Directory path to where matrices are stored.
Int4 shift_pen
Penalty for shifting a frame in out-of-frame gapping.
char * matrix
Name of the matrix containing all scores: needed for finding neighboring words.
Boolean is_ooframe
Should out-of-frame gapping be used in a translated search?
Boolean complexity_adjusted_scoring
Use cross_match-like complexity adjustment on raw scores.
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
double scale_factor
multiplier for all cutoff scores
Int4 gap_extend
Penalty for each gap residue (scaled version)
Int2 penalty
Penalty for a mismatch.
Int4 shift_pen
Penalty for shifting a frame in out-of-frame gapping (scaled version)
Int4 gap_open
Extra penalty for starting a gap (scaled version)
BlastScoringOptions * options
User-provided values for these params.
Int2 reward
Reward for a match.
All the ungapped cutoff values that can change from context to context.
Int4 reduced_nucl_cutoff_score
for blastn, a reduced cutoff score for use with approximate ungapped alignments
Int4 x_dropoff_init
Raw X-dropoff value specified by the bit score in BlastInitialWordOptions.
Int4 cutoff_score
Cutoff score for saving ungapped hits.
Int4 x_dropoff
Raw X-dropoff value used in the ungapped extension.
Boolean filled
flag indicating that the values of gbp are prepared
Structure to hold the Karlin-Altschul parameters.
double paramC
for use in seed.
double K
K value used in statistics.
double Lambda
Lambda value used in statistics.
double H
H value used in statistics.
double logK
natural log of K value used in statistics
Wrapper structure for different types of BLAST lookup tables.
double probability
Estimated probability of the pattern.
static CS_CONTEXT * context
voidp calloc(uInt items, uInt size)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4