temp_sub_score = 0;
100 Int4temp_indel_score = 0;
109 Int1*editInstructions;
115 Int4editStep, nextEditStep;
117 Int4gapCost = gapOpen + gapExtend;
120band = highDiag-lowDiag+1;
127 for(index1 = 1; index1 <= end1; index1++)
132rightd = highDiag-lowDiag+1;
134score_array[leftd].
best= 0;
135 state[0][leftd] = -1;
136initialScore = -gapOpen;
137 for(diagIndex = leftd+1; diagIndex <= rightd; diagIndex++) {
138score_array[diagIndex].
best= initialScore = initialScore-gapExtend;
139score_array[diagIndex-1].
best_gap= initialScore-gapCost;
142score_array[rightd+1].
best= kMinScore;
143score_array[rightd].
best_gap= kMinScore;
144score_array[leftd-1].
best_gap= -gapCost;
145score_array[leftd-1].
best= kMinScore;
146 for(
i= 1;
i<= end1;
i++) {
147 if(
i> end2-highDiag)
151matrixRow = matrix[seq1[
i]];
152temp_indel_score = score_array[leftd].
best_gap;
154 if((index2 = leftd+lowDiag-1+
i) > 0)
155temp_sub_score = score_array[leftd].best+matrixRow[seq2[index2]];
156 if(temp_indel_score > temp_sub_score || index2 <= 0) {
157temp_sub_score = temp_indel_score;
160tempHorScore = temp_sub_score-gapCost;
162 if((temp_indel_score-= gapExtend) >= tempHorScore) {
163score_array[leftd-1].
best_gap= temp_indel_score;
166score_array[leftd-1].
best_gap= tempHorScore;
169stateRow = &
state[
i][leftd];
170*stateRow++ = nextState;
171score_array[leftd].
best= temp_sub_score;
172 for(curd=leftd+1, score_row = &score_array[curd]; curd <= rightd; curd++) {
173temp_sub_score = score_row->
best+ matrixRow[seq2[curd+lowDiag-1+
i]];
174 if((temp_indel_score=score_row->
best_gap) > temp_sub_score) {
175 if(temp_indel_score > tempHorScore) {
176score_row->
best= temp_indel_score;
177tempHorScore -= gapExtend;
178(score_row++-1)->best_gap = temp_indel_score-gapExtend;
181score_row->
best= tempHorScore;
182tempHorScore -= gapExtend;
183(score_row++-1)->best_gap = temp_indel_score-gapExtend;
186}
else if(tempHorScore > temp_sub_score) {
187score_row->
best= tempHorScore;
188tempHorScore -= gapExtend;
189(score_row++-1)->best_gap = temp_indel_score-gapExtend;
192score_row->
best= temp_sub_score;
193 if((temp_sub_score -= gapCost) >
194(tempHorScore-=gapExtend)) {
195tempHorScore = temp_sub_score;
200 if(temp_sub_score > (temp_indel_score -= gapExtend)) {
201*stateRow++= nextState;
202(score_row++-1)->best_gap = temp_sub_score;
205(score_row++-1)->best_gap = temp_indel_score;
212score = (score_row-1)->best;
214editInstructions = (
Int1*)
malloc(end1+end2);
215 for(index1 = end1, diagIndex = rightd, editStep=0, charCounter = 0;
216index1>=0; index1--, charCounter++) {
218 if(stateDecoder == -1)
230editInstructions[charCounter] = editStep = nextEditStep;
232 for(charIndex = charCounter-1; charIndex >= 0; charIndex--) {
233 switch(editInstructions[charIndex]) {
245 sfree(editInstructions);
263 return(length <= 0 ? 0 : (gap_open + gap_extend*length));
287lowDiag =
MIN(
MAX(-start1, lowDiag),
MIN(start2-start1,0));
288highDiag =
MAX(
MIN(start2, highDiag),
MAX(start2-start1,0));
293 return-
s_GapCost(gapOpen, gapExtend, start1);
297 return-
s_GapCost(gapOpen, gapExtend, start2);
300 if((highDiag-lowDiag+1) <= 1) {
302 for(
i= 1;
i<= start1;
i++) {
304score += matrix[seq1[
i]][seq2[
i]];
309score =
s_Align(seq1, seq2, start1, start2, lowDiag, highDiag, matrix,
310gapOpen, gapExtend, alignScript);
331 Int4prefixMatchedBitPattern = 0;
342maskShiftPlus1 = (
mask<< 1) +1;
343 for(
i= 0, prefixMatchedBitPattern= 0;
i<
len;
i++) {
344prefixMatchedBitPattern =
345((prefixMatchedBitPattern << 1) | maskShiftPlus1) &
350&rightOne, &rightMaskOnly);
352*start = rightMaskOnly + 1;
372 Int4*prefixMatchedBitPattern;
382prefixMatchedBitPattern = (
Int4*)
384 for(wordIndex = 0; wordIndex < num_words; wordIndex++) {
386prefixMatchedBitPattern[wordIndex] = 0;
389 for(
i= 0;
i<
len;
i++) {
393prefixMatchedBitPattern,
400 for(wordIndex = 0; (wordIndex < num_words) && (!found);
403 if((prefixMatchedBitPattern[wordIndex]>>j) % 2 == 1)
405 else if((multiword_items->
match_maskL[wordIndex] >> j)%2 == 1)
413 sfree(prefixMatchedBitPattern);
415*start = rightMaskOnly+1;
420 #define MAX_HITS_IN_WORD 64 434 Int4wordIndex, wordIndex2;
435 Int4twiceHitsOneWord;
450 for(wordIndex = 1; wordIndex < num_words; wordIndex++) {
454 for(j = 0; j <
i; j += wordIndex) {
455 Int4lastOffset = hitArray[j+wordIndex-1];
462 for(hitIndex = 0; hitIndex < twiceHitsOneWord;
463hitIndex+= 2, pos+= wordIndex+1) {
464 for(wordIndex2 = 0; wordIndex2 < wordIndex; wordIndex2++)
465hitArray1[pos+wordIndex2] = hitArray[j+wordIndex2];
466hitArray1[pos+wordIndex2] = hitArray1[pos+wordIndex2-1] +
467oneWordHitArray[hitIndex] + 1;
471 for(j = 0; j < pos; j++)
472hitArray[j] = hitArray1[j];
474 for(j = 0; j < pos; j+= num_words) {
475 if(hitArray[j+num_words-1] ==
len) {
476 for(
i= 0;
i< num_words;
i++)
477hitArray[
i] = hitArray[
i+j];
503 const intkBandLow = -5;
504 const intkBandHigh = 5;
506 Int4startQueryMatch, endQueryMatch;
508 Int4startDbMatch, endDbMatch;
511 Int4queryMatchOffset, dbMatchOffset;
514 Int4patternPosQuery, patternPosDb;
521 Int4** matrix = score_matrix->
data;
524gap_open = score_options->
gap_open;
528endQueryMatch = lenQuerySeq - 1;
530endDbMatch = lenDbSeq - 1;
535&endQueryMatch, pattern_blk);
537&endDbMatch, pattern_blk);
540&endQueryMatch, pattern_blk);
542&endDbMatch, pattern_blk);
544 Int4QueryWord, DbWord;
545 Int4QueryVarSize, DbVarSize;
556queryMatchOffset = dbMatchOffset = 0;
557QueryWord = DbWord = 0;
558QueryVarSize = DbVarSize = 0;
561placeIndex < extra_items->
highestPlace; placeIndex++) {
566 if(patternWord < 0) {
567QueryVarSize += hitArray1[QueryWord] -
568hitArray1[QueryWord-1] -
570DbVarSize += hitArray2[DbWord] -
571hitArray2[DbWord-1] -
579 if(QueryVarSize || DbVarSize) {
580 if(QueryVarSize == DbVarSize) {
586QueryVarSize, DbVarSize,
587kBandLow, kBandHigh, matrix,
588gap_open, gap_extend, alignScript);
590queryMatchOffset += QueryVarSize;
591querySeq += QueryVarSize;
592dbMatchOffset += DbVarSize;
601QueryVarSize = DbVarSize = 0;
604 if(queryMatchOffset + QueryVarSize >= hitArray1[QueryWord])
606 if(dbMatchOffset + DbVarSize >= hitArray2[DbWord])
616 for(patternPosQuery = startQueryMatch, patternPosDb = startDbMatch;
617patternPosQuery <= endQueryMatch || patternPosDb <= endDbMatch; ) {
628 for(queryMatchOffset =0;
631patternPosQuery <= endQueryMatch;
632patternPosQuery++, queryMatchOffset++) ;
633 for(dbMatchOffset = 0;
636patternPosDb <= endDbMatch;
637patternPosDb++, dbMatchOffset++) ;
638 if(queryMatchOffset == dbMatchOffset) {
644}
while(queryMatchOffset > 0);
647 s_BandedAlign(querySeq-1, dbSeq-1, queryMatchOffset, dbMatchOffset,
648kBandLow, kBandHigh, matrix, gap_open, gap_extend,
650querySeq+=queryMatchOffset;
651dbSeq+=dbMatchOffset;
675 Int4query_offset,
Int4subject_offset,
676 Int4query_length,
Int4subject_length)
678 Booleanfound_start, found_end;
679 Int4q_length=0, s_length=0, score_right, score_left;
680 Int4private_q_start, private_s_start;
683 if(gap_align ==
NULL)
686q_length = query_offset;
687s_length = subject_offset;
691found_start =
FALSE;
696 if(q_length != 0 && s_length != 0) {
700&private_q_start, &private_s_start,
TRUE,
NULL, gap_align,
703gap_align->
query_start= q_length - private_q_start + 1;
709q_length += query_length - 1;
710s_length += subject_length - 1;
713 if(q_length < query_blk->length && s_length < subject_blk->length) {
724 if(found_start ==
FALSE) {
729 if(found_end ==
FALSE) {
730gap_align->
query_stop= query_offset + query_length;
731gap_align->
subject_stop= subject_offset + subject_length;
734gap_align->
score= score_right+score_left;
764 if(!
query|| !
subject|| !gap_align || !score_params ||
765!hit_params || !init_hitlist || !hsp_list_ptr)
768 if(init_hitlist->
total== 0)
771hit_options = hit_params->
options;
774 if(*hsp_list_ptr ==
NULL)
777hsp_list = *hsp_list_ptr;
781 for(pattern_index = 0; pattern_index < num_patterns; ++pattern_index) {
787 for(index=0; index<init_hitlist->
total; index++) {
789 Int4s_pat_offset, s_pat_length;
801q_pat_offset, s_pat_offset, q_pat_length,
813q_pat_offset, s_pat_offset,
833*hsp_list_ptr = hsp_list;
841 Int4q_pat_length,
Int4s_pat_length,
845 Int4score_right, score_left, private_q_length, private_s_length;
851 if(!gap_align || !score_params || !pattern_blk)
863&private_q_length, &private_s_length,
FALSE, rev_prelim_tback,
865gap_align->
query_start= q_start - private_q_length;
869s_pat_length, pat_prelim_tback, score_params->
options,
870gap_align->
sbp->
matrix, pattern_blk);
880q_start += q_pat_length - 1;
881s_start += s_pat_length - 1;
883 if((q_start < query_length) && (s_start < subject_length)) {
887query_length-q_start-1, subject_length-s_start-1,
888&private_q_length, &private_s_length,
FALSE, fwd_prelim_tback,
891gap_align->
query_stop= q_start + private_q_length + 1;
892gap_align->
subject_stop= s_start + private_s_length + 1;
895 if(found_end ==
FALSE) {
904gap_align->
score= score_right + score_left;
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
Int4 Blast_SemiGappedAlign(const Uint1 *A, const Uint1 *B, Int4 M, Int4 N, Int4 *a_offset, Int4 *b_offset, Boolean score_only, GapPrelimEditBlock *edit_block, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 query_offset, Boolean reversed, Boolean reverse_sequence, Boolean *fence_hit)
Low level function to perform gapped extension in one direction with or without traceback.
GapEditScript * Blast_PrelimEditBlockToGapEditScript(GapPrelimEditBlock *rev_prelim_tback, GapPrelimEditBlock *fwd_prelim_tback)
Convert the initial list of traceback actions from a non-OOF gapped alignment into a blast edit scrip...
Structures and functions prototypes used for BLAST gapped extension.
Private interface for blast_gapalign.c.
Int2 Blast_HSPInit(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 query_frame, Int2 subject_frame, Int4 score, GapEditScript **gap_edit, BlastHSP **ret_hsp)
Allocates BlastHSP and inits with information from input.
Int4 BlastHspNumMax(Boolean gapped_calculation, const BlastHitSavingOptions *options)
Calculated the number of HSPs that should be saved.
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)
Saves HSP information into a BlastHSPList structure.
void Blast_HSPListSortByScore(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by score.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
ncbi::TMaskedQueryRegions mask
void GapPrelimEditBlockAdd(GapPrelimEditBlock *edit_block, EGapAlignOpType op_type, Int4 num_ops)
Add a new operation to a preliminary edit block, possibly combining it with the last operation if the...
GapPrelimEditBlock * GapPrelimEditBlockFree(GapPrelimEditBlock *edit_block)
Frees a preliminary edit block structure.
@ eGapAlignIns
Insertion: a gap in subject.
@ eGapAlignSub
Substitution.
@ eGapAlignDel
Deletion: a gap in query.
void GapPrelimEditBlockAppend(GapPrelimEditBlock *edit_block1, GapPrelimEditBlock *edit_block2)
Append one GapPrelimEditBlock to the end of the other.
GapPrelimEditBlock * GapPrelimEditBlockNew(void)
Allocates a preliminary edit block structure.
void GapPrelimEditBlockReset(GapPrelimEditBlock *edit_block)
Reset a preliminary edit block without freeing it.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int8_t Int1
1-byte (8-bit) signed integer
#define MIN(a, b)
returns smaller of a and b.
Uint1 Boolean
bool replacment for C
#define TRUE
bool replacment for C indicating true.
#define FALSE
bool replacment for C indicating false.
#define ASSERT
macro for assert.
#define INT4_MIN
Smallest (most negative) number represented by signed int.
#define MAX(a, b)
returns larger of a and b.
Int4 _PHIBlastFindHitsShort(Int4 *hitArray, const Uint1 *seq, Int4 len1, const SPHIPatternSearchBlk *pattern_blk)
Routine to find hits of pattern to sequence when sequence is proteins.
void _PHIPatternWordsLeftShift(Int4 *a, Uint1 b, Int4 numWords)
Shift each word in the array left by 1 bit and add bit b.
Int4 _PHIPatternWordsBitwiseAnd(Int4 *result, Int4 *a, Int4 *b, Int4 numWords)
Do a word-by-word bit-wise and of two integer arrays and put the result in a new array.
void _PHIGetRightOneBits(Int4 s, Int4 mask, Int4 *rightOne, Int4 *rightMaskOnly)
Looks for 1 bits in the same position of s and mask Let R be the rightmost position where s and mask ...
void _PHIPatternWordsBitwiseOr(Int4 *a, Int4 *b, Int4 numWords)
Do a word-by-word bit-wise or of two integer arrays and put the result back in the first array.
@ eMultiWord
Does pattern consist of a multiple words?
@ eOneWord
Does pattern consist of a single word?
#define PHI_BITS_PACKED_PER_WORD
Number of bits packed in a word.
#define PHI_MAX_HIT
Maximal size of an array of pattern hits.
Auxiliary functions for finding pattern matches in sequence (PHI-BLAST), that are used in multiple so...
const int kMaskAaAlphabetBits
Masks all bits corresponding to the aminoacid alphabet, i.e.
#define MAX_HITS_IN_WORD
Maximal possible size of the hits array for one word of pattern.
static Int4 s_Align(Uint1 *seq1, Uint1 *seq2, Int4 end1, Int4 end2, Int4 lowDiag, Int4 highDiag, Int4 **matrix, Int4 gapOpen, Int4 gapExtend, GapPrelimEditBlock *alignScript)
Returns the cost of an optimum conversion within highDiag and lowDiag between two sequence segments a...
static Int4 s_BandedAlign(Uint1 *seq1, Uint1 *seq2, Int4 start1, Int4 start2, Int4 lowDiag, Int4 highDiag, Int4 **matrix, Int4 gapOpen, Int4 gapExtend, GapPrelimEditBlock *alignScript)
Do a banded gapped alignment of two sequences.
Int2 PHIGappedAlignmentWithTraceback(Uint1 *query, Uint1 *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length, Int4 q_pat_length, Int4 s_pat_length, SPHIPatternSearchBlk *pattern_blk)
Perform a gapped alignment with traceback for PHI BLAST.
static Int4 s_PHIBlastAlignPatterns(Uint1 *querySeq, Uint1 *dbSeq, Int4 lenQuerySeq, Int4 lenDbSeq, GapPrelimEditBlock *alignScript, const BlastScoringOptions *score_options, SBlastScoreMatrix *score_matrix, SPHIPatternSearchBlk *pattern_blk)
Align pattern occurrences of the query and subject sequences.
static Int2 s_PHIGetShortPattern(Uint1 *seq, Int4 len, Int4 *start, Int4 *end, SPHIPatternSearchBlk *pattern_blk)
Finds the position of the first pattern match in an input sequence, if pattern consists of a single w...
static Int2 s_PHIGetExtraLongPattern(Uint1 *seq, Int4 len, Int4 *hitArray, SPHIPatternSearchBlk *pattern_blk)
Find pattern occurrences in seq when the pattern description is extra long, report the results back i...
static Int4 s_GapCost(Int4 gap_open, Int4 gap_extend, Int4 length)
k-symbol indel cost.
static void s_PHIGetLongPattern(Uint1 *seq, Int4 len, Int4 *start, Int4 *end, SPHIPatternSearchBlk *pattern_blk)
Finds the position of the first pattern match in an input sequence, if pattern consists of a more tha...
static Int2 s_PHIGappedAlignment(BLAST_SequenceBlk *query_blk, BLAST_SequenceBlk *subject_blk, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 query_offset, Int4 subject_offset, Int4 query_length, Int4 subject_length)
Performs gapped extension for PHI BLAST, given two sequence blocks, scoring and extension options,...
Int2 PHIGetGappedScore(EBlastProgramType program_number, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, BLAST_SequenceBlk *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, const BlastHitSavingParameters *hit_params, const BlastInitialWordParameters *word_params, BlastInitHitList *init_hitlist, BlastHSPList **hsp_list_ptr, BlastGappedStats *gapped_stats, Boolean *fence_hit)
Preliminary gapped alignment for PHI BLAST.
Function prototypes used for PHI BLAST gapped extension and gapped extension with traceback.
Structure to hold a sequence.
Int4 length
Length of sequence.
Uint1 * sequence
Sequence used for search (could be translation).
Int1 frame
Frame number (-1, -2, -3, 0, 1, 2, or 3)
Computed values used as parameters for gapped alignments.
Structure supporting the gapped alignment.
GapPrelimEditBlock * fwd_prelim_tback
traceback from right extensions
GapPrelimEditBlock * rev_prelim_tback
traceback from left extensions
Int4 query_stop
query end offseet of current alignment
Int4 subject_start
subject start offset current alignment
BlastScoreBlk * sbp
Pointer to the scoring information block.
Int4 query_start
query start offset of current alignment
Int4 subject_stop
subject end offset of current alignment
Int4 score
Return value: alignment score.
GapEditScript * edit_script
The traceback (gap) information.
Auxiliary structure for dynamic programming gapped extension.
Int4 best_gap
score of best path that ends in a gap at this position
Int4 best
score of best path that ends in a match at this position
Structure containing hit counts from the gapped stage of a BLAST search.
Int4 extensions
Total number of gapped extensions performed.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Structure holding all information about an HSP.
SPHIHspInfo * pat_info
In PHI BLAST, information about this pattern match.
Options used when evaluating and saving hits These include: a.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
Int4 cutoff_score_min
smallest cutoff score across all contexts
BlastHitSavingOptions * options
The original (unparsed) options.
Structure to hold the initial HSP information.
BlastOffsetPair offsets
Offsets in query and subject, or, in PHI BLAST, start and end of pattern in subject.
Structure to hold all initial HSPs for a given subject sequence.
Int4 total
Total number of hits currently saved.
BlastInitHSP * init_hsp_array
Array of offset pairs, possibly with scores.
Parameter block that contains a pointer to BlastInitialWordOptions and the values derived from it.
The query related information.
BlastContextInfo * contexts
Information per context.
struct SPHIQueryInfo * pattern_info
Counts of PHI BLAST pattern occurrences, used in PHI BLAST only.
SBlastScoreMatrix * matrix
scoring matrix data
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Int4 gap_open
Extra penalty for starting a gap.
Int4 gap_extend
Penalty for each gap residue.
Boolean gapped_calculation
gap-free search if FALSE
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
BlastScoringOptions * options
User-provided values for these params.
Preliminary version of GapEditBlock, used directly by the low- level dynamic programming routines.
Scoring matrix used in BLAST.
int ** data
actual scoring matrix data, stored in row-major form
Auxiliary items needed for a PHI BLAST search with pattern containing multiple words.
Int4 match_maskL[100]
Bit mask representation of input pattern for long patterns.
SExtraLongPatternItems * extra_long_items
Additional items necessary if pattern contains pieces longer than a word.
Int4 SLL[100][256]
For each letter in the alphabet and each word in the masked pattern representation,...
Int4 inputPatternMasked[(30 *11)]
Masked input pattern.
Int4 bitPatternByLetter[256][11]
Which positions can a character occur in for long patterns.
Int4 numWords
Number of words need to hold bit representation of pattern.
In PHI BLAST: information about pattern match in a given HSP.
Int4 index
Index of query pattern occurrence for this HSP.
Int4 length
Length of this pattern occurrence in subject.
Information about a single pattern occurence in the query.
Int4 length
Length of this pattern occurrence.
Int4 offset
Starting offset of this pattern occurrence.
Structure containing all auxiliary information needed in a pattern search.
SShortPatternItems * one_word_items
Items necessary when pattern fits in one word.
EPatternType flagPatternLength
Indicates if the whole pattern fits in 1 word, each of several parts of the pattern fit in a word,...
SLongPatternItems * multi_word_items
Additional items, when pattern requires multiple words.
Int4 num_patterns
Number of pattern occurrences in query.
SPHIPatternInfo * occurrences
Array of pattern occurrence information structures.
Auxiliary items needed for a PHI BLAST search with a pattern that fits in a single word.
Int4 * whichPositionPtr
Array of positions where pattern lettern should match, for a single word of the pattern.
Int4 match_mask
Bit mask representation of input pattern for patterns that fit in a word.
Uint4 s_start
Start offset of pattern in subject.
Uint4 s_end
End offset of pattern in subject.
struct BlastOffsetPair::@7 phi_offsets
Pattern offsets in subject (PHI BLAST only)
voidp calloc(uInt items, uInt size)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4