kCheckingMatches = s &
mask;
70 if((kCheckingMatches >> right_index) % 2 == 1)
72 if((
mask>> right_index) %2 == 1)
73left_index = right_index;
80*rightOne = right_index;
81*rightMaskOnly = left_index;
100 return(rightOne - rightMaskOnly);
108 Int4prefixMatchedBitPattern = 0;
119maskShiftPlus1 = (
mask<< 1) + 1;
120 for(
i= 0;
i< len1;
i++) {
124prefixMatchedBitPattern =
125((prefixMatchedBitPattern << 1) | maskShiftPlus1) &
127 if(prefixMatchedBitPattern &
mask) {
130hitArray[numMatches++] =
i;
131hitArray[numMatches++] =
i-
s_LenOf(prefixMatchedBitPattern,
mask)+1;
157 Uint4prefixMatchedBitPattern;
164 Int4twiceNumHits = 0;
169 const Int4kMaskShiftPlus1 = (kMatchMask << 1)+1;
173prefixMatchedBitPattern =
174((kMatchMask * ((1 << (pos+1))-1)*2) + (1 << (pos+1))-1) &
178remain = (
len-pos) % 4;
181prefixMatchedBitPattern = kMaskShiftPlus1;
185 for(
i= 0;
i< end;
i++) {
186 if( (
tmp= (prefixMatchedBitPattern &
188 for(j = 0; j < 4; j++) {
189 if(
tmp& kMatchMask) {
190hitArray[twiceNumHits++] =
i*4 + j + pos;
191hitArray[twiceNumHits++] =
i*4 + j + pos -
197prefixMatchedBitPattern =
198(((prefixMatchedBitPattern << 4) | kMask2) &
202 if( (
tmp= (prefixMatchedBitPattern &
204 for(j = 0; j < remain; j++) {
205 if(
tmp& kMatchMask) {
206hitArray[twiceNumHits++] =
i*4+j + pos;
207hitArray[twiceNumHits++] =
i*4+j + pos -
243 for(
i= 0;
i< numWords;
i++) {
244x = (
a[
i] << 1) +
b;
245 if(x >= kOverflowThreshold) {
246 a[
i] = x - kOverflowThreshold;
260 for(
i= 0;
i< numWords;
i++)
261 a[
i] = (
a[
i] |
b[
i]);
268 Int4returnValue = 0;
270 for(
i= 0;
i< numWords;
i++) {
293 for(wordIndex = 0; wordIndex < numWords; wordIndex++) {
295 if((s[wordIndex] >> bitIndex) % 2 == 1)
297 if((
mask[wordIndex] >> bitIndex) %2 == 1)
320 Int4*prefixMatchedBitPattern;
322 Int4twiceNumHits = 0;
332prefixMatchedBitPattern = (
Int4*)
calloc(num_words,
sizeof(
Int4));
333 for(wordIndex = 0; wordIndex < num_words; wordIndex++) {
335prefixMatchedBitPattern[wordIndex] = 0;
339 for(
i= 0;
i< len1;
i++) {
347hitArray[twiceNumHits++] =
i;
348hitArray[twiceNumHits++] =
i-
352 sfree(prefixMatchedBitPattern);
372 Int4twiceHitsOneCall;
377 Int4nextPosInHitArray;
378 Int4hitIndex1, hitIndex2;
395 if(twiceNumHits < 2)
398 for(wordIndex = most_specific_word+1;
399wordIndex < multiword_items->
numWords; wordIndex++) {
408nextPosInHitArray = 0;
409 for(hitIndex2 = 0; hitIndex2 < twiceNumHits; hitIndex2 += 2) {
412hitArray[hitIndex2]+1,
413 MIN(
len-hitArray[hitIndex2]-1,
414extra_items->
spacing[wordIndex-1] +
416is_dna, pattern_blk);
417 for(hitIndex1 = 0; hitIndex1 < twiceHitsOneCall; hitIndex1+= 2) {
418hitArray1[nextPosInHitArray+hitIndex1] =
419hitArray[hitIndex2]+hitArray1[nextPosInHitArray+hitIndex1]+1;
420hitArray1[nextPosInHitArray+hitIndex1+1] = hitArray[hitIndex2+1];
422nextPosInHitArray += twiceHitsOneCall;
424twiceNumHits = nextPosInHitArray;
425 if(twiceNumHits < 2)
428 for(hitIndex2 = 0; hitIndex2 < nextPosInHitArray; hitIndex2++)
429hitArray[hitIndex2] = hitArray1[hitIndex2];
432 for(wordIndex = most_specific_word-1; wordIndex >=0;
442nextPosInHitArray = 0;
443 for(hitIndex2 = 0; hitIndex2 < twiceNumHits; hitIndex2 += 2) {
444start = hitArray[hitIndex2+1] - extra_items->
spacing[wordIndex] -
450hitArray[hitIndex2+1]-start, is_dna, pattern_blk);
451 for(hitIndex1 = 0; hitIndex1 < twiceHitsOneCall; hitIndex1+= 2) {
452hitArray1[nextPosInHitArray+hitIndex1] = hitArray[hitIndex2];
453hitArray1[nextPosInHitArray+hitIndex1+1] = start +
454hitArray1[nextPosInHitArray+hitIndex1+1];
456nextPosInHitArray += twiceHitsOneCall;
458twiceNumHits = nextPosInHitArray;
459 if(twiceNumHits < 2)
462 for(hitIndex2 = 0; hitIndex2 < nextPosInHitArray; hitIndex2++)
463hitArray[hitIndex2] = hitArray1[hitIndex2];
481 const Int4kMinPhiLookupSize = 8;
569 Int4 i, twiceNumHits;
576loc_length = to - from + 1;
577sequence =
query->sequence + from;
579twiceNumHits =
FindPatternHits(hitArray, sequence, loc_length, is_dna,
582 for(
i= 0;
i< twiceNumHits;
i+= 2) {
583 if(hitArray[
i+1]+from == 0 &&
590hitArray[
i]-hitArray[
i+1]+1);
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Int4 BlastQueryInfoGetQueryLength(const BlastQueryInfo *qinfo, EBlastProgramType program, Int4 query_index)
Obtains the sequence length for a given query in the query, without taking into consideration any app...
ncbi::TMaskedQueryRegions mask
static const char location[]
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
#define MIN(a, b)
returns smaller of a and b.
#define INT4_MAX
largest nubmer represented by signed int
void * BlastMemDup(const void *orig, size_t size)
Copies memory using memcpy and malloc.
Uint1 Boolean
bool replacment for C
#define ASSERT
macro for assert.
static Int4 s_FindHitsShortDNA(Int4 *hitArray, const Uint1 *seq, Int4 pos, Int4 len, const SPHIPatternSearchBlk *pattern_blk)
Find hits when sequence is DNA and pattern is short returns twice the number of hits.
static Int4 s_LenOfL(Int4 *s, Int4 *mask, Int4 numWords)
Returns the difference between the offset F of a first 1-bit in a word sequence and the first offset ...
static Int4 s_FindHitsVeryLong(Int4 *hitArray, const Uint1 *seq, Int4 len, Boolean is_dna, const SPHIPatternSearchBlk *pattern_blk)
Find matches when pattern is very long,.
static Int4 s_LenOf(Int4 s, Int4 mask)
Looks for 1 bits in the same position of s and mask Let R be the rightmost position where s and mask ...
Int4 PHIGetPatternOccurrences(const SPHIPatternSearchBlk *pattern_blk, const BLAST_SequenceBlk *query, const BlastSeqLoc *location, Boolean is_dna, BlastQueryInfo *query_info)
Finds all pattern hits in a given query and saves them in the previously allocated SPHIQueryInfo stru...
Int4 _PHIBlastFindHitsShort(Int4 *hitArray, const Uint1 *seq, Int4 len1, const SPHIPatternSearchBlk *pattern_blk)
Routine to find hits of pattern to sequence when sequence is proteins.
SPHIQueryInfo * SPHIQueryInfoFree(SPHIQueryInfo *pat_info)
Frees the pattern information structure.
Int4 FindPatternHits(Int4 *hitArray, const Uint1 *seq, Int4 len, Boolean is_dna, const SPHIPatternSearchBlk *pattern_blk)
Find the places where the pattern matches seq; 3 different methods are used depending on the length o...
void _PHIPatternWordsLeftShift(Int4 *a, Uint1 b, Int4 numWords)
Shift each word in the array left by 1 bit and add bit b.
static Int2 s_PHIBlastAddPatternHit(SPHIQueryInfo *pattern_info, Int4 offset, Int4 length)
Adds a new pattern hit to the PHI BLAST pseudo lookup table.
Int4 _PHIPatternWordsBitwiseAnd(Int4 *result, Int4 *a, Int4 *b, Int4 numWords)
Do a word-by-word bit-wise and of two integer arrays and put the result in a new array.
static Int4 s_FindHitsShortHead(Int4 *hitArray, const Uint1 *seq, Int4 start, Int4 len, Uint1 is_dna, const SPHIPatternSearchBlk *pattern_blk)
Top level routine to find hits when pattern has a short description.
void _PHIGetRightOneBits(Int4 s, Int4 mask, Int4 *rightOne, Int4 *rightMaskOnly)
Looks for 1 bits in the same position of s and mask Let R be the rightmost position where s and mask ...
static Int4 s_FindHitsLong(Int4 *hitArray, const Uint1 *seq, Int4 len1, const SPHIPatternSearchBlk *pattern_blk)
Finds places where pattern matches seq and returns them as pairs of positions in consecutive entries ...
void _PHIPatternWordsBitwiseOr(Int4 *a, Int4 *b, Int4 numWords)
Do a word-by-word bit-wise or of two integer arrays and put the result back in the first array.
SPHIQueryInfo * SPHIQueryInfoCopy(const SPHIQueryInfo *pat_info)
Copies the SPHIQueryInfo structure.
SPHIQueryInfo * SPHIQueryInfoNew()
Allocates the pattern occurrences structure.
Functions for finding pattern matches in sequence (PHI-BLAST).
@ eMultiWord
Does pattern consist of a multiple words?
@ eOneWord
Does pattern consist of a single word?
#define PHI_BITS_PACKED_PER_WORD
Number of bits packed in a word.
#define PHI_MAX_HIT
Maximal size of an array of pattern hits.
Auxiliary functions for finding pattern matches in sequence (PHI-BLAST), that are used in multiple so...
static int pattern_info(int what, void *where, BOOL unsetok)
Structure to hold a sequence.
The query related information.
struct SPHIQueryInfo * pattern_info
Counts of PHI BLAST pattern occurrences, used in PHI BLAST only.
Used to hold a set of positions, mostly used for filtering.
SSeqRange * ssr
location data on the sequence.
struct BlastSeqLoc * next
next in linked list
Uint4 DNAprefixSLL[100][256]
Where prefix of DNA 4-mer matches pattern, for multiple-word patterns.
Uint4 DNAsuffixSLL[100][256]
Where suffix of DNA 4-mer matches pattern, for multiple-word patterns.
Uint4 * DNAwhichPrefixPosPtr
Prefix position array for DNA patterns.
Uint4 * DNAwhichSuffixPosPtr
Suffix position array for DNA patterns.
Auxiliary items needed for a PHI BLAST search with pattern containing multiple words.
Int4 match_maskL[100]
Bit mask representation of input pattern for long patterns.
SExtraLongPatternItems * extra_long_items
Additional items necessary if pattern contains pieces longer than a word.
SDNALongPatternItems * dna_items
Additional items necessary for a DNA pattern.
Int4 SLL[100][256]
For each letter in the alphabet and each word in the masked pattern representation,...
Int4 bitPatternByLetter[256][11]
Which positions can a character occur in for long patterns.
Int4 numWords
Number of words need to hold bit representation of pattern.
Information about a single pattern occurence in the query.
Structure containing all auxiliary information needed in a pattern search.
SShortPatternItems * one_word_items
Items necessary when pattern fits in one word.
EPatternType flagPatternLength
Indicates if the whole pattern fits in 1 word, each of several parts of the pattern fit in a word,...
SLongPatternItems * multi_word_items
Additional items, when pattern requires multiple words.
In PHI BLAST, structure containing information about all pattern occurrences in query.
char * pattern
Pattern used, saved here for formatting purposes.
Int4 num_patterns
Number of pattern occurrences in query.
SPHIPatternInfo * occurrences
Array of pattern occurrence information structures.
Int4 left
left endpoint of range (zero based)
Int4 right
right endpoint of range (zero based)
Auxiliary items needed for a PHI BLAST search with a pattern that fits in a single word.
Int4 * whichPositionPtr
Array of positions where pattern lettern should match, for a single word of the pattern.
SDNAShortPatternItems * dna_items
Additional items for a DNA search.
Int4 match_mask
Bit mask representation of input pattern for patterns that fit in a word.
voidp calloc(uInt items, uInt size)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4