A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/phi__lookup_8c_source.html below:

NCBI C++ ToolKit: src/algo/blast/core/phi_lookup.c Source File

65

maskLeftPlusOne = (

mask

<< 1)+1;

77

prefixPos[

i

] = mask2 & ((

tmp

>>1) |

mask

) &

S

[a1];

80  tmp

= ((

S

[a1]<<1) | maskLeftPlusOne) &

S

[a2];

82  tmp

= ((

tmp

<<1) | maskLeftPlusOne) &

S

[a3];

84

suffixPos[

i

] = ((((

tmp

<<1) | maskLeftPlusOne) &

S

[a4]) << 1) | maskLeftPlusOne;

101  for

(wordIndex = 0; wordIndex < multiword_items->

numWords

; wordIndex++) {

103

compositeMask = mask1 + (mask1>>1)+(mask1>>2)+(mask1>>3);

116

match_mask + (match_mask>>1) + (match_mask>>2) + (match_mask>>3);

147  Int4

recReturnValue1, recReturnValue2;

149  Int4

thisPlaceMasked;

155  for

(

i

= 0;

i

< length;

i

++) {

156

thisPlaceMasked = -inputPatternMasked[

i

];

157  if

(thisPlaceMasked > 0) {

159  for

(j = 0; j < length; j++) {

161

tempPatternMask[j] = inputPatternMasked[j];

162

tempPattern[j] = inputPattern[j];

164

recReturnValue2 = recReturnValue1 =

166  if

(recReturnValue1 == -1)

168  for

(numPos = 0; numPos <= thisPlaceMasked; numPos++) {

171  for

(k = 0; k < length; k++) {

173  for

(

t

= 0;

t

< numPos;

t

++) {

175  if

(recReturnValue1 >= maxLength)

180

inputPatternMasked[recReturnValue1] = tempPatternMask[k];

181

inputPattern[recReturnValue1++] = tempPattern[k];

182  if

(recReturnValue1 >= maxLength)

185  if

(recReturnValue1 >= maxLength)

190

&inputPattern[recReturnValue2],

192

maxLength - recReturnValue2);

193  if

(recReturnValue1 == -1)

195

recReturnValue2 += recReturnValue1;

196

recReturnValue1 = recReturnValue2;

198  return

recReturnValue1;

214  Int4

returnValue = 0;

215  for

(

i

= 0;

i

< length;

i

++) {

216  if

(inputPattern[

i

])

217

returnValue += (1 <<

i

);

242  for

(wordIndex = 0; wordIndex < multiword_items->

numWords

; wordIndex++) {

246

bitPattern += (1 <<

i

);

248

multiword_items->

match_maskL

[wordIndex] = bitPattern;

250  for

(charIndex = 0; charIndex <

BLASTAA_SIZE

; charIndex++) {

251  for

(wordIndex = 0; wordIndex < multiword_items->

numWords

; wordIndex++) {

256

bitPattern = bitPattern | (1 <<

i

);

291  Int4

placeInWord, placeInWord2;

295  double

patternWordProbability;

296  double

most_specific;

307

patternWordProbability = 1.0;

308  for

(placeIndex = 0, wordIndex = 0, placeInWord=0;

309

placeIndex <= numPlacesInPattern; placeIndex++, placeInWord++) {

310  if

(placeIndex==numPlacesInPattern || inputPatternMasked[placeIndex] < 0

312

multiword_items->

match_maskL

[wordIndex] = 1 << (placeInWord-1);

313

oneWordSLL = multiword_items->

SLL

[wordIndex];

314  for

(charIndex = 0; charIndex <

BLASTAA_SIZE

; charIndex++) {

316  for

(placeInWord2 = 0; placeInWord2 < placeInWord; placeInWord2++) {

317  if

((1<< charIndex) &

318

inputPatternMasked[placeIndex-placeInWord+placeInWord2])

319

oneWordMask |= (1 << placeInWord2);

321

oneWordSLL[charIndex] = oneWordMask;

324  if

(patternWordProbability < most_specific) {

325

most_specific = patternWordProbability;

328  if

(placeIndex == numPlacesInPattern)

329

extra_items->

spacing

[wordIndex++] = 0;

330  else if

(inputPatternMasked[placeIndex] < 0) {

331

extra_items->

spacing

[wordIndex++] = -inputPatternMasked[placeIndex];

334

extra_items->

spacing

[wordIndex++] = 0;

337

patternWordProbability = 1.0;

339

patternWordProbability *= (double)

343

multiword_items->

numWords

= wordIndex;

374  ASSERT

(pattern_in && pattern_out && length > 0);

376  for

(index=0; index<length; index++)

378  if

(pattern_in[index] >=

'a'

&& pattern_in[index] <=

'z'

)

379

pattern_out[index] =

toupper

(pattern_in[index]);

381

pattern_out[index] = pattern_in[index];

392  const int

kWildcardThreshold = 30;

401  Int4

currentSetMask, prevSetMask;

405  Int4

minWildcard, maxWildcard;

413  double

positionProbability;

415  Int4

currentWildcardProduct;

417  Int4

wildcardProduct;

419  Int4

* whichPositionsByCharacter=

NULL

;

425  char

* pattern =

NULL

;

426  int

pattern_length = 0;

436

currentWildcardProduct = 1;

439

pattern_length = (

int

)strlen(pattern_in);

444

snprintf(message,

sizeof

(message),

"Pattern is too long (%ld but only %ld supported)"

,

452

pattern =

calloc

(pattern_length+1,

sizeof

(

char

));

454

pattern_blk->

pattern

= pattern;

459  for

(charIndex = 0, posIndex = 0; charIndex < pattern_length; charIndex++)

461

next_char = pattern[charIndex];

462  if

(next_char ==

'\0'

|| next_char ==

'\r'

|| next_char ==

'\n'

)

464  if

(next_char ==

'-'

|| next_char ==

'.'

||

465

next_char ==

'>'

|| next_char ==

' '

|| next_char ==

'<'

)

467  if

( next_char !=

'['

&& next_char !=

'{'

) {

468  if

(next_char ==

'x'

|| next_char==

'X'

) {

471  if

(pattern[charIndex+1] ==

'('

) {

473

secondIndex = charIndex;

476  while

(pattern[secondIndex] !=

','

&&

477

pattern[secondIndex] !=

')'

)

479  if

(pattern[secondIndex] ==

')'

) {

484

positionProbability = 1;

487

sscanf(&pattern[++charIndex],

"%d,%d"

,

488

&minWildcard, &maxWildcard);

489

maxWildcard = maxWildcard - minWildcard;

490

currentWildcardProduct *= (maxWildcard + 1);

491  if

(currentWildcardProduct > wildcardProduct)

492

wildcardProduct = currentWildcardProduct;

494  while

(minWildcard-- > 0) {

505  if

(maxWildcard != 0) {

517  while

(pattern[++charIndex] !=

')'

) ;

523

positionProbability =1;

527  if

(next_char ==

'U'

) {

529

positionProbability = 1;

533

prevSetMask = currentSetMask;

535

charSetMask = (1 << kOrder[(

Uint1

)next_char]);

536  if

(!(prevSetMask & currentSetMask))

538

currentWildcardProduct = 1;

539

positionProbability =

544  if

(next_char ==

'['

) {

546

positionProbability = 0;

549  while

((next_char=pattern[++charIndex]) !=

']'

) {

550  if

((next_char <

'A'

) || (next_char >

'Z'

) || (next_char ==

'\0'

)) {

552  "pattern description has a non-alphabetic" 553  "character inside a bracket"

);

558

charSetMask | (1 << kOrder[(

Uint1

)next_char]);

559

positionProbability +=

562

prevSetMask = currentSetMask;

563

currentSetMask = charSetMask;

564  if

(!(prevSetMask & currentSetMask))

566

currentWildcardProduct = 1;

571

positionProbability = 1;

572  while

((next_char=pattern[++charIndex]) !=

'}'

) {

573

charSetMask = charSetMask -

574

(charSetMask & (1 << kOrder[(

Uint1

)next_char]));

575

positionProbability -=

578

prevSetMask = currentSetMask;

579

currentSetMask = charSetMask;

580  if

(!(prevSetMask & currentSetMask))

582

currentWildcardProduct = 1;

586  if

(pattern[charIndex+1] ==

'('

) {

588

numIdentical = atoi(&pattern[++charIndex]);

590  while

(pattern[++charIndex] !=

')'

) ;

591  while

((numIdentical--) > 0) {

604  "Pattern is too long"

);

617  for

(charIndex = 0; charIndex < posIndex; charIndex++) {

623  for

(secondIndex = charIndex + 1; secondIndex < posIndex;

628  for

(; secondIndex < posIndex; secondIndex++, charIndex++) {

632

posIndex = charIndex;

635

localPattern[posIndex-1] = 1;

639  for

(charIndex = 0; charIndex < posIndex; charIndex++) {

640

tempInputPatternMasked[charIndex] =

642

tempPosIndex = posIndex;

649  for

(charIndex = 0; charIndex < tempPosIndex; charIndex++)

651

tempInputPatternMasked[charIndex];

669  for

(charIndex = 0; charIndex <

BLASTAA_SIZE

; charIndex++) {

671  for

(charSetMask = 0; charSetMask < (

Uint4

)posIndex; charSetMask++) {

673

thisMask |= (1 << charSetMask);

675

whichPositionsByCharacter[charIndex] = thisMask;

681  if

(wildcardProduct > kWildcardThreshold) {

683  "Due to variable wildcards pattern is likely to " 684  "occur too many times in a single sequence\n"

);

746

*offset_ptr = subject_blk->

length

;

749

kIsDna, pattern_blk);

752  for

(index = 0; index < twiceNumHits; index += 2) {

753

offset_pairs[

count

].phi_offsets.s_start = hitArray[index+1];

754

offset_pairs[

count

].phi_offsets.s_end = hitArray[index];

#define sfree(x)

Safe free a pointer: belongs to a higher level header.

Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...

Int2 Blast_MessageWrite(Blast_Message **blast_msg, EBlastSeverity severity, int context, const char *message)

Writes a message to a structure.

const int kBlastMessageNoContext

Declared in blast_message.h as extern const.

@ ePhiNaLookupTable

nucleotide lookup table for phi-blast

@ ePhiLookupTable

protein lookup table specialized for phi-blast

Blast_ResFreq * Blast_ResFreqFree(Blast_ResFreq *rfp)

Deallocates Blast_ResFreq and prob0 element.

Int2 Blast_ResFreqStdComp(const BlastScoreBlk *sbp, Blast_ResFreq *rfp)

Calculates residues frequencies given a standard distribution.

Blast_ResFreq * Blast_ResFreqNew(const BlastScoreBlk *sbp)

Allocates a new Blast_ResFreq structure and fills in the prob element based upon the contents of sbp.

Various auxiliary BLAST utility functions.

#define NCBI2NA_UNPACK_BASE(x, N)

Macro to extract base N from a byte x (N >= 0, N < 4)

ncbi::TMaskedQueryRegions mask

const Uint1 IUPACNA_TO_NCBI4NA[]

Translates between iupacna and ncbi4na.

#define BLASTAA_SIZE

Size of aminoacid alphabet.

const Uint1 AMINOACID_TO_NCBISTDAA[]

Translates between ncbieaa and ncbistdaa.

uint8_t Uint1

1-byte (8-bit) unsigned integer

int16_t Int2

2-byte (16-bit) signed integer

int32_t Int4

4-byte (32-bit) signed integer

uint32_t Uint4

4-byte (32-bit) unsigned integer

unsigned int

A callback function used to compare two keys in a database.

if(yy_accept[yy_current_state])

Uint1 Boolean

bool replacement for C

#define ASSERT

macro for assert.

@ eVeryLong

Is the pattern too long for simple multi-word processing?

@ eMultiWord

Does pattern consist of multiple words?

@ eOneWord

Does pattern consist of a single word?

#define PHI_ASCII_SIZE

Size of ASCII alphabet.

Int4 FindPatternHits(Int4 *hitArray, const Uint1 *seq, Int4 len, Boolean is_dna, const SPHIPatternSearchBlk *patternSearch)

Find the places where the pattern matches seq; 3 different methods are used depending on the length o...

#define PHI_BITS_PACKED_PER_WORD

Number of bits packed in a word.

#define PHI_MAX_HIT

Maximal size of an array of pattern hits.

#define PHI_MAX_PATTERN_LENGTH

Threshold pattern length.

Auxiliary functions for finding pattern matches in sequence (PHI-BLAST), that are used in multiple so...

static void s_PackLongPattern(Int4 numPlaces, Uint1 *inputPattern, SPHIPatternSearchBlk *pattern_blk)

Pack the bit representation of the inputPattern into the array pattern_blk->match_maskL.

static Int4 s_NumOfOne(Int4 a)

Return the number of 1 bits in the base 2 representation of a number a.

static void s_FindPrefixAndSuffixPos(Int4 *S, Int4 mask, Int4 mask2, Uint4 *prefixPos, Uint4 *suffixPos)

Set up matches for words that encode 4 DNA characters; figure out for each of 256 possible DNA 4-mers...

static SPHIPatternSearchBlk * s_PatternSearchItemsInit()

Allocates the SPHIPatternSearchBlk structure.

static void s_InitDNAPattern(SPHIPatternSearchBlk *pattern_blk)

Initialize mask and other arrays for DNA patterns.

static Int4 s_PackPattern(Uint1 *inputPattern, Int4 length)

Pack the next length bytes of inputPattern into a bit vector where the bit is 1 if and only if the by...

static Int4 s_ExpandPattern(Int4 *inputPatternMasked, Uint1 *inputPattern, Int4 length, Int4 maxLength)

Determine the length of the pattern after it has been expanded for efficient searching.

Int2 SPHIPatternSearchBlkNew(char *pattern_in, Boolean is_dna, BlastScoreBlk *sbp, SPHIPatternSearchBlk **pattern_blk_out, Blast_Message **error_msg)

Initialize the pattern items structure, serving as a "pseudo" lookup table in a PHI BLAST search.

SPHIPatternSearchBlk * SPHIPatternSearchBlkFree(SPHIPatternSearchBlk *lut)

Deallocate memory for the PHI BLAST lookup table.

static void s_PackVeryLongPattern(Int4 *inputPatternMasked, Int4 numPlacesInPattern, SPHIPatternSearchBlk *pattern_blk)

Sets up fields in SPHIPatternSearchBlk structure when pattern is very long.

const int kMaskAaAlphabetBits

Masks all bits corresponding to the aminoacid alphabet, i.e.

Int4 PHIBlastScanSubject(const LookupTableWrap *lookup_wrap, const BLAST_SequenceBlk *query_blk, const BLAST_SequenceBlk *subject_blk, Int4 *offset_ptr, BlastOffsetPair *offset_pairs, Int4 array_size)

Implementation of the ScanSubject function for PHI BLAST.

static void s_MakePatternUpperCase(char *pattern_in, char *pattern_out, int length)

Convert the string representation of a PHIblast pattern to uppercase.

Pseudo lookup table structure and database scanning functions used in PHI-BLAST.

Structure to hold a sequence.

Int4 length

Length of sequence.

Uint1 * sequence

Sequence used for search (could be translation).

Structure used for scoring calculations.

Structure to hold a message from the core of the BLAST engine.

Stores the letter frequency of a sequence or database.

double * prob

letter probs, (possible) non-zero offset.

Wrapper structure for different types of BLAST lookup tables.

void * lut

Pointer to the actual lookup table structure.

ELookupTableType lut_type

What kind of lookup table is it?

Auxiliary items needed for a DNA pattern search with pattern containing multiple words.

Uint4 DNAprefixSLL[100][256]

Where prefix of DNA 4-mer matches pattern, for multiple-word patterns.

Uint4 DNAsuffixSLL[100][256]

Where suffix of DNA 4-mer matches pattern, for multiple-word patterns.

Structure containing auxiliary items needed for a DNA search with a pattern that fits in a single wor...

Uint4 * DNAwhichPrefixPosPtr

Prefix position array for DNA patterns.

Uint4 DNAwhichSuffixPositions[256]

Where suffix of DNA 4-mer matches pattern.

Uint4 * DNAwhichSuffixPosPtr

Suffix position array for DNA patterns.

Uint4 DNAwhichPrefixPositions[256]

Where prefix of DNA 4-mer matches pattern.

Auxiliary items needed for a PHI BLAST search with pattern containing multiple words.

Int4 match_maskL[100]

Bit mask representation of input pattern for long patterns.

SExtraLongPatternItems * extra_long_items

Additional items necessary if pattern contains pieces longer than a word.

SDNALongPatternItems * dna_items

Additional items necessary for a DNA pattern.

Int4 SLL[100][256]

For each letter in the alphabet and each word in the masked pattern representation,...

Int4 inputPatternMasked[(30 *11)]

Masked input pattern.

Int4 bitPatternByLetter[256][11]

Which positions can a character occur in for long patterns.

Int4 numWords

Number of words needed to hold bit representation of pattern.

Structure containing all auxiliary information needed in a pattern search.

SShortPatternItems * one_word_items

Items necessary when pattern fits in one word.

EPatternType flagPatternLength

Indicates if the whole pattern fits in 1 word, each of several parts of the pattern fit in a word,...

double patternProbability

Probability of this letter combination.

Int4 minPatternMatchLength

Minimum length of string to match this pattern.

char * pattern

Pattern used, saved here for error reporting.

SLongPatternItems * multi_word_items

Additional items, when pattern requires multiple words.

Auxiliary items needed for a PHI BLAST search with a pattern that fits in a single word.

Int4 * whichPositionPtr

Array of positions where pattern letter should match, for a single word of the pattern.

SDNAShortPatternItems * dna_items

Additional items for a DNA search.

Int4 match_mask

Bit mask representation of input pattern for patterns that fit in a word.

This symbol enables the verbose option in makeblastdb and other BLAST+ search command line applicatio...

voidp calloc(uInt items, uInt size)


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4