A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/blast__filter_8c_source.html below:

NCBI C++ ToolKit: src/algo/blast/core/blast_filter.c Source File

43 #define BLASTOPTIONS_BUFFER_SIZE 128 72  if

(not_started && *ptr ==

' '

)

86  if

(not_started ==

FALSE

)

89  while

(*buffer_ptr ==

' '

&& buffer_ptr >

buffer

)

114

ptr = strstr(repeat_options,

"-d"

);

117  while

(*ptr ==

' '

|| *ptr ==

'\t'

)

139  if

(!winmask_options)

142

ptr = strstr(winmask_options,

"-d"

);

148  while

(*ptr ==

' '

|| *ptr ==

'\t'

)

153  for

(endp = *

dbname

; *endp; ++endp) {

154  if

(*endp ==

' '

|| *endp ==

'\t'

) {

160

ptr = strstr(winmask_options,

"-t"

);

164  while

(*ptr ==

' '

|| *ptr ==

'\t'

)

184  int

arg, index, index1, window_pri=-1, linker_pri=-1, level_pri=-1;

190  if

(*ptr ==

' '

|| *ptr ==

NULLB

)

197

sscanf(

buffer

,

"%ld"

, &tmplong);

198

level_pri = (

int

)tmplong;

201

sscanf(

buffer

,

"%ld"

, &tmplong);

202

window_pri = (

int

)tmplong;

205

sscanf(

buffer

,

"%ld"

, &tmplong);

206

linker_pri = (

int

)tmplong;

222  buffer

[index1] = *ptr; ptr++;

226  if

(arg != 0 && arg != 3)

230

*window = window_pri;

231

*linker = linker_pri;

247  Int4

arg, index, index1;

253  if

(*ptr ==

' '

|| *ptr ==

NULLB

)

261

sscanf(

buffer

,

"%ld"

, &tmplong);

265

sscanf(

buffer

,

"%le"

, &tmpdouble);

269

sscanf(

buffer

,

"%le"

, &tmpdouble);

286  buffer

[index1] = *ptr; ptr++;

290  if

(arg != 0 && arg != 3)

302 s_SafeStrCat

(

char

** dest,

unsigned int

* dest_size,

const char

* string2append)

304  size_t

dest_length = strlen(*dest);

305  size_t

string2append_length = strlen(string2append);

306  if

((dest_length + string2append_length + 1) > *dest_size) {

307  size_t

target_size =

MAX

(string2append_length, dest_length) * 2;

308

*dest = (

char

*)realloc((

void

*)*dest, target_size);

310

(*dest_size) = (

unsigned int

)target_size;

316  strcat

(*dest, string2append);

323  char

* retval =

NULL

;

324  unsigned int

retval_size = 0;

326  if

(filtering_options ==

NULL

) {

331

retval = (

char

*)

calloc

(retval_size,

sizeof

(

char

));

341  char buffer

[24] = {

'\0'

};

360  char buffer

[24] = {

'\0'

};

361

snprintf(

buffer

,

sizeof

(

buffer

),

"S %d %1.1f %1.1f;"

,

392  char buffer

[24] = {

'\0'

};

415  if

(strlen(retval) != 0) {

430  return

strlen(retval) == 0

437  const char

* instructions,

443  const char

* ptr = instructions;

444  char

error_buffer[1024];

451

*filtering_options =

NULL

;

453

*blast_message =

NULL

;

461  buffer

= (

char

*)

calloc

(strlen(instructions),

sizeof

(char));

463  if

(ptr[0] ==

'm'

&& ptr[1] ==

' '

)

465

mask_at_hash =

TRUE

;

469  while

(*ptr !=

NULLB

)

478  double

locut = .0, hicut = .0;

483

snprintf(error_buffer,

sizeof

(error_buffer),

"Error parsing filter string: %s"

,

buffer

);

490

segOptions->

window

= window;

491

segOptions->

locut

= locut;

492

segOptions->

hicut

= hicut;

495  else if

(*ptr ==

'D'

)

501  int

window = 0, level = 0, linker = 0;

506

snprintf(error_buffer,

sizeof

(error_buffer),

"Error parsing filter string: %s"

,

buffer

);

513

dustOptions->

level

= level;

514

dustOptions->

window

= window;

515

dustOptions->

linker

= linker;

518  else if

(*ptr ==

'R'

)

529

snprintf(error_buffer,

sizeof

(error_buffer),

"Error parsing filter string: %s"

,

buffer

);

543  else if

(*ptr ==

'W'

)

555

snprintf(error_buffer,

sizeof

(error_buffer),

"Error parsing filter string: %s"

,

buffer

);

568

winmaskOptions->

taxid

= taxid;

572  else if

(*ptr ==

'L'

|| *ptr ==

'T'

)

582  else if

(*ptr ==

'm'

)

584

mask_at_hash =

TRUE

;

598

(*filtering_options)->dustOptions = dustOptions;

599

(*filtering_options)->segOptions = segOptions;

600

(*filtering_options)->repeatFilterOptions = repeatOptions;

601

(*filtering_options)->windowMaskerOptions = winmaskOptions;

602

(*filtering_options)->mask_at_hash = mask_at_hash;

708  Int4

num_elems = 0,

i

= 0;

715  if

(num_elems == 0) {

719

*

head

= ptrs[num_elems-1];

720  for

(

i

= num_elems-1;

i

> 0;

i

--) {

721

ptrs[

i

]->

next

= ptrs[

i

-1];

781  for

(index = 0; index < mask_loc->

total_size

; index++) {

793  if

(mask_loc ==

NULL

)

796  for

(index=0; index<mask_loc->

total_size

; index++)

830

memset((

void

*) &dna_seqlocs, 0,

sizeof

(dna_seqlocs));

831

memcpy((

void

*) &dna_seqlocs,

833  sizeof

(dna_seqlocs));

834

memset((

void

*) &mask_loc->

seqloc_array

[ctx_idx], 0,

sizeof

(dna_seqlocs));

844  if

(frame_seqloc ==

NULL

&& dna_seqlocs[0]) {

845

frame_seqloc = dna_seqlocs[0];

847  for

(itr = frame_seqloc; itr; itr = itr->

next

) {

872  ASSERT

(from < query_info->contexts[ctx_idx+

context

].query_length);

873  ASSERT

(to < query_info->contexts[ctx_idx+

context

].query_length);

908  for

(index=0; index < query_info->

num_queries

; ++index)

916  for

(frame_index=frame_start; frame_index<(frame_start+

NUM_FRAMES

);

937  if

(from >= dna_length)

938

from = dna_length - 1;

939  if

(to >= dna_length)

944  ASSERT

(from < dna_length);

947

seq_range->

left

= from;

948

seq_range->

right

= to;

963  if

(loc1->

left

< loc2->left)

965  else if

(loc1->

left

> loc2->left)

975  Int4 i

= 0, num_elems = 0;

979  if

(num_elems == 0) {

983

qsort(ptrs, (

size_t

)num_elems,

sizeof

(*ptrs),

989  for

(

i

= 0;

i

< num_elems - 1;

i

++) {

993  if

((stop + link_value) > next_ssr->

left

) {

997

curr_tail = ptrs[

i

+1];

1005  for

(

i

= 1;

i

< num_elems;

i

++) {

1007

tail->

next

= ptrs[

i

];

1027  if

(complement_mask ==

NULL

)

1030

*complement_mask =

NULL

;

1033

context <= query_info->last_context; ++

context

) {

1037  Int4

start_offset, end_offset, filter_start, filter_end;

1048  ASSERT

(start_offset <= end_offset);

1056

start_offset, end_offset);

1066  for

( ; loc; loc = loc->

next

) {

1069

filter_start = end_offset - seq_range->

right

;

1070

filter_end = end_offset - seq_range->

left

;

1072

filter_start = start_offset + seq_range->

left

;

1073

filter_end = start_offset + seq_range->

right

;

1082

last_interval_open =

TRUE

;

1085  if

(filter_start > start_offset) {

1087

left = start_offset;

1090

left = filter_end + 1;

1095

right = filter_start - 1;

1099

tail =

BlastSeqLocNew

((tail ? &tail : complement_mask), left, right);

1100  if

(filter_end >= end_offset) {

1102

last_interval_open =

FALSE

;

1105

left = filter_end + 1;

1109  if

(last_interval_open) {

1114

tail =

BlastSeqLocNew

((tail ? &tail : complement_mask), left, right);

1135

*seqloc_retval =

NULL

;

1149  if

(seg_options->

window

> 0)

1151  if

(seg_options->

locut

> 0.0)

1153  if

(seg_options->

hicut

> 0.0)

1175  for

(; masks; masks = masks->

next

) {

1201  Int4

query_length = 0;

1202  Int4

context_offset;

1244  ASSERT

(context < query_blk->lcase_mask->total_size);

1271  const int

kNumContexts = query_info->

last_context

+ 1;

1273  ASSERT

(query_info && query_blk && filter_maskloc);

1281

context <= query_info->last_context; ++

context

) {

1289

&filter_per_context,

1293  "Failure at filtering"

);

1301

(*filter_maskloc)->seqloc_array[

context

] = filter_per_context;

1312  for

(; mask_loc; mask_loc = mask_loc->

next

) {

1314  Int4

index, start, stop;

1317

start = length - 1 - mask_loc->

ssr

->

right

;

1318

stop = length - 1 - mask_loc->

ssr

->

left

;

1320

start = mask_loc->

ssr

->

left

;

1330  for

(index = start; index <= stop; index++)

1331  buffer

[index] = kMaskingLetter;

1342  for

(

i

= 0;

i

< length;

i

++) {

1343  if

(sequence[

i

] >= min_invalid) {

1367  for

(index=0; index<filter_maskloc->

total_size

; index++)

1375  if

(has_mask ==

FALSE

)

1386

context <= query_info->last_context; ++

context

) {

1388  Int4

query_length = 0;

1389  Int4

context_offset = 0;

#define sfree(x)

Safe free a pointer: belongs to a higher level header.

const double kSegLocut

Locut parameter for SEG.

const int kDustLinker

Parameter used by dust to link together close low-complexity segments.

const int kSegWindow

Window that SEG examines at once.

#define CODON_LENGTH

Codons are always of length 3.

#define NUM_FRAMES

Number of frames to which we translate in translating searches.

const double kSegHicut

Hicut parameter for SEG.

const int kDustLevel

Level parameter used by dust.

const int kDustWindow

Window parameter used by dust.

#define NCBI_XBLAST_EXPORT

NULL operations for other cases.

static Int2 s_ParseRepeatOptions(const char *repeat_options, char **dbname)

Parses repeat filtering options string.

void Blast_MaskUnsupportedAA(BLAST_SequenceBlk *seq, Uint1 min_invalid)

Mask protein letters that are currently unsupported.

void BlastSetUp_MaskQuery(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, const BlastMaskLoc *filter_maskloc, EBlastProgramType program_number)

Masks the sequence given a BlastMaskLoc.

BlastMaskLoc * BlastMaskLocDup(const BlastMaskLoc *mask_loc)

Perform a deep copy of the BlastMaskLoc structure passed to this function.

static Int2 s_ParseDustOptions(const char *ptr, int *level, int *window, int *linker)

Parses options used for dust.

BlastMaskLoc * BlastMaskLocFree(BlastMaskLoc *mask_loc)

Deallocate memory for a BlastMaskLoc structure as well as the BlastSeqLoc's pointed to.

static Int4 s_BlastSeqLocLen(const BlastSeqLoc *var)

Calculates number of links in a chain of BlastSeqLoc's.

const Uint1 kProtMask

NCBISTDAA element used to mask residues in BLAST.

Int2 BLAST_ComplementMaskLocations(EBlastProgramType program_number, const BlastQueryInfo *query_info, const BlastMaskLoc *mask_loc, BlastSeqLoc **complement_mask)

This function takes the list of mask locations (i.e., regions that should not be searched or not adde...

static Int2 s_GetFilteringLocationsForOneContext(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, Int4 context, EBlastProgramType program_number, const SBlastFilterOptions *filter_options, BlastSeqLoc **filter_out, Blast_Message **blast_message)

Calculates the mask locations one context at a time.

BlastSeqLoc * BlastSeqLocListDup(BlastSeqLoc *head)

Make a deep copy of the linked list of BlastSeqLoc-s pointed to by its argument.

Int2 BlastSetUp_GetFilteringLocations(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, EBlastProgramType program_number, const SBlastFilterOptions *filter_options, BlastMaskLoc **filter_maskloc, Blast_Message **blast_message)

Does preparation for filtering and then calls BlastSetUp_Filter.

void Blast_MaskTheResidues(Uint1 *buffer, Int4 length, Boolean is_na, const BlastSeqLoc *mask_loc, Boolean reverse, Int4 offset)

Masks the letters in buffer.

BlastSeqLoc * BlastSeqLocNodeFree(BlastSeqLoc *loc)

Deallocate a single BlastSeqLoc structure and its contents, without following its next pointer.

void BlastSeqLocReverse(BlastSeqLoc *masks, Int4 query_length)

Converts reverse strand coordinates to forward strand in place.

static BlastSeqLoc * s_BlastSeqLocNodeDup(BlastSeqLoc *source)

Makes a copy of the BlastSeqLoc and also a copy of the SSRange element.

Int2 BlastFilteringOptionsFromString(EBlastProgramType program_number, const char *instructions, SBlastFilterOptions **filtering_options, Blast_Message **blast_message)

Produces SBlastFilterOptions from a string that has been traditionally supported in blast.

static Int2 s_ParseSegOptions(const char *ptr, Int4 *window, double *locut, double *hicut)

Parses a string to set three seg options.

const Uint1 kNuclMask

BLASTNA element used to mask bases in BLAST.

Int2 BlastSetUp_Filter(EBlastProgramType program_number, Uint1 *sequence, Int4 length, Int4 offset, const SBlastFilterOptions *filter_options, BlastSeqLoc **seqloc_retval, Blast_Message **blast_message)

Runs seg filtering functions, according to the filtering options, returns BlastSeqLoc*.

static const char * s_LoadOptionsToBuffer(const char *instructions, char *buffer)

Copies filtering commands for one filtering algorithm from "instructions" to "buffer".

#define BLASTOPTIONS_BUFFER_SIZE

Allowed length of the filtering options string.

static char * s_SafeStrCat(char **dest, unsigned int *dest_size, const char *string2append)

Wrapper around strcat to ensure we don't do buffer overflows :)

static Int2 s_ParseWindowMaskerOptions(const char *winmask_options, char **dbname, int *taxid)

Parses window masker options string.

Int2 BlastMaskLocDNAToProtein(BlastMaskLoc *mask_loc, const BlastQueryInfo *query_info)

Given a BlastMaskLoc with an array of lists of DNA mask locations, substitutes that array by a new ar...

static BlastSeqLoc ** s_BlastSeqLocListToArrayOfPointers(const BlastSeqLoc *list, Int4 *count)

Converts a BlastSeqLoc list to an array of pointers, each pointing to an element of the list passed i...

static int s_SeqRangeSortByStartPosition(const void *vp1, const void *vp2)

Used for qsort, compares two SeqLoc's by starting position.

BlastSeqLoc * BlastSeqLocAppend(BlastSeqLoc **head, BlastSeqLoc *node)

Appends the BlastSeqLoc to the list of BlastSeqLoc-s pointed to by head.

void BlastSeqLocCombine(BlastSeqLoc **mask_loc, Int4 link_value)

Go through all mask locations in one sequence and combine any that overlap, deallocating the unneeded...

BlastMaskLoc * BlastMaskLocNew(Int4 total)

Allocate memory for a BlastMaskLoc.

void BlastSeqLocListReverse(BlastSeqLoc **head)

Reverse elements in the list.

BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)

Deallocate all BlastSeqLoc objects in a chain.

BlastSeqLoc * BlastSeqLocNew(BlastSeqLoc **head, Int4 from, Int4 to)

Create and initialize a new sequence interval.

Int2 BlastMaskLocProteinToDNA(BlastMaskLoc *mask_loc, const BlastQueryInfo *query_info)

Given a BlastMaskLoc with an array of lists of mask locations per protein frame, recalculates all mas...

char * BlastFilteringOptionsToString(const SBlastFilterOptions *filtering_options)

Convert the filtering options structure to a string.

BLAST filtering functions.

static NCBI_INLINE Boolean BlastIsReverseStrand(Boolean is_na, Int4 context)

Determines whether this is a nucleotide query and whether this is a minus strand or not.

Int2 Blast_MessageWrite(Blast_Message **blast_msg, EBlastSeverity severity, int context, const char *message)

Writes a message to a structure.

const int kBlastMessageNoContext

Declared in blast_message.h as extern const.

Int2 SRepeatFilterOptionsNew(SRepeatFilterOptions **repeat_options)

Allocates memory for SRepeatFilterOptions, fills in defaults.

SDustOptions * SDustOptionsFree(SDustOptions *dust_options)

Frees SDustOptions.

Boolean SBlastFilterOptionsMaskAtHash(const SBlastFilterOptions *filter_options)

Queries whether masking should be done only for the lookup table or for the entire search.

Int2 SBlastFilterOptionsValidate(EBlastProgramType program_number, const SBlastFilterOptions *filter_options, Blast_Message **blast_message)

Validates filter options to ensure that program and options are consistent and that options have vali...

SRepeatFilterOptions * SRepeatFilterOptionsFree(SRepeatFilterOptions *repeat_options)

Frees SRepeatFilterOptions.

Int2 SSegOptionsNew(SSegOptions **seg_options)

Allocates memory for SSegOptions, fills in defaults.

SWindowMaskerOptions * SWindowMaskerOptionsFree(SWindowMaskerOptions *winmask_options)

Frees SWindowMaskerOptions.

Int2 SWindowMaskerOptionsNew(SWindowMaskerOptions **winmask_options)

Allocates memory for SWindowMaskerOptions, fills in defaults.

Int2 SDustOptionsNew(SDustOptions **dust_options)

Allocates memory for SDustOptions, fills in defaults.

SSegOptions * SSegOptionsFree(SSegOptions *seg_options)

Frees SSegOptions.

@ eEmpty

no filtering at all.

Int2 SBlastFilterOptionsNew(SBlastFilterOptions **filter_options, EFilterOptions type)

Allocates memory for SBlastFilterOptions and.

EBlastProgramType

Defines the engine's notion of the different applications of the BLAST algorithm.

Int4 BlastQueryInfoGetQueryLength(const BlastQueryInfo *qinfo, EBlastProgramType program, Int4 query_index)

Obtains the sequence length for a given query in the query, without taking into consideration any app...

Int2 SeqBufferSeg(Uint1 *sequence, Int4 length, Int4 offset, SegParameters *sparamsp, BlastSeqLoc **seg_locs)

Runs seg on a protein sequence in ncbistdaa.

SegParameters * SegParametersNewAa(void)

Allocates a SegParameters struct for proteins and fills it with default values.

void SegParametersFree(SegParameters *sparamsp)

Free SegParameters structure.

Various auxiliary BLAST utility functions.

Int1 BLAST_ContextToFrame(EBlastProgramType prog_number, Uint4 context_number)

This function translates the context number of a context into the frame of the sequence.

unsigned int BLAST_GetNumberOfContexts(EBlastProgramType program)

Get the number of contexts for a given program.

static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)

uint8_t Uint1

1-byte (8-bit) unsigned integer

int16_t Int2

2-byte (16-bit) signed integer

int32_t Int4

4-byte (32-bit) signed integer

uint32_t Uint4

4-byte (32-bit) unsigned integer

char * dbname(DBPROCESS *dbproc)

Get name of current database.

unsigned int

A callback function used to compare two keys in a database.

Int2 FilterQueriesForMapping(Uint1 *sequence, Int4 length, Int4 offset, const SReadQualityOptions *options, BlastSeqLoc **seq_loc)

const CharType(& source)[N]

void * BlastMemDup(const void *orig, size_t size)

Copies memory using memcpy and malloc.

Uint1 Boolean

bool replacement for C

#define TRUE

bool replacement for C indicating true.

#define FALSE

bool replacement for C indicating false.

#define NULLB

terminating byte of a char* string.

#define ASSERT

macro for assert.

#define MAX(a, b)

returns larger of a and b.

Structure to hold a sequence.

Uint1 * sequence_start

Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.

BlastMaskLoc * lcase_mask

Locations to be masked from operations on this sequence: lookup table for query; scanning for subject...

Int4 length

Length of sequence.

Uint1 * sequence_nomask

Start of query sequence without masking.

Uint1 * sequence_start_nomask

Query sequence without masking.

Uint1 * sequence

Sequence used for search (could be translation).

Boolean nomask_allocated

If false the two above are just pointers to sequence and sequence_start.

Int4 query_length

Length of this query, strand or frame.

Boolean is_valid

Determine if this context is valid or not.

Int4 query_offset

Offset of this query, strand or frame in the concatenated super-query.

Structure for keeping the query masking information.

Int4 total_size

Total size of the BlastSeqLoc array below.

BlastSeqLoc ** seqloc_array

Array of masked locations.

The query related information.

Int4 first_context

Index of the first element of the context array.

BlastContextInfo * contexts

Information per context.

int num_queries

Number of query sequences.

Int4 last_context

Index of the last element of the context array.

Used to hold a set of positions, mostly used for filtering.

SSeqRange * ssr

location data on the sequence.

struct BlastSeqLoc * next

next in linked list

Structure to hold a message from the core of the BLAST engine.

SRepeatFilterOptions * repeatFilterOptions

for organism specific repeat filtering.

SSegOptions * segOptions

low-complexity filtering for protein sequences (includes translated nucleotides).

SReadQualityOptions * readQualityOptions

quality filtering for mapping next-generation sequences

SWindowMaskerOptions * windowMaskerOptions

organism specific filtering with window masker.

SDustOptions * dustOptions

low-complexity filtering for nucleotides.

Options for dust algorithm, applies only to nucl.

int linker

min distance to link segments.

Filtering options for organism-specific repeat filtering.

char * database

Nucleotide database for mini BLAST search.

Options for SEG algorithm, applies only to protein-protein comparisons.

int window

initial window to trigger further work.

A structure containing two integers, used e.g.

Int4 left

left endpoint of range (zero based)

Int4 right

right endpoint of range (zero based)

Filtering options for organism-specific filtering with Window Masker.

const char * database

Use winmasker database at this location.

int taxid

Select masking database for this TaxID.

Structure to hold parameters for seg search.

Int4 window

initial window size to trigger further work.

static CS_CONTEXT * context

voidp calloc(uInt items, uInt size)


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4