(not_started && *ptr ==
' ')
86 if(not_started ==
FALSE)
89 while(*buffer_ptr ==
' '&& buffer_ptr >
buffer)
114ptr = strstr(repeat_options,
"-d");
117 while(*ptr ==
' '|| *ptr ==
'\t')
139 if(!winmask_options)
142ptr = strstr(winmask_options,
"-d");
148 while(*ptr ==
' '|| *ptr ==
'\t')
153 for(endp = *
dbname; *endp; ++endp) {
154 if(*endp ==
' '|| *endp ==
'\t') {
160ptr = strstr(winmask_options,
"-t");
164 while(*ptr ==
' '|| *ptr ==
'\t')
184 intarg, index, index1, window_pri=-1, linker_pri=-1, level_pri=-1;
190 if(*ptr ==
' '|| *ptr ==
NULLB)
197sscanf(
buffer,
"%ld", &tmplong);
198level_pri = (
int)tmplong;
201sscanf(
buffer,
"%ld", &tmplong);
202window_pri = (
int)tmplong;
205sscanf(
buffer,
"%ld", &tmplong);
206linker_pri = (
int)tmplong;
222 buffer[index1] = *ptr; ptr++;
226 if(arg != 0 && arg != 3)
230*window = window_pri;
231*linker = linker_pri;
247 Int4arg, index, index1;
253 if(*ptr ==
' '|| *ptr ==
NULLB)
261sscanf(
buffer,
"%ld", &tmplong);
265sscanf(
buffer,
"%le", &tmpdouble);
269sscanf(
buffer,
"%le", &tmpdouble);
286 buffer[index1] = *ptr; ptr++;
290 if(arg != 0 && arg != 3)
302 s_SafeStrCat(
char** dest,
unsigned int* dest_size,
const char* string2append)
304 size_tdest_length = strlen(*dest);
305 size_tstring2append_length = strlen(string2append);
306 if((dest_length + string2append_length + 1) > *dest_size) {
307 size_ttarget_size =
MAX(string2append_length, dest_length) * 2;
308*dest = (
char*)realloc((
void*)*dest, target_size);
310(*dest_size) = (
unsigned int)target_size;
316 strcat(*dest, string2append);
323 char* retval =
NULL;
324 unsigned intretval_size = 0;
326 if(filtering_options ==
NULL) {
331retval = (
char*)
calloc(retval_size,
sizeof(
char));
341 char buffer[24] = {
'\0'};
360 char buffer[24] = {
'\0'};
361snprintf(
buffer,
sizeof(
buffer),
"S %d %1.1f %1.1f;",
392 char buffer[24] = {
'\0'};
415 if(strlen(retval) != 0) {
430 returnstrlen(retval) == 0
437 const char* instructions,
443 const char* ptr = instructions;
444 charerror_buffer[1024];
451*filtering_options =
NULL;
453*blast_message =
NULL;
461 buffer= (
char*)
calloc(strlen(instructions),
sizeof(char));
463 if(ptr[0] ==
'm'&& ptr[1] ==
' ')
465mask_at_hash =
TRUE;
469 while(*ptr !=
NULLB)
478 doublelocut = .0, hicut = .0;
483snprintf(error_buffer,
sizeof(error_buffer),
"Error parsing filter string: %s",
buffer);
490segOptions->
window= window;
491segOptions->
locut= locut;
492segOptions->
hicut= hicut;
495 else if(*ptr ==
'D')
501 intwindow = 0, level = 0, linker = 0;
506snprintf(error_buffer,
sizeof(error_buffer),
"Error parsing filter string: %s",
buffer);
513dustOptions->
level= level;
514dustOptions->
window= window;
515dustOptions->
linker= linker;
518 else if(*ptr ==
'R')
529snprintf(error_buffer,
sizeof(error_buffer),
"Error parsing filter string: %s",
buffer);
543 else if(*ptr ==
'W')
555snprintf(error_buffer,
sizeof(error_buffer),
"Error parsing filter string: %s",
buffer);
568winmaskOptions->
taxid= taxid;
572 else if(*ptr ==
'L'|| *ptr ==
'T')
582 else if(*ptr ==
'm')
584mask_at_hash =
TRUE;
598(*filtering_options)->dustOptions = dustOptions;
599(*filtering_options)->segOptions = segOptions;
600(*filtering_options)->repeatFilterOptions = repeatOptions;
601(*filtering_options)->windowMaskerOptions = winmaskOptions;
602(*filtering_options)->mask_at_hash = mask_at_hash;
708 Int4num_elems = 0,
i= 0;
715 if(num_elems == 0) {
719*
head= ptrs[num_elems-1];
720 for(
i= num_elems-1;
i> 0;
i--) {
721ptrs[
i]->
next= ptrs[
i-1];
781 for(index = 0; index < mask_loc->
total_size; index++) {
793 if(mask_loc ==
NULL)
796 for(index=0; index<mask_loc->
total_size; index++)
830memset((
void*) &dna_seqlocs, 0,
sizeof(dna_seqlocs));
831memcpy((
void*) &dna_seqlocs,
833 sizeof(dna_seqlocs));
834memset((
void*) &mask_loc->
seqloc_array[ctx_idx], 0,
sizeof(dna_seqlocs));
844 if(frame_seqloc ==
NULL&& dna_seqlocs[0]) {
845frame_seqloc = dna_seqlocs[0];
847 for(itr = frame_seqloc; itr; itr = itr->
next) {
872 ASSERT(from < query_info->contexts[ctx_idx+
context].query_length);
873 ASSERT(to < query_info->contexts[ctx_idx+
context].query_length);
908 for(index=0; index < query_info->
num_queries; ++index)
916 for(frame_index=frame_start; frame_index<(frame_start+
NUM_FRAMES);
937 if(from >= dna_length)
938from = dna_length - 1;
939 if(to >= dna_length)
944 ASSERT(from < dna_length);
947seq_range->
left= from;
948seq_range->
right= to;
963 if(loc1->
left< loc2->left)
965 else if(loc1->
left> loc2->left)
975 Int4 i= 0, num_elems = 0;
979 if(num_elems == 0) {
983qsort(ptrs, (
size_t)num_elems,
sizeof(*ptrs),
989 for(
i= 0;
i< num_elems - 1;
i++) {
993 if((stop + link_value) > next_ssr->
left) {
997curr_tail = ptrs[
i+1];
1005 for(
i= 1;
i< num_elems;
i++) {
1007tail->
next= ptrs[
i];
1027 if(complement_mask ==
NULL)
1030*complement_mask =
NULL;
1033context <= query_info->last_context; ++
context) {
1037 Int4start_offset, end_offset, filter_start, filter_end;
1048 ASSERT(start_offset <= end_offset);
1056start_offset, end_offset);
1066 for( ; loc; loc = loc->
next) {
1069filter_start = end_offset - seq_range->
right;
1070filter_end = end_offset - seq_range->
left;
1072filter_start = start_offset + seq_range->
left;
1073filter_end = start_offset + seq_range->
right;
1082last_interval_open =
TRUE;
1085 if(filter_start > start_offset) {
1087left = start_offset;
1090left = filter_end + 1;
1095right = filter_start - 1;
1099tail =
BlastSeqLocNew((tail ? &tail : complement_mask), left, right);
1100 if(filter_end >= end_offset) {
1102last_interval_open =
FALSE;
1105left = filter_end + 1;
1109 if(last_interval_open) {
1114tail =
BlastSeqLocNew((tail ? &tail : complement_mask), left, right);
1135*seqloc_retval =
NULL;
1149 if(seg_options->
window> 0)
1151 if(seg_options->
locut> 0.0)
1153 if(seg_options->
hicut> 0.0)
1175 for(; masks; masks = masks->
next) {
1201 Int4query_length = 0;
1202 Int4context_offset;
1244 ASSERT(context < query_blk->lcase_mask->total_size);
1271 const intkNumContexts = query_info->
last_context+ 1;
1273 ASSERT(query_info && query_blk && filter_maskloc);
1281context <= query_info->last_context; ++
context) {
1289&filter_per_context,
1293 "Failure at filtering");
1301(*filter_maskloc)->seqloc_array[
context] = filter_per_context;
1312 for(; mask_loc; mask_loc = mask_loc->
next) {
1314 Int4index, start, stop;
1317start = length - 1 - mask_loc->
ssr->
right;
1318stop = length - 1 - mask_loc->
ssr->
left;
1320start = mask_loc->
ssr->
left;
1330 for(index = start; index <= stop; index++)
1331 buffer[index] = kMaskingLetter;
1342 for(
i= 0;
i< length;
i++) {
1343 if(sequence[
i] >= min_invalid) {
1367 for(index=0; index<filter_maskloc->
total_size; index++)
1375 if(has_mask ==
FALSE)
1386context <= query_info->last_context; ++
context) {
1388 Int4query_length = 0;
1389 Int4context_offset = 0;
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
const double kSegLocut
Locut parameter for SEG.
const int kDustLinker
Parameter used by dust to link together close low-complexity segments.
const int kSegWindow
Window that SEG examines at once.
#define CODON_LENGTH
Codons are always of length 3.
#define NUM_FRAMES
Number of frames to which we translate in translating searches.
const double kSegHicut
Hicut parameter for SEG.
const int kDustLevel
Level parameter used by dust.
const int kDustWindow
Window parameter used by dust.
#define NCBI_XBLAST_EXPORT
NULL operations for other cases.
static Int2 s_ParseRepeatOptions(const char *repeat_options, char **dbname)
Parses repeat filtering options string.
void Blast_MaskUnsupportedAA(BLAST_SequenceBlk *seq, Uint1 min_invalid)
Mask protein letters that are currently unsupported.
void BlastSetUp_MaskQuery(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, const BlastMaskLoc *filter_maskloc, EBlastProgramType program_number)
Masks the sequence given a BlastMaskLoc.
BlastMaskLoc * BlastMaskLocDup(const BlastMaskLoc *mask_loc)
Perform a deep copy of the BlastMaskLoc structure passed to this function.
static Int2 s_ParseDustOptions(const char *ptr, int *level, int *window, int *linker)
Parses options used for dust.
BlastMaskLoc * BlastMaskLocFree(BlastMaskLoc *mask_loc)
Deallocate memory for a BlastMaskLoc structure as well as the BlastSeqLoc's pointed to.
static Int4 s_BlastSeqLocLen(const BlastSeqLoc *var)
Calculates number of links in a chain of BlastSeqLoc's.
const Uint1 kProtMask
NCBISTDAA element used to mask residues in BLAST.
Int2 BLAST_ComplementMaskLocations(EBlastProgramType program_number, const BlastQueryInfo *query_info, const BlastMaskLoc *mask_loc, BlastSeqLoc **complement_mask)
This function takes the list of mask locations (i.e., regions that should not be searched or not adde...
static Int2 s_GetFilteringLocationsForOneContext(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, Int4 context, EBlastProgramType program_number, const SBlastFilterOptions *filter_options, BlastSeqLoc **filter_out, Blast_Message **blast_message)
Calculates the mask locations one context at a time.
BlastSeqLoc * BlastSeqLocListDup(BlastSeqLoc *head)
Make a deep copy of the linked list of BlastSeqLoc-s pointed to by its argument.
Int2 BlastSetUp_GetFilteringLocations(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, EBlastProgramType program_number, const SBlastFilterOptions *filter_options, BlastMaskLoc **filter_maskloc, Blast_Message **blast_message)
Does preparation for filtering and then calls BlastSetUp_Filter.
void Blast_MaskTheResidues(Uint1 *buffer, Int4 length, Boolean is_na, const BlastSeqLoc *mask_loc, Boolean reverse, Int4 offset)
Masks the letters in buffer.
BlastSeqLoc * BlastSeqLocNodeFree(BlastSeqLoc *loc)
Deallocate a single BlastSeqLoc structure and its contents, without following its next pointer.
void BlastSeqLocReverse(BlastSeqLoc *masks, Int4 query_length)
Converts reverse strand coordinates to forward strand in place.
static BlastSeqLoc * s_BlastSeqLocNodeDup(BlastSeqLoc *source)
Makes a copy of the BlastSeqLoc and also a copy of the SSRange element.
Int2 BlastFilteringOptionsFromString(EBlastProgramType program_number, const char *instructions, SBlastFilterOptions **filtering_options, Blast_Message **blast_message)
Produces SBlastFilterOptions from a string that has been traditionally supported in blast.
static Int2 s_ParseSegOptions(const char *ptr, Int4 *window, double *locut, double *hicut)
parses a string to set three seg options.
const Uint1 kNuclMask
BLASTNA element used to mask bases in BLAST.
Int2 BlastSetUp_Filter(EBlastProgramType program_number, Uint1 *sequence, Int4 length, Int4 offset, const SBlastFilterOptions *filter_options, BlastSeqLoc **seqloc_retval, Blast_Message **blast_message)
Runs seg filtering functions, according to the filtering options, returns BlastSeqLoc*.
static const char * s_LoadOptionsToBuffer(const char *instructions, char *buffer)
Copies filtering commands for one filtering algorithm from "instructions" to "buffer".
#define BLASTOPTIONS_BUFFER_SIZE
Allowed length of the filtering options string.
static char * s_SafeStrCat(char **dest, unsigned int *dest_size, const char *string2append)
Wrapper around strcat to ensure we don't do buffer overflows :)
static Int2 s_ParseWindowMaskerOptions(const char *winmask_options, char **dbname, int *taxid)
Parses window masker options string.
Int2 BlastMaskLocDNAToProtein(BlastMaskLoc *mask_loc, const BlastQueryInfo *query_info)
Given a BlastMaskLoc with an array of lists of DNA mask locations, substitutes that array by a new ar...
static BlastSeqLoc ** s_BlastSeqLocListToArrayOfPointers(const BlastSeqLoc *list, Int4 *count)
Converts a BlastSeqLoc list to an array of pointers, each pointing to an element of the list passed i...
static int s_SeqRangeSortByStartPosition(const void *vp1, const void *vp2)
Used for qsort, compares two SeqLoc's by starting position.
BlastSeqLoc * BlastSeqLocAppend(BlastSeqLoc **head, BlastSeqLoc *node)
Appends the BlastSeqLoc to the list of BlastSeqLoc-s pointed to by head.
void BlastSeqLocCombine(BlastSeqLoc **mask_loc, Int4 link_value)
Go through all mask locations in one sequence and combine any that overlap, deallocating the unneeded...
BlastMaskLoc * BlastMaskLocNew(Int4 total)
Allocate memory for a BlastMaskLoc.
void BlastSeqLocListReverse(BlastSeqLoc **head)
Reverse elements in the list.
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
BlastSeqLoc * BlastSeqLocNew(BlastSeqLoc **head, Int4 from, Int4 to)
Create and initialize a new sequence interval.
Int2 BlastMaskLocProteinToDNA(BlastMaskLoc *mask_loc, const BlastQueryInfo *query_info)
Given a BlastMaskLoc with an array of lists of mask locations per protein frame, recalculates all mas...
char * BlastFilteringOptionsToString(const SBlastFilterOptions *filtering_options)
Convert the filtering options structure to a string.
BLAST filtering functions.
static NCBI_INLINE Boolean BlastIsReverseStrand(Boolean is_na, Int4 context)
Determines whether this is a nucleotide query and whether this a minus strand or not.
Int2 Blast_MessageWrite(Blast_Message **blast_msg, EBlastSeverity severity, int context, const char *message)
Writes a message to a structure.
const int kBlastMessageNoContext
Declared in blast_message.h as extern const.
Int2 SRepeatFilterOptionsNew(SRepeatFilterOptions **repeat_options)
Allocates memory for SRepeatFilterOptions, fills in defaults.
SDustOptions * SDustOptionsFree(SDustOptions *dust_options)
Frees SDustOptions.
Boolean SBlastFilterOptionsMaskAtHash(const SBlastFilterOptions *filter_options)
Queries whether masking should be done only for the lookup table or for the entire search.
Int2 SBlastFilterOptionsValidate(EBlastProgramType program_number, const SBlastFilterOptions *filter_options, Blast_Message **blast_message)
Validates filter options to ensure that program and options are consistent and that options have vali...
SRepeatFilterOptions * SRepeatFilterOptionsFree(SRepeatFilterOptions *repeat_options)
Frees SRepeatFilterOptions.
Int2 SSegOptionsNew(SSegOptions **seg_options)
Allocates memory for SSegOptions, fills in defaults.
SWindowMaskerOptions * SWindowMaskerOptionsFree(SWindowMaskerOptions *winmask_options)
Frees SWindowMaskerOptions.
Int2 SWindowMaskerOptionsNew(SWindowMaskerOptions **winmask_options)
Allocates memory for SWindowMaskerOptions, fills in defaults.
Int2 SDustOptionsNew(SDustOptions **dust_options)
Allocates memory for SDustOptions, fills in defaults.
SSegOptions * SSegOptionsFree(SSegOptions *seg_options)
Frees SSegOptions.
@ eEmpty
no filtering at all.
Int2 SBlastFilterOptionsNew(SBlastFilterOptions **filter_options, EFilterOptions type)
Allocates memory for SBlastFilterOptions and.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Int4 BlastQueryInfoGetQueryLength(const BlastQueryInfo *qinfo, EBlastProgramType program, Int4 query_index)
Obtains the sequence length for a given query in the query, without taking into consideration any app...
Int2 SeqBufferSeg(Uint1 *sequence, Int4 length, Int4 offset, SegParameters *sparamsp, BlastSeqLoc **seg_locs)
Runs seg on a protein sequence in ncbistdaa.
SegParameters * SegParametersNewAa(void)
Allocated SeqParameter struct for proteins and fills with default values.
void SegParametersFree(SegParameters *sparamsp)
Free SegParameters structure.
Various auxiliary BLAST utility functions.
Int1 BLAST_ContextToFrame(EBlastProgramType prog_number, Uint4 context_number)
This function translates the context number of a context into the frame of the sequence.
unsigned int BLAST_GetNumberOfContexts(EBlastProgramType program)
Get the number of contexts for a given program.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
char * dbname(DBPROCESS *dbproc)
Get name of current database.
unsigned int
A callback function used to compare two keys in a database.
Int2 FilterQueriesForMapping(Uint1 *sequence, Int4 length, Int4 offset, const SReadQualityOptions *options, BlastSeqLoc **seq_loc)
const CharType(& source)[N]
void * BlastMemDup(const void *orig, size_t size)
Copies memory using memcpy and malloc.
Uint1 Boolean
bool replacement for C
#define TRUE
bool replacement for C indicating true.
#define FALSE
bool replacement for C indicating false.
#define NULLB
terminating byte of a char* string.
#define ASSERT
macro for assert.
#define MAX(a, b)
returns larger of a and b.
Structure to hold a sequence.
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.
BlastMaskLoc * lcase_mask
Locations to be masked from operations on this sequence: lookup table for query; scanning for subject...
Int4 length
Length of sequence.
Uint1 * sequence_nomask
Start of query sequence without masking.
Uint1 * sequence_start_nomask
Query sequence without masking.
Uint1 * sequence
Sequence used for search (could be translation).
Boolean nomask_allocated
If false the two above are just pointers to sequence and sequence_start.
Int4 query_length
Length of this query, strand or frame.
Boolean is_valid
Determine if this context is valid or not.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
Structure for keeping the query masking information.
Int4 total_size
Total size of the BlastSeqLoc array below.
BlastSeqLoc ** seqloc_array
Array of masked locations.
The query related information.
Int4 first_context
Index of the first element of the context array.
BlastContextInfo * contexts
Information per context.
int num_queries
Number of query sequences.
Int4 last_context
Index of the last element of the context array.
Used to hold a set of positions, mostly used for filtering.
SSeqRange * ssr
location data on the sequence.
struct BlastSeqLoc * next
next in linked list
Structure to hold a message from the core of the BLAST engine.
SRepeatFilterOptions * repeatFilterOptions
for organism specific repeat filtering.
SSegOptions * segOptions
low-complexity filtering for proteins sequences (includes translated nucleotides).
SReadQualityOptions * readQualityOptions
quality filtering for mapping next-generation sequences
SWindowMaskerOptions * windowMaskerOptions
organism specific filtering with window masker.
SDustOptions * dustOptions
low-complexity filtering for nucleotides.
Options for dust algorithm, applies only to nucl.
int linker
min distance to link segments.
Filtering options for organism-specific repeat filtering.
char * database
Nucleotide database for mini BLAST search.
Options for SEG algorithm, applies only to protein-protein comparisons.
int window
initial window to trigger further work.
A structure containing two integers, used e.g.
Int4 left
left endpoint of range (zero based)
Int4 right
right endpoint of range (zero based)
Filtering options for organism-specific filtering with Window Masker.
const char * database
Use winmasker database at this location.
int taxid
Select masking database for this TaxID.
Structure to hold parameters for seg search.
Int4 window
initial window size to trigger further work.
static CS_CONTEXT * context
voidp calloc(uInt items, uInt size)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4