m = 0,
b= 0, e = 0;
62 if(ranges ==
NULL|| num_ranges <= 0) {
70 if(ranges[m].left > target) {
78 if( (target > ranges[
b].right) && (
b< (num_ranges-1) ) ) {
105 if(*seq_blk ==
NULL) {
110 ASSERT(seq_blk && *seq_blk);
112 if(buffer_allocated) {
113(*seq_blk)->sequence_start_allocated =
TRUE;
116(*seq_blk)->sequence = (*seq_blk)->sequence_start+1;
120(*seq_blk)->sequence_start =
NULL;
123(*seq_blk)->sequence_start_nomask = (*seq_blk)->sequence_start;
124(*seq_blk)->sequence_nomask = (*seq_blk)->sequence;
125(*seq_blk)->nomask_allocated =
FALSE;
127(*seq_blk)->length = length;
128(*seq_blk)->bases_offset = 0;
148 const Uint1* sequence,
161seq_blk->
length= seqlen;
168 const Uint1* sequence)
184 Uint4num_seq_ranges,
190 if( !seq_blk || !seq_ranges ) {
194 ASSERT(num_seq_ranges >= 1);
197 if(copy_seq_ranges) {
201 if( !
tmp) {
return-1; }
202memcpy((
void*)
tmp,
203(
void*) seq_ranges,
204num_seq_ranges *
sizeof(*seq_ranges));
213 tmp[num_seq_ranges - 1].right = seq_blk->
length;
271(*copy)->sequence_allocated =
FALSE;
272(*copy)->sequence_start_allocated =
FALSE;
273(*copy)->oof_sequence_allocated =
FALSE;
274(*copy)->lcase_mask_allocated =
FALSE;
275(*copy)->seq_ranges_allocated =
FALSE;
281 if(program ==
NULL)
286 else if(
strcasecmp(
"blastp", program) == 0)
288 else if(
strcasecmp(
"blastx", program) == 0)
290 else if(
strcasecmp(
"tblastn", program) == 0)
292 else if(
strcasecmp(
"tblastx", program) == 0)
294 else if(
strcasecmp(
"rpsblast", program) == 0)
296 else if(
strcasecmp(
"rpstblastn", program) == 0)
298 else if(
strcasecmp(
"psiblast", program) == 0)
300 else if(
strcasecmp(
"psitblastn", program) == 0)
302 else if(
strcasecmp(
"phiblastn", program) == 0)
304 else if(
strcasecmp(
"phiblastp", program) == 0)
306 else if(
strcasecmp(
"mapper", program) == 0)
315 if(program ==
NULL)
320*program =
strdup(
"blastn");
323*program =
strdup(
"blastp");
326*program =
strdup(
"blastx");
329*program =
strdup(
"tblastn");
332*program =
strdup(
"tblastx");
335*program =
strdup(
"rpsblast");
338*program =
strdup(
"rpstblastn");
341*program =
strdup(
"psiblast");
344*program =
strdup(
"psitblastn");
347*program =
strdup(
"phiblastp");
350*program =
strdup(
"phiblastn");
353*program =
strdup(
"mapper");
356*program =
strdup(
"unknown");
374 register int i, j, k, index0, index1, index2;
375 static Uint1mapping[4] = { 8,
385 if((codon[0] | codon[1] | codon[2]) > 15) {
394 for(
i= 0;
i< 4;
i++) {
395 if(codon[0] & mapping[
i]) {
397 for(j = 0; j < 4; j++) {
398 if(codon[1] & mapping[j]) {
399index1 = index0 + (j * 4);
400 for(k = 0; k < 4; k++) {
401 if(codon[2] & mapping[k]) {
432 Int4index, index_prot;
436nucl_seq = (frame >= 0 ? (
Uint1*)query_seq : (
Uint1*)(query_seq_rev+1));
439prot_seq[0] =
NULLB;
443codon[0] = nucl_seq[index];
444codon[1] = nucl_seq[index+1];
445codon[2] = nucl_seq[index+2];
449prot_seq[index_prot] = residue;
453prot_seq[index_prot] =
NULLB;
455 returnindex_prot - 1;
473new_seq[-1] = new_seq[-2] = new_seq[-3] = 0;
474new_seq[
len-3] = new_seq[
len-2] = new_seq[
len-1] = 0;
478max_start =
MIN(3,
len);
480 for(
i= 0;
i< max_start;
i++) {
481curr_letter = curr_letter << 2 | (old_seq[
i] & 3);
482new_seq[
i- max_start] = curr_letter;
488 for(;
i<
len;
i++) {
489curr_letter = curr_letter << 2 | (old_seq[
i] & 3);
490new_seq[
i- max_start] = curr_letter;
495max_start =
MIN(3,
len);
496 for(
i= 0;
i< max_start;
i++) {
497curr_letter = curr_letter << 2;
498new_seq[
len- (max_start -
i)] = curr_letter;
512 Int2total_remainder;
514 intbyte_value, codon=-1;
515 Uint1last_remainder, last_byte, remainder;
516 Uint1* nt_seq_end,* nt_seq_start;
517 Uint1* prot_seq_start;
518 intbyte_value1,byte_value2,byte_value3,byte_value4,byte_value5;
521 if(nt_seq ==
NULL|| prot_seq ==
NULL||
529prot_seq_start = prot_seq;
531remainder = length%4;
534nt_seq_end = (
Uint1*) (nt_seq + (length)/4 - 1);
535last_remainder = (4*(length/4) - frame + 1)%
CODON_LENGTH;
536total_remainder = last_remainder+remainder;
539byte_value = *nt_seq;
542 while(nt_seq < nt_seq_end) {
545codon = (byte_value >> 2);
546*prot_seq = translation[codon];
550codon = ((byte_value & 3) << 4);
552byte_value = *nt_seq;
553codon += (byte_value >> 4);
554*prot_seq = translation[codon];
556 if(nt_seq >= nt_seq_end) {
562codon = ((byte_value & 15) << 2);
564byte_value = *nt_seq;
565codon += (byte_value >> 6);
566*prot_seq = translation[codon];
568 if(nt_seq >= nt_seq_end) {
574codon = byte_value & 63;
575*prot_seq = translation[codon];
578byte_value = *nt_seq;
586 while(nt_seq < (nt_seq_end-10)) {
587byte_value1 = *(++nt_seq);
588byte_value2 = *(++nt_seq);
589byte_value3 = *(++nt_seq);
591codon = (byte_value >> 2);
592*prot_seq = translation[codon];
596codon = ((byte_value & 3) << 4);
597codon += (byte_value1 >> 4);
598*prot_seq = translation[codon];
601byte_value4 = *(++nt_seq);
603codon = ((byte_value1 & 15) << 2);
605codon += (byte_value2 >> 6);
606*prot_seq = translation[codon];
609codon = byte_value2 & 63;
610byte_value5 = *(++nt_seq);
611*prot_seq = translation[codon];
615codon = (byte_value3 >> 2);
616*prot_seq = translation[codon];
619byte_value = *(++nt_seq);
620codon = ((byte_value3 & 3) << 4);
621codon += (byte_value4 >> 4);
622*prot_seq = translation[codon];
625codon = ((byte_value4 & 15) << 2);
626codon += (byte_value5 >> 6);
627*prot_seq = translation[codon];
630codon = byte_value5 & 63;
631*prot_seq = translation[codon];
641byte_value = *nt_seq;
642codon = byte_value & 63;
644*prot_seq = translation[codon];
646}
else if(
state== 0) {
647byte_value = *nt_seq;
648codon = ((byte_value) >> 2);
650*prot_seq = translation[codon];
655byte_value = *(nt_seq_end);
656last_byte = *(nt_seq_end+1);
658codon = (last_byte >> 2);
659}
else if(
state== 2) {
660codon = ((byte_value & 15) << 2);
661codon += (last_byte >> 6);
662}
else if(
state== 3) {
663codon = ((byte_value & 3) << 4);
664codon += (last_byte >> 4);
666*prot_seq = translation[codon];
670nt_seq_start = (
Uint1*) nt_seq;
672 state= remainder+frame;
679codon = (last_byte >> 6);
680byte_value = *nt_seq;
681codon += ((byte_value & 15) << 2);
683}
else if(
state== 1) {
684codon = (last_byte >> 4);
685byte_value = *nt_seq;
686codon += ((byte_value & 3) << 4);
688}
else if(
state== 2) {
689codon = (last_byte >> 2);
692*prot_seq = translation[codon];
695 state= 3 + (remainder + frame + 1);
699byte_value = *nt_seq;
702 while(nt_seq > nt_seq_start) {
705codon = (byte_value & 63);
706*prot_seq = translation[codon];
710codon = (byte_value >> 6);
712byte_value = *nt_seq;
713codon += ((byte_value & 15) << 2);
714*prot_seq = translation[codon];
716 if(nt_seq <= nt_seq_start) {
722codon = (byte_value >> 4);
724byte_value = *nt_seq;
725codon += ((byte_value & 3) << 4);
726*prot_seq = translation[codon];
728 if(nt_seq <= nt_seq_start) {
734codon = (byte_value >> 2);
735*prot_seq = translation[codon];
738byte_value = *nt_seq;
746 while(nt_seq > (nt_seq_start+10)) {
747byte_value1 = *(--nt_seq);
748byte_value2 = *(--nt_seq);
749byte_value3 = *(--nt_seq);
751codon = (byte_value & 63);
752*prot_seq = translation[codon];
754codon = (byte_value >> 6);
755codon += ((byte_value1 & 15) << 2);
756*prot_seq = translation[codon];
758byte_value4 = *(--nt_seq);
759codon = (byte_value1 >> 4);
760codon += ((byte_value2 & 3) << 4);
761*prot_seq = translation[codon];
763codon = (byte_value2 >> 2);
764*prot_seq = translation[codon];
766byte_value5 = *(--nt_seq);
768codon = (byte_value3 & 63);
769*prot_seq = translation[codon];
771byte_value = *(--nt_seq);
772codon = (byte_value3 >> 6);
773codon += ((byte_value4 & 15) << 2);
774*prot_seq = translation[codon];
776codon = (byte_value4 >> 4);
777codon += ((byte_value5 & 3) << 4);
778*prot_seq = translation[codon];
780codon = (byte_value5 >> 2);
781*prot_seq = translation[codon];
788byte_value = *nt_seq;
790codon = (byte_value & 63);
791*prot_seq = translation[codon];
793}
else if(
state== 2) {
794codon = (byte_value >> 2);
795*prot_seq = translation[codon];
802 return(
Int4)(prot_seq - prot_seq_start);
808 Uint1** rev_sequence_ptr)
810 Uint1* rev_sequence;
814 Uint1conversion_table[16] = {
821 if(!rev_sequence_ptr)
826rev_sequence[0] = rev_sequence[length+1] =
NULLB;
828 for(index = 0; index < length; ++index) {
832rev_sequence[length-index] = conversion_table[sequence[index]];
835*rev_sequence_ptr = rev_sequence;
855context_number = context_number %
NUM_FRAMES;
856 switch(context_number) {
857 case0: frame = 1;
break;
858 case1: frame = 2;
break;
859 case2: frame = 3;
break;
860 case3: frame = -1;
break;
861 case4: frame = -2;
break;
862 case5: frame = -3;
break;
863 default:
abort();
break;
875 Int4index, new_index;
882 for(index=0, new_index=0; new_index < new_length-1;
885new_buffer[new_index] =
891new_buffer[new_index] =
903 for(; index < length; index++) {
905 case0: shift = 6;
break;
906 case1: shift = 4;
break;
907 case2: shift = 2;
break;
913new_buffer[new_index] |=
917*packed_seq = new_buffer;
957 for(
i= 0; ; ++
i) {
967*seq++ = tmp_seq[
offset];
993 Int2index1, index2, index3, bp1, bp2, bp3;
999 static Uint1mapping[4] = {2,
1004 if(genetic_code ==
NULL)
1008 if(translation ==
NULL)
1011 for(index1=0; index1<4; index1++)
1013 for(index2=0; index2<4; index2++)
1015 for(index3=0; index3<4; index3++)
1027codon = (mapping[bp1]<<4) + (mapping[bp2]<<2) + (mapping[bp3]);
1028translation[(index3<<4) + (index2<<2) + index1] =
1029genetic_code[codon];
1033codon = (mapping[index1]<<4) + (mapping[index2]<<2) +
1035translation[(index1<<4) + (index2<<2) + index3] =
1036genetic_code[codon];
1046 Int4nucl_length,
const Uint1* genetic_code,
1047 Uint1** translation_buffer_ptr,
Uint4** frame_offsets_ptr,
1048 Uint1** mixed_seq_ptr)
1050 Uint1* translation_buffer,* mixed_seq;
1051 Uint1* translation_table =
NULL,* translation_table_rc =
NULL;
1052 Uint1* nucl_seq_rev;
1055 Uint4* frame_offsets;
1058 Uint4buffer_length =2*(nucl_length+1)+2;
1063 if((translation_buffer =
1078frame_offsets[0] = 0;
1086nucl_length, nucl_seq, frame, translation_buffer+
offset);
1090nucl_length, nucl_seq, frame, translation_buffer+
offset);
1095nucl_length, frame, translation_buffer+
offset, genetic_code);
1105 sfree(nucl_seq_rev);
1107 free(translation_table);
1108 sfree(translation_table_rc);
1113 if(mixed_seq_ptr) {
1117*mixed_seq_ptr = mixed_seq = (
Uint1*)
malloc(2*nucl_length+3);
1120 for(
i= 0;
i<= nucl_length; ++
i) {
1123*seq++ = translation_buffer[frame_offsets[index+
context]+
offset];
1128 if(translation_buffer_ptr)
1129*translation_buffer_ptr = translation_buffer;
1131 sfree(translation_buffer);
1133 if(frame_offsets_ptr)
1134*frame_offsets_ptr = frame_offsets;
1136 sfree(frame_offsets);
1143 Uint1** translation_buffer_ptr,
Int4* protein_length,
1144 Uint1** mixed_seq_ptr)
1146 Uint1* translation_buffer;
1155 if(!mixed_seq_ptr) {
1156 if((translation_buffer =
1159 sfree(nucl_seq_rev);
1165nucl_length, frame, translation_buffer,
1168*protein_length = length;
1171 Int2frame_sign = ((frame < 0) ? -1 : 1);
1176 if((translation_buffer = (
Uint1*)
malloc(nucl_length+2)) ==
NULL)
1178 sfree(nucl_seq_rev);
1185nucl_length, (
short)(frame_sign*index),
1186translation_buffer+
offset, genetic_code);
1187frame_offsets[index-1] =
offset;
1191*mixed_seq_ptr = (
Uint1*)
malloc(nucl_length+2);
1193*protein_length = nucl_length;
1194 for(index = 0, seq = *mixed_seq_ptr; index <= nucl_length;
1196*seq = translation_buffer[frame_offsets[index%
CODON_LENGTH] +
1201 sfree(nucl_seq_rev);
1202 if(translation_buffer_ptr)
1203*translation_buffer_ptr = translation_buffer;
1205 sfree(translation_buffer);
1215 ASSERT(frame >= -3 && frame <= 3 && frame != 0);
1223 ASSERT(frame == 1 || frame == -1);
1224 returnframe == 1 ? 0 : 1;
1237 while(
b< e - 1) {
1256 for(index=0; index<target_t->
num_frames; index++)
1260 if(target_t->
range)
1269 const Uint1* gen_code_string,
1282retval->
partial= !is_ooframe;
1309 sfree(nucl_seq_rev);
1327 double* retval =
NULL;
1345retval[
i] = standard_probabilities->
prob[
i];
1354 char* retval =
NULL;
1361retval =
strdup(
string);
1366 for(p = retval; *p !=
NULLB; p++) {
1367*p =
toupper((
unsigned char)(*p));
1399 if( !progress_info ) {
1402 sfree(progress_info);
1408 if( !progress_info ) {
ESubjectMaskingType
Define the possible subject masking types.
#define COMPRESSION_RATIO
Compression ratio of nucleotide bases (4 bases in 1 byte)
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
#define CODON_LENGTH
Codons are always of length 3.
#define NUM_STRANDS
Number of frames in a nucleotide sequence.
#define NUM_FRAMES
Number of frames to which we translate in translating searches.
@ ePrelimSearch
Preliminary stage.
BLAST filtering functions.
BlastMaskLoc * BlastMaskLocFree(BlastMaskLoc *mask_loc)
Deallocate memory for a BlastMaskLoc structure as well as the BlastSeqLoc's pointed to.
Boolean Blast_QueryIsTranslated(EBlastProgramType p)
Returns true if the query is translated.
Boolean Blast_SubjectIsNucleotide(EBlastProgramType p)
Returns true if the subject is nucleotide.
Boolean Blast_QueryIsNucleotide(EBlastProgramType p)
Returns true if the query is nucleotide.
Boolean Blast_QueryIsProtein(EBlastProgramType p)
Returns true if the query is protein.
Boolean Blast_ProgramIsValid(EBlastProgramType p)
Returns true if program is not undefined.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Boolean Blast_SubjectIsTranslated(EBlastProgramType p)
Returns true if the subject is translated.
Uint4 QueryInfo_GetSeqBufLen(const BlastQueryInfo *qinfo)
Get the number of bytes required for the concatenated sequence buffer, given a query info structure.
Definitions and prototypes used by blast_stat.c to calculate BLAST statistics.
Blast_ResFreq * Blast_ResFreqFree(Blast_ResFreq *rfp)
Deallocates Blast_ResFreq and prob0 element.
Int2 Blast_ResFreqStdComp(const BlastScoreBlk *sbp, Blast_ResFreq *rfp)
Calculates residues frequencies given a standard distribution.
Blast_ResFreq * Blast_ResFreqNew(const BlastScoreBlk *sbp)
Allocates a new Blast_ResFreq structure and fills in the prob element based upon the contents of sbp.
Int4 BLAST_FrameToContext(Int2 frame, EBlastProgramType program)
Convert translation frame or strand into a context number suitable for indexing into the BlastQueryIn...
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
static void s_BlastSequenceBlkFreeSeqRanges(BLAST_SequenceBlk *seq_blk)
Auxiliary function to free the BLAST_SequenceBlk::seq_ranges field if applicable.
Int2 BlastSeqBlkSetSeqRanges(BLAST_SequenceBlk *seq_blk, SSeqRange *seq_ranges, Uint4 num_seq_ranges, Boolean copy_seq_ranges, ESubjectMaskingType mask_type)
Sets the seq_range and related fields appropriately in the BLAST_SequenceBlk structure.
void SBlastProgressReset(SBlastProgress *progress_info)
Resets the progress structure to its original state (as if newly allocated) for a fresh start without...
Int4 SSeqRangeArrayLessThanOrEqual(const SSeqRange *ranges, Int4 num_ranges, Int4 target)
Returns the index of the range, such that this element is the first range that either contains the ta...
Int2 BlastSeqBlkSetSequence(BLAST_SequenceBlk *seq_blk, const Uint1 *sequence, Int4 seqlen)
Stores the sequence in the sequence block structure.
SBlastProgress * SBlastProgressNew(void *user_data)
Allocates and initializes a new SBlastProgress structure.
SSeqRange SSeqRangeNew(Int4 start, Int4 stop)
Create a new SSeqRange structure with both fields initialized.
size_t BLAST_GetTranslatedProteinLength(size_t nucleotide_length, unsigned int context)
Calculates the length of frame for a translated protein.
SBlastProgress * SBlastProgressFree(SBlastProgress *progress_info)
Deallocates a SBlastProgress structure.
Int2 BLAST_CreateMixedFrameDNATranslation(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info)
Initialize the mixed-frame sequence for out-of-frame gapped extension.
Int2 BlastNumber2Program(EBlastProgramType number, char **program)
Return string name for program given a number.
static Uint1 s_CodonToAA(Uint1 *codon, const Uint1 *codes)
Translate 3 nucleotides into an amino acid MUST have 'X' as unknown amino acid.
Int2 BlastSeqBlkSetCompressedSequence(BLAST_SequenceBlk *seq_blk, const Uint1 *sequence)
Stores the compressed nucleotide sequence in the sequence block structure for the subject sequence wh...
int Blast_GetPartialTranslation(const Uint1 *nucl_seq, Int4 nucl_length, Int2 frame, const Uint1 *genetic_code, Uint1 **translation_buffer_ptr, Int4 *protein_length, Uint1 **mixed_seq_ptr)
Get one frame translation - needed when only parts of subject sequences are translated.
Int2 BlastTargetTranslationNew(BLAST_SequenceBlk *subject_blk, const Uint1 *gen_code_string, EBlastProgramType program_number, Boolean is_ooframe, SBlastTargetTranslation **target)
Sets up structure for target translation.
Int2 BlastSetUp_SeqBlkNew(const Uint1 *buffer, Int4 length, BLAST_SequenceBlk **seq_blk, Boolean buffer_allocated)
Allocates memory for *sequence_blk and then populates it.
Int1 BLAST_ContextToFrame(EBlastProgramType prog_number, Uint4 context_number)
This function translates the context number of a context into the frame of the sequence.
Int2 BLAST_PackDNA(const Uint1 *buffer, Int4 length, EBlastEncoding encoding, Uint1 **packed_seq)
Convert a sequence in ncbi4na or blastna encoding into a packed sequence in ncbi2na encoding.
SBlastTargetTranslation * BlastTargetTranslationFree(SBlastTargetTranslation *target_t)
Free SBlastTargetTranslation.
Int2 BlastCompressBlastnaSequence(BLAST_SequenceBlk *seq_blk)
Adds a specialized representation of sequence data to a sequence block.
Int4 BLAST_GetTranslation(const Uint1 *query_seq, const Uint1 *query_seq_rev, Int4 nt_length, Int2 frame, Uint1 *prot_seq, const Uint1 *genetic_code)
Gets the translation of the nucleotide sequence in the given frame.
void BlastSequenceBlkClean(BLAST_SequenceBlk *seq_blk)
Deallocate memory only for the sequence in the sequence block.
Int2 BLAST_GetAllTranslations(const Uint1 *nucl_seq, EBlastEncoding encoding, Int4 nucl_length, const Uint1 *genetic_code, Uint1 **translation_buffer_ptr, Uint4 **frame_offsets_ptr, Uint1 **mixed_seq_ptr)
Translate nucleotide into 6 frames.
unsigned int BLAST_GetNumberOfContexts(EBlastProgramType p)
Get the number of contexts for a given program.
Int2 BlastProgram2Number(const char *program, EBlastProgramType *number)
Set number for a given program type.
Int2 GetReverseNuclSequence(const Uint1 *sequence, Int4 length, Uint1 **rev_sequence_ptr)
Reverse a nucleotide sequence in the blastna encoding, adding sentinel bytes on both ends.
Int2 BlastSeqBlkNew(BLAST_SequenceBlk **retval)
Allocates a new sequence block structure.
Int4 BSearchInt4(Int4 n, Int4 *A, Int4 size)
The following binary search routine assumes that array A is filled.
static Uint1 * s_BlastGetTranslationTable(const Uint1 *genetic_code, Boolean reverse_complement)
Gets the translation array for a given genetic code.
char * BLAST_StrToUpper(const char *string)
Returns a copy of the input string with all its characters turned to uppercase.
void BlastSequenceBlkCopy(BLAST_SequenceBlk **copy, BLAST_SequenceBlk *src)
Copies contents of the source sequence block without copying sequence buffers; sets all "field_alloca...
void __sfree(void **x)
Implemented in blast_util.c.
double * BLAST_GetStandardAaProbabilities()
Get the standard amino acid probabilities.
Int4 BLAST_TranslateCompressedSequence(Uint1 *translation, Int4 length, const Uint1 *nt_seq, Int2 frame, Uint1 *prot_seq)
Translate a nucleotide sequence without ambiguity codes.
Various auxiliary BLAST utility functions.
#define NCBI2NA_MASK
Bit mask for obtaining a single base from a byte in ncbi2na format.
#define FENCE_SENTRY
This sentry value is used as a 'fence' around the valid portions of partially decoded sequences.
#define IS_residue(x)
Does character encode a residue?
EBlastEncoding
Different types of sequence encodings for sequence retrieval from the BLAST database.
#define BLASTAA_SIZE
Size of aminoacid alphabet.
const Uint1 NCBI4NA_TO_BLASTNA[]
Translates between ncbi4na and blastna.
#define BLASTAA_SEQ_CODE
== Seq_code_ncbistdaa
const Uint1 AMINOACID_TO_NCBISTDAA[]
Translates between ncbieaa and ncbistdaa.
@ eBlastEncodingNcbi4na
NCBI4na.
@ eBlastEncodingNucleotide
Special encoding for preliminary stage of BLAST: permutation of NCBI4na.
@ eBlastEncodingNcbi2na
NCBI2na.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int8_t Int1
1-byte (8-bit) signed integer
const struct ncbi::grid::netcache::search::fields::SIZE size
#define INT1_MAX
largest number represented by a signed one-byte integer
#define MIN(a, b)
returns smaller of a and b.
void * BlastMemDup(const void *orig, size_t size)
Copies memory using memcpy and malloc.
Uint1 Boolean
bool replacement for C
#define TRUE
bool replacement for C indicating true.
#define FALSE
bool replacement for C indicating false.
#define ABS(a)
returns absolute value of a (|a|)
#define NULLB
terminating byte of a char* string.
#define ASSERT
macro for assert.
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Structure to hold a sequence.
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.
Uint1 * compressed_nuc_seq_start
start of compressed_nuc_seq
Uint4 num_seq_ranges
Number of elements in seq_ranges.
Boolean sequence_allocated
TRUE if memory has been allocated for sequence.
BlastMaskLoc * lcase_mask
Locations to be masked from operations on this sequence: lookup table for query; scanning for subject...
SSeqRange * seq_ranges
Ranges of the sequence to search.
Boolean lcase_mask_allocated
TRUE if memory has been allocated for lcase_mask.
Int4 length
Length of sequence.
ESubjectMaskingType mask_type
type of subject masking
Uint1 * sequence_nomask
Start of query sequence without masking.
Boolean seq_ranges_allocated
TRUE if memory has been allocated for seq_ranges.
Uint1 * sequence_start_nomask
Query sequence without masking.
Uint1 * sequence
Sequence used for search (could be translation).
Boolean oof_sequence_allocated
TRUE if memory has been allocated for oof_sequence.
Boolean nomask_allocated
If false the two above are just pointers to sequence and sequence_start.
Uint1 * compressed_nuc_seq
4-to-1 compressed version of sequence
Boolean sequence_start_allocated
TRUE if memory has been allocated for sequence_start.
Uint1 * oof_sequence
Mixed-frame protein representation of a nucleotide sequence for out-of-frame alignment.
Int4 query_length
Length of this query, strand or frame.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
The query related information.
BlastContextInfo * contexts
Information per context.
Int4 last_context
Index of the last element of the context array.
Structure used for scoring calculations.
Boolean protein_alphabet
TRUE if alphabet_code is for a protein alphabet (e.g., ncbistdaa etc.), FALSE for nt.
Int2 alphabet_size
size of alphabet.
Uint1 alphabet_code
NCBI alphabet code.
Stores the letter frequency of a sequence or database.
double * prob
letter probs, (possible) non-zero offset.
Progress monitoring structure.
EBlastStage stage
Stage of the BLAST search currently in progress.
void * user_data
Pointer to user-provided data.
Information about target translations.
EBlastProgramType program_number
Program being run.
Int4 * range
start and stop of translated sequences.
Int4 num_frames
how many frames, one dimension of translation_buffer.
const Uint1 * gen_code_string
Genetic code string for translation.
BLAST_SequenceBlk * subject_blk
target sequence being translated.
Uint1 ** translations
two dimensional array for translations.
Boolean partial
specifies that the nucleotide sequence is too long to be translated in full.
A structure containing two integers, used e.g. to hold the left and right endpoints of a sequence range.
Int4 left
left endpoint of range (zero based)
Int4 right
right endpoint of range (zero based)
static CS_CONTEXT * context
static Uint4 reverse_complement(Uint4 seq, Uint1 size)
voidp calloc(uInt items, uInt size)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4