(!hsp || !gap_align)
110 for(index = 0; index < hsp_list->
hspcnt; ++index) {
115(
Int4) ((hsp->
score+(0.5*scale_factor)) / scale_factor);
139 for(index=0; index<esp->
size; index++)
166 for(
i= 0;
i< hsplist->
hspcnt;
i++) {
215 BLAST_LinkHsps(program_number, hsp_list, query_info, subject_length,
224 doublescale_factor =
235hsp_list, kGapped,
FALSE, sbp, 0,
275 Int4subject_length=0;
276 Int4q_start, s_start;
283 Int4ridx, bidx, queryIdx,
count, t_counts;
284 doublet_factor, t_sum, adj_score;
288 const BooleankTranslateSubject =
290 const BooleankFullTranslation = (fence_hit && *fence_hit);
303 if(num_initial_hsps == 0) {
317 if(kSmithWaterman) {
323 if(kTranslateSubject) {
332translation_buffer = subject_blk->
sequence- 1;
333frame_offsets_a = frame_offsets =
339subject_length = subject_blk->
length;
343subject_length = subject_blk->
length;
3720, (subject_length > 0 ? subject_length :
375 for(index=0; index < num_initial_hsps; index++) {
376hsp = hsp_array[index];
382query_nomask =
query;
407 Int4start_shift = 0;
408 Int4adjusted_s_length;
409 const Uint1* adjusted_subject;
413 if(kTranslateSubject) {
417subject_length = frame_offsets[
context+1] - frame_offsets[
context] - 1;
418}
else if(kIsOutOfFrame) {
424}
else if(kFullTranslation) {
433 if(subject_length > 0) stat_length = subject_length;
440hsp, &q_start, &s_start);
463adjusted_s_length = subject_length;
466 if(!kTranslateSubject && !kSmithWaterman) {
468query_length, &start_shift);
469adjusted_subject =
subject+ start_shift;
489 if(kSmithWaterman) {
496 query, query_length,
497adjusted_subject, adjusted_s_length,
498hsp, hsp_list, score_params,
499hit_params, gap_align, start_shift, cutoff);
503}
else if(kGreedyTraceback) {
505query_length, adjusted_s_length, gap_align,
506score_params, q_start, s_start,
FALSE,
TRUE,
510adjusted_subject, gap_align, score_params, q_start, s_start,
511query_length, adjusted_s_length,
513 ASSERT(!(kFullTranslation && *fence_hit));
516fence_error = (fence_hit && *fence_hit);
533 for( ridx = 0; ridx < 32; ridx++ )
539 for( bidx = 0; bidx < gap_align->
edit_script->
num[ridx]; bidx++ ) {
550 if( matcounts[ ridx ] &&
553 count= matcounts[ ridx ];
558t_factor -= t_counts *
log( t_counts );
564gap_align->
score= adj_score;
568 if(gap_align->
score< cutoff)
581 if( hsp_array[index] )
585 if(!delete_hsp && !kGreedyTraceback) {
588 Int4align_length = 0;
602 if(status)
returnstatus;
616 if(translation_buffer) {
617 sfree(translation_buffer);
623 if(kSmithWaterman) {
629 if(frame_offsets_a) {
630 sfree(frame_offsets_a);
642 if(kGreedyTraceback) {
647 for(index=extra_start; index < hsp_list->
hspcnt; index++) {
649hsp = hsp_array[index];
658query_length,
subject, subject_length, hit_params,
662 subject, score_options, hit_options);
679 for(index = 0; index < hsp_list->
hspcnt; index++) {
680hsp = hsp_array[index];
698 if(query_info != query_info_in)
718score_params, hit_params, sbp,
753 Int4subject_length=0;
755 Int4q_start, s_start;
761 ASSERT(hsp_list && query_blk && subject_blk && gap_align && sbp &&
762score_params && hit_params && query_info && pattern_blk);
764 if(hsp_list->
hspcnt== 0) {
771query_length = query_blk->
length;
773subject_length = subject_blk->
length;
780 for(index=0; index < hsp_list->
hspcnt; index++) {
781 Int4query_pattern_length;
782hsp = hsp_array[index];
786query_pattern_length =
791score_params, q_start, s_start,
792query_length, subject_length,
793query_pattern_length,
840 Int4query_index, subject_index;
843 for(query_index = 0; query_index <
results->num_queries; ++query_index) {
844 if(!(hit_list =
results->hitlist_array[query_index]))
846 for(subject_index = 0;
847subject_index < hit_list->
hsplist_count; ++subject_index) {
854 if(hsp_list->
hspcnt== 0){
861query_info, hsp_list);
880 Int4query_index, subject_index;
883 for(query_index = 0; query_index <
results->num_queries; ++query_index) {
884 if(!(hit_list =
results->hitlist_array[query_index]))
886 for(subject_index = hitlist_size;
887subject_index < hit_list->
hsplist_count; ++subject_index) {
968num_pssm_rows = profile_header->
start_offsets[num_profiles];
969rps_pssm = (
Int4**)
malloc((num_pssm_rows+1) *
sizeof(
Int4*));
970pssm_start = profile_header->
start_offsets+ num_profiles + 1;
973*rps_freq = (
Int4**)
malloc((num_pssm_rows+1) *
sizeof(
Int4*));
977 for(index = 0; index < num_pssm_rows + 1; index++) {
978rps_pssm[index] = pssm_start;
979pssm_start += alphabet_size;
981(*rps_freq)[index] = freq_start;
982freq_start += alphabet_size;
998 for(ic=0; ic<ncol; ic++) {
999 for(ir=0; ir<psi_matrix->
pssm->
nrows; ir++) {
1011 #define RPS_K_MULT 1.2 1049 Int4default_db_genetic_code,
1075 Int4valid_kb_index = -1;
1077 if(!hsp_stream || !seq_src || !
results) {
1087 if(
NULL!= rpsblast_freqs)
1088 sfree(rpsblast_freqs);
1093sbp = gap_align->
sbp;
1097memset((
void*) &seq_arg, 0,
sizeof(seq_arg));
1110index <= query_info->last_context; index++) {
1112valid_kb_index = index;
1117 ASSERT(valid_kb_index != -1);
1140score_params->
gap_open/= scale_factor;
1142score_params->
shift_pen/= scale_factor;
1149 if(interrupt_search && (*interrupt_search)(progress_info) ==
TRUE) {
1164 if(
NULL!= rpsblast_freqs)
1165 sfree(rpsblast_freqs);
1174seq_arg.
oid= hsp_list->
oid;
1219rpsblast_pssms + db_seq_start, sbp);
1253one_query,
NULL, default_db_genetic_code,
1254hsp_list,
NULL, score_params, ext_params,
1255hit_params, psi_options,
NULL);
1260one_query, one_query_info, gap_align, sbp, score_params,
1279 if(hsp_list->
hspcnt== 0) {
1310 if(
NULL!= rpsblast_freqs)
1311 sfree(rpsblast_freqs);
1313 sfree(rpsblast_pssms);
1329 Int4oid = hsplist_array[0]->
oid;
1330 Int4num_hsps = 0,
i= 0, j = 0;
1337 for(
i= 0;
i< num_hsplists;
i++) {
1338num_hsps += hsplist_array[
i]->
hspcnt;
1344 for(
i= 0;
i< num_hsplists;
i++) {
1346 for(j = 0; j < hsp_list->
hspcnt; j++) {
1398 if( !thread_data ) {
1405hit_params->
options, query_info,
1413query_info, thread_data, db_options,
1414psi_options, rps_info, pattern_blk,
1415results_out, interrupt_search,
1428 for(
i= 0;
i<
array->num_elems;
i++) {
1429 array->tld[
i]->gap_align->gap_x_dropoff =
1430 array->tld[
i]->ext_params->gap_x_dropoff_final;
1459 if(!query_info || !hsp_stream || !results_out) {
1464seq_src = thread_data->
tld[0]->
seqsrc;
1472sbp = gap_align->
sbp;
1482default_db_genetic_code,
query, query_info,
1483gap_align, score_params, ext_params, hit_params,
1484rps_info, psi_options,
results,
1485interrupt_search, progress_info);
1495 query, query_info, sbp,
1496 NULL, seq_src, default_db_genetic_code,
1497 NULL, hsp_stream, score_params, ext_params,
1498hit_params, psi_options,
results);
1501 Uint4actual_num_threads = 0;
1511 if(getenv(
"NCBI_BLAST_DISABLE_OPENMP")) {
1512actual_num_threads = 1;
1514 if(actual_num_threads != thread_data->
num_elems) {
1518 #pragma omp parallel for default(none) num_threads(actual_num_threads) schedule(guided) if (actual_num_threads > 1) \ 1519 shared(retval, thread_data, batches, score_params, program_number, sbp, hit_params, pattern_blk, query, \ 1520 ext_params, query_info, default_db_genetic_code, has_been_interrupted, interrupt_search, progress_info, actual_num_threads) 1523 Int4hsplist_itr = 0;
1534tid = omp_get_thread_num();
1536seqsrc = thread_data->
tld[tid]->
seqsrc;
1541 if((interrupt_search && (*interrupt_search)(progress_info) ==
TRUE) &&
1542(actual_num_threads > 1)){
1544 #pragma omp critical(retval) 1547has_been_interrupted =
TRUE;
1550 #pragma omp flush(has_been_interrupted) 1551 if(has_been_interrupted) {
1557 if(perform_traceback) {
1561 if(perform_partial_fetch) {
1571seq_arg.
ranges= ranges;
1584 if(actual_num_threads > 1) {
1585 #pragma omp critical(tback_gen_code) 1616qi, sbp, hp,
NULL, elp)) != 0) {
1618 if(actual_num_threads >1) {
1619 #pragma omp critical(retval) 1621has_been_interrupted =
TRUE;
1625has_been_interrupted =
TRUE;
1633 for(hsplist_itr = 0; hsplist_itr < batch->
num_hsplists; hsplist_itr++) {
1636 if(perform_traceback) {
1639seq_arg.
seq, gap_align, sbp,
1640score_params, hit_params,
1641query_info, pattern_blk);
1645seq_arg.
seq, query_info,
1646gap_align, sbp, score_params,
1647ext_params->
options, hit_params,
1662 if(actual_num_threads > 1) {
1663 #pragma omp critical(tback_gen_code) 1679query_info, gap_align,
1701 if(hsp_list->
hspcnt== 0) {
1709 if(perform_traceback) {
1733 for(rmIdx = 0; rmIdx <
results->num_queries; rmIdx++) {
1737hspIdx <
results->hitlist_array[rmIdx]->hsplist_count;
1740 results->hitlist_array[rmIdx]->hsplist_array[hspIdx]->
1749 for(rmIdx = 0; rmIdx <
results->num_queries; rmIdx++) {
1753hspIdx <
results->hitlist_array[rmIdx]->hsplist_count;
1756 results->hitlist_array[rmIdx]->hsplist_array[hspIdx]->
1802 query, query_info, seq_src, score_options, ext_options,
1803hit_options, eff_len_options, db_options, psi_options, sbp,
1822 const intN_T = ((num_threads == 0) ? 1 : (
int)num_threads);
1825 char* ADAPTIVE_CBS_ENV = getenv(
"ADAPTIVE_CBS");
1827 if(thread_data ==
NULL) {
1832score_options, eff_len_options,
1833ext_options, hit_options, query_info,
1850thread_data, db_options, psi_options,
1851rps_info, pattern_blk,
results, interrupt_search, progress_info);
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Boolean(* TInterruptFnPtr)(SBlastProgress *progress_info)
Prototype for function pointer to determine whether the BLAST search should proceed or be interrupted...
#define CODON_LENGTH
Codons are always of length 3.
#define NUM_FRAMES
Number of frames to which we translate in translating searches.
@ eTracebackSearch
Traceback stage.
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
Boolean BlastGetOffsetsForGappedAlignment(const Uint1 *query, const Uint1 *subject, const BlastScoreBlk *sbp, BlastHSP *hsp, Int4 *q_retval, Int4 *s_retval)
Function to look for the highest scoring window (of size HSP_MAX_WINDOW) in an HSP and return the mid...
Int2 BLAST_GappedAlignmentWithTraceback(EBlastProgramType program, const Uint1 *query, const Uint1 *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length, Boolean *fence_hit)
Perform a gapped alignment with traceback.
Int2 BLAST_GreedyGappedAlignment(const Uint1 *query, const Uint1 *subject, Int4 query_length, Int4 subject_length, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 q_off, Int4 s_off, Boolean compressed_subject, Boolean do_traceback, Boolean *fence_hit)
Greedy gapped alignment, with or without traceback.
void AdjustSubjectRange(Int4 *subject_offset_ptr, Int4 *subject_length_ptr, Int4 query_offset, Int4 query_length, Int4 *start_shift)
Adjusts range of subject sequence to be passed for gapped extension, taking into account the length a...
void BlastGetStartForGappedAlignmentNucl(const Uint1 *query, const Uint1 *subject, BlastHSP *hsp)
Function to look for the longest identity match run (up to size HSP_MAX_IDENT_RUN) in an HSP and retu...
Private interface for blast_gapalign.c.
BlastHSP * Blast_HSPNew(void)
Allocates and zeroes out memory for an HSP structure.
BlastHSPResults * Blast_HSPResultsFree(BlastHSPResults *results)
Deallocate memory for BLAST results.
void Blast_HSPListPHIGetEvalues(BlastHSPList *hsp_list, BlastScoreBlk *sbp, const BlastQueryInfo *query_info, const SPHIPatternSearchBlk *pattern_blk)
Calculate e-values for a PHI BLAST HSP list.
Int2 Blast_HSPGetNumIdentitiesAndPositives(const Uint1 *query, const Uint1 *subject, BlastHSP *hsp, const BlastScoringOptions *score_options, Int4 *align_length_ptr, const BlastScoreBlk *sbp)
Calculate number of identities and positives in an HSP and set the BlastHSP::num_ident and BlastHSP::...
Int2 Blast_HSPResultsSortByEvalue(BlastHSPResults *results)
Sort each hit list in the BLAST results by best e-value.
Boolean Blast_HSPTestIdentityAndLength(EBlastProgramType program_number, BlastHSP *hsp, const Uint1 *query, const Uint1 *subject, const BlastScoringOptions *score_options, const BlastHitSavingOptions *hit_options)
Calculates number of identities and alignment lengths of an HSP via Blast_HSPGetNumIdentities and det...
Int4 Blast_HSPListSubjectBestHit(EBlastProgramType program, const BlastHSPSubjectBestHitOptions *subject_besthit_opts, const BlastQueryInfo *query_info, BlastHSPList *hsp_list)
Int4 Blast_HSPListPurgeHSPsWithCommonEndpoints(EBlastProgramType program, BlastHSPList *hsp_list, Boolean purge)
Check for an overlap of two different alignments and remove redundant HSPs.
Int2 Blast_HSPResultsInsertHSPList(BlastHSPResults *results, BlastHSPList *hsp_list, Int4 hitlist_size)
Blast_HSPResultsInsertHSPList Insert an HSP list to the appropriate place in the results structure.
Int2 Blast_HSPListPurgeNullHSPs(BlastHSPList *hsp_list)
Cleans out the NULLed-out HSPs from the HSP array that is part of the BlastHSPList.
BlastHSPList * BlastHSPListDup(const BlastHSPList *hsp_list)
Returns a duplicate (deep copy) of the given hsp list.
Boolean Blast_HSPTest(BlastHSP *hsp, const BlastHitSavingOptions *hit_options, Int4 align_length)
Determines whether this HSP should be kept or deleted.
BlastHSPResults * Blast_HSPResultsNew(Int4 num_queries)
Initialize the results structure.
Int2 Blast_HitListPurgeNullHSPLists(BlastHitList *hit_list)
Purges a BlastHitList of NULL HSP lists.
Int2 Blast_HSPListGetEvalues(EBlastProgramType program_number, const BlastQueryInfo *query_info, Int4 subject_length, BlastHSPList *hsp_list, Boolean gapped_calculation, Boolean RPS_prelim, const BlastScoreBlk *sbp, double gap_decay_rate, double scaling_factor)
Calculate the expected values for all HSPs in a hit list, without using the sum statistics.
Int2 Blast_TrimHSPListByMaxHsps(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options)
void Blast_HSPListAdjustOddBlastnScores(BlastHSPList *hsp_list, Boolean gapped_calculation, const BlastScoreBlk *sbp)
For nucleotide BLAST, if the match reward score is equal to 2, random alignments are dominated by run...
Int2 Blast_HSPListReapByQueryCoverage(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options, const BlastQueryInfo *query_info, EBlastProgramType program_number)
Discard the HSPs below the min query coverage pct from the HSP list.
BlastHSP * Blast_HSPFree(BlastHSP *hsp)
Deallocate memory for an HSP structure.
Int2 Blast_HSPResultsApplyMasklevel(BlastHSPResults *results, const BlastQueryInfo *query_info, Int4 masklevel, Int4 query_length)
Apply Cross_match like masklevel to HSP list.
Boolean Blast_HSPListIsSortedByScore(const BlastHSPList *hsp_list)
Check if HSP list is sorted by score.
const Uint1 * Blast_HSPGetTargetTranslation(SBlastTargetTranslation *target_t, const BlastHSP *hsp, Int4 *translated_length)
Returns a buffer with a protein translated from nucleotide.
void Blast_HSPAdjustSubjectOffset(BlastHSP *hsp, Int4 start_shift)
Adjusts offsets if partial sequence was used for extension.
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all its components.
void Blast_HSPListPHIGetBitScores(BlastHSPList *hsp_list, BlastScoreBlk *sbp)
Calculate bit scores from raw scores in an HSP list for a PHI BLAST search.
void Blast_HSPListSwap(BlastHSPList *list1, BlastHSPList *list2)
Swaps the two HSP lists via structure assignment.
void Blast_HSPListSortByScore(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by score.
Int2 Blast_HSPListReapByEvalue(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options)
Discard the HSPs above the e-value threshold from the HSP list.
Int2 Blast_HSPListGetBitScores(BlastHSPList *hsp_list, Boolean gapped_calculation, const BlastScoreBlk *sbp)
Calculate bit scores from raw scores in an HSP list.
Boolean Blast_HSPReevaluateWithAmbiguitiesGapped(BlastHSP *hsp, const Uint1 *query_start, const Int4 query_length, const Uint1 *subject_start, const Int4 subject_length, const BlastHitSavingParameters *hit_params, const BlastScoringParameters *score_params, const BlastScoreBlk *sbp)
Reevaluate the HSP's score and percent identity after taking into account the ambiguity information.
Utilities for dealing with BLAST HSPs in the core of BLAST.
void BlastHSPStreamClose(BlastHSPStream *hsp_stream)
Closes the BlastHSPStream structure for writing.
BlastHSPStreamResultBatch * Blast_HSPStreamResultBatchReset(BlastHSPStreamResultBatch *batch)
Frees the list of HSPLists within a batch.
const int kBlastHSPStream_Eof
Return value when the end of the stream is reached (applicable to read method only)
void BlastHSPCBSStreamClose(BlastHSPStream *hsp_stream, int hitlist_size)
int BlastHSPStreamRead(BlastHSPStream *hsp_stream, BlastHSPList **hsp_list)
Invokes the user-specified read function for this BlastHSPStream implementation.
void BlastHSPStreamTBackClose(BlastHSPStream *hsp_stream, BlastHSPResults *results)
Closes the BlastHSPStream structure after traceback.
int BlastHSPStreamToHSPStreamResultsBatch(BlastHSPStream *hsp_stream, BlastHSPStreamResultsBatchArray **batches)
Extracts all data from the BlastHSPStream into its output parameters.
BlastHSPStreamResultsBatchArray * BlastHSPStreamResultsBatchArrayFree(BlastHSPStreamResultsBatchArray *it)
Releases memory acquired in BlastHSPStreamToHSPStreamResultsBatch.
Private interfaces to support the multi-threaded traceback in conjunction with the BlastHSPStream.
Boolean BlastIntervalTreeContainsHSP(const BlastIntervalTree *tree, const BlastHSP *hsp, const BlastQueryInfo *query_info, Int4 min_diag_separation)
Determine whether an interval tree contains an HSP that envelops an input HSP.
void Blast_IntervalTreeReset(BlastIntervalTree *tree)
Empty an interval tree structure but do not free it.
BlastIntervalTree * Blast_IntervalTreeInit(Int4 q_start, Int4 q_end, Int4 s_start, Int4 s_end)
Initialize an interval tree structure.
Int2 BlastIntervalTreeAddHSP(BlastHSP *hsp, BlastIntervalTree *tree, const BlastQueryInfo *query_info, EITreeIndexMethod index_method)
Add an HSP to an existing interval tree.
BlastIntervalTree * Blast_IntervalTreeFree(BlastIntervalTree *tree)
Deallocate an interval tree structure.
Interface for an interval tree, used for fast HSP containment tests.
@ eQueryAndSubject
Index by query and then by subject offset.
Header file for composition-based statistics.
Int2 Blast_RedoAlignmentCore_MT(EBlastProgramType program_number, Uint4 num_threads, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *query_info, BlastScoreBlk *sbp, BLAST_SequenceBlk *subjectBlk, const BlastSeqSrc *seqSrc, Int4 db_genetic_code, BlastHSPList *thisMatch, BlastHSPStream *hsp_stream, BlastScoringParameters *scoringParams, const BlastExtensionParameters *extendParams, const BlastHitSavingParameters *hitParams, const PSIBlastOptions *psiOptions, BlastHSPResults *results)
Top level routine to recompute alignments for each match found by the gapped BLAST algorithm (multi-t...
Int2 Blast_RedoAlignmentCore(EBlastProgramType program_number, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *query_info, BlastScoreBlk *sbp, BLAST_SequenceBlk *subjectBlk, const BlastSeqSrc *seqSrc, Int4 db_genetic_code, BlastHSPList *thisMatch, BlastHSPStream *hsp_stream, BlastScoringParameters *scoringParams, const BlastExtensionParameters *extendParams, const BlastHitSavingParameters *hitParams, const PSIBlastOptions *psiOptions, BlastHSPResults *results)
Top level routine to recompute alignments for each match found by the gapped BLAST algorithm (single-...
#define BLASTERR_INTERRUPTED
BLAST search was interrupted via a user-provided callback.
#define BLASTERR_MEMORY
System error: out of memory condition.
@ eGreedyTbck
Greedy extension (megaBlast)
@ eSmithWatermanTbck
Smith-Waterman finds optimal scores, then ALIGN_EX to find alignment.
@ eSmithWatermanTbckFull
Smith-Waterman to find all alignments.
Boolean Blast_ProgramIsPhiBlast(EBlastProgramType p)
Returns true if program is PHI-BLAST (i.e.
Boolean Blast_SubjectIsProtein(EBlastProgramType p)
Returns true if the subject is protein.
Boolean Blast_ProgramIsRpsBlast(EBlastProgramType p)
Returns true if program is RPS-BLAST (i.e.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Boolean Blast_SubjectIsTranslated(EBlastProgramType p)
Returns true if the subject is translated.
void ** _PSIAllocateMatrix(unsigned int ncols, unsigned int nrows, unsigned int data_type_sz)
Generic 2 dimensional matrix allocator.
void ** _PSIDeallocateMatrix(void **matrix, unsigned int ncols)
Generic 2 dimensional matrix deallocator.
Private interface for Position Iterated BLAST API, contains the PSSM generation engine.
BlastQueryInfo * BlastQueryInfoFree(BlastQueryInfo *query_info)
Deallocate memory for query information structure.
Int2 Blast_GetOneQueryStructs(BlastQueryInfo **one_query_info_ptr, BLAST_SequenceBlk **one_query_ptr, const BlastQueryInfo *query_info, BLAST_SequenceBlk *query, Int4 query_index)
Create auxiliary query structures with all data corresponding to a single query sequence within a con...
Int4 * ContextOffsetsToOffsetArray(const BlastQueryInfo *info)
Copy the context query offsets to an allocated array of Int4.
void OffsetArrayToContextOffsets(BlastQueryInfo *info, Int4 *new_offsets, EBlastProgramType prog)
Copy the context query offsets from an array of Int4, allocating the context array if needed.
BlastQueryInfo * BlastQueryInfoNew(EBlastProgramType program, int num_queries)
Allocate memory for query information structure.
#define FREQ_RATIO_SCALE
header for RPS blast frequency ratios ('.freq') file
#define RPS_MAGIC_NUM
RPS blast version number.
Declaration of ADT to retrieve sequences for the BLAST engine.
Int2 BlastSeqSrcSetRangesArgAddRange(BlastSeqSrcSetRangesArg *arg, Int4 begin, Int4 end, Int4 len)
add new range
Int4 BlastSeqSrcGetSeqLen(const BlastSeqSrc *seq_src, void *oid)
Retrieve sequence length (number of residues/bases)
void BlastSeqSrcSetRangesArgBuild(BlastSeqSrcSetRangesArg *arg)
build BlastSeqSrcSetRangesArg from range list
void BlastSeqSrcReleaseSequence(const BlastSeqSrc *seq_src, BlastSeqSrcGetSeqArg *getseq_arg)
Deallocate individual sequence.
Int8 BlastSeqSrcGetTotLen(const BlastSeqSrc *seq_src)
Get the total length of all sequences in the sequence source.
BlastSeqSrcSetRangesArg * BlastSeqSrcSetRangesArgNew(Int4 num_ranges)
new setrangearg
Boolean BlastSeqSrcGetSupportsPartialFetching(const BlastSeqSrc *seq_src)
Find if the Blast Sequence Source supports partial fetching.
Int2 BlastSeqSrcGetSequence(const BlastSeqSrc *seq_src, BlastSeqSrcGetSeqArg *getseq_arg)
Retrieve an individual sequence.
BlastSeqSrcSetRangesArg * BlastSeqSrcSetRangesArgFree(BlastSeqSrcSetRangesArg *arg)
free setrangearg
Definitions needed for implementing the BlastSeqSrc interface and low level details of the implementa...
Utilities initialize/setup BLAST.
Int2 BLAST_OneSubjectUpdateParameters(EBlastProgramType program_number, Uint4 subject_length, const BlastScoringOptions *scoring_options, BlastQueryInfo *query_info, const BlastScoreBlk *sbp, BlastHitSavingParameters *hit_params, BlastInitialWordParameters *word_params, BlastEffectiveLengthsParameters *eff_len_params)
Recalculates the parameters that depend on an individual sequence, if this is not a database search.
Int4 ** RPSRescalePssm(double scalingFactor, Int4 rps_query_length, const Uint1 *rps_query_seq, Int4 db_seq_length, Int4 **posMatrix, BlastScoreBlk *sbp)
Rescale the PSSM, using composition-based statistics, for use with RPS BLAST.
Blast_KarlinBlk * Blast_KarlinBlkNew(void)
Callocs a Blast_KarlinBlk.
Int2 Blast_KarlinBlkCopy(Blast_KarlinBlk *kbp_to, Blast_KarlinBlk *kbp_from)
Copies contents of one Karlin block to another.
Smith-Waterman alignment for use within the infrastructure of BLAST.
Int2 BLAST_ComputeTraceback(EBlastProgramType program_number, BlastHSPStream *hsp_stream, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, const BlastSeqSrc *seq_src, BlastGapAlignStruct *gap_align, BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, BlastHitSavingParameters *hit_params, BlastEffectiveLengthsParameters *eff_len_params, const BlastDatabaseOptions *db_options, const PSIBlastOptions *psi_options, const BlastRPSInfo *rps_info, SPHIPatternSearchBlk *pattern_blk, BlastHSPResults **results_out, TInterruptFnPtr interrupt_search, SBlastProgress *progress_info)
Given the preliminary alignment results from a database search, redo the gapped alignment with traceb...
static void s_SThreadLocalDataArraySetGapXDropoffFinal(SThreadLocalDataArray *array)
Set the raw X-dropoff value for the final gapped extension with traceback.
void RPSPsiMatrixAttach(BlastScoreBlk *sbp, Int4 **rps_pssm, Int4 alphabet_size)
Modify a BlastScoreBlk structure so that it can be used in RPS-BLAST.
Int2 Blast_HSPUpdateWithTraceback(BlastGapAlignStruct *gap_align, BlastHSP *hsp)
Modifies the HSP data after the final gapped alignment.
Int2 BLAST_ComputeTraceback_MT(EBlastProgramType program_number, BlastHSPStream *hsp_stream, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, SThreadLocalDataArray *thread_data, const BlastDatabaseOptions *db_options, const PSIBlastOptions *psi_options, const BlastRPSInfo *rps_info, SPHIPatternSearchBlk *pattern_blk, BlastHSPResults **results_out, TInterruptFnPtr interrupt_search, SBlastProgress *progress_info)
Identical in function to BLAST_ComputeTraceback, but this performs its task in a multi-threaded manne...
static Int2 s_HSPListPostTracebackUpdate(EBlastProgramType program_number, BlastHSPList *hsp_list, const BlastQueryInfo *query_info, const BlastScoringParameters *score_params, const BlastHitSavingParameters *hit_params, const BlastScoreBlk *sbp, Int4 subject_length)
Updates the e-values after the traceback alignment.
Int2 Blast_RunTracebackSearchWithInterrupt(EBlastProgramType program, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, const BlastSeqSrc *seq_src, const BlastScoringOptions *score_options, const BlastExtensionOptions *ext_options, const BlastHitSavingOptions *hit_options, const BlastEffectiveLengthsOptions *eff_len_options, const BlastDatabaseOptions *db_options, const PSIBlastOptions *psi_options, BlastScoreBlk *sbp, BlastHSPStream *hsp_stream, const BlastRPSInfo *rps_info, SPHIPatternSearchBlk *pattern_blk, BlastHSPResults **results, TInterruptFnPtr interrupt_search, SBlastProgress *progress_info, size_t num_threads)
Entry point from the API level to perform the traceback stage of a BLAST search, given the source of ...
static Int2 s_RPSComputeTraceback(EBlastProgramType program_number, BlastHSPStream *hsp_stream, const BlastSeqSrc *seq_src, Int4 default_db_genetic_code, BLAST_SequenceBlk *query, const BlastQueryInfo *query_info, BlastGapAlignStruct *gap_align, BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, const BlastHitSavingParameters *hit_params, const BlastRPSInfo *rps_info, const PSIBlastOptions *psi_options, BlastHSPResults *results, TInterruptFnPtr interrupt_search, SBlastProgress *progress_info)
Compute traceback information for alignments found by an RPS blast search.
static void s_BlastPruneExtraHits(BlastHSPResults *results, Int4 hitlist_size)
Delete extra subject sequences hits, if after-traceback hit list size is smaller than preliminary hit...
static void s_BlastHSPListRPSUpdate(EBlastProgramType program, BlastHSPList *hsplist)
Switches back the query and subject in all HSPs in an HSP list; also reassigns contexts to indicate q...
EBlastEncoding Blast_TracebackGetEncoding(EBlastProgramType program_number)
Get the subject sequence encoding type for the traceback, given a program number.
BlastSeqSrcSetRangesArg * BLAST_SetupPartialFetching(EBlastProgramType program_number, BlastSeqSrc *seq_src, const BlastHSPList **hsplist_array, Int4 num_hsplists)
Attempts to set up partial fetching, if it fails (e.g.
void RPSPsiMatrixDetach(BlastScoreBlk *sbp)
Remove the artificially built SPsiBlastScoreMatrix structure allocated by RPSPsiMatrixAttach.
#define RPS_K_MULT
Factor to multiply the Karlin-Altschul K parameter by for RPS BLAST, to make e-values more conservati...
static void s_RPSFillFreqRatiosInPsiMatrix(SPsiBlastScoreMatrix *psi_matrix, Int4 **freq, Int4 ncol)
static Int2 s_RPSGapAlignDataPrepare(BlastQueryInfo *concat_db_info, BlastGapAlignStruct *gap_align, const BlastRPSInfo *rps_info, Int4 ***rps_freq)
Prepares an auxiliary BlastQueryInfo structure for the concatenated database and creates a memory map...
Int2 Blast_RunTracebackSearch(EBlastProgramType program, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, const BlastSeqSrc *seq_src, const BlastScoringOptions *score_options, const BlastExtensionOptions *ext_options, const BlastHitSavingOptions *hit_options, const BlastEffectiveLengthsOptions *eff_len_options, const BlastDatabaseOptions *db_options, const PSIBlastOptions *psi_options, BlastScoreBlk *sbp, BlastHSPStream *hsp_stream, const BlastRPSInfo *rps_info, SPHIPatternSearchBlk *pattern_blk, BlastHSPResults **results, size_t num_threads)
Entry point from the API level to perform the traceback stage of a BLAST search, given the source of ...
static Int2 s_PHITracebackFromHSPList(EBlastProgramType program_number, BlastHSPList *hsp_list, BLAST_SequenceBlk *query_blk, BLAST_SequenceBlk *subject_blk, BlastGapAlignStruct *gap_align, BlastScoreBlk *sbp, const BlastScoringParameters *score_params, const BlastHitSavingParameters *hit_params, const BlastQueryInfo *query_info, SPHIPatternSearchBlk *pattern_blk)
Performs traceback alignment for one HSP list in a PHI BLAST search.
static void s_BlastHSPRPSUpdate(BlastHSP *hsp)
Swaps insertions and deletions in an edit script for RPS BLAST search.
static void s_FilterBlastResults(BlastHSPResults *results, const BlastHitSavingOptions *hit_options, const BlastQueryInfo *query_info, EBlastProgramType program_number)
Delete hsps below query coverage percentage.
static void s_HSPListRescaleScores(BlastHSPList *hsp_list, double scale_factor)
Remove scaling from scores previously calculated on the hsp_list.
Int2 Blast_TracebackFromHSPList(EBlastProgramType program_number, BlastHSPList *hsp_list, const BLAST_SequenceBlk *query_blk, BLAST_SequenceBlk *subject_blk, const BlastQueryInfo *query_info_in, BlastGapAlignStruct *gap_align, const BlastScoreBlk *sbp, const BlastScoringParameters *score_params, const BlastExtensionOptions *ext_options, const BlastHitSavingParameters *hit_params, const Uint1 *gen_code_string, Boolean *fence_hit)
Compute gapped alignment with traceback for all HSPs from a single query/subject sequence pair.
Functions to do gapped alignment with traceback.
void SThreadLocalDataArrayTrim(SThreadLocalDataArray *array, Uint4 actual_num_threads)
Reduce the size of the array structure passed in to match the value of the actual_num_threads paramet...
Int2 SThreadLocalDataArraySetup(SThreadLocalDataArray *array, EBlastProgramType program, const BlastScoringOptions *score_options, const BlastEffectiveLengthsOptions *eff_len_options, const BlastExtensionOptions *ext_options, const BlastHitSavingOptions *hit_options, BlastQueryInfo *query_info, BlastScoreBlk *sbp, const BlastSeqSrc *seqsrc)
Set up a newly allocated SThreadLocalDataArray object so it can be used by multiple threads.
SThreadLocalDataArray * SThreadLocalDataArrayFree(SThreadLocalDataArray *array)
Deallocate the SThreadLocalDataArray structure passed in.
SThreadLocalDataArray * SThreadLocalDataArrayNew(Uint4 num_threads)
Allocate a new SThreadLocalDataArray structure.
BlastHSPResults * SThreadLocalDataArrayConsolidateResults(SThreadLocalDataArray *array)
Extracts a single, consolidated BlastHSPResults structure from its input for single threaded processi...
Private interface to support the multi-threaded traceback.
Various auxiliary BLAST utility functions.
Int4 BLAST_FrameToContext(Int2 frame, EBlastProgramType program)
Convert translation frame or strand into a context number suitable for indexing into the BlastQueryIn...
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
Int2 BlastTargetTranslationNew(BLAST_SequenceBlk *subject_blk, const Uint1 *gen_code_string, EBlastProgramType program_number, Boolean is_ooframe, SBlastTargetTranslation **target)
Sets up structure for target translation.
SBlastTargetTranslation * BlastTargetTranslationFree(SBlastTargetTranslation *target_t)
Free SBlastTargetTranslation.
@ eGapAlignIns
Insertion: a gap in subject.
@ eGapAlignSub
Substitution.
@ eGapAlignDel
Deletion: a gap in query.
GapEditScript * GapEditScriptDelete(GapEditScript *esp)
Free edit script structure.
Defines the interface to interact with the genetic code singleton object.
Uint1 * GenCodeSingletonFind(Uint4 gen_code_id)
Returns the genetic code string for the requested genetic code id.
EBlastEncoding
Different types of sequence encodings for sequence retrieval from the BLAST database.
void SmithWatermanScoreWithTraceback(EBlastProgramType program_number, const Uint1 *A, Int4 a_size, const Uint1 *B, Int4 b_size, BlastHSP *template_hsp, BlastHSPList *hsp_list, const BlastScoringParameters *score_params, const BlastHitSavingParameters *hit_params, BlastGapAlignStruct *gap_align, Int4 start_shift, Int4 cutoff)
Find all local alignments between two (unpacked) sequences, using the Smith-Waterman algorithm,...
#define BLASTAA_SIZE
Size of aminoacid alphabet.
@ eBlastEncodingNcbi4na
NCBI4na.
@ eBlastEncodingProtein
NCBIstdaa.
@ eBlastEncodingNucleotide
Special encoding for preliminary stage of BLAST: permutation of NCBI4na.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
Functions to link HSPs using sum statistics.
Int2 BLAST_LinkHsps(EBlastProgramType program_number, BlastHSPList *hsp_list, const BlastQueryInfo *query_info, Int4 subject_length, const BlastScoreBlk *sbp, const BlastLinkHSPParameters *link_hsp_params, Boolean gapped_calculation)
Link HSPs using sum statistics.
#define MIN(a, b)
returns smaller of a and b.
void * BlastMemDup(const void *orig, size_t size)
Copies memory using memcpy and malloc.
Uint1 Boolean
bool replacement for C
#define TRUE
bool replacement for C indicating true.
#define FALSE
bool replacement for C indicating false.
#define ASSERT
macro for assert.
#define INT4_MIN
Smallest (most negative) number represented by signed int.
#define MAX(a, b)
returns larger of a and b.
static PCRE2_SIZE * offsets
static int pattern_info(int what, void *where, BOOL unsetok)
Function prototypes used for PHI BLAST gapped extension and gapped extension with traceback.
Int2 PHIGappedAlignmentWithTraceback(Uint1 *query, Uint1 *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length, Int4 q_pat_length, Int4 s_pat_length, SPHIPatternSearchBlk *pattern_blk)
Perform a gapped alignment with traceback for PHI BLAST.
Structure to hold a sequence.
Int4 length
Length of sequence.
Uint1 * sequence_nomask
Start of query sequence without masking.
Uint1 * sequence
Sequence used for search (could be translation).
Uint1 * oof_sequence
Mixed-frame protein representation of a nucleotide sequence for out-of-frame alignment.
Uint1 * gen_code_string
for nucleotide subject sequences (tblast[nx]), the genetic code used to create a translated protein s...
Int4 query_length
Length of this query, strand or frame.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
Options used to create the ReadDBFILE structure. Include database name and various information for res...
Int4 genetic_code
Genetic code to use for translation, tblast[nx] only.
Options for setting up effective lengths and search spaces.
Parameters for setting up effective lengths and search spaces.
BlastEffectiveLengthsOptions * options
User provided values for these parameters.
Options used for gapped extension These include: a.
EBlastTbackExt eTbackExt
type of traceback extension.
Int4 compositionBasedStats
mode of compositional adjustment to use; if zero then compositional adjustment is not used
Computed values used as parameters for gapped alignments.
BlastExtensionOptions * options
The original (unparsed) options.
Structure supporting the gapped alignment.
Boolean positionBased
Is this PSI-BLAST?
Int4 query_stop
query end offset of current alignment
Int4 subject_start
subject start offset of current alignment
BlastScoreBlk * sbp
Pointer to the scoring information block.
Int4 query_start
query start offset of current alignment
Int4 subject_stop
subject end offset of current alignment
Int4 score
Return value: alignment score.
GapEditScript * edit_script
The traceback (gap) information.
Int4 cutoff_score
Raw cutoff score corresponding to the e-value provided by the user if no sum stats,...
BlastHSPSubjectBestHitOptions * subject_besthit_opts
Subject Culling.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Int4 hspcnt
Number of HSPs saved.
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Int4 allocated
The allocated size of the hsp_array.
Int4 query_index
Index of the query which this HSPList corresponds to.
The structure to contain all BLAST results, for multiple queries.
structure used to hold a collection of hits retrieved from the HSPStream
Int4 num_hsplists
number of lists of HSPs returned
BlastHSPList ** hsplist_array
array of HSP lists returned
Structure to extract the contents of the BlastHSPStream for MT traceback processing.
Uint4 num_batches
number of batches populated in the array_of_batches element
BlastHSPStreamResultBatch ** array_of_batches
Array of batches, each corresponding to a single OID with BLAST hits.
Default implementation of BlastHSPStream.
Structure holding all information about an HSP.
SPHIHspInfo * pat_info
In PHI BLAST, information about this pattern match.
BlastSeg query
Query sequence info.
Int4 context
Context number of query.
BlastSeg subject
Subject sequence info.
GapEditScript * gap_info
ALL gapped alignment is here.
Int4 score
This HSP's raw score.
The structure to contain all BLAST results for one query sequence.
BlastHSPList ** hsplist_array
Array of HSP lists for individual database hits.
Int4 hsplist_count
Filled size of the HSP lists array.
Options used when evaluating and saving hits These include: a.
Int4 max_hsps_per_subject
Queries are paired reads, for mapping.
Int4 hitlist_size
Maximal number of database sequences to return results for.
Int4 min_diag_separation
How many diagonals separate a hit from a substantial alignment before it's not blocked out.
double query_cov_hsp_perc
Min query coverage hsp percentage.
BlastHSPFilteringOptions * hsp_filt_opt
Contains options to configure the HSP filtering/writing structures. If not set, the default HSP filt...
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
BlastGappedCutoffs * cutoffs
per-context gapped cutoff information
Int4 mask_level
Only keep the highest scoring HSP when more than one HSP overlaps the same region of the query by mor...
Int4 cutoff_score_min
smallest cutoff score across all contexts
BlastLinkHSPParameters * link_hsp_params
Parameters for linking HSPs with sum statistics; linking is not done if NULL.
BlastHitSavingOptions * options
The original (unparsed) options.
Main structure describing an interval tree.
The query related information.
Int4 first_context
Index of the first element of the context array.
BlastContextInfo * contexts
Information per context.
int num_queries
Number of query sequences.
struct SPHIQueryInfo * pattern_info
Counts of PHI BLAST pattern occurrences, used in PHI BLAST only.
Int4 last_context
Index of the last element of the context array.
Uint4 max_length
Length of the longest among the concatenated queries.
double * karlin_k
one Karlin value for each DB sequence
The RPS engine uses this structure to access all of the RPS blast related data (assumed to be collect...
BlastRPSProfileHeader * profile_header
for '.rps' file
BlastRPSAuxInfo aux_info
for '.aux' file
BlastRPSFreqRatiosHeader * freq_ratios_header
for '.freq' file
Structure used for scoring calculations.
Blast_KarlinBlk ** kbp_psi
K-A parameters for position-based alignments.
Blast_KarlinBlk ** kbp_gap
K-A parameters for gapped alignments.
SPsiBlastScoreMatrix * psi_matrix
PSSM and associated data.
Int2 alphabet_size
size of alphabet.
Int4 number_of_contexts
Used by sfp and kbp, how large are these.
SBlastScoreMatrix * matrix
scoring matrix data
Boolean complexity_adjusted_scoring
Use cross_match-like complexity adjustment on raw scores.
Blast_KarlinBlk ** kbp_gap_psi
K-A parameters for psi alignments.
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Boolean gapped_calculation
gap-free search if FALSE
Boolean is_ooframe
Should out-of-frame gapping be used in a translated search?
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
double scale_factor
multiplier for all cutoff scores
Int4 gap_extend
Penalty for each gap residue (scaled version)
Int4 shift_pen
Penalty for shifting a frame in out-of-frame gapping (scaled version)
Int4 gap_open
Extra penalty for starting a gap (scaled version)
BlastScoringOptions * options
User-provided values for these params.
One sequence segment within an HSP.
Int4 gapped_start
Where the gapped extension started.
Int2 frame
Translation frame.
Int4 offset
Start of hsp.
Structure used as the second argument to functions satisfying the GetSeqBlkFnPtr signature,...
Int4 oid
Oid in BLAST database, index in an array of sequences, etc [in].
Boolean reset_ranges
This option allows the BLAST engine to communicate with the BlastSeqSrc that the offset ranges for a ...
EBlastEncoding encoding
Encoding of sequence, i.e.
Boolean check_oid_exclusion
Check whether an OID is excluded due to overlapping filtering.
BlastSeqSrcSetRangesArg * ranges
BLAST_SequenceBlk * seq
Sequence to return, if NULL, it should allocated by GetSeqBlkFnPtr (using BlastSeqBlkNew or BlastSetU...
Structure used as the argument to function SetRanges.
Int4 oid
Oid in BLAST database, index in an array of sequences, etc [in].
Complete type definition of Blast Sequence Source ADT.
double K
K value used in statistics.
double logK
natural log of K value used in statistics
Edit script: linked list of correspondences between two sequences.
Int4 * num
Array of number of operations.
Int4 size
Size of above arrays.
EGapAlignOpType * op_type
Array of type of operation.
Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST) Some of these possibly should...
Progress monitoring structure.
EBlastStage stage
Stage of the BLAST search currently in progress.
Scoring matrix used in BLAST.
size_t nrows
number of rows
double lambda
derived value of the matrix lambda -RMH-
double * freqs
array of assumed matrix background frequencies -RMH-
size_t ncols
number of columns
int ** data
actual scoring matrix data, stored in row-major form
Information about target translations.
Int4 index
Index of query pattern occurrence for this HSP.
Int4 length
Length of this pattern occurrence in subject.
Structure containing all auxiliary information needed in a pattern search.
In PHI BLAST, structure containing information about all pattern occurrences in query.
Scoring matrix data used in PSI-BLAST.
SBlastScoreMatrix * pssm
position-specific score matrix
double ** freq_ratios
PSSM's frequency ratios, dimensions are specified in pssm data above.
BlastGapAlignStruct * gap_align
BlastHitSavingParameters * hit_params
Hit saving parameters.
BlastScoringParameters * score_params
Scoring parameters, allocated anew in BLAST_GapAlignSetUp.
BlastSeqSrc * seqsrc
BlastSeqSrc so that each thread can set its own partial fetching.
BlastExtensionParameters * ext_params
Gapped extension parameters, allocated anew in BLAST_GapAlignSetUp.
BlastHSPResults * results
Structure to store results from this thread.
BlastEffectiveLengthsParameters * eff_len_params
Parameters for effective lengths calculations.
BlastQueryInfo * query_info
The effective search space is modified.
static CS_CONTEXT * context
voidp calloc(uInt items, uInt size)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4