A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/blast__kappa_8c_source.html below:

NCBI C++ ToolKit: src/algo/blast/core/blast_kappa.c Source File

60 # define STDERR_COMMA stderr, 76 #ifndef KAPPA_BLASTP_NO_SEG_SEQUENCE 77 #define KAPPA_BLASTP_NO_SEG_SEQUENCE 0 84 #ifndef KAPPA_TBLASTN_NO_SEG_SEQUENCE 85 #define KAPPA_TBLASTN_NO_SEG_SEQUENCE 0 108  for

(hsp_index = 0; hsp_index < hsp_list->

hspcnt

; hsp_index++) {

145  double

best_evalue = DBL_MAX;

153  double

query_eff =

MAX

((query_length - length_adjustment), 1);

154  double

subject_eff =

MAX

((subject_length - length_adjustment), 1.0);

155  double

dblen_eff = (double) query_context->

eff_searchsp

/ query_eff;

158  double

db_to_sequence_scale = subject_eff / dblen_eff;

161  for

(hsp_index = 0; hsp_index < hsp_list->

hspcnt

; hsp_index++) {

163  double

align_p_value;

164  double

combined_p_value;

168 #ifdef KAPPA_PRINT_DIAGNOSTICS 170  double

old_e_value = hsp->

evalue

;

172

hsp->

evalue

*= db_to_sequence_scale;

177

hsp->

evalue

/= db_to_sequence_scale;

179  if

(hsp->

evalue

< best_evalue) {

180

best_evalue = hsp->

evalue

;

183 #ifdef KAPPA_PRINT_DIAGNOSTICS 186

Blast_GiList* gi_list;

187

gi_list = BlastSeqSrcGetGis(seqSrc, (

void

*) (&subject_id));

188  if

((gi_list) && (gi_list->num_used > 0)) {

189

sequence_gi = gi_list->data[0];

193

printf(

"GI %d Lambda ratio %e comp. p-value %e; " 194  "adjust E-value of query length %d match length " 195  "%d from %e to %e\n"

,

196

sequence_gi, LambdaRatio, comp_p_value,

197

query_length, subject_length, old_e_value, hsp->

evalue

);

198

Blast_GiListFree(gi_list);

230

old_hspcnt = *hspcnt;

232  for

(iread = 1; iread < *hspcnt; iread++) {

237

hsp1 = hsp_array[iread];

238  for

(ireadBack = 0; ireadBack < iread && hsp1 !=

NULL

; ireadBack++) {

243

hsp2 = hsp_array[ireadBack];

245  if

( hsp2 ==

NULL

) {

268  for

(iread = 0; iread < *hspcnt; iread++) {

269  if

(hsp_array[iread] !=

NULL

) {

270

hsp_array[iwrite++] = hsp_array[iread];

275  for

( ; iwrite < old_hspcnt; iwrite++) {

276

hsp_array[iwrite] =

NULL

;

285  if

(edit_script !=

NULL

)

312  static const int

unknown_value = 0;

316  if

(hsp_list ==

NULL

) {

319

hsp_list->

oid

= oid;

321  for

(align = *alignments;

NULL

!= align; align = align->

next

) {

328

unknown_value, unknown_value,

331

&editScript, &new_hsp);

404  double

pvalueForThisPair,

409

*pbestEvalue = DBL_MAX;

431  if

((0 <= pvalueForThisPair) && (pvalueForThisPair <= 1)) {

434

&queryInfo->

contexts

[context_index],

435

LambdaRatio, subject_id);

440  if

(hsp_list->

hspcnt

> 0) {

445  return

status == 0 ? 0 : -1;

465  const Uint1

* gen_code_string,

480  if

( !hsp_list)

return

;

484

memset((

void

*) &seq_arg, 0,

sizeof

(seq_arg));

485

seq_arg.oid = hsp_list->

oid

;

486

seq_arg.encoding = encoding;

487

seq_arg.check_oid_exclusion =

TRUE

;

488

seq_arg.ranges = ranges;

494

subject_blk = seq_arg.seq;

503  subject

= seq_arg.seq->sequence;

509  for

(

i

= 0;

i

< hsp_list->

hspcnt

;

i

++) {

551 s_CalcLambda

(

double

probs[],

int

min_score,

int

max_score,

double

lambda0)

559

score_range = max_score - min_score + 1;

561  for

(

i

= 0;

i

< score_range;

i

++) {

562

avg += (min_score +

i

) * probs[

i

];

569

freq.

sprob

= &probs[-min_score];

595  const char

*matrixName,

596  double

**startNumerator)

601  double

*standardProb;

607  if

(stdFreqRatios ==

NULL

) {

610  for

(

i

= 0;

i

< numPositions;

i

++) {

612

returnRatios[

i

][j] = stdFreqRatios->

data

[

query

[

i

]][j];

618  if

(standardProb ==

NULL

) {

622  for

(

i

= 0;

i

< numPositions;

i

++) {

628

returnRatios[

i

][j] = startNumerator[

i

][j] / standardProb[j];

632  sfree

(standardProb);

650  const char

*matrixName)

658  if

(stdFreqRatios ==

NULL

) {

663

returnRatios[

i

][j] = stdFreqRatios->

data

[

i

][j];

676 #define SCALING_FACTOR 32 695  const char

* matrixName,

712

fillPosMatrix, posFreqs);

716  if

(posSearch ==

NULL

|| compactSearch ==

NULL

|| internal_pssm ==

NULL

) {

721

internal_pssm->

ncols

, internal_pssm->

nrows

);

724

internal_pssm->

ncols

, internal_pssm->

nrows

);

727

internal_pssm->

nrows

);

735

internal_pssm->

ncols

, internal_pssm->

nrows

);

738

internal_pssm->

ncols

, internal_pssm->

nrows

);

741

internal_pssm->

ncols

, internal_pssm->

nrows

);

743

scale_factor,

FALSE

, sbp);

775  double

localScalingFactor)

781  for

(frame_index = 0; frame_index < 6; frame_index++) {

782

tail[frame_index] =

NULL

;

783

numAligns[frame_index] = 0;

786  for

(hsp_index = 0; hsp_index < hspcnt; hsp_index++) {

787  BlastHSP

* hsp = hsp_array[hsp_index];

789

frame_index = hsp->

context

- init_context;

790  ASSERT

(frame_index < 6 && frame_index >= 0);

801  if

(new_align ==

NULL

)

803  if

(tail[frame_index] ==

NULL

) {

805  self

[frame_index] = new_align;

808

tail[frame_index]->

next

= new_align;

810

tail[frame_index] = new_align;

811

numAligns[frame_index]++;

853  Int4

* queryAlignmentExtent,

854  Int4

* matchAlignmentExtent,

857  Int4

XdropAlignScore;

859  Int4

doublingCount = 0;

868

&(

subject

->data[matchStart]) - 1,

869

queryEnd - queryStart + 1, matchEnd - matchStart + 1,

870

queryAlignmentExtent,

872

gap_align, scoringParams, queryStart - 1,

FALSE

,

FALSE

,

877  if

((XdropAlignScore < score) && (doublingCount < 3)) {

880

}

while

((XdropAlignScore < score) && (doublingCount < 3));

883

*newScore = XdropAlignScore;

910  if

(

self

!=

NULL

) {

911  if

(self->index >=0) {

913  if

(self->length > 0) {

918  free

(self->local_data);

920  self

->local_data =

NULL

;

945  Uint1

* subject_seq,

int

subject_len,

947  int

* query_ext_len,

int

* subject_ext_len,

950  int

num_identical = 0;

952  int

gaps_in_query = 0;

953  int

gaps_in_subject = 0;

956  while

(q_pos < query_len && s_pos < subject_len) {

960  while

(q_pos < query_len && s_pos < subject_len

961

&& query_seq[q_pos] == subject_seq[s_pos]) {

969  for

(

n

=1;

n

< max_shift && q_pos +

n

+ 1 < query_len

970

&& s_pos +

n

+ 1 < subject_len && !

match

;

n

++) {

973  if

(query_seq[q_pos +

n

] == subject_seq[s_pos +

n

]

974

&& query_seq[q_pos +

n

+ 1] == subject_seq[s_pos +

n

+ 1]) {

985  if

(!

match

&& query_seq[q_pos +

n

] == subject_seq[s_pos]

986

&& query_seq[q_pos +

n

+ 1] == subject_seq[s_pos + 1]) {

991

gaps_in_subject +=

n

;

996  if

(!

match

&& query_seq[q_pos] == subject_seq[s_pos +

n

]

997

&& query_seq[q_pos + 1] == subject_seq[s_pos +

n

+ 1]) {

1002

gaps_in_query +=

n

;

1014

*query_ext_len = q_pos;

1015

*subject_ext_len = s_pos;

1016

*align_len = q_pos > s_pos ? q_pos + gaps_in_query : s_pos + gaps_in_subject;

1018  return

num_identical;

1040  Uint1

* subject_seq,

int

subject_len,

1042  int

* query_ext_len,

int

* subject_ext_len,

1045  int

q_pos = query_len - 1;

1046  int

s_pos = subject_len - 1;

1047  int

num_identical = 0;

1048  int

gaps_in_query = 0;

1049  int

gaps_in_subject = 0;

1050  while

(q_pos >= 0 && s_pos >= 0) {

1055  while

(q_pos > 0 && s_pos > 0 && query_seq[q_pos] == subject_seq[s_pos]) {

1062  for

(

n

=1;

n

< max_shift && q_pos -

n

- 1 > 0 && s_pos -

n

- 1 > 0

1066  if

(query_seq[q_pos -

n

] == subject_seq[s_pos -

n

]

1067

&& query_seq[q_pos -

n

- 1] == subject_seq[s_pos -

n

- 1]) {

1075  if

(!

match

&& query_seq[q_pos -

n

] == subject_seq[s_pos]

1076

&& query_seq[q_pos -

n

- 1] == subject_seq[s_pos - 1]) {

1080

gaps_in_subject +=

n

;

1085  if

(!

match

&& query_seq[q_pos] == subject_seq[s_pos -

n

]

1086

&& query_seq[q_pos - 1] == subject_seq[s_pos -

n

- 1]) {

1090

gaps_in_query +=

n

;

1101

*query_ext_len = query_len - q_pos - 1;

1102

*subject_ext_len = subject_len - s_pos - 1;

1103

*align_len += *query_ext_len > *subject_ext_len ?

1104

*query_ext_len + gaps_in_query : *subject_ext_len + gaps_in_subject;

1106  return

num_identical;

1121  for

(k=0;k < word_size;k++) {

1144  const Uint8

* query_hashes,

1146  Uint1

* subject_seq,

1155  int

subject_from = 0;

1158  int

num_identical = 0;

1162  if

(!query_seq || !query_hashes || !subject_seq

1163

|| query_len < word_size || subject_len < word_size) {

1169  for

(s_pos = 0; s_pos < subject_len - word_size; s_pos++) {

1173  if

(s_pos == 0 ||

match

) {

1179  hash

+= subject_seq[s_pos + word_size - 1];

1184  for

(q_pos = query_from;q_pos < query_len - word_size; q_pos++) {

1185  if

(query_hashes[q_pos] ==

hash

) {

1191  if

(q_pos < query_len - word_size) {

1192  int

query_start = q_pos;

1193  int

subject_start = s_pos;

1195  int

query_left_len, query_right_len;

1196  int

subject_left_len, subject_right_len;

1197  int

align_len_left=0, align_len_right=0;

1200

num_identical += word_size;

1204

query_start - query_from,

1205

subject_seq + subject_from,

1206

subject_start - subject_from,

1208

&query_left_len, &subject_left_len,

1212

num_identical +=

s_ExtendRight

(query_seq + query_start + word_size,

1213

query_len - query_start - word_size,

1214

subject_seq + subject_start + word_size,

1215

subject_len - subject_start - word_size,

1217

&query_right_len, &subject_right_len,

1224

query_from = query_start + word_size + query_right_len;

1225

subject_from = subject_start + word_size + subject_right_len;

1227

s_pos = subject_from - 1;

1234  return

num_identical;

1260  const int

seqOffset,

1262  const int

queryOffset,

1263  const Uint8

* query_words,

1266  int

qStart = align->

queryStart

- queryOffset;

1268  int

qEnd = align->

queryEnd

- queryOffset - 1;

1270  int

sEnd = align->

matchEnd

- seqOffset - 1;

1271  const double

kMinFractionNearIdentical = 0.95;

1274  int

query_len = qEnd - qStart + 1;

1275  int

subject_len = sEnd - sStart + 1;

1276  int

align_len =

MIN

(query_len, subject_len);

1278  int

query_left_len = 0;

1279  int

subject_left_len = 0;

1280  int

query_right_len = 0;

1281  int

subject_right_len = 0;

1282  int

align_left_len = 0;

1283  int

align_right_len = 0;

1285  double

fraction_identical;

1290

seqData->

data

+ sStart, subject_len,

1292

&query_right_len, &subject_right_len,

1296  if

(query_right_len >= query_len || subject_right_len >= subject_len) {

1297

fraction_identical = (double)num_identical / (

double

)align_len;

1298  ASSERT

(fraction_identical - 1.0 < 1e-10);

1299  return

fraction_identical > kMinFractionNearIdentical;

1304

num_identical +=

s_ExtendLeft

(queryData->

data

+ qStart + query_right_len,

1305

query_len - query_right_len,

1306

seqData->

data

+ sStart + subject_right_len,

1307

subject_len - subject_right_len,

1309

&query_left_len, &subject_left_len,

1314  if

(query_left_len + query_right_len >= query_len

1315

|| subject_left_len + subject_right_len >= subject_len) {

1317

fraction_identical = (double)num_identical / (

double

)(align_len);

1318  ASSERT

(fraction_identical - 1.0 < 1e-10);

1319  return

fraction_identical > kMinFractionNearIdentical;

1325

query_words + qStart + query_right_len,

1326

query_len - query_left_len - query_right_len,

1327

seqData->

data

+ sStart + subject_right_len,

1328

subject_len - subject_left_len - subject_right_len,

1331

fraction_identical = (double)num_identical / (

double

)align_len;

1332  ASSERT

(fraction_identical - 1.0 < 1e-10);

1333  if

(fraction_identical > kMinFractionNearIdentical) {

1360  Int4

default_db_genetic_code,

1367  self

->local_data =

NULL

;

1370  if

(seq_info !=

NULL

) {

1371  self

->local_data = seq_info;

1376

memset((

void

*) &seq_info->

seq_arg

, 0,

sizeof

(seq_info->

seq_arg

));

1377

seq_info->

seq_arg

.

oid

=

self

->index = subject_index;

1403  if

(self->length == 0) {

1414 #define BLASTP_MASK_RESIDUE 21 1416 #define BLASTP_MASK_INSTRUCTIONS "S 10 1.8 2.1" 1438

&filter_options,

NULL

);

1441

seqData->

length

, 0, filter_options,

1442

&mask_seqloc,

NULL

);

1445  if

(is_seq_biased) {

1446

*is_seq_biased = (mask_seqloc !=

NULL

);

1452  if

(mask_seqloc !=

NULL

) {

1480  const Uint8

* query_words,

1482  const Boolean

shouldTestIdentical,

1484  const Boolean

isSmithWaterman,

1485  Boolean

* subject_maybe_biased)

1491  Uint1

* translation_buffer;

1493  Int4

translated_length;

1494  int

translation_frame;

1495  Uint1

* na_sequence;

1496  int

translation_start;

1498  int

num_nucleotides;

1500

local_data =

self

->local_data;

1508

translation_frame = range->

context

;

1509  if

(translation_frame > 0) {

1510

translation_start = 3 * range->

begin

;

1513  self

->length - 3 * range->

end

+ translation_frame + 1;

1516

3 * (range->

end

- range->

begin

) +

ABS

(translation_frame) - 1;

1520

(

Int2

) translation_frame,

1522

&translation_buffer,

1526

seqData->

buffer

= translation_buffer;

1527

seqData->

data

= translation_buffer + 1;

1528

seqData->

length

= translated_length;

1531  if

(compo_adjust_mode

1532

&& (!subject_maybe_biased || *subject_maybe_biased)) {

1534  if

( (!shouldTestIdentical)

1535

|| (shouldTestIdentical

1537

queryData, q_range->

begin

,

1538

query_words, align)))) {

1541

subject_maybe_biased);

1578  const Uint8

* query_words,

1580  const Boolean

shouldTestIdentical,

1582  const Boolean

isSmithWaterman,

1583  Boolean

* subject_maybe_biased)

1593  if

(self->local_data ==

NULL

)

1606

seqData->

length

=

self

->length;

1610  if

((self->index < 0) && (align->

frame

!= 0)) {

1615  for

(;

i

<

f

;

i

++) {

1621  for

(idx = 0; idx < seqData->

length

; idx++) {

1622

seqData->

data

[idx] = origData[idx];

1626  if

(compo_adjust_mode

1627

&& (!subject_maybe_biased || *subject_maybe_biased)) {

1629  if

( (!shouldTestIdentical)

1630

|| (shouldTestIdentical

1632

q_range->

begin

, query_words,

1636

subject_maybe_biased);

1642

*seqData->

data

++ =

'\0'

;

1677  const Uint8

* query_words,

1679  const Boolean

shouldTestIdentical,

1681  const Boolean

isSmithWaterman,

1682  Boolean

* subject_maybe_biased)

1692  for

(idx = 0; idx < queryData->

length

; idx++) {

1695

queryData->

data

[idx] = (origData[idx] != 24) ? origData[idx] : 3;

1700

q_range, queryData, query_words,

1701

align, shouldTestIdentical,

1702

compo_adjust_mode, isSmithWaterman,

1703

subject_maybe_biased);

1706

q_range, queryData, query_words,

1707

align, shouldTestIdentical,

1708

compo_adjust_mode, isSmithWaterman,

1709

subject_maybe_biased);

1755  int

queryStart, queryEnd, queryIndex, matchStart, matchEnd, frame;

1762

queryIndex = query_range->

context

;

1765

frame = subject_range->

context

;

1768

queryStart, queryEnd, queryIndex,

1769

matchStart, matchEnd, frame,

1772

*edit_script =

NULL

;

1818  Int4

ccat_query_length,

1821  Int4

full_subject_length,

1829  Int4

queryExtent, matchExtent;

1842

(void) ccat_query_length;

1843

(void) full_subject_length;

1848  subject

, matchStart, *pmatchEnd,

1849

gap_align, scoringParams,

1850

score, &queryExtent, &matchExtent,

1852

*pqueryEnd = queryStart + queryExtent;

1853

*pmatchEnd = matchStart + matchExtent;

1858  if

(editScript !=

NULL

) {

1860  Int4

aqueryStart = queryStart + query_range->

begin

;

1861  Int4

aqueryEnd = *pqueryEnd + query_range->

begin

;

1862  Int4

amatchStart = matchStart + subject_range->

begin

;

1863  Int4

amatchEnd = *pmatchEnd + subject_range->

begin

;

1866

aqueryStart, aqueryEnd,

1868

amatchStart, amatchEnd,

1869

subject_range->

context

, editScript);

1876  return

obj !=

NULL

? 0 : -1;

1903  int

ccat_query_length,

1906  int

full_subject_length,

1910  Int4

q_start, s_start;

1921

(void) ccat_query_length;

1922

(void) full_subject_length;

1937

subject_data->

data

, gapAlign,

1945

query_range, subject_range,

1946

matrix_adjust_rule);

1995  sfree

(*searchParams);

1996

*searchParams =

NULL

;

2029  for

(

i

= 0;

i

< numQueries;

i

++) {

2033  if

(positionBased) {

2089  if

(positionBased) {

2091

rows = query_length;

2097  for

(

i

= 0;

i

< rows;

i

++) {

2120  double

scale_factor)

2123  for

(

i

= 0;

i

< num_queries;

i

++) {

2126

kbp->

Lambda

/= scale_factor;

2172  if

(positionBased) {

2174

rows = query_length;

2179  for

(

i

= 0;

i

< rows;

i

++) {

2202  double

scale_factor,

2203  const char

* matrixName)

2209

lenName = strlen(matrixName);

2210  if

(

NULL

== (self->matrixName =

malloc

(lenName + 1))) {

2213

memcpy(self->matrixName, matrixName, lenName + 1);

2215  if

(self->positionBased) {

2225

queryBlk->

length

, sbp, scale_factor);

2226  self

->ungappedLambda = sbp->

kbp_psi

[0]->

Lambda

/ scale_factor;

2233

self->startFreqRatios,

2234

self->ungappedLambda);

2247  Uint8

* query_hashes;

2253  if

(!seq_data || !words || seq_len < word_size) {

2257

query_hashes = (

Uint8

*)

calloc

((seq_len - word_size + 1),

2259

*words = query_hashes;

2261  if

(!query_hashes) {

2267

query_hashes[0] =

s_GetHash

(&seq_data[0], word_size);

2268  for

(

i

= 1;

i

< seq_len - word_size;

i

++) {

2269

query_hashes[

i

] = query_hashes[

i

- 1];

2270

query_hashes[

i

] <<= 5;

2271

query_hashes[

i

] &=

mask

;

2272

query_hashes[

i

] += (

Uint8

)seq_data[

i

+ word_size - 1];

2288  for

(

i

= 0;

i

< num_queries;

i

++) {

2289  if

((*query_info)[

i

].words) {

2290  free

((*query_info)[

i

].words);

2295

*query_info =

NULL

;

2319  if

(compo_query_info !=

NULL

) {

2320  for

(

i

= 0;

i

< num_queries;

i

++) {

2327

query_info->

seq

.

data

= &query_data[query_info->

origin

];

2332

&query_info->

words

);

2340  return

compo_query_info;

2360  double

min_lambda = DBL_MAX;

2367  if

(gapping_params ==

NULL

)

2374  for

(

i

= 0;

i

< num_queries;

i

++) {

2376  context

->sbp->kbp_gap[

i

]->Lambda < min_lambda) {

2377

min_lambda =

context

->sbp->kbp_gap[

i

]->Lambda;

2385  return

gapping_params;

2399 #define NEAR_IDENTICAL_BITS_PER_POSITION (1.74) 2418

gapping_params =

NULL

;

2437  double

near_identical_cutoff=0;

2440

index <= queryInfo->last_context; ++index) {

2443

near_identical_cutoff =

2445

/

context

->sbp->kbp_gap[index]->Lambda;

2463  context

->scoringParams->options->matrix);

2469  if

(gapping_params ==

NULL

) {

2474

compo_adjust_mode, positionBased,

2475

query_is_translated,

2476

subject_is_translated,

2479

near_identical_cutoff);

2500

num_queries =

results

->num_queries;

2501  for

(query_index = 0; query_index < num_queries; query_index++) {

2507

hitlist =

results

->hitlist_array[query_index];

2535  while

(

copy

->state_struct !=

NULL

) {

2537  copy

->state_struct =

copy

->state_struct->next;

2547  if

(

copy

->edit_script !=

NULL

) {

2548  if

(

copy

->edit_script->op_type) {

2551  if

(

copy

->edit_script->num) {

2558  if

(

copy

->fwd_prelim_tback !=

NULL

) {

2559  if

(

copy

->fwd_prelim_tback->edit_ops) {

2560  sfree

(

copy

->fwd_prelim_tback->edit_ops);

2566  if

(

copy

->rev_prelim_tback !=

NULL

) {

2567  if

(

copy

->rev_prelim_tback->edit_ops) {

2568  sfree

(

copy

->rev_prelim_tback->edit_ops);

2574  if

(

copy

->greedy_align_mem !=

NULL

) {

2623  copy

->state_struct = c;

2651  copy

->edit_script = c;

2659  for

(

i

= 0;

i

< o->

size

; ++

i

) {

2672  copy

->fwd_prelim_tback = c;

2692  copy

->rev_prelim_tback = c;

2712  copy

->greedy_align_mem = c;

2720  orig

->dp_mem_alloc,

2768  Uint1

alphabet_code,

2769  Int4

number_of_contexts

2773  orig

->alphabet_code,

2774  orig

->number_of_contexts

2780  copy

->alphabet_start =

orig

->alphabet_start;

2791  for

(

i

= 0;

i

<

orig

->matrix->ncols; ++

i

) {

2794  orig

->matrix->data[

i

],

2795

m->

nrows

*

sizeof

(

int

)

2802  orig

->matrix->freqs,

2803

m->

ncols

*

sizeof

(

double

)

2810

&&

orig

->psi_matrix->pssm !=

NULL

) {

2819  for

(

i

= 0;

i

<

orig

->psi_matrix->pssm->ncols; ++

i

) {

2822  orig

->psi_matrix->pssm->data[

i

],

2823

m->

nrows

*

sizeof

(

int

)

2828

&&

orig

->psi_matrix->pssm->freqs !=

NULL

) {

2831  orig

->psi_matrix->pssm->freqs,

2832

m->

ncols

*

sizeof

(

double

)

2835

m->

lambda

=

orig

->psi_matrix->pssm->lambda;

2837

&&

orig

->psi_matrix->freq_ratios !=

NULL

) {

2839  for

(

i

= 0;

i

<

orig

->psi_matrix->pssm->ncols; ++

i

) {

2842  orig

->psi_matrix->freq_ratios[

i

],

2843  orig

->psi_matrix->pssm->nrows *

sizeof

(

double

)

2847  if

(

orig

->psi_matrix->kbp !=

NULL

) {

2851  copy

->matrix_only_scoring =

orig

->matrix_only_scoring;

2852  copy

->complexity_adjusted_scoring =

orig

->complexity_adjusted_scoring;

2857  copy

->read_in_matrix =

orig

->read_in_matrix;

2860  copy

->kbp_gap =

copy

->kbp_gap_psi;

2863  copy

->kbp_gap =

copy

->kbp_gap_std;

2881  int r

=

orig

->sfp[

ctx

]->score_max -

orig

->sfp[

ctx

]->score_min + 1;

2885  r

*

sizeof

(

double

)

2927  if

(

orig

->ambiguous_res !=

NULL

) {

2928

memcpy(

copy

->ambiguous_res,

orig

->ambiguous_res,

orig

->ambig_size);

2930  copy

->ambig_size =

orig

->ambig_size;

2931  copy

->ambig_occupy =

orig

->ambig_occupy;

2932  copy

->round_down =

orig

->round_down;

2948  Int4

default_db_genetic_code,

2965

default_db_genetic_code,

2988  Int4

default_db_genetic_code,

2997  int

status_code = 0;

3000  double

localScalingFactor;

3025  double

inclusion_ethresh;

3030  int

* numContexts_tld =

NULL

;

3031  int

* numQueries_tld =

NULL

;

3032  int

* compositionTestIndex_tld =

NULL

;

3058  if

(positionBased) {

3061  if

((

int

) compo_adjust_mode > 1) {

3069  if

((

int

) compo_adjust_mode > 1 &&

3074

inclusion_ethresh = (psiOptions

3077  ASSERT

(inclusion_ethresh != 0.0);

3079  int

actual_num_threads = 1;

3081

actual_num_threads = num_threads;

3087

compo_adjust_mode, positionBased);

3088  if

(savedParams ==

NULL

) {

3090  goto

function_cleanup;

3096  if

(status_code != 0) {

3097  goto

function_cleanup;

3107

localScalingFactor = 1.0;

3109  s_RescaleSearch

(sbp, scoringParams, numContexts, localScalingFactor);

3115  if

(status_code != 0) {

3116  return

(

Int2

) status_code;

3120  if

(redoneMatches ==

NULL

) {

3122  goto

function_cleanup;

3124  for

(query_index = 0; query_index < numQueries; query_index++) {

3129  if

(status_code != 0) {

3130  goto

function_cleanup;

3164  Int4

*** matrix_tld =

3181

redo_align_params_tld =

3186  int

* status_code_tld =

3226

compositionTestIndex_tld =

3243  for

(

i

= 0;

i

< actual_num_threads; ++

i

) {

3249  if

(query_info_tld[

i

] ==

NULL

) {

3251  goto

function_cleanup;

3261  if

(smithWaterman) {

3265  if

(status_code != 0) {

3266  goto

function_cleanup;

3270

numContexts_tld[

i

] = numContexts;

3271

numQueries_tld[

i

] = numQueries;

3272

compositionTestIndex_tld[

i

] = compositionTestIndex;

3274

gap_align_tld[

i

] =

3276

score_params_tld[

i

] = scoringParams;

3280

subjectBlk_tld[

i

] = subjectBlk;

3282

redoneMatches_tld[

i

] =

3284  if

(redoneMatches_tld[

i

] ==

NULL

) {

3286  goto

function_cleanup;

3288  for

(query_index = 0; query_index < numQueries; query_index++) {

3293  if

(status_code != 0) {

3294  goto

function_cleanup;

3313  if

(savedParams_tld[

i

] ==

NULL

) {

3315  goto

function_cleanup;

3318

savedParams_tld[

i

],

3325  if

(status_code != 0) {

3326  goto

function_cleanup;

3329  if

((

int

) compo_adjust_mode > 1 && !positionBased) {

3335  if

(status_code != 0) {

3336  goto

function_cleanup;

3340

gapping_params_context_tld[

i

].

gap_align

= gap_align_tld[

i

];

3341

gapping_params_context_tld[

i

].

scoringParams

= score_params_tld[

i

];

3342

gapping_params_context_tld[

i

].

sbp

= sbp_tld[

i

];

3344

gapping_params_context_tld[

i

].

prog_number

= program_number;

3346

redo_align_params_tld[

i

] =

3348

&gapping_params_context_tld[

i

],

3354  if

(redo_align_params_tld[

i

] ==

NULL

) {

3356  goto

function_cleanup;

3359  if

(positionBased) {

3365  if

(matrix_tld[

i

] ==

NULL

) {

3366  goto

function_cleanup;

3378  struct

BlastHSPListLinkedList {

3380  struct

BlastHSPListLinkedList*

next

;

3382  typedef struct

BlastHSPListLinkedList BlastHSPListLinkedList;

3386  if

(hsp_stream ==

NULL

) {

3388

*theseMatches = thisMatch;

3392

BlastHSPListLinkedList*

head

=

NULL

;

3393

BlastHSPListLinkedList* tail =

NULL

;

3400

BlastHSPListLinkedList* entry =

3401

(BlastHSPListLinkedList*)

calloc

(

3403  sizeof

(BlastHSPListLinkedList)

3405

entry->match = localMatch;

3420  for

(

i

= 0;

i

< numMatches; ++

i

) {

3421

theseMatches[

i

] =

head

->match;

3422

BlastHSPListLinkedList* here =

head

;

3429 #pragma omp parallel \ 3430  default(none) num_threads(actual_num_threads) \ 3431  if(actual_num_threads>1) \ 3432  shared(interrupt, seqsrc_tld, score_params_tld, hit_params_tld, \ 3433  gap_align_tld, results_tld, \ 3434  redoneMatches_tld, \ 3436  numMatches, theseMatches, \ 3437  numFrames, program_number, subjectBlk_tld, positionBased, \ 3438  default_db_genetic_code, localScalingFactor, queryInfo, \ 3439  sbp, smithWaterman, numQueries_tld, compositionTestIndex_tld, forbidden_tld, \ 3440  NRrecord_tld, actual_num_threads, sbp_tld, \ 3441  matrix_tld, query_info_tld, numContexts_tld, \ 3442  genetic_code_string, queryBlk, compo_adjust_mode, \ 3443  alignments_tld, incoming_align_set_tld, savedParams_tld, \ 3444  scoringParams, redo_align_params_tld, \ 3448 #pragma omp for schedule(static) 3449  for

(

b

= 0;

b

< numMatches; ++

b

) {

3450 #pragma omp flush(interrupt) 3468  void

* discarded_aligns =

NULL

;

3477  int

compositionTestIndex;

3483  double

pvalueForThisPair = (-1);

3489  if

(actual_num_threads > 1) {

3490

tid = omp_get_thread_num();

3493

seqSrc = seqsrc_tld[tid];

3494

scoringParams = score_params_tld[tid];

3495

hitParams = hit_params_tld[tid];

3496

redoneMatches = redoneMatches_tld[tid];

3497

alignments = alignments_tld[tid];

3498

incoming_align_set = incoming_align_set_tld[tid];

3499

NRrecord = NRrecord_tld[tid];

3501

redo_align_params = redo_align_params_tld[tid];

3502

matrix = matrix_tld[tid];

3503

pStatusCode = &status_code_tld[tid];

3504

query_info = query_info_tld[tid];

3505

numContexts = numContexts_tld[tid];

3506

numQueries = numQueries_tld[tid];

3507

compositionTestIndex = compositionTestIndex_tld[tid];

3508

subjectBlk = subjectBlk_tld[tid];

3509

forbidden = forbidden_tld[tid];

3517  if

(actual_num_threads > 1) {

3518 #pragma omp critical(intrpt) 3520 #pragma omp flush(interrupt) 3534  if

(actual_num_threads > 1) {

3535 #pragma omp critical(intrpt) 3537 #pragma omp flush(interrupt) 3543

context_index = query_index * numFrames;

3557

matchingSeq.

index

= -1;

3564

default_db_genetic_code,

3568  if

(*pStatusCode != 0) {

3574  goto

match_loop_cleanup;

3586  if

(*pStatusCode != 0) {

3587  goto

match_loop_cleanup;

3591  for

(frame_index = 0;

3592

frame_index < numFrames;

3593

frame_index++, context_index++) {

3594

incoming_aligns = incoming_align_set[frame_index];

3595  if

(!incoming_aligns) {

3601

kbp = sbp->

kbp_gap

[context_index];

3602  if

(smithWaterman) {

3608

numAligns[frame_index],

3620

compositionTestIndex,

3629

numAligns[frame_index],

3639

compositionTestIndex,

3644  if

(*pStatusCode != 0) {

3645  goto

match_loop_cleanup;

3648  if

(alignments[context_index] !=

NULL

) {

3649  Int2

qframe = frame_index;

3654

qframe = 2 - qframe;

3659

&alignments[context_index],

3660

matchingSeq.

index

,

3663  goto

match_loop_cleanup;

3667

incoming_align_set[frame_index] =

NULL

;

3670  if

(hsp_list->

hspcnt

> 1) {

3680

queryInfo, context_index,

3682

pvalueForThisPair, LambdaRatio,

3683

matchingSeq.

index

);

3684  if

(*pStatusCode != 0) {

3685  goto

query_loop_cleanup;

3687  if

(best_evalue <= hitParams->options->expect_value) {

3690

localScalingFactor);

3698

genetic_code_string,

3703  goto

query_loop_cleanup;

3706

&redoneMatches[query_index],

3713

&redoneMatches[query_index],

3720  if

(*pStatusCode == 0) {

3728  goto

query_loop_cleanup;

3730  if

(discarded_aligns !=

NULL

) {

3740

localMatch->

oid

= hsp_list->

oid

;

3744  if

(*pStatusCode != 0) {

3745  for

(context_index = 0;

3746

context_index < numContexts;

3749

&alignments[context_index],

3756  if

((actual_num_threads > 1) &&

3757

(*pStatusCode != 0 || !seqSrc)) {

3758 #pragma omp critical(intrpt) 3760 #pragma omp flush(interrupt) 3775  for

(

i

= 0;

i

< actual_num_threads; ++

i

) {

3776  if

(status_code_tld[

i

] != 0) {

3777

status_code = status_code_tld[

i

];

3780  for

(

i

= 0;

i

< actual_num_threads; ++

i

) {

3781  if

(seqSrc && status_code == 0) {

3784

redoneMatches_tld[

i

],

3787  if

(redoneMatches_tld[

i

] !=

NULL

) {

3789  for

(qi = 0; qi < numQueries; ++qi) {

3791  sfree

(redoneMatches_tld[

i

][qi].heapArray);

3796  if

(redoneMatches_tld[

i

] !=

NULL

) {

3798  for

(qi = 0; qi < numQueries; ++qi) {

3800  sfree

(redoneMatches_tld[

i

][qi].heapArray);

3805  sfree

(redoneMatches_tld[

i

]);

3807  if

(redoneMatches !=

NULL

) {

3809  for

(qi = 0; qi < numQueries; ++qi) {

3811  sfree

(redoneMatches[qi].heapArray);

3816  if

(hsp_stream !=

NULL

) {

3821  for

(

i

= 0;

i

< actual_num_threads; ++

i

) {

3825

hit_params_tld[

i

] =

NULL

;

3850  for

(

i

= 0;

i

< actual_num_threads; ++

i

) {

3853  for

(j = 0; j < local_results->

num_queries

; ++j) {

3864  sfree

(thread_data);

3868  if

(redoneMatches !=

NULL

) {

3869  for

(query_index = 0; query_index < numQueries; query_index++) {

3872  sfree

(redoneMatches);

3873

redoneMatches =

NULL

;

3875  if

(gapAlign !=

NULL

) {

3879

positionBased, compo_adjust_mode);

3882  for

(

i

= 0;

i

< actual_num_threads; ++

i

) {

3887  sfree

(alignments_tld[

i

]);

3888  sfree

(incoming_align_set_tld[

i

]);

3897  sfree

(alignments_tld);

3898  sfree

(compositionTestIndex_tld);

3899  sfree

(gap_align_tld);

3900  sfree

(gapping_params_context_tld);

3901  sfree

(hit_params_tld);

3902  sfree

(incoming_align_set_tld);

3904  sfree

(NRrecord_tld);

3905  sfree

(numContexts_tld);

3906  sfree

(numQueries_tld);

3907  sfree

(query_info_tld);

3908  sfree

(redo_align_params_tld);

3909  sfree

(redoneMatches_tld);

3910  sfree

(results_tld);

3911  sfree

(savedParams_tld);

3913  sfree

(score_params_tld);

3915  sfree

(status_code_tld);

3916  sfree

(subjectBlk_tld);

3917  sfree

(forbidden_tld);

3918  sfree

(theseMatches);

3920  return

(

Int2

) status_code;

#define sfree(x)

Safely free a pointer: belongs to a higher-level header.

#define CODON_LENGTH

Codons are always of length 3.

BLAST filtering functions.

void Blast_MaskTheResidues(Uint1 *buffer, Int4 length, Boolean is_na, const BlastSeqLoc *mask_loc, Boolean reverse, Int4 offset)

Masks the letters in buffer.

Int2 BlastFilteringOptionsFromString(EBlastProgramType program_number, const char *instructions, SBlastFilterOptions **filtering_options, Blast_Message **blast_message)

Produces SBlastFilterOptions from a string that has been traditionally supported in blast.

Int2 BlastSetUp_Filter(EBlastProgramType program_number, Uint1 *sequence, Int4 length, Int4 offset, const SBlastFilterOptions *filter_options, BlastSeqLoc **seqloc_retval, Blast_Message **blast_message)

Runs seg filtering functions, according to the filtering options, returns BlastSeqLoc*.

BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)

Deallocate all BlastSeqLoc objects in a chain.

Int4 ALIGN_EX(const Uint1 *A, const Uint1 *B, Int4 M, Int4 N, Int4 *a_offset, Int4 *b_offset, GapPrelimEditBlock *edit_block, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 query_offset, Boolean reversed, Boolean reverse_sequence, Boolean *fence_hit)

Low level function to perform dynamic programming gapped extension with traceback.

GapEditScript * Blast_PrelimEditBlockToGapEditScript(GapPrelimEditBlock *rev_prelim_tback, GapPrelimEditBlock *fwd_prelim_tback)

Convert the initial list of traceback actions from a non-OOF gapped alignment into a blast edit scrip...

Structures and functions prototypes used for BLAST gapped extension.

Int2 BLAST_GappedAlignmentWithTraceback(EBlastProgramType program, const Uint1 *query, const Uint1 *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length, Boolean *fence_hit)

Perform a gapped alignment with traceback.

Int2 BLAST_GapAlignStructNew(const BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, Uint4 max_subject_length, BlastScoreBlk *sbp, BlastGapAlignStruct **gap_align_ptr)

Initializes the BlastGapAlignStruct structure.

BlastGapAlignStruct * BLAST_GapAlignStructFree(BlastGapAlignStruct *gap_align)

Deallocates memory in the BlastGapAlignStruct structure.

Private interface for blast_gapalign.c.

Structures and API used for saving BLAST hits.

BlastHSPResults * Blast_HSPResultsFree(BlastHSPResults *results)

Deallocate memory for BLAST results.

Int2 Blast_HSPInit(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 query_frame, Int2 subject_frame, Int4 score, GapEditScript **gap_edit, BlastHSP **ret_hsp)

Allocates BlastHSP and inits with information from input.

Int2 Blast_HSPGetNumIdentitiesAndPositives(const Uint1 *query, const Uint1 *subject, BlastHSP *hsp, const BlastScoringOptions *score_options, Int4 *align_length_ptr, const BlastScoreBlk *sbp)

Calculate number of identities and positives in an HSP and set the BlastHSP::num_ident and BlastHSP::...

BlastHitList * Blast_HitListFree(BlastHitList *hitlist)

Deallocate memory for the hit list.

Int2 Blast_HSPResultsReverseOrder(BlastHSPResults *results)

Reverse order of HSP lists in each hit list in the BLAST results.

BlastHitList * Blast_HitListNew(Int4 hitlist_size)

Allocate memory for a hit list of a given size.

BlastHSPList * Blast_HSPListNew(Int4 hsp_max)

Creates HSP list structure with a default size HSP array.

BlastHSPResults * Blast_HSPResultsNew(Int4 num_queries)

Initialize the results structure.

Int2 Blast_HSPListGetEvalues(EBlastProgramType program_number, const BlastQueryInfo *query_info, Int4 subject_length, BlastHSPList *hsp_list, Boolean gapped_calculation, Boolean RPS_prelim, const BlastScoreBlk *sbp, double gap_decay_rate, double scaling_factor)

Calculate the expected values for all HSPs in a hit list, without using the sum statistics.

BlastHSP * Blast_HSPFree(BlastHSP *hsp)

Deallocate memory for an HSP structure.

const Uint1 * Blast_HSPGetTargetTranslation(SBlastTargetTranslation *target_t, const BlastHSP *hsp, Int4 *translated_length)

Returns a buffer with a protein translated from nucleotide.

Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)

Saves HSP information into a BlastHSPList structure.

BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)

Deallocate memory for an HSP list structure as well as all its components.

void Blast_HSPListSwap(BlastHSPList *list1, BlastHSPList *list2)

Swaps the two HSP lists via structure assignment.

void Blast_HSPListSortByScore(BlastHSPList *hsp_list)

Sort the HSPs in an HSP list by score.

Int2 Blast_HSPListReapByEvalue(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options)

Discard the HSPs above the e-value threshold from the HSP list.

Int2 Blast_HitListUpdate(BlastHitList *hit_list, BlastHSPList *hsp_list)

Insert a new HSP list into the hit list.

Utilities for dealing with BLAST HSPs in the core of BLAST.

#define CONTAINED_IN_HSP(a, b, c, d, e, f)

TRUE if c is between a and b; f between d and e.

const int kBlastHSPStream_Eof

Return value when the end of the stream is reached (applicable to read method only)

int BlastHSPStreamRead(BlastHSPStream *hsp_stream, BlastHSPList **hsp_list)

Invokes the user-specified read function for this BlastHSPStream implementation.

void BlastHSPStreamTBackClose(BlastHSPStream *hsp_stream, BlastHSPResults *results)

Closes the BlastHSPStream structure after traceback.

Private interfaces to support the multi-threaded traceback in conjunction with the BlastHSPStream.

Int2 Blast_RedoAlignmentCore_MT(EBlastProgramType program_number, Uint4 num_threads, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *queryInfo, BlastScoreBlk *sbp, BLAST_SequenceBlk *subjectBlk, const BlastSeqSrc *seqSrc, Int4 default_db_genetic_code, BlastHSPList *thisMatch, BlastHSPStream *hsp_stream, BlastScoringParameters *scoringParams, const BlastExtensionParameters *extendParams, const BlastHitSavingParameters *hitParams, const PSIBlastOptions *psiOptions, BlastHSPResults *results)

Recompute alignments for each match found by the gapped BLAST algorithm.

struct BlastKappa_SavedParameters BlastKappa_SavedParameters

A BlastKappa_SavedParameters holds the value of certain search parameters on entry to RedoAlignmentCo...

static void s_RestoreSearch(BlastScoreBlk *sbp, BlastScoringParameters *scoring, const BlastKappa_SavedParameters *searchParams, int query_length, Boolean positionBased, ECompoAdjustModes compo_adjust_mode)

Restore the parameters that were adjusted to their original values.

static int s_RecordInitialSearch(BlastKappa_SavedParameters *searchParams, BlastScoreBlk *sbp, const BlastScoringParameters *scoring, int query_length, ECompoAdjustModes compo_adjust_mode, Boolean positionBased)

Record the initial value of the search parameters that are to be adjusted.

#define KAPPA_BLASTP_NO_SEG_SEQUENCE

Compile-time option; if set to a true value, then blastp runs that use Blast_RedoAlignmentCore to com...

#define SCALING_FACTOR

SCALING_FACTOR is a multiplicative factor used to get more bits of precision in the integer matrix sc...

static BlastCompo_Alignment * s_RedoOneAlignment(BlastCompo_Alignment *in_align, EMatrixAdjustRule matrix_adjust_rule, BlastCompo_SequenceData *query_data, BlastCompo_SequenceRange *query_range, int ccat_query_length, BlastCompo_SequenceData *subject_data, BlastCompo_SequenceRange *subject_range, int full_subject_length, BlastCompo_GappingParams *gapping_params)

A callback: calculate the traceback for one alignment by performing an x-drop alignment in both direc...

static BlastGapAlignStruct * s_BlastGapAlignStruct_Copy(BlastGapAlignStruct *orig, BlastScoreBlk *sbp)

Create a "deep" copy of a BlastGapAlignStruct structure.

static int s_MatrixInfoInit(Blast_MatrixInfo *self, BLAST_SequenceBlk *queryBlk, BlastScoreBlk *sbp, double scale_factor, const char *matrixName)

Initialize an object of type Blast_MatrixInfo.

static int s_ScalePosMatrix(int **fillPosMatrix, const char *matrixName, double **posFreqs, Uint1 *query, int queryLength, BlastScoreBlk *sbp, double scale_factor)

Produce a scaled-up version of the position-specific matrix with a given set of position-specific res...

static void s_SWFindFinalEndsUsingXdrop(BlastCompo_SequenceData *query, Int4 queryStart, Int4 queryEnd, BlastCompo_SequenceData *subject, Int4 matchStart, Int4 matchEnd, BlastGapAlignStruct *gap_align, const BlastScoringParameters *scoringParams, Int4 score, Int4 *queryAlignmentExtent, Int4 *matchAlignmentExtent, Int4 *newScore)

Redo a S-W alignment using an x-drop alignment.

static void s_HSPListNormalizeScores(BlastHSPList *hsp_list, double lambda, double logK, double scoreDivisor)

Given a list of HSPs with (possibly) high-precision scores, rescale the scores to have standard preci...

static int s_GetPosBasedStartFreqRatios(double **returnRatios, Int4 numPositions, Uint1 *query, const char *matrixName, double **startNumerator)

Fill a two-dimensional array with the frequency ratios that underlie a position specific score matrix...

static int s_NewAlignmentUsingXdrop(BlastCompo_Alignment **pnewAlign, Int4 *pqueryEnd, Int4 *pmatchEnd, Int4 queryStart, Int4 matchStart, Int4 score, BlastCompo_SequenceData *query, BlastCompo_SequenceRange *query_range, Int4 ccat_query_length, BlastCompo_SequenceData *subject, BlastCompo_SequenceRange *subject_range, Int4 full_subject_length, BlastCompo_GappingParams *gapping_params, EMatrixAdjustRule matrix_adjust_rule)

A callback used when performing SmithWaterman alignments: Calculate the traceback for one alignment b...

static BlastCompo_QueryInfo * s_GetQueryInfo(Uint1 *query_data, const BlastQueryInfo *blast_query_info, Boolean skip)

Save information about all queries in an array of objects of type BlastCompo_QueryInfo.

static void s_BlastGapAlignStruct_Free(BlastGapAlignStruct *copy)

Free a BlastGapAlignStruct copy created by s_BlastGapAlignStruct_Copy.

#define KAPPA_TBLASTN_NO_SEG_SEQUENCE

Compile-time option; if set to a true value, then tblastn runs that use Blast_RedoAlignmentCore to com...

static void s_ComputeNumIdentities(const BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, BLAST_SequenceBlk *subject_blk, const BlastSeqSrc *seq_src, BlastHSPList *hsp_list, const BlastScoringOptions *scoring_options, const Uint1 *gen_code_string, const BlastScoreBlk *sbp, BlastSeqSrcSetRangesArg *ranges)

Compute the number of identities for the HSPs in the hsp_list.

static int s_FindNumIdentical(Uint1 *query_seq, const Uint8 *query_hashes, int query_len, Uint1 *subject_seq, int subject_len, int max_shift)

Find a local number of identical residues in two aligned sequences by finding word matches and doing ...

static void s_FillResultsFromCompoHeaps(BlastHSPResults *results, BlastCompo_Heap heaps[], Int4 hitlist_size)

Convert an array of BlastCompo_Heap objects to a BlastHSPResults structure.

static int s_ResultHspToDistinctAlign(BlastCompo_Alignment **self, int *numAligns, BlastHSP *hsp_array[], Int4 hspcnt, int init_context, const BlastQueryInfo *queryInfo, double localScalingFactor)

Convert an array of HSPs to a list of BlastCompo_Alignment objects.

static const Blast_RedoAlignCallbacks redo_align_callbacks

Callbacks used by the Blast_RedoOneMatch* routines.

static int s_SequenceGetRange(const BlastCompo_MatchingSequence *self, const BlastCompo_SequenceRange *s_range, BlastCompo_SequenceData *seqData, const BlastCompo_SequenceData *query, const BlastCompo_SequenceRange *q_range, BlastCompo_SequenceData *queryData, const Uint8 *query_words, const BlastCompo_Alignment *align, const Boolean shouldTestIdentical, const ECompoAdjustModes compo_adjust_mode, const Boolean isSmithWaterman, Boolean *subject_maybe_biased)

Obtain the sequence data that lies within the given range.

static int s_DoSegSequenceData(BlastCompo_SequenceData *seqData, EBlastProgramType program_name, Boolean *is_seq_biased)

Filter low complexity regions from the sequence data; uses the SEG algorithm.

static BlastCompo_GappingParams * s_GappingParamsNew(BlastKappa_GappingParamsContext *context, const BlastExtensionParameters *extendParams, int num_queries)

Create a new object of type BlastCompo_GappingParams.

static void s_SavedParametersFree(BlastKappa_SavedParameters **searchParams)

Release the data associated with a BlastKappa_SavedParameters and delete the object.

static void s_FreeEditScript(void *edit_script)

A callback used to free an EditScript that has been stored in a BlastCompo_Alignment.

static double s_CalcLambda(double probs[], int min_score, int max_score, double lambda0)

A callback routine: compute lambda for the given score probabilities.

static int s_ExtendLeft(Uint1 *query_seq, int query_len, Uint1 *subject_seq, int subject_len, int max_shift, int *query_ext_len, int *subject_ext_len, int *align_len)

Extend left from the end of the sequence and subject ranges and count identities.

static void s_HitlistReapContained(BlastHSP *hsp_array[], Int4 *hspcnt)

Remove from a hitlist all HSPs that are completely contained in an HSP that occurs earlier in the lis...

struct BlastKappa_SequenceInfo BlastKappa_SequenceInfo

BLAST-specific information that is associated with a BlastCompo_MatchingSequence.

static int s_HSPListFromDistinctAlignments(BlastHSPList *hsp_list, BlastCompo_Alignment **alignments, int oid, const BlastQueryInfo *queryInfo, int frame)

Converts a list of objects of type BlastCompo_Alignment to a new object of type BlastHSPList and ret...

static void s_FreeBlastCompo_QueryInfoArray(BlastCompo_QueryInfo **query_info, int num_queries)

#define NEAR_IDENTICAL_BITS_PER_POSITION

static void s_MatchingSequenceRelease(BlastCompo_MatchingSequence *self)

Release the resources associated with a matching sequence.

static int s_ExtendRight(Uint1 *query_seq, int query_len, Uint1 *subject_seq, int subject_len, int max_shift, int *query_ext_len, int *subject_ext_len, int *align_len)

Do a simple gapped extension to the right from the beginning of query and subject ranges examining on...

static Boolean s_TestNearIdentical(const BlastCompo_SequenceData *seqData, const int seqOffset, const BlastCompo_SequenceData *queryData, const int queryOffset, const Uint8 *query_words, const BlastCompo_Alignment *align)

Test whether the aligned parts of two sequences that have a high-scoring gapless alignment are nearly...

static BlastScoreBlk * s_BlastScoreBlk_Copy(EBlastProgramType program, BlastScoreBlk *orig, Uint1 alphabet_code, Int4 number_of_contexts)

Create a "deep" copy of a BlastScoreBlk structure.

struct BlastKappa_GappingParamsContext BlastKappa_GappingParamsContext

Data and data-structures needed to perform a gapped alignment.

static Uint8 s_GetHash(const Uint1 *data, int word_size)

Get hash for a word of word_size residues assuming 28-letter alphabet.

static int s_SequenceGetProteinRange(const BlastCompo_MatchingSequence *self, const BlastCompo_SequenceRange *range, BlastCompo_SequenceData *seqData, const BlastCompo_SequenceRange *q_range, BlastCompo_SequenceData *queryData, const Uint8 *query_words, const BlastCompo_Alignment *align, const Boolean shouldTestIdentical, const ECompoAdjustModes compo_adjust_mode, const Boolean isSmithWaterman, Boolean *subject_maybe_biased)

Get a string of protein data from a protein sequence.

static void s_BlastScoreBlk_Free(BlastScoreBlk **copy)

Free a BlastScoreBlk copy created by s_BlastScoreBlk_Copy.

static int s_MatchingSequenceInitialize(BlastCompo_MatchingSequence *self, EBlastProgramType program_number, const BlastSeqSrc *seqSrc, Int4 default_db_genetic_code, Int4 subject_index, BlastSeqSrcSetRangesArg *ranges)

Initialize a new matching sequence, obtaining information about the sequence from the search.

static Blast_RedoAlignParams * s_GetAlignParams(BlastKappa_GappingParamsContext *context, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *queryInfo, const BlastHitSavingParameters *hitParams, const BlastExtensionParameters *extendParams)

Read the parameters required for the Blast_RedoOneMatch* functions from the corresponding parameters ...

Int2 Blast_RedoAlignmentCore(EBlastProgramType program_number, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *queryInfo, BlastScoreBlk *sbp, BLAST_SequenceBlk *subjectBlk, const BlastSeqSrc *seqSrc, Int4 default_db_genetic_code, BlastHSPList *thisMatch, BlastHSPStream *hsp_stream, BlastScoringParameters *scoringParams, const BlastExtensionParameters *extendParams, const BlastHitSavingParameters *hitParams, const PSIBlastOptions *psiOptions, BlastHSPResults *results)

Recompute alignments for each match found by the gapped BLAST algorithm.

static int s_HitlistEvaluateAndPurge(int *pbestScore, double *pbestEvalue, BlastHSPList *hsp_list, const BlastSeqSrc *seqSrc, int subject_length, EBlastProgramType program_number, const BlastQueryInfo *queryInfo, int context_index, BlastScoreBlk *sbp, const BlastHitSavingParameters *hitParams, double pvalueForThisPair, double LambdaRatio, int subject_id)

Adding evalues to a list of HSPs and remove those that do not have sufficiently good (low) evalue.

static int s_CreateWordArray(const Uint1 *seq_data, Int4 seq_len, Uint8 **words)

static void s_AdjustEvaluesForComposition(BlastHSPList *hsp_list, double comp_p_value, const BlastSeqSrc *seqSrc, Int4 subject_length, const BlastContextInfo *query_context, double LambdaRatio, int subject_id)

Adjusts the E-values in a BLAST_HitList to be composites of a composition-based P-value and a score/a...

static int s_GetStartFreqRatios(double **returnRatios, const char *matrixName)

Fill a two-dimensional array with the frequency ratios that underlie the named score matrix.

Int4 s_GetSubjectLength(Int4 total_subj_length, EBlastProgramType program_number)

static BlastCompo_Alignment * s_NewAlignmentFromGapAlign(BlastGapAlignStruct *gap_align, GapEditScript **edit_script, BlastCompo_SequenceRange *query_range, BlastCompo_SequenceRange *subject_range, EMatrixAdjustRule matrix_adjust_rule)

Reads a BlastGapAlignStruct that has been used to compute a traceback, and return a BlastCompo_Alignm...

static BlastKappa_SavedParameters * s_SavedParametersNew(Int4 rows, Int4 numQueries, ECompoAdjustModes compo_adjust_mode, Boolean positionBased)

Create a new instance of BlastKappa_SavedParameters.

static void s_RescaleSearch(BlastScoreBlk *sbp, BlastScoringParameters *sp, int num_queries, double scale_factor)

Rescale the search parameters in the search object and options object to obtain more precision.

static int s_SequenceGetTranslatedRange(const BlastCompo_MatchingSequence *self, const BlastCompo_SequenceRange *range, BlastCompo_SequenceData *seqData, const BlastCompo_SequenceRange *q_range, BlastCompo_SequenceData *queryData, const Uint8 *query_words, const BlastCompo_Alignment *align, const Boolean shouldTestIdentical, const ECompoAdjustModes compo_adjust_mode, const Boolean isSmithWaterman, Boolean *subject_maybe_biased)

Obtain a string of translated data.

#define BLASTP_MASK_INSTRUCTIONS

Default instructions and mask residue for SEG filtering.

static void s_ClearHeap(BlastCompo_Heap *self)

Remove all matches from a BlastCompo_Heap.

Header file for composition-based statistics.

#define PSI_INCLUSION_ETHRESH

Defaults for PSI-BLAST and DELTA-BLAST options.

SBlastFilterOptions * SBlastFilterOptionsFree(SBlastFilterOptions *filter_options)

Frees SBlastFilterOptions and all subservient structures.

@ eSmithWatermanTbck

Smith-Waterman finds optimal scores, then ALIGN_EX to find alignment.

int Kappa_impalaScaling(Kappa_posSearchItems *posSearch, Kappa_compactSearchItems *compactSearch, double scalingFactor, Boolean doBinarySearch, BlastScoreBlk *sbp)

Copied from posit2.c.

Kappa_compactSearchItems * Kappa_compactSearchItemsNew(const Uint1 *query, unsigned int queryLength, BlastScoreBlk *sbp)

Creates a new Kappa_compactSearchItems structure.

Kappa_posSearchItems * Kappa_posSearchItemsFree(Kappa_posSearchItems *posSearch)

Deallocates the Kappa_posSearchItems structure.

Kappa_compactSearchItems * Kappa_compactSearchItemsFree(Kappa_compactSearchItems *compactSearch)

Deallocates the Kappa_compactSearchItems structure.

Kappa_posSearchItems * Kappa_posSearchItemsNew(unsigned int queryLength, const char *matrix_name, int **posPrivateMatrix, double **posFreqs)

Allocates a new Kappa_posSearchItems structure.

Port of posit.h structures and impalaScaling for implementing composition based statistics for PSI-BL...

Boolean Blast_QueryIsPssm(EBlastProgramType p)

Returns true if the query is PSSM.

EBlastProgramType

Defines the engine's notion of the different applications of the BLAST algorithm.

Boolean Blast_SubjectIsTranslated(EBlastProgramType p)

Returns true if the subject is translated.

int _PSIConvertFreqRatiosToPSSM(_PSIInternalPssmData *internal_pssm, const Uint1 *query, const BlastScoreBlk *sbp, const double *std_probs)

Converts the PSSM's frequency ratios obtained in the previous stage to a PSSM of scores.

_PSIInternalPssmData * _PSIInternalPssmDataNew(Uint4 query_length, Uint4 alphabet_size)

Allocates a new _PSIInternalPssmData structure.

const double kPosEpsilon

minimum return value of s_computeRelativeEntropy

void _PSICopyMatrix_int(int **dest, int **src, unsigned int ncols, unsigned int nrows)

Copies src matrix into dest matrix, both of which must be int matrices with dimensions ncols by nrows...

_PSIInternalPssmData * _PSIInternalPssmDataFree(_PSIInternalPssmData *pssm_data)

Deallocates the _PSIInternalPssmData structure.

void _PSICopyMatrix_double(double **dest, double **src, unsigned int ncols, unsigned int nrows)

Copies src matrix into dest matrix, both of which must be double matrices with dimensions ncols by nr...

Private interface for Position Iterated BLAST API, contains the PSSM generation engine.

Int4 BlastSeqSrcGetSeqLen(const BlastSeqSrc *seq_src, void *oid)

Retrieve sequence length (number of residues/bases)

void BlastSeqSrcReleaseSequence(const BlastSeqSrc *seq_src, BlastSeqSrcGetSeqArg *getseq_arg)

Deallocate individual sequence.

BlastSeqSrc * BlastSeqSrcCopy(const BlastSeqSrc *seq_src)

Copy function: needed to guarantee thread safety.

BlastSeqSrc * BlastSeqSrcFree(BlastSeqSrc *seq_src)

Frees the BlastSeqSrc structure by invoking the destructor function set by the user-defined construct...

Int4 BlastSeqSrcGetMaxSeqLen(const BlastSeqSrc *seq_src)

Get the length of the longest sequence in the sequence source.

Boolean BlastSeqSrcGetSupportsPartialFetching(const BlastSeqSrc *seq_src)

Find if the Blast Sequence Source supports partial fetching.

Int2 BlastSeqSrcGetSequence(const BlastSeqSrc *seq_src, BlastSeqSrcGetSeqArg *getseq_arg)

Retrieve an individual sequence.

BlastScoreBlk * BlastScoreBlkFree(BlastScoreBlk *sbp)

Deallocates BlastScoreBlk as well as all associated structures.

double Blast_KarlinLambdaNR(Blast_ScoreFreq *sfp, double initialLambdaGuess)

Calculates the parameter Lambda given an initial guess for its value.

double BLAST_KarlinEtoP(double x)

Convert an E-value to a P-value.

Blast_KarlinBlk * Blast_KarlinBlkNew(void)

Callocs a Blast_KarlinBlk.

Blast_KarlinBlk * Blast_KarlinBlkFree(Blast_KarlinBlk *kbp)

Deallocates the KarlinBlk.

double BLAST_KarlinPtoE(double p)

Convert a P-value to an E-value.

Blast_ScoreFreq * Blast_ScoreFreqNew(Int4 score_min, Int4 score_max)

Creates a new structure to keep track of score frequencies for a scoring system.

SPsiBlastScoreMatrix * SPsiBlastScoreMatrixNew(size_t ncols)

Allocates a new SPsiBlastScoreMatrix structure of dimensions ncols by BLASTAA_SIZE.

Int2 Blast_KarlinBlkCopy(Blast_KarlinBlk *kbp_to, Blast_KarlinBlk *kbp_from)

Copies contents of one Karlin block to another.

BlastScoreBlk * BlastScoreBlkNew(Uint1 alphabet, Int4 number_of_contexts)

Allocates and initializes BlastScoreBlk.

Functions to do gapped alignment with traceback.

BlastSeqSrcSetRangesArg * BLAST_SetupPartialFetching(EBlastProgramType program_number, BlastSeqSrc *seq_src, const BlastHSPList **hsp_list, Int4 num_hsplists)

Attempts to set up partial fetching, if it fails (e.g.

EBlastEncoding Blast_TracebackGetEncoding(EBlastProgramType program_number)

Get the subject sequence encoding type for the traceback, given a program number.

SThreadLocalData * SThreadLocalDataFree(SThreadLocalData *tld)

Deallocate the SThreadLocalData structure passed in.

SThreadLocalDataArray * SThreadLocalDataArrayNew(Uint4 num_threads)

Allocate a new SThreadLocalDataArray structure.

BlastHSPResults * SThreadLocalDataArrayConsolidateResults(SThreadLocalDataArray *array)

Extracts a single, consolidated BlastHSPResults structure from its input for single threaded processi...

Private interface to support the multi-threaded traceback.

Various auxiliary BLAST utility functions.

BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)

Deallocate memory for a sequence block.

int Blast_GetPartialTranslation(const Uint1 *nucl_seq, Int4 nucl_length, Int2 frame, const Uint1 *genetic_code, Uint1 **translation_buffer_ptr, Int4 *protein_length, Uint1 **mixed_seq_ptr)

Get one frame translation - needed when only parts of subject sequences are translated.

Int2 BlastTargetTranslationNew(BLAST_SequenceBlk *subject_blk, const Uint1 *gen_code_string, EBlastProgramType program_number, Boolean is_ooframe, SBlastTargetTranslation **target)

Sets up structure for target translation.

SBlastTargetTranslation * BlastTargetTranslationFree(SBlastTargetTranslation *target_t)

Free SBlastTargetTranslation.

double * BLAST_GetStandardAaProbabilities(void)

Get the standard amino acid probabilities.

ncbi::TMaskedQueryRegions mask

Declares a "heap" data structure that is used to store computed alignments when composition adjustmen...

void * BlastCompo_HeapPop(BlastCompo_Heap *self)

Remove and return the element in the BlastCompo_Heap with largest (worst) evalue; ties are broken acc...

void BlastCompo_HeapRelease(BlastCompo_Heap *self)

Release the storage associated with the fields of a BlastCompo_Heap.

int BlastCompo_HeapInsert(BlastCompo_Heap *self, void *alignments, double eValue, int score, int subject_index, void **discardedAligns)

Try to insert a collection of alignments into a heap.

int BlastCompo_HeapInitialize(BlastCompo_Heap *self, int heapThreshold, double ecutoff)

Initialize a new BlastCompo_Heap; parameters to this function correspond directly to fields in the Bl...

int BlastCompo_HeapWouldInsert(BlastCompo_Heap *self, double eValue, int score, int subject_index)

Return true if self may insert a match that had the given eValue, score and subject_index.

Blast_CompositionWorkspace * Blast_CompositionWorkspaceNew(void)

Create a new Blast_CompositionWorkspace object, allocating memory for all its component arrays.

int Blast_CompositionWorkspaceInit(Blast_CompositionWorkspace *NRrecord, const char *matrixName)

Initialize the fields of a Blast_CompositionWorkspace for a specific underlying scoring matrix.

Blast_MatrixInfo * Blast_MatrixInfoNew(int rows, int cols, int positionBased)

Create a Blast_MatrixInfo object.

void Blast_Int4MatrixFromFreq(int **matrix, int size, double **freq, double Lambda)

Compute an integer-valued amino-acid score matrix from a set of score frequencies.

void Blast_CompositionWorkspaceFree(Blast_CompositionWorkspace **NRrecord)

Free memory associated with a record of type Blast_CompositionWorkspace.

void Blast_ReadAaComposition(Blast_AminoAcidComposition *composition, int alphsize, const Uint1 *sequence, int length)

Compute the true amino acid composition of a sequence, ignoring ambiguity characters and other nonsta...

ECompoAdjustModes

A collection of constants that specify all permissible modes of composition adjustment.

@ eCompositionBasedStats

Composition-based statistics as in NAR 29:2994-3005, 2001.

@ eNoCompositionBasedStats

Don't use composition based statistics.

@ eCompositionMatrixAdjust

Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence pro...

EMatrixAdjustRule

A collection of constants that specify all rules that may be used to generate a compositionally adju...

static int heap[2 *(256+1+29)+1]

static void cleanup(void)

static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)

EGapAlignOpType

Operation types within the edit script.

GapEditScript * GapEditScriptDelete(GapEditScript *esp)

Free edit script structure.

void GapPrelimEditBlockReset(GapPrelimEditBlock *edit_block)

Reset a preliminary edit block without freeing it.

Defines the interface to interact with the genetic code singleton object.

Uint1 * GenCodeSingletonFind(Uint4 gen_code_id)

Returns the genetic code string for the requested genetic code id.

EBlastEncoding

Different types of sequence encodings for sequence retrieval from the BLAST database.

#define BLASTAA_SIZE

Size of amino acid alphabet.

@ eBlastEncodingNcbi4na

NCBI4na.

@ eBlastEncodingProtein

NCBIstdaa.

uint8_t Uint1

1-byte (8-bit) unsigned integer

int16_t Int2

2-byte (16-bit) signed integer

int32_t Int4

4-byte (32-bit) signed integer

uint32_t Uint4

4-byte (32-bit) unsigned integer

int64_t Int8

8-byte (64-bit) signed integer

uint64_t Uint8

8-byte (64-bit) unsigned integer

unsigned int

A callback function used to compare two keys in a database.

where both are integers</td><td></td></tr><tr><td>tse</td><td>optional</td><td>String</td><td class="description">TSE option controls what blob is orig

for(len=0;yy_str[len];++len)

Functions to link HSPs using sum statistics.

Int2 BLAST_LinkHsps(EBlastProgramType program_number, BlastHSPList *hsp_list, const BlastQueryInfo *query_info, Int4 subject_length, const BlastScoreBlk *sbp, const BlastLinkHSPParameters *link_hsp_params, Boolean gapped_calculation)

Link HSPs using sum statistics.

SFreqRatios * _PSIMatrixFrequencyRatiosFree(SFreqRatios *freq_ratios)

Deallocate the frequency ratios structure.

SFreqRatios * _PSIMatrixFrequencyRatiosNew(const char *matrix_name)

Retrieve the matrix's frequency ratios.

Definitions used to get joint probabilities for a scoring matrix.

int Blast_FrequencyDataIsAvailable(const char *matrix_name)

Retrieve the background letter probabilities implicitly used in constructing the score matrix matrix_...

int strcmp(const char *str1, const char *str2)

Prototypes for portable math library (ported from C Toolkit)

#define NCBIMATH_LN2

Natural log(2)

long BLAST_Nint(double x)

Nearest integer.

#define MIN(a, b)

returns smaller of a and b.

Uint1 Boolean

bool replacement for C

#define TRUE

bool replacement for C indicating true.

#define FALSE

bool replacement for C indicating false.

#define ABS(a)

returns absolute value of a (|a|)

#define NCBI_CONST_UINT8(v)

#define ASSERT

macro for assert.

#define MAX(a, b)

returns larger of a and b.

double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)

double lambda(size_t dimMatrix_, const Int4 *const *scoreMatrix_, const double *q_)

void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)

Declarations for several linear algebra routines.

void Nlm_Int4MatrixFree(int ***mat)

Free a matrix created by Nlm_DenseMatrixNew or Nlm_LtriangMatrixNew.

int ** Nlm_Int4MatrixNew(int nrows, int ncols)

Create and return a new Int4 matrix.

static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)

static PCRE2_SIZE * offsets

Definitions used to redo a set of alignments, using either composition matrix adjustment or the Smith...

void BlastCompo_AlignmentsFree(BlastCompo_Alignment **palign, void(*free_context)(void *))

Recursively free all alignments in the singly linked list whose head is *palign.

Blast_RedoAlignParams * Blast_RedoAlignParamsNew(Blast_MatrixInfo **pmatrix_info, BlastCompo_GappingParams **pgapping_params, ECompoAdjustModes compo_adjust_mode, int positionBased, int subject_is_translated, int query_is_translated, int ccat_query_length, int cutoff_s, double cutoff_e, int do_link_hsps, const Blast_RedoAlignCallbacks *callbacks, double near_identical_cutoff)

Create new Blast_RedoAlignParams object.

#define GET_NUCL_LENGTH(l)

#define GET_TRANSLATED_LENGTH(l, f)

int BlastCompo_EarlyTermination(double evalue, BlastCompo_Heap significantMatches[], int numQueries)

Return true if a heuristic determines that it is unlikely to be worthwhile to redo a query-subject pa...

BlastCompo_Alignment * BlastCompo_AlignmentNew(int score, EMatrixAdjustRule whichRule, int queryIndex, int queryStart, int queryEnd, int matchStart, int matchEnd, int frame, void *context)

Create a new BlastCompo_Alignment; parameters to this function correspond directly to fields of Blast...

int Blast_RedoOneMatch(BlastCompo_Alignment **alignments, Blast_RedoAlignParams *params, BlastCompo_Alignment *incoming_aligns, int hspcnt, double Lambda, BlastCompo_MatchingSequence *matchingSeq, int ccat_query_length, BlastCompo_QueryInfo query_info[], int numQueries, int **matrix, int alphsize, Blast_CompositionWorkspace *NRrecord, double *pvalueThisPair, int compositionTestIndex, double *LambdaRatio)

Recompute all alignments for one query/subject pair using composition-based statistics or composition...

void Blast_RedoAlignParamsFree(Blast_RedoAlignParams **pparams)

Free a set of Blast_RedoAlignParams.

int Blast_RedoOneMatchSmithWaterman(BlastCompo_Alignment **alignments, Blast_RedoAlignParams *params, BlastCompo_Alignment *incoming_aligns, int hspcnt, double Lambda, double logK, BlastCompo_MatchingSequence *matchingSeq, BlastCompo_QueryInfo query_info[], int numQueries, int **matrix, int alphsize, Blast_CompositionWorkspace *NRrecord, Blast_ForbiddenRanges *forbidden, BlastCompo_Heap *significantMatches, double *pvalueThisPair, int compositionTestIndex, double *LambdaRatio)

Recompute all alignments for one query/subject pair using the Smith-Waterman algorithm and possibly a...

void Blast_ForbiddenRangesRelease(Blast_ForbiddenRanges *self)

Release the storage associated with the fields of self, but do not delete self.

int Blast_ForbiddenRangesInitialize(Blast_ForbiddenRanges *self, int capacity)

Initialize a new, empty Blast_ForbiddenRanges.

Structure to hold a sequence.

Uint1 * sequence_start

Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.

Int4 length

Length of sequence.

Uint1 * sequence_nomask

Start of query sequence without masking.

Uint1 * sequence

Sequence used for search (could be translation).

Uint1 * oof_sequence

Mixed-frame protein representation of a nucleotide sequence for out-of-frame alignment.

Uint1 * gen_code_string

for nucleotide subject sequences (tblast[nx]), the genetic code used to create a translated protein s...

Within the composition adjustment module, an object of type BlastCompo_Alignment represents a distinc...

int frame

the subject frame

int matchStart

the start of the alignment in the subject

int score

the score of this alignment

int matchEnd

one past the end of the alignment in the subject

int queryStart

the start of the alignment in the query

EMatrixAdjustRule matrix_adjust_rule

how the score matrix was computed

struct BlastCompo_Alignment * next

the next alignment in the list

int queryIndex

index of the query in a concatenated query

int queryEnd

one past the end of the alignment in the query

void * context

traceback info for a gapped alignment

Parameters used to compute gapped alignments.

int x_dropoff

for x-drop algorithms, once a path falls below the best score by this (positive) amount,...

int gap_open

penalty for opening a gap

void * context

a pointer to any additional gapping parameters that may be needed by the calling routine.

int gap_extend

penalty for extending a gapped alignment by one residue

A BlastCompo_Heap represents a collection of alignments between one query sequence and several matchi...

A BlastCompo_MatchingSequence represents a subject sequence to be aligned with the query.

Int4 index

index of this sequence in the database

void * local_data

holds any sort of data that is necessary for callbacks to access the sequence

Int4 length

length of this matching sequence

Collected information about a query.

int origin

origin of the query in a concatenated query

Blast_AminoAcidComposition composition

the composition of the query

BlastCompo_SequenceData seq

sequence data for the query

double eff_search_space

effective search space of searches involving this query

Uint8 * words

list of words in the query, needed for testing whether the query and a subject are nearly identical

BlastCompo_SequenceData - represents a string of amino acids or nucleotides.

int length

the length of data.

Uint1 * buffer

if non-nil, points to memory that must be freed when this instance of BlastCompo_SequenceData is dele...

Uint1 * data

amino acid or nucleotide data

BlastCompo_SequenceRange - a struct whose instances represent a range of data in a sequence.

int begin

the starting index of the range

int end

one beyond the last item in the range

int context

integer identifier for this window, can indicate a translation frame or an index into a set of sequen...

The context related information.

Int4 query_length

Length of this query, strand or frame.

Boolean is_valid

Determine if this context is valid or not.

Int4 query_offset

Offset of this query, strand or frame in the concatenated super-query.

Int4 length_adjustment

Length adjustment for boundary conditions.

Int8 eff_searchsp

Effective search space for this context.

Options used for gapped extension. These include: a.

EBlastTbackExt eTbackExt

type of traceback extension.

Int4 unifiedP

Indicates unified P values to be used in blastp or tblastn.

double gap_x_dropoff_final

X-dropoff value for the final gapped extension (in bits)

Int4 compositionBasedStats

mode of compositional adjustment to use; if zero then compositional adjustment is not used

Computed values used as parameters for gapped alignments.

BlastExtensionOptions * options

The original (unparsed) options.

Int4 gap_x_dropoff_final

X-dropoff value for the final gapped extension (raw)

Structure supporting the gapped alignment.

GapPrelimEditBlock * fwd_prelim_tback

traceback from right extensions

Int4 gap_x_dropoff

X-dropoff parameter to use.

GapPrelimEditBlock * rev_prelim_tback

traceback from left extensions

Int4 query_stop

query end offset of current alignment

Int4 subject_start

subject start offset of current alignment

BlastScoreBlk * sbp

Pointer to the scoring information block.

Int4 query_start

query start offset of current alignment

Int4 subject_stop

subject end offset of current alignment

Int4 score

Return value: alignment score.

GapEditScript * edit_script

The traceback (gap) information.

Auxiliary structure for dynamic programming gapped extension.

The structure to hold all HSPs for a given sequence after the gapped alignment.

Int4 oid

The ordinal id of the subject sequence this HSP list is for.

Int4 hspcnt

Number of HSPs saved.

BlastHSP ** hsp_array

Array of pointers to individual HSPs.

double best_evalue

Smallest e-value for HSPs in this list.

Int4 query_index

Index of the query which this HSPList corresponds to.

The structure to contain all BLAST results, for multiple queries.

BlastHitList ** hitlist_array

Array of results for individual query sequences.

Int4 num_queries

Number of query sequences.

Default implementation of BlastHSPStream.

Structure holding all information about an HSP.

double evalue

This HSP's e-value.

Int4 num_ident

Number of identical base pairs in this HSP.

BlastSeg query

Query sequence info.

Int4 context

Context number of query.

double bit_score

Bit score, calculated from score.

BlastSeg subject

Subject sequence info.

Int2 comp_adjustment_method

which mode of composition adjustment was used; relevant only for blastp and tblastn

Int4 score

This HSP's raw score.

The structure to contain all BLAST results for one query sequence.

double expect_value

The expect value cut-off threshold for an HSP, or a combined hit if sum statistics is used.

Int4 hitlist_size

Maximal number of database sequences to return results for.

Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.

Int4 cutoff_score_min

smallest cutoff score across all contexts

Boolean do_sum_stats

TRUE if sum stats will be used.

BlastLinkHSPParameters * link_hsp_params

Parameters for linking HSPs with sum statistics; linking is not done if NULL.

BlastHitSavingOptions * options

The original (unparsed) options.

Data and data-structures needed to perform a gapped alignment.

BlastGapAlignStruct * gap_align

additional parameters for a gapped alignment

EBlastProgramType prog_number

the type of search being performed

BlastScoreBlk * sbp

the score block for this search

const BlastScoringParameters * scoringParams

scoring parameters for a gapped alignment

double localScalingFactor

the amount by which this search has been scaled

A BlastKappa_SavedParameters holds the value of certain search parameters on entry to RedoAlignmentCo...

double scale_factor

the original scale factor

Int4 num_queries

Number of queries in this search.

Int4 gap_open

a penalty for the existence of a gap

double original_expect_value

expect value on entry

Int4 gapExtend

a penalty for each residue in the gap

Blast_KarlinBlk ** kbp_gap_orig

copy of the original gapped Karlin-Altschul block corresponding to the first context

Int4 ** origMatrix

The original matrix values.

BLAST-specific information that is associated with a BlastCompo_MatchingSequence.

const BlastSeqSrc * seq_src

BLAST sequence data source.

EBlastProgramType prog_number

identifies the type of blast search being performed.

BlastSeqSrcGetSeqArg seq_arg

argument to GetSequence method of the BlastSeqSrc (

The query related information.

Int4 first_context

Index of the first element of the context array.

BlastContextInfo * contexts

Information per context.

int num_queries

Number of query sequences.

Int4 last_context

Index of the last element of the context array.

Uint4 max_length

Length of the longest among the concatenated queries.

Structure used for scoring calculations.

Blast_KarlinBlk ** kbp_psi

K-A parameters for position-based alignments.

Blast_KarlinBlk ** kbp_gap

K-A parameters for gapped alignments.

SPsiBlastScoreMatrix * psi_matrix

PSSM and associated data.

Uint1 alphabet_code

NCBI alphabet code.

Int4 number_of_contexts

Used by sfp and kbp, how large are these.

SBlastScoreMatrix * matrix

scoring matrix data

Blast_KarlinBlk * kbp_ideal

Ideal values (for query with average database composition).

Scoring options block. Used to produce the BlastScoreBlk structure. This structure may be needed for lo...

EBlastProgramType program_number

indicates blastn, blastp, etc.

char * matrix

Name of the matrix containing all scores: needed for finding neighboring words.

Boolean is_ooframe

Should out-of-frame gapping be used in a translated search?

Scoring parameters block. Contains scoring-related information that is actually used for the blast sea...

double scale_factor

multiplier for all cutoff scores

Int4 gap_extend

Penalty for each gap residue (scaled version)

Int4 gap_open

Extra penalty for starting a gap (scaled version)

BlastScoringOptions * options

User-provided values for these params.

Int4 gapped_start

Where the gapped extension started.

Int2 frame

Translation frame.

Int4 offset

Start of hsp.

Used to hold a set of positions, mostly used for filtering.

Structure used as the second argument to functions satisfying the GetSeqBlkFnPtr signature,...

Int4 oid

Oid in BLAST database, index in an array of sequences, etc [in].

EBlastEncoding encoding

Encoding of sequence, i.e.

Boolean check_oid_exclusion

Check whether an OID is excluded due to overlapping filtering.

BlastSeqSrcSetRangesArg * ranges

BLAST_SequenceBlk * seq

Sequence to return; if NULL, it should be allocated by GetSeqBlkFnPtr (using BlastSeqBlkNew or BlastSetU...

Structure used as the argument to function SetRanges.

Complete type definition of Blast Sequence Source ADT.

Work arrays used to perform composition-based matrix adjustment.

An instance of Blast_ForbiddenRanges is used by the Smith-Waterman algorithm to represent ranges in t...

Structure to hold the Gumbel parameters (for FSC).

Structure to hold the Karlin-Altschul parameters.

double K

K value used in statistics.

double Lambda

Lambda value used in statistics.

double logK

natural log of K value used in statistics

Information about an amino-acid substitution matrix.

Callbacks used by Blast_RedoOneMatch and Blast_RedoOneMatchSmithWaterman routines.

A parameter block for the Blast_RedoOneMatch and Blast_RedoOneMatchSmithWaterman routines.

Holds score frequencies used in calculation of Karlin-Altschul parameters for an ungapped search.

double * sprob0

arrays for frequency of given score

double score_avg

average score, must be negative for local alignment.

Int4 score_max

highest allowed scores

Int4 obs_min

lowest observed (actual) scores

double * sprob

arrays for frequency of given score, shifted down by score_min.

Int4 score_min

lowest allowed scores

Int4 obs_max

highest observed (actual) scores

Edit script: linked list of correspondences between two sequences.

Int4 * num

Array of number of operations.

Int4 size

Size of above arrays.

EGapAlignOpType * op_type

Array of type of operation.

Preliminary version of GapEditBlock, used directly by the low- level dynamic programming routines.

Int4 num_ops_allocated

size of allocated array

GapPrelimEditScript * edit_ops

array of edit operations

A version of GapEditScript used to store initial results from the gapped alignment routines.

Int4 num

Number of operations.

EGapAlignOpType op_type

Type of operation.

Structure to keep memory for state structure.

struct GapStateArrayStruct * next

Next link in the list.

Int4 length

length of the state_array.

Uint1 * state_array

array to be used.

Structure used to pass data into the scaling routines.

double * standardProb

Array of standard residue probabilities, as those returned by BLAST_GetStandardAaProbabilities.

Structure used to pass data into the scaling routines.

double ** posFreqs

PSSM's frequency ratios [alias].

int ** posPrivateMatrix

Scaled PSSM [alias].

Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST). Some of these possibly should...

double inclusion_ethresh

Minimum evalue for inclusion in PSSM calculation.

Scoring matrix used in BLAST.

size_t nrows

number of rows

double lambda

derived value of the matrix lambda -RMH-

double * freqs

array of assumed matrix background frequencies -RMH-

size_t ncols

number of columns

int ** data

actual scoring matrix data, stored in row-major form

Information about target translations.

Stores the frequency ratios along with their bit scale factor.

double ** data

The actual frequency ratios.

All auxiliary memory needed for the greedy extension algorithm.

Scoring matrix data used in PSI-BLAST.

SBlastScoreMatrix * pssm

position-specific score matrix

double ** freq_ratios

PSSM's frequency ratios, dimensions are specified in pssm data above.

Blast_KarlinBlk * kbp

Karlin-Altschul block associated with this PSSM.

Data structure to support MT traceback: this encapsulates the data that each thread modifies.

BlastHitSavingParameters * hit_params

Hit saving parameters.

BlastHSPResults * results

Structure to store results from this thread.

Internal representation of a PSSM in various stages of its creation and its dimensions.

int ** scaled_pssm

scaled PSSM (scores)

Uint4 nrows

number of rows (alphabet_size)

int ** pssm

PSSM (scores)

Uint4 ncols

number of columns (query_length)

double ** freq_ratios

frequency ratios

Headers for computing a "composition" p-value of a match, and for computing a unified p-value combini...

double Blast_Overall_P_Value(double p_comp, double p_alignment)

This function implements the method of Fisher, R.

static CS_CONTEXT * context

voidp calloc(uInt items, uInt size)


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4