RetroSearch Browse

;

72

retval = (

void

**)

malloc

(

sizeof

(

void

*) * ncols);

77 for

(

= 0;

< ncols;

++) {

78

retval[

] = (

void

calloc

(nrows, data_type_sz);

95 for

(

= 0;

< ncols;

++) {

106 #define DEFINE_COPY_MATRIX_FUNCTION(type) \ 107 void _PSICopyMatrix_##type(type** dest, type** src, \ 108 unsigned int ncols, unsigned int nrows) \ 110 unsigned int i = 0; \ 111 unsigned int j = 0; \ 116 for (i = 0; i < ncols; i++) { \ 117 for (j = 0; j < nrows; j++) { \ 118 dest[i][j] = src[i][j]; \ 135 if

( !msa || !msa->dimensions || !msa->data ) {

149

(

void

*) msa->dimensions,

154

msa->dimensions->query_length,

156 if

( !retval->

) {

160 for

(s = 0; s < msa->dimensions->num_seqs + 1; s++) {

161 for

(p = 0; p < msa->dimensions->query_length; p++) {

163

retval->

[s][p].

= msa->data[s][p].letter;

175 for

(s = 0; s < msa->dimensions->num_seqs + 1; s++) {

211 unsigned int

retval = 0;

229 #ifdef DEBUG_PSSM_ENGINE 236

PrintMsa(

const char

* filename,

const PSIMsa

* msa)

241 fp

= fopen(filename,

"w"

);

242

PrintMsaFP(

, msa);

247

PrintMsaFP(FILE*

const PSIMsa

* msa)

254

fprintf(

"%3d\tGI=%10d\tEvalue=%2.0le\tBitScore=%4.1f\t"

255

msa->seqinfo[

].gi,

256

msa->seqinfo[

].evalue,

257

msa->seqinfo[

].bit_score);

265

fprintf(

"\n"

);

278

fprintf(

"%3d\t"

);

280

fprintf(

"NOT USED\n"

);

291

fprintf(

"\n"

);

296

__printPackedMsa(

const char

* filename,

const _PSIPackedMsa

* msa)

301 fp

= fopen(filename,

"w"

);

302

__printPackedMsaFP(

, msa);

335 if

( !retval->

) {

360 if

( !retval->

) {

434

retval->

= query_length;

435

retval->

= alphabet_size;

440 if

( !retval->

) {

474 if

(pssm_data->

) {

475

pssm_data->

= (

int

**)

512 if

( !retval->

) {

524 for

(

= 0;

< query_length;

++) {

534 if

( !aligned_blocks ) {

538 if

(aligned_blocks->

) {

546 sfree

(aligned_blocks);

550 #define EFFECTIVE_ALPHABET 20 578 if

( !retval->

) {

629 if

( !seq_weights ) {

641 if

(seq_weights->

) {

691

msa->

[p] == kGapResidue) {

717 if

(msa->

[s][p].

== kGapResidue) {

732 if

(msa->

[s][p].

== kGapResidue) {

769

found_aligned_sequence =

;

770 if

(msa->

[s][p].

!= kGapResidue) {

771

found_non_gap_residue =

;

776 if

( !found_aligned_sequence ) {

779 if

( !found_non_gap_residue ) {

841 if

( !ignore_unaligned_positions ) {

875 if

(cd_msa->

[p] == kGapResidue) {

887 if

(!cd_msa->

msa

[s][p].

) {

895 for

(k = 0; k < alphabet_size; k++) {

943 double

max_percent_identity);

993 Uint4

kQueryLength = 0;

994 Uint4

kNumberOfSeqs = 0;

1007 for

(p = 0; p < kQueryLength; p++) {

1012 for

(s = 0; s < kNumberOfSeqs; s++) {

1015 for

(p = 0; p < kQueryLength; p++, pos++) {

1052

fprintf(stderr,

"Position: %d - State: %s\n"

, position,

1054

fprintf(stderr,

"\tstart: %d\n"

, traits->

start

);

1056

fprintf(stderr,

"\tn_x_residues: %d\n"

, traits->

n_x_residues

);

1057

fprintf(stderr,

"\tn_identical: %d\n"

, traits->

n_identical

);

1071

traits->

start

= position;

1079 double

max_percent_identity)

1089 const double

percent_identity =

1091 if

(percent_identity >= max_percent_identity) {

1092 const unsigned int

align_stop =

1096

traits->

start

, align_stop);

1189 double

max_percent_identity)

1202 if

( seq_index1 == seq_index2 ||

1209

seq1 = msa->

[seq_index1];

1210

seq2 = msa->

[seq_index2];

1214 for

(p = 0; p < kQueryLength; p++, seq1++, seq2++) {

1229 if

(!kPos1Aligned && !kPos2Aligned) {

1231

max_percent_identity);

1237

seq1->

!= kXResidue && seq2->

!= kXResidue;

1246 if

(neither_is_X && (kPos2Aligned && seq1->

is_aligned

) &&

1253

max_percent_identity);

1302 if

( !msa || !aligned_blocks ) {

1327 ASSERT

(seq_index < msa->dimensions->num_seqs + 1);

1329

sequence_position = msa->

[seq_index];

1332

sequence_position[

!= kGapResidue) {

1339 if

( !sequence_position[curr].

is_aligned

) {

1361 ASSERT

(seq_index < msa->dimensions->num_seqs + 1);

1363

sequence_position = msa->

[seq_index];

1367

sequence_position[

last

!= kGapResidue) {

1371 for

(curr =

last

- 1; curr >= 0; curr--,

last

--) {

1373 if

( !sequence_position[curr].

is_aligned

) {

1391 #ifdef PSI_IGNORE_GAPS_IN_COLUMNS 1399 ASSERT

(seq_index < msa->dimensions->num_seqs + 1);

1401

sequence_position = msa->

[seq_index];

1404 #ifdef PSI_IGNORE_GAPS_IN_COLUMNS 1406

sequence_position[

!= kGapResidue) {

1425 Uint4

kQueryLength = 0;

1432 for

(

= 0;

< kQueryLength;

++) {

1444 for

(

= 0;

< kQueryLength;

++) {

1446 if

(msa->

[

] == kXResidue) {

1449 for

(idx = 0; idx <

; idx++) {

1451

msa->

[idx] != kXResidue) {

1452

aligned_blocks->

[idx]--;

1457

msa->

[idx] != kXResidue) {

1458

aligned_blocks->

[idx]--;

1529 Boolean

nsg_compatibility_mode);

1544 Boolean

nsg_compatibility_mode);

1554 Boolean

nsg_compatibility_mode,

1563 Uint4

kQueryLength = 0;

1566 const Uint4

kExpectedNumMatchingSeqs = nsg_compatibility_mode ? 0 : 1;

1567 Uint4

last_calc_pos = 0;

1569 if

( !msa || !aligned_blocks || !seq_weights ) {

1575 if

( !aligned_seqs || !prev_pos_aligned_seqs ) {

1580 for

(pos = 0; pos < kQueryLength; pos++) {

1583 if

(aligned_blocks->

[pos] == 0 ||

1591 if

(aligned_seqs->

<= kExpectedNumMatchingSeqs) {

1594

last_calc_pos = pos;

1596 if

(last_calc_pos != pos - 1 ||

1601

memset((

void

*)seq_weights->

row_sigma

, 0,

1605

aligned_seqs, seq_weights);

1608

seq_weights->

[pos] = seq_weights->

[pos-1];

1627

nsg_compatibility_mode);

1632 #ifndef PSI_IGNORE_GAPS_IN_COLUMNS 1635

nsg_compatibility_mode);

1646 sfree

(sum_weights);

1656 Uint4

kQueryLength = 0;

1659 double

* sum_weights =

;

1663 if

( !cd_msa || !seq_weights || !sbp || !options) {

1674 if

( !sum_weights) {

1683 for

(pos = 0; pos < kQueryLength; pos++) {

1684 double

total_observations = 0.0;

1689

memset(sum_weights, 0, sbp->

sizeof

(

double

));

1703

total_observations +=

1709 for

(residue = 0; residue < sbp->

; residue++) {

1710

sum_weights[residue] +=

1718 if

(total_observations > 0.0 && query_residue != kXResidue

1719

&& sum_weights[query_residue] == 0.0) {

1721

sum_weights[query_residue] = 1.0;

1722

total_observations += 1.0;

1726 if

(total_observations > 0.0) {

1728 for

(residue = 0; residue < sbp->

; residue++) {

1730

sum_weights[residue] / total_observations;

1741

total_observations);

1783 ASSERT

(position < msa->dimensions->query_length);

1794 Uint4

num_distinct_residues_for_column = 0;

1795 Uint4

num_local_std_letters = 0;

1798 ASSERT

(i < msa->dimensions->query_length);

1802 for

(asi = 0; asi < aligned_seqs->

; asi++) {

1803 const Uint4

kSeqIdx = aligned_seqs->

[asi];

1806 if

(residue_counts_for_column[kResidue] == 0) {

1807

num_distinct_residues_for_column++;

1808 if

(kResidue != kGapResidue && kResidue != kXResidue)

1809

num_local_std_letters++;

1811

residue_counts_for_column[kResidue]++;

1814

sigma += num_distinct_residues_for_column;

1817 if

(num_distinct_residues_for_column > 1) {

1820

distinct_residues_found =

;

1825 for

(asi = 0; asi < aligned_seqs->

; asi++) {

1826 const Uint4

seq_idx = aligned_seqs->

[asi];

1834

(residue_counts_for_column[residue] *

1835

num_distinct_residues_for_column) );

1840

seq_weights->

[position] = sigma;

1842 if

(distinct_residues_found) {

1843 double

weight_sum = 0.0;

1845 for

(asi = 0; asi < aligned_seqs->

; asi++) {

1846 const Uint4

seq_idx = aligned_seqs->

[asi];

1855 for

(asi = 0; asi < aligned_seqs->

; asi++) {

1856 const Uint4

seq_idx = aligned_seqs->

[asi];

1864 for

(asi = 0; asi < aligned_seqs->

; asi++) {

1865 const Uint4

seq_idx = aligned_seqs->

[asi];

1867

(1.0/(double) aligned_seqs->

);

1888 for

(asi = 0; asi < aligned_seqs->

; asi++) {

1889 const Uint4

seq_idx = aligned_seqs->

[asi];

1896 if

(residue != kGapResidue) {

1908 #ifdef PSI_IGNORE_GAPS_IN_COLUMNS 1914 ASSERT

(position < msa->dimensions->query_length);

1920 #ifdef PSI_IGNORE_GAPS_IN_COLUMNS 1922

msa->

[

][position].

!= kGapResidue) {

1934 Boolean

nsg_compatibility_mode)

1940 const Uint4

kExpectedNumMatchingSeqs = nsg_compatibility_mode ? 0 : 1;

1972 #define SEQUENCE_WEIGHTS_CHECK__ABORT_ON_FAILURE 0 1979 Boolean

nsg_compatibility_mode)

1983 const Uint4

kExpectedNumMatchingSeqs = nsg_compatibility_mode ? 0 : 1;

1985 #if SEQUENCE_WEIGHTS_CHECK__ABORT_ON_FAILURE 1994 double

running_total = 0.0;

2005 for

(residue = 0; residue < msa->

; residue++) {

2006

running_total += seq_weights->

match_weights

[pos][residue];

2009 if

(running_total < 0.99 || running_total > 1.01) {

2012 #if SEQUENCE_WEIGHTS_CHECK__ABORT_ON_FAILURE 2013

check_performed =

;

2017 #if SEQUENCE_WEIGHTS_CHECK__ABORT_ON_FAILURE 2020 if

( !check_performed &&

2021

!nsg_compatibility_mode ) {

2022 assert

"Did not perform sequence weights check"

);

2050 int

columnNumber,

int

queryLength,

2051 const double

*expno);

2062 const double

*backgroundProbabilities,

2063 const double

observations);

2065 #define MAX_IND_OBSERVATIONS 400 2066 #define PSEUDO_MAX 1000000 2076 Boolean

nsg_compatibility_mode,

2084 const double

kZeroObsPseudo = 30.0;

2089 if

( !msa || !seq_weights || !sbp || !aligned_blocks || !internal_pssm ) {

2099 double

columnCounts = 0.0;

2100 double

observations = 0.0;

2101 double

pseudoWeight;

2110 if

(0 == pseudo_count)

2113

columnCounts = pseudo_count;

2116

pseudoWeight = kZeroObsPseudo;

2120

pseudoWeight = columnCounts;

2135 double

pseudo = 0.0;

2137 const double

kBeta = pseudoWeight;

2138 double

numerator = 0.0;

2139 double

denominator = 0.0;

2140 double

qOverPEstimate = 0.0;

2150

freq_ratios->

[

][

]);

2160

denominator = observations + kBeta;

2162 if

(nsg_compatibility_mode && denominator == 0.0) {

2165 ASSERT

(denominator != 0.0);

2167

qOverPEstimate = numerator/denominator;

2171

internal_pssm->

[p][

] = qOverPEstimate *

2195 const double

kZeroObsPseudo = 30.0;

2198 const double

* backgroundProbabilities =

;

2200 if

( !cd_msa || !seq_weights || !sbp || !internal_pssm || pseudo_count < 0) {

2205 if

( !freq_ratios ) {

2210 if

( !backgroundProbabilities ) {

2216 double

columnCounts = 0.0;

2217 double

observations = 0.0;

2218 double

pseudoWeight;

2220 if

(cd_msa->

[p] != kXResidue)

2224

observations =

MAX

(0.0,

2227 if

(0 == pseudo_count)

2230

columnCounts = pseudo_count;

2234

pseudoWeight = kZeroObsPseudo;

2238

pseudoWeight = columnCounts;

2245 if

(cd_msa->

[p] == kXResidue ||

2253 double

pseudo = 0.0;

2255 const double

kBeta = pseudoWeight;

2256 double

numerator = 0.0;

2257 double

denominator = 0.0;

2258 double

qOverPEstimate = 0.0;

2265

freq_ratios->

[

][

]);

2275

denominator = observations + kBeta;

2277 ASSERT

(denominator != 0.0);

2279

qOverPEstimate = numerator/denominator;

2283

internal_pssm->

[p][

] = qOverPEstimate *

2301 const double

* std_prob,

2307 double

* retval =

;

2311 if

( !std_prob || !score_mat ) {

2315

retval = (

double

calloc

(query_length,

sizeof

(

double

));

2320 for

(p = 0; p < query_length; p++) {

2322 double

info_sum = 0.0;

2324 for

(

= 0;

< alphabet_sz;

++) {

2328 double

exponent = exp(score *

lambda

);

2329 double tmp

= std_prob[

] * exponent;

2334

retval[p] = info_sum;

2342 double

** freq_ratios,

2343 const double

* std_prob,

2347 double

* retval =

;

2351 if

( !std_prob || !freq_ratios ) {

2355

retval = (

double

calloc

(query_length,

sizeof

(

double

));

2360 for

(p = 0; p < query_length; p++) {

2362 double

info_sum = 0.0;

2364 for

(

= 0;

< alphabet_sz;

++) {

2369 double

qOverPEstimate = freq_ratios[p][

] / std_prob[

];

2377

retval[p] = info_sum;

2395 const double

* std_probs)

2404 double

ideal_lambda = 0.0;

2406 if

( !internal_pssm || !sbp || !std_probs )

2413 for

(

= 0;

< internal_pssm->

;

++) {

2421 double

qOverPEstimate = 0.0;

2430 if

(is_unaligned_column && qOverPEstimate != 0.0) {

2431

is_unaligned_column =

;

2435 if

(qOverPEstimate == 0.0 || std_probs[j] <

kEpsilon

) {

2438 double tmp

log

(qOverPEstimate)/ideal_lambda;

2443 if

( (j == kXResidue || j == kStarResidue) &&

2450 if

(is_unaligned_column) {

2455 if

(freq_ratios->

[kResidue][j] != 0.0) {

2482 const double

* std_probs,

2488 int

** scaled_pssm =

;

2489 int

** pssm =

;

2491 double

factor_low = 1.0;

2492 double

factor_high = 1.0;

2493 double

ideal_lambda = 0.0;

2495 double

new_lambda = 0.0;

2498 Uint4

query_length = 0;

2501 if

( !internal_pssm || !sbp || !

|| !std_probs )

2508

pssm = internal_pssm->

;

2510

query_length = internal_pssm->

;

2517 for

(

= 0;

< internal_pssm->

;

++) {

2518 for

(j = 0; j < internal_pssm->

; j++) {

2532 if

(new_lambda > ideal_lambda) {

2535

factor = factor_high;

2538

first_time =

;

2540 if

(too_high ==

) {

2543

factor_high += (factor_high - 1.0);

2544

factor = factor_high;

2546

}

else if

(new_lambda > 0) {

2550

factor = factor_low;

2552

first_time =

;

2554 if

(too_high ==

) {

2557

factor_low += (factor_low - 1.0);

2558

factor = factor_low;

2570

factor = (factor_high + factor_low)/2;

2572 for

(

= 0;

< internal_pssm->

;

++) {

2573 for

(j = 0; j < internal_pssm->

; j++) {

2588 if

(new_lambda > ideal_lambda) {

2589

factor_low = factor;

2591

factor_high = factor;

2602 double

scaling_factor)

2616

scaling_factor,

, sbp);

2620

internal_pssm->

, internal_pssm->

);

2637 for

(

= 0;

< length;

++) {

2638 if

(seq[

] != kXResidue) {

2650 const double

* std_probs,

2655 Uint4

alphabet_size = 0;

2657 Uint4

effective_length = 0;

2673 if

(alphabet_size <= 0) {

2681 for

(p = 0; p < query_length; p++) {

2682 if

(

[p] == kXResidue) {

2685 for

(

= 0;

< alphabet_size;

++) {

2686 const int kScore

= pssm[p][aa_alphabet[

]];

2699 if

( !score_freqs ) {

2703

score_freqs->

obs_min

= min_score;

2704

score_freqs->

obs_max

= max_score;

2705 for

(p = 0; p < query_length; p++) {

2706 if

(

[p] == kXResidue) {

2710 for

(

= 0;

< alphabet_size;

++) {

2711 const int kScore

= pssm[p][aa_alphabet[

]];

2719

(std_probs[aa_alphabet[

]]/effective_length);

2724 for

(s = min_score; s <= max_score; s++) {

2735 const double

* std_probs,

2770

contains_aligned_regions =

;

2775 if

( !contains_aligned_regions ) {

2782 unsigned int

seq_index,

2796

sequence_position = msa->

[seq_index];

2797 for

(

= start;

< stop;

++) {

2798

sequence_position[

= 0;

2819 if

( !diagnostics || !msa || !aligned_block || !seq_weights ||

2820

!internal_pssm || !internal_pssm->

) {

2876

(seq_weights->

[p] / aligned_block->

[p] - 1);

2884 if

(diagnostics->

) {

2886

diagnostics->

[p] = seq_weights->

[p];

2920 if

( !diagnostics || !cd_msa || !seq_weights ||

2921

!internal_pssm || !internal_pssm->