invalid_argument(
"Matrix contained NaN or Inf");
67 for(
size_t i= 0;
i< frac_diff.
GetRows(); ++
i) {
68 for(
size_tj = 0; j < frac_diff.
GetCols(); ++j) {
69 result(
i, j) = -0.75 *
log(1 - (4.0 / 3.0) * frac_diff(
i, j));
80 for(
size_t i= 0;
i< frac_diff.
GetRows(); ++
i) {
81 for(
size_tj = 0; j < frac_diff.
GetCols(); ++j) {
93 for(
size_t i= 0;
i< frac_diff.
GetRows(); ++
i) {
94 for(
size_tj = 0; j < frac_diff.
GetCols(); ++j) {
96- 0.2 * frac_diff(
i, j) * frac_diff(
i, j));
107 for(
size_t i= 0;
i< frac_diff.
GetRows(); ++
i) {
108 for(
size_tj = 0; j < frac_diff.
GetCols(); ++j) {
109 if(frac_diff(
i, j) >= 1.0) {
110 throwinvalid_argument(
"Grishin distance can not be computed \ 111 for sequences that are 100% different");
113 result(
i, j) = frac_diff(
i, j) * (2.0 - frac_diff(
i, j))
114/ (2 * (1.0 - frac_diff(
i, j)));
124 for(
size_t i= 0;
i< frac_diff.
GetRows(); ++
i) {
125 for(
size_tj = 0; j < frac_diff.
GetCols(); ++j) {
126 if(frac_diff(
i, j) >= 1.0) {
127 throwinvalid_argument(
"Grishin distance can not be computed \ 128 for sequences that are 100% different");
1310.65 * (pow(1 - frac_diff(
i, j), -1.0 / 0.65) - 1.0);
138 constvector<string>& labels)
144 for(
unsigned int i= 0;
i< dist_mat.
GetRows(); ++
i) {
147 if(labels.empty()) {
150new_node->
GetValue().SetLabel() = labels[
i];
155 intnext_id = dist_mat.
GetRows();
158vector<double>
r(dmat.
GetRows());
159 for(
unsigned int n= dist_mat.
GetRows();
n> 2; --
n) {
165it1 !=
tree->SubNodeEnd(); ++it1) {
166 i= (*it1)->GetValue().GetId();
169it2 !=
tree->SubNodeEnd(); ++it2) {
173j = (*it2)->GetValue().GetId();
182it1 !=
tree->SubNodeEnd(); ++it1) {
185 for(; it2 !=
tree->SubNodeEnd(); ++it2) {
186 i= (*it1)->GetValue().GetId();
187j = (*it2)->GetValue().GetId();
188m = dmat(
i, j) - (
r[
i] +
r[j]) / (
n- 2);
198 i= (*neighbor1)->GetValue().GetId();
199j = (*neighbor2)->GetValue().GetId();
202 doubleviu = dmat(
i, j) / 2 + (
r[
i] -
r[j]) / (2 * (
n- 2));
203 doublevju = dmat(
i, j) - viu;
204(*neighbor1)->GetValue().SetDist(viu);
205(*neighbor2)->GetValue().SetDist(vju);
206new_node->
AddNode(
tree->DetachNode(neighbor1));
207new_node->
AddNode(
tree->DetachNode(neighbor2));
208 tree->AddNode(new_node);
211it1 !=
tree->SubNodeEnd(); ++it1) {
212 intk = (*it1)->GetValue().GetId();
216dmat(k, u) = dmat(u, k)
217= (dmat(
i, k) + dmat(j, k) - dmat(
i, j)) / 2;
227 if((*it1)->IsLeaf()) {
230 doubled = dmat((*it1)->GetValue().GetId(), (*it2)->GetValue().GetId());
231(*it2)->GetValue().SetDist(d);
232(*it1)->AddNode(
tree->DetachNode(it2));
244 doubledistance = 0.0;
269 while(child !=
tree->SubNodeEnd()) {
270 if((*child)->IsLeaf()) {
278new_root->
GetValue().SetDist(0.0);
313 doublenew_distance = parent->
GetValue().GetDist();
314parent->
GetValue().SetDist(distance);
315distance = new_distance;
331parent = next_parent;
341best_node->
GetValue().GetDist()) {
367typedef
char boolean;
370 intbtype_in,
intwtype_in,
intntype_in);
381 constvector<
string>& labels)
385node->GetValue().SetId(
id);
386 if(!labels.empty()) {
387node->GetValue().SetLabel(labels[
id]);
389node->GetValue().SetLabel(me_node->label);
397child_node->
GetValue().SetDist(me_node->leftEdge->distance);
400child_node = node->
AddNode();
401child_node->
GetValue().SetDist(me_node->rightEdge->distance);
407 constvector<string>& labels) {
411edge = me_tree->root->leftEdge;
414node->
GetValue().SetDist(edge->distance);
418 if(!labels.empty()) {
419node->
GetValue().SetLabel(labels[
id]);
421node->
GetValue().SetLabel(me_tree->root->label);
432 constvector<string>& labels,
441 for(
unsigned int i= 0;
i< dist_mat.
GetRows(); ++
i) {
442 for(
unsigned intj = 0; j < dist_mat.
GetRows(); ++j) {
443dfme[
i][j] = dist_mat(
i, j);
449 char**clabels =
new char*[dist_mat.
GetRows()];
450vector<string> dummy_labels;
451dummy_labels.resize(dist_mat.
GetRows());
452 for(
unsigned int i= 0;
i< dist_mat.
GetRows(); ++
i) {
454clabels[
i] =
const_cast<char*
>(dummy_labels[
i].c_str());
459btype, wtype, ntype);
489 for(
unsigned intpos = 0; pos < seq1.size(); ++pos) {
490 if(seq1[pos] ==
'-'|| seq2[pos] ==
'-') {
494 if(seq1[pos] != seq2[pos]) {
499 returndiff_count / (double) pos_count;
508 for(
unsigned intpos = 0; pos < seq1.size(); ++pos) {
509 if(seq1[pos] ==
'-'|| seq2[pos] ==
'-') {
513 if(seq1[pos] == seq2[pos]) {
520 if(pos_count == 0) {
524 _ASSERT(same_count <= pos_count);
525 returnsame_count / (double) pos_count;
534avec.SetGapChar(
'-');
535avec.SetEndChar(
'-');
537 intnseqs = avec.GetNumRows();
538 result.Resize(nseqs, nseqs);
539vector<string> seq(nseqs);
541 for(
int i= 0;
i< nseqs; ++
i) {
542avec.GetWholeAlnSeqString(
i, seq[
i]);
545 for(
int i= 0;
i< nseqs; ++
i) {
547 for(
intj =
i+ 1; j < nseqs; ++j) {
557 intparent_uid,
int& next_uid)
559 const intlabel_fid = 0;
560 const intdist_fid = 1;
564 intmy_uid = next_uid++;
569node->SetParent(parent_uid);
570 if(ptn->
GetValue().GetLabel() !=
"") {
572node_feature->SetFeatureid(label_fid);
573node_feature->SetValue(ptn->
GetValue().GetLabel());
574node->SetFeatures().Set().push_back(node_feature);
576 if(ptn->
GetValue().IsSetDist()) {
578node_feature->SetFeatureid(dist_fid);
579node_feature->SetValue
581node->SetFeatures().Set().push_back(node_feature);
584btc->
SetNodes().Set().push_back(node);
597 const intlabel_fid = 0;
598 const intdist_fid = 1;
604fdescr->SetId(label_fid);
605fdescr->SetName(
"label");
606btc->
SetFdict().Set().push_back(fdescr);
609fdescr->SetId(dist_fid);
610fdescr->SetName(
"dist");
611btc->
SetFdict().Set().push_back(fdescr);
616btc->
SetNodes().Set().front()->ResetParent();
622 const intlabel_fid = 0;
623 const intdist_fid = 1;
629fdescr->SetId(label_fid);
630fdescr->SetName(
"label");
631btc->
SetFdict().Set().push_back(fdescr);
636btc->
SetNodes().Set().front()->ResetParent();
638 boolat_least_one_node_has_distance =
false;
640 for(
autoit = nodes.begin(); it != nodes.end(); ++it) {
641 const auto& node = **it;
642 if(!node.IsSetFeatures() || !node.GetFeatures().CanGet()) {
645 const auto& features = node.GetFeatures().Get();
646 for(
autoit1 = features.begin(); it1 != features.end(); ++it1) {
647 const auto& feature = **it1;
648 if(feature.IsSetFeatureid() && (dist_fid == feature.GetFeatureid())) {
649at_least_one_node_has_distance =
true;
653 if(at_least_one_node_has_distance) {
657 if(at_least_one_node_has_distance) {
659fdescr->SetId(dist_fid);
660fdescr->SetName(
"dist");
661btc->
SetFdict().Set().push_back(fdescr);
669 if(!isfinite(*it) || *it < 0.0) {
User-defined methods of the data storage class.
const CDense_seg & GetDenseg(void) const
CScope & GetScope(void) const
static void ZeroNegativeBranches(TTree *node)
Sets negative lengths of branches of a tree to zero.
static TTree * RerootTree(TTree *tree, TTree *node=NULL)
Reroot tree, new root is placed in the middle of the edge specified by node.
static bool AllFinite(const TMatrix &mat)
Check a matrix for NaNs and Infs.
static TTree * NjTree(const TMatrix &dist_mat, const vector< string > &labels=vector< string >())
Compute a tree by neighbor joining; as per Hillis et al.
static double FractionIdentity(const string &seq1, const string &seq2)
Calculate pairwise fraction identity based on positions where both sequences have a base/residue.
static TTree * x_FindLargestEdge(TTree *tree, TTree *best_node)
Find node with the longest edge in the tree.
static void GrishinGeneralDist(const TMatrix &frac_diff, TMatrix &result)
Grishin's distance for protein sequences 1 - p = ln(1 + 2d) / 2d.
static void GrishinDist(const TMatrix &frac_diff, TMatrix &result)
Grishin's distance for protein sequences: 1 - p = (1 - e^(2*d)) / (2 * d) approximated with d = p(2 +...
static double Divergence(const string &seq1, const string &seq2)
Calculate pairwise fractions of non-identity.
static void PoissonDist(const TMatrix &frac_diff, TMatrix &result)
Simple distance calculation for protein sequences: d = -ln(1 - p).
static TTree * FastMeTree(const TMatrix &dist_mat, const vector< string > &labels=vector< string >(), EFastMePar btype=eOls, EFastMePar wtype=eOls, EFastMePar ntype=eBalanced)
Compute a tree using the fast minimum evolution algorithm.
static void KimuraDist(const TMatrix &frac_diff, TMatrix &result)
Kimura's distance for protein sequences: d = -ln(1 - p - 0.2p^2).
static void JukesCantorDist(const TMatrix &frac_diff, TMatrix &result)
Jukes-Cantor distance calculation for DNA sequences: d = -3/4 ln(1 - (4/3)p).
void Resize(size_t i, size_t j, T val=T())
resize this matrix, filling the empty cells with a known value
TData & GetData()
retrieve the data associated with this matrix
size_t GetRows() const
get the number of rows in this matrix
size_t GetCols() const
get the number of columns in this matrix
definition of a Culling tree
void freeMatrix(double **D, int size)
static void s_AddNodeToBtc(CRef< CBioTreeContainer > btc, const TPhyTreeNode *ptn, int parent_uid, int &next_uid)
Recursive function for adding TPhyTreeNodes to BioTreeContainer.
static void s_AddFastMeSubtree(fastme::meNode *me_node, CDistMethods::TTree *node, const vector< string > &labels)
double ** initDoubleMatrix(int d)
CRef< CBioTreeContainer > MakeDistanceSensitiveBioTreeContainer(const TPhyTreeNode *tree)
Conversion from TPhyTreeNode to CBioTreeContainer, potentially without dist feature key.
void s_ThrowIfNotAllFinite(const CDistMethods::TMatrix &mat)
static CDistMethods::TTree * s_ConvertFastMeTree(fastme::meTree *me_tree, const vector< string > &labels)
CRef< CBioTreeContainer > MakeBioTreeContainer(const TPhyTreeNode *tree)
Conversion from TPhyTreeNode to CBioTreeContainer.
meTree * fastme_run(double **D_in, int numSpecies_in, char **labels, int btype_in, int wtype_in, int ntype_in)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
TNodeList::iterator TNodeList_I
TTreeType * DetachNode(TTreeType *subnode)
Remove the subtree from the tree without destroying it.
TNodeList_CI SubNodeBegin(void) const
Return first const iterator on subnode list.
TNodeList::const_iterator TNodeList_CI
void AddNode(TTreeType *subnode)
Add new subnode.
bool IsLeaf() const
Report whether this is a leaf node.
TNodeList_CI SubNodeEnd(void) const
Return last const iterator on subnode list.
const TValue & GetValue(void) const
Return node's value.
const TTreeType * GetParent(void) const
Get node's parent.
void SetNodes(TNodes &value)
Assign a value to Nodes data member.
void SetFdict(TFdict &value)
Assign a value to Fdict data member.
const Tdata & Get(void) const
Get the member data.
const TNodes & GetNodes(void) const
Get the Nodes member data.
const struct ncbi::grid::netcache::search::fields::SIZE size
Floating-point support routines.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4