);
122 "Distance matrix not assigned");
134 "Cluster index out of range");
151 if(dmat(elem, *elem_it) >
result) {
155 result= dmat(elem, *elem_it);
195 result+= dmat(elem, *elem_it);
226 ITERATE(vector<double>, it, vals) {
229 result/= (double)vals.size();
248 result= dist > current_dist ? dist : current_dist;
279 " distance method");
293 if(num_elements == 1) {
305 if(num_elements == 2) {
324 doubledist = (*m_DistMatrix)(0, 1) / 2.0;
350 doublemax_dist = 0.0;
351 for(
size_t i=0;
i< num_elements - 1;
i++) {
352 for(
size_tj=
i+1;j < num_elements;j++) {
354max_dist = (*m_DistMatrix)(
i, j);
359 if(max_dist <= max_diam) {
361 for(
int i=0;
i< (
int)num_elements;
i++) {
376vector<bool> used_entry(num_elements,
true);
380 for(
size_t i=0;
i< num_elements;
i++) {
381clusters[
i].AddElement((
int)
i);
385vector<TPhyTreeNode*> nodes(num_elements);
387 for(
size_t i=0;
i< nodes.size();
i++) {
391vector< vector<double> > dists_to_root(num_elements);
396 size_tnum_clusters = num_elements;
397 while(num_clusters > 1) {
403 while(!used_entry[min_i] && min_i < num_elements) {
408 while(!used_entry[min_j] && min_j < num_elements) {
412 if(min_j >= num_elements) {
415}
while(min_j >= num_elements && min_i < num_elements);
419 _ASSERT(do_trees || (min_i < num_elements && min_j < num_elements));
422 for(
size_t i=0;
i< num_elements - 1;
i++) {
423 for(
size_tj=
i+1;j < num_elements;j++) {
425 if(!used_entry[
i] || !used_entry[j]) {
429 if(dmat(
i, j) < dmat(min_i, min_j)) {
436 _ASSERT(used_entry[min_i] && used_entry[min_j]);
439 if(dmat(min_i, min_j) > max_diam) {
448&& clusters[min_i].
size() == 1 && clusters[min_j].
size() == 1) {
451 size_tnew_min_j = 0;
452 while((!used_entry[new_min_j] || new_min_j == min_j
453|| new_min_j == min_i
454|| clusters[new_min_j].
size() == 1)
455&& new_min_j < num_elements) {
459 if(new_min_j < num_elements) {
462 for(
size_tj=new_min_j+1;j < num_elements;j++) {
463 if(!used_entry[j] || clusters[j].
size() == 1
464|| j == min_j || j == min_i) {
468 if(dmat(min_i, j) < dmat(min_i, new_min_j)) {
474 if(new_min_j < num_elements) {
485 _ASSERT(clusters[min_i].
size() > 0 && clusters[min_j].
size() > 0);
486 _ASSERT(min_i < num_elements && min_j < num_elements);
497 const size_tleft = min_i;
498 const size_tright = min_j;
500new_root->
AddNode(nodes[left]);
501new_root->
AddNode(nodes[right]);
508 doublemean_dist_to_root_left =
s_FindMean(dists_to_root[left]);
509 doublemean_dist_to_root_right =
s_FindMean(dists_to_root[right]);
512 doubleleft_edge_length = dist - mean_dist_to_root_left;
513 doubleright_edge_length = dist - mean_dist_to_root_right;
514left_edge_length = left_edge_length > 0.0 ? left_edge_length : 0.0;
516= right_edge_length > 0.0 ? right_edge_length : 0.0;
518nodes[left]->GetValue().SetDist(left_edge_length);
519nodes[right]->GetValue().SetDist(right_edge_length);
522 if(dists_to_root[left].
empty()) {
523dists_to_root[left].push_back(dist);
527*it += left_edge_length;
531 if(dists_to_root[right].
empty()) {
532dists_to_root[right].push_back(dist);
536*it += right_edge_length;
543nodes[min_i] = new_root;
544nodes[min_j] =
NULL;
545 ITERATE(vector<double>, it, dists_to_root[min_j]) {
546dists_to_root[min_i].push_back(*it);
548dists_to_root[min_j].clear();
554used_entry[min_j] =
false;
556 _ASSERT(!do_trees || !nodes[min_j]);
563 for(
size_t i=0;i < min_i && min_i > 0;
i++) {
564 if(!used_entry[
i]) {
568dmat(
i, min_i) =
s_FindDist(clusters[
i], included_cluster,
570dmat(
i, min_i), dist_method);
572dmat(min_i,
i) = dmat(
i, min_i);
575 for(
size_tj=min_i+1;j < num_elements;j++) {
576 if(!used_entry[j]) {
580dmat(min_i, j) =
s_FindDist(clusters[j], included_cluster,
582dmat(min_i, j), dist_method);
584dmat(j, min_i) = dmat(min_i, j);
587clusters[min_j].clear();
591 for(
size_t i=0;
i< num_elements;
i++) {
592 if(used_entry[
i]) {
600it->AddElement(*elem);
637 "Element index in pre-set cluster larger than " 638 "number of elements provided with links");
654 _ASSERT(it->first != it->second);
709 if(
m_Clusters[cluster_id].GetMaxDistance() < it->weight) {
710 m_Clusters[cluster_id].SetMaxDistance(it->weight);
763 ITERATE(vector<int>, it, cluster) {
783 m_Trees.push_back(it->m_Tree);
833 doubledist = link.
weight/ 2.0;
840 m_Clusters[cluster_id].m_DistToRoot.push_back(dist);
841 m_Clusters[cluster_id].m_DistToRoot.push_back(dist);
898 doublesum_dist = 0.0;
902 doubleave_dist_to_root
903= sum_dist / (double)
m_Clusters[cluster_id].m_DistToRoot.size();
906 doubled = (distance - ave_dist_to_root) / 2.0;
908old_root->
GetValue().SetDist(d > 0.0 ? d : 0.0);
909node->
GetValue().SetDist(d > 0.0 ? d : 0.0);
918 m_Clusters[cluster_id].m_DistToRoot.push_back(d);
932vector<int> el(1, elem);
967 doubleleft_dist_to_root, right_dist_to_root;
972left_dist_to_root = sum / (double)cluster1.
size();
978right_dist_to_root = sum / (double)cluster2.
size();
980 doubleleft_dist = dist - left_dist_to_root;
981 doubleright_dist = dist - right_dist_to_root;
984left->
GetValue().SetDist(left_dist > 0.0 ? left_dist : 0.0);
985right->
GetValue().SetDist(right_dist > 0.0 ? right_dist : 0.0);
998 if(cluster1.
m_DistToRoot.capacity() < num_elements) {
1001+ (
int)(0.3 * num_elements));
1009 ITERATE(vector<int>, it, cluster2) {
1025 double& dist)
const 1068trees.push_back(*it);
1077trees.push_back(*it);
1082 #ifdef NCBI_COMPILER_WORKSHOP 1086 #pragma weak "__1cEncbiGcobaltKCClustererMReleaseTrees6MrnDstdGvector4Cpn0AJCTreeNode4n0AMCPhyNodeData_n0AVCDefaultNodeKeyGetter4n0E_____n0DJallocator4Cp4_____v_"= "__1cEncbiGcobaltKCClustererMReleaseTrees6MrnDstdGvector4Cpn0AJCTreeNode4n0AMCPhyNodeData_n0AVCDefaultNodeKeyGetter4n0E_____n0DJallocator4C5_____v_"
1092 if(index < 0 || index >= (
int)
m_Trees.size()) {
1094 "Tree index out of range");
1103 if(index < 0 || index >= (
int)
m_Trees.size()) {
1105 "Tree index out of range");
1118cluster->SetPrototype(cluster->FindCenterElement(*
m_DistMatrix));
1127 "Cluster index out of range");
1133 for(
size_t i=0;
i< cluster.
size() - 1;
i++) {
1134 for(
size_tj=
i+1;j < cluster.
size();j++) {
1139 "Distance matrix size is smaller than number of" 1143mat(
i, j) = mat(j,
i) = (*m_DistMatrix)(cluster[
i], cluster[j]);
1167 " or distance links must be set");
1177 "Cluster element index out of range");
1187 if(m_Elements.empty()) {
1192 if(m_Elements.size() == 1) {
1193 returnm_Elements[0];
1196vector<double> sum_distance;
1197sum_distance.resize(m_Elements.size());
1198 for(
size_t i=0;
i< m_Elements.size();
i++) {
1200 for(
size_tj=0;j < m_Elements.size();j++) {
1205dist += dmatrix(m_Elements[
i], m_Elements[j]);
1207sum_distance[
i] = dist;
1210 size_tmin_index = 0;
1211 for(
size_t i=1;
i< sum_distance.size();
i++) {
1212 if(sum_distance[
i] < sum_distance[min_index]) {
1217 returnm_Elements[min_index];
Exceptions for CClusterer class.
int FindCenterElement(const TDistMatrix &dmatrix) const
Find element that is closest to the center of the cluster.
void clear(void)
Removes all elements from the cluster.
size_t size(void) const
Get cluster size.
TPhyTreeNode * m_Tree
Cluster tree root.
void AddElement(int el)
Add element to the cluster.
vector< int > m_Elements
List of indeces of cluster elements.
int operator[](size_t index) const
Get element.
vector< double > m_DistToRoot
Distances between cluster elements and tree root.
void SetMaxDistance(double dist)
Set maximum distance in cluster.
vector< TPhyTreeNode * > & GetTrees(void)
Get list of trees for clusters.
list< int > m_UnusedEntries
void ComputeClustersFromLinks(void)
Compute clusters using graph of distances between elements.
int GetClusterId(int elem) const
Find id of cluster to which given element belongs.
bool x_CanAddElem(int cluster_id, int elem, double &dist) const
Check whether element can be added to the cluster.
void ReleaseTrees(vector< TPhyTreeNode * > &trees)
Get list of trees for clusters and release ownership to caller.
@ eDist
Clusters can be joined if there is a link between at least one pair of elements.
@ eClique
Clusters can be joined if there is a link between all pairs of their elements.
EDistMethod
Method for computing distance between clusters.
@ eCompleteLinkage
Maximum distance between elements.
@ eAverageLinkage
Avegrae distance between elements.
TPhyTreeNode * ReleaseTree(int index=0)
Get cluster tree and release ownership to caller.
shared_ptr< TDistMatrix > m_DistMatrix
void Reset(void)
Clear clusters and distance matrix.
const TSingleCluster & GetSingleCluster(size_t index) const
Get list of elements of a specified cluster.
CNcbiMatrix< double > TDistMatrix
CSingleCluster TSingleCluster
const TPhyTreeNode * GetTree(int index=0) const
Get tree for specific cluster.
void x_JoinElements(const CLinks::SLink &link)
Join two elements and form a cluster.
vector< int > m_ClusterId
void SetDistMatrix(const TDistMatrix &dmat)
Set new distance matrix.
const TDistMatrix & GetDistMatrix(void) const
Get distance matrix.
void SetLinks(CRef< CLinks > links)
Set distance links.
void x_JoinClustElem(int cluster_id, int elem, double dist)
Add element to a cluster.
void Run(void)
Cluster elements.
~CClusterer()
Destructor.
void x_JoinClusters(int cluster1_id, int cluster2_id, double dist)
Join two clusters.
void ComputeClusters(double max_diam, EDistMethod dist_method=eCompleteLinkage, bool do_trees=true, double infinity=-1.0)
Compute clusters.
void x_Init(void)
Initialize parameters.
vector< TSingleCluster > TClusters
bool x_CanJoinClusters(int cluster1_id, int cluster2_id, double &dist) const
Check whether two clusters can be joined.
void x_CreateCluster(int elem)
Create one-element cluster.
vector< TPhyTreeNode * > m_Trees
EClustMethod m_LinkMethod
void SetPrototypes(void)
Set prototypes for all clusters as center elements.
void PurgeDistMatrix(void)
Delete distance matrix.
CClusterer(void)
Create empty clusterer.
void GetClusterDistMatrix(int index, TDistMatrix &mat) const
Get distance matrix for elements of a selected cluster.
bool IsLink(int first, int second) const
Check if link exists between given two nodes.
void Sort(void)
Sort links according to weights in ascending order.
Uint4 GetNumElements(void) const
Get number of nodes.
list< SLink >::const_iterator SLink_CI
SLink_CI begin(void) const
Get iterator pointing to the first link.
SLink_CI end(void) const
Get iterator pointing behind the last link.
bool IsSorted(void) const
Check whether the links are sorted according to weights.
void Resize(size_t i, size_t j, T val=T())
resize this matrix, filling the empty cells with a known value
size_t GetRows() const
get the number of rows in this matrix
size_t GetCols() const
get the number of columns in this matrix
iterator begin()
iterators
definition of a Culling tree
static void s_CheckDistMatrix(const CClusterer::TDistMatrix &dmat)
static void s_PurgeTrees(vector< TPhyTreeNode * > &trees)
TPhyTreeNode * s_CreateTreeLeaf(int id)
static double s_FindDistAsMax(const CClusterer::TSingleCluster &cluster1, const CClusterer::TSingleCluster &cluster2, const CClusterer::TDistMatrix &dmat)
static double s_FindMean(const vector< double > &vals)
Find mean.
static double s_FindDistAsMean(const CClusterer::TSingleCluster &cluster1, const CClusterer::TSingleCluster &cluster2, const CClusterer::TDistMatrix &dmat)
Find mean distance between cluster elements.
static double s_FindDist(const CClusterer::CSingleCluster &cluster, const CClusterer::CSingleCluster &included_cluster, const CClusterer::CSingleCluster &extended_cluster, const CClusterer::TDistMatrix &dmat, double current_dist, CClusterer::EDistMethod dist_method)
Find distance between two clusters (cluster and included_cluster + extended_cluster) using given dist...
static double s_FindSumDistFromElem(int elem, const CClusterer::TSingleCluster &cluster, const CClusterer::TDistMatrix &dmat)
Find sum of distances between given element and cluster elements.
static double s_FindMaxDistFromElem(int elem, const CClusterer::TSingleCluster &cluster, const CClusterer::TDistMatrix &dmat)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Reset(void)
Reset reference object.
bool Empty(void) const THROWS_NONE
Check if CRef is empty â not pointing to any object, which means having a null value.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
void AddNode(TTreeType *subnode)
Add new subnode.
const TValue & GetValue(void) const
Return node's value.
unsigned int
A callback function used to compare two keys in a database.
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
CTreeNode< CPhyNodeData > TPhyTreeNode
double weight
Edge weight.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4