A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/phytree__calc_8cpp_source.html below:

NCBI C++ ToolKit: src/algo/phy_tree/phytree_calc.cpp Source File

46 #ifdef NCBI_COMPILER_MSVC 47 # define isfinite _finite 48 #elif defined(NCBI_OS_SOLARIS) && !defined(__builtin_isfinite) \ 49  && (!defined(__GNUC__) || (__GNUC__ == 4 && __GNUC_MINOR < 5)) 51 # define isfinite finite 59

: m_NumElements(num_elements), m_Diagnol(0.0)

61  if

(num_elements > 0) {

62  m_Distances

.resize(num_elements * num_elements - num_elements, -1.0);

68

m_NumElements = num_elements;

69  if

(num_elements > 0) {

70

m_Distances.resize(num_elements * num_elements - num_elements);

76  if

(

i

>= m_NumElements || j >= m_NumElements ||

i

< 0 || j < 0) {

78  "Distance matrix index out of bounds"

);

89  int

index = (

i

*

i

-

i

) / 2 + j;

90  _ASSERT

(index < (

int

)m_Distances.size());

92  return

m_Distances[index];

97  if

(

i

>= m_NumElements || j >= m_NumElements ||

i

< 0 || j < 0) {

99  "Distance matrix index out of bounds"

);

104  "Distance matrix diagnol elements cannot be set"

);

111  int

index = (

i

*

i

-

i

) / 2 + j;

112  _ASSERT

(index < (

int

)m_Distances.size());

114  return

m_Distances[index];

142  for

(

i

=0;

i

< ids.size();

i

++) {

157

+

" not found in alignment"

);

165  "Tree was not constructed"

);

180

seqalign->

SetSegs

().SetDenseg(*denseg);

196  double val

= mat(

i

, j);

197  if

(!isfinite(

val

) ||

val

< 0.0) {

212

vector<string> sequences(num_seqs);

218  const string

& query_seq = sequences[query_idx];

220

bitvector.

set

(query_idx);

226  for

(

int i

=0;

i

< num_seqs;

i

++) {

229  if

(

i

== query_idx) {

242

links.push_back(

SLink

(query_idx,

i

, dist));

243

bitvector.

set

(

i

);

247

sequences[

i

].erase();

257

list<SLink>::const_iterator it1 = links.begin();

260  for

(; it1 != links.end() && it1->index1 == query_idx; ++it1) {

263  if

(!bitvector.

test

(it1->index2)) {

266  const string

& seqi = sequences[it1->index2];

269

list<SLink>::const_iterator it2(it1);

273  for

(; it2 != links.end() && it2->index1 == query_idx

274

&& bitvector.

test

(it1->index2); ++it2) {

277  if

(!bitvector.

test

(it2->index2)) {

280  const string

& seqj = sequences[it2->index2];

289

links.push_back(

SLink

(it2->index2, it1->index2, dist));

300

bitvector[score_1 > score_2 ? it2->index2 : it1->index2]

311  for

(; en.

valid

(); ++en) {

312

included.push_back((

int

)*en);

318  ITERATE

(list<SLink>, it, links) {

320  if

(bitvector.

test

(it->index1) && bitvector.

test

(it->index2)) {

324  int

index1 = (it->index1 == 0 ? 0 :

325

bitvector.

count_range

(0, it->index1 - 1, rs_index));

327  int

index2 = (it->index2 == 0 ? 0 :

328

bitvector.

count_range

(0, it->index2 - 1, rs_index));

335  "divergence matrix contains invalid or inifinite values"

);

338  return

included.size() > 1;

342  const CAlnVec

& align_data_source,

343

vector<string>& seqids)

350  string

seq_id_str =

""

;

351

(*seq_id).GetLabel(&seq_id_str);

353

seqids.push_back(seq_id_str);

362  ITERATE

(vector<int>, it, used_indices) {

363  for

(; index < *it; index++) {

384

vector<string> new_labels;

385  ITERATE

(vector<int>, it, used_indices) {

386

new_labels.push_back(

m_Labels

[*it]);

440  "Invalid distance calculation method"

);

452  "distance matrix contains invalid or infinite values"

);

482  "Invalid tree reconstruction method"

);

487  "Tree was not created"

);

496  int

num_children = 0;

497  double

cumulative_length = 0.0;

501

cumulative_length += (*it)->GetValue().GetDist();

505  double

new_len = cumulative_length / (double)num_children;

509

(*it)->GetValue().SetDist(new_len);

530  if

(node->

GetValue

().IsSetDist()) {

531  double

dist = node->

GetValue

().GetDist();

532  if

(!isfinite(dist) || dist < 0.0) {

544  " is not the same as number of sequences"

);

549  "divergence must be positive"

);

555  "divergence must be smaller than 0.85 if Kimura distance is" 559

vector<int> used_inds;

570

- (

int

)used_inds.size())

571

+

" sequences were discarded due to" 572  " divergence that exceeds maximum allowed."

);

579  m_Messages

.push_back(

"Sequence dissimilarity exceeds maximum" 601  bool

success =

true

;

603  if

(seq_align_set.

Get

().size() == 1) {

606  else if

(seq_align_set.

Get

().size() > 1) {

616

mix.

Merge

(merge_flags);

623  "Empty seqalign provided"

);

642  const char

kGap =

'-'

;

644

seg_info.resize(aln.size());

648  size_t

query_start = 0;

649  size_t

query_end =

query

.length() - 1;

650  while

(query_start <

query

.length() &&

query

[query_start] == kGap) {

653  while

(query_end > 0 &&

query

[query_end] == kGap) {

658  for

(

size_t i

=0;

i

< aln.size();

i

++) {

659  const string

& sequence = aln[

i

];

660

vector<TRange>& segs = seg_info[

i

];

662  _ASSERT

(query_end < sequence.length());

666  for

(

size_t

k=query_start;k <= query_end;k++) {

667  if

(sequence[k] != kGap) {

672  const int

kMinSplit =

max

(seq_len / 20, 4);

674  while

(p < sequence.length()) {

677  while

(p < sequence.length() && (p < query_start ||

678

sequence[p] == kGap)) {

686  while

(p < sequence.length() && gaps < kMinSplit && p <= query_end) {

689  while

(p < sequence.length() && p <= query_end &&

690

sequence[p] != kGap) {

699  while

(p < sequence.length() && p <= query_end &&

700

sequence[p] == kGap) {

709  if

(seg.

GetLength

() > 4 || segs.empty()) {

static CRef< CScope > m_Scope

TDim GetNumRows(void) const

const CDense_seg & GetDenseg(void) const

void Add(const CDense_seg &ds, TAddFlags flags=0)

void Merge(TMergeFlags flags=0)

const CSeq_align & GetSeqAlign(void) const

const CBioseq_Handle & GetBioseqHandle(TNumrow row) const

void SetEndChar(TResidue gap_char)

void SetGapChar(TResidue gap_char)

string & GetWholeAlnSeqString(TNumrow row, string &buffer, TSeqPosList *insert_aln_starts=0, TSeqPosList *insert_starts=0, TSeqPosList *insert_lens=0, unsigned int scrn_width=0, TSeqPosList *scrn_lefts=0, TSeqPosList *scrn_rights=0) const

int CalculateScore(TNumrow row1, TNumrow row2) const

CRef< CDense_seg > ExtractRows(const vector< TDim > &rows) const

Extract specified rows of the alignment, in specified order.

void Assign(const CSerialObject &obj, ESerialRecursionMode how=eRecursive)

overloaded Assign()

static TTree * RerootTree(TTree *tree, TTree *node=NULL)

Reroot tree, new root is placed in the middle of the edge specified by node.

static TTree * NjTree(const TMatrix &dist_mat, const vector< string > &labels=vector< string >())

Compute a tree by neighbor joining; as per Hillis et al.

static double FractionIdentity(const string &seq1, const string &seq2)

Calculate pairwise fraction identity based on positions where both sequences have a base/residue.

static void GrishinGeneralDist(const TMatrix &frac_diff, TMatrix &result)

Grishin's distance for protein sequences 1 - p = ln(1 + 2d) / 2d.

static void GrishinDist(const TMatrix &frac_diff, TMatrix &result)

Grishin's distance for protein sequences: 1 - p = (1 - e^(2*d)) / (2 * d) approximated with d = p(2 +...

static void PoissonDist(const TMatrix &frac_diff, TMatrix &result)

Simple distance calculation for protein sequences: d = -ln(1 - p).

static TTree * FastMeTree(const TMatrix &dist_mat, const vector< string > &labels=vector< string >(), EFastMePar btype=eOls, EFastMePar wtype=eOls, EFastMePar ntype=eBalanced)

Compute a tree using the fast minimum evolution algorithm.

static void KimuraDist(const TMatrix &frac_diff, TMatrix &result)

Kimura's distance for protein sequences: d = -ln(1 - p - 0.2p^2).

static void JukesCantorDist(const TMatrix &frac_diff, TMatrix &result)

Jukes-Cantor distance calculation for DNA sequences: d = -3/4 ln(1 - (4/3)p).

void Resize(size_t i, size_t j, T val=T())

resize this matrix, filling the empty cells with a known value

size_t GetRows() const

get the number of rows in this matrix

Guide tree calc exceptions.

Distance matrix (square, symmetric with zeros on diagnol)

CDistMatrix(int num_elements=0)

Constructor.

const double & operator()(int i, int j) const

Get distance value.

bool Empty(void) const

Is matrix size zero.

void Resize(int num_elements)

Resize matrix.

vector< double > m_Distances

Storage for distance values.

int GetNumElements(void) const

Get number of rows/columns.

CDistMatrix m_DivergenceMatrix

Matrix of percent identities based distances.

CRef< CSeq_align > GetSeqAlign(void) const

Get seq_align that corresponds to current tree.

CRef< CBioTreeContainer > GetSerialTree(void) const

Get serial tree.

vector< string > m_RemovedSeqIds

Sequences that are not included in the tree.

bool m_CalcSegInfo

Calculate segment positions.

vector< int > m_RemovedSeqIndeces

Indeces of sequences that are not included in the tree.

bool CalcBioTree(void)

Compute bio tree for the current alignment in a black box manner.

CDistMethods::TMatrix m_FullDistMatrix

Full distance matrix, this data structure is required by CDistMethods functions.

TPhyTreeNode * m_Tree

Computed tree.

void x_CalcDistMatrix(void)

Compute distance as evolutionary corrected dissimilarity.

const vector< CRef< CSeq_id > > & GetSeqIds(void) const

Get seq-ids of sequences used in tree construction.

void x_Init(void)

Initialize class attributes.

@ eFastME

Fast Minumum Evolution.

int m_QueryIdx

Index of query sequence in the input alignment.

bool x_CalcDivergenceMatrix(vector< int > &used_indices)

Compute divergence matrix and find sequences to exclude from tree reconstruction.

CPhyTreeCalc(const CSeq_align &seq_aln, CRef< CScope > scope)

Create CPhyTreeCalc from Seq-align.

void x_CalcAlnSegInfo(const vector< string > &aln, TSegInfo &seg_info)

Calculate the alignment segemnts summary.

EDistMethod m_DistMethod

Method of calculating evolutionary distance correction.

ETreeMethod m_TreeMethod

Method of calculating tree.

vector< string > m_Messages

Error/warning messages.

CDistMatrix m_DistMatrix

Matrix of evolutionary distance.

CRef< CScope > m_Scope

Scope.

double m_MaxDivergence

Maximum allowed divergence between sequences.

vector< string > m_Labels

Labels for tree leaves.

void SetQuery(int index)

Set query sequence by index in alignment.

TSegInfo m_SegInfo

Positions of segments in mulitple or query anchored alignment.

CRef< CAlnVec > m_AlignDataSource

Alignment data source.

void x_InitAlignDS(const CSeq_align &seq_aln)

Initialize alignment data source.

void x_CorrectBranchLengths(TPhyTreeNode *node)

Change non-finite and negative tree branch lengths to zeros.

vector< vector< TRange > > TSegInfo

void x_CreateValidAlign(const vector< int > &used_indices)

Create alignment only for sequences included in tree computation.

void x_ComputeTree(bool correct=true)

Compute phylogenetic tree.

definition of a Culling tree

Constant iterator designed to enumerate "ON" bits counted_enumerator keeps bitcount,...

bool valid() const noexcept

Checks if iterator is still valid.

bool test(size_type n) const noexcept

returns true if bit n is set and false is bit n is 0.

bvector< Alloc > & set(size_type n, bool val=true)

Sets bit n if val is true, clears bit n if val is false.

size_type count_range(size_type left, size_type right, const rs_index_type &rs_idx) const noexcept

Returns count of 1 bits in the given range [left..right] Uses rank-select index to accelerate the sea...

enumerator first() const

Returns enumerator pointing on the first non-zero bit.

void build_rs_index(rs_index_type *rs_idx, bvector< Alloc > *bv_blocks=0) const

compute running total of all blocks in bit vector (rank-select index)

size_type count() const noexcept

population count (count of ON bits)

Rank-Select acceleration index.

CRef< objects::CBioTreeContainer > MakeBioTreeContainer(const TPhyTreeNode *tree)

Conversion from TPhyTreeNode to CBioTreeContainer.

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

const string AsFastaString(void) const

CConstRef< CSeq_id > GetSeqId(void) const

static CSeq_id_Handle GetHandle(const CSeq_id &id)

Normal way of getting a handle, works for any seq-id.

const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)

If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...

@ eGetId_Best

return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function

bool IsSameBioseq(const CSeq_id_Handle &id1, const CSeq_id_Handle &id2, EGetBioseqFlag get_flag)

Check if two seq-ids are resolved to the same Bioseq.

@ eGetBioseq_All

Search bioseq, load if not loaded yet.

void Reset(void)

Reset reference object.

position_type GetLength(void) const

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

TNodeList::iterator TNodeList_I

TNodeList_CI SubNodeBegin(void) const

Return first const iterator on subnode list.

TNodeList::const_iterator TNodeList_CI

bool IsLeaf() const

Report whether this is a leaf node.

TNodeList_CI SubNodeEnd(void) const

Return last const iterator on subnode list.

const TValue & GetValue(void) const

Return node's value.

const TDenseg & GetDenseg(void) const

Get the variant data.

void SetSegs(TSegs &value)

Assign a value to Segs data member.

void SetDim(TDim value)

Assign a value to Dim data member.

void SetType(TType value)

Assign a value to Type data member.

vector< CRef< CSeq_id > > TIds

TDim GetDim(void) const

Get the Dim member data.

const TIds & GetIds(void) const

Get the Ids member data.

const Tdata & Get(void) const

Get the member data.

const TSegs & GetSegs(void) const

Get the Segs member data.

list< CRef< CSeq_align > > TAlign

Compressed bitset (entry point to bm.h)

Floating-point support routines.

The NCBI C++/STL use hints.

bool s_ValidateMatrix(const CPhyTreeCalc::CDistMatrix &mat)

static void s_RecordSeqId(int index, const CAlnVec &align_data_source, vector< string > &seqids)

Structure for storing divergences between sequences as list of links.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4