(
"Kimura (protein)");
77 static const string sc_FastMe(
"Fast Minimum Evolution");
78 static const string sc_Nj(
"Neighbor Joining");
80 static const string sc_SeqId(
"Sequence ID");
81 static const string sc_TaxName(
"Taxonomic Name (if available)");
82 static const string sc_SeqTitle(
"Sequence Title (if available)");
89 "Build Phylogenetic Tree from Alignment",
90 "Build Phylogenetic Tree from Alignment",
91 "https://www.ncbi.nlm.nih.gov/tools/gbench/tutorial28/",
99 return "tree_building_tool";
105 return "Phylogenetic Tree Builder Tool";
195feat_descr->SetId(
id);
196feat_descr->SetName(name);
197btc.
SetFdict().Set().push_back(feat_descr);
203node_feature->SetFeatureid(
id);
204node_feature->SetValue(
value);
205node.
SetFeatures().Set().push_back(node_feature);
212vector<string> labels;
213vector<string> organisms;
214vector<string> titles;
215vector<string> taxids;
247 BuildAln(anchored_aln_vec, anchored_aln, aln_user_options);
249 if(anchored_aln.
GetDim() == 1) {
254 BuildAln(anchored_aln_vec, anchored_aln, aln_user_options);
256sparse_aln_ref =
new CSparseAln(anchored_aln, *scope);
267labels.resize(num_rows);
268organisms.resize(num_rows);
269titles.resize(num_rows);
270taxids.resize(num_rows);
272 for(
int i= 0;
i< num_rows; ++
i) {
281 catch(std::exception&) {
291 if(seq_id && seq_id->
IsGi())
292gi = seq_id->
GetGi();
296 ERR_POST(
"CTreeBuilder: No tax_id for gi|"<< gi);
303 boolis_species =
false;
304 boolis_uncultured =
false;
306org_ref = taxon.
GetOrgRef(tax_id, is_species,
307is_uncultured, blast_name);
309 catch(std::exception&) {
313organisms[
i] = org_ref->GetTaxname();
316labels[
i] = organisms[
i];
322titles[
i] = sequence::CDeflineGenerator().GenerateDefline(bsh);
324labels[
i] = titles[
i];
328 if(labels[
i].
empty()) {
343 throwruntime_error(
"The sequence divergence matrix contained " 344 "invalid or infinite values.\n" 345 "This can happen when some pairs of sequences " 346 "share no non-gap positions\n" 347 "in the alignment, leading to fractional " 348 "sequence identities of 0/0.");
353 throwruntime_error(
"The calculated distance matrix contained " 354 "invalid or infinite values.\nThis can happen " 355 "with the Kimura method when some " 356 "pairs of sequences are less than about 14.6% " 362 throwruntime_error(
"The calculated distance matrix contained " 363 "invalid or infinite values.\nThis can happen " 364 "with the Jukes-Cantor method when some " 365 "pairs of sequences are less than 25% " 371 throwruntime_error(
"The calculated distance matrix contained " 372 "invalid or infinite values.");
375 throwruntime_error(
string(
"Invalid distance calculation type: ")
382vector<string> numerical_labels(dmat.
GetCols());
383 for(
size_t i= 0;
i< numerical_labels.size(); ++
i) {
387unique_ptr<TPhyTreeNode>
tree;
390}
else if(sCons ==
sc_Nj) {
393 throwruntime_error(
string(
"Invalid tree reconstruction algorithm: ")
402 const intkLabelId = 0;
403 const intkSeqIdId = 2;
404 const intkOrganismId = 3;
405 const intkTitleId = 4;
407 const intkHostId = 6;
408 const intkCountryId = 7;
409 const intkIsolationSourceId = 8;
410 const intkCollectionDateId = 9;
420 if( !(*node)->CanGetFeatures() ) {
424 if((*node_feature)->GetFeatureid() == kLabelId) {
430(*node_feature)->SetValue(labels[seq_number]);
438 if(!organisms[seq_number].
empty()) {
439 s_AddFeature(**node, kOrganismId, organisms[seq_number]);
445 if(!titles[seq_number].
empty()) {
451 if(biosource->IsSetOrgname()) {
452 const COrgName& on = biosource->GetOrgname();
454 for(
const auto& orgmod : on.
GetMod()) {
456 if(created_descr.
insert(kHostId).second)
465 if(biosource->IsSetSubtype()) {
466 for(
const auto& subsrc : biosource->GetSubtype()) {
467 if(subsrc->IsSetSubtype()) {
468 switch(subsrc->GetSubtype()) {
470 if(created_descr.
insert(kCountryId).second)
475 if(created_descr.
insert(kIsolationSourceId).second)
477 s_AddFeature(**node, kIsolationSourceId, subsrc->GetName());
480 if(created_descr.
insert(kCollectionDateId).second)
482 s_AddFeature(**node, kCollectionDateId, subsrc->GetName());
507dm->FromMatrix(dmat);
508dm->SetLabels() = labels;
522 result.Resize(nseqs, nseqs);
523vector<string> seq(nseqs);
525 for(
int i= 0;
i< nseqs; ++
i) {
529 for(
int i= 0;
i< nseqs; ++
i) {
531 for(
intj =
i+ 1; j < nseqs; ++j) {
User-defined methods of the data storage class.
void BuildAln(TAnchoredAlnVec &in_alns, CAnchoredAln &out_aln, const CAlnUserOptions &options, TAlnSeqIdIRef pseudo_seqid=TAlnSeqIdIRef())
Build anchored alignment from a set of alignments.
void CreateAnchoredAlnVec(_TAlnStats &aln_stats, TAnchoredAlnVec &out_vec, const CAlnUserOptions &options)
Create anchored alignment from each seq-align in the stats.
const_iterator insert(const CSeq_align &seq_align)
Insert new CSeq_align into the list.
size_type size(void) const
Container mapping seq-aligns to vectors of participating seq-ids.
void push_back(const CSeq_align &aln)
Adding an alignment.
TDim GetNumRows(void) const
Helper class which collects seq-align statistics: seq-ids participating in alignments and rows,...
Options for different alignment manager operations.
@ ePreserveRows
Preserve all rows as they were in the input (e.g.
const CBioseq_Handle & GetBioseqHandle(TNumrow row) const
void SetEndChar(TResidue gap_char)
void SetGapChar(TResidue gap_char)
Query-anchored alignment can be 2 or multi-dimensional.
TDim GetDim(void) const
How many rows.
CDataLoadingAppJob - a base class for Jobs loading data into projects.
void AddProjectItem(objects::CProjectItem &item)
static bool AllFinite(const TMatrix &mat)
Check a matrix for NaNs and Infs.
static TTree * NjTree(const TMatrix &dist_mat, const vector< string > &labels=vector< string >())
Compute a tree by neighbor joining; as per Hillis et al.
static double Divergence(const string &seq1, const string &seq2)
Calculate pairwise fractions of non-identity.
static void PoissonDist(const TMatrix &frac_diff, TMatrix &result)
Simple distance calculation for protein sequences: d = -ln(1 - p).
static TTree * FastMeTree(const TMatrix &dist_mat, const vector< string > &labels=vector< string >(), EFastMePar btype=eOls, EFastMePar wtype=eOls, EFastMePar ntype=eBalanced)
Compute a tree using the fast minimum evolution algorithm.
static void KimuraDist(const TMatrix &frac_diff, TMatrix &result)
Kimura's distance for protein sequences: d = -ln(1 - p - 0.2p^2).
static void JukesCantorDist(const TMatrix &frac_diff, TMatrix &result)
Jukes-Cantor distance calculation for DNA sequences: d = -3/4 ln(1 - (4/3)p).
size_t GetCols() const
get the number of columns in this matrix
void SetObject(CSerialObject &object)
wrapper for setting the object pointed to by this item
TRng GetAlnRange(void) const
Get whole alignment range.
TNumrow GetNumRows(void) const
Synonym of the above.
void SetGapChar(TResidue gap_char)
Gap character modifier.
string & GetAlnSeqString(TNumrow row, string &buffer, const TSignedRange &rq_aln_rng, bool force_translation=false) const
Fetch alignment sequence data.
const objects::CBioseq_Handle & GetBioseqHandle(TNumrow row) const
Get bioseq handle for the row.
const string & GetLastError() const
bool GetTaxId4GI(TGi gi, TTaxId &tax_id_out)
CConstRef< COrg_ref > GetOrgRef(TTaxId tax_id, bool &is_species, bool &is_uncultured, string &blast_name, bool *is_specified=NULL)
static void x_Divergence(CSparseAln &spaln, CDistMethods::TMatrix &result)
virtual void x_CreateProjectItems()
override this function in derived classes and populate m_Items.
CTreeBuilderJob(const CTreeBuilderParams &params)
CTreeBuilderJob.
CTreeBuilderParams m_Params
IRegSettings An interface for objects that save / restore settings using CGuiRegistry.
iterator_bool insert(const value_type &val)
CRef< objects::CBioTreeContainer > MakeBioTreeContainer(const TPhyTreeNode *tree)
Conversion from TPhyTreeNode to CBioTreeContainer.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
wxString GetLeafLabels() const
virtual CAlgoToolManagerParamsPanel * x_GetParamsPanel()
returns a pointer to the parameters panel, override in derived classes
virtual void x_SelectCompatibleInputObjects()
select only Seq-ids
virtual void LoadSettings()
virtual IRegSettings * x_GetParamsAsRegSetting()
return a pointer to Parameters object as IRegSettings interface
CTreeBuilderPanel * m_Panel
CTreeBuilderParams m_Params
wxString GetConstructMethod() const
virtual CDataLoadingAppJob * x_CreateLoadingJob()
factory method for creating the job that executes the tool algorithm override in derived classes
virtual void CleanUI()
override this function in a derived class and clean extra members
TConstScopedObjects m_SeqIds
void SetData(const CTreeBuilderParams &data)
CTreeBuilderParams & GetData()
bool Create(wxWindow *parent, wxWindowID id=ID_CTreeBuilderPanel, const wxPoint &pos=wxDefaultPosition, const wxSize &size=wxSize(400, 300), long style=wxTAB_TRAVERSAL)
virtual void InitUI()
override this function in a derived class and initialize extra members
const SConstScopedObject & GetObject() const
virtual string GetExtensionIdentifier() const
returns the unique human-readable identifier for the extension the id should use lowercase letters se...
CTreeBuilderTool()
CTreeBuilderTool.
virtual bool x_CreateParamsPanelIfNeeded()
returns / creates Parameters panel, override in derived classes see cpp file for example
virtual string GetExtensionLabel() const
returns a displayable label for this extension ( please capitalize the key words - "My Extension" )
virtual bool x_ValidateParams()
validates user input in Parameters panel, report errors if any
void SetObjects(TConstScopedObjects *objects)
wxString GetDistanceMethod() const
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
CRef< objects::CScope > scope
string m_Descr
mutex to sync our internals
static void SetLabelByData(objects::CProjectItem &item, objects::CScope *scope=NULL)
CConstRef< CObject > object
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
CConstRef< CSeq_id > GetSeqId(void) const
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
const CBioSource * GetBioSource(const CBioseq &bioseq)
Retrieve the BioSource object for a given bioseq handle.
const COrg_ref & GetOrg_ref(const CBioseq_Handle &handle)
Return the org-ref associated with a given sequence.
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
@ eGetId_ForceGi
return only a gi-based seq-id
CConstRef< CSeq_id > GetNonLocalIdOrNull(void) const
Find a non-local ID if present, consulting assembly details if all IDs for the overall sequence are l...
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
@ eSubtype_collection_date
DD-MMM-YYYY format.
@ eSubtype_isolation_source
void SetNodes(TNodes &value)
Assign a value to Nodes data member.
void SetFdict(TFdict &value)
Assign a value to Fdict data member.
void SetFeatures(TFeatures &value)
Assign a value to Features data member.
list< CRef< CNodeFeature > > Tdata
void SetTreetype(const TTreetype &value)
Assign a value to Treetype data member.
list< CRef< CNode > > Tdata
const TMod & GetMod(void) const
Get the Mod member data.
bool CanGetMod(void) const
Check if it is safe to call GetMod method.
@ eSubtype_nat_host
natural host of this specimen
const TDenseg & GetDenseg(void) const
Get the variant data.
const TSegs & GetSegs(void) const
Get the Segs member data.
bool IsDenseg(void) const
Check if variant Denseg is selected.
TGi GetGi(void) const
Get the variant data.
bool IsGi(void) const
Check if variant Gi is selected.
constexpr bool empty(list< Ts... >) noexcept
const GenericPointer< typename T::ValueType > T2 value
vector< CRef< CAnchoredAln > > TAnchoredAlnVec
Collection of anchored alignments.
string ToStdString(const wxString &s)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4