(
void);
62 virtual int Run(
void);
63 virtual void Exit(
void);
85arg_desc->SetUsageContext(
GetArguments().GetProgramBasename(),
86 "K-mer based sequence clustering");
89arg_desc->AddKey(
"i",
"infile",
"Query file name",
92arg_desc->AddDefaultKey(
"k",
"num",
"K-mer length",
95arg_desc->AddDefaultKey(
"alph",
"alphabet",
"Compressed alphabet",
99 "se-v10",
"se-b15"));
101arg_desc->AddDefaultKey(
"dist_method",
"method",
"Distance method",
107arg_desc->AddDefaultKey(
"max_dist",
"val",
108 "Maximum distance between sequences in a cluster",
111arg_desc->AddDefaultKey(
"clust_method",
"method",
"Clustering method",
117arg_desc->AddDefaultKey(
"impl",
"method",
"K-mer implementation method",
140vector< CRef<CSeq_loc> > queries;
145vector<TKmerCounts> kmer_counts;
146 unsigned intkmer_len = (
unsigned int)args[
"k"].AsInteger();
148TKMethods::ECompressedAlphabet alph = TKMethods::eRegular;
149 if(args[
"alph"].AsString() ==
"regular") {
150alph = TKMethods::eRegular;
152 else if(args[
"alph"].AsString() ==
"se-v10") {
153alph = TKMethods::eSE_V10;
155 else if(args[
"alph"].AsString() ==
"se-b15") {
156alph = TKMethods::eSE_B15;
160+ args[
"alph"].AsString();
167TKMethods::ComputeCounts(queries, *scope, kmer_counts);
169TKMethods::EDistMeasures dist_method = TKMethods::eFractionCommonKmersLocal;
170 if(args[
"dist_method"].AsString() ==
"local") {
171dist_method = TKMethods::eFractionCommonKmersLocal;
173 else if(args[
"dist_method"].AsString() ==
"global") {
174dist_method = TKMethods::eFractionCommonKmersGlobal;
178+ args[
"dist_method"].AsString();
183 doublemax_diameter = args[
"max_dist"].AsDouble();
185 if(args[
"clust_method"].AsString() ==
"clique") {
190 CRef<CLinks>links = TKMethods::ComputeDistLinks(kmer_counts, dist_method,
201 for(
int i=0;
i< (
int)queries.size();
i++) {
206list< CRef<CSeq_id> > id;
209 NcbiCout<< id_str <<
"\t"<<
"Cluster_" 225vector< CRef<CSeq_loc> > queries;
230vector<TKmerCounts> kmer_counts;
231 unsigned intkmer_len = (
unsigned int)args[
"k"].AsInteger();
233TKMethods::ECompressedAlphabet alph = TKMethods::eRegular;
234 if(args[
"alph"].AsString() ==
"regular") {
235alph = TKMethods::eRegular;
237 else if(args[
"alph"].AsString() ==
"se-v10") {
238alph = TKMethods::eSE_V10;
240 else if(args[
"alph"].AsString() ==
"se-b15") {
241alph = TKMethods::eSE_B15;
245+ args[
"alph"].AsString();
252TKMethods::ComputeCounts(queries, *scope, kmer_counts);
254TKMethods::EDistMeasures dist_method = TKMethods::eFractionCommonKmersLocal;
255 if(args[
"dist_method"].AsString() ==
"local") {
256dist_method = TKMethods::eFractionCommonKmersLocal;
258 else if(args[
"dist_method"].AsString() ==
"global") {
259dist_method = TKMethods::eFractionCommonKmersGlobal;
263+ args[
"dist_method"].AsString();
268 doublemax_diameter = args[
"max_dist"].AsDouble();
270 if(args[
"clust_method"].AsString() ==
"clique") {
275 CRef<CLinks>links = TKMethods::ComputeDistLinks(kmer_counts, dist_method,
286 for(
int i=0;
i< (
int)queries.size();
i++) {
291list< CRef<CSeq_id> > id;
294 NcbiCout<< id_str <<
"\t"<<
"Cluster_" 307 if(args[
"impl"].AsString() ==
"bin") {
331 int main(
intargc,
const char* argv[])
Declares class to display one-line descriptions at the top of the BLAST report.
K-mer counts implemented as bit vectors.
virtual int Run(void)
Run the application.
virtual void Init(void)
Initialize the application.
virtual void Exit(void)
Cleanup on application exit.
CRef< CObjectManager > m_ObjMgr
int x_RunBinary(const CArgs &args)
int x_RunSparse(const CArgs &args)
Interface for CClusterer class used for clustering any type of data based on distance matrix.
int GetClusterId(int elem) const
Find id of cluster to which given element belongs.
EClustMethod
Method for clustering from links.
@ eDist
Clusters can be joined if there is a link between at least one pair of elements.
@ eClique
Clusters can be joined if there is a link between all pairs of their elements.
void SetMakeTrees(bool trees)
Set make cluster tree/dendrogram option.
void SetLinks(CRef< CLinks > links)
Set distance links.
void Run(void)
Cluster elements.
void SetClustMethod(EClustMethod method)
Set clustering method for links.
static void GetSeqIdList(const objects::CBioseq_Handle &bh, list< CRef< objects::CSeq_id > > &ids)
Converts a Bioseq handle's sequence id type into a list of objects::CSeq_id references,...
static string GetSeqIdListString(const list< CRef< objects::CSeq_id > > &id, bool show_gi)
Creates a '|' delimited string, corresponding to a list of Seq-ids.
Kmer counts for alignment free sequence similarity computation implemented as a sparse vector.
Interface for computing and manipulating k-mer counts vectors that allows for different implementations...
int main(int argc, const char *argv[])
void GetSeqLocFromStream(CNcbiIstream &instream, vector< CRef< objects::CSeq_loc > > &seqs, CRef< objects::CScope > &scope, objects::CFastaReader::TFlags flags)
Reads fasta sequences from stream, adds them to scope, and returns them as the list of Seq_locs.
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fHideXmlHelp
Hide XML help description.
@ fHideLogfile
Hide log file description.
@ fHideFullVersion
Hide full version description.
@ fHideDryRun
Hide dryrun description.
@ fHideConffile
Hide configuration file description.
@ fHideVersion
Hide version description.
@ eInputFile
Name of file (must exist and be readable)
@ eDouble
Convertible into a floating point number (double)
@ eString
An arbitrary string.
@ eInteger
Convertible into an integer number (int or Int8)
void SetDiagStream(CNcbiOstream *os, bool quick_flush=true, FDiagCleanup cleanup=0, void *cleanup_data=0, const string &stream_name="")
Set diagnostic stream.
@ eDS_Default
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
@ fForceType
Force specified type regardless of accession.
@ fAssumeProt
Assume prots unless accns indicate otherwise.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
@ eGetBioseq_All
Search bioseq, load if not loaded yet.
virtual void SetParams()
Called at the beginning of Run, before creating thread pool.
unsigned int
A callback function used to compare two keys in a database.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4