);
55argdescr->AddOptionalKey (
"qdb",
"qdb",
"cDNA BLAST database",
58argdescr->AddOptionalKey (
"sdb",
"sdb",
"Genomic BLAST database",
61argdescr->AddFlag (
"ho",
"Print raw hits only - no compartments");
63argdescr->AddDefaultKey(
"penalty",
"penalty",
"Per-compartment penalty",
66argdescr->AddDefaultKey(
"min_idty",
"min_idty",
"Minimal overall identity. Note: in current implementation there is no sense to set different 'min_idty' and 'min_singleton_idty' (minimum is used anyway).",
69argdescr->AddDefaultKey(
"min_singleton_idty",
"min_singleton_idty",
70 "Minimal identity for singleton compartments. " 71 "The actual parameter passed to the compartmentization " 72 "procedure is least of this parameter multipled " 73 "by the seq length, and min_singleton_idty_bps. Note: in current implementation there is no sense to set different 'min_idty' and 'min_singleton_idty' (minimum is used anyway).",
76argdescr->AddDefaultKey(
"min_singleton_idty_bps",
"min_singleton_idty_bps",
77 "Minimal identity for singleton compartments " 78 "in base pairs. Default = parameter disabled.",
81argdescr->AddDefaultKey (
"max_intron",
"max_intron",
82 "Maximum intron length (in base pairs)",
87argdescr->AddDefaultKey(
"dropoff",
"dropoff",
88 "Max score drop-off during hit extension.",
91s_GetDefaultDropOff()));
93argdescr->AddDefaultKey(
"min_query_len",
"min_query_len",
94 "Minimum length for individual cDNA sequences.",
97argdescr->AddDefaultKey(
"min_hit_len",
"min_hit_len",
98 "Minimum length for reported hits in hits-only mode. " 99 "No effect in compartments mode.",
102argdescr->AddDefaultKey (
"maxvol",
"maxvol",
103 "Maximum index volume size in MB (approximate)",
107argdescr->AddFlag(
"noxf",
"[With external hits] Suppress overlap x-filtering: " 108 "print all compartment hits intact.");
110argdescr->AddOptionalKey(
"seqlens",
"seqlens",
111 "[With external hits] Two-column file with sequence IDs " 112 "and their lengths. If none supplied, the program will " 113 "attempt fetching the lengths from GenBank. " 114 "Cannot be used with -qdb.",
117argdescr->AddDefaultKey(
"N",
"N",
118 "[With external hits] Max number of compartments " 119 "per query (0 = All).",
123argdescr->SetConstraint(
"penalty", constrain01);
124argdescr->SetConstraint(
"min_idty", constrain01);
125argdescr->SetConstraint(
"min_singleton_idty", constrain01);
128argdescr->SetConstraint(
"maxvol", constrain_maxvol);
131argdescr->SetConstraint(
"min_query_len", constrain_minqlen);
134argdescr->SetConstraint(
"min_hit_len", constrain_minhitlen);
146 if(
id.
size() &&
id[0] !=
'#') {
189 const boolis_qdb (args[
"qdb"]);
190 const boolis_seqlens (args[
"seqlens"]);
208 m_penalty= args[
"penalty"].AsDouble();
268args[
"sdb"].AsString()));
270matcher->SetOutputMethod(
true);
279matcher->SetHitsOnly(args[
"ho"]);
280matcher->SetMinHitLength(args[
"min_hit_len"].AsInteger());
281matcher->SetMaxVolSize(1024 * 1024 * (args[
"maxvol"].AsInteger()));
283matcher->SetDropOff(args[
"dropoff"].AsInteger());
285 try{ matcher->Run(); }
286 catch(std::bad_alloc&) {
288 "Not enough memory available to run this program");
318 stringquery0, subj0;
327 const string query(hit->GetQueryId()->GetSeqIdString(
true));
328 const stringsubj (hit->GetSubjId()->GetSeqIdString(
true));
330 if(query0.size() == 0 || subj0.size() == 0) {
333id2id[query0] = subj0;
337 if(
query!= query0 || subj != subj0) {
340 if(rv != 0)
returnrv;
342 if(
query!= query0) {
352cout << **ii << endl;
363TIdToId::const_iterator im = id2id.find(query0);
364 if(im == id2id.end() || im->second != subj0) {
365id2id[query0] = subj0;
368cerr <<
"Input hit stream not properly ordered"<< endl;
374hitrefs.push_back(hit);
378 if(hitrefs.size()) {
380 if(rv != 0)
returnrv;
388cout << **ii << endl;
402cerr <<
"Cannot retrieve sequence lengths for: " 412 typedefTAccessor::TCoord TCoord;
414 constTCoord penalty_bps (TCoord(
m_penalty* qlen + 0.5));
415 constTCoord min_matches (TCoord(
m_min_idty* qlen + 0.5));
418 constTCoord min_singleton_matches (
min(msm1, msm2));
420TAccessor ca (penalty_bps, min_matches, min_singleton_matches, !
m_NoXF);
421ca.SetMaxIntron(
static_cast<TCoord
>(
m_max_intron));
422ca.Run(hitrefs.begin(), hitrefs.end());
425 for(
boolb0 (ca.GetFirst(comp)); b0 ; b0 = ca.GetNext(comp)) {
440 #ifdef PCOMPARTMENT_RANKER_M1 442 const size_texons_lhs (lhs->GetExonCount());
443 const size_texons_rhs (rhs->GetExonCount());
444 if(exons_lhs == exons_rhs) {
445 returnlhs->GetMatchCount() > rhs->GetMatchCount();
448 returnexons_lhs > exons_rhs;
453 const size_tidtybin_lhs (lhs->GetIdentityBin());
454 const size_tidtybin_rhs (rhs->GetIdentityBin());
455 if(idtybin_lhs == idtybin_rhs) {
456 const size_texons_lhs (lhs->GetExonCount());
457 const size_texons_rhs (rhs->GetExonCount());
458 if(exons_lhs == exons_rhs) {
459 returnlhs->GetMatchCount() > rhs->GetMatchCount();
462 returnexons_lhs > exons_rhs;
466 returnidtybin_lhs > idtybin_rhs;
470 #undef PCOMPARTMENT_RANKER_M1 519m_SeqLength(length), m_IdentityBin(0), m_ExonCount(0), m_MatchCount(0)
521 if(hitrefs.size() == 0) {
523 "Cannot init compartment with empty hit list");
526 for(THitRefs::const_reverse_iterator ii(hitrefs.rbegin()), ie(hitrefs.rend());
535 if(m_HitRefs.size() == 0) {
536m_HitRefs.push_back(hitref);
540 const THitRef& hb (m_HitRefs.back());
541 const boolcs (hb->GetSubjStrand());
542 if(cs != hitref->GetSubjStrand()) {
544 "different from that of the compartment.");
547m_HitRefs.push_back(hitref);
554 if(m_HitRefs.size()) {
555 returnm_HitRefs.front()->GetSubjStrand();
565 const THit::TId& subjid_lhs (m_HitRefs.front()->GetSubjId());
567 const TIntIdco (subjid_lhs->CompareOrdered(*subjid_rhs));
570 const THit::TId& queryid_lhs (m_HitRefs.front()->GetQueryId());
572 const TIntIdco (queryid_lhs->CompareOrdered(*queryid_rhs));
577 const boolstrand_rhs (rhs.
GetStrand());
578 if(strand_lhs == strand_rhs) {
580 returnGetSpan().first < rhs.
GetSpan().first;
583 returnGetSpan().first > rhs.
GetSpan().first;
587 returnstrand_lhs < strand_rhs;
613 const size_tkMinIntronLength (25);
614 const size_tkMinExonLength (10);
617 THitRef& h (m_HitRefs.front());
618 doublematches ( h->GetLength() * h->GetIdentity() );
620 if(m_HitRefs.size() > 1) {
628 if(
prev.NotEmpty()) {
631 if(q0 + kMinExonLength <= h->GetQueryStop()) {
634- (h->GetQueryStart() - q0));
635 if(
prev->GetSubjStop() + kMinIntronLength <= s0) {
639matches += (h->GetQueryStop() - q0max) * h->GetIdentity();
651 if(
prev.NotEmpty()) {
654 if(q0 + kMinExonLength <= h->GetQueryStop()) {
657+ h->GetQueryStart() - q0);
658 if(s0 + kMinIntronLength <= prev->GetSubjStop()) {
662matches += (h->GetQueryStop() - q0max) * h->GetIdentity();
671m_MatchCount = size_t(
round(matches));
672m_IdentityBin = size_t(floor(
double(m_MatchCount) / m_SeqLength / 0.1));
679ostr << **ii << endl;
690 int main(
intargc,
const char* argv[])
CCompartment(const THitRefs &hitrefs, size_t length)
bool GetStrand(void) const
void x_AddHit(const THitRef &hitref)
bool operator<(const CCompartment &rhs) const
pair< THit::TCoord, THit::TCoord > TRange
TRange GetSpan(void) const
virtual void Init()
Initialize the application.
CRef< objects::CScope > m_Scope
virtual void Exit()
Cleanup on application exit.
void x_RankAndStore(void)
virtual int Run()
Run the application.
size_t x_GetSeqLength(const string &id)
size_t m_MaxCompsPerQuery
size_t m_min_singleton_idty_bps
TCompartRefs m_Compartments
vector< TCompartRef > TCompartRefs
TCompartRefs m_CompartmentsPermanent
int x_DoWithExternalHits(void)
double m_min_singleton_idty
int x_ProcessPair(const string &query0, THitRefs &hitrefs)
vector< THitRef > THitRefs
void x_ReadSeqLens(CNcbiIstream &istr)
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
container_type::const_iterator const_iterator
const_iterator end() const
const_iterator find(const key_type &key) const
ostream & operator<<(ostream &ostr, const CCompartApp::CCompartment &rhs)
int main(int argc, const char *argv[])
bool PCompartmentRanker(const CCompartApp::TCompartRef &lhs, const CCompartApp::TCompartRef &rhs)
bool operator<(const CCompartApp::TCompartRef &lhs, const CCompartApp::TCompartRef &rhs)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
unsigned int TSeqPos
Type for sequence locations and lengths.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fHideLogfile
Hide log file description.
@ fHideConffile
Hide configuration file description.
@ fHideVersion
Hide version description.
@ eInputFile
Name of file (must exist and be readable)
@ eDouble
Convertible into a floating point number (double)
@ eString
An arbitrary string.
@ eInteger
Convertible into an integer number (int or Int8)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
TObjectType * GetNonNullPointer(void)
Get pointer value and throw a null pointer exception if pointer is null.
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define USING_SCOPE(ns)
Use the specified namespace.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
const struct ncbi::grid::netcache::search::fields::SIZE size
#define GetProgramName
Avoid name clash with the NCBI C Toolkit.
std::istream & in(std::istream &in_, double &x_)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4