A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/advanced__cleanup_8cpp_source.html below:

NCBI C++ ToolKit: src/algo/align/advanced_cleanup/advanced_cleanup.cpp Source File

75  "Maximum intron size for a Splign compartment to " 76  "identify its query as RNA"

,

79

arg_desc.

AddDefaultKey

(

"min-rna-total-coverage"

,

"MinRnaTotalCoverage"

,

80  "Minimum total query percent coverage for a Splign " 81  "compartment to be identified as RNA"

,

85

arg_desc.

AddFlag

(

"allow-consistent-intersection"

,

86  "Allow intersecting alignments in genomic compartments, " 87  "but enforce consistency"

);

88

arg_desc.

AddFlag

(

"allow-inconsistent-intersection"

,

89  "Allow intersecting alignments in genomic compartments, " 90  "and don't enforce consistency"

);

93  "allow-consistent-intersection"

);

95

arg_desc.

AddFlag

(

"allow-large-compart-gaps"

,

96  "Permit genomic compartments to contain large gaps between " 97  "alignments; default limits gaps to 3 x alignment size"

);

99

arg_desc.

AddDefaultKey

(

"max-compartment-failures"

,

"MaxCompartmentFailures"

,

100  "Fail if we have failure on more than this number of compartments"

,

107

arg_desc.

AddFlag

(

"no-prosplign-introns"

,

108  "Generate ProSplign alignment without introns"

);

110

prosplign::CCompartOptions::SetupArgDescriptions(&arg_desc);

111

arg_desc.

AddOptionalKey

(

"prosplign-size-threshold"

,

"ProsplignSizeThreshold"

,

112  "Maximum compartment size - protein length x " 113  "genomic range length - on which to run prosplign"

,

116

arg_desc.

AddFlag

(

"prosplign-gaps"

,

"Precalculate un-bridgeable gaps. Prohibit compartment to go over un-bridgeable gaps."

);

117

arg_desc.

AddFlag

(

"prosplign-unk-gaps"

,

"Prohibit compartment to go over gaps of unknown length."

);

121  "GenColl ASN.1 to process"

);

128

, m_MinSingletonIdty(0.7)

129

, m_MinSingletonIdtyBps(9999999)

133

, m_MaxCompartmentFailures(UINT_MAX)

134

, m_CompartmentFailureCount(0)

135

, m_ProsplignGaps(

false

)

136

, m_ProsplignUnknownGaps(

false

)

137

, m_ProsplignRunThreshold(UINT_MAX)

163  m_Penalty

= args[

"compartment_penalty"

].AsDouble();

164  m_MinIdty

= args[

"min_compartment_idty"

].AsDouble();

168  if

(args[

"min_singleton_idty"

]) {

174  if

(args[

"allow-consistent-intersection"

] ||

175

args[

"allow-inconsistent-intersection"

])

179  if

(args[

"allow-inconsistent-intersection"

]) {

182  if

(!args[

"allow-large-compart-gaps"

]) {

190

assm_source; ++assm_source)

194  "Can have only one gc assembly input"

);

196  _TRACE

(

"Reading assembly..."

);

220  for

(

unsigned

arg = 0; arg < raw_args.

Size

(); ++arg) {

221

command_line_args.

insert

(raw_args[arg]);

223  if

(!command_line_args.count(

"-min_hole_len"

)) {

226  if

(!command_line_args.count(

"-compartment_penalty"

)) {

227  m_CompartOptions

->m_CompartmentPenalty = prosplign::CCompartOptions::default_CompartmentPenalty;

229  if

(!command_line_args.count(

"-min_compartment_idty"

)) {

230  m_CompartOptions

->m_MinCompartmentIdty = prosplign::CCompartOptions::default_MinCompartmentIdty;

233

args[

"no-prosplign-introns"

]));

236  if

(args[

"prosplign-size-threshold"

]) {

248

EQueryType query_type,

249  bool

with_best_placement,

251

ESplignDirRun splign_direction)

253

cleaned_aligns.clear();

259  Cleanup

(query_it->second, this_query_output, query_type,

260

with_best_placement, splign_direction);

261

cleaned_aligns.splice(cleaned_aligns.end(), this_query_output);

263

}

else if

(!input_aligns.empty()) {

266

input_aligns.front()->GetSeq_id(1))] = input_aligns;

267  Cleanup

(subject_aligns, cleaned_aligns, query_type,

268

with_best_placement, splign_direction);

274

EQueryType query_type,

275  bool

with_best_placement,

276

ESplignDirRun splign_direction)

278

cleaned_aligns.clear();

286

list<CSplignCompartment> splign_compartments;

287

list< CRef<CSeq_align_set> > genomic_compartments;

288  if

(query_type ==

eInfer

) {

291

query_aligns.begin()->second.front()->GetSeq_id(0));

295  "Type information not available for " 314

splign_compartments,

true

);

320  if

(compart_it->HasRnaCharacteristics()) {

329  switch

(query_type) {

334

TCompartmentsBySubject;

335

TAlignsBySubjectAndPos indexed_aligns;

336

TCompartmentsBySubject failed_compartments;

337  bool

found_prosplign_alignments =

false

;

340  TAlignsByPos

&aligns_by_pos = indexed_aligns[subject_it->first];

342  true

, &aligns_by_pos);

343  sort

(aligns_by_pos.begin(), aligns_by_pos.end());

349

compart_aligns, genomic_range);

351  if

(compart_aligns.empty()) {

352

failed_compartments[subject_it->first]

353

. push_back(genomic_range);

355

found_prosplign_alignments =

true

;

358

cleaned_aligns.splice(cleaned_aligns.end(), compart_aligns);

361  if

(!found_prosplign_alignments) {

364  ITERATE

(TCompartmentsBySubject, subject_it, failed_compartments) {

365  ITERATE

(vector<TSeqRange>, compart_it, subject_it->second)

368

indexed_aligns[subject_it->first], *compart_it,

379  if

(splign_compartments.empty()) {

382

splign_compartments,

true

);

386  ITERATE

(list<CSplignCompartment>, it, splign_compartments) {

390

cleaned_aligns.push_back(align);

396

cleaned_aligns.push_back(align);

400  if

(!cleaned_aligns.empty()) {

407

genomic_compartments);

415

genomic_compartments,

true

);

426  if

(with_best_placement) {

438

aligns_by_pair[

query

][

subject

].push_back(*align_it);

444

list<CSplignCompartment> &compartments,

455

}

else if

(!input_aligns.empty()) {

462  if

(hitref->GetQueryStrand() ==

false

) {

463

hitref->FlipStrands();

465

hit_refs.push_back(hitref);

480  const TCoord

min_singleton_matches (

min

(msm1, msm2));

482  TAccessor

ca(penalty_bps, min_matches, min_singleton_matches, !

m_NoXF

);

487  TCoord

smin (0), smax (kCoordMax);

488  bool

same_strand (

false

);

494

same_strand =

false

;

499

same_strand = strand_this == strand_next;

500

smax = same_strand? (box + 4)[2]: kCoordMax;

509  if

(smax < box[3]) smax = box[3];

510  if

(smin > box[2]) smin = box[2];

513

compartments.push_back(comp);

516

smin = same_strand? box[3]: 0;

551  true

, aligns_by_pos);

554

}

else if

(!input_aligns.empty()) {

560

hit_refs.push_back(hitref);

562

aligns_by_pos->push_back(make_pair(hitref->GetSubjMin(), *iter));

565

vector<pair<TSeqPos, TSeqPos> > *pgap =

NULL

;

576  for

(;smit; ++smit) {

580

(unk_gaps && slit->CanGetFuzz() && slit->GetFuzz().IsLim() &&

597

THitComparator sorter (THitComparator::eSubjMinSubjMax);

598

stable_sort(hit_refs.begin(), hit_refs.end(), sorter);

602

compartments.splice(compartments.end(), compartments_for_aligns);

612  "Function RunSplignOnCompartment() supported only " 613  "for one direction"

);

646

.

Print

(

"splign_error"

, comp_align.

m_Msg

)

647

.

Print

(

"compart_query"

, compart.

query

.AsString())

648

.

Print

(

"compart_subject"

, compart.

subject

.AsString())

651

.

Print

(

"compart_strand"

, dir ==

eDirSense

?

"plus"

:

"minus"

);

652  if

(comp_align.

m_Msg

.find(

"Space limit exceeded"

) == string::npos &&

656  "Failures on too many compartments"

);

670  cleanup

.FillUnaligned(

true

);

671  cleanup

.Cleanup(compart, compart_cleaned_aligns);

677

cleaned_aligns.splice(cleaned_aligns.end(), compart_cleaned_aligns);

691  switch

(desc.

Which

()) {

698  "unexpected number of IDs in align-ref"

);

718  "failed to find protein"

);

721  if

( !genomic_loc ) {

723  "failed to find genomic location"

);

736

cleaned_aligns.push_back(prosplign_output);

754  for

(TAlignsByPos::const_iterator it =

755

lower_bound(aligns_by_pos.begin(), aligns_by_pos.end(),

757

it != lower_bound(aligns_by_pos.begin(), aligns_by_pos.end(),

761

compart_members.push_back(it->second);

769  const

list<CSplignCompartment> &splign_compartments,

772  ITERATE

(list<CSplignCompartment>, compart_it, splign_compartments) {

775

genomic_compart->

Set

().push_back(

779

genomic_compartments.push_back(genomic_compart);

838

align_set.

Set

().splice(align_set.

Set

().end(), aligns);

842

aligns.splice(aligns.end(), align_set.

Set

());

849 

{

return

hit1->GetQueryMin() < hit2->GetQueryMin(); }

854  if

(

hits

.size() < 2) {

862  if

((*hit_it)->GetSubjStrand() != prev_hit->GetSubjStrand()) {

866  if

((*hit_it)->GetSubjStrand()

867

? ((*hit_it)->GetSubjMin() < prev_hit->GetSubjMin())

868

: ((*hit_it)->GetSubjMax() > prev_hit->GetSubjMax()))

874  TSeqRange

subject_hit1(prev_hit->GetSubjMin(), prev_hit->GetSubjMax()),

875

subject_hit2((*hit_it)->GetSubjMin(), (*hit_it)->GetSubjMax());

877

intron -= subject_hit1;

878

intron -= subject_hit2;

884

query_coverage +=

TSeqRange

((*hit_it)->GetQueryMin(),

885

(*hit_it)->GetQueryMax());

897

, m_AlignRef(const_cast<

CSeq_align

*>(&align))

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

CSplignAlignmentHit(const objects::CSeq_align &align)

void x_CleanupProsplignAsGenomic(const TAlignsByPos &aligns_by_pos, const TSeqRange &genomic_range, objects::CSeq_align_set::Tdata &cleaned_aligns)

void SetParams(const CArgs &args)

int m_GenomicCompartOptions

unique_ptr< prosplign::CCompartOptions > m_CompartOptions

CRef< objects::CScope > m_Scope

map< objects::CSeq_id_Handle, CSeq_align_set::Tdata > TAlignsBySubject

CRef< objects::CSeq_align > RunSplignOnCompartment(const CSplignCompartment &compart, ESplignDirRun dir)

void x_SplignCompartmentsToGenomicFormat(const list< CSplignCompartment > &splign_compartments, list< CRef< objects::CSeq_align_set > > &genomic_compartments)

bool m_ProsplignUnknownGaps

void GetGenomicCompartments(const objects::CSeq_align_set::Tdata &input_aligns, list< CRef< objects::CSeq_align_set > > &compartments, bool one_pair=false)

Divide list of genomic alignments into compartments.

static void SetupArgDescriptions(CArgDescriptions &arg_desc)

map< objects::CSeq_id_Handle, vector< pair< TSeqPos, TSeqPos > > > m_seq_gaps

void SetScope(const CRef< objects::CScope > &scope)

vector< pair< TSeqPos, CRef< objects::CSeq_align > > > TAlignsByPos

void GetProsplignCompartments(const objects::CSeq_align_set::Tdata &input_aligns, prosplign::TCompartments &compartments, bool one_pair=false, TAlignsByPos *aligns_by_pos=NULL)

void CleanupGenomicCompartment(const objects::CSeq_align_set::Tdata &compart, objects::CSeq_align_set::Tdata &cleaned_aligns, bool add_scores=true)

TSeqPos m_MinSingletonIdtyBps

map< objects::CSeq_id_Handle, TAlignsBySubject > TAlignsBySeqIds

void GetSplignCompartments(const objects::CSeq_align_set::Tdata &input_aligns, list< CSplignCompartment > &compartments, bool one_pair=false)

Divide list of RNA alignments into Splign compartments.

void Cleanup(const objects::CSeq_align_set::Tdata &input_aligns, objects::CSeq_align_set::Tdata &cleaned_aligns, EQueryType query_type=eInfer, bool with_best_placement=true, bool one_pair=false, ESplignDirRun splign_direction=eDirBoth)

Divide list of RNA alignments into Splign compartments.

unsigned m_CompartmentFailureCount

bool x_CleanupProsplignCompartment(const objects::CSeq_annot &compartment, const TAlignsByPos &aligns_by_pos, objects::CSeq_align_set::Tdata &cleaned_aligns, TSeqRange &genomic_range)

CRef< CProSplignScoring > m_ProSplignScoring

void x_AddStandardAlignmentScores(objects::CSeq_align &align)

void DivideByQuerySubjectPairs(const objects::CSeq_align_set::Tdata &input_aligns, TAlignsBySeqIds &aligns_by_pair)

double m_MinSingletonIdty

unsigned m_MaxCompartmentFailures

CRef< CProSplign > m_ProSplign

CRef< CProSplignOutputOptions > m_ProSplignOutputOptions

TSeqPos m_ProsplignRunThreshold

void BestPlacement(objects::CSeq_align_set::Tdata &aligns)

pair< TCoord, TCoord > TCoordRange

class CAlignCleanup implements an alignment cleanup utility based on the C++ alignment manager.

void Run(typename THitRefs::iterator start, typename THitRefs::iterator finish, CScope *scope=NULL, const vector< pair< TCoord, TCoord > > *gaps=NULL)

Execute: identify compartments.

bool GetStrand(size_t i) const

void SetMaxIntron(TCoord mi)

Assign the maximum intron length, in base pairs.

void Get(size_t idx, THitRefs &compartment) const

Retrieve a compartment by index.

const TCoord * GetBox(size_t i) const

bool GetStatus(size_t i) const

pair< size_t, size_t > GetCounts(void) const

Retrieve the compartment counts.

void CreateHierarchy(CGC_Assembly *target_set=NULL)

Generate the internal up-pointers.

class CInputStreamSource encapsulates details of how we supply applications with input data through s...

static void SetStandardInputArgs(CArgDescriptions &arg_desc, const string &prefix="input", const string &description="data to process", bool is_mandatory=false)

Supply a standard set of arguments via argument descriptions to an application.

static CNcbiApplication * Instance(void)

Singleton method.

bool operator()(const CSplign::THitRef &hit1, const CSplign::THitRef &hit2) const

Output filtering parameters.

CProSplignOutputOptions & SetMinHoleLen(int)

fill back small holes between good pieces holes with both unaligned protein and nucleotide portions l...

static const int default_min_hole_len

static void SetupArgDescriptions(CArgDescriptions *argdescr)

static void SetupArgDescriptions(CArgDescriptions *argdescr)

spliced protein to genomic alignment

CRef< objects::CSeq_align > FindAlignment(objects::CScope &scope, const objects::CSeq_id &protein, const objects::CSeq_loc &genomic, CProSplignOutputOptions output_options=CProSplignOutputOptions())

Aligns protein to a region on genomic sequence.

position_type GetCoveredLength(void) const

Returns total length covered by ranges in this collection, i.e.

void AddScore(CScope &scope, CSeq_align &align, CSeq_align::EScoreType score)

int GetGapCount(const CSeq_align &align)

Compute the number of gaps in the alignment.

@ eScore_PercentIdentity_Gapped

@ eScore_PercentIdentity_Ungapped

@ eScore_HighQualityPercentCoverage

TSeqPos GetSeqStop(TDim row) const

void SetNamedScore(const string &id, int score)

const CSeq_id & GetSeq_id(TDim row) const

Get seq-id (the first one if segments have different ids).

TSeqPos GetSeqStart(TDim row) const

static void SetupArgDescriptions(CArgDescriptions *arg_desc)

Setup core splign argument descriptions for the application.

static void ArgsToSplign(CSplign *splign, const CArgs &args)

Translate command line arguments into splign algorithm core settings.

CRef< objects::CSeq_align_set > AsSeqAlignSet(const CSplign::TResults *results=0, int flags=eAF_SplicedSegWithParts) const

Format alignment as a seq-align-set.

void SetSeqIds(CConstRef< objects::CSeq_id > id1, CConstRef< objects::CSeq_id > id2)

@ eAF_SplicedSegWithParts

CSplign is the central library object for computing spliced cDNA-to-genomic alignments.

CRef< objects::CScope > & SetScope(void)

void SetMaxIntron(size_t max_intron)

void SetStrand(bool strand)

void PreserveScope(bool preserve=true)

Controls whether to clean the scope object's cache on a new sequence.

bool AlignSingleCompartment(THitRefs *hitrefs, THit::TCoord range_left, THit::TCoord range_right, SAlignedCompartment *result)

void SetStartModelId(size_t model_id)

vector< SAlignedCompartment > TResults

vector< THitRef > THitRefs

static void Rank(objects::CSeq_align_set &sas, score_fn_t score_fn=&NBestPlacement::GetScore)

Adds the following scores: `best_placement_score` as computed by score_fn `rank` - all top scoring al...

iterator_bool insert(const value_type &val)

TCompartments SelectCompartmentsHits(const THitRefs &hitrefs, CCompartOptions compart_options, const vector< pair< THit::TCoord, THit::TCoord > > *gaps=NULL)

Composition of first two functions.

list< CRef< CSeq_annot > > TCompartments

static void cleanup(void)

void FindCompartments(const list< CRef< CSeq_align > > &aligns, list< CRef< CSeq_align_set > > &align_sets, TCompartOptions options=fCompart_Defaults, float diff_len_filter=3.0f)

@ fCompart_FilterByDiffLen

@ fCompart_AllowIntersections

@ fCompart_AllowInconsistentIntersection

unsigned int TSeqPos

Type for sequence locations and lengths.

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

int TSignedSeqPos

Type for signed sequence position.

#define NON_CONST_ITERATE(Type, Var, Cont)

Non constant version of ITERATE macro.

const CNcbiArguments & GetArguments(void) const

Get the application's cached unprocessed command-line arguments.

void AddFlag(const string &name, const string &comment, CBoolEnum< EFlagValue > set_value=eFlagHasValueIfSet, TFlags flags=0)

Add description for flag argument.

void SetDependency(const string &arg1, EDependency dep, const string &arg2)

Define a dependency.

void AddOptionalKey(const string &name, const string &synopsis, const string &comment, EType type, TFlags flags=0)

Add description for optional key without default value.

void SetCurrentGroup(const string &group)

Set current arguments group name.

void AddDefaultKey(const string &name, const string &synopsis, const string &comment, EType type, const string &default_value, TFlags flags=0, const string &env_var=kEmptyStr, const char *display_value=nullptr)

Add description for optional key with default value.

@ eRequires

One argument requires another.

@ eExcludes

One argument excludes another.

@ eInteger

Convertible into an integer number (int or Int8)

CDiagContext_Extra & Print(const string &name, const string &value)

The method does not print the argument, but adds it to the string.

CDiagContext & GetDiagContext(void)

Get diag context instance.

CDiagContext_Extra Extra(void) const

Create a temporary CDiagContext_Extra object.

SIZE_TYPE Size(void) const

Get size (number) of arguments.

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

#define MSerial_AsnText

I/O stream manipulators –.

CConstRef< CSeq_id > GetSeqId(void) const

int CompareOrdered(const CSeq_id &sid2) const

static CSeq_id_Handle GetHandle(const CSeq_id &id)

Normal way of getting a handle, works for any seq-id.

string AsString(void) const

TRange GetTotalRange(void) const

@ fThrowOnMissing

throw exception if sequence or requested data aren't found

TInst_Mol GetInst_Mol(void) const

bool CanGetInst_Mol(void) const

TSeqPos GetEndPosition(void) const

return end position of current segment in sequence (exclusive)

SSeqMapSelector & SetResolveCount(size_t res_cnt)

Set max depth of resolving seq-map.

SSeqMapSelector & SetFlags(TFlags flags)

Select segment type(s)

CSeqMap::ESegmentType GetType(void) const

TSeqPos GetPosition(void) const

return position of current segment in sequence

CConstRef< CSeq_literal > GetRefGapLiteral(void) const

return CSeq_literal with gap data, or null if either the segment is not a gap, or an unspecified gap

TObjectType * GetPointer(void) THROWS_NONE

Get pointer,.

void Reset(void)

Reset reference object.

void Reset(void)

Reset reference object.

position_type GetLength(void) const

position_type GetToOpen(void) const

TThisType & Set(position_type from, position_type to)

CRange< TSeqPos > TSeqRange

typedefs for sequence ranges

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)

Convert numeric value to string.

TTo GetTo(void) const

Get the To member data.

TFrom GetFrom(void) const

Get the From member data.

Tdata & Set(void)

Assign a value to data member.

E_Choice Which(void) const

Which variant is currently selected.

const TSpliced & GetSpliced(void) const

Get the variant data.

const TExons & GetExons(void) const

Get the Exons member data.

list< CRef< CSeq_align > > Tdata

const Tdata & Get(void) const

Get the member data.

const TSegs & GetSegs(void) const

Get the Segs member data.

const Tdata & Get(void) const

Get the member data.

E_Choice Which(void) const

Which variant is currently selected.

const TDesc & GetDesc(void) const

Get the Desc member data.

const TRegion & GetRegion(void) const

Get the variant data.

const TIds & GetIds(void) const

Get the Ids member data.

const TAlign & GetAlign(void) const

Get the variant data.

list< CRef< CAnnotdesc > > Tdata

@ e_Align

definition of the SeqAligns

@ e_Region

all contents cover this region

constexpr auto sort(_Init &&init)

const struct ncbi::grid::netcache::search::fields::SIZE size

static unsigned s_MaxRnaIntronSize

objects::CSeq_id_Handle query

objects::CSeq_id_Handle subject

bool HasRnaCharacteristics()

static unsigned s_MinRnaTotalCoverage

ECompartmentStatus m_Status

bool operator()(const CRef< CSeq_align > &al1, const CRef< CSeq_align > &al2) const

Selector used in CSeqMap methods returning iterators.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4