A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/contig__assembly_8cpp_source.html below:

NCBI C++ ToolKit: src/algo/align/contig_assembly/contig_assembly.cpp Source File

64  bool

in_quotes =

false

;

66  bool

in_space =

true

;

71  for

(

unsigned int i

= 0;

i

< s.size(); ++

i

) {

73  if

((c ==

' '

|| c ==

'\t'

) && !in_quotes) {

85  if

(c ==

'\''

|| c ==

'"'

) {

87  if

(c == quote_char) {

104  throw

runtime_error(

"Unbalanced quotes (')"

);

110

vector<CContigAssembly::SAlignStats::STails>& tails)

113  for

(

int

j = 0; j < vec.

GetNumRows

(); ++j) {

121

tls.

right

= seq_len - stop - 1;

124

tls.

left

= seq_len - stop - 1;

126

tails.push_back(tls);

133  const string

& param_string,

137

query_loc.

SetWhole

().Assign(query_id);

139

subject_loc.

SetWhole

().Assign(subject_id);

140  return Blastn

(query_loc, subject_loc, param_string, scope);

146  const string

& param_string,

149  SSeqLoc

query_sl(query_loc, scope);

150  SSeqLoc

subject_sl(subject_loc, scope);

158  for

(

unsigned int i

= 0;

i

< args.size();

i

+= 2) {

159  const string

& name = args[

i

];

160  if

(

i

+ 1 >= args.size()) {

161  throw

runtime_error(

"no value given for "

+ name);

163  const string

&

value

= args[

i

+ 1];

166

}

else if

(name ==

"-r"

) {

168

}

else if

(name ==

"-q"

) {

170

}

else if

(name ==

"-e"

) {

172

}

else if

(name ==

"-Z"

) {

174

}

else if

(name ==

"-F"

) {

176

}

else if

(name ==

"-G"

) {

178

}

else if

(name ==

"-E"

) {

181  throw

runtime_error(

"invalid option: "

+ name);

191

vector<unsigned int>& plus_vec,

192

vector<unsigned int>& minus_vec)

195

*align_set.

Get

().front()->GetSegs().GetDenseg().GetIds()[0];

197

*align_set.

Get

().front()->GetSegs().GetDenseg().GetIds()[1];

203

plus_vec.resize(len0 + len1);

206

minus_vec.resize(len0 + len1);

210  const CDense_seg

& ds = (*aln)->GetSegs().GetDenseg();

214  if

(start0 == -1 || start1 == -1) {

219  TSeqPos

diag = (start0 + seg_len - 1) + start1;

220

minus_vec[diag] += seg_len;

223  const CDense_seg

& ds = (*aln)->GetSegs().GetDenseg();

227  if

(start0 == -1 || start1 == -1) {

232  TSeqPos

diag = start1 - start0 + len0 - 1 ;

233

plus_vec[diag] += seg_len;

243

vector<TRange>& max_range)

245  unsigned int

running_sum = 0;

247  for

(

i

= 0;

i

< window; ++

i

) {

248

running_sum += vec[

i

];

252

max_range.push_back(

TRange

(window - 1, window - 1));

254  for

(

i

= window;

i

< vec.size(); ++

i

) {

255

running_sum -= vec[

i

- window];

256

running_sum += vec[

i

];

257  if

(running_sum >=

max

) {

258  if

(running_sum >

max

) {

262  if

(max_range.size() && max_range.back().GetFrom() ==

i

- 1) {

263

max_range.back().SetFrom(

i

);

265

max_range.push_back(

TRange

(

i

,

i

));

279

vector<unsigned int> plus_vec, minus_vec;

280  DiagCounts

(align_set, scope, plus_vec, minus_vec);

282  unsigned int

plus_count;

283

vector<TRange> plus_range;

286  unsigned int

minus_count;

287

vector<TRange> minus_range;

291

vector<TRange>*

r

=

NULL

;

292  if

(plus_count > minus_count) {

304

(

r

->front().GetFrom() +

r

->front().GetTo() + 1) / 2 -

window_size

/ 2;

314  unsigned int

half_width,

CScope

& scope)

345  if

(diag <= seq0.size()) {

347

shift = seq0.size() - 1 - diag;

350

shift = diag - (seq0.size() - 1);

363

ds->

SetIds

().push_back(cr_id0);

364

ds->

SetIds

().push_back(cr_id1);

381  for

(

unsigned int i

= 0;

i

< ds->

GetLens

().

size

() - 1; ++

i

) {

398  unsigned int

slop,

CScope

& scope)

404  if

( (

stats

.tails[0].right <= slop &&

stats

.tails[1].left <= slop) ||

405

(

stats

.tails[0].left <= slop &&

stats

.tails[1].right <= slop) ) {

446  unsigned int

slop,

CScope

& scope)

462  unsigned int

slop,

CScope

& scope)

478  bool

FirstContainsSecond;

479  bool

SecondContainsFirst;

481

FirstContainsSecond = ((((long)

stats

.tails[0].left -

stats

.tails[1].left) >= -

int

(slop)) &&

482

(((

long

)

stats

.tails[0].right -

stats

.tails[1].right) >= -

int

(slop)));

483

SecondContainsFirst = ((((long)

stats

.tails[1].left -

stats

.tails[0].left) >= -

int

(slop)) &&

484

(((

long

)

stats

.tails[1].right -

stats

.tails[0].right) >= -

int

(slop)));

486  return

(FirstContainsSecond | SecondContainsFirst);

501

Ident =

stats

.pct_identity/100.0;

511  int

Wg = -5, Wm = 1, Wms = -2, Ws = -2;

518  for

(

unsigned int i

= 0;

i

< ds_in.

GetLens

().

size

(); ++

i

) {

521

vector<int> scores(sz);

523  for

(

unsigned int i

= 0;

i

< scores.size(); ++

i

) {

527

previous_score = scores[

i

- 1];

533

scores[

i

] = previous_score + Ws;

535

scores[

i

] = previous_score + Wg + Ws;

537

}

else if

(res1 ==

'-'

) {

539

scores[

i

] = previous_score + Ws;

541

scores[

i

] = previous_score + Wg + Ws;

543

}

else if

(res0 == res1) {

545

scores[

i

] = previous_score + Wm;

548

scores[

i

] = previous_score + Wms;

552  if

(scores[

i

] < 0) {

559  unsigned int

right_end = 0;

560  unsigned int

left_end;

561  for

(

unsigned int i

= 0;

i

< scores.size(); ++

i

) {

562  if

(scores[

i

] > max_score) {

563

max_score = scores[

i

];

570  if

(right_end == 0) {

577  for

(

i

= right_end - 1;

i

> 0; --

i

) {

578  if

(scores[

i

] == 0) {

585  if

(scores[0] == 0) {

601

vector<CRef<CSeq_align> >

603  const string

& blast_params,

double

min_ident,

604  unsigned int

max_end_slop,

CScope

& scope,

606  const

vector<unsigned int>& band_halfwidths,

607  unsigned int

diag_finding_window,

608  unsigned int

min_align_length,

611  if

(min_ident > 1 || min_ident < 0) {

612  throw

runtime_error(

"min_ident must be between zero and one (got " 623

*ostr <<

"Filtering on "

<< min_ident <<

"%, slop "

<< max_end_slop

624

<<

"bp, min length "

<< min_align_length <<

"bp" 625

<<

" and strands "

<< strandmap[strand0] <<

", "

<< strandmap[strand1] << endl;

629

alns =

Blastn

(id0, id1, blast_params, scope);

631  catch

(exception& e) {

633

*ostr <<

"blast failed:\n"

<< e.what() << endl;

635  return

vector<CRef<CSeq_align> >();

638

vector<CRef<CSeq_align> > good_alns;

644  if

(

IsDovetail

((*aln)->GetSegs().GetDenseg(), max_end_slop, scope)

645

&&

FracIdent

((*aln)->GetSegs().GetDenseg(), scope) >= min_ident

647

&&

x_DensegLength

((*aln)->GetSegs().GetDenseg()) >= min_align_length ) {

649

good_alns.push_back(*aln);

652  if

(!good_alns.empty()) {

654

*ostr <<

"Found "

<< good_alns.size() <<

655  " acceptable dovetail alignment(s) by blast " 656

<< blast_params << endl;

661

*ostr <<

"Found no acceptable dovetail alignments by blast " 662

<< blast_params << endl;

664  if

(alns->

Get

().empty()) {

666

*ostr <<

"No alignments found by blast; " 667  "can't do banded alignment"

<< endl;

669  return

vector<CRef<CSeq_align> >();

676  ITERATE

(vector<unsigned int>, band_halfwidth, band_halfwidths) {

678

*ostr <<

"Trying banded global alignment with bandwidth = " 679

<< 2 * *band_halfwidth + 1 << endl;

685

diag, *band_halfwidth, scope);

689

*ostr <<

"banded alignment failed:\n"

<< e.

what

() << endl;

696

*ostr <<

"banded alignment failed: num segs == 0\n"

<< endl;

702  double

frac_ident =

FracIdent

(*local_ds, scope);

704

*ostr <<

"Fraction identity: "

<< frac_ident << endl;

706  if

(

IsDovetail

(*local_ds, max_end_slop, scope)

707

&&

FracIdent

(*local_ds, scope) >= min_ident

711

*ostr <<

"Alignment acceptable (full dovetail)"

<< endl;

715

aln->

SetSegs

().SetDenseg(*local_ds);

717  return

vector<CRef<CSeq_align> >(1, aln);

722

*ostr <<

"No acceptable alignments from banded alignment algorithm" 731

&&

FracIdent

((*aln)->GetSegs().GetDenseg(), scope) >= min_ident

733

&&

x_DensegLength

((*aln)->GetSegs().GetDenseg()) >= min_align_length

736

good_alns.push_back(*aln);

740

*ostr <<

"Found "

<< good_alns.size() <<

741  " acceptable half-dovetail " 742  "or contained alignment(s) by blast"

<< endl;

744  if

(!good_alns.empty()) {

751

&&

FracIdent

(*local_ds, scope) >= min_ident

754  string

dovetail_string;

755  if

(

IsContained

(*local_ds, max_end_slop, scope)) {

756

dovetail_string =

"contained"

;

758

dovetail_string =

"half-dovetail"

;

761

*ostr <<

"Banded alignment acceptable (" 762

<< dovetail_string <<

")"

<< endl;

766

aln->

SetSegs

().SetDenseg(*local_ds);

768  return

vector<CRef<CSeq_align> >(1, aln);

772

*ostr <<

"Banded alignment not an acceptable " 773  "half-dovetail or contained"

<< endl;

775  return

vector<CRef<CSeq_align> >();

783

objects::CScope& scope)

793  _ASSERT

(row1.size() == row2.size());

796  for

(

unsigned int i

= 0;

i

< row1.size(); ++

i

) {

797  if

(row1[

i

] !=

'N'

&& row2[

i

] !=

'N'

) {

800  if

(row1[

i

] != row2[

i

]) {

801  if

(row1[

i

] ==

'-'

) {

803  while

(

i

+1 < row1.size() && row1[

i

+1] ==

'-'

) ++

i

;

804

}

else if

(row2[

i

] ==

'-'

) {

806  while

(

i

+1 < row1.size() && row2[

i

+1] ==

'-'

) ++

i

;

829

align_stats.

gaps

.clear();

832

vector<CRef<CSeq_loc> > dust_locs;

843

loc->SetPacked_int(*res);

844

dust_locs.push_back(loc);

850  bool

simple =

false

;

853  bool

is_gap =

false

;

854  for

(

int

j = 0; j < vec.

GetNumRows

(); ++j) {

857  unsigned int

other_row = (j + 1) % 2;

867

gap_loc.

SetInt

().SetId().Set(

"lcl|dummy"

);

879

}

else if

(gap_simple == -1) {

892

align_stats.

is_simple

.push_back(simple);

911

SAlignStats& align_stats)

920

SAlignStats& align_stats)

933  if

(

stats

.tails[0].left <

stats

.tails[1].left) {

944  bool

matches[2] = {

false

,

false

};

957  if

(!(matches[0] & matches[1])) {

964  return

(matches[0] & matches[1]);

973  int

Dim = ds.GetDim();

975  for

(

unsigned int

Seg = 0; Seg < Lens.size(); Seg++) {

977  if

(Starts[(Seg*Dim)] == -1 || Starts[(Seg*Dim)+1] == -1)

991  for

(

int i

= 0;

i

< vec.GetNumSegs(); ++

i

) {

992  bool

is_gap =

false

;

993  for

(

int

j = 0; j < vec.GetNumRows(); ++j) {

994  if

(vec.GetStart(j,

i

) == -1) {

1000

AlignedLength += vec.GetLen(

i

);

1006  unsigned int

identities = 0;

1007  for

(

int i

= 0;

i

< vec.GetNumSegs(); ++

i

) {

1009

vec.GetSegSeqString(s1, 0,

i

);

1010  for

(

int

j = 1; j < vec.GetNumRows(); ++j) {

1012

vec.GetSegSeqString(s2, j,

i

);

1014  for

(

unsigned int

k = 0; k <

min

(s1.size(), s2.size()); ++k) {

1015

identities += (s1[k] == s2[k]);

1020

align_stats.

mismatches

= AlignedLength - identities;

1022

100.0 * double(identities) / double(AlignedLength);

Declares the CBl2Seq (BLAST 2 Sequences) class.

Declares the CBlastNucleotideOptionsHandle class.

Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.

Definitions of special type used in BLAST.

vector< CRef< objects::CSeq_align_set > > TSeqAlignVector

Vector of Seq-align-sets.

@ eBlastn

Nucl-Nucl (traditional blastn)

TSignedSeqPos GetStop(TNumrow row, TNumseg seg, int offset=0) const

TSignedSeqPos GetStart(TNumrow row, TNumseg seg, int offset=0) const

bool IsPositiveStrand(TNumrow row) const

TSignedSeqPos GetSeqPosFromAlnPos(TNumrow for_row, TSeqPos aln_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const

TDim GetNumRows(void) const

TSeqPos GetAlnStop(TNumseg seg) const

TSeqPos GetLen(TNumseg seg, int offset=0) const

TSeqPos GetSeqStop(TNumrow row) const

TNumseg GetNumSegs(void) const

TSeqPos GetSeqStart(TNumrow row) const

const CBioseq_Handle & GetBioseqHandle(TNumrow row) const

void SetEndChar(TResidue gap_char)

void SetGapChar(TResidue gap_char)

CScope & GetScope(void) const

string & GetAlnSeqString(string &buffer, TNumrow row, const CAlnMap::TSignedRange &aln_rng) const

TResidue GetResidue(TNumrow row, TSeqPos aln_pos) const

Runs the BLAST algorithm between 2 sequences.

Handle to the nucleotide-nucleotide options to the BLAST algorithm.

unsigned int m_AdjustedLen

CAlnStats(unsigned int adjusted_len, unsigned int mm, unsigned int gaps)

static void FindMaxRange(const vector< unsigned int > &vec, unsigned int window, unsigned int &max, vector< TRange > &max_range)

static void x_GatherIdentStats(const objects::CAlnVec &vec, SAlignStats &align_stats)

static CRef< objects::CDense_seg > BestLocalSubAlignment(const objects::CDense_seg &ds_in, objects::CScope &scope)

Find the highest-scoring local subalignment.

static bool IsAtLeastHalfDovetail(const objects::CDense_seg &ds, unsigned int slop, objects::CScope &scope)

static CRef< objects::CSeq_align_set > Blastn(const objects::CSeq_id &query_id, const objects::CSeq_id &subject_id, const string &param_string, objects::CScope &scope)

Utility for running blastn.

static void GatherAlignStats(const objects::CAlnVec &vec, SAlignStats &align_stats)

static bool x_IsAllowedStrands(const objects::CDense_seg &ds, objects::ENa_strand strand0, objects::ENa_strand strand1)

static void FindDiagFromAlignSet(const objects::CSeq_align_set &align_set, objects::CScope &scope, unsigned int window_size, objects::ENa_strand &strand, unsigned int &diag)

Given a set of alignments, pick out a diagonal to use as the center of a band in a banded alignment.

CRange< unsigned int > TRange

Find the range (or more than one tied range) containing the maximal diagonal count,...

static void DiagCounts(const objects::CSeq_align_set &align_set, objects::CScope &scope, vector< unsigned int > &plus_vec, vector< unsigned int > &minus_vec)

Count the cells with "ink" along each diagonal in a dot-matrix-type plot of some set of alignments (e...

static vector< CRef< objects::CSeq_align > > Align(const objects::CSeq_id &id0, const objects::CSeq_id &id1, const string &blast_params, double min_ident, unsigned int max_end_slop, objects::CScope &scope, CNcbiOstream *ostr=0, const vector< unsigned int > &band_halfwidths=vector< unsigned int >(1, 200), unsigned int diag_finding_window=200, unsigned int min_align_length=50, objects::ENa_strand strand0=objects::eNa_strand_unknown, objects::ENa_strand strand1=objects::eNa_strand_unknown)

Most users of the class need only to call this function.

static CRef< objects::CDense_seg > BandedGlobalAlignment(const objects::CSeq_id &id0, const objects::CSeq_id &id1, objects::ENa_strand strand, unsigned int diag, unsigned int half_width, objects::CScope &scope)

Do a banded global alignment using an arbitrary band.

static void x_OrientAlign(objects::CDense_seg &ds, objects::CScope &scope)

static TSeqPos x_DensegLength(const objects::CDense_seg &ds)

static bool IsDovetail(const objects::CDense_seg &ds, unsigned int slop, objects::CScope &scope)

static bool IsContained(const objects::CDense_seg &ds, unsigned int slop, objects::CScope &scope)

static double FracIdent(const objects::CDense_seg &ds, objects::CScope &scope)

ENa_strand GetSeqStrand(TDim row) const

TSeqPos GetSeqStop(TDim row) const

void Reverse(void)

Reverse the segments' orientation.

TSeqPos GetSeqStart(TDim row) const

void FromTranscript(TSeqPos query_start, ENa_strand query_strand, TSeqPos subj_start, ENa_strand subj_strand, const string &transcript)

Initialize from pairwise alignment transcript (a string representation produced by CNWAligner)

CRef< CDense_seg > ExtractSlice(TDim row, TSeqPos from, TSeqPos to) const

Extract a slice of the alignment that includes the specified range.

Looks for low complexity parts of sequences according to the symmetric version of DUST algorithm.

CRef< objects::CPacked_seqint > GetMaskedInts(objects::CSeq_id &seq_id, const sequence_type &seq)

Mask a sequence and return result as a CPacked_seqint instance.

static void s_SplitCommandLine(string s, vector< string > &result)

static void s_GetTails(const CAlnVec &vec, vector< CContigAssembly::SAlignStats::STails > &tails)

void SetSpaceLimit(const size_t &maxmem)

void SetShift(Uint1 where, size_t offset)

string GetTranscriptString(void) const

void SetEndSpaceFree(bool Left1, bool Right1, bool Left2, bool Right2)

void SetEvalueThreshold(double eval)

Sets EvalueThreshold.

void SetMatchReward(int r)

Sets MatchReward.

void SetTraditionalBlastnDefaults()

Sets TraditionalBlastnDefaults.

void SetMismatchPenalty(int p)

Sets MismatchPenalty.

void SetGapXDropoffFinal(double x)

Sets GapXDropoffFinal.

void SetGapExtensionCost(int e)

Sets GapExtensionCost.

void SetFilterString(const char *f, bool clear=true)

Sets FilterString.

void SetWordSize(int ws)

Sets WordSize.

void SetGapOpeningCost(int g)

Sets GapOpeningCost.

unsigned int TSeqPos

Type for sequence locations and lengths.

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

int TSignedSeqPos

Type for signed sequence position.

#define NON_CONST_ITERATE(Type, Var, Cont)

Non constant version of ITERATE macro.

virtual const char * what(void) const noexcept

Standard report (includes full backlog).

string GetSeqIdString(bool with_version=false) const

Return seqid string with optional version for text seqid type.

virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)

Optimized implementation of CSerialObject::Assign, which is not so efficient.

sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)

Returns the sequence::ECompare containment relationship between CSeq_locs.

@ fCompareOverlapping

Check if seq-locs are overlapping.

@ eSame

CSeq_locs contain each other.

@ eContained

First CSeq_loc contained by second.

CBioseq_Handle GetBioseqHandle(const CSeq_id &id)

Get bioseq handle by seq-id.

TSeqPos GetBioseqLength(void) const

CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const

Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.

@ eStrand_Plus

Plus strand.

@ eStrand_Minus

Minus strand.

@ eCoding_Iupac

Set coding to printable coding (Iupacna or Iupacaa)

void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const

Fill the buffer string with the sequence data for the interval [start, stop).

void SetIupacCoding(void)

Set coding to either Iupacaa or Iupacna depending on molecule type.

uint8_t Uint1

1-byte (8-bit) unsigned integer

uint64_t Uint8

8-byte (64-bit) unsigned integer

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

IO_PREFIX::ostream CNcbiOstream

Portable alias for ostream.

static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)

Convert double to string.

static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)

Convert string to int.

static double StringToDouble(const CTempStringEx str, TStringToNumFlags flags=0)

Convert string to double.

const TDenseg & GetDenseg(void) const

Get the variant data.

Tdata & Set(void)

Assign a value to data member.

TLens & SetLens(void)

Assign a value to Lens data member.

const TStarts & GetStarts(void) const

Get the Starts member data.

void SetSegs(TSegs &value)

Assign a value to Segs data member.

const TLens & GetLens(void) const

Get the Lens member data.

vector< TSignedSeqPos > TStarts

void SetType(TType value)

Assign a value to Type data member.

TStarts & SetStarts(void)

Assign a value to Starts data member.

TStrands & SetStrands(void)

Assign a value to Strands data member.

void SetNumseg(TNumseg value)

Assign a value to Numseg data member.

const TIds & GetIds(void) const

Get the Ids member data.

bool CanGetStrands(void) const

Check if it is safe to call GetStrands method.

TNumseg GetNumseg(void) const

Get the Numseg member data.

list< CRef< CSeq_align > > Tdata

TIds & SetIds(void)

Assign a value to Ids data member.

const TStrands & GetStrands(void) const

Get the Strands member data.

const Tdata & Get(void) const

Get the member data.

const TSegs & GetSegs(void) const

Get the Segs member data.

@ eType_partial

mapping pieces together

ENa_strand

strand of nucleic acid

unsigned int

A callback function used to compare two keys in a database.

const struct ncbi::grid::netcache::search::fields::SIZE size

const GenericPointer< typename T::ValueType > T2 value

Uint8 GetPhysicalMemorySize(void)

Return the amount of physical memory available in the system.

double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)

#define row(bind, expected)

Alignment characterization.

vector< STails > tails

unaligned tails

double pct_identity

% identity (varies from 0 to 100)

TSeqPos mismatches

number of mismatched bases

TSeqPos total_length

total covered length of the alignment, including gaps

TSeqPos gap_count

count of total number of gaps

vector< bool > is_simple

for each gap, whether is consists of "simple sequence"

vector< TSeqPos > gaps

the set of gap lengths for this alignment

TSeqPos aligned_length

total number of bases included in the alignment

Structure to represent a single sequence to be fed to BLAST.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4