A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/orf_8cpp_source.html below:

NCBI C++ ToolKit: src/algo/sequence/orf.cpp Source File

56  static const char

* iupac_revcomp_table =

57  "................................................................" 58  ".TVGH..CD..M.KN...YSAABW.R.......tvgh..cd..m.kn...ysaabw.r......" 59  "................................................................" 60  "................................................................"

;

61  return

iupac_revcomp_table[

static_cast<unsigned char>

(c)];

66  return

c ==

'N'

|| c ==

'n'

||

Complement

(c) ==

'.'

;

75 template

<

class

TSeq>

78  const

vector<string>& allowable_starts)

80  const TSeqPos

inframe_to_open = to+1;

83

starts.

insert

(inframe_to_open);

85  for

(

TSeqPos

pos = inframe_to_open - 3;

86

pos >= from && pos < inframe_to_open;

89  ITERATE

(vector<string>, it, allowable_starts) {

90  if

( seq[pos + 0] == (*it)[0]

91

&& seq[pos + 1] == (*it)[1]

92

&& seq[pos + 2] == (*it)[2])

104  bool

from_fuzz=

false

,

bool

to_fuzz=

false

)

106  if

(!to_fuzz) to += 3;

108  auto

& interval = *intervals.back();

109

interval.SetFrom(from);

112

interval.SetFuzz_from().SetLim(objects::CInt_fuzz::eLim_lt);

114

interval.SetFuzz_to().SetLim(objects::CInt_fuzz::eLim_gt);

122 template

<

class

TSeq>

124  unsigned int

min_length_bp,

126  const

vector<string>& allowable_starts,

131

vector<TSeqPos> stops[3];

132  const

objects::CTrans_table& tbl =

133

objects::CGen_code_table::GetTransTable(genetic_code);

135  for

(

unsigned int i

= 0;

i

< seq.size(); ++

i

) {

137  if

(tbl.IsOrfStop(

state

)) {

138  auto

codon_start =

i

-2;

139  auto

frame = codon_start % 3;

140

stops[frame].push_back(codon_start);

144  while

(++j < seq.size() &&

IsGapOrN

(seq[j]))

146  if

(j -

i

> max_seq_gap) {

148  for

(

int f

= 0;

f

< 3; ++

f

) {

149

stops[

f

].push_back(

i

);

150

stops[

f

].push_back(j -1);

159  for

(

int

frame = 0; frame < 3; frame++) {

161

stops[frame].push_back(

static_cast<TSeqPos>

(seq.size()));

162

stops[frame].push_back(

static_cast<TSeqPos>

(seq.size()));

165  for

(

unsigned int i

= 0;

i

< stops[frame].size() -1;

i

++) {

169  bool

gap_after = (stop >= seq.size() ||

IsGapOrN

(seq[stop]));

171  if

(stop >= min_length_bp + from) {

173

to = ((stop - from) / 3) * 3 + from - 1;

174  _ASSERT

( gap_after || to+1==stop );

175  if

(to +1 >= min_length_bp + from) {

177  if

(!allowable_starts.empty()) {

181

from = *starts.

begin

();

183  if

(to +1 >= min_length_bp + from) {

184  if

(from != from0 && stop_to_stop) {

188  if

(!(stop_to_stop && from != from0 && longest_orfs)) {

190

!stop_to_stop && from < 3, gap_after);

193  if

(!longest_orfs && !allowable_starts.empty()) {

195  for

(

auto

s: starts) {

197  if

(to +1 < min_length_bp + from)

214

stop = stops[frame][

i

] +3;

215

from = ((stop - frame)/3)*3 + frame;

227 template

<

class

TSeq>

229  unsigned int

min_length_bp,

231  const

vector<string>& allowable_starts_,

235  if

(seq.size() < 3) {

239  if

(min_length_bp < 3) min_length_bp = 3;

243  bool

stop_to_stop =

false

;

244  auto

stop = find(allowable_starts_.begin(), allowable_starts_.end(),

"STOP"

);

245

vector<string> allowable_starts_2;

246  if

(stop != allowable_starts_.end()) {

247

stop_to_stop =

true

;

248  if

(allowable_starts_.size() > 1) {

249

allowable_starts_2 = allowable_starts_;

250

allowable_starts_2.erase(allowable_starts_2.begin() + distance(allowable_starts_.begin(), stop));

253  const

vector<string>& allowable_starts = stop_to_stop ? allowable_starts_2 : allowable_starts_;

261

genetic_code, allowable_starts, longest_orfs, max_seq_gap, stop_to_stop);

262  for

(

auto

& interval: ranges) {

264

orf->SetInt().Assign(*interval);

275

reverse(comp.begin(), comp.end());

277

*

i

= objects::CSeqportUtil

282

genetic_code, allowable_starts, longest_orfs, max_seq_gap, stop_to_stop);

283  for

(

auto

& interval: ranges) {

287  unsigned int

from =

static_cast<unsigned int>

(comp.size()) - interval->GetTo() - 1;

288  unsigned int

to =

static_cast<unsigned int>

(comp.size()) - interval->GetFrom() - 1;

289

orf->SetInt().SetFrom(from);

290

orf->SetInt().SetTo(to);

302  const

objects::CTrans_table& tbl =

303

objects::CGen_code_table::GetTransTable(genetic_code);

305  static const char

* iupacs =

"ACGTRYSWKMBDHVN"

;

306  static const Uint1

k_num_iupacs = 15;

309  for

(

Uint1

i1 = 0; i1 < k_num_iupacs; i1++) {

310  char

c1 = iupacs[i1];

311  for

(

Uint1

i2 = 0; i2 < k_num_iupacs; i2++) {

312  char

c2 = iupacs[i2];

313  for

(

Uint1

i3 = 0; i3 < k_num_iupacs; i3++) {

314  char

c3 = iupacs[i3];

315  int state

= tbl.SetCodonState(c1, c2, c3);

317  if

( (include_atg && tbl.IsATGStart(

state

))

318

|| (include_alt && tbl.IsAltStart(

state

)) )

338  unsigned int

min_length_bp,

340  const

vector<string>& allowable_starts,

345

genetic_code, allowable_starts, longest_orfs, max_seq_gap);

353  unsigned int

min_length_bp,

355  const

vector<string>& allowable_starts,

360

genetic_code, allowable_starts, longest_orfs, max_seq_gap);

368  unsigned int

min_length_bp,

370  const

vector<string>& allowable_starts,

377

vec.GetSeqData(0, vec.size(), seq_iupac);

379

genetic_code, allowable_starts, longest_orfs, max_seq_gap);

388  unsigned int

min_length_bp,

389  unsigned int

non_overlap_min_length_bp,

393  if

(cds_start > seq.

size

()) {

395  "cds_start not within input CSeqVector"

);

398  if

(cds_start <= 3) {

403

vector<string> start_codon(1,

"ATG"

);

405  FindOrfs

(seq, ORFs, min_length_bp, genetic_code, start_codon,

false

, max_seq_gap);

418  if

(ORF_start < 3 || ORF_start >= cds_start ||

419

ORF_start + 5 > seq.

size

() ||

420

(ORF_end >= cds_start ? (cds_start - ORF_start) % 3 == 0

421

: ORF_end - ORF_start < non_overlap_min_length_bp))

426

seq.

GetSeqData

(ORF_start - 3, ORF_start + 5, Kozak_signal);

427  if

((Kozak_signal[0] ==

'A'

|| Kozak_signal[0] ==

'G'

) &&

428

Kozak_signal[6] ==

'G'

&& Kozak_signal[7] !=

'T'

)

430

(ORF_end >= cds_start ? overlap_results : non_overlap_results)

441

annot->

SetData

().SetFtable();

449

feat->

SetData

().SetCdregion().SetOrf(

true

);

452

feat->

SetTitle

(

"Open reading frame"

);

461

annot->

SetData

().SetFtable().push_back(feat);

User-defined methods of the data storage class.

@ eExtreme_Positional

numerical value

@ eExtreme_Biological

5' and 3'

static void FindStrongKozakUOrfs(const objects::CSeqVector &seq, TSeqPos cds_start, TLocVec &overlap_results, TLocVec &non_overlap_results, unsigned int min_length_bp=3, unsigned int non_overlap_min_length_bp=105, int genetic_code=1, size_t max_seq_gap=k_default_max_seq_gap)

Specifically find ORFs with a strong Kozak signal that are upstream of cds_start.

static vector< string > GetStartCodons(int genetic_code, bool include_atg, bool include_alt)

Create vector of allowable_starts by genetic-code.

vector< CRef< objects::CSeq_loc > > TLocVec

static CRef< objects::CSeq_annot > MakeCDSAnnot(const TLocVec &orfs, int genetic_code=1, objects::CSeq_id *id=NULL)

This version returns an annot full of CDS features.

static void FindOrfs(const string &seq, TLocVec &results, unsigned int min_length_bp=3, int genetic_code=1, const vector< string > &allowable_starts=vector< string >(), bool longest_orfs=true, size_t max_seq_gap=k_default_max_seq_gap)

Find ORFs in both orientations.

namespace ncbi::objects::

iterator_bool insert(const value_type &val)

const_iterator begin() const

const TResidue codons[4][4]

unsigned int TSeqPos

Type for sequence locations and lengths.

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define NON_CONST_ITERATE(Type, Var, Cont)

Non constant version of ITERATE macro.

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const

Fill the buffer string with the sequence data for the interval [start, stop).

uint8_t Uint1

1-byte (8-bit) unsigned integer

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

@ eSeq_code_type_iupacna

IUPAC 1 letter nuc acid code.

void SetLocation(TLocation &value)

Assign a value to Location data member.

void SetTitle(const TTitle &value)

Assign a value to Title data member.

void SetData(TData &value)

Assign a value to Data data member.

void SetExp_ev(TExp_ev value)

Assign a value to Exp_ev data member.

@ eExp_ev_not_experimental

similarity, pattern, etc

void SetData(TData &value)

Assign a value to Data data member.

@ e_Iupacna

IUPAC 1 letter nuc acid code.

bool IsGapOrN(const char c)

vector< CRef< CSeq_interval > > TRangeVec

char Complement(const char c)

void AddInterval(TRangeVec &intervals, TSeqPos from, TSeqPos to, bool from_fuzz=false, bool to_fuzz=false)

set< TSeqPos > FindStarts(const TSeq &seq, TSeqPos from, TSeqPos to, const vector< string > &allowable_starts)

static void s_FindOrfs(const TSeq &seq, COrf::TLocVec &results, unsigned int min_length_bp, int genetic_code, const vector< string > &allowable_starts_, bool longest_orfs, size_t max_seq_gap)

Find all ORFs in both orientations that are at least min_length_bp long (not including the stop).

void FindForwardOrfs(const TSeq &seq, TRangeVec &ranges, unsigned int min_length_bp, int genetic_code, const vector< string > &allowable_starts, bool longest_orfs, size_t max_seq_gap, bool stop_to_stop)

Find all ORFs in forward orientation with length in *base pairs* >= min_length_bp.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4