* iupac_revcomp_table =
57 "................................................................" 58 ".TVGH..CD..M.KN...YSAABW.R.......tvgh..cd..m.kn...ysaabw.r......" 59 "................................................................" 60 "................................................................";
61 returniupac_revcomp_table[
static_cast<unsigned char>(c)];
66 returnc ==
'N'|| c ==
'n'||
Complement(c) ==
'.';
75 template<
classTSeq>
78 constvector<string>& allowable_starts)
80 const TSeqPosinframe_to_open = to+1;
83starts.
insert(inframe_to_open);
85 for(
TSeqPospos = inframe_to_open - 3;
86pos >= from && pos < inframe_to_open;
89 ITERATE(vector<string>, it, allowable_starts) {
90 if( seq[pos + 0] == (*it)[0]
91&& seq[pos + 1] == (*it)[1]
92&& seq[pos + 2] == (*it)[2])
104 boolfrom_fuzz=
false,
boolto_fuzz=
false)
106 if(!to_fuzz) to += 3;
108 auto& interval = *intervals.back();
109interval.SetFrom(from);
112interval.SetFuzz_from().SetLim(objects::CInt_fuzz::eLim_lt);
114interval.SetFuzz_to().SetLim(objects::CInt_fuzz::eLim_gt);
122 template<
classTSeq>
124 unsigned intmin_length_bp,
126 constvector<string>& allowable_starts,
131vector<TSeqPos> stops[3];
132 constobjects::CTrans_table& tbl =
133objects::CGen_code_table::GetTransTable(genetic_code);
135 for(
unsigned int i= 0;
i< seq.size(); ++
i) {
137 if(tbl.IsOrfStop(
state)) {
138 autocodon_start =
i-2;
139 autoframe = codon_start % 3;
140stops[frame].push_back(codon_start);
144 while(++j < seq.size() &&
IsGapOrN(seq[j]))
146 if(j -
i> max_seq_gap) {
148 for(
int f= 0;
f< 3; ++
f) {
149stops[
f].push_back(
i);
150stops[
f].push_back(j -1);
159 for(
intframe = 0; frame < 3; frame++) {
161stops[frame].push_back(
static_cast<TSeqPos>(seq.size()));
162stops[frame].push_back(
static_cast<TSeqPos>(seq.size()));
165 for(
unsigned int i= 0;
i< stops[frame].size() -1;
i++) {
169 boolgap_after = (stop >= seq.size() ||
IsGapOrN(seq[stop]));
171 if(stop >= min_length_bp + from) {
173to = ((stop - from) / 3) * 3 + from - 1;
174 _ASSERT( gap_after || to+1==stop );
175 if(to +1 >= min_length_bp + from) {
177 if(!allowable_starts.empty()) {
181from = *starts.
begin();
183 if(to +1 >= min_length_bp + from) {
184 if(from != from0 && stop_to_stop) {
188 if(!(stop_to_stop && from != from0 && longest_orfs)) {
190!stop_to_stop && from < 3, gap_after);
193 if(!longest_orfs && !allowable_starts.empty()) {
195 for(
autos: starts) {
197 if(to +1 < min_length_bp + from)
214stop = stops[frame][
i] +3;
215from = ((stop - frame)/3)*3 + frame;
227 template<
classTSeq>
229 unsigned intmin_length_bp,
231 constvector<string>& allowable_starts_,
235 if(seq.size() < 3) {
239 if(min_length_bp < 3) min_length_bp = 3;
243 boolstop_to_stop =
false;
244 autostop = find(allowable_starts_.begin(), allowable_starts_.end(),
"STOP");
245vector<string> allowable_starts_2;
246 if(stop != allowable_starts_.end()) {
247stop_to_stop =
true;
248 if(allowable_starts_.size() > 1) {
249allowable_starts_2 = allowable_starts_;
250allowable_starts_2.erase(allowable_starts_2.begin() + distance(allowable_starts_.begin(), stop));
253 constvector<string>& allowable_starts = stop_to_stop ? allowable_starts_2 : allowable_starts_;
261genetic_code, allowable_starts, longest_orfs, max_seq_gap, stop_to_stop);
262 for(
auto& interval: ranges) {
264orf->SetInt().Assign(*interval);
275reverse(comp.begin(), comp.end());
277*
i= objects::CSeqportUtil
282genetic_code, allowable_starts, longest_orfs, max_seq_gap, stop_to_stop);
283 for(
auto& interval: ranges) {
287 unsigned intfrom =
static_cast<unsigned int>(comp.size()) - interval->GetTo() - 1;
288 unsigned intto =
static_cast<unsigned int>(comp.size()) - interval->GetFrom() - 1;
289orf->SetInt().SetFrom(from);
290orf->SetInt().SetTo(to);
302 constobjects::CTrans_table& tbl =
303objects::CGen_code_table::GetTransTable(genetic_code);
305 static const char* iupacs =
"ACGTRYSWKMBDHVN";
306 static const Uint1k_num_iupacs = 15;
309 for(
Uint1i1 = 0; i1 < k_num_iupacs; i1++) {
310 charc1 = iupacs[i1];
311 for(
Uint1i2 = 0; i2 < k_num_iupacs; i2++) {
312 charc2 = iupacs[i2];
313 for(
Uint1i3 = 0; i3 < k_num_iupacs; i3++) {
314 charc3 = iupacs[i3];
315 int state= tbl.SetCodonState(c1, c2, c3);
317 if( (include_atg && tbl.IsATGStart(
state))
318|| (include_alt && tbl.IsAltStart(
state)) )
338 unsigned intmin_length_bp,
340 constvector<string>& allowable_starts,
345genetic_code, allowable_starts, longest_orfs, max_seq_gap);
353 unsigned intmin_length_bp,
355 constvector<string>& allowable_starts,
360genetic_code, allowable_starts, longest_orfs, max_seq_gap);
368 unsigned intmin_length_bp,
370 constvector<string>& allowable_starts,
377vec.GetSeqData(0, vec.size(), seq_iupac);
379genetic_code, allowable_starts, longest_orfs, max_seq_gap);
388 unsigned intmin_length_bp,
389 unsigned intnon_overlap_min_length_bp,
393 if(cds_start > seq.
size()) {
395 "cds_start not within input CSeqVector");
398 if(cds_start <= 3) {
403vector<string> start_codon(1,
"ATG");
405 FindOrfs(seq, ORFs, min_length_bp, genetic_code, start_codon,
false, max_seq_gap);
418 if(ORF_start < 3 || ORF_start >= cds_start ||
419ORF_start + 5 > seq.
size() ||
420(ORF_end >= cds_start ? (cds_start - ORF_start) % 3 == 0
421: ORF_end - ORF_start < non_overlap_min_length_bp))
426seq.
GetSeqData(ORF_start - 3, ORF_start + 5, Kozak_signal);
427 if((Kozak_signal[0] ==
'A'|| Kozak_signal[0] ==
'G') &&
428Kozak_signal[6] ==
'G'&& Kozak_signal[7] !=
'T')
430(ORF_end >= cds_start ? overlap_results : non_overlap_results)
441annot->
SetData().SetFtable();
449feat->
SetData().SetCdregion().SetOrf(
true);
452feat->
SetTitle(
"Open reading frame");
461annot->
SetData().SetFtable().push_back(feat);
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
static void FindStrongKozakUOrfs(const objects::CSeqVector &seq, TSeqPos cds_start, TLocVec &overlap_results, TLocVec &non_overlap_results, unsigned int min_length_bp=3, unsigned int non_overlap_min_length_bp=105, int genetic_code=1, size_t max_seq_gap=k_default_max_seq_gap)
Specifically find ORFs with a strong Kozak signal that are upstream of cds_start.
static vector< string > GetStartCodons(int genetic_code, bool include_atg, bool include_alt)
Create vector of allowable_starts by genetic-code.
vector< CRef< objects::CSeq_loc > > TLocVec
static CRef< objects::CSeq_annot > MakeCDSAnnot(const TLocVec &orfs, int genetic_code=1, objects::CSeq_id *id=NULL)
This version returns an annot full of CDS features.
static void FindOrfs(const string &seq, TLocVec &results, unsigned int min_length_bp=3, int genetic_code=1, const vector< string > &allowable_starts=vector< string >(), bool longest_orfs=true, size_t max_seq_gap=k_default_max_seq_gap)
Find ORFs in both orientations.
namespace ncbi::objects::
iterator_bool insert(const value_type &val)
const_iterator begin() const
const TResidue codons[4][4]
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non-constant version of the ITERATE macro.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
uint8_t Uint1
1-byte (8-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
@ eSeq_code_type_iupacna
IUPAC 1 letter nuc acid code.
void SetLocation(TLocation &value)
Assign a value to Location data member.
void SetTitle(const TTitle &value)
Assign a value to Title data member.
void SetData(TData &value)
Assign a value to Data data member.
void SetExp_ev(TExp_ev value)
Assign a value to Exp_ev data member.
@ eExp_ev_not_experimental
similarity, pattern, etc
void SetData(TData &value)
Assign a value to Data data member.
@ e_Iupacna
IUPAC 1 letter nuc acid code.
bool IsGapOrN(const char c)
vector< CRef< CSeq_interval > > TRangeVec
char Complement(const char c)
void AddInterval(TRangeVec &intervals, TSeqPos from, TSeqPos to, bool from_fuzz=false, bool to_fuzz=false)
set< TSeqPos > FindStarts(const TSeq &seq, TSeqPos from, TSeqPos to, const vector< string > &allowable_starts)
static void s_FindOrfs(const TSeq &seq, COrf::TLocVec &results, unsigned int min_length_bp, int genetic_code, const vector< string > &allowable_starts_, bool longest_orfs, size_t max_seq_gap)
Find all ORFs in both orientations that are at least min_length_bp long (not including the stop).
void FindForwardOrfs(const TSeq &seq, TRangeVec &ranges, unsigned int min_length_bp, int genetic_code, const vector< string > &allowable_starts, bool longest_orfs, size_t max_seq_gap, bool stop_to_stop)
Find all ORFs in forward orientation with length in *base pairs* >= min_length_bp.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4