( !
id|| !
ctx) {
102.AddField(
"count", (
int) feat_iter.
GetSize());
124(vec[pos - 3] ==
'A'|| vec[pos - 3] ==
'G')) {
127 if(vec.
size() > pos + 3 &&
128vec[pos + 3] ==
'G') {
166upstr->
SetInt().SetId().Assign(
id);
172upstr->
SetInt().SetFrom(0);
176loc.
SetMix().AddSeqLoc(*upstr);
194standard->
Set().push_back(code_id);
203.AddField(
"is_partial",
206.AddField(
"is_pseudo",
209.AddField(
"is_except",
234 TSeqPospos_nearest_best_start = 0;
235 for(
int i= upstream_length - 3;
i>= 0;
i-= 3) {
236 if(vec[
i] ==
'A'&& vec[
i+ 1] ==
'T'&& vec[
i+ 2] ==
'G') {
238 if(strength > best_strength) {
239best_strength = strength;
240pos_nearest_best_start =
i;
245.AddField(
"inframe_upstream_start_exists", best_strength !=
eNone);
246 if(best_strength !=
eNone) {
248.AddField(
"inframe_upstream_start_best_kozak_strength",
251.AddField(
"nearest_best_upstream_start_distance",
252 int(upstream_length - pos_nearest_best_start - 3));
279 for(
int i= upstream_length - 3;
i>= 0;
i-= 3) {
283.AddField(
"inframe_upstream_stop_exists",
286.AddField(
"nearest_inframe_upstream_stop_distance",
287 int(upstream_length -
i- 3));
292.AddField(
"inframe_upstream_stop_exists",
315 staticstd::map<string, CRef<CHMMParameters> > s_hmmparams_cache;
320 if(!
ctx->HasKey(
"gnomon_model_file")) {
325 stringmodel_file_name = (*ctx)[
"gnomon_model_file"];
329 if(s_hmmparams_cache.find(model_file_name) == s_hmmparams_cache.end()) {
335hmm_params = s_hmmparams_cache[model_file_name];
342 ctx->GetScope(), &gccontent);
346.AddField(
"model_file", model_file_name);
348.AddField(
"model_percent_gc", gccontent);
350.AddField(
"score",
max(score, -1e100));
370 if( !
id|| !
ctx) {
379 int len=
ctx->GetScope()
380.GetBioseqHandle(
dynamic_cast<const CSeq_id&
>(obj)).GetInst_Length();
382.AddField(
"length",
len);
391.AddField(
"length",
410 intxcript_len =
ctx->GetScope().GetBioseqHandle(
id).GetInst_Length();
411 result.SetOutput_data().AddField(
"length_5_prime_utr", (
int) cds_from);
412 result.SetOutput_data().AddField(
"length_3_prime_utr",
413(
int) (xcript_len - cds_to - 1));
438vec.
GetSeqData(upstream_length, upstream_length + 3, seq);
441.AddField(
"is_start",
445.AddField(
"first_codon", seq);
448.AddField(
"kozak_strength",
473.AddField(
"is_stop",
545start_translating = 0;
548start_translating = 1;
551start_translating = 2;
559 boolpremature_stop_found =
false;
560 for(
TSeqPos i= start_translating;
i< vec.
size() - 3;
i+= 3) {
563 if(!premature_stop_found) {
565.AddField(
"has_premature_stop_codon",
true);
567.AddField(
"first_premature_stop_position",
568 static_cast<int>(
i));
569premature_stop_found =
true;
574.AddField(
"has_premature_stop_codon_not_sec",
true);
576.AddField(
"first_premature_stop_position_not_sec",
577 static_cast<int>(
i));
584.AddField(
"has_premature_stop_codon_not_sec",
false);
585 if(!premature_stop_found) {
587.AddField(
"has_premature_stop_codon",
false);
606latest.
Reset(&
id);
619translation,
false);
620 result.SetOutput_data().AddField(
"length_translation",
621 int(translation.size()));
636 if(prod_vec[
i] == translation[
i]) {
641 result.SetOutput_data().AddField(
"length_annotated_prot_prod",
642 int(prod_vec.
size()));
644.AddField(
"fraction_identity",
649 if(updated_id->
Equals(prod_id)) {
651.AddField(
"fraction_identity_updated_prot_prod",
654 result.SetOutput_data().AddField(
"length_updated_prot_prod",
655 int(prod_vec.
size()));
658=
ctx->GetScope().GetBioseqHandle(*updated_id);
665 if(updated_prod_vec[
i] == translation[
i]) {
670.AddField(
"fraction_identity_updated_prot_prod",
672/
max(updated_prod_vec.
size(),
673(
TSeqPos)translation.size()));
674 result.SetOutput_data().AddField(
"length_updated_prot_prod",
675 int(updated_prod_vec.
size()));
678.AddField(
"prot_prod_updated", !updated_id->
Equals(prod_id));
680.AddField(
"updated_prod_id", updated_id->
AsFastaString());
699 if( !
id|| !
ctx) {
715 for(pos = vec.
size() - 1; pos > 0; --pos) {
716 if(vec[pos] !=
'A') {
720 result->SetOutput_data().AddField(
"trailing_a_count",
721 int(vec.
size() - pos - 1));
729 static const intw_match = 1;
730 static const intw_mismatch = -4;
731 static const intx_dropoff = 15;
733 size_tbest_pos =
NPOS;
737 for(
size_tcurr_pos = vec.
size() - 1;
738curr_pos > 0 && curr_score + x_dropoff > best_score;
741curr_score += vec[curr_pos] ==
'A'? w_match : w_mismatch;
742 if(curr_score >= best_score) {
743best_score = curr_score;
747tail_length = (best_pos ==
NPOS) ? 0 : vec.
size() - best_pos;
748 result->SetOutput_data().AddField(
"tail_length", tail_length);
770 size_tend_pos = vec.
size() - 1 - tail_length;
771 size_tbegin_pos = end_pos > window ? end_pos - window : 0;
776 for(
intii = 0; ii < 12; ii++) {
779 result->SetOutput_data().AddField(
"signal_pos",
static_cast<int>(pos + begin_pos));
780 result->SetOutput_data().AddField(
"is_canonical_pas", (ii <= 1));
803 result.SetOutput_data().AddField(
"overlapping_strong_uorfs", (
int)overlapping_uorfs.size());
804 result.SetOutput_data().AddField(
"upstream_strong_uorfs", (
int)upstream_uorfs.size());
813 if( !
id|| !
ctx) {
827vector<CRef<CSeq_loc> > orfs;
829 TSeqPosmax_orf_length_forward = 0;
830 TSeqPosmax_orf_length_either = 0;
831 TSeqPoslargest_forward_orf_end = 0;
834max_orf_length_either =
max(max_orf_length_either, orf_length);
836 if(orf_length > max_orf_length_forward) {
837max_orf_length_forward = orf_length;
838largest_forward_orf_end = (*orf)->GetInt().GetTo();
840max_orf_length_forward =
max(max_orf_length_forward, orf_length);
844 result->SetOutput_data().AddField(
"max_orf_length_forward_strand",
845 int(max_orf_length_forward));
846 result->SetOutput_data().AddField(
"largest_forward_orf_end_pos",
847 int(largest_forward_orf_end));
848 result->SetOutput_data().AddField(
"max_orf_length_either_strand",
849 int(max_orf_length_either));
853vector<string> allowable_starts;
854allowable_starts.push_back(
"ATG");
856max_orf_length_forward = 0;
857max_orf_length_either = 0;
860max_orf_length_either =
max(max_orf_length_either, orf_length);
862 if(orf_length > max_orf_length_forward) {
863max_orf_length_forward = orf_length;
864largest_forward_orf_end = (*orf)->GetInt().GetTo();
866max_orf_length_forward =
max(max_orf_length_forward, orf_length);
870 result->SetOutput_data().AddField(
"max_atg_orf_length_forward_strand",
871 int(max_orf_length_forward));
872 result->SetOutput_data().AddField(
"largest_forward_atg_orf_end_pos",
873 int(largest_forward_orf_end));
874 result->SetOutput_data().AddField(
"max_atg_orf_length_either_strand",
875 int(max_orf_length_either));
886 int count, not_start_not_sec_count;
889not_start_not_sec_count = 0;
896++not_start_not_sec_count;
901not_start_not_sec_count = 0;
905.AddField(
"code_break_count",
count);
907.AddField(
"code_break_not_start_not_sec_count",
908not_start_not_sec_count);
932vector<int> starts(
eStrong+ 1, upstream_length);
934 for(
int i= upstream_length - 3;
i>= 0;
i-= 3) {
936 if(codon ==
"ATG") {
938starts[strength] =
i;
940 if(codon ==
"TAA"|| codon ==
"TAG"|| codon ==
"TGA") {
948 intupstream_utr_atg_count(0);
949 for(
int i= upstream_length - 3;
i>= 0;
i-= 1) {
951 if(codon ==
"ATG") {
952upstream_utr_atg_count++;
957.AddField(
"max_extension_weak_kozak",
958 static_cast<int>(upstream_length - starts[
eWeak]));
960.AddField(
"max_extension_moderate_kozak",
961 static_cast<int>(upstream_length - starts[
eModerate]));
963.AddField(
"max_extension_strong_kozak",
964 static_cast<int>(upstream_length - starts[
eStrong]));
966.AddField(
"upstream_utr_atg_count",
967upstream_utr_atg_count);
989vector<TSeqPos> out_indices;
1001.AddField(
"cds_ambiguity_count",
1012 if( !
id|| !
ctx) {
1028 result->SetOutput_data()
1029.AddField(
"ambiguity_count",
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
size_t GetSize(void) const
static double GetScore(CConstRef< CHMMParameters > hmm_params, const objects::CSeq_loc &cds, objects::CScope &scope, int *const gccontent, double *const startscore=0)
static const CTrans_table & GetTransTable(int id)
HMM model parameters just create it and pass to a Gnomon engine.
static void FindStrongKozakUOrfs(const objects::CSeqVector &seq, TSeqPos cds_start, TLocVec &overlap_results, TLocVec &non_overlap_results, unsigned int min_length_bp=3, unsigned int non_overlap_min_length_bp=105, int genetic_code=1, size_t max_seq_gap=k_default_max_seq_gap)
Specifically find ORFS with a strong Kozak signal that are upstream of cds_start.
vector< CRef< objects::CSeq_loc > > TLocVec
static void FindOrfs(const string &seq, TLocVec &results, unsigned int min_length_bp=3, int genetic_code=1, const vector< string > &allowable_starts=vector< string >(), bool longest_orfs=true, size_t max_seq_gap=k_default_max_seq_gap)
Find ORFs in both orientations.
CSeqTestContext defines any contextual information that a derived class might need.
CRef< objects::CSeq_test_result > x_SkeletalTestResult(const string &test_name)
Create a Seq-test-result with some fields filled in, including a name for this test,...
CRef< objects::CSeq_test_result_set > x_TestAllCdregions(const CSerialObject &obj, const CSeqTestContext *ctx, const string &test_name, TCdregionTester cdregion_tester)
Given a Seq-id and a context, analyze all coding regions by calling a supplied function.
namespace ncbi::objects::
Seq-loc iterator class — iterates all intervals from a seq-loc in the correct order.
CSeq_test_result_set --
static TSeqPos GetAmbigs(const CSeq_data &in_seq, CSeq_data *out_seq, vector< TSeqPos > *out_indices, CSeq_data::E_Choice to_code=CSeq_data::e_Ncbi2na, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
Base class for all serializable objects.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
bool CanTest(const CSerialObject &obj, const CSeqTestContext *ctx) const
Test to see whether the given object *can* be used in this test.
bool IsOrfStart(int state) const
static int SetCodonState(unsigned char ch1, unsigned char ch2, unsigned char ch3)
bool IsOrfStop(int state) const
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
const string AsFastaString(void) const
const CSeq_loc & GetEmbeddingSeq_loc(void) const
Get the nearest seq-loc containing the current range.
TSeqPos GetStop(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the stop of the location.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc. Returns eNa_strand_other if multiple strands in...
TSeqPos LocationOffset(const CSeq_loc &outer, const CSeq_loc &inner, EOffsetType how=eOffset_FromStart, CScope *scope=0)
returns (TSeqPos)-1 if the locations don't overlap
TSeqPos GetStart(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the start of the location.
@ eOffset_FromStart
For positive-orientation strands, start = left and end = right; for reverse-orientation strands,...
CConstRef< CSeq_id > FindLatestSequence(const CSeq_id &id, CScope &scope)
Given a seq-id check its replace history and try to find the latest revision.
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
bool IsSetExcept(void) const
bool GetExcept(void) const
bool GetPseudo(void) const
const CSeqFeatData & GetData(void) const
TInst_Mol GetInst_Mol(void) const
bool IsSetPseudo(void) const
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
bool CanGetInst_Mol(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
bool IsSetPartial(void) const
const CSeq_loc & GetLocation(void) const
bool GetPartial(void) const
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
SAnnotSelector & SetResolveDepth(int depth)
SetResolveDepth sets the limit of subsegment resolution in searching annotations.
SAnnotSelector & SetFeatSubtype(TFeatSubtype subtype)
Set feature subtype (also set annotation and feat type)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
void SetIupacCoding(void)
Set coding to either Iupacaa or Iupacna depending on molecule type.
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
@ eReverseSearch
Search in a backward direction.
@ eCase
Case sensitive compare.
#define DEFINE_STATIC_FAST_MUTEX(id)
Define static fast mutex and initialize it.
Tdata & Set(void)
Assign a value to data member.
TNcbi8aa GetNcbi8aa(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
const TLoc & GetLoc(void) const
Get the Loc member data.
Tdata & Set(void)
Assign a value to data member.
const TLocation & GetLocation(void) const
Get the Location member data.
list< CRef< CCode_break > > TCode_break
TFrame GetFrame(void) const
Get the Frame member data.
const TData & GetData(void) const
Get the Data member data.
const TCode & GetCode(void) const
Get the Code member data.
const TCdregion & GetCdregion(void) const
Get the variant data.
const TAa & GetAa(void) const
Get the Aa member data.
const TProduct & GetProduct(void) const
Get the Product member data.
TNcbieaa GetNcbieaa(void) const
Get the variant data.
bool CanGetCode(void) const
Check if it is safe to call GetCode method.
bool CanGetProduct(void) const
Check if it is safe to call GetProduct method.
TNcbistdaa GetNcbistdaa(void) const
Get the variant data.
const TCode_break & GetCode_break(void) const
Get the Code_break member data.
bool IsSetCode_break(void) const
Individual exceptions. Check if a value has been assigned to the Code_break data member.
@ eFrame_not_set
not set, code uses one
@ eFrame_three
reading frame
@ e_Ncbi8aa
NCBI8aa code.
@ e_not_set
No variant selected.
@ e_Ncbieaa
ASCII value of NCBIeaa code.
TIupacna & SetIupacna(void)
Select the variant.
Defines: CTimeFormat - storage class for time format.
static CConstRef< CSeq_id > s_FindLatest(const CSeq_id &id, CScope &scope)
static void s_CdsStopCodon(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static void s_CdsFlags(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static void s_InframeUpstreamStart(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static void s_CdsLength(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static void s_CdsCountAmbiguities(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static CSeqVector s_GetCdregionPlusUpstream(CFeat_CI feat_iter, const CSeqTestContext *ctx, TSeqPos &upstream_length)
string s_KozakStrengthToString(EKozakStrength strength)
static void s_CompareProtProdToTrans(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static void s_OrfExtension(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static void s_InframeUpstreamStop(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
CConstRef< CGenetic_code > s_GetCode(const CCdregion &cdr)
static void s_Code_break(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
EKozakStrength s_GetKozakStrength(const CSeqVector &vec, TSeqPos pos)
static bool s_IsSelenocysteine(const CCode_break &code_break)
static void s_CdsStartCodon(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static void s_Utrs(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static void s_PrematureStopCodon(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static void s_CodingPropensity(const CSeq_id &id, const CSeqTestContext *ctx, CFeat_CI feat_iter, CSeq_test_result &result)
static TSeqPos s_CountAmbiguities(const CSeqVector &vec)
TSeqPos CodeBreakPosInCds(const CCode_break &code_break, const CSeq_feat &feat, CScope &scope)
void TestStrongKozakUorfs(const CBioseq_Handle bsh, CSeq_test_result &result)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4