(aln->
GetType() == ncbi::CSeq_align::eType_disc) {
115 if(neighborhood > 0 && pos > 0 && pos < bsh.
GetInst_Length() - 1) {
120loc->
SetInt().SetFrom(pos);
121loc->
SetInt().SetTo(pos);
122loc->
SetInt().SetStrand(strand);
129loc->
SetInt().SetFrom(pos <
abs(neighborhood) ? 0 : pos -
abs(neighborhood));
137 if(neighborhood < 0) {
138reverse(seq.begin(), seq.end());
141 size_tbest_pos(
NPOS);
143 static const intw_match = 1;
144 static const intw_mismatch = -4;
145 static const intx_dropoff = 15;
150 for(
size_tcurr_pos = 0;
151curr_pos < seq.size() && curr_score + x_dropoff > best_score;
154curr_score += seq[curr_pos] ==
'A'? w_match : w_mismatch;
155 if(curr_score >= best_score) {
156best_score = curr_score;
161 size_tpriming_length = best_pos ==
NPOS? 0 : best_pos + 1;
167 returnpriming_length;
178 if( !aln || !
ctx) {
198.AddField(
"exon_count", (
int) disc.size());
206 TSeqPoscds_from = 0, cds_to = 0;
208 const CSeq_id& genomic_id = disc.front()->GetSeq_id(1);
225 stringxcript_prot, genomic_prot;
230 boolcan_make_same_prot =
false;
231 if(xcript_prot == genomic_prot &&
232xcript_prot.find_first_not_of(
"ACDEFGHIKLMNPQRSTVWY*") ==
NPOS) {
235can_make_same_prot =
true;
256can_make_same_prot =
false;
263.AddField(
"can_make_same_prot", can_make_same_prot);
268 TSeqPosaligned_residue_count = 0;
269 TSeqPoscds_aligned_residue_count = 0;
270 TSeqPosmatch_count = 0, possible_match_count = 0;
271 TSeqPoscds_match_count = 0, cds_possible_match_count = 0;
272vector<int> cds_match_count_by_frame(3);
273vector<int> cds_possible_match_count_by_frame(3);
277 TSeqPosconsensus_cds_splices = 0;
280 TSeqPosmin_exon_length = 0, max_exon_length = 0;
281 TSeqPosmin_intron_length = 0, max_intron_length = 0;
282 TSeqPosexon_length, intron_length;
283 TSeqPosexon_match_count, exon_possible_match_count;
284 doubleworst_exon_match_frac = 1.1;
285 TSeqPosworst_exon_match_count = 0;
286 TSeqPosworst_exon_possible_match_count = 0;
289 intcds_indel_count = 0;
295 string& genomic_seq = genomic_seq_data.
SetIupacna().
Set();
297 string& genomic_cds_seq = genomic_cds_seq_data.
SetIupacna().
Set();
306exon_length =
r.GetLength();
308 if(exon_index == 0) {
309exon_length_5p = exon_length;
311exon_length_3p = exon_length;
313 if(exon_index == 0 || exon_length > max_exon_length) {
314max_exon_length = exon_length;
316 if(exon_index == 0 || exon_length < min_exon_length) {
317min_exon_length = exon_length;
321intron_length = last_genomic_end - exon.
GetSeqStop(1)- 1;
323intron_length = exon.
GetSeqStart(1) - last_genomic_end - 1;
326 if(exon_index > 0) {
328 if(exon_index == 1 || intron_length > max_intron_length) {
329max_intron_length = intron_length;
331 if(exon_index == 1 || intron_length < min_intron_length) {
332min_intron_length = intron_length;
339loc.
SetInt().SetFrom(last_genomic_end - 2);
340loc.
SetInt().SetTo (last_genomic_end - 1);
343loc.
SetInt().SetFrom(last_genomic_end + 1);
344loc.
SetInt().SetTo (last_genomic_end + 2);
366 if(consensus_splice) {
373 if(consensus_splice) {
374++consensus_cds_splices;
389.AddField(
"introns_5_prime_of_start", exon_index);
393 unsigned intdownstream_intron_count =
394disc.size() - exon_index - 1;
396.AddField(
"introns_3_prime_of_stop",
397(
int) downstream_intron_count);
398 if(downstream_intron_count > 0) {
400.AddField(
"dist_stop_to_exon_end",
403.AddField(
"dist_stop_to_last_intron",
404 int((*++disc.rbegin())->GetSeqStop(0) - cds_to));
412exon_match_count = 0;
413exon_possible_match_count = 0;
416 boolin_cds = has_cds
421 boolgap_in_xcript = avec.
GetResidue(0,
i) ==
'-';
422 boolgap_in_genomic = avec.
GetResidue(1,
i) ==
'-';
424 if(!gap_in_xcript) {
427in_cds = pos >= cds_from && pos <= cds_to;
429 if(!gap_in_genomic) {
430++aligned_residue_count;
432++cds_aligned_residue_count;
437 if(cds_match_count == 0
438&& cds_possible_match_count == 0) {
447++cds_match_count_by_frame[frame];
449}
else if(!gap_in_xcript) {
450 unsigned charcdna_res =
452 unsigned chargenomic_res =
456 if(!(cdna_res & ~genomic_res)) {
457++exon_possible_match_count;
459 if(cds_match_count == 0
460&& cds_possible_match_count == 0) {
464++cds_possible_match_count;
469++cds_possible_match_count_by_frame[frame];
475 if(gap_in_xcript && in_cds) {
478 if(gap_in_genomic && in_cds) {
481 if(!gap_in_genomic && in_cds) {
482genomic_cds_seq.push_back(avec.
GetResidue(1,
i));
485match_count += exon_match_count;
486possible_match_count += exon_possible_match_count;
489 doubleexon_match_frac =
490double(exon_match_count + exon_possible_match_count) / exon_length;
491 if(exon_match_frac < worst_exon_match_frac) {
492worst_exon_match_frac = exon_match_frac;
493worst_exon_match_count = exon_match_count;
494worst_exon_possible_match_count = exon_possible_match_count;
495worst_exon_length = exon_length;
503 for(
intseg = 0; seg < avec.
GetNumSegs(); ++seg) {
504 if(avec.
GetStart(0, seg) == -1) {
510 if(avec.
GetStart(1, seg) == -1) {
526 stringexon_genomic_seq;
529genomic_seq += exon_genomic_seq;
536.AddField(
"max_exon_length", (
int) max_exon_length);
538.AddField(
"min_exon_length", (
int) min_exon_length);
541.AddField(
"5p_terminal_exon_length", (
int) exon_length_5p);
543.AddField(
"3p_terminal_exon_length", (
int) exon_length_3p);
545 if(disc.size() > 1) {
547.AddField(
"max_intron_length", (
int) max_intron_length);
549.AddField(
"min_intron_length", (
int) min_intron_length);
552.AddField(
"aligned_residues", (
int) aligned_residue_count);
554.AddField(
"matching_residues", (
int) match_count);
556.AddField(
"possibly_matching_residues", (
int) possible_match_count);
559.AddField(
"cds_matching_residues", (
int) cds_match_count);
561.AddField(
"cds_possibly_matching_residues",
562(
int) cds_possible_match_count);
564.AddField(
"cds_aligned_residues", (
int) cds_aligned_residue_count);
566.AddField(
"in_frame_cds_matching_residues",
567(
int) cds_match_count_by_frame[0]);
569.AddField(
"in_frame_cds_possibly_matching_residues",
570(
int) cds_possible_match_count_by_frame[0]);
574.AddField(
"total_splices_in_alignment", (
int) total_splices);
576.AddField(
"consensus_splices_in_alignment", (
int) consensus_splices);
579.AddField(
"total_cds_splices_in_alignment",
580(
int) total_cds_splices);
582.AddField(
"consensus_cds_splices_in_alignment",
583(
int) consensus_cds_splices);
586.AddField(
"5_prime_bases_not_aligned",
587(
int) disc.front()->GetSeqStart(0));
589.AddField(
"3_prime_bases_not_aligned",
591- disc.back()->GetSeqStop(0) - 1));
594.AddField(
"start_codon_in_aligned_region",
595disc.front()->GetSeqStart(0) <= cds_from);
597.AddField(
"stop_codon_in_aligned_region",
598disc.back()->GetSeqStop(0) >= cds_to);
602.AddField(
"worst_exon_matches",
int(worst_exon_match_count));
604.AddField(
"worst_exon_possible_matches",
605 int(worst_exon_possible_match_count));
607.AddField(
"worst_exon_length",
int(worst_exon_length));
610.AddField(
"indel_count", indel_count);
612.AddField(
"cds_indel_count", cds_indel_count);
615vector<TSeqPos> out_indices;
619.AddField(
"genomic_ambiguity_count",
int(gac));
623.AddField(
"genomic_cds_ambiguity_count",
int(gcac));
627.AddField(
"upstream_polya_priming",
630.AddField(
"downstream_polya_priming",
635 const CSeq_align& first_exon = *disc.front();
642 TSeqPosgenomic_earliest, genomic_latest;
664.AddField(is_minus ?
"downstream_dist_to_genomic_gap_or_end" 665:
"upstream_dist_to_genomic_gap_or_end",
666 int(genomic_earliest) -
int(
last));
683.AddField(is_minus ?
"upstream_dist_to_genomic_gap_or_end" 684:
"downstream_dist_to_genomic_gap_or_end",
685 int(
last) -
int(genomic_latest));
691 if(disc.size() == 1) {
692 TSeqPoskLargestGeneDist = 10000;
700genomic_roi_from = genomic_start + 1;
702genomic_roi_from = genomic_start;
704 if(genomic_start + kLargestGeneDist + 1
706genomic_roi_to = genomic_start + kLargestGeneDist + 1;
712 if(genomic_start > kLargestGeneDist) {
713genomic_roi_from = genomic_start - kLargestGeneDist - 1;
715genomic_roi_from = 0;
717 if(genomic_start > 1) {
718genomic_roi_to = genomic_start - 1;
729 TSeqPosshortest_dist = kLargestGeneDist + 1;
742shortest_dist = genomic_start
750.AddField(
"has_nearby_upstream_gene_same_strand",
751shortest_dist <= kLargestGeneDist);
752 if(shortest_dist <= kLargestGeneDist) {
754.AddField(
"distance_from_upstream_gene_same_strand",
755 int(shortest_dist));
767 staticvector<TSignedSeqPos>
771vector<TSignedSeqPos> rv;
772rv.reserve(lens.size());
781rv.push_back((end + 1) -
offset);
783rv.push_back(start +
offset);
799vector<TSeqPos> product_lens;
800vector<TSeqPos> genomic_lens;
804product_lens.push_back(part.
GetMatch());
805genomic_lens.push_back(part.
GetMatch());
809}
else if(part.
IsDiag()) {
810product_lens.push_back(part.
GetDiag());
811genomic_lens.push_back(part.
GetDiag());
814genomic_lens.push_back(0);
816product_lens.push_back(0);
819 throwruntime_error(
"unhandled part type in Spliced-enon");
824lens.reserve(product_lens.size());
825 for(
unsigned int i= 0;
i< product_lens.size(); ++
i) {
826lens.push_back(
max(product_lens[
i], genomic_lens[
i]));
828vector<TSignedSeqPos> product_starts =
832vector<TSignedSeqPos> genomic_starts =
838starts.reserve(product_starts.size() + genomic_starts.size());
839 for(
unsigned int i= 0;
i< lens.size(); ++
i) {
840starts.push_back(product_starts[
i]);
841starts.push_back(genomic_starts[
i]);
850 for(
unsigned int i= 0;
i< lens.size(); ++
i) {
851strands.push_back(product_strand);
852strands.push_back(genomic_strand);
879product_strand, genomic_strand,
880product_id, genomic_id);
882ds_align->
SetSegs().SetDenseg(*ds);
884disc->
SetSegs().SetDisc().Set().push_back(ds_align);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool IsReverse(ENa_strand s)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
TSignedSeqPos GetStop(TNumrow row, TNumseg seg, int offset=0) const
TSignedSeqPos GetStart(TNumrow row, TNumseg seg, int offset=0) const
TSignedSeqPos GetSeqPosFromAlnPos(TNumrow for_row, TSeqPos aln_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
TSeqPos GetAlnStop(TNumseg seg) const
TSeqPos GetSeqStop(TNumrow row) const
TSignedRange GetRange(TNumrow row, TNumseg seg, int offset=0) const
TNumseg GetNumSegs(void) const
TSeqPos GetSeqStart(TNumrow row) const
string & GetSeqString(string &buffer, TNumrow row, TSeqPos seq_from, TSeqPos seq_to) const
void SetEndChar(TResidue gap_char)
void SetGapChar(TResidue gap_char)
TResidue GetResidue(TNumrow row, TSeqPos aln_pos) const
static unsigned char FromIupac(unsigned char c)
void Compact()
Join adjacent mergeable segments to create a more compact alignment.
CSeqTestContext defines any contextual information that a derived class might need.
CRef< objects::CSeq_test_result > x_SkeletalTestResult(const string &test_name)
Create a Seq-test-result with some fields filled in, including a name for this test,...
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
TSeqPos GetSeqStop(TDim row) const
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
TSeqPos GetSeqStart(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
CSeq_test_result_set â.
static TSeqPos GetAmbigs(const CSeq_data &in_seq, CSeq_data *out_seq, vector< TSeqPos > *out_indices, CSeq_data::E_Choice to_code=CSeq_data::e_Ncbi2na, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
Base class for all serializable objects.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
bool CanTest(const CSerialObject &obj, const CSeqTestContext *ctx) const
Test to see whether the given object *can* be used in this test.
bool IsConsensusSplice(const string &splice5, const string &splice3)
Consensus splice is GY..AG or AT..AC.
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
C * SerialClone(const C &src)
Create on heap a clone of the source object.
TSeqPos GetStop(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the stop of the location.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
TSeqPos GetStart(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the start of the location.
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
const CSeqFeatData & GetData(void) const
TSeqPos GetBioseqLength(void) const
TInst_Length GetInst_Length(void) const
const CSeqMap & GetSeqMap(void) const
Get sequence map.
CRef< CSeq_loc > GetRangeSeq_loc(TSeqPos start, TSeqPos stop, ENa_strand strand=eNa_strand_unknown) const
Return CSeq_loc referencing the given range and strand on the bioseq If start == 0,...
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
TSeqPos GetEndPosition(void) const
return end position of current segment in sequence (exclusive)
SAnnotSelector & SetResolveAll(void)
SetResolveAll() is equivalent to SetResolveMethod(eResolve_All).
const CSeq_loc & GetLocation(void) const
SAnnotSelector & SetResolveDepth(int depth)
SetResolveDepth sets the limit of subsegment resolution in searching annotations.
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
CSeqMap::ESegmentType GetType(void) const
TSeqPos GetPosition(void) const
return position of current segment in sequence
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
CSeqMap_CI Begin(CScope *scope) const
NCBI style methods.
void SetIupacCoding(void)
Set coding to either Iupacaa or Iupacna depending on molecule type.
const_iterator begin(void) const
const_iterator end(void) const
void Reset(void)
Reset reference object.
bool IntersectingWith(const TThisType &r) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
void SetFrom(TFrom value)
Assign a value to From data member.
void SetTo(TTo value)
Assign a value to To data member.
Tdata & Set(void)
Assign a value to data member.
const TDenseg & GetDenseg(void) const
Get the variant data.
TLens & SetLens(void)
Assign a value to Lens data member.
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
TMatch GetMatch(void) const
Get the variant data.
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
bool IsMismatch(void) const
Check if variant Mismatch is selected.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
vector< ENa_strand > TStrands
TDiag GetDiag(void) const
Get the variant data.
vector< TSignedSeqPos > TStarts
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
TMismatch GetMismatch(void) const
Get the variant data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
void SetType(TType value)
Assign a value to Type data member.
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
bool IsGenomic_ins(void) const
Check if variant Genomic_ins is selected.
bool IsMatch(void) const
Check if variant Match is selected.
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
TStarts & SetStarts(void)
Assign a value to Starts data member.
TStrands & SetStrands(void)
Assign a value to Strands data member.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
bool IsDisc(void) const
Check if variant Disc is selected.
TType GetType(void) const
Get the Type member data.
bool IsDiag(void) const
Check if variant Diag is selected.
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
list< CRef< CSpliced_exon_chunk > > TParts
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
bool IsSpliced(void) const
Check if variant Spliced is selected.
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
list< CRef< CSeq_align > > Tdata
TIds & SetIds(void)
Assign a value to Ids data member.
bool IsProduct_ins(void) const
Check if variant Product_ins is selected.
TProduct_ins GetProduct_ins(void) const
Get the variant data.
const TDisc & GetDisc(void) const
Get the variant data.
TNucpos GetNucpos(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
@ eType_partial
mapping pieces together
@ eType_disc
discontinuous alignment
@ eProduct_type_transcript
const TLoc & GetLoc(void) const
Get the Loc member data.
list< CRef< CCode_break > > TCode_break
const TData & GetData(void) const
Get the Data member data.
const TCode & GetCode(void) const
Get the Code member data.
const TCdregion & GetCdregion(void) const
Get the variant data.
bool CanGetCode(void) const
Check if it is safe to call GetCode method.
const TCode_break & GetCode_break(void) const
Get the Code_break member data.
bool IsSetCode_break(void) const
individual exceptions Check if a value has been assigned to Code_break data member.
ENa_strand
strand of nucleic acid
TIupacna & SetIupacna(void)
Select the variant.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static CRef< CSeq_align > s_SplicedToDisc(const CSeq_align &spliced_seg_aln)
static ENa_strand s_GetSeqStrand(const CSeq_align &aln, CSeq_align::TDim row)
static size_t s_GetPolyA_genomic_priming(const CSeq_align &aln, CScope &scope, int neighborhood)
static vector< TSignedSeqPos > s_CalculateStarts(const vector< TSeqPos > &lens, ENa_strand strand, TSeqPos start, TSeqPos end)
static CRef< CDense_seg > s_ExonToDenseg(const CSpliced_exon &exon, ENa_strand product_strand, ENa_strand genomic_strand, const CSeq_id &product_id, const CSeq_id &genomic_id)
#define row(bind, expected)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4