;
126 "The sequence of the model RefSeq transcript was modified relative " 127 "to this genomic sequence to represent the inferred CDS";
129 "The sequence of the model RefSeq protein was modified relative " 130 "to this genomic sequence to represent the inferred CDS";
156 for( ; desc_iter; ++desc_iter) {
169, m_intron_stitch_threshold_flags(fBoth)
170, m_min_intron(kDefaultMinIntron)
171, m_allowed_unaligned(kDefaultAllowedUnaligned)
172, m_is_gnomon(
false)
173, m_is_best_refseq(
false)
187:
m_impl(new SImplementation(scope))
223 return m_impl->CleanAlignment(align_in);
232 return m_impl->ConvertAlignToAnnot(align, annot, seqs, gene_id, cdregion,
false);
240 m_impl->ConvertAlignToAnnot(aligns, annot, seqs);
244 constobjects::CSeq_loc &loc,
245objects::CSeq_annot& annot,
246objects::CBioseq_set& seqs,
254 "Can't find genomic sequence "+ loc.GetId()->AsFastaString());
266 size_tnew_id_num = counter.
Add(1);
269 string str(
"lcl|MRNA_");
276 string str(
"lcl|PROT_");
286fake_align.
SetSegs().SetSpliced().SetProduct_id().Assign(*rna_id);
287fake_align.
SetSegs().SetSpliced().SetGenomic_id().Assign(*loc.GetId());
289fake_align.
SetSegs().SetSpliced().SetGenomic_strand(loc.GetStrand());
290fake_align.
SetSegs().SetSpliced().SetProduct_type(
296exon->SetProduct_start().SetNucpos(product_pos);
297product_pos += loc_it.GetRange().GetLength();
298exon->SetProduct_end().SetNucpos(product_pos-1);
299exon->SetGenomic_start(loc_it.GetRange().GetFrom());
300exon->SetGenomic_end(loc_it.GetRange().GetTo());
302 match->SetMatch(loc_it.GetRange().GetLength());
303exon->SetParts().push_back(
match);
304fake_align.
SetSegs().SetSpliced().SetExons().push_back(exon);
306fake_align.
SetSegs().SetSpliced().SetProduct_length(product_pos);
309cdregion.
SetData().SetCdregion().SetFrame(frame);
314 "Non-standard frame specified with 5'-complete location");
338 if(product_pos % 3) {
340 "Non-whole number of codons with 3'-complete location");
348cdregion.
SetData().SetCdregion().SetCode().Set().push_back(
code);
354 m_impl->ConvertAlignToAnnot(fake_align, annot, seqs, 0, &cdregion,
false);
363 m_impl->SetFeatureExceptions(feat, align);
371 m_impl->SetPartialFlags(gene_feat, mrna_feat, cds_feat);
376 m_impl->RecomputePartialFlags(annot);
383: m_aln(aln), m_scope(scope), m_genomic_row(-1)
384, m_allowed_unaligned(allowed_unaligned), m_opts(opts)
397 "CreateGeneModelFromAlign(): " 398 "failed to create consistent alignment");
414 "CreateGeneModelFromAlign(): " 415 "More than one genomic row in alignment");
421 "CreateGeneModelFromAlign(): " 422 "No genomic sequence found in alignment");
430 if(m_aln.GetSegs().IsSpliced()) {
431rna_loc = x_GetLocFromSplicedExons(m_aln);
433 const CSeq_id&
id= m_aln.GetSeq_id(GetRnaRow());
440rna_loc = x_Mapper()->Map(*range_loc);
448 returnm_genomic_row;
454 returnGetGenomicRow() == 0 ? 1 : 0;
465x_Mapper()->IncludeSourceLocs(
b);
470x_Mapper()->SetMergeNone();
494 if(!prev_exon.
IsNull() &&
514 if(donor_ok || !acceptor_ok) {
517 if(acceptor_ok || !donor_ok) {
526prev_int = genomic_int;
562align->
Assign(align_in);
564vector<SExon> orig_exons =
GetExons(*align);
573 if(
GetExons(*align) != orig_exons) {
596model_num.erase(model_num.size()-2, 2);
613 boolfound_start_codon =
false;
614 boolfound_stop_codon =
false;
617 if((*mod_it)->IsStart_codon_found()) {
618found_start_codon = (*mod_it)->GetStart_codon_found();
620 if((*mod_it)->IsStop_codon_found()) {
621found_stop_codon = (*mod_it)->GetStop_codon_found();
629 "Can't find genomic sequence "+
634fake_transcript_align->
Assign(*align);
635align.
Reset(fake_transcript_align);
645 size_tnew_id_num = counter.
Add(1);
647 string str(
"lcl|MRNA_");
654fake_transcript_align->
SetSegs().SetSpliced().SetProduct_id(
657fake_transcript_align->
SetSegs().SetSpliced().SetProduct_type(
660fake_transcript_align->
SetSegs().SetSpliced().SetExons())
667fake_transcript_align->
SetSegs().SetSpliced().SetExons().back();
668 boolaligned_to_the_end =
669last_exon->GetProduct_end().GetNucpos()+1==
672fake_transcript_align->
SetSegs().SetSpliced().SetProduct_length() =
674(((found_stop_codon && aligned_to_the_end) || !aligned_to_the_end)?3:0);
676 if(found_stop_codon && aligned_to_the_end) {
677 boolis_minus = last_exon->IsSetGenomic_strand() ?
680. IsSetGenomic_strand() &&
686? last_exon->GetGenomic_start()
687: genomic_length - last_exon->GetGenomic_end() - 1);
688 if(space_for_codon < 3) {
691 "Stop codon goes outside genomic sequence");
694new_exon->SetProduct_start().SetNucpos(
695last_exon->GetProduct_end().GetNucpos() + space_for_codon + 1);
696new_exon->SetProduct_end().SetNucpos(
697last_exon->GetProduct_end().GetNucpos() + 3);
698new_exon->SetGenomic_start(
699is_minus ? genomic_length - 3 + space_for_codon : 0);
700new_exon->SetGenomic_end(
701is_minus ? genomic_length - 1 : 2 - space_for_codon);
702 if(last_exon->IsSetProduct_strand()) {
703new_exon->SetProduct_strand(last_exon->GetProduct_strand());
705 if(last_exon->IsSetGenomic_strand()) {
706new_exon->SetGenomic_strand(last_exon->GetGenomic_strand());
708fake_transcript_align->
SetSegs().SetSpliced().SetExons()
709. push_back(new_exon);
713last_exon->SetProduct_end().SetNucpos() += space_for_codon;
715last_exon->SetGenomic_start() -= space_for_codon;
717last_exon->SetGenomic_end() += space_for_codon;
719 if(last_exon->IsSetParts() && space_for_codon) {
722match_stop_codon->SetMatch(space_for_codon);
723last_exon->SetParts().push_back(match_stop_codon);
728cd_feat->
SetData().SetCdregion();
731fake_transcript_align->
SetSegs().SetSpliced().SetProduct_id(),
733 if(!found_start_codon &&
734fake_transcript_align->
SetSegs().SetSpliced().SetExons().front()->GetProduct_start().GetNucpos()==0) {
737 if(!found_stop_codon && aligned_to_the_end) {
746cd_feat->
SetData().SetCdregion().SetCode().Set().push_back(
code);
757transcribed_rna_id.
Assign(query_rna_id);
758 if(cds_feat_on_genome_with_translated_product &&
759cds_feat_on_genome_with_translated_product->
CanGetProduct() &&
760cds_feat_on_query_mrna &&
762 CSeq_id* translated_protein_id =
const_cast<CSeq_id*
>(cds_feat_on_genome_with_translated_product->
SetProduct().GetId());
773 const CSeq_feat* cds_feat_on_query_mrna_ptr,
774 boolcall_on_align_list)
784 if(is_protein_align) {
812 if(cds_feat_on_query_mrna_ptr) {
814cds_feat_on_query_mrna->
Assign(*cds_feat_on_query_mrna_ptr);
817 if(cdregion_handle) {
824vector<CMappedFeat> ncRNAs;
827 if(query_rna_handle) {
829feat_iter; ++feat_iter) {
830 const CSeq_loc&rna_loc = feat_iter->GetLocation();
831 if(feat_iter->GetData().GetSubtype() !=
833++rna_loc.
begin() == rna_loc.
end() &&
837full_length_rna = *feat_iter;
838}
else if(feat_iter->GetData().GetSubtype() ==
841ncRNAs.push_back(*feat_iter);
848 size_tmodel_num = counter.
Add(1);
852rna_feat_loc_on_genome->Assign(mapper.GetRnaLoc());
855list<CRef<CSeq_loc> > transcribed_mrna_seqloc_refs;
863cds_feat_on_query_mrna, cds_feat_on_transcribed_mrna);
870*align, rna_feat_loc_on_genome, opts)
872*transcribed_rna_id, cds_feat_on_query_mrna);
873 if(mrna_feat_on_genome_with_translated_product &&
874!mrna_feat_on_genome_with_translated_product->
IsSetProduct()) {
876mrna_feat_on_genome_with_translated_product->
877SetProduct().SetWhole().
Assign(*transcribed_rna_id);
882transcribed_mrna_seqloc_refs,
883*align, rna_feat_loc_on_genome, time, model_num, seqs, opts);
891*mrna_feat_on_genome_with_translated_product,
892cds_feat_on_genome_with_translated_product.
GetPointer());
898 if(!call_on_align_list){
903rna_feat_loc_on_genome, genomic_id, gene_id);
907annot.
SetData().SetFtable().push_back(gene_feat);
909gene =
genes.
insert(make_pair(gene_id,gene_feat)).first;
911gene_feat = gene->second;
913&mrna_feat_on_genome_with_translated_product->
GetLocation()));
917genexref->SetId(*gene_feat->
SetIds().front());
920mrnaxref->SetId(*mrna_feat_on_genome_with_translated_product->
SetIds().front());
922gene_feat->
SetXref().push_back(mrnaxref);
923mrna_feat_on_genome_with_translated_product->
SetXref().push_back(genexref);
927rna_feat_loc_on_genome, genomic_id);
930annot.
SetData().SetFtable().push_back(gene_feat);
935 if(mrna_feat_on_genome_with_translated_product) {
938annot.
SetData().SetFtable().push_back(mrna_feat_on_genome_with_translated_product);
943 if(cds_feat_on_genome_with_translated_product.
NotNull()) {
944propagated_features.push_back(cds_feat_on_genome_with_translated_product);
946 if(cds_feat_on_query_mrna && cds_feat_on_query_mrna->
CanGetProduct()) {
950 for(
CFeat_CIfeat_iter(prot_handle,
952feat_iter; ++feat_iter) {
954feat_iter->GetData().GetProt();
956!prot_ref.
GetName().empty()) {
959prot_xref->SetData().SetProt().SetName()
960. push_back(prot_ref.
GetName().front());
961cds_feat_on_genome_with_translated_product->
SetXref().push_back(prot_xref);
969 ITERATE(vector<CMappedFeat>, it, ncRNAs){
973propagated_features.push_back(ncrna_feat);
978annot.
SetData().SetFtable().push_back(*it);
982 if((*it)->IsSetIds()) {
983propagatedxref->SetId(*(*it)->SetIds().front());
987mrnaxref->SetId(*mrna_feat_on_genome_with_translated_product->
SetIds().front());
989(*it)->SetXref().push_back(mrnaxref);
990mrna_feat_on_genome_with_translated_product->
SetXref().push_back(propagatedxref);
994 if(!call_on_align_list){
995 if(propagated_features.empty()){
1000 SetPartialFlags(gene_feat, mrna_feat_on_genome_with_translated_product, *it);
1006 if(mrna_feat_on_genome_with_translated_product) {
1007mrna_feat_on_genome_with_translated_product->
SetProduct().SetWhole().Assign(query_rna_id);
1009 if(cds_feat_on_genome_with_translated_product) {
1011cds_feat_on_genome_with_translated_product->
1013cds_feat_on_transcribed_mrna->
1017seq_id->Assign(query_rna_id);
1018cds_feat_on_transcribed_mrna->
SetLocation().SetId(*seq_id);
1020(*loc)->SetId(*seq_id);
1025 if(!query_rna_handle) {
1027cds_feat_on_query_mrna, cds_feat_on_genome_with_translated_product);
1031 if(mrna_feat_on_genome_with_translated_product) {
1033 m_scope->GetBioseqHandle(query_rna_id);
1040cds_feat_on_genome_with_translated_product.
GetPointer(),
1042cds_feat_on_transcribed_mrna.
GetPointer());
1045 m_scope->RemoveTopLevelSeqEntry(rna_seh);
1048 if(cds_feat_on_genome_with_translated_product) {
1056 TSeqPosclean_match_count = 0;
1060&transcribed_mrna_seqloc_refs,
1061&clean_match_count);
1062 if(!clean_match_count) {
1064annot.
SetData().SetFtable().remove(cds_feat_on_genome_with_translated_product);
1065cds_feat_on_genome_with_translated_product =
NULL;
1068 m_scope->RemoveTopLevelSeqEntry(prot_seh);
1073 RenameGeneratedBioseqs(query_rna_id, *transcribed_rna_id, cds_feat_on_query_mrna, cds_feat_on_genome_with_translated_product);
1078 m_scope->AddTopLevelSeqEntry(**it);
1087 for(CBioseq_set::TSeq_set::iterator bioseq_it =
1091 if(((*bioseq_it)->GetSeq().IsNa() &&
1093((*bioseq_it)->GetSeq().IsAa() &&
1096bioseq_it = seqs.
SetSeq_set().erase(bioseq_it);
1105 if(loc->IsPacked_int() && loc->GetPacked_int().Get().size()==1) {
1107loc->SetInt(*interval);
1110 returnis_protein_align ? cds_feat_on_genome_with_translated_product : mrna_feat_on_genome_with_translated_product;
1133 const CSeq_id& genomic_id = clean_align->
GetSeq_id(mapper.GetGenomicRow());
1137 else if(!(gene_handle == genomic_id))
1139 "Bad list of alignments to ConvertAlignToAnnot(); alignments on different genes");
1142loc->Assign(mapper.GetRnaLoc());
1153gene_annot.
SetData().SetFtable().push_front(gene_feat);
1155annot.
SetData().SetFtable().splice(annot.
SetData().SetFtable().end(),
1156gene_annot.
SetData().SetFtable());
1173 if(!inst.
SetExt().SetDelta().Set().empty()) {
1198inst.
SetExt().SetDelta().AddLiteral(seq, mol_class);
1208inst.
SetExt().SetDelta().AddLiteral(seq, mol_class);
1218 booladd_unaligned_parts,
1219 boolmark_transcript_deletions,
1235 intprev_product_to = -1;
1236 boolprev_fuzz =
false;
1246 if((prev_product_to > -1 &&
1249 if(has_gap !=
NULL) {
1253inst.
SetExt().SetDelta().AddLiteral
1257 intgap_len = add_unaligned_parts ? mrna_loc->
GetTotalRange().
GetFrom()-(prev_product_to+1) : 0;
1259seq_size += gap_len;
1260prev_product_to += gap_len;
1261inst.
SetExt().SetDelta().AddLiteral(gap_len);
1263inst.
SetExt().SetDelta().Set().back()
1268 unsignedpart_count = 0;
1269 unsignedmapped_exon_len = 0;
1270 for(
CSeq_loc_CIpart_it(*mrna_loc); part_it; ++part_it) {
1272 if(prev_product_to<0) {
1273prev_product_to = part_it.GetRange().GetFrom()-1;
1274 if(add_unaligned_parts && part_it.GetRange().GetFrom() > 0) {
1275seq_size = part_it.GetRange().GetFrom();
1276inst.
SetExt().SetDelta().AddLiteral(seq_size);
1279 intdeletion_len = part_it.GetRange().GetFrom()-(prev_product_to+1);
1285 if(deletion_len > 0) {
1286 if(mark_transcript_deletions && part_count == 1) {
1290deletion_loc.
SetInt().SetId().Assign(part_it.GetSeq_id());
1291deletion_loc.
SetInt().SetFrom(prev_product_to+1);
1292deletion_loc.
SetInt().SetTo(part_it.GetRange().GetFrom()-1);
1297 if(deletion_len > 0 && (mark_transcript_deletions || part_count > 1)) {
1298 if(has_indel !=
NULL) {
1301 stringdeletion(deletion_len,
'N');
1303seq_size += deletion.size();
1311mapped_exon_len += it.GetRange().GetLength();
1320seq_size += vec.
size();
1322prev_product_to = part_it.GetRange().GetTo();
1324 if(has_indel !=
NULL&&
1326mapped_exon_len != loc_it.GetRange().GetLength())) {
1337 if(seq_size < (
int)length) {
1339inst.
SetExt().SetDelta().AddLiteral
1343inst.
SetExt().SetDelta().AddLiteral(length-seq_size);
1378}
else if(cds_feat_on_query_mrna.
IsNull()) {
1396assembly->
Assign(align);
1397bioseq.
SetInst().SetHist().SetAssembly().push_back(assembly);
1402 string str(
"lcl|CDNA_");
1408transcribed_rna_id->
Set(
str);
1410bioseq.
SetId().push_back(transcribed_rna_id);
1412 if(cds_feat_on_query_mrna.
NotNull()) {
1418cds_feat_on_transcribed_mrna->
Assign(*cds_feat_on_query_mrna);
1419cds_feat_on_transcribed_mrna->
SetLocation().SetId(*transcribed_rna_id);
1421annot->
SetData().SetFtable().push_back(cds_feat_on_transcribed_mrna);
1425cds_feat_on_transcribed_mrna->
SetData().SetCdregion();
1428(*it)->SetLoc().SetId(*transcribed_rna_id);
1439 returntranscribed_rna_id;
1445code_break->
SetLoc(loc);
1446code_break->
SetAa().SetNcbieaa(ncbieaa);
1448feat.
SetData().SetCdregion().SetCode_break().push_back(code_break);
1466 string str(
"lcl|PROT_");
1473cds_feat_on_transcribed_mrna->
SetProduct().SetWhole(*translated_protein_id);
1475bioseq.
SetId().push_back(translated_protein_id);
1496bioseq.
SetDescr().Set().push_back(desc);
1510 boolfinal_code_break =
false;
1512final_code_break = (strprot[strprot.size()-1] !=
'*');
1514strprot.resize(strprot.size()-1);
1521seq_inst.
SetExt().SetDelta();
1541 boolstarts_with_code_break =
false;
1545starts_with_code_break =
true;
1553 size_tskip_5_prime = 0;
1554 size_tskip_3_prime = 0;
1555 unsignedcount_internal_stops = 0;
1558 intcodon_start_pos = (
int)ci.GetPosition() + frame;
1559 int len=
int(ci.GetLength()) - frame;
1561 _ASSERT( -3 < frame && frame < 3 );
1565(ci.IsUnknownLength() || !ci.IsSetData()) &&
1575 boolstop_codon_included = e > strprot.size();
1576 if(stop_codon_included) {
1588 if(ci.IsUnknownLength()) {
1589seq_inst.
SetExt().SetDelta().AddLiteral(
len);
1591}
else if(!ci.IsSetData()) {
1592 if(
b==skip_5_prime &&
1594skip_5_prime += e-
b;
1595}
else if(stop_codon_included &&
b==e) {
1599 if(strprot[
b] !=
'X') {
1604seq_inst.
SetExt().SetDelta().AddLiteral(
static_cast<TSeqPos>(e-
b));
1608 if(stop_codon_included && final_code_break) {
1611stop_codon_on_mrna->
SetInt().SetFrom(pos_on_mrna);
1612stop_codon_on_mrna->
SetInt().SetTo(pos_on_mrna + 2);
1613 AddCodeBreak(*cds_feat_on_transcribed_mrna, *stop_codon_on_mrna,
'*');
1614transcribed_mrna_seqloc_refs.push_back(stop_codon_on_mrna);
1618 if(
b==0 && strprot[
b] !=
'M'&&
1619!starts_with_code_break &&
1621strprot[
b] =
'M';
1624start_codon_on_mrna->
SetInt().SetFrom(pos_on_mrna);
1625start_codon_on_mrna->
SetInt().SetTo(pos_on_mrna + 2);
1626 AddCodeBreak(*cds_feat_on_transcribed_mrna, *start_codon_on_mrna,
'M');
1627transcribed_mrna_seqloc_refs.push_back(start_codon_on_mrna);
1631 size_tstop_aa_pos =
b-1;
1632 while((stop_aa_pos = strprot.find(
'*', stop_aa_pos+1)) < e) {
1633strprot[stop_aa_pos] =
'X';
1637internal_stop_on_mrna->
SetInt().SetFrom(pos_on_mrna);
1638internal_stop_on_mrna->
SetInt().SetTo(pos_on_mrna + 2);
1639 AddCodeBreak(*cds_feat_on_transcribed_mrna, *internal_stop_on_mrna,
'X');
1640transcribed_mrna_seqloc_refs.push_back(internal_stop_on_mrna);
1641++count_internal_stops;
1650 _ASSERT( -2 <= frame && frame <= 0 );
1654align_info->
SetType().SetStr(
"AlignInfo");
1655align_info->
AddField(
"num_internal_stop_codon", (
int)count_internal_stops);
1656cds_feat_on_transcribed_mrna->
AddExt(align_info);
1660 if(
b< strprot.size() && strprot[
b] !=
'X') {
1669strprot.size() <=
b+ (frame==0?0:1) );
1673!seq_inst.
GetExt().
GetDelta().
Get().back()->GetLiteral().IsSetSeq_data()) {
1674skip_3_prime += seq_inst.
GetExt().
GetDelta().
Get().back()->GetLiteral().GetLength();
1675seq_inst.
SetExt().SetDelta().Set().pop_back();
1679 if(skip_5_prime || skip_3_prime) {
1690cds_feat_on_transcribed_mrna->
SetLocation(*to_mrna.
Map(*prot_loc));
1695 if(seq_inst.
SetExt().SetDelta().Set().size() == 1 && seq_inst.
SetExt().SetDelta().Set().back()->GetLiteral().IsSetSeq_data()) {
1698dprot->
Assign(seq_inst.
SetExt().SetDelta().Set().back()->GetLiteral().GetSeq_data());
1707cds_feat_on_assembly_mrna->
Assign(*cds_feat_on_transcribed_mrna);
1711cds_feat_on_assembly_mrna->
SetLocation().SetInt().SetTo() -= 3;
1717prot_assembly->
SetSegs().SetSpliced().SetProduct_length(seq_inst.
GetLength());
1719seq_inst.
SetHist().SetAssembly().push_back(prot_assembly);
1731 m_scope->RemoveTopLevelSeqEntry(prot_seh);
1734 m_scope->RemoveTopLevelSeqEntry(mrna_seh);
1755 if(!gnomon_model_num.empty()) {
1757obj_id->
SetStr(
"rna."+ gnomon_model_num);
1760mrna_feat->
SetIds().push_back(feat_id);
1763mrna_feat->
SetProduct().SetWhole().Assign(transcribed_rna_id);
1767 if(
info&&
info->IsSetBiomol()) {
1768 switch(
info->GetBiomol()) {
1792 if(
info->IsSetGbmoltype()) {
1793RNA_class =
info->GetGbmoltype();
1810 if(!RNA_class.empty()) {
1811mrna_feat->
SetData().SetRna().SetExt().SetGen().SetClass(RNA_class);
1814 if(!name.empty()) {
1815 if(!RNA_class.empty()) {
1816mrna_feat->
SetData().SetRna().SetExt().SetGen().SetProduct(name);
1818mrna_feat->
SetData().SetRna().SetExt().SetName(name);
1840 boolupdate_existing_gene = gene_feat;
1841 stringgene_id_str =
"gene.";
1846 if(!update_existing_gene) {
1847 if(feat_iter && feat_iter.
GetSize()) {
1855gene_feat->
SetData().SetGene();
1859obj_id->
SetStr(gene_id_str);
1862gene_feat->
SetIds().push_back(feat_id);
1875}
else if(feat_iter && feat_iter.
GetSize()) {
1885 if(feat_iter && feat_iter.
GetSize() == 1 && update_existing_gene) {
1891 tag->Assign(**xref_it);
1892 boolduplicate =
false;
1897 if((*previous_xref_it)->Match(**xref_it)){
1910gene_feat->
SetData().SetGene().SetDesc(gene_id_str);
1934align, loc, opts,
offset);
1936 if(cds_feat_on_genome) {
1942loc_ranges += loc_it.GetRange();
1948 stringgnomon_model_num;
1955 if(!gnomon_model_num.empty()) {
1957obj_id->
SetStr(
"cds."+ gnomon_model_num);
1960cds_feat_on_transcribed_mrna->
SetIds().push_back(feat_id);
1963transcribed_mrna_seqloc_refs,
1964time, model_num, seqs);
1967cds_feat->
Assign(*cds_feat_on_transcribed_mrna);
1975 if(is_partial_5prime &&
offset) {
1978orig_frame = cds_feat->
GetData()
1984 intframe = (
offset- orig_frame) % 3;
1988frame = (3 - frame) % 3;
1989 if(frame != orig_frame) {
1992cds_feat->
SetData().SetCdregion()
1996cds_feat->
SetData().SetCdregion()
2000cds_feat->
SetData().SetCdregion()
2006 "mod 3 out of bounds");
2011 if(!gnomon_model_num.empty() && !is_partial_5prime) {
2013 if(cds_start >= 3) {
2021vec.
GetSeqData(cds_start % 3, cds_start, mrna);
2030 SIZE_TYPEstop_5prime = strprot.rfind(
'*');
2031 if(stop_5prime !=
NPOS) {
2032stop_5prime = stop_5prime*3+cds_start%3;
2034stop_5prime_feature->
SetData().SetImp().SetKey(
"misc_feature");
2035stop_5prime_feature->
SetComment(
"upstream in-frame stop codon");
2041stop_5prime_feature->
SetLocation(*stop_5prime_location);
2058cds_feat->
SetData().SetCdregion();
2059CCdregion::TCode_break::iterator it =
2063code_break_loc.
Assign((*it)->GetLoc());
2068 if(new_cb_loc->
IsEquiv()) {
2069new_cb_loc = new_cb_loc->
GetEquiv().
Get().front();
2073 if(new_cb_loc && !new_cb_loc->
IsNull()) {
2075new_cb_ranges += loc_it.GetRange();
2077new_cb_ranges &= loc_ranges;
2080(*it)->SetLoc(*new_cb_loc);
2101name = sequence::CDeflineGenerator().GenerateDefline(handle);
2113 if(feat_iter && feat_iter.
GetSize() &&
2119 size_tlast_comma = name.rfind(
',');
2120 if(last_comma != string::npos) {
2121name.erase(last_comma);
2133 if(desc->GetUser().HasField(
"polyA required for stop codon")) {
2152non_const_loc->
Assign(*loc);
2154align, non_const_loc, opts,
offset);
2173list< CRef< CSeq_loc > >& a_list = a_mix->
SetMix().Set();
2174 constlist< CRef< CSeq_loc > >& b_list = b_mix->
GetMix().
Get();
2177 for(list<
CRef< CSeq_loc >>::iterator a_i = a_list.begin(); a_i != a_list.end();) {
2180a_list.splice(a_i, diff->
SetMix().Set());
2181a_i = a_list.erase(a_i);
2184 if(a_list.size() == 1) {
2185 returna_list.front();
2214 for(
CSeq_loc_CIloc_it(feature_on_mrna->GetLocation());
2234 "failed to find requisite parts of " 2239 if( !this_loc_mapped ||
2240this_loc_mapped->
IsNull() ||
2241this_loc_mapped->
IsEmpty() ) {
2245 if( !mapped_loc ) {
2249feature_on_mrna->GetLocation().GetTotalRange().GetFrom();
2252 boolis_partial_5prime =
2254 boolis_partial_3prime =
2258 boollast_range = !++it1;
2259 if(is_partial_3prime && last_range &&
2262feature_on_mrna->GetData().IsCdregion() &&
2268equiv->
GetEquiv().
Get().back()->GetTotalRange().GetTo();
2269 if(missing_end < 3) {
2272is_partial_3prime =
false;
2286sub.
SetInt().SetId().Assign(*this_loc_mapped->
GetId());
2290 boolcross_origin = (left > right);
2297half->
SetTo(genomic_size-1);
2300half->
SetTo(right);
2309 if(this_loc_mapped->
IsMix()) {
2313 if(subloc_it.GetRangeAsSeq_loc()->
2316mrna_fuzzy_boundaries.
insert(
2317subloc_it.GetRange().GetFrom());
2319 if(subloc_it.GetRangeAsSeq_loc()->
2322mrna_fuzzy_boundaries.
insert(
2323subloc_it.GetRange().GetTo());
2328this_loc_mapped->
SetMix().Set())
2330(*subloc_it)->SetPartialStart(
2331mrna_fuzzy_boundaries.count(
2334(*subloc_it)->SetPartialStop(
2335mrna_fuzzy_boundaries.count(
2348mapped_loc->
SetMix().Set().push_back(this_loc_mapped);
2360 if(mapped_loc && feature_on_mrna->GetData().IsRna())
2399 if(mapped_loc && feature_on_mrna->GetData().IsCdregion()) {
2404 for(; vec.
IsInGap(start_gap); ++start_gap);
2405 if(start_gap > 0 && start_gap < vec.
size()) {
2412orig_mapped_loc.
Assign(*mapped_loc);
2415 while(mapped_loc->
SetPacked_int().Set().front()->GetLength()
2418start_gap -= mapped_loc->
SetPacked_int().Set().front()->GetLength();
2425first_exon.
SetTo() -= start_gap;
2427first_exon.
SetFrom() += start_gap;
2433loc->
Assign(*SubtractPreserveBiologicalOrder(*loc, *SubtractPreserveBiologicalOrder(orig_mapped_loc, *mapped_loc)));
2438 for(; vec.
IsInGap(vec.
size() - 1 - end_gap); ++end_gap);
2439 if(end_gap > 0 && end_gap < vec.
size()) {
2444orig_mapped_loc.
Assign(*mapped_loc);
2447 while(mapped_loc->
SetPacked_int().Set().back()->GetLength() <= end_gap)
2449end_gap -= mapped_loc->
SetPacked_int().Set().back()->GetLength();
2456last_exon.
SetFrom() += end_gap;
2458last_exon.
SetTo() -= end_gap;
2463loc->
Assign(*SubtractPreserveBiologicalOrder(*loc, *SubtractPreserveBiologicalOrder(orig_mapped_loc, *mapped_loc)));
2472mapped_feat->
Assign(*feature_on_mrna);
2485 if(propagated_feat){
2500 if(mrna_feat && propagated_feat)
2532 if(gene_feat && mrna_feat){
2545 if(gene_feat && propagated_feat && !mrna_feat){
2582feature::CFeatTree
tree(sah);
2583vector<CMappedFeat> top_level_features =
tree.GetChildren(
CMappedFeat());
2586vector< vector<CMappedFeat> > top_level_features_by_type;
2589 ITERATE(vector<CMappedFeat>, it, top_level_features)
2590top_level_features_by_type[it->GetData().Which()].push_back(*it);
2597 ITERATE(vector<CMappedFeat>, gene_it,
2607vector<CMappedFeat> gene_children =
2608gene_feat ?
tree.GetChildren(*gene_it)
2610 sort(gene_children.begin(), gene_children.end());
2612 ITERATE(vector<CMappedFeat>, child_it, gene_children){
2621}
else if(!child_feat || child_feat->
GetData().
IsRna()){
2622vector<CMappedFeat> rna_children =
2623child_feat ?
tree.GetChildren(*child_it)
2629 while((child_it+1) != gene_children.end() &&
2632(child_it+1)->GetTotalRange())){
2633rna_children.push_back(*(++child_it));
2635 if(rna_children.empty()){
2639 ITERATE(vector<CMappedFeat>, rna_child_it, rna_children){
2657!propagated_feature || !propagated_feature->
IsSetDbxref())
2663 if((*gene_xref_it)->GetDb() !=
"miRBase")
2665 if((*gene_xref_it)->GetDb() == (*propagated_xref_it)->GetDb() &&
2666!(*gene_xref_it)->Match(**propagated_xref_it))
2668 stringpropagated_feature_desc;
2670propagated_feature_desc =
"corresponding cdregion";
2673 "Unexpected propagated feature type");
2674propagated_feature_desc =
"propagated ncRNA feature";
2680<<
" and "<< propagated_feature_desc
2681<<
" have "<< (*gene_xref_it)->GetDb()
2682<<
" dbxrefs with inconsistent tags");
2697 for(
CFeat_CIfeat_iter(handle, sel); feat_iter; ++feat_iter) {
2699feat->
Assign(feat_iter->GetOriginalFeature());
2701mapper.
Map(feat_iter->GetLocation());
2706annot.
SetData().SetFtable().push_back(feat);
2736 if( !(*it)->IsSetId() ) {
2741 const CFeat_id& feat_id = (*it)->GetId();
2764(
"rearrangement required for product");
2790 for( ; align_iter; ++align_iter) {
2797al.
Reset(&this_align);
2803 boolhas_length_mismatch =
false;
2805 boolhas_incomplete_polya_tail =
false;
2806 boolpartial_unaligned_section =
false;
2821has_length_mismatch =
true;
2840partial_unaligned_section =
true;
2849 switch((*part_it)->Which()) {
2851pos += (*part_it)->GetMatch();
2855 TSeqRange(pos, pos+(*part_it)->GetMismatch()-1);
2856pos += (*part_it)->GetMismatch();
2859pos += (*part_it)->GetDiag();
2863delete_sizes[pos] = (*part_it)->GetGenomic_ins();
2867 TSeqRange(pos, pos+(*part_it)->GetProduct_ins()-1);
2868pos += (*part_it)->GetProduct_ins();
2880 if(
r.GetFrom() != 0) {
2882partial_unaligned_section =
true;
2884insert_locs +=
TSeqRange(0,
r.GetFrom()-1);
2898 if(
r.GetTo() + 1 < max_align_len) {
2900partial_unaligned_section =
true;
2902insert_locs +=
TSeqRange(
r.GetTo()+1, max_align_len-1);
2914 if( insert_locs.
empty() && delete_locs.
empty() && !partial_unaligned_section)
2930mismatch_locs.
clear();
2932 for( ; prod_it != prod_end && genomic_it != genomic_end;
2933++prod_it, ++genomic_it) {
2934 if(*prod_it != *genomic_it) {
2939 unsignedtail_len =
Convert(prod_end - prod_it);
2941 for( ; prod_it != prod_end; ++prod_it) {
2942 if(*prod_it ==
'A') {
2947 if(tail_len && count_a >= tail_len * 0.8) {
2949 if(count_a < tail_len * 0.95) {
2950has_incomplete_polya_tail =
true;
2953 else if(tail_len) {
2955partial_unaligned_section =
true;
2958insert_locs +=
TSeqRange(end_pos-tail_len+1, end_pos);
2964 if(!insert_locs.
empty() ||
2965!delete_locs.
empty() ||
2966has_length_mismatch ||
2967has_incomplete_polya_tail ||
2968partial_unaligned_section) {
2969except_text =
"unclassified transcription discrepancy";
2971 else if(!mismatch_locs.
empty()) {
2972except_text =
"mismatches in transcription";
2976 x_SetComment(feat, cds_feat, cds_feat_on_mrna, align, mismatch_locs,
2977insert_locs, delete_locs, delete_sizes,
2978partial_unaligned_section);
2989 if(range_it->GetLength() > pos) {
2990pos += range_it->GetFrom();
2993pos -= range_it->GetLength();
2996 CSeq_locbase_loc(*mapped_protein_id, pos, pos);
2998mapped = to_genomic->
Map(*mrna_loc);
3008 const CSeq_feat* cds_feat_on_query_mrna,
3009 const CSeq_feat* cds_feat_on_transcribed_mrna,
3014|| ( cds_feat_on_query_mrna && !cds_feat_on_query_mrna->
IsSetProduct() )
3027 if( !(*it)->IsSetId() ) {
3032 const CFeat_id& feat_id = (*it)->GetId();
3055(
"rearrangement required for product");
3072 boolhas_start =
false;
3073 boolhas_stop =
false;
3075 boolhas_gap =
false;
3076 boolhas_indel =
false;
3082 if(cds_feat_on_query_mrna) {
3087corrected_cds_feat_on_query_mrna->
Assign(*cds_feat_on_query_mrna);
3091corrected_cds_feat_on_transcribed_mrna->
Assign(*cds_feat_on_transcribed_mrna);
3095 intcds_start_on_mrna = 0;
3096 intframe_on_mrna = 0;
3097 boolfilled_by_polya =
false;
3099 if(align !=
NULL) {
3111 stringexcept_text =
"unclassified translation discrepancy";
3114 if(clean_match_count) {
3115*clean_match_count = seq.
size();
3124 intmissing_end = 0;
3125 if(cds_feat_on_query_mrna) {
3147seq.
GetSeqData(cds_start_on_mrna + frame_on_mrna, cds_start_on_mrna + cds_len_on_query_mrna, mrna);
3148 if((missing_end == 1 || missing_end == 2) &&
3154filled_by_polya =
true;
3155 for(
size_tpos = mrna.size() - missing_end;
3156pos < mrna.size(); ++pos)
3172 if(xlate.size() && xlate[0] ==
'-') {
3175 stringfirst_codon = mrna.substr(0,3);
3179xlate[0] = first_aa[0];
3189 const CSeq_loc& cb_on_genome = (*it)->GetLoc();
3191 if(!cb_on_mrna)
continue;
3194 if(
r.GetLength() != 3) {
3203 switch((*it)->GetAa().Which()) {
3205src += (char)(*it)->GetAa().GetNcbieaa();
3210src += (char)(*it)->GetAa().GetNcbistdaa();
3215src += (char)(*it)->GetAa().GetNcbi8aa();
3227xlate[pos] = dst[0];
3239 if(corrected_cds_feat_on_transcribed_mrna) {
3257whole_product->
SetWhole(*cds_id);
3261 if(cds_feat_on_transcribed_mrna) {
3264 CSeq_loccds_feat_on_transcribed_mrna_loc;
3265cds_feat_on_transcribed_mrna_loc.
Assign(corrected_cds_feat_on_transcribed_mrna->
GetLocation());
3267cds_feat_on_transcribed_mrna_loc.
FlipStrand();
3273product_ranges.
clear();
3275product_ranges += loc_it.GetRange();
3284product_ranges.
GetTo());
3288 if((xlate.size() == product_ranges.
GetTo() + (filled_by_polya ? 1 : 2) ||
3290xlate[xlate.size() - 1] ==
'*')
3292xlate.resize(xlate.size() - 1);
3301 if( (product_ranges.
GetFrom()==0 && xlate.size() && xlate[0] ==
'M') ||
3306 if(product_ranges.
Empty()) {
3311 if(product_ranges[0].IsWhole()) {
3314 stringxlate_trimmed;
3316actual +=
whole.substr(range_it->GetFrom(), range_it->GetLength());
3317xlate_trimmed += xlate.substr(range_it->GetFrom(), range_it->GetLength());
3319xlate = xlate_trimmed;
3321 if(actual !=
whole) {
3336string::const_iterator it1 = actual.begin();
3337string::const_iterator it1_end = actual.end();
3338string::const_iterator it2 = xlate.begin();
3339string::const_iterator it2_end = xlate.end();
3341 for( ; it1 != it1_end && it2 != it2_end; ++it1, ++it2) {
3344mapped_protein_id, product_ranges, to_mrna, to_genomic);
3347 if(!mapped->
IsInt()) {
3364 "fTrustProteinSeq & fForceTranslateCds combination not implemented");
3367 charactual_aa = *it1;
3368code_break->
SetAa().SetNcbieaa(actual_aa);
3370}
else if(*it2 ==
'-'|| *it2 ==
'*') {
3372}
else if(*it1 != *it2) {
3374}
else if(clean_match_count && (!mapped ||
3377++*clean_match_count;
3381 if(has_stop && filled_by_polya
3386product_ranges, to_mrna, to_genomic);
3394feat.
SetComment() +=
"stop codon completed by the addition of " 3395 "3' A residues to the mRNA";
3406(feat.
GetComment().find(
"indel") != string::npos ||
3407feat.
GetComment().find(
"inserted") != string::npos ||
3408feat.
GetComment().find(
"deleted") != string::npos))
3413 if(actual.size() != xlate.size() ||
3414!has_stop || !has_start ||
3415has_gap || has_indel) {
3416except_text =
"unclassified translation discrepancy";
3418 else if(mismatch_count) {
3419except_text =
"mismatches in translation";
3428 stringexcept_text =
text;
3430list<string> except_toks;
3434 for(list<string>::iterator it = except_toks.begin();
3435it != except_toks.end(); ) {
3438*it ==
"annotated by transcript or proteomic data"||
3439*it ==
"unclassified transcription discrepancy"||
3440*it ==
"mismatches in transcription"||
3441*it ==
"unclassified translation discrepancy"||
3442*it ==
"mismatches in translation") {
3443except_toks.erase(it++);
3451 if( !except_text.empty() ) {
3455 if(it->GetSeqId()->IsOther() &&
3456it->GetSeqId()->GetOther().GetAccession()[0] ==
'N'&&
3457 string(
"MRP").find(it->GetSeqId()->GetOther().GetAccession()[1]) != string::npos)
3459except_text =
"annotated by transcript or proteomic data";
3462 stringproduct_type_string;
3464product_type_string =
"AA sequence";
3467product_type_string =
"RNA sequence";
3470product_type_string +=
", mRNA";
3473qualifier->
SetQual(
"inference");
3474qualifier->
SetVal(
"similar to "+ product_type_string +
" (same species):RefSeq:"+
3475it->GetSeqId()->GetOther().GetAccession() +
'.'+
3477feat.
SetQual().push_back(qualifier);
3480except_toks.push_back(except_text);
3482except_text =
NStr::Join(except_toks,
", ");
3484 if(except_text.empty()) {
3500 stringproduct_type_string =
"RNA sequence";
3504product_type_string +=
", mRNA";
3507 stringdb =
"INSD";
3516qualifier->
SetQual(
"inference");
3517qualifier->
SetVal(
"similar to "+ product_type_string +
" (same species):"+db+
":"+
3519feat.
SetQual().push_back(qualifier);
3526 const CSeq_feat* cds_feat_on_query_mrna,
3527 const CSeq_feat* cds_feat_on_transcribed_mrna,
3534align_ref.
Reset(align);
3543 for(CSeq_feat::TQual::iterator it = feat.
SetQual().begin();
3544it != feat.
SetQual().end(); )
3546 if((*it)->CanGetQual() && (*it)->GetQual() ==
"inference") {
3547it = feat.
SetQual().erase(it);
3553 if(feat.
GetQual().empty()) {
3571cds_feat_on_query_mrna, cds_feat_on_transcribed_mrna,
3572transcribed_mrna_seqloc_refs,
3598 static string s_Count(
unsignednum,
const string&item_name)
3611 boolpartial_unaligned_section)
3613 if(mismatch_locs.
empty() && insert_locs.
empty() && delete_locs.
empty() &&
3614!partial_unaligned_section &&
3621 stringrna_comment, cds_comment;
3630inserts_in_cds &= insert_locs;
3631deletes_in_cds &= delete_locs;
3633 if(cds_feat_on_mrna) {
3637cds_ranges += loc_it.GetRange();
3642align_info->
SetType().SetStr(
"AlignInfo");
3645 unsignedindel_count =
Convert(insert_locs.
size() + delete_locs.
size());
3646 unsignedframeshift_count = 0;
3647 unsignedpct_coverage = 100, cds_pct_coverage = 100;
3648 if(partial_unaligned_section) {
3655 if(cds_feat && cds_feat_on_mrna) {
3656 unsignedcds_indel_count = 0;
3658++(it->GetLength() % 3 ? frameshift_count : cds_indel_count);
3661++(delete_sizes[it->GetFrom()] % 3 ? frameshift_count
3664indel_count -= frameshift_count;
3665 unsignedcds_mismatch_count = 0;
3666 boolstart_codon_mismatch =
false;
3681 if(!single_interval_product) {
3683 "product is required to be a single interval");
3685 for(
TSeqPospos = start_pos; pos < start_pos +
prot.size(); ++pos)
3687 CSeq_locaa_loc(*cds_id, pos, pos);
3692 if(codon.
size() == 3) {
3694codon[0], codon[1], codon[2]);
3695 chartranslated_codon = pos == 0
3698 if(translated_codon !=
prot[pos]) {
3699++cds_mismatch_count;
3704start_codon_mismatch =
true;
3709 if(cds_mismatch_count || cds_indel_count || frameshift_count || cds_pct_coverage < 100)
3711cds_comment =
"The RefSeq protein";
3712 if(cds_mismatch_count) {
3713cds_comment +=
" has " 3714+
s_Count(cds_mismatch_count,
"substitution");
3716 if(frameshift_count) {
3717cds_comment += (cds_mismatch_count ?
", ":
" has ")
3718+
s_Count(frameshift_count,
"frameshift");
3720 if(cds_indel_count) {
3721cds_comment += (cds_mismatch_count || frameshift_count ?
", ":
" has ")
3722+
s_Count(cds_indel_count,
"non-frameshifting indel");
3724 if(cds_pct_coverage < 100) {
3725 if(cds_mismatch_count || cds_indel_count || frameshift_count) {
3726cds_comment +=
" and";
3728cds_comment +=
" aligns at " 3732cds_comment +=
" compared to this genomic sequence";
3734 if(start_codon_mismatch) {
3735align_info->
AddField(
"start_codon_mismatches", 1);
3738rna_comment =
"The RefSeq transcript";
3739 if(!mismatch_locs.
empty()) {
3740rna_comment +=
" has "+
3744 if(frameshift_count) {
3745rna_comment += (mismatch_locs.
empty() ?
" has ":
", ") +
3746 s_Count(frameshift_count,
"frameshift");
3747align_info->
AddField(
"num_frameshifts", (
int)frameshift_count);
3750rna_comment += (mismatch_locs.
empty() && !frameshift_count?
" has ":
", ") +
3751 s_Count(indel_count,
"non-frameshifting indel");
3752align_info->
AddField(
"num_nonframeshift_indel", (
int)indel_count);
3754 if(partial_unaligned_section) {
3755 if(!mismatch_locs.
empty() || indel_count || frameshift_count) {
3756rna_comment +=
" and";
3758rna_comment +=
" aligns at " 3762 if(rna_comment ==
"The RefSeq transcript") {
3763rna_comment.clear();
3765rna_comment +=
" compared to this genomic sequence";
3771deleted_bases = 0, cds_deleted_bases = 0,
3775 "Delete locations should always be one base");
3776deleted_bases += delete_sizes.
find(delete_it->GetFrom())->second;
3779 for(
TSeqPospos = insert_it->GetFrom();
3780pos <= insert_it->GetTo(); ++pos)
3787 "Delete locations should always be one base");
3788delete_codons.
insert((delete_it->GetFrom() -
3790cds_deleted_bases +=
3791delete_sizes.
find(delete_it->GetFrom())->second;
3798 switch((*it)->GetAa().Which()) {
3800aa = (*it)->GetAa().GetNcbieaa();
3805 stringsrc_string(1, (*it)->GetAa().GetNcbistdaa()),
3816 stringsrc_string(1, (*it)->GetAa().GetNcbi8aa()),
3833 unsignedinsert_codons_count =
Convert(insert_codons.
size()),
3834delete_codons_count =
Convert(delete_codons.
size());
3835 if(inserted_bases || deleted_bases) {
3838 if(inserted_bases) {
3839rna_comment +=
": inserted "+
s_Count(inserted_bases,
"base")
3840+
" in "+
s_Count(insert_codons_count,
"codon");
3842 if(deleted_bases) {
3844+
" deleted "+
s_Count(deleted_bases,
"base")
3845+
" in "+
s_Count(delete_codons_count,
"codon");
3847 if(cds_inserted_bases || cds_deleted_bases || code_breaks) {
3850 if(cds_inserted_bases) {
3851cds_comment +=
": inserted "+
s_Count(cds_inserted_bases,
"base")
3852+
" in "+
s_Count(insert_codons_count,
"codon");
3854 if(cds_deleted_bases) {
3856+
" deleted "+
s_Count(cds_deleted_bases,
"base")
3857+
" in "+
s_Count(delete_codons_count,
"codon");
3861+
" substituted "+
s_Count(code_breaks,
"base")
3862+
" at "+
s_Count(code_breaks,
"genomic stop codon");
3877 if(internal_unaligned > 0) {
3878align_info->
AddField(
"internal_unaligned", internal_unaligned);
3886align_info->
AddField(
"3prime_unaligned",
3887(
int)(product_length - align->
GetSeqStop(0) - 1));
3891 if(!rna_comment.empty()) {
3895}
else if(rna_feat.
GetComment().find(rna_comment) == string::npos) {
3896rna_feat.
SetComment() +=
"; "+ rna_comment;
3899 if(!cds_comment.empty()) {
3903}
else if(cds_feat->
GetComment().find(cds_comment) == string::npos) {
3904cds_feat->
SetComment() +=
"; "+ cds_comment;
3907 if(!align_info->
GetData().empty()) {
3908rna_feat.
AddExt(align_info);
3926}
else if(feat.
GetComment().find(comment) == string::npos) {
3932comment =
" added "+
s_Count(insert_length,
"base") +
" not found in genome assembly";
3944 stringensembl_match_rna, ensembl_match_cds;
3945vector<string> keywords;
3949 for(
CSeqdesc_CIdesc(rna_handle, desc_types); desc; ++desc) {
3950 if(desc->IsGenbank() && desc->GetGenbank().IsSetKeywords()) {
3951 for(
const string&keyword : desc->GetGenbank().GetKeywords()) {
3953(keyword ==
"MANE Select"|| keyword ==
"MANE Plus" 3954|| keyword ==
"MANE Plus Clinical"))
3957 if(keyword ==
"MANE Select") {
3958keywords.push_back(
"RefSeq Select");
3959}
else if(keyword ==
"MANE Plus Clinical") {
3960keywords.push_back(
"RefSeq Plus Clinical");
3963keywords.push_back(keyword);
3966}
else if(desc->IsUser() &&
3967desc->GetUser().HasField(
"MANE Ensembl match"))
3971 "/", ensembl_match_rna, ensembl_match_cds);
3974}
else if(desc->IsUser() && desc->GetUser().GetType().IsStr() &&
3975desc->GetUser().GetType().GetStr() ==
"RefGeneTracking"&&
3976need_location_check)
3978 if(desc->GetUser().HasField(
"EnsemblLocation")) {
3980desc->GetUser().GetField(
"EnsemblLocation"));
3981}
else if(desc->GetUser().HasField(
"SelectGeneLocation")) {
3985desc->GetUser().GetField(
"SelectGeneLocation")));
3990 if((match_found >=
eOverlap|| !need_location_check) && !keywords.empty())
3999 if(match_found ==
eExact&& !drop && !ensembl_match_rna.empty()) {
4001rna_ensembl_ref->
SetDb(
"Ensembl");
4002rna_ensembl_ref->
SetTag().SetStr(ensembl_match_rna);
4003rna_feat.
SetDbxref().push_back(rna_ensembl_ref);
4004 if(cds_feat && !ensembl_match_cds.empty()) {
4006cds_ensembl_ref->
SetDb(
"Ensembl");
4007cds_ensembl_ref->
SetTag().SetStr(ensembl_match_cds);
4008cds_feat->
SetDbxref().push_back(cds_ensembl_ref);
4018 if(!loc_field.
HasField(
"seq_id") || !loc_field.
HasField(
"from") ||
4022+
" doesn't have expected fields");
4041 if(!loc_genomic_acc.
Match(genomic_acc) || loc_strand != align.
GetSeqStrand(1))
4057 for(
const string&keyword : keywords) {
4059qualifier->
SetQual(
"tag");
4060qualifier->
SetVal(keyword);
4061feat.
SetQual().push_back(qualifier);
4085 id->Assign(*loc1->
GetId());
4091merged_loc = left_loc;
4092merged_loc->
Add(*right_loc);
4093merged_loc->
Add(*loc1);
4095merged_loc->
Add(*loc2);
4105x[1] += genomic_size;
4107x[3] += genomic_size;
4110x[0] += genomic_size;
4111x[1] += genomic_size;
4112}
else if(x[3] < x[0]) {
4113x[2] += genomic_size;
4114x[3] += genomic_size;
4118x[0] =
min(x[0], x[2]);
4119x[1] =
max(x[1], x[3]) - genomic_size;
4135 id->Assign(*loc.
GetId());
4143left_loc->
Add(*it.GetRangeAsSeq_loc());
4145right_loc->
Add(*it.GetRangeAsSeq_loc());
4155 swap(left_loc, right_loc);
4157left_loc->
Add(*right_loc);
4159 if(no_gap_at_origin) {
4163 if(interval.
GetFrom() == 0) {
4166 if(interval.
GetTo() == genomic_size-1) {
4196 returngenomic_ids.
size() > 1;
4206 const intk_gap_length,
4207 const intnext_exon_start)
4209 if(insert->
SetMix().Set().size() > 1) {
4213 if(insert->
SetMix().Set().size() > 0) {
4214 inthalf_intron_length = (next_exon_start - region_end)/2;
4215 intcopy_length =
min(k_gap_length, half_intron_length);
4216region_end += copy_length;
4218 if(region_begin < region_end) {
4222edited_sequence_seqloc->
SetMix().Set().push_back(genome_loc);
4224 if(copy_length < k_gap_length) {
4229edited_sequence_seqloc->
SetMix().Set().push_back(gap_loc);
4233edited_sequence_seqloc->
SetMix().Set().push_back(insert);
4236 if(copy_length < k_gap_length) {
4239edited_sequence_seqloc->
SetMix().Set().push_back(gap_loc);
4243region_begin = region_end;
4253 const CSeq_feat* cds_feat_on_query_mrna_ptr,
4254 boolcall_on_align_list)
4258align->
Assign(input_align);
4289 const intk_gap_length =
min(1000,
int(genomic_length));
4294spliced_seg.
SetExons().reverse();
4296 intregion_begin = 0;
4307 if(!seqid.
Match(*genomic_seqid)) {
4314insert->
SetMix().Set().push_back(loc);
4316 intexon_length = exon_stop - exon_start +1;
4317exon_stop = region_end + k_gap_length -1;
4318exon_start = region_end + k_gap_length - exon_length;
4324 if(!(region_end <= exon_start)) {
4337region_end = exon_stop +1;
4356 if(region_begin < (
int)genomic_length) {
4359genomic_length -1));
4360edited_sequence_seqloc->
SetMix().Set().push_back(genome_loc);
4365spliced_seg.
SetExons().reverse();
4373seqentry->
SetSeq(*bioseq);
4380seq_desc->
Assign(*desc);
4381bioseq->
SetDescr().Set().push_back(seq_desc);
4388seq_desc->
Assign(*desc);
4389bioseq->
SetDescr().Set().push_back(seq_desc);
4403gene_feat = gene->second;
4411call_on_align_list);
4413 m_scope->RemoveBioseq(bioseq_handle);
4414annot_local.
SetData().SetFtable().clear();
4418 genes[gene_id] = gene_feat;
4426 TSeqPoscds_insert_length = 0;
4429align->
Assign(input_align);
4436 if(!seqid.
Match(*genomic_seqid)) {
4440 if(cds_feat_on_query_mrna_ptr) {
4441 intcds_intersection_len =
4447 if(cds_intersection_len > 0) {
4448cds_insert_length += cds_intersection_len;
4452spliced_seg.
SetExons().erase(it);
4460gene_id, cds_feat_on_query_mrna_ptr,
4461call_on_align_list);
4465align->
Assign(input_align);
4468 if(entry.
IsSeq() &&
4473entry.
SetSeq().
SetInst().SetHist().SetAssembly().front() =
4484it != annot_local.
SetData().SetFtable().rend(); ++it) {
4486 if(
f.GetData().IsGene()) {
4490 if(
f.GetData().IsCdregion() && cds_insert_length==0) {
4502annot.
SetData().SetFtable().splice(annot.
SetData().SetFtable().end(),
4503annot_local.
SetData().SetFtable());
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
size_t GetSize(void) const
const CSeq_id * GetFirstId() const
TFeatureGeneratorFlags GetFlags() const
CRef< objects::CSeq_feat > ConvertAlignToAnnot(const objects::CSeq_align &align, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, Int8 gene_id=0, const objects::CSeq_feat *cdregion_on_mrna=NULL)
Convert an alignment to an annotation.
unique_ptr< SImplementation > m_impl
void RecomputePartialFlags(objects::CSeq_annot &annot)
Recompute the correct partial states for all features in this annotation.
void SetMinIntron(TSeqPos)
EIntronStitchThresholdFlags
void SetFeatureExceptions(objects::CSeq_feat &feat, const objects::CSeq_align *align=NULL)
Correctly mark exceptions on a feature.
void SetFlags(TFeatureGeneratorFlags)
void SetAllowedUnaligned(TSeqPos)
CFeatureGenerator(CRef< objects::CScope > scope)
CConstRef< objects::CSeq_align > CleanAlignment(const objects::CSeq_align &align)
Clean an alignment according to our best guess of its biological representation.
void SetIntronStitchThresholdFlags(EIntronStitchThresholdFlags)
@ fGenerateStableLocalIds
@ fAddTranslatedCDSAssembly
int TFeatureGeneratorFlags
void SetPartialFlags(CRef< objects::CSeq_feat > gene_feat, CRef< objects::CSeq_feat > mrna_feat, CRef< objects::CSeq_feat > cds_feat)
Mark the correct partial states for a set of features.
void ConvertLocToAnnot(const objects::CSeq_loc &loc, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, objects::CCdregion::EFrame frame=objects::CCdregion::eFrame_one, CRef< objects::CSeq_id > prot_id=CRef< objects::CSeq_id >(), CRef< objects::CSeq_id > rna_id=CRef< objects::CSeq_id >())
Convert genomic location to an annotation.
@Gb_qual.hpp User-defined methods of the data storage class.
static const CTrans_table & GetTransTable(int id)
static void SetFeatureExceptions(objects::CSeq_feat &feat, objects::CScope &scope, const objects::CSeq_align *align=NULL)
Correctly mark exceptions on a feature.
static void CreateGeneModelFromAlign(const objects::CSeq_align &align, objects::CScope &scope, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, TGeneModelCreateFlags flags=fDefaults, TSeqPos allowed_unaligned=10)
Create a gene model from an alignment; this will optionally promote all features through the alignment...
static void SetPartialFlags(objects::CScope &scope, CRef< objects::CSeq_feat > gene_feat, CRef< objects::CSeq_feat > mrna_feat, CRef< objects::CSeq_feat > cds_feat)
static void CreateGeneModelsFromAligns(const list< CRef< objects::CSeq_align > > &aligns, objects::CScope &scope, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, TGeneModelCreateFlags flags=fDefaults, TSeqPos allowed_unaligned=10)
int TGeneModelCreateFlags
static void RecomputePartialFlags(objects::CScope &scope, objects::CSeq_annot &annot)
position_type GetTo() const
position_type GetFrom() const
position_type GetCoveredLength(void) const
Returns total length covered by ranges in this collection, i.e.
double GetPercentCoverage(CScope &scope, const CSeq_align &align, unsigned query=0)
Compute percent coverage of the query (sequence 0) (range 0-100)
static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)
ESubtype GetSubtype(void) const
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
TSeqPos GetSeqStop(TDim row) const
CRef< CSeq_loc > CreateRowSeq_loc(TDim row) const
TDim CheckNumRows(void) const
Validators.
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
TSeqPos GetSeqStart(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
CSeq_feat_EditHandle –.
namespace ncbi::objects::
void AddExt(CRef< CUser_object > ext, TAddExt add_flags=0)
Add an extension by type in exts container.
void SetPartialStart(bool val, ESeqLocExtremes ext)
void SetPartialStop(bool val, ESeqLocExtremes ext)
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
static TSeqPos Pack(CSeq_data *in_seq, TSeqPos uLength=ncbi::numeric_limits< TSeqPos >::max())
TSeqPos InternalUnaligned(void) const
CSeq_feat_Handle GetFeatureWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
char GetStartResidue(int state) const
char GetCodonResidue(int state) const
static int SetCodonState(unsigned char ch1, unsigned char ch2, unsigned char ch3)
Template class for iteration on objects of class C.
int GetInt(void) const
get value
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user field.
const string & GetString(void) const
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
container_type::iterator iterator
const_iterator end() const
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
CMappedFeat GetCdsOnMrna(const objects::CSeq_id &rna_id, CScope &scope)
static const char * str(char *buf, int n)
static bool s_Contains(const TSeqRange &range1, const TSeqRange &range2)
Check whether range1 contains range2.
bool IsProteinAlign(const CSeq_align &align)
void AddInsertWithGaps(CRef< CSeq_loc > &edited_sequence_seqloc, CSeq_id &genomic_seqid, int &region_begin, int &region_end, int &offset, CRef< CSeq_loc > &insert, const int k_gap_length, const int next_exon_start)
const char * k_except_text_for_gap_filled_gnomon_model
const char * k_cds_comment
void AddCodeBreak(CSeq_feat &feat, CSeq_loc &loc, char ncbieaa)
static void s_TransformToNucpos(CProduct_pos &pos)
const char * k_rna_comment
void AddLiteral(CSeq_inst &inst, const string &seq, CSeq_inst::EMol mol_class)
string ExtractGnomonModelNum(const CSeq_id &seq_id)
void RenameGeneratedBioseqs(const CSeq_id &query_rna_id, CSeq_id &transcribed_rna_id, CRef< CSeq_feat > cds_feat_on_query_mrna, CRef< CSeq_feat > cds_feat_on_genome_with_translated_product)
bool IsContinuous(const CSeq_loc &loc)
static string s_Count(unsigned num, const string &item_name)
static CRef< CSeq_loc > s_MapSingleAA(TSeqPos pos, CRef< CSeq_id > mapped_protein_id, const CRangeCollection< TSeqPos > &product_ranges, CRef< CSeq_loc_Mapper > to_mrna, CRef< CSeq_loc_Mapper > to_genomic)
static const CMolInfo * s_GetMolInfo(const CBioseq_Handle &handle)
Return the mol-info object for a given sequence.
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define ERASE_ITERATE(Type, Var, Cont)
Non-constant version with ability to erase current element, if container permits.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
TValue Add(int delta) THROWS_NONE
Atomically add value (=delta), and return new counter value.
#define NCBI_ASSERT(expr, mess)
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const TPrim & Get(void) const
const string AsFastaString(void) const
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
CConstRef< CSeq_id > GetSeqId(void) const
EAccessionInfo
For IdentifyAccession (below)
CSeq_id::EAccessionInfo IdentifyAccession(void) const
CSeq_id & Set(const CTempString &the_id, TParseFlags flags=fParse_AnyRaw)
Reassign based on flat specifications; arguments interpreted as with constructors.
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
void SetPacked_int(TPacked_int &v)
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
ENa_strand GetStrand(void) const
Get the location's strand.
void ChangeToPackedInt(void)
Works only if location is currently an interval, point, packed-int (handled trivially),...
bool IsReverseStrand(void) const
Return true if all ranges have reverse strand.
void FlipStrand(void)
Flip the strand (e.g. plus to minus)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
TRange GetTotalRange(void) const
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
CRef< CSeq_loc > Merge(TOpFlags flags, ISynonymMapper *syn_mapper) const
All functions create and return a new seq-loc object.
const_iterator end(void) const
const_iterator begin(void) const
int Compare(const CSeq_loc &loc) const
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
void SetPartialStart(bool val, ESeqLocExtremes ext)
set / remove e_Lim fuzz on start or stop (lt/gt - indicating partial interval)
CRef< CSeq_loc > Intersect(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper) const
Find the intersection with the seq-loc, merge/sort resulting ranges depending on flags.
void SetStrand(ENa_strand strand)
Set the strand for all of the location's ranges.
void SetPartialStop(bool val, ESeqLocExtremes ext)
bool IsPartialStop(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
@ eOrder_Biological
Iterate sub-locations in biological order.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
const CMolInfo * GetMolInfo(const CBioseq &bioseq)
Retrieve the MolInfo object for a given bioseq handle.
const COrg_ref * GetOrg_refOrNull(const CBioseq_Handle &handle)
Return the pointer to org-ref associated with a given sequence or null if there is no org-ref associa...
const COrg_ref & GetOrg_ref(const CBioseq_Handle &handle)
Return the org-ref associated with a given sequence.
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
@ fIs5PrimePartial
= 0x4 Translate first codon even if not start codon (because sequence is 5' partial)
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function
@ eGetId_ForceAcc
return only an accession based seq-id
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CSeq_annot_Handle AddSeq_annot(CSeq_annot &annot, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add Seq-annot, return its CSeq_annot_Handle.
CSeq_loc_Mapper_Base & SetMergeAll(void)
Merge any abutting or overlapping intervals.
@ eProductToLocation
Map from the feature's product to location.
@ eLocationToProduct
Map from the feature's location to product.
@ fAlign_Dense_seg_TotalRange
Ignore internal dense-seg structure - map each dense-seg according to the total ranges involved.
vector< CSeq_id_Handle > TId
const CTSE_Handle & GetTSE_Handle(void) const
Get CTSE_Handle of containing TSE.
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
bool IsSetDbxref(void) const
virtual CConstRef< CSeq_feat > GetSeq_feat(void) const
const CSeqFeatData & GetData(void) const
TSeqPos GetBioseqLength(void) const
CSeq_feat_EditHandle AddFeat(const CSeq_feat &new_obj) const
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle Throws an exception if none is available.
TInst_Topology GetInst_Topology(void) const
CSeq_annot_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
const CSeq_feat::TDbxref & GetDbxref(void) const
CRef< CSeq_loc > GetRangeSeq_loc(TSeqPos start, TSeqPos stop, ENa_strand strand=eNa_strand_unknown) const
Return CSeq_loc referencing the given range and strand on the bioseq If start == 0,...
const TId & GetId(void) const
@ eCoding_Ncbi
Set coding to binary coding (Ncbi4na or Ncbistdaa)
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
SAnnotSelector & ExcludeFeatSubtype(TFeatSubtype subtype)
Exclude feature subtype from the search.
SAnnotSelector & SetResolveAll(void)
SetResolveAll() is equivalent to SetResolveMethod(eResolve_All).
TSeqPos GetPos(void) const
const CSeq_loc & GetLocation(void) const
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
SAnnotSelector & SetAdaptiveDepth(bool value=true)
SetAdaptiveDepth() requests to restrict subsegment resolution depending on annotations found on lower...
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
SAnnotSelector & SetResolveNone(void)
SetResolveNone() is equivalent to SetResolveMethod(eResolve_None).
bool IsInGap(TSeqPos pos) const
true if sequence at 0-based position 'pos' has gap Note: this method is not MT-safe,...
const CSeqMap & GetSeqMap(void) const
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
const_iterator begin(void) const
const_iterator end(void) const
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
bool NotNull(void) const THROWS_NONE
Check if pointer is not null – same effect as NotEmpty().
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
bool NotNull(void) const THROWS_NONE
Check if pointer is not null – same effect as NotEmpty().
TObjectType * GetNonNullPointer(void) const
Get pointer value and throw a null pointer exception if pointer is null.
int64_t Int8
8-byte (64-bit) signed integer
position_type GetLength(void) const
bool NotEmpty(void) const
bool IntersectingWith(const TThisType &r) const
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
static TThisType GetWhole(void)
static position_type GetWholeTo(void)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
NCBI_NS_STD::string::size_type SIZE_TYPE
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
string AsString(const CTimeFormat &format=kEmptyStr, TSeconds out_tz=eCurrentTimeZone) const
Transform time to string.
@ eCurrent
Use current time. See also CCurrentTime.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
bool IsSetLocus(void) const
Official gene symbol Check if a value has been assigned to Locus data member.
const TLocus & GetLocus(void) const
Get the Locus member data.
const TTag & GetTag(void) const
Get the Tag member data.
void SetTag(TTag &value)
Assign a value to Tag data member.
bool IsId(void) const
Check if variant Id is selected.
bool CanGetDb(void) const
Check if it is safe to call GetDb method.
const TDb & GetDb(void) const
Get the Db member data.
const TStr & GetStr(void) const
Get the variant data.
TStr & SetStr(void)
Select the variant.
const TData & GetData(void) const
Get the Data member data.
void SetType(TType &value)
Assign a value to Type data member.
const TLabel & GetLabel(void) const
Get the Label member data.
void SetDb(const TDb &value)
Assign a value to Db data member.
TId GetId(void) const
Get the variant data.
@ eLim_circle
artificial break at origin of circle
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
bool IsSetTaxname(void) const
preferred formal name Check if a value has been assigned to Taxname data member.
const TName & GetName(void) const
Get the Name member data.
bool IsSetName(void) const
protein name Check if a value has been assigned to Name data member.
TType GetType(void) const
Get the Type member data.
EType
type of RNA feature
bool CanGetType(void) const
Check if it is safe to call GetType method.
@ eType_scRNA
will become ncRNA, with RNA-gen.class = scRNA
@ eType_snoRNA
will become ncRNA, with RNA-gen.class = snoRNA
@ eType_ncRNA
non-coding RNA; subsumes snRNA, scRNA, snoRNA
@ eType_snRNA
will become ncRNA, with RNA-gen.class = snRNA
const TDonor_after_exon & GetDonor_after_exon(void) const
Get the Donor_after_exon member data.
void SetGenomic_id(TGenomic_id &value)
Assign a value to Genomic_id data member.
TNucpos & SetNucpos(void)
Select the variant.
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
bool IsSetParts(void) const
Basic segments are always in biological order. Check if a value has been assigned to Parts data member.
bool CanGetGenomic_id(void) const
Check if it is safe to call GetGenomic_id method.
void ResetGenomic_id(void)
Reset Genomic_id data member.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
const TAcceptor_before_exon & GetAcceptor_before_exon(void) const
Get the Acceptor_before_exon member data.
bool CanGetGenomic_id(void) const
Check if it is safe to call GetGenomic_id method.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
bool IsSetAcceptor_before_exon(void) const
splice sites Check if a value has been assigned to Acceptor_before_exon data member.
TExons & SetExons(void)
Assign a value to Exons data member.
TProduct_length GetProduct_length(void) const
Get the Product_length member data.
bool IsSetPoly_a(void) const
start of poly(A) tail on the transcript For sense transcripts: aligned product positions < poly-a <= ...
void SetDim(TDim value)
Assign a value to Dim data member.
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
void ResetGenomic_strand(void)
Reset Genomic_strand data member.
bool IsSetGenomic_strand(void) const
genomic-strand represents the strand of translation. Check if a value has been assigned to Genomic_str...
void SetGenomic_start(TGenomic_start value)
Assign a value to Genomic_start data member.
void SetType(TType value)
Assign a value to Type data member.
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
bool CanGetSegs(void) const
Check if it is safe to call GetSegs method.
list< CRef< CSpliced_seg_modifier > > TModifiers
bool CanGetGenomic_strand(void) const
Check if it is safe to call GetGenomic_strand method.
bool IsSetGenomic_strand(void) const
Check if a value has been assigned to Genomic_strand data member.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
void SetGenomic_id(TGenomic_id &value)
Assign a value to Genomic_id data member.
void SetGenomic_end(TGenomic_end value)
Assign a value to Genomic_end data member.
const TBases & GetBases(void) const
Get the Bases member data.
list< CRef< CSpliced_exon_chunk > > TParts
bool IsSetProduct_length(void) const
length of the product, in bases/residues from this (or from poly-a if present), a 3' unaligned length...
TPoly_a GetPoly_a(void) const
Get the Poly_a member data.
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
void SetGenomic_strand(TGenomic_strand value)
Assign a value to Genomic_strand data member.
bool IsSpliced(void) const
Check if variant Spliced is selected.
bool CanGetPoly_a(void) const
Check if it is safe to call GetPoly_a method.
const TModifiers & GetModifiers(void) const
Get the Modifiers member data.
TNucpos GetNucpos(void) const
Get the variant data.
const TSegs & GetSegs(void) const
Get the Segs member data.
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
bool IsSetDonor_after_exon(void) const
Check if a value has been assigned to Donor_after_exon data member.
bool CanGetGenomic_strand(void) const
Check if it is safe to call GetGenomic_strand method.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Diag
both sequences are represented, there is sufficient similarity between product and genomic sequences....
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
@ eProduct_type_transcript
void SetAa(TAa &value)
Assign a value to Aa data member.
TXref & SetXref(void)
Assign a value to Xref data member.
void SetQual(const TQual &value)
Assign a value to Qual data member.
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
vector< CRef< CDbtag > > TDbxref
TDbxref & SetDbxref(void)
Assign a value to Dbxref data member.
void ResetPartial(void)
Reset Partial data member.
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
bool IsSetQual(void) const
Qualifiers. Check if a value has been assigned to Qual data member.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetCode(void) const
Genetic code used. Check if a value has been assigned to Code data member.
void SetLocation(TLocation &value)
Assign a value to Location data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
TIds & SetIds(void)
Assign a value to Ids data member.
const TLoc & GetLoc(void) const
Get the Loc member data.
void SetComment(const TComment &value)
Assign a value to Comment data member.
void ResetExcept(void)
Reset Except data member.
void ResetCode_break(void)
Reset Code_break data member.
void SetPartial(TPartial value)
Assign a value to Partial data member.
void SetProduct(TProduct &value)
Assign a value to Product data member.
const TQual & GetQual(void) const
Get the Qual member data.
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
const TLocal & GetLocal(void) const
Get the variant data.
void ResetExcept_text(void)
Reset Except_text data member.
bool IsSetXref(void) const
Cite other relevant features. Check if a value has been assigned to Xref data member.
const TLocation & GetLocation(void) const
Get the Location member data.
void SetExcept(TExcept value)
Assign a value to Except data member.
bool IsLocal(void) const
Check if variant Local is selected.
TLocal & SetLocal(void)
Select the variant.
bool IsGene(void) const
Check if variant Gene is selected.
void ResetId(void)
Reset Id data member.
list< CRef< CCode_break > > TCode_break
TFrame GetFrame(void) const
Get the Frame member data.
const TData & GetData(void) const
Get the Data member data.
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
bool IsSetExcept_text(void) const
Explain if except=TRUE. Check if a value has been assigned to Except_text data member.
const TCode & GetCode(void) const
Get the Code member data.
const TDbxref & GetDbxref(void) const
Get the Dbxref member data.
void SetData(TData &value)
Assign a value to Data data member.
TCode_break & SetCode_break(void)
Assign a value to Code_break data member.
const TCdregion & GetCdregion(void) const
Get the variant data.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
const TProduct & GetProduct(void) const
Get the Product member data.
const TComment & GetComment(void) const
Get the Comment member data.
void SetVal(const TVal &value)
Assign a value to Val data member.
const TGene & GetGene(void) const
Get the variant data.
void SetExcept_text(const TExcept_text &value)
Assign a value to Except_text data member.
const TXref & GetXref(void) const
Get the Xref member data.
vector< CRef< CSeqFeatXref > > TXref
bool CanGetProduct(void) const
Check if it is safe to call GetProduct method.
const TRna & GetRna(void) const
Get the variant data.
bool IsSetDbxref(void) const
Support for xref to other databases. Check if a value has been assigned to Dbxref data member.
TQual & SetQual(void)
Assign a value to Qual data member.
const TCode_break & GetCode_break(void) const
Get the Code_break member data.
bool IsSetProduct(void) const
Product of process. Check if a value has been assigned to Product data member.
bool IsRna(void) const
Check if variant Rna is selected.
void ResetQual(void)
Reset Qual data member.
bool IsSetCode_break(void) const
Individual exceptions. Check if a value has been assigned to Code_break data member.
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
@ e_not_set
No variant selected.
@ eFrame_three
reading frame
@ e_MaxChoice
== e_Variation+1
@ e_Ncbi8aa
NCBI8aa code.
@ e_Ncbieaa
ASCII value of NCBIeaa code.
void SetTo(TTo value)
Assign a value to To data member.
bool IsMix(void) const
Check if variant Mix is selected.
bool IsEmpty(void) const
Check if variant Empty is selected.
list< CRef< CSeq_interval > > Tdata
ENa_strand
strand of nucleic acid
bool IsOther(void) const
Check if variant Other is selected.
void SetId(TId &value)
Assign a value to Id data member.
TFrom GetFrom(void) const
Get the From member data.
bool IsGeneral(void) const
Check if variant General is selected.
list< CRef< CSeq_loc > > Tdata
bool IsEquiv(void) const
Check if variant Equiv is selected.
E_Choice Which(void) const
Which variant is currently selected.
void SetFrom(TFrom value)
Assign a value to From data member.
const Tdata & Get(void) const
Get the member data.
TVersion GetVersion(void) const
Get the Version member data.
const Tdata & Get(void) const
Get the member data.
const TEquiv & GetEquiv(void) const
Get the variant data.
const TOther & GetOther(void) const
Get the variant data.
void SetFuzz_to(TFuzz_to &value)
Assign a value to Fuzz_to data member.
void SetFuzz_from(TFuzz_from &value)
Assign a value to Fuzz_from data member.
bool IsLocal(void) const
Check if variant Local is selected.
TStrand GetStrand(void) const
Get the Strand member data.
const TGeneral & GetGeneral(void) const
Get the variant data.
TTo GetTo(void) const
Get the To member data.
bool IsInt(void) const
Check if variant Int is selected.
const TInt & GetInt(void) const
Get the variant data.
bool IsNull(void) const
Check if variant Null is selected.
void SetStrand(TStrand value)
Assign a value to Strand data member.
const TMix & GetMix(void) const
Get the variant data.
const TAccession & GetAccession(void) const
Get the Accession member data.
@ e_not_set
No variant selected.
const TSeq & GetSeq(void) const
Get the variant data.
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
TClass GetClass(void) const
Get the Class member data.
bool IsSeq(void) const
Check if variant Seq is selected.
void SetClass(TClass value)
Assign a value to Class data member.
list< CRef< CSeq_entry > > TSeq_set
TSeq & SetSeq(void)
Select the variant.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_nuc_prot
nuc acid and coded proteins
void SetCompleteness(TCompleteness value)
Assign a value to Completeness data member.
TLiteral & SetLiteral(void)
Select the variant.
void SetLength(TLength value)
Assign a value to Length data member.
void SetData(TData &value)
Assign a value to Data data member.
TId & SetId(void)
Assign a value to Id data member.
bool CanGetBiomol(void) const
Check if it is safe to call GetBiomol method.
const TInst & GetInst(void) const
Get the Inst member data.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
bool IsSetAssembly(void) const
how was this assembled? Check if a value has been assigned to Assembly data member.
TTopology GetTopology(void) const
Get the Topology member data.
const TIupacna & GetIupacna(void) const
Get the variant data.
void SetExt(TExt &value)
Assign a value to Ext data member.
void SetHist(THist &value)
Assign a value to Hist data member.
const TLiteral & GetLiteral(void) const
Get the variant data.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
bool IsSetHist(void) const
Sequence history. Check if a value has been assigned to Hist data member.
bool IsSetExt(void) const
Extensions for special types. Check if a value has been assigned to Ext data member.
bool IsSetInst(void) const
The sequence data. Check if a value has been assigned to Inst data member.
TLength GetLength(void) const
Get the Length member data.
TLength GetLength(void) const
Get the Length member data.
void SetInst(TInst &value)
Assign a value to Inst data member.
const THist & GetHist(void) const
Get the Hist member data.
const TExt & GetExt(void) const
Get the Ext member data.
TBiomol GetBiomol(void) const
Get the Biomol member data.
void SetBiomol(TBiomol value)
Assign a value to Biomol data member.
EMol
molecule class in living organism
void SetDescr(TDescr &value)
Assign a value to Descr data member.
const TDelta & GetDelta(void) const
Get the variant data.
const TNcbi4na & GetNcbi4na(void) const
Get the variant data.
void SetRepr(TRepr value)
Assign a value to Repr data member.
list< CRef< CSeq_feat > > TFtable
const TNcbi2na & GetNcbi2na(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
bool IsLiteral(void) const
Check if variant Literal is selected.
bool IsSetSeq_data(void) const
May have the data. Check if a value has been assigned to Seq_data data member.
void SetLength(TLength value)
Assign a value to Length data member.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
void ResetExt(void)
Reset Ext data member.
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
const TNcbi8na & GetNcbi8na(void) const
Get the variant data.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
TMolinfo & SetMolinfo(void)
Select the variant.
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
void SetMol(TMol value)
Assign a value to Mol data member.
void ResetSeq_data(void)
Reset Seq_data data member.
E_Choice Which(void) const
Which variant is currently selected.
@ eRepr_delta
sequence made by changes (delta) to others
@ eRepr_raw
continuous sequence
@ eCompleteness_complete
complete biological entity
@ eCompleteness_no_left
missing 5' or NH3 end
@ eCompleteness_partial
partial but no details given
@ eCompleteness_no_right
missing 3' or COOH end
@ eCompleteness_no_ends
missing both ends
@ e_Ncbi2na
2 bit nucleic acid code
@ e_Iupacna
IUPAC 1 letter nuc acid code.
@ e_Ncbi8na
8 bit extended nucleic acid code
@ e_Ncbi4na
4 bit nucleic acid code
@ e_Iupacaa
IUPAC 1 letter amino acid code.
@ eBiomol_pre_RNA
precursor RNA of any sort really
@ eBiomol_snoRNA
small nucleolar RNA
@ eBiomol_transcribed_RNA
transcribed RNA other than existing classes
@ e_Org
if all from one organism
@ e_User
user defined object
@ e_Genbank
GenBank specific info.
@ e_Molinfo
info on the molecule and techniques
@ e_Source
source of materials, includes Org-ref
bm::gap_word_t gap_length(const bm::gap_word_t *buf) noexcept
Returns GAP block length.
unsigned int
A callback function used to compare two keys in a database.
where both are integers</td>\n<td></td>\n</tr>\n<tr>\n<td>tse</td>\n<td>optional</td>\n<td>String</td>\n<td class=\"description\">TSE option controls what blob is whole
static void text(MDB_val *v)
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
int GetGeneticCode(const CBioseq_Handle &bsh)
const GenericPointer< typename T::ValueType > T2 value
Defines: CTimeFormat - storage class for time format.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
void IncludeSourceLocs(bool b=true)
CRef< CSeq_loc > Map(const CSeq_loc &loc)
CSeq_align::TDim GetRnaRow() const
CSeq_align::TDim GetGenomicRow() const
SMapper(const CSeq_align &aln, CScope &scope, TSeqPos allowed_unaligned=10, CSeq_loc_Mapper::TMapOptions opts=0)
CSeq_align::TDim m_genomic_row
CRef< CSeq_loc > x_GetLocFromSplicedExons(const CSeq_align &aln) const
This has special logic to set partialness based on alignment properties. In addition,...
const CSeq_loc & GetRnaLoc()
CRef< CSeq_loc_Mapper > x_Mapper()
TSeqPos m_allowed_unaligned
CRef< CSeq_feat > x_MapFeature(const objects::CSeq_feat *feature_on_mrna, const CSeq_align &align, CRef< CSeq_loc > loc, CSeq_loc_Mapper::TMapOptions opts, TSeqPos &offset)
void x_AddKeywordQuals(CSeq_feat &feat, const vector< string > &keywords)
CRef< CSeq_feat > x_CreateCdsFeature(CConstRef< CSeq_feat > cds_feat_on_query_mrna, CRef< objects::CSeq_feat > cds_feat_on_transcribed_mrna, list< CRef< CSeq_loc > > &transcribed_mrna_seqloc_refs, const CSeq_align &align, CRef< CSeq_loc > loc, const CTime &time, size_t model_num, CBioseq_set &seqs, CSeq_loc_Mapper::TMapOptions opts)
CConstRef< objects::CSeq_align > CleanAlignment(const objects::CSeq_align &align_in)
void MaximizeTranslation(objects::CSeq_align &align)
vector< SExon > GetExons(const CSeq_align &align)
CRef< CSeq_loc > MergeSeq_locs(const CSeq_loc *loc1, const CSeq_loc *loc2=NULL)
void RecalculateScores(CSeq_align &align)
void x_SetCommentForGapFilledModel(CSeq_feat &feat, TSeqPos insert_length)
void RecomputePartialFlags(objects::CSeq_annot &annot)
void x_SetQualForGapFilledModel(CSeq_feat &feat, CSeq_id_Handle id)
TFeatureGeneratorFlags m_flags
void x_CollectMrnaSequence(CSeq_inst &inst, const CSeq_align &align, const CSeq_loc &loc, bool add_unaligned_parts=true, bool mark_transcript_deletions=true, bool *has_gap=NULL, bool *has_indel=NULL)
void x_CopyAdditionalFeatures(const CBioseq_Handle &handle, SMapper &mapper, CSeq_annot &annot)
SImplementation(objects::CScope &scope)
void x_CreateGeneFeature(CRef< CSeq_feat > &gene_feat, const CBioseq_Handle &handle, SMapper &mapper, CRef< CSeq_loc > loc, const CSeq_id &genomic_id, Int8 gene_id=0)
CRef< CSeq_id > x_CreateMrnaBioseq(const CSeq_align &align, CConstRef< CSeq_loc > loc, const CTime &time, size_t model_num, CBioseq_set &seqs, CConstRef< CSeq_feat > cds_feat_on_query_mrna, CRef< CSeq_feat > &cds_feat_on_transcribed_mrna)
void SetPartialFlags(CRef< CSeq_feat > gene_feat, CRef< CSeq_feat > mrna_feat, CRef< CSeq_feat > cds_feat)
const CBioseq & x_CreateProteinBioseq(CSeq_loc *cds_loc, CRef< CSeq_feat > cds_feat_on_transcribed_mrna, list< CRef< CSeq_loc > > &transcribed_mrna_seqloc_refs, const CTime &time, size_t model_num, CBioseq_set &seqs)
void ClearScores(CSeq_align &align)
string x_ConstructRnaName(const CBioseq_Handle &handle)
void x_SetComment(CSeq_feat &rna_feat, CSeq_feat *cds_feat, const CSeq_feat *cds_feat_on_mrna, const CSeq_align *align, const CRangeCollection< TSeqPos > &mismatch_locs, const CRangeCollection< TSeqPos > &insert_locs, const CRangeCollection< TSeqPos > &delete_locs, map< TSeqPos, TSeqPos > &delete_sizes, bool partial_unaligned_edge)
void SetFeatureExceptions(objects::CSeq_feat &feat, const objects::CSeq_align *align, objects::CSeq_feat *cds_feat=NULL, const objects::CSeq_feat *cds_feat_on_query_mrna=NULL, const objects::CSeq_feat *cds_feat_on_transcribed_mrna=NULL, list< CRef< CSeq_loc > > *transcribed_mrna_seqloc_refs=NULL, TSeqPos *clean_match_count=NULL)
void x_AddSelectMarkup(const CSeq_align &align, const CBioseq_Handle &rna_handle, const CSeq_id &genomic_acc, CSeq_feat &rna_feat, CSeq_feat *cds_feat)
void x_CheckInconsistentDbxrefs(CConstRef< CSeq_feat > gene_feat, CConstRef< CSeq_feat > cds_feat)
void x_HandleRnaExceptions(CSeq_feat &feat, const CSeq_align *align, CSeq_feat *cds_feat, const CSeq_feat *cds_feat_on_mrna)
Handle feature exceptions.
CRef< CSeq_feat > ConvertAlignToAnnot(const objects::CSeq_align &align, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, Int8 gene_id, const objects::CSeq_feat *cdregion, bool call_on_align_list)
void x_HandleCdsExceptions(CSeq_feat &feat, const CSeq_align *align, const CSeq_feat *cds_feat_on_query_mrna, const CSeq_feat *cds_feat_on_transcribed_mrna, list< CRef< CSeq_loc > > *transcribed_mrna_seqloc_refs, TSeqPos *clean_match_count)
CRef< CSeq_feat > x_CreateMrnaFeature(CRef< CSeq_loc > loc, const CSeq_id &query_rna_id, CSeq_id &transcribed_rna_id, CConstRef< CSeq_feat > cds_feat_on_query_mrna)
bool x_RequiresPolyAForStopCodon(const objects::CSeq_id &mrna)
bool HasMixedGenomicIds(const CSeq_align &input_align)
e_MatchType x_CheckMatch(const CSeq_align &align, const CSeq_id &genomic_acc, const CUser_field &loc_field)
void x_SetExceptText(CSeq_feat &feat, const string &except_text)
void TrimHolesToCodons(objects::CSeq_align &align)
CRef< CSeq_loc > FixOrderOfCrossTheOriginSeqloc(const CSeq_loc &loc, TSeqPos outside_point, CSeq_loc::TOpFlags flags=CSeq_loc::fSort)
CRef< CSeq_feat > x_CreateNcRnaFeature(const objects::CSeq_feat *ncrnafeature_on_mrna, const CSeq_align &align, CConstRef< CSeq_loc > loc, CSeq_loc_Mapper::TMapOptions opts)
void TransformProteinAlignToTranscript(CConstRef< CSeq_align > &align, CRef< CSeq_feat > &cd_feat)
CRef< CSeq_feat > ConvertMixedAlignToAnnot(const CSeq_align &input_align, CSeq_annot &annot, CBioseq_set &seqs, Int8 gene_id, const CSeq_feat *cds_feat_on_query_mrna_ptr, bool call_on_align_list)
CRef< objects::CScope > m_scope
void StitchSmallHoles(objects::CSeq_align &align)
const value_slice::CValueConvert< value_slice::SRunTimeCP, FROM > Convert(const FROM &value)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4