sequence;
104 boolfound_ambig =
false;
105string::iterator it = seq_string.begin();
106 while(it != seq_string.end() && !found_ambig) {
107 if(*it !=
'A'&& *it !=
'T'&& *it !=
'C'&& *it !=
'G'&& *it !=
'U') {
114 "Feature comment indicates ambiguity in stop codon " 115 "but no ambiguities are present in stop codon.",
m_Feat);
123 boolsuppress =
false;
132 "Apparent EC number in CDS comment");
145 "CDS has both RNA editing /exception and /transl_except qualifiers");
150 #define FOR_EACH_SEQID_ON_BIOSEQ_HANDLE(Itr, Var) \ 151 ITERATE (CBioseq_Handle::TId, Itr, Var.GetId()) 156is_nt = is_ng = is_nw = is_nc =
false;
160 switch(sid.
Which()) {
182 bool& is_nt,
bool& is_ng,
bool& is_nw,
bool& is_nc)
184is_nt = is_ng = is_nw = is_nc =
false;
202 boolis_nt, is_ng, is_nw, is_nc;
216(is_nt || is_ng || is_nw),
239 return(*it)->GetId();
253}
else if((*it)->IsName()) {
268 stringerror_message;
270 boolgot_dash = transl_start ==
'-';
271 stringcodon_desc = got_dash ?
"illegal":
"ambiguous";
273 " internal stops (and "+ codon_desc +
" start codon). Genetic code ["+ gccode +
"]";
276 " internal stops. Genetic code ["+ gccode +
"]";
278 returnerror_message;
289 stringerror_message;
291 boolgot_dash = transl_prot[0] ==
'-';
292 stringcodon_desc = got_dash ?
"illegal":
"ambiguous";
294 " internal stops (and "+ codon_desc +
" start codon). Genetic code ["+ gccode +
"]";
297 " internal stops. Genetic code ["+ gccode +
"]";
299 returnerror_message;
305 boolgot_dash = first_char ==
'-';
306 stringcodon_desc = got_dash ?
"Illegal":
"Ambiguous";
307 stringp_word = got_dash ?
"Probably":
"Possibly";
312 stringerror_message;
314 if(internal_stop_count > 0) {
315error_message = codon_desc +
" start codon (and "+
317 " internal stops). "+ p_word +
" wrong genetic code ["+
320error_message = codon_desc +
" start codon used. Wrong genetic code ["+
321gccode +
"] or protein should be partial";
323 returnerror_message;
349 "Unable to fetch CDS product '"+
label+
"'");
353 boolis_nt, is_ng, is_nw, is_nc;
363 "No protein Bioseq given");
366 boolunclassified_except =
false;
368unclassified_except =
true;
376 "Unparsed transl_except qual (but protein is okay). Skipped");
379 "Unparsed transl_except qual. Skipped");
399 "Unable to translate");
409 "Unnecessary alternative start codon exception");
438 "Suspicious CDS location - reading frame > 1 but not 5' partial");
448 "Suspicious CDS location - reading frame > 1 and not at consensus splice site");
453 "Missing stop codon");
457 "Got stop codon, but 3'end is labeled partial");
461 "Start of location should probably be partial");
466 " base(s) past stop codon");
473 "] is more than 120% of the ";
483 boolrna_editing =
false;
492 "] does not match ";
496 msg+=
"translation length ["+
500 msg+=
" (RNA editing present)";
506 boolmismatch_except =
false;
508mismatch_except =
true;
519+
") are not equal");
524 "End of location should probably be partial");
528 "This SeqFeat should not be partial");
533 "CDS has exception but passes translation test");
538 "CDS has unclassified exception but only difference is " 545 "CDS has unnecessary translated product replaced exception");
577 size_tnum_mismatches = mismatches.size();
579 if(num_mismatches > 10) {
586 ", residue in protein [";
587 msg+= mismatches.front().prot_res;
588 msg+=
"] != translation [";
589 msg+= mismatches.front().transl_res;
591 if(!nuclocstr.empty()) {
592 msg+=
" at "+ nuclocstr;
597 ", residue in protein [";
598 msg+= mismatches.back().prot_res;
599 msg+=
"] != translation [";
600 msg+= mismatches.back().transl_res;
602 if(!nuclocstr.empty()) {
603 msg+=
" at "+ nuclocstr;
612 msg+=
". Genetic code ["+ gccode +
"]";
616 for(
size_t i= 0;
i< mismatches.size(); ++
i) {
618 if(mismatches[
i].pos == 0 && mismatches[
i].transl_res ==
'-') {
623 if(mismatches[
i].prot_res ==
'X'&&
624(mismatches[
i].transl_res ==
'B'|| mismatches[
i].transl_res ==
'Z'|| mismatches[
i].transl_res ==
'J')) {
633 msg+= mismatches[
i].prot_res;
634 msg+=
"] != translation [";
635 msg+= mismatches[
i].transl_res;
637 if(!nuclocstr.empty()) {
638 msg+=
" at "+ nuclocstr;
649 for(
autoit = problems.begin(); it != problems.end(); it++) {
651 switch(it->problem) {
653 if(!has_exception) {
655 "transl_except qual out of frame.");
659 msg=
"Suspicious transl_except ";
661 msg+=
" at first codon of complete CDS";
665 msg=
"Unnecessary transl_except ";
667 msg+=
" at position ";
673 msg=
"Unexpected transl_except ";
676+
" just past end of protein";
694 constCSeq_loc& cbr_loc = cbr.
GetLoc();
696 if( ((comp !=
eContained) && (comp !=
eSame)) || cbr_loc.IsNull() || cbr_loc.IsEmpty()) {
698 "Code-break location not in coding region");
706 "Code-break: SeqLoc ["+ lbl +
"] out of range");
710 if(!p_loc || p_loc->
IsNull() || frame != 1) {
712 "Code-break location not in coding region - may be frame problem");
720 "Translation exception locations should not be partial");
724 string msg=
"Multiple code-breaks at same location ";
726 if( !
str.empty() ) {
752 "An ORF coding region should not have a product");
757 if(feat_is_pseudo) {
759 "A pseudo coding region should not have a product");
762 "A coding region overlapped by a pseudogene should not have a product");
765 "A pseudo coding region should not have a product");
782 "Protein product not packaged in nuc-prot set with nucleotide in small genome set");
785 "Protein product not packaged in nuc-prot set with nucleotide");
790 if( !pseudo && !conflict ) {
816 "Exception flag should be set in coding region");
820 "Use the proper genetic code, if available, " 821 "or set transl_excepts on specific codons");
824 "protein_id should not be a gbqual on a CDS feature");
827 "gene_synonym should not be a gbqual on a CDS feature");
830 "transcript_id should not be a gbqual on a CDS feature");
835 "conflicting codon_start values");
838 "codon_start value should be 1, 2, or 3");
956}
catch(
conststd::exception& ) {
976 "A coding region contains invalid genetic code ["+
NStr::IntToString(cdsgencode) +
"]");
985 if(biopgencode != cdsgencode
997 "Genetic code conflict between CDS (code "+
999 ") and BioSource.genome biological context ("+
1003 "Genetic code conflict between CDS (code "+
1005 ") and BioSource (code "+
1018 intnum_short_exons = 0;
1024 size_tprev_len = 16;
1025 size_tprev_start = 0;
1026 size_tprev_stop = 0;
1028 if(prev_len <= 15) {
1030 if(!message.empty()) {
1036prev_len = it.GetRange().GetLength();
1037prev_start = it.GetRange().GetFrom();
1038prev_stop = it.GetRange().GetTo();
1042 if(num_short_exons > 1) {
1044 "Coding region has multiple internal exons that are too short at positions "+ message);
1045}
else if(num_short_exons == 1) {
1047 "Internal coding region exon is too short at position "+ message);
1106 boolsupress =
false;
1117 "mRNA contains CDS but internal intron-exon boundaries " 1122 "mRNA overlaps or contains CDS but does not completely " 1123 "contain intervals");
1138 staticconstexpr
autoparent_types = ct::make_array<CSeqFeatData::ESubtype> (
1147feat_tree =
Ref(
newfeature::CFeatTree());
1149 for(
autoparent_type : parent_types) {
1150feat_tree->AddFeaturesFor(mappedFeat, parent_type);
1158 for(
autoparent_type : parent_types) {
1159 CMappedFeatparent = feat_tree->GetParent(fh, parent_type);
1228 if(!far_mrna_nps) {
1311 if(! cds_ref || ! pep_ref) {
1331 "Peptide under CDS matches small Gene");
1362 if(partial5 || partial3) {
1364 "CDS is partial but protein is complete");
1374 "CDS is 5' complete but protein is NH2 partial");
1383 "CDS is 3' partial but protein is NH2 partial");
1390 "CDS is 3' complete but protein is CO2 partial");
1399 "CDS is 5' partial but protein is CO2 partial");
1404 if(partial5 && partial3) {
1405}
else if(partial5) {
1412 "CDS is 5' partial but protein has neither end");
1413}
else if(partial3) {
1420 "CDS is 3' partial but protein has neither end");
1423 "CDS is complete but protein has neither end");
1446 "annotated by transcript or proteomic data",
1447 "artificial frameshift",
1448 "mismatches in translation",
1449 "rearrangement required for product",
1450 "reasons given in citation",
1451 "translated product replaced",
1452 "unclassified translation discrepancy" 1459 for(
auto&
str: sc_BypassCdsPartialCheck) {
1478 if(
last.GetRange().GetFrom() == 0) {
1507 if(
first.GetRange().GetFrom() == 0) {
1537 boolfound_match =
false;
1546 const CSeq_id& sid = loc_i.GetSeq_id();
1565 if(prod_nps == nuc_nps) {
1566found_match =
true;
1571 return!found_match;
1577 if(
abs((
int)this_start - (
int)last_stop) < 11) {
1578shortlist.push_back(
TShortIntron(last_stop, this_start));
1579}
else if(
abs((
int)this_stop - (
int)last_start) < 11) {
1580shortlist.push_back(
TShortIntron(last_start, this_stop));
1587vector<CCdregionValidator::TShortIntron> shortlist;
1589CSeq_loc_CI li(loc);
1591 TSeqPoslast_start = li.GetRange().GetFrom();
1592 TSeqPoslast_stop = li.GetRange().GetTo();
1594last_id->
Assign(li.GetSeq_id());
1598 TSeqPosthis_start = li.GetRange().GetFrom();
1599 TSeqPosthis_stop = li.GetRange().GetTo();
1600 if(
abs((
int)this_start - (
int)last_stop) < 11 ||
abs((
int)this_stop - (
int)last_start) < 11) {
1601 if(li.GetSeq_id().Equals(*last_id)) {
1608 for(
autoid_it : last_bsh.
GetId()) {
1609 if(id_it.GetSeqId()->Equals(li.GetSeq_id())) {
1617last_start = this_start;
1618last_stop = this_stop;
1619last_id->
Assign(li.GetSeq_id());
1642 if(shortlist.size() == 0) {
1648 if(nonsense_introns.size() > 0) {
1652 if(shortlist.size() == 1) {
1654}
else if(shortlist.size() == 2) {
1659 for(
size_t i= 0;
i< shortlist.size() - 2;
i++) {
1665 "Introns at positions "+ message +
" should be at least 10 nt long");
1689 "rearrangement required for product") !=
NPOS) {
1696 "Expected CDS product absent");
1712}
catch(
construntime_error& ) {
1721 if( transl_prot.empty() || prot_seq.empty() ||
NStr::Equal(transl_prot, prot_seq) ) {
1723 "Coding region conflict flag should not be set");
1726 "Coding region conflict flag is set");
1743 const CSeq_id* sid =
nullptr;
1749 "Unable to find product Bioseq from CDS feature");
1770 "Same product Bioseq from multiple CDS features");
1790 "Coding region and protein feature partials conflict");
1797 if(vec.
IsInGap(pos) || vec[pos] ==
'N') {
1811 boolcheck_gaps =
false;
1817 boolhas_abutting_gap =
false;
1825pos = is_minus_strand ? start + 1 : start - 1;
1832 if(!has_abutting_gap) {
1850pos = is_minus_strand ? stop - 1 : stop + 1;
1857 if(!has_abutting_gap) {
1884 if(contained_mrna.size() == 1) {
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Biological
5' and 3'
@ eErr_SEQ_FEAT_PartialProblemMismatch5Prime
@ eErr_SEQ_FEAT_CDSmRNAmismatch
@ eErr_SEQ_FEAT_CDShasTooManyXs
@ eErr_SEQ_FEAT_TranslExceptPhase
@ eErr_SEQ_FEAT_InvalidCodonStart
@ eErr_SEQ_FEAT_SuspiciousFrame
@ eErr_SEQ_FEAT_UnnecessaryTranslExcept
@ eErr_SEQ_FEAT_PartialsInconsistentCDSProtein
@ eErr_SEQ_FEAT_MissingCDSproduct
@ eErr_SEQ_FEAT_TranslExceptAndRnaEditing
@ eErr_SEQ_FEAT_UnnecessaryException
@ eErr_SEQ_FEAT_PartialProblemMismatch3Prime
@ eErr_SEQ_FEAT_InternalStop
@ eErr_SEQ_FEAT_MisMatchAA
@ eErr_SEQ_FEAT_WrongQualOnFeature
@ eErr_SEQ_FEAT_ProductFetchFailure
@ eErr_SEQ_FEAT_GenCodeMismatch
@ eErr_SEQ_FEAT_PseudoCdsHasProduct
@ eErr_SEQ_FEAT_ProductLength
@ eErr_SEQ_FEAT_CdTransFail
@ eErr_SEQ_FEAT_BadCDScomment
@ eErr_SEQ_FEAT_NoProtein
@ eErr_SEQ_FEAT_MultipleCDSproducts
@ eErr_SEQ_FEAT_EcNumberInCDSComment
@ eErr_SEQ_FEAT_ShortExon
@ eErr_SEQ_FEAT_StartCodon
@ eErr_SEQ_FEAT_OrfCdsHasProduct
@ eErr_SEQ_FEAT_ErroneousException
@ eErr_SEQ_FEAT_GeneOnNucPositionOfPeptide
@ eErr_SEQ_FEAT_DuplicateTranslExcept
@ eErr_SEQ_FEAT_PartialProblemHasStop
@ eErr_SEQ_FEAT_PseudoCdsViaGeneHasProduct
@ eErr_SEQ_FEAT_CodonQualifierUsed
@ eErr_SEQ_FEAT_ShortIntron
@ eErr_SEQ_FEAT_AltStartCodonException
@ eErr_SEQ_FEAT_WrongQualOnCDS
@ eErr_SEQ_FEAT_TranslExceptIsPartial
@ eErr_SEQ_FEAT_ConflictFlagSet
@ eErr_SEQ_FEAT_CDSproductPackagingProblem
@ eErr_SEQ_FEAT_BadConflictFlag
@ eErr_SEQ_FEAT_CDSmRNArange
@ eErr_SEQ_FEAT_TranslExcept
@ eErr_SEQ_FEAT_PartialProblem
@ eErr_SEQ_FEAT_TerminalXDiscrepancy
@ eErr_SEQ_FEAT_GenCodeInvalid
@ eErr_SEQ_FEAT_PseudoCDSmRNArange
@ eErr_SEQ_FEAT_MissingExceptionFlag
@ eErr_SEQ_FEAT_IntronIsStopCodon
const string s_PlastidTxt[20]
static void s_LocIdType(CBioseq_Handle bsh, bool &is_nt, bool &is_ng, bool &is_nw, bool &is_nc)
#define FOR_EACH_SEQID_ON_BIOSEQ_HANDLE(Itr, Var)
int GetGcodeForInternalStopErrors(const CCdregion &cdr)
string GetStartCodonErrorMessage(const CSeq_feat &feat, const char first_char, size_t internal_stop_count)
string GetInternalStopErrorMessage(const CSeq_feat &feat, size_t internal_stop_count, bool bad_start, char transl_start)
MAKE_CONST_SET(sc_BypassCdsPartialCheck, ct::tagStrNocase, { "RNA editing", "annotated by transcript or proteomic data", "artificial frameshift", "mismatches in translation", "rearrangement required for product", "reasons given in citation", "translated product replaced", "unclassified translation discrepancy" })
static bool IsGeneticCodeValid(int gcode)
int GetGcodeForName(const string &code_name)
static int s_GetStrictGenCode(const CBioSource &src)
CRef< CSeq_loc > GetLastCodonLoc(const CSeq_feat &cds, CScope &scope)
bool DoesCodingRegionHaveTerminalCodeBreak(const objects::CCdregion &cdr)
size_t GetProtLen() const
size_t GetProdTerminalX() const
vector< STranslExceptProblem > TTranslExceptProblems
size_t GetNumNonsenseIntrons() const
void CalculateTranslationProblems(const CSeq_feat &feat, CBioseq_Handle loc_handle, CBioseq_Handle prot_handle, bool ignore_exceptions, bool far_fetch_cds, bool standalone_annot, bool single_seq, bool is_gpipe, bool is_genomic, bool is_refseq, bool is_nt_or_ng_or_nw, bool is_nc, bool has_accession, CScope *scope)
@ eCDSTranslationProblem_NoStop
@ eCDSTranslationProblem_NoProtein
@ eCDSTranslationProblem_FrameNotConsensus
@ eCDSTranslationProblem_ShouldStartPartial
@ eCDSTranslationProblem_BadStart
@ eCDSTranslationProblem_FrameNotPartial
@ eCDSTranslationProblem_UnnecessaryException
@ eCDSTranslationProblem_UnableToFetch
@ eCDSTranslationProblem_ShouldNotBePartialButIs
@ eCDSTranslationProblem_ErroneousException
@ eCDSTranslationProblem_TooManyX
@ eCDSTranslationProblem_UnqualifiedException
@ eCDSTranslationProblem_StopPartial
@ eCDSTranslationProblem_ShouldBePartialButIsnt
static vector< CRef< CSeq_loc > > GetNonsenseIntrons(const CSeq_feat &feat, CScope &scope)
vector< STranslationMismatch > TTranslationMismatches
const TTranslationMismatches & GetTranslationMismatches() const
size_t GetTranslationProblemFlags() const
size_t GetInternalStopCodons() const
@ eTranslExceptUnnecessary
@ eTranslExceptSuspicious
@ eTranslExceptUnexpected
size_t GetTransLen() const
bool UnableToTranslate() const
int GetRaggedLength() const
char GetTranslStartCharacter() const
size_t GetTranslTerminalX() const
const TTranslExceptProblems & GetTranslExceptProblems() const
bool HasException() const
bool HasUnparsedTranslExcept() const
void x_ReportTranslationMismatches(const CCDSTranslationProblems::TTranslationMismatches &mismatches)
void x_ValidateParentPartialness()
void ReportShortIntrons()
bool x_CDS5primePartialTest() const
void x_ValidateSeqFeatLoc() override
void x_ValidateBadMRNAOverlap()
bool x_CDS3primePartialTest() const
static string x_FormatIntronInterval(const TShortIntron &interval)
void x_ValidateFarProducts()
static bool IsPlastid(int genome)
CConstRef< CSeq_feat > m_Gene
void x_ValidateGeneticCode()
bool x_IsProductMisplaced() const
static vector< TShortIntron > x_GetShortIntrons(const CSeq_loc &loc, CScope *scope)
pair< TSeqPos, TSeqPos > TShortIntron
void x_ValidateCodebreak()
void x_ValidateCDSPeptides()
CCdregionValidator(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)
bool x_BypassCDSPartialTest() const
void x_ValidateConflict()
void x_ReportTranslationProblems(const CCDSTranslationProblems &problems)
void x_ValidateProductPartials()
void x_ValidateCDSPartial()
void x_ValidateProductId()
bool x_CheckPosNOrGap(TSeqPos pos, const CSeqVector &vec)
string MapToNTCoords(TSeqPos pos)
static void x_AddToIntronList(vector< TShortIntron > &shortlist, TSeqPos last_start, TSeqPos last_stop, TSeqPos this_start, TSeqPos this_stop)
void x_ValidateFeatComment() override
void x_ValidateCommonProduct()
void x_ValidateExceptText(const string &text) override
bool x_ReportOrigProteinId() override
void x_ReportTranslExceptProblems(const CCDSTranslationProblems::TTranslExceptProblems &problems, bool has_exception)
@Gb_qual.hpp User-defined methods of the data storage class.
static const CTrans_table & GetTransTable(int id)
static const CGenetic_code_table & GetCodeTable(void)
CRef< feature::CFeatTree > GetFeatTreeFromCache(const CSeq_loc &loc, CScope &scope)
CConstRef< CSeq_feat > GetGeneFromCache(const CSeq_feat *feat, CScope &scope)
Exceptions for objmgr/util library.
@ eSubtype_transit_peptide_aa
@ eSubtype_sig_peptide_aa
@ eSubtype_mat_peptide_aa
namespace ncbi::objects::
void PostErr(EDiagSev sv, EErrType et, const string &msg)
CBioseq_Handle m_ProductBioseq
static bool s_IsPseudo(const CSeq_feat &feat)
CBioseq_Handle m_LocationBioseq
virtual void x_ValidateFeatComment()
void x_ValidateLocusTagGeneralMatch(CConstRef< CSeq_feat > gene)
void x_ReportPseudogeneConflict(CConstRef< CSeq_feat > gene)
static bool x_BioseqHasNmAccession(CBioseq_Handle bsh)
void ValidateSplice(bool gene_pseudo, bool check_all)
virtual void x_ValidateSeqFeatLoc()
virtual void x_ValidateExceptText(const string &text)
static bool s_BioseqHasRefSeqThatStartsWithPrefix(CBioseq_Handle bsh, string prefix)
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
bool IsRemoteFetch() const
bool IsStandaloneAnnot() const
bool IsFarSequence(const CSeq_id &id)
bool IsHugeFileMode() const
bool x_IsFarFetchFailure(const CSeq_loc &loc)
bool RequireLocalProduct(const CSeq_id *sid) const
bool IsFarFetchCDSproducts() const
bool IgnoreExceptions() const
void SetFarFetchFailure()
CGeneCache & GetGeneCache()
bool IsSmallGenomeSet() const
const CSeq_entry & GetTSE() const
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * str(char *buf, int n)
Public API for finding the gene(s) on a given feature using the same criteria as the flatfile generat...
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
void SetDiagFilter(EDiagFilter what, const char *filter_str)
Set diagnostic filter.
string GetDiagFilter(EDiagFilter what)
Get current diagnostic filter.
EDiagSev
Severity level for the posted diagnostics.
@ eDiag_Info
Informational message.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
@ eDiag_Critical
Critical error message.
@ eDiagFilter_All
for all non-FATAL
@ eDiagFilter_Post
for all non-TRACE, non-FATAL
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
EAccessionInfo
For IdentifyAccession (below)
CSeq_id::EAccessionInfo IdentifyAccession(void) const
CSeq_id::E_Choice Which(void) const
@ eAcc_refseq_wgs_intermed
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
ENa_strand GetStrand(void) const
Get the location's strand.
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const
Check if strand is set for any/all part(s) of the seq-loc depending on the flag.
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
bool IsPartialStop(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
CMappedFeat GetBestOverlappingFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype need_subtype, sequence::EOverlapType overlap_type, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
bool IsOneBioseq(const CSeq_loc &loc, CScope *scope)
Returns true if all embedded CSeq_ids represent the same CBioseq, else false.
ESeqLocCheck SeqLocCheck(const CSeq_loc &loc, CScope *scope)
Checks that a CSeq_loc is all on one strand on one CBioseq.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eOverlap_SubsetRev
1st is a subset of 2nd ranges
@ eOverlap_Contains
2nd contains 1st extremes
@ eOverlap_CheckIntRev
1st is a subset of 2nd with matching boundaries
@ eOverlap_Simple
any overlap of extremes
@ eOverlap_Interval
at least one pair of intervals must overlap
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
CRef< CSeq_loc > ProductToSource(const CSeq_feat &feat, const CSeq_loc &prod_loc, TP2SFlags flags=0, CScope *scope=0)
const CSeq_feat * GetCDSForProduct(const CBioseq &product, CScope *scope)
Get the encoding CDS feature of a given protein sequence.
CRef< CSeq_loc > SourceToProduct(const CSeq_feat &feat, const CSeq_loc &source_loc, TS2PFlags flags=0, CScope *scope=0, int *frame=0)
CConstRef< CSeq_feat > GetmRNAforCDS(const CSeq_feat &cds, CScope &scope)
GetmRNAforCDS A function to find a CSeq_feat representing the appropriate mRNA for a given CDS.
vector< TFeatScore > TFeatScores
void GetOverlappingFeatures(const CSeq_loc &loc, CSeqFeatData::E_Choice feat_type, CSeqFeatData::ESubtype feat_subtype, EOverlapType overlap_type, TFeatScores &feats, CScope &scope, const TBestFeatOpts opts=0, CGetOverlappingFeaturesPlugin *plugin=NULL)
Find all features overlapping the location.
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
@ fS2P_AllowTer
map the termination codon as a legal location
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id, const CTSE_Handle &tse)
Get bioseq handle for sequence withing one TSE.
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
@ eProductToLocation
Map from the feature's product to location.
const CSeqFeatData & GetData(void) const
TSeqPos GetBioseqLength(void) const
bool IsSetProduct(void) const
virtual const CSeq_loc & GetLocation(void) const
TInst_Length GetInst_Length(void) const
bool IsSetInst(void) const
CSeq_entry_Handle GetExactComplexityLevel(CBioseq_set::EClass cls) const
Return level with exact complexity, or empty handle if not found.
CSeqFeatData::ESubtype GetFeatSubtype(void) const
const CGene_ref * GetGeneXref(void) const
get gene (if present) from Seq-feat.xref list
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
const TId & GetId(void) const
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
const TInst & GetInst(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
const CSeq_loc & GetLocation(void) const
const CSeq_loc & GetProduct(void) const
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
bool IsInGap(TSeqPos pos) const
true if sequence at 0-based position 'pos' has gap Note: this method is not MT-safe,...
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
void SetCoding(TCoding coding)
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
static const char label[]
TGenome GetGenome(void) const
Get the Genome member data.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
bool CanGetGenome(void) const
Check if it is safe to call GetGenome method.
const TOrg & GetOrg(void) const
Get the Org member data.
bool IsSetGenome(void) const
Check if a value has been assigned to Genome data member.
bool IsSetLocus_tag(void) const
systematic gene name (e.g., MI0001, ORF0069) Check if a value has been assigned to Locus_tag data mem...
bool IsSetLocus(void) const
Official gene symbol Check if a value has been assigned to Locus data member.
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
const TLocus & GetLocus(void) const
Get the Locus member data.
bool IsSetDb(void) const
name of database or system Check if a value has been assigned to Db data member.
const TDb & GetDb(void) const
Get the Db member data.
bool IsSetPgcode(void) const
plastid genetic code Check if a value has been assigned to Pgcode data member.
TMgcode GetMgcode(void) const
Get the Mgcode member data.
TGcode GetGcode(void) const
Get the Gcode member data.
bool IsSetMgcode(void) const
mitochondrial genetic code Check if a value has been assigned to Mgcode data member.
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
bool IsSetGcode(void) const
genetic code (see CdRegion) Check if a value has been assigned to Gcode data member.
TPgcode GetPgcode(void) const
Get the Pgcode member data.
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
bool IsSetEc(void) const
E.C.
bool IsSetOrf(void) const
just an ORF ? Check if a value has been assigned to Orf data member.
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
bool IsSetCode(void) const
genetic code used Check if a value has been assigned to Code data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
TOrf GetOrf(void) const
Get the Orf member data.
const TLoc & GetLoc(void) const
Get the Loc member data.
list< CRef< CGenetic_code > > Tdata
const TLocation & GetLocation(void) const
Get the Location member data.
bool IsSetConflict(void) const
conflict Check if a value has been assigned to Conflict data member.
bool IsGene(void) const
Check if variant Gene is selected.
TFrame GetFrame(void) const
Get the Frame member data.
const TData & GetData(void) const
Get the Data member data.
bool CanGetData(void) const
Check if it is safe to call GetData method.
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
const TCode & GetCode(void) const
Get the Code member data.
list< CRef< C_E > > Tdata
const TCdregion & GetCdregion(void) const
Get the variant data.
bool CanGetExcept_text(void) const
Check if it is safe to call GetExcept_text method.
const TProduct & GetProduct(void) const
Get the Product member data.
const Tdata & Get(void) const
Get the member data.
bool CanGetExcept(void) const
Check if it is safe to call GetExcept method.
const TComment & GetComment(void) const
Get the Comment member data.
bool CanGetOrf(void) const
Check if it is safe to call GetOrf method.
const TGene & GetGene(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TProt & GetProt(void) const
Get the variant data.
bool CanGetCode(void) const
Check if it is safe to call GetCode method.
TExcept GetExcept(void) const
Get the Except member data.
bool CanGetProduct(void) const
Check if it is safe to call GetProduct method.
const TQual & GetQual(void) const
Get the Qual member data.
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
bool CanGetQual(void) const
Check if it is safe to call GetQual method.
TConflict GetConflict(void) const
Get the Conflict member data.
bool IsSetCode_break(void) const
individual exceptions Check if a value has been assigned to Code_break data member.
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
bool IsSetLocation(void) const
feature made from Check if a value has been assigned to Location data member.
@ eFrame_not_set
not set, code uses one
void SetPoint(TPoint value)
Assign a value to Point data member.
void SetId(TId &value)
Assign a value to Id data member.
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
bool IsOther(void) const
Check if variant Other is selected.
void SetStrand(TStrand value)
Assign a value to Strand data member.
bool IsGeneral(void) const
Check if variant General is selected.
const TOther & GetOther(void) const
Get the variant data.
const TGeneral & GetGeneral(void) const
Get the variant data.
bool IsNull(void) const
Check if variant Null is selected.
const TAccession & GetAccession(void) const
Get the Accession member data.
bool IsSeq(void) const
Check if variant Seq is selected.
@ eClass_nuc_prot
nuc acid and coded proteins
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
TRepr GetRepr(void) const
Get the Repr member data.
bool IsSetRepr(void) const
Check if a value has been assigned to Repr data member.
const TSource & GetSource(void) const
Get the variant data.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
bool CanGetCompleteness(void) const
Check if it is safe to call GetCompleteness method.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
@ eRepr_delta
sequence made by changes (delta) to others
@ eCompleteness_has_left
5' or NH3 end present
@ eCompleteness_complete
complete biological entity
@ eCompleteness_has_right
3' or COOH end present
@ eCompleteness_no_left
missing 5' or NH3 end
@ eCompleteness_partial
partial but no details given
@ eCompleteness_no_right
missing 3' or COOH end
@ eCompleteness_no_ends
missing both ends
@ e_Ncbieaa
extended ASCII 1 letter aa codes
@ e_Molinfo
info on the molecule and techniques
@ e_Source
source of materials, includes Org-ref
static void text(MDB_val *v)
std::false_type tagStrNocase
const struct ncbi::grid::netcache::search::fields::KEY key
#define FOR_EACH_CODEBREAK_ON_CDREGION(Itr, Var)
FOR_EACH_CODEBREAK_ON_CDREGION EDIT_EACH_CODEBREAK_ON_CDREGION.
#define FOR_EACH_GBQUAL_ON_FEATURE
#define NCBI_SEQID(Type)
@NAME Convenience macros for NCBI objects
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4