sequence;
61 CValidError_base(imp), m_AnnotValidator(imp), m_DescrValidator(imp), m_BioseqValidator(imp)
74 for(
const auto& desc : seq.
GetDescr().
Get()) {
89 boolacceptableDiff =
false;
92CUser_object::TData::const_iterator iter1 = aop1->
GetData().begin();
93CUser_object::TData::const_iterator iter2 = aop2->
GetData().begin();
94 for(; iter1 != aop1->
GetData().end() && iter2 != aop2->
GetData().end(); ++iter1, ++iter2) {
105 if(fld1 ==
"FeatureListType"&& fld2 ==
"FeatureListType") {
114 if(featlisttype1 ==
"Complete Genome"&& featlisttype2 ==
"Partial Genome") {
115acceptableDiff =
true;
118 if(featlisttype1 ==
"Partial Genome"&& featlisttype2 ==
"Complete Genome") {
119acceptableDiff =
true;
123}
else if(fld1 != fld2) {
129 returnacceptableDiff;
142 for(
const auto& se_list_it : seqset.
GetSeq_set()) {
148}
else if(se.
IsSeq()) {
157 for(; seqit; ++seqit) {
161}
else if(seqit->IsNa()) {
174[
this, &seqset]() {
175PostErr(eDiag_Warning, eErr_SEQ_PKG_BioseqSetClassNotSet,
176 "Bioseq_set class not set", seqset);
180 "Bioseq_set class not set", seqset);
211 "Set class should not be conset", seqset);
221 if(nuccnt == 0 && protcnt == 0) {
223 "No Bioseqs in this set", seqset);
230 boolnot_all_autodef =
false;
231 boolnot_same_autodef =
false;
232 boolhas_any_autodef =
false;
243 boolhas_autodef =
false;
249 const CBioseq& seq = sub->GetSeq();
254has_any_autodef =
true;
255 if(! first_autodef) {
256first_autodef = aduo;
257}
else if(! first_autodef->
Equals(*aduo)) {
258 if(! second_autodef) {
259second_autodef = aduo;
262not_same_autodef =
true;
264}
else if(! second_autodef->
Equals(*aduo)) {
265not_same_autodef =
true;
269has_autodef =
false;
274}
else if(se->IsSeq()) {
275 const CBioseq& seq = se->GetSeq();
280has_any_autodef =
true;
281 if(! first_autodef) {
282first_autodef = aduo;
283}
else if(! first_autodef->
Equals(*aduo)) {
284 if(! second_autodef) {
285second_autodef = aduo;
288not_same_autodef =
true;
290}
else if(! second_autodef->
Equals(*aduo)) {
291not_same_autodef =
true;
295has_autodef =
false;
300not_all_autodef =
true;
303 if(not_all_autodef && has_any_autodef) {
305 "Not all pop/phy/mut/eco set components have an autodef user object",
308 if(not_same_autodef && has_any_autodef) {
310 "Inconsistent autodef user objects in pop/phy/mut/eco set",
319 boolsuppressMissingSetTitle =
false;
321 if(has_any_autodef && (! not_all_autodef) && (! not_same_autodef)) {
322suppressMissingSetTitle =
true;
341 for(
const auto& annot_it : seqset.
GetAnnot()) {
349[
this, &seqset]() { x_ValidateSetDescriptors(seqset
); });
415 if(cds_seq == contig) {
434 "No nucleotides in nuc-prot set", seqset);
435}
else if(nuccnt > 1 && segcnt != 1) {
437 "Multiple unsegmented nucleotides in nuc-prot set", seqset);
441 "No proteins in nuc-prot set", seqset);
444 intprot_biosource = 0;
447 boolhas_Primary =
false;
448 boolhas_TPA =
false;
450sequence::CDeflineGenerator defline_generator;
453 for(
const auto& se_list_it : seqset.
GetSeq_set()) {
458 boolhasMetaGenomeSource =
false;
460 if(closest_biosource) {
461 const CBioSource& src = closest_biosource->GetSource();
464 const COrgMod& omd = *omd_itr;
466hasMetaGenomeSource =
true;
474 for(
const auto& it : seq.
GetDescr().
Get()) {
481 "DBLink user object should not be on a Bioseq", seq);
493 "Nucleotide bioseq should be product of mRNA " 494 "feature on contig, but is not",
498 for(
const auto& id_it : seq.
GetId()) {
500 if(
id.IsOther() &&
id.GetOther().IsSetAccession()) {
505}
else if(
id.IsGenbank() ||
id.IsEmbl() ||
id.IsDdbj()) {
507}
else if(
id.IsTpg() ||
id.IsTpe() ||
id.IsTpd()) {
512}
else if(seq.
IsAa()) {
516 "Protein bioseq should be product of CDS " 517 "feature on contig, but is not",
521 for(
const auto& id_it : seq.
GetId()) {
523 if(
id.IsGenbank() ||
id.IsEmbl() ||
id.IsDdbj()) {
525}
else if(
id.IsTpg() ||
id.IsTpe() ||
id.IsTpd()) {
532 for(
const auto& desc : seq.
GetDescr().
Get()) {
533 if(desc->IsSource()) {
536 if(desc->IsTitle()) {
537instantiated = desc->GetTitle();
544 stringgenerated = defline_generator.GenerateDefline(seq, *
m_Scope, sequence::CDeflineGenerator::fIgnoreExisting);
546generated = defline_generator.GenerateDefline(seq, *
m_Scope, sequence::CDeflineGenerator::fIgnoreExisting | sequence::CDeflineGenerator::fAllProteinNames);
548instantiated.erase(0, 11);
550instantiated.erase(0, 12);
552instantiated.erase(0, 21);
555generated.erase(0, 11);
557generated.erase(0, 12);
559generated.erase(0, 21);
561generated.erase(0, 21);
568 if(hasMetaGenomeSource &&
NStr::EqualNocase(
"MAG: "+ instantiated, generated)) {
570}
else if(hasMetaGenomeSource &&
NStr::EqualNocase(
"MAG "+ instantiated, generated)) {
574 "Instantiated protein title does not match automatically " 575 "generated title", seq);
591 const string& set_class = tv->
FindName(
set.GetClass(),
true);
594 "Nuc-prot Bioseq-set contains wrong Bioseq-set, " 595 "its class is \""+ set_class +
"\".",
set);
600 if(prot_biosource > 1) {
603 " proteins with a BioSource descriptor", seqset);
604}
else if(prot_biosource > 0) {
606 "Nuc-prot set has 1 protein with a BioSource descriptor", seqset);
609 boolhas_source =
false;
610 boolhas_title =
false;
611 boolhas_refgenetracking =
false;
613 for(
const auto& desc : seqset.
GetDescr().
Get()) {
615&& desc->GetSource().IsSetOrg()
616&& desc->GetSource().GetOrg().IsSetTaxname()
617&& !
NStr::IsBlank(desc->GetSource().GetOrg().GetTaxname())) {
619}
else if(desc->IsTitle()) {
621}
else if(desc->IsUser() && desc->GetUser().IsRefGeneTracking()) {
622has_refgenetracking =
true;
637 "Nuc-prot set does not contain expected BioSource descriptor", seqset);
643 "Nuc-prot set should not have title descriptor", seqset);
646 if(has_refgenetracking && (! is_nm)) {
648 "Nuc-prot set should not have RefGeneTracking user object", seqset);
651 if(has_Primary && has_TPA) {
653 "Nuc-prot set should not have a mixture of Primary and TPA accession types", seqset);
669 const CMolInfo* mol_info =
nullptr;
671 for(; miit; ++miit) {
677}
else if(mol_info->
GetBiomol() != miit->GetBiomol()) {
685 "Pop/phy/mut/eco set contains inconsistent moltype", seqset);
697 "No segmented Bioseq in segset", seqset);
704 for(
const auto& se_list_it : seqset.
GetSeq_set()) {
711mol = seq_inst.
GetMol();
715 "Segmented set contains mixture of nucleotides" 716 " and proteins", seqset);
720}
else if(se.
IsSet()) {
723 if(
set.IsSetClass() &&
727 const string& set_class_str =
731 "Segmented set contains wrong Bioseq-set, " 732 "its class is \""+ set_class_str +
"\".",
750 for(
const auto& se_list_it : seqset.
GetSeq_set()) {
755mol = seq_inst.
GetMol();
757seq_inst_mol = seq_inst.
GetMol();
761 "Parts set contains mixture of nucleotides " 762 "and proteins", seqset);
766}
else if(se.
IsSet()) {
770 const string& set_class_str =
774 "Parts set contains unwanted Bioseq-set, " 775 "its class is \""+ set_class_str +
"\".",
790 boolhas_title =
false;
793 for(
const auto& desc : seqset.
GetDescr().
Get()) {
794 if(desc->IsTitle()) {
799 "Only Pop/Phy/Mut/Eco sets should have titles",
803 "Only Pop/Phy/Mut/Eco sets should have titles", seqset);
813 if(! suppressMissingSetTitle) {
815 "Pop/Phy/Mut/Eco set does not have title", seqset);
833 "Pop/Phy/Mut/Eco set has no components", seqset);
834}
else if(seqset.
GetSeq_set().size() == 1) {
835 boolhas_alignment =
false;
837 while(annot_it && ! has_alignment) {
839has_alignment =
true;
843 if(! has_alignment) {
845 "Pop/Phy/Mut/Eco set has only one component and no alignments", seqset);
858 boolhas_title =
false;
860 for(
const auto& desc : seq.
GetDescr().
Get()) {
861 if(desc->IsTitle()) {
869 "Nucleotide component of pop/phy/mut/eco/wgs set is missing its title", seq);
884class_name =
"Pop set";
887class_name =
"Mut set";
890class_name =
"Genbank set";
895class_name =
"Phy/eco/wgs set";
898class_name =
"GenProd set";
901class_name =
"Small genome set";
904class_name =
"Nuc-prot set";
912 for(
const auto& desc : seqset.
GetDescr().
Get()) {
913 if(desc->IsMolinfo()) {
915class_name +
" has MolInfo on set", seqset);
925 static const stringsp =
" sp. ";
929 "RefSeq record should not be a Pop-set", seqset);
933 stringfirst_taxname;
934 boolis_first =
true;
935 for(; seqit; ++seqit) {
947 if(
f&&
f->GetData().GetBiosrc().IsSetOrg() &&
f->GetData().GetBiosrc().GetOrg().IsSetTaxname()) {
948taxname =
f->GetData().GetBiosrc().GetOrg().GetTaxname();
953first_taxname = taxname;
967 size_t len= pos + sp.length();
975 size_tcomp_len =
min(taxname.length(), first_taxname.length());
981 "Population set contains inconsistent organism names.", seqset);
997 boolid_no_good =
false;
1004 "Seq-annot packaged directly on genomic product set",
1008CBioseq_set::TSeq_set::const_iterator se_list_it =
1011 if(! (**se_list_it).IsSeq()) {
1015 const CBioseq& seq = (*se_list_it)->GetSeq();
1027id_type =
id.Which();
1030}
catch(
conststd::exception&) {
1039 if(loc_label.empty()) {
1045 "Product of mRNA feature ("+ loc_label +
1046 ") not packaged in genomic product set",
1053 "Product of mRNA feature (?) not packaged in " 1054 "genomic product set",
1066 for(
const auto& se : seqset.
GetSeq_set()) {
1068 if(! se->GetSet().IsSetClass()
1074 "Nested sets within Pop/Phy/Mut/Eco/Wgs set",
1086 for(
const auto& it : seqset.
GetDescr().
Get()) {
1091 "DBLink user object should not be on this set",
@ eErr_SEQ_PKG_PartsSetMixedBioseqs
@ eErr_SEQ_DESCR_BioSourceOnProtein
@ eErr_SEQ_DESCR_InconsistentProteinTitle
@ eErr_SEQ_PKG_NucProtSetHasTitle
@ eErr_SEQ_DESCR_RefGeneTrackingOnNucProtSet
@ eErr_SEQ_PKG_SegSetProblem
@ eErr_SEQ_PKG_BioseqSetClassNotSet
@ eErr_SEQ_PKG_MissingSetTitle
@ eErr_SEQ_PKG_PartsSetHasSets
@ eErr_SEQ_PKG_ImproperlyNestedSets
@ eErr_SEQ_PKG_InconsistentMoltypeSet
@ eErr_SEQ_DESCR_DBLinkProblem
@ eErr_SEQ_PKG_InconsistentAutodef
@ eErr_SEQ_PKG_SegSetNotParts
@ eErr_SEQ_PKG_MisplacedMolInfo
@ eErr_SEQ_DESCR_DBLinkOnSet
@ eErr_SEQ_DESCR_TitleNotAppropriateForSet
@ eErr_SEQ_PKG_ConSetProblem
@ eErr_SEQ_PKG_ComponentMissingTitle
@ eErr_SEQ_DESCR_BioSourceMissing
@ eErr_SEQ_PKG_GenomicProductPackagingProblem
@ eErr_SEQ_PKG_SegSetMixedBioseqs
@ eErr_SEQ_INST_PrimaryAndThirdPartyMixture
@ eErr_SEQ_DESCR_InconsistentTaxNameSet
@ eErr_SEQ_PKG_SingleItemSet
@ eErr_SEQ_PKG_NucProtNotSegSet
@ eErr_SEQ_PKG_RefSeqPopSet
@ eErr_SEQ_PKG_NucProtProblem
@ eErr_SEQ_PKG_MissingAutodef
bool IsSetOrgMod(void) const
const COrgName & GetOrgname(void) const
CSeq_entry * GetParentEntry(void) const
bool NeedsDocsumTitle() const
CConstRef< CSeqdesc > GetClosestDescriptor(CSeqdesc::E_Choice choice, int *level=NULL) const
@OrgMod.hpp User-defined methods of the data storage class.
ESubtype GetSubtype(void) const
static bool IsNa(EMol mol)
Template class for iteration on objects of class C (non-modifiable version)
@ eObjectType_AutodefOptions
EObjectType GetObjectType() const
void ValidateSeqAnnot(const CSeq_annot_Handle &annot)
void ValidateSeqAnnotContext(const CSeq_annot &annot, const CBioseq &seq)
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
void ValidateBioseq(const CBioseq &seq)
void ValidatePhyMutEcoWgsSet(const CBioseq_set &seqset)
void ValidatePartsSet(const CBioseq_set &seqset)
void ShouldHaveNoDblink(const CBioseq_set &seqset)
void ValidateSetElements(const CBioseq_set &seqset, bool suppressMissingSetTitle=false)
void CheckForInconsistentBiomols(const CBioseq_set &seqset)
CValidError_descr m_DescrValidator
CValidError_bioseq m_BioseqValidator
void ValidatePopSet(const CBioseq_set &seqset)
void ValidateGenProdSet(const CBioseq_set &seqset)
void x_ValidateSetDescriptors(const CBioseq_set &seqset, bool suppressMissingSetTitle=false)
~CValidError_bioseqset() override
void ValidateNucProtSet(const CBioseq_set &seqset, int nuccnt, int protcnt, int segcnt)
void SetShouldNotHaveMolInfo(const CBioseq_set &seqset)
void CheckForImproperlyNestedSets(const CBioseq_set &seqset)
void ValidateGenbankSet(const CBioseq_set &seqset)
bool IsMrnaProductInGPS(const CBioseq &seq)
void ValidateSetTitle(const CBioseq_set &seqset, bool suppressMissingSetTitle=false)
CValidError_bioseqset(CValidError_imp &imp)
bool IsCDSProductInGPS(const CBioseq &seq, const CBioseq_set &gps)
void ValidateBioseqSet(const CBioseq_set &seqset)
CValidError_annot m_AnnotValidator
void ValidateSegSet(const CBioseq_set &seqset, int segcnt)
void ValidateSeqDescr(const CSeq_descr &descr, const CSeq_entry &ctx)
bool IsHugeFileMode() const
SValidatorContext & SetContext()
bool IsIndexerVersion() const
bool IsHugeSet(const CBioseq_set &bioseqSet) const
CBioseq_Handle GetBioseqHandleFromLocation(CScope *scope, const CSeq_loc &loc, const CTSE_Handle &tse)
Include a standard set of the NCBI C++ Toolkit most basic headers.
EDiagSev
Severity level for the posted diagnostics.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
@ eDiag_Critical
Critical error message.
const string & FindName(TEnumValueType value, bool allowBadValue) const
Find name of the enum by its numeric value.
#define ENUM_METHOD_NAME(EnumName)
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
bool IsPseudo(const CSeq_feat &feat, CScope &scope)
Determines whether given feature is pseudo, using gene associated with feature if necessary. Checks to...
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CBioseq_set_Handle GetBioseq_setHandle(const CBioseq_set &seqset, EMissing action=eMissing_Default)
const CTSE_Handle & GetTSE_Handle(void) const
Get CTSE_Handle of containing TSE.
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
const CSeqFeatData & GetData(void) const
CConstRef< CBioseq_set > GetCompleteBioseq_set(void) const
Return the complete bioseq-set object.
bool IsSetProduct(void) const
CSeq_entry_Handle GetParentEntry(void) const
Return a handle for the parent seq-entry of the bioseq.
const CSeq_loc & GetLocation(void) const
SAnnotSelector & SetByProduct(bool byProduct=true)
Set flag indicating if the features should be searched by their product rather than location.
const CSeq_loc & GetProduct(void) const
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static int strncasecmp(const char *s1, const char *s2, size_t n)
Case-insensitive comparison of two zero-terminated strings, narrowed to the specified number of chara...
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
@ eNocase
Case insensitive compare.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
const TOrg & GetOrg(void) const
Get the Org member data.
const TStr & GetStr(void) const
Get the variant data.
bool IsSetData(void) const
The object itself. Check if a value has been assigned to Data data member.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetType(void) const
Type of object within class. Check if a value has been assigned to Type data member.
const TData & GetData(void) const
Get the Data member data.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetLabel(void) const
Field label. Check if a value has been assigned to Label data member.
const TStr & GetStr(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
const TLabel & GetLabel(void) const
Get the Label member data.
const TType & GetType(void) const
Get the Type member data.
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
const TMod & GetMod(void) const
Get the Mod member data.
TSubtype GetSubtype(void) const
Get the Subtype member data.
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
bool IsSetSubname(void) const
Check if a value has been assigned to Subname data member.
bool IsSetTaxname(void) const
Preferred formal name. Check if a value has been assigned to Taxname data member.
@ eSubtype_metagenome_source
E_Choice
Choice variants.
const TOther & GetOther(void) const
Get the variant data.
const TAccession & GetAccession(void) const
Get the Accession member data.
@ e_Other
for historical reasons, 'other' = 'refseq'
@ e_not_set
No variant selected.
const TSeq & GetSeq(void) const
Get the variant data.
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
TClass GetClass(void) const
Get the Class member data.
const TSet & GetSet(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
bool IsSetDescr(void) const
Check if a value has been assigned to Descr data member.
bool IsSet(void) const
Check if variant Set is selected.
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
const TAnnot & GetAnnot(void) const
Get the Annot member data.
@ eClass_parts
parts for 2 or 3
@ eClass_pop_set
population study
@ eClass_phy_set
phylogenetic study
@ eClass_conset
constructed sequence + parts
@ eClass_wgs_set
whole genome shotgun project
@ eClass_mut_set
set of mutations
@ eClass_eco_set
ecological sample study
@ eClass_nuc_prot
nuc acid and coded proteins
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
@ eClass_genbank
converted genbank
@ eClass_segset
segmented sequence + parts
@ eClass_small_genome_set
viral segments or mitochondrial minicircles
const TUser & GetUser(void) const
Get the variant data.
const TInst & GetInst(void) const
Get the Inst member data.
const TSource & GetSource(void) const
Get the variant data.
const TId & GetId(void) const
Get the Id member data.
const Tdata & Get(void) const
Get the member data.
TMol GetMol(void) const
Get the Mol member data.
bool IsSetDescr(void) const
Descriptors. Check if a value has been assigned to Descr data member.
TBiomol GetBiomol(void) const
Get the Biomol member data.
EMol
molecule class in living organism
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetId(void) const
Equivalent identifiers. Check if a value has been assigned to Id data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
bool IsUser(void) const
Check if variant User is selected.
@ eRepr_seg
segmented sequence
@ e_User
user defined object
@ e_Source
source of materials, includes Org-ref
@ eMol_not_set
> cdna = rna
once_flag ClassNotSetOnceFlag
once_flag DescriptorsOnceFlag
static bool x_AlmostEquals(CConstRef< CUser_object > aop1, CConstRef< CUser_object > aop2)
CConstRef< CUser_object > s_AutoDefUserObjectFromBioseq(const CBioseq &seq)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4