(
charch : src) {
85 unsigned charchu = ch;
86 if(chu > 31 && chu < 128) {
110 const string&
str= *it;
112 const char& ch = *c_it;
113 unsigned charchu = ch;
114 if(ch > 127 || (ch < 32 && ch !=
'\t'&& ch !=
'\r'&& ch !=
'\n')) {
125 switch( desc.
Which() ) {
128 "Modif descriptor is obsolete", *
m_Ctx, desc);
129CSeqdesc::TModif::const_iterator it2 = desc.
GetModif().begin();
130 while(it2 != desc.
GetModif().end()) {
140 "MolType descriptor is obsolete", *
m_Ctx, desc);
145 "Method descriptor is obsolete", *
m_Ctx, desc);
173 "Name descriptor needs text",
ctx, desc);
181 "OrgRef descriptor is obsolete", *
m_Ctx, desc);
194 "Region descriptor needs text",
ctx, desc);
234 const string& comment,
239 "Comment may refer to reference by serial number - " 240 "attach reference specific comments to the reference " 241 "REMARK instead.", *
m_Ctx, desc);
245 "Comment descriptor needs text", *
m_Ctx, desc);
247 if(
NStr::Find(comment,
"::") != string::npos) {
249 "Comment may be formatted to look like a structured comment.", *
m_Ctx, desc);
259 "Title descriptor needs text",
ctx, desc);
263 "Title descriptor has internal PMID",
ctx, desc);
267 charend = cpy.c_str()[cpy.length() - 1];
269 if(end ==
'.'&& cpy.length() > 4) {
270end = cpy.c_str()[cpy.length() - 2];
277 "Title descriptor ends in bad punctuation",
ctx, desc);
325 if(
NStr::Find(
msg,
"is not a valid value") != string::npos) {
327}
else if(
NStr::Find(
msg,
"field is out of order") != string::npos) {
331}
else if(
NStr::Find(
msg,
"is not a valid field name") != string::npos
332||
NStr::Find(
msg,
"field without label") != string::npos) {
353 if(errors.size() > 0) {
382 if(errors.size() > 0) {
398 "Evidence-For-Name-Assignment",
400 "Genome-Annotation-Data",
401 "Genome-Assembly-Data",
402 "GISAID_EpiFlu(TM)Data",
406 "International Barcode of Life (iBOL)Data",
422 "RefSeq-Attributes",
424 "SymbiotaSpecimenReference",
425 "Taxonomic-Update-Statistics",
430 returnsc_OfficialPrefixList.find(
val) != sc_OfficialPrefixList.end();
440 if((*it)->IsSetLabel() && (*it)->GetLabel().IsStr() &&
442(*it)->IsSetData() && (*it)->GetData().IsStr()) {
443 const string&
val= (*it)->GetData().GetStr();
460 if((*it)->IsSetLabel() && (*it)->GetLabel().IsStr() &&
462(*it)->IsSetData() && (*it)->GetData().IsStr()) {
463 const string&
val= (*it)->GetData().GetStr();
478 const boolreport =
false;
491 const string& prefix,
499report_prefix +
" is not a valid value for StructuredCommentPrefix", *
m_Ctx, desc);
508 const string& prefix,
518 stringsfx = report_sfx;
527 "StructuredCommentSuffix '"+ report_sfx +
"' does not match prefix", *
m_Ctx, desc);
545 auto& fields =
tmp.SetData();
565 "Structured Comment user object descriptor is empty", *
m_Ctx, desc);
576 "Structured Comment lacks prefix and/or suffix", *
m_Ctx, desc);
591 const boolisV2Prefix =
592(prefix ==
"HumanSTR"&& usr.
HasField(
"Bracketed record seq.",
""));
593 const stringqueryPrefix = isV2Prefix ?
"HumanSTRv2": prefix;
606 if(
autopSuffix = usr.
GetFieldRef(
"StructuredCommentSuffix"); pSuffix) {
618 "Structured Comment lacks prefix and/or suffix", *
m_Ctx, desc);
624 if(
NStr::Equal(prefix,
"Genome-Assembly-Data")) {
629 "Assembly Name should not start with 'NCBI' or 'GenBank' in structured comment", *
m_Ctx, desc);
638 "Genome Representation should not start with 'Partial' in structured comment", *
m_Ctx, desc);
646 "Structured Comment invalid; the field value and/or name are incorrect", *
m_Ctx, desc);
655 unsigned intskip = 4;
657 if(
str.length() < 5)
return true;
659 if(
str[0] !=
'S')
return true;
660 if(
str[1] !=
'A')
return true;
661 if(
str[2] !=
'M')
return true;
662 if(
str[3] !=
'E'&&
str[3] !=
'N'&&
str[3] !=
'D')
return true;
664 if(
str[3] ==
'E') {
671 for(
i= skip;
i<
str.length();
i++) {
673 if(!
isdigit(ch))
return true;
684 if(
str.length() < 9)
return true;
686 if(
str[0] !=
'S')
return true;
687 if(
str[1] !=
'R')
return true;
688 if(
str[2] !=
'S')
return true;
690 for(
i= 3;
i<
str.length();
i++) {
692 if(!
isdigit(ch))
return true;
703 if(
str.length() < 9)
return true;
706 if(ch !=
'S'&& ch !=
'D'&& ch !=
'E')
return true;
708 if(!
isupper(ch))
return true;
710 if(!
isupper(ch))
return true;
712 for(
i= 3;
i<
str.length();
i++) {
714 if(!
isdigit(ch))
return true;
726 if(
str.length() < 6)
return true;
728 if(
str[0] !=
'P')
return true;
729 if(
str[1] !=
'R')
return true;
730 if(
str[2] !=
'J')
return true;
731 if(
str[3] !=
'E'&&
str[3] !=
'N'&&
str[3] !=
'D')
return true;
732 if(
str[4] !=
'A'&&
str[4] !=
'B')
return true;
734 for(
i= 5;
i<
str.length();
i++) {
736 if(!
isdigit(ch))
return true;
744 "Trace Assembly Archive",
748 "Sequence Read Archive",
765 "DBLink user object descriptor is empty", *
m_Ctx, desc);
776 const auto& fdata = fld.
GetData();
777 if(fdata.IsStrs()) {
780 const string&
str= *st_itr;
784 "Bad BioSample format - "+
str, *
m_Ctx, desc);
787 "Old BioSample format - "+
str, *
m_Ctx, desc);
791}
else if(fdata.IsStr()) {
792 const string&
str= fdata.GetStr();
796 "Bad BioSample format - "+ fdata.GetStr(), *
m_Ctx, desc);
799 "Old BioSample format - "+ fdata.GetStr(), *
m_Ctx, desc);
808 const string&
str= *st_itr;
811 "Bad Sequence Read Archive format - "+
str, *
m_Ctx, desc);
819 const string&
str= *st_itr;
822 "Bad BioProject format - "+
str, *
m_Ctx, desc);
830 const string&
str= *st_itr;
833 "Trace Asssembly Archive accession "+
str+
" does not begin with TI prefix", *
m_Ctx, desc);
839 for(
auto&
str: s_legalDblinkNames) {
843 "Bad DBLink capitalization - "+ label_str, *
m_Ctx, desc);
859 "User object with no type", *
m_Ctx, desc);
865 "User object with no type", *
m_Ctx, desc);
871 "User object with no data", *
m_Ctx, desc);
875 boolhas_ref_track_status =
false;
877 if( (*field)->CanGetLabel() ) {
878 const CObject_id& obj_id = (*field)->GetLabel();
879 if( !obj_id.
IsStr() ) {
883has_ref_track_status =
true;
884 if((*field)->IsSetData() && (*field)->GetData().IsStr()) {
887 "RefGeneTracking object has illegal Status '" 888+ (*field)->GetData().GetStr() +
"'",
895 if( !has_ref_track_status ) {
897 "RefGeneTracking object needs to have Status set", *
m_Ctx, desc);
914 "Molinfo-biomol unknown used", *
m_Ctx, desc);
942p =
"other-genetic";
958 "Biomol \""+ p +
"\" is not appropriate for sequences that use the TSA technique.",
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eErr_SEQ_DESCR_DBLinkBadBioSample
@ eErr_SEQ_DESCR_DBLinkBadCapitalization
@ eErr_SEQ_DESCR_BadStrucCommInvalidSuffix
@ eErr_SEQ_DESCR_BadGenomeRepresentation
@ eErr_SEQ_DESCR_BadAssemblyName
@ eErr_SEQ_DESCR_StrucCommMissingUserObject
@ eErr_SEQ_DESCR_BadStrucCommInvalidFieldName
@ eErr_SEQ_DESCR_BadStrucCommInvalidFieldValue
@ eErr_SEQ_DESCR_SerialInComment
@ eErr_SEQ_DESCR_BadPunctuation
@ eErr_SEQ_DESCR_RefGeneTrackingIllegalStatus
@ eErr_SEQ_DESCR_TitleMissingText
@ eErr_SEQ_DESCR_InvalidForType
@ eErr_SEQ_DESCR_MoltypeUnknown
@ eErr_GENERIC_NonAsciiAsn
@ eErr_SEQ_DESCR_TitleHasPMID
@ eErr_SEQ_FEAT_RefSeqInText
@ eErr_SEQ_DESCR_BadStrucCommMultipleFields
@ eErr_SEQ_DESCR_StrucCommMissingPrefixOrSuffix
@ eErr_SEQ_DESCR_WrongBiomolForTSA
@ eErr_SEQ_DESCR_BadStrucCommMissingField
@ eErr_SEQ_DESCR_DBLinkBadFormat
@ eErr_SEQ_DESCR_UserObjectNoType
@ eErr_SEQ_DESCR_DBLinkMissingUserObject
@ eErr_SEQ_DESCR_BadStrucCommFieldOutOfOrder
@ eErr_SEQ_DESCR_BadStrucCommInvalidPrefix
@ eErr_SEQ_DESCR_DBLinkBadBioProject
@ eErr_SEQ_DESCR_DBLinkBadSRAaccession
@ eErr_SEQ_DESCR_MissingText
@ eErr_SEQ_DESCR_FakeStructuredComment
@ eErr_SEQ_DESCR_UserObjectNoData
@ eErr_SEQ_DESCR_RegionMissingText
@ eErr_SEQ_DESCR_RefGeneTrackingWithoutStatus
@ eErr_SEQ_DESCR_CommentMissingText
int Compare(const CObject_id &oid2) const
Template class for iteration on objects of class C (non-modifiable version)
CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
bool IsRefGeneTracking() const
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
bool IsStructuredComment() const
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
static bool IsWGSMaster(const CBioseq &seq, CScope &scope)
bool x_ValidateStructuredComment(const CUser_object &usr, const CSeqdesc &desc, bool report=true)
CConstRef< CSeq_entry > m_Ctx
void ValidateSeqDesc(const CSeqdesc &desc, const CSeq_entry &ctx)
Validate descriptors as stand alone objects (no context)
void ValidateTitle(const string &title, const CSeqdesc &desc, const CSeq_entry &ctx)
void x_ReportStructuredCommentErrors(const CSeqdesc &desc, const CComment_rule::TErrorList &errors)
void ValidateMolInfo(const CMolInfo &minfo, const CSeqdesc &desc)
~CValidError_desc() override
bool x_ValidateStructuredCommentPrefix(const string &prefix, const CSeqdesc &desc, bool report)
void ValidateUser(const CUser_object &usr, const CSeqdesc &desc)
bool ValidateStructuredCommentGeneric(const CUser_object &usr, const CSeqdesc &desc, bool report)
bool IsValidStructuredComment(const CSeqdesc &desc)
void ValidateComment(const string &comment, const CSeqdesc &desc)
bool ValidateStructuredCommentInternal(const CSeqdesc &desc, bool report=true)
bool ValidateDblink(const CUser_object &usr, const CSeqdesc &desc, bool report=true)
bool x_ValidateStructuredCommentUsingRule(const CComment_rule &rule, const CSeqdesc &desc, bool report)
bool x_ValidateStructuredCommentSuffix(const string &prefix, const CUser_field &suffix, const CSeqdesc &desc, bool report)
bool ValidateStructuredComment(const CUser_object &usr, const CSeqdesc &desc, const CComment_rule &rule, bool report)
bool IsSerialNumberInComment(const string &comment)
void PostBadDateError(EDiagSev sv, const string &msg, int flags, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
bool IsGenomeSubmission() const
void ValidateBioSource(const CBioSource &bsrc, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
void ValidatePubdesc(const CPubdesc &pub, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
static bool is_valid(const char *num, int type, CONV_RESULT *cr)
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static const char * str(char *buf, int n)
#define FOR_EACH_USERFIELD_ON_USEROBJECT(Itr, Var)
FOR_EACH_USERFIELD_ON_USEROBJECT EDIT_EACH_USERFIELD_ON_USEROBJECT.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
EDiagSev
Severity level for the posted diagnostics.
@ eDiag_Info
Informational message.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
@ eDiag_Fatal
Fatal error -- guarantees exit (or abort)
@ eDiag_Critical
Critical error message.
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
@ eNocase
Case insensitive compare.
const TStr & GetStr(void) const
Get the variant data.
bool IsSetData(void) const
The object itself. Check if a value has been assigned to Data data member.
bool IsStr(void) const
Check if variant Str is selected.
bool CanGetType(void) const
Check if it is safe to call GetType method.
bool IsSetType(void) const
Type of object within class. Check if a value has been assigned to Type data member.
bool IsStrs(void) const
Check if variant Strs is selected.
const TStrs & GetStrs(void) const
Get the variant data.
bool IsId(void) const
Check if variant Id is selected.
const TData & GetData(void) const
Get the Data member data.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetLabel(void) const
Field label. Check if a value has been assigned to Label data member.
const TStr & GetStr(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
const TLabel & GetLabel(void) const
Get the Label member data.
const TType & GetType(void) const
Get the Type member data.
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
vector< CStringUTF8 > TStrs
vector< CRef< CUser_field > > TData
const TUser & GetUser(void) const
Get the variant data.
const TUpdate_date & GetUpdate_date(void) const
Get the variant data.
const TTitle & GetTitle(void) const
Get the variant data.
const TSource & GetSource(void) const
Get the variant data.
const TPub & GetPub(void) const
Get the variant data.
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
TTech GetTech(void) const
Get the Tech member data.
TBiomol GetBiomol(void) const
Get the Biomol member data.
bool IsSetTech(void) const
Check if a value has been assigned to Tech data member.
const TModif & GetModif(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
const TCreate_date & GetCreate_date(void) const
Get the variant data.
const TComment & GetComment(void) const
Get the variant data.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
const TName & GetName(void) const
Get the variant data.
const TRegion & GetRegion(void) const
Get the variant data.
bool IsUser(void) const
Check if variant User is selected.
@ eTech_tsa
transcriptome shotgun assembly
@ eBiomol_pre_RNA
precursor RNA of any sort really
@ eBiomol_cRNA
viral RNA genome copy intermediate
@ eBiomol_snoRNA
small nucleolar RNA
@ eBiomol_genomic_mRNA
reported a mix of genomic and cdna sequence
@ eBiomol_other_genetic
other genetic material
@ e_Embl
EMBL specific information.
@ e_Het
cofactor, etc associated but not bound
@ e_Org
if all from one organism
@ e_Num
a numbering system
@ e_User
user defined object
@ e_Update_date
date of last update
@ e_Pub
a reference to the publication
@ e_Pir
PIR specific info.
@ e_Genbank
GenBank specific info.
@ e_Prf
PRF specific information.
@ e_Mol_type
type of molecule
@ e_Sp
SWISSPROT specific info.
@ e_Dbxref
xref to other databases
@ e_Comment
a more extensive comment
@ e_Method
sequencing method
@ e_Region
overall region (globin locus)
@ e_Molinfo
info on the molecule and techniques
@ e_Maploc
map location of this sequence
@ e_Create_date
date entry first created/released
@ e_Title
a title for this sequence
@ e_Pdb
PDB specific information.
@ e_not_set
No variant selected.
@ e_Name
a name for this sequence
@ e_Source
source of materials, includes Org-ref
std::false_type tagStrNocase
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
#define FIELD_IS_SET_AND_IS(Var, Fld, Chs)
FIELD_IS_SET_AND_IS base macro.
#define GET_FIELD(Var, Fld)
GET_FIELD base macro.
#define FOR_EACH_CHAR_IN_STRING(Itr, Var)
FOR_EACH_CHAR_IN_STRING EDIT_EACH_CHAR_IN_STRING.
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static bool x_IsBadBioSampleFormat(const string &str)
static string s_AsciiString(const string &src)
EErrType s_GetErrTypeFromString(const string &msg)
static bool x_IsBadSRAFormat(const string &str)
static bool s_IsAllowedPrefix(const string &val)
MAKE_CONST_SET(sc_OfficialPrefixList, ct::tagStrNocase, { "Assembly-Data", "BWP:1.0", "EpifluData", "Evidence-Data", "Evidence-For-Name-Assignment", "FluData", "Genome-Annotation-Data", "Genome-Assembly-Data", "GISAID_EpiFlu(TM)Data", "HCVDataBaseData", "HIVDataBaseData", "HumanSTR", "International Barcode of Life (iBOL)Data", "MIENS-Data", "MIGS-Data", "MIGS:3.0-Data", "MIGS:4.0-Data", "MIMARKS:3.0-Data", "MIMARKS:4.0-Data", "MIMS-Data", "MIMS:3.0-Data", "MIMS:4.0-Data", "MIGS:5.0-Data", "MIMAG:5.0-Data", "MIMARKS:5.0-Data", "MIMS:5.0-Data", "MISAG:5.0-Data", "MIUVIG:5.0-Data", "RefSeq-Attributes", "SIVDataBaseData", "SymbiotaSpecimenReference", "Taxonomic-Update-Statistics", })
bool HasBadGenomeAssemblyName(const CUser_object &usr)
bool s_UserFieldCompare(const CRef< CUser_field > &f1, const CRef< CUser_field > &f2)
bool HasBadGenomeAssemblyPartial(const CUser_object &usr)
static EDiagSev s_ErrorLevelFromFieldRuleSev(CField_rule::TSeverity severity)
static bool x_IsBadBioProjectFormat(const string &str)
static bool x_IsNotAltBioSampleFormat(const string &str)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4