& molInfo = pSeqdesc->GetMolinfo();
62 return(molInfo.IsSetTech() &&
72 for(
autopId :
info.m_ids) {
92 for(
autopId : ids) {
93 if(pId && pId->IsOther() && pId->GetOther().IsSetAccession()) {
103 for(
autopId :
info.m_ids) {
104 if(pId && pId->IsOther() && pId->GetOther().IsSetAccession()) {
120 for(
autopId :
info.m_ids) {
132 const auto& molInfo = pSeqdesc->
GetMolinfo();
158 for(
autopId :
info.m_ids) {
198list<CRef<CSeq_id>> tmpIds;
199 for(
autopId : ids) {
201pTmpId->Assign(*pId);
202tmpIds.push_back(pTmpId);
216 const string& message,
235m_Options(options) {}
253 const auto& biosets =
m_Reader.GetBiosets();
254 if(biosets.size() < 2) {
258 autoit =
next(biosets.begin());
263 const boolsuppressContext =
false;
271 return(
m_Reader.FindBioseq(pId) !=
nullptr);
278 for(
auto val: collidingNumbers) {
286 if(! (
m_Reader.GetSubmitBlock())) {
297 if(! (
m_Reader.GetSubmitBlock())) {
312 "No source information included on this record.",
322 "Pop/phy/mut/eco set contains inconsistent moltype",
348 if(
autopSetClass =
m_Reader.GetTopLevelClass();
383 "Skipping validation of remaining /inference qualifiers",
391 if(
m_Reader.GetBiosets().size() < 2) {
400 context.PreprocessHugeFile =
true;
418 if(
m_Reader.IsNotJustLocalOrGeneral()) {
419 context.NotJustLocalOrGeneral =
true;
437 for(
autopPub : pub.
GetPub().
Get()) {
440}
else if(pPub->IsGen()) {
441 const auto&
gen= pPub->GetGen();
442 if(
gen.IsSetSerial_number()) {
467 type.GetTypeInfo()->DefaultReadData(
in, pMolInfo);
472 auto* pObject =
object.GetObjectPtr();
473 object.GetTypeInfo()->DefaultReadData(
in, pObject);
481 type.GetTypeInfo()->DefaultReadData(
in, pPubdesc);
486 auto* pObject =
object.GetObjectPtr();
487 object.GetTypeInfo()->DefaultReadData(
in, pObject);
495 type.GetTypeInfo()->DefaultSkipData(
in);
499 object.GetTypeInfo()->DefaultReadData(
in,
object.GetObjectPtr());
506 auto* pObject =
object.GetObjectPtr();
507 object.GetTypeInfo()->DefaultReadData(
in, pObject);
509 if(pUser_object->IsSetType() && pUser_object->GetType().IsStr() &&
NStr::EqualNocase(pUser_object->GetType().GetStr(),
"TpaAssembly")) {
522 type.GetTypeInfo()->DefaultReadData(
in, pSeqhist);
523 if(pSeqhist->IsSetAssembly() && ! pSeqhist->GetAssembly().empty()) {
536 type.GetTypeInfo()->DefaultSkipData(
in);
544 autogbqual_qual_mi = gbqual_info.FindMember(
"qual");
549 type.GetTypeInfo()->DefaultReadData(
in, &
str);
550 if(
str==
"inference") {
558 const string& hugeSetId,
594 if(biosets.size() < 2 || bioseqs.size() < 1) {
598 if(
autoit =
next(biosets.begin());
603 const auto& firstBioseq = bioseqs.begin();
604 const auto& parentIt = firstBioseq->m_parent_set;
610 for(
autoit = firstBioseq; it != bioseqs.end(); ++it) {
611 if(
s_IsNa(it->m_mol) && it->m_parent_set == parentIt) {
620 const string& hugeSetId,
624 for(
autopErrorItem : pErrors->GetErrs()) {
626pPrunedErrors->AddValidErrItem(pErrorItem);
629pErrors = pPrunedErrors;
634 const string& hugeSetId,
639[globalInfo, hugeSetId](
CRef<CValidErrItem>pItem) { return pItem.Empty() || s_DropErrorItem(globalInfo, hugeSetId, *pItem); });
641errors.erase(it, errors.end());
User-defined methods of the data storage class.
@ eErr_SEQ_DESCR_InconsistentBioSources_ConLocation
@ eErr_GENERIC_MissingPubRequirement
@ eErr_SEQ_DESCR_NoOrgFound
@ eErr_SEQ_DESCR_TransgenicProblem
@ eErr_SEQ_PKG_InconsistentMoltypeSet
@ eErr_SEQ_DESCR_NoPubFound
@ eErr_SEQ_INST_MitoMetazoanTooLong
@ eErr_SEQ_DESCR_NoSourceDescriptor
@ eErr_GENERIC_CollidingSerialNumbers
@ eErr_SEQ_FEAT_OnlyGeneXrefs
@ eErr_SEQ_FEAT_TooManyInferenceAccessions
@ eErr_SEQ_INST_TpaAssemblyProblem
User-defined methods of the data storage class.
void remove_if(Container &c, Predicate *__pred)
CConstRef< CSeqdesc > GetClosestDescriptor(const TBioseqInfo &info, CSeqdesc::E_Choice choice) const
auto & GetBiosets() const
static bool IsHugeSet(CBioseq_set::TClass setClass)
auto & GetBioseqs() const
const CBioseq_set::TClass * GetTopLevelClass() const
void ReportGlobalErrors(const TGlobalInfo &globalInfo, IValidError &errors) const
void x_ReportConflictingBiomols(IValidError &errors) const
void x_ReportMissingCitSubs(bool hasRefSeqAccession, IValidError &errors) const
unique_ptr< string > m_pIdString
void UpdateValidatorContext(TGlobalInfo &globalInfo, SValidatorContext &context) const
string x_GetIdString() const
void x_PostMsg(EDiagSev severity, EErrType errorType, const string &message, IValidError &errors) const
edit::CHugeAsnReader TReader
string x_GetHugeSetLabel() const
CHugeFileValidator(const TReader &reader, TOptions options)
void x_ReportMissingBioSources(IValidError &errors) const
void x_ReportCollidingSerialNumbers(const set< int > &collidingNumbers, IValidError &errors) const
void x_ReportMissingPubs(IValidError &errors) const
static void RegisterReaderHooks(CObjectIStream &objStream, SGlobalInfo &m_GlobalInfo)
void ReportPostErrors(const SValidatorContext &context, IValidError &errors) const
bool IsInBlob(const CSeq_id &id) const
@Pubdesc.hpp User-defined methods of the data storage class.
static string GetBioseqSetLabel(const CBioseq_set &st, CRef< CScope > scope, bool suppress_context)
static bool IsTSAAccession(const CSeq_id &id)
static bool IsWGSAccession(const CSeq_id &id)
@ eVal_refseq_conventions
virtual void AddValidErrItem(EDiagSev sev, unsigned int ec, const string &msg, const string &desc, const CSerialObject &obj, const string &acc, const int ver, const string &location=kEmptyStr, const int seq_offset=0)=0
iterator_bool insert(const value_type &val)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * str(char *buf, int n)
EDiagSev
Severity level for the posted diagnostics.
@ eDiag_Info
Informational message.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
static const TObjectType * SafeCast(TTypeInfo type)
@ eFasta
Tagged ID in NCBI's traditional FASTA style.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static const char label[]
bool IsSet(void) const
Check if a value has been assigned to data member.
const Tdata & Get(void) const
Get the member data.
@ e_Gi
GenInfo Integrated Database.
@ eClass_wgs_set
whole genome shotgun project
@ eClass_nuc_prot
nuc acid and coded proteins
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
TBiomol GetBiomol(void) const
Get the Biomol member data.
EMol
molecule class in living organism
bool IsSetPub(void) const
the citation(s) Check if a value has been assigned to Pub data member.
const TPub & GetPub(void) const
Get the Pub member data.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
@ eRepr_virtual
no seq data
@ eTech_tsa
transcriptome shotgun assembly
@ eTech_wgs
whole genome shotgun sequencing
@ e_Molinfo
info on the molecule and techniques
@ eMol_na
just a nucleic acid
const TMsg & GetMsg(void) const
Get the Msg member data.
TErrIndex GetErrIndex(void) const
Get the ErrIndex member data.
const TAccession & GetAccession(void) const
Get the Accession member data.
static bool s_IsWGS(const TBioseqInfo &info, const CHugeAsnReader &reader)
static bool s_x_ReportMissingCitSub(const TBioseqInfo &info, const CHugeAsnReader &reader, bool IsRefSeq)
static bool s_IsGpipe(const TBioseqInfo &info)
bool g_IsCuratedRefSeq(const TBioseqInfo &info)
static bool s_IsMaster(const TBioseqInfo &info)
static string s_GetBioseqAcc(const CSeq_id &id, int *version)
static bool s_IsNa(CSeq_inst::EMol mol)
static bool s_CuratedRefSeq(const string &accession)
static bool s_IsTSAContig(const TBioseqInfo &info, const CHugeAsnReader &reader)
static bool s_IsNoncuratedRefSeq(const list< CConstRef< CSeq_id >> &ids)
static bool s_x_ReportMissingPubs(const TBioseqInfo &info, const CHugeAsnReader &reader)
static void s_UpdateGlobalInfo(const CPubdesc &pub, CHugeFileValidator::TGlobalInfo &globalInfo)
static bool s_DropErrorItem(const CHugeFileValidator::TGlobalInfo &globalInfo, const string &hugeSetId, const CValidErrItem &item)
static string s_GetIdString(const list< CConstRef< CSeq_id >> &ids, int *version)
static bool s_IsWGSMaster(const TBioseqInfo &info, const CHugeAsnReader &reader)
string g_GetHugeSetIdString(const CHugeAsnReader &reader)
void g_PostprocessErrors(const CHugeFileValidator::TGlobalInfo &globalInfo, const string &hugeSetId, CRef< CValidError > &pErrors)
const string version
version string
std::istream & in(std::istream &in_, double &x_)
void SetLocalSkipHook(const CObjectTypeInfo &obj_type_info, CObjectIStream &istr, _Func _func)
void SetLocalReadHook(const CObjectTypeInfo &obj_type_info, CObjectIStream &ostr, _Func _func)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
set< int > pubSerialNumbers
set< CMolInfo::TBiomol > biomols
int CumulativeInferenceCount
set< int > conflictingSerialNumbers
const int InferenceAccessionCutoff
static CS_CONTEXT * context
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4