(recordType ==
"cds") {
84 stringcdsId = parentId;
117 const string& strRawInput )
127 if(! parent.empty() && parent ==
id) {
130 "ID and Parent have the same value \""+ parent +
"\"");
133 if(
m_strType==
"protein_coding_gene"||
144 if(
m_strType==
"pseudogenic_transcript") {
177 const string& strRawKey )
200 return "Derives_from";
210 return "Ontology_term";
219 const string& title,
269 for(
const auto& lineData: readerData) {
270 const auto& line = lineData.mData;
303annot.
SetData().SetAlign().push_back(pAlign);
323 if(!pRecord->AssignFromGff(line)) {
327 string id= pRecord->GetOneAttribute(
"ID");
328 stringparent = pRecord->GetOneAttribute(
"Parent");
330 if(! parent.empty()) {
332cerr <<
"Parent "<< parent <<
" not yet seen for ID "<<
id<< endl;
340 if(!
id.
empty()) {
358 if(pRecord->IsMultiParent()) {
363 "This GFF3 reader does not support multiparented features"));
380 const string& strLine)
391 if( !pRecord->AssignFromGff(strLine) ) {
396 if( !pRecord->GetAttribute(
"ID",
id) ) {
400 if(alignments.find(
id) == alignments.end()) {
409alignments[id].push_back(alignment);
434 if(recType ==
"exon"|| recType ==
"five_prime_utr"|| recType ==
"three_prime_utr") {
437 if(recType ==
"cds") {
440 if(recType ==
"gene") {
450 if(recType ==
"region") {
462 const string& mrnaId,
468 stringmessage =
"Bad data line: ";
469message += exon.
Type();
470message +=
" referring to non-existent parent feature.";
477 const CSeq_interval& containingInt = cit->second.GetObject();
481containedInt.
GetTo() > containingInt.
GetTo()) {
482 stringmessage =
"Bad data line: ";
483message += exon.
Type();
484message +=
" extends beyond parent feature.";
544 if(!parentId.empty()) {
558 if(!parentId.empty()) {
571 const string& parent,
580 const string&grandParentsStr = pParent->
GetNamedQual(
"Parent");
581list<string> grandParents;
582 NStr::Split(grandParentsStr,
",", grandParents, 0);
583 for(list<string>::const_iterator gpcit = grandParents.begin();
584gpcit != grandParents.end(); ++gpcit) {
595pGrandParentXref->SetId(*pGrandParentId);
596pFeature->
SetXref().push_back(pGrandParentXref);
602pGrandChildXref->SetId(*pGrandChildId);
603pGrandParent->
SetXref().push_back(pGrandChildXref);
610 const string& parent,
624pParentXref->SetId(*pParentId);
625pChild->
SetXref().push_back(pParentXref);
631pChildXref->SetId(*pChildId);
632pParent->
SetXref().push_back(pChildXref);
654 "Bad data line: Duplicate feature ID \""+
id+
"\".");
662underConstruction = it->second;
679 stringfeatType = record.
Type();
680 if(featType ==
"stop_codon_read_through"|| featType ==
"selenocysteine") {
686 "Bad data line: Unassigned code break.");
694 "Bad data line: Code break assigned to missing feature.");
707pCodeBreak->
SetAa().SetNcbieaa(
708(featType ==
"selenocysteine") ?
'U':
'X');
711list< CRef< CCode_break > >& codeBreaks = cdRegion.
SetCode_break();
712codeBreaks.push_back(pCodeBreak);
752list<string> parents;
754 for(list<string>::const_iterator cit = parents.begin();
755cit != parents.end();
761 "Bad data line: mRNA record with bad parent assignment.");
777 "Internal error: Unexpected location type.");
844annot.
SetData().SetFtable().push_back( pFeature ) ;
864 if(it->second == parentId) {
870 "Bad data line: CDS record with bad parent assignments.");
879 return(mCdsParents.find(
id) != mCdsParents.end());
904 static const char*
constignoredTypesAlways_[] = {
910STRINGARRAY::const_iterator cit = ignoredTypesAlways.find(ftype);
911 if(cit != ignoredTypesAlways.end()) {
919 static const char*
constspecialTypesGenbank_[] = {
921 "autocatalytically_spliced_intron",
923 "hammerhead_ribozyme",
937 "stop_codon_read_through",
944 static const char*
constignoredTypesGenbank_[] = {
945 "apicoplast_chromosome",
948 "chloroplast_chromosome",
949 "chromoplast_chromosome",
952 "cyanelle_chromosome",
955 "expressed_sequence_match",
957 "leucoplast_chromosome",
958 "macronuclear_chromosome",
961 "micronuclear_chromosome",
962 "mitochondrial_chromosome",
963 "nuclear_chromosome",
964 "nucleomorphic_chromosome",
966 "nucleotide_to_protein_match",
967 "partial_genomic_sequence_assembly",
971 "sequence_assembly",
973 "translated_nucleotide_match",
978cit = specialTypesGenbank.find(ftype);
979 if(cit != specialTypesGenbank.end()) {
983cit = ignoredTypesGenbank.find(ftype);
984 if(cit != ignoredTypesGenbank.end()) {
1002 const autoit = attrs.
find(
"ID");
1003 if(it != attrs.end()) {
1013 if(featData.
IsRna()) {
1016 const autosubtype = featData.
GetSubtype();
1038 for(
autoitLocation :
mpLocations->LocationMap()) {
1039 auto id= itLocation.first;
1047 const auto& locs = itLocation.second;
1060list<CGff3LocationRecord> exonLocs;
1061 for(
autorecord : locs) {
1062 if(record.mType ==
"exon") {
1063exonLocs.push_back(record);
1067 if(! exonLocs.empty()) {
1068 mpLocations->MergeLocation(pNewLoc, frame, exonLocs);
1074list<CGff3LocationRecord> geneLocs;
1075 for(
autorecord : locs) {
1076 if(record.mType ==
"gene") {
1077geneLocs.push_back(record);
1081 if(! geneLocs.empty()) {
1082 mpLocations->MergeLocation(pNewLoc, frame, geneLocs);
1089 mpLocations->MergeLocation(pNewLoc, frame, itLocation.second);
1092 auto& cdrData = pFeature->
SetData().SetCdregion();
1107vector<string> tokens;
1109 if(tokens.size() < 2) {
1113 "Bad sequence-region pragma - ignored.");
1116 if(tokens.size() >= 4) {
1120 catch(exception&) {
1124 "Bad sequence-region pragma - ignored.");
1128 mpLocations->SetSequenceSize(tokens[1], sequenceSize);
1130 mpLocations->SetSequenceSize(resolvedId, sequenceSize);
1143 const string& seqId)
const 1146 returnmpLocations->GetSequenceSize(seqId);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static bool IsAlignmentData(const CTempString &)
virtual bool x_CreateAlignment(const CGff2Record &gff, CRef< CSeq_align > &pAlign)
bool xFeatureSetQualifier(const CTempString &, const CTempString &, CRef< CSeq_feat >)
CRef< CAnnotdesc > m_CurrentBrowserInfo
void xPostProcessAnnot(CSeq_annot &) override
bool x_GetFeatureById(const CTempString &, CRef< CSeq_feat > &)
IdToFeatureMap m_MapIdToFeature
bool x_MergeAlignments(const list< CRef< CSeq_align >> &alignment_list, CRef< CSeq_align > &processed)
virtual bool xParseStructuredComment(const CTempString &)
virtual bool xIsIgnoredFeatureId(const CTempString &)
unsigned int mCurrentFeatureCount
bool IsInGenbankMode() const
bool GetAttribute(const string &, string &) const
virtual bool UpdateFeature(TReaderFlags, CRef< CSeq_feat >, SeqIdResolver=nullptr) const
virtual bool AssignFromGff(const string &)
virtual bool InitializeFeature(TReaderFlags, CRef< CSeq_feat >, SeqIdResolver=nullptr) const
const TAttributes & Attributes() const
static string xNormalizedAttributeKey(const CTempString &)
string GetOneAttribute(const string &)
string x_NormalizedAttributeKey(const string &)
bool AssignFromGff(const string &) override
static string xNextGenericId()
void xPostProcessAnnot(CSeq_annot &) override
bool xIsIgnoredFeatureType(const CTempString &) override
set< string > mIDsAlreadySeen
CRef< CSeq_annot > ReadSeqAnnot(ILineReader &lr, ILineErrorListener *pErrors=nullptr) override
Read an object from a given line reader, render it as a single Seq-annot, if possible.
virtual bool xFindFeatureUnderConstruction(const CGff2Record &, CRef< CSeq_feat > &)
virtual bool xUpdateAnnotCds(const CGff2Record &, CRef< CSeq_feat >, CSeq_annot &, ILineErrorListener *)
bool xUpdateAnnotFeature(const CGff2Record &, CSeq_annot &, ILineErrorListener *) override
virtual bool xUpdateAnnotGeneric(const CGff2Record &, CRef< CSeq_feat >, CSeq_annot &, ILineErrorListener *)
virtual bool xUpdateAnnotRegion(const CGff2Record &, CRef< CSeq_feat >, CSeq_annot &, ILineErrorListener *)
map< string, string > mIdToSeqIdMap
void xProcessSequenceRegionPragma(const CTempString &pragma) override
CGff3ReadRecord * x_CreateRecord() override
virtual bool xUpdateAnnotExon(const CGff2Record &, CRef< CSeq_feat >, CSeq_annot &, ILineErrorListener *)
void xVerifyExonLocation(const string &, const CGff2Record &)
virtual bool xFeatureSetXrefParent(const string &, CRef< CSeq_feat >)
void xProcessAlignmentData(CSeq_annot &pAnnot)
TSeqPos SequenceSize() const
virtual bool xUpdateAnnotRna(const CGff2Record &, CRef< CSeq_feat >, CSeq_annot &, ILineErrorListener *)
bool xParseFeature(const CTempString &, CSeq_annot &, ILineErrorListener *) override
map< string, string > mCdsParentMap
virtual bool xInitializeFeature(const CGff2Record &, CRef< CSeq_feat >)
bool xReadInit() override
CGff3Reader(TReaderFlags uFlags, const string &name="", const string &title="", SeqIdResolver resolver=CReadUtil::AsSeqId, CReaderListener *=nullptr)
SAlignmentData mAlignmentData
shared_ptr< CGff3LocationMerger > mpLocations
set< string > mCdsParents
TSeqPos GetSequenceSize(const string &) const
virtual void xValidateAnnot(const CSeq_annot &) override
string xMakeRecordId(const CGff2Record &record)
map< string, CRef< CSeq_interval > > mMrnaLocs
bool xHasCdsChild(const string &id) const
static unsigned int msGenericIdCounter
void xVerifyCdsParents(const CGff2Record &)
virtual bool xParseAlignment(const string &strLine)
virtual bool xUpdateAnnotGene(const CGff2Record &, CRef< CSeq_feat >, CSeq_annot &, ILineErrorListener *)
virtual bool xFeatureSetXrefGrandParent(const string &, CRef< CSeq_feat >)
void xProcessData(const TReaderData &, CSeq_annot &) override
bool xAddFeatureToAnnot(CRef< CSeq_feat >, CSeq_annot &) override
CRef< CSeq_loc > GetSeqLoc(TReaderFlags, SeqIdResolver seqidresolve=nullptr) const
const string & Type() const
ENa_strand Strand() const
const string & Id() const
virtual void SetType(const string &recType)
const string & NormalizedType() const
static CObjReaderLineException * Create(EDiagSev eSeverity, unsigned int uLine, const std::string &strMessage, EProblem eProblem=eProblem_GeneralParsingError, const std::string &strSeqId=string(""), const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), CObjReaderLineException::EErrCode eErrCode=eFormat, const TVecOfLines &vecOfOtherLines=TVecOfLines())
Please use this instead of the constructor because the ctor is protected.
Common file reader utility functions.
virtual bool xParseBrowserLine(const CTempString &, CSeq_annot &)
SeqIdResolver mSeqIdResolve
unsigned int m_uLineNumber
void ProcessError(CObjReaderLineException &, ILineErrorListener *)
vector< TReaderLine > TReaderData
virtual CRef< CSeq_annot > ReadSeqAnnot(CNcbiIstream &istr, ILineErrorListener *pErrors=nullptr)
Read an object from a given input stream, render it as a single Seq-annot.
ESubtype GetSubtype(void) const
void SetNameDesc(const string &name)
void SetTitleDesc(const string &title)
namespace ncbi::objects::
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
static bool SoTypeToFeature(std::string_view, CSeq_feat &, bool=false)
static string ResolveSoAlias(std::string_view)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Abstract base class for lightweight line-by-line reading.
container_type::iterator iterator
const_iterator end() const
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
Include a standard set of the NCBI C++ Toolkit most basic headers.
static void fatal(const char *msg,...)
static bool s_TreatAsRna(const CSeqFeatData &featData)
unsigned int TSeqPos
Type for sequence locations and lengths.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
@ eDiag_Fatal
Fatal error – guarantees exit (or abort)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int StringToNonNegativeInt(const CTempString str, TStringToNumFlags flags=0)
Convert string to non-negative integer value.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
void SetAa(TAa &value)
Assign a value to Aa data member.
TXref & SetXref(void)
Assign a value to Xref data member.
const TKey & GetKey(void) const
Get the Key member data.
void SetLocation(TLocation &value)
Assign a value to Location data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
bool IsImp(void) const
Check if variant Imp is selected.
bool IsSetKey(void) const
Check if a value has been assigned to Key data member.
const TId & GetId(void) const
Get the Id member data.
const TLocation & GetLocation(void) const
Get the Location member data.
bool IsGene(void) const
Check if variant Gene is selected.
const TData & GetData(void) const
Get the Data member data.
void SetData(TData &value)
Assign a value to Data data member.
TCode_break & SetCode_break(void)
Assign a value to Code_break data member.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
bool IsRna(void) const
Check if variant Rna is selected.
const TImp & GetImp(void) const
Get the variant data.
@ eFrame_not_set
not set, code uses one
void SetTo(TTo value)
Assign a value to To data member.
void SetId(TId &value)
Assign a value to Id data member.
TFrom GetFrom(void) const
Get the From member data.
E_Choice Which(void) const
Which variant is currently selected.
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
const TInt & GetInt(void) const
Get the variant data.
void SetStrand(TStrand value)
Assign a value to Strand data member.
E_Choice
Choice variants.
void SetData(TData &value)
Assign a value to Data data member.
void SetDesc(TDesc &value)
Assign a value to Desc data member.
@ e_not_set
No variant selected.
Lightweight interface for getting lines of data with minimal memory copying.
constexpr bool empty(list< Ts... >) noexcept
static SLJIT_INLINE sljit_ins lr(sljit_gpr dst, sljit_gpr src)
#define DEFINE_STATIC_ARRAY_MAP(Type, Var, Array)
MAP_ID_TO_ALIGN mAlignments
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4