& strGtfType,
73 const string& strRawAttributes )
79 for(
size_tu=0; u <
attributes.size(); ++u ) {
84 if(strGtfType ==
"gene") {
89 if(strGtfType ==
"transcript") {
104 if(
key.empty() &&
value.empty() ) {
123 const string& strAnnotName,
124 const string& strAnnotTitle,
128 CGff2Reader( uFlags, strAnnotName, strAnnotTitle, resolver, pRL)
167 for(
const auto& lineData: readerData) {
187 return(recType ==
"exon"|| recType ==
"5utr"|| recType ==
"3utr");
192 return(recType ==
"cds"|| recType ==
"start_codon"|| recType ==
"stop_codon");
218 if(recType ==
"gene") {
220}
else if(recType ==
"mrna"|| recType ==
"transcript") {
230 const auto& xAttributes = x.
Get();
231 const auto& yAttributes = y.
Get();
233 autoxit = xAttributes.
begin();
234 autoyit = yAttributes.begin();
235 while(xit != xAttributes.end() && yit != yAttributes.end()) {
236 if(xit->first < yit->first) {
238}
else if(yit->first < xit->first) {
243 if(xVals.
empty()) {
244 result.AddValue(xit->first,
"");
249set_intersection(begin(xVals), end(xVals),
250begin(yVals), end(yVals),
251inserter(commonVals, commonVals.
begin()));
252 if(!commonVals.
empty()) {
253 for(
const auto&
val: commonVals) {
274 autofeatId =
mpLocations->GetFeatureIdFor(gff,
"cds");
285 return(line.
empty() || line[0] ==
'#');
292 const string& qualName,
298 if(!values.
empty()) {
306 const string& parentType,
310 if(parentType ==
"gene") {
330 auto& quals = feature.
SetQual();
331 for(
autoit = quals.begin(); it != quals.end();
) {
332 const string& qualKey = (*it)->GetQual();
335qualKey ==
"locus_tag"||
336qualKey ==
"old_locus_tag"||
337qualKey ==
"product"||
338qualKey ==
"protein_id") {
343 const string& qualVal = (*it)->GetVal();
346 if(prevAttributes.
HasValue(qualKey, qualVal) && (!
attributes.HasValue(qualKey, qualVal))) {
348it = quals.erase(it);
357 const string& recType,
359 const string& parentType,
363 if(parentType ==
"gene") {
383 const string& parentType,
393 autoparentFeatId =
mpLocations->GetFeatureIdFor(record, parentType);
398 mpLocations->AddRecordForId(parentFeatId, record);
403 mpLocations->AddRecordForId(parentFeatId, record);
414 auto& childTypeToAttribs = parentIt->second;
415 if(
autochildIt = childTypeToAttribs.find(recType);
416childIt != childTypeToAttribs.end()) {
418 auto& childAttributes = childIt->second;
422childAttributes = accumulatedAttributes;
425childTypeToAttribs.emplace(recType, attribs);
443 const string& prefix,
447 static intseqNum(1);
449 stringstrFeatureId = prefix;
450 if(strFeatureId.empty()) {
451strFeatureId =
"id";
453strFeatureId +=
"_";
455feature.
SetId().SetLocal().SetStr(strFeatureId);
464 autofeatId =
mpLocations->GetFeatureIdFor(record,
"gene");
480(record.
Type() ==
"gene") ?
495 "locus_tag",
"transcript_id",
"gene" 539 if(ignoredAttrs.contains(name)) {
566 autofeatId =
mpLocations->GetFeatureIdFor(gff,
"cds");
579 if(!transcriptId.empty()) {
580 if(
autogeneId = gff.
GeneKey(); !geneId.empty()) {
582 if(it->second != geneId) {
583 string msg=
"Gene id '"+ geneId +
"' for transcript '"+ transcriptId +
584 "' conflicts with previously-assigned '"+ it->second +
"'";
602 if(rec_type ==
"mrna"||
s_IsCDSType(rec_type)) {
611 if(
autobiotype = attrs.
ValueOf(
"transcript_biotype");
616 returnattrs.
ValueOf(
"transcript_type");
626 autofeatId =
mpLocations->GetFeatureIdFor(record,
"transcript");
638pFeature->SetData().SetRna().SetType(rna_type);
644pFeature->SetData().SetRna().SetExt().SetName(product);
651 if(rec_type ==
"cds") {
666 const string& featId)
673 returnfeatIt->second;
685 stringgeneSynonym =
attributes.ValueOf(
"gene_synonym");
686 if(!geneSynonym.empty()) {
687gene.
SetSyn().push_back(geneSynonym);
689 stringlocusTag =
attributes.ValueOf(
"locus_tag");
690 if(!locusTag.empty()) {
695 if(!locus.empty()) {
710 stringproteinId =
attributes.ValueOf(
"protein_id");
711 if(!proteinId.empty()) {
717 stringribosomalSlippage =
attributes.ValueOf(
"ribosomal_slippage");
718 if(!ribosomalSlippage.empty()) {
722 stringtransTable =
attributes.ValueOf(
"transl_table");
723 if(!transTable.empty()) {
726cdr.
SetCode().Set().push_back(pGc);
753 for(
auto value: values) {
754vector< string > tags;
756 for(
autoit = tags.begin(); it != tags.end(); ++it ) {
784 for(
const auto& pQual : feature.
GetQual()) {
785 if(pQual->GetQual() ==
key) {
786existingVals.
insert(pQual->GetVal());
790 for(
auto value: values) {
791 if(existingVals.
find(
value) == existingVals.
end()) {
815 for(
autoitLocation:
mpLocations->LocationMap()) {
816 auto id= itLocation.first;
824featSubType, itLocation.second);
829 for(
autoitLocation:
mpLocations->LocationMap()) {
830 auto id= itLocation.first;
844 autoparentRnaFeatId =
string(
"transcript:") + pFeature->
GetNamedQual(
"gene_id") +
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void xSetXrefFromTo(CSeq_feat &, CSeq_feat &)
virtual bool xAddFeatureToAnnot(CRef< CSeq_feat >, CSeq_annot &)
void xPostProcessAnnot(CSeq_annot &) override
bool x_GetFeatureById(const CTempString &, CRef< CSeq_feat > &)
IdToFeatureMap m_MapIdToFeature
virtual bool xParseFeature(const CTempString &, CSeq_annot &, ILineErrorListener *)
static CRef< CDbtag > x_ParseDbtag(const CTempString &)
virtual bool xParseStructuredComment(const CTempString &)
unsigned int mCurrentFeatureCount
static string xNormalizedAttributeValue(const CTempString &)
bool xSplitGffAttributes(const string &, vector< string > &) const
static string xNormalizedAttributeKey(const CTempString &)
const string & Type() const
const string & NormalizedType() const
void AddValue(const string &key, const string &value)
string ValueOf(const string &key) const
void GetValues(const string &key, MultiValue &values) const
const MultiAttributes & Get() const
bool HasValue(const string &key, const string &value="") const
string TranscriptId() const
CGtfAttributes mAttributes
bool xAssignAttributesFromGff(const string &, const string &)
const CGtfAttributes & GtfAttributes() const
void xFeatureAddQualifiers(const string &key, const CGtfAttributes::MultiValue &, CSeq_feat &)
void xFeatureSetQualifiersRna(const CGtfAttributes &attribs, CSeq_feat &)
void xCheckForGeneIdConflict(const CGtfReadRecord &record)
void xCreateParent(const CGtfReadRecord &record, const string &parentType, CSeq_annot &annot)
void xAssignFeatureId(const string &stub, CSeq_feat &feat)
bool xUpdateAnnotFeature(const CGff2Record &, CSeq_annot &, ILineErrorListener *=nullptr) override
unique_ptr< CGtfLocationMerger > mpLocations
void xCreateGene(const CGtfReadRecord &, CSeq_annot &)
void xFeatureSetQualifiersGene(const CGtfAttributes &attribs, CSeq_feat &)
void xFeatureSetDataCds(const CGtfReadRecord &, CSeq_feat &)
void xPostProcessAnnot(CSeq_annot &) override
void xCreateRna(const CGtfReadRecord &record, CSeq_annot &annot)
void xAddQualsToParent(const string &recType, const CGtfAttributes &attribs, const string &parentType, CSeq_feat &parent)
void xAddQualToFeat(const CGtfAttributes &attribs, const string &qualName, CSeq_feat &feat)
CRef< CSeq_feat > xFindFeatById(const string &)
CRef< CSeq_annot > ReadSeqAnnot(ILineReader &lr, ILineErrorListener *pErrors=nullptr) override
Read an object from a given line reader, render it as a single Seq-annot, if possible.
void xUpdateAnnotCds(const CGtfReadRecord &, CSeq_annot &)
CGtfReader(unsigned int=0, const string &="", const string &="", SeqIdResolver=CReadUtil::AsSeqId, CReaderListener *=nullptr)
void xFeatureSetDataGene(const CGtfReadRecord &, CSeq_feat &)
void xUpdateGeneAndMrna(const CGtfReadRecord &, CSeq_annot &)
bool xIsCommentLine(const CTempString &line) override
void xUpdateAnnotParent(const CGtfReadRecord &record, const string &parentType, CSeq_annot &annot)
map< string, string > m_TranscriptToGeneMap
TParentChildQualMap m_ParentChildQualMap
void xCreateCds(const CGtfReadRecord &, CSeq_annot &)
void xFeatureSetQualifiersCds(const CGtfAttributes &attribs, CSeq_feat &)
bool xProcessQualifierSpecialCase(const string &, const CGtfAttributes::MultiValue &, CSeq_feat &)
void xSetAncestorXrefs(CSeq_feat &, CSeq_feat &) override
void xFeatureSetQualifiers(const CGtfAttributes &attribs, const set< string > &ignoredAttrs, CSeq_feat &)
void xProcessData(const TReaderData &, CSeq_annot &) override
Common file reader utility functions.
virtual bool xParseBrowserLine(const CTempString &, CSeq_annot &)
unique_ptr< CReaderMessageHandler > m_pMessageHandler
SeqIdResolver mSeqIdResolve
unsigned int m_uLineNumber
vector< TReaderLine > TReaderData
virtual CRef< CSeq_annot > ReadSeqAnnot(CNcbiIstream &istr, ILineErrorListener *pErrors=nullptr)
Read an object from a given input stream, render it as a single Seq-annot.
virtual bool xIsTrackTerminator(const CTempString &)
ESubtype GetSubtype(void) const
namespace ncbi::objects::
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
void AddQualifier(const string &qual_name, const string &qual_val)
Add a qualifier to this feature.
static bool SoTypeToFeature(std::string_view, CSeq_feat &, bool=false)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Abstract base class for lightweight line-by-line reading.
const_iterator begin() const
const_iterator end() const
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator find(const key_type &key) const
const_iterator end() const
Include a standard set of the NCBI C++ Toolkit most basic headers.
static const struct attribute attributes[]
@ eDiag_Error
Error message.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
TSyn & SetSyn(void)
Assign a value to Syn data member.
void SetLocus(const TLocus &value)
Assign a value to Locus data member.
void SetLocus_tag(const TLocus_tag &value)
Assign a value to Locus_tag data member.
EType
type of RNA feature
TDbxref & SetDbxref(void)
Assign a value to Dbxref data member.
void SetLocation(TLocation &value)
Assign a value to Location data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
void SetComment(const TComment &value)
Assign a value to Comment data member.
void SetProduct(TProduct &value)
Assign a value to Product data member.
const TQual & GetQual(void) const
Get the Qual member data.
void SetCode(TCode &value)
Assign a value to Code data member.
void SetExcept(TExcept value)
Assign a value to Except data member.
const TData & GetData(void) const
Get the Data member data.
void SetId(TId &value)
Assign a value to Id data member.
void SetData(TData &value)
Assign a value to Data data member.
void SetPseudo(TPseudo value)
Assign a value to Pseudo data member.
void SetExcept_text(const TExcept_text &value)
Assign a value to Except_text data member.
TQual & SetQual(void)
Assign a value to Qual data member.
bool IsRna(void) const
Check if variant Rna is selected.
bool IsGenbank(void) const
Check if variant Genbank is selected.
static bool s_IsCDSType(const string &recType)
static string s_GetTranscriptBiotype(const CGtfAttributes &attrs)
static CRNA_ref::EType s_RnaTypeFromRecType(const string &rec_type)
static CGtfAttributes s_GetIntersection(const CGtfAttributes &x, const CGtfAttributes &y)
static void s_TrimFeatQuals(const CGtfAttributes &prevAttributes, const CGtfAttributes &attributes, CSeq_feat &feature)
static bool s_IsExonOrUTR(const string &recType)
Lightweight interface for getting lines of data with minimal memory copying.
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4