: m_pMessageListener(pListener)
111 string(
"Bad data line: record ID \"") +
id+
"\" is used multiple times");
118mapIt =
mIds.emplace(
id, list<CGffIdTrackRecord>()).first;
119mapIt->second.push_back(trackRecord);
120 if(!parentId.empty()) {
125 auto& recordList = mapIt->second;
127 if(pendingType ==
"exon") {
128recordList.push_back(trackRecord);
129 if(!parentId.empty()) {
135 if(!
id.
empty()) {
137 autoexpectedType = recordList.front().mSeqType;
138 if(pendingType != expectedType) {
142 throwerrorDuplicateId;
145 autopendingSeqId = record.
Id();
146 autoexpectedSeqId = recordList.front().mSeqId;
147 if(pendingSeqId != expectedSeqId) {
151 throwerrorDuplicateId;
155 if(!parentId.empty()) {
158recordList.push_back(trackRecord);
173 string(
"Bad data line: Parent \""+ parentId +
174 "\" does not refer to a GFF3 record ID"));
179 throwerrorBadParentId;
194mIdResolver(idResolver),
195mIdTracker(pListener),
196m_pMessageListener(pListener)
210 autoseqSize = seqSizeIt->second;
216 if(record.
SeqStart() >= seqSize) {
217 stringmessage =
"Bad data line: ";
218message +=
"feature in-point is outside the containing sequence.";
231 stringmessage =
"Bad data line: ";
232message +=
"feature is longer than the entire containing sequence.";
263 for(
const auto&
id: ids) {
282 LOCATIONS& locations = existingEntry->second;
284 if(locations.size() == 1 && locations.front().mType ==
"gene") {
288existingEntry->second.push_front(
location);
310 if(recordType ==
"exon"||
311recordType ==
"five_prime_utr"||
312recordType ==
"three_prime_utr") {
322 for(
auto&
id: ids) {
323 id= record.
Type() +
":"+ id;
347 const string& seqId)
const 350 autosizeIt = mSequenceSizes.find(seqId);
351 if(sizeIt == mSequenceSizes.end()) {
354 returnsizeIt->second;
364 const auto& seqId = *(locRecord.
mpGffId);
368 if(sequenceSize == 0) {
370pInterval->
SetId().Assign(seqId);
374pLocation->
SetInt(*pInterval);
379 if(locRecord.
mStart>= sequenceSize || locRecord. mStop < sequenceSize) {
381pInterval->
SetId().Assign(seqId);
383pInterval->
SetTo(locRecord.
mStop% sequenceSize);
385pLocation->
SetInt(*pInterval);
389pTop->
SetId().Assign(seqId);
391pTop->
SetTo(locRecord.
mStop% sequenceSize);
395pBottom->
SetId().Assign(seqId);
397pBottom->
SetTo(sequenceSize - 1);
404 if(locRecord.
mStart>= sequenceSize || locRecord.
mStop< sequenceSize) {
406pInterval->
SetId().Assign(seqId);
408pInterval->
SetTo(locRecord.
mStop% sequenceSize);
410pLocation->
SetInt(*pInterval);
414pBottom->
SetId().Assign(seqId);
416pBottom->
SetTo(sequenceSize - 1);
420pTop->
SetId().Assign(seqId);
422pTop->
SetTo(locRecord.
mStop% sequenceSize);
440 if(locations.empty()) {
445 if(locations.size() == 1) {
446 auto& onlyOne = locations.front();
448frame = onlyOne.mFrame;
452 auto& mix = pSeqLoc->
SetMix();
453 for(
auto&
location: locations) {
457 const auto&
front= locations.front();
458frame =
front.mFrame;
477 for(
const auto&
location: locations) {
bool GetAttribute(const string &, string &) const
void GetLocation(const string &, CRef< CSeq_loc > &, CCdregion::EFrame &)
map< string, TSeqPos > mSequenceSizes
CReaderListener * m_pMessageListener
void MergeLocation(CRef< CSeq_loc > &, CCdregion::EFrame &, LOCATIONS &)
CGff3ReadRecord::SeqIdResolver mIdResolver
LOCATION_MAP mMapIdToLocations
TSeqPos GetSequenceSize(const string &) const
CRef< CSeq_loc > xGetRecordLocation(const CGff3LocationRecord &)
bool AddRecord(const CGff2Record &)
static bool xGetLocationIds(const CGff2Record &, list< string > &)
list< CGff3LocationRecord > LOCATIONS
CGff3LocationMerger(unsigned int flags=0, CGff3ReadRecord::SeqIdResolver=CReadUtil::AsSeqId, TSeqPos sequenceSize=0, CReaderListener *pListener=nullptr)
void VerifyRecordLocation(const CGff2Record &)
void AddRecordForId(const string &, const CGff2Record &)
static void xSortLocations(LOCATIONS &)
static bool ComparePositions(const CGff3LocationRecord &lhs, const CGff3LocationRecord &rhs)
static bool ComparePartNumbers(const CGff3LocationRecord &lhs, const CGff3LocationRecord &rhs)
CConstRef< CSeq_id > mpGffId
CGff3LocationRecord(const CGff2Record &, unsigned int, CGff3ReadRecord::SeqIdResolver)
CRef< CSeq_id > GetSeqId(TReaderFlags, SeqIdResolver=nullptr) const
const string & Type() const
ENa_strand Strand() const
const string & Id() const
const string & NormalizedType() const
CGffIdTracker(CReaderListener *pListener=nullptr)
CReaderListener * m_pMessageListener
map< string, list< CGffIdTrackRecord > > mIds
void CheckAndIndexRecord(string id, const CGff2Record &record)
const_iterator end() const
const_iterator find(const key_type &key) const
The NCBI C++ standard methods for dealing with std::string.
static const char location[]
unsigned int TSeqPos
Type for sequence locations and lengths.
@ eDiag_Error
Error message.
void SetPacked_int(TPacked_int &v)
CRef< CSeq_loc > Merge(TOpFlags flags, ISynonymMapper *syn_mapper) const
All functions create and return a new seq-loc object.
void SetNull(void)
Override all setters to incorporate cache invalidation.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
@ eFrame_not_set
not set, code uses one
void SetTo(TTo value)
Assign a value to To data member.
void SetId(TId &value)
Assign a value to Id data member.
void SetFrom(TFrom value)
Assign a value to From data member.
virtual void Reset(void)
Reset the whole object.
void SetStrand(TStrand value)
Assign a value to Strand data member.
constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head
constexpr bool empty(list< Ts... >) noexcept
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4