= sequence.length();
95 unsigned intnGaps = 0;
96vector<unsigned int>::const_iterator posBeg = positions.begin(), posEnd = positions.end();
98numNonAlpha.
clear();
99 for(
unsigned int i= 0;
i<
len; ++
i) {
100 if(find(posBeg, posEnd,
i) != posEnd) {
101numNonAlpha[
i] = nGaps;
104aa =
toupper((
unsigned char) sequence[
i]);
109numNonAlpha[
len] = nGaps;
144 boolbuiltIt =
false;
154 if(dummyCD.
GetId().
Get().size() == 0) {
157global->SetAccession(
"tempCDAcc");
158global->SetVersion(0);
160dummyCD.
SetId().Set().push_back(cdId);
164dummyCD.
SetName(
"tempCDName");
202 boolbuiltIt =
false;
203 unsigned int i, j,
len, maxLen;
206 stringmasterSequence, sequence;
207vector<unsigned int> residueCount;
210 unsigned intnBlocks;
211vector<unsigned int> blockStarts;
212vector<unsigned int> blockLengths;
223maxLen = (maxLenIndex < nSeq) ?
m_sequences[maxLenIndex].length() : 0;
226residueCount.resize(maxLen);
227 for(
i= 0;
i< nSeq; ++
i) {
229 len= sequence.length();
230 for(j = 0; j <
len; ++j) {
231uc =
toupper((
unsigned char) sequence[j]);
238nBlocks =
GetBlocksFromCounts(nSeq, residueCount, forcedBreaks, blockStarts, blockLengths);
239 if(nBlocks == 0 || nBlocks != blockLengths.size())
return false;
254 for(
i= 0;
i< nSeq; ++
i) {
261aligns.push_back(sa);
262 _TRACE(
" Made IBM dummy seq-align "<<
i);
272aligns.push_back(sa);
273 _TRACE(
" Made IBM seq-align "<<
i);
278 if(aligns.size() == nSeq - 1 || (nSeq == 1 && aligns.size() == nSeq)) {
281 _TRACE(
"IBM Seq-annot installed in member variable m_seqAnnot\n");
283 _TRACE(
"Error: IBM Seq-annot NOT installed in member variable m_seqAnnot\n");
291 boolbuiltIt =
false;
292 unsigned int i, j,
len, maxLen, masterLen;
295 stringsequence, masterSequence;
296vector<unsigned int> residueCount, masterResidueCount;
299 unsigned intnBlocks;
300vector<unsigned int> blockStarts;
301vector<unsigned int> blockLengths;
315masterLen = masterSequence.length();
316masterResidueCount.assign(masterLen, 0);
317 for(j = 0; j < masterLen; ++j) {
318uc =
toupper((
unsigned char) masterSequence[j]);
320++masterResidueCount[j];
336 for(
i= 0;
i< nSeq; ++
i) {
342blockLengths.clear();
343nBlocks =
GetBlocksFromCounts(1, masterResidueCount, forcedBreaks, blockStarts, blockLengths);
344 if(nBlocks == 0)
return false;
348aligns.push_back(sa);
349 _TRACE(
" Made IBM dummy seq-align "<<
i);
356residueCount.assign(maxLen, 0);
358 len=
min((
unsigned int)sequence.length(), masterLen);
359 for(j = 0; j <
len; ++j) {
360uc =
toupper((
unsigned char) sequence[j]);
362 if(
isalpha(uc) && masterResidueCount[j] > 0) {
369blockLengths.clear();
371 if(nBlocks == 0)
return false;
380aligns.push_back(pairwiseSA);
388 if(aligns.size() == nSeq - 1 || (nSeq == 1 && aligns.size() == nSeq)) {
391 _TRACE(
"As-is Seq-annot installed in member variable m_seqAnnot\n");
393 _TRACE(
"Error: As-is Seq-annot NOT installed in member variable m_seqAnnot\n");
401 unsigned intmasterIndex = 0;
402 unsigned int i, j, maxLen, nSeq, nStructs;
403 unsigned intnAlignedMax, nGapsMin, nGaps;
404 unsigned intfirstCommon, lastCommon;
407vector<unsigned int> isConsidered;
408vector<unsigned int> nGapsBySeq, nAlignedBySeq, nAlignedByCol, lengths;
409vector<string> tmpSeqs;
430 for(
i= 0;
i< nSeq; ++
i) {
431isConsidered.push_back((nStructs == 0 || dummyCD.
GetPDB(
i, pPDB)) ? 1 : 0);
433lengths.push_back(tmpSeqs.back().length());
434 if(lengths.back() > maxLen) maxLen = lengths.back();
438nAlignedBySeq.resize(nSeq);
439nGapsBySeq.resize(nSeq);
440nAlignedByCol.resize(maxLen);
441 for(j = 0; j < maxLen; ++j) {
443 for(
i= 0;
i< nSeq; ++
i) {
444 if(isConsidered[
i] > 0 && j < lengths[
i] &&
isalpha((
unsigned char) tmpSeqs[
i][j])) {
455firstCommon = maxLen - 1;
458 for(
i= (nStructs > 0) ? nStructs : nSeq;
i> 1 && lastCommon < firstCommon; --
i) {
460 while(j < maxLen && nAlignedByCol[j] <
i) {
466 while(j > 0 && nAlignedByCol[j - 1] <
i) {
475 if(lastCommon <= firstCommon) {
478cout <<
"Pick the best master based on largest footprint ["<< firstCommon <<
", "<< lastCommon-1 <<
"] where "<<
i+1 <<
" of the "<< nSeq <<
" sequences are aligned:\n";
479 if(nStructs > 0) cout <<
"(only the structured sequences were candidates)\n";
484 for(j = firstCommon; j < lastCommon; ++j) {
485alignedSeqs.
clear();
488 for(
i= 0;
i< nSeq; ++
i) {
489 if(isConsidered[
i] > 0 && j < lengths[
i]) {
490 if(
isalpha((
unsigned char) tmpSeqs[
i][j])) {
501 if(nAlignedByCol[j] > 1) {
503alignedSeqsIt != alignedSeqs.
end(); ++alignedSeqsIt) {
504++nAlignedBySeq[*alignedSeqsIt];
512 for(
i= 0;
i< nSeq; ++
i) {
514 if(isConsidered[
i] == 0)
continue;
518 if(nAlignedBySeq[
i] > nAlignedMax) {
519alignedSeqs.
clear();
520nAlignedMax = nAlignedBySeq[
i];
522 _TRACE(
"Current longest aligned row "<<
i<<
" with "<< nAlignedMax <<
" residues.\n");
523}
else if(nAlignedBySeq[
i] == nAlignedMax) {
525 _TRACE(
"Duplicate longest aligned row "<<
i<<
" with "<< nAlignedMax <<
" residues.\n");
529 _TRACE(
"Naligned seqs "<< alignedSeqs.
size() <<
"; tmpSeqs size = "<< tmpSeqs.size());
533 if(alignedSeqs.
size() == 1) {
535}
else if(alignedSeqs.
size() > 1) {
536nGapsMin = 1000000000;
538alignedSeqsIt != alignedSeqs.
end(); ++alignedSeqsIt) {
545nGaps = nGapsBySeq[*alignedSeqsIt];
546 if(nGaps < nGapsMin) {
548masterIndex = *alignedSeqsIt;
553cout <<
" -> master sequence index (zero-based) determined to be "<<
m_masterIndex<< endl;
567 _TRACE(
"Master sequence index (zero-based) determined to be "<<
m_masterIndex<<
"\n");
579 boolinBlock =
false, forcedNewBlock =
false;
580 unsigned int i,
n= counts.size();
581 unsigned intstart = 0;
582 unsigned intblockId = 0;
585 for(
i= 0;
i<
n;
i++)
589 if(counts[
i] >= threshold)
598forcedNewBlock = (
i> 0 && forcedBreak.
find(
i-1) != setEnd);
600 if(counts[
i] < threshold)
603starts.push_back(start);
604lengths.push_back(
i- start);
607}
else if(forcedNewBlock) {
608starts.push_back(start);
609lengths.push_back(
i- start);
615starts.push_back(start);
616lengths.push_back(
i- start);
620 returnstarts.size();
628 stringoriginalSequence;
656newLength = originalSequence.length();
657bioseq.
SetInst().SetLength(newLength);
667 for(
i= 0;
i< nSeq; ++
i) {
674 unsigned int i,
len;
678longestSequenceIndex = 0;
680 for(
i= 0;
i< nSeq; ++
i) {
687 _TRACE(
"New longest sequence "<<
i+1 <<
": new max len = "<<
len<<
"; old max len = "<< maxLen);
689longestSequenceIndex =
i;
693cerr <<
"len = 0 in CacheSequences for i = "<<
i<<
", maxLen = "<< maxLen <<
"; gi = "<< dummyCD.
GetGIFromSequenceList(
i) <<
":\n"<< s << endl;
703 unsigned intj, masterStart, slaveStart;
704 unsigned intnBlocks = blockStarts.
size();
706 if(masterSequence.length() == 0 || slaveSequence.length() == 0)
return false;
707 if(masterSeqid.
Empty() || slaveSeqid.
Empty())
return false;
708 if(nBlocks != blockLengths.size())
return false;
713 if(pairwiseSA.
Empty()) {
715 if(pairwiseSA.
Empty()) {
723TDD& ddl = pairwiseSA->
SetSegs().SetDendiag();
724 for(j = 0; j < nBlocks; ++j) {
732masterCopy->Assign(*masterSeqid);
733slaveCopy->
Assign(*slaveSeqid);
734ids.push_back(masterCopy);
735ids.push_back(slaveCopy);
738masterStart = blockStarts[j] - nGapsPriorToBlockStartMaster[blockStarts[j]];
739slaveStart = blockStarts[j] - nGapsPriorToBlockStart[blockStarts[j]];
740dd->SetStarts().push_back(masterStart);
741dd->SetStarts().push_back(slaveStart);
742dd->SetLen(blockLengths[j]);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void remove_if(Container &c, Predicate *__pred)
TGi GetGIFromSequenceList(int SeqIndex) const
int Num3DAlignments() const
string GetSequenceStringByIndex(int SeqIndex)
bool GetSeqIDForIndex(int SeqIndex, CRef< CSeq_id > &SeqID) const
void SetAccession(string Accession, int Version)
int GetNumSequences() const
bool GetPDB(int Row, const CPDB_seq_id *&pPDB)
const CRef< CSeq_entry > & GetSeqEntry() const
virtual bool ReadFile(CNcbiIstream &iStream)=0
bool BuildMasterSlaveSeqAlign(const CRef< CSeq_id > &masterSeqid, const CRef< CSeq_id > &slaveSeqid, const string &masterSequence, const string &slaveSequence, const vector< unsigned int > &blockStarts, const vector< unsigned int > &blockLengths, CRef< CSeq_align > &pairwiseSA)
static unsigned int GetBlocksFromCounts(unsigned int threshold, const vector< unsigned int > &counts, const set< unsigned int > &forcedBreak, vector< unsigned int > &starts, vector< unsigned int > &lengths)
string GetSequence(unsigned int index) const
bool m_preferStructureMaster
static bool PurgeNonAlphaFromSequence(CBioseq &bioseq)
unsigned int DetermineMasterIndex(CCdCore &dummyCD, MasteringMethod masterMethod)
@ eMostAlignedAndFewestGaps
bool MakeAsIsSeqAnnot(CCdCore &dummyCD)
virtual bool IsSeqAnnotValid() const
void PurgeNonAlphaFromCachedSequences()
CRef< CSeq_annot > m_seqAnnot
vector< string > m_sequences
static bool isNotAlpha(char c)
CSeqAnnotFromFasta(bool doIbm=true, bool preferStructureMaster=false, bool caseSensitive=false)
bool MakeIBMSeqAnnot(CCdCore &dummyCD)
void CacheSequences(CCdCore &dummyCD, unsigned int &longestSequenceIndex, bool degapSequences)
virtual bool MakeSeqAnnotFromFasta(CNcbiIstream &is, CFastaIOWrapper &fastaIO, MasteringMethod masterMethod, unsigned int masterIndex=(unsigned int) eUnassignedMaster)
static void CountNonAlphaToPositions(const vector< unsigned int > &positions, const string &sequence, map< unsigned int, unsigned int > &numNonAlpha)
unsigned int m_masterIndex
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator find(const key_type &key) const
const_iterator end() const
Include a standard set of the NCBI C++ Toolkit most basic headers.
bool PurgeNonAlpha(string &s)
void NcbistdaaToNcbieaaString(const vector< char > &vec, string *str)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty — pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty — not pointing to any object, which means having a null value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
void SetId(TId &value)
Assign a value to Id data member.
const TId & GetId(void) const
Get the Id member data.
const Tdata & Get(void) const
Get the member data.
TGid & SetGid(void)
Select the variant.
void SetSequences(TSequences &value)
Assign a value to Sequences data member.
void SetName(const TName &value)
Assign a value to Name data member.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
void SetDim(TDim value)
Assign a value to Dim data member.
void SetType(TType value)
Assign a value to Type data member.
vector< CRef< CSeq_id > > TIds
list< CRef< CDense_diag > > TDendiag
@ eType_partial
mapping pieces together
void SetData(TData &value)
Assign a value to Data data member.
bool IsSetSeq_data(void) const
The sequence. Check if a value has been assigned to the Seq_data data member.
bool IsNcbieaa(void) const
Check if variant Ncbieaa is selected.
const TInst & GetInst(void) const
Get the Inst member data.
bool IsIupacaa(void) const
Check if variant Iupacaa is selected.
bool IsNcbistdaa(void) const
Check if variant Ncbistdaa is selected.
TNcbieaa & SetNcbieaa(void)
Select the variant.
list< CRef< CSeq_align > > TAlign
void SetInst(TInst &value)
Assign a value to Inst data member.
TNcbistdaa & SetNcbistdaa(void)
Select the variant.
void Select(E_Choice index, EResetVariant reset=eDoResetVariant)
Select the requested variant if needed.
TIupacaa & SetIupacaa(void)
Select the variant.
@ e_Ncbieaa
extended ASCII 1 letter aa codes
unsigned int
A callback function used to compare two keys in a database.
The NCBI C++/STL use hints.
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4