(
const char* filename,
const PSIMsa* msa, vector<string>& seqIds)
59 fp= fopen(filename,
"w");
61cerr <<
"Error: PssmMaker::printMsa - failed to open file "<< filename << endl;
69fprintf(
fp,
">%s\n", seqIds[
i-startRow].c_str());
86matrixName(
"BLOSUM62"),
87requestInformationContent(
false),
88requestResidueFrequencies(
false),
89requestWeightedResidueFrequencies(
false),
90requestFrequencyRatios(
false),
91requestNumIndepObs(
false),
92gaplessColumnWeights(
false),
93unalignedSegThreshold(-1),
94inclusionThreshold(0.5),
108: m_profiles(profiles),m_options(0), m_useConsensus(useConsensus), m_diagRequest(),
130 if(
config.pseudoCount > 0 ) {
137 if(SumAInf > 84 ) iPseudo = 10;
138 else if(SumAInf > 55 ) iPseudo = 7;
139 else if(SumAInf > 43 ) iPseudo = 5;
140 else if(SumAInf > 41.5) iPseudo = 4;
141 else if(SumAInf > 40 ) iPseudo = 3;
142 else if(SumAInf > 39 ) iPseudo = 2;
168vector<char> residuesOnColumn;
326: m_conMaker(0), m_useConsensus(useConsensus), m_addQuery(addQueryToPssm),
327m_masterSeqEntry(), m_trunctMaster(), m_cd(cd), m_pssmInput(0)
339vector<int> seqIndice;
341 if(seqIndice.size() > 0)
378pssmRef = pssmEngine.
Run();
383 if(pssmRef.
Empty())
436list< double >* freqs = 0;
441list< int > & scores = pssm.
SetFinalData().SetScores();
442 for(
unsigned intcol = 0; col < consensus.size(); col++)
444 charc1 = consensus.at(col);
445 for(
char row= 0;
row< numRows;
row++)
449scores.push_back(score);
451freqs->push_back(0.0);
464 static const stringcommaSpace(
", ");
465 static const stringperiodSpaceSpace(
". ");
468list< CRef< CSeq_id > > & ids = bioseq.
SetId();
473list< CRef< CCdd_id > >& cdids =
m_cd->
SetId().Set();
475list< CRef< CCdd_id > >::iterator cit = cdids.begin();
476 for(; cit != cdids.end(); cit++)
480uid = (*cit)->GetUid();
487dbtag.
SetDb(
"CDD");
492dbtag.
SetDb(
"Cdd");
494ids.push_back(seqId);
496list< CRef< CSeqdesc > >& descList = bioseq.
SetDescr().Set();
503 if(cdTitle.length() > 0) {
505cdTitle = cdTitle.substr(0, cdTitle.length() - 1);
510seqDescTitle += commaSpace;
514 if(cdTitle.length() > 0) {
515seqDescTitle += commaSpace + cdTitle + periodSpaceSpace;
519list< CRef< CCdd_descr > >::iterator lit = cddescList.begin();
521 for(; lit != cddescList.end(); lit++)
523 if((*lit)->IsComment())
525 if(cdTitle.length() == 0) {
526seqDescTitle += commaSpace;
528seqDescTitle += (*lit)->GetComment();
530seqDescTitle +=
'.';
538list< CRef< CSeqdesc > >::iterator it = descList.begin();
539 for(; it != descList.end(); it++)
540 if( (*it)->IsTitle() ) {
544descList.push_back(desc);
551 bmp.getSlave().setSeqId(seqId);
580vector<string> seqIdStr;
583seqIdStr.push_back(seqIds[0]->AsFastaString());
584 for(
unsigned int i= 1;
i< seqIds.size();
i++)
586seqIdStr.push_back(seqIds[
i]->AsFastaString());
594 if(fileName.length() == 0) {
599 unsigned int nRows, nCols;
600vector<string> seqIdStr;
604FILE*
fp= fopen(fileName.c_str(),
"w");
606cerr <<
"Error: PssmMaker::printAlignmentByColumn - failed to open file "<< fileName << endl;
614seqIdStr.push_back(seqIds[0]->AsFastaString());
615 for(
unsigned intk = 1; k < seqIds.size(); k++)
617seqIdStr.push_back(seqIds[k]->AsFastaString());
619 for(
i= 0;
i< seqIdStr.size(); ++
i) {
620fprintf(
fp,
"row %d: %s\n",
i,seqIdStr[
i].c_str());
626 static const stringdash(
"-");
627fprintf(
fp,
"Query length: %d; Number of rows: %d\n", nCols,
nRows);
628 for(j = 0; j < nCols; j++) {
629fprintf(
fp,
">column %d\n", j+1);
637fprintf(
fp,
"\n");
644 unsigned int i, j,
nRows, nCols;
653 for(j = 0; j < nCols; j++) {
655colResidues.assign(
nRows+ 1,
'-');
660colResidues[
i] =
'-';
663columnMap[j] = colResidues;
669cd_utils::PssmMaker pm(ccd,
true,
true);
670cd_utils::PssmMakerOptions
config;
671 config.requestFrequencyRatios =
false;
684 bmp.getSlave() =
bmp.getMaster();
685 bmp.remaster(guide);
686 intscore = ps.
score(
bmp,bioseq);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
PSIBlastOptions * PSIBlastOptionsFree(PSIBlastOptions *psi_options)
Deallocate PSI BLAST options.
Int2 PSIBlastOptionsNew(PSIBlastOptions **psi_options)
Initialize default options for PSI BLAST.
PSIMsa * PSIMsaFree(PSIMsa *msa)
Deallocates the PSIMsa structure.
PSIMsa * PSIMsaNew(const PSIMsaDimensions *dimensions)
Allocates and initializes the multiple sequence alignment data structure for use as input to the PSSM...
bool GetSeqAlign(int Row, CRef< CSeq_align > &seqAlign)
bool GetSeqIDFromAlignment(int RowIndex, CRef< CSeq_id > &SeqID) const
bool FindConsensusInSequenceList(vector< int > *indices=NULL) const
bool GetSeqEntryForRow(int rowId, CRef< CSeq_entry > &seqEntry) const
bool GetBioseqForRow(int rowId, CRef< CBioseq > &bioseq)
string GetAccession(int &Version) const
bool GetSeqEntryForIndex(int seqIndex, CRef< CSeq_entry > &seqEntry) const
Computes a PSSM as specified in PSI-BLAST.
const char * GetMatrixName()
Obtain the name of the underlying matrix to use when building the PSSM.
int countResiduesInRow(int row)
unsigned int GetQueryLength()
Get the query's length.
void copyRow(PSIMsaCell *src, PSIMsaCell *dest)
CdPssmInput(ResidueProfiles &profiles, PssmMakerOptions &config, bool useConsensus)
void unalignLeadingTrailingGaps()
PSIDiagnosticsRequest m_diagRequest
ResidueProfiles & m_profiles
void Process()
Algorithm to produce multiple sequence alignment structure should be implemented in this method.
void read(ColumnResidueProfile &crp)
PSIMsa * GetData()
Obtain the multiple sequence alignment structure.
PSIMsaDimensions m_msaDimensions
Multiple sequence alignment dimensions.
const PSIDiagnosticsRequest * GetDiagnosticsRequest()
Obtain the diagnostics data that is requested from the PSSM engine. Its results will be populated in t...
PSIBlastOptions * SetOptions()
Obtain the options for the PSSM engine.
PSIBlastOptions * m_options
unsigned char * GetQuery()
Get the query sequence used as master for the multiple sequence alignment in ncbistdaa encoding.
const PSIBlastOptions * GetOptions()
Obtain the options for the PSSM engine.
static int getResiduesStringSize()
int getIndexByConsensus() const
void getResiduesByRow(vector< char > &residues, bool byNcbiStd=true) const
static unsigned char getNcbiStdCode(char eaa)
static char getEaaCode(char stdCode)
const string & getConsensus()
CRef< CSeq_entry > getConsensusSeqEntry()
ResidueProfiles & getResidueProfiles()
const BlockModelPair & getGuideAlignment() const
void skipUnalignedSeg(int threshold)
const string & getConsensus()
void setOptions(const PssmMakerOptions &option)
CRef< CPssmWithParameters > m_pssmMade
PssmMaker(CCdCore *cd, bool useConsensus=true, bool addQueryToPssm=true)
ConsensusMaker * m_conMaker
CdPssmInput * m_pssmInput
vector< char > m_trunctMaster
bool getTrunctMaster(CRef< CSeq_entry > &seqEntry)
CRef< CPssmWithParameters > makeDefaultPssm()
CRef< CPssmWithParameters > make()
PssmMakerOptions m_config
void printAlignment(string &fileName)
void getPssmColumnResidues(map< unsigned int, string > &columnMap)
void modifyQuery(CRef< CSeq_entry > query)
const BlockModelPair & getGuideAlignment()
void printAlignmentByColumn(string &fileName)
CRef< CSeq_entry > m_masterSeqEntry
int score(const CRef< CSeq_align > align, const CRef< CBioseq > bioseq)
const vector< CRef< CSeq_id > > getSeqIdsByRow() const
void traverseColumnsOnMaster(ColumnReader &cr)
double calcInformationContent(bool byConsensus=true)
const string getConsensus(bool inNcbieaa=true)
int countColumnsOnMaster(string &seq)
void traverseColumnsOnConsensus(ColumnReader &cr)
int GetScore(char i, char j)
The NCBI C++ standard methods for dealing with std::string.
int findHighestScoringRowByPssm(CCdCore *ccd)
static void printMsa(const char *filename, const PSIMsa *msa, vector< string > &seqIds)
string GetScoringMatrixName(EScoreMatrixType type)
void NcbistdaaToNcbieaaString(const vector< char > &vec, string *str)
bool IsConsensus(const CRef< CSeq_id > &seqId)
thread_local unique_ptr< FtaMsgPost > bmp
CRef< objects::CPssmWithParameters > Run()
Runs the PSSM engine to compute the PSSM.
void Reset(void)
Reset reference object.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint32_t Uint4
4-byte (32-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
void SetId(TId &value)
Assign a value to Id data member.
void SetDescription(TDescription &value)
Assign a value to Description data member.
const TName & GetName(void) const
Get the Name member data.
void SetTag(TTag &value)
Assign a value to Tag data member.
TStr & SetStr(void)
Select the variant.
void SetDb(const TDb &value)
Assign a value to Db data member.
TId & SetId(void)
Select the variant.
void SetPssm(TPssm &value)
Assign a value to Pssm data member.
void SetIntermediateData(TIntermediateData &value)
Assign a value to IntermediateData data member.
void SetFinalData(TFinalData &value)
Assign a value to FinalData data member.
void SetNumColumns(TNumColumns value)
Assign a value to NumColumns data member.
void SetNumRows(TNumRows value)
Assign a value to NumRows data member.
TGeneral & SetGeneral(void)
Select the variant.
TSeq & SetSeq(void)
Select the variant.
TId & SetId(void)
Assign a value to Id data member.
void ResetId(void)
Reset Id data member.
TTitle & SetTitle(void)
Select the variant.
void SetInst(TInst &value)
Assign a value to Inst data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
void SetLength(TLength value)
Assign a value to Length data member.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
void ResetSeq_data(void)
Reset Seq_data data member.
@ e_Ncbieaa
extended ASCII 1 letter aa codes
unsigned int
A callback function used to compare two keys in a database.
#define ASSERT
macro for assert.
Declares the CPSIBlastOptionsHandle class.
C++ API for the PSI-BLAST PSSM engine.
#define row(bind, expected)
Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST). Some of these possibly should...
Boolean nsg_compatibility_mode
Compatibility option for the NCBI's structure group (note nsg_ prefix, stands for NCBI's structure gr...
double impala_scaling_factor
Scaling factor as used in IMPALA to do the matrix rescaling.
Int4 pseudo_count
Pseudocount constant.
Structure to allow requesting various diagnostics data to be collected by PSSM engine.
Boolean information_content
request information content
Boolean frequency_ratios
request frequency ratios
Boolean independent_observations
request number of independent observations
Boolean weighted_residue_frequencies
request observed weighted residue frequencies
Boolean gapless_column_weights
request gapless column weights
Boolean residue_frequencies
request observed residue frequencies
Structure to describe the characteristics of a position in the multiple sequence alignment data struc...
Boolean is_aligned
Is this letter part of the alignment?
Uint1 letter
Preferred letter at this position, in ncbistdaa encoding.
Uint4 num_seqs
Number of distinct sequences aligned with the query (does not include the query)
Uint4 query_length
Length of the query.
Multiple sequence alignment (msa) data structure containing the raw data needed by the PSSM engine to...
PSIMsaCell ** data
actual data, dimensions are (dimensions->num_seqs+1) by (dimensions->query_length)
PSIMsaDimensions * dimensions
dimensions of the msa
bool IsRequestingIntermediateData()
double inclusionThreshold
bool requestFrequencyRatios
request frequency ratios
int unalignedSegThreshold
bool requestWeightedResidueFrequencies
request observed weighted residue frequencies
bool gaplessColumnWeights
bool requestResidueFrequencies
request observed residue frequencies
bool requestInformationContent
request information content
bool requestNumIndepObs
request number of independent observations per position
voidp calloc(uInt items, uInt size)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4