( !score_blk->protein_alphabet ) {
86 "BlastScoreBlk is not configured for a protein alphabet");
91score_blk->kbp_psi[0]->Lambda = pssm->GetPssm().GetLambdaUngapped();
92}
else if(score_blk->kbp_std[0]->Lambda > 0.0) {
93score_blk->kbp_psi[0]->Lambda = score_blk->kbp_std[0]->Lambda;
97score_blk->kbp_psi[0]->K = pssm->GetPssm().GetKappaUngapped();
98}
else if(score_blk->kbp_std[0]->K > 0.0) {
99score_blk->kbp_psi[0]->K = score_blk->kbp_std[0]->K;
101score_blk->kbp_psi[0]->logK =
log(score_blk->kbp_psi[0]->K);
104score_blk->kbp_psi[0]->H = pssm->GetPssm().GetHUngapped();
105}
else if(score_blk->kbp_std[0]->K > 0.0) {
106score_blk->kbp_psi[0]->H = score_blk->kbp_std[0]->H;
111score_blk->kbp_gap_psi[0]->Lambda = pssm->GetPssm().GetLambda();
112}
else if(score_blk->kbp_gap_std[0]->Lambda > 0.0) {
113score_blk->kbp_gap_psi[0]->Lambda = score_blk->kbp_gap_std[0]->Lambda;
117score_blk->kbp_gap_psi[0]->K = pssm->GetPssm().GetKappa();
118}
else if(score_blk->kbp_gap_std[0]->K > 0.0) {
119score_blk->kbp_gap_psi[0]->K = score_blk->kbp_gap_std[0]->K;
121score_blk->kbp_gap_psi[0]->logK =
log(score_blk->kbp_gap_psi[0]->K);
124score_blk->kbp_gap_psi[0]->H = pssm->GetPssm().GetH();
125}
else if(score_blk->kbp_gap_std[0]->H > 0.0) {
126score_blk->kbp_gap_psi[0]->H = score_blk->kbp_gap_std[0]->H;
130 const size_tkQueryLength = pssm->GetPssm().GetNumColumns();
134 boolmissing_scores =
false;
136unique_ptr< CNcbiMatrix<int> > scores
138 _ASSERT(score_blk->psi_matrix->pssm->ncols == scores->GetCols());
139 _ASSERT(score_blk->psi_matrix->pssm->nrows == scores->GetRows());
141 for(
TSeqPosc = 0; c < scores->GetCols(); c++) {
142 for(
TSeqPos r= 0;
r< scores->GetRows();
r++) {
143score_blk->psi_matrix->pssm->data[c][
r] = (*scores)(
r, c);
146}
catch(
conststd::runtime_error&) {
147missing_scores =
true;
151 boolmissing_freq_ratios =
false;
153 boolfreq_ratios_all_zeros =
true;
156unique_ptr< CNcbiMatrix<double> > freq_ratios
158 _ASSERT(score_blk->psi_matrix->pssm->ncols ==
159freq_ratios->GetCols());
160 _ASSERT(score_blk->psi_matrix->pssm->nrows ==
161freq_ratios->GetRows());
163 for(
TSeqPosc = 0; c < freq_ratios->GetCols(); c++) {
164 for(
TSeqPos r= 0;
r< freq_ratios->GetRows();
r++) {
165score_blk->psi_matrix->freq_ratios[c][
r] =
166(*freq_ratios)(
r, c);
168freq_ratios_all_zeros =
false;
172}
catch(
conststd::runtime_error&) {
173missing_freq_ratios =
true;
176 if(missing_scores && missing_freq_ratios) {
178 "Missing scores and frequency ratios in PSSM");
185freq_ratios_all_zeros) {
187os <<
"Frequency ratios for PSSM are all zeros, frequency ratios for ";
188os << options->GetMatrixName() <<
" will be used during traceback ";
189os <<
"in composition based statistics";
192 _ASSERT(messages.size() == 1);
193messages.front().push_back(sm);
201os <<
"Composition-based score adjustment conditioned on " 202<<
"sequence properties and unconditional composition-based score " 203<<
"adjustment is not supported with PSSMs, resetting to default " 204<<
"value of standard composition-based statistics";
207 _ASSERT(messages.size() == 1);
208messages.front().push_back(sm);
222 typenamelist<T>::const_iterator itr =
source.begin();
223 if(by_row ==
true) {
225 for(
SIZE_TYPEc = 0; c < num_columns; c++) {
226dest(
r, c) = *itr++;
230 for(
SIZE_TYPEc = 0; c < num_columns; c++) {
232dest(
r, c) = *itr++;
242 if( !pssm_asn.GetPssm().CanGetFinalData() ||
243!pssm_asn.GetPssm().GetFinalData().CanGetScores() ||
244pssm_asn.GetPssm().GetFinalData().GetScores().empty() ) {
245 throwruntime_error(
"Cannot obtain scores from ASN.1 PSSM");
248 const CPssm& pssm = pssm_asn.GetPssm();
250(
size_t)pssm.
GetNumRows()*pssm_asn.GetPssm().GetNumColumns());
252unique_ptr< CNcbiMatrix<int> > retval
260 returnretval.release();
267 if( !pssm_asn.GetPssm().CanGetIntermediateData() ||
268!pssm_asn.GetPssm().GetIntermediateData().CanGetFreqRatios() ||
269pssm_asn.GetPssm().GetIntermediateData().GetFreqRatios().empty() ) {
270 throwruntime_error(
"Cannot obtain frequency ratios from ASN.1 PSSM");
273 const CPssm& pssm = pssm_asn.GetPssm();
275(
size_t)pssm.
GetNumRows()*pssm_asn.GetPssm().GetNumColumns());
277unique_ptr< CNcbiMatrix<double> > retval
283 returnretval.release();
288(
constobjects::CPssmWithParameters& pssm_asn)
290 if( !pssm_asn.GetPssm().CanGetIntermediateData() ||
291!pssm_asn.GetPssm().GetIntermediateData().CanGetResFreqsPerPos() ||
292pssm_asn.GetPssm().GetIntermediateData().GetResFreqsPerPos().empty() )
297 const CPssm& pssm = pssm_asn.GetPssm();
299(
size_t)pssm.
GetNumRows()*pssm_asn.GetPssm().GetNumColumns());
301unique_ptr< CNcbiMatrix<int> > retval
307 returnretval.release();
312(
constobjects::CPssmWithParameters& pssm_asn)
314 if( !pssm_asn.GetPssm().CanGetIntermediateData() ||
315!pssm_asn.GetPssm().GetIntermediateData().
316CanGetWeightedResFreqsPerPos() ||
317pssm_asn.GetPssm().GetIntermediateData().
318GetWeightedResFreqsPerPos().
empty() ) {
322 const CPssm& pssm = pssm_asn.GetPssm();
324GetWeightedResFreqsPerPos().
size() ==
325(
size_t)pssm.
GetNumRows()*pssm_asn.GetPssm().GetNumColumns());
327unique_ptr< CNcbiMatrix<double> > retval
333 returnretval.release();
338(
constobjects::CPssmWithParameters& pssm_asn,
339vector<double>& retval)
342 if( !pssm_asn.GetPssm().CanGetIntermediateData() ||
343!pssm_asn.GetPssm().GetIntermediateData().CanGetInformationContent() ||
348 const CPssm& pssm = pssm_asn.GetPssm();
351back_inserter(retval));
356(
constobjects::CPssmWithParameters& pssm_asn,
357vector<double>& retval)
360 if( !pssm_asn.GetPssm().CanGetIntermediateData() ||
362GetIntermediateData().CanGetGaplessColumnWeights() ||
367 const CPssm& pssm = pssm_asn.GetPssm();
370back_inserter(retval));
375vector<double>& retval)
378 if( !pssm_asn.GetPssm().CanGetIntermediateData() ||
379!pssm_asn.GetPssm().GetIntermediateData().CanGetSigma() ||
380pssm_asn.GetPssm().GetIntermediateData().GetSigma().empty() ) {
383 const CPssm& pssm = pssm_asn.GetPssm();
386back_inserter(retval));
391(
constobjects::CPssmWithParameters& pssm_asn, vector<int>& retval)
394 if( !pssm_asn.GetPssm().CanGetIntermediateData() ||
396GetIntermediateData().CanGetIntervalSizes() ||
401 const CPssm& pssm = pssm_asn.GetPssm();
404back_inserter(retval));
409(
constobjects::CPssmWithParameters& pssm_asn, vector<int>& retval)
412 if( !pssm_asn.GetPssm().CanGetIntermediateData() ||
414GetIntermediateData().CanGetNumMatchingSeqs() ||
419 const CPssm& pssm = pssm_asn.GetPssm();
422back_inserter(retval));
430 _ASSERT(pssm.GetParams().GetRpsdbparams().IsSetMatrixName());
431pssm.SetParams().SetRpsdbparams().SetGapOpen(gap_open);
432pssm.SetParams().SetRpsdbparams().SetGapExtend(gap_extend);
444 if(pssm.GetPssm().CanGetFinalData()) {
445 _ASSERT(pssm.GetPssm().GetFinalData().GetScores().size() ==
446(
size_t)
BLASTAA_SIZE*pssm.GetPssm().GetNumColumns());
449 const size_tdiff = (size_t)
BLASTAA_SIZE- pssm.GetPssm().GetNumRows();
451pssm.SetPssm().SetIntermediateData().SetFreqRatios();
453 if(pssm.GetPssm().GetByRow() ==
true) {
454freq_ratios.resize(pssm.GetPssm().GetNumColumns() *
BLASTAA_SIZE, 0.0);
456CPssmIntermediateData::TFreqRatios::iterator itr = freq_ratios.begin();
457 for(
intc = 0; c < pssm.GetPssm().GetNumColumns(); c++) {
458advance(itr, pssm.GetPssm().GetNumRows());
459freq_ratios.insert(itr, diff, 0.0);
476(
size_t)pssm->GetPssm().GetNumColumns());
477unique_ptr< CNcbiMatrix<double> > freq_ratios
487 if(pssm->GetPssm().GetNumRows() !=
492pssm->SetPssm().SetFinalData().SetScores() =
494pssm->SetPssm().SetFinalData().SetLambda() =
496pssm->SetPssm().SetFinalData().SetKappa() =
498pssm->SetPssm().SetFinalData().SetH() =
512(score_type ==
"e_value"|| score_type ==
"sum_e")) {
523 if(score.
GetValue().
IsReal() && score_type ==
"bit_score") {
550CPsiBlastAlignmentProcessor::operator()
551(
constobjects::CSeq_align_set& alignments,
552 doubleevalue_inclusion_threshold,
560 if(e < evalue_inclusion_threshold) {
572 if( !pssm.CanGetPssm() ) {
574 "Missing PSSM data");
577 boolmissing_scores(
false);
578 if( !pssm.GetPssm().CanGetFinalData() ||
579!pssm.GetPssm().GetFinalData().CanGetScores() ||
580pssm.GetPssm().GetFinalData().GetScores().empty() ) {
581missing_scores =
true;
584 boolmissing_freq_ratios(
false);
585 if( !pssm.GetPssm().CanGetIntermediateData() ||
586!pssm.GetPssm().GetIntermediateData().CanGetFreqRatios() ||
587pssm.GetPssm().GetIntermediateData().GetFreqRatios().empty() ) {
588missing_freq_ratios =
true;
591 if(missing_freq_ratios && missing_scores) {
593 "PSSM data must contain either scores or frequency ratios");
595 if(missing_scores && require_scores) {
597 "PSSM data must contain scores (did you run the PSSM engine?)");
601 if(!missing_scores &&
602pssm.GetPssm().GetFinalData().CanGetScalingFactor() &&
603pssm.GetPssm().GetFinalData().GetScalingFactor() != 1) {
604 string msg(
"PSSM has a scaling factor of ");
607.GetScalingFactor());
608 msg+=
". PSI-BLAST does not accept scaled PSSMs";
612 if( !pssm.HasQuery() ) {
614 "Missing query sequence in PSSM");
616 if( !pssm.GetQuery().IsSeq() ) {
618 "Query sequence in ASN.1 PSSM is not a single Bioseq");
621 if( !pssm.GetPssm().GetIsProtein() ) {
623 "PSSM does not represent protein scoring matrix");
636 stringexcpt_msg(
"PSI-BLAST only accepts ");
638excpt_msg +=
"one protein sequence as query";
640excpt_msg +=
"protein sequences as subjects";
654 if(e.
GetMsg().find(
"Incompatible sequence codings") !=
NPOS) {
663 static_cast<unsigned>(sblk->
length));
665excpt_msg.assign(
"PSI-BLAST cannot accept nucleotide ");
666excpt_msg += (qf_type ==
eQFT_Query?
"queries":
"subjects");
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Auxiliary functions for BLAST.
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
Declares the BLAST exception class.
The structures and functions in blast_options.
#define BLAST_EXPECT_VALUE
Default parameters for saving hits.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
const double kEpsilon
Small constant to test against 0.
Definitions and prototypes used by blast_stat.c to calculate BLAST statistics.
#define BLAST_SCORE_MIN
minimum allowed score (for one letter comparison).
SPsiBlastScoreMatrix * SPsiBlastScoreMatrixNew(size_t ncols)
Allocates a new SPsiBlastScoreMatrix structure of dimensions ncols by BLASTAA_SIZE.
@ eDeltaBlast
Delta Blast.
Defines BLAST error codes (user errors included)
Handle to the options to the BLAST algorithm.
Encapsulates ALL the BLAST algorithm's options.
static ESequenceType SequenceType(const char *str, unsigned length=0, ESTStrictness strictness=eST_Default)
Guess sequence type.
NCBI C++ Object Manager free implementation of IQueryFactory.
Implements the interface to retrieve data for the last 2 stages of the PSSM creation.
Computes a PSSM as specified in PSI-BLAST.
static const double kInvalidStat
Error or Warning Message from search.
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
@ eCompositionBasedStats
Composition-based statistics as in NAR 29:2994-3005, 2001.
@ eNoCompositionBasedStats
Don't use composition based statistics.
static SQLCHAR output[256]
static void QueryFactory(CRef< IQueryFactory > query_factory, const CBlastOptionsHandle &opts_handle, EQueryFactoryType query_factory_type=eQFT_Query)
Function to perform sanity checks on the query factory.
static CNcbiMatrix< int > * GetResidueFrequencies(const objects::CPssmWithParameters &pssm)
Returns matrix of BLASTAA_SIZE by query size (dimensions are opposite of what is stored in the BlastS...
CRef< objects::CPssmWithParameters > Run()
Runs the PSSM engine to compute the PSSM.
void PsiBlastSetupScoreBlock(BlastScoreBlk *score_blk, CConstRef< objects::CPssmWithParameters > pssm, TSearchMessages &messages, CConstRef< CBlastOptions > options)
Setup CORE BLAST score block structure with data from the scoremat PSSM.
void Convert2Matrix(const list< T > &source, CNcbiMatrix< T > &dest, bool by_row, SIZE_TYPE num_rows, SIZE_TYPE num_columns)
Convert a list of values into a CNcbiMatrix.
static CNcbiMatrix< int > * GetScores(const objects::CPssmWithParameters &pssm)
Returns matrix of BLASTAA_SIZE by query size (dimensions are opposite of what is stored in the BlastS...
#define BLASTAA_SIZE
Size of aminoacid alphabet.
static void GetSigma(const objects::CPssmWithParameters &pssm, vector< double > &retval)
Data used in sequence weights computation.
static CNcbiMatrix< double > * GetWeightedResidueFrequencies(const objects::CPssmWithParameters &pssm)
Returns matrix of BLASTAA_SIZE by query size (dimensions are opposite of what is stored in the BlastS...
virtual BLAST_SequenceBlk * GetSequenceBlk()=0
Accessor for the BLAST_SequenceBlk structure.
CRef< ILocalQueryData > MakeLocalQueryData(const CBlastOptions *opts)
Creates and caches an ILocalQueryData.
EQueryFactoryType
Enumeration to specify the different uses of the query factory.
int GetGapExtensionCost() const
static double s_GetBitScore(const CScore &score)
Returns the bit_score from this score object.
static void s_AdjustFrequencyRatiosMatrixToMatchScoreMatrix(objects::CPssmWithParameters &pssm)
After creating the PSSM from frequency ratios, adjust the frequency ratios matrix to match the dimens...
static double s_GetEvalue(const CScore &score)
Returns the evalue from this score object.
static CNcbiMatrix< double > * GetFreqRatios(const objects::CPssmWithParameters &pssm)
Returns matrix of BLASTAA_SIZE by query size (dimensions are opposite of what is stored in the BlastS...
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
static void GetNumMatchingSeqs(const objects::CPssmWithParameters &pssm, vector< int > &retval)
Gets the number of matching sequences per position of the PSSM.
void PsiBlastComputePssmScores(CRef< objects::CPssmWithParameters > pssm, const CBlastOptions &opts)
Given a PSSM with frequency ratios and options, invoke the PSSM engine to compute the scores.
virtual size_t GetNumQueries()=0
Get the number of queries.
double GetLowestEvalue(const objects::CDense_seg::TScores &scores, double *bit_score)
Returns the lowest score from the list of scores in CDense_seg::TScores.
void PsiBlastAddAncillaryPssmData(objects::CPssmWithParameters &pssm, int gap_open, int gap_extend)
Even though the query sequence and the matrix gap costs are not a product of the PSSM engine,...
static void Pssm(const objects::CPssmWithParameters &pssm, bool require_scores=false)
Perform validation on the PSSM.
static void GetGaplessColumnWeights(const objects::CPssmWithParameters &pssm, vector< double > &retval)
Returns the relative gapless PSSM column weights to pseudocounts for the provided PSSM.
virtual size_t GetSeqLength(size_t index)=0
Get the length of the sequence indicated by index.
int GetGapOpeningCost() const
static void GetInformationContent(const objects::CPssmWithParameters &pssm, vector< double > &retval)
Returns the information content per position of the PSSM.
const char * GetMatrixName() const
static void GetIntervalSizes(const objects::CPssmWithParameters &pssm, vector< int > &retval)
Length of the aligned regions per position of the query sequence.
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string & GetMsg(void) const
Get message string.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define USING_SCOPE(ns)
Use the specified namespace.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
NCBI_NS_STD::string::size_type SIZE_TYPE
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
const TStr & GetStr(void) const
Get the variant data.
const TFreqRatios & GetFreqRatios(void) const
Get the FreqRatios member data.
const TNumMatchingSeqs & GetNumMatchingSeqs(void) const
Get the NumMatchingSeqs member data.
TNumRows GetNumRows(void) const
Get the NumRows member data.
const TGaplessColumnWeights & GetGaplessColumnWeights(void) const
Get the GaplessColumnWeights member data.
TH GetH(void) const
Get the H member data.
TKappa GetKappa(void) const
Get the Kappa member data.
const TScores & GetScores(void) const
Get the Scores member data.
const TWeightedResFreqsPerPos & GetWeightedResFreqsPerPos(void) const
Get the WeightedResFreqsPerPos member data.
const TIntervalSizes & GetIntervalSizes(void) const
Get the IntervalSizes member data.
const TSigma & GetSigma(void) const
Get the Sigma member data.
const TInformationContent & GetInformationContent(void) const
Get the InformationContent member data.
const TFinalData & GetFinalData(void) const
Get the FinalData member data.
TNumColumns GetNumColumns(void) const
Get the NumColumns member data.
const TIntermediateData & GetIntermediateData(void) const
Get the IntermediateData member data.
TByRow GetByRow(void) const
Get the ByRow member data.
list< double > TFreqRatios
const TResFreqsPerPos & GetResFreqsPerPos(void) const
Get the ResFreqsPerPos member data.
const TPssm & GetPssm(void) const
Get the Pssm member data.
TLambda GetLambda(void) const
Get the Lambda member data.
bool IsReal(void) const
Check if variant Real is selected.
const TValue & GetValue(void) const
Get the Value member data.
vector< CRef< CScore > > TScores
list< CRef< CSeq_align > > Tdata
TReal GetReal(void) const
Get the variant data.
const TId & GetId(void) const
Get the Id member data.
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
const CharType(& source)[N]
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Defines a concrete strategy to obtain PSSM input data for PSI-BLAST.
Declarations of auxiliary functions/classes for PSI-BLAST.
C++ API for the PSI-BLAST PSSM engine.
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Structure to hold a sequence.
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.
Int4 length
Length of sequence.
Uint1 * sequence
Sequence used for search (could be translation).
Structure used for scoring calculations.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4