program_uc = program;
63 return NStr::ToUpper(program_uc) +
" "+ blast::CBlastVersion().Print();
78blast::CReference::EPublication pub,
81 stringreference(
"Reference");
82 if(pub == blast::CReference::eCompAdjustedMatrices) {
83reference +=
" for compositional score matrix adjustment";
84}
else if(pub == blast::CReference::eCompBasedStats) {
85reference +=
" for composition-based statistics";
87reference +=
" starting in round 2";
89}
else if(pub == blast::CReference::eIndexedMegablast) {
90reference +=
" for database indexing";
92reference +=
" for DELTA-BLAST";
100 stringhttpProt =
"https:";
101 if(!config_reg.
Empty()) {
102 if(config_reg.
HasEntry(
"BLASTFMTUTIL",
"PROTOCOL")) {
103httpProt = config_reg.
Get(
"BLASTFMTUTIL",
"PROTOCOL");
106 str<<
"<b><a href=\"" 108<< blast::CReference::GetPubmedUrl(pub)
109<<
"\">"<< reference <<
"</a>:</b>" 111x_WrapOutputLine(
str.str() + blast::CReference::GetString(pub),
116 str<< reference <<
": ";
117x_WrapOutputLine(
str.str() + blast::CReference::GetHTMLFreeString(pub),
125 stringdefinition_line,
135 stringdbString = (html) ?
"<b>Database:</b> ":
"Database: ";
136 str<< dbString << definition_line << endl;
137 if(!(html && with_links)) x_WrapOutputLine(
str.str(),line_len,
out);
173(
constobjects::CPssmWithParameters& pssm_with_params,
179vector<double> info_content, gapless_col_weights, sigma;
180blast::CScorematPssmConverter::GetInformationContent(pssm_with_params,
182blast::CScorematPssmConverter::GetGaplessColumnWeights(pssm_with_params,
183gapless_col_weights);
184blast::CScorematPssmConverter::GetSigma(pssm_with_params, sigma);
188 boolpssm_calculation_done = info_content.empty() ?
false:
true;
190 if(pssm_calculation_done) {
191 out<<
"\nLast position-specific scoring matrix computed, weighted ";
192 out<<
"observed percentages rounded down, information per position, ";
193 out<<
"and relative weight of gapless real matches to pseudocounts\n";
195 out<<
"\nLast position-specific scoring matrix computed\n";
202 const SIZE_TYPEkQueryLength = pssm_with_params.GetPssm().GetQueryLength();
204(
SIZE_TYPE)pssm_with_params.GetPssm().GetNumColumns());
205unique_ptr< TNcbiMatrixInt > pssm
206(blast::CScorematPssmConverter::GetScores(pssm_with_params));
207unique_ptr< TNcbiMatrixDouble > weighted_res_freqs
208(blast::CScorematPssmConverter::
209GetWeightedResidueFrequencies(pssm_with_params));
210vector<int> interval_sizes, num_matching_seqs;
211blast::CScorematPssmConverter::GetIntervalSizes(pssm_with_params,
213blast::CScorematPssmConverter::GetNumMatchingSeqs(pssm_with_params,
224 if(*it > max_score) {
228 if(-*it > max_score) {
235 while(max_score > 0) {
239 intwidth = num_digits + 2;
246 if(pssm_calculation_done) {
254pssm_with_params.GetPssm().GetQuerySequenceData(
query);
255 constvector<char>& query_seq =
query.Get();
260 out<<
"\n"<< setw(5) << (
i+1) <<
" "<<
273 if(pssm_calculation_done) {
286 out<<
" "<< setprecision(2) << info_content[
i] <<
" ";
289 if((num_matching_seqs[
i] > 1) && (query_seq[
i] != kXResidue)) {
290 out<< setprecision(2) << gapless_col_weights[
i];
298ancillary_data->GetUngappedKarlinBlk();
300ancillary_data->GetGappedKarlinBlk();
302ancillary_data->GetPsiUngappedKarlinBlk();
304ancillary_data->GetPsiGappedKarlinBlk();
305 out<<
"\n\n"<< setprecision(4);
306 out<<
" K Lambda\n";
308 out<<
"Standard Ungapped " 309<< ungapped_kbp->
K<<
" " 310<< ungapped_kbp->
Lambda<<
"\n";
313 out<<
"Standard Gapped " 314<< gapped_kbp->
K<<
" " 315<< gapped_kbp->
Lambda<<
"\n";
317 if(psi_ungapped_kbp) {
318 out<<
"PSI Ungapped " 319<< psi_ungapped_kbp->
K<<
" " 320<< psi_ungapped_kbp->
Lambda<<
"\n";
322 if(psi_gapped_kbp) {
324<< psi_gapped_kbp->
K<<
" " 325<< psi_gapped_kbp->
Lambda<<
"\n";
333 const string& db_name,
334 const string& db_title,
341 static const stringkHistSeqalign(
"Hist Seqalign");
342hist_align_obj->
SetType().SetStr(kHistSeqalign);
343hist_align_obj->
AddField(kHistSeqalign,
true);
348 static const stringkBlastType(
"Blast Type");
349blast_type->
SetType().SetStr(kBlastType);
357 static const stringkVDBNames(
"Database Names");
358blast_db_info->
SetType().SetStr(kVDBNames);
359blast_db_info->
AddField( db_name,
true);
364 static const stringkBlastDBTitle(
"Blast Database Title");
365blast_db_info->
SetType().SetStr(kBlastDBTitle);
368blast_db_info->
AddField(
"n/a",
false);
372blast_db_info->
AddField( db_name,
true);
377blast_db_info->
AddField( db_title, is_nucl );
384retval->
SetData().SetAlign();
386retval->
SetData().SetAlign().push_back(*itr);
395 const intkAsciiSize = 256;
396 Resize(kAsciiSize, kAsciiSize, INT_MIN);
400 const intkNumValues =
max(ncols, nrows);
401vector<char> ncbistdaa_values(kNumValues);
402 for(
intindex = 0; index < kNumValues; ++index)
403ncbistdaa_values[index] = (
char) index;
412vector<char> iupacaa_values(kNumValues);
413 for(
intindex = 0; index < kNumValues; ++index)
414iupacaa_values[index] = iupacaa_seq.
GetIupacaa().
Get()[index];
417 for(
unsigned int row= 0;
row< nrows; ++
row) {
418 for(
unsigned intcol = 0; col < ncols; ++col) {
419 if(iupacaa_values[
row] >= 0 && iupacaa_values[col] >= 0) {
420(*this)((
int)iupacaa_values[
row], (
int)iupacaa_values[col]) =
447 constncbi::TMaskedQueryRegions& mask_info,
448align_format::CDisplaySeqalign::SeqLocCharOption mask_char,
452vector<CRange<int> > segs_v;
453 for(
intindex = 0; index < kNumSegs; ++index) {
456segs_v.push_back(range);
459vector<CRange<int> > masks_v;
460 intaln_stop =
static_cast<int>(query_seq.size()) - 1;
461 ITERATE(ncbi::TMaskedQueryRegions, mask_iter, mask_info) {
462 if((*mask_iter)->GetFrame() != query_frame)
466(*mask_iter)->GetInterval().GetFrom());
469(*mask_iter)->GetInterval().GetTo());
471 if(query_frame < 0) {
480masks_v.push_back(range);
488 for(
intseg_index = 0;
489seg_index < (
int) segs_v.size() && mask_index < (
int) masks_v.size();
491 if(segs_v[seg_index].
Empty())
493 intseg_start = segs_v[seg_index].GetFrom();
494 intseg_stop = segs_v[seg_index].GetTo();
496 while(mask_index < (
int) masks_v.size() &&
497(mask_pos =
max(seg_start, masks_v[mask_index].GetFrom()))
499 intmask_stop =
min(seg_stop, masks_v[mask_index].GetTo());
501 for( ; mask_pos <= mask_stop; ++mask_pos) {
502 if( query_seq[mask_pos] ==
'-')
continue;
504query_seq[mask_pos] =
'X';
506query_seq[mask_pos]=
'N';
508query_seq[mask_pos] =
509 tolower((
unsigned char)query_seq[mask_pos]);
514 if(mask_pos < seg_stop)
541 constobjects::CDense_seg& ds,
542objects::CScope& scope,
553 string& masked_query,
555 constobjects::CDense_seg & ds,
556objects::CScope & scope,
559 constncbi::TMaskedQueryRegions& mask_info,
560align_format::CDisplaySeqalign::SeqLocCharOption mask_char,
568masked_query =
query;
569 s_MaskQuerySeq(aln_vec, masked_query, mask_info, mask_char, query_frame);
578 if(!org_align_set.
IsSet() || org_align_set.
Get().empty()) {
579 _TRACE(
"Empty seq_align_set");
584 unsigned intcheck_type = score_type;
585 if(org_align_set.
Get().front()->GetNamedScore(
"seq_percent_coverage", dont_care)) {
588 if(org_align_set.
Get().front()->GetNamedScore(
"uniq_seq_percent_coverage", dont_care)) {
608 _TRACE(
"Invalid Query Length");
613list<CRef<CSeq_align> > & tmp_align_list = tmp_align_set.
Set();
614list<CRef<CSeq_align> > & org_align_list = org_align_set.
Set();
616list<CRef<CSeq_align> >::iterator left_it = org_align_list.begin();
617list<CRef<CSeq_align> >::iterator right_it = org_align_list.begin();
619 while(left_it != org_align_list.end())
621 const CSeq_id& cur_id = (*left_it)->GetSeq_id(1);
624 for(; right_it != org_align_list.end(); ++right_it)
626 const CSeq_id&
id= (*right_it)->GetSeq_id(1);
627 if(!
id.Match(cur_id))
631tmp_align_list.assign(left_it, right_it);
633 intmaster_coverage = align_format::CAlignFormatUtil::GetMasterCoverage(tmp_align_set);
637 doublesubj_coverage = 100.0 * (double) master_coverage/ (
double) query_len;
640 if(subj_coverage < 99)
643(*left_it)->SetNamedScore (
"seq_percent_coverage", (
int) subj_coverage);
647 intuniq_coverage = align_format::CAlignFormatUtil::GetUniqSeqCoverage(tmp_align_set);
650 doubleuniq_subj_coverage = 100.0 * (double) uniq_coverage/ (
double) query_len;
653 if(uniq_subj_coverage < 99)
654uniq_subj_coverage +=0.5;
656(*left_it)->SetNamedScore (
"uniq_seq_percent_coverage", (
int) uniq_subj_coverage);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
BLAST formatter utilities.
Boolean Blast_SubjectIsNucleotide(EBlastProgramType p)
Returns true if the subject is nucleotide.
#define BLAST_SCORE_MIN
minimum allowed score (for one letter comparison).
Definitions of special type used in BLAST.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
@ eDeltaBlast
Delta Blast.
CNcbiMatrix< int > TNcbiMatrixInt
static void s_MaskQuerySeq(CAlnVec &alnvec, string &query_seq, const ncbi::TMaskedQueryRegions &mask_info, align_format::CDisplaySeqalign::SeqLocCharOption mask_char, int query_frame)
Masks a query sequence string corresponding to an alignment, given a list of mask locations.
CNcbiMatrix< double > TNcbiMatrixDouble
static int RESIDUE_ORDER[]
Standard order of letters according to S.
static void s_GetQueryAndSubjectStrings(CAlnVec &aln_vec, string &query, string &subject, int master_gen_code, int slave_gen_code)
TSignedSeqPos GetAlnPosFromSeqPos(TNumrow row, TSeqPos seq_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
TSeqPos GetAlnStop(TNumseg seg) const
TSeqPos GetAlnStart(TNumseg seg) const
TNumseg GetNumSegs(void) const
void SetGapChar(TResidue gap_char)
string & GetWholeAlnSeqString(TNumrow row, string &buffer, TSeqPosList *insert_aln_starts=0, TSeqPosList *insert_starts=0, TSeqPosList *insert_lens=0, unsigned int scrn_width=0, TSeqPosList *scrn_lefts=0, TSeqPosList *scrn_rights=0) const
void SetAaCoding(TCoding coding)
void SetGenCode(int gen_code, TNumrow row=-1)
TSeqPos GetLength(void) const
bool IsSetLength(void) const
void Resize(size_t i, size_t j, int val=int())
resize this matrix, filling the empty cells with a known value
void AddUserObject(CUser_object &obj)
static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
std::ofstream out("events_result.xml")
main entry point for tests
bool Empty(const CNcbiOstrstream &src)
static const char * str(char *buf, int n)
EBlastProgramType EProgramToEBlastProgramType(EProgram p)
Convert EProgram to EBlastProgramType.
const Uint1 AMINOACID_TO_NCBISTDAA[]
Translates between ncbieaa and ncbistdaa.
string EProgramToTaskName(EProgram p)
Convert a EProgram enumeration value to a task name (as those used in the BLAST command line binaries...
const char NCBISTDAA_TO_AMINOACID[]
Translates between ncbieaa and ncbistdaa.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
SBlastXMLIncremental()
Default ctor()
static void PrintAsciiPssm(const objects::CPssmWithParameters &pssm, CConstRef< blast::CBlastAncillaryData > ancillary_data, CNcbiOstream &out)
Prints the PSSM in ASCII format (as in blastpgp's -Q option)
static void BlastPrintVersionInfo(const string program, bool html, CNcbiOstream &out)
Print out blast engine version.
static void PrintDbInformation(size_t line_len, string definition_line, int nNumSeqs, Uint8 nTotalLength, bool html, bool with_links, CNcbiOstream &out)
static void BlastPrintReference(bool html, size_t line_len, CNcbiOstream &out, blast::CReference::EPublication publication=blast::CReference::eGappedBlast, bool is_psiblast=false)
Print out blast reference.
static void InsertSubjectScores(objects::CSeq_align_set &org_align_set, const objects::CBioseq_Handle &query_handle, TSeqRange query_range=TSeqRange(), ESubjectScores score_type=eQueryCovPerSubj)
string m_SerialXmlEnd
tag to be printed at end.
static CRef< objects::CSeq_annot > CreateSeqAnnotFromSeqAlignSet(const objects::CSeq_align_set &alnset, blast::EProgram program, const string &db_name, const string &db_title, bool vdb_search=false)
static void GetWholeAlnSeqStrings(string &query, string &subject, const objects::CDense_seg &ds, objects::CScope &scope, int master_gen_code, int slave_gen_code)
static string BlastGetVersion(const string program)
Returns the version and release date, e.g.
CBlastFormattingMatrix(int **data, unsigned int nrows, unsigned int ncols)
Constructor - allocates the matrix with appropriate size and populates with the values retrieved from...
const TPrim & Get(void) const
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty -- not pointing to any object, which means having a null value.
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty -- pointing to an object and has a non-null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
position_type GetLength(void) const
bool NotEmpty(void) const
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
virtual bool HasEntry(const string &section, const string &name=kEmptyStr, TFlags flags=0) const
bool Empty(TFlags flags=fAllLayers) const
Verify if Registry is empty.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
NCBI_NS_STD::string::size_type SIZE_TYPE
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static string & ToUpper(string &str)
Convert string to upper case -- string& version.
static string UInt8ToString(Uint8 value, TNumToStringFlags flags=0, int base=10)
Convert UInt8 to string.
@ fWithCommas
Use commas as thousands separator.
TFrom GetFrom(void) const
Get the From member data.
void SetType(TType &value)
Assign a value to Type data member.
Tdata & Set(void)
Assign a value to data member.
bool IsSet(void) const
Check if a value has been assigned to data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
const TIupacaa & GetIupacaa(void) const
Get the variant data.
void SetData(TData &value)
Assign a value to Data data member.
@ e_Ncbieaa
extended ASCII 1 letter aa codes
@ e_Ncbistdaa
consecutive codes for std aas
@ e_Iupacaa
IUPAC 1 letter amino acid code.
unsigned int
A callback function used to compare two keys in a database.
constexpr auto sort(_Init &&init)
Magic spell ;-) needed for some weird compilers... very empiric.
const GenericPointer< typename T::ValueType > T2 value
#define DIM(A)
dimension of an array.
C++ API for the PSI-BLAST PSSM engine.
Defines BLAST database access classes.
#define row(bind, expected)
Structure to hold the Karlin-Altschul parameters.
double K
K value used in statistics.
double Lambda
Lambda value used in statistics.
Auxiliary structure used for sorting CRange<int> objects in increasing order of starting positions.
bool operator()(CRange< int > const &range1, CRange< int > const &range2)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4