(
intseq_size);
150 for(
int i= 0;
i<
size; ++
i)
183 virtual void Init();
253 const string& filename);
268 bool x_ValidateCd(
constlist<double>& freqs,
constlist<double>& observ,
unsigned intalphabet_size);
270list<Int4> & FreqOffsets, list<Int4> & ObsrOffsets,
Int4CurrFreqOffset,
Int4CurrObsrOffset);
273 void x_UpdateDelta(CRPS_DbInfo & rpsDbInfo, vector<string> & smpFilenames);
316m_WordDefaultScoreThreshold(0), m_OutDbName(
kEmptyStr),
317m_OutDbType(
kEmptyStr), m_CreateIndexFile(
false),m_GapOpenPenalty(0),
318m_GapExtPenalty(0), m_PssmScaleFactor(0),m_Matrix(
kEmptyStr), m_op_mode(op_invalid),
319m_binary_scoremat(
false), m_MaxSmpFilesPerVol(0), m_NumOfVols(0), m_DbVer(
eBDB_Version5),
321m_ObsrvThreshold(0), m_ExcludeInvalid(
false),
322m_UseModelThreshold(
true)
358 stringwcounts_str =
m_VolNames[
i] +
".wcounts";
386arg_desc->SetUsageContext(
GetArguments().GetProgramBasename(),
387 "Application to create databases for rpsblast, cobalt and deltablast, version " 390 stringdflt(
"Default = input file name provided to -");
393arg_desc->SetCurrentGroup(
"Input options");
395 "Input file that contains a list of smp files (delimited by space, tab or newline)",
399 "Scoremats are in binary format",
402arg_desc->SetCurrentGroup(
"Configuration options");
403arg_desc->AddOptionalKey(
kArgDbTitle,
"database_title",
404 "Title for database\n"+ dflt,
408 "Minimum word score to add a word to the lookup table",
413arg_desc->SetCurrentGroup(
"Output options");
414arg_desc->AddOptionalKey(
kOutDbName,
"database_name",
415 "Name of database to be created\n"+
418arg_desc->AddDefaultKey(
"blastdb_version",
"version",
419 "Version of BLAST database to be created",
422arg_desc->SetConstraint(
"blastdb_version",
426 "Maximum number of SMP files per DB volume",
429arg_desc->AddDefaultKey(
kOutDbType,
"output_db_type",
430 "Output database type: cobalt, delta, rps",
434arg_desc->AddDefaultKey(
kOutIndexFile,
"create_index_files",
435 "Create Index Files",
438arg_desc->SetCurrentGroup(
"Used only if scoremat files do not contain PSSM scores, ignored otherwise.");
439arg_desc->AddOptionalKey(
kArgGapOpen,
"gap_open_penalty",
440 "Cost to open a gap",
443arg_desc->AddOptionalKey(
kArgGapExtend,
"gap_extend_penalty",
444 "Cost to extend a gap, ",
448 "Pssm Scale factor ",
453 "Scoring matrix name",
460arg_desc->SetCurrentGroup(
"Delta Blast Options");
461arg_desc->AddDefaultKey(
kObsrThreshold,
"observations_threshold",
"Exclude domains with " 462 "with maximum number of independent observations " 466arg_desc->AddDefaultKey(
kExcludeInvalid,
"exclude_invalid",
"Exclude domains that do " 467 "not pass validation test",
470arg_desc->SetCurrentGroup(
"Taxonomy options");
471arg_desc->AddOptionalKey(
"taxid",
"TaxID",
472 "Taxonomy ID to assign to all sequences",
477arg_desc->AddOptionalKey(
"taxid_map",
"TaxIDMapFile",
478 "Text file mapping sequence IDs to taxonomy IDs.\n" 479 "Format:<SequenceId> <TaxonomyId><newline>",
536 intdefault_gap_open = 0;
537 intdefault_gap_extend = 0;
573}
else if(args[
"taxid_map"].
HasValue()) {
592 if(
tmp.size() > 0)
604 const string& filename)
614 stringerr = filename +
" contains no bioseq data";
620 stringerr = filename +
" contains no info on num of columns or num of rows";
626 stringerr = filename +
" 's num of columns does not match size of sequence";
633 stringerr = filename +
" has invalid alphabet size";
639 stringerr = filename +
" contains no frequency ratios.\n"+
640 "Please use a recent version of psiblast to regenerate PSSM files\n";
649 stringerr = filename +
" contains no core block to build cobalt database";
668 stringerr = filename +
" contains no pssm or residue frequencies";
674 stringerr = filename +
" contains no scoremat";
691 boolisRemoved =
false;
692 static const char* mp_ext[]={
".rps",
".loo",
".aux",
".freq",
".blocks",
".wcounts",
".obsr",
NULL};
693 for(
const char** mp=mp_ext; *mp !=
NULL; mp++) {
697 if(
CFile(fname).Remove()) {
701 unsigned intindex = 0;
704 while(
CFile(vfname).Remove()) {
732 stringrps_str = rpsDbInfo.
db_name+
".rps";
737 stringlookup_str = rpsDbInfo.
db_name+
".loo";
742 stringaux_str = rpsDbInfo.
db_name+
".aux";
743rpsDbInfo.
aux_file.open(aux_str.c_str());
744 if(!rpsDbInfo.
aux_file.is_open())
747 stringfreq_str = rpsDbInfo.
db_name+
".freq";
762rpsDbInfo.
pssm_file.write((
char*) &num_files,
sizeof(
Int4));
763rpsDbInfo.
freq_file.write((
char*) &num_files,
sizeof(
Int4));
764 for(
Int4 i= 0;
i<= num_files;
i++)
772 stringblocks_str = rpsDbInfo.
db_name+
".blocks";
858vector <char> query_v = query_stdaa.
Get();
860 if((
Int4) (query_v.size()) != seq_size)
866 for(
unsigned int i= 0;
i< query_v.size();
i++)
872matrix_name.c_str(), rpsDbInfo.
gap_open,
882 i.SetH() = o.
GetH();
929 doublethreshold = rpsDbInfo.
scale_factor* wordScoreThreshold;
960rpsDbInfo.
aux_file<< scientific << 0.0 <<
"\n";
961rpsDbInfo.
aux_file<< scientific << 0.0 <<
"\n";
975list<CRef<CCoreBlock> >::const_iterator itr = block_list.begin();
979 while(itr != block_list.end())
993 if(descr_list.size() > 0)
1002seq_id_str = accession;
1028list<double>::const_iterator itr_fr = freq_ratios.begin();
1029rpsDbInfo.
freq_file.seekp(0, ios_base::end);
1032 for(
i= 0;
i< seq_size;
i++) {
1033 for(j = 0; j < alphabet_size; j++) {
1034 if(itr_fr == freq_ratios.end())
1048 for(
i= 0;
i< seq_size;
i++) {
1056memset(
row, 0,
sizeof(
row));
1059rpsDbInfo.
freq_file.seekp( 8 + (seq_index) *
sizeof(
Int4), ios_base::beg);
1086 if(
NULL== posMatrix)
1121 for(
i= 0;
i< seq_size;
i++) {
1122 for(j = 0; j < alphabet_size; j++) {
1123 if(score_list_itr == score_list_end)
1125posMatrix[
i][j] = *score_list_itr;
1128 if(j < alphabet_size)
1136 for(j = 0; j < alphabet_size; j++) {
1137 for(
i= 0;
i< seq_size;
i++) {
1138 if(score_list_itr == score_list_end)
1140posMatrix[
i][j] = *score_list_itr;
1146 if(j == alphabet_size) {
1148 for(
i= 0;
i< seq_size;
i++) {
1155 if(
i< seq_size || j < alphabet_size)
1158 if(score_list_itr != score_list_end)
1170rpsDbInfo.
pssm_file.seekp(0, ios_base::end);
1171 for(
i= 0;
i< seq_size + 1;
i++) {
1180rpsDbInfo.
pssm_file.seekp( 8 + (seq_index) *
sizeof(
Int4), ios_base::beg);
1215 Int4cursor, old_cursor;
1219memset(&header, 0,
sizeof(header));
1224 for(index = cursor = 0; index < lut->
backbone_size; index++) {
1252 for(
i= 1;
i< cell->
num_used;
i++, cursor++) {
1263cursor *
sizeof(
Int4);
1267rpsDbInfo.
lookup_file.write((
const char*)&header,
sizeof(header));
1275memset(&empty_cell, 0,
sizeof(empty_cell));
1277rpsDbInfo.
lookup_file.write((
const char*)&empty_cell,
sizeof(empty_cell));
1323defline->SetSeqid() = bio.
GetId();
1324defline_set->
Set().push_back(defline);
1332*
m_LogFile<<
"Deleted existing BLAST database with identical name."<< endl;
1335 intnum_smps = smpFilenames.size();
1345vector<string>::iterator
b= smpFilenames.begin();
1346vector<string>::iterator
r=
b+ num_seqs;
1348vector<string> vol_smps(
b,
r);
1352 if(residue_seqs > 0) {
1374 for(
intseq_index=0; seq_index < rpsDbInfo.
num_seqs; seq_index++)
1376 stringfilename = smps[seq_index];
1380 stringerr = filename +
" does not exists";
1401 stringerr = filename +
" contains invalid scoremat";
1420 for(
const auto& it: bioseq.
GetDescr().
Get()) {
1422 TTaxIdtaxid = it->GetOrg().GetTaxId();
1478rpsDbInfo.
aux_file<< seq_size <<
"\n";
1499ostr.write((
char*)&(*it),
sizeof(
Int4));
1507ostr.write((
char*)&(*it),
sizeof(
Uint4));
1514vector<string> deltaList;
1516 for(
unsigned intseq_index=0; seq_index < smpFilenames.size(); seq_index++)
1518 stringfilename = smpFilenames[seq_index];
1522 stringerr = filename +
" does not exists";
1543 stringerr = filename +
" contains invalid scoremat";
1551 stringerr = filename +
" contains no weighted residue frequencies for building delta database";
1557 stringerr = filename +
" contains no observations information for building delta database";
1563deltaList.push_back(filename);
1577list<Int4> FreqOffsets;
1578list<Int4> ObsrOffsets;
1579 Int4CurrFreqOffset = 0;
1580 Int4CurrObsrOffset= 0;
1582 for(
unsigned intseq_index=0; seq_index < smpFilenames.size(); seq_index++)
1584 stringfilename = smpFilenames[seq_index];
1608list<double> modify_freqs;
1613vector<double>
tmp(orig_freqs.size());
1614list<double>::const_iterator f_itr = orig_freqs.begin();
1616 for(
int i= 0;
i< alphabet_size;
i++)
1618 for(
intj = 0; j < seq_size; j++)
1620 tmp[
i+ j*alphabet_size] = *f_itr;
1624 copy(
tmp.begin(),
tmp.end(), modify_freqs.begin());
1630 if(0 == modify_freqs.size())
1631 copy(orig_freqs.begin(), orig_freqs.end(), modify_freqs.begin());
1633list<double>::iterator p_itr = modify_freqs.begin();
1635 for(
intj=0; j < seq_size; j++)
1637 for(
int i=0;
i< alphabet_size;
i++)
1639 if(modify_freqs.end() == p_itr)
1645modify_freqs.insert(p_itr, (
BLASTAA_SIZE-alphabet_size), 0);
1649 constlist<double> & freqs = (modify_freqs.size()? modify_freqs:orig_freqs );
1652ObsrOffsets.push_back(CurrObsrOffset);
1654list<Uint4> ObsrBuff;
1657 unsigned intnum_obsr_columns = 0;
1658list<double>::const_iterator obsr_it = obsr.begin();
1661 doublecurrent = *obsr_it;
1665 while(obsr_it != obsr.end() &&
fabs(*obsr_it - current) < 1e-4)
1675ObsrBuff.push_back(num);
1677 while(obsr_it != obsr.end());
1679 Uint4num_weighted_counts = 0;
1684list<Uint4> FreqBuff;
1686 ITERATE(list<double>, it, freqs)
1689num_weighted_counts++;
1692 if(num_obsr_columns != num_weighted_counts /
BLASTAA_SIZE)
1694 stringerr =
"Number of frequencies and observations columns do not match in "+ filename;
1699 unsigned intpadded_size = FreqBuff.size() +
BLASTAA_SIZE;
1700FreqBuff.resize(padded_size, 0);
1702CurrFreqOffset += FreqBuff.size();
1703CurrObsrOffset += ObsrBuff.size();
1709tmp_obsr_buff.flush();
1710tmp_freq_buff.flush();
1711 x_WrapUpDelta(rpsDbInfo, tmp_obsr_file, tmp_freq_file, FreqOffsets, ObsrOffsets, CurrFreqOffset, CurrObsrOffset);
1716 constlist<double>& observ,
1717 unsigned intalphabet_size)
1720 if(freqs.size() / alphabet_size != observ.size())
1722 stringerr =
"Number of frequency and observations columns do not match";
1726 ITERATE(list<double>, it, freqs)
1728 unsigned intresidue = 0;
1730 while(residue < alphabet_size - 1)
1742 ITERATE(list<double>, it, observ)
1761list<double> modify_freqs;
1766vector<double>
tmp(orig_freqs.size());
1767list<double>::const_iterator f_itr = orig_freqs.begin();
1769 for(
int i= 0;
i< alphabet_size;
i++)
1771 for(
intj = 0; j < seq_size; j++)
1773 tmp[
i+ j*alphabet_size] = *f_itr;
1777 copy(
tmp.begin(),
tmp.end(), modify_freqs.begin());
1783 if(0 == modify_freqs.size())
1784 copy(orig_freqs.begin(), orig_freqs.end(), modify_freqs.begin());
1786list<double>::iterator p_itr = modify_freqs.begin();
1788 for(
intj=0; j < seq_size; j++)
1790 for(
int i=0;
i< alphabet_size;
i++)
1792 if(modify_freqs.end() == p_itr)
1798modify_freqs.insert(p_itr, (
BLASTAA_SIZE-alphabet_size), 0);
1802 constlist<double> & freqs = (modify_freqs.size()? modify_freqs:orig_freqs );
1803 doublemax_obsr = *max_element(obsr.begin(), obsr.end()) + 1.0;
1807 " was excluded: due to too few independent observations\n";
1814 " was excluded: it conatins an invalid CD \n";
1823list<Int4> & FreqOffsets, list<Int4> & ObsrOffsets,
Int4CurrFreqOffset,
Int4CurrObsrOffset)
1826ObsrOffsets.push_back(CurrObsrOffset);
1828 stringwcounts_str = rpsDbInfo.
db_name+
".wcounts";
1830 if(!wcounts_file.is_open())
1833 stringobsr_str = rpsDbInfo.
db_name+
".obsr";
1835 if(!obsr_file.is_open())
1843wcounts_file.write((
char*)&magic_number,
sizeof(
Int4));
1844obsr_file.write((
char*)&magic_number,
sizeof(
Int4));
1847 Int4num_wcounts_records = FreqOffsets.size() -1;
1848 Int4num_obsr_records = ObsrOffsets.size() -1;
1849wcounts_file.write((
char*)&num_wcounts_records,
sizeof(
Int4));
1850obsr_file.write((
char*)&num_obsr_records,
sizeof(
Int4));
1853wcounts_file.flush();
1854wcounts_file << tmp_freq_buff.rdbuf();
1855wcounts_file.flush();
1856wcounts_file.close();
1860obsr_file << tmp_obsr_buff.rdbuf();
1882 catch(
constblast::CInputException& e) {
1890 catch(
constblast::CBlastException& e) {
1911 if(args[
"dbtype"].
HasValue()) {
1920 #ifndef SKIP_DOXYGEN_PROCESSING 1921 int main(
intargc,
const char* argv[]
)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Declares singleton objects to store the version and reference for the BLAST engine.
Routines for creating protein BLAST lookup tables.
BlastAaLookupTable * BlastAaLookupTableDestruct(BlastAaLookupTable *lookup)
Free the lookup table.
void BlastAaLookupIndexQuery(BlastAaLookupTable *lookup, Int4 **matrix, BLAST_SequenceBlk *query, BlastSeqLoc *unmasked_regions, Int4 query_bias)
Index a protein query.
struct RPSBackboneCell RPSBackboneCell
structure defining one cell of the RPS lookup table
#define RPS_HITS_PER_CELL
maximum number of hits in an RPS backbone cell; this may be redundant (have the same value as AA_HITS...
Int4 BlastAaLookupFinalize(BlastAaLookupTable *lookup, EBoneType bone_type)
Pack the data structures comprising a protein lookup table into their final form.
Int4 BlastAaLookupTableNew(const LookupTableOptions *opt, BlastAaLookupTable **lut)
Create a new protein lookup table.
#define BLAST_INPUT_ERROR
Command line binary exit code: error in input query/options.
#define BLAST_UNKNOWN_ERROR
Command line binary exit code: unknown error.
#define BLAST_DATABASE_ERROR
Command line binary exit code: error in database/subject.
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
BlastSeqLoc * BlastSeqLocNew(BlastSeqLoc **head, Int4 from, Int4 to)
Create and initialize a new sequence interval.
Interface for converting sources of sequence data into blast sequence input.
The structures and functions in blast_options.
Int2 BLAST_FillQuerySetUpOptions(QuerySetUpOptions *options, EBlastProgramType program, const char *filter_string, Uint1 strand_option)
Fill non-default contents of the QuerySetUpOptions.
Int2 BlastQuerySetUpOptionsNew(QuerySetUpOptions **options)
Allocate memory for QuerySetUpOptions and fill with default values.
Int2 BLAST_FillLookupTableOptions(LookupTableOptions *options, EBlastProgramType program, Boolean is_megablast, double threshold, Int4 word_size)
Allocate memory for lookup table options and fill with default values.
Int2 LookupTableOptionsNew(EBlastProgramType program, LookupTableOptions **options)
Allocate memory for lookup table options and fill with default values.
#define BLAST_WORDSIZE_PROT
length of word to trigger an extension.
LookupTableOptions * LookupTableOptionsFree(LookupTableOptions *options)
Deallocates memory for LookupTableOptions*.
QuerySetUpOptions * BlastQuerySetUpOptionsFree(QuerySetUpOptions *options)
Deallocate memory for QuerySetUpOptions.
#define FREQ_RATIO_SCALE
header for RPS blast frequency ratios ('.freq') file
#define RPS_MAGIC_NUM_28
Version number for 28-letter alphabet.
Int2 BLAST_GetProteinGapExistenceExtendParams(const char *matrixName, Int4 *gap_existence, Int4 *gap_extension)
Extract the recommended gap existence and extension values.
#define BLAST_SCORE_MAX
maximum allowed score (for one letter comparison).
Code to build a database given various sources of sequence data.
Class to constrain the values of an argument to those greater than or equal to the value specified in...
const CSeq_id * GetFirstId() const
Defines BLAST error codes (user errors included)
void AddParam(EUsageParams p, int val)
Keeps track of the version of the BLAST engine in the NCBI C++ toolkit.
static void CreateDirectories(const string &dbname)
Create Directory for blast db.
Defines user input exceptions.
void Create(int seq_size)
unsigned int GetSize(void)
CMakeDbPosMatrix pos_matrix
CNcbiOfstream lookup_file
QuerySetUpOptions * query_options
CNcbiOfstream blocks_file
BlastAaLookupTable * lookup
CRef< CWriteDB > output_db
LookupTableOptions * lookup_options
CMakeProfileDBApp(void)
@inheritDoc
void x_AddCmdOptions(void)
virtual void Init()
@inheritDoc
CheckInputScoremat_RV x_CheckInputScoremat(const CPssmWithParameters &pssm_w_parameters, const string &filename)
CRef< CTaxIdSet > m_Taxids
CNcbiIstream * m_InPssmList
void x_RPSUpdateLookup(CRPS_DbInfo &rpsDbInfo, Int4 seq_size)
vector< string > x_CreateDeltaList(void)
void x_WrapUpDelta(CRPS_DbInfo &rpsDbInfo, CTmpFile &tmp_obsr_file, CTmpFile &tmp_freq_file, list< Int4 > &FreqOffsets, list< Int4 > &ObsrOffsets, Int4 CurrFreqOffset, Int4 CurrObsrOffset)
void x_RPSUpdateStatistics(CRPS_DbInfo &rpsDbInfo, CPssmWithParameters &seq, Int4 seq_size)
virtual int Run()
@inheritDoc
void x_CreateAliasFile(void)
void x_FillInRPSDbParameters(CRPS_DbInfo &rpsDbInfo, CPssmWithParameters &pssm_p)
void x_InitProgramParameters(void)
void x_InitRPSDbInfo(CRPS_DbInfo &rpsDBInfo, Int4 vol, Int4 num_files)
void x_RPS_DbClose(CRPS_DbInfo &rpsDbInfo)
bool x_CheckDelta(const CPssm &pssm, Int4 seq_size, const string &filename)
void x_RPSAddFirstSequence(CRPS_DbInfo &rpsDbInfo, CPssmWithParameters &pssm_w_parameters, bool freq_only)
void x_UpdateRPSDbInfo(CRPS_DbInfo &rpsDbInfo, const CPssmWithParameters &pssm_p)
void x_UpdateDelta(CRPS_DbInfo &rpsDbInfo, vector< string > &smpFilenames)
double m_WordDefaultScoreThreshold
void x_RPSUpdatePSSM(CRPS_DbInfo &rpsDbInfo, const CPssm &pssm, Int4 seq_index, Int4 seq_size)
void x_InitOutputDb(CRPS_DbInfo &rpsDBInfo)
void x_SetupArgDescriptions(void)
CBlastUsageReport m_UsageReport
vector< string > m_VolNames
bool x_ValidateCd(const list< double > &freqs, const list< double > &observ, unsigned int alphabet_size)
void x_UpdateFreqRatios(CRPS_DbInfo &rpsDbInfo, const CPssmWithParameters &pssm_p, Int4 seq_index, Int4 seq_size)
vector< string > x_GetSMPFilenames(void)
void x_UpdateCobalt(CRPS_DbInfo &rpsDbInfo, const CPssmWithParameters &pssm_p, Int4 seq_size)
void x_MakeVol(Int4 vol, vector< string > &smps)
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Implements the interface to retrieve data for the last 2 stages of the PSSM creation.
Computes a PSSM as specified in PSI-BLAST.
void GetQuerySequenceData(CNCBIstdaa &sequence) const
Retrieve the query sequence data in ncbistdaa format.
SIZE_TYPE GetQueryLength() const
Return the query length or 0 if no query is available.
void GetString(string &s) const
Return the data by assigning it to a string.
void FixTaxId(CRef< objects::CBlast_def_line_set > deflines)
Check that each defline has the specified taxid; if not, replace the defline and set the taxid.
void AddTaxId(const objects::CSeq_id &seqid, const TTaxId &taxid)
void SetMappingFromFile(CNcbiIstream &f)
static string MakeShortName(const string &base, int index)
Construct the short name for a volume.
void SetMaxFileSize(Uint8 sz)
Set maximum size for output files.
@ eProtein
Protein database.
void AddSequence(const CBioseq &bs)
Add a sequence as a CBioseq.
EIndexType
Whether and what kind of indices to build.
@ eDefault
Like eFullIndex but also build a numeric Trace ID index.
@ eNoIndex
Build a database without any indices.
static CRef< CBlast_def_line_set > ExtractBioseqDeflines(const CBioseq &bs, bool parse_ids=true, bool long_ids=false, bool scan_bioseq_4_cfastareader_usrobj=false)
Extract Deflines From Bioseq.
void SetDeflines(const CBlast_def_line_set &deflines)
Set the deflines to be used for the sequence.
void Close()
Close the Database.
Constant declarations for command line arguments for BLAST programs.
const string kArgMatrixName
Argument for scoring matrix.
const string kArgDbTitle
Title for the BLAST database.
const string kArgGapExtend
Argument to select the gap extending penalty.
const string kArgGapOpen
Argument to select the gap opening penalty.
const string kArgWordScoreThreshold
Argument to specify the minimum word score such that the word is added to the lookup table.
void Print(const CCompactSAMApplication::AlignInfo &ai)
std::ofstream out("events_result.xml")
main entry point for tests
CRef< objects::CPssmWithParameters > Run()
Runs the PSSM engine to compute the PSSM.
#define BLASTAA_SIZE
Size of aminoacid alphabet.
static CNcbiMatrix< double > * GetFreqRatios(const objects::CPssmWithParameters &pssm)
Returns matrix of BLASTAA_SIZE by query size (dimensions are opposite of what is stored in the BlastS...
void SetFullVersion(CRef< CVersionAPI > version)
Set version data for the program.
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
CVersionInfo GetVersion(void) const
Get the program version information.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
SStrictId_Tax::TId TTaxId
Taxon id type.
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
#define TAX_ID_FROM(T, value)
@ fHideXmlHelp
Hide XML help description.
@ fHideFullVersion
Hide full version description.
@ fHideDryRun
Hide dryrun description.
@ fHideConffile
Hide configuration file description.
@ eExcludes
One argument excludes another.
@ eInputFile
Name of file (must exist and be readable)
@ eBoolean
{'true', 't', 'false', 'f'}, case-insensitive
@ eDouble
Convertible into a floating point number (double)
@ eString
An arbitrary string.
@ eInteger
Convertible into an integer number (int or Int8)
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Info(CExceptionArgs_Base &args)
virtual bool Remove(TRemoveFlags flags=eRecursive) const
Remove a directory entry.
const string & GetFileName(void) const
Return used file name (generated or given in the constructor).
#define MSerial_AsnBinary
const TPrim & Get(void) const
#define MSerial_AsnText
I/O stream manipulators.
void Reset(void)
Reset reference object.
bool Empty(void) const THROWS_NONE
Check if CRef is empty — not pointing to any object, which means having a null value.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
bool IsEnabled(void)
Indicates whether application usage statistics collection is enabled for a current reporter instance.
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
void Start(void)
Start the timer.
Tdata & Set(void)
Assign a value to data member.
const TFreqRatios & GetFreqRatios(void) const
Get the FreqRatios member data.
const TNumIndeptObsr & GetNumIndeptObsr(void) const
Get the NumIndeptObsr member data.
const TMatrixName & GetMatrixName(void) const
Get the MatrixName member data.
const TQuery & GetQuery(void) const
Get the Query member data.
TNumRows GetNumRows(void) const
Get the NumRows member data.
void SetParams(TParams &value)
Assign a value to Params data member.
bool IsSetFinalData(void) const
Final representation for the PSSM Check if a value has been assigned to FinalData data member.
bool IsSetStop(void) const
end of block on query Check if a value has been assigned to Stop data member.
TH GetH(void) const
Get the H member data.
TKappa GetKappa(void) const
Get the Kappa member data.
const TScores & GetScores(void) const
Get the Scores member data.
const TWeightedResFreqsPerPos & GetWeightedResFreqsPerPos(void) const
Get the WeightedResFreqsPerPos member data.
bool IsSetGapOpen(void) const
gap opening penalty corresponding to the matrix above Check if a value has been assigned to GapOpen d...
TGapExtend GetGapExtend(void) const
Get the GapExtend member data.
TWordScoreThreshold GetWordScoreThreshold(void) const
Get the WordScoreThreshold member data.
TScalingFactor GetScalingFactor(void) const
Get the ScalingFactor member data.
const TBlocks & GetBlocks(void) const
Get the Blocks member data.
bool IsSetStart(void) const
begin of block on query Check if a value has been assigned to Start data member.
bool IsSetWordScoreThreshold(void) const
Word score threshold Check if a value has been assigned to WordScoreThreshold data member.
bool IsSetScalingFactor(void) const
scaling factor used to obtain more precision when building the PSSM.
bool IsSetFreqRatios(void) const
PSSM's frequency ratios Check if a value has been assigned to FreqRatios data member.
TStop GetStop(void) const
Get the Stop member data.
void SetMatrixName(const TMatrixName &value)
Assign a value to MatrixName data member.
bool IsSetIntermediateData(void) const
both intermediateData and finalData can be provided, but at least one of them must be provided.
const TFinalData & GetFinalData(void) const
Get the FinalData member data.
bool IsSetWeightedResFreqsPerPos(void) const
Weighted observed residue frequencies per position of the PSSM.
bool IsSetRpsdbparams(void) const
data needed by formatrpsdb to create RPS-BLAST databases.
void SetPssm(TPssm &value)
Assign a value to Pssm data member.
TNumColumns GetNumColumns(void) const
Get the NumColumns member data.
const TConstraints & GetConstraints(void) const
Get the Constraints member data.
bool IsSetMatrixName(void) const
name of the underlying score matrix whose frequency ratios were used in PSSM construction (e....
bool IsSetNumRows(void) const
The dimensions of the matrix are returned so the client can verify that all data was received.
void SetFinalData(TFinalData &value)
Assign a value to FinalData data member.
TStart GetStart(void) const
Get the Start member data.
bool IsSetQuery(void) const
PSSM representative sequence (master) Check if a value has been assigned to Query data member.
TGapOpen GetGapOpen(void) const
Get the GapOpen member data.
bool IsSetNumIndeptObsr(void) const
Number of independent observations per position of the PSSM NOTE: this is needed for building CDD dat...
bool IsSetConstraints(void) const
alignment constraints needed by sequence-structure threader and other global or local block-alignment...
bool IsSetGapExtend(void) const
gap extension penalty corresponding to the matrix above Check if a value has been assigned to GapExte...
bool IsSetNumColumns(void) const
number of columns Check if a value has been assigned to NumColumns data member.
const TIntermediateData & GetIntermediateData(void) const
Get the IntermediateData member data.
TByRow GetByRow(void) const
Get the ByRow member data.
void SetGapOpen(TGapOpen value)
Assign a value to GapOpen data member.
const TParams & GetParams(void) const
Get the Params member data.
bool IsSetBlocks(void) const
nblocks locations Check if a value has been assigned to Blocks data member.
bool IsSetPssm(void) const
This field is applicable to PSI-BLAST and formatrpsdb.
void SetGapExtend(TGapExtend value)
Assign a value to GapExtend data member.
const TPssm & GetPssm(void) const
Get the Pssm member data.
bool IsSetParams(void) const
This field's rpsdbparams is used to specify the values of options for processing by formatrpsdb.
const TRpsdbparams & GetRpsdbparams(void) const
Get the Rpsdbparams member data.
TLambda GetLambda(void) const
Get the Lambda member data.
const TSeq & GetSeq(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
const TTitle & GetTitle(void) const
Get the variant data.
const TId & GetId(void) const
Get the Id member data.
const Tdata & Get(void) const
Get the member data.
bool CanGetDescr(void) const
Check if it is safe to call GetDescr method.
bool IsSetDescr(void) const
descriptors Check if a value has been assigned to Descr data member.
bool IsTitle(void) const
Check if variant Title is selected.
const TDescr & GetDescr(void) const
Get the Descr member data.
unsigned int
A callback function used to compare two keys in a database.
if(yy_accept[yy_current_state])
static void s_WriteInt4List(CNcbiOfstream &ostr, const list< Int4 > &l)
static const string kDefaultMatrix(kMatrixBLOSUM62)
static const string kOutDbName("out")
static CRef< CBlast_def_line_set > s_GenerateBlastDefline(const CBioseq &bio)
static const string kMatrixBLOSUM80
static const string kOutDbType("dbtype")
static const string kExcludeInvalid("exclude_invalid")
#define RPS_NUM_LOOKUP_CELLS
static const string kMatrixPAM250
static const string kMaxSmpFilesPerVol("max_smp_vol")
static const string kMatrixBLOSUM62
static bool s_HasDefline(const CBioseq &bio)
static const Uint4 kFixedPointScaleFactor
static const string kLogFile("logfile")
static const string kDefaultOutIndexFile("true")
static const string kDefaultOutDbType(kOutDbRps)
static const string kDefaultExcludeInvalid("true")
#define RPS_DATABASE_VERSION
static const string kMatrixBLOSUM50
static const string kOutDbRps
static void s_WriteUint4List(CNcbiOfstream &ostr, const list< Uint4 > &l)
static const string kMatrixBLOSUM90
#define kDefaultWordScoreThreshold
#define kDefaultObsrThreshold
static const string kInPssmList("in")
int main(int argc, const char *argv[])
#define kDefaultMaxSmpFilesPerVol
static const string kObsrThreshold("obsr_threshold")
static const string kMatrixPAM70
static const string kMatrixBLOSUM45
static const string kOutDbDelta
static bool s_DeleteMakeprofileDb(const string &name)
static const string kMatrixPAM30
static const string kBinaryScoremat("binary")
static const string kOutDbCobalt
static const string kUseCmdlineThreshold("force")
static const string kPssmScaleFactor("scale")
static const string kOutIndexFile("index")
#define kDefaultPssmScaleFactor
const string version
version string
const struct ncbi::grid::netcache::search::fields::SIZE size
Prototypes for portable math library (ported from C Toolkit)
long BLAST_Nint(double x)
Nearest integer.
#define TRUE
bool replacement for C indicating true.
#define FALSE
bool replacement for C indicating false.
#define INT2_MIN
smallest (most negative) number represented by signed (two byte) short
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Miscellaneous common-use basic types and functionality.
Defines: CTimeFormat - storage class for time format.
std::istream & in(std::istream &in_, double &x_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Defines a concrete strategy to obtain PSSM input data for PSI-BLAST.
C++ API for the PSI-BLAST PSSM engine.
bool DeleteBlastDb(const string &dbpath, CSeqDB::ESeqType seq_type)
Deletes all files associated with a BLAST database.
EBlastDbVersion
BLAST database version.
This file defines several SeqDB utility functions related to byte order and file system portability.
CSeqDB_Substring SeqDB_RemoveDirName(CSeqDB_Substring s)
Returns a filename minus greedy path.
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
#define row(bind, expected)
structure defining one cell of the compacted lookup table
union AaLookupBackboneCell::@3 payload
union that specifies either entries stored right on the backbone if fewer than AA_HITS_PER_CELL are p...
Int4 entries[3]
if the number of hits for this cell is AA_HITS_PER_CELL or less, the hits are all stored directly in ...
Int4 overflow_cursor
integer offset into the overflow array where the list of hits for this cell begins
Int4 num_used
number of hits stored for this cell
The basic lookup table structure for blastp searches.
void * thick_backbone
may point to BackboneCell, SmallboneCell, or TinyboneCell.
Boolean use_pssm
if TRUE, lookup table construction will assume that the underlying score matrix is position- specific
Int4 threshold
the score threshold for neighboring words
void * overflow
may point to Int4 or Uint2, the overflow array for the compacted lookup table
Int4 backbone_size
number of cells in the backbone
Used to hold a set of positions, mostly used for filtering.
Options needed to construct a lookup table Also needed: query sequence and query length.
Options required for setting up the query sequence.
structure defining one cell of the RPS lookup table
Class which defines sequence id to taxid mapping.
Defines BLAST database construction classes.
void CWriteDB_CreateAliasFile(const string &file_name, const string &db_name, CWriteDB::ESeqType seq_type, const string &gi_file_name, const string &title=string(), EAliasFileFilterType alias_type=eGiList)
Writes an alias file that restricts a database with a gi list.
@ eNoAliasFilterType
Sentinel value.
Code for database files construction.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4