A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/makeprofiledb_8cpp_source.html below:

NCBI C++ ToolKit: src/app/blastdb/makeprofiledb.cpp Source File

64 #include "../blast/blast_app_util.hpp" 66 #ifndef SKIP_DOXYGEN_PROCESSING 107 #define kDefaultWordScoreThreshold (9.82) 108 #define kDefaultPssmScaleFactor (100.00) 109 #define kDefaultObsrThreshold (6.0) 110 #define kDefaultMaxSmpFilesPerVol (2500) 114 #define kEpsylon (0.0001) 116 #define DEFAULT_POS_MATRIX_SIZE 2000 117 #define RPS_NUM_LOOKUP_CELLS 32768 118 #if BLASTAA_SIZE == 28 119 #define RPS_DATABASE_VERSION RPS_MAGIC_NUM_28 121 #define RPS_DATABASE_VERSION RPS_MAGIC_NUM 124 #define kSingleVol (-1) 132  void Create

(

int

seq_size);

150  for

(

int i

= 0;

i

<

size

; ++

i

)

183  virtual void Init

();

253  const string

& filename);

268  bool x_ValidateCd

(

const

list<double>& freqs,

const

list<double>& observ,

unsigned int

alphabet_size);

270

list<Int4> & FreqOffsets, list<Int4> & ObsrOffsets,

Int4

CurrFreqOffset,

Int4

CurrObsrOffset);

273  void x_UpdateDelta

(CRPS_DbInfo & rpsDbInfo, vector<string> & smpFilenames);

316

m_WordDefaultScoreThreshold(0), m_OutDbName(

kEmptyStr

),

317

m_OutDbType(

kEmptyStr

), m_CreateIndexFile(

false

),m_GapOpenPenalty(0),

318

m_GapExtPenalty(0), m_PssmScaleFactor(0),m_Matrix(

kEmptyStr

), m_op_mode(op_invalid),

319

m_binary_scoremat(

false

), m_MaxSmpFilesPerVol(0), m_NumOfVols(0), m_DbVer(

eBDB_Version5

),

321

m_ObsrvThreshold(0), m_ExcludeInvalid(

false

),

322

m_UseModelThreshold(

true

)

358  string

wcounts_str =

m_VolNames

[

i

] +

".wcounts"

;

386

arg_desc->SetUsageContext(

GetArguments

().GetProgramBasename(),

387  "Application to create databases for rpsblast, cobalt and deltablast, version " 390  string

dflt(

"Default = input file name provided to -"

);

393

arg_desc->SetCurrentGroup(

"Input options"

);

395  "Input file that contains a list of smp files (delimited by space, tab or newline)"

,

399  "Scoremats are in binary format"

,

402

arg_desc->SetCurrentGroup(

"Configuration options"

);

403

arg_desc->AddOptionalKey(

kArgDbTitle

,

"database_title"

,

404  "Title for database\n"

+ dflt,

408  "Minimum word score to add a word to the lookup table"

,

413

arg_desc->SetCurrentGroup(

"Output options"

);

414

arg_desc->AddOptionalKey(

kOutDbName

,

"database_name"

,

415  "Name of database to be created\n"

+

418

arg_desc->AddDefaultKey(

"blastdb_version"

,

"version"

,

419  "Version of BLAST database to be created"

,

422

arg_desc->SetConstraint(

"blastdb_version"

,

426  "Maximum number of SMP files per DB volume"

,

429

arg_desc->AddDefaultKey(

kOutDbType

,

"output_db_type"

,

430  "Output database type: cobalt, delta, rps"

,

434

arg_desc->AddDefaultKey(

kOutIndexFile

,

"create_index_files"

,

435  "Create Index Files"

,

438

arg_desc->SetCurrentGroup(

"Used only if scoremat files do not contain PSSM scores, ignored otherwise."

);

439

arg_desc->AddOptionalKey(

kArgGapOpen

,

"gap_open_penalty"

,

440  "Cost to open a gap"

,

443

arg_desc->AddOptionalKey(

kArgGapExtend

,

"gap_extend_penalty"

,

444  "Cost to extend a gap, "

,

448  "Pssm Scale factor "

,

453  "Scoring matrix name"

,

460

arg_desc->SetCurrentGroup(

"Delta Blast Options"

);

461

arg_desc->AddDefaultKey(

kObsrThreshold

,

"observations_threshold"

,

"Exclude domains with " 462  "with maximum number of independent observations " 466

arg_desc->AddDefaultKey(

kExcludeInvalid

,

"exclude_invalid"

,

"Exclude domains that do " 467  "not pass validation test"

,

470

arg_desc->SetCurrentGroup(

"Taxonomy options"

);

471

arg_desc->AddOptionalKey(

"taxid"

,

"TaxID"

,

472  "Taxonomy ID to assign to all sequences"

,

477

arg_desc->AddOptionalKey(

"taxid_map"

,

"TaxIDMapFile"

,

478  "Text file mapping sequence IDs to taxonomy IDs.\n" 479  "Format:<SequenceId> <TaxonomyId><newline>"

,

536  int

default_gap_open = 0;

537  int

default_gap_extend = 0;

573

}

else if

(args[

"taxid_map"

].

HasValue

()) {

592  if

(

tmp

.size() > 0)

604  const string

& filename)

614  string

err = filename +

" contains no bioseq data"

;

620  string

err = filename +

" contains no info on num of columns or num of rows"

;

626  string

err = filename +

" 's num of columns does not match size of sequence"

;

633  string

err = filename +

" has invalid alphabet size"

;

639  string

err = filename +

" contains no frequency ratios.\n"

+

640  "Please use a recent version of psiblast to regenerate PSSM files\n"

;

649  string

err = filename +

" contains no core block to build cobalt database"

;

668  string

err = filename +

" contains no pssm or residue frequencies"

;

674  string

err = filename +

" contains no scoremat"

;

691  bool

isRemoved =

false

;

692  static const char

* mp_ext[]={

".rps"

,

".loo"

,

".aux"

,

".freq"

,

".blocks"

,

".wcounts"

,

".obsr"

,

NULL

};

693  for

(

const char

** mp=mp_ext; *mp !=

NULL

; mp++) {

697  if

(

CFile

(fname).Remove()) {

701  unsigned int

index = 0;

704  while

(

CFile

(vfname).Remove()) {

732  string

rps_str = rpsDbInfo.

db_name

+

".rps"

;

737  string

lookup_str = rpsDbInfo.

db_name

+

".loo"

;

742  string

aux_str = rpsDbInfo.

db_name

+

".aux"

;

743

rpsDbInfo.

aux_file

.open(aux_str.c_str());

744  if

(!rpsDbInfo.

aux_file

.is_open())

747  string

freq_str = rpsDbInfo.

db_name

+

".freq"

;

762

rpsDbInfo.

pssm_file

.write((

char

*) &num_files,

sizeof

(

Int4

));

763

rpsDbInfo.

freq_file

.write((

char

*) &num_files,

sizeof

(

Int4

));

764  for

(

Int4 i

= 0;

i

<= num_files;

i

++)

772  string

blocks_str = rpsDbInfo.

db_name

+

".blocks"

;

858

vector <char> query_v = query_stdaa.

Get

();

860  if

((

Int4

) (query_v.size()) != seq_size)

866  for

(

unsigned int i

= 0;

i

< query_v.size();

i

++)

872

matrix_name.c_str(), rpsDbInfo.

gap_open

,

882  i

.SetH() = o.

GetH

();

929  double

threshold = rpsDbInfo.

scale_factor

* wordScoreThreshold;

960

rpsDbInfo.

aux_file

<< scientific << 0.0 <<

"\n"

;

961

rpsDbInfo.

aux_file

<< scientific << 0.0 <<

"\n"

;

975

list<CRef<CCoreBlock> >::const_iterator itr = block_list.begin();

979  while

(itr != block_list.end())

993  if

(descr_list.size() > 0)

1002

seq_id_str = accession;

1028

list<double>::const_iterator itr_fr = freq_ratios.begin();

1029

rpsDbInfo.

freq_file

.seekp(0, ios_base::end);

1032  for

(

i

= 0;

i

< seq_size;

i

++) {

1033  for

(j = 0; j < alphabet_size; j++) {

1034  if

(itr_fr == freq_ratios.end())

1048  for

(

i

= 0;

i

< seq_size;

i

++) {

1056

memset(

row

, 0,

sizeof

(

row

));

1059

rpsDbInfo.

freq_file

.seekp( 8 + (seq_index) *

sizeof

(

Int4

), ios_base::beg);

1086  if

(

NULL

== posMatrix)

1121  for

(

i

= 0;

i

< seq_size;

i

++) {

1122  for

(j = 0; j < alphabet_size; j++) {

1123  if

(score_list_itr == score_list_end)

1125

posMatrix[

i

][j] = *score_list_itr;

1128  if

(j < alphabet_size)

1136  for

(j = 0; j < alphabet_size; j++) {

1137  for

(

i

= 0;

i

< seq_size;

i

++) {

1138  if

(score_list_itr == score_list_end)

1140

posMatrix[

i

][j] = *score_list_itr;

1146  if

(j == alphabet_size) {

1148  for

(

i

= 0;

i

< seq_size;

i

++) {

1155  if

(

i

< seq_size || j < alphabet_size)

1158  if

(score_list_itr != score_list_end)

1170

rpsDbInfo.

pssm_file

.seekp(0, ios_base::end);

1171  for

(

i

= 0;

i

< seq_size + 1;

i

++) {

1180

rpsDbInfo.

pssm_file

.seekp( 8 + (seq_index) *

sizeof

(

Int4

), ios_base::beg);

1215  Int4

cursor, old_cursor;

1219

memset(&header, 0,

sizeof

(header));

1224  for

(index = cursor = 0; index < lut->

backbone_size

; index++) {

1252  for

(

i

= 1;

i

< cell->

num_used

;

i

++, cursor++) {

1263

cursor *

sizeof

(

Int4

);

1267

rpsDbInfo.

lookup_file

.write((

const char

*)&header,

sizeof

(header));

1275

memset(&empty_cell, 0,

sizeof

(empty_cell));

1277

rpsDbInfo.

lookup_file

.write((

const char

*)&empty_cell,

sizeof

(empty_cell));

1323

defline->SetSeqid() = bio.

GetId

();

1324

defline_set->

Set

().push_back(defline);

1332

*

m_LogFile

<<

"Deleted existing BLAST database with identical name."

<< endl;

1335  int

num_smps = smpFilenames.size();

1345

vector<string>::iterator

b

= smpFilenames.begin();

1346

vector<string>::iterator

r

=

b

+ num_seqs;

1348

vector<string> vol_smps(

b

,

r

);

1352  if

(residue_seqs > 0) {

1374  for

(

int

seq_index=0; seq_index < rpsDbInfo.

num_seqs

; seq_index++)

1376  string

filename = smps[seq_index];

1380  string

err = filename +

" does not exists"

;

1401  string

err = filename +

" contains invalid scoremat"

;

1420  for

(

const auto

& it: bioseq.

GetDescr

().

Get

()) {

1422  TTaxId

taxid = it->GetOrg().GetTaxId();

1478

rpsDbInfo.

aux_file

<< seq_size <<

"\n"

;

1499

ostr.write((

char

*)&(*it),

sizeof

(

Int4

));

1507

ostr.write((

char

*)&(*it),

sizeof

(

Uint4

));

1514

vector<string> deltaList;

1516  for

(

unsigned int

seq_index=0; seq_index < smpFilenames.size(); seq_index++)

1518  string

filename = smpFilenames[seq_index];

1522  string

err = filename +

" does not exists"

;

1543  string

err = filename +

" contains invalid scoremat"

;

1551  string

err = filename +

" contains no weighted residue frequencies for building delta database"

;

1557  string

err = filename +

" contains no observations information for building delta database"

;

1563

deltaList.push_back(filename);

1577

list<Int4> FreqOffsets;

1578

list<Int4> ObsrOffsets;

1579  Int4

CurrFreqOffset = 0;

1580  Int4

CurrObsrOffset= 0;

1582  for

(

unsigned int

seq_index=0; seq_index < smpFilenames.size(); seq_index++)

1584  string

filename = smpFilenames[seq_index];

1608

list<double> modify_freqs;

1613

vector<double>

tmp

(orig_freqs.size());

1614

list<double>::const_iterator f_itr = orig_freqs.begin();

1616  for

(

int i

= 0;

i

< alphabet_size;

i

++)

1618  for

(

int

j = 0; j < seq_size; j++)

1620  tmp

[

i

+ j*alphabet_size] = *f_itr;

1624  copy

(

tmp

.begin(),

tmp

.end(), modify_freqs.begin());

1630  if

(0 == modify_freqs.size())

1631  copy

(orig_freqs.begin(), orig_freqs.end(), modify_freqs.begin());

1633

list<double>::iterator p_itr = modify_freqs.begin();

1635  for

(

int

j=0; j < seq_size; j++)

1637  for

(

int i

=0;

i

< alphabet_size;

i

++)

1639  if

(modify_freqs.end() == p_itr)

1645

modify_freqs.insert(p_itr, (

BLASTAA_SIZE

-alphabet_size), 0);

1649  const

list<double> & freqs = (modify_freqs.size()? modify_freqs:orig_freqs );

1652

ObsrOffsets.push_back(CurrObsrOffset);

1654

list<Uint4> ObsrBuff;

1657  unsigned int

num_obsr_columns = 0;

1658

list<double>::const_iterator obsr_it = obsr.begin();

1661  double

current = *obsr_it;

1665  while

(obsr_it != obsr.end() &&

fabs

(*obsr_it - current) < 1e-4)

1675

ObsrBuff.push_back(num);

1677  while

(obsr_it != obsr.end());

1679  Uint4

num_weighted_counts = 0;

1684

list<Uint4> FreqBuff;

1686  ITERATE

(list<double>, it, freqs)

1689

num_weighted_counts++;

1692  if

(num_obsr_columns != num_weighted_counts /

BLASTAA_SIZE

)

1694  string

err =

"Number of frequencies and observations columns do not match in "

+ filename;

1699  unsigned int

padded_size = FreqBuff.size() +

BLASTAA_SIZE

;

1700

FreqBuff.resize(padded_size, 0);

1702

CurrFreqOffset += FreqBuff.size();

1703

CurrObsrOffset += ObsrBuff.size();

1709

tmp_obsr_buff.flush();

1710

tmp_freq_buff.flush();

1711  x_WrapUpDelta

(rpsDbInfo, tmp_obsr_file, tmp_freq_file, FreqOffsets, ObsrOffsets, CurrFreqOffset, CurrObsrOffset);

1716  const

list<double>& observ,

1717  unsigned int

alphabet_size)

1720  if

(freqs.size() / alphabet_size != observ.size())

1722  string

err =

"Number of frequency and observations columns do not match"

;

1726  ITERATE

(list<double>, it, freqs)

1728  unsigned int

residue = 0;

1730  while

(residue < alphabet_size - 1)

1742  ITERATE

(list<double>, it, observ)

1761

list<double> modify_freqs;

1766

vector<double>

tmp

(orig_freqs.size());

1767

list<double>::const_iterator f_itr = orig_freqs.begin();

1769  for

(

int i

= 0;

i

< alphabet_size;

i

++)

1771  for

(

int

j = 0; j < seq_size; j++)

1773  tmp

[

i

+ j*alphabet_size] = *f_itr;

1777  copy

(

tmp

.begin(),

tmp

.end(), modify_freqs.begin());

1783  if

(0 == modify_freqs.size())

1784  copy

(orig_freqs.begin(), orig_freqs.end(), modify_freqs.begin());

1786

list<double>::iterator p_itr = modify_freqs.begin();

1788  for

(

int

j=0; j < seq_size; j++)

1790  for

(

int i

=0;

i

< alphabet_size;

i

++)

1792  if

(modify_freqs.end() == p_itr)

1798

modify_freqs.insert(p_itr, (

BLASTAA_SIZE

-alphabet_size), 0);

1802  const

list<double> & freqs = (modify_freqs.size()? modify_freqs:orig_freqs );

1803  double

max_obsr = *max_element(obsr.begin(), obsr.end()) + 1.0;

1807  " was excluded: due to too few independent observations\n"

;

1814  " was excluded: it conatins an invalid CD \n"

;

1823

list<Int4> & FreqOffsets, list<Int4> & ObsrOffsets,

Int4

CurrFreqOffset,

Int4

CurrObsrOffset)

1826

ObsrOffsets.push_back(CurrObsrOffset);

1828  string

wcounts_str = rpsDbInfo.

db_name

+

".wcounts"

;

1830  if

(!wcounts_file.is_open())

1833  string

obsr_str = rpsDbInfo.

db_name

+

".obsr"

;

1835  if

(!obsr_file.is_open())

1843

wcounts_file.write((

char

*)&magic_number,

sizeof

(

Int4

));

1844

obsr_file.write((

char

*)&magic_number,

sizeof

(

Int4

));

1847  Int4

num_wcounts_records = FreqOffsets.size() -1;

1848  Int4

num_obsr_records = ObsrOffsets.size() -1;

1849

wcounts_file.write((

char

*)&num_wcounts_records,

sizeof

(

Int4

));

1850

obsr_file.write((

char

*)&num_obsr_records,

sizeof

(

Int4

));

1853

wcounts_file.flush();

1854

wcounts_file << tmp_freq_buff.rdbuf();

1855

wcounts_file.flush();

1856

wcounts_file.close();

1860

obsr_file << tmp_obsr_buff.rdbuf();

1882  catch

(

const

blast::CInputException& e) {

1890  catch

(

const

blast::CBlastException& e) {

1911  if

(args[

"dbtype"

].

HasValue

()) {

1920 #ifndef SKIP_DOXYGEN_PROCESSING 1921 int main

(

int

argc,

const char

* argv[]

)

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

Declares singleton objects to store the version and reference for the BLAST engine.

Routines for creating protein BLAST lookup tables.

BlastAaLookupTable * BlastAaLookupTableDestruct(BlastAaLookupTable *lookup)

Free the lookup table.

void BlastAaLookupIndexQuery(BlastAaLookupTable *lookup, Int4 **matrix, BLAST_SequenceBlk *query, BlastSeqLoc *unmasked_regions, Int4 query_bias)

Index a protein query.

struct RPSBackboneCell RPSBackboneCell

structure defining one cell of the RPS lookup table

#define RPS_HITS_PER_CELL

maximum number of hits in an RPS backbone cell; this may be redundant (have the same value as AA_HITS...

Int4 BlastAaLookupFinalize(BlastAaLookupTable *lookup, EBoneType bone_type)

Pack the data structures comprising a protein lookup table into their final form.

Int4 BlastAaLookupTableNew(const LookupTableOptions *opt, BlastAaLookupTable **lut)

Create a new protein lookup table.

#define BLAST_INPUT_ERROR

Command line binary exit code: error in input query/options.

#define BLAST_UNKNOWN_ERROR

Command line binary exit code: unknown error.

#define BLAST_DATABASE_ERROR

Command line binary exit code: error in database/subject.

BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)

Deallocate all BlastSeqLoc objects in a chain.

BlastSeqLoc * BlastSeqLocNew(BlastSeqLoc **head, Int4 from, Int4 to)

Create and initialize a new sequence interval.

Interface for converting sources of sequence data into blast sequence input.

The structures and functions in blast_options.

Int2 BLAST_FillQuerySetUpOptions(QuerySetUpOptions *options, EBlastProgramType program, const char *filter_string, Uint1 strand_option)

Fill non-default contents of the QuerySetUpOptions.

Int2 BlastQuerySetUpOptionsNew(QuerySetUpOptions **options)

Allocate memory for QuerySetUpOptions and fill with default values.

Int2 BLAST_FillLookupTableOptions(LookupTableOptions *options, EBlastProgramType program, Boolean is_megablast, double threshold, Int4 word_size)

Allocate memory for lookup table options and fill with default values.

Int2 LookupTableOptionsNew(EBlastProgramType program, LookupTableOptions **options)

Allocate memory for lookup table options and fill with default values.

#define BLAST_WORDSIZE_PROT

length of word to trigger an extension.

LookupTableOptions * LookupTableOptionsFree(LookupTableOptions *options)

Deallocates memory for LookupTableOptions*.

QuerySetUpOptions * BlastQuerySetUpOptionsFree(QuerySetUpOptions *options)

Deallocate memory for QuerySetUpOptions.

#define FREQ_RATIO_SCALE

header for RPS blast frequency ratios ('.freq') file

#define RPS_MAGIC_NUM_28

Version number for 28-letter alphabet.

Int2 BLAST_GetProteinGapExistenceExtendParams(const char *matrixName, Int4 *gap_existence, Int4 *gap_extension)

Extract the recommended gap existence and extension values.

#define BLAST_SCORE_MAX

maximum allowed score (for one letter comparison).

Code to build a database given various sources of sequence data.

Class to constrain the values of an argument to those greater than or equal to the value specified in...

const CSeq_id * GetFirstId() const

Defines BLAST error codes (user errors included)

void AddParam(EUsageParams p, int val)

Keeps track of the version of the BLAST engine in the NCBI C++ toolkit.

static void CreateDirectories(const string &dbname)

Create Directory for blast db.

Defines user input exceptions.

void Create(int seq_size)

unsigned int GetSize(void)

CMakeDbPosMatrix pos_matrix

CNcbiOfstream lookup_file

QuerySetUpOptions * query_options

CNcbiOfstream blocks_file

BlastAaLookupTable * lookup

CRef< CWriteDB > output_db

LookupTableOptions * lookup_options

CMakeProfileDBApp(void)

@inheritDoc

void x_AddCmdOptions(void)

virtual void Init()

@inheritDoc

CheckInputScoremat_RV x_CheckInputScoremat(const CPssmWithParameters &pssm_w_parameters, const string &filename)

CRef< CTaxIdSet > m_Taxids

CNcbiIstream * m_InPssmList

void x_RPSUpdateLookup(CRPS_DbInfo &rpsDbInfo, Int4 seq_size)

vector< string > x_CreateDeltaList(void)

void x_WrapUpDelta(CRPS_DbInfo &rpsDbInfo, CTmpFile &tmp_obsr_file, CTmpFile &tmp_freq_file, list< Int4 > &FreqOffsets, list< Int4 > &ObsrOffsets, Int4 CurrFreqOffset, Int4 CurrObsrOffset)

void x_RPSUpdateStatistics(CRPS_DbInfo &rpsDbInfo, CPssmWithParameters &seq, Int4 seq_size)

virtual int Run()

@inheritDoc

void x_CreateAliasFile(void)

void x_FillInRPSDbParameters(CRPS_DbInfo &rpsDbInfo, CPssmWithParameters &pssm_p)

void x_InitProgramParameters(void)

void x_InitRPSDbInfo(CRPS_DbInfo &rpsDBInfo, Int4 vol, Int4 num_files)

void x_RPS_DbClose(CRPS_DbInfo &rpsDbInfo)

bool x_CheckDelta(const CPssm &pssm, Int4 seq_size, const string &filename)

void x_RPSAddFirstSequence(CRPS_DbInfo &rpsDbInfo, CPssmWithParameters &pssm_w_parameters, bool freq_only)

void x_UpdateRPSDbInfo(CRPS_DbInfo &rpsDbInfo, const CPssmWithParameters &pssm_p)

void x_UpdateDelta(CRPS_DbInfo &rpsDbInfo, vector< string > &smpFilenames)

double m_WordDefaultScoreThreshold

void x_RPSUpdatePSSM(CRPS_DbInfo &rpsDbInfo, const CPssm &pssm, Int4 seq_index, Int4 seq_size)

void x_InitOutputDb(CRPS_DbInfo &rpsDBInfo)

void x_SetupArgDescriptions(void)

CBlastUsageReport m_UsageReport

vector< string > m_VolNames

bool x_ValidateCd(const list< double > &freqs, const list< double > &observ, unsigned int alphabet_size)

void x_UpdateFreqRatios(CRPS_DbInfo &rpsDbInfo, const CPssmWithParameters &pssm_p, Int4 seq_index, Int4 seq_size)

vector< string > x_GetSMPFilenames(void)

void x_UpdateCobalt(CRPS_DbInfo &rpsDbInfo, const CPssmWithParameters &pssm_p, Int4 seq_size)

void x_MakeVol(Int4 vol, vector< string > &smps)

CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:

Implements the interface to retrieve data for the last 2 stages of the PSSM creation.

Computes a PSSM as specified in PSI-BLAST.

void GetQuerySequenceData(CNCBIstdaa &sequence) const

Retrieve the query sequence data in ncbistdaa format.

SIZE_TYPE GetQueryLength() const

Return the query length or 0 if no query is available.

void GetString(string &s) const

Return the data by assigning it to a string.

void FixTaxId(CRef< objects::CBlast_def_line_set > deflines)

Check that each defline has the specified taxid; if not, replace the defline and set the taxid.

void AddTaxId(const objects::CSeq_id &seqid, const TTaxId &taxid)

void SetMappingFromFile(CNcbiIstream &f)

static string MakeShortName(const string &base, int index)

Construct the short name for a volume.

void SetMaxFileSize(Uint8 sz)

Set maximum size for output files.

@ eProtein

Protein database.

void AddSequence(const CBioseq &bs)

Add a sequence as a CBioseq.

EIndexType

Whether and what kind of indices to build.

@ eDefault

Like eFullIndex but also build a numeric Trace ID index.

@ eNoIndex

Build a database without any indices.

static CRef< CBlast_def_line_set > ExtractBioseqDeflines(const CBioseq &bs, bool parse_ids=true, bool long_ids=false, bool scan_bioseq_4_cfastareader_usrobj=false)

Extract Deflines From Bioseq.

void SetDeflines(const CBlast_def_line_set &deflines)

Set the deflines to be used for the sequence.

void Close()

Close the Database.

Constant declarations for command line arguments for BLAST programs.

const string kArgMatrixName

Argument for scoring matrix.

const string kArgDbTitle

Title for the BLAST database.

const string kArgGapExtend

Argument to select the gap extending penalty.

const string kArgGapOpen

Argument to select the gap opening penalty.

const string kArgWordScoreThreshold

Argument to specify the minimum word score such that the word is added to the lookup table.

void Print(const CCompactSAMApplication::AlignInfo &ai)

std::ofstream out("events_result.xml")

main entry point for tests

CRef< objects::CPssmWithParameters > Run()

Runs the PSSM engine to compute the PSSM.

#define BLASTAA_SIZE

Size of aminoacid alphabet.

static CNcbiMatrix< double > * GetFreqRatios(const objects::CPssmWithParameters &pssm)

Returns matrix of BLASTAA_SIZE by query size (dimensions are opposite of what is stored in the BlastS...

void SetFullVersion(CRef< CVersionAPI > version)

Set version data for the program.

void HideStdArgs(THideStdArgs hide_mask)

Set the hide mask for the Hide Std Flags.

virtual const CArgs & GetArgs(void) const

Get parsed command line arguments.

int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)

Main function (entry point) for the NCBI application.

CVersionInfo GetVersion(void) const

Get the program version information.

virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)

Setup the command line argument descriptions.

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

SStrictId_Tax::TId TTaxId

Taxon id type.

const CNcbiArguments & GetArguments(void) const

Get the application's cached unprocessed command-line arguments.

#define TAX_ID_FROM(T, value)

@ fHideXmlHelp

Hide XML help description.

@ fHideFullVersion

Hide full version description.

@ fHideDryRun

Hide dryrun description.

@ fHideConffile

Hide configuration file description.

@ eExcludes

One argument excludes another.

@ eInputFile

Name of file (must exist and be readable)

@ eBoolean

{'true', 't', 'false', 'f'}, case-insensitive

@ eDouble

Convertible into a floating point number (double)

@ eString

An arbitrary string.

@ eInteger

Convertible into an integer number (int or Int8)

#define ERR_POST(message)

Error posting with file, line number information but without error codes.

#define LOG_POST(message)

This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...

void Error(CExceptionArgs_Base &args)

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

void Info(CExceptionArgs_Base &args)

virtual bool Remove(TRemoveFlags flags=eRecursive) const

Remove a directory entry.

const string & GetFileName(void) const

Return used file name (generated or given in the constructor).

#define MSerial_AsnBinary

const TPrim & Get(void) const

#define MSerial_AsnText

I/O stream manipulators.

void Reset(void)

Reset reference object.

bool Empty(void) const THROWS_NONE

Check if CRef is empty – not pointing to any object, which means having a null value.

int32_t Int4

4-byte (32-bit) signed integer

uint32_t Uint4

4-byte (32-bit) unsigned integer

bool IsEnabled(void)

Indicates whether application usage statistics collection is enabled for a current reporter instance.

CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)

Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)

IO_PREFIX::ofstream CNcbiOfstream

Portable alias for ofstream.

IO_PREFIX::ostream CNcbiOstream

Portable alias for ostream.

IO_PREFIX::istream CNcbiIstream

Portable alias for istream.

IO_PREFIX::ifstream CNcbiIfstream

Portable alias for ifstream.

static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)

Convert double to string.

static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)

Split a string using specified delimiters.

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)

Split a string into two pieces using the specified delimiters.

static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)

Convert numeric value to string.

@ fSplit_MergeDelimiters

Merge adjacent delimiters.

double Elapsed(void) const

Return time elapsed since first Start() or last Restart() call (in seconds).

void Start(void)

Start the timer.

Tdata & Set(void)

Assign a value to data member.

const TFreqRatios & GetFreqRatios(void) const

Get the FreqRatios member data.

const TNumIndeptObsr & GetNumIndeptObsr(void) const

Get the NumIndeptObsr member data.

const TMatrixName & GetMatrixName(void) const

Get the MatrixName member data.

const TQuery & GetQuery(void) const

Get the Query member data.

TNumRows GetNumRows(void) const

Get the NumRows member data.

void SetParams(TParams &value)

Assign a value to Params data member.

bool IsSetFinalData(void) const

Final representation for the PSSM. Check if a value has been assigned to FinalData data member.

bool IsSetStop(void) const

End of block on query. Check if a value has been assigned to Stop data member.

TH GetH(void) const

Get the H member data.

TKappa GetKappa(void) const

Get the Kappa member data.

const TScores & GetScores(void) const

Get the Scores member data.

const TWeightedResFreqsPerPos & GetWeightedResFreqsPerPos(void) const

Get the WeightedResFreqsPerPos member data.

bool IsSetGapOpen(void) const

gap opening penalty corresponding to the matrix above Check if a value has been assigned to GapOpen d...

TGapExtend GetGapExtend(void) const

Get the GapExtend member data.

TWordScoreThreshold GetWordScoreThreshold(void) const

Get the WordScoreThreshold member data.

TScalingFactor GetScalingFactor(void) const

Get the ScalingFactor member data.

const TBlocks & GetBlocks(void) const

Get the Blocks member data.

bool IsSetStart(void) const

Beginning of block on query. Check if a value has been assigned to Start data member.

bool IsSetWordScoreThreshold(void) const

Word score threshold. Check if a value has been assigned to WordScoreThreshold data member.

bool IsSetScalingFactor(void) const

scaling factor used to obtain more precision when building the PSSM.

bool IsSetFreqRatios(void) const

PSSM's frequency ratios. Check if a value has been assigned to FreqRatios data member.

TStop GetStop(void) const

Get the Stop member data.

void SetMatrixName(const TMatrixName &value)

Assign a value to MatrixName data member.

bool IsSetIntermediateData(void) const

both intermediateData and finalData can be provided, but at least one of them must be provided.

const TFinalData & GetFinalData(void) const

Get the FinalData member data.

bool IsSetWeightedResFreqsPerPos(void) const

Weighted observed residue frequencies per position of the PSSM.

bool IsSetRpsdbparams(void) const

data needed by formatrpsdb to create RPS-BLAST databases.

void SetPssm(TPssm &value)

Assign a value to Pssm data member.

TNumColumns GetNumColumns(void) const

Get the NumColumns member data.

const TConstraints & GetConstraints(void) const

Get the Constraints member data.

bool IsSetMatrixName(void) const

name of the underlying score matrix whose frequency ratios were used in PSSM construction (e....

bool IsSetNumRows(void) const

The dimensions of the matrix are returned so the client can verify that all data was received.

void SetFinalData(TFinalData &value)

Assign a value to FinalData data member.

TStart GetStart(void) const

Get the Start member data.

bool IsSetQuery(void) const

PSSM representative sequence (master). Check if a value has been assigned to Query data member.

TGapOpen GetGapOpen(void) const

Get the GapOpen member data.

bool IsSetNumIndeptObsr(void) const

Number of independent observations per position of the PSSM NOTE: this is needed for building CDD dat...

bool IsSetConstraints(void) const

alignment constraints needed by sequence-structure threader and other global or local block-alignment...

bool IsSetGapExtend(void) const

gap extension penalty corresponding to the matrix above Check if a value has been assigned to GapExte...

bool IsSetNumColumns(void) const

Number of columns. Check if a value has been assigned to NumColumns data member.

const TIntermediateData & GetIntermediateData(void) const

Get the IntermediateData member data.

TByRow GetByRow(void) const

Get the ByRow member data.

void SetGapOpen(TGapOpen value)

Assign a value to GapOpen data member.

const TParams & GetParams(void) const

Get the Params member data.

bool IsSetBlocks(void) const

nblocks locations. Check if a value has been assigned to Blocks data member.

bool IsSetPssm(void) const

This field is applicable to PSI-BLAST and formatrpsdb.

void SetGapExtend(TGapExtend value)

Assign a value to GapExtend data member.

const TPssm & GetPssm(void) const

Get the Pssm member data.

bool IsSetParams(void) const

This field's rpsdbparams is used to specify the values of options for processing by formatrpsdb.

const TRpsdbparams & GetRpsdbparams(void) const

Get the Rpsdbparams member data.

TLambda GetLambda(void) const

Get the Lambda member data.

const TSeq & GetSeq(void) const

Get the variant data.

bool IsSeq(void) const

Check if variant Seq is selected.

const TTitle & GetTitle(void) const

Get the variant data.

const TId & GetId(void) const

Get the Id member data.

const Tdata & Get(void) const

Get the member data.

bool CanGetDescr(void) const

Check if it is safe to call GetDescr method.

bool IsSetDescr(void) const

Descriptors. Check if a value has been assigned to Descr data member.

bool IsTitle(void) const

Check if variant Title is selected.

const TDescr & GetDescr(void) const

Get the Descr member data.

unsigned int

A callback function used to compare two keys in a database.

if(yy_accept[yy_current_state])

static void s_WriteInt4List(CNcbiOfstream &ostr, const list< Int4 > &l)

static const string kDefaultMatrix(kMatrixBLOSUM62)

static const string kOutDbName("out")

static CRef< CBlast_def_line_set > s_GenerateBlastDefline(const CBioseq &bio)

static const string kMatrixBLOSUM80

static const string kOutDbType("dbtype")

static const string kExcludeInvalid("exclude_invalid")

#define RPS_NUM_LOOKUP_CELLS

static const string kMatrixPAM250

static const string kMaxSmpFilesPerVol("max_smp_vol")

static const string kMatrixBLOSUM62

static bool s_HasDefline(const CBioseq &bio)

static const Uint4 kFixedPointScaleFactor

static const string kLogFile("logfile")

static const string kDefaultOutIndexFile("true")

static const string kDefaultOutDbType(kOutDbRps)

static const string kDefaultExcludeInvalid("true")

#define RPS_DATABASE_VERSION

static const string kMatrixBLOSUM50

static const string kOutDbRps

static void s_WriteUint4List(CNcbiOfstream &ostr, const list< Uint4 > &l)

static const string kMatrixBLOSUM90

#define kDefaultWordScoreThreshold

#define kDefaultObsrThreshold

static const string kInPssmList("in")

int main(int argc, const char *argv[])

#define kDefaultMaxSmpFilesPerVol

static const string kObsrThreshold("obsr_threshold")

static const string kMatrixPAM70

static const string kMatrixBLOSUM45

static const string kOutDbDelta

static bool s_DeleteMakeprofileDb(const string &name)

static const string kMatrixPAM30

static const string kBinaryScoremat("binary")

static const string kOutDbCobalt

static const string kUseCmdlineThreshold("force")

static const string kPssmScaleFactor("scale")

static const string kOutIndexFile("index")

#define kDefaultPssmScaleFactor

const string version

version string

const struct ncbi::grid::netcache::search::fields::SIZE size

Prototypes for portable math library (ported from C Toolkit)

long BLAST_Nint(double x)

Nearest integer.

#define TRUE

bool replacement for C indicating true.

#define FALSE

bool replacement for C indicating false.

#define INT2_MIN

smallest (most negative) number represented by signed (two byte) short

Defines the CNcbiApplication and CAppException classes for creating NCBI applications.

Miscellaneous common-use basic types and functionality.

Defines: CTimeFormat - storage class for time format.

std::istream & in(std::istream &in_, double &x_)

double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)

void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)

Defines a concrete strategy to obtain PSSM input data for PSI-BLAST.

C++ API for the PSI-BLAST PSSM engine.

bool DeleteBlastDb(const string &dbpath, CSeqDB::ESeqType seq_type)

Deletes all files associated with a BLAST database.

EBlastDbVersion

BLAST database version.

This file defines several SeqDB utility functions related to byte order and file system portability.

CSeqDB_Substring SeqDB_RemoveDirName(CSeqDB_Substring s)

Returns a filename minus greedy path.

static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

#define row(bind, expected)

structure defining one cell of the compacted lookup table

union AaLookupBackboneCell::@3 payload

union that specifies either entries stored right on the backbone if fewer than AA_HITS_PER_CELL are p...

Int4 entries[3]

if the number of hits for this cell is AA_HITS_PER_CELL or less, the hits are all stored directly in ...

Int4 overflow_cursor

integer offset into the overflow array where the list of hits for this cell begins

Int4 num_used

number of hits stored for this cell

The basic lookup table structure for blastp searches.

void * thick_backbone

may point to BackboneCell, SmallboneCell, or TinyboneCell.

Boolean use_pssm

if TRUE, lookup table construction will assume that the underlying score matrix is position-specific

Int4 threshold

the score threshold for neighboring words

void * overflow

may point to Int4 or Uint2, the overflow array for the compacted lookup table

Int4 backbone_size

number of cells in the backbone

Used to hold a set of positions, mostly used for filtering.

Options needed to construct a lookup table. Also needed: query sequence and query length.

Options required for setting up the query sequence.

structure defining one cell of the RPS lookup table

Class which defines sequence id to taxid mapping.

Defines BLAST database construction classes.

void CWriteDB_CreateAliasFile(const string &file_name, const string &db_name, CWriteDB::ESeqType seq_type, const string &gi_file_name, const string &title=string(), EAliasFileFilterType alias_type=eGiList)

Writes an alias file that restricts a database with a gi list.

@ eNoAliasFilterType

Sentinel value.

Code for database files construction.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4