A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/makeclusterdb_8cpp_source.html below:

NCBI C++ ToolKit: src/app/blastdb/makeclusterdb.cpp Source File

60 #include "../blast/blast_app_util.hpp" 67 #ifndef SKIP_DOXYGEN_PROCESSING 118  return

(

a

->GetId() <

b

->GetId());

123  return

(

a

->GetRefSeqOid() <

b

->GetRefSeqOid());

138

vector<SBlastDbMaskData> & mask_range,

139

vector<int> & column_ids,

140

vector<CTempString> & column_blobs);

142 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 143  (!defined(NCBI_COMPILER_MIPSPRO)) ) 164 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 165  (!defined(NCBI_COMPILER_MIPSPRO)) ) 175

: m_Source(source_db), m_Clusters(cluster), m_CurrentCluster(0)

177 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 178  (!defined(NCBI_COMPILER_MIPSPRO)) ) 183  string

algo_opts, algo_name;

214

vector<SBlastDbMaskData> & mask_range,

215

vector<int> & column_ids,

216

vector<CTempString> & column_blobs)

232  const char

* seq_ptr;

233  int

slength(0), alength(0);

238

ambiguities =

CTempString

(seq_ptr + slength, alength);

252  if

(ref_seqid.

Match

(**seqid)) {

261  const

vector<CRef<CClusterSeq> > & mem_seqs = cluster->

GetMemSeqs

();

262  if

(mem_seqs.size() > 0) {

263

vector<blastdb::TOid> mem_oids;

264  for

(

unsigned int i

=0;

i

< mem_seqs.size();

i

++) {

265  int64_t

mem_oid = mem_seqs[

i

]->GetOid();

270

mem_oids.push_back(mem_oid);

272  std::sort

(mem_oids.begin(), mem_oids.end());

277

vector<CBlast_def_line::TTaxid> diff_ts;

278

diff_ts.resize(taxids.

size

());

279

vector<CBlast_def_line::TTaxid>::iterator diff_ts_itr;

281

diff_ts_itr = std::set_difference(taxids.

begin

(), taxids.

end

(), ref_ts.

begin

(), ref_ts.

end

(), diff_ts.begin());

282

diff_ts.resize(diff_ts_itr - diff_ts.begin());

283  if

(diff_ts.size() > 0) {

286  if

(tx.

size

() > 0) {

289

bf->SetLeafTaxIds(leaf_ts);

293

deflines->

Set

().push_back(bf);

295 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 296  (!defined(NCBI_COMPILER_MIPSPRO)) ) 307

mask_data.

offsets

.push_back(pair<TSeqPos, TSeqPos>(range->first, range->second));

310

mask_range.push_back(mask_data);

316

column_blobs.resize(column_ids.size());

317  m_Blobs

.resize(column_ids.size());

319  for

(

int i

= 0;

i

< (

int

)column_ids.size();

i

++) {

355  virtual void Init

();

393

arg_desc->SetUsageContext(

GetArguments

().GetProgramBasename(),

394  "Application to create BLAST databases, version " 397

arg_desc->SetCurrentGroup(

"Input options"

);

398

arg_desc->AddDefaultKey(

kInput

,

"input_file"

,

401

arg_desc->AddDefaultKey(

kArgDb

,

"source_db"

,

405

arg_desc->AddDefaultKey(

kArgDbType

,

"molecule_type"

,

410

arg_desc->SetCurrentGroup(

"Configuration options"

);

411

arg_desc->AddOptionalKey(

kArgDbTitle

,

"database_title"

,

412  "Title for BLAST database\n"

,

415

arg_desc->SetCurrentGroup(

"Output options"

);

416

arg_desc->AddOptionalKey(

kOutput

,

"database_name"

,

417  "Name of BLAST database to be created\n"

,

419

arg_desc->AddDefaultKey(

"max_file_sz"

,

"number_of_bytes"

,

420  "Maximum file size for BLAST database files"

,

422

arg_desc->AddOptionalKey(

"metadata_output_prefix"

,

""

,

424

arg_desc->AddOptionalKey(

"logfile"

,

"File_Name"

,

425  "File to which the program log should be redirected"

,

428

arg_desc->AddFlag(

"verbose"

,

"Produce verbose output"

,

true

);

439  static string

kMods =

"KMGTPEZY"

;

442  for

(

i

= 0;

i

< kMods.size();

i

++) {

443  if

(v <

Uint8

(minprec)*1024) {

451

rv.append(kMods,

i

, 1);

463  unsigned int

cluster_id = 0;

464  while

(input_stream) {

465

getline(input_stream, line);

466  if

(line.empty() || (line.find_first_not_of(

' '

) == std::string::npos)) {

472  if

(cols.size() < 3) {

475  string

ref_id(cols[0]);

476  if

(current_cluster.

Empty

() || (current_cluster->

GetRefSeqId

() != ref_id)) {

484  string

mem_id(cols[1]);

485  if

(ref_id != mem_id) {

492  LOG_POST

(

Info

<<

"Num of Reference Seqs: "

<< cluster_id);

500

vector<blastdb::TOid> oids;

507

accs.push_back((*itr)->GetId());

527  bool

is_protein = (args[

kArgDbType

].AsString() ==

"prot"

);

535  m_LogFile

= & (args[

"logfile"

].HasValue() ? args[

"logfile"

].AsOutputFile() : cout);

541  bool

long_seqids =

true

;

542  bool

limit_defline =

false

;

546  if

(args[

"verbose"

]) {

552  static const Uint8

MAX_VOL_FILE_SIZE = 0x100000000;

553  if

(bytes >= MAX_VOL_FILE_SIZE) {

568 #ifdef METADATA_CLUSTERDB 573  string

output_prefix = args[

"metadata_output_prefix"

]

574

? args[

"metadata_output_prefix"

].AsString() :

kEmptyStr

;

582  string

metadata_filename = new_db +

"."

+ extn;

583

ofstream

out

(metadata_filename.c_str());

586

json_out->PreserveKeyNames();

588

json_out->WriteObject(obj_info);

618 #ifndef SKIP_DOXYGEN_PROCESSING 619 int main

(

int

argc,

const char

* argv[]

)

User-defined methods of the data storage class.

User-defined methods of the data storage class.

Declares singleton objects to store the version and reference for the BLAST engine.

#define CATCH_ALL(exit_code)

Standard catch statement for all BLAST command line programs.

Interface for converting sources of sequence data into blast sequence input.

Auxiliary classes/functions for BLAST input library.

Code to build a database given various sources of sequence data.

void AddParam(EUsageParams p, int val)

Keeps track of the version of the BLAST engine in the NCBI C++ toolkit.

Build BlastDB format databases from various data sources.

bool AddSequences(IBioseqSource &src, bool add_pig=false)

Add sequences from an IBioseqSource object.

void SetVerbosity(bool v)

Specify level of output verbosity.

int RegisterMaskingAlgorithm(EBlast_filter_program program, const string &options, const string &name="")

Define a masking algorithm.

string GetOutputDbName() const

bool EndBuild(bool erase=false)

Finish building a new database.

void SetMaxFileSize(Uint8 max_file_size)

Set the maximum size of database component files.

void SetLeafTaxIds(const TIdToLeafs &taxids, bool keep_taxids)

Specify a leaf-taxids object.

virtual bool GetNext(CTempString &sequence, CTempString &ambiguities, CRef< CBlast_def_line_set > &deflines, vector< SBlastDbMaskData > &mask_range, vector< int > &column_ids, vector< CTempString > &column_blobs)

uint64_t m_CurrentCluster

virtual void GetColumnNames(vector< string > &names)

Get the names of all columns defined by this sequence source.

CRef< CSeqDBExpert > m_Source

vector< string > m_ColumnNames

CClusterDBSource(CRef< CSeqDBExpert > &source_db, vector< CRef< CCluster > > &clusters, CBuildDatabase *outdb)

virtual ~CClusterDBSource()

vector< CBlastDbBlob > m_Blobs

virtual int GetColumnId(const string &name)

Get the column ID for a column mentioned by name.

virtual const map< string, string > & GetColumnMetaData(int id)

Get metadata for the column with the specified Column ID.

vector< int > m_ColumnIds

vector< CRef< CCluster > > & m_Clusters

map< int, int > m_MaskIdMap

const string & GetId() const

CRef< CCluster > & GetCluster()

CClusterSeq(CRef< CCluster > cluster, const string &id, bool is_refseq)

CRef< CCluster > m_Cluster

void AddMemSeq(CRef< CClusterSeq > &m)

const string & GetRefSeqId()

void SetRefSeq(CRef< CClusterSeq > &r)

CCluster(unsigned int cluster_id)

const vector< CRef< CClusterSeq > > & GetMemSeqs()

CRef< CClusterSeq > m_RefSeq

unsigned int GetClusterId()

vector< CRef< CClusterSeq > > m_MemSeqs

CRef< CClusterSeq > & GetRefSeq()

Defines invalid user input exceptions.

The main application class.

vector< CRef< CCluster > > m_Clusters

CRef< CBuildDatabase > m_DB

void x_ProcessInputData(const string &source_db, bool is_protein)

vector< CRef< CClusterSeq > > m_ClusterSeqs

CMakeClusterDBApp()

@inheritDoc

virtual void Init()

@inheritDoc

virtual int Run()

@inheritDoc

void x_ProcessInputFile(const string &input_file)

CRef< CSeqDBExpert > m_SourceDB

CBlastUsageReport m_UsageReport

void GetRawSeqAndAmbig(int oid, const char **buffer, int *seq_length, int *ambig_length) const

Raw Sequence and Ambiguity Data.

void GetColumnBlob(int col_id, int oid, CBlastDbBlob &blob)

Fetch the data blob for the given column and oid.

void GetAvailableMaskAlgorithms(vector< int > &algorithms)

Get a list of algorithm IDs for which mask data exists.

ESeqType

Sequence types (eUnknown tries protein, then nucleotide).

void GetTaxIdsForOids(const vector< blastdb::TOid > &oids, set< TTaxId > &tax_ids) const

void GetMaskAlgorithmDetails(int algorithm_id, objects::EBlast_filter_program &program, string &program_name, string &algo_opts)

Get information about one type of masking available here.

void RetSequence(const char **buffer) const

Returns any resources associated with the sequence.

CRef< CBlast_db_metadata > GetDBMetaData(string user_path=kEmptyStr)

void ListColumns(vector< string > &titles)

List columns titles found in this database.

bool CheckOrFindOID(int &next_oid) const

Find an included OID, incrementing next_oid if necessary.

int GetColumnId(const string &title)

Get an ID number for a given column title.

CRef< CBlast_def_line_set > GetHdr(int oid) const

Get the ASN.1 header for the sequence.

void AccessionsToOids(const vector< string > &accs, vector< blastdb::TOid > &oids) const

void GetMaskData(int oid, const vector< int > &algo_ids, TSequenceRanges &ranges)

Get masked ranges of a sequence.

const map< string, string > & GetColumnMetaData(int column_id)

Get all metadata for the specified column.

CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...

Interface to a source of raw sequence data.

iterator_bool insert(const value_type &val)

const_iterator begin() const

const_iterator end() const

const string kArgDbTitle

Title for the BLAST database.

const string kArgDbType

BLAST database molecule type.

const string kArgDb

BLAST database name.

void Print(const CCompactSAMApplication::AlignInfo &ai)

API (CDeflineGenerator) for computing sequences' titles ("definitions").

std::ofstream out("events_result.xml")

main entry point for tests

static const struct name_t names[]

void SetFullVersion(CRef< CVersionAPI > version)

Set version data for the program.

void HideStdArgs(THideStdArgs hide_mask)

Set the hide mask for the Hide Std Flags.

virtual const CArgs & GetArgs(void) const

Get parsed command line arguments.

int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)

Main function (entry point) for the NCBI application.

CVersionInfo GetVersion(void) const

Get the program version information.

virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)

Setup the command line argument descriptions.

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define NON_CONST_ITERATE(Type, Var, Cont)

Non constant version of ITERATE macro.

const CNcbiArguments & GetArguments(void) const

Get the application's cached unprocessed command-line arguments.

@ fHideXmlHelp

Hide XML help description.

@ fHideFullVersion

Hide full version description.

@ fHideDryRun

Hide dryrun description.

@ fHideConffile

Hide configuration file description.

@ eNoOwnership

No ownership is assumed.

@ fAppend

Append to end-of-file; for eOutputFile or eIOFile.

@ eInputFile

Name of file (must exist and be readable)

@ eString

An arbitrary string.

@ eOutputFile

Name of file (must be writable)

void SetDiagPostPrefix(const char *prefix)

Specify a string to prefix all subsequent error postings with.

EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)

Set the threshold severity for posting the messages.

#define LOG_POST(message)

This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...

@ eDiag_Warning

Warning message.

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

void Warning(CExceptionArgs_Base &args)

void Info(CExceptionArgs_Base &args)

static char GetPathSeparator(void)

Get path separator symbol specific for the current platform.

bool Match(const CSeq_id &sid2) const

Match() - TRUE if SeqIds are equivalent.

const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)

If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...

TObjectType * GetPointer(void) THROWS_NONE

Get pointer.

void Reset(void)

Reset reference object.

bool NotEmpty(void) const THROWS_NONE

Check if CRef is not empty – pointing to an object and has a non-null value.

bool Empty(void) const THROWS_NONE

Check if CRef is empty – not pointing to any object, which means having a null value.

uint64_t Uint8

8-byte (64-bit) unsigned integer

bool IsEnabled(void)

Indicates whether application usage statistics collection is enabled for a current reporter instance.

IO_PREFIX::ostream CNcbiOstream

Portable alias for ostream.

IO_PREFIX::ifstream CNcbiIfstream

Portable alias for ifstream.

static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)

Split a string using specified delimiters.

static Uint8 StringToUInt8_DataSize(const CTempString str, TStringToNumFlags flags=0)

Convert string that can contain "software" qualifiers to Uint8.

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

static string UInt8ToString(Uint8 value, TNumToStringFlags flags=0, int base=10)

Convert UInt8 to string.

double Elapsed(void) const

Return time elapsed since first Start() or last Restart() call (in seconds).

void Start(void)

Start the timer.

EBlast_filter_program

This defines the possible sequence filtering algorithms to be used in a BLAST database.

Tdata & Set(void)

Assign a value to data member.

list< CRef< CBlast_def_line > > Tdata

@ eBlast_filter_program_other

unsigned int

A callback function used to compare two keys in a database.

static const string kOutput("out")

Command line flag to represent the output.

static string Uint8ToString_DataSize(Uint8 v, unsigned minprec=10)

Converts a Uint8 into a string which contains a data size (converse to NStr::StringToUInt8_DataSize)

int main(int argc, const char *argv[])

bool SortClusterSeqs(CRef< CClusterSeq > &a, CRef< CClusterSeq > &b)

static const string kInputSeparators(" ")

Defines token separators when multiple inputs are present.

static const string kInput("in")

Command line flag to represent the input.

bool SortCluster(CRef< CCluster > &a, CRef< CCluster > &b)

constexpr auto sort(_Init &&init)

constexpr bool empty(list< Ts... >) noexcept

const string version

version string

Defines the CNcbiApplication and CAppException classes for creating NCBI applications.

double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)

Defines BLAST database access classes.

Defines exception class and several constants for SeqDB.

Int4 TOid

Ordinal ID in BLAST databases.

void SeqDB_GetMetadataFileExtension(bool db_is_protein, string &extn)

List of sequence offset ranges.

Structure describing filtered regions created using a particular sequence filtering algorithm.

int algorithm_id

Identifies the algorithm used.

vector< pair< TSeqPos, TSeqPos > > offsets

Start and end offsets of the filtered area.

Defines BLAST database construction classes.

Defines exception class for WriteDB.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4