A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/writedb_8cpp_source.html below:

NCBI C++ ToolKit: src/objtools/blast/seqdb_writer/writedb.cpp Source File

59  bool

scan_bioseq_4_cfastareader_usrobj)

112  string

s(sequence.

data

(), sequence.

length

());

131  bool

scan_bioseq_4_cfastareader_usrobj)

151 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 152  (!defined(NCBI_COMPILER_MIPSPRO)) ) 155  const string

& options,

163  const string

& description,

164  const string

& options)

170  const

vector<TGi> & gis)

204

ofstream outp(fname.c_str(), ios::binary);

218  if

((

id

>> 32) != 0) {

228

magic = eight ? -2 : -1;

232

magic = eight ? -4 : -3;

238  "Error: Unsupported ID type specified."

);

263  char

dbtype(is_protein ?

'p'

:

'n'

);

266  string msg

(

"Failed to find "

);

267  msg

+= (is_protein ?

"protein "

:

"nucleotide "

);

287  int

* num_seqs_found)

292

*num_seqs_found = 0u;

296  _TRACE

(

"Attempting to compute length for '"

<<

dbname

<<

"'"

);

309  const string

& gi_file_name =

kEmptyStr

,

310  int

num_seqs_in_gifile = 0)

312  if

( !gi_file_name.empty() ) {

318  LOG_POST

(

"Created "

<< (is_protein ?

"protein "

:

"nucleotide "

) <<

319  dbname

<<

" BLAST (alias) database with "

<< num_seqs_found

320

<<

" sequences (out of "

<< num_seqs_in_gifile <<

" in " 321

<< gi_file_name <<

", "

<< setprecision(0) << fixed <<

322

(num_seqs_found*100.0/num_seqs_in_gifile) <<

"% found)"

);

324  LOG_POST

(

"Created "

<< (is_protein ?

"protein "

:

"nucleotide "

) <<

325  "BLAST (alias) database "

<<

dbname

<<

" with "

<<

326

num_seqs_found <<

" sequences"

);

331  const string

& db_name,

333  const string

& gi_file_name,

334  const string

& title,

348  case eGiList

: retval =

"GILIST"

;

break

;

349  case eTiList

: retval =

"TILIST"

;

break

;

350  case eSeqIdList

: retval =

"SEQIDLIST"

;

break

;

351  case eTaxIdList

: retval =

"TAXIDLIST"

;

break

;

360  const

vector<string>& databases,

362  const string

& gi_file_name,

363  const string

& title,

371

fnamestr <<

file_name

<< (is_prot ?

".pal"

:

".nal"

);

374

ofstream

out

(fname.c_str());

378  if

( !title.empty() ) {

379  out

<<

"TITLE "

<< title <<

"\n"

;

382  ITERATE

(vector< string >, iter, databases) {

383  out

<<

"\""

<< *iter <<

"\" "

;

386  if

( !gi_file_name.empty() ) {

389

<< gi_file_name <<

"\n"

;

390

}

else if

(oid_range) {

391  out

<<

"FIRST_OID "

<< oid_range->GetFrom() <<

"\n" 392

<<

"LAST_OID "

<< oid_range->GetToOpen() <<

"\n"

;

398  _TRACE

(

"Deleting "

<< fname);

399  string msg

(

"BLASTDB alias file creation failed. Some referenced files may be missing"

);

404  _TRACE

(

"Deleting "

<< fname);

407

<<

" in BLAST database"

;

413  out

<<

"NSEQ "

<< num_seqs <<

"\n"

;

414  out

<<

"LENGTH "

<< dbsize <<

"\n"

;

428  return

(num_digits >2) ? num_digits: 2;

433  unsigned int

num_volumes,

435  const string

& title)

438  string

concatenated_blastdb_name;

439

vector<string> volume_names(num_volumes,

kEmptyStr

);

441  for

(

unsigned int i

= 0;

i

< num_volumes;

i

++) {

443

oss <<

file_name

<<

"."

<< setfill(

'0'

) << setw(num_digits) <<

i

;

446

volume_names.push_back(vol_name);

447

concatenated_blastdb_name += vol_name +

" "

;

455

fname <<

file_name

<< (is_prot ?

".pal"

:

".nal"

);

461  if

( !title.empty() ) {

462  out

<<

"TITLE "

<< title <<

"\n"

;

466  ITERATE

(vector<string>, itr, volume_names) {

470  out

<<

"NSEQ "

<< num_seqs <<

"\n"

;

471  out

<<

"LENGTH "

<< dbsize <<

"\n"

;

477  const

vector<string>& databases,

479  const string

& gi_file_name,

480  const string

& title,

487  const

vector<string>& db_names,

490  const string

& title

)

498  bool

delete_source_alias_files

)

500  if

(alias_files.empty()) {

503  "No alias files available to create group alias file."

);

512  ITERATE

(list<string>, itr, alias_files) {

513

ifstream

in

(itr->c_str());

515  LOG_POST

(

Warning

<< *itr <<

" does not exist, omitting from group alias file"

);

520  while

(getline(

in

, line)) {

530  if

(delete_source_alias_files) {

531  ITERATE

(list<string>, itr, alias_files) {

533  _TRACE

(

"Deleting "

<< *itr);

541

list<string> alias_files;

549  const string

& output_db,

552  const string

& title)

566  if

(vols.size() == 0) {

571  string

out_ext = is_protein?

".pal"

:

".nal"

;

573

ofstream ofs(output_db + out_ext);

574

ofs <<

"TITLE "

<< title <<endl;

576  for

(

unsigned int i

= 0;

i

< vols.size();

i

++) {

580  string

DBList =

"DBLIST "

+ v_basename;

581  string

OidList =

"OIDLIST "

;

584  string

full_path = vols[

i

] + ex_model_ext;

589

OidList +=

f

.GetName();

592  if

(vols.size() > 1) {

593

oss << output_db <<

"."

<< setfill(

'0'

) << setw(num_digits) <<

i

<< out_ext;

595

ovs << DBList << endl;

596

ovs << OidList << endl;

597

ovs <<

"OID_MASK_TYPE "

<< oid_mask_type << endl;

600

ofs << DBList << endl;

601

ofs << OidList << endl;

602

ofs <<

"OID_MASK_TYPE "

<< oid_mask_type << endl;

606  if

(vols.size() > 1) {

609  for

(

unsigned int i

= 0;

i

< vols.size();

i

++) {

610

oss <<

" "

<< output_db <<

"."

<< setfill(

'0'

) << setw(num_digits) <<

i

;

616  Uint8

total_length = 0;

622

ofs <<

"NSEQ "

<< num_seqs << endl;

623

ofs <<

"LENGTH "

<< total_length << endl;

TContainerType m_Ids

List of identifiers to use.

CBinaryListBuilder(EIdType id_type)

Construct a list of a given type.

void Write(const string &fname)

Write the list to a file.

EIdType m_IdType

Whether to use GIs or TIs.

EIdType

Identifier types.

`Blob' Class for SeqDB (and WriteDB).

This represents a set of masks for a given sequence.

CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:

CSeqDB_Substring FindBaseName() const

Returns the portion of this path containing the base name.

void GetString(string &s) const

Return the data by assigning it to a string.

static void FindVolumePaths(const string &dbname, ESeqType seqtype, vector< string > &paths, vector< string > *alias_paths=NULL, bool recursive=true, bool expand_links=true)

Find volume paths.

Uint8 GetTotalLength() const

Returns the sum of the lengths of all available sequences.

ESeqType

Sequence types (eUnknown tries protein, then nucleotide).

void GetTotals(ESummaryType sumtype, int *oid_count, Uint8 *total_length, bool use_approx=true) const

Returns the sum of the sequence lengths.

int GetNumSeqs() const

Returns the number of sequences available.

@ eFilteredAll

Values from alias files, or summation over all included sequences.

CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...

int CreateColumn(const string &title, bool mbo=false)

Set up a generic CWriteDB metadata column.

void SetMaskedLetters(const string &masked)

Set bases that should not be used in sequences.

void SetPig(int pig)

Set the PIG identifier of this sequence.

void AddColumnMetaData(int col_id, const string &key, const string &value)

Add meta data to a column.

void ListFiles(vector< string > &files)

List Filenames.

void AddSequence(const CTempString &sequence, const CTempString &ambiguities)

Add a new sequence as raw sequence and ambiguity data.

void Close()

Close the file and flush any remaining data to disk.

void SetMaxFileSize(Uint8 sz)

Set the maximum size for any file in the database.

static CRef< CBlast_def_line_set > ExtractBioseqDeflines(const CBioseq &bs, bool parse_ids, bool long_seqids, bool scan_bioseq_4_cfastareader_usrobj=false)

Extract deflines from a CBioseq.

void SetMaskData(const CMaskedRangesVector &ranges, const vector< TGi > &gis)

Set filtering data for a sequence.

void SetDeflines(const CBlast_def_line_set &deflines)

This method replaces any stored header data for the current sequence with the provided CBlast_def_lin...

void ListVolumes(vector< string > &vols)

List Volumes.

int FindColumn(const string &title) const

Find an existing column.

int RegisterMaskAlgorithm(EBlast_filter_program program, const string &options, const string &name="")

Register a type of filtering data found in this database.

CBlastDbBlob & SetBlobData(int col_id)

Get a blob to use for a given column letter.

void SetMaxVolumeLetters(Uint8 sz)

Set the maximum letters in one volume.

int CreateUserColumn(const string &title)

Set up a user-defined CWriteDB column.

void SetMaxFileSize(Uint8 sz)

Set maximum size for output files.

@ eProtein

Protein database.

int RegisterMaskAlgorithm(EBlast_filter_program program, const string &options=string(), const string &name=string())

Register a type of filtering data found in this database.

CWriteDB(const string &dbname, ESeqType seqtype, const string &title, int itype=eDefault, bool parse_ids=true, bool long_ids=false, bool use_gi_mask=false, EBlastDbVersion dbver=eBDB_Version4, bool limit_defline=false, Uint8 oid_masks=EOidMaskType::fNone, bool scan_bioseq_4_cfastareader_usrobj=false)

Constructor.

void AddColumnMetaData(int col_id, const string &key, const string &value)

Add meta data to a user-defined column.

int FindColumn(const string &title) const

Find an existing column.

void ListFiles(vector< string > &files)

List Filenames.

CBlastDbBlob & SetBlobData(int column_id)

Add blob data to a user-defined column.

void SetMaskData(const CMaskedRangesVector &ranges, const vector< TGi > &gis)

Set filtering data for a sequence.

void SetPig(int pig)

Set the PIG to be used for the sequence.

void AddSequence(const CBioseq &bs)

Add a sequence as a CBioseq.

void SetMaxVolumeLetters(Uint8 letters)

Set maximum letters for output volumes.

CWriteDB_Impl * m_Impl

Implementation object.

EIndexType

Whether and what kind of indices to build.

void SetMaskedLetters(const string &masked)

Set letters that should not be used in sequences.

void ListVolumes(vector< string > &vols)

List Volumes.

static CRef< CBlast_def_line_set > ExtractBioseqDeflines(const CBioseq &bs, bool parse_ids=true, bool long_ids=false, bool scan_bioseq_4_cfastareader_usrobj=false)

Extract Deflines From Bioseq.

void SetDeflines(const CBlast_def_line_set &deflines)

Set the deflines to be used for the sequence.

void Close()

Close the Database.

std::ofstream out("events_result.xml")

main entry point for tests

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define NCBI_CURRENT_FUNCTION

Get current function name.

#define LOG_POST(message)

This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

void Warning(CExceptionArgs_Base &args)

void FindFiles(TPathIterator path_begin, TPathIterator path_end, const vector< string > &masks, TFindFunc &find_func, TFindFiles flags=fFF_Default)

Generic algorithm for file search.

virtual bool Remove(TRemoveFlags flags=eRecursive) const

Remove a directory entry.

static string GetCwd(void)

Get the current working directory.

string GetName(void) const

Get the base entry name with extension (if any).

void Reset(void)

Reset reference object.

int32_t Int4

4-byte (32-bit) signed integer

int64_t Int8

8-byte (64-bit) signed integer

uint64_t Uint8

8-byte (64-bit) unsigned integer

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

IO_PREFIX::ostream CNcbiOstream

Portable alias for ostream.

static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)

Split a string using specified delimiters.

static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)

Truncate whitespace in a string (in-place)

const char * data(void) const

Return a pointer to the array represented.

static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)

Check if a string starts with a specified prefix value.

size_type length(void) const

Return the length of the represented array.

string AsString(const CTimeFormat &format=kEmptyStr, TSeconds out_tz=eCurrentTimeZone) const

Transform time to string.

@ eCurrent

Use current time. See also CCurrentTime.

EBlast_filter_program

This defines the possible sequence filtering algorithms to be used in a BLAST database.

char * dbname(DBPROCESS *dbproc)

Get name of current database.

constexpr auto sort(_Init &&init)

const struct ncbi::grid::netcache::search::fields::KEY key

const GenericPointer< typename T::ValueType > T2 value

std::istream & in(std::istream &in_, double &x_)

Defines BLAST database access classes.

const string kSeqDBGroupAliasFileName

The name of the group alias file expected in each directory. For more documentation,...

const string SeqDB_GetOidMaskFileExt(bool db_is_protein, EOidMaskType t)

EBlastDbVersion

BLAST database version.

string SeqDB_ResolveDbPathNoExtension(const string &filename, char dbtype='-')

Resolve a file path using SeqDB's path algorithms.

This file defines several SeqDB utility functions related to byte order and file system portability.

static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

static bool ambig(char c)

static string s_AliasFileFilterTypeToString(EAliasFileFilterType e)

Auxiliary function to convert the enumeration into a string.

int s_GetNumOfDigits(int n)

static bool s_ComputeNumSequencesAndDbLength(const string &dbname, bool is_prot, Uint8 *dbsize, int *num_seqs_found)

Computes the number of sequences and (alias) database length for alias files.

void CWriteDB_CreateOidMaskDB(const string &input_db, const string &output_db, CWriteDB::ESeqType seq_type, int oid_mask_type, const string &title)

static void s_PrintAliasFileCreationLog(const string &dbname, bool is_protein, int num_seqs_found, const string &gi_file_name=kEmptyStr, int num_seqs_in_gifile=0)

void CWriteDB_CreateAliasFile(const string &file_name, const string &db_name, CWriteDB::ESeqType seq_type, const string &gi_file_name, const string &title, EAliasFileFilterType alias_type)

void CWriteDB_ConsolidateAliasFiles(const list< string > &alias_files, bool delete_source_alias_files)

static void s_CreateAliasFilePriv(const string &file_name, const vector< string > &databases, CWriteDB::ESeqType seq_type, const string &gi_file_name, const string &title, EAliasFileFilterType alias_type, const TSeqRange *oid_range=NULL)

static bool s_DoesBlastDbExist(const string &dbname, bool is_protein)

Returns true if the BLAST DB exists, otherwise throws a CSeqDBException.

Defines BLAST database construction classes.

EAliasFileFilterType

Defines the possible filtering types that can be applied to an alias file.

@ eTiList

Filter a BLAST database via TIs (Trace IDs)

@ eSeqIdList

Filter a BLAST database via a Seq-id list.

@ eTaxIdList

Filter a BLAST database via Taxonomy Id list.

@ eGiList

Filter a BLAST database via GIs.

@ eNoAliasFilterType

Sentinel value.

Data conversion tools for CWriteDB and associated code.

void s_WriteInt8BE(ostream &str, Uint8 x)

Write an eight byte integer to a stream in big-endian format.

void s_WriteInt4(ostream &str, int x)

Write a four byte integer to a stream in big endian format.

Defines implementation class of WriteDB.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4