A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/build__db_8cpp_source.html below:

NCBI C++ ToolKit: src/objtools/blast/seqdb_writer/build_db.cpp Source File

58 #ifndef SKIP_DOXYGEN_PROCESSING 69  bool

have_seqid =

false

;

70  bool

have_gi =

false

;

77  m_LogFile

<<

"Seq-id "

<< seqid->AsFastaString()

78

<<

" contains id "

<<

id

->AsFastaString() << endl;

83  m_LogFile

<<

"WARNING: multiple GIs discovered; gi[0] = " 87  m_LogFile

<<

"Seq-id "

<< seqid->AsFastaString()

89

<<

id

->GetGi() << endl;

93

}

else if

((! have_seqid) && (id->

Which

() == seqid->Which())) {

94  m_LogFile

<<

"Remote: Resolving <"

<< seqid->AsFastaString()

95

<<

"> to <"

<<

id

->AsFastaString() <<

">"

<< endl;

101

<<

"Warning: Resolution still does not provide version." 104

seqid.

Reset

(

const_cast<CSeq_id

*

>

(

id

.GetPointer()));

120  ITERATE

(vector<string>,

id

, ids) {

135  bool

specific =

false

;

150  m_LogFile

<<

"Did not recognize id: \""

<< *

id

<<

"\""

<< endl;

158  m_LogFile

<<

"Found numerical GI:"

<< gi << endl;

195  bool

found(

false

),

done

(

false

);

197  ITERATE

(vector<int>, oid, oids) {

204  size_t

pos =

S

.find(acc);

206  if

(pos != string::npos) {

207  size_t

endpos = pos + acc.size();

209  bool

start_okay = (pos == 0 ||

S

[pos-1] ==

'|'

);

210  bool

end_okay = ((endpos ==

S

.size()) ||

211

(

S

[endpos] ==

'.'

||

212  S

[endpos] ==

'|'

));

214  if

(start_okay && end_okay) {

218  if

(

done

|| (! found)) {

283  m_LogFile

<<

"Duplication from source DB; duplicated " 284

<<

count

<<

" sequences in "

<<

t

<<

" seconds."

<< endl;

292  if

((! bs->CanGetInst()) || bs->GetInst().CanGetSeq_data() ||

293

! bs->GetInst().CanGetExt() || ! bs->GetInst().GetExt().IsDelta()) {

297  if

(bs->GetInst().CanGetMol() &&

301

oss <<

id

->AsFastaString() <<

": Protein delta sequences are not supported."

;

307  const CDelta_ext

& dext = bs->GetInst().GetExt().GetDelta();

312  typedef

list< CRef< CDelta_seq > >

TItems

;

319  if

(bs->GetInst().CanGetLength()) {

320

seq8na.reserve(bs->GetInst().GetLength());

326  if

(((**item).IsLoc()) && ((**item).GetLoc().IsNull())) {

327

seq8na.append(1, 0x0f);

339  "Part of the delta sequence, including its length, is un-available."

);

361  "Unhandled type of sequence data encountered."

);

371  int

length = seq8na.size();

386

bs2->

SetId

() = bs->GetId();

389  if

(bs->IsSetDescr()) {

395

inst->

SetSeq_data

().SetNcbi4na().Set().swap(seq4na);

402  if

(bs->IsSetAnnot()) {

410  "Bioseq must have Seq-data or " 411  "Delta containing only literals."

);

437

headers->RemoveGIs();

444 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 445  (!defined(NCBI_COMPILER_MIPSPRO)) ) 461

gis.push_back((*id)->GetGi());

470

objects::CSeqVector * sv,

484  if

(bs->GetInst().CanGetSeq_data())

499 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 500  (!defined(NCBI_COMPILER_MIPSPRO)) ) 501  const

list< CRef<CSeq_id> > & ids = bs->GetId();

523  m_LogFile

<<

"Caught exception for query: " 524

<< seqid.AsFastaString() << endl

525

<< e.

what

() << endl;

543  m_LogFile

<<

"Could not find entry for: " 544

<< seqid.AsFastaString() << endl;

551  m_LogFile

<<

"-- REMOTE: Found sequence " 552

<< seqid.AsFastaString() << endl;

560  bool

found_all =

true

;

565  for

(

i

= 0;

i

< num_gis;

i

++) {

574  m_LogFile

<<

" not found locally; adding remotely."

<< endl;

585  m_LogFile

<<

" found locally; not adding remotely."

<< endl;

591  for

(

i

= 0;

i

< num_seqids;

i

++) {

594

<< gi_list.

GetKey

<

string

>(

i

);

601  m_LogFile

<<

" not found locally; adding remotely."

<< endl;

605  string

acc = gi_list.

GetKey

<

string

>(

i

);

611  m_LogFile

<<

" found locally; not adding remotely."

<< endl;

618  m_LogFile

<<

"Adding sequences from remote source; added " 619

<<

count

<<

" sequences in "

<<

t

<<

" seconds."

<< endl;

628  bool

success =

true

;

635  for

(

i

= 0;

i

< num_gis;

i

++) {

642

<<

" was not resolvable."

<< endl;

649

<<

" found locally."

<< endl;

655  for

(

i

= 0;

i

< num_seqids;

i

++) {

662

<< gi_list.

GetKey

<

string

>(

i

)

663

<<

" was not resolvable."

<< endl;

670

<< gi_list.

GetKey

<

string

>(

i

)

671

<<

" found locally."

<< endl;

676  m_LogFile

<<

"Could not resolve "

<< unresolved <<

" IDs."

<< endl;

705

: m_FastaReader(

NULL

)

732

TFlags

flags

= (TFlags) iflags;

743  if

(!

value

.empty()) {

745  catch

(

const

exception&) {}

763  static const string

kKeyword(

"m_Pos = "

);

766  string

pos(

"unknown"

);

767  if

(start !=

NPOS

&& end !=

NPOS

) {

768

start += kKeyword.size();

769

pos =

string

(e.what()).substr(start, end-start);

771  string msg

= e.GetMsg();

772  const string

extra_string(

"CFastaReader: "

);

774  msg

.erase(0, extra_string.size());

807  string

bioseq_id(

"Unknown"

);

810  const

list< CRef<CSeq_id> > & ids = bs->

GetId

();

815  bool

skip_this =

false

;

816  for

(list<

CRef<CSeq_id>

>::const_iterator it = ids.begin(); it != ids.end(); it++ ){

817  if

( it->NotEmpty() ){

819  if

(

info

== CSeq_id::EAccessionInfo::eAcc_gi ){

820

check_gi = (*it)->GetGi();

821  if

( check_gi > max_gi32_val )

829  m_LogFile

<<

"Ignoring gi '"

<< check_gi <<

"' as it has value larger then "

<< 0xFFFFFFFF<< endl;

836  if

(! ids.empty() && ids.front().NotEmpty()) {

837

bioseq_id.assign(ids.front()->AsFastaString());

846  for

(

auto

& it: bss->

SetId

()) {

848  if

(!it->IsLocal() && !it->IsGi() &&

852  string label

= it->GetSeqIdString(

true

);

868  "Sequences longer than 2,147,483,647 bases are not supported. Offending sequence is "

+ bioseq_id);

872  m_LogFile

<<

"Ignoring sequence '"

<< bioseq_id

873

<<

"' as it has no sequence data"

<< endl;

879  m_LogFile

<<

"Adding bioseq from fasta; first id is: '"

<< bioseq_id

897  m_LogFile

<<

"Adding sequences from FASTA; added " 898

<<

count

<<

" sequences in "

<<

t

<<

" seconds."

<< endl;

911 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 912  (!defined(NCBI_COMPILER_MIPSPRO)) ) 915

vector<string> all_names;

921  for

(

int i

= 0;

i

< (

int

) all_names.size();

i

++) {

922  string

name = all_names[

i

];

926  if

(name ==

"BlastDb/MaskData"

) {

939  ITERATE

(StringPairMap, iter, meta) {

943

in2out[in_id] = out_id;

948

vector<CTempString> column_blobs;

949

vector<int> column_ids;

967  _ASSERT

(column_blobs.size() == column_ids.size());

969  if

(sequence.

empty

()) {

971  "Error in raw data: no sequence"

);

976  "Error in raw data: " 977  "protein db cannot with ambiguities"

);

980  if

(deflines.

Empty

()) {

982  "Error in raw data: no headers provided"

);

991 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 992  (!defined(NCBI_COMPILER_MIPSPRO)) ) 993  for

(

int i

= 0;

i

< (

int

)column_ids.size();

i

++) {

994  int

in_id = column_ids[

i

];

995  if

(in_id == mask_id)

continue

;

997  if

(column_blobs[

i

].

size

() == 0)

1002  int

out_id = in2out[in_id];

1014  const

list< CRef<CSeq_id> > & ids = (*defline)->GetSeqid();

1016  if

((*id)->IsGi()) {

1017

gis.push_back((*id)->GetGi());

1022  if

(!rng.

empty

()) {

1023

mask_data.insert(mask_data.end(), rng.begin(), rng.end());

1029  if

(!mask_data.

empty

()) {

1044  m_LogFile

<<

"Adding sequences from raw db source; added " 1045

<<

count

<<

" sequences in "

<<

t

<<

" seconds."

<< endl;

1055  if

(dir_name.empty()) {

1062  string msg

(

"Failed to create directory '"

+ d.

GetName

() +

"'"

);

1067  string msg

(

"You do not have write permissions on '"

+

1074  const string

& title,

1083  bool

scan_bioseq_4_cfastareader_usrobj)

1084

: m_IsProtein (is_protein),

1085

m_KeepLinks (

false

),

1086

m_KeepMbits (

false

),

1087

m_KeepLeafs (

false

),

1089

m_LogFile (*logfile),

1090

m_UseRemote (

true

),

1093

m_Verbose (

false

),

1095

m_LongIDs (long_seqids),

1096

m_FoundMatchingMasks(

false

),

1097

m_SkipCopyingGis(

false

),

1098

m_SkipLargeGis(

true

),

1100

m_ScanBioseq4CFastaReaderUsrObjct(scan_bioseq_4_cfastareader_usrobj)

1104  m_LogFile

<<

"\n\nBuilding a new DB, current time: " 1107  m_LogFile

<<

"New DB name: "

<< output_dbname << endl;

1108  m_LogFile

<<

"New DB title: "

<< title << endl;

1109  const string

mol_type(is_protein ?

"Protein"

:

"Nucleotide"

);

1110  m_LogFile

<<

"Sequence type: "

<< mol_type << endl;

1112  m_LogFile

<<

"Deleted existing "

<< mol_type

1113

<<

" BLAST database named "

<< output_dbname << endl;

1137  const string

& title,

1147  bool

scan_bioseq_4_cfastareader_usrobj)

1148

: m_IsProtein (is_protein),

1149

m_KeepLinks (

false

),

1150

m_KeepMbits (

false

),

1151

m_KeepLeafs (

false

),

1153

m_LogFile (*logfile),

1154

m_UseRemote (

true

),

1157

m_Verbose (

false

),

1158

m_ParseIDs (parse_seqids),

1159

m_LongIDs (long_seqids),

1160

m_FoundMatchingMasks(

false

),

1161

m_SkipCopyingGis(

false

),

1162

m_SkipLargeGis(

true

),

1163

m_ScanBioseq4CFastaReaderUsrObjct(scan_bioseq_4_cfastareader_usrobj)

1167  m_LogFile

<<

"\n\nBuilding a new DB, current time: " 1170  m_LogFile

<<

"New DB name: "

<< output_dbname << endl;

1171  m_LogFile

<<

"New DB title: "

<< title << endl;

1172  const string

mol_type(is_protein ?

"Protein"

:

"Nucleotide"

);

1173  m_LogFile

<<

"Sequence type: "

<< mol_type << endl;

1175  m_LogFile

<<

"Deleted existing "

<< mol_type

1176

<<

" BLAST database named "

<< output_dbname << endl;

1196

scan_bioseq_4_cfastareader_usrobj));

1207  ERR_POST

(

Error

<<

"No sequences matched any of the masks provided.\n" 1208

<<

"Please ensure that the -parse_seqids option is used " 1209

<<

"in the\nfiltering program as well as makeblastdb."

);

1212  ERR_POST

(

Error

<<

"No sequences matched any of the taxids provided."

);

1265  m_LogFile

<<

"Keep Linkouts: "

<< (keep_links ?

"T"

:

"F"

) << endl;

1273  m_LogFile

<<

"Keep MBits: "

<< (keep_mbits ?

"T"

:

"F"

) << endl;

1283  m_LogFile

<<

"Keep Leaf Taxids: "

<< (keep_taxids ?

"T"

:

"F"

) << endl;

1296  bool

success =

AddIds

(ids);

1304

success = success && success2;

1311  m_LogFile

<<

"Total time to build database: " 1312

<<

t

<<

" seconds.\n"

<< endl;

1324  bool

success =

true

;

1373  if

(this_oid != -1) {

1374  if

(seen_it.

find

(this_oid) == seen_it.

end

()) {

1375

seen_it[this_oid] = this_gi;

1378

<<

" is duplicate of GI " 1379

<< seen_it[this_oid]

1401  bool

success =

true

;

1411  if

(success ==

false

)

1430

}

catch

(exception& e) {

1441  bool

success =

true

;

1443

vector<string> vols;

1444

vector<string> files;

1451  _ASSERT

(vols.empty() == files.empty());

1459  ITERATE

(vector<string>, iterf, files) {

1462  _TRACE

(

"Deleting "

<< *iterf);

1469  if

(close_exception) {

1471

close_exception->

GetMsg

());

1480

objects::CBlast_def_line& defline,

1483

vector<string>& keys

1486  bool

found =

false

;

1490  if

(!

key

->empty()) {

1492  if

(item != leafs.

end

()) {

1494

taxids.

insert

(item->second.begin(), item->second.end());

1506

defline.SetLeafTaxIds(tv);

1509

defline.SetLeafTaxIds(tv);

1520

vector<string> & keys)

1522  bool

found =

false

;

1531  if

(item != bitmap.

end

()) {

1533  value

|= item->second;

1538

list<int> & linkv = (is_memb

1539

? defline.SetMemberships()

1540

: defline.SetLinks());

1546  if

(linkv.empty()) {

1547

linkv.push_back(

value

);

1549

linkv.front() |=

value

;

1554

defline.ResetMemberships();

1556

defline.ResetLinks();

1565

vector<string> keys;

1585  const string

& options,

1586  const string

& name)

1588 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 1589  (!defined(NCBI_COMPILER_MIPSPRO)) ) 1598  const string

& description,

1599  const string

& options)

1601 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 1602  (!defined(NCBI_COMPILER_MIPSPRO)) )

static CConstRef< CBioseq > s_FixBioseqDeltas(CConstRef< objects::CBioseq > bs)

static void s_SetDeflineLeafs(objects::CBlast_def_line &defline, TIdToLeafs &leafs, bool keep_old, vector< string > &keys)

static void s_SetDeflineBits(objects::CBlast_def_line &defline, TIdToBits &bitmap, bool keep_old, bool is_memb, vector< string > &keys)

Code to build a database given various sources of sequence data.

TSeqPos GetLength(void) const

`Blob' Class for SeqDB (and WriteDB).

void WriteRaw(const char *begin, int size)

Write raw data to the blob (moving the write pointer).

void Clear()

Clear all owned data and reference an empty string.

Implementation of ILineReader for IReader.

bool AddSequences(IBioseqSource &src, bool add_pig=false)

Add sequences from an IBioseqSource object.

bool m_IsProtein

True for a protein database, false for nucleotide.

bool AddFasta(CNcbiIstream &fasta_file)

Add sequences from a file containing FASTA data.

int m_DeflineCount

Defline count.

bool m_KeepLinks

True to keep linkout bits from source dbs, false to discard.

bool m_SkipLargeGis

If set to true, skip GIs with value > 0x7FFFFFFF.

TIdToBits m_Id2Links

Table of linkout bits to apply to sequences.

bool m_FoundMatchingMasks

If true, there were sequences whose IDs matched those in the provided masking locations (via SetMaskD...

static void CreateDirectories(const string &dbname)

Create Directory for blast db.

bool x_ReportUnresolvedIds(const CInputGiList &gi_list) const

Write log messages for any unresolved IDs.

bool m_UseRemote

Whether to use remote resolution and sequence fetching.

void x_EditHeaders(CRef< objects::CBlast_def_line_set > headers)

Modify deflines with linkout and membership bits and taxids.

objects::CScope & x_GetScope()

Get a scope for remote loading of objects.

void SetMaskDataSource(IMaskDataSource &ranges)

Specify an object mapping Seq-id to subject masking data.

void SetMembBits(const TLinkoutMap &membbits, bool keep_mbits)

Specify a membership bit lookup object.

bool m_ParseIDs

If true, string IDs found in FASTA input will be parsed as Seq-ids.

int RegisterMaskingAlgorithm(EBlast_filter_program program, const string &options, const string &name="")

Define a masking algorithm.

int m_OIDCount

Number of OIDs stored in this database.

void SetSourceDb(const string &src_db_name)

Specify source database(s) via the database name(s).

bool m_ScanBioseq4CFastaReaderUsrObjct

bool m_KeepMbits

True to keep membership bits from source dbs, false to discard.

CRef< CTaxIdSet > m_Taxids

Set of TaxIDs configured to apply to sequences.

CRef< objects::CScope > m_Scope

Sequence scope, used for remote fetching.

CRef< objects::CObjectManager > m_ObjMgr

Object manager, used for remote fetching.

ostream & m_LogFile

Logfile.

bool m_Verbose

If true, more detailed log messages will be produced.

bool m_KeepLeafs

True to keep leaf taxids from source dbs, false to discard.

bool x_EndBuild(bool erase, const CException *close_exception)

bool m_SkipCopyingGis

If set to true, when copying BLASTDBs, skip the GIs.

CRef< CWriteDB > m_OutputDb

Database being produced here.

CRef< CSeqDBExpert > m_SourceDb

Database for duplicating sequences locally (-sourcedb option.)

void SetTaxids(CTaxIdSet &taxids)

Specify a mapping of sequence ids to taxonomic ids.

CRef< CInputGiList > x_ResolveGis(const vector< string > &ids)

Resolve various input IDs (as strings) to GIs.

void x_SetLinkAndMbit(CRef< objects::CBlast_def_line_set > headers)

Store linkout (now deprecated) and membership bits in provided headers.

CRef< IMaskDataSource > m_MaskData

Subject masking data.

bool x_EditAndAddBioseq(CConstRef< objects::CBioseq > bs, objects::CSeqVector *sv, bool add_pig=false)

Modify a Bioseq as needed and add it to the database.

bool m_LongIDs

If true, use long sequence ids (database|accession)

bool Build(const vector< string > &ids, CNcbiIstream *fasta_file)

Build the database.

void x_AddOneRemoteSequence(const objects::CSeq_id &seqid, bool &found, bool &error)

Fetch a sequence from the remote service and add it to the db.

void x_AddMasksForSeqId(const list< CRef< CSeq_id > > &ids)

Add the masks for the Seq-id(s) (usually just one) to the database being created.

TIdToLeafs m_Id2Leafs

Table of leaf taxids to apply to sequences.

void x_ResolveRemoteId(CRef< objects::CSeq_id > &seqid, TGi &gi)

Resolve an ID remotely.

void x_DupLocal()

Duplicate IDs from local databases.

TIdToBits m_Id2Mbits

Table of membership bits to apply to sequences.

bool x_ResolveFromSource(const string &acc, CRef< objects::CSeq_id > &id)

Determine if this string ID can be found in the source database.

void StartBuild()

Start building a new database.

bool EndBuild(bool erase=false)

Finish building a new database.

void SetLinkouts(const TLinkoutMap &linkouts, bool keep_links)

Specify a linkout bit lookup object.

void SetMaxFileSize(Uint8 max_file_size)

Set the maximum size of database component files.

void x_AddPig(CRef< objects::CBlast_def_line_set > headers)

Add pig if id can be extracted from the deflines.

bool AddIds(const vector< string > &ids)

Add the specified sequences from the source database.

void SetMaskLetters(const string &mask_letters)

Specify letters to mask out of protein sequence data.

bool x_AddRemoteSequences(CInputGiList &gi_list)

Add sequences from the remote source for IDs that were not found locally.

CBuildDatabase(const string &dbname, const string &title, bool is_protein, CWriteDB::TIndexType indexing, bool use_gi_mask, ostream *logfile, bool long_seqids=false, EBlastDbVersion dbver=eBDB_Version4, bool limit_defline=false, Uint8 oid_masks=EOidMaskType::fNone, bool scan_bioseq_4_cfastareader_usrobj=true)

Constructor.

void SetLeafTaxIds(const TIdToLeafs &taxids, bool keep_taxids)

Specify a leaf-taxids object.

CFastaBioseqSource(CNcbiIstream &fasta_file, bool is_protein, bool parse_ids, bool long_ids)

virtual CConstRef< CBioseq > GetNext()

Get a Bioseq object if there are any more to get.

CFastaReader * m_FastaReader

CRef< ILineReader > m_LineReader

Base class for reading FASTA sequences.

Gi List for database construction.

void AppendSi(const string &si, int oid=-1)

Append a Seq-id.

void AppendGi(TGi gi, int oid=-1)

Append a GI.

Thrown on an attempt to access wrong choice variant.

This represents a set of masks for a given sequence.

bool empty() const

Redefine empty to mean no elements or none of its elements being empty.

static CNcbiApplication * Instance(void)

Singleton method.

CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:

static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)

void GetRawSeqAndAmbig(int oid, const char **buffer, int *seq_length, int *ambig_length) const

Raw Sequence and Ambiguity Data.

int GetNumGis() const

Get the number of GIs in the array.

const SGiOid & GetGiOid(int index) const

Access an element of the array.

int GetNumSis() const

Get the number of Seq-ids in the array.

const SSiOid & GetSiOid(int index) const

Access an element of the array.

T GetKey(int index) const

const string & GetDBNameList() const

Get list of database names.

list< CRef< CSeq_id > > GetSeqIDs(int oid) const

Gets a list of sequence identifiers.

ESeqType GetSequenceType() const

Returns the type of database opened - protein or nucleotide.

string GetTitle() const

Returns the database title.

void AccessionToOids(const string &acc, vector< int > &oids) const

Translate an Accession to a list of OIDs.

bool CheckOrFindOID(int &next_oid) const

Find an included OID, incrementing next_oid if necessary.

string GetDate() const

Returns the construction date of the database.

CRef< CBlast_def_line_set > GetHdr(int oid) const

Get the ASN.1 header for the sequence.

@Seq_descr.hpp User-defined methods of the data storage class.

void FixTaxId(CRef< objects::CBlast_def_line_set > deflines)

Check that each defline has the specified taxid; if not, replace the defline and set the taxid.

bool HasEverFixedId() const

CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...

int CreateUserColumn(const string &title)

Set up a user-defined CWriteDB column.

void SetMaxFileSize(Uint8 sz)

Set maximum size for output files.

@ eProtein

Protein database.

@ eNucleotide

Nucleotide database.

int RegisterMaskAlgorithm(EBlast_filter_program program, const string &options=string(), const string &name=string())

Register a type of filtering data found in this database.

void AddColumnMetaData(int col_id, const string &key, const string &value)

Add meta data to a user-defined column.

int FindColumn(const string &title) const

Find an existing column.

void ListFiles(vector< string > &files)

List Filenames.

CBlastDbBlob & SetBlobData(int column_id)

Add blob data to a user-defined column.

void SetMaskData(const CMaskedRangesVector &ranges, const vector< TGi > &gis)

Set filtering data for a sequence.

void SetPig(int pig)

Set the PIG to be used for the sequence.

void AddSequence(const CBioseq &bs)

Add a sequence as a CBioseq.

EIndexType

Whether and what kind of indices to build.

@ eDefault

Like eFullIndex but also build a numeric Trace ID index.

@ eSparseIndex

Use only simple accessions in the string index.

void SetMaskedLetters(const string &masked)

Set letters that should not be used in sequences.

void ListVolumes(vector< string > &vols)

List Volumes.

int TIndexType

Bitwise OR of "EIndexType".

static CRef< CBlast_def_line_set > ExtractBioseqDeflines(const CBioseq &bs, bool parse_ids=true, bool long_ids=false, bool scan_bioseq_4_cfastareader_usrobj=false)

Extract Deflines From Bioseq.

void SetDeflines(const CBlast_def_line_set &deflines)

Set the deflines to be used for the sequence.

void Close()

Close the Database.

Interface to a source of Bioseq objects.

virtual CConstRef< objects::CBioseq > GetNext()=0

Get a Bioseq object if there are any more to get.

@ eProblem_ModifierFoundButNoneExpected

@ eProblem_TooManyAmbiguousResidues

An interface providing lookups of mask-data by Seq-id.

virtual CMaskedRangesVector & GetRanges(const list< CRef< CSeq_id > > &id)=0

Get ranges of masking data for the given Seq-ids.

Interface to a source of raw sequence data.

virtual const map< string, string > & GetColumnMetaData(int id)=0

Get metadata for the column with the specified Column ID.

virtual bool GetNext(CTempString &sequence, CTempString &ambiguities, CRef< objects::CBlast_def_line_set > &deflines, vector< SBlastDbMaskData > &mask_ranges, vector< int > &column_ids, vector< CTempString > &column_blobs)=0

Get a raw sequence.

virtual void GetColumnNames(vector< string > &names)=0

Get the names of all columns defined by this sequence source.

virtual int GetColumnId(const string &name)=0

Get the column ID for a column mentioned by name.

container_type::iterator iterator

const_iterator end() const

const_iterator find(const key_type &key) const

iterator_bool insert(const value_type &val)

const_iterator begin() const

const_iterator end() const

Operators to edit gaps in sequences.

const CNcbiRegistry & GetConfig(void) const

Get the application's cached configuration parameters (read-only).

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define NON_CONST_ITERATE(Type, Var, Cont)

Non constant version of ITERATE macro.

#define ERR_POST(message)

Error posting with file, line number information but without error codes.

void Error(CExceptionArgs_Base &args)

#define NCBI_EXCEPTION_VAR(name, exception_class, err_code, message)

Create an instance of the exception to be thrown later.

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

const string & GetMsg(void) const

Get message string.

virtual const char * what(void) const noexcept

Standard report (includes full backlog).

#define NCBI_RETHROW(prev_exception, exception_class, err_code, message)

Generic macro to re-throw an exception.

static string CreateAbsolutePath(const string &path, ERelativeToWhat rtw=eRelativeToCwd)

Get an absolute path from some, possibly relative, path.

string GetDir(EIfEmptyPath mode=eIfEmptyPath_Current) const

Get the directory component for this directory entry.

bool CreatePath(TCreateFlags flags=fCreate_Default) const

Create the directory path recursively possibly more than one at a time.

virtual bool Remove(TRemoveFlags flags=eRecursive) const

Remove a directory entry.

bool CheckAccess(TMode access_mode) const

Check access rights.

virtual bool Exists(void) const

Check if directory "dirname" exists.

string GetName(void) const

Get the base entry name with extension (if any).

@ eIfEmptyPath_Empty

Return empty string.

@ fWrite

Write permission.

#define MSerial_AsnText

I/O stream manipulators.

virtual CRef< CSeq_entry > ReadOneSeq(ILineErrorListener *pMessageListener=nullptr)

Read a single effective sequence, which may turn out to be a segmented set.

EFlags

Note on fAllSeqIds: some databases (notably nr) have merged identical sequences, joining their deflin...

virtual bool AtEOF(void) const =0

Indicates (negatively) whether there is any more input.

void SetMaxIDLength(Uint4 max_len)

If this is set, an exception will be thrown if a Sequence ID exceeds the given length.

void IgnoreProblem(ILineError::EProblem problem)

@ fNoParseID

Generate an ID (whole defline -> title)

@ fQuickIDCheck

Just check local IDs' first characters.

@ fRequireID

Reject deflines that lack IDs.

@ fDisableNoResidues

If no residues found do not raise an error.

@ fForceType

Force specified type regardless of accession.

@ fParseRawID

Try to identify raw accessions.

@ fAssumeNuc

Assume nucs unless accns indicate otherwise.

@ fParseGaps

Make a delta sequence if gaps found.

@ fAssumeProt

Assume prots unless accns indicate otherwise.

const string AsFastaString(void) const

EAccessionInfo

For IdentifyAccession (below)

static int BestRank(const CRef< CSeq_id > &id)

const CTextseq_id * GetTextseq_Id(void) const

Return embedded CTextseq_id, if any.

static CRef< CObjectManager > GetInstance(void)

Return the existing object manager or create one.

vector< CSeq_id_Handle > TIds

CConstRef< CBioseq > GetCompleteBioseq(void) const

Get the complete bioseq.

TBioseqStateFlags GetState(void) const

Get state of the bioseq.

bool Empty(void) const THROWS_NONE

Check if CConstRef is empty – not pointing to any object which means having a null value.

void Reset(void)

Reset reference object.

void Reset(void)

Reset reference object.

bool NotEmpty(void) const THROWS_NONE

Check if CRef is not empty – pointing to an object and has a non-null value.

bool NotEmpty(void) const THROWS_NONE

Check if CConstRef is not empty – pointing to an object and has a non-null value.

TObjectType * GetNonNullPointer(void) const

Get pointer value and throw a null pointer exception if pointer is null.

bool Empty(void) const THROWS_NONE

Check if CRef is empty – not pointing to any object, which means having a null value.

uint64_t Uint8

8-byte (64-bit) unsigned integer

virtual const string & Get(const string &section, const string &name, TFlags flags=0) const

Get the parameter value.

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

IO_PREFIX::istream CNcbiIstream

Portable alias for istream.

NCBI_NS_STD::string::size_type SIZE_TYPE

static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)

Find the pattern in the string.

const char * data(void) const

Return a pointer to the array represented.

bool empty(void) const

Return true if the represented string is empty (i.e., the length is zero)

static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)

Check if a string starts with a specified prefix value.

static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)

Convert string to unsigned int.

size_type size(void) const

Return the length of the represented array.

double Elapsed(void) const

Return time elapsed since first Start() or last Restart() call (in seconds).

string AsString(const CTimeFormat &format=kEmptyStr, TSeconds out_tz=eCurrentTimeZone) const

Transform time to string.

@ eCurrent

Use current time. See also CCurrentTime.

@ eStart

Start timer immediately after creating.

C::value_type FindBestChoice(const C &container, F score_func)

Find the best choice (lowest score) for values in a container.

static const char label[]

EBlast_filter_program

This defines the possible sequence filtering algorithms to be used in a BLAST database.

bool IsSetOther_info(void) const

In proteins this stores the PIG, in nucleotides this stores the "origin GIs" (if one sequence is desc...

const Tdata & Get(void) const

Get the member data.

list< CRef< CBlast_def_line > > Tdata

const TOther_info & GetOther_info(void) const

Get the Other_info member data.

E_Choice Which(void) const

Which variant is currently selected.

bool IsGi(void) const

Check if variant Gi is selected.

bool IsSetVersion(void) const

Check if a value has been assigned to Version data member.

const TSeq & GetSeq(void) const

Get the variant data.

bool IsSeq(void) const

Check if variant Seq is selected.

TId & SetId(void)

Assign a value to Id data member.

TAnnot & SetAnnot(void)

Assign a value to Annot data member.

const TId & GetId(void) const

Get the Id member data.

bool IsNcbi4na(void) const

Check if variant Ncbi4na is selected.

TLength GetLength(void) const

Get the Length member data.

bool CanGetLength(void) const

Check if it is safe to call GetLength method.

void SetInst(TInst &value)

Assign a value to Inst data member.

bool CanGetId(void) const

Check if it is safe to call GetId method.

void SetDescr(TDescr &value)

Assign a value to Descr data member.

const TNcbi4na & GetNcbi4na(void) const

Get the variant data.

void SetRepr(TRepr value)

Assign a value to Repr data member.

bool CanGetSeq_data(void) const

Check if it is safe to call GetSeq_data method.

const TNcbi2na & GetNcbi2na(void) const

Get the variant data.

const Tdata & Get(void) const

Get the member data.

void SetLength(TLength value)

Assign a value to Length data member.

bool IsGap(void) const

Check if variant Gap is selected.

void SetSeq_data(TSeq_data &value)

Assign a value to Seq_data data member.

bool IsNcbi2na(void) const

Check if variant Ncbi2na is selected.

const TSeq_data & GetSeq_data(void) const

Get the Seq_data member data.

void SetMol(TMol value)

Assign a value to Mol data member.

@ eRepr_raw

continuous sequence

@ eMol_na

just a nucleic acid

@ e_Literal

a piece of sequence

char * dbname(DBPROCESS *dbproc)

Get name of current database.

unsigned int

A callback function used to compare two keys in a database.

bool CheckAccession(const string &acc, TGi &gi, CRef< objects::CSeq_id > &seqid, bool &specific)

void GetDeflineKeys(const objects::CBlast_def_line &defline, vector< string > &keys)

Get all keys for a defline.

void MapToLMBits(const TLinkoutMap &gilist, TIdToBits &gi2links)

Read a set of GI lists, each a vector of strings, and combine the bits into the resulting linkbits ma...

const struct ncbi::grid::netcache::search::fields::SIZE size

const struct ncbi::grid::netcache::search::fields::KEY key

const GenericPointer< typename T::ValueType > T2 value

Defines unified interface to application:

CSeqDB::ESeqType ParseMoleculeTypeString(const string &str)

Convert a string to a CSeqDB ESeqType object.

bool DeleteBlastDb(const string &dbpath, CSeqDB::ESeqType seq_type)

Deletes all files associated with a BLAST database.

EBlastDbVersion

BLAST database version.

Defines `expert' version of CSeqDB interfaces.

static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

TGi gi

The GI or 0 if unknown.

int oid

The OID or -1 if unknown.

int oid

The OID or -1 if unknown.

static bool ambig(char c)

Defines BLAST database construction classes.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4