A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/blastkmer_8cpp_source.html below:

NCBI C++ ToolKit: src/algo/blast/proteinkmer/blastkmer.cpp Source File

47

: m_QueryVector(query_vector),

53

m_KmerFiles.push_back(kmerfile);

55

seqdb->FindVolumePaths(m_KmerFiles,

false

);

57  if

(options->Validate() ==

false

)

86  bool

kmerFound =

false

;

98  else if

(kmerParams.

version

== 2)

116

vector< set<uint32_t > > candidates;

117

candidates.resize(query_hash.size());

148  CSeqVector

seqvect(*(query_vector[queryNum].seqloc), *(query_vector[queryNum].scope));

152

seqid->

Assign

(*(query_vector[queryNum].seqloc->GetId()));

158  for

(TBlastKmerPrelimScoreVector::iterator iter=score_vector.begin(); iter != score_vector.end(); ++iter)

167  return

one.second > two.second;

173  for

(TBlastKmerPrelimScoreVector::iterator itr=

results

.begin(); itr !=

results

.end(); ++itr)

175

seqdb->

GetGis

((*itr).first, retvalue,

true

);

184  int

numFiles =

static_cast<int>

(

m_KmerFiles

.size());

185  if

(numThreads > numQuery)

186

numThreads = numFiles;

188

vector<SOneBlastKmerSearch> kmerSearchVector;

189

kmerSearchVector.reserve(numQuery);

190  for

(

int i

=0;

i

<numQuery;

i

++)

197  if

(query_seq.length() <

static_cast<

string::size_type

>

(kmerParams.

kmerNum

))

200

kmerSearch.

qSeqid

= qseqid;

201  x_ProcessQuery

(query_seq, kmerSearch, kmerParams,

a

,

b

, kValues, badMers);

202

}

catch

(

const

ncbi::CException& e) {

204  string msg

= e.GetMsg();

206  if

(

msg

.find(

"WARNING:"

) != std::string::npos)

210

}

catch

(

const

std::exception& e) {

219

kmerSearchVector.push_back(kmerSearch);

222 #pragma omp parallel for num_threads(numThreads) 223 for

(

int

index=0; index<numFiles; index++)

226  for

(

int i

=0;

i

<numQuery;

i

++)

237  for

(

int i

=0;

i

<numQuery;

i

++)

244

kmerResultSet->push_back(kmerResults);

252  for

(

int

index=0; index<numFiles; index++)

253

final_size += kmerSearch.

scoreVector

[index].size();

254

final_results.reserve(final_size);

257  for

(

int

index=0; index<numFiles; index++)

265

final_results.insert(final_results.end(), score_vector.begin(), score_vector.end());

280  int

vec_size =

static_cast<int>

( final_results.size() );

282  if

(vec_size > num_matches)

283

final_results.erase(final_results.begin()+num_matches, final_results.end());

293  if

(intersect->

Size

() > 0)

296

final_results.erase(final_results.begin(), final_results.end());

304  if

(intersect->

Size

() > 0)

307

final_results.erase(final_results.begin(), final_results.end());

313

kmerResultSet->push_back(kmerResults);

315  return

kmerResultSet;

333  int

rows_per_band = mhfile.

GetRows

();

338

vector<uint32_t>

a

(num_hashes);

339

vector<uint32_t>

b

(num_hashes);

344  a

[0] =random_nums[0];

345  b

[0] =random_nums[1];

349  for

(

int i

=0;

i

<num_hashes;

i

++)

350  a

[

i

] = random_nums[

i

];

351  for

(

int i

=0;

i

<num_hashes;

i

++)

352  b

[

i

] = random_nums[

i

+num_hashes];

355

vector < vector<int> > kValues;

359  unsigned char

* kvaluesArray = mhfile.

GetKValues

();

360  for

(

int i

=0;

i

<samples;

i

++)

363  for

(

int

j=0; j<rows_per_band; j++)

364

temp.push_back(kvaluesArray[total++]);

365

kValues.push_back(temp);

369  SBlastKmerParameters

kmerParams(num_hashes, rows_per_band, samples, kmerNum, alphabetChoice, kmerVer);

377  return

kmerResultsSet;

void s_GetAllGis(vector< TGi > &retvalue, TBlastKmerPrelimScoreVector results, CRef< CSeqDB > seqdb)

static void s_AdjustPrelimScoreVectorOID(TBlastKmerPrelimScoreVector &score_vector, int offset)

bool s_SortFinalResults(const pair< uint32_t, double > &one, const pair< uint32_t, double > &two)

static void s_GetQuerySequence(const TSeqLocVector &query_vector, string &query_seq, CRef< CSeq_id > &seqid, int queryNum)

CRef< CBlastKmerResults > MakeEmptyResults(TSeqLocVector &queryVector, int queryNum, const string &errMsg, EBlastSeverity severity=eBlastSevError)

Empty results (use on error)

vector< pair< uint32_t, double > > TBlastKmerPrelimScoreVector

Vector of pairs of database OIDs and scores.

void neighbor_query(const vector< vector< uint32_t > > &query_hash, const uint64_t *lsh, vector< set< uint32_t > > &candidates, CMinHashFile &mhfile, int num_hashes, int min_hits, double thresh, TBlastKmerPrelimScoreVector &score_vector, BlastKmerStats &kmer_stats, int kmerVersion)

void get_LSH_hashes(vector< vector< uint32_t > > &query_hash, vector< vector< uint32_t > > &lsh_hash_vec, int num_bands, int rows_per_band)

void get_LSH_hashes5(vector< vector< uint32_t > > &query_hash, vector< vector< uint32_t > > &lsh_hash_vec, int numHashes, int numRows)

Gets the LSH hash for one hash function.

bool minhash_query2(const string &query, vector< vector< uint32_t > > &seq_hash, int kmerNum, int numHashes, int alphabetChoice, vector< int > badMers, int chunkSize)

Hash the query for the minimum values.

void get_LSH_match_from_hash(const vector< vector< uint32_t > > &lsh_hash_vec, const uint64_t *lsh_array, vector< set< uint32_t > > &candidates)

bool minhash_query(const string &query, vector< vector< uint32_t > > &seq_hash, int num_hashes, uint32_t *a, uint32_t *b, int do_seg, int kmerNum, int alphabetChoice, int chunkSize)

void get_LSH_hashes2(vector< vector< uint32_t > > &query_hash, vector< vector< uint32_t > > &lsh_hash_vec, int num_k, int num_l, vector< vector< int > > &kValues)

Class of options for the KMER search.

int GetNumTargetSeqs() const

Gets the number of matches (subject sequences) to return.

double GetThresh() const

Get the threshold.

int GetMinHits() const

Get the number of LSH hits to initiate the calculation of the Jaccard distance.

bool Validate() const

Checks that options are valid.

This class holds one or more CBlastKmerResults.

This class represents the results for one KMER search (one query).

Class to perform a KMER-BLASTP search.

CRef< CBlastKmerOptions > m_Opts

Specifies values for some options (e.g., threshold)

CRef< CSeqDBNegativeList > m_NegGIList

Negative GIList to limit search by.

TSeqLocVector m_QueryVector

Holds the query seqloc and scope.

CRef< CBlastKmerResultsSet > Run()

Performs search on one or more queries.

void x_RunKmerFile(const vector< vector< uint32_t > > &query_hash, const vector< vector< uint32_t > > &query_LSH_hash, CMinHashFile &mhfile, TBlastKmerPrelimScoreVector &score_vector, BlastKmerStats &kmer_stats)

Search individual kmer file.

CRef< CBlastKmerResultsSet > x_SearchMultipleQueries(int firstQuery, int numQuery, const SBlastKmerParameters &kmerParams, uint32_t *a, uint32_t *b, vector< vector< int > > &kValues, vector< int > badMers)

Search multiple queries.

CRef< CSeqDBGiList > m_GIList

GIList to limit search by.

CRef< CBlastKmerResultsSet > RunSearches()

CBlastKmer(TSeqLocVector &query_vector, CRef< CBlastKmerOptions > options, CRef< CSeqDB > seqdb, string kmerfile=kEmptyStr)

Constructor. Processes all proteins in TSeqLocVector.

CRef< CSeqDB > m_SeqDB

CSeqDB for BLAST db.

vector< string > m_KmerFiles

Name of the kmer files.

void x_ProcessQuery(const string &query_seq, SOneBlastKmerSearch &kmerSearch, const SBlastKmerParameters &kmerParams, uint32_t *a, uint32_t *b, vector< vector< int > > &kvalues, vector< int > badMers)

Preprocess query to sequence hashes.

GI list containing the intersection of two other lists of GIs.

Access data in Minhash files.

void GetBadMers(vector< int > &badMers) const

Overrepresented KMERs.

int GetNumHashes(void) const

Returns the number of values in an array of hashes (probably 32)

uint64_t * GetLSHArray(void) const

int GetVersion(void) const

int GetNumSeqs(void) const

uint32_t * GetRandomNumbers(void) const

int GetChunkSize(void) const

Get number of letters in a chunk (version 3 or higher)

int GetKmerSize(void) const

Returns the length of the KMER.

int GetSegStatus(void) const

int GetAlphabet(void) const

One of two alphabets from Shiryev et al.

unsigned char * GetKValues(void) const

LSH points for Buhler approach.

static void FindVolumePaths(const string &dbname, ESeqType seqtype, vector< string > &paths, vector< string > *alias_paths=NULL, bool recursive=true, bool expand_links=true)

Find volume paths.

void GetGis(int oid, vector< TGi > &gis, bool append=false) const

Gets a list of GIs for an OID.

const string & GetDBNameList() const

Get list of database names.

Class for the messages for an individual query sequence.

size_t GetNumberOfThreads(void) const

Accessor for the number of threads to use.

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)

Optimized implementation of CSerialObject::Assign, which is not so efficient.

void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const

Fill the buffer string with the sequence data for the interval [start, stop).

void SetCoding(TCoding coding)

void Reset(void)

Reset reference object.

bool NotEmpty(void) const THROWS_NONE

Check if CRef is not empty – pointing to an object and has a non-null value.

bool Empty(void) const THROWS_NONE

Check if CRef is empty – not pointing to any object, which means having a null value.

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define END_SCOPE(ns)

End the previously defined scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

#define BEGIN_SCOPE(ns)

Define a new scope.

@ e_Ncbistdaa

consecutive codes for std aas

char * dbname(DBPROCESS *dbproc)

Get name of current database.

unsigned int

A callback function used to compare two keys in a database.

constexpr auto sort(_Init &&init)

static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

vector< SSeqLoc > TSeqLocVector

Vector of sequence locations.

Structure for ancillary data on KMER search.

int jd_count

How often was the Jaccard distance calculated.

int total_matches

How many matches returned.

int num_sequences

Number of database sequences considered (in this volume)

int oids_considered

How many OIDs were considered as candidates.

int hit_count

How many hits to the hash array were there?

int jd_oid_count

How many OIDs was the Jaccard distance calculated for.

int version

Version of index used (0 indicates default).

int chunkSize

size of a query chunk to process (default is 150).

int numHashes

Number of hash functions per signature.

int samples

Number of samples made of the query signature.

int rowsPerBand

Number of values sampled from signature.

int alphabetChoice

15 or 10 letter alphabet (0 for 15, 1 for 10).

int kmerNum

number of letters in KMER.

vector< TBlastKmerPrelimScoreVector > scoreVector

Scores for one query.

EBlastSeverity severity

Error or warning (only use if status is non-zero).

int status

Status of the query (0 is good, otherwise an error has occurred)

vector< vector< uint32_t > > queryLSHHash

LSH Hashes for one query (multiple chunks)

vector< BlastKmerStats > kmerStatsVector

Stats for one query.

vector< vector< uint32_t > > queryHash

Hashes for one query (multiple chunks)

CRef< CSeq_id > qSeqid

Seqid of the query.

string errDescription

Error description.

Structure to represent a single sequence to be fed to BLAST.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4