(file_deleter.
Exists()) {
71 boolis_remote
,
booluse_default
,
72 stringtask_name
,
boolmt_mode
)
86 char* batch_sz_str = getenv(
"BATCH_SIZE");
89 _TRACE(
"DEBUG: Using query batch size "<< retval);
99 if(! use_default)
return0;
101 if(task_name ==
"")
125 if(task_name ==
"blastx-fast"&& mt_mode ==
true)
141 _TRACE(
"Using query batch size "<< retval);
147 const char* error_prefix
)
149 static const char* kDfltErrorPrefix =
"Failed to parse sequence range";
150 static const stringkDelimiters(
"-");
151 stringerror_msg(error_prefix ? error_prefix : kDfltErrorPrefix);
153vector<string> tokens;
155 if(tokens.size() != 2 || tokens.front().empty() || tokens.back().empty()) {
156error_msg +=
" (Format: start-stop)";
161 if(from <= 0 || to <= 0) {
162error_msg +=
" (range elements cannot be less than or equal to 0)";
166error_msg +=
" (range cannot be empty)";
170error_msg +=
" (start cannot be larger than stop)";
183 const char* error_prefix
)
185 static const char* kDfltErrorPrefix =
"Failed to parse sequence range";
186 static const stringkDelimiters(
"-");
187 stringerror_msg(error_prefix ? error_prefix : kDfltErrorPrefix);
189vector<string> tokens;
191 if(tokens.front().empty()) {
192error_msg +=
" (start cannot be empty)";
200 if(!tokens.back().empty()) {
203 if(from <= 0 || to <= 0) {
204error_msg +=
" (range elements cannot be less than or equal to 0)";
208error_msg +=
" (start cannot be larger than stop)";
226 booluse_lcase_masking,
238 if(!read_proteins && gaps_to_Ns) {
245sequences =
input->GetAllSeqs(*scope);
256 const TSeqPoskResetSeqNumMax = 1000;
257 const TSeqPoskResetSeqNum250 = 250;
259 if(num_descriptions) {
260*num_descriptions = max_target_seqs;
261warnings +=
"Number of descriptions overridden to ";
266warnings += (warnings.empty() ?
"Number ":
", number ");
267warnings +=
"of overview alignments overridden to ";
270 if(num_alignments) {
271 booloverridden =
false;
272 TSeqPoshalfHits = max_target_seqs/2;
274*num_alignments = max_target_seqs;
277 else if(halfHits < kResetSeqNum250) {
281 else if(halfHits <= kResetSeqNumMax) {
282*num_alignments = halfHits;
286*num_alignments = kResetSeqNumMax;
290warnings += (warnings.empty() ?
"Number ":
", number ");
291warnings +=
"of alignments overridden to ";
295 if( !warnings.empty() ) {
309bioseq.GetLength() == 0)
311 else if(bioseq.GetInst().CanGetSeq_data() ==
true)
313 else if(bioseq.GetInst().IsSetExt())
319bioseq.GetInst().GetExt().GetDelta().Get()) {
336 if(sequences.
Empty() || sequences->
Empty()) {
340vector<string> empty_sequence_ids;
341 boolall_empty =
true;
344 if((*query)->GetLength() == 0) {
346push_back((*query)->GetQuerySeqLoc()->GetId()->AsFastaString());
354 "Query contains no sequence data");
357 if(!empty_sequence_ids.empty())
359warnings.assign(
"The following sequences had no sequence data:");
360warnings += empty_sequence_ids.front();
361 for(
TSeqPos i= 1;
i< empty_sequence_ids.size();
i++) {
362warnings +=
", "+ empty_sequence_ids[
i];
372 if(sequences.empty()) {
376vector<string> empty_sequence_ids;
377 boolall_empty =
true;
382push_back(
query->seqloc->GetId()->AsFastaString());
390 "Query contains no sequence data");
393 if(!empty_sequence_ids.empty())
395warnings.assign(
"The following sequences had no sequence data:");
396warnings += empty_sequence_ids.front();
397 for(
TSeqPos i= 1;
i< empty_sequence_ids.size();
i++) {
398warnings +=
", "+ empty_sequence_ids[
i];
408 if(sequences.
Empty()) {
412vector<string> empty_sequence_ids;
413 boolall_empty =
true;
417 if(!itr->IsSetLength() || itr->GetLength() == 0) {
419push_back(itr->GetFirstId()->AsFastaString());
427 "Query contains no sequence data");
430 if(!empty_sequence_ids.empty())
432warnings.assign(
"The following sequences had no sequence data:");
433warnings += empty_sequence_ids.front();
434 for(
TSeqPos i= 1;
i< empty_sequence_ids.size();
i++) {
435warnings +=
", "+ empty_sequence_ids[
i];
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Declares the BLAST exception class.
Interface for reading SRA sequences into blast input.
bool HasRawSequenceData(const objects::CBioseq &bioseq)
Returns true if the Bioseq passed as argument has the full, raw sequence data in its Seq-inst field.
TSeqRange ParseSequenceRange(const string &range_str, const char *error_prefix)
Parse and extract a sequence range from argument provided to this function.
int GetQueryBatchSize(EProgram program, bool is_ungapped, bool is_remote, bool use_default, string task_name, bool mt_mode)
Retrieve the appropriate batch size for the specified task.
TSeqRange ParseSequenceRangeOpenEnd(const string &range_str, const char *error_prefix)
Parse and extract a sequence range from argument provided to this function.
void CheckForEmptySequences(CRef< CBlastQueryVector > sequences, string &warnings)
Inspect the sequences parameter for empty sequences.
string CalculateFormattingParams(TSeqPos max_target_seqs, TSeqPos *num_descriptions, TSeqPos *num_alignments, TSeqPos *num_overview)
Calculates the formatting parameters based on the maximum number of target sequences selected (a....
CRef< CScope > ReadSequencesToBlast(CNcbiIstream &in, bool read_proteins, const TSeqRange &range, bool parse_deflines, bool use_lcase_masking, CRef< CBlastQueryVector > &sequences, bool gaps_to_Ns)
Read sequence input for BLAST.
Auxiliary classes/functions for BLAST input library.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
@ eTblastx
Translated nucl-Translated nucl.
@ eBlastn
Nucl-Nucl (traditional blastn)
@ eBlastp
Protein-Protein.
@ eMapper
Jumper alignment for mapping.
@ eTblastn
Protein-Translated nucl.
@ eMegablast
Nucl-Nucl (traditional megablast)
@ eDiscMegablast
Nucl-Nucl using discontiguous megablast.
@ eBlastx
Translated nucl-Protein.
CNcbiOstream * GetStream()
Retrieve the newly opened stream, caller doesn't own the return value.
unique_ptr< CNcbiOstream > m_FileStream
The output stream.
string m_FileName
The file's name.
int m_Version
File version if larger than zero.
static bool IsEmptyBioseq(const CBioseq &bioseq)
Returns true if the Bioseq contained in the seq_entry is empty (i.e.
Defines BLAST error codes (user errors included)
Class representing a text file containing sequences in fasta format.
Class that centralizes the configuration data for sequences to be converted.
void SetConvertGapsToNs(bool val)
Turn on/off converting gaps to Ns in read FASTA sequences.
void SetLowercaseMask(bool mask)
Turn lowercase masking on/off.
void SetSubjectLocalIdMode()
Append subject-specific prefix codes to all generated local ids.
void SetRange(const TSeqRange &r)
Set range for all sequences.
void SetBelieveDeflines(bool believe)
Turn parsing of sequence IDs on/off.
Generalized converter from an abstract source of biological sequence data to collections of blast inp...
bool Empty() const
Returns true if this query vector is empty.
Defines user input exceptions.
Template class for iteration on objects of class C (non-modifiable version)
const TSeqPos kDfltArgMaxTargetSequences
Default maximum number of target sequences, to be used only on the web.
string EProgramToTaskName(EProgram p)
Convert a EProgram enumeration value to a task name (as those used in the BLAST command line binaries...
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual bool Remove(TRemoveFlags flags=eRecursive) const
Remove a directory entry.
virtual bool Exists(void) const
Check existence of file.
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
bool Empty(void) const THROWS_NONE
Check if CRef is empty — not pointing to any object, which means having a null value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
void SetFrom(TFrom value)
Assign a value to From data member.
void SetTo(TTo value)
Assign a value to To data member.
list< CRef< CDelta_seq > > Tdata
@ eRepr_delta
sequence made by changes (delta) to others
@ eRepr_virtual
no seq data
@ e_Loc
point to a sequence
std::istream & in(std::istream &in_, double &x_)
Main argument class for PSI-BLAST application.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Configuration structure for the CBlastScopeSource.
void OptimizeForWholeLargeSequenceRetrieval(bool value=true)
Configures the BLAST database data loader to optimize the retrieval of *entire* large sequences.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4