kSeqLenThreshold2Guess = 25;
100 boollowercase =
false,
101 boolbelieve_defline =
false,
103 boolretrieve_seq_data =
true,
104 intlocal_id_counter = 1,
105 unsigned intseqlen_thresh2guess =
107 boolskip_seq_check =
false);
195 returnm_SeqLenThreshold2Guess;
199m_SeqLenThreshold2Guess =
val;
256 caseeInvalidStrand:
return "eInvalidStrand";
257 caseeSeqIdNotFound:
return "eSeqIdNotFound";
258 caseeEmptyUserInput:
return "eEmptyUserInput";
259 caseeInvalidRange:
return "eInvalidRange";
260 caseeSequenceMismatch:
return "eSequenceMismatch";
261 caseeInvalidInput:
return "eInvalidInput";
266 #ifndef SKIP_DOXYGEN_PROCESSING 313: m_Source(
source), m_BatchSize(batch_size), m_NumSeqs(0), m_TotalLength(0) {}
358 bool End() {
returnm_Source->End(); }
398 boolretrieve_seq_data);
416 static boolIsEmptyBioseq(
const CBioseq& bioseq);
435 virtual bool End(
void) = 0;
456 bool End(
void) {
returnm_Source->End();}
Declares CBlastScopeSource class to create properly configured CScope objects to invoke the BLAST dat...
ncbi::TMaskedQueryRegions mask
Auxiliary class for creating Bioseqs given SeqIds.
CRef< CScope > m_scope
Scope object used to retrieve the bioseqs.
CBlastBioseqMaker(CRef< CScope > scope)
Constructor.
CBlastInputOMF & operator=(const CBlastInputOMF &rhs)
CBlastInputOMF(const CBlastInputOMF &rhs)
TSeqPos m_MaxNumSequences
TSeqPos GetBatchSize(void) const
CBlastInputSourceOMF * m_Source
CRef< CBioseq_set > m_BioseqSet
void SetMaxBatchNumSeqs(TSeqPos num)
Int8 GetTotalLengthProcessed() const
Int8 GetNumSeqsProcessed() const
void SetBatchSize(TSeqPos num)
TSeqPos GetMaxBatchNumSeqs(void) const
Class that centralizes the configuration data for sequences to be converted.
void SetConvertGapsToNs(bool val)
Turn on/off converting gaps to Ns in read FASTA sequences.
void SetLocalIdCounterInitValue(int val)
Set the local id counter initial value.
TSeqRange GetRange() const
Get range for all sequences.
bool GetConvertGapsToNs(void) const
Retrieve the gaps-to-Ns conversion option value.
bool m_LowerCaseMask
Whether to save lowercase mask locs.
void SetRetrieveSeqData(bool value)
Turn on or off the retrieval of sequence data.
objects::ENa_strand m_Strand
Strand to assign to sequences.
void SetSkipSeqCheck(bool skip)
Turn validation of sequence on/off.
bool m_GapsToNs
Convert gaps to Ns in FASTA sequences.
bool m_BelieveDeflines
Whether to parse sequence IDs.
TSeqRange m_Range
Sequence range.
unsigned int m_SeqLenThreshold2Guess
The sequence length threshold to guess molecule type.
const string & GetLocalIdPrefix() const
Retrieve the custom prefix string used for generating local ids.
void SetLowercaseMask(bool mask)
Turn lowercase masking on/off.
~CBlastInputSourceConfig()
Destructor.
void SetLocalIdPrefix(const string &prefix)
Set the custom prefix string used for generating local ids.
void SetSubjectLocalIdMode()
Append subject-specific prefix codes to all generated local ids.
void SetRange(const TSeqRange &r)
Set range for all sequences.
objects::ENa_strand GetStrand() const
Retrieve the current strand value.
void SetBelieveDeflines(bool believe)
Turn parsing of sequence IDs on/off.
bool IsProteinInput() const
Determine if this object is for configuring reading protein sequences.
void SetSeqLenThreshold2Guess(unsigned int val)
Set the sequence length threshold to guess the molecule type.
void SetStrand(objects::ENa_strand strand)
Set the strand to a specified value.
bool m_SkipSeqCheck
Whether to validate sequence data -RMH-.
int m_LocalIdCounter
Initialization parameter to CSeqidGenerator.
int GetLocalIdCounterInitValue() const
Retrieve the local id counter initial value.
const SDataLoaderConfig & GetDataLoaderConfig()
Retrieve the data loader configuration object for read-only access.
bool GetBelieveDeflines() const
Retrieve current sequence ID parsing status.
bool m_RetrieveSeqData
Configuration for CBlastInputReader.
string m_LocalIdPrefix
Custom prefix string passed to CSeqidGenerator.
unsigned int GetSeqLenThreshold2Guess() const
Retrieve the sequence length threshold to guess the molecule type.
SDataLoaderConfig m_DLConfig
Configuration object for data loaders, used by CBlastInputReader.
SDataLoaderConfig & SetDataLoaderConfig()
Retrieve the data loader configuration object for manipulation.
bool GetSkipSeqCheck() const
Retrieve status of sequence alphabet validation.
bool GetLowercaseMask() const
Retrieve lowercase mask status.
bool RetrieveSeqData() const
True if the sequence data must be fetched.
TSeqRange & SetRange(void)
Set range for all sequences.
void SetQueryLocalIdMode()
Append query-specific prefix codes to all generated local ids.
virtual ~CBlastInputSourceOMF()
virtual int GetNextSequence(CBioseq_set &bioseq_set)=0
Get one sequence (or a pair for NGS reads)
Base class representing a source of biological sequences.
virtual CRef< CBlastSearchQuery > GetNextSequence(CScope &scope)=0
Retrieve a single sequence (in a CBlastSearchQuery container)
virtual bool End()=0
Signal whether there are any unread sequences left.
virtual SSeqLoc GetNextSSeqLoc(CScope &scope)=0
Retrieve a single sequence (in an SSeqLoc container)
virtual ~CBlastInputSource()
Destructor.
Generalized converter from an abstract source of biological sequence data to collections of blast inp...
~CBlastInput()
Destructor.
Int8 GetNumSeqsProcessed() const
TSeqPos GetBatchSize() const
Retrieve the target size of a batch of sequences.
CBlastInput(CBlastInputSource *source, int batch_size=kMax_Int)
Constructor.
bool End()
Determine if we have reached the end of the BLAST input.
void SetBatchSize(TSeqPos batch_size)
Set the target size of a batch of sequences.
CRef< CBlastInputSource > m_Source
pointer to source of sequences
Int8 GetTotalLengthProcessed() const
TSeqPos m_BatchSize
total size of one block of sequences
Defines user input exceptions.
NCBI_EXCEPTION_DEFAULT(CInputException, CException)
EErrCode
Error types that reading BLAST input can generate.
@ eInvalidStrand
Invalid strand specification.
@ eSequenceMismatch
The sequence type isn't what was expected.
@ eInvalidRange
Invalid range specification.
@ eEmptyUserInput
No input was provided.
@ eSeqIdNotFound
The sequence ID cannot be resolved.
virtual const char * GetErrCodeString(void) const override
Translate from the error code value to its string representation.
Include a standard set of the NCBI C++ Toolkit most basic headers.
unsigned int TSeqPos
Type for sequence locations and lengths.
CException & operator=(const CException &)
Private assignment operator to prohibit assignment.
TErrCode GetErrCode(void) const
Get error code.
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
int64_t Int8
8-byte (64-bit) signed integer
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
#define NCBI_BLASTINPUT_EXPORT
ENa_strand
strand of nucleic acid
const GenericPointer< typename T::ValueType > T2 value
const CharType(& source)[N]
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
Definition of SSeqLoc structure.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Configuration structure for the CBlastScopeSource.
Structure to represent a single sequence to be fed to BLAST.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4