A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/blast__fasta__input_8cpp_source.html below:

NCBI C++ ToolKit: src/algo/blast/blastinput/blast_fasta_input.cpp Source File

74  unsigned int

seq_len_threshold)

88  if

(GetCurrentPos(eRawPos) < m_SeqLenThreshold) {

89  _ASSERT

( (TestFlag(fAssumeNuc) ^ TestFlag(fAssumeProt) ) );

90

SetCurrentSeq().SetInst().SetMol(TestFlag(fAssumeNuc)

121  bool

retrieve_seq_data,

122  unsigned int

seqlen_thresh2guess,

133  if

( !line.empty() &&

isalnum

(line.data()[0]&0xff) ) {

139  id

.Reset(

new CSeq_id

(line));

143

retval->

SetSeq

(*bioseq);

152

}

catch

(

const

exception&) {

193  "Empty SeqID passed to the molecule type validation"

);

200  "GI/accession/sequence mismatch: protein input required but nucleotide provided"

);

205  "GI/accession/sequence mismatch: nucleotide input required but protein provided"

);

212  string

message =

"No sequence available for "

+

id

->AsFastaString();

248  "GI/accession/sequence mismatch: protein input required but nucleotide provided"

);

294

m_LineReader(iconfig.GetConvertGapsToNs() ?

297

m_ReadProteins(iconfig.IsProteinInput())

305

m_ReadProteins(iconfig.IsProteinInput())

307  if

(user_input.empty()) {

309  "No sequence input was provided"

);

331  const char

* env_var = getenv(

"BLASTINPUT_GEN_DELTA_SEQ"

);

332  if

(env_var ==

NULL

|| (env_var &&

string

(env_var) ==

kEmptyStr

)) {

412  "Nucleotide FASTA provided for protein sequence"

);

415  "Protein FASTA provided for nucleotide sequence"

);

428  "Cannot assign nucleotide strand to protein sequence"

);

446  if

(to > 0 && to < from) {

448  "Invalid sequence range"

);

452  "Invalid from coordinate (greater than sequence length)"

);

459

retval->

SetInt

().SetFrom(from);

460

retval->

SetInt

().SetTo((to > 0 && to < seqlen) ? to : (seqlen-1));

475  SSeqLoc

retval(seqloc, &scope);

477

retval.

mask

= lcase_mask;

495  const bool

apply_mask_to_both_strands =

true

;

499

program, apply_mask_to_both_strands);

514

m_ParseSeqIds(

false

)

527  if

(line[0] !=

'>'

) {

529  "defline expected"

);

543

m_ParseSeqIds(

false

)

550  "used with two input files"

);

564  if

(line[0] !=

'>'

) {

566  "defline expected"

);

570

++(*m_SecondLineReader);

574  if

(line[0] !=

'>'

) {

576  "defline expected"

);

626  if

(it->IsUser() && it->GetUser().GetType().GetStr() ==

"Mapping"

) {

632  if

(seqdesc.

Empty

()) {

657  "format x_ReadFastaOrFastq read either FASTA or FASTQ"

);

674  "format x_ReadFastaOrFastq read either FASTA or " 678  if

(

first

.NotEmpty()) {

687  if

(

first

.NotEmpty()) {

696  if

(

first

.NotEmpty()) {

735  if

(line[0] !=

'>'

) {

737

(

string

)

"Missing defline before line: "

+

745

(

string

)

"No sequence data for defline: "

+

id

+

746  "\nTruncated file?"

);

758

(

string

)

"No sequence data for defline: "

+ line);

763  size_t

p = line.

find

(

'>'

);

767

(

string

)

"FASTC parse error: Sequence separator '><'" 768  " was not found in line: "

+

774  char

* second = (

char

*)line.

data

() + p + 2;

775  size_t

first_len = p;

776  size_t

second_len = line.

length

() - p - 2;

781

bioseq.

SetId

().clear();

785

bioseq.

SetId

().push_back(seqid);

790

bioseq.

SetDescr

().Set().push_back(title);

796  first

[first_len] = 0;

798

bioseq.

SetDescr

().Set().push_back(seqdesc_first);

801

bioseq_set.

SetSeq_set

().push_back(seq_entry);

807

bioseq.

SetId

().clear();

811

bioseq.

SetId

().push_back(seqid);

816

bioseq.

SetDescr

().Set().push_back(title);

822

second[second_len] = 0;

824

bioseq.

SetDescr

().Set().push_back(seqdesc_last);

827

bioseq_set.

SetSeq_set

().push_back(seq_entry);

842

line = **line_reader;

843  while

(line[0] !=

'>'

) {

846  if

(line.

empty

() && !line_reader->

AtEOF

()) {

848

line = **line_reader;

864  if

(line_reader->

AtEOF

()) {

870

line = **line_reader;

877

bioseq.

SetId

().clear();

881

bioseq.

SetId

().push_back(seqid);

887

bioseq.

SetDescr

().Set().push_back(title);

892

bioseq.

SetInst

().SetLength(start);

910  bool

empty_sequence =

false

;

914

line = **line_reader;

917  while

(!line_reader->

AtEOF

() && line.

empty

()) {

919

line = **line_reader;

922  if

(line[0] !=

'@'

) {

924  " defline expected at line: "

+

932

line = **line_reader;

934  while

(!line_reader->

AtEOF

() && line.

empty

()) {

936

line = **line_reader;

940  if

(line.

length

() > 0) {

943

bioseq.

SetId

().clear();

947

bioseq.

SetId

().push_back(seqid);

953

bioseq.

SetDescr

().Set().push_back(title);

960  if

(line[0] ==

'+'

) {

961

bioseq.

SetInst

().SetLength(0);

963

empty_sequence =

true

;

974  if

(!empty_sequence) {

977

line = **line_reader;

979  while

(!line_reader->

AtEOF

() && line.

empty

()) {

981

line = **line_reader;

985  if

(line[0] !=

'+'

) {

987  " defline expected at line: "

+

991  if

(!empty_sequence) {

994

line = **line_reader;

996  if

(!line.

empty

()) {

1002  while

(!line_reader->

AtEOF

() && line.

empty

()) {

1004

line = **line_reader;

1018  "used with two files"

);

1033  if

(

first

.NotEmpty()) {

1041  if

(

first

.NotEmpty()) {

1057  size_t

end = line.

find

(

' '

, 1);

static CUser_object & s_SetSeqdescUser(CSeq_entry &entry)

Interface for reading SRA sequences into blast input.

Auxiliary classes/functions for BLAST input library.

bool HasRawSequenceData(const objects::CBioseq &bioseq)

Returns true if the Bioseq passed as argument has the full, raw sequence data in its Seq-inst field.

EBlastProgramType

Defines the engine's notion of the different applications of the BLAST algorithm.

Definitions and functions associated with the BlastQueryInfo structure.

@ eFirstSegment

The first sequence of a pair with both sequences read and accepted.

Auxiliary class for creating Bioseqs given SeqIds.

CRef< CBioseq > CreateBioseqFromId(CConstRef< CSeq_id > id, bool retrieve_seq_data)

Creates a Bioseq given a SeqId.

bool IsProtein(CConstRef< CSeq_id > id)

Checks the molecule type of the Bioseq identified by the given SeqId.

bool HasSequence(CConstRef< CSeq_id > id)

Checks whether the Bioseq actually contains sequence.

CRef< ILineReader > m_LineReader

interface to read lines

AutoPtr< CFastaReader > m_InputReader

Reader of FASTA sequences or identifiers.

bool m_ReadProteins

read protein sequences?

CBlastFastaInputSource(CNcbiIstream &infile, const CBlastInputSourceConfig &iconfig)

Constructor.

virtual CRef< CBlastSearchQuery > GetNextSequence(CScope &scope)

Retrieve a single sequence (in a CBlastSearchQuery container)

CRef< objects::CSeq_loc > x_FastaToSeqLoc(CRef< objects::CSeq_loc > &lcase_mask, CScope &scope)

Read a single sequence from file and convert to a Seq_loc.

void x_InitInputReader()

Initialization method for the input reader.

virtual bool End()

Signal whether there are any unread sequences left.

CBlastInputSourceConfig m_Config

Configuration for the sequences to be read.

virtual SSeqLoc GetNextSSeqLoc(CScope &scope)

Retrieve a single sequence (in an SSeqLoc container)

Class to read non-FASTA sequence input to BLAST programs using the various data loaders configured in...

CBlastInputReader(const SDataLoaderConfig &dlconfig, bool read_proteins, bool retrieve_seq_data, unsigned int seqlen_thresh2guess, ILineReader &reader, CFastaReader::TFlags flags)

Constructor.

void x_ValidateMoleculeType(CConstRef< CSeq_id > id)

Performs sanity checks to make sure that the sequence requested is of the expected type.

bool m_ReadProteins

True if we're supposed to be reading proteins, else false.

virtual CRef< CSeq_entry > ReadOneSeq(ILineErrorListener *pMessageListener)

Overloaded method to attempt to read non-FASTA input types.

CRef< CBlastScopeSource > GetQueryScopeSource() const

Retrieves the CBlastScopeSource object used to fetch the query sequence(s) if these were provided as ...

bool m_RetrieveSeqData

True if the sequence data must be fetched.

CRef< CBioseq > x_CreateBioseq(CRef< CSeq_id > id)

Auxiliary function to create a Bioseq given a CSeq_id ready to be added to a BlastObject,...

CRef< CBlastBioseqMaker > m_BioseqMaker

The object that creates Bioseqs given SeqIds.

const SDataLoaderConfig & m_DLConfig

Configuration options for the CBlastScopeSource.

CRef< CBlastScopeSource > m_QueryScopeSource

The source of CScope objects to fetch sequences if given by Seq-id.

Class that centralizes the configuration data for sequences to be converted.

TSeqRange GetRange() const

Get range for all sequences.

const string & GetLocalIdPrefix() const

Retrieve the custom prefix string used for generating local ids.

objects::ENa_strand GetStrand() const

Retrieve the current strand value.

int GetLocalIdCounterInitValue() const

Retrieve the local id counter initial value.

const SDataLoaderConfig & GetDataLoaderConfig()

Retrieve the data loader configuration object for read-only access.

bool GetBelieveDeflines() const

Retrieve current sequence ID parsing status.

unsigned int GetSeqLenThreshold2Guess() const

Retrieve the sequence length threshold to guess the molecule type.

bool GetSkipSeqCheck() const

Retrieve status of sequence alphabet validation.

bool GetLowercaseMask() const

Retrieve lowercase mask status.

bool RetrieveSeqData() const

True if the sequence data must be fetched.

Class whose purpose is to create CScope objects which have data loaders added with different prioriti...

void AddDataLoaders(CRef< objects::CScope > scope)

Add the data loader configured in the object to the provided scope.

CRef< objects::CScope > NewScope()

Create a new, properly configured CScope.

CFastaReader-derived class which contains customizations for processing BLAST sequence input.

virtual void AssignMolType(ILineErrorListener *pMessageListener)

Override logic for assigning the molecule type.

CCustomizedFastaReader(ILineReader &reader, CFastaReader::TFlags flags, unsigned int seq_len_threshold)

Constructor.

virtual void x_CloseGap(TSeqPos, bool, ILineErrorListener *)

Override this method to force the parent class to ignore gaps.

unsigned int m_SeqLenThreshold

Sequence length threshold for molecule type guessing.

Base class for reading FASTA sequences.

Defines user input exceptions.

@ eSequenceMismatch

The sequence type isn't what was expected.

Simple implementation of ILineReader for regions of memory (such as memory-mapped files).

CRef< CSeq_id > x_GetNextSeqId(void)

EInputFormat m_Format

Input format: FASTA, FASTQ, FASTC.

TSeqPos m_BasesAdded

Number of bases added so far.

bool m_ParseSeqIds

Should defline ids be used for Bioseq objects.

virtual int GetNextSequence(CBioseq_set &bioseq_set)

Get one sequence (or a pair for NGS reads)

CRef< CSeq_entry > x_ReadFastqOneSeq(CRef< ILineReader > line_reader)

Read one sequence from a FASTQ file.

CRef< ILineReader > m_SecondLineReader

CShortReadFastaInputSource(CNcbiIstream &infile, EInputFormat format=eFasta, bool paired=false)

unsigned int m_Id

A counter for generating local ids.

void x_ReadFastaOrFastq(CBioseq_set &bioseq_set)

Read sequences in FASTA or FASTQ format.

bool x_ReadFromTwoFiles(CBioseq_set &bioseq_set, EInputFormat format)

Read sequences from two FASTA or FASTQ files (for paired reads)

void x_ReadFastc(CBioseq_set &bioseq_set)

Read sequences in FASTC format: defline, new line, a pair of sequences on a single line separated by ...

CRef< CSeq_entry > x_ReadFastaOneSeq(CRef< ILineReader > line_reader)

Read one sequence from a FASTA file.

CRef< ILineReader > m_LineReader

EInputFormat

Input formats.

CTempString x_ParseDefline(CTempString &line)

TSeqPos m_SeqBuffLen

string::capacity() can be used instead

bool m_IsPaired

Are paired sequences in the input.

Stream line reader that converts gaps to Ns before returning each line.

CTempString operator*(void) const

Return the current line, minus its terminator.

CStreamLineReaderConverter & operator++(void)

Make a line available.

CStreamLineReaderConverter(CNcbiIstream &instream)

Simple implementation of ILineReader for i(o)streams.

CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...

Template class for iteration on objects of class C (non-modifiable version)

CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)

add a data field to the user object that holds a given value

@ eProblem_ModifierFoundButNoneExpected

@ eProblem_TooManyAmbiguousResidues

Abstract base class for lightweight line-by-line reading.

Collection of masked regions for a single query sequence.

bool Empty(const CNcbiOstrstream &src)

static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)

TMaskedQueryRegions PackedSeqLocToMaskedQueryRegions(CConstRef< objects::CSeq_loc > sloc, EBlastProgramType program, bool assume_both_strands=false)

Auxiliary function to convert a Seq-loc describing masked query regions to a TMaskedQueryRegions obje...

void reset(element_type *p=0, EOwnership ownership=eTakeOwnership)

Reset will delete the old pointer (if owned), set content to the new value, and assume the ownership ...

unsigned int TSeqPos

Type for sequence locations and lengths.

element_type * get(void) const

Get pointer.

TErrCode GetErrCode(void) const

Get error code.

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

const string & GetMsg(void) const

Get message string.

virtual CRef< CSeq_entry > ReadOneSeq(ILineErrorListener *pMessageListener=nullptr)

Read a single effective sequence, which may turn out to be a segmented set.

CSeqIdGenerator & SetIDGenerator(void)

long TFlags

binary OR of EFlags

CStreamLineReader & operator++(void)

Make a line available.

virtual void UngetLine(void)=0

Unget current line, which must be valid.

CRef< CSeq_loc > SaveMask(void)

Directs the *following* call to ReadOneSeq to note the locations of lowercase letters.

CTempString operator*(void) const

Return the current line, minus its terminator.

virtual Uint8 GetLineNumber(void) const =0

Returns the current line number (counting from 1, not 0).

virtual bool AtEOF(void) const =0

Indicates (negatively) whether there is any more input.

virtual void AssignMolType(ILineErrorListener *pMessageListener)

ILineReader & GetLineReader(void)

void IgnoreProblem(ILineError::EProblem problem)

@ fNoParseID

Generate an ID (whole defline -> title)

@ fQuickIDCheck

Just check local IDs' first characters.

@ fDLOptional

Don't require a leading defline.

@ fHyphensIgnoreAndWarn

When a hyphen is encountered in seq data, ignore it but warn.

@ fSkipCheck

Skip (rudimentary) body content check.

@ fDisableNoResidues

If no residues found do not raise an error.

@ fParseRawID

Try to identify raw accessions.

@ fNoSplit

Don't split out ambiguous sequence regions.

@ fAssumeNuc

Assume nucs unless accns indicate otherwise.

@ fAssumeProt

Assume prots unless accns indicate otherwise.

string GetSeqIdString(bool with_version=false) const

Return seqid string with optional version for text seqid type.

CSeq_id & Set(const CTempString &the_id, TParseFlags flags=fParse_AnyRaw)

Reassign based on flat specifications; arguments interpreted as with constructors.

static int BestRank(const CRef< CSeq_id > &id)

@ fParse_AnyLocal

Treat otherwise unidentified strings as local accessions as long as they don't resemble FASTA-style I...

@ fParse_ValidLocal

Treat otherwise unidentified strings as raw accessions, provided that they pass rudimentary validatio...

CConstBeginInfo ConstBegin(const C &obj)

Get starting point of non-modifiable object hierarchy.

CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)

Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...

void Reset(void)

Reset reference object.

bool NotEmpty(void) const THROWS_NONE

Check if CRef is not empty – pointing to an object and has a non-null value.

bool Empty(void) const THROWS_NONE

Check if CRef is empty – not pointing to any object, which means having a null value.

static TThisType GetEmpty(void)

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define END_SCOPE(ns)

End the previously defined scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

#define BEGIN_SCOPE(ns)

Define a new scope.

IO_PREFIX::istream CNcbiIstream

Portable alias for istream.

static CTempString TruncateSpaces_Unsafe(const CTempString str, ETrunc where=eTrunc_Both)

Truncate whitespace in a string.

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)

Find the pattern in the string.

const char * data(void) const

Return a pointer to the array represented.

bool empty(void) const

Return true if the represented string is empty (i.e., the length is zero)

static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)

Replace occurrences of a substring within a string.

static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)

Check if a string starts with a specified prefix value.

size_type length(void) const

Return the length of the represented array.

CTempString substr(size_type pos) const

Obtain a substring from this string, beginning at a given offset.

static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)

Convert numeric value to string.

size_type find(const CTempString match, size_type pos=0) const

Find the first instance of the entire matching string within the current string, beginning at an opti...

static const size_type npos

C::value_type FindBestChoice(const C &container, F score_func)

Find the best choice (lowest score) for values in a container.

TTo GetTo(void) const

Get the To member data.

TFrom GetFrom(void) const

Get the From member data.

void SetType(TType &value)

Assign a value to Type data member.

bool IsLocal(void) const

Check if variant Local is selected.

@ eNa_strand_both

in forward orientation

@ e_not_set

No variant selected.

const TSeq & GetSeq(void) const

Get the variant data.

TSeq & SetSeq(void)

Select the variant.

TSeq_set & SetSeq_set(void)

Assign a value to Seq_set data member.

TId & SetId(void)

Assign a value to Id data member.

const TInst & GetInst(void) const

Get the Inst member data.

TTitle & SetTitle(void)

Select the variant.

TLength GetLength(void) const

Get the Length member data.

void SetInst(TInst &value)

Assign a value to Inst data member.

void SetDescr(TDescr &value)

Assign a value to Descr data member.

TUser & SetUser(void)

Select the variant.

@ eRepr_raw

continuous sequence

@ eMol_na

just a nucleic acid

The blob sat and sat key. Both must be positive integers. Non-empty string. The interpretation of the blob id depends on a processor. Cassandra processor expects the following format

Configuration structure for the CBlastScopeSource.

bool UseDataLoaders() const

Determine whether either of the data loaders should be used.

Structure to represent a single sequence to be fed to BLAST.

CRef< objects::CSeq_loc > mask

Seq-loc describing regions to mask in the seqloc field Acceptable types of Seq-loc are Seq-interval a...


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4