A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/aln__reader_8cpp_source.html below:

NCBI C++ ToolKit: src/objtools/readers/aln_reader.cpp Source File

57 #define NCBI_USE_ERRCODE_X Objtools_Rd_Align 65  auto

lineNumber =

error

.GetLineNum();

66  if

(lineNumber == -1) {

68  "At ID '"

<<

error

.GetID() <<

"' " 69  "in category '"

<<

static_cast<int>

(

error

.GetCategory()) <<

"': " 70

<<

error

.GetMsg() <<

"'"

);

73  "At ID '"

<<

error

.GetID() <<

"' " 74  "in category '"

<<

static_cast<int>

(

error

.GetCategory()) <<

"' " 75  "at line "

<<

error

.GetLineNum() <<

": " 76

<<

error

.GetMsg() <<

"'"

);

131  const string

& idString,

152  using TIds

= list<CRef<CSeq_id>>;

172

m_fValidateIds(fValidateIds),

174

m_IS(is), m_ReadDone(

false

), m_ReadSucceeded(

false

),

175

m_UseNexusInfo(

true

)

188  if

(!fSingleIdValidate) {

192  return

[fSingleIdValidate](

const

list<CRef<CSeq_id>>& ids,

195  for

(

const auto

& pId : ids) {

196

fSingleIdValidate(*pId, lineNum, errorReporter);

212

{EAlphabet::eAlpha_Default,

215

{EAlphabet::eAlpha_Nucleotide,

216  "ABCDGHKMNRSTUVWXYabcdghkmnrstuvwxy"

},

218

{EAlphabet::eAlpha_Protein,

219  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz*"

},

221

{EAlphabet::eAlpha_Dna,

222  "ABCDGHKMNRSTVWXYabcdghkmnrstvwxy"

},

224

{EAlphabet::eAlpha_Rna,

225  "ABCDGHKMNRSTVWXYabcdghkmnrstvwxy"

},

227

{EAlphabet::eAlpha_Dna_no_ambiguity,

230

{EAlphabet::eAlpha_Rna_no_ambiguity,

233  return

alphaMap[alphaId];

272  const string

& seqId,

274  const string

& message,

294

TReadFlags readFlags,

295

ncbi::objects::ILineErrorListener* pErrorListener)

321  bool

generate_local_ids,

322

ncbi::objects::ILineErrorListener*

)

347  const auto

& idString = seqIdInfo.

mData

;

362  "Unable to parse sequence ID string."

);

377  const auto

num_sequences = alignmentInfo.

NumSequences

();

379  if

(num_sequences == 0) {

383  "No sequence data was detected in alignment file."

);

387  if

(num_sequences == 1) {

391  "Only one sequence was detected in the alignment file. An alignment file must contain more than one sequence."

);

398  for

(

auto

seqIdInfo : alignmentInfo.

mIds

) {

402  m_Ids

.push_back(ids);

405  size_t

numDeflines = alignmentInfo.

NumDeflines

();

407  if

(numDeflines ==

m_Ids

.size()) {

409  for

(

size_t i

=0;

i

< numDeflines; ++

i

) {

418  "Expected %d deflines but finding %d. "

,

422  "If deflines are used, each sequence must have a corresponding defline. " 423  "Note that deflines are optional."

,

440  if

(begin_len <

m_Seqs

[row_i].length()) {

441

string::iterator s =

m_Seqs

[row_i].end();

442  while

(s !=

m_Seqs

[row_i].begin()) {

444  if

(

GetEndGap

().find(*s) != string::npos) {

503  for

(

int i

=0;

i

<

m_Dim

; ++

i

) {

523  "CAlnReader::GetSeqAlign(): " 524  "Seq_align is not available until after Read()"

, 0);

550

aln_stop =

m_Seqs

[row_i].size();

560

vector<bool> is_gap; is_gap.resize(

m_Dim

,

true

);

561

vector<bool> prev_is_gap; prev_is_gap.resize(

m_Dim

,

true

);

562

vector<TSignedSeqPos> next_start; next_start.resize(

m_Dim

, 0);

564  TSeqPos

prev_aln_pos = 0, prev_len = 0;

565  bool

new_seg =

true

;

568  for

(

TSeqPos

aln_pos = 0; aln_pos < aln_stop; aln_pos++) {

570  if

(aln_pos >=

m_Seqs

[row_i].length()) {

571  if

(!is_gap[row_i]) {

572

is_gap[row_i] =

true

;

576  string

residue =

m_Seqs

[row_i].substr(aln_pos, 1);

578  if

(!

x_IsGap

(row_i, aln_pos, residue)) {

581

is_gap[row_i] =

false

;

590  if

( !is_gap[row_i] ) {

591

is_gap[row_i] =

true

;

601

lens.push_back(prev_len = aln_pos - prev_aln_pos);

603  if

( !prev_is_gap[row_i] ) {

604

next_start[row_i] += prev_len;

609

starts.resize(starts_i +

m_Dim

);

612

starts[starts_i++] = -1;

614

starts[starts_i++] = next_start[row_i];;

616

prev_is_gap[row_i] = is_gap[row_i];

619

prev_aln_pos = aln_pos;

630

lens.push_back(aln_stop - prev_aln_pos);

632  _ASSERT

((

int

)lens.size() == numseg);

636  m_Aln

->Validate(

true

);

644  const string

& alphabet,

645  const string

& seqData,

655  const string

& alphabet,

656  const string

& seqData,

657  const string

& seqId,

662  string

seqChars = seqData;

663  if

(!missingChars.empty()) {

665  remove_if

(seqChars.begin(), seqChars.end(),

666

[&](

char

c) { return missingChars.find(c) != string::npos;}),

678

alphabet.size() >= 2*26) {

682  auto

posFirstT = seqChars.find_first_of(

"Tt"

);

683  auto

posFirstU = seqChars.find_first_of(

"Uu"

);

684  if

(posFirstT != string::npos && posFirstU != string::npos) {

685  string msg

=

"Invalid Mol Type: " 686  "U and T cannot appear in the same nucleotide sequence. " 687  "Reinterpreting as protein."

;

705  const string

& seqData)

const 709

pSeqInst->SetMol(mol);

710

pSeqInst->SetLength(seqData.size());

713  data

.SetIupacaa().Set(seqData);

715  data

.SetIupacna().Set(seqData);

729  "CAlnReader::GetSeqEntry(): " 730  "Seq_entry is not available until after Read()"

, 0);

741

seq_annot->

SetData

().SetAlign().push_back(seq_align);

744  m_Entry

->SetSet().SetAnnot().push_back(seq_annot);

746  auto

& seq_set =

m_Entry

->SetSet().SetSeq_set();

750  const string

& seq_str =

m_SeqVec

[row_i];

754  auto

& ids = pSubEntry->SetSeq().SetId();

765  const string

seqId = ids.front()->AsFastaString();

770

pSubEntry->SetSeq().SetInst(*pSeqInst);

771

seq_set.push_back(pSubEntry);

777  for

(

auto

& pSeqEntry : seq_set) {

782  for

(

auto

& pSeqEntry : seq_set) {

784

pSeqEntry->SetSeq());

798  for

(

const auto

&

mod

: mods) {

811  auto

defline = defline_info.

mData

;

818  const auto

idString = pFirstID->AsFastaString();

821

errorReporter(idString, defline_info.

mNumLine

, pErrorListener);

838  const bool

logInfo = pErrorListener ?

842  CModAdder::Apply

(mod_handler, bioseq, skipped_mods, logInfo, errorReporter);

858

pDesc->SetTitle() = title;

859

bioseq.

SetDescr

().Set().push_back(std::move(pDesc));

User-defined methods of the data storage class.

User-defined methods of the data storage class.

END_ENUM_INFO string ErrorPrintf(const char *format,...)

thread_local unique_ptr< CAlnErrorReporter > theErrorReporter

static void sReportError(ILineErrorListener *pEC, EDiagSev severity, int code, int subcode, const string &seqId, int lineNumber, const string &message, ILineError::EProblem problemType=ILineError::eProblem_GeneralParsingError)

static CAlnReader::FValidateIds s_GetMultiIdValidate(CAlnReader::FIdValidate fSingleIdValidate)

static void s_AppendMods(const CModHandler::TModList &mods, string &title)

string sAlnErrorToString(const CAlnError &error)

bool ReadAlignmentFile(istream &istr, bool gen_local_ids, bool use_nexus_info, CSequenceInfo &sequence_info, SAlignmentFile &alignmentInfo, ILineErrorListener *pErrorListener=nullptr)

void remove_if(Container &c, Predicate *__pred)

void Report(int lineNumber, EDiagSev severity, EReaderCode subsystem, EAlnSubcode errorCode, const string &descr, const string &seqId="")

CAlnError(int category, int line_num, string id, string message)

EAlnErr GetCategory() const

const string & GetMsg() const

const string & GetID() const

class CAlnReader supports importing a large variety of text-based alignment formats into standard dat...

vector< string > m_IdStrings

void ParseDefline(const string &defline, const SDeflineParseInfo &info, const TIgnoredProblems &ignoredErrors, list< CRef< objects::CSeq_id >> &ids, bool &hasRange, TSeqPos &rangeStart, TSeqPos &rangeEnd, TSeqTitles &seqTitles, objects::ILineErrorListener *pMessageListener)

void x_ParseAndValidateSeqIds(const TLineInfo &seqIdInfo, TReadFlags flags, TIdList &ids)

objects::CFastaDeflineReader::TIgnoredProblems TIgnoredProblems

objects::CSeq_inst::EMol GetSequenceMolType(const string &alphabet, const string &seqData, objects::ILineErrorListener *pErrorListener=nullptr)

Get a sequence's moltype, also considering the alphabet used to read it.

void x_CalculateMiddleSections()

virtual ~CAlnReader(void)

objects::CFastaDeflineReader::SDeflineParseInfo SDeflineParseInfo

void SetPaup(EAlphabet alpha)

vector< string > m_SeqVec

void Read(bool guess, bool generate_local_ids=false, objects::ILineErrorListener *pErrorListener=nullptr)

TAlignMiddles m_MiddleSections

function< void(const list< CRef< objects::CSeq_id > > &, int, objects::CAlnErrorReporter *)> FValidateIds

static string GetAlphabetLetters(EAlphabet)

vector< TSeqPos > m_SeqLen

objects::CSeq_inst::EMol x_GetSequenceMolType(const string &alphabet, const string &seqData, const string &seqId="", objects::ILineErrorListener *pErrorListener=nullptr)

const string & GetMiddleGap(void) const

int TReadFlags

binary OR of EReadFlags

objects::CDense_seg::TDim TNumrow

const string & GetAlphabet(void) const

pair< TSeqPos, TSeqPos > TAlignMiddleInterval

characters have different contexts, depending on whether they are before the first non-gap character,...

FValidateIds m_fValidateIds

objects::CFastaDeflineReader::TFastaFlags TFastaFlags

CRef< objects::CSeq_inst > x_GetSeqInst(objects::CSeq_inst::EMol mol, const string &seqData) const

void SetClustal(EAlphabet alpha)

void x_AddMods(const TLineInfo &defline_info, objects::CBioseq &bioseq, objects::ILineErrorListener *pErrorListener)

function< void(const objects::CSeq_id &, int, objects::CAlnErrorReporter *)> FIdValidate

ncbi::objects::CSequenceInfo mSequenceInfo

CRef< objects::CSeq_align > GetSeqAlign(TFastaFlags fasta_flags=0, objects::ILineErrorListener *pErrorListener=nullptr)

Create ASN.1 classes from the parsed alignment.

EAlignFormat m_AlignFormat

CNcbiIstream & m_IS

Other internal data.

CRef< objects::CSeq_entry > m_Entry

const string & GetEndGap(void) const

bool x_IsGap(TNumrow row, TSeqPos pos, const string &residue)

const string & GetMissing(void) const

objects::CFastaDeflineReader::TSeqTitles TSeqTitles

void SetPhylip(EAlphabet alpha)

void SetAlphabet(const string &value)

virtual CRef< objects::CSeq_id > GenerateID(const string &fasta_defline, const TSeqPos &line_number, TFastaFlags fasta_flags)

CRef< objects::CSeq_align > m_Aln

void x_VerifyAlignmentInfo(const ncbi::objects::SAlignmentFile &, TReadFlags readFlags)

void x_AssignDensegIds(TFastaFlags fasta_flags, objects::CDense_seg &denseg)

CRef< objects::CSeq_entry > GetSeqEntry(TFastaFlags fasta_flags=objects::CFastaReader::fAddMods, objects::ILineErrorListener *pErrorListener=nullptr)

void SetAllGap(const string &value)

Convenience function for setting beginning, middle, and end gap to the same thing.

void x_AddTitle(const string &defline, objects::CBioseq &bioseq)

list< CRef< objects::CSeq_id > > TIdList

Parsed result data (analogous to SAlignmentFile) Seqs are upper-case strings representing the sequenc...

vector< TLineInfo > m_DeflineInfo

void SetFastaGap(EAlphabet alpha)

Alternative & easy way to choose alphabet, etc.

const string & GetBeginningGap(void) const

CAlnReader(CNcbiIstream &is, FValidateIds fIdValidate=nullptr)

const CSeq_id * GetFirstId() const

CAlnErrorReporter * m_pErrorReporter

CDefaultIdErrorReporter(CAlnErrorReporter *pErrorReporter)

void operator()(EDiagSev severity, int lineNum, const string &idString, CFastaIdValidate::EErrCode, const string &msg)

void operator()(const TIds &ids, int lineNum, CAlnErrorReporter *pErrorReporter)

list< CRef< CSeq_id > > TIds

CFastaIdValidate m_FastaIdValidate

static void ParseDefline(const CTempString &defline, const SDeflineParseInfo &info, const TIgnoredProblems &ignoredErrors, TIds &ids, bool &hasRange, TSeqPos &rangeStart, TSeqPos &rangeEnd, TSeqTitles &seqTitles, ILineErrorListener *pMessageListener)

static ESequenceType SequenceType(const char *str, unsigned length=0, ESTStrictness strictness=eST_Default)

Guess sequence type.

static CLineErrorEx * Create(EProblem eProblem, EDiagSev eSeverity, int code, int subcode, const std::string &strSeqId, unsigned int uLine, const std::string &strErrorMessage=string(""), const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), const TVecOfLines &vecOfOtherLines=TVecOfLines())

Use this because the constructor is protected.

static void Apply(const CModHandler &mod_handler, CBioseq &bioseq, TSkippedMods &skipped_mods, FPostMessage fPostMessage=nullptr)

list< CModData > TModList

void AddMods(const TModList &mods, EHandleExisting handle_existing, TModList &rejected_mods, FReportError fReportError=nullptr)

static TSeqPos Pack(CSeq_data *in_seq, TSeqPos uLength=ncbi::numeric_limits< TSeqPos >::max())

static void Apply(const CTempString &title, TModList &mods, string &remainder)

virtual bool PutError(const ILineError &)=0

Store error in the container, and return true if error was stored fine, and return false if the calle...

@ eProblem_GeneralParsingError

vector< string > mSequences

size_t NumDeflines() const

size_t NumSequences() const

vector< TLineInfo > mDeflines

Operators to edit gaps in sequences.

static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)

static unsigned int line_num

unsigned int TSeqPos

Type for sequence locations and lengths.

EDiagSev

Severity level for the posted diagnostics.

@ eDiag_Info

Informational message.

@ eDiag_Error

Error message.

#define NCBI_THROW2(exception_class, err_code, message, extra)

Throw exception with extra parameter.

#define FORMAT(message)

Format message using iostreams library.

@ fAddMods

Parse defline mods and add to SeqEntry.

static SIZE_TYPE ParseIDs(CBioseq::TId &ids, const CTempString &s, TParseFlags flags=fParse_Default)

Parse a string representing one or more Seq-ids, appending the results to IDS.

EAccessionInfo

For IdentifyAccession (below)

static int BestRank(const CRef< CSeq_id > &id)

@ fParse_RawText

Try to ID raw non-numeric accessions.

@ fParse_AnyLocal

Treat otherwise unidentified strings as local accessions as long as they don't resemble FASTA-style I...

CRef< C > Ref(C *object)

Helper functions to get CRef<> and CConstRef<> objects.

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

IO_PREFIX::istream CNcbiIstream

Portable alias for istream.

static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)

Check if a string is blank (has no text).

static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)

Truncate whitespace in a string (in-place)

static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)

Find the pattern in the string.

static string & ToUpper(string &str)

Convert string to upper case – string& version.

static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)

Truncate whitespace in a string.

C::value_type FindBestChoice(const C &container, F score_func)

Find the best choice (lowest score) for values in a container.

TLens & SetLens(void)

Assign a value to Lens data member.

vector< TSignedSeqPos > TStarts

void SetDim(TDim value)

Assign a value to Dim data member.

vector< CRef< CSeq_id > > TIds

TStarts & SetStarts(void)

Assign a value to Starts data member.

void SetNumseg(TNumseg value)

Assign a value to Numseg data member.

TIds & SetIds(void)

Assign a value to Ids data member.

@ eClass_pop_set

population study

void SetData(TData &value)

Assign a value to Data data member.

EMol

molecule class in living organism

void SetDescr(TDescr &value)

Assign a value to Descr data member.

@ eRepr_raw

continuous sequence

@ eMol_not_set

> cdna = rna

@ eMol_na

just a nucleic acid

constexpr bool empty(list< Ts... >) noexcept

const struct ncbi::grid::netcache::search::fields::SIZE size

@ eAlnSubcode_BadSequenceCount

@ eAlnSubcode_IllegalSequenceId

@ eAlnSubcode_InconsistentMolType

@ eAlnSubcode_InsufficientDeflineInfo

static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

#define row(bind, expected)


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4