lineNumber =
error.GetLineNum();
66 if(lineNumber == -1) {
68 "At ID '"<<
error.GetID() <<
"' " 69 "in category '"<<
static_cast<int>(
error.GetCategory()) <<
"': " 70<<
error.GetMsg() <<
"'");
73 "At ID '"<<
error.GetID() <<
"' " 74 "in category '"<<
static_cast<int>(
error.GetCategory()) <<
"' " 75 "at line "<<
error.GetLineNum() <<
": " 76<<
error.GetMsg() <<
"'");
131 const string& idString,
152 using TIds= list<CRef<CSeq_id>>;
172m_fValidateIds(fValidateIds),
174m_IS(is), m_ReadDone(
false), m_ReadSucceeded(
false),
175m_UseNexusInfo(
true)
188 if(!fSingleIdValidate) {
192 return[fSingleIdValidate](
constlist<CRef<CSeq_id>>& ids,
195 for(
const auto& pId : ids) {
196fSingleIdValidate(*pId, lineNum, errorReporter);
212{EAlphabet::eAlpha_Default,
215{EAlphabet::eAlpha_Nucleotide,
216 "ABCDGHKMNRSTUVWXYabcdghkmnrstuvwxy"},
218{EAlphabet::eAlpha_Protein,
219 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz*"},
221{EAlphabet::eAlpha_Dna,
222 "ABCDGHKMNRSTVWXYabcdghkmnrstvwxy"},
224{EAlphabet::eAlpha_Rna,
225 "ABCDGHKMNRSTVWXYabcdghkmnrstvwxy"},
227{EAlphabet::eAlpha_Dna_no_ambiguity,
230{EAlphabet::eAlpha_Rna_no_ambiguity,
233 returnalphaMap[alphaId];
272 const string& seqId,
274 const string& message,
294TReadFlags readFlags,
295ncbi::objects::ILineErrorListener* pErrorListener)
321 boolgenerate_local_ids,
322ncbi::objects::ILineErrorListener*
)
347 const auto& idString = seqIdInfo.
mData;
362 "Unable to parse sequence ID string.");
377 const autonum_sequences = alignmentInfo.
NumSequences();
379 if(num_sequences == 0) {
383 "No sequence data was detected in alignment file.");
387 if(num_sequences == 1) {
391 "Only one sequence was detected in the alignment file. An alignment file must contain more than one sequence.");
398 for(
autoseqIdInfo : alignmentInfo.
mIds) {
402 m_Ids.push_back(ids);
405 size_tnumDeflines = alignmentInfo.
NumDeflines();
407 if(numDeflines ==
m_Ids.size()) {
409 for(
size_t i=0;
i< numDeflines; ++
i) {
418 "Expected %d deflines but finding %d. ",
422 "If deflines are used, each sequence must have a corresponding defline. " 423 "Note that deflines are optional.",
440 if(begin_len <
m_Seqs[row_i].length()) {
441string::iterator s =
m_Seqs[row_i].end();
442 while(s !=
m_Seqs[row_i].begin()) {
444 if(
GetEndGap().find(*s) != string::npos) {
503 for(
int i=0;
i<
m_Dim; ++
i) {
523 "CAlnReader::GetSeqAlign(): " 524 "Seq_align is not available until after Read()", 0);
550aln_stop =
m_Seqs[row_i].size();
560vector<bool> is_gap; is_gap.resize(
m_Dim,
true);
561vector<bool> prev_is_gap; prev_is_gap.resize(
m_Dim,
true);
562vector<TSignedSeqPos> next_start; next_start.resize(
m_Dim, 0);
564 TSeqPosprev_aln_pos = 0, prev_len = 0;
565 boolnew_seg =
true;
568 for(
TSeqPosaln_pos = 0; aln_pos < aln_stop; aln_pos++) {
570 if(aln_pos >=
m_Seqs[row_i].length()) {
571 if(!is_gap[row_i]) {
572is_gap[row_i] =
true;
576 stringresidue =
m_Seqs[row_i].substr(aln_pos, 1);
578 if(!
x_IsGap(row_i, aln_pos, residue)) {
581is_gap[row_i] =
false;
590 if( !is_gap[row_i] ) {
591is_gap[row_i] =
true;
601lens.push_back(prev_len = aln_pos - prev_aln_pos);
603 if( !prev_is_gap[row_i] ) {
604next_start[row_i] += prev_len;
609starts.resize(starts_i +
m_Dim);
612starts[starts_i++] = -1;
614starts[starts_i++] = next_start[row_i];;
616prev_is_gap[row_i] = is_gap[row_i];
619prev_aln_pos = aln_pos;
630lens.push_back(aln_stop - prev_aln_pos);
632 _ASSERT((
int)lens.size() == numseg);
636 m_Aln->Validate(
true);
644 const string& alphabet,
645 const string& seqData,
655 const string& alphabet,
656 const string& seqData,
657 const string& seqId,
662 stringseqChars = seqData;
663 if(!missingChars.empty()) {
665 remove_if(seqChars.begin(), seqChars.end(),
666[&](
charc) { return missingChars.find(c) != string::npos;}),
678alphabet.size() >= 2*26) {
682 autoposFirstT = seqChars.find_first_of(
"Tt");
683 autoposFirstU = seqChars.find_first_of(
"Uu");
684 if(posFirstT != string::npos && posFirstU != string::npos) {
685 string msg=
"Invalid Mol Type: " 686 "U and T cannot appear in the same nucleotide sequence. " 687 "Reinterpreting as protein.";
705 const string& seqData)
const 709pSeqInst->SetMol(mol);
710pSeqInst->SetLength(seqData.size());
713 data.SetIupacaa().Set(seqData);
715 data.SetIupacna().Set(seqData);
729 "CAlnReader::GetSeqEntry(): " 730 "Seq_entry is not available until after Read()", 0);
741seq_annot->
SetData().SetAlign().push_back(seq_align);
744 m_Entry->SetSet().SetAnnot().push_back(seq_annot);
746 auto& seq_set =
m_Entry->SetSet().SetSeq_set();
750 const string& seq_str =
m_SeqVec[row_i];
754 auto& ids = pSubEntry->SetSeq().SetId();
765 const stringseqId = ids.front()->AsFastaString();
770pSubEntry->SetSeq().SetInst(*pSeqInst);
771seq_set.push_back(pSubEntry);
777 for(
auto& pSeqEntry : seq_set) {
782 for(
auto& pSeqEntry : seq_set) {
784pSeqEntry->SetSeq());
798 for(
const auto&
mod: mods) {
811 autodefline = defline_info.
mData;
818 const autoidString = pFirstID->AsFastaString();
821errorReporter(idString, defline_info.
mNumLine, pErrorListener);
838 const boollogInfo = pErrorListener ?
842 CModAdder::Apply(mod_handler, bioseq, skipped_mods, logInfo, errorReporter);
858pDesc->SetTitle() = title;
859bioseq.
SetDescr().Set().push_back(std::move(pDesc));
User-defined methods of the data storage class.
User-defined methods of the data storage class.
END_ENUM_INFO string ErrorPrintf(const char *format,...)
thread_local unique_ptr< CAlnErrorReporter > theErrorReporter
static void sReportError(ILineErrorListener *pEC, EDiagSev severity, int code, int subcode, const string &seqId, int lineNumber, const string &message, ILineError::EProblem problemType=ILineError::eProblem_GeneralParsingError)
static CAlnReader::FValidateIds s_GetMultiIdValidate(CAlnReader::FIdValidate fSingleIdValidate)
static void s_AppendMods(const CModHandler::TModList &mods, string &title)
string sAlnErrorToString(const CAlnError &error)
bool ReadAlignmentFile(istream &istr, bool gen_local_ids, bool use_nexus_info, CSequenceInfo &sequence_info, SAlignmentFile &alignmentInfo, ILineErrorListener *pErrorListener=nullptr)
void remove_if(Container &c, Predicate *__pred)
void Report(int lineNumber, EDiagSev severity, EReaderCode subsystem, EAlnSubcode errorCode, const string &descr, const string &seqId="")
CAlnError(int category, int line_num, string id, string message)
EAlnErr GetCategory() const
const string & GetMsg() const
const string & GetID() const
class CAlnReader supports importing a large variety of text-based alignment formats into standard dat...
vector< string > m_IdStrings
void ParseDefline(const string &defline, const SDeflineParseInfo &info, const TIgnoredProblems &ignoredErrors, list< CRef< objects::CSeq_id >> &ids, bool &hasRange, TSeqPos &rangeStart, TSeqPos &rangeEnd, TSeqTitles &seqTitles, objects::ILineErrorListener *pMessageListener)
void x_ParseAndValidateSeqIds(const TLineInfo &seqIdInfo, TReadFlags flags, TIdList &ids)
objects::CFastaDeflineReader::TIgnoredProblems TIgnoredProblems
objects::CSeq_inst::EMol GetSequenceMolType(const string &alphabet, const string &seqData, objects::ILineErrorListener *pErrorListener=nullptr)
Get a sequence's moltype, also considering the alphabet used to read it.
void x_CalculateMiddleSections()
virtual ~CAlnReader(void)
objects::CFastaDeflineReader::SDeflineParseInfo SDeflineParseInfo
void SetPaup(EAlphabet alpha)
vector< string > m_SeqVec
void Read(bool guess, bool generate_local_ids=false, objects::ILineErrorListener *pErrorListener=nullptr)
TAlignMiddles m_MiddleSections
function< void(const list< CRef< objects::CSeq_id > > &, int, objects::CAlnErrorReporter *)> FValidateIds
static string GetAlphabetLetters(EAlphabet)
vector< TSeqPos > m_SeqLen
objects::CSeq_inst::EMol x_GetSequenceMolType(const string &alphabet, const string &seqData, const string &seqId="", objects::ILineErrorListener *pErrorListener=nullptr)
const string & GetMiddleGap(void) const
int TReadFlags
binary OR of EReadFlags
objects::CDense_seg::TDim TNumrow
const string & GetAlphabet(void) const
pair< TSeqPos, TSeqPos > TAlignMiddleInterval
characters have different contexts, depending on whether they are before the first non-gap character,...
FValidateIds m_fValidateIds
objects::CFastaDeflineReader::TFastaFlags TFastaFlags
CRef< objects::CSeq_inst > x_GetSeqInst(objects::CSeq_inst::EMol mol, const string &seqData) const
void SetClustal(EAlphabet alpha)
void x_AddMods(const TLineInfo &defline_info, objects::CBioseq &bioseq, objects::ILineErrorListener *pErrorListener)
function< void(const objects::CSeq_id &, int, objects::CAlnErrorReporter *)> FIdValidate
ncbi::objects::CSequenceInfo mSequenceInfo
CRef< objects::CSeq_align > GetSeqAlign(TFastaFlags fasta_flags=0, objects::ILineErrorListener *pErrorListener=nullptr)
Create ASN.1 classes from the parsed alignment.
EAlignFormat m_AlignFormat
CNcbiIstream & m_IS
Other internal data.
CRef< objects::CSeq_entry > m_Entry
const string & GetEndGap(void) const
bool x_IsGap(TNumrow row, TSeqPos pos, const string &residue)
const string & GetMissing(void) const
objects::CFastaDeflineReader::TSeqTitles TSeqTitles
void SetPhylip(EAlphabet alpha)
void SetAlphabet(const string &value)
virtual CRef< objects::CSeq_id > GenerateID(const string &fasta_defline, const TSeqPos &line_number, TFastaFlags fasta_flags)
CRef< objects::CSeq_align > m_Aln
void x_VerifyAlignmentInfo(const ncbi::objects::SAlignmentFile &, TReadFlags readFlags)
void x_AssignDensegIds(TFastaFlags fasta_flags, objects::CDense_seg &denseg)
CRef< objects::CSeq_entry > GetSeqEntry(TFastaFlags fasta_flags=objects::CFastaReader::fAddMods, objects::ILineErrorListener *pErrorListener=nullptr)
void SetAllGap(const string &value)
Convenience function for setting beginning, middle, and end gap to the same thing.
void x_AddTitle(const string &defline, objects::CBioseq &bioseq)
list< CRef< objects::CSeq_id > > TIdList
Parsed result data (analogous to SAlignmentFile) Seqs are upper-case strings representing the sequenc...
vector< TLineInfo > m_DeflineInfo
void SetFastaGap(EAlphabet alpha)
Alternative & easy way to choose alphabet, etc.
const string & GetBeginningGap(void) const
CAlnReader(CNcbiIstream &is, FValidateIds fIdValidate=nullptr)
const CSeq_id * GetFirstId() const
CAlnErrorReporter * m_pErrorReporter
CDefaultIdErrorReporter(CAlnErrorReporter *pErrorReporter)
void operator()(EDiagSev severity, int lineNum, const string &idString, CFastaIdValidate::EErrCode, const string &msg)
void operator()(const TIds &ids, int lineNum, CAlnErrorReporter *pErrorReporter)
list< CRef< CSeq_id > > TIds
CFastaIdValidate m_FastaIdValidate
static void ParseDefline(const CTempString &defline, const SDeflineParseInfo &info, const TIgnoredProblems &ignoredErrors, TIds &ids, bool &hasRange, TSeqPos &rangeStart, TSeqPos &rangeEnd, TSeqTitles &seqTitles, ILineErrorListener *pMessageListener)
static ESequenceType SequenceType(const char *str, unsigned length=0, ESTStrictness strictness=eST_Default)
Guess sequence type.
static CLineErrorEx * Create(EProblem eProblem, EDiagSev eSeverity, int code, int subcode, const std::string &strSeqId, unsigned int uLine, const std::string &strErrorMessage=string(""), const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), const TVecOfLines &vecOfOtherLines=TVecOfLines())
Use this because the constructor is protected.
static void Apply(const CModHandler &mod_handler, CBioseq &bioseq, TSkippedMods &skipped_mods, FPostMessage fPostMessage=nullptr)
list< CModData > TModList
void AddMods(const TModList &mods, EHandleExisting handle_existing, TModList &rejected_mods, FReportError fReportError=nullptr)
static TSeqPos Pack(CSeq_data *in_seq, TSeqPos uLength=ncbi::numeric_limits< TSeqPos >::max())
static void Apply(const CTempString &title, TModList &mods, string &remainder)
virtual bool PutError(const ILineError &)=0
Store error in the container, and return true if error was stored fine, and return false if the calle...
@ eProblem_GeneralParsingError
vector< string > mSequences
size_t NumDeflines() const
size_t NumSequences() const
vector< TLineInfo > mDeflines
Operators to edit gaps in sequences.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static unsigned int line_num
unsigned int TSeqPos
Type for sequence locations and lengths.
EDiagSev
Severity level for the posted diagnostics.
@ eDiag_Info
Informational message.
@ eDiag_Error
Error message.
#define NCBI_THROW2(exception_class, err_code, message, extra)
Throw exception with extra parameter.
#define FORMAT(message)
Format message using iostreams library.
@ fAddMods
Parse defline mods and add to SeqEntry.
static SIZE_TYPE ParseIDs(CBioseq::TId &ids, const CTempString &s, TParseFlags flags=fParse_Default)
Parse a string representing one or more Seq-ids, appending the results to IDS.
EAccessionInfo
For IdentifyAccession (below)
static int BestRank(const CRef< CSeq_id > &id)
@ fParse_RawText
Try to ID raw non-numeric accessions.
@ fParse_AnyLocal
Treat otherwise unidentified strings as local accessions as long as they don't resemble FASTA-style I...
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static string & ToUpper(string &str)
Convert string to upper case -- string& version.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
TLens & SetLens(void)
Assign a value to Lens data member.
vector< TSignedSeqPos > TStarts
void SetDim(TDim value)
Assign a value to Dim data member.
vector< CRef< CSeq_id > > TIds
TStarts & SetStarts(void)
Assign a value to Starts data member.
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
TIds & SetIds(void)
Assign a value to Ids data member.
@ eClass_pop_set
population study
void SetData(TData &value)
Assign a value to Data data member.
EMol
molecule class in living organism
void SetDescr(TDescr &value)
Assign a value to Descr data member.
@ eRepr_raw
continuous sequence
@ eMol_not_set
> cdna = rna
@ eMol_na
just a nucleic acid
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
@ eAlnSubcode_BadSequenceCount
@ eAlnSubcode_IllegalSequenceId
@ eAlnSubcode_InconsistentMolType
@ eAlnSubcode_InsufficientDeflineInfo
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
#define row(bind, expected)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4