lastLine = args.back().mData;
72 autolastWhiteSpacePos = lastLine.find_last_of(
" \t");
73 stringlastToken = (lastWhiteSpacePos ==
NPOS) ?
75lastLine.substr(lastWhiteSpacePos);
77 stringlastTokenLower(lastToken);
80 if(lastTokenLower ==
"end") {
81 const boolonlyArg = (args.size() == 1 &&
82lastWhiteSpacePos ==
NPOS);
87 "\""+ lastToken +
"\" is not a valid argument for the \""+
88 command.name +
"\" command.");
95 "File format autocorrected to comply with Nexus rules. Unexpected \"end;\". Appending \';\' to prior command. No action required.");
97 if(lastWhiteSpacePos ==
NPOS) {
102args.back().mData.substr(0, lastWhiteSpacePos));
121commandTokens.front().mData.find_first_of(
" \t[");
122 if(nameEnd ==
NPOS) {
131 command.startLineNum = commandTokens.front().mNumLine;
133 stringnameLower =
command.name;
137 if(nameLower ==
"begin") {
152 "\""+
command.name +
"\" command appears outside of block.");
157 if(block ==
"ncbi") {
162 if(nameLower ==
"end") {
168 "\" command terminates a block and does not take any arguments.");
174 if(block ==
"data"||
175block ==
"characters") {
180 if(block ==
"taxa") {
194 autonameLower =
command.name;
200 if(nameLower ==
"dimensions") {
204 if(nameLower ==
"format") {
208 if(nameLower ==
"matrix") {
227 autonameLower =
command.name;
233 if(nameLower ==
"dimensions") {
253 static stringpreviousCommand;
255 autonameLower =
command.name;
257 if(nameLower ==
"end") {
258 if(previousCommand !=
"sequin") {
260 "Exiting empty \"NCBI\" block. Expected a \"sequin\" command.";
266previousCommand.clear();
273 if(nameLower ==
"sequin") {
275previousCommand =
"sequin";
281 "Unexpected \""+
command.name +
"\" command inside \"NCBI\" block. The \"NCBI\" block must contain a \"sequin\" command and no other commands.");
285previousCommand.clear();
298 for(
autolineInfo : args) {
299 autoline = lineInfo.mData;
300 autolineNum = lineInfo.mNumLine;
301 string dummy, defLine;
310 if(!
dummy.empty()) {
312 "The definition lines in the Nexus file are not correctly formatted. " 313 "Definition lines are optional, " 314 "but if included, must start with \">\" followed by modifiers in square brackets. " 315 "The sequences have been imported " 316 "but the information in the definition lines will be ignored.";
322 mDeflines.push_back({defLine, lineNum});
334 autolineNum =
command.front().mNumLine;
335 autonewBlockName =
command.front().mData;
338 "Nested blocks detected. New block \"%s\" while still in \"%s\" block. \"%s\" block begins on line %d",
339newBlockName.c_str(),
363 autodescription =
"\"end\" command appears outside of block.";
382 intdataLineCount(0);
384 intblockLineLength(0);
385 intsequenceCharCount(0);
388 for(
autolineInfo :
command) {
389 const auto&
data= lineInfo.mData;
390 const intlineNum = lineInfo.mNumLine;
392vector<string> tokens;
394 if(tokens.size() < 2) {
396 "Data line does not follow the expected pattern of sequence_ID followed by sequence data. " 397 "Each data line should conform to the same expected pattern.";
404 const string& seqId = tokens[0];
412seqCount = inFirstBlock ? dataLineCount : (dataLineCount %
mNumSequences);
413maxSeqCount =
max(seqCount, maxSeqCount);
419 mSeqIds.push_back({seqId, lineNum});
423 stringseqData =
NStr::Join(tokens.begin()+1, tokens.end(),
"");
424 const intdataSize = seqData.size();
428sequenceCharCount += dataSize;
432 "The expected number of characters per sequence specified by nChar in the Nexus file is %d. " 433 "The actual number of characters counted for the first sequence is %d. " 434 "The expected number of characters must equal the actual number of characters.",
442blockLineLength = dataSize;
445 if(dataSize != blockLineLength) {
453 mSequences[seqCount].push_back({seqData, lineNum});
462 "The expected number of sequences specified by nTax in the Nexus file is %d. " 463 "The actual number of sequences encountered is %d. " 464 "The number of sequences in the file must equal the expected number of sequences.",
473 if(seqCount != maxSeqCount) {
476 "The final sequence block in the Nexus file is incomplete. " 477 "It contains data for just %d sequences, but %d sequences are expected.",
488 "The expected number of characters per sequence specified by nChar in the Nexus file is %d. " 489 "The actual number of characters counted for the first sequence is %d. " 490 "The expected number of characters must equal the actual number of characters.",
513 if(ntaxPos.second != string::npos) {
515 size_tntaxLinePos = ntaxPos.second;
516 if(ntaxLinePos == 0 &&
517ntaxPos.first != args.begin()) {
518ntaxSubStr =
prev(ntaxPos.first)->mData;
519ntaxLinePos += ntaxSubStr.size();
522 boolfoundError =
true;
523 const autolitLength = strlen(
"newtaxa");
524 if(ntaxLinePos > litLength) {
525ntaxSubStr += ntaxPos.first->mData;
526 autoendOfPreviousToken = ntaxSubStr.find_last_not_of(
" \t", ntaxLinePos-1);
527 if(endOfPreviousToken !=
NPOS&&
528endOfPreviousToken >= (litLength-1) &&
531endOfPreviousToken-(litLength-1), litLength),
"newtaxa")) {
538ntaxPos.first->mNumLine,
540 "Invalid command arguments. \"nTax\" must be immediately preceded by \"newtaxa\" in \""+
550 if(!ntax.first.empty()) {
556 ErrorPrintf(
"Nexus file has invalid nTax setting: \"%s\". nTax must be an integer.",
566 if(!nchar.first.empty()) {
572 ErrorPrintf(
"Nexus file has invalid nChar setting: \"%s\". nChar must be an integer.",
573nchar.first.c_str());
593 if(!missing.first.empty()) {
597 if(!gap.first.empty()) {
601 if(!matchchar.first.empty()) {
619 for(
autolineInfo :
command) {
620 if(keyPos ==
NPOS) {
622 if(keyPos !=
NPOS) {
623keyLine = lineInfo.mNumLine;
624endPos = lineInfo.mData.find_first_of(
" \t=", keyPos);
628 if(keyPos !=
NPOS) {
629 intcurrentLine = lineInfo.mNumLine;
630 if(currentLine != keyLine) {
633valPos = lineInfo.mData.find_first_not_of(
" \t=", endPos);
634 if(valPos !=
NPOS) {
635endPos = lineInfo.mData.find_first_of(
" \t\n;", valPos);
636 if(endPos !=
NPOS) {
637 return{lineInfo.mData.substr(valPos, endPos-valPos), lineInfo.mNumLine};
639 return{lineInfo.mData.substr(valPos), lineInfo.mNumLine};
647pair<CAlnScannerNexus::TCommandArgs::const_iterator, size_t>
649 const string& token)
const 652 for(
autoit = args.cbegin(); it != args.cend(); ++it) {
653 stringline(it->mData);
655 size_tpos = line.find(token);
656 if(pos != string::npos) {
657 returnmake_pair(it, pos);
661 returnmake_pair(args.cend(), string::npos);
670 int&numUnmatchedLeftBrackets,
674 for(
autoindex=startPos; index<line.size(); ++index) {
675 if(line[index] ==
'[') {
676++numUnmatchedLeftBrackets;
679 if(line[index] ==
']') {
680--numUnmatchedLeftBrackets;
683 if((numUnmatchedLeftBrackets == 0) &&
702 size_tcommandEnd(0);
703 size_tcommandStart(0);
704 intnumOpenBrackets(0);
705 size_tcommentStartLine(-1);
706 boolinCommand(
false);
707 boolfirstToken =
true;
710 while(iStr.
ReadLine(line, lineCount)) {
714 stringlineStrLower(line);
716 if(lineStrLower ==
"#nexus") {
721 "Unexpected token. \"#NEXUS\" should appear once at the beginnng of the file." 738 intpreviousOpenBrackets = numOpenBrackets;
740 if(previousOpenBrackets == 0 &&
741numOpenBrackets > 0) {
742commentStartLine = lineCount;
749previousOpenBrackets = numOpenBrackets;
751 if(previousOpenBrackets == 0 &&
752numOpenBrackets > 0) {
753commentStartLine = lineCount;
757 while(commandEnd !=
NPOS) {
758 stringcommandSubstr =
760 if(!commandSubstr.empty()) {
761commandTokens.push_back({commandSubstr, lineCount});
764commandTokens.clear();
766commandStart = commandEnd+1;
767previousOpenBrackets = numOpenBrackets;
770 if(previousOpenBrackets == 0 &&
771numOpenBrackets > 0) {
772commentStartLine = lineCount;
776 if(commandStart < line.size()) {
783 if(numOpenBrackets > 0) {
785 "The beginning of a comment was detected, but it is missing a closing bracket. Add the closing bracket to the end of the comment or correct if it is not a comment.";
792 if(!commandTokens.empty()) {
794 "Terminating semicolon missing from command. Commands in a Nexus file must end with a semicolon.";
828 int&numUnmatchedLeftBrackets)
835list<pair<size_t, size_t>> commentLimits;
839 while(index < line.size()) {
840 const auto& c = line[index];
842++numUnmatchedLeftBrackets;
843 if(numUnmatchedLeftBrackets==1) {
849 if(numUnmatchedLeftBrackets==1) {
851commentLimits.push_back(make_pair(start, stop));
853--numUnmatchedLeftBrackets;
858 if(numUnmatchedLeftBrackets) {
859commentLimits.push_back(make_pair(start, index-1));
862 for(
autoit = commentLimits.crbegin();
863it != commentLimits.crend();
865line.erase(it->first, (it->second-it->first)+1);
874 int&numUnmatchedLeftBrackets,
882list<pair<size_t,size_t>> commentLimits;
887(numUnmatchedLeftBrackets == 0) &&
892 const auto len= line.size();
894 for(
size_tindex=0; index<
len; ++index) {
895 const auto& c = line[index];
899numUnmatchedLeftBrackets==0) {
907++numUnmatchedLeftBrackets;
908 if(numUnmatchedLeftBrackets==1) {
914--numUnmatchedLeftBrackets;
915 if(numUnmatchedLeftBrackets==0) {
917commentLimits.push_back(make_pair(start, stop));
921 if(numUnmatchedLeftBrackets == 0 &&
927 if(numUnmatchedLeftBrackets && !inCommand) {
928commentLimits.push_back(make_pair(start,
len-1));
931 for(
autoit = commentLimits.crbegin();
932it != commentLimits.crend();
934line.erase(it->first, (it->second-it->first)+1);
946 intnumUnmatchedLeftBrackets = 0;
950 if(it->mData.empty()) {
string BadCharCountPrintf(int expectedCount, int actualCount)
END_ENUM_INFO string ErrorPrintf(const char *format,...)
thread_local unique_ptr< CAlnErrorReporter > theErrorReporter
static void sStripNexusComments(string &line, int &numUnmatchedLeftBrackets)
virtual void xAdjustSequenceInfo(CSequenceInfo &) override
void xProcessTaxaBlockCommand(TCommand &command, CSequenceInfo &sequenceInfo)
static void sStripNexusCommentsFromCommand(TCommandArgs &command)
void xProcessNCBIBlockCommand(TCommand &command, CSequenceInfo &sequenceInfo)
void xProcessFormat(const TCommandArgs &args)
void xProcessDataBlockCommand(TCommand &command, CSequenceInfo &sequenceInfo)
void xEndBlock(int lineNum)
void xProcessMatrix(const TCommandArgs &args)
bool xUnexpectedEndBlock(TCommand &command)
pair< TCommandArgs::const_iterator, size_t > xGetArgPos(const TCommandArgs &args, const string &token) const
void xBeginBlock(const TCommandArgs &command)
TCommand::TArgs TCommandArgs
pair< string, int > xGetKeyVal(const TCommandArgs &command, const string &key)
void xProcessDimensions(const TCommandArgs &args)
TCommandArgs TCommandTokens
static void sStripCommentsOutsideCommand(string &line, int &numUnmatchedLeftBrackets, bool &inCommand)
void xImportAlignmentData(CSequenceInfo &, CLineInput &) override
void xProcessSequin(const TCommandArgs &args)
void xProcessCommand(const TCommandTokens &commandTokens, CSequenceInfo &sequenceInfo)
static size_t sFindCharOutsideComment(char c, const string &line, int &numUnmatchedLeftBrackets, size_t startPos=0)
vector< TLineInfo > mSeqIds
vector< vector< TLineInfo > > mSequences
vector< TLineInfo > mDeflines
virtual bool ReadLine(string &line, int &lineNum)=0
CSequenceInfo & SetMissing(const string &c)
CSequenceInfo & SetBeginningGap(const string &c)
CSequenceInfo & SetEndGap(const string &c)
CSequenceInfo & SetMiddleGap(const string &c)
CSequenceInfo & SetMatch(const string &c)
The NCBI C++ standard methods for dealing with std::string.
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
static string & ToLower(string &str)
Convert string to lower case – string& version.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
void CheckId(const string &seqId, const vector< SLineInfo > &orderedIds, int idCount, int lineNum, bool firstBlock)
void ProcessDefline(const string &defLine, string &seqId, string &defLineInfo)
const struct ncbi::grid::netcache::search::fields::KEY key
@ eAlnSubcode_UnexpectedCommandArgs
@ eAlnSubcode_BadSequenceCount
@ eAlnSubcode_UnterminatedComment
@ eAlnSubcode_UnexpectedCommand
@ eAlnSubcode_IllegalDataDescription
@ eAlnSubcode_UnterminatedCommand
@ eAlnSubcode_IllegalDataLine
@ eAlnSubcode_BadDataCount
@ eAlnSubcode_IllegalDefinitionLine
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4