& strToken )
204 size_ttokenSize = strToken.size();
205 if(tokenSize == 0) {
208 if(tokenSize == 1 && strToken[0] ==
'0') {
211 if(strToken[0] <
'1'||
'9'< strToken[0]) {
214 for(
size_t i=1;
i<tokenSize; ++
i) {
215 if(strToken[
i] <
'0'||
'9'< strToken[
i]) {
224 const string& strToken )
227 if( ! strToken.empty() && (strToken[0] ==
'-'|| strToken[0] ==
'+')) {
235 const string& strToken )
237 stringtoken( strToken );
239 if( token.size() > 1 && token[0] ==
'-') {
242 if(token.size() > 1 && token[0] ==
'0') {
252 for(
const char* s =
"ACGNTU"; *s; ++s ) {
258 for(
const char* s =
"BDHKMRSVWY"; *s; ++s ) {
264 for(
const char* s =
"ACDEFGHIKLMNPQRSTVWYBZX"; *s; ++s ) {
272 for(
const char* s =
"\r\n"; *s; ++s ) {
276 for(
intc = 1; c < 256; ++c ) {
277 if(
isalpha((
unsigned char)c) )
279 if(
isdigit((
unsigned char)c) )
281 if(
isspace((
unsigned char)c) )
292 autoformatIt = sm_FormatNames.find(
format);
293 if(formatIt == sm_FormatNames.end()) {
295 "CFormatGuess::GetFormatName: out-of-range format value " 298 returnformatIt->second;
312length = (unsigned)::strlen(
str);
315 unsigned intmain_nuc_content = 0, ambig_content = 0, bad_nuc_content = 0,
316amino_acid_content = 0, exotic_aa_content = 0, bad_aa_content = 0;
318 for(
unsigned i= 0;
i< length; ++
i) {
319 unsigned charc =
str[
i];
330++amino_acid_content;
338 switch(strictness) {
341 doubledna_content = (double)main_nuc_content / (
double)length;
342 doubleprot_content = (double)amino_acid_content / (
double)length;
344 if(dna_content > 0.7) {
347 if(prot_content > 0.7) {
353 if(bad_nuc_content + ambig_content <= main_nuc_content / 9
354|| (bad_nuc_content + ambig_content <= main_nuc_content / 3 &&
355bad_nuc_content <= (main_nuc_content + ambig_content) / 19)) {
358}
else if(bad_aa_content + exotic_aa_content
359<= amino_acid_content / 9) {
365 if(bad_nuc_content == 0 && ambig_content <= main_nuc_content / 3) {
367}
else if(bad_aa_content == 0
368&& exotic_aa_content <= amino_acid_content / 9) {
399, m_bOwnsStream(
true)
400, m_iTestBufferSize(0)
407 const string& FileName )
408: m_Stream( * new
CNcbiIfstream( FileName.c_str(), ios::binary ) )
409, m_bOwnsStream(
true)
418, m_bOwnsStream(
false)
602 "CFormatGuess::x_TestFormat(): Unsupported format ID ("+
612 "sm_FormatNames does not list all possible formats");
643 conststreamsize k_TestBufferGranularity = 8096;
662 if(Multiplier >= 1024) {
714 while( ! TestBuffer.fail() ) {
718 if(!strLine.empty()) {
721 size_t size= strLine.size();
722 boolis_header =
size> 0 && strLine[0] ==
'>';
723 for(
size_t i=0;
i<
size; ++
i) {
724 unsigned charc = strLine[
i];
730 else if(c ==
'{'|| c ==
'}') {
778 if(line.size()<minLength) {
783 for(
autoc : line) {
785 autoindex =
static_cast<int>(c);
797 return(nucCount/line.size() > 0.9);
814 boolfoundId =
false;
837 unsigned intuGtfLineCount = 0;
845 if( it->empty() || (*it)[0] ==
'#') {
859 return(uGtfLineCount != 0);
871 unsigned intuGvfLineCount = 0;
879 if( it->empty() || (*it)[0] ==
'#') {
896 return(uGvfLineCount != 0);
909 unsigned intuGffLineCount = 0;
920 if( it->empty() || (*it)[0] ==
'#') {
934 return(uGffLineCount != 0);
947 unsigned intuGffLineCount = 0;
958 if( it->empty() || (*it)[0] ==
'#') {
972 return(uGffLineCount != 0);
985 unsigned intuGffLineCount = 0;
993 if( it->empty() || (*it)[0] ==
'#') {
1007 return(uGffLineCount != 0);
1022 if(it->empty() || (*it)[0] !=
'>') {
1068 const intBUFFSIZE = 8096;
1084 boolis_nexus =
false;
1085 boolhas_trees =
false;
1086 const size_tcheck_size = 12;
1089 if(
NPOS!= it->find(
"#NEXUS") ) {
1103 chartest_buf[
read_size+ check_size + 1];
1104memset(test_buf,
' ', check_size);
1106 size_tmax_reads = 32768;
1107 for(
size_t i= 0;
i< max_reads; ++
i) {
1109 size_tnum_read =
m_Stream.gcount();
1111test_buf[num_read + check_size] = 0;
1119strncpy(test_buf, test_buf + num_read, check_size);
1140 const size_tmaxSampleSize = 8*1024-1;
1141 size_tsampleSize = 0;
1142 char* pSample =
new char[maxSampleSize+1];
1145 m_Stream.read(pSample, maxSampleSize);
1146sampleSize = (size_t)
m_Stream.gcount();
1149 if(0 == sampleSize) {
1153pSample[sampleSize] = 0;
1184 return(conf ==
eYes);
1200list<string>::const_iterator iter =
m_TestLines.begin();
1205 if(toks.size() != 1 ||
1206toks.front().find_first_not_of(
"0123456789") != string::npos) {
1213 for(
size_t i= 1; iter !=
m_TestLines.end(); ++
i, ++iter) {
1216 if(toks.size() !=
i) {
1218list<string>::const_iterator it = iter;
1225list<string>::const_iterator it = toks.begin();
1226 for(++it; it != toks.end(); ++it) {
1267 if(it->find(
">Feature ") != 0 && it->find(
">Features ") != 0) {
1303 static const char* known_types[] = {
1306 for(
size_t i=0;
i<
ArraySize(known_types); ++
i) {
1340 if(
NPOS!= it->find(
"#NEXUS") ) {
1351 for(
autoc : line) {
1368vector<string> toks;
1370 const size_tnum_toks = toks.size();
1372 if(num_toks != 2 &&
1377 const string& seqdata = toks[1];
1380 unsigned intcumulated_res = 0;
1381 if(num_toks == 3) {
1383 if(cumulated_res == 0) {
1396 if(num_toks == 3) {
1397 size_tnum_gaps =
count(seqdata.begin(), seqdata.end(),
'-');
1398 if(((seqdata.size() - num_gaps) > cumulated_res)) {
1405seg_length = seqdata.size();
1415 structSClustalBlockInfo
1418 unsigned intm_Size;
1422 voidReset(
void) {
1428SClustalBlockInfo() { Reset(); }
1446SClustalBlockInfo block_info;
1448 boolhas_valid_block =
false;
1449 size_tseg_length = 0;
1450 size_tseg_length_prev = 0;
1455 while( !TestBuffer.eof() ) {
1463 if(TestBuffer.fail()) {
1472 if(block_info.m_InBlock) {
1473 if(block_info.m_Size < 2) {
1482 if(! block_info.m_InBlock || block_info.m_Size<2) {
1494 if(seg_length > 60) {
1497 if(block_info.m_InBlock) {
1498 if(seg_length != seg_length_prev) {
1501has_valid_block =
true;
1504 if(block_info.m_Ids.find(seq_id) != block_info.m_Ids.end()) {
1507block_info.m_Ids.insert(seq_id);
1509seg_length_prev = seg_length;
1510block_info.m_InBlock =
true;
1511++(block_info.m_Size);
1514 returnhas_valid_block;
1522list<string>::const_iterator iter =
m_TestLines.begin();
1528 for(
size_t i=5;
i<7; ++
i)
1535 if(iter->empty() || (*iter)[0] ==
'#'|| (*iter)[0] ==
';') {
1541ncols = toks.size();
1552 if(iter->empty() || (*iter)[0] ==
'#'|| (*iter)[0] ==
';') {
1558 if(toks.size() != ncols) {
1559list<string>::const_iterator it = iter;
1568 for(
const auto& token : toks) {
1569 autoit = find_if(token.begin(), token.end(),
1570[](
unsigned charc){ return !isprint(c); });
1571 if(it != token.end()) {
1576 return( nlines >= 3 );
1665 if( dAlNumFraction < 0.8 ) {
1671 if( dDnaFraction > 0.91 || dAaFraction > 0.91 ) {
1698 if( dAlNumFraction < 0.80 ) {
1706 while( ! TestBuffer.fail() ) {
1707vector<string> Fields;
1713 return( Fields.size() >= 2 && Fields[1] ==
"::="&&
isalpha(Fields[0][0]));
1736 intrsid, chr, pos, numMatched;
1737numMatched = sscanf( it->c_str(),
"rs%d\t%d\t%d", &rsid, &chr, &pos);
1738 if( numMatched == 3) {
1755 boolbTrackLineFound(
false);
1756 boolbHasStartAndStop (
false);
1757 size_tcolumncount = 0;
1760 if(
str.empty() ) {
1765 if(
str.find(
"chr ") == 0 ||
1766 str.find(
"Chr ") == 0 ||
1767 str.find(
"CHR ") == 0)
1775bTrackLineFound =
true;
1790 if(
columns.size() != columncount ) {
1791 if( columncount == 0 ) {
1792columncount =
columns.size();
1801bHasStartAndStop =
true;
1806 return(bHasStartAndStop || bTrackLineFound);
1818 boolLineFound =
false;
1819 size_tcolumncount = 15;
1840 if(
columns.size() != columncount ) {
1851 if(strand !=
"+"&& strand !=
"-")
1898 const intBUFFSIZE = 1024;
1911 unsigned intuHgvsLineCount = 0;
1915 if( it->empty() || (*it)[0] ==
'#') {
1923 return(uHgvsLineCount != 0);
2098 boolignoreFirstColumn =
false;
2099 unsigned intuPslLineCount = 0;
2107 if(!
IsLinePsl(*it, ignoreFirstColumn)) {
2108ignoreFirstColumn =
true;
2109 if(!
IsLinePsl(*it, ignoreFirstColumn)) {
2116 if( !
IsLinePsl(*it, ignoreFirstColumn) ) {
2121 return(uPslLineCount != 0);
2127list<string>::iterator& lineIt,
2128list<string>::iterator endIt,
2133 if(lineIt == endIt) {
2136 if(lineIt->size() > 79) {
2140vector<int> validIndents = {0, 2, 3, 5, 12, 21};
2141 autofirstNotBlank = lineIt->find_first_not_of(
" ");
2142 while(firstNotBlank != 0) {
2143 if(std::find(validIndents.begin(), validIndents.end(), firstNotBlank) ==
2144validIndents.end()) {
2145 autofirstNotBlankOrDigit = lineIt->find_first_not_of(
" 1234567890");
2146 if(firstNotBlankOrDigit != 10) {
2151 if(lineIt == endIt) {
2154firstNotBlank = lineIt->find_first_not_of(
" ");
2184 stringkeyword,
data, lookingFor;
2190lookingFor =
"LOCUS";
2191 if(keyword != lookingFor) {
2199lookingFor =
"DEFINITION";
2200 if(keyword != lookingFor) {
2203 while(keyword == lookingFor) {
2209lookingFor =
"ACCESSION";
2210 if(keyword != lookingFor) {
2213 while(keyword == lookingFor) {
2219 boolnidSeen =
false;
2220lookingFor =
"NID";
2221 if(keyword == lookingFor) {
2228lookingFor =
"VERSION";
2229 if(keyword != lookingFor) {
2237lookingFor =
"NID";
2238 if(keyword == lookingFor) {
2245lookingFor =
"PROJECT";
2246 while(keyword == lookingFor) {
2252lookingFor =
"DBLINK";
2253 while(keyword == lookingFor) {
2259lookingFor =
"KEYWORDS";
2260 if(keyword != lookingFor) {
2272list<string>::iterator& lineIt,
2273list<string>::iterator endIt,
2281 if(lineIt == endIt) {
2313 stringlineCode, lineData, lookingFor;
2320 if(lineCode != lookingFor) {
2329 if(lineCode != lookingFor) {
2332 while(lineCode == lookingFor) {
2339 while(lineCode == lookingFor) {
2346 for(
int i= 0;
i< 2; ++
i) {
2347 if(lineCode != lookingFor) {
2356 if(lineCode != lookingFor) {
2359 while(lineCode == lookingFor) {
2366 if(lineCode != lookingFor) {
2369 while(lineCode == lookingFor) {
2376 if(lineCode != lookingFor) {
2379 while(lineCode == lookingFor) {
2386 if(lineCode != lookingFor) {
2389 while(lineCode == lookingFor) {
2402list<string>::iterator& lineIt,
2403list<string>::iterator endIt,
2408 if(lineIt == endIt) {
2444 stringlineCode, lineData, lookingFor;
2451 if(lineCode != lookingFor) {
2460 if(lineCode != lookingFor) {
2463 while(lineCode == lookingFor) {
2470 for(
int i= 0;
i< 3; ++
i) {
2471 if(lineCode != lookingFor) {
2481 if(lineCode != lookingFor) {
2484 while(lineCode == lookingFor) {
2491 if(lineCode !=
"GN"&& lineCode !=
"OS") {
2531 if(
limits.size()%2 == 1) {
2534testString +=
"\"";
2535 limits.push_back(testString.size()-1);
2541 stringcomplement =
"";
2543 autoit =
limits.begin();
2544 size_tcomp_interval_start = 0;
2545 while(it !=
limits.end()) {
2546 const size_tstring_start = *it++;
2547 if(string_start > comp_interval_start) {
2548 const size_tcomp_interval_length = string_start-comp_interval_start;
2549complement += testString.substr(comp_interval_start, comp_interval_length);
2552 const size_tstring_stop = *it++;
2553comp_interval_start = string_stop+1;
2556 if(comp_interval_start < testString.size()) {
2557complement += testString.substr(comp_interval_start);
2560testString = complement;
2570 const string& double_quotes = R
"(")"; 2572 boolis_start =
true;
2575 while( pos !=
NPOS) {
2582is_start = !is_start;
2589 size_ts_GetPrecedingFslashCount(
const string&
input,
const size_tpos)
2592pos >=
input.size() ||
2598 intcurrent_pos =
static_cast<int>(pos)-1;
2599 size_tnum_fslash = 0;
2600 while( current_pos >= 0 &&
input[current_pos] ==
'\\') {
2612 const string& double_quotes = R
"(")"; 2617 while(pos !=
NPOS) {
2618 const size_tnum_fslash = s_GetPrecedingFslashCount(
input, pos);
2621 if(num_fslash%2 == 0) {
2638list<string> subStrings;
2642 for(
autoit = subStrings.cbegin(); it != subStrings.cend(); ++it) {
2643 const stringsubString = *it;
2647 if(it == subStrings.cend()) {
2648testString = subString;
2673 const stringextendedString = testString +
"0";
2697 const size_tstringSize = testString.size();
2699 if(stringSize > 4) {
2703 const stringnullString(
"null");
2704 const stringtrueString(
"true");
2705 const stringfalseString(
"false");
2707 if(testString == nullString.substr(0, stringSize) ||
2708testString == trueString.substr(0, stringSize) ||
2709testString == falseString.substr(0, stringSize)) {
2725list<string> numStrings;
2729 for(
autonumString : numStrings) {
2744 if(testString.find_first_of(
"()") != string::npos) {
2748 const size_tpunctuation_threshold = 4;
2764 size_tinitial_len = testString.size();
2773 returntestString.size() - initial_len;
2793 const autonext_pos = testString.find_first_not_of(
"( \t\r\n",1);
2794 if(next_pos !=
NPOS&& testString[next_pos] ==
'\"') {
2860 stringlabels_1st_line[] = {
"SW",
"perc",
"query",
"position",
"matching",
""};
2861 stringlabels_2nd_line[] = {
"score",
"div.",
"del.",
"ins.",
"sequence",
""};
2881 size_tcurrent_offset = 0;
2882 for(
size_t i=0; labels_1st_line[
i] !=
""; ++
i) {
2883current_offset =
NStr::FindCase( *it, labels_1st_line[
i], current_offset );
2884 if( current_offset ==
NPOS) {
2897 for(
size_tj=0; labels_2nd_line[j] !=
""; ++j ) {
2898current_offset =
NStr::FindCase( *it, labels_2nd_line[j], current_offset );
2899 if( current_offset ==
NPOS) {
2948 const string& cline )
2965 if( line.empty() || line[0] !=
'(') {
2971 boolin_comment =
false;
2972 for(
size_tii=0; line.c_str()[ii] != 0; ++ii ) {
2973 if( ! in_comment ) {
2974 if( line.c_str()[ii] !=
'[') {
2975trimmed += line.c_str()[ii];
2982 if( line.c_str()[ii] ==
']') {
2983in_comment =
false;
2992 boolin_quote =
false;
2993 for(
size_tii=0; line.c_str()[ii] != 0; ++ii ) {
2995 if( line.c_str()[ii] !=
'\'') {
2996trimmed += line.c_str()[ii];
3004 if( line.c_str()[ii] ==
'\'') {
3015 while( line.c_str()[ii] != 0 ) {
3016 if( line.c_str()[ii] !=
':') {
3017trimmed += line.c_str()[ii++];
3021 if( line.c_str()[ii] ==
'-'|| line.c_str()[ii] ==
'+') {
3024 while(
'0'<= line.c_str()[ii] && line.c_str()[ii] <=
'9') {
3027 if( line.c_str()[ii] ==
'.') {
3029 while(
'0'<= line.c_str()[ii] && line.c_str()[ii] <=
'9') {
3039 if(line.empty() || line[0] !=
'(') {
3042 size_tparen_count = 1;
3043 for(
size_tii=1; line.c_str()[ii] != 0; ++ii ) {
3044 switch( line.c_str()[ii] ) {
3051 if( paren_count == 0 ) {
3057 if( paren_count == 0 ) {
3075 const string& line )
3080 SIZE_TYPEpos = line.find_first_not_of(
"0123456789 \t");
3081 if(pos ==
NPOS|| pos + 45 >= line.size()) {
3086 charc = line[pos +
i];
3087 if(
i% 11 == 10) {
3092 if( !
isalpha(c) && c !=
'-'&& c !=
'*') {
3104 const string&
label)
3109 if(
NPOS!=
label.find_first_of(
"[]") ) {
3112 size_tcolon =
label.find(
':');
3113 if(
NPOS== colon ) {
3116 size_tdot =
label.find_first_not_of(
"0123456789", colon + 1 );
3117 if(
NPOS== dot ) {
3120 if(
label[ dot ] !=
'.') {
3123 size_tend =
label.find_first_not_of(
"0123456789", dot + 1 );
3124 return(
NPOS== end );
3130 const string& strLine )
3136 stringline( strLine );
3137 size_tuCommentStart =
NStr::Find( line,
"#");
3139 if(
NPOS!= uCommentStart ) {
3140line = line.substr( 0, uCommentStart );
3143 if( line.empty() ) {
3147vector<string> tokens;
3152 if( tokens[1].
size() > 1 && tokens[1][0] ==
'-') {
3153tokens[1][0] =
'1';
3159 if( tokens[2].
size() > 1 && tokens[2][0] ==
'-') {
3160tokens[2][0] =
'1';
3166 if( tokens[3].
size() > 1 && tokens[3][0] ==
'-') {
3167tokens[3][0] =
'1';
3173 if( tokens[4].
size() != 1 ||
NPOS== tokens[4].find_first_of(
"ADFGPNOW") ) {
3176 if( tokens[4] ==
"N") {
3188 if( tokens.size() != 9 ) {
3191 if( tokens[8].
size() != 1 ||
NPOS== tokens[8].find_first_of(
"+-") ) {
3202 const string& line )
3206 if(toks.size() != 5) {
3210list<string>::iterator
i= toks.begin();
3228 if(frame < -3 || frame > 3) {
3243 const string& line )
3245vector<string> tokens;
3258 if( tokens[6].
size() != 1 ||
NPOS== tokens[6].find_first_of(
".+-") ) {
3261 if( tokens[7].
size() != 1 ||
NPOS== tokens[7].find_first_of(
".0123") ) {
3264 if( tokens.size() < 9 ||
3265(
NPOS== tokens[8].find(
"gene_id") &&
NPOS== tokens[8].find(
"transcript_id") ) ) {
3274 const string& line )
3277vector<string> tokens;
3290 booltypeOk =
false;
3292terms.push_back(
"snv");
3293terms.push_back(
"cnv");
3294terms.push_back(
"copy_number_variation");
3295terms.push_back(
"gain");
3296terms.push_back(
"copy_number_gain");
3297terms.push_back(
"loss");
3298terms.push_back(
"copy_number_loss");
3299terms.push_back(
"loss_of_heterozygosity");
3300terms.push_back(
"complex");
3301terms.push_back(
"complex_substitution");
3302terms.push_back(
"complex_sequence_alteration");
3303terms.push_back(
"indel");
3304terms.push_back(
"insertion");
3305terms.push_back(
"inversion");
3306terms.push_back(
"substitution");
3307terms.push_back(
"deletion");
3308terms.push_back(
"duplication");
3309terms.push_back(
"translocation");
3310terms.push_back(
"upd");
3311terms.push_back(
"uniparental_disomy");
3312terms.push_back(
"maternal_uniparental_disomy");
3313terms.push_back(
"paternal_uniparental_disomy");
3314terms.push_back(
"tandom_duplication");
3315terms.push_back(
"structural_variation");
3316terms.push_back(
"sequence_alteration");
3317 ITERATE(list<string>, termiter, terms) {
3330 if( tokens[6].
size() != 1 ||
NPOS== tokens[6].find_first_of(
".+-") ) {
3333 if( tokens[7].
size() != 1 ||
NPOS== tokens[7].find_first_of(
".0123") ) {
3338 stringattrs = tokens[8];
3339 if(string::npos == attrs.find(
"ID="))
3341 if(string::npos == attrs.find(
"Variant_seq=")) {
3350 const string& line )
3352vector<string> tokens;
3365 if( tokens[6].
size() != 1 ||
NPOS== tokens[6].find_first_of(
".+-?") ) {
3368 if( tokens[7].
size() != 1 ||
NPOS== tokens[7].find_first_of(
".0123") ) {
3371 if( tokens.size() < 9 || tokens[8].empty()) {
3374 if( tokens.size() >= 9 && tokens[8].size() > 1) {
3375 const string& col9 = tokens[8];
3394 const string& line )
3396vector<string> tokens;
3397 stringremaining(line),
head, tail;
3416 stringfeatureType =
head;
3437 const stringlegalStrands{
"+-.?"};
3439string::npos == legalStrands.find(
head)) {
3445 const stringlegalPhases{
".0123"};
3447string::npos == legalPhases.find(
head)) {
3453 if(remaining.empty()) {
3457 if(featureType ==
"gene") {
3466 if(featureType ==
"transcript") {
3487 const string& line )
3489vector<string> tokens;
3491 if( num_cols < 8 ) {
3503 if( tokens[6].
size() != 1 ||
NPOS== tokens[6].find_first_of(
".+-") ) {
3506 if( tokens[7].
size() != 1 ||
NPOS== tokens[7].find_first_of(
".0123") ) {
3515 const string& line )
3517vector<string> values;
3525 if( values[0] ==
"DNA") {
3532 if( values[0] ==
"AS") {
3543 const string& line )
3545 const size_tMIN_VALUES_PER_RECORD = 14;
3550list<string> values;
3560list<string>::iterator it = values.begin();
3603 if( *it !=
"+"&& *it !=
"C") {
3617 const string& line,
3618 boolignoreFirstLine)
3621vector<string> tokens;
3622 intfirstColumn = (ignoreFirstLine ? 1 : 0);
3624 if(tokens.size() - firstColumn != 21) {
3634 const string& token = tokens[firstColumn + 8];
3635 if(token.empty() || token.size() > 2) {
3638 if(token.find_first_not_of(
"-+") != string::npos) {
3663vector<string> hopefullyInts;
3665 if(hopefullyInts.size() != blockCount) {
3668 for(
autohopefulInt: hopefullyInts) {
3682 constvector<string>& Fields )
3684 if( Fields.size() == 0 ) {
3704 const size_tMIN_HIGH_RATIO = 20;
3705 size_thigh_count = 0;
3711 if( 0 < high_count &&
m_iTestDataSize/ high_count < MIN_HIGH_RATIO ) {
3721 if( string::npos !=
data.find(
"\r\n") ) {
3724 else if( string::npos !=
data.find(
"\n") ) {
3727 else if( string::npos !=
data.find(
"\r") ) {
3750 const doubleREQUIRED_ASCII_RATIO = 0.9;
3754 size_tcount_print = 0;
3760 if(count_print < (
double)
count* REQUIRED_ASCII_RATIO) {
3796 const string& line )
3801 ITERATE(
string, Iter, line) {
3804string::const_iterator NextI = Iter;
3806 if(NextI != line.end())
3812}
else if(State == 1) {
3815}
else if(State == 2) {
3823 if(
Char==
'm'&& Next ==
't') {
3829}
else if(State == 3) {
3834}
else if(State == 4) {
3840 return(State == 5);
bool IsEmpty(void) const
Check if any hints are set at all.
bool IsPreferred(TFormat fmt) const
Check if the format is listed as preferred.
bool IsDisabled(TFormat fmt) const
Check if the format is listed as disabled.
Class implements different ad-hoc unreliable file format identifications.
bool TestFormatLzo(EMode)
bool TestFormatJson(EMode)
bool x_IsTruncatedJsonKeyword(const string &testString) const
static bool IsLineGvf(const std::string &)
unsigned int m_iStatsCountBraces
bool TestFormatBinaryAsn(EMode)
bool TestFormatDistanceMatrix(EMode)
bool x_IsTruncatedJsonNumber(const string &testString) const
bool TestFormatBZip2(EMode)
bool TestFormatGff3(EMode)
bool x_CheckStripJsonNumbers(string &testString) const
bool TestFormatTable(EMode)
bool TestFormatTaxplot(EMode)
unsigned int m_iStatsCountData
bool x_TestTableDelimiter(const string &delims)
bool TestFormatSra(EMode)
bool TestFormatFlatFileUniProt(EMode)
size_t x_FindNextJsonStringStop(const string &input, const size_t from_pos) const
static bool IsLineGff3(const std::string &)
bool TestFormatAgp(EMode)
bool x_CheckJsonStart(const string &testString) const
bool TestFormatBed15(EMode)
bool x_IsBlankOrNumbers(const string &testString) const
static bool IsLineHgvs(const std::string &)
static bool IsLinePhrapId(const std::string &)
bool TestFormatFiveColFeatureTable(EMode)
static bool IsLineFlatFileSequence(const std::string &)
bool TestFormatGlimmer3(EMode)
static bool IsLabelNewick(const std::string &)
bool TestFormatBed(EMode)
bool TestFormatFlatFileSequence(EMode)
bool TestFormatFlatFileEna(EMode)
bool IsInputRepeatMaskerWithHeader()
bool TestFormat(EFormat, EMode)
bool TestFormatSnpMarkers(EMode)
bool x_LooksLikeCLUSTALConservedInfo(const string &line) const
static bool IsSupportedFormat(EFormat format)
bool TestFormatZip(EMode)
bool TestFormatNewick(EMode)
bool TestFormatCLUSTAL(void)
bool TestFormatWiggle(EMode)
EFormat
The formats are checked in the same order as declared here.
@ eBZip2
bzip2 compressed file
@ eSra
INSDC Sequence Read Archive file.
@ eFiveColFeatureTable
Five-column feature table.
@ eBinaryASN
Binary ASN.1.
@ eLzo
lzo compressed file
@ eFormat_max
Max value of EFormat.
@ eGff2
GFF2, CGff2Reader, any GFF-like that doesn't fit the others.
@ eBed
UCSC BED file format, CBedReader.
@ eGtf
New GTF, CGtfReader.
@ eGZip
GNU zip compressed file.
@ eZip
zip compressed file
@ eSnpMarkers
SNP Marker flat file.
@ eHgvs
HGVS, CHgvsParser.
@ eAgp
AGP format assembly, AgpRead.
@ eDistanceMatrix
Distance matrix file.
@ ePhrapAce
Phrap ACE assembly file.
@ eFlatFileSequence
GenBank/GenPept/DDBJ/EMBL flat-file sequence portion.
@ eGff3
GFF3, CGff3Reader.
@ eGtf_POISENED
Old and Dead GFF/GTF style annotations.
@ eGlimmer3
Glimmer3 predictions.
@ eFasta
FASTA format sequence record, CFastaReader.
@ eUnknown
unknown format
@ eGffAugustus
GFFish output of Augustus Gene Prediction.
@ eRmo
RepeatMasker Output.
@ eZstd
Zstandard (zstd) compressed data.
@ eUCSCRegion
UCSC Region file format.
@ eAlignment
Text alignment.
@ ePsl
PSL alignment format.
@ eBed15
UCSC BED15 or microarray format.
@ eWiggle
UCSC WIGGLE file format.
@ eBam
Binary alignment/map file.
bool TestFormatZstd(EMode)
bool TestFormatAugustus(EMode)
bool TestFormatBam(EMode)
void x_FindJsonStringLimits(const string &testString, list< size_t > &limits) const
bool x_IsNumber(const string &testString) const
unsigned int m_iStatsCountAaChars
static bool IsLinePsl(const std::string &, bool ignoreFirstColumn)
bool TestFormatGff2(EMode)
bool TestFormatAlignment(EMode)
bool TestFormatFasta(EMode)
void x_StripJsonStrings(string &testString) const
streamsize m_iTestBufferSize
bool TestFormatGvf(EMode)
static bool x_TestInput(CNcbiIstream &input, EOnError onerror)
static bool IsLineGtf(const std::string &)
EFormat GuessFormat(EMode)
static bool IsLineRmo(const std::string &)
bool TestFormatPsl(EMode)
unsigned int m_iStatsCountAlNumChars
static bool IsSampleNewick(const std::string &)
void x_StripJsonKeywords(string &testString) const
std::list< std::string > m_TestLines
bool TestFormatPhrapAce(EMode)
streamsize m_iTestDataSize
bool TestFormatXml(EMode)
bool TestFormatTextAsn(EMode)
unsigned int m_iStatsCountDnaChars
size_t x_StripJsonPunctuation(string &testString) const
static bool IsAsnComment(const vector< string > &)
bool x_CheckStripJsonPunctuation(string &testString) const
static bool IsLineGff2(const std::string &)
static bool IsLineGlimmer3(const std::string &)
static const char * GetFormatName(EFormat format)
static bool IsLineAugustus(const std::string &)
@ eDefault
Return eUnknown.
@ eThrowOnBadSource
Throw an exception if the data source (stream, file) can't be read.
@ eST_Lax
Implement historic behavior, risking false positives.
@ eST_Strict
Require 100% encodability of printable non-digits.
@ eST_Default
Be relatively strict, but still allow for typos.
bool x_TryProcessCLUSTALSeqData(const string &line, string &id, size_t &seg_length) const
bool TestFormatVcf(EMode)
static EFormat Format(const string &path, EOnError onerror=eDefault)
Guess file format.
bool TestFormatHgvs(EMode)
bool TestFormatGtf(EMode)
bool TestFormatRepeatMasker(EMode)
static bool IsLineAgp(const std::string &)
bool TestFormatGZip(EMode)
static ESequenceType SequenceType(const char *str, unsigned length=0, ESTStrictness strictness=eST_Default)
Guess sequence type.
bool TestFormatFlatFileGenbank(EMode)
bool IsInputRepeatMaskerWithoutHeader()
bool x_TestFormat(EFormat format, EMode mode)
class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Concept for reading and writing characters.
static const TFormatNamesItem s_format_to_name_table[]
static bool s_IsTokenPosInt(const string &strToken)
static unsigned char symbol_type_table[256]
void SkipCommentAndBlank(CTempString &text)
bool EnaGetLineData(list< string >::iterator &lineIt, list< string >::iterator endIt, string &lineCode, string &lineData)
DEFINE_STATIC_ARRAY_MAP(TFormatNamesMap, sm_FormatNames, s_format_to_name_table)
bool GenbankGetKeywordLine(list< string >::iterator &lineIt, list< string >::iterator endIt, string &keyword, string &data)
static bool s_IsTokenDouble(const string &strToken)
static bool s_IsTokenInteger(const string &strToken)
constexpr size_t sm_CheckOrder_Size
@ fProtein_Alphabet
Allows BZX*-, but not JOU.
@ fDNA_Main_Alphabet
Just ACGTUN-.
@ fDNA_Ambig_Alphabet
Anything else representable in ncbi4na.
SStaticPair< CFormatGuess::EFormat, const char * > TFormatNamesItem
static bool s_LooksLikeNucSeqData(const string &line, size_t minLength=10)
static const CFormatGuess::EFormat sm_CheckOrder[]
bool UniProtGetLineData(list< string >::iterator &lineIt, list< string >::iterator endIt, string &lineCode, string &lineData)
CStaticPairArrayMap< CFormatGuess::EFormat, const char * > TFormatNamesMap
static void init_symbol_type_table(void)
static const char * str(char *buf, int n)
static const char * column
static const TDS_WORD limits[]
static const column_t columns[]
constexpr size_t ArraySize(const Element(&)[Size])
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NCBI_ASSERT(expr, mess)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
char Char
Alias for char.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
CNcbiIstream & NcbiGetline(CNcbiIstream &is, string &str, char delim, string::size_type *count=NULL)
Read from "is" to "str" up to the delimiter symbol "delim" (or EOF)
static void Stepback(CNcbiIstream &is, CT_CHAR_TYPE *buf, streamsize buf_size, void *del_ptr=0)
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
NCBI_NS_STD::string::size_type SIZE_TYPE
static CTempString TruncateSpaces_Unsafe(const CTempString str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
static int StringToNonNegativeInt(const CTempString str, TStringToNumFlags flags=0)
Convert string to non-negative integer value.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static double StringToDouble(const CTempStringEx str, TStringToNumFlags flags=0)
Convert string to double.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static SIZE_TYPE FindCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case sensitive search.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
size_type length(void) const
Return the length of the represented array.
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
static const size_type npos
@ fConvErr_NoThrow
Do not throw an exception on error.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
@ eTrunc_Begin
Truncate leading whitespace only.
@ eNocase
Case insensitive compare.
@ eCase
Case sensitive compare.
static const char label[]
unsigned int
A callback function used to compare two keys in a database.
The blob sat and sat key. Both must be positive integers. Non-empty string. The interpretation of the blob id depends on a processor. Cassandra processor expects the following format
static void text(MDB_val *v)
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
std::istream & in(std::istream &in_, double &x_)
static size_t read_size(CNcbiIstream &stream, const char *name)
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4