A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/format__guess_8cpp_source.html below:

NCBI C++ ToolKit: src/util/format_guess.cpp Source File

201  const string

& strToken )

204  size_t

tokenSize = strToken.size();

205  if

(tokenSize == 0) {

208  if

(tokenSize == 1 && strToken[0] ==

'0'

) {

211  if

(strToken[0] <

'1'

||

'9'

< strToken[0]) {

214  for

(

size_t i

=1;

i

<tokenSize; ++

i

) {

215  if

(strToken[

i

] <

'0'

||

'9'

< strToken[

i

]) {

224  const string

& strToken )

227  if

( ! strToken.empty() && (strToken[0] ==

'-'

|| strToken[0] ==

'+'

)) {

235  const string

& strToken )

237  string

token( strToken );

239  if

( token.size() > 1 && token[0] ==

'-'

) {

242  if

(token.size() > 1 && token[0] ==

'0'

) {

252  for

(

const char

* s =

"ACGNTU"

; *s; ++s ) {

258  for

(

const char

* s =

"BDHKMRSVWY"

; *s; ++s ) {

264  for

(

const char

* s =

"ACDEFGHIKLMNPQRSTVWYBZX"

; *s; ++s ) {

272  for

(

const char

* s =

"\r\n"

; *s; ++s ) {

276  for

(

int

c = 1; c < 256; ++c ) {

277  if

(

isalpha

((

unsigned char

)c) )

279  if

(

isdigit

((

unsigned char

)c) )

281  if

(

isspace

((

unsigned char

)c) )

292  auto

formatIt = sm_FormatNames.find(

format

);

293  if

(formatIt == sm_FormatNames.end()) {

295  "CFormatGuess::GetFormatName: out-of-range format value " 298  return

formatIt->second;

312

length = (unsigned)::strlen(

str

);

315  unsigned int

main_nuc_content = 0, ambig_content = 0, bad_nuc_content = 0,

316

amino_acid_content = 0, exotic_aa_content = 0, bad_aa_content = 0;

318  for

(

unsigned i

= 0;

i

< length; ++

i

) {

319  unsigned char

c =

str

[

i

];

330

++amino_acid_content;

338  switch

(strictness) {

341  double

dna_content = (double)main_nuc_content / (

double

)length;

342  double

prot_content = (double)amino_acid_content / (

double

)length;

344  if

(dna_content > 0.7) {

347  if

(prot_content > 0.7) {

353  if

(bad_nuc_content + ambig_content <= main_nuc_content / 9

354

|| (bad_nuc_content + ambig_content <= main_nuc_content / 3 &&

355

bad_nuc_content <= (main_nuc_content + ambig_content) / 19)) {

358

}

else if

(bad_aa_content + exotic_aa_content

359

<= amino_acid_content / 9) {

365  if

(bad_nuc_content == 0 && ambig_content <= main_nuc_content / 3) {

367

}

else if

(bad_aa_content == 0

368

&& exotic_aa_content <= amino_acid_content / 9) {

399

, m_bOwnsStream(

true

)

400

, m_iTestBufferSize(0)

407  const string

& FileName )

408

: m_Stream( * new

CNcbiIfstream

( FileName.c_str(), ios::binary ) )

409

, m_bOwnsStream(

true

)

418

, m_bOwnsStream(

false

)

602  "CFormatGuess::x_TestFormat(): Unsupported format ID ("

+

612  "sm_FormatNames does not list all possible formats"

);

643  const

streamsize k_TestBufferGranularity = 8096;

662  if

(Multiplier >= 1024) {

714  while

( ! TestBuffer.fail() ) {

718  if

(!strLine.empty()) {

721  size_t size

= strLine.size();

722  bool

is_header =

size

> 0 && strLine[0] ==

'>'

;

723  for

(

size_t i

=0;

i

<

size

; ++

i

) {

724  unsigned char

c = strLine[

i

];

730  else if

(c ==

'{'

|| c ==

'}'

) {

778  if

(line.size()<minLength) {

783  for

(

auto

c : line) {

785  auto

index =

static_cast<int>

(c);

797  return

(nucCount/line.size() > 0.9);

814  bool

foundId =

false

;

837  unsigned int

uGtfLineCount = 0;

845  if

( it->empty() || (*it)[0] ==

'#'

) {

859  return

(uGtfLineCount != 0);

871  unsigned int

uGvfLineCount = 0;

879  if

( it->empty() || (*it)[0] ==

'#'

) {

896  return

(uGvfLineCount != 0);

909  unsigned int

uGffLineCount = 0;

920  if

( it->empty() || (*it)[0] ==

'#'

) {

934  return

(uGffLineCount != 0);

947  unsigned int

uGffLineCount = 0;

958  if

( it->empty() || (*it)[0] ==

'#'

) {

972  return

(uGffLineCount != 0);

985  unsigned int

uGffLineCount = 0;

993  if

( it->empty() || (*it)[0] ==

'#'

) {

1007  return

(uGffLineCount != 0);

1022  if

(it->empty() || (*it)[0] !=

'>'

) {

1068  const int

BUFFSIZE = 8096;

1084  bool

is_nexus =

false

;

1085  bool

has_trees =

false

;

1086  const size_t

check_size = 12;

1089  if

(

NPOS

!= it->find(

"#NEXUS"

) ) {

1103  char

test_buf[

read_size

+ check_size + 1];

1104

memset(test_buf,

' '

, check_size);

1106  size_t

max_reads = 32768;

1107  for

(

size_t i

= 0;

i

< max_reads; ++

i

) {

1109  size_t

num_read =

m_Stream

.gcount();

1111

test_buf[num_read + check_size] = 0;

1119

strncpy(test_buf, test_buf + num_read, check_size);

1140  const size_t

maxSampleSize = 8*1024-1;

1141  size_t

sampleSize = 0;

1142  char

* pSample =

new char

[maxSampleSize+1];

1145  m_Stream

.read(pSample, maxSampleSize);

1146

sampleSize = (size_t)

m_Stream

.gcount();

1149  if

(0 == sampleSize) {

1153

pSample[sampleSize] = 0;

1184  return

(conf ==

eYes

);

1200

list<string>::const_iterator iter =

m_TestLines

.begin();

1205  if

(toks.size() != 1 ||

1206

toks.front().find_first_not_of(

"0123456789"

) != string::npos) {

1213  for

(

size_t i

= 1; iter !=

m_TestLines

.end(); ++

i

, ++iter) {

1216  if

(toks.size() !=

i

) {

1218 

list<string>::const_iterator it = iter;

1225

list<string>::const_iterator it = toks.begin();

1226  for

(++it; it != toks.end(); ++it) {

1267  if

(it->find(

">Feature "

) != 0 && it->find(

">Features "

) != 0) {

1303  static const char

* known_types[] = {

1306  for

(

size_t i

=0;

i

<

ArraySize

(known_types); ++

i

) {

1340  if

(

NPOS

!= it->find(

"#NEXUS"

) ) {

1351  for

(

auto

c : line) {

1368

vector<string> toks;

1370  const size_t

num_toks = toks.size();

1372  if

(num_toks != 2 &&

1377  const string

& seqdata = toks[1];

1380  unsigned int

cumulated_res = 0;

1381  if

(num_toks == 3) {

1383  if

(cumulated_res == 0) {

1396  if

(num_toks == 3) {

1397  size_t

num_gaps =

count

(seqdata.begin(), seqdata.end(),

'-'

);

1398  if

(((seqdata.size() - num_gaps) > cumulated_res)) {

1405

seg_length = seqdata.size();

1415 struct

SClustalBlockInfo

1418  unsigned int

m_Size;

1422  void

Reset(

void

) {

1428

SClustalBlockInfo() { Reset(); }

1446

SClustalBlockInfo block_info;

1448  bool

has_valid_block =

false

;

1449  size_t

seg_length = 0;

1450  size_t

seg_length_prev = 0;

1455  while

( !TestBuffer.eof() ) {

1463  if

(TestBuffer.fail()) {

1472  if

(block_info.m_InBlock) {

1473  if

(block_info.m_Size < 2) {

1482  if

(! block_info.m_InBlock || block_info.m_Size<2) {

1494  if

(seg_length > 60) {

1497  if

(block_info.m_InBlock) {

1498  if

(seg_length != seg_length_prev) {

1501

has_valid_block =

true

;

1504  if

(block_info.m_Ids.find(seq_id) != block_info.m_Ids.end()) {

1507

block_info.m_Ids.insert(seq_id);

1509

seg_length_prev = seg_length;

1510

block_info.m_InBlock =

true

;

1511

++(block_info.m_Size);

1514  return

has_valid_block;

1522

list<string>::const_iterator iter =

m_TestLines

.begin();

1528  for

(

size_t i

=5;

i

<7; ++

i

)

1535  if

(iter->empty() || (*iter)[0] ==

'#'

|| (*iter)[0] ==

';'

) {

1541

ncols = toks.size();

1552  if

(iter->empty() || (*iter)[0] ==

'#'

|| (*iter)[0] ==

';'

) {

1558  if

(toks.size() != ncols) {

1559

list<string>::const_iterator it = iter;

1568  for

(

const auto

& token : toks) {

1569  auto

it = find_if(token.begin(), token.end(),

1570

[](

unsigned char

c){ return !isprint(c); });

1571  if

(it != token.end()) {

1576  return

( nlines >= 3 );

1665  if

( dAlNumFraction < 0.8 ) {

1671  if

( dDnaFraction > 0.91 || dAaFraction > 0.91 ) {

1698  if

( dAlNumFraction < 0.80 ) {

1706  while

( ! TestBuffer.fail() ) {

1707

vector<string> Fields;

1713  return

( Fields.size() >= 2 && Fields[1] ==

"::="

&&

isalpha

(Fields[0][0]));

1736  int

rsid, chr, pos, numMatched;

1737

numMatched = sscanf( it->c_str(),

"rs%d\t%d\t%d"

, &rsid, &chr, &pos);

1738  if

( numMatched == 3) {

1755  bool

bTrackLineFound(

false

);

1756  bool

bHasStartAndStop (

false

);

1757  size_t

columncount = 0;

1760  if

(

str

.empty() ) {

1765  if

(

str

.find(

"chr "

) == 0 ||

1766  str

.find(

"Chr "

) == 0 ||

1767  str

.find(

"CHR "

) == 0)

1775

bTrackLineFound =

true

;

1790  if

(

columns

.size() != columncount ) {

1791  if

( columncount == 0 ) {

1792

columncount =

columns

.size();

1801

bHasStartAndStop =

true

;

1806  return

(bHasStartAndStop || bTrackLineFound);

1818  bool

LineFound =

false

;

1819  size_t

columncount = 15;

1840  if

(

columns

.size() != columncount ) {

1851  if

(strand !=

"+"

&& strand !=

"-"

)

1898  const int

BUFFSIZE = 1024;

1911  unsigned int

uHgvsLineCount = 0;

1915  if

( it->empty() || (*it)[0] ==

'#'

) {

1923  return

(uHgvsLineCount != 0);

2098  bool

ignoreFirstColumn =

false

;

2099  unsigned int

uPslLineCount = 0;

2107  if

(!

IsLinePsl

(*it, ignoreFirstColumn)) {

2108

ignoreFirstColumn =

true

;

2109  if

(!

IsLinePsl

(*it, ignoreFirstColumn)) {

2116  if

( !

IsLinePsl

(*it, ignoreFirstColumn) ) {

2121  return

(uPslLineCount != 0);

2127

list<string>::iterator& lineIt,

2128

list<string>::iterator endIt,

2133  if

(lineIt == endIt) {

2136  if

(lineIt->size() > 79) {

2140

vector<int> validIndents = {0, 2, 3, 5, 12, 21};

2141  auto

firstNotBlank = lineIt->find_first_not_of(

" "

);

2142  while

(firstNotBlank != 0) {

2143  if

(std::find(validIndents.begin(), validIndents.end(), firstNotBlank) ==

2144

validIndents.end()) {

2145  auto

firstNotBlankOrDigit = lineIt->find_first_not_of(

" 1234567890"

);

2146  if

(firstNotBlankOrDigit != 10) {

2151  if

(lineIt == endIt) {

2154

firstNotBlank = lineIt->find_first_not_of(

" "

);

2184  string

keyword,

data

, lookingFor;

2190

lookingFor =

"LOCUS"

;

2191  if

(keyword != lookingFor) {

2199

lookingFor =

"DEFINITION"

;

2200  if

(keyword != lookingFor) {

2203  while

(keyword == lookingFor) {

2209

lookingFor =

"ACCESSION"

;

2210  if

(keyword != lookingFor) {

2213  while

(keyword == lookingFor) {

2219  bool

nidSeen =

false

;

2220

lookingFor =

"NID"

;

2221  if

(keyword == lookingFor) {

2228

lookingFor =

"VERSION"

;

2229  if

(keyword != lookingFor) {

2237

lookingFor =

"NID"

;

2238  if

(keyword == lookingFor) {

2245

lookingFor =

"PROJECT"

;

2246  while

(keyword == lookingFor) {

2252

lookingFor =

"DBLINK"

;

2253  while

(keyword == lookingFor) {

2259

lookingFor =

"KEYWORDS"

;

2260  if

(keyword != lookingFor) {

2272

list<string>::iterator& lineIt,

2273

list<string>::iterator endIt,

2281  if

(lineIt == endIt) {

2313  string

lineCode, lineData, lookingFor;

2320  if

(lineCode != lookingFor) {

2329  if

(lineCode != lookingFor) {

2332  while

(lineCode == lookingFor) {

2339  while

(lineCode == lookingFor) {

2346  for

(

int i

= 0;

i

< 2; ++

i

) {

2347  if

(lineCode != lookingFor) {

2356  if

(lineCode != lookingFor) {

2359  while

(lineCode == lookingFor) {

2366  if

(lineCode != lookingFor) {

2369  while

(lineCode == lookingFor) {

2376  if

(lineCode != lookingFor) {

2379  while

(lineCode == lookingFor) {

2386  if

(lineCode != lookingFor) {

2389  while

(lineCode == lookingFor) {

2402

list<string>::iterator& lineIt,

2403

list<string>::iterator endIt,

2408  if

(lineIt == endIt) {

2444  string

lineCode, lineData, lookingFor;

2451  if

(lineCode != lookingFor) {

2460  if

(lineCode != lookingFor) {

2463  while

(lineCode == lookingFor) {

2470  for

(

int i

= 0;

i

< 3; ++

i

) {

2471  if

(lineCode != lookingFor) {

2481  if

(lineCode != lookingFor) {

2484  while

(lineCode == lookingFor) {

2491  if

(lineCode !=

"GN"

&& lineCode !=

"OS"

) {

2531  if

(

limits

.size()%2 == 1) {

2534

testString +=

"\""

;

2535  limits

.push_back(testString.size()-1);

2541  string

complement =

""

;

2543  auto

it =

limits

.begin();

2544  size_t

comp_interval_start = 0;

2545  while

(it !=

limits

.end()) {

2546  const size_t

string_start = *it++;

2547  if

(string_start > comp_interval_start) {

2548  const size_t

comp_interval_length = string_start-comp_interval_start;

2549

complement += testString.substr(comp_interval_start, comp_interval_length);

2552  const size_t

string_stop = *it++;

2553

comp_interval_start = string_stop+1;

2556  if

(comp_interval_start < testString.size()) {

2557

complement += testString.substr(comp_interval_start);

2560

testString = complement;

2570  const string

& double_quotes = R

"(")"; 2572  bool

is_start =

true

;

2575  while

( pos !=

NPOS

) {

2582

is_start = !is_start;

2589 size_t

s_GetPrecedingFslashCount(

const string

&

input

,

const size_t

pos)

2592

pos >=

input

.size() ||

2598  int

current_pos =

static_cast<int>

(pos)-1;

2599  size_t

num_fslash = 0;

2600  while

( current_pos >= 0 &&

input

[current_pos] ==

'\\'

) {

2612  const string

& double_quotes = R

"(")"; 2617  while

(pos !=

NPOS

) {

2618  const size_t

num_fslash = s_GetPrecedingFslashCount(

input

, pos);

2621  if

(num_fslash%2 == 0) {

2638

list<string> subStrings;

2642  for

(

auto

it = subStrings.cbegin(); it != subStrings.cend(); ++it) {

2643  const string

subString = *it;

2647  if

(it == subStrings.cend()) {

2648

testString = subString;

2673  const string

extendedString = testString +

"0"

;

2697  const size_t

stringSize = testString.size();

2699  if

(stringSize > 4) {

2703  const string

nullString(

"null"

);

2704  const string

trueString(

"true"

);

2705  const string

falseString(

"false"

);

2707  if

(testString == nullString.substr(0, stringSize) ||

2708

testString == trueString.substr(0, stringSize) ||

2709

testString == falseString.substr(0, stringSize)) {

2725

list<string> numStrings;

2729  for

(

auto

numString : numStrings) {

2744  if

(testString.find_first_of(

"()"

) != string::npos) {

2748  const size_t

punctuation_threshold = 4;

2764  size_t

initial_len = testString.size();

2773  return

testString.size() - initial_len;

2793  const auto

next_pos = testString.find_first_not_of(

"( \t\r\n"

,1);

2794  if

(next_pos !=

NPOS

&& testString[next_pos] ==

'\"'

) {

2860  string

labels_1st_line[] = {

"SW"

,

"perc"

,

"query"

,

"position"

,

"matching"

,

""

};

2861  string

labels_2nd_line[] = {

"score"

,

"div."

,

"del."

,

"ins."

,

"sequence"

,

""

};

2881  size_t

current_offset = 0;

2882  for

(

size_t i

=0; labels_1st_line[

i

] !=

""

; ++

i

) {

2883

current_offset =

NStr::FindCase

( *it, labels_1st_line[

i

], current_offset );

2884  if

( current_offset ==

NPOS

) {

2897  for

(

size_t

j=0; labels_2nd_line[j] !=

""

; ++j ) {

2898

current_offset =

NStr::FindCase

( *it, labels_2nd_line[j], current_offset );

2899  if

( current_offset ==

NPOS

) {

2948  const string

& cline )

2965  if

( line.empty() || line[0] !=

'('

) {

2971  bool

in_comment =

false

;

2972  for

(

size_t

ii=0; line.c_str()[ii] != 0; ++ii ) {

2973  if

( ! in_comment ) {

2974  if

( line.c_str()[ii] !=

'['

) {

2975

trimmed += line.c_str()[ii];

2982  if

( line.c_str()[ii] ==

']'

) {

2983

in_comment =

false

;

2992  bool

in_quote =

false

;

2993  for

(

size_t

ii=0; line.c_str()[ii] != 0; ++ii ) {

2995  if

( line.c_str()[ii] !=

'\''

) {

2996

trimmed += line.c_str()[ii];

3004  if

( line.c_str()[ii] ==

'\''

) {

3015  while

( line.c_str()[ii] != 0 ) {

3016  if

( line.c_str()[ii] !=

':'

) {

3017

trimmed += line.c_str()[ii++];

3021  if

( line.c_str()[ii] ==

'-'

|| line.c_str()[ii] ==

'+'

) {

3024  while

(

'0'

<= line.c_str()[ii] && line.c_str()[ii] <=

'9'

) {

3027  if

( line.c_str()[ii] ==

'.'

) {

3029  while

(

'0'

<= line.c_str()[ii] && line.c_str()[ii] <=

'9'

) {

3039  if

(line.empty() || line[0] !=

'('

) {

3042  size_t

paren_count = 1;

3043  for

(

size_t

ii=1; line.c_str()[ii] != 0; ++ii ) {

3044  switch

( line.c_str()[ii] ) {

3051  if

( paren_count == 0 ) {

3057  if

( paren_count == 0 ) {

3075  const string

& line )

3080  SIZE_TYPE

pos = line.find_first_not_of(

"0123456789 \t"

);

3081  if

(pos ==

NPOS

|| pos + 45 >= line.size()) {

3086  char

c = line[pos +

i

];

3087  if

(

i

% 11 == 10) {

3092  if

( !

isalpha

(c) && c !=

'-'

&& c !=

'*'

) {

3104  const string

&

label

)

3109  if

(

NPOS

!=

label

.find_first_of(

"[]"

) ) {

3112  size_t

colon =

label

.find(

':'

);

3113  if

(

NPOS

== colon ) {

3116  size_t

dot =

label

.find_first_not_of(

"0123456789"

, colon + 1 );

3117  if

(

NPOS

== dot ) {

3120  if

(

label

[ dot ] !=

'.'

) {

3123  size_t

end =

label

.find_first_not_of(

"0123456789"

, dot + 1 );

3124  return

(

NPOS

== end );

3130  const string

& strLine )

3136  string

line( strLine );

3137  size_t

uCommentStart =

NStr::Find

( line,

"#"

);

3139  if

(

NPOS

!= uCommentStart ) {

3140

line = line.substr( 0, uCommentStart );

3143  if

( line.empty() ) {

3147

vector<string> tokens;

3152  if

( tokens[1].

size

() > 1 && tokens[1][0] ==

'-'

) {

3153

tokens[1][0] =

'1'

;

3159  if

( tokens[2].

size

() > 1 && tokens[2][0] ==

'-'

) {

3160

tokens[2][0] =

'1'

;

3166  if

( tokens[3].

size

() > 1 && tokens[3][0] ==

'-'

) {

3167

tokens[3][0] =

'1'

;

3173  if

( tokens[4].

size

() != 1 ||

NPOS

== tokens[4].find_first_of(

"ADFGPNOW"

) ) {

3176  if

( tokens[4] ==

"N"

) {

3188  if

( tokens.size() != 9 ) {

3191  if

( tokens[8].

size

() != 1 ||

NPOS

== tokens[8].find_first_of(

"+-"

) ) {

3202  const string

& line )

3206  if

(toks.size() != 5) {

3210

list<string>::iterator

i

= toks.begin();

3228  if

(frame < -3 || frame > 3) {

3243  const string

& line )

3245

vector<string> tokens;

3258  if

( tokens[6].

size

() != 1 ||

NPOS

== tokens[6].find_first_of(

".+-"

) ) {

3261  if

( tokens[7].

size

() != 1 ||

NPOS

== tokens[7].find_first_of(

".0123"

) ) {

3264  if

( tokens.size() < 9 ||

3265

(

NPOS

== tokens[8].find(

"gene_id"

) &&

NPOS

== tokens[8].find(

"transcript_id"

) ) ) {

3274  const string

& line )

3277

vector<string> tokens;

3290  bool

typeOk =

false

;

3292

terms.push_back(

"snv"

);

3293

terms.push_back(

"cnv"

);

3294

terms.push_back(

"copy_number_variation"

);

3295

terms.push_back(

"gain"

);

3296

terms.push_back(

"copy_number_gain"

);

3297

terms.push_back(

"loss"

);

3298

terms.push_back(

"copy_number_loss"

);

3299

terms.push_back(

"loss_of_heterozygosity"

);

3300

terms.push_back(

"complex"

);

3301

terms.push_back(

"complex_substitution"

);

3302

terms.push_back(

"complex_sequence_alteration"

);

3303

terms.push_back(

"indel"

);

3304

terms.push_back(

"insertion"

);

3305

terms.push_back(

"inversion"

);

3306

terms.push_back(

"substitution"

);

3307

terms.push_back(

"deletion"

);

3308

terms.push_back(

"duplication"

);

3309

terms.push_back(

"translocation"

);

3310

terms.push_back(

"upd"

);

3311

terms.push_back(

"uniparental_disomy"

);

3312

terms.push_back(

"maternal_uniparental_disomy"

);

3313

terms.push_back(

"paternal_uniparental_disomy"

);

3314

terms.push_back(

"tandom_duplication"

);

3315

terms.push_back(

"structural_variation"

);

3316

terms.push_back(

"sequence_alteration"

);

3317  ITERATE

(list<string>, termiter, terms) {

3330  if

( tokens[6].

size

() != 1 ||

NPOS

== tokens[6].find_first_of(

".+-"

) ) {

3333  if

( tokens[7].

size

() != 1 ||

NPOS

== tokens[7].find_first_of(

".0123"

) ) {

3338  string

attrs = tokens[8];

3339  if

(string::npos == attrs.find(

"ID="

))

3341  if

(string::npos == attrs.find(

"Variant_seq="

)) {

3350  const string

& line )

3352

vector<string> tokens;

3365  if

( tokens[6].

size

() != 1 ||

NPOS

== tokens[6].find_first_of(

".+-?"

) ) {

3368  if

( tokens[7].

size

() != 1 ||

NPOS

== tokens[7].find_first_of(

".0123"

) ) {

3371  if

( tokens.size() < 9 || tokens[8].empty()) {

3374  if

( tokens.size() >= 9 && tokens[8].size() > 1) {

3375  const string

& col9 = tokens[8];

3394  const string

& line )

3396

vector<string> tokens;

3397  string

remaining(line),

head

, tail;

3416  string

featureType =

head

;

3437  const string

legalStrands{

"+-.?"

};

3439

string::npos == legalStrands.find(

head

)) {

3445  const string

legalPhases{

".0123"

};

3447

string::npos == legalPhases.find(

head

)) {

3453  if

(remaining.empty()) {

3457  if

(featureType ==

"gene"

) {

3466  if

(featureType ==

"transcript"

) {

3487  const string

& line )

3489

vector<string> tokens;

3491  if

( num_cols < 8 ) {

3503  if

( tokens[6].

size

() != 1 ||

NPOS

== tokens[6].find_first_of(

".+-"

) ) {

3506  if

( tokens[7].

size

() != 1 ||

NPOS

== tokens[7].find_first_of(

".0123"

) ) {

3515  const string

& line )

3517

vector<string> values;

3525  if

( values[0] ==

"DNA"

) {

3532  if

( values[0] ==

"AS"

) {

3543  const string

& line )

3545  const size_t

MIN_VALUES_PER_RECORD = 14;

3550

list<string> values;

3560

list<string>::iterator it = values.begin();

3603  if

( *it !=

"+"

&& *it !=

"C"

) {

3617  const string

& line,

3618  bool

ignoreFirstLine)

3621

vector<string> tokens;

3622  int

firstColumn = (ignoreFirstLine ? 1 : 0);

3624  if

(tokens.size() - firstColumn != 21) {

3634  const string

& token = tokens[firstColumn + 8];

3635  if

(token.empty() || token.size() > 2) {

3638  if

(token.find_first_not_of(

"-+"

) != string::npos) {

3663

vector<string> hopefullyInts;

3665  if

(hopefullyInts.size() != blockCount) {

3668  for

(

auto

hopefulInt: hopefullyInts) {

3682  const

vector<string>& Fields )

3684  if

( Fields.size() == 0 ) {

3704  const size_t

MIN_HIGH_RATIO = 20;

3705  size_t

high_count = 0;

3711  if

( 0 < high_count &&

m_iTestDataSize

/ high_count < MIN_HIGH_RATIO ) {

3721  if

( string::npos !=

data

.find(

"\r\n"

) ) {

3724  else if

( string::npos !=

data

.find(

"\n"

) ) {

3727  else if

( string::npos !=

data

.find(

"\r"

) ) {

3750  const double

REQUIRED_ASCII_RATIO = 0.9;

3754  size_t

count_print = 0;

3760  if

(count_print < (

double

)

count

* REQUIRED_ASCII_RATIO) {

3796  const string

& line )

3801  ITERATE

(

string

, Iter, line) {

3804

string::const_iterator NextI = Iter;

3806  if

(NextI != line.end())

3812

}

else if

(State == 1) {

3815

}

else if

(State == 2) {

3823  if

(

Char

==

'm'

&& Next ==

't'

) {

3829

}

else if

(State == 3) {

3834

}

else if

(State == 4) {

3840  return

(State == 5);

bool IsEmpty(void) const

Check if any hints are set at all.

bool IsPreferred(TFormat fmt) const

Check if the format is listed as preferred.

bool IsDisabled(TFormat fmt) const

Check if the format is listed as disabled.

Class implements different ad-hoc unreliable file format identifications.

bool TestFormatLzo(EMode)

bool TestFormatJson(EMode)

bool x_IsTruncatedJsonKeyword(const string &testString) const

static bool IsLineGvf(const std::string &)

unsigned int m_iStatsCountBraces

bool TestFormatBinaryAsn(EMode)

bool TestFormatDistanceMatrix(EMode)

bool x_IsTruncatedJsonNumber(const string &testString) const

bool TestFormatBZip2(EMode)

bool TestFormatGff3(EMode)

bool x_CheckStripJsonNumbers(string &testString) const

bool TestFormatTable(EMode)

bool TestFormatTaxplot(EMode)

unsigned int m_iStatsCountData

bool x_TestTableDelimiter(const string &delims)

bool TestFormatSra(EMode)

bool TestFormatFlatFileUniProt(EMode)

size_t x_FindNextJsonStringStop(const string &input, const size_t from_pos) const

static bool IsLineGff3(const std::string &)

bool TestFormatAgp(EMode)

bool x_CheckJsonStart(const string &testString) const

bool TestFormatBed15(EMode)

bool x_IsBlankOrNumbers(const string &testString) const

static bool IsLineHgvs(const std::string &)

static bool IsLinePhrapId(const std::string &)

bool TestFormatFiveColFeatureTable(EMode)

static bool IsLineFlatFileSequence(const std::string &)

bool TestFormatGlimmer3(EMode)

static bool IsLabelNewick(const std::string &)

bool TestFormatBed(EMode)

bool TestFormatFlatFileSequence(EMode)

bool TestFormatFlatFileEna(EMode)

bool IsInputRepeatMaskerWithHeader()

bool TestFormat(EFormat, EMode)

bool TestFormatSnpMarkers(EMode)

bool x_LooksLikeCLUSTALConservedInfo(const string &line) const

static bool IsSupportedFormat(EFormat format)

bool TestFormatZip(EMode)

bool TestFormatNewick(EMode)

bool TestFormatCLUSTAL(void)

bool TestFormatWiggle(EMode)

EFormat

The formats are checked in the same order as declared here.

@ eBZip2

bzip2 compressed file

@ eSra

INSDC Sequence Read Archive file.

@ eFiveColFeatureTable

Five-column feature table.

@ eBinaryASN

Binary ASN.1.

@ eLzo

lzo compressed file

@ eFormat_max

Max value of EFormat.

@ eGff2

GFF2, CGff2Reader, any GFF-like that doesn't fit the others.

@ eBed

UCSC BED file format, CBedReader.

@ eGtf

New GTF, CGtfReader.

@ eGZip

GNU zip compressed file.

@ eZip

zip compressed file

@ eSnpMarkers

SNP Marker flat file.

@ eHgvs

HGVS, CHgvsParser.

@ eAgp

AGP format assembly, AgpRead.

@ eDistanceMatrix

Distance matrix file.

@ ePhrapAce

Phrap ACE assembly file.

@ eFlatFileSequence

GenBank/GenPept/DDBJ/EMBL flat-file sequence portion.

@ eGff3

GFF3, CGff3Reader.

@ eGtf_POISENED

Old and Dead GFF/GTF style annotations.

@ eGlimmer3

Glimmer3 predictions.

@ eFasta

FASTA format sequence record, CFastaReader.

@ eUnknown

unknown format

@ eGffAugustus

GFFish output of Augustus Gene Prediction.

@ eRmo

RepeatMasker Output.

@ eZstd

Zstandard (zstd) compressed data.

@ eUCSCRegion

UCSC Region file format.

@ eAlignment

Text alignment.

@ ePsl

PSL alignment format.

@ eBed15

UCSC BED15 or microarray format.

@ eWiggle

UCSC WIGGLE file format.

@ eBam

Binary alignment/map file.

bool TestFormatZstd(EMode)

bool TestFormatAugustus(EMode)

bool TestFormatBam(EMode)

void x_FindJsonStringLimits(const string &testString, list< size_t > &limits) const

bool x_IsNumber(const string &testString) const

unsigned int m_iStatsCountAaChars

static bool IsLinePsl(const std::string &, bool ignoreFirstColumn)

bool TestFormatGff2(EMode)

bool TestFormatAlignment(EMode)

bool TestFormatFasta(EMode)

void x_StripJsonStrings(string &testString) const

streamsize m_iTestBufferSize

bool TestFormatGvf(EMode)

static bool x_TestInput(CNcbiIstream &input, EOnError onerror)

static bool IsLineGtf(const std::string &)

EFormat GuessFormat(EMode)

static bool IsLineRmo(const std::string &)

bool TestFormatPsl(EMode)

unsigned int m_iStatsCountAlNumChars

static bool IsSampleNewick(const std::string &)

void x_StripJsonKeywords(string &testString) const

std::list< std::string > m_TestLines

bool TestFormatPhrapAce(EMode)

streamsize m_iTestDataSize

bool TestFormatXml(EMode)

bool TestFormatTextAsn(EMode)

unsigned int m_iStatsCountDnaChars

size_t x_StripJsonPunctuation(string &testString) const

static bool IsAsnComment(const vector< string > &)

bool x_CheckStripJsonPunctuation(string &testString) const

static bool IsLineGff2(const std::string &)

static bool IsLineGlimmer3(const std::string &)

static const char * GetFormatName(EFormat format)

static bool IsLineAugustus(const std::string &)

@ eDefault

Return eUnknown.

@ eThrowOnBadSource

Throw an exception if the data source (stream, file) can't be read.

@ eST_Lax

Implement historic behavior, risking false positives.

@ eST_Strict

Require 100% encodability of printable non-digits.

@ eST_Default

Be relatively strict, but still allow for typos.

bool x_TryProcessCLUSTALSeqData(const string &line, string &id, size_t &seg_length) const

bool TestFormatVcf(EMode)

static EFormat Format(const string &path, EOnError onerror=eDefault)

Guess file format.

bool TestFormatHgvs(EMode)

bool TestFormatGtf(EMode)

bool TestFormatRepeatMasker(EMode)

static bool IsLineAgp(const std::string &)

bool TestFormatGZip(EMode)

static ESequenceType SequenceType(const char *str, unsigned length=0, ESTStrictness strictness=eST_Default)

Guess sequence type.

bool TestFormatFlatFileGenbank(EMode)

bool IsInputRepeatMaskerWithoutHeader()

bool x_TestFormat(EFormat format, EMode mode)

class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...

CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...

Concept for reading and writing characters.

static const TFormatNamesItem s_format_to_name_table[]

static bool s_IsTokenPosInt(const string &strToken)

static unsigned char symbol_type_table[256]

void SkipCommentAndBlank(CTempString &text)

bool EnaGetLineData(list< string >::iterator &lineIt, list< string >::iterator endIt, string &lineCode, string &lineData)

DEFINE_STATIC_ARRAY_MAP(TFormatNamesMap, sm_FormatNames, s_format_to_name_table)

bool GenbankGetKeywordLine(list< string >::iterator &lineIt, list< string >::iterator endIt, string &keyword, string &data)

static bool s_IsTokenDouble(const string &strToken)

static bool s_IsTokenInteger(const string &strToken)

constexpr size_t sm_CheckOrder_Size

@ fProtein_Alphabet

Allows BZX*-, but not JOU.

@ fDNA_Main_Alphabet

Just ACGTUN-.

@ fDNA_Ambig_Alphabet

Anything else representable in ncbi4na.

SStaticPair< CFormatGuess::EFormat, const char * > TFormatNamesItem

static bool s_LooksLikeNucSeqData(const string &line, size_t minLength=10)

static const CFormatGuess::EFormat sm_CheckOrder[]

bool UniProtGetLineData(list< string >::iterator &lineIt, list< string >::iterator endIt, string &lineCode, string &lineData)

CStaticPairArrayMap< CFormatGuess::EFormat, const char * > TFormatNamesMap

static void init_symbol_type_table(void)

static const char * str(char *buf, int n)

static const char * column

static const TDS_WORD limits[]

static const column_t columns[]

constexpr size_t ArraySize(const Element(&)[Size])

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define NCBI_ASSERT(expr, mess)

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

char Char

Alias for char.

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

CNcbiIstream & NcbiGetline(CNcbiIstream &is, string &str, char delim, string::size_type *count=NULL)

Read from "is" to "str" up to the delimiter symbol "delim" (or EOF)

static void Stepback(CNcbiIstream &is, CT_CHAR_TYPE *buf, streamsize buf_size, void *del_ptr=0)

IO_PREFIX::istream CNcbiIstream

Portable alias for istream.

IO_PREFIX::ifstream CNcbiIfstream

Portable alias for ifstream.

NCBI_NS_STD::string::size_type SIZE_TYPE

static CTempString TruncateSpaces_Unsafe(const CTempString str, ETrunc where=eTrunc_Both)

Truncate whitespace in a string.

static int StringToNonNegativeInt(const CTempString str, TStringToNumFlags flags=0)

Convert string to non-negative integer value.

static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)

Convert string to int.

static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)

Split a string using specified delimiters.

static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)

Find the pattern in the specified range of a string using a case insensitive search.

static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)

Check if a string is blank (has no text).

static double StringToDouble(const CTempStringEx str, TStringToNumFlags flags=0)

Convert string to double.

static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)

Truncate whitespace in a string (in-place)

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)

Find the pattern in the string.

static SIZE_TYPE FindCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)

Find the pattern in the specified range of a string using a case sensitive search.

static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)

Check if a string starts with a specified prefix value.

static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)

Split a string into two pieces using the specified delimiters.

size_type length(void) const

Return the length of the represented array.

static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)

Convert string to unsigned int.

static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)

Case-insensitive equality of a substring with another string.

static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)

Convert numeric value to string.

size_type find(const CTempString match, size_type pos=0) const

Find the first instance of the entire matching string within the current string, beginning at an opti...

static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)

Replace occurrences of a substring within a string.

static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)

Truncate whitespace in a string.

static const size_type npos

@ fConvErr_NoThrow

Do not throw an exception on error.

@ fSplit_Tokenize

All delimiters are merged and trimmed, to get non-empty tokens only.

@ fSplit_MergeDelimiters

Merge adjacent delimiters.

@ eTrunc_Begin

Truncate leading whitespace only.

@ eNocase

Case insensitive compare.

@ eCase

Case sensitive compare.

static const char label[]

unsigned int

A callback function used to compare two keys in a database.

The blob sat and sat key. Both must be positive integers. Non-empty string. The interpretation of the blob id depends on a processor. Cassandra processor expects the following format

static void text(MDB_val *v)

constexpr bool empty(list< Ts... >) noexcept

const struct ncbi::grid::netcache::search::fields::SIZE size

Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...

NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.

std::istream & in(std::istream &in_, double &x_)

static size_t read_size(CNcbiIstream &stream, const char *name)

Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4