A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/columnar__vcf__reader_8cpp_source.html below:

NCBI C++ ToolKit: src/gui/widgets/loaders/columnar_vcf_reader.cpp Source File

58

edit::CParseTextOptions options;

59

options.SetStartText(

"ID="

);

60

options.SetStopText(

","

);

61  m_Name

= options.GetSelectedText(line);

63

options.SetStartText(

",Number="

);

64

options.SetStopText(

","

);

65  m_Number

= options.GetSelectedText(line);

67

options.SetStartText(

",Description=\""

);

68

options.SetStopText(

"\""

);

77  unsigned

nr_lines = 0;

82  if

(nr_lines % 200 == 0 && (canceled && canceled->

IsCanceled

())) {

108

edit::CParseTextOptions options;

109

options.SetStartText(

"accession="

);

110

options.SetStopText(

","

);

113

options.SetStopText(

">"

);

130

}

while

(reader.

PeekChar

() ==

'#'

);

137  "Line starting with ##fileformat is missing"

,

151  if

(header_line.find(

" "

) !=

NPOS

|| header_line.find(

"\t"

) ==

NPOS

) {

156  "Header line expected to be tab delimited"

,

160

vector<string> col_names;

163  bool

is_unique =

false

;

165  set<string>

unique_strs(col_names.begin(), col_names.end());

166

is_unique = (col_names.size() == unique_strs.

size

());

174  "Column names are not unique"

,

179  auto

it = col_names.begin();

180  for

(; it != col_names.end(); ++it) {

188  for

(; it != col_names.end(); ++it, ++index) {

216  "Error allowance exceeded"

,

227  bool

placed = error_cont->

PutError

(err);

235  "Error allowance exceeded"

,

243  auto

start = chrono::steady_clock::now();

246  unsigned

nr_lines = 0;

247  unsigned

lines_per_contig = 0;

250  string

previous_chrom;

252

vector<future<void>> async_calls;

253  auto

JoinOptimization = [&async_calls]()

256  for

(

auto

& task : async_calls) {

263  auto

task_start = chrono::steady_clock::now();

265  auto

opt_start = chrono::steady_clock::now();

266

var_map->FinalizeReading();

267  auto

diff_opt = chrono::steady_clock::now() - opt_start;

268  LOG_POST

(

Info

<<

"Optimization of "

<< chr <<

" took "

<< chrono::duration_cast<chrono::milliseconds>(diff_opt).

count

() <<

" ms"

);

270  if

(on_variants_list_ready) {

271

on_variants_list_ready(*var_map);

274  auto

diff_opt = chrono::steady_clock::now() - task_start;

279  while

(

in

.good() && !

in

.eof()) {

280  if

(nr_lines % 1000 == 0 && (canceled && canceled->

IsCanceled

())) {

294  if

(line.empty() || (!line.empty() && line[0] ==

'#'

)) {

298  if

(line.find(

"\t"

) ==

NPOS

) {

303  "Has been skipped as it is not tab delimited:\n"

+ line,

309  size_t

pos = line.find(

"\t"

);

310  string

chrom = line.substr(0, pos);

311  if

(chrom != previous_chrom) {

312  if

(!previous_chrom.empty()) {

314

async_calls.push_back(async(std::launch::async | std::launch::deferred, OptimizeVariantsList, std::ref(

m_ChromosomeMap

.at(previous_chrom))));

318

previous_chrom = chrom;

319

lines_per_contig = 0;

324

vars_list = inserted.first->second.GetPointer();

331

chrom +

" data line found out of its block. All entries for a specific CHROM should form a contiguous block within the VCF file."

,

338  if

(prog_func && lines_per_contig > 0 && lines_per_contig % 500000 == 0) {

345

vars_list->ParseLine(line);

359  auto

diff_parsing = chrono::steady_clock::now() - start;

360  LOG_POST

(

Info

<<

"Parsed "

<< nr_lines <<

" lines from VCF file in " 361

<< chrono::duration_cast<chrono::milliseconds>(diff_parsing).

count

() <<

" ms "

);

370  if

(!

in

.eof() && !

in

.good()) {

371  LOG_POST

(

Error

<<

"Reading cannot be completed, as input stream is corrupted"

);

384  if

(on_variants_list_ready) {

396  if

(header_line.find(

" "

) !=

NPOS

|| header_line.find(

"\t"

) ==

NPOS

) {

401  "Header line is expected to be tab delimited"

,

407  const unsigned

kMandatoryCols = 8;

408  unsigned

nr_tabs =

static_cast<unsigned>

(

count

(header_line.begin(), header_line.end(),

'\t'

));

409  if

(nr_tabs + 1 < kMandatoryCols) {

414  "Header line is expected to have at least 8 columns"

,

458

vector<CColumnarVCFReader::TSeqIdVarsListPair>

468  auto

start = chrono::steady_clock::now();

470  unsigned

nr_lines = 0;

471  unsigned

lines_per_contig = 0;

474  size_t

search_chrs = chr_list.size();

477

vector<future<void>> async_calls;

478  auto

JoinOptimization = [&async_calls]()

481  for

(

auto

& task : async_calls) {

487  auto

chr = var_map->GetChrName();

488  auto

task_start = chrono::steady_clock::now();

490  auto

opt_start = chrono::steady_clock::now();

491

var_map->FinalizeReading();

492  auto

diff_opt = chrono::steady_clock::now() - opt_start;

493  LOG_POST

(

Info

<<

"Optimization of "

<< chr <<

" took "

<< chrono::duration_cast<chrono::milliseconds>(diff_opt).

count

() <<

" ms"

);

495  if

(on_variants_list_ready) {

496

on_variants_list_ready(*var_map);

499  auto

diff_opt = chrono::steady_clock::now() - task_start;

503  auto

CallOptimizeVarsList = [&]() {

505

async_calls.push_back(async(std::launch::async | std::launch::deferred, OptimizeVariantsList, vcf_vars));

507

OptimizeVariantsList(vcf_vars);

511  while

(

in

.good() && !

in

.eof() && search_chrs > 0) {

512  if

(nr_lines % 1000 == 0 && (canceled && canceled->

IsCanceled

())) {

515

variants_list.clear();

516  return

variants_list;

528  if

(line.empty() || (!line.empty() && line[0] ==

'#'

)) {

532  if

(line.find(

"\t"

) ==

NPOS

) {

537  "Has been skipped as it is not tab delimited:\n"

+ line,

543  size_t

pos = line.find(

"\t"

);

544  string

chrom = line.substr(0, pos);

545  if

(!vcf_vars || (vcf_vars && !

NStr::EqualCase

(vcf_vars->GetChrName(), chrom))) {

546  if

(prev_chrom == chrom)

550  for

(

const auto

& syn_it : chr_list) {

551  const auto

& seq_id = syn_it.first;

552  const auto

& synonyms = syn_it.second;

553  if

(find_if(synonyms.begin(), synonyms.end(),

554

[&chrom](

const string

& elem) { return NStr::EqualCase(chrom, elem); }) != synonyms.end()) {

558

CallOptimizeVarsList();

559

lines_per_contig = 0;

563  if

(find_if(variants_list.begin(), variants_list.end(),

564

[&seq_id](

const TSeqIdVarsListPair

& elem) { return (seq_id->AsFastaString() == elem.first->AsFastaString()); }) == variants_list.end()) {

566

vcf_vars = variants_list.back().second;

573

chrom +

" data line found out of its block. All entries for a specific CHROM should form a contiguous block within the VCF file."

,

583

CallOptimizeVarsList();

585

vcf_vars.

Reset

(

nullptr

);

589

lines_per_contig = 0;

594  if

(prog_func && lines_per_contig > 0 && lines_per_contig % 500000 == 0) {

599

vcf_vars->ParseLine(line);

624  auto

diff_parsing = chrono::steady_clock::now() - start;

625  LOG_POST

(

Info

<<

"Parsed "

<< nr_lines <<

" lines in " 626

<< chrono::duration_cast<chrono::milliseconds>(diff_parsing).

count

() <<

" ms "

);

631

variants_list.clear();

632  return

variants_list;

635  if

(!

in

.good() && !

in

.eof()) {

636  LOG_POST

(

Error

<<

"Reading cannot be completed, as input stream is corrupted"

);

640

variants_list.clear();

645

OptimizeVariantsList(vcf_vars);

650  if

(chr_list.size() != variants_list.size()) {

651  for

(

const auto

& chr_it : chr_list) {

652  if

(find_if(variants_list.begin(), variants_list.end(),

654

{ return elem.first->Equals(*chr_it.first); }) == variants_list.end()) {

656  auto

id_str = chr_it.first->AsFastaString();

661  "Chromosome "

+ id_str +

" is not in the file"

,

668  if

(!on_variants_list_ready) {

669  for

(

auto

& var_it : variants_list) {

674  return

variants_list;

679

vector<string>

names

;

681  names

.push_back(it.first);

700

it.second->GetStatistics(

out

);

707

it.second->SerializeVariantData(prefix,

out

);

714

it.second->DeserializeAndCheck(prefix,

out

);

721

it.second->List(

out

, only_sv_cols);

728

it.second->ListPositionVectors(

out

);

Debugging functions (internal). Poorly documented, not well written.

Serialization for sparse_vector<>

void ListColumns(CNcbiOstream &out, bool only_sv_cols=false)

void SerializeToDisk(const string &prefix, CNcbiOstream *out=nullptr)

void GetStatistics(CNcbiOstream &out)

void Deserialize(const string &prefix, CNcbiOstream *out=nullptr)

void ListIndexVectors(CNcbiOstream &out)

bool m_LoadAllInfo

Flag to load every INFO field.

pair< CConstRef< objects::CSeq_id >, CRef< CVCFVariantList > > TSeqIdVarsListPair

void x_ProcessCriticalError(objects::CObjReaderLineException &err, objects::ILineErrorListener *error_cont)

vector< string > GetChromosomeNames() const

Returns a vector, holding the chrs/contigs identifiers, read from the file.

map< unsigned, string > m_SampleCols

List of SAMPLE columns parsed from the last line of the header, order is important.

unsigned x_ProcessHeaderLine(const string &header_line, unsigned line_nr, objects::ILineErrorListener *listener)

map< unsigned, string > m_LoadSamples

List of SAMPLES required to be loaded.

function< void(const string &)> TReportProgress

void x_ProcessError(objects::CObjReaderLineException &err, objects::ILineErrorListener *error_cont)

CRef< CVCFVariantList > GetVariantsForChr(const string &chr_name) const

Retrieves the variants list for a given chr/contig.

bool ReadHeader(CNcbiIstream &in, ICanceled *canceled=nullptr, objects::ILineErrorListener *listener=nullptr)

Reads only the header section of the file.

void x_GetSamplesToLoad(const string &header_line, objects::ILineErrorListener *listener, unsigned line_nr)

set< CConstRef< SVcfFieldData > > m_InfoFields

List of INFO fields parsed from the header of the file.

bool m_LoadAllSamples

Flag to load every SAMPLE column.

void x_ProcessWarning(objects::CObjReaderLineException &err, objects::ILineErrorListener *error_cont)

std::function< void(CVCFVariantList &)> TOnVCFVariantListReady

Defines a callable object, used when a variants list is processed by the reader.

void x_InterruptReading()

void x_GatherSampleColNames(const string &header_line, objects::ILineErrorListener *listener, unsigned line_nr)

unordered_map< string, CRef< CVCFVariantList > > m_ChromosomeMap

set< string > m_LoadInfoFields

List of INFO fields required to be loaded.

vector< TSeqIdVarsListPair > ReadVariantsForChrs(CNcbiIstream &in, const vector< pair< CConstRef< objects::CSeq_id >, vector< string >>> &chr_list, ICanceled *canceled=nullptr, objects::ILineErrorListener *listener=nullptr, TReportProgress prog_func=TReportProgress(), TOnVCFVariantListReady on_variants_list_ready=TOnVCFVariantListReady())

Reads a list of variants.

bool ReadData(CNcbiIstream &in, ICanceled *canceled=nullptr, objects::ILineErrorListener *listener=nullptr, TReportProgress prog_func=TReportProgress(), TOnVCFVariantListReady on_variants_list_ready=TOnVCFVariantListReady())

Reads only the data section of the file.

void Throw(void) const

Use this function to throw this object.

static CObjReaderLineException * Create(EDiagSev eSeverity, unsigned int uLine, const std::string &strMessage, EProblem eProblem=eProblem_GeneralParsingError, const std::string &strSeqId=string(""), const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), CObjReaderLineException::EErrCode eErrCode=eFormat, const TVecOfLines &vecOfOtherLines=TVecOfLines())

Please use this instead of the constructor because the ctor is protected.

std::string Message() const

Simple implementation of ILineReader for i(o)streams.

CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...

const string & GetChrName() const

static const string sm_FORMAT

Interface for testing cancellation request in a long lasting operation.

virtual bool PutError(const ILineError &)=0

Store error in the container, and return true if error was stored fine, and return false if the calle...

@ eProblem_GeneralParsingError

string SeverityStr() const

iterator_bool insert(const value_type &val)

const_iterator begin() const

const_iterator end() const

const Uint8 kAsyncVarsThreshold

std::ofstream out("events_result.xml")

main entry point for tests

static const struct name_t names[]

#define LOG_POST(message)

This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...

@ eDiag_Warning

Warning message.

@ eDiag_Critical

Critical error message.

void Error(CExceptionArgs_Base &args)

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

const string & GetMsg(void) const

Get message string.

void Info(CExceptionArgs_Base &args)

char PeekChar(void) const

Returns the first character of the next string without consuming it.

Uint8 GetLineNumber(void) const

Returns the current line number (counting from 1, not 0).

void Reset(void)

Reset reference object.

TObjectType * Release(void)

Release a reference to the object and return a pointer to the object.

uint64_t Uint8

8-byte (64-bit) unsigned integer

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

virtual bool IsCanceled(void) const =0

CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)

Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)

IO_PREFIX::ostream CNcbiOstream

Portable alias for ostream.

IO_PREFIX::istream CNcbiIstream

Portable alias for istream.

static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)

Split a string using specified delimiters.

static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)

Case-sensitive equality of a substring with another string.

bool empty(void) const

Return true if the represented string is empty (i.e., the length is zero)

static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)

Convert UInt to string.

static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)

Check if a string starts with a specified prefix value.

CTempString substr(size_type pos) const

Obtain a substring from this string, beginning at a given offset.

static string UInt8ToString(Uint8 value, TNumToStringFlags flags=0, int base=10)

Convert UInt8 to string.

Lightweight interface for getting lines of data with minimal memory copying.

Compressed bitset (entry point to bm.h)

std::istream & in(std::istream &in_, double &x_)

Structure to store characteristics of an INFO field. It is constructed from an INFO meta-information l...

string m_Name

INFO ID (name)

string m_Description

INFO Description.

SVcfFieldData(const string &line)


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4