ncbi::objtools::hugeasn;
15start_pos = blob.find(
'>', start_pos);
16 if(start_pos == std::string_view::npos || blob[start_pos-1] ==
'\n')
24 if(line.size()>1 && line[0] ==
'>'&& line[1] ==
'?') {
26ncbi::NStr::StringToNumeric(line.substr(2), &gapsize);
35 size_tcurrent_pos = 0;
37 boolis_data_segment =
true;
38std::string_view defline;
43 while(current_pos != std::string_view::npos &&
44current_pos + 1 <= blob.size() &&
45blob[current_pos+1] ==
'?') {
47is_data_segment =
false;
50 if(current_pos == std::string_view::npos)
53 if(is_data_segment || !is_delta_seq)
57is_data_segment =
true;
63 if(
buffer.size() < max_allowed && lit.size()) {
64 size_tcan_add = max_allowed -
buffer.size();
65can_add =
std::min(can_add, lit.size());
66 autoto_copy = lit.substr(0, can_add);
68lit = lit.substr(can_add);
71 return buffer.size() == max_allowed;
81 usingTView = std::string_view;
87 size_t lineno= start_line;
89 if(current[0] ==
'\n') {
90co_yield std::make_pair(
lineno++, TView{});
91current = current.substr(1);
93 while(!current.empty()) {
94 autonext_nl = current.find(
'\n');
95 if(next_nl == TView::npos) {
96next_nl = current.size();
98 autonewline_size = (current[next_nl-1] ==
'\r') ? next_nl-1 : next_nl;
99 autoto_yield = current.substr(0, newline_size);
100co_yield std::make_pair(
lineno++, to_yield);
101current = current.substr(next_nl+1);
125 static voidxTrimBlob(TFastaBlob& blob, TView current);
135m_file.reset(
newTFile::element_type);
136m_file->Open(filename,
nullptr);
138 throwstd::runtime_error(
"Wrong file format used");
139m_blob =
TView(m_file->m_memory, m_file->m_filesize);
145 throwstd::runtime_error(
"Wrong file format used");
152 self->m_flags =
flags;
154std::shared_ptr<TFastaBlob> prev_blob;
156 autocurrent =
self->m_blob;
158 while(!current.empty()) {
159 if(current[0] !=
'>') {
160 throwstd::runtime_error(
"No defline start found");
165 if(next_nl == TView::npos) {
166 throwstd::runtime_error(
"Defline end was not found");
169current = current.substr(next_nl);
172 autonext_blob = current.substr(0, next_defline_pos);
173 autoto_yield = std::make_shared<TFastaBlob> (
self);
174to_yield->m_defline = defline;
175to_yield->m_data = next_blob;
177current = current.substr(next_defline_pos);
184 autoit = current.begin();
185 size_t len= std::distance(blob.
m_data.begin(), it);
188 len= std::distance(blob.
m_blob.begin(), it);
196 self->m_flags =
flags;
198std::shared_ptr<TFastaBlob> prev_blob;
200 autocurrent =
self->m_blob;
205blob.m_blob = blob.m_data = current;
207 boolinside_deltaseq =
false;
209 while(!current.empty()) {
213 if(!line.empty() && line[0] ==
'>') {
214 if(line.size()>1 && line[1] ==
'?') {
215inside_deltaseq =
true;
218 if(
flags.test(IsDeltaSeq) && inside_deltaseq) {
219inside_deltaseq =
false;
221inside_deltaseq =
false;
222 if(blob.m_numlines>0) {
224co_yield std::make_shared<TFastaBlob>(blob);
226blob.m_defline = line;
227blob.m_blob = current;
228current = current.substr(nextnl);
229blob.m_data = current;
230blob.m_lineno_handicap =
lineno-1;
231blob.m_seq_length = 0;
237 if(line.size()>0 && line[0] !=
'>') {
238blob.m_seq_length += line.size();
241current = current.substr(nextnl);
243 if(blob.m_numlines>0) {
245co_yield std::make_shared<TFastaBlob>(blob);
262 m_impl= std::make_shared<CFastaSourceImpl>(filename);
267 m_impl= std::make_shared<CFastaSourceImpl>(
file);
272 m_impl= std::make_shared<CFastaSourceImpl>(blob);
CFastaSourceImpl(TView blob)
std::atomic< bool > m_blobs_loaded
ParseFlagsSet Flags() const
static Generator< std::shared_ptr< TFastaBlob > > ReadBlobsQuick(std::shared_ptr< CFastaSourceImpl > self, ParseFlagsSet flags)
std::atomic< bool > m_is_loading
CFastaSource::TFile TFile
CFastaSource::TView TView
static void xTrimBlob(TFastaBlob &blob, TView current)
static Generator< std::shared_ptr< TFastaBlob > > ReadBlobs(std::shared_ptr< CFastaSourceImpl > self, ParseFlagsSet flags)
CFastaSourceImpl(const std::string &filename)
std::shared_ptr< CFastaSourceImpl > m_impl
std::shared_ptr< objects::edit::CHugeFile > TFile
CFastaSource(const std::string &filename)
Generator< std::shared_ptr< TFastaBlob > > ReadBlobs()
Generator< std::shared_ptr< TFastaBlob > > ReadBlobsQuick()
ct::const_bitset< int(ParseFlags::MaxValue), ParseFlags > ParseFlagsSet
@ eFasta
FASTA format sequence record, CFastaReader.
std::optional< size_t > x_CheckGapEstimation(std::string_view line)
size_t x_FindNextDefline(std::string_view blob, size_t start_pos)
Generator< std::pair< size_t, std::string_view > > BasicReadLinesGenerator(size_t start_line, std::string_view blob)
size_t x_FindNextTrueDefline(std::string_view blob, bool is_delta_seq)
bool x_FillLiteralBuffer(size_t max_allowed, std::string &buffer, std::string_view &lit)
size_t SkipLine(std::string_view blob, std::string_view::size_type start_pos, std::string_view &line)
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_SCOPE(ns)
Define a new scope.
ParseFlagsSet Flags() const
std::shared_ptr< CFastaSourceImpl > m_impl
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4