<
size_t...Ints>
17 usingresult_type = std::array<size_t,
std::max({Ints...}) + 1>;
19 for(
auto& rec:
result) {
23 for(
autorec: {Ints...}) {
31 template<
typename_Finder,
size_tcurrent, auto indices,
typename_Input,
typename_Output>
34constexpr
autopos = indices[current];
36 autonextpos = _Finder{}(
input);
37 if(nextpos == std::string_view::npos)
38nextpos =
input.size();
41std::get<pos>(
output) =
input.substr(0, nextpos);
43 ifconstexpr(current + 1 < indices.size()) {
44 if(nextpos !=
input.size()) {
45ExtractTokensToTupleImpl<_Finder, current + 1, indices>(
input.substr(nextpos + 1),
output);
47 throwstd::runtime_error(
"split_string: failed to fill all elements");
52 template<
typename_Finder, auto indices,
typename_Input,
typename_Output>
55ExtractTokensToTupleImpl<_Finder, 0, indices>(std::forward<_Input>(
input), std::forward<_Output>(
output));
58 template<
typename_CharType, _CharType _C>
61std::size_t
operator()(std::basic_string_view<_CharType> s)
const 67 template<ct::fixed_
string_RE>
71std::size_t
operator()(std::basic_string_view<_CharType> s)
const 73 autom = ctre::search<_RE.
get_array()>(s);
75 returnstd::distance(s.begin(), m.begin());
77 returnstd::basic_string_view<_CharType>::npos;
83std::integer_sequence<size_t, Ints...>,
84std::basic_string_view<
typenamedecltype(_RE)::
char_type>
input,
87static_assert(
sizeof...(Ints) ==
sizeof...(TArgs));
89 using_Finder = std::conditional_t<_RE.
size() == 1,
95std::tuple<std::decay_t<TArgs>&...>
output(std::ref(tokens)...);
97ExtractTokensToTuple<_Finder, indices>(
input,
output);
102std::basic_string_view<
typenamedecltype(_RE)::
char_type>
input,
105extract_tokens<_RE>(std::index_sequence<Ints...>{},
input, std::forward<TArgs>(tokens)...);
142 returnoperator()(*
l, *
r);
144 booloperator ()(std::forward_list<TGffLine>::const_iterator
l, std::forward_list<TGffLine>::const_iterator
r)
const 146 returnoperator()(*
l, *
r);
152 cmp=
l.m_seqid.compare(
r.m_seqid);
156 cmp=
l.m_parent.compare(
r.m_parent);
160 cmp=
l.m_id.compare(
r.m_id);
164 return l.m_line.compare(
r.m_line) < 0;
168 template<
typename_It>
169 staticstd::vector<_It> x_SortGffLines(
size_t size, _It begin, _It end);
171 template<
typename_It>
172 staticstd::shared_ptr<TGffBlob> x_PopulateBlob(std::shared_ptr<CGffSourceImpl>
self, _It begin, _It end);
174 static voidx_GetColumn1and9(TView line, TView&
col1, TView& col9);
175 static boolx_GetParentAndID(TView col9, TView&
id, TView& parent);
176 static boolx_GetGtfGeneId(TView col9, TView& geneid);
179std::vector<TView>& comments,
180std::size_t& all_lines_size,
181std::forward_list<TGffLine>& all_lines,
182TView& fasta)
const;
191: m_file{std::make_shared<
TFile::element_type>()}
193 m_file->Open(filename,
nullptr);
205 if(!ct::inline_bitset<CFormatGuess::eGff3, CFormatGuess::eGtf>.
test(
m_file->m_format))
206 throwstd::runtime_error(
"Wrong file format used");
210 template<
typename_It>
213std::shared_ptr<TGffBlob> new_blob;
214 auto size= std::distance(begin, end);
216new_blob = std::make_shared<TGffBlob>(
self);
218new_blob->m_lines.reserve(
size);
219new_blob->m_seqid = (**begin).m_seqid;
220for_each(begin, end, [
b= new_blob](
autorec) {
221 b->m_lines.push_back(rec->m_line);
227 template<
typename_It>
230std::multiset<_It, TCompareNodes> s_set;
231 for(_It node = begin; node != end; node++) {
235std::vector<_It> sorted; sorted.reserve(
size);
237 for(
auto& node: s_set) {
238sorted.push_back(node);
247 for(
size_t i=0;
i<9;
i++) {
248 autotab_pos = line.find(
'\t');
250col9 = line.substr(0, tab_pos);
253 if(tab_pos == TView::npos)
254 throwstd::runtime_error(
"wrong format");
256 col1= line.substr(0, tab_pos);
257line = line.substr(tab_pos + 1);
259 if(
col1.empty() || col9.empty())
260 throwstd::runtime_error(
"wrong format");
265 autogeneid_search = ctre::search<
"gene_id \"([^\"]+)\"">(col9);
267 autoc = *
std::prev(geneid_search.begin());
268 if(c==
';'|| c==
'\t')
269geneid= geneid_search.get<1>().to_view();
276 autoparent_search = ctre::search<
"Parent=([^;]+)">(col9);
278 autoc = *
std::prev(parent_search.begin());
279 if(c==
';'|| c==
'\t')
280parent = parent_search.get<1>().to_view();
282 autoid_search = ctre::search<
"ID=([^;]+)">(col9);
284 autoc = *
std::prev(id_search.begin());
285 if(c==
';'|| c==
'\t')
286 id= id_search.get<1>().to_view();
292std::vector<TView>& comments,
293std::size_t& all_lines_size,
294std::forward_list<TGffLine>& all_lines,
297std::forward_list<TView> dash_lines;
298 size_tdash_lines_size = 0;
305 while(!current.empty()) {
309current = current.substr(next_pos);
313 if(line[0] ==
'#') {
314dash_lines.push_front(line);
316}
else if(line[0] ==
'>') {
321std::string_view seqid;
323std::string_view parent;
337all_lines.emplace_front(line, seqid, parent,
id);
340current = current.substr(next_pos);
343 if(dash_lines_size) {
344comments.reserve(dash_lines_size);
345dash_lines.reverse();
346 while(!dash_lines.empty()) {
347comments.push_back(dash_lines.front());
348dash_lines.pop_front();
357std::vector<TView> comments;
358std::forward_list<TGffLine> all_lines;
359 size_tall_lines_size;
362 self->x_LoadGffLines(comments, all_lines_size, all_lines, fasta);
364 if(!comments.empty()) {
365 autonew_blob = std::make_shared<TGffBlob>(
self);
367new_blob->m_lines = std::move(comments);
372 if(all_lines_size) {
373 autosorted =
x_SortGffLines(all_lines_size, all_lines.begin(), all_lines.end());
376std::vector<TView> current_lines;
378 autobegin = sorted.begin();
380std::shared_ptr<TGffBlob> new_blob;
382 for(
autoit = sorted.begin(); it< sorted.end(); it++) {
383 if(current_seqid != (**it).m_seqid ) {
387current_seqid = (**it).m_seqid;
395 if(!fasta.empty()) {
396 autonew_blob = std::make_shared<TGffBlob>();
398new_blob->m_lines.push_back(fasta);
412 m_impl= std::make_shared<CGffSourceImpl>(filename);
416 m_impl= std::make_shared<CGffSourceImpl>(
file);
420 m_impl= std::make_shared<CGffSourceImpl>(blob);
@ eGtf
New GTF, CGtfReader.
static bool x_GetGtfGeneId(TView col9, TView &geneid)
static bool x_GetParentAndID(TView col9, TView &id, TView &parent)
static CGffSource::TGenerator ReadBlobs(std::shared_ptr< CGffSourceImpl > self)
static std::vector< _It > x_SortGffLines(size_t size, _It begin, _It end)
CGffSourceImpl(TView blob)
static std::shared_ptr< TGffBlob > x_PopulateBlob(std::shared_ptr< CGffSourceImpl > self, _It begin, _It end)
void x_LoadGffLines(std::vector< TView > &comments, std::size_t &all_lines_size, std::forward_list< TGffLine > &all_lines, TView &fasta) const
CGffSourceImpl(const std::string &filename)
static void x_GetColumn1and9(TView line, TView &col1, TView &col9)
std::shared_ptr< CGffSourceImpl > m_impl
TGenerator ReadBlobs() const
std::shared_ptr< objects::edit::CHugeFile > TFile
void extract_tokens(std::integer_sequence< size_t, Ints... >, std::basic_string_view< typename decltype(_RE)::char_type > input, TArgs &&...tokens)
void ExtractTokensToTupleImpl(_Input &&input, _Output &&output)
void ExtractTokensToTuple(_Input &&input, _Output &&output)
constexpr auto MakeMapping()
Frond-end GFF-familty files readers, including GFF2, GFF3, GTF.
size_t SkipLine(std::string_view blob, std::string_view::size_type start_pos, std::string_view &line)
#define test(a, b, c, d, e)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static SQLCHAR output[256]
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_SCOPE(ns)
Define a new scope.
string_type::value_type char_type
The character type used by the parser.
const struct ncbi::grid::netcache::search::fields::SIZE size
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
constexpr size_t size() const
constexpr const array_type & get_array() const
typename decltype(_RE)::char_type _CharType
std::size_t operator()(std::basic_string_view< _CharType > s) const
std::size_t operator()(std::basic_string_view< _CharType > s) const
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4