CFast5colReaderIndexer
42 size_tm_current_pos = 0;
43std::string_view m_whole_file;
44 size_tm_current_lineno = 0;
45 size_tm_prev_line_pos = 0;
46ncbi::CFast5colReader::CMemBlockInfo m_current_block;
47std::list<ncbi::CFast5colReader::CMemBlockInfo>* m_blocks =
nullptr;
48 size_tGetNextLine(std::string_view&
next);
49 voidMakeNewBlock(
size_tpos, std::string_view seqid);
52 voidCFast5colReaderIndexer::MakeNewBlock(
size_tpos, std::string_view seqid)
54 if(m_current_block.line_no) {
55m_current_block.size = pos - m_current_block.start_pos;
56m_blocks->push_back(m_current_block);
57m_current_block.index++;
58m_current_block.start_pos = pos;
60m_current_block.seqid = seqid;
61m_current_block.line_no = m_current_lineno;
64 size_tCFast5colReaderIndexer::GetNextLine(std::string_view& next_line)
66 if(m_current_pos>=m_whole_file.size())
67 returnstd::string_view::npos;
69 autonl_pos = m_whole_file.find(
'\n', m_current_pos);
70 if(nl_pos == m_whole_file.npos)
71nl_pos = m_whole_file.size()-1;
73 while(cr_pos>0 && m_whole_file[cr_pos-1]==
'\r')
77next_line = m_whole_file.substr(m_current_pos, cr_pos-m_current_pos);
78 size_tstart = m_current_pos;
79m_current_pos = nl_pos + 1;
96m_reader_flags = reader_flags;
100 if(genome_center_id.empty())
101m_seqid_prefix.clear();
103m_seqid_prefix =
"gnl|"+ genome_center_id +
":";
110 auto file= std::make_unique<TFile>();
111 file->Open(filename,
nullptr);
112Open(std::move(
file));
117m_hugefile = std::move(
file);
118x_IndexFile(std::string_view(m_hugefile->m_memory, m_hugefile->m_filesize));
128std::list<CRef<CSeq_annot>> annots;
135 autorange = m_blocks_map.equal_range(label_with_ver);
136 if(range.first == range.second) {
139 if(label_nover != label_with_ver)
140range = m_blocks_map.equal_range(label_nover);
148std::list<CRef<CSeq_annot>> annots;
150 autorange = x_FindAnnots(seqid);
152 for(
autoit = range.first; it != range.second; ++it)
154 if(m_used_annots.reset(it->second->index)) {
156 CMemoryLineReaderline_reader(m_memory.data()+it->second->start_pos, it->second->size);
159line_reader, m_reader_flags, m_logger);
161 if(annot && annot->IsFtable() && annot->GetData().GetFtable().size() > 0) {
162annots.push_back(annot);
172std::list<CRef<CSeq_id>> seqids;
174 autorange = x_FindAnnots(seqid);
176 for(
autoit = range.first; it != range.second; ++it)
180seqids.splice(seqids.end(), ids);
191CFast5colReaderIndexer
state;
192 state.m_blocks = &m_blocks;
193 state.m_whole_file = memory;
195std::string_view next_line;
197 while((start =
state.GetNextLine(next_line)) != std::string_view::npos)
199 if(next_line[0]==
'>') {
205 state.MakeNewBlock(start, seqid);
209 state.MakeNewBlock(memory.size(), {});
211 for(
auto& rec: m_blocks)
215 for(
auto id: ids) {
216 std::stringlabel_nover, label_with_ver, label_only;
225m_blocks_map.emplace(std::make_pair(label_with_ver, &rec));
226 if(label_nover != label_with_ver)
227m_blocks_map.emplace(std::make_pair(label_nover, &rec));
229 if(!m_seqid_prefix.empty()) {
230 if(rec.seqid.find(
'|') == std::string_view::npos || id->IsLocal())
231m_blocks_map.emplace(std::make_pair(m_seqid_prefix + label_only, &rec));
234 if(rec.seqid.find(
'|') == std::string_view::npos && !id->IsLocal())
235m_blocks_map.emplace(std::make_pair(
"lcl|"+ label_only, &rec));
239m_used_annots.Init(m_blocks.size());
void Open(const std::string &filename)
std::list< CRef< objects::CSeq_id > > FindAnnots(CRef< objects::CSeq_id > seqid) const
void Init(const std::string &genome_center_id, long reader_flags, TLogger *logger=nullptr)
TRange x_FindAnnots(CRef< objects::CSeq_id > id) const
std::list< CRef< objects::CSeq_annot > > GetAndUseAnnot(CRef< objects::CSeq_id > seqid) override
~CFast5colReader() override
void x_IndexFile(std::string_view memory)
std::pair< TContainer::const_iterator, TContainer::const_iterator > TRange
static bool ParseInitialFeatureLine(const CTempString &line_arg, CTempStringEx &out_seqid, CTempStringEx &out_annotname)
If line_arg is a feature line (e.g. ">Feature lcl|seq1"), then out_seqid and out_annotname are filled in.
CRef< CSeq_annot > ReadSequinFeatureTable(const TFlags flags=0, ITableFilter *filter=nullptr, const string &seqid_prefix=kEmptyStr)
Simple implementation of ILineReader for regions of memory (such as memory-mapped files).
objects::ILineErrorListener TLogger
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static SIZE_TYPE ParseIDs(CBioseq::TId &ids, const CTempString &s, TParseFlags flags=fParse_Default)
Parse a string representing one or more Seq-ids, appending the results to IDS.
@ fLabel_Version
Show the version.
@ fParse_RawText
Try to ID raw non-numeric accessions.
@ fParse_PartialOK
Warn rather than throwing an exception when a FASTA-style ID set contains unparsable portions,...
@ fParse_ValidLocal
Treat otherwise unidentified strings as raw accessions, provided that they pass rudimentary validation...
@ eContent
Untagged human-readable accession or the like.
@ eBoth
Type and content, delimited by a vertical bar.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static string & ToLower(string &str)
Convert string to lower case -- string& version.
void Stop(void)
Suspend the timer.
void Start(void)
Start the timer.
list< CRef< CSeq_id > > TId
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4