nested_id.AppendInteger(data_type);
76 stringlmd = last_modified;
79nested_id.AppendString(lmd);
80 id.AppendNestedCID(nested_id);
81 return id.ToString();
89 if(cache_key.empty())
100 autodata_type = nested_cid.GetFirst(
eCIT_Integer).GetInteger();
141, m_WigId(
data.m_WigId)
142, m_DataType(
data.m_DataType)
143, m_SummaryBinCount(
data.m_SummaryBinCount)
158}
catch(exception& e) {
170 intsummary_size = 0;
171is.read((
char*)&summary_size,
sizeof(
int));
197 boolhas_blob =
false;
200}
catch(exception& e) {
206}
catch(exception& e) {
265request_type =
"summary";
268request_type =
"data";
274 "./rmt_bigwig_summary.sh",
281request_type.c_str(),
296 floatbin_count_pct = 0.9f;
304 if(exit_code != 0) {
307 stringerr_msg = (exit_code == -1) ?
308 "bigwig statistics timed out":
"bigwig statistics failed (exit code: "+
NStr::NumericToString(exit_code);
313bin_count_pct -= 0.1;
318 stringval_max, val_min;
320vector<TValType> data_points;
321constexpr
intbuf_len = 64000;
322data_points.resize(buf_len, 0);
328 while(
NcbiGetline(summary_is, summary,
"\n")) {
331 autov_max = NStr::StringToNumeric<float>(val_max, convFlags);
332 autov_min = NStr::StringToNumeric<float>(val_min, convFlags);
333 floatv = (
abs(v_min) >
abs(v_max)) ? v_min : v_max;
338summary_sv.
import(&data_points[0], batch_size, start);
340fill_n(data_points.begin(), buf_len, 0);
347summary_sv.
import(&data_points[0], batch_size, start);
364 auto len= v_in.size();
366v_out.resize(
len, 0);
370 for(
size_tpos = 0; pos <
len; ++pos) {
385 size_t len= (range.
GetTo() - from) + 1;
386vector<TValType> v_in(
len, 0);
392 for(
size_t i= 0;
i<
len; ++
i) {
413vector<float> raw_data(range.
GetLength(), 0);
418 if(exit_code != 0) {
419 stringerr_msg = (exit_code == -1) ?
420 "bigwig data retrieval timed out":
"bigwig data retrieval failed (exit code: "+
NStr::NumericToString(exit_code);
428 if(curr_line.empty() || curr_line[0] ==
'#')
436 intstart = max<int>(NStr::StringToNumeric<int>(
columns[1]), range.
GetFrom());
437 intstop = NStr::StringToNumeric<int>(
columns[2]);
438 intspan = stop - start;
441 float value= NStr::StringToNumeric<float>(
columns[3]);
443fill_n(raw_data.begin() + pos, span,
value);
461 int len=
static_cast<int>(sv_lay.
size());
467w.write((
const char*)&
len,
sizeof(
len));
471w.write((
const char*)sv_lay.
buf(),
len);
481 throwruntime_error(
"Invalid frames size");
487 autostop = range.
GetTo();
488 while(start <= stop) {
489res.emplace_back(start, min<int>(start + (
chunk_size- 1), stop));
499 autoit =
data.begin();
503 intto = from +
static_cast<int>(
data.size() - 1);
505 while(++it !=
data.end()) {
523 const auto& data_r =
data.GetRange();
524 _ASSERT(data_r.GetLength() > 0);
525 if(data_r.GetLength() == 0)
531 autostart = data_r.GetFrom()/bin_size;
532 autostop = data_r.GetTo()/bin_size;
533 int len= (stop - start) + 1;
534 autofrom = data_r.GetFrom();
536vector<TSparseVector::value_type> data_points;
537constexpr
intbuf_len = 64000;
538data_points.resize(buf_len, 0);
540 intbatch_size = min<int>(buf_len, (stop + 1) - start);
541 m_Data[
"0"]->decode(&data_points[0], start, batch_size);
542 for(
auto i= 0;
i< batch_size; ++
i) {
543 auto f= from +
i* bin_size;
547 data.AddRange(
r, v);
550from += (batch_size * bin_size);
569vector<TSeqRange> chunks;
574 for(
const auto& chunk : chunks) {
575vector<float> data_points;
577 _ASSERT(chunk.GetLength() == data_points.size());
591vector<TSeqRange> missing_ranges;
594 for(
const auto&
r: missing_ranges) {
595 len-=
r.GetLength();
597 floatcoverage =
len;
610vector<TSeqRange> missing_ranges;
612 boolupdate_data = !missing_ranges.empty();
613 if(update_data ==
false)
615vector<TSeqRange> normalized_ranges;
618normalized_ranges.emplace_back(from, to);
619 for(
size_t i= 1;
i< missing_ranges.size(); ++
i) {
620 const auto&
r= missing_ranges[
i];
621 if(
r.GetFrom() > normalized_ranges.back().GetTo()) {
624normalized_ranges.emplace_back(from, to);
625}
else if(
r.GetTo() > normalized_ranges.back().GetTo()) {
627normalized_ranges.back().SetTo(to);
631 for(
auto& range : normalized_ranges) {
635 m_PendingData.emplace(range, async(std::launch::async, [range,
this]{
666 if(!pending_data.
empty()) {
667 for(
auto&
r: pending_data) {
670 Update(d->range, update);
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
Debugging functions (internal). Poorly documented, not well written.
Serialization / compression of bvector<>. Set theoretical operations on compressed BLOBs.
Serialization for sparse_vector<>
Checksum and hash calculation classes.
CChecksum - Checksum calculator.
Pool of recycled CCompoundID objects.
CCompoundID NewID(ECompoundIDClass new_id_class)
Create and return a new CCompoundID object.
CCompoundID FromString(const string &cid)
Unpack the base64-encoded ID and return a CCompoundID object for field extraction.
void AppendSeqID(const string &seq_id)
Append an eCIT_SeqID field at the end of this compound ID.
The result type for Spawn methods.
void SaveData(CRef< TData > data)
Clones TData and puts it into the Save Queue for an asynchronous storage operation.
static CGraphCache & GetInstance()
Guard that can be used to automatically return object to the pool after leaving some scope.
Extended exit information for waited process.
Note about the "buf_size" parameter for streams in this API.
@ fOwnReader
Own the underlying reader.
@ fOwnWriter
Own the underlying writer.
@ ESuggestedAction_Access
try to access the remote file
@ ESuggestedAction_Skip
do not access the file
static void Set(const string &sKey, ESuggestedAction KnownAccessibility)
static ESuggestedAction Check(const string &sKey)
string m_RemotePath
path to remote data or srz accession
time_t m_LastChecked
timestamp: last time when m_LastModified was checked
string m_NetCacheKey
data access key
static CCompoundID CreateCompoundID(objects::CScope &scope, const objects::CSeq_id &seq_id, const string &remote_path)
CSparseGraph.
virtual void Init(TGraphCachePool &icache_pool)
string m_LastModified
timestamp of the remote data last modified date as reported by www server
void Update(const TSeqRange &range, const TUpdateMap &update)
virtual void Save(TGraphCachePool &icache_pool)
mutex m_DataMutex
data access mutex
map< string, unique_ptr< TSparseVector > > m_Data
map of sparse vectors; the key is used as part of the suffix of the NetCache key (m_NetCacheKey + "_" + this ...
string m_SeqId
NCBI seq_id, e.g. NC_000001.
size_t m_DataSize
size of each vector of m_Data
void InitFromCompoundID(CCompoundID id)
void GetMissingRegions(const TSeqRange &range, vector< TSeqRange > &missing_ranges)
Writer-based output stream.
int64_t x_DecodeSign(uint64_t v)
map< TSeqRange, future< TPrefetchData > > m_PendingData
void Prefetch(const TSeqRange &range)
virtual void Save(TGraphCachePool &icache_pool) override
GetData serializes the data into CDensityMap for the range provided in @data.
void x_CreateNetCacheKey()
virtual void Init(TGraphCachePool &icache_pool) override
Save is invoked by CGraphCache when cached data needs to be permanently saved into NetCache.
CWigGraph(const string &cache_key)
Copy constructor is invoked by CGraphCache, which clones the data for an asynchronous saving operation.
void x_GetBigWigData(const TSeqRange &range, vector< TValType > &v_out)
void x_DecodeDelta(const TSeqRange &range, vector< float > &v_out)
float GetDataCoverage(const TSeqRange &range)
bool x_ReadSummary(ICache &icache, TSparseVector &sv)
void GetData(CHistogramGlyph::TMap &data)
GetDataCoverage returns fraction (0.
int x_RunBigWigScript(const TSeqRange &range, const string tmp_file)
void x_UnPackFloat(TSparseVector::value_type v_in, float &v_out)
void x_AddDataToMap(const vector< float > &data, const TSeqPos from, CHistogramGlyph::TMap &the_map)
future< TPrefetchSummary > m_PendingSummary
unique_ptr< TSparseVector > TPrefetchSummary
void x_SaveSummary(ICache &icache, TSparseVector &sv)
string m_WigId
WigFile id, e.g. chr1.
void x_GetBigWigSummary(TSparseVector &summary_sv)
void x_EncodeDelta(const TSeqRange &range, const vector< float > &v_in, vector< TValType > &v_out)
atomic< bool > m_PrefetchRequested
Number of data points in one Summary point.
static constexpr float kFract
CWigGraph data come in two flavors: Summary - low-res representation for the whole sequence length and...
unique_ptr< TPrefetchRequest > TPrefetchData
uint64_t x_EncodeSign(int64_t v)
Encode/Decode negative values to positive ones.
static const int kMinBinSize
void x_PackFloat(float v_in, TSparseVector::value_type &v_out)
Float values are packed with precision loss (3 decimal places retained); values are converted to posti...
struct { TSeqRange range TPrefetchRequest
static string GetCacheKey(objects::CScope &scope, const objects::CSeq_id &seq_id, const string &wig_id, const string &remote_path, const string &last_modified, EDataType data_type)
CWigGraph.
BLOB cache read/write/maintenance interface.
virtual size_t GetSize(const string &key, TBlobVersion version, const string &subkey)=0
Check if BLOB exists, return BLOB size.
virtual IWriter * GetWriteStream(const string &key, TBlobVersion version, const string &subkey, unsigned int time_to_live=0, const string &owner=kEmptyStr)=0
Return sequential stream interface to write BLOB data.
virtual IReader * GetReadStream(const string &key, TBlobVersion version, const string &subkey)=0
Return sequential stream interface to read BLOB data.
succinct sparse vector with runtime compression using bit-slicing / transposition method
bvector_type::size_type size_type
void resize(size_type sz)
resize vector
void import(const value_type *arr, size_type arr_size, size_type offset=0, bool set_not_null=true)
Import list of elements from a C-style array.
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename sparse_vector< Val, BV >::statistics *stat=0)
run memory optimization for all vector planes
static const int chunk_size
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static const column_t columns[]
unsigned int TSeqPos
Type for sequence locations and lengths.
int TSignedSeqPos
Type for signed sequence position.
string GetHexSum(void) const
Return string with checksum in hexadecimal form.
void AddLine(const char *line, size_t len)
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
static CResult SpawnL(EMode mode, const char *cmdname, const char *argv,...)
Spawn a new process with specified command-line arguments.
TProcessHandle GetProcessHandle(void)
Get process handle/pid.
@ eNoWait
Continues to execute calling process concurrently with new process (asynchronous spawn).
static fstream * CreateTmpFile(const string &filename=kEmptyStr, ETextBinary text_binary=eBinary, EAllowRead allow_read=eAllowRead)
Create temporary file and return pointer to corresponding stream.
static string GetTmpName(ETmpFileCreationMode mode=eTmpFileGetName)
Get temporary file name.
@ eTmpFileCreate
Create empty file for each GetTmpName* call.
virtual void AddRange(TSeqRange range, CntType score=1, bool expand=false)
static void GetLastModified(const string &url, string &last_modified)
HTTP: Returns header's Last-Modified in the last_modified parameter FTP: Returns MD5 of first 512 byt...
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
int GetSignal(void) const
Get the signal number that has caused the process to terminate (UNIX only).
int Wait(unsigned long timeout=kInfiniteTimeoutMs, CExitInfo *info=0) const
Wait until process terminates.
bool IsSignaled(void) const
TRUE if the process terminated by a signal (UNIX only).
int GetExitCode(void) const
Get process exit code.
@ eHandle
A process handle (MS Windows).
position_type GetLength(void) const
position_type GetToOpen(void) const
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
CNcbiIstream & NcbiGetline(CNcbiIstream &is, string &str, char delim, string::size_type *count=NULL)
Read from "is" to "str" up to the delimiter symbol "delim" (or EOF)
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
@ fAllowTrailingSpaces
Ignore trailing whitespace characters.
@ fConvErr_NoThrow
Do not throw an exception on error.
@ fAllowLeadingSpaces
Ignore leading whitespace characters in converted string.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
time_t GetTimeT(void) const
Get time in time_t format.
@ eCurrent
Use current time. See also CCurrentTime.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
unsigned int
A callback function used to compare two keys in a database.
void sparse_vector_serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout, bm::word_t *temp_block=0)
Serialize sparse vector into a memory buffer(s) structure.
int sparse_vector_deserialize(SV &sv, const unsigned char *buf, bm::word_t *temp_block=0)
Deserialize sparse vector.
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
const GenericPointer< typename T::ValueType > T2 value
Defines a portable execute class.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
Reader-writer based streams.
layout class for serialization buffer structure
const unsigned char * buf() const noexcept
Return serialization buffer pointer.
size_t size() const noexcept
return current serialized size
static const int kDeltaFrameSize
static TSeqPos s_AlignToFrame(TSeqPos pos)
static void s_SplitRange(const TSeqRange &range, int frames, vector< TSeqRange > &res)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4