<
classContainer>
85 template<
classContainer>
120 "CIGAR extraction only supported or Spliced-seg alignments");
125 boolis_neg =
false;
137(*prev_it)->GetGenomic_start() -
138(*it)->GetGenomic_end() - 1;
142(*it)->GetGenomic_start() -
143(*prev_it)->GetGenomic_end() - 1;
165 for(
const auto& chunk : (*it)->GetParts()) {
166 switch(chunk->Which()) {
239 returnkey1.
items.size() < key2.
items.size();
247 ITERATE(vector<string>, iter, key_toks) {
323item.second =
lookup.GetScore(align, *iter);
332 key.items.push_back(item);
336 if(
count% 100000 == 0) {
340<<
count/ e <<
" alignments/sec)");
352 const string&tmp_path,
365 if((*iter)[0] ==
'-') {
368}
else if((*iter)[0] ==
'+') {
387 "failed to create temporary path");
398 if( &aligns_in == &aligns_out) {
400 "cannot sort into the same container");
403 typedeflist< CRef<CSeq_align> > TAlns;
413 if( &aligns_in == &aligns_out) {
415 "cannot sort into the same container");
418 typedefvector< CRef<CSeq_align> > TAlns;
438vector<string> tmp_volumes;
450aligns.push_back(
val);
472tmp_volumes.push_back(fname);
475<<
": "<< aligns.size() <<
" alignments");
477unique_ptr<CObjectOStream> tmp_os
482 "output stream error");
485*tmp_os << *it->second;
491 if(tmp_volumes.size() && aligns.size()) {
502tmp_volumes.push_back(fname);
505<<
": "<< aligns.size() <<
" alignments");
507unique_ptr<CObjectOStream> tmp_os
512 "output stream error");
515*tmp_os << *it->second;
522 if(tmp_volumes.size()) {
532sorted_output.
Write(*it);
537sorted_output.
Flush();
540 ITERATE(vector<string>, it, tmp_volumes) {
551 boolremove_input_files,
562 typedefvector< AutoPtr<CObjectIStream> > TFiles;
564files.reserve(input_files.size());
565 ITERATE(vector<string>, it, input_files) {
586 if( !(*it)->EndOfData() ) {
598kf.first.second = sa;
605 while( !q.empty() ) {
609sorted_output.
Write(kf.first);
612 if( !(*files[kf.second]).EndOfData() ) {
615(*files[kf.second]) >> *sa;
618kf.first.second = sa;
623files[kf.second].reset();
625<< input_files[kf.second]);
626 if( remove_input_files &&
630<< input_files[kf.second]);
636sorted_output.
Flush();
bool IsReverse(ENa_strand s)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static string s_GetCIGARForSort(CScope &, const CSeq_align &align)
bool Match(const objects::CSeq_align &align)
Match a single alignment.
virtual void Write(const TAlignment &aln)=0
SAlignExtractor m_Extractor
deque< TAlignment > TAlignments
CRef< CAlignFilter > m_Filter
CAlignSort(CScope &scope, string sorting_keys, CRef< CAlignFilter > filter=CRef< CAlignFilter >(), const string &tmp_path="/tmp", size_t memory_limit=0, size_t count_limit=0)
void MergeSortedFiles(const vector< string > &input_files, IAlignSortedOutput &sorted_output, bool remove_input_files=false, bool filtered=false)
void SortAlignments(IAlignSource &align_source, IAlignSortedOutput &sorted_output)
pair< SSortKey, CRef< CSeq_align > > TAlignment
pair< TAlignment, size_t > SKeyAndFile
SSortKey_Less m_Predicate
CLocalAlignSortedOutput(TList &sortedOutput)
virtual size_t GetCountProcessed() const
virtual size_t GetCountEmitted() const
virtual ~CLocalAlignSortedOutput()
virtual void Write(const CAlignSort::TAlignment &aln)
virtual bool EndOfData() const
virtual CRef< CSeq_align > GetNext()
CLocalAlignSource(const TList &aligns)
string GetTraceback(const CSeq_align &align, CSeq_align::TDim row)
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
TSeqPos GetSeqStop(TDim row) const
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
TSeqPos GetSeqStart(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
static Uint8 GetTotalPhysicalMemorySize(void)
Return the amount of actual/total physical memory, in bytes.
virtual bool EndOfData() const =0
virtual CRef< CSeq_align > GetNext()=0
std::ofstream out("events_result.xml")
main entry point for tests
static int lookup(const char *name, const struct lookup_int *table)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
@ eFollowLinks
Follow symbolic links.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
void Error(CExceptionArgs_Base &args)
TErrCode GetErrCode(void) const
Get error code.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Info(CExceptionArgs_Base &args)
static string NormalizePath(const string &path, EFollowLinks follow_links=eIgnoreLinks)
Normalize a path.
static string CreateAbsolutePath(const string &path, ERelativeToWhat rtw=eRelativeToCwd)
Get an absolute path from some, possibly relative, path.
static string GetTmpNameEx(const string &dir=kEmptyStr, const string &prefix=kEmptyStr, ETmpFileCreationMode mode=eTmpFileGetName)
Get temporary file name.
bool CreatePath(TCreateFlags flags=fCreate_Default) const
Create the directory path recursively possibly more than one at a time.
virtual bool Remove(TRemoveFlags flags=eRecursive) const
Remove a directory entry.
virtual bool Exists(void) const
Check if directory "dirname" exists.
@ eSerial_AsnBinary
ASN.1 binary.
const string AsFastaString(void) const
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
size_t total
Total memory usage.
static bool GetMemoryUsage(SMemoryUsage &usage)
Get current process memory usage.
position_type GetLength(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
double Restart(void)
Return time elapsed since first Start() or last Restart() call (in seconds).
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
bool IsSetGenomic_strand(void) const
Check if a value has been assigned to Genomic_strand data member.
const TExons & GetExons(void) const
Get the Exons member data.
bool IsSpliced(void) const
Check if variant Spliced is selected.
const TSegs & GetSegs(void) const
Get the Segs member data.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Diag
both sequences are represented, there is sufficient similarity between product and genomic sequences....
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
constexpr auto sort(_Init &&init)
double value_type
The numeric datatype used by the parser.
const struct ncbi::grid::netcache::search::fields::KEY key
Defines process management classes.
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
bool operator()(const TAlignment &k1, const TAlignment &k2) const
vector< ESortDir > sort_dirs
pair< string, double > TItem
Process memory usage information, in bytes.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4