);
118 const size_tnum =
static_cast<size_t>(numrows) * numsegs;
122 "CDense_seg::CheckNumSegs(): " 127 "CDense_seg::CheckNumSegs(): " 130 if(starts.size() != num) {
132 "CDense_seg::CheckNumSegs(): " 133 "starts.size is inconsistent with dim * numseg");
135 if(lens.size() !=
size_t(numsegs)) {
137 "CDense_seg::CheckNumSegs(): " 138 "lens.size is inconsistent with numseg");
140 if(strands.size() && strands.size() != num) {
142 "CDense_seg::CheckNumSegs(): " 143 "strands.size is inconsistent with dim * numseg");
145 if(widths.size() && widths.size() !=
size_t(numrows)) {
147 "CDense_seg::CheckNumSegs(): " 148 "widths.size is inconsistent with dim");
160 "CDense_seg::GetSeq_id(): " 161 "can not get seq-id for the row requested.");
171 if(row < 0 || row >= dim) {
173 "CDense_seg::GetSeqStart(): " 174 "Invalid row number");
180 intpos = (seg - 1) * dim +
row;
182 if((start = starts[pos]) >= 0) {
190 while(++seg < numseg) {
191 if((start = starts[pos]) >= 0) {
198 "CDense_seg::GetSeqStart(): " 209 if(row < 0 || row >= dim) {
211 "CDense_seg::GetSeqStop(): " 212 "Invalid row number");
219 while(++seg < numseg) {
220 if((start = starts[pos]) >= 0) {
221 returnstart +
GetLens()[seg] - 1;
227 intpos = (seg - 1) * dim +
row;
229 if((start = starts[pos]) >= 0) {
230 returnstart +
GetLens()[seg] - 1;
236 "CDense_seg::GetSeqStop(): " 243 const size_t& strands_size =
GetStrands().size();
245 if( !strands_size ) {
250 if(strands_size < (
size_t) dim) {
259 "CDense_seg::GetSeqStrand(): " 260 "Invalid strands size");
263 if(row < 0 || row >= dim) {
265 "CDense_seg::GetSeqStrand(): " 266 "Invalid row number");
289 const size_t max= numrows * (numsegs -1);
291 boolstrands_exist = !strands.empty();
293 size_tnumseg = 0, numrow = 0,
offset= 0;
294 for(numrow = 0; numrow < numrows; numrow++) {
296 bool plus= strands_exist ?
306 for(numseg = 0; numseg < numsegs; numseg++) {
307start = starts[
offset+ numrow];
309 if(start < min_start) {
310 stringerrstr =
string(
"CDense_seg::Validate():")
311+
" Starts are not consistent!" 314numsegs - 1 - numseg) +
322lens[
plus? numseg : numsegs - 1 - numseg] *
323(widths.size() == numrows ?
332 if(min_start == -1) {
333 stringerrstr =
string(
"CDense_seg::Validate():")
348list<TSignedSeqRange> delete_ranges;
355 for(j = 0; j <
GetDim(); ++j) {
357 if(this_start == -1) {
362 if(
GetDim() - count_gaps > 1) {
379 for(j = 0; j <
GetDim(); ++j) {
381 if(this_start == -1) {
386 if(
GetDim() - count_gaps > 1) {
396list<TSignedSeqRange>::reverse_iterator iter = delete_ranges.rbegin();
397list<TSignedSeqRange>::reverse_iterator end = delete_ranges.rend();
398 for( ; iter != end; ++iter) {
400 if(
r.GetLength() == 0) {
438vector<bool> can_merge(
GetNumseg() - 1,
true);
439 unsigned intmerge_count = 0;
442 for(j = 0; j <
GetDim(); ++j) {
447 if( (this_start == -1 && next_start != -1)
448|| (this_start != -1 && next_start == -1) ) {
449can_merge[
i] =
false;
455 if(this_start != -1 && next_start != -1) {
463 if(this_start + seg_len != next_start) {
464can_merge[
i] =
false;
475can_merge[
i] =
false;
480 if(can_merge[
i]) {
485 if(merge_count == 0) {
494new_lens.reserve(
GetNumseg() - merge_count);
499 if(
i> 0 && can_merge[
i- 1]) {
503 for(j = 0; j <
GetDim(); ++j) {
505new_starts[new_starts.size() -
GetDim() + j] =
512new_lens.push_back(
GetLens()[
i]);
513 for(j = 0; j <
GetDim(); ++j) {
536 #define IDX(_x,_y) (((_x)*GetDim())+(_y)) 537 boolswaps_made =
false;
542 boolcurr_gap =
false, next_gap =
false;
543 intcurr_seq_row =
GetDim()+1, next_seq_row =
GetDim()+1;
544 for(
intj=0; j <
GetDim(); ++j) {
548curr_seq_row =
min(curr_seq_row, j);
552next_seq_row =
min(next_seq_row, j);
554 if(!(curr_gap & next_gap))
560 if(next_seq_row < curr_seq_row) {
561 for(
intj=0; j <
GetDim(); ++j) {
570}
while(swaps_made);
584vector<bool>
remove(numseg,
true);
585 unsigned intremove_count = 0;
586 for(
i= 0;
i< numseg; ++
i) {
588 for(j = 0; j < dim; ++j) {
600 if(remove_count == 0) {
608new_starts.reserve((numseg - remove_count) * dim);
609new_lens.reserve(numseg - remove_count);
611new_strands.reserve((numseg - remove_count) * dim);
613 for(
i= 0;
i< numseg; ++
i) {
616new_lens.push_back(
GetLens()[
i]);
617 for(j = 0; j < dim; ++j) {
618new_starts.push_back(
GetStarts()[
i* dim + j]);
620new_strands.push_back(
GetStrands()[
i* dim + j]);
663CDense_seg::TLens::iterator
f=
SetLens().begin();
664CDense_seg::TLens::iterator
r=
SetLens().end();
666 swap(*(
f++), *(--
r));
676 swap(starts[
f+
i], starts[
r+
i]);
688 if(row1 >=
GetDim() || row1 < 0 ||
689row2 >=
GetDim() || row2 < 0) {
691 "Row numbers supplied to CDense_seg::SwapRows must be " 692 "in the range [0, dim)");
701 for(
int i= 0;
i< idxStop;
i+=
GetDim()) {
707 for(
int i= 0;
i< idxStop;
i+=
GetDim()) {
723 for(seg = 0; seg <
GetNumseg() && !found; ++seg) {
727 if(pos >= start && pos < start +
len) {
734 "CDense_seg::x_FindSegment(): " 735 "Can't find a segment containing position "+
749 if(row < 0 || row >=
GetDim()) {
751 "CDense_seg::ExtractSlice():" 752 " Invalid row number (" 761 "CDense_seg::ExtractSlice(): " 763 ") off end of alignment");
767 "CDense_seg::ExtractSlice(): " 769 ") off end of alignment");
790 swap(startOffset, stopOffset);
791 swap(startSeg, stopSeg);
801start += startOffset;
813 if(seg == startSeg) {
816 if(seg == stopSeg) {
844new_ds->
SetDim(
static_cast<TDim>(rows.size()));
848new_ds->
SetIds().reserve(rows.size());
857 if(*row < 0 || *row >= dim) {
859 "CDense_seg::ExtractRows():" 860 " Invalid row number (" 866new_ds->
SetIds().push_back(id_copy);
868 for(
TNumsegsegnum = 0; segnum < numseg; ++segnum) {
871 intidx = segnum * dim + *
row;
893 if(
offset== 0)
return;
899++seg, pos +=
GetDim()) {
904 "Negative offset greater than seq position");
913++seg, pos +=
GetDim()) {
936}
while(++seq_loc_i);
940 if(ttl_loc_len < row_stop + 1) {
941 stringerrstr(
"CDense_seg::RemapToLoc():" 942 " Seq-loc is not long enough to" 943 " cover the alignment!" 944 " Maximum row seq pos is ");
946errstr +=
". The total seq-loc len is only ";
948errstr +=
", it should be at least ";
950errstr +=
" (= max seq pos + 1).";
972 size_tidx = loc_plus ?
row: (numsegs - 1) * numrows +
row;
973 TNumsegseg = loc_plus ? 0 : numsegs - 1;
974 while(loc_plus ? seg <
GetNumseg() : seg >= 0) {
975 if(starts[idx] == -1) {
978idx += numrows; seg++;
980idx -= numrows; seg--;
986 if((loc_plus == row_plus ?
987starts[idx] : ttl_loc_len - starts[idx] - lens[seg])
988> len_so_far + loc_len) {
996 "CDense_seg::RemapToLoc():" 1003 "CDense_seg::RemapToLoc():" 1004 " The strand should be the same accross" 1005 " the input seq-loc");
1010 if(loc_plus == row_plus) {
1011 SetStarts()[idx] += start - len_so_far;
1014start - len_so_far + ttl_loc_len - starts[idx] - lens[seg];
1017 if(lens[seg] >
len) {
1026 "CDense_seg::RemapToLoc():" 1027 " Internal error");
1032(loc_plus ? seg : seg + 1),
1034 SetLens()[loc_plus ? seg + 1 : seg] = len_diff;
1037 TStartstemp_starts(numrows, -1);
1038 for(
introw_i = 0, tmp_idx = seg * numrows;
1039row_i < numrows; ++row_i, ++tmp_idx) {
1041 if(this_start != -1) {
1042temp_starts[row_i] = this_start;
1044 if(
row== row_i) {
1045temp_starts[row_i] = start;
1047temp_starts[row_i] +=
len;
1050this_start += len_diff;
1055len_so_far += loc_len;
1059(loc_plus ? seg + 1 : seg) * numrows,
1060temp_starts.begin(), temp_starts.end());
1062 if(strands.size()) {
1065strands.begin(), strands.begin() + numrows);
1070 if((len_diff = lens[seg] -
len) > 0) {
1072idx += numrows; seg++;
1074idx -= numrows; seg--;
1085idx += numrows; seg++;
1087idx -= numrows; seg--;
1092 if( !ignore_strand ) {
1093 if(loc_plus != row_plus) {
1094 if(!strands.size()) {
1098 for(seg = 0, idx =
row;
1099seg <
GetNumseg(); seg++, idx += numrows) {
1119 boolstrands_exist = !strands.empty();
1130vector<TNumseg> extra_segs;
1141new_ds->
SetDim(numrows);
1143new_numsegs = numsegs;
1146new_ids.resize(numrows);
1150new_ids[
row] = id;
1156vector<TSignedSeqPos> expected_positions;
1157expected_positions.resize(numrows, -1);
1160 plus.resize(numrows,
true);
1161 if(strands_exist) {
1170 TNumsegidx = 0, new_idx = 0, extra_idx = 0;
1173 for(seg = 0; seg < numsegs; seg++) {
1183 if(expected_pos >= 0) {
1187extra_len = start - expected_pos;
1189extra_len = expected_pos - start -
len;
1192 if(extra_len < 0) {
1193 stringerrstr(
"CDense_seg::AddUnalignedSegments():" 1194 " Illegal overlap at Row ");
1196errstr +=
" Segment ";
1201}
else if(extra_len > 0) {
1203extra_segs.push_back(seg);
1204extra_lens.push_back(extra_len);
1205extra_starts.resize(extra_idx + numrows, -1);
1206extra_starts[extra_idx +
row] =
1209extra_idx += numrows;
1216expected_pos = start +
len;
1218expected_pos = start;
1225new_numsegs = numsegs + extra_numsegs;
1226new_lens.resize(numsegs + extra_numsegs);
1227new_starts.resize(numrows * new_numsegs);
1228 for(seg = 0, new_seg = 0, extra_seg = 0,
1229idx = 0, new_idx = 0, extra_idx = 0;
1234 if(extra_numsegs) {
1235 for( ; size_t(extra_seg) < extra_segs.size() && extra_segs[extra_seg] == seg; ++new_seg, ++extra_seg) {
1236new_lens[new_seg] = extra_lens[extra_seg];
1237 for(
row= 0;
row< numrows; ++
row, ++new_idx, ++extra_idx) {
1238new_starts[new_idx] = extra_starts[extra_idx];
1244new_lens[new_seg] = lens[seg];
1246new_starts[new_idx++] = starts[idx++];
1253 if(strands_exist) {
1255 for(new_seg = 0; new_seg < new_numsegs; new_seg++) {
1256 for(
row= 0;
row< numrows;
row++, new_idx++) {
1275 const string& transcript )
1278 boolquery_strand_specific =
1280 boolsubj_strand_specific =
1283 if(!query_strand_specific || !subj_strand_specific) {
1303string::const_iterator ib = transcript.begin();
1304string::const_iterator ie = transcript.end();
1305string::const_iterator ii = ib;
1306 unsigned charseg_type;
1307 static const char* badsymerr =
"Unknown or unsupported transcript symbol";
1308 charc = ii != ie ? *ii++ : 0;
1309 if(c ==
'M'|| c ==
'R') {
1314 else if(c ==
'I') {
1318 else if(c ==
'D') {
1325 "Empty transcript");
1335 if(
isalpha((
unsigned char) c)) {
1337 if(seg_type == 0 && (c ==
'M'|| c ==
'R')) {
1341 else if(seg_type == 1 && c ==
'I') {
1344 else if(seg_type == 2 && c ==
'D') {
1352starts.push_back(seg_type == 1? (
TSeqPos)-1: query_start + query_close);
1353strands.push_back(query_strand);
1357starts.push_back(seg_type == 2? (
TSeqPos)-1: subj_start + subj_close);
1358strands.push_back(subj_strand);
1361 case0: seg_len = pos1 - start1;
break;
1362 case1: seg_len = pos2 - start2;
break;
1363 case2: seg_len = pos1 - start1;
break;
1365lens.push_back(seg_len);
1372 if(c ==
'M'|| c ==
'R'){
1377 else if(c ==
'I') {
1381 else if(c ==
'D') {
1395 if(!
isdigit((
unsigned char) c)) {
1398 "Alignment transcript corrupt");
1402 while(ii < ie &&
isdigit((
unsigned char)(*ii))) {
1403 len= 10*
len+ *ii -
'0';
1408 case0: pos1 +=
len; pos2 +=
len;
break;
1409 case1: pos2 +=
len;
break;
1410 case2: pos1 +=
len;
break;
1416starts.push_back(seg_type == 1? (
TSeqPos)-1: query_start + query_close);
1417strands.push_back(query_strand);
1420starts.push_back(seg_type == 2? (
TSeqPos)-1: subj_start + subj_close);
1421strands.push_back(subj_strand);
1425 case0: seg_len = pos1 - start1;
break;
1426 case1: seg_len = pos2 - start2;
break;
1427 case2: seg_len = pos1 - start1;
break;
1429lens.push_back(seg_len);
1440 "Invalid row number in CreateRowSeq_interval(): "+
1449 for(
intseg = 0; seg <
GetNumseg(); seg++) {
1455 if(
TSeqPos(start) < from) {
1459 if(start +
len> to) {
1471 "Can not convert row to seq-interval - invalid from/to value");
1474ret->
SetTo(to - 1);
1477 if(plus_len >= minus_len*2) {
1481 else if(plus_len*2 < minus_len) {
1513ds.
SetLens().reserve(numseg);
1522 catch( bad_alloc&
) {
1558 catch( bad_alloc&
) {
1561DefaultRead(
in, member);
1568 return type.FindMember(
"starts");
1575x_GetMember().SetLocalReadHook(
in, hook);
1582x_GetMember().SetGlobalReadHook(hook);
1595 catch( bad_alloc&
) {
1598DefaultRead(
in, member);
1605 return type.FindMember(
"lens");
1612x_GetMember().SetLocalReadHook(
in, hook);
1619x_GetMember().SetGlobalReadHook(hook);
1632 catch( bad_alloc&
) {
1635DefaultRead(
in, member);
1642 return type.FindMember(
"strands");
1649x_GetMember().SetLocalReadHook(
in, hook);
1656x_GetMember().SetGlobalReadHook(hook);
#define ASSERT_CONSISTENCY()
NCBI_PARAM_DECL(bool, OBJECTS, DENSE_SEG_RESERVE)
NCBI_PARAM_DEF_EX(bool, OBJECTS, DENSE_SEG_RESERVE, true, eParam_NoThread, OBJECTS_DENSE_SEG_RESERVE)
bool IsReverse(ENa_strand s)
static char * s_Reserve(size_t size, CSimpleBufferT< char > &buffer)
static void SetHook(CObjectIStream &in)
static void SetGlobalHook(void)
static CObjectTypeInfoMI x_GetMember(void)
void ReadClassMember(CObjectIStream &in, const CObjectInfoMI &member)
This method will be called at approriate time when the object of requested type is to be read.
void ReadClassMember(CObjectIStream &in, const CObjectInfoMI &member)
This method will be called at approriate time when the object of requested type is to be read.
static void SetHook(CObjectIStream &in)
static void SetGlobalHook(void)
static CObjectTypeInfoMI x_GetMember(void)
static void SetHook(CObjectIStream &in)
void ReadClassMember(CObjectIStream &in, const CObjectInfoMI &member)
This method will be called at approriate time when the object of requested type is to be read.
static CObjectTypeInfoMI x_GetMember(void)
static void SetGlobalHook(void)
virtual void PreReadClassMember(CObjectIStream &in, const CObjectInfoMI &member)
Return true to invoke default reading method afterwards.
void TrimEndGaps()
Trim leading/training gaps if possible.
CRef< CDense_seg > ExtractRows(const vector< TDim > &rows) const
Extract specified rows of the alignment, in specified order.
const CSeq_id & GetSeq_id(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
CRef< CSeq_interval > CreateRowSeq_interval(TDim row) const
TSeqPos GetSeqStop(TDim row) const
void Reverse(void)
Reverse the segments' orientation.
static void SetGlobalReserveHooks(void)
void OffsetRow(TDim row, TSignedSeqPos offset)
Offset row's coords.
void RemovePureGapSegs()
Remove any segments in which every row has a gap (these can arise when ExtractRows is used)
TNumseg CheckNumSegs(void) const
void SwapRows(TDim row1, TDim row2)
Swap two rows (changing *order*, not content)
TSeqPos GetSeqStart(TDim row) const
void OrderAdjacentGaps()
Order adjacent gaps, so that the side with sequence is in row-decending order.
void FromTranscript(TSeqPos query_start, ENa_strand query_strand, TSeqPos subj_start, ENa_strand subj_strand, const string &transcript)
Initialize from pairwise alignment transcript (a string representation produced by CNWAligner)
CRef< CDense_seg > ExtractSlice(TDim row, TSeqPos from, TSeqPos to) const
Extract a slice of the alignment that includes the specified range.
TDim CheckNumRows(void) const
TNumseg x_FindSegment(TDim row, TSignedSeqPos pos) const
CRef< CDense_seg > FillUnaligned() const
Create a new dense-seg with added all unaligned pieces (implicit inserts), if any,...
void Compact()
Join adjacent mergeable segments to create a more compact alignment.
void Validate(bool full_test=false) const
void RemapToLoc(TDim row, const CSeq_loc &loc, bool ignore_strand=false)
static void SetReserveHooks(CObjectIStream &in)
void Assign(const CSerialObject &obj, ESerialRecursionMode how=eRecursive)
overloaded Assign()
const TWidths & GetWidths(void) const
Seq-loc iterator class â iterates all intervals from a seq-loc in the correct order.
Base class for all serializable objects.
static const char si[8][64]
static void DLIST_NAME() remove(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
ESerialRecursionMode
How to assign and compare child sub-objects of serial objects.
C & SerialAssign(C &dest, const C &src, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
TRange GetRange(void) const
Get the range.
ENa_strand GetStrand(void) const
static C * Get(const CTypesIterator &it)
TMemberIndex GetMemberIndex(void) const
Get index of the member in the class.
const CObjectInfo & GetClassObject(void) const
Get containing class data.
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
@ eParam_NoThread
Do not use per-thread values.
position_type GetLength(void) const
CRange< TSignedSeqPos > TSignedSeqRange
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
TFrom GetFrom(void) const
Get the From member data.
bool IsSetLens(void) const
lengths in ids order within segs Check if a value has been assigned to Lens data member.
TLens & SetLens(void)
Assign a value to Lens data member.
bool IsSetStrands(void) const
Check if a value has been assigned to Strands data member.
const TStarts & GetStarts(void) const
Get the Starts member data.
vector< ENa_strand > TStrands
TDim & SetDim(void)
Assign a value to Dim data member.
const TLens & GetLens(void) const
Get the Lens member data.
vector< TSignedSeqPos > TStarts
void SetDim(TDim value)
Assign a value to Dim data member.
vector< CRef< CSeq_id > > TIds
TDim GetDim(void) const
Get the Dim member data.
TNumseg & SetNumseg(void)
Assign a value to Numseg data member.
TStarts & SetStarts(void)
Assign a value to Starts data member.
TStrands & SetStrands(void)
Assign a value to Strands data member.
bool IsSetStarts(void) const
start OFFSETS in ids order within segs Check if a value has been assigned to Starts data member.
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
const TIds & GetIds(void) const
Get the Ids member data.
bool CanGetStrands(void) const
Check if it is safe to call GetStrands method.
TNumseg GetNumseg(void) const
Get the Numseg member data.
TIds & SetIds(void)
Assign a value to Ids data member.
const TStrands & GetStrands(void) const
Get the Strands member data.
bool IsSetIds(void) const
sequences in order Check if a value has been assigned to Ids data member.
void SetTo(TTo value)
Assign a value to To data member.
ENa_strand
strand of nucleic acid
void SetId(TId &value)
Assign a value to Id data member.
void SetFrom(TFrom value)
Assign a value to From data member.
bool IsWhole(void) const
Check if variant Whole is selected.
void SetStrand(TStrand value)
Assign a value to Strand data member.
@ eNa_strand_both
in forward orientation
const struct ncbi::grid::netcache::search::fields::SIZE size
std::istream & in(std::istream &in_, double &x_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
#define row(bind, expected)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4