v =
NCBI_PARAM_TYPE(WGS, USE_AMBIGUITY_MASK)::GetDefault();
141 static boolv =
NCBI_PARAM_TYPE(WGS, USE_AMBIGUITY_4NA)::GetDefault();
153 static boolv =
NCBI_PARAM_TYPE(WGS, USE_FULL_4NA_BLOCKS)::GetDefault();
206 #ifdef COLLECT_PROFILE 212SProfiler() : name(0),
count(0) {}
215cout << name<<
" calls: "<<
count<<
" time: "<<
sw.
Elapsed()<<endl;
218 structSProfilerGuard
221SProfilerGuard(SProfiler&
sw,
const char* name)
234 staticSProfiler sw_Serialize;
235 staticSProfiler sw_Feat;
236 staticSProfiler sw_GetAccSeq_id;
237 staticSProfiler sw_GetBioseq;
238 staticSProfiler sw_GetSeq_entry;
239 staticSProfiler sw_GetSeq_entryData;
240 staticSProfiler sw_GetSplitInfo;
241 staticSProfiler sw_GetSplitInfoData;
242 staticSProfiler sw_InitSplit;
243 staticSProfiler sw_GetFeatLocIdTypeRange;
244 staticSProfiler sw_GetFeatLocIdTypeFeat;
245 staticSProfiler sw_GetFeatLocIdTypeFeatBytes;
246 staticSProfiler sw_GetFeatBytes;
247 staticSProfiler sw_GetChunk;
248 staticSProfiler sw_CreateQualityChunk;
249 staticSProfiler sw_CreateDataChunk;
250 staticSProfiler sw_CreateProductsChunk;
251 staticSProfiler sw_CreateFeaturesChunk;
252 staticSProfiler sw__GetProtFeat;
253 staticSProfiler sw___GetProtAnnot;
254 staticSProfiler sw___GetProtInst;
255 staticSProfiler sw___GetProtDescr;
256 staticSProfiler sw____GetProtWGSAcc;
257 staticSProfiler sw____GetProtAccVer;
258 staticSProfiler sw____GetProtAcc;
259 staticSProfiler sw____GetProtGI;
260 staticSProfiler sw____GetProtGISeq_id;
261 staticSProfiler sw____GetProtGnlSeq_id;
262 staticSProfiler sw____GetProtAccSeq_id;
263 staticSProfiler sw___GetProtIds;
264 staticSProfiler sw__GetProtBioseq;
265 staticSProfiler sw_GetProtEntry;
266 staticSProfiler sw__GetScaffoldFeat;
267 staticSProfiler sw___GetScaffoldQual;
268 staticSProfiler sw___GetScaffoldAnnot;
269 staticSProfiler sw___GetScaffoldInst;
270 staticSProfiler sw___GetScaffoldDescr;
271 staticSProfiler sw___GetScaffoldIds;
272 staticSProfiler sw__GetScaffoldBioseq;
273 staticSProfiler sw_GetScaffoldEntry;
274 staticSProfiler sw__GetContigFeat;
275 staticSProfiler sw___GetContigQual;
276 staticSProfiler sw____GetContigQualSize;
277 staticSProfiler sw____GetContigQualData;
278 staticSProfiler sw____GetContigQualMinMax;
279 staticSProfiler sw___GetContigAnnot;
280 staticSProfiler sw____IsGap;
281 staticSProfiler sw____Get2naLen;
282 staticSProfiler sw____Get4naLen;
283 staticSProfiler sw____GetGapLen;
284 staticSProfiler sw____GetRaw2na;
285 staticSProfiler sw____GetRaw4na;
286 staticSProfiler sw____GetAmb2Mask;
287 staticSProfiler sw____Get4na2Mask;
288 staticSProfiler sw____Scan4na;
289 staticSProfiler sw____GetCvt4na;
290 staticSProfiler sw____GetAmb4na;
291 staticSProfiler sw____GetBlk4na;
292 staticSProfiler sw____SetGaps;
293 staticSProfiler sw___GetContigInst;
294 staticSProfiler sw___GetContigDescr;
295 staticSProfiler sw___GetContigIds;
296 staticSProfiler sw__GetContigBioseq;
297 staticSProfiler sw_GetContigEntry;
298 staticSProfiler sw_FeatIterator;
299 staticSProfiler sw_ProtIterator;
300 staticSProfiler sw_ScafIterator;
301 staticSProfiler sw_SeqIterator;
302 staticSProfiler sw_WGSOpen;
304 # define PROFILE(var) SProfilerGuard guard(var, #var) 306 # define PROFILE(var) 366dst.assign(
data.begin()+2,
data.end()-2);
369dst.assign(
data.begin(),
data.end());
403 out.Write(
info.m_Bytes.data(),
info.m_Bytes.size());
576m_READ_2na(m_Cursor,
"(INSDC:2na:packed)READ",
588 if(
s_UseAmbiguity4na() && m_GAP_START && m_GAP_LEN && m_AMBIGUITY_POS && m_AMBIGUITY_4NA ) {
593m_AMBIGUITY_POS.Reset();
594m_AMBIGUITY_4NA.Reset();
598m_QUALITY.ResetIfAlwaysEmpty(
m_Cursor);
778 type.FindVariant(
"str")
783 type.FindMember(
"key")
788 type.FindMember(
"db")
793 type.FindMember(
"qual")
1099 size_tbyte_index = block_index/8;
1100 Uint1byte_bit = 1<<(block_index%8);
1105 size_tbyte_index = block_index/8;
1106 Uint1byte_bit = 1<<(block_index%8);
1163 void Advance(S4naReader& reader)
const;
1167 template<
classValue>
1170dst.resize(src.
size());
1171copy_n(src.
begin(), src.
size(), dst.data());
1176: m_Prefix(db.GetIdPrefixWithVersion()),
1178m_HasGapInfo(
false),
1179m_HasAmbiguityMask(
false),
1180m_HasAmbiguityPos(
false),
1181m_Has4naBlocks(
false)
1183 if( cur.m_GAP_START ) {
1188 if( cur.m_GAP_LINKAGE ) {
1194 const boolkVerify4na =
false;
1195vector<Uint1> m_ExpectedAmbiguityMask;
1196vector<INSDC_coord_zero> m_ExpectedAmbiguityPos;
1197vector<INSDC_4na_bin> m_ExpectedAmbiguity4na;
1207 if( cur.m_AMBIGUITY_MASK ) {
1212 if( cur.m_AMBIGUITY_POS && cur.m_AMBIGUITY_4NA ) {
1222 size_tmask_bit_count = 0;
1245 for(
size_tblock_index = 0;
1251 size_tbyte_index = block_index/8;
1252 Uint1byte_bit = 1<<(block_index%8);
1253exp_bit = byte_index < m_ExpectedAmbiguityMask.size() &&
1254(m_ExpectedAmbiguityMask[byte_index] & byte_bit);
1256 if( bit != exp_bit ) {
1258 "mask["<<block_index<<
" = "<<oct<<block_index<<dec<<
"] "<<bit<<
", expected "<<exp_bit);
1261 size_tindex = 0, exp_index = 0;
1262 while( index <
m_AmbiguityPos.size() || exp_index < m_ExpectedAmbiguityPos.size() ) {
1265 TSeqPosexp_pos = exp_index < m_ExpectedAmbiguityPos.size()? m_ExpectedAmbiguityPos[exp_index]:
kInvalidSeqPos;
1266 intexp_base = exp_index < m_ExpectedAmbiguityPos.size()? m_ExpectedAmbiguity4na[exp_index]: 0;
1267 if( pos == exp_pos ) {
1268 if( base != exp_base ) {
1270 "amb["<<pos<<
" = "<<oct<<pos<<dec<<
"] "<<base<<
", expected "<<exp_base);
1275 else if( pos < exp_pos ) {
1277 "amb["<<pos<<
" = "<<oct<<pos<<dec<<
"] "<<base<<
", expected -");
1282 "amb["<<exp_pos<<
" = "<<oct<<exp_pos<<dec<<
"] -, expected "<<exp_base);
1293 size_tmemory = GetUsedMemory();
1295 LOG_POST(
"~SAmbiguityInfo("<<m_Prefix<<
"/"<<m_RowId<<
") " 1303 const size_tkAllocateGap =
sizeof(
void*)*2;
1304 size_tret = kAllocateGap +
sizeof(*this);
1305ret += kAllocateGap + m_GapStart.size()*
sizeof(m_GapStart.front());
1306ret += kAllocateGap + m_GapLen.size()*
sizeof(m_GapLen.front());
1307ret += kAllocateGap + m_GapProps.size()*
sizeof(m_GapProps.front());
1308ret += kAllocateGap + m_GapLinkage.size()*
sizeof(m_GapLinkage.front());
1309ret += kAllocateGap + m_AmbiguityMask.size()*
sizeof(m_AmbiguityMask.front());
1310 if( m_HasAmbiguityPos || m_Has4naBlocks ) {
1312ret += kAllocateGap + m_AmbiguityPos.size()*
sizeof(m_AmbiguityPos.front());
1313ret += kAllocateGap + m_Ambiguity4na.size()*
sizeof(m_Ambiguity4na.front());
1314 const size_tkBlockUsedMemory =
1315kAllocateGap + 4*
sizeof(
void*) +
sizeof(
S4naBlock);
1316ret += kBlockUsedMemory * m_4naBlocks.size();
1328gap_info.
gaps_len= m_GapLen.data();
1348 return b&& !(
b&(
b-1));
1356 for( ; ptr != end; ++ptr ) {
1385 return offset+base_count;
1404 if( gap_info.
IsInGap(pos) ) {
1430 static const unsigned char table[16] = {
14310x11, 0x12, 0x14, 0x18,
14320x21, 0x22, 0x24, 0x28,
14330x41, 0x42, 0x44, 0x48,
14340x81, 0x82, 0x84, 0x88
1436 return table[bits_2na & 0xf];
1462 while( base_count >= 4 ) {
1463 charbits_2na = src_2na[0];
1471 charbits_2na = src_2na[0] & (0xff00 >> base_count*2);
1474 if( base_count < 2 ) {
1477dst_4na[0] = bits_4na;
1479 if( base_count > 2 ) {
1489 constvector<char>& src_2na_vec,
1492 size_tdst_4na_byte_count = (base_count+1)/2;
1494dst_4na_vec.reserve((dst_4na_byte_count+7)/8*8);
1495dst_4na_vec.resize(dst_4na_byte_count);
1507 char& dst = dst_4na_vec[
offset/2];
1509dst = (dst & 0xf) | (amb << 4);
1512dst = (dst & 0xf0) | amb;
1524 char* dst = dst_4na_vec.data()+ (
offset/2);
1531 while(
len>= 2 ) {
1546 const char* src_4na,
TSeqPossrc_offset,
1549 if( !base_count ) {
1552dst_4na += dst_offset/2;
1554src_4na += src_offset/2;
1557 if( dst_offset != 0 ) {
1558 Uint1dst_b = dst_4na[0];
1559 Uint1src_b = src_4na[0];
1560src_4na += src_offset;
1561 if( !src_offset ) {
1565dst_b = (dst_b & 0xf0) | (src_b & 0xf);
1572 if( src_offset == 0 ) {
1573 size_tcopy_bytes = base_count / 2;
1574dst_4na = copy_n(src_4na, copy_bytes, dst_4na);
1575src_4na += copy_bytes;
1579 while( base_count >= 2 ) {
1580 Uint1src_b0 = src_4na[0];
1581 Uint1src_b1 = src_4na[1];
1582 Uint1dst_b = (src_b0 << 4) | (src_b1 >> 4);
1591 Uint1dst_b = dst_4na[0];
1592 Uint1src_b = src_4na[0];
1596dst_b = (dst_b & 0xf) | (src_b & 0xf0);
1605 const Uint1* src_4na,
1608 while( base_count >= 2 ) {
1609 autob0 = src_4na[0];
1610 autob1 = src_4na[1];
1611 autopacked_bb = (b0 << 4)+b1;
1612*dst_packed_4na = packed_bb;
1618 autob0 = src_4na[0];
1619 autopacked_bb = (b0 << 4);
1620*dst_packed_4na = packed_bb;
1628 constvector<INSDC_coord_zero>& amb_pos,
1629 constvector<INSDC_4na_bin>& amb_4na)
1631 autoiter_pos = lower_bound(amb_pos.begin(), amb_pos.end(),
INSDC_coord_zero(pos));
1632 autoiter_4na = amb_4na.begin() + (iter_pos-amb_pos.begin());
1634 for( ; iter_pos != amb_pos.end() && *iter_pos < end; ++iter_pos, ++iter_4na ) {
1635 s_Set_4na(dst_4na_vec, *iter_pos-pos, *iter_4na);
1647 for(
autoiter =
blocks.lower_bound(block_pos);
1648iter !=
blocks.end() && iter->first < end;
1650 TSeqPosblock_pos = iter->first;
1654 if( block_pos < pos ) {
1656src_offset = pos-block_pos;
1660dst_offset = block_pos-pos;
1664 s_Copy_4na(dst_4na_vec.data(), dst_offset, iter->second.m_Packed4na, src_offset, copy_len);
1676 for( ;
len> 0; ) {
1677 if( gap_info.
IsInGap(pos) ) {
1712 boolambiguous =
false;
1715 if( gap_info.
IsInGap(pos) ) {
1728m_AmbiguityPos.push_back(pos+
i);
1729m_Ambiguity4na.push_back(
b);
1743 if( m_HasAmbiguityMask ) {
1747 if( m_HasAmbiguityPos ) {
1750 if(
size_tambiguity_count = m_AmbiguityPos.size() ) {
1752 size_tlast_byte_index = last_block_index/8;
1753m_AmbiguityMask.resize(last_byte_index+1);
1754 for(
size_t i= 0;
i< ambiguity_count; ++
i) {
1759 size_tmemory = GetUsedMemory();
1760 size_tmask_bit_count = 0;
1761 for(
auto bb: m_AmbiguityMask ) {
1768 LOG_POST(
"SAmbiguityInfo("<<m_Prefix<<
"/"<<m_RowId<<
") " 1769<<
"calculated mask from ambiguities, " 1785cur4na = db.
Seq4na(m_RowId);
1786read4na = cur4na->READ(m_RowId);
1792 size_tmask_bit_count = 0;
1793m_AmbiguityMask.resize((block_count+7)/8);
1795 for(
size_tblock_index = 0; block_index < block_count; ++block_index ) {
1797 const Uint1* base_ptr = read4na.
data() + block_pos;
1799 boolambiguous =
false;
1800 if( use_full_4na_blocks ) {
1801ambiguous = x_AddAmbiguousBlock(base_ptr, base_count, block_pos, gap_info);
1804ambiguous = x_AddAmbiguities(base_ptr, base_count, block_pos, gap_info);
1807x_SetAmbiguousBlock(block_index);
1811 if( use_full_4na_blocks ) {
1812m_Has4naBlocks =
true;
1815m_HasAmbiguityPos =
true;
1819 size_tmemory = GetUsedMemory();
1821 LOG_POST(
"SAmbiguityInfo("<<m_Prefix<<
"/"<<m_RowId<<
") " 1822 "calculated mask from read, " 1830m_HasAmbiguityMask =
true;
1837 if( m_HasAmbiguityPos || m_Has4naBlocks ) {
1847 size_tbit_count = 0;
1848 size_twrong_bit_count = 0;
1850 for(
size_tblock_byte = 0; block_byte < m_AmbiguityMask.size(); ++block_byte ) {
1851 if(
autobits = m_AmbiguityMask[block_byte] ) {
1854cur4na = db.
Seq4na(m_RowId);
1855read4na = cur4na->READ(m_RowId);
1858 for(
size_tblock_bit = 0; block_bit < 8; ++block_bit ) {
1859 if( bits & (1<<block_bit) ) {
1861 size_tblock_index = block_byte*8+block_bit;
1863 const Uint1* base_ptr = read4na.
data() + block_pos;
1865 boolambiguous =
false;
1866gap_info.
SetPos(block_pos);
1867 if( use_full_4na_blocks ) {
1868ambiguous = x_AddAmbiguousBlock(base_ptr, base_count, block_pos, gap_info);
1871ambiguous = x_AddAmbiguities(base_ptr, base_count, block_pos, gap_info);
1879 LOG_POST(
"SAmbiguityInfo("<<m_Prefix<<
"/"<<m_RowId<<
") " 1880<<
"wrong bit set at "<<block_pos);
1887 if( use_full_4na_blocks ) {
1888m_Has4naBlocks =
true;
1891m_HasAmbiguityPos =
true;
1896 size_tmemory = GetUsedMemory();
1898 LOG_POST(
"SAmbiguityInfo("<<m_Prefix<<
"/"<<m_RowId<<
") " 1899<<
"calculated 4na, " 1906 for(
size_t i= 0;
i< 2 &&
i< m_AmbiguityPos.size(); ++
i) {
1907 LOG_POST(
"SAmbiguityInfo("<<m_Prefix<<
"/"<<m_RowId<<
") " 1908<<
"ambiguity at "<<m_AmbiguityPos[
i]<<
" - "<<m_Ambiguity4na[
i]*1);
1917 if( m_HasAmbiguityPos ) {
1938reader.
m_Pos= pos;
1939 if( m_HasAmbiguityPos ) {
1942lower_bound(m_AmbiguityPos.begin(), m_AmbiguityPos.end(),
INSDC_coord_zero(pos)) - m_AmbiguityPos.begin();
1958 if( m_HasAmbiguityPos ) {
1985 returnbase == 0xf? eBase_Gap:
sx_Is2na(base)? eBase_2na: eBase_4na;
1995 if( m_HasAmbiguityPos ) {
2031 if( m_HasAmbiguityPos ) {
2032 autoiter = lower_bound(m_AmbiguityPos.begin(), m_AmbiguityPos.end(),
INSDC_coord_zero(pos));
2033 if( iter == m_AmbiguityPos.end() ||
TSeqPos(*iter) >= end ) {
2041 for(
autoblock_iter = m_4naBlocks.lower_bound(block_pos);
2042block_iter != m_4naBlocks.end() && block_iter->first < end;
2044 size_tin_block_pos = pos <= block_iter->first? 0: pos-block_iter->first;
2047in_block_pos, in_block_len));
2048 if( amb_pos < in_block_pos+in_block_len ) {
2049 return(block_iter->first+amb_pos) - pos;
2064 if(
len< stop_2na_len ) {
2067 S4naReaderreader = Get4naReader(pos, db, cur);
2068 TSeqPosrem_len =
len, len2na = 0, gap_len = 0;
2074 for( ; rem_len; --rem_len, Advance(reader) ) {
2075 autobase_type = GetBaseType(reader);
2076 if( base_type == eBase_2na ) {
2077 if( len2na == stop_2na_len-1 ) {
2078 return len-(rem_len+len2na);
2087 if( gap_len == stop_gap_len-1 ) {
2088 return len-(rem_len+gap_len);
2095 _ASSERT(len2na < stop_2na_len);
2105 S4naReaderreader = Get4naReader(pos, db, cur);
2107 for( ; rem_len; --rem_len, Advance(reader) ) {
2109 autobase_type = GetBaseType(reader);
2110 if( base_type != eBase_Gap ) {
2111 return len-rem_len;
2126 size_tbytes = (
len+3)/4;
2128 data.reserve((bytes+7)/8*8);
2129 data.resize(bytes);
2144 autoseq_2na = Get2na(pos,
len, cur);
2148 if( m_HasAmbiguityPos ) {
2171 while( pos != end ) {
2173 if( x_AmbiguousBlock(block_index) ) {
2187 while( pos != end ) {
2189 if( !x_AmbiguousBlock(block_index) ) {
2199 #ifdef USE_GLOBAL_AMBIGUITY_CACHE 2213 #ifdef USE_GLOBAL_AMBIGUITY_CACHE 2228 size_tused_memory =
info->GetUsedMemory();
2229 #ifdef USE_GLOBAL_AMBIGUITY_CACHE 2242 if( !seq->m_Cursor.TryOpenRow(1) ) {
2252 if( m_IdRowDigits < 6 || m_IdRowDigits > 8 ) {
2254 "CWGSDb: bad WGS accession format: "<<acc);
2279 boolhas_static_taxid = seq->m_TAXID && seq->m_TAXID.IsStatic(seq->m_Cursor);
2281 if( has_static_taxid ) {
2282 auto value= seq->TAXID(1);
2283 if(
value.size() != 1 ) {
2284has_static_taxid =
false;
2287static_taxid =
value[0];
2297 size_t size= node.GetSize();
2309 if( node.GetSize() != 0 ) {
2325 #ifdef USE_TEST_PATH 2328 if( !test_path.empty() ) {
2337 if( !vol_path.
empty() ) {
2338vector<CTempString> dirs;
2340 ITERATE( vector<CTempString>, it, dirs ) {
2352path_or_acc.
find(
'.') == string::npos ) {
2359 stringacc = path_or_acc.
substr(start);
2360 size_tacclen = acc.
size();
2361 size_tdigit_pos = acc.find_first_of(
"0123456789");
2362 if(digit_pos == string::npos && (acclen == 4 || acclen == 6)) {
2363 return string(path_or_acc) +
"00";
2364}
else if((digit_pos == 4 || digit_pos == 6) &&
2365acclen > digit_pos + 2) {
2367 returnpath_or_acc.
substr(0, start+digit_pos+2);
2376atomic<bool>& table_is_opened,
2380 if( !table_is_opened.load(memory_order_acquire) ) {
2382table_is_opened.store(
true, memory_order_release);
2390atomic<Int1>& index_is_opened,
2391 const char* index_name,
2392 const char* backup_index_name)
2396 if( !index_is_opened.load(memory_order_acquire) ) {
2403 else if( backup_index_name ) {
2410index_is_opened.store(
type, memory_order_release);
2414index_is_opened.store(-1, memory_order_release);
2452 "contig_name_uc",
"contig_name");
2459 "scaffold_name_uc",
"scaffold_name");
2466 "protein_name_uc",
"protein_name");
2473 "product_name_uc",
"product_name");
2484pair<TVDBRowId, CWGSDb_Impl::ERowType>
2486TAllowRowType allow_type)
2491 if(prefix_len ==
NPOS|| prefix_len >= acc.
size() - 2)
2493 elseprefix_len += 2;
2496 if(
row[0] ==
'S') {
2503 else if(
row[0] ==
'P') {
2516 if( ret.first < 0 ) {
2526 if( is_scaffold ) {
2529pair<TVDBRowId, TRowType> rt =
ParseRowType(acc, allow_type);
2530 if( is_scaffold ) {
2541 if(
const CTextseq_id* text_id =
id.GetTextseq_Id() ) {
2551 if(
const CTextseq_id* text_id =
id.GetTextseq_Id() ) {
2552 const_cast<CTextseq_id*
>(text_id)->SetAccession(accession);
2570 const boolkSetErrno = 0;
2571 const boolkSetNcbiError = 0;
2573 int error= 0, ret = -1;
2574 size_t len=
str.size();
2579 unsignedv =
str.data()[0] -
'0';
2584 for(
size_t i= 1;
i<
len; ++
i) {
2585 unsignedd =
str.data()[
i] -
'0';
2590 unsignednv = v * 10 + d;
2591 const unsignedkOverflowLimit = (INT_MAX - 9) / 10 + 1;
2592 if( v >= kOverflowLimit ) {
2594 if( v > kOverflowLimit || nv > INT_MAX) {
2602ret =
static_cast<int>(v);
2609 if(kSetNcbiError &&
error) {
2623 if(
str.size() == 1 ||
str.data()[0] !=
'0') {
2675 if( !bytes.
empty() ) {
2683 for(
auto& desc :
tmp.Set() ) {
2684descr.
Set().push_back(desc);
2688 while(
in.HaveMoreData() ) {
2691descr.
Set().push_back(desc);
2700 if( !bytes.
empty() ) {
2702 while(
in.HaveMoreData() ) {
2705annot_set.push_back(annot);
2714split_id.
SetGi(
id.GetGi());
2725split_id.
SetGi(
id.GetGi());
2737split_ids.push_back(split_id);
2755loc_gi.
SetGi(
id.GetGi());
2780 if( prefix.
empty() ) {
2785 CDbtag& dbtag =
id->SetGeneral();
2786dbtag.
SetDb(prefix);
2794TGnlIdFlags gnl_id_flags)
const 2801 CDbtag& dbtag =
id->SetGeneral();
2803 if( colon !=
NPOS) {
2804dbtag.
SetDb(
tag.substr(0, colon));
2805 tag=
tag.substr(colon+1);
2812 tag[db.size()] ==
':') {
2813 tag=
tag.substr(db.size()+1);
2834TGnlIdFlags gnl_id_flags)
const 2836 if(
str.empty() ) {
2855 if(
str.empty() ) {
2862 if( cur.m_SEQID_GNL_PREFIX ) {
2876 if(
str.empty() ) {
2883 if( cur.m_SEQID_GNL_PREFIX ) {
2897 if(
str.empty() ) {
2904 if( cur.m_SEQID_GNL_PREFIX ) {
2917 if( !acc.
empty() ) {
2960master_acc.resize(master_acc.size() + 2 +
m_IdRowDigits,
'0');
2961 id=
new CSeq_id(master_acc);
3055 str>> *master_entry;
3060 if( id->IsPatent() ) {
3100 if( master_entry->IsSetDescr() ) {
3110 switch( desc.
Which() ) {
3113 returneDescr_force;
3120 returneDescr_default;
3125 if( name ==
"DBLink"||
3126name ==
"GenomeProjectsDB"||
3127name ==
"StructuredComment"||
3128name ==
"FeatureFetchPolicy"||
3129name ==
"Unverified") {
3130 returneDescr_default;
3164 if(uo_type ==
"StructuredComment") {
3166 if((*it)->GetLabel().IsStr() &&
3167(*it)->GetLabel().GetStr() ==
"StructuredCommentPrefix") {
3168 string data= ((*it)->GetData().IsStr() ?
3169(
string) (*it)->GetData().GetStr() :
3171uo_type +=
"|"+
data;
3186 if(!uo_type.empty() && existing_uo_types.count(uo_type) == 0) {
3187existing_uo_types.
insert(uo_type);
3194 unsignedtype_mask = 0;
3199type_mask |= 1 << desc.
Which();
3204 for(
auto& desc : main_seq->
GetDescr().
Get()) {
3205type_mask |= 1 << desc->Which();
3215(type_mask & (1 << desc.
Which())) ) {
3218 if(!uo_type.empty() && existing_uo_types.count(uo_type) == 0)
3224descr.
Set().push_back(*it);
3228 auto& user_object = desc->
SetUser();
3230user_object.SetData();
3231descr.
Set().push_back(desc);
3291 return id.
GetGi();
3304method<<
": GI is too big: "<<gi);
3314 if( idx->m_NUC_ROW_ID ) {
3316idx->m_NUC_ROW_ID.GetRowIdRange(idx->m_Cursor);
3317 if( row_range.second ) {
3318ret.first =
s_ToGi(row_range.first,
3319 "CWGSDb::GetNucGiRange()");
3320ret.second =
s_ToGi(row_range.first + row_range.second - 1,
3321 "CWGSDb::GetNucGiRange()");
3334 if( idx->m_PROT_ROW_ID ) {
3336idx->m_PROT_ROW_ID.GetRowIdRange(idx->m_Cursor);
3337 if( row_range.second ) {
3338ret.first =
s_ToGi(row_range.first,
3339 "CWGSDb::GetProtGiRange()");
3340ret.second =
s_ToGi(row_range.first + row_range.second - 1,
3341 "CWGSDb::GetProtGiRange()");
3352 if( ranges.empty() ) {
3355 sort(ranges.begin(), ranges.end());
3356TGiRanges::iterator dst = ranges.begin();
3357 for( TGiRanges::iterator
i= dst+1;
i!= ranges.end(); ++
i) {
3358 if(
i->GetFrom() == dst->GetToOpen() ) {
3359dst->SetToOpen(
i->GetToOpen());
3365ranges.erase(dst+1, ranges.end());
3375 TIntIdgi_start = -1, gi_end = -1;
3376 TVDBRowIdRangerow_range = seq->m_GI.GetRowIdRange(seq->m_Cursor);
3378row_id = row_range.first+
i;
3383 if( gi != gi_end ) {
3384 if( gi_end != gi_start ) {
3385ranges.push_back(
TGiRange(gi_start, gi_end));
3391 if( gi_end != gi_start ) {
3392ranges.push_back(
TGiRange(gi_start, gi_end));
3412 while( prefix < acc.
size() &&
isalpha(acc[prefix]&0xff) ) {
3415 if( prefix == acc.
size() || prefix == 0 || acc.
size()-prefix > 9 ) {
3422 if( c < '0' || c >
'9') {
3425v = v*10 + (c-
'0');
3436 stringacc = m_AccPrefix;
3437acc.resize(m_IdLength,
'0');
3438 for(
SIZE_TYPE i= m_IdLength; id;
id/= 10 ) {
3439acc[--
i] +=
id% 10;
3450 TVDBRowIdRangerow_range = seq->m_GB_ACCESSION.GetRowIdRange(seq->m_Cursor);
3452row_id = row_range.first+
i;
3454 if( acc.
empty() ) {
3463 if( it == ranges.
end() || it->first !=
info) {
3468 if( id < it->second.GetFrom() ) {
3469it->second.SetFrom(
id);
3471 else if(
id>= it->second.GetToOpen() ) {
3472it->second.SetTo(
id);
3484pair<TVDBRowId, bool> ret;
3487 if( idx->m_NUC_ROW_ID ) {
3490 if( !
value.empty() ) {
3491ret.first = *
value;
3494 if( !ret.first && idx->m_PROT_ROW_ID ) {
3497 if( !
value.empty() ) {
3498ret.first = *
value;
3512 if( idx->m_NUC_ROW_ID ) {
3515 if( !
value.empty() ) {
3530 if( idx->m_PROT_ROW_ID ) {
3533 if( !
value.empty() ) {
3555 returnrange.second? range.first: 0;
3559 returnrange.second? range.first: 0;
3568 if( seq->m_CONTIG_NAME_ROW_RANGE ) {
3569seq->m_Cursor.SetParam(
"CONTIG_NAME_QUERY", name);
3573 if( !
value.empty() ) {
3614 const char* query_param_name;
3615 if(
NStr::Equal(idx->m_ROW_ID.GetName(),
"ROW_ID") ) {
3616query_param_name =
"NAME_QUERY";
3619query_param_name =
"ACCESSION_QUERY";
3624 if( !
value.empty() ) {
3627 if( range.first && range.first <= range.second ) {
3629 if( !prot_rows.
empty() ) {
3630 if( ask_version > 0 ) {
3632 size_tversion_index = size_t(prot_rows.
size() == 1? 0: ask_version-1);
3633 if( version_index < prot_rows.
size() ) {
3635prot_row_id = prot_rows[version_index];
3636 if( prot_row_id ) {
3638 intactual_version = *
prot->ACC_VERSION(prot_row_id);
3640 if( actual_version != ask_version ) {
3647 else if( ask_version == -1 ) {
3649prot_row_id = prot_rows[prot_rows.
size()-1];
3661 boolcan_have_gis =
false;
3664 autogi_range = cur->m_Cursor.GetRowIdRange(cur->m_GI.GetIndex());
3665 if( gi_range.second ) {
3668can_have_gis =
true;
3673 returncan_have_gis;
3680 if(
autocur =
Feat() ) {
3681feature_count = cur->m_Cursor.GetRowIdRange().second;
3684 returnfeature_count;
3727 PROFILE(sw_GetFeatLocIdTypeRange);
3729 autorow_range = seq->m_Cursor.GetRowIdRange(seq->m_FEAT_ROW_START.GetIndex());
3731 autoseq_row_id = row_range.first+
i;
3732 autorow_start = seq->FEAT_ROW_START(seq_row_id);
3733 if( !row_start.empty() ) {
3734feat_row_id = *row_start;
3740 catch( exception&
) {
3743 PROFILE(sw_GetFeatLocIdTypeFeat);
3747 PROFILE(sw_GetFeatLocIdTypeFeatBytes);
3748bytes = *cur->SEQ_FEAT(feat_row_id);
3750cur.GetNCObject().m_ObjStr.OpenFromBuffer(bytes.
data(), bytes.
size());
3751cur.GetNCObject().m_ObjStr >> *feat;
3754 if(
const CTextseq_id*
id= !seq_id? 0: seq_id->GetTextseq_Id() ) {
3755 if( id->IsSetVersion() ) {
3764 catch( exception&
) {
3791 while( *
this&& GetToOpen() <= pos ) {
3811split_prod(
false),
3812split_data(
false),
3813split_feat(
false),
3814split_qual(
false),
3823x_SetFlags(
flags);
3831x_SetSplitVersion(split_version);
3836 voidx_SetFlags(TFlags
flags);
3853 template<
classIter>
3856main_id = it.GetId(
flags);
3860 if( feat_id->
IsGi() ) {
3872 template<
classIter>
3893vector<TVDBRowId>& product_row_ids);
3895vector<TVDBRowId>& product_row_ids);
3897vector<TVDBRowId>& product_row_ids);
3901 voidx_AddProducts(
constvector<TVDBRowId>& product_row_ids);
3908 auto state= GetGBState();
3914 if( !(m_IncludeFlags & TIncludeFlags(1 <<
state)) ) {
3926GetDb().Put(m_Cur0, m_CurrId);
3928GetDb().Put(m_Cur, m_CurrId);
3937m_CurrId = m_FirstGoodId = m_FirstBadId = 0;
3938m_AccVersion = eLatest;
3943: m_AccVersion(eLatest)
3951 if(
this!= &iter ) {
3971m_AccVersion(eLatest),
3972m_IncludeFlags(fIncludeDefault),
3973m_ClipByQuality(
true)
3981: m_AccVersion(eLatest)
3983 x_Select(wgs_db, include_flags, clip_type);
3991: m_AccVersion(eLatest)
3993 x_Select(wgs_db, include_flags, clip_type,
row);
4002: m_AccVersion(eLatest)
4004 x_Select(wgs_db, include_flags, clip_type, first_row, last_row);
4012: m_AccVersion(eLatest)
4014 x_Select(wgs_db, include_flags, clip_type, acc);
4019TIncludeFlags include_flags,
4021: m_AccVersion(eLatest)
4023 x_Select(wgs_db, include_flags, clip_type);
4029TIncludeFlags include_flags,
4031: m_AccVersion(eLatest)
4033 x_Select(wgs_db, include_flags, clip_type,
row);
4040TIncludeFlags include_flags,
4042: m_AccVersion(eLatest)
4044 x_Select(wgs_db, include_flags, clip_type, first_row, last_row);
4050TIncludeFlags include_flags,
4052: m_AccVersion(eLatest)
4054 x_Select(wgs_db, include_flags, clip_type, acc);
4073: m_AccVersion(eLatest)
4083: m_AccVersion(eLatest)
4094: m_AccVersion(eLatest)
4104: m_AccVersion(eLatest)
4118TIncludeFlags include_flags,
4121 x_Init(wgs_db, include_flags, clip_type, 0);
4127TIncludeFlags include_flags,
4132 x_Init(wgs_db, include_flags, clip_type,
row);
4138TIncludeFlags include_flags,
4144 x_Init(wgs_db, include_flags, clip_type, first_row);
4160TIncludeFlags include_flags,
4166 x_Init(wgs_db, include_flags, clip_type,
row);
4178TIncludeFlags include_flags,
4196 switch( clip_type ) {
4252 "CWGSSeqIterator::"<<method<<
"(): Invalid iterator state");
4283 x_CheckValid(
"CWGSSeqIterator::GetLatestAccVersion");
4290 x_CheckValid(
"CWGSSeqIterator::GetAccVersionCount");
4291 #ifdef TEST_ACC_VERSION 4309 return version<= latest_version &&
4323 "CWGSSeqIterator: "<<
4326 " is out of VDB version range: "<<
4327oldest_version<<
"-"<<latest_version);
4352 if(
m_Cur->m_GI ) {
4408 if(
GetDb().HasCommonTaxId() ) {
4418 return m_Cur->m_HASH;
4437 #ifdef TEST_ACC_VERSION 4449 #ifdef TEST_ACC_VERSION 4509 "CWGSSeqIterator::GetId("<<
flags<<
"): " 4510 "no valid id found: "<<
4522ids.push_back(
id);
4529ids.push_back(
id);
4536ids.push_back(
id);
4553 if(
m_Cur->m_DESCR ) {
4562 x_CheckValid(
"CWGSSeqIterator::HasNucProtDescrBytes");
4569 x_CheckValid(
"CWGSSeqIterator::GetNucProtDescrBytes");
4571 if(
m_Cur->m_NUC_PROT_DESCR ) {
4592 if( !
GetDb().GetMasterDescr().
empty() ) {
4613 if( ret->
Get().empty() ) {
4622 x_CheckValid(
"CWGSSeqIterator::GetLocFeatRowIdRange");
4624 if( !
m_Cur->m_FEAT_ROW_START ) {
4628 if( start_val.
empty() ) {
4633 if( end < start ) {
4635 "CWGSSeqIterator::GetLocFeatRowIdRange: " 4636 "feature row range is invalid: "<<start<<
","<<end);
4667 x_CheckValid(
"CWGSSeqIterator::CanHaveQualityGraph");
4668 return m_Cur->m_QUALITY;
4682 PROFILE(sw____GetContigQualSize);
4695quality_vec.clear();
4699quality_vec.reserve((
size+7)/8*8);
4700quality_vec.resize(
size);
4702quality_vec.data());
4708 return "Phrap Graph";
4715 Uint1min_v0 = 0xff, max_v0 = 0;
4716 Uint1min_v1 = 0xff, max_v1 = 0;
4717 Uint1min_v2 = 0xff, max_v2 = 0;
4718 Uint1min_v3 = 0xff, max_v3 = 0;
4724 if( v0 < min_v0 ) min_v0 = v0;
4725 if( v1 < min_v1 ) min_v1 = v1;
4726 if(
v2< min_v2 ) min_v2 =
v2;
4727 if( v3 < min_v3 ) min_v3 = v3;
4728 if( v0 > max_v0 ) max_v0 = v0;
4729 if( v1 > max_v1 ) max_v1 = v1;
4730 if(
v2> max_v2 ) max_v2 =
v2;
4731 if( v3 > max_v3 ) max_v3 = v3;
4735 if( v0 < min_v0 ) min_v0 = v0;
4736 if( v0 > max_v0 ) max_v0 = v0;
4738min_v0 =
min(min_v0, min_v2);
4739max_v0 =
max(max_v0, max_v2);
4740min_v1 =
min(min_v1, min_v3);
4741max_v1 =
max(max_v1, max_v3);
4742min_v =
min(min_v0, min_v1);
4743max_v =
max(max_v0, max_v1);
4748TFlags
flags)
const 4751 info.x_SetId(*
this);
4776 PROFILE(sw____GetContigQualData);
4777values.reserve((
size+7)/8*8);
4778values.resize(
size);
4783 Uint1min_q = 0, max_q = 0;
4785 PROFILE(sw____GetContigQualMinMax);
4795annot->
SetDesc().Set().push_back(name);
4797graph->
SetTitle(
"Phrap Quality");
4808annot->
SetData().SetGraph().push_back(graph);
4809annot_set.push_back(annot);
4844 if(
m_Cur->m_GB_STATE ) {
4855 if( !
m_Cur->m_PUBLIC_COMMENT ) {
4866 if( !
m_Cur->m_PUBLIC_COMMENT ) {
4883 return m_Cur->m_GAP_START;
4981evidence->SetType(
type);
4992 static const intkLenTypeMask =
4995 static const intkGapTypeMask =
5005 intlen_type = -(-props & kLenTypeMask);
5006 intgap_type = -(-props & kGapTypeMask);
5011 if( gap_type || gap_linkage ) {
5013 switch( gap_type ) {
5050 for( ; bit && bit <= gap_linkage; bit<<=1, ++
type) {
5051 if( gap_linkage & bit ) {
5131segments.push_back(seg);
5148TInstSegmentFlags
flags)
const 5158 for( ;
len> 0; ) {
5159 if( gap_info.
IsInGap(pos) ) {
5164 x_AddGap(segments, pos - raw_offset, gap_len, gap_info);
5182rem_len =
min(rem_len, chunk_end - pos);
5189seg_len = ambiguity->Get2naLengthBlock(pos, rem_len);
5190 if( seg_len == rem_len ) {
5202seg_len = ambiguity->Get2naLengthBlock(pos,
min(rem_len,
kSplit2naSize));
5203 if( seg_len >=
kMin2naSize|| seg_len == rem_len ) {
5212 TSeqPosseg_len_2na = seg_len;
5213seg_len += ambiguity->Get4naLengthBlock(pos+seg_len,
5215 if( seg_len == seg_len_2na ) {
5249segments.push_back(seg);
5266 const TSeqPoskMinGapSize = 20;
5268 const TSeqPoskUnknownGapSize = 100;
5271 for( ;
len> 0; ) {
5287 TSeqPosseg_len_2na = seg_len;
5295 if( seg_len == kUnknownGapSize ) {
5299 else if( seg_len == seg_len_2na ) {
5314segments.push_back(seg);
5329 _ASSERT(it->range.GetFrom() == pos);
5330 if( it->literal ) {
5331 _ASSERT(it->range.GetLength() == it->literal->GetLength());
5332seq->SetLiteral(it->literal.GetNCObject());
5335seq->SetLiteral().SetLength(it->range.GetLength());
5337 delta.push_back(seq);
5338pos += it->range.GetLength();
5347 if( segments.size() == 1 && !segments[0].is_gap ) {
5376 if( length == 0 ) {
5389 if( !
info.split_data ) {
5407 TSeqPospos = it->range.GetFrom();
5408 TSeqPosend = it->range.GetToOpen();
5410 if( !chunk || chunk->GetId() != chunk_id ) {
5412chunk->SetId().Set(chunk_id);
5413 info.split->SetChunks().push_back(chunk);
5416chunk->SetContent().push_back(content);
5440this->flags =
flags;
5455 "SWGSCreateInfo::SetSplitVersion("<<
split_version<<
"): " 5456 "unknown split version");
5465 if( bytes.
empty() ) {
5501seqset->
SetSeq_set().push_back(main_entry);
5509vector<TVDBRowId>& product_row_ids)
5515 if(
TVDBRowIdproduct_row_id = feat_it.GetProductRowId() ) {
5517product_row_ids.push_back(product_row_id);
5518 if( !product_features ) {
5521product_features = &annot->
SetData().SetFtable();
5523dst = product_features;
5527 if( !main_features ) {
5530main_features = &annot->
SetData().SetFtable();
5532dst = main_features;
5566 static const TSeqPoskMaxGap = 100000;
5631 constvector<TVDBRowId>& product_row_ids,
5632 size_tproduct_index);
5640 if( gi_range_stop == gi_range_start ) {
5644 if( gi_range_stop == gi_range_start+
GI_CONST(1) ) {
5649gi_range.
SetStart(gi_range_start);
5652loc_set.push_back(loc);
5658 constvector<TVDBRowId>& product_row_ids,
5659 size_tproduct_index)
5666 if(
features[0].HasFeatures() ) {
5669chunk->SetContent().push_back(content);
5670content->SetSeq_annot_place().SetBioseqs().Set().push_back(
seq_place);
5675 if(
features[1].HasFeatures() ) {
5678chunk->SetContent().push_back(content);
5679content->SetSeq_annot_place().SetBioseq_sets().Set().push_back(
kMainEntryId);
5688 auto& loc_set = annot_info.
SetSeq_loc().SetLoc_set();
5689loc_set.push_back(old_loc);
5692 for(
autoit = product_row_ids.begin()+product_index; it != product_row_ids.end(); ++it ) {
5695 "invalid protein row id: "<<*it);
5706 if( gi != gi_range_stop ) {
5708gi_range_start = gi;
5719loc_set.push_back(loc);
5726chunk->SetContent().push_back(content);
5727content->SetFeat_ids();
5747annot_info.
SetFeat().push_back(type_info);
5756interval->SetStart(
r.GetFrom());
5757interval->SetLength(
r.GetLength());
5758intervals.
SetInts().push_back(interval);
5764vector<TVDBRowId>& product_row_ids)
5769 intchunk_index = 0;
5772 size_tproduct_index = 0;
5775 boolwith_product =
false;
5776 if(
TVDBRowIdproduct_row_id = feat_it.GetProductRowId() ) {
5778product_row_ids.push_back(product_row_id);
5779with_product =
true;
5781c.
AddFeature(with_product, feat_it.GetFeatType(), feat_it.GetLocRange());
5785product_row_ids, product_index));
5786product_index = product_row_ids.size();
5792product_row_ids, product_index));
5794 if( !product_row_ids.empty() ) {
5802vector<TVDBRowId>& product_row_ids)
5805x_AddFeaturesSplit(range, product_row_ids);
5808x_AddFeaturesDirect(range, product_row_ids);
5818 if( !main_features ) {
5820main_seq->SetAnnot().push_back(annot);
5821main_features = &annot->
SetData().SetFtable();
5823x_AddFeature(feat_it, *main_features);
5839 info.split->SetChunks().push_back(chunk);
5841chunk->SetId().Set(chunk_id);
5847chunk->SetContent().push_back(content);
5848content->SetFeat_ids();
5850content->SetSeq_annot();
5851annot_info.
SetName(GetQualityAnnotName());
5857chunk->SetContent().push_back(content);
5858content->SetSeq_annot_place().SetBioseqs().Set().push_back(place);
5866 PROFILE(sw__GetContigBioseq);
5868 info.x_SetSeq(*
this);
5869 if(
info.entry ) {
5871 info.entry->SetSeq(*
info.main_seq);
5875 PROFILE(sw___GetContigDescr);
5878 if( m_Cur->m_DESCR ) {
5880 if( !descr.
empty() ) {
5881 info.x_AddDescr(*descr);
5888 info.main_seq->SetDescr(*descr);
5893 PROFILE(sw___GetContigAnnot);
5894GetAnnotSet(
info.main_seq->SetAnnot(),
info.flags);
5895 boolhas_split_annot =
false;
5897 if(
info.split_qual ) {
5898x_AddQualityChunkInfo(
info);
5899has_split_annot =
true;
5902x_GetQualityAnnot(
info.main_seq->SetAnnot(),
info);
5905 if( !has_split_annot &&
info.main_seq->GetAnnot().empty() ) {
5906 info.main_seq->ResetAnnot();
5909 info.main_seq->SetInst(*x_GetSeq_inst(
info));
5917 if( feat_it.GetProductRowId() ) {
5932TFlags save_flags =
flags;
5943chunk->SetData().push_back(chunk_data);
5945bioseqs = &chunk_data->SetBioseqs();
5950 ITERATE( vector<TVDBRowId>, it, product_row_ids ) {
5953 "invalid protein row id: "<<*it);
5961entry->
SetSeq(*main_seq);
5965bioseqs->push_back(main_seq);
5969main_seq = save_seq;
5979 _ASSERT(entry->IsSeq() && &entry->GetSeq() == main_seq);
5980vector<TVDBRowId> product_row_ids;
5983x_AddFeatures(range, product_row_ids);
5985 if( !product_row_ids.empty() ) {
5987 _ASSERT(entry && entry->IsSet());
5989 intchunk_index = 0;
5990 size_tprod_count = 0;
5993 ITERATE( vector<TVDBRowId>, it, product_row_ids ) {
5996 split->SetChunks().push_back(chunk);
6003chunk->SetContent().push_back(content);
6004content->SetFeat_ids();
6007chunk->SetContent().push_back(content);
6009content->SetBioseq_place().push_back(place_info);
6011ids = &place_info->SetSeq_ids().Set();
6016 "invalid protein row id: "<<*it);
6025x_AddProducts(product_row_ids);
6034 if( !db->GetMasterDescr().empty() ) {
6035db->AddMasterDescr(
info.entry->SetDescr(),
info.main_seq,
flags);
6046x_CreateBioseq(
info);
6055x_CreateBioseq(
info);
6057 info.x_CreateProtSet(GetLocFeatRowIdRange());
6061 if( !descr.
empty() ) {
6077 info.split =
null;
6084 info.split_data =
true;
6089 info.split_prod =
true;
6095 info.split_feat =
true;
6098CanHaveQualityGraph() ) {
6099 info.split_qual =
true;
6101 if( !
info.split_data && !
info.split_prod && !
info.split_feat &&
6102!
info.split_qual ) {
6107 info.split->SetSkeleton(*
info.entry);
6108 info.split->SetChunks();
6115x_CreateEntry(
info);
6120 unsignedindex)
const 6122 PROFILE(sw_CreateQualityChunk);
6125x_GetQualityAnnot(
data->SetAnnots(),
info,
6127 info.chunk->SetData().push_back(
data);
6132 unsignedindex)
const 6143GetGapInfo(gap_info);
6145TInstSegmentFlags inst_flags = fInst_MakeData;
6146x_GetSegmentsWithExplicitGaps(segments, range, gap_info, inst_flags);
6149 _ASSERT(it->literal && it->literal->IsSetSeq_data());
6151piece->SetStart(it->range.GetFrom());
6152piece->SetData().push_back(it->literal);
6153 data->SetSeq_data().push_back(piece);
6155 info.chunk->SetData().push_back(
data);
6160 unsignedindex)
const 6162 PROFILE(sw_CreateProductsChunk);
6163vector<TVDBRowId> product_row_ids;
6166 if(
TVDBRowIdrow_id = feat_it.GetProductRowId() ) {
6171product_row_ids.push_back(row_id);
6177 info.x_AddProducts(product_row_ids);
6182 unsignedindex)
const 6184 PROFILE(sw_CreateFeaturesChunk);
6186 autorange = GetLocFeatRowIdRange();
6188 autofeat_stop =
min(range.first+range.second, feat_start+
kFeatPerChunk);
6189range.first = feat_start;
6190range.second =
max(feat_start, feat_stop)-feat_start;
6192 info.chunk->SetData();
6197 if( feat_it.GetProductRowId() ) {
6199 if( !product_features ) {
6201 info.chunk->SetData().push_back(
data);
6204 data->SetAnnots().push_back(annot);
6205product_features = &annot->
SetData().SetFtable();
6207dst = product_features;
6211 if( !main_features ) {
6213 info.chunk->SetData().push_back(
data);
6214 data->SetId().SetSeq_id(*
info.main_id);
6216 data->SetAnnots().push_back(annot);
6217main_features = &annot->
SetData().SetFtable();
6219dst = main_features;
6221 info.x_AddFeature(feat_it, *dst);
6231 info.x_SetId(*
this);
6235x_CreateQualityChunk(
info, index);
6238x_CreateProductsChunk(
info, index);
6241x_CreateFeaturesChunk(
info, index);
6244x_CreateDataChunk(
info, index);
6248 "CWGSSeqIterator::CreateChunk("<<chunk_id<<
"): " 6249 "unsupported chunk type: "<<
type);
6257x_CheckValid(
"CWGSSeqIterator::GetBioseq");
6259x_CreateBioseq(
info);
6260 return info.main_seq;
6267x_CheckValid(
"CWGSSeqIterator::GetSeq_entry");
6270x_CreateEntry(
info);
6277 PROFILE(sw_GetSeq_entryData);
6278x_CheckValid(
"CWGSSeqIterator::GetSeq_entryData");
6282x_CreateEntry(
info);
6289 returnGetSplitInfoAndVersion(
flags).first;
6298x_CheckValid(
"CWGSSeqIterator::GetSplitInfo");
6300 if( x_InitSplit(
info) ) {
6301x_CreateSplit(
info);
6303 returnmake_pair(
info.split,
info.split_version);
6309 returnGetSplitInfoDataAndVersion(
flags).first;
6317 PROFILE(sw_GetSplitInfoData);
6318x_CheckValid(
"CWGSSeqIterator::GetSplitInfoData");
6320 if( x_InitSplit(
info) ) {
6322x_CreateSplit(
info);
6329TFlags
flags)
const 6331x_CheckValid(
"CWGSSeqIterator::GetChunk");
6334x_CreateChunk(
info, chunk_id);
6342x_CheckValid(
"CWGSSeqIterator::GetChunk");
6345x_CreateChunk(
info, chunk_id);
6351TFlags
flags)
const 6353x_CheckValid(
"CWGSSeqIterator::GetChunkData");
6357x_CreateChunk(
info, chunk_id);
6365x_CheckValid(
"CWGSSeqIterator::GetChunkData");
6369x_CreateChunk(
info, chunk_id);
6390m_CurrId = m_FirstGoodId = m_FirstBadId = 0;
6406 if(
this!= &iter ) {
6493 "CWGSScaffoldIterator::"<<method<<
"(): " 6494 "Invalid iterator state");
6501 if( !
m_Cur->m_ACCESSION ) {
6539 x_CheckValid(
"CWGSScaffoldIterator::GetRawGBState");
6543 if(
m_Cur->m_GB_STATE ) {
6555 if( !acc.
empty() ) {
6611 "CWGSScaffoldIterator::GetId("<<
flags<<
"): " 6612 "no valid id found: "<<
6620 PROFILE(sw___GetScaffoldIds);
6624ids.push_back(
id);
6631ids.push_back(
id);
6638ids.push_back(
id);
6646 x_CheckValid(
"CWGSScaffoldIterator::GetScaffoldName");
6667 if( ret->
Get().empty() ) {
6680 for(
size_t i= 0;
i< lens.
size(); ++
i) {
6698 x_CheckValid(
"CWGSScaffoldIterator::GetLocFeatRowIdRange");
6700 if( !
m_Cur->m_FEAT_ROW_START ) {
6704 if( start_val.
empty() ) {
6709 if( end < start ) {
6711 "CWGSScaffoldIterator::GetLocFeatRowIdRange: " 6712 "feature row range is invalid: "<<start<<
","<<end);
6737 if(
m_Cur->m_COMPONENT_LINKAGE ) {
6739 if( !linkages_val.
empty() ) {
6740 size_tgaps_count = 0;
6741 for(
size_t i= 0;
i< lens.
size(); ++
i) {
6748 if( linkages_val.
size() != gaps_count ) {
6750 "CWGSScaffoldIterator: inconsistent gap info");
6752linkages = linkages_val.
data();
6756 for(
size_t i= 0;
i< lens.
size(); ++
i) {
6769 if( start == 0 ||
len== 0 ) {
6771 "CWGSScaffoldIterator: component is bad for "+
6779interval.
SetId(*
GetDb().GetContigSeq_id(row_id));
6790 delta.push_back(seg);
6801 PROFILE(sw__GetScaffoldBioseq);
6803 info.x_SetSeq(*
this);
6804 if(
info.entry ) {
6806 info.entry->SetSeq(*
info.main_seq);
6810 PROFILE(sw___GetContigDescr);
6825 info.main_seq->SetDescr(*descr);
6836 PROFILE(sw_GetScaffoldEntry);
6859 return info.main_seq;
6911 if(
this!= &iter ) {
6927 x_Init(wgs_db, seq_type);
6934 x_Init(wgs_db, seq_type);
6969 if( (seq_type ==
eProt|| !
m_Cur->m_NUC_ROW_ID) &&
6970(seq_type ==
eNuc|| !
m_Cur->m_PROT_ROW_ID) ) {
6986 if( !
value.empty() ) {
6997 if( !
value.empty() ) {
7043: m_CurrId(0), m_FirstGoodId(0), m_FirstBadId(0)
7049: m_CurrId(0), m_FirstGoodId(0), m_FirstBadId(0)
7058 if(
this!= &iter ) {
7144 "CWGSProteinIterator::"<<method<<
"(): " 7145 "Invalid iterator state");
7160 if( !gi.
empty() ) {
7161 return s_ToGi(*gi,
"CWGSProteinIterator::GetGi()");
7172 if(
m_Cur0->m_GB_ACCESSION ) {
7183 PROFILE(sw____GetProtAccVer);
7192 PROFILE(sw____GetProtAccSeq_id);
7195 if( !acc.
empty() ) {
7219 PROFILE(sw____GetProtGISeq_id);
7254 "CWGSProteinIterator::GetId("<<
flags<<
"): " 7255 "no valid id found: "<<
7267ids.push_back(
id);
7274ids.push_back(
id);
7281ids.push_back(
id);
7289 x_CheckValid(
"CWGSProteinIterator::GetProteinName");
7296 x_CheckValid(
"CWGSProteinIterator::GetProductName");
7304 if(
GetDb().HasCommonTaxId() ) {
7308 return m_Cur->m_TAXID;
7315 if(
GetDb().HasCommonTaxId() ) {
7327 return m_Cur->m_HASH;
7349 return m_Cur->m_REF_ACC;
7388 if(
m_Cur->m_GB_STATE ) {
7397 x_CheckValid(
"CWGSProteinIterator::HasPublicComment");
7399 if( !
m_Cur->m_PUBLIC_COMMENT ) {
7408 x_CheckValid(
"CWGSProteinIterator::GetPublicComment");
7410 if( !
m_Cur->m_PUBLIC_COMMENT ) {
7429 if( !
m_Cur->m_TITLE ) {
7438 x_CheckValid(
"CWGSProteinIterator::GetLocFeatRowIdRange");
7440 if( !
m_Cur->m_FEAT_ROW_START ) {
7444 if( start_val.
empty() ) {
7449 if( end < start ) {
7451 "CWGSProteinIterator::GetLocFeatRowIdRange: " 7452 "feature row range is invalid: "<<start<<
","<<end);
7460 x_CheckValid(
"CWGSProteinIterator::GetProductFeatCount");
7462 if( !
m_Cur->m_FEAT_PRODUCT_ROW_ID ) {
7471 x_CheckValid(
"CWGSProteinIterator::GetProductFeatRowId");
7473 if( !
m_Cur->m_FEAT_PRODUCT_ROW_ID ) {
7482 x_CheckValid(
"CWGSProteinIterator::GetBestProductFeatRowId");
7484 if( !
m_Cur->m_FEAT_PRODUCT_ROW_ID ) {
7488 return row.empty()? 0:
row[
row.size()-1];
7494 x_CheckValid(
"CWGSProteinIterator::GetProductFeatRowId");
7496 if( !
m_Cur->m_FEAT_PRODUCT_ROW_ID ) {
7500 return row.empty()? 0: *
row;
7506 x_CheckValid(
"CWGSProteinIterator::GetReplacedByRowId");
7508 if(
m_Cur->m_REPLACED_BY ) {
7510 if( !
value.empty() ) {
7514 #ifdef TEST_ACC_VERSION 7525 x_CheckValid(
"CWGSProteinIterator::GetReplacesRowId");
7527 if(
m_Cur->m_REPLACES ) {
7529 if( !
value.empty() ) {
7533 #ifdef TEST_ACC_VERSION 7567 if(
m_Cur->m_DESCR ) {
7572 if( !title.
empty() ) {
7575ret->
Set().push_back(desc);
7582 if( ret->
Get().empty() ) {
7622 if( replaced_by_row || replaces_row ) {
7624 if( replaced_by_row ) {
7628 if( replaces_row ) {
7644 info.x_SetSeq(*
this);
7645 if(
info.entry ) {
7647 info.entry->SetSeq(*
info.main_seq);
7654 if(
m_Cur->m_DESCR ) {
7656 if( !descr.
empty() ) {
7657 info.x_AddDescr(*descr);
7664 info.main_seq->SetDescr(*descr);
7669 if( !
info.db->FeatTable() ) {
7673 if(
info.main_seq->GetAnnot().empty() ) {
7674 info.main_seq->ResetAnnot();
7699 return info.main_seq;
7753 if(
this!= &iter ) {
7815row_range.first+row_range.second);
7841 "CWGSFeatureIterator::"<<method<<
"(): " 7842 "Invalid iterator state");
7855 x_CheckValid(
"CWGSFeatureIterator::GetProductSeqType");
7869 x_CheckValid(
"CWGSFeatureIterator::GetProductRowId");
7870 if( !
m_Cur->m_PRODUCT_ROW_ID ) {
7874 return row.empty()? 0: *
row;
#define READ(buf, off, bytes, endian)
CAsnBinData(CSerialObject &obj)
virtual void Serialize(CObjectOStreamAsnBinary &out) const
CRef< CSerialObject > m_MainObject
virtual ~CAsnBinData(void)
const TInfoMap & info_map
CWGSAsnBinData::TDescrInfo TInfo
map< TKey, TInfo > TInfoMap
virtual void WriteClassMember(CObjectOStream &out, const CConstObjectInfoMI &member)
CDescrWriteHook(const TInfoMap &info_map)
virtual void WriteChoiceVariant(CObjectOStream &out, const CConstObjectInfoCV &variant)
map< TKey, TInfo > TInfoMap
const CSeq_annot::TData::TFtable * TKey
CWGSAsnBinData::SFtableInfo TInfo
const TInfoMap & info_map
CFtableWriteHook(const TInfoMap &info_map)
void Release()
Manually force the resource to be released.
CID2S_Bioseq_place_Info –.
CID2S_Feat_type_Info –.
CID2S_Seq_annot_Info –.
CID2S_Seq_id_Interval –.
CID2S_Sequence_Piece –.
void GetData(char *buffer, size_t size, size_t offset=0) const
size_t GetSize(void) const
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
Writing containers (SET OF, SEQUENCE OF).
Helper class: installs hooks in constructor, and uninstalls in destructor.
CObjectIStreamAsnBinary –.
CObjectOStreamAsnBinary –.
@Seq_descr.hpp User-defined methods of the data storage class.
namespace ncbi::objects::
Base class for all serializable objects.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Template class for iteration on objects of class C (non-modifiable version)
uint32_t GetElementCount(TVDBRowId row, const CVDBColumn &column, uint32_t elem_bits) const
void ReadElements(TVDBRowId row, const CVDBColumn &column, uint32_t elem_bits, uint32_t start, uint32_t count, void *buffer) const
TVDBRowIdRange GetRowIdRange(TVDBColumnIdx column=0) const
TVDBRowIdRange Find(const string &value) const
const TValue * data() const
const_iterator begin() const
static bool IsPlainAccession(const string &acc_or_path)
void AddFeature(TFtable &ftable, const CTempString &data)
vector< char > TDescrInfo
void AddDescr(CBioseq &seq, const CTempString &data)
CWGSAsnBinData(CSerialObject &obj)
virtual void Serialize(CObjectOStreamAsnBinary &out) const
CSeq_annot::TData::TFtable TFtable
map< const TFtable *, SFtableInfo > TFtableMap
virtual ~CWGSAsnBinData(void)
map< const CBioseq *, TDescrInfo > TDescrMap
CRef< CSeq_descr > m_EmptyDescr
CVDBTableIndex m_ProteinNameIndex
CRef< CSeq_id > GetGeneralOrPatentSeq_id(CTempString str, TVDBRowId row, TGnlIdFlags gnl_id_flags=fGnlId_Default) const
CVDBObjectCache< SProtIdxTableCursor > m_ProtIdx
CRef< SSeqTableCursor > Seq(TVDBRowId row=0)
CVDBObjectCache< SProt0TableCursor > m_Prot0
atomic< bool > m_GiIdxTableIsOpened
NCBI_gb_state m_ProjectGBState
const CVDBTableIndex & ContigNameIndex(void)
const CVDBTableIndex & ProductNameIndex(void)
pair< TGi, TGi > GetNucGiRange(void)
CRef< CSeq_id > GetGeneralSeq_id(CTempString prefix, CTempString tag) const
bool LoadMasterDescr(int filter)
const CVDBTable & GiIdxTable(void)
COpenRange< TIntId > TGiRange
EFeatLocIdType GetFeatLocIdType()
CRef< CSeq_entry > GetMasterDescrEntry(void)
atomic< bool > m_FeatTableIsOpened
CVDBTableIndex m_ScaffoldNameIndex
atomic< bool > m_ScfTableIsOpened
string m_IdPrefixDbWithVersion
CRef< SSeq0TableCursor > Seq0(TVDBRowId row=0)
CRef< SProtTableCursor > Prot(TVDBRowId row=0)
bool HasCommonTaxId(void) const
CRef< CSeq_id > GetAccSeq_id(CTempString acc, int version) const
CRef< SGiIdxTableCursor > GiIdx(TVDBRowId row=0)
void ResetMasterDescr(void)
TAmbiguityCache m_AmbiguityCache
TVDBRowId GetNucGiRowId(TGi gi)
const CVDBTableIndex & ScaffoldNameIndex(void)
CRef< CSeq_entry > GetMasterSeq_entry(void) const
void SetMasterDescr(const TMasterDescr &descr, int filter)
void OpenScaffoldNameIndex(void)
CRef< SAmbiguityInfo > GetAmbiguityInfo(TVDBRowId row)
TTaxId GetCommonTaxId(void) const
void PutAmbiguityInfo(CRef< SAmbiguityInfo > &ambiguity)
pair< TVDBRowId, bool > GetGiRowId(TGi gi)
TVDBRowCount GetTotalFeatureCount()
void x_SortGiRanges(TGiRanges &ranges)
CRef< SProtIdxTableCursor > ProtIdx(TVDBRowId row=0)
void OpenGiIdxTable(void)
EFeatLocIdType DetermineFeatLocIdType()
const CVDBTable & ScfTable(void)
static pair< TVDBRowId, ERowType > ParseRowType(CTempString acc, TAllowRowType allow)
void AddMasterDescr(CSeq_descr &descr, const CBioseq *main_seq=0, TFlags flags=fDefaultFlags) const
CVDBTableIndex m_ProtAccIndex
TMasterDescr m_MasterDescr
CRef< CSeq_entry > m_MasterEntry
void OpenIndex(const CVDBTable &table, CVDBTableIndex &index, atomic< Int1 > &index_is_opened, const char *index_name, const char *backup_index_name=0)
void OpenProteinNameIndex(void)
TVDBRowId GetScaffoldNameRowId(const string &name)
TVDBRowId GetContigNameRowId(const string &name)
CRef< CSeq_id > GetProteinSeq_id(TVDBRowId row_id) const
CVDBObjectCache< SSeqTableCursor > m_Seq
CSeq_inst::TMol GetScaffoldMolType(void) const
atomic< Int1 > m_ScaffoldNameIndexIsOpened
atomic< bool > m_ProtIdxTableIsOpened
NCBI_gb_state GetProjectGBState() const
CRef< SProt0TableCursor > Prot0(TVDBRowId row=0)
TVDBRowId GetProductNameRowId(const string &name)
CRef< CSeq_id > GetMasterSeq_id(void) const
TVDBRowId GetProtAccRowId(const string &acc, int version=-1)
CVDBObjectCache< SProtTableCursor > m_Prot
CRef< SFeatTableCursor > Feat(TVDBRowId row=0)
TVDBRowId GetProteinNameRowId(const string &name)
atomic< Int1 > m_ProductNameIndexIsOpened
vector< TGiRange > TGiRanges
TVDBRowId Lookup(const string &name, const CVDBTableIndex &index, bool upcase)
CRef< CSeq_id > GetPatentSeq_id(int id) const
virtual ~CWGSDb_Impl(void)
void OpenProtIdxTable(void)
list< CRef< CSeqdesc > > TMasterDescr
TProtAccRanges GetProtAccRanges(void)
CFastMutex m_AmbiguityCacheMutex
static string NormalizePathOrAccession(CTempString path_or_acc, CTempString vol_path=CTempString())
const CVDBTableIndex & ProteinNameIndex(void)
TGiRanges GetProtGiRanges(void)
TVDBRowId ParseRow(CTempString acc, bool *is_scaffold) const
TGiRanges GetNucGiRanges(void)
CVDBObjectCache< SScfTableCursor > m_Scf
void Put(CRef< SSeq0TableCursor > &curs, TVDBRowId row=0)
CRef< SSeq4naTableCursor > Seq4na(TVDBRowId row=0)
bool IsSetMasterDescr(void) const
CVDBTableIndex m_ContigNameIndex
void OpenProtAccIndex(void)
CVDBObjectCache< SSeq0TableCursor > m_Seq0
CSeq_inst::TMol GetContigMolType(void) const
void x_LoadMasterDescr(int filter)
CRef< CSeq_id > m_PatentId
void SetPatentId(CRef< CSeq_id > id)
atomic< bool > m_ProtTableIsOpened
TGi GetMasterGi(void) const
CRef< SScfTableCursor > Scf(TVDBRowId row=0)
const CVDBTable & FeatTable(void)
const CVDBTable & SeqTable(void)
CRef< CSeq_id > GetContigSeq_id(TVDBRowId row_id) const
pair< TGi, TGi > GetProtGiRange(void)
CSeq_inst::TMol m_ContigMolType
CWGSDb_Impl(CVDBMgr &mgr, CTempString path_or_acc, CTempString vol_path=CTempString())
string m_IdPrefixWithVersion
size_t GetMasterDescrBytes(TMasterDescrBytes &buffer)
void OpenProductNameIndex(void)
atomic< Int1 > m_ProtAccIndexIsOpened
CSeq_id::E_Choice m_SeqIdType
const TMasterDescr & GetMasterDescr(void) const
atomic< EFeatLocIdType > m_FeatLocIdType
CVDBTableIndex m_ProductNameIndex
CVDBObjectCache< SFeatTableCursor > m_Feat
void OpenContigNameIndex(void)
const CVDBTable & ProtIdxTable(void)
const CVDBTable & ProtTable(void)
bool HasStandardFeatLocIdType()
CSeq_inst::TMol GetProteinMolType(void) const
CVDBObjectCache< SGiIdxTableCursor > m_GiIdx
atomic< Int1 > m_ProteinNameIndexIsOpened
void OpenTable(CVDBTable &table, atomic< bool > &table_is_opened, const char *table_name)
CRef< CSeq_id > GetScaffoldSeq_id(TVDBRowId row_id) const
void x_InitIdParams(void)
TVDBRowId GetProtGiRowId(TGi gi)
const string & GetWGSPath(void) const
atomic< Int1 > m_ContigNameIndexIsOpened
static TVDBRowId ParseProteinRow(CTempString acc)
static TVDBRowId ParseScaffoldRow(CTempString acc)
static EDescrType GetMasterDescrType(const CSeqdesc &desc)
static TVDBRowId ParseContigRow(CTempString acc)
CRange< TSeqPos > GetLocRange(void) const
CWGSDb_Impl & GetDb(void) const
TVDBRowId GetLocRowId(void) const
NCBI_WGS_seqtype GetProductSeqType(void) const
NCBI_WGS_feattype GetFeatType(void) const
TVDBRowId GetProductRowId(void) const
TSeqPos GetLocLength(void) const
CRef< CSeq_feat > GetSeq_feat() const
CWGSFeatureIterator & SelectRow(TVDBRowId row)
CTempString GetSeq_featBytes(void) const
CWGSFeatureIterator & operator=(const CWGSFeatureIterator &iter)
void x_ReportInvalid(const char *method) const
CWGSFeatureIterator & SelectRowRange(TVDBRowIdRange row_range)
NCBI_WGS_seqtype GetLocSeqType(void) const
TSeqPos GetLocStart(void) const
CWGSFeatureIterator(void)
CRef< CWGSDb_Impl::SFeatTableCursor > m_Cur
void x_Init(const CWGSDb &wgs_db)
~CWGSFeatureIterator(void)
void x_CheckValid(const char *method) const
void x_Init(const CWGSDb &wgs_db, ESeqType seq_type)
CRef< CWGSDb_Impl::SGiIdxTableCursor > m_Cur
CWGSDb_Impl & GetDb(void) const
CWGSGiIterator & operator=(const CWGSGiIterator &iter)
CRef< CSeq_id > GetAccSeq_id(void) const
CBioseq::TAnnot TAnnotSet
TVDBRowId GetReplacesRowId(void) const
void x_CreateEntry(SWGSCreateInfo &info) const
CTempString GetPublicComment(void) const
CWGSProteinIterator & SelectRow(TVDBRowId row)
CRef< CBioseq > GetBioseq(TFlags flags=fDefaultFlags) const
CWGSProteinIterator(void)
CRef< CSeq_id > GetGiSeq_id(void) const
NCBI_gb_state GetGBState(void) const
size_t GetProductFeatCount(void) const
bool HasTaxId(void) const
TVDBRowId GetBestProductFeatRowId(void) const
TSeqPos GetSeqLength(void) const
CWGSDb_Impl & GetDb(void) const
bool HasTitle(void) const
NCBI_gb_state GetRawGBState(void) const
TVDBRowIdRange GetLocFeatRowIdRange(void) const
TVDBRowId GetProductFeatRowId(void) const
CWGSProteinIterator & operator=(const CWGSProteinIterator &iter)
CRef< CSeq_inst > GetSeq_inst(TFlags flags=fDefaultFlags) const
CSeq_id::TGi GetGi(void) const
THash GetSeqHash(void) const
bool HasAnnotSet(void) const
void x_CheckValid(const char *method) const
CRef< CWGSDb_Impl::SProtTableCursor > m_Cur
CRef< CSeq_entry > GetSeq_entry(TFlags flags=fDefaultFlags) const
int GetAccVersion(void) const
void x_CreateBioseq(SWGSCreateInfo &info) const
CTempString GetTitle(void) const
void GetIds(CBioseq::TId &ids, TFlags flags=fDefaultFlags) const
~CWGSProteinIterator(void)
void x_Init(const CWGSDb &wgs_db)
CTempString GetAccession(void) const
CRef< CSeq_id > GetGeneralOrPatentSeq_id(void) const
CRef< CSeq_id > GetId(TFlags flags=fDefaultFlags) const
bool HasPublicComment(void) const
CRef< CSeq_id > GetGeneralSeq_id(void) const
bool HasSeqHash(void) const
void GetAnnotSet(TAnnotSet &annot_set, TFlags flags=fDefaultFlags) const
CTempString GetRefAcc(void) const
TTaxId GetTaxId(void) const
CTempString GetProductName(void) const
void x_ReportInvalid(const char *method) const
CTempString GetProteinName(void) const
CRef< CWGSDb_Impl::SProt0TableCursor > m_Cur0
CRef< CSeq_descr > GetSeq_descr(TFlags flags=fDefaultFlags) const
TVDBRowId GetReplacedByRowId(void) const
bool HasRefAcc(void) const
bool HasSeq_descr(TFlags flags=fDefaultFlags) const
void x_Init(const CWGSDb &wgs_db)
void x_ReportInvalid(const char *method) const
CWGSScaffoldIterator(void)
bool HasSeq_descr(TFlags flags=fDefaultFlags) const
void x_CreateEntry(SWGSCreateInfo &info) const
CRef< CSeq_id > GetAccSeq_id(void) const
CWGSScaffoldIterator & SelectRow(TVDBRowId row)
CTempString GetScaffoldName(void) const
void x_CreateBioseq(SWGSCreateInfo &info) const
TVDBRowIdRange GetLocFeatRowIdRange(void) const
CRef< CSeq_id > GetGeneralOrPatentSeq_id(void) const
bool IsCircular(void) const
CRef< CSeq_descr > GetSeq_descr(TFlags flags=fDefaultFlags) const
NCBI_gb_state GetRawGBState(void) const
CRef< CBioseq > GetBioseq(TFlags flags=fDefaultFlags) const
void GetIds(CBioseq::TId &ids, TFlags flags=fDefaultFlags) const
CRef< CSeq_entry > GetSeq_entry(TFlags flags=fDefaultFlags) const
CRef< CSeq_inst > GetSeq_inst(TFlags flags=fDefaultFlags) const
CWGSScaffoldIterator & operator=(const CWGSScaffoldIterator &iter)
CRef< CWGSDb_Impl::SScfTableCursor > m_Cur
int GetAccVersion(void) const
CRef< CSeq_id > GetGeneralSeq_id(void) const
TSeqPos GetSeqLength(void) const
CRef< CSeq_id > GetId(TFlags flags=fDefaultFlags) const
NCBI_gb_state GetGBState(void) const
void x_CheckValid(const char *method) const
~CWGSScaffoldIterator(void)
CWGSDb_Impl & GetDb(void) const
CTempString GetAccession(void) const
CRef< CSeq_id > GetGiSeq_id(void) const
void x_CreateChunk(SWGSCreateInfo &info, TChunkId chunk_id) const
CRef< CAsnBinData > GetSeq_entryData(TFlags flags=fDefaultFlags) const
CRef< CSeq_data > Get4na(TSeqPos pos, TSeqPos len) const
void x_Select(const CWGSDb &wgs_db, TIncludeFlags include_flags, EClipType clip_type)
void x_Init(const CWGSDb &wgs_db, TIncludeFlags include_flags, EClipType clip_type, TVDBRowId get_row)
bool x_Excluded(void) const
void x_CreateSplit(SWGSCreateInfo &info) const
CRef< CSeq_descr > GetSeq_descr(TFlags flags=fDefaultFlags) const
void x_SetDelta(CSeq_inst &inst, const TSegments &segments) const
CTempString GetTitle(void) const
bool CanHaveQualityGraph(void) const
TIncludeFlags m_IncludeFlags
CTempString GetPublicComment(void) const
bool x_InitSplit(SWGSCreateInfo &info) const
CRef< CAsnBinData > GetChunkDataForVersion(TChunkId chunk_id, TSplitVersion split_version) const
CTempString GetContigName(void) const
CWGSSeqIterator & SelectRow(TVDBRowId row)
CRef< CSeq_id > GetGeneralOrPatentSeq_id(void) const
TSeqPos GetRawSeqLength(void) const
CWGSSeqIterator & operator++(void)
CRef< CID2S_Chunk > GetChunkForVersion(TChunkId chunk_id, TSplitVersion split_version) const
CRef< CAsnBinData > GetChunkData(TChunkId chunk_id, TFlags flags=fDefaultFlags) const
SAmbiguityAccess GetAmbiguity() const
void GetQualityAnnot(TAnnotSet &annot_set, TFlags flags=fDefaultFlags) const
TTaxId GetTaxId(void) const
CTempString GetAccession(void) const
TVDBRowIdRange GetLocFeatRowIdRange(void) const
void x_AddQualityChunkInfo(SWGSCreateInfo &info) const
bool IsCircular(void) const
CRef< CSeq_id > GetGeneralSeq_id(void) const
bool HasQualityGraph(void) const
bool HasClippingInfo(void) const
CRef< CAsnBinData > GetSplitInfoData(TFlags flags=fDefaultFlags) const
TSeqPos GetClipQualityLength(void) const
string GetQualityAnnotName(void) const
NCBI_gb_state GetRawGBState(void) const
COpenRange< TSeqPos > x_NormalizeSeqRange(COpenRange< TSeqPos > range) const
void x_ReportInvalid(const char *method) const
void x_GetSegmentsWithRecoveredGaps(TSegments &segments, COpenRange< TSeqPos > range) const
bool HasPublicComment(void) const
CTempString GetNucProtDescrBytes(void) const
CSeq_id::TGi GetGi(void) const
void SelectAccVersion(int version)
void x_CreateBioseq(SWGSCreateInfo &info) const
bool HasGapInfo(void) const
CWGSSeqIterator & operator=(const CWGSSeqIterator &iter)
bool HasTitle(void) const
void GetQualityVec(vector< INSDC_quality_phred > &quality_vec) const
CRef< CSeq_inst > x_GetSeq_inst(SWGSCreateInfo &info) const
CRef< CSeq_id > GetGiSeq_id(void) const
void x_CreateProductsChunk(SWGSCreateInfo &info, unsigned index) const
CRef< CWGSDb_Impl::SSeq0TableCursor > m_Cur0
bool HasAnnotSet(void) const
CRef< CSeq_id > GetAccSeq_id(void) const
int GetAccVersion(void) const
bool HasSeqHash(void) const
CTempString GetAnnotBytes(void) const
THash GetSeqHash(void) const
SVersionSelector x_GetAccVersionSelector(int version) const
CRef< CID2S_Chunk > GetChunk(TChunkId chunk_id, TFlags flags=fDefaultFlags) const
pair< CRef< CAsnBinData >, TSplitVersion > GetSplitInfoDataAndVersion(TFlags flags=fDefaultFlags) const
void x_CreateDataChunk(SWGSCreateInfo &info, unsigned index) const
void x_GetSegmentsWithExplicitGaps(TSegments &data, COpenRange< TSeqPos > range, TWGSContigGapInfo gap_info, TInstSegmentFlags flags) const
void x_CreateEntry(SWGSCreateInfo &info) const
void x_GetQualityAnnot(TAnnotSet &annot_set, SWGSCreateInfo &info, TSeqPos pos=0, TSeqPos len=kInvalidSeqPos) const
bool HasSeq_descr(TFlags flags=fDefaultFlags) const
void GetGapInfo(TWGSContigGapInfo &gap_info) const
void x_CreateQualityChunk(SWGSCreateInfo &info, unsigned index) const
CRef< CSeq_data > Get2na(TSeqPos pos, TSeqPos len) const
bool HasTaxId(void) const
CBioseq::TAnnot TAnnotSet
struct CWGSSeqIterator::SWGSContigGapInfo TWGSContigGapInfo
TSeqPos GetSeqLength(EClipType clip_type=eDefaultClip) const
CRef< CBioseq > GetBioseq(TFlags flags=fDefaultFlags) const
CRef< CWGSDb_Impl::SAmbiguityInfo > m_AmbiguityInfo
SVersionSelector m_AccVersion
TSeqPos x_GetQualityArraySize(void) const
CRef< CWGSDb_Impl::SSeqTableCursor > m_Cur
void x_SetDeltaOrData(CSeq_inst &inst, const TSegments &segments) const
TSeqPos GetClipQualityLeft(void) const
pair< CRef< CID2S_Split_Info >, TSplitVersion > GetSplitInfoAndVersion(TFlags flags=fDefaultFlags) const
CRef< CID2S_Split_Info > GetSplitInfo(TFlags flags=fDefaultFlags) const
bool HasAccVersion(int version) const
unsigned GetAccVersionCount(void) const
void GetAnnotSet(TAnnotSet &annot_set, TFlags flags=fDefaultFlags) const
CRef< CSeq_inst > GetSeq_inst(TFlags flags=fDefaultFlags) const
vector< Uint1 > GetAmbiguityBytes() const
bool HasNucProtDescrBytes(void) const
CTempString GetSeqDescrBytes(void) const
int GetLatestAccVersion(void) const
vector< SSegment > TSegments
TSeqPos GetSeqOffset(EClipType clip_type=eDefaultClip) const
void GetIds(CBioseq::TId &ids, TFlags flags=fDefaultFlags) const
CRef< CSeq_entry > GetSeq_entry(TFlags flags=fDefaultFlags) const
bool HasSeqDescrBytes(void) const
void x_AddGap(TSegments &segments, TSeqPos pos, TSeqPos len, const TWGSContigGapInfo &gap_info) const
void x_CreateFeaturesChunk(SWGSCreateInfo &info, unsigned index) const
bool GetClipByQualityFlag(EClipType clip_type=eDefaultClip) const
void x_CheckValid(const char *method) const
CRef< CSeq_id > GetId(TFlags flags=fDefaultFlags) const
NCBI_gb_state GetGBState(void) const
CWGSDb_Impl & GetDb(void) const
Write hook for a choice variant (CHOICE)
Write hook for data member of a containing object (eg, SEQUENCE)
void Cleanup(TGlobalAmbiguityCache &)
TGlobalAmbiguityCache * Create()
void put(const key_type &key, const mapped_type &value, const resource_type &resource_used)
mapped_type get(const key_type &key)
container_type::const_iterator const_iterator
container_type::iterator iterator
const_iterator end() const
const_iterator lower_bound(const key_type &key) const
iterator_bool insert(const value_type &val)
container_type::value_type value_type
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
const_iterator end() const
The NCBI C++ standard methods for dealing with std::string.
int GetSeqLength(const CBioseq &bioseq)
static vector< string > arr
std::ofstream out("events_result.xml")
main entry point for tests
static const char table_name[]
static const char * str(char *buf, int n)
#define GI_FROM(T, value)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
SStrictId_Tax::TId TTaxId
Taxon id type.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
#define TAX_ID_FROM(T, value)
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
#define NCBI_THROW_FMT(exception_class, err_code, message)
The same as NCBI_THROW but with message processed as output to ostream.
static void SetErrno(int errno_code)
Set last error using errno code.
static string MakePath(const string &dir=kEmptyStr, const string &base=kEmptyStr, const string &ext=kEmptyStr)
Assemble a path from basic components.
const TPrim & Get(void) const
C * SerialClone(const C &src)
Create on heap a clone of the source object.
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
CObjectTypeInfo GetPointedType(void) const
Get type information of data to which this type refers.
CObjectTypeInfo GetMemberType(void) const
Get data type information.
const CConstObjectInfo & GetClassObject(void) const
Get containing class data.
void DefaultWrite(CObjectOStream &out, const CConstObjectInfoCV &variant)
CConstObjectInfo GetVariant(void) const
Get variant data.
ETypeFamily GetTypeFamily(void) const
Get data type family.
TConstObjectPtr GetObjectPtr(void) const
Get pointer to object.
void DefaultWrite(CObjectOStream &out, const CConstObjectInfoMI &member)
void OpenFromBuffer(const char *buffer, size_t size)
Attach reader to a data source.
Uint8 TCount
Alias for value type of counter.
TObjectType * GetNCPointer(void) const THROWS_NONE
Get pointer.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
TObjectType & GetNCObject(void) const
Get object.
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
@ eParam_NoThread
Do not use per-thread values.
uint8_t Uint1
1-byte (8-bit) unsigned integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int8_t Int1
1-byte (8-bit) signed integer
position_type GetLength(void) const
TThisType & SetFrom(position_type from)
TThisType & SetToOpen(position_type toOpen)
position_type GetToOpen(void) const
position_type GetFrom(void) const
TThisType & SetLength(position_type length)
NCBI_NS_STD::string::size_type SIZE_TYPE
CTempString literal(const char(&str)[Size])
Templatized initialization from a string literal.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static Uint8 StringToUInt8_DataSize(const CTempString str, TStringToNumFlags flags=0)
Convert string that can contain "software" qualifiers to Uint8.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
const char * data(void) const
Return a pointer to the array represented.
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
static bool IsUpper(const CTempString str)
Checks if all letters in the given string have a upper case.
size_type find_first_of(const CTempString match, size_type pos=0) const
Find the first occurrence of any character in the matching string within the current string,...
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
static string & ToUpper(string &str)
Convert string to upper case – string& version.
size_type size(void) const
Return the length of the represented array.
@ fConvErr_NoThrow
Do not throw an exception on error.
@ fWithCommas
Use commas as thousands separator.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
void Stop(void)
Suspend the timer.
void Start(void)
Start the timer.
void SetFrom(TFrom value)
Assign a value to From data member.
bool IsStr(void) const
Check if variant Str is selected.
const TStr & GetStr(void) const
Get the variant data.
TStr & SetStr(void)
Select the variant.
const TData & GetData(void) const
Get the Data member data.
void SetType(TType &value)
Assign a value to Type data member.
const TType & GetType(void) const
Get the Type member data.
void SetDb(const TDb &value)
Assign a value to Db data member.
TId & SetId(void)
Select the variant.
vector< CRef< CUser_field > > TData
TGi & SetGi(void)
Select the variant.
void SetName(const TName &value)
Assign a value to Name data member.
void SetStart(TStart value)
Assign a value to Start data member.
list< CRef< C_E > > Tdata
TSeq_id_interval & SetSeq_id_interval(void)
Select the variant.
list< CRef< CID2S_Seq_loc > > TLoc_set
TSeq_id & SetSeq_id(void)
Select the variant.
TWhole_gi_range & SetWhole_gi_range(void)
Select the variant.
TGi_interval & SetGi_interval(void)
Select the variant.
TSeq_id & SetSeq_id(void)
Select the variant.
void SetGi(TGi value)
Assign a value to Gi data member.
void SetSeq_id(TSeq_id &value)
Assign a value to Seq_id data member.
void SetSeq_loc(TSeq_loc &value)
Assign a value to Seq_loc data member.
TGi & SetGi(void)
Select the variant.
void SetCount(TCount value)
Assign a value to Count data member.
list< CRef< CBioseq > > TBioseqs
void SetStart(TStart value)
Assign a value to Start data member.
void SetStart(TStart value)
Assign a value to Start data member.
TWhole_gi & SetWhole_gi(void)
Select the variant.
TFeat & SetFeat(void)
Assign a value to Feat data member.
TContent & SetContent(void)
Assign a value to Content data member.
TWhole_seq_id & SetWhole_seq_id(void)
Select the variant.
void SetSeq_id(TSeq_id &value)
Assign a value to Seq_id data member.
void SetGraph(void)
Set NULL data member (assign 'NULL' value to Graph data member).
void SetLength(TLength value)
Assign a value to Length data member.
void ResetSeq_loc(void)
Reset Seq_loc data member.
TInts & SetInts(void)
Assign a value to Ints data member.
void SetLength(TLength value)
Assign a value to Length data member.
const TLocation & GetLocation(void) const
Get the Location member data.
E_Choice
Choice variants.
@ e_not_set
No variant selected.
@ e_MaxChoice
== e_Variation+1
void SetSeqid(TSeqid value)
Assign a value to Seqid data member.
void SetTo(TTo value)
Assign a value to To data member.
TPatent & SetPatent(void)
Select the variant.
void SetId(TId &value)
Assign a value to Id data member.
void SetFrom(TFrom value)
Assign a value to From data member.
TGi GetGi(void) const
Get the variant data.
TGi & SetGi(void)
Select the variant.
E_Choice
Choice variants.
bool IsGi(void) const
Check if variant Gi is selected.
void SetStrand(TStrand value)
Assign a value to Strand data member.
void SetCit(TCit &value)
Assign a value to Cit data member.
@ e_not_set
No variant selected.
void SetMin(TMin value)
Assign a value to Min data member.
void SetTitle(const TTitle &value)
Assign a value to Title data member.
void SetNumval(TNumval value)
Assign a value to Numval data member.
TValues & SetValues(void)
Assign a value to Values data member.
void SetGraph(TGraph &value)
Assign a value to Graph data member.
void SetMax(TMax value)
Assign a value to Max data member.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
void SetAxis(TAxis value)
Assign a value to Axis data member.
const TSeq & GetSeq(void) const
Get the variant data.
TSet & SetSet(void)
Select the variant.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
bool IsSeq(void) const
Check if variant Seq is selected.
void SetId(TId &value)
Assign a value to Id data member.
bool IsSet(void) const
Check if variant Set is selected.
void SetClass(TClass value)
Assign a value to Class data member.
list< CRef< CSeq_entry > > TSeq_set
TSeq & SetSeq(void)
Select the variant.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_nuc_prot
nuc acid and coded proteins
@ e_not_set
No variant selected.
void ResetStrand(void)
Reset Strand data member.
void SetLength(TLength value)
Assign a value to Length data member.
void SetData(TData &value)
Assign a value to Data data member.
list< CRef< CSeqdesc > > Tdata
TId & SetId(void)
Assign a value to Id data member.
const TUser & GetUser(void) const
Get the variant data.
TTitle & SetTitle(void)
Select the variant.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
void SetDesc(TDesc &value)
Assign a value to Desc data member.
void SetExt(TExt &value)
Assign a value to Ext data member.
void SetHist(THist &value)
Assign a value to Hist data member.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TId & GetId(void) const
Get the Id member data.
TName & SetName(void)
Select the variant.
const Tdata & Get(void) const
Get the member data.
void SetType(TType value)
Assign a value to Type data member.
TLength GetLength(void) const
Get the Length member data.
list< CRef< CSeq_id > > TId
void SetReplaces(TReplaces &value)
Assign a value to Replaces data member.
void SetFuzz(TFuzz &value)
Assign a value to Fuzz data member.
void SetInst(TInst &value)
Assign a value to Inst data member.
bool IsSetDescr(void) const
Descriptors. Check if a value has been assigned to Descr data member.
void SetTopology(TTopology value)
Assign a value to Topology data member.
EMol
molecule class in living organism
void SetDescr(TDescr &value)
Assign a value to Descr data member.
const TNcbi4na & GetNcbi4na(void) const
Get the variant data.
TUser & SetUser(void)
Select the variant.
void SetRepr(TRepr value)
Assign a value to Repr data member.
TNcbi2na & SetNcbi2na(void)
Select the variant.
list< CRef< CSeq_feat > > TFtable
const TNcbi2na & GetNcbi2na(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
void SetReplaced_by(TReplaced_by &value)
Assign a value to Replaced_by data member.
Tdata & Set(void)
Assign a value to data member.
list< CRef< CSeq_annot > > TAnnot
void SetLength(TLength value)
Assign a value to Length data member.
list< CRef< CDelta_seq > > Tdata
void SetStrand(TStrand value)
Assign a value to Strand data member.
TLinkage_evidence & SetLinkage_evidence(void)
Assign a value to Linkage_evidence data member.
void SetLinkage(TLinkage value)
Assign a value to Linkage data member.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
void ResetExt(void)
Reset Ext data member.
TNcbi4na & SetNcbi4na(void)
Select the variant.
const TDescr & GetDescr(void) const
Get the Descr member data.
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
void SetMol(TMol value)
Assign a value to Mol data member.
bool IsUser(void) const
Check if variant User is selected.
@ eRepr_delta
sequence made by changes (delta) to others
@ eRepr_raw
continuous sequence
@ eRepr_virtual
no seq data
@ e_Embl
EMBL specific information.
@ e_User
user defined object
@ e_Update_date
date of last update
@ e_Pub
a reference to the publication
@ e_Genbank
GenBank specific info.
@ e_Comment
a more extensive comment
@ e_Molinfo
info on the molecule and techniques
@ e_Create_date
date entry first created/released
@ e_Source
source of materials, includes Org-ref
@ eStrand_ds
double strand
unsigned int
A callback function used to compare two keys in a database.
Definition of all error codes used in SRA C++ support libraries.
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is whole
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
Lightweight interface for getting lines of data with minimal memory copying.
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
const string version
version string
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
string s_Value(TValue value)
const GenericPointer< typename T::ValueType > T2 value
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
std::istream & in(std::istream &in_, double &x_)
Int4 delta(size_t dimension_, const Int4 *score_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void split(std::vector< std::string > *strVec, const std::string &str_, const std::string &split_)
static bool GetIds(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
#define row(bind, expected)
void AddFeature(const CTempString &data)
char m_Packed4na[kAmbiguityBlockSize/2]
T4naBlocks::const_iterator m_4naBlocksIter
EBaseType GetBaseType(const S4naReader &reader) const
CWGSDb_Impl::SSeqTableCursor SSeqTableCursor
void x_CalculateAmbiguityMask(CWGSDb_Impl &db)
SAmbiguityInfo(TVDBRowId row_id, CWGSDb_Impl &db, SSeqTableCursor &cur)
bool x_AmbiguousBlock(size_t block_index) const
bool x_IsValid(const S4naReader &reader) const
vector< INSDC_coord_len > m_GapLen
vector< NCBI_WGS_component_props > m_GapProps
bool x_AddAmbiguousBlock(const Uint1 *ptr, TSeqPos count, TSeqPos pos, TWGSContigGapInfo &gap_info) const
bool x_AddAmbiguities(const Uint1 *ptr, TSeqPos count, TSeqPos pos, TWGSContigGapInfo &gap_info) const
vector< INSDC_coord_zero > m_GapStart
vector< Uint1 > GetAmbiguityBytes(SSeqTableCursor &cur)
void Advance(S4naReader &reader) const
TWGSContigGapInfo GetGapInfo() const
void x_SetAmbiguousBlock(size_t block_index)
map< TSeqPos, S4naBlock > T4naBlocks
CWGSSeqIterator::TWGSContigGapInfo TWGSContigGapInfo
TSeqPos Get2naLengthExact(TSeqPos pos, TSeqPos len, CWGSDb_Impl &db, SSeqTableCursor &cur) const
CRef< CSeq_data > Get4na(TSeqPos pos, TSeqPos len, CWGSDb_Impl &db, SSeqTableCursor &cur) const
void x_Calculate4na(CWGSDb_Impl &db) const
S4naReader Get4naReader(TSeqPos pos, CWGSDb_Impl &db, SSeqTableCursor &cur) const
vector< INSDC_4na_bin > m_Ambiguity4na
vector< Uint1 > m_AmbiguityMask
CRef< CSeq_data > Get2na(TSeqPos pos, TSeqPos len, SSeqTableCursor &cur) const
TSeqPos Get4naLengthBlock(TSeqPos pos, TSeqPos len) const
TSeqPos Get2naLengthBlock(TSeqPos pos, TSeqPos len) const
vector< NCBI_WGS_gap_linkage > m_GapLinkage
TSeqPos GetGapLengthExact(TSeqPos pos, TSeqPos len, CWGSDb_Impl &db, SSeqTableCursor &cur) const
vector< INSDC_coord_zero > m_AmbiguityPos
void x_Need4na(CWGSDb_Impl &db) const
size_t GetUsedMemory() const
TSeqPos Get4naLengthExact(TSeqPos pos, TSeqPos len, TSeqPos stop_2na_len, TSeqPos stop_gap_len, CWGSDb_Impl &db, SSeqTableCursor &cur) const
SFeatTableCursor(const CVDBTable &table)
DECLARE_VDB_COLUMN_AS(TVDBRowId, PRODUCT_ROW_ID)
DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, PRODUCT_START)
CObjectIStreamAsnBinary m_ObjStr
DECLARE_VDB_COLUMN_AS_STRING(LOC_ACCESSION)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_feattype, FEAT_TYPE)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_seqtype, LOC_SEQ_TYPE)
DECLARE_VDB_COLUMN_AS_STRING(PRODUCT_ACCESSION)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_seqtype, PRODUCT_SEQ_TYPE)
DECLARE_VDB_COLUMN_AS(TVDBRowId, LOC_ROW_ID)
DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, LOC_START)
DECLARE_VDB_COLUMN_AS_STRING(SEQ_FEAT)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_loc_strand, LOC_STRAND)
DECLARE_VDB_COLUMN_AS(INSDC_coord_len, LOC_LEN)
DECLARE_VDB_COLUMN_AS(INSDC_coord_len, PRODUCT_LEN)
SGiIdxTableCursor(const CVDBTable &table)
DECLARE_VDB_COLUMN_AS(TVDBRowId, PROT_ROW_ID)
DECLARE_VDB_COLUMN_AS(TVDBRowId, NUC_ROW_ID)
SProt0TableCursor(const CVDBTable &table)
DECLARE_VDB_COLUMN_AS(NCBI_gi, GI)
DECLARE_VDB_COLUMN_AS_STRING(GB_ACCESSION)
DECLARE_VDB_COLUMN_AS(uint32_t, ACC_VERSION)
DECLARE_VDB_COLUMN_AS_STRING(SEQID_GNL_PREFIX)
DECLARE_VDB_COLUMN_AS_STRING(PROTEIN_NAME)
string GetAcc(Uint4 id) const
pair< TVDBRowId, TVDBRowId > row_range_t
DECLARE_VDB_COLUMN_AS(row_range_t, NAME_ROW_RANGE)
SProtIdxTableCursor(const CVDBTable &table)
DECLARE_VDB_COLUMN_AS(TVDBRowId, ROW_ID)
DECLARE_VDB_COLUMN_AS_STRING(PUBLIC_COMMENT)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEAT_ROW_START)
DECLARE_VDB_COLUMN_AS(TVDBRowId, REPLACED_BY)
SProtTableCursor(const CVDBTable &table)
DECLARE_VDB_COLUMN_AS_STRING(DESCR)
DECLARE_VDB_COLUMN_AS_STRING(PROTEIN)
DECLARE_VDB_COLUMN_AS(NCBI_taxid, TAXID)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEAT_ROW_END)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEAT_PRODUCT_ROW_ID)
DECLARE_VDB_COLUMN_AS(INSDC_coord_len, PROTEIN_LEN)
DECLARE_VDB_COLUMN_AS(NCBI_gb_state, GB_STATE)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_hash, HASH)
DECLARE_VDB_COLUMN_AS_STRING(PRODUCT_NAME)
DECLARE_VDB_COLUMN_AS_STRING(TITLE)
DECLARE_VDB_COLUMN_AS_STRING(REF_ACC)
DECLARE_VDB_COLUMN_AS(TVDBRowId, REPLACES)
DECLARE_VDB_COLUMN_AS_STRING(ANNOT)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_component_props, COMPONENT_PROPS)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEAT_ROW_START)
SScfTableCursor(const CVDBTable &table)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_gap_linkage, COMPONENT_LINKAGE)
DECLARE_VDB_COLUMN_AS_STRING(ACCESSION)
DECLARE_VDB_COLUMN_AS(TVDBRowId, COMPONENT_ID)
DECLARE_VDB_COLUMN_AS(INSDC_coord_len, COMPONENT_LEN)
DECLARE_VDB_COLUMN_AS(NCBI_gb_state, GB_STATE)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEAT_PRODUCT_ROW_ID)
DECLARE_VDB_COLUMN_AS(INSDC_coord_one, COMPONENT_START)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEAT_ROW_END)
DECLARE_VDB_COLUMN_AS_STRING(SCAFFOLD_NAME)
DECLARE_VDB_COLUMN_AS(bool, CIRCULAR)
DECLARE_VDB_COLUMN_AS_STRING(SEQID_GNL_PREFIX)
SSeq0TableCursor(const CVDBTable &table)
DECLARE_VDB_COLUMN_AS(NCBI_taxid, TAXID)
DECLARE_VDB_COLUMN_AS_STRING(SEQID_GNL_PREFIX)
DECLARE_VDB_COLUMN_AS(Uint1, MOL)
DECLARE_VDB_COLUMN_AS_STRING(ACC_PREFIX)
DECLARE_VDB_COLUMN_AS(INSDC_coord_len, ACC_CONTIG_LEN)
DECLARE_VDB_COLUMN_AS(INSDC_4na_bin, READ)
SSeq4naTableCursor(const CVDBTable &table)
SSeqTableCursor(const CVDBTable &table)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEAT_PRODUCT_ROW_ID)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_hash, HASH)
CVDBColumnBits< 2 > m_READ_2na
DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, TRIM_START)
DECLARE_VDB_COLUMN_AS_STRING(LABEL)
DECLARE_VDB_COLUMN_AS(INSDC_quality_phred, QUALITY)
DECLARE_VDB_COLUMN_AS_STRING(CONTIG_NAME)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_gap_linkage, GAP_LINKAGE)
DECLARE_VDB_COLUMN_AS(INSDC_4na_bin, AMBIGUITY_4NA)
DECLARE_VDB_COLUMN_AS(NCBI_gb_state, GB_STATE)
DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, AMBIGUITY_POS)
DECLARE_VDB_COLUMN_AS(INSDC_coord_len, GAP_LEN)
DECLARE_VDB_COLUMN_AS_STRING(TITLE)
DECLARE_VDB_COLUMN_AS(NCBI_gi, GI)
DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, READ_START)
DECLARE_VDB_COLUMN_AS(INSDC_coord_len, READ_LEN)
DECLARE_VDB_COLUMN_AS_STRING(ANNOT)
DECLARE_VDB_COLUMN_AS(Uint1, AMBIGUITY_MASK)
DECLARE_VDB_COLUMN_AS(bool, CIRCULAR)
DECLARE_VDB_COLUMN_AS_STRING(ACCESSION)
DECLARE_VDB_COLUMN_AS_STRING(DESCR)
DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, GAP_START)
DECLARE_VDB_COLUMN_AS(row_range_t, CONTIG_NAME_ROW_RANGE)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEAT_ROW_START)
DECLARE_VDB_COLUMN_AS(uint32_t, ACC_VERSION)
DECLARE_VDB_COLUMN_AS_STRING(PUBLIC_COMMENT)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_component_props, GAP_PROPS)
DECLARE_VDB_COLUMN_AS_STRING(NUC_PROT_DESCR)
DECLARE_VDB_COLUMN_AS_STRING(NAME)
pair< TVDBRowId, TVDBRowId > row_range_t
DECLARE_VDB_COLUMN_AS(INSDC_coord_len, TRIM_LEN)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEAT_ROW_END)
CRef< CSeq_data > Get4na(TSeqPos pos, TSeqPos len) const
TSeqPos Get4naLengthExact(TSeqPos pos, TSeqPos len, TSeqPos stop_2na_len, TSeqPos stop_gap_len) const
TSeqPos Get2naLengthExact(TSeqPos pos, TSeqPos len) const
CRef< CWGSDb_Impl::SAmbiguityInfo > m_AmbiguityInfo
CRef< CWGSDb_Impl::SSeqTableCursor > m_Seq
vector< Uint1 > GetAmbiguityBytes() const
SAmbiguityInfo * operator->() const
SAmbiguityAccess(CRef< CWGSDb_Impl::SAmbiguityInfo > &info, CWGSDb_Impl &db, const CRef< CWGSDb_Impl::SSeqTableCursor > &seq, TVDBRowId row_id)
CRef< CSeq_data > Get2na(TSeqPos pos, TSeqPos len) const
SAmbiguityAccess(const SAmbiguityAccess &)=delete
TSeqPos GetGapLengthExact(TSeqPos pos, TSeqPos len) const
void operator=(const SAmbiguityAccess &)=delete
CRef< CSeq_literal > literal
COpenRange< TSeqPos > range
const INSDC_coord_len * gaps_len
bool IsInGap(TSeqPos pos) const
TSeqPos GetGapLength(TSeqPos pos, TSeqPos len) const
const NCBI_WGS_component_props * gaps_props
TSeqPos GetFrom(void) const
const INSDC_coord_zero * gaps_start
TSeqPos GetDataLength(TSeqPos pos, TSeqPos len) const
const NCBI_WGS_gap_linkage * gaps_linkage
CRef< CID2S_Split_Info > split
void x_CreateProtSet(TVDBRowIdRange range)
void x_AddFeature(const CWGSFeatureIterator &it, CSeq_annot::TData::TFtable &dst)
void x_AddProducts(const vector< TVDBRowId > &product_row_ids)
void x_AddDescr(CTempString bytes)
SWGSCreateInfo(const CWGSDb &db)
void x_SetSplitVersion(TSplitVersion split_version)
void x_SetFlags(TFlags flags)
void x_SetSeq(CWGSProteinIterator &it)
void x_AddFeaturesSplit(TVDBRowIdRange range, vector< TVDBRowId > &product_row_ids)
CRef< CWGSAsnBinData > data
CBioseq_set & x_GetProtSet(void)
CRef< CID2S_Chunk > chunk
void x_AddFeatures(TVDBRowIdRange range, vector< TVDBRowId > &product_row_ids)
SWGSCreateInfo(const CWGSDb &db, EFromFlags, TFlags flags)
SWGSCreateInfo(const CWGSDb &db, EFromSplitVersion, TSplitVersion split_version)
TSplitVersion split_version
void x_AddFeaturesDirect(TVDBRowIdRange range, vector< TVDBRowId > &product_row_ids)
@ eFeatLocIdUninitialized
static const TSplitVersion kDefaultSplitVersion
void AddFeature(NCBI_WGS_feattype type, COpenRange< TSeqPos > range)
void AddFeatType(NCBI_WGS_feattype feat_type)
static bool ExpandRange(COpenRange< TSeqPos > &dst, COpenRange< TSeqPos > src)
void AddFeatRange(COpenRange< TSeqPos > range)
bitset< CSeqFeatData::e_MaxChoice > feat_types
vector< COpenRange< TSeqPos > > loc_ranges
void AddContent(CID2S_Chunk_Info &chunk, CSeq_id &feat_id)
void AddFeature(bool with_product, NCBI_WGS_feattype type, COpenRange< TSeqPos > range)
CRef< CID2S_Chunk_Info > CreateChunkInfo(int index, CWGSProteinIterator &prot_it, const vector< TVDBRowId > &product_row_ids, size_t product_index)
CRef< CID2S_Bioseq_Ids::C_E > seq_place
SWGSFeatChunkInfo(CSeq_id &main_id, CSeq_id &feat_id)
static DP_BlockInfo * blocks
#define INIT_VDB_COLUMN(name)
#define INIT_VDB_COLUMN_AS(name, type)
#define INIT_VDB_COLUMN_BACKUP(name, backup_name)
pair< TVDBRowId, TVDBRowCount > TVDBRowIdRange
#define INIT_OPTIONAL_VDB_COLUMN(name)
int32_t NCBI_WGS_gap_linkage
int16_t NCBI_WGS_component_props
@ NCBI_WGS_gap_linkage_evidence_paired_ends
@ NCBI_WGS_gap_linkage_linked
@ NCBI_WGS_gap_unknown_type
@ NCBI_WGS_gap_centromere
@ NCBI_WGS_gap_heterochromatin
static const char kMasterDescrMark[]
static const bool kRecoverGaps
static const size_t kProdPerChunk
static TTaxId s_GetTaxId(const CVDBValueFor< NCBI_taxid > &value)
#define DEFAULT_AMBIGUITY_CACHE_SIZE
void sx_SetSplitInterval(CID2S_Seq_loc &split_loc, CSeq_id &id, TSeqPos pos, TSeqPos end)
static CSafeStatic< TGlobalAmbiguityCache, SStaticGlobalAmbiguityCacheCallbacks > s_GlobalAmbiguityCache
static bool s_UseAmbiguity4na(void)
static void s_Convert_2na_to_4na(char *dst_4na, const char *src_2na, size_t base_count)
static bool sx_HasAmbiguity(const Uint1 *ptr, const Uint1 *end)
static void s_Set_4na_gap(vector< char > &dst_4na_vec, size_t offset, size_t len)
static int kAssignedDefaultSplitVersion
static CRef< CSeq_literal > sx_MakeGapLiteral(TSeqPos len, NCBI_WGS_component_props props, NCBI_WGS_gap_linkage gap_linkage)
static const size_t kMinFeatCountToSplit
static bool s_UseFull4naBlocks(void)
static void s_AddUserObjectType(const CSeqdesc &desc, set< string > &existing_uo_types)
static const TSeqPos kDataChunkSize
static CWGSSeqIterator::TIncludeFlags s_ToFlags(CWGSSeqIterator::EWithdrawn withdrawn)
static const size_t kFeatPerChunk
static const Uint1 * sx_FindAmbiguity(const Uint1 *ptr, const Uint1 *end)
static const TSeqPos kChunk2naSize
static void s_GetMinMax(const Uint1 *arr, size_t size, Uint1 &min_v, Uint1 &max_v)
static void s_Copy_4na(char *dst_4na, TSeqPos dst_offset, const char *src_4na, TSeqPos src_offset, size_t base_count)
static char s_ConvertBits_2na_to_4na_2nd(char bits_2na)
static size_t sx_Find_4na_Ambiguity(const char *ptr, size_t offset, size_t base_count)
void sx_SetSplitId(CID2S_Bioseq_Ids::C_E &split_id, CSeq_id &id)
void sx_AddAnnotBytes(CBioseq::TAnnot &annot_set, CTempString bytes)
static bool s_GetClipByQuality(void)
static const TSeqPos kMinDataSplitSize
static char s_ConvertBits_2na_to_4na(char bits_2na)
static bool kEnableSplitData
static void s_Set_4na(vector< char > &dst_4na_vec, size_t offset, INSDC_4na_bin amb)
static bool kEnableSplitProd
static Uint1 sx_Get_4na(const char *ptr, size_t offset)
static void sx_AddMasterDescr(const CWGSDb &db, SWGSCreateInfo &info, SWGSDb_Defs::TFlags flags)
int sx_StringToNonNegativeInt(const CTempString &str)
void sx_AddSplitIds(CID2S_Bioseq_Ids::Tdata &split_ids, const CBioseq::TId &ids)
static bool s_UseAmbiguityMask(void)
static const TSeqPos kMin2naSize
static bool sx_Is2na(Uint1 b)
bool sx_SetAccession(CSeq_id &id, CTempString accession)
static const char kSeq_descrFirstByte
static const TSeqPos kChunk4naSize
void sx_SetTag(CDbtag &tag, CTempString str)
static int s_GetDebugLevel(void)
static bool kEnableSplitQual
static const TSeqPos kAmbiguityBlockSize
int sx_GetStringId(CTempString str)
static void sx_Assign(vector< Value > &dst, const CVDBValueFor< Value > &src)
void sx_AddDescrBytes(CSeq_descr &descr, CTempString bytes)
static bool kEnableSplitFeat
NCBI_PARAM_DEF_EX(int, WGS, DEBUG, 0, eParam_NoThread, WGS_DEBUG)
static void sx_AddEvidence(CSeq_gap &gap, CLinkage_evidence::TType type)
static TGi s_ToGi(TVDBRowId gi, const char *method)
limited_resource_map< pair< string, TVDBRowId >, CRef< CWGSDb_Impl::SAmbiguityInfo >, size_t > TGlobalAmbiguityCache
static char s_ConvertBits_2na_to_4na_1st(char bits_2na)
static bool s_UseGapInfo(void)
static void s_SetAmbiguitiesPos(vector< char > &dst_4na_vec, TSeqPos pos, TSeqPos len, const vector< INSDC_coord_zero > &amb_pos, const vector< INSDC_4na_bin > &amb_4na)
static bool sx_HasMoreProducts(const CWGSDb &db, TVDBRowIdRange range, size_t count)
NCBI_DEFINE_ERR_SUBCODE_X(19)
DEFINE_STATIC_FAST_MUTEX(s_GlobalAmbiguityCacheMutex)
static void s_AddGiRange(CID2S_Seq_loc::TLoc_set &loc_set, CSeq_id::TGi gi_range_start, CSeq_id::TGi gi_range_stop)
static void s_SetGaps(vector< char > &dst_4na_vec, TSeqPos pos, TSeqPos len, CWGSSeqIterator::TWGSContigGapInfo gap_info)
static size_t s_GetAmbiguityCacheSize(void)
static const TSeqPos kQualChunkSize
static const TSeqPos kSplit2naSize
bool sx_SetVersion(CSeq_id &id, int version)
static void s_SetAmbiguitiesBlocks(vector< char > &dst_4na_vec, TSeqPos pos, TSeqPos len, const CWGSDb_Impl::SAmbiguityInfo::T4naBlocks &blocks)
static string s_GetUserObjectType(const CSeqdesc &desc)
void sx_AddSplitId(CID2S_Bioseq_Ids::Tdata &split_ids, CSeq_id &id)
NCBI_PARAM_DEF(bool, WGS, MASTER_DESCR, true)
static const TSeqPos kSplit4naSize
int sx_NewStringToNonNegativeInt(CTempString str)
static void s_Pack_4na(char *dst_packed_4na, const Uint1 *src_4na, size_t base_count)
NCBI_PARAM_DECL(int, WGS, DEBUG)
@ NCBI_gb_state_eWGSGenBankReplaced
@ NCBI_gb_state_eWGSGenBankMissing
@ NCBI_gb_state_eWGSGenBankLive
uint8_t INSDC_quality_phred
uint8_t NCBI_WGS_loc_strand
uint8_t NCBI_WGS_feattype
static wxAcceleratorEntry entries[3]
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4