*
data(0);
81m_IsAA (prot_nucl ==
'p'),
87m_VolStart (vol_start),
90m_HaveColumns (
false),
91m_SeqFileOpened(
false),
92m_HdrFileOpened(
false),
93m_HashFileOpened(
false),
94m_OidFileOpened(
false)
288 TIndxstart_offset = 0;
289 TIndxend_offset = 0;
297 return int(end_offset - start_offset - 1);
304 TIndxstart_offset = 0;
305 TIndxend_offset = 0;
312 intwhole_bytes =
int(end_offset - start_offset - 1);
321 intremainder = amb_char & 3;
322 return(whole_bytes * 4) + remainder;
327 TIndxstart_offset = 0;
328 TIndxend_offset = 0;
335 intwhole_bytes =
int(end_offset - start_offset - 1);
343 return(whole_bytes * 4) + (oid & 0x03);
362translated.resize(512);
3650x21, 0x22, 0x24, 0x28,
3660x41, 0x42, 0x44, 0x48,
3670x81, 0x82, 0x84, 0x88 };
371 for(pair1 = 0; pair1 < 16; pair1++) {
372 for(pair2 = 0; pair2 < 16; pair2++) {
373 Int2index = (pair1 * 16 + pair2) * 2;
375translated[index] =
convert[pair1];
376translated[index+1] =
convert[pair2];
404vector<char> & buf4bit,
408 TTable& expanded =
t.Get().storage;
413buf4bit.resize(estimated_length);
417 for(
int i=0;
i<inp_chars;
i++) {
418 Uint4inp_char = (buf2bit[
i] & 0xFF);
420buf4bit[bytes] = expanded[ (inp_char*2) ];
421buf4bit[bytes+1] = expanded[ (inp_char*2) + 1 ];
428 Uint1remainder_bits = 2 * bases_remain;
429 Uint1remainder_mask = (0xFF << (8 - remainder_bits)) & 0xFF;
430 Uint4last_masked = buf2bit[inp_chars] & remainder_mask;
432buf4bit[bytes++] = expanded[ (last_masked*2) ];
434 if(bases_remain > 2) {
435buf4bit[bytes ++] = expanded[ (last_masked*2)+1 ];
439buf4bit.resize(bytes);
441 _ASSERT(estimated_length == (
int)buf4bit.size());
460 for(
int i= 0;
i<256;
i++) {
461 intp1 = (
i>> 6) & 0x3;
462 intp2 = (
i>> 4) & 0x3;
463 intp3 = (
i>> 2) & 0x3;
466translated.push_back(1 << p1);
467translated.push_back(1 << p2);
468translated.push_back(1 << p3);
469translated.push_back(1 << p4);
509 TTable& expanded =
t.Get().storage;
511 intpos = range.
begin;
513 intinput_chars_begin = range.
begin/ 4;
514 intinput_chars_end = (range.
end+ 3) / 4;
516 intwhole_chars_begin = (range.
begin+ 3) / 4;
517 intwhole_chars_end = range.
end/ 4;
519 intp = input_chars_begin;
521 if(p < whole_chars_begin) {
522 Int4table_offset = (buf2bit[input_chars_begin] & 0xFF) * 4;
524 intendpt = (input_chars_begin + 1) * 4;
526 if(endpt > range.
end) {
530 for(
intk = range.
begin; k < endpt; k++) {
537buf8bit[pos++] = expanded[ table_offset + 1 ];
541buf8bit[pos++] = expanded[ table_offset + 2 ];
545buf8bit[pos++] = expanded[ table_offset + 3 ];
560p = whole_chars_begin;
562 while(p < whole_chars_end) {
563 Int4table_offset = (buf2bit[p] & 0xFF) * 4;
565buf8bit[pos++] = expanded[ table_offset ];
566buf8bit[pos++] = expanded[ table_offset + 1 ];
567buf8bit[pos++] = expanded[ table_offset + 2 ];
568buf8bit[pos++] = expanded[ table_offset + 3 ];
572 if(p < input_chars_end) {
573 Int4table_offset = (buf2bit[p] & 0xFF) * 4;
575 intremains = (range.
end& 0x3);
578buf8bit[pos++] = expanded[ table_offset ];
581buf8bit[pos++] = expanded[ table_offset + 1 ];
584buf8bit[pos++] = expanded[ table_offset + 2 ];
623 for(
int i= range.
begin;
i< range.
end;
i++)
644 return(ambchars[
i] >> 16) & 0xFFF;
660 returnambchars[
i+1];
680 return(ambchars[
i] >> 28) & 0xF;
696 return(ambchars[
i] >> 24) & 0xF;
712 returnambchars[
i] & 0xFFFFFF;
730 constvector<Int4> & amb_chars)
732 if(amb_chars.empty())
736 Uint4amb_num = amb_chars[0];
742 boolnew_format = (amb_num & 0x80000000) != 0;
745amb_num &= 0x7FFFFFFF;
748 for(
Uint4 i=1;
i< amb_num+1;
i++) {
763 Int4pos = position / 2;
764 Int4rem = position & 1;
765 Uint1char_l = char_r << 4;
772 for(j = 0; j <= row_len; j++) {
774buf4bit[index] = (buf4bit[index] & 0x0F) + char_l;
777buf4bit[index] = (buf4bit[index] & 0xF0) + char_r;
805 constvector<Int4> & amb_chars,
808 if(amb_chars.empty() || !seq )
return;
810 Uint4amb_num = amb_chars[0];
813 boolnew_format = (amb_num & 0x80000000) != 0;
815 if(new_format) amb_num &= 0x7FFFFFFF;
817 for(
Uint4 i= 1;
i< amb_num+1;
i++) {
832 if(new_format) ++
i;
834 if(position + row_len <= region.
begin)
837 if(position >= region.
end)
840 for(
intj = 0; j < row_len; ++j, ++position)
841 if( position >= region.
begin&& position < region.
end)
842seq[position] = trans_ch;
860 const char* seq_buffer,
871vector<char> aa_data;
872aa_data.resize(length);
874 for(
int i= 0;
i< length;
i++) {
875aa_data[
i] = seq_buffer[
i];
878seqinst.
SetSeq_data().SetNcbistdaa().Set().swap(aa_data);
897 const char* seq_buffer,
900 intwhole_bytes = length / 4;
901 intpartial_byte = ((length & 0x3) != 0) ? 1 : 0;
903vector<char> na_data;
904na_data.resize(whole_bytes + partial_byte);
906 for(
int i= 0;
i<whole_bytes;
i++) {
907na_data[
i] = seq_buffer[
i];
911na_data[whole_bytes] = seq_buffer[whole_bytes] & (0xFF - 0x03);
914seqinst.
SetSeq_data().SetNcbi2na().Set().swap(na_data);
935 const char* seq_buffer,
937vector<Int4> & amb_chars)
939vector<char> buffer_4na;
943seqinst.
SetSeq_data().SetNcbi4na().Set().swap(buffer_4na);
963 typedeflist< CRef<CBlast_def_line> >::const_iterator TDefIt;
964 typedeflist< CRef<CSeq_id > >::const_iterator TSeqIt;
966 constlist< CRef<CBlast_def_line> > & dl = deflines->
Get();
968 boolfirst_defline(
true);
970 for(TDefIt iter = dl.begin(); iter != dl.end(); iter++) {
975 if(! title.empty()) {
980 boolwrote_seqids(
false);
983 constlist< CRef<CSeq_id > > & sl = defline.
GetSeqid();
985 boolfirst_seqid(
true);
992 for(TSeqIt seqit = sl.begin(); seqit != sl.end(); seqit++) {
999(*seqit)->WriteAsFasta(oss);
1001first_seqid =
false;
1002wrote_seqids =
true;
1007first_defline =
false;
1034 typedeflist< CRef<CSeq_id> > TSeqidList;
1036 ITERATE(TSeqidList, iter, seqids) {
1057 const CSeq_id* preferred_seqid)
1072 if(preferred_gi !=
ZERO_GI|| preferred_seqid) {
1076 if(preferred_gi !=
ZERO_GI) {
1079seqid.
Reset(preferred_seqid);
1082 boolfound =
false;
1087new_bdls->
Set().push_front(*iter);
1089new_bdls->
Set().push_back(*iter);
1099list< CRef<CSeqdesc> >
1102 const CSeq_id* preferred_seqid)
1106 const boolprovide_new_taxonomy_info =
true;
1109 const char* TAX_ORGREF_DB_NAME =
"taxon";
1111list< CRef<CSeqdesc> > taxonomy;
1116 if(bdls.
Empty()) {
1120 typedeflist< CRef<CBlast_def_line> > TBDLL;
1122 typedefTBDLL::const_iterator TBDLLConstIter;
1124 constTBDLL & dl = bdls->
Get();
1130 for(TBDLLConstIter iter = dl.begin(); iter != dl.end(); iter ++) {
1133 if((*iter)->CanGetTaxid()) {
1134taxid = (*iter)->GetTaxid();
1140 boolhave_org_desc =
false;
1143have_org_desc =
true;
1147 boolfound_taxid_in_taxonomy_blastdb =
true;
1149 if((! have_org_desc) && provide_new_taxonomy_info) {
1153found_taxid_in_taxonomy_blastdb =
false;
1157 if(provide_new_taxonomy_info) {
1158 if(have_org_desc) {
1162org_tag->
SetDb(TAX_ORGREF_DB_NAME);
1166 if(found_taxid_in_taxonomy_blastdb) {
1170org->
SetDb().push_back(org_tag);
1179taxonomy.push_back(desc);
1181 if(use_taxinfo_cache) {
1203 if(oss.size() == 1) {
1206 constvector<char> & v = *oss.front();
1213 size+= (**iter1).size();
1216temp.reserve(
size);
1220temp.append(& (**iter3)[0], (*iter3)->size());
1237 if( !bioseq.IsSetDescr() ) {
1243 if( !(*iter)->IsUser() ) {
1250 constvector< CRef< CUser_field > >& usf = uobj.
GetData();
1252 _ASSERT(usf.front()->CanGetData());
1253 if(usf.front()->GetData().IsOss()) {
1274vector<char> hdr_data;
1277 if(! hdr_data.empty()) {
1290vector< vector<char>* > & strs = uf->
SetData().SetOss();
1293strs.push_back(
newvector<char>);
1294strs[0]->swap(hdr_data);
1296uobj->
SetData().push_back(uf);
1308 const CSeq_id* target_seq_id,
1312 typedeflist< CRef<CBlast_def_line> > TDeflines;
1316list< CRef< CSeq_id > > seqids;
1328 if((target_gi !=
ZERO_GI) || target_seq_id) {
1338seqid.
Reset(target_seq_id);
1344 ITERATE(TDeflines, iter, orig_deflines->
Get()) {
1352 if(filt_dl.
Empty()) {
1354 "Error: oid headers do not contain target gi/seq_id.");
1356defline_set->
Set().push_back(filt_dl);
1359defline_set = orig_deflines;
1362 if(defline_set.
Empty() ||
1363(! defline_set->
CanGet()) ||
1364(0 == defline_set->
Get().size())) {
1368defline = defline_set->
Get().front();
1369 if(! defline->CanGetSeqid()) {
1372seqids = defline->GetSeqid();
1381 const char* seq_buffer = 0;
1404vector<Int4> ambchars;
1408 if(ambchars.empty()) {
1432bioseq->
SetInst().SetMol(is_prot
1439bioseq->
SetId().swap(seqids);
1453desc1->
SetTitle().swap(description);
1458seq_desc_set.
Set().push_back(desc1);
1460 if(! desc2.
Empty()) {
1461seq_desc_set.
Set().push_back(desc2);
1465list< CRef<CSeqdesc> > tax =
1469bioseq->
SetDescr().Set().push_back(*iter);
1486 switch(alloc_type) {
1488retval = (
char*)
malloc(length);
1492retval =
new char[length];
1512 x_GetAmbigSeq(oid, & buf1, nucl_code, alloc_type, region, masks);
1523 if(!masks || masks->
empty())
return;
1527 unsigned intbegin(range.
begin);
1528 unsigned intend(range.
end);
1530 while(i < masks->
size() && (*masks)[
i].second <= begin) ++
i;
1532 while(i < masks->
size() && (*masks)[
i].
first< end) {
1533 for(
size_tj =
max((*masks)[
i].
first, begin);
1534j <
min((*masks)[
i].second, end); ++j) {
1535seq[j] = mask_letter;
1549 if((partial_ranges ==
NULL) || (partial_ranges->
size() == 0)) {
1553 const char*
tmp(0);
1559 intnum_ranges =
static_cast<int>(partial_ranges->
size());
1560 if((*partial_ranges)[num_ranges - 1].second >
static_cast<TSeqPos>(
base_length)) {
1566 char*seq = *
buffer+ (sentinel ? 1 : 0);
1568vector<Int4> ambchars;
1571 intbegin(riter->first);
1572 intend(riter->second);
1592(*buffer)[0] = (char)15;
1618 const char*
tmp(0);
1641 char*seq = *
buffer- range.
begin+ (sentinel ? 1 : 0);
1645vector<Int4> ambchars;
1654 booluse_range_set =
true;
1660|| rciter->second->GetRanges().empty()
1662use_range_set =
false;
1665range_set = rciter->second->GetRanges();
1669 if(!use_range_set) {
1684 intbegin(riter->first);
1685 intend(riter->second);
1694 min(range.
end, riter->second));
1705(*buffer)[0] = (char)15;
1711 if(masks) masks->
clear();
1727 if(sequence.
length() == 0) {
1729 "Error: packed sequence data is not valid.");
1732 const char* seq_buffer = sequence.
data();
1734 intwhole_bytes =
static_cast<int>(sequence.
length()) - 1;
1735 intremainder = sequence[whole_bytes] & 3;
1744vector<Int4> ambchars;
1745ambchars.reserve(ambiguities.
length()/4);
1747 for(
size_t i= 0;
i< ambiguities.
length();
i+=4) {
1749ambchars.push_back(
A);
1775 const char**
buffer)
const 1777 TIndxstart_offset = 0;
1778 TIndxend_offset = 0;
1791 if(
'p'== seqtype) {
1796length =
int(end_offset - start_offset);
1803 if(! (*
buffer- 1))
return-1;
1805}
else if(
'n'== seqtype) {
1819 if(! (*
buffer))
return-1;
1833 intwhole_bytes =
int(end_offset - start_offset - 1);
1835 charlast_char = (*buffer)[whole_bytes];
1837 intremainder = last_char & 3;
1838length = (whole_bytes * 4) + remainder;
1846list< CRef< CSeq_id > > seqids;
1851 if((! defline_set.
Empty()) && defline_set->
CanGet()) {
1853 if(! (*defline)->CanGetSeqid()) {
1858seqids.push_back(*seqid);
1868list< CRef< CSeq_id > > seqids;
1873 if((! defline_set.
Empty()) && defline_set->
CanGet()) {
1875 if(! (*defline)->CanGetSeqid()) {
1880seqids.push_back(*seqid);
1918 #ifdef NCBI_STRICT_TAX_ID 1921tax_ids.
insert(leaf_ids.begin(), leaf_ids.end());
1925 if(user_tax_ids.
size() > tax_ids.
size()) {
1927 if(user_tax_ids.
find(*itr) != user_tax_ids.
end()) {
1935 if(tax_ids.
find(*itr) != tax_ids.
end()) {
1959 if(taxid_set.
size() > user_tax_ids.
size()) {
1963 if(user_tax_ids.
find(*itr) == user_tax_ids.
end()) {
1972 bool* changed)
const 1974 typedeflist< CRef<CBlast_def_line> > TBDLL;
1975 typedefTBDLL::iterator TBDLLIter;
1982 if(useCache && cached.first.NotEmpty()) {
1984*changed = cached.second;
1987 returncached.first;
1990 boolasn_changed =
false;
2001TBDLL & dl = BDLS->
Set();
2003 for(TBDLLIter iter = dl.begin(); iter != dl.end(); ) {
2006 boolhave_memb =
true;
2016 intmemb_mask = 0x1 << (
m_MemBit-1);
2018 if((bits & memb_mask) == 0) {
2029 boolhave_user =
false, have_volume =
false;
2032 if(have_user && have_volume)
break;
2034have_memb = have_user && have_volume;
2043 if( (*vtaxid)->GetNumTaxIds() > 0) {
2062TBDLLIter eraseme = iter++;
2064asn_changed =
true;
2074cached.first = BDLS;
2075cached.second = asn_changed;
2077cached.first = BDLS;
2078cached.second = asn_changed;
2090 typedeflist< CRef<CBlast_def_line> > TBDLL;
2091 typedefTBDLL::iterator TBDLLIter;
2098 if(useCache && cached.first.NotEmpty()) {
2100*changed = cached.second;
2103 returncached.first;
2106 boolasn_changed =
false;
2117TBDLL & dl = BDLS->
Set();
2119 for(TBDLLIter iter = dl.begin(); iter != dl.end(); ) {
2122 boolhave_memb =
true;
2132 intmemb_mask = 0x1 << (
m_MemBit-1);
2134 if((bits & memb_mask) == 0) {
2145 boolhave_user =
false, have_volume =
false;
2148 if(have_user && have_volume)
break;
2150have_memb = have_user && have_volume;
2159 if( (*vtaxid)->GetNumTaxIds() > 0) {
2179TBDLLIter eraseme = iter++;
2181asn_changed =
true;
2191cached.first = BDLS;
2192cached.second = asn_changed;
2194cached.first = BDLS;
2195cached.second = asn_changed;
2204 bool* changed)
const 2210 if(! raw_data.
size()) {
2218bdls.
Reset(
newobjects::CBlast_def_line_set);
2224 if(! (**dl).CanGetSeqid()) {
2234 if(dbt.
GetDb() ==
"BL_ORD_ID") {
2259 if(! raw_data.
size()) {
2271bdls.
Reset(
newobjects::CBlast_def_line_set);
2277 if(! (**dl).CanGetSeqid()) {
2287 if(dbt.
GetDb() ==
"BL_ORD_ID") {
2305 TIndxhdr_start = 0;
2315 return CTempString(asn_region, hdr_end - hdr_start);
2320vector<char> & hdr_data )
const 2330 boolchanged =
false;
2343hdr_data.assign(s.data(), s.data() + s.size());
2346hdr_data.assign(raw.
data(), raw.
data() + raw.
size());
2351vector<Int4> & ambchars)
const 2353 TIndxstart_offset = 0;
2354 TIndxend_offset = 0;
2362 "File error: could not get ambiguity data.");
2365 intlength =
int(end_offset - start_offset);
2368 inttotal = length / 4;
2376total &= 0x7FFFFFFF;
2378ambchars.resize(total);
2380 for(
int i= 0;
i<total;
i++) {
2434 typedeflist< CRef< CBlast_def_line > >::const_iterator TI1;
2437TI1 it1 = BDLS->
Get().begin();
2439 for(; it1 != BDLS->
Get().end(); it1++) {
2440 if((*it1)->IsSetOther_info()) {
2441TI2 it2 = (*it1)->GetOther_info().begin();
2442TI2 it2end = (*it1)->GetOther_info().end();
2444 for(; it2 != it2end; it2++) {
2479 return! oids.empty();
2511 "GI list specified but no ISAM file found for GI in "+
m_VolName);
2523 "TI list specified but no ISAM file found for TI in "+
m_VolName);
2535 "IPG list specified but no ISAM file found for IPG in "+
m_VolName);
2547 "SI list specified but no ISAM file found for SI in "+
m_VolName);
2565 "GI list specified but no ISAM file found for GI in "+
m_VolName);
2577 "TI list specified but no ISAM file found for TI in "+
m_VolName);
2589 "SI list specified but no ISAM file found for SI in "+
m_VolName);
2611 typedeflist< CRef< CBlast_def_line > >::const_iterator TI1;
2612 typedeflist< CRef< CSeq_id > >::const_iterator TI2;
2614TI1 it1 = BDLS->
Get().begin();
2618 for(; it1 != BDLS->
Get().end(); it1++) {
2619 if((*it1)->CanGetSeqid()) {
2620TI2 it2 = (*it1)->GetSeqid().begin();
2621TI2 it2end = (*it1)->GetSeqid().end();
2625 for(; it2 != it2end; it2++) {
2626 if((*it2)->IsGi()) {
2627gi = (*it2)->GetGi();
2640 const string& str_id,
2642vector<int> & oids)
const 2644 boolvcheck (
false);
2645 boolfits_in_four = (ident == -1) || ! (ident >> 32);
2646 boolneeds_four =
true;
2666oids.push_back(oid);
2679oids.push_back(oid);
2692oids.push_back(oid);
2713oids.push_back((
int) ident);
2720 "Internal error: hashes are not Seq-ids.");
2723 if((! fits_in_four) && needs_four) {
2726 "ID overflows range of specified type.");
2735vector<int> & oids)
const 2747 size_tpos = acc.find(
".");
2750 stringver_str(acc, pos+1, acc.size()-(pos+1));
2755 stringnover(acc, 0, pos);
2758 while((pos2 = nover.find(
"|")) != nover.npos) {
2759nover.erase(0, pos2+1);
2763list< CRef<CSeq_id> > ids =
2766 boolfound =
false;
2769 const CTextseq_id*
id= (*seqid)->GetTextseq_Id();
2787oids.erase(
remove(oids.begin(), oids.end(), -1), oids.end());
2796 boolsimpler (
false);
2810 boolsimpler (
false);
2854 if(first_seq >= vol_cnt) {
2857 "OID not in valid range.");
2860 if(residue >= vol_len) {
2863 "Residue offset not in valid range.");
2872 doubledresidue = (double(residue) * end_of_bytes) / vol_len;
2877residue =
Uint8(dresidue);
2879 if(residue > (end_of_bytes-1)) {
2880residue = end_of_bytes - 1;
2888 intoid_beg = first_seq;
2889 intoid_end = vol_cnt-1;
2893 intoid_mid = (oid_beg + oid_end)/2;
2895 while(oid_beg < oid_end) {
2902 if(
offset>= residue) {
2905oid_beg = oid_mid + 1;
2908oid_mid = (oid_beg + oid_end)/2;
2918 TIndxstart_offset = 0;
2920 returnstart_offset;
2943 if((begin >= end) || (end > length)) {
2946 "Begin and end offsets are not valid.");
2974v4.reserve((length+1)/2);
2979 for(
TSeqPos i= 0;
i< length_whole;
i+= 2) {
2983 if(length_whole != length) {
2984 _ASSERT((length_whole) == (length-1));
2985v4.push_back(
buffer[length_whole] << 4);
2999 int* amb_length )
const 3014 TIndxmap_begin = 0;
3021 boolamb_ok =
true;
3027end_A = start_A = --end_S;
3031map_begin = start_S - 1;
3032map_end = end_A + 1;
3036map_begin = start_S;
3040 ints_len =
int(end_S - start_S);
3041 inta_len =
int(end_A - start_A);
3043 if(! (s_len && amb_ok)) {
3045 "File error: could not get sequence data.");
3049*amb_length = a_len;
3053*seq_length = s_len;
3058*
buffer+= (start_S - map_begin);
3062 if(! *seq_length) {
3065 "Could not get sequence data.");
3068 if(((
buffer&& *
buffer) || a_len) && (! *seq_length)) {
3078 if(
id>= (
static_cast<T>(1) << 32)) {
3081 "ID overflows range of specified type.");
3117low_id = high_id =
count= 0;
3125high_id = (
int)
H;
3151 boolcache_data)
const 3155 if(offset_ranges.
empty() && (! cache_data) && (! append_ranges)) {
3168 if(
R.Empty() ||
R->GetRanges().empty()) {
3173 if(offset_ranges.
empty() && (! cache_data)) {
3189 boolflush_sequence = ((! append_ranges) ||
3190(! offset_ranges.
empty()) ||
3193 if(flush_sequence) {
3194 R->FlushSequence();
3197 R->SetRanges(offset_ranges, append_ranges, cache_data);
3210 if(append_ranges) {
3231 if((**gilist).GetNumSis() != 0)
3234 if((**gilist).GetNumTis() != 0)
3267 "Hash lookup requested but no hash ISAM file found.");
3273 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 3274 (!defined(NCBI_COMPILER_MIPSPRO)) ) 3290 m_Columns[col_id]->GetBlob(oid, blob, keep, & locked);
3307 return m_Columns[col_id]->GetMetaData();
3320titles.
insert((**iter).GetTitle());
3332 stringalpha(
"abcdefghijklmnopqrstuvwxyz");
3333 stringei(
"??a"), ed(
"??b"), ed2(
"??c");
3335ei[0] = ed[0] = ed2[0] = (
m_IsAA?
'p':
'n');
3339 for(
size_t i= 0;
i< alpha.size();
i++) {
3340ei[1] = ed[1] = ed2[1] = alpha[
i];
3347 if( ! (big || small))
continue;
3351 const Int2bytetest = 0x0011;
3352 const char* ptr = (
const char*) &bytetest;
3353 if(ptr[0] == 0x11 && small) {
3359 stringerrmsg, errarg;
3361 stringtitle = col->GetTitle();
3363 if(unique_titles[title]) {
3364errmsg =
"duplicate column title";
3367unique_titles[title] = 1;
3372 if(noidc != noidv) {
3373errmsg =
"column has wrong #oids";
3378 if(errmsg.size()) {
3379 if(errarg.size()) {
3380errmsg +=
string(
" [") + errarg +
"].";
3383 string(
"Error: ") + errmsg);
3404 return static_cast<int>(
i);
#define FENCE_SENTRY
This sentry value is used as a 'fence' around the valid portions of partially decoded sequences.
vector< TSeqRange > TRangeVector
ncbi::TMaskedQueryRegions mask
`Blob' Class for SeqDB (and WriteDB).
TTaxIds GetTaxIds() const
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
CObjectIStreamAsnBinary –.
CObjectOStreamAsnBinary –.
static char * Alloc(size_t length, bool clear=true)
Allocate memory that atlas will keep track of.
static bool ColumnExists(const string &basename, const string &extn, CSeqDBAtlas &atlas)
Determine if the column exists.
void UnLease()
Release memory held in the atlas layer by this object.
const char * GetFileDataPtr(const string &fname, TIndx offset)
Get a pointer to the specified offset.
static bool IndexExists(const string &name, const char prot_nucl)
TGi GetSeqGI(TOid oid, CSeqDBLockHold &locked)
int GetNumGis() const
Get the number of GIs in the array.
int GetNumSis() const
Get the number of Seq-ids in the array.
int GetNumTis() const
Get the number of TIs in the array.
Uint8 GetMaskOpts() const
set< TTaxId > & GetTaxIdsList()
const char * GetFileDataPtr(TIndx start) const
Read part of the file into a buffer.
string GetDate() const
Get the construction date of the volume.
void GetHdrStartEnd(int oid, TIndx &start, TIndx &end) const
Get the location of a sequence's header data.
void UnLease()
Release any memory leases temporarily held here.
string GetTitle() const
Get the volume title.
int GetNumOIDs() const
Get the number of oids in this volume.
string GetLMDBFileName() const
bool GetAmbStartEnd(int oid, TIndx &start, TIndx &end) const
Get the location of a sequence's ambiguity data.
Uint8 GetVolumeLength() const
Get the length of the volume (in bases).
int GetMinLength() const
Get the length of the shortest sequence in this volume.
void GetSeqStart(int oid, TIndx &start) const
Get the location of a sequence's packed sequence data.
char GetSeqType() const
Get the sequence data type.
int GetMaxLength() const
Get the length of the longest sequence in this volume.
void GetSeqStartEnd(int oid, TIndx &start, TIndx &end) const
Get the location of a sequence's packed sequence data.
TValue & Lookup(int key)
Find a value in the cache.
bool IdToOid(Int8 id, TOid &oid)
GI or TI translation.
void HashToOids(unsigned hash, vector< TOid > &oids)
Sequence hash lookup.
bool PigToOid(TPig pig, TOid &oid)
PIG translation.
void IdsToOids(int vol_start, int vol_end, CSeqDBGiList &ids)
Translate Gis and Tis to Oids for the given ID list.
void GetIdBounds(Int8 &low_id, Int8 &high_id, int &count)
Get Numeric Bounds.
void UnLease()
Return any memory held by this object to the atlas.
void StringToOids(const string &acc, vector< TOid > &oids, bool adjusted, bool &version_check)
String translation.
static bool IndexExists(const string &dbname, char prot_nucl, char file_ext_char)
Check if a given ISAM index exists.
int GetNumTis() const
Get the number of TIs in the array.
int GetNumGis() const
Get the number of GIs in the array.
int GetNumSis() const
Get the number of SeqIds in the array.
set< TTaxId > & GetTaxIdsList()
TRangeList m_Ranges
Range of offsets needed for this sequence.
void SetRanges(const TRangeList &ranges, bool append_ranges, bool cache_data)
Set ranges of the sequence that will be used.
static int ImmediateLength()
Sequences shorter than this will not use ranges in any case.
bool m_CacheData
True if caching of sequence data is required for this sequence.
void ReadBytes(char *buf, TIndx start, TIndx end) const
Read part of the file into a buffer.
const char * GetFileDataPtr(TIndx start) const
Get a pointer into the file contents.
static bool GetTaxNames(TTaxId tax_id, SSeqDBTaxInfo &info)
Get the taxonomy names for a given tax id.
void OptimizeGiLists() const
Simplify the GI list configuration.
bool m_HaveColumns
True if we have opened the columns for this volume.
list< CRef< CSeq_id > > GetSeqIDs(int oid) const
Get the Seq-ids associated with a sequence.
CFastMutex m_MtxCachedRange
CRef< CSeqDBIsam > m_IsamGi
Handles translation of GIs to OIDs.
void SeqidToOids(CSeq_id &seqid, vector< int > &oids, CSeqDBLockHold &locked) const
Find OIDs for the specified Seq-id.
Uint8 x_GetSeqResidueOffset(int oid) const
Returns the base-offset of the specified oid.
void x_OpenHashFile(void) const
void x_UnleasePigFile(void) const
int GetAmbigPartialSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, CSeqDB::TSequenceRanges *partial_ranges, CSeqDB::TSequenceRanges *masks) const
void x_UnleaseTiFile(void) const
CSeqDBAtlas & m_Atlas
The memory management layer.
void AccessionToOids(const string &acc, vector< int > &oids, CSeqDBLockHold &locked) const
Find OIDs for the specified accession or formatted Seq-id.
void GetColumnBlob(int col_id, int oid, CBlastDbBlob &blob, bool keep, CSeqDBLockHold &locked)
Fetch the data blob for the given column and oid.
CRef< CSeqDBHdrFile > m_Hdr
Contains header (defline) information for this volume.
void x_OpenSeqFile(void) const
CSeqDBIntCache< CRef< CSeqdesc > > m_TaxCache
This cache allows CBioseqs to share taxonomic objects.
void x_OpenTiFile(void) const
int GetAmbigSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, SSeqDBSlice *region, CSeqDB::TSequenceRanges *masks) const
Get a sequence with ambiguous regions.
vector< CRef< CSeqDBColumn > > m_Columns
Set of columns defined for this volume.
CRef< CSeqDBIdxFile > m_Idx
Metadata plus offsets into the sequence, header, and ambiguity data.
void x_OpenStrFile(void) const
int GetSeqLengthExact(int oid) const
Exact sequence length for nucleotide databases.
void x_StringToOids(const string &acc, ESeqDBIdType id_type, Int8 ident, const string &str_id, bool simplified, vector< int > &oids) const
void OpenSeqFile(CSeqDBLockHold &locked) const
Open sequence file.
int GetColumnId(const string &title, CSeqDBLockHold &locked)
Get an ID number for a given column title.
CRef< CSeqDBIsam > m_IsamStr
Handles translation of strings (accessions) to OIDs.
vector< CRef< CSeqDBGiList > > TGiLists
A set of GI lists.
CSeqDBIntCache< TDeflineCacheItem > m_DeflineCache
Cache of filtered deflines.
int x_GetAmbigSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, SSeqDBSlice *region, CSeqDB::TSequenceRanges *masks) const
Get a sequence with ambiguous regions.
int m_VolStart
Starting OID of this volume.
int GetNumOIDs() const
Get the number of OIDs for this volume.
bool GetGi(int oid, TGi &gi, CSeqDBLockHold &locked) const
Find the GI given an OID.
CRef< CBlast_def_line_set > x_GetHdrAsn1(int oid, bool adjust_oids, bool *changed) const
Get sequence header object.
void GetPigBounds(int &low_id, int &high_id, int &count, CSeqDBLockHold &locked) const
Get PIG Bounds.
void x_FilterHasId(const CSeq_id &id, bool &have_user, bool &have_vol) const
Determine if a user ID list affects this ID, and how.
string m_VolName
The name of this volume.
CTempString x_GetHdrAsn1Binary(int oid) const
Get sequence header binary data.
void FlushOffsetRangeCache()
Flush all offset ranges cached.
CSeqDBVol(CSeqDBAtlas &atlas, const string &name, char prot_nucl, CSeqDBGiList *user_list, CSeqDBNegativeList *neg_list, int vol_start, CSeqDBLockHold &locked)
Constructor.
void GetRawSeqAndAmbig(int oid, const char **buffer, int *seq_length, int *ambig_length) const
Get Raw Sequence and Ambiguity Data.
void x_OpenHdrFile(void) const
string GetLMDBFileName() const
Get sqlite file name associated with this volume. Empty string if version 4.
void ListColumns(set< string > &titles, CSeqDBLockHold &locked)
List the titles of all columns for this volume.
int GetSeqLengthApprox(int oid) const
Approximate sequence length for nucleotide databases.
TRangeCache m_RangeCache
Cached/ranged sequence info.
int m_VolEnd
First OID past end of this volume.
bool m_SeqFileOpened
True if an attempt has been made to open the volume file (whether or not it succeeded).
bool GetPig(int oid, int &pig, CSeqDBLockHold &locked) const
Find the PIG given an OID.
int GetSeqLengthProt(int oid) const
Sequence length for protein databases.
CRef< CSeqDBSeqFile > m_Seq
Contains sequence data for this volume.
CRef< CSeqdesc > x_GetAsnDefline(int oid) const
Get sequence header information structures.
string GetTitle() const
Get the volume title.
CRef< CSeqDBGiIndex > m_GiIndex
The GI index file (for fast oid->gi conversion)
CRef< CSeqDBNegativeList > m_NegativeList
The negative ID list, if one exists.
TGiLists m_VolumeGiLists
The volume GI lists, if any exist.
CRef< CSeqDBIsam > m_IsamTi
Handles translation of TI (trace ids) to OIDs.
int x_GetSequence(int oid, const char **buffer) const
Get sequence data.
CRef< CSeqDBIsam > m_IsamHash
Handles translation of sequence hash value to OIDs.
void UnLease()
Return expendable resources held by this volume.
list< CRef< CSeqdesc > > x_GetTaxonomy(int oid, TGi preferred_gi, const CSeq_id *preferred_seq_id)
Get taxonomic descriptions of a sequence.
void IdsToOids(CSeqDBGiList &gis) const
Translate Gis to Oids for the given vector of Gi/Oid pairs.
CRef< CBlast_def_line_set > x_GetFilteredHeader(int oid, bool *changed) const
Get sequence header information.
char * x_AllocType(size_t length, ESeqDBAllocType alloc_type) const
Allocate memory in one of several ways.
void x_CheckVersions(const string &acc, vector< int > &oids) const
Check Seq-id versions for special sparse-id support case.
bool GiToOid(TGi gi, int &oid, CSeqDBLockHold &locked) const
Find the OID given a GI.
TGi GetSeqGI(int oid, CSeqDBLockHold &locked) const
Get the GI of a sequence. This method returns the gi of the sequence.
CRef< CSeq_data > GetSeqData(int oid, TSeqPos begin, TSeqPos end, CSeqDBLockHold &locked) const
Fetch data as a CSeq_data object.
void GetGiBounds(TGi &low_id, TGi &high_id, int &count, CSeqDBLockHold &locked) const
Get GI Bounds.
int GetOidAtOffset(int first_seq, Uint8 residue, CSeqDBLockHold &locked) const
Find the OID at a given index into the database.
char GetSeqType() const
Get the sequence type stored in this database.
bool x_HaveIdFilter(void) const
Returns true if this volume has an ID list.
void x_OpenAllColumns(CSeqDBLockHold &locked)
Find all columns for this volume.
CRef< CBioseq > GetBioseq(int oid, TGi pref_gi, const CSeq_id *pref_seq_id, bool seqdata, CSeqDBLockHold &locked)
Get a CBioseq object for this sequence.
int GetMinLength() const
Get the length of the smallest sequence in this volume.
void x_UnleaseStrFile(void) const
CRef< CSeqDBGiList > m_UserGiList
The user ID list, if one exists.
CSeqDBAtlas::TIndx TIndx
Import TIndx definition from the CSeqDBAtlas class.
void x_OpenGiFile(void) const
pair< CRef< CBlast_def_line_set >, bool > TDeflineCacheItem
Filtered defline plus whether binary data needed changes.
CRef< CSeqDBIsam > m_IsamPig
Handles translation of PIGs to OIDs.
bool m_IsAA
True if the volume is protein, false for nucleotide.
const map< string, string > & GetColumnMetaData(int col_id, CSeqDBLockHold &locked)
Get all metadata for the specified column.
CRef< CBlast_def_line_set > x_GetTaxDefline(int oid, TGi preferred_gi, const CSeq_id *preferred_seq_id)
Get defline filtered by several criteria.
void x_OpenOidFile(void) const
int GetMaxLength() const
Get the length of the largest sequence in this volume.
bool PigToOid(int pig, int &oid) const
Find the OID given a PIG.
bool TiToOid(Int8 ti, int &oid, CSeqDBLockHold &locked) const
Find the OID given a TI.
void x_OpenPigFile(void) const
void x_GetFilteredBinaryHeader(int oid, vector< char > &hdr_data) const
Get binary sequence header information.
Uint8 GetVolumeLength() const
Get the total length of this volume (in bases).
CRef< CBlast_def_line_set > GetFilteredHeader(int oid, CSeqDBLockHold &locked) const
Get filtered sequence header information.
string GetDate() const
Get the formatting date of the volume.
int m_MemBit
The filtering MEMB_BIT.
void x_UnleaseGiFile(void) const
void SetOffsetRanges(int oid, const TRangeList &offset_ranges, bool append_ranges, bool cache_data) const
Apply a range of offsets to a database sequence.
void GetStringBounds(string &low_id, string &high_id, int &count) const
Get String Bounds.
void HashToOids(unsigned hash, vector< int > &oids, CSeqDBLockHold &locked) const
Get the OIDs for a given sequence hash.
void x_GetAmbChar(int oid, vector< Int4 > &ambchars) const
Get ambiguity information.
char x_GetSeqType() const
Returns 'p' for protein databases, or 'n' for nucleotide.
static const char * kOidNotFound
String containing the error message in exceptions thrown when a given OID cannot be found.
static CRef< CBlast_def_line_set > ExtractBlastDefline(const CBioseq &bioseq)
Extract a Blast-def-line-set object from a Bioseq retrieved by CSeqDB.
@Seq_descr.hpp User-defined methods of the data storage class.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
container_type::iterator iterator
const_iterator end() const
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator find(const key_type &key) const
const_iterator end() const
static int base_length[29]
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static void DLIST_NAME() remove(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static TDSRET convert(TDSSOCKET *tds, TDSICONV *conv, TDS_ICONV_DIRECTION direction, const char *from, size_t from_len, char *dest, size_t *dest_len)
#define GI_FROM(T, value)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define TAX_ID_TO(T, tax_id)
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
SStrictId_Tax::TId TTaxId
Taxon id type.
#define TAX_ID_FROM(T, value)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string AsFastaString(void) const
E_SIC
Compare return values.
@ e_NO
SeqIds compared, but are different.
@ e_YES
SeqIds compared, are equivalent.
void Close(void)
Detach reader from a data source.
void OpenFromBuffer(const char *buffer, size_t size)
Attach reader to a data source.
void AddReference(void) const
Add reference to object.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty -- pointing to an object and has a non-null value.
void RemoveReference(void) const
Remove reference to object.
bool ReferencedOnlyOnce(void) const THROWS_NONE
Check if object is referenced only once.
bool Empty(void) const THROWS_NONE
Check if CRef is empty -- not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
void Reset(void)
Reset random number generator to initial startup condition (LFG only)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static string Int8ToString(Int8 value, TNumToStringFlags flags=0, int base=10)
Convert Int8 to string.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
const char * data(void) const
Return a pointer to the array represented.
size_type length(void) const
Return the length of the represented array.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
size_type size(void) const
Return the length of the represented array.
@ fConvErr_NoThrow
Do not throw an exception on error.
@ fAllowTrailingSymbols
Ignore trailing non-numeric characters.
#define DEFINE_STATIC_FAST_MUTEX(id)
Define static fast mutex and initialize it.
const TSeqid & GetSeqid(void) const
Get the Seqid member data.
bool CanGetTitle(void) const
Check if it is safe to call GetTitle method.
bool IsSetLinks(void) const
Check if a value has been assigned to Links data member.
TTaxid GetTaxid(void) const
Get the Taxid member data.
bool IsSet(void) const
Check if a value has been assigned to data member.
bool IsSetMemberships(void) const
bit arrays Repurposed to store the (multiple) taxIDs associated with WP proteins.
bool IsSetTaxid(void) const
Check if a value has been assigned to Taxid data member.
const TLinks & GetLinks(void) const
Get the Links member data.
const TMemberships & GetMemberships(void) const
Get the Memberships member data.
Tdata & Set(void)
Assign a value to data member.
bool CanGet(void) const
Check if it is safe to call Get method.
const Tdata & Get(void) const
Get the member data.
bool CanGetSeqid(void) const
Check if it is safe to call GetSeqid method.
bool CanGetMemberships(void) const
Check if it is safe to call GetMemberships method.
const TTitle & GetTitle(void) const
Get the Title member data.
bool IsStr(void) const
Check if variant Str is selected.
const TTag & GetTag(void) const
Get the Tag member data.
void SetTag(TTag &value)
Assign a value to Tag data member.
const TDb & GetDb(void) const
Get the Db member data.
vector< vector< char > * > TOss
TData & SetData(void)
Assign a value to Data data member.
void SetNum(TNum value)
Assign a value to Num data member.
const TStr & GetStr(void) const
Get the variant data.
void SetLabel(TLabel &value)
Assign a value to Label data member.
TStr & SetStr(void)
Select the variant.
const TData & GetData(void) const
Get the Data member data.
void SetType(TType &value)
Assign a value to Type data member.
void SetData(TData &value)
Assign a value to Data data member.
const TType & GetType(void) const
Get the Type member data.
void SetDb(const TDb &value)
Assign a value to Db data member.
TId GetId(void) const
Get the variant data.
void SetCommon(const TCommon &value)
Assign a value to Common data member.
TDb & SetDb(void)
Assign a value to Db data member.
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
TGeneral & SetGeneral(void)
Select the variant.
E_Choice Which(void) const
Which variant is currently selected.
TVersion GetVersion(void) const
Get the Version member data.
bool CanGetVersion(void) const
Check if it is safe to call GetVersion method.
bool CanGetAccession(void) const
Check if it is safe to call GetAccession method.
const TAccession & GetAccession(void) const
Get the Accession member data.
@ e_General
for other databases
@ e_Gi
GenInfo Integrated Database.
list< CRef< CSeqdesc > > Tdata
TId & SetId(void)
Assign a value to Id data member.
TTitle & SetTitle(void)
Select the variant.
void SetInst(TInst &value)
Assign a value to Inst data member.
TSource & SetSource(void)
Select the variant.
TNcbistdaa & SetNcbistdaa(void)
Select the variant.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
TUser & SetUser(void)
Select the variant.
void SetRepr(TRepr value)
Assign a value to Repr data member.
Tdata & Set(void)
Assign a value to data member.
void SetLength(TLength value)
Assign a value to Length data member.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
TNcbi4na & SetNcbi4na(void)
Select the variant.
void SetMol(TMol value)
Assign a value to Mol data member.
@ eRepr_raw
continuous sequence
@ eMol_na
just a nucleic acid
unsigned int
A callback function used to compare two keys in a database.
const struct ncbi::grid::netcache::search::fields::SIZE size
const CharType(& source)[N]
Static variables safety - create on demand, destroy on application termination.
Multi-threading -- mutexes; rw-locks; semaphore.
ESeqDBAllocType
Certain methods have an "Alloc" version.
ESeqDBIdType SeqDB_SimplifySeqid(CSeq_id &bestid, const string *acc, Int8 &num_id, string &str_id, bool &simpler)
Seq-id simplification.
const int kSeqDBNuclNcbiNA8
Used to request ambiguities in Ncbi/NA8 format.
ESeqDBIdType SeqDB_SimplifyAccession(const string &acc, Int8 &num_id, string &str_id, bool &simpler)
String id simplification.
const int kSeqDBNuclBlastNA8
Used to request ambiguities in BLAST/NA8 format.
ESeqDBIdType
Various identifier formats used in Id lookup.
@ eStringId
Some sequence sources use string identifiers.
@ eTiId
Trace ID is a numeric identifier for Trace sequences.
@ ePigId
Each PIG identifier refers to exactly one protein sequence.
@ eHashId
Lookup from sequence hash values to OIDs.
@ eOID
The ordinal id indicates the order of the data in the volume's index file.
T SeqDB_GetStdOrd(const T *stdord_obj)
Read a network order integer value.
The SeqDB oid filtering layer.
static bool s_SeqDB_SeqIdIn(const list< CRef< CSeq_id > > &seqids, const CSeq_id &target)
Search for a Seq-id in a list of Seq-ids.
bool s_IncludeDefline_Taxid(const CBlast_def_line &def, const set< TTaxId > &user_tax_ids)
static void s_SeqDBMapNA2ToNA4(const char *buf2bit, vector< char > &buf4bit, int base_length)
Convert sequence data from NA2 to NA4 format.
Uint4 s_ResVal(const vector< Int4 > &ambchars, Uint4 i)
Get ambiguous residue value (old version)
void SeqDB_UnpackAmbiguities(const CTempString &sequence, const CTempString &ambiguities, string &result)
Unpack an ambiguous nucleotide sequence.
static void s_SeqDBRebuildDNA_NA8(char *seq, const vector< Int4 > &amb_chars, const SSeqDBSlice &region)
Rebuild an ambiguous region from sequence and ambiguity data.
Uint4 s_ResLenOld(const vector< Int4 > &ambchars, Uint4 i)
Get ambiguous region length (old version)
Uint4 s_ResLenNew(const vector< Int4 > &ambchars, Uint4 i)
Get length of ambiguous region (new version)
set< pair< int, int > > TRangeVector
List of offset ranges as begin/end pairs.
bool s_IncludeDefline_NegativeTaxid(const CBlast_def_line &def, const set< TTaxId > &user_tax_ids)
Uint4 s_ResPosNew(const vector< Int4 > &ambchars, Uint4 i)
Get position of ambiguous region (new version)
static void s_SeqDBRebuildDNA_NA4(vector< char > &buf4bit, const vector< Int4 > &amb_chars)
Rebuild an ambiguous region from sequence and ambiguity data.
static void s_SeqDBWriteSeqDataProt(CSeq_inst &seqinst, const char *seq_buffer, int length)
Store protein sequence data in a Seq-inst.
static void s_GetBioseqTitle(CRef< CBlast_def_line_set > deflines, string &title)
Get the title string for a CBioseq.
static void s_SeqDBFitsInFour(T id)
static void s_SeqDBMapNA2ToNA4Setup(TTable &translated)
Build NA2 to NcbiNA4 translation table.
unsigned SeqDB_ncbina8_to_blastna8[]
static CRef< CBlast_def_line_set > s_OssToDefline(const CUser_field::TData::TOss &oss)
Efficiently decode a Blast-def-line-set from binary ASN.1.
static void s_SeqDBMapNA2ToNA8Setup(TTable &translated)
Build NA2 to Ncbi-NA8 translation table.
Uint4 s_ResPosOld(const vector< Int4 > &ambchars, Uint4 i)
Get position of ambiguous region (old version)
static void s_SeqDBMapNcbiNA8ToBlastNA8(char *buf, const SSeqDBSlice &range)
Convert sequence data from Ncbi-NA8 to Blast-NA8 format.
static void s_SeqDBMaskSequence(char *seq, CSeqDB::TSequenceRanges *masks, char mask_letter, const SSeqDBSlice &range)
static void s_SeqDBWriteSeqDataNucl(CSeq_inst &seqinst, const char *seq_buffer, int length)
Store non-ambiguous nucleotide sequence data in a Seq-inst.
CRef< CBlast_def_line_set > s_ExtractBlastDefline(const T &bioseq)
vector< Uint1 > TTable
Translation table type.
static void s_SeqDBMapNA2ToNA8(const char *buf2bit, char *buf8bit, const SSeqDBSlice &range)
Convert sequence data from NA2 to NA8 format.
bool s_IncludeDefline_MaskFilter(const CBlast_def_line &def, Uint8 mask)
Defines database volume access classes.
List of sequence offset ranges.
OID-Range type to simplify interfaces.
int begin
First oid in range.
int end
OID after last included oid.
string common_name
Common name, such as "noisy night monkey".
string scientific_name
Scientific name, such as "Aotus vociferans".
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4