A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/seqdbvol_8cpp_source.html below:

NCBI C++ ToolKit: src/objtools/blast/seqdb_reader/seqdbvol.cpp Source File

55  const char

*

data

(0);

81

m_IsAA (prot_nucl ==

'p'

),

87

m_VolStart (vol_start),

90

m_HaveColumns (

false

),

91

m_SeqFileOpened(

false

),

92

m_HdrFileOpened(

false

),

93

m_HashFileOpened(

false

),

94

m_OidFileOpened(

false

)

288  TIndx

start_offset = 0;

289  TIndx

end_offset = 0;

297  return int

(end_offset - start_offset - 1);

304  TIndx

start_offset = 0;

305  TIndx

end_offset = 0;

312  int

whole_bytes =

int

(end_offset - start_offset - 1);

321  int

remainder = amb_char & 3;

322  return

(whole_bytes * 4) + remainder;

327  TIndx

start_offset = 0;

328  TIndx

end_offset = 0;

335  int

whole_bytes =

int

(end_offset - start_offset - 1);

343  return

(whole_bytes * 4) + (oid & 0x03);

362

translated.resize(512);

365

0x21, 0x22, 0x24, 0x28,

366

0x41, 0x42, 0x44, 0x48,

367

0x81, 0x82, 0x84, 0x88 };

371  for

(pair1 = 0; pair1 < 16; pair1++) {

372  for

(pair2 = 0; pair2 < 16; pair2++) {

373  Int2

index = (pair1 * 16 + pair2) * 2;

375

translated[index] =

convert

[pair1];

376

translated[index+1] =

convert

[pair2];

404

vector<char> & buf4bit,

408  TTable

& expanded =

t

.Get().storage;

413

buf4bit.resize(estimated_length);

417  for

(

int i

=0;

i

<inp_chars;

i

++) {

418  Uint4

inp_char = (buf2bit[

i

] & 0xFF);

420

buf4bit[bytes] = expanded[ (inp_char*2) ];

421

buf4bit[bytes+1] = expanded[ (inp_char*2) + 1 ];

428  Uint1

remainder_bits = 2 * bases_remain;

429  Uint1

remainder_mask = (0xFF << (8 - remainder_bits)) & 0xFF;

430  Uint4

last_masked = buf2bit[inp_chars] & remainder_mask;

432

buf4bit[bytes++] = expanded[ (last_masked*2) ];

434  if

(bases_remain > 2) {

435

buf4bit[bytes ++] = expanded[ (last_masked*2)+1 ];

439

buf4bit.resize(bytes);

441  _ASSERT

(estimated_length == (

int

)buf4bit.size());

460  for

(

int i

= 0;

i

<256;

i

++) {

461  int

p1 = (

i

>> 6) & 0x3;

462  int

p2 = (

i

>> 4) & 0x3;

463  int

p3 = (

i

>> 2) & 0x3;

466

translated.push_back(1 << p1);

467

translated.push_back(1 << p2);

468

translated.push_back(1 << p3);

469

translated.push_back(1 << p4);

509  TTable

& expanded =

t

.Get().storage;

511  int

pos = range.

begin

;

513  int

input_chars_begin = range.

begin

/ 4;

514  int

input_chars_end = (range.

end

+ 3) / 4;

516  int

whole_chars_begin = (range.

begin

+ 3) / 4;

517  int

whole_chars_end = range.

end

/ 4;

519  int

p = input_chars_begin;

521  if

(p < whole_chars_begin) {

522  Int4

table_offset = (buf2bit[input_chars_begin] & 0xFF) * 4;

524  int

endpt = (input_chars_begin + 1) * 4;

526  if

(endpt > range.

end

) {

530  for

(

int

k = range.

begin

; k < endpt; k++) {

537

buf8bit[pos++] = expanded[ table_offset + 1 ];

541

buf8bit[pos++] = expanded[ table_offset + 2 ];

545

buf8bit[pos++] = expanded[ table_offset + 3 ];

560

p = whole_chars_begin;

562  while

(p < whole_chars_end) {

563  Int4

table_offset = (buf2bit[p] & 0xFF) * 4;

565

buf8bit[pos++] = expanded[ table_offset ];

566

buf8bit[pos++] = expanded[ table_offset + 1 ];

567

buf8bit[pos++] = expanded[ table_offset + 2 ];

568

buf8bit[pos++] = expanded[ table_offset + 3 ];

572  if

(p < input_chars_end) {

573  Int4

table_offset = (buf2bit[p] & 0xFF) * 4;

575  int

remains = (range.

end

& 0x3);

578

buf8bit[pos++] = expanded[ table_offset ];

581

buf8bit[pos++] = expanded[ table_offset + 1 ];

584

buf8bit[pos++] = expanded[ table_offset + 2 ];

623  for

(

int i

= range.

begin

;

i

< range.

end

;

i

++)

644  return

(ambchars[

i

] >> 16) & 0xFFF;

660  return

ambchars[

i

+1];

680  return

(ambchars[

i

] >> 28) & 0xF;

696  return

(ambchars[

i

] >> 24) & 0xF;

712  return

ambchars[

i

] & 0xFFFFFF;

730  const

vector<Int4> & amb_chars)

732  if

(amb_chars.empty())

736  Uint4

amb_num = amb_chars[0];

742  bool

new_format = (amb_num & 0x80000000) != 0;

745

amb_num &= 0x7FFFFFFF;

748  for

(

Uint4 i

=1;

i

< amb_num+1;

i

++) {

763  Int4

pos = position / 2;

764  Int4

rem = position & 1;

765  Uint1

char_l = char_r << 4;

772  for

(j = 0; j <= row_len; j++) {

774

buf4bit[index] = (buf4bit[index] & 0x0F) + char_l;

777

buf4bit[index] = (buf4bit[index] & 0xF0) + char_r;

805  const

vector<Int4> & amb_chars,

808  if

(amb_chars.empty() || !seq )

return

;

810  Uint4

amb_num = amb_chars[0];

813  bool

new_format = (amb_num & 0x80000000) != 0;

815  if

(new_format) amb_num &= 0x7FFFFFFF;

817  for

(

Uint4 i

= 1;

i

< amb_num+1;

i

++) {

832  if

(new_format) ++

i

;

834  if

(position + row_len <= region.

begin

)

837  if

(position >= region.

end

)

840  for

(

int

j = 0; j < row_len; ++j, ++position)

841  if

( position >= region.

begin

&& position < region.

end

)

842

seq[position] = trans_ch;

860  const char

* seq_buffer,

871

vector<char> aa_data;

872

aa_data.resize(length);

874  for

(

int i

= 0;

i

< length;

i

++) {

875

aa_data[

i

] = seq_buffer[

i

];

878

seqinst.

SetSeq_data

().SetNcbistdaa().Set().swap(aa_data);

897  const char

* seq_buffer,

900  int

whole_bytes = length / 4;

901  int

partial_byte = ((length & 0x3) != 0) ? 1 : 0;

903

vector<char> na_data;

904

na_data.resize(whole_bytes + partial_byte);

906  for

(

int i

= 0;

i

<whole_bytes;

i

++) {

907

na_data[

i

] = seq_buffer[

i

];

911

na_data[whole_bytes] = seq_buffer[whole_bytes] & (0xFF - 0x03);

914

seqinst.

SetSeq_data

().SetNcbi2na().Set().swap(na_data);

935  const char

* seq_buffer,

937

vector<Int4> & amb_chars)

939

vector<char> buffer_4na;

943

seqinst.

SetSeq_data

().SetNcbi4na().Set().swap(buffer_4na);

963  typedef

list< CRef<CBlast_def_line> >::const_iterator TDefIt;

964  typedef

list< CRef<CSeq_id > >::const_iterator TSeqIt;

966  const

list< CRef<CBlast_def_line> > & dl = deflines->

Get

();

968  bool

first_defline(

true

);

970  for

(TDefIt iter = dl.begin(); iter != dl.end(); iter++) {

975  if

(! title.empty()) {

980  bool

wrote_seqids(

false

);

983  const

list< CRef<CSeq_id > > & sl = defline.

GetSeqid

();

985  bool

first_seqid(

true

);

992  for

(TSeqIt seqit = sl.begin(); seqit != sl.end(); seqit++) {

999

(*seqit)->WriteAsFasta(oss);

1001

first_seqid =

false

;

1002

wrote_seqids =

true

;

1007

first_defline =

false

;

1034  typedef

list< CRef<CSeq_id> > TSeqidList;

1036  ITERATE

(TSeqidList, iter, seqids) {

1057  const CSeq_id

* preferred_seqid)

1072  if

(preferred_gi !=

ZERO_GI

|| preferred_seqid) {

1076  if

(preferred_gi !=

ZERO_GI

) {

1079

seqid.

Reset

(preferred_seqid);

1082  bool

found =

false

;

1087

new_bdls->

Set

().push_front(*iter);

1089

new_bdls->

Set

().push_back(*iter);

1099

list< CRef<CSeqdesc> >

1102  const CSeq_id

* preferred_seqid)

1106  const bool

provide_new_taxonomy_info =

true

;

1109  const char

* TAX_ORGREF_DB_NAME =

"taxon"

;

1111

list< CRef<CSeqdesc> > taxonomy;

1116  if

(bdls.

Empty

()) {

1120  typedef

list< CRef<CBlast_def_line> > TBDLL;

1122  typedef

TBDLL::const_iterator TBDLLConstIter;

1124  const

TBDLL & dl = bdls->

Get

();

1130  for

(TBDLLConstIter iter = dl.begin(); iter != dl.end(); iter ++) {

1133  if

((*iter)->CanGetTaxid()) {

1134

taxid = (*iter)->GetTaxid();

1140  bool

have_org_desc =

false

;

1143

have_org_desc =

true

;

1147  bool

found_taxid_in_taxonomy_blastdb =

true

;

1149  if

((! have_org_desc) && provide_new_taxonomy_info) {

1153

found_taxid_in_taxonomy_blastdb =

false

;

1157  if

(provide_new_taxonomy_info) {

1158  if

(have_org_desc) {

1162

org_tag->

SetDb

(TAX_ORGREF_DB_NAME);

1166  if

(found_taxid_in_taxonomy_blastdb) {

1170

org->

SetDb

().push_back(org_tag);

1179

taxonomy.push_back(desc);

1181  if

(use_taxinfo_cache) {

1203  if

(oss.size() == 1) {

1206  const

vector<char> & v = *oss.front();

1213  size

+= (**iter1).size();

1216

temp.reserve(

size

);

1220

temp.append(& (**iter3)[0], (*iter3)->size());

1237  if

( !bioseq.IsSetDescr() ) {

1243  if

( !(*iter)->IsUser() ) {

1250  const

vector< CRef< CUser_field > >& usf = uobj.

GetData

();

1252  _ASSERT

(usf.front()->CanGetData());

1253  if

(usf.front()->GetData().IsOss()) {

1274

vector<char> hdr_data;

1277  if

(! hdr_data.empty()) {

1290

vector< vector<char>* > & strs = uf->

SetData

().SetOss();

1293

strs.push_back(

new

vector<char>);

1294

strs[0]->swap(hdr_data);

1296

uobj->

SetData

().push_back(uf);

1308  const CSeq_id

* target_seq_id,

1312  typedef

list< CRef<CBlast_def_line> > TDeflines;

1316

list< CRef< CSeq_id > > seqids;

1328  if

((target_gi !=

ZERO_GI

) || target_seq_id) {

1338

seqid.

Reset

(target_seq_id);

1344  ITERATE

(TDeflines, iter, orig_deflines->

Get

()) {

1352  if

(filt_dl.

Empty

()) {

1354  "Error: oid headers do not contain target gi/seq_id."

);

1356

defline_set->

Set

().push_back(filt_dl);

1359

defline_set = orig_deflines;

1362  if

(defline_set.

Empty

() ||

1363

(! defline_set->

CanGet

()) ||

1364

(0 == defline_set->

Get

().size())) {

1368

defline = defline_set->

Get

().front();

1369  if

(! defline->CanGetSeqid()) {

1372

seqids = defline->GetSeqid();

1381  const char

* seq_buffer = 0;

1404

vector<Int4> ambchars;

1408  if

(ambchars.empty()) {

1432

bioseq->

SetInst

().SetMol(is_prot

1439

bioseq->

SetId

().swap(seqids);

1453

desc1->

SetTitle

().swap(description);

1458

seq_desc_set.

Set

().push_back(desc1);

1460  if

(! desc2.

Empty

()) {

1461

seq_desc_set.

Set

().push_back(desc2);

1465

list< CRef<CSeqdesc> > tax =

1469

bioseq->

SetDescr

().Set().push_back(*iter);

1486  switch

(alloc_type) {

1488

retval = (

char

*)

malloc

(length);

1492

retval =

new char

[length];

1512  x_GetAmbigSeq

(oid, & buf1, nucl_code, alloc_type, region, masks);

1523  if

(!masks || masks->

empty

())

return

;

1527  unsigned int

begin(range.

begin

);

1528  unsigned int

end(range.

end

);

1530  while

(i < masks->

size

() && (*masks)[

i

].second <= begin) ++

i

;

1532  while

(i < masks->

size

() && (*masks)[

i

].

first

< end) {

1533  for

(

size_t

j =

max

((*masks)[

i

].

first

, begin);

1534

j <

min

((*masks)[

i

].second, end); ++j) {

1535

seq[j] = mask_letter;

1549  if

((partial_ranges ==

NULL

) || (partial_ranges->

size

() == 0)) {

1553  const char

*

tmp

(0);

1559  int

num_ranges =

static_cast<int>

(partial_ranges->

size

());

1560  if

((*partial_ranges)[num_ranges - 1].second >

static_cast<TSeqPos>

(

base_length

)) {

1566  char

*seq = *

buffer

+ (sentinel ? 1 : 0);

1568

vector<Int4> ambchars;

1571  int

begin(riter->first);

1572  int

end(riter->second);

1592

(*buffer)[0] = (char)15;

1618  const char

*

tmp

(0);

1641  char

*seq = *

buffer

- range.

begin

+ (sentinel ? 1 : 0);

1645

vector<Int4> ambchars;

1654  bool

use_range_set =

true

;

1660

|| rciter->second->GetRanges().empty()

1662

use_range_set =

false

;

1665

range_set = rciter->second->GetRanges();

1669  if

(!use_range_set) {

1684  int

begin(riter->first);

1685  int

end(riter->second);

1694  min

(range.

end

, riter->second));

1705

(*buffer)[0] = (char)15;

1711  if

(masks) masks->

clear

();

1727  if

(sequence.

length

() == 0) {

1729  "Error: packed sequence data is not valid."

);

1732  const char

* seq_buffer = sequence.

data

();

1734  int

whole_bytes =

static_cast<int>

(sequence.

length

()) - 1;

1735  int

remainder = sequence[whole_bytes] & 3;

1744

vector<Int4> ambchars;

1745

ambchars.reserve(ambiguities.

length

()/4);

1747  for

(

size_t i

= 0;

i

< ambiguities.

length

();

i

+=4) {

1749

ambchars.push_back(

A

);

1775  const char

**

buffer

)

const 1777  TIndx

start_offset = 0;

1778  TIndx

end_offset = 0;

1791  if

(

'p'

== seqtype) {

1796

length =

int

(end_offset - start_offset);

1803  if

(! (*

buffer

- 1))

return

-1;

1805

}

else if

(

'n'

== seqtype) {

1819  if

(! (*

buffer

))

return

-1;

1833  int

whole_bytes =

int

(end_offset - start_offset - 1);

1835  char

last_char = (*buffer)[whole_bytes];

1837  int

remainder = last_char & 3;

1838

length = (whole_bytes * 4) + remainder;

1846

list< CRef< CSeq_id > > seqids;

1851  if

((! defline_set.

Empty

()) && defline_set->

CanGet

()) {

1853  if

(! (*defline)->CanGetSeqid()) {

1858

seqids.push_back(*seqid);

1868

list< CRef< CSeq_id > > seqids;

1873  if

((! defline_set.

Empty

()) && defline_set->

CanGet

()) {

1875  if

(! (*defline)->CanGetSeqid()) {

1880

seqids.push_back(*seqid);

1918 #ifdef NCBI_STRICT_TAX_ID 1921

tax_ids.

insert

(leaf_ids.begin(), leaf_ids.end());

1925  if

(user_tax_ids.

size

() > tax_ids.

size

()) {

1927  if

(user_tax_ids.

find

(*itr) != user_tax_ids.

end

()) {

1935  if

(tax_ids.

find

(*itr) != tax_ids.

end

()) {

1959  if

(taxid_set.

size

() > user_tax_ids.

size

()) {

1963  if

(user_tax_ids.

find

(*itr) == user_tax_ids.

end

()) {

1972  bool

* changed)

const 1974  typedef

list< CRef<CBlast_def_line> > TBDLL;

1975  typedef

TBDLL::iterator TBDLLIter;

1982  if

(useCache && cached.first.NotEmpty()) {

1984

*changed = cached.second;

1987  return

cached.first;

1990  bool

asn_changed =

false

;

2001

TBDLL & dl = BDLS->

Set

();

2003  for

(TBDLLIter iter = dl.begin(); iter != dl.end(); ) {

2006  bool

have_memb =

true

;

2016  int

memb_mask = 0x1 << (

m_MemBit

-1);

2018  if

((bits & memb_mask) == 0) {

2029  bool

have_user =

false

, have_volume =

false

;

2032  if

(have_user && have_volume)

break

;

2034

have_memb = have_user && have_volume;

2043  if

( (*vtaxid)->GetNumTaxIds() > 0) {

2062

TBDLLIter eraseme = iter++;

2064

asn_changed =

true

;

2074

cached.first = BDLS;

2075

cached.second = asn_changed;

2077

cached.first = BDLS;

2078

cached.second = asn_changed;

2090  typedef

list< CRef<CBlast_def_line> > TBDLL;

2091  typedef

TBDLL::iterator TBDLLIter;

2098  if

(useCache && cached.first.NotEmpty()) {

2100

*changed = cached.second;

2103  return

cached.first;

2106  bool

asn_changed =

false

;

2117

TBDLL & dl = BDLS->

Set

();

2119  for

(TBDLLIter iter = dl.begin(); iter != dl.end(); ) {

2122  bool

have_memb =

true

;

2132  int

memb_mask = 0x1 << (

m_MemBit

-1);

2134  if

((bits & memb_mask) == 0) {

2145  bool

have_user =

false

, have_volume =

false

;

2148  if

(have_user && have_volume)

break

;

2150

have_memb = have_user && have_volume;

2159  if

( (*vtaxid)->GetNumTaxIds() > 0) {

2179

TBDLLIter eraseme = iter++;

2181

asn_changed =

true

;

2191

cached.first = BDLS;

2192

cached.second = asn_changed;

2194

cached.first = BDLS;

2195

cached.second = asn_changed;

2204  bool

* changed)

const 2210  if

(! raw_data.

size

()) {

2218

bdls.

Reset

(

new

objects::CBlast_def_line_set);

2224  if

(! (**dl).CanGetSeqid()) {

2234  if

(dbt.

GetDb

() ==

"BL_ORD_ID"

) {

2259  if

(! raw_data.

size

()) {

2271

bdls.

Reset

(

new

objects::CBlast_def_line_set);

2277  if

(! (**dl).CanGetSeqid()) {

2287  if

(dbt.

GetDb

() ==

"BL_ORD_ID"

) {

2305  TIndx

hdr_start = 0;

2315  return CTempString

(asn_region, hdr_end - hdr_start);

2320

vector<char> & hdr_data )

const 2330  bool

changed =

false

;

2343

hdr_data.assign(s.data(), s.data() + s.size());

2346

hdr_data.assign(raw.

data

(), raw.

data

() + raw.

size

());

2351

vector<Int4> & ambchars)

const 2353  TIndx

start_offset = 0;

2354  TIndx

end_offset = 0;

2362  "File error: could not get ambiguity data."

);

2365  int

length =

int

(end_offset - start_offset);

2368  int

total = length / 4;

2376

total &= 0x7FFFFFFF;

2378

ambchars.resize(total);

2380  for

(

int i

= 0;

i

<total;

i

++) {

2434  typedef

list< CRef< CBlast_def_line > >::const_iterator TI1;

2437

TI1 it1 = BDLS->

Get

().begin();

2439  for

(; it1 != BDLS->

Get

().end(); it1++) {

2440  if

((*it1)->IsSetOther_info()) {

2441

TI2 it2 = (*it1)->GetOther_info().begin();

2442

TI2 it2end = (*it1)->GetOther_info().end();

2444  for

(; it2 != it2end; it2++) {

2479  return

! oids.empty();

2511  "GI list specified but no ISAM file found for GI in "

+

m_VolName

);

2523  "TI list specified but no ISAM file found for TI in "

+

m_VolName

);

2535  "IPG list specified but no ISAM file found for IPG in "

+

m_VolName

);

2547  "SI list specified but no ISAM file found for SI in "

+

m_VolName

);

2565  "GI list specified but no ISAM file found for GI in "

+

m_VolName

);

2577  "TI list specified but no ISAM file found for TI in "

+

m_VolName

);

2589  "SI list specified but no ISAM file found for SI in "

+

m_VolName

);

2611  typedef

list< CRef< CBlast_def_line > >::const_iterator TI1;

2612  typedef

list< CRef< CSeq_id > >::const_iterator TI2;

2614

TI1 it1 = BDLS->

Get

().begin();

2618  for

(; it1 != BDLS->

Get

().end(); it1++) {

2619  if

((*it1)->CanGetSeqid()) {

2620

TI2 it2 = (*it1)->GetSeqid().begin();

2621

TI2 it2end = (*it1)->GetSeqid().end();

2625  for

(; it2 != it2end; it2++) {

2626  if

((*it2)->IsGi()) {

2627

gi = (*it2)->GetGi();

2640  const string

& str_id,

2642

vector<int> & oids)

const 2644  bool

vcheck (

false

);

2645  bool

fits_in_four = (ident == -1) || ! (ident >> 32);

2646  bool

needs_four =

true

;

2666

oids.push_back(oid);

2679

oids.push_back(oid);

2692

oids.push_back(oid);

2713

oids.push_back((

int

) ident);

2720  "Internal error: hashes are not Seq-ids."

);

2723  if

((! fits_in_four) && needs_four) {

2726  "ID overflows range of specified type."

);

2735

vector<int> & oids)

const 2747  size_t

pos = acc.find(

"."

);

2750  string

ver_str(acc, pos+1, acc.size()-(pos+1));

2755  string

nover(acc, 0, pos);

2758  while

((pos2 = nover.find(

"|"

)) != nover.npos) {

2759

nover.erase(0, pos2+1);

2763

list< CRef<CSeq_id> > ids =

2766  bool

found =

false

;

2769  const CTextseq_id

*

id

= (*seqid)->GetTextseq_Id();

2787

oids.erase(

remove

(oids.begin(), oids.end(), -1), oids.end());

2796  bool

simpler (

false

);

2810  bool

simpler (

false

);

2854  if

(first_seq >= vol_cnt) {

2857  "OID not in valid range."

);

2860  if

(residue >= vol_len) {

2863  "Residue offset not in valid range."

);

2872  double

dresidue = (double(residue) * end_of_bytes) / vol_len;

2877

residue =

Uint8

(dresidue);

2879  if

(residue > (end_of_bytes-1)) {

2880

residue = end_of_bytes - 1;

2888  int

oid_beg = first_seq;

2889  int

oid_end = vol_cnt-1;

2893  int

oid_mid = (oid_beg + oid_end)/2;

2895  while

(oid_beg < oid_end) {

2902  if

(

offset

>= residue) {

2905

oid_beg = oid_mid + 1;

2908

oid_mid = (oid_beg + oid_end)/2;

2918  TIndx

start_offset = 0;

2920  return

start_offset;

2943  if

((begin >= end) || (end > length)) {

2946  "Begin and end offsets are not valid."

);

2974

v4.reserve((length+1)/2);

2979  for

(

TSeqPos i

= 0;

i

< length_whole;

i

+= 2) {

2983  if

(length_whole != length) {

2984  _ASSERT

((length_whole) == (length-1));

2985

v4.push_back(

buffer

[length_whole] << 4);

2999  int

* amb_length )

const 3014  TIndx

map_begin = 0;

3021  bool

amb_ok =

true

;

3027

end_A = start_A = --end_S;

3031

map_begin = start_S - 1;

3032

map_end = end_A + 1;

3036

map_begin = start_S;

3040  int

s_len =

int

(end_S - start_S);

3041  int

a_len =

int

(end_A - start_A);

3043  if

(! (s_len && amb_ok)) {

3045  "File error: could not get sequence data."

);

3049

*amb_length = a_len;

3053

*seq_length = s_len;

3058

*

buffer

+= (start_S - map_begin);

3062  if

(! *seq_length) {

3065  "Could not get sequence data."

);

3068  if

(((

buffer

&& *

buffer

) || a_len) && (! *seq_length)) {

3078  if

(

id

>= (

static_cast<T>

(1) << 32)) {

3081  "ID overflows range of specified type."

);

3117

low_id = high_id =

count

= 0;

3125

high_id = (

int

)

H

;

3151  bool

cache_data)

const 3155  if

(offset_ranges.

empty

() && (! cache_data) && (! append_ranges)) {

3168  if

(

R

.Empty() ||

R

->GetRanges().empty()) {

3173  if

(offset_ranges.

empty

() && (! cache_data)) {

3189  bool

flush_sequence = ((! append_ranges) ||

3190

(! offset_ranges.

empty

()) ||

3193  if

(flush_sequence) {

3194  R

->FlushSequence();

3197  R

->SetRanges(offset_ranges, append_ranges, cache_data);

3210  if

(append_ranges) {

3231  if

((**gilist).GetNumSis() != 0)

3234  if

((**gilist).GetNumTis() != 0)

3267  "Hash lookup requested but no hash ISAM file found."

);

3273 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 3274  (!defined(NCBI_COMPILER_MIPSPRO)) ) 3290  m_Columns

[col_id]->GetBlob(oid, blob, keep, & locked);

3307  return m_Columns

[col_id]->GetMetaData();

3320

titles.

insert

((**iter).GetTitle());

3332  string

alpha(

"abcdefghijklmnopqrstuvwxyz"

);

3333  string

ei(

"??a"

), ed(

"??b"

), ed2(

"??c"

);

3335

ei[0] = ed[0] = ed2[0] = (

m_IsAA

?

'p'

:

'n'

);

3339  for

(

size_t i

= 0;

i

< alpha.size();

i

++) {

3340

ei[1] = ed[1] = ed2[1] = alpha[

i

];

3347  if

( ! (big || small))

continue

;

3351  const Int2

bytetest = 0x0011;

3352  const char

* ptr = (

const char

*) &bytetest;

3353  if

(ptr[0] == 0x11 && small) {

3359  string

errmsg, errarg;

3361  string

title = col->GetTitle();

3363  if

(unique_titles[title]) {

3364

errmsg =

"duplicate column title"

;

3367

unique_titles[title] = 1;

3372  if

(noidc != noidv) {

3373

errmsg =

"column has wrong #oids"

;

3378  if

(errmsg.size()) {

3379  if

(errarg.size()) {

3380

errmsg +=

string

(

" ["

) + errarg +

"]."

;

3383  string

(

"Error: "

) + errmsg);

3404  return static_cast<int>

(

i

);

#define FENCE_SENTRY

This sentry value is used as a 'fence' around the valid portions of partially decoded sequences.

vector< TSeqRange > TRangeVector

ncbi::TMaskedQueryRegions mask

`Blob' Class for SeqDB (and WriteDB).

TTaxIds GetTaxIds() const

CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:

CObjectIStreamAsnBinary –.

CObjectOStreamAsnBinary –.

static char * Alloc(size_t length, bool clear=true)

Allocate memory that atlas will keep track of.

static bool ColumnExists(const string &basename, const string &extn, CSeqDBAtlas &atlas)

Determine if the column exists.

void UnLease()

Release memory held in the atlas layer by this object.

const char * GetFileDataPtr(const string &fname, TIndx offset)

Get a pointer to the specified offset.

static bool IndexExists(const string &name, const char prot_nucl)

TGi GetSeqGI(TOid oid, CSeqDBLockHold &locked)

int GetNumGis() const

Get the number of GIs in the array.

int GetNumSis() const

Get the number of Seq-ids in the array.

int GetNumTis() const

Get the number of TIs in the array.

Uint8 GetMaskOpts() const

set< TTaxId > & GetTaxIdsList()

const char * GetFileDataPtr(TIndx start) const

Read part of the file into a buffer.

string GetDate() const

Get the construction date of the volume.

void GetHdrStartEnd(int oid, TIndx &start, TIndx &end) const

Get the location of a sequence's header data.

void UnLease()

Release any memory leases temporarily held here.

string GetTitle() const

Get the volume title.

int GetNumOIDs() const

Get the number of oids in this volume.

string GetLMDBFileName() const

bool GetAmbStartEnd(int oid, TIndx &start, TIndx &end) const

Get the location of a sequence's ambiguity data.

Uint8 GetVolumeLength() const

Get the length of the volume (in bases).

int GetMinLength() const

Get the length of the shortest sequence in this volume.

void GetSeqStart(int oid, TIndx &start) const

Get the location of a sequence's packed sequence data.

char GetSeqType() const

Get the sequence data type.

int GetMaxLength() const

Get the length of the longest sequence in this volume.

void GetSeqStartEnd(int oid, TIndx &start, TIndx &end) const

Get the location of a sequence's packed sequence data.

TValue & Lookup(int key)

Find a value in the cache.

bool IdToOid(Int8 id, TOid &oid)

GI or TI translation.

void HashToOids(unsigned hash, vector< TOid > &oids)

Sequence hash lookup.

bool PigToOid(TPig pig, TOid &oid)

PIG translation.

void IdsToOids(int vol_start, int vol_end, CSeqDBGiList &ids)

Translate Gis and Tis to Oids for the given ID list.

void GetIdBounds(Int8 &low_id, Int8 &high_id, int &count)

Get Numeric Bounds.

void UnLease()

Return any memory held by this object to the atlas.

void StringToOids(const string &acc, vector< TOid > &oids, bool adjusted, bool &version_check)

String translation.

static bool IndexExists(const string &dbname, char prot_nucl, char file_ext_char)

Check if a given ISAM index exists.

int GetNumTis() const

Get the number of TIs in the array.

int GetNumGis() const

Get the number of GIs in the array.

int GetNumSis() const

Get the number of SeqIds in the array.

set< TTaxId > & GetTaxIdsList()

TRangeList m_Ranges

Range of offsets needed for this sequence.

void SetRanges(const TRangeList &ranges, bool append_ranges, bool cache_data)

Set ranges of the sequence that will be used.

static int ImmediateLength()

Sequences shorter than this will not use ranges in any case.

bool m_CacheData

True if caching of sequence data is required for this sequence.

void ReadBytes(char *buf, TIndx start, TIndx end) const

Read part of the file into a buffer.

const char * GetFileDataPtr(TIndx start) const

Get a pointer into the file contents.

static bool GetTaxNames(TTaxId tax_id, SSeqDBTaxInfo &info)

Get the taxonomy names for a given tax id.

void OptimizeGiLists() const

Simplify the GI list configuration.

bool m_HaveColumns

True if we have opened the columns for this volume.

list< CRef< CSeq_id > > GetSeqIDs(int oid) const

Get the Seq-ids associated with a sequence.

CFastMutex m_MtxCachedRange

CRef< CSeqDBIsam > m_IsamGi

Handles translation of GIs to OIDs.

void SeqidToOids(CSeq_id &seqid, vector< int > &oids, CSeqDBLockHold &locked) const

Find OIDs for the specified Seq-id.

Uint8 x_GetSeqResidueOffset(int oid) const

Returns the base-offset of the specified oid.

void x_OpenHashFile(void) const

void x_UnleasePigFile(void) const

int GetAmbigPartialSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, CSeqDB::TSequenceRanges *partial_ranges, CSeqDB::TSequenceRanges *masks) const

void x_UnleaseTiFile(void) const

CSeqDBAtlas & m_Atlas

The memory management layer.

void AccessionToOids(const string &acc, vector< int > &oids, CSeqDBLockHold &locked) const

Find OIDs for the specified accession or formatted Seq-id.

void GetColumnBlob(int col_id, int oid, CBlastDbBlob &blob, bool keep, CSeqDBLockHold &locked)

Fetch the data blob for the given column and oid.

CRef< CSeqDBHdrFile > m_Hdr

Contains header (defline) information for this volume.

void x_OpenSeqFile(void) const

CSeqDBIntCache< CRef< CSeqdesc > > m_TaxCache

This cache allows CBioseqs to share taxonomic objects.

void x_OpenTiFile(void) const

int GetAmbigSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, SSeqDBSlice *region, CSeqDB::TSequenceRanges *masks) const

Get a sequence with ambiguous regions.

vector< CRef< CSeqDBColumn > > m_Columns

Set of columns defined for this volume.

CRef< CSeqDBIdxFile > m_Idx

Metadata plus offsets into the sequence, header, and ambiguity data.

void x_OpenStrFile(void) const

int GetSeqLengthExact(int oid) const

Exact sequence length for nucleotide databases.

void x_StringToOids(const string &acc, ESeqDBIdType id_type, Int8 ident, const string &str_id, bool simplified, vector< int > &oids) const

void OpenSeqFile(CSeqDBLockHold &locked) const

Open sequence file.

int GetColumnId(const string &title, CSeqDBLockHold &locked)

Get an ID number for a given column title.

CRef< CSeqDBIsam > m_IsamStr

Handles translation of strings (accessions) to OIDs.

vector< CRef< CSeqDBGiList > > TGiLists

A set of GI lists.

CSeqDBIntCache< TDeflineCacheItem > m_DeflineCache

Cache of filtered deflines.

int x_GetAmbigSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, SSeqDBSlice *region, CSeqDB::TSequenceRanges *masks) const

Get a sequence with ambiguous regions.

int m_VolStart

Starting OID of this volume.

int GetNumOIDs() const

Get the number of OIDs for this volume.

bool GetGi(int oid, TGi &gi, CSeqDBLockHold &locked) const

Find the GI given an OID.

CRef< CBlast_def_line_set > x_GetHdrAsn1(int oid, bool adjust_oids, bool *changed) const

Get sequence header object.

void GetPigBounds(int &low_id, int &high_id, int &count, CSeqDBLockHold &locked) const

Get PIG Bounds.

void x_FilterHasId(const CSeq_id &id, bool &have_user, bool &have_vol) const

Determine if a user ID list affects this ID, and how.

string m_VolName

The name of this volume.

CTempString x_GetHdrAsn1Binary(int oid) const

Get sequence header binary data.

void FlushOffsetRangeCache()

Flush all offset ranges cached.

CSeqDBVol(CSeqDBAtlas &atlas, const string &name, char prot_nucl, CSeqDBGiList *user_list, CSeqDBNegativeList *neg_list, int vol_start, CSeqDBLockHold &locked)

Constructor.

void GetRawSeqAndAmbig(int oid, const char **buffer, int *seq_length, int *ambig_length) const

Get Raw Sequence and Ambiguity Data.

void x_OpenHdrFile(void) const

string GetLMDBFileName() const

Get sqlite file name associated with this volume. Empty string if version 4.

void ListColumns(set< string > &titles, CSeqDBLockHold &locked)

List the titles of all columns for this volume.

int GetSeqLengthApprox(int oid) const

Approximate sequence length for nucleotide databases.

TRangeCache m_RangeCache

Cached/ranged sequence info.

int m_VolEnd

First OID past end of this volume.

bool m_SeqFileOpened

True if the volume file has been opened (or at least an attempt was made to open it).

bool GetPig(int oid, int &pig, CSeqDBLockHold &locked) const

Find the PIG given an OID.

int GetSeqLengthProt(int oid) const

Sequence length for protein databases.

CRef< CSeqDBSeqFile > m_Seq

Contains sequence data for this volume.

CRef< CSeqdesc > x_GetAsnDefline(int oid) const

Get sequence header information structures.

string GetTitle() const

Get the volume title.

CRef< CSeqDBGiIndex > m_GiIndex

The GI index file (for fast oid->gi conversion)

CRef< CSeqDBNegativeList > m_NegativeList

The negative ID list, if one exists.

TGiLists m_VolumeGiLists

The volume GI lists, if any exist.

CRef< CSeqDBIsam > m_IsamTi

Handles translation of TI (trace ids) to OIDs.

int x_GetSequence(int oid, const char **buffer) const

Get sequence data.

CRef< CSeqDBIsam > m_IsamHash

Handles translation of sequence hash value to OIDs.

void UnLease()

Return expendable resources held by this volume.

list< CRef< CSeqdesc > > x_GetTaxonomy(int oid, TGi preferred_gi, const CSeq_id *preferred_seq_id)

Get taxonomic descriptions of a sequence.

void IdsToOids(CSeqDBGiList &gis) const

Translate Gis to Oids for the given vector of Gi/Oid pairs.

CRef< CBlast_def_line_set > x_GetFilteredHeader(int oid, bool *changed) const

Get sequence header information.

char * x_AllocType(size_t length, ESeqDBAllocType alloc_type) const

Allocate memory in one of several ways.

void x_CheckVersions(const string &acc, vector< int > &oids) const

Check Seq-id versions for special sparse-id support case.

bool GiToOid(TGi gi, int &oid, CSeqDBLockHold &locked) const

Find the OID given a GI.

TGi GetSeqGI(int oid, CSeqDBLockHold &locked) const

Get the GI of a sequence. This method returns the GI of the sequence.

CRef< CSeq_data > GetSeqData(int oid, TSeqPos begin, TSeqPos end, CSeqDBLockHold &locked) const

Fetch data as a CSeq_data object.

void GetGiBounds(TGi &low_id, TGi &high_id, int &count, CSeqDBLockHold &locked) const

Get GI Bounds.

int GetOidAtOffset(int first_seq, Uint8 residue, CSeqDBLockHold &locked) const

Find the OID at a given index into the database.

char GetSeqType() const

Get the sequence type stored in this database.

bool x_HaveIdFilter(void) const

Returns true if this volume has an ID list.

void x_OpenAllColumns(CSeqDBLockHold &locked)

Find all columns for this volume.

CRef< CBioseq > GetBioseq(int oid, TGi pref_gi, const CSeq_id *pref_seq_id, bool seqdata, CSeqDBLockHold &locked)

Get a CBioseq object for this sequence.

int GetMinLength() const

Get the length of the smallest sequence in this volume.

void x_UnleaseStrFile(void) const

CRef< CSeqDBGiList > m_UserGiList

The user ID list, if one exists.

CSeqDBAtlas::TIndx TIndx

Import TIndx definition from the CSeqDBAtlas class.

void x_OpenGiFile(void) const

pair< CRef< CBlast_def_line_set >, bool > TDeflineCacheItem

Filtered defline plus whether binary data needed changes.

CRef< CSeqDBIsam > m_IsamPig

Handles translation of PIGs to OIDs.

bool m_IsAA

True if the volume is protein, false for nucleotide.

const map< string, string > & GetColumnMetaData(int col_id, CSeqDBLockHold &locked)

Get all metadata for the specified column.

CRef< CBlast_def_line_set > x_GetTaxDefline(int oid, TGi preferred_gi, const CSeq_id *preferred_seq_id)

Get defline filtered by several criteria.

void x_OpenOidFile(void) const

int GetMaxLength() const

Get the length of the largest sequence in this volume.

bool PigToOid(int pig, int &oid) const

Find the OID given a PIG.

bool TiToOid(Int8 ti, int &oid, CSeqDBLockHold &locked) const

Find the OID given a TI.

void x_OpenPigFile(void) const

void x_GetFilteredBinaryHeader(int oid, vector< char > &hdr_data) const

Get binary sequence header information.

Uint8 GetVolumeLength() const

Get the total length of this volume (in bases).

CRef< CBlast_def_line_set > GetFilteredHeader(int oid, CSeqDBLockHold &locked) const

Get filtered sequence header information.

string GetDate() const

Get the formatting date of the volume.

int m_MemBit

The filtering MEMB_BIT.

void x_UnleaseGiFile(void) const

void SetOffsetRanges(int oid, const TRangeList &offset_ranges, bool append_ranges, bool cache_data) const

Apply a range of offsets to a database sequence.

void GetStringBounds(string &low_id, string &high_id, int &count) const

Get String Bounds.

void HashToOids(unsigned hash, vector< int > &oids, CSeqDBLockHold &locked) const

Get the OIDs for a given sequence hash.

void x_GetAmbChar(int oid, vector< Int4 > &ambchars) const

Get ambiguity information.

char x_GetSeqType() const

Returns 'p' for protein databases, or 'n' for nucleotide.

static const char * kOidNotFound

String containing the error message in exceptions thrown when a given OID cannot be found.

static CRef< CBlast_def_line_set > ExtractBlastDefline(const CBioseq &bioseq)

Extract a Blast-def-line-set object from a Bioseq retrieved by CSeqDB.

@Seq_descr.hpp User-defined methods of the data storage class.

CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...

container_type::iterator iterator

const_iterator end() const

const_iterator find(const key_type &key) const

iterator_bool insert(const value_type &val)

const_iterator begin() const

const_iterator find(const key_type &key) const

const_iterator end() const

static int base_length[29]

static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)

static void DLIST_NAME() remove(DLIST_LIST_TYPE *list, DLIST_TYPE *item)

static TDSRET convert(TDSSOCKET *tds, TDSICONV *conv, TDS_ICONV_DIRECTION direction, const char *from, size_t from_len, char *dest, size_t *dest_len)

#define GI_FROM(T, value)

unsigned int TSeqPos

Type for sequence locations and lengths.

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define TAX_ID_TO(T, tax_id)

#define NON_CONST_ITERATE(Type, Var, Cont)

Non constant version of ITERATE macro.

SStrictId_Tax::TId TTaxId

Taxon id type.

#define TAX_ID_FROM(T, value)

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

const string AsFastaString(void) const

E_SIC

Compare return values.

@ e_NO

SeqIds compared, but are different.

@ e_YES

SeqIds compared, and are equivalent.

void Close(void)

Detach reader from a data source.

void OpenFromBuffer(const char *buffer, size_t size)

Attach reader to a data source.

void AddReference(void) const

Add reference to object.

void Reset(void)

Reset reference object.

bool NotEmpty(void) const THROWS_NONE

Check if CRef is not empty – pointing to an object and has a non-null value.

void RemoveReference(void) const

Remove reference to object.

bool ReferencedOnlyOnce(void) const THROWS_NONE

Check if object is referenced only once.

bool Empty(void) const THROWS_NONE

Check if CRef is empty – not pointing to any object, which means having a null value.

uint8_t Uint1

1-byte (8-bit) unsigned integer

int16_t Int2

2-byte (16-bit) signed integer

int32_t Int4

4-byte (32-bit) signed integer

uint32_t Uint4

4-byte (32-bit) unsigned integer

int64_t Int8

8-byte (64-bit) signed integer

uint64_t Uint8

8-byte (64-bit) unsigned integer

void Reset(void)

Reset random number generator to initial startup condition (LFG only)

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

static string Int8ToString(Int8 value, TNumToStringFlags flags=0, int base=10)

Convert Int8 to string.

static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)

Convert string to int.

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

const char * data(void) const

Return a pointer to the array represented.

size_type length(void) const

Return the length of the represented array.

static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)

Convert numeric value to string.

size_type size(void) const

Return the length of the represented array.

@ fConvErr_NoThrow

Do not throw an exception on error.

@ fAllowTrailingSymbols

Ignore trailing non-numerics characters.

#define DEFINE_STATIC_FAST_MUTEX(id)

Define static fast mutex and initialize it.

const TSeqid & GetSeqid(void) const

Get the Seqid member data.

bool CanGetTitle(void) const

Check if it is safe to call GetTitle method.

bool IsSetLinks(void) const

Check if a value has been assigned to Links data member.

TTaxid GetTaxid(void) const

Get the Taxid member data.

bool IsSet(void) const

Check if a value has been assigned to data member.

bool IsSetMemberships(void) const

Bit arrays, repurposed to store the (multiple) taxIDs associated with WP proteins.

bool IsSetTaxid(void) const

Check if a value has been assigned to Taxid data member.

const TLinks & GetLinks(void) const

Get the Links member data.

const TMemberships & GetMemberships(void) const

Get the Memberships member data.

Tdata & Set(void)

Assign a value to data member.

bool CanGet(void) const

Check if it is safe to call Get method.

const Tdata & Get(void) const

Get the member data.

bool CanGetSeqid(void) const

Check if it is safe to call GetSeqid method.

bool CanGetMemberships(void) const

Check if it is safe to call GetMemberships method.

const TTitle & GetTitle(void) const

Get the Title member data.

bool IsStr(void) const

Check if variant Str is selected.

const TTag & GetTag(void) const

Get the Tag member data.

void SetTag(TTag &value)

Assign a value to Tag data member.

const TDb & GetDb(void) const

Get the Db member data.

vector< vector< char > * > TOss

TData & SetData(void)

Assign a value to Data data member.

void SetNum(TNum value)

Assign a value to Num data member.

const TStr & GetStr(void) const

Get the variant data.

void SetLabel(TLabel &value)

Assign a value to Label data member.

TStr & SetStr(void)

Select the variant.

const TData & GetData(void) const

Get the Data member data.

void SetType(TType &value)

Assign a value to Type data member.

void SetData(TData &value)

Assign a value to Data data member.

const TType & GetType(void) const

Get the Type member data.

void SetDb(const TDb &value)

Assign a value to Db data member.

TId GetId(void) const

Get the variant data.

void SetCommon(const TCommon &value)

Assign a value to Common data member.

TDb & SetDb(void)

Assign a value to Db data member.

void SetTaxname(const TTaxname &value)

Assign a value to Taxname data member.

TGeneral & SetGeneral(void)

Select the variant.

E_Choice Which(void) const

Which variant is currently selected.

TVersion GetVersion(void) const

Get the Version member data.

bool CanGetVersion(void) const

Check if it is safe to call GetVersion method.

bool CanGetAccession(void) const

Check if it is safe to call GetAccession method.

const TAccession & GetAccession(void) const

Get the Accession member data.

@ e_General

for other databases

@ e_Gi

GenInfo Integrated Database.

list< CRef< CSeqdesc > > Tdata

TId & SetId(void)

Assign a value to Id data member.

TTitle & SetTitle(void)

Select the variant.

void SetInst(TInst &value)

Assign a value to Inst data member.

TSource & SetSource(void)

Select the variant.

TNcbistdaa & SetNcbistdaa(void)

Select the variant.

void SetDescr(TDescr &value)

Assign a value to Descr data member.

TUser & SetUser(void)

Select the variant.

void SetRepr(TRepr value)

Assign a value to Repr data member.

Tdata & Set(void)

Assign a value to data member.

void SetLength(TLength value)

Assign a value to Length data member.

void SetSeq_data(TSeq_data &value)

Assign a value to Seq_data data member.

TNcbi4na & SetNcbi4na(void)

Select the variant.

void SetMol(TMol value)

Assign a value to Mol data member.

@ eRepr_raw

continuous sequence

@ eMol_na

just a nucleic acid

unsigned int

A callback function used to compare two keys in a database.

const struct ncbi::grid::netcache::search::fields::SIZE size

const CharType(& source)[N]

Static variables safety - create on demand, destroy on application termination.

Multi-threading – mutexes; rw-locks; semaphore.

ESeqDBAllocType

Certain methods have an "Alloc" version.

ESeqDBIdType SeqDB_SimplifySeqid(CSeq_id &bestid, const string *acc, Int8 &num_id, string &str_id, bool &simpler)

Seq-id simplification.

const int kSeqDBNuclNcbiNA8

Used to request ambiguities in Ncbi/NA8 format.

ESeqDBIdType SeqDB_SimplifyAccession(const string &acc, Int8 &num_id, string &str_id, bool &simpler)

String id simplification.

const int kSeqDBNuclBlastNA8

Used to request ambiguities in BLAST/NA8 format.

ESeqDBIdType

Various identifier formats used in Id lookup.

@ eStringId

Some sequence sources use string identifiers.

@ eTiId

Trace ID is a numeric identifier for Trace sequences.

@ ePigId

Each PIG identifier refers to exactly one protein sequence.

@ eHashId

Lookup from sequence hash values to OIDs.

@ eOID

Lookup from sequence hash values to OIDs.

T SeqDB_GetStdOrd(const T *stdord_obj)

Read a network order integer value.

The SeqDB oid filtering layer.

static bool s_SeqDB_SeqIdIn(const list< CRef< CSeq_id > > &seqids, const CSeq_id &target)

Search for a Seq-id in a list of Seq-ids.

bool s_IncludeDefline_Taxid(const CBlast_def_line &def, const set< TTaxId > &user_tax_ids)

static void s_SeqDBMapNA2ToNA4(const char *buf2bit, vector< char > &buf4bit, int base_length)

Convert sequence data from NA2 to NA4 format.

Uint4 s_ResVal(const vector< Int4 > &ambchars, Uint4 i)

Get ambiguous residue value (old version)

void SeqDB_UnpackAmbiguities(const CTempString &sequence, const CTempString &ambiguities, string &result)

Unpack an ambiguous nucleotide sequence.

static void s_SeqDBRebuildDNA_NA8(char *seq, const vector< Int4 > &amb_chars, const SSeqDBSlice &region)

Rebuild an ambiguous region from sequence and ambiguity data.

Uint4 s_ResLenOld(const vector< Int4 > &ambchars, Uint4 i)

Get ambiguous region length (old version)

Uint4 s_ResLenNew(const vector< Int4 > &ambchars, Uint4 i)

Get length of ambiguous region (new version)

set< pair< int, int > > TRangeVector

List of offset ranges as begin/end pairs.

bool s_IncludeDefline_NegativeTaxid(const CBlast_def_line &def, const set< TTaxId > &user_tax_ids)

Uint4 s_ResPosNew(const vector< Int4 > &ambchars, Uint4 i)

Get position of ambiguous region (new version)

static void s_SeqDBRebuildDNA_NA4(vector< char > &buf4bit, const vector< Int4 > &amb_chars)

Rebuild an ambiguous region from sequence and ambiguity data.

static void s_SeqDBWriteSeqDataProt(CSeq_inst &seqinst, const char *seq_buffer, int length)

Store protein sequence data in a Seq-inst.

static void s_GetBioseqTitle(CRef< CBlast_def_line_set > deflines, string &title)

Get the title string for a CBioseq.

static void s_SeqDBFitsInFour(T id)

static void s_SeqDBMapNA2ToNA4Setup(TTable &translated)

Build NA2 to NcbiNA4 translation table.

unsigned SeqDB_ncbina8_to_blastna8[]

static CRef< CBlast_def_line_set > s_OssToDefline(const CUser_field::TData::TOss &oss)

Efficiently decode a Blast-def-line-set from binary ASN.1.

static void s_SeqDBMapNA2ToNA8Setup(TTable &translated)

Build NA2 to Ncbi-NA8 translation table.

Uint4 s_ResPosOld(const vector< Int4 > &ambchars, Uint4 i)

Get position of ambiguous region (old version)

static void s_SeqDBMapNcbiNA8ToBlastNA8(char *buf, const SSeqDBSlice &range)

Convert sequence data from Ncbi-NA8 to Blast-NA8 format.

static void s_SeqDBMaskSequence(char *seq, CSeqDB::TSequenceRanges *masks, char mask_letter, const SSeqDBSlice &range)

static void s_SeqDBWriteSeqDataNucl(CSeq_inst &seqinst, const char *seq_buffer, int length)

Store non-ambiguous nucleotide sequence data in a Seq-inst.

CRef< CBlast_def_line_set > s_ExtractBlastDefline(const T &bioseq)

vector< Uint1 > TTable

Translation table type.

static void s_SeqDBMapNA2ToNA8(const char *buf2bit, char *buf8bit, const SSeqDBSlice &range)

Convert sequence data from NA2 to NA8 format.

bool s_IncludeDefline_MaskFilter(const CBlast_def_line &def, Uint8 mask)

Defines database volume access classes.

List of sequence offset ranges.

OID-Range type to simplify interfaces.

int begin

First oid in range.

int end

OID after last included oid.

string common_name

Common name, such as "noisy night monkey".

string scientific_name

Scientific name, such as "Aotus vociferans".


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4