<
classA,
classB,
classC,
classD,
classE>
69oss <<
a<<
b<< c << d << e;
79 const char* slice (0);
81 const char*
whole(0);
89 s_ToString(
"Checking NcbiNA8 subsequence range [", begin,
",", end,
"].");
94BOOST_REQUIRE_MESSAGE(0 == memcmp(slice,
whole+ begin, sliceL), op);
103 boolsuccess = db.
GiToOid(nt_gi, oid);
106oss <<
"GI "<< nt_gi <<
" was not found in nt";
108BOOST_REQUIRE_MESSAGE(success,
msg);
116 for(
int i= 1;
i<length;
i*= 2) {
117 for(
intj = 0; j<length; j +=
i) {
134 return(oid & 1) != 0;
137 return(oid & 1) == 0;
150 for(
unsignedd = 2; d < oid; d++) {
197 unsignedexp_count(0);
205BOOST_REQUIRE_EQUAL(
first, lowest);
206BOOST_REQUIRE_EQUAL(
last, highest);
207BOOST_REQUIRE_EQUAL(
count, exp_count);
210 template<
classNUM,
classDIF>
218cout <<
"\nMismatch: line "<<
lineno 230 const signed char*
buf= (
const signed char*) buf_in;
234 while(
i< length) {
246 template<
classASNOBJ>
264 Int4num1(0), num2(0);
268BOOST_REQUIRE(num1 >= 1);
269BOOST_REQUIRE_EQUAL(num1, num2);
279 Int4num1(0), num2(0);
283BOOST_REQUIRE(num1 >= 1);
284BOOST_REQUIRE_EQUAL(num1, num2);
289 boolcaught_exception =
false;
295 Int4num1(0), num2(0);
299BOOST_REQUIRE(num1 >= 1);
300BOOST_REQUIRE_EQUAL(num1, num2);
302caught_exception =
true;
305 if(! caught_exception) {
306BOOST_ERROR(
"ConstructMissing() did not throw an exception of type CSeqDBException.");
312 boolcaught_exception =
false;
317caught_exception =
true;
320 if(! caught_exception) {
321BOOST_ERROR(
"InvalidSeqType() did not throw an exception of type CSeqDBException.");
332BOOST_REQUIRE(num1 >= 1);
337 boolcaught_exception =
false;
345BOOST_REQUIRE(num1 >= 1);
347caught_exception =
true;
350 if(! caught_exception) {
351BOOST_ERROR(
"InvalidPath() did not throw an exception of type CSeqDBException.");
365BOOST_REQUIRE_EQUAL(
int(100), nseqs);
366BOOST_REQUIRE_EQUAL(
Uint8(51718), vlength);
370BOOST_REQUIRE_EQUAL(
string(
"Another test DB for CPPUNIT, SeqDB."),
377 intoid_values[] = { 0, 100000 };
378 for(
autoend_oid : oid_values) {
386BOOST_REQUIRE(vol2 < vol1);
387BOOST_REQUIRE_EQUAL(seq2, seq1 - 1);
395BOOST_REQUIRE(vol3 < vol2);
396BOOST_REQUIRE_EQUAL(seq3, seq2 - 1);
405BOOST_CHECK_EQUAL(0
U, vol4);
406BOOST_CHECK_EQUAL(0, seq4);
410BOOST_CHECK_EQUAL(10, seq4);
411BOOST_CHECK(vol4 > 0);
420 Int4nseqs(0), noids(0);
421 Uint8vlength(0), tlength(0);
427BOOST_REQUIRE_EQUAL(
int(100), nseqs);
428BOOST_REQUIRE_EQUAL(
int(100), noids);
429BOOST_REQUIRE_EQUAL(
Uint8(26945), tlength);
430BOOST_REQUIRE_EQUAL(
Uint8(26945), vlength);
434BOOST_REQUIRE_EQUAL(
string(
"Test database for BLAST unit tests"),
442 char* bufp_blst = 0;
443 char* bufp_ncbi = 0;
463BOOST_REQUIRE_EQUAL(
Uint4(30118382ul), hashval_blst);
464BOOST_REQUIRE_EQUAL(
Uint4(3084382219ul), hashval_ncbi);
471 char* bufp_blst = 0;
472 char* bufp_ncbi = 0;
492BOOST_REQUIRE_EQUAL(
Uint4(3219499033ul), hashval_blst);
493BOOST_REQUIRE_EQUAL(
Uint4(3219499033ul), hashval_ncbi);
500 const char* bufp1 = 0;
501 const char* bufp2 = 0;
511BOOST_REQUIRE_EQUAL(
Uint4(30118382ul), hashval1);
512BOOST_REQUIRE_EQUAL(
Uint4(3084382219ul), hashval2);
519 const char* bufp1 = 0;
520 const char* bufp2 = 0;
530BOOST_REQUIRE_EQUAL(
Uint4(3219499033ul), hashval1);
531BOOST_REQUIRE_EQUAL(
Uint4(3219499033ul), hashval2);
542 " accession \"BP722514\",\n" 547 " title \"Xenopus laevis NBRP cDNA clone:XL452f07ex, 3' end\",\n" 549 " type str \"ASN1_BlastDefLine\",\n" 552 " label str \"ASN1_BlastDefLine\",\n" 555 " '30803080A0801A3158656E6F707573206C6165766973204E4252502063444E4120\n" 556 "636C6F6E653A584C34353266303765782C20332720656E640000A1803080AB80020402BEFD4300\n" 557 "00AC803080A1801A0842503732323531340000A38002010100000000000000000000A280020100\n" 568 " seq-data ncbi4na '11428288218841844814141422818811214421121482118428221114\n" 569 "82211121141881228484211141128842148481121112222F882124422141148188842112118488\n" 570 "41114822882844214144144148281181'H\n" 574BOOST_REQUIRE_EQUAL(
expected, got);
586 " accession \"EAI08555\",\n" 591 " title \"unknown [environmental sequence]\",\n" 593 " type str \"ASN1_BlastDefLine\",\n" 596 " label str \"ASN1_BlastDefLine\",\n" 599 " '30803080A0801A20756E6B6E6F776E205B656E7669726F6E6D656E74616C207365\n" 600 "7175656E63655D0000A1803080AB80020402A37A630000A4803080A1801A084541493038353535\n" 601 "0000A38002010100000000000000000000A280020100000000000000'H\n" 611 " seq-data ncbistdaa '0C0A0A0606090B0909060909060B09060909131004160F090A0A0A\n" 612 "0A0B0A0B0D0D010B0D110D0606090F12090D0A0B0904050D0A160D0B0B05051009100B1005'H\n" 616BOOST_REQUIRE_EQUAL(
expected, got);
624 string expected= (
"Blast-def-line-set ::= {\n" 626 " title \"Xenopus laevis NBRP cDNA clone:XL452f05ex, 3' end\",\n" 630 " accession \"BP722512\",\n" 638BOOST_REQUIRE_EQUAL(
expected, got);
646 string expected= (
"Blast-def-line-set ::= {\n" 648 " title \"similar to KIAA0960 protein [Mus musculus]\",\n" 652 " accession \"XP_357594\",\n" 660BOOST_REQUIRE_EQUAL(
expected, got);
668list< CRef< CSeq_id > > seqids =
685BOOST_REQUIRE_EQUAL(
Uint4(136774894ul), h);
693list< CRef< CSeq_id > > seqids =
710BOOST_REQUIRE_EQUAL(
Uint4(2942938647ul), h);
719BOOST_REQUIRE_EQUAL( (
int) 330, dbp.
GetSeqLength(13) );
720BOOST_REQUIRE_EQUAL( (
int) 422, dbp.
GetSeqLength(19) );
721BOOST_REQUIRE_EQUAL( (
int) 67, dbp.
GetSeqLength(26) );
722BOOST_REQUIRE_EQUAL( (
int) 104, dbp.
GetSeqLength(27) );
723BOOST_REQUIRE_EQUAL( (
int) 282, dbp.
GetSeqLength(38) );
724BOOST_REQUIRE_EQUAL( (
int) 158, dbp.
GetSeqLength(43) );
725BOOST_REQUIRE_EQUAL( (
int) 472, dbp.
GetSeqLength(54) );
726BOOST_REQUIRE_EQUAL( (
int) 207, dbp.
GetSeqLength(93) );
728BOOST_REQUIRE_EQUAL( (
int) 833, dbn.
GetSeqLength(9) );
729BOOST_REQUIRE_EQUAL( (
int) 250, dbn.
GetSeqLength(26) );
730BOOST_REQUIRE_EQUAL( (
int) 708, dbn.
GetSeqLength(39) );
731BOOST_REQUIRE_EQUAL( (
int) 472, dbn.
GetSeqLength(43) );
732BOOST_REQUIRE_EQUAL( (
int) 708, dbn.
GetSeqLength(39) );
733BOOST_REQUIRE_EQUAL( (
int) 448, dbn.
GetSeqLength(47) );
734BOOST_REQUIRE_EQUAL( (
int) 825, dbn.
GetSeqLength(61) );
735BOOST_REQUIRE_EQUAL( (
int) 371, dbn.
GetSeqLength(70) );
744 intplen(0), nlen(0);
754 for(
i= 0;
i< plen;
i++) {
764 for(
i= 0;
i< nlen;
i++) {
778BOOST_REQUIRE_EQUAL(
int(100), nlen);
779BOOST_REQUIRE_EQUAL(
int(100), plen);
780BOOST_REQUIRE_EQUAL(
Uint8(26945), ptot);
781BOOST_REQUIRE_EQUAL(
Uint8(51718), ex_tot);
782BOOST_REQUIRE_EQUAL(
Uint8(51726), ap_tot);
790 const char* bufp = 0;
796BOOST_REQUIRE_EQUAL(
Uint4(1128126064ul), hashval);
804 const char* bufp = 0;
809BOOST_REQUIRE_EQUAL(
Uint4(3219499033ul), hashval);
818 intnr_seqs(0), nr_oids(0), sp_seqs(0), sp_oids(0);
819 Uint8nr_tlen(0), nr_vlen(0), sp_tlen(0), sp_vlen(0);
827BOOST_REQUIRE_EQUAL(nr_seqs, nr_oids);
828BOOST_REQUIRE_EQUAL(nr_tlen, nr_vlen);
830BOOST_REQUIRE_GT(nr_seqs, sp_seqs);
831BOOST_REQUIRE_NE(nr_oids, sp_oids);
832BOOST_REQUIRE_GT(nr_tlen, sp_tlen);
833BOOST_REQUIRE_NE(nr_vlen, sp_vlen);
843 const char* seqid_list[] = {
844 "AAA03612.1",
"prf||1922246A",
"P51728.1",
"AAB84238.1",
"BAA25256.1",
"AAC15878.1",
845 "1A8U_A",
"AAC82254.1",
"AAD31141.1",
"1R24_A",
"AAF63214.1",
"AAF95963.1",
846 "WP_003095644.1",
"AAC59341.1" 8501153908, 507276, 851580, 200775, 1028308, 939134,
851199107, 511756, 27645, 429124, 575812, 648744,
856199, 233, 186, 441, 96, 206,
857277, 205, 110, 206, 510, 293,
866BOOST_REQUIRE((L_seqid == L_len) && (L_len == L_pig));
868 for(
size_t i= 0;
i<L_seqid;
i++) {
869 stringarr_seqid(seqid_list[
i]);
870 intarr_pig(pig_list[
i]), arr_len(len_list[
i]);
871vector<int> seqid2oid;
872 intpig2oid = 0, oid2pig=0, oid2len =0;
873 nr.AccessionToOids(arr_seqid, seqid2oid);
875BOOST_REQUIRE(
nr.PigToOid(arr_pig, pig2oid));
877BOOST_CHECK_EQUAL(pig2oid, seqid2oid[0]);
878BOOST_REQUIRE(pig2oid !=
int(-1));
880oid2len =
nr.GetSeqLength(pig2oid);
881BOOST_REQUIRE(
nr.OidToPig (pig2oid, oid2pig));
883BOOST_REQUIRE_EQUAL(arr_len, oid2len);
884BOOST_REQUIRE_EQUAL(arr_pig, oid2pig);
890 const string kDb(
"nr");
895 const Uint4NUM_ITEMS = 6;
897 const char** str_list[NUM_ITEMS];
900{
"AAP90615.1",
"AAP90628.1",
"AAP90641.1",
"AAP90654.1",
"AAP90667.1", 0 };
904{
"1NPQ",
"1NPQ_A",
"1NPQ_B", 0 };
908{
"1LCT_A",
"1LCT", 0 };
910{
"1GWB_A",
"1GWB_B",
"1GWB", 0 };
919 Uint4* len_list[NUM_ITEMS];
921 Uint4l0[] = { 261, 0 };
922 Uint4l1[] = { 232, 0 };
923 Uint4l2[] = { 17, 90, 0 };
924 Uint4l3[] = { 17, 90, 0 };
925 Uint4l4[] = { 333, 0 };
926 Uint4l5[] = { 281, 0 };
940BOOST_REQUIRE_EQUAL(NUM_ITEMS, L_str);
941BOOST_REQUIRE_EQUAL(NUM_ITEMS, L_len);
943 for(
Uint4 i= 0;
i< NUM_ITEMS;
i++) {
949 for(
const char** strp = str_list[
i]; *strp; strp++) {
951 nr.AccessionToOids(*strp, oids);
953BOOST_REQUIRE_MESSAGE(! oids.empty(),
"Failed to find accession " 954<< *strp <<
" in "<<
kDb);
956 ITERATE(vector<int>, iter, oids) {
965str_iter = str_oids.
begin();
969 Uint4* llp = len_list[
i];
976oid_len.
insert(
nr.GetSeqLength(*iter));
981oid_iter = oid_len.
begin();
982exp_iter = exp_len.
begin();
984 while(oid_iter != oid_len.
end()) {
985BOOST_REQUIRE(exp_iter != exp_len.
end());
986BOOST_REQUIRE_EQUAL(*oid_iter, *exp_iter);
1001 const char*
dbname=
"nt";
1003 boolis_prot =
false;
1007oss_fn <<
"."<<
dbname<<
"."<< gi;
1009vector<char> seqdb_data;
1010vector<char> expected_data;
1019 boolgi_trans = db.
GiToOid(gi, oid);
1021BOOST_REQUIRE(gi_trans);
1030BOOST_REQUIRE(! bs.
Empty());
1034BOOST_REQUIRE_EQUAL(
int(seqdb_data.size()), 872);
1037 stringexpected_bs =
1042 " accession \"AJ389663\",\n" 1050 " seq-data ncbi4na '42184822114812141288821418148411122424118442821881118214\n" 1051 "824144882288141824882211822512824418112848442118828141428118121842111211428224\n" 1052 "122228888112244444411141424288881881418211112211842444888848282442118222428211\n" 1053 "288884484128284418112888484284182421244222824142244241248182888211184828422281\n" 1054 "821128881482488124841818422811241448848812444811244441182144488241882244141444\n" 1055 "142184141112442812212182211441144214214424242111881222128222442124444144814841\n" 1056 "241111181124184244412828182414422224811824411841481212888111822888112414418211\n" 1057 "884414442114828448422142142242448118822142822118142481818811148848842148811111\n" 1058 "428248148844182824444411442814244864242248844424822812842824122841228122442244\n" 1059 "814888484222414484282884128414848282444841224424148881288841111118814148428211\n" 1060 "142144228848422422241181484484218441181184411414412282448828188884884488882441\n" 1061 "124841448118418811414441214124444421688248188424424281414484111882884412242242\n" 1062 "11412441281284241114218884221142184888821881FFF1141124111482141448824114124182\n" 1063 "141812248244814882841221811124FFF241284424182243241148812812818412824424442142\n" 1064 "228214441112211148288844488224444411481844884F11142841112881114411884124411444\n" 1065 "212212214414844142284244288118884128211212444111128212224422244121224841441884\n" 1066 "121418841414282888282418824484448448448421844224882881488448441424188848284488\n" 1067 "11882241811241124141282814228428111814822A224188242228182482442144412882881414\n" 1068 "441241484424818142212424141884142118112144828484184222881418488244442242124242\n" 1069 "428121284114411821421248284228222844222411144488444811222428411228228824842814\n" 1070 "441884444288481188488222218411241441188222148114242414821811428242488418812482\n" 1071 "228422288848121212242224824281281221188414244888128414441211441884422224124144\n" 1072 
"24282244248282842448A88842241411284222211148421284'H\n" 1077 "GCATGTCCAAGTACAGACTTTCAGATAGTGAAACCGCGAATGGCTCATTAAATCAGTCGA" 1078 "GGTTCCTTAGATCGTTCCAATCCRACTCGGATAACTGTGGCAATTCTAGAGCTAATACAT" 1079 "GCAAACAAGCTCCGACCCCTTTTAACCGGGGGGAAAGAGCGCTTTTATTAGATCAAAACC" 1080 "AATGCGGGTTTTGTCTCGGCAATCCCGCTCAACTTTTGGTGACTCTGGATAACTTTGTGC" 1081 "TGATCGCACGGCCCTCGAGCCGGCGACGTATCTTTCAAATGTCTGCCCTATCAACTTTAG" 1082 "TCGTTACGTGATATGCCTAACGAGGTTGTTACGGGTAACGGGGAATCAGGGTTCGATTCC" 1083 "GGAGAGGGAGCATGAGAAACGGCTACCACATCCAAGGAAGGCAGCAGGCGCGCAAATTAC" 1084 "CCACTCCCGGCACGGGGAGGTAGTGACGAAAAATAACGATGCGGGACTCTATCGAGGCCC" 1085 "CGTAATCGGAATGAGTACACTTTAAATCCTTTAACGAGGATCAATTGGAGGGCAAGTCTG" 1086 "GTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAA" 1087 "AAGCTCGTAGTTGGATCTCGGGGGAAGGCTAGCGGTSGCGCCGTTGGGCGTCCTACTGCT" 1088 "CGACCTGACCTACCGGCCGGTAGTTTGTGCCCGAGGTGCTCTTGACTGAGTGTCTCGGGT" 1089 "GACCGGCGAGTTTACTTTGAAAAAATTAGAGTGCTCAAAGCAGGCCTTGTGCCGCCCGAA" 1090 "TAGTGGTGCATGGAATAATGGAAGAGGACCTCGGTTCTATTTTGTTGGTTTTCGGAACGT" 1091 "GAGGTAATGATTAAGAGGGACAGACGGGGGCA";
1093expected_data.assign(data_str.data(),
1094data_str.data() + data_str.size());
1096vector<char> seqdb_tmp;
1105seqdb_tmp.swap(seqdb_data);
1107BOOST_REQUIRE_EQUAL(expected_bs, seqdb_bs);
1108BOOST_REQUIRE_EQUAL(expected_data.size(), seqdb_data.size());
1110 Uint4num_diffs = 0;
1112 for(
Uint4 i= 0;
i< expected_data.size();
i++) {
1113 unsigned R= unsigned(expected_data[
i]) & 0xFF;
1114 unsigned S= unsigned(seqdb_data[
i]) & 0xFF;
1120cout <<
"At location "<< dec <<
i<<
", Readdb has: "<<
hex<<
int(
R) <<
" whereas SeqDB has: "<<
hex<<
int(
S);
1123cout <<
" (R += "<< (
R-
S) <<
")\n";
1125cout <<
" (S += "<< (
S-
R) <<
")\n";
1133cout <<
"Num diffs: "<< dec << num_diffs << endl;
1136BOOST_REQUIRE_EQUAL((
int) 0, (
int)num_diffs);
1142 boolcaught_exception =
false;
1151BOOST_REQUIRE_EQUAL((
int) 11112222,
len);
1153caught_exception =
true;
1156 if(! caught_exception) {
1157BOOST_ERROR(
"GetLenHighOID() did not throw an exception of type CSeqDBException.");
1164 boolcaught_exception =
false;
1170BOOST_REQUIRE_EQUAL((
Uint4) 11112222,
len);
1172caught_exception =
true;
1175 if(! caught_exception) {
1176BOOST_ERROR(
"GetLenNegOID() did not throw an exception of type CSeqDBException.");
1183 boolcaught_exception =
false;
1194BOOST_REQUIRE_EQUAL((
Uint4) 11112222,
len);
1196caught_exception =
true;
1199 if(! caught_exception) {
1200BOOST_ERROR(
"GetSeqHighOID() did not throw an exception of type CSeqDBException.");
1207 boolcaught_exception =
false;
1215BOOST_REQUIRE_EQUAL((
Uint4) 11112222,
len);
1217caught_exception =
true;
1220 if(! caught_exception) {
1221BOOST_ERROR(
"GetSeqNegOID() did not throw an exception of type CSeqDBException.");
1228 boolcaught_exception =
false;
1237 nr.GetOidAtOffset(0, vlength + 1);
1239caught_exception =
true;
1242 if(! caught_exception) {
1243BOOST_ERROR(
"Offset2OidBadOffset() did not throw an exception of type CSeqDBException.");
1250 boolcaught_exception =
false;
1258 nr.GetOidAtOffset(noids + 1, 0);
1260caught_exception =
true;
1263 if(! caught_exception) {
1264BOOST_ERROR(
"Offset2OidBadOid() did not throw an exception of type CSeqDBException.");
1271 Uint4segments = 1000;
1274 string dbname((
i== 0) ?
"nr":
"nt");
1283 Uint8vol_length(0);
1287 for(
Uint4j = 0; j < segments; j++) {
1288 Uint8range_target = (vol_length * j) / segments;
1292 doublerange_ratio = double(range_target) / vol_length;
1293 doubleoid_ratio = double(oid_here) / num_oids;
1294 doublepercent_diff = 100.0 *
fabs(oid_ratio - range_ratio);
1303BOOST_REQUIRE(prev_oid <= oid_here);
1304BOOST_REQUIRE(percent_diff <= 30.0);
1306prev_oid = oid_here;
1349vector<string>
names;
1351 names.push_back(
"p,nr");
1352 names.push_back(
"n,nt");
1353 names.push_back(
"n,pdbnt");
1354 names.push_back(
"p,pdb");
1355 names.push_back(
"p,CDSEARCH/oasis_pfam");
1358BOOST_REQUIRE(s->length() > 2);
1360 charprot_nucl = (*s)[0];
1361 string dbname(*s, 2, s->length()-2);
1372 const char* mask_name[] = {
1373 "range",
"odd",
"even",
"prime",
"ERROR" 1407 for(
int i= 0; ranges[
i];
i+= 2) {
1408 unsigned first= ranges[
i];
1409 unsignedsecond = ranges[
i+1];
1418 intobegin(0), oend(0);
1421 intlowest(INT_MAX);
1437 unsignednum_found(0);
1440num_found = (
int) oids.size();
1442 ITERATE(vector<int>, iter, oids) {
1443 if((*iter) > highest) {
1447 if((*iter) < lowest) {
1454num_found = oend-obegin;
1457 if(oend > highest) {
1461 if(obegin < lowest) {
1466 for(
intv = obegin; v < oend; v++) {
1471 if(obegin == oend) {
1486 TGilow_gi = 20*1000*1000;
1487 TGihigh_gi = 30*1000*1000;
1493dbs.push_back(
"data/seqp");
1494dbs.push_back(
"data/ranges/seqp15");
1495dbs.push_back(
"data/ranges/twenty");
1496dbs.push_back(
"data/ranges/twenty15");
1498 for(
Uint4dbnum = 0; dbnum < dbs.size(); dbnum++) {
1501 boolall_gis_in_range =
true;
1502 boolall_oids_in_range =
true;
1506 if(! (all_oids_in_range || all_gis_in_range)) {
1510 if(all_oids_in_range) {
1511 if((oid < (low_oid-1)) || ((high_oid-1)) < oid) {
1512all_oids_in_range =
false;
1516 if(all_gis_in_range) {
1517list< CRef<CSeq_id> > ids = db.
GetSeqIDs(oid);
1519 boolgi_in_range =
false;
1522 if((**seqid).IsGi()) {
1523 TGigi = (**seqid).GetGi();
1525 if((gi > low_gi) && (gi < high_gi)) {
1526gi_in_range =
true;
1532 if(! gi_in_range) {
1533all_gis_in_range =
false;
1538 boolgis_confined (
false);
1539 booloids_confined(
false);
1543gis_confined =
false;
1544oids_confined =
false;
1548gis_confined =
false;
1549oids_confined =
true;
1553gis_confined =
true;
1554oids_confined =
false;
1558gis_confined =
true;
1559oids_confined =
true;
1563BOOST_REQUIRE_EQUAL(oids_confined, all_oids_in_range);
1564BOOST_REQUIRE_EQUAL(gis_confined, all_gis_in_range);
1571 boolcaught_exception =
false;
1576caught_exception =
true;
1579 if(! caught_exception) {
1580BOOST_ERROR(
"EmptyDBList() did not throw an exception of type CSeqDBException.");
1622BOOST_REQUIRE_EQUAL(29, found);
1639BOOST_REQUIRE_EQUAL(29, found);
1654BOOST_REQUIRE_EQUAL(58, found);
1660 const string kFileName(
"data/prot345t.gil");
1666BOOST_REQUIRE_EQUAL((
size_t) seqdbgifile.
GetNumGis(), gis.size());
1667 sort(gis.begin(), gis.end());
1671ifstream gifile(fn.c_str());
1672BOOST_REQUIRE(gifile);
1674vector<TGi> reference;
1675reference.reserve(gis.size());
1676 while( !gifile.eof() ) {
1679 if(tgi == -1)
break;
1682 sort(reference.begin(), reference.end());
1683BOOST_REQUIRE_EQUAL(reference.size(), gis.size());
1686 for(
size_t i= 0;
i< reference.size();
i++) {
1688BOOST_REQUIRE_MESSAGE(reference[
i] == gis[
i],
msg);
1696dbs.push_back(
"Test/Giardia.01");
1697dbs.push_back(
"Test/baylor_wgs_contigs.01");
1698dbs.push_back(dbs[0] +
" "+ dbs[1]);
1700vector< vector<TGi> > gis(dbs.size());
1701vector< vector<string> > volumes(dbs.size());
1703 for(
int i= 0;
i< (
int)dbs.size();
i++) {
1705BOOST_REQUIRE_NO_THROW(db.
Reset(
new CSeqDB(dbs[
i],
1713db->
GetGis(oid, gis[
i],
true);
1719BOOST_REQUIRE(volumes[0] == volumes[1]);
1720BOOST_REQUIRE(volumes[0] == volumes[2]);
1721BOOST_REQUIRE_EQUAL(gis[0].
size() + gis[1].
size(), gis[2].
size());
1723vector<TGi> zero_one(gis[0]);
1724zero_one.insert(zero_one.end(), gis[1].begin(), gis[1].end());
1726 sort(zero_one.begin(), zero_one.end());
1727 sort(gis[2].begin(), gis[2].end());
1729BOOST_REQUIRE(zero_one == gis[2]);
1735 TGigi1a = 446106212;
1738 TGigi2a = 494110381;
1739 TGigi2b = 30172867;
1748 boolsuccess = db.
GiToOid(gi1a, oid1);
1749BOOST_REQUIRE(success);
1751success = db.
GiToOid(gi2a, oid2);
1752BOOST_REQUIRE(success);
1754BOOST_REQUIRE(oid1 != oid2);
1759BOOST_REQUIRE_EQUAL((
int)gi2taxid.
size(), 44);
1760BOOST_REQUIRE_EQUAL(gi2taxid[gi1a],
tax1);
1763BOOST_REQUIRE_EQUAL((
int)gi2taxid.
size(), 23);
1764BOOST_REQUIRE_EQUAL(gi2taxid[gi2a], tax2a);
1765BOOST_REQUIRE_EQUAL(gi2taxid[gi2b], tax2b);
1768BOOST_REQUIRE_EQUAL((
int)gi2taxid.
size(), 67);
1769BOOST_REQUIRE_EQUAL(gi2taxid[gi1a],
tax1);
1770BOOST_REQUIRE_EQUAL(gi2taxid[gi2a], tax2a);
1771BOOST_REQUIRE_EQUAL(gi2taxid[gi2b], tax2b);
1774 #define BEGIN(X) (X) 1775 #define END(X) ((X) + (sizeof (X) / sizeof *(X))) 1780 TGigi1a = 446106212;
1789 TGigi2a = 494110381;
1802 boolsuccess = db.
GiToOid(gi1a, oid1);
1803BOOST_REQUIRE(success);
1805success = db.
GiToOid(gi2a, oid2);
1806BOOST_REQUIRE(success);
1808BOOST_REQUIRE(oid1 != oid2);
1819BOOST_REQUIRE(gi2taxids.
empty());
1821BOOST_REQUIRE_EQUAL((
int) gi2taxids.
size(), 44);
1822BOOST_REQUIRE_EQUAL((
int) gi2taxids[gi1a].
size(), 5);
1823BOOST_REQUIRE_EQUAL_COLLECTIONS(
1824gi2taxids[gi1a].begin(), gi2taxids[gi1a].end(),
1825expected1.
begin(), expected1.
end()
1829BOOST_REQUIRE(!gi2taxids.
empty());
1831BOOST_REQUIRE_EQUAL((
int) gi2taxids.
size(), 23);
1832BOOST_REQUIRE_EQUAL((
int) gi2taxids[gi2a].
size(), 4);
1833BOOST_REQUIRE_EQUAL_COLLECTIONS(
1834gi2taxids[gi2a].begin(), gi2taxids[gi2a].end(),
1835expected2a.
begin(), expected2a.
end()
1839BOOST_REQUIRE(!gi2taxids.
empty());
1841BOOST_REQUIRE_EQUAL((
int) gi2taxids.
size(), 67);
1842BOOST_REQUIRE_EQUAL((
int) gi2taxids[gi1a].
size(), 5);
1843BOOST_REQUIRE_EQUAL((
int) gi2taxids[gi2a].
size(), 4);
1844BOOST_REQUIRE_EQUAL_COLLECTIONS(
1845gi2taxids[gi1a].begin(), gi2taxids[gi1a].end(),
1846expected1.
begin(), expected1.
end()
1848BOOST_REQUIRE_EQUAL_COLLECTIONS(
1849gi2taxids[gi2a].begin(), gi2taxids[gi2a].end(),
1850expected2a.
begin(), expected2a.
end()
1857 TGigi1a = 446106212;
1905 TGigi2a = 494110381;
1935vector<int> expected1;
1937 sort(expected1.begin(), expected1.end());
1939vector<int> expected2a;
1940expected2a.assign(
BEGIN(tax2a),
END(tax2a));
1941 sort(expected2a.begin(), expected2a.end());
1945 boolsuccess = db.
GiToOid(gi1a, oid1);
1946BOOST_REQUIRE(success);
1948success = db.
GiToOid(gi2a, oid2);
1949BOOST_REQUIRE(success);
1951BOOST_REQUIRE(oid1 != oid2);
1953vector<TTaxId> taxids;
1957 sort(taxids.begin(), taxids.end());
1958BOOST_REQUIRE_EQUAL((
int) taxids.size(), (
int) expected1.size());
1959BOOST_REQUIRE_EQUAL_COLLECTIONS(
1960taxids.begin(), taxids.end(),
1961expected1.begin(), expected1.end()
1966 sort(taxids.begin(), taxids.end());
1967BOOST_REQUIRE_EQUAL((
int) taxids.size(), (
int) expected2a.size());
1968BOOST_REQUIRE_EQUAL_COLLECTIONS(
1969taxids.begin(), taxids.end(),
1970expected2a.begin(), expected2a.end()
1978 sort(expected2a.begin(), expected2a.end());
1982 sort(taxids.begin(), taxids.end());
1983BOOST_REQUIRE_EQUAL((
int) taxids.size(), (
int) expected2a.size());
1984BOOST_REQUIRE_EQUAL_COLLECTIONS(
1985taxids.begin(), taxids.end(),
1986expected2a.begin(), expected2a.end()
1993 TGigi1a = 446106212;
19951386, 1392, 1396, 1428, 1234146
1998 TGigi2a = 494110381;
20001678, 216816, 469594, 1263059
2006vector<int> expected1;
2008 sort(expected1.begin(), expected1.end());
2010vector<int> expected2a;
2011expected2a.assign(
BEGIN(tax2a),
END(tax2a));
2012 sort(expected2a.begin(), expected2a.end());
2016 boolsuccess = db.
GiToOid(gi1a, oid1);
2017BOOST_REQUIRE(success);
2019success = db.
GiToOid(gi2a, oid2);
2020BOOST_REQUIRE(success);
2022BOOST_REQUIRE(oid1 != oid2);
2024vector<TTaxId> taxids;
2028 sort(taxids.begin(), taxids.end());
2029BOOST_REQUIRE_EQUAL((
int) taxids.size(), (
int) expected1.size());
2030BOOST_REQUIRE_EQUAL_COLLECTIONS(
2031taxids.begin(), taxids.end(),
2032expected1.begin(), expected1.end()
2037 sort(taxids.begin(), taxids.end());
2038BOOST_REQUIRE_EQUAL((
int) taxids.size(), (
int) expected2a.size());
2039BOOST_REQUIRE_EQUAL_COLLECTIONS(
2040taxids.begin(), taxids.end(),
2041expected2a.begin(), expected2a.end()
2049 sort(expected2a.begin(), expected2a.end());
2053 sort(taxids.begin(), taxids.end());
2054BOOST_REQUIRE_EQUAL((
int) taxids.size(), (
int) expected2a.size());
2055BOOST_REQUIRE_EQUAL_COLLECTIONS(
2056taxids.begin(), taxids.end(),
2057expected2a.begin(), expected2a.end()
2078 const intkNumTestGis = 3;
2079 const intkGiOids[kNumTestGis] = { 15, 51, 84 };
2085vector<int> oid_list;
2094BOOST_REQUIRE_EQUAL(1, (
int)oid_list.size());
2102BOOST_REQUIRE_EQUAL(1, (
int)oid_list.size());
2110BOOST_REQUIRE_EQUAL(1, (
int)oid_list.size());
2126oids1.push_back(oid);
2129BOOST_REQUIRE(! oids2.empty());
2131 ITERATE(vector<int>, iter, oids1) {
2132BOOST_REQUIRE(*iter == oids2[0]);
2141 const intkFirstOid(0);
2142 const intkLastOid(100);
2146vector<int> oid_list;
2151BOOST_REQUIRE_EQUAL(kFirstOid, start);
2152BOOST_REQUIRE_EQUAL(kLastOid, end);
2156BOOST_REQUIRE_EQUAL(kFirstOid, start);
2157BOOST_REQUIRE_EQUAL(kFirstOid, end);
2162BOOST_REQUIRE_EQUAL(kFirstOid, start);
2163BOOST_REQUIRE_EQUAL(kLastOid, end);
2180BOOST_REQUIRE_EQUAL(
info.taxid, 57176);
2181BOOST_REQUIRE_EQUAL((
string)
info.scientific_name,
string(
"Aotus vociferans"));
2182BOOST_REQUIRE_EQUAL((
string)
info.common_name,
string(
"noisy night monkey"));
2183BOOST_REQUIRE_EQUAL((
string)
info.blast_name,
string(
"primates"));
2184BOOST_REQUIRE_EQUAL((
string)
info.s_kingdom,
string(
"Eukaryota"));
2187BOOST_REQUIRE_EQUAL(
info.taxid, 562);
2202 intslen(0),alen(0);
2208 unsignedexp_hash = 705445389u;
2210BOOST_REQUIRE_EQUAL((290/4) + 1, slen);
2211BOOST_REQUIRE_EQUAL(20, alen);
2212BOOST_REQUIRE_EQUAL(exp_hash, h);
2229 ITERATE(vector<int>, oid, oids) {
2230 intslen(0),alen(0);
2236 string A(
buffer+ slen, alen);
2240BOOST_REQUIRE_EQUAL((
int)
A.size(), 0);
2241BOOST_REQUIRE_EQUAL((
int)
S.size(),
len);
2242BOOST_REQUIRE_EQUAL((
int) *(
buffer-1), 0);
2243BOOST_REQUIRE_EQUAL((
int) *(
buffer+slen), 0);
2259 intslen(0),alen(0);
2263BOOST_REQUIRE_EQUAL((290/4) + 1, slen);
2264BOOST_REQUIRE_EQUAL(20, alen);
2274 intlow(0), high(0),
count(0);
2276 nr.GetPigBounds(& low, & high, &
count);
2278BOOST_REQUIRE(low < high);
2279BOOST_REQUIRE(
count);
2286 boolcaught_exception =
false;
2297BOOST_REQUIRE(low < high);
2298BOOST_REQUIRE(
count);
2303 intlow(0), high(0),
count(0);
2307BOOST_REQUIRE(low < high);
2308BOOST_REQUIRE(
count);
2310caught_exception =
true;
2313 if(! caught_exception) {
2314BOOST_ERROR(
"ExpertIdBoundsNoPig() did not throw an exception of type CSeqDBException.");
2321 typedefpair<bool, string> TStringBool;
2322 typedefvector< TStringBool > TStringBoolVec;
2324TStringBoolVec paths;
2325paths.push_back(TStringBool(
true,
"nt.000.nin"));
2326paths.push_back(TStringBool(
true,
"Test/ITS_RefSeq_Fungi.nal"));
2327paths.push_back(TStringBool(
true,
"taxdb.bti"));
2328paths.push_back(TStringBool(
true,
"data/seqp.pin"));
2329paths.push_back(TStringBool(
false,
"nr.00"));
2333 ITERATE(TStringBoolVec, iter, paths) {
2334 stringfilename = iter->second;
2336 boolfound = ! resolved.empty();
2339 intposition = resolved.find(filename);
2341BOOST_REQUIRE(found);
2344BOOST_REQUIRE(resolved.size() > filename.size());
2347BOOST_REQUIRE_EQUAL(position + filename.size(), resolved.size());
2349BOOST_REQUIRE(! found);
2359 for(
size_t i= 0;
i< gis.size();
i++) {
2378a3.push_back(special);
2380 for(
Uint4 i= 0; (
i*3) < 500;
i++) {
2389a5.push_back(special);
2396 for(
Uint4 i= 0;
i< 500;
i++) {
2398 if(((
i% 15) == 0) || (gi == special)) {
2399BOOST_REQUIRE(
true== both.
FindGi(gi));
2401BOOST_REQUIRE(
false== both.
FindGi(gi));
2419a3.push_back(special);
2421 for(
Uint4 i= 0; (
i*3) < 500;
i++) {
2430a5.push_back(special);
2441 for(
int i= 0;
i< (
int)a5.size();
i++) {
2443BOOST_REQUIRE(
false== both.
FindGi(a5[
i]));
2445BOOST_REQUIRE(
true== both.
FindGi(a5[
i]));
2452BOOST_REQUIRE(std::find(a5.begin(), a5.end(), gi) != a5.end());
2469a3.push_back(special);
2471 for(
int i= 0; (
i*3) < 500;
i++) {
2472a3.push_back(
i*3);
2475a5.push_back(
i*5);
2480a5.push_back(special);
2489BOOST_REQUIRE(calc->IsPositive());
2492 for(
int i= 0;
i< 500;
i++) {
2493 boolis_3 = ((
i% 3) == 0) || (
i== special);
2494 boolis_5 = ((
i% 5) == 0) || (
i== special);
2496 if(is_3 && (! is_5)) {
2497BOOST_REQUIRE(
true== and_not->
FindGi(
i));
2499BOOST_REQUIRE(
false== and_not->
FindGi(
i));
2517m2.push_back(special);
2518m3.push_back(special);
2519m5.push_back(special);
2520m7.push_back(special);
2522 for(
int i= 0;
i< 1000;
i++) {
2537m2.push_back(special);
2538m3.push_back(special);
2539m5.push_back(special);
2540m7.push_back(special);
2575BOOST_REQUIRE(! not_m5_ornot_m7.
IsPositive());
2593 for(
int i= 0;
i< 1000;
i++) {
2594 boold2(!(
i%2)), d3(!(
i%3)), d5(!(
i%5)), d7(!(
i%7));
2596 if(
i== special) {
2597d2 = d3 = d5 = d7 =
true;
2604 boolin_c1 = ( d2 && !d3) || ( d5 && !d7);
2605 boolin_c2 = (!d2 || d3) && ( d5 != d7);
2606 boolin_c3 = ( d2 || !d3) && (!d5 || !d7);
2608BOOST_REQUIRE_EQUAL(in_c1, c1p->
FindGi(
i));
2609BOOST_REQUIRE_EQUAL(in_c2, c2p->
FindGi(
i));
2610BOOST_REQUIRE_EQUAL(in_c3, ! c3n->
FindGi(
i));
263046071115, 46071116, 46071117, 46071118, 46071119,
263146071120, 46071121, 46071122, 46071123, 46071124,
263246071125, 46071126, 46071127, 46071128, 46071129,
263346071130, 46071131, 46071132, 46071133, 46071134 };
2635BOOST_REQUIRE((
sizeof(v1)/
sizeof(
int)) == 20);
2637vector<int>
all(v1, v1 + 20);
2638vector<int> mid(v1 + 5, v1 + 15);
2653 stringnm =
"data/seqn";
2658 CSeqDBdb_A(nm, ty, All);
2659 CSeqDBdb_M(nm, ty, Mid);
2660 CSeqDBdb_N(nm, ty, Neg);
2661 CSeqDBdb_TB(nm, ty, TopBot);
2662 CSeqDBdb_NTB(nm, ty, NotTopBot);
2671 boolA_have =
s_DbHasOID(db_A, A_count, oid);
2672 boolM_have =
s_DbHasOID(db_M, M_count, oid);
2673 boolN_have =
s_DbHasOID(db_N, N_count, oid);
2674 boolTB_have =
s_DbHasOID(db_TB, TB_count, oid);
2675 boolNTB_have =
s_DbHasOID(db_NTB, NTB_count, oid);
2677BOOST_REQUIRE((! M_have) || A_have);
2678BOOST_REQUIRE(A_have != N_have);
2679BOOST_REQUIRE((! TB_have) || A_have);
2681BOOST_REQUIRE((!M_have) || (!N_have));
2682BOOST_REQUIRE((!M_have) || (!TB_have));
2683BOOST_REQUIRE((!M_have) || NTB_have);
2685BOOST_REQUIRE((!N_have) || (!TB_have));
2686BOOST_REQUIRE((!N_have) || NTB_have);
2688BOOST_REQUIRE(TB_have != NTB_have);
2693BOOST_REQUIRE_EQUAL(
NSEQ, 100);
2695BOOST_REQUIRE_EQUAL(A_count, 20);
2696BOOST_REQUIRE_EQUAL(M_count, 10);
2697BOOST_REQUIRE_EQUAL(N_count,
NSEQ-A_count);
2698BOOST_REQUIRE_EQUAL(TB_count, A_count - M_count);
2699BOOST_REQUIRE_EQUAL(NTB_count + TB_count, 100);
2703BOOST_REQUIRE(! idset_TB.
Blank());
2712 const char*s1 = 0, *s2 = 0;
2718BOOST_REQUIRE(
string(s1) ==
string(s2));
2741 for(
const char** p =
str; *p; p++) {
2742 if((*p)[0] ==
'#') {
2765 const char*
str[] =
2779BOOST_REQUIRE_EQUAL((
int)ids->GetNumSis(), 9);
2783 for(
int i= 0;
i< ids->GetNumSis();
i++) {
2784BOOST_REQUIRE(ids->GetSiOid(
i).oid == -1);
2791 for(
int i= 0;
i< ids->GetNumSis();
i++) {
2792BOOST_CHECK_MESSAGE(ids->GetSiOid(
i).oid != -1,
2793 "Seqid "<< ids->GetSiOid(
i).si <<
" is unresolved");
2805BOOST_REQUIRE_EQUAL(k, ids->GetNumSis());
2814BOOST_REQUIRE( dbp.
GiToOid(gi, the_oid));
2815BOOST_REQUIRE_EQUAL(oid, the_oid);
2822BOOST_REQUIRE( dbp.
GiToOid(gi, the_oid));
2823BOOST_REQUIRE_EQUAL(oid, the_oid);
2831 const char*
str[] = {
2847 "ref|NP_912855.1|",
2849 "sp|Q63931|CCKR_CAVPO",
2862 "ref|NP_760268.1|",
2863 "ref|NP_817911.1|",
2872BOOST_REQUIRE_EQUAL((
int)ids->GetNumSis(), 12);
2873BOOST_REQUIRE_EQUAL((
int)ids->GetNumGis(), 13);
2878 for(
i= 0;
i< ids->GetNumSis();
i++) {
2879BOOST_REQUIRE(ids->GetSiOid(
i).oid == -1);
2881 for(
i= 0;
i< ids->GetNumGis();
i++) {
2882BOOST_REQUIRE(ids->GetGiOid(
i).oid == -1);
2890 for(
i= 0;
str[
i];
i++) {
2891 boolfound =
false;
2894 if(
str[
i][0] ==
'#') {
2895 intgi = atoi(
str[
i] + 1);
2896found = ids->GiToOid(gi, oid);
2899found = ids->SiToOid(str_id, oid);
2902BOOST_REQUIRE_EQUAL(found,
true);
2904 if(
i>= 0 &&
i< 4) {
2905BOOST_REQUIRE_EQUAL(oid, -1);
2906}
else if(
i>= 15 &&
i< 25) {
2908cout <<
"oid = -1, id="<<
str[
i] << endl;
2911BOOST_REQUIRE(oid != -1);
2919 const char* inter[] = {
2925 "gi|28378617",
"ref|NP_785509.1|",
2926 "gi|23474175",
"ref|ZP_00129469.1|",
2927 "gi|27364740",
"ref|NP_760268.1|",
2928 "gi|23113886",
"ref|ZP_00099225.1|",
2929 "gi|28563952",
"ref|NP_788261.1|",
2930 "gi|29788717",
"gb|AAP03339.1|",
2931 "gi|29566344",
"ref|NP_817911.1|",
2932 "gi|28950006",
"emb|CAD70761.1|",
2933 "gi|21305377",
"gb|AAM45611.1|",
2939 for(
const char** p = inter; *p; p++)
2946 typedeflist< CRef<CSeq_id> > TIds;
2950 ITERATE(TIds, iter, the_ids) {
2954BOOST_REQUIRE(itr != need.
end());
2961BOOST_REQUIRE(need.
empty());
2972BOOST_REQUIRE_EQUAL((
string)db.
GetTitle(),
string(
"empty test database"));
2979vector<TTaxId> taxids;
2989 char* ncbuffer = 0;
3007BOOST_REQUIRE_EQUAL((
string)db.
GetTitle(),
string(
"empty test database"));
3008BOOST_REQUIRE_EQUAL((
string)db.
GetDate(),
string(
"Mar 19, 2007 11:38 AM"));
3015 Uint8seq_total = 0;
3022BOOST_REQUIRE_EQUAL(oid_count, 0);
3023BOOST_REQUIRE_EQUAL(seq_total,
Uint8(0));
3026BOOST_REQUIRE_NO_THROW(db.
Begin());
3032 intbegin(0), end(0);
3039BOOST_REQUIRE_EQUAL(
size_t(0), oids.size());
3041BOOST_REQUIRE_EQUAL(begin, end);
3045BOOST_REQUIRE_EQUAL((
string)db.
GetDBNameList(),
string(
"data/empty"));
3050 stringacc(
"P01013");
3051 CSeq_idseqid(
"sp|P01013|OVALX_CHICK");
3062BOOST_REQUIRE_EQUAL(
false, db.
PigToOid(pig, oid));
3063BOOST_REQUIRE_EQUAL(
false, db.
GiToOid(gi, oid));
3064BOOST_REQUIRE_EQUAL(
false, db.
GiToPig(gi, pig));
3065BOOST_REQUIRE_EQUAL(
false, db.
PigToGi(pig, gi));
3067BOOST_REQUIRE(oids.size() == 0);
3068BOOST_REQUIRE_NO_THROW(db.
SeqidToOids(seqid, oids));
3069BOOST_REQUIRE(oids.size() == 0);
3070BOOST_REQUIRE_EQUAL(
false, db.
SeqidToOid(seqid, oid));
3072 Uint8residue(12345);
3082vector<string> paths1;
3083vector<string> paths2;
3091BOOST_REQUIRE_EQUAL(paths1.size(),
size_t(1));
3092BOOST_REQUIRE_EQUAL(paths2.size(),
size_t(1));
3093BOOST_REQUIRE_EQUAL((
string)paths1[0], (
string)paths2[0]);
3119BOOST_REQUIRE(!sd.
Empty());
3126BOOST_REQUIRE(!sd.
Empty());
3135 for(
intdi = 0; di < 2; di++) {
3136 CSeqDB& db = di ? db65 : db56;
3138 for(
intoi = 0; oi < 2; oi++) {
3139list< CRef<CSeq_id> > ids = db.
GetSeqIDs(oi);
3144 while(! ids.empty()) {
3145 const CSeq_id&
id= *ids.front();
3148 id.GetGeneral().GetDb() ==
"BL_ORD_ID") {
3157BOOST_REQUIRE(
count== 1);
3158BOOST_REQUIRE(oid == oi);
3171 TGinucl_gi = 46071107;
3172 stringnucl_str = (
"AAGCTCTTCATTGATGGTAGAGAGCCTATTAACAGGCAAC" 3173 "AGTCAATGCTCCAAAGTCCAAACAAGATTACCTGTGCAAA" 3174 "GAACTTGCAGTGTAACAAACCCCNTTCACGGCCAGAAGTA" 3175 "TTTGCAACAATGTTGAAAGTCCTTCTGGCAGAGGAGGAGT" 3178 TGiprot_gi = 43914529;
3179 stringprot_str =
"MINKSGYEAKYKKSIKNNEEFWRKEGKRITWIKPYKKIKNVRYS";
3181 intnucl_oid(-1), prot_oid(-1);
3183 N.GiToOid(nucl_gi, nucl_oid);
3184 P.GiToOid(prot_gi, prot_oid);
3187 N.GetSequenceAsString(nucl_oid, nstr);
3188 P.GetSequenceAsString(prot_oid, pstr);
3190BOOST_REQUIRE_EQUAL((
string)nstr, (
string)nucl_str);
3191BOOST_REQUIRE_EQUAL((
string)pstr, (
string)prot_str);
3202BOOST_REQUIRE_EQUAL((
int)
local.GetTotalLength(), 12345);
3203BOOST_REQUIRE_EQUAL((
int)
local.GetTotalLengthStats(), 23456);
3204BOOST_REQUIRE_EQUAL((
int)
local.GetNumSeqs(), 123);
3205BOOST_REQUIRE_EQUAL((
int)
local.GetNumSeqsStats(), 234);
3216 m_Tis.push_back(*ids);
3218 m_Gis.push_back(*ids);
3231 int& amt = m[
key];
3251db.
GetGis(oid, gis,
false);
3253 ITERATE(vector<TGi>, iter, gis) {
3292BOOST_REQUIRE_EQUAL((
int)have_got.
GetNumSeqs(), 100);
3298BOOST_REQUIRE_EQUAL((
int)have_not.
GetNumSeqs(), 89);
3307 for(
int* idp = gis; *idp; ++idp) {
3311BOOST_REQUIRE_EQUAL((
int) id_pop.
size(), nlist_gis);
3312BOOST_REQUIRE_EQUAL(total, nlist_gis);
3319BOOST_REQUIRE_EQUAL((
int) id_pop.
size(), seqp_gis);
3320BOOST_REQUIRE_EQUAL(total, seqp_gis-nlist_gis);
3330BOOST_REQUIRE_EQUAL((
int) id_pop.
size(), 0);
3331BOOST_REQUIRE_EQUAL(total, -seqp_gis);
3340 bool ok= have_got.
GiToOid(gi1, oid1);
3341BOOST_REQUIRE(
ok);
3343list< CRef<CSeq_id> > got_ids = have_got.
GetSeqIDs(oid1);
3344list< CRef<CSeq_id> > not_ids = have_not.
GetSeqIDs(oid1);
3354BOOST_REQUIRE_EQUAL(diff, 2);
3374 boolfound = have_got.
GiToOid(gis[0], oid);
3375BOOST_REQUIRE(found);
3377vector<TGi> gis_w, gis_wo;
3378have_got.
GetGis(oid, gis_w);
3379have_not.
GetGis(oid, gis_wo);
3383 intcount_w = (
int) gis_w.size();
3384 intcount_wo = (
int) gis_wo.size();
3385BOOST_REQUIRE_EQUAL(count_w, (count_wo+1));
3392vector<unsigned int> pigs;
3393pigs.push_back(281224);
3398 stringdb =
"swissprot";
3399 const int len= 134;
3408 boolfound = have_got.
PigToOid(pigs[0], oid);
3409BOOST_REQUIRE(found);
3413BOOST_REQUIRE_EQUAL((
unsigned int)pig_w, pigs[0]);
3430 boolfound =
false;
3432 ITERATE(vector<int>, iter, oids) {
3439BOOST_REQUIRE(found);
3448 boolfound =
false;
3450 ITERATE(vector<int>, iter, oids) {
3457BOOST_REQUIRE(found);
3466NStr::Tokenize(
"1234 2468 4936 9872 19744 1234000 " 3467 "1234000000 1234000000000 1234000000000000",
3470 stringsides(
"B44448888");
3475BOOST_REQUIRE_EQUAL(sides.size(), ids.size());
3477 for(
size_t i= 0;
i< ids.size();
i++) {
3478 boolis4(
false), is8(
false);
3480 switch(sides[
i]) {
3495 stringidstr = ids[
i];
3500 boolhave = db4.TiToOid(idnum, oid);
3501BOOST_REQUIRE_EQUAL(is4, have);
3502BOOST_REQUIRE_EQUAL(is4, (oid >= 0));
3504have = db8.TiToOid(idnum, oid);
3505BOOST_REQUIRE_EQUAL(is8, have);
3506BOOST_REQUIRE_EQUAL(is8, (oid >= 0));
3508 CSeq_idseqid(
string(
"gnl|ti|") + idstr);
3511db4.SeqidToOids(seqid, oids);
3512BOOST_REQUIRE_EQUAL(is4, (oids.size() == 1));
3514db8.SeqidToOids(seqid, oids);
3515BOOST_REQUIRE_EQUAL(is8, (oids.size() == 1));
3530 intoid1(-1), oid2(-1);
3531 boolokay1 = p1.
PigToOid(pig, oid1);
3532 boolokay2 = p2.
PigToOid(pig, oid2);
3534BOOST_REQUIRE(okay1);
3535BOOST_REQUIRE(okay2);
3536BOOST_REQUIRE(oid1 > 0);
3537BOOST_REQUIRE(oid2 > 0);
3538BOOST_REQUIRE(oid1 == oid2);
3540 intsize1 = p1.
GetHdr(oid1)->
Get().size();
3541 intsize2 = p2.
GetHdr(oid2)->
Get().size();
3550BOOST_CHECK_NE(0, size1);
3551BOOST_CHECK_NE(0, size2);
3552BOOST_CHECK_GE(size1, 14);
3553BOOST_CHECK_GT(size1, (size2 + 5));
3563ostringstream fasta;
3567 ITERATE(list<string>, iter, ids) {
3571vector<int> tmp_oids;
3574BOOST_REQUIRE_MESSAGE(tmp_oids.size(),
3575 string(
"No OIDs found for ")+(*iter));
3577oids.insert(oids.end(), tmp_oids.begin(), tmp_oids.end());
3582 sort(oids.begin(), oids.end());
3583oids.erase(unique(oids.begin(), oids.end()), oids.end());
3585 ITERATE(vector<int>, iter, oids) {
3589 stringall_fasta = fasta.str();
3590 string msg=
string(
"Error for accession: ") + acc;
3592BOOST_REQUIRE_MESSAGE(all_fasta.size() == exp_size,
msg);
3593BOOST_REQUIRE_MESSAGE(exp_oids == oids.size(),
msg);
3610 s_CheckIdLookup(db,
"NP_268346, XP_642837.1, 30262378, ABD21303.1", 4, 5411);
3634 s_CheckIdLookup(db,
"NP_268346, XP_642837.1, 30262378, ABD21303.1", 4, 5411);
3688 stringacc(
"1QCF_A");
3691 nr.AccessionToOids(acc, oids);
3693BOOST_REQUIRE(oids.size());
3696 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 3697 (!defined(NCBI_COMPILER_MIPSPRO)) ) 3701 stringfname(
"data/user-column");
3702 stringvname(
"data/user-column-db");
3703 const stringtitle(
"comedy");
3708BOOST_REQUIRE_EQUAL(
CR.GetTitle(), title);
3715BOOST_REQUIRE_EQUAL((
int)
columns.size(), 1);
3716BOOST_REQUIRE_EQUAL(title,
columns[0]);
3719BOOST_REQUIRE(comedy_column >= 0);
3727BOOST_REQUIRE_EQUAL((
int)metadata_db.
size(), 3);
3728BOOST_REQUIRE_EQUAL(metadata_db.
find(
"created-by")->second,
string(
"unit test"));
3729BOOST_REQUIRE_EQUAL(metadata_db.
find(
"purpose")->second,
string(
"none"));
3730BOOST_REQUIRE_EQUAL(metadata_db.
find(
"format")->second,
string(
"text"));
3733BOOST_REQUIRE(metadata_db == metadata_user);
3741BOOST_REQUIRE(db.
GetColumnValue(comedy_column,
"format") ==
"text");
3742BOOST_REQUIRE(db.
GetColumnValue(comedy_column,
"duck soup") ==
"");
3743BOOST_REQUIRE(
CR.GetValue(
"format") ==
"text");
3744BOOST_REQUIRE(
CR.GetValue(
"who's on first") ==
"");
3749vector<string> volumes;
3755BOOST_REQUIRE(meta_vol0.
find(
"format") != meta_vol0.
end());
3756BOOST_REQUIRE(meta_vol0.
find(
"format")->second ==
"text");
3760vector<string> column_data;
3761column_data.push_back(
"Groucho Marx");
3762column_data.push_back(
"Charlie Chaplain");
3763column_data.push_back(
"");
3764column_data.push_back(
"Abbott and Costello");
3765column_data.push_back(
"Jackie Gleason");
3766column_data.push_back(
"Jerry Seinfeld");
3767column_data.back()[5] = (char) 0;
3771BOOST_REQUIRE_EQUAL((
int) column_data.size(), db.
GetNumOIDs());
3772BOOST_REQUIRE_EQUAL((
int) column_data.size(),
CR.GetNumOIDs());
3776 for(
intoid = 0; oid <
count; oid++) {
3778 CR.GetBlob(oid, cr_blob);
3780BOOST_REQUIRE(db_blob.
Str() == column_data[oid]);
3781BOOST_REQUIRE(cr_blob.
Str() == column_data[oid]);
3791 stringgood(
"Z12841.1");
3792 stringbad (
"Z12842.1");
3793 stringboth(
"Z12843.1");
3795vector<int> o1, o2, o3;
3800BOOST_REQUIRE(o1.size() == 1);
3801BOOST_REQUIRE(o2.size() == 0);
3802BOOST_REQUIRE(o3.size() == 1);
3805 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 3806 (!defined(NCBI_COMPILER_MIPSPRO)) ) 3817BOOST_REQUIRE_EQUAL((
int)algos.size(), 2);
3821 stringalgo_opts, algo_name;
3825filtering_algo, algo_name, algo_opts);
3829BOOST_REQUIRE_EQUAL(algo_opts,
kEmptyStr);
3832filtering_algo, algo_name, algo_opts);
3834BOOST_REQUIRE_EQUAL(algo_opts,
string(
"-species Desmodus_rotundus"));
3857BOOST_REQUIRE_EQUAL(algos.size(), 1U);
3858BOOST_REQUIRE_EQUAL(11, algos.front());
3862BOOST_REQUIRE(ranges.
empty());
3969BOOST_CHECK_EQUAL((
string) nr_sum.
CompareSelf(),
"=A=B=C=a=b=c");
3970BOOST_CHECK_EQUAL((
string) sp_sum.
CompareSelf(),
"=A=B=C=a=b=c");
3971BOOST_CHECK_EQUAL((
string) ac_sum.
CompareSelf(),
"+A+B=C+a+b=c");
3972BOOST_CHECK_EQUAL((
string) sc_sum.
CompareSelf(),
"+A+B=C+a+b=c");
3974BOOST_CHECK_EQUAL((
string) nr_sum.
Compare(sp_sum),
"+T+F+M+t+f+m");
3975BOOST_CHECK_EQUAL((
string) nr_sum.
Compare(ac_sum),
"=T+F+M=t+f+m");
3976BOOST_CHECK_EQUAL((
string) nr_sum.
Compare(sc_sum),
"+T+F+M+t+f+m");
3978BOOST_CHECK_EQUAL((
string) sp_sum.
Compare(sc_sum),
"=T+F+M=t+f+m");
3979BOOST_CHECK_EQUAL((
string) ac_sum.
Compare(sc_sum),
"+T+F+M+t+f+m");
3995 stringpath =
string(
"data") + ch +
"deltaseq";
3996ifstream
f(path.c_str());
4009BOOST_REQUIRE_EQUAL(h1, h2);
4021BOOST_REQUIRE(db.
GiToOid(555, oid));
4024BOOST_REQUIRE(deflines.
NotEmpty());
4029BOOST_REQUIRE(deflines.
Empty());
4035 const Int8kExpectedSize = 1420;
4036BOOST_REQUIRE_EQUAL(kExpectedSize, db.
GetDiskUsage());
4041vector<string> gnomon_ids;
4042gnomon_ids.push_back(
"gnl|GNOMON|334.p");
4043gnomon_ids.push_back(
"gnl|GNOMON|2334.p");
4044gnomon_ids.push_back(
"gnl|GNOMON|4334.p");
4045gnomon_ids.push_back(
"gnl|GNOMON|6334.p");
4046gnomon_ids.push_back(
"gnl|GNOMON|8334.p");
4049 for(
size_t i= 0;
i< gnomon_ids.size();
i++) {
4053BOOST_REQUIRE( !oids.empty() );
4054BOOST_REQUIRE_EQUAL(
i, (
size_t)oids.front());
4060BOOST_REQUIRE( !oids.empty() );
4061BOOST_REQUIRE_EQUAL(
i, (
size_t)oids.front());
4067BOOST_REQUIRE(found);
4068BOOST_REQUIRE_EQUAL(
i, (
size_t)oid);
4077 stringseqidlist_name = seqidlist_tmpfile.
GetFileName();
4078 stringblastdb_name = alias_file_tmpfile.
GetFileName() +
".pal";
4081 const stringkSeqIdIncluded =
"P01013.1";
4084ofstream stream(seqidlist_name.c_str());
4085stream << kSeqIdIncluded << endl;
4089ofstream stream(blastdb_name.c_str());
4090stream <<
"TITLE test for 129295 JIRA SB-646"<< endl;
4091stream <<
"DBLIST nr"<< endl;
4092stream <<
"SEQIDLIST "<< seqidlist_name << endl;
4100BOOST_REQUIRE_EQUAL(1U, oids.size());
4102 const stringseqid2search =
"WP_138200753.1";
4105BOOST_CHECK_EQUAL(0
U, oids.size());
4111 intrv = system(
"cp data/swiss_cheese.pal 'data/test space.pal'");
4112BOOST_REQUIRE_EQUAL(0, rv);
4113 stringdb_name =
"\"data/test space\"";
4118BOOST_REQUIRE_EQUAL((
string) dbs_sum.
CompareSelf(),
"+A+B=C+a+b=c");
4138BOOST_REQUIRE_EQUAL_COLLECTIONS(taxids.
begin(), taxids.
end(),
4139returned.
begin(), returned.
end());
4163returned.
begin(), returned.
end());
4176 const intoids[] = {
41770x7acee466, 0x4cbc1ab0,
41780x7d219922, 0x7e096431,
41790x276283ea, 0x13cee382,
41800x51f8b267, 0x37183674,
41810x03559cd6, 0x6bdcfbb7
4183 const Uint4nrecs = (
Uint4) (
sizeofoids /
sizeofoids[0]);
4197 #ifndef NCBI_INT8_GI 4199 const Uint4uint4_gi = 0xFFFFFFFF;
4205 const Int8big_gi = 0xC0000000;
4206 for(
Uint4 i= 0;
i< nrecs; ++
i) {
4215BOOST_REQUIRE(oid == oids[
i]);
4217BOOST_FAIL(
"CSeq_id constructor threw exception");
4232BOOST_REQUIRE_EQUAL(55, found);
4240list< CRef<CSeq_id> > ids = db.
GetSeqIDs(1);
4244 if((*itr)->IsGi()) {
4249fasta_id = (*itr)->AsFastaString();
4253BOOST_REQUIRE_EQUAL(1 , num_acc);
4254BOOST_REQUIRE_EQUAL(fasta_id ,
"prf||2209341B");
4265BOOST_REQUIRE_EQUAL(63, found);
4278BOOST_REQUIRE_EQUAL(1, found);
4279BOOST_REQUIRE_EQUAL(3, oid);
4291BOOST_REQUIRE_EQUAL(2, found);
4297 const unsigned intnum_pigs = 5;
4298 const intpigs[num_pigs] = {4377482, 1287445, 2, 6066974, 5303747};
4299 const unsigned intnum_valid_pig = 4;
4304 for(
unsigned int i=0;
i< num_pigs;
i++) {
4305pig_list->
AddPig(pigs[
i]);
4312 stringdb_name =
"swissprot";
4319BOOST_REQUIRE_EQUAL(pig_db.
GetNumSeqs(), 4);
4320BOOST_REQUIRE_EQUAL(negative_pig_db.
GetNumSeqs(), (
int) (total_num_seqs - num_valid_pig));
4322vector<string> seq_ids;
4325list< CRef<CSeq_id> > ids = pig_db.
GetSeqIDs(oid);
4327seq_ids.push_back(ids.front()->GetSeqIdString());
4328BOOST_REQUIRE_EQUAL(oid_found, oid);
4330BOOST_REQUIRE_EQUAL(seq_ids.size(), num_valid_pig);
4332 for(
unsigned int i=0;
i< seq_ids.size();
i++){
4335BOOST_REQUIRE_EQUAL(
not_found.size(), (
unsigned int) 0);
4343 const unsigned intnum_pigs = 5;
4344 const intpigs[num_pigs] = {2, 355704, 863725, 1727116, 24036443};
4345 stringdb_name =
"data/ipg_test";
4351 for(
unsigned int i=0;
i< num_pigs;
i++) {
4352pos_list->
AddPig(pigs[
i]);
4363BOOST_REQUIRE_EQUAL(total_num_seqs, 1);
4365 const intcheck_oids[1] = {12};
4367BOOST_REQUIRE_EQUAL(oid, check_oids[c]);
4372 for(
unsigned int i=0;
i< num_pigs;
i++) {
4373pos_list->
AddPig(pigs[
i]);
4384BOOST_REQUIRE_EQUAL(total_num_seqs, 3);
4386 const intcheck_oids[3] = {2, 6, 8};
4388BOOST_REQUIRE_EQUAL(oid, check_oids[c]);
4396 for(
unsigned int i=0;
i< num_pigs;
i++) {
4397p.push_back(pigs[
i]);
4409BOOST_REQUIRE_EQUAL(total_num_seqs, 5);
4411 const intcheck_oids[5] = {0, 1, 3, 5, 7};
4413BOOST_REQUIRE_EQUAL(oid, check_oids[c]);
4422 for(
unsigned int i=0;
i< num_pigs;
i++) {
4423p.push_back(pigs[
i]);
4434 const intcheck_oids[2] = {1, 3 };
4436BOOST_REQUIRE_EQUAL(oid, check_oids[c]);
4445 stringdb_name =
"refseq_mrna";
4452 inttotal_num_seqs = 0;
4453 Uint8total_length = 0;
4455BOOST_REQUIRE(total_num_seqs > 0);
4456BOOST_REQUIRE(total_length > 0);
4463 for(
int i=0;
i< MAX_FD_COUNT;
i++) {
4483 for(
Int8 i=0;
i< 10000;
i++) {
4498 const intkNumThreads=64;
4499vector<CTestThread*> threads;
4501 for(
int i=0;
i< kNumThreads;
i++) {
4504 for(
int i=0;
i< kNumThreads;
i++) {
4505threads[
i]->Run();
4507 for(
int i=0;
i< kNumThreads;
i++) {
4508threads[
i]->Join();
4516 stringdb_name =
"data/wp_nr_v5";
4519 stringacc =
"WP_007051162.1";
4520vector<TTaxId> tax_ids;
4522BOOST_REQUIRE_EQUAL(tax_ids.size(), 4U);
4523BOOST_REQUIRE_EQUAL(tax_ids[0], 1678);
4524BOOST_REQUIRE_EQUAL(tax_ids[3], 1263059);
4528vector<TTaxId> tax_ids;
4530BOOST_REQUIRE_EQUAL(tax_ids.size(), 1U);
4531BOOST_REQUIRE_EQUAL(tax_ids[0], 1205679);
4534 stringacc =
"junk";
4535vector<TTaxId> tax_ids;
4537BOOST_REQUIRE_EQUAL(tax_ids.size(), 0
U);
4543 stringdb_name =
"data/test_v4";
4546 stringacc =
"pir||T49736";
4547vector<TTaxId> tax_ids;
4549BOOST_REQUIRE_EQUAL(tax_ids.size(), 1U);
4550BOOST_REQUIRE_EQUAL(tax_ids[0], 0);
4553 stringacc =
"junk";
4554vector<TTaxId> tax_ids;
4556BOOST_REQUIRE_EQUAL(tax_ids.size(), 0
U);
4564test_list.
AddGi(689972625);
4565test_list.
AddGi(689972107);
4566test_list.
AddGi(689971919);
4567test_list.
AddGi(689971844);
4568test_list.
AddGi(689971914);
4569test_list.
AddGi(123);
4573 stringdb_name =
"data/15_seqs_v5";
4576vector<CSeqDBGiList::SGiOid> gilist = test_list.
GetGiList();
4577 for(
unsigned int i=0;
i< test_list.
GetNumGis();
i++){
4578BOOST_REQUIRE_EQUAL( gilist[
i].oid, oids[
i]);
4584 stringalias_db_name =
"data/10_seqs_alias";
4587vector<CSeqDBGiList::SGiOid> gilist = test_list.
GetGiList();
4588 for(
unsigned int i=0;
i< test_list.
GetNumGis();
i++){
4589BOOST_REQUIRE_EQUAL( gilist[
i].oid, oids[
i]);
4600 const intgi = 1492455000;
4602 stringgiless_protein =
"nr";
4604BOOST_REQUIRE_EQUAL(db_giless.
GiToPig(gi, pig),
false);
4605BOOST_REQUIRE_EQUAL(pig, -1);
4607 for(
int i=0;
i< 200000;
i++) {
static const char * kFileName
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
ncbi::TMaskedQueryRegions mask
vector< CRef< CSeq_id > > SeqIdList
`Blob' Class for SeqDB (and WriteDB).
CTempString Str() const
Get blob contents as a CTempString.
TTaxIds GetLeafTaxIds() const
void SetLeafTaxIds(const TTaxIds &t)
FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>
GI list containing the intersection of two other lists of GIs.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
CNegativeIdList(const int *ids, bool use_tis)
static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)
int ChangeOpenedFilseCount(EFilesCount fc)
int GetOpenedFilseCount(void)
void GetRawSeqAndAmbig(int oid, const char **buffer, int *seq_length, int *ambig_length) const
Raw Sequence and Ambiguity Data.
void GetPigBounds(int *low_id, int *high_id, int *count)
Get PIG Bounds.
unsigned GetSequenceHash(int oid)
Get the sequence hash for a given OID.
void HashToOids(unsigned hash, vector< int > &oids)
Get the OIDs for a given sequence hash.
void GetGiBounds(TGi *low_id, TGi *high_id, int *count)
Get GI Bounds.
vector< SGiOid > m_GisOids
Pairs of GIs and OIDs.
int GetNumGis() const
Get the number of GIs in the array.
void GetPigList(vector< TPig > &pigs) const
void GetGiList(vector< TGi > &gis) const
Get the gi list.
void AddTaxIds(const set< TTaxId > &tax_ids)
T GetKey(int index) const
void AddGi(TGi gi)
Add a new GI to the list.
bool FindGi(TGi gi) const
Test for existence of a GI.
vector< SSiOid > m_SisOids
Pairs of Seq-ids and OIDs.
SeqDB ID list for performing boolean set operations.
bool Blank() const
Check if an ID list is blank.
void Compute(EOperation op, const vector< int > &ids, bool positive=true)
Perform a logical operation on a list.
bool IsPositive()
Checks whether a positive GI list was produced.
CRef< CSeqDBNegativeList > GetNegativeList()
Retrieve a negative GI list.
CRef< CSeqDBGiList > GetPositiveList()
Retrieve a positive GI list.
bool IdToOid(Int8 id, TOid &oid)
GI or TI translation.
void UnLease()
Return any memory held by this object to the atlas.
void AddTaxIds(const set< TTaxId > &tax_ids)
void SetGiList(const vector< TGi > &new_list)
Set ID set for this negative list.
void SetPigList(const vector< TPig > &new_list)
vector< TTi > m_Tis
TIs to exclude from the SeqDB instance.
bool FindGi(TGi gi)
Test for existence of a GI.
vector< TGi > m_Gis
GIs to exclude from the SeqDB instance.
Reader for BlastDb format column files.
void GetColumnBlob(int col_id, int oid, CBlastDbBlob &blob)
Fetch the data blob for the given column and oid.
int TOID
Sequence type accepted and returned for OID indices.
bool IdsToOids(CSeqDBGiList &id_list) const
Get OIDs from an ID list.
static void FindVolumePaths(const string &dbname, ESeqType seqtype, vector< string > &paths, vector< string > *alias_paths=NULL, bool recursive=true, bool expand_links=true)
Find volume paths.
bool OidToPig(int oid, int &pig) const
Translate an OID to a PIG.
Uint8 GetTotalLength() const
Returns the sum of the lengths of all available sequences.
void GetGis(int oid, vector< TGi > &gis, bool append=false) const
Gets a list of GIs for an OID.
bool PigToOid(int pig, int &oid) const
Translate a PIG to an OID.
void SetIterationRange(int oid_begin, int oid_end)
Set Iteration Range.
int GetNumOIDs() const
Returns the size of the (possibly sparse) OID range.
TGi GetSeqGI(int oid) const
Returns the first Gi (if any) of the sequence.
Uint8 GetVolumeLength() const
Returns the sum of the lengths of all volumes.
void GetAvailableMaskAlgorithms(vector< int > &algorithms)
Get a list of algorithm IDs for which mask data exists.
bool OidToGi(int oid, TGi &gi) const
Translate an OID to a GI.
const string & GetDBNameList() const
Get list of database names.
list< CRef< CSeq_id > > GetSeqIDs(int oid) const
Gets a list of sequence identifiers.
Int8 GetDiskUsage() const
Retrieve the disk usage in bytes for this BLAST database.
void ResetInternalChunkBookmark()
Resets this object's internal chunk bookmark, which is used when the oid_state argument to GetNextOID...
EOidListType
Indicates how block of OIDs was returned.
CRef< CSeq_data > GetSeqData(int oid, TSeqPos begin, TSeqPos end) const
Fetch data as a CSeq_data object.
bool GiToPig(TGi gi, int &pig) const
Translate a GI to a PIG.
void GetAliasFileValues(TAliasFileValues &afv)
Get Name/Value Data From Alias Files.
int GetMaxLength() const
Returns the length of the largest sequence in the database.
int GetSeqLength(int oid) const
Returns the sequence length in base pairs or residues.
bool PigToGi(int pig, TGi &gi) const
Translate a PIG to a GI.
ESeqType GetSequenceType() const
Returns the type of database opened - protein or nucleotide.
const CSeqDBGiList * GetGiList() const
Get GI list attached to this database.
ESeqType
Sequence types (eUnknown tries protein, then nucleotide).
bool SeqidToOid(const CSeq_id &seqid, int &oid) const
Translate a Seq-id to any matching OID.
void RetAmbigSeq(const char **buffer) const
Returns any resources associated with the sequence.
int GetOidAtOffset(int first_seq, Uint8 residue) const
Find the sequence closest to the given offset into the database.
CRef< CBioseq > GetBioseq(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_seq_id=NULL) const
Get a CBioseq for a sequence.
CRef< CBioseq > GetBioseqNoData(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_seq_id=NULL) const
Get a CBioseq for a sequence without sequence data.
void GetTaxIDs(int oid, map< TGi, TTaxId > &gi_to_taxid, bool persist=false) const
Get taxid for an OID.
void GetMaskAlgorithmDetails(int algorithm_id, objects::EBlast_filter_program &program, string &program_name, string &algo_opts)
Get information about one type of masking available here.
void GetTotals(ESummaryType sumtype, int *oid_count, Uint8 *total_length, bool use_approx=true) const
Returns the sum of the sequence lengths.
void RetSequence(const char **buffer) const
Returns any resources associated with the sequence.
string GetTitle() const
Returns the database title.
int GetNumSeqs() const
Returns the number of sequences available.
void GetTaxIdsForSeqId(const CSeq_id &seq_id, vector< TTaxId > &taxids)
Get all tax ids for a seq id.
EOidListType GetNextOIDChunk(int &begin_chunk, int &end_chunk, int oid_size, vector< int > &oid_list, int *oid_state=NULL)
Return a chunk of OIDs, and update the OID bookmark.
int GetSequence(int oid, const char **buffer) const
Get a pointer to raw sequence data.
void AccessionToOids(const string &acc, vector< int > &oids) const
Translate an Accession to a list of OIDs.
void ListColumns(vector< string > &titles)
List columns titles found in this database.
void GetTaxIdsForAccession(const string &accs, vector< TTaxId > &taxids)
Get all tax ids for an accessions.
bool CheckOrFindOID(int &next_oid) const
Find an included OID, incrementing next_oid if necessary.
string GetDate() const
Returns the construction date of the database.
int GetNumSeqsStats() const
Returns the number of sequences available.
@ eUnfilteredAll
Sum of all sequences, ignoring GI and OID lists and alias files.
@ eFilteredRange
Sum of included sequences with OIDs within the iteration range.
@ eFilteredAll
Values from alias files, or summation over all included sequences.
int GetColumnId(const string &title)
Get an ID number for a given column title.
void SeqidToOids(const CSeq_id &seqid, vector< int > &oids) const
Translate a Seq-id to a list of OIDs.
int GetAmbigSeqAlloc(int oid, char **buffer, int nucl_code, ESeqDBAllocType strategy, TSequenceRanges *masks=NULL) const
Get a pointer to sequence data with ambiguities.
CSeqDBIter Begin() const
Returns a sequence iterator.
const string & GetColumnValue(int column_id, const string &key)
Look up the value for a specific column metadata key.
CRef< CBioseq > GiToBioseq(TGi gi) const
Get a CBioseq for a given GI.
static void GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo &info)
Get taxonomy information.
CRef< CBlast_def_line_set > GetHdr(int oid) const
Get the ASN.1 header for the sequence.
Uint8 GetTotalLengthStats() const
Returns the sum of the lengths of all available sequences.
int GetSeqLengthApprox(int oid) const
Returns an unbiased, approximate sequence length.
CRef< CBioseq > SeqidToBioseq(const CSeq_id &seqid) const
Get a CBioseq for a given Seq-id.
static CRef< CBlast_def_line_set > ExtractBlastDefline(const CBioseq &bioseq)
Extract a Blast-def-line-set object from a Bioseq retrieved by CSeqDB.
CRef< CBioseq > PigToBioseq(int pig) const
Get a CBioseq for a given PIG.
int GetAmbigSeq(int oid, const char **buffer, int nucl_code) const
Get a pointer to sequence data with ambiguities.
void GetMaskData(int oid, const vector< int > &algo_ids, TSequenceRanges &ranges)
Get masked ranges of a sequence.
bool GiToOid(TGi gi, int &oid) const
Translate a GI to an OID.
const map< string, string > & GetColumnMetaData(int column_id)
Get all metadata for the specified column.
CSeqDBIdSet GetIdSet() const
Get IdSet list attached to this database.
void GetLeafTaxIDs(int oid, map< TGi, set< TTaxId > > &gi_to_taxid_set, bool persist=false) const
Get taxid for an OID.
void Append(const char *p)
CSeqIdList(const char **str)
CSimpleGiList(const vector< TGi > &gis)
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
CRef< CSeqDBIsam > m_Isam
CTestThread(CSeqDBAtlas &atlas)
CTmpEnvironmentSetter(const char *name, const char *value=NULL)
const_iterator end() const
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator find(const key_type &key) const
const_iterator end() const
Defines column reader class for SeqDB.
int GetSeqLength(const CBioseq &bioseq)
static const unsigned long CR
static const struct name_t names[]
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static const char * expected[]
static const char * str(char *buf, int n)
static const column_t columns[]
#define GI_FROM(T, value)
constexpr size_t ArraySize(const Element(&)[Size])
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
SStrictId_Tax::TId TTaxId
Taxon id type.
void Set(const string &name, const string &value)
Set an environment variable by name.
const string & Get(const string &name, bool *found=NULL) const
Get environment value by name.
static void Add(const string &path)
Add the name of a dir entry; it will be deleted on (normal) exit.
const string & GetFileName(void) const
Return used file name (generated or given in the constructor).
static string ConvertToOSPath(const string &path)
Convert "path" on any OS to the current OS-dependent path.
static char GetPathSeparator(void)
Get path separator symbol specific for the current platform.
const TPrim & Get(void) const
#define MSerial_AsnText
I/O stream manipulators â.
const string AsFastaString(void) const
virtual void Write(const CSeq_entry_Handle &handle, const CSeq_loc *location=0)
Unspecified locations designate complete sequences; non-empty custom titles override the usual title ...
void SetWidth(TSeqPos width)
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty â pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty â not pointing to any object, which means having a null value.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static Int8 StringToInt8(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to Int8.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
bool IsSetLinks(void) const
Check if a value has been assigned to Links data member.
TTaxid GetTaxid(void) const
Get the Taxid member data.
bool IsSetTaxid(void) const
Check if a value has been assigned to Taxid data member.
void SetTaxid(TTaxid value)
Assign a value to Taxid data member.
EBlast_filter_program
This defines the possible sequence filtering algorithms to be used in a BLAST database.
const Tdata & Get(void) const
Get the member data.
@ eBlast_filter_program_repeat
@ eBlast_filter_program_seg
const TTag & GetTag(void) const
Get the Tag member data.
TId GetId(void) const
Get the variant data.
TGi GetGi(void) const
Get the variant data.
const TGeneral & GetGeneral(void) const
Get the variant data.
@ e_General
for other databases
@ e_Gi
GenInfo Integrated Database.
void ResetDescr(void)
Reset Descr data member.
const TInst & GetInst(void) const
Get the Inst member data.
const TNcbi4na & GetNcbi4na(void) const
Get the variant data.
bool CanGetSeq_data(void) const
Check if it is safe to call GetSeq_data method.
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
char * dbname(DBPROCESS *dbproc)
Get name of current database.
unsigned int
A callback function used to compare two keys in a database.
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is whole
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is smart and slim</td> n<td> orig</td> n</tr> n<tr> n<td> last_modified</td> n<td> optional</td> n<td> Integer</td> n<td class=\"description\"> The blob last modification If provided then the exact match will be requested with n the Cassandra storage corresponding field value</td> n<td> Positive integer Not provided means that the most recent match will be selected</td> n<td></td> n</tr> n<tr> n<td> use_cache</td> n<td> optional</td> n<td> String</td> n<td class=\"description\"> The option controls if the Cassandra LMDB cache and or database should be used It n affects the seq id resolution step and the blob properties lookup step The following n options are BIOSEQ_INFO and BLOB_PROP at all
CBioseq_Info & GetBioseq(CTSE_Info &tse, const CBioObjectId &id)
unique_ptr< CLocalTaxon > tax1
static void hex(unsigned char c)
constexpr auto sort(_Init &&init)
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
BOOST_AUTO_TEST_SUITE(psiblast_iteration)
static const string kDb("db")
string s_Stringify(CRef< ASNOBJ > a)
static void s_TestMaskingLimits(EMaskingType mask, unsigned first, unsigned last, unsigned lowest, unsigned highest, unsigned count)
static void s_MapAllGis(CSeqDB &db, map< int, int > &m, int change, int &total)
static void s_TestPartialAmbig(CSeqDB &db, TGi nt_gi)
void s_ApproxEqual(NUM a, NUM b, DIF epsilon, int lineno)
BOOST_AUTO_TEST_CASE(ConstructLocal)
static bool s_DbHasOID(CSeqDB &db, int &count, int oid)
BOOST_AUTO_TEST_CASE_TIMEOUT(TestGiToOidTimeout, 15)
static void s_ModifyMap(map< int, int > &m, int key, int c, int &total)
static void s_CheckIdLookup(CSeqDB &db, const string &acc, size_t exp_oids, size_t exp_size)
static void s_TestPartialAmbigRange(CSeqDB &db, int oid, int begin, int end)
static Uint4 s_BufHash(const char *buf_in, Uint4 length, Uint4 start=1)
string s_ToString(const A &a, const B &b, const C &c, const D &d, const E &e)
static bool s_MaskingTest(EMaskingType mask, unsigned oid)
Defines exception class and several constants for SeqDB.
const blastdb::TOid kSeqDBEntryNotFound
Int4 TOid
Ordinal ID in BLAST databases.
const int kSeqDBNuclNcbiNA8
Used to request ambiguities in Ncbi/NA8 format.
ESeqDBIdType SeqDB_SimplifyAccession(const string &acc, Int8 &num_id, string &str_id, bool &simpler)
String id simplification.
bool SeqDB_IsBinaryGiList(const string &fname)
Read a text or binary SeqId list from a file.
unsigned SeqDB_SequenceHash(const char *sequence, int length)
Returns a path minus filename.
const int kSeqDBNuclBlastNA8
Used to request ambiguities in BLAST/NA8 format.
string SeqDB_ResolveDbPath(const string &filename)
Resolve a file path using SeqDB's path algorithms.
Defines the 'expert' version of the CSeqDB interfaces.
ISAM index database access object.
static const char * kTaxid
static SLJIT_INLINE sljit_ins nr(sljit_gpr dst, sljit_gpr src)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
List of sequence offset ranges.
int measured_oids
Measured oid count should equal filtered if alias files are correct.
int total_oids
Total oid count, sum of all volume oid counts.
Int8 filtered_length
Filtered length, result of all filtering.
void CompareField(Int8 X, Int8 Y, string &sum, char ch)
Int8 total_length
Total length, sum of all volume lengths.
string Compare(SDbSumInfo &other)
int filtered_oids
Filtered oid count, result of all filtering.
Int8 measured_length
Measured length should equal filtered if alias files are correct.
Utilities for more convenient use of the Boost.Test library.
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
static string kCount("Count")
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4