slen = s.
Size();
92 stringextn2(extn, 2, 4);
98(extn[1] ==
'n'|| extn[1] ==
'p') &&
99(extn2 ==
"al"|| extn2 ==
"in"|| extn2 ==
"db")) {
117 for(
int i= 0;
i<
buffer.Size();
i++) {
138 intextn_amt = extn ? (extn->
Size()+1) : 0;
146 boolonly_two =
false;
148 if(one.Empty() || two[0] == delim) {
163outp.reserve(two.
Size() + extn_amt);
173outp.reserve(one.Size() + two.
Size() + 1 + extn_amt);
177 if(outp[outp.size() - 1] != delim) {
195 if(x1 != x2)
return(x1 < x2);
196 else return(s1 < s2);
235 boollinkoutdb_search)
238path.reserve(
dbname.size() + 4);
241 if(linkoutdb_search) {
243path.append(
".sqlite3");
248path.append(
".-al");
250path[path.size()-3] = dbtype;
256path[path.size()-2] =
'i';
257path[path.size()-1] =
'n';
272 const char* splitter = 0;
274 #if defined(NCBI_OS_UNIX) 290 for(
size_t i= 0;
i<dbs.size();
i++) {
291 if(dbs[
i] ==
'/'|| dbs[
i] ==
'\\') {
337 boollinkoutdb_search =
false)
347vector<string> roads;
353 ITERATE(vector<string>, road, roads) {
383 const stringpath=
"")
448 return whole.GetLength() != (
Int8) -1;
474 const chardbtype(
'p');
477 return s_SeqDB_TryPaths(pathology, filename, dbtype,
false, access,
true);
492 size_tnewlen =
a.length() +
b.length() + delim.length();
494 if(
a.capacity() < newlen) {
497 while(newcap < newlen) {
510: m_CurrentOrder(
eNone), m_MaskOpts(0)
525 returnlhs.
oid< rhs.
oid;
540 returnlhs.
gi< rhs.
gi;
553 returnlhs.
pig< rhs.
pig;
569 returnlhs.
ti< rhs.
ti;
584 returnlhs.
si< rhs.
si;
589 template<
classTCompare,
classTVector>
592 boolalready =
true;
594TCompare compare_less;
596 for(
int i= 1;
i< (
int) v.size();
i++) {
597 if(compare_less(v[
i], v[
i-1])) {
604 sort(v.begin(), v.end(), compare_less);
625 "Out of sequence sort order requested.");
637s_InsureOrder<CSeqDB_SortGiLessThan>(
m_GisOids);
638s_InsureOrder<CSeqDB_SortTiLessThan>(
m_TisOids);
639s_InsureOrder<CSeqDB_SortSiLessThan>(
m_SisOids);
640s_InsureOrder<CSeqDB_SortPigLessThan>(
m_PigsOids);
646 "Unrecognized sort order requested.");
656 intoid(0), index(0);
657 return(
const_cast<CSeqDBGiList*
>(
this))->GiToOid(gi, oid, index);
664 return GiToOid(gi, oid, index);
680}
else if(m_gi > gi) {
696 intoid(0), index(0);
697 return(
const_cast<CSeqDBGiList*
>(
this))->TiToOid(ti, oid, index);
704 return TiToOid(ti, oid, index);
720}
else if(m_ti > ti) {
735 intoid(0), index(0);
736 return(
const_cast<CSeqDBGiList*
>(
this))->SiToOid(
si, oid, index);
757}
else if(
si< m_si) {
777gis.push_back(itr->gi);
788pigs.push_back(itr->pig);
799tis.push_back(itr->ti);
811sis.push_back(itr->si);
826 Int4num_gis = (
Int4) (endp - beginp) - 2;
830 if(((endp - beginp) < 2U)
831|| (beginp[0] != 0xFFFFFFFFU)
835 "Specified file is not a valid binary GI file.");
838gis.reserve(num_gis);
840 for(
Uint4* elem = (beginp + 2); elem < endp; ++elem) {
855 bool& has_long_ids,
bool* has_tis =
NULL)
857 boolretval =
false;
858has_long_ids =
false;
861 Uint8file_size = fendp - fbeginp;
863 if(file_size == 0) {
866 "Specified file is empty.");
867}
else if(
isdigit((
unsigned char)(*((
char*) fbeginp))) ||
868((
unsigned char)(*((
char*) fbeginp)) ==
'#')) {
870}
else if((file_size >= 8) && ((*fbeginp & 0xFF) == 0xFF)) {
873 intmarker = fbeginp[3] & 0xFF;
875 if(marker == 0xFE || marker == 0xFC) {
876has_long_ids =
true;
878 if(has_tis && (marker == 0xFD || marker == 0xFC)) {
884 "Specified file is not a valid GI/TI list.");
918 string msg=
string(
"Invalid byte in text"+ list_type +
" list [") +
927vector<CSeqDBGiList::SGiOid> & gis,
930 boollong_ids =
false;
931 Uint8file_size = fendp - fbeginp;
938 Uint8num_gis = bendp - bbeginp - 2;
942 if((bbeginp[0] != 0xFFFFFFFFU)
946 "Specified file is not a valid binary GI file.");
949gis.reserve(num_gis);
953 boolin_gi_order =
true;
955 Uint4* elem = bbeginp + 2;
956 while(elem < bendp) {
958gis.push_back(this_gi);
960 if(prev_gi > this_gi) {
961in_gi_order =
false;
968 while(elem < bendp) {
972*in_order = in_gi_order;
974 for(
Uint4* elem = (bbeginp + 2); elem < bendp; ++elem) {
985gis.reserve((
int) (file_size / 7));
988 const stringlist_type(
"GI");
990 for(
const char* p = fbeginp; p < fendp; p ++) {
1007vector<CSeqDBGiList::SPigOid> & pigs,
1010 boollong_ids =
false;
1011 Int8file_size = fendp - fbeginp;
1017 Int4num_pigs = (
Int4) (bendp - bbeginp) - 2;
1021 if(((bendp - bbeginp) < 2U)
1022|| (bbeginp[0] != 0xFFFFFFFFU)
1026 "Specified file is not a valid binary IPG file.");
1029pigs.reserve(num_pigs);
1033 boolsorted =
true;
1035 Uint4* elem = bbeginp + 2;
1036 while(elem < bendp) {
1038pigs.push_back(this_pig);
1040 if(prev_pig > this_pig) {
1044prev_pig = this_pig;
1048 while(elem < bendp) {
1054 for(
Uint4* elem = (bbeginp + 2); elem < bendp; ++elem) {
1059pigs.reserve((
int) (file_size / 7));
1062 const stringlist_type(
"IPG");
1064 for(
const char* p = fbeginp; p < fendp; p ++) {
1069pigs.push_back(elem);
1084 boollong_ids =
false;
1086 Int4* bbeginp = (
Int4*) fbeginp;
1089 Uint8num_taxids = (bendp - bbeginp) - 2;
1092taxids.
oids.clear();
1094 if(((bendp - bbeginp) < 2) || (bbeginp[0] != 0xFFFFFFFF)
1097 "Specified file is not a valid binary Tax Id List file.");
1100 for(
Int4* elem = (bbeginp + 2); elem < bendp; ++elem) {
1105 const stringlist_type(
"TAXID");
1107 for(
const char* p = fbeginp; p < fendp; p ++) {
1151vector<CSeqDBGiList::STiOid> & tis,
1154 boollong_ids =
false;
1155 Int8file_size = fendp - fbeginp;
1158 Int4* bbeginp = (
Int4*) fbeginp;
1160 Int4* bdatap = bbeginp + 2;
1162 Uint4num_tis = (
int)(bendp-bdatap);
1164 intremainder = num_tis % 2;
1172 boolbad_fmt =
false;
1174 if(bendp < bdatap) {
1180 if((marker != -3 && marker != -4) ||
1181(num_ids != num_tis) ||
1182(remainder && long_ids)) {
1191 "Specified file is not a valid binary GI or TI file.");
1194tis.reserve(num_tis);
1202 boolin_ti_order =
true;
1204 Int8* elem = bdatap8;
1206 while(elem < bendp8) {
1208tis.push_back(this_ti);
1210 if(prev_ti > this_ti) {
1211in_ti_order =
false;
1218 while(elem < bendp8) {
1222*in_order = in_ti_order;
1224 for(
Int8* elem = bdatap8; elem < bendp8; elem ++) {
1231 boolin_ti_order =
true;
1233 Int4* elem = bdatap;
1235 while(elem < bendp) {
1237tis.push_back(this_ti);
1239 if(prev_ti > this_ti) {
1240in_ti_order =
false;
1247 while(elem < bendp) {
1251*in_order = in_ti_order;
1253 for(
Int4* elem = bdatap; elem < bendp; elem ++) {
1264tis.reserve(
int(file_size / 7));
1267 const stringlist_type(
"TI");
1269 for(
const char* p = fbeginp; p < fendp; p ++) {
1273tis.push_back(elem);
1286vector<CSeqDBGiList::SSiOid> & sis,
1289 Int8file_size = fendp - fbeginp;
1296sis.reserve(sis.size() +
int(file_size / 7));
1298 const char* p = fbeginp;
1300 while( p < fendp) {
1302 while(p< fendp && (*p==
'>'|| *p==
' '|| *p==
'\t'|| *p==
'\n'|| *p==
'\r')) ++p;
1303 if(p< fendp && *p ==
'#') {
1305 while(p< fendp && *p!=
'\n') ++p;
1309 while(p< fendp && *p!=
' '&& *p!=
'\t'&& *p!=
'\n'&& *p!=
'\r') ++p;
1311 stringacc(
head, p);
1313 if(str_id !=
"") {
1314sis.push_back(str_id);
1316cerr <<
"WARNING: "<< acc
1317<<
" is not a valid seqid string."<< endl;
1321 if(in_order) *in_order =
false;
1326vector<CSeqDBGiList::SGiOid> & gis,
1327vector<CSeqDBGiList::STiOid> & tis,
1328vector<CSeqDBGiList::SSiOid> & sis,
1331 Int8file_size = fendp - fbeginp;
1338sis.reserve(sis.size() +
int(file_size / 7));
1340 const char* p = fbeginp;
1342 while( p < fendp) {
1344 while(p< fendp && (*p==
'>'|| *p==
' '|| *p==
'\t'|| *p==
'\n'|| *p==
'\r')) ++p;
1345 if(p< fendp && *p ==
'#') {
1347 while(p< fendp && *p!=
'\n') ++p;
1351 while(p< fendp && *p!=
' '&& *p!=
'\t'&& *p!=
'\n'&& *p!=
'\r') ++p;
1353 stringacc(
head, p);
1361 else if(
eTiId== id_type) {
1362tis.push_back((
TTi) num_id);
1364 else if(
eGiId== id_type) {
1368cerr <<
"WARNING: "<< acc
1369<<
" is not a valid seqid string."<< endl;
1373 if(in_order) *in_order =
false;
1381 const char* fbeginp = (
char*) mfile.
GetPtr();
1382 const char* fendp = fbeginp + (
int)file_size;
1384 boolignore =
false;
1385 boolhas_tis =
false;
1390retval = has_tis && retval;
1405 void SeqDB_ReadGiList(
const string& fname, vector<CSeqDBGiList::SGiOid> & gis,
bool* in_order)
1410 const char* fbeginp = (
char*) mfile.
GetPtr();
1411 const char* fendp = fbeginp + file_size;
1417 void SeqDB_ReadTiList(
const string& fname, vector<CSeqDBGiList::STiOid> & tis,
bool* in_order)
1422 const char* fbeginp = (
char*) mfile.
GetPtr();
1423 const char* fendp = fbeginp + file_size;
1429vector<CSeqDBGiList::STiOid> & tis, vector<CSeqDBGiList::SSiOid> & sis,
bool* in_order)
1434 const char*fbeginp = (
char*) mfile.
GetPtr();
1435 const char*fendp = fbeginp + file_size;
1440 void SeqDB_ReadPigList(
const string& fname, vector<CSeqDBGiList::SPigOid> & pigs,
bool* in_order)
1445 const char* fbeginp = (
char*) mfile.
GetPtr();
1446 const char* fendp = fbeginp + file_size;
1456 const char* fbeginp = (
char*) mfile.
GetPtr();
1457 const char* fendp = fbeginp + file_size;
1464 typedefvector<CSeqDBGiList::SGiOid> TPairList;
1469gis.reserve(pairs.size());
1471 ITERATE(TPairList, iter, pairs) {
1472gis.push_back(iter->gi);
1485 const char*fbeginp = (
char*) mfile.
GetPtr();
1486 const char*fendp = fbeginp + file_size;
1494 int b(0), e((
int)
m_Gis.size());
1502}
else if(m_gi > gi) {
1517 int b(0), e((
int)
m_Tis.size());
1525}
else if(m_ti > ti) {
1537 boolmatch_type =
false;
1538 return FindId(
id, match_type);
1545 int b(0), e((
int)
m_Sis.size());
1549 stringm_si =
m_Sis[m];
1553}
else if(m_si >
si) {
1567match_type = (
GetNumGis() > 0) ?
true:
false;
1571}
else if(
id.IsGeneral() &&
id.GetGeneral().GetDb() ==
"ti") {
1572match_type = (
GetNumTis() > 0) ?
true:
false;
1575 const CObject_id& obj =
id.GetGeneral().GetTag();
1584match_type = (
GetNumSis() > 0) ?
true:
false;
1602 size_tpos = str_id.find(
".");
1603 if(pos != str_id.npos) {
1604 stringnover(str_id, 0, pos);
1636 return FindGi(
id.GetGi());
1637}
else if(
id.IsGeneral() &&
id.GetGeneral().GetDb() ==
"ti") {
1638 const CObject_id& obj =
id.GetGeneral().GetTag();
1654 if(
FindSi(str_id))
return true;
1657 size_tpos = str_id.find(
".");
1658 if(pos != str_id.npos) {
1659 stringnover(str_id, 0, pos);
1669 boolin_order =
false;
1722 for(
unsigned i= 0;
i< dbs.size();
i++) {
1728 for(
unsigned i= 0;
i< dbs.size();
i++) {
1733 if(dbs[
i].find(
" ") != string::npos) {
1745vector<CTempString> & dbs,
1748vector<CSeqDB_Substring> subs;
1753dbs.reserve(subs.size());
1755 ITERATE(vector<CSeqDB_Substring>, iter, subs) {
1757dbs.push_back(
tmp);
1763vector<CSeqDB_Substring> & dbs,
1768 const char* sp =
dbname.data();
1770 boolquoted =
false;
1773 for(
unsigned i= 0;
i<
dbname.size();
i++) {
1780 if(keep_quote)
i++;
1794}
else if(ch ==
'"') {
1798begin = keep_quote ?
i:
i+ 1;
1804 if(begin <
dbname.size()) {
1815 sort(gis.begin(), gis.end());
1820 intgis_n = (
int) gis.size();
1822 while(list_i < list_n && gis_i < gis_n) {
1849 sort(gis.begin(), gis.end());
1854 intgis_n = (
int) gis.size();
1856 while(list_i < list_n && gis_i < gis_n) {
1857 TGiL = neg_gilist.
GetGi(list_i);
1873 TGilast_gi = gis[gis_i];
1874 do{ gis_i++; }
while(gis_i < gis_n && gis[gis_i] == last_gi);
1878 while(gis_i < gis_n) {
1904 #ifdef NCBI_STRICT_GI 1920 sort(ids.begin(), ids.end());
1921ids.erase(unique(ids.begin(), ids.end()), ids.end());
1927 sort(ids.begin(), ids.end());
1928ids.erase(unique(ids.begin(), ids.end()), ids.end());
1947incl_A = incl_B = incl_AB =
false;
1976 if((! A_pos) && (! B_pos)) {
1978result_pos =
false;
1979A_pos = B_pos =
true;
1984 if((! A_pos) || (! B_pos)) {
1986result_pos =
false;
1993result_pos = A_pos == B_pos;
2009incl_AB = A_pos && B_pos;
2014incl_A = incl_B = incl_AB =
true;
2018incl_AB = (A_pos != B_pos);
2019incl_A = incl_B = ! incl_AB;
2029 constvector<Int8> &
A,
2031 constvector<Int8> &
B,
2036 boolincl_A(
false),
2048 size_tA_i(0), B_i(0);
2050 while((A_i <
A.size()) && (B_i <
B.size())) {
2051 Int8Ax(
A[A_i]),
Bx(
B[B_i]), target(-1);
2052 boolincluded(
false);
2058}
else if(Ax >
Bx) {
2070 result.push_back(target);
2075 while(A_i <
A.size()) {
2076 result.push_back(
A[A_i++]);
2081 while(B_i <
B.size()) {
2082 result.push_back(
B[B_i++]);
2088 constvector<Int4> & ids,
2097 boolresult_pos(
true);
2112 constvector<Int8> & ids,
2120 boolresult_pos(
true);
2135 constvector<Uint8> & ids,
2143 boolresult_pos(
true);
2162 "Set operation requested but ID types don't match.");
2166 boolresult_pos(
true);
2187 "Positive ID list requested but only negative exists.");
2194ids->
AddTi(*iter);
2200 _ASSERT(((*iter) >> 32) == 0);
2213 "Negative ID list requested but only positive exists.");
2222ids->
AddTi(*iter);
2228 _ASSERT(((*iter) >> 32) == 0);
2236ids->
AddSi(*iter);
2244: m_Positive (
false),
2257 const string&
text)
2259 string msg=
"Validation failed: ["+
text+
"] at ";
2274 boolmatched =
true;
2276 switch(bestid.
Which()) {
2294 if(dbt.
GetDb() ==
"BL_ORD_ID") {
2301 if(dbt.
GetDb() ==
"PIG") {
2308 if(dbt.
GetDb() ==
"ti") {
2344 if(objid.
IsStr()) {
2346str_id = objid.
GetStr();
2402 boolfound =
false;
2446 size_tvbar =
str.find(
'|', pos);
2448 if(vbar == string::npos) {
2449 returnstring::npos;
2452 stringportion(
str, pos, vbar - pos);
2458 size_tvbar_prev = vbar;
2460 for(
count=0; ; ++
count, vbar_prev = vbar) {
2461vbar =
str.find(
'|', vbar_prev + 1);
2463 if(vbar == string::npos) {
2467 intstart_pt =
int(vbar_prev + 1);
2468 stringelement(
str, start_pt, vbar - start_pt);
2478 returnstring::npos;
2481 return(vbar == string::npos) ?
str.size() : vbar;
2505 while(pos < line.size()) {
2508 if(end == string::npos) {
2514 stringelement(line, pos, end - pos);
2521 catch(invalid_argument &) {
2526seqids.push_back(
id);
2530 return! seqids.empty();
2541num_id = (
Uint4)-1;
2543vector< CRef< CSeq_id > > seqid_set;
2556list< CRef<CSeq_id> > seqids;
2564 if(!seqids.empty() && seqids.front()->IsPdb() &&
2565acc.find(
"_") != string::npos) {
2567str_id = seqids.front()->AsFastaString();
2570 else if(!seqids.empty() && seqids.front()->IsLocal()) {
2572 if( acc.find(
":") != string::npos) {
2573 static const char* GNL_DBs[] = {
"CDD",
"SRA",
"TSA",
"GNOMON",
NULL};
2574 stringdb_tag, gnl_id;
2576 const char** p = GNL_DBs;
2577 for(; p && *p; ++p) {
2579str_id =
"gnl|"+ db_tag +
"|"+ gnl_id;
2580seqids.front().Reset();
2582seqids.front() = new_id;
2593str_id =
"lcl|"+ acc;
2614 boolsimpler(
false);
2627 const stringkExtnMol(1, db_is_protein ?
'p':
'n');
2629extn.push_back(kExtnMol +
"al");
2630extn.push_back(kExtnMol +
"in");
2631extn.push_back(kExtnMol +
"hr");
2632extn.push_back(kExtnMol +
"sq");
2633extn.push_back(kExtnMol +
"ni");
2634extn.push_back(kExtnMol +
"nd");
2636extn.push_back(kExtnMol +
"si");
2637extn.push_back(kExtnMol +
"sd");
2639extn.push_back(kExtnMol +
"pi");
2640extn.push_back(kExtnMol +
"pd");
2641extn.push_back(kExtnMol +
"js");
2643vector<string> lmdbs;
2645extn.insert(extn.end(), lmdbs.begin(), lmdbs.end());
2648extn.push_back(kExtnMol +
"aa");
2649extn.push_back(kExtnMol +
"ab");
2650extn.push_back(kExtnMol +
"ac");
2651extn.push_back(kExtnMol +
"og");
2652extn.push_back(kExtnMol +
"hi");
2653extn.push_back(kExtnMol +
"hd");
2654extn.push_back(kExtnMol +
"ti");
2655extn.push_back(kExtnMol +
"td");
2656extn.push_back(kExtnMol +
"xm");
2663 static const char* ext[]={
"db",
"os",
"ot",
"tf",
"to",
"db-lock",
"tf-lock",
NULL};
2665 const stringkExtnMol(1, db_is_protein ?
'p':
'n');
2666 for(
const char** p=ext; *p !=
NULL; p++) {
2667extn.push_back(kExtnMol + (*p));
2674 const stringkExtnMol(1, db_is_protein ?
'p':
'n');
2675extn = kExtnMol +
"js";
2681 switch(
id.Which()) {
2687 const CDbtag& dbt =
id.GetGeneral();
2711 return(db_is_protein ?
"pxm":
"nxm");
static int GetSeqidlist(CMemoryFile &file, vector< CSeqDBGiList::SSiOid > &idlist, SBlastSeqIdListInfo &list_info)
Get seqidlist from dbv5 seqidlist file.
CIntersectionGiList(CSeqDBGiList &gilist, vector< TGi > &gis)
Construct an intersection of two lists of GIs.
bool DoesFileExist(const string &fname)
Check if file exists.
const string GetSearchPath() const
Get BlastDB search path.
static const string GenerateSearchPath()
Generate search path.
@ eFileErr
Files were missing or contents were incorrect.
CSeqDBFileGiList(const string &fname, EIdType idtype=eGiList)
Build a GI list from a file.
vector< SGiOid > & GetGiList()
Get list of GIs and OIDs.
void AddTi(TTi ti)
Add a new TI to the list.
vector< SGiOid > m_GisOids
Pairs of GIs and OIDs.
int GetNumGis() const
Get the number of GIs in the array.
bool GiToOid(TGi gi, int &oid)
Try to find a GI and return the associated OID.
const SGiOid & GetGiOid(int index) const
Access an element of the array.
vector< SPigOid > m_PigsOids
bool FindSi(const string &si) const
int GetNumSis() const
Get the number of Seq-ids in the array.
bool SiToOid(const string &si, int &oid)
CSeqDBGiList()
Constructor.
bool TiToOid(TTi ti, int &oid)
Try to find a TI and return the associated OID.
bool FindTi(TTi ti) const
Test for existence of a TI.
vector< SPigOid > & GetPigList()
Get list of Pigs and OIDs.
int GetNumTis() const
Get the number of TIs in the array.
vector< STiOid > m_TisOids
Pairs of TIs and OIDs.
vector< STiOid > & GetTiList()
Get list of Tis and OIDs.
SBlastSeqIdListInfo m_ListInfo
ESortOrder
Possible sorting states.
@ eNone
The array is unsorted or the sortedness is unknown.
@ eGi
The array is sorted by GI.
void AddGi(TGi gi)
Add a new GI to the list.
void ReserveGis(size_t n)
Reserve space for GIs.
vector< SSiOid > & GetSiList()
Get list of Seq-ids and OIDs.
void ReserveTis(size_t n)
Reserve space for TIs.
void PreprocessIdsForISAMSiLookup()
Preprocess ids for ISAM string id lookup.
bool FindGi(TGi gi) const
Test for existence of a GI.
void InsureOrder(ESortOrder order)
Sort if necessary to insure order of elements.
vector< SSiOid > m_SisOids
Pairs of Seq-ids and OIDs.
bool FindId(const CSeq_id &id)
Test for existence of a Seq-id by type.
ESortOrder m_CurrentOrder
Indicates the current sort order, if any, of this container.
Helper class to allow copy-on-write semantics for CSeqDBIdSet.
const vector< Int8 > & Get() const
Access the Int8 set.
size_t Size() const
Get the number of elements stored here.
vector< Int8 > & Set()
Access the Int8 set.
vector< string > & SetSeqIDs()
Access the string set.
SeqDB ID list for performing boolean set operations.
CRef< CSeqDBIdSet_Vector > m_Ids
Ids stored here.
static void x_SortAndUnique(vector< Int8 > &ids)
Sort and unique the internal set.
void x_BooleanSetOperation(EOperation op, const vector< Int8 > &A, bool A_pos, const vector< Int8 > &B, bool B_pos, vector< Int8 > &result, bool &result_pos)
Compute boolean operation on two vectors.
static void x_SummarizeBooleanOp(EOperation op, bool A_pos, bool B_pos, bool &result_pos, bool &incl_A, bool &incl_B, bool &incl_AB)
Compute inclusion flags for a boolean operation.
CSeqDBIdSet()
Construct a 'blank' CSeqDBIdSet object.
bool m_Positive
True if the current list is positive.
void Negate()
Invert the current list.
bool Blank() const
Check if an ID list is blank.
EIdType
Type of IDs stored here.
EOperation
Types of operations that may be performed on GI lists.
void Compute(EOperation op, const vector< int > &ids, bool positive=true)
Perform a logical operation on a list.
EIdType m_IdType
Id type.
CRef< CSeqDBNegativeList > GetNegativeList()
Retrieve a negative GI list.
CRef< CSeqDBGiList > GetPositiveList()
Retrieve a positive GI list.
void AddSi(const string &si)
Add a new SeqId to the list.
void AddGi(TGi gi)
Add a new GI to the list.
void ReserveGis(size_t n)
Reserve space for GIs.
void AddTi(TTi ti)
Add a new TI to the list.
int GetNumTis() const
Get the number of TIs in the array.
bool FindId(const CSeq_id &id, bool &match_type)
Test for existence of a TI or GI here and report whether the ID was one of those types.
void ReserveTis(size_t n)
Reserve space for TIs.
vector< TTi > m_Tis
TIs to exclude from the SeqDB instance.
TGi GetGi(int index) const
Access an element of the GI array.
vector< string > m_Sis
SeqIds to exclude from the SeqDB instance.
bool FindTi(TTi ti)
Test for existence of a TI.
void PreprocessIdsForISAMSiLookup()
int GetNumGis() const
Get the number of GIs in the array.
bool FindGi(TGi gi)
Test for existence of a GI.
size_t m_LastSortSize
Zero if unsorted, or the size it had after the last sort.
vector< TGi > m_Gis
GIs to exclude from the SeqDB instance.
int GetNumSis() const
Get the number of SeqIds in the array.
void ReserveSis(size_t n)
void InsureOrder()
Sort list if not already sorted.
Check file existence using CSeqDBAtlas.
CSeqDB_AtlasAccessor(CSeqDBAtlas &atlas)
Constructor.
virtual bool DoesFileExist(const string &fname)
Test file existence.
File existence test interface.
virtual ~CSeqDB_FileExistence()
Destructor.
virtual bool DoesFileExist(const string &fname)=0
Check if file exists at fully qualified path.
CSeqDB_Substring FindBaseName() const
Returns the portion of this path containing the base name.
Check file existence using CFile.
virtual bool DoesFileExist(const string &fname)
Test file existence.
CSeqDB_SimpleAccessor()
Constructor.
Compare SGiOid structs by GI.
int operator()(const CSeqDBGiList::SGiOid &lhs, const CSeqDBGiList::SGiOid &rhs)
Test whether lhs is less than (occurs before) rhs.
Compare SGiOid structs by OID.
int operator()(const CSeqDBGiList::SGiOid &lhs, const CSeqDBGiList::SGiOid &rhs)
Test whether lhs is less than (occurs before) rhs.
int operator()(const CSeqDBGiList::SPigOid &lhs, const CSeqDBGiList::SPigOid &rhs)
Test whether lhs is less than (occurs before) rhs.
Compare SSeqIdOid structs by SeqId.
int operator()(const CSeqDBGiList::SSiOid &lhs, const CSeqDBGiList::SSiOid &rhs)
Test whether lhs is less than (occurs before) rhs.
Compare STiOid structs by TI.
int operator()(const CSeqDBGiList::STiOid &lhs, const CSeqDBGiList::STiOid &rhs)
Test whether lhs is less than (occurs before) rhs.
void GetString(string &s) const
Return the data by assigning it to a string.
int Size() const
Return the length of the string in bytes.
void Clear()
Reset the string to an empty state.
const char * GetEnd() const
Returns a pointer to the end of the string, which is always a pointer to the character past the last ...
void EraseFront(int n)
Disinclude data from the beginning of the string.
void Resize(int n)
Change the length of the string.
const char * GetBegin() const
Returns a pointer to the start of the string.
bool Empty() const
Returns true iff the string is empty.
int FindLastOf(char ch) const
Find last instance of a character in the substring.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
iterator_bool insert(const value_type &val)
static const char si[8][64]
static const char * str(char *buf, int n)
#define GI_FROM(T, value)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define TAX_ID_FROM(T, value)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
size_t GetSize(void) const
Get length of the mapped region.
void * GetPtr(void) const
Get pointer to beginning of data.
static char GetPathSeparator(void)
Get path separator symbol specific for the current platform.
const string AsFastaString(void) const
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
static SIZE_TYPE ParseFastaIds(CBioseq::TId &ids, const CTempString &s, bool allow_partial_failure=false)
Parse an entire set of |-delimited FASTA-style IDs, appending the results to IDS.
static E_Choice WhichInverseSeqId(const CTempString &SeqIdCode)
Converts a string to a choice, no need to require a member.
static int BestRank(const CRef< CSeq_id > &id)
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
@ fLabel_Version
Show the version.
@ fLabel_GeneralDbIsContent
For type general, use the database name as the tag and the (text or numeric) key as the content.
@ eFasta
Tagged ID in NCBI's traditional FASTA style.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static Int8 StringToInt8(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to Int8.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
static string & ToLower(string &str)
Convert string to lower case -- string& version.
@ fConvErr_NoThrow
Do not throw an exception on error.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
@ eTrunc_Both
Truncate whitespace at both begin and end of string.
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
bool IsStr(void) const
Check if variant Str is selected.
const TTag & GetTag(void) const
Get the Tag member data.
bool IsId(void) const
Check if variant Id is selected.
bool CanGetDb(void) const
Check if it is safe to call GetDb method.
bool CanGetTag(void) const
Check if it is safe to call GetTag method.
const TDb & GetDb(void) const
Get the Db member data.
const TStr & GetStr(void) const
Get the variant data.
TId GetId(void) const
Get the variant data.
TGibbsq GetGibbsq(void) const
Get the variant data.
const TName & GetName(void) const
Get the Name member data.
bool CanGetName(void) const
Check if it is safe to call GetName method.
bool IsPrf(void) const
Check if variant Prf is selected.
E_Choice Which(void) const
Which variant is currently selected.
TGi GetGi(void) const
Get the variant data.
TVersion GetVersion(void) const
Get the Version member data.
bool CanGetVersion(void) const
Check if it is safe to call GetVersion method.
E_Choice
Choice variants.
const TLocal & GetLocal(void) const
Get the variant data.
bool CanGetAccession(void) const
Check if it is safe to call GetAccession method.
const TGeneral & GetGeneral(void) const
Get the variant data.
bool IsPir(void) const
Check if variant Pir is selected.
const TAccession & GetAccession(void) const
Get the Accession member data.
@ e_Other
for historical reasons, 'other' = 'refseq'
@ e_Gpipe
Internal NCBI genome pipeline processing ID.
@ e_Tpe
Third Party Annot/Seq EMBL.
@ e_Tpd
Third Party Annot/Seq DDBJ.
@ e_Gibbsq
Geninfo backbone seqid.
@ e_General
for other databases
@ e_Gi
GenInfo Integrated Database.
@ e_not_set
No variant selected.
@ e_Tpg
Third Party Annot/Seq Genbank.
char * dbname(DBPROCESS *dbproc)
Get name of current database.
unsigned int
A callback function used to compare two keys in a database.
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is whole
static void text(MDB_val *v)
constexpr auto sort(_Init &&init)
constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head
const string version
version string
const struct ncbi::grid::netcache::search::fields::SIZE size
Defines unified interface to application:
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
The SeqDB memory management layer.
bool SeqDB_IsBinaryTiList(const string &fname)
Returns true if the file name passed contains a binary TI list.
void SeqDB_ReadPigList(const string &fname, vector< CSeqDBGiList::SPigOid > &pigs, bool *in_order)
void SeqDB_ReadMemoryPigList(const char *fbeginp, const char *fendp, vector< CSeqDBGiList::SPigOid > &pigs, bool *in_order)
static bool s_SeqDB_DBExists(const string &dbname, char dbtype, CSeqDB_FileExistence &access, bool linkoutdb_search)
Test whether an index or alias file exists.
void SeqDB_ReadGiList(const string &fname, vector< CSeqDBGiList::SGiOid > &gis, bool *in_order)
Read a text or binary GI list from a file.
void SeqDB_GetLMDBFileExtensions(bool db_is_protein, vector< string > &extn)
Retrieves file extensions for BLAST LMDB files.
void SeqDB_ReadMemorySiList(const char *fbeginp, const char *fendp, vector< CSeqDBGiList::SSiOid > &sis, bool *in_order)
Read a text SeqID list from an area of memory.
void SeqDB_ReadBinaryGiList(const string &fname, vector< TGi > &gis)
Read a binary-format GI list from a file.
void SeqDB_ReadMemoryGiList(const char *fbeginp, const char *fendp, vector< CSeqDBGiList::SGiOid > &gis, bool *in_order)
Read a text or binary GI list from an area of memory.
CSeqDB_Substring SeqDB_RemoveExtn(CSeqDB_Substring s)
Returns a filename minus greedy path.
bool SeqDB_CompareVolume(const string &s1, const string &s2)
Compares two volume file names and determine the volume order.
ESeqDBIdType SeqDB_SimplifySeqid(CSeq_id &bestid, const string *acc, Int8 &num_id, string &str_id, bool &simpler)
Seq-id simplification.
void SeqDB_ReadMemoryMixList(const char *fbeginp, const char *fendp, vector< CSeqDBGiList::SGiOid > &gis, vector< CSeqDBGiList::STiOid > &tis, vector< CSeqDBGiList::SSiOid > &sis, bool *in_order)
Read an ID list (mixed type) from an area of memory.
string SeqDB_FindBlastDBPath(const string &dbname, char dbtype, string *sp, bool exact, CSeqDBAtlas &atlas)
Finds a file in the search path.
string GetBlastSeqIdString(const CSeq_id &seqid, bool version)
Return ID string as stored in lmdb.
void SeqDB_ReadMixList(const string &fname, vector< CSeqDBGiList::SGiOid > &gis, vector< CSeqDBGiList::STiOid > &tis, vector< CSeqDBGiList::SSiOid > &sis, bool *in_order)
Read a text SeqId list from a file.
static string s_SeqDB_FindBlastDBPath(const string &dbname, char dbtype, string *sp, bool exact, CSeqDB_FileExistence &access, const string path="")
void SeqDB_SplitQuoted(const string &dbname, vector< CTempString > &dbs, bool keep_quote)
Split a (possibly) quoted list of database names into pieces.
int s_ReadDigit(const char d, const string &list_type)
static string s_SeqDB_TryPaths(const string &blast_paths, const string &dbname, char dbtype, bool exact, CSeqDB_FileExistence &access, bool linkoutdb_search=false)
Search for a file in a provided set of paths.
CSeqDB_Substring SeqDB_RemoveFileName(CSeqDB_Substring s)
Returns a path minus filename.
static string s_GetPathSplitter()
Returns the character used to separate path components in the current operating system or platform.
void SeqDB_GetFileExtensions(bool db_is_protein, vector< string > &extn, EBlastDbVersion dbver)
Retrieves a list of all supported file extensions for BLAST databases.
bool IsStringId(const CSeq_id &id)
Determine if id is a string id.
void SeqDB_JoinDelim(string &a, const string &b, const string &delim)
Join two strings with a delimiter.
ESeqDBIdType SeqDB_SimplifyAccession(const string &acc, Int8 &num_id, string &str_id, bool &simpler)
String id simplification.
static bool s_SeqDB_ParseSeqIDs(const string &line, vector< CRef< CSeq_id > > &seqids)
Parse string into a sequence of Seq-id objects.
bool SeqDB_IsBinaryGiList(const string &fname)
Read a text or binary SeqId list from a file.
void s_InsureOrder(TVector &v)
void SeqDB_FileIntegrityAssert(const string &file, int line, const string &text)
Report file corruption by throwing an eFile CSeqDBException.
CSeqDB_Substring SeqDB_RemoveDirName(CSeqDB_Substring s)
Returns a filename minus greedy path.
void SeqDB_ReadTaxIdList(const string &fname, CSeqDBGiList::STaxIdsOids &taxids)
void SeqDB_ReadSiList(const string &fname, vector< CSeqDBGiList::SSiOid > &sis, bool *in_order, SBlastSeqIdListInfo &db_info)
Read a text SeqId list from a file.
const string SeqDB_GetOidMaskFileExt(bool db_is_protein, EOidMaskType t)
string SeqDB_ResolveDbPathForLinkoutDB(const string &filename)
Resolve a file path using SeqDB's path algorithms.
void SeqDB_ReadMemoryTaxIdList(const char *fbeginp, const char *fendp, CSeqDBGiList::STaxIdsOids &taxids)
void SeqDB_ReadMemoryTiList(const char *fbeginp, const char *fendp, vector< CSeqDBGiList::STiOid > &tis, bool *in_order)
Read a text or binary TI list from an area of memory.
void SeqDB_CombineAndQuote(const vector< string > &dbs, string &dbname)
Combine and quote list of database names.
string SeqDB_MakeOSPath(const string &dbs)
Return path with delimiters changed to platform preferred kind.
void SeqDB_ReadTiList(const string &fname, vector< CSeqDBGiList::STiOid > &tis, bool *in_order)
Read a text or binary TI list from a file.
static bool s_SeqDB_IsBinaryNumericList(const char *fbeginp, const char *fendp, bool &has_long_ids, bool *has_tis=NULL)
This function determines whether a file is a valid binary GI/TI file.
string SeqDB_ResolveDbPath(const string &filename)
Resolve a file path using SeqDB's path algorithms.
string SeqDB_ResolveDbPathNoExtension(const string &filename, char dbtype)
Resolve a file path using SeqDB's path algorithms.
void SeqDB_GetMetadataFileExtension(bool db_is_protein, string &extn)
bool SeqDB_SplitString(CSeqDB_Substring &buffer, CSeqDB_Substring &front, char delim)
Parse a prefix from a substring.
static bool s_ContainsBinaryNumericIdList(const string &fname, CSeqDBFileGiList::EIdType type)
static size_t s_SeqDB_EndOfFastaID(const string &str, size_t pos)
Find the end of a single element in a Seq-id set.
void SeqDB_CombinePath(const CSeqDB_Substring &one, const CSeqDB_Substring &two, const CSeqDB_Substring *extn, string &outp)
Combine a filesystem path and file name.
const string kSeqDBGroupAliasFileName("index.alx")
void SeqDB_ConvertOSPath(string &dbs)
Change path delimiters to platform preferred kind in-place.
Defines exception class and several constants for SeqDB.
EBlastDbVersion
BLAST database version.
ESeqDBIdType
Various identifier formats used in Id lookup.
@ eStringId
Each PIG identifier refers to exactly one protein sequence.
@ eTiId
Genomic ID is a relatively stable numeric identifier for sequences.
@ ePigId
Trace ID is a numeric identifier for Trace sequences.
@ eOID
Lookup from sequence hash values to OIDs.
This file defines several SeqDB utility functions related to byte order and file system portability.
void SeqDB_ThrowException(CSeqDBException::EErrCode code, const string &msg)
Throw a SeqDB exception; this is separated into a function primarily to allow a breakpoint to be set.
T SeqDB_GetStdOrd(const T *stdord_obj)
Read a network order integer value.
void s_SeqDB_QuickAssign(string &dst, const char *bp, const char *ep)
Higher Performance String Assignment.
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Structure that holds GI,OID pairs.
TGi gi
The GI or 0 if unknown.
int oid
The OID or -1 if unknown.
TPig pig
The PIG or 0 if unknown.
Structure that holds Seq-id,OID pairs.
string si
The String-id or "" if unknown.
vector< blastdb::TOid > oids
Structure that holds TI,OID pairs.
TTi ti
The TI or 0 if unknown.
Blast DB v5 seqid list info.
vector< CRef< CSeq_id > > TIdList
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4