( volset, filters, gi_list, neg_list, locked, lmdb_set);
104 if(
x_ComputeFilters(volset, filters, lmdb_set, f_bits, gi_list, neg_list)) {
134 intvol_end = vol.
OIDEnd();
141 intvols =
static_cast<int>(ft->
GetVolumes().size());
156 for(
intj = 1; j < vols; j++) {
175volume_map->
UnionWith(*sub_bits,
true);
230 switch(
mask.GetType()) {
280 CFilecheck_file(path_str);
281 if(!check_file.
Exists()) {
316 for(j = 0; j < gis.
GetNumGis(); j++) {
319gilist_oids->
SetBit(oid);
325 for(j = 0; j < gis.
GetNumSis(); j++) {
328gilist_oids->
SetBit(oid);
334 for(j = 0; j < gis.
GetNumTis(); j++) {
337gilist_oids->
SetBit(oid);
346gilist_oids->
SetBit(oid);
353 if(oids_tax.size()) {
355 for(
unsigned intk = 0; k < oids_tax.size(); k++) {
357taxlist_oids->
SetBit(oids_tax[k]);
370 constvector<blastdb::TOid> & excluded_oids = nlist.
GetExcludedOids();
371 for(
unsigned int i=0;
i< excluded_oids.size();
i++) {
396 for(
intoid = 0; oid <
max; oid++) {
421 for(
int i= 0;
i< num_gis;
i++) {
424 if(oid != prev_oid) {
425 if((oid >= oid_start) && (oid < oid_end)) {
432 for(
int i= 0;
i< num_tis;
i++) {
435 if(oid != prev_oid) {
436 if((oid >= oid_start) && (oid < oid_end)) {
443 for(
int i= 0;
i< num_sis;
i++) {
446 if(oid != prev_oid) {
447 if((oid >= oid_start) && (oid < oid_end)) {
496bitend = bitmap + (((num_oids + 31) / 32) * 4);
503 for(
size_toid = vol_end; bitset->
CheckOrFindBit(oid); oid++) {
521vector<const CSeqDBVolEntry * > & excluded_vols,
525vector<bool> vol_included(num_vol,
false);
526excluded_vols.clear();
527 for(
unsigned int i=0;
i< num_vol;
i++) {
529 if(std::find(vol_basenames.begin(), vol_basenames.end(), vol->
GetVolName()) != vol_basenames.end()) {
540 for(
unsigned int i= 0;
i< excluded_vols.size();
i++) {
549 void s_AddFilterFile(
string& name,
const string& vn, vector<string> & fnames, vector<vector<string> > & fnames_vols)
552 for(; j < fnames.size(); j++) {
553 if(fnames[j] == name) {
554fnames_vols[j].push_back(vn);
558 if( fnames.size() == j) {
559vector<string> p(1,vn);
560fnames.push_back(name);
561fnames_vols.push_back(p);
572 if(seq_id1.
Match(seq_id2)) {
579vector<vector<string> > & fnames_vols,
586 if(fnames.size() == 0) {
589vector<string> user_accs;
594vector<string> neg_user_accs;
595 if((!neg_user_list.
Empty()) && (neg_user_list->
GetNumSis() > 0)) {
596neg_user_accs = neg_user_list->
GetSiList();
597 sort(neg_user_accs.begin(), neg_user_accs.end());
600 for(
unsigned intk=0; k < fnames.size(); k++) {
601vector<const CSeqDBVolEntry * > excluded_vols;
602vector<blastdb::TOid> oids;
607 if(accs.size() == 0){
610 if((user_accs.size() > 0) || (neg_user_accs.size() > 0)){
612 if(user_accs.size() > 0) {
613vector<string> common;
614common.resize(accs.size());
615vector<string>::iterator itr = set_intersection(accs.begin(), accs.end(),
616user_accs.begin(), user_accs.end(), common.begin(),
s_CompareSeqId);
617common.resize(itr-common.begin());
618 if(common.size() == 0){
623 if(neg_user_accs.size() > 0) {
624vector<string> difference;
625difference.resize(accs.size());
626vector<string>::iterator itr = set_difference(accs.begin(), accs.end(),
627neg_user_accs.begin(), neg_user_accs.end(), difference.begin(),
s_CompareSeqId);
628difference.resize(itr-difference.begin());
629 if(difference.size() == 0){
632 swap(accs, difference);
637 for(
unsigned int i=0;
i< accs.size();
i++) {
641 if(excluded_vols.size() != 0) {
646filter_bit.
SetBit(oids[
i]);
652vector<vector<string> > & fnames_vols,
659 if(fnames.size() == 0) {
672 for(
unsigned intk=0; k < fnames.size(); k++) {
673vector<const CSeqDBVolEntry * > excluded_vols;
674vector<blastdb::TOid> oids;
679 if(taxids.
size() == 0){
682 if(user_taxids.
size() > 0){
683vector<TTaxId> common;
684common.resize(taxids.
size());
685vector<TTaxId>::iterator itr = set_intersection(taxids.
begin(), taxids.
end(),
686user_taxids.
begin(), user_taxids.
end(), common.begin());
687common.resize(itr-common.begin());
688 if( common.size() == 0) {
692taxids.
insert(common.begin(), common.end());
694 if(neg_user_taxids.
size() > 0) {
695vector<TTaxId> difference;
696difference.resize(taxids.
size());
697vector<TTaxId>::iterator itr = set_difference(taxids.
begin(), taxids.
end(),
698neg_user_taxids.
begin(), neg_user_taxids.
end(), difference.begin());
699difference.resize(itr-difference.begin());
700 if(difference.size() == 0){
704taxids.
insert(difference.begin(), difference.end());
708 for(
unsigned int i=0;
i< oids.size();
i++) {
709 if(excluded_vols.size() != 0) {
714filter_bit.
SetBit(oids[
i]);
727vector<string> seqid_fnames;
728vector<string> taxid_fnames;
729vector< vector<string> > seqid_fnames_vols;
730vector< vector<string> > taxid_fnames_vols;
739 stringname = (*itr)->GetPath().GetPathS();
751 if(seqid_fnames.size() > 0) {
753lmdb_set, volset, filter_bit);
755 if(taxid_fnames.size() > 0) {
757lmdb_set, volset, filter_bit);
760 return((seqid_fnames.size() + taxid_fnames.size()) > 0 ?
true:
false);
ncbi::TMaskedQueryRegions mask
void SetFrame(const string &frame)
void Log(const string &name, const char *value, CDebugDumpFormatter::EValueType type=CDebugDumpFormatter::eValue, const string &comment=kEmptyStr)
const char * GetFileDataPtr(const string &fname, TIndx offset)
Get a pointer to the specified offset.
void Init(const string &filename)
Initializes a memory map object.
TListRef GetNodeIdList(const CSeqDB_Path &filename, const CSeqDBVol *volp, EGiListType list_type, CSeqDBLockHold &locked)
Get a reference to a named GI list.
int GetNumGis() const
Get the number of GIs in the array.
const SGiOid & GetGiOid(int index) const
Access an element of the array.
int GetNumSis() const
Get the number of Seq-ids in the array.
const SPigOid & GetPigOid(int index) const
int GetNumTis() const
Get the number of TIs in the array.
const SSiOid & GetSiOid(int index) const
Access an element of the array.
void GetSiList(vector< string > &sis) const
TODO Get the seqid list?
const vector< blastdb::TOid > & GetOidsForTaxIdsList()
set< TTaxId > & GetTaxIdsList()
const STiOid & GetTiOid(int index) const
Access an element of the array.
bool Empty() const
Return false if there are elements present.
void AccessionsToOids(const vector< string > &accs, vector< TOid > &oids) const
bool IsBlastDBVersion5() const
void TaxIdsToOids(set< TTaxId > &tax_ids, vector< blastdb::TOid > &rv) const
const vector< string > & GetSiList()
int GetNumTis() const
Get the number of TIs in the array.
bool GetOidStatus(int oid)
Get the inclusion status of an OID.
int GetNumGis() const
Get the number of GIs in the array.
const vector< blastdb::TOid > & GetExcludedOids()
int GetNumSis() const
Get the number of SeqIds in the array.
set< TTaxId > & GetTaxIdsList()
int GetNumOids()
Get the size of the OID array.
CSeqDBOIDList(CSeqDBAtlas &atlas, const CSeqDBVolSet &volumes, CSeqDB_FilterTree &filters, CRef< CSeqDBGiList > &gi_list, CRef< CSeqDBNegativeList > &neg_list, CSeqDBLockHold &locked, const CSeqDBLMDBSet &lmdb_set)
Constructor.
CRef< CSeqDB_BitSet > x_IdsToBitSet(const CSeqDBGiList &ids, int vol_start, int vol_end)
Load an ID (GI or TI) list file into a bitset object.
bool x_IsSet(TOID oid) const
Check if a bit is set.
void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Dump debug information for this object.
void x_Setup(const CSeqDBVolSet &volset, CSeqDB_FilterTree &filters, CRef< CSeqDBGiList > &gi_list, CRef< CSeqDBNegativeList > &neg_list, CSeqDBLockHold &locked, const CSeqDBLMDBSet &lmdb_set)
Build an oid mask in memory.
void x_ClearBitRange(int oid_start, int oid_end)
Clear all bits in a range.
CRef< CSeqDB_BitSet > x_GetOidMask(const CSeqDB_Path &fn, int vol_start, int vol_end)
Load the named OID mask file into a bitset object.
void x_ApplyUserGiList(CSeqDBGiList &gis)
Apply a user GI list to a volume.
void x_ApplyNegativeList(CSeqDBNegativeList &neg, bool is_v5)
Apply a negative user GI list to a volume.
~CSeqDBOIDList()
Destructor.
CSeqDBAtlas & m_Atlas
The memory management layer object.
CRef< CSeqDB_BitSet > x_ComputeFilters(const CSeqDB_FilterTree &ft, const CSeqDBVolEntry &vol, CSeqDBGiListSet &gis, CSeqDBLockHold &locked, bool isBlastDBv5)
Compute the oid mask bitset for a database volume.
const unsigned char TCUC
Shorthand type to clarify code that iterates over memory.
CRef< CSeqDB_BitSet > m_AllBits
An OID bit set covering all volumes.
int m_NumOIDs
The total number of OIDs represented in the bit set.
TIndx GetFileLength() const
Get the length of the file.
TIndx ReadSwapped(CSeqDBFileMemMap &lease, TIndx offset, Uint4 *value) const
Read a four byte numerical object from the file.
bool Open(const CSeqDB_Path &name)
MMap or Open a file.
const char * GetFileDataPtr(CSeqDBFileMemMap &lease, TIndx start, TIndx end) const
Get a pointer to a section of the file.
int OIDStart() const
Get the starting OID in this volume's range.
int OIDEnd() const
Get the ending OID in this volume's range.
CSeqDBVol * Vol()
Get a pointer to the underlying volume object.
const CSeqDBVolEntry * GetVolEntry(int i) const
Find a volume entry by index.
const CSeqDBVol * GetVol(int i) const
Find a volume by index.
int GetNumVols() const
Get the number of volumes.
int GetNumOIDs() const
Get the size of the OID range.
const string & GetVolName() const
Get the volume name.
void SetOidMaskType(int oid_masks) const
void AttachVolumeGiList(CRef< CSeqDBGiList > gilist) const
Filter this volume using the specified GI list.
char GetSeqType() const
Get the sequence type stored in this database.
void SetMemBit(int mbit) const
Set the MEMB_BIT filtering for this volume.
Something else yet again etc.
@ eTaxIdList
Taxonomy Id List.
@ eOidRange
OID Range [start, end).
@ eAllSet
All OIDs are set.
@ eAllClear
All OIDs are clear.
void IntersectWith(CSeqDB_BitSet &other, bool consume)
This bitset is assigned to the intersection of it and another.
void UnionWith(CSeqDB_BitSet &other, bool consume)
This bitset is assigned to the union of it and another.
void Normalize()
If this is a special case bitset, convert it to a normal one.
void ClearBit(size_t index)
Clear the specified bit (to false).
bool CheckOrFindBit(size_t &index) const
Check if a bit is true or find the next bit that is.
void AssignBitRange(size_t start, size_t end, bool value)
Store the provided value in a range of bits.
void SetBit(size_t index)
Set the specified bit (to true).
Tree of nodes describing filtering of database sequences.
bool HasFilter() const
Check whether this tree represents any volume filtering.
const vector< CRef< CSeqDB_FilterTree > > & GetNodes() const
Get child nodes attached to this node.
vector< CRef< CSeqDB_AliasMask > > TFilters
Type used to store lists of filters found here.
const TFilters & GetFilters() const
Get filters from this node.
const vector< CSeqDB_BasePath > & GetVolumes() const
Get volumes attached to this node.
CRef< CSeqDB_FilterTree > Specialize(string volname) const
Specialize this tree for the indicated volume.
const string & GetPathS() const
Get the path as a string.
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator end() const
The NCBI C++ standard methods for dealing with std::string.
static unsigned char depth[2 *(256+1+29)+1]
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Info(CExceptionArgs_Base &args)
virtual bool Exists(void) const
Check existence of file.
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
@ fParse_ValidLocal
Treat otherwise unidentified strings as raw accessions, provided that they pass rudimentary validatio...
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty -- pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty -- not pointing to any object, which means having a null value.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
constexpr auto sort(_Init &&init)
const blastdb::TOid kSeqDBEntryNotFound
Int4 TOid
Ordinal ID in BLAST databases.
const string SeqDB_GetOidMaskFileExt(bool db_is_protein, EOidMaskType t)
File access objects for CSeqDB.
Implementation for some assorted ID list filtering code.
void s_AddFilterFile(string &name, const string &vn, vector< string > &fnames, vector< vector< string > > &fnames_vols)
void s_GetFilteredOidRange(const CSeqDBVolSet &volset, const vector< string > &vol_basenames, vector< const CSeqDBVolEntry * > &excluded_vols, CRef< CSeqDBGiList > &si_list)
void s_ProcessTaxIdFilters(const vector< string > &fnames, vector< vector< string > > &fnames_vols, CRef< CSeqDBGiList > user_list, CRef< CSeqDBNegativeList > neg_user_list, const CSeqDBLMDBSet &lmdb_set, const CSeqDBVolSet &volset, CSeqDB_BitSet &filter_bit)
void s_ProcessSeqIdFilters(const vector< string > &fnames, vector< vector< string > > &fnames_vols, CRef< CSeqDBGiList > user_list, CRef< CSeqDBNegativeList > neg_user_list, const CSeqDBLMDBSet &lmdb_set, const CSeqDBVolSet &volset, CSeqDB_BitSet &filter_bit)
bool s_IsOidInFilteredVol(blastdb::TOid oid, vector< const CSeqDBVolEntry * > &excluded_vols)
bool s_CompareSeqId(const string &id1, const string &id2)
The SeqDB oid filtering layer.
static const sljit_gpr r2
int oid
The OID or -1 if unknown.
int oid
The OID or -1 if unknown.
int oid
The OID or -1 if unknown.
int oid
The OID or -1 if unknown.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4