* annot_name,
91 voidBeginBlob(
void);
92 voidResetBlob(
void);
95m_CurBlobPos = m_LastBlobPos + pos;
103m_IsGBBioseqSet =
value;
117 returnm_IsGBBioseqSet;
121 returnm_Manager.GetSeqAlignGroupSize();
258(*m_Ids)[it.GetSeq_id_Handle()].range.CombineWith(it.GetRange());
262(*m_Ids)[it.GetSeq_id_Handle()].range.CombineWith(it.GetRange());
285(*m_Ids)[it.GetSeq_id_Handle()].range.CombineWith(it.GetRange());
479 if(
info.GetMemberInfo()->GetId().GetName() ==
"class") {
484 else if(
info.GetMemberInfo()->GetId().GetName() ==
"descr") {
488 else if(
info.GetMemberInfo()->GetId().GetName() ==
"seq-set") {
546m_CurFileId(file_id),
552m_IsGBBioseqSet(
false)
574 switch( blob_type ) {
603 LDS2_THROW(eIndexerError,
"Unfinished blob in the data file.");
632 for(
int i= 0;
i< num_types;
i++) {
637 if(
types.size() == 1 ) {
639 for(
int i= 0;
i< num_types;
i++) {
681 "Bioseq with duplicate seq-id found: "<<
688 "Bioseqs with duplicate seq-id found: "+
758 "id", bioseq_ids_hook, objstr.get());
776id_hook, annot_desc_hook, annot_type_hook);
785 "class", bioseq_set_hook, objstr.get());
787 "descr", bioseq_set_hook, objstr.get());
789 "seq-set", bioseq_set_hook, objstr.get());
807objstr->Skip(type_info);
852 const string* annot_name,
856annot->
type= annot_type;
859annot->
name= *annot_name;
880: m_GBReleaseMode(eGB_Ignore),
881m_DupIdMode(eDuplicate_Store),
882m_ErrorMode(eError_Report),
888m_SeqAlignGroupSize(0)
939 else if( sub.
IsFile() ) {
960handler_name = url_it->second;
971 "Can not find URL handler: "+ url_it->second);
975 "Can not find URL handler: "+ url_it->second);
979 handler= h_it->second.GetPointerOrNull();
1037 if(db_info.
id!= 0) {
1041 if( file_info.
exists() ) {
1045 "Unrecognized file format: "+ *it);
1049 "Unrecognized file format: "+ *it);
1056 if(db_info.
id== 0) {
1063file_info.
id= db_info.
id;
1064 if(file_info != db_info) {
1082 "Failed to open file '"+
info.name +
"'");
1084 intparsed_entries = 0;
1085 switch(
info.format ) {
1092 while( !
in->eof() ) {
1102 "Unrecognized top level object in "+
1107 "Unrecognized top level object in "<<
1118 if(parsed_entries == 0) {
1133 while( !
lr.AtEOF() ) {
1137 if( !se->
IsSeq() ) {
1151 if( !
lr.AtEOF() ) {
1164 "Failed to parse fasta file "<<
info.name);
1168 if(parsed_entries == 0) {
1177 "Unsupported data file format: "+
info.name);
1181 "Unsupported data file format: "<<
info.name);
1186 if(parsed_entries > 0) {
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Checksum and hash calculation classes.
Implementation of ILineReader for IReader.
Base class for reading FASTA sequences.
EFormat
The formats are checked in the same order as declared here.
@ eBinaryASN
Binary ASN.1.
@ eGZip
GNU zip compressed file.
@ eFasta
FASTA format sequence record, CFastaReader.
static EFormat Format(const string &path, EOnError onerror=eDefault)
Guess file format.
~CLDS2_AnnotDesc_Hook(void)
virtual void SkipObject(CObjectIStream &in, const CObjectTypeInfo &info)
const string & GetName(void) const
CLDS2_AnnotDesc_Hook(void)
~CLDS2_AnnotType_Hook(void)
CLDS2_AnnotType_Hook(void)
virtual void SkipObject(CObjectIStream &in, const CObjectTypeInfo &info)
SLDS2_Annot::TIdMap * m_Ids
void ResetAnnot(SLDS2_Annot::TIdMap *ids)
const string & GetType(void) const
CLDS2_ObjectParser & m_Parser
SLDS2_Annot::TIdMap m_IdMap
CLDS2_AnnotDesc_Hook & m_DescHook
CLDS2_Annot_Hook(CLDS2_ObjectParser &parser, CLDS2_Seq_id_Hook &id_hook, CLDS2_AnnotDesc_Hook &desc_hook, CLDS2_AnnotType_Hook &type_hook)
CLDS2_Seq_id_Hook & m_IdHook
virtual void SkipObject(CObjectIStream &in, const CObjectTypeInfo &info)
CLDS2_AnnotType_Hook & m_TypeHook
CLDS2_ObjectParser & m_Parser
virtual void SkipClassMember(CObjectIStream &in, const CObjectTypeInfoMI &info)
CLDS2_Seq_id_Hook & m_IdHook
CLDS2_BioseqIds_Hook(CLDS2_ObjectParser &parser, CLDS2_Seq_id_Hook &id_hook)
virtual void SkipClassMember(CObjectIStream &in, const CObjectTypeInfoMI &info)
CLDS2_BioseqSet_Hook(CLDS2_ObjectParser &parser)
CLDS2_ObjectParser & m_Parser
CBioseq_set::EClass m_Class
Int8 AddAnnot(SLDS2_Annot &annot)
Add annotation, return the new annot id.
Int8 AddBlob(Int8 file_id, SLDS2_Blob::EBlobType blob_type, Int8 file_pos)
Add blob, return the new blob id.
void GetFileNames(TStringSet &files) const
Get all known file names.
void Open(EAccessMode mode=eWrite)
Open LDS2 database. If the database does not exist, throws exception.
void AddFile(SLDS2_File &info)
Add new file record. On success file_info.id is not zero.
void BeginUpdate(void)
Start update transaction.
const string & GetDbFile(void) const
Get database file name.
vector< AutoPtr< SLDS2_Annot > > TLDS2Annots
void DeleteFile(const string &file_name)
Delete file and all related entries from the database.
void UpdateFile(SLDS2_File &info)
Update info for the known file. The 'id' of the info will change.
Int8 GetBioseqId(const CSeq_id_Handle &idh) const
Check if the db contains a bioseq with the given id.
void Create(void)
Create the database.
void EndUpdate(void)
End update transaction, commit the changes.
SLDS2_File GetFileInfo(const string &file_name) const
Get complete file info.
Int8 AddBioseq(Int8 blob_id, const TSeqIdSet &ids)
Add bioseq, return the new bioseq id.
Class for managing LDS2 database and related data files.
@ eError_Throw
Throw exceptions on errors.
@ eError_Report
Print error messages, but do not fail (default).
SLDS2_File x_GetFileInfo(const string &file_name, CRef< CLDS2_UrlHandler_Base > &handler)
THandlersByUrl m_HandlersByUrl
@ eGB_Force
Split all top-level bioseq-sets into seq-entries.
@ eGB_Ignore
Do not split bioseq-sets (default)
void AddDataDir(const string &data_dir, EDirMode mode=eDir_Recurse)
Add data directory.
void RegisterUrlHandler(CLDS2_UrlHandler_Base *handler)
Register a URL handler.
EDuplicateIdMode GetDuplicateIdMode(void) const
CLDS2_Manager(const string &db_file)
Create LDS2 manager for the specified db file.
void ResetData(void)
Remove all data from the database.
virtual ~CLDS2_Manager(void)
CLDS2_UrlHandler_Base * x_GetUrlHandler(const SLDS2_File &file_info)
void AddDataFile(const string &data_file)
Add new data file to the list.
void UpdateData(void)
Rescan all indexed files, check for modifications, update the database.
void AddDataUrl(const string &url, const string &handler_name)
Add a URL.
bool x_IsGZipFile(const SLDS2_File &file_info)
EDirMode
Directory parsing mode while indexing files.
@ eDir_Recurse
Automatically scan sub-directories (default).
CFastaReader::TFlags m_FastaFlags
CRef< CLDS2_Database > m_Db
void x_ParseFile(const SLDS2_File &info, CLDS2_UrlHandler_Base &handler)
void SetDbFile(const string &db_file)
Select new database.
@ eDuplicate_Skip
Ignore bioseqs with duplicate ids, store just the first one.
@ eDuplicate_Store
Store all bioseqs regardless of seq-id conflicts (default).
CLDS2_ObjectParser(CLDS2_Manager &mgr, Int8 file_id, TFormat format, CNcbiIstream &in, CLDS2_Database &db)
void SetBlobOffset(Int8 pos)
int GetSeqAlignGroupSize(void) const
vector< AutoPtr< SBioseqInfo > > TBioseqs
SLDS2_Blob::EBlobType m_LastBlobType
void AddBioseq(const TSeqIdSet &ids)
SLDS2_Blob::EBlobType x_GetBlobType(void)
void SetGBBioseqSet(bool value)
CLDS2_Database::TLDS2Annots TAnnots
CLDS2_Manager & m_Manager
ESerialDataFormat m_Format
static TTypeInfo sx_GetObjectTypeInfo(SLDS2_Blob::EBlobType blob_type)
bool ParseNext(SLDS2_Blob::EBlobType blob_type=SLDS2_Blob::eUnknown)
SLDS2_Blob::EBlobType m_BlobType
void EndBlob(SLDS2_Blob::EBlobType blob_type)
bool GetSplitBioseqSet(void) const
~CLDS2_ObjectParser(void)
void AddAnnot(SLDS2_Annot::EType annot_type, const string *annot_name, const SLDS2_Annot::TIdMap &ids)
SLDS2_File::TFormat TFormat
virtual void SkipObject(CObjectIStream &in, const CObjectTypeInfo &info)
CLDS2_ObjectParser & m_Parser
CLDS2_SeqEntry_Hook(CLDS2_ObjectParser &parser)
void operator=(const CGuard &)
CLDS2_Seq_id_Hook & m_Hook
CGuard(CLDS2_Seq_id_Hook &hook, TSeqIdSet &ids)
virtual void SkipObject(CObjectIStream &in, const CObjectTypeInfo &info)
stack< TSeqIdSet * > TIdStack
void PushSet(TSeqIdSet &ids)
Base class for URL handler.
Default handler for local files - registered automatically by LDS2 manager and data loader.
static const string s_GetHandlerName(void)
Handler for GZip local files. Not registered by default.
static const string s_GetHandlerName(void)
Helper class: installs hooks in constructor, and uninstalls in destructor.
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
namespace ncbi::objects::
Root class for all serialization exceptions.
Skip hook for data member of a containing object (eg, SEQUENCE)
Skip hook for a standalone object.
CTypeInfo class contains all information about C++ types (both basic and classes): members and layout...
container_type::const_iterator const_iterator
container_type::iterator iterator
const_iterator begin() const
const_iterator end() const
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator find(const key_type &key) const
const_iterator end() const
void(*)(CSeq_entry_Handle seh, IWorkbench *wb, const CSerialObject &obj) handler
static const struct type types[]
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
void Error(CExceptionArgs_Base &args)
void Warning(CExceptionArgs_Base &args)
TEntries GetEntries(const string &mask=kEmptyStr, TGetEntriesFlags flags=0) const
Get directory entries based on the specified "mask".
static string CreateAbsolutePath(const string &path, ERelativeToWhat rtw=eRelativeToCwd)
Get an absolute path from some, possibly relative, path.
virtual bool Exists(void) const
Check the entry existence.
bool IsDir(EFollowLinks follow=eFollowLinks) const
Check whether a directory entry is a directory.
bool IsFile(EFollowLinks follow=eFollowLinks) const
Check whether a directory entry is a file.
const string & GetPath(void) const
Get entry path.
@ fIgnoreRecursive
Suppress "self recursive" elements (the directories "." and "..").
ESerialDataFormat
Data file format.
@ eSerial_AsnText
ASN.1 text.
@ eSerial_AsnBinary
ASN.1 binary.
virtual CRef< CSeq_entry > ReadOneSeq(ILineErrorListener *pMessageListener=nullptr)
Read a single effective sequence, which may turn out to be a segmented set.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
static TTypeInfo GetTypeInfo(void)
void DefaultSkip(CObjectIStream &stream, const CObjectTypeInfoMI &member)
pair< TObjectPtr, TTypeInfo > ObjectInfo(C &obj)
const CEnumeratedTypeValues & GetEnumeratedTypeValues(void) const
Get a set of possible values of enumeration.
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
void DefaultRead(CObjectIStream &in, const CObjectInfo &object)
Default read.
void DefaultSkip(CObjectIStream &in, const CObjectTypeInfo &type)
Default skip.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
int64_t Int8
8-byte (64-bit) signed integer
static TThisType GetWhole(void)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
Int8 NcbiStreamposToInt8(NCBI_NS_STD::char_traits< char >::pos_type stream_pos)
Convert stream position to 64-bit int.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
TDim GetDim(void) const
Get the Dim member data.
bool IsSetDim(void) const
Dimensionality. Check if a value has been assigned to the Dim data member.
const TLocation & GetLocation(void) const
Get the Location member data.
const TProduct & GetProduct(void) const
Get the Product member data.
bool IsSetProduct(void) const
Product of process. Check if a value has been assigned to the Product data member.
const TLoc & GetLoc(void) const
Get the Loc member data.
const TSeq & GetSeq(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
@ eClass_genbank
converted genbank
const TId & GetId(void) const
Get the Id member data.
list< CRef< CSeq_id > > TId
const TName & GetName(void) const
Get the variant data.
bool IsName(void) const
Check if variant Name is selected.
The blob sat and sat key. Both must be positive integers. Non-empty string. The interpretation of the blob id depends on a processor. The Cassandra processor expects the following format
static bool IsSupportedFormat(CFormatGuess::EFormat format)
static const SLDS2_Blob::EBlobType kExpectedBlobTypes[]
#define LDS2_THROW(errcode, message)
const GenericPointer< typename T::ValueType > T2 value
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
std::istream & in(std::istream &in_, double &x_)
static SLJIT_INLINE sljit_ins lr(sljit_gpr dst, sljit_gpr src)
#define row(bind, expected)
Info about seq-id used in an annotation.
EBlobType
Top-level object types.
@ eBioseq_set_element
Used for indexing individual seq-entries from a top-level bioseq-set.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4