: m_Out(
out), m_FmtSpec(format_spec), m_BlastDb(blastdb),
51m_DataExtractor(blastdb,
61 if(
config.m_FiltAlgoId >= 0 ||
config.m_FmtAlgoId >= 0) {
63 if(
config.m_FiltAlgoId >= 0)
64algo_ids.push_back(
config.m_FiltAlgoId);
65 if(
config.m_FmtAlgoId >= 0)
66algo_ids.push_back(
config.m_FmtAlgoId);
67vector<int> invalid_algo_ids =
69 if( !invalid_algo_ids.empty()) {
71 "Invalid filtering algorithm ID.");
91 "Invalid format specification");
213os <<
"Unrecognized format specification: '%"<< *fmt <<
"'";
229vector<string> data2write;
238 return(*desc)->GetTitle();
246 static const stringkTarget(
" >gi|");
247 static const stringkCtrlA =
string(1,
'\001') +
string(
"gi|");
260 if(
id.IsGi() ||
id.IsPrf() ||
id.IsPir()) {
261retval =
id.AsFastaString();
264retval =
id.GetSeqIdString(
true);
276 boollong_seqids =
false;
280long_seqids = (
registry.
Get(
"BLAST",
"LONG_SEQID") ==
"1");
286 if(bioseq.
Empty()) {
298 stringlcl_tmp =
id->AsFastaString();
299lcl_tmp = lcl_tmp.erase(0,4);
304 else if(long_seqids) {
306 if(
config.m_UseCtrlA) {
309fasta.
Write(*bioseq, 0,
true);
313 stringseparator =
config.m_UseCtrlA ?
"\001":
" >";
316 id = FindBestChoice(bioseq->GetId(), CSeq_id::Score); 317 m_Out << GetBareId(*id); 319 string title = s_GetTitle(bioseq); 321 if (!title.empty()) { 324 NStr::ReplaceInPlace(title, " >", "\001"); 326 vector<string> tokens; 327 NStr::Split(title, "\001", tokens); 328 auto it = tokens.begin(); 331 for (; it != tokens.end(); ++it) { 332 size_t pos = it->find (" "); 333 string str_id(*it, 0, pos != NPOS ? pos : it->length()); 334 list< CRef<CSeq_id> > seqids; 335 CSeq_id::ParseFastaIds(seqids, str_id); 337 // no valid sequence ids indicates that '>
' was within the 339 if (seqids.empty()) { 340 m_Out << " >" << *it; 344 id = FindBestChoice(seqids, CSeq_id::Score); 345 m_Out << GetBareId(*id); 347 m_Out << it->substr(pos, it->length() - pos); 353 CScope scope(*CObjectManager::GetInstance()); 354 fasta.WriteSequence(scope.AddBioseq(*bioseq)); 362SIZE_TYPE operator() (SIZE_TYPE a, const string& b) const {
368CSeqFormatter::x_Replacer(const vector<string>& data2write) const
370SIZE_TYPE data2write_size = accumulate(data2write.begin(), data2write.end(),
374retval.reserve(m_FmtSpec.size() + data2write_size -
375(m_ReplTypes.size() * 2));
377SIZE_TYPE fmt_idx = 0;
378for (SIZE_TYPE i = 0, kSize = m_ReplOffsets.size(); i < kSize; i++) {
379retval.append(&m_FmtSpec[fmt_idx], &m_FmtSpec[m_ReplOffsets[i]]);
380retval.append(data2write[i]);
381fmt_idx = m_ReplOffsets[i] + 2;
383if (fmt_idx <= m_FmtSpec.size()) {
384retval.append(&m_FmtSpec[fmt_idx], &m_FmtSpec[m_FmtSpec.size()]);
390void CSeqFormatter::SetConfig(TSeqRange range, objects::ENa_strand strand,
393m_DataExtractor.SetConfig(range, strand, filt_algo_id);
Definition of a customizable sequence writer interface.
Encapsulates identifier to retrieve data from a BLAST database.
FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>
Defines invalid user input exceptions.
static CNcbiApplication * Instance(void)
Singleton method.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
vector< int > ValidateMaskAlgorithms(const vector< int > &algorithm_ids)
Validates the algorithm IDs passed to this function, returning a vector of those algorithm IDs not pr...
CRef< CBioseq > GetBioseq(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_seq_id=NULL) const
Get a CBioseq for a sequence.
bool CheckOrFindOID(int &next_oid) const
Find an included OID, incrementing next_oid if necessary.
void Write(CBlastDBSeqId &id)
Write the sequence data associated with the requested ID in the format specified in the constructor.
bool x_RequireData() const
Specifies whether or not data (e.g., Bioseq) is required.
CSeqFormatter(const string &fmt_spec, CSeqDB &blastdb, CNcbiOstream &out, CSeqFormatterConfig config=CSeqFormatterConfig())
Constructor.
vector< char > m_ReplTypes
Vector of convertor objects.
string x_Replacer(const vector< string > &data2write) const
Replace format specifiers for the data contained in data2write.
CNcbiOstream & m_Out
Stream to write output.
CBlastDBExtractor m_DataExtractor
Data extractor.
void DumpAll(CSeqDB &blastdb, CSeqFormatterConfig config=CSeqFormatterConfig())
Full database FASTA dump. This is an optimized version that does not support range and mask retrieval.
void x_Builder(vector< string > &data2write)
Build data for write.
string m_FmtSpec
The output format specification.
vector< SIZE_TYPE > m_ReplOffsets
Vector of offsets where the replacements will take place.
CSeqDB & m_BlastDb
The BLAST database from which to extract data.
bool m_Fasta
Fasta output?
std::ofstream out("events_result.xml")
main entry point for tests
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual void Write(const CSeq_entry_Handle &handle, const CSeq_loc *location=0)
Unspecified locations designate complete sequences; non-empty custom titles override the usual title ...
void SetWidth(TSeqPos width)
virtual void WriteSequence(const CBioseq_Handle &handle, const CSeq_loc *location=0, CSeq_loc::EOpFlags merge_flags=CSeq_loc::fMerge_AbuttingOnly)
void SetAllFlags(TFlags flags)
@ fNoExpensiveOps
don't try too hard to find titles
@ fKeepGTSigns
don't convert '>' to '_' in title
@ fEnableGI
Use this flag to enable GI output in the defline.
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void Reset(void)
Reset reference object.
bool Empty(void) const THROWS_NONE
Check if CRef is empty -- not pointing to any object, which means having a null value.
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
NCBI_NS_STD::string::size_type SIZE_TYPE
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
const TDb & GetDb(void) const
Get the Db member data.
bool IsGeneral(void) const
Check if variant General is selected.
bool IsLocal(void) const
Check if variant Local is selected.
const TGeneral & GetGeneral(void) const
Get the variant data.
list< CRef< CSeqdesc > > Tdata
const TId & GetId(void) const
Get the Id member data.
const Tdata & Get(void) const
Get the member data.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
@ e_Title
a title for this sequence
static void s_ReplaceCtrlAsInTitle(CRef< CBioseq > bioseq)
string GetBareId(const CSeq_id &id)
static string s_GetTitle(CConstRef< CBioseq > bioseq)
Configuration object for CSeqFormatter.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4