= { {
"Strong",0}, {
"Moderate",1}, {
"Weak",2}, {
"Suspect",3},{
"Absent",4} };
/// Lookup table from a numeric match-type code to its label:
/// 0 = "Strong", 1 = "Moderate", 2 = "Weak", 3 = "Suspect", 4 = "Absent".
/// Left non-const so existing callers that index it with operator[] keep compiling.
std::map<int, std::string> match_type_strs = {
    {0, "Strong"},
    {1, "Moderate"},
    {2, "Weak"},
    {3, "Suspect"},
    {4, "Absent"}
};
67 autobar_p = in_str.find(
"|");
68 autocolon_p = in_str.find(
":");
70 if(bar_p ==
NPOS&& colon_p ==
NPOS)
73 automin_p =
min(bar_p, colon_p);
75 returnin_str.substr(min_p+1,
NPOS);
80 stringnew_str = in_str;
81 while(!new_str.empty()) {
82out_strs.push_back(new_str);
89list<string> fasta_sub_strs;
91 ITERATE(list<string>, sub_str_iter, fasta_sub_strs) {
94sub_id->
Set(*sub_str_iter);
95out_ids.push_back(sub_id);
109vector<string> title_tokens;
112 if(title_tokens.empty()){
115id_token = title_tokens[0];
120 if(obj_id.
IsStr())
121id_token = obj_id.
GetStr();
126obj_id->
SetStr(id_token);
129retval->
Assign(*sid_in);
140list<CRef<CSeq_id> > out_list;
152out_list.push_back(next_id);
155vector<CConstRef<CSeq_id> > idl_copy;
158idl_copy.push_back( cc);
160list<CRef<objects::CSeq_id> > next_seqid_list;
166out_list.push_back(next_id);
170 stringother_seq_id_str;
172 if(title.find(
"|") !=
NPOS) {
173 size_tpos = title.find(
" ");
174title = title.substr(pos+1, title.length()-(pos+1));
184list<CRef<CSeq_id> > id_list;
185 boolhas_range=
false;
186 TSeqPosrange_start=0, range_stop=0;
193id_list, has_range, range_start, range_stop, titles,
196out_list.push_back(*ii);
198 if(!titles.empty() && title.empty()) {
199title = titles[0].m_sLineText;
208out_list.push_back(next_id);
216 const string& db,
TSeqPosterminal_flexibility)
217: m_SeqLoc(seq_loc),
m_Scope(scope), m_DB(db),
218m_TerminalFlexibility(terminal_flexibility), m_Vecscreen(0)
241 CLocalBlastblaster(query_factory, opts, target_db);
287 string& qid,
string& qtitle,
288 string& sid,
string& stitle)
303list<CRef<CSeq_id> > qidl, sidl;
325 const boolkPrintAlignments =
static_cast<bool>(m_Outfmt == eShowAlignments);
326 const boolkPrintBlastTab =
static_cast<bool>(m_Outfmt == eBlastTab);
327 const boolkPrintJson =
static_cast<bool>(m_Outfmt == eJson);
328 const boolkPrintAsnText =
static_cast<bool>(m_Outfmt == eAsnText || m_Outfmt == eAsnTextNoProcess);
330 const stringcustom_output = ( kPrintBlastTab ?
"qaccver qstart qend saccver salltitles ":
"");
331 const boolkBelieveQuery(
false);
332 const boolkShowGi(
false);
336 const intkNumDescriptions(0);
337 const intkNumAlignments(50);
338 const boolkIsTabular(
false);
341kBelieveQuery,
out, kNumDescriptions,
343kShowGi, m_HtmlOutput,
346 if(!kPrintBlastTab && !kPrintJson && !kPrintAsnText ) {
350list<SVecscreenSummary> match_list = m_Screener.GetList();
352 if(kPrintBlastTab || kPrintJson || kPrintAsnText) {
358 if(m_Outfmt == eAsnTextNoProcess) {
360alignments = ((*result_set)[0]).SetSeqAlign().GetPointer();
363 if(m_Outfmt != eAsnTextNoProcess) {
364 ITERATE(list<SVecscreenSummary>, mi, match_list) {
366 if(mi->range.IntersectionWith((*align_iter)->GetSeqRange(0))==mi->range ||
367mi->range.IntersectionWith((*align_iter)->GetSeqRange(0))==(*align_iter)->GetSeqRange(0)) {
368 if(mi->seqid->Equals( (*align_iter)->GetSeq_id(0))) {
369(*align_iter)->SetNamedScore(
"match_type",
match_type_ints[mi->match_type]);
380 stringqid, qtitle, sid, stitle;
381x_GetIdsAndTitlesForSeqAlign(**align_iter, qid, qtitle, sid, stitle);
383fixed->
Assign(**align_iter);
385fixed->
SetSegs().SetDenseg().SetIds()[0]->Set(qid);
387fixed->
SetSegs().SetDenseg().SetIds()[0]->Set(
"lcl|"+qid);
390fixed->
SetSegs().SetDenseg().SetIds()[1]->Set(sid);
392fixed->
SetSegs().SetDenseg().SetIds()[1]->Set(
"lcl|"+sid);
394fixed_ids.
Set().push_back(fixed);
396(*result_set)[0].SetSeqAlign()->Assign(fixed_ids);
398m_Screener.m_Queries);
400 else if(kPrintBlastTab) {
401 if(!match_list.empty()) {
402 out<<
"#qid\tqstart\tqend\tmatch_strength\tdrop_count\tsid\tstitle"<< endl;
403 ITERATE(list<SVecscreenSummary>, mi, match_list) {
404 stringqtitle=
"", stitle=
"";
405 stringqid=
"", sid=
"";
407 if(!mi->aligns.empty()) {
408x_GetIdsAndTitlesForSeqAlign(*mi->aligns.front(), qid, qtitle, sid, stitle);
410qid = mi->seqid->GetSeqIdString(
true);
415qid=sid=qtitle=stitle=
"";
420x_GetIdsAndTitlesForSeqAlign(align, qid, qtitle, sid, stitle);
426<< mi->drops.
size() <<
"\t" 452}
else if(kPrintJson) {
457 stringqtitle=
"", stitle=
"";
458 stringqid=
"", sid=
"";
460 stringlast_qid =
"";
462 ITERATE(list<SVecscreenSummary>, mi, match_list) {
465qid=sid=qtitle=stitle=
"";
470x_GetIdsAndTitlesForSeqAlign(align, qid, qtitle, sid, stitle);
474jobj.
insert(
"query_id", qid);
479jobj.
insert(
"drop_count", mi->drops.size());
480jobj.
insert(
"subject_id", sid);
481jobj.
insert(
"subject_title", stitle);
487 if(align_count > 0) {
488top_obj.
insert(
"query_id", last_qid);
498 if(kPrintAlignments ==
false) {
500 m_Scope.GetBioseqHandle(*m_Screener.m_SeqLoc->GetId(),
503 stringmessage =
"Failed to resolve SeqId: "+m_Screener.m_SeqLoc->GetId()->AsFastaString();
508CBlastFormatUtil::AcknowledgeBlastQuery(*bioseq,
511m_HtmlOutput, kIsTabular);
514m_Screener.m_Vecscreen->VecscreenPrint(
out);
515 if(match_list.empty() && !kPrintAlignments) {
516 out<<
"<b>***** No hits found *****</b><br>\n";
519 if(match_list.empty() && !kPrintAlignments) {
520 out<<
"No hits found\n";
523 typedefpair<string, string> TLabels;
524vector<TLabels> match_labels;
525match_labels.push_back(TLabels(
"Strong",
"Strong match"));
526match_labels.push_back(TLabels(
"Moderate",
"Moderate match"));
527match_labels.push_back(TLabels(
"Weak",
"Weak match"));
528match_labels.push_back(TLabels(
"Suspect",
"Suspect origin"));
531list<SVecscreenSummary>::iterator boundary, itr;
532boundary = stable_partition(match_list.begin(), match_list.end(),
534 if(boundary != match_list.begin()) {
536 for(itr = match_list.begin(); itr != boundary; ++itr) {
537 out<< itr->range.GetFrom()+1 <<
"\t" 538<< itr->range.GetTo()+1 <<
"\n";
540match_list.erase(match_list.begin(), boundary);
547 if(kPrintAlignments) {
551m_Screener.m_Queries);
556list<CVecscreenRun::SVecscreenSummary>
560list<CVecscreenRun::SVecscreenSummary> retval;
563list<CVecscreen::AlnInfo> aln_info;
564 ITERATE(list<CVecscreen::AlnInfo*>, ai, *aln_info_ptr) {
569aln_info.push_back(align_info);
573 ITERATE(list<CVecscreen::AlnInfo>, ai, aln_info) {
576summary.
range= ai->range;
578summary.
aligns= ai->get_aligns();
579summary.
drops= ai->align_drops;
580retval.push_back(summary);
static CRef< CScope > m_Scope
Produce formatted blast output for command line applications.
Declares class to display one-line descriptions at the top of the BLAST report.
#define BLAST_DEFAULT_MATRIX
Default matrix name: BLOSUM62.
#define BLAST_GENETIC_CODE
Default genetic code for query and/or database.
static string GetTitle(const objects::CBioseq_Handle &bh)
This class formats the BLAST results for command line applications.
void PrintOneResultSet(const blast::CSearchResults &results, CConstRef< blast::CBlastQueryVector > queries, unsigned int itr_num=numeric_limits< unsigned int >::max(), blast::CPsiBlastIterationState::TSeqIds prev_seqids=blast::CPsiBlastIterationState::TSeqIds(), bool is_deltablast_domain_result=false)
Print all alignment information for a single query sequence along with any errors or warnings (errors...
void PrintEpilog(const blast::CBlastOptions &options)
Print the footer of the blast report.
@ kFormatLineLength
The line length of pairwise blast output.
void PrintProlog()
Print the header of the blast report.
Class for computing sequences' titles ("definitions").
vector< SLineTextAndLoc > TSeqTitles
EOutputFormat
Defines the output formats supported by our command line formatter.
@ ePairwise
Standard pairwise alignments.
@ eTabular
Tabular output.
@ eAsnText
ASN.1 text output.
CJson_Object push_back_object(void)
Add object type element to the end of the array.
bool Write(std::ostream &out, TJson_Write_Flags flags=fJson_Write_IndentWithSpace, unsigned int indent_char_count=4) const
Write JSON data into a stream.
CJson_Object SetObject(void)
Get JSON object contents of the node.
CJson_Array insert_array(const CJson_Node::TKeyType &name)
Insert array type element into the object.
void insert(const CJson_Node::TKeyType &name)
Insert null element into the object.
Class to perform a BLAST search on local BLAST databases. Note that PHI-BLAST can be run using this cl...
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
NCBI C++ Object Manager dependent implementation of IQueryFactory.
Search Results for One Query.
static CRef< CBlast_def_line_set > ExtractBlastDefline(const CBioseq &bioseq)
Extract a Blast-def-line-set object from a Bioseq retrieved by CSeqDB.
TSeqPos GetSeqStop(TDim row) const
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
bool GetNamedScore(const string &id, int &score) const
Get score.
TSeqPos GetSeqStart(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
static void GetBioseqHandleDeflineAndId(const objects::CBioseq_Handle &handle, list< TGi > &use_this_gi, string &seqid, string &defline, bool show_gi=true, TGi this_gi_first=INVALID_GI)
Returns sequence id and a BLAST defline as strings, given a Bioseq handle and a list of gis.
static void GetSeqIdList(const objects::CBioseq_Handle &bh, list< CRef< objects::CSeq_id > > &ids)
Converts a Bioseq handle's sequence id type into a list of objects::CSeq_id references,...
void x_GetIdsAndTitlesForSeqAlign(const objects::CSeq_align &align, string &qid, string &qtitle, string &sid, string &stitle)
CScope & m_Scope
from which we get the sequence data
void FormatResults(CNcbiOstream &out, CRef< blast::CBlastOptionsHandle > vs_opts)
Format the VecScreen results.
CRef< blast::CSearchResultSet > m_RawBlastResults
The raw BLAST results.
CRef< blast::CBlastQueryVector > m_Queries
The queries to run VecScreen on.
CRef< CSeq_loc > m_SeqLoc
Seq-loc to screen.
list< SVecscreenSummary > GetList() const
Fetches summary list.
CVecscreen * m_Vecscreen
vecscreen instance for search.
CRef< objects::CSeq_align_set > GetSeqalignSet() const
Fetches seqalign-set already processed by vecscreen.
CRef< blast::CSearchResultSet > GetSearchResultSet() const
CRef< CScope > m_Scope
Scope used to fetch query.
TSeqPos m_TerminalFlexibility
edge wiggle room
CRef< objects::CSeq_align_set > m_Seqalign_set
Processed Seq-align.
CVecscreenRun(CRef< CSeq_loc > seq_loc, CRef< CScope > scope, const string &db=string(kDefaultVectorDb), const TSeqPos terminal_flexibility=kDefaultTerminalFlexibility)
Constructor.
list< CRef< objects::CSeq_align > > TAlignList
string m_DB
Database to use (UniVec is default).
void x_RunBlast()
Runs the actual BLAST search.
static string GetStrengthString(MatchType match_type)
Returns a string concerning the strength of the match for a given enum value.
const list< AlnInfo * > * GetAlnInfoList() const
return alignment info list
CRef< objects::CSeq_align_set > ProcessSeqAlign(void)
Process alignment to show.
Class for the messages for an individual query sequence.
std::ofstream out("events_result.xml")
main entry point for tests
Operators to edit gaps in sequences.
CRef< CSearchResultSet > Run()
Executes the search.
size_type size() const
Identical to GetNumResults, provided to facilitate STL-style iteration.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
static CBlastOptionsHandle * CreateTask(string task, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested task,...
@ eBlastDbIsNucleotide
nucleotide
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
vector< ILineError::EProblem > TIgnoredProblems
static void ParseDefLine(const TStr &defLine, const SDefLineParseInfo &info, const TIgnoredProblems &ignoredErrors, list< CRef< CSeq_id >> &ids, bool &hasRange, TSeqPos &rangeStart, TSeqPos &rangeEnd, TSeqTitles &seqTitles, ILineErrorListener *pMessageListener)
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
CSeq_id & Set(const CTempString &the_id, TParseFlags flags=fParse_AnyRaw)
Reassign based on flat specifications; arguments interpreted as with constructors.
static int Score(const CRef< CSeq_id > &id)
Wrappers for use with FindBestChoice from <corelib/ncbiutil.hpp>
@ fLabel_Version
Show the version.
@ eContent
Untagged human-readable accession or the like.
const CSeq_id * GetId(void) const
Get the id of the location; return NULL if it has multiple ids or no id at all.
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
string GenerateDefline(const CBioseq_Handle &bsh, TUserFlags flags=0)
Main method.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
@ eGetBioseq_All
Search bioseq, load if not loaded yet.
vector< CSeq_id_Handle > TId
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle. Throws an exception if none is available.
const TId & GetId(void) const
void Reset(void)
Reset reference object.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
static const char label[]
list< CRef< CSeq_id > > TSeqid
const Tdata & Get(void) const
Get the member data.
list< CRef< CBlast_def_line > > Tdata
bool IsStr(void) const
Check if variant Str is selected.
const TStr & GetStr(void) const
Get the variant data.
TStr & SetStr(void)
Select the variant.
TId GetId(void) const
Get the variant data.
Tdata & Set(void)
Assign a value to data member.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
TLocal & SetLocal(void)
Select the variant.
const TLocal & GetLocal(void) const
Get the variant data.
bool IsLocal(void) const
Check if variant Local is selected.
Main class to perform a BLAST search on the local machine.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Defines BLAST database access classes.
Definition of SSeqLoc structure.
const CSeq_id * seqid
Seq-id of query.
CRange< TSeqPos > range
range of match.
string match_type
Categorizes strength of match.
SVecscreenMatchFinder(const string &match_type)
bool operator()(const CVecscreenRun::SVecscreenSummary &rhs)
string s_PopIdPart(const string &in_str)
map< string, int > match_type_ints
void s_MakeFastaSubIds(const CSeq_id &in_id, list< CRef< CSeq_id > > &out_ids)
CRef< CSeq_id > s_ReplaceLocalId(const CBioseq_Handle &bh, CConstRef< CSeq_id > sid_in, bool parse_local)
map< int, string > match_type_strs
list< CRef< CSeq_id > > s_SetIdList(const CBioseq_Handle &bh, string &title)
void s_MakeFastaSubStrs(const string &in_str, list< string > &out_strs)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4