: m_Queries(query_factory),
58m_LocalDbAdapter(blastdb),
59m_Options(&options->SetOptions()),
60m_BtopSpliceSignals(
true)
63 if(!
env.Get(
"BTOP_NO_SPLICE_SIGNALS").empty()) {
118vector< CConstRef<objects::CSeq_id> > seqid_vec;
119vector< CRef<CBlastAncillaryData> > ancill_vec;
122 unsigned intnum_subjects = 0;
129 if(subject_infosrc !=
NULL) {
130num_subjects = subject_infosrc->
Size();
134 for(index=0; index<local_query_data->
GetNumQueries(); index++)
137local_query_data->
GetSeq_loc(index)->GetId());
141msg_vec.push_back(q_msg);
142seqid_vec.push_back(query_id);
144sa_vec.push_back(tmp_align);
145pair<double, double> tmp_pair(-1.0, -1.0);
150ancill_vec.push_back(tmp_ancillary_data);
152 for(
unsigned int i=1;
i< num_subjects;
i++) {
154msg_vec.push_back(
msg);
155seqid_vec.push_back(query_id);
157sa_vec.push_back(tmp_align);
160ancill_vec.push_back(tmp_ancillary_data);
202 "Missing database or subject sequences");
212 boolbtop_splice_signals)
216 const Uint1kGap = 15;
218 intnum_identical = 0;
229 if(subject_gap > 0 &&
236 if(btop_splice_signals) {
237 stringacceptor(2u,
' ');
239(
int)((
prev->map_info->right_edge >> 2) & 3)]);
241(
int)(
prev->map_info->right_edge & 3)]);
246 stringdonor(2u,
' ');
258 else if(subject_gap > 0) {
270 else if(query_gap < 0) {
279query_pos += num_matches;
282num_identical += num_matches;
283 if(num_matches > 0) {
294 if(buff[1] !=
'-') {
296buff[0] ==
'-'&& num_matches == 0) {
298md_tag += (
string)(&buff[1]);
302 if(buff[0] ==
'-') {
305md_tag += (
string)(&buff[1]);
310md_matches += num_matches;
318num_matches = hsp->
query.
end- query_pos;
319num_identical += num_matches;
320 if(num_matches > 0) {
322md_matches += num_matches;
325 if(md_matches > 0) {
328 len+= num_identical;
330perc_id = (double)(num_identical * 100) / (double)
len;
338 boolbtop_splice_signals)
354subject_id, &subj_length);
358query_length, chain);
366user_object->
SetType().SetStr(
"Mapper Info");
367align->
SetExt().push_back(user_object);
379btop_splice_signals);
380user_object->
AddField(
"btop", btop);
381user_object->
AddField(
"md_tag", md_tag);
387user_object->
AddField(
"segment",
398 boolbtop_splice_signals)
403 for(
const HSPChain* chain = chains; chain; chain = chain->
next) {
407 if(chain->pair && chain->context > chain->pair->context) {
423btop_splice_signals));
425seqinfo_src, query_info,
426btop_splice_signals));
430btop_splice_signals);
433seq_aligns->
Set().push_back(align);
444aligns.reserve(
results->num_queries);
447query_ids.reserve(
results->num_queries);
458 for(
intindex=0;index <
results->num_queries;index++) {
465 for(
autoit: seq_aligns->
Get()) {
466retval->
Set().push_back(it);
476 return a->GetSegs().IsDisc() && !
b->GetSegs().IsDisc();
495retval->reserve(
results->num_queries);
497 for(
intindex=0;index <
results->num_queries;index++) {
516query_data->
GetSeq_loc(index + 1)->GetId());
521chains =
results->chain_array[index + 1];
528 for(
autoit: mate_aligns->
Get()) {
529aligns->
Set().push_back(it);
537&query_masks[index + 1],
548retval->push_back(res);
562: m_QueryId(query_id),
567 x_SetInfo(query_length, query_mask, mate_length, mate_mask);
575: m_QueryId(query_id),
588 if(
a->GetSegs().IsDisc() &&
b->GetSegs().IsDisc()) {
589 const CSeq_align& a_first = *
a->GetSegs().GetDisc().Get().front();
590 const CSeq_align& a_second = *
a->GetSegs().GetDisc().Get().back();
591 const CSeq_align& b_first = *
b->GetSegs().GetDisc().Get().front();
592 const CSeq_align& b_second = *
b->GetSegs().GetDisc().Get().back();
607 return(
a->GetSegs().IsDisc() && !
b->GetSegs().IsDisc());
617 if(
a->GetSegs().IsDisc() &&
b->GetSegs().IsDisc()) {
618 const CSeq_align& a_first = *
a->GetSegs().GetDisc().Get().front();
619 const CSeq_align& a_second = *
a->GetSegs().GetDisc().Get().back();
620 const CSeq_align& b_first = *
b->GetSegs().GetDisc().Get().front();
621 const CSeq_align& b_second = *
b->GetSegs().GetDisc().Get().back();
636 return(
a->GetSegs().IsDisc() && !
b->GetSegs().IsDisc());
660 boolfirst_aligned =
false;
661 boollast_aligned =
false;
670 if(it->GetSegs().IsDisc()) {
671first_aligned =
true;
672last_aligned =
true;
675it->GetSegs().GetDisc().Get();
676 ASSERT(sasd.size() == 2);
700 else if(it->GetSeq_id(0).Match(*
m_QueryId)) {
701first_aligned =
true;
703 else if(it->GetSeq_id(0).Match(*
m_MateId)) {
704last_aligned =
true;
709 if(!first_aligned) {
717 if(first_mask && !first_mask->empty()) {
718 TSeqRangefirst_range(*first_mask->front());
724 if(last_mask && !last_mask->empty()) {
725 TSeqRangelast_range(*last_mask->front());
736 for(
auto result: *
this) {
738 if(no_discordant &&
result->IsPaired() && !
result->IsConcordant()) {
742 for(
autoit:
result->GetSeqAlign()->Get()) {
743retval->
Set().push_back(it);
Auxiliary functions for BLAST.
#define NUM_STRANDS
Number of frames in a nucleotide sequence.
void BlastHSPStreamMappingClose(BlastHSPStream *hsp_stream, BlastMappingResults *results)
Closes BlastHSPStream structure for mapping and produces BlastMappingResults.
@ eFirstSegment
The first sequence of a pair with both sequences read and accepted.
Definition of classes which constitute the results of running a BLAST search.
Utility function to convert internal BLAST result structures into objects::CSeq_align_set objects.
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
Class used to return ancillary data from a blast search, i.e.
Defines BLAST error codes (user errors included)
Search class to perform the preliminary stage of the BLAST search.
Index wrapper exceptions.
Results of Magic-BLAST mapping.
Magic-BLAST results for a single query/read or a pair of reads.
@ eScore_PercentIdentity_Gapped
void SetNamedScore(const string &id, int score)
TSeqPos GetSeqStart(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Seq-loc iterator class â iterates all intervals from a seq-loc in the correct order.
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
Abstract base class to encapsulate retrieval of sequence identifiers.
Collection of masked regions for a single query sequence.
Class for the messages for an individual query sequence.
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
virtual CConstRef< objects::CSeq_loc > GetSeq_loc(size_t index)=0
Get the Seq_loc for the sequence indicated by index.
int CheckInternalData()
Checks that internal data is valid.
size_t GetNumberOfThreads(void) const
Accessor for the number of threads to use.
bool operator()(const CRef< CSeq_align > &a, const CRef< CSeq_align > &b)
CRef< CLocalDbAdapter > m_LocalDbAdapter
Reference to a BLAST subject/database object.
bool m_Concordant
True if results are concordant pair.
CRef< IQueryFactory > m_Queries
Queries.
bool m_Paired
True if results are for paired reads.
CRef< SInternalData > m_InternalData
Internal data strctures.
bool operator()(const CRef< CSeq_align > &a, const CRef< CSeq_align > &b)
CRef< SInternalData > Run()
Borrow the internal data and results results.
CRef< CMagicBlastResultSet > RunEx(void)
void x_SetInfo(int first_length, const TMaskedQueryRegions *first_masks, int last_length=0, const TMaskedQueryRegions *last_masks=NULL)
CStructWrapper< TData > * WrapStruct(TData *obj, TData *(*del)(TData *))
Auxiliary function to create a CStructWrapper for a pointer to an object.
void SortAlignments(EOrdering order)
Sort alignments by selected criteria (pair configuration)
CRef< CSeq_align_set > x_BuildSeqAlignSet(const BlastMappingResults *results)
const char BLASTNA_TO_IUPACNA[]
Translates between blastna and iupacna.
vector< CConstRef< objects::CSeq_id > > TQueryIdVector
List of query ids.
CRef< ILocalQueryData > MakeLocalQueryData(const CBlastOptions *opts)
Creates and caches an ILocalQueryData.
bool IsBlastDb() const
Returns true if this object represents a BLAST database.
virtual void SetNumberOfThreads(size_t nthreads)
@inheritDoc
void x_Validate(void)
Perform sanity checks on input arguments.
static CRef< CSeq_align > s_CreateSeqAlign(const HSPChain *chain, CRef< ILocalQueryData > &qdata, CRef< IBlastSeqInfoSrc > &seqinfo_src, const BlastQueryInfo *query_info, bool btop_splice_signals)
static CRef< CSeq_align_set > x_CreateSeqAlignSet(const HSPChain *results, CRef< ILocalQueryData > qdata, CRef< IBlastSeqInfoSrc > seqinfo_src, const BlastQueryInfo *query_info, bool btop_splice_signals)
Create results.
CRef< CSeq_align_set > GetFlatResults(bool no_discordant=false)
Get all results as a single Seq-align-set object.
CConstRef< CSeq_id > m_QueryId
Query id.
void Combine(const TSearchMessages &other_msgs)
Combine another set of search messages with this one.
bool m_BtopSpliceSignals
Should BTOP strings be formatted with splice signals.
EOrdering
Ordering of alignments.
IBlastSeqInfoSrc * MakeSeqInfoSrc()
Retrieves or constructs the IBlastSeqInfoSrc.
BlastQueryInfo * m_QueryInfo
The query information structure.
bool operator()(const CRef< CSeq_align > &a, const CRef< CSeq_align > &b)
CConstRef< CSeq_id > m_MateId
Mate id if results are for paired reads.
virtual size_t GetNumQueries()=0
Get the number of queries.
CRef< CSeq_align_set > CreateEmptySeq_align_set()
Constructs an empty Seq-align-set containing an empty discontinuous seq-align, and appends it to a pr...
TResultsInfo m_FirstInfo
Alignment flags for the query.
virtual size_t Size() const =0
Returns the size of the underlying container of sequences.
void GetQueryMessages(size_t index, TQueryMessages &qmsgs)
Retrieve error/warning messages for a specific query.
CRef< CSeq_align_set > m_Aligns
Alignments for a single or a pair of reads.
void MakeSplicedSeg(CSpliced_seg &spliced_seg, CRef< CSeq_id > product_id, CRef< CSeq_id > genomic_id, int product_length, const HSPChain *chain)
Convert a spliced alignmeny in BlastHSPChain into Spliced_seg.
CMagicBlast(CRef< IQueryFactory > query_factory, CRef< CLocalDbAdapter > blastdb, CRef< CMagicBlastOptionsHandle > options)
Constructor to map short reads as queries to a genome as BLAST database.
static void s_ComputeBtopAndIdentity(const HSPChain *chain, string &btop, string &md_tag, double &perc_id, bool btop_splice_signals)
CRef< CBlastPrelimSearch > m_PrelimSearch
Object that runs BLAST search.
virtual size_t GetSeqLength(size_t index)=0
Get the length of the sequence indicated by index.
CRef< CBlastOptions > m_Options
Options to configure the search.
TSearchMessages GetSearchMessages() const
Retrieve any error/warning messages that occurred during the search.
const TSeqLocInfoVector & GetQueryMasks(void) const
Return query masks.
CRef< CSeq_align_set > Run(void)
Run the RNA-Seq mapping.
CRef< TBlastHSPStream > m_HspStream
HSP output of the preliminary stage goes here.
CMagicBlastResults(CConstRef< CSeq_id > query_id, CConstRef< CSeq_id > mate_id, CRef< CSeq_align_set > aligns, const TMaskedQueryRegions *query_mask=NULL, const TMaskedQueryRegions *mate_mask=NULL, int query_length=0, int mate_length=0)
Constructor for a pair.
void GetSequenceLengthAndId(const IBlastSeqInfoSrc *seqinfo_src, int oid, CRef< objects::CSeq_id > &seqid, TSeqPos *length)
Retrieves subject sequence Seq-id and length.
CRef< CMagicBlastResultSet > x_BuildResultSet(const BlastMappingResults *results)
TResultsInfo m_LastInfo
Alignment flags for the mate.
bool IsDbScanMode() const
Returns true if this is not a database but is database scanning mode.
@ eCoreBlastError
FIXME: need to interpret CORE errors.
@ fUnaligned
Read is unaligned.
@ fFiltered
Read did not pass quality filtering.
unsigned int TSeqPos
Type for sequence locations and lengths.
TErrCode GetErrCode(void) const
Get error code.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
C & SerialAssign(C &dest, const C &src, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
static int BlastRank(const CRef< CSeq_id > &id)
const CSeq_id & GetSeq_id(void) const
Get seq_id of the current location.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty â pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty â not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
position_type GetLength(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define USING_SCOPE(ns)
Use the specified namespace.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
void SetType(TType &value)
Assign a value to Type data member.
Tdata & Set(void)
Assign a value to data member.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
void SetDim(TDim value)
Assign a value to Dim data member.
void SetType(TType value)
Assign a value to Type data member.
TExt & SetExt(void)
Assign a value to Ext data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
@ eType_partial
mapping pieces together
ENa_strand
strand of nucleic acid
unsigned int
A callback function used to compare two keys in a database.
#define MAPPER_SPLICE_SIGNAL
Declares CMagicBlast, the C++ API for the BLAST RNA-Seq mapping engine.
#define ASSERT
macro for assert.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
vector< TMaskedQueryRegions > TSeqLocInfoVector
Collection of masked regions for all queries in a BLAST search.
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
BlastMappingResults * Blast_MappingResultsFree(BlastMappingResults *results)
Free BlastMappingResults structure.
BlastMappingResults * Blast_MappingResultsNew(void)
Initialize BlastMappingResults structure.
Int4 query_length
Length of this query, strand or frame.
Int4 segment_flags
Flags describing segments for paired reads.
Uint1 left_edge
Two subject bases before the alignment in the four least significant bits and flags in most significa...
JumperEditsBlock * edits
Information about mismatches and gaps, used for mapping short reads.
Structure holding all information about an HSP.
BlastSeg query
Query sequence info.
BlastSeg subject
Subject sequence info.
BlastHSPMappingInfo * map_info
Structure that contains BLAST mapping results.
The query related information.
BlastContextInfo * contexts
Information per context.
Int4 offset
Start of hsp.
A chain of HSPs: spliced alignment.
Int4 score
Alignment score for the chain.
Int4 count
Number of placements for the read.
HSPContainer * hsps
A list of HSPs that belong to this chain.
Int4 context
Contex number of query sequence.
struct HSPChain * next
Pointer to the next chain in a list.
struct HSPContainer * next
Uint1 query_base
Query base at this position.
Uint1 subject_base
Subject base at this position.
Int4 query_pos
Query position.
Alignment edit script for gapped alignment.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4