A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/cdd__pssm__input_8cpp_source.html below:

NCBI C++ ToolKit: src/algo/blast/api/cdd_pssm_input.cpp Source File

40 #include "../core/blast_psi_priv.h" 63 #ifndef GAP_IN_ALIGNMENT 65 # define GAP_IN_ALIGNMENT ((Uint4)-1) 75  const string

& matrix_name

,

79  const string

& query_title

)

80

: m_QueryTitle(query_title),

82

m_SeqalignSet(seqaligns),

85

m_MatrixName(matrix_name),

86

m_DiagnosticsRequest(diags),

88

m_GapExistence(gap_existence),

89

m_GapExtension(gap_extension)

95  if

(seqaligns.

Empty

()) {

106  for

(

unsigned int i

=0;

i

<

m_Hits

.size();

i

++) {

118  "Minimum RPS-BLAST e-value is larger than the maximum one"

);

158  "Evalue not found in Seq-align"

);

161  if

(evalue >= min_evalue && evalue < max_evalue) {

162  m_Hits

.push_back(

new CHit

((*it)->GetSegs().GetDenseg(), evalue));

171  if

(

m_Hits

.size() < 2) {

177

vector<CHit*> new_hits;

178

new_hits.reserve(

m_Hits

.size());

180

new_hits.push_back(

m_Hits

[0]);

182

vector<CHit*>::iterator it(

m_Hits

.begin());

186  for

(;it !=

m_Hits

.end();++it) {

190  for

(

int i

=

static_cast<int>

(new_hits.size()) - 1;

i

>= 0

191

&& (*it)->m_SubjectId->Match(*new_hits[

i

]->m_SubjectId);

i

--) {

193  const CHit

* kept_hit = new_hits[

i

];

198  CHit

intersection(*kept_hit);

204

(*it)->Subtract(intersection);

206  if

((*it)->IsEmpty()) {

213

new_hits.push_back(*it);

235

(*it)->FillData(seqdb, *profile_data);

242  const int

kQueryLength =

static_cast<int>

(

m_QueryData

.size());

243  const int

kNumCds =

static_cast<int>

(

m_Hits

.size());

250  m_MsaData

.resize(kQueryLength * (kNumCds), cell);

254  "Multiple alignment data structure"

);

256  for

(

int i

=0;

i

< kNumCds;

i

++) {

261  for

(

size_t

hit_idx=0;hit_idx <

m_Hits

.size();hit_idx++) {

265  m_Hits

[hit_idx]->GetSegments()) {

267  const int

kNumQueryColumns

268

= (*it)->m_QueryRange.GetTo() - (*it)->m_QueryRange.GetFrom();

270  int

q_from = (*it)->m_QueryRange.GetFrom();

273  for

(

int i

=0;

i

< kNumQueryColumns;

i

++) {

276  m_Msa

[hit_idx][q_from +

i

].

data

= &(*it)->m_MsaData[

i

];

279  m_Hits

[hit_idx]->m_MsaIdx =

static_cast<int>

(hit_idx);

289  const int

kQueryLength =

static_cast<int>

(

m_QueryData

.size());

290  const int

kNumCds =

static_cast<int>

(

m_Hits

.size());

292  for

(

int i

=0;

i

< kNumCds;

i

++) {

296  for

(

int i

=0;

i

< kNumCds;

i

++) {

297  for

(

int

j=0;j < kQueryLength;j++) {

301  "Query sequence cannot contain gaps"

);

312  if

(

data

->iobsr <= 0.0) {

314  "Zero independent observations in domain model"

);

320  if

(

data

->wfreqs[k] < 0.0) {

322  "Negative residue frequency in a domain " 325

s +=

data

->wfreqs[k];

331  if

(

fabs

(s - 1.0) > 1e-5) {

333  "Domain residue frequencies do not sum to 1"

);

344

: m_Evalue(evalue), m_MsaIdx(-1)

346  const int

kNumDims = denseg.

GetDim

();

347  const int

kNumSegments = denseg.

GetNumseg

();

351

m_SubjectId.Reset(denseg.

GetIds

()[1].GetNonNullPointer());

353  const

vector<TSignedSeqPos>& starts = denseg.

GetStarts

();

354  const

vector<TSeqPos>& lens = denseg.

GetLens

();

359  for

(

int

seg=0;seg < kNumSegments;seg++) {

360  TSeqPos

query_offset = starts[query_index];

361  TSeqPos

subject_offset = starts[subject_index];

363

query_index += kNumDims;

364

subject_index += kNumDims;

370

m_SegmentList.push_back(

new

CHitSegment(

371  TRange

(query_offset, query_offset + lens[seg]),

372  TRange

(subject_offset, subject_offset

375

query_offset += lens[seg];

376

subject_offset += lens[seg];

383

: m_SubjectId(hit.m_SubjectId),

384

m_Evalue(hit.m_Evalue),

385

m_MsaIdx(hit.m_MsaIdx)

396  ITERATE

(vector<CHitSegment*>, it, m_SegmentList) {

409  ITERATE

(vector<CHitSegment*>, it, m_SegmentList) {

410  result

+= (*it)->GetLength();

426

(*it)->FillData(db_oid, profile_data);

482  _ASSERT

(!m_SubjectId.Empty());

484  ITERATE

(vector<CHitSegment*>, it, m_SegmentList) {

495  if

(m_SegmentList.empty()) {

499  ITERATE

(vector<CHitSegment*>, it, m_SegmentList) {

500  if

(!(*it)->IsEmpty()) {

515

vector<TRange>::const_iterator r_itr = ranges.begin();

516

vector<CHitSegment*>::iterator seg_it = m_SegmentList.begin();

517

vector<CHitSegment*> new_segs;

518  while

(seg_it != m_SegmentList.end() && r_itr != ranges.end()) {

522

= (app == eSubject ? (*seg_it)->m_SubjectRange

523

: (*seg_it)->m_QueryRange);

526  while

(r_itr != ranges.end() && r_itr->GetTo() < seg_range.

GetFrom

()) {

530  if

(r_itr == ranges.end()) {

538  if

(intersection == seg_range) {

544  if

(intersection.

Empty

()) {

554  while

(r_itr != ranges.end() && r_itr->GetFrom() < seg_range.

GetTo

()) {

558

r_itr->GetFrom()) - seg_range.

GetFrom

();

559  int

d_to =

min

(seg_range.

GetTo

(),

560

r_itr->GetTo()) - seg_range.

GetTo

();

565

new_segs.push_back(new_seg);

580  while

(seg_it != m_SegmentList.end()) {

587  ITERATE

(vector<CHitSegment*>, it, m_SegmentList) {

589

new_segs.push_back(*it);

594

m_SegmentList.swap(new_segs);

601

vector<TRange> ranges;

604

ranges.push_back(app == eQuery ? (*it)->m_QueryRange

605

: (*it)->m_SubjectRange);

610

IntersectWith(ranges, app);

618  if

(IsEmpty() || hit.

IsEmpty

()) {

626  int

from = hit.

GetSegments

().front()->m_QueryRange.GetFrom();

627  int

to = hit.

GetSegments

().back()->m_QueryRange.GetTo();

630  if

(m_SegmentList.front()->m_QueryRange.GetFrom() >= to

631

|| m_SegmentList.back()->m_QueryRange.GetTo() <= from) {

637

vector<CHitSegment*>::iterator it = m_SegmentList.begin();

639

vector<CHitSegment*> new_segments;

640

new_segments.reserve(m_SegmentList.size());

644  while

(it != m_SegmentList.end() && (*it)->m_QueryRange.GetTo() <= from) {

646

new_segments.push_back(*it);

653  if

(it == m_SegmentList.end() || (*it)->m_QueryRange.GetFrom() > to) {

658  if

((*it)->m_QueryRange.GetTo() > to) {

665  if

((*it)->m_QueryRange.GetFrom() < from) {

670  int

d_to = from - (*it)->m_QueryRange.GetTo();

672

(*it)->AdjustRanges(0, d_to);

673  _ASSERT

((*it)->m_QueryRange.GetFrom() < (*it)->m_QueryRange.GetTo());

674

new_segments.push_back(*it);

684  _ASSERT

((*it)->m_QueryRange.GetFrom() < (*it)->m_QueryRange.GetTo());

685

new_segments.push_back(new_seg);

689  for

(;it != m_SegmentList.end();++it) {

690

new_segments.push_back(*it);

697  if

((*it)->m_QueryRange.GetFrom() >= from) {

704  int

d_to = from - (*it)->m_QueryRange.GetTo();

707

(*it)->AdjustRanges(0, d_to);

708  _ASSERT

((*it)->m_QueryRange.GetFrom() < (*it)->m_QueryRange.GetTo());

709

new_segments.push_back(*it);

714  while

(it != m_SegmentList.end()

715

&& (*it)->m_QueryRange.GetTo() <= to) {

723  if

(it != m_SegmentList.end()) {

725  if

((*it)->m_QueryRange.GetFrom() < to) {

726  int

d_from = to - (*it)->m_QueryRange.GetFrom();

729

(*it)->AdjustRanges(d_from, 0);

730  _ASSERT

((*it)->m_QueryRange.GetFrom()

731

< (*it)->m_QueryRange.GetTo());

733

new_segments.push_back(*it);

742  while

(it != m_SegmentList.end()) {

743

new_segments.push_back(*it);

749

m_SegmentList.swap(new_segments);

759  m_MsaData

.resize(m_QueryRange.GetTo() - m_QueryRange.GetFrom(), d);

761

x_FillResidueCounts(db_oid, profile_data);

762

x_FillObservations(db_oid, profile_data);

768  _ASSERT

(m_QueryRange.GetFrom() >= 0 && m_QueryRange.GetTo() >= 0);

769  _ASSERT

(m_SubjectRange.GetFrom() >= 0 && m_SubjectRange.GetTo() >= 0);

771  const int

kQueryLength = m_QueryRange.GetTo() - m_QueryRange.GetFrom();

772  const int

kSubjectLength = m_SubjectRange.GetTo() - m_SubjectRange.GetFrom();

774  if

(kQueryLength != kSubjectLength) {

790

m_QueryRange.SetFrom(m_QueryRange.GetFrom() + d_from);

791

m_QueryRange.SetTo(m_QueryRange.GetTo() + d_to);

793

m_SubjectRange.SetFrom(m_SubjectRange.GetFrom() + d_from);

794

m_SubjectRange.SetTo(m_SubjectRange.GetTo() + d_to);

800  return

m_QueryRange.GetFrom() > m_QueryRange.GetTo()

801

|| m_SubjectRange.GetFrom() > m_SubjectRange.GetTo();

807  _ASSERT

(profile_data()->freq_header);

812  _ASSERT

(db_oid < num_profiles);

816  const TFreqs

* db_counts =

821  int

db_seq_length = db_seq_offsets[db_oid + 1] - db_seq_offsets[db_oid];

826  _ASSERT

(m_SubjectRange.GetTo() <= db_seq_length);

831  for

(

int i

=0;

i

< num_columns;

i

++) {

845

(double)counts[(m_SubjectRange.GetFrom() +

i

) *

kAlphabetSize

+ j]

846

/ (

double

)sum_freqs;

856  _ASSERT

(profile_data()->obsr_header);

861  _ASSERT

(db_oid < num_profiles);

865  const TObsr

* data_start

873  for

(

int i

=0;

i

< data_size;

i

+=2) {

878  for

(

int

j=0;j < num;j++) {

879

obsr.push_back(

val

);

883  int

num_columns = m_SubjectRange.GetTo() - m_SubjectRange.GetFrom();

884  for

(

int i

=0;

i

< num_columns;

i

++) {

User-defined methods of the data storage class.

Declares the BLAST exception class.

Defines a concrete strategy to obtain PSSM input data for PSI-BLAST.

Defines BLAST error codes (user errors included)

Wrapper class to manage the BlastRPSInfo structure, as currently there aren't any allocation or deall...

Defines system exceptions occurred while running BLAST.

Represents one alignment segment of a RPS-BLAST hit.

Class used for sorting hits by subject seq-id and e-value.

Class used for sorting hit segments by range.

Class used for sorting ranges.

bool SeqidToOid(const CSeq_id &seqid, int &oid) const

Translate a Seq-id to any matching OID.

const CSeq_id & GetSeq_id(TDim row) const

Get seq-id (the first one if segments have different ids).

#define GAP_IN_ALIGNMENT

Representation of GAP in Seq-align.

virtual ~CCddInputData()

Virtual destructor.

CRef< objects::CBioseq > m_QueryBioseq

Query as Bioseq.

static const char kGapChar('-')

The representation of a gap in ASCII format.

void AdjustRanges(int d_from, int d_to)

Change ranges on query and subject by given values.

TRange m_QueryRange

Segment range on query.

vector< CHit * > m_Hits

RPS-BLAST hits in internal representation.

bool IsEmpty(void) const

Is hit empty.

string m_DbName

CDD database name.

CHit(const objects::CDense_seg &denseg, double evalue)

Constructor.

vector< CHitSegment * > m_SegmentList

List of hit segments.

void x_CreateMsa(void)

Create multiple alignment of CDs.

void x_ProcessAlignments(double min_evalue, double max_evalue)

Process RPS-BLAST hits.

void x_RemoveMultipleCdHits(void)

Remove multiple hits to the same CD.

vector< CHitSegment * > & GetSegments(void)

Get hit segments.

void Subtract(const CHit &hit)

Subtract from another hit from this hit using query ranges.

PSIBlastOptions m_Opts

Delta BLAST options for PSSM Engine.

int GetLength(void) const

Get hit length in residues, counts number of matching residues, gaps are not counted.

void Process(void)

Pre-process CD matches to query.

double m_MinEvalue

Min e-value threshold for all hits to be included in PSSM computation.

PSICdMsaCell ** m_Msa

Pointer to MSA.

CConstRef< objects::CSeq_align_set > m_SeqalignSet

RPS-BLAST hits for the query.

CCddInputData(const Uint1 *query, unsigned int query_length, CConstRef< objects::CSeq_align_set > seqaligns, const PSIBlastOptions &opts, const string &dbname, const string &matrix_name="BLOSUM62", int gap_existence=0, int gap_extension=0, PSIDiagnosticsRequest *diags=NULL, const string &query_title="")

Constructor.

bool x_ValidateHits(void) const

Validate internal representation of RPS-BLAST hits.

PSICdMsa m_CddData

MSA of CDs and CD data.

const Uint1 AMINOACID_TO_NCBISTDAA[]

Translates between ncbieaa and ncbistdaa.

vector< Uint1 > m_QueryData

Query sequence.

PSIMsaDimensions m_MsaDimensions

MSA dimensions, used by PSSM engine.

void x_FillResidueCounts(int db_oid, const CBlastRPSInfo &profile_data)

Populate arrays of weighted residue counts.

void x_FillObservations(int db_oid, const CBlastRPSInfo &profile_data)

Populate arrays of effective numbers of observations.

void FillData(int db_oid, const CBlastRPSInfo &profile_data)

Allocate and populate arrays for MSA data (weighted residue counts and effective observations used fo...

static const int kAlphabetSize

bool Validate(void) const

Validate hit.

vector< PSICdMsaCell > m_MsaData

MSA data.

Uint4 TFreqs

Type used for residue frequencies stored in CDD.

void IntersectWith(const vector< TRange > &segments, EApplyTo app)

Intersect hit segments with list of ranges and store result in hit segments.

bool Validate(void) const

Validate hit segment.

unsigned int GetQueryLength(void)

Get query length.

string m_QueryTitle

Query title (for PSSM)

static const int kRpsScaleFactor

Scale of residue frequencies and number of independent observations stored in CDD.

bool x_ValidateMsa(void) const

Validate multiple alignment of CDs.

void x_ExtractQueryForPssm(void)

Create query as Bioseq.

bool IsEmpty(void) const

Does the hit segment represent an empty range.

void FillData(const CSeqDB &seqdb, const CBlastRPSInfo &profile_data)

Allocate and populate arrays of data for PSSM computation.

Uint4 TObsr

Type used for number of independent observations stored in CDD.

void x_FillHitsData(void)

Read data needed for PSSM computation from CDD and populate arrays.

EApplyTo

Master selection for operations involving ranges.

@ fDeltaBlast

Flags set for DELTA-BLAST.

unsigned int TSeqPos

Type for sequence locations and lengths.

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define NON_CONST_ITERATE(Type, Var, Cont)

Non constant version of ITERATE macro.

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

bool Empty(void) const THROWS_NONE

Check if CConstRef is empty – not pointing to any object which means having a null value.

void Reset(void)

Reset reference object.

bool NotEmpty(void) const THROWS_NONE

Check if CRef is not empty – pointing to an object and has a non-null value.

bool NotEmpty(void) const THROWS_NONE

Check if CConstRef is not empty – pointing to an object and has a non-null value.

bool Empty(void) const THROWS_NONE

Check if CRef is empty – not pointing to any object, which means having a null value.

uint8_t Uint1

1-byte (8-bit) unsigned integer

int32_t Int4

4-byte (32-bit) signed integer

uint32_t Uint4

4-byte (32-bit) unsigned integer

TThisType IntersectionWith(const TThisType &r) const

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define USING_SCOPE(ns)

Use the specified namespace.

#define END_SCOPE(ns)

End the previously defined scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

#define BEGIN_SCOPE(ns)

Define a new scope.

TTo GetTo(void) const

Get the To member data.

TFrom GetFrom(void) const

Get the From member data.

const TStarts & GetStarts(void) const

Get the Starts member data.

const TLens & GetLens(void) const

Get the Lens member data.

TDim GetDim(void) const

Get the Dim member data.

const TIds & GetIds(void) const

Get the Ids member data.

TNumseg GetNumseg(void) const

Get the Numseg member data.

list< CRef< CSeq_align > > Tdata

TTitle & SetTitle(void)

Select the variant.

@ eRepr_raw

continuous sequence

char * dbname(DBPROCESS *dbproc)

Get name of current database.

unsigned int

A callback function used to compare two keys in a database.

bool is_aligned(T *p) noexcept

Check pointer alignment.

constexpr auto sort(_Init &&init)

static PCRE2_SIZE * offsets

Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST) Some of these possibly should...

double inclusion_ethresh

Minimum evalue for inclusion in PSSM calculation.

Data needed for PSSM computation stored in MSA cell for single column in CD aligned to a position in ...

double iobsr

Effective number of independent observations in a CD column.

double * wfreqs

Frequencies for each residue in CD column.

Alignment cell that represents one column of CD aligned to a position in the query.

Uint1 is_aligned

Does this cell represent column aligned to a CD.

PSICdMsaCellData * data

Data needed for PSSM computation.

PSIMsaDimensions * dimensions

Query length and number of aligned cds.

unsigned char * query

Query sequence as Ncbistdaa.

PSICdMsaCell ** msa

Multiple alignment of CDs.

Structure to allow requesting various diagnostics data to be collected by PSSM engine.

Uint4 num_seqs

Number of distinct sequences aligned with the query (does not include the query)

Uint4 query_length

Length of the query.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4