A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/seq__trimmer_8cpp_source.html below:

NCBI C++ ToolKit: src/objmgr/util/seq_trimmer.cpp Source File

60  template

<

typename

TType,

int

Size>

61  void

s_FillArray(TType(&

array

)[Size],

const

TType &

value

) {

68  template

<

int

TableSize,

int

InputSize>

69  void

s_SetAmbigLookupTableFromArray(

70  bool

(&ambig_lookup_table)[TableSize],

76  _ASSERT

( chInputChar >= kFirstCharInLookupTable &&

77

chInputChar <= kLastCharInLookupTable );

78

ambig_lookup_table[chInputChar - kFirstCharInLookupTable] =

value

;

85  return

(iSeqPos == 1 || iSeqPos == -1);

88  bool

s_IsSupportedSegmentType(

const CSeqMap_CI

& segment )

90  switch

( segment.

GetType

() ) {

106  if

( iTrimDirection < 0 ) {

107  return

(iStartPos < iEndPos);

109  return

(iStartPos > iEndPos);

113  struct

PVecTrimRulesLessThan {

126  struct

PVecTrimRulesHaveSameNumberOfBases {

134  struct

PVecTrimRuleAlwaysPasses {

148

pTrimRuleVec->push_back(arrTrimRules[rule_idx]);

150  return

pTrimRuleVec.release();

159

s_DefaultRuleCreator,

NULL

);

160  return

s_DefaultTrimRules.

Get

();

168

: m_eMeaningOfAmbig(eMeaningOfAmbig),

170

m_vecTrimRules(trimRuleVec),

171

m_uMinSeqLen(uMinSeqLen)

177

(1 + kLastCharInLookupTable - kFirstCharInLookupTable) );

179

(1 + kLastCharInLookupTable - kFirstCharInLookupTable) );

196  'A'

,

'C'

,

'G'

,

'T'

};

197

s_SetAmbigLookupTableFromArray(

201  'B'

,

'J'

,

'X'

,

'Z'

};

202

s_SetAmbigLookupTableFromArray(

222  if

( bioseq_len < 1 ) {

230

seqvec, leftmost_good_base, rightmost_good_base,

233  if

( leftmost_good_base > rightmost_good_base ) {

235  if

( trimmed_ranges ) {

236

*trimmed_ranges +=

TSeqRange

(0, bioseq_len - 1);

243

rightmost_good_base =

245

seqvec, rightmost_good_base, leftmost_good_base,

248  if

( leftmost_good_base > rightmost_good_base ) {

250  if

( trimmed_ranges ) {

251

*trimmed_ranges +=

TSeqRange

(0, bioseq_len - 1);

258  if

( (leftmost_good_base == 0) &&

259

(rightmost_good_base == (bioseq_len - 1)) )

266

leftmost_good_base, rightmost_good_base,

268  if

( trimmed_ranges ) {

269  if

( leftmost_good_base > 0 ) {

270

*trimmed_ranges +=

TSeqRange

(0, leftmost_good_base - 1);

272  if

( rightmost_good_base < bioseq_len - 1 ) {

273

*trimmed_ranges +=

TSeqRange

(rightmost_good_base + 1,

289  sort

( vecTrimRules.begin(), vecTrimRules.end(),

290

PVecTrimRulesLessThan() );

297 

TTrimRuleVec::iterator new_end_iter =

299

vecTrimRules.begin(), vecTrimRules.end(),

300

PVecTrimRulesHaveSameNumberOfBases() );

301

vecTrimRules.erase( new_end_iter, vecTrimRules.end() );

306

vecTrimRules.begin(), vecTrimRules.end(),

307

PVecTrimRuleAlwaysPasses() );

308

vecTrimRules.erase( new_end_iter, vecTrimRules.end() );

313  const STrimRule

& trimRule = *trim_rule_it;

315

problems_strm <<

"A rule has a non-positive number of "

316  "bases to check"

<< endl;

321

problems_strm <<

"There is a rule where bases_to_check "

323  "equal to max bases allowed ("

332  if

( ! sProblems.empty() ) {

334  "Cannot create CSequenceAmbigTrimmer due to issues with rules: "

356

bioseq_eh.

SetInst

( *pNewSeqInst );

370

iStartPosInclusive_arg, iEndPosInclusive_arg, iTrimDirection) )

372  return

( iTrimDirection > 0

380  TSignedSeqPos

uStartOfGoodBasesSoFar = iStartPosInclusive_arg;

394

1 +

abs

(uEndOfGoodBasesSoFar - uStartOfGoodBasesSoFar );

398

uOldBasesLeft = iNumBasesLeft;

403  const STrimRule

& trimRule = *trim_rule_it;

415

uStartOfGoodBasesSoFar +

422

uStartOfGoodBasesSoFar,

423

iEndPosToCheckForThisRule,

438

iEndPosToCheckForThisRule,

443

uStartOfGoodBasesSoFar +=

454

uStartOfGoodBasesSoFar,

455

uEndOfGoodBasesSoFar,

457

uFewestBasesCheckedInARule );

461

uStartOfGoodBasesSoFar =

470  if

( s_IsEmptyRange(uStartOfGoodBasesSoFar, uEndOfGoodBasesSoFar, iTrimDirection) ) {

473

iNumBasesLeft = 1 +

abs

(uEndOfGoodBasesSoFar - uStartOfGoodBasesSoFar );

475  if

( iNumBasesLeft == uOldBasesLeft ) {

488

uStartOfGoodBasesSoFar,

489

uEndOfGoodBasesSoFar,

494  return

uStartOfGoodBasesSoFar;

506

in_out_uStartOfGoodBasesSoFar, uEndOfGoodBasesSoFar, iTrimDirection) )

515  if

( ! pAmbigLookupTable ) {

517  "Unable to determine molecule type of sequence"

);

520  TSignedSeqPos

newStartOfGoodBases = in_out_uStartOfGoodBasesSoFar;

521  while

( ! s_IsEmptyRange(newStartOfGoodBases, uEndOfGoodBasesSoFar, iTrimDirection) &&

522

(*pAmbigLookupTable)[ seqvec[newStartOfGoodBases] - kFirstCharInLookupTable] )

527

newStartOfGoodBases, &seqvec.

GetScope

() );

532  while

( ! s_IsEmptyRange(newStartOfGoodBases, end_of_segment, iTrimDirection) &&

533

! s_IsEmptyRange(newStartOfGoodBases, uEndOfGoodBasesSoFar, iTrimDirection) &&

534

(*pAmbigLookupTable)[ seqvec[newStartOfGoodBases] - kFirstCharInLookupTable] )

536

newStartOfGoodBases += iTrimDirection;

555  if

( s_IsEmptyRange(newStartOfGoodBases, uEndOfGoodBasesSoFar, iTrimDirection) )

558

iNumBasesToRemove = 1 +

abs

(uEndOfGoodBasesSoFar - in_out_uStartOfGoodBasesSoFar);

560

iNumBasesToRemove =

abs

(newStartOfGoodBases - in_out_uStartOfGoodBasesSoFar);

565

iNumBasesToRemove = (iNumBasesToRemove / uChunkSize) * uChunkSize;

568

in_out_uStartOfGoodBasesSoFar += (iTrimDirection * iNumBasesToRemove);

579

iStartPosInclusive_arg, iEndPosInclusive_arg, iTrimDirection) )

590

iStartPosInclusive_arg, pScope );

596  if

(

NULL

== pAmbigLookupTable ) {

601  for

( ; segment_ci &&

604

iEndPosInclusive_arg, iTrimDirection);

616  switch

( eSegmentType ) {

622

1 +

abs

(segmentEndPosInclusive - segmentStartPosInclusive),

623

1 +

abs

(segmentStartPosInclusive - iEndPosInclusive_arg) );

641

! s_IsEmptyRange(pos, segmentEndPosInclusive, iTrimDirection) &&

642

! s_IsEmptyRange(pos, iEndPosInclusive_arg, iTrimDirection)

644

pos += iTrimDirection)

647  if

( residue < kFirstCharInLookupTable || residue > kLastCharInLookupTable ||

648

(*pAmbigLookupTable)[residue - kFirstCharInLookupTable])

654

}

else if

( s_IsEmptyRange(

665

<<

static_cast<int>

(eSegmentType)

666

<<

" are not supported at this time"

);

678  _ASSERT

( s_IsSupportedSegmentType(segment) );

680  if

( iTrimDirection == 1 ) {

685  _ASSERT

( iTrimDirection == -1 );

697  return

( iTrimDirection == 1 ? ++in_out_segment_it : --in_out_segment_it );

714

1 + ( rightmost_good_base - leftmost_good_base ) );

715  for

( ; seqmap_ci; ++seqmap_ci ) {

727  if

( pOriginalGapSeqLiteral ) {

728

pNewGapLiteral->Assign(*pOriginalGapSeqLiteral);

730  if

( ! bIsLengthKnown ) {

733

pNewGapLiteral->SetLength( uGapLength );

735

pDeltaSeq->SetLiteral( *pNewGapLiteral );

737

pDeltaExt->

Set

().push_back(

Ref

(&*pDeltaSeq) );

750

pDeltaSeq->SetLiteral().SetLength( seqmap_ci.

GetLength

() );

751

pDeltaSeq->SetLiteral().SetSeq_data( *pSeqData );

753

pDeltaExt->

Set

().push_back(

Ref

(&*pDeltaSeq) );

758  "seqmap segments of type "

<<

static_cast<int>

(

eType

) );

770

seq_inst.

SetLength

( 1 + ( rightmost_good_base - leftmost_good_base ) );

771  if

( pDeltaExt->

Set

().empty() ) {

773

}

else if

( pDeltaExt->

Set

().size() == 1 ) {

776  CSeq_data

& seq_data = pDeltaSeq->SetLiteral().SetSeq_data();

779

seq_inst.

SetExt

().SetDelta( *pDeltaExt );

782

bioseq_eh.

SetInst

( seq_inst );

void remove_if(Container &c, Predicate *__pred)

void Set(T *object)

Initialize with an existing object.

CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:

position_type GetCoveredLength(void) const

Returns total length covered by ranges in this collection, i.e.

T & Get(void)

Create the variable if not created yet, return the reference.

Include a standard set of the NCBI C++ Toolkit most basic headers.

static bool s_IsValidDirection(const string &direction)

#define ITERATE_0_IDX(idx, up_to)

idx loops from 0 (inclusive) to up_to (exclusive)

unsigned int TSeqPos

Type for sequence locations and lengths.

constexpr size_t ArraySize(const Element(&)[Size])

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

int TSignedSeqPos

Type for signed sequence position.

#define NCBI_USER_THROW(message)

Throw a quick-and-dirty runtime exception of type 'CException' with the given error message and error...

#define NCBI_USER_THROW_FMT(message)

Throw a "user exception" with message processed as output to ostream.

C * SerialClone(const C &src)

Create on heap a clone of the source object.

TSignedSeqPos pos_after_last_gap

Inclusive.

EMeaningOfAmbig m_eMeaningOfAmbig

This holds the current interpretation for "ambiguous".

bool TAmbigLookupTable[26]

virtual EResult x_TrimToNothing(CBioseq_Handle &bioseq_handle)

The bioseq is trimmed to size 0.

TSignedSeqPos m_uMinSeqLen

When the bioseq gets trimmed down to less than this size, we halt the trimming.

TSignedSeqPos max_bases_allowed_to_be_ambig

TSignedSeqPos x_SegmentGetEndInclusive(const CSeqMap_CI &segment, const TSignedSeqPos iTrimDirection)

This returns the (inclusive) position at the end of the segment currently at iStartPosInclusive_arg.

TFlags m_fFlags

This holds the flags that affect the behavior of this class.

CSequenceAmbigTrimmer(EMeaningOfAmbig eMeaningOfAmbig, TFlags fFlags=0, const TTrimRuleVec &vecTrimRules=GetDefaultTrimRules(), TSignedSeqPos uMinSeqLen=50)

This sets up the parameters for how this trimmer will act.

CSeqMap_CI & x_SeqMapIterDoNext(CSeqMap_CI &in_out_segment_it, const TSignedSeqPos iTrimDirection)

Returns the "next" segment.

EMeaningOfAmbig

This enum is used to set what is meant by "ambiguous".

virtual void x_NormalizeVecTrimRules(TTrimRuleVec &vecTrimRules)

This prepares the vector of trimming rules to be used by the trimming algorithm.

TAmbigLookupTable m_arrNucAmbigLookupTable

EResult

This indicates what happened with the trim.

TSignedSeqPos bases_to_check

virtual TSignedSeqPos x_FindWhereToTrim(const CSeqVector &seqvec, const TSignedSeqPos iStartPosInclusive_arg, const TSignedSeqPos iEndPosInclusive_arg, TSignedSeqPos iTrimDirection)

This returns the last good base that won't be trimmed (note: last really means "first" when we're sta...

vector< STrimRule > TTrimRuleVec

Multiple STrimRules are allowed, which are applied from smallest bases_to_check to largest bases_to_c...

TAmbigLookupTable m_arrProtAmbigLookupTable

virtual EResult DoTrim(CBioseq_Handle &bioseq_handle, CRangeCollection< TSeqPos > *trimmed_ranges=nullptr)

This trims the given bioseq, using params set in the CSequenceAmbigTrimmer constructor.

virtual void x_EdgeSeqMapGapAdjust(const CSeqVector &seqvec, TSignedSeqPos &in_out_uStartOfGoodBasesSoFar, const TSignedSeqPos uEndOfGoodBasesSoFar, const TSignedSeqPos iTrimDirection, const TSignedSeqPos uChunkSize)

This adjusts in_out_uStartOfGoodBasesSoFar if we're at a CSeqMap gap.

static const TTrimRuleVec & GetDefaultTrimRules(void)

This returns a reasonable default for trimming rules.

TSignedSeqPos x_SegmentGetBeginningInclusive(const CSeqMap_CI &segment, const TSignedSeqPos iTrimDirection)

This returns the (inclusive) position at the beginning of the segment.

virtual void x_CountAmbigInRange(SAmbigCount &out_result, const CSeqVector &seqvec, const TSignedSeqPos iStartPosInclusive_arg, const TSignedSeqPos iEndPosInclusive_arg, const TSignedSeqPos iTrimDirection)

This counts the number of ambiguous bases in the range [leftmost_pos_to_check, rightmost_pos_to_check...

void x_SliceBioseq(TSignedSeqPos leftmost_good_base, TSignedSeqPos rightmost_good_base, CBioseq_Handle &bioseq_handle)

TSignedSeqPos num_ambig_bases

the number of ambiguous bases found in the range supplied to x_CountAmbigInRange

TTrimRuleVec m_vecTrimRules

This holds the trimming rules that will be applied.

bool x_TestFlag(TFlags fFlag)

Test if a given flag is set.

@ fFlags_DoNotTrimBeginning

0x01 ("Beginning" as defined by CSeqVector)

@ fFlags_DoNotTrimEnd

0x02 ("End" as defined by CSeqVector)

@ fFlags_DoNotTrimSeqGap

0x04 (Seq-gaps are not considered trimmable if this flag is set, only letter gaps (e....

@ eMeaningOfAmbig_AnyAmbig

Here, anything that's not certain is considered ambiguous.

@ eMeaningOfAmbig_OnlyCompletelyUnknown

Here, only N for nucleotides and X for amino acids is considered ambiguous.

@ eResult_NoTrimNeeded

Bioseq is left unchanged because it did not need to be trimmed at all.

@ eResult_SuccessfullyTrimmed

Bioseq is now trimmed.

TSeqPos GetBioseqLength(void) const

void SetInst(TInst &v) const

CBioseq_EditHandle GetEditHandle(void) const

Get 'edit' version of handle.

CScope & GetScope(void) const

Get scope this handle belongs to.

const CSeqMap & GetSeqMap(void) const

Get sequence map.

const TInst & GetInst(void) const

@ eCoding_Iupac

Set coding to printable coding (Iupacna or Iupacaa)

TSeqPos GetEndPosition(void) const

return end position of current segment in sequence (exclusive)

CSeqMap::ESegmentType GetType(void) const

bool IsUnknownLength(void) const

return true if current segment is a gap of unknown length

TSeqPos GetPosition(void) const

return position of current segment in sequence

TSeqPos GetLength(void) const

return length of current segment

CConstRef< CSeq_literal > GetRefGapLiteral(void) const

return CSeq_literal with gap data, or null if either the segment is not a gap, or an unspecified gap

TCoding GetCoding(void) const

Target sequence coding.

const CSeqMap & GetSeqMap(void) const

CSeqMap_CI FindSegment(TSeqPos pos, CScope *scope) const

Find segment containing the position.

bool IsProtein(void) const

CSeqMap_CI ResolvedRangeIterator(CScope *scope, TSeqPos from, TSeqPos length, ENa_strand strand=eNa_strand_plus, size_t maxResolve=size_t(-1), TFlags flags=fDefaultFlags) const

Iterate segments in the range with specified strand coordinates.

bool IsNucleotide(void) const

void GetPackedSeqData(string &buffer, TSeqPos start=0, TSeqPos stop=kInvalidSeqPos)

TMol GetSequenceType(void) const

CScope & GetScope(void) const

@ eSeqData

real sequence data

CRef< C > Ref(C *object)

Helper functions to get CRef<> and CConstRef<> objects.

CRange< TSeqPos > TSeqRange

typedefs for sequence ranges

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define END_SCOPE(ns)

End the previously defined scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

#define BEGIN_SCOPE(ns)

Define a new scope.

void SetExt(TExt &value)

Assign a value to Ext data member.

void SetRepr(TRepr value)

Assign a value to Repr data member.

void SetLength(TLength value)

Assign a value to Length data member.

void SetSeq_data(TSeq_data &value)

Assign a value to Seq_data data member.

void ResetExt(void)

Reset Ext data member.

void ResetSeq_data(void)

Reset Seq_data data member.

@ eRepr_raw

continuous sequence

@ eRepr_virtual

no seq data

constexpr auto sort(_Init &&init)

const GenericPointer< typename T::ValueType > T2 value

Static variables safety - create on demand, destroy on application termination.

This holds the output of x_CountAmbigInRange.

For example, if bases_to_check is 10 and max_bases_allowed_to_be_ambig is 5, then on each iteration w...


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4