A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/compact__sam_8cpp_source.html below:

NCBI C++ ToolKit: src/app/gnomon/compact_sam.cpp Source File

56  template

<

typename

T>

106

istringstream icigar(cigar_string);

110  while

(icigar >>

len

>> c) {

116

}

else if

(c ==

'N'

) {

121

cigar_string = cigar_string.substr(clip_pos);

124

}

else if

(c ==

'M'

|| c ==

'='

|| c ==

'X'

) {

130  if

(

isdigit

(MD_tag.front())) {

132  int

m = stoi(MD_tag, &idx);

133

MD_tag.erase(0, idx);

135

MD_tag = to_string(m-

len

)+MD_tag;

148

}

else if

(c ==

'I'

) {

152

}

else if

(c ==

'D'

) {

156  if

(MD_tag.front() ==

'^'

)

157

MD_tag.erase(0,

len

+1);

159

MD_tag.erase(0,

len

);

161

cerr <<

"Unexpected symbol in CIGAR"

<< endl;

164

clip_pos = icigar.tellg();

166

cigar_string.clear();

171 int ProcessSAM

(

const

vector<string>& tags, vector<CCompactSAMApplication::Exon>& exons) {

175  int

mismatchpenalty = 1;

178  double

log2factor = 0.25;

179  int

intron_penalty = 0;

180  int

noncanonical_penalty = 8;

181  int

gcag_penalty = 4;

182  int

atac_penalty = 8;

186  for

(

int i

= 11;

i

< (

int

)tags.size() && (MD_tag.empty() || jM_tag.empty()); ++

i

) {

187  auto

&

tag

= tags[

i

];

188

vector<string> fields;

190  if

(fields.size() != 3)

192  if

(fields[0] ==

"MD"

&& fields[1] ==

"Z"

)

194  if

(fields[0] ==

"jM"

&& fields[1] ==

"B"

)

197  if

(MD_tag.empty()) {

198

cerr <<

"MD:Z tag in SAM file is expected"

<< endl;

201  if

(jM_tag.empty()) {

202

cerr <<

"jM:B tag in SAM file is expected"

<< endl;

207  string

cigar_string = tags[5];

209  int

sfrom = std::stoi(tags[3])-1;

210  while

(!cigar_string.empty()) {

211

exons.push_back(

GetNextExon

(cigar_string, MD_tag, qfrom, sfrom));

212

qfrom = exons.back().m_qto+1;

213

sfrom = exons.back().m_sto+1;

215

pair<int, int> range(exons.front().m_sfrom, exons.back().m_sto);

218  int

align_score = -

int

(log2factor*

log2

(

double

(range.second-range.first+1))+0.5);

219  for

(

auto

& e : exons) {

220  int

gap_len = e.m_align_len-e.m_matches-e.m_mismatches;

221

e.m_score = e.m_matches*matchscore-e.m_mismatches*mismatchpenalty-e.m_indels*gapopen-gap_len*gapextend;

222

align_score += e.m_score;

224  if

(

next

(exons.begin()) != exons.end()) {

225

vector<string> splices;

227  for

(

auto

& spl : splices) {

229

align_score -= intron_penalty;

231

align_score -= noncanonical_penalty;

232  else if

(spl ==

"3"

|| spl ==

"4"

)

233

align_score -= gcag_penalty;

234  else if

(spl ==

"5"

|| spl ==

"6"

)

235

align_score -= atac_penalty;

244  for

(

unsigned i

= 1;

i

< ai.

m_fields

.size(); ++

i

)

250  return

name+

":i:"

+to_string(

value

);

252 string Tag

(

const string

& name,

const string

&

value

) {

253  return

name+

":Z:"

+

value

;

255 template

<

typename

T>

257  auto

itag = find_if(fields.begin()+11, fields.end(), [&](

const string

&

tag

){ return tag.substr(0,2) == name; });

258  if

(itag != fields.end())

261

fields.push_back(

Tag

(name,

value

));

263 void RemoveTag

(vector<string>& fields,

const string

& name) {

264  auto

itag = find_if(fields.begin()+11, fields.end(), [&](

const string

&

tag

){ return tag.substr(0,2) == name; });

265  if

(itag != fields.end())

274  int

flag = 1+2+strand*16+(1-strand)*32+(mate+1)*64;

275

fields[1] = to_string(flag);

283

fields[8] = to_string(span);

291

mismatches += e.m_mismatches;

293

mismatches += e.m_mismatches;

305  int

flag = stoi(fields[1])&1;

315

fields[1] = to_string(flag);

328

mismatches += e.m_mismatches;

336  bool

paired =

false

;

339  for

(

int

mate = 0; mate < 2; ++mate) {

340  for

(

int

strand = 0; strand < 2; ++strand) {

342  for

(

auto

& ai : compact_aligns) {

344  if

(ai.m_matep !=

nullptr

)

354  for

(

int

mate = 0; mate < 2; ++mate) {

356  auto

& left_aligns = contig_aligns.second[mate][strand];

357  for

(

auto

& ai : left_aligns) {

358  FormatPaired

(ai, mate, strand, mate_count[0], ++index,

true

);

359  FormatPaired

(*ai.m_matep, 1-mate, 1-strand, mate_count[0], index,

false

);

365  bool

all_aligned = mate_count[0] > 0 && mate_count[1] > 0;

368  for

(

int

mate = 0; mate < 2; ++mate) {

369  for

(

int

strand = 0; strand < 2; ++strand) {

371  for

(

auto

& ai : compact_aligns)

372  FormatSingle

(ai, mate, strand, mate_count[mate], ++mate_index[mate], all_aligned);

383  for

(

int

mate = 0; mate < 2; ++mate) {

384  for

(

int

strand = 0; strand < 2; ++strand) {

386  for

(

auto

& align : compact_aligns) {

387

mate_score[mate] =

max

(mate_score[mate], align.m_score);

388  if

(align.m_matep !=

nullptr

)

389

pair_score =

max

(pair_score, align.m_score+align.m_matep->m_score);

391

compact_aligns.remove_if([](

const AlignInfo

&

a

){

return

!

a

.m_above_thresholds; });

396  bool

paired = pair_score >=

max

(mate_score[0], mate_score[1]);

398  for

(

int

mate = 0; mate < 2; ++mate) {

399  for

(

int

strand = 0; strand < 2; ++strand) {

402

compact_aligns.remove_if([&](

const AlignInfo

&

a

){

403  if

(

a

.m_matep ==

nullptr

)

return true

;

404  if

(

a

.m_score+

a

.m_matep->m_score == pair_score)

return false

;

405  a

.m_matep->m_matep =

nullptr

;

return true

; });

407

compact_aligns.remove_if([&](

AlignInfo

&

a

){

a

.m_matep =

nullptr

;

return a

.m_score != mate_score[mate]; });

425

vector<pair<int,int>> left_introns;

426

vector<pair<int,int>> right_introns;

428  for

(

unsigned i

= 1;

i

< left.

m_exons

.size(); ++

i

) {

430

left_introns.emplace_back(left.

m_exons

[

i

-1].m_sto+1, left.

m_exons

[

i

].m_sfrom-1);

432  for

(

unsigned i

= 1;

i

< right.

m_exons

.size(); ++

i

) {

434

right_introns.emplace_back(right.

m_exons

[

i

-1].m_sto+1, right.

m_exons

[

i

].m_sfrom-1);

436  return

left_introns == right_introns;

441  for

(

int

left_mate = 0; left_mate < 2; ++left_mate) {

442  int

right_mate = 1-left_mate;

444  int

right_strand = 1;

445  auto

& left_aligns = contig_aligns.second[left_mate][left_strand];

446  auto

& right_aligns = contig_aligns.second[right_mate][right_strand];

448  auto

iright = right_aligns.begin();

449  for

(

auto

ileft = left_aligns.begin(); ileft != left_aligns.end() && iright != right_aligns.end(); ++ileft) {

450  while

(iright != right_aligns.end() && iright->m_range.second < ileft->m_range.second)

452  if

(iright == right_aligns.end())

454  auto

inext =

next

(ileft);

455  if

(inext != left_aligns.end() && inext->m_range.second < iright->m_range.second)

460

ileft->m_matep = &(*iright);

461

iright->m_matep = &(*ileft);

471  for

(

int

mate = 0; mate < 2; ++mate) {

472  for

(

int

strand = 0; strand < 2; ++strand) {

473  TAlignInfoList

& compact_aligns = contig_aligns.second[mate][strand];

474  if

(compact_aligns.empty())

477  size_t

read_len = compact_aligns.front().m_fields[9].size();

480  for

(

auto

& ai : compact_aligns) {

481  auto

& fields = ai.m_fields;

482

vector<Exon> align_exons;

483  int

align_score =

ProcessSAM

(fields, align_exons);

484

pair<int, int> align_range(align_exons.front().m_sfrom, align_exons.back().m_sto);

485  int

read_span = align_exons.back().m_qto-align_exons.front().m_qfrom+1;

486

exons.insert(exons.end(), align_exons.begin(), align_exons.end());

490  for

(

auto

& e : align_exons) {

491

matches += e.m_matches;

492

align_len += e.m_align_len;

495

ai.m_range = align_range;

496

ai.m_score = align_score;

497

ai.m_above_thresholds = above_hresholds;

498  swap

(align_exons, ai.m_exons);

504  sort

(exons.begin(), exons.end(), [](

const Exon

& e1,

const Exon

& e2){

505

if(e1.m_score == e2.m_score)

506

return e1.m_matches > e2.m_matches;

508

return e1.m_score > e2.m_score; });

511  auto

tail =

remove_if

(exons.begin(), exons.end(), [&](

const Exon

& e){

512

bool r1 = !lefts.insert(e.m_sfrom).second; bool r2 = !rights.insert(e.m_sto).second;

514

exons.erase(tail, exons.end());

522  for

(

auto

& e : exons) {

524

hit->SetQueryId(readid);

525

hit->SetSubjId(contigid);

526

hit->SetQueryStart(e.m_qfrom);

527

hit->SetQueryStop(e.m_qto);

528

hit->SetSubjStart(e.m_sfrom);

529

hit->SetSubjStop(e.m_sto);

530

hit->SetLength(e.m_align_len);

531

hit->SetMismatches(e.m_mismatches);

532

hit->SetGaps(e.m_indels);

534

hit->SetScore(e.m_score);

535

hit->SetRawScore(e.m_score);

536

hit->SetIdentity(

float

(e.m_matches)/e.m_align_len);

537

hitrefs.push_back(hit);

541  typedef

TAccessor::TCoord TCoord;

543  const

TCoord penalty_bps (TCoord(

m_penalty

*read_len + 0.5));

544  const

TCoord min_matches (TCoord(

m_min_idty

*read_len + 0.5));

547  const

TCoord min_singleton_matches (

min

(msm1, msm2));

549

TAccessor ca (penalty_bps, min_matches, min_singleton_matches,

false

);

551

ca.Run(hitrefs.begin(), hitrefs.end());

556  for

(

bool

b0 (ca.GetFirst(comp)); b0 ; b0 = ca.GetNext(comp)) {

559

pair<int, int> range(span[2], span[3]);

560

selected_ranges.

insert

(range);

564

compact_aligns.remove_if([&](

const AlignInfo

&

a

){

return

selected_ranges.count(

a

.m_range) == 0; });

572  for

(

auto

it = compact_aligns.begin(); it != compact_aligns.end(); ++it) {

573  for

(

auto

inext =

next

(it); inext != compact_aligns.end() && inext->m_range == it->m_range; )

574

inext = compact_aligns.erase(inext);

586

argdescr->SetUsageContext(

GetArguments

().GetProgramBasename(),

587  "compact_sam expects SAM alignments at stdin collated by query, e.g. with 'sort -k 1,1'"

);

589

argdescr->AddDefaultKey(

"min_output_identity"

,

"min_output_identity"

,

"Minimal identity for output alignments"

,

592

argdescr->AddDefaultKey(

"min_output_coverage"

,

"min_output_coverage"

,

"Minimal coverage for output alignments"

,

595

argdescr->AddDefaultKey(

"penalty"

,

"penalty"

,

"Per-compartment penalty"

,

598

argdescr->AddDefaultKey (

"max_intron"

,

"max_intron"

,

599  "Maximum intron length (in base pairs)"

,

603

argdescr->AddDefaultKey(

"min_query_len"

,

"min_query_len"

,

604  "Minimum length for individual cDNA sequences."

,

607

argdescr->AddFlag(

"nocheck"

,

"Don't check if reads are collated (saves memory)"

);

610

argdescr->SetConstraint(

"penalty"

, constrain01);

611

argdescr->SetConstraint(

"min_output_identity"

, constrain01);

612

argdescr->SetConstraint(

"min_output_coverage"

, constrain01);

615

argdescr->SetConstraint(

"min_query_len"

, constrain_minqlen);

626  m_penalty

= args[

"penalty"

].AsDouble();

632  bool check

= !args[

"nocheck"

];

637  while

(getline(cin, line)) {

640  if

(line[0] ==

'@'

) {

641

cout << line << endl;

644

vector<string> fields;

646  string

read = fields[0];

647  if

(read != read_old) {

648  if

(

check

&& !previous_reads.

insert

(read_old).second) {

649

cerr <<

"Input collated by reads is expected"

<< endl;

664  string

contig = fields[2];

665  int

flag = std::stoi(fields[1]);

666  int

strand = (flag&16) ? 1 : 0;

667  int

mate = (flag&128) ? 1 : 0;

670  swap

(ais.back().m_fields, fields);

674  if

(

check

&& !previous_reads.

insert

(read_old).second) {

675

cerr <<

"Input collated by reads is expected"

<< endl;

687 int main

(

int

argc,

const char

* argv[])

void remove_if(Container &c, Predicate *__pred)

void SelectBestLocations()

bool CompatiblePair(const AlignInfo &left, const AlignInfo &right)

vector< THitRef > THitRefs

double m_min_singleton_idty

virtual int Run()

Run the application.

int m_min_singleton_idty_bps

virtual void Init()

Initialize the application.

list< AlignInfo > TAlignInfoList

map< string, TMatrix< TAlignInfoList > > m_compact_aligns

list< TSamFields > TSplitAligns

vector< string > TSamFields

iterator_bool insert(const value_type &val)

void FormatSingle(CCompactSAMApplication::AlignInfo &ai, int mate, int strand, int count, int index, bool all_aligned)

void FormatPaired(CCompactSAMApplication::AlignInfo &ai, int mate, int strand, int count, int index, bool left)

string Tag(const string &name, int value)

int ProcessSAM(const vector< string > &tags, vector< CCompactSAMApplication::Exon > &exons)

void Print(const CCompactSAMApplication::AlignInfo &ai)

void ReplaceTag(vector< string > &fields, const string &name, const T &value)

CCompactSAMApplication::Exon GetNextExon(string &cigar_string, string &MD_tag, int qfrom, int sfrom)

int main(int argc, const char *argv[])

void RemoveTag(vector< string > &fields, const string &name)

static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)

static void s_GetSpan(const THitRefs &hitrefs, TCoord span[4])

Get sequence span for a set of alignments (hits).

unsigned int TSeqPos

Type for sequence locations and lengths.

virtual const CArgs & GetArgs(void) const

Get parsed command line arguments.

int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)

Main function (entry point) for the NCBI application.

virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)

Setup the command line argument descriptions.

const CNcbiArguments & GetArguments(void) const

Get the application's cached unprocessed command-line arguments.

void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)

@ eDouble

Convertible into a floating point number (double)

@ eInteger

Convertible into an integer number (int or Int8)

EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)

Set the threshold severity for posting the messages.

@ eDS_ToStderr

To standard error stream.

@ eDiag_Info

Informational message.

static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)

Split a string using specified delimiters.

unsigned int

A callback function used to compare two keys in a database.

constexpr auto sort(_Init &&init)

const struct ncbi::grid::netcache::search::fields::SIZE size

const GenericPointer< typename T::ValueType > T2 value

Defines the CNcbiApplication and CAppException classes for creating NCBI applications.

Defines command line argument related classes.

Defines unified interface to application:


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4