A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://github.com/marbl/canu/commit/ea2b03d63352dba819713ad716f4236a9ad93113 below:

Trim back noisy alignments to the real alignment. · marbl/canu@ea2b03d · GitHub

@@ -142,6 +142,76 @@ printLog(char const *format, ...) {

142 142 143 143 144 144 145 +

uint32

146 +

stripLeadingGaps(char *tAln,

147 +

char *rAln, int32 alignLen) {

148 +

int32 fBase = 0;

149 +

bool skipping = true;

150 + 151 +

// Skip initial gaps.

152 +

while ((fBase < alignLen) && (tAln[fBase] == '-'))

153 +

fBase++;

154 + 155 +

// Count the number of ACGT in a row, then the number of gaps we find.

156 +

// If there are more ACGT than gaps, stop skipping.

157 +

while (skipping == true) {

158 +

int32 nBase = 0;

159 +

int32 nGap = 0;

160 + 161 +

while ((fBase+nBase < alignLen) && (tAln[fBase+nBase] != '-'))

162 +

nBase++;

163 +

while ((fBase+nBase+nGap < alignLen) && (tAln[fBase+nBase+nGap] == '-'))

164 +

nGap++;

165 + 166 +

if ((nBase < 16) || (nBase < nGap))

167 +

fBase += nBase + nGap;

168 +

else

169 +

skipping = false;

170 + 171 +

if (fBase >= alignLen)

172 +

skipping = false;

173 +

}

174 + 175 +

return(fBase);

176 +

}

177 + 178 + 179 + 180 +

uint32

181 +

stripTrailingGaps(char *tAln,

182 +

char *rAln, int32 alignLen) {

183 +

int32 lBase = alignLen - 1;

184 +

bool skipping = true;

185 + 186 +

// Skip initial gaps.

187 +

while ((lBase >= 0) && (tAln[lBase] == '-'))

188 +

lBase--;

189 + 190 +

// Count the number of ACGT in a row, then the number of gaps we find.

191 +

// If there are more ACGT than gaps, stop skipping.

192 +

while (skipping == true) {

193 +

int32 nBase = 0;

194 +

int32 nGap = 0;

195 + 196 +

while ((lBase-nBase >= 0) && (tAln[lBase-nBase] != '-'))

197 +

nBase++;

198 +

while ((lBase-nBase-nGap >= 0) && (tAln[lBase-nBase-nGap] == '-'))

199 +

nGap++;

200 + 201 +

if ((nBase < 16) || (nBase < nGap))

202 +

lBase -= nBase + nGap;

203 +

else

204 +

skipping = false;

205 + 206 +

if (lBase < 0)

207 +

skipping = false;

208 +

}

209 + 210 +

return(alignLen - 1 - lBase);

211 +

}

212 + 213 + 214 + 145 215

static

146 216

alignTagList *

147 217

getAlignTags(char *Qalign, int32 Qbgn, int32 Qlen, // read

@@ -318,36 +388,62 @@ alignReadsToTemplate(falconInput *evidence,

318 388

evidence[0].read, evidence[j].read,

319 389

tAln, rAln);

320 390 321 -

// Strip leading/trailing gaps on template sequence.

391 +

// Strip leading/trailing gaps on template sequence. These are the

392 +

// number of alignment letters to ignore on either end.

322 393 323 -

uint32 fBase = 0; // First non-gap in the alignment

324 -

uint32 lBase = align.alignmentLength; // Last base in the alignment (actually, first gap in the gaps at the end, but that was too long for a variable name)

394 +

int32 bSkip = stripLeadingGaps (tAln, rAln, align.alignmentLength); // Alignment letters to skip

395 +

int32 eSkip = stripTrailingGaps(tAln, rAln, align.alignmentLength); // because they're junk gaps.

325 396 326 -

while ((fBase < align.alignmentLength) && (tAln[fBase] == '-'))

327 -

fBase++;

397 +

// We need to know the position in the template that the (trimmed)

398 +

// alignment starts at, so we count how many non-gap letters are skipped

399 +

// in either sequence at either end.

328 400 329 -

while ((lBase > fBase) && (tAln[lBase-1] == '-'))

330 -

lBase--;

401 +

int32 tbSkip = 0, teSkip = 0; // Sequence bases to skip in (t)emplate or evidence (r)ead,

402 +

int32 rbSkip = 0, reSkip = 0; // at the (b)egin or (e)nd of the sequence.

331 403 332 -

rBgn += fBase;

333 -

rEnd -= align.alignmentLength - lBase;

404 +

for (int32 ii=0; ii<bSkip; ii++) {

405 +

if (tAln[ii] != '-') tbSkip++;

406 +

if (rAln[ii] != '-') rbSkip++;

407 +

}

334 408 335 -

assert(rBgn >= 0); assert(rEnd <= evidence[j].readLength);

336 -

assert(tBgn >= 0); assert(tEnd <= evidence[0].readLength);

409 +

for (int32 ii=0; ii<eSkip; ii++) {

410 +

if (tAln[align.alignmentLength - ii - 1] != '-') teSkip++;

411 +

if (rAln[align.alignmentLength - ii - 1] != '-') reSkip++;

412 +

}

337 413 338 -

rAln[lBase] = 0; // Truncate the alignments before the gaps.

339 -

tAln[lBase] = 0;

414 +

#if 1

415 +

printLog(evidence, j, "ALIGNED template %7d-%-7d evidence %7d-%-7d %6.3f%% skip %d %d endgaps %d %d\n",

416 +

tBgn + tbSkip, tEnd - teSkip,

417 +

rBgn + rbSkip, rEnd - reSkip,

418 +

bSkip, eSkip,

419 +

rbSkip, reSkip);

420 +

#endif

340 421 341 -

// Convert the alignment into tags.

422 +

#if 0

423 +

printLog(evidence, j, " full template %100.100s...\n", tAln);

424 +

printLog(evidence, j, " full evidence %100.100s...\n", rAln);

342 425 343 -

printLog(evidence, j, "ALIGNED template %7d-%-7d evidence %7d-%-7d %6.3f%% skip %d %d\n",

344 -

tBgn, tEnd, // The skip/endgap reported isn't really correct.

345 -

rBgn, rEnd, // See the next commit.

346 -

fBase, lBase);

426 +

printLog(evidence, j, " full template ...%100.100s\n", tAln + align.alignmentLength - 100);

427 +

printLog(evidence, j, " full evidence ...%100.100s\n", rAln + align.alignmentLength - 100);

428 +

#endif

429 + 430 +

tAln[align.alignmentLength - eSkip] = 0; // Truncate the alignment strings for printing.

431 +

rAln[align.alignmentLength - eSkip] = 0;

432 + 433 +

#if 0

434 +

printLog(evidence, j, " template %100.100s...\n", tAln + bSkip);

435 +

printLog(evidence, j, " evidence %100.100s...\n", rAln + bSkip);

436 + 437 +

printLog(evidence, j, " template ...%100.100s\n", tAln + align.alignmentLength - eSkip - 100);

438 +

printLog(evidence, j, " evidence ...%100.100s\n", rAln + align.alignmentLength - eSkip - 100);

439 +

#endif

440 + 441 +

// Convert the alignment into tags.

347 442 348 -

tagList[j] = getAlignTags(rAln + fBase, rBgn, evidence[j].readLength,

349 -

tAln + fBase, tBgn, evidence[0].readLength,

350 -

lBase - fBase);

443 +

if (align.alignmentLength > bSkip + eSkip + 500)

444 +

tagList[j] = getAlignTags(rAln + bSkip, rBgn + rbSkip, evidence[j].readLength,

445 +

tAln + bSkip, tBgn + tbSkip, evidence[0].readLength,

446 +

align.alignmentLength - bSkip - eSkip);

351 447 352 448

delete [] tAln;

353 449

delete [] rAln;


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4