@@ -142,6 +142,76 @@ printLog(char const *format, ...) {
142
142
143
143
144
144
145
+
uint32
146
+
stripLeadingGaps(char *tAln,
147
+
char *rAln, int32 alignLen) {
148
+
int32 fBase = 0;
149
+
bool skipping = true;
150
+
151
+
// Skip initial gaps.
152
+
while ((fBase < alignLen) && (tAln[fBase] == '-'))
153
+
fBase++;
154
+
155
+
// Count the number of ACGT in a row, then the number of gaps we find.
156
+
// If there are more ACGT than gaps, stop skipping.
157
+
while (skipping == true) {
158
+
int32 nBase = 0;
159
+
int32 nGap = 0;
160
+
161
+
while ((fBase+nBase < alignLen) && (tAln[fBase+nBase] != '-'))
162
+
nBase++;
163
+
while ((fBase+nBase+nGap < alignLen) && (tAln[fBase+nBase+nGap] == '-'))
164
+
nGap++;
165
+
166
+
if ((nBase < 16) || (nBase < nGap))
167
+
fBase += nBase + nGap;
168
+
else
169
+
skipping = false;
170
+
171
+
if (fBase >= alignLen)
172
+
skipping = false;
173
+
}
174
+
175
+
return(fBase);
176
+
}
177
+
178
+
179
+
180
+
uint32
181
+
stripTrailingGaps(char *tAln,
182
+
char *rAln, int32 alignLen) {
183
+
int32 lBase = alignLen - 1;
184
+
bool skipping = true;
185
+
186
+
// Skip initial gaps.
187
+
while ((lBase >= 0) && (tAln[lBase] == '-'))
188
+
lBase--;
189
+
190
+
// Count the number of ACGT in a row, then the number of gaps we find.
191
+
// If there are more ACGT than gaps, stop skipping.
192
+
while (skipping == true) {
193
+
int32 nBase = 0;
194
+
int32 nGap = 0;
195
+
196
+
while ((lBase-nBase >= 0) && (tAln[lBase-nBase] != '-'))
197
+
nBase++;
198
+
while ((lBase-nBase-nGap >= 0) && (tAln[lBase-nBase-nGap] == '-'))
199
+
nGap++;
200
+
201
+
if ((nBase < 16) || (nBase < nGap))
202
+
lBase -= nBase + nGap;
203
+
else
204
+
skipping = false;
205
+
206
+
if (lBase < 0)
207
+
skipping = false;
208
+
}
209
+
210
+
return(alignLen - 1 - lBase);
211
+
}
212
+
213
+
214
+
145
215
static
146
216
alignTagList *
147
217
getAlignTags(char *Qalign, int32 Qbgn, int32 Qlen, // read
@@ -318,36 +388,62 @@ alignReadsToTemplate(falconInput *evidence,
318
388
evidence[0].read, evidence[j].read,
319
389
tAln, rAln);
320
390
321
-
// Strip leading/trailing gaps on template sequence.
391
+
// Strip leading/trailing gaps on template sequence. These are the
392
+
// number of alignment letters to ignore on either end.
322
393
323
-
uint32 fBase = 0; // First non-gap in the alignment
324
-
uint32 lBase = align.alignmentLength; // Last base in the alignment (actually, first gap in the gaps at the end, but that was too long for a variable name)
394
+
int32 bSkip = stripLeadingGaps (tAln, rAln, align.alignmentLength); // Alignment letters to skip
395
+
int32 eSkip = stripTrailingGaps(tAln, rAln, align.alignmentLength); // because they're junk gaps.
325
396
326
-
while ((fBase < align.alignmentLength) && (tAln[fBase] == '-'))
327
-
fBase++;
397
+
// We need to know the position in the template that the (trimmed)
398
+
// alignment starts at, so we count how many non-gap letters are skipped
399
+
// in either sequence at either end.
328
400
329
-
while ((lBase > fBase) && (tAln[lBase-1] == '-'))
330
-
lBase--;
401
+
int32 tbSkip = 0, teSkip = 0; // Sequence bases to skip in (t)emplate or evidence (r)ead,
402
+
int32 rbSkip = 0, reSkip = 0; // at the (b)egin or (e)nd of the sequence.
331
403
332
-
rBgn += fBase;
333
-
rEnd -= align.alignmentLength - lBase;
404
+
for (int32 ii=0; ii<bSkip; ii++) {
405
+
if (tAln[ii] != '-') tbSkip++;
406
+
if (rAln[ii] != '-') rbSkip++;
407
+
}
334
408
335
-
assert(rBgn >= 0); assert(rEnd <= evidence[j].readLength);
336
-
assert(tBgn >= 0); assert(tEnd <= evidence[0].readLength);
409
+
for (int32 ii=0; ii<eSkip; ii++) {
410
+
if (tAln[align.alignmentLength - ii - 1] != '-') teSkip++;
411
+
if (rAln[align.alignmentLength - ii - 1] != '-') reSkip++;
412
+
}
337
413
338
-
rAln[lBase] = 0; // Truncate the alignments before the gaps.
339
-
tAln[lBase] = 0;
414
+
#if 1
415
+
printLog(evidence, j, "ALIGNED template %7d-%-7d evidence %7d-%-7d %6.3f%% skip %d %d endgaps %d %d\n",
416
+
tBgn + tbSkip, tEnd - teSkip,
417
+
rBgn + rbSkip, rEnd - reSkip,
418
+
bSkip, eSkip,
419
+
rbSkip, reSkip);
420
+
#endif
340
421
341
-
// Convert the alignment into tags.
422
+
#if 0
423
+
printLog(evidence, j, " full template %100.100s...\n", tAln);
424
+
printLog(evidence, j, " full evidence %100.100s...\n", rAln);
342
425
343
-
printLog(evidence, j, "ALIGNED template %7d-%-7d evidence %7d-%-7d %6.3f%% skip %d %d\n",
344
-
tBgn, tEnd, // The skip/endgap reported isn't really correct.
345
-
rBgn, rEnd, // See the next commit.
346
-
fBase, lBase);
426
+
printLog(evidence, j, " full template ...%100.100s\n", tAln + align.alignmentLength - 100);
427
+
printLog(evidence, j, " full evidence ...%100.100s\n", rAln + align.alignmentLength - 100);
428
+
#endif
429
+
430
+
tAln[align.alignmentLength - eSkip] = 0; // Truncate the alignment strings for printing.
431
+
rAln[align.alignmentLength - eSkip] = 0;
432
+
433
+
#if 0
434
+
printLog(evidence, j, " template %100.100s...\n", tAln + bSkip);
435
+
printLog(evidence, j, " evidence %100.100s...\n", rAln + bSkip);
436
+
437
+
printLog(evidence, j, " template ...%100.100s\n", tAln + align.alignmentLength - eSkip - 100);
438
+
printLog(evidence, j, " evidence ...%100.100s\n", rAln + align.alignmentLength - eSkip - 100);
439
+
#endif
440
+
441
+
// Convert the alignment into tags.
347
442
348
-
tagList[j] = getAlignTags(rAln + fBase, rBgn, evidence[j].readLength,
349
-
tAln + fBase, tBgn, evidence[0].readLength,
350
-
lBase - fBase);
443
+
if (align.alignmentLength > bSkip + eSkip + 500)
444
+
tagList[j] = getAlignTags(rAln + bSkip, rBgn + rbSkip, evidence[j].readLength,
445
+
tAln + bSkip, tBgn + tbSkip, evidence[0].readLength,
446
+
align.alignmentLength - bSkip - eSkip);
351
447
352
448
delete [] tAln;
353
449
delete [] rAln;
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4