+29
-16
lines changedFilter options
+29
-16
lines changed Original file line number Diff line number Diff line change
@@ -239,40 +239,53 @@ Unitig::computeErrorProfile(const char *UNUSED(prefix), const char *UNUSED(label
239
239
}
240
240
241
241
// Warn if too few or too many overlaps.
242
+
//
243
+
// The stdDev<> cannot handle more than 4 billion entries. If there are
244
+
// more than that, just compute the average and make a single interval
245
+
// spanning the tig.
246
+
//
247
+
// This happened once; see https://github.com/marbl/canu/issues/1355
248
+
// WARNING: tig 19049 length 9064 nReads 393906 has 4873434470 overlaps.
249
+
250
+
double fakeV = 0.0;
242
251
243
252
if (olapsLen == 0) {
244
253
writeLog("WARNING: tig %u length %u nReads %u has no overlaps.\n", id(), getLength(), ufpath.size());
254
+
245
255
for (uint32 fi=0; fi<ufpath.size(); fi++)
246
256
writeLog("WARNING: read %7u %7u-%-7u\n",
247
257
ufpath[fi].ident,
248
258
ufpath[fi].position.bgn,
249
259
ufpath[fi].position.end);
250
260
}
251
261
252
-
if (olapsLen > (uint64)4 * 1024 * 1024 * 1024) {
253
-
writeLog("WARNING: tig %u length %u nReads %u has " F_U64 " overlaps.\n", id(), getLength(), ufpath.size(), olapsLen);
254
-
for (uint32 fi=0; fi<ufpath.size(); fi++)
255
-
writeLog("WARNING: read %7u %7u-%-7u\n",
256
-
ufpath[fi].ident,
257
-
ufpath[fi].position.bgn,
258
-
ufpath[fi].position.end);
262
+
if (olapsLen > (uint64)UINT32_MAX) {
263
+
for (uint64 oo=0; oo<olapsLen; oo++)
264
+
fakeV += olaps[oo].erate;
265
+
266
+
fakeV /= olapsLen;
267
+
268
+
writeLog("WARNING: tig %u length %u nReads %u has " F_U64 " overlaps; setting to average %f.\n",
269
+
id(), getLength(), ufpath.size(), olapsLen, fakeV);
270
+
271
+
olapsLen = 0;
259
272
}
260
273
261
274
// Sort.
262
275
263
276
std::sort(olaps, olaps + olapsLen);
264
277
265
-
// Convert coordinates into intervals. Conceptually, squish out the duplicate numbers, then
266
-
// create an interval for every adjacent pair. We need to add intervals for the first and last
267
-
// region. And one more, for convenience, to hold the final 'close' values on intervals that
268
-
// extend to the end of the unitig.
269
-
270
-
if (olapsLen == 0) // No olaps, so add an interval
271
-
errorProfile.push_back(epValue(0, getLength())); // covering the whole tig
278
+
if (olapsLen == 0) // No olaps, so add an interval
279
+
errorProfile.push_back(epValue(fakeV, getLength())); // covering the whole tig
272
280
273
-
if ((olapsLen > 0) && (olaps[0].pos != 0)) // Olaps, but missing the first
274
-
errorProfile.push_back(epValue(0, olaps[0].pos)); // interval, so add it.
281
+
if ((olapsLen > 0) && (olaps[0].pos != 0)) // Olaps, but missing the first
282
+
errorProfile.push_back(epValue(0, olaps[0].pos)); // interval, so add it.
275
283
284
+
// Convert coordinates into intervals. Conceptually, squish out the
285
+
// duplicate numbers, then create an interval for every adjacent pair. We
286
+
// need to add intervals for the first and last region. And one more, for
287
+
// convenience, to hold the final 'close' values on intervals that extend
288
+
// to the end of the unitig.
276
289
277
290
stdDev<float> curDev;
278
291
You can’t perform that action at this time.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4