A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://github.com/marbl/canu/commit/69e22c99300bf76766c39abf763a739d8def8133 below:

Don't estimate error profiles for tigs with billions of overlaps. Is… · marbl/canu@69e22c9 · GitHub

File tree Expand file treeCollapse file tree 1 file changed

+29

-16

lines changed

Filter options

Expand file treeCollapse file tree 1 file changed

+29

-16

lines changed Original file line number Diff line number Diff line change

@@ -239,40 +239,53 @@ Unitig::computeErrorProfile(const char *UNUSED(prefix), const char *UNUSED(label

239 239

}

240 240 241 241

// Warn if too few or too many overlaps.

242 +

//

243 +

// The stdDev<> cannot handle more than 4 billion entries. If there are

244 +

// more than that, just compute the average and make a single interval

245 +

// spanning the tig.

246 +

//

247 +

// This happened once; see https://github.com/marbl/canu/issues/1355

248 +

// WARNING: tig 19049 length 9064 nReads 393906 has 4873434470 overlaps.

249 + 250 +

double fakeV = 0.0;

242 251 243 252

if (olapsLen == 0) {

244 253

writeLog("WARNING: tig %u length %u nReads %u has no overlaps.\n", id(), getLength(), ufpath.size());

254 + 245 255

for (uint32 fi=0; fi<ufpath.size(); fi++)

246 256

writeLog("WARNING: read %7u %7u-%-7u\n",

247 257

ufpath[fi].ident,

248 258

ufpath[fi].position.bgn,

249 259

ufpath[fi].position.end);

250 260

}

251 261 252 -

if (olapsLen > (uint64)4 * 1024 * 1024 * 1024) {

253 -

writeLog("WARNING: tig %u length %u nReads %u has " F_U64 " overlaps.\n", id(), getLength(), ufpath.size(), olapsLen);

254 -

for (uint32 fi=0; fi<ufpath.size(); fi++)

255 -

writeLog("WARNING: read %7u %7u-%-7u\n",

256 -

ufpath[fi].ident,

257 -

ufpath[fi].position.bgn,

258 -

ufpath[fi].position.end);

262 +

if (olapsLen > (uint64)UINT32_MAX) {

263 +

for (uint64 oo=0; oo<olapsLen; oo++)

264 +

fakeV += olaps[oo].erate;

265 + 266 +

fakeV /= olapsLen;

267 + 268 +

writeLog("WARNING: tig %u length %u nReads %u has " F_U64 " overlaps; setting to average %f.\n",

269 +

id(), getLength(), ufpath.size(), olapsLen, fakeV);

270 + 271 +

olapsLen = 0;

259 272

}

260 273 261 274

// Sort.

262 275 263 276

std::sort(olaps, olaps + olapsLen);

264 277 265 -

// Convert coordinates into intervals. Conceptually, squish out the duplicate numbers, then

266 -

// create an interval for every adjacent pair. We need to add intervals for the first and last

267 -

// region. And one more, for convenience, to hold the final 'close' values on intervals that

268 -

// extend to the end of the unitig.

269 - 270 -

if (olapsLen == 0) // No olaps, so add an interval

271 -

errorProfile.push_back(epValue(0, getLength())); // covering the whole tig

278 +

if (olapsLen == 0) // No olaps, so add an interval

279 +

errorProfile.push_back(epValue(fakeV, getLength())); // covering the whole tig

272 280 273 -

if ((olapsLen > 0) && (olaps[0].pos != 0)) // Olaps, but missing the first

274 -

errorProfile.push_back(epValue(0, olaps[0].pos)); // interval, so add it.

281 +

if ((olapsLen > 0) && (olaps[0].pos != 0)) // Olaps, but missing the first

282 +

errorProfile.push_back(epValue(0, olaps[0].pos)); // interval, so add it.

275 283 284 +

// Convert coordinates into intervals. Conceptually, squish out the

285 +

// duplicate numbers, then create an interval for every adjacent pair. We

286 +

// need to add intervals for the first and last region. And one more, for

287 +

// convenience, to hold the final 'close' values on intervals that extend

288 +

// to the end of the unitig.

276 289 277 290

stdDev<float> curDev;

278 291

You can’t perform that action at this time.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4