A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://github.com/marbl/canu/commit/eeef601e046e7551c83f7459fecfe8884936929d below:

Use a fixed-size buffer (512 Mbp) for streaming reads; this makes con… · marbl/canu@eeef601 · GitHub

@@ -51,23 +51,20 @@ Output_Corrections(feParameters *G);

51 51 52 52 53 53 54 -

// From overlapInCore.C

55 -

int

56 -

Binomial_Bound(int e, double p, int Start, double Limit);

57 - 58 - 59 - 60 54

// Read fragments lo_frag..hi_frag (INCLUSIVE) from store and save the ids and sequences of those

61 55

// with overlaps to fragments in global Frag .

62 56 63 57

static

64 58

void

65 -

Extract_Needed_Frags(feParameters *G,

66 -

gkStore *gkpStore,

67 -

uint32 loID,

68 -

uint32 hiID,

69 -

Frag_List_t *fl,

70 -

uint64 &nextOlap) {

59 +

extractReads(feParameters *G,

60 +

gkStore *gkpStore,

61 +

Frag_List_t *fl,

62 +

uint64 &nextOlap) {

63 + 64 +

// Clear the buffer.

65 + 66 +

fl->readsLen = 0;

67 +

fl->basesLen = 0;

71 68 72 69

// The original converted to lowercase, and made non-acgt be 'a'.

73 70

@@ -81,110 +78,103 @@ Extract_Needed_Frags(feParameters *G,

81 78

filter['G'] = filter['g'] = 'g';

82 79

filter['T'] = filter['t'] = 't';

83 80 84 -

// Count the amount of stuff we're loading.

81 +

// Return if we've exhausted the overlaps.

85 82 86 -

fl->readsLen = 0;

87 -

fl->basesLen = 0;

83 +

if (nextOlap >= G->olapsLen)

84 +

return;

85 + 86 +

// Count the amount of stuff we're loading.

88 87 89 88

uint64 lastOlap = nextOlap;

90 -

uint32 ii = 0; // Index into reads arrays

91 -

uint32 fi = G->olaps[lastOlap].b_iid; // Actual ID we're extracting

89 +

uint32 loID = G->olaps[lastOlap].b_iid; // Actual ID we're extracting

90 +

uint32 hiID = loID;

91 +

uint64 maxBases = 512 * 1024 * 1024;

92 92 93 -

assert(loID <= fi);

93 +

// Find the highest read ID that we can load without exceeding maxBases.

94 94 95 -

fprintf(stderr, "\n");

96 -

fprintf(stderr, "Extract_Needed_Frags()-- Loading used reads between " F_U32 " and " F_U32 ", at overlap " F_U64 ".\n", fi, hiID, lastOlap);

95 +

while ((fl->basesLen < maxBases) &&

96 +

(lastOlap < G->olapsLen)) {

97 +

hiID = G->olaps[lastOlap].b_iid; // Grab the ID of the overlap we're at.

97 98 98 -

while (fi <= hiID) {

99 -

gkRead *read = gkpStore->gkStore_getRead(fi);

99 +

gkRead *read = gkpStore->gkStore_getRead(hiID); // Grab that read.

100 100 101 -

fl->readsLen += 1;

101 +

fl->readsLen += 1; // Add the read to our set.

102 102

fl->basesLen += read->gkRead_sequenceLength() + 1;

103 103 104 -

// Advance to the next overlap

105 - 106 -

lastOlap++;

107 -

while ((lastOlap < G->olapsLen) && (G->olaps[lastOlap].b_iid == fi))

108 -

lastOlap++;

109 -

fi = (lastOlap < G->olapsLen) ? G->olaps[lastOlap].b_iid : hiID + 1;

104 +

lastOlap++; // Advance to the next overlap

105 +

while ((lastOlap < G->olapsLen) && //

106 +

(G->olaps[lastOlap].b_iid == hiID)) // If we've exceeded the max size or hit the last overlap,

107 +

lastOlap++; // the loop will stop on the next iteration.

110 108

}

111 109 112 -

fprintf(stderr, "Extract_Needed_Frags()-- Loading reads for overlaps " F_U64 " to " F_U64 " (reads " F_U32 " bases " F_U64 ")\n", nextOlap, lastOlap, fl->readsLen, fl->basesLen);

110 +

// If nothing to load, just return.

111 + 112 +

if (fl->readsLen == 0)

113 +

return;

114 + 115 +

// Report what we're going to do.

116 + 117 +

fprintf(stderr, "extractReads()-- Loading reads " F_U32 " to " F_U32 " (" F_U32 " reads with " F_U64 " bases) overlaps " F_U64 " through " F_U64 ".\n",

118 +

loID, hiID, fl->readsLen, fl->basesLen, nextOlap, lastOlap);

113 119 114 120

// Ensure there is space.

115 121 116 122

if (fl->readsMax < fl->readsLen) {

117 123

delete [] fl->readIDs;

118 124

delete [] fl->readBases;

119 125 120 -

//fprintf(stderr, "Extract_Needed_Frags()-- realloc reads from " F_U32 " to " F_U32 "\n", fl->readsMax, 12 * fl->readsLen / 10);

121 - 122 -

fl->readIDs = new uint32 [12 * fl->readsLen / 10];

123 -

fl->readBases = new char * [12 * fl->readsLen / 10];

124 - 125 126

fl->readsMax = 12 * fl->readsLen / 10;

127 +

fl->readIDs = new uint32 [fl->readsMax];

128 +

fl->readBases = new char * [fl->readsMax];

126 129

}

127 130 128 131

if (fl->basesMax < fl->basesLen) {

129 132

delete [] fl->bases;

130 133 131 -

//fprintf(stderr, "Extract_Needed_Frags()-- realloc bases from " F_U64 " to " F_U64 "\n", fl->basesMax, 12 * fl->basesLen / 10);

132 - 133 -

fl->bases = new char [12 * fl->basesLen / 10];

134 - 135 134

fl->basesMax = 12 * fl->basesLen / 10;

135 +

fl->bases = new char [fl->basesMax];

136 136

}

137 137 138 -

// Load. This is complicated by loading only the reads that have overlaps we care about.

139 - 140 -

fl->readsLen = 0;

141 -

fl->basesLen = 0;

138 +

// Load the sequence data for reads loID to hiID, as long as the read has an overlap.

142 139 143 140

gkReadData *readData = new gkReadData;

144 141 145 -

ii = 0;

146 -

fi = G->olaps[nextOlap].b_iid;

147 - 148 -

assert(loID <= fi);

142 +

fl->readsLen = 0;

143 +

fl->basesLen = 0;

149 144 150 -

while (fi <= hiID) {

151 -

gkRead *read = gkpStore->gkStore_getRead(fi);

145 +

while ((loID <= hiID) &&

146 +

(nextOlap < G->olapsLen)) {

147 +

gkRead *read = gkpStore->gkStore_getRead(loID);

152 148 153 -

fl->readIDs[ii] = fi;

154 -

fl->readBases[ii] = fl->bases + fl->basesLen;

155 -

fl->basesLen += read->gkRead_sequenceLength() + 1;

149 +

fl->readIDs[fl->readsLen] = loID; // Save the ID of _this_ read.

150 +

fl->readBases[fl->readsLen] = fl->bases + fl->basesLen; // Set the data pointer to where this read should start.

156 151 157 152

gkpStore->gkStore_loadReadData(read, readData);

158 153 159 154

uint32 readLen = read->gkRead_sequenceLength();

160 155

char *readBases = readData->gkReadData_getSequence();

161 156 162 157

for (uint32 bb=0; bb<readLen; bb++)

163 -

fl->readBases[ii][bb] = filter[readBases[bb]];

158 +

fl->readBases[fl->readsLen][bb] = filter[readBases[bb]];

164 159 165 -

fl->readBases[ii][readLen] = 0; // All good reads end.

160 +

fl->readBases[fl->readsLen][readLen] = 0; // All good reads end.

166 161 167 -

ii++;

162 +

fl->basesLen += read->gkRead_sequenceLength() + 1; // Update basesLen to account for this read.

163 +

fl->readsLen += 1; // And note that we loaded a read.

168 164 169 -

// Advance to the next overlap.

170 - 171 -

nextOlap++;

172 -

while ((nextOlap < G->olapsLen) && (G->olaps[nextOlap].b_iid == fi))

165 +

nextOlap++; // Advance past all the overlaps for this read.

166 +

while ((nextOlap < G->olapsLen) &&

167 +

(G->olaps[nextOlap].b_iid == loID))

173 168

nextOlap++;

174 -

fi = (nextOlap < G->olapsLen) ? G->olaps[nextOlap].b_iid : hiID + 1;

169 + 170 +

if (nextOlap < G->olapsLen) // If we have valid overlap, grab the read ID.

171 +

loID = G->olaps[nextOlap].b_iid; // If we don't have a valid overlap, the loop will stop.

175 172

}

176 173 177 174

delete readData;

178 175 179 -

fl->readsLen = ii;

180 176 181 -

if (fl->readsLen > 0)

182 -

fprintf(stderr, "Extract_Needed_Frags()-- Loaded " F_U32 " reads (%.4f%%). Loaded IDs " F_U32 " through " F_U32 ".\n",

183 -

fl->readsLen, 100.0 * fl->readsLen / (hiID - 1 - loID),

184 -

fl->readIDs[0], fl->readIDs[fl->readsLen-1]);

185 -

else

186 -

fprintf(stderr, "Extract_Needed_Frags()-- Loaded " F_U32 " reads (%.4f%%).\n",

187 -

fl->readsLen, 100.0 * fl->readsLen / (hiID - 1 - loID));

177 +

fprintf(stderr, "extractReads()-- Loaded.\n");

188 178

}

189 179 190 180

@@ -194,7 +184,7 @@ Extract_Needed_Frags(feParameters *G,

194 184

// frag_iid % Num_PThreads == thread_id

195 185 196 186

void *

197 -

Threaded_Process_Stream(void *ptr) {

187 +

processThread(void *ptr) {

198 188

Thread_Work_Area_t *wa = (Thread_Work_Area_t *)ptr;

199 189 200 190

for (int32 i=0; i<wa->frag_list->readsLen; i++) {

@@ -247,10 +237,10 @@ Threaded_Process_Stream(void *ptr) {

247 237 248 238

static

249 239

void

250 -

Threaded_Stream_Old_Frags(feParameters *G,

251 -

gkStore *gkpStore,

252 -

uint64 &passedOlaps,

253 -

uint64 &failedOlaps) {

240 +

processReads(feParameters *G,

241 +

gkStore *gkpStore,

242 +

uint64 &passedOlaps,

243 +

uint64 &failedOlaps) {

254 244 255 245

pthread_attr_t attr;

256 246

@@ -262,8 +252,6 @@ Threaded_Stream_Old_Frags(feParameters *G,

262 252 263 253

for (uint32 i=0; i<G->numThreads; i++) {

264 254

thread_wa[i].thread_id = i;

265 -

thread_wa[i].loID = 0;

266 -

thread_wa[i].hiID = 0;

267 255

thread_wa[i].nextOlap = 0;

268 256

thread_wa[i].G = G;

269 257

thread_wa[i].frag_list = NULL;

@@ -278,14 +266,6 @@ Threaded_Stream_Old_Frags(feParameters *G,

278 266

thread_wa[i].ped.initialize(G, G->errorRate);

279 267

}

280 268 281 -

uint32 loID = G->olaps[0].b_iid;

282 -

uint32 hiID = loID + FRAGS_PER_BATCH - 1;

283 - 284 -

uint32 endID = G->olaps[G->olapsLen - 1].b_iid;

285 - 286 -

if (hiID > endID)

287 -

hiID = endID;

288 - 289 269

uint64 frstOlap = 0;

290 270

uint64 nextOlap = 0;

291 271

@@ -295,41 +275,34 @@ Threaded_Stream_Old_Frags(feParameters *G,

295 275

Frag_List_t *curr_frag_list = &frag_list_1;

296 276

Frag_List_t *next_frag_list = &frag_list_2;

297 277 298 -

Extract_Needed_Frags(G, gkpStore, loID, hiID, curr_frag_list, nextOlap);

278 +

extractReads(G, gkpStore, curr_frag_list, nextOlap);

299 279 300 -

while (loID <= endID) {

280 +

while (curr_frag_list->readsLen > 0) {

301 281 302 282

// Process fragments in curr_frag_list in background

303 283 284 +

fprintf(stderr, "processReads()-- Launching compute.\n");

285 + 304 286

for (uint32 i=0; i<G->numThreads; i++) {

305 -

thread_wa[i].loID = loID;

306 -

thread_wa[i].hiID = hiID;

307 287

thread_wa[i].nextOlap = frstOlap;

308 288

thread_wa[i].frag_list = curr_frag_list;

309 289 310 -

int status = pthread_create(thread_id + i, &attr, Threaded_Process_Stream, thread_wa + i);

290 +

int status = pthread_create(thread_id + i, &attr, processThread, thread_wa + i);

311 291 312 292

if (status != 0)

313 293

fprintf(stderr, "pthread_create error: %s\n", strerror(status)), exit(1);

314 294

}

315 295 316 296

// Read next batch of fragments

317 297 318 -

loID = hiID + 1;

319 - 320 -

if (loID <= endID) {

321 -

hiID = loID + FRAGS_PER_BATCH - 1;

298 +

frstOlap = nextOlap;

322 299 323 -

if (hiID > endID)

324 -

hiID = endID;

325 - 326 -

frstOlap = nextOlap;

327 - 328 -

Extract_Needed_Frags(G, gkpStore, loID, hiID, next_frag_list, nextOlap);

329 -

}

300 +

extractReads(G, gkpStore, next_frag_list, nextOlap);

330 301 331 302

// Wait for background processing to finish

332 303 304 +

fprintf(stderr, "processReads()-- Waiting for compute.\n");

305 + 333 306

for (uint32 i=0; i<G->numThreads; i++) {

334 307

void *ptr;

335 308

@@ -430,6 +403,8 @@ main(int argc, char **argv) {

430 403

err++;

431 404

if (G->numThreads == 0)

432 405

err++;

406 +

if (G->bgnID > G->endID)

407 +

err++;

433 408 434 409

if (err > 0) {

435 410

fprintf(stderr, "usage: %s[-ehp][-d DegrThresh][-k KmerLen][-x ExcludeLen]\n", argv[0]);

@@ -463,6 +438,8 @@ main(int argc, char **argv) {

463 438

fprintf(stderr, "ERROR: no overlap store (-O) supplied.\n");

464 439

if (G->numThreads == 0)

465 440

fprintf(stderr, "ERROR: number of compute threads (-t) must be larger than zero.\n");

441 +

if (G->bgnID > G->endID)

442 +

fprintf(stderr, "ERROR: read range (-R) %u-%u invalid.\n", G->bgnID, G->endID);

466 443 467 444

exit(1);

468 445

}

@@ -496,7 +473,7 @@ main(int argc, char **argv) {

496 473

uint64 passedOlaps = 0;

497 474

uint64 failedOlaps = 0;

498 475 499 -

Threaded_Stream_Old_Frags(G, gkpStore, passedOlaps, failedOlaps);

476 +

processReads(G, gkpStore, passedOlaps, failedOlaps);

500 477 501 478

// All done. Sum up what we did.

502 479

RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4