@@ -51,23 +51,20 @@ Output_Corrections(feParameters *G);
51
51
52
52
53
53
54
-
// From overlapInCore.C
55
-
int
56
-
Binomial_Bound(int e, double p, int Start, double Limit);
57
-
58
-
59
-
60
54
// Read fragments lo_frag..hi_frag (INCLUSIVE) from store and save the ids and sequences of those
61
55
// with overlaps to fragments in global Frag .
62
56
63
57
static
64
58
void
65
-
Extract_Needed_Frags(feParameters *G,
66
-
gkStore *gkpStore,
67
-
uint32 loID,
68
-
uint32 hiID,
69
-
Frag_List_t *fl,
70
-
uint64 &nextOlap) {
59
+
extractReads(feParameters *G,
60
+
gkStore *gkpStore,
61
+
Frag_List_t *fl,
62
+
uint64 &nextOlap) {
63
+
64
+
// Clear the buffer.
65
+
66
+
fl->readsLen = 0;
67
+
fl->basesLen = 0;
71
68
72
69
// The original converted to lowercase, and made non-acgt be 'a'.
73
70
@@ -81,110 +78,103 @@ Extract_Needed_Frags(feParameters *G,
81
78
filter['G'] = filter['g'] = 'g';
82
79
filter['T'] = filter['t'] = 't';
83
80
84
-
// Count the amount of stuff we're loading.
81
+
// Return if we've exhausted the overlaps.
85
82
86
-
fl->readsLen = 0;
87
-
fl->basesLen = 0;
83
+
if (nextOlap >= G->olapsLen)
84
+
return;
85
+
86
+
// Count the amount of stuff we're loading.
88
87
89
88
uint64 lastOlap = nextOlap;
90
-
uint32 ii = 0; // Index into reads arrays
91
-
uint32 fi = G->olaps[lastOlap].b_iid; // Actual ID we're extracting
89
+
uint32 loID = G->olaps[lastOlap].b_iid; // Actual ID we're extracting
90
+
uint32 hiID = loID;
91
+
uint64 maxBases = 512 * 1024 * 1024;
92
92
93
-
assert(loID <= fi);
93
+
// Find the highest read ID that we can load without exceeding maxBases.
94
94
95
-
fprintf(stderr, "\n");
96
-
fprintf(stderr, "Extract_Needed_Frags()-- Loading used reads between " F_U32 " and " F_U32 ", at overlap " F_U64 ".\n", fi, hiID, lastOlap);
95
+
while ((fl->basesLen < maxBases) &&
96
+
(lastOlap < G->olapsLen)) {
97
+
hiID = G->olaps[lastOlap].b_iid; // Grab the ID of the overlap we're at.
97
98
98
-
while (fi <= hiID) {
99
-
gkRead *read = gkpStore->gkStore_getRead(fi);
99
+
gkRead *read = gkpStore->gkStore_getRead(hiID); // Grab that read.
100
100
101
-
fl->readsLen += 1;
101
+
fl->readsLen += 1; // Add the read to our set.
102
102
fl->basesLen += read->gkRead_sequenceLength() + 1;
103
103
104
-
// Advance to the next overlap
105
-
106
-
lastOlap++;
107
-
while ((lastOlap < G->olapsLen) && (G->olaps[lastOlap].b_iid == fi))
108
-
lastOlap++;
109
-
fi = (lastOlap < G->olapsLen) ? G->olaps[lastOlap].b_iid : hiID + 1;
104
+
lastOlap++; // Advance to the next overlap
105
+
while ((lastOlap < G->olapsLen) && //
106
+
(G->olaps[lastOlap].b_iid == hiID)) // If we've exceeded the max size or hit the last overlap,
107
+
lastOlap++; // the loop will stop on the next iteration.
110
108
}
111
109
112
-
fprintf(stderr, "Extract_Needed_Frags()-- Loading reads for overlaps " F_U64 " to " F_U64 " (reads " F_U32 " bases " F_U64 ")\n", nextOlap, lastOlap, fl->readsLen, fl->basesLen);
110
+
// If nothing to load, just return.
111
+
112
+
if (fl->readsLen == 0)
113
+
return;
114
+
115
+
// Report what we're going to do.
116
+
117
+
fprintf(stderr, "extractReads()-- Loading reads " F_U32 " to " F_U32 " (" F_U32 " reads with " F_U64 " bases) overlaps " F_U64 " through " F_U64 ".\n",
118
+
loID, hiID, fl->readsLen, fl->basesLen, nextOlap, lastOlap);
113
119
114
120
// Ensure there is space.
115
121
116
122
if (fl->readsMax < fl->readsLen) {
117
123
delete [] fl->readIDs;
118
124
delete [] fl->readBases;
119
125
120
-
//fprintf(stderr, "Extract_Needed_Frags()-- realloc reads from " F_U32 " to " F_U32 "\n", fl->readsMax, 12 * fl->readsLen / 10);
121
-
122
-
fl->readIDs = new uint32 [12 * fl->readsLen / 10];
123
-
fl->readBases = new char * [12 * fl->readsLen / 10];
124
-
125
126
fl->readsMax = 12 * fl->readsLen / 10;
127
+
fl->readIDs = new uint32 [fl->readsMax];
128
+
fl->readBases = new char * [fl->readsMax];
126
129
}
127
130
128
131
if (fl->basesMax < fl->basesLen) {
129
132
delete [] fl->bases;
130
133
131
-
//fprintf(stderr, "Extract_Needed_Frags()-- realloc bases from " F_U64 " to " F_U64 "\n", fl->basesMax, 12 * fl->basesLen / 10);
132
-
133
-
fl->bases = new char [12 * fl->basesLen / 10];
134
-
135
134
fl->basesMax = 12 * fl->basesLen / 10;
135
+
fl->bases = new char [fl->basesMax];
136
136
}
137
137
138
-
// Load. This is complicated by loading only the reads that have overlaps we care about.
139
-
140
-
fl->readsLen = 0;
141
-
fl->basesLen = 0;
138
+
// Load the sequence data for reads loID to hiID, as long as the read has an overlap.
142
139
143
140
gkReadData *readData = new gkReadData;
144
141
145
-
ii = 0;
146
-
fi = G->olaps[nextOlap].b_iid;
147
-
148
-
assert(loID <= fi);
142
+
fl->readsLen = 0;
143
+
fl->basesLen = 0;
149
144
150
-
while (fi <= hiID) {
151
-
gkRead *read = gkpStore->gkStore_getRead(fi);
145
+
while ((loID <= hiID) &&
146
+
(nextOlap < G->olapsLen)) {
147
+
gkRead *read = gkpStore->gkStore_getRead(loID);
152
148
153
-
fl->readIDs[ii] = fi;
154
-
fl->readBases[ii] = fl->bases + fl->basesLen;
155
-
fl->basesLen += read->gkRead_sequenceLength() + 1;
149
+
fl->readIDs[fl->readsLen] = loID; // Save the ID of _this_ read.
150
+
fl->readBases[fl->readsLen] = fl->bases + fl->basesLen; // Set the data pointer to where this read should start.
156
151
157
152
gkpStore->gkStore_loadReadData(read, readData);
158
153
159
154
uint32 readLen = read->gkRead_sequenceLength();
160
155
char *readBases = readData->gkReadData_getSequence();
161
156
162
157
for (uint32 bb=0; bb<readLen; bb++)
163
-
fl->readBases[ii][bb] = filter[readBases[bb]];
158
+
fl->readBases[fl->readsLen][bb] = filter[readBases[bb]];
164
159
165
-
fl->readBases[ii][readLen] = 0; // All good reads end.
160
+
fl->readBases[fl->readsLen][readLen] = 0; // All good reads end.
166
161
167
-
ii++;
162
+
fl->basesLen += read->gkRead_sequenceLength() + 1; // Update basesLen to account for this read.
163
+
fl->readsLen += 1; // And note that we loaded a read.
168
164
169
-
// Advance to the next overlap.
170
-
171
-
nextOlap++;
172
-
while ((nextOlap < G->olapsLen) && (G->olaps[nextOlap].b_iid == fi))
165
+
nextOlap++; // Advance past all the overlaps for this read.
166
+
while ((nextOlap < G->olapsLen) &&
167
+
(G->olaps[nextOlap].b_iid == loID))
173
168
nextOlap++;
174
-
fi = (nextOlap < G->olapsLen) ? G->olaps[nextOlap].b_iid : hiID + 1;
169
+
170
+
if (nextOlap < G->olapsLen) // If we have valid overlap, grab the read ID.
171
+
loID = G->olaps[nextOlap].b_iid; // If we don't have a valid overlap, the loop will stop.
175
172
}
176
173
177
174
delete readData;
178
175
179
-
fl->readsLen = ii;
180
176
181
-
if (fl->readsLen > 0)
182
-
fprintf(stderr, "Extract_Needed_Frags()-- Loaded " F_U32 " reads (%.4f%%). Loaded IDs " F_U32 " through " F_U32 ".\n",
183
-
fl->readsLen, 100.0 * fl->readsLen / (hiID - 1 - loID),
184
-
fl->readIDs[0], fl->readIDs[fl->readsLen-1]);
185
-
else
186
-
fprintf(stderr, "Extract_Needed_Frags()-- Loaded " F_U32 " reads (%.4f%%).\n",
187
-
fl->readsLen, 100.0 * fl->readsLen / (hiID - 1 - loID));
177
+
fprintf(stderr, "extractReads()-- Loaded.\n");
188
178
}
189
179
190
180
@@ -194,7 +184,7 @@ Extract_Needed_Frags(feParameters *G,
194
184
// frag_iid % Num_PThreads == thread_id
195
185
196
186
void *
197
-
Threaded_Process_Stream(void *ptr) {
187
+
processThread(void *ptr) {
198
188
Thread_Work_Area_t *wa = (Thread_Work_Area_t *)ptr;
199
189
200
190
for (int32 i=0; i<wa->frag_list->readsLen; i++) {
@@ -247,10 +237,10 @@ Threaded_Process_Stream(void *ptr) {
247
237
248
238
static
249
239
void
250
-
Threaded_Stream_Old_Frags(feParameters *G,
251
-
gkStore *gkpStore,
252
-
uint64 &passedOlaps,
253
-
uint64 &failedOlaps) {
240
+
processReads(feParameters *G,
241
+
gkStore *gkpStore,
242
+
uint64 &passedOlaps,
243
+
uint64 &failedOlaps) {
254
244
255
245
pthread_attr_t attr;
256
246
@@ -262,8 +252,6 @@ Threaded_Stream_Old_Frags(feParameters *G,
262
252
263
253
for (uint32 i=0; i<G->numThreads; i++) {
264
254
thread_wa[i].thread_id = i;
265
-
thread_wa[i].loID = 0;
266
-
thread_wa[i].hiID = 0;
267
255
thread_wa[i].nextOlap = 0;
268
256
thread_wa[i].G = G;
269
257
thread_wa[i].frag_list = NULL;
@@ -278,14 +266,6 @@ Threaded_Stream_Old_Frags(feParameters *G,
278
266
thread_wa[i].ped.initialize(G, G->errorRate);
279
267
}
280
268
281
-
uint32 loID = G->olaps[0].b_iid;
282
-
uint32 hiID = loID + FRAGS_PER_BATCH - 1;
283
-
284
-
uint32 endID = G->olaps[G->olapsLen - 1].b_iid;
285
-
286
-
if (hiID > endID)
287
-
hiID = endID;
288
-
289
269
uint64 frstOlap = 0;
290
270
uint64 nextOlap = 0;
291
271
@@ -295,41 +275,34 @@ Threaded_Stream_Old_Frags(feParameters *G,
295
275
Frag_List_t *curr_frag_list = &frag_list_1;
296
276
Frag_List_t *next_frag_list = &frag_list_2;
297
277
298
-
Extract_Needed_Frags(G, gkpStore, loID, hiID, curr_frag_list, nextOlap);
278
+
extractReads(G, gkpStore, curr_frag_list, nextOlap);
299
279
300
-
while (loID <= endID) {
280
+
while (curr_frag_list->readsLen > 0) {
301
281
302
282
// Process fragments in curr_frag_list in background
303
283
284
+
fprintf(stderr, "processReads()-- Launching compute.\n");
285
+
304
286
for (uint32 i=0; i<G->numThreads; i++) {
305
-
thread_wa[i].loID = loID;
306
-
thread_wa[i].hiID = hiID;
307
287
thread_wa[i].nextOlap = frstOlap;
308
288
thread_wa[i].frag_list = curr_frag_list;
309
289
310
-
int status = pthread_create(thread_id + i, &attr, Threaded_Process_Stream, thread_wa + i);
290
+
int status = pthread_create(thread_id + i, &attr, processThread, thread_wa + i);
311
291
312
292
if (status != 0)
313
293
fprintf(stderr, "pthread_create error: %s\n", strerror(status)), exit(1);
314
294
}
315
295
316
296
// Read next batch of fragments
317
297
318
-
loID = hiID + 1;
319
-
320
-
if (loID <= endID) {
321
-
hiID = loID + FRAGS_PER_BATCH - 1;
298
+
frstOlap = nextOlap;
322
299
323
-
if (hiID > endID)
324
-
hiID = endID;
325
-
326
-
frstOlap = nextOlap;
327
-
328
-
Extract_Needed_Frags(G, gkpStore, loID, hiID, next_frag_list, nextOlap);
329
-
}
300
+
extractReads(G, gkpStore, next_frag_list, nextOlap);
330
301
331
302
// Wait for background processing to finish
332
303
304
+
fprintf(stderr, "processReads()-- Waiting for compute.\n");
305
+
333
306
for (uint32 i=0; i<G->numThreads; i++) {
334
307
void *ptr;
335
308
@@ -430,6 +403,8 @@ main(int argc, char **argv) {
430
403
err++;
431
404
if (G->numThreads == 0)
432
405
err++;
406
+
if (G->bgnID > G->endID)
407
+
err++;
433
408
434
409
if (err > 0) {
435
410
fprintf(stderr, "usage: %s[-ehp][-d DegrThresh][-k KmerLen][-x ExcludeLen]\n", argv[0]);
@@ -463,6 +438,8 @@ main(int argc, char **argv) {
463
438
fprintf(stderr, "ERROR: no overlap store (-O) supplied.\n");
464
439
if (G->numThreads == 0)
465
440
fprintf(stderr, "ERROR: number of compute threads (-t) must be larger than zero.\n");
441
+
if (G->bgnID > G->endID)
442
+
fprintf(stderr, "ERROR: read range (-R) %u-%u invalid.\n", G->bgnID, G->endID);
466
443
467
444
exit(1);
468
445
}
@@ -496,7 +473,7 @@ main(int argc, char **argv) {
496
473
uint64 passedOlaps = 0;
497
474
uint64 failedOlaps = 0;
498
475
499
-
Threaded_Stream_Old_Frags(G, gkpStore, passedOlaps, failedOlaps);
476
+
processReads(G, gkpStore, passedOlaps, failedOlaps);
500
477
501
478
// All done. Sum up what we did.
502
479
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4