vector< string > & idmap,
54 constvector< CSRSearch::SResultData > &
results,
55 const string& idstr1,
const string& idstr2 =
"");
68vector< CSRSearch::SResultData > *
r1;
69vector< CSRSearch::SResultData > *
r2;
77:
max_res_( max_results_per_query ),
78 rv_pool( max_results_per_query )
84 const string& idstr1,
const string& idstr2 );
88 if(
sidmap.size() < start + idmap.size() ) {
89 sidmap.resize( start + idmap.size(),
"unknown");
92 for(
TSeqNum i= start;
i< start + idmap.size(); ++
i) {
109vector< CSRSearch::SResultData > *
r=
data_pool[q].r1;
110 return(
r== 0) ? 0 :
r->size();
115vector< CSRSearch::SResultData > *
r=
data_pool[q].r2;
116 return(
r== 0) ? 0 :
r->size();
126 typedefvector< CSRSearch::SResultData >
TItem;
151 for( TBlock::iterator
i=
b.begin();
i!=
b.end(); ++
i) {
203 while( q >=
q_max) {
230 if(
data.r1 != 0 ) {
234 if(
data.r2 != 0 ) {
243 const string& idstr1,
const string& idstr2 )
245 if( res.
res.empty() )
return;
256 data.r1->push_back( res.
res[
i] );
264 else if( res.
nres_1== 0 ) {
268 data.r2->push_back( res.
res[
i] );
281 for(
Uint4 i= 0 ;
i< sz; ++
i) {
282 data.r1->push_back( res.
res[
i] );
285sz = res.
res.size();
288 data.r2->push_back( res.
res[
i] );
295 if( !
data.r1->empty() )
data.id1 = idstr1;
296 if( !
data.r2->empty() )
data.id2 = idstr2;
301 "Search for close matches to short sequences.";
307arg_desc->SetUsageContext(
310 "input",
"input_file_name",
"input file name",
312arg_desc->AddOptionalKey(
313 "input1",
"paired_input_file_name",
314 "file containing query sequence pairs",
316arg_desc->AddOptionalKey(
317 "output",
"output_file_name",
"output file name",
319arg_desc->AddOptionalKey(
320 "pair_distance",
"pair_distance",
321 "distance between query pairs",
323arg_desc->AddOptionalKey(
324 "pair_distance_fuzz",
"pair_distance_fuzz",
325 "how much deviation from pair_distance is allowed",
327arg_desc->AddDefaultKey(
328 "mismatch",
"allow_mismatch",
329 "flag to allow one mismatch",
331arg_desc->AddDefaultKey(
332 "nomap",
"no_mmap_index",
333 "read index rather than mmap()'ing it.",
336 "index",
"index_name",
"index file name",
338arg_desc->AddDefaultKey(
339 "start_vol",
"index_volume",
340 "the first index volume to process",
342arg_desc->AddDefaultKey(
343 "end_vol",
"index_volume",
344 "one past the last index volume to process",
346arg_desc->AddDefaultKey(
347 "restrict_matches",
"number_of_matches",
348 "restrict the number of matches per query to at most this number",
350arg_desc->AddDefaultKey(
351 "noid",
"use_ordinal",
352 "use ordinal numbers for queries and database in output",
366snprintf( volstr, 3,
"%02d", vol );
367 returnprefix +
"."+ volstr +
".idx";
374 constvector< CSRSearch::SResultData > &
results,
375 const string& qidstr1,
const string& qidstr2 )
377 typedefvector< CSRSearch::SResultData > TRes;
379 for( TRes::const_iterator
i=
results.begin();
381 const string& qidstr = (
i->type == 2) ? qidstr2 : qidstr1;
382ostream << (
int)
i->type <<
"\t";
384 if( qidstr.empty() ) {
385ostream << qnum <<
"\t"<<
i->snum <<
"\t";
388ostream << qidstr <<
"\t";
390 if(
i->snum < idmap.size() ) ostream << idmap[
i->snum] <<
"\t";
391 elseostream <<
"unknown"<<
"\t";
394ostream <<
i->spos_1 <<
"\t" 395<< ((
i->fw_strand_1 == 0) ?
'-':
'+') <<
"\t" 396<<
i->mpos_1 <<
"\t" 397<< (char)
i->mbase_1;
401<<
i->spos_2 <<
"\t" 402<< ((
i->fw_strand_2 == 0) ?
'-':
'+') <<
"\t" 403<<
i->mpos_2 <<
"\t" 404<< (char)
i->mbase_2;
417entry->Which() != objects::CSeq_entry_Base::e_Seq ) {
420 "input seq-entry is NULL or not a sequence");
424objects::CScope scope( *
om);
425objects::CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry( *entry );
426objects::CBioseq_Handle bsh = seh.GetSeq();
428 Uint4pos = idstr.find_first_of(
" \t");
429idstr = idstr.substr( 0, pos );
433entry->SetSeq(), 0, objects::CBioseq_Handle::eCoding_Iupac );
439 om.
Reset( objects::CObjectManager::GetInstance() );
442 if(
GetArgs()[
"output"] ) {
447 Uint4pd = 0, pdfuzz = 0;
449 if(
GetArgs()[
"input1"] ) {
450 if( !
GetArgs()[
"pair_distance"] ) {
451 ERR_POST(
Error<<
"-pair_distance must be provided for paired input");
455pd =
GetArgs()[
"pair_distance"].AsInteger();
457 if(
GetArgs()[
"pair_distance_fuzz"] )
458pdfuzz =
GetArgs()[
"pair_distance_fuzz"].AsInteger();
462 ERR_POST(
Error<<
"the value of -pair_distance_fuzz can not be greater " 463<<
"than the value of -pair_distance");
468 stringindex_prefix =
GetArgs()[
"index"].AsString();
470 Uint4start_vol =
GetArgs()[
"start_vol"].AsInteger();
473 boolnoid =
GetArgs()[
"noid"].AsBoolean();
476 booluse_cache = (
nr!= 0);
477 if(
nr== 0 )
nr= 0xFFFFFFFF;
478 boolmismatch =
GetArgs()[
"mismatch"].AsBoolean();
479 boolnomap =
GetArgs()[
"nomap"].AsBoolean();
484 stringindex_name =
MakeIndexName( index_prefix, start_vol );
485cerr <<
"searching volume "<< index_name << endl;
491 if( index == 0 )
break;
496(
GetArgs()[
"input"].AsString() ) );
499 if(
GetArgs()[
"input1"] ) {
501 GetArgs()[
"input1"].AsString() );
504 boolpaired = (iseqstream1 != 0);
512 stringqidstr1, qidstr2;
532seq, seq1,
nr, s1, s2, l1, l2, !mismatch );
533search_obj->search( sdata,
results);
537seq,
nr, s1, s2, l1, l2, !mismatch );
538search_obj->search( sdata,
results);
550 if( seq_counter%100000 == 0 ) {
551cerr << seq_counter <<
" sequences processed"<< endl;
555 if( ++start_vol == end_vol )
break;
558 if( use_cache ) rcache.
dump( *ostream );
Types of exception the indexing library can throw.
const vector< string > & getIdMap() const
static CRef< CDbIndex > Load(const std::string &fname, bool nomap=false)
Load index.
CSequenceIStream::TStreamPos TSeqNum
Type used to enumerate sequences in the index.
TSeqNum getStartOId() const
static const Uint4 BLOCK_SIZE
SDataItem & at(TSeqNum q)
static const Uint4 BLOCKS_RESERVE
SDataItem & operator[](TSeqNum q)
vector< SDataItem > TBlock
static const Uint4 BLOCK_SHIFT
static const Uint4 BLOCK_MASK
static const Uint4 BLOCK_SIZE
vector< CSRSearch::SResultData > TItem
static const Uint4 BLOCKS_RESERVE
CSRSearch::ELevel getLevel2(TSeqNum q)
const vector< string > & getSIdMap() const
Uint4 getNRes1(TSeqNum q)
CSRSearch::ELevel getLevel1(TSeqNum q)
void updateSIdMap(const vector< string > &idmap, TSeqNum start)
CDbIndex::TSeqNum TSeqNum
Uint4 getNRes2(TSeqNum q)
void update(TSeqNum query, CSRSearch::TResults &results, const string &idstr1, const string &idstr2)
void dump(CNcbiOstream &ostream)
CRCache(Uint4 max_results_per_query)
virtual int Run()
Application main procedure.
static const char *const USAGE_LINE
String containing program usage information.
virtual void Init()
Application initialization.
static CRef< CSRSearch > MakeSRSearch(CRef< CDbIndex > index, TSeqPos d=0, TSeqPos dfuzz=0)
Sequence stream for reading FASTA formatted files.
Class used to abstract reading nucleotide sequences from various sources.
virtual CRef< TSeqData > next()=0
Extract the next sequence from the stream.
CDbIndex::TSeqNum TSeqNum
Forwarding declarations for convenience.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ eBoolean
{'true', 't', 'false', 'f'}, case-insensitive
@ eString
An arbitrary string.
@ eInteger
Convertible into an integer number (int or Int8)
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
NCBI_XOBJUTIL_EXPORT string GetTitle(const CBioseq_Handle &hnd, TGetTitleFlags flags=0)
TObjectType * GetNonNullPointer(void)
Get pointer value and throw a null pointer exception if pointer is null.
void Reset(void)
Reset reference object.
TObjectType * GetPointerOrNull(void) THROWS_NONE
Get pointer value.
uint32_t Uint4
4-byte (32-bit) unsigned integer
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
unsigned int
A callback function used to compare two keys in a database.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static SLJIT_INLINE sljit_ins nr(sljit_gpr dst, sljit_gpr src)
CRef< objects::CObjectManager > om
string MakeIndexName(const string &prefix, Uint4 vol)
void PrintResults(CNcbiOstream &ostream, const vector< string > &idmap, CDbIndex::TSeqNum qnum, const vector< CSRSearch::SResultData > &results, const string &idstr1, const string &idstr2="")
CSequenceIStream::TSeqData TSeqData
CSeqVector ExtractSeqVector(TSeqData &sd, bool noid, string &idstr)
USING_SCOPE(blastdbindex)
vector< CSRSearch::SResultData > * r2
vector< CSRSearch::SResultData > * r1
vector< SResultData > res
Type containing the sequence itself along with the masking information.
CRef< objects::CSeq_entry > seq_entry_
Sequence data.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4