FILE *fpt = fopen(
file,
"r");
56CBioseq::TId::iterator id;
62 id= seq->
SetId().begin();
66<<
"CSeq_id::GetStringDescr for prefix = " 69<<
"GetStringDescr for prefix = " 79string::size_type ipos = prefix.rfind(
'|');
80 if(ipos != string::npos) prefix.erase(ipos+1);
89 if((s=strstr(
str,
"Query= "))!=
NULL)
109<<
"before applying tagmap = " 114<<
"after applying tagmap = " 127 NcbiCerr<<
"CReadBlastApp::ReadBlast: ERROR: no next line in ("<<
file<<
") while parsing long protein names"<<
NcbiEndl;
130 CRegexpre_length(
"Length=(\\d+)|\\((\\d+) letters\\)");
132 autoqLenStr = re_length.
GetSub(
str,1);
133 if(qLenStr.empty()) qLenStr = re_length.
GetSub(
str,2);
134qLen = NStr::StringToNumeric<long>(qLenStr);
138blastMap[qName].qLen = qLen;
139blastMap[qName].qName = qName;
142<< qName <<
":"<< qLen <<
NcbiEndl;
152blastMap[qName].hits.resize(ihit+1);
155 if((s=strstr(
str,
"gi|")) !=
NULL)
158sbjGIs.push_back(gi);
165 charbigbuf[9900]; *bigbuf =
'\0';
168 CRegexpre_length_in_align(
"Length\\s*=\\s*(\\d+)");
177 NcbiCerr<<
"CReadBlastApp::ReadBlast: ERROR: no next line in ("<<
file<<
") while parsing hits"<<
NcbiEndl;
180 if((s=strstr(
str,
"gi|")) !=
NULL)
183sbjGIs.push_back(gi);
192blastMap[qName].hits[ihit].sbjGIs = sbjGIs;
195 longsbjLen = NStr::StringToNumeric<long>(re_length_in_align.
GetSub(
str, 1));
196blastMap[qName].hits[ihit].sbjLen = sbjLen;
202 for(
intk=1; k < 2; ++s)
if(*s ==
' ') ++k;
210 while(*s && (*s !=
'\n'))
214 while((ipos = sbjName.find_last_of(
"\r\n")) != string::npos)
216sbjName.erase(ipos,1);
220blastMap[qName].hits[ihit].sbjName = sbjName;
222 if(*s ==
'\n') { ++s; }
227 if(strstr(s,
"gi|")!= s)
229 while(*s && (*s !=
'\n')) {s++; }
232 while(strstr(
str,
"Score = ")==
NULL)
236 NcbiCerr<<
"CReadBlastApp::ReadBlast: ERROR: no next line in ("<<
file<<
") while skipping to Score line"<<
NcbiEndl;
241s = strstr(
str,
"Score = "); s+=strlen(
"Score = ");
242 doublebitscore = atof(s);
243blastMap[qName].hits[ihit].bitscore = bitscore;
246 if(
str[strlen(
str)-1] ==
'\n')
str[strlen(
str)-1] =
' ';
248s = strstr(
str,
"Expect = "); s+=strlen(
"Expect = ");
249 doubleeval = atof(s);
250blastMap[qName].hits[ihit].eval = eval;
254 NcbiCerr<<
"CReadBlastApp::ReadBlast: ERROR: no next line in ("<<
file<<
") while reading Identities line"<<
NcbiEndl;
257s = strstr(
str,
"Identities = "); s+=strlen(
"Identities = ");
258 longnident = atoi(s);
259blastMap[qName].hits[ihit].nident = nident;
260s = strstr(s,
"/"); s++;
261 longalilen = atoi(s);
262blastMap[qName].hits[ihit].alilen = alilen;
263s = strstr(s,
"("); s++;
264 doublepident = atof(s);
265blastMap[qName].hits[ihit].pident = pident;
267s = strstr(
str,
"Positives = "); s+=strlen(
"Positives = ");
269blastMap[qName].hits[ihit].npos = npos;
270s = strstr(s,
"/"); s++;
272s = strstr(s,
"("); s++;
274blastMap[qName].hits[ihit].ppos = ppos;
277 CRegexpre_query_in_align(
"^Query[: ]");
282 NcbiCerr<<
"CReadBlastApp::ReadBlast: ERROR: no next line in ("<<
file<<
") while skipping to Query: line"<<
NcbiEndl;
291 char label[10], seqal[100];
292 longsbjstart =0 ,sbjend, ret;
295ret = sscanf(
str,
"%s%ld%s%ld",
label, &q_start, seqal, &q_end);
296 if(ret != 4) { printf(
"\nERROR line: %s",
str);
return0;}
300 NcbiCerr<<
"CReadBlastApp::ReadBlast: ERROR: no next line in ("<<
file<<
") while parsing first alignment line"<<
NcbiEndl;
305 CRegexpre_subj_in_align(
"^Sbjct[: ]");
310 NcbiCerr<<
"CReadBlastApp::ReadBlast: ERROR: no next line in ("<<
file<<
") while skipping to Sbjct:line"<<
NcbiEndl;
319ret = sscanf(
str,
"%s%ld%s%ld",
label, &tmpstart, seqal, &sbjend);
320 if(ret != 4) { printf(
"\nERROR line: %s",
str);
return0;}
322 if(sbjstart == 0) sbjstart = tmpstart;
326 NcbiCerr<<
"CReadBlastApp::ReadBlast: ERROR: no next line in ("<<
file<<
") while appending to alignment I"<<
NcbiEndl;
332 NcbiCerr<<
"CReadBlastApp::ReadBlast: ERROR: no next line in ("<<
file<<
") while parsing Query: line"<<
NcbiEndl;
339ret = sscanf(
str,
"%s%ld%s%ld",
label, &tmpstart, seqal, &q_end);
340 if(ret != 4) { printf(
"\nERROR line: %s",
str);
return0;}
343 NcbiCerr<<
"CReadBlastApp::ReadBlast: ERROR: no next line in ("<<
file<<
") while parsing after Query: line"<<
NcbiEndl;
349 NcbiCerr<<
"CReadBlastApp::ReadBlast: ERROR: no next line in ("<<
file<<
") while parsing new Sbjt line"<<
NcbiEndl;
357blastMap[qName].hits[ihit].alignment = alignment;
358blastMap[qName].hits[ihit].sbjstart = sbjstart;
359blastMap[qName].hits[ihit].sbjend = sbjend ;
360blastMap[qName].hits[ihit].q_start = q_start;
361blastMap[qName].hits[ihit].q_end = q_end ;
380 for(seq=
Begin(); seq; ++seq)
389 if(blastMap.
find(qname) == blastMap.
end() )
391 NcbiCerr<<
"WARNING: sequence "<< qname <<
" does not have blast results, might be a naming convention mismatch"<<
NcbiEndl;
401 NcbiCerr<<
"Removing some hits from previous version of the same genome:";
416 for(
unsigned intihit=0; ihit < blastMap[qname].hits.
size(); ihit++)
418 boolprev_version=
false;
420 for(list<long>::const_iterator mygi = blastMap[qname].hits[ihit].sbjGIs.
begin();
421mygi!=blastMap[qname].hits[ihit].sbjGIs.
end();
429 NcbiCerr<<
"CReadBlastApp::StoreBlast: blast hit to the protein from previous version: " 440annotList.push_back(annot);
444desc->
SetName(blastMap[qname].hits[ihit].sbjName);
445annot->
SetDesc().Set().push_back(desc);
449desc->
SetComment(blastMap[qname].hits[ihit].alignment);
451annot->
SetDesc().Set().push_back(desc);
457alignList.push_back(align);
467id_vector.push_back(qid);
472id_vector.push_back(sid);
476start_vector.push_back(1);
477start_vector.push_back(1);
481len_vector.push_back(1);
482len_vector.push_back(1);
487 bounds.push_back(qBounds);
489 bounds.push_back(sbjBounds);
490qBounds->
SetInt().SetFrom(blastMap[qname].hits[ihit].q_start);
491qBounds->
SetInt().SetTo (blastMap[qname].hits[ihit].q_end);
492qBounds->
SetInt().SetId().SetLocal().SetId(1);
493sbjBounds->
SetInt().SetFrom(blastMap[qname].hits[ihit].sbjstart);
494sbjBounds->
SetInt().SetTo (blastMap[qname].hits[ihit].sbjend);
495sbjBounds->
SetInt().SetId().SetLocal().SetId(1);
502iscore++; scores[iscore]=
new CScore;
503scores[iscore]->
SetId().SetStr(
"sbjLen");
504scores[iscore]->SetValue().SetInt(blastMap[qname].hits[ihit].sbjLen);
505iscore++; scores[iscore]=
new CScore;
506scores[iscore]->
SetId().SetStr(
"score");
507scores[iscore]->SetValue().SetReal(blastMap[qname].hits[ihit].bitscore);
508iscore++; scores[iscore]=
new CScore;
509scores[iscore]->
SetId().SetStr(
"e_value");
510scores[iscore]->SetValue().SetReal(blastMap[qname].hits[ihit].eval);
511iscore++; scores[iscore]=
new CScore;
512scores[iscore]->
SetId().SetStr(
"num_ident");
513scores[iscore]->SetValue().SetInt(blastMap[qname].hits[ihit].nident);
514iscore++; scores[iscore]=
new CScore;
515scores[iscore]->
SetId().SetStr(
"alilen");
516scores[iscore]->SetValue().SetInt(blastMap[qname].hits[ihit].alilen);
517iscore++; scores[iscore]=
new CScore;
518scores[iscore]->
SetId().SetStr(
"per_ident");
519scores[iscore]->SetValue().SetReal(blastMap[qname].hits[ihit].pident);
520iscore++; scores[iscore]=
new CScore;
521scores[iscore]->
SetId().SetStr(
"num_sim");
522scores[iscore]->SetValue().SetInt(blastMap[qname].hits[ihit].npos);
523iscore++; scores[iscore]=
new CScore;
524scores[iscore]->
SetId().SetStr(
"per_sim");
525scores[iscore]->SetValue().SetReal(blastMap[qname].hits[ihit].ppos);
526scores.resize(scores.size()+blastMap[qname].hits[ihit].sbjGIs.
size());
528 for(list<long>::const_iterator gii=blastMap[qname].hits[ihit].sbjGIs.
begin();
529gii!=blastMap[qname].hits[ihit].sbjGIs.
end(); ++gii)
531iscore++; scores[iscore]=
new CScore;
532scores[iscore]->
SetId().SetStr(
"use_this_gi");
533scores[iscore]->SetValue().SetInt(*gii);
551 for(seq=
Begin(); seq; ++seq)
558 if(blastMap.
find(qname) == blastMap.
end() )
continue;
562 intnumQcoversHits=0;
563 intnumScoversHits=0;
566 const blastStr& thisStr=blastMap.
find(qname)->second;
568 intqLen = blastMap[qname].qLen;
569vector<problemStr> problems;
570 for(
unsigned intihit=0; ihit < thisStr.
hits.size(); ihit++)
572 const hitStr& thisHitStr = thisStr.
hits[ihit];
573 intq_ali_len = thisHitStr.
q_end- thisHitStr.
q_start+1;
575 intsLen = thisHitStr.
sbjLen;
576 stringsname = thisHitStr.
sbjName;
577 if(sname.find(
" PRK")!=string::npos)
continue;
578 if(sname.find(
" CHL")!=string::npos)
continue;
579 if(sname.find(
" MTH")!=string::npos)
continue;
580 if(sname.find(
" MTF")!=string::npos)
continue;
581 if(sname.find(
" PHA")!=string::npos)
continue;
582 if(sname.find(
" PTZ")!=string::npos)
continue;
594 charbufferchar[2048]; memset(bufferchar, 0, 2048);
595strstream
buffer(bufferchar, 2048);
602 buffer<<
"Subject"<<
"\t" 607 stringdebug_test =
buffer.str();
613problems.push_back(problem);
623 if(numScoversHits && !numQcoversHits)
625 ITERATE(vector<problemStr>, problem, problems)
627 m_diag[qname].problems.push_back(*problem);
629 charbufferchar[2048]; memset(bufferchar, 0, 2048);
630strstream
buffer(bufferchar, 2048);
632 NcbiCerr<< qname <<
"("<< qLen <<
")" 633<<
" is a potential partial annotation."<<
NcbiEndl;
635<< numHits <<
" hits to CDD db, " 636<< numScoversHits <<
" cover the query and in only " 637<< numQcoversHits <<
" cases query covers the subject" 640 m_diag[qname].problems.push_back(problem1);
static bool PrintDetails(int current_verbosity=m_current_verbosity)
map< string, string > m_tagmap
static void IncreaseVerbosity(void)
static char * next_w(char *w)
list< long > m_previous_genome
int ProcessCDD(map< string, blastStr > &blastMap)
static int skip_toprot(CTypeIterator< CBioseq > &seq)
int ReadBlast(const char *file, map< string, blastStr > &blastMap)
static double m_entireThreshold
int StoreBlast(map< string, blastStr > &blastMap)
static char * skip_space(char *w)
static void DecreaseVerbosity(void)
static double m_partThreshold
static double m_eThreshold
const_iterator begin() const
const_iterator end() const
const_iterator find(const key_type &key) const
static const char * bounds[]
static const char * str(char *buf, int n)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
static string GetStringDescr(const CBioseq &bioseq, EStringFormat fmt)
bool IsMatch(CTempString str, TMatch flags=fMatch_default)
Check existence substring which match a specified pattern.
CTempString GetSub(CTempString str, size_t idx=0) const
Get pattern/subpattern from previous GetMatch().
static const char label[]
TId & SetId(void)
Select the variant.
TScore & SetScore(void)
Assign a value to Score data member.
vector< CRef< CScore > > TScore
void SetSegs(TSegs &value)
Assign a value to Segs data member.
vector< TSignedSeqPos > TStarts
void SetType(TType value)
Assign a value to Type data member.
vector< CRef< CSeq_id > > TIds
list< CRef< CSeq_loc > > TBounds
void SetId(TId &value)
Assign a value to Id data member.
TBounds & SetBounds(void)
Assign a value to Bounds data member.
@ eType_partial
mapping pieces together
TLocal & SetLocal(void)
Select the variant.
void SetData(TData &value)
Assign a value to Data data member.
TId & SetId(void)
Assign a value to Id data member.
void SetDesc(TDesc &value)
Assign a value to Desc data member.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
TName & SetName(void)
Select the variant.
list< CRef< CSeq_align > > TAlign
TComment & SetComment(void)
Select the variant.
list< CRef< CSeq_annot > > TAnnot
Miscellaneous common-use basic types and functionality.
string GetStringDescr(const CBioseq &bioseq)
C++ wrappers for the Perl-compatible regular expression (PCRE) library.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4