arg_desc->SetUsageContext
47 "Microbial Genome Submission Check Tool (subcheck) is for the validation of " 48 "genome records prior to submission to GenBank. It utilizes a series of " 49 "self-consistency checks as well as comparison of submitted annotations to " 50 "computed annotations. Some of specified computed annotations could be " 51 "pre-computed using BLAST and its modifications and tRNAscanSE. Currently " 52 "there is no specific tool for predicting rRNA annotations. Please use the " 53 "format specified in documentation" 58(
"in",
"input_asn",
59 "input file in the ASN.1 format, must be either Seq-entry or Seq-submit",
62arg_desc->AddOptionalKey
63(
"out",
"output_asn",
64 "output file in the ASN.1 format, of the same type (Seq-entry or Seq-submit)",
69 "keep frameshifted sequences and make misc_features at the same time. Needs editing after run!");
71arg_desc->AddOptionalKey
72(
"inblast",
"blast_res_proteins",
73 "input file which contains the standard BLAST output results (ran with -IT option) " 74 "for all query proteins " 75 "sequences specified in the input genome against a protein database (recommended: bact_prot " 76 "database of Refseq proteins supplied with the distributed standalone version of this tool)",
79arg_desc->AddOptionalKey
80(
"inblastcdd",
"blast_res_cdd",
81 "input file which contains the standard BLAST output results for all query proteins " 82 "sequences specified in input_asn against the CDD database",
85arg_desc->AddOptionalKey
86(
"intrna",
"input_trna",
87 "input tRNAscan predictions in default output format, default value is <-in parameter>.nfsa.tRNA",
90arg_desc->AddOptionalKey
91(
"inrrna",
"input_rrna",
92 "input ribosomal RNA predictions (5S, 16S, 23S), see the manual for format, default value is <-in parameter>.nfsa.rRNA",
95arg_desc->AddOptionalKey(
96 "parentacc",
"parent_genome_accession",
97 "Refseq accession of the genome which protein annotations need to be excluded from BLAST output results",
100arg_desc->AddOptionalKey(
101 "inparents",
"InputParentsFile",
102 "contains a list of all protein accessions/GIs for each Refseq accession/GI",
105arg_desc->AddOptionalKey(
106 "intagmap",
"InputTagMap",
107 "use the file to map tags in BLAST",
110arg_desc->AddDefaultKey(
"infmt",
"InputFormat",
"format of input file",
112arg_desc->SetConstraint
115arg_desc->AddOptionalKey
116(
"outTbl",
"OutputTblFile",
117 "name of file to write additional TBL output (/dev/null by default)",
120arg_desc->AddOptionalKey
121(
"outPartial",
"OutputFilePartial",
122 "name of the output file for reporting \"partial hit\" problems",
125arg_desc->AddOptionalKey
126(
"outOverlap",
"OutputFileOverlap",
127 "name of the output file for reporting overlap problems",
130arg_desc->AddOptionalKey
131(
"outRnaOverlap",
"OutputFileRnaOverlap",
132 "name of the output file for reporting RNA overlap problems",
135arg_desc->AddOptionalKey
136(
"outCompleteOverlap",
"OutputFileCompleteOverlap",
137 "name of the output file for reporting complete overlap problems",
140arg_desc->AddOptionalKey
141(
"outOther",
"OutputFileOther",
142 "name of the output file for reporting other problems",
145arg_desc->AddDefaultKey(
"outfmt",
"OutputFormat",
"format of output file",
147arg_desc->SetConstraint
151arg_desc->AddDefaultKey(
152 "verbosity",
"Verbosity",
153 "Verbosity level threshold",
157arg_desc->AddDefaultKey(
158 "small_tails_threshold",
"small_tails_threshold",
159 "the sum of the left and right tails outside the aligned region for " 160 "the given sum less than this threshold will make it \"small tails\"",
163arg_desc->AddDefaultKey(
164 "n_best_hit",
"n_best_hit",
165 "number of BLAST best hits imported for each sequence",
168arg_desc->AddDefaultKey(
169 "m_eThreshold",
"m_eThreshold",
170 "only CDD hits below this threshold will be used for partial hit definition",
173arg_desc->AddDefaultKey(
174 "m_entireThreshold",
"m_entireThreshold",
175 "at least this part of the query needs to be in the alignment to be considered for partial hit candidate",
178arg_desc->AddDefaultKey(
179 "m_partThreshold",
"m_partThreshold",
180 "if aligned region with CDD is less than this threshold, this hit will be considered for partial hit candidate",
183arg_desc->AddDefaultKey(
184 "m_rna_overlapThreshold",
"m_rna_overlapThreshold",
185 "if protein and RNA annotations overlapping more than that threshold, it will be reported",
188arg_desc->AddDefaultKey(
189 "m_cds_overlapThreshold",
"m_cds_overlapThreshold",
190 "if CDS annotations overlapping more than that threshold, it will be reported",
193arg_desc->AddDefaultKey(
194 "m_trnascan_scoreThreshold",
"m_trnascan_scoreThreshold",
195 "tRNA-scan predictions below that threshold are ignored",
198arg_desc->AddDefaultKey(
199 "m_shortProteinThreshold",
"m_shortProteinThreshold",
200 "proteins shorter than that will be reported and removed",
242 stringbase = args[
"in"].AsString();
253unique_ptr<CObjectIStream>
in 255args[
"in"].AsInputFile()));
268 NcbiCerr<<
"WARNING: tbl file will be read but nothing more will be done."<<
NcbiEndl;
269 if(!
m_tbl.
Read(args[
"in"].AsInputFile()))
271 NcbiCerr<<
"FATAL: tbl file does not have any records or have been corrupted"<<
NcbiEndl;
279 NcbiCerr<<
"FATAL: only tbl, Seq-submit or Seq-entry formats are accepted at this time. Seq-set has to be present as well"<<
NcbiEndl;
299 if( args[
"out"].
HasValue() &&
false)
301unique_ptr<CObjectOStream>
out 303args[
"out"].AsOutputFile()));
311 if(args[
"intagmap"].
HasValue())
319 if(args[
"parentacc"].
HasValue())
325 if(args[
"inparents"].
HasValue())
336 if(!
ReadBlast(args[
"inblast"].AsString().c_str(), blastMap))
345 if(args[
"inblastcdd"].
HasValue())
349 ReadBlast(args[
"inblastcdd"].AsString().c_str(), cddMap);
358tRNA_file = args[
"intrna"].AsString();
363tRNA_file +=
".nfsa.tRNA";
374rRNA_file =args[
"inrrna"].AsString();
379rRNA_file +=
".nfsa.rRNA";
411 NcbiCerr<<
"Dumping FASTA file for subsequent HTML blast output..."<<
NcbiEndl;
416 boolreport_and_forget =
false;
419 stringsout = args[
"outPartial"].HasValue() ?
420args[
"outPartial"].AsString() :
421base +
".partial.problems.log";
433 stringsout = args[
"outOverlap"].HasValue() ?
434args[
"outOverlap"].AsString() :
435base +
".overlap.problems.log";
447 stringsout = args[
"outRnaOverlap"].HasValue() ?
448args[
"outRnaOverlap"].AsString() :
449base +
".rna.overlap.problems.log";
462 stringsout = args[
"outCompleteOverlap"].HasValue() ?
463args[
"outCompleteOverlap"].AsString() :
464base +
".complete.overlap.problems.log";
468<<
"(eCompleteOverlap)" 476 stringsout = base +
".overlap.resolved.problems.log";
480<<
"(eRemoveOverlap)" 488 stringsout = base +
".tRNA.missing.log";
502 stringsout = base +
".tRNA.bad.strand.log";
506<<
"(eTRNABadStrand)" 514 stringsout = base +
".tRNA.undef.strand.log";
518<<
"(eTRNAUndefStrand)" 526 stringsout = base +
".tRNA.complete.mismatch.log";
530<<
"(eTRNAComMismatch)" 538 stringsout = base +
".tRNA.mismatch.log";
550 stringsout = base +
".short.annotation.log";
566 stringsout = args[
"outOther"].HasValue() ?
567args[
"outOther"].AsString() :
568base +
".frameshifts.problems.log";
572<<
"(eRelFrameShift)" 606args[
"out"].AsOutputFile().seekp(0);
607unique_ptr<CObjectOStream>
out 609args[
"out"].AsOutputFile()));
void printGeneralInfo(ostream &out=NcbiCerr)
int CollectFrameshiftedSeqs(map< string, string > &problem_names)
static int m_verbosity_threshold
static bool PrintDetails(int current_verbosity=m_current_verbosity)
map< string, string > m_tagmap
static stack< int > m_saved_verbosity
list< long > m_previous_genome
int ProcessCDD(map< string, blastStr > &blastMap)
int ReadRRNA2(const string &file)
static double m_trnascan_scoreThreshold
int simple_overlaps(void)
static int m_current_verbosity
bool ReadPreviousAcc(const string &file, list< long > &input_acc)
int RemoveProblems(map< string, string > &problem_seqs, LocMap &loc_map)
virtual void Init(void)
Initialize the application.
static int m_cds_overlapThreshold
int CopyInfoFromGenesToProteins(void)
===========================================================================
int ReadBlast(const char *file, map< string, blastStr > &blastMap)
static double m_entireThreshold
static int m_rna_overlapThreshold
static ECoreDataType getCoreDataType(istream &in)
virtual int Run(void)
Run the application.
int StoreBlast(map< string, blastStr > &blastMap)
int AnalyzeSeqsViaBioseqs(bool in_pool_prot, bool against_prot)
TSimpleSeqs m_extRNAtable2
int ReadTagMap(const char *file)
int ReadParents(CNcbiIstream &in, const list< long > &nacc)
static bool less_simple_seq(const TSimpleSeq &first, const TSimpleSeq &second)
static int m_shortProteinThreshold
static void PopVerbosity(void)
ECoreDataType m_coreDataType
void reportProblems(const bool report_and_forget, diagMap &diag, ostream &out, const CBioseq::TAnnot &annots, const EProblem type)
static double m_small_tails_threshold
static double m_partThreshold
static double m_eThreshold
void dump_fasta_for_pretty_blast(diagMap &diag)
int ReadTRNA2(const string &file)
static void PushVerbosity(void)
std::ofstream out("events_result.xml")
main entry point for tests
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
string & Print(string &str) const
Print (append) all arguments to the string "str" and return "str".
@ fPreOpen
Open file right away; for eInputFile, eOutputFile, eIOFile.
@ eInputFile
Name of file (must exist and be readable)
@ eDouble
Convertible into a floating point number (double)
@ eString
An arbitrary string.
@ eOutputFile
Name of file (must be writable)
@ eInteger
Convertible into an integer number (int or Int8)
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
@ eOverlap
CSeq_locs overlap.
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
void SetData(TData &value)
Assign a value to Data data member.
std::istream & in(std::istream &in_, double &x_)
ESerialDataFormat s_GetFormat(const string &name)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4