A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/agp__validate_8cpp_source.html below:

NCBI C++ ToolKit: src/app/agp_validate/agp_validate.cpp Source File

75  virtual void Init

(

void

);

76  virtual int Run

(

void

);

77  virtual void Exit

(

void

);

114

m_comp2len, m_comp2range_coll)

130

version_str+=

", AGP Specification v2.1"

;

132  str

=

"Validate data in the AGP format:\n" 133  "https://www.ncbi.nlm.nih.gov/assembly/agp/AGP_Specification/\n" 135  "Version: "

+ version_str +

"\n" 137  "USAGE: agp_validate [-options] [FASTA files...] [AGP files...]\n" 139  "There are 3 validations modes:\n" 140  "no mode option: (default mode) report component, gap, scaffold and object statistics, perform checks\n" 141  " that do not require component sequences to be available in GenBank (see: -list).\n" 142  "-alt, -species: Check component Accessions, Lengths and Taxonomy ID using GenBank data;\n" 143  " -species allows components from different subspecies during Taxid checks.\n" 145  "-comp Check that the supplied object sequences (in FASTA files) match what can be\n" 146  " constructed from the AGP and the component sequences (in FASTA files or in GenBank).\n" 147  " Run \"agp_validate -comp\" to see the options for this mode.\n" 149  "OPTIONS (default and -alt modes):\n" 150  " -g Check that component names look like Nucleotide accessions\n" 151  " (this does not require components to be in GenBank).\n" 152  " -out FILE Save the AGP file, adding missing version 1 to the component accessions (need -alt),\n" 153  " or adding gaps where runs of Ns longer than 10 bp are found in components (need FASTA files).\n" 154  " -obj Use FASTA files to read names and lengths of objects (the default is components).\n" 155  " -v VER AGP version (1 or 2). The default is to choose automatically. 
Version 2 is chosen\n" 156  " when the linkage evidence (column 9) is not empty in the first gap line encountered.\n" 157  " -xml Report results in XML format.\n" 158  " -sub Treat serious warnings as errors, put summary and stats at the top.\n" 160  " Extra checks specific to an object type:\n" 161  " -un Unplaced/unlocalized scaffolds:\n" 162  " any single-component scaffold must use the whole component in orientation '+'\n" 163  " -scaf Scaffold from component AGP: no scaffold-breaking gaps allowed\n" 164  " -chr Chromosome from scaffold AGP: ONLY scaffold-breaking gaps allowed\n" 165  " Use both of the last 2 options in this order: -scaf Scaf_AGP_file(s) -chr Chr_AGP_file(s)\n" 166  " to check that all scaffolds in Scaf_AGP_file(s) are wholly included in Chr_AGP_file(s)\n" 169  " -list List error and warning messages.\n" 170  " -limit COUNT Print only the first COUNT messages of each type.\n" 171  " Default=100. To print all, use: -limit 0\n" 172  " -skip, -only WHAT Skip, or report only a particular error or warning.\n" 173  " -show WHAT Show the warning hidden by default (w40, w45, w46, w52).\n" 174  " 'WHAT' could be a part of the message text, an error code (e11, w22, etc; see -list),\n" 175  " or a keyword: all, warn, err, alt.\n" 177  "If component FASTA files are given in front of AGP files, also check that:\n" 178  "- component_id from AGP is present in FASTA;\n" 179  "- component_end does not exceed sequence length.\n" 180  "If FASTA files for objects are given (after -obj), check that:\n" 181  "- object_id from AGP is present in FASTA;\n" 182  "- object lengths in FASTA and in AGP match.\n" 194  auto

arg_desc = make_unique<CArgDesc_agp_validate>(

GetVersion

());

196

arg_desc->SetUsageContext(

198  "Validate AGP data"

,

false

);

201

arg_desc->AddFlag(

"alt"

,

""

);

203

arg_desc->AddFlag(

"g"

,

""

);

204

arg_desc->AddFlag(

"obj"

,

""

);

205

arg_desc->AddFlag(

"un"

,

""

);

206

arg_desc->AddFlag(

"scaf"

,

""

);

207

arg_desc->AddFlag(

"chr"

,

""

);

208

arg_desc->AddFlag(

"comp"

,

""

);

209

arg_desc->AddFlag(

"xml"

,

""

);

210

arg_desc->AddFlag(

"sub"

,

""

);

213

arg_desc->AddOptionalKey(

"loadlog"

,

"FILE"

,

214  "specifies where we write our loading log for -comp"

,

216

arg_desc->AddFlag(

"ignoreagponly"

,

""

);

217

arg_desc->AddFlag(

"ignoreobjfileonly"

,

""

);

218

arg_desc->AddDefaultKey(

"diffstofind"

,

""

,

""

,

221

arg_desc->AddFlag(

"species"

,

"allow components from different subspecies"

);

223

arg_desc->AddOptionalKey(

"out"

,

"FILE"

,

224  "add missing version 1 to component accessions"

,

227

arg_desc->AddOptionalKey(

"v"

,

"ver"

,

231

arg_desc->AddOptionalKey(

"skip"

,

"error_or_warning"

,

232  "Message or message code to skip"

,

236

arg_desc->AddOptionalKey(

"only"

,

"error_or_warning"

,

237  "Message or message code to print (hide other)"

,

241

arg_desc->AddOptionalKey(

"show"

,

"error_or_warning"

,

242  "Message or message code to print (if not printed by default)"

,

246

arg_desc->AddDefaultKey(

"limit"

,

"ErrorCount"

,

247  "Print at most ErrorCount lines with a particular error"

,

251

arg_desc->AddFlag(

"list"

,

"all possible errors and warnings"

);

254

arg_desc->AddExtra(0, 10000,

"files to be processed"

,

272  if

( args[

"list"

].

HasValue

() ) {

273  pAgpErr

->PrintAllMessages(cout);

288  pAgpErr

->m_out = error_details_out;

301

cerr <<

"Error -- cannot specify -un with -chr/-scaf.\n"

;

305

cerr <<

"Error -- cannot specify -chr/-scaf with -alt/-species.\n"

;

310  if

( args[

"scaf"

].

HasValue

() ) {

311

cerr <<

"Error -- -scaf and -chr must precede different files.\n"

;

317  else if

( args[

"scaf"

].

HasValue

() ) {

323

cerr <<

"Error -- cannot specify -obj with -alt/-species.\n"

;

330  bool

checkCompNames=args[

"g"

].HasValue();

332  if

(checkCompNames) {

343  if

( args[

"species"

].

HasValue

() ) {

352  bool

onlyNotSkip = args[

"only"

].HasValue();

354  if

( args[

"skip"

].

HasValue

() ) {

356

cerr <<

"Error -- cannot specify both -only and -skip.\n"

;

359

err_warn = &( args[

"skip"

].GetStringList() );

360

action=

"Skipping messages:\n"

;

362  else if

(onlyNotSkip) {

363  if

( args[

"show"

].

HasValue

() ) {

364

cerr <<

"Error -- cannot specify both -only and -show; please use multiple -only instead.\n"

;

368

err_warn = &( args[

"only"

].GetStringList() );

370

action=

"Allowed messages:\n"

;

374  bool

needHeading=

true

;

375  for

( CArgValue::TStringArray::const_iterator it =

376

err_warn->begin(); it != err_warn->end(); ++it

378  string

res =

pAgpErr

->SkipMsg(*it, onlyNotSkip);

380

cerr <<

"WARNING: no matches for "

<< *it <<

"\n"

;

384  if

( res[0] ==

' '

&& needHeading) {

385  if

(needHeading) cerr << action;

390

cerr << res <<

"\n"

;

397  if

( args[

"show"

].

HasValue

() ) {

398

err_warn = &( args[

"show"

].GetStringList() );

399  for

( CArgValue::TStringArray::const_iterator it =

400

err_warn->begin(); it != err_warn->end(); ++it

407

args[

"limit"

].HasValue() ? args[

"limit"

].AsInteger() : 100;

410  if

( args[

"v"

].AsString()[0]==

'1'

) {

413  else if

( args[

"v"

].AsString()[0]==

'2'

) {

417

cerr <<

"Error -- invalid AGP version after -v (must start with 1 or 2).\n"

;

425  if

( ! args[

"comp"

] ) {

428  if

( args[

"loadlog"

] || args[

"ignoreagponly"

] ||

429

args[

"ignoreobjfileonly"

] ||

430

args[

"diffstofind"

].AsInteger() > 0 )

432

cerr <<

"Error -- -comp mode options without -comp"

<< endl;

437  bool

taxid_check_failed=

false

;

439

cout <<

"<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n<page>\n"

;

451

cout <<

"</page>\n"

;

454  else if

(error_details_out) {

455

cout <<

"\n\n===== Details ====="

<< endl;

457  delete

error_details_out;

466  for

(

unsigned int i

= 1;

i

<= args.

GetNExtra

();

i

++) {

468  if

( ! filename.empty() && filename[0] !=

'-'

) {

474  if

( args[

"loadlog"

] ) {

475

comploadlog = args[

"loadlog"

].AsString();

478  string

agp_as_fasta_file;

479  if

( args[

"out"

] ) {

480

agp_as_fasta_file = args[

"out"

].AsString();

484  if

( args[

"ignoreagponly"

] ) {

487  if

( args[

"ignoreobjfileonly"

] ) {

491  int

diffsToFind = args[

"diffstofind"

].AsInteger();

496

agp_as_fasta_file, diffsToHide,

499

cerr <<

"AGP/FASTA comparison failed."

<< endl;

512

<< s <<

" and length"

<< s <<

" loaded from FASTA."

<< endl;

516

runs_of_Ns += it->second.size();

521  if

(!

m_use_xml

) cout <<

"No runs of Ns longer than 10 bp found in FASTA sequences."

<< endl;

531

cout <<

"===== Reading Chromosome from scaffold AGP ====="

<< endl;

533  if

(

out

) *

out

<<

"===== Chromosome from scaffold AGP ====="

<< endl;

540

cout <<

"===== Reading Scaffold from component AGP ====="

<< endl;

541  if

(

out

) *

out

<<

"===== Scaffold from component AGP ====="

<< endl;

556  for

(

unsigned int i

= 1;

i

<= args.

GetNExtra

();

i

++) {

561

cerr <<

"Error -- second -chr is not supported.\n"

;

565

cerr <<

"Error -- -chr after a file, but no preceding -scaf. Expecting:\n" 566

<<

" -scaf Scaffold_AGP_file(s) -chr Chromosome_AGP_file(s)\n"

;

575

cout <<

"\n===== Reading Chromosome from scaffold AGP ====="

<< endl;

576  if

(

out

) *

out

<<

"\n===== Chromosome from scaffold AGP ====="

<< endl;

590

istr.get(ch); istr.putback(ch);

604  if

(num_fasta_files==args.

GetNExtra

()) {

632  if

(

code

==-1)

continue

;

634  bool

comp2len_check_failed=

false

;

637  if

( !agp_row->

IsGap

() ) {

664  if

(

code

!=0 || comp2len_check_failed ||

673  pAgpErr

->m_messages = tmp_messages;

691  string

acc, acc_long;

696  int

header_line_num=0;

703  bool

mfa_bMasked=

false

;

704  bool

mfa_prevMasked=

false

;

710  if

(line[0]==

'>'

) {

719  if

(prev_len)

goto

LengthRedefinedFa;

722  if

(mfa_pos-mfa_firstMasked > 10)

723

range_coll +=

TSeqRange

(mfa_firstMasked, mfa_pos-1);

725  if

(!range_coll.

empty

()) {

729

range_coll.

clear

();

730

mfa_firstMasked=mfa_pos=0;

732

mfa_prevMasked=

false

;

738  if

(pos2<pos1) pos1 = pos2;

741  if

(pos1>0 && line[pos1]==

'|'

) pos1--;

744

acc_long=line.substr(1, pos1);

752

cerr<<

"ERROR - expecting >fasta_header at start of file "

<< filename <<

", got:\n" 753

<< line.substr(0, 100) <<

"\n\n"

;

759

cerr<<

"ERROR - non-alphabetic character in the FASTA:\n" 760  " file "

<< filename <<

"\n line "

<<

line_num

<<

"\n column "

<<

i

+1 <<

"\n\n"

;

765

mfa_bMasked =

toupper

(line[

i

]) ==

'N'

;

766  if

(mfa_bMasked!=mfa_prevMasked) {

768

mfa_firstMasked=mfa_pos;

771  if

(mfa_pos-mfa_firstMasked > 10)

772

range_coll +=

TSeqRange

(mfa_firstMasked, mfa_pos-1);

775

mfa_prevMasked=mfa_bMasked;

798  if

(prev_len)

goto

LengthRedefinedFa;

801  if

(mfa_pos-mfa_firstMasked > 10)

802

range_coll +=

TSeqRange

(mfa_firstMasked, mfa_pos-1);

804  if

(!range_coll.

empty

()) {

809

cerr<<

"WARNING - empty file "

<< filename <<

"\n"

;

814

cerr<<

"ERROR - sequence length redefined from "

<< prev_len <<

" to "

<<

len

<<

"\n" 815

<<

" sequence id: "

<< acc_long <<

"\n" 816

<<

" File: "

<< filename <<

"\n" 817

<<

" Lines: "

<< header_line_num <<

".."

<<

line_num

<<

"\n\n"

;

831  if

(runs_of_Ns && runs_of_Ns->

size

()) {

834

cerr <<

"FATAL: need AGP version (for adding gap lines). Please use -v 1 or -v 2\n"

;

848  "\t1\t100\t1\tN\t100\t"

+

849  string

(

row

->GetVersion() ==

eAgpVersion_1_1

?

"fragment\tyes\t"

:

"scaffold\tyes\tunspecified"

)

852  int

comp2obj_ofs =

row

->object_beg -

row

->component_beg;

861

(*m_out) << tmp_row->

ToString

() << endl;

866

tmp_gap_row->

object_beg

= comp2obj_ofs + it->GetFrom();

867

tmp_gap_row->

object_end

= comp2obj_ofs + it->GetTo();

868

tmp_gap_row->

gap_length

= it->GetTo() - it->GetFrom() + 1;

871

(*m_out) << tmp_gap_row->

ToString

(

true

) << endl;

887

(*m_out) << tmp_row->

ToString

() << endl;

892

(*m_out) << s << endl;

899 int main

(

int

argc,

const char

* argv[])

901  if

(argc==1+1 &&

string

(

"-comp"

)==argv[1]) {

902

cout <<

"agp_validate -comp (formerly agp_fasta_compare):\n" 904  "check that the object sequences FASTA matches the AGP.\n" 907  "USAGE: agp_validate -comp [-options] FASTA file(s)... AGP file(s)...\n" 909  " -loadlog OUTPUT_FILE Save the list of all loaded sequences.\n" 910  " -ignoreagponly Do not report objects present in AGP file(s) only.\n" 911  " -ignoreobjfileonly Do not report objects present in FASTA file(s) only.\n" 912  " -diffstofind NUM (EXPERIMENTAL) If specified, list the first NUM lines of each difference.\n" 913  " -out OUTPUT_FILE Save the assembled AGP sequences as FASTA.\n" 915  "FASTA files for components can be provided (along with object FASTA files) if components are not yet in GenBank.\n"

void OverrideLenIfAccession(const string &acc, int &in_out_len)

string ExtractAccession(const string &long_acc)

@ eAgpVersion_auto

auto-detect using the first gap line

@ eAgpVersion_1_1

AGP spec 1.1.

@ eAgpVersion_2_0

AGP spec 2.0 or later.

CRef< CAgpErrEx > pAgpErr

int main(int argc, const char *argv[])

virtual ~CAgpCompSpanSplitter()

CAgpCompSpanSplitter(CNcbiOstream *out=NULL)

virtual void SaveRow(const string &s, CRef< CAgpRow > row, TRangeColl *runs_of_Ns)

Correctly print multiple errors and warnings on consecutive lines; suppress undesired or highly repet...

EResult Run(const std::list< std::string > &files, const std::string &loadlog, const std::string &agp_as_fasta_file, TDiffsToHide diffsToHide, int diffs_to_find)

@ fDiffsToHide_ObjfileOnly

virtual void SetVersion(EAgpVersion ver)

Change what AGP version to use for the next input that's read.

virtual int ReadStream(CNcbiIstream &is, EFinalize eFinalize=eFinalize_Yes)

Read an AGP file from the given input stream.

string & GetComponentId()

static bool CheckComponentEnd(const string &comp_id, TAgpPos comp_end, TAgpLen comp_len, CAgpErr &agp_err)

static CRef< CAgpRow > New(CAgpErr *arg, EAgpVersion agp_version=eAgpVersion_auto, CAgpReader *reader=nullptr)

string ToString(bool reorder_linkage_evidences=false)

static bool IsGap(char c)

int FromString(const string &line)

CAgpValidateReader m_reader

void x_LoadLenFa(CNcbiIstream &istr, const string &filename)

enum CAgpValidateApplication::EValidationType m_ValidationType

EAgpVersion m_agp_version

unique_ptr< CAltValidator > m_AltValidator

TMapStrRangeColl m_comp2range_coll

CAgpValidateApplication()

virtual void Init(void)

Initialize the application.

virtual int Run(void)

Run the application.

void x_ReportFastaSeqCount()

void x_ValidateUsingFiles(const CArgs &args, CNcbiOstream *out=NULL)

void x_ValidateFile(CNcbiIstream &istr)

virtual void Exit(void)

Cleanup on application exit.

void PrintTotals(CNcbiOstream &out=cout, bool use_xml=false)

void Reset(bool for_chr_from_scaf=false)

void SetRowOutput(IAgpRowOutput *row_output)

CVersionInfo m_VersionInfo

string & PrintUsage(string &str, bool) const

Print usage message to end of specified string.

CArgDesc_agp_validate(CVersionInfo &&versionInfo)

TAgpLen AddCompLen(const string &acc, TAgpLen len, bool increment_count=true)

CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:

TRangeVector::const_iterator const_iterator

const_iterator end() const

const_iterator begin() const

container_type::iterator iterator

const_iterator begin() const

const_iterator end() const

const_iterator find(const key_type &key) const

std::ofstream out("events_result.xml")

main entry point for tests

static unsigned int line_num

static const char * str(char *buf, int n)

const CNcbiRegistry & GetConfig(void) const

Get the application's cached configuration parameters (read-only).

unsigned int TSeqPos

Type for sequence locations and lengths.

virtual const CArgs & GetArgs(void) const

Get parsed command line arguments.

int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)

Main function (entry point) for the NCBI application.

CVersionInfo GetVersion(void) const

Get the program version information.

virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)

Setup the command line argument descriptions.

const CNcbiArguments & GetArguments(void) const

Get the application's cached unprocessed command-line arguments.

void SetVersion(const CVersionInfo &version)

Set the version number for the program.

vector< string > TStringArray

Some values types can contain several value lists.

size_t GetNExtra(void) const

Get the number of unnamed positional (a.k.a. extra) args.

@ fAllowMultiple

Repeated key arguments are legal (use with AddKey)

@ eString

An arbitrary string.

@ eOutputFile

Name of file (must be writable)

@ eInteger

Convertible into an integer number (int or Int8)

void SetDiagStream(CNcbiOstream *os, bool quick_flush=true, FDiagCleanup cleanup=0, void *cleanup_data=0, const string &stream_name="")

Set diagnostic stream.

CRange< TSeqPos > TSeqRange

typedefs for sequence ranges

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

CNcbiIstream & NcbiGetline(CNcbiIstream &is, string &str, char delim, string::size_type *count=NULL)

Read from "is" to "str" up to the delimiter symbol "delim" (or EOF)

IO_PREFIX::ostream CNcbiOstream

Portable alias for ostream.

CNcbistrstream_Base< IO_PREFIX::ostrstream, IOS_BASE::out > CNcbiOstrstream

IO_PREFIX::istream CNcbiIstream

Portable alias for istream.

IO_PREFIX::ifstream CNcbiIfstream

Portable alias for ifstream.

NCBI_NS_STD::string::size_type SIZE_TYPE

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

void CONNECT_Init(const IRWRegistry *reg=0, CRWLock *lock=0, TConnectInitFlags flag=eConnectInit_OwnNothing, FSSLSetup ssl=0)

Init [X]CONNECT library with the specified "reg" and "lock" (ownership for either or both can be deta...

virtual string Print(void) const

Print version information.

#define NCBI_SC_VERSION_PROXY

#define NCBI_TEAMCITY_BUILD_NUMBER_PROXY

Defines the CNcbiApplication and CAppException classes for creating NCBI applications.

#define row(bind, expected)


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4