A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/agp__converter_8cpp_source.html below:

NCBI C++ ToolKit: src/objtools/readers/agp_converter.cpp Source File

75

TOutputFlags fOutputFlags,

77

: m_pTemplateBioseq(pTemplateBioseq),

78

m_fOutputFlags(fOutputFlags)

81

m_pSubmitBlock.Reset(pSubmitBlock);

84  if

( pErrorHandler ) {

85

m_pErrorHandler = pErrorHandler;

88

m_pErrorHandler.

Reset

(

new

CErrorHandler );

98  TSeqPos

length = (*ent)->GetSeq().GetInst().GetLength();

112  if

((*desc)->IsSource() && (*desc)->GetSource().IsSetSubtype()) {

114

(*desc)->GetSource().GetSubtype()) {

115  if

((*sub_type)->GetSubtype() ==

119  "chromosome info ignored because template " 120  "contains a chromosome SubSource"

);

137  while

(!chromosomes_istr.eof()) {

142

list<string> split_line;

144  if

(split_line.size() != 2) {

147  "line of chromosome file does not have " 148  "two columns: "

+ line);

151  string id

= split_line.front();

152  string

chr = split_line.back();

153  if

(mapChromosomeNames.

find

(

id

) != mapChromosomeNames.

end

()

154

&& mapChromosomeNames[

id

] != chr)

158  "inconsistent chromosome for "

+

id

+

159  " in chromosome file"

);

162

mapChromosomeNames[id] = chr;

175  const

std::vector<std::string> & vecAgpFileNames,

177  size_t

uMaxBioseqsToWrite )

const 185  if

( ! vecAgpFileNames.empty() ) {

188  const bool

bOnlyOneBioseqInAllAGPFiles =

189

( agp_entries.size() == 1 && vecAgpFileNames.size() == 1 );

194  string

sObjectOpeningString;

195  string

sObjectClosingString;

198

sObjectOpeningString,

199

sObjectClosingString,

201

bOnlyOneBioseqInAllAGPFiles );

203

ostrm << sObjectOpeningString << endl;

210  bool

bFirstEntry =

true

;

211  ITERATE

( std::vector<std::string>, file_name_it, vecAgpFileNames ) {

214  if

( ! bFirstEntry ) {

234  "Entry skipped and reason probably given in a previous error"

);

241

bFirstEntry =

false

;

243  if

( bOneObjectPerBioseq ) {

246

ostrm << sObjectClosingString << endl;

247

ostrm << sObjectOpeningString << endl;

248

}

else if

( ! sObjectOpeningString.empty() ) {

251

ostrm <<

","

<< endl;

255  if

( sObjectOpeningString.empty() ) {

257

ostrm <<

"Bioseq ::= "

<< endl;

260

ostrm <<

"seq "

<< endl;

266

obj_writer.

Flush

();

271

ostrm << sObjectClosingString << endl;

275  const string

& sDirName,

276  const

std::vector<std::string> & vecAgpFileNames,

277  const string

& sSuffix_arg,

280  CDir

outputDir(sDirName);

281  if

( ! outputDir.

Exists

() ||

282

! outputDir.

IsDir

() )

286  "The output directory is not a dir or is not found: "

+ sDirName );

290  const string

& sSuffix = (

291

sSuffix_arg.empty() ?

295  ITERATE

( std::vector<std::string>, file_name_it, vecAgpFileNames ) {

310  "Entry skipped and the reason was " 311  "probably given in a previous error"

);

322

new_submit->

SetData

().SetEntrys().push_back(new_entry);

323

pObjectToPrint = new_submit;

326

pObjectToPrint = new_entry;

330

outputDir.

GetPath

(), id_str, sSuffix);

337  if

( pFileWrittenCallback ) {

338

pFileWrittenCallback->

Notify

(outfpath);

344 #ifdef STRING_AND_VAR_PAIR 345 # error STRING_AND_VAR_PAIR 350 #define STRING_AND_VAR_PAIR(_value) \ 360  static const

TStrFlagPair kStrFlagPairs[] = {

369

TStrFlagMap::const_iterator find_iter =

371  if

( find_iter == kStrFlagMap.end() ) {

373  "Bad string given to CAgpConverter::OutputFlagStringToEnum: " 376  return

find_iter->second;

387  static const

TStrErrorPair kStrErrorPairs[] = {

406

TStrErrorMap::const_iterator find_iter =

408  if

( find_iter == kStrErrorMap.end() ) {

410  "Bad string given to CAgpConverter::ErrorStringToEnum: " 413  return

find_iter->second;

417 #undef STRING_AND_VAR_PAIR 420  const string

& sAgpFileName,

428

stringstream err_strm;

432  const int

iErrCode = agp_reader.

ReadStream

(istr);

435  const string

sErrors = err_strm.str();

436  if

( ! sErrors.empty() ) {

439  "AGP parsing returned error message(s): "

+ sErrors );

441  if

( iErrCode != 0 ) {

444  "AGP parsing returned error code "

+

450

out_agp_entries.swap( agp_reader.

GetResult

() );

456  string

& out_id_str )

const 458  string

unparsed_id_str;

472  const TSeqPos

uAGPBioseqLen = (

477  if

( uOrigBioseqLen != uAGPBioseqLen ) {

480  "** Entry "

+ out_id_str +

" has mismatch, but will " 481  "be written anyway: " 482  "fOutputFlags_AGPLenMustMatchOrig was set and the entry's " 485  " but the original template's length is "

+

495

.SetExt().SetDelta().Set()) {

496  if

((*delta)->IsLiteral() &&

497

(*delta)->GetLiteral().GetLength() == 100) {

498

(*delta)->SetLiteral().SetFuzz().SetLim();

506

new_entry, out_id_str);

507  if

( ! bSuccessfulValidation ) {

511  "** Not writing entry "

+ out_id_str +

" due to failed validation"

);

519

new_entry, unparsed_id_str);

531  string

& out_unparsed_id_str,

532  string

& out_id_str )

const 537

stringstream id_strm;

539

out_unparsed_id_str = id_strm.str();

540

out_id_str = out_unparsed_id_str;

545

list<CRef<CSeq_id> > ids;

546

ids.push_back(pSeqId);

557  "** ID "

+ out_id_str +

558  " contains a '|'; consider using the -fasta_id option"

);

563  bool

bFirstWasTransformed =

false

;

567  if

( bWasTransformed && id_it == ids.begin() ) {

568

bFirstWasTransformed =

true

;

575

bFirstWasTransformed )

596  const string

& id_str)

const 602  if

((*delta)->IsLoc()) {

603  const string

comp_id_str =

604

(*delta)->GetLoc().GetInt().GetId().AsFastaString();

611  "** Component "

+ comp_id_str +

612  " of entry "

+ id_str +

" not found"

);

614  const TSeqPos

uCompLen = find_iter->second;

616  const TSeqPos

to = (*delta)->GetLoc().GetInt().GetTo();

617  if

(to >= uCompLen) {

621  "** Component "

+ comp_id_str +

622  " of entry "

+ id_str +

" not long enough.\n" 636  const string

& unparsed_id_str )

const 647

sub_source->

SetName

(chr_find_iter->second);

648

vector<CRef<CSeqdesc> > source_descs;

651  if

((*desc)->IsSource()) {

652

source_descs.push_back(*desc);

655  if

(source_descs.size() != 1) {

660  "Source Desc's; expected exactly one"

);

663  CSeqdesc

& source_desc = *source_descs[0];

683  string

& out_sObjectOpeningString,

684  string

& out_sObjectClosingString,

686  bool

bOnlyOneBioseqInAllAGPFiles )

const 688

out_sObjectOpeningString.clear();

689

out_sObjectClosingString.clear();

692  bool

bUsingBioseqSets =

false

;

696

bUsingBioseqSets =

true

;

701

bUsingBioseqSets =

false

;

702

}

else if

( ! bOnlyOneBioseqInAllAGPFiles )

707

bUsingBioseqSets =

true

;

718

stringstream seq_sub_header_strm;

723  if

( out_sObjectOpeningString.empty() ) {

724

seq_sub_header_strm <<

"Seq-submit ::= "

;

726

seq_sub_header_strm <<

"{"

<< endl;

727

seq_sub_header_strm <<

"sub "

;

729

submit_block_writer.

Flush

();

730

seq_sub_header_strm <<

","

<< endl;

731

seq_sub_header_strm <<

"data entrys {"

<< endl;

733

out_sObjectOpeningString = seq_sub_header_strm.str();

734

out_sObjectClosingString =

"} }"

+ out_sObjectClosingString;

738  const bool

bUsingSeqEntry = (

741  if

( bUsingSeqEntry ) {

742  if

( out_sObjectOpeningString.empty() ) {

745

out_sObjectOpeningString +=

"Seq-entry ::= "

;

747  if

( bUsingBioseqSets ) {

748

out_sObjectOpeningString +=

"set "

;

753  if

( bUsingBioseqSets ) {

756  if

( out_sObjectOpeningString.empty() ) {

757

out_sObjectOpeningString +=

"Bioseq-set ::= "

;

759

out_sObjectOpeningString +=

"{ seq-set { "

;

760

out_sObjectClosingString =

"} }"

+ out_sObjectClosingString;

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

#define STRING_AND_VAR_PAIR(_value)

@ eAgpVersion_auto

auto-detect using the first gap line

This gets called after each file is written, so the caller can do useful things like run asnval on ev...

virtual void Notify(const string &file)=0

virtual ~IIdTransformer(void)

TCompLengthMap m_mapComponentLength

void OutputBioseqs(CNcbiOstream &ostrm, const std::vector< std::string > &vecAgpFileNames, TOutputBioseqsFlags fFlags=0, size_t uMaxBioseqsToWrite=std::numeric_limits< size_t >::max()) const

Outputs the result from the AGP file names as ASN.1.

void x_SetUpObjectOpeningAndClosingStrings(string &out_sObjectOpeningString, string &out_sObjectClosingString, TOutputBioseqsFlags fOutputBioseqsFlags, bool bOnlyOneBioseqInAllAGPFiles) const

Each Bioseq written out will have the out_sObjectOpeningString before it and out_sObjectClosingString...

CRef< IIdTransformer > m_pIdTransformer

bool x_VerifyComponents(CConstRef< objects::CSeq_entry > new_entry, const string &id_str) const

@ fOutputBioseqsFlags_OneObjectPerBioseq

If set, each AGP Bioseq is written as its own object.

@ fOutputBioseqsFlags_WrapInSeqEntry

Bioseqs and Bioseq-sets should always be wrapped in a Seq-entry.

@ fOutputBioseqsFlags_DoNOTUnwrapSingularBioseqSets

Specify this if Bioseq-sets with just one Bioseq in them should _NOT_ be unwrapped into a Bioseq.

TChromosomeMap m_mapChromosomeNames

EError

The different kinds of errors that could occur while processing.

@ eError_SuggestUsingFastaIdOption

@ eError_WrongNumberOfSourceDescs

@ eError_ComponentTooShort

@ eError_SubmitBlockIgnoredWhenOneBigBioseqSet

@ eError_EntrySkippedDueToFailedComponentValidation

@ eError_ChromosomeFileBadFormat

@ eError_OutputDirNotFoundOrNotADir

@ eError_ChromosomeIsInconsistent

@ eError_ChromosomeMapIgnoredBecauseChromosomeSubsourceAlreadyInTemplate

@ eError_ComponentNotFound

@ eError_AGPLengthMismatchWithTemplateLength

void SetComponentsBioseqSet(CConstRef< objects::CBioseq_set > pComponentsBioseqSet)

Give a bioseq-set containing all the components pieces, for verification.

CRef< objects::CSeq_entry > x_InitializeCopyOfTemplate(const objects::CBioseq &agp_seq, string &out_unparsed_id_str, string &out_id_str) const

void x_ReadAgpEntries(const string &sAgpFileName, CAgpToSeqEntry::TSeqEntryRefVec &out_agp_entries) const

static TOutputFlags OutputFlagStringToEnum(const string &sEnumAsString)

Convert string to flag.

CAgpConverter(CConstRef< objects::CBioseq > pTemplateBioseq, const objects::CSubmit_block *pSubmitBlock=nullptr, TOutputFlags fOutputFlags=0, CRef< CErrorHandler > pErrorHandler=CRef< CErrorHandler >())

Constructor.

CRef< CErrorHandler > m_pErrorHandler

void OutputOneFileForEach(const string &sDirName, const std::vector< std::string > &vecAgpFileNames, const string &sSuffix=kEmptyStr, IFileWrittenCallback *pFileWrittenCallback=nullptr) const

Outputs the results of each Seq-entry (or Seq-submit if Submit-block was given) into its own file in ...

void x_SetCreateAndUpdateDatesToToday(CRef< objects::CSeq_entry > new_entry) const

void LoadChromosomeMap(CNcbiIstream &chromosomes_istr)

Input has 2 tab-delimited columns: id, then chromosome name.

CConstRef< objects::CBioseq > m_pTemplateBioseq

@ fOutputFlags_Fuzz100

For gaps of length 100, put an Int-fuzz = unk in the literal.

@ fOutputFlags_FastaId

Parse object ids (col. 1) as fasta-style ids if they contain '|'.

@ fOutputFlags_SetGapInfo

Set Seq-gap (gap type and linkage) in delta sequence.

@ fOutputFlags_LAST_PLUS_ONE

@ fOutputFlags_AGPLenMustMatchOrig

When set, we give an error on AGP objects that don't have the same length as the original template.

int TOutputFlags

Bitwise-OR of EOutputFlags.

static EError ErrorStringToEnum(const string &sEnumAsString)

Convert string to EError enum.

void x_SetChromosomeNameInSourceSubtype(CRef< objects::CSeq_entry > new_entry, const string &unparsed_id_str) const

void SetChromosomesInfo(const TChromosomeMap &mapChromosomeNames)

Give the chromosomes to this object.

TOutputFlags m_fOutputFlags

CRef< objects::CSeq_entry > x_InitializeAndCheckCopyOfTemplate(const objects::CBioseq &agp_bioseq, string &out_id_str) const

CConstRef< objects::CSubmit_block > m_pSubmitBlock

Correctly print multiple errors and warnings on consecutive lines; suppress undesired or highly repet...

virtual int ReadStream(CNcbiIstream &is, EFinalize eFinalize=eFinalize_Yes)

Read an AGP file from the given input stream.

This class is used to turn an AGP file into a vector of Seq-entry's.

vector< CRef< objects::CSeq_entry > > TSeqEntryRefVec

This is the way the results will be returned Each Seq-entry contains just one Bioseq,...

@ fSetSeqGap

Found gaps will not be given Seq-data such as Type and Linkage.

TSeqEntryRefVec & GetResult(void)

This gets the results found, but don't call before finalizing.

const CSeq_id * GetFirstId() const

TSeqPos GetLength(void) const

bool IsSetLength(void) const

void SetToTime(const CTime &time, EPrecision prec=ePrecision_second)

ostream & AsString(ostream &s) const

class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...

container_type::const_iterator const_iterator

const_iterator end() const

const_iterator find(const key_type &key) const

unsigned int TSeqPos

Type for sequence locations and lengths.

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define NON_CONST_ITERATE(Type, Var, Cont)

Non constant version of ITERATE macro.

#define NCBI_USER_THROW_FMT(message)

Throw a "user exception" with message processed as output to ostream.

static string MakePath(const string &dir=kEmptyStr, const string &base=kEmptyStr, const string &ext=kEmptyStr)

Assemble a path from basic components.

virtual bool Exists(void) const

Check if directory "dirname" exists.

bool IsDir(EFollowLinks follow=eFollowLinks) const

Check whether a directory entry is a directory.

const string & GetPath(void) const

Get entry path.

virtual const CTypeInfo * GetThisTypeInfo(void) const =0

C * SerialClone(const C &src)

Create on heap a clone of the source object.

#define MSerial_AsnText

I/O stream manipulators.

static SIZE_TYPE ParseFastaIds(CBioseq::TId &ids, const CTempString &s, bool allow_partial_failure=false)

Parse an entire set of |-delimited FASTA-style IDs, appending the results to IDS.

@ fLabel_Version

Show the version.

@ fLabel_GeneralDbIsContent

For type general, use the database name as the tag and the (text or numeric) key as the content.

@ eContent

Untagged human-readable accession or the like.

void WriteObject(const CConstObjectInfo &object)

TObjectType * GetPointer(void) const THROWS_NONE

Get pointer.

TObjectType * GetPointer(void) THROWS_NONE

Get pointer.

void Reset(void)

Reset reference object.

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)

Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)

IO_PREFIX::ofstream CNcbiOfstream

Portable alias for ofstream.

IO_PREFIX::ostream CNcbiOstream

Portable alias for ostream.

IO_PREFIX::istream CNcbiIstream

Portable alias for istream.

IO_PREFIX::ifstream CNcbiIfstream

Portable alias for ifstream.

static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)

Convert size_t to string.

static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)

Split a string using specified delimiters.

static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)

Find the pattern in the string.

static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)

Convert numeric value to string.

static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)

Truncate whitespace in a string.

@ fSplit_Tokenize

All delimiters are merged and trimmed, to get non-empty tokens only.

CTime CurrentTime(CTime::ETimeZone tz=CTime::eLocal, CTime::ETimeZonePrecision tzp=CTime::eTZPrecisionDefault)

void SetSubtype(TSubtype value)

Assign a value to Subtype data member.

list< CRef< CSubSource > > TSubtype

void SetName(const TName &value)

Assign a value to Name data member.

TSubtype & SetSubtype(void)

Assign a value to Subtype data member.

const TLocal & GetLocal(void) const

Get the variant data.

const TSeq & GetSeq(void) const

Get the variant data.

const TSeq_set & GetSeq_set(void) const

Get the Seq_set member data.

list< CRef< CSeq_entry > > TSeq_set

TSeq & SetSeq(void)

Select the variant.

list< CRef< CSeqdesc > > Tdata

TId & SetId(void)

Assign a value to Id data member.

void ResetId(void)

Reset Id data member.

const TInst & GetInst(void) const

Get the Inst member data.

const Tdata & Get(void) const

Get the member data.

list< CRef< CSeq_id > > TId

void SetInst(TInst &value)

Assign a value to Inst data member.

TSource & SetSource(void)

Select the variant.

const TExt & GetExt(void) const

Get the Ext member data.

void SetDescr(TDescr &value)

Assign a value to Descr data member.

const TDelta & GetDelta(void) const

Get the variant data.

const Tdata & Get(void) const

Get the member data.

list< CRef< CDelta_seq > > Tdata

const TDescr & GetDescr(void) const

Get the Descr member data.

TCreate_date & SetCreate_date(void)

Select the variant.

TUpdate_date & SetUpdate_date(void)

Select the variant.

void SetSub(TSub &value)

Assign a value to Sub data member.

void SetData(TData &value)

Assign a value to Data data member.

Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...

Int4 delta(size_t dimension_, const Int4 *score_)

#define DEFINE_STATIC_ARRAY_MAP(Type, Var, Array)

Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4