TOutputFlags fOutputFlags,
77: m_pTemplateBioseq(pTemplateBioseq),
78m_fOutputFlags(fOutputFlags)
81m_pSubmitBlock.Reset(pSubmitBlock);
84 if( pErrorHandler ) {
85m_pErrorHandler = pErrorHandler;
88m_pErrorHandler.
Reset(
newCErrorHandler );
98 TSeqPoslength = (*ent)->GetSeq().GetInst().GetLength();
112 if((*desc)->IsSource() && (*desc)->GetSource().IsSetSubtype()) {
114(*desc)->GetSource().GetSubtype()) {
115 if((*sub_type)->GetSubtype() ==
119 "chromosome info ignored because template " 120 "contains a chromosome SubSource");
137 while(!chromosomes_istr.eof()) {
142list<string> split_line;
144 if(split_line.size() != 2) {
147 "line of chromosome file does not have " 148 "two columns: "+ line);
151 string id= split_line.front();
152 stringchr = split_line.back();
153 if(mapChromosomeNames.
find(
id) != mapChromosomeNames.
end()
154&& mapChromosomeNames[
id] != chr)
158 "inconsistent chromosome for "+
id+
159 " in chromosome file");
162mapChromosomeNames[id] = chr;
175 conststd::vector<std::string> & vecAgpFileNames,
177 size_tuMaxBioseqsToWrite )
const 185 if( ! vecAgpFileNames.empty() ) {
188 const boolbOnlyOneBioseqInAllAGPFiles =
189( agp_entries.size() == 1 && vecAgpFileNames.size() == 1 );
194 stringsObjectOpeningString;
195 stringsObjectClosingString;
198sObjectOpeningString,
199sObjectClosingString,
201bOnlyOneBioseqInAllAGPFiles );
203ostrm << sObjectOpeningString << endl;
210 boolbFirstEntry =
true;
211 ITERATE( std::vector<std::string>, file_name_it, vecAgpFileNames ) {
214 if( ! bFirstEntry ) {
234 "Entry skipped and reason probably given in a previous error");
241bFirstEntry =
false;
243 if( bOneObjectPerBioseq ) {
246ostrm << sObjectClosingString << endl;
247ostrm << sObjectOpeningString << endl;
248}
else if( ! sObjectOpeningString.empty() ) {
251ostrm <<
","<< endl;
255 if( sObjectOpeningString.empty() ) {
257ostrm <<
"Bioseq ::= "<< endl;
260ostrm <<
"seq "<< endl;
266obj_writer.
Flush();
271ostrm << sObjectClosingString << endl;
275 const string& sDirName,
276 conststd::vector<std::string> & vecAgpFileNames,
277 const string& sSuffix_arg,
280 CDiroutputDir(sDirName);
281 if( ! outputDir.
Exists() ||
282! outputDir.
IsDir() )
286 "The output directory is not a dir or is not found: "+ sDirName );
290 const string& sSuffix = (
291sSuffix_arg.empty() ?
295 ITERATE( std::vector<std::string>, file_name_it, vecAgpFileNames ) {
310 "Entry skipped and the reason was " 311 "probably given in a previous error");
322new_submit->
SetData().SetEntrys().push_back(new_entry);
323pObjectToPrint = new_submit;
326pObjectToPrint = new_entry;
330outputDir.
GetPath(), id_str, sSuffix);
337 if( pFileWrittenCallback ) {
338pFileWrittenCallback->
Notify(outfpath);
344 #ifdef STRING_AND_VAR_PAIR 345 # error STRING_AND_VAR_PAIR 350 #define STRING_AND_VAR_PAIR(_value) \ 360 static constTStrFlagPair kStrFlagPairs[] = {
369TStrFlagMap::const_iterator find_iter =
371 if( find_iter == kStrFlagMap.end() ) {
373 "Bad string given to CAgpConverter::OutputFlagStringToEnum: " 376 returnfind_iter->second;
387 static constTStrErrorPair kStrErrorPairs[] = {
406TStrErrorMap::const_iterator find_iter =
408 if( find_iter == kStrErrorMap.end() ) {
410 "Bad string given to CAgpConverter::ErrorStringToEnum: " 413 returnfind_iter->second;
417 #undef STRING_AND_VAR_PAIR 420 const string& sAgpFileName,
428stringstream err_strm;
432 const intiErrCode = agp_reader.
ReadStream(istr);
435 const stringsErrors = err_strm.str();
436 if( ! sErrors.empty() ) {
439 "AGP parsing returned error message(s): "+ sErrors );
441 if( iErrCode != 0 ) {
444 "AGP parsing returned error code "+
450out_agp_entries.swap( agp_reader.
GetResult() );
456 string& out_id_str )
const 458 stringunparsed_id_str;
472 const TSeqPosuAGPBioseqLen = (
477 if( uOrigBioseqLen != uAGPBioseqLen ) {
480 "** Entry "+ out_id_str +
" has mismatch, but will " 481 "be written anyway: " 482 "fOutputFlags_AGPLenMustMatchOrig was set and the entry's " 485 " but the original template's length is "+
495.SetExt().SetDelta().Set()) {
496 if((*delta)->IsLiteral() &&
497(*delta)->GetLiteral().GetLength() == 100) {
498(*delta)->SetLiteral().SetFuzz().SetLim();
506new_entry, out_id_str);
507 if( ! bSuccessfulValidation ) {
511 "** Not writing entry "+ out_id_str +
" due to failed validation");
519new_entry, unparsed_id_str);
531 string& out_unparsed_id_str,
532 string& out_id_str )
const 537stringstream id_strm;
539out_unparsed_id_str = id_strm.str();
540out_id_str = out_unparsed_id_str;
545list<CRef<CSeq_id> > ids;
546ids.push_back(pSeqId);
557 "** ID "+ out_id_str +
558 " contains a '|'; consider using the -fasta_id option");
563 boolbFirstWasTransformed =
false;
567 if( bWasTransformed && id_it == ids.begin() ) {
568bFirstWasTransformed =
true;
575bFirstWasTransformed )
596 const string& id_str)
const 602 if((*delta)->IsLoc()) {
603 const stringcomp_id_str =
604(*delta)->GetLoc().GetInt().GetId().AsFastaString();
611 "** Component "+ comp_id_str +
612 " of entry "+ id_str +
" not found");
614 const TSeqPosuCompLen = find_iter->second;
616 const TSeqPosto = (*delta)->GetLoc().GetInt().GetTo();
617 if(to >= uCompLen) {
621 "** Component "+ comp_id_str +
622 " of entry "+ id_str +
" not long enough.\n" 636 const string& unparsed_id_str )
const 647sub_source->
SetName(chr_find_iter->second);
648vector<CRef<CSeqdesc> > source_descs;
651 if((*desc)->IsSource()) {
652source_descs.push_back(*desc);
655 if(source_descs.size() != 1) {
660 "Source Desc's; expected exactly one");
663 CSeqdesc& source_desc = *source_descs[0];
683 string& out_sObjectOpeningString,
684 string& out_sObjectClosingString,
686 boolbOnlyOneBioseqInAllAGPFiles )
const 688out_sObjectOpeningString.clear();
689out_sObjectClosingString.clear();
692 boolbUsingBioseqSets =
false;
696bUsingBioseqSets =
true;
701bUsingBioseqSets =
false;
702}
else if( ! bOnlyOneBioseqInAllAGPFiles )
707bUsingBioseqSets =
true;
718stringstream seq_sub_header_strm;
723 if( out_sObjectOpeningString.empty() ) {
724seq_sub_header_strm <<
"Seq-submit ::= ";
726seq_sub_header_strm <<
"{"<< endl;
727seq_sub_header_strm <<
"sub ";
729submit_block_writer.
Flush();
730seq_sub_header_strm <<
","<< endl;
731seq_sub_header_strm <<
"data entrys {"<< endl;
733out_sObjectOpeningString = seq_sub_header_strm.str();
734out_sObjectClosingString =
"} }"+ out_sObjectClosingString;
738 const boolbUsingSeqEntry = (
741 if( bUsingSeqEntry ) {
742 if( out_sObjectOpeningString.empty() ) {
745out_sObjectOpeningString +=
"Seq-entry ::= ";
747 if( bUsingBioseqSets ) {
748out_sObjectOpeningString +=
"set ";
753 if( bUsingBioseqSets ) {
756 if( out_sObjectOpeningString.empty() ) {
757out_sObjectOpeningString +=
"Bioseq-set ::= ";
759out_sObjectOpeningString +=
"{ seq-set { ";
760out_sObjectClosingString =
"} }"+ out_sObjectClosingString;
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
#define STRING_AND_VAR_PAIR(_value)
@ eAgpVersion_auto
auto-detect using the first gap line
This gets called after each file is written, so the caller can do useful things like run asnval on ev...
virtual void Notify(const string &file)=0
virtual ~IIdTransformer(void)
TCompLengthMap m_mapComponentLength
void OutputBioseqs(CNcbiOstream &ostrm, const std::vector< std::string > &vecAgpFileNames, TOutputBioseqsFlags fFlags=0, size_t uMaxBioseqsToWrite=std::numeric_limits< size_t >::max()) const
Outputs the result from the AGP file names as ASN.1.
void x_SetUpObjectOpeningAndClosingStrings(string &out_sObjectOpeningString, string &out_sObjectClosingString, TOutputBioseqsFlags fOutputBioseqsFlags, bool bOnlyOneBioseqInAllAGPFiles) const
Each Bioseq written out will have the out_sObjectOpeningString before it and out_sObjectClosingString...
CRef< IIdTransformer > m_pIdTransformer
bool x_VerifyComponents(CConstRef< objects::CSeq_entry > new_entry, const string &id_str) const
@ fOutputBioseqsFlags_OneObjectPerBioseq
If set, each AGP Bioseq is written as its own object.
@ fOutputBioseqsFlags_WrapInSeqEntry
Bioseqs and Bioseq-sets should always be wrapped in a Seq-entry.
@ fOutputBioseqsFlags_DoNOTUnwrapSingularBioseqSets
Specify this if Bioseq-sets with just one Bioseq in them should _NOT_ be unwrapped into a Bioseq.
TChromosomeMap m_mapChromosomeNames
EError
The different kinds of errors that could occur while processing.
@ eError_SuggestUsingFastaIdOption
@ eError_WrongNumberOfSourceDescs
@ eError_ComponentTooShort
@ eError_SubmitBlockIgnoredWhenOneBigBioseqSet
@ eError_EntrySkippedDueToFailedComponentValidation
@ eError_ChromosomeFileBadFormat
@ eError_OutputDirNotFoundOrNotADir
@ eError_ChromosomeIsInconsistent
@ eError_ChromosomeMapIgnoredBecauseChromosomeSubsourceAlreadyInTemplate
@ eError_ComponentNotFound
@ eError_AGPLengthMismatchWithTemplateLength
void SetComponentsBioseqSet(CConstRef< objects::CBioseq_set > pComponentsBioseqSet)
Give a bioseq-set containing all the components pieces, for verification.
CRef< objects::CSeq_entry > x_InitializeCopyOfTemplate(const objects::CBioseq &agp_seq, string &out_unparsed_id_str, string &out_id_str) const
void x_ReadAgpEntries(const string &sAgpFileName, CAgpToSeqEntry::TSeqEntryRefVec &out_agp_entries) const
static TOutputFlags OutputFlagStringToEnum(const string &sEnumAsString)
Convert string to flag.
CAgpConverter(CConstRef< objects::CBioseq > pTemplateBioseq, const objects::CSubmit_block *pSubmitBlock=nullptr, TOutputFlags fOutputFlags=0, CRef< CErrorHandler > pErrorHandler=CRef< CErrorHandler >())
Constructor.
CRef< CErrorHandler > m_pErrorHandler
void OutputOneFileForEach(const string &sDirName, const std::vector< std::string > &vecAgpFileNames, const string &sSuffix=kEmptyStr, IFileWrittenCallback *pFileWrittenCallback=nullptr) const
Outputs the results of each Seq-entry (or Seq-submit if Submit-block was given) into its own file in ...
void x_SetCreateAndUpdateDatesToToday(CRef< objects::CSeq_entry > new_entry) const
void LoadChromosomeMap(CNcbiIstream &chromosomes_istr)
Input has 2 tab-delimited columns: id, then chromosome name.
CConstRef< objects::CBioseq > m_pTemplateBioseq
@ fOutputFlags_Fuzz100
For gaps of length 100, put an Int-fuzz = unk in the literal.
@ fOutputFlags_FastaId
Parse object ids (col. 1) as fasta-style ids if they contain '|'.
@ fOutputFlags_SetGapInfo
Set Seq-gap (gap type and linkage) in delta sequence.
@ fOutputFlags_LAST_PLUS_ONE
@ fOutputFlags_AGPLenMustMatchOrig
When set, we give an error on AGP objects that don't have the same length as the original template.
int TOutputFlags
Bitwise-OR of EOutputFlags.
static EError ErrorStringToEnum(const string &sEnumAsString)
Convert string to EError enum.
void x_SetChromosomeNameInSourceSubtype(CRef< objects::CSeq_entry > new_entry, const string &unparsed_id_str) const
void SetChromosomesInfo(const TChromosomeMap &mapChromosomeNames)
Give the chromosomes to this object.
TOutputFlags m_fOutputFlags
CRef< objects::CSeq_entry > x_InitializeAndCheckCopyOfTemplate(const objects::CBioseq &agp_bioseq, string &out_id_str) const
CConstRef< objects::CSubmit_block > m_pSubmitBlock
Correctly print multiple errors and warnings on consecutive lines; suppress undesired or highly repet...
virtual int ReadStream(CNcbiIstream &is, EFinalize eFinalize=eFinalize_Yes)
Read an AGP file from the given input stream.
This class is used to turn an AGP file into a vector of Seq-entry's.
vector< CRef< objects::CSeq_entry > > TSeqEntryRefVec
This is the way the results will be returned. Each Seq-entry contains just one Bioseq,...
@ fSetSeqGap
Found gaps will not be given Seq-data such as Type and Linkage.
TSeqEntryRefVec & GetResult(void)
This gets the results found, but don't call before finalizing.
const CSeq_id * GetFirstId() const
TSeqPos GetLength(void) const
bool IsSetLength(void) const
void SetToTime(const CTime &time, EPrecision prec=ePrecision_second)
ostream & AsString(ostream &s) const
class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...
container_type::const_iterator const_iterator
const_iterator end() const
const_iterator find(const key_type &key) const
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define NCBI_USER_THROW_FMT(message)
Throw a "user exception" with message processed as output to ostream.
static string MakePath(const string &dir=kEmptyStr, const string &base=kEmptyStr, const string &ext=kEmptyStr)
Assemble a path from basic components.
virtual bool Exists(void) const
Check if directory "dirname" exists.
bool IsDir(EFollowLinks follow=eFollowLinks) const
Check whether a directory entry is a directory.
const string & GetPath(void) const
Get entry path.
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
C * SerialClone(const C &src)
Create on heap a clone of the source object.
#define MSerial_AsnText
I/O stream manipulators.
static SIZE_TYPE ParseFastaIds(CBioseq::TId &ids, const CTempString &s, bool allow_partial_failure=false)
Parse an entire set of |-delimited FASTA-style IDs, appending the results to IDS.
@ fLabel_Version
Show the version.
@ fLabel_GeneralDbIsContent
For type general, use the database name as the tag and the (text or numeric) key as the content.
@ eContent
Untagged human-readable accession or the like.
void WriteObject(const CConstObjectInfo &object)
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
CTime CurrentTime(CTime::ETimeZone tz=CTime::eLocal, CTime::ETimeZonePrecision tzp=CTime::eTZPrecisionDefault)
void SetSubtype(TSubtype value)
Assign a value to Subtype data member.
list< CRef< CSubSource > > TSubtype
void SetName(const TName &value)
Assign a value to Name data member.
TSubtype & SetSubtype(void)
Assign a value to Subtype data member.
const TLocal & GetLocal(void) const
Get the variant data.
const TSeq & GetSeq(void) const
Get the variant data.
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
list< CRef< CSeq_entry > > TSeq_set
TSeq & SetSeq(void)
Select the variant.
list< CRef< CSeqdesc > > Tdata
TId & SetId(void)
Assign a value to Id data member.
void ResetId(void)
Reset Id data member.
const TInst & GetInst(void) const
Get the Inst member data.
const Tdata & Get(void) const
Get the member data.
list< CRef< CSeq_id > > TId
void SetInst(TInst &value)
Assign a value to Inst data member.
TSource & SetSource(void)
Select the variant.
const TExt & GetExt(void) const
Get the Ext member data.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
const TDelta & GetDelta(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
list< CRef< CDelta_seq > > Tdata
const TDescr & GetDescr(void) const
Get the Descr member data.
TCreate_date & SetCreate_date(void)
Select the variant.
TUpdate_date & SetUpdate_date(void)
Select the variant.
void SetSub(TSub &value)
Assign a value to Sub data member.
void SetData(TData &value)
Assign a value to Data data member.
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
Int4 delta(size_t dimension_, const Int4 *score_)
#define DEFINE_STATIC_ARRAY_MAP(Type, Var, Array)
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason for introducing this...
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4