;
76 virtual void Init(
void);
77 virtual int Run(
void);
80 voidLoadInputAlns(
const string&
file_name,
const string& asn_type);
85m_AlnContainer.insert(*aln);
100arg_desc->AddDefaultKey(
"in",
"InputFileName",
101 "Name of file to read from (standard input by default)",
104arg_desc->AddDefaultKey(
"b",
"bin_obj_type",
105 "This forces the input file to be read in binary ASN.1 mode\n" 106 "and specifies the type of the top-level ASN.1 object.\n",
109arg_desc->AddDefaultKey(
"anchor_row",
"AnchorRow",
113arg_desc->AddOptionalKey(
"anchor_id",
"AnchorSeqId",
114 "Anchor seq-id (fasta or ASN.1)",
117arg_desc->AddOptionalKey(
"pseudo_id",
"PseudoSeqId",
118 "Pseudo seq-id (fasta or ASN.1) to be used as anchor of merged CAnchoredAln",
121arg_desc->AddFlag(
"print_seq",
122 "Print sequence data");
124arg_desc->AddFlag(
"no_scope",
125 "Don't use CScope while loading alignments");
127arg_desc->AddFlag(
"ignore_gaps",
128 "Ignore gaps (insertions) when collecting alignments");
130arg_desc->AddFlag(
"merge_keep_overlaps",
131 "Don't truncate overlaps when merging alignments");
133arg_desc->AddFlag(
"merge_align_to_anchor",
134 "Use anchor row as the alignment row " 135 "(don't translate to alignment coordinates)");
137arg_desc->AddFlag(
"merge_ignore_gaps",
138 "Ignore gaps (insertions) when merging alignments");
141 stringprog_description =
"Seq-align Test Application.\n";
142arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
143prog_description,
false);
145SetupArgDescriptions(arg_desc.release());
151 boolbinary = !asn_type.empty();
177cout << ind <<
"CPairwiseAln"<< endl;
178cout << ind <<
" First-id: "<< pw.
GetFirstId()->AsString() <<
" dir = ";
182cout << ind <<
" Second: "<< pw.
GetSecondId()->AsString() <<
" / " 187<< rit->GetSecondFrom() << (rg.
IsDirect() ?
"d":
"r") <<
188 " len="<< rit->GetLength() << endl;
191cout << ind <<
" Insertions on "<< pw.
GetFirstId()->AsString() <<
":"<< endl;
193cout << ind <<
" ["<<
194gap->GetFirstFrom() << (gap->IsFirstDirect() ?
"D":
"R") <<
195 "] => "<< gap->GetSecondFrom() <<
196(gap->IsDirect() ?
"d":
"r") <<
197 " len="<< gap->GetLength() << endl;
200cout << ind <<
" CPairwise_CI"<< endl;
204cout << ind <<
" "<<
205it.GetFirstRange().
GetFrom() <<
" - "<< it.GetFirstRange().GetToOpen() <<
206(it.IsFirstDirect() ?
" D":
" R") <<
207 " => "<< it.GetSecondRange().GetFrom() <<
" - "<< it.GetSecondRange().GetToOpen() <<
208(it.IsDirect() ?
" d":
" r") <<
209 " ["<< it.GetFirstRange().GetLength() <<
"] "<<
217cout << ind <<
"CAnchoredAln"<< endl;
229cout <<
"CSparseAln"<< endl;
230cout <<
" Rows: "<< sparse_aln.
GetNumRows() << endl;
231cout <<
" Anchor row: "<< sparse_aln.
GetAnchor() << endl;
235cout <<
" Row "<<
row<<
": " 243cout <<
" SeqRg: "<< native_rg.
GetFrom() <<
"/"<< frames.first <<
244 " - "<< native_rg.
GetTo() <<
"/"<< frames.second <<
245 " ("<< rg.
GetFrom() <<
" - "<< rg.
GetTo() <<
")";
248cout <<
" SeqRg: "<< rg.
GetFrom() <<
" - "<< rg.
GetTo();
258 size_taln_len = sequence.size();
259cout <<
" AlnSeqString: "<< sequence << endl;
262 size_tseq_len = sequence.size();
263cout <<
" SeqString: "<< sequence << endl;
264cout <<
" AlnSeqLen = "<< aln_len <<
", SeqLen = "<< seq_len << endl;
273 stringaln_total, seq_total;
279 stringaln_data, seq_data, aln_gap, seq_gap;
291 if( aln_rg.
Empty() ) {
292aln_gap =
string(seq_data.size(),
'*');
294 if( seq_rg.
Empty() ) {
295seq_gap =
string(aln_data.size(),
'?');
298 if(!aln_data.empty()) {
299aln_total.append(aln_data);
300aln_total.append(aln_gap);
301seq_total.append(seq_gap);
302seq_total.append(seq_data);
304 if(aln_data.size() + aln_gap.size() != seq_data.size() + seq_gap.size()) {
310cout <<
" SeqLen (aln/seq): "<< aln_data.size() + aln_gap.size() <<
311 " / "<< seq_gap.size() + seq_data.size() << endl;
312cout <<
" Aln: "<< aln_data + aln_gap << endl;
313cout <<
" Seq: "<< seq_gap + seq_data << endl;
319 if(!aln_total.empty() || !seq_total.empty()) {
320cout <<
" Total aligned sequence (aln/seq):"<< endl;
321cout <<
" "<< aln_total << endl;
322cout <<
" "<< seq_total << endl;
325cout <<
" [No sequence data]"<< endl;
337<< it->GetAlnRange().GetFrom() <<
" - " 338<< it->GetAlnRange().GetTo() <<
" (len=" 339<< it->GetAlnRange().GetLength() <<
") " 340<< (it.IsAnchorDirect() ?
"D":
"R")
342<< it->GetRange().GetFrom() <<
" - " 343<< it->GetRange().GetTo() <<
" (len=" 344<< it->GetRange().GetLength() <<
") " 345<< (it->IsReversed() ?
"R":
"D")
358cout <<
"Unaligned";
363aln.
GetAnchor(), it->GetAlnRange().GetFrom(),
366aln.
GetAnchor(), it->GetAlnRange().GetTo(),
368cout <<
" [anchor range: "<< anchor_from <<
" - "<< anchor_to <<
"]";
377cout <<
"CSparse_CI:"<< endl;
380 if(
row== anchor_row) {
381cout <<
"anchor segments, id=";
384cout <<
"row "<<
row<<
" segments, id=";
386cout << aln.
GetSeqId(
row).AsFastaString() << endl;
388cout <<
"AllSegments:"<< endl;
390cout <<
"SkipGaps:"<< endl;
392cout <<
"SkipInserts:"<< endl;
394cout <<
"InsertsOnly:"<< endl;
416 if(
str.empty() ) {
443 string file_name= args[
"in"].AsString();
445 stringasn_type = args[
"b"].AsString();
447 intanchor_row_idx = args[
"anchor_row"].AsInteger();
459id_extract(**aln_it,
tmp);
460ids.insert(ids.end(),
tmp.begin(),
tmp.end());
465cerr <<
"Skipping alignment: "<< e.what() << endl;
470cout <<
"Collected ids:"<< endl;
472cout <<
" "<< (*id)->AsString() <<
" W="<< (*id)->GetBaseWidth() << endl;
478 if( args[
"ignore_gaps"] ) {
482 for(
int i= 0;
i< (
int)aln_id_map.
size();
i++) {
483cout <<
"Alignment #"<<
i<< endl;
485 dynamic_cast<const CAlnSeqId&
>(*aln_id_map[
i][0]))));
487 dynamic_cast<const CAlnSeqId&
>(*aln_id_map[
i][1]))));
491cout <<
"Pairwise - row 0 vs 1:"<< endl;
495 i, aln_user_options, anchor_row_idx);
496cout <<
"Anchored to row "<< anchored_aln->
GetAnchorRow() << endl;
499cout <<
"Sparse (unbuilt)"<< endl;
506 if( !args[
"merge_keep_overlaps"] ) {
509 if( args[
"merge_align_to_anchor"] ) {
512 if( args[
"merge_ignore_gaps"] ) {
518 if(anchor_row_idx < 0) anchor_row_idx = 0;
530cout <<
"AnchoredAlnVec start"<< endl;
534cout <<
"AnchoredAlnVec end"<< endl << endl;
538 BuildAln(anchored_aln_vec, built_aln, merge_options, pseudo_id);
540cout <<
"BuildAln result"<< endl;
548 for(
int i= 0;
i< sparse_aln.
GetDim(); ++
i) {
549cout <<
"row "<<
i<< endl;
559cout << ((*it)->IsAligned() ?
" aligned : ":
" gap : ") <<
560it_aln_rg.
GetFrom() <<
" - "<<
561it_aln_rg.
GetTo() <<
" => "<<
566cout <<
" ("<< native_rg.
GetFrom() <<
"/"<< fr.first <<
567 " - "<< native_rg.
GetTo() <<
"/"<< fr.second <<
")";
569cout <<((*it)->IsReversed() ?
" R":
" D") << endl;
577 int main(
intargc,
const char* argv[])
static CRef< CScope > m_Scope
void BuildAln(TAnchoredAlnVec &in_alns, CAnchoredAln &out_aln, const CAlnUserOptions &options, TAlnSeqIdIRef pseudo_seqid=TAlnSeqIdIRef())
Build anchored alignment from a set of alignments.
void ConvertSeqAlignToPairwiseAln(CPairwiseAln &pairwise_aln, const objects::CSeq_align &sa, objects::CSeq_align::TDim row_1, objects::CSeq_align::TDim row_2, CAlnUserOptions::EDirection direction=CAlnUserOptions::eBothDirections, const TAlnSeqIdVec *ids=0)
Build pairwise alignment from the selected rows of a seq-align.
void CreateAnchoredAlnVec(_TAlnStats &aln_stats, TAnchoredAlnVec &out_vec, const CAlnUserOptions &options)
Create anchored alignment from each seq-align in the stats.
CRef< CAnchoredAln > CreateAnchoredAlnFromAln(const _TAlnStats &aln_stats, size_t aln_idx, const CAlnUserOptions &options, objects::CSeq_align::TDim explicit_anchor_row=-1)
Create an anchored alignment from Seq-align using hints.
vector< TAlnSeqIdIRef > TAlnSeqIdVec
CSeq_align::TDim TDim
Types we use here:
vector< TSeqIdPtr > TSeqIdVector
void DumpSparseAln(const CSparseAln &sparse_aln)
void TestSparseIt(const CSparseAln &aln, int row, const CSparse_CI::TSignedRange &rg, CSparse_CI::EFlags flags)
void DumpPairwiseAln(const CPairwiseAln &pw, const string ind)
vector< const CSeq_align * > TAlnVector
void DumpAnchoredAln(const CAnchoredAln &aaln, const string &ind)
void TestIterators(const CSparseAln &aln, const CSparse_CI::TSignedRange &rg)
const CSeq_id * TSeqIdPtr
int main(int argc, const char *argv[])
void TestSparseIterator(const CSparseAln &aln)
TAlnSeqIdIRef ArgToSeq_id(const CArgValue &arg)
const_iterator begin() const
TSignedSeqPos GetSecondPosByFirstPos(position_type pos, ESearchDirection dir=eNone) const
position_type GetFirstFrom() const
CRange< position_type > GetFirstRange() const
@ fIgnoreInsertions
allows segments not separated by gaps
@ fDefaultPolicy
do not store insertions
const TInsertions & GetInsertions() const
Each insertion shows where the 'first' sequence has a gap while the 'second' sequence has the inserti...
vector< TAlignRange > TInsertions
class CAlignRangeCollection<TAlignRange> represent a sorted collection of TAlignRange.
CAlignRange Represents an element of pairwise alignment of two sequences.
Helper class for reading seq-align objects from a CObjectIStream.
void Read(CObjectIStream *obj_in_stream, TCallback callback, const string &top_level_asn_object=kEmptyStr)
Read all seq-align objects from the stream.
Container mapping seq-aligns to vectors of participating seq-ids.
size_type size(void) const
Size (number of alignments)
void push_back(const CSeq_align &aln)
Adding an alignment.
Default IAlnSeqId implementation based on CSeq_id_Handle.
Helper class which collects seq-align statistics: seq-ids participating in alignments and rows,...
const TIdVec & GetAnchorIdVec(void) const
Get vector of potential anchor ids.
virtual void Init(void)
Initialize the application.
CRef< CObjectManager > m_ObjMgr
CAlnContainer m_AlnContainer
virtual int Run(void)
Run the application.
void LoadInputAlns(const string &file_name, const string &asn_type)
CScope & GetScope(void) const
bool InsertAln(const CSeq_align *aln)
Options for different alignment manager operations.
void SetAnchorId(const TAlnSeqIdIRef &anchor_id)
Set anchor id.
@ eBothDirections
No filtering: use both direct and reverse sequences.
void SetMergeFlags(TMergeFlags flags, bool set)
Set/clear merge flags.
@ fUseAnchorAsAlnSeq
Use the anchor sequence as the alignment sequence.
@ fTruncateOverlaps
Truncate overlapping ranges.
@ fIgnoreInsertions
Do not collect and store insertions (gaps on the anchor).
Query-anchored alignment can be 2 or multi-dimensional.
const TPairwiseAlnVector & GetPairwiseAlns(void) const
The vector of pairwise alns.
vector< CRef< CPairwiseAln > > TPairwiseAlnVector
TDim GetAnchorRow(void) const
Which is the anchor row?
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
A pairwise aln is a collection of ranges for a pair of rows.
const TAlnSeqIdIRef & GetFirstId(void) const
Get first sequence id.
const TAlnSeqIdIRef & GetSecondId(void) const
Get second sequence id.
CPairwiseAln iterator. Iterates over aligned ranges and gaps.
@ eGap
Gap or unaligned range.
Scope-aware seq-id converter.
void Validate(bool full_test=false) const
TSignedSeqPos GetSeqAlnStart(TNumrow row) const
TRng GetAlnRange(void) const
Get whole alignment range.
TNumrow GetNumRows(void) const
Synonym of the above.
int GetBaseWidth(TNumrow row) const
Get base width for the sequence (1 for nucleotides, 3 for proteins).
TSignedSeqPos GetSeqAlnStop(TNumrow row) const
TRange GetSeqRange(TNumrow row) const
Get sequence range in sequence coords.
pair< int, int > TFrames
Get start and stop frames for the selected row/range.
CAnchoredAln::TDim TDim
Synonym of TNumrow.
_TRange AlnRangeToNativeSeqRange(TNumrow row, _TRange aln_range) const
Convert alignment range (genomic coordinates) on the selected row to real sequence range.
virtual IAlnSegmentIterator * CreateSegmentIterator(TNumrow row, const TSignedRange &range, IAlnSegmentIterator::EFlags flags) const
Create segment iterator.
string & GetAlnSeqString(TNumrow row, string &buffer, const TSignedRange &rq_aln_rng, bool force_translation=false) const
Fetch alignment sequence data.
TFrames AlnRangeToNativeFrames(TNumrow row, _TRange aln_range) const
TSignedSeqPos GetSeqPosFromAlnPos(TNumrow for_row, TSeqPos aln_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
TDim GetDim(void) const
Alignment dimension (number of sequence rows in the alignment)
TNumrow GetAnchor(void) const
Get anchor row index.
TSignedRange GetSeqAlnRange(TNumrow row) const
Get sequence range in alignment coords (strand ignored).
string & GetSeqString(TNumrow row, string &buffer, TSeqPos seq_from, TSeqPos seq_to, bool force_translation=false) const
Fetch sequence data for the given row and range.
const objects::CSeq_id & GetSeqId(TNumrow row) const
Get seq-id for the row.
Implementation of IAlnSegmentIterator for CSparseAln.
@ eForward
Towards higher seq coord (to the right if plus strand, left if minus)
Alignment segment iterator interface.
@ eSkipInserts
Iterate segments where at least some rows are aligned (including gap segments)
@ eInsertsOnly
Iterate only ranges not participating in the alignment (unaligned segments)
@ eAllSegments
Iterate all segments.
@ eSkipGaps
Skip gap segments (show only aligned ranges)
Alignment segment interface.
virtual const TSignedRange & GetRange(void) const =0
Get the selected row range.
@ fAligned
Aligned segment.
@ fIndel
Either anchor or the selected row is not present in the segment.
@ fUnaligned
The range on the selected sequence does not participate in the alignment (the alignment range of the ...
@ fGap
Both anchor row and the selected row are not included in the segment (some other row is present and t...
virtual TSegTypeFlags GetType(void) const =0
Get current segment type.
virtual const TSignedRange & GetAlnRange(void) const =0
Get alignment range for the segment.
Include a standard set of the NCBI C++ Toolkit most basic headers.
static const char * str(char *buf, int n)
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
virtual const string & AsString(void) const =0
Get the argument's string value.
@ eInputFile
Name of file (must exist and be readable)
@ eString
An arbitrary string.
@ eInteger
Convertible into an integer number (int or Int8)
#define MSerial_AsnText
I/O stream manipulators.
@ eSerial_AsnText
ASN.1 text.
@ eSerial_AsnBinary
ASN.1 binary.
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
TThisType & SetToOpen(position_type toOpen)
position_type GetToOpen(void) const
position_type GetFrom(void) const
bool IsFirstDirect() const
position_type GetFirstFrom(void) const
static TThisType GetWhole(void)
void Run(void)
Enter the main loop.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
E_Choice Which(void) const
Which variant is currently selected.
@ e_not_set
No variant selected.
unsigned int
A callback function used to compare two keys in a database.
Magic spell ;-) needed for some weird compilers... very empiric.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
#define GetArgs
Avoid preprocessor name clash with the NCBI C Toolkit.
Defines command line argument related classes.
Defines unified interface to application:
std::istream & in(std::istream &in_, double &x_)
vector< CRef< CAnchoredAln > > TAnchoredAlnVec
Collection of anchored alignments.
#define row(bind, expected)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4