& fname_out_seqaligns,
67 const string& fname_gis_to_filter)
79 const string& fname_out_seqaligns,
94 const string& fname_gis_to_filter,
100filtered_aln.
Set().clear();
103 if(!((*iter)->GetSegs().IsDisc())) {
107id_aligned_seq = &((*iter)->GetSeq_id(1));
110 if(seqdb_gis->FindGi(gi)) {
111filtered_aln.
Set().push_back(*iter);
122aln_disc->
Assign(**iter);
123aln_disc->
SetSegs().SetDisc(*filtered_sub_aln);
125filtered_aln.
Set().push_back(aln_disc);
132 constlist<TGi>& list_gis,
136filtered_aln.
Set().clear();
139 if(!((*iter)->GetSegs().IsDisc())) {
143id_aligned_seq = &((*iter)->GetSeq_id(1));
146 if(find(list_gis.begin(), list_gis.end(), gi) != list_gis.end()) {
147filtered_aln.
Set().push_back(*iter);
155 FilterByGiList((*iter)->GetSegs().GetDisc(), list_gis, *filtered_sub_aln);
158aln_disc->
Assign(**iter);
159aln_disc->
SetSegs().SetDisc(*filtered_sub_aln);
161filtered_aln.
Set().push_back(aln_disc);
177list< CRef<CSeq_id> > seqid_list = db->
GetSeqIDs(oid);
178gis.reserve(seqid_list.size());
182gis.push_back((**id).GetGi());
186 sort(gis.begin(), gis.end());
193filtered_aln.
Set().clear();
196 if(!((*iter_aln)->GetSegs().IsDisc())) {
202id_aligned_seq = &((*iter_aln)->GetSeq_id(1));
203 TGigi_aligned_seq = id_aligned_seq->
GetGi();
206 intoid_aligned_seq = -1;
207db->
GiToOid(gi_aligned_seq, oid_aligned_seq);
210vector<TGi> vec_gis_from_DB;
212 if(oid_aligned_seq > 0)
216 if(!vec_gis_from_DB.empty()) {
226 FilterBySeqDB((*iter_aln)->GetSegs().GetDisc(), db, *filtered_sub_aln);
229aln_disc->Assign(**iter_aln);
230aln_disc->SetSegs().SetDisc(*filtered_sub_aln);
232filtered_aln.
Set().push_back(aln_disc);
245 if(out_gi_vec.size() == 0)
250 for(vector<TGi>::const_iterator it_gi_out = out_gi_vec.begin();
251it_gi_out != out_gi_vec.end(); it_gi_out++)
256 boolsuccess =
false;
258in_gi, *it_gi_out, success);
266out_aln.
Set().push_back(sa_copy);
274vector<TGi> vec_old_extra_gis;
278vector<TGi> vec_new_extra_gis;
279 x_GenerateNewGis(in_gi, vec_old_extra_gis, out_gi_vec, main_new_gi, vec_new_extra_gis);
281 boolsuccess =
false;
283main_new_gi, success);
290out_aln.
Set().push_back(sa_copy);
299 constvector<TGi>& vec_old_extra_gis,
300 constvector<TGi>& vec_out_gis,
302vector<TGi>& vec_new_extra_gis)
304 if(vec_out_gis.empty())
307 inti_out_gi = 0, i_old_gi = 0, i_new_gi = 0;
311 if(find(vec_out_gis.begin(), vec_out_gis.end(), main_old_gi) != vec_out_gis.end())
312main_new_gi = main_old_gi;
314main_new_gi = vec_out_gis[0];
316 intnum_gis_left = vec_out_gis.size();
317 if(num_gis_left > 0)
323vec_new_extra_gis.resize(num_gis_left);
325 for(; i_old_gi < (
int)(vec_old_extra_gis.size()); i_old_gi++)
327 TGiold_gi = vec_old_extra_gis[i_old_gi];
328 if(find(vec_out_gis.begin(), vec_out_gis.end(), old_gi) != vec_out_gis.end())
329vec_new_extra_gis[i_new_gi++] = old_gi;
332 for(; i_out_gi < (
int)(vec_out_gis.size()); i_out_gi++)
334 TGiout_gi = vec_out_gis[i_out_gi];
335 if(find(vec_old_extra_gis.begin(), vec_old_extra_gis.end(), out_gi)
336== vec_old_extra_gis.end())
339vec_new_extra_gis[i_new_gi++] = out_gi;
347vec_new_extra_gis.clear();
354 TGiold_gi,
TGinew_gi,
bool& success)
363 boolgi_changed =
false;
372 if((*iter_diag)->IsSetIds() && n_row < (*iter_diag)->GetIds().size())
374 const CSeq_id& id_to_change = *((*iter_diag)->GetIds()[n_row]);
375 if(id_to_change.
IsGi() &&
376id_to_change.
GetGi() == old_gi)
378(*iter_diag)->SetIds()[n_row]->
SetGi(new_gi);
389 if(denseg.IsSetIds() && n_row < denseg.GetIds().size())
391 const CSeq_id& id_to_change = *(denseg.GetIds()[n_row]);
392 if(id_to_change.
IsGi() &&
393id_to_change.
GetGi() == old_gi)
395denseg.SetIds()[n_row]->
SetGi(new_gi);
407 if((*iter_std)->IsSetLoc() && n_row < (*iter_std)->GetLoc().size())
409 CSeq_loc& loc_to_change = *((*iter_std)->SetLoc()[n_row]);
413 const CSeq_id* p_id_to_change = loc_to_change.
GetId();
416 if(p_id_to_change->
IsGi() &&
417p_id_to_change->
GetGi() == old_gi)
420loc_to_change.
SetId(*id_updated);
432success = gi_changed;
438vec_extra_gis.clear();
445 if(score_entry->CScore_Base::IsSetId())
446 if(score_entry->GetId().IsStr())
448 stringstr_id = score_entry->GetId().GetStr();
449 if(str_id ==
"use_this_gi")
451 Uint4gi_v = (
Uint4) (score_entry->GetValue().GetInt());
453vec_extra_gis.push_back(gi);
461 for(
inti_gi = 0; i_gi < (
int)(vec_extra_gis.size()); i_gi++)
469CSeq_align::TScore::iterator iter_score = score_entries.begin();
470 while(iter_score != score_entries.end())
473 boolerase_entry =
false;
475 if(score_entry->IsSetId())
476 if(score_entry->GetId().IsStr())
478 stringstr_id = score_entry->GetId().GetStr();
479erase_entry = (str_id ==
"use_this_gi");
483iter_score = score_entries.erase(iter_score);
494score_entry->SetId().SetStr(
"use_this_gi");
497sa->
SetScore().push_back(score_entry);
503 const string& fname_gis)
509seqdb =
new CSeqDB(fname_db,
533seqdb_gis->GetGiList(vec_gis);
536 sort(vec_gis.begin(), vec_gis.end());
539 for(vector<TGi>::iterator it = vec_gis.begin(); it != vec_gis.end(); it++)
540list_gis.push_back(*it);
548seqdb_gis->GetGiList(vec_gis);
550 sort(vec_gis.begin(), vec_gis.end());
561 if(!subjid.
Match(*newSeqID)) {
565 if(denseg.IsSetIds() && denseg.GetIds().size() == 2)
567denseg.SetIds()[1] = newSeqID;
584vector< CRef<CSeq_id> > seqids;
585list< CRef<CSeq_id> > seqid_list = filteredDB->
GetSeqIDs(oid);
586seqids.reserve(seqid_list.size());
589 if(subjid.
IsGi() && (**id).IsGi()) {
590seqids.push_back(*
id);
592 else if(!subjid.
IsGi() && !(**id).IsGi()) {
593seqids.push_back(*
id);
597 if(!seqids.empty()) {
599newSubjectID = seqids[0];
601vector <string> useThisSeqs;
602 for(
size_t i= 0;
i< seqids.size();
i++) {
605 if(seqids[0]->IsGi()) {
606useThisSeqs.push_back(
"gi:"+ textSeqID);
609useThisSeqs.push_back(
"seqid:"+ textSeqID);
613userObject->
SetType().SetStr(
"use_this_seqid");
614userObject->
AddField(
"SEQIDS", useThisSeqs);
616sa_copy->
SetExt().push_back(userObject);
624vector<int>& oid_vec)
628 boolsuccess =
false;
638 if(previous_id.
Empty() || !subjid->
Match(*previous_id))
641 if(oid_vec[
i] > 0) {
644 if(!filtered_aln.
Empty()) {
656previous_id = subjid;
658 if(success && !filtered_aln.
Empty()) {
659new_aln->
Set().push_back(filtered_aln);
673 #ifdef NCBI_STRICT_TAX_ID 676tax_ids.
insert(leaf_ids.begin(), leaf_ids.end());
680 if(user_tax_ids.
size() > tax_ids.
size()) {
682 if(user_tax_ids.
find(*itr) != user_tax_ids.
end()) {
689 if(tax_ids.
find(*itr) != tax_ids.
end()) {
704 constlist< CRef< CBlast_def_line > >& bdlSet = bdlRef->
Get();
705vector <string> useThisSeqs;
724list<string> use_this_seq;
726 if(use_this_seq.size() > 0) {
731 if(sa_copy.
Empty()) {
735useThisSeqs.push_back(
"gi:"+ textSeqID);
738useThisSeqs.push_back(
"seqid:"+ textSeqID);
741userObject->
SetType().SetStr(
"use_this_seqid");
742userObject->
AddField(
"SEQIDS", useThisSeqs);
744sa_copy->
SetExt().push_back(userObject);
757 boolsuccess =
false;
767 if(previous_id.
Empty() || !subjid->
Match(*previous_id))
774 if(!filtered_aln.
Empty()) {
784previous_id = subjid;
786 if(success && !filtered_aln.
Empty()) {
787new_aln->
Set().push_back(filtered_aln);
799list<string> use_this_seq;
801 boolmodifyAlignment =
false;
802 if(use_this_seq.size() > 0) {
809 constlist< CRef< CBlast_def_line > >& bdlSet = bdlRef->
Get();
815 if(accInfo != accType) {
817use_this_seq.push_back(textSeqID);
820modifyAlignment =
true;
824 if(!modifyAlignment) {
827 if(modifyAlignment && use_this_seq.size() > 0) {
828vector <string> useThisSeqs;
831 ITERATE(list<string>, iter_seq, use_this_seq){
832 if(seqID->
IsGi()) {
833useThisSeqs.push_back(
"gi:"+ *iter_seq);
836useThisSeqs.push_back(
"seqid:"+ *iter_seq);
839userObject->
SetType().SetStr(
"use_this_seqid");
840userObject->
AddField(
"SEQIDS", useThisSeqs);
842sa_copy->
SetExt().push_back(userObject);
850vector <CSeq_id::EAccessionInfo> accTypes)
853 boolsuccess =
false;
863 if(previous_id.
Empty() || !subjid->
Match(*previous_id))
866 for(
size_t i= 0;
i< accTypes.size();
i++) {
867 if(!currAlign.
Empty()) {
869currAlign = filtered_aln;
872 if(!filtered_aln.
Empty()) {
882previous_id = subjid;
884 if(success && !filtered_aln.
Empty()) {
885new_aln->
Set().push_back(filtered_aln);
Defines the alignment filtering class.
Declares CBlastScopeSource class to create properly configured CScope objects to invoke the BLAST dat...
static bool RemoveSeqsOfAccessionTypeFromSeqInUse(list< string > &use_this_seq, objects::CSeq_id::EAccessionInfo accesionType)
function to remove sequences of accesionType from use_this_seq list
static bool GetTextSeqID(const list< CRef< objects::CSeq_id > > &ids, string *textSeqID=NULL)
static void GetUseThisSequence(const objects::CSeq_align &aln, list< TGi > &use_this_gi)
Extract use_this_gi info from blast alignment.
static bool MatchSeqInUseThisSeqList(list< string > &use_this_seq, string textSeqIDToMatch)
void x_RemoveExtraGis(CRef< objects::CSeq_align > sa)
void WriteSeqalignSet(const string &fname, const objects::CSeq_align_set &aln)
Write a seqalign to a file.
void FilterSeqalignsExt(const string &fname_in_seqaligns, const string &fname_out_seqaligns, CRef< CSeqDB > db)
Filter Seqaligns - extended file-based version.
CRef< CSeqDB > PrepareSeqDB(const string &fname_db, bool is_prot, const string &fname_gis_to_filter)
Load a SeqDB database with the given gi-list.
void ReadGiList(const string &fname, list< TGi > &list_gis, bool sorted=false)
Read a gi list from a file and, optionally, sort it.
void FilterByGiListFromFile(const objects::CSeq_align_set &full_aln, const string &fname_gis_to_filter, objects::CSeq_align_set &filtered_aln)
Filter Seqaligns using a gi-list stored in file.
EResultsFormat
EResultsFormat - output options for filtered seqaligns.
CRef< CSeq_align_set > FilterByAccessionType(const CSeq_align_set &seqalign, CRef< CScope > &scope, vector< CSeq_id::EAccessionInfo > accTypes)
virtual ~CSeqAlignFilter()
Destructor.
bool x_AddUseGiEntryInSeqalign(CRef< objects::CSeq_align > sa, TGi new_gi)
void x_ReadExtraGis(CConstRef< objects::CSeq_align > sa, vector< TGi > &vec_extra_gis)
Read the "use_this_gi" entries from a seqalign object.
void FilterBySeqDB(const objects::CSeq_align_set &full_aln, CRef< CSeqDB > db, objects::CSeq_align_set &filtered_aln)
Filter Seqaligns using a SeqDB object.
void FilterByGiList(const objects::CSeq_align_set &full_aln, const list< TGi > &list_gis, objects::CSeq_align_set &filtered_aln)
Filter Seqaligns using a list of integers as the gi-list.
void ReadGiVector(const string &fname, vector< TGi > &vec_gis, bool sorted=false)
Read a gi vector from a file and, optionally, sort it.
void x_WriteExtraGis(CRef< objects::CSeq_align > sa, const vector< TGi > &vec_extra_gis)
void ReadSeqalignSet(const string &fname, objects::CSeq_align_set &aln)
Read a seqalign set from file.
void x_CreateOusputSeqaligns(CConstRef< objects::CSeq_align > in_aln, TGi in_gi, objects::CSeq_align_set &out_aln, const vector< TGi > &out_gi_vec)
Create one or more seqalign objects for output, based on the given input seqalign and the list of gi'...
CRef< CSeq_align_set > FilterByTaxonomy(const CSeq_align_set &seqalign, CRef< CSeqDB > &seqdb, const set< int > &taxids)
void x_GenerateNewGis(TGi main_old_gi, const vector< TGi > &vec_old_extra_gis, const vector< TGi > &vec_out_gis, TGi &main_new_gi, vector< TGi > &vec_new_extra_gis)
Generate the list of gi's based on the old list and the newly available gi's.
CRef< objects::CSeq_align > x_UpdateGiInSeqalign(CConstRef< objects::CSeq_align > sa, unsigned int n_row, TGi old_gi, TGi new_gi, bool &success)
Change the gi of one of the sequences referenced in the seqalign object.
void FilterSeqaligns(const string &fname_in_seqaligns, const string &fname_out_seqaligns, const string &fname_gis_to_filter)
Filter Seqaligns - file-based version.
list< CRef< CSeq_id > > GetSeqIDs(int oid) const
Gets a list of sequence identifiers.
const CSeqDBGiList * GetGiList() const
Get GI list attached to this database.
bool SeqidToOid(const CSeq_id &seqid, int &oid) const
Translate a Seq-id to any matching OID.
CRef< CBlast_def_line_set > GetHdr(int oid) const
Get the ASN.1 header for the sequence.
static CRef< CBlast_def_line_set > ExtractBlastDefline(const CBioseq &bioseq)
Extract a Blast-def-line-set object from a Bioseq retrieved by CSeqDB.
bool GiToOid(TGi gi, int &oid) const
Translate a GI to an OID.
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
const_iterator end() const
#define GI_FROM(T, value)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define TAX_ID_FROM(T, value)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
@ eSerial_AsnText
ASN.1 text.
TGi FindGi(const container &ids)
Return gi from id list if exists, return 0 otherwise.
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
EAccessionInfo
For IdentifyAccession (below)
CSeq_id & Set(const CTempString &the_id, TParseFlags flags=fParse_AnyRaw)
Reassign based on flat specifications; arguments interpreted as with constructors.
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
static int WorstRank(const CRef< CSeq_id > &id)
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
TObjectType * GetNonNullPointer(void)
Get pointer value and throw a null pointer exception if pointer is null.
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty -- not pointing to any object, which means having a null value.
void Reset(void)
Reset reference object.
TObjectType * GetNonNullPointer(void) const
Get pointer value and throw a null pointer exception if pointer is null.
bool Empty(void) const THROWS_NONE
Check if CRef is empty -- not pointing to any object, which means having a null value.
uint32_t Uint4
4-byte (32-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
bool IsSetLinks(void) const
Check if a value has been assigned to Links data member.
TTaxid GetTaxid(void) const
Get the Taxid member data.
bool IsSetTaxid(void) const
Check if a value has been assigned to Taxid data member.
const TLinks & GetLinks(void) const
Get the Links member data.
const Tdata & Get(void) const
Get the member data.
void SetType(TType &value)
Assign a value to Type data member.
Tdata & Set(void)
Assign a value to data member.
TScore & SetScore(void)
Assign a value to Score data member.
vector< CRef< CScore > > TScore
void ResetExt(void)
Reset Ext data member.
list< CRef< CStd_seg > > TStd
void SetSegs(TSegs &value)
Assign a value to Segs data member.
bool IsDendiag(void) const
Check if variant Dendiag is selected.
TExt & SetExt(void)
Assign a value to Ext data member.
bool IsStd(void) const
Check if variant Std is selected.
list< CRef< CSeq_align > > Tdata
const TScore & GetScore(void) const
Get the Score member data.
list< CRef< CDense_diag > > TDendiag
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
bool IsDenseg(void) const
Check if variant Denseg is selected.
TGi GetGi(void) const
Get the variant data.
TGi & SetGi(void)
Select the variant.
bool IsGi(void) const
Check if variant Gi is selected.
@ e_Gi
GenInfo Integrated Database.
list< CRef< CSeq_id > > TId
unsigned int
A callback function used to compare two keys in a database.
constexpr auto sort(_Init &&init)
static CRef< CSeq_align > s_ModifySeqAlnWithFilteredSeqIDs(CRef< CBlast_def_line_set > &bdlRef, const set< TTaxId > &taxids, CRef< CSeq_align > &in_align)
static CRef< CSeq_align > s_UpdateSubjectInSeqalign(CRef< CSeq_align > &in_align, CRef< CSeq_id > &newSeqID)
static void s_GetFilteredRedundantGis(CRef< CSeqDB > db, int oid, vector< TGi > &gis)
static bool s_IncludeDeflineTaxid(const CBlast_def_line &def, const set< TTaxId > &user_tax_ids)
CRef< CSeq_align > s_UpdateSeqAlnWithFilteredSeqIDs(CRef< CSeqDB > filteredDB, int oid, CRef< CSeq_align > &in_align)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4