stable_sort(enzymes.begin(), enzymes.end(),
SCompareSpecs());
97 if(enzyme != enzymes.begin() &&
98enzyme->GetSpecs() ==
result.back().GetSpecs()) {
99 result.back().SetName() +=
"/";
100 result.back().SetName() += enzyme->GetName();
102 result.push_back(*enzyme);
103 result.back().SetPrototype();
120os <<
"Recog. site: "<<
site.GetStart() <<
'-' 121<<
site.GetEnd() << endl;
122os <<
"Plus strand cuts: ";
132os <<
"Minus strand cuts: ";
153 intplus_cut, minus_cut;
157string::size_type idx = s.find_first_of(
")");
158 if(idx == std::string::npos) {
159 throwruntime_error(
string(
"Error parsing site ")
167 if(s[s.length() - 1] ==
')') {
168string::size_type idx = s.find_last_of(
"(");
169 if(idx == std::string::npos) {
170 throwruntime_error(
string(
"Error parsing site ")
178 for(
unsigned int i= 0;
i< s.length();
i++) {
179 if(s[
i] ==
'^') {
197string::size_type idx = s.find_first_not_of(
"N");
198 if(idx == string::npos) {
206idx = s.find_last_not_of(
"N");
228vector<string> site_vec;
230 ITERATE(vector<string>, iter, site_vec) {
241 returnTRebaseData::GetDefault();
250 if(
l.size() != 2) {
251 throwruntime_error(
string(
"Couldn't parse cut locations ")
266TEnzymes::size_type prototype_idx(0);
268 while(getline(
input, line)) {
269vector<string> fields;
272 if(fields.size() < 2) {
277 boolis_prototype(
true);
282is_prototype =
false;
288 stringsites = fields[3];
291enzymes.push_back(enzyme);
295prototype_idx = enzymes.size();
296}
else if(prototype_idx) {
297 CREnzyme& prototype = enzymes[prototype_idx - 1];
340 CPatternRec(
stringpattern,
size_tenzyme_index,
size_tspec_index,
371 returnlhs->GetEnzymeName() < rhs->GetEnzymeName();
382 returnlhs.GetName() < rhs.GetName();
393 returnlhs->GetDefiniteSites().size() < rhs->GetDefiniteSites().size();
414 if(pos == s.size()) {
420 charorig_ch = s[pos];
421 for(
charx = 1; x <= 8; x <<= 1) {
459 if(! refile.empty()) {
460ifstream istr(refile.c_str());
482 returnconverter.
Resolve(scope);
491 booldefinite =
true)
500feat->
SetData().SetRsite().SetDb().SetDb(
"REBASE");
501feat->
SetData().SetRsite().SetDb()
502.SetTag().SetStr(
"REBASE");
514vector< CRef<CSeq_loc> > locs;
518recog_site->
SetInt().SetFrom(
site->GetStart());
519recog_site->
SetInt().SetTo (
site->GetEnd());
520recog_site->
SetInt().SetStrand(
site->GetStrand());
521recog_site->
SetId(
id);
522locs.push_back(recog_site);
529 intnegative_cut_locs = 0;
533cut_site->
SetPnt().SetPoint(*cut);
537cut_site->
SetPnt().SetStrand(cut_strand);
538cut_site->
SetId(
id);
539locs.push_back(cut_site);
545 ITERATE(vector<int>, cut,
site->GetMinusCuts()) {
548cut_site->
SetPnt().SetPoint(*cut);
553cut_site->
SetId(
id);
554locs.push_back(cut_site);
562 if(negative_cut_locs > 0) {
564+
" cleavage sites are located before the" 565 " beginning of the sequence and are not reported";
570 copy(locs.begin(), locs.end(),
571back_inserter(feat->
SetLocation().SetMix().Set()));
578annot.
SetData().SetFtable().push_back(feat);
591 typedefvector<CRef<CREnzResult> > TResults;
613 constvector<CRSite>& definite_sites =
614(*result)->GetDefiniteSites();
615 constvector<CRSite>& possible_sites =
616(*result)->GetPossibleSites();
618 size_tcount_definite_sites = definite_sites.size();
619 size_tcount_possible_sites = possible_sites.size();
621 if(count_definite_sites || count_possible_sites) {
622total_definite_sites += count_definite_sites;
623total_possible_sites += count_possible_sites;
628 const stringtitle(
"Restriction sites");
634new_annot->
SetDesc().Set().push_back(region);
635annot.push_back(new_annot);
643**
result, curr_annot, scope, loc);
645**
result, curr_annot, scope, loc,
false);
658 _TRACE(
"Found "<< total_definite_sites <<
" definite and " 659<< total_possible_sites <<
" possible sites");
685 static const boolambig_table[16] = {
6860, 0, 0, 1, 0, 1, 1, 1,
6870, 1, 1, 1, 1, 1, 1, 1
689 returnambig_table[(size_t)
nuc];
698 template<
classSeq>
705 results.reserve(enzymes.size());
718enzyme->IsPrototype())) {
726 constvector<CRSpec>& specs = enzyme->GetSpecs();
729 ITERATE(vector<CRSpec>, spec, specs) {
743 SIZE_TYPEfsm_pat_size = pat.find_first_of(0x0f);
745 SIZE_TYPEpos = pat.find_first_of(0x0f, fsm_pat_size + 1);
747|| pat.find_first_of(0x0f, pos + 1) ==
NPOS) {
748fsm_pat_size = pat.size();
753spec - specs.begin(),
764fsm_pat_size = comp.find_first_of(0x0f);
765 SIZE_TYPEpos = comp.find_first_of(0x0f, fsm_pat_size + 1);
767|| comp.find_first_of(0x0f, pos + 1) ==
NPOS) {
768fsm_pat_size = comp.size();
773spec - specs.begin(),
788vector<TSeqPos> ambig_nucs;
793ambig_nucs.push_back(
i);
807 if(end_pos >= seq.size()) {
822 for(
unsigned int n= begin_pos;
n<= end_pos;
843 constvector<int>& plus_cuts = spec.
GetPlusCuts();
844 ITERATE(vector<int>, cut, plus_cuts) {
851 site.SetPlusCuts().push_back(begin_pos + *cut);
855 ITERATE(vector<int>, cut, minus_cuts) {
862 site.SetMinusCuts().push_back(begin_pos + *cut);
874 if(!ambig_nucs.empty()) {
876 const string& pat = pattern->GetPattern();
877 TSeqPospat_size = pattern->GetPatternSize();
880 size_tds_pos =
results[pattern->GetEnzymeIndex()]
881->GetDefiniteSites().size();
882 size_tps_pos =
results[pattern->GetEnzymeIndex()]
883->GetPossibleSites().size();
889 ITERATE(vector<TSeqPos>, pos, ambig_nucs) {
892begin_check =
max(begin_check, 0);
895begin_check =
max(begin_check, next_pos);
896 intend_check =
min(*pos, (
TSeqPos) (seq.size() - pat_size));
898 for(
i= begin_check;
i<= end_check;
i++) {
907 site.SetStrand(pattern->GetStrand());
910 constvector<int>& plus_cuts
911= enzymes[pattern->GetEnzymeIndex()]
912.GetSpecs()[pattern->GetSpecIndex()].GetPlusCuts();
913 ITERATE(vector<int>, cut, plus_cuts) {
916.push_back(
i+ pattern->GetPatternSize()
919 site.SetPlusCuts().push_back(
i+ *cut);
923 constvector<int>& minus_cuts
924= enzymes[pattern->GetEnzymeIndex()]
925.GetSpecs()[pattern->GetSpecIndex()]
927 ITERATE(vector<int>, cut, minus_cuts) {
930.push_back(
i+ pattern->GetPatternSize()
933 site.SetMinusCuts().push_back(
i+ *cut);
939 results[pattern->GetEnzymeIndex()]
940->SetDefiniteSites().push_back(
site);
942 results[pattern->GetEnzymeIndex()]
943->SetPossibleSites().push_back(
site);
950vector<CRSite>& def_sites =
results[pattern->GetEnzymeIndex()]
951->SetDefiniteSites();
952inplace_merge(def_sites.begin(),
953def_sites.begin() + ds_pos,
957vector<CRSite>& pos_sites =
results[pattern->GetEnzymeIndex()]
958->SetPossibleSites();
959inplace_merge(pos_sites.begin(),
960pos_sites.begin() + ps_pos,
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool IsReverse(ENa_strand s)
ENa_strand Reverse(ENa_strand s)
User-defined methods of the data storage class.
static void x_AddPattern(const string &pat, CTextFsm< size_t > &fsm, size_t match_value)
static bool x_IsAmbig(char nuc)
friend void x_FindRSite(const Seq &seq, const TEnzymes &enzymes, vector< CRef< CREnzResult > > &results, CFindRSites::TFlags)
Find all definite and possible sites in a sequence for a vector of enzymes, using a finite state mach...
const TEnzymes & GetEnzymes()
void x_LoadREnzymeData(const string &refile, CRebase::EEnzymesToLoad which_enzymes)
CREnzyme::TEnzymes TEnzymes
CFindRSites(const string &refile=kEmptyStr, CRebase::EEnzymesToLoad which_enzymes=CRebase::eAll, TFlags flags=fDefault)
@ fFindIsoschizomers
Lump together all enzymes with identical specificities.
static void Find(const string &seq, const TEnzymes &enzymes, vector< CRef< CREnzResult > > &results, TFlags flags=0)
static void x_ExpandRecursion(string &s, unsigned int pos, CTextFsm< size_t > &fsm, size_t match_value)
TAnnot GetAnnot(CScope &scope, const CSeq_loc &loc) const
TSeqPos GetFsmPatSize(void) const
size_t GetSpecIndex(void) const
size_t GetEnzymeIndex(void) const
TSeqPos GetPatternSize(void) const
const string & GetPattern(void) const
CPatternRec(string pattern, size_t enzyme_index, size_t spec_index, ENa_strand strand, TSeqPos fsm_pat_size)
ENa_strand GetStrand(void) const
This class represents the results of a search for sites of a particular enzyme.
const vector< CRSite > & GetPossibleSites(void) const
const vector< CRSite > & GetDefiniteSites(void) const
const string & GetEnzymeName(void) const
This class represents a restriction enzyme (an enzyme name and a vector of cleavage specificities)
void SetName(const string &s)
const string & GetName(void) const
static void CombineIsoschizomers(TEnzymes &enzymes)
vector< string > & SetIsoschizomers(void)
vector< CREnzyme > TEnzymes
vector< CRSpec > & SetSpecs(void)
const vector< CRSpec > & GetSpecs(void) const
void SetPrototype(const string &s=kEmptyStr)
This class represents a particular occurrence of a restriction site on a sequence (not to be confused...
This class represents a restriction enzyme specificity, i.e., a sequence recognition pattern and vect...
bool operator<(const CRSpec &rhs) const
const string & GetSeq(void) const
void SetSeq(const string &s)
vector< int > m_MinusCuts
const vector< int > & GetPlusCuts(void) const
const vector< int > & GetMinusCuts(void) const
vector< int > & SetPlusCuts(void)
vector< int > & SetMinusCuts(void)
static CRSpec MakeRSpec(const string &site)
CREnzyme::TEnzymes TEnzymes
static string GetDefaultDataPath()
static void x_ParseCutPair(const string &s, int &plus_cut, int &minus_cut)
static CREnzyme MakeREnzyme(const string &name, const string &sites)
static void ReadNARFormat(istream &input, TEnzymes &enzymes, enum EEnzymesToLoad which)
static char IupacToNcbi8na(char in)
stuff for dealing with ncbi8na.
static EMatch MatchNcbi8na(const Seq &seq, const Pat &pat, TSeqPos pos)
static void CompNcbi8na(string &seq8na)
complement an ncbi8na sequence in place
void SetNameDesc(const string &name)
void SetCreateDate(const CTime &dt)
void SetTitleDesc(const string &title)
namespace ncbi::objects::
Include a standard set of the NCBI C++ Toolkit most basic headers.
static const char * str(char *buf, int n)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
CConstRef< CSeq_id > GetSeqId(void) const
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
CConstRef< CSeq_loc > m_ParentLoc
CRef< CSeq_loc > Resolve(CScope *scope=0, TFlags flags=0) const
@ fNoMerge
don't merge adjacent intervals
CScope & GetScope(void) const
Get scope this handle belongs to.
@ eCoding_Ncbi
Set coding to binary coding (Ncbi4na or Ncbistdaa)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
void SetNcbiCoding(void)
Set coding to either Ncbi8aa or Ncbi8na depending on molecule type.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
void AddWord(const string &word, const MatchType &match)
int GetNextState(int state, char letter) const
const vector< MatchType > & GetMatches(int state) const
bool IsMatchFound(int state) const
int GetInitialState(void) const
NCBI_NS_STD::string::size_type SIZE_TYPE
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
@ eCurrent
Use current time. See also CCurrentTime.
@ eLim_tl
space to left of position
void SetLocation(TLocation &value)
Assign a value to Location data member.
void SetComment(const TComment &value)
Assign a value to Comment data member.
const TLocation & GetLocation(void) const
Get the Location member data.
void SetData(TData &value)
Assign a value to Data data member.
ENa_strand
strand of nucleic acid
@ eNa_strand_both
in forward orientation
void SetData(TData &value)
Assign a value to Data data member.
void SetDesc(TDesc &value)
Assign a value to Desc data member.
TRegion & SetRegion(void)
Select the variant.
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
NCBI_PARAM_DECL(string, RESTRICTION_SITES, REBASE)
ostream & operator<<(ostream &os, const CRSite &site)
typedef NCBI_PARAM_TYPE(RESTRICTION_SITES, REBASE) TRebaseData
NCBI_PARAM_DEF(string, RESTRICTION_SITES, REBASE, "")
void x_FindRSite(const Seq &seq, const CFindRSites::TEnzymes &enzymes, vector< CRef< CREnzResult > > &results, CFindRSites::TFlags flags)
Find all definite and possible sites in a sequence for a vector of enzymes, using a finite state mach...
static CRef< CSeq_loc > s_RemapChildToParent(const CSeq_loc &parent, const CSeq_loc &child, CScope *scope)
static void s_AddSitesToAnnot(const vector< CRSite > &sites, const CREnzResult &result, CSeq_annot &annot, CScope &scope, const CSeq_loc &parent_loc, bool definite=true)
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
bool operator()(const CRSite &lhs, const CRSite &rhs) const
bool operator()(const CREnzyme &lhs, const CREnzyme &rhs)
Location relative to a base Seq-loc: one (usually) or more ranges of offsets.
static bool ambig(char c)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4