constexpr std::array<string_view, 10> weasels = {
82 if(
arr.size() == 1) {
88 for(
i=0;
i< (
int)(
arr.size() - 1);
i++) {
91 for(
auto& it: weasels) {
105 for( ;
i< (
int)(
arr.size()-1);
i++) {
106ret_str +=
arr[
i] +
' ';
108ret_str +=
arr[
arr.size()-1];
115 stringup_str =
str;
118 if(up_str ==
str)
return true;
125 stringlow_str =
str;
128 if(low_str ==
str)
return true;
135 for(
unsigned i=0;
i<
str.size();
i++) {
147 stringcomp_str1, comp_str2;
148comp_str1 = str1.substr(0, len1);
149comp_str2 = str2.substr(0, len1);
150 if(case_sensitive) {
151 return(comp_str1 == comp_str2);
165 size_tpos_match = 0, pos_str = 0;
166 boolwd_case, whole_wd, word_start_m, word_start_s;
167 bool match=
true, recursive_match =
false;
168 unsignedlen_m = str_match.size(), len_s =
str.size(), target_match_len=0;
176vector <string> word_word;
181word_word.push_back(
strtmp);
185 while(
match&& pos_match < len_m && pos_str < len_s && !recursive_match) {
186cp_m = str_match.substr(pos_match);
187cp_s =
str.substr(pos_str);
193wd_case = (*it)->GetCase_sensitive();
194whole_wd = (*it)->GetWhole_word();
195len1 = word_word[
i].size();
198word_start_m = (!pos_match && is_start) || !
isalpha(str_match[pos_match - 1]);
199 ch1= (cp_m.size() <= len1) ?
' ': cp_m[len1];
202 if(!whole_wd || (!
isalpha(
ch1) && word_start_m)) {
203 if( !(*it)->CanGetSynonyms() || (*it)->GetSynonyms().empty()) {
205recursive_match =
true;
210 ITERATE(list <string>, sit, (*it)->GetSynonyms()) {
211len2 = (*sit).size();
215word_start_s = (!pos_str && is_start) || !
isalpha(
str[pos_str - 1]);
216 ch2= (cp_s.size() <= len2) ?
' ': cp_s[len2];
218 if(!whole_wd || (!
isalpha(
ch2) && word_start_s)) {
219 if(
AdvancedStringCompare(cp_s.substr(len2), cp_m.substr(len1), str_cons, word_start_m & word_start_s, &target_match_len)) {
220recursive_match =
true;
232 if(!recursive_match) {
238 else if( ig_space && (
isspace(cp_m[0]) ||
isspace(cp_s[0])) ) {
247 else if(ig_punct && (
ispunct(cp_m[0]) ||
ispunct(cp_s[0]) )) {
262 if(
match&& !recursive_match) {
263 while(pos_str <
str.size() && ((ig_space &&
isspace(
str[pos_str])) || (ig_punct &&
ispunct(
str[pos_str])))) {
267 while(pos_match < str_match.size() && ((ig_space &&
isspace(str_match[pos_match])) || (ig_punct &&
ispunct(str_match[pos_match])))) {
271 if(pos_match < str_match.size()) {
281 if(
match&& ini_target_match_len) {
282*ini_target_match_len += target_match_len;
304 unsigned len=
str.size();
305 while(!rval && pos <
len) {
324 else if(disallow_slash && ch ==
'/') {
338string::const_iterator it =
str.begin();
340 if((strip_space &&
isspace(*it)) || (strip_punct &&
ispunct(*it))) {
345}
while(++it !=
str.end());
351 static bool IsWholeWordMatch(
const string& start,
const size_t& found,
const unsigned& match_len,
booldisallow_slash =
false)
359 else if(start.empty() || found == string::npos) {
368after_idx = found + match_len;
369 if(after_idx < start.size() &&
DisallowCharacter(start[after_idx], disallow_slash)) {
384 size_tcp =
str.substr(0, hyphen-1).find_last_not_of(
' ');
385 if(cp != string::npos) {
386cp =
str.substr(0, cp).find_last_not_of(
" ,;");
388 if(cp == string::npos) {
392 unsigned len= hyphen - cp;
397cp =
str.find_first_not_of(
' ', hyphen+1);
398 if(cp != string::npos) {
399cp =
str.find_first_not_of(
" ,;");
401 if(cp == string::npos) {
409second =
str.substr(hyphen+1,
len);
413 if(
first.empty() || second.empty()) {
429 if(
str.find_first_not_of(digit_str) != string::npos) {
438 stringnew_first, new_second, new_str;
445 else if(
first.empty() || second.empty()) {
449 intstr_num, first_num, second_num;
450str_num = first_num = second_num = 0;
453 stringcomp_str1, comp_str2;
459 if((str_num > first_num && str_num < second_num) || (str_num > second_num && str_num < first_num)) {
465prefix_len =
first.find_first_of(digit_str) + 1;
467new_str =
str.substr(prefix_len - 1);
468new_first =
first.substr(prefix_len - 1);
469comp_str1 =
str.substr(0, prefix_len);
470comp_str2 =
first.substr(0, prefix_len);
475 if((str_num > first_num && str_num < second_num) || (str_num > second_num && str_num < first_num)) {
483 while(prefix_len <
first.size() && prefix_len < second.size() &&
first[prefix_len] == second[prefix_len]) {
488comp_str1 =
str.substr(0, prefix_len);
489comp_str2 =
first.substr(0, prefix_len);
490 if(prefix_len <=
first.size() && prefix_len <= second.size() &&
isdigit(
first[prefix_len-1]) &&
isdigit(second[prefix_len-1]) && comp_str1 == comp_str2) {
491new_first =
first.substr(prefix_len);
492new_second = second.substr(prefix_len);
493new_str =
str.substr(prefix_len);
498 if((str_num > first_num && str_num < second_num) || (str_num > second_num && str_num < first_num)) {
504 size_tidx1, idx2, idx_str;
505 stringsuf1, suf2, sub_str;
506idx1 =
first.find_first_not_of(digit_str);
507suf1 =
first.substr(prefix_len + idx1);
508idx2 = second.find_first_not_of(digit_str);
509suf2 = second.substr(prefix_len + idx2);
510idx_str =
str.find_first_not_of(digit_str);
511sub_str =
str.substr(prefix_len + idx_str);
512 if(suf1 == suf2 && suf1 == sub_str) {
517 if((str_num > first_num && str_num < second_num) || (str_num > second_num && str_num < first_num)) {
530 if(list.empty() ||
str.empty()) {
534 size_tidx =
str.find_first_not_of(alpha_str);
535 if(idx == string::npos) {
539idx =
str.substr(idx).find_first_not_of(digit_str);
542 size_thyphen = list.find(
'-');
544 stringrange_start, range_end;
545 while(hyphen != string::npos && !rval) {
547hyphen = list.substr(1).find(
'-');
555hyphen = list.find(
'-', hyphen + 1);
568 stringthis_str(
str);
595tmp_cons.
Assign(*str_cons);
605 stringsearch(this_str), pattern(tmp_cons.
GetMatch_text());
616 if(string::npos == pFound) {
621 while(!rval && pFound != string::npos) {
623search.find(pattern, pFound+1):
625rval = (pFound != string::npos)?
640 while(pFound != string::npos && !rval) {
641 if((pFound + pattern.size()) == search.size()) {
644pFound = string::npos;
647 if(pattern.empty()) {
667 if(pFound == string::npos) {
672 while(!rval && pFound != string::npos) {
674 if(pFound != string::npos) {
708 for(
auto& it: conset.
Get()) {
710cerr <<
"Bad suspect rule constraint!\n";
741constraint.
Reset(&
GetFind().GetString_constraint());
749 string str=
"Unknown replacement function";
759 str=
"replace '"+ func.
GetHaem_replace() +
"' with 'heme' if whole word, 'hem' otherwise";
772loc_word = cons.
GetNot_present() ?
"does not contain":
"contains";
775loc_word = cons.
GetNot_present() ?
"does not equal":
"equals";
778loc_word = cons.
GetNot_present() ?
"does not start with":
"starts with";
781loc_word = cons.
GetNot_present() ?
"does not end with":
"ends with";
784loc_word = cons.
GetNot_present() ?
"is not one of":
"is one of";
791 if((*it)->CanGetSynonyms() && !(*it)->GetSynonyms().empty()) {
795CWord_substitution::TSynonyms::const_iterator z = sn;
796syns += (++z == synonyms.end()) ?
" and ":
", ";
798syns +=
"\'"+ *sn +
"\'";
800sub_words += sub_words.empty() ?
"":
", ";
801sub_words +=
"allow '"+ ((*it)->CanGetWord() ? (*it)->GetWord() :
"") +
"' to be replaced by "+ syns;
802 if((*it)->GetCase_sensitive()) sub_words +=
", case-sensitive";
803 if((*it)->GetWhole_word()) sub_words +=
", whole word";
812params += cons.
GetIgnore_weasel() ? params.empty() ?
"ignore \'putative\' synonyms":
", ignore \'putative\' synonyms":
kEmptyStr;
815 str+= params.empty() ?
kEmptyStr:
" ("+ params +
")";
828 switch(func.
Which()) {
832 return "may contain plural";
836 return "Three or more numbers together";
838 return "contains underscore";
842 return "is all capital letters";
844 return "contains unbalanced brackets or parentheses";
849 return "contains \'"+ func.
GetHas_term() +
"\' at start or separated from other letters by numbers, spaces, or punctuation, but does not also contain 'domain'";
853 return "Unknown search function";
859 switch(pos.
Which()) {
885partial =
" that are partial on both ends";
888partial =
" that are complete on both ends";
891partial =
" that are 5' complete and 3' partial";
894partial =
" that are 5' partial and 3' complete";
896 stringlocation_type;
898location_type =
" with single interval";
901location_type =
" with joined intervals";
904location_type =
" with ordered intervals";
909dist5 = dist5.empty() ? dist5 :
" with 5\' end "+ dist5;
914dist3 = dist3.empty() ? dist3 :
" with 3\' end "+ dist3;
918seq_word =
"nucleotide sequences";
921seq_word =
"protein sequences";
925strand =
" on plus strands";
928strand =
" on minus strands";
930 if(partial.empty() && location_type.empty() && dist5.empty() && dist3.empty() && seq_word.empty() && strand.empty()) {
933 string str=
"only objects";
935 if(strand.empty() && !seq_word.empty()) {
936 str+=
" on "+ seq_word;
938 else if(!strand.empty()) {
940 str+= seq_word.empty() ?
kEmptyStr:
" of "+ seq_word;
943 str+= location_type;
952 string str=
"Invalid field type";
953 switch(vnp.
Which()) {
956 return "e_Source_qual";
962 return "missing field";
971 return label.empty() ?
"Unknown feature":
label;
976 return "e_Cds_gene_prot";
980 return "e_Molinfo_field";
988 return "e_Rna_field";
991 return "e_Struc_comment_field";
1020 switch(choice.
Which()) {
1031 return "[[CDS Gene Prot QUAL CONSTRAINT]]";
1033 return "[[CDS Gene Prot PSEUDO CONSTRAINT]]";
1036 return "[[SEQUENCE CONSTRAINT]]";
1039 return "[[PUB CONSTRAINT]]";
1044 return "[[MOLINFO CONSTRAINT]]";
1047 return "[[FIELD MISSING CONSTRAINT]]";
1050 return "[[TRANSLATION CONSTRAINT]]";
1074 static const char* rule_type[] = {
1079 "Organelles not appropriate in prokaryote",
1080 "Suspicious phrase; should this be nonfunctional?",
1081 "May contain database identifier more appropriate in note; remove from product name",
1082 "Remove organism from product name",
1083 "Possible parsing error or incorrect formatting; remove inappropriate symbols",
1084 "Implies evolutionary relationship; change to -like protein",
1085 "Consider adding 'protein' to the end of the product name",
1086 "Correct the name or use 'hypothetical protein'",
1087 "Use American spelling",
1088 "Use short product name instead of descriptive phrase",
1089 "use protein instead of gene as appropriate" 1105 if(!except.empty())
out+=
" but not "+ except;
1106 if(!feat_constraint.empty())
out+=
", "+ feat_constraint;
1107 if(!replace.empty())
out+=
", "+ replace;
1109 if(!descr.empty())
out+=
" Description: "+ descr;
bool IsAllPunctuation(const string &str)
static bool CaseNCompareEqual(string str1, string str2, unsigned len1, bool case_sensitive)
static bool DoesSingleStringMatchConstraint(const string &str, const CString_constraint *str_cons)
bool IsAllCaps(const string &str)
static bool IsWholeWordMatch(const string &start, const size_t &found, const unsigned &match_len, bool disallow_slash=false)
static bool DisallowCharacter(const char ch, bool disallow_slash)
static const string SkipWeasel(const string &str)
static bool GetSpanFromHyphenInString(const string &str, const size_t &hyphen, string &first, string &second)
static bool IsStringInSpanInList(const string &str, const string &list)
static bool IsStringConstraintEmpty(const CString_constraint *constraint)
static bool AdvancedStringCompare(const string &str, const string &str_match, const CString_constraint *str_cons, bool is_start, unsigned *ini_target_match_len=0)
static bool StringIsPositiveAllDigits(const string &str)
bool IsAllLowerCase(const string &str)
static string StripUnimportantCharacters(const string &str, bool strip_space, bool strip_punct)
static bool IsStringInSpan(const string &str, const string &first, const string &second)
static bool AdvancedStringMatch(const string &str, const CString_constraint *str_cons)
User-defined methods of the data storage class.
bool ApplyToString(string &result, const CMatchString &str, CConstRef< CString_constraint > find) const
bool Match(const CMatchString &str) const
void SetMatch_text(const TMatch_text &value)
string SummarizeLocationConstraint(const CLocation_constraint &) const
string SummarizeEndDistance(const CLocation_pos_constraint &) const
string GetRuleTypeName(void) const
string SummarizeSourceConstraint(const CSource_constraint &) const
bool ApplyToString(string &result, const CMatchString &str) const
bool StringMatchesSuspectProductRule(const CMatchString &str) const
string SummarizeRule(void) const
string SummarizeConstraintSet(const CConstraint_choice_set &) const
string SummarizeConstraint(const CConstraint_choice &) const
string SummarizeSearchFunc(const CSearch_func &) const
string SummarizeFieldType(const CField_type &) const
string SummarizeStringConstraint(const CString_constraint &) const
string SummarizeReplaceRule(const CReplace_rule &) const
string SummarizeFieldConstraint(const CField_constraint &) const
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
static vector< string > arr
std::ofstream out("events_result.xml")
main entry point for tests
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static const char * str(char *buf, int n)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define ENUM_METHOD_NAME(EnumName)
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
static string & ToUpper(string &str)
Convert string to upper case â string& version.
static string & ToLower(string &str)
Convert string to lower case â string& version.
static const char label[]
TCase_sensitive GetCase_sensitive(void) const
Get the Case_sensitive member data.
TToo_long GetToo_long(void) const
Get the variant data.
const TIgnore_words & GetIgnore_words(void) const
Get the Ignore_words member data.
TStrand GetStrand(void) const
Get the Strand member data.
TIgnore_space GetIgnore_space(void) const
Get the Ignore_space member data.
E_Choice Which(void) const
Which variant is currently selected.
const TLocation & GetLocation(void) const
Get the variant data.
const TField & GetField(void) const
Get the variant data.
const TSimple_replace & GetSimple_replace(void) const
Get the variant data.
TMatch_location GetMatch_location(void) const
Get the Match_location member data.
const TPrefix_and_numbers & GetPrefix_and_numbers(void) const
Get the variant data.
const TReplace & GetReplace(void) const
Get the Replace member data.
E_Choice Which(void) const
Which variant is currently selected.
const TString & GetString(void) const
Get the variant data.
const THaem_replace & GetHaem_replace(void) const
Get the variant data.
bool CanGetReplace(void) const
Check if it is safe to call GetReplace method.
TIs_all_caps GetIs_all_caps(void) const
Get the Is_all_caps member data.
bool CanGetFeat_constraint(void) const
Check if it is safe to call GetFeat_constraint method.
bool IsSetDescription(void) const
Check if a value has been assigned to Description data member.
TWhole_string GetWhole_string(void) const
Get the Whole_string member data.
TPartial3 GetPartial3(void) const
Get the Partial3 member data.
TWhole_word GetWhole_word(void) const
Get the Whole_word member data.
TRule_type GetRule_type(void) const
Get the Rule_type member data.
const TDescription & GetDescription(void) const
Get the Description member data.
TIgnore_weasel GetIgnore_weasel(void) const
Get the Ignore_weasel member data.
const TField & GetField(void) const
Get the Field member data.
TMove_to_note GetMove_to_note(void) const
Get the Move_to_note member data.
const TExcept & GetExcept(void) const
Get the Except member data.
TDist_from_end GetDist_from_end(void) const
Get the variant data.
const TFeature_field & GetFeature_field(void) const
Get the variant data.
EMacro_feature_type
feature values
TIgnore_punct GetIgnore_punct(void) const
Get the Ignore_punct member data.
const TEnd5 & GetEnd5(void) const
Get the End5 member data.
TLocation_type GetLocation_type(void) const
Get the Location_type member data.
const TFind & GetFind(void) const
Get the Find member data.
E_Choice Which(void) const
Which variant is currently selected.
EPartial_constraint
Access to EPartial_constraint's attributes (values, names) as defined in spec.
const TFeat_constraint & GetFeat_constraint(void) const
Get the Feat_constraint member data.
EString_location
simple constraints
E_Choice Which(void) const
Which variant is currently selected.
TN_or_more_brackets_or_parentheses GetN_or_more_brackets_or_parentheses(void) const
Get the variant data.
TSeq_type GetSeq_type(void) const
Get the Seq_type member data.
const TMatch_text & GetMatch_text(void) const
Get the Match_text member data.
const TString_constraint & GetString_constraint(void) const
Get the String_constraint member data.
bool CanGetEnd5(void) const
Check if it is safe to call GetEnd5 method.
TPartial5 GetPartial5(void) const
Get the Partial5 member data.
TNot_present GetNot_present(void) const
Get the Not_present member data.
TMax_dist_from_end GetMax_dist_from_end(void) const
Get the variant data.
bool IsSetFind(void) const
Check if a value has been assigned to Find data member.
const THas_term & GetHas_term(void) const
Get the variant data.
TIs_all_punct GetIs_all_punct(void) const
Get the Is_all_punct member data.
E_Choice Which(void) const
Which variant is currently selected.
bool CanGetEnd3(void) const
Check if it is safe to call GetEnd3 method.
bool CanGetExcept(void) const
Check if it is safe to call GetExcept method.
TIs_all_lower GetIs_all_lower(void) const
Get the Is_all_lower member data.
TWeasel_to_putative GetWeasel_to_putative(void) const
Get the Weasel_to_putative member data.
const Tdata & Get(void) const
Get the member data.
const TReplace & GetReplace(void) const
Get the Replace member data.
const TReplace_func & GetReplace_func(void) const
Get the Replace_func member data.
const TSource & GetSource(void) const
Get the variant data.
bool IsSetReplace(void) const
Check if a value has been assigned to Replace data member.
bool CanGetReplace(void) const
Check if it is safe to call GetReplace method.
const TString_constraint & GetString_constraint(void) const
Get the variant data.
TType GetType(void) const
Get the Type member data.
E_Choice Which(void) const
Which variant is currently selected.
TMin_dist_from_end GetMin_dist_from_end(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
bool CanGetMatch_text(void) const
Check if it is safe to call GetMatch_text method.
bool CanGetIgnore_words(void) const
Check if it is safe to call GetIgnore_words method.
const TEnd3 & GetEnd3(void) const
Get the End3 member data.
const TField & GetField(void) const
Get the Field member data.
@ e_N_or_more_brackets_or_parentheses
@ eStrand_constraint_plus
@ eStrand_constraint_minus
@ e_not_set
No variant selected.
@ e_not_set
No variant selected.
@ eSeqtype_constraint_prot
@ eSeqtype_constraint_nuc
@ ePartial_constraint_complete
@ ePartial_constraint_partial
@ ePartial_constraint_either
@ eString_location_inlist
@ eString_location_equals
@ eString_location_contains
@ eString_location_starts
@ eLocation_type_constraint_ordered
@ eLocation_type_constraint_joined
@ eLocation_type_constraint_single_interval
unsigned int
A callback function used to compare two keys in a database.
static const BitmapCharRec ch1
static const BitmapCharRec ch2
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4