x_IsWordCharacter(
charc) {
65 return(c >=
'0'&& c <=
'9') || (c >=
'a'&& c <=
'z') || (c >=
'A'&& c <=
'Z') || c ==
'_'|| c ==
'-';
68 voidx_Split(
const string& s, vector<string>& v)
72 for(
i= 0;
i< s.length();
i++) {
75v.push_back(s.substr(
n,
i-
n));
79 else if(x_IsWordCharacter(s[
i])) {
83 else if(!x_IsWordCharacter(s[
n])) {
84v.push_back(s.substr(
n,
i-
n));
92 else if(x_IsWordCharacter(s[
n])) {
93v.push_back(s.substr(
n,
i-
n));
99v.push_back(s.substr(
n,
i-
n));
103 stringx_Assemble(vector<string>& v, vector<bool>& skip)
107 for(
size_t i= 0;
i< v.size();
i++) {
109 if(!
first&& x_IsWordCharacter(v[
i][0])) {
121 CTempStringx_StripUnimportantCharacters(
string& storage,
const CTempString&
str,
boolstrip_space,
boolstrip_punct)
126 if(!strip_space && !strip_punct)
129 boolhas_stripped =
false;
131 const char* s =
str.data();
132 for(;
i<
str.size();
i++, s++)
134 if((strip_space &&
isspace(*s)) || (strip_punct &&
ispunct(*s)))
138storage.reserve(
str.size()-1);
140storage.append(
str.data(),
i);
141has_stripped =
true;
147storage.push_back(*s);
158 boolx_DisallowCharacter(
const charch,
booldisallow_slash)
161 else if(disallow_slash && ch ==
'/')
return true;
171 const auto& callback = [&](
size_t n,
size_tp) {
186 #include "weasel.inc" 188 static constTLocalFSM s_FSM{s_compact, s_hits_init_1, s_hits_init_2, s_states,
nullptr};
232 for(
unsigned i=0;
i<
match.size();
i++) {
264 while(it !=
str.end() && !
isalpha((
unsigned char) (*it))) {
265 if(
isdigit( (
unsigned char) (*it))) {
271 if(it !=
str.end()) {
272 return isalpha((
unsigned char) (*it)) &&
isupper((
unsigned char) (*it));
282 for(
size_t i= 0;
i<
str.size() && rval; ++
i) {
285rval = rval &&
isupper( (
unsigned char)
str[
i] );
288}
else if(
str[
i] ==
'-'){
290 if((
i> 0 && !
isalpha( (
unsigned char)
str[
i- 1])) ||
291(
i+ 1 <
str.size() && !
isalpha( (
unsigned char)
str[
i+ 1] )))
293}
else if(
isdigit( (
unsigned char)
str[
i])){
294 if(
i+ 1 <
str.size() &&
isalpha( (
unsigned char)
str[
i+ 1])) {
310 else if(start.
empty() || found == string::npos) {
315 if(x_DisallowCharacter (start[found-1], disallow_slash)) {
319after_idx = found + match_len;
320 if(after_idx < start.
size() && x_DisallowCharacter(start[after_idx], disallow_slash)) {
333vector<size_t> match_lens = (*word)->GetMatchLens(
str, pattern, prev_char);
334 if(match_lens.size() > 0) {
335 size_tword_len = (*word)->GetWord().length();
337 size_tthis_match = 0;
338 charthis_prev_char = 0;
340this_prev_char =
str.c_str()[(*len) - 1];
342this_prev_char = prev_char;
344 boolrequire_end =
false;
349(!require_end || this_match ==
str.substr(*len).length())) {
351match_len += this_match;
359 if(pattern.length() == 0) {
363 if(
str.length() == 0) {
390 if(
str[0] == pattern[0]) {
406 size_tmatch_len = 0;
408 if(ini_target_match_len !=
NULL) {
409*ini_target_match_len = match_len;
422 size_tmatch_len = 0;
433 while(!rval && pos <
len) {
438 size_tsub_match_len = 0;
520 returnsearch == pattern;
523 if(found ==
NPOS) {
543 if(
str.original().original().empty()) {
569cout <<
"eString_location_inlist is not supported!\n";
585cout << pattern <<
" <===> "<< search <<
"\nSelf-weasel case with ignored words is not supported!\n";
591 strings_search, p_search;
604vector<bool> skip(v.size(),
false);
605vector<size_t>
test;
606 for(
size_t i= 0;
i< v.size();
i++) {
608 unsignedm = (1 << k);
610 stringlower = v[
i];
614 test.push_back(
i);
623 stringguess = x_Assemble(v, skip);
628 for(
size_t i= 0;
i<
test.size();
i++) {
629 if(skip[
test[
i]]) {
630skip[
test[
i]] =
false;
634skip[
test[
i]] =
true;
635 if(
i==
test.size() - 1) {
658 size_tmatch_len = 0;
664 offset+= replace.length();
677 const string&
val=
str;
684}
else if(
Empty()) {
697 size_tmatch_len = 0;
700 result.append(
val.data()+match_len,
val.length()-match_len);
712 while(!rval &&
offset<
val.length()) {
713 size_tmatch_len = 0;
718&&
offset+ match_len ==
val.length()) {
static constexpr auto s_WeaselWords
User-defined methods of the data storage class.
ncbi::TMaskedQueryRegions mask
const string & uppercase() const
const string & original() const
const string & lowercase() const
CTempString GetNoweaselLC() const
CTempString GetNoweasel() const
CTempString GetNoweaselUC() const
CTempString::size_type m_noweasel_start
CAutoLowerCase m_original
unsigned GetWeaselMask() const
const CAutoLowerCase & original() const
void Search(const char *input, VoidCall1 found_callback) const
CTempString x_GetCompareString(const CMatchString &s, ECase e_case=e_automatic) const
bool x_IsAllSkippable(const CTempString &str) const
bool x_IsWholeWordMatch(const CTempString &start, size_t found, size_t match_len, bool disallow_slash=false) const
CTempString x_GetConstraintString(ECase e_case=e_automatic) const
bool x_MatchFound(CTempString &search, CTempString &pattern) const
bool x_IsFirstCap(const CMatchString &str) const
bool ReplaceStringConstraintPortionInString(string &result, const CMatchString &str, const string &replace) const
bool x_IsAllLowerCase(const CMatchString &str) const
bool x_IsAllPunctuation(const CMatchString &str) const
virtual ~CString_constraint()
bool x_ReplaceContains(string &val, const string &replace) const
bool Match(const CMatchString &str) const
bool x_DoesSingleStringMatchConstraint(const CMatchString &str) const
bool x_IsAllCaps(const CMatchString &str) const
bool x_IsFirstEachCap(const CMatchString &str) const
bool x_AdvancedStringCompare(const string &str, const string &str_match, const char prev_char, size_t *ini_target_match_len=0) const
bool x_IsSkippable(const char ch) const
bool x_AdvancedStringMatch(const string &str, const string &tmp_match) const
bool x_PartialCompare(const string &str, const string &pattern, char prev_char, size_t &match_len) const
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
#define test(a, b, c, d, e)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static const char * str(char *buf, int n)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
uint8_t Uint1
1-byte (8-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
NCBI_NS_STD::string::size_type SIZE_TYPE
const char * const_iterator
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
size_type size(void) const
Return the length of the represented array.
static string & ToLower(string &str)
Convert string to lower case -- string& version.
TCase_sensitive GetCase_sensitive(void) const
Get the Case_sensitive member data.
const TIgnore_words & GetIgnore_words(void) const
Get the Ignore_words member data.
bool IsSetMatch_location(void) const
Check if a value has been assigned to Match_location data member.
TIs_first_each_cap GetIs_first_each_cap(void) const
Get the Is_first_each_cap member data.
TIgnore_space GetIgnore_space(void) const
Get the Ignore_space member data.
TMatch_location GetMatch_location(void) const
Get the Match_location member data.
bool IsSetCase_sensitive(void) const
Check if a value has been assigned to Case_sensitive data member.
TIs_all_caps GetIs_all_caps(void) const
Get the Is_all_caps member data.
TIs_first_cap GetIs_first_cap(void) const
Get the Is_first_cap member data.
bool IsSetNot_present(void) const
Check if a value has been assigned to Not_present data member.
TWhole_word GetWhole_word(void) const
Get the Whole_word member data.
TIgnore_weasel GetIgnore_weasel(void) const
Get the Ignore_weasel member data.
TIgnore_punct GetIgnore_punct(void) const
Get the Ignore_punct member data.
EString_location
simple constraints
const TMatch_text & GetMatch_text(void) const
Get the Match_text member data.
TNot_present GetNot_present(void) const
Get the Not_present member data.
list< CRef< CWord_substitution > > Tdata
TIs_all_punct GetIs_all_punct(void) const
Get the Is_all_punct member data.
TIs_all_lower GetIs_all_lower(void) const
Get the Is_all_lower member data.
bool CanGetMatch_text(void) const
Check if it is safe to call GetMatch_text method.
bool CanGetIgnore_words(void) const
Check if it is safe to call GetIgnore_words method.
bool IsSetIgnore_words(void) const
Check if a value has been assigned to Ignore_words data member.
@ eString_location_inlist
@ eString_location_equals
@ eString_location_contains
@ eString_location_starts
const TYPE & Get(const CNamedParameterList *param)
constexpr bool empty(list< Ts... >) noexcept
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4