prev_len = iter -
in.begin();
60 size_tremaining =
in.length() - prev_len - 1;
63 tolower((
unsigned char)(*iter)) ==
tolower((
unsigned char)(*(iter - 1))) &&
64 tolower((
unsigned char)(*iter)) !=
'c') {
67 switch(
tolower((
unsigned char)(*iter))) {
82*(iter - 1) !=
'm') {
93*
out+= (char)
tolower((
unsigned char)(*iter));
99*(iter + 2) ==
'a') {
105 if(remaining > 1 && *(iter + 1) ==
'h') {
112( *(iter + 1) ==
'e'||
113*(iter + 1) ==
'i'||
114*(iter + 1) ==
'y') ) {
120 if(remaining && *(iter + 1) ==
'k') {
127 if(remaining >= 2 && prev_len) {
128 if( *(iter + 1) ==
'g'&&
129( *(iter + 2) ==
'e'||
130*(iter + 2) ==
'i'||
131*(iter + 2) ==
'y') ) {
141 if(remaining == 1 && *(iter + 1) ==
'h') {
142 if(prev_len > 2 && ( *(iter - 3) ==
'b'||
143*(iter - 3) ==
'd') ) {
149 if(prev_len > 3 && *(iter - 3) ==
'h') {
154 if(prev_len > 4 && *(iter - 4) ==
'h') {
165 if(remaining == 1 &&
166(*(iter + 1) ==
'n'|| *(iter + 1) ==
'm')) {
171 if(remaining && !prev_len && *(iter + 1) ==
'n') {
177 if(remaining == 3 &&
178*(iter + 1) ==
'n'&&
179*(iter + 1) ==
'e'&&
180*(iter + 1) ==
'd') {
185 if( (remaining > 1 && *(iter + 1) ==
'e') ||
186(remaining && ( *(iter + 1) ==
'i'||
187*(iter + 1) ==
'y') ) ) {
197 if(remaining && prev_len &&
198vowels.
find(*(iter + 1)) != string::npos &&
200*
out+= (char)
tolower((
unsigned char)(*iter));
203 else if( !prev_len ) {
204*
out+= (char)
tolower((
unsigned char)(*iter));
210 if(!prev_len && remaining && *(iter + 1) ==
'n') {
215*
out+= (char)
tolower((
unsigned char)(*iter));
219 if(prev_len == 0 && remaining && *(iter + 1) ==
'n') {
224 if(remaining && *(iter + 1) ==
'h') {
228*
out+= (char)
tolower((
unsigned char)(*iter));
237*(iter + 1) ==
'i'&&
238( *(iter + 2) ==
'o'||
239*(iter + 2) ==
'a') ) {
244 if(remaining && *(iter + 1) ==
'h') {
250*(iter + 1) ==
'c'&&
251( *(iter + 2) ==
'e'||
252*(iter + 2) ==
'i'||
253*(iter + 2) ==
'y') ) {
261*(iter + 1) ==
'i'&&
262( *(iter + 2) ==
'o'||
263*(iter + 2) ==
'a') ) {
268 if(remaining && *(iter + 1) ==
'h') {
273*
out+= (char)
tolower((
unsigned char)(*iter));
282 if(remaining && ( *(iter + 1) ==
'h'||
283*(iter + 1) ==
'r') ) {
284*
out+= *(iter + 1);
288*
out+= (char)
tolower((
unsigned char)(*iter));
292 if( *(iter - 1) ==
'a'||
293*(iter - 1) ==
'e'||
294*(iter - 1) ==
'i'||
295*(iter - 1) ==
'o'||
296*(iter - 1) ==
'u') {
297*
out+= (char)
tolower((
unsigned char)(*iter));
306 if(remaining && prev_len &&
307( *(iter + 1) ==
'a'||
308*(iter + 1) ==
'e'||
309*(iter + 1) ==
'i'||
310*(iter + 1) ==
'o'||
311*(iter + 1) ==
'u')) {
314*
out+= (char)
tolower((
unsigned char)(*iter));
322 if(
out->length() == max_chars) {
333 size_tmax_chars,
charpad_char)
335 static const charsc_SoundexLut[256] = {
3360x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3370x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3380x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3390x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3400x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3410x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3420x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3430x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3440x00, 0x00,
'1',
'2',
'3', 0x00,
'1',
'2',
3450x00, 0x00,
'2',
'2',
'4',
'5',
'5', 0x00,
346 '1',
'2',
'6',
'2',
'3', 0x00,
'1', 0x00,
347 '2', 0x00,
'2', 0x00, 0x00, 0x00, 0x00, 0x00,
3480x00, 0x00,
'1',
'2',
'3', 0x00,
'1',
'2',
3490x00, 0x00,
'2',
'2',
'4',
'5',
'5', 0x00,
350 '1',
'2',
'6',
'2',
'3', 0x00,
'1', 0x00,
351 '2', 0x00,
'2', 0x00, 0x00, 0x00, 0x00, 0x00,
3520x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3530x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3540x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3550x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3560x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3570x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3580x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3590x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3600x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3610x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3620x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3630x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3640x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3650x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3660x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3670x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
377string::const_iterator iter =
in.begin();
378*
out+= (char)
toupper((
unsigned char)(*iter));
383 charc = sc_SoundexLut[(
int)(
unsigned char)*iter2];
384 if(c && *(
out->end() - 1) != c) {
386 if(
out->length() == max_chars) {
393 if(
out->length() < max_chars) {
410 const string* pstr1 = &str1;
411 const string* pstr2 = &str2;
412 if(pstr1->length() > pstr2->length()) {
416string::const_iterator iter1 = pstr1->begin();
417string::const_iterator iter2 = pstr2->begin();
418 for( ; iter1 != pstr1->end() && iter2 != pstr2->end(); ) {
419 charc1_0 = (char)
tolower((
unsigned char)(*iter1));
420 charc2_0 = (char)
tolower((
unsigned char)(*iter2));
431 intmax_radius = (
int)
min(pstr1->end() - iter1,
432string::difference_type(3));
434string::const_iterator best_iter1 = iter1 + 1;
435string::const_iterator best_iter2 = iter2 + 1;
438 for(
intradius = 1; radius <= max_radius; ++radius) {
440 charcorner1 = *(iter1 + radius);
441 charcorner2 = *(iter2 + radius);
443 for(
int i= radius;
i>= 0; --
i) {
444c1_0 = (char)
tolower((
unsigned char)(*(iter1 +
i)));
445c2_0 = (char)
tolower((
unsigned char)(*(iter2 +
i)));
446 if(c1_0 == corner2) {
449best_iter1 = iter1 +
i;
450best_iter2 = iter2 + radius;
453 if(c2_0 == corner1) {
456best_iter1 = iter1 + radius;
457best_iter2 = iter2 +
i;
470dist += (pstr1->end() - iter1) + (pstr2->end() - iter2);
481 const string* short_str = &str1;
482 const string* long_str = &str2;
483 if(long_str->size() < short_str->size()) {
484 swap(short_str, long_str);
486 size_tshort_size = short_str->size();
487 size_tlong_size = long_str->size();
489 size_t* row0_ptr = buf0;
490 size_t* row1_ptr = buf1;
492row0.resize(short_size + 1);
493row1.resize(short_size + 1);
502 for(
i= 0;
i<= short_size; ++
i) {
509 for(
i= 0;
i< long_size; ++
i) {
512 for(j = 0; j < short_size; ++j) {
513 intc0 =
tolower((
unsigned char) (*short_str)[j]);
514 intc1 =
tolower((
unsigned char) (*long_str)[
i]);
515 size_tcost = (c0 == c1 ? 0 : 1);
517 min(row0_ptr[j] + cost,
518 min(row0_ptr[j + 1] + 1, row1_ptr[j] + 1));
524 swap(row0_ptr, row1_ptr);
527 returnrow0_ptr[short_size];
538 size_tmax_metaphone)
544 return Score(word1, meta1, word2, meta2);
550 const string& word2,
const string& meta2,
559score = word1.length() - score;
594 for(
int i= 256;
i--; ) {
598 for(
int i= 0;
i< 26; ++
i) {
623 returnfill_types->GetChar(c);
627string::const_iterator end)
643 for( ; iter != end; ++iter) {
645 if(
type!= prev_type) {
652 for( ; iter != end; ++iter) {
654 if(
type!= prev_type) {
690 static inline bool s_EndsWith(
const string& str1,
const string& str2)
692string::const_reverse_iterator iter1(str1.end());
693string::const_reverse_iterator end1 (str1.begin());
694string::const_reverse_iterator iter2(str2.end());
695string::const_reverse_iterator end2 (str2.begin());
696 for( ; iter1 != end1 && iter2 != end2; ++iter1, ++iter2) {
697 if(*iter1 != *iter2) {
704 static inline bool s_EndsWith(
const string& str1,
const char* p)
706string::const_reverse_iterator iter1(str1.end());
707string::const_reverse_iterator end1 (str1.begin());
708 const char* iter2 = p + strlen(p) - 1;
709 for( ; iter1 != end1; ++iter1, --iter2) {
710 if(*iter1 != *iter2) {
722 for(string::size_type
i= 0;
i<
str.size(); ++
i) {
732 const string&
match,
733 const string& substitute,
736 if(word.length() <
match.length()) {
745word.end() -
match.length()) <= min_measure) {
749word.erase(word.length() -
match.length());
757 const char* substitute,
760 size_tmatch_len = strlen(
match);
761 if(word.length() < match_len) {
770word.end() - match_len) <= min_measure) {
774word.erase(word.length() - match_len);
782 size_tnew_ending_size,
785 size_tmatch_len = strlen(
match);
786 if(word.length() < match_len) {
795word.end() - match_len) <= min_measure) {
799word.erase(word.length() - match_len + new_ending_size);
807 string&
str= *out_str;
820 if(
str[
str.length()-1 ] ==
's') {
846 str.erase(
str.length() - 1);
851 str.erase(
str.length() - 2);
855 str.erase(
str.length() - 3);
864}
else if(
str[
str.length() - 1] !=
'l'&&
865 str[
str.length() - 1] !=
's'&&
866 str[
str.length() - 1] !=
'z'&&
868 str.erase(
str.length() - 1);
869}
else if(
str.length() == 3 &&
879 if(
str[
str.length() - 1] ==
'y'&&
881 str[
str.length() - 1] =
'i';
886 if(
str.length() > 3) {
887 switch(
str[
str.length() - 2 ]) {
907 if(
str[
str.length()-1 ] ==
'i'&&
964 static constTReplace rep_step3[] = {
975 static const char* s_Step3_Endings(
"eils");
976 if(
CTempString(s_Step3_Endings).find(
str[
str.length()-1]) != string::npos) {
977 for(
constTReplace* p = rep_step3; p->first; ++p) {
986 if(
str.length() > 2) {
987 switch(
str[
str.length() - 2]) {
989 if(
str[
str.length()-1 ] ==
'l') {
1001 if(
str[
str.length()-1 ] ==
'e') {
1073 str[
str.length() - 1] ==
'l'&&
1074 str[
str.length() - 2] ==
'l') {
1075 str.erase(
str.length() - 1);
static void Stem(const string &in_str, string *out_str)
Compute the Porter stem for a given word.
static void GetSoundex(const string &in, string *out, size_t max_chars=eMaxSoundex, char pad_char='0')
Compute the Soundex key for a given word. The Soundex key is defined as:
static size_t GetEditDistance(const string &str1, const string &str2, EDistanceMethod method=eEditDistance_Exact)
static int Score(const string &word1, const string &word2, size_t max_metaphone=eMaxMetaphone)
Compute a nearness score for two different words or phrases.
EDistanceMethod
Return the Levenshtein edit distance between two words.
@ eEditDistance_Exact
This method performs an exhaustive search, and has an algorithmic complexity of O(n x m),...
@ eEditDistance_Similar
This method performs a simpler search, looking for the distance between similar words.
static void GetMetaphone(const string &in, string *out, size_t max_chars=eMaxMetaphone)
Compute the Metaphone key for a given word. Metaphone is a more advanced algorithm than Soundex; inste...
ECharType s_char_type[256]
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
ECharType
Porter's Stemming Algorithm.
static const size_t kMaxMetaphoneStack
static bool s_EndsWith(const string &str1, const string &str2)
static NCBI_UNUSED bool s_ReplaceEnding(string &word, const string &match, const string &substitute, int min_measure=0)
static ECharType s_GetCharType(int c)
static bool s_TruncateEnding(string &word, const char *match, size_t new_ending_size, int min_measure=0)
static string::size_type s_FindFirstVowel(const string &str)
static int s_MeasureWord(string::const_iterator iter, string::const_iterator end)
std::ofstream out("events_result.xml")
main entry point for tests
static const char * str(char *buf, int n)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
unsigned int
A callback function used to compare two keys in a database.
std::istream & in(std::istream &in_, double &x_)
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
Template structure SStaticPair is a simplified replacement for STL pair<>. Main reason for introducing this...
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4