m_Length(
str.length())
81 switch(it->GetOperation()) {
83len_insert += it->GetString().length();
86len_delete += it->GetString().length();
90edit_distance +=
max(len_insert, len_delete);
96edit_distance +=
max(len_insert, len_delete);
107TList::const_iterator max_pos = diffs.end();
112 if(
len> max_len) {
118 if(!max_len || max_pos == diffs.end()) {
121 returnmax_pos->GetString();
129 if(it->GetString().length() != it->GetLength()) {
133 "Possible fRemoveEOL flag was used, it is impossible " 134 "to perform cleanup and merge in this mode");
162 switch(it->GetOperation()) {
165off_second += it->GetLength();
169off_first += it->GetLength();
173off_first += it->GetLength();
174off_second += it->GetLength();
185 size_tcount_delete = 0;
186 size_tcount_insert = 0;
188CDiffList::TList::iterator this_diff =
m_List.begin();
189CDiffList::TList::iterator ins_diff =
m_List.end();
190CDiffList::TList::iterator del_diff =
m_List.end();
191CDiffList::TList::iterator equal_diff =
m_List.end();
193 while(this_diff !=
m_List.end()) {
194 const CTempString& current = this_diff->GetString();
195 switch(this_diff->GetOperation()) {
202this_diff =
m_List.erase(this_diff);
204ins_diff = this_diff++;
214this_diff =
m_List.erase(this_diff);
216del_diff = this_diff++;
223 if(count_delete + count_insert >= 1) {
225 if(count_delete != 0 && count_insert != 0) {
230 if(common_prefix_len) {
232 if(equal_diff ==
m_List.end()) {
239ins_str = ins_str.
substr(common_prefix_len);
240del_str = del_str.
substr(common_prefix_len);
245 if(common_suffix_len) {
248this_diff->SetString(
252ins_str = ins_str.
substr(0, ins_str.
length() - common_suffix_len);
253del_str = del_str.
substr(0, del_str.
length() - common_suffix_len);
256 if(common_prefix_len || common_suffix_len) {
257ins_diff->SetString(ins_str);
258del_diff->SetString(del_str);
262equal_diff = this_diff++;
266 if(equal_diff ==
m_List.end()) {
268equal_diff = this_diff++;
273this_diff =
m_List.erase(this_diff);
289 if(
m_List.size() < 3) {
294CDiffList::TList::iterator next_diff =
m_List.begin();
295CDiffList::TList::iterator prev_diff = next_diff++;
296CDiffList::TList::iterator this_diff = next_diff++;
298 boolchanges =
false;
300 while(next_diff !=
m_List.end()) {
302 if(prev_diff->GetOperation() ==
DIFF_EQUAL&&
323prev_diff->SetString(
327this_diff->SetString(
330next_diff =
m_List.erase(next_diff);
331 if(next_diff ==
m_List.end()) {
332next_diff = this_diff;
340next_diff->SetString(
344this_diff->SetString(
351prev_diff = this_diff;
352this_diff = next_diff;
369unique_ptr<CDeadline> real_deadline;
386 if(common_prefix_len) {
387common_prefix = s1.
substr(0, common_prefix_len);
388s1 = s1.
substr(common_prefix_len);
389s2 = s2.
substr(common_prefix_len);
395 if(common_suffix_len) {
396common_suffix = s1.
substr(s1.
length() - common_suffix_len);
404 if(common_prefix_len) {
407 if(common_suffix_len) {
462 if(!hm1_res && !hm2_res) {
464}
else if(!hm1_res) {
466}
else if(!hm2_res) {
470hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2;
515 while((j = short_str.
find(
seed, j + 1)) !=
NPOS) {
520 if(best_common.
length() < suffix_len + prefix_len) {
521best_common = short_str.
substr(j - suffix_len, suffix_len + prefix_len);
522best_longtext_a = long_str.
substr(0,
i- suffix_len);
523best_longtext_b = long_str.
substr(
i+ prefix_len);
524best_shorttext_a = short_str.
substr(0, j - suffix_len);
525best_shorttext_b = short_str.
substr(j + prefix_len);
528 if(best_common.
length() * 2 >= long_str.
length()) {
529hm[0] = best_longtext_a;
530hm[1] = best_longtext_b;
531hm[2] = best_shorttext_a;
532hm[3] = best_shorttext_b;
556 const intmax_d = (s1_len + s2_len + 1) / 2;
557 const intv_offset = max_d;
558 const intv_length = 2 * max_d;
559 int*v1 =
new int[v_length];
560 int*
v2=
new int[v_length];
561 for(
intx = 0; x < v_length; x++) {
565v1[v_offset + 1] = 0;
566 v2[v_offset + 1] = 0;
567 const int delta= s1_len - s2_len;
578 for(
intd = 0; d < max_d; d++)
584 for(
intk1 = -d + k1start; k1 <= d - k1end; k1 += 2) {
585 const intk1_offset = v_offset + k1;
587 if(k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1])) {
588x1 = v1[k1_offset + 1];
590x1 = v1[k1_offset - 1] + 1;
593 while(x1 < s1_len && y1 < s2_len && s1[x1] == s2[y1]) {
601}
else if(y1 > s2_len) {
604}
else if(
front) {
605 intk2_offset = v_offset +
delta- k1;
606 if(k2_offset >= 0 && k2_offset < v_length &&
v2[k2_offset] != -1) {
608 intx2 = s1_len -
v2[k2_offset];
621 for(
intk2 = -d + k2start; k2 <= d - k2end; k2 += 2) {
622 const intk2_offset = v_offset + k2;
624 if(k2 == -d || (k2 != d &&
v2[k2_offset - 1] <
v2[k2_offset + 1])) {
625x2 =
v2[k2_offset + 1];
627x2 =
v2[k2_offset - 1] + 1;
630 while(x2 < s1_len && y2 < s2_len &&
631s1[s1_len - x2 - 1] == s2[s2_len - y2 - 1]) {
639}
else if(y2 > s2_len) {
642}
else if(!
front) {
643 intk1_offset = v_offset +
delta- k2;
644 if(k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) {
645 intx1 = v1[k1_offset];
646 inty1 = v_offset + x1 - k1_offset;
729 if(short_str.
length() == 1) {
743 x_Diff(hm[0], hm[2], diffs_a);
744 x_Diff(hm[1], hm[3], diffs_b);
775unique_ptr<CDeadline> real_deadline;
783vector<CTempString> lines;
786s1_num_lines = lines.size();
788s2_num_lines = lines.size() - s1_num_lines;
793 strings = lines[
i];
800 if(
i!= s1_num_lines-1 &&
i!= lines.size()-1) {
808 typedefvector<size_type> TSequence;
809TSequence idx1(s1_num_lines);
810TSequence idx2(s2_num_lines);
813TStringToLineNumMap::const_iterator it;
814 strings = lines[
i];
821 if(
i< s1_num_lines-1) {
826idx1[
i] = it->second;
830TStringToLineNumMap::const_iterator it;
831 strings = lines[pos];
838 if(
i< s2_num_lines-1) {
843idx2[
i] = it->second;
848 if(s1_num_lines > 10000 && s2_num_lines > 10000) {
852vector< pair<size_type, dtl::elemInfo> > ses_seq = d.
getSes().getSequence();
853vector< pair<size_type, dtl::elemInfo> >::iterator it;
872 for(it = ses_seq.begin(); it != ses_seq.end(); it++)
880 boolis_last_line =
false;
887 line2= pos2 - s1_num_lines + 1;
888is_last_line = (
line2>= s2_num_lines);
895is_last_line = (
line1>= s1_num_lines);
903 line2= pos2 - s1_num_lines + 1;
904is_last_line = (
line2>= s2_num_lines);
918 if(s[
len-1] ==
'\r') {
938insertions.push_back(op);
942deletions.push_back(op);
946 if(!deletions.empty()) {
950 if(!insertions.empty()) {
960 if(!deletions.empty()) {
963 if(!insertions.empty()) {
966 _VERIFY(pos1 == s1_num_lines);
967 _VERIFY(pos2 == lines.size());
990CDiffList::TList::const_iterator start_iter,
991CDiffList::TList::const_iterator end_iter,
995 const chareol =
'\n';
996 unsigned longn1 = 0;
997 unsigned longn2 = 0;
1002CDiffList::TList::const_iterator it = start_iter;
1003 while(it != end_iter)
1006 switch(it->GetOperation()) {
1022 out<<
"@@ -"<< (
unsignedlong)hunk_s1 <<
","<< n1 <<
" +" 1023<< (
unsigned long)hunk_s2 <<
","<< n2 <<
" @@"<< eol;
1027 while(it != end_iter)
1030 switch(it->GetOperation()) {
1041 out<< op << it->GetString() << eol;
1050 unsigned intnum_common_lines)
1052 if(!
out.good()) {
1056 if(diffs.empty()) {
1061CDiffList::TList::const_iterator it = diffs.begin();
1062CDiffList::TList::const_iterator hunk_start = diffs.end();
1063CDiffList::TList::const_iterator hunk_end = diffs.end();
1065 boolis_hunk =
false;
1066 unsigned intnum_equal = 0;
1077 while(it != diffs.end()) {
1078 switch(it->GetOperation()) {
1089 if(!num_common_lines || !num_equal) {
1112 if(num_common_lines == 0) {
1118}
else if(!num_equal) {
1121}
else if(num_equal <= num_common_lines) {
1139}
else if(num_equal < num_common_lines) {
1167 case eEmpty:
return "eEmpty";
@ eEmpty
no filtering at all.
CDiffList â the list of diff operations.
CDiffOperation â The storage class for one diff operation.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
diff class template sequence must support random_access_iterator.
void compose()
compose Longest Common Subsequence and Shortest Edit Script.
Ses< elem > getSes() const
unordered_map< string, CDiffList::size_type > TStringToLineNumMap
CNcbiOstream & s_PrintUnifiedHunk(CNcbiOstream &out, CDiffList::TList::const_iterator start_iter, CDiffList::TList::const_iterator end_iter, CDiffList::size_type hunk_s1, CDiffList::size_type hunk_s2)
API to diff strings/texts and to find the differences.
std::ofstream out("events_result.xml")
main entry point for tests
static char line1[1024 *16]
static char line2[1024 *16]
static const char * str(char *buf, int n)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void CalculateOffsets(void)
Calculate offsets for all substrings in the difference list and find its position from the start of t...
CDeadline * m_Deadline
Deadline for processing (NULL if not set)
void x_DiffBisect(CTempString s1, CTempString s2, CDiffList &diffs) const
Find the 'middle snake' of a diff, split the problem in two and return the recursively constructed di...
void Prepend(CDiffOperation::EType operation, CTempString str)
Add element to the front of the list.
bool x_DiffHalfMatch(CTempString s1, CTempString s2, TDiffHalfMatchList &hm) const
Do the two texts share a substring which is at least half the length of the longer text?...
unsigned int TFlags
Bitwise OR of "Flags".
void CleanupAndMerge(void)
Reorder and merge like edit sections, merge equalities.
CDiffList & Diff(CTempString text1, CTempString text2, TFlags flags=0)
Find the differences between two texts (line mode).
virtual const char * GetErrCodeString(void) const override
Get error code interpreted as text.
unsigned int TFlags
Bitwise OR of "EFlags".
bool x_CleanupAndMerge_SingleEdits(void)
Look for single edits surrounded on both sides by equalities which can be shifted sideways to elimina...
CTempString GetLongestCommonSubstring(void) const
Find the longest common substring for current difference list.
void x_CleanupAndMerge_Equities(void)
Merge adjacent parts with the same operation.
void Reset(void)
Reset internal state and prepare to next Diff()
bool x_DiffHalfMatchI(CTempString long_str, CTempString short_str, size_type i, TDiffHalfMatchList &hm) const
Does a substring of short string exist within long string such that the substring is at least half th...
void x_DiffBisectSplit(CTempString s1, CTempString s2, int x, int y, CDiffList &diffs) const
Given the location of the 'middle snake', split the diff in two parts and recurse.
void Append(CDiffOperation::EType operation, CTempString str)
Add element to the end of the list.
vector< CTempString > TDiffHalfMatchList
Five element array for the list of strings, returned by x_DiffHalfMatch()
bool IsTimeoutExpired() const
Check if timeout is expired.
CDiffList m_Diffs
The list of differences from the last diff.
list< CDiffOperation > TList
Storage class type for the list of diff operations.
CDiffList & Diff(CTempString s1, CTempString s2, TFlags flags=0)
Find the differences between two texts (character mode).
void x_Diff(CTempString s1, CTempString s2, CDiffList &diffs) const
Find the differences between two texts.
CDiffOperation::size_type size_type
Size type definition.
size_type GetEditDistance(void) const
Compute the edit distance (Levenshtein distance).
TList m_List
List of the differences.
CDiffOperation::size_type size_type
Type definition.
EType
Type of the current diff operation.
CDiffOperation(EType operation, CTempString str)
Constructor.
CTimeout m_Timeout
Relative timeout for processing.
const TList & GetList(void) const
Get list of the diff operations as list<>.
void SetLength(size_type length)
CNcbiOstream & PrintUnifiedDiff(CNcbiOstream &out, CTempString text1, CTempString text2, unsigned int num_common_lines=3)
Find the differences between two texts and print result into output stream in unified-like format.
@ fCalculateOffsets
Automatically call CalculateOffests() for the generated list of differences.
@ fNoCleanup
By default Diff() automatically call CleanupAndMerge() for the generated list of differences to have ...
@ fCleanup
Automatically call CleanupAndMerge() for the generated list of differences after Diff() to have a sho...
@ fCalculateOffsets
Automatically call CalculateOffests() for the generated list of differences.
@ fIgnoreEOL
Ignore differences in end-of-line types on string comparison.
@ fRemoveEOL
Remove end-of-line symbols from each string added to the list, which can be obtained using CDiffOpera...
TErrCode GetErrCode(void) const
Get error code.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static SIZE_TYPE CommonSuffixSize(const CTempString s1, const CTempString s2)
Determine the common suffix of two strings.
const char * data(void) const
Return a pointer to the array represented.
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
size_type length(void) const
Return the length of the represented array.
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
static SIZE_TYPE CommonPrefixSize(const CTempString s1, const CTempString s2)
Determine the common prefix of two strings.
operation
Bit operations.
virtual void Reset(void)
Reset the whole object.
unsigned int
A callback function used to compare two keys in a database.
constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head
int edit_t
type of edit for SES
double value_type
The numeric datatype used by the parser.
The NCBI C++/STL use hints.
Int4 delta(size_t dimension_, const Int4 *score_)
Structure to save offset/length in the compared strings.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4