A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/diff_8cpp_source.html below:

NCBI C++ ToolKit: src/util/diff/diff.cpp Source File

35 #include <unordered_map> 39 #define NCBI_USE_ERRCODE_X Util_Diff 45 #define F_ISSET(mask) (((flags) & (mask)) == (mask)) 48 #define DIFF_DELETE CDiffOperation::eDelete 49 #define DIFF_EQUAL CDiffOperation::eEqual 50 #define DIFF_INSERT CDiffOperation::eInsert 61

m_Length(

str

.length())

81  switch

(it->GetOperation()) {

83

len_insert += it->GetString().length();

86

len_delete += it->GetString().length();

90

edit_distance +=

max

(len_insert, len_delete);

96

edit_distance +=

max

(len_insert, len_delete);

107

TList::const_iterator max_pos = diffs.end();

112  if

(

len

> max_len) {

118  if

(!max_len || max_pos == diffs.end()) {

121  return

max_pos->GetString();

129  if

(it->GetString().length() != it->GetLength()) {

133  "Possible fRemoveEOL flag was used, it is impossible " 134  "to perform cleanup and merge in this mode"

);

162  switch

(it->GetOperation()) {

165

off_second += it->GetLength();

169

off_first += it->GetLength();

173

off_first += it->GetLength();

174

off_second += it->GetLength();

185  size_t

count_delete = 0;

186  size_t

count_insert = 0;

188

CDiffList::TList::iterator this_diff =

m_List

.begin();

189

CDiffList::TList::iterator ins_diff =

m_List

.end();

190

CDiffList::TList::iterator del_diff =

m_List

.end();

191

CDiffList::TList::iterator equal_diff =

m_List

.end();

193  while

(this_diff !=

m_List

.end()) {

194  const CTempString

& current = this_diff->GetString();

195  switch

(this_diff->GetOperation()) {

202

this_diff =

m_List

.erase(this_diff);

204

ins_diff = this_diff++;

214

this_diff =

m_List

.erase(this_diff);

216

del_diff = this_diff++;

223  if

(count_delete + count_insert >= 1) {

225  if

(count_delete != 0 && count_insert != 0) {

230  if

(common_prefix_len) {

232  if

(equal_diff ==

m_List

.end()) {

239

ins_str = ins_str.

substr

(common_prefix_len);

240

del_str = del_str.

substr

(common_prefix_len);

245  if

(common_suffix_len) {

248

this_diff->SetString(

252

ins_str = ins_str.

substr

(0, ins_str.

length

() - common_suffix_len);

253

del_str = del_str.

substr

(0, del_str.

length

() - common_suffix_len);

256  if

(common_prefix_len || common_suffix_len) {

257

ins_diff->SetString(ins_str);

258

del_diff->SetString(del_str);

262

equal_diff = this_diff++;

266  if

(equal_diff ==

m_List

.end()) {

268

equal_diff = this_diff++;

273

this_diff =

m_List

.erase(this_diff);

289  if

(

m_List

.size() < 3) {

294

CDiffList::TList::iterator next_diff =

m_List

.begin();

295

CDiffList::TList::iterator prev_diff = next_diff++;

296

CDiffList::TList::iterator this_diff = next_diff++;

298  bool

changes =

false

;

300  while

(next_diff !=

m_List

.end()) {

302  if

(prev_diff->GetOperation() ==

DIFF_EQUAL

&&

323

prev_diff->SetString(

327

this_diff->SetString(

330

next_diff =

m_List

.erase(next_diff);

331  if

(next_diff ==

m_List

.end()) {

332

next_diff = this_diff;

340

next_diff->SetString(

344

this_diff->SetString(

351

prev_diff = this_diff;

352

this_diff = next_diff;

369

unique_ptr<CDeadline> real_deadline;

386  if

(common_prefix_len) {

387

common_prefix = s1.

substr

(0, common_prefix_len);

388

s1 = s1.

substr

(common_prefix_len);

389

s2 = s2.

substr

(common_prefix_len);

395  if

(common_suffix_len) {

396

common_suffix = s1.

substr

(s1.

length

() - common_suffix_len);

404  if

(common_prefix_len) {

407  if

(common_suffix_len) {

462  if

(!hm1_res && !hm2_res) {

464

}

else if

(!hm1_res) {

466

}

else if

(!hm2_res) {

470

hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2;

515  while

((j = short_str.

find

(

seed

, j + 1)) !=

NPOS

) {

520  if

(best_common.

length

() < suffix_len + prefix_len) {

521

best_common = short_str.

substr

(j - suffix_len, suffix_len + prefix_len);

522

best_longtext_a = long_str.

substr

(0,

i

- suffix_len);

523

best_longtext_b = long_str.

substr

(

i

+ prefix_len);

524

best_shorttext_a = short_str.

substr

(0, j - suffix_len);

525

best_shorttext_b = short_str.

substr

(j + prefix_len);

528  if

(best_common.

length

() * 2 >= long_str.

length

()) {

529

hm[0] = best_longtext_a;

530

hm[1] = best_longtext_b;

531

hm[2] = best_shorttext_a;

532

hm[3] = best_shorttext_b;

556  const int

max_d = (s1_len + s2_len + 1) / 2;

557  const int

v_offset = max_d;

558  const int

v_length = 2 * max_d;

559  int

*v1 =

new int

[v_length];

560  int

*

v2

=

new int

[v_length];

561  for

(

int

x = 0; x < v_length; x++) {

565

v1[v_offset + 1] = 0;

566  v2

[v_offset + 1] = 0;

567  const int delta

= s1_len - s2_len;

578  for

(

int

d = 0; d < max_d; d++)

584  for

(

int

k1 = -d + k1start; k1 <= d - k1end; k1 += 2) {

585  const int

k1_offset = v_offset + k1;

587  if

(k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1])) {

588

x1 = v1[k1_offset + 1];

590

x1 = v1[k1_offset - 1] + 1;

593  while

(x1 < s1_len && y1 < s2_len && s1[x1] == s2[y1]) {

601

}

else if

(y1 > s2_len) {

604

}

else if

(

front

) {

605  int

k2_offset = v_offset +

delta

- k1;

606  if

(k2_offset >= 0 && k2_offset < v_length &&

v2

[k2_offset] != -1) {

608  int

x2 = s1_len -

v2

[k2_offset];

621  for

(

int

k2 = -d + k2start; k2 <= d - k2end; k2 += 2) {

622  const int

k2_offset = v_offset + k2;

624  if

(k2 == -d || (k2 != d &&

v2

[k2_offset - 1] <

v2

[k2_offset + 1])) {

625

x2 =

v2

[k2_offset + 1];

627

x2 =

v2

[k2_offset - 1] + 1;

630  while

(x2 < s1_len && y2 < s2_len &&

631

s1[s1_len - x2 - 1] == s2[s2_len - y2 - 1]) {

639

}

else if

(y2 > s2_len) {

642

}

else if

(!

front

) {

643  int

k1_offset = v_offset +

delta

- k2;

644  if

(k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) {

645  int

x1 = v1[k1_offset];

646  int

y1 = v_offset + x1 - k1_offset;

729  if

(short_str.

length

() == 1) {

743  x_Diff

(hm[0], hm[2], diffs_a);

744  x_Diff

(hm[1], hm[3], diffs_b);

775

unique_ptr<CDeadline> real_deadline;

783

vector<CTempString> lines;

786

s1_num_lines = lines.size();

788

s2_num_lines = lines.size() - s1_num_lines;

793  string

s = lines[

i

];

800  if

(

i

!= s1_num_lines-1 &&

i

!= lines.size()-1) {

808  typedef

vector<size_type> TSequence;

809

TSequence idx1(s1_num_lines);

810

TSequence idx2(s2_num_lines);

813

TStringToLineNumMap::const_iterator it;

814  string

s = lines[

i

];

821  if

(

i

< s1_num_lines-1) {

826

idx1[

i

] = it->second;

830

TStringToLineNumMap::const_iterator it;

831  string

s = lines[pos];

838  if

(

i

< s2_num_lines-1) {

843

idx2[

i

] = it->second;

848  if

(s1_num_lines > 10000 && s2_num_lines > 10000) {

852

vector< pair<size_type, dtl::elemInfo> > ses_seq = d.

getSes

().getSequence();

853

vector< pair<size_type, dtl::elemInfo> >::iterator it;

872  for

(it = ses_seq.begin(); it != ses_seq.end(); it++)

880  bool

is_last_line =

false

;

887  line2

= pos2 - s1_num_lines + 1;

888

is_last_line = (

line2

>= s2_num_lines);

895

is_last_line = (

line1

>= s1_num_lines);

903  line2

= pos2 - s1_num_lines + 1;

904

is_last_line = (

line2

>= s2_num_lines);

918  if

(s[

len

-1] ==

'\r'

) {

938

insertions.push_back(op);

942

deletions.push_back(op);

946  if

(!deletions.empty()) {

950  if

(!insertions.empty()) {

960  if

(!deletions.empty()) {

963  if

(!insertions.empty()) {

966  _VERIFY

(pos1 == s1_num_lines);

967  _VERIFY

(pos2 == lines.size());

990

CDiffList::TList::const_iterator start_iter,

991

CDiffList::TList::const_iterator end_iter,

995  const char

eol =

'\n'

;

996  unsigned long

n1 = 0;

997  unsigned long

n2 = 0;

1002

CDiffList::TList::const_iterator it = start_iter;

1003  while

(it != end_iter)

1006  switch

(it->GetOperation()) {

1022  out

<<

"@@ -"

<< (

unsigned

long)hunk_s1 <<

","

<< n1 <<

" +" 1023

<< (

unsigned long

)hunk_s2 <<

","

<< n2 <<

" @@"

<< eol;

1027  while

(it != end_iter)

1030  switch

(it->GetOperation()) {

1041  out

<< op << it->GetString() << eol;

1050  unsigned int

num_common_lines)

1052  if

(!

out

.good()) {

1056  if

(diffs.empty()) {

1061

CDiffList::TList::const_iterator it = diffs.begin();

1062

CDiffList::TList::const_iterator hunk_start = diffs.end();

1063

CDiffList::TList::const_iterator hunk_end = diffs.end();

1065  bool

is_hunk =

false

;

1066  unsigned int

num_equal = 0;

1077  while

(it != diffs.end()) {

1078  switch

(it->GetOperation()) {

1089  if

(!num_common_lines || !num_equal) {

1112  if

(num_common_lines == 0) {

1118

}

else if

(!num_equal) {

1121

}

else if

(num_equal <= num_common_lines) {

1139

}

else if

(num_equal < num_common_lines) {

1167  case eEmpty

:

return "eEmpty"

;

@ eEmpty

no filtering at all.

CDiffList – the list of diff operations.

CDiffOperation – The storage class for one diff operation.

CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...

diff class template sequence must support random_access_iterator.

void compose()

compose Longest Common Subsequence and Shortest Edit Script.

Ses< elem > getSes() const

unordered_map< string, CDiffList::size_type > TStringToLineNumMap

CNcbiOstream & s_PrintUnifiedHunk(CNcbiOstream &out, CDiffList::TList::const_iterator start_iter, CDiffList::TList::const_iterator end_iter, CDiffList::size_type hunk_s1, CDiffList::size_type hunk_s2)

API to diff strings/texts and to find the differences.

std::ofstream out("events_result.xml")

main entry point for tests

static char line1[1024 *16]

static char line2[1024 *16]

static const char * str(char *buf, int n)

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define NON_CONST_ITERATE(Type, Var, Cont)

Non constant version of ITERATE macro.

void CalculateOffsets(void)

Calculate offsets for all substrings in the difference list and find its position from the start of t...

CDeadline * m_Deadline

Deadline for processing (NULL if not set)

void x_DiffBisect(CTempString s1, CTempString s2, CDiffList &diffs) const

Find the 'middle snake' of a diff, split the problem in two and return the recursively constructed di...

void Prepend(CDiffOperation::EType operation, CTempString str)

Add element to the front of the list.

bool x_DiffHalfMatch(CTempString s1, CTempString s2, TDiffHalfMatchList &hm) const

Do the two texts share a substring which is at least half the length of the longer text?...

unsigned int TFlags

Bitwise OR of "Flags".

void CleanupAndMerge(void)

Reorder and merge like edit sections, merge equalities.

CDiffList & Diff(CTempString text1, CTempString text2, TFlags flags=0)

Find the differences between two texts (line mode).

virtual const char * GetErrCodeString(void) const override

Get error code interpreted as text.

unsigned int TFlags

Bitwise OR of "EFlags".

bool x_CleanupAndMerge_SingleEdits(void)

Look for single edits surrounded on both sides by equalities which can be shifted sideways to elimina...

CTempString GetLongestCommonSubstring(void) const

Find the longest common substring for current difference list.

void x_CleanupAndMerge_Equities(void)

Merge adjacent parts with the same operation.

void Reset(void)

Reset internal state and prepare to next Diff()

bool x_DiffHalfMatchI(CTempString long_str, CTempString short_str, size_type i, TDiffHalfMatchList &hm) const

Does a substring of short string exist within long string such that the substring is at least half th...

void x_DiffBisectSplit(CTempString s1, CTempString s2, int x, int y, CDiffList &diffs) const

Given the location of the 'middle snake', split the diff in two parts and recurse.

void Append(CDiffOperation::EType operation, CTempString str)

Add element to the end of the list.

vector< CTempString > TDiffHalfMatchList

Five element array for the list of strings, returned by x_DiffHalfMatch()

bool IsTimeoutExpired() const

Check if timeout is expired.

CDiffList m_Diffs

The list of differences from the last diff.

list< CDiffOperation > TList

Storage class type for the list of diff operations.

CDiffList & Diff(CTempString s1, CTempString s2, TFlags flags=0)

Find the differences between two texts (character mode).

void x_Diff(CTempString s1, CTempString s2, CDiffList &diffs) const

Find the differences between two texts.

CDiffOperation::size_type size_type

Size type definition.

size_type GetEditDistance(void) const

Compute the edit distance (Levenshtein distance).

TList m_List

List of the differences.

CDiffOperation::size_type size_type

Type definition.

EType

Type of the current diff operation.

CDiffOperation(EType operation, CTempString str)

Constructor.

CTimeout m_Timeout

Relative timeout for processing.

const TList & GetList(void) const

Get list of the diff operations as list<>.

void SetLength(size_type length)

CNcbiOstream & PrintUnifiedDiff(CNcbiOstream &out, CTempString text1, CTempString text2, unsigned int num_common_lines=3)

Find the differences between two texts and print result into output stream in unified-like format.

@ fCalculateOffsets

Automatically call CalculateOffests() for the generated list of differences.

@ fNoCleanup

By default Diff() automatically call CleanupAndMerge() for the generated list of differences to have ...

@ fCleanup

Automatically call CleanupAndMerge() for the generated list of differences after Diff() to have a sho...

@ fCalculateOffsets

Automatically call CalculateOffests() for the generated list of differences.

@ fIgnoreEOL

Ignore differences in end-of-line types on string comparison.

@ fRemoveEOL

Remove end-of-line symbols from each string added to the list, which can be obtained using CDiffOpera...

TErrCode GetErrCode(void) const

Get error code.

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

virtual const char * GetErrCodeString(void) const

Get error code interpreted as text.

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

IO_PREFIX::ostream CNcbiOstream

Portable alias for ostream.

static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)

Split a string using specified delimiters.

static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)

Check if a string ends with a specified suffix value.

static SIZE_TYPE CommonSuffixSize(const CTempString s1, const CTempString s2)

Determine the common suffix of two strings.

const char * data(void) const

Return a pointer to the array represented.

bool empty(void) const

Return true if the represented string is empty (i.e., the length is zero)

static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)

Check if a string starts with a specified prefix value.

size_type length(void) const

Return the length of the represented array.

CTempString substr(size_type pos) const

Obtain a substring from this string, beginning at a given offset.

size_type find(const CTempString match, size_type pos=0) const

Find the first instance of the entire matching string within the current string, beginning at an opti...

static SIZE_TYPE CommonPrefixSize(const CTempString s1, const CTempString s2)

Determine the common prefix of two strings.

operation

Bit operations.

virtual void Reset(void)

Reset the whole object.

unsigned int

A callback function used to compare two keys in a database.

constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head

int edit_t

type of edit for SES

double value_type

The numeric datatype used by the parser.

The NCBI C++/STL use hints.

Int4 delta(size_t dimension_, const Int4 *score_)

Structure to save offset/length in the compared strings.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4