00001 """
00002
00003 @file diff3.py
00004 @brief Three-way file comparison algorithm.
00005
00006 This is a line-by-line translation of the Text::Diff3 Perl module version
00007 0.10 written by MIZUTANI Tociyuki <tociyuki@gmail.com>.
00008
00009 The Text::Diff3 Perl module in turn was based on the diff3 program:
00010
00011 Three way file comparison program (diff3) for Project GNU.
00012 Copyright (C) 1988, 1989, 1992, 1993, 1994 Free Software Foundation, Inc.
00013 Written by Randy Smith
00014
00015 The two-way file comparison procedure was based on the article:
00016 P. Heckel. ``A technique for isolating differences between files.''
00017 Communications of the ACM, Vol. 21, No. 4, page 264, April 1978.
00018
00019 Copyright (c) 2012 University of Pennsylvania. All rights reserved.
00020 See https://www.cbica.upenn.edu/sbia/software/license.html or COPYING file.
00021
00022 """
00023
00024 from operator import xor
00025
00026
00027
00028 def diff3(yourtext, origtext, theirtext):
00029 """Three-way diff based on the GNU diff3.c by R. Smith.
00030
00031 @param [in] yourtext Array of lines of your text.
00032 @param [in] origtext Array of lines of original text.
00033 @param [in] theirtext Array of lines of their text.
00034
00035 @returns Array of tuples containing diff results. The tuples consist of
00036 (cmd, loA, hiA, loB, hiB), where cmd is either one of
00037 '0', '1', '2', or 'A'.
00038
00039 """
00040
00041 d2 = (diff(origtext, yourtext), diff(origtext, theirtext))
00042 d3 = []
00043 r3 = [None, 0, 0, 0, 0, 0, 0]
00044 while d2[0] or d2[1]:
00045
00046
00047
00048
00049
00050
00051 r2 = ([], [])
00052 if not d2[0]: i = 1
00053 else:
00054 if not d2[1]: i = 0
00055 else:
00056 if d2[0][0][1] <= d2[1][0][1]: i = 0
00057 else: i = 1
00058 j = i
00059 k = xor(i, 1)
00060 hi = d2[j][0][2]
00061 r2[j].append(d2[j].pop(0))
00062 while d2[k] and d2[k][0][1] <= hi + 1:
00063 hi_k = d2[k][0][2]
00064 r2[k].append(d2[k].pop(0))
00065 if hi < hi_k:
00066 hi = hi_k
00067 j = k
00068 k = xor(k, 1)
00069 lo2 = r2[i][ 0][1]
00070 hi2 = r2[j][-1][2]
00071
00072
00073
00074
00075
00076
00077
00078
00079 if r2[0]:
00080 lo0 = r2[0][ 0][3] - r2[0][ 0][1] + lo2
00081 hi0 = r2[0][-1][4] - r2[0][-1][2] + hi2
00082 else:
00083 lo0 = r3[2] - r3[6] + lo2
00084 hi0 = r3[2] - r3[6] + hi2
00085 if r2[1]:
00086 lo1 = r2[1][ 0][3] - r2[1][ 0][1] + lo2
00087 hi1 = r2[1][-1][4] - r2[1][-1][2] + hi2
00088 else:
00089 lo1 = r3[4] - r3[6] + lo2
00090 hi1 = r3[4] - r3[6] + hi2
00091
00092 if not r2[0]:
00093 cmd = '1'
00094 elif not r2[1]:
00095 cmd = '0'
00096 elif hi0 - lo0 != hi1 - lo1:
00097 cmd = 'A'
00098 else:
00099 cmd = '2'
00100 for d in range(0, hi0 - lo0 + 1):
00101 (i0, i1) = (lo0 + d - 1, lo1 + d - 1)
00102 ok0 = (0 <= i0 and i0 < len(yourtext))
00103 ok1 = (0 <= i1 and i1 < len(theirtext))
00104 if xor(ok0, ok1) or (ok0 and yourtext[i0] != theirtext[i1]):
00105 cmd = 'A'
00106 break
00107 d3.append((cmd, lo0, hi0, lo1, hi1, lo2, hi2))
00108 return d3
00109
00110
00111 def merge(yourtext, origtext, theirtext):
00112 res = {'conflict': 0, 'body': []}
00113 d3 = diff3(yourtext, origtext, theirtext)
00114 text3 = (yourtext, theirtext, origtext)
00115 i2 = 1
00116 for r3 in d3:
00117 for lineno in range(i2, r3[5]):
00118 res['body'].append(text3[2][lineno - 1])
00119 if r3[0] == '0':
00120 for lineno in range(r3[1], r3[2] + 1):
00121 res['body'].append(text3[0][lineno - 1])
00122 elif r3[0] != 'A':
00123 for lineno in range(r3[3], r3[4] + 1):
00124 res['body'].append(text3[1][lineno - 1])
00125 else:
00126 res = _conflict_range(text3, r3, res)
00127 i2 = r3[6] + 1
00128 for lineno in range(i2, len(text3[2]) + 1):
00129 res['body'].append(text3[2][lineno - 1])
00130 return res
00131
00132
00133 def _conflict_range(text3, r3, res):
00134 text_a = []
00135 for i in range(r3[3], r3[4] + 1):
00136 text_a.append(text3[1][i - 1])
00137 text_b = []
00138 for i in range(r3[1], r3[2] + 1):
00139 text_b.append(text3[0][i - 1])
00140 d = diff(text_a, text_b)
00141 if _assoc_range(d, 'c') and r3[5] <= r3[6]:
00142 res['conflict'] += 1
00143 res['body'].append('<<<<<<<')
00144 for lineno in range(r3[1], r3[2] + 1):
00145 res['body'].append(text3[0][lineno - 1])
00146 res['body'].append('|||||||')
00147 for lineno in range(r3[5], r3[6] + 1):
00148 res['body'].append(text3[2][lineno - 1])
00149 res['body'].append('=======')
00150 for lineno in range(r3[3], r3[4] + 1):
00151 res['body'].append(text3[1][lineno - 1])
00152 res['body'].append('>>>>>>>')
00153 return res
00154 ia = 1
00155 for r2 in d:
00156 for lineno in range(ia, r2[1]):
00157 res['body'].append(text_a[lineno - 1])
00158 if r2[0] == 'c':
00159 res['conflict'] += 1
00160 res['body'].append('<<<<<<<')
00161 for lineno in range(r2[3], r2[4] + 1):
00162 res['body'].append(text_b[lineno - 1])
00163 res['body'].append('=======')
00164 for lineno in range(r2[1], r2[2] + 1):
00165 res['body'].append(text_a[lineno - 1])
00166 res['body'].append('>>>>>>>')
00167 elif r2[0] == 'a':
00168 for lineno in range(r2[3], r2[4] + 1):
00169 res['body'].append(text_b[lineno - 1])
00170 ia = r2[2] + 1
00171 for lineno in range(ia, len(text_a)):
00172 res['body'].append(text_a[lineno - 1])
00173 return res
00174
00175
00176 def _assoc_range(diff, diff_type):
00177 for d in diff:
00178 if d[0] == diff_type: return d
00179 return None
00180
00181
00182 def _diff_heckel(text_a, text_b):
00183 """Two-way diff based on the algorithm by P. Heckel.
00184
00185 @param [in] text_a Array of lines of first text.
00186 @param [in] text_b Array of lines of second text.
00187
00188 @returns TODO
00189
00190 """
00191 d = [];
00192 uniq = [(len(text_a), len(text_b))]
00193 (freq, ap, bp) = ({}, {}, {})
00194 for i in range(len(text_a)):
00195 s = text_a[i]
00196 freq[s] = freq.get(s, 0) + 2;
00197 ap [s] = i;
00198 for i in range(len(text_b)):
00199 s = text_b[i]
00200 freq[s] = freq.get(s, 0) + 3;
00201 bp [s] = i;
00202 for s, x in freq.items():
00203 if x == 5: uniq.append((ap[s], bp[s]))
00204 (freq, ap, bp) = ({}, {}, {})
00205 uniq.sort(key=lambda x: x[0])
00206 (a1, b1) = (0, 0)
00207 while a1 < len(text_a) and b1 < len(text_b):
00208 if text_a[a1] != text_b[b1]: break
00209 a1 += 1
00210 b1 += 1
00211 for a_uniq, b_uniq in uniq:
00212 if a_uniq < a1 or b_uniq < b1: continue
00213 (a0, b0) = (a1, b1)
00214 (a1, b1) = (a_uniq - 1, b_uniq - 1)
00215 while a0 <= a1 and b0 <= b1:
00216 if text_a[a1] != text_b[b1]: break
00217 a1 -= 1
00218 b1 -= 1
00219 if a0 <= a1 and b0 <= b1:
00220 d.append(('c', a0 + 1, a1 + 1, b0 + 1, b1 + 1))
00221 elif a0 <= a1:
00222 d.append(('d', a0 + 1, a1 + 1, b0 + 1, b0))
00223 elif b0 <= b1:
00224 d.append(('a', a0 + 1, a0, b0 + 1, b1 + 1))
00225 (a1, b1) = (a_uniq + 1, b_uniq + 1)
00226 while a1 < len(text_a) and b1 < len(text_b):
00227 if text_a[a1] != text_b[b1]: break
00228 a1 += 1
00229 b1 += 1
00230 return d
00231
00232
00233 diff = _diff_heckel