diff3.py
Go to the documentation of this file.
00001 """ 00002 00003 @file diff3.py 00004 @brief Three-way file comparison algorithm. 00005 00006 This is a line-by-line translation of the Text::Diff3 Perl module version 00007 0.10 written by MIZUTANI Tociyuki <tociyuki@gmail.com>. 00008 00009 The Text::Diff3 Perl module in turn was based on the diff3 program: 00010 00011 Three way file comparison program (diff3) for Project GNU. 00012 Copyright (C) 1988, 1989, 1992, 1993, 1994 Free Software Foundation, Inc. 00013 Written by Randy Smith 00014 00015 The two-way file comparison procedure was based on the article: 00016 P. Heckel. ``A technique for isolating differences between files.'' 00017 Communications of the ACM, Vol. 21, No. 4, page 264, April 1978. 00018 00019 Copyright (c) 2012 University of Pennsylvania. All rights reserved. 00020 See https://www.cbica.upenn.edu/sbia/software/license.html or COPYING file. 00021 00022 """ 00023 00024 from operator import xor 00025 00026 00027 # ---------------------------------------------------------------------------- 00028 def diff3(yourtext, origtext, theirtext): 00029 """Three-way diff based on the GNU diff3.c by R. Smith. 00030 00031 @param [in] yourtext Array of lines of your text. 00032 @param [in] origtext Array of lines of original text. 00033 @param [in] theirtext Array of lines of their text. 00034 00035 @returns Array of tuples containing diff results. The tuples consist of 00036 (cmd, loA, hiA, loB, hiB), where cmd is either one of 00037 '0', '1', '2', or 'A'. 00038 00039 """ 00040 # diff result => [(cmd, loA, hiA, loB, hiB), ...] 00041 d2 = (diff(origtext, yourtext), diff(origtext, theirtext)) 00042 d3 = [] 00043 r3 = [None, 0, 0, 0, 0, 0, 0] 00044 while d2[0] or d2[1]: 00045 # find a continual range in origtext lo2..hi2 00046 # changed by yourtext or by theirtext. 00047 # 00048 # d2[0] 222 222222222 00049 # origtext ...L!!!!!!!!!!!!!!!!!!!!H... 00050 # d2[1] 222222 22 2222222 00051 r2 = ([], []) 00052 if not d2[0]: i = 1 00053 else: 00054 if not d2[1]: i = 0 00055 else: 00056 if d2[0][0][1] <= d2[1][0][1]: i = 0 00057 else: i = 1 00058 j = i 00059 k = xor(i, 1) 00060 hi = d2[j][0][2] 00061 r2[j].append(d2[j].pop(0)) 00062 while d2[k] and d2[k][0][1] <= hi + 1: 00063 hi_k = d2[k][0][2] 00064 r2[k].append(d2[k].pop(0)) 00065 if hi < hi_k: 00066 hi = hi_k 00067 j = k 00068 k = xor(k, 1) 00069 lo2 = r2[i][ 0][1] 00070 hi2 = r2[j][-1][2] 00071 # take the corresponding ranges in yourtext lo0..hi0 00072 # and in theirtext lo1..hi1. 00073 # 00074 # yourtext ..L!!!!!!!!!!!!!!!!!!!!!!!!!!!!H... 00075 # d2[0] 222 222222222 00076 # origtext ...00!1111!000!!00!111111... 00077 # d2[1] 222222 22 2222222 00078 # theirtext ...L!!!!!!!!!!!!!!!!H... 00079 if r2[0]: 00080 lo0 = r2[0][ 0][3] - r2[0][ 0][1] + lo2 00081 hi0 = r2[0][-1][4] - r2[0][-1][2] + hi2 00082 else: 00083 lo0 = r3[2] - r3[6] + lo2 00084 hi0 = r3[2] - r3[6] + hi2 00085 if r2[1]: 00086 lo1 = r2[1][ 0][3] - r2[1][ 0][1] + lo2 00087 hi1 = r2[1][-1][4] - r2[1][-1][2] + hi2 00088 else: 00089 lo1 = r3[4] - r3[6] + lo2 00090 hi1 = r3[4] - r3[6] + hi2 00091 # detect type of changes 00092 if not r2[0]: 00093 cmd = '1' 00094 elif not r2[1]: 00095 cmd = '0' 00096 elif hi0 - lo0 != hi1 - lo1: 00097 cmd = 'A' 00098 else: 00099 cmd = '2' 00100 for d in range(0, hi0 - lo0 + 1): 00101 (i0, i1) = (lo0 + d - 1, lo1 + d - 1) 00102 ok0 = (0 <= i0 and i0 < len(yourtext)) 00103 ok1 = (0 <= i1 and i1 < len(theirtext)) 00104 if xor(ok0, ok1) or (ok0 and yourtext[i0] != theirtext[i1]): 00105 cmd = 'A' 00106 break 00107 d3.append((cmd, lo0, hi0, lo1, hi1, lo2, hi2)) 00108 return d3 00109 00110 # ---------------------------------------------------------------------------- 00111 def merge(yourtext, origtext, theirtext): 00112 res = {'conflict': 0, 'body': []} 00113 d3 = diff3(yourtext, origtext, theirtext) 00114 text3 = (yourtext, theirtext, origtext) 00115 i2 = 1 00116 for r3 in d3: 00117 for lineno in range(i2, r3[5]): 00118 res['body'].append(text3[2][lineno - 1]) 00119 if r3[0] == '0': 00120 for lineno in range(r3[1], r3[2] + 1): 00121 res['body'].append(text3[0][lineno - 1]) 00122 elif r3[0] != 'A': 00123 for lineno in range(r3[3], r3[4] + 1): 00124 res['body'].append(text3[1][lineno - 1]) 00125 else: 00126 res = _conflict_range(text3, r3, res) 00127 i2 = r3[6] + 1 00128 for lineno in range(i2, len(text3[2]) + 1): 00129 res['body'].append(text3[2][lineno - 1]) 00130 return res 00131 00132 # ---------------------------------------------------------------------------- 00133 def _conflict_range(text3, r3, res): 00134 text_a = [] # their text 00135 for i in range(r3[3], r3[4] + 1): 00136 text_a.append(text3[1][i - 1]) 00137 text_b = [] # your text 00138 for i in range(r3[1], r3[2] + 1): 00139 text_b.append(text3[0][i - 1]) 00140 d = diff(text_a, text_b) 00141 if _assoc_range(d, 'c') and r3[5] <= r3[6]: 00142 res['conflict'] += 1 00143 res['body'].append('<<<<<<<') 00144 for lineno in range(r3[1], r3[2] + 1): 00145 res['body'].append(text3[0][lineno - 1]) 00146 res['body'].append('|||||||') 00147 for lineno in range(r3[5], r3[6] + 1): 00148 res['body'].append(text3[2][lineno - 1]) 00149 res['body'].append('=======') 00150 for lineno in range(r3[3], r3[4] + 1): 00151 res['body'].append(text3[1][lineno - 1]) 00152 res['body'].append('>>>>>>>') 00153 return res 00154 ia = 1 00155 for r2 in d: 00156 for lineno in range(ia, r2[1]): 00157 res['body'].append(text_a[lineno - 1]) 00158 if r2[0] == 'c': 00159 res['conflict'] += 1 00160 res['body'].append('<<<<<<<') 00161 for lineno in range(r2[3], r2[4] + 1): 00162 res['body'].append(text_b[lineno - 1]) 00163 res['body'].append('=======') 00164 for lineno in range(r2[1], r2[2] + 1): 00165 res['body'].append(text_a[lineno - 1]) 00166 res['body'].append('>>>>>>>') 00167 elif r2[0] == 'a': 00168 for lineno in range(r2[3], r2[4] + 1): 00169 res['body'].append(text_b[lineno - 1]) 00170 ia = r2[2] + 1 00171 for lineno in range(ia, len(text_a)): 00172 res['body'].append(text_a[lineno - 1]) 00173 return res 00174 00175 # ---------------------------------------------------------------------------- 00176 def _assoc_range(diff, diff_type): 00177 for d in diff: 00178 if d[0] == diff_type: return d 00179 return None 00180 00181 # ---------------------------------------------------------------------------- 00182 def _diff_heckel(text_a, text_b): 00183 """Two-way diff based on the algorithm by P. Heckel. 00184 00185 @param [in] text_a Array of lines of first text. 00186 @param [in] text_b Array of lines of second text. 00187 00188 @returns TODO 00189 00190 """ 00191 d = []; 00192 uniq = [(len(text_a), len(text_b))] 00193 (freq, ap, bp) = ({}, {}, {}) 00194 for i in range(len(text_a)): 00195 s = text_a[i] 00196 freq[s] = freq.get(s, 0) + 2; 00197 ap [s] = i; 00198 for i in range(len(text_b)): 00199 s = text_b[i] 00200 freq[s] = freq.get(s, 0) + 3; 00201 bp [s] = i; 00202 for s, x in freq.items(): 00203 if x == 5: uniq.append((ap[s], bp[s])) 00204 (freq, ap, bp) = ({}, {}, {}) 00205 uniq.sort(key=lambda x: x[0]) 00206 (a1, b1) = (0, 0) 00207 while a1 < len(text_a) and b1 < len(text_b): 00208 if text_a[a1] != text_b[b1]: break 00209 a1 += 1 00210 b1 += 1 00211 for a_uniq, b_uniq in uniq: 00212 if a_uniq < a1 or b_uniq < b1: continue 00213 (a0, b0) = (a1, b1) 00214 (a1, b1) = (a_uniq - 1, b_uniq - 1) 00215 while a0 <= a1 and b0 <= b1: 00216 if text_a[a1] != text_b[b1]: break 00217 a1 -= 1 00218 b1 -= 1 00219 if a0 <= a1 and b0 <= b1: 00220 d.append(('c', a0 + 1, a1 + 1, b0 + 1, b1 + 1)) 00221 elif a0 <= a1: 00222 d.append(('d', a0 + 1, a1 + 1, b0 + 1, b0)) 00223 elif b0 <= b1: 00224 d.append(('a', a0 + 1, a0, b0 + 1, b1 + 1)) 00225 (a1, b1) = (a_uniq + 1, b_uniq + 1) 00226 while a1 < len(text_a) and b1 < len(text_b): 00227 if text_a[a1] != text_b[b1]: break 00228 a1 += 1 00229 b1 += 1 00230 return d 00231 00232 # ---------------------------------------------------------------------------- 00233 diff = _diff_heckel # default two-way diff function used by diff3()