xref: /openbmc/openbmc/poky/bitbake/lib/simplediff/__init__.py (revision eb8dc40360f0cfef56fb6947cc817a547d6d9bc6)
1*eb8dc403SDave Cobbley'''
2*eb8dc403SDave CobbleySimple Diff for Python version 1.0
3*eb8dc403SDave Cobbley
4*eb8dc403SDave CobbleyAnnotate two versions of a list with the values that have been
5*eb8dc403SDave Cobbleychanged between the versions, similar to unix's `diff` but with
6*eb8dc403SDave Cobbleya dead-simple Python interface.
7*eb8dc403SDave Cobbley
8*eb8dc403SDave Cobbley(C) Paul Butler 2008-2012 <http://www.paulbutler.org/>
9*eb8dc403SDave CobbleyMay be used and distributed under the zlib/libpng license
10*eb8dc403SDave Cobbley<http://www.opensource.org/licenses/zlib-license.php>
11*eb8dc403SDave Cobbley'''
12*eb8dc403SDave Cobbley
13*eb8dc403SDave Cobbley__all__ = ['diff', 'string_diff', 'html_diff']
14*eb8dc403SDave Cobbley__version__ = '1.0'
15*eb8dc403SDave Cobbley
16*eb8dc403SDave Cobbley
17*eb8dc403SDave Cobbleydef diff(old, new):
18*eb8dc403SDave Cobbley    '''
19*eb8dc403SDave Cobbley    Find the differences between two lists. Returns a list of pairs, where the
20*eb8dc403SDave Cobbley    first value is in ['+','-','='] and represents an insertion, deletion, or
21*eb8dc403SDave Cobbley    no change for that list. The second value of the pair is the list
22*eb8dc403SDave Cobbley    of elements.
23*eb8dc403SDave Cobbley
24*eb8dc403SDave Cobbley    Params:
25*eb8dc403SDave Cobbley        old     the old list of immutable, comparable values (ie. a list
26*eb8dc403SDave Cobbley                of strings)
27*eb8dc403SDave Cobbley        new     the new list of immutable, comparable values
28*eb8dc403SDave Cobbley
29*eb8dc403SDave Cobbley    Returns:
30*eb8dc403SDave Cobbley        A list of pairs, with the first part of the pair being one of three
31*eb8dc403SDave Cobbley        strings ('-', '+', '=') and the second part being a list of values from
32*eb8dc403SDave Cobbley        the original old and/or new lists. The first part of the pair
33*eb8dc403SDave Cobbley        corresponds to whether the list of values is a deletion, insertion, or
34*eb8dc403SDave Cobbley        unchanged, respectively.
35*eb8dc403SDave Cobbley
36*eb8dc403SDave Cobbley    Examples:
37*eb8dc403SDave Cobbley        >>> diff([1,2,3,4],[1,3,4])
38*eb8dc403SDave Cobbley        [('=', [1]), ('-', [2]), ('=', [3, 4])]
39*eb8dc403SDave Cobbley
40*eb8dc403SDave Cobbley        >>> diff([1,2,3,4],[2,3,4,1])
41*eb8dc403SDave Cobbley        [('-', [1]), ('=', [2, 3, 4]), ('+', [1])]
42*eb8dc403SDave Cobbley
43*eb8dc403SDave Cobbley        >>> diff('The quick brown fox jumps over the lazy dog'.split(),
44*eb8dc403SDave Cobbley        ...      'The slow blue cheese drips over the lazy carrot'.split())
45*eb8dc403SDave Cobbley        ... # doctest: +NORMALIZE_WHITESPACE
46*eb8dc403SDave Cobbley        [('=', ['The']),
47*eb8dc403SDave Cobbley         ('-', ['quick', 'brown', 'fox', 'jumps']),
48*eb8dc403SDave Cobbley         ('+', ['slow', 'blue', 'cheese', 'drips']),
49*eb8dc403SDave Cobbley         ('=', ['over', 'the', 'lazy']),
50*eb8dc403SDave Cobbley         ('-', ['dog']),
51*eb8dc403SDave Cobbley         ('+', ['carrot'])]
52*eb8dc403SDave Cobbley
53*eb8dc403SDave Cobbley    '''
54*eb8dc403SDave Cobbley
55*eb8dc403SDave Cobbley    # Create a map from old values to their indices
56*eb8dc403SDave Cobbley    old_index_map = dict()
57*eb8dc403SDave Cobbley    for i, val in enumerate(old):
58*eb8dc403SDave Cobbley        old_index_map.setdefault(val,list()).append(i)
59*eb8dc403SDave Cobbley
60*eb8dc403SDave Cobbley    # Find the largest substring common to old and new.
61*eb8dc403SDave Cobbley    # We use a dynamic programming approach here.
62*eb8dc403SDave Cobbley    #
63*eb8dc403SDave Cobbley    # We iterate over each value in the `new` list, calling the
64*eb8dc403SDave Cobbley    # index `inew`. At each iteration, `overlap[i]` is the
65*eb8dc403SDave Cobbley    # length of the largest suffix of `old[:i]` equal to a suffix
66*eb8dc403SDave Cobbley    # of `new[:inew]` (or unset when `old[i]` != `new[inew]`).
67*eb8dc403SDave Cobbley    #
68*eb8dc403SDave Cobbley    # At each stage of iteration, the new `overlap` (called
69*eb8dc403SDave Cobbley    # `_overlap` until the original `overlap` is no longer needed)
70*eb8dc403SDave Cobbley    # is built from the old one.
71*eb8dc403SDave Cobbley    #
72*eb8dc403SDave Cobbley    # If the length of overlap exceeds the largest substring
73*eb8dc403SDave Cobbley    # seen so far (`sub_length`), we update the largest substring
74*eb8dc403SDave Cobbley    # to the overlapping strings.
75*eb8dc403SDave Cobbley
76*eb8dc403SDave Cobbley    overlap = dict()
77*eb8dc403SDave Cobbley    # `sub_start_old` is the index of the beginning of the largest overlapping
78*eb8dc403SDave Cobbley    # substring in the old list. `sub_start_new` is the index of the beginning
79*eb8dc403SDave Cobbley    # of the same substring in the new list. `sub_length` is the length that
80*eb8dc403SDave Cobbley    # overlaps in both.
81*eb8dc403SDave Cobbley    # These track the largest overlapping substring seen so far, so naturally
82*eb8dc403SDave Cobbley    # we start with a 0-length substring.
83*eb8dc403SDave Cobbley    sub_start_old = 0
84*eb8dc403SDave Cobbley    sub_start_new = 0
85*eb8dc403SDave Cobbley    sub_length = 0
86*eb8dc403SDave Cobbley
87*eb8dc403SDave Cobbley    for inew, val in enumerate(new):
88*eb8dc403SDave Cobbley        _overlap = dict()
89*eb8dc403SDave Cobbley        for iold in old_index_map.get(val,list()):
90*eb8dc403SDave Cobbley            # now we are considering all values of iold such that
91*eb8dc403SDave Cobbley            # `old[iold] == new[inew]`.
92*eb8dc403SDave Cobbley            _overlap[iold] = (iold and overlap.get(iold - 1, 0)) + 1
93*eb8dc403SDave Cobbley            if(_overlap[iold] > sub_length):
94*eb8dc403SDave Cobbley                # this is the largest substring seen so far, so store its
95*eb8dc403SDave Cobbley                # indices
96*eb8dc403SDave Cobbley                sub_length = _overlap[iold]
97*eb8dc403SDave Cobbley                sub_start_old = iold - sub_length + 1
98*eb8dc403SDave Cobbley                sub_start_new = inew - sub_length + 1
99*eb8dc403SDave Cobbley        overlap = _overlap
100*eb8dc403SDave Cobbley
101*eb8dc403SDave Cobbley    if sub_length == 0:
102*eb8dc403SDave Cobbley        # If no common substring is found, we return an insert and delete...
103*eb8dc403SDave Cobbley        return (old and [('-', old)] or []) + (new and [('+', new)] or [])
104*eb8dc403SDave Cobbley    else:
105*eb8dc403SDave Cobbley        # ...otherwise, the common substring is unchanged and we recursively
106*eb8dc403SDave Cobbley        # diff the text before and after that substring
107*eb8dc403SDave Cobbley        return diff(old[ : sub_start_old], new[ : sub_start_new]) + \
108*eb8dc403SDave Cobbley               [('=', new[sub_start_new : sub_start_new + sub_length])] + \
109*eb8dc403SDave Cobbley               diff(old[sub_start_old + sub_length : ],
110*eb8dc403SDave Cobbley                       new[sub_start_new + sub_length : ])
111*eb8dc403SDave Cobbley
112*eb8dc403SDave Cobbley
113*eb8dc403SDave Cobbleydef string_diff(old, new):
114*eb8dc403SDave Cobbley    '''
115*eb8dc403SDave Cobbley    Returns the difference between the old and new strings when split on
116*eb8dc403SDave Cobbley    whitespace. Considers punctuation a part of the word
117*eb8dc403SDave Cobbley
118*eb8dc403SDave Cobbley    This function is intended as an example; you'll probably want
119*eb8dc403SDave Cobbley    a more sophisticated wrapper in practice.
120*eb8dc403SDave Cobbley
121*eb8dc403SDave Cobbley    Params:
122*eb8dc403SDave Cobbley        old     the old string
123*eb8dc403SDave Cobbley        new     the new string
124*eb8dc403SDave Cobbley
125*eb8dc403SDave Cobbley    Returns:
126*eb8dc403SDave Cobbley        the output of `diff` on the two strings after splitting them
127*eb8dc403SDave Cobbley        on whitespace (a list of change instructions; see the docstring
128*eb8dc403SDave Cobbley        of `diff`)
129*eb8dc403SDave Cobbley
130*eb8dc403SDave Cobbley    Examples:
131*eb8dc403SDave Cobbley        >>> string_diff('The quick brown fox', 'The fast blue fox')
132*eb8dc403SDave Cobbley        ... # doctest: +NORMALIZE_WHITESPACE
133*eb8dc403SDave Cobbley        [('=', ['The']),
134*eb8dc403SDave Cobbley         ('-', ['quick', 'brown']),
135*eb8dc403SDave Cobbley         ('+', ['fast', 'blue']),
136*eb8dc403SDave Cobbley         ('=', ['fox'])]
137*eb8dc403SDave Cobbley
138*eb8dc403SDave Cobbley    '''
139*eb8dc403SDave Cobbley    return diff(old.split(), new.split())
140*eb8dc403SDave Cobbley
141*eb8dc403SDave Cobbley
142*eb8dc403SDave Cobbleydef html_diff(old, new):
143*eb8dc403SDave Cobbley    '''
144*eb8dc403SDave Cobbley    Returns the difference between two strings (as in stringDiff) in
145*eb8dc403SDave Cobbley    HTML format. HTML code in the strings is NOT escaped, so you
146*eb8dc403SDave Cobbley    will get weird results if the strings contain HTML.
147*eb8dc403SDave Cobbley
148*eb8dc403SDave Cobbley    This function is intended as an example; you'll probably want
149*eb8dc403SDave Cobbley    a more sophisticated wrapper in practice.
150*eb8dc403SDave Cobbley
151*eb8dc403SDave Cobbley    Params:
152*eb8dc403SDave Cobbley        old     the old string
153*eb8dc403SDave Cobbley        new     the new string
154*eb8dc403SDave Cobbley
155*eb8dc403SDave Cobbley    Returns:
156*eb8dc403SDave Cobbley        the output of the diff expressed with HTML <ins> and <del>
157*eb8dc403SDave Cobbley        tags.
158*eb8dc403SDave Cobbley
159*eb8dc403SDave Cobbley    Examples:
160*eb8dc403SDave Cobbley        >>> html_diff('The quick brown fox', 'The fast blue fox')
161*eb8dc403SDave Cobbley        'The <del>quick brown</del> <ins>fast blue</ins> fox'
162*eb8dc403SDave Cobbley    '''
163*eb8dc403SDave Cobbley    con = {'=': (lambda x: x),
164*eb8dc403SDave Cobbley           '+': (lambda x: "<ins>" + x + "</ins>"),
165*eb8dc403SDave Cobbley           '-': (lambda x: "<del>" + x + "</del>")}
166*eb8dc403SDave Cobbley    return " ".join([(con[a])(" ".join(b)) for a, b in string_diff(old, new)])
167*eb8dc403SDave Cobbley
168*eb8dc403SDave Cobbley
169*eb8dc403SDave Cobbleydef check_diff(old, new):
170*eb8dc403SDave Cobbley    '''
171*eb8dc403SDave Cobbley    This tests that diffs returned by `diff` are valid. You probably won't
172*eb8dc403SDave Cobbley    want to use this function, but it's provided for documentation and
173*eb8dc403SDave Cobbley    testing.
174*eb8dc403SDave Cobbley
175*eb8dc403SDave Cobbley    A diff should satisfy the property that the old input is equal to the
176*eb8dc403SDave Cobbley    elements of the result annotated with '-' or '=' concatenated together.
177*eb8dc403SDave Cobbley    Likewise, the new input is equal to the elements of the result annotated
178*eb8dc403SDave Cobbley    with '+' or '=' concatenated together. This function compares `old`,
179*eb8dc403SDave Cobbley    `new`, and the results of `diff(old, new)` to ensure this is true.
180*eb8dc403SDave Cobbley
181*eb8dc403SDave Cobbley    Tests:
182*eb8dc403SDave Cobbley        >>> check_diff('ABCBA', 'CBABA')
183*eb8dc403SDave Cobbley        >>> check_diff('Foobarbaz', 'Foobarbaz')
184*eb8dc403SDave Cobbley        >>> check_diff('Foobarbaz', 'Boobazbam')
185*eb8dc403SDave Cobbley        >>> check_diff('The quick brown fox', 'Some quick brown car')
186*eb8dc403SDave Cobbley        >>> check_diff('A thick red book', 'A quick blue book')
187*eb8dc403SDave Cobbley        >>> check_diff('dafhjkdashfkhasfjsdafdasfsda', 'asdfaskjfhksahkfjsdha')
188*eb8dc403SDave Cobbley        >>> check_diff('88288822828828288282828', '88288882882828282882828')
189*eb8dc403SDave Cobbley        >>> check_diff('1234567890', '24689')
190*eb8dc403SDave Cobbley    '''
191*eb8dc403SDave Cobbley    old = list(old)
192*eb8dc403SDave Cobbley    new = list(new)
193*eb8dc403SDave Cobbley    result = diff(old, new)
194*eb8dc403SDave Cobbley    _old = [val for (a, vals) in result if (a in '=-') for val in vals]
195*eb8dc403SDave Cobbley    assert old == _old, 'Expected %s, got %s' % (old, _old)
196*eb8dc403SDave Cobbley    _new = [val for (a, vals) in result if (a in '=+') for val in vals]
197*eb8dc403SDave Cobbley    assert new == _new, 'Expected %s, got %s' % (new, _new)
198*eb8dc403SDave Cobbley
199