#! /usr/bin/env python
"""Render a difflib line-by-line comparison of two texts as one HTML column.

Running this file as a script compares the two sample texts below and prints
the resulting HTML document to standard output.
"""

from pprint import pprint
import difflib
import re

text1 = (
    "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. "
    "Integer eu lacus accumsan arcu fermentum euismod. "
    "Donec pulvinar porttitor tellus. Aliquam venenatis. "
    "Donec facilisis pharetra tortor. "
    "In nec mauris eget magna consequat convallis. "
    "Nam sed sem vitae odio pellentesque interdum. "
    "Sed consequat viverra nisl. "
    "Suspendisse arcu metus, blandit quis, rhoncus ac, pharetra eget, velit. "
    "Mauris urna. Morbi nonummy molestie orci. "
    "Praesent nisi elit, fringilla ac, suscipit non, tristique vel, mauris. "
    "Curabitur vel lorem id nisl porta adipiscing. "
    "Suspendisse eu lectus. In nunc. "
    "Duis vulputate tristique enim. "
    "Donec quis lectus a justo imperdiet tempus. "
    "Make them the same"
)
text1_lines = text1.splitlines()

text2 = (
    "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. "
    "Integer eu lacus accumsan arcu fermentum euismod. "
    "Donec pulvinar, porttitor tellus. Aliquam venenatis. "
    "Donec facilisis pharetra tortor. "
    "In nec mauris eget magna consequat convallis. "
    "Nam cras vitae mi vitae odio pellentesque interdum. "
    "Sed consequat viverra nisl. "
    "Suspendisse arcu metus, blandit quis, rhoncus ac, pharetra eget, velit. "
    "Mauris urna. Morbi nonummy molestie orci. "
    "Praesent nisi elit, fringilla ac, suscipit non, tristique vel, mauris. "
    "Curabitur vel lorem id nisl porta adipiscing. "
    "Duis vulputate tristique enim. "
    "Donec quis lectus a justo imperdiet tempus. "
    "Suspendisse eu lectus. In nunc. \n"
    "Make them the same"
)
text2_lines = text2.splitlines()

# Matches any line that contains at least one non-whitespace character.
nonSpaceRE = re.compile(r'\S')

# Finds runs of '+' / '-' in a difflib '?' guide line.  Note that the comma
# inside the character class is a literal comma, and that '^' (which difflib
# uses to flag replaced characters) is not matched, so only inserted and
# deleted runs end up highlighted.
plusMinusRE = re.compile(r'[\+,\-]+')

# Temporary markers spliced into a line around each changed range.  They
# contain no HTML-special characters, so they survive escaping untouched and
# are swapped for real tags afterwards.
startDiff = '_%_START_%_'
endDiff = '_%_END_%_'
startRE = re.compile(re.escape(startDiff))
endRE = re.compile(re.escape(endDiff))

# (pattern, replacement) pairs applied in order to HTML-escape text.  '&'
# must stay first so the entities produced by the later pairs are not
# escaped a second time.
fixHtmlREs = [
    (re.compile(r'&'), '&amp;'),
    (re.compile(r'<'), '&lt;'),
    (re.compile(r'>'), '&gt;'),
]

# NOTE(review): the original markup of this template could not be recovered
# from the source; this skeleton supplies the tags and styles implied by the
# CSS class names used below.  Adjust the styling as needed.
htmlHeaderTemplate = '''<html>
<head>
<title>%s</title>
<style type="text/css">
.first { background-color: #ffdddd; }
.second { background-color: #ddffdd; }
.firstDiff, .secondDiff { font-weight: bold; text-decoration: underline; }
</style>
</head>
<body>
<h2>%s</h2>
<pre>
'''
htmlFooter = '</pre>\n</body>\n</html>\n'


class DiffLines(object):
    """One text line of a difflib comparison plus its changed ranges.

    Attributes:
        line: the line's text with trailing newlines removed.
        command: the difflib prefix character (' ', '-' or '+').
    """

    def __init__(self, line, command):
        """Store the text (minus trailing newlines) and its prefix."""
        self.line = line.rstrip('\n')
        self.command = command
        self._ranges = []

    def __str__(self):
        return "%s : %s" % (self.command, self.line)

    def setRanges(self, ranges):
        """Record the (start, end) offsets of the changed spans.

        The ranges are kept in descending order; see _markDiffs for why.
        """
        self._ranges = sorted(ranges, reverse=True)

    def _markDiffs(self):
        """Return the line with start/end markers around each range.

        Inserting a marker lengthens the string, which would shift every
        offset to its right.  Working through the ranges from the end of
        the line towards the beginning (and placing each end marker before
        its start marker) keeps the offsets still to be used valid.
        """
        retval = self.line
        for start, end in self._ranges:
            retval = retval[:end] + endDiff + retval[end:]
            retval = retval[:start] + startDiff + retval[start:]
        return retval

    def asHtml(self):
        """Return the HTML-escaped line with changed spans wrapped in tags.

        For '+' and '-' lines each marked range becomes a
        <span class="..."> element; for any other command the text is only
        escaped.
        """
        retval = self._markDiffs()
        for pattern, replacement in fixHtmlREs:
            retval = pattern.sub(replacement, retval)
        if self.command in ('+', '-'):
            # NOTE(review): '-' lines get 'secondDiff' here while
            # asSingleColumnHtml labels '-' lines 'first'.  The mapping is
            # preserved as found; confirm whether it is intended.
            name = 'firstDiff'
            if self.command == '-':
                name = 'secondDiff'
            retval = startRE.sub('<span class="%s">' % name, retval)
            retval = endRE.sub('</span>', retval)
        return retval

    @staticmethod
    def asSingleColumnHtml(diffList, title=''):
        """Render DiffLines objects as a single-column HTML document.

        Each output row shows the line number in the first text, the line
        number in the second text (blank where the line is absent from that
        text) and the line itself.

        Args:
            diffList: iterable of DiffLines objects, in display order.
            title: text used for both the page title and the heading; it is
                inserted as given, without escaping.

        Returns:
            The complete HTML document as a string.

        Raises:
            RuntimeError: if an object's command is not ' ', '-' or '+'.
        """
        firstNum = 0
        secondNum = 0
        pieces = [htmlHeaderTemplate % (title, title)]
        for obj in diffList:
            if obj.command == ' ':
                name = 'both'
                firstNum += 1
                secondNum += 1
                firstAs = '%d' % firstNum
                secondAs = '%d' % secondNum
            elif obj.command == '-':
                name = 'first'
                firstNum += 1
                firstAs = '%d' % firstNum
                secondAs = ''
            elif obj.command == '+':
                name = 'second'
                secondNum += 1
                firstAs = ''
                secondAs = '%d' % secondNum
            else:
                raise RuntimeError("shouldn't be here")
            pieces.append('%4s %4s ' % (firstAs, secondAs))
            pieces.append('<span class="%s">%s</span>\n'
                          % (name, obj.asHtml()))
        pieces.append(htmlFooter)
        return ''.join(pieces)


def buildDiffList(diffLines):
    """Turn the output of difflib.Differ.compare into DiffLines objects.

    Lines holding nothing but whitespace (unchanged blank lines) are
    dropped, so they are neither displayed nor counted in the numbering.
    A '?' guide line is not displayed either; the '+'/'-' runs it contains
    are attached as changed ranges to the text line just before it.

    Args:
        diffLines: iterable of strings as produced by Differ.compare.

    Returns:
        A list of DiffLines objects in input order.
    """
    result = []
    last = None
    for line in diffLines:
        if not nonSpaceRE.search(line):
            continue
        command = line[0:1]
        # Every Differ line carries a two-character prefix, so offsets in a
        # guide line line up with offsets in the text once it is removed.
        rest = line[2:]
        if command == '?':
            # A guide line always follows the text line it annotates; the
            # check only protects against hand-built input that starts
            # with one.
            if last is not None:
                last.setRanges([(m.start(), m.end())
                                for m in plusMinusRE.finditer(rest)])
        else:
            last = DiffLines(rest, command)
            result.append(last)
    return result


diffList = buildDiffList(difflib.Differ().compare(text1_lines, text2_lines))

if __name__ == '__main__':
    print(DiffLines.asSingleColumnHtml(diffList))