#! /usr/bin/env python
"""Render the differences between two text files as a one-column HTML page.

The two files are compared with ``difflib.Differ``.  Every resulting line
is wrapped in a ``<span>`` whose CSS class says whether the line is common
to both files or belongs to only one of them, and the characters that
changed inside a line are wrapped in a nested ``<span>``.

The syntax used here is valid on both Python 2 and Python 3.
"""

from pprint import pprint
import difflib
import re
import optparse

# Matches any non-whitespace character; used to detect blank diff lines.
nonSpaceRE = re.compile(r'\S')
# Matches a run of the marker characters difflib.Differ puts on its '? '
# guide lines: '+' (added), '-' (removed) and '^' (replaced).
plusMinusRE = re.compile(r'[-+^]+')
# Temporary markers placed around changed characters.  They contain no
# HTML-special characters, so they survive escaping and can be turned
# into real tags afterwards.
startDiff = '_%_START_%_'
endDiff = '_%_END_%_'
startRE = re.compile(startDiff)
endRE = re.compile(endDiff)
# (pattern, replacement) pairs used to escape text for HTML.  The
# ampersand has to come first, otherwise the '&' of '&lt;' and '&gt;'
# would itself be escaped.
fixHtmlREs = [
    (re.compile(r'&'), '&amp;'),
    (re.compile(r'<'), '&lt;'),
    (re.compile(r'>'), '&gt;'),
]


class DiffLines(object):
    """One line of a diff together with the character ranges that changed.

    Attributes:
        line:    the text of the line, without the diff prefix and
                 without trailing newlines.
        command: the diff prefix character: ' ' (in both files),
                 '-' (only in the first file) or '+' (only in the second).
    """

    def __init__(self, line, command):
        self.line = line.rstrip('\n')
        self.command = command
        self._ranges = []

    def __str__(self):
        return "%s : %s" % (self.command, self.line)

    def setRanges(self, ranges):
        """Store the (start, end) column ranges that changed in this line.

        The ranges are kept in descending order; see _markDiffs().
        """
        self._ranges = sorted(ranges, reverse=True)

    def _markDiffs(self):
        """Return the line with start/end markers around each range."""
        retval = self.line
        ## The ranges are sorted in reverse order on purpose: inserting
        ## a marker changes the length of the line, so we work from the
        ## end of the line towards the beginning to keep the offsets of
        ## the ranges still to be processed valid.
        for start, end in self._ranges:
            retval = retval[:end] + endDiff + retval[end:]
            retval = retval[:start] + startDiff + retval[start:]
        return retval

    def asHtml(self):
        """Return the line HTML-escaped, with changed characters in spans.

        For '+' and '-' lines each marked range becomes
        ``<span class="NAME">...</span>`` where NAME is 'firstDiff' for
        '+' and 'secondDiff' for '-'.
        """
        retval = self._markDiffs()
        for pattern, replacement in fixHtmlREs:
            retval = pattern.sub(replacement, retval)
        if self.command in ('+', '-'):
            # NOTE(review): these class names look swapped relative to
            # the 'first'/'second' line classes used in
            # asSingleColumnHtml(); kept as they are because the
            # stylesheet may rely on them.
            if self.command == '-':
                name = 'secondDiff'
            else:
                name = 'firstDiff'
            retval = startRE.sub('<span class="%s">' % name, retval)
            retval = endRE.sub('</span>', retval)
        return retval

    @staticmethod
    def asSingleColumnHtml(diffList, title='', css='diff.css'):
        """Return a complete HTML page showing diffList in one column.

        Each output line starts with the line number in the first file
        and the line number in the second file (blank when the line does
        not exist in that file), followed by the line itself wrapped in
        a span of class 'both', 'first' or 'second'.

        Args:
            diffList: iterable of DiffLines objects, in display order.
            title:    used for both the page title and the heading.
            css:      location of the stylesheet to link to.

        Raises:
            RuntimeError: if an entry has a command other than
                          ' ', '-' or '+'.
        """
        firstNum = 0
        secondNum = 0
        pieces = [
            '<html>\n<head>\n<title>%s</title>\n'
            '<link rel="stylesheet" type="text/css" href="%s">\n'
            '</head>\n<body>\n<h2>%s</h2>\n<pre>\n' % (title, css, title)
        ]
        for obj in diffList:
            if obj.command == ' ':
                name = 'both'
                firstNum += 1
                secondNum += 1
                firstAs = '%d' % firstNum
                secondAs = '%d' % secondNum
            elif obj.command == '-':
                name = 'first'
                firstNum += 1
                firstAs = '%d' % firstNum
                secondAs = ''
            elif obj.command == '+':
                name = 'second'
                secondNum += 1
                firstAs = ''
                secondAs = '%d' % secondNum
            else:
                raise RuntimeError("shouldn't be here")
            pieces.append('%4s %4s ' % (firstAs, secondAs))
            pieces.append('<span class="%s">%s</span>\n'
                          % (name, obj.asHtml()))
        pieces.append('</pre>\n</body>\n</html>\n')
        return ''.join(pieces)


def buildDiffList(text1_lines, text2_lines):
    """Compare two lists of lines and return a list of DiffLines.

    The '? ' guide lines produced by difflib.Differ are not kept as
    entries; they are converted into changed-character ranges on the
    text line that precedes them.
    """
    diffList = []
    last = None
    for line in difflib.Differ().compare(text1_lines, text2_lines):
        # Skip diff lines that contain nothing but whitespace.
        # NOTE(review): this drops blank lines common to both files, so
        # they are not counted in the line numbers either; confirm that
        # this is intended.
        if not nonSpaceRE.search(line):
            continue
        command = line[0:1]
        rest = line[2:]
        if command == '?':
            # This line only points out what is different in the
            # previous text line.
            if last is not None:
                last.setRanges([(m.start(), m.end())
                                for m in plusMinusRE.finditer(rest)])
        else:
            # This is a text line.
            last = DiffLines(rest, command)
            diffList.append(last)
    return diffList


def _readLines(path):
    """Return the lines of the file at path, closing it afterwards."""
    with open(path) as handle:
        return handle.readlines()


if __name__ == "__main__":
    # Setup options parser
    parser = optparse.OptionParser("usage: %prog [options] file1 file2")
    parser.add_option("--css", dest="css", type="string",
                      default="diff.css",
                      help="location of css file (default '%default')")
    (options, args) = parser.parse_args()
    if len(args) != 2:
        raise RuntimeError("Must provide exactly two filees")
    text1_lines = _readLines(args[0])
    text2_lines = _readLines(args[1])
    diffList = buildDiffList(text1_lines, text2_lines)
    print(DiffLines.asSingleColumnHtml(diffList, css=options.css))