#! /usr/bin/env python
"""Render the difference between two text files as a single-column HTML page.

The two files are compared with ``difflib.Differ``.  Every line of the
resulting delta is wrapped in a ``DiffLines`` object, and the character ranges
flagged by Differ's ``?`` guide lines are highlighted inside the line.

NOTE(review): the HTML markup in this file's format strings was missing (the
strings had more arguments than placeholders and raised TypeError).  The tags
below are a reconstruction; confirm element and class names against diff.css.
"""

from pprint import pprint
import difflib
import re
import optparse

nonSpaceRE = re.compile(r'\S')
# Differ's '?' guide lines mark intraline changes with '+', '-' and '^'.
plusMinusRE = re.compile(r'[-+^]+')
# Placeholders inserted around changed ranges before HTML escaping; they are
# swapped for real tags afterwards so the tags themselves are not escaped.
startDiff = '_%_START_%_'
endDiff = '_%_END_%_'
startRE = re.compile(startDiff)
endRE = re.compile(endDiff)
# Applied in order: '&' must be escaped first so the entities produced by the
# later substitutions are not escaped a second time.
fixHtmlREs = [
    (re.compile(r'&'), '&amp;'),
    (re.compile(r'<'), '&lt;'),
    (re.compile(r'>'), '&gt;'),
]


class DiffLines(object):
    """One text line of a Differ delta together with its command character.

    ``command`` is the first character of the Differ line: ' ' (line present
    in both files), '-' (only in the first file) or '+' (only in the second).
    """

    def __init__(self, line, command):
        """Store ``line`` without trailing newlines and its ``command``."""
        self.line = line.rstrip('\n')
        self.command = command
        self._ranges = []

    def __str__(self):
        return "%s : %s" % (self.command, self.line)

    def setRanges(self, ranges):
        """Record the (start, end) index pairs of ``line`` to highlight.

        The pairs are stored in descending order; see ``_markDiffs``.
        """
        self._ranges = sorted(ranges, reverse=True)

    def _markDiffs(self):
        """Return the line with placeholder markers around each range."""
        retval = self.line
        # Inserting a marker lengthens the string, which would invalidate
        # the indices of every range to its right.  The ranges are kept in
        # descending order so we work from the end of the line toward the
        # beginning, and within one range the end marker goes in first.
        for start, end in self._ranges:
            retval = retval[:end] + endDiff + retval[end:]
            retval = retval[:start] + startDiff + retval[start:]
        return retval

    def asHtml(self):
        """Return the line HTML-escaped, with changed ranges in spans.

        Highlighted ranges are wrapped in ``<span class="firstDiff">`` for
        '+' lines and ``<span class="secondDiff">`` for '-' lines.
        """
        retval = self._markDiffs()
        for regex, replacement in fixHtmlREs:
            retval = regex.sub(replacement, retval)
        if self.command == '+' or self.command == '-':
            name = 'firstDiff'
            if self.command == '-':
                name = 'secondDiff'
            retval = startRE.sub('<span class="%s">' % name, retval)
            retval = endRE.sub('</span>', retval)
        return retval

    @staticmethod
    def asSingleColumnHtml(diffList, title='', css='diff.css'):
        """Return a complete HTML page showing ``diffList`` in one column.

        Each output row starts with the line number in the first file and
        the line number in the second file (blank when the line is absent
        from that file), followed by the line text in a span whose class is
        'both', 'first' or 'second'.

        Args:
            diffList: iterable of ``DiffLines`` objects, in display order.
            title: text used for the page title and heading (inserted
                as-is, not escaped).
            css: location of the stylesheet the page links to.

        Raises:
            RuntimeError: if an entry has a command other than ' ', '-'
                or '+'.
        """
        firstNum = 0
        secondNum = 0
        pieces = [
            '<html>\n'
            '<head>\n'
            '<title>%s</title>\n'
            '<link rel="stylesheet" type="text/css" href="%s">\n'
            '</head>\n'
            '<body>\n'
            '<h1>%s</h1>\n'
            '<pre>\n' % (title, css, title)
        ]
        for obj in diffList:
            if obj.command == ' ':
                name = 'both'
                firstNum += 1
                secondNum += 1
                firstAs = '%d' % firstNum
                secondAs = '%d' % secondNum
            elif obj.command == '-':
                name = 'first'
                firstNum += 1
                firstAs = '%d' % firstNum
                secondAs = ''
            elif obj.command == '+':
                name = 'second'
                secondNum += 1
                firstAs = ''
                secondAs = '%d' % secondNum
            else:
                raise RuntimeError("shouldn't be here")
            pieces.append('%4s %4s ' % (firstAs, secondAs))
            pieces.append('<span class="%s">%s</span>\n'
                          % (name, obj.asHtml()))
        pieces.append('</pre>\n</body>\n</html>\n')
        return ''.join(pieces)


def buildDiffList(text1_lines, text2_lines):
    """Compare two lists of lines and return the delta as ``DiffLines``.

    Args:
        text1_lines: lines of the first file, newline-terminated.
        text2_lines: lines of the second file, newline-terminated.

    Returns:
        A list of ``DiffLines``, one per text line of the Differ output.
        Guide lines ('?') do not get an entry of their own; they set the
        highlight ranges of the entry just before them.
    """
    diffList = []
    last = None
    for line in difflib.Differ().compare(text1_lines, text2_lines):
        # Skip delta lines that contain nothing but whitespace.
        # NOTE(review): this drops blank lines common to both files, so
        # the line numbers shown after them no longer match the source
        # files -- confirm whether that is intended.
        if not nonSpaceRE.search(line):
            continue
        command = line[0:1]
        rest = line[2:]
        if command == '?':
            # A guide line only points out what differs in the text line
            # that precedes it.
            if last is not None:
                last.setRanges([(m.start(), m.end())
                                for m in plusMinusRE.finditer(rest)])
        else:
            # An ordinary text line.
            last = DiffLines(rest, command)
            diffList.append(last)
    return diffList


def main():
    """Parse the command line, diff the two files and print the HTML."""
    parser = optparse.OptionParser("usage: %prog [options] file1 file2")
    parser.add_option("--css", dest="css", type="string",
                      default="diff.css",
                      help="location of css file (default '%default')")
    (options, args) = parser.parse_args()
    if len(args) != 2:
        raise RuntimeError("Must provide exactly two filees")
    # Context managers make sure both input files are closed again.
    with open(args[0]) as firstFile:
        text1_lines = firstFile.readlines()
    with open(args[1]) as secondFile:
        text2_lines = secondFile.readlines()
    diffList = buildDiffList(text1_lines, text2_lines)
    print(DiffLines.asSingleColumnHtml(diffList, css=options.css))


if __name__ == "__main__":
    main()