#!/usr/bin/ruby
# frozen_string_literal: true

#
# html source diff
# (c) Yoann Guillot 2006
#
# shows the difference by word in two files, ignoring whitespaces/htmlentities conversions
#
# usage : ruby diff.rb file1 file2
#

# Number of consecutive matching words needed to consider both files back in sync.
SYNCLEN = 6
# Number of context words printed before and after each difference.
CTXLEN = 10
# Maximum number of words skipped in either file while searching for a resync point.
MAXDEPTH = 1000

# Maps an HTML entity name (without the surrounding '&' and ';') to the text it
# is considered equivalent to when comparing words.
#
# NOTE(review): the original table was lost from the source (its heredoc was
# garbled); this is a reconstruction limited to common named entities. Confirm
# and extend it against the entities actually used by the compared documents.
Htmlent = {
  'amp' => '&', 'lt' => '<', 'gt' => '>', 'quot' => '"', 'apos' => "'",
  'nbsp' => "\u00a0", 'copy' => "\u00a9", 'reg' => "\u00ae",
  'laquo' => "\u00ab", 'raquo' => "\u00bb", 'euro' => "\u20ac",
  'agrave' => "\u00e0", 'acirc' => "\u00e2", 'auml' => "\u00e4",
  'ccedil' => "\u00e7",
  'egrave' => "\u00e8", 'eacute' => "\u00e9", 'ecirc' => "\u00ea", 'euml' => "\u00eb",
  'icirc' => "\u00ee", 'iuml' => "\u00ef",
  'ocirc' => "\u00f4", 'ouml' => "\u00f6",
  'ugrave' => "\u00f9", 'ucirc' => "\u00fb", 'uuml' => "\u00fc"
}.freeze

# Replaces every "&name;" sequence whose name is a key of Htmlent by the
# associated text; unknown sequences are left untouched.
# Returns nil when +w+ is nil (a position past the end of a file).
def htmlentities(w)
  return unless w

  w.gsub(/&(.*?);/) { |entity| Htmlent[Regexp.last_match(1)] || entity }
end

# returns true if the words are the same, either literally or once their
# html entities are converted. Two nil words (both files exhausted) are equal.
def compare(w1, w2)
  w1 == w2 || htmlentities(w1) == htmlentities(w2)
end

# prints the string in 80 cols
# with the first column filled with +pfx+
def show(pfx, str)
  while str.length > 79
    # cut before the word straddling column 79 ...
    len = 79 - str[0...79][/\S+$/].to_s.length
    # ... unless that word fills the whole line, then hard-wrap it
    len = 79 if len == 0
    puts pfx + str[0...len]
    str = str[len..-1]
  end
  puts pfx + str
end

# Splits +text+ into words: on whitespace runs, and just before every '<'
# so that a tag glued to the preceding text starts its own word.
def tokenize(text)
  text.split(/\s+|(?=<)/)
end

# Searches for the closest position where both word lists agree again.
#
# Starting from file1[count1] / file2[count2], tries skipping dc1 words of
# file1 and dc2 words of file2 with max(dc1, dc2) == depth, for increasing
# depth below +maxdepth+, until +synclen+ consecutive words compare equal.
# Returns [dc1, dc2], or nil when no such position exists within +maxdepth+.
def resync_offsets(file1, count1, file2, count2, synclen = SYNCLEN, maxdepth = MAXDEPTH)
  maxdepth.times do |depth|
    (-depth..depth).each do |d|
      # d < 0 : [depth, depth+d] ; d == 0 : [depth, depth] ; d > 0 : [depth-d, depth]
      dc1 = d > 0 ? depth - d : depth
      dc2 = d < 0 ? depth + d : depth
      in_sync = (0...synclen).all? do |i|
        compare(file1[count1 + dc1 + i], file2[count2 + dc2 + i])
      end
      return [dc1, dc2] if in_sync
    end
  end
  nil
end

# Joins up to +len+ words of +words+ starting at +start+ with single spaces.
# A start index past the end of the list yields an empty string: Array#[]
# returns nil in that case, which happens when a resync lands beyond the end
# of a file.
def words_at(words, start, len)
  (words[start, len] || []).join(' ')
end

# Prints the word-level differences between two word lists on stdout.
#
# For each difference: a ' ' line with the +ctxlen+ preceding words, a '-' line
# with the words only in +words1+, a '+' line with the words only in +words2+,
# a ' ' line with the following +ctxlen+ words of +words1+, then a blank line.
# Raises RuntimeError ('nomatch..') if the lists cannot be resynchronized
# within +maxdepth+ skipped words.
def diff_words(words1, words2, synclen = SYNCLEN, ctxlen = CTXLEN, maxdepth = MAXDEPTH)
  # pad the beginning so that the leading context slice never underflows
  file1 = ['x'] * ctxlen + words1
  file2 = ['x'] * ctxlen + words2
  count1 = count2 = ctxlen

  loop do
    w1 = file1[count1]
    w2 = file2[count2]
    break if !w1 && !w2

    if compare(w1, w2)
      count1 += 1
      count2 += 1
      next
    end

    skip = resync_offsets(file1, count1, file2, count2, synclen, maxdepth)
    raise 'nomatch..' unless skip

    dc1, dc2 = skip
    removed = words_at(file1, count1, dc1)
    added = words_at(file2, count2, dc2)

    show ' ', words_at(file1, count1 - ctxlen, ctxlen)
    # a skip reaching past the end of a file covers no word: print nothing for it
    show '-', removed unless removed.empty?
    show '+', added unless added.empty?
    count1 += dc1
    count2 += dc2
    show ' ', words_at(file1, count1, ctxlen)
    puts
  end
end

if __FILE__ == $PROGRAM_NAME
  if ARGV.length >= 2
    words1 = tokenize(File.read(ARGV.shift))
    words2 = tokenize(File.read(ARGV.shift))
    diff_words(words1, words2)
  else
    warn 'usage : ruby diff.rb file1 file2'
  end
end