#!/usr/bin/ruby
#
# html source diff
# (c) Yoann Guillot 2006
#
# shows the word-by-word differences between two files, ignoring differences in whitespace and HTML entity encoding
#
# usage : ruby diff.rb file1 file2
#
# Number of consecutive matching words required before the two word lists
# are considered back in sync after a difference.
synclen = 6
# Number of context words printed before and after each reported difference.
ctxlen = 10

# The two input paths are taken from ARGV in order. Each file is split on
# whitespace runs and right before every '<', and ctxlen placeholder words
# are prepended so that a context slice can always be taken before the
# first real word.
file1, file2 = Array.new(2) {
  Array.new(ctxlen, 'x') + File.read(ARGV.shift).split(/\s+|(?=<)/)
}

# Both cursors start on the first real word, just past the placeholders.
count1 = ctxlen
count2 = ctxlen
# Word equality test: two words match when they are identical as written,
# or when they become identical once both are passed through htmlentities.
def compare(w1, w2)
  return true if w1 == w2

  htmlentities(w1) == htmlentities(w2)
end
# Prints +str+ wrapped to 80 columns: every output line is +pfx+ followed
# by at most 79 characters of text.
# A line is cut in front of the word that straddles the 79-character limit;
# when the whole 79-character window is a single unbroken word, it is cut
# at exactly 79 characters instead.
def show(pfx, str)
  rest = str
  while rest.length > 79
    window = rest[0, 79]
    # length of the partial word sitting at the end of the window, if any
    dangling = window[/\S+$/].to_s.length
    cut = 79 - dangling
    cut = 79 if cut.zero?
    puts pfx + rest[0, cut]
    rest = rest[cut..-1]
  end
  puts pfx + rest
end
# Main diff loop: walks both word lists in lockstep. Matching words simply
# advance both cursors. On a mismatch, the nearest pair of offsets from
# which synclen consecutive words match again is searched for, the skipped
# words are printed as removed ('-') / added ('+') between two context
# lines, and the cursors jump to the resync point.
# Raises RuntimeError ('nomatch..') when no resync point is found within
# 1000 words of the mismatch.
loop do
  w1 = file1[count1]
  w2 = file2[count2]
  break unless w1 || w2 # both lists are exhausted

  if compare(w1, w2)
    count1 += 1
    count2 += 1
    next
  end

  catch(:resynced) do
    1000.times do |depth|
      # Every offset pair whose larger component equals depth, so that
      # closer resync points are always tried before farther ones.
      candidates = (-depth..depth).map do |d|
        if d < 0
          [depth, depth + d]
        elsif d > 0
          [depth - d, depth]
        else
          [depth, depth]
        end
      end

      candidates.each do |dc1, dc2|
        # all? stops at the first mismatching word of the window.
        in_sync = (0...synclen).all? do |i|
          compare(file1[count1 + dc1 + i], file2[count2 + dc2 + i])
        end
        next unless in_sync

        # Words read past the end of a list are nil, and nil matches nil,
        # so an accepted pair may point beyond the end of a list (this
        # happens when file2 has extra trailing words). Clamp the offsets
        # to the words actually remaining: otherwise an empty '-' line is
        # printed and the trailing context slice below is nil.
        dc1 = [dc1, file1.length - count1].min
        dc2 = [dc2, file2.length - count2].min

        show ' ', file1[count1 - ctxlen, ctxlen].join(' ')
        show '-', file1[count1, dc1].join(' ') if dc1 > 0
        show '+', file2[count2, dc2].join(' ') if dc2 > 0
        count1 += dc1
        count2 += dc2
        show ' ', file1[count1, ctxlen].join(' ')
        puts
        throw :resynced
      end
    end
    raise 'nomatch..'
  end
end
# BEGIN block: Ruby runs this before the rest of the file, so Htmlent and
# htmlentities already exist when the top-level diff code calls compare.
BEGIN {
# Lookup table indexed by the text found between '&' and ';' (see htmlentities).
# NOTE(review): the initializer below looks truncated -- a heredoc opener and
# its body appear to be missing before the EOS terminator; restore it from the
# original source, the table contents cannot be determined from this file.
Htmlent = <
EOS
# Returns a copy of +w+ in which every "&...;" sequence that has a truthy entry
# in Htmlent is replaced by that entry; other sequences are kept unchanged.
# Returns nil when +w+ is nil or false.
def htmlentities(w)
w.gsub(/&(.*?);/) { |x| Htmlent[$1] or x } if w
end
}