# frozen_string_literal: true

module RailsGuides
  module Levenshtein
    # This code is based directly on the Text gem implementation.
    # Copyright (c) 2006-2013 Paul Battley, Michael Neumann, Tim Fletcher.
    #
    # Returns a value representing the "cost" of transforming str1 into str2:
    # the smallest number of single-codepoint insertions, deletions and
    # substitutions that turns one into the other.
    #
    #   RailsGuides::Levenshtein.distance("kitten", "sitting") # => 3
    #   RailsGuides::Levenshtein.distance("", "abc")           # => 3
    #
    # Both arguments must respond to +length+ and +each_codepoint+.
    # The result is a non-negative Integer; 0 means no edits are needed.
    def self.distance(str1, str2)
      n = str1.length
      m = str2.length

      # Turning an empty string into the other one (or vice versa) costs
      # exactly one edit per character of the non-empty string.
      return m if n.zero?
      return n if m.zero?

      # d is a single rolling row of the edit-distance matrix. It starts as
      # the costs of building each prefix of str2 from an empty string and is
      # overwritten in place while each character of str1 is processed.
      d = (0..m).to_a
      x = nil

      # avoid duplicating an enumerable object in the loop
      str2_codepoint_enumerable = str2.each_codepoint

      str1.each_codepoint.with_index do |char1, i|
        # e carries the value of the cell to the left of the one being
        # computed; for the first column that is the cost of deleting the
        # first i + 1 characters of str1.
        e = i + 1

        str2_codepoint_enumerable.with_index do |char2, j|
          cost = char1 == char2 ? 0 : 1
          x = [
            d[j + 1] + 1, # insertion
            e + 1,        # deletion
            d[j] + cost   # substitution
          ].min
          # d[j] is no longer needed for this row, so store the new value of
          # the previous column there before moving on.
          d[j] = e
          e = x
        end

        # Store the last cell of the freshly computed row.
        d[m] = x
      end

      x
    end
  end
end