# frozen-string-literal: true

require_relative "levenshtein"
require_relative "jaro_winkler"

module DidYouMean
  # Suggests entries from a fixed dictionary that resemble a given input.
  #
  # Candidates are compared after normalization (see #normalize), but the
  # objects returned are always the original dictionary entries.
  class SpellChecker
    # dictionary:: the collection of candidate words. It is filtered with
    #              +select+ and each entry is converted with +to_s+ before
    #              comparison.
    def initialize(dictionary:)
      @dictionary = dictionary
    end

    # Returns an Array of dictionary entries that look like +input+, ordered
    # from the highest to the lowest JaroWinkler score. An entry whose +to_s+
    # equals <tt>input.to_s</tt> is never suggested.
    #
    # input:: the word to correct; anything responding to +to_s+.
    #
    # The result is empty when nothing in the dictionary is close enough.
    def correct(input)
      normalized_input = normalize(input)
      input_length     = normalized_input.length

      # Pre-filter by JaroWinkler score; inputs of up to three characters get
      # the lower cut-off.
      similarity_threshold = input_length > 3 ? 0.834 : 0.77
      candidates = @dictionary.select do |word|
        JaroWinkler.distance(normalize(word), normalized_input) >= similarity_threshold
      end
      candidates.reject! { |word| input.to_s == word.to_s }

      # Best score first. The ranking deliberately keeps scoring the raw
      # +to_s+ form of each entry (not the normalized one) so that the order
      # of suggestions is unchanged.
      candidates.sort_by! { |word| JaroWinkler.distance(word.to_s, normalized_input) }
      candidates.reverse!

      # Correct mistypes
      max_edits   = (input_length * 0.25).ceil
      corrections = candidates.select do |word|
        Levenshtein.distance(normalize(word), normalized_input) <= max_edits
      end
      return corrections unless corrections.empty?

      # Correct misspells
      #
      # Only the best-ranked match is wanted, so evaluate lazily and stop at
      # the first candidate whose edit distance is below the shorter length.
      candidates.lazy.select do |word|
        normalized_word = normalize(word)
        shorter_length  = [input_length, normalized_word.length].min

        Levenshtein.distance(normalized_word, normalized_input) < shorter_length
      end.first(1)
    end

    private

    # Returns a new, downcased String form of +str_or_symbol+ with every "@"
    # removed.
    def normalize(str_or_symbol) #:nodoc:
      str_or_symbol.to_s.downcase.delete("@")
    end
  end
end