2016-02-01 07:43:26 -05:00
|
|
|
# frozen_string_literal: true
|
2009-06-09 17:38:59 -04:00
|
|
|
|
|
|
|
##
|
|
|
|
# A collection of text-wrangling methods
|
|
|
|
|
|
|
|
module Gem::Text
  ##
  # Remove any non-printable characters and make the text suitable for
  # printing.
  #
  # Every ASCII control character except tab, line feed and carriage return
  # (i.e. bytes 0-8, 11-12, 14-31) as well as DEL (127) is replaced by a
  # literal ".".  A new String is returned; +text+ itself is not modified.

  def clean_text(text)
    text.gsub(/[\000-\b\v-\f\016-\037\177]/, ".".freeze)
  end

  ##
  # Limits +text+ to +max_length+ characters.
  #
  # Returns +text+ unchanged when it is no longer than +max_length+.
  # Otherwise returns a one-line notice mentioning +description+ and the
  # limit (rendered with thousands separators, e.g. "100,000"), followed by
  # the first +max_length+ characters of +text+.
  #
  # Raises ArgumentError unless +max_length+ is greater than zero.

  def truncate_text(text, description, max_length = 100_000)
    raise ArgumentError, "max_length must be positive" unless max_length > 0
    return text if text.size <= max_length

    # Reverse the digits, put a comma after every group of three that is
    # followed by another digit, then reverse back: 100000 -> "100,000".
    "Truncating #{description} to #{max_length.to_s.reverse.gsub(/...(?=.)/,'\&,').reverse} characters:\n" + text[0, max_length]
  end

  ##
  # Wraps +text+ to +wrap+ characters and optionally indents by +indent+
  # characters
  #
  # The text is first passed through #clean_text.  Lines are broken at the
  # last space or newline found within the first +wrap+ characters (plus the
  # break character itself); when there is no such break point the line is
  # cut hard after +wrap+ characters.  Returns the wrapped lines joined with
  # newlines, each prefixed with +indent+ spaces.
  #
  # +wrap+ should be a positive Integer; no validation is performed here.

  def format_text(text, wrap, indent=0)
    result = []
    work = clean_text(text)
    line_break = nil

    while work.length > wrap do
      # Built lazily (only when wrapping is actually needed) and only once.
      # The pattern is anchored with \A rather than ^: ^ also matches at the
      # start of any later line, which would report a break point deep inside
      # +work+ while the slice below always cuts from the front.
      line_break ||= /\A(.{0,#{wrap}})[ \n]/

      if (match = line_break.match(work))
        result << match[1].rstrip
        # Drop the emitted line together with the break character.
        work.slice!(0, match[0].length)
      else
        # No break point within reach: hard-cut after +wrap+ characters.
        result << work.slice!(0, wrap)
      end
    end

    result << work if work.length.nonzero?
    result.join("\n").gsub(/^/, " " * indent)
  end

  # Returns the smallest of +a+, +b+ and +c+ without allocating an Array.
  def min3(a, b, c) # :nodoc:
    if a < b && a < c
      a
    elsif b < c
      b
    else
      c
    end
  end

  # Returns a value representing the "cost" of transforming str1 into str2
  # Vendored version of DidYouMean::Levenshtein.distance from the ruby/did_you_mean gem @ 1.4.0
  # https://github.com/ruby/did_you_mean/blob/2ddf39b874808685965dbc47d344cf6c7651807c/lib/did_you_mean/levenshtein.rb#L7-L37
  #
  # The cost is the number of single-codepoint insertions, deletions and
  # substitutions; when either string is empty it is the length of the other.
  def levenshtein_distance(str1, str2)
    n = str1.length
    m = str2.length
    return m if n.zero?
    return n if m.zero?

    # d holds a single row of the distance table and is updated in place.
    d = (0..m).to_a
    x = nil

    # to avoid duplicating an enumerable object, create it outside of the loop
    str2_codepoints = str2.codepoints

    str1.each_codepoint.with_index(1) do |char1, i|
      j = 0
      while j < m
        cost = (char1 == str2_codepoints[j]) ? 0 : 1
        x = min3(
          d[j + 1] + 1, # insertion
          i + 1,        # deletion
          d[j] + cost   # substitution
        )
        # i carries the value computed for the previous column of this row;
        # store it before overwriting i with the value just computed.
        d[j] = i
        i = x

        j += 1
      end
      d[m] = x
    end

    x
  end
end
|