2003-10-14 16:14:20 -04:00
|
|
|
#!/usr/bin/env ruby
|
2011-05-16 14:51:56 -04:00
|
|
|
#--
|
2003-10-14 16:14:20 -04:00
|
|
|
# Copyright (c) 2001,2003 Akinori MUSHA <knu@iDaemons.org>
|
|
|
|
#
|
|
|
|
# All rights reserved. You can redistribute and/or modify it under
|
|
|
|
# the same terms as Ruby.
|
|
|
|
#
|
|
|
|
# $Idaemons: /home/cvs/rb/abbrev.rb,v 1.2 2001/05/30 09:37:45 knu Exp $
|
|
|
|
# $RoughId: abbrev.rb,v 1.4 2003/10/14 19:45:42 knu Exp $
|
|
|
|
# $Id$
|
2011-05-16 14:51:56 -04:00
|
|
|
#++
|
2004-01-20 00:27:23 -05:00
|
|
|
|
2012-04-10 16:15:05 -04:00
|
|
|
##
|
|
|
|
# Calculates the set of unique abbreviations for a given set of strings.
|
2004-01-20 00:27:23 -05:00
|
|
|
#
|
|
|
|
# require 'abbrev'
|
|
|
|
# require 'pp'
|
|
|
|
#
|
2012-04-10 16:15:05 -04:00
|
|
|
# pp Abbrev.abbrev(['ruby', 'rules'])
|
2004-01-20 00:27:23 -05:00
|
|
|
#
|
2012-04-10 16:15:05 -04:00
|
|
|
# Generates:
|
2004-01-20 00:27:23 -05:00
|
|
|
#
|
2012-04-10 16:15:05 -04:00
|
|
|
# { "rub" => "ruby",
|
|
|
|
# "ruby" => "ruby",
|
|
|
|
# "rul" => "rules",
|
|
|
|
# "rule" => "rules",
|
|
|
|
# "rules" => "rules" }
|
2004-01-20 00:27:23 -05:00
|
|
|
#
|
2012-10-06 16:03:26 -04:00
|
|
|
# It also provides an array core extension, Array#abbrev.
|
|
|
|
#
|
|
|
|
# pp %w{summer winter}.abbrev
|
|
|
|
# #=> {"summe"=>"summer",
|
|
|
|
# "summ"=>"summer",
|
|
|
|
# "sum"=>"summer",
|
|
|
|
# "su"=>"summer",
|
|
|
|
# "s"=>"summer",
|
|
|
|
# "winte"=>"winter",
|
|
|
|
# "wint"=>"winter",
|
|
|
|
# "win"=>"winter",
|
|
|
|
# "wi"=>"winter",
|
|
|
|
# "w"=>"winter",
|
|
|
|
# "summer"=>"summer",
|
|
|
|
# "winter"=>"winter"}
|
2003-10-14 16:14:20 -04:00
|
|
|
|
|
|
|
module Abbrev

  # Given a set of strings, calculates the set of unambiguous
  # abbreviations for those strings, and returns a hash where the keys
  # are all the possible abbreviations and the values are the full
  # strings.
  #
  # Thus, given +words+ of "car" and "cone", the keys pointing to "car" would
  # be "ca" and "car", while those pointing to "cone" would be "co", "con", and
  # "cone".
  #
  #   require 'abbrev'
  #   require 'pp'
  #
  #   pp Abbrev.abbrev(['car', 'cone'])
  #   #=> {"ca"=>"car", "con"=>"cone", "co"=>"cone", "car"=>"car", "cone"=>"cone"}
  #
  # The optional +pattern+ parameter is a pattern or a string. Only
  # input strings that match the pattern or start with the string
  # are included in the output hash. The same filter is applied to every
  # candidate abbreviation, so abbreviations that do not match +pattern+
  # are omitted as well.
  #
  #   pp %w{car box cone}.abbrev(/b/)
  #   #=> {"bo"=>"box", "b"=>"box", "box"=>"box"}
  #
  # An empty string in +words+ contributes no shortened forms; it is only
  # mapped to itself (subject to +pattern+).
  def abbrev(words, pattern = nil)
    table = {}
    seen = Hash.new(0)

    # A String pattern is regarded as a literal prefix. It is anchored with
    # \A rather than ^ so that it matches only at the very start of the
    # string, never right after an embedded newline.
    pattern = /\A#{Regexp.quote(pattern)}/ if pattern.is_a?(String)

    words.each do |word|
      # Walk the proper prefixes of the word from longest to shortest; the
      # complete words themselves are registered in the second pass below.
      (word.size - 1).downto(1) do |len|
        abbrev = word[0, len]

        next if pattern && pattern !~ abbrev

        case seen[abbrev] += 1
        when 1
          # First sighting: tentatively unambiguous.
          table[abbrev] = word
        when 2
          # Second sighting: the prefix is ambiguous, so withdraw it.
          table.delete(abbrev)
        else
          # Seen three or more times: stop shortening this word.
          break
        end
      end
    end

    # A full word always maps to itself, overriding any abbreviation of a
    # longer word that happens to be spelled the same.
    words.each do |word|
      next if pattern && pattern !~ word

      table[word] = word
    end

    table
  end

  module_function :abbrev
end
|
|
|
|
|
|
|
|
class Array
  # Calculates the set of unambiguous abbreviations for the strings in
  # +self+ by delegating to Abbrev.abbrev.
  #
  #   abbr = %w{ car cone }.abbrev
  #   abbr #=> { "ca" => "car", "car" => "car",
  #              "co" => "cone", "con" => "cone",
  #              "cone" => "cone" }
  #
  # The optional +pattern+ parameter is a pattern or a string. Only
  # input strings that match the pattern or start with the string
  # are included in the output hash.
  #
  #   abbr = %w{ fast boat day }.abbrev(/^.a.*$/)
  #   abbr #=> {"fas"=>"fast","fa"=>"fast",
  #             "da"=>"day", "fast"=>"fast", "day"=>"day"}
  #
  # See also Abbrev.abbrev
  def abbrev(pattern = nil)
    Abbrev.abbrev(self, pattern)
  end
end
|
|
|
|
|
|
|
|
# When this file is executed directly (rather than required), read input
# line by line, treat each line as a whitespace-separated word list, and
# print that line's abbreviation table sorted by abbreviation.
if __FILE__ == $0
  while (input = gets)
    input.split.abbrev.sort.each do |short, full|
      puts "#{short} => #{full}"
    end
  end
end
|