From bfc73852b1f03d8dee405cdeb0f2883d89c78b2d Mon Sep 17 00:00:00 2001
From: Manfred Stienstra <manfred@fngtps.com>
Date: Sun, 21 Sep 2008 17:28:05 +0200
Subject: [PATCH] Improve documentation.

---
 .../core_ext/string/multibyte.rb              |  23 ++--
 activesupport/lib/active_support/multibyte.rb |   4 +-
 .../lib/active_support/multibyte/chars.rb     | 113 ++++++++++--------
 .../active_support/multibyte/exceptions.rb    |   1 +
 4 files changed, 74 insertions(+), 67 deletions(-)

diff --git a/activesupport/lib/active_support/core_ext/string/multibyte.rb b/activesupport/lib/active_support/core_ext/string/multibyte.rb
index 5a2dc36f72..3bf79bc7e1 100644
--- a/activesupport/lib/active_support/core_ext/string/multibyte.rb
+++ b/activesupport/lib/active_support/core_ext/string/multibyte.rb
@@ -6,7 +6,9 @@ module ActiveSupport #:nodoc:
       # Implements multibyte methods for easier access to multibyte characters in a String instance.
       module Multibyte
         unless '1.9'.respond_to?(:force_encoding)
-          # +mb_chars+ is a multibyte safe proxy method for string methods.
+          # == Multibyte proxy
+          #
+          # +mb_chars+ is a multibyte safe proxy for string methods.
           #
           # In Ruby 1.8 and older it creates and returns an instance of the ActiveSupport::Multibyte::Chars class which
           # encapsulates the original string. A Unicode safe version of all the String methods are defined on this proxy
@@ -19,11 +21,10 @@ module ActiveSupport #:nodoc:
           #   name.mb_chars.reverse.to_s   #=> "rellüM sualC"
           #   name.mb_chars.length         #=> 12
           #
-          # In Ruby 1.9 and newer +mb_chars+ returns +self+ because String is (mostly) encoding aware so we don't need
-          # a proxy class any more. This means that +mb_chars+ makes it easier to write code that runs on multiple Ruby
-          # versions.
+          # In Ruby 1.9 and newer +mb_chars+ returns +self+ because String is (mostly) encoding aware. This means that
+          # it becomes easy to run one version of your code on multiple Ruby versions.
           #
-          # == Method chaining 
+          # == Method chaining
           #
           # All the methods on the Chars proxy which normally return a string will return a Chars object. This allows
           # method chaining on the result of any of these methods.
@@ -32,12 +33,12 @@ module ActiveSupport #:nodoc:
           #
           # == Interoperability and configuration
           #
-          # The Char object tries to be as interchangeable with String objects as possible: sorting and comparing between
+          # The Chars object tries to be as interchangeable with String objects as possible: sorting and comparing between
           # String and Char work like expected. The bang! methods change the internal string representation in the Chars
           # object. Interoperability problems can be resolved easily with a +to_s+ call.
           #
           # For more information about the methods defined on the Chars proxy see ActiveSupport::Multibyte::Chars. For
-          # information about how to change the default Multibyte behaviour, see ActiveSupport::Multibyte.
+          # information about how to change the default Multibyte behaviour see ActiveSupport::Multibyte.
           def mb_chars
             if ActiveSupport::Multibyte.proxy_class.wants?(self)
               ActiveSupport::Multibyte.proxy_class.new(self)
@@ -56,15 +57,11 @@ module ActiveSupport #:nodoc:
             alias chars mb_chars
           end
         else
-          # In Ruby 1.9 and newer +mb_chars+ returns self. In Ruby 1.8 and older +mb_chars+ creates and returns an
-          # Unicode safe proxy for string operations, this makes it easier to write code that runs on multiple Ruby
-          # versions.
-          def mb_chars
+          def mb_chars #:nodoc:
             self
           end
           
-          # Returns true if the string has valid UTF-8 encoding.
-          def is_utf8?
+          def is_utf8? #:nodoc:
             case encoding
             when Encoding::UTF_8
               valid_encoding?
diff --git a/activesupport/lib/active_support/multibyte.rb b/activesupport/lib/active_support/multibyte.rb
index 63c0d50166..018aafe607 100644
--- a/activesupport/lib/active_support/multibyte.rb
+++ b/activesupport/lib/active_support/multibyte.rb
@@ -5,7 +5,7 @@ require 'active_support/multibyte/exceptions'
 require 'active_support/multibyte/unicode_database'
 
 module ActiveSupport #:nodoc:
-  module Multibyte #:nodoc:
+  module Multibyte
     # A list of all available normalization forms. See http://www.unicode.org/reports/tr15/tr15-29.html for more
     # information about normalization.
     NORMALIZATIONS_FORMS = [:c, :kc, :d, :kd]
@@ -30,4 +30,4 @@ module ActiveSupport #:nodoc:
     mattr_accessor :proxy_class
     self.proxy_class = ActiveSupport::Multibyte::Chars
   end
-end
\ No newline at end of file
+end
diff --git a/activesupport/lib/active_support/multibyte/chars.rb b/activesupport/lib/active_support/multibyte/chars.rb
index 27cc3c65a2..c61367968e 100644
--- a/activesupport/lib/active_support/multibyte/chars.rb
+++ b/activesupport/lib/active_support/multibyte/chars.rb
@@ -2,7 +2,7 @@
 
 module ActiveSupport #:nodoc:
   module Multibyte #:nodoc:
-    # Chars enables you to work transparently with multibyte encodings in the Ruby String class without having extensive
+    # Chars enables you to work transparently with UTF-8 encoding in the Ruby String class without having extensive
     # knowledge about the encoding. A Chars object accepts a string upon initialization and proxies String methods in an
     # encoding safe manner. All the normal String methods are also implemented on the proxy.
     #
@@ -88,14 +88,14 @@ module ActiveSupport #:nodoc:
       alias to_s wrapped_string
       alias to_str wrapped_string
 
-      # Creates a new Chars instance. +string+ is the wrapped string.
       if '1.9'.respond_to?(:force_encoding)
+        # Creates a new Chars instance by wrapping _string_.
         def initialize(string)
           @wrapped_string = string
           @wrapped_string.force_encoding(Encoding::UTF_8) unless @wrapped_string.frozen?
         end
       else
-        def initialize(string)
+        def initialize(string) #:nodoc:
           @wrapped_string = string
         end
       end
@@ -121,10 +121,10 @@ module ActiveSupport #:nodoc:
         true
       end
 
-      # Returns +true+ if the Chars class can and should act as a proxy for the string +string+. Returns
+      # Returns +true+ if the Chars class can and should act as a proxy for the string _string_. Returns
       # +false+ otherwise.
       def self.wants?(string)
-        RUBY_VERSION < '1.9' && $KCODE == 'UTF8' && consumes?(string)
+        $KCODE == 'UTF8' && consumes?(string)
       end
 
       # Returns +true+ when the proxy class can handle the string. Returns +false+ otherwise.
@@ -138,9 +138,9 @@ module ActiveSupport #:nodoc:
 
       include Comparable
 
-      # Returns -1, 0 or +1 depending on whether the Chars object is to be sorted before, equal or after the
-      # object on the right side of the operation. It accepts any object that implements +to_s+. See String.<=>
-      # for more details.
+      # Returns <tt>-1</tt>, <tt>0</tt> or <tt>+1</tt> depending on whether the Chars object is to be sorted before,
+      # equal or after the object on the right side of the operation. It accepts any object that implements +to_s+.
+      # See <tt>String#<=></tt> for more details.
       #
       # Example:
       #   'é'.mb_chars <=> 'ü'.mb_chars #=> -1
@@ -148,7 +148,7 @@ module ActiveSupport #:nodoc:
         @wrapped_string <=> other.to_s
       end
 
-      # Returns a new Chars object containing the other object concatenated to the string.
+      # Returns a new Chars object containing the _other_ object concatenated to the string.
       #
       # Example:
       #   ('Café'.mb_chars + ' périferôl').to_s #=> "Café périferôl"
@@ -156,7 +156,7 @@ module ActiveSupport #:nodoc:
         self << other
       end
 
-      # Like String.=~ only it returns the character offset (in codepoints) instead of the byte offset.
+      # Like <tt>String#=~</tt> only it returns the character offset (in codepoints) instead of the byte offset.
       #
       # Example:
       #   'Café périferôl'.mb_chars =~ /ô/ #=> 12
@@ -164,7 +164,7 @@ module ActiveSupport #:nodoc:
         translate_offset(@wrapped_string =~ other)
       end
 
-      # Works just like String#split, with the exception that the items in the resulting list are Chars
+      # Works just like <tt>String#split</tt>, with the exception that the items in the resulting list are Chars
       # instances instead of String. This makes chaining methods easier.
       #
       # Example:
@@ -173,7 +173,7 @@ module ActiveSupport #:nodoc:
         @wrapped_string.split(*args).map { |i| i.mb_chars }
       end
 
-      # Inserts the passed string at specified codepoint offsets
+      # Inserts the passed string at specified codepoint offsets.
       #
       # Example:
       #   'Café'.mb_chars.insert(4, ' périferôl').to_s #=> "Café périferôl"
@@ -189,7 +189,7 @@ module ActiveSupport #:nodoc:
         self
       end
 
-      # Returns true if contained string contains +other+. Returns false otherwise.
+      # Returns +true+ if contained string contains _other_. Returns +false+ otherwise.
       #
       # Example:
       #   'Café'.mb_chars.include?('é') #=> true
@@ -198,17 +198,17 @@ module ActiveSupport #:nodoc:
         @wrapped_string.include?(other)
       end
 
-      # Returns the position of the passed argument in the string, counting in codepoints
+      # Returns the position of _needle_ in the string, counting in codepoints. Returns +nil+ if _needle_ isn't found.
       #
       # Example:
       #   'Café périferôl'.mb_chars.index('ô') #=> 12
-      def index(*args)
-        index = @wrapped_string.index(*args)
+      #   'Café périferôl'.mb_chars.index(/\w/u) #=> 0
+      def index(needle, offset=0)
+        index = @wrapped_string.index(needle, offset)
         index ? (self.class.u_unpack(@wrapped_string.slice(0...index)).size) : nil
       end
 
-      # Works just like the indexed replace method on string, except instead of byte offsets you specify
-      # character offsets.
+      # Like <tt>String#[]=</tt>, except instead of byte offsets you specify character offsets.
       #
       # Example:
       #
@@ -248,7 +248,7 @@ module ActiveSupport #:nodoc:
         end
       end
 
-      # Works just like String#rjust, only integer specifies characters instead of bytes.
+      # Works just like <tt>String#rjust</tt>, only integer specifies characters instead of bytes.
       #
       # Example:
       #
@@ -261,7 +261,7 @@ module ActiveSupport #:nodoc:
         justify(integer, :right, padstr)
       end
 
-      # Works just like String#ljust, only integer specifies characters instead of bytes.
+      # Works just like <tt>String#ljust</tt>, only integer specifies characters instead of bytes.
       #
       # Example:
       #
@@ -274,7 +274,7 @@ module ActiveSupport #:nodoc:
         justify(integer, :left, padstr)
       end
 
-      # Works just like String#center, only integer specifies characters instead of bytes.
+      # Works just like <tt>String#center</tt>, only integer specifies characters instead of bytes.
       #
       # Example:
       #
@@ -308,7 +308,7 @@ module ActiveSupport #:nodoc:
       end
       alias_method :length, :size
       
-      # Reverses all characters in the string
+      # Reverses all characters in the string.
       #
       # Example:
       #   'Café'.mb_chars.reverse.to_s #=> 'éfaC'
@@ -343,7 +343,7 @@ module ActiveSupport #:nodoc:
       end
       alias_method :[], :slice
 
-      # Convert characters in the string to uppercase
+      # Convert characters in the string to uppercase.
       #
       # Example:
       #   'Laurent, òu sont les tests?'.mb_chars.upcase.to_s #=> "LAURENT, ÒU SONT LES TESTS?"
@@ -351,7 +351,7 @@ module ActiveSupport #:nodoc:
         apply_mapping :uppercase_mapping
       end
 
-      # Convert characters in the string to lowercase
+      # Convert characters in the string to lowercase.
       #
       # Example:
       #   'VĚDA A VÝZKUM'.mb_chars.downcase.to_s #=> "věda a výzkum"
@@ -359,7 +359,7 @@ module ActiveSupport #:nodoc:
         apply_mapping :lowercase_mapping
       end
 
-      # Converts the first character to uppercase and the remainder to lowercase
+      # Converts the first character to uppercase and the remainder to lowercase.
       #
       # Example:
       #  'über'.mb_chars.capitalize.to_s #=> "Über"
@@ -418,6 +418,7 @@ module ActiveSupport #:nodoc:
         self.class.g_unpack(@wrapped_string).length
       end
 
+      # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string.
       def tidy_bytes
         chars(self.class.tidy_bytes(@wrapped_string))
       end
@@ -435,24 +436,35 @@ module ActiveSupport #:nodoc:
 
       class << self
 
-        # Unpack the string at codepoints boundaries
-        def u_unpack(str)
+        # Unpack the string at codepoint boundaries. Raises an EncodingError when the encoding of the string isn't
+        # valid UTF-8.
+        #
+        # Example:
+        #   Chars.u_unpack('Café') #=> [67, 97, 102, 233]
+        def u_unpack(string)
           begin
-            str.unpack 'U*'
+            string.unpack 'U*'
           rescue ArgumentError
             raise EncodingError.new('malformed UTF-8 character')
           end
         end
 
-        # Detect whether the codepoint is in a certain character class. Primarily used by the
-        # grapheme cluster support.
+        # Detect whether the codepoint is in a certain character class. Returns +true+ when it's in the specified
+        # character class and +false+ otherwise. Valid character classes are: <tt>:cr</tt>, <tt>:lf</tt>, <tt>:l</tt>,
+        # <tt>:v</tt>, <tt>:lv</tt>, <tt>:lvt</tt> and <tt>:t</tt>.
+        #
+        # Primarily used by the grapheme cluster support.
         def in_char_class?(codepoint, classes)
           classes.detect { |c| UCD.boundary[c] === codepoint } ? true : false
         end
 
-        # Unpack the string at grapheme boundaries
-        def g_unpack(str)
-          codepoints = u_unpack(str)
+        # Unpack the string at grapheme boundaries. Returns a list of codepoint lists, one per grapheme cluster.
+        #
+        # Example:
+        #   Chars.g_unpack('क्षि') #=> [[2325, 2381], [2359], [2367]]
+        #   Chars.g_unpack('Café') #=> [[67], [97], [102], [233]]
+        def g_unpack(string)
+          codepoints = u_unpack(string)
           unpacked = []
           pos = 0
           marker = 0
@@ -481,13 +493,15 @@ module ActiveSupport #:nodoc:
           unpacked
         end
 
-        # Reverse operation of g_unpack
+        # Reverse operation of g_unpack.
+        #
+        # Example:
+        #   Chars.g_pack(Chars.g_unpack('क्षि')) #=> 'क्षि'
         def g_pack(unpacked)
           (unpacked.flatten).pack('U*')
         end
 
-        # Generates a padding string of a certain size.
-        def padding(padsize, padstr=' ')
+        def padding(padsize, padstr=' ') #:nodoc:
           if padsize != 0
             new(padstr * ((padsize / u_unpack(padstr).size) + 1)).slice(0, padsize)
           else
@@ -495,7 +509,7 @@ module ActiveSupport #:nodoc:
           end
         end
 
-        # Re-order codepoints so the string becomes canonical
+        # Re-order codepoints so the string becomes canonical.
         def reorder_characters(codepoints)
           length = codepoints.length- 1
           pos = 0
@@ -511,7 +525,7 @@ module ActiveSupport #:nodoc:
           codepoints
         end
 
-        # Decompose composed characters to the decomposed form
+        # Decompose composed characters to the decomposed form.
         def decompose_codepoints(type, codepoints)
           codepoints.inject([]) do |decomposed, cp|
             # if it's a hangul syllable starter character
@@ -532,7 +546,7 @@ module ActiveSupport #:nodoc:
           end
         end
 
-        # Compose decomposed characters to the composed form
+        # Compose decomposed characters to the composed form.
         def compose_codepoints(codepoints)
           pos = 0
           eoa = codepoints.length - 1
@@ -591,9 +605,9 @@ module ActiveSupport #:nodoc:
           codepoints
         end
 
-        # Replaces all the non-UTF-8 bytes by their iso-8859-1 or cp1252 equivalent resulting in a valid UTF-8 string
-        def tidy_bytes(str)
-          str.split(//u).map do |c|
+        # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string.
+        def tidy_bytes(string)
+          string.split(//u).map do |c|
             if !UTF8_PAT.match(c)
               n = c.unpack('C')[0]
               n < 128 ? n.chr :
@@ -608,8 +622,7 @@ module ActiveSupport #:nodoc:
 
       protected
 
-        # Translate a byte offset in the wrapped string to a character offset by looking for the character boundary
-        def translate_offset(byte_offset)
+        def translate_offset(byte_offset) #:nodoc:
           return nil if byte_offset.nil?
           return 0   if @wrapped_string == ''
           chunk = @wrapped_string[0..byte_offset]
@@ -629,9 +642,7 @@ module ActiveSupport #:nodoc:
           end
         end
 
-        # Justifies a string in a certain way. Valid values for <tt>way</tt> are <tt>:right</tt>, <tt>:left</tt> and
-        # <tt>:center</tt>.
-        def justify(integer, way, padstr=' ')
+        def justify(integer, way, padstr=' ') #:nodoc:
           raise ArgumentError, "zero width padding" if padstr.length == 0
           padsize = integer - size
           padsize = padsize > 0 ? padsize : 0
@@ -648,8 +659,7 @@ module ActiveSupport #:nodoc:
           chars(result)
         end
 
-        # Map codepoints to one of it's attributes.
-        def apply_mapping(mapping)
+        def apply_mapping(mapping) #:nodoc:
           chars(self.class.u_unpack(@wrapped_string).map do |codepoint|
             cp = UCD.codepoints[codepoint]
             if cp and (ncp = cp.send(mapping)) and ncp > 0
@@ -660,9 +670,8 @@ module ActiveSupport #:nodoc:
           end.pack('U*'))
         end
 
-        # Creates a new instance
-        def chars(str)
-          self.class.new(str)
+        def chars(string) #:nodoc:
+          self.class.new(string)
         end
     end
   end
diff --git a/activesupport/lib/active_support/multibyte/exceptions.rb b/activesupport/lib/active_support/multibyte/exceptions.rb
index af760cc561..62066e3c71 100644
--- a/activesupport/lib/active_support/multibyte/exceptions.rb
+++ b/activesupport/lib/active_support/multibyte/exceptions.rb
@@ -2,6 +2,7 @@
 
 module ActiveSupport #:nodoc:
   module Multibyte #:nodoc:
+    # Raised when a problem with the encoding was found.
     class EncodingError < StandardError; end
   end
 end
\ No newline at end of file