Multibyte: String#chars returns self for Ruby 1.9

git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@8460 5ecf4fe2-1ee6-0310-87b1-e25e094e27de
2022-11-09 12:12:34 -05:00 · 2007-12-21 11:21:43 +00:00 · 2007-12-21 11:21:43 +00:00 · c95002c284
commit c95002c284
parent 909b2c1acf
5 changed files with 76 additions and 44 deletions
--- a/activesupport/lib/active_support/core_ext/string/unicode.rb
+++ b/activesupport/lib/active_support/core_ext/string/unicode.rb
@@ -1,40 +1,59 @@
 module ActiveSupport #:nodoc:
  module CoreExtensions #:nodoc:
    module String #:nodoc:
-      # Define methods for handling unicode data.
-      module Unicode
-        # +chars+ is a Unicode safe proxy for string methods. It creates and returns an instance of the
-        # ActiveSupport::Multibyte::Chars class which encapsulates the original string. A Unicode safe version of all
-        # the String methods are defined on this proxy class. Undefined methods are forwarded to String, so all of the
-        # string overrides can also be called through the +chars+ proxy.
-        #
-        #   name = 'Claus Müller'
-        #   name.reverse #=> "rell??M sualC"
-        #   name.length #=> 13
-        #
-        #   name.chars.reverse.to_s #=> "rellüM sualC"
-        #   name.chars.length #=> 12
-        #   
-        #
-        # All the methods on the chars proxy which normally return a string will return a Chars object. This allows
-        # method chaining on the result of any of these methods.
-        #
-        #   name.chars.reverse.length #=> 12
-        #
-        # The Char object tries to be as interchangeable with String objects as possible: sorting and comparing between
-        # String and Char work like expected. The bang! methods change the internal string representation in the Chars
-        # object. Interoperability problems can be resolved easily with a +to_s+ call.
-        #
-        # For more information about the methods defined on the Chars proxy see ActiveSupport::Multibyte::Chars and
-        # ActiveSupport::Multibyte::Handlers::UTF8Handler
-        def chars
-          ActiveSupport::Multibyte::Chars.new(self)
-        end
+      if RUBY_VERSION < '1.9'
+        # Define methods for handling unicode data.
+        module Unicode
+          # +chars+ is a Unicode-safe proxy for string methods. It creates and returns an instance of the
+          # ActiveSupport::Multibyte::Chars class, which encapsulates the original string. Unicode-safe versions of all
+          # the String methods are defined on this proxy class. Undefined methods are forwarded to String, so all of the
+          # string overrides can also be called through the +chars+ proxy.
+          #
+          #   name = 'Claus Müller'
+          #   name.reverse #=> "rell??M sualC"
+          #   name.length #=> 13
+          #
+          #   name.chars.reverse.to_s #=> "rellüM sualC"
+          #   name.chars.length #=> 12
+          #   
+          #
+          # All the methods on the chars proxy which normally return a string will return a Chars object. This allows
+          # method chaining on the result of any of these methods.
+          #
+          #   name.chars.reverse.length #=> 12
+          #
+          # The Chars object tries to be as interchangeable with String objects as possible: sorting and comparing between
+          # String and Chars work as expected. The bang! methods change the internal string representation in the Chars
+          # object. Interoperability problems can be resolved easily with a +to_s+ call.
+          #
+          # For more information about the methods defined on the Chars proxy see ActiveSupport::Multibyte::Chars and
+          # ActiveSupport::Multibyte::Handlers::UTF8Handler
+          def chars
+            ActiveSupport::Multibyte::Chars.new(self)
+          end

-        # Returns true if the string has UTF-8 semantics (a String used for purely byte resources is unlikely to have
-        # them), returns false otherwise.
-        def is_utf8?
-          ActiveSupport::Multibyte::Handlers::UTF8Handler.consumes?(self)
+          # Returns true if the string has UTF-8 semantics (a String used for purely byte resources is unlikely to have
+          # them), returns false otherwise.
+          def is_utf8?
+            ActiveSupport::Multibyte::Handlers::UTF8Handler.consumes?(self)
+          end
+        end
+      else
+        module Unicode #:nodoc:
+          def chars
+            self
+          end
+
+          def is_utf8?
+            case encoding
+              when Encoding::UTF_8
+                valid_encoding?
+              when Encoding::ASCII_8BIT
+                dup.force_encoding('UTF-8').valid_encoding?
+              else
+                false
+            end
+          end
        end
      end
    end
--- a/activesupport/lib/active_support/multibyte/chars.rb
+++ b/activesupport/lib/active_support/multibyte/chars.rb
@@ -119,14 +119,8 @@ module ActiveSupport::Multibyte #:nodoc:
      
      # +utf8_pragma+ checks if it can send this string to the handlers. It makes sure @string isn't nil and $KCODE is
      # set to 'UTF8'.
-      if RUBY_VERSION < '1.9'
-        def utf8_pragma?
-          !@string.nil? && ($KCODE == 'UTF8')
-        end
-      else
-        def utf8_pragma?
-          false
-        end
+      def utf8_pragma?
+        !@string.nil? && ($KCODE == 'UTF8')
      end
  end
 end
--- a/activesupport/test/multibyte_chars_test.rb
+++ b/activesupport/test/multibyte_chars_test.rb
@@ -1,6 +1,15 @@
 require 'abstract_unit'

-$KCODE = 'UTF8' if RUBY_VERSION < '1.9'
+if RUBY_VERSION >= '1.9'
+  class CharsTest < Test::Unit::TestCase
+    def test_chars_returns_self
+      str = 'abc'
+      assert_equal str.object_id, str.chars.object_id
+    end
+  end
+else
+
+$KCODE = 'UTF8'

 class CharsTest < Test::Unit::TestCase
  
@@ -175,3 +184,5 @@ class CharsTest < Test::Unit::TestCase
    end
  end
 end
+
+end
--- a/activesupport/test/multibyte_conformance.rb
+++ b/activesupport/test/multibyte_conformance.rb
@@ -1,7 +1,9 @@
 require 'abstract_unit'
 require 'open-uri'

-$KCODE = 'UTF8' if RUBY_VERSION < '1.9'
+if RUBY_VERSION < '1.9'
+
+$KCODE = 'UTF8'

 UNIDATA_URL = "http://www.unicode.org/Public/#{ActiveSupport::Multibyte::UNICODE_VERSION}/ucd"
 UNIDATA_FILE = '/NormalizationTest.txt'
@@ -140,3 +142,5 @@ class ConformanceTestPure < Test::Unit::TestCase
    @handler = ::ActiveSupport::Multibyte::Handlers::UTF8Handler
  end
 end
+
+end
--- a/activesupport/test/multibyte_handler_test.rb
+++ b/activesupport/test/multibyte_handler_test.rb
@@ -1,6 +1,8 @@
 require 'abstract_unit'

-$KCODE = 'UTF8' if RUBY_VERSION < '1.9'
+if RUBY_VERSION < '1.9'
+
+$KCODE = 'UTF8'

 class String
  # Unicode Inspect returns the codepoints of the string in hex
@@ -365,3 +367,5 @@ class UTF8HandlingTestPure < Test::Unit::TestCase
    @handler = ::ActiveSupport::Multibyte::Handlers::UTF8Handler
  end
 end
+
+end