* lib/csv.rb (CSV#init_separators): use IO#gets with length

parameter to get rid of wrong conversion. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@30356 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2022-11-09 12:17:21 -05:00 · 2010-12-25 08:32:27 +00:00 · 2010-12-25 08:32:27 +00:00 · 9c017ca5fb
commit 9c017ca5fb
parent 9a907880f0
3 changed files with 25 additions and 40 deletions
--- a/5
+++ b/5
@ -1,4 +1,7 @@
-Sat Dec 25 16:04:34 2010  Nobuyoshi Nakada  <nobu@ruby-lang.org>
+Sat Dec 25 17:32:24 2010  Nobuyoshi Nakada  <nobu@ruby-lang.org>
 	* lib/csv.rb (CSV#init_separators): use IO#gets with length
 	  parameter to get rid of wrong convertion.
 	* lib/csv.rb (CSV::foreach, CSV#initialize): directly use encoding
--- a/lib/csv.rb
+++ b/lib/csv.rb
@ -1573,10 +1573,7 @@ class CSV
    # if we can transcode the needed characters
    #
    @re_esc   =   "\\".encode(@encoding) rescue ""
-    @re_chars =   %w[ \\ .  [  ]  -  ^  $  ?
+    @re_chars =   /#{%"[-][\\.^$?*+{}()|# \r\n\t\f\v]".encode(@encoding, fallback: proc{""})}/
                      *  +  {  }  (  )  |  #
                      \  \r \n \t \f \v ].
                  map { |s| s.encode(@encoding) rescue nil }.compact
    init_separators(options)
    init_parsers(options)
@ -2025,15 +2022,13 @@ class CSV
            # if we run out of data, it's probably a single line
            # (use a sensible default)
            #
-            if @io.eof?
+            unless sample = @io.gets(nil, 1024)
              @row_sep = $INPUT_RECORD_SEPARATOR
              break
            end
            # read ahead a bit
-            sample =  read_to_char(1024)
+            sample << (@io.gets(nil, 1) || "") if sample.end_with?(encode_str("\r"))
            sample += read_to_char(1) if sample[-1..-1] == encode_str("\r") and
                                         not @io.eof?
            # try to find a standard separator
            if sample =~ encode_re("\r\n?|\n")
              @row_sep = $&
@ -2267,7 +2262,7 @@ class CSV
  # a backslash cannot be transcoded.
  #
  def escape_re(str)
-    str.chars.map { |c| @re_chars.include?(c) ? @re_esc + c : c }.join('')
+    str.gsub(@re_chars) {|c| @re_esc + c}
  end
  #
@ -2286,31 +2281,6 @@ class CSV
    chunks.map { |chunk| chunk.encode(@encoding.name) }.join('')
  end
  #
  # Reads at least +bytes+ from <tt>@io</tt>, but will read up to 10 bytes ahead
  # if needed to ensure the data read is valid in the encoding of that data.
  # This should ensure that it is safe to use regular expressions on the read
  # data, unless it is actually a broken encoding.  The read data will be
  # returned in <tt>@encoding</tt>.
  #
  def read_to_char(bytes)
    return "" if @io.eof?

    # Never grow the buffer past the requested size plus 10 extra units.
    limit  = bytes + 10
    buffer = read_io(bytes)

    loop do
      if buffer.valid_encoding?
        # A failed transcode is treated the same as invalid raw data:
        # fall through and try again with one more byte.
        converted = begin
          encode_str(buffer)
        rescue
          nil
        end
        return converted if converted && converted.valid_encoding?
      end

      # Give up and hand back the raw data once there is nothing left to
      # read or the read-ahead allowance is used up.
      return buffer if @io.eof? || buffer.size >= limit

      buffer += read_io(1)
    end
  end
  private
  def raw_encoding
@ -2324,10 +2294,6 @@ class CSV
      Encoding::ASCII_8BIT
    end
  end
  # Reads up to +bytes+ bytes from <tt>@io</tt> and tags the result with
  # +raw_encoding+ (no transcoding is performed).
  #
  # IO#read with a positive length returns +nil+ at end of file; in that case
  # an empty String in the raw encoding is returned instead of raising
  # NoMethodError.
  def read_io(bytes)
    chunk = @io.read(bytes) || String.new
    chunk.force_encoding(raw_encoding)
  end
 end
 # Another name for CSV::instance().
--- a/test/csv/test_encodings.rb
+++ b/test/csv/test_encodings.rb
@ -238,12 +238,28 @@ class TestCSV::Encodings < TestCSV
  def assert_parses(fields, encoding, options = { })
    encoding = Encoding.find(encoding) unless encoding.is_a? Encoding
    orig_fields = fields
    fields   = encode_ary(fields, encoding)
-    parsed   = CSV.parse(ary_to_data(fields, options), options)
+    data = ary_to_data(fields, options)
    parsed   = CSV.parse(data, options)
    assert_equal(fields, parsed)
    parsed.flatten.each_with_index do |field, i|
      assert_equal(encoding, field.encoding, "Field[#{i + 1}] was transcoded.")
    end
    File.open(@temp_csv_path, "wb") {|f| f.print(data)}
    CSV.open(@temp_csv_path, "rb:#{encoding}", options) do |csv|
      csv.each_with_index do |row, i|
        assert_equal(fields[i], row)
      end
    end
    begin
      CSV.open(@temp_csv_path, "rb:#{encoding}:#{__ENCODING__}", options) do |csv|
        csv.each_with_index do |row, i|
          assert_equal(orig_fields[i], row)
        end
      end unless encoding == __ENCODING__
    rescue Encoding::ConverterNotFoundError
    end
  end
  def encode_ary(ary, encoding)