ruby--ruby/test/csv/tc_encodings.rb

#!/usr/bin/env ruby -w
# encoding: UTF-8

# tc_encodings.rb
#
#  Created by James Edward Gray II on 2008-09-13.
#  Copyright 2008 James Edward Gray II. You can redistribute or modify this code
#  under the terms of Ruby's license.

require "test/unit"
require "tmpdir"

require "csv"

class TestEncodings < Test::Unit::TestCase
  # Chooses the scratch file used by the tests that round-trip CSV through
  # the file system and stores its path in @temp_csv_path.
  #
  # The path points into the system temporary directory and embeds the
  # process id, so the suite does not require a writable source tree and
  # concurrent runs do not overwrite each other's file.  Nothing is created
  # here; tests write the file themselves when they need it.
  def setup
    @temp_csv_path = File.join( Dir.tmpdir,
                                "tc_encodings_#{Process.pid}_temp.csv" )
  end
  
  # Deletes the scratch CSV file named by @temp_csv_path when a test left
  # one behind; does nothing if the file is not there.
  def teardown
    return unless File.exist?(@temp_csv_path)
    File.unlink(@temp_csv_path)
  end
  
  ########################################
  ### Hand Test Some Popular Encodings ###
  ########################################
  
  # UTF-8: rows that mix plain ASCII words with the multibyte "…" character
  # must parse back unchanged and stay in UTF-8.
  def test_parses_utf8_encoding
    rows = [ %w[ one two … ],
             %w[ 1   …   3 ],
             %w[ …   5   6 ] ]
    assert_parses(rows, "UTF-8")
  end
  
  # ISO-8859-1: rows containing the accented word "Résumé" must parse back
  # unchanged and stay in ISO-8859-1.
  def test_parses_latin1_encoding
    rows = [ %w[ one    two    Résumé ],
             %w[ 1      Résumé 3      ],
             %w[ Résumé 5      6      ] ]
    assert_parses(rows, "ISO-8859-1")
  end
  
  # UTF-16BE: the same rows as the UTF-8 test, this time in an encoding that
  # is not ASCII compatible.
  def test_parses_utf16be_encoding
    rows = [ %w[ one two … ],
             %w[ 1   …   3 ],
             %w[ …   5   6 ] ]
    assert_parses(rows, "UTF-16BE")
  end
  
  # Shift_JIS: a grid of the kanji numerals one through nine must parse back
  # unchanged and stay in Shift_JIS.
  def test_parses_shift_jis_encoding
    rows = [ %w[ 一 二 三 ],
             %w[ 四 五 六 ],
             %w[ 七 八 九 ] ]
    assert_parses(rows, "Shift_JIS")
  end
  
  ###########################################################
  ### Try Simple Reading for All Non-dummy Ruby Encodings ###
  ###########################################################
  
  # Parses a small two-row ASCII document in every encoding yielded by
  # each_encoding and checks, through assert_parses, that the fields come
  # back unchanged and in that same encoding.
  #
  # An Encoding::ConverterNotFoundError raised along the way is turned into
  # a test failure that names the offending encoding.
  def test_reading_with_most_encodings
    each_encoding do |encoding|
      begin
        assert_parses( [ %w[ abc def ],
                         %w[ ghi jkl ] ], encoding )
      rescue Encoding::ConverterNotFoundError
        # flunk() goes through the assertion API, so the problem is counted
        # as a failure; Kernel#fail would raise a RuntimeError that the
        # framework reports as an unexpected error instead
        flunk("Failed to support #{encoding.name}.")
      end
    end
  end
  
  # Repeats the every-encoding parse with "|" as the column separator, a
  # character that is special inside regular expressions, to check that the
  # separator still works once it has been transcoded.
  #
  # An Encoding::ConverterNotFoundError raised along the way is turned into
  # a test failure that names the offending encoding.
  def test_regular_expression_escaping
    each_encoding do |encoding|
      begin
        assert_parses( [ %w[ abc def ],
                         %w[ ghi jkl ] ], encoding, :col_sep => "|" )
      rescue Encoding::ConverterNotFoundError
        # flunk() goes through the assertion API, so the problem is counted
        # as a failure; Kernel#fail would raise a RuntimeError that the
        # framework reports as an unexpected error instead
        flunk("Failed to properly escape #{encoding.name}.")
      end
    end
  end
  
  #######################################################################
  ### Stress Test ASCII Compatible and Non-ASCII Compatible Encodings ###
  #######################################################################
  
  # Checks that CSV.new discovers a "\r\n" row separator by itself and
  # reports it in the encoding of the data.
  def test_auto_line_ending_detection
    # arrange data to place a \r at the end of CSV's read ahead point
    # (509 characters plus the two quote characters put the "\r" at position
    # 512 -- presumably the read ahead size; verify against csv.rb)
    long_field = "a" * 509
    encode_for_tests([[long_field]], :row_sep => "\r\n") do |data|
      expected = "\r\n".encode(data.encoding)
      assert_equal(expected, CSV.new(data).row_sep)
    end
  end
  
  # Passing "|" as col_sep, row_sep or quote_char must leave the matching
  # reader returning "|" in the encoding of the data being parsed.
  def test_csv_chars_are_transcoded
    encode_for_tests([%w[abc def]]) do |data|
      pipe = "|".encode(data.encoding)
      [:col_sep, :row_sep, :quote_char].each do |attribute|
        csv = CSV.new(data, attribute => "|")
        assert_equal(pipe, csv.send(attribute))
      end
    end
  end
  
  # With :headers => true, both the header names and every field of every
  # row must keep the encoding of the source data.
  def test_parser_works_with_encoded_headers
    encode_for_tests([%w[one two three], %w[1 2 3]]) do |data|
      table    = CSV.parse(data, :headers => true)
      expected = data.encoding

      headers_ok = table.headers.all? { |header| header.encoding == expected }
      assert(headers_ok, "Wrong data encoding.")

      table.each do |row|
        fields_ok = row.fields.all? { |field| field.encoding == expected }
        assert(fields_ok, "Wrong data encoding.")
      end
    end
  end
  
  # With the built-in :integer converter, the numeric row must come back as
  # Integers while the unconverted text row keeps the encoding of the data.
  def test_built_in_converters_transcode_to_utf_8_then_convert
    encode_for_tests([%w[one two three], %w[1 2 3]]) do |data|
      words, numbers = CSV.parse(data, :converters => :integer)
      untouched = words.all? { |field| field.encoding == data.encoding }
      assert(untouched, "Wrong data encoding.")
      assert_equal([1, 2, 3], numbers)
    end
  end
  
  # With the built-in :downcase header converter, the headers must come back
  # as UTF-8 while the fields of the first row keep the encoding of the data.
  def test_built_in_header_converters_transcode_to_utf_8_then_convert
    encode_for_tests([%w[one two three], %w[1 2 3]]) do |data|
      table = CSV.parse(data, :headers => true, :header_converters => :downcase)

      utf8_headers = table.headers.all? { |header|
        header.encoding.name == "UTF-8"
      }
      assert(utf8_headers, "Wrong data encoding.")

      first_row = table[0]
      fields_ok = first_row.fields.all? { |field| field.encoding == data.encoding }
      assert(fields_ok, "Wrong data encoding.")
    end
  end
  
  # CSV.open must honor an encoding given in the mode string: every field
  # read back has to be in the encoding of the original data.
  def test_open_allows_you_to_set_encodings
    encode_for_tests([%w[abc def]]) do |data|
      name = data.encoding.name
      # first pass: read and write in encoding
      # second pass: read and write with transcoding (UTF-32BE on disk)
      [name, "UTF-32BE:#{name}"].each do |io_encoding|
        File.open(@temp_csv_path, "wb:#{io_encoding}") { |file| file << data }
        CSV.open(@temp_csv_path, "rb:#{io_encoding}") do |csv|
          csv.each do |row|
            assert( row.all? { |field| field.encoding == data.encoding },
                    "Wrong data encoding." )
          end
        end
      end
    end
  end
  
  # CSV.foreach must honor the :encoding option: every field of every row
  # has to be in the encoding of the original data.
  def test_foreach_allows_you_to_set_encodings
    encode_for_tests([%w[abc def]]) do |data|
      name = data.encoding.name
      # first pass: read and write in encoding
      # second pass: read and write with transcoding (UTF-32BE on disk)
      [name, "UTF-32BE:#{name}"].each do |io_encoding|
        File.open(@temp_csv_path, "wb:#{io_encoding}") { |file| file << data }
        CSV.foreach(@temp_csv_path, :encoding => io_encoding) do |row|
          assert( row.all? { |field| field.encoding == data.encoding },
                  "Wrong data encoding." )
        end
      end
    end
  end
  
  # CSV.read must honor the :encoding option: every field returned has to be
  # in the encoding of the original data.
  def test_read_allows_you_to_set_encodings
    encode_for_tests([%w[abc def]]) do |data|
      name = data.encoding.name
      # first pass: read and write in encoding
      # second pass: read and write with transcoding (UTF-32BE on disk)
      [name, "UTF-32BE:#{name}"].each do |io_encoding|
        File.open(@temp_csv_path, "wb:#{io_encoding}") { |file| file << data }
        rows = CSV.read(@temp_csv_path, :encoding => io_encoding)
        assert( rows.flatten.all? { |field| field.encoding == data.encoding },
                "Wrong data encoding." )
      end
    end
  end
  
  #################################
  ### Write CSV in any Encoding ###
  #################################
  
  # For every encoding yielded by each_encoding: Array#to_csv must produce a
  # line in that encoding, with and without the :encoding hint, and rows
  # written to a file through CSV.open must read back equal with CSV.read.
  def test_can_write_csv_in_any_encoding
    each_encoding do |encoding|
      name = encoding.name

      # test generate_line with encoding hint
      hinted = %w[abc d|ef].map { |field| field.encode(encoding) }
      line   = hinted.to_csv(:col_sep => "|", :encoding => name)
      assert_equal(encoding, line.encoding)

      # test generate_line with encoding guessing from fields
      guessed = %w[abc d|ef].map { |field| field.encode(encoding) }
      line    = guessed.to_csv(:col_sep => "|")
      assert_equal(encoding, line.encoding)

      # writing to files
      rows = encode_ary([%w[abc d,ef], %w[123 456 ]], encoding)
      CSV.open(@temp_csv_path, "wb:#{name}") do |csv|
        rows.each { |row| csv << row }
      end
      assert_equal(rows, CSV.read(@temp_csv_path, :encoding => name))
    end
  end
  
  private
  
  # Asserts that CSV can parse +fields+ in +encoding+ without altering them.
  #
  # fields::   Array of rows, each an Array of Strings.
  # encoding:: an Encoding, or a name that Encoding.find can resolve.
  # options::  Hash of CSV options (Symbol keys); it is used both to build
  #            the CSV text and to parse it.
  #
  # The fields are transcoded to +encoding+, rendered as CSV text with
  # ary_to_data and parsed again.  The result must equal the transcoded
  # fields, and every parsed field must be in +encoding+.
  def assert_parses(fields, encoding, options = { })
    encoding = Encoding.find(encoding) unless encoding.is_a? Encoding
    fields   = encode_ary(fields, encoding)
    # CSV.parse takes its options as keyword arguments; a Hash handed over
    # positionally raises ArgumentError on Ruby 3, so splat it instead
    parsed   = CSV.parse(ary_to_data(fields, options), **options)
    assert_equal(fields, parsed)
    parsed.flatten.each_with_index do |field, i|
      assert_equal(encoding, field.encoding, "Field[#{i + 1}] was transcoded.")
    end
  end
  
  # Returns a copy of +ary+ (an Array of rows of Strings) in which every
  # String has been transcoded to +encoding+.  The input is not modified.
  def encode_ary(ary, encoding)
    ary.map do |row|
      row.map { |cell| cell.encode(encoding) }
    end
  end
  
  # Renders +ary+ (an Array of rows of Strings) as CSV text.
  #
  # The encoding of the very first field decides the encoding of the result.
  # Every field is wrapped in the quote character, fields are joined with
  # the column separator and every row ends with the row separator.  The
  # three characters come from +options+ (:quote_char, :col_sep, :row_sep)
  # and default to '"', "," and "\n".
  #
  # Quote characters inside a field are not escaped, so callers must pass
  # fields that do not contain one.
  def ary_to_data(ary, options = { })
    encoding = ary.flatten.first.encoding

    # transcode the three special characters so they can be mixed with fields
    quote_char, col_sep, row_sep =
      [[:quote_char, '"'], [:col_sep, ","], [:row_sep, "\n"]].map { |key, default|
        (options[key] || default).encode(encoding)
      }

    lines = ary.map do |row|
      quoted = row.map { |field|
        [quote_char, field.encode(encoding), quote_char].join
      }
      quoted.join(col_sep) + row_sep
    end
    lines.join.encode(encoding)
  end
  
  # Yields +data+ rendered as CSV text twice: first in UTF-8 (ASCII
  # compatible), then in UTF-16BE (not ASCII compatible).  +options+ is
  # handed to ary_to_data to pick the separator and quote characters.
  # Returns whatever the block returned for the UTF-16BE text.
  def encode_for_tests(data, options = { })
    %w[UTF-8 UTF-16BE].map { |name|
      yield ary_to_data(encode_ary(data, name), options)
    }.last
  end
  
  # Yields each Encoding in Encoding.list, leaving out the "dummy" ones.
  def each_encoding
    Encoding.list.each do |encoding|
      yield encoding unless encoding.dummy?
    end
  end
end
* lib/csv/csv.rb: Reworked CSV's parser and generator to be m17n. Data is now parsed in the Encoding it is in without need for translation. * lib/csv/csv.rb: Improved inspect() messages for better IRb support. * lib/csv/csv.rb: Fixed header writing bug reported by Dov Murik. * lib/csv/csv.rb: Use custom separators in parsing header Strings as suggested by Shmulik Regev. * lib/csv/csv.rb: Added a :write_headers option for outputting headers. * lib/csv/csv.rb: Handle open() calls in binary mode whenever we can to workaround a Windows issue where line-ending translation can cause an off-by-one error in seeking back to a non-zero starting position after auto-discovery for :row_sep as suggested by Robert Battle. * lib/csv/csv.rb: Improved the parser to fail faster when fed some forms of invalid CSV that can be detected without reading ahead. * lib/csv/csv.rb: Added a :field_size_limit option to control CSV's lookahead and prevent the parser from biting off more data than it can chew. * lib/csv/csv.rb: Added readers for CSV attributes: col_sep(), row_sep(), quote_char(), field_size_limit(), converters(), unconverted_fields?(), headers(), return_headers?(), write_headers?(), header_converters(), skip_blanks?(), and force_quotes?(). * lib/csv/csv.rb: Cleaned up code syntax to be more inline with Ruby 1.9 than 1.8. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19441 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-09-20 20:39:03 -04:00			`#!/usr/bin/env ruby -w`
			`# encoding: UTF-8`

			`# tc_encodings.rb`
			`#`
			`# Created by James Edward Gray II on 2008-09-13.`
			`# Copyright 2008 James Edward Gray II. You can redistribute or modify this code`
			`# under the terms of Ruby's license.`

			`require "test/unit"`

			`require "csv"`

			`class TestEncodings < Test::Unit::TestCase`
			`def setup`
			`@temp_csv_path = File.join(File.dirname(__FILE__), "temp.csv")`
			`end`

			`def teardown`
			`File.unlink(@temp_csv_path) if File.exist? @temp_csv_path`
			`end`

			`########################################`
			`### Hand Test Some Popular Encodings ###`
			`########################################`

			`def test_parses_utf8_encoding`
			`assert_parses( [ %w[ one two … ],`
			`%w[ 1 … 3 ],`
			`%w[ … 5 6 ] ], "UTF-8" )`
			`end`

			`def test_parses_latin1_encoding`
			`assert_parses( [ %w[ one two Résumé ],`
			`%w[ 1 Résumé 3 ],`
			`%w[ Résumé 5 6 ] ], "ISO-8859-1" )`
			`end`

			`def test_parses_utf16be_encoding`
			`assert_parses( [ %w[ one two … ],`
			`%w[ 1 … 3 ],`
			`%w[ … 5 6 ] ], "UTF-16BE" )`
			`end`

			`def test_parses_shift_jis_encoding`
			`assert_parses( [ %w[ 一二三 ],`
			`%w[ 四五六 ],`
			`%w[ 七八九 ] ], "Shift_JIS" )`
			`end`

			`###########################################################`
			`### Try Simple Reading for All Non-dummy Ruby Encodings ###`
			`###########################################################`

			`def test_reading_with_most_encodings`
			`each_encoding do \|encoding\|`
			`begin`
			`assert_parses( [ %w[ abc def ],`
			`%w[ ghi jkl ] ], encoding )`
* transcode.c (rb_eUndefinedConversionError): renamed from rb_eConversionUndefinedError. (rb_eConverterNotFoundError): renamed from rb_eNoConverterError. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19554 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-09-25 08:47:30 -04:00			`rescue Encoding::ConverterNotFoundError`
* lib/csv/csv.rb: Reworked CSV's parser and generator to be m17n. Data is now parsed in the Encoding it is in without need for translation. * lib/csv/csv.rb: Improved inspect() messages for better IRb support. * lib/csv/csv.rb: Fixed header writing bug reported by Dov Murik. * lib/csv/csv.rb: Use custom separators in parsing header Strings as suggested by Shmulik Regev. * lib/csv/csv.rb: Added a :write_headers option for outputting headers. * lib/csv/csv.rb: Handle open() calls in binary mode whenever we can to workaround a Windows issue where line-ending translation can cause an off-by-one error in seeking back to a non-zero starting position after auto-discovery for :row_sep as suggested by Robert Battle. * lib/csv/csv.rb: Improved the parser to fail faster when fed some forms of invalid CSV that can be detected without reading ahead. * lib/csv/csv.rb: Added a :field_size_limit option to control CSV's lookahead and prevent the parser from biting off more data than it can chew. * lib/csv/csv.rb: Added readers for CSV attributes: col_sep(), row_sep(), quote_char(), field_size_limit(), converters(), unconverted_fields?(), headers(), return_headers?(), write_headers?(), header_converters(), skip_blanks?(), and force_quotes?(). * lib/csv/csv.rb: Cleaned up code syntax to be more inline with Ruby 1.9 than 1.8. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19441 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-09-20 20:39:03 -04:00			`fail("Failed to support #{encoding.name}.")`
			`end`
			`end`
			`end`

			`def test_regular_expression_escaping`
			`each_encoding do \|encoding\|`
			`begin`
			`assert_parses( [ %w[ abc def ],`
			`%w[ ghi jkl ] ], encoding, :col_sep => "\|" )`
* transcode.c (rb_eUndefinedConversionError): renamed from rb_eConversionUndefinedError. (rb_eConverterNotFoundError): renamed from rb_eNoConverterError. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19554 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-09-25 08:47:30 -04:00			`rescue Encoding::ConverterNotFoundError`
* lib/csv/csv.rb: Reworked CSV's parser and generator to be m17n. Data is now parsed in the Encoding it is in without need for translation. * lib/csv/csv.rb: Improved inspect() messages for better IRb support. * lib/csv/csv.rb: Fixed header writing bug reported by Dov Murik. * lib/csv/csv.rb: Use custom separators in parsing header Strings as suggested by Shmulik Regev. * lib/csv/csv.rb: Added a :write_headers option for outputting headers. * lib/csv/csv.rb: Handle open() calls in binary mode whenever we can to workaround a Windows issue where line-ending translation can cause an off-by-one error in seeking back to a non-zero starting position after auto-discovery for :row_sep as suggested by Robert Battle. * lib/csv/csv.rb: Improved the parser to fail faster when fed some forms of invalid CSV that can be detected without reading ahead. * lib/csv/csv.rb: Added a :field_size_limit option to control CSV's lookahead and prevent the parser from biting off more data than it can chew. * lib/csv/csv.rb: Added readers for CSV attributes: col_sep(), row_sep(), quote_char(), field_size_limit(), converters(), unconverted_fields?(), headers(), return_headers?(), write_headers?(), header_converters(), skip_blanks?(), and force_quotes?(). * lib/csv/csv.rb: Cleaned up code syntax to be more inline with Ruby 1.9 than 1.8. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19441 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-09-20 20:39:03 -04:00			`fail("Failed to properly escape #{encoding.name}.")`
			`end`
			`end`
			`end`

			`#######################################################################`
			`### Stress Test ASCII Compatible and Non-ASCII Compatible Encodings ###`
			`#######################################################################`

			`def test_auto_line_ending_detection`
			`# arrange data to place a \r at the end of CSV's read ahead point`
			`encode_for_tests([["a" * 509]], :row_sep => "\r\n") do \|data\|`
			`assert_equal("\r\n".encode(data.encoding), CSV.new(data).row_sep)`
			`end`
			`end`

			`def test_csv_chars_are_transcoded`
			`encode_for_tests([%w[abc def]]) do \|data\|`
			`%w[col_sep row_sep quote_char].each do \|csv_char\|`
			`assert_equal( "\|".encode(data.encoding),`
			`CSV.new(data, csv_char.to_sym => "\|").send(csv_char) )`
			`end`
			`end`
			`end`

			`def test_parser_works_with_encoded_headers`
			`encode_for_tests([%w[one two three], %w[1 2 3]]) do \|data\|`
			`parsed = CSV.parse(data, :headers => true)`
			`assert( parsed.headers.all? { \|h\| h.encoding == data.encoding },`
			`"Wrong data encoding." )`
			`parsed.each do \|row\|`
			`assert( row.fields.all? { \|f\| f.encoding == data.encoding },`
			`"Wrong data encoding." )`
			`end`
			`end`
			`end`

			`def test_built_in_converters_transcode_to_utf_8_then_convert`
			`encode_for_tests([%w[one two three], %w[1 2 3]]) do \|data\|`
			`parsed = CSV.parse(data, :converters => :integer)`
			`assert( parsed[0].all? { \|f\| f.encoding == data.encoding },`
			`"Wrong data encoding." )`
			`assert_equal([1, 2, 3], parsed[1])`
			`end`
			`end`

			`def test_built_in_header_converters_transcode_to_utf_8_then_convert`
			`encode_for_tests([%w[one two three], %w[1 2 3]]) do \|data\|`
			`parsed = CSV.parse( data, :headers => true,`
			`:header_converters => :downcase )`
			`assert( parsed.headers.all? { \|h\| h.encoding.name == "UTF-8" },`
			`"Wrong data encoding." )`
			`assert( parsed[0].fields.all? { \|f\| f.encoding == data.encoding },`
			`"Wrong data encoding." )`
			`end`
			`end`

			`def test_open_allows_you_to_set_encodings`
			`encode_for_tests([%w[abc def]]) do \|data\|`
			`# read and write in encoding`
			`File.open(@temp_csv_path, "wb:#{data.encoding.name}") { \|f\| f << data }`
			`CSV.open(@temp_csv_path, "rb:#{data.encoding.name}") do \|csv\|`
			`csv.each do \|row\|`
			`assert( row.all? { \|f\| f.encoding == data.encoding },`
			`"Wrong data encoding." )`
			`end`
			`end`

			`# read and write with transcoding`
			`File.open(@temp_csv_path, "wb:UTF-32BE:#{data.encoding.name}") do \|f\|`
			`f << data`
			`end`
			`CSV.open(@temp_csv_path, "rb:UTF-32BE:#{data.encoding.name}") do \|csv\|`
			`csv.each do \|row\|`
			`assert( row.all? { \|f\| f.encoding == data.encoding },`
			`"Wrong data encoding." )`
			`end`
			`end`
			`end`
			`end`

			`def test_foreach_allows_you_to_set_encodings`
			`encode_for_tests([%w[abc def]]) do \|data\|`
			`# read and write in encoding`
			`File.open(@temp_csv_path, "wb:#{data.encoding.name}") { \|f\| f << data }`
			`CSV.foreach(@temp_csv_path, :encoding => data.encoding.name) do \|row\|`
			`assert( row.all? { \|f\| f.encoding == data.encoding },`
			`"Wrong data encoding." )`
			`end`

			`# read and write with transcoding`
			`File.open(@temp_csv_path, "wb:UTF-32BE:#{data.encoding.name}") do \|f\|`
			`f << data`
			`end`
			`CSV.foreach( @temp_csv_path,`
			`:encoding => "UTF-32BE:#{data.encoding.name}" ) do \|row\|`
			`assert( row.all? { \|f\| f.encoding == data.encoding },`
			`"Wrong data encoding." )`
			`end`
			`end`
			`end`

			`def test_read_allows_you_to_set_encodings`
			`encode_for_tests([%w[abc def]]) do \|data\|`
			`# read and write in encoding`
			`File.open(@temp_csv_path, "wb:#{data.encoding.name}") { \|f\| f << data }`
			`rows = CSV.read(@temp_csv_path, :encoding => data.encoding.name)`
			`assert( rows.flatten.all? { \|f\| f.encoding == data.encoding },`
			`"Wrong data encoding." )`

			`# read and write with transcoding`
			`File.open(@temp_csv_path, "wb:UTF-32BE:#{data.encoding.name}") do \|f\|`
			`f << data`
			`end`
			`rows = CSV.read( @temp_csv_path,`
			`:encoding => "UTF-32BE:#{data.encoding.name}" )`
			`assert( rows.flatten.all? { \|f\| f.encoding == data.encoding },`
			`"Wrong data encoding." )`
			`end`
			`end`

			`#################################`
			`### Write CSV in any Encoding ###`
			`#################################`

			`def test_can_write_csv_in_any_encoding`
			`each_encoding do \|encoding\|`
			`# test generate_line with encoding hint`
			`csv = %w[abc d\|ef].map { \|f\| f.encode(encoding) }.`
			`to_csv(:col_sep => "\|", :encoding => encoding.name)`
			`assert_equal(encoding, csv.encoding)`

			`# test generate_line with encoding guessing from fields`
			`csv = %w[abc d\|ef].map { \|f\| f.encode(encoding) }.to_csv(:col_sep => "\|")`
			`assert_equal(encoding, csv.encoding)`

			`# writing to files`
			`data = encode_ary([%w[abc d,ef], %w[123 456 ]], encoding)`
* lib/csv/csv.rb: Worked around some minor encoding changes in Ruby pointed out by Nobu. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19602 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-09-27 20:06:21 -04:00			`CSV.open(@temp_csv_path, "wb:#{encoding.name}") do \|f\|`
			`data.each { \|row\| f << row }`
* lib/csv/csv.rb: Reworked CSV's parser and generator to be m17n. Data is now parsed in the Encoding it is in without need for translation. * lib/csv/csv.rb: Improved inspect() messages for better IRb support. * lib/csv/csv.rb: Fixed header writing bug reported by Dov Murik. * lib/csv/csv.rb: Use custom separators in parsing header Strings as suggested by Shmulik Regev. * lib/csv/csv.rb: Added a :write_headers option for outputting headers. * lib/csv/csv.rb: Handle open() calls in binary mode whenever we can to workaround a Windows issue where line-ending translation can cause an off-by-one error in seeking back to a non-zero starting position after auto-discovery for :row_sep as suggested by Robert Battle. * lib/csv/csv.rb: Improved the parser to fail faster when fed some forms of invalid CSV that can be detected without reading ahead. * lib/csv/csv.rb: Added a :field_size_limit option to control CSV's lookahead and prevent the parser from biting off more data than it can chew. * lib/csv/csv.rb: Added readers for CSV attributes: col_sep(), row_sep(), quote_char(), field_size_limit(), converters(), unconverted_fields?(), headers(), return_headers?(), write_headers?(), header_converters(), skip_blanks?(), and force_quotes?(). * lib/csv/csv.rb: Cleaned up code syntax to be more inline with Ruby 1.9 than 1.8. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19441 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-09-20 20:39:03 -04:00			`end`
			`assert_equal(data, CSV.read(@temp_csv_path, :encoding => encoding.name))`
			`end`
			`end`

			`private`

			`def assert_parses(fields, encoding, options = { })`
			`encoding = Encoding.find(encoding) unless encoding.is_a? Encoding`
			`fields = encode_ary(fields, encoding)`
			`parsed = CSV.parse(ary_to_data(fields, options), options)`
			`assert_equal(fields, parsed)`
* lib/csv/csv.rb: Worked around some minor encoding changes in Ruby pointed out by Nobu. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19602 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-09-27 20:06:21 -04:00			`parsed.flatten.each_with_index do \|field, i\|`
			`assert_equal(encoding, field.encoding, "Field[#{i + 1}] was transcoded.")`
			`end`
* lib/csv/csv.rb: Reworked CSV's parser and generator to be m17n. Data is now parsed in the Encoding it is in without need for translation. * lib/csv/csv.rb: Improved inspect() messages for better IRb support. * lib/csv/csv.rb: Fixed header writing bug reported by Dov Murik. * lib/csv/csv.rb: Use custom separators in parsing header Strings as suggested by Shmulik Regev. * lib/csv/csv.rb: Added a :write_headers option for outputting headers. * lib/csv/csv.rb: Handle open() calls in binary mode whenever we can to workaround a Windows issue where line-ending translation can cause an off-by-one error in seeking back to a non-zero starting position after auto-discovery for :row_sep as suggested by Robert Battle. * lib/csv/csv.rb: Improved the parser to fail faster when fed some forms of invalid CSV that can be detected without reading ahead. * lib/csv/csv.rb: Added a :field_size_limit option to control CSV's lookahead and prevent the parser from biting off more data than it can chew. * lib/csv/csv.rb: Added readers for CSV attributes: col_sep(), row_sep(), quote_char(), field_size_limit(), converters(), unconverted_fields?(), headers(), return_headers?(), write_headers?(), header_converters(), skip_blanks?(), and force_quotes?(). * lib/csv/csv.rb: Cleaned up code syntax to be more inline with Ruby 1.9 than 1.8. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19441 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-09-20 20:39:03 -04:00			`end`

			`def encode_ary(ary, encoding)`
			`ary.map { \|row\| row.map { \|field\| field.encode(encoding) } }`
			`end`

			`def ary_to_data(ary, options = { })`
			`encoding = ary.flatten.first.encoding`
			`quote_char = (options[:quote_char] \|\| '"').encode(encoding)`
			`col_sep = (options[:col_sep] \|\| ",").encode(encoding)`
			`row_sep = (options[:row_sep] \|\| "\n").encode(encoding)`
			`ary.map { \|row\|`
			`row.map { \|field\|`
			`[quote_char, field.encode(encoding), quote_char].join`
			`}.join(col_sep) + row_sep`
			`}.join.encode(encoding)`
			`end`

			`def encode_for_tests(data, options = { })`
			`yield ary_to_data(encode_ary(data, "UTF-8"), options)`
			`yield ary_to_data(encode_ary(data, "UTF-16BE"), options)`
			`end`

			`def each_encoding`
			`Encoding.list.each do \|encoding\|`
			`next if encoding.dummy? # skip "dummy" encodings`
			`yield encoding`
			`end`
			`end`
			`end`