1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* lib/csv/csv.rb:  Reworked CSV's parser and generator to be m17n.  Data
  is now parsed in the Encoding it is in without need for translation.
* lib/csv/csv.rb:  Improved inspect() messages for better IRb support.
* lib/csv/csv.rb:  Fixed header writing bug reported by Dov Murik.
* lib/csv/csv.rb:  Use custom separators in parsing header Strings as
  suggested by Shmulik Regev.
* lib/csv/csv.rb:  Added a :write_headers option for outputting headers.
* lib/csv/csv.rb:  Handle open() calls in binary mode whenever we can to
  workaround a Windows issue where line-ending translation can cause an
  off-by-one error in seeking back to a non-zero starting position after
  auto-discovery for :row_sep as suggested by Robert Battle.
* lib/csv/csv.rb:  Improved the parser to fail faster when fed some forms
  of invalid CSV that can be detected without reading ahead.
* lib/csv/csv.rb:  Added a :field_size_limit option to control CSV's
  lookahead and prevent the parser from biting off more data than
  it can chew.
* lib/csv/csv.rb:  Added readers for CSV attributes:  col_sep(), row_sep(),
  quote_char(), field_size_limit(), converters(), unconverted_fields?(),
  headers(), return_headers?(), write_headers?(), header_converters(),
  skip_blanks?(), and force_quotes?().
* lib/csv/csv.rb:  Cleaned up code syntax to be more in line with
  Ruby 1.9 than 1.8.



git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19441 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
jeg2 2008-09-21 00:39:03 +00:00
parent 31eacb6ed1
commit 280cbe0b1f
13 changed files with 1105 additions and 171 deletions

255
test/csv/tc_encodings.rb Normal file
View file

@ -0,0 +1,255 @@
#!/usr/bin/env ruby -w
# encoding: UTF-8
# tc_encodings.rb
#
# Created by James Edward Gray II on 2008-09-13.
# Copyright 2008 James Edward Gray II. You can redistribute or modify this code
# under the terms of Ruby's license.
require "test/unit"
require "csv"
class TestEncodings < Test::Unit::TestCase
# Test::Unit hook: remembers the path of the scratch CSV file, which lives
# in the same directory as this test file.
def setup
  test_dir = File.dirname(__FILE__)
  @temp_csv_path = File.join(test_dir, "temp.csv")
end
# Test::Unit hook: removes the scratch CSV file if a test left one behind.
# Does nothing when the file is absent.
def teardown
  return unless File.exist?(@temp_csv_path)

  File.unlink(@temp_csv_path)
end
########################################
### Hand Test Some Popular Encodings ###
########################################
# Rows containing a multibyte ellipsis must survive being written out and
# parsed back as UTF-8.
def test_parses_utf8_encoding
  rows = [ %w[ one two … ],
           %w[ 1   …   3 ],
           %w[ …   5   6 ] ]
  assert_parses(rows, "UTF-8")
end
# Rows containing accented characters must survive being written out and
# parsed back as ISO-8859-1.
def test_parses_latin1_encoding
  rows = [ %w[ one    two    Résumé ],
           %w[ 1      Résumé 3      ],
           %w[ Résumé 5      6      ] ]
  assert_parses(rows, "ISO-8859-1")
end
# Same data as the UTF-8 test, but pushed through UTF-16BE, an encoding
# that is not ASCII compatible.
def test_parses_utf16be_encoding
  rows = [ %w[ one two … ],
           %w[ 1   …   3 ],
           %w[ …   5   6 ] ]
  assert_parses(rows, "UTF-16BE")
end
# Rows of Japanese numerals must survive being written out and parsed back
# as Shift_JIS.
def test_parses_shift_jis_encoding
  rows = [ %w[ 一 二 三 ],
           %w[ 四 五 六 ],
           %w[ 七 八 九 ] ]
  assert_parses(rows, "Shift_JIS")
end
###########################################################
### Try Simple Reading for All Non-dummy Ruby Encodings ###
###########################################################
# Parses a small ASCII-only table in every non-dummy encoding Ruby lists.
#
# If Ruby has no converter for an encoding, the test is aborted with a
# message naming that encoding.
def test_reading_with_most_encodings
  rows = [ %w[ abc def ],
           %w[ ghi jkl ] ]
  each_encoding do |encoding|
    begin
      assert_parses(rows, encoding)
    # Released Rubies define Encoding::ConverterNotFoundError; the older
    # name Encoding::NoConverterError no longer exists, so rescuing it would
    # itself raise NameError instead of producing the message below.
    rescue Encoding::ConverterNotFoundError
      fail("Failed to support #{encoding.name}.")
    end
  end
end
# Parses a small ASCII-only table in every non-dummy encoding using "|" as
# the column separator.
#
# If Ruby has no converter for an encoding, the test is aborted with a
# message naming that encoding.
def test_regular_expression_escaping
  rows = [ %w[ abc def ],
           %w[ ghi jkl ] ]
  each_encoding do |encoding|
    begin
      assert_parses(rows, encoding, :col_sep => "|")
    # Released Rubies define Encoding::ConverterNotFoundError; the older
    # name Encoding::NoConverterError no longer exists, so rescuing it would
    # itself raise NameError instead of producing the message below.
    rescue Encoding::ConverterNotFoundError
      fail("Failed to properly escape #{encoding.name}.")
    end
  end
end
#######################################################################
### Stress Test ASCII Compatible and Non-ASCII Compatible Encodings ###
#######################################################################
# CSV.new must report "\r\n" as the row separator, in the data's own
# encoding, when given data that ends its rows with "\r\n".
def test_auto_line_ending_detection
  # the field length is chosen so that a \r lands at the end of CSV's
  # read ahead point
  long_row = [["a" * 509]]
  encode_for_tests(long_row, :row_sep => "\r\n") do |csv_text|
    expected = "\r\n".encode(csv_text.encoding)
    assert_equal(expected, CSV.new(csv_text).row_sep)
  end
end
# Whichever of col_sep, row_sep or quote_char is set to "|", the matching
# reader must hand it back in the encoding of the data being parsed.
def test_csv_chars_are_transcoded
  encode_for_tests([%w[abc def]]) do |csv_text|
    [:col_sep, :row_sep, :quote_char].each do |attribute|
      expected = "|".encode(csv_text.encoding)
      parser = CSV.new(csv_text, attribute => "|")
      assert_equal(expected, parser.send(attribute))
    end
  end
end
# With :headers => true, both the headers and every field of every row must
# keep the encoding of the parsed data.
def test_parser_works_with_encoded_headers
  table = [%w[one two three], %w[1 2 3]]
  encode_for_tests(table) do |csv_text|
    expected = csv_text.encoding
    parsed = CSV.parse(csv_text, :headers => true)

    headers_ok = parsed.headers.all? { |header| header.encoding == expected }
    assert(headers_ok, "Wrong data encoding.")

    parsed.each do |row|
      fields_ok = row.fields.all? { |field| field.encoding == expected }
      assert(fields_ok, "Wrong data encoding.")
    end
  end
end
# With the :integer converter, fields that are not numbers keep the data's
# encoding while numeric fields come back as Integers.
def test_built_in_converters_transcode_to_utf_8_then_convert
  table = [%w[one two three], %w[1 2 3]]
  encode_for_tests(table) do |csv_text|
    parsed = CSV.parse(csv_text, :converters => :integer)

    words = parsed[0]
    words_ok = words.all? { |field| field.encoding == csv_text.encoding }
    assert(words_ok, "Wrong data encoding.")

    assert_equal([1, 2, 3], parsed[1])
  end
end
# With the :downcase header converter, headers come back as UTF-8 while the
# row fields keep the encoding of the parsed data.
def test_built_in_header_converters_transcode_to_utf_8_then_convert
  table = [%w[one two three], %w[1 2 3]]
  encode_for_tests(table) do |csv_text|
    parsed = CSV.parse(csv_text, :headers           => true,
                                 :header_converters => :downcase)

    headers_ok = parsed.headers.all? { |header| header.encoding.name == "UTF-8" }
    assert(headers_ok, "Wrong data encoding.")

    first_row = parsed[0]
    fields_ok = first_row.fields.all? { |field| field.encoding == csv_text.encoding }
    assert(fields_ok, "Wrong data encoding.")
  end
end
# CSV.open must honour the encodings named in its mode string: every field
# read back has to be in the encoding of the original data.
def test_open_allows_you_to_set_encodings
  encode_for_tests([%w[abc def]]) do |data|
    native = data.encoding
    # first pass:  read and write in the data's own encoding
    # second pass: read and write with transcoding (UTF-32BE on disk)
    [native.name, "UTF-32BE:#{native.name}"].each do |encodings|
      File.open(@temp_csv_path, "wb:#{encodings}") { |file| file << data }
      CSV.open(@temp_csv_path, "rb:#{encodings}") do |csv|
        csv.each do |row|
          fields_ok = row.all? { |field| field.encoding == native }
          assert(fields_ok, "Wrong data encoding.")
        end
      end
    end
  end
end
# CSV.foreach must honour its :encoding option: every field yielded has to
# be in the encoding of the original data.
def test_foreach_allows_you_to_set_encodings
  encode_for_tests([%w[abc def]]) do |data|
    native = data.encoding
    # first pass:  read and write in the data's own encoding
    # second pass: read and write with transcoding (UTF-32BE on disk)
    [native.name, "UTF-32BE:#{native.name}"].each do |encodings|
      File.open(@temp_csv_path, "wb:#{encodings}") { |file| file << data }
      CSV.foreach(@temp_csv_path, :encoding => encodings) do |row|
        fields_ok = row.all? { |field| field.encoding == native }
        assert(fields_ok, "Wrong data encoding.")
      end
    end
  end
end
# CSV.read must honour its :encoding option: every field returned has to be
# in the encoding of the original data.
def test_read_allows_you_to_set_encodings
  encode_for_tests([%w[abc def]]) do |data|
    native = data.encoding
    # first pass:  read and write in the data's own encoding
    # second pass: read and write with transcoding (UTF-32BE on disk)
    [native.name, "UTF-32BE:#{native.name}"].each do |encodings|
      File.open(@temp_csv_path, "wb:#{encodings}") { |file| file << data }
      rows = CSV.read(@temp_csv_path, :encoding => encodings)
      fields_ok = rows.flatten.all? { |field| field.encoding == native }
      assert(fields_ok, "Wrong data encoding.")
    end
  end
end
#################################
### Write CSV in any Encoding ###
#################################
# Generates CSV in every non-dummy encoding, both as a single line through
# Array#to_csv and as a file through CSV.open, and checks that the output
# carries the requested encoding and reads back unchanged.
def test_can_write_csv_in_any_encoding
  each_encoding do |encoding|
    # test generate_line with encoding hint
    csv = %w[abc d|ef].map { |f| f.encode(encoding) }.
          to_csv(:col_sep => "|", :encoding => encoding.name)
    assert_equal(encoding, csv.encoding)

    # test generate_line with encoding guessing from fields
    csv = %w[abc d|ef].map { |f| f.encode(encoding) }.to_csv(:col_sep => "|")
    assert_equal(encoding, csv.encoding)

    # writing to files
    data = encode_ary([%w[abc d,ef], %w[123 456 ]], encoding)
    # The block parameter is deliberately not called csv: reusing that name
    # would shadow the local above, which Rubies that report shadowing warn
    # about under -w.
    CSV.open(@temp_csv_path, "wb:#{encoding.name}") do |file|
      data.each { |row| file << row }
    end
    assert_equal(data, CSV.read(@temp_csv_path, :encoding => encoding.name))
  end
end
private
# Encodes +fields+ (an Array of rows) into +encoding+, renders them as CSV
# text, parses that text and asserts that the result equals the encoded
# fields and that every parsed field is still in +encoding+.
#
# +encoding+ may be an Encoding or an encoding name.  +options+ is passed
# both to the text builder and to CSV.parse.
def assert_parses(fields, encoding, options = { })
  target = encoding.is_a?(Encoding) ? encoding : Encoding.find(encoding)
  expected = encode_ary(fields, target)
  csv_text = ary_to_data(expected, options)
  actual = CSV.parse(csv_text, options)

  assert_equal(expected, actual)

  untouched = actual.flatten.all? { |field| field.encoding == target }
  assert(untouched, "Fields were transcoded.")
end
# Returns a copy of +ary+ (an Array of rows of Strings) with every String
# encoded into +encoding+.  The input is not modified.
def encode_ary(ary, encoding)
  ary.map do |row|
    row.map { |cell| cell.encode(encoding) }
  end
end
# Renders +ary+ (an Array of rows of Strings) as CSV text in which every
# field is wrapped in quotes.  The text is produced in the encoding of the
# first field found in +ary+.
#
# +options+ may supply :quote_char, :col_sep and :row_sep; they default to
# a double quote, a comma and "\n".
def ary_to_data(ary, options = { })
  target = ary.flatten.first.encoding
  # look up one CSV character, falling back to its default, in +target+
  setting = lambda { |key, default| (options[key] || default).encode(target) }
  quote   = setting.call(:quote_char, '"')
  comma   = setting.call(:col_sep, ",")
  newline = setting.call(:row_sep, "\n")

  lines = ary.map do |row|
    # Array#join rather than interpolation, so the pieces are combined in
    # their own encoding
    cells = row.map { |field| [quote, field.encode(target), quote].join }
    cells.join(comma) + newline
  end
  lines.join.encode(target)
end
# Yields +data+ rendered as CSV text twice: first in UTF-8, then in
# UTF-16BE.  +options+ is handed on to the text builder.  Returns whatever
# the block returned for the last encoding.
def encode_for_tests(data, options = { })
  %w[UTF-8 UTF-16BE].map { |encoding_name|
    yield ary_to_data(encode_ary(data, encoding_name), options)
  }.last
end
# Yields every Encoding in Encoding.list except the ones flagged as dummy.
def each_encoding
  Encoding.list.each { |candidate| yield candidate unless candidate.dummy? }
end
end