From 08e70126aeca4edcdeab7dc292acda2f50cc41a1 Mon Sep 17 00:00:00 2001
From: Koichi ITO
Date: Thu, 4 Jun 2020 12:33:47 +0900
Subject: [PATCH] [ruby/csv] Add `invalid: :replace` for `CSV.open` (#130)

This PR adds `invalid: :replace` for `CSV.open`.
It is a PR similar to #129.

https://github.com/ruby/csv/commit/5bf687341c
---
 lib/csv.rb                      |  7 +++++--
 test/csv/interface/test_read.rb | 30 ++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/lib/csv.rb b/lib/csv.rb
index a06c92a1b9..285f7198b7 100644
--- a/lib/csv.rb
+++ b/lib/csv.rb
@@ -962,8 +962,10 @@ class CSV
     #
     # possible options elements:
     #   hash form:
-    #     :undef => :replace # replace undefined conversion
-    #     :replace => string # replacement string ("?" or "\uFFFD" if not specified)
+    #     :invalid => nil      # raise error on invalid byte sequence (default)
+    #     :invalid => :replace # replace invalid byte sequence
+    #     :undef => :replace   # replace undefined conversion
+    #     :replace => string   # replacement string ("?" or "\uFFFD" if not specified)
     #
     # This method opens an IO object, and wraps that with CSV. This is intended
     # as the primary interface for writing a CSV file.
@@ -1026,6 +1028,7 @@ class CSV
       # wrap a File opened with the remaining +args+ with no newline
       # decorator
       file_opts = {universal_newline: false}.merge(options)
+      options.delete(:invalid)
       options.delete(:undef)
       options.delete(:replace)
 
diff --git a/test/csv/interface/test_read.rb b/test/csv/interface/test_read.rb
index a2bcceda88..b86c54fc9f 100644
--- a/test/csv/interface/test_read.rb
+++ b/test/csv/interface/test_read.rb
@@ -135,6 +135,36 @@ class TestCSVInterfaceRead < Test::Unit::TestCase
     end
   end
 
+  def test_open_with_invalid_nil
+    CSV.open(@input.path, "w", encoding: Encoding::CP932, invalid: nil) do |rows|
+      error = assert_raise(Encoding::InvalidByteSequenceError) do
+        rows << ["\x82\xa0"]
+      end
+      assert_equal('"\x82" on UTF-8',
+                   error.message)
+    end
+  end
+
+  def test_open_with_invalid_replace
+    CSV.open(@input.path, "w", encoding: Encoding::CP932, invalid: :replace) do |rows|
+      rows << ["\x82\xa0".force_encoding(Encoding::UTF_8)]
+    end
+    CSV.open(@input.path, encoding: Encoding::CP932) do |csv|
+      assert_equal([["??"]],
+                   csv.to_a)
+    end
+  end
+
+  def test_open_with_invalid_replace_and_replace_string
+    CSV.open(@input.path, "w", encoding: Encoding::CP932, invalid: :replace, replace: "X") do |rows|
+      rows << ["\x82\xa0".force_encoding(Encoding::UTF_8)]
+    end
+    CSV.open(@input.path, encoding: Encoding::CP932) do |csv|
+      assert_equal([["XX"]],
+                   csv.to_a)
+    end
+  end
+
   def test_open_with_undef_replace
     # U+00B7 Middle Dot
     CSV.open(@input.path, "w", encoding: Encoding::CP932, undef: :replace) do |rows|