diff --git a/lib/csv.rb b/lib/csv.rb index 42e99435cb..06a490f34c 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -330,6 +330,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP) # liberal_parsing: false, # nil_value: nil, # empty_value: "", +# strip: false, # # For generating. # write_headers: nil, # quote_empty: true, @@ -337,7 +338,6 @@ using CSV::MatchP if CSV.const_defined?(:MatchP) # write_converters: nil, # write_nil_value: nil, # write_empty_value: "", -# strip: false, # } # # ==== Options for Parsing @@ -355,8 +355,9 @@ using CSV::MatchP if CSV.const_defined?(:MatchP) # - +header_converters+: Specifies the header converters to be used. # - +skip_blanks+: Specifies whether blanks lines are to be ignored. # - +skip_lines+: Specifies how comments lines are to be recognized. -# - +strip+: Specifies whether leading and trailing whitespace are -# to be stripped from fields.. +# - +strip+: Specifies whether leading and trailing whitespace are to be +# stripped from fields. This must be compatible with +col_sep+; if it is not, +# then an +ArgumentError+ exception will be raised. # - +liberal_parsing+: Specifies whether \CSV should attempt to parse # non-compliant data. # - +nil_value+: Specifies the object that is to be substituted for each null (no-text) field. @@ -935,6 +936,7 @@ class CSV liberal_parsing: false, nil_value: nil, empty_value: "", + strip: false, # For generating. write_headers: nil, quote_empty: true, @@ -942,7 +944,6 @@ class CSV write_converters: nil, write_nil_value: nil, write_empty_value: "", - strip: false, }.freeze class << self @@ -1760,11 +1761,11 @@ class CSV encoding: nil, nil_value: nil, empty_value: "", + strip: false, quote_empty: true, write_converters: nil, write_nil_value: nil, - write_empty_value: "", - strip: false) + write_empty_value: "") raise ArgumentError.new("Cannot parse nil as CSV") if data.nil? if data.is_a?(String) diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb index 3334acfbdd..f87db3bb12 100644 --- a/lib/csv/parser.rb +++ b/lib/csv/parser.rb @@ -361,6 +361,7 @@ class CSV prepare_skip_lines prepare_strip prepare_separators + validate_strip_and_col_sep_options prepare_quoted prepare_unquoted prepare_line @@ -531,6 +532,28 @@ class CSV @not_line_end = Regexp.new("[^\r\n]+".encode(@encoding)) end + # This method verifies that there are no (obvious) ambiguities with the + # provided +col_sep+ and +strip+ parsing options. For example, if +col_sep+ + # and +strip+ were both equal to +\t+, then there would be no clear way to + # parse the input. + def validate_strip_and_col_sep_options + return unless @strip + + if @strip.is_a?(String) + if @column_separator.start_with?(@strip) || @column_separator.end_with?(@strip) + raise ArgumentError, + "The provided strip (#{@escaped_strip}) and " \ + "col_sep (#{@escaped_column_separator}) options are incompatible." + end + else + if Regexp.new("\\A[#{@escaped_strip}]|[#{@escaped_strip}]\\z").match?(@column_separator) + raise ArgumentError, + "The provided strip (true) and " \ + "col_sep (#{@escaped_column_separator}) options are incompatible." + end + end + end + def prepare_quoted if @quote_character @quotes = Regexp.new(@escaped_quote_character + diff --git a/test/csv/parse/test_strip.rb b/test/csv/parse/test_strip.rb index 3564fcb3ba..c5e35209cc 100644 --- a/test/csv/parse/test_strip.rb +++ b/test/csv/parse/test_strip.rb @@ -80,4 +80,33 @@ class TestCSVParseStrip < Test::Unit::TestCase %Q{"a" ,"b " \r\n}, strip: true)) end + + def test_col_sep_incompatible_true + message = "The provided strip (true) and " \ + "col_sep (\\t) options are incompatible." + assert_raise_with_message(ArgumentError, message) do + CSV.parse_line(%Q{"a"\t"b"\n}, + col_sep: "\t", + strip: true) + end + end + + def test_col_sep_incompatible_string + message = "The provided strip (\\t) and " \ + "col_sep (\\t) options are incompatible." + assert_raise_with_message(ArgumentError, message) do + CSV.parse_line(%Q{"a"\t"b"\n}, + col_sep: "\t", + strip: "\t") + end + end + + def test_col_sep_compatible_string + assert_equal( + ["a", "b"], + CSV.parse_line(%Q{\va\tb\v\n}, + col_sep: "\t", + strip: "\v") + ) + end end