1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00
ruby--ruby/test/csv/parse/test_strip.rb
adamroyjones c70dc3cafb [ruby/csv] Add handling for ambiguous parsing options (https://github.com/ruby/csv/pull/226)
GitHub: fix GH-225

With Ruby 3.0.2 and csv 3.2.1, the file

```ruby
require "csv"
File.open("example.tsv", "w") { |f| f.puts("foo\t\tbar") }
CSV.read("example.tsv", col_sep: "\t", strip: true)
```

produces the error

```
lib/csv/parser.rb:935:in `parse_quotable_robust': TODO: Meaningful
message in line 1. (CSV::MalformedCSVError)
```

However, the CSV in this example is not malformed; instead, ambiguous
options were provided to the parser. It is not obvious (to me) whether
the string should be parsed as

- `["foo\t\tbar"]`,
- `["foo", "bar"]`,
- `["foo", "", "bar"]`, or
- `["foo", nil, "bar"]`.

This commit adds code that raises an exception when this situation is
encountered. Specifically, it checks if the column separator either ends
with or starts with the characters that would be stripped away.

This commit also adds unit tests and updates the documentation.

https://github.com/ruby/csv/commit/cc317dd42d
2021-12-24 14:35:33 +09:00

112 lines
2.9 KiB
Ruby

# -*- coding: utf-8 -*-
# frozen_string_literal: false
require_relative "../helper"
# Exercises the +strip+ option of CSV.parse_line and CSV.parse.
class TestCSVParseStrip < Test::Unit::TestCase
  # NOTE(review): DifferentOFS is supplied by the test helper; presumably it
  # reruns these tests with a different output field separator -- confirm.
  extend DifferentOFS

  # Unquoted fields padded on both sides come back without the padding.
  def test_both
    fields = CSV.parse_line(%Q{ a , b }, strip: true)
    assert_equal(["a", "b"], fields)
  end

  # Only leading spaces are present in the input.
  def test_left
    fields = CSV.parse_line(%Q{ a, b}, strip: true)
    assert_equal(["a", "b"], fields)
  end

  # Only trailing spaces are present in the input.
  def test_right
    fields = CSV.parse_line(%Q{a ,b }, strip: true)
    assert_equal(["a", "b"], fields)
  end

  # A space in the middle of a field is expected to stay.
  def test_middle
    fields = CSV.parse_line(%Q{a b}, strip: true)
    assert_equal(["a b"], fields)
  end

  # Spaces inside quotes are expected to be kept.
  def test_quoted
    fields = CSV.parse_line(%Q{" a "," b "}, strip: true)
    assert_equal([" a ", " b "], fields)
  end

  # Mix of quoted and unquoted fields, parsed with liberal_parsing enabled.
  def test_liberal_parsing
    line = %Q{" a ", b , " c "," d " }
    fields = CSV.parse_line(line, strip: true, liberal_parsing: true)
    assert_equal([" a ", "b", " c ", " d "], fields)
  end

  # +strip+ given as a string instead of +true+; the quoted field is expected
  # to keep its leading space.
  def test_string
    line = %Q{ a , " b" }
    fields = CSV.parse_line(line, strip: " ")
    assert_equal(["a", " b"], fields)
  end

  # Quoting is turned off (quote_char: nil) and the double quote itself is
  # passed as the strip string.
  def test_no_quote
    line = %Q{" a ", b }
    fields = CSV.parse_line(line, strip: %Q{"}, quote_char: nil)
    assert_equal([" a ", " b "], fields)
  end

  # Two identical rows, each terminated by a bare CR.
  def test_do_not_strip_cr
    row = %Q{"a" ,"b " \r}
    expected = Array.new(2) { ["a", "b "] }
    assert_equal(expected, CSV.parse(row * 2, strip: true))
  end

  # Two identical rows, each terminated by a bare LF.
  def test_do_not_strip_lf
    row = %Q{"a" ,"b " \n}
    expected = Array.new(2) { ["a", "b "] }
    assert_equal(expected, CSV.parse(row * 2, strip: true))
  end

  # Two identical rows, each terminated by CRLF.
  def test_do_not_strip_crlf
    row = %Q{"a" ,"b " \r\n}
    expected = Array.new(2) { ["a", "b "] }
    assert_equal(expected, CSV.parse(row * 2, strip: true))
  end

  # strip: true combined with a tab column separator must be rejected with
  # an ArgumentError.
  def test_col_sep_incompatible_true
    expected_message =
      "The provided strip (true) and col_sep (\\t) options are incompatible."
    assert_raise_with_message(ArgumentError, expected_message) do
      CSV.parse_line(%Q{"a"\t"b"\n}, col_sep: "\t", strip: true)
    end
  end

  # A strip string equal to the column separator must be rejected with an
  # ArgumentError.
  def test_col_sep_incompatible_string
    expected_message =
      "The provided strip (\\t) and col_sep (\\t) options are incompatible."
    assert_raise_with_message(ArgumentError, expected_message) do
      CSV.parse_line(%Q{"a"\t"b"\n}, col_sep: "\t", strip: "\t")
    end
  end

  # A strip string (vertical tab) that differs from the tab column separator
  # is accepted and removed from the outer fields.
  def test_col_sep_compatible_string
    line = %Q{\va\tb\v\n}
    fields = CSV.parse_line(line, col_sep: "\t", strip: "\v")
    assert_equal(["a", "b"], fields)
  end
end