2019-04-14 17:01:51 -04:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# frozen_string_literal: false
|
|
|
|
|
|
|
|
require_relative "../helper"
|
|
|
|
|
|
|
|
class TestCSVParseStrip < Test::Unit::TestCase
|
|
|
|
extend DifferentOFS
|
|
|
|
|
|
|
|
# With strip: true, whitespace on both sides of each unquoted field
# is expected to be removed.
def test_both
  fields = CSV.parse_line(%Q{ a , b }, strip: true)
  assert_equal(["a", "b"], fields)
end
|
|
|
|
|
|
|
|
# With strip: true, leading whitespace of each unquoted field is
# expected to be removed.
def test_left
  fields = CSV.parse_line(%Q{ a, b}, strip: true)
  assert_equal(["a", "b"], fields)
end
|
|
|
|
|
|
|
|
# With strip: true, trailing whitespace of each unquoted field is
# expected to be removed.
def test_right
  fields = CSV.parse_line(%Q{a ,b }, strip: true)
  assert_equal(["a", "b"], fields)
end
|
|
|
|
|
2019-12-24 16:59:43 -05:00
|
|
|
# Whitespace inside a field (not at its edges) is expected to survive
# strip: true.
def test_middle
  fields = CSV.parse_line(%Q{a b}, strip: true)
  assert_equal(["a b"], fields)
end
|
|
|
|
|
2019-04-14 17:01:51 -04:00
|
|
|
# Whitespace inside quoted fields is expected to be kept even when
# strip: true is given.
def test_quoted
  fields = CSV.parse_line(%Q{" a "," b "}, strip: true)
  assert_equal([" a ", " b "], fields)
end
|
|
|
|
|
|
|
|
# Combines strip: true with liberal_parsing: true: quoted fields are
# expected to keep their inner whitespace while the unquoted field is
# stripped.
def test_liberal_parsing
  options = {strip: true, liberal_parsing: true}
  fields = CSV.parse_line(%Q{" a ", b , " c "," d " }, **options)
  assert_equal([" a ", "b", " c ", " d "], fields)
end
|
|
|
|
|
|
|
|
# strip may be given as a string of characters to strip (here a single
# space); the quoted field is expected to keep its leading space.
def test_string
  fields = CSV.parse_line(%Q{ a , " b" }, strip: " ")
  assert_equal(["a", " b"], fields)
end
|
|
|
|
|
|
|
|
# With quoting disabled (quote_char: nil) and the double quote used as
# the strip character, the surrounding quotes are expected to be
# stripped while spaces are kept.
def test_no_quote
  options = {strip: %Q{"}, quote_char: nil}
  fields = CSV.parse_line(%Q{" a ", b }, **options)
  assert_equal([" a ", " b "], fields)
end
|
2019-04-22 16:54:44 -04:00
|
|
|
|
|
|
|
# Two identical rows terminated by CR: the row terminator is expected
# to still separate rows when strip: true is given.
def test_do_not_strip_cr
  row = %Q{"a" ,"b " \r}
  rows = CSV.parse(row + row, strip: true)
  assert_equal([["a", "b "], ["a", "b "]], rows)
end
|
|
|
|
|
|
|
|
# Two identical rows terminated by LF: the row terminator is expected
# to still separate rows when strip: true is given.
def test_do_not_strip_lf
  row = %Q{"a" ,"b " \n}
  rows = CSV.parse(row + row, strip: true)
  assert_equal([["a", "b "], ["a", "b "]], rows)
end
|
|
|
|
|
|
|
|
# Two identical rows terminated by CRLF: the row terminator is expected
# to still separate rows when strip: true is given.
def test_do_not_strip_crlf
  row = %Q{"a" ,"b " \r\n}
  rows = CSV.parse(row + row, strip: true)
  assert_equal([["a", "b "], ["a", "b "]], rows)
end
|
[ruby/csv] Add handling for ambiguous parsing options (https://github.com/ruby/csv/pull/226)
GitHub: fix GH-225
With Ruby 3.0.2 and csv 3.2.1, the file
```ruby
require "csv"
File.open("example.tsv", "w") { |f| f.puts("foo\t\tbar") }
CSV.read("example.tsv", col_sep: "\t", strip: true)
```
produces the error
```
lib/csv/parser.rb:935:in `parse_quotable_robust': TODO: Meaningful
message in line 1. (CSV::MalformedCSVError)
```
However, the CSV in this example is not malformed; instead, ambiguous
options were provided to the parser. It is not obvious (to me) whether
the string should be parsed as
- `["foo\t\tbar"]`,
- `["foo", "bar"]`,
- `["foo", "", "bar"]`, or
- `["foo", nil, "bar"]`.
This commit adds code that raises an exception when this situation is
encountered. Specifically, it checks if the column separator either ends
with or starts with the characters that would be stripped away.
This commit also adds unit tests and updates the documentation.
https://github.com/ruby/csv/commit/cc317dd42d
2021-11-18 16:20:09 -05:00
|
|
|
|
|
|
|
# strip: true together with a tab column separator is expected to be
# rejected with an ArgumentError carrying the message below.
def test_col_sep_incompatible_true
  expected_message =
    "The provided strip (true) and col_sep (\\t) options are incompatible."
  options = {col_sep: "\t", strip: true}
  assert_raise_with_message(ArgumentError, expected_message) do
    CSV.parse_line(%Q{"a"\t"b"\n}, **options)
  end
end
|
|
|
|
|
|
|
|
# Using a tab both as the strip character and as the column separator
# is expected to be rejected with an ArgumentError carrying the
# message below.
def test_col_sep_incompatible_string
  expected_message =
    "The provided strip (\\t) and col_sep (\\t) options are incompatible."
  options = {col_sep: "\t", strip: "\t"}
  assert_raise_with_message(ArgumentError, expected_message) do
    CSV.parse_line(%Q{"a"\t"b"\n}, **options)
  end
end
|
|
|
|
|
|
|
|
# A strip character (vertical tab) that differs from the column
# separator (tab) is expected to be accepted and stripped from the
# field edges.
def test_col_sep_compatible_string
  options = {col_sep: "\t", strip: "\v"}
  fields = CSV.parse_line(%Q{\va\tb\v\n}, **options)
  assert_equal(["a", "b"], fields)
end
|
2019-04-14 17:01:51 -04:00
|
|
|
end
|