1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Adding a liberal_parsing option to CSV. Patch by Braden Anderson.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53401 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
jeg2 2016-01-01 02:44:48 +00:00
parent 7d9342aecd
commit f18f940802
3 changed files with 51 additions and 4 deletions

View file

@ -1,3 +1,9 @@
Fri Jan 1 11:42:57 2016 James Edward Gray II <james@graysoftinc.com>
* lib/csv.rb (CSV): Add a liberal_parsing option.
Patch by Braden Anderson. [#11839]
* test/csv/test_features.rb: test liberal_parsing
Fri Jan 1 10:27:28 2016 Nobuyoshi Nakada <nobu@ruby-lang.org> Fri Jan 1 10:27:28 2016 Nobuyoshi Nakada <nobu@ruby-lang.org>
* tool/mkconfig.rb (RbConfig): prefix SDKROOT to oldincludedir * tool/mkconfig.rb (RbConfig): prefix SDKROOT to oldincludedir

View file

@ -1019,6 +1019,7 @@ class CSV
# <b><tt>:skip_blanks</tt></b>:: +false+ # <b><tt>:skip_blanks</tt></b>:: +false+
# <b><tt>:force_quotes</tt></b>:: +false+ # <b><tt>:force_quotes</tt></b>:: +false+
# <b><tt>:skip_lines</tt></b>:: +nil+ # <b><tt>:skip_lines</tt></b>:: +nil+
# <b><tt>:liberal_parsing</tt></b>:: +false+
# #
DEFAULT_OPTIONS = { DEFAULT_OPTIONS = {
col_sep: ",", col_sep: ",",
@ -1033,6 +1034,7 @@ class CSV
skip_blanks: false, skip_blanks: false,
force_quotes: false, force_quotes: false,
skip_lines: nil, skip_lines: nil,
liberal_parsing: false,
}.freeze }.freeze
# #
@ -1499,6 +1501,10 @@ class CSV
# a comment. If the passed object does # a comment. If the passed object does
# not respond to <tt>match</tt>, # not respond to <tt>match</tt>,
# <tt>ArgumentError</tt> is thrown. # <tt>ArgumentError</tt> is thrown.
# <b><tt>:liberal_parsing</tt></b>:: When set to a +true+ value, CSV will
# attempt to parse input not conformant
# with RFC 4180, such as double quotes
# in unquoted fields.
# #
# See CSV::DEFAULT_OPTIONS for the default settings. # See CSV::DEFAULT_OPTIONS for the default settings.
# #
@ -1622,6 +1628,8 @@ class CSV
def skip_blanks?() @skip_blanks end def skip_blanks?() @skip_blanks end
# Returns +true+ if all output fields are quoted. See CSV::new for details. # Returns +true+ if all output fields are quoted. See CSV::new for details.
def force_quotes?() @force_quotes end def force_quotes?() @force_quotes end
# Reports the +liberal_parsing+ setting: truthy when input with illegal
# quoting is kept as field data instead of raising. See CSV::new for details.
def liberal_parsing?
  @liberal_parsing
end
# #
# The Encoding CSV is parsing or writing in. This will be the Encoding you # The Encoding CSV is parsing or writing in. This will be the Encoding you
@ -1860,12 +1868,12 @@ class CSV
end end
elsif part[0] == @quote_char elsif part[0] == @quote_char
# If we are starting a new quoted column # If we are starting a new quoted column
if part[-1] != @quote_char || part.count(@quote_char) % 2 != 0 if part.count(@quote_char) % 2 != 0
# start an extended column # start an extended column
csv << part[1..-1] csv << part[1..-1]
csv.last << @col_sep csv.last << @col_sep
in_extended_col = true in_extended_col = true
else elsif part[-1] == @quote_char
# regular quoted column # regular quoted column
csv << part[1..-2] csv << part[1..-2]
if csv.last =~ @parsers[:stray_quote] if csv.last =~ @parsers[:stray_quote]
@ -1873,6 +1881,11 @@ class CSV
"Missing or stray quote in line #{lineno + 1}" "Missing or stray quote in line #{lineno + 1}"
end end
csv.last.gsub!(@quote_char * 2, @quote_char) csv.last.gsub!(@quote_char * 2, @quote_char)
elsif @liberal_parsing
csv << part
else
raise MalformedCSVError,
"Missing or stray quote in line #{lineno + 1}"
end end
elsif part =~ @parsers[:quote_or_nl] elsif part =~ @parsers[:quote_or_nl]
# Unquoted field with bad characters. # Unquoted field with bad characters.
@ -1880,7 +1893,11 @@ class CSV
raise MalformedCSVError, "Unquoted fields do not allow " + raise MalformedCSVError, "Unquoted fields do not allow " +
"\\r or \\n (line #{lineno + 1})." "\\r or \\n (line #{lineno + 1})."
else else
raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}." if @liberal_parsing
csv << part
else
raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}."
end
end end
else else
# Regular ole unquoted field. # Regular ole unquoted field.
@ -1945,7 +1962,7 @@ class CSV
str << " encoding:" << @encoding.name str << " encoding:" << @encoding.name
# show other attributes # show other attributes
%w[ lineno col_sep row_sep %w[ lineno col_sep row_sep
quote_char skip_blanks ].each do |attr_name| quote_char skip_blanks liberal_parsing ].each do |attr_name|
if a = instance_variable_get("@#{attr_name}") if a = instance_variable_get("@#{attr_name}")
str << " " << attr_name << ":" << a.inspect str << " " << attr_name << ":" << a.inspect
end end
@ -2079,6 +2096,7 @@ class CSV
# store the parser behaviors # store the parser behaviors
@skip_blanks = options.delete(:skip_blanks) @skip_blanks = options.delete(:skip_blanks)
@field_size_limit = options.delete(:field_size_limit) @field_size_limit = options.delete(:field_size_limit)
@liberal_parsing = options.delete(:liberal_parsing)
# prebuild Regexps for faster parsing # prebuild Regexps for faster parsing
esc_row_sep = escape_re(@row_sep) esc_row_sep = escape_re(@row_sep)

View file

@ -142,6 +142,29 @@ class TestCSV::Features < TestCSV
assert_equal(3, count) assert_equal(3, count)
end end
# Checks the +liberal_parsing+ option of CSV.parse_line: lines with stray
# double quotes are rejected by default and accepted when the option is set,
# while a line with an unbalanced quote is still rejected.
def test_liberal_parsing
  # Each line must raise with default options and parse to the listed fields
  # with liberal_parsing enabled.
  {
    '"Johnson, Dwayne",Dwayne "The Rock" Johnson' =>
      ["Johnson, Dwayne", 'Dwayne "The Rock" Johnson'],
    '"quoted" field' => ['"quoted" field'],
  }.each do |line, liberal_fields|
    assert_raise(CSV::MalformedCSVError) { CSV.parse_line(line) }
    assert_equal(liberal_fields, CSV.parse_line(line, liberal_parsing: true))
  end

  # A quote directly after the column separator is still an error, even in
  # liberal mode.
  unbalanced = 'is,this "three," or four,fields'
  assert_raise(CSV::MalformedCSVError) do
    CSV.parse_line(unbalanced, liberal_parsing: true)
  end

  # Quotes that neither start nor end a field are kept as literal data, so the
  # separator inside them still splits the line.
  balanced = 'is,this "three, or four",fields'
  assert_equal(["is", 'this "three', ' or four"', "fields"],
               CSV.parse_line(balanced, liberal_parsing: true))
end
def test_csv_behavior_readers def test_csv_behavior_readers
%w[ unconverted_fields return_headers write_headers %w[ unconverted_fields return_headers write_headers
skip_blanks force_quotes ].each do |behavior| skip_blanks force_quotes ].each do |behavior|