mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* lib/csv.rb: Incorporating the fixes from the recent
FasterCSV releases: 1.5.2 and 1.5.3. [ruby-core:25038] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27025 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
0471422beb
commit
1bd1128989
5 changed files with 115 additions and 89 deletions
|
@ -1,3 +1,8 @@
|
|||
Tue Mar 23 23:58:51 2010 James Edward Gray II <jeg2@ruby-lang.org>
|
||||
|
||||
* lib/csv.rb: Incorporating the fixes from the recent
|
||||
FasterCSV releases: 1.5.2 and 1.5.3. [ruby-core:25038]
|
||||
|
||||
Tue Mar 23 18:35:46 2010 Shugo Maeda <shugo@ruby-lang.org>
|
||||
|
||||
* object.c (rb_obj_singleton_class): new method
|
||||
|
|
138
lib/csv.rb
138
lib/csv.rb
|
@ -826,8 +826,12 @@ class CSV
|
|||
# Returns the table as a complete CSV String. Headers will be listed first,
|
||||
# then all of the field rows.
|
||||
#
|
||||
# This method assumes you want the Table.headers(), unless you explicitly
|
||||
# pass <tt>:write_headers => false</tt>.
|
||||
#
|
||||
def to_csv(options = Hash.new)
|
||||
@table.inject([headers.to_csv(options)]) do |rows, row|
|
||||
wh = options.fetch(:write_headers, true)
|
||||
@table.inject(wh ? [headers.to_csv(options)] : [ ]) do |rows, row|
|
||||
if row.header_row?
|
||||
rows
|
||||
else
|
||||
|
@ -1808,13 +1812,18 @@ class CSV
|
|||
# it can take multiple calls to <tt>@io.gets()</tt> to get a full line,
|
||||
# because of \r and/or \n characters embedded in quoted fields
|
||||
#
|
||||
in_extended_col = false
|
||||
csv = Array.new
|
||||
|
||||
loop do
|
||||
# add another read to the line
|
||||
(line += @io.gets(@row_sep)) rescue return nil
|
||||
# copy the line so we can chop it up in parsing
|
||||
parse = line.dup
|
||||
unless parse = @io.gets(@row_sep)
|
||||
return nil
|
||||
end
|
||||
|
||||
parse.sub!(@parsers[:line_end], "")
|
||||
|
||||
if csv.empty?
|
||||
#
|
||||
# I believe a blank line should be an <tt>Array.new</tt>, not Ruby 1.8
|
||||
# CSV's <tt>[nil]</tt>
|
||||
|
@ -1822,7 +1831,6 @@ class CSV
|
|||
if parse.empty?
|
||||
@lineno += 1
|
||||
if @skip_blanks
|
||||
line = ""
|
||||
next
|
||||
elsif @unconverted_fields
|
||||
return add_unconverted_fields(Array.new, Array.new)
|
||||
|
@ -1832,42 +1840,67 @@ class CSV
|
|||
return Array.new
|
||||
end
|
||||
end
|
||||
|
||||
#
|
||||
# shave leading empty fields if needed, because the main parser chokes
|
||||
# on these
|
||||
#
|
||||
csv = if parse.sub!(@parsers[:leading_fields], "")
|
||||
[nil] * ($&.length / @col_sep.length)
|
||||
else
|
||||
Array.new
|
||||
end
|
||||
#
|
||||
# then parse the main fields with a hyper-tuned Regexp from
|
||||
# Mastering Regular Expressions, Second Edition
|
||||
#
|
||||
parse.gsub!(@parsers[:csv_row]) do
|
||||
csv << if $1.nil? # we found an unquoted field
|
||||
if $2.empty? # switch empty unquoted fields to +nil+...
|
||||
nil # for Ruby 1.8 CSV compatibility
|
||||
|
||||
parts = parse.split(@col_sep, -1)
|
||||
csv << nil if parts.empty?
|
||||
|
||||
# This loop is the hot path of csv parsing. Some things may be non-dry
|
||||
# for a reason. Make sure to benchmark when refactoring.
|
||||
parts.each do |part|
|
||||
if in_extended_col
|
||||
# If we are continuing a previous column
|
||||
if part[-1] == @quote_char && part.count(@quote_char) % 2 != 0
|
||||
# extended column ends
|
||||
csv.last << part[0..-2]
|
||||
raise MalformedCSVError if csv.last =~ @parsers[:stray_quote]
|
||||
csv.last.gsub!(@quote_char * 2, @quote_char)
|
||||
in_extended_col = false
|
||||
else
|
||||
# I decided to take a strict approach to CSV parsing...
|
||||
if $2.count(@parsers[:return_newline]).zero? # verify correctness
|
||||
$2
|
||||
csv.last << part
|
||||
csv.last << @col_sep
|
||||
end
|
||||
elsif part[0] == @quote_char
|
||||
# If we are staring a new quoted column
|
||||
if part[-1] != @quote_char || part.count(@quote_char) % 2 != 0
|
||||
# start an extended column
|
||||
csv << part[1..-1]
|
||||
csv.last << @col_sep
|
||||
in_extended_col = true
|
||||
else
|
||||
# or throw an Exception
|
||||
# regular quoted column
|
||||
csv << part[1..-2]
|
||||
raise MalformedCSVError if csv.last =~ @parsers[:stray_quote]
|
||||
csv.last.gsub!(@quote_char * 2, @quote_char)
|
||||
end
|
||||
elsif part =~ @parsers[:quote_or_nl]
|
||||
# Unquoted field with bad characters.
|
||||
if part =~ @parsers[:nl_or_lf]
|
||||
raise MalformedCSVError, "Unquoted fields do not allow " +
|
||||
"\\r or \\n (line #{lineno + 1})."
|
||||
else
|
||||
raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}."
|
||||
end
|
||||
else
|
||||
# Regular ole unquoted field.
|
||||
csv << (part.empty? ? nil : part)
|
||||
end
|
||||
else # we found a quoted field...
|
||||
$1.gsub(@quote_char * 2, @quote_char) # unescape contents
|
||||
end
|
||||
"" # gsub!'s replacement, clear the field
|
||||
end
|
||||
|
||||
# if parse is empty?(), we found all the fields on the line...
|
||||
if parse.empty?
|
||||
# Replace tacked on @col_sep with @row_sep if we are still in an extended
|
||||
# column.
|
||||
csv[-1][-1] = @row_sep if in_extended_col
|
||||
|
||||
if in_extended_col
|
||||
# if we're at eof?(), a quoted field wasn't closed...
|
||||
if @io.eof?
|
||||
raise MalformedCSVError,
|
||||
"Unclosed quoted field on line #{lineno + 1}."
|
||||
elsif @field_size_limit and csv.last.size >= @field_size_limit
|
||||
raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
|
||||
end
|
||||
# otherwise, we need to loop and pull some more data to complete the row
|
||||
else
|
||||
@lineno += 1
|
||||
|
||||
# save fields unconverted fields, if needed...
|
||||
|
@ -1886,15 +1919,6 @@ class CSV
|
|||
# return the results
|
||||
break csv
|
||||
end
|
||||
# if we're not empty?() but at eof?(), a quoted field wasn't closed...
|
||||
if @io.eof?
|
||||
raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}."
|
||||
elsif parse =~ @parsers[:bad_field]
|
||||
raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}."
|
||||
elsif @field_size_limit and parse.length >= @field_size_limit
|
||||
raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
|
||||
end
|
||||
# otherwise, we need to loop and pull some more data to complete the row
|
||||
end
|
||||
end
|
||||
alias_method :gets, :shift
|
||||
|
@ -2046,33 +2070,11 @@ class CSV
|
|||
esc_row_sep = escape_re(@row_sep)
|
||||
esc_quote = escape_re(@quote_char)
|
||||
@parsers = {
|
||||
# for empty leading fields
|
||||
leading_fields: encode_re("\\A(?:", esc_col_sep, ")+"),
|
||||
# The Primary Parser
|
||||
csv_row: encode_re(
|
||||
"\\G(?:\\A|", esc_col_sep, ")", # anchor the match
|
||||
"(?:", esc_quote, # find quoted fields
|
||||
"((?>[^", esc_quote, "]*)", # "unrolling the loop"
|
||||
"(?>", esc_quote * 2, # double for escaping
|
||||
"[^", esc_quote, "]*)*)",
|
||||
esc_quote,
|
||||
"|", # ... or ...
|
||||
"([^", esc_quote, esc_col_sep, "]*))", # unquoted fields
|
||||
"(?=", esc_col_sep, "|\\z)" # ensure field is ended
|
||||
),
|
||||
# a test for unescaped quotes
|
||||
bad_field: encode_re(
|
||||
"\\A", esc_col_sep, "?", # an optional comma
|
||||
"(?:", esc_quote, # a quoted field
|
||||
"(?>[^", esc_quote, "]*)", # "unrolling the loop"
|
||||
"(?>", esc_quote * 2, # double for escaping
|
||||
"[^", esc_quote, "]*)*",
|
||||
esc_quote, # the closing quote
|
||||
"[^", esc_quote, "]", # an extra character
|
||||
"|", # ... or ...
|
||||
"[^", esc_quote, esc_col_sep, "]+", # an unquoted field
|
||||
esc_quote, ")" # an extra quote
|
||||
),
|
||||
# for detecting parse errors
|
||||
quote_or_nl: encode_re("[", esc_quote, "\r\n]"),
|
||||
nl_or_lf: encode_re("[\r\n]"),
|
||||
stray_quote: encode_re( "[^", esc_quote, "]", esc_quote,
|
||||
"[^", esc_quote, "]" ),
|
||||
# safer than chomp!()
|
||||
line_end: encode_re(esc_row_sep, "\\z"),
|
||||
# illegal unquoted characters
|
||||
|
|
|
@ -115,6 +115,18 @@ class TestCSVParsing < Test::Unit::TestCase
|
|||
assert_equal(Array.new, CSV.parse_line("\n1,2,3\n"))
|
||||
end
|
||||
|
||||
def test_non_regex_edge_cases
|
||||
# An early version of the non-regex parser fails this test
|
||||
[ [ "foo,\"foo,bar,baz,foo\",\"foo\"",
|
||||
["foo", "foo,bar,baz,foo", "foo"] ] ].each do |edge_case|
|
||||
assert_equal(edge_case.last, CSV.parse_line(edge_case.first))
|
||||
end
|
||||
|
||||
assert_raise(CSV::MalformedCSVError) do
|
||||
CSV.parse_line("1,\"23\"4\"5\", 6")
|
||||
end
|
||||
end
|
||||
|
||||
def test_malformed_csv
|
||||
assert_raise(CSV::MalformedCSVError) do
|
||||
CSV.parse_line("1,2\r,3", row_sep: "\n")
|
||||
|
|
|
@ -75,6 +75,11 @@ class TestCSVInterface < Test::Unit::TestCase
|
|||
assert_equal(%w{1 2 3}, row)
|
||||
end
|
||||
|
||||
def test_parse_line_with_empty_lines
|
||||
assert_equal(nil, CSV.parse_line("")) # to signal eof
|
||||
assert_equal(Array.new, CSV.parse_line("\n1,2,3"))
|
||||
end
|
||||
|
||||
def test_read_and_readlines
|
||||
assert_equal( @expected,
|
||||
CSV.read(@path, col_sep: "\t", row_sep: "\r\n") )
|
||||
|
@ -167,7 +172,7 @@ class TestCSVInterface < Test::Unit::TestCase
|
|||
csv << lines.first.keys
|
||||
lines.each { |line| csv << line }
|
||||
end
|
||||
CSV.open( @path, "w", headers: true,
|
||||
CSV.open( @path, "r", headers: true,
|
||||
converters: :all,
|
||||
header_converters: :symbol ) do |csv|
|
||||
csv.each { |line| assert_equal(lines.shift, line.to_hash) }
|
||||
|
|
|
@ -253,6 +253,8 @@ class TestCSVTable < Test::Unit::TestCase
|
|||
# with options
|
||||
assert_equal( csv.gsub(",", "|").gsub("\n", "\r\n"),
|
||||
@table.to_csv(col_sep: "|", row_sep: "\r\n") )
|
||||
assert_equal( csv.lines.to_a[1..-1].join,
|
||||
@table.to_csv(:write_headers => false) )
|
||||
|
||||
# with headers
|
||||
assert_equal(csv, @header_table.to_csv)
|
||||
|
|
Loading…
Add table
Reference in a new issue