mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Improve CSV parsing performance.
Patch by @joshpencheon (Josh Pencheon) [fix GH-1607]

#### benchmark-ips results

```
trunk:
Warming up --------------------------------------
                 4.000 i/100ms
Calculating -------------------------------------
                 39.661 (±10.1%) i/s - 2.352k in 60.034781s

with-patch:
Warming up --------------------------------------
                 5.000 i/100ms
Calculating -------------------------------------
                 60.521 (± 9.9%) i/s - 3.595k in 60.047157s
```

#### memory_profiler results

```
trunk:
allocated memory by class
-----------------------------------
35588490 String
7454320 Array
294000 MatchData
37340 Regexp
11840 Hash
2400 CSV
1600 Proc
1280 Method
800 StringIO

with-patch:
allocated memory by class
-----------------------------------
18788490 String
3454320 Array
294000 MatchData
37340 Regexp
11840 Hash
2400 CSV
1600 Proc
1280 Method
800 StringIO
```

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58777 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
8ffc4094a4
commit
37abc2fb7e
1 changed file with 4 additions and 4 deletions
|
@ -1876,7 +1876,7 @@ class CSV
|
||||||
# If we are continuing a previous column
|
# If we are continuing a previous column
|
||||||
if part.end_with?(@quote_char) && part.count(@quote_char) % 2 != 0
|
if part.end_with?(@quote_char) && part.count(@quote_char) % 2 != 0
|
||||||
# extended column ends
|
# extended column ends
|
||||||
csv[-1] = csv[-1].push(part[0..-2]).join("")
|
csv.last << part[0..-2]
|
||||||
if csv.last =~ @parsers[:stray_quote]
|
if csv.last =~ @parsers[:stray_quote]
|
||||||
raise MalformedCSVError,
|
raise MalformedCSVError,
|
||||||
"Missing or stray quote in line #{lineno + 1}"
|
"Missing or stray quote in line #{lineno + 1}"
|
||||||
|
@ -1884,13 +1884,13 @@ class CSV
|
||||||
csv.last.gsub!(@double_quote_char, @quote_char)
|
csv.last.gsub!(@double_quote_char, @quote_char)
|
||||||
in_extended_col = false
|
in_extended_col = false
|
||||||
else
|
else
|
||||||
csv.last.push(part, @col_sep)
|
csv.last << part << @col_sep
|
||||||
end
|
end
|
||||||
elsif part.start_with?(@quote_char)
|
elsif part.start_with?(@quote_char)
|
||||||
# If we are starting a new quoted column
|
# If we are starting a new quoted column
|
||||||
if part.count(@quote_char) % 2 != 0
|
if part.count(@quote_char) % 2 != 0
|
||||||
# start an extended column
|
# start an extended column
|
||||||
csv << [part[1..-1], @col_sep]
|
csv << (part[1..-1] << @col_sep)
|
||||||
in_extended_col = true
|
in_extended_col = true
|
||||||
elsif part.end_with?(@quote_char)
|
elsif part.end_with?(@quote_char)
|
||||||
# regular quoted column
|
# regular quoted column
|
||||||
|
@ -1933,7 +1933,7 @@ class CSV
|
||||||
if @io.eof?
|
if @io.eof?
|
||||||
raise MalformedCSVError,
|
raise MalformedCSVError,
|
||||||
"Unclosed quoted field on line #{lineno + 1}."
|
"Unclosed quoted field on line #{lineno + 1}."
|
||||||
elsif @field_size_limit and csv.last.sum(&:size) >= @field_size_limit
|
elsif @field_size_limit and csv.last.size >= @field_size_limit
|
||||||
raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
|
raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
|
||||||
end
|
end
|
||||||
# otherwise, we need to loop and pull some more data to complete the row
|
# otherwise, we need to loop and pull some more data to complete the row
|
||||||
|
|
Loading…
Reference in a new issue