1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Improve CSV parsing performance.

Patch by @joshpencheon (Josh Pencheon)
  [fix GH-1607]

  #### benchmark-ips results
  ```
  trunk:
  Warming up --------------------------------------
                         4.000  i/100ms
  Calculating -------------------------------------
                         39.661  (±10.1%) i/s -      2.352k in 60.034781s
  with-patch:
  Warming up --------------------------------------
                         5.000  i/100ms
  Calculating -------------------------------------
                         60.521  (± 9.9%) i/s -      3.595k in 60.047157s
  ```

  #### memory_profiler results

  ```
  trunk:
  allocated memory by class
  -----------------------------------
    35588490  String
     7454320  Array
      294000  MatchData
       37340  Regexp
       11840  Hash
        2400  CSV
        1600  Proc
        1280  Method
         800  StringIO
  with-patch:
  allocated memory by class
  -----------------------------------
    18788490  String
     3454320  Array
      294000  MatchData
       37340  Regexp
       11840  Hash
        2400  CSV
        1600  Proc
        1280  Method
         800  StringIO
  ```

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58777 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
hsbt 2017-05-18 02:42:16 +00:00
parent 8ffc4094a4
commit 37abc2fb7e

View file

@ -1876,7 +1876,7 @@ class CSV
# If we are continuing a previous column
if part.end_with?(@quote_char) && part.count(@quote_char) % 2 != 0
# extended column ends
csv[-1] = csv[-1].push(part[0..-2]).join("")
csv.last << part[0..-2]
if csv.last =~ @parsers[:stray_quote]
raise MalformedCSVError,
"Missing or stray quote in line #{lineno + 1}"
@ -1884,13 +1884,13 @@ class CSV
csv.last.gsub!(@double_quote_char, @quote_char)
in_extended_col = false
else
csv.last.push(part, @col_sep)
csv.last << part << @col_sep
end
elsif part.start_with?(@quote_char)
# If we are starting a new quoted column
if part.count(@quote_char) % 2 != 0
# start an extended column
csv << [part[1..-1], @col_sep]
csv << (part[1..-1] << @col_sep)
in_extended_col = true
elsif part.end_with?(@quote_char)
# regular quoted column
@ -1933,7 +1933,7 @@ class CSV
if @io.eof?
raise MalformedCSVError,
"Unclosed quoted field on line #{lineno + 1}."
elsif @field_size_limit and csv.last.sum(&:size) >= @field_size_limit
elsif @field_size_limit and csv.last.size >= @field_size_limit
raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
end
# otherwise, we need to loop and pull some more data to complete the row