mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
[ruby/csv] parser: fix a keep bug where some text may be dropped unexpectedly
Ruby: [Bug #18245] [ruby-core:105587]
Reported by Hassan Abdul Rehman.
5c6523da0a
This commit is contained in:
parent
56a5ae9f52
commit
4a5d372ca8
Notes:
git
2021-12-24 14:35:54 +09:00
2 changed files with 91 additions and 37 deletions
|
@ -166,6 +166,7 @@ class CSV
|
||||||
end
|
end
|
||||||
|
|
||||||
def keep_start
|
def keep_start
|
||||||
|
adjust_last_keep
|
||||||
@keeps.push([@scanner.pos, nil])
|
@keeps.push([@scanner.pos, nil])
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -196,7 +197,17 @@ class CSV
|
||||||
end
|
end
|
||||||
|
|
||||||
def keep_drop
|
def keep_drop
|
||||||
@keeps.pop
|
_, buffer = @keeps.pop
|
||||||
|
return unless buffer
|
||||||
|
|
||||||
|
last_keep = @keeps.last
|
||||||
|
return unless last_keep
|
||||||
|
|
||||||
|
if last_keep[1]
|
||||||
|
last_keep[1] << buffer
|
||||||
|
else
|
||||||
|
last_keep[1] = buffer
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def rest
|
def rest
|
||||||
|
@ -204,24 +215,30 @@ class CSV
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
def adjust_last_keep
|
||||||
|
keep = @keeps.last
|
||||||
|
return if keep.nil?
|
||||||
|
|
||||||
|
keep_start = keep[0]
|
||||||
|
return if @scanner.pos == keep_start
|
||||||
|
|
||||||
|
string = @scanner.string
|
||||||
|
keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
|
||||||
|
if keep_data
|
||||||
|
keep_buffer = keep[1]
|
||||||
|
if keep_buffer
|
||||||
|
keep_buffer << keep_data
|
||||||
|
else
|
||||||
|
keep[1] = keep_data.dup
|
||||||
|
end
|
||||||
|
end
|
||||||
|
keep[0] = 0
|
||||||
|
end
|
||||||
|
|
||||||
def read_chunk
|
def read_chunk
|
||||||
return false if @last_scanner
|
return false if @last_scanner
|
||||||
|
|
||||||
unless @keeps.empty?
|
adjust_last_keep
|
||||||
keep = @keeps.last
|
|
||||||
keep_start = keep[0]
|
|
||||||
string = @scanner.string
|
|
||||||
keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
|
|
||||||
if keep_data
|
|
||||||
keep_buffer = keep[1]
|
|
||||||
if keep_buffer
|
|
||||||
keep_buffer << keep_data
|
|
||||||
else
|
|
||||||
keep[1] = keep_data.dup
|
|
||||||
end
|
|
||||||
end
|
|
||||||
keep[0] = 0
|
|
||||||
end
|
|
||||||
|
|
||||||
input = @inputs.first
|
input = @inputs.first
|
||||||
case input
|
case input
|
||||||
|
@ -728,28 +745,26 @@ class CSV
|
||||||
sample[0, 128].index(@quote_character)
|
sample[0, 128].index(@quote_character)
|
||||||
end
|
end
|
||||||
|
|
||||||
SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
|
class UnoptimizedStringIO # :nodoc:
|
||||||
if SCANNER_TEST
|
def initialize(string)
|
||||||
class UnoptimizedStringIO
|
@io = StringIO.new(string, "rb:#{string.encoding}")
|
||||||
def initialize(string)
|
|
||||||
@io = StringIO.new(string, "rb:#{string.encoding}")
|
|
||||||
end
|
|
||||||
|
|
||||||
def gets(*args)
|
|
||||||
@io.gets(*args)
|
|
||||||
end
|
|
||||||
|
|
||||||
def each_line(*args, &block)
|
|
||||||
@io.each_line(*args, &block)
|
|
||||||
end
|
|
||||||
|
|
||||||
def eof?
|
|
||||||
@io.eof?
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
SCANNER_TEST_CHUNK_SIZE =
|
def gets(*args)
|
||||||
Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
|
@io.gets(*args)
|
||||||
|
end
|
||||||
|
|
||||||
|
def each_line(*args, &block)
|
||||||
|
@io.each_line(*args, &block)
|
||||||
|
end
|
||||||
|
|
||||||
|
def eof?
|
||||||
|
@io.eof?
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
|
||||||
|
if SCANNER_TEST
|
||||||
def build_scanner
|
def build_scanner
|
||||||
inputs = @samples.collect do |sample|
|
inputs = @samples.collect do |sample|
|
||||||
UnoptimizedStringIO.new(sample)
|
UnoptimizedStringIO.new(sample)
|
||||||
|
@ -759,9 +774,11 @@ class CSV
|
||||||
else
|
else
|
||||||
inputs << @input
|
inputs << @input
|
||||||
end
|
end
|
||||||
|
chunk_size =
|
||||||
|
Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
|
||||||
InputsScanner.new(inputs,
|
InputsScanner.new(inputs,
|
||||||
@encoding,
|
@encoding,
|
||||||
chunk_size: SCANNER_TEST_CHUNK_SIZE)
|
chunk_size: chunk_size)
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
def build_scanner
|
def build_scanner
|
||||||
|
|
37
test/csv/parse/test_inputs_scanner.rb
Normal file
37
test/csv/parse/test_inputs_scanner.rb
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
require_relative "../helper"
|
||||||
|
|
||||||
|
class TestCSVParseInputsScanner < Test::Unit::TestCase
|
||||||
|
include Helper
|
||||||
|
|
||||||
|
def test_keep_over_chunks_nested_back
|
||||||
|
input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl")
|
||||||
|
scanner = CSV::Parser::InputsScanner.new([input],
|
||||||
|
Encoding::UTF_8,
|
||||||
|
nil,
|
||||||
|
chunk_size: 2)
|
||||||
|
scanner.keep_start
|
||||||
|
assert_equal("abc", scanner.scan_all(/[a-c]+/))
|
||||||
|
scanner.keep_start
|
||||||
|
assert_equal("def", scanner.scan_all(/[d-f]+/))
|
||||||
|
scanner.keep_back
|
||||||
|
scanner.keep_back
|
||||||
|
assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
def test_keep_over_chunks_nested_drop_back
|
||||||
|
input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl")
|
||||||
|
scanner = CSV::Parser::InputsScanner.new([input],
|
||||||
|
Encoding::UTF_8,
|
||||||
|
nil,
|
||||||
|
chunk_size: 3)
|
||||||
|
scanner.keep_start
|
||||||
|
assert_equal("ab", scanner.scan(/../))
|
||||||
|
scanner.keep_start
|
||||||
|
assert_equal("c", scanner.scan(/./))
|
||||||
|
assert_equal("d", scanner.scan(/./))
|
||||||
|
scanner.keep_drop
|
||||||
|
scanner.keep_back
|
||||||
|
assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
|
||||||
|
end
|
||||||
|
end
|
Loading…
Add table
Add a link
Reference in a new issue