mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* lib/csv.rb (CSV#init_separators): use IO#gets with length
parameter to get rid of wrong conversion. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@30356 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
9a907880f0
commit
9c017ca5fb
3 changed files with 25 additions and 40 deletions
|
@ -1,4 +1,7 @@
|
||||||
Sat Dec 25 16:04:34 2010 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
Sat Dec 25 17:32:24 2010 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
|
* lib/csv.rb (CSV#init_separators): use IO#gets with length
|
||||||
|
parameter to get rid of wrong conversion.
|
||||||
|
|
||||||
* lib/csv.rb (CSV::foreach, CSV#initialize): directly use encoding
|
* lib/csv.rb (CSV::foreach, CSV#initialize): directly use encoding
|
||||||
|
|
||||||
|
|
42
lib/csv.rb
42
lib/csv.rb
|
@ -1573,10 +1573,7 @@ class CSV
|
||||||
# if we can transcode the needed characters
|
# if we can transcode the needed characters
|
||||||
#
|
#
|
||||||
@re_esc = "\\".encode(@encoding) rescue ""
|
@re_esc = "\\".encode(@encoding) rescue ""
|
||||||
@re_chars = %w[ \\ . [ ] - ^ $ ?
|
@re_chars = /#{%"[-][\\.^$?*+{}()|# \r\n\t\f\v]".encode(@encoding, fallback: proc{""})}/
|
||||||
* + { } ( ) | #
|
|
||||||
\ \r \n \t \f \v ].
|
|
||||||
map { |s| s.encode(@encoding) rescue nil }.compact
|
|
||||||
|
|
||||||
init_separators(options)
|
init_separators(options)
|
||||||
init_parsers(options)
|
init_parsers(options)
|
||||||
|
@ -2025,15 +2022,13 @@ class CSV
|
||||||
# if we run out of data, it's probably a single line
|
# if we run out of data, it's probably a single line
|
||||||
# (use a sensible default)
|
# (use a sensible default)
|
||||||
#
|
#
|
||||||
if @io.eof?
|
unless sample = @io.gets(nil, 1024)
|
||||||
@row_sep = $INPUT_RECORD_SEPARATOR
|
@row_sep = $INPUT_RECORD_SEPARATOR
|
||||||
break
|
break
|
||||||
end
|
end
|
||||||
|
|
||||||
# read ahead a bit
|
# read ahead a bit
|
||||||
sample = read_to_char(1024)
|
sample << (@io.gets(nil, 1) || "") if sample.end_with?(encode_str("\r"))
|
||||||
sample += read_to_char(1) if sample[-1..-1] == encode_str("\r") and
|
|
||||||
not @io.eof?
|
|
||||||
# try to find a standard separator
|
# try to find a standard separator
|
||||||
if sample =~ encode_re("\r\n?|\n")
|
if sample =~ encode_re("\r\n?|\n")
|
||||||
@row_sep = $&
|
@row_sep = $&
|
||||||
|
@ -2267,7 +2262,7 @@ class CSV
|
||||||
# a backslash cannot be transcoded.
|
# a backslash cannot be transcoded.
|
||||||
#
|
#
|
||||||
def escape_re(str)
|
def escape_re(str)
|
||||||
str.chars.map { |c| @re_chars.include?(c) ? @re_esc + c : c }.join('')
|
str.gsub(@re_chars) {|c| @re_esc + c}
|
||||||
end
|
end
|
||||||
|
|
||||||
#
|
#
|
||||||
|
@ -2286,31 +2281,6 @@ class CSV
|
||||||
chunks.map { |chunk| chunk.encode(@encoding.name) }.join('')
|
chunks.map { |chunk| chunk.encode(@encoding.name) }.join('')
|
||||||
end
|
end
|
||||||
|
|
||||||
#
|
|
||||||
# Reads at least +bytes+ from <tt>@io</tt>, but will read up to 10 bytes ahead if
|
|
||||||
# needed to ensure the data read is valid in the encoding of that data. This
|
|
||||||
# should ensure that it is safe to use regular expressions on the read data,
|
|
||||||
# unless it is actually a broken encoding. The read data will be returned in
|
|
||||||
# <tt>@encoding</tt>.
|
|
||||||
#
|
|
||||||
def read_to_char(bytes)
|
|
||||||
return "" if @io.eof?
|
|
||||||
data = read_io(bytes)
|
|
||||||
begin
|
|
||||||
raise unless data.valid_encoding?
|
|
||||||
encoded = encode_str(data)
|
|
||||||
raise unless encoded.valid_encoding?
|
|
||||||
return encoded
|
|
||||||
rescue # encoding error or my invalid data raise
|
|
||||||
if @io.eof? or data.size >= bytes + 10
|
|
||||||
return data
|
|
||||||
else
|
|
||||||
data += read_io(1)
|
|
||||||
retry
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def raw_encoding
|
def raw_encoding
|
||||||
|
@ -2324,10 +2294,6 @@ class CSV
|
||||||
Encoding::ASCII_8BIT
|
Encoding::ASCII_8BIT
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def read_io(bytes)
|
|
||||||
@io.read(bytes).force_encoding(raw_encoding)
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Another name for CSV::instance().
|
# Another name for CSV::instance().
|
||||||
|
|
|
@ -238,12 +238,28 @@ class TestCSV::Encodings < TestCSV
|
||||||
|
|
||||||
def assert_parses(fields, encoding, options = { })
|
def assert_parses(fields, encoding, options = { })
|
||||||
encoding = Encoding.find(encoding) unless encoding.is_a? Encoding
|
encoding = Encoding.find(encoding) unless encoding.is_a? Encoding
|
||||||
|
orig_fields = fields
|
||||||
fields = encode_ary(fields, encoding)
|
fields = encode_ary(fields, encoding)
|
||||||
parsed = CSV.parse(ary_to_data(fields, options), options)
|
data = ary_to_data(fields, options)
|
||||||
|
parsed = CSV.parse(data, options)
|
||||||
assert_equal(fields, parsed)
|
assert_equal(fields, parsed)
|
||||||
parsed.flatten.each_with_index do |field, i|
|
parsed.flatten.each_with_index do |field, i|
|
||||||
assert_equal(encoding, field.encoding, "Field[#{i + 1}] was transcoded.")
|
assert_equal(encoding, field.encoding, "Field[#{i + 1}] was transcoded.")
|
||||||
end
|
end
|
||||||
|
File.open(@temp_csv_path, "wb") {|f| f.print(data)}
|
||||||
|
CSV.open(@temp_csv_path, "rb:#{encoding}", options) do |csv|
|
||||||
|
csv.each_with_index do |row, i|
|
||||||
|
assert_equal(fields[i], row)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
begin
|
||||||
|
CSV.open(@temp_csv_path, "rb:#{encoding}:#{__ENCODING__}", options) do |csv|
|
||||||
|
csv.each_with_index do |row, i|
|
||||||
|
assert_equal(orig_fields[i], row)
|
||||||
|
end
|
||||||
|
end unless encoding == __ENCODING__
|
||||||
|
rescue Encoding::ConverterNotFoundError
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def encode_ary(ary, encoding)
|
def encode_ary(ary, encoding)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue