2007-12-23 10:06:14 -05:00
|
|
|
require 'test/unit'
|
|
|
|
require 'tmpdir'
|
2007-12-23 18:57:14 -05:00
|
|
|
require 'timeout'
|
2008-08-18 11:35:21 -04:00
|
|
|
require_relative 'envutil'
|
2007-12-23 10:06:14 -05:00
|
|
|
|
2007-12-23 11:10:36 -05:00
|
|
|
class TestIO_M17N < Test::Unit::TestCase
|
2007-12-23 12:05:40 -05:00
|
|
|
# Encodings that the parameterised tests iterate over.
# Frozen so that no test can accidentally mutate the shared list.
ENCS = [
  Encoding::ASCII_8BIT,
  Encoding::EUC_JP,
  Encoding::Shift_JIS,
  Encoding::UTF_8
].freeze
|
|
|
|
|
2007-12-23 10:06:14 -05:00
|
|
|
# Creates a temporary directory, makes it the current working directory
# and yields its path. Dir.chdir restores the previous directory and
# Dir.mktmpdir removes the temporary one when the block returns.
def with_tmpdir
  Dir.mktmpdir do |tmp|
    Dir.chdir(tmp) do
      yield tmp
    end
  end
end
|
|
|
|
|
2008-08-24 06:40:37 -04:00
|
|
|
# Creates a pipe with IO.pipe, yields its read and write ends, and closes
# whichever ends are still open once the block finishes (also on error).
#
# Positional +args+ and keyword +opts+ are forwarded to IO.pipe. Keywords
# are captured separately so that a call such as
# with_pipe("utf-8:euc-jp", :undef=>:replace) still reaches IO.pipe as
# keyword options on Ruby 3 and later, where a trailing Hash inside a
# splat is no longer converted to keywords.
#
# Returns the value of the block.
def with_pipe(*args, **opts)
  reader, writer = IO.pipe(*args, **opts)
  begin
    yield reader, writer
  ensure
    reader.close unless reader.closed?
    writer.close unless writer.closed?
  end
end
|
|
|
|
|
2007-12-23 11:10:36 -05:00
|
|
|
# Writes +content+ to +path+ in binary mode, creating or truncating the file.
# File.open is used instead of Kernel#open so that a path starting with "|"
# is always treated as a file name and never as a command to run.
# Returns the value of IO#write (the number of bytes written).
def generate_file(path, content)
  File.open(path, "wb") {|f| f.write content }
end
|
|
|
|
|
|
|
|
# Renders +str+ as an expression that shows both its escaped bytes and its
# encoding name, for use in assertion failure messages.
def encdump(str)
  dumped = str.dump
  enc_name = str.encoding.name.dump
  "#{dumped}.force_encoding(#{enc_name})"
end
|
|
|
|
|
|
|
|
# Asserts that +expected+ == +actual+. On failure the message shows both
# strings through encdump, so their bytes and encodings are visible.
# +message+ is an optional prefix handed to build_message.
def assert_str_equal(expected, actual, message=nil)
  template = <<EOT
#{encdump expected} expected but not equal to
#{encdump actual}.
EOT
  full_message = build_message(message, template)
  assert_block(full_message) { expected == actual }
end
|
|
|
|
|
2007-12-24 05:22:34 -05:00
|
|
|
# Mode "r": external encoding is the default external one, no internal encoding.
def test_open_r
  with_tmpdir do
    generate_file('tmp', "")
    open("tmp", "r") do |io|
      assert_equal(Encoding.default_external, io.external_encoding)
      assert_equal(nil, io.internal_encoding)
    end
  end
end
|
|
|
|
|
|
|
|
# Mode "rb": external encoding is ASCII-8BIT, no internal encoding.
def test_open_rb
  with_tmpdir do
    generate_file('tmp', "")
    open("tmp", "rb") do |io|
      assert_equal(Encoding.find("ASCII-8BIT"), io.external_encoding)
      assert_equal(nil, io.internal_encoding)
    end
  end
end
|
|
|
|
|
|
|
|
# Mode "r:euc-jp": the encoding named in the mode string becomes the
# external encoding; no internal encoding is set.
def test_open_r_enc
  with_tmpdir do
    generate_file('tmp', "")
    open("tmp", "r:euc-jp") do |io|
      assert_equal(Encoding::EUC_JP, io.external_encoding)
      assert_equal(nil, io.internal_encoding)
    end
  end
end
|
|
|
|
|
2008-08-21 06:07:21 -04:00
|
|
|
# Same expectation as the mode-string form, but the encoding is supplied
# through the :encoding option.
def test_open_r_enc_in_opt
  with_tmpdir do
    generate_file('tmp', "")
    open("tmp", "r", encoding: "euc-jp") do |io|
      assert_equal(Encoding::EUC_JP, io.external_encoding)
      assert_equal(nil, io.internal_encoding)
    end
  end
end
|
|
|
|
|
|
|
|
# The :external_encoding option alone sets the external encoding and
# leaves the internal encoding nil.
def test_open_r_enc_in_opt2
  with_tmpdir do
    generate_file('tmp', "")
    open("tmp", "r", external_encoding: "euc-jp") do |io|
      assert_equal(Encoding::EUC_JP, io.external_encoding)
      assert_equal(nil, io.internal_encoding)
    end
  end
end
|
|
|
|
|
2007-12-24 05:22:34 -05:00
|
|
|
# Mode "r:euc-jp:utf-8": first name is the external encoding, second the
# internal encoding.
def test_open_r_enc_enc
  with_tmpdir do
    generate_file('tmp', "")
    open("tmp", "r:euc-jp:utf-8") do |io|
      assert_equal(Encoding::EUC_JP, io.external_encoding)
      assert_equal(Encoding::UTF_8, io.internal_encoding)
    end
  end
end
|
|
|
|
|
2008-08-21 06:07:21 -04:00
|
|
|
# The "ext:int" pair may also be given through the :encoding option.
def test_open_r_enc_enc_in_opt
  with_tmpdir do
    generate_file('tmp', "")
    open("tmp", "r", encoding: "euc-jp:utf-8") do |io|
      assert_equal(Encoding::EUC_JP, io.external_encoding)
      assert_equal(Encoding::UTF_8, io.internal_encoding)
    end
  end
end
|
|
|
|
|
|
|
|
# External and internal encodings given as two separate options.
def test_open_r_enc_enc_in_opt2
  with_tmpdir do
    generate_file('tmp', "")
    open("tmp", "r", external_encoding: "euc-jp", internal_encoding: "utf-8") do |io|
      assert_equal(Encoding::EUC_JP, io.external_encoding)
      assert_equal(Encoding::UTF_8, io.internal_encoding)
    end
  end
end
|
|
|
|
|
2007-12-24 05:22:34 -05:00
|
|
|
def test_open_w
|
|
|
|
with_tmpdir {
|
|
|
|
open("tmp", "w") {|f|
|
2007-12-25 02:01:35 -05:00
|
|
|
assert_equal(nil, f.external_encoding)
|
2007-12-24 05:22:34 -05:00
|
|
|
assert_equal(nil, f.internal_encoding)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
end
|
|
|
|
|
2007-12-24 05:27:53 -05:00
|
|
|
# Mode "wb": external encoding is ASCII-8BIT, no internal encoding.
def test_open_wb
  with_tmpdir do
    open("tmp", "wb") do |io|
      assert_equal(Encoding.find("ASCII-8BIT"), io.external_encoding)
      assert_equal(nil, io.internal_encoding)
    end
  end
end
|
|
|
|
|
2007-12-24 05:22:34 -05:00
|
|
|
# Mode "w:euc-jp": EUC-JP external encoding, no internal encoding.
def test_open_w_enc
  with_tmpdir do
    open("tmp", "w:euc-jp") do |io|
      assert_equal(Encoding::EUC_JP, io.external_encoding)
      assert_equal(nil, io.internal_encoding)
    end
  end
end
|
|
|
|
|
2008-08-21 06:07:21 -04:00
|
|
|
# Write mode with the encoding supplied through the :encoding option.
def test_open_w_enc_in_opt
  with_tmpdir do
    open("tmp", "w", encoding: "euc-jp") do |io|
      assert_equal(Encoding::EUC_JP, io.external_encoding)
      assert_equal(nil, io.internal_encoding)
    end
  end
end
|
|
|
|
|
|
|
|
# Write mode with only the :external_encoding option.
def test_open_w_enc_in_opt2
  with_tmpdir do
    open("tmp", "w", external_encoding: "euc-jp") do |io|
      assert_equal(Encoding::EUC_JP, io.external_encoding)
      assert_equal(nil, io.internal_encoding)
    end
  end
end
|
|
|
|
|
2007-12-24 05:22:34 -05:00
|
|
|
# Mode "w:euc-jp:utf-8": EUC-JP external, UTF-8 internal.
def test_open_w_enc_enc
  with_tmpdir do
    open("tmp", "w:euc-jp:utf-8") do |io|
      assert_equal(Encoding::EUC_JP, io.external_encoding)
      assert_equal(Encoding::UTF_8, io.internal_encoding)
    end
  end
end
|
|
|
|
|
2008-08-21 06:07:21 -04:00
|
|
|
# Write mode with an "ext:int" pair in the :encoding option.
def test_open_w_enc_enc_in_opt
  with_tmpdir do
    open("tmp", "w", encoding: "euc-jp:utf-8") do |io|
      assert_equal(Encoding::EUC_JP, io.external_encoding)
      assert_equal(Encoding::UTF_8, io.internal_encoding)
    end
  end
end
|
|
|
|
|
|
|
|
# Write mode with separate :external_encoding and :internal_encoding options.
def test_open_w_enc_enc_in_opt2
  with_tmpdir do
    open("tmp", "w", external_encoding: "euc-jp", internal_encoding: "utf-8") do |io|
      assert_equal(Encoding::EUC_JP, io.external_encoding)
      assert_equal(Encoding::UTF_8, io.internal_encoding)
    end
  end
end
|
|
|
|
|
2008-08-18 12:33:46 -04:00
|
|
|
# The encoding pair in the mode string is still honoured when a
# permission argument follows it.
def test_open_w_enc_enc_perm
  with_tmpdir do
    open("tmp", "w:euc-jp:utf-8", 0600) do |io|
      assert_equal(Encoding::EUC_JP, io.external_encoding)
      assert_equal(Encoding::UTF_8, io.internal_encoding)
    end
  end
end
|
|
|
|
|
2008-08-21 13:09:56 -04:00
|
|
|
# IO.new on a raw descriptor with mode "r:sjis" yields Shift_JIS strings.
def test_io_new_enc
  with_tmpdir do
    generate_file("tmp", "\xa1")
    descriptor = IO.sysopen("tmp")
    io = IO.new(descriptor, "r:sjis")
    begin
      assert_equal(Encoding::Shift_JIS, io.read.encoding)
    ensure
      io.close
    end
  end
end
|
|
|
|
|
2008-08-24 05:40:31 -04:00
|
|
|
# With :invalid=>:replace, an invalid input byte is read back as "?".
def test_s_pipe_invalid
  with_pipe("utf-8", "euc-jp", :invalid=>:replace) do |reader, writer|
    writer << "\x80"
    writer.close
    assert_equal("?", reader.read)
  end
end
|
|
|
|
|
|
|
|
# With :undef=>:replace, U+FFFD written to a utf-8:euc-jp pipe is read
# back as "?".
def test_s_pipe_undef
  with_pipe("utf-8:euc-jp", :undef=>:replace) do |reader, writer|
    writer << "\ufffd"
    writer.close
    assert_equal("?", reader.read)
  end
end
|
|
|
|
|
2008-09-03 14:18:10 -04:00
|
|
|
# The :replace option supplies the replacement text used for :undef=>:replace.
def test_s_pipe_undef_replace_string
  with_pipe("utf-8:euc-jp", :undef=>:replace, :replace=>"X") do |reader, writer|
    writer << "\ufffd"
    writer.close
    assert_equal("X", reader.read)
  end
end
|
|
|
|
|
2008-08-24 06:40:37 -04:00
|
|
|
# A dup of the read end still converts UTF-8 input to EUC-JP.
def test_dup
  with_pipe("utf-8:euc-jp") do |reader, writer|
    writer << "\u3042"
    writer.close
    copy = reader.dup
    begin
      assert_equal("\xA4\xA2".force_encoding("euc-jp"), copy.read)
    ensure
      copy.close
    end

  end
end
|
|
|
|
|
|
|
|
# A dup of the read end keeps the :undef=>:replace conversion option.
def test_dup_undef
  with_pipe("utf-8:euc-jp", :undef=>:replace) do |reader, writer|
    writer << "\uFFFD"
    writer.close
    copy = reader.dup
    begin
      assert_equal("?", copy.read)
    ensure
      copy.close
    end
  end
end
|
|
|
|
|
2007-12-24 05:22:34 -05:00
|
|
|
# STDIN uses the default external encoding and has no internal encoding.
def test_stdin
  stdin = STDIN
  assert_equal(Encoding.default_external, stdin.external_encoding)
  assert_equal(nil, stdin.internal_encoding)
end
|
|
|
|
|
|
|
|
# STDOUT is expected to have neither an external nor an internal encoding.
def test_stdout
  stdout = STDOUT
  assert_equal(nil, stdout.external_encoding)
  assert_equal(nil, stdout.internal_encoding)
end
|
|
|
|
|
|
|
|
# STDERR is expected to have neither an external nor an internal encoding.
def test_stderr
  stderr = STDERR
  assert_equal(nil, stderr.external_encoding)
  assert_equal(nil, stderr.internal_encoding)
end
|
|
|
|
|
2007-12-23 11:10:36 -05:00
|
|
|
# gets with a separator given in the internal encoding (ISO-8859-1) must
# match against the converted data, not the raw UTF-8 bytes. [ruby-core:14288]
def test_terminator_conversion
  with_tmpdir do
    generate_file('tmp', "before \u00FF after")
    line = open("tmp", "r:utf-8:iso-8859-1") do |io|
      io.gets("\xFF".force_encoding("iso-8859-1"))
    end
    assert_equal(Encoding.find("iso-8859-1"), line.encoding)
    assert_str_equal("before \xFF".force_encoding("iso-8859-1"), line, '[ruby-core:14288]')
  end
end
|
|
|
|
|
2007-12-23 13:22:59 -05:00
|
|
|
# The separator is built from the bytes \xA2\xA2, which in the file only
# occur across the boundary of two EUC-JP characters; the expected result
# is therefore the whole converted content. [ruby-core:14319]
def test_terminator_conversion2
  with_tmpdir do
    generate_file('tmp', "before \xA1\xA2\xA2\xA3 after")
    line = open("tmp", "r:euc-jp:utf-8") do |io|
      io.gets("\xA2\xA2".force_encoding("euc-jp").encode("utf-8"))
    end
    assert_equal(Encoding.find("utf-8"), line.encoding)
    assert_str_equal("before \xA1\xA2\xA2\xA3 after".force_encoding("euc-jp").encode("utf-8"), line, '[ruby-core:14319]')
  end
end
|
|
|
|
|
2007-12-25 22:07:08 -05:00
|
|
|
# The ISO-2022-JP source contains the byte 0x30 ("0") inside an escape
# sequence region; after conversion to EUC-JP a "0" separator must not
# match it, so the whole converted content is expected.
def test_terminator_stateful_conversion
  with_tmpdir do
    src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp")
    generate_file('tmp', src)
    line = open("tmp", "r:iso-2022-jp:euc-jp") do |io|
      io.gets("0".force_encoding("euc-jp"))
    end
    assert_equal(Encoding.find("euc-jp"), line.encoding)
    assert_str_equal(src.encode("euc-jp"), line)
  end
end
|
|
|
|
|
2007-12-23 20:09:37 -05:00
|
|
|
# A non-ASCII separator tagged UTF-8 on an EUC-JP stream raises ArgumentError.
def test_nonascii_terminator
  with_tmpdir do
    generate_file('tmp', "before \xA2\xA2 after")
    open("tmp", "r:euc-jp") do |io|
      assert_raise(ArgumentError) do
        io.gets("\xA2\xA2".force_encoding("utf-8"))
      end
    end
  end
end
|
|
|
|
|
2007-12-23 18:57:14 -05:00
|
|
|
# gets with a converted (UTF-8) separator on a euc-jp:utf-8 pipe must
# return the text up to and including the separator.
#
# The read is wrapped in Timeout.timeout so a hang fails within a second.
# The module method is used because the bare Object#timeout helper is
# deprecated and absent from newer Ruby releases.
def test_pipe_terminator_conversion
  with_pipe("euc-jp:utf-8") do |r, w|
    w.write "before \xa2\xa2 after"
    rs = "\xA2\xA2".encode("utf-8", "euc-jp")
    w.close
    Timeout.timeout(1) do
      assert_equal("before \xa2\xa2".encode("utf-8", "euc-jp"),
                   r.gets(rs))
    end
  end
end
|
|
|
|
|
|
|
|
# getc on a euc-jp:utf-8 pipe returns the character converted to UTF-8,
# even though the write end is still open.
def test_pipe_conversion
  with_pipe("euc-jp:utf-8") do |reader, writer|
    writer.write "\xa1\xa1"
    assert_equal("\xa1\xa1".encode("utf-8", "euc-jp"), reader.getc)
  end
end
|
|
|
|
|
|
|
|
# The two bytes of one EUC-JP character arrive 0.1s apart; getc must
# still return the complete converted character.
def test_pipe_convert_partial_read
  with_pipe("euc-jp:utf-8") do |reader, writer|
    begin
      producer = Thread.new do
        writer.write "\xa1"
        sleep 0.1
        writer.write "\xa1"
      end
      assert_equal("\xa1\xa1".encode("utf-8", "euc-jp"), reader.getc)
    ensure
      # Always wait for the producer so it cannot outlive the pipe.
      producer.join if producer
    end
  end
end
|
|
|
|
|
2008-08-17 00:25:56 -04:00
|
|
|
# getc on an invalid EUC-JP byte raises InvalidByteSequenceError carrying
# that byte; the remaining data is still readable afterwards.
def test_getc_invalid
  with_pipe("euc-jp:utf-8") do |reader, writer|
    writer << "\xa1xyz"
    writer.close
    error = assert_raise(Encoding::InvalidByteSequenceError) { reader.getc }
    assert_equal("\xA1".force_encoding("ascii-8bit"), error.error_bytes)
    assert_equal("xyz", reader.read(10))
  end
end
|
|
|
|
|
2008-01-03 09:49:38 -05:00
|
|
|
# getc through an iso-2022-jp:euc-jp conversion returns one converted
# character at a time.
def test_getc_stateful_conversion
  with_tmpdir do
    src = "\e$B\x23\x30\x23\x31\e(B".force_encoding("iso-2022-jp")
    generate_file('tmp', src)
    open("tmp", "r:iso-2022-jp:euc-jp") do |io|
      assert_equal("\xa3\xb0".force_encoding("euc-jp"), io.getc)
      assert_equal("\xa3\xb1".force_encoding("euc-jp"), io.getc)
    end
  end
end
|
|
|
|
|
|
|
|
# A character pushed back with ungetc before any read appears in front of
# the converted file content.
def test_ungetc_stateful_conversion
  with_tmpdir do
    src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp")
    generate_file('tmp', src)
    text = open("tmp", "r:iso-2022-jp:euc-jp") do |io|
      io.ungetc("0".force_encoding("euc-jp"))
      io.read
    end
    assert_equal(Encoding.find("euc-jp"), text.encoding)
    assert_str_equal("0" + src.encode("euc-jp"), text)
  end
end
|
|
|
|
|
|
|
|
# After gets has consumed the first part of a stateful stream, ungetc
# followed by read yields the pushed-back character plus the converted
# remainder.
def test_ungetc_stateful_conversion2
  with_tmpdir do
    src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp")
    former = "before \e$B\x23\x30\e(B".force_encoding("iso-2022-jp")
    rs = "\e$B\x23\x30\e(B".force_encoding("iso-2022-jp")
    latter = "\e$B\x23\x31\e(B after".force_encoding("iso-2022-jp")
    generate_file('tmp', src)
    rest = open("tmp", "r:iso-2022-jp:euc-jp") do |io|
      assert_equal(former.encode("euc-jp", "iso-2022-jp"),
                   io.gets(rs.encode("euc-jp", "iso-2022-jp")))
      io.ungetc("0")
      io.read
    end
    assert_equal(Encoding.find("euc-jp"), rest.encoding)
    assert_str_equal("0" + latter.encode("euc-jp"), rest)
  end
end
|
|
|
|
|
2007-12-23 11:10:36 -05:00
|
|
|
# ASCII-only content read under each encoding in ENCS is tagged with that
# encoding and still compares equal to the source string.
def test_open_ascii
  with_tmpdir do
    src = "abc\n"
    generate_file('tmp', "abc\n")
    ENCS.each do |enc|
      line = open('tmp', "r:#{enc}") {|io| io.gets }
      assert_equal(enc, line.encoding)
      assert_str_equal(src, line)
    end
  end
end
|
2007-12-23 11:10:36 -05:00
|
|
|
|
|
|
|
# Non-ASCII bytes read under each encoding in ENCS come back unchanged
# and tagged with that encoding.
def test_open_nonascii
  with_tmpdir do
    src = "\xc2\xa1\n"
    generate_file('tmp', src)
    ENCS.each do |enc|
      content = src.dup.force_encoding(enc)
      line = open('tmp', "r:#{enc}") {|io| io.gets }
      assert_equal(enc, line.encoding)
      assert_str_equal(content, line)
    end
  end
end
|
|
|
|
|
2007-12-23 11:35:43 -05:00
|
|
|
# For every encoding in ENCS, checks the encoding and value returned by
# each reading method. Character- and line-oriented readers return strings
# tagged with the stream encoding; length-limited readers (read(1),
# readpartial(1), sysread(1)) return ASCII-8BIT strings.
def test_read_encoding
  with_tmpdir do
    src = "\xc2\xa1\n".force_encoding("ASCII-8BIT")
    generate_file('tmp', "\xc2\xa1\n")
    ENCS.each do |enc|
      content = src.dup.force_encoding(enc)

      # Single-character readers return the first character of the content.
      [:getc, :readchar].each do |reader|
        open('tmp', "r:#{enc}") do |io|
          s = io.public_send(reader)
          assert_equal(enc, s.encoding)
          assert_str_equal(content[0], s)
        end
      end

      # Single-line readers return the whole (one-line) content.
      [:gets, :readline].each do |reader|
        open('tmp', "r:#{enc}") do |io|
          s = io.public_send(reader)
          assert_equal(enc, s.encoding)
          assert_str_equal(content, s)
        end
      end

      open('tmp', "r:#{enc}") do |io|
        lines = io.readlines
        assert_equal(1, lines.length)
        s = lines[0]
        assert_equal(enc, s.encoding)
        assert_str_equal(content, s)
      end

      open('tmp', "r:#{enc}") do |io|
        io.each_line do |s|
          assert_equal(enc, s.encoding)
          assert_str_equal(content, s)
        end
      end

      open('tmp', "r:#{enc}") do |io|
        s = io.read
        assert_equal(enc, s.encoding)
        assert_str_equal(content, s)
      end

      # Byte-count readers return one binary byte.
      [:read, :readpartial, :sysread].each do |reader|
        open('tmp', "r:#{enc}") do |io|
          s = io.public_send(reader, 1)
          assert_equal(Encoding::ASCII_8BIT, s.encoding)
          assert_str_equal(src[0], s)
        end
      end
    end
  end
end
|
|
|
|
|
2007-12-23 11:35:43 -05:00
|
|
|
# Writing the same bytes tagged with each encoding in ENCS to a file
# opened with plain "w" stores the bytes unchanged each time.
def test_write_noenc
  src = "\xc2\xa1\n".force_encoding("ascii-8bit")
  with_tmpdir do
    open('tmp', "w") do |out|
      ENCS.each do |enc|
        out.write src.dup.force_encoding(enc)
      end
    end
    open('tmp', 'r:ascii-8bit') do |input|
      assert_equal(src*ENCS.length, input.read)
    end
  end
end
|
|
|
|
|
2007-12-23 12:05:40 -05:00
|
|
|
# A UTF-8 string printed to a "w:EUC-JP" file is stored as EUC-JP bytes,
# and reading it back through "r:EUC-JP:UTF-8" restores the UTF-8 string.
def test_write_conversion
  utf8 = "\u6666"
  eucjp = "\xb3\xa2".force_encoding("EUC-JP")
  with_tmpdir do
    open('tmp', "w:EUC-JP") do |out|
      assert_equal(Encoding::EUC_JP, out.external_encoding)
      assert_equal(nil, out.internal_encoding)
      out.print utf8
    end
    assert_equal(eucjp, File.read('tmp').force_encoding("EUC-JP"))
    open('tmp', 'r:EUC-JP:UTF-8') do |input|
      assert_equal(Encoding::EUC_JP, input.external_encoding)
      assert_equal(Encoding::UTF_8, input.internal_encoding)
      assert_equal(utf8, input.read)
    end
  end
end
|
|
|
|
|
2007-12-23 12:05:40 -05:00
|
|
|
# Exercises IO.pipe encoding arguments: no encoding, external only,
# external:internal, rejected encoding-name strings, and each encoding
# in ENCS.
def test_pipe
  utf8 = "\u6666"
  eucjp = "\xb3\xa2".force_encoding("EUC-JP")

  # No arguments: data is tagged with the default external encoding.
  with_pipe do |r, w|
    assert_equal(Encoding.default_external, r.external_encoding)
    assert_equal(nil, r.internal_encoding)
    w << utf8
    w.close
    s = r.read
    assert_equal(Encoding.default_external, s.encoding)
    assert_str_equal(utf8.dup.force_encoding(Encoding.default_external), s)
  end

  # External encoding only.
  with_pipe("EUC-JP") do |r, w|
    assert_equal(Encoding::EUC_JP, r.external_encoding)
    assert_equal(nil, r.internal_encoding)
    w << eucjp
    w.close
    assert_equal(eucjp, r.read)
  end

  # A multibyte character placed after 1023 ASCII bytes must be read
  # back as valid UTF-8.
  with_pipe("UTF-8") do |r, w|
    payload = "a" * 1023 + "\u3042" + "a" * 1022
    w << payload
    w.close
    assert_equal(true, r.read.valid_encoding?)
  end

  # External and internal encodings: data is converted on read.
  with_pipe("UTF-8:EUC-JP") do |r, w|
    assert_equal(Encoding::UTF_8, r.external_encoding)
    assert_equal(Encoding::EUC_JP, r.internal_encoding)
    w << utf8
    w.close
    assert_equal(eucjp, r.read)
  end

  # Encoding names given as UTF-32BE strings are rejected.
  e = assert_raise(ArgumentError) {with_pipe("UTF-8", "UTF-8".encode("UTF-32BE")) {}}
  assert_match(/invalid name encoding/, e.message)
  e = assert_raise(ArgumentError) {with_pipe("UTF-8".encode("UTF-32BE")) {}}
  assert_match(/invalid name encoding/, e.message)

  ENCS.each do |enc|
    with_pipe(enc) do |r, w|
      w << "\xc2\xa1"
      w.close
      s = r.getc
      assert_equal(enc, s.encoding)
    end
  end

  ENCS.each do |enc|
    next if enc == Encoding::ASCII_8BIT
    next if enc == Encoding::UTF_8
    with_pipe("#{enc}:UTF-8") do |r, w|
      w << "\xc2\xa1"
      w.close
      s = r.read
      assert_equal(Encoding::UTF_8, s.encoding)
      assert_equal(s.encode("UTF-8"), s)
    end
  end

end
|
|
|
|
|
2008-01-24 01:38:57 -05:00
|
|
|
# Marshal round-trips through a pipe that has an external encoding set
# without raising. [ruby-dev:33264]
def test_marshal
  with_pipe("EUC-JP") do |reader, writer|
    data = 56225
    Marshal.dump(data, writer)
    writer.close
    result = nil
    assert_nothing_raised("[ruby-dev:33264]") { result = Marshal.load(reader) }
    assert_equal(data, result)
  end
end
|
2008-01-25 10:22:52 -05:00
|
|
|
|
|
|
|
# gets(nil) reads everything and returns it in the internal encoding.
def test_gets_nil
  with_pipe("UTF-8:EUC-JP") do |reader, writer|
    writer << "\u{3042}"
    writer.close
    result = reader.gets(nil)
    assert_equal("\u{3042}".encode("euc-jp"), result)
  end
end
|
2008-02-29 09:36:39 -05:00
|
|
|
|
2008-08-17 15:41:39 -04:00
|
|
|
# gets(limit) on an EUC-JP pipe: each row pairs a byte limit with the
# expected result. The expectations show that a limit falling inside a
# two-byte character still returns that whole character, and that reading
# stops after the newline.
def test_gets_limit
  expectations = [
    [1, "\xa4\xa2"],
    [2, "\xa4\xa2"],
    [3, "\xa4\xa2\xa4\xa4"],
    [4, "\xa4\xa2\xa4\xa4"],
    [5, "\xa4\xa2\xa4\xa4\xa4\xa6"],
    [6, "\xa4\xa2\xa4\xa4\xa4\xa6"],
    [7, "\xa4\xa2\xa4\xa4\xa4\xa6\n"],
    [8, "\xa4\xa2\xa4\xa4\xa4\xa6\n"],
    [9, "\xa4\xa2\xa4\xa4\xa4\xa6\n"],
  ]
  expectations.each do |limit, expected|
    # A fresh pipe per limit so every read starts at the beginning.
    with_pipe("euc-jp") do |r, w|
      w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"
      w.close
      assert_equal(expected.force_encoding("euc-jp"), r.gets(limit))
    end
  end
end
|
|
|
|
|
2008-08-17 23:13:53 -04:00
|
|
|
# gets raises InvalidByteSequenceError on an invalid UTF-8 byte; a second
# gets then returns the converted text that follows the bad byte.
def test_gets_invalid
  with_pipe("utf-8:euc-jp") do |reader, writer|
    before = "\u{3042}\u{3044}"
    invalid = "\x80".force_encoding("utf-8")
    after = "\u{3046}\u{3048}"
    writer << before + invalid + after
    writer.close
    error = assert_raise(Encoding::InvalidByteSequenceError) { reader.gets }
    assert_equal(invalid.force_encoding("ascii-8bit"), error.error_bytes)
    assert_equal(after.encode("euc-jp"), reader.gets)
  end
end
|
|
|
|
|
2008-08-21 06:07:21 -04:00
|
|
|
# getc returns the valid characters before an invalid UTF-8 byte, raises
# on the invalid byte, then continues with the characters after it.
def test_getc_invalid2
  with_pipe("utf-8:euc-jp") do |reader, writer|
    before1 = "\u{3042}"
    before2 = "\u{3044}"
    invalid = "\x80".force_encoding("utf-8")
    after1 = "\u{3046}"
    after2 = "\u{3048}"
    writer << before1 + before2 + invalid + after1 + after2
    writer.close
    assert_equal(before1.encode("euc-jp"), reader.getc)
    assert_equal(before2.encode("euc-jp"), reader.getc)
    error = assert_raise(Encoding::InvalidByteSequenceError) { reader.getc }
    assert_equal(invalid.force_encoding("ascii-8bit"), error.error_bytes)
    assert_equal(after1.encode("euc-jp"), reader.getc)
    assert_equal(after2.encode("euc-jp"), reader.getc)
  end
end
|
|
|
|
|
2008-08-21 06:07:21 -04:00
|
|
|
# Same scenario as the UTF-8 variant, with UTF-16LE input in binmode and
# the two bytes "\x00\xd8" as the invalid sequence.
def test_getc_invalid3
  with_pipe("utf-16le:euc-jp", binmode: true) do |reader, writer|
    before1 = "\x42\x30".force_encoding("utf-16le")
    before2 = "\x44\x30".force_encoding("utf-16le")
    invalid = "\x00\xd8".force_encoding("utf-16le")
    after1 = "\x46\x30".force_encoding("utf-16le")
    after2 = "\x48\x30".force_encoding("utf-16le")
    writer << before1 + before2 + invalid + after1 + after2
    writer.close
    assert_equal(before1.encode("euc-jp"), reader.getc)
    assert_equal(before2.encode("euc-jp"), reader.getc)
    error = assert_raise(Encoding::InvalidByteSequenceError) { reader.getc }
    assert_equal(invalid.force_encoding("ascii-8bit"), error.error_bytes)
    assert_equal(after1.encode("euc-jp"), reader.getc)
    assert_equal(after2.encode("euc-jp"), reader.getc)
  end
end
|
|
|
|
|
|
|
|
# read without a length returns all data converted to the internal encoding.
def test_read_all
  with_pipe("utf-8:euc-jp") do |reader, writer|
    text = "\u3042\u3044"
    writer << text
    writer.close
    assert_equal(text.encode("euc-jp"), reader.read)
  end
end
|
|
|
|
|
|
|
|
# read raises InvalidByteSequenceError on an invalid UTF-8 byte; a second
# read then returns the converted text that follows the bad byte.
def test_read_all_invalid
  with_pipe("utf-8:euc-jp") do |reader, writer|
    before = "\u{3042}\u{3044}"
    invalid = "\x80".force_encoding("utf-8")
    after = "\u{3046}\u{3048}"
    writer << before + invalid + after
    writer.close
    error = assert_raise(Encoding::InvalidByteSequenceError) { reader.read }
    assert_equal(invalid.force_encoding("ascii-8bit"), error.error_bytes)
    assert_equal(after.encode("euc-jp"), reader.read)
  end
end
|
|
|
|
|
2008-02-29 09:36:39 -05:00
|
|
|
# File.foreach with an :encoding option must not raise when a two-byte
# EUC-JP character follows 8191 ASCII bytes.
def test_file_foreach
  with_tmpdir do
    generate_file('tst', 'a' * 8191 + "\xa1\xa1")
    assert_nothing_raised do
      File.foreach('tst', :encoding=>"euc-jp") {|line| line.inspect }
    end
  end
end
|
2008-08-18 04:24:49 -04:00
|
|
|
|
|
|
|
# The stream holds one character as UTF-8 bytes followed by "\x82\xa0".
# After the first getc, set_encoding("shift_jis:euc-jp") makes the next
# getc decode the remaining bytes as Shift_JIS and yield the same EUC-JP
# character.
def test_set_encoding
  with_pipe("utf-8:euc-jp") do |reader, writer|
    bytes = "\u3042".force_encoding("ascii-8bit")
    bytes << "\x82\xa0".force_encoding("ascii-8bit")
    writer << bytes
    writer.close
    assert_equal("\xa4\xa2".force_encoding("euc-jp"), reader.getc)
    reader.set_encoding("shift_jis:euc-jp")
    assert_equal("\xa4\xa2".force_encoding("euc-jp"), reader.getc)
  end
end
|
|
|
|
|
|
|
|
# Same scenario as the single-string form, with set_encoding given the
# external and internal encodings as two separate arguments.
def test_set_encoding2
  with_pipe("utf-8:euc-jp") do |reader, writer|
    bytes = "\u3042".force_encoding("ascii-8bit")
    bytes << "\x82\xa0".force_encoding("ascii-8bit")
    writer << bytes
    writer.close
    assert_equal("\xa4\xa2".force_encoding("euc-jp"), reader.getc)
    reader.set_encoding("shift_jis", "euc-jp")
    assert_equal("\xa4\xa2".force_encoding("euc-jp"), reader.getc)
  end
end
|
|
|
|
|
|
|
|
# set_encoding(nil) turns conversion off: the remaining bytes are read
# unchanged and tagged with the default external encoding.
def test_set_encoding_nil
  with_pipe("utf-8:euc-jp") do |reader, writer|
    bytes = "\u3042".force_encoding("ascii-8bit")
    bytes << "\x82\xa0".force_encoding("ascii-8bit")
    writer << bytes
    writer.close
    assert_equal("\xa4\xa2".force_encoding("euc-jp"), reader.getc)
    reader.set_encoding(nil)
    assert_equal("\x82\xa0".force_encoding(Encoding.default_external), reader.read)
  end
end
|
|
|
|
|
|
|
|
# set_encoding with a single Encoding object: the remaining bytes are
# returned unconverted and tagged as Shift_JIS.
def test_set_encoding_enc
  with_pipe("utf-8:euc-jp") do |reader, writer|
    bytes = "\u3042".force_encoding("ascii-8bit")
    bytes << "\x82\xa0".force_encoding("ascii-8bit")
    writer << bytes
    writer.close
    assert_equal("\xa4\xa2".force_encoding("euc-jp"), reader.getc)
    reader.set_encoding(Encoding::Shift_JIS)
    assert_equal("\x82\xa0".force_encoding(Encoding::Shift_JIS), reader.getc)
  end
end
|
|
|
|
|
2008-08-24 05:40:31 -04:00
|
|
|
# set_encoding accepts :invalid=>:replace; the invalid byte reads as "?".
def test_set_encoding_invalid
  with_pipe do |reader, writer|
    writer << "\x80"
    writer.close
    reader.set_encoding("utf-8:euc-jp", :invalid=>:replace)
    assert_equal("?", reader.read)
  end
end
|
|
|
|
|
|
|
|
# set_encoding accepts :undef=>:replace; U+FFFD reads as "?".
def test_set_encoding_undef
  with_pipe do |reader, writer|
    writer << "\ufffd"
    writer.close
    reader.set_encoding("utf-8", "euc-jp", :undef=>:replace)
    assert_equal("?", reader.read)
  end
end
|
|
|
|
|
2008-09-03 14:22:21 -04:00
|
|
|
# set_encoding honours a custom :replace string, both with two encoding
# arguments and with a single "ext:int" string.
def test_set_encoding_undef_replace
  with_pipe do |reader, writer|
    writer << "\ufffd"
    writer.close
    reader.set_encoding("utf-8", "euc-jp", :undef=>:replace, :replace=>"ZZZ")
    assert_equal("ZZZ", reader.read)
  end
  with_pipe do |reader, writer|
    writer << "\ufffd"
    writer.close
    reader.set_encoding("utf-8:euc-jp", :undef=>:replace, :replace=>"ZZZ")
    assert_equal("ZZZ", reader.read)
  end
end
|
|
|
|
|
2008-09-12 13:58:58 -04:00
|
|
|
# Setting iso-2022-jp as the encoding is expected to raise ArgumentError
# on a stream opened with "rt" or "r", and to succeed once the stream is
# binary (opened with "rb" or switched via binmode).
def test_set_encoding_binmode
  assert_raise(ArgumentError) do
    open(__FILE__, "rt") do |io|
      io.set_encoding("iso-2022-jp")
    end
  end
  assert_raise(ArgumentError) do
    open(__FILE__, "r") do |io|
      io.set_encoding("iso-2022-jp")
    end
  end
  assert_nothing_raised do
    open(__FILE__, "rb") do |io|
      io.set_encoding("iso-2022-jp")
    end
  end
  assert_nothing_raised do
    open(__FILE__, "r") do |io|
      io.binmode
      io.set_encoding("iso-2022-jp")
    end
  end
  assert_nothing_raised do
    open(__FILE__, "rt") do |io|
      io.binmode
      io.set_encoding("iso-2022-jp")
    end
  end
end
|
|
|
|
|
2008-08-18 08:06:42 -04:00
|
|
|
# With "iso-2022-jp:utf-8" set on the write end, two separate writes come
# out as one ISO-2022-JP sequence with a single pair of escapes.
def test_write_conversion_fixenc
  with_pipe do |reader, writer|
    writer.set_encoding("iso-2022-jp:utf-8")
    # Read concurrently so the writes below do not depend on pipe capacity.
    collector = Thread.new { reader.read.force_encoding("ascii-8bit") }
    writer << "\u3042"
    writer << "\u3044"
    writer.close
    assert_equal("\e$B$\"$$\e(B".force_encoding("ascii-8bit"), collector.value)
  end
end
|
|
|
|
|
|
|
|
# With only an external encoding (iso-2022-jp) on the write end, strings
# in different source encodings (UTF-8, then Shift_JIS) are both converted
# and emitted as one ISO-2022-JP sequence.
def test_write_conversion_anyenc_stateful
  with_pipe do |reader, writer|
    writer.set_encoding("iso-2022-jp")
    collector = Thread.new { reader.read.force_encoding("ascii-8bit") }
    writer << "\u3042"
    writer << "\x82\xa2".force_encoding("sjis")
    writer.close
    assert_equal("\e$B$\"$$\e(B".force_encoding("ascii-8bit"), collector.value)
  end
end
|
|
|
|
|
|
|
|
# With only an external encoding (euc-jp) on the write end, strings in
# different source encodings are each converted to EUC-JP.
def test_write_conversion_anyenc_stateless
  with_pipe do |reader, writer|
    writer.set_encoding("euc-jp")
    collector = Thread.new { reader.read.force_encoding("ascii-8bit") }
    writer << "\u3042"
    writer << "\x82\xa2".force_encoding("sjis")
    writer.close
    assert_equal("\xa4\xa2\xa4\xa4".force_encoding("ascii-8bit"), collector.value)
  end
end
|
|
|
|
|
|
|
|
# Repeats the ISO-2022-JP mixed-source write check with sync mode turned off
# on the writer before any data is written.
def test_write_conversion_anyenc_stateful_nosync
  with_pipe do |reader, writer|
    writer.sync = false
    writer.set_encoding("iso-2022-jp")
    collector = Thread.new { reader.read.force_encoding("ascii-8bit") }
    writer << "\u3042"
    writer << "\x82\xa2".force_encoding("sjis")
    writer.close
    expected = "\e$B$\"$$\e(B".force_encoding("ascii-8bit")
    assert_equal(expected, collector.value)
  end
end
|
|
|
|
|
2008-08-27 09:39:25 -04:00
|
|
|
# A pipe created with "euc-jp:iso-2022-jp" must hand back EUC-JP input as an
# ISO-2022-JP string, including the leading and trailing escape sequences.
def test_read_stateful
  with_pipe("euc-jp:iso-2022-jp") do |reader, writer|
    writer << "\xA4\xA2"
    writer.close
    expected = "\e$B$\"\e(B".force_encoding("iso-2022-jp")
    assert_equal(expected, reader.read)
  end
end
|
|
|
|
|
2008-08-18 11:35:21 -04:00
|
|
|
# Spawns a child Ruby that reopens STDIN onto inherited fd 10, queries
# STDIN.external_encoding, then writes one character through STDIN.
# The parent checks that the character's bytes landed in the shared file.
def test_stdin_external_encoding_with_reopen
  with_tmpdir do
    open("tst", "w+") do |file|
      # Single-quoted heredoc: the "\u3042" escape is interpreted by the child.
      child_script = <<-'End'
        io = IO.new(10, "r+")
        STDIN.reopen(io)
        STDIN.external_encoding
        STDIN.write "\u3042"
        STDIN.flush
      End
      child = spawn(EnvUtil.rubybin, '-e', child_script, 10=>file)
      Process.wait child
      file.rewind
      written = file.read.force_encoding("ascii-8bit")
      assert_equal("\u3042".force_encoding("ascii-8bit"), written)
    end
  end
end
|
|
|
|
|
2008-08-21 06:07:21 -04:00
|
|
|
# IO.popen with mode "r:ascii-8bit": external encoding is ASCII-8BIT, there
# is no internal encoding, and the byte read back is tagged ASCII-8BIT.
def test_popen_r_enc
  command = "#{EnvUtil.rubybin} -e 'putc 255'"
  IO.popen(command, "r:ascii-8bit") do |child|
    assert_equal(Encoding::ASCII_8BIT, child.external_encoding)
    assert_equal(nil, child.internal_encoding)
    output = child.read
    assert_equal(Encoding::ASCII_8BIT, output.encoding)
    assert_equal("\xff".force_encoding("ascii-8bit"), output)
  end
end
|
|
|
|
|
|
|
|
# Same expectations as test_popen_r_enc, with the encoding supplied through
# the :encoding option instead of the mode string.
def test_popen_r_enc_in_opt
  command = "#{EnvUtil.rubybin} -e 'putc 255'"
  IO.popen(command, "r", encoding: "ascii-8bit") do |child|
    assert_equal(Encoding::ASCII_8BIT, child.external_encoding)
    assert_equal(nil, child.internal_encoding)
    output = child.read
    assert_equal(Encoding::ASCII_8BIT, output.encoding)
    assert_equal("\xff".force_encoding("ascii-8bit"), output)
  end
end
|
|
|
|
|
|
|
|
# Same expectations as test_popen_r_enc, with the encoding supplied through
# the :external_encoding option.
def test_popen_r_enc_in_opt2
  command = "#{EnvUtil.rubybin} -e 'putc 255'"
  IO.popen(command, "r", external_encoding: "ascii-8bit") do |child|
    assert_equal(Encoding::ASCII_8BIT, child.external_encoding)
    assert_equal(nil, child.internal_encoding)
    output = child.read
    assert_equal(Encoding::ASCII_8BIT, output.encoding)
    assert_equal("\xff".force_encoding("ascii-8bit"), output)
  end
end
|
|
|
|
|
|
|
|
# IO.popen with mode "r:shift_jis:euc-jp": the byte 0xa1 emitted by the child
# is read as Shift_JIS and returned transcoded to EUC-JP.
def test_popen_r_enc_enc
  command = "#{EnvUtil.rubybin} -e 'putc 0xa1'"
  IO.popen(command, "r:shift_jis:euc-jp") do |child|
    assert_equal(Encoding::Shift_JIS, child.external_encoding)
    assert_equal(Encoding::EUC_JP, child.internal_encoding)
    output = child.read
    assert_equal(Encoding::EUC_JP, output.encoding)
    assert_equal("\x8e\xa1".force_encoding("euc-jp"), output)
  end
end
|
|
|
|
|
|
|
|
# Shift_JIS -> EUC-JP popen read, with the encoding pair passed as
# encoding: "shift_jis:euc-jp".
def test_popen_r_enc_enc_in_opt
  command = "#{EnvUtil.rubybin} -e 'putc 0xa1'"
  IO.popen(command, "r", encoding: "shift_jis:euc-jp") do |child|
    assert_equal(Encoding::Shift_JIS, child.external_encoding)
    assert_equal(Encoding::EUC_JP, child.internal_encoding)
    output = child.read
    assert_equal(Encoding::EUC_JP, output.encoding)
    assert_equal("\x8e\xa1".force_encoding("euc-jp"), output)
  end
end
|
|
|
|
|
|
|
|
# Shift_JIS -> EUC-JP popen read, with the two encodings passed as separate
# :external_encoding and :internal_encoding options.
def test_popen_r_enc_enc_in_opt2
  command = "#{EnvUtil.rubybin} -e 'putc 0xa1'"
  IO.popen(command, "r", external_encoding: "shift_jis", internal_encoding: "euc-jp") do |child|
    assert_equal(Encoding::Shift_JIS, child.external_encoding)
    assert_equal(Encoding::EUC_JP, child.internal_encoding)
    output = child.read
    assert_equal(Encoding::EUC_JP, output.encoding)
    assert_equal("\x8e\xa1".force_encoding("euc-jp"), output)
  end
end
|
|
|
|
|
|
|
|
# Array-command form of IO.popen, with Shift_JIS external and EUC-JP internal
# encodings given as separate options.
def test_popenv_r_enc_enc_in_opt2
  argv = [EnvUtil.rubybin, "-e", "putc 0xa1"]
  IO.popen(argv, "r", external_encoding: "shift_jis", internal_encoding: "euc-jp") do |child|
    assert_equal(Encoding::Shift_JIS, child.external_encoding)
    assert_equal(Encoding::EUC_JP, child.internal_encoding)
    output = child.read
    assert_equal(Encoding::EUC_JP, output.encoding)
    assert_equal("\x8e\xa1".force_encoding("euc-jp"), output)
  end
end
|
|
|
|
|
|
|
|
# Kernel#open with a "|command" path and mode "r:ascii-8bit" must behave like
# the equivalent IO.popen call with respect to encodings.
def test_open_pipe_r_enc
  pipe_spec = "|#{EnvUtil.rubybin} -e 'putc 255'"
  open(pipe_spec, "r:ascii-8bit") do |child|
    assert_equal(Encoding::ASCII_8BIT, child.external_encoding)
    assert_equal(nil, child.internal_encoding)
    output = child.read
    assert_equal(Encoding::ASCII_8BIT, output.encoding)
    assert_equal("\xff".force_encoding("ascii-8bit"), output)
  end
end
|
|
|
|
|
|
|
|
# IO.foreach with :mode => "r:ascii-8bit" yields ASCII-8BIT lines.
def test_s_foreach_enc
  with_tmpdir do
    generate_file("t", "\xff")
    IO.foreach("t", :mode => "r:ascii-8bit") do |line|
      assert_equal(Encoding::ASCII_8BIT, line.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), line)
    end
  end
end
|
|
|
|
|
|
|
|
def test_s_foreach_enc_in_opt
|
|
|
|
with_tmpdir {
|
|
|
|
generate_file("t", "\xff")
|
|
|
|
IO.foreach("t", :encoding => "ascii-8bit") {|s|
|
|
|
|
assert_equal(Encoding::ASCII_8BIT, s.encoding)
|
|
|
|
assert_equal("\xff".force_encoding("ascii-8bit"), s)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
end
|
|
|
|
|
|
|
|
def test_s_foreach_enc_in_opt2
|
|
|
|
with_tmpdir {
|
|
|
|
generate_file("t", "\xff")
|
|
|
|
IO.foreach("t", :external_encoding => "ascii-8bit") {|s|
|
|
|
|
assert_equal(Encoding::ASCII_8BIT, s.encoding)
|
|
|
|
assert_equal("\xff".force_encoding("ascii-8bit"), s)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
end
|
|
|
|
|
|
|
|
# IO.foreach with :mode => "r:utf-8:euc-jp" transcodes UTF-8 file content to
# EUC-JP lines.
def test_s_foreach_enc_enc
  with_tmpdir do
    generate_file("t", "\u3042")
    IO.foreach("t", :mode => "r:utf-8:euc-jp") do |line|
      assert_equal(Encoding::EUC_JP, line.encoding)
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
    end
  end
end
|
|
|
|
|
|
|
|
# UTF-8 -> EUC-JP IO.foreach, with the pair given as
# :encoding => "utf-8:euc-jp" next to a plain "r" mode.
def test_s_foreach_enc_enc_in_opt
  with_tmpdir do
    generate_file("t", "\u3042")
    IO.foreach("t", :mode => "r", :encoding => "utf-8:euc-jp") do |line|
      assert_equal(Encoding::EUC_JP, line.encoding)
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
    end
  end
end
|
|
|
|
|
|
|
|
# UTF-8 -> EUC-JP IO.foreach, with :external_encoding and :internal_encoding
# given as separate options.
def test_s_foreach_enc_enc_in_opt2
  with_tmpdir do
    generate_file("t", "\u3042")
    IO.foreach("t", :mode => "r", :external_encoding => "utf-8", :internal_encoding => "euc-jp") do |line|
      assert_equal(Encoding::EUC_JP, line.encoding)
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
    end
  end
end
|
|
|
|
|
|
|
|
# IO.foreach with :open_args => ["r:ascii-8bit"] yields ASCII-8BIT lines.
def test_s_foreach_open_args_enc
  with_tmpdir do
    generate_file("t", "\xff")
    IO.foreach("t", :open_args => ["r:ascii-8bit"]) do |line|
      assert_equal(Encoding::ASCII_8BIT, line.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), line)
    end
  end
end
|
|
|
|
|
|
|
|
def test_s_foreach_open_args_enc_in_opt
|
|
|
|
with_tmpdir {
|
|
|
|
generate_file("t", "\xff")
|
|
|
|
IO.foreach("t", :open_args => ["r", encoding: "ascii-8bit"]) {|s|
|
|
|
|
assert_equal(Encoding::ASCII_8BIT, s.encoding)
|
|
|
|
assert_equal("\xff".force_encoding("ascii-8bit"), s)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
end
|
|
|
|
|
|
|
|
# IO.foreach with :open_args carrying a mode plus an external_encoding:
# option hash.
def test_s_foreach_open_args_enc_in_opt2
  with_tmpdir do
    generate_file("t", "\xff")
    IO.foreach("t", :open_args => ["r", external_encoding: "ascii-8bit"]) do |line|
      assert_equal(Encoding::ASCII_8BIT, line.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), line)
    end
  end
end
|
|
|
|
|
|
|
|
# IO.foreach with :open_args => ["r:utf-8:euc-jp"] transcodes UTF-8 content
# to EUC-JP lines.
def test_s_foreach_open_args_enc_enc
  with_tmpdir do
    generate_file("t", "\u3042")
    IO.foreach("t", :open_args => ["r:utf-8:euc-jp"]) do |line|
      assert_equal(Encoding::EUC_JP, line.encoding)
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
    end
  end
end
|
|
|
|
|
|
|
|
# UTF-8 -> EUC-JP IO.foreach through :open_args, with the pair given as
# encoding: "utf-8:euc-jp".
def test_s_foreach_open_args_enc_enc_in_opt
  with_tmpdir do
    generate_file("t", "\u3042")
    IO.foreach("t", :open_args => ["r", encoding: "utf-8:euc-jp"]) do |line|
      assert_equal(Encoding::EUC_JP, line.encoding)
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
    end
  end
end
|
|
|
|
|
|
|
|
def test_s_foreach_open_args_enc_enc_in_opt2
|
|
|
|
with_tmpdir {
|
|
|
|
generate_file("t", "\u3042")
|
|
|
|
IO.foreach("t", :open_args => ["r", external_encoding: "utf-8", internal_encoding: "euc-jp"]) {|s|
|
|
|
|
assert_equal(Encoding::EUC_JP, s.encoding)
|
|
|
|
assert_equal("\xa4\xa2".force_encoding("euc-jp"), s)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
end
|
|
|
|
|
2008-08-22 22:23:42 -04:00
|
|
|
# Requesting :textmode and :binmode together must be rejected with
# ArgumentError.
def test_both_textmode_binmode
  assert_raise(ArgumentError) do
    open("not-exist", "r", :textmode=>true, :binmode=>true)
  end
end
|
|
|
|
|
2008-08-22 12:44:00 -04:00
|
|
|
# Text-mode ("t") reads must normalise CRLF, bare CR and LF line endings to
# "\n", both with and without an encoding conversion, and whether text mode is
# requested through the mode string or the :textmode option.
def test_textmode_decode_universal_newline_read
  with_tmpdir {
    generate_file("t.crlf", "a\r\nb\r\nc\r\n")
    assert_equal("a\nb\nc\n", File.read("t.crlf", mode:"rt:euc-jp:utf-8"))
    assert_equal("a\nb\nc\n", File.read("t.crlf", mode:"rt"))
    open("t.crlf", "rt:euc-jp:utf-8") {|f| assert_equal("a\nb\nc\n", f.read) }
    open("t.crlf", "rt") {|f| assert_equal("a\nb\nc\n", f.read) }
    open("t.crlf", "r", :textmode=>true) {|f| assert_equal("a\nb\nc\n", f.read) }

    generate_file("t.cr", "a\rb\rc\r")
    assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt:euc-jp:utf-8"))
    assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt"))

    # Read back the LF fixture itself; previously these two assertions
    # re-read "t.cr", leaving the LF case untested.
    generate_file("t.lf", "a\nb\nc\n")
    assert_equal("a\nb\nc\n", File.read("t.lf", mode:"rt:euc-jp:utf-8"))
    assert_equal("a\nb\nc\n", File.read("t.lf", mode:"rt"))
  }
end
|
|
|
|
|
|
|
|
# In "rt" mode, getc must return "\n" for every line ending regardless of
# whether the file uses CRLF, bare CR or LF, and nil at end of file.
def test_textmode_decode_universal_newline_getc
  with_tmpdir do
    fixtures = [
      ["t.crlf", "a\r\nb\r\nc\r\n"],
      ["t.cr",   "a\rb\rc\r"],
      ["t.lf",   "a\nb\nc\n"],
    ]
    expected_chars = ["a", "\n", "b", "\n", "c", "\n", nil]
    fixtures.each do |name, raw|
      generate_file(name, raw)
      open(name, "rt") do |f|
        expected_chars.each { |ch| assert_equal(ch, f.getc) }
      end
    end
  end
end
|
|
|
|
|
|
|
|
# In "rt" mode, gets must split on CRLF, bare CR and LF alike, returning
# "\n"-terminated lines and nil at end of file.
def test_textmode_decode_universal_newline_gets
  with_tmpdir do
    fixtures = [
      ["t.crlf", "a\r\nb\r\nc\r\n"],
      ["t.cr",   "a\rb\rc\r"],
      ["t.lf",   "a\nb\nc\n"],
    ]
    expected_lines = ["a\n", "b\n", "c\n", nil]
    fixtures.each do |name, raw|
      generate_file(name, raw)
      open(name, "rt") do |f|
        expected_lines.each { |line| assert_equal(line, f.gets) }
      end
    end
  end
end
|
|
|
|
|
|
|
|
# Universal-newline decoding combined with UTF-16BE/LE -> UTF-8 conversion:
# CRLF, bare CR and LF fixtures must all read back as "a\nb\nc\n".
def test_textmode_decode_universal_newline_utf16
  with_tmpdir do
    big    = "rt:utf-16be:utf-8"
    little = "rt:utf-16le:utf-8"
    fixtures = [
      ["t.utf16be.crlf", "\0a\0\r\0\n\0b\0\r\0\n\0c\0\r\0\n", big],
      ["t.utf16le.crlf", "a\0\r\0\n\0b\0\r\0\n\0c\0\r\0\n\0", little],
      ["t.utf16be.cr",   "\0a\0\r\0b\0\r\0c\0\r",             big],
      ["t.utf16le.cr",   "a\0\r\0b\0\r\0c\0\r\0",             little],
      ["t.utf16be.lf",   "\0a\0\n\0b\0\n\0c\0\n",             big],
      ["t.utf16le.lf",   "a\0\n\0b\0\n\0c\0\n\0",             little],
    ]
    fixtures.each do |name, raw, read_mode|
      generate_file(name, raw)
      assert_equal("a\nb\nc\n", File.read(name, mode: read_mode))
    end
  end
end
|
|
|
|
|
2008-08-26 08:55:14 -04:00
|
|
|
# One-element cache for the platform's text-mode newline sequence; empty
# until system_newline has been called once.
SYSTEM_NEWLINE = []

# Returns the bytes that a text-mode write of "\n" produces on this platform.
# The value is found by writing "\n" in "wt" mode, reading the file back in
# "rb" mode, and caching the result in SYSTEM_NEWLINE.
def system_newline
  if SYSTEM_NEWLINE.empty?
    with_tmpdir do
      open("newline", "wt") { |out| out.print "\n" }
      open("newline", "rb") { |inp| SYSTEM_NEWLINE << inp.read }
    end
  end
  SYSTEM_NEWLINE.first
end
|
|
|
|
|
|
|
|
# Lines written with puts in "wt" mode must end with the platform newline
# when the file is read back in binary mode.
def test_textmode_encode_newline
  with_tmpdir do
    open("t.txt", "wt") do |out|
      out.puts "abc"
      out.puts "def"
    end
    written = File.read("t.txt", :mode=>"rb")
    eol = system_newline
    assert_equal("abc#{eol}def#{eol}", written)
  end
end
|
|
|
|
|
2008-08-26 08:55:14 -04:00
|
|
|
# Text-mode writes with an EUC-JP external encoding must both transcode the
# characters and emit the platform newline.
def test_textmode_encode_newline_enc
  with_tmpdir do
    open("t.txt", "wt:euc-jp") do |out|
      out.puts "abc\u3042"
      out.puts "def\u3044"
    end
    written = File.read("t.txt", :mode=>"rb:ascii-8bit")
    eol = system_newline
    assert_equal("abc\xA4\xA2#{eol}def\xA4\xA4#{eol}", written)
  end
end
|
|
|
|
|
2008-09-09 10:56:55 -04:00
|
|
|
# Reading a CRLF UTF-8 file while converting to UTF-16BE:
# - "rb" keeps CRLF,
# - "rt" collapses CRLF to LF,
# - plain "r" keeps CRLF when the platform newline is "\n" and collapses it
#   otherwise.
def test_read_newline_conversion_with_encoding_conversion
  with_tmpdir do
    generate_file("t.utf8.crlf", "a\r\nb\r\n")

    open("t.utf8.crlf", "rb:utf-8:utf-16be") do |f|
      assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"), f.read)
    end

    open("t.utf8.crlf", "rt:utf-8:utf-16be") do |f|
      assert_equal("\0a\0\n\0b\0\n".force_encoding("UTF-16BE"), f.read)
    end

    open("t.utf8.crlf", "r:utf-8:utf-16be") do |f|
      content = f.read
      expected =
        if system_newline == "\n"
          "\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE")
        else
          "\0a\0\n\0b\0\n".force_encoding("UTF-16BE")
        end
      assert_equal(expected, content)
    end
  end
end
|
|
|
|
|
2008-09-09 10:56:55 -04:00
|
|
|
# A UTF-16BE file opened as "rb:utf-16be" (no internal encoding) must be
# returned unchanged, CRLF included.
def test_read_newline_conversion_without_encoding_conversion
  with_tmpdir do
    generate_file("t.utf16.crlf", "\0a\0\r\0\n\0b\0\r\0\n")
    open("t.utf16.crlf", "rb:utf-16be") do |f|
      actual = f.read
      expected = "\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE")
      assert_equal(expected, actual)
    end
  end
end
|
|
|
|
|
2008-09-09 10:56:55 -04:00
|
|
|
# Opening with an ASCII-incompatible external encoding and no conversion is
# expected to raise ArgumentError unless binmode is used; both "rt" and plain
# "r" are checked.
def test_read_newline_conversion_error
  with_tmpdir do
    generate_file("empty.txt", "")
    # ascii incompatible encoding without conversion needs binmode.
    ["rt:utf-16be", "r:utf-16be"].each do |mode|
      assert_raise(ArgumentError) do
        open("empty.txt", mode) {|f| }
      end
    end
  end
end
|
|
|
|
|
|
|
|
# Exhaustive read-mode matrix: for each fixture and each "rt"/"rb" mode with
# an optional external[:internal] encoding pair, the whole file must read back
# as the expected string (newline-normalised for "rt", untouched for "rb").
def test_read_mode
  with_tmpdir do
    generate_file("t", "a\rb\r\nc\n\xc2\xa2")
    generate_file("ie", "a\rb\r\nc\n\e$B\x42\x22\e(B")
    generate_file("iu", "a\rb\r\nc\n\e$B\x21\x71\e(B")
    generate_file("be", "\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35")
    generate_file("bu", "\0a\0\r\0b\0\r\0\n\0c\0\n\0\xa2")
    # "\xc2\xa2" is valid as EUC-JP and UTF-8
    #   EUC-JP        UTF-8           Unicode
    #   0xC2A2        0xE894B5        U+8535
    #   0xA1F1        0xC2A2          U+00A2

    # Opens +path+ with +mode+ and asserts the full content equals +expected+.
    reads_as = lambda do |path, mode, expected|
      open(path, mode) { |f| assert_equal(expected, f.read) }
    end

    # No encoding in the mode string.
    reads_as.call("t", "rt", "a\nb\nc\n\xc2\xa2".force_encoding(Encoding.default_external))
    reads_as.call("t", "rb", "a\rb\r\nc\n\xc2\xa2".force_encoding(Encoding::ASCII_8BIT))

    # External encoding only.
    reads_as.call("t", "rt:euc-jp", "a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"))
    reads_as.call("t", "rb:euc-jp", "a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"))
    reads_as.call("t", "rt:utf-8", "a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"))
    reads_as.call("t", "rb:utf-8", "a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"))
    assert_raise(ArgumentError) { open("t", "rt:iso-2022-jp") {|f| } }
    reads_as.call("t", "rb:iso-2022-jp", "a\rb\r\nc\n\xc2\xa2".force_encoding("ISO-2022-JP"))

    # ASCII-compatible external and internal encodings.
    reads_as.call("t", "rt:euc-jp:utf-8", "a\nb\nc\n\u8535")
    reads_as.call("t", "rt:utf-8:euc-jp", "a\nb\nc\n\xa1\xf1".force_encoding("EUC-JP"))
    reads_as.call("t", "rb:euc-jp:utf-8", "a\rb\r\nc\n\u8535")
    reads_as.call("t", "rb:utf-8:euc-jp", "a\rb\r\nc\n\xa1\xf1".force_encoding("EUC-JP"))

    # ASCII-compatible external, ISO-2022-JP or UTF-16BE internal.
    reads_as.call("t", "rt:euc-jp:iso-2022-jp", "a\nb\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"))
    reads_as.call("t", "rt:utf-8:iso-2022-jp", "a\nb\nc\n\e$B\x21\x71\e(B".force_encoding("ISO-2022-JP"))
    reads_as.call("t", "rt:euc-jp:utf-16be", "\0a\0\n\0b\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"))
    reads_as.call("t", "rt:utf-8:utf-16be", "\0a\0\n\0b\0\n\0c\0\n\0\xa2".force_encoding("UTF-16BE"))
    reads_as.call("t", "rb:euc-jp:iso-2022-jp", "a\rb\r\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"))
    reads_as.call("t", "rb:utf-8:iso-2022-jp", "a\rb\r\nc\n\e$B\x21\x71\e(B".force_encoding("ISO-2022-JP"))
    reads_as.call("t", "rb:euc-jp:utf-16be", "\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"))
    reads_as.call("t", "rb:utf-8:utf-16be", "\0a\0\r\0b\0\r\0\n\0c\0\n\0\xa2".force_encoding("UTF-16BE"))

    # ISO-2022-JP or UTF-16BE external, ASCII-compatible internal.
    reads_as.call("ie", "rt:iso-2022-jp:euc-jp", "a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"))
    reads_as.call("iu", "rt:iso-2022-jp:utf-8", "a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"))
    reads_as.call("be", "rt:utf-16be:euc-jp", "a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"))
    reads_as.call("bu", "rt:utf-16be:utf-8", "a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"))
    reads_as.call("ie", "rb:iso-2022-jp:euc-jp", "a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"))
    reads_as.call("iu", "rb:iso-2022-jp:utf-8", "a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"))
    reads_as.call("be", "rb:utf-16be:euc-jp", "a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"))
    reads_as.call("bu", "rb:utf-16be:utf-8", "a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"))

    # ISO-2022-JP <-> UTF-16BE in both directions.
    reads_as.call("ie", "rt:iso-2022-jp:utf-16be", "\0a\0\n\0b\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"))
    reads_as.call("be", "rt:utf-16be:iso-2022-jp", "a\nb\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"))
    reads_as.call("ie", "rb:iso-2022-jp:utf-16be", "\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"))
    reads_as.call("be", "rb:utf-16be:iso-2022-jp", "a\rb\r\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"))
  end
end
|
|
|
|
|
|
|
|
# Writes each of +args+ with print to a scratch file opened in +mode+, then
# asserts that the file's raw bytes equal +expected+. Both sides are compared
# as ASCII-8BIT.
#
# expected - String holding the expected file content.
# mode     - mode string handed to open.
# args     - objects printed to the file, in order.
def assert_write(expected, mode, *args)
  with_tmpdir do
    open("t", mode) do |out|
      args.each { |arg| out.print arg }
    end
    written = File.read("t", :mode=>"rb:ascii-8bit")
    wanted = expected.dup.force_encoding("ascii-8bit")
    assert_equal(wanted, written.force_encoding("ascii-8bit"))
  end
end
|
|
|
|
|
|
|
|
# Checks "wt"/"wb" writes with no encoding in the mode string, then delegates
# the encoding-specific matrix to t_write_mode_enc (once with no internal
# encoding and once with ":utf-8").
def test_write_mode
  # "\xc2\xa2" is valid as EUC-JP and UTF-8
  #   EUC-JP        UTF-8           Unicode
  #   0xC2A2        0xE894B5        U+8535
  #   0xA1F1        0xC2A2          U+00A2
  a = "a\rb\r\nc\n"
  e = "\xc2\xa2".force_encoding("euc-jp")
  u8 = "\xc2\xa2".force_encoding("utf-8")
  u16 = "\x85\x35\0\r\x00\xa2\0\r\0\n\0\n".force_encoding("utf-16be")
  i = "\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n".force_encoding("iso-2022-jp")
  n = system_newline
  # The UTF-16BE form of the newline was computed here but never used in this
  # method, so it has been dropped.

  # Text mode: only "\n" is rewritten to the platform newline.
  assert_write("a\rb\r#{n}c#{n}", "wt", a)
  assert_write("\xc2\xa2", "wt", e)
  assert_write("\xc2\xa2", "wt", u8)

  # Binary mode: bytes pass through untouched.
  assert_write("a\rb\r\nc\n", "wb", a)
  assert_write("\xc2\xa2", "wb", e)
  assert_write("\xc2\xa2", "wb", u8)

  #assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wt", u16) should raise
  #assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wt", i) should raise
  assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wb", u16)
  assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wb", i)

  t_write_mode_enc
  t_write_mode_enc(":utf-8")
end
|
|
|
|
|
|
|
|
# Write-mode matrix for explicit external encodings (EUC-JP, UTF-16BE,
# ISO-2022-JP) in both "wt" and "wb" modes.
#
# enc - suffix appended to every mode string; "" by default, or something
#       like ":utf-8" to add an internal encoding.
def t_write_mode_enc(enc="")
  # "\xc2\xa2" is valid as EUC-JP and UTF-8
  #   EUC-JP        UTF-8           Unicode
  #   0xC2A2        0xE894B5        U+8535
  #   0xA1F1        0xC2A2          U+00A2
  ascii   = "a\rb\r\nc\n"
  euc     = "\xc2\xa2".force_encoding("euc-jp")
  utf8    = "\xc2\xa2".force_encoding("utf-8")
  utf16   = "\x85\x35\0\r\x00\xa2\0\r\0\n\0\n".force_encoding("utf-16be")
  iso2022 = "\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n".force_encoding("iso-2022-jp")
  nl      = system_newline
  # Platform newline as raw UTF-16BE bytes, for the UTF-16BE text-mode cases.
  wide_nl = nl.encode("utf-16be").force_encoding("ascii-8bit")

  text_euc   = "wt:euc-jp#{enc}"
  bin_euc    = "wb:euc-jp#{enc}"
  text_utf16 = "wt:utf-16be#{enc}"
  bin_utf16  = "wb:utf-16be#{enc}"
  text_iso   = "wt:iso-2022-jp#{enc}"
  bin_iso    = "wb:iso-2022-jp#{enc}"
  # Sources that are not ASCII-compatible; each is checked against the same
  # expected output.
  wide_sources = [utf16, iso2022]

  # EUC-JP target, ASCII-compatible sources.
  assert_write("a\rb\r#{nl}c#{nl}", text_euc, ascii)
  assert_write("\xc2\xa2", text_euc, euc)
  assert_write("\xa1\xf1", text_euc, utf8)

  assert_write("a\rb\r\nc\n", bin_euc, ascii)
  assert_write("\xc2\xa2", bin_euc, euc)
  assert_write("\xa1\xf1", bin_euc, utf8)

  # EUC-JP target, UTF-16BE / ISO-2022-JP sources.
  wide_sources.each { |src| assert_write("\xc2\xa2\r\xa1\xf1\r#{nl}#{nl}", text_euc, src) }
  wide_sources.each { |src| assert_write("\xc2\xa2\r\xa1\xf1\r\n\n", bin_euc, src) }

  # UTF-16BE / ISO-2022-JP targets in text mode, ASCII-compatible sources.
  assert_write("\0a\0\r\0b\0\r#{wide_nl}\0c#{wide_nl}", text_utf16, ascii)
  assert_write("\x85\x35", text_utf16, euc)
  assert_write("\x00\xa2", text_utf16, utf8)
  assert_write("a\rb\r#{nl}c#{nl}", text_iso, ascii)
  assert_write("\e$B\x42\x22\e(B", text_iso, euc)
  assert_write("\e$B\x21\x71\e(B", text_iso, utf8)

  # Same targets in binary mode.
  assert_write("\0a\0\r\0b\0\r\0\n\0c\0\n", bin_utf16, ascii)
  assert_write("\x85\x35", bin_utf16, euc)
  assert_write("\x00\xa2", bin_utf16, utf8)
  assert_write("a\rb\r\nc\n", bin_iso, ascii)
  assert_write("\e$B\x42\x22\e(B", bin_iso, euc)
  assert_write("\e$B\x21\x71\e(B", bin_iso, utf8)

  # UTF-16BE / ISO-2022-JP targets, UTF-16BE / ISO-2022-JP sources.
  wide_sources.each { |src| assert_write("\x85\x35\0\r\x00\xa2\0\r#{wide_nl}#{wide_nl}", text_utf16, src) }
  wide_sources.each { |src| assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", bin_utf16, src) }
  wide_sources.each { |src| assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r#{nl}#{nl}", text_iso, src) }
  wide_sources.each { |src| assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", bin_iso, src) }
end
|
|
|
|
|
|
|
|
# When the platform newline is not "\n", printing a UTF-16BE string to a
# plain "wt" file is expected to raise ArgumentError. The test is a no-op on
# platforms whose newline is "\n".
def test_write_mode_fail
  return if system_newline == "\n"
  with_tmpdir do
    open("t", "wt") do |out|
      assert_raise(ArgumentError) do
        out.print "\0\r\0\r\0\n\0\n".force_encoding("utf-16be")
      end
    end
  end
end
|
|
|
|
|
2008-09-09 10:56:55 -04:00
|
|
|
# Opening for write with a UTF-16BE external encoding (with or without a
# UTF-8 internal encoding) in "wb", "wt" and "w" modes. There are no explicit
# assertions; the test passes if none of the opens raises.
def test_write_ascii_incompat
  with_tmpdir do
    targets = [
      ["t.utf8",  "utf-8:utf-16be"],
      ["t.utf16", "utf-16be"],
    ]
    targets.each do |path, encodings|
      ["wb", "wt", "w"].each do |flags|
        open(path, "#{flags}:#{encodings}") {|f| }
      end
    end
  end
end
|
|
|
|
|
2008-09-05 18:26:39 -04:00
|
|
|
# Binary-mode writes of a UTF-16BE string:
# - with "wb:utf-8:utf-16be" the file must contain the UTF-8 transcoding,
# - with "wb:utf-16be" the bytes must be written unchanged.
def test_binmode_write_ascii_incompat_internal
  with_tmpdir do
    open("t.utf8.lf", "wb:utf-8:utf-16be") do |out|
      out.print "\0a\0\n\0b\0\n".force_encoding("UTF-16BE")
    end
    transcoded = File.read("t.utf8.lf", :mode=>"rb:ascii-8bit")
    assert_equal("a\nb\n", transcoded)

    open("t.utf8.lf", "wb:utf-16be") do |out|
      out.print "\0a\0\n\0b\0\n".force_encoding("UTF-16BE")
    end
    untouched = File.read("t.utf8.lf", :mode=>"rb:ascii-8bit")
    assert_equal("\0a\0\n\0b\0\n", untouched)
  end
end
|
|
|
|
|
2008-08-22 12:44:00 -04:00
|
|
|
# A file with mixed line endings must read back byte-for-byte in "rb" mode
# and with :binmode => true; plain "r" is checked only when the platform
# newline is "\n".
def test_binary
  with_tmpdir do
    src = "a\nb\rc\r\nd\n"
    generate_file("t.txt", src)
    open("t.txt", "rb") { |f| assert_equal(src, f.read) }
    open("t.txt", "r", :binmode=>true) { |f| assert_equal(src, f.read) }
    open("t.txt", "r") { |f| assert_equal(src, f.read) } if system_newline == "\n"
  end
end
|
|
|
|
|
|
|
|
# Switching a text-mode ("rt") stream to binmode mid-read: characters read
# before the switch have CRLF collapsed to "\n", characters read afterwards
# expose the raw "\r" and "\n".
def test_binmode
  with_tmpdir do
    src = "a\r\nb\r\nc\r\n"
    generate_file("t.txt", src)
    open("t.txt", "rt") do |f|
      ["a", "\n"].each { |ch| assert_equal(ch, f.getc) }
      f.binmode
      ["b", "\r", "\n", "c", "\r", "\n", nil].each { |ch| assert_equal(ch, f.getc) }
    end
  end
end
|
|
|
|
|
|
|
|
# Same mid-read binmode switch as test_binmode, but on a stream opened with
# an encoding conversion ("rt:euc-jp:utf-8").
def test_binmode2
  with_tmpdir do
    src = "a\r\nb\r\nc\r\n"
    generate_file("t.txt", src)
    open("t.txt", "rt:euc-jp:utf-8") do |f|
      ["a", "\n"].each { |ch| assert_equal(ch, f.getc) }
      f.binmode
      ["b", "\r", "\n", "c", "\r", "\n", nil].each { |ch| assert_equal(ch, f.getc) }
    end
  end
end
|
|
|
|
|
2008-10-16 11:25:25 -04:00
|
|
|
# Calling binmode before any read on a "rt:utf-8:euc-jp" stream must turn off
# both newline and encoding conversion: the content comes back as the original
# bytes tagged ASCII-8BIT.
def test_binmode3
  with_tmpdir do
    text = "\u3042\r\n"
    generate_file("t.txt", text)
    raw_bytes = text.dup.force_encoding("ascii-8bit")
    open("t.txt", "rt:utf-8:euc-jp") do |f|
      f.binmode
      content = f.read
      assert_str_equal(raw_bytes, content)
      assert_equal(Encoding::ASCII_8BIT, content.encoding)
    end
  end
end
|
|
|
|
|
2008-08-23 04:41:02 -04:00
|
|
|
# Reading a file containing an invalid UTF-8 byte while converting to EUC-JP:
# - :invalid => :replace substitutes "?" (or the given :replace string),
# - :undef => :replace does not cover invalid bytes, so the first read raises
#   InvalidByteSequenceError and a second read returns the remainder.
def test_invalid_r
  with_tmpdir do
    generate_file("t.txt", "a\x80b")

    open("t.txt", "r:utf-8:euc-jp", :invalid => :replace) do |f|
      assert_equal("a?b", f.read)
    end
    open("t.txt", "r:utf-8:euc-jp", :invalid => :replace, :replace => "") do |f|
      assert_equal("ab", f.read)
    end

    open("t.txt", "r:utf-8:euc-jp", :undef => :replace) do |f|
      assert_raise(Encoding::InvalidByteSequenceError) { f.read }
      assert_equal("b", f.read)
    end
    open("t.txt", "r:utf-8:euc-jp", :undef => :replace, :replace => "") do |f|
      assert_raise(Encoding::InvalidByteSequenceError) { f.read }
      assert_equal("b", f.read)
    end
  end
end
|
|
|
|
|
|
|
|
# Reading a file containing U+FFFD while converting UTF-8 to EUC-JP:
# - :undef => :replace substitutes "?" (or the given :replace string),
# - :invalid => :replace does not cover undefined conversions, so the first
#   read raises UndefinedConversionError and a second read returns the
#   remainder.
def test_undef_r
  with_tmpdir do
    generate_file("t.txt", "a\uFFFDb")

    open("t.txt", "r:utf-8:euc-jp", :undef => :replace) do |f|
      assert_equal("a?b", f.read)
    end
    open("t.txt", "r:utf-8:euc-jp", :undef => :replace, :replace => "") do |f|
      assert_equal("ab", f.read)
    end

    open("t.txt", "r:utf-8:euc-jp", :invalid => :replace) do |f|
      assert_raise(Encoding::UndefinedConversionError) { f.read }
      assert_equal("b", f.read)
    end
    open("t.txt", "r:utf-8:euc-jp", :invalid => :replace, :replace => "") do |f|
      assert_raise(Encoding::UndefinedConversionError) { f.read }
      assert_equal("b", f.read)
    end
  end
end
|
2008-08-24 02:25:24 -04:00
|
|
|
|
|
|
|
# Writing an invalid UTF-8 string to an EUC-JP file:
# - :invalid => :replace writes "?" (or the given :replace string) in place of
#   the bad byte,
# - :undef => :replace alone still raises InvalidByteSequenceError.
def test_invalid_w
  with_tmpdir do
    invalid_utf8 = "a\x80b".force_encoding("utf-8")

    open("t.txt", "w:euc-jp", :invalid => :replace) do |out|
      assert_nothing_raised { out.write invalid_utf8 }
    end
    assert_equal("a?b", File.read("t.txt"))

    open("t.txt", "w:euc-jp", :invalid => :replace, :replace => "") do |out|
      assert_nothing_raised { out.write invalid_utf8 }
    end
    assert_equal("ab", File.read("t.txt"))

    open("t.txt", "w:euc-jp", :undef => :replace) do |out|
      assert_raise(Encoding::InvalidByteSequenceError) { out.write invalid_utf8 }
    end
    open("t.txt", "w:euc-jp", :undef => :replace, :replace => "") do |out|
      assert_raise(Encoding::InvalidByteSequenceError) { out.write invalid_utf8 }
    end
  end
end
|
|
|
|
|
|
|
|
# Writing U+FFFD to an EUC-JP ("w:euc-jp:utf-8") file:
# - :undef => :replace writes "?" (or the given :replace string),
# - :invalid => :replace alone still raises UndefinedConversionError.
def test_undef_w_stateless
  with_tmpdir do
    generate_file("t.txt", "a\uFFFDb")

    open("t.txt", "w:euc-jp:utf-8", :undef => :replace) do |out|
      assert_nothing_raised { out.write "a\uFFFDb" }
    end
    assert_equal("a?b", File.read("t.txt"))

    open("t.txt", "w:euc-jp:utf-8", :undef => :replace, :replace => "") do |out|
      assert_nothing_raised { out.write "a\uFFFDb" }
    end
    assert_equal("ab", File.read("t.txt"))

    open("t.txt", "w:euc-jp:utf-8", :invalid => :replace) do |out|
      assert_raise(Encoding::UndefinedConversionError) { out.write "a\uFFFDb" }
    end
    open("t.txt", "w:euc-jp:utf-8", :invalid => :replace, :replace => "") do |out|
      assert_raise(Encoding::UndefinedConversionError) { out.write "a\uFFFDb" }
    end
  end
end
|
|
|
|
|
|
|
|
# Writing U+FFFD through a UTF-8 -> ISO-2022-JP (stateful) transcoding stream.
#
# Mirrors test_undef_w_stateless with a stateful target encoding. The
# assertions expect that:
# * with :undef => :replace the character is written as "?" (or as the
#   string given by :replace, here the empty string);
# * with only :invalid => :replace the write raises
#   Encoding::UndefinedConversionError.
def test_undef_w_stateful
  with_tmpdir {
    generate_file("t.txt", "a\uFFFDb")
    open("t.txt", "w:iso-2022-jp:utf-8", :undef => :replace) {|f|
      assert_nothing_raised { f.write "a\uFFFDb" }
    }
    assert_equal("a?b", File.read("t.txt"))
    # An explicit empty replacement drops the character entirely.
    open("t.txt", "w:iso-2022-jp:utf-8", :undef => :replace, :replace => "") {|f|
      assert_nothing_raised { f.write "a\uFFFDb" }
    }
    assert_equal("ab", File.read("t.txt"))
    # :invalid alone must not suppress the undefined-conversion error.
    open("t.txt", "w:iso-2022-jp:utf-8", :invalid => :replace) {|f|
      assert_raise(Encoding::UndefinedConversionError) { f.write "a\uFFFDb" }
    }
    open("t.txt", "w:iso-2022-jp:utf-8", :invalid => :replace, :replace => "") {|f|
      assert_raise(Encoding::UndefinedConversionError) { f.write "a\uFFFDb" }
    }
  }
end
|
|
|
|
|
2008-09-06 11:39:00 -04:00
|
|
|
def test_w_xml_attr
|
|
|
|
with_tmpdir {
|
2008-09-06 23:13:29 -04:00
|
|
|
open("raw.txt", "wb", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" }
|
|
|
|
content = File.read("raw.txt", :mode=>"rb:ascii-8bit")
|
|
|
|
assert_equal("\"&<>"'\u4E02\u3042\n\"".force_encoding("ascii-8bit"), content)
|
|
|
|
|
|
|
|
open("ascii.txt", "wb:us-ascii", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" }
|
|
|
|
content = File.read("ascii.txt", :mode=>"rb:ascii-8bit")
|
|
|
|
assert_equal("\"&<>"'丂あ\n\"".force_encoding("ascii-8bit"), content)
|
|
|
|
|
|
|
|
open("iso-2022-jp.txt", "wb:iso-2022-jp", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" }
|
|
|
|
content = File.read("iso-2022-jp.txt", :mode=>"rb:ascii-8bit")
|
|
|
|
assert_equal("\"&<>"'丂\e$B$\"\e(B\n\"".force_encoding("ascii-8bit"), content)
|
|
|
|
|
2008-09-08 15:38:11 -04:00
|
|
|
open("utf-16be.txt", "wb:utf-16be", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" }
|
|
|
|
content = File.read("utf-16be.txt", :mode=>"rb:ascii-8bit")
|
|
|
|
assert_equal("\0\"\0&\0a\0m\0p\0;\0&\0l\0t\0;\0&\0g\0t\0;\0&\0q\0u\0o\0t\0;\0'\x4E\x02\x30\x42\0\n\0\"".force_encoding("ascii-8bit"), content)
|
|
|
|
|
2008-09-06 11:39:00 -04:00
|
|
|
open("eucjp.txt", "w:euc-jp:utf-8", xml: :attr) {|f|
|
|
|
|
f.print "\u4E02" # U+4E02 is 0x3021 in JIS X 0212
|
|
|
|
}
|
|
|
|
content = File.read("eucjp.txt", :mode=>"rb:ascii-8bit")
|
|
|
|
assert_equal("\"\x8F\xB0\xA1\"".force_encoding("ascii-8bit"), content)
|
|
|
|
|
|
|
|
open("sjis.txt", "w:sjis:utf-8", xml: :attr) {|f|
|
|
|
|
f.print "\u4E02" # U+4E02 is 0x3021 in JIS X 0212
|
|
|
|
}
|
|
|
|
content = File.read("sjis.txt", :mode=>"rb:ascii-8bit")
|
|
|
|
assert_equal("\"丂\"".force_encoding("ascii-8bit"), content)
|
|
|
|
|
|
|
|
open("iso-2022-jp.txt", "w:iso-2022-jp:utf-8", xml: :attr) {|f|
|
|
|
|
f.print "\u4E02" # U+4E02 is 0x3021 in JIS X 0212
|
|
|
|
}
|
|
|
|
content = File.read("iso-2022-jp.txt", :mode=>"rb:ascii-8bit")
|
|
|
|
assert_equal("\"丂\"".force_encoding("ascii-8bit"), content)
|
|
|
|
}
|
|
|
|
end
|
2009-07-11 01:46:36 -04:00
|
|
|
|
|
|
|
# Reading with mode "rb:BOM|UTF-8" from files that start with a BOM.
#
# For each listed encoding a file holding U+FEFF followed by "a" is
# generated in that encoding. The result of File.read is expected to equal,
# byte-for-byte, the second character of the encoded content, i.e. the
# content without its leading BOM.
def test_strip_bom
  with_tmpdir {
    text = "\uFEFFa"
    %w/UTF-8 UTF-16BE UTF-16LE UTF-32BE UTF-32LE/.each do |name|
      path = '%s-bom.txt' % name
      content = text.encode(name)
      generate_file(path, content)
      result = File.read(path, mode: 'rb:BOM|UTF-8')
      # Compare as raw bytes so the encoding tags do not affect the equality check.
      assert_equal(content[1].force_encoding("ascii-8bit"),
                   result.force_encoding("ascii-8bit"))
    end
  }
end
|
2007-12-23 10:06:14 -05:00
|
|
|
end
|
|
|
|
|