2007-12-23 10:06:14 -05:00
|
|
|
require 'test/unit'
|
|
|
|
require 'tmpdir'
|
2007-12-23 18:57:14 -05:00
|
|
|
require 'timeout'
|
2007-12-23 10:06:14 -05:00
|
|
|
|
2007-12-23 11:10:36 -05:00
|
|
|
class TestIO_M17N < Test::Unit::TestCase
|
2007-12-23 12:05:40 -05:00
|
|
|
# Encodings that the parameterized tests in this class iterate over.
# Frozen so that no test can accidentally mutate the shared list.
ENCS = [
  Encoding::ASCII_8BIT,
  Encoding::EUC_JP,
  Encoding::Shift_JIS,
  Encoding::UTF_8
].freeze
|
|
|
|
|
2007-12-23 10:06:14 -05:00
|
|
|
# Creates a temporary directory, makes it the current working directory and
# yields its path to the block.  Cleanup of the directory and restoring the
# previous working directory are left to the block forms of Dir.mktmpdir and
# Dir.chdir.  Returns the value of the block.
def with_tmpdir
  Dir.mktmpdir do |dir|
    Dir.chdir(dir) { yield dir }
  end
end
|
|
|
|
|
2007-12-23 12:05:40 -05:00
|
|
|
# Opens a pipe, passing +enc+ straight through to IO.pipe, and yields the
# read end and the write end to the block.  Whichever ends the block left
# open are closed afterwards, even if the block raises.  Returns the value
# of the block.
def with_pipe(enc=nil)
  r, w = IO.pipe(enc)
  begin
    yield r, w
  ensure
    # The block is allowed to close either end itself (many tests close the
    # writer to signal EOF), so only close what is still open.
    [r, w].each {|io| io.close unless io.closed? }
  end
end
|
|
|
|
|
2007-12-23 11:10:36 -05:00
|
|
|
# Writes +content+ to the file at +path+ in binary ("wb") mode, replacing any
# existing file.  Returns whatever IO#write returns.
#
# File.open is used rather than Kernel#open so that +path+ is always treated
# as a file name, never as a "|command" to spawn.
def generate_file(path, content)
  File.open(path, "wb") {|f| f.write(content) }
end
|
|
|
|
|
|
|
|
# Renders +str+ as a Ruby expression that spells out both its bytes (via
# String#dump) and its encoding name, for use in failure messages.
def encdump(str)
  "#{str.dump}.force_encoding(#{str.encoding.name.dump})"
end
|
|
|
|
|
|
|
|
# Asserts that +expected+ == +actual+.  On failure the message shows both
# strings through encdump, so bytes and encodings are visible.
# +message+ is an optional prefix handed to build_message.
def assert_str_equal(expected, actual, message=nil)
  # The heredoc body is deliberately flush-left: it is the literal template.
  full_message = build_message(message, <<EOT)
#{encdump expected} expected but not equal to
#{encdump actual}.
EOT
  assert_block(full_message) { expected == actual }
end
|
|
|
|
|
2007-12-24 05:22:34 -05:00
|
|
|
# A file opened with plain "r" reports Encoding.default_external as its
# external encoding and has no internal encoding.
def test_open_r
  with_tmpdir {
    generate_file('tmp', "")
    open("tmp", "r") {|f|
      assert_equal(Encoding.default_external, f.external_encoding)
      assert_nil(f.internal_encoding)
    }
  }
end
|
|
|
|
|
|
|
|
# A file opened with "rb" reports ASCII-8BIT as its external encoding and has
# no internal encoding.
def test_open_rb
  with_tmpdir {
    generate_file('tmp', "")
    open("tmp", "rb") {|f|
      assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
      assert_nil(f.internal_encoding)
    }
  }
end
|
|
|
|
|
|
|
|
# "r:euc-jp" sets the external encoding to EUC-JP and leaves the internal
# encoding unset.
def test_open_r_enc
  with_tmpdir {
    generate_file('tmp', "")
    open("tmp", "r:euc-jp") {|f|
      assert_equal(Encoding::EUC_JP, f.external_encoding)
      assert_nil(f.internal_encoding)
    }
  }
end
|
|
|
|
|
|
|
|
# "r:euc-jp:utf-8" sets the external encoding to EUC-JP and the internal
# encoding to UTF-8.
def test_open_r_enc_enc
  with_tmpdir {
    generate_file('tmp', "")
    open("tmp", "r:euc-jp:utf-8") {|f|
      assert_equal(Encoding::EUC_JP, f.external_encoding)
      assert_equal(Encoding::UTF_8, f.internal_encoding)
    }
  }
end
|
|
|
|
|
|
|
|
# A file opened with plain "w" reports neither an external nor an internal
# encoding.
def test_open_w
  with_tmpdir {
    open("tmp", "w") {|f|
      assert_nil(f.external_encoding)
      assert_nil(f.internal_encoding)
    }
  }
end
|
|
|
|
|
2007-12-24 05:27:53 -05:00
|
|
|
# A file opened with "wb" is expected to report neither an external nor an
# internal encoding.
# NOTE(review): some Ruby versions may report ASCII-8BIT as the external
# encoding for "wb" -- confirm against the interpreter this suite targets.
def test_open_wb
  with_tmpdir {
    open("tmp", "wb") {|f|
      assert_nil(f.external_encoding)
      assert_nil(f.internal_encoding)
    }
  }
end
|
|
|
|
|
2007-12-24 05:22:34 -05:00
|
|
|
# "w:euc-jp" sets the external encoding to EUC-JP and leaves the internal
# encoding unset.
#
# This method used to be defined twice with identical bodies; the later
# definition silently replaced the earlier one, so only one copy is kept.
def test_open_w_enc
  with_tmpdir {
    open("tmp", "w:euc-jp") {|f|
      assert_equal(Encoding::EUC_JP, f.external_encoding)
      assert_nil(f.internal_encoding)
    }
  }
end

# "w:euc-jp:utf-8" sets the external encoding to EUC-JP and the internal
# encoding to UTF-8.
def test_open_w_enc_enc
  with_tmpdir {
    open("tmp", "w:euc-jp:utf-8") {|f|
      assert_equal(Encoding::EUC_JP, f.external_encoding)
      assert_equal(Encoding::UTF_8, f.internal_encoding)
    }
  }
end
|
|
|
|
|
|
|
|
# STDIN reports Encoding.default_external as its external encoding and has no
# internal encoding.
def test_stdin
  assert_equal(Encoding.default_external, STDIN.external_encoding)
  assert_nil(STDIN.internal_encoding)
end
|
|
|
|
|
|
|
|
# STDOUT reports neither an external nor an internal encoding.
def test_stdout
  assert_nil(STDOUT.external_encoding)
  assert_nil(STDOUT.internal_encoding)
end
|
|
|
|
|
|
|
|
# STDERR reports neither an external nor an internal encoding.
def test_stderr
  assert_nil(STDERR.external_encoding)
  assert_nil(STDERR.internal_encoding)
end
|
|
|
|
|
2007-12-23 11:10:36 -05:00
|
|
|
# Reads a UTF-8 file through a utf-8 -> iso-8859-1 conversion and calls gets
# with a separator given in the internal encoding (iso-8859-1 "\xFF").
# The result must be tagged iso-8859-1 and end at the separator.
# See [ruby-core:14288].
def test_terminator_conversion
  with_tmpdir {
    generate_file('tmp', "before \u00FF after")
    s = open("tmp", "r:utf-8:iso-8859-1") {|f|
      f.gets("\xFF".force_encoding("iso-8859-1"))
    }
    assert_equal(Encoding.find("iso-8859-1"), s.encoding)
    assert_str_equal("before \xFF".force_encoding("iso-8859-1"), s, '[ruby-core:14288]')
  }
end
|
|
|
|
|
2007-12-23 13:22:59 -05:00
|
|
|
# Reads an EUC-JP file through an euc-jp -> utf-8 conversion and calls gets
# with the character "\xA2\xA2" (transcoded to UTF-8) as the separator.
# The expected result is the *entire* content: the separator must not match.
# NOTE(review): presumably because the bytes \xA2\xA2 only occur across the
# boundary of two adjacent two-byte characters -- confirm.
# See [ruby-core:14319].
def test_terminator_conversion2
  with_tmpdir {
    generate_file('tmp', "before \xA1\xA2\xA2\xA3 after")
    s = open("tmp", "r:euc-jp:utf-8") {|f|
      f.gets("\xA2\xA2".force_encoding("euc-jp").encode("utf-8"))
    }
    assert_equal(Encoding.find("utf-8"), s.encoding)
    assert_str_equal("before \xA1\xA2\xA2\xA3 after".force_encoding("euc-jp").encode("utf-8"), s, '[ruby-core:14319]')
  }
end
|
|
|
|
|
2007-12-25 22:07:08 -05:00
|
|
|
# Reads an ISO-2022-JP file through an iso-2022-jp -> euc-jp conversion and
# calls gets with "0" as the separator.  The byte \x30 ("0") appears inside
# the escaped section of the source, yet the expected result is the whole
# content transcoded to EUC-JP, i.e. the separator must not match there.
def test_terminator_stateful_conversion
  with_tmpdir {
    src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp")
    generate_file('tmp', src)
    s = open("tmp", "r:iso-2022-jp:euc-jp") {|f|
      f.gets("0".force_encoding("euc-jp"))
    }
    assert_equal(Encoding.find("euc-jp"), s.encoding)
    assert_str_equal(src.encode("euc-jp"), s)
  }
end
|
|
|
|
|
2007-12-23 20:09:37 -05:00
|
|
|
# On a file opened as EUC-JP with no internal encoding, gets with a non-ASCII
# separator tagged UTF-8 must raise ArgumentError.
def test_nonascii_terminator
  with_tmpdir {
    generate_file('tmp', "before \xA2\xA2 after")
    open("tmp", "r:euc-jp") {|f|
      assert_raise(ArgumentError) {
        f.gets("\xA2\xA2".force_encoding("utf-8"))
      }
    }
  }
end
|
|
|
|
|
2007-12-23 18:57:14 -05:00
|
|
|
# Same idea as the file-based separator tests, but over a pipe that converts
# euc-jp -> utf-8: gets with the UTF-8 form of "\xA2\xA2" must return the data
# up to and including that separator.
def test_pipe_terminator_conversion
  with_pipe("euc-jp:utf-8") {|r, w|
    w.write "before \xa2\xa2 after"
    rs = "\xA2\xA2".encode("utf-8", "euc-jp")
    w.close
    # Guard against the read blocking forever.  Timeout.timeout is the
    # module-level form; the bare top-level `timeout` is deprecated.
    Timeout.timeout(1) {
      assert_equal("before \xa2\xa2".encode("utf-8", "euc-jp"),
                   r.gets(rs))
    }
  }
end
|
|
|
|
|
|
|
|
# getc on a pipe converting euc-jp -> utf-8 returns the two-byte input
# "\xa1\xa1" as a single transcoded character.
def test_pipe_conversion
  with_pipe("euc-jp:utf-8") {|r, w|
    w.write "\xa1\xa1"
    assert_equal("\xa1\xa1".encode("utf-8", "euc-jp"), r.getc)
  }
end
|
|
|
|
|
|
|
|
# getc on a converting pipe must still return the whole transcoded character
# when its two bytes arrive in separate writes with a pause in between.
def test_pipe_convert_partial_read
  with_pipe("euc-jp:utf-8") {|r, w|
    begin
      # Writer thread: deliver the character one byte at a time.
      t = Thread.new {
        w.write "\xa1"
        sleep 0.1
        w.write "\xa1"
      }
      assert_equal("\xa1\xa1".encode("utf-8", "euc-jp"), r.getc)
    ensure
      # Wait for the writer before with_pipe closes the write end.
      t.join if t
    end
  }
end
|
|
|
|
|
2008-01-03 09:49:38 -05:00
|
|
|
# getc on an ISO-2022-JP file read through an iso-2022-jp -> euc-jp
# conversion returns the two escaped characters one at a time as EUC-JP.
def test_getc_stateful_conversion
  with_tmpdir {
    src = "\e$B\x23\x30\x23\x31\e(B".force_encoding("iso-2022-jp")
    generate_file('tmp', src)
    open("tmp", "r:iso-2022-jp:euc-jp") {|f|
      assert_equal("\xa3\xb0".force_encoding("euc-jp"), f.getc)
      assert_equal("\xa3\xb1".force_encoding("euc-jp"), f.getc)
    }
  }
end
|
|
|
|
|
2007-12-23 11:10:36 -05:00
|
|
|
# For every encoding in ENCS, gets on an ASCII-only file opened as "r:<enc>"
# returns a string tagged with that encoding and equal to the ASCII source.
def test_open_ascii
  with_tmpdir {
    src = "abc\n"
    # Write the fixture from +src+ so file content and expectation cannot drift.
    generate_file('tmp', src)
    ENCS.each {|enc|
      s = open('tmp', "r:#{enc}") {|f| f.gets }
      assert_equal(enc, s.encoding)
      assert_str_equal(src, s)
    }
  }
end
|
2007-12-23 11:10:36 -05:00
|
|
|
|
|
|
|
# For every encoding in ENCS, gets on a file holding the bytes \xc2\xa1\n,
# opened as "r:<enc>", returns those same bytes tagged with that encoding.
def test_open_nonascii
  with_tmpdir {
    src = "\xc2\xa1\n"
    generate_file('tmp', src)
    ENCS.each {|enc|
      content = src.dup.force_encoding(enc)
      s = open('tmp', "r:#{enc}") {|f| f.gets }
      assert_equal(enc, s.encoding)
      assert_str_equal(content, s)
    }
  }
end
|
|
|
|
|
2007-12-23 11:35:43 -05:00
|
|
|
# For every encoding in ENCS, checks the encoding and content of what each
# reading method returns on a file holding the bytes \xc2\xa1\n opened as
# "r:<enc>":
#
# * getc / readchar return the first character, tagged <enc>;
# * gets / readline / readlines / each_line / read return the whole line,
#   tagged <enc>;
# * read(1) / readpartial(1) / sysread(1) return the first byte, tagged
#   ASCII-8BIT.
#
# Each reader gets a freshly opened file, and every assertion carries a
# message naming the reader and the encoding.
def test_read_encoding
  with_tmpdir {
    src = "\xc2\xa1\n".force_encoding("ASCII-8BIT")
    # Write the fixture from +src+ so file content and expectation cannot drift.
    generate_file('tmp', src)
    ENCS.each {|enc|
      content = src.dup.force_encoding(enc)
      mode = "r:#{enc}"

      # Character-oriented readers: first character in the file's encoding.
      [:getc, :readchar].each {|meth|
        label = "#{meth} (#{enc})"
        open('tmp', mode) {|f|
          s = f.__send__(meth)
          assert_equal(enc, s.encoding, label)
          assert_str_equal(content[0], s, label)
        }
      }

      # Line-oriented readers returning a single string.
      [:gets, :readline].each {|meth|
        label = "#{meth} (#{enc})"
        open('tmp', mode) {|f|
          s = f.__send__(meth)
          assert_equal(enc, s.encoding, label)
          assert_str_equal(content, s, label)
        }
      }

      open('tmp', mode) {|f|
        label = "readlines (#{enc})"
        lines = f.readlines
        assert_equal(1, lines.length, label)
        s = lines[0]
        assert_equal(enc, s.encoding, label)
        assert_str_equal(content, s, label)
      }

      open('tmp', mode) {|f|
        label = "each_line (#{enc})"
        f.each_line {|s|
          assert_equal(enc, s.encoding, label)
          assert_str_equal(content, s, label)
        }
      }

      open('tmp', mode) {|f|
        label = "read (#{enc})"
        s = f.read
        assert_equal(enc, s.encoding, label)
        assert_str_equal(content, s, label)
      }

      # Length-limited readers: one raw byte, always tagged ASCII-8BIT.
      [:read, :readpartial, :sysread].each {|meth|
        label = "#{meth}(1) (#{enc})"
        open('tmp', mode) {|f|
          s = f.__send__(meth, 1)
          assert_equal(Encoding::ASCII_8BIT, s.encoding, label)
          assert_str_equal(src[0], s, label)
        }
      }
    }
  }
end
|
|
|
|
|
2007-12-23 11:35:43 -05:00
|
|
|
# Writing to a file opened with plain "w" stores the bytes unchanged no
# matter which encoding the written string is tagged with: the same three
# bytes are written once per encoding in ENCS and then read back as binary.
def test_write_noenc
  src = "\xc2\xa1\n".force_encoding("ascii-8bit")
  with_tmpdir {
    open('tmp', "w") {|f|
      ENCS.each {|enc| f.write src.dup.force_encoding(enc) }
    }
    open('tmp', 'r:ascii-8bit') {|f|
      assert_equal(src*ENCS.length, f.read)
    }
  }
end
|
|
|
|
|
2007-12-23 12:05:40 -05:00
|
|
|
# Printing a UTF-8 string to a file opened as "w:EUC-JP" stores it as EUC-JP,
# and reading the file back as 'r:EUC-JP:UTF-8' returns the original UTF-8
# string.
def test_write_conversion
  utf8 = "\u6666"
  eucjp = "\xb3\xa2".force_encoding("EUC-JP")
  with_tmpdir {
    open('tmp', "w:EUC-JP") {|f|
      assert_equal(Encoding::EUC_JP, f.external_encoding)
      assert_nil(f.internal_encoding)
      f.print utf8
    }
    # The bytes on disk must be the EUC-JP form of the character.
    assert_equal(eucjp, File.read('tmp').force_encoding("EUC-JP"))
    open('tmp', 'r:EUC-JP:UTF-8') {|f|
      assert_equal(Encoding::EUC_JP, f.external_encoding)
      assert_equal(Encoding::UTF_8, f.internal_encoding)
      assert_equal(utf8, f.read)
    }
  }
end
|
|
|
|
|
2007-12-23 12:05:40 -05:00
|
|
|
# Encoding behaviour of the read end of IO.pipe:
#
# 1. no argument      -> external = Encoding.default_external, no internal;
#                        data comes back re-tagged but otherwise unchanged;
# 2. "EUC-JP"         -> external = EUC-JP, no internal, no conversion;
# 3. "UTF-8:EUC-JP"   -> external = UTF-8, internal = EUC-JP, data converted;
# 4. each ENCS member -> getc returns a string tagged with that encoding;
# 5. "<enc>:UTF-8" for the non-binary, non-UTF-8 members of ENCS -> read
#    returns the input transcoded from <enc> to UTF-8.
def test_pipe
  utf8 = "\u6666"
  eucjp = "\xb3\xa2".force_encoding("EUC-JP")

  with_pipe {|r,w|
    assert_equal(Encoding.default_external, r.external_encoding)
    assert_nil(r.internal_encoding)
    w << utf8
    w.close
    s = r.read
    assert_equal(Encoding.default_external, s.encoding)
    assert_str_equal(utf8.dup.force_encoding(Encoding.default_external), s)
  }

  with_pipe("EUC-JP") {|r,w|
    assert_equal(Encoding::EUC_JP, r.external_encoding)
    assert_nil(r.internal_encoding)
    w << eucjp
    w.close
    assert_equal(eucjp, r.read)
  }

  with_pipe("UTF-8:EUC-JP") {|r,w|
    assert_equal(Encoding::UTF_8, r.external_encoding)
    assert_equal(Encoding::EUC_JP, r.internal_encoding)
    w << utf8
    w.close
    assert_equal(eucjp, r.read)
  }

  raw = "\xc2\xa1"

  ENCS.each {|enc|
    with_pipe(enc) {|r, w|
      w << raw
      w.close
      s = r.getc
      assert_equal(enc, s.encoding)
    }
  }

  (ENCS - [Encoding::ASCII_8BIT, Encoding::UTF_8]).each {|enc|
    with_pipe("#{enc}:UTF-8") {|r, w|
      w << raw
      w.close
      s = r.read
      assert_equal(Encoding::UTF_8, s.encoding)
      # Compare against an independently transcoded copy of the input;
      # comparing s with s.encode("UTF-8") would always pass.
      assert_equal(raw.dup.force_encoding(enc).encode("UTF-8"), s)
    }
  }
end
|
|
|
|
|
2007-12-23 10:06:14 -05:00
|
|
|
end
|
|
|
|
|