mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* transcode_data.h (TRANSCODE_ERROR): removed.
* tool/transcode-tblgen.rb: 8bit byte of ASCII-8BIT is a valid (but unique to ASCII-8BIT) character. * transcode.c (rb_eConversionUndefined): new error. (rb_eInvalidByteSequence): ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18524 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
47c8401c5a
commit
5ade93542f
5 changed files with 53 additions and 38 deletions
10
ChangeLog
10
ChangeLog
|
@ -1,3 +1,13 @@
|
|||
Tue Aug 12 16:13:45 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* transcode_data.h (TRANSCODE_ERROR): removed.
|
||||
|
||||
* tool/transcode-tblgen.rb: 8bit byte of ASCII-8BIT is a valid
|
||||
(but unique to ASCII-8BIT) character.
|
||||
|
||||
* transcode.c (rb_eConversionUndefined): new error.
|
||||
(rb_eInvalidByteSequence): ditto.
|
||||
|
||||
Tue Aug 12 16:08:51 2008 Minero Aoki <aamine@loveruby.net>
|
||||
|
||||
* lib/net/pop.rb: POP3Command should export @socket since POP
|
||||
|
|
|
@ -25,9 +25,9 @@ class TestTranscode < Test::Unit::TestCase
|
|||
assert_raise(ArgumentError) { 'abc'.encode!('foo', 'bar') }
|
||||
assert_raise(ArgumentError) { 'abc'.force_encoding('utf-8').encode('foo') }
|
||||
assert_raise(ArgumentError) { 'abc'.force_encoding('utf-8').encode!('foo') }
|
||||
assert_raise(RuntimeError) { "\x80".encode('utf-8','ASCII-8BIT') }
|
||||
assert_raise(RuntimeError) { "\x80".encode('utf-8','US-ASCII') }
|
||||
assert_raise(RuntimeError) { "\xA5".encode('utf-8','iso-8859-3') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\x80".encode('utf-8','ASCII-8BIT') }
|
||||
assert_raise(Encoding::InvalidByteSequence) { "\x80".encode('utf-8','US-ASCII') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\xA5".encode('utf-8','iso-8859-3') }
|
||||
end
|
||||
|
||||
def test_arguments
|
||||
|
@ -342,40 +342,39 @@ class TestTranscode < Test::Unit::TestCase
|
|||
check_both_ways("\u71FC", "\xE0\x9E", 'shift_jis') #
|
||||
check_both_ways("\u71F9", "\xE0\x9F", 'shift_jis') #
|
||||
check_both_ways("\u73F1", "\xE0\xFC", 'shift_jis') #
|
||||
assert_raise(RuntimeError) { "\xEF\x40".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(RuntimeError) { "\xEF\x7E".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(RuntimeError) { "\xEF\x80".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(RuntimeError) { "\xEF\x9E".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(RuntimeError) { "\xEF\x9F".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(RuntimeError) { "\xEF\xFC".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(RuntimeError) { "\xF0\x40".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(RuntimeError) { "\xF0\x7E".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(RuntimeError) { "\xF0\x80".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(RuntimeError) { "\xF0\x9E".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(RuntimeError) { "\xF0\x9F".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(RuntimeError) { "\xF0\xFC".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\xEF\x40".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\xEF\x7E".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\xEF\x80".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\xEF\x9E".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\xEF\x9F".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\xEF\xFC".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\xF0\x40".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\xF0\x7E".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\xF0\x80".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\xF0\x9E".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\xF0\x9F".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\xF0\xFC".encode("utf-8", 'shift_jis') }
|
||||
check_both_ways("\u9ADC", "\xFC\x40", 'shift_jis') # 髜
|
||||
assert_raise(RuntimeError) { "\xFC\x7E".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(RuntimeError) { "\xFC\x80".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(RuntimeError) { "\xFC\x9E".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(RuntimeError) { "\xFC\x9F".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(RuntimeError) { "\xFC\xFC".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\xFC\x7E".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\xFC\x80".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\xFC\x9E".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\xFC\x9F".encode("utf-8", 'shift_jis') }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\xFC\xFC".encode("utf-8", 'shift_jis') }
|
||||
check_both_ways("\u677E\u672C\u884C\u5F18", "\x8f\xbc\x96\x7b\x8d\x73\x8d\x4f", 'shift_jis') # 松本行弘
|
||||
check_both_ways("\u9752\u5C71\u5B66\u9662\u5927\u5B66", "\x90\xC2\x8E\x52\x8A\x77\x89\x40\x91\xE5\x8A\x77", 'shift_jis') # 青山学院大学
|
||||
check_both_ways("\u795E\u6797\u7FA9\u535A", "\x90\x5F\x97\xD1\x8B\x60\x94\x8E", 'shift_jis') # 神林義博
|
||||
end
|
||||
|
||||
def test_iso_2022_jp
|
||||
assert_raise(RuntimeError) { "\x1b(A".encode("utf-8", "iso-2022-jp") }
|
||||
assert_raise(RuntimeError) { "\x1b$(A".encode("utf-8", "iso-2022-jp") }
|
||||
assert_raise(RuntimeError) { "\x1b$C".encode("utf-8", "iso-2022-jp") }
|
||||
assert_raise(RuntimeError) { "\x0e".encode("utf-8", "iso-2022-jp") }
|
||||
assert_raise(RuntimeError) { "\x80".encode("utf-8", "iso-2022-jp") }
|
||||
assert_raise(RuntimeError) { "\x1b$(Dd!\x1b(B".encode("utf-8", "iso-2022-jp") }
|
||||
assert_raise(RuntimeError) { "\u9299".encode("iso-2022-jp") }
|
||||
assert_raise(RuntimeError) { "\u9299".encode("iso-2022-jp") }
|
||||
assert_raise(RuntimeError) { "\uff71\uff72\uff73\uff74\uff75".encode("iso-2022-jp") }
|
||||
assert_raise(RuntimeError) { "\x1b(I12345\x1b(B".encode("utf-8", "iso-2022-jp") }
|
||||
assert_raise(Encoding::InvalidByteSequence) { "\x1b(A".encode("utf-8", "iso-2022-jp") }
|
||||
assert_raise(Encoding::InvalidByteSequence) { "\x1b$(A".encode("utf-8", "iso-2022-jp") }
|
||||
assert_raise(Encoding::InvalidByteSequence) { "\x1b$C".encode("utf-8", "iso-2022-jp") }
|
||||
assert_raise(Encoding::InvalidByteSequence) { "\x0e".encode("utf-8", "iso-2022-jp") }
|
||||
assert_raise(Encoding::InvalidByteSequence) { "\x80".encode("utf-8", "iso-2022-jp") }
|
||||
assert_raise(Encoding::InvalidByteSequence) { "\x1b$(Dd!\x1b(B".encode("utf-8", "iso-2022-jp") }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\u9299".encode("iso-2022-jp") }
|
||||
assert_raise(Encoding::ConversionUndefined) { "\uff71\uff72\uff73\uff74\uff75".encode("iso-2022-jp") }
|
||||
assert_raise(Encoding::InvalidByteSequence) { "\x1b(I12345\x1b(B".encode("utf-8", "iso-2022-jp") }
|
||||
assert_equal("\xA1\xA1".force_encoding("euc-jp"),
|
||||
"\e$B!!\e(B".encode("EUC-JP", "ISO-2022-JP"))
|
||||
assert_equal("\e$B!!\e(B".force_encoding("ISO-2022-JP"),
|
||||
|
|
|
@ -317,7 +317,8 @@ class ActionMap
|
|||
min = max = 0
|
||||
end
|
||||
|
||||
if n = OffsetsMemo[offsets]
|
||||
offsets_key = [min, max, offsets[min..max]]
|
||||
if n = OffsetsMemo[offsets_key]
|
||||
offsets_name = n
|
||||
offsets_code = ''
|
||||
else
|
||||
|
@ -326,7 +327,7 @@ class ActionMap
|
|||
static const unsigned char
|
||||
#{offsets_name}[#{2+max-min+1}] = #{format_offsets(min,max,offsets)};
|
||||
End
|
||||
OffsetsMemo[offsets] = offsets_name
|
||||
OffsetsMemo[offsets_key] = offsets_name
|
||||
end
|
||||
|
||||
if n = InfosMemo[infos]
|
||||
|
@ -524,6 +525,7 @@ ValidEncoding = {
|
|||
}
|
||||
|
||||
{
|
||||
'ASCII-8BIT' => '1byte',
|
||||
'ISO-8859-1' => '1byte',
|
||||
'ISO-8859-2' => '1byte',
|
||||
'ISO-8859-3' => '1byte',
|
||||
|
|
14
transcode.c
14
transcode.c
|
@ -15,6 +15,9 @@
|
|||
#include "transcode_data.h"
|
||||
#include <ctype.h>
|
||||
|
||||
VALUE rb_eConversionUndefined;
|
||||
VALUE rb_eInvalidByteSequence;
|
||||
|
||||
static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace;
|
||||
#define INVALID_IGNORE 0x1
|
||||
#define INVALID_REPLACE 0x2
|
||||
|
@ -895,7 +898,7 @@ resume:
|
|||
goto resume;
|
||||
}
|
||||
rb_trans_close(ts);
|
||||
rb_raise(TRANSCODE_ERROR, "invalid byte sequence");
|
||||
rb_raise(rb_eInvalidByteSequence, "invalid byte sequence");
|
||||
}
|
||||
if (ret == transcode_undefined_conversion) {
|
||||
/* valid character in from encoding
|
||||
|
@ -909,7 +912,7 @@ resume:
|
|||
goto resume;
|
||||
}
|
||||
rb_trans_close(ts);
|
||||
rb_raise(TRANSCODE_ERROR, "conversion undefined for byte sequence (maybe invalid byte sequence)");
|
||||
rb_raise(rb_eConversionUndefined, "conversion undefined for byte sequence (maybe invalid byte sequence)");
|
||||
}
|
||||
if (ret == transcode_obuf_full) {
|
||||
more_output_buffer(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
|
||||
|
@ -974,7 +977,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
break;
|
||||
}
|
||||
rb_trans_close(ts);
|
||||
rb_raise(TRANSCODE_ERROR, "invalid byte sequence");
|
||||
rb_raise(rb_eInvalidByteSequence, "invalid byte sequence");
|
||||
break;
|
||||
|
||||
case transcode_undefined_conversion:
|
||||
|
@ -989,7 +992,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
break;
|
||||
}
|
||||
rb_trans_close(ts);
|
||||
rb_raise(TRANSCODE_ERROR, "conversion undefined for byte sequence (maybe invalid byte sequence)");
|
||||
rb_raise(rb_eConversionUndefined, "conversion undefined for byte sequence (maybe invalid byte sequence)");
|
||||
break;
|
||||
|
||||
case transcode_obuf_full:
|
||||
|
@ -1204,6 +1207,9 @@ rb_str_transcode(VALUE str, VALUE to)
|
|||
void
|
||||
Init_transcode(void)
|
||||
{
|
||||
rb_eConversionUndefined = rb_define_class_under(rb_cEncoding, "ConversionUndefined", rb_eStandardError);
|
||||
rb_eInvalidByteSequence = rb_define_class_under(rb_cEncoding, "InvalidByteSequence", rb_eStandardError);
|
||||
|
||||
transcoder_table = st_init_strcasetable();
|
||||
|
||||
sym_invalid = ID2SYM(rb_intern("invalid"));
|
||||
|
|
|
@ -125,6 +125,4 @@ typedef struct {
|
|||
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);
|
||||
void rb_register_transcoder(const rb_transcoder *);
|
||||
|
||||
#define TRANSCODE_ERROR rb_eRuntimeError /*change exception*/
|
||||
|
||||
#endif /* RUBY_TRANSCODE_DATA_H */
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue