mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* transcode.c (rb_cEncodingConverter): new class Encoding::Converter.
(econv_free): new function. (econv_s_allocate): ditto. (econv_init): ditto. (check_econv): ditto. (econv_primitive_convert): new method. (Init_transcode): define Encoding::Converter. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18544 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
c523f6f15e
commit
edcd02cb5a
3 changed files with 124 additions and 0 deletions
10
ChangeLog
10
ChangeLog
|
@ -1,3 +1,13 @@
|
|||
Tue Aug 12 23:42:31 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* transcode.c (rb_cEncodingConverter): new class Encoding::Converter.
|
||||
(econv_free): new function.
|
||||
(econv_s_allocate): ditto.
|
||||
(econv_init): ditto.
|
||||
(check_econv): ditto.
|
||||
(econv_primitive_convert): new method.
|
||||
(Init_transcode): define Encoding::Converter.
|
||||
|
||||
Tue Aug 12 23:16:09 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* string.c (rb_str_splice_0): call rb_str_drop_bytes, not rb_str_drop.
|
||||
|
|
24
test/ruby/test_econv.rb
Normal file
24
test/ruby/test_econv.rb
Normal file
|
@ -0,0 +1,24 @@
|
|||
require 'test/unit'
|
||||
|
||||
class TestEncodingConverter < Test::Unit::TestCase
  # Creates a converter from +from+ to +to+, performs a single
  # primitive_convert(src, dst, flags) call and checks three things:
  # the returned status, and the contents of +src+ and +dst+ after the call.
  def assert_econv(ret_expected, src_expected, dst_expected, from, to, src, dst, flags=0)
    converter = Encoding::Converter.new(from, to)
    result = converter.primitive_convert(src, dst, flags)
    assert_equal(ret_expected, result)
    assert_equal(src_expected, src)
    assert_equal(dst_expected, dst)
  end

  # Statuses expected for empty input, partial input and an empty output buffer.
  def test_eucjp_to_utf8
    assert_econv(:finished, "", "", "EUC-JP", "UTF-8", "", "")
    assert_econv(:ibuf_empty, "", "", "EUC-JP", "UTF-8", "", "", Encoding::Converter::PARTIAL_INPUT)
    assert_econv(:finished, "", "", "EUC-JP", "UTF-8", "", " "*10)
    assert_econv(:obuf_full, "", "", "EUC-JP", "UTF-8", "a", "")
  end

  # Input containing "\x80" is expected to be reported as :invalid_input,
  # with any bytes following the offending byte left in the source string.
  def test_invalid
    assert_econv(:invalid_input, "", "", "EUC-JP", "UTF-8", "\x80", " "*10)
    assert_econv(:invalid_input, "", "a", "EUC-JP", "UTF-8", "a\x80", " "*10)
    assert_econv(:invalid_input, "\x80", "a", "EUC-JP", "UTF-8", "a\x80\x80", " "*10)
  end
end
|
90
transcode.c
90
transcode.c
|
@ -18,6 +18,8 @@
|
|||
VALUE rb_eConversionUndefined;
|
||||
VALUE rb_eInvalidByteSequence;
|
||||
|
||||
VALUE rb_cEncodingConverter;
|
||||
|
||||
static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace;
|
||||
#define INVALID_IGNORE 0x1
|
||||
#define INVALID_REPLACE 0x2
|
||||
|
@ -1219,6 +1221,88 @@ rb_str_transcode(VALUE str, VALUE to)
|
|||
return str_encode(1, &to, str);
|
||||
}
|
||||
|
||||
static void
|
||||
econv_free(rb_trans_t *ts)
|
||||
{
|
||||
rb_trans_close(ts);
|
||||
}
|
||||
|
||||
static VALUE
|
||||
econv_s_allocate(VALUE klass)
|
||||
{
|
||||
return Data_Wrap_Struct(klass, NULL, econv_free, NULL);
|
||||
}
|
||||
|
||||
static VALUE
|
||||
econv_init(VALUE self, VALUE from_encoding, VALUE to_encoding)
|
||||
{
|
||||
const char *from_e, *to_e;
|
||||
rb_trans_t *ts;
|
||||
|
||||
from_e = StringValueCStr(from_encoding);
|
||||
to_e = StringValueCStr(to_encoding);
|
||||
|
||||
if (DATA_PTR(self)) {
|
||||
rb_raise(rb_eTypeError, "already initialized");
|
||||
}
|
||||
|
||||
ts = rb_trans_open(from_e, to_e, 0);
|
||||
if (!ts) {
|
||||
rb_raise(rb_eArgError, "encoding convewrter not supported (from %s to %s)", from_e, to_e);
|
||||
}
|
||||
|
||||
DATA_PTR(self) = ts;
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
#define IS_ECONV(obj) (RDATA(obj)->dfree == (RUBY_DATA_FUNC)econv_free)
|
||||
|
||||
static rb_trans_t *
|
||||
check_econv(VALUE self)
|
||||
{
|
||||
Check_Type(self, T_DATA);
|
||||
if (!IS_ECONV(self)) {
|
||||
rb_raise(rb_eTypeError, "wrong argument type %s (expected Encoding::Converter)",
|
||||
rb_class2name(CLASS_OF(self)));
|
||||
}
|
||||
return DATA_PTR(self);
|
||||
}
|
||||
|
||||
static VALUE
|
||||
econv_primitive_convert(VALUE self, VALUE input, VALUE output, VALUE flags_v)
|
||||
{
|
||||
rb_trans_t *ts = check_econv(self);
|
||||
rb_trans_result_t res;
|
||||
const unsigned char *ip, *is;
|
||||
unsigned char *op, *os;
|
||||
int flags;
|
||||
|
||||
StringValue(input);
|
||||
StringValue(output);
|
||||
rb_str_modify(output);
|
||||
flags = NUM2INT(flags_v);
|
||||
|
||||
ip = (const unsigned char *)RSTRING_PTR(input);
|
||||
is = ip + RSTRING_LEN(input);
|
||||
|
||||
op = (unsigned char *)RSTRING_PTR(output);
|
||||
os = op + RSTRING_LEN(output);
|
||||
|
||||
res = rb_trans_conv(ts, &ip, is, &op, os, flags);
|
||||
rb_str_set_len(output, op-(unsigned char *)RSTRING_PTR(output));
|
||||
rb_str_drop_bytes(input, ip - (unsigned char *)RSTRING_PTR(input));
|
||||
|
||||
switch (res) {
|
||||
case transcode_invalid_input: return ID2SYM(rb_intern("invalid_input"));
|
||||
case transcode_undefined_conversion: return ID2SYM(rb_intern("undefined_conversion"));
|
||||
case transcode_obuf_full: return ID2SYM(rb_intern("obuf_full"));
|
||||
case transcode_ibuf_empty: return ID2SYM(rb_intern("ibuf_empty"));
|
||||
case transcode_finished: return ID2SYM(rb_intern("finished"));
|
||||
default: return INT2NUM(res);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Init_transcode(void)
|
||||
{
|
||||
|
@ -1234,4 +1318,10 @@ Init_transcode(void)
|
|||
|
||||
rb_define_method(rb_cString, "encode", str_encode, -1);
|
||||
rb_define_method(rb_cString, "encode!", str_encode_bang, -1);
|
||||
|
||||
rb_cEncodingConverter = rb_define_class_under(rb_cEncoding, "Converter", rb_cData);
|
||||
rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate);
|
||||
rb_define_method(rb_cEncodingConverter, "initialize", econv_init, 2);
|
||||
rb_define_method(rb_cEncodingConverter, "primitive_convert", econv_primitive_convert, 3);
|
||||
rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(PARTIAL_INPUT));
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue