mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* transcode.c (decorator_names): extracted from rb_econv_open.
(rb_econv_open): use decorator_names. (econv_args): extracted from econv_init. (econv_init): use econv_args. (decorate_convpath): new function. (search_convpath_i): new function. (econv_s_search_convpath): new method. (Init_transcode): new method defined. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19305 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
89a5c998c7
commit
2d80b1695d
3 changed files with 226 additions and 72 deletions
11
ChangeLog
11
ChangeLog
|
@ -1,3 +1,14 @@
|
|||
Fri Sep 12 22:39:46 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* transcode.c (decorator_names): extracted from rb_econv_open.
|
||||
(rb_econv_open): use decorator_names.
|
||||
(econv_args): extracted from econv_init.
|
||||
(econv_init): use econv_args.
|
||||
(decorate_convpath): new function.
|
||||
(search_convpath_i): new function.
|
||||
(econv_s_search_convpath): new method.
|
||||
(Init_transcode): new method defined.
|
||||
|
||||
Fri Sep 12 21:55:43 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* transcode.c (rb_econv_t): new field: num_allocated.
|
||||
|
|
|
@ -816,6 +816,7 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
iso88591 = Encoding::ISO_8859_1
|
||||
iso2022jp = Encoding::ISO_2022_JP
|
||||
siso2022jp = Encoding::STATELESS_ISO_2022_JP
|
||||
|
||||
assert_equal([], Encoding::Converter.new("", "").convpath)
|
||||
assert_equal([[eucjp, utf8], [utf8, iso88591]],
|
||||
Encoding::Converter.new(eucjp, iso88591).convpath)
|
||||
|
@ -833,4 +834,19 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
assert_equal([[utf16be, utf8], "universal_newline", [utf8, utf16le]],
|
||||
Encoding::Converter.new(utf16be, utf16le, universal_newline: true).convpath)
|
||||
end
|
||||
|
||||
def test_search_convpath
  # Exercises Encoding::Converter.search_convpath with encoding names,
  # Encoding objects, and the universal_newline option.
  latin1 = Encoding::ISO_8859_1
  utf8   = Encoding::UTF_8
  eucjp  = Encoding::EUC_JP
  utf32  = Encoding::UTF_32BE
  conv   = Encoding::Converter

  latin1_to_utf8 = [latin1, utf8]
  utf8_to_eucjp  = [utf8, eucjp]
  plain_path     = [latin1_to_utf8, utf8_to_eucjp]

  # Names and Encoding objects must yield the same path.
  assert_equal(plain_path, conv.search_convpath("ISO-8859-1", "EUC-JP"))
  assert_equal(plain_path, conv.search_convpath(latin1, eucjp))

  # The newline decorator is appended after the last step here ...
  assert_equal(plain_path + ["universal_newline"],
               conv.search_convpath("ISO-8859-1", "EUC-JP", universal_newline: true))

  # ... but placed before the final step when the destination is UTF-32BE.
  assert_equal([latin1_to_utf8, "universal_newline", [utf8, utf32]],
               conv.search_convpath("ISO-8859-1", "UTF-32BE", universal_newline: true))
end
|
||||
end
|
||||
|
|
271
transcode.c
271
transcode.c
|
@ -948,48 +948,56 @@ rb_econv_open0(const char *sname, const char *dname, int ecflags)
|
|||
return ec;
|
||||
}
|
||||
|
||||
#define MAX_ECFLAGS_DECORATORS 32
|
||||
|
||||
static int
|
||||
decorator_names(int ecflags, const char **decorators_ret)
|
||||
{
|
||||
int num_decorators;
|
||||
|
||||
if ((ecflags & ECONV_CRLF_NEWLINE_DECORATOR) &&
|
||||
(ecflags & ECONV_CR_NEWLINE_DECORATOR))
|
||||
return -1;
|
||||
|
||||
if ((ecflags & (ECONV_CRLF_NEWLINE_DECORATOR|ECONV_CR_NEWLINE_DECORATOR)) &&
|
||||
(ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR))
|
||||
return -1;
|
||||
|
||||
if ((ecflags & ECONV_XML_TEXT_DECORATOR) &&
|
||||
(ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR))
|
||||
return -1;
|
||||
|
||||
num_decorators = 0;
|
||||
|
||||
if (ecflags & ECONV_XML_TEXT_DECORATOR)
|
||||
decorators_ret[num_decorators++] = "xml-text-escaped";
|
||||
if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR)
|
||||
decorators_ret[num_decorators++] = "xml-attr-content-escaped";
|
||||
if (ecflags & ECONV_XML_ATTR_QUOTE_DECORATOR)
|
||||
decorators_ret[num_decorators++] = "xml-attr-quoted";
|
||||
|
||||
if (ecflags & ECONV_CRLF_NEWLINE_DECORATOR)
|
||||
decorators_ret[num_decorators++] = "crlf_newline";
|
||||
if (ecflags & ECONV_CR_NEWLINE_DECORATOR)
|
||||
decorators_ret[num_decorators++] = "cr_newline";
|
||||
if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR)
|
||||
decorators_ret[num_decorators++] = "universal_newline";
|
||||
|
||||
return num_decorators;
|
||||
}
|
||||
|
||||
rb_econv_t *
|
||||
rb_econv_open(const char *sname, const char *dname, int ecflags)
|
||||
{
|
||||
rb_econv_t *ec;
|
||||
int num_decorators;
|
||||
const char *decorators[6];
|
||||
const char *decorators[MAX_ECFLAGS_DECORATORS];
|
||||
int i;
|
||||
|
||||
if ((ecflags & ECONV_CRLF_NEWLINE_DECORATOR) &&
|
||||
(ecflags & ECONV_CR_NEWLINE_DECORATOR))
|
||||
num_decorators = decorator_names(ecflags, decorators);
|
||||
if (num_decorators == -1)
|
||||
return NULL;
|
||||
|
||||
if ((ecflags & (ECONV_CRLF_NEWLINE_DECORATOR|ECONV_CR_NEWLINE_DECORATOR)) &&
|
||||
(ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR))
|
||||
return NULL;
|
||||
|
||||
if ((ecflags & ECONV_XML_TEXT_DECORATOR) &&
|
||||
(ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR))
|
||||
return NULL;
|
||||
|
||||
num_decorators = 0;
|
||||
|
||||
if (ecflags & ECONV_XML_TEXT_DECORATOR)
|
||||
if (!(decorators[num_decorators++] = "xml-text-escaped"))
|
||||
return NULL;
|
||||
if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR)
|
||||
if (!(decorators[num_decorators++] = "xml-attr-content-escaped"))
|
||||
return NULL;
|
||||
if (ecflags & ECONV_XML_ATTR_QUOTE_DECORATOR)
|
||||
if (!(decorators[num_decorators++] = "xml-attr-quoted"))
|
||||
return NULL;
|
||||
|
||||
if (ecflags & ECONV_CRLF_NEWLINE_DECORATOR)
|
||||
if (!(decorators[num_decorators++] = "crlf_newline"))
|
||||
return NULL;
|
||||
if (ecflags & ECONV_CR_NEWLINE_DECORATOR)
|
||||
if (!(decorators[num_decorators++] = "cr_newline"))
|
||||
return NULL;
|
||||
if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR)
|
||||
if (!(decorators[num_decorators++] = "universal_newline"))
|
||||
return NULL;
|
||||
|
||||
ec = rb_econv_open0(sname, dname, ecflags & ECONV_ERROR_HANDLER_MASK);
|
||||
if (!ec)
|
||||
return NULL;
|
||||
|
@ -1932,7 +1940,7 @@ VALUE
|
|||
rb_econv_open_exc(const char *sname, const char *dname, int ecflags)
|
||||
{
|
||||
VALUE mesg, exc;
|
||||
mesg = rb_str_new_cstr("code converter open failed (");
|
||||
mesg = rb_str_new_cstr("code converter not found (");
|
||||
econv_description(sname, dname, ecflags, mesg);
|
||||
rb_str_cat2(mesg, ")");
|
||||
exc = rb_exc_new3(rb_eNoConverter, mesg);
|
||||
|
@ -2641,6 +2649,160 @@ econv_s_asciicompat_encoding(VALUE klass, VALUE arg)
|
|||
return rb_enc_from_encoding(result_enc);
|
||||
}
|
||||
|
||||
static void
|
||||
econv_args(int argc, VALUE *argv,
|
||||
const char **sname_p, const char **dname_p,
|
||||
rb_encoding **senc_p, rb_encoding **denc_p,
|
||||
int *ecflags_p,
|
||||
VALUE *ecopts_p)
|
||||
{
|
||||
VALUE source_encoding, destination_encoding, opt, opthash, flags_v, ecopts;
|
||||
int sidx, didx;
|
||||
const char *sname, *dname;
|
||||
rb_encoding *senc, *denc;
|
||||
int ecflags;
|
||||
|
||||
rb_scan_args(argc, argv, "21", &source_encoding, &destination_encoding, &opt);
|
||||
|
||||
if (NIL_P(opt)) {
|
||||
ecflags = 0;
|
||||
ecopts = Qnil;
|
||||
}
|
||||
else if (!NIL_P(flags_v = rb_check_to_integer(opt, "to_int"))) {
|
||||
ecflags = NUM2INT(flags_v);
|
||||
ecopts = Qnil;
|
||||
}
|
||||
else {
|
||||
opthash = rb_convert_type(opt, T_HASH, "Hash", "to_hash");
|
||||
ecflags = rb_econv_prepare_opts(opthash, &ecopts);
|
||||
}
|
||||
|
||||
senc = NULL;
|
||||
sidx = rb_to_encoding_index(source_encoding);
|
||||
if (0 <= sidx) {
|
||||
senc = rb_enc_from_index(sidx);
|
||||
}
|
||||
else {
|
||||
StringValue(source_encoding);
|
||||
}
|
||||
|
||||
denc = NULL;
|
||||
didx = rb_to_encoding_index(destination_encoding);
|
||||
if (0 <= didx) {
|
||||
denc = rb_enc_from_index(didx);
|
||||
}
|
||||
else {
|
||||
StringValue(destination_encoding);
|
||||
}
|
||||
|
||||
sname = senc ? senc->name : StringValueCStr(source_encoding);
|
||||
dname = denc ? denc->name : StringValueCStr(destination_encoding);
|
||||
|
||||
*sname_p = sname;
|
||||
*dname_p = dname;
|
||||
*senc_p = senc;
|
||||
*denc_p = denc;
|
||||
*ecflags_p = ecflags;
|
||||
*ecopts_p = ecopts;
|
||||
}
|
||||
|
||||
static int
|
||||
decorate_convpath(VALUE convpath, int ecflags)
|
||||
{
|
||||
int num_decorators;
|
||||
const char *decorators[MAX_ECFLAGS_DECORATORS];
|
||||
int i;
|
||||
int n, len;
|
||||
|
||||
num_decorators = decorator_names(ecflags, decorators);
|
||||
if (num_decorators == -1)
|
||||
return -1;
|
||||
|
||||
len = n = RARRAY_LEN(convpath);
|
||||
if (n != 0) {
|
||||
VALUE pair = RARRAY_PTR(convpath)[n-1];
|
||||
const char *sname = rb_to_encoding(RARRAY_PTR(pair)[0])->name;
|
||||
const char *dname = rb_to_encoding(RARRAY_PTR(pair)[1])->name;
|
||||
transcoder_entry_t *entry = get_transcoder_entry(sname, dname);
|
||||
const rb_transcoder *tr = load_transcoder_entry(entry);
|
||||
if (!tr)
|
||||
return -1;
|
||||
if (!SUPPLEMENTAL_CONVERSION(tr->src_encoding, tr->dst_encoding) &&
|
||||
tr->asciicompat_type == asciicompat_encoder) {
|
||||
n--;
|
||||
rb_ary_store(convpath, len + num_decorators - 1, pair);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < num_decorators; i++)
|
||||
rb_ary_store(convpath, n + i, rb_str_new_cstr(decorators[i]));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
search_convpath_i(const char *sname, const char *dname, int depth, void *arg)
|
||||
{
|
||||
VALUE *ary_p = arg;
|
||||
VALUE v;
|
||||
|
||||
if (*ary_p == Qnil) {
|
||||
*ary_p = rb_ary_new();
|
||||
}
|
||||
|
||||
if (SUPPLEMENTAL_CONVERSION(sname, dname)) {
|
||||
v = rb_str_new_cstr(dname);
|
||||
}
|
||||
else {
|
||||
v = rb_assoc_new(make_encobj(sname), make_encobj(dname));
|
||||
}
|
||||
rb_ary_store(*ary_p, depth, v);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* Encoding::Converter.search_convpath(source_encoding, destination_encoding) -> ary
|
||||
* Encoding::Converter.search_convpath(source_encoding, destination_encoding, opt) -> ary
|
||||
*
|
||||
* returns the conversion path.
|
||||
*
|
||||
* p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP")
|
||||
* #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
|
||||
* # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>]]
|
||||
*
|
||||
* p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", universal_newline: true)
|
||||
* #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
|
||||
* # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>],
|
||||
* # "universal_newline"]
|
||||
*
|
||||
* p Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", universal_newline: true)
|
||||
* #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
|
||||
* # "universal_newline",
|
||||
* # [#<Encoding:UTF-8>, #<Encoding:UTF-32BE>]]
|
||||
*/
|
||||
static VALUE
|
||||
econv_s_search_convpath(int argc, VALUE *argv, VALUE klass)
|
||||
{
|
||||
const char *sname, *dname;
|
||||
rb_encoding *senc, *denc;
|
||||
int ecflags;
|
||||
VALUE ecopts;
|
||||
VALUE convpath;
|
||||
|
||||
econv_args(argc, argv, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
|
||||
|
||||
convpath = Qnil;
|
||||
transcode_search_path(sname, dname, search_convpath_i, &convpath);
|
||||
|
||||
if (NIL_P(convpath))
|
||||
rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags));
|
||||
|
||||
if (decorate_convpath(convpath, ecflags) == -1)
|
||||
rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags));
|
||||
|
||||
return convpath;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* Encoding::Converter.new(source_encoding, destination_encoding)
|
||||
|
@ -2684,53 +2846,17 @@ econv_s_asciicompat_encoding(VALUE klass, VALUE arg)
|
|||
static VALUE
|
||||
econv_init(int argc, VALUE *argv, VALUE self)
|
||||
{
|
||||
VALUE source_encoding, destination_encoding, opt, opthash, flags_v, ecopts;
|
||||
int sidx, didx;
|
||||
VALUE ecopts;
|
||||
const char *sname, *dname;
|
||||
rb_encoding *senc, *denc;
|
||||
rb_econv_t *ec;
|
||||
int ecflags;
|
||||
|
||||
rb_scan_args(argc, argv, "21", &source_encoding, &destination_encoding, &opt);
|
||||
|
||||
if (NIL_P(opt)) {
|
||||
ecflags = 0;
|
||||
ecopts = Qnil;
|
||||
}
|
||||
else if (!NIL_P(flags_v = rb_check_to_integer(opt, "to_int"))) {
|
||||
ecflags = NUM2INT(flags_v);
|
||||
ecopts = Qnil;
|
||||
}
|
||||
else {
|
||||
opthash = rb_convert_type(opt, T_HASH, "Hash", "to_hash");
|
||||
ecflags = rb_econv_prepare_opts(opthash, &ecopts);
|
||||
}
|
||||
|
||||
senc = NULL;
|
||||
sidx = rb_to_encoding_index(source_encoding);
|
||||
if (0 <= sidx) {
|
||||
senc = rb_enc_from_index(sidx);
|
||||
}
|
||||
else {
|
||||
StringValue(source_encoding);
|
||||
}
|
||||
|
||||
denc = NULL;
|
||||
didx = rb_to_encoding_index(destination_encoding);
|
||||
if (0 <= didx) {
|
||||
denc = rb_enc_from_index(didx);
|
||||
}
|
||||
else {
|
||||
StringValue(destination_encoding);
|
||||
}
|
||||
|
||||
sname = senc ? senc->name : StringValueCStr(source_encoding);
|
||||
dname = denc ? denc->name : StringValueCStr(destination_encoding);
|
||||
|
||||
if (DATA_PTR(self)) {
|
||||
rb_raise(rb_eTypeError, "already initialized");
|
||||
}
|
||||
|
||||
econv_args(argc, argv, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
|
||||
ec = rb_econv_open_opts(sname, dname, ecflags, ecopts);
|
||||
if (!ec) {
|
||||
rb_exc_raise(rb_econv_open_exc(sname, dname, ecflags));
|
||||
|
@ -3690,6 +3816,7 @@ Init_transcode(void)
|
|||
rb_cEncodingConverter = rb_define_class_under(rb_cEncoding, "Converter", rb_cData);
|
||||
rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate);
|
||||
rb_define_singleton_method(rb_cEncodingConverter, "asciicompat_encoding", econv_s_asciicompat_encoding, 1);
|
||||
rb_define_singleton_method(rb_cEncodingConverter, "search_convpath", econv_s_search_convpath, -1);
|
||||
rb_define_method(rb_cEncodingConverter, "initialize", econv_init, -1);
|
||||
rb_define_method(rb_cEncodingConverter, "inspect", econv_inspect, 0);
|
||||
rb_define_method(rb_cEncodingConverter, "convpath", econv_convpath, 0);
|
||||
|
|
Loading…
Reference in a new issue