mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00

Scan the coderange in the given encoding --- ext/-test-/string/enc_str_buf_cat.c | 14 ++++++++++++++ string.c | 32 ++++++++++++++++++++++--------- test/-ext-/string/test_enc_str_buf_cat.rb | 9 +++++++++ 3 files changed, 46 insertions(+), 9 deletions(-) Work around transcoding issue with non-ASCII compatible encodings and xml escaping When using a non-ASCII compatible source and destination encoding and xml escaping (the :xml option to String#encode), the resulting string was broken, as it used the correct non-ASCII compatible encoding, but contained data that was ASCII-compatible instead of compatible with the string's encoding. Work around this issue by detecting the case where both the source and destination encoding are non-ASCII compatible, and transcoding the source string from the non-ASCII compatible encoding to UTF-8. The xml escaping code will correctly handle the UTF-8 source string and then return the correctly encoded and escaped value. Fixes [Bug #12052] Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org> --- test/ruby/test_transcode.rb | 19 +++++++++++++++++++ transcode.c | 6 ++++++ 2 files changed, 25 insertions(+) - add regression tests for U+6E7F (湿) in ISO-2022-JP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In ISO-2022-JP, the bytes used to encode 湿 are the same as those for "<>". This adds regression tests to make sure that these bytes, when representing 湿, are NOT escaped with encode("ISO-2022-JP", xml: :text) or similar. These are additional regression tests for #12052. --- test/ruby/test_transcode.rb | 3 +++ 1 file changed, 3 insertions(+)
28 lines
825 B
C
28 lines
825 B
C
#include "ruby/ruby.h"
|
|
#include "ruby/encoding.h"
|
|
|
|
static VALUE
|
|
enc_str_buf_cat(VALUE str, VALUE str2)
|
|
{
|
|
return rb_enc_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2), rb_enc_get(str2));
|
|
}
|
|
|
|
static VALUE
|
|
str_conv_enc_opts(VALUE str, VALUE from, VALUE to, VALUE ecflags, VALUE ecopts)
|
|
{
|
|
rb_encoding *from_enc = NIL_P(from) ? NULL : rb_to_encoding(from);
|
|
rb_encoding *to_enc = NIL_P(to) ? NULL : rb_to_encoding(to);
|
|
int flags = NUM2INT(ecflags);
|
|
if (!NIL_P(ecopts)) {
|
|
Check_Type(ecopts, T_HASH);
|
|
OBJ_FREEZE(ecopts);
|
|
}
|
|
return rb_str_conv_enc_opts(str, from_enc, to_enc, flags, ecopts);
|
|
}
|
|
|
|
void
|
|
Init_string_enc_str_buf_cat(VALUE klass)
|
|
{
|
|
rb_define_method(klass, "enc_str_buf_cat", enc_str_buf_cat, 1);
|
|
rb_define_method(klass, "str_conv_enc_opts", str_conv_enc_opts, 4);
|
|
}
|