1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

[Bug #18964] Update the code range of appended portion

This commit is contained in:
Nobuyoshi Nakada 2022-08-18 11:37:54 +09:00
parent 2a55c61ee7
commit 725626d890
Notes: git 2022-08-18 14:57:37 +09:00
4 changed files with 59 additions and 4 deletions

15
ext/-test-/econv/append.c Normal file
View file

@ -0,0 +1,15 @@
#include "ruby/ruby.h"
#include "ruby/encoding.h"
/*
 * Method body registered under the name "append" by Init_econv_append().
 *
 * Reads the rb_econv_t pointer stored in the receiver's data slot and
 * forwards it, together with `src` and `dst`, to rb_econv_str_append()
 * with no flags set.  The value produced by that call is returned as is.
 *
 * NOTE(review): no type checks are done on `self`, `src` or `dst` here;
 * presumably acceptable because this is a test-only extension -- confirm.
 */
static VALUE
econv_append(VALUE self, VALUE src, VALUE dst)
{
    const int no_flags = 0;
    rb_econv_t *converter = DATA_PTR(self);
    VALUE result = rb_econv_str_append(converter, src, dst, no_flags);

    return result;
}
/*
 * Registers econv_append() on `klass` as the method "append", which takes
 * two arguments (`src` and `dst`).
 *
 * The arity is passed as a literal on purpose.
 * NOTE(review): rb_define_method may be a macro that dispatches on a
 * constant arity -- confirm before replacing the literal with a variable.
 */
void
Init_econv_append(VALUE klass)
{
rb_define_method(klass, "append", econv_append, 2);
}

View file

@ -0,0 +1,3 @@
# frozen_string_literal: false
# Load the shared helper script located one directory above this file.
require_relative "../auto_ext.rb"
# Run the helper with inc: true.
# NOTE(review): presumably this generates the build configuration for this
# test extension and adds extra include paths -- confirm in auto_ext.rb.
auto_ext(inc: true)

11
ext/-test-/econv/init.c Normal file
View file

@ -0,0 +1,11 @@
#include "ruby.h"
/*
 * init(n): expands to a braced block that declares
 * `void Init_econv_<n>(VALUE klass)` and immediately calls it with `klass`.
 * A variable named `klass` must therefore be in scope at the expansion site.
 *
 * NOTE(review): the expansion is a bare `{...}` block rather than
 * `do {...} while (0)`; presumably TEST_INIT_FUNCS places several expansions
 * back to back without separators -- confirm before changing the shape.
 */
#define init(n) {void Init_econv_##n(VALUE klass); Init_econv_##n(klass);}
/*
 * Entry point of the extension.
 *
 * Defines the module `Bug`, looks up the class `Encoding::Converter`, and
 * defines `Bug::EConv` as a subclass of it.  TEST_INIT_FUNCS(init) is then
 * expanded with the `init` macro, which calls Init_econv_<name>(klass) for
 * each name it is applied to.
 *
 * NOTE(review): TEST_INIT_FUNCS is not defined in this file; presumably it
 * is supplied by the build configuration -- confirm.
 */
void
Init_econv(void)
{
    VALUE bug_module = rb_define_module("Bug");
    VALUE converter_class = rb_path2class("Encoding::Converter");
    /* Must be named `klass`: the `init` macro refers to it by name. */
    VALUE klass = rb_define_class_under(bug_module, "EConv", converter_class);

    TEST_INIT_FUNCS(init);
}

View file

@ -1812,6 +1812,12 @@ rb_econv_asciicompat_encoding(const char *ascii_incompat_name)
return data.ascii_compat_name;
}
/*
* Appends the `len` bytes pointed to by `ss` to `dst`, converting them
* with `ec`.
*
* If the result of the conversion is not compatible with the encoding of
* `dst`, the contents of `dst` may no longer be valid in that encoding.
*/
VALUE
rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags)
{
@ -1819,11 +1825,19 @@ rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags)
unsigned char *ds, *dp, *de;
rb_econv_result_t res;
int max_output;
enum ruby_coderange_type coderange;
rb_encoding *dst_enc = ec->destination_encoding;
if (NIL_P(dst)) {
dst = rb_str_buf_new(len);
if (ec->destination_encoding)
rb_enc_associate(dst, ec->destination_encoding);
if (dst_enc) {
rb_enc_associate(dst, dst_enc);
}
coderange = ENC_CODERANGE_7BIT; // scan from the start
}
else {
dst_enc = rb_enc_get(dst);
coderange = rb_enc_str_coderange(dst);
}
if (ec->last_tc)
@ -1832,13 +1846,13 @@ rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags)
max_output = 1;
do {
int cr;
long dlen = RSTRING_LEN(dst);
if (rb_str_capacity(dst) - dlen < (size_t)len + max_output) {
unsigned long new_capa = (unsigned long)dlen + len + max_output;
if (LONG_MAX < new_capa)
rb_raise(rb_eArgError, "too long string");
rb_str_resize(dst, new_capa);
rb_str_set_len(dst, dlen);
rb_str_modify_expand(dst, new_capa - dlen);
}
sp = (const unsigned char *)ss;
se = sp + len;
@ -1846,6 +1860,18 @@ rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags)
de = ds + rb_str_capacity(dst);
dp = ds += dlen;
res = rb_econv_convert(ec, &sp, se, &dp, de, flags);
switch (coderange) {
case ENC_CODERANGE_7BIT:
case ENC_CODERANGE_VALID:
cr = (int)coderange;
rb_str_coderange_scan_restartable((char *)ds, (char *)dp, dst_enc, &cr);
coderange = cr;
ENC_CODERANGE_SET(dst, coderange);
break;
case ENC_CODERANGE_UNKNOWN:
case ENC_CODERANGE_BROKEN:
break;
}
len -= (const char *)sp - ss;
ss = (const char *)sp;
rb_str_set_len(dst, dlen + (dp - ds));