diff --git a/ChangeLog b/ChangeLog index b3b5a92dd2..3ea758a48f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +Fri Jul 12 16:28:37 2013 Nobuyoshi Nakada + + * encoding.c (rb_enc_associate_index): refill the terminator if it + becomes longer than before. [ruby-dev:47500] [Bug #8624] + + * string.c (str_null_char, str_fill_term): get rid of out of bound + access. + + * string.c (rb_str_fill_terminator): add a parameter for the length of + new terminator. + Fri Jul 12 11:26:25 2013 Masaki Matsushita * hash.c (rb_hash_reject_bang): do not call rb_hash_foreach() if RHash diff --git a/encoding.c b/encoding.c index 869c1b0acd..7c64248d12 100644 --- a/encoding.c +++ b/encoding.c @@ -777,10 +777,12 @@ VALUE rb_enc_associate_index(VALUE obj, int idx) { rb_encoding *enc; + int oldidx, oldtermlen, termlen; /* enc_check_capable(obj);*/ rb_check_frozen(obj); - if (rb_enc_get_index(obj) == idx) + oldidx = rb_enc_get_index(obj); + if (oldidx == idx) return obj; if (SPECIAL_CONST_P(obj)) { rb_raise(rb_eArgError, "cannot set encoding"); @@ -790,6 +792,11 @@ rb_enc_associate_index(VALUE obj, int idx) !rb_enc_asciicompat(enc)) { ENC_CODERANGE_CLEAR(obj); } + termlen = rb_enc_mbminlen(enc); + oldtermlen = rb_enc_mbminlen(rb_enc_from_index(oldidx)); + if (oldtermlen < termlen && RB_TYPE_P(obj, T_STRING)) { + rb_str_fill_terminator(obj, termlen); /* pass the NEW terminator length: obj still has the old encoding until enc_set_index() below */ + } enc_set_index(obj, idx); return obj; } diff --git a/internal.h b/internal.h index bac60fd767..f3ecf8be9f 100644 --- a/internal.h +++ b/internal.h @@ -441,7 +441,7 @@ VALUE rb_str_quote_unprintable(VALUE); VALUE rb_id_quote_unprintable(ID); #define QUOTE(str) rb_str_quote_unprintable(str) #define QUOTE_ID(id) rb_id_quote_unprintable(id) -void rb_str_fill_terminator(VALUE str); +void rb_str_fill_terminator(VALUE str, const int termlen); /* struct.c */ VALUE rb_struct_init_copy(VALUE copy, VALUE s); diff --git a/string.c b/string.c index 786f491c3a..abfdaa3067 100644 --- a/string.c +++ b/string.c @@ -1483,12 +1483,12 @@ 
rb_string_value_ptr(volatile VALUE *ptr) } static const char * -str_null_char(const char *s, long len, rb_encoding *enc) +str_null_char(const char *s, long len, const int minlen, rb_encoding *enc) { int n; const char *e = s + len; - for (; s < e; s += n) { + for (; s + minlen <= e; s += n) { /* stop once fewer than minlen bytes remain before e */ if (!rb_enc_codepoint_len(s, e, &n, enc)) return s; } return 0; @@ -1497,7 +1497,8 @@ str_null_char(const char *s, long len, rb_encoding *enc) static char * str_fill_term(VALUE str, char *s, long len, int termlen, rb_encoding *enc) { - long capa = rb_str_capacity(str) + 1; + int oldtermlen = rb_enc_mbminlen(enc); + long capa = rb_str_capacity(str) + oldtermlen; int n; if (capa < len + termlen) { @@ -1505,8 +1506,13 @@ str_fill_term(VALUE str, char *s, long len, int termlen, rb_encoding *enc) } else { const char *e = s + len; - if (!rb_enc_ascget(e, e + termlen, &n, enc)) return s; - rb_str_modify(str); + int diff = 0; + if (termlen > oldtermlen) diff = termlen - oldtermlen; /* extra bytes requested beyond enc's own minimum character length */ + if (!diff && str_independent(str) && + !rb_enc_ascget(e, e + oldtermlen, &n, enc)) { + return s; + } + str_make_independent_expand(str, diff); } s = RSTRING_PTR(str); TERM_FILL(s + len, termlen); @@ -1523,7 +1529,7 @@ rb_string_value_cstr(volatile VALUE *ptr) const int minlen = rb_enc_mbminlen(enc); if (minlen > 1) { - if (str_null_char(s, len, enc)) { + if (str_null_char(s, len, minlen, enc)) { rb_raise(rb_eArgError, "string contains null char"); } return str_fill_term(str, s, len, minlen, enc); @@ -1540,13 +1546,12 @@ rb_string_value_cstr(volatile VALUE *ptr) } void -rb_str_fill_terminator(VALUE str) +rb_str_fill_terminator(VALUE str, const int newminlen) { char *s = RSTRING_PTR(str); long len = RSTRING_LEN(str); rb_encoding *enc = rb_enc_get(str); - const int minlen = rb_enc_mbminlen(enc); - str_fill_term(str, s, len, minlen, enc); + str_fill_term(str, s, len, newminlen, enc); /* enc is str's current encoding; newminlen is the terminator length passed on to str_fill_term */ } VALUE diff --git a/transcode.c b/transcode.c index 88908c3cf2..54fc316e41 100644 --- a/transcode.c +++ b/transcode.c @@ 
-2760,7 +2760,6 @@ str_encode_associate(VALUE str, int encidx) int cr = 0; rb_enc_associate_index(str, encidx); - rb_str_fill_terminator(str); /* transcoded string never be broken. */ if (rb_enc_asciicompat(rb_enc_from_index(encidx))) {