Summary of this patch (free text ahead of the first file header):

  * internal/string.h: declares rb_enc_str_coderange_scan(VALUE, rb_encoding *).
  * string.c: defines it as a wrapper that calls
    enc_coderange_scan(str, enc, rb_enc_to_index(enc)).
  * ruby.c, copy_str(): on the non-interning path, returns 0 instead of a
    copy when the scan under `enc` yields ENC_CODERANGE_BROKEN.
  * ruby.c, process_options(): both copy_str() call sites now `continue`
    when copy_str() returns a false value; the loaded_features loop also
    drops its former rb_enc_get() equality short-circuit.

  NOTE(review): the loaded_features loop passes intern = true, and the
  copy_str() shown here only returns 0 on the !intern branch, so whether
  that `continue` can ever fire depends on rb_enc_interned_str() -- confirm.

diff --git a/internal/string.h b/internal/string.h
index 546a0ac9a7..d010669ca8 100644
--- a/internal/string.h
+++ b/internal/string.h
@@ -42,6 +42,7 @@ size_t rb_str_memsize(VALUE);
 char *rb_str_to_cstr(VALUE str);
 const char *ruby_escaped_char(int c);
 void rb_str_make_independent(VALUE str);
+int rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc);
 
 static inline bool STR_EMBED_P(VALUE str);
 static inline bool STR_SHARED_P(VALUE str);
diff --git a/ruby.c b/ruby.c
index 3358068bbb..818161710c 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1680,7 +1680,11 @@ tty_enabled(void)
 static VALUE
 copy_str(VALUE str, rb_encoding *enc, bool intern)
 {
-    if (!intern) return rb_enc_associate(rb_str_dup(str), enc);
+    if (!intern) {
+        if (rb_enc_str_coderange_scan(str, enc) == ENC_CODERANGE_BROKEN)
+            return 0;
+        return rb_enc_associate(rb_str_dup(str), enc);
+    }
     return rb_enc_interned_str(RSTRING_PTR(str), RSTRING_LEN(str), enc);
 }
 
@@ -1916,7 +1920,7 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt)
             if (newpath == path) continue;
             path = newpath;
 #else
-            path = copy_str(path, lenc, !mark);
+            if (!(path = copy_str(path, lenc, !mark))) continue;
 #endif
             if (mark) rb_ivar_set(path, id_initial_load_path_mark, path);
             if (!modifiable) {
@@ -1934,8 +1938,7 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt)
         bool modified = false;
         for (long i = loaded_before_enc; i < RARRAY_LEN(loaded_features); ++i) {
             VALUE path = RARRAY_AREF(loaded_features, i);
-            if (rb_enc_get(path) == IF_UTF8_PATH(uenc, lenc)) continue;
-            path = copy_str(path, IF_UTF8_PATH(uenc, lenc), true);
+            if (!(path = copy_str(path, IF_UTF8_PATH(uenc, lenc), true))) continue;
             modified = true;
             RARRAY_ASET(loaded_features, i, path);
         }
diff --git a/string.c b/string.c
index 299d506004..78e2ba923f 100644
--- a/string.c
+++ b/string.c
@@ -724,6 +724,12 @@ enc_coderange_scan(VALUE str, rb_encoding *enc, int encidx)
     }
 }
 
+int
+rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc)
+{
+    return enc_coderange_scan(str, enc, rb_enc_to_index(enc));
+}
+
 int
 rb_enc_str_coderange(VALUE str)
 {