1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Skip broken strings as the locale encoding

This commit is contained in:
Nobuyoshi Nakada 2021-09-29 19:59:31 +09:00
parent 409dbc951b
commit 842b0008c1
Notes: git 2021-10-01 20:29:13 +09:00
3 changed files with 14 additions and 4 deletions

View file

@ -42,6 +42,7 @@ size_t rb_str_memsize(VALUE);
char *rb_str_to_cstr(VALUE str);
const char *ruby_escaped_char(int c);
void rb_str_make_independent(VALUE str);
/* Coderange scan of str under the explicitly given encoding enc;
 * callers compare the result against ENC_CODERANGE_* values
 * (e.g. ENC_CODERANGE_BROKEN). */
int rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc);
static inline bool STR_EMBED_P(VALUE str);
static inline bool STR_SHARED_P(VALUE str);

11
ruby.c
View file

@ -1680,7 +1680,11 @@ tty_enabled(void)
/*
 * Produce a copy of +str+ that carries the encoding +enc+.
 *
 * intern == false:
 *   +str+ is first scanned as +enc+.  If the scan reports
 *   ENC_CODERANGE_BROKEN, 0 is returned instead of a string so that the
 *   caller can skip the entry (callers test the result with `!`).
 *   Otherwise a duplicate of +str+ associated with +enc+ is returned.
 *
 * intern == true:
 *   The result comes from rb_enc_interned_str() on the bytes of +str+;
 *   no coderange check is made on this path.
 */
static VALUE
copy_str(VALUE str, rb_encoding *enc, bool intern)
{
    if (!intern) {
        /* The validity check must come before the dup/associate: an
         * unconditional early return here would make it unreachable. */
        if (rb_enc_str_coderange_scan(str, enc) == ENC_CODERANGE_BROKEN) {
            return 0;
        }
        return rb_enc_associate(rb_str_dup(str), enc);
    }
    return rb_enc_interned_str(RSTRING_PTR(str), RSTRING_LEN(str), enc);
}
@ -1916,7 +1920,7 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt)
if (newpath == path) continue;
path = newpath;
#else
path = copy_str(path, lenc, !mark);
if (!(path = copy_str(path, lenc, !mark))) continue;
#endif
if (mark) rb_ivar_set(path, id_initial_load_path_mark, path);
if (!modifiable) {
@ -1934,8 +1938,7 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt)
bool modified = false;
for (long i = loaded_before_enc; i < RARRAY_LEN(loaded_features); ++i) {
VALUE path = RARRAY_AREF(loaded_features, i);
if (rb_enc_get(path) == IF_UTF8_PATH(uenc, lenc)) continue;
path = copy_str(path, IF_UTF8_PATH(uenc, lenc), true);
if (!(path = copy_str(path, IF_UTF8_PATH(uenc, lenc), true))) continue;
modified = true;
RARRAY_ASET(loaded_features, i, path);
}

View file

@ -724,6 +724,12 @@ enc_coderange_scan(VALUE str, rb_encoding *enc, int encidx)
}
}
/*
 * Coderange scan of +str+ under an explicitly supplied encoding.
 * Looks up the index of +enc+ and passes +str+, +enc+ and that index to
 * enc_coderange_scan(), returning whatever that scan reports.
 */
int
rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc)
{
    const int encidx = rb_enc_to_index(enc);

    return enc_coderange_scan(str, enc, encidx);
}
int
rb_enc_str_coderange(VALUE str)
{