mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Windows: Read ENV names and values as UTF-8 encoded Strings (#3818)
* Windows: Read ENV names and values as UTF-8 encoded Strings Implements issue #12650: fix https://bugs.ruby-lang.org/issues/12650 This also removes the special encoding for ENV['PATH'] and some complexity in the code that is unnecessary now. * Windows: Improve readability of getenv() encoding getenv() did use the expected codepage as an implicit parameter of the macro. This is misleading since include/ruby/win32.h has a different definition. Using the "cp" variable explicitly (like the other function calls) makes it more readable and consistent. * Windows: Change external C-API macros getenv() and execv() to use UTF-8 They used to process and return strings with locale encoding, but since all ruby-internal spawn and environment functions use UTF-8, it makes sense to change the C-API equally.
This commit is contained in:
parent
94b6933d1c
commit
ca76337a00
Notes:
git
2020-12-08 02:01:05 +09:00
Merged-By: nurse <naruse@airemix.jp>
9 changed files with 39 additions and 82 deletions
75
hash.c
75
hash.c
|
@ -4815,22 +4815,7 @@ static char **my_environ;
|
||||||
#undef environ
|
#undef environ
|
||||||
#define environ my_environ
|
#define environ my_environ
|
||||||
#undef getenv
|
#undef getenv
|
||||||
static char *(*w32_getenv)(const char*);
|
#define getenv(n) rb_w32_ugetenv(n)
|
||||||
static char *
|
|
||||||
w32_getenv_unknown(const char *name)
|
|
||||||
{
|
|
||||||
char *(*func)(const char*);
|
|
||||||
if (rb_locale_encindex() == rb_ascii8bit_encindex()) {
|
|
||||||
func = rb_w32_getenv;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
func = rb_w32_ugetenv;
|
|
||||||
}
|
|
||||||
/* atomic assignment in flat memory model */
|
|
||||||
return (w32_getenv = func)(name);
|
|
||||||
}
|
|
||||||
static char *(*w32_getenv)(const char*) = w32_getenv_unknown;
|
|
||||||
#define getenv(n) w32_getenv(n)
|
|
||||||
#elif defined(__APPLE__)
|
#elif defined(__APPLE__)
|
||||||
#undef environ
|
#undef environ
|
||||||
#define environ (*_NSGetEnviron())
|
#define environ (*_NSGetEnviron())
|
||||||
|
@ -4849,20 +4834,20 @@ extern char **environ;
|
||||||
#define ENVNMATCH(s1, s2, n) (memcmp((s1), (s2), (n)) == 0)
|
#define ENVNMATCH(s1, s2, n) (memcmp((s1), (s2), (n)) == 0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static inline rb_encoding *
|
||||||
|
env_encoding()
|
||||||
|
{
|
||||||
|
#ifdef _WIN32
|
||||||
|
return rb_utf8_encoding();
|
||||||
|
#else
|
||||||
|
return rb_locale_encoding();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
static VALUE
|
static VALUE
|
||||||
env_enc_str_new(const char *ptr, long len, rb_encoding *enc)
|
env_enc_str_new(const char *ptr, long len, rb_encoding *enc)
|
||||||
{
|
{
|
||||||
#ifdef _WIN32
|
|
||||||
rb_encoding *internal = rb_default_internal_encoding();
|
|
||||||
const int ecflags = ECONV_INVALID_REPLACE | ECONV_UNDEF_REPLACE;
|
|
||||||
rb_encoding *utf8 = rb_utf8_encoding();
|
|
||||||
VALUE str = rb_enc_str_new(NULL, 0, (internal ? internal : enc));
|
|
||||||
if (NIL_P(rb_str_cat_conv_enc_opts(str, 0, ptr, len, utf8, ecflags, Qnil))) {
|
|
||||||
rb_str_initialize(str, ptr, len, NULL);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
VALUE str = rb_external_str_new_with_enc(ptr, len, enc);
|
VALUE str = rb_external_str_new_with_enc(ptr, len, enc);
|
||||||
#endif
|
|
||||||
|
|
||||||
rb_obj_freeze(str);
|
rb_obj_freeze(str);
|
||||||
return str;
|
return str;
|
||||||
|
@ -4877,7 +4862,7 @@ env_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
|
||||||
static VALUE
|
static VALUE
|
||||||
env_str_new(const char *ptr, long len)
|
env_str_new(const char *ptr, long len)
|
||||||
{
|
{
|
||||||
return env_enc_str_new(ptr, len, rb_locale_encoding());
|
return env_enc_str_new(ptr, len, env_encoding());
|
||||||
}
|
}
|
||||||
|
|
||||||
static VALUE
|
static VALUE
|
||||||
|
@ -4889,46 +4874,23 @@ env_str_new2(const char *ptr)
|
||||||
|
|
||||||
static const char TZ_ENV[] = "TZ";
|
static const char TZ_ENV[] = "TZ";
|
||||||
|
|
||||||
static rb_encoding *
|
|
||||||
env_encoding_for(const char *name, const char *ptr)
|
|
||||||
{
|
|
||||||
if (ENVMATCH(name, PATH_ENV)) {
|
|
||||||
return rb_filesystem_encoding();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return rb_locale_encoding();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static VALUE
|
static VALUE
|
||||||
env_name_new(const char *name, const char *ptr)
|
env_name_new(const char *name, const char *ptr)
|
||||||
{
|
{
|
||||||
return env_enc_str_new_cstr(ptr, env_encoding_for(name, ptr));
|
return env_enc_str_new_cstr(ptr, env_encoding());
|
||||||
}
|
}
|
||||||
|
|
||||||
static void *
|
static void *
|
||||||
get_env_cstr(
|
get_env_cstr(
|
||||||
#ifdef _WIN32
|
|
||||||
volatile VALUE *pstr,
|
|
||||||
#else
|
|
||||||
VALUE str,
|
VALUE str,
|
||||||
#endif
|
|
||||||
const char *name)
|
const char *name)
|
||||||
{
|
{
|
||||||
#ifdef _WIN32
|
|
||||||
VALUE str = *pstr;
|
|
||||||
#endif
|
|
||||||
char *var;
|
char *var;
|
||||||
rb_encoding *enc = rb_enc_get(str);
|
rb_encoding *enc = rb_enc_get(str);
|
||||||
if (!rb_enc_asciicompat(enc)) {
|
if (!rb_enc_asciicompat(enc)) {
|
||||||
rb_raise(rb_eArgError, "bad environment variable %s: ASCII incompatible encoding: %s",
|
rb_raise(rb_eArgError, "bad environment variable %s: ASCII incompatible encoding: %s",
|
||||||
name, rb_enc_name(enc));
|
name, rb_enc_name(enc));
|
||||||
}
|
}
|
||||||
#ifdef _WIN32
|
|
||||||
if (!rb_enc_str_asciionly_p(str)) {
|
|
||||||
*pstr = str = rb_str_conv_enc(str, NULL, rb_utf8_encoding());
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
var = RSTRING_PTR(str);
|
var = RSTRING_PTR(str);
|
||||||
if (memchr(var, '\0', RSTRING_LEN(str))) {
|
if (memchr(var, '\0', RSTRING_LEN(str))) {
|
||||||
rb_raise(rb_eArgError, "bad environment variable %s: contains null byte", name);
|
rb_raise(rb_eArgError, "bad environment variable %s: contains null byte", name);
|
||||||
|
@ -4936,13 +4898,8 @@ get_env_cstr(
|
||||||
return rb_str_fill_terminator(str, 1); /* ASCII compatible */
|
return rb_str_fill_terminator(str, 1); /* ASCII compatible */
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _WIN32
|
|
||||||
#define get_env_ptr(var, val) \
|
|
||||||
(var = get_env_cstr(&(val), #var))
|
|
||||||
#else
|
|
||||||
#define get_env_ptr(var, val) \
|
#define get_env_ptr(var, val) \
|
||||||
(var = get_env_cstr(val, #var))
|
(var = get_env_cstr(val, #var))
|
||||||
#endif
|
|
||||||
|
|
||||||
static inline const char *
|
static inline const char *
|
||||||
env_name(volatile VALUE *s)
|
env_name(volatile VALUE *s)
|
||||||
|
@ -4983,9 +4940,6 @@ env_delete(VALUE name)
|
||||||
VALUE value = env_str_new2(val);
|
VALUE value = env_str_new2(val);
|
||||||
|
|
||||||
ruby_setenv(nam, 0);
|
ruby_setenv(nam, 0);
|
||||||
if (ENVMATCH(nam, PATH_ENV)) {
|
|
||||||
RB_GC_GUARD(name);
|
|
||||||
}
|
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
return Qnil;
|
return Qnil;
|
||||||
|
@ -5407,9 +5361,6 @@ env_aset(VALUE nm, VALUE val)
|
||||||
get_env_ptr(value, val);
|
get_env_ptr(value, val);
|
||||||
|
|
||||||
ruby_setenv(name, value);
|
ruby_setenv(name, value);
|
||||||
if (ENVMATCH(name, PATH_ENV)) {
|
|
||||||
RB_GC_GUARD(nm);
|
|
||||||
}
|
|
||||||
reset_by_modified_env(name);
|
reset_by_modified_env(name);
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
|
|
|
@ -160,7 +160,7 @@ typedef int clockid_t;
|
||||||
#define Sleep(msec) (void)rb_w32_Sleep(msec)
|
#define Sleep(msec) (void)rb_w32_Sleep(msec)
|
||||||
|
|
||||||
#undef execv
|
#undef execv
|
||||||
#define execv(path,argv) rb_w32_aspawn(P_OVERLAY,path,argv)
|
#define execv(path,argv) rb_w32_uaspawn(P_OVERLAY,path,argv)
|
||||||
#undef isatty
|
#undef isatty
|
||||||
#define isatty(h) rb_w32_isatty(h)
|
#define isatty(h) rb_w32_isatty(h)
|
||||||
|
|
||||||
|
@ -717,7 +717,7 @@ extern char *rb_w32_strerror(int);
|
||||||
#define getcwd(b, s) rb_w32_getcwd(b, s)
|
#define getcwd(b, s) rb_w32_getcwd(b, s)
|
||||||
|
|
||||||
#undef getenv
|
#undef getenv
|
||||||
#define getenv(n) rb_w32_getenv(n)
|
#define getenv(n) rb_w32_ugetenv(n)
|
||||||
|
|
||||||
#undef rename
|
#undef rename
|
||||||
#define rename(o, n) rb_w32_rename(o, n)
|
#define rename(o, n) rb_w32_rename(o, n)
|
||||||
|
|
1
spec/ruby/core/env/element_reference_spec.rb
vendored
1
spec/ruby/core/env/element_reference_spec.rb
vendored
|
@ -59,6 +59,7 @@ describe "ENV.[]" do
|
||||||
Encoding.default_internal = nil
|
Encoding.default_internal = nil
|
||||||
|
|
||||||
locale = Encoding.find('locale')
|
locale = Encoding.find('locale')
|
||||||
|
locale = Encoding::UTF_8 if platform_is :windows
|
||||||
locale = Encoding::BINARY if locale == Encoding::US_ASCII
|
locale = Encoding::BINARY if locale == Encoding::US_ASCII
|
||||||
ENV[@variable] = "\xC3\xB8"
|
ENV[@variable] = "\xC3\xB8"
|
||||||
ENV[@variable].encoding.should == locale
|
ENV[@variable].encoding.should == locale
|
||||||
|
|
3
spec/ruby/core/env/fetch_spec.rb
vendored
3
spec/ruby/core/env/fetch_spec.rb
vendored
|
@ -56,7 +56,8 @@ describe "ENV.fetch" do
|
||||||
end
|
end
|
||||||
|
|
||||||
it "uses the locale encoding" do
|
it "uses the locale encoding" do
|
||||||
|
encoding = platform_is(:windows) ? Encoding::UTF_8 : Encoding.find('locale')
|
||||||
ENV["foo"] = "bar"
|
ENV["foo"] = "bar"
|
||||||
ENV.fetch("foo").encoding.should == Encoding.find('locale')
|
ENV.fetch("foo").encoding.should == encoding
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
5
spec/ruby/core/env/shift_spec.rb
vendored
5
spec/ruby/core/env/shift_spec.rb
vendored
|
@ -42,9 +42,10 @@ describe "ENV.shift" do
|
||||||
it "uses the locale encoding if Encoding.default_internal is nil" do
|
it "uses the locale encoding if Encoding.default_internal is nil" do
|
||||||
Encoding.default_internal = nil
|
Encoding.default_internal = nil
|
||||||
|
|
||||||
|
encoding = platform_is(:windows) ? Encoding::UTF_8 : Encoding.find('locale')
|
||||||
pair = ENV.shift
|
pair = ENV.shift
|
||||||
pair.first.encoding.should equal(Encoding.find("locale"))
|
pair.first.encoding.should equal(encoding)
|
||||||
pair.last.encoding.should equal(Encoding.find("locale"))
|
pair.last.encoding.should equal(encoding)
|
||||||
end
|
end
|
||||||
|
|
||||||
it "transcodes from the locale encoding to Encoding.default_internal if set" do
|
it "transcodes from the locale encoding to Encoding.default_internal if set" do
|
||||||
|
|
3
spec/ruby/core/env/values_at_spec.rb
vendored
3
spec/ruby/core/env/values_at_spec.rb
vendored
|
@ -28,7 +28,8 @@ describe "ENV.values_at" do
|
||||||
end
|
end
|
||||||
|
|
||||||
it "uses the locale encoding" do
|
it "uses the locale encoding" do
|
||||||
ENV.values_at(ENV.keys.first).first.encoding.should == Encoding.find('locale')
|
encoding = platform_is(:windows) ? Encoding::UTF_8 : Encoding.find('locale')
|
||||||
|
ENV.values_at(ENV.keys.first).first.encoding.should == encoding
|
||||||
end
|
end
|
||||||
|
|
||||||
it "raises TypeError when a key is not coercible to String" do
|
it "raises TypeError when a key is not coercible to String" do
|
||||||
|
|
|
@ -369,7 +369,8 @@ class TestEnv < Test::Unit::TestCase
|
||||||
assert_equal("foo", v)
|
assert_equal("foo", v)
|
||||||
end
|
end
|
||||||
assert_invalid_env {|var| ENV.assoc(var)}
|
assert_invalid_env {|var| ENV.assoc(var)}
|
||||||
assert_equal(Encoding.find("locale"), v.encoding)
|
encoding = /mswin|mingw/ =~ RUBY_PLATFORM ? Encoding::UTF_8 : Encoding.find("locale")
|
||||||
|
assert_equal(encoding, v.encoding)
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_has_value2
|
def test_has_value2
|
||||||
|
@ -579,15 +580,13 @@ class TestEnv < Test::Unit::TestCase
|
||||||
end;
|
end;
|
||||||
end
|
end
|
||||||
|
|
||||||
if Encoding.find("locale") == Encoding::UTF_8
|
def test_utf8
|
||||||
def test_utf8
|
text = "testing \u{e5 e1 e2 e4 e3 101 3042}"
|
||||||
text = "testing \u{e5 e1 e2 e4 e3 101 3042}"
|
test = ENV["test"]
|
||||||
test = ENV["test"]
|
ENV["test"] = text
|
||||||
ENV["test"] = text
|
assert_equal text, ENV["test"]
|
||||||
assert_equal text, ENV["test"]
|
ensure
|
||||||
ensure
|
ENV["test"] = test
|
||||||
ENV["test"] = test
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -1325,10 +1325,14 @@ class TestM17N < Test::Unit::TestCase
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_env
|
def test_env
|
||||||
locale_encoding = Encoding.find("locale")
|
if RUBY_PLATFORM =~ /bccwin|mswin|mingw/
|
||||||
|
env_encoding = Encoding::UTF_8
|
||||||
|
else
|
||||||
|
env_encoding = Encoding.find("locale")
|
||||||
|
end
|
||||||
ENV.each {|k, v|
|
ENV.each {|k, v|
|
||||||
assert_equal(locale_encoding, k.encoding, k)
|
assert_equal(env_encoding, k.encoding, k)
|
||||||
assert_equal(locale_encoding, v.encoding, v)
|
assert_equal(env_encoding, v.encoding, v)
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -77,7 +77,6 @@ static char *w32_getenv(const char *name, UINT cp);
|
||||||
#define DLN_FIND_EXTRA_ARG_DECL ,UINT cp
|
#define DLN_FIND_EXTRA_ARG_DECL ,UINT cp
|
||||||
#define DLN_FIND_EXTRA_ARG ,cp
|
#define DLN_FIND_EXTRA_ARG ,cp
|
||||||
#define rb_w32_stati128(path, st) w32_stati128(path, st, cp, FALSE)
|
#define rb_w32_stati128(path, st) w32_stati128(path, st, cp, FALSE)
|
||||||
#define getenv(name) w32_getenv(name, cp)
|
|
||||||
#undef CharNext
|
#undef CharNext
|
||||||
#define CharNext(p) CharNextExA(cp, (p), 0)
|
#define CharNext(p) CharNextExA(cp, (p), 0)
|
||||||
#define dln_find_exe_r rb_w32_udln_find_exe_r
|
#define dln_find_exe_r rb_w32_udln_find_exe_r
|
||||||
|
|
Loading…
Reference in a new issue