1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* encoding.c (rb_default_internal_encoding): merged a patch from
  Michael Selig <michael.selig at fs.com.au> in [ruby-core:18985].

* io.c (rb_io_ext_int_to_encs): ditto.

* ruby.c (proc_options): support default internal encoding in -E
  option.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19709 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
matz 2008-10-07 17:39:44 +00:00
parent 0b184b473b
commit baeeebf474
5 changed files with 206 additions and 61 deletions

View file

@ -1,3 +1,13 @@
Wed Oct 8 02:38:28 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
* encoding.c (rb_default_internal_encoding): merged a patch from
Michael Selig <michael.selig at fs.com.au> in [ruby-core:18985].
* io.c (rb_io_ext_int_to_encs): ditto.
* ruby.c (proc_options): support default internal encoding in -E
option.
Wed Oct 8 00:03:39 2008 Tadayoshi Funaba <tadf@dotrb.org>
* lib/date.rb (today,now): should produce own instances.

View file

@ -1027,6 +1027,55 @@ rb_enc_set_default_external(VALUE encoding)
default_external = 0;
}
/*
 * Encoding index of the default internal encoding.
 * -2 => not yet set, -1 => nil
 */
static int default_internal_index = -2;
/* Encoding looked up from default_internal_index on demand; 0 until resolved. */
static rb_encoding *default_internal;
/*
 * Returns the default internal encoding.
 *
 * The encoding is looked up from default_internal_index the first time it is
 * needed and the result is kept in default_internal.  While the index is
 * negative (not yet set, or nil) no lookup happens and the stored pointer is
 * returned as is.
 */
rb_encoding *
rb_default_internal_encoding(void)
{
    /* Already resolved: nothing to do. */
    if (default_internal) {
        return default_internal;
    }
    /* Negative index means "not yet set" or "nil": leave unresolved. */
    if (default_internal_index < 0) {
        return default_internal;
    }
    default_internal = rb_enc_from_index(default_internal_index);
    return default_internal;
}
VALUE
rb_enc_default_internal(void)
{
/* Note: These functions cope with default_internal not being set */
return rb_enc_from_encoding(rb_default_internal_encoding());
}
/*
 * call-seq:
 *   Encoding.default_internal => enc
 *
 * Returns the default internal encoding.
 *
 * The value is established during startup, from the -E option or from
 * the options on the script's shebang line, and cannot be changed once
 * it has been set.
 */
static VALUE
get_default_internal(VALUE klass)
{
    VALUE enc = rb_enc_default_internal();

    return enc;
}
/*
 * Sets the default internal encoding.
 *
 * encoding - an encoding designator, or Qnil to record "no default internal
 *            encoding".
 *
 * Only the first call has any effect: once the index has left its initial
 * "not yet set" state (-2), later calls return without changing anything.
 * US-ASCII is replaced by UTF-8.  The cached encoding pointer is cleared so
 * that it is looked up again from the new index.
 */
void
rb_enc_set_default_internal(VALUE encoding)
{
    if (default_internal_index != -2) {
        /* Already set */
        return;
    }

    if (encoding == Qnil) {
        default_internal_index = -1;
    }
    else {
        default_internal_index = rb_enc_to_index(rb_to_encoding(encoding));
    }

    /* Convert US-ASCII => UTF-8 */
    if (default_internal_index == rb_usascii_encindex()) {
        default_internal_index = rb_utf8_encindex();
    }

    default_internal = 0;
}
/*
* call-seq:
* Encoding.locale_charmap => string
@ -1212,6 +1261,7 @@ Init_Encoding(void)
rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
rb_define_singleton_method(rb_cEncoding, "default_external", get_default_external, 0);
rb_define_singleton_method(rb_cEncoding, "default_internal", get_default_internal, 0);
rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0);
list = rb_ary_new2(enc_table.count);

View file

@ -168,11 +168,14 @@ rb_encoding *rb_usascii_encoding(void);
rb_encoding *rb_locale_encoding(void);
rb_encoding *rb_filesystem_encoding(void);
rb_encoding *rb_default_external_encoding(void);
/* Default internal encoding as an rb_encoding pointer. */
rb_encoding *rb_default_internal_encoding(void);
int rb_ascii8bit_encindex(void);
int rb_utf8_encindex(void);
int rb_usascii_encindex(void);
VALUE rb_enc_default_external(void);
/* Default internal encoding as a VALUE. */
VALUE rb_enc_default_internal(void);
void rb_enc_set_default_external(VALUE encoding);
/* Sets the default internal encoding; only the first call takes effect. */
void rb_enc_set_default_internal(VALUE encoding);
VALUE rb_locale_charmap(VALUE klass);
long rb_memsearch(const void*,long,const void*,long,rb_encoding*);

166
io.c
View file

@ -2177,10 +2177,8 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io)
}
newline = (unsigned char)rsptr[rslen - 1];
if (fptr->encs.enc2)
enc = fptr->encs.enc;
else
enc = io_input_encoding(fptr);
/* MS - Optimisation */
enc = io_read_encoding(fptr);
while ((c = appendline(fptr, newline, &str, &limit)) != EOF) {
const char *s, *p, *pp, *e;
@ -3740,52 +3738,87 @@ rb_io_oflags_modestr(int oflags)
return NULL; /* not reached */
}
/*
 * Convert an external/internal encoding pair to the enc/enc2 pair.
 *
 *   ext    - external encoding; NULL => use the default external encoding
 *   intern - internal encoding; NULL => use the default internal encoding
 *            (not consulted when the external encoding is ASCII-8BIT),
 *            Qnil => no encoding specified (internal only)
 *   enc    - output
 *   enc2   - output
 *
 * When an internal encoding applies and differs from the external one,
 * *enc receives the internal encoding and *enc2 the external one.
 * Otherwise *enc2 is NULL and *enc is the external encoding, or NULL when
 * the external encoding was itself taken from the default.
 */
static void
rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_encoding **enc, rb_encoding **enc2)
{
    const int ext_defaulted = (ext == NULL);

    if (ext_defaulted) {
        ext = rb_default_external_encoding();
    }

    /* If external is ASCII-8BIT, no default transcoding */
    if (intern == NULL && ext != rb_ascii8bit_encoding()) {
        intern = rb_default_internal_encoding();
    }

    if (intern != NULL && intern != (rb_encoding *)Qnil && intern != ext) {
        *enc = intern;
        *enc2 = ext;
        return;
    }

    /* No internal encoding => use external + no transcoding */
    *enc = ext_defaulted ? NULL : ext;
    *enc2 = NULL;
}
static void
parse_mode_enc(const char *estr, rb_encoding **enc_p, rb_encoding **enc2_p)
{
const char *p0, *p1;
char *enc2name;
const char *p;
char encname[ENCODING_MAXNAMELEN+1];
int idx, idx2;
rb_encoding *ext_enc, *int_enc;
/* parse estr as "enc" or "enc2:enc" */
/* parse estr as "enc" or "enc2:enc" or "enc:-" */
*enc_p = 0;
*enc2_p = 0;
p0 = strrchr(estr, ':');
if (!p0) p1 = estr;
else p1 = p0 + 1;
idx = rb_enc_find_index(p1);
if (idx >= 0) {
*enc_p = rb_enc_from_index(idx);
p = strrchr(estr, ':');
if (p) {
int len = (p++) - estr;
if (len == 0 || len > ENCODING_MAXNAMELEN)
idx = -1;
else {
memcpy(encname, estr, len);
encname[len] = '\0';
estr = encname;
idx = rb_enc_find_index(encname);
}
}
else
idx = rb_enc_find_index(estr);
if (idx >= 0)
ext_enc = rb_enc_from_index(idx);
else {
rb_warn("Unsupported encoding %s ignored", p1);
if (idx != -2)
rb_warn("Unsupported encoding %s ignored", estr);
ext_enc = NULL;
}
if (*enc_p && p0) {
int n = p0 - estr;
if (n > ENCODING_MAXNAMELEN) {
idx2 = -1;
int_enc = NULL;
if (p) {
if (*p == '-' && *(p+1) == '\0') {
/* Special case - "-" => no transcoding */
int_enc = (rb_encoding *)Qnil;
}
else {
enc2name = ALLOCA_N(char, n+1);
memcpy(enc2name, estr, n);
enc2name[n] = '\0';
estr = enc2name;
idx2 = rb_enc_find_index(enc2name);
}
if (idx2 < 0) {
rb_warn("Unsupported encoding %.*s ignored", n, estr);
}
else if (idx2 == idx) {
rb_warn("Ignoring internal encoding %.*s: it is identical to external encoding %s",
n, estr, p1);
}
else {
*enc2_p = rb_enc_from_index(idx2);
idx2 = rb_enc_find_index(p);
if (idx2 < 0)
rb_warn("Unsupported encoding %s ignored", p);
else if (idx2 == idx) {
rb_warn("Ignoring internal encoding %s: it is identical to external encoding %s", p, estr);
int_enc = (rb_encoding *)Qnil;
}
else
int_enc = rb_enc_from_index(idx2);
}
}
rb_io_ext_int_to_encs(ext_enc, int_enc, enc_p, enc2_p);
}
static void
@ -3821,28 +3854,32 @@ io_extract_encoding_option(VALUE opt, rb_encoding **enc_p, rb_encoding **enc2_p)
}
if (!NIL_P(extenc)) {
rb_encoding *extencoding = rb_to_encoding(extenc);
rb_encoding *intencoding = NULL;
extracted = 1;
*enc_p = 0;
*enc2_p = 0;
if (!NIL_P(encoding)) {
rb_warn("Ignoring encoding parameter '%s': external_encoding is used",
RSTRING_PTR(encoding));
}
if (!NIL_P(intenc)) {
rb_encoding *intencoding = rb_to_encoding(intenc);
if (!NIL_P(encoding = rb_check_string_type(intenc))) {
char *p = StringValueCStr(encoding);
if (*p == '-' && *(p+1) == '\0') {
/* Special case - "-" => no transcoding */
intencoding = (rb_encoding *)Qnil;
}
else
intencoding = rb_to_encoding(intenc);
}
else
intencoding = rb_to_encoding(intenc);
if (extencoding == intencoding) {
rb_warn("Ignoring internal encoding '%s': it is identical to external encoding '%s'",
RSTRING_PTR(rb_inspect(intenc)),
RSTRING_PTR(rb_inspect(extenc)));
}
else {
*enc_p = intencoding;
*enc2_p = extencoding;
intencoding = (rb_encoding *)Qnil;
}
}
else {
*enc_p = extencoding;
}
rb_io_ext_int_to_encs(extencoding, intencoding, enc_p, enc2_p);
}
else {
if (!NIL_P(intenc)) {
@ -3882,8 +3919,8 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash,
vmode = *vmode_p;
enc = NULL;
enc2 = NULL;
/* Set to defaults */
rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2);
if (NIL_P(vmode)) {
fmode = FMODE_READABLE;
@ -4070,8 +4107,8 @@ rb_file_open_generic(VALUE io, VALUE filename, int oflags, int fmode, convconfig
rb_io_t *fptr;
convconfig_t cc;
if (!convconfig) {
cc.enc = NULL;
cc.enc2 = NULL;
/* Set to default encodings */
rb_io_ext_int_to_encs(NULL, NULL, &cc.enc, &cc.enc2);
cc.ecflags = 0;
cc.ecopts = Qnil;
convconfig = &cc;
@ -4099,8 +4136,8 @@ rb_file_open_internal(VALUE io, VALUE filename, const char *modestr)
parse_mode_enc(p+1, &convconfig.enc, &convconfig.enc2);
}
else {
convconfig.enc = NULL;
convconfig.enc2 = NULL;
/* Set to default encodings */
rb_io_ext_int_to_encs(NULL, NULL, &convconfig.enc, &convconfig.enc2);
convconfig.ecflags = 0;
convconfig.ecopts = Qnil;
}
@ -6661,29 +6698,40 @@ io_encoding_set(rb_io_t *fptr, VALUE v1, VALUE v2, VALUE opt)
{
rb_encoding *enc, *enc2;
int ecflags;
VALUE ecopts;
VALUE ecopts, tmp;
if (!NIL_P(v2)) {
enc2 = rb_to_encoding(v1);
enc = rb_to_encoding(v2);
tmp = rb_check_string_type(v2);
if (!NIL_P(tmp)) {
char *p = StringValueCStr(tmp);
if (*p == '-' && *(p+1) == '\0') {
/* Special case - "-" => no transcoding */
enc = enc2;
enc2 = NULL;
}
else
enc = rb_to_encoding(v2);
}
else
enc = rb_to_encoding(v2);
ecflags = rb_econv_prepare_opts(opt, &ecopts);
}
else {
if (NIL_P(v1)) {
enc = NULL;
enc2 = NULL;
/* Set to default encodings */
rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2);
ecflags = 0;
ecopts = Qnil;
}
else {
VALUE tmp = rb_check_string_type(v1);
tmp = rb_check_string_type(v1);
if (!NIL_P(tmp)) {
parse_mode_enc(StringValueCStr(tmp), &enc, &enc2);
ecflags = rb_econv_prepare_opts(opt, &ecopts);
}
else {
enc = rb_to_encoding(v1);
enc2 = NULL;
rb_io_ext_int_to_encs(rb_to_encoding(v1), NULL, &enc, &enc2);
ecflags = 0;
ecopts = Qnil;
}

38
ruby.c
View file

@ -86,7 +86,7 @@ struct cmdline_options {
VALUE name;
int index;
} enc;
} src, ext;
} src, ext, intern;
VALUE req_list;
};
@ -855,6 +855,7 @@ proc_options(int argc, char **argv, struct cmdline_options *opt)
ruby_each_words(s, disable_option, &opt->disable);
}
else if (strncmp("encoding", s, n = 8) == 0 && (!s[n] || s[n] == '=')) {
char *p;
s += n;
if (!*s++) {
next_encoding:
@ -863,7 +864,15 @@ proc_options(int argc, char **argv, struct cmdline_options *opt)
}
}
encoding:
opt->ext.enc.name = rb_str_new2(s);
p = strchr(s, ':');
if (p) {
if (p > s)
opt->ext.enc.name = rb_str_new(s, p-s);
if (*++p)
opt->intern.enc.name = rb_str_new2(p);
}
else
opt->ext.enc.name = rb_str_new2(s);
}
else if (strcmp("version", s) == 0)
opt->version = 1;
@ -966,6 +975,7 @@ process_options(VALUE arg)
rb_safe_level() == 0 && (s = getenv("RUBYOPT"))) {
VALUE src_enc_name = opt->src.enc.name;
VALUE ext_enc_name = opt->ext.enc.name;
VALUE int_enc_name = opt->intern.enc.name;
while (ISSPACE(*s))
s++;
@ -1005,6 +1015,8 @@ process_options(VALUE arg)
opt->src.enc.name = src_enc_name;
if (ext_enc_name)
opt->ext.enc.name = ext_enc_name;
if (int_enc_name)
opt->intern.enc.name = int_enc_name;
}
if (opt->version) {
@ -1073,6 +1085,9 @@ process_options(VALUE arg)
if (opt->ext.enc.name != 0) {
opt->ext.enc.index = opt_enc_index(opt->ext.enc.name);
}
if (opt->intern.enc.name != 0) {
opt->intern.enc.index = opt_enc_index(opt->intern.enc.name);
}
if (opt->src.enc.name != 0) {
opt->src.enc.index = opt_enc_index(opt->src.enc.name);
src_encoding_index = opt->src.enc.index;
@ -1084,6 +1099,11 @@ process_options(VALUE arg)
enc = lenc;
}
rb_enc_set_default_external(rb_enc_from_encoding(enc));
if (opt->intern.enc.index >= 0) {
enc = rb_enc_from_index(opt->intern.enc.index);
rb_enc_set_default_internal(rb_enc_from_encoding(enc));
opt->intern.enc.index = -1;
}
rb_set_safe_level_force(safe);
if (opt->e_script) {
@ -1105,6 +1125,15 @@ process_options(VALUE arg)
tree = load_file(parser, opt->script, 1, opt);
}
if (opt->intern.enc.index >= 0) {
/* Set in the shebang line */
enc = rb_enc_from_index(opt->intern.enc.index);
rb_enc_set_default_internal(rb_enc_from_encoding(enc));
}
else
/* Freeze default_internal */
rb_enc_set_default_internal(Qnil);
if (!tree) return Qfalse;
process_sflag(opt);
@ -1175,6 +1204,7 @@ load_file(VALUE parser, const char *fname, int script, struct cmdline_options *o
char *p;
int no_src_enc = !opt->src.enc.name;
int no_ext_enc = !opt->ext.enc.name;
int no_int_enc = !opt->intern.enc.name;
enc = rb_usascii_encoding();
rb_funcall(f, rb_intern("set_encoding"), 1, rb_enc_from_encoding(enc));
@ -1261,6 +1291,9 @@ load_file(VALUE parser, const char *fname, int script, struct cmdline_options *o
if (no_ext_enc && opt->ext.enc.name) {
opt->ext.enc.index = opt_enc_index(opt->ext.enc.name);
}
if (no_int_enc && opt->intern.enc.name) {
opt->intern.enc.index = opt_enc_index(opt->intern.enc.name);
}
}
else if (!NIL_P(c)) {
rb_io_ungetbyte(f, c);
@ -1511,6 +1544,7 @@ ruby_process_options(int argc, char **argv)
args.argv = argv;
args.opt = cmdline_options_init(&opt);
opt.ext.enc.index = -1;
opt.intern.enc.index = -1;
tree = (NODE *)rb_vm_call_cfunc(rb_vm_top_self(),
process_options, (VALUE)&args,
0, rb_progname);