Add Encoding.primary_encoding and the -E / --encoding command-line options.

The primary encoding is set from -E/--encoding when one is given, otherwise
from the encoding reported by the parser (see the ChangeLog hunk below).

Review notes:
  * ruby.c (proc_options): the "--encoding=NAME" branch tested the argument
    with `if (*(s += 9)) goto noencoding;`, so every non-empty NAME raised
    "missing argument for --encoding" and only an empty NAME reached the
    lookup.  The test now reads `if (!*(s += 9))`.
  * The ruby.c post-image blob id on its index line predates that one-token
    correction.
  * Leading whitespace was reconstructed with spaces; apply with
    `git apply --ignore-whitespace` or `patch -l`.

diff --git a/ChangeLog b/ChangeLog
index 341570161d..a3c0530f75 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+Wed Oct 17 05:07:18 2007 Nobuyoshi Nakada
+
+        * encoding.c (rb_primary_encoding): added Encoding.primary_encoding.
+
+        * parse.y (rb_parser_encoding): added.
+
+        * ruby.c (proc_options): added -E and --encoding options.
+
+        * ruby.c (process_options): set primary encoding from command line
+          option if set, or source encoding.
+
+        * include/ruby/encoding.h (rb_enc_from_encoding,
+          rb_get_primary_encoding, rb_set_primary_encoding): prototypes.
+
+        * include/ruby/node.h (rb_parser_encoding): prototype.
+
 Wed Oct 17 03:37:07 2007 Nobuyoshi Nakada
 
         * re.c (rb_reg_desc): set encoding.
diff --git a/encoding.c b/encoding.c
index 93c3eca48c..ff62a3f083 100644
--- a/encoding.c
+++ b/encoding.c
@@ -43,8 +43,8 @@ enc_new(rb_encoding *encoding)
     return enc;
 }
 
-static VALUE
-enc_from_encoding(rb_encoding *enc)
+VALUE
+rb_enc_from_encoding(rb_encoding *enc)
 {
     return enc_initialized_p(enc) ? ENC_FROM_ENCODING(enc) : enc_new(enc);
 }
@@ -384,7 +384,7 @@ rb_obj_encoding(VALUE obj)
     if (!enc) {
         rb_raise(rb_eTypeError, "unknown encoding");
     }
-    return enc_from_encoding(enc);
+    return rb_enc_from_encoding(enc);
 }
 
 
@@ -482,7 +482,7 @@ enc_list(VALUE klass)
     for (i = 0; i < enc_table_size; ++i) {
         rb_encoding *enc = enc_table[i].enc;
         if (enc) {
-            rb_ary_push(ary, enc_from_encoding(enc));
+            rb_ary_push(ary, rb_enc_from_encoding(enc));
         }
     }
     return ary;
@@ -495,7 +495,7 @@ enc_find(VALUE klass, VALUE enc)
     if (idx < 0) {
         rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc));
     }
-    return enc_from_encoding(rb_enc_from_index(idx));
+    return rb_enc_from_encoding(rb_enc_from_index(idx));
 }
 
 /* :nodoc: */
@@ -513,6 +513,34 @@ enc_load(VALUE klass, VALUE str)
     return enc_find(klass, str);
 }
 
+static VALUE rb_primary_encoding;
+
+VALUE
+rb_get_primary_encoding(void)
+{
+    return rb_primary_encoding;
+}
+
+static VALUE
+get_primary_encoding(VALUE klass)
+{
+    return rb_get_primary_encoding();
+}
+
+void
+rb_set_primary_encoding(VALUE encoding)
+{
+    rb_to_encoding(encoding);
+    rb_primary_encoding = encoding;
+}
+
+static VALUE
+set_primary_encoding(VALUE klass, VALUE enc)
+{
+    rb_set_primary_encoding(enc);
+    return rb_primary_encoding;
+}
+
 void
 Init_Encoding(void)
 {
@@ -526,4 +554,8 @@ Init_Encoding(void)
 
     rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
     rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
+
+    rb_primary_encoding = rb_enc_from_encoding(rb_enc_from_index(0));
+    rb_define_singleton_method(rb_cEncoding, "primary_encoding", get_primary_encoding, 0);
+    rb_define_singleton_method(rb_cEncoding, "primary_encoding=", set_primary_encoding, 1);
 }
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h
index 6645a1e29d..08371cc141 100644
--- a/include/ruby/encoding.h
+++ b/include/ruby/encoding.h
@@ -99,5 +99,8 @@ int rb_enc_tolower(int c, rb_encoding *enc);
 ID rb_intern3(const char*, long, rb_encoding*);
 int rb_enc_symname_p(const char*, rb_encoding*);
 int rb_enc_str_coderange(VALUE);
+VALUE rb_enc_from_encoding(rb_encoding *enc);
+VALUE rb_get_primary_encoding(void);
+void rb_set_primary_encoding(VALUE encoding);
 
 #endif /* RUBY_ENCODING_H */
diff --git a/include/ruby/node.h b/include/ruby/node.h
index 51268061e0..eeea3cea99 100644
--- a/include/ruby/node.h
+++ b/include/ruby/node.h
@@ -481,6 +481,7 @@ typedef struct RNode {
 
 VALUE rb_parser_new(void);
 VALUE rb_parser_end_seen_p(VALUE);
+VALUE rb_parser_encoding(VALUE);
 
 NODE *rb_parser_compile_cstr(volatile VALUE, const char*, const char*, int, int);
 NODE *rb_parser_compile_string(volatile VALUE, const char*, VALUE, int);
diff --git a/parse.y b/parse.y
index 2708415446..a6e8637f8e 100644
--- a/parse.y
+++ b/parse.y
@@ -8895,6 +8895,21 @@ rb_parser_end_seen_p(VALUE vparser)
     return ruby__end__seen ? Qtrue : Qfalse;
 }
 
+/*
+ *  call-seq:
+ *    ripper#encoding   -> encoding
+ *
+ *  Return encoding of the source.
+ */
+VALUE
+rb_parser_encoding(VALUE vparser)
+{
+    struct parser_params *parser;
+
+    Data_Get_Struct(vparser, struct parser_params, parser);
+    return rb_enc_from_encoding(parser->enc);
+}
+
 #ifdef YYMALLOC
 #define HEAPCNT(n, size) ((n) * (size) / sizeof(YYSTYPE))
 #define NEWHEAP() rb_node_newnode(NODE_ALLOCA, 0, (VALUE)parser->heap, 0)
@@ -9415,6 +9430,7 @@ Init_ripper(void)
     rb_define_method(Ripper, "column", ripper_column, 0);
     rb_define_method(Ripper, "lineno", ripper_lineno, 0);
     rb_define_method(Ripper, "end_seen?", rb_parser_end_seen_p, 0);
+    rb_define_method(Ripper, "encoding", rb_parser_encoding, 0);
 #ifdef RIPPER_DEBUG
     rb_define_method(rb_mKernel, "assert_Qundef", ripper_assert_Qundef, 2);
     rb_define_method(rb_mKernel, "rawVALUE", ripper_value, 1);
diff --git a/ruby.c b/ruby.c
index a5f20adf6e..653944b61d 100644
--- a/ruby.c
+++ b/ruby.c
@@ -687,10 +687,15 @@ proc_options(int argc, char **argv, struct cmdline_options *opt)
             }
             break;
 
+          case 'E':
+            if (!*++s) {
+                rb_raise(rb_eRuntimeError, "missing argument for -E");
+            }
+            goto encoding;
+
           case 'K':
             if (*++s) {
                 rb_encoding *enc = 0;
-                if ((opt->enc_index = rb_enc_find_index(s)) >= 0) break;
                 switch (*s) {
                   case 'E': case 'e':
                     enc = ONIG_ENCODING_EUC_JP;
@@ -705,10 +710,9 @@ proc_options(int argc, char **argv, struct cmdline_options *opt)
                     enc = ONIG_ENCODING_ASCII;
                     break;
                 }
-                if (!enc) {
-                    rb_raise(rb_eRuntimeError, "unknown encoding name - %s", s);
+                if (enc) {
+                    opt->enc_index = rb_enc_find_index(rb_enc_name(enc));
                 }
-                opt->enc_index = rb_enc_find_index(rb_enc_name(enc));
                 s++;
             }
             goto reswitch;
@@ -770,6 +774,20 @@ proc_options(int argc, char **argv, struct cmdline_options *opt)
                 ruby_debug = Qtrue;
                 ruby_verbose = Qtrue;
             }
+            else if (strcmp("encoding", s) == 0) {
+                if (!--argc || !(s = *++argv)) {
+                  noencoding:
+                    rb_raise(rb_eRuntimeError, "missing argument for --encoding");
+                }
+              encoding:
+                if ((opt->enc_index = rb_enc_find_index(s)) < 0) {
+                    rb_raise(rb_eRuntimeError, "unknown encoding name - %s", s);
+                }
+            }
+            else if (strncmp("encoding=", s, 9) == 0) {
+                if (!*(s += 9)) goto noencoding;
+                goto encoding;
+            }
             else if (strcmp("version", s) == 0)
                 opt->version = 1;
             else if (strcmp("verbose", s) == 0) {
@@ -824,6 +842,7 @@ process_options(VALUE arg)
     char **argv = opt->argv;
     NODE *tree = 0;
     VALUE parser;
+    VALUE encoding;
     const char *s;
     int i = proc_options(argc, argv, opt);
 
@@ -920,7 +939,8 @@ process_options(VALUE arg)
     ruby_init_loadpath();
     parser = rb_parser_new();
     if (opt->e_script) {
-        rb_enc_associate_index(opt->e_script, opt->enc_index);
+        if (opt->enc_index >= 0)
+            rb_enc_associate_index(opt->e_script, opt->enc_index);
         require_libraries();
         tree = rb_parser_compile_string(parser, opt->script, opt->e_script, 1);
     }
@@ -950,6 +970,14 @@ process_options(VALUE arg)
         }
     }
 
+    if (opt->enc_index >= 0) {
+        encoding = rb_enc_from_encoding(rb_enc_from_index(opt->enc_index));
+    }
+    else {
+        encoding = rb_parser_encoding(parser);
+    }
+    rb_set_primary_encoding(encoding);
+
     return (VALUE)tree;
 }
 
@@ -1068,7 +1096,7 @@ load_file(VALUE parser, const char *fname, int script, struct cmdline_options *o
         }
         require_libraries();    /* Why here? unnatural */
     }
-    rb_enc_associate_index(f, opt->enc_index);
+    if (opt->enc_index >= 0) rb_enc_associate_index(f, opt->enc_index);
     parser = rb_parser_new();
     tree = (NODE *)rb_parser_compile_file(parser, fname, f, line_start);
     if (script && rb_parser_end_seen_p(parser)) {
@@ -1312,6 +1340,7 @@ ruby_process_options(int argc, char **argv)
     rb_argv0 = rb_progname;
     opt.argc = argc;
     opt.argv = argv;
+    opt.enc_index = -1;
     tree = (NODE *)rb_vm_call_cfunc(rb_vm_top_self(),
                                     process_options, (VALUE)&opt,
                                     0, rb_progname);