diff --git a/ChangeLog b/ChangeLog index 3c06fa8892..bda8ff2241 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,24 @@ +Mon Dec 29 17:25:17 2008 Yuki Sonoda (Yugui) + + * lib/erb.rb (ERB): m17n of ERB. adds rdoc. + fixes #712. c.f. [ruby-dev:37516]. + + * lib/erb.rb (ERB::Compiler#compile): recognizes magic comments. + returns a pair of compiled script and its script encoding. + + * lib/erb.rb (ERB#set_eoutvar): make generated scripts return a + string in correct encoding. + + * lib/erb.rb (ERB#def_method): use Kernel#eval for encoding-awareness + of the evaluated string. + + * bin/erb (ERB::Main.run): adds -E and -U options. + String is no longer Enumerable. + + * man/erb.1: new manpage. + + * test/erb/test_erb_m17n.rb: new test case for m17n features. + Mon Dec 29 18:02:45 2008 Yukihiro Matsumoto * ext/io/wait/wait.c (io_nread): returns number of bytes available diff --git a/bin/erb b/bin/erb index 8541437dc1..d26564104f 100755 --- a/bin/erb +++ b/bin/erb @@ -72,6 +72,11 @@ class ERB end raise "invalid trim mode #{arg.dump}" unless arg =~ /^[0-2]$/ trim_mode = arg.to_i + when '-E', '--encoding' + arg = ARGV.req_arg + set_encoding(*arg.split(/:/, 2)) + when '-U' + set_encoding(Encoding::UTF_8, Encoding::UTF_8) when '-P' disable_percent = true when '--help' @@ -91,12 +96,15 @@ class ERB -d set $DEBUG to true -r [library] load a library -S [safe_level] set $SAFE (0..4) + -E ex[:in] set default external/internal encodings + -U set default encoding to UTF-8. 
 -T [trim_mode] specify trim_mode (0..2, -) -P ignore lines which start with "%" EOU exit 1 end + $<.set_encoding(Encoding::ASCII_8BIT, nil) src = $<.read filename = $FILENAME exit 2 unless src @@ -105,10 +113,8 @@ EOU erb.filename = filename if output if number - l = 1 - for line in erb.src - puts "%3d %s"%[l, line] - l += 1 + erb.src.each_line.with_index do |line, l| + puts "%3d %s"%[l+1, line] end else puts erb.src @@ -118,6 +124,19 @@ EOU end end module_function :run + + def set_encoding(extern, intern = nil) + verbose, $VERBOSE = $VERBOSE, nil + Encoding.default_external = extern unless extern.nil? || extern.empty? + Encoding.default_internal = intern unless intern.nil? || intern.empty? + [$stdin, $stdout, $stderr].each do |io| + io.set_encoding(extern, intern) + end + ensure + $VERBOSE = verbose + end + module_function :set_encoding + class << self; private :set_encoding; end end end diff --git a/lib/erb.rb b/lib/erb.rb index c879941284..c5194817be 100644 --- a/lib/erb.rb +++ b/lib/erb.rb @@ -58,6 +58,24 @@ # # See the ERB.new and ERB#result methods for more detail. # +# == Character encodings +# +# ERB (or ruby code generated by ERB) returns a string in the same +# character encoding as the input string. When the input string has +# a magic comment, however, it returns a string in the encoding specified +# by the magic comment. +# +# # -*- coding: UTF-8 -*- +# require 'erb' +# +# template = ERB.new <<EOF +# <%#-*- coding: Big5 -*-%> +# \_\_ENCODING\_\_ is <%= \_\_ENCODING\_\_ %>. +# EOF +# puts template.result +# +# Prints: \_\_ENCODING\_\_ is Big5. +# # # == Examples # @@ -517,10 +535,14 @@ class ERB end def compile(s) - out = Buffer.new(self, s.encoding) + enc = s.encoding + raise ArgumentError, "#{enc} is not ASCII compatible" if enc.dummy? 
+ s = s.dup.force_encoding("ASCII-8BIT") # don't use constant Enoding::ASCII_8BIT for miniruby + enc = detect_magic_comment(s) || enc + out = Buffer.new(self, enc) content = '' - scanner = make_scanner(s.dup.force_encoding("ASCII-8BIT")) + scanner = make_scanner(s) scanner.scan do |token| if scanner.stag.nil? case token @@ -560,6 +582,7 @@ class ERB when '<%=' out.push("#{@insert_cmd}((#{content}).to_s)") when '<%#' + # content = content.force_encoding(@enc) # out.push("# #{content.dump}") end scanner.stag = nil @@ -573,7 +596,7 @@ class ERB end out.push("#{@put_cmd} #{content.dump}") if content.size > 0 out.close - out.script + return out.script, enc end def prepare_trim_mode(mode) @@ -613,6 +636,18 @@ class ERB end attr_reader :percent, :trim_mode attr_accessor :put_cmd, :insert_cmd, :pre_cmd, :post_cmd + + private + def detect_magic_comment(s) + if /\A<%#(.*)%>/ =~ s or (@percent and /\A%#(.*)/ =~ s) + comment = $1 + comment = $1 if comment[/-\*-\s*(.*?)\s*-*-$/] + if %r"coding\s*[=:]\s*([[:alnum:]\-_]+)" =~ comment + enc = $1.sub(/-(?:mac|dos|unix)/i, '') + enc = Encoding.find(enc) + end + end + end end end @@ -688,7 +723,7 @@ class ERB @safe_level = safe_level compiler = ERB::Compiler.new(trim_mode) set_eoutvar(compiler, eoutvar) - @src = compiler.compile(str) + @src, @enc = *compiler.compile(str) @filename = nil end @@ -714,7 +749,7 @@ class ERB compiler.pre_cmd = cmd cmd = [] - cmd.push(eoutvar) + cmd.push("#{eoutvar}.force_encoding(__ENCODING__)") compiler.post_cmd = cmd end @@ -745,7 +780,11 @@ class ERB end def def_method(mod, methodname, fname='(ERB)') # :nodoc: - mod.module_eval("def #{methodname}\n" + self.src + "\nend\n", fname, -1) + src = self.src + magic_comment = "#coding:#{@enc}\n" + mod.module_eval do + eval(magic_comment + "def #{methodname}\n" + src + "\nend\n", binding, fname, -2) + end end def def_module(methodname='erb') # :nodoc: diff --git a/man/erb.1 b/man/erb.1 new file mode 100644 index 0000000000..87a0a6fca4 --- /dev/null +++ 
b/man/erb.1 @@ -0,0 +1,158 @@ +.\"Ruby is copyrighted by Yukihiro Matsumoto . +.Dd December 27, 2008 +.Dt ERB(1) "" "Ruby Programmers Reference Guide" +.Os UNIX +.Sh NAME +.Nm erb +.Nd Ruby Templating +.Sh SYNOPSIS +.Nm +.Op Fl -version +.Op Fl UPdnvx +.Op Fl E Ar ext Ns Op Ns : Ns int +.Op Fl S Ar level +.Op Fl T Ar mode +.Op Fl r Ar library +.Op Fl - +.Op file ... +.Pp +.Sh DESCRIPTION +.Nm +is a command line front-end for +.Li "ERB" +library, which is an implementation of eRuby. + +eRuby provides an easy to use but powerful templating system for Ruby. +Using eRuby, actual Ruby code can be added to any plain text document for the +purposes of generating document information details and/or flow control. + +.Nm +is a part of +.Nm Ruby . +.Pp +.Sh OPTIONS +.Bl -tag -width "1234567890123" -compact +.Pp +.It Fl -version +Prints the version of +.Nm . +.Pp +.It Fl E Ar external Ns Op : Ns Ar internal +.It Fl -encoding Ar external Ns Op : Ns Ar internal +Specifies the default value(s) for external and internal encodings. Values should be separated with colon (:). + +You can omit the one for internal encodings; then the value +.Pf ( Li "Encoding.default_internal" ) will be nil. +.Pp +.It Fl P +Evaluates lines starting with +.Li "%" +as Ruby code and removes the trailing EOLs. +.Pp +.It Fl S Ar level +Specifies the safe level in which eRuby script will run. +.Pp +.It Fl T Ar mode +Specifies trim mode (default 0). +.Ar mode +can be one of +.Bl -hang -offset indent +.It Sy 0 +EOL remains after the embedded ruby script is evaluated. +.Pp +.It Sy 1 +EOL is removed if the line ends with +.Li "%>" . +.Pp +.It Sy 2 +EOL is removed if the line starts with +.Li "<%" +and ends with +.Li "%>" . +.Pp +.It Sy - +EOL is removed if the line ends with +.Li "-%>" . +And leading whitespaces are removed if the erb directive starts with +.Li "<%-" . +.Pp +.El +.Pp +.It Fl U +Sets the default value +for internal encodings +.Pf ( Li "Encoding.default_internal" ) to UTF-8. 
+.Pp +.It Fl d +.It Fl -debug +Turns on debug mode. +.Li "$DEBUG" +will be set to true. +.Pp +.It Fl h +.It Fl -help +Prints a summary of the options. +.Pp +.It Fl n +Used with +.Fl x . +Prepends the line number to each line in the output. +.Pp +.It Fl v +Enables verbose mode. +.Li "$VERBOSE" +will be set to true. +.Pp +.It Fl x +Converts the eRuby script into Ruby script and prints it without line numbers. +.Pp +.El +.Pp +.Sh EXAMPLES +Here is an eRuby script +.Bd -literal -offset indent + +<% require 'prime' -%> + + <%= 1+1 %> + <%= __FILE__ %> + <%= Prime.each(10).to_a.join(", ") %> + +.Ed + +Command +.Dl "% erb -T - example.erb" +prints +.Bd -literal -offset indent + + + 2 + example.erb + 2, 3, 5, 7 + +.Ed +.Pp +.Sh SEE ALSO +.Xr ruby 1 . + +And see +.Xr ri 1 +documentation for +.Li "ERB" +class. +.El +.Pp +.Sh REPORTING BUGS +.Bl -bullet +.Li Security vulnerabilities should be reported via an email to +.Aq security@ruby-lang.org Ns +.Li . +Reported problems will be published after being fixed. +.Pp +.Li And you can report other bugs and feature requests via the +Ruby Issue Tracking System (http://redmine.ruby-lang.org). +Do not report security vulnerabilities +via the system because it publishes the vulnerabilities immediately. +.El +.Sh AUTHORS +Written by Masatoshi SEKI. 
diff --git a/test/erb/test_erb_m17n.rb b/test/erb/test_erb_m17n.rb new file mode 100644 index 0000000000..432cb4fd74 --- /dev/null +++ b/test/erb/test_erb_m17n.rb @@ -0,0 +1,123 @@ +# -*- coding: UTF-8 -*- +require 'test/unit' +require 'erb' + +class TestERB < Test::Unit::TestCase + def test_result_encoding + erb = ERB.new("hello") + assert_equal __ENCODING__, erb.result.encoding + + erb = ERB.new("こんにちは".encode("EUC-JP")) + assert_equal Encoding::EUC_JP, erb.result.encoding + + erb = ERB.new("\xC4\xE3\xBA\xC3".force_encoding("EUC-CN")) + assert_equal Encoding::EUC_CN, erb.result.encoding + + erb = ERB.new("γεια σας".encode("ISO-8859-7")) + assert_equal Encoding::ISO_8859_7, erb.result.encoding + + assert_raise(ArgumentError, /ASCII compatible/) { + ERB.new("こんにちは".force_encoding("ISO-2022-JP")) # dummy encoding + } + end + + def test_generate_magic_comment + erb = ERB.new("hello") + assert_match /#coding:UTF-8/, erb.src + + erb = ERB.new("hello".force_encoding("EUC-JP")) + assert_match /#coding:EUC-JP/, erb.src + + erb = ERB.new("hello".force_encoding("ISO-8859-9")) + assert_match /#coding:ISO-8859-9/, erb.src + end + + def test_literal_encoding + erb = ERB.new("literal encoding is <%= 'hello'.encoding %>") + assert_match /literal encoding is UTF-8/, erb.result + + erb = ERB.new("literal encoding is <%= 'こんにちは'.encoding %>".encode("EUC-JP")) + assert_match /literal encoding is EUC-JP/, erb.result + + erb = ERB.new("literal encoding is <%= '\xC4\xE3\xBA\xC3'.encoding %>".force_encoding("EUC-CN")) + assert_match /literal encoding is GB2312/, erb.result + end + + def test___ENCODING__ + erb = ERB.new("__ENCODING__ is <%= __ENCODING__ %>") + assert_match /__ENCODING__ is UTF-8/, erb.result + + erb = ERB.new("__ENCODING__ is <%= __ENCODING__ %>".force_encoding("EUC-JP")) + assert_match /__ENCODING__ is EUC-JP/, erb.result + + erb = ERB.new("__ENCODING__ is <%= __ENCODING__ %>".force_encoding("Big5")) + assert_match /__ENCODING__ is Big5/, erb.result + end + + def 
test_recognize_magic_comment + erb = ERB.new(<<-EOS.encode("EUC-KR")) +<%# -*- coding: EUC-KR -*- %> +안녕하세요 + EOS + assert_match /#coding:EUC-KR/, erb.src + assert_equal Encoding::EUC_KR, erb.result.encoding + + erb = ERB.new(<<-EOS.encode("EUC-KR").force_encoding("ASCII-8BIT")) +<%#-*- coding: EUC-KR -*-%> +안녕하세요 + EOS + assert_match /#coding:EUC-KR/, erb.src + assert_equal Encoding::EUC_KR, erb.result.encoding + + erb = ERB.new(<<-EOS.encode("EUC-KR").force_encoding("ASCII-8BIT")) +<%# vim: tabsize=8 encoding=EUC-KR shiftwidth=2 expandtab %> +안녕하세요 + EOS + assert_match /#coding:EUC-KR/, erb.src + assert_equal Encoding::EUC_KR, erb.result.encoding + + erb = ERB.new(<<-EOS.encode("EUC-KR").force_encoding("ASCII-8BIT")) +<%#coding:EUC-KR %> +안녕하세요 + EOS + assert_match /#coding:EUC-KR/, erb.src + assert_equal Encoding::EUC_KR, erb.result.encoding + + erb = ERB.new(<<-EOS.encode("EUC-KR").force_encoding("EUC-JP")) +<%#coding:EUC-KR %> +안녕하세요 + EOS + assert_match /#coding:EUC-KR/, erb.src + assert_equal Encoding::EUC_KR, erb.result.encoding + end + + module M; end + def test_method_with_encoding + obj = Object.new + obj.extend(M) + + erb = ERB.new(<<-EOS.encode("EUC-JP").force_encoding("ASCII-8BIT")) +<%#coding:EUC-JP %> +literal encoding is <%= 'こんにちは'.encoding %> +__ENCODING__ is <%= __ENCODING__ %> + EOS + erb.def_method(M, :m_from_magic_comment) + + result = obj.m_from_magic_comment + assert_equal Encoding::EUC_JP, result.encoding + assert_match /literal encoding is EUC-JP/, result + assert_match /__ENCODING__ is EUC-JP/, result + + erb = ERB.new(<<-EOS.encode("EUC-KR")) +literal encoding is <%= '안녕하세요'.encoding %> +__ENCODING__ is <%= __ENCODING__ %> +EOS + erb.def_method(M, :m_from_eval_encoding) + result = obj.m_from_eval_encoding + assert_equal Encoding::EUC_KR, result.encoding + assert_match /literal encoding is EUC-KR/, result + assert_match /__ENCODING__ is EUC-KR/, result + end +end + +# vim:fileencoding=UTF-8