diff --git a/ChangeLog b/ChangeLog
index 27c1f9878b..32ad9b45e0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+Sat Sep 6 06:28:46 2008 Tanaka Akira
+
+	* enc/trans/escape.trans: new file.
+
 Sat Sep 6 06:23:27 2008 Tanaka Akira
 
 	* tool/transcode-tblgen.rb (StrSet.parse): accept upper case
diff --git a/enc/trans/escape.trans b/enc/trans/escape.trans
new file mode 100644
index 0000000000..d1612187aa
--- /dev/null
+++ b/enc/trans/escape.trans
@@ -0,0 +1,109 @@
+#include "transcode_data.h"
+
+static int
+fun_so_escape_html_chref(void *statep, const unsigned char *s, size_t l, unsigned char *o)
+{
+    switch (*s) {
+      case '&':
+        o[0] = '&';
+        o[1] = 'a';
+        o[2] = 'm';
+        o[3] = 'p';
+        o[4] = ';';
+        return 5;
+
+      case '<':
+        o[0] = '&';
+        o[1] = 'l';
+        o[2] = 't';
+        o[3] = ';';
+        return 4;
+
+      case '>':
+        o[0] = '&';
+        o[1] = 'g';
+        o[2] = 't';
+        o[3] = ';';
+        return 4;
+
+      case '"':
+        o[0] = '&';
+        o[1] = 'q';
+        o[2] = 'u';
+        o[3] = 'o';
+        o[4] = 't';
+        o[5] = ';';
+        return 6;
+
+      default:
+        rb_bug("unexpected char");
+    }
+}
+<%
+  map_amp = {}
+  map_amp["{00-25,27-FF}"] = :nomap
+  map_amp["26"] = :func_so
+  transcode_generate_node(ActionMap.parse(map_amp), "escape_amp_as_chref")
+
+  map_html_text = {}
+  map_html_text["{00-25,27-3B,3D,3F-FF}"] = :nomap
+  map_html_text["26"] = :func_so
+  map_html_text["3C"] = :func_so
+  map_html_text["3E"] = :func_so
+  transcode_generate_node(ActionMap.parse(map_html_text), "escape_html_text")
+
+  map_html_attr = {}
+  map_html_attr["{00-21,23-25,27-3B,3D,3F-FF}"] = :nomap
+  map_html_attr["22"] = :func_so
+  map_html_attr["26"] = :func_so
+  map_html_attr["3C"] = :func_so
+  map_html_attr["3E"] = :func_so
+  transcode_generate_node(ActionMap.parse(map_html_attr), "escape_html_attr")
+%>
+
+<%= transcode_generated_code %>
+
+static const rb_transcoder
+rb_escape_amp_as_chref = {
+    "", "amp-escaped", escape_amp_as_chref,
+    TRANSCODE_TABLE_INFO,
+    1, /* input_unit_length */
+    1, /* max_input */
+    5, /* max_output */
+    stateless_converter, /* stateful_type */
+    0, NULL, NULL,
+    NULL, NULL, NULL, &fun_so_escape_html_chref
+};
+
+static const rb_transcoder
+rb_escape_html_text = {
+    "", "html-text-escaped", escape_html_text,
+    TRANSCODE_TABLE_INFO,
+    1, /* input_unit_length */
+    1, /* max_input */
+    5, /* max_output */
+    stateless_converter, /* stateful_type */
+    0, NULL, NULL,
+    NULL, NULL, NULL, &fun_so_escape_html_chref
+};
+
+static const rb_transcoder
+rb_escape_html_attr = {
+    "", "html-attr-escaped", escape_html_attr,
+    TRANSCODE_TABLE_INFO,
+    1, /* input_unit_length */
+    1, /* max_input */
+    6, /* max_output */
+    stateless_converter, /* stateful_type */
+    0, NULL, NULL,
+    NULL, NULL, NULL, &fun_so_escape_html_chref
+};
+
+void
+Init_escape(void)
+{
+    rb_register_transcoder(&rb_escape_amp_as_chref);
+    rb_register_transcoder(&rb_escape_html_text);
+    rb_register_transcoder(&rb_escape_html_attr);
+}
+
diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb
index 0a774c4a7a..be4917ddf7 100644
--- a/test/ruby/test_econv.rb
+++ b/test/ruby/test_econv.rb
@@ -692,5 +692,19 @@ class TestEncodingConverter < Test::Unit::TestCase
     ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1", Encoding::Converter::UNDEF_HEX_CHARREF)
     assert_equal("&#x4EA4;&#x63DB;&#x6CD5;&#x5247;: n\xD7m=m\xD7n".force_encoding("ISO-8859-1"),
                  ec.convert("\xB8\xF2\xB4\xB9\xCB\xA1\xC2\xA7: n\xA1\xDFm=m\xA1\xDFn"))
+
+    ec = Encoding::Converter.new("UTF-8", "US-ASCII", Encoding::Converter::UNDEF_HEX_CHARREF)
+    assert_equal("&", ec.convert("&"))
+  end
+
+  def test_html_escape
+    ec = Encoding::Converter.new("", "amp-escaped")
+    assert_equal('&amp;<>"', ec.convert("&<>\""))
+
+    ec = Encoding::Converter.new("", "html-text-escaped")
+    assert_equal('&amp;&lt;&gt;"', ec.convert("&<>\""))
+
+    ec = Encoding::Converter.new("", "html-attr-escaped")
+    assert_equal('&amp;&lt;&gt;&quot;', ec.convert("&<>\""))
   end
 end