mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* include/ruby/encoding.h (ECONV_XML_ATTR_CONTENT_ENCODER): defined.
(ECONV_STATEFUL_ENCODER_MASK): defined. (ECONV_XML_ATTR_QUOTE_ENCODER): defined. (ECONV_XML_ATTR_ENCODER): removed. * enc/trans/escape.trans (rb_escape_xml_attr_content): defined. (rb_escape_xml_attr_quote): defined. (rb_escape_xml_attr): removed. * io.c (NEED_WRITECONV): writeconv is required if supplemental converter is used. (make_writeconv): apply stateful encoder in writeconv. * transcode.c: follow the constant change. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19209 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
120772041e
commit
020e681eec
7 changed files with 150 additions and 70 deletions
17
ChangeLog
17
ChangeLog
|
@ -1,3 +1,20 @@
|
|||
Sun Sep 7 12:09:29 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* include/ruby/encoding.h (ECONV_XML_ATTR_CONTENT_ENCODER): defined.
|
||||
(ECONV_STATEFUL_ENCODER_MASK): defined.
|
||||
(ECONV_XML_ATTR_QUOTE_ENCODER): defined.
|
||||
(ECONV_XML_ATTR_ENCODER): removed.
|
||||
|
||||
* enc/trans/escape.trans (rb_escape_xml_attr_content): defined.
|
||||
(rb_escape_xml_attr_quote): defined.
|
||||
(rb_escape_xml_attr): removed.
|
||||
|
||||
* io.c (NEED_WRITECONV): writeconv is required if supplemental
|
||||
converter is used.
|
||||
(make_writeconv): apply stateful encoder in writeconv.
|
||||
|
||||
* transcode.c: follow the constant change.
|
||||
|
||||
Sun Sep 7 07:24:09 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
|
||||
|
||||
* misc/*.el: merged the following patches from Nathan Weizenbaum
|
||||
|
|
|
@ -52,9 +52,18 @@ fun_so_escape_xml_chref(void *statep, const unsigned char *s, size_t l, unsigned
|
|||
map_xml_text["3E"] = :func_so
|
||||
transcode_generate_node(ActionMap.parse(map_xml_text), "escape_xml_text")
|
||||
|
||||
map_xml_attr = {}
|
||||
map_xml_attr["{00-FF}"] = :func_so
|
||||
transcode_generate_node(ActionMap.parse(map_xml_attr), "escape_xml_attr")
|
||||
map_xml_attr_content = {}
|
||||
map_xml_attr_content["{00-21,23-25,27-3B,3D,3F-FF}"] = :nomap
|
||||
map_xml_attr_content["22"] = :func_so
|
||||
map_xml_attr_content["26"] = :func_so
|
||||
map_xml_attr_content["3C"] = :func_so
|
||||
map_xml_attr_content["3E"] = :func_so
|
||||
transcode_generate_node(ActionMap.parse(map_xml_attr_content), "escape_xml_attr_content")
|
||||
|
||||
map_xml_attr_quote = {}
|
||||
map_xml_attr_quote["{00-FF}"] = :func_so
|
||||
transcode_generate_node(ActionMap.parse(map_xml_attr_quote), "escape_xml_attr_quote")
|
||||
|
||||
%>
|
||||
|
||||
<%= transcode_generated_code %>
|
||||
|
@ -83,11 +92,23 @@ rb_escape_xml_text = {
|
|||
NULL, NULL, NULL, &fun_so_escape_xml_chref
|
||||
};
|
||||
|
||||
static const rb_transcoder
|
||||
rb_escape_xml_attr_content = {
|
||||
"", "xml-attr-content-escaped", escape_xml_attr_content,
|
||||
TRANSCODE_TABLE_INFO,
|
||||
1, /* input_unit_length */
|
||||
1, /* max_input */
|
||||
6, /* max_output */
|
||||
stateless_converter, /* stateful_type */
|
||||
0, NULL, NULL,
|
||||
NULL, NULL, NULL, &fun_so_escape_xml_chref
|
||||
};
|
||||
|
||||
#define END 0
|
||||
#define NORMAL 1
|
||||
|
||||
static int
|
||||
escape_xml_attr_init(void *statep)
|
||||
escape_xml_attr_quote_init(void *statep)
|
||||
{
|
||||
unsigned char *sp = statep;
|
||||
*sp = END;
|
||||
|
@ -95,7 +116,7 @@ escape_xml_attr_init(void *statep)
|
|||
}
|
||||
|
||||
static int
|
||||
fun_so_escape_xml_attr(void *statep, const unsigned char *s, size_t l, unsigned char *o)
|
||||
fun_so_escape_xml_attr_quote(void *statep, const unsigned char *s, size_t l, unsigned char *o)
|
||||
{
|
||||
unsigned char *sp = statep;
|
||||
int n = 0;
|
||||
|
@ -103,23 +124,12 @@ fun_so_escape_xml_attr(void *statep, const unsigned char *s, size_t l, unsigned
|
|||
*sp = NORMAL;
|
||||
o[n++] = '"';
|
||||
}
|
||||
switch (s[0]) {
|
||||
case '&':
|
||||
case '<':
|
||||
case '>':
|
||||
case '"':
|
||||
n += fun_so_escape_xml_chref(statep, s, l, o+n);
|
||||
break;
|
||||
|
||||
default:
|
||||
o[n++] = s[0];
|
||||
break;
|
||||
}
|
||||
o[n++] = s[0];
|
||||
return n;
|
||||
}
|
||||
|
||||
static int
|
||||
escape_xml_attr_finish(void *statep, unsigned char *o)
|
||||
escape_xml_attr_quote_finish(void *statep, unsigned char *o)
|
||||
{
|
||||
unsigned char *sp = statep;
|
||||
int n = 0;
|
||||
|
@ -135,16 +145,16 @@ escape_xml_attr_finish(void *statep, unsigned char *o)
|
|||
}
|
||||
|
||||
static const rb_transcoder
|
||||
rb_escape_xml_attr = {
|
||||
"", "xml-attr-escaped", escape_xml_attr,
|
||||
rb_escape_xml_attr_quote = {
|
||||
"", "xml-attr-quoted", escape_xml_attr_quote,
|
||||
TRANSCODE_TABLE_INFO,
|
||||
1, /* input_unit_length */
|
||||
1, /* max_input */
|
||||
7, /* max_output */
|
||||
stateful_encoder, /* stateful_type */
|
||||
1, escape_xml_attr_init, escape_xml_attr_init,
|
||||
NULL, NULL, NULL, fun_so_escape_xml_attr,
|
||||
escape_xml_attr_finish
|
||||
1, escape_xml_attr_quote_init, escape_xml_attr_quote_init,
|
||||
NULL, NULL, NULL, fun_so_escape_xml_attr_quote,
|
||||
escape_xml_attr_quote_finish
|
||||
};
|
||||
|
||||
void
|
||||
|
@ -152,6 +162,7 @@ Init_escape(void)
|
|||
{
|
||||
rb_register_transcoder(&rb_escape_amp_as_chref);
|
||||
rb_register_transcoder(&rb_escape_xml_text);
|
||||
rb_register_transcoder(&rb_escape_xml_attr);
|
||||
rb_register_transcoder(&rb_escape_xml_attr_content);
|
||||
rb_register_transcoder(&rb_escape_xml_attr_quote);
|
||||
}
|
||||
|
||||
|
|
|
@ -269,7 +269,10 @@ void rb_econv_binmode(rb_econv_t *ec);
|
|||
#define ECONV_CRLF_NEWLINE_ENCODER 0x00001000
|
||||
#define ECONV_CR_NEWLINE_ENCODER 0x00002000
|
||||
#define ECONV_XML_TEXT_ENCODER 0x00004000
|
||||
#define ECONV_XML_ATTR_ENCODER 0x00008000
|
||||
#define ECONV_XML_ATTR_CONTENT_ENCODER 0x00008000
|
||||
|
||||
#define ECONV_STATEFUL_ENCODER_MASK 0x00f00000
|
||||
#define ECONV_XML_ATTR_QUOTE_ENCODER 0x00100000
|
||||
|
||||
/* end of flags for rb_econv_open */
|
||||
|
||||
|
|
62
io.c
62
io.c
|
@ -682,7 +682,7 @@ rb_io_wait_writable(int f)
|
|||
# define NEED_NEWLINE_ENCODER(fptr) 0
|
||||
#endif
|
||||
#define NEED_READCONV(fptr) (fptr->encs.enc2 != NULL || NEED_NEWLINE_DECODER(fptr))
|
||||
#define NEED_WRITECONV(fptr) (fptr->encs.enc != NULL || NEED_NEWLINE_ENCODER(fptr))
|
||||
#define NEED_WRITECONV(fptr) (fptr->encs.enc != NULL || NEED_NEWLINE_ENCODER(fptr) || (fptr->encs.ecflags & (ECONV_DECODER_MASK|ECONV_ENCODER_MASK|ECONV_STATEFUL_ENCODER_MASK)))
|
||||
|
||||
static void
|
||||
make_writeconv(rb_io_t *fptr)
|
||||
|
@ -695,42 +695,50 @@ make_writeconv(rb_io_t *fptr)
|
|||
|
||||
fptr->writeconv_initialized = 1;
|
||||
|
||||
/* ECONV_INVALID_XXX and ECONV_UNDEF_XXX should be set both.
|
||||
* But ECONV_CRLF_NEWLINE_ENCODER should be set only for the first. */
|
||||
fptr->writeconv_pre_ecflags = fptr->encs.ecflags;
|
||||
fptr->writeconv_pre_ecopts = fptr->encs.ecopts;
|
||||
ecflags = fptr->encs.ecflags;
|
||||
ecopts = fptr->encs.ecopts;
|
||||
|
||||
#ifdef TEXTMODE_NEWLINE_ENCODER
|
||||
if (NEED_NEWLINE_ENCODER(fptr))
|
||||
ecflags |= TEXTMODE_NEWLINE_ENCODER;
|
||||
#endif
|
||||
|
||||
if (!fptr->encs.enc) {
|
||||
if (NEED_NEWLINE_ENCODER(fptr))
|
||||
ecflags |= TEXTMODE_NEWLINE_ENCODER;
|
||||
/* no encoding conversion */
|
||||
fptr->writeconv_pre_ecflags = 0;
|
||||
fptr->writeconv_pre_ecopts = Qnil;
|
||||
fptr->writeconv = rb_econv_open_opts("", "", ecflags, ecopts);
|
||||
if (!fptr->writeconv)
|
||||
rb_exc_raise(rb_econv_open_exc("", "", ecflags));
|
||||
fptr->writeconv_stateless = Qnil;
|
||||
return;
|
||||
}
|
||||
|
||||
if (NEED_NEWLINE_ENCODER(fptr))
|
||||
fptr->writeconv_pre_ecflags |= TEXTMODE_NEWLINE_ENCODER;
|
||||
#endif
|
||||
ecflags &= ECONV_ERROR_HANDLER_MASK;
|
||||
|
||||
enc = fptr->encs.enc2 ? fptr->encs.enc2 : fptr->encs.enc;
|
||||
senc = rb_econv_stateless_encoding(enc->name);
|
||||
if (senc) {
|
||||
denc = enc->name;
|
||||
fptr->writeconv_stateless = rb_str_new2(senc);
|
||||
fptr->writeconv = rb_econv_open_opts(senc, denc, ecflags, ecopts);
|
||||
if (!fptr->writeconv)
|
||||
rb_exc_raise(rb_econv_open_exc(senc, denc, ecflags));
|
||||
}
|
||||
else {
|
||||
denc = NULL;
|
||||
fptr->writeconv_stateless = Qnil;
|
||||
fptr->writeconv = NULL;
|
||||
enc = fptr->encs.enc2 ? fptr->encs.enc2 : fptr->encs.enc;
|
||||
senc = rb_econv_stateless_encoding(enc->name);
|
||||
if (!senc && !(fptr->encs.ecflags & ECONV_STATEFUL_ENCODER_MASK)) {
|
||||
/* single conversion */
|
||||
fptr->writeconv_pre_ecflags = ecflags;
|
||||
fptr->writeconv_pre_ecopts = ecopts;
|
||||
fptr->writeconv = NULL;
|
||||
fptr->writeconv_stateless = Qnil;
|
||||
}
|
||||
else {
|
||||
/* double conversion */
|
||||
fptr->writeconv_pre_ecflags = ecflags & ~ECONV_STATEFUL_ENCODER_MASK;
|
||||
fptr->writeconv_pre_ecopts = ecopts;
|
||||
if (senc) {
|
||||
denc = enc->name;
|
||||
fptr->writeconv_stateless = rb_str_new2(senc);
|
||||
}
|
||||
else {
|
||||
senc = denc = "";
|
||||
fptr->writeconv_stateless = rb_str_new2(enc->name);
|
||||
}
|
||||
ecflags = fptr->encs.ecflags & (ECONV_ERROR_HANDLER_MASK|ECONV_STATEFUL_ENCODER_MASK);
|
||||
ecopts = fptr->encs.ecopts;
|
||||
fptr->writeconv = rb_econv_open_opts(senc, denc, ecflags, ecopts);
|
||||
if (!fptr->writeconv)
|
||||
rb_exc_raise(rb_econv_open_exc(senc, denc, ecflags));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -738,20 +738,37 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
assert_equal('', ec.finish)
|
||||
end
|
||||
|
||||
def test_xml_escape_attr
|
||||
ec = Encoding::Converter.new("", "xml-attr-escaped")
|
||||
def test_xml_escape_attr_content
|
||||
ec = Encoding::Converter.new("", "xml-attr-content-escaped")
|
||||
assert_equal('', ec.finish)
|
||||
|
||||
ec = Encoding::Converter.new("", "xml-attr-content-escaped")
|
||||
assert_equal('', ec.convert(""))
|
||||
assert_equal('', ec.finish)
|
||||
|
||||
ec = Encoding::Converter.new("", "xml-attr-content-escaped")
|
||||
assert_equal('&quot;', ec.convert('"'))
|
||||
assert_equal('', ec.finish)
|
||||
|
||||
ec = Encoding::Converter.new("", "xml-attr-content-escaped")
|
||||
assert_equal('&amp;&lt;&gt;&quot;', ec.convert("&<>\""))
|
||||
assert_equal('', ec.finish)
|
||||
end
|
||||
|
||||
def test_xml_escape_attr_quote
|
||||
ec = Encoding::Converter.new("", "xml-attr-quoted")
|
||||
assert_equal('""', ec.finish)
|
||||
|
||||
ec = Encoding::Converter.new("", "xml-attr-escaped")
|
||||
ec = Encoding::Converter.new("", "xml-attr-quoted")
|
||||
assert_equal('', ec.convert(""))
|
||||
assert_equal('""', ec.finish)
|
||||
|
||||
ec = Encoding::Converter.new("", "xml-attr-escaped")
|
||||
assert_equal('"&quot;', ec.convert('"'))
|
||||
ec = Encoding::Converter.new("", "xml-attr-quoted")
|
||||
assert_equal('""', ec.convert('"'))
|
||||
assert_equal('"', ec.finish)
|
||||
|
||||
ec = Encoding::Converter.new("", "xml-attr-escaped")
|
||||
assert_equal('"&amp;&lt;&gt;&quot;', ec.convert("&<>\""))
|
||||
ec = Encoding::Converter.new("", "xml-attr-quoted")
|
||||
assert_equal('"&<>"', ec.convert("&<>\""))
|
||||
assert_equal('"', ec.finish)
|
||||
end
|
||||
|
||||
|
@ -760,7 +777,10 @@ class TestEncodingConverter < Test::Unit::TestCase
|
|||
assert_equal('&lt;&#x2665;&gt;&amp;"&#x2661;"', ec.convert("<\u2665>&\"\u2661\""))
|
||||
assert_equal('', ec.finish)
|
||||
|
||||
ec = Encoding::Converter.new("utf-8", "euc-jp", Encoding::Converter::XML_ATTR_ENCODER|Encoding::Converter::UNDEF_HEX_CHARREF)
|
||||
ec = Encoding::Converter.new("utf-8", "euc-jp",
|
||||
Encoding::Converter::XML_ATTR_CONTENT_ENCODER|
|
||||
Encoding::Converter::XML_ATTR_QUOTE_ENCODER|
|
||||
Encoding::Converter::UNDEF_HEX_CHARREF)
|
||||
assert_equal('"&lt;&#x2665;&gt;&amp;&quot;&#x2661;&quot;', ec.convert("<\u2665>&\"\u2661\""))
|
||||
assert_equal('"', ec.finish)
|
||||
|
||||
|
|
|
@ -1461,6 +1461,18 @@ EOT
|
|||
|
||||
def test_w_xml_attr
|
||||
with_tmpdir {
|
||||
open("raw.txt", "wb", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" }
|
||||
content = File.read("raw.txt", :mode=>"rb:ascii-8bit")
|
||||
assert_equal("\"&amp;&lt;&gt;&quot;'\u4E02\u3042\n\"".force_encoding("ascii-8bit"), content)
|
||||
|
||||
open("ascii.txt", "wb:us-ascii", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" }
|
||||
content = File.read("ascii.txt", :mode=>"rb:ascii-8bit")
|
||||
assert_equal("\"&amp;&lt;&gt;&quot;'&#x4E02;&#x3042;\n\"".force_encoding("ascii-8bit"), content)
|
||||
|
||||
open("iso-2022-jp.txt", "wb:iso-2022-jp", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" }
|
||||
content = File.read("iso-2022-jp.txt", :mode=>"rb:ascii-8bit")
|
||||
assert_equal("\"&amp;&lt;&gt;&quot;'&#x4E02;\e$B$\"\e(B\n\"".force_encoding("ascii-8bit"), content)
|
||||
|
||||
open("eucjp.txt", "w:euc-jp:utf-8", xml: :attr) {|f|
|
||||
f.print "\u4E02" # U+4E02 is 0x3021 in JIS X 0212
|
||||
}
|
||||
|
@ -1480,6 +1492,5 @@ EOT
|
|||
assert_equal("\"&#x4E02;\"".force_encoding("ascii-8bit"), content)
|
||||
}
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
|
28
transcode.c
28
transcode.c
|
@ -896,7 +896,7 @@ rb_econv_open(const char *sname, const char *dname, int ecflags)
|
|||
return NULL;
|
||||
|
||||
if ((ecflags & ECONV_XML_TEXT_ENCODER) &&
|
||||
(ecflags & ECONV_XML_ATTR_ENCODER))
|
||||
(ecflags & ECONV_XML_ATTR_CONTENT_ENCODER))
|
||||
return NULL;
|
||||
|
||||
num_encoders = 0;
|
||||
|
@ -909,8 +909,11 @@ rb_econv_open(const char *sname, const char *dname, int ecflags)
|
|||
if (ecflags & ECONV_XML_TEXT_ENCODER)
|
||||
if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-text-escaped")))
|
||||
return NULL;
|
||||
if (ecflags & ECONV_XML_ATTR_ENCODER)
|
||||
if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-attr-escaped")))
|
||||
if (ecflags & ECONV_XML_ATTR_CONTENT_ENCODER)
|
||||
if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-attr-content-escaped")))
|
||||
return NULL;
|
||||
if (ecflags & ECONV_XML_ATTR_QUOTE_ENCODER)
|
||||
if (!(encoders[num_encoders++] = get_transcoder_entry("", "xml-attr-quoted")))
|
||||
return NULL;
|
||||
|
||||
num_decoders = 0;
|
||||
|
@ -1792,7 +1795,8 @@ econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg)
|
|||
ECONV_CRLF_NEWLINE_ENCODER|
|
||||
ECONV_CR_NEWLINE_ENCODER|
|
||||
ECONV_XML_TEXT_ENCODER|
|
||||
ECONV_XML_ATTR_ENCODER)) {
|
||||
ECONV_XML_ATTR_CONTENT_ENCODER|
|
||||
ECONV_XML_ATTR_QUOTE_ENCODER)) {
|
||||
const char *pre = "";
|
||||
if (has_description)
|
||||
rb_str_cat2(mesg, " with ");
|
||||
|
@ -1812,9 +1816,13 @@ econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg)
|
|||
rb_str_cat2(mesg, pre); pre = ",";
|
||||
rb_str_cat2(mesg, "XML-text");
|
||||
}
|
||||
if (ecflags & ECONV_XML_ATTR_ENCODER) {
|
||||
if (ecflags & ECONV_XML_ATTR_CONTENT_ENCODER) {
|
||||
rb_str_cat2(mesg, pre); pre = ",";
|
||||
rb_str_cat2(mesg, "XML-attr");
|
||||
rb_str_cat2(mesg, "XML-attr-content");
|
||||
}
|
||||
if (ecflags & ECONV_XML_ATTR_QUOTE_ENCODER) {
|
||||
rb_str_cat2(mesg, pre); pre = ",";
|
||||
rb_str_cat2(mesg, "XML-attr-quote");
|
||||
}
|
||||
has_description = 1;
|
||||
}
|
||||
|
@ -2173,7 +2181,7 @@ econv_opts(VALUE opt)
|
|||
ecflags |= ECONV_XML_TEXT_ENCODER|ECONV_UNDEF_HEX_CHARREF;
|
||||
}
|
||||
else if (v==sym_attr) {
|
||||
ecflags |= ECONV_XML_ATTR_ENCODER|ECONV_UNDEF_HEX_CHARREF;
|
||||
ecflags |= ECONV_XML_ATTR_CONTENT_ENCODER|ECONV_XML_ATTR_QUOTE_ENCODER|ECONV_UNDEF_HEX_CHARREF;
|
||||
}
|
||||
else {
|
||||
rb_raise(rb_eArgError, "unexpected value for xml option: %s", rb_id2name(SYM2ID(v)));
|
||||
|
@ -2329,7 +2337,8 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
|
|||
ECONV_CRLF_NEWLINE_ENCODER|
|
||||
ECONV_CR_NEWLINE_ENCODER|
|
||||
ECONV_XML_TEXT_ENCODER|
|
||||
ECONV_XML_ATTR_ENCODER)) == 0) {
|
||||
ECONV_XML_ATTR_CONTENT_ENCODER|
|
||||
ECONV_XML_ATTR_QUOTE_ENCODER)) == 0) {
|
||||
if (senc && senc == denc) {
|
||||
return -1;
|
||||
}
|
||||
|
@ -3573,7 +3582,8 @@ Init_transcode(void)
|
|||
rb_define_const(rb_cEncodingConverter, "CRLF_NEWLINE_ENCODER", INT2FIX(ECONV_CRLF_NEWLINE_ENCODER));
|
||||
rb_define_const(rb_cEncodingConverter, "CR_NEWLINE_ENCODER", INT2FIX(ECONV_CR_NEWLINE_ENCODER));
|
||||
rb_define_const(rb_cEncodingConverter, "XML_TEXT_ENCODER", INT2FIX(ECONV_XML_TEXT_ENCODER));
|
||||
rb_define_const(rb_cEncodingConverter, "XML_ATTR_ENCODER", INT2FIX(ECONV_XML_ATTR_ENCODER));
|
||||
rb_define_const(rb_cEncodingConverter, "XML_ATTR_CONTENT_ENCODER", INT2FIX(ECONV_XML_ATTR_CONTENT_ENCODER));
|
||||
rb_define_const(rb_cEncodingConverter, "XML_ATTR_QUOTE_ENCODER", INT2FIX(ECONV_XML_ATTR_QUOTE_ENCODER));
|
||||
|
||||
rb_define_method(rb_eConversionUndefined, "source_encoding_name", ecerr_source_encoding_name, 0);
|
||||
rb_define_method(rb_eConversionUndefined, "destination_encoding_name", ecerr_destination_encoding_name, 0);
|
||||
|
|
Loading…
Reference in a new issue