mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* include/ruby/encoding.h (rb_econv_output): add str_encoding
argument. * transcode.c (get_replacement_character): add repl_enc_ptr argument. (rb_econv_output_with_destination_encoding): renamed from rb_econv_output and make it static. (rb_econv_output): convert str and call rb_econv_output_with_destination_encoding. (output_replacement_character): follow above interface change. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18646 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
a98587c0fd
commit
fe091e89b7
3 changed files with 105 additions and 8 deletions
12
ChangeLog
12
ChangeLog
|
@ -1,3 +1,15 @@
|
||||||
|
Fri Aug 15 19:57:01 2008 Tanaka Akira <akr@fsij.org>
|
||||||
|
|
||||||
|
* include/ruby/encoding.h (rb_econv_output): add str_encoding
|
||||||
|
argument.
|
||||||
|
|
||||||
|
* transcode.c (get_replacement_character): add repl_enc_ptr argument.
|
||||||
|
(rb_econv_output_with_destination_encoding): renamed from
|
||||||
|
rb_econv_output and make it static.
|
||||||
|
(rb_econv_output): convert str and call
|
||||||
|
rb_econv_output_with_destination_encoding.
|
||||||
|
(output_replacement_character): follow above interface change.
|
||||||
|
|
||||||
Fri Aug 15 19:33:57 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
Fri Aug 15 19:33:57 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
* string.c (rb_str_drop_bytes): use memmove.
|
* string.c (rb_str_drop_bytes): use memmove.
|
||||||
|
|
|
@ -247,10 +247,13 @@ rb_econv_result_t rb_econv_convert(rb_econv_t *ec,
|
||||||
const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end,
|
const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end,
|
||||||
unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
|
unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
|
||||||
int flags);
|
int flags);
|
||||||
|
|
||||||
|
/* result: 0:success -1:failure -2:conversion-failure-to-destination-encoding */
|
||||||
int rb_econv_output(rb_econv_t *ec,
|
int rb_econv_output(rb_econv_t *ec,
|
||||||
const unsigned char *str, size_t len,
|
const unsigned char *str, size_t len, const char *str_encoding,
|
||||||
unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
|
unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
|
||||||
size_t *required_size);
|
size_t *required_size);
|
||||||
|
|
||||||
void rb_econv_close(rb_econv_t *ec);
|
void rb_econv_close(rb_econv_t *ec);
|
||||||
|
|
||||||
/* flags for rb_econv_open */
|
/* flags for rb_econv_open */
|
||||||
|
|
96
transcode.c
96
transcode.c
|
@ -266,7 +266,7 @@ load_transcoder_entry(transcoder_entry_t *entry)
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char*
|
static const char*
|
||||||
get_replacement_character(rb_encoding *enc, int *len_ret)
|
get_replacement_character(rb_encoding *enc, int *len_ret, const char **repl_enc_ptr)
|
||||||
{
|
{
|
||||||
static rb_encoding *utf16be_encoding, *utf16le_encoding;
|
static rb_encoding *utf16be_encoding, *utf16le_encoding;
|
||||||
static rb_encoding *utf32be_encoding, *utf32le_encoding;
|
static rb_encoding *utf32be_encoding, *utf32le_encoding;
|
||||||
|
@ -278,26 +278,32 @@ get_replacement_character(rb_encoding *enc, int *len_ret)
|
||||||
}
|
}
|
||||||
if (rb_utf8_encoding() == enc) {
|
if (rb_utf8_encoding() == enc) {
|
||||||
*len_ret = 3;
|
*len_ret = 3;
|
||||||
|
*repl_enc_ptr = "UTF-8";
|
||||||
return "\xEF\xBF\xBD";
|
return "\xEF\xBF\xBD";
|
||||||
}
|
}
|
||||||
else if (utf16be_encoding == enc) {
|
else if (utf16be_encoding == enc) {
|
||||||
*len_ret = 2;
|
*len_ret = 2;
|
||||||
|
*repl_enc_ptr = "UTF-16BE";
|
||||||
return "\xFF\xFD";
|
return "\xFF\xFD";
|
||||||
}
|
}
|
||||||
else if (utf16le_encoding == enc) {
|
else if (utf16le_encoding == enc) {
|
||||||
*len_ret = 2;
|
*len_ret = 2;
|
||||||
|
*repl_enc_ptr = "UTF-16LE";
|
||||||
return "\xFD\xFF";
|
return "\xFD\xFF";
|
||||||
}
|
}
|
||||||
else if (utf32be_encoding == enc) {
|
else if (utf32be_encoding == enc) {
|
||||||
*len_ret = 4;
|
*len_ret = 4;
|
||||||
|
*repl_enc_ptr = "UTF-32BE";
|
||||||
return "\x00\x00\xFF\xFD";
|
return "\x00\x00\xFF\xFD";
|
||||||
}
|
}
|
||||||
else if (utf32le_encoding == enc) {
|
else if (utf32le_encoding == enc) {
|
||||||
*len_ret = 4;
|
*len_ret = 4;
|
||||||
|
*repl_enc_ptr = "UTF-32LE";
|
||||||
return "\xFD\xFF\x00\x00";
|
return "\xFD\xFF\x00\x00";
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
*len_ret = 1;
|
*len_ret = 1;
|
||||||
|
*repl_enc_ptr = "US-ASCII";
|
||||||
return "?";
|
return "?";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -962,8 +968,9 @@ rb_econv_convert(rb_econv_t *ec,
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
/* result: 0:success -1:failure */
|
||||||
rb_econv_output(rb_econv_t *ec,
|
static int
|
||||||
|
rb_econv_output_with_destination_encoding(rb_econv_t *ec,
|
||||||
const unsigned char *str, size_t len, /* string in destination encoding */
|
const unsigned char *str, size_t len, /* string in destination encoding */
|
||||||
unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
|
unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
|
||||||
size_t *required_size)
|
size_t *required_size)
|
||||||
|
@ -982,7 +989,7 @@ rb_econv_output(rb_econv_t *ec,
|
||||||
* Currently the replacement character for stateful encoding such as
|
* Currently the replacement character for stateful encoding such as
|
||||||
* ISO-2022-JP is "?" and it has no state changing sequence.
|
* ISO-2022-JP is "?" and it has no state changing sequence.
|
||||||
* So the extra state changing sequence don't occur when
|
* So the extra state changing sequence don't occur when
|
||||||
* rb_econv_output is used for replacement characters.
|
* rb_econv_output_with_destination_encoding is used for replacement characters.
|
||||||
*
|
*
|
||||||
* Thease assumption may be removed in future.
|
* Thease assumption may be removed in future.
|
||||||
* It needs to scan str to check state changing sequences in it.
|
* It needs to scan str to check state changing sequences in it.
|
||||||
|
@ -1014,6 +1021,77 @@ rb_econv_output(rb_econv_t *ec,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* result: 0:success -1:failure -2:conversion-failure-to-destination-encoding */
|
||||||
|
int
|
||||||
|
rb_econv_output(rb_econv_t *ec,
|
||||||
|
const unsigned char *str, size_t str_len, const char *str_encoding,
|
||||||
|
unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
|
||||||
|
size_t *required_size)
|
||||||
|
{
|
||||||
|
rb_econv_t *from_ascii = NULL;;
|
||||||
|
unsigned char buf[1024], *buf2;
|
||||||
|
size_t dst_len;
|
||||||
|
const unsigned char *src_ptr;
|
||||||
|
unsigned char *dst_ptr;
|
||||||
|
rb_econv_result_t res;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (encoding_equal(str_encoding, ec->last_tc->transcoder->to_encoding)) {
|
||||||
|
return rb_econv_output_with_destination_encoding(ec, str, str_len, destination_buffer_ptr, destination_buffer_end, required_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (required_size)
|
||||||
|
*required_size = 0;
|
||||||
|
|
||||||
|
from_ascii = rb_econv_open(str_encoding, ec->last_tc->transcoder->to_encoding, 0);
|
||||||
|
if (!from_ascii)
|
||||||
|
return -2;
|
||||||
|
|
||||||
|
src_ptr = str;
|
||||||
|
dst_len = 0;
|
||||||
|
do {
|
||||||
|
dst_ptr = buf;
|
||||||
|
res = rb_econv_convert(from_ascii, &src_ptr, str+str_len, &dst_ptr, buf+sizeof(buf), 0);
|
||||||
|
if (dst_len + (dst_ptr - buf) < dst_len)
|
||||||
|
goto convfail;
|
||||||
|
dst_len += dst_ptr - buf;
|
||||||
|
} while (res == econv_destination_buffer_full);
|
||||||
|
|
||||||
|
if (res != econv_finished)
|
||||||
|
goto convfail;
|
||||||
|
|
||||||
|
rb_econv_close(from_ascii);
|
||||||
|
from_ascii = NULL;
|
||||||
|
|
||||||
|
if (dst_len <= sizeof(buf)) {
|
||||||
|
return rb_econv_output_with_destination_encoding(ec, buf, dst_len, destination_buffer_ptr, destination_buffer_end, required_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
buf2 = xmalloc(dst_len);
|
||||||
|
|
||||||
|
from_ascii = rb_econv_open(str_encoding, ec->last_tc->transcoder->to_encoding, 0);
|
||||||
|
if (!from_ascii)
|
||||||
|
goto convfail;
|
||||||
|
|
||||||
|
src_ptr = str;
|
||||||
|
dst_ptr = buf2;
|
||||||
|
res = rb_econv_convert(from_ascii, &src_ptr, str+str_len, &dst_ptr, buf2+dst_len, 0);
|
||||||
|
if (res != econv_finished)
|
||||||
|
goto convfail;
|
||||||
|
rb_econv_close(from_ascii);
|
||||||
|
from_ascii = NULL;
|
||||||
|
|
||||||
|
ret = rb_econv_output_with_destination_encoding(ec, buf2, dst_len, destination_buffer_ptr, destination_buffer_end, required_size);
|
||||||
|
|
||||||
|
xfree(buf2);
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
convfail:
|
||||||
|
if (from_ascii)
|
||||||
|
rb_econv_close(from_ascii);
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
rb_econv_close(rb_econv_t *ec)
|
rb_econv_close(rb_econv_t *ec)
|
||||||
{
|
{
|
||||||
|
@ -1059,15 +1137,18 @@ output_replacement_character(
|
||||||
const rb_transcoder *tr;
|
const rb_transcoder *tr;
|
||||||
rb_encoding *enc;
|
rb_encoding *enc;
|
||||||
const unsigned char *replacement;
|
const unsigned char *replacement;
|
||||||
|
const char *repl_enc;
|
||||||
int len;
|
int len;
|
||||||
size_t required_size;
|
size_t required_size;
|
||||||
|
int ret;
|
||||||
|
|
||||||
tr = tc->transcoder;
|
tr = tc->transcoder;
|
||||||
enc = rb_enc_find(tr->to_encoding);
|
enc = rb_enc_find(tr->to_encoding);
|
||||||
|
|
||||||
replacement = (const unsigned char *)get_replacement_character(enc, &len);
|
replacement = (const unsigned char *)get_replacement_character(enc, &len, &repl_enc);
|
||||||
|
|
||||||
if (rb_econv_output(ec, replacement, len, out_pos, *out_stop_ptr, &required_size) == 0)
|
ret = rb_econv_output(ec, replacement, len, repl_enc, out_pos, *out_stop_ptr, &required_size);
|
||||||
|
if (ret == 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (required_size < len)
|
if (required_size < len)
|
||||||
|
@ -1075,7 +1156,8 @@ output_replacement_character(
|
||||||
|
|
||||||
more_output_buffer(destination, resize_destination, required_size, out_start_ptr, out_pos, out_stop_ptr);
|
more_output_buffer(destination, resize_destination, required_size, out_start_ptr, out_pos, out_stop_ptr);
|
||||||
|
|
||||||
if (rb_econv_output(ec, replacement, len, out_pos, *out_stop_ptr, &required_size) == 0)
|
ret = rb_econv_output(ec, replacement, len, repl_enc, out_pos, *out_stop_ptr, &required_size);
|
||||||
|
if (ret == 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
return -1;
|
return -1;
|
||||||
|
|
Loading…
Add table
Reference in a new issue