mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* transcode_data.h (rb_trans_result_t): new type.
(rb_trans_elem_t): new type. (rb_trans_t): new type. * transcode.c (transcode_dispatch_cb): removed. (transcode_dispatch): removed. (rb_transcoding_result_t): moved to rb_trans_result_t in transcode_data.h. (transcode_restartable0): goto follow_info when FUNsi. (rb_transcoding_open): use get_transcoder_entry. (rb_trans_open): new function. (rb_trans_conv): ditto. (rb_trans_close): ditto. (trans_open_i): ditto. (trans_sweep): ditto. (more_output_buffer): take rb_trans_t instead of rb_transcoding as an argument. (transcode_loop): take from_encoding and to_encoding instead of tr as arguments. use rb_trans_open/rb_trans_conv/rb_trans_close. (str_transcode): don't use transcode_dispatch. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18498 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
96765d153f
commit
a2901a7c75
4 changed files with 297 additions and 127 deletions
23
ChangeLog
23
ChangeLog
|
@ -1,3 +1,26 @@
|
|||
Tue Aug 12 00:43:44 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* transcode_data.h (rb_trans_result_t): new type.
|
||||
(rb_trans_elem_t): new type.
|
||||
(rb_trans_t): new type.
|
||||
|
||||
* transcode.c (transcode_dispatch_cb): removed.
|
||||
(transcode_dispatch): removed.
|
||||
(rb_transcoding_result_t): moved to rb_trans_result_t in
|
||||
transcode_data.h.
|
||||
(transcode_restartable0): goto follow_info when FUNsi.
|
||||
(rb_transcoding_open): use get_transcoder_entry.
|
||||
(rb_trans_open): new function.
|
||||
(rb_trans_conv): ditto.
|
||||
(rb_trans_close): ditto.
|
||||
(trans_open_i): ditto.
|
||||
(trans_sweep): ditto.
|
||||
(more_output_buffer): take rb_trans_t instead of rb_transcoding as
|
||||
an argument.
|
||||
(transcode_loop): take from_encoding and to_encoding instead of tr
|
||||
as arguments. use rb_trans_open/rb_trans_conv/rb_trans_close.
|
||||
(str_transcode): don't use transcode_dispatch.
|
||||
|
||||
Mon Aug 11 20:37:05 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
|
||||
|
||||
* iseq.c (rb_iseq_clone): should preserve cref_stack link.
|
||||
|
|
|
@ -357,6 +357,10 @@ class TestTranscode < Test::Unit::TestCase
|
|||
assert_raise(RuntimeError) { "\u9299".encode("iso-2022-jp") }
|
||||
assert_raise(RuntimeError) { "\uff71\uff72\uff73\uff74\uff75".encode("iso-2022-jp") }
|
||||
assert_raise(RuntimeError) { "\x1b(I12345\x1b(B".encode("utf-8", "iso-2022-jp") }
|
||||
assert_equal("\xA1\xA1".force_encoding("euc-jp"),
|
||||
"\e$B!!\e(B".encode("EUC-JP", "ISO-2022-JP"))
|
||||
assert_equal("\e$B!!\e(B".force_encoding("ISO-2022-JP"),
|
||||
"\xA1\xA1".encode("ISO-2022-JP", "EUC-JP"))
|
||||
end
|
||||
|
||||
def test_iso_2022_jp_1
|
||||
|
|
352
transcode.c
352
transcode.c
|
@ -255,47 +255,6 @@ load_transcoder(transcoder_entry_t *entry)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
transcode_dispatch_cb(const char *from, const char *to, int depth, void *arg)
|
||||
{
|
||||
const rb_transcoder **first_transcoder_ptr = (const rb_transcoder **)arg;
|
||||
|
||||
transcoder_entry_t *entry;
|
||||
|
||||
if (!*first_transcoder_ptr)
|
||||
return;
|
||||
|
||||
entry = get_transcoder_entry(from, to);
|
||||
if (!entry)
|
||||
goto failed;
|
||||
|
||||
if (!entry->transcoder && entry->lib) {
|
||||
load_transcoder(entry);
|
||||
}
|
||||
if (!entry->transcoder)
|
||||
goto failed;
|
||||
|
||||
if (depth == 0)
|
||||
*first_transcoder_ptr = entry->transcoder;
|
||||
|
||||
return;
|
||||
|
||||
failed:
|
||||
*first_transcoder_ptr = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
static const rb_transcoder *
|
||||
transcode_dispatch(const char *from_encoding, const char *to_encoding)
|
||||
{
|
||||
const rb_transcoder *first_transcoder = (rb_transcoder *)1;
|
||||
|
||||
if (transcode_search_path(from_encoding, to_encoding, transcode_dispatch_cb, (void *)&first_transcoder)) {
|
||||
return first_transcoder;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
output_replacement_character(unsigned char **out_pp, rb_encoding *enc)
|
||||
{
|
||||
|
@ -364,15 +323,7 @@ transcode_char_start(rb_transcoding *tc,
|
|||
return ptr;
|
||||
}
|
||||
|
||||
typedef enum {
|
||||
transcode_invalid_input,
|
||||
transcode_undefined_conversion,
|
||||
transcode_obuf_full,
|
||||
transcode_ibuf_empty,
|
||||
transcode_finished,
|
||||
} rb_transcoding_result_t;
|
||||
|
||||
static rb_transcoding_result_t
|
||||
static rb_trans_result_t
|
||||
transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
|
||||
const unsigned char *in_stop, unsigned char *out_stop,
|
||||
rb_transcoding *tc,
|
||||
|
@ -517,7 +468,7 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
size_t char_len;
|
||||
char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
|
||||
next_info = (VALUE)(*tr->func_si)(tc, char_start, (size_t)char_len);
|
||||
break;
|
||||
goto follow_info;
|
||||
}
|
||||
case FUNio:
|
||||
while (out_stop - out_p < tr->max_output) { SUSPEND(transcode_obuf_full, 13); }
|
||||
|
@ -579,7 +530,7 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
#undef SUSPEND
|
||||
}
|
||||
|
||||
static rb_transcoding_result_t
|
||||
static rb_trans_result_t
|
||||
transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
|
||||
const unsigned char *in_stop, unsigned char *out_stop,
|
||||
rb_transcoding *tc,
|
||||
|
@ -589,7 +540,7 @@ transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
unsigned char *readagain_buf = ALLOCA_N(unsigned char, tc->readagain_len);
|
||||
const unsigned char *readagain_pos = readagain_buf;
|
||||
const unsigned char *readagain_stop = readagain_buf + tc->readagain_len;
|
||||
rb_transcoding_result_t res;
|
||||
rb_trans_result_t res;
|
||||
|
||||
MEMCPY(readagain_buf, TRANSCODING_READBUF(tc) + tc->recognized_len,
|
||||
unsigned char, tc->readagain_len);
|
||||
|
@ -611,21 +562,12 @@ rb_transcoding_open(const char *from, const char *to, int flags)
|
|||
rb_transcoding *tc;
|
||||
const rb_transcoder *tr;
|
||||
|
||||
st_data_t val;
|
||||
st_table *table2;
|
||||
transcoder_entry_t *entry;
|
||||
|
||||
/* xxx: support multistep conversion */
|
||||
|
||||
if (!st_lookup(transcoder_table, (st_data_t)from, &val)) {
|
||||
entry = get_transcoder_entry(from, to);
|
||||
if (!entry)
|
||||
return NULL;
|
||||
}
|
||||
table2 = (st_table *)val;
|
||||
|
||||
if (!st_lookup(table2, (st_data_t)to, &val)) {
|
||||
return NULL;
|
||||
}
|
||||
entry = (transcoder_entry_t *)val;
|
||||
tr = load_transcoder(entry);
|
||||
if (!tr)
|
||||
return NULL;
|
||||
|
@ -643,7 +585,7 @@ rb_transcoding_open(const char *from, const char *to, int flags)
|
|||
return tc;
|
||||
}
|
||||
|
||||
static rb_transcoding_result_t
|
||||
static rb_trans_result_t
|
||||
rb_transcoding_convert(rb_transcoding *tc,
|
||||
const unsigned char **input_ptr, const unsigned char *input_stop,
|
||||
unsigned char **output_ptr, unsigned char *output_stop,
|
||||
|
@ -664,17 +606,199 @@ rb_transcoding_close(rb_transcoding *tc)
|
|||
xfree(tc);
|
||||
}
|
||||
|
||||
static void
|
||||
trans_open_i(const char *from, const char *to, int depth, void *arg)
|
||||
{
|
||||
rb_trans_t **tsp = (rb_trans_t **)arg;
|
||||
rb_trans_t *ts;
|
||||
int i;
|
||||
|
||||
if (!*tsp) {
|
||||
ts = *tsp = ALLOC(rb_trans_t);
|
||||
ts->num_trans = depth+1;
|
||||
ts->elems = ALLOC_N(rb_trans_elem_t, ts->num_trans);
|
||||
ts->num_finished = 0;
|
||||
for (i = 0; i < ts->num_trans; i++) {
|
||||
ts->elems[i].tc = NULL;
|
||||
ts->elems[i].out_buf_start = NULL;
|
||||
ts->elems[i].out_data_start = NULL;
|
||||
ts->elems[i].out_data_end = NULL;
|
||||
ts->elems[i].out_buf_end = NULL;
|
||||
ts->elems[i].last_result = transcode_ibuf_empty;
|
||||
}
|
||||
}
|
||||
else {
|
||||
ts = *tsp;
|
||||
}
|
||||
|
||||
ts->elems[depth].tc = rb_transcoding_open(from, to, 0);
|
||||
if (depth < ts->num_trans-1) {
|
||||
int bufsize = 4096;
|
||||
unsigned char *p;
|
||||
p = xmalloc(bufsize);
|
||||
ts->elems[depth].out_buf_start = p;
|
||||
ts->elems[depth].out_buf_end = p + bufsize;
|
||||
ts->elems[depth].out_data_start = p;
|
||||
ts->elems[depth].out_data_end = p;
|
||||
}
|
||||
}
|
||||
|
||||
static rb_trans_t *
|
||||
rb_trans_open(const char *from, const char *to, int flags)
|
||||
{
|
||||
rb_trans_t *ts = NULL;
|
||||
|
||||
transcode_search_path(from, to, trans_open_i, (void *)&ts);
|
||||
|
||||
if (!ts)
|
||||
return NULL;
|
||||
|
||||
return ts;
|
||||
}
|
||||
|
||||
static int
|
||||
trans_sweep(rb_trans_t *ts,
|
||||
const unsigned char **input_ptr, const unsigned char *input_stop,
|
||||
unsigned char **output_ptr, unsigned char *output_stop,
|
||||
int flags,
|
||||
int start)
|
||||
{
|
||||
int try;
|
||||
int i, f;
|
||||
|
||||
const unsigned char **ipp, *is, *iold;
|
||||
unsigned char **opp, *os, *oold;
|
||||
rb_trans_result_t res;
|
||||
|
||||
try = 1;
|
||||
while (try) {
|
||||
try = 0;
|
||||
for (i = start; i < ts->num_trans; i++) {
|
||||
rb_trans_elem_t *te = &ts->elems[i];
|
||||
|
||||
if (i == 0) {
|
||||
ipp = input_ptr;
|
||||
is = input_stop;
|
||||
}
|
||||
else {
|
||||
rb_trans_elem_t *prev_te = &ts->elems[i-1];
|
||||
ipp = (const unsigned char **)&prev_te->out_data_start;
|
||||
is = prev_te->out_data_end;
|
||||
}
|
||||
|
||||
if (!te->out_buf_start) {
|
||||
opp = output_ptr;
|
||||
os = output_stop;
|
||||
}
|
||||
else {
|
||||
if (te->out_buf_start != te->out_data_start) {
|
||||
int len = te->out_data_end - te->out_data_start;
|
||||
int off = te->out_data_start - te->out_buf_start;
|
||||
MEMMOVE(te->out_buf_start, te->out_data_start, unsigned char, len);
|
||||
te->out_data_start = te->out_buf_start;
|
||||
te->out_data_end -= off;
|
||||
}
|
||||
opp = &te->out_data_end;
|
||||
os = te->out_buf_end;
|
||||
}
|
||||
|
||||
f = flags;
|
||||
if (ts->num_finished != i)
|
||||
f |= PARTIAL_INPUT;
|
||||
iold = *ipp;
|
||||
oold = *opp;
|
||||
te->last_result = res = rb_transcoding_convert(te->tc, ipp, is, opp, os, f);
|
||||
if (iold != *ipp || oold != *opp)
|
||||
try = 1;
|
||||
|
||||
switch (res) {
|
||||
case transcode_invalid_input:
|
||||
case transcode_undefined_conversion:
|
||||
return i;
|
||||
|
||||
case transcode_obuf_full:
|
||||
case transcode_ibuf_empty:
|
||||
break;
|
||||
|
||||
case transcode_finished:
|
||||
ts->num_finished = i+1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
static rb_trans_result_t
|
||||
rb_trans_conv(rb_trans_t *ts,
|
||||
const unsigned char **input_ptr, const unsigned char *input_stop,
|
||||
unsigned char **output_ptr, unsigned char *output_stop,
|
||||
int flags)
|
||||
{
|
||||
int i;
|
||||
int start, err_index;
|
||||
|
||||
unsigned char empty_buf;
|
||||
unsigned char *empty_ptr = &empty_buf;
|
||||
|
||||
if (!input_ptr) {
|
||||
input_ptr = (const unsigned char **)&empty_ptr;
|
||||
input_stop = empty_ptr;
|
||||
}
|
||||
|
||||
if (!output_ptr) {
|
||||
output_ptr = &empty_ptr;
|
||||
output_stop = empty_ptr;
|
||||
}
|
||||
|
||||
err_index = -1;
|
||||
for (i = ts->num_trans-2; 0 <= i; i--) {
|
||||
if (ts->elems[i].last_result == transcode_invalid_input ||
|
||||
ts->elems[i].last_result == transcode_undefined_conversion) {
|
||||
err_index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
start = err_index + 1;
|
||||
err_index = trans_sweep(ts, input_ptr, input_stop, output_ptr, output_stop, flags, start);
|
||||
} while (err_index != -1 && err_index != ts->num_trans-1);
|
||||
|
||||
if (err_index == ts->num_trans-1)
|
||||
return ts->elems[ts->num_trans-1].last_result;
|
||||
else if (start == 0)
|
||||
return ts->elems[ts->num_trans-1].last_result;
|
||||
else
|
||||
return ts->elems[start-1].last_result;
|
||||
}
|
||||
|
||||
static void
|
||||
rb_trans_close(rb_trans_t *ts)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ts->num_trans; i++) {
|
||||
rb_transcoding_close(ts->elems[i].tc);
|
||||
if (ts->elems[i].out_buf_start)
|
||||
xfree(ts->elems[i].out_buf_start);
|
||||
}
|
||||
|
||||
xfree(ts->elems);
|
||||
xfree(ts);
|
||||
}
|
||||
|
||||
static void
|
||||
more_output_buffer(
|
||||
VALUE destination,
|
||||
unsigned char *(*resize_destination)(VALUE, int, int),
|
||||
rb_transcoding *tc,
|
||||
rb_trans_t *ts,
|
||||
unsigned char **out_start_ptr,
|
||||
unsigned char **out_pos,
|
||||
unsigned char **out_stop_ptr)
|
||||
{
|
||||
size_t len = (*out_pos - *out_start_ptr);
|
||||
size_t new_len = (len + tc->transcoder->max_output) * 2;
|
||||
size_t new_len = (len + ts->elems[ts->num_trans-1].tc->transcoder->max_output) * 2;
|
||||
*out_start_ptr = resize_destination(destination, len, new_len);
|
||||
*out_pos = *out_start_ptr + len;
|
||||
*out_stop_ptr = *out_start_ptr + new_len;
|
||||
|
@ -686,17 +810,23 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
const unsigned char *in_stop, unsigned char *out_stop,
|
||||
VALUE destination,
|
||||
unsigned char *(*resize_destination)(VALUE, int, int),
|
||||
const rb_transcoder *tr,
|
||||
const char *from_encoding,
|
||||
const char *to_encoding,
|
||||
const int opt)
|
||||
{
|
||||
rb_transcoding *tc;
|
||||
rb_transcoding_result_t ret;
|
||||
rb_trans_t *ts;
|
||||
rb_trans_result_t ret;
|
||||
unsigned char *out_start = *out_pos;
|
||||
int max_output;
|
||||
|
||||
tc = rb_transcoding_open(tr->from_encoding, tr->to_encoding, 0);
|
||||
ts = rb_trans_open(from_encoding, to_encoding, 0);
|
||||
if (!ts)
|
||||
rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_encoding, to_encoding);
|
||||
|
||||
max_output = ts->elems[ts->num_trans-1].tc->transcoder->max_output;
|
||||
|
||||
resume:
|
||||
ret = rb_transcoding_convert(tc, in_pos, in_stop, out_pos, out_stop, opt);
|
||||
ret = rb_trans_conv(ts, in_pos, in_stop, out_pos, out_stop, opt);
|
||||
if (ret == transcode_invalid_input) {
|
||||
/* deal with invalid byte sequence */
|
||||
/* todo: add more alternative behaviors */
|
||||
|
@ -704,12 +834,12 @@ resume:
|
|||
goto resume;
|
||||
}
|
||||
else if (opt&INVALID_REPLACE) {
|
||||
if (out_stop - *out_pos < tr->max_output)
|
||||
more_output_buffer(destination, resize_destination, tc, &out_start, out_pos, &out_stop);
|
||||
output_replacement_character(out_pos, rb_enc_find(tr->to_encoding));
|
||||
if (out_stop - *out_pos < max_output)
|
||||
more_output_buffer(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
|
||||
output_replacement_character(out_pos, rb_enc_find(to_encoding));
|
||||
goto resume;
|
||||
}
|
||||
rb_transcoding_close(tc);
|
||||
rb_trans_close(ts);
|
||||
rb_raise(TRANSCODE_ERROR, "invalid byte sequence");
|
||||
}
|
||||
if (ret == transcode_undefined_conversion) {
|
||||
|
@ -720,20 +850,20 @@ resume:
|
|||
goto resume;
|
||||
}
|
||||
else if (opt&UNDEF_REPLACE) {
|
||||
if (out_stop - *out_pos < tr->max_output)
|
||||
more_output_buffer(destination, resize_destination, tc, &out_start, out_pos, &out_stop);
|
||||
output_replacement_character(out_pos, rb_enc_find(tr->to_encoding));
|
||||
if (out_stop - *out_pos < max_output)
|
||||
more_output_buffer(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
|
||||
output_replacement_character(out_pos, rb_enc_find(to_encoding));
|
||||
goto resume;
|
||||
}
|
||||
rb_transcoding_close(tc);
|
||||
rb_trans_close(ts);
|
||||
rb_raise(TRANSCODE_ERROR, "conversion undefined for byte sequence (maybe invalid byte sequence)");
|
||||
}
|
||||
if (ret == transcode_obuf_full) {
|
||||
more_output_buffer(destination, resize_destination, tc, &out_start, out_pos, &out_stop);
|
||||
more_output_buffer(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
|
||||
goto resume;
|
||||
}
|
||||
|
||||
rb_transcoding_close(tc);
|
||||
rb_trans_close(ts);
|
||||
return;
|
||||
}
|
||||
#else
|
||||
|
@ -743,15 +873,21 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
const unsigned char *in_stop, unsigned char *out_stop,
|
||||
VALUE destination,
|
||||
unsigned char *(*resize_destination)(VALUE, int, int),
|
||||
const rb_transcoder *tr,
|
||||
const char *from_encoding,
|
||||
const char *to_encoding,
|
||||
const int opt)
|
||||
{
|
||||
rb_transcoding *tc;
|
||||
rb_transcoding_result_t ret;
|
||||
rb_trans_t *ts;
|
||||
rb_trans_result_t ret;
|
||||
unsigned char *out_start = *out_pos;
|
||||
const unsigned char *ptr;
|
||||
int max_output;
|
||||
|
||||
tc = rb_transcoding_open(tr->from_encoding, tr->to_encoding, 0);
|
||||
ts = rb_trans_open(from_encoding, to_encoding, 0);
|
||||
if (!ts)
|
||||
rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_encoding, to_encoding);
|
||||
|
||||
max_output = ts->elems[ts->num_trans-1].tc->transcoder->max_output;
|
||||
|
||||
ret = transcode_ibuf_empty;
|
||||
ptr = *in_pos;
|
||||
|
@ -762,14 +898,14 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
if (ret == transcode_ibuf_empty) {
|
||||
if (ptr < in_stop) {
|
||||
input_byte = *ptr;
|
||||
ret = rb_transcoding_convert(tc, &p, p+1, out_pos, out_stop, PARTIAL_INPUT);
|
||||
ret = rb_trans_conv(ts, &p, p+1, out_pos, out_stop, PARTIAL_INPUT);
|
||||
}
|
||||
else {
|
||||
ret = rb_transcoding_convert(tc, NULL, NULL, out_pos, out_stop, 0);
|
||||
ret = rb_trans_conv(ts, NULL, NULL, out_pos, out_stop, 0);
|
||||
}
|
||||
}
|
||||
else {
|
||||
ret = rb_transcoding_convert(tc, NULL, NULL, out_pos, out_stop, PARTIAL_INPUT);
|
||||
ret = rb_trans_conv(ts, NULL, NULL, out_pos, out_stop, PARTIAL_INPUT);
|
||||
}
|
||||
if (&input_byte != p)
|
||||
ptr += p - &input_byte;
|
||||
|
@ -781,12 +917,12 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
break;
|
||||
}
|
||||
else if (opt&INVALID_REPLACE) {
|
||||
if (out_stop - *out_pos < tr->max_output)
|
||||
more_output_buffer(destination, resize_destination, tc, &out_start, out_pos, &out_stop);
|
||||
output_replacement_character(out_pos, rb_enc_find(tr->to_encoding));
|
||||
if (out_stop - *out_pos < max_output)
|
||||
more_output_buffer(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
|
||||
output_replacement_character(out_pos, rb_enc_find(to_encoding));
|
||||
break;
|
||||
}
|
||||
rb_transcoding_close(tc);
|
||||
rb_trans_close(ts);
|
||||
rb_raise(TRANSCODE_ERROR, "invalid byte sequence");
|
||||
break;
|
||||
|
||||
|
@ -798,17 +934,17 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
break;
|
||||
}
|
||||
else if (opt&UNDEF_REPLACE) {
|
||||
if (out_stop - *out_pos < tr->max_output)
|
||||
more_output_buffer(destination, resize_destination, tc, &out_start, out_pos, &out_stop);
|
||||
output_replacement_character(out_pos, rb_enc_find(tr->to_encoding));
|
||||
if (out_stop - *out_pos < max_output)
|
||||
more_output_buffer(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
|
||||
output_replacement_character(out_pos, rb_enc_find(to_encoding));
|
||||
break;
|
||||
}
|
||||
rb_transcoding_close(tc);
|
||||
rb_trans_close(ts);
|
||||
rb_raise(TRANSCODE_ERROR, "conversion undefined for byte sequence (maybe invalid byte sequence)");
|
||||
break;
|
||||
|
||||
case transcode_obuf_full:
|
||||
more_output_buffer(destination, resize_destination, tc, &out_start, out_pos, &out_stop);
|
||||
more_output_buffer(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
|
||||
break;
|
||||
|
||||
case transcode_ibuf_empty:
|
||||
|
@ -818,7 +954,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
|
|||
break;
|
||||
}
|
||||
}
|
||||
rb_transcoding_close(tc);
|
||||
rb_trans_close(ts);
|
||||
*in_pos = in_stop;
|
||||
return;
|
||||
}
|
||||
|
@ -848,8 +984,6 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||
const char *from_e, *to_e;
|
||||
int from_encidx, to_encidx;
|
||||
VALUE from_encval, to_encval;
|
||||
const rb_transcoder *tr;
|
||||
int final_encoding = 0;
|
||||
VALUE opt;
|
||||
int options = 0;
|
||||
|
||||
|
@ -922,19 +1056,13 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||
return -1;
|
||||
}
|
||||
|
||||
do { /* loop for multistep transcoding */
|
||||
/* later, maybe use smaller intermediate strings for very long strings */
|
||||
if (!(tr = transcode_dispatch(from_e, to_e))) {
|
||||
rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_e, to_e);
|
||||
}
|
||||
|
||||
fromp = sp = (unsigned char *)RSTRING_PTR(str);
|
||||
slen = RSTRING_LEN(str);
|
||||
blen = slen + 30; /* len + margin */
|
||||
dest = rb_str_tmp_new(blen);
|
||||
bp = (unsigned char *)RSTRING_PTR(dest);
|
||||
|
||||
transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), dest, str_transcoding_resize, tr, options);
|
||||
transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), dest, str_transcoding_resize, from_e, to_e, options);
|
||||
if (fromp != sp+slen) {
|
||||
rb_raise(rb_eArgError, "not fully converted, %"PRIdPTRDIFF" bytes left", sp+slen-fromp);
|
||||
}
|
||||
|
@ -942,14 +1070,6 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||
*bp = '\0';
|
||||
rb_str_set_len(dest, bp - buf);
|
||||
|
||||
if (encoding_equal(tr->to_encoding, to_e)) {
|
||||
final_encoding = 1;
|
||||
}
|
||||
else {
|
||||
from_e = tr->to_encoding;
|
||||
str = dest;
|
||||
}
|
||||
} while (!final_encoding);
|
||||
/* set encoding */
|
||||
if (!to_enc) {
|
||||
to_encidx = rb_define_dummy_encoding(to_e);
|
||||
|
|
|
@ -96,6 +96,29 @@ typedef struct rb_transcoder {
|
|||
int (*finish_func)(rb_transcoding*, unsigned char*); /* -> output */
|
||||
} rb_transcoder;
|
||||
|
||||
typedef enum {
|
||||
transcode_invalid_input,
|
||||
transcode_undefined_conversion,
|
||||
transcode_obuf_full,
|
||||
transcode_ibuf_empty,
|
||||
transcode_finished,
|
||||
} rb_trans_result_t;
|
||||
|
||||
typedef struct {
|
||||
rb_transcoding *tc;
|
||||
unsigned char *out_buf_start;
|
||||
unsigned char *out_data_start;
|
||||
unsigned char *out_data_end;
|
||||
unsigned char *out_buf_end;
|
||||
rb_trans_result_t last_result;
|
||||
} rb_trans_elem_t;
|
||||
|
||||
typedef struct {
|
||||
rb_trans_elem_t *elems;
|
||||
int num_trans;
|
||||
int num_finished;
|
||||
} rb_trans_t;
|
||||
|
||||
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);
|
||||
void rb_register_transcoder(const rb_transcoder *);
|
||||
|
||||
|
|
Loading…
Reference in a new issue