mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Fri Dec 28 01:55:04 2007 Martin Duerst <duerst@it.aoyama.ac.jp>
* transcode.c (transcode_dispatch): reverted some of the changes in r14746.
* transcode.c, enc/trans/single_byte.c: Added conversions to/from US-ASCII and ASCII-8BIT (using data tables).
* enc/trans/single_byte.c: Some spacing/ordering changes due to automatic data file generation.
* transcode_data.h, transcode.c: Preliminary code for using micro-conversion functions.
* test/ruby/test_transcode.rb: Added some tests for US-ASCII and ASCII-8BIT conversions.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14766 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
48af602e38
commit
793e9423cd
5 changed files with 152 additions and 51 deletions
30
transcode.c
30
transcode.c
|
@@ -89,6 +89,8 @@ rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
|
|||
static void
|
||||
init_transcoder_table(void)
|
||||
{
|
||||
rb_declare_transcoder("US-ASCII", "UTF-8", "single_byte");
|
||||
rb_declare_transcoder("ASCII-8BIT", "UTF-8", "single_byte");
|
||||
rb_declare_transcoder("ISO-8859-1", "UTF-8", "single_byte");
|
||||
rb_declare_transcoder("ISO-8859-2", "UTF-8", "single_byte");
|
||||
rb_declare_transcoder("ISO-8859-3", "UTF-8", "single_byte");
|
||||
|
@@ -173,6 +175,7 @@ transcode_loop(char **in_pos, char **out_pos,
|
|||
follow_byte:
|
||||
next_offset = next_table->base[next_byte];
|
||||
next_info = (VALUE)next_table->info[next_offset];
|
||||
follow_info:
|
||||
switch (next_info & 0x1F) {
|
||||
case NOMAP:
|
||||
*out_p++ = next_byte;
|
||||
|
@@ -191,7 +194,7 @@ transcode_loop(char **in_pos, char **out_pos,
|
|||
else
|
||||
goto invalid;
|
||||
}
|
||||
next_table = next_table->info[next_offset];
|
||||
next_table = (const BYTE_LOOKUP *)next_info;
|
||||
goto follow_byte;
|
||||
/* maybe rewrite the following cases to use fallthrough???? */
|
||||
case ZERObt: /* drop input */
|
||||
|
@@ -210,6 +213,9 @@ transcode_loop(char **in_pos, char **out_pos,
|
|||
*out_p++ = getBT2(next_info);
|
||||
*out_p++ = getBT3(next_info);
|
||||
continue;
|
||||
case FUNii:
|
||||
next_info = (VALUE)(*my_transcoder->func_ii)(next_info);
|
||||
goto follow_info;
|
||||
case INVALID:
|
||||
goto invalid;
|
||||
case UNDEF:
|
||||
|
@@ -287,7 +293,7 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||
return -1;
|
||||
}
|
||||
if (from_enc && to_enc && rb_enc_asciicompat(from_enc) && rb_enc_asciicompat(to_enc)) {
|
||||
if (to_encidx == 0 || ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
|
||||
if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
|
||||
return to_encidx;
|
||||
}
|
||||
}
|
||||
|
@@ -295,25 +301,6 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||
return -1;
|
||||
}
|
||||
|
||||
if (from_encidx == 0) {
|
||||
const char *p = RSTRING_PTR(str);
|
||||
const char *e = p + RSTRING_LEN(str);
|
||||
|
||||
while (p < e) {
|
||||
int ret = rb_enc_precise_mbclen(p, e, to_enc);
|
||||
int len = MBCLEN_CHARFOUND(ret);
|
||||
|
||||
if (!len) {
|
||||
rb_raise(rb_eArgError, "not fully converted, %d bytes left", e-p);
|
||||
}
|
||||
p += len;
|
||||
}
|
||||
if (to_encidx < 0) {
|
||||
to_encidx = rb_define_dummy_encoding(to_e);
|
||||
}
|
||||
return to_encidx;
|
||||
}
|
||||
|
||||
while (!final_encoding) { /* loop for multistep transcoding */
|
||||
/* later, maybe use smaller intermediate strings for very long strings */
|
||||
if (!(my_transcoder = transcode_dispatch(from_e, to_e))) {
|
||||
|
@@ -412,6 +399,7 @@ rb_str_transcode_bang(int argc, VALUE *argv, VALUE str)
|
|||
/*
|
||||
* call-seq:
|
||||
* str.encode(encoding) => str
|
||||
* str.encode(to_encoding, from_encoding) => str
|
||||
*
|
||||
* With one argument, returns a copy of <i>str</i> transcoded
|
||||
* to encoding +encoding+.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue