mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* transcode.c (transcode_loop, str_transcoding_resize): use unsigned
  char. [ruby-dev:33232]

* transcode_data.h (rb_transcoding, rb_transcoder): removed callback
  parameters.

* enc/trans/japanese.c: ditto.

* enc/trans/utf_16_32.c: parenthesized bit-or operands.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15150 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
76e19bc534
commit
463af63468
5 changed files with 68 additions and 57 deletions
12
ChangeLog
12
ChangeLog
|
@ -1,3 +1,15 @@
|
|||
Mon Jan 21 12:35:00 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* transcode.c (transcode_loop, str_transcoding_resize): use unsigned
|
||||
char. [ruby-dev:33232]
|
||||
|
||||
* transcode_data.h (rb_transcoding, rb_transcoder): removed callback
|
||||
parameters.
|
||||
|
||||
* enc/trans/japanese.c: ditto.
|
||||
|
||||
* enc/trans/utf_16_32.c: parenthesized bit-or operands.
|
||||
|
||||
Mon Jan 21 11:59:00 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* string.c (rb_str_each_char): move forward. [ruby-dev:33231]
|
||||
|
|
|
@ -23671,10 +23671,10 @@ enum ISO_2022_ESCSEQ {
|
|||
#define ISO_2022_GZ_JIS_X_0213_2004_1 ISO_2022_ENCODING(ISO_2022_GZDM4,'Q')
|
||||
|
||||
static int
|
||||
get_iso_2022_mode(char **in_pos)
|
||||
get_iso_2022_mode(unsigned char **in_pos)
|
||||
{
|
||||
int new_mode;
|
||||
char *in_p = *in_pos;
|
||||
unsigned char *in_p = *in_pos;
|
||||
switch (*in_p++)
|
||||
{
|
||||
case '(':
|
||||
|
@ -23719,15 +23719,15 @@ get_iso_2022_mode(char **in_pos)
|
|||
}
|
||||
|
||||
static void
|
||||
from_iso_2022_jp_transcoder_preprocessor(char **in_pos, char **out_pos,
|
||||
char *in_stop, char *out_stop,
|
||||
from_iso_2022_jp_transcoder_preprocessor(unsigned char **in_pos, unsigned char **out_pos,
|
||||
unsigned char *in_stop, unsigned char *out_stop,
|
||||
rb_transcoding *my_transcoding)
|
||||
{
|
||||
const rb_transcoder *my_transcoder = my_transcoding->transcoder;
|
||||
char *in_p = *in_pos, *out_p = *out_pos;
|
||||
unsigned char *in_p = *in_pos, *out_p = *out_pos;
|
||||
int cur_mode = ISO_2022_GZ_ASCII;
|
||||
unsigned char c1;
|
||||
char *out_s = out_stop - my_transcoder->max_output + 1;
|
||||
unsigned char *out_s = out_stop - my_transcoder->max_output + 1;
|
||||
while (in_p < in_stop) {
|
||||
if (out_p >= out_s) {
|
||||
int len = (out_p - *out_pos);
|
||||
|
@ -23770,9 +23770,9 @@ from_iso_2022_jp_transcoder_preprocessor(char **in_pos, char **out_pos,
|
|||
}
|
||||
|
||||
static int
|
||||
select_iso_2022_mode(char **out_pos, int new_mode)
|
||||
select_iso_2022_mode(unsigned char **out_pos, int new_mode)
|
||||
{
|
||||
char *out_p = *out_pos;
|
||||
unsigned char *out_p = *out_pos;
|
||||
*out_p++ = '\x1b';
|
||||
switch (new_mode>>8)
|
||||
{
|
||||
|
@ -23799,15 +23799,15 @@ select_iso_2022_mode(char **out_pos, int new_mode)
|
|||
}
|
||||
|
||||
static void
|
||||
to_iso_2022_jp_transcoder_postprocessor(char **in_pos, char **out_pos,
|
||||
char *in_stop, char *out_stop,
|
||||
to_iso_2022_jp_transcoder_postprocessor(unsigned char **in_pos, unsigned char **out_pos,
|
||||
unsigned char *in_stop, unsigned char *out_stop,
|
||||
rb_transcoding *my_transcoding)
|
||||
{
|
||||
const rb_transcoder *my_transcoder = my_transcoding->transcoder;
|
||||
char *in_p = *in_pos, *out_p = *out_pos;
|
||||
unsigned char *in_p = *in_pos, *out_p = *out_pos;
|
||||
int cur_mode = ISO_2022_GZ_ASCII, new_mode = 0;
|
||||
unsigned char next_byte;
|
||||
char *out_s = out_stop - my_transcoder->max_output + 1;
|
||||
unsigned char *out_s = out_stop - my_transcoder->max_output + 1;
|
||||
while (in_p < in_stop) {
|
||||
if (out_p >= out_s) {
|
||||
int len = (out_p - *out_pos);
|
||||
|
|
|
@ -12,21 +12,21 @@ fun_so_from_utf_16be(const unsigned char* s, unsigned char* o)
|
|||
}
|
||||
else if (s[0]<0x08) {
|
||||
o[0] = 0xC0 | (s[0]<<2) | (s[1]>>6);
|
||||
o[1] = 0x80 | s[1]&0x3F;
|
||||
o[1] = 0x80 | (s[1]&0x3F);
|
||||
return 2;
|
||||
}
|
||||
else if ((s[0]&0xF8)!=0xD8) {
|
||||
o[0] = 0xE0 | s[0]>>4;
|
||||
o[1] = 0x80 | ((s[0]&0x0F)<<2) | (s[1]>>6);
|
||||
o[2] = 0x80 | s[1]&0x3F;
|
||||
o[2] = 0x80 | (s[1]&0x3F);
|
||||
return 3;
|
||||
}
|
||||
else {
|
||||
unsigned int u = (((s[0]&0x03)<<2)|(s[1]>>6)) + 1;
|
||||
o[0] = 0xF0 | u>>2;
|
||||
o[1] = 0x80 | ((u&0x03)<<4) | (s[1]>>2)&0x0F;
|
||||
o[1] = 0x80 | ((u&0x03)<<4) | ((s[1]>>2)&0x0F);
|
||||
o[2] = 0x80 | ((s[1]&0x03)<<4) | ((s[2]&0x03)<<2) | (s[3]>>6);
|
||||
o[3] = 0x80 | s[3]&0x3F;
|
||||
o[3] = 0x80 | (s[3]&0x3F);
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
|
@ -41,16 +41,16 @@ fun_so_to_utf_16be(const unsigned char* s, unsigned char* o)
|
|||
}
|
||||
else if ((s[0]&0xE0)==0xC0) {
|
||||
o[0] = (s[0]>>2)&0x07;
|
||||
o[1] = ((s[0]&0x03)<<6) | s[1]&0x3F;
|
||||
o[1] = ((s[0]&0x03)<<6) | (s[1]&0x3F);
|
||||
return 2;
|
||||
}
|
||||
else if ((s[0]&0xF0)==0xE0) {
|
||||
o[0] = (s[0]<<4) | (s[1]>>2)^0x20;
|
||||
o[1] = (s[1]<<6) | s[2]^0x80;
|
||||
o[0] = (s[0]<<4) | ((s[1]>>2)^0x20);
|
||||
o[1] = (s[1]<<6) | (s[2]^0x80);
|
||||
return 2;
|
||||
}
|
||||
else {
|
||||
int w = (((s[0]&0x07)<<2) | (s[1]>>4)&0x03) - 1;
|
||||
int w = (((s[0]&0x07)<<2) | ((s[1]>>4)&0x03)) - 1;
|
||||
o[0] = 0xD8 | (w>>2);
|
||||
o[1] = (w<<6) | ((s[1]&0x0F)<<2) | ((s[2]>>4)-8);
|
||||
o[2] = 0xDC | ((s[2]>>2)&0x03);
|
||||
|
@ -68,21 +68,21 @@ fun_so_from_utf_16le(const unsigned char* s, unsigned char* o)
|
|||
}
|
||||
else if (s[1]<0x08) {
|
||||
o[0] = 0xC0 | (s[1]<<2) | (s[0]>>6);
|
||||
o[1] = 0x80 | s[0]&0x3F;
|
||||
o[1] = 0x80 | (s[0]&0x3F);
|
||||
return 2;
|
||||
}
|
||||
else if ((s[1]&0xF8)!=0xD8) {
|
||||
o[0] = 0xE0 | s[1]>>4;
|
||||
o[1] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6);
|
||||
o[2] = 0x80 | s[0]&0x3F;
|
||||
o[2] = 0x80 | (s[0]&0x3F);
|
||||
return 3;
|
||||
}
|
||||
else {
|
||||
unsigned int u = (((s[1]&0x03)<<2)|(s[0]>>6)) + 1;
|
||||
o[0] = 0xF0 | u>>2;
|
||||
o[1] = 0x80 | ((u&0x03)<<4) | (s[0]>>2)&0x0F;
|
||||
o[1] = 0x80 | ((u&0x03)<<4) | ((s[0]>>2)&0x0F);
|
||||
o[2] = 0x80 | ((s[0]&0x03)<<4) | ((s[3]&0x03)<<2) | (s[2]>>6);
|
||||
o[3] = 0x80 | s[2]&0x3F;
|
||||
o[3] = 0x80 | (s[2]&0x3F);
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
|
@ -97,16 +97,16 @@ fun_so_to_utf_16le(const unsigned char* s, unsigned char* o)
|
|||
}
|
||||
else if ((s[0]&0xE0)==0xC0) {
|
||||
o[1] = (s[0]>>2)&0x07;
|
||||
o[0] = ((s[0]&0x03)<<6) | s[1]&0x3F;
|
||||
o[0] = ((s[0]&0x03)<<6) | (s[1]&0x3F);
|
||||
return 2;
|
||||
}
|
||||
else if ((s[0]&0xF0)==0xE0) {
|
||||
o[1] = (s[0]<<4) | (s[1]>>2)^0x20;
|
||||
o[0] = (s[1]<<6) | s[2]^0x80;
|
||||
o[1] = (s[0]<<4) | ((s[1]>>2)^0x20);
|
||||
o[0] = (s[1]<<6) | (s[2]^0x80);
|
||||
return 2;
|
||||
}
|
||||
else {
|
||||
int w = (((s[0]&0x07)<<2) | (s[1]>>4)&0x03) - 1;
|
||||
int w = (((s[0]&0x07)<<2) | ((s[1]>>4)&0x03)) - 1;
|
||||
o[1] = 0xD8 | (w>>2);
|
||||
o[0] = (w<<6) | ((s[1]&0x0F)<<2) | ((s[2]>>4)-8);
|
||||
o[3] = 0xDC | ((s[2]>>2)&0x03);
|
||||
|
|
42
transcode.c
42
transcode.c
|
@ -147,20 +147,20 @@ transcode_dispatch(const char* from_encoding, const char* to_encoding)
|
|||
* Transcoding engine logic
|
||||
*/
|
||||
static void
|
||||
transcode_loop(char **in_pos, char **out_pos,
|
||||
char *in_stop, char *out_stop,
|
||||
transcode_loop(unsigned char **in_pos, unsigned char **out_pos,
|
||||
unsigned char *in_stop, unsigned char *out_stop,
|
||||
const rb_transcoder *my_transcoder,
|
||||
rb_transcoding *my_transcoding)
|
||||
{
|
||||
char *in_p = *in_pos, *out_p = *out_pos;
|
||||
unsigned char *in_p = *in_pos, *out_p = *out_pos;
|
||||
const BYTE_LOOKUP *conv_tree_start = my_transcoder->conv_tree_start;
|
||||
const BYTE_LOOKUP *next_table;
|
||||
char *char_start;
|
||||
unsigned char *char_start;
|
||||
unsigned int next_offset;
|
||||
VALUE next_info;
|
||||
unsigned char next_byte;
|
||||
int from_utf8 = my_transcoder->from_utf8;
|
||||
char *out_s = out_stop - my_transcoder->max_output + 1;
|
||||
unsigned char *out_s = out_stop - my_transcoder->max_output + 1;
|
||||
while (in_p < in_stop) {
|
||||
char_start = in_p;
|
||||
next_table = conv_tree_start;
|
||||
|
@ -214,17 +214,17 @@ transcode_loop(char **in_pos, char **out_pos,
|
|||
*out_p++ = getBT3(next_info);
|
||||
continue;
|
||||
case FUNii:
|
||||
next_info = (VALUE)(*my_transcoder->func_ii)(next_info, my_transcoding);
|
||||
next_info = (VALUE)(*my_transcoder->func_ii)(next_info);
|
||||
goto follow_info;
|
||||
case FUNsi:
|
||||
next_info = (VALUE)(*my_transcoder->func_si)(char_start, my_transcoding);
|
||||
next_info = (VALUE)(*my_transcoder->func_si)(char_start);
|
||||
goto follow_info;
|
||||
break;
|
||||
case FUNio:
|
||||
out_p += (VALUE)(*my_transcoder->func_io)(next_info, out_p, my_transcoding);
|
||||
out_p += (VALUE)(*my_transcoder->func_io)(next_info, out_p);
|
||||
break;
|
||||
case FUNso:
|
||||
out_p += (VALUE)(*my_transcoder->func_so)(char_start, out_p, my_transcoding);
|
||||
out_p += (VALUE)(*my_transcoder->func_so)(char_start, out_p);
|
||||
break;
|
||||
case INVALID:
|
||||
goto invalid;
|
||||
|
@ -250,12 +250,12 @@ transcode_loop(char **in_pos, char **out_pos,
|
|||
* String-specific code
|
||||
*/
|
||||
|
||||
static char *
|
||||
static unsigned char *
|
||||
str_transcoding_resize(rb_transcoding *my_transcoding, int len, int new_len)
|
||||
{
|
||||
VALUE dest_string = my_transcoding->ruby_string_dest;
|
||||
rb_str_resize(dest_string, new_len);
|
||||
return RSTRING_PTR(dest_string);
|
||||
return (unsigned char *)RSTRING_PTR(dest_string);
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -264,7 +264,7 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||
VALUE dest;
|
||||
VALUE str = *self;
|
||||
long blen, slen;
|
||||
char *buf, *bp, *sp, *fromp;
|
||||
unsigned char *buf, *bp, *sp, *fromp;
|
||||
rb_encoding *from_enc, *to_enc;
|
||||
const char *from_e, *to_e;
|
||||
int from_encidx, to_encidx;
|
||||
|
@ -318,26 +318,26 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||
}
|
||||
|
||||
if (my_transcoder->preprocessor) {
|
||||
fromp = sp = RSTRING_PTR(str);
|
||||
fromp = sp = (unsigned char *)RSTRING_PTR(str);
|
||||
slen = RSTRING_LEN(str);
|
||||
blen = slen + 30; /* len + margin */
|
||||
dest = rb_str_tmp_new(blen);
|
||||
bp = RSTRING_PTR(dest);
|
||||
bp = (unsigned char *)RSTRING_PTR(dest);
|
||||
my_transcoding.ruby_string_dest = dest;
|
||||
(*my_transcoder->preprocessor)(&fromp, &bp, (sp+slen), (bp+blen), &my_transcoding);
|
||||
if (fromp != sp+slen) {
|
||||
rb_raise(rb_eArgError, "not fully converted, %d bytes left", sp+slen-fromp);
|
||||
}
|
||||
buf = RSTRING_PTR(dest);
|
||||
buf = (unsigned char *)RSTRING_PTR(dest);
|
||||
*bp = '\0';
|
||||
rb_str_set_len(dest, bp - buf);
|
||||
str = dest;
|
||||
}
|
||||
fromp = sp = RSTRING_PTR(str);
|
||||
fromp = sp = (unsigned char *)RSTRING_PTR(str);
|
||||
slen = RSTRING_LEN(str);
|
||||
blen = slen + 30; /* len + margin */
|
||||
dest = rb_str_tmp_new(blen);
|
||||
bp = RSTRING_PTR(dest);
|
||||
bp = (unsigned char *)RSTRING_PTR(dest);
|
||||
my_transcoding.ruby_string_dest = dest;
|
||||
my_transcoding.flush_func = str_transcoding_resize;
|
||||
|
||||
|
@ -345,22 +345,22 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
|
|||
if (fromp != sp+slen) {
|
||||
rb_raise(rb_eArgError, "not fully converted, %d bytes left", sp+slen-fromp);
|
||||
}
|
||||
buf = RSTRING_PTR(dest);
|
||||
buf = (unsigned char *)RSTRING_PTR(dest);
|
||||
*bp = '\0';
|
||||
rb_str_set_len(dest, bp - buf);
|
||||
if (my_transcoder->postprocessor) {
|
||||
str = dest;
|
||||
fromp = sp = RSTRING_PTR(str);
|
||||
fromp = sp = (unsigned char *)RSTRING_PTR(str);
|
||||
slen = RSTRING_LEN(str);
|
||||
blen = slen + 30; /* len + margin */
|
||||
dest = rb_str_tmp_new(blen);
|
||||
bp = RSTRING_PTR(dest);
|
||||
bp = (unsigned char *)RSTRING_PTR(dest);
|
||||
my_transcoding.ruby_string_dest = dest;
|
||||
(*my_transcoder->postprocessor)(&fromp, &bp, (sp+slen), (bp+blen), &my_transcoding);
|
||||
if (fromp != sp+slen) {
|
||||
rb_raise(rb_eArgError, "not fully converted, %d bytes left", sp+slen-fromp);
|
||||
}
|
||||
buf = RSTRING_PTR(dest);
|
||||
buf = (unsigned char *)RSTRING_PTR(dest);
|
||||
*bp = '\0';
|
||||
rb_str_set_len(dest, bp - buf);
|
||||
}
|
||||
|
|
|
@ -62,8 +62,7 @@ typedef struct rb_transcoding {
|
|||
struct rb_transcoder *transcoder;
|
||||
VALUE ruby_string_dest; /* the String used as the conversion destination,
|
||||
or NULL if something else is being converted */
|
||||
char *(*flush_func)(struct rb_transcoding*, int, int);
|
||||
VALUE auxiliary_data;
|
||||
unsigned char *(*flush_func)(struct rb_transcoding*, int, int);
|
||||
} rb_transcoding;
|
||||
|
||||
/* static structure, one per supported encoding pair */
|
||||
|
@ -73,12 +72,12 @@ typedef struct rb_transcoder {
|
|||
const BYTE_LOOKUP *conv_tree_start;
|
||||
int max_output;
|
||||
int from_utf8;
|
||||
void (*preprocessor)(char**, char**, char*, char*, struct rb_transcoding *);
|
||||
void (*postprocessor)(char**, char**, char*, char*, struct rb_transcoding *);
|
||||
VALUE (*func_ii)(VALUE, struct rb_transcoding *); /* info -> info */
|
||||
VALUE (*func_si)(const char *, struct rb_transcoding *); /* start -> info */
|
||||
int (*func_io)(VALUE, const char*, struct rb_transcoding *); /* info -> output */
|
||||
int (*func_so)(const char*, char*, struct rb_transcoding *); /* start -> output */
|
||||
void (*preprocessor)(unsigned char**, unsigned char**, unsigned char*, unsigned char*, struct rb_transcoding *);
|
||||
void (*postprocessor)(unsigned char**, unsigned char**, unsigned char*, unsigned char*, struct rb_transcoding *);
|
||||
VALUE (*func_ii)(VALUE); /* info -> info */
|
||||
VALUE (*func_si)(const unsigned char *); /* start -> info */
|
||||
int (*func_io)(VALUE, const unsigned char*); /* info -> output */
|
||||
int (*func_so)(const unsigned char*, unsigned char*); /* start -> output */
|
||||
} rb_transcoder;
|
||||
|
||||
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);
|
||||
|
|
Loading…
Reference in a new issue