mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* include/ruby/io.h (rb_io_t): new fields: readconv, crbuf, crbuf_off,
crbuf_len, crbuf_capa. (MakeOpenFile): initialize them. * io.c (io_shift_crbuf): new function. (io_getc): use econv. (rb_io_fptr_finalize): finalize readconv and crbuf. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18666 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
69f22784f2
commit
63daa7c07d
4 changed files with 105 additions and 10 deletions
10
ChangeLog
10
ChangeLog
|
@ -1,3 +1,13 @@
|
|||
Sun Aug 17 01:29:46 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* include/ruby/io.h (rb_io_t): new fields: readconv, crbuf, crbuf_off,
|
||||
crbuf_len, crbuf_capa.
|
||||
(MakeOpenFile): initialize them.
|
||||
|
||||
* io.c (io_shift_crbuf): new function.
|
||||
(io_getc): use econv.
|
||||
(rb_io_fptr_finalize): finalize readconv and crbuf.
|
||||
|
||||
Sun Aug 17 00:02:07 2008 Tanaka Akira <akr@fsij.org>
|
||||
|
||||
* include/ruby/encoding.h (rb_econv_check_error): declared.
|
||||
|
|
|
@ -36,17 +36,26 @@ typedef struct rb_io_t {
|
|||
char *path; /* pathname for file */
|
||||
void (*finalize)(struct rb_io_t*,int); /* finalize proc */
|
||||
long refcnt;
|
||||
|
||||
char *wbuf; /* wbuf_off + wbuf_len <= wbuf_capa */
|
||||
int wbuf_off;
|
||||
int wbuf_len;
|
||||
int wbuf_capa;
|
||||
|
||||
char *rbuf; /* rbuf_off + rbuf_len <= rbuf_capa */
|
||||
int rbuf_off;
|
||||
int rbuf_len;
|
||||
int rbuf_capa;
|
||||
|
||||
VALUE tied_io_for_writing;
|
||||
rb_encoding *enc;
|
||||
rb_encoding *enc2;
|
||||
rb_encoding *enc; /* int_enc if enc2. ext_enc otherwise. */
|
||||
rb_encoding *enc2; /* ext_enc if not NULL. */
|
||||
|
||||
rb_econv_t *readconv;
|
||||
char *crbuf; /* crbuf_off + crbuf_len <= crbuf_capa */
|
||||
int crbuf_off;
|
||||
int crbuf_len;
|
||||
int crbuf_capa;
|
||||
} rb_io_t;
|
||||
|
||||
#define HAVE_RB_IO_T 1
|
||||
|
@ -89,6 +98,11 @@ typedef struct rb_io_t {
|
|||
fp->rbuf_off = 0;\
|
||||
fp->rbuf_len = 0;\
|
||||
fp->rbuf_capa = 0;\
|
||||
fp->readconv = NULL;\
|
||||
fp->crbuf = NULL;\
|
||||
fp->crbuf_off = 0;\
|
||||
fp->crbuf_len = 0;\
|
||||
fp->crbuf_capa = 0;\
|
||||
fp->tied_io_for_writing = 0;\
|
||||
fp->enc = 0;\
|
||||
fp->enc2 = 0;\
|
||||
|
|
77
io.c
77
io.c
|
@ -2268,14 +2268,77 @@ rb_io_each_byte(VALUE io)
|
|||
return io;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
io_shift_crbuf(rb_io_t *fptr, int len)
|
||||
{
|
||||
VALUE str;
|
||||
str = rb_str_new(fptr->crbuf+fptr->crbuf_off, len);
|
||||
fptr->crbuf_off += len;
|
||||
fptr->crbuf_len -= len;
|
||||
OBJ_TAINT(str);
|
||||
rb_enc_associate(str, fptr->enc);
|
||||
/* xxx: set coderange */
|
||||
if (fptr->crbuf_len == 0)
|
||||
fptr->crbuf_off = 0;
|
||||
if (fptr->crbuf_off < fptr->crbuf_capa/2) {
|
||||
memmove(fptr->crbuf, fptr->crbuf+fptr->crbuf_off, fptr->crbuf_len);
|
||||
fptr->crbuf_off = 0;
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
io_getc(rb_io_t *fptr, rb_encoding *enc)
|
||||
{
|
||||
int r, n, cr = 0;
|
||||
VALUE str;
|
||||
|
||||
if (rb_enc_dummy_p(enc)) {
|
||||
rb_raise(rb_eNotImpError, "getc against dummy encoding is not currently supported");
|
||||
if (fptr->enc2) {
|
||||
if (!fptr->readconv) {
|
||||
fptr->readconv = rb_econv_open(fptr->enc2->name, fptr->enc->name, 0);
|
||||
if (!fptr->readconv)
|
||||
rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc->name, fptr->enc2->name);
|
||||
fptr->crbuf_off = 0;
|
||||
fptr->crbuf_len = 0;
|
||||
fptr->crbuf_capa = 1024;
|
||||
fptr->crbuf = ALLOC_N(char, fptr->crbuf_capa);
|
||||
}
|
||||
|
||||
while (1) {
|
||||
const unsigned char *ss, *sp, *se;
|
||||
unsigned char *ds, *dp, *de;
|
||||
rb_econv_result_t res;
|
||||
if (fptr->crbuf_len) {
|
||||
r = rb_enc_precise_mbclen(fptr->crbuf+fptr->crbuf_off, fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len, fptr->enc);
|
||||
if (!MBCLEN_NEEDMORE_P(r))
|
||||
break;
|
||||
if (fptr->crbuf_len == fptr->crbuf_capa) {
|
||||
rb_raise(rb_eIOError, "too long character");
|
||||
}
|
||||
}
|
||||
if (fptr->rbuf_len == 0) {
|
||||
if (io_fillbuf(fptr) == -1) {
|
||||
if (fptr->crbuf_len == 0)
|
||||
return Qnil;
|
||||
/* return an incomplete character just before EOF */
|
||||
return io_shift_crbuf(fptr, fptr->crbuf_len);
|
||||
}
|
||||
}
|
||||
ss = sp = (const unsigned char *)fptr->rbuf + fptr->rbuf_off;
|
||||
se = sp + fptr->rbuf_len;
|
||||
ds = dp = (unsigned char *)fptr->crbuf + fptr->crbuf_off + fptr->crbuf_len;
|
||||
de = (unsigned char *)fptr->crbuf + fptr->crbuf_capa;
|
||||
res = rb_econv_convert(fptr->readconv, &sp, se, &dp, de, ECONV_PARTIAL_INPUT|ECONV_OUTPUT_FOLLOWED_BY_INPUT);
|
||||
fptr->rbuf_off += sp - ss;
|
||||
fptr->rbuf_len -= sp - ss;
|
||||
fptr->crbuf_len += dp - ds;
|
||||
rb_econv_check_error(fptr->readconv);
|
||||
}
|
||||
if (MBCLEN_INVALID_P(r)) {
|
||||
r = rb_enc_mbclen(fptr->crbuf+fptr->crbuf_off, fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len, fptr->enc);
|
||||
return io_shift_crbuf(fptr, r);
|
||||
}
|
||||
return io_shift_crbuf(fptr, MBCLEN_CHARFOUND_LEN(r));
|
||||
}
|
||||
|
||||
if (io_fillbuf(fptr) < 0) {
|
||||
|
@ -2766,6 +2829,14 @@ rb_io_fptr_finalize(rb_io_t *fptr)
|
|||
free(fptr->wbuf);
|
||||
fptr->wbuf = 0;
|
||||
}
|
||||
if (fptr->readconv) {
|
||||
rb_econv_close(fptr->readconv);
|
||||
fptr->readconv = NULL;
|
||||
}
|
||||
if (fptr->crbuf) {
|
||||
free(fptr->crbuf);
|
||||
fptr->crbuf = NULL;
|
||||
}
|
||||
free(fptr);
|
||||
return 1;
|
||||
}
|
||||
|
@ -3370,6 +3441,8 @@ mode_enc(rb_io_t *fptr, const char *estr)
|
|||
char *enc2name;
|
||||
int idx, idx2;
|
||||
|
||||
/* parse estr as "enc" or "enc2:enc" */
|
||||
|
||||
p0 = strrchr(estr, ':');
|
||||
if (!p0) p1 = estr;
|
||||
else p1 = p0 + 1;
|
||||
|
|
|
@ -220,12 +220,10 @@ EOT
|
|||
with_tmpdir {
|
||||
src = "\e$B\x23\x30\x23\x31\e(B".force_encoding("iso-2022-jp")
|
||||
generate_file('tmp', src)
|
||||
assert_raise(NotImplementedError) do
|
||||
open("tmp", "r:iso-2022-jp:euc-jp") {|f|
|
||||
assert_equal("\xa3\xb0".force_encoding("euc-jp"), f.getc)
|
||||
assert_equal("\xa3\xb1".force_encoding("euc-jp"), f.getc)
|
||||
}
|
||||
end
|
||||
open("tmp", "r:iso-2022-jp:euc-jp") {|f|
|
||||
assert_equal("\xa3\xb0".force_encoding("euc-jp"), f.getc)
|
||||
assert_equal("\xa3\xb1".force_encoding("euc-jp"), f.getc)
|
||||
}
|
||||
}
|
||||
end
|
||||
|
||||
|
|
Loading…
Reference in a new issue