mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* io.c (rb_io_each_codepoint, rb_io_codepoints): new methods.
[ruby-core:23949] * ext/stringio/stringio.c (strio_each_codepoint): ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@23818 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
b205b5e7b6
commit
fa31dda1f8
4 changed files with 146 additions and 1 deletions
|
@ -1,3 +1,10 @@
|
|||
Mon Jun 22 17:15:38 2009 Yukihiro Matsumoto <matz@ruby-lang.org>
|
||||
|
||||
* io.c (rb_io_each_codepoint, rb_io_codepoints): new methods.
|
||||
[ruby-core:23949]
|
||||
|
||||
* ext/stringio/stringio.c (strio_each_codepoint): ditto.
|
||||
|
||||
Mon Jun 22 16:26:11 2009 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* ruby.c (ruby_init_loadpath_safe): removed "." from load_path.
|
||||
|
|
|
@ -824,6 +824,37 @@ strio_each_char(VALUE self)
|
|||
return self;
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* strio.each_codepoint {|c| block } -> strio
|
||||
*
|
||||
* See IO#each_codepoint.
|
||||
*/
|
||||
static VALUE
|
||||
strio_each_codepoint(VALUE self)
|
||||
{
|
||||
struct StringIO *ptr;
|
||||
rb_encoding *enc;
|
||||
unsigned int c;
|
||||
int n;
|
||||
|
||||
RETURN_ENUMERATOR(self, 0, 0);
|
||||
|
||||
ptr = readable(StringIO(self));
|
||||
enc = rb_enc_get(ptr->string);
|
||||
for (;;) {
|
||||
if (ptr->pos >= RSTRING_LEN(ptr->string)) {
|
||||
return self;
|
||||
}
|
||||
|
||||
c = rb_enc_codepoint_len(RSTRING_PTR(ptr->string)+ptr->pos,
|
||||
RSTRING_END(ptr->string), &n, enc);
|
||||
rb_yield(UINT2NUM(c));
|
||||
ptr->pos += n;
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
/* Boyer-Moore search: copied from regex.c */
|
||||
static void
|
||||
bm_init_skip(long *skip, const char *pat, long m)
|
||||
|
@ -1359,6 +1390,8 @@ Init_stringio()
|
|||
rb_define_method(StringIO, "bytes", strio_each_byte, 0);
|
||||
rb_define_method(StringIO, "each_char", strio_each_char, 0);
|
||||
rb_define_method(StringIO, "chars", strio_each_char, 0);
|
||||
rb_define_method(StringIO, "each_codepoint", strio_each_codepoint, 0);
|
||||
rb_define_method(StringIO, "codepoints", strio_each_codepoint, 0);
|
||||
rb_define_method(StringIO, "getc", strio_getc, 0);
|
||||
rb_define_method(StringIO, "ungetc", strio_ungetc, 1);
|
||||
rb_define_method(StringIO, "ungetbyte", strio_ungetbyte, 1);
|
||||
|
|
102
io.c
102
io.c
|
@ -2641,7 +2641,7 @@ rb_io_each_byte(VALUE io)
|
|||
fptr->rbuf_len--;
|
||||
rb_yield(INT2FIX(*p & 0xff));
|
||||
p++;
|
||||
errno = 0;
|
||||
errno = 0;
|
||||
}
|
||||
rb_io_check_readable(fptr);
|
||||
READ_CHECK(fptr);
|
||||
|
@ -2774,6 +2774,89 @@ rb_io_each_char(VALUE io)
|
|||
|
||||
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* ios.each_codepoint {|c| block } => ios
|
||||
*
|
||||
* Passes the <code>Integer</code> ordinal of each character in <i>ios</i>,
|
||||
* passing the codepoint as an argument. The stream must be opened for
|
||||
* reading or an <code>IOError</code> will be raised.
|
||||
*/
|
||||
|
||||
static VALUE
|
||||
rb_io_each_codepoint(VALUE io)
|
||||
{
|
||||
rb_io_t *fptr;
|
||||
rb_encoding *enc;
|
||||
unsigned int c;
|
||||
int r, n;
|
||||
|
||||
RETURN_ENUMERATOR(io, 0, 0);
|
||||
GetOpenFile(io, fptr);
|
||||
rb_io_check_readable(fptr);
|
||||
|
||||
READ_CHECK(fptr);
|
||||
if (NEED_READCONV(fptr)) {
|
||||
for (;;) {
|
||||
make_readconv(fptr, 0);
|
||||
for (;;) {
|
||||
if (fptr->cbuf_len) {
|
||||
if (fptr->encs.enc)
|
||||
r = rb_enc_precise_mbclen(fptr->cbuf+fptr->cbuf_off,
|
||||
fptr->cbuf+fptr->cbuf_off+fptr->cbuf_len,
|
||||
fptr->encs.enc);
|
||||
else
|
||||
r = ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1);
|
||||
if (!MBCLEN_NEEDMORE_P(r))
|
||||
break;
|
||||
if (fptr->cbuf_len == fptr->cbuf_capa) {
|
||||
rb_raise(rb_eIOError, "too long character");
|
||||
}
|
||||
}
|
||||
if (more_char(fptr) == -1) {
|
||||
/* ignore an incomplete character before EOF */
|
||||
return io;
|
||||
}
|
||||
}
|
||||
if (MBCLEN_INVALID_P(r)) {
|
||||
rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
|
||||
}
|
||||
n = MBCLEN_CHARFOUND_LEN(r);
|
||||
c = rb_enc_codepoint(fptr->cbuf+fptr->cbuf_off,
|
||||
fptr->cbuf+fptr->cbuf_off+fptr->cbuf_len,
|
||||
fptr->encs.enc);
|
||||
fptr->rbuf_off += n;
|
||||
fptr->rbuf_len -= n;
|
||||
rb_yield(UINT2NUM(c));
|
||||
}
|
||||
}
|
||||
enc = io_input_encoding(fptr);
|
||||
for (;;) {
|
||||
if (io_fillbuf(fptr) < 0) {
|
||||
return io;
|
||||
}
|
||||
r = rb_enc_precise_mbclen(fptr->rbuf+fptr->rbuf_off,
|
||||
fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc);
|
||||
if (MBCLEN_CHARFOUND_P(r) &&
|
||||
(n = MBCLEN_CHARFOUND_LEN(r)) <= fptr->rbuf_len) {
|
||||
c = rb_enc_codepoint(fptr->rbuf+fptr->rbuf_off,
|
||||
fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc);
|
||||
fptr->rbuf_off += n;
|
||||
fptr->rbuf_len -= n;
|
||||
rb_yield(UINT2NUM(c));
|
||||
}
|
||||
else if (MBCLEN_INVALID_P(r)) {
|
||||
rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
|
||||
}
|
||||
else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return io;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* ios.lines(sep=$/) => anEnumerator
|
||||
|
@ -2836,6 +2919,21 @@ rb_io_chars(VALUE io)
|
|||
return rb_enumeratorize(io, ID2SYM(rb_intern("each_char")), 0, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* ios.codepoints => anEnumerator
|
||||
*
|
||||
* Returns an enumerator that gives each codepoint in <em>ios</em>.
|
||||
* The stream must be opened for reading or an <code>IOError</code>
|
||||
* will be raised.
|
||||
*/
|
||||
|
||||
static VALUE
|
||||
rb_io_codepoints(VALUE io)
|
||||
{
|
||||
return rb_enumeratorize(io, ID2SYM(rb_intern("each_codepoint")), 0, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* ios.getc => string or nil
|
||||
|
@ -8797,9 +8895,11 @@ Init_IO(void)
|
|||
rb_define_method(rb_cIO, "each_line", rb_io_each_line, -1);
|
||||
rb_define_method(rb_cIO, "each_byte", rb_io_each_byte, 0);
|
||||
rb_define_method(rb_cIO, "each_char", rb_io_each_char, 0);
|
||||
rb_define_method(rb_cIO, "each_codepoint", rb_io_each_codepoint, 0);
|
||||
rb_define_method(rb_cIO, "lines", rb_io_lines, -1);
|
||||
rb_define_method(rb_cIO, "bytes", rb_io_bytes, 0);
|
||||
rb_define_method(rb_cIO, "chars", rb_io_chars, 0);
|
||||
rb_define_method(rb_cIO, "codepoints", rb_io_codepoints, 0);
|
||||
|
||||
rb_define_method(rb_cIO, "syswrite", rb_io_syswrite, 1);
|
||||
rb_define_method(rb_cIO, "sysread", rb_io_sysread, -1);
|
||||
|
|
|
@ -340,6 +340,11 @@ class TestStringIO < Test::Unit::TestCase
|
|||
assert_equal(%w(1 2 3 4), f.each_char.to_a)
|
||||
end
|
||||
|
||||
def test_each_codepoint
  # Without a block each_codepoint hands back an enumerator; draining it
  # must produce the codepoints of "1", "2", "3" and "4" in order.
  strio = StringIO.new("1234")
  assert_equal([49, 50, 51, 52], strio.each_codepoint.to_a)
end
|
||||
|
||||
def test_gets2
|
||||
f = StringIO.new("foo\nbar\nbaz\n")
|
||||
assert_equal("fo", f.gets(2))
|
||||
|
|
Loading…
Reference in a new issue