1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* io.c (rb_io_each_codepoint): new method.

[ruby-core:23949]

* ext/stringio/stringio.c (strio_each_codepoint): ditto.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@23818 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
matz 2009-06-22 08:23:30 +00:00
parent b205b5e7b6
commit fa31dda1f8
4 changed files with 146 additions and 1 deletions

View file

@ -1,3 +1,10 @@
Mon Jun 22 17:15:38 2009 Yukihiro Matsumoto <matz@ruby-lang.org>
* io.c (rb_io_each_codepoint): new method.
[ruby-core:23949]
* ext/stringio/stringio.c (strio_each_codepoint): ditto.
Mon Jun 22 16:26:11 2009 Nobuyoshi Nakada <nobu@ruby-lang.org>
* ruby.c (ruby_init_loadpath_safe): removed "." from load_path.

View file

@ -824,6 +824,37 @@ strio_each_char(VALUE self)
return self;
}
/*
 * call-seq:
 *   strio.each_codepoint {|c| block }  -> strio
 *
 * Yields the Integer codepoint of each character from the current
 * position to the end of the underlying string, advancing the position
 * as it goes.  Returns an enumerator when no block is given.
 *
 * See IO#each_codepoint.
 */
static VALUE
strio_each_codepoint(VALUE self)
{
    struct StringIO *ptr;
    rb_encoding *enc;
    unsigned int c;
    int n;

    RETURN_ENUMERATOR(self, 0, 0);

    ptr = readable(StringIO(self));
    enc = rb_enc_get(ptr->string);
    while (ptr->pos < RSTRING_LEN(ptr->string)) {
        c = rb_enc_codepoint_len(RSTRING_PTR(ptr->string)+ptr->pos,
                                 RSTRING_END(ptr->string), &n, enc);
        /*
         * Consume the character before yielding: if the block leaves
         * early (break, exception) the position must already be past
         * the codepoint it was handed, otherwise the next read would
         * return the same character again.
         */
        ptr->pos += n;
        rb_yield(UINT2NUM(c));
    }
    return self;
}
/* Boyer-Moore search: copied from regex.c */
static void
bm_init_skip(long *skip, const char *pat, long m)
@ -1359,6 +1390,8 @@ Init_stringio()
rb_define_method(StringIO, "bytes", strio_each_byte, 0);
rb_define_method(StringIO, "each_char", strio_each_char, 0);
rb_define_method(StringIO, "chars", strio_each_char, 0);
rb_define_method(StringIO, "each_codepoint", strio_each_codepoint, 0);
rb_define_method(StringIO, "codepoints", strio_each_codepoint, 0);
rb_define_method(StringIO, "getc", strio_getc, 0);
rb_define_method(StringIO, "ungetc", strio_ungetc, 1);
rb_define_method(StringIO, "ungetbyte", strio_ungetbyte, 1);

102
io.c
View file

@ -2641,7 +2641,7 @@ rb_io_each_byte(VALUE io)
fptr->rbuf_len--;
rb_yield(INT2FIX(*p & 0xff));
p++;
errno = 0;
errno = 0;
}
rb_io_check_readable(fptr);
READ_CHECK(fptr);
@ -2774,6 +2774,89 @@ rb_io_each_char(VALUE io)
/*
 *  call-seq:
 *     ios.each_codepoint {|c| block }  => ios
 *
 *  Calls the given block once for each character in <i>ios</i>,
 *  passing the character's <code>Integer</code> codepoint as the
 *  argument.  The stream must be opened for reading or an
 *  <code>IOError</code> will be raised.  Raises
 *  <code>ArgumentError</code> on an invalid byte sequence.
 */
static VALUE
rb_io_each_codepoint(VALUE io)
{
    rb_io_t *fptr;
    rb_encoding *enc;
    unsigned int c;
    int r, n;

    RETURN_ENUMERATOR(io, 0, 0);
    GetOpenFile(io, fptr);
    rb_io_check_readable(fptr);

    READ_CHECK(fptr);
    if (NEED_READCONV(fptr)) {
        /* Characters are decoded from the conversion buffer (cbuf). */
        for (;;) {
            make_readconv(fptr, 0);
            for (;;) {
                if (fptr->cbuf_len) {
                    if (fptr->encs.enc)
                        r = rb_enc_precise_mbclen(fptr->cbuf+fptr->cbuf_off,
                                                  fptr->cbuf+fptr->cbuf_off+fptr->cbuf_len,
                                                  fptr->encs.enc);
                    else
                        r = ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1);
                    if (!MBCLEN_NEEDMORE_P(r))
                        break;
                    if (fptr->cbuf_len == fptr->cbuf_capa) {
                        rb_raise(rb_eIOError, "too long character");
                    }
                }
                if (more_char(fptr) == -1) {
                    /* ignore an incomplete character before EOF */
                    return io;
                }
            }
            if (MBCLEN_INVALID_P(r)) {
                /*
                 * r can only be "invalid" when it was produced by
                 * rb_enc_precise_mbclen above, i.e. encs.enc is set.
                 * Report that encoding; the local `enc` has not been
                 * assigned on this path.
                 */
                enc = fptr->encs.enc;
                rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
            }
            n = MBCLEN_CHARFOUND_LEN(r);
            if (fptr->encs.enc) {
                c = rb_enc_codepoint(fptr->cbuf+fptr->cbuf_off,
                                     fptr->cbuf+fptr->cbuf_off+fptr->cbuf_len,
                                     fptr->encs.enc);
            }
            else {
                /* no encoding: each byte was treated as one character */
                c = (unsigned char)fptr->cbuf[fptr->cbuf_off];
            }
            /* consume from cbuf, the buffer the character was read from */
            fptr->cbuf_off += n;
            fptr->cbuf_len -= n;
            rb_yield(UINT2NUM(c));
        }
    }

    /* No conversion needed: decode straight from the read buffer (rbuf). */
    enc = io_input_encoding(fptr);
    for (;;) {
        if (io_fillbuf(fptr) < 0) {
            return io;
        }
        r = rb_enc_precise_mbclen(fptr->rbuf+fptr->rbuf_off,
                                  fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc);
        if (MBCLEN_CHARFOUND_P(r) &&
            (n = MBCLEN_CHARFOUND_LEN(r)) <= fptr->rbuf_len) {
            c = rb_enc_codepoint(fptr->rbuf+fptr->rbuf_off,
                                 fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc);
            fptr->rbuf_off += n;
            fptr->rbuf_len -= n;
            rb_yield(UINT2NUM(c));
        }
        else if (MBCLEN_INVALID_P(r)) {
            rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
        }
        else {
            /*
             * Character is incomplete: go round again and retry after
             * io_fillbuf.
             * NOTE(review): this relies on io_fillbuf fetching more
             * data while rbuf is non-empty; its body is not visible
             * here -- confirm this cannot spin on a truncated
             * character at EOF.
             */
            continue;
        }
    }
    return io;
}
/*
* call-seq:
* ios.lines(sep=$/) => anEnumerator
@ -2836,6 +2919,21 @@ rb_io_chars(VALUE io)
return rb_enumeratorize(io, ID2SYM(rb_intern("each_char")), 0, 0);
}
/*
 *  call-seq:
 *     ios.codepoints   => anEnumerator
 *
 *  Returns an enumerator over the codepoints of <em>ios</em>; it is
 *  built on <code>each_codepoint</code>.  The stream must be opened
 *  for reading or an <code>IOError</code> will be raised.
 */
static VALUE
rb_io_codepoints(VALUE io)
{
    /* symbol of the iterator method the enumerator will drive */
    VALUE iter_name = ID2SYM(rb_intern("each_codepoint"));

    return rb_enumeratorize(io, iter_name, 0, 0);
}
/*
* call-seq:
* ios.getc => string or nil
@ -8797,9 +8895,11 @@ Init_IO(void)
rb_define_method(rb_cIO, "each_line", rb_io_each_line, -1);
rb_define_method(rb_cIO, "each_byte", rb_io_each_byte, 0);
rb_define_method(rb_cIO, "each_char", rb_io_each_char, 0);
rb_define_method(rb_cIO, "each_codepoint", rb_io_each_codepoint, 0);
rb_define_method(rb_cIO, "lines", rb_io_lines, -1);
rb_define_method(rb_cIO, "bytes", rb_io_bytes, 0);
rb_define_method(rb_cIO, "chars", rb_io_chars, 0);
rb_define_method(rb_cIO, "codepoints", rb_io_codepoints, 0);
rb_define_method(rb_cIO, "syswrite", rb_io_syswrite, 1);
rb_define_method(rb_cIO, "sysread", rb_io_sysread, -1);

View file

@ -340,6 +340,11 @@ class TestStringIO < Test::Unit::TestCase
assert_equal(%w(1 2 3 4), f.each_char.to_a)
end
# StringIO#each_codepoint without a block returns an enumerator that
# yields one Integer per character.
def test_each_codepoint
  f = StringIO.new("1234")
  assert_equal([49, 50, 51, 52], f.each_codepoint.to_a)

  # ASCII alone cannot tell codepoints from bytes; multibyte characters
  # must come back as a single codepoint each.
  f = StringIO.new("\u3042\u3044")
  assert_equal([0x3042, 0x3044], f.each_codepoint.to_a)
end
def test_gets2
f = StringIO.new("foo\nbar\nbaz\n")
assert_equal("fo", f.gets(2))