mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* string.c (count_utf8_lead_bytes_with_word): wrote function
comments. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@30778 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
b0d216c818
commit
aa36d4df21
2 changed files with 22 additions and 0 deletions
|
@ -1,3 +1,8 @@
|
||||||
|
Fri Feb 4 01:50:13 2011 KOSAKI Motohiro <kosaki.motohiro@gmail.com>
|
||||||
|
|
||||||
|
* string.c (count_utf8_lead_bytes_with_word): wrote function
|
||||||
|
comments.
|
||||||
|
|
||||||
Fri Feb 4 00:14:55 2011 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
Fri Feb 4 00:14:55 2011 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
* ext/zlib/zlib.c (gzfile_reader_get_unused): no need to dup
|
* ext/zlib/zlib.c (gzfile_reader_get_unused): no need to dup
|
||||||
|
|
17
string.c
17
string.c
|
@ -1038,13 +1038,30 @@ rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
|
||||||
|
|
||||||
#ifdef NONASCII_MASK
|
#ifdef NONASCII_MASK
|
||||||
#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
|
#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* UTF-8 leading bytes have either 0xxxxxxx or 11xxxxxx
|
||||||
|
* bit representation. (see http://en.wikipedia.org/wiki/UTF-8)
|
||||||
|
* Therefore, the following pseudo code can detect a UTF-8 leading byte.
|
||||||
|
*
|
||||||
|
* if (!(byte & 0x80))
|
||||||
|
* byte |= 0x40; // turn on bit6
|
||||||
|
* return ((byte>>6) & 1); // bit6 represents whether it's a leading byte or not.
|
||||||
|
*
|
||||||
|
* This function evaluates every byte in the argument word `s'
|
||||||
|
* using the above logic concurrently, and gathers the result of every byte.
|
||||||
|
*/
|
||||||
static inline VALUE
|
static inline VALUE
|
||||||
count_utf8_lead_bytes_with_word(const VALUE *s)
|
count_utf8_lead_bytes_with_word(const VALUE *s)
|
||||||
{
|
{
|
||||||
VALUE d = *s;
|
VALUE d = *s;
|
||||||
|
|
||||||
|
/* Transform into bit0 represent UTF-8 leading or not. */
|
||||||
d |= ~(d>>1);
|
d |= ~(d>>1);
|
||||||
d >>= 6;
|
d >>= 6;
|
||||||
d &= NONASCII_MASK >> 7;
|
d &= NONASCII_MASK >> 7;
|
||||||
|
|
||||||
|
/* Gather every bytes. */
|
||||||
d += (d>>8);
|
d += (d>>8);
|
||||||
d += (d>>16);
|
d += (d>>16);
|
||||||
#if SIZEOF_VALUE == 8
|
#if SIZEOF_VALUE == 8
|
||||||
|
|
Loading…
Reference in a new issue