mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
string.c: improved comment.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48142 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
26d56b7dc8
commit
d26c49657d
2 changed files with 12 additions and 6 deletions
|
@ -1,3 +1,7 @@
|
|||
Sun Oct 26 11:24:24 2014 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||
|
||||
* string.c: improved comment.
|
||||
|
||||
Sun Oct 26 07:40:11 2014 Masaki Suketa <masaki.suketa@nifty.ne.jp>
|
||||
|
||||
* ext/win32ole/win32ole.c (ole_val2variant, ole_invoke): refactoring.
|
||||
|
@ -12,6 +16,7 @@ Sat Oct 25 22:28:17 2014 Tanaka Akira <akr@fsij.org>
|
|||
|
||||
* io.c (io_binwrite_string): Test writev() failure.
|
||||
|
||||
>>>>>>> .r48141
|
||||
Sat Oct 25 20:19:19 2014 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||
|
||||
* test/test-unicode_normalize.rb: added test_us_ascii.
|
||||
|
|
13
string.c
13
string.c
|
@ -1187,26 +1187,27 @@ rb_str_init(int argc, VALUE *argv, VALUE str)
|
|||
/*
|
||||
* UTF-8 leading bytes have either 0xxxxxxx or 11xxxxxx
|
||||
* bit representation. (see http://en.wikipedia.org/wiki/UTF-8)
|
||||
* Therefore, following pseudo code can detect UTF-8 leading byte.
|
||||
* Therefore, the following pseudocode can detect UTF-8 leading bytes.
|
||||
*
|
||||
* if (!(byte & 0x80))
|
||||
* byte |= 0x40; // turn on bit6
|
||||
* return ((byte>>6) & 1); // bit6 represent it's leading byte or not.
|
||||
* return ((byte>>6) & 1); // bit6 represents whether this byte is leading or not.
|
||||
*
|
||||
* This function calculate every bytes in the argument word `s'
|
||||
* using the above logic concurrently. and gather every bytes result.
|
||||
* This function calculates whether a byte is leading or not for all bytes
|
||||
* in the argument word by concurrently using the above logic, and then
|
||||
* adds up the number of leading bytes in the word.
|
||||
*/
|
||||
static inline uintptr_t
|
||||
count_utf8_lead_bytes_with_word(const uintptr_t *s)
|
||||
{
|
||||
uintptr_t d = *s;
|
||||
|
||||
/* Transform into bit0 represent UTF-8 leading or not. */
|
||||
/* Transform so that bit0 indicates whether we have a UTF-8 leading byte or not. */
|
||||
d |= ~(d>>1);
|
||||
d >>= 6;
|
||||
d &= NONASCII_MASK >> 7;
|
||||
|
||||
/* Gather every bytes. */
|
||||
/* Gather all bytes. */
|
||||
d += (d>>8);
|
||||
d += (d>>16);
|
||||
#if SIZEOF_VOIDP == 8
|
||||
|
|
Loading…
Reference in a new issue