1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* enc/utf_16be.c (UTF16_IS_SURROGATE_FIRST): avoid branch.

(UTF16_IS_SURROGATE_SECOND): ditto.
  (UTF16_IS_SURROGATE): defined.
  (utf16be_mbc_enc_len): validation implemented.

* enc/utf_16le.c (UTF16_IS_SURROGATE_FIRST): avoid branch.
  (UTF16_IS_SURROGATE_SECOND): ditto.
  (UTF16_IS_SURROGATE): defined.
  (utf16le_mbc_enc_len): validation implemented.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15338 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2008-01-30 03:49:54 +00:00
parent 8e36fa69fd
commit 44cfd58dc5
4 changed files with 108 additions and 9 deletions

View file

@ -29,8 +29,9 @@
#include "regenc.h"
/* This hunk is shown without +/- markers. Going by the hunk header counts
 * (8 old lines, 9 new lines) the two range-test definitions directly below
 * are the pre-change lines, and the three mask-based definitions after them
 * are the replacements.
 * NOTE(review): in the range-test forms `c` is not parenthesized and is
 * evaluated twice. */
#define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb)
#define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf)
/* Mask-based forms: a single AND plus one comparison, argument parenthesized.
 * For a byte value 0x00-0xff:
 *   (c & 0xfc) == 0xd8  holds exactly for 0xd8..0xdb
 *   (c & 0xfc) == 0xdc  holds exactly for 0xdc..0xdf
 *   (c & 0xf8) == 0xd8  holds exactly for 0xd8..0xdf (either of the above) */
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
static const int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@ -55,7 +56,28 @@ static int
utf16be_mbc_enc_len(const UChar* p, const OnigUChar* e ARG_UNUSED,
OnigEncoding enc ARG_UNUSED)
{
/*
 * Classify the character that starts at p, using e as the end of the
 * available bytes, and return one of the ONIGENC_CONSTRUCT_MBCLEN_*
 * results: CHARFOUND(2), CHARFOUND(4), NEEDMORE(k) or INVALID().
 *
 * NOTE(review): `e` is still tagged ARG_UNUSED in the parameter list even
 * though the validating body below reads it.
 */
/* Pre-change body: going by the hunk header (7 old lines, 28 new lines) this
 * is the single line the hunk removes, shown here without a '-' marker. Read
 * literally, it returns before any of the validation below is reached. */
return EncLen_UTF16[*p];
/* NOTE(review): p[0] is read before any comparison against e. */
int byte = p[0];
/* First byte is outside 0xd8..0xdf: the character is one 2-byte unit. */
if (!UTF16_IS_SURROGATE(byte)) {
if (2 <= e-p)
return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2);
else
return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1);
}
/* First byte is in 0xd8..0xdb: a 4-byte sequence is expected, and p[2] (the
 * first byte of the second 2-byte unit) must be in 0xdc..0xdf. */
if (UTF16_IS_SURROGATE_FIRST(byte)) {
switch (e-p) {
/* With 1, 2 or 3 bytes available, the NEEDMORE argument equals 4 minus the
 * number of available bytes. */
case 1: return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(3);
case 2: return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(2);
case 3:
/* p[2] is already available, so it is checked before asking for more. */
if (UTF16_IS_SURROGATE_SECOND(p[2]))
return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1);
break;
default:
/* Reached for e-p >= 4.
 * NOTE(review): also reached for e-p <= 0, where p[2] lies at or past e;
 * confirm that callers always pass p < e. */
if (UTF16_IS_SURROGATE_SECOND(p[2]))
return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
break;
}
}
/* Falls through to here when the first byte is in 0xdc..0xdf, or when p[2]
 * is not in 0xdc..0xdf after a first byte in 0xd8..0xdb. */
return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
}
static int