1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* pack.c (utf8_to_uv): added checks for malformed or redundant UTF-8 sequences.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3105 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
matz 2002-12-02 07:13:56 +00:00
parent 96986a7a90
commit e193fd8d66
3 changed files with 65 additions and 14 deletions

View file

@ -8,6 +8,11 @@ Sun Dec 1 22:43:29 2002 Nobuyoshi Nakada <nobu.nokada@softhome.net>
* win32/win32.c (rb_w32_stat): empty path is invalid, and return
ENOENT rather than EBADF in such case. [ruby-talk:57177]
Fri Nov 29 18:01:48 2002 Yukihiro Matsumoto <matz@ruby-lang.org>
* pack.c (utf8_to_uv): added checks for malformed or redundant
UTF-8 sequences.
Thu Nov 28 12:08:30 2002 Akinori MUSHA <knu@iDaemons.org>
* lib/mkmf.rb: Avoid the use of "clean::" in favor of "clean:" in

View file

@ -2922,4 +2922,19 @@ Init_socket()
#ifdef NI_DGRAM
sock_define_const("NI_DGRAM", NI_DGRAM);
#endif
#ifdef SHUT_RD
sock_define_const("SHUT_RD", SHUT_RD);
#else
sock_define_const("SHUT_RD", 0);
#endif
#ifdef SHUT_WR
sock_define_const("SHUT_WR", SHUT_WR);
#else
sock_define_const("SHUT_WR", 1);
#endif
#ifdef SHUT_RDWR
sock_define_const("SHUT_RDWR", SHUT_RDWR);
#else
sock_define_const("SHUT_RDWR", 2);
#endif
}

59
pack.c
View file

@ -1855,25 +1855,56 @@ utf8_to_uv(p, lenp)
char *p;
long *lenp;
{
int c = (*p++)&0xff;
unsigned long uv;
long n = 1;
int c = *p++ & 0xff;
unsigned long uv = c;
long n;
if (c < 0xc0) n = 1;
else if (c < 0xe0) n = 2;
else if (c < 0xf0) n = 3;
else if (c < 0xf8) n = 4;
else if (c < 0xfc) n = 5;
else if (c < 0xfe) n = 6;
else if (c == 0xfe) n = 7;
if (n > *lenp) return 0;
if (!(uv & 0x80)) {
*lenp = 1;
return uv;
}
if (!(uv & 0x40)) {
rb_warning("malformed UTF-8 character");
*lenp = 1;
return uv;
}
if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
else if (!(uv & 0x01)) { n = 7; uv = 0; }
else { n = 13; uv = 0; }
if (n > *lenp) {
rb_warning("malformed UTF-8 character (expected %d bytes, given %d bytes)",
n, *lenp);
return 0xfffd;
}
*lenp = n--;
uv = c;
if (n != 0) {
uv &= (1<<(BYTEWIDTH-2-n)) - 1;
while (n--) {
uv = uv << 6 | (*p++ & ((1<<6)-1));
c = *p++ & 0xff;
if ((c & 0xc0) != 0x80) {
rb_warning("malformed UTF-8 character");
*lenp -= n + 1;
return 0xfffd;
}
else {
c &= 0x3f;
if (uv == 0 && c == 0) {
int i;
for (i=0; n-i>0 && (p[i] & 0x3f) == 0; i++)
;
rb_warning("redundant UTF-8 sequence (skip %d bytes)", i+1);
n -= i;
p += i;
continue;
}
uv = uv << 6 | c;
}
}
}
return uv;