enc/utf_8.c: limit UTF-8

* enc/utf_8.c (code_to_mbclen, code_to_mbc): reject values larger than UTF-8 max codepoints. [Feature #11094]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@50392 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2022-11-09 12:17:21 -05:00 · 2015-04-25 22:36:52 +00:00 · 2015-04-25 22:36:52 +00:00 · 859f88f330
commit 859f88f330
parent 4489c13657
2 changed files with 8 additions and 18 deletions
--- a/5
+++ b/5
@@ -1,3 +1,8 @@
+Sun Apr 26 07:36:48 2015  Nobuyoshi Nakada  <nobu@ruby-lang.org>
+
+	* enc/utf_8.c (code_to_mbclen, code_to_mbc): reject values larger
+	  than UTF-8 max codepoints.  [Feature #11094]
+
 Sat Apr 25 14:26:19 2015  Nobuyoshi Nakada  <nobu@ruby-lang.org>

 	* string.c (str_buf_cat): expand later so that the buffer can be
--- a/enc/utf_8.c
+++ b/enc/utf_8.c
@@ -35,8 +35,8 @@
 /* virtual codepoint values for invalid encoding byte 0xfe and 0xff */
 #define INVALID_CODE_FE   0xfffffffe
 #define INVALID_CODE_FF   0xffffffff
-#define VALID_CODE_LIMIT  0x7fffffff
 #endif
+#define VALID_CODE_LIMIT  0x0010ffff

 #define utf8_islead(c)     ((UChar )((c) & 0xc0) != 0x80)

@@ -297,9 +297,7 @@ code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
  if      ((code & 0xffffff80) == 0) return 1;
  else if ((code & 0xfffff800) == 0) return 2;
  else if ((code & 0xffff0000) == 0) return 3;
-  else if ((code & 0xffe00000) == 0) return 4;
-  else if ((code & 0xfc000000) == 0) return 5;
-  else if ((code & 0x80000000) == 0) return 6;
+  else if (code <= VALID_CODE_LIMIT) return 4;
 #ifdef USE_INVALID_CODE_SCHEME
  else if (code == INVALID_CODE_FE) return 1;
  else if (code == INVALID_CODE_FF) return 1;
@@ -328,24 +326,11 @@ code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
      *p++ = (UChar )(((code>>12) & 0x0f) | 0xe0);
      *p++ = UTF8_TRAILS(code, 6);
    }
-    else if ((code & 0xffe00000) == 0) {
+    else if (code <= VALID_CODE_LIMIT) {
      *p++ = (UChar )(((code>>18) & 0x07) | 0xf0);
      *p++ = UTF8_TRAILS(code, 12);
      *p++ = UTF8_TRAILS(code,  6);
    }
-    else if ((code & 0xfc000000) == 0) {
-      *p++ = (UChar )(((code>>24) & 0x03) | 0xf8);
-      *p++ = UTF8_TRAILS(code, 18);
-      *p++ = UTF8_TRAILS(code, 12);
-      *p++ = UTF8_TRAILS(code,  6);
-    }
-    else if ((code & 0x80000000) == 0) {
-      *p++ = (UChar )(((code>>30) & 0x01) | 0xfc);
-      *p++ = UTF8_TRAILS(code, 24);
-      *p++ = UTF8_TRAILS(code, 18);
-      *p++ = UTF8_TRAILS(code, 12);
-      *p++ = UTF8_TRAILS(code,  6);
-    }
 #ifdef USE_INVALID_CODE_SCHEME
    else if (code == INVALID_CODE_FE) {
      *p = 0xfe;