1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* encoding.c (rb_enc_precise_mbclen): new function for mbclen with

validation.

* include/ruby/encoding.h (rb_enc_precise_mbclen): declared.
  (MBCLEN_CHARFOUND): new macro.
  (MBCLEN_INVALID): new macro.
  (MBCLEN_NEEDMORE): new macro.

* include/ruby/oniguruma.h (OnigEncodingTypeST): replace mbc_enc_len
  by precise_mbc_enc_len.
  (ONIGENC_PRECISE_MBC_ENC_LEN): new macro.
  (ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND): new macro.
  (ONIGENC_CONSTRUCT_MBCLEN_INVALID): new macro.
  (ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE): new macro.
  (ONIGENC_MBCLEN_CHARFOUND): new macro.
  (ONIGENC_MBCLEN_INVALID): new macro.
  (ONIGENC_MBCLEN_NEEDMORE): new macro.
  (ONIGENC_MBC_ENC_LEN): use ONIGENC_PRECISE_MBC_ENC_LEN.

* enc/euc_jp.c: validation implemented.

* enc/sjis.c: ditto.

* enc/utf8.c: ditto.

* string.c (rb_str_inspect): use rb_enc_precise_mbclen for invalid
  encoding.
  (rb_str_valid_encoding_p): new method String#valid_encoding?.

* io.c (rb_io_getc): use rb_enc_precise_mbclen.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14119 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2007-12-06 09:28:26 +00:00
parent de4ec68991
commit 69406aad50
10 changed files with 585 additions and 140 deletions

View file

@ -144,7 +144,7 @@ typedef struct {
typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
typedef struct OnigEncodingTypeST {
int (*mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc);
int (*precise_mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc);
const char* name;
int max_enc_len;
int min_enc_len;
@ -282,7 +282,32 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
#define ONIGENC_STEP_BACK(enc,start,s,n) \
onigenc_step_back((enc),(start),(s),(n))
#define ONIGENC_MBC_ENC_LEN(enc,p,e) (enc)->mbc_enc_len(p,e,enc)
#define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) (n)
#define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1)
#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-n)
static inline int onigenc_mbclen_charfound(int r) { return 0 < r ? r : 0; }
static inline int onigenc_mbclen_needmore(int r) { return r < -1 ? -1 - r : 0; }
#define ONIGENC_MBCLEN_CHARFOUND(r) onigenc_mbclen_charfound(r)
#define ONIGENC_MBCLEN_INVALID(r) ((r) == -1)
#define ONIGENC_MBCLEN_NEEDMORE(r) onigenc_mbclen_needmore(r)
#define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc)
static inline int onigenc_mbclen_recover(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)
{
int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
int r;
if (ONIGENC_MBCLEN_INVALID(ret))
return 1;
else if ((r = ONIGENC_MBCLEN_NEEDMORE(ret)))
return e-p+r;
else
return ONIGENC_MBCLEN_CHARFOUND(ret);
}
#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_recover(p,e,enc)
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)