Initial revision

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5892 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
ksaito 2004-03-05 15:31:51 +00:00
parent 33a06e4aca
commit 5770336f8b
15 changed files with 17522 additions and 0 deletions

54
ascii.c Normal file
View File

@ -0,0 +1,54 @@
/**********************************************************************
ascii.c - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "regenc.h"
/*
 * Character-class test for the US-ASCII encoding.
 * Code points of 128 and above never match; anything below 128 is handed to
 * ONIGENC_IS_ASCII_CODE_CTYPE together with the requested ctype mask.
 */
static int
ascii_code_is_ctype(OnigCodePoint code, unsigned int ctype)
{
  if (code >= 128) {
    return FALSE;
  }
  return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
}
/*
 * Encoding descriptor for US-ASCII.
 * The initializers are positional and follow the member order of
 * OnigEncodingType: the 256-entry byte-length table, the name, the
 * limits/flags, and finally the handler functions.
 */
OnigEncodingType OnigEncodingASCII = {
/* len_table: every possible lead byte starts a 1-byte character */
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
},
"US-ASCII", /* name */
1, /* max byte length */
FALSE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
onigenc_single_byte_mbc_to_code, /* mbc_to_code */
onigenc_single_byte_code_to_mbclen, /* code_to_mbclen */
onigenc_single_byte_code_to_mbc, /* code_to_mbc */
onigenc_ascii_mbc_to_lower, /* mbc_to_lower */
onigenc_ascii_mbc_is_case_ambig, /* mbc_is_case_ambig */
ascii_code_is_ctype, /* code_is_ctype */
onigenc_nothing_get_ctype_code_range, /* get_ctype_code_range */
onigenc_single_byte_left_adjust_char_head, /* left_adjust_char_head */
onigenc_single_byte_is_allowed_reverse_match, /* is_allowed_reverse_match */
onigenc_nothing_get_all_fold_match_code, /* get_all_fold_match_code */
onigenc_nothing_get_fold_match_info /* get_fold_match_info */
};

191
euc_jp.c Normal file
View File

@ -0,0 +1,191 @@
/**********************************************************************
euc_jp.c - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "regenc.h"
/*
 * Despite the name, this is true when byte `c` lies OUTSIDE 0xa1..0xfe.
 * Subtracting 0xa1 and truncating to UChar makes values below 0xa1 wrap
 * around to large numbers, so a single unsigned comparison covers both ends
 * of the range.  eucjp_left_adjust_char_head() steps backwards for as long
 * as this is false.
 */
#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
/*
 * Decode the character starting at `p` into a code point.
 * The expected byte length comes from the EUC-JP length table, keyed on the
 * first byte.  Each following byte is appended as the next lower 8 bits of
 * the result.  Decoding stops early if `end` is reached before the expected
 * number of bytes has been consumed; the partial value is returned as is.
 * The first byte is read unconditionally.
 */
static OnigCodePoint
eucjp_mbc_to_code(UChar* p, UChar* end)
{
  int head = *p++;
  int remaining = enc_len(ONIG_ENCODING_EUC_JP, head) - 1;
  OnigCodePoint code = head;

  while (remaining-- > 0 && p < end) {
    code = (code << 8) + *p++;
  }
  return code;
}
/*
 * Number of bytes needed to encode `code`: 3 when any of bits 16-23 are set,
 * otherwise 2 when any of bits 8-15 are set, otherwise 1.
 */
static int
eucjp_code_to_mbclen(OnigCodePoint code)
{
  if (code & 0xff0000) return 3;
  if (code & 0xff00) return 2;
  return 1;
}
/*
 * Return the first (most significant non-empty) byte of `code`:
 * bits 16-23 if any of them are set, otherwise bits 8-15 if any of them are
 * set, otherwise the code itself converted to int.
 * The value is not validated against the EUC-JP length table here.
 */
static int
eucjp_code_to_mbc_first(OnigCodePoint code)
{
  if (code & 0xff0000) return (int )((code >> 16) & 0xff);
  if (code & 0xff00) return (int )((code >> 8) & 0xff);
  return (int )code;
}
/*
 * Serialize `code` into `buf`, most significant byte first, and return the
 * number of bytes written.  The bytes for bits 16-23 and 8-15 are emitted
 * only when non-zero; the low byte is always emitted.
 * If the byte count does not equal the length the EUC-JP table assigns to
 * the first byte written, ONIGERR_INVALID_WIDE_CHAR_VALUE is returned
 * instead (the bytes have already been stored in `buf` by then).
 */
static int
eucjp_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  int n = 0;

  if ((code & 0xff0000) != 0) buf[n++] = (UChar )((code >> 16) & 0xff);
  if ((code & 0xff00) != 0) buf[n++] = (UChar )((code >> 8) & 0xff);
  buf[n++] = (UChar )(code & 0xff);

  if (enc_len(ONIG_ENCODING_EUC_JP, buf[0]) != n)
    return ONIGERR_INVALID_WIDE_CHAR_VALUE;

  return n;
}
/*
 * Write the lower-case form of the character at `p` into `lower` and return
 * its byte length.
 * An ASCII byte is mapped with ONIGENC_ASCII_CODE_TO_LOWER_CASE.  Any other
 * character is copied through unchanged, using the length given by the
 * EUC-JP table; the copy is skipped when `lower` and `p` are the same
 * pointer.
 */
static int
eucjp_mbc_to_lower(UChar* p, UChar* lower)
{
  int len, k;

  if (ONIGENC_IS_MBC_ASCII(p)) {
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
    return 1;
  }

  len = enc_len(ONIG_ENCODING_EUC_JP, *p);
  if (lower == p) return len; /* converting in place: nothing to copy */

  for (k = 0; k < len; k++) {
    lower[k] = p[k];
  }
  return len; /* byte length of the converted character */
}
/*
 * Character-class test for EUC-JP.
 *
 *  - Code points below 128 are delegated to ONIGENC_IS_ASCII_CODE_CTYPE
 *    with the full ctype mask.
 *  - For larger code points only ONIGENC_CTYPE_WORD can match: the result
 *    is TRUE when the code's first byte is marked as a multibyte lead
 *    (length > 1) in the EUC-JP length table.
 *  - Every other combination yields FALSE.
 *
 * The earlier form carried two statements after an if/else whose branches
 * both returned; they could never execute and have been removed.  Results
 * are unchanged for every input.
 */
static int
eucjp_code_is_ctype(OnigCodePoint code, unsigned int ctype)
{
  if (code < 128)
    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);

  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
    int first = eucjp_code_to_mbc_first(code);
    return (enc_len(ONIG_ENCODING_EUC_JP, first) > 1 ? TRUE : FALSE);
  }

  return FALSE;
}
/*
 * Given a position `s` inside the buffer starting at `start`, return the
 * address of the head of the character that contains `s`.
 *
 * Relies on the property that, in this encoding, multibyte trail bytes do
 * not mix with single bytes.
 */
static UChar*
eucjp_left_adjust_char_head(UChar* start, UChar* s)
{
  UChar *head;
  int clen;

  if (s <= start) return s;

  /* Walk left over bytes in 0xa1..0xfe, stopping at `start` at the latest. */
  for (head = s; !eucjp_islead(*head) && head > start; head--)
    ;

  clen = enc_len(ONIG_ENCODING_EUC_JP, *head);
  if (head + clen > s) return head; /* `s` is inside the character at head */

  /* Skip that character, then advance by the even part of what is left. */
  head += clen;
  return head + ((s - head) & ~1);
}
/*
 * TRUE when the byte at `s` is at most 0x7e, or is 0x8e or 0x8f;
 * FALSE for every other byte.  `end` is not consulted.
 */
static int
eucjp_is_allowed_reverse_match(UChar* s, UChar* end)
{
  const UChar b = *s;

  switch (b) {
  case 0x8e:
  case 0x8f:
    return TRUE;
  default:
    return (b <= 0x7e) ? TRUE : FALSE;
  }
}
/*
 * Encoding descriptor for EUC-JP.
 * The initializers are positional and follow the member order of
 * OnigEncodingType.
 */
OnigEncodingType OnigEncodingEUC_JP = {
/*
 * len_table, indexed by lead byte (16 entries per row, rows 0x00..0xf0):
 * 0x8e -> 2, 0x8f -> 3, 0xa1..0xfe -> 2, every other byte -> 1.
 */
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
},
"EUC-JP", /* name */
3, /* max byte length */
FALSE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
FALSE, /* is continuous sb mb codepoint */
eucjp_mbc_to_code, /* mbc_to_code */
eucjp_code_to_mbclen, /* code_to_mbclen */
eucjp_code_to_mbc, /* code_to_mbc */
eucjp_mbc_to_lower, /* mbc_to_lower */
onigenc_mbn_mbc_is_case_ambig, /* mbc_is_case_ambig */
eucjp_code_is_ctype, /* code_is_ctype */
onigenc_nothing_get_ctype_code_range, /* get_ctype_code_range */
eucjp_left_adjust_char_head, /* left_adjust_char_head */
eucjp_is_allowed_reverse_match, /* is_allowed_reverse_match */
onigenc_nothing_get_all_fold_match_code, /* get_all_fold_match_code */
onigenc_nothing_get_fold_match_info /* get_fold_match_info */
};

77
oniggnu.h Normal file
View File

@ -0,0 +1,77 @@
/**********************************************************************
oniggnu.h - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef ONIGGNU_H
#define ONIGGNU_H
#include "oniguruma.h"
/*
 * Multibyte character-set selectors.
 * NOTE(review): presumably the int accepted by re_mbcinit() in the
 * non-M17N build -- confirm against its implementation.
 */
#define MBCTYPE_ASCII 0
#define MBCTYPE_EUC 1
#define MBCTYPE_SJIS 2
#define MBCTYPE_UTF8 3
/* GNU regex options */
/* RE_NREGS keeps any value defined earlier; otherwise it is ONIG_NREGION. */
#ifndef RE_NREGS
#define RE_NREGS ONIG_NREGION
#endif
/* GNU-style option names, each an alias for the matching ONIG_OPTION_* bit. */
#define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE
#define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND
#define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE
#define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE
#define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST
/* POSIXLINE is the combination of MULTILINE and SINGLELINE. */
#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE)
/*
 * Symbol renaming.
 * With RUBY_PLATFORM defined, the GNU-style re_* names are rewritten to
 * ruby_re_* names.  Two of those are rewritten again, so that
 * re_error_code_to_str ends up as onig_error_code_to_str and
 * re_copy_registers ends up as onig_region_copy.
 * Without RUBY_PLATFORM only those two names are mapped, directly.
 */
#ifdef RUBY_PLATFORM
#define re_mbcinit ruby_re_mbcinit
#define re_compile_pattern ruby_re_compile_pattern
#define re_recompile_pattern ruby_re_recompile_pattern
#define re_free_pattern ruby_re_free_pattern
#define re_adjust_startpos ruby_re_adjust_startpos
#define re_search ruby_re_search
#define re_match ruby_re_match
#define re_set_casetable ruby_re_set_casetable
#define re_copy_registers ruby_re_copy_registers
#define re_free_registers ruby_re_free_registers
#define register_info_type ruby_register_info_type
#define re_error_code_to_str ruby_error_code_to_str
#define ruby_error_code_to_str onig_error_code_to_str
#define ruby_re_copy_registers onig_region_copy
#else
#define re_error_code_to_str onig_error_code_to_str
#define re_copy_registers onig_region_copy
#endif
/*
 * GNU regex style entry points.
 * Each prototype is wrapped in P_() so that the parameter list disappears
 * on compilers for which P_ expands to ().
 * NOTE(review): the meaning of the unnamed int parameters (lengths, start
 * positions, ranges) is not visible in this header -- confirm against the
 * implementation before relying on it.
 */
/* re_mbcinit takes an OnigEncoding in the M17N build, an int otherwise. */
#ifdef ONIG_RUBY_M17N
ONIG_EXTERN
void re_mbcinit P_((OnigEncoding));
#else
ONIG_EXTERN
void re_mbcinit P_((int));
#endif
ONIG_EXTERN
int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
ONIG_EXTERN
int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
ONIG_EXTERN
void re_free_pattern P_((struct re_pattern_buffer*));
ONIG_EXTERN
int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int));
ONIG_EXTERN
int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*));
ONIG_EXTERN
int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*));
ONIG_EXTERN
void re_set_casetable P_((const char*));
ONIG_EXTERN
void re_free_registers P_((struct re_registers*));
ONIG_EXTERN
int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */
#endif /* ONIGGNU_H */

715
oniguruma.h Normal file
View File

@ -0,0 +1,715 @@
/**********************************************************************
oniguruma.h - Oniguruma (regular expression library)
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef ONIGURUMA_H
#define ONIGURUMA_H
#define ONIGURUMA
/* Library version, split into three components: 2.2.4. */
#define ONIGURUMA_VERSION_MAJOR 2
#define ONIGURUMA_VERSION_MINOR 2
#define ONIGURUMA_VERSION_TEENY 4
/*
 * P_(args): prototype wrapper.  Expands to the given parameter list when
 * __STDC__ or _WIN32 is defined, and to an empty list () otherwise.
 * A definition supplied before this header is left alone.
 */
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
# define P_(args) args
#else
# define P_(args) ()
#endif
#endif
/*
 * PV_(args): same idea, keyed on HAVE_STDARG_PROTOTYPES.
 * NOTE(review): presumably meant for variadic prototypes -- confirm.
 */
#ifndef PV_
#ifdef HAVE_STDARG_PROTOTYPES
# define PV_(args) args
#else
# define PV_(args) ()
#endif
#endif
/*
 * ONIG_EXTERN: linkage prefix for public declarations.
 * On _WIN32 (but not Cygwin) it is `extern __declspec(dllexport)` when
 * EXPORT or RUBY_EXPORT is defined and `extern __declspec(dllimport)`
 * otherwise.  Everywhere else it falls back to plain `extern`.
 * A definition supplied before this header is left alone.
 */
#ifndef ONIG_EXTERN
#if defined(_WIN32) && !defined(__CYGWIN__)
#if defined(EXPORT) || defined(RUBY_EXPORT)
#define ONIG_EXTERN extern __declspec(dllexport)
#else
#define ONIG_EXTERN extern __declspec(dllimport)
#endif
#endif
#endif
#ifndef ONIG_EXTERN
#define ONIG_EXTERN extern
#endif
/* PART: character encoding */
/* Raw byte of subject or pattern text. */
typedef unsigned char UChar;
/* Decoded character value, as produced by an encoding's mbc_to_code. */
typedef unsigned long OnigCodePoint;
/* Unsigned distance type (used for the anchor_dmin/anchor_dmax members). */
typedef unsigned int OnigDistance;
/* All bits set: the largest value an OnigDistance can hold. */
#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
/*
 * A range of code points delimited by `from` and `to`.
 * NOTE(review): presumably both ends are inclusive -- confirm with users of
 * get_ctype_code_range.
 */
typedef struct {
OnigCodePoint from;
OnigCodePoint to;
} OnigCodePointRange;
/* Capacity of the two per-target arrays in OnigEncFoldMatchInfo. */
#define ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE 16
/*
 * Result record filled in through an encoding's get_fold_match_info hook.
 * NOTE(review): target_num is presumably the count of valid entries in the
 * two parallel arrays -- confirm against an encoding that implements it.
 */
typedef struct {
int target_num;
int target_byte_len[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
UChar* target_str[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
} OnigEncFoldMatchInfo;
#if defined(RUBY_PLATFORM) && defined(M17N_H)
#define ONIG_RUBY_M17N
typedef m17n_encoding* OnigEncoding;
#else
/*
 * Description of one character encoding: static properties followed by the
 * handler functions through which the ONIGENC_* macros dispatch.
 * Encoding tables are initialized positionally, so the member order below
 * is part of the contract.
 */
typedef struct {
const char len_table[256]; /* character byte length, indexed by lead byte */
const char* name; /* encoding name string */
int max_enc_len; /* largest byte length of one character */
int is_fold_match;
int ctype_support_level; /* sb-only/full */
int is_continuous_sb_mb; /* code point is continuous from sb to mb */
/* decode the character at p (not reading beyond end) into a code point */
OnigCodePoint (*mbc_to_code)(UChar* p, UChar* end);
/* number of bytes needed to encode `code` */
int (*code_to_mbclen)(OnigCodePoint code);
/* encode `code` into buf; returns a byte count or a negative error code */
int (*code_to_mbc)(OnigCodePoint code, UChar *buf);
/* write the lower-case form of the character at p; returns its byte length */
int (*mbc_to_lower)(UChar* p, UChar* lower);
int (*mbc_is_case_ambig)(UChar* p);
/* non-zero when `code` matches the ONIGENC_CTYPE_* mask `ctype` */
int (*code_is_ctype)(OnigCodePoint code, unsigned int ctype);
int (*get_ctype_code_range)(int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]);
/* head of the character containing s, searching no further left than start */
UChar* (*left_adjust_char_head)(UChar* start, UChar* s);
int (*is_allowed_reverse_match)(UChar* p, UChar* e);
int (*get_all_fold_match_code)(OnigCodePoint** codes);
int (*get_fold_match_info)(UChar* p, UChar* end, OnigEncFoldMatchInfo** info);
} OnigEncodingType;
/* Encodings are passed around as pointers to their descriptor. */
typedef OnigEncodingType* OnigEncoding;
/* Encoding descriptors declared by the library (one object per encoding). */
ONIG_EXTERN OnigEncodingType OnigEncodingASCII;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_1;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_2;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_3;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_4;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_5;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_6;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_7;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_8;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_9;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_10;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_11;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_13;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF8;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN;
ONIG_EXTERN OnigEncodingType OnigEncodingSJIS;
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8;
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R;
ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
/* OnigEncoding handles: each expands to the address of its descriptor. */
#define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
#define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1)
#define ONIG_ENCODING_ISO_8859_2 (&OnigEncodingISO_8859_2)
#define ONIG_ENCODING_ISO_8859_3 (&OnigEncodingISO_8859_3)
#define ONIG_ENCODING_ISO_8859_4 (&OnigEncodingISO_8859_4)
#define ONIG_ENCODING_ISO_8859_5 (&OnigEncodingISO_8859_5)
#define ONIG_ENCODING_ISO_8859_6 (&OnigEncodingISO_8859_6)
#define ONIG_ENCODING_ISO_8859_7 (&OnigEncodingISO_8859_7)
#define ONIG_ENCODING_ISO_8859_8 (&OnigEncodingISO_8859_8)
#define ONIG_ENCODING_ISO_8859_9 (&OnigEncodingISO_8859_9)
#define ONIG_ENCODING_ISO_8859_10 (&OnigEncodingISO_8859_10)
#define ONIG_ENCODING_ISO_8859_11 (&OnigEncodingISO_8859_11)
#define ONIG_ENCODING_ISO_8859_13 (&OnigEncodingISO_8859_13)
#define ONIG_ENCODING_ISO_8859_14 (&OnigEncodingISO_8859_14)
#define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15)
#define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16)
#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8)
#define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP)
#define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW)
#define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR)
#define ONIG_ENCODING_EUC_CN (&OnigEncodingEUC_CN)
#define ONIG_ENCODING_SJIS (&OnigEncodingSJIS)
#define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8)
#define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R)
#define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5)
#endif /* else RUBY && M17N */
#define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
/* work size */
#define ONIGENC_CODE_TO_MBC_MAXLEN 7
#define ONIGENC_MBC_TO_LOWER_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN
/* character types */
/* One bit per class, so several classes can be OR-ed into one ctype mask. */
#define ONIGENC_CTYPE_ALPHA (1<< 0)
#define ONIGENC_CTYPE_BLANK (1<< 1)
#define ONIGENC_CTYPE_CNTRL (1<< 2)
#define ONIGENC_CTYPE_DIGIT (1<< 3)
#define ONIGENC_CTYPE_GRAPH (1<< 4)
#define ONIGENC_CTYPE_LOWER (1<< 5)
#define ONIGENC_CTYPE_PRINT (1<< 6)
#define ONIGENC_CTYPE_PUNCT (1<< 7)
#define ONIGENC_CTYPE_SPACE (1<< 8)
#define ONIGENC_CTYPE_UPPER (1<< 9)
#define ONIGENC_CTYPE_XDIGIT (1<<10)
#define ONIGENC_CTYPE_WORD (1<<11)
#define ONIGENC_CTYPE_ASCII (1<<12)
/* ALNUM is not a bit of its own: it is the union of ALPHA and DIGIT. */
#define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT)
/* ctype support level */
/* Values for the ctype_support_level member (commented there as
   "sb-only/full"). */
#define ONIGENC_CTYPE_SUPPORT_LEVEL_SB 0
#define ONIGENC_CTYPE_SUPPORT_LEVEL_FULL 1
/* Short alias for ONIGENC_MBC_LEN_BY_HEAD. */
#define enc_len(enc,byte) ONIGENC_MBC_LEN_BY_HEAD(enc,byte)
/* True when enc is the null encoding handle. */
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
/* True when no character of the encoding is longer than one byte. */
#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
/* True when `byte` starts a character whose length is not 1. */
#define ONIGENC_IS_MBC_HEAD(enc,byte) (ONIGENC_MBC_LEN_BY_HEAD(enc,byte) != 1)
/* True when the byte at p is below 128. */
#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
/* True when the code point is below 128. */
#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
/* Word test restricted to code points below 128. */
#define ONIGENC_IS_CODE_SB_WORD(enc,code) \
(ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))
/* Decode the character at s, then apply the word test to its code point. */
#define ONIGENC_IS_MBC_WORD(enc,s,end) \
ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
#ifdef ONIG_RUBY_M17N
#include <ctype.h> /* for isblank(), isgraph() */
/*
 * ONIG_RUBY_M17N variant of the encoding interface.
 * OnigEncoding is an m17n_encoding* in this configuration, so the macros
 * forward to onigenc_* wrapper functions or m17n_* primitives instead of
 * reading members of OnigEncodingType.
 */
#define ONIGENC_MBC_TO_LOWER(enc,p,buf) onigenc_mbc_to_lower(enc,p,buf)
#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) onigenc_mbc_is_case_ambig(enc,p)
/* Fold matching, continuous sb/mb code points and full ctype support are
   all reported as unavailable here. */
#define ONIGENC_IS_FOLD_MATCH(enc) FALSE
#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) FALSE
#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ONIGENC_CTYPE_SUPPORT_LEVEL_SB
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
onigenc_is_allowed_reverse_match(enc, s, end)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
onigenc_get_left_adjust_char_head(enc, start, s)
/* The fold-match and ctype-range queries are constant "nothing available"
   results. */
#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) 0
#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) ONIG_NO_SUPPORT_CONFIG
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
ONIG_NO_SUPPORT_CONFIG
/* NOTE(review): `b` is not parenthesized inside the cast, so the cast binds
   only to the first operand of an expression argument. */
#define ONIGENC_MBC_LEN_BY_HEAD(enc,b) m17n_mbclen(enc,(int )b)
#define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc)
/* A non-positive maximum length is mapped to ONIG_INFINITE_DISTANCE. */
#define ONIGENC_MBC_MAXLEN_DIST(enc) \
(ONIGENC_MBC_MAXLEN(enc) > 0 ? ONIGENC_MBC_MAXLEN(enc) \
: ONIG_INFINITE_DISTANCE)
#define ONIGENC_MBC_TO_CODE(enc,p,e) m17n_codepoint((enc),(p),(e))
#define ONIGENC_CODE_TO_MBCLEN(enc,code) m17n_codelen((enc),(code))
#define ONIGENC_CODE_TO_MBC(enc,code,buf) onigenc_code_to_mbc(enc, code, buf)
#if 0
#define ONIGENC_STEP_BACK(enc,start,s,n) /* !! not supported !! */
#endif
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) \
onigenc_is_code_ctype(enc,code,ctype)
/* BLANK and GRAPH use the <ctype.h> macro when one exists and a hand-written
   fallback otherwise. */
#ifdef isblank
# define ONIGENC_IS_CODE_BLANK(enc,code) isblank((int )code)
#else
# define ONIGENC_IS_CODE_BLANK(enc,code) ((code) == ' ' || (code) == '\t')
#endif
#ifdef isgraph
# define ONIGENC_IS_CODE_GRAPH(enc,code) isgraph((int )code)
#else
# define ONIGENC_IS_CODE_GRAPH(enc,code) \
(isprint((int )code) && !isspace((int )code))
#endif
/* The remaining class tests forward to the m17n_is*() family. */
#define ONIGENC_IS_CODE_PRINT(enc,code) m17n_isprint(enc,code)
#define ONIGENC_IS_CODE_ALNUM(enc,code) m17n_isalnum(enc,code)
#define ONIGENC_IS_CODE_ALPHA(enc,code) m17n_isalpha(enc,code)
#define ONIGENC_IS_CODE_LOWER(enc,code) m17n_islower(enc,code)
#define ONIGENC_IS_CODE_UPPER(enc,code) m17n_isupper(enc,code)
#define ONIGENC_IS_CODE_CNTRL(enc,code) m17n_iscntrl(enc,code)
#define ONIGENC_IS_CODE_PUNCT(enc,code) m17n_ispunct(enc,code)
#define ONIGENC_IS_CODE_SPACE(enc,code) m17n_isspace(enc,code)
#define ONIGENC_IS_CODE_DIGIT(enc,code) m17n_isdigit(enc,code)
#define ONIGENC_IS_CODE_XDIGIT(enc,code) m17n_isxdigit(enc,code)
#define ONIGENC_IS_CODE_WORD(enc,code) m17n_iswchar(enc,code)
/* Wrapper functions that the ONIG_RUBY_M17N macros expand to. */
ONIG_EXTERN
int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype));
ONIG_EXTERN
int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
ONIG_EXTERN
int onigenc_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* buf));
ONIG_EXTERN
int onigenc_mbc_is_case_ambig P_((OnigEncoding enc, UChar* p));
ONIG_EXTERN
int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end));
#else /* ONIG_RUBY_M17N */
/*
 * Native variant of the encoding interface: every macro reads a member of
 * the OnigEncodingType that `enc` points to, or calls through one of its
 * function pointers.
 */
#define ONIGENC_NAME(enc) ((enc)->name)
#define ONIGENC_MBC_TO_LOWER(enc,p,buf) (enc)->mbc_to_lower(p,buf)
#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) (enc)->mbc_is_case_ambig(p)
#define ONIGENC_IS_FOLD_MATCH(enc) ((enc)->is_fold_match)
#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) ((enc)->is_continuous_sb_mb)
#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ((enc)->ctype_support_level)
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
(enc)->is_allowed_reverse_match(s,end)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
(enc)->left_adjust_char_head(start, s)
#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) \
(enc)->get_all_fold_match_code(codes)
#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) \
(enc)->get_fold_match_info(p,end,info)
/* Unlike its neighbours this one calls a library function, not a member. */
#define ONIGENC_STEP_BACK(enc,start,s,n) \
onigenc_step_back((enc),(start),(s),(n))
/*
 * Byte length of the character whose lead byte is `byte`, looked up in the
 * encoding's 256-entry len_table.
 * The index is converted to unsigned char, so a plain (possibly signed) char
 * holding a value >= 0x80 cannot produce a negative subscript.  This is the
 * same cast that mbclen() applies to the same table.
 */
#define ONIGENC_MBC_LEN_BY_HEAD(enc,byte) ((enc)->len_table[(unsigned char )(byte)])
/* Largest byte length of a single character in this encoding. */
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
/* In this configuration the maximum-length distance is simply the maximum
   length. */
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
/* Conversions between bytes and code points, via the encoding's hooks. */
#define ONIGENC_MBC_TO_CODE(enc,p,e) (enc)->mbc_to_code((p),(e))
#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code)
#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf)
/* Generic class test; each ONIGENC_IS_CODE_xxx below is this test with the
   matching ONIGENC_CTYPE_xxx mask. */
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->code_is_ctype(code,ctype)
#define ONIGENC_IS_CODE_GRAPH(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
#define ONIGENC_IS_CODE_PRINT(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
#define ONIGENC_IS_CODE_ALNUM(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
#define ONIGENC_IS_CODE_ALPHA(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
#define ONIGENC_IS_CODE_LOWER(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
#define ONIGENC_IS_CODE_UPPER(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
#define ONIGENC_IS_CODE_CNTRL(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
#define ONIGENC_IS_CODE_PUNCT(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
#define ONIGENC_IS_CODE_SPACE(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
#define ONIGENC_IS_CODE_BLANK(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
#define ONIGENC_IS_CODE_DIGIT(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
#define ONIGENC_IS_CODE_XDIGIT(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
#define ONIGENC_IS_CODE_WORD(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
(enc)->get_ctype_code_range(ctype,nsb,nmb,sbr,mbr)
/* Function behind ONIGENC_STEP_BACK. */
ONIG_EXTERN
UChar* onigenc_step_back P_((OnigEncoding enc, UChar* start, UChar* s, int n));
#endif /* is not ONIG_RUBY_M17N */
/* encoding API */
/*
 * Encoding-level entry points available in both configurations: library
 * initialization, the default encoding, the default case-conversion table,
 * and helpers that locate character heads relative to a position `s`.
 * NOTE(review): exact return conventions are not visible in this header --
 * confirm against the implementation.
 */
ONIG_EXTERN
int onigenc_init P_(());
ONIG_EXTERN
int onigenc_set_default_encoding P_((OnigEncoding enc));
ONIG_EXTERN
OnigEncoding onigenc_get_default_encoding P_(());
ONIG_EXTERN
void onigenc_set_default_caseconv_table P_((UChar* table));
ONIG_EXTERN
UChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, UChar* start, UChar* s, UChar** prev));
ONIG_EXTERN
UChar* onigenc_get_prev_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
ONIG_EXTERN
UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
ONIG_EXTERN
UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
/* PART: regular expression */
/* config parameters */
/* Compile-time limits; the names say what each one bounds. */
#define ONIG_NREGION 10
#define ONIG_MAX_BACKREF_NUM 1000
#define ONIG_MAX_REPEAT_NUM 100000
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 1000
/* constants */
/* NOTE(review): presumably the buffer size callers need for an error
   message -- confirm against onig_error_code_to_str. */
#define ONIG_MAX_ERROR_MESSAGE_LEN 90
/*
 * Ruby build without M17N: define ismbchar()/mbclen() in terms of the
 * default encoding's length table.  Any earlier ismbchar is removed first.
 * mbclen() casts its argument to unsigned char before indexing, so it is
 * safe to call with a plain char.
 */
#if defined(RUBY_PLATFORM) && !defined(ONIG_RUBY_M17N)
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
#undef ismbchar
#define ismbchar(c) (mbclen((c)) != 1)
#define mbclen(c) (OnigEncDefaultCharEncoding->len_table[(unsigned char )(c)])
#endif
/* Bit set of ONIG_OPTION_* flags. */
typedef unsigned int OnigOptionType;
#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
/* options */
/* Each flag is the previous one shifted left by one bit, so the values run
   1, 2, 4, ... in declaration order. */
#define ONIG_OPTION_NONE 0
#define ONIG_OPTION_IGNORECASE 1L
#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1)
#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1)
#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1)
#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1)
#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1)
#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1)
#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1)
#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1)
/* options (search time) */
#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1)
#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1)
#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1)
/* ON and OFF modify `options` in place; IS_OPTION_ON yields the masked bits
   (non-zero when set), not a normalized 0/1. */
#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
#define ONIG_IS_OPTION_ON(options,option) ((options) & (option))
/* syntax */
/*
 * A regular-expression dialect, described by three bit sets plus the options
 * that are on by default.
 */
typedef struct {
unsigned int op; /* ONIG_SYN_OP_* bits */
unsigned int op2; /* ONIG_SYN_OP2_* bits */
unsigned int behavior; /* remaining ONIG_SYN_* behavior bits */
OnigOptionType options; /* default option */
} OnigSyntaxType;
/* Syntax descriptors declared by the library. */
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
/* predefined syntaxes (see regparse.c) */
/* Each handle expands to the address of the matching descriptor. */
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
/* default syntax */
/* ONIG_SYNTAX_DEFAULT reads a pointer variable, so it is not a constant. */
ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
/* syntax (operators) */
/* Bits for OnigSyntaxType.op.  The ESC_ variants enable the backslash-
   prefixed spelling of the same operator. */
#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1<<0)
#define ONIG_SYN_OP_DOT_ANYCHAR (1<<1) /* . */
#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1<<2) /* * */
#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1<<3)
#define ONIG_SYN_OP_PLUS_ONE_INF (1<<4) /* + */
#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1<<5)
#define ONIG_SYN_OP_QMARK_ZERO_ONE (1<<6) /* ? */
#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1<<7)
#define ONIG_SYN_OP_BRACE_INTERVAL (1<<8) /* {lower,upper} */
#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1<<9) /* \{lower,upper\} */
#define ONIG_SYN_OP_VBAR_ALT (1<<10) /* | */
#define ONIG_SYN_OP_ESC_VBAR_ALT (1<<11) /* \| */
#define ONIG_SYN_OP_LPAREN_SUBEXP (1<<12) /* (...) */
#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1<<13) /* \(...\) */
#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1<<14) /* \A, \Z, \z */
#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1<<15) /* \G */
#define ONIG_SYN_OP_DECIMAL_BACKREF (1<<16) /* \num */
#define ONIG_SYN_OP_BRACKET_CC (1<<17) /* [...] */
#define ONIG_SYN_OP_ESC_W_WORD (1<<18) /* \w, \W */
#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1<<19) /* \<, \> */
#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1<<20) /* \b, \B */
#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1<<21) /* \s, \S */
#define ONIG_SYN_OP_ESC_D_DIGIT (1<<22) /* \d, \D */
#define ONIG_SYN_OP_LINE_ANCHOR (1<<23) /* ^, $ */
#define ONIG_SYN_OP_POSIX_BRACKET (1<<24) /* [:xxxx:] */
#define ONIG_SYN_OP_QMARK_NON_GREEDY (1<<25) /* ??,*?,+?,{n,m}? */
#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1<<26) /* \n,\r,\t,\a ... */
#define ONIG_SYN_OP_ESC_C_CONTROL (1<<27) /* \cx */
#define ONIG_SYN_OP_ESC_OCTAL3 (1<<28) /* \OOO */
#define ONIG_SYN_OP_ESC_X_HEX2 (1<<29) /* \xHH */
#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1<<30) /* \x{7HHHHHHH} */
/* Bits for OnigSyntaxType.op2: a second operator set, numbered again from
   bit 0. */
#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1<<0) /* \Q...\E */
#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1<<1) /* (?...) */
#define ONIG_SYN_OP2_OPTION_PERL (1<<2) /* (?imsx),(?-imsx) */
#define ONIG_SYN_OP2_OPTION_RUBY (1<<3) /* (?imx), (?-imx) */
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1<<4) /* ?+,*+,++ */
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1<<5) /* {n,m}+ */
#define ONIG_SYN_OP2_CCLASS_SET_OP (1<<6) /* [...&&..[..]..] */
#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1<<7) /* (?<name>...) */
#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1<<8) /* \k<name> */
#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1<<9) /* \g<name>, \g<n> */
#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1<<10) /* (?@..),(?@<x>..) */
#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1<<11) /* \C-x */
#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1<<12) /* \M-x */
#define ONIG_SYN_OP2_ESC_V_VTAB (1<<13) /* \v as VTAB */
#define ONIG_SYN_OP2_ESC_U_HEX4 (1<<14) /* \uHHHH */
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1<<15) /* \`, \' */
#define ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */
/* syntax (behavior) */
/* Bits for OnigSyntaxType.behavior (an unsigned int).
   Bit 31 is written with an unsigned constant: shifting a signed 1 into the
   sign bit of a 32-bit int is undefined behaviour. */
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1<<0) /* ?, *, +, {n,m} */
#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1<<1) /* error or ignore */
#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1<<2) /* ...)... */
#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1<<3) /* {??? */
#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1<<4) /* {,n} => {0,n} */
#define ONIG_SYN_STRICT_CHECK_BACKREF (1<<5) /* /(\1)/,/\1()/ ..*/
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1<<6) /* (?<=a|bc) */
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1<<7) /* see doc/RE */
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1<<8) /* (?<x>)(?<x>) */
/* syntax (behavior) in char class [...] */
/* These share the behavior word with the flags above and occupy bits 20-23. */
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1<<20) /* [^...] */
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1<<21) /* [..\w..] etc.. */
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1<<22)
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1<<23) /* [0-9-a]=[0-9\-a] */
/* syntax (behavior) warning */
/* Warning switches, bits 24-25 of the same word. */
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1<<24) /* [,-,] */
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1<<25) /* (?:a*)+ */
/* meta character specifiers (onig_set_meta_char()) */
/* Selector values naming which meta character is being configured. */
#define ONIG_META_CHAR_ESCAPE 0
#define ONIG_META_CHAR_ANYCHAR 1
#define ONIG_META_CHAR_ANYTIME 2
#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3
#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4
#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5
/* NOTE(review): presumably the character value that disables a meta
   character -- confirm against onig_set_meta_char(). */
#define ONIG_INEFFECTIVE_META_CHAR 0
/* error codes */
/* Pattern errors are the codes from -100 down to -999 inclusive, i.e. the
   "syntax error" and "values error" groups below.  The argument is
   evaluated twice. */
#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000)
/* normal return */
#define ONIG_NORMAL 0
#define ONIG_MISMATCH -1
#define ONIG_NO_SUPPORT_CONFIG -2
/* internal error */
#define ONIGERR_PARSER_BUG -11
#define ONIGERR_STACK_BUG -12
#define ONIGERR_UNDEFINED_BYTECODE -13
#define ONIGERR_UNEXPECTED_BYTECODE -14
#define ONIGERR_MATCH_STACK_LIMIT_OVER -15
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21
#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
/* general error */
#define ONIGERR_INVALID_ARGUMENT -30
/* syntax error */
#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100
#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
#define ONIGERR_EMPTY_CHAR_CLASS -102
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
#define ONIGERR_END_PATTERN_AT_BACKSLASH -104
#define ONIGERR_END_PATTERN_AT_META -105
#define ONIGERR_END_PATTERN_AT_CONTROL -106
#define ONIGERR_META_CODE_SYNTAX -108
#define ONIGERR_CONTROL_CODE_SYNTAX -109
#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110
#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111
#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114
#define ONIGERR_NESTED_REPEAT_OPERATOR -115
#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116
#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117
#define ONIGERR_END_PATTERN_IN_GROUP -118
#define ONIGERR_UNDEFINED_GROUP_OPTION -119
#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121
#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122
#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123
/* values error (syntax error) */
#define ONIGERR_TOO_BIG_NUMBER -200
#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201
#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202
#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203
#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204
#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205
#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206
#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207
#define ONIGERR_INVALID_BACKREF -208
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209
#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212
#define ONIGERR_EMPTY_GROUP_NAME -214
#define ONIGERR_INVALID_GROUP_NAME -215
#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216
#define ONIGERR_UNDEFINED_NAME_REFERENCE -217
#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218
#define ONIGERR_MULTIPLEX_DEFINED_NAME -219
#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220
#define ONIGERR_NEVER_ENDING_RECURSION -221
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
/* errors related to thread */
/* Lies outside the range accepted by ONIG_IS_PATTERN_ERROR. */
#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31
/* Non-zero when group `i` is within the limit, region `r` has a history
   list, and that list has an entry for `i`.  The && chain guards each
   dereference with the test before it; `r` and `i` are evaluated more than
   once. */
#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
/* match result region type */
/*
 * Match result: begin/end positions of the groups, kept in parallel arrays.
 * NOTE(review): presumably index 0 is the whole match and positions are
 * byte offsets into the subject -- confirm against the search functions.
 */
struct re_registers {
int allocated; /* NOTE(review): presumably the capacity of beg/end -- confirm */
int num_regs; /* NOTE(review): presumably the number of entries in use -- confirm */
int* beg;
int* end;
/* extended */
struct re_registers** list; /* capture history. list[1]-list[31] */
};
/* NOTE(review): presumably stored in beg/end for a group that did not
   take part in the match -- confirm. */
#define ONIG_REGION_NOTPOS -1
/* Oniguruma-style name for the same structure. */
typedef struct re_registers OnigRegion;
/* Extra detail attached to an error code.  onig_error_code_to_str() reads
   the bytes in [par, par_end) and substitutes them for "%n" in the
   message format. */
typedef struct {
UChar* par; /* first byte of the parameter text */
UChar* par_end; /* one past the last byte of the parameter text */
} OnigErrorInfo;
/* Lower/upper pair; regex_t keeps an array of these in repeat_range.
   NOTE(review): presumably the {lower,upper} bounds of a repeat operator --
   confirm against regcomp.c. */
typedef struct {
int lower;
int upper;
} OnigRepeatRange;
typedef void (*OnigWarnFunc) P_((char* s));
extern void onig_null_warn P_((char* s));
#define ONIG_NULL_WARN onig_null_warn
#define ONIG_CHAR_TABLE_SIZE 256
/* regex_t state */
#define ONIG_STATE_NORMAL 0
#define ONIG_STATE_SEARCHING 1
#define ONIG_STATE_COMPILING -1
#define ONIG_STATE_MODIFY -2
/* Normalized view of reg->state: every positive value is reported as
   ONIG_STATE_SEARCHING, zero and negative values are returned unchanged.
   NOTE(review): a positive state presumably counts active searches --
   confirm against regexec.c. */
#define ONIG_STATE(reg) \
((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state)
/* Compiled regular expression object.  The leading p/used/alloc members
   are laid out to match the library's byte-buffer type (see the comment
   on them), followed by compile-time bookkeeping, the encoding/option/
   syntax the pattern was compiled with, and search-optimization data. */
typedef struct re_pattern_buffer {
/* common members of BBuf(bytes-buffer) */
unsigned char* p; /* compiled pattern */
unsigned int used; /* used space for p */
unsigned int alloc; /* allocated space for p */
int state; /* normal, searching, compiling */
int num_mem; /* used memory(...) num counted from 1 */
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
int num_null_check; /* OP_NULL_CHECK_START/END id counter */
int num_call; /* number of subexp call */
unsigned int capture_history; /* (?@...) flag (1-31) */
unsigned int bt_mem_start; /* need backtrack flag */
unsigned int bt_mem_end; /* need backtrack flag */
int stack_pop_level;
int repeat_range_alloc; /* presumably the capacity of repeat_range[] -- TODO confirm */
OnigRepeatRange* repeat_range;
OnigEncoding enc;
OnigOptionType options;
OnigSyntaxType* syntax;
void* name_table; /* opaque here; presumably the named-group table -- TODO confirm */
/* optimization info (string search, char-map and anchors) */
int optimize; /* optimize flag */
int threshold_len; /* search str-length for apply optimize */
int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */
OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */
int sub_anchor; /* start-anchor for exact or map */
unsigned char *exact;
unsigned char *exact_end;
unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
int *int_map; /* BM skip for exact_len > 255 */
int *int_map_backward; /* BM skip for backward search */
OnigDistance dmin; /* min-distance of exact or map */
OnigDistance dmax; /* max-distance of exact or map */
/* regex_t link chain */
struct re_pattern_buffer* chain; /* escape compile-conflict */
} regex_t;
/* Oniguruma Native API */
ONIG_EXTERN
int onig_init P_((void));
ONIG_EXTERN
int onig_error_code_to_str PV_((UChar* s, int err_code, ...));
ONIG_EXTERN
void onig_set_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
void onig_set_verb_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
int onig_new P_((regex_t**, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
void onig_free P_((regex_t*));
ONIG_EXTERN
int onig_recompile P_((regex_t*, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
int onig_search P_((regex_t*, UChar* str, UChar* end, UChar* start, UChar* range, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
int onig_match P_((regex_t*, UChar* str, UChar* end, UChar* at, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
OnigRegion* onig_region_new P_((void));
ONIG_EXTERN
void onig_region_free P_((OnigRegion* region, int free_self));
ONIG_EXTERN
void onig_region_copy P_((OnigRegion* to, OnigRegion* from));
ONIG_EXTERN
void onig_region_clear P_((OnigRegion* region));
ONIG_EXTERN
int onig_region_resize P_((OnigRegion* region, int n));
ONIG_EXTERN
int onig_name_to_group_numbers P_((regex_t* reg, UChar* name, UChar* name_end,
int** nums));
ONIG_EXTERN
int onig_name_to_backref_number P_((regex_t* reg, UChar* name, UChar* name_end, OnigRegion *region));
ONIG_EXTERN
int onig_foreach_name P_((regex_t* reg, int (*func)(UChar*,UChar*,int,int*,regex_t*,void*), void* arg));
ONIG_EXTERN
int onig_number_of_names P_((regex_t* reg));
ONIG_EXTERN
OnigEncoding onig_get_encoding P_((regex_t* reg));
ONIG_EXTERN
OnigOptionType onig_get_options P_((regex_t* reg));
ONIG_EXTERN
OnigSyntaxType* onig_get_syntax P_((regex_t* reg));
ONIG_EXTERN
int onig_set_default_syntax P_((OnigSyntaxType* syntax));
ONIG_EXTERN
void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
ONIG_EXTERN
void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op));
ONIG_EXTERN
void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2));
ONIG_EXTERN
void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior));
ONIG_EXTERN
void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
ONIG_EXTERN
int onig_set_meta_char P_((unsigned int what, unsigned int c));
ONIG_EXTERN
int onig_end P_((void));
ONIG_EXTERN
const char* onig_version P_((void));
#endif /* ONIGURUMA_H */

5440
regcomp.c Normal file

File diff suppressed because it is too large Load Diff

586
regenc.c Normal file
View File

@ -0,0 +1,586 @@
/**********************************************************************
regenc.c - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "regenc.h"
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
/* Initialize the encoding layer.  There is nothing to set up in this
   implementation, so the call always reports success (0). */
extern int
onigenc_init()
{
  int status = 0;

  return status;
}
/* Return the process-wide default encoding currently stored in
   OnigEncDefaultCharEncoding. */
extern OnigEncoding
onigenc_get_default_encoding()
{
  OnigEncoding current = OnigEncDefaultCharEncoding;

  return current;
}
/* Replace the process-wide default encoding with enc.
   No validation is performed; always returns 0. */
extern int
onigenc_set_default_encoding(OnigEncoding enc)
{
  int status = 0;

  OnigEncDefaultCharEncoding = enc;
  return status;
}
/* Return the first character head at or after s.
   The head at or before s is located first; if it lies strictly before s
   (s points into the middle of a character), the result is advanced by the
   encoded length of that character. */
extern UChar*
onigenc_get_right_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
{
  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);

  if (head >= s)
    return head;

  return head + enc_len(enc, *head);
}
/* Same as onigenc_get_right_adjust_char_head(), additionally reporting the
   preceding character head through *prev when prev is non-NULL.
   When s falls inside a character, *prev is the head of that character and
   the return value is the head following it.  When s is already a head,
   *prev is set to NULL (the previous head is not computed in that case). */
extern UChar*
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
                                 UChar* start, UChar* s, UChar** prev)
{
  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);

  if (head >= s) {
    if (prev) *prev = (UChar* )NULL; /* Sorry */
    return head;
  }

  if (prev) *prev = head;
  return head + enc_len(enc, *head);
}
/* Return the head of the character that ends just before s, or NULL when
   s is at (or before) the start of the string. */
extern UChar*
onigenc_get_prev_char_head(OnigEncoding enc, UChar* start, UChar* s)
{
  return (s > start)
    ? ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1)
    : (UChar* )NULL;
}
/* Move s backwards by n characters, never going before start.
   Returns the resulting character head, or NULL if the beginning of the
   string is reached before n characters have been stepped over.  A NULL s
   is returned unchanged. */
extern UChar*
onigenc_step_back(OnigEncoding enc, UChar* start, UChar* s, int n)
{
  int left;

  for (left = n; left > 0 && ONIG_IS_NOT_NULL(s); left--) {
    if (s <= start)
      return (UChar* )NULL;

    s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
  }

  return s;
}
#ifndef ONIG_RUBY_M17N
#ifndef NOT_RUBY
#define USE_APPLICATION_TO_LOWER_CASE_TABLE
#endif
UChar* OnigEncAsciiToLowerCaseTable = (UChar* )0;
#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
/* Built-in byte -> lower-case byte map, indexed by byte value (256 entries).
   Entries 0101-0132 ('A'-'Z') hold 0141-0172 ('a'-'z'); every other entry
   maps to itself, including all bytes >= 0200.
   Installed by onigenc_set_default_caseconv_table() when it is called with
   a null table. */
static UChar BuiltInAsciiToLowerCaseTable[] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
#endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */
/* Character-class bit masks indexed by byte value.  Each entry is tested
   with a bitwise AND against a ctype mask by ONIGENC_IS_ASCII_CODE_CTYPE()
   (regenc.h).  Entries 0x80-0xff are all zero, so non-ASCII bytes belong to
   no class in this table.
   NOTE(review): the meaning of each bit is given by the ONIGENC_CTYPE_*
   constants, which are defined outside this file -- verify before editing
   any entry. */
unsigned short OnigEncAsciiCtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
/* Install the byte -> lower-case table used by the ASCII case macros.
   A null table selects the built-in table when one is compiled in;
   when the application is expected to supply the table
   (USE_APPLICATION_TO_LOWER_CASE_TABLE), a null argument is ignored. */
extern void
onigenc_set_default_caseconv_table(UChar* table)
{
#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
  if (table == (UChar* )0)
    table = BuiltInAsciiToLowerCaseTable;
#else
  if (table == (UChar* )0)
    return ;
#endif

  /* Storing the same pointer again is harmless, so no comparison is needed. */
  OnigEncAsciiToLowerCaseTable = table;
}
/* Function form of ONIGENC_LEFT_ADJUST_CHAR_HEAD(): returns whatever the
   encoding's left-adjust operation yields for s within [start, ...). */
extern UChar*
onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
{
  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);

  return head;
}
/* Stub for encodings without fold-match code points: leaves *codes
   untouched and reports a count of 0. */
extern int
onigenc_nothing_get_all_fold_match_code(OnigCodePoint** codes)
{
  int count = 0;

  return count;
}
/* Stub for encodings without fold-match strings: never fills *info and
   always returns -1. */
extern int
onigenc_nothing_get_fold_match_info(UChar* p, UChar* end,
                                    OnigEncFoldMatchInfo** info)
{
  int not_found = -1;

  return not_found;
}
/* Stub for encodings that provide no ctype code-range tables: none of the
   output parameters is written and the result is always -1. */
extern int
onigenc_nothing_get_ctype_code_range(int ctype, int* nsb, int* nmb,
         OnigCodePointRange* sbr[], OnigCodePointRange* mbr[])
{
  int unsupported = -1;

  return unsupported;
}
/* for single byte encodings */
/* Store the lower-case form of the single byte at p into lower[0] using
   the installed ASCII case table.  Returns the byte length of the
   converted character, which is always 1. */
extern int
onigenc_ascii_mbc_to_lower(UChar* p, UChar* lower)
{
  lower[0] = ONIGENC_ASCII_CODE_TO_LOWER_CASE(p[0]);
  return 1; /* return byte length of converted char to lower */
}
/* Non-zero when the byte at p is classified as upper or lower case by the
   ASCII ctype table. */
extern int
onigenc_ascii_mbc_is_case_ambig(UChar* p)
{
  int ambiguous = ONIGENC_IS_ASCII_CODE_CASE_AMBIG(p[0]);

  return ambiguous;
}
/* Single-byte encodings: the code point is the byte value at p.
   end is not consulted. */
extern OnigCodePoint
onigenc_single_byte_mbc_to_code(UChar* p, UChar* end)
{
  OnigCodePoint code = (OnigCodePoint )p[0];

  return code;
}
/* Single-byte encodings: every code point occupies exactly one byte. */
extern int
onigenc_single_byte_code_to_mbclen(OnigCodePoint code)
{
  int byte_len = 1;

  return byte_len;
}
/* Single-byte encodings: the first (and only) byte of a code point is its
   low eight bits. */
extern int
onigenc_single_byte_code_to_mbc_first(OnigCodePoint code)
{
  int first = (int )(code & 0xff);

  return first;
}
/* Single-byte encodings: write the low eight bits of code to buf[0].
   Returns the number of bytes written (always 1). */
extern int
onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  buf[0] = (UChar )(code & 0xff);
  return 1;
}
/* Single-byte encodings: every position is a character head, so s is
   returned as is.  start is not consulted. */
extern UChar*
onigenc_single_byte_left_adjust_char_head(UChar* start, UChar* s)
{
  UChar* head = s;

  return head;
}
/* Single-byte encodings: reverse matching is always permitted; the
   arguments are ignored. */
extern int
onigenc_single_byte_is_allowed_reverse_match(UChar* s, UChar* end)
{
  int allowed = TRUE;

  return allowed;
}
/* Decode the multibyte character at p into a code point by concatenating
   its bytes, most significant first.  The character length comes from the
   encoding's length table for the lead byte; decoding stops early if end
   is reached before all trail bytes have been consumed. */
extern OnigCodePoint
onigenc_mbn_mbc_to_code(OnigEncoding enc, UChar* p, UChar* end)
{
  int lead, trail_left;
  OnigCodePoint code;

  lead = *p++;
  code = lead;

  for (trail_left = enc_len(enc, lead) - 1;
       trail_left > 0 && p < end;
       trail_left--) {
    code = (code << 8) + *p++;
  }

  return code;
}
/* Lower-case the character at p into lower.
   An ASCII character is mapped through the ASCII case table; a multibyte
   character is copied unchanged (skipped entirely when lower and p are the
   same buffer).  Returns the byte length of the character written. */
extern int
onigenc_mbn_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* lower)
{
  int len, i;

  if (ONIGENC_IS_MBC_ASCII(p)) {
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
    return 1;
  }

  len = enc_len(enc, *p);
  if (lower != p) {
    /* byte-wise copy; equivalent to memcpy(lower, p, len) */
    for (i = 0; i < len; i++)
      lower[i] = p[i];
  }

  return len; /* return byte length of converted to lower char */
}
/* Only ASCII characters can be case-ambiguous here: for an ASCII lead byte
   the ASCII ctype table decides, anything else yields FALSE. */
extern int
onigenc_mbn_mbc_is_case_ambig(UChar* p)
{
  if (!ONIGENC_IS_MBC_ASCII(p))
    return FALSE;

  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
/* Encoded length of a code point in an encoding of at most two bytes:
   2 when bits 8-15 are non-zero, otherwise 1. */
extern int
onigenc_mb2_code_to_mbclen(OnigCodePoint code)
{
  return ((code & 0xff00) != 0) ? 2 : 1;
}
/* Encoded length of a code point in an encoding of at most four bytes:
   the position of the highest non-zero byte, counted from 1. */
extern int
onigenc_mb4_code_to_mbclen(OnigCodePoint code)
{
  if ((code & 0xff000000) != 0) return 4;
  if ((code & 0xff0000) != 0)   return 3;
  if ((code & 0xff00) != 0)     return 2;

  return 1;
}
/* First encoded byte of a code point in an encoding of at most two bytes:
   bits 8-15 when they are non-zero, otherwise the code itself. */
extern int
onigenc_mb2_code_to_mbc_first(OnigCodePoint code)
{
  if ((code & 0xff00) == 0)
    return (int )code;

  return (code >> 8) & 0xff;
}
/* First encoded byte of a code point in an encoding of at most four bytes:
   the highest non-zero byte of code, or code itself when it fits in one
   byte. */
extern int
onigenc_mb4_code_to_mbc_first(OnigCodePoint code)
{
  if ((code & 0xff000000) != 0)
    return (code >> 24) & 0xff;

  if ((code & 0xff0000) != 0)
    return (code >> 16) & 0xff;

  if ((code & 0xff00) != 0)
    return (code >> 8) & 0xff;

  return (int )code;
}
/* Serialize a code point of at most two bytes into buf, high byte first
   (omitted when zero).  Returns the number of bytes written, or
   ONIGERR_INVALID_WIDE_CHAR_VALUE when that count disagrees with the
   length the encoding reports for the first byte written. */
extern int
onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
{
  int len = 0;

  if ((code & 0xff00) != 0)
    buf[len++] = (UChar )((code >> 8) & 0xff);

  buf[len++] = (UChar )(code & 0xff);

#if 1
  if (enc_len(enc, buf[0]) != len)
    return ONIGERR_INVALID_WIDE_CHAR_VALUE;
#endif
  return len;
}
/* Serialize a code point of at most four bytes into buf, most significant
   byte first, omitting leading zero bytes only.
   Returns the number of bytes written, or ONIGERR_INVALID_WIDE_CHAR_VALUE
   when that count disagrees with the length the encoding reports for the
   first byte written. */
extern int
onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
{
  UChar *p = buf;

  if ((code & 0xff000000) != 0) {
    *p++ = (UChar )((code >> 24) & 0xff);
  }
  /* Once a higher byte has been written, every lower byte must follow even
     when it is zero; otherwise a code such as 0x8f00a1a1 would lose its
     second byte and be rejected by the length check below. */
  if ((code & 0xff0000) != 0 || p != buf) {
    *p++ = (UChar )((code >> 16) & 0xff);
  }
  if ((code & 0xff00) != 0 || p != buf) {
    *p++ = (UChar )((code >> 8) & 0xff);
  }
  *p++ = (UChar )(code & 0xff);

#if 1
  if (enc_len(enc, buf[0]) != (p - buf))
    return ONIGERR_INVALID_WIDE_CHAR_VALUE;
#endif
  return p - buf;
}
/* Test whether code belongs to the character class(es) in ctype for an
   encoding of at most two bytes.
   - Codes below 128 are answered by the ASCII ctype table.
   - Any other code matches only when ctype includes ONIGENC_CTYPE_WORD and
     the encoding reports a multibyte length for the code's first byte.
   The original carried unreachable statements after an if/else that
   returned on both arms; they are removed, the results are unchanged. */
extern int
onigenc_mb2_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
                          unsigned int ctype)
{
  if (code < 128)
    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);

  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
    int first = onigenc_mb2_code_to_mbc_first(code);
    return (enc_len(enc, first) > 1 ? TRUE : FALSE);
  }

  return FALSE;
}
/* Test whether code belongs to the character class(es) in ctype for an
   encoding of at most four bytes.
   - Codes below 128 are answered by the ASCII ctype table.
   - Any other code matches only when ctype includes ONIGENC_CTYPE_WORD and
     the encoding reports a multibyte length for the code's first byte.
   The original carried unreachable statements after an if/else that
   returned on both arms; they are removed, the results are unchanged. */
extern int
onigenc_mb4_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
                          unsigned int ctype)
{
  if (code < 128)
    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);

  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
    int first = onigenc_mb4_code_to_mbc_first(code);
    return (enc_len(enc, first) > 1 ? TRUE : FALSE);
  }

  return FALSE;
}
/* Report the fold-match code points for encodings where 0xdf is the only
   one: points *codes at a static one-element list and returns its length. */
extern int
onigenc_get_all_fold_match_code_ss_0xdf(OnigCodePoint** codes)
{
  static OnigCodePoint fold_codes[] = { 0xdf };

  *codes = fold_codes;
  return 1;
}
/* Recognize the fold-equivalent spellings of 0xdf at p: the byte 0xdf
   itself, "ss", or "SS".  On a match *info is pointed at a static
   description of all three spellings and the number of bytes matched at p
   (1 or 2) is returned; otherwise -1 is returned and *info is untouched. */
extern int
onigenc_get_fold_match_info_ss_0xdf(UChar* p, UChar* end,
                                    OnigEncFoldMatchInfo** info)
{
  /* German alphabet ess-tsett(U+00DF) */
  static OnigEncFoldMatchInfo ss = {
    3,
    { 1, 2, 2 },
    { "\337", "ss", "SS" } /* 0337: 0xdf */
  };

  if (p >= end) return -1;

  if (p[0] == 0xdf) {
    *info = &ss;
    return 1;
  }

  /* a two-byte spelling needs a second byte inside the buffer */
  if (p + 1 >= end) return -1;

  if ((p[0] == 'S' && p[1] == 'S') ||
      (p[0] == 's' && p[1] == 's')) {
    *info = &ss;
    return 2;
  }

  return -1; /* is not a fold string. */
}
#else /* ONIG_RUBY_M17N */
/* M17N build: dispatch a single ctype value to the matching m17n_* (or
   ONIGENC_IS_CODE_*) predicate.  ctype is compared for equality, so a
   combination of several ctype bits matches no case and yields 0. */
extern int
onigenc_is_code_ctype(OnigEncoding enc, OnigCodePoint code, int ctype)
{
  switch (ctype) {
  case ONIGENC_CTYPE_ALPHA:  return m17n_isalpha(enc, code);
  case ONIGENC_CTYPE_BLANK:  return ONIGENC_IS_CODE_BLANK(enc, (int )(code));
  case ONIGENC_CTYPE_CNTRL:  return m17n_iscntrl(enc, code);
  case ONIGENC_CTYPE_DIGIT:  return m17n_isdigit(enc, code);
  case ONIGENC_CTYPE_GRAPH:  return ONIGENC_IS_CODE_GRAPH(enc, (int )(code));
  case ONIGENC_CTYPE_LOWER:  return m17n_islower(enc, code);
  case ONIGENC_CTYPE_PRINT:  return m17n_isprint(enc, code);
  case ONIGENC_CTYPE_PUNCT:  return m17n_ispunct(enc, code);
  case ONIGENC_CTYPE_SPACE:  return m17n_isspace(enc, code);
  case ONIGENC_CTYPE_UPPER:  return m17n_isupper(enc, code);
  case ONIGENC_CTYPE_XDIGIT: return m17n_isxdigit(enc, code);
  case ONIGENC_CTYPE_WORD:   return m17n_iswchar(enc, code);
  case ONIGENC_CTYPE_ASCII:  return (code < 128 ? TRUE : FALSE);
  case ONIGENC_CTYPE_ALNUM:  return m17n_isalnum(enc, code);
  default:
    return 0;
  }
}
/* M17N build: write code into buf with m17n_mbcput() and return the
   encoded length the encoding reports for the code's first byte. */
extern int
onigenc_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
{
  int first;

  m17n_mbcput(enc, code, buf);
  first = m17n_firstbyte(enc, code);

  return enc_len(enc, first);
}
/* M17N build: decode the character at p, lower-case it with
   m17n_tolower(), write the result into buf and return the encoded length
   of the lower-cased code point. */
extern int
onigenc_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* buf)
{
  unsigned int code, lowered;

  code    = m17n_codepoint(enc, p, p + enc_len(enc, *p));
  lowered = m17n_tolower(enc, code);
  m17n_mbcput(enc, lowered, buf);

  return m17n_codelen(enc, lowered);
}
/* M17N build: TRUE when the character at p is reported as upper or lower
   case by the encoding, FALSE otherwise. */
extern int
onigenc_mbc_is_case_ambig(OnigEncoding enc, UChar* p)
{
  unsigned int code = m17n_codepoint(enc, p, p + enc_len(enc, *p));

  return (m17n_isupper(enc, code) || m17n_islower(enc, code)) ? TRUE : FALSE;
}
/* M17N build: return the head of the character containing s.
   Scans backwards from s to the nearest lead byte (not going before
   start), then walks forward character by character.  If a character
   boundary falls exactly on s, s itself is the head; otherwise the head of
   the character that spans s is returned. */
extern UChar*
onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
{
  UChar *p;
  int len;

  if (s <= start) return s;

  p = s;
  while (!m17n_islead(enc, *p) && p > start) p--;

  for (;;) {
    len = enc_len(enc, *p);
    if (p + len >= s) break;
    p += len;
  }

  return (p + len == s) ? s : p;
}
/* M17N build: reverse matching is allowed exactly when the encoding is
   single-byte; s and end are not consulted. */
extern int
onigenc_is_allowed_reverse_match(OnigEncoding enc, UChar* s, UChar* end)
{
  int allowed = ONIGENC_IS_SINGLEBYTE(enc);

  return allowed;
}
/* M17N build: no case-conversion table is kept, so this is a no-op that
   exists only to satisfy callers. */
extern void
onigenc_set_default_caseconv_table(UChar* table)
{
  /* intentionally empty */
}
#endif /* ONIG_RUBY_M17N */

96
regenc.h Normal file
View File

@ -0,0 +1,96 @@
/**********************************************************************
regenc.h - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef REGENC_H
#define REGENC_H
#ifndef RUBY_PLATFORM
#include "config.h"
#endif
#include "oniguruma.h"
#ifndef NULL
#define NULL ((void* )0)
#endif
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
/* error codes */
/* internal error */
#define ONIGERR_MEMORY -5
#define ONIGERR_TYPE_BUG -6
/* syntax error [-400, -999] */
#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
/* Newline byte tested by ONIG_IS_NEWLINE(). */
#define ONIG_NEWLINE '\n'
#define ONIG_IS_NEWLINE(c) ((c) == ONIG_NEWLINE)
/* Null-pointer tests; both sides are compared as void*, so any object
   pointer type may be passed. */
#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0)
#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
/* Early-return helpers.  Each expands to a bare, unbraced `if` statement,
   so placing one directly before an `else` would capture that else. */
#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL
#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
#ifdef ONIG_RUBY_M17N
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_UNDEF
#else /* ONIG_RUBY_M17N */
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
/* for encoding system implementation (internal) */
ONIG_EXTERN int onigenc_nothing_get_all_fold_match_code P_((OnigCodePoint** codes));
ONIG_EXTERN int onigenc_nothing_get_fold_match_info P_((UChar* p, UChar* end, OnigEncFoldMatchInfo** info));
ONIG_EXTERN int onigenc_nothing_get_ctype_code_range P_((int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]));
/* methods for single byte encoding */
ONIG_EXTERN int onigenc_ascii_mbc_to_lower P_((UChar* p, UChar* lower));
ONIG_EXTERN int onigenc_ascii_mbc_is_case_ambig P_((UChar* p));
ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((UChar* p, UChar* end));
ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_single_byte_code_to_mbc_first P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((UChar* start, UChar* s));
ONIG_EXTERN int onigenc_single_byte_is_allowed_reverse_match P_((UChar* s, UChar* end));
/* methods for multi byte encoding */
ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, UChar* p, UChar* end));
ONIG_EXTERN int onigenc_mbn_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* lower));
ONIG_EXTERN int onigenc_mbn_mbc_is_case_ambig P_((UChar* p));
ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb2_code_to_mbc_first P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
ONIG_EXTERN int onigenc_mb2_code_is_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb4_code_to_mbc_first P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
ONIG_EXTERN int onigenc_mb4_code_is_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** codes));
ONIG_EXTERN int onigenc_get_fold_match_info_ss_0xdf P_((UChar* p, UChar* end, OnigEncFoldMatchInfo** info));
#endif /* is not ONIG_RUBY_M17N */
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
ONIG_EXTERN UChar* OnigEncAsciiToLowerCaseTable;
ONIG_EXTERN unsigned short OnigEncAsciiCtypeTable[];
/* Lower-case form of byte c, read from the currently installed table.
   The table pointer is defined as null in regenc.c and is only set by
   onigenc_set_default_caseconv_table(), so a table must be installed
   before this macro is used. */
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[(c)]
/* Non-zero when the ctype-table entry for code shares at least one bit
   with ctype.  Both arguments are parenthesized so that a compound
   argument such as (A | B) or a conditional expression is applied as a
   whole instead of being split by operator precedence. */
#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
  ((OnigEncAsciiCtypeTable[(code)] & (ctype)) != 0)
/* Non-zero when code is classified as upper or lower case. */
#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
  ONIGENC_IS_ASCII_CODE_CTYPE(code, (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER))
#endif /* REGENC_H */

291
regerror.c Normal file
View File

@ -0,0 +1,291 @@
/**********************************************************************
regerror.c - Oniguruma (regular expression library)
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "regint.h"
#include <stdio.h> /* for vsnprintf() */
#ifdef HAVE_STDARG_PROTOTYPES
#include <stdarg.h>
#define va_init_list(a,b) va_start(a,b)
#else
#include <varargs.h>
#define va_init_list(a,b) va_start(a)
#endif
/* Map an error code to its message format string.
   Returns a null pointer for code >= 0 (not an error), the matching
   message for a known negative code, and "undefined error code" for any
   other negative value.  Formats containing "%n" expect a name parameter
   that onig_error_code_to_str() fills in. */
extern char*
onig_error_code_to_format(int code)
{
  if (code >= 0) return (char* )0;

  switch (code) {
  case ONIG_MISMATCH:
    return "mismatch";
  case ONIG_NO_SUPPORT_CONFIG:
    return "no support in this configuration";
  case ONIGERR_MEMORY:
    return "fail to memory allocation";
  case ONIGERR_MATCH_STACK_LIMIT_OVER:
    return "match-stack limit over";
  case ONIGERR_TYPE_BUG:
    return "undefined type (bug)";
  case ONIGERR_PARSER_BUG:
    return "internal parser error (bug)";
  case ONIGERR_STACK_BUG:
    return "stack error (bug)";
  case ONIGERR_UNDEFINED_BYTECODE:
    return "undefined bytecode (bug)";
  case ONIGERR_UNEXPECTED_BYTECODE:
    return "unexpected bytecode (bug)";
  case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED:
    return "default multibyte-encoding is not setted";
  case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
    return "can't convert to wide-char on specified multibyte-encoding";
  case ONIGERR_INVALID_ARGUMENT:
    return "invalid argument";
  case ONIGERR_END_PATTERN_AT_LEFT_BRACE:
    return "end pattern at left brace";
  case ONIGERR_END_PATTERN_AT_LEFT_BRACKET:
    return "end pattern at left bracket";
  case ONIGERR_EMPTY_CHAR_CLASS:
    return "empty char-class";
  case ONIGERR_PREMATURE_END_OF_CHAR_CLASS:
    return "premature end of char-class";
  case ONIGERR_END_PATTERN_AT_BACKSLASH:
    return "end pattern at backslash";
  case ONIGERR_END_PATTERN_AT_META:
    return "end pattern at meta";
  case ONIGERR_END_PATTERN_AT_CONTROL:
    return "end pattern at control";
  case ONIGERR_META_CODE_SYNTAX:
    return "illegal meta-code syntax";
  case ONIGERR_CONTROL_CODE_SYNTAX:
    return "illegal control-code syntax";
  case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
    return "char-class value at end of range";
  case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
    return "char-class value at start of range";
  case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS:
    return "unmatched range specifier in char-class";
  case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED:
    return "target of repeat operator is not specified";
  case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID:
    return "target of repeat operator is invalid";
  case ONIGERR_NESTED_REPEAT_OPERATOR:
    return "nested repeat operator";
  case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS:
    return "unmatched close parenthesis";
  case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS:
    return "end pattern with unmatched parenthesis";
  case ONIGERR_END_PATTERN_IN_GROUP:
    return "end pattern in group";
  case ONIGERR_UNDEFINED_GROUP_OPTION:
    return "undefined group option";
  case ONIGERR_INVALID_POSIX_BRACKET_TYPE:
    return "invalid POSIX bracket type";
  case ONIGERR_INVALID_LOOK_BEHIND_PATTERN:
    return "invalid pattern in look-behind";
  case ONIGERR_INVALID_REPEAT_RANGE_PATTERN:
    return "invalid repeat range {lower,upper}";
  case ONIGERR_TOO_BIG_NUMBER:
    return "too big number";
  case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE:
    return "too big number for repeat range";
  case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE:
    return "upper is smaller than lower in repeat range";
  case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS:
    return "empty range in char class";
  case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE:
    return "mismatch multibyte code length in char-class range";
  case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES:
    return "too many multibyte code ranges are specified";
  case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING:
    return "too short multibyte code string";
  case ONIGERR_TOO_BIG_BACKREF_NUMBER:
    return "too big backref number";
  case ONIGERR_INVALID_BACKREF:
#ifdef USE_NAMED_GROUP
    return "invalid backref number/name";
#else
    return "invalid backref number";
#endif
  case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED:
    return "numbered backref/call is not allowed. (use name)";
  case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
    return "too big wide-char value";
  case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
    return "too long wide-char value";
  case ONIGERR_INVALID_WIDE_CHAR_VALUE:
    return "invalid wide-char value";
  case ONIGERR_EMPTY_GROUP_NAME:
    return "group name is empty";
  case ONIGERR_INVALID_GROUP_NAME:
    return "invalid group name <%n>";
  case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
#ifdef USE_NAMED_GROUP
    return "invalid char in group name <%n>";
#else
    return "invalid char in group number <%n>";
#endif
  case ONIGERR_UNDEFINED_NAME_REFERENCE:
    return "undefined name <%n> reference";
  case ONIGERR_UNDEFINED_GROUP_REFERENCE:
    return "undefined group <%n> reference";
  case ONIGERR_MULTIPLEX_DEFINED_NAME:
    return "multiplex defined name <%n>";
  case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
    return "multiplex definition name <%n> call";
  case ONIGERR_NEVER_ENDING_RECURSION:
    return "never ending recursion";
  case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY:
    return "group number is too big for capture history";
  case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
    return "invalid character property name";
  case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
    return "over thread pass limit count";
  default:
    return "undefined error code";
  }
}
/* for ONIG_MAX_ERROR_MESSAGE_LEN */
#define MAX_ERROR_PAR_LEN 30
/* Render the message for error `code` into s as a NUL-terminated string
   and return its length (excluding the terminator).
   For the name-related error codes listed in the switch, the variadic
   argument must be an OnigErrorInfo*; its [par, par_end) bytes replace
   "%n" in the format, shortened to MAX_ERROR_PAR_LEN bytes (ending in
   "...") when longer.  For every other code no variadic argument is read.
   A code with no format (code >= 0) yields an empty string and 0 instead
   of passing a null pointer to strlen().
   The caller provides the buffer; its size is not checked here. */
extern int
#ifdef HAVE_STDARG_PROTOTYPES
onig_error_code_to_str(UChar* s, int code, ...)
#else
onig_error_code_to_str(s, code, va_alist)
  UChar* s;
  int code;
  va_dcl
#endif
{
  UChar *p, *q;
  OnigErrorInfo* einfo;
  int len;
  va_list vargs;

  va_init_list(vargs, code);

  switch (code) {
  case ONIGERR_UNDEFINED_NAME_REFERENCE:
  case ONIGERR_UNDEFINED_GROUP_REFERENCE:
  case ONIGERR_MULTIPLEX_DEFINED_NAME:
  case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
  case ONIGERR_INVALID_GROUP_NAME:
  case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
    einfo = va_arg(vargs, OnigErrorInfo*);
    len = einfo->par_end - einfo->par;
    q = (UChar* )onig_error_code_to_format(code);
    p = s;
    while (*q != '\0') {
      if (*q == '%' && *(q + 1) == 'n') { /* '%n': name */
        if (len > MAX_ERROR_PAR_LEN) {
          xmemcpy(p, einfo->par, MAX_ERROR_PAR_LEN - 3);
          p += (MAX_ERROR_PAR_LEN - 3);
          xmemcpy(p, "...", 3);
          p += 3;
        }
        else {
          xmemcpy(p, einfo->par, len);
          p += len;
        }
        q += 2;
      }
      else {
        if (*q == '%') {
          /* unknown directive: drop the '%' and copy the next character
             verbatim; stop if the format ends right after the '%' rather
             than stepping past its terminator */
          q++;
          if (*q == '\0') break;
        }
        *p++ = *q++;
      }
    }
    *p = '\0';
    len = p - s;
    break;

  default:
    q = (UChar* )onig_error_code_to_format(code);
    if (q == (UChar* )0) {
      /* code >= 0 is not an error and has no message */
      s[0] = '\0';
      len = 0;
      break;
    }
    len = strlen((char* )q);
    xmemcpy(s, q, len);
    s[len] = '\0';
    break;
  }

  va_end(vargs);
  return len;
}
/* Format a message into buf with vsnprintf(fmt, ...) and, when it fits,
   append ": /<pattern>/" where <pattern> is [pat, pat_end) made printable:
     - an escape character is copied together with the character after it,
     - '/' is preceded by the escape character,
     - multibyte characters are copied byte for byte,
     - bytes that are neither printable nor space are written as a
       backslash followed by three octal digits,
     - everything else is copied unchanged.
   The pattern is appended only if the formatted message plus a worst case
   of 4 output bytes per pattern byte (+4) fits in bufsize; otherwise buf
   holds just the formatted message.

   Fixes over the previous revision:
     - the octal escape is now copied from the scratch buffer; before, four
       raw pattern bytes were copied and skipped instead;
     - copying never reads at or beyond pat_end (trailing escape character
       or truncated multibyte character);
     - nothing is appended when vsnprintf() reports failure. */
void
#ifdef HAVE_STDARG_PROTOTYPES
onig_snprintf_with_pattern(char buf[], int bufsize, OnigEncoding enc,
                           char* pat, char* pat_end, char *fmt, ...)
#else
onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
    char buf[];
    int bufsize;
    OnigEncoding enc;
    char* pat;
    char* pat_end;
    const char *fmt;
    va_dcl
#endif
{
  int n, need, len;
  UChar *p, *s, *pend;
  va_list args;

  va_init_list(args, fmt);
  n = vsnprintf(buf, bufsize, fmt, args);
  va_end(args);

  need = (pat_end - pat) * 4 + 4;

  if (n >= 0 && n + need < bufsize) {
    strcat(buf, ": /");
    s = (UChar* )buf + strlen(buf);

    p    = (UChar* )pat;
    pend = (UChar* )pat_end;
    while (p < pend) {
      if (*p == MC_ESC) {
        *s++ = *p++;
        if (p < pend) {
          len = enc_len(enc, *p);
          while (len-- > 0 && p < pend) *s++ = *p++;
        }
      }
      else if (*p == '/') {
        *s++ = MC_ESC;
        *s++ = *p++;
      }
      else if (ONIGENC_IS_MBC_HEAD(enc, *p)) {
        len = enc_len(enc, *p);
        while (len-- > 0 && p < pend) *s++ = *p++;
      }
      else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
               !ONIGENC_IS_CODE_SPACE(enc, *p)) {
        char b[5];          /* backslash + 3 octal digits + NUL */
        char *bp = b;

        sprintf(b, "\\%03o", *p & 0377);
        p++;                /* exactly one pattern byte is consumed */
        len = strlen(b);
        while (len-- > 0) *s++ = *bp++;
      }
      else {
        *s++ = *p++;
      }
    }

    *s++ = '/';
    *s   = '\0';
  }
}

3299
regexec.c Normal file

File diff suppressed because it is too large Load Diff

256
reggnu.c Normal file
View File

@ -0,0 +1,256 @@
/**********************************************************************
reggnu.c - Oniguruma (regular expression library)
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "regint.h"
#ifndef ONIGGNU_H /* name changes from oniggnu.h to regex.h in ruby. */
#include "oniggnu.h"
#endif
#if defined(RUBY_PLATFORM) || defined(RUBY)
#ifndef ONIG_RUBY_M17N
#define USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
#endif
#endif
#ifndef NULL
#define NULL ((void* )0)
#endif
/* GNU-regex compatible entry point: release what region r holds while
   keeping the region object itself (free_self argument is 0). */
extern void
re_free_registers(OnigRegion* r)
{
  const int free_self = 0;  /* 0: don't free self */

  onig_region_free(r, free_self);
}
/* Adjust a byte offset so that it falls on a character head.
   The offset is returned unchanged when it is not positive, when the
   encoding's maximum character length is 1, or when it is not below size.
   Otherwise it is moved to the next head for a forward search (range > 0)
   or to the head at or before it for a backward search. */
extern int
re_adjust_startpos(regex_t* reg, const char* string, int size,
                   int startpos, int range)
{
  UChar *base, *s, *head;

  if (startpos <= 0 || ONIGENC_MBC_MAXLEN(reg->enc) == 1 || startpos >= size)
    return startpos;

  base = (UChar* )string;
  s    = (UChar* )string + startpos;

  if (range > 0)
    head = onigenc_get_right_adjust_char_head(reg->enc, base, s);
  else
    head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, base, s);

  return head - base;
}
/* GNU-regex compatible wrapper around onig_match(): match reg against
   str[0..size) at byte offset pos with no extra options, returning
   onig_match()'s result unchanged. */
extern int
re_match(regex_t* reg, const char* str, int size, int pos,
         struct re_registers* regs)
{
  UChar* begin = (UChar* )str;
  UChar* end   = (UChar* )(str + size);
  UChar* at    = (UChar* )(str + pos);

  return onig_match(reg, begin, end, at, regs, ONIG_OPTION_NONE);
}
/* GNU-regex compatible wrapper around onig_search(): search string[0..size)
   from byte offset startpos towards startpos + range with no extra
   options, returning onig_search()'s result unchanged. */
extern int
re_search(regex_t* bufp, const char* string, int size, int startpos, int range,
          struct re_registers* regs)
{
  UChar* begin = (UChar* )string;
  UChar* end   = (UChar* )(string + size);
  UChar* from  = (UChar* )(string + startpos);
  UChar* to    = (UChar* )(string + startpos + range);

  return onig_search(bufp, begin, end, from, to, regs, ONIG_OPTION_NONE);
}
/*
 * Compile the `size` bytes at `pattern` into `reg` via onig_compile().
 *
 * Returns the onig_compile() status.  When that status is non-zero and
 * `ebuf` is not NULL, the error text is written into `ebuf` with
 * onig_error_code_to_str(); its return value is deliberately ignored.
 */
extern int
re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
{
  OnigErrorInfo einfo;
  int status = onig_compile(reg, (UChar* )pattern,
                            (UChar* )(pattern + size), &einfo);

  if (status != 0 && IS_NOT_NULL(ebuf)) {
    (void )onig_error_code_to_str((UChar* )ebuf, status, &einfo);
  }
  return status;
}
/*
 * Recompile `reg` from the `size` bytes at `pattern` via onig_recompile(),
 * reusing reg->options together with the default character encoding and
 * the default syntax.
 *
 * Returns the onig_recompile() status.  When that status is non-zero and
 * `ebuf` is not NULL, the error text is written into `ebuf`.
 */
extern int
re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
{
  OnigErrorInfo einfo;
  int status;

  /* I think encoding and options should be arguments of this function.
     But this is adapted to present re.c. (2002/11/29)
   */
  status = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size),
                          reg->options, OnigEncDefaultCharEncoding,
                          OnigDefaultSyntax, &einfo);

  if (status != 0 && IS_NOT_NULL(ebuf)) {
    (void )onig_error_code_to_str((UChar* )ebuf, status, &einfo);
  }
  return status;
}
/* re_* compatibility wrapper: releases `reg` by passing it to onig_free(). */
extern void
re_free_pattern(regex_t* reg)
{
onig_free(reg);
}
/*
 * re_* compatibility wrapper around onig_alloc_init(): passes `reg` along
 * with ONIG_OPTION_DEFAULT, the default character encoding and the default
 * syntax, and returns the callee's status unchanged.
 */
extern int
re_alloc_pattern(regex_t** reg)
{
return onig_alloc_init(reg, ONIG_OPTION_DEFAULT, OnigEncDefaultCharEncoding,
OnigDefaultSyntax);
}
/*
 * re_* compatibility wrapper: forwards `table` (cast to UChar*) to
 * onigenc_set_default_caseconv_table().
 */
extern void
re_set_casetable(const char* table)
{
onigenc_set_default_caseconv_table((UChar* )table);
}
#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
/*
 * 256-entry table (16 rows x 16 columns, one entry per byte value) used as
 * the ASCII setting of re_mbctab.  Every entry is 0.
 */
static const unsigned char mbctab_ascii[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/*
 * 256-entry table (one entry per byte value) used as the EUC setting of
 * re_mbctab.  Non-zero entries: 0x8E -> 1, 0x8F -> 2, 0xA1..0xFE -> 1.
 * NOTE(review): the values are presumably the number of bytes that follow
 * the given lead byte -- confirm against the users of re_mbctab.
 */
static const unsigned char mbctab_euc[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x80..0x8F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xA0..0xFF */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
};
/*
 * 256-entry table (one entry per byte value) used as the SJIS setting of
 * re_mbctab.  Non-zero entries (all 1): 0x81..0x9F and 0xE0..0xFC.
 * NOTE(review): the values are presumably the number of bytes that follow
 * the given lead byte -- confirm against the users of re_mbctab.
 */
static const unsigned char mbctab_sjis[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x80..0x9F */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xE0..0xFF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
};
/*
 * 256-entry table (one entry per byte value) used as the UTF-8 setting of
 * re_mbctab.  Non-zero entries: 0xC0..0xDF -> 1, 0xE0..0xEF -> 2,
 * 0xF0..0xF7 -> 3, 0xF8..0xFB -> 4, 0xFC..0xFD -> 5.
 * NOTE(review): the values are presumably the number of bytes that follow
 * the given lead byte -- confirm against the users of re_mbctab.
 */
static const unsigned char mbctab_utf8[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xC0..0xFF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0,
};
/* Currently selected byte table; starts out pointing at the ASCII table and
   is re-pointed by re_mbcinit(). */
const unsigned char *re_mbctab = mbctab_ascii;
#endif
/*
 * Select the default encoding.
 *
 * With ONIG_RUBY_M17N the encoding object is passed in directly.  Otherwise
 * `mb_code` is one of the MBCTYPE_* constants and is mapped to the matching
 * ONIG_ENCODING_*; any other value makes the function return without
 * changing anything.  When the Ruby extension-library compatibility layer
 * is enabled, re_mbctab is re-pointed at the table for the same MBCTYPE_*.
 */
extern void
#ifdef ONIG_RUBY_M17N
re_mbcinit(OnigEncoding enc)
#else
re_mbcinit(int mb_code)
#endif
{
#ifdef ONIG_RUBY_M17N

  onigenc_set_default_encoding(enc);

#else
  OnigEncoding enc;

  if (mb_code == MBCTYPE_ASCII) {
    enc = ONIG_ENCODING_ASCII;
  }
  else if (mb_code == MBCTYPE_EUC) {
    enc = ONIG_ENCODING_EUC_JP;
  }
  else if (mb_code == MBCTYPE_SJIS) {
    enc = ONIG_ENCODING_SJIS;
  }
  else if (mb_code == MBCTYPE_UTF8) {
    enc = ONIG_ENCODING_UTF8;
  }
  else {
    /* unknown code: leave the current default untouched */
    return ;
  }

  onigenc_set_default_encoding(enc);
#endif

#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
  if (mb_code == MBCTYPE_ASCII) {
    re_mbctab = mbctab_ascii;
  }
  else if (mb_code == MBCTYPE_EUC) {
    re_mbctab = mbctab_euc;
  }
  else if (mb_code == MBCTYPE_SJIS) {
    re_mbctab = mbctab_sjis;
  }
  else if (mb_code == MBCTYPE_UTF8) {
    re_mbctab = mbctab_utf8;
  }
#endif
}

685
regint.h Normal file
View File

@ -0,0 +1,685 @@
/**********************************************************************
regint.h - Oniguruma (regular expression library)
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef REGINT_H
#define REGINT_H
/* for debug */
/* #define ONIG_DEBUG_PARSE_TREE */
/* #define ONIG_DEBUG_COMPILE */
/* #define ONIG_DEBUG_SEARCH */
/* #define ONIG_DEBUG_MATCH */
/* #define ONIG_DONT_OPTIMIZE */
/* for byte-code statistical data. */
/* #define ONIG_DEBUG_STATISTICS */
#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_STATISTICS)
#ifndef ONIG_DEBUG
#define ONIG_DEBUG
#endif
#endif
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
(defined(__ppc__) && defined(__APPLE__)) || \
defined(__x86_64) || defined(__x86_64__) || \
defined(__mc68020__)
#define PLATFORM_UNALIGNED_WORD_ACCESS
#endif
/* config */
/* spec. config */
#define USE_NAMED_GROUP
#define USE_SUBEXP_CALL
#define USE_FOLD_MATCH /* ess-tsett etc... */
#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
/* internal config */
#define USE_RECYCLE_NODE
#define USE_OP_PUSH_OR_JUMP_EXACT
#define USE_QUALIFIER_PEEK_NEXT
#define INIT_MATCH_STACK_SIZE 160
#define MATCH_STACK_LIMIT_SIZE 500000
/* interface to external system */
#ifdef NOT_RUBY /* given from Makefile */
#include "config.h"
#define USE_VARIABLE_META_CHARS
#define USE_VARIABLE_SYNTAX
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
#define THREAD_ATOMIC_START /* depend on thread system */
#define THREAD_ATOMIC_END /* depend on thread system */
#define THREAD_PASS /* depend on thread system */
#define xmalloc malloc
#define xrealloc realloc
#define xfree free
#else
#include "ruby.h"
#include "version.h"
#include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */
#define THREAD_ATOMIC_START DEFER_INTS
#define THREAD_ATOMIC_END ENABLE_INTS
#define THREAD_PASS rb_thread_schedule()
#define DEFAULT_WARN_FUNCTION rb_warn
#define DEFAULT_VERB_WARN_FUNCTION rb_warning
#if defined(RUBY_VERSION_MAJOR)
#if RUBY_VERSION_MAJOR > 1 || \
(RUBY_VERSION_MAJOR == 1 && \
defined(RUBY_VERSION_MINOR) && RUBY_VERSION_MINOR >= 8)
#define USE_ST_HASH_TABLE
#endif
#endif
#endif /* else NOT_RUBY */
#define THREAD_PASS_LIMIT_COUNT 10
#define xmemset memset
#define xmemcpy memcpy
#define xmemmove memmove
#if defined(_WIN32) && !defined(__CYGWIN__)
#define xalloca _alloca
#ifdef NOT_RUBY
#define vsnprintf _vsnprintf
#endif
#else
#define xalloca alloca
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#if defined(HAVE_ALLOCA_H) && !defined(__GNUC__)
#include <alloca.h>
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#include <ctype.h>
#include <sys/types.h>
#ifdef ONIG_DEBUG
# include <stdio.h>
#endif
#include "regenc.h"
#include "oniguruma.h"
#ifdef MIN
#undef MIN
#endif
#ifdef MAX
#undef MAX
#endif
/* Two-argument minimum / maximum.  An argument may be evaluated twice, so
   do not pass expressions with side effects. */
#define MIN(a,b) (((a)>(b))?(b):(a))
#define MAX(a,b) (((a)<(b))?(b):(a))

/* Null tests that accept any pointer type. */
#define IS_NULL(p)     (((void*)(p)) == (void*)0)
#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0)

/* Early-exit helpers: return NULL (or `val`) from the *calling* function
   when `p` is a null pointer.  They are wrapped in do { } while (0) so that
   each use is a single statement; a bare `if` expansion would capture a
   following `else` written by the caller. */
#define CHECK_NULL_RETURN(p) do {\
  if (IS_NULL(p)) return NULL;\
} while (0)
#define CHECK_NULL_RETURN_VAL(p,val) do {\
  if (IS_NULL(p)) return (val);\
} while (0)

/* Null pointer constant typed as UChar*. */
#define NULL_UCHARP ((UChar* )0)
#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
#define WORD_ALIGNMENT_SIZE SIZEOF_INT
#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
(pad_size) = WORD_ALIGNMENT_SIZE \
- ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\
if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\
} while (0)
#define ALIGNMENT_RIGHT(addr) do {\
(addr) += (WORD_ALIGNMENT_SIZE - 1);\
(addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\
} while (0)
#define B_SHIFT 8
#define B_MASK 0xff
#define SERIALIZE_2BYTE_INT(i,p) do {\
*(p) = ((i) >> B_SHIFT) & B_MASK;\
*((p)+1) = (i) & B_MASK;\
} while (0)
#define SERIALIZE_4BYTE_INT(i,p) do {\
*(p) = ((i) >> B_SHIFT*3) & B_MASK;\
*((p)+1) = ((i) >> B_SHIFT*2) & B_MASK;\
*((p)+2) = ((i) >> B_SHIFT ) & B_MASK;\
*((p)+3) = (i) & B_MASK;\
} while (0)
#define SERIALIZE_8BYTE_INT(i,p) do {\
*(p) = ((i) >> B_SHIFT*7) & B_MASK;\
*((p)+1) = ((i) >> B_SHIFT*6) & B_MASK;\
*((p)+2) = ((i) >> B_SHIFT*5) & B_MASK;\
*((p)+3) = ((i) >> B_SHIFT*4) & B_MASK;\
*((p)+4) = ((i) >> B_SHIFT*3) & B_MASK;\
*((p)+5) = ((i) >> B_SHIFT*2) & B_MASK;\
*((p)+6) = ((i) >> B_SHIFT ) & B_MASK;\
*((p)+7) = (i) & B_MASK;\
} while (0)
#define GET_2BYTE_INT_INC(type,i,p) do {\
(i) = (type )(((unsigned int )(*(p)) << B_SHIFT) | (unsigned int )((p)[1]));\
(p) += 2;\
} while (0)
#define GET_4BYTE_INT_INC(type,i,p) do {\
(i) = (type )(((unsigned int )((p)[0]) << B_SHIFT*3) | \
((unsigned int )((p)[1]) << B_SHIFT*2) | \
((unsigned int )((p)[2]) << B_SHIFT ) | \
((unsigned int )((p)[3]) )); \
(p) += 4;\
} while (0)
#define GET_8BYTE_INT_INC(type,i,p) do {\
(i) = (type )(((unsigned long )((p)[0]) << B_SHIFT*7) | \
((unsigned long )((p)[1]) << B_SHIFT*6) | \
((unsigned long )((p)[2]) << B_SHIFT*5) | \
((unsigned long )((p)[3]) << B_SHIFT*4) | \
((unsigned long )((p)[4]) << B_SHIFT*3) | \
((unsigned long )((p)[5]) << B_SHIFT*2) | \
((unsigned long )((p)[6]) << B_SHIFT ) | \
((unsigned long )((p)[7]) )); \
(p) += 8;\
} while (0)
#if SIZEOF_SHORT == 2
#define GET_SHORT_INC(i,p) GET_2BYTE_INT_INC(short,i,p)
#define SERIALIZE_SHORT(i,p) SERIALIZE_2BYTE_INT(i,p)
#elif SIZEOF_SHORT == 4
#define GET_SHORT_INC(i,p) GET_4BYTE_INT_INC(short,i,p)
#define SERIALIZE_SHORT(i,p) SERIALIZE_4BYTE_INT(i,p)
#elif SIZEOF_SHORT == 8
#define GET_SHORT_INC(i,p) GET_8BYTE_INT_INC(short,i,p)
#define SERIALIZE_SHORT(i,p) SERIALIZE_8BYTE_INT(i,p)
#endif
#if SIZEOF_INT == 2
#define GET_INT_INC(i,p) GET_2BYTE_INT_INC(int,i,p)
#define GET_UINT_INC(i,p) GET_2BYTE_INT_INC(unsigned,i,p)
#define SERIALIZE_INT(i,p) SERIALIZE_2BYTE_INT(i,p)
#define SERIALIZE_UINT(i,p) SERIALIZE_2BYTE_INT(i,p)
#elif SIZEOF_INT == 4
#define GET_INT_INC(i,p) GET_4BYTE_INT_INC(int,i,p)
#define GET_UINT_INC(i,p) GET_4BYTE_INT_INC(unsigned,i,p)
#define SERIALIZE_INT(i,p) SERIALIZE_4BYTE_INT(i,p)
#define SERIALIZE_UINT(i,p) SERIALIZE_4BYTE_INT(i,p)
#elif SIZEOF_INT == 8
#define GET_INT_INC(i,p) GET_8BYTE_INT_INC(int,i,p)
#define GET_UINT_INC(i,p) GET_8BYTE_INT_INC(unsigned,i,p)
#define SERIALIZE_INT(i,p) SERIALIZE_8BYTE_INT(i,p)
#define SERIALIZE_UINT(i,p) SERIALIZE_8BYTE_INT(i,p)
#endif
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
/* stack pop level */
#define STACK_POP_LEVEL_FREE 0
#define STACK_POP_LEVEL_MEM_START 1
#define STACK_POP_LEVEL_ALL 2
/* optimize flags */
#define ONIG_OPTIMIZE_NONE 0
#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */
#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */
#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */
#define ONIG_OPTIMIZE_MAP 5 /* char map */
/* bit status */
/*
 * Bit-status word: one flag per index n.
 * Indexes below BIT_STATUS_BITS_NUM map to their own bit; BIT_STATUS_AT and
 * BIT_STATUS_ON_AT fold every larger index onto bit 0, while
 * BIT_STATUS_ON_AT_SIMPLE ignores larger indexes.
 *
 * The shifted constant is typed as BitStatusType so that setting the top
 * bit does not shift into the sign bit of a signed int, and every macro
 * argument is parenthesized so compound expressions can be passed for `n`.
 */
typedef unsigned int BitStatusType;

#define BIT_STATUS_BITS_NUM          (sizeof(BitStatusType) * 8)
#define BIT_STATUS_CLEAR(stats)      (stats) = 0
#define BIT_STATUS_ON_ALL(stats)     (stats) = ~((BitStatusType )0)
#define BIT_STATUS_AT(stats,n) \
  ((n) < BIT_STATUS_BITS_NUM ? ((stats) & ((BitStatusType )1 << (n))) \
                             : ((stats) & 1))

#define BIT_STATUS_ON_AT(stats,n) do {\
  if ((n) < BIT_STATUS_BITS_NUM)\
    (stats) |= ((BitStatusType )1 << (n));\
  else\
    (stats) |= 1;\
} while (0)

#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\
  if ((n) < BIT_STATUS_BITS_NUM)\
    (stats) |= ((BitStatusType )1 << (n));\
} while (0)
#define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1)
#define DIGITVAL(code) ((code) - '0')
#define ODIGITVAL(code) DIGITVAL(code)
#define XDIGITVAL(enc,code) \
(ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \
: (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10))
#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE)
#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE)
#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE)
#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
#define IS_POSIXLINE(option) (IS_SINGLELINE(option) && IS_MULTILINE(option))
#define IS_FIND_CONDITION(option) ((option) & \
(ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
/* OP_SET_OPTION is required for these options.
#define IS_DYNAMIC_OPTION(option) \
(((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0)
*/
/* ignore-case and multibyte status are included in compiled code. */
#define IS_DYNAMIC_OPTION(option) 0
/* bitset */
/*
 * Fixed-size bit set with one bit per single-byte value
 * (SINGLE_BYTE_SIZE bits).  It is stored as an array of `Bits` words: an
 * unsigned int per word where unaligned word access is enabled, an unsigned
 * char per word otherwise.
 */
#define BITS_PER_BYTE      8
#define SINGLE_BYTE_SIZE   (1 << BITS_PER_BYTE)
#define BITS_IN_ROOM       (sizeof(Bits) * BITS_PER_BYTE)
#define BITSET_SIZE        (SINGLE_BYTE_SIZE / BITS_IN_ROOM)

#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
typedef unsigned int   Bits;
#else
typedef unsigned char  Bits;
#endif
typedef Bits           BitSet[BITSET_SIZE];
typedef Bits*          BitSetRef;

#define SIZE_BITSET        sizeof(BitSet)

/* Zero every word of the set. */
#define BITSET_CLEAR(bs) do {\
  int i;\
  for (i = 0; i < BITSET_SIZE; i++) { (bs)[i] = 0; }\
} while (0)

/* Word holding bit `pos`, and the mask for that bit inside the word.
   `pos` is parenthesized so that compound expressions divide and take the
   remainder as a whole. */
#define BS_ROOM(bs,pos)            (bs)[(pos) / BITS_IN_ROOM]
#define BS_BIT(pos)                (1 << ((pos) % BITS_IN_ROOM))

#define BITSET_AT(bs, pos)         (BS_ROOM(bs,pos) & BS_BIT(pos))
#define BITSET_SET_BIT(bs, pos)     BS_ROOM(bs,pos) |= BS_BIT(pos)
#define BITSET_CLEAR_BIT(bs, pos)   BS_ROOM(bs,pos) &= ~(BS_BIT(pos))
#define BITSET_INVERT_BIT(bs, pos)  BS_ROOM(bs,pos) ^= BS_BIT(pos)
/* bytes buffer */
/*
 * Growable byte buffer.
 *   p     : start of the storage (reallocated by the macros below)
 *   used  : number of bytes currently in use
 *   alloc : number of bytes currently allocated
 *
 * The growing macros execute `return(ONIGERR_MEMORY)` from the *calling*
 * function when xrealloc() yields NULL, so they can only be used inside
 * functions whose return value can carry that code.
 *
 * NOTE(review): on that failure path (buf)->p has already been overwritten
 * with NULL, so where xrealloc is plain realloc the previous storage is
 * leaked -- confirm what callers expect before changing it.
 * NOTE(review): BBUF_EXPAND and BBUF_ENSURE_SIZE grow by doubling, which
 * never terminates from a capacity of 0; they presumably rely on the buffer
 * having been initialized with a non-zero size -- confirm.
 *
 * All macro arguments are parenthesized at their point of use so that
 * compound expressions can be passed safely.
 */
typedef struct _BBuf {
  UChar* p;
  unsigned int used;
  unsigned int alloc;
} BBuf;

#define BBUF_INIT(buf,size)    onig_bbuf_init((BBuf* )(buf), (size))

/* Grow the allocation by exactly `inc` bytes. */
#define BBUF_SIZE_INC(buf,inc) do{\
  (buf)->alloc += (inc);\
  (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\
  if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
} while (0)

/* Double the allocation (at least once) until it reaches `low` bytes. */
#define BBUF_EXPAND(buf,low) do{\
  do { (buf)->alloc *= 2; } while ((buf)->alloc < (unsigned int )(low));\
  (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\
  if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
} while (0)

/* Double the allocation only if it is smaller than `size` bytes. */
#define BBUF_ENSURE_SIZE(buf,size) do{\
  unsigned int new_alloc = (buf)->alloc;\
  while (new_alloc < (unsigned int )(size)) { new_alloc *= 2; }\
  if ((buf)->alloc != new_alloc) {\
    (buf)->p = (UChar* )xrealloc((buf)->p, new_alloc);\
    if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
    (buf)->alloc = new_alloc;\
  }\
} while (0)

/* Copy `n` bytes to offset `pos`, growing the buffer and raising `used`
   when the write extends past them. */
#define BBUF_WRITE(buf,pos,bytes,n) do{\
  int used = (pos) + (n);\
  if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\
  xmemcpy((buf)->p + (pos), (bytes), (n));\
  if ((buf)->used < (unsigned int )used) (buf)->used = used;\
} while (0)

/* Single-byte form of BBUF_WRITE. */
#define BBUF_WRITE1(buf,pos,byte) do{\
  int used = (pos) + 1;\
  if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\
  (buf)->p[(pos)] = (byte);\
  if ((buf)->used < (unsigned int )used) (buf)->used = used;\
} while (0)

/* Append at the current end of the used area. */
#define BBUF_ADD(buf,bytes,n)       BBUF_WRITE((buf),(buf)->used,(bytes),(n))
#define BBUF_ADD1(buf,byte)         BBUF_WRITE1((buf),(buf)->used,(byte))
#define BBUF_GET_ADD_ADDRESS(buf)   ((buf)->p + (buf)->used)
#define BBUF_GET_OFFSET_POS(buf)    ((buf)->used)

/* from < to */
#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\
  if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
  if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\
} while (0)

/* from > to */
#define BBUF_MOVE_LEFT(buf,from,to,n) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
} while (0)

/* from > to : move the tail starting at `from` down to `to` and shrink
   `used` by the distance moved. */
#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
  (buf)->used -= ((from) - (to));\
} while (0)

/* Insert `n` bytes at `pos`: a plain write when `pos` is at or beyond the
   used area, otherwise the tail is shifted right first. */
#define BBUF_INSERT(buf,pos,bytes,n) do {\
  if ((pos) >= (buf)->used) {\
    BBUF_WRITE((buf),(pos),(bytes),(n));\
  }\
  else {\
    BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
    xmemcpy((buf)->p + (pos), (bytes), (n));\
  }\
} while (0)

#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)]
#define ANCHOR_BEGIN_BUF (1<<0)
#define ANCHOR_BEGIN_LINE (1<<1)
#define ANCHOR_BEGIN_POSITION (1<<2)
#define ANCHOR_END_BUF (1<<3)
#define ANCHOR_SEMI_END_BUF (1<<4)
#define ANCHOR_END_LINE (1<<5)
#define ANCHOR_WORD_BOUND (1<<6)
#define ANCHOR_NOT_WORD_BOUND (1<<7)
#define ANCHOR_WORD_BEGIN (1<<8)
#define ANCHOR_WORD_END (1<<9)
#define ANCHOR_PREC_READ (1<<10)
#define ANCHOR_PREC_READ_NOT (1<<11)
#define ANCHOR_LOOK_BEHIND (1<<12)
#define ANCHOR_LOOK_BEHIND_NOT (1<<13)
#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */
#define ANCHOR_ANYCHAR_STAR_PL (1<<15) /* ".*" optimize info (posix-line) */
/* operation code */
enum OpCode {
OP_FINISH = 0, /* matching process terminator (no more alternative) */
OP_END = 1, /* pattern code terminator (success end) */
OP_EXACT1 = 2, /* single byte, N = 1 */
OP_EXACT2, /* single byte, N = 2 */
OP_EXACT3, /* single byte, N = 3 */
OP_EXACT4, /* single byte, N = 4 */
OP_EXACT5, /* single byte, N = 5 */
OP_EXACTN, /* single byte */
OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
OP_EXACTMB2N, /* mb-length = 2 */
OP_EXACTMB3N, /* mb-length = 3 */
OP_EXACTMBN, /* other length */
OP_EXACT1_IC, /* single byte, N = 1, ignore case */
OP_EXACTN_IC, /* single byte, ignore case */
OP_CCLASS,
OP_CCLASS_MB,
OP_CCLASS_MIX,
OP_CCLASS_NOT,
OP_CCLASS_MB_NOT,
OP_CCLASS_MIX_NOT,
OP_ANYCHAR, /* "." */
OP_ANYCHAR_ML, /* "." multi-line */
OP_ANYCHAR_STAR, /* ".*" */
OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
OP_ANYCHAR_STAR_PEEK_NEXT,
OP_ANYCHAR_ML_STAR_PEEK_NEXT,
OP_WORD,
OP_NOT_WORD,
OP_WORD_SB,
OP_WORD_MB,
OP_WORD_BOUND,
OP_NOT_WORD_BOUND,
OP_WORD_BEGIN,
OP_WORD_END,
OP_BEGIN_BUF,
OP_END_BUF,
OP_BEGIN_LINE,
OP_END_LINE,
OP_SEMI_END_BUF,
OP_BEGIN_POSITION,
OP_BACKREF1,
OP_BACKREF2,
OP_BACKREF3,
OP_BACKREFN,
OP_BACKREFN_IC,
OP_BACKREF_MULTI,
OP_BACKREF_MULTI_IC,
OP_MEMORY_START,
OP_MEMORY_START_PUSH, /* push back-tracker to stack */
OP_MEMORY_END_PUSH, /* push back-tracker to stack */
OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
OP_MEMORY_END,
OP_MEMORY_END_REC, /* push marker to stack */
OP_SET_OPTION_PUSH, /* set option and push recover option */
OP_SET_OPTION, /* set option */
OP_FAIL, /* pop stack and move */
OP_JUMP,
OP_PUSH,
OP_POP,
OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
OP_REPEAT, /* {n,m} */
OP_REPEAT_NG, /* {n,m}? (non greedy) */
OP_REPEAT_INC,
OP_REPEAT_INC_NG, /* non greedy */
OP_NULL_CHECK_START, /* null loop checker start */
OP_NULL_CHECK_END, /* null loop checker end */
OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
OP_PUSH_POS, /* (?=...) start */
OP_POP_POS, /* (?=...) end */
OP_PUSH_POS_NOT, /* (?!...) start */
OP_FAIL_POS, /* (?!...) end */
OP_PUSH_STOP_BT, /* (?>...) start */
OP_POP_STOP_BT, /* (?>...) end */
OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
OP_CALL, /* \g<name> */
OP_RETURN
};
/* arguments type */
#define ARG_SPECIAL -1
#define ARG_NON 0
#define ARG_RELADDR 1
#define ARG_ABSADDR 2
#define ARG_LENGTH 3
#define ARG_MEMNUM 4
#define ARG_OPTION 5
typedef short int RelAddrType;
typedef short int AbsAddrType;
typedef short int LengthType;
typedef short int MemNumType;
typedef int RepeatNumType;
#define SIZE_OPCODE 1
#define SIZE_RELADDR sizeof(RelAddrType)
#define SIZE_ABSADDR sizeof(AbsAddrType)
#define SIZE_LENGTH sizeof(LengthType)
#define SIZE_MEMNUM sizeof(MemNumType)
#define SIZE_REPEATNUM sizeof(RepeatNumType)
#define SIZE_OPTION sizeof(OnigOptionType)
#define SIZE_CODE_POINT sizeof(OnigCodePoint)
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
#define GET_RELADDR_INC(addr,p) do{\
addr = *((RelAddrType* )(p));\
(p) += SIZE_RELADDR;\
} while(0)
#define GET_ABSADDR_INC(addr,p) do{\
addr = *((AbsAddrType* )(p));\
(p) += SIZE_ABSADDR;\
} while(0)
#define GET_LENGTH_INC(len,p) do{\
len = *((LengthType* )(p));\
(p) += SIZE_LENGTH;\
} while(0)
#define GET_MEMNUM_INC(num,p) do{\
num = *((MemNumType* )(p));\
(p) += SIZE_MEMNUM;\
} while(0)
#define GET_REPEATNUM_INC(num,p) do{\
num = *((RepeatNumType* )(p));\
(p) += SIZE_REPEATNUM;\
} while(0)
#define GET_OPTION_INC(option,p) do{\
option = *((OnigOptionType* )(p));\
(p) += SIZE_OPTION;\
} while(0)
#else
#define GET_RELADDR_INC(addr,p) GET_SHORT_INC(addr,p)
#define GET_ABSADDR_INC(addr,p) GET_SHORT_INC(addr,p)
#define GET_LENGTH_INC(len,p) GET_SHORT_INC(len,p)
#define GET_MEMNUM_INC(num,p) GET_SHORT_INC(num,p)
#define GET_REPEATNUM_INC(num,p) GET_INT_INC(num,p)
#define GET_OPTION_INC(option,p) GET_UINT_INC(option,p)
#define SERIALIZE_RELADDR(addr,p) SERIALIZE_SHORT(addr,p)
#define SERIALIZE_ABSADDR(addr,p) SERIALIZE_SHORT(addr,p)
#define SERIALIZE_LENGTH(len,p) SERIALIZE_SHORT(len,p)
#define SERIALIZE_MEMNUM(num,p) SERIALIZE_SHORT(num,p)
#define SERIALIZE_REPEATNUM(num,p) SERIALIZE_INT(num,p)
#define SERIALIZE_OPTION(option,p) SERIALIZE_UINT(option,p)
#define SERIALIZE_BUFSIZE SIZEOF_INT
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
/* code point's address must be aligned address. */
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
#define GET_BYTE_INC(byte,p) do{\
byte = *(p);\
(p)++;\
} while(0)
/* op-code + arg size */
#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1)
#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_POP SIZE_OPCODE
#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1)
#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1)
#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_PUSH_POS SIZE_OPCODE
#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_POP_POS SIZE_OPCODE
#define SIZE_OP_FAIL_POS SIZE_OPCODE
#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION)
#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION)
#define SIZE_OP_FAIL SIZE_OPCODE
#define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE
#define SIZE_OP_POP_STOP_BT SIZE_OPCODE
#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH)
#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH)
#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE
#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR)
#define SIZE_OP_RETURN SIZE_OPCODE
/*
 * Table of meta characters, one UChar per role.  A single global instance,
 * OnigMetaCharTable, is declared below and the MC_* macros read its fields.
 * NOTE(review): the meaning of each field is inferred from its name only
 * (escape, any-char, repetition operators) -- confirm against the parser.
 */
typedef struct {
UChar esc;
UChar anychar;
UChar anytime;
UChar zero_or_one_time;
UChar one_or_more_time;
UChar anychar_anytime;
} OnigMetaCharTableType;
extern OnigMetaCharTableType OnigMetaCharTable;
/* Shorthand accessors for the fields of the global table. */
#define MC_ESC OnigMetaCharTable.esc
#define MC_ANYCHAR OnigMetaCharTable.anychar
#define MC_ANYTIME OnigMetaCharTable.anytime
#define MC_ZERO_OR_ONE_TIME OnigMetaCharTable.zero_or_one_time
#define MC_ONE_OR_MORE_TIME OnigMetaCharTable.one_or_more_time
#define MC_ANYCHAR_ANYTIME OnigMetaCharTable.anychar_anytime
#ifdef ONIG_DEBUG
/*
 * Debug-only (ONIG_DEBUG) description of one opcode; the table OnigOpInfo[]
 * holds these entries.
 * NOTE(review): arg_type is presumably one of the ARG_* constants and
 * opcode one of enum OpCode -- confirm against the table definition.
 */
typedef struct {
short int opcode;
char* name;
short int arg_type;
} OnigOpInfoType;
extern OnigOpInfoType OnigOpInfo[];
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp));
#ifdef ONIG_DEBUG_STATISTICS
extern void onig_statistics_init P_((void));
extern void onig_print_statistics P_((FILE* f));
#endif
#endif
extern char* onig_error_code_to_format P_((int code));
extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...));
extern UChar* onig_strdup P_((UChar* s, UChar* end));
extern int onig_bbuf_init P_((BBuf* buf, int size));
extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax));
extern int onig_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigErrorInfo* einfo));
extern void onig_chain_reduce P_((regex_t* reg));
extern int onig_is_in_code_range P_((UChar* p, OnigCodePoint code));
#endif /* REGINT_H */

4815
regparse.c Normal file

File diff suppressed because it is too large Load Diff

277
regparse.h Normal file
View File

@ -0,0 +1,277 @@
/**********************************************************************
regparse.h - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef REGPARSE_H
#define REGPARSE_H
#include "regint.h"
/* node type */
#define N_STRING (1<< 0)
#define N_CCLASS (1<< 1)
#define N_CTYPE (1<< 2)
#define N_ANYCHAR (1<< 3)
#define N_BACKREF (1<< 4)
#define N_QUALIFIER (1<< 5)
#define N_EFFECT (1<< 6)
#define N_ANCHOR (1<< 7)
#define N_LIST (1<< 8)
#define N_ALT (1<< 9)
#define N_CALL (1<<10)
#define IS_NODE_TYPE_SIMPLE(type) \
(((type) & (N_STRING | N_CCLASS | N_CTYPE | N_ANYCHAR | N_BACKREF)) != 0)
#define NTYPE(node) ((node)->type)
#define NCONS(node) ((node)->u.cons)
#define NSTRING(node) ((node)->u.str)
#define NCCLASS(node) ((node)->u.cclass)
#define NCTYPE(node) ((node)->u.ctype)
#define NQUALIFIER(node) ((node)->u.qualifier)
#define NANCHOR(node) ((node)->u.anchor)
#define NBACKREF(node) ((node)->u.backref)
#define NEFFECT(node) ((node)->u.effect)
#define NCALL(node) ((node)->u.call)
#define CTYPE_WORD (1<<0)
#define CTYPE_NOT_WORD (1<<1)
#define CTYPE_WHITE_SPACE (1<<2)
#define CTYPE_NOT_WHITE_SPACE (1<<3)
#define CTYPE_DIGIT (1<<4)
#define CTYPE_NOT_DIGIT (1<<5)
#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL)
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
#define EFFECT_MEMORY (1<<0)
#define EFFECT_OPTION (1<<1)
#define EFFECT_STOP_BACKTRACK (1<<2)
#define REPEAT_INFINITE -1
#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
#define NODE_STR_MARGIN 16
#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE 7
#define NSTR_RAW (1<<0) /* by backslashed number */
#define NSTR_CASE_AMBIG (1<<1)
#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
#define NSTRING_SET_CASE_AMBIG(node) (node)->u.str.flag |= NSTR_CASE_AMBIG
#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
#define NSTRING_IS_CASE_AMBIG(node) \
(((node)->u.str.flag & NSTR_CASE_AMBIG) != 0)
/* Pointer to the back-reference number array of a BackrefNode: the
   dynamically allocated array when one is set, otherwise the in-struct
   static array.  Expands to a plain expression (no trailing semicolon), so
   it can be used inside larger expressions as well as in `x = BACKREFS_P(br);`. */
#define BACKREFS_P(br) \
  (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
#define CCLASS_SET_NOT(cc) (cc)->not = 1
#define NQ_TARGET_ISNOT_EMPTY 0
#define NQ_TARGET_IS_EMPTY 1
#define NQ_TARGET_IS_EMPTY_MEM 2
#define NQ_TARGET_IS_EMPTY_REC 3
/*
 * String node payload.
 * The string occupies [s, end); NSTRING_LEN() computes end - s.
 */
typedef struct {
UChar* s;
UChar* end;
/* bit flags: NSTR_RAW, NSTR_CASE_AMBIG (see the NSTRING_* macros) */
unsigned int flag;
int capa; /* (allocated size - 1) or 0: use buf[] */
/* in-struct storage of NODE_STR_BUF_SIZE bytes, used when capa is 0 */
UChar buf[NODE_STR_BUF_SIZE];
} StrNode;
/* Character class node payload. */
typedef struct {
/* set to 1 by CCLASS_SET_NOT(); presumably marks a negated class -- confirm */
int not;
/* one bit per single-byte value */
BitSet bs;
BBuf* mbuf; /* multi-byte info or NULL */
} CClassNode;
/*
 * Qualifier (repeat) node payload.
 * NOTE(review): lower/upper are presumably the repeat bounds, with
 * REPEAT_INFINITE (-1) standing for "no upper bound", and
 * target_empty_info presumably holds one of the NQ_TARGET_* values --
 * confirm against the parser and compiler.
 */
typedef struct {
/* node the qualifier applies to */
struct _Node* target;
int lower;
int upper;
int greedy;
int by_number; /* {n,m} */
int target_empty_info;
struct _Node* head_exact;
struct _Node* next_head_exact;
int is_refered; /* include called node. don't eliminate even if {0} */
} QualifierNode;
/* status bits */
#define NST_MIN_FIXED (1<<0)
#define NST_MAX_FIXED (1<<1)
#define NST_CLEN_FIXED (1<<2)
#define NST_MARK1 (1<<3)
#define NST_MARK2 (1<<4)
#define NST_MEM_BACKREFED (1<<5)
#define NST_SIMPLE_REPEAT (1<<6) /* for stop backtrack optimization */
#define NST_RECURSION (1<<7)
#define NST_CALLED (1<<8)
#define NST_ADDR_FIXED (1<<9)
#define NST_NAMED_GROUP (1<<10)
#define NST_NAME_REF (1<<11)
#define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f)
#define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f)
#define IS_EFFECT_CALLED(en) (((en)->state & NST_CALLED) != 0)
#define IS_EFFECT_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
#define IS_EFFECT_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
#define IS_EFFECT_MARK1(en) (((en)->state & NST_MARK1) != 0)
#define IS_EFFECT_MARK2(en) (((en)->state & NST_MARK2) != 0)
#define IS_EFFECT_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
#define IS_EFFECT_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
#define IS_EFFECT_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
#define IS_EFFECT_SIMPLE_REPEAT(en) (((en)->state & NST_SIMPLE_REPEAT) != 0)
#define IS_EFFECT_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
/*
 * Effect node payload.
 * `state` carries the NST_* status bits; it is modified through
 * SET_EFFECT_STATUS / CLEAR_EFFECT_STATUS and tested by the IS_EFFECT_*
 * macros.
 * NOTE(review): `type` is presumably one of the EFFECT_* constants
 * (memory, option, stop-backtrack) and `regnum` the group number of a
 * memory effect -- confirm against onig_node_new_effect() and its callers.
 */
typedef struct {
int state;
int type;
int regnum;
OnigOptionType option;
struct _Node* target;
AbsAddrType call_addr;
/* for multiple call reference */
OnigDistance min_len; /* min length (byte) */
OnigDistance max_len; /* max length (byte) */
int char_len; /* character length */
int opt_count; /* referenced count in optimize_node_left() */
} EffectNode;
#define CALLNODE_REFNUM_UNDEF -1
#ifdef USE_SUBEXP_CALL
/* One entry of an UnsetAddrList: an offset paired with a target node.
   NOTE(review): presumably a code offset whose call address is filled in
   once the target's address is known -- confirm against the compiler. */
typedef struct {
int offset;
struct _Node* target;
} UnsetAddr;
/* Array of UnsetAddr entries.
   NOTE(review): `num` is presumably the number of entries in use and
   `alloc` the allocated capacity of `us` -- confirm. */
typedef struct {
int num;
int alloc;
UnsetAddr* us;
} UnsetAddrList;
/*
 * Call node payload (only compiled with USE_SUBEXP_CALL).
 * `state` carries NST_* bits; SET_CALL_RECURSION sets NST_RECURSION and
 * IS_CALL_NAME_REF tests NST_NAME_REF.
 * NOTE(review): ref_num presumably starts as CALLNODE_REFNUM_UNDEF (-1)
 * until resolved, and [name, name_end) presumably delimits the referenced
 * group name -- confirm against the parser.
 */
typedef struct {
int state;
int ref_num;
UChar* name;
UChar* name_end;
struct _Node* target; /* EffectNode : EFFECT_MEMORY */
UnsetAddrList* unset_addr_list;
} CallNode;
#endif
/*
 * Back-reference node payload.
 * `state` carries NST_* bits (IS_BACKREF_NAME_REF tests NST_NAME_REF).
 * The reference numbers live in back_dynamic when it is non-NULL and in
 * back_static otherwise (see BACKREFS_P).
 * NOTE(review): back_num is presumably the number of stored references --
 * confirm.
 */
typedef struct {
int state;
int back_num;
int back_static[NODE_BACKREFS_SIZE];
int* back_dynamic;
} BackrefNode;
/* Anchor node payload.
   NOTE(review): `type` is presumably one of the ANCHOR_* bit constants and
   `target` the sub-pattern of look-ahead / look-behind anchors -- confirm
   against onig_node_new_anchor() and its callers. */
typedef struct {
int type;
struct _Node* target;
int char_len;
} AnchorNode;
/*
 * Parse-tree node: a type tag followed by a union of per-type payloads.
 * `type` is read through NTYPE() and holds one of the N_* node-type bits.
 * The union members are accessed through NSTRING(), NCCLASS(), NCTYPE(),
 * NQUALIFIER(), NANCHOR(), NBACKREF(), NEFFECT(), NCALL() and NCONS().
 */
typedef struct _Node {
int type;
union {
StrNode str;
CClassNode cclass;
QualifierNode qualifier;
EffectNode effect;
#ifdef USE_SUBEXP_CALL
CallNode call;
#endif
BackrefNode backref;
AnchorNode anchor;
/* two-child payload reached via NCONS(); presumably used by N_LIST and
   N_ALT nodes -- confirm */
struct {
struct _Node* left;
struct _Node* right;
} cons;
/* presumably one of the CTYPE_* constants -- confirm */
struct {
int type;
} ctype;
} u;
} Node;
/* Null pointer typed as Node*. */
#define NULL_NODE ((Node* )0)
/* Number of slots in ScanEnv.mem_nodes_static. */
#define SCANENV_MEMNODES_SIZE 8
/* Yields the active memory-node array of a ScanEnv: the dynamic array when
   its pointer is non-null, otherwise the built-in static array. */
#define SCANENV_MEM_NODES(senv) \
(IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
(senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
/* State carried through parsing of one pattern (passed to
   onig_parse_make_tree() and onig_scan_env_set_error_string()). */
typedef struct {
OnigOptionType option;
OnigEncoding enc;
OnigSyntaxType* syntax;
BitStatusType capture_history;
BitStatusType bt_mem_start;
BitStatusType bt_mem_end;
BitStatusType backrefed_mem;
UChar* pattern; /* start of the pattern text */
UChar* pattern_end; /* end of the pattern text */
UChar* error; /* start of the error argument string */
UChar* error_end; /* end of the error argument string */
regex_t* reg; /* for reg->names only */
int num_call;
#ifdef USE_SUBEXP_CALL
UnsetAddrList* unset_addr_list;
#endif
int num_mem;
#ifdef USE_NAMED_GROUP
int num_named;
#endif
int mem_alloc;
/* Memory-node storage; read it through SCANENV_MEM_NODES(), which prefers
   mem_nodes_dynamic whenever that pointer is non-null. */
Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
Node** mem_nodes_dynamic;
} ScanEnv;
/* Syntax predicates: 1 when any bit of the mask is set in the `op`, `op2`
   or `behavior` member of the syntax pointed to by `syn`, 0 otherwise. */
#define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0)
#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
/* Parser and node helpers declared for use outside the parser.
   Every prototype is written through the P_(( )) wrapper (defined elsewhere)
   so that all declarations in this header follow one convention;
   onig_free_node_list now declares an explicit (void) parameter list instead
   of an old-style empty one. */
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
extern int onig_strncmp P_((UChar* s1, UChar* s2, int n));
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
extern int onig_scan_unsigned_number P_((UChar** src, UChar* end, OnigEncoding enc));
extern void onig_reduce_nested_qualifier P_((Node* pnode, Node* cnode));
extern void onig_node_conv_to_str_node P_((Node* node, int raw));
extern int onig_node_str_cat P_((Node* node, UChar* s, UChar* end));
extern void onig_node_free P_((Node* node));
extern Node* onig_node_new_effect P_((int type));
extern Node* onig_node_new_anchor P_((int type));
extern int onig_free_node_list P_((void));
extern int onig_names_free P_((regex_t* reg));
extern int onig_parse_make_tree P_((Node** root, UChar* pattern, UChar* end, regex_t* reg, ScanEnv* env));
#ifdef ONIG_DEBUG
#ifdef USE_NAMED_GROUP
/* Debug-only helper; available when both ONIG_DEBUG and USE_NAMED_GROUP
   are defined. */
extern int onig_print_names P_((FILE*, regex_t*));
#endif
#endif
#endif /* REGPARSE_H */

174
sjis.c Normal file
View File

@ -0,0 +1,174 @@
/**********************************************************************
sjis.c - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "regenc.h"
/* Indexed by byte value: 1 when the byte may appear as the second (trail)
   byte of a two-byte character, 0 otherwise.  The entries are 1 for
   0x40-0x7e and 0x80-0xfc. */
static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
};
/* True when `byte` starts a multi-byte character, i.e. its entry in the
   length table of OnigEncodingSJIS is greater than 1. */
#define SJIS_ISMB_FIRST(byte) (OnigEncodingSJIS.len_table[byte] > 1)
/* Non-zero when `byte` may be a trail byte (lookup in the table above). */
#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)]
/* Decodes the character starting at p into a code point by concatenating
   its bytes big-endian (first byte in the high position).  The byte count
   comes from the encoding's length table; bytes at or beyond `end` are not
   read, so a truncated character yields the bytes gathered so far. */
static OnigCodePoint
sjis_mbc_to_code(UChar* p, UChar* end)
{
  int first, remaining;
  OnigCodePoint code;

  first = *p++;
  code = first;
  remaining = enc_len(ONIG_ENCODING_SJIS, first) - 1;

  while (remaining > 0 && p < end) {
    int next = *p++;
    code = (code << 8) + next;
    remaining--;
  }
  return code;
}
/* Writes the byte sequence for `code` into buf and returns the number of
   bytes written: the high byte (bits 8-15) is emitted only when it is
   non-zero, followed by the low byte. */
static int
sjis_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  int n = 0;

  if ((code & 0xff00) != 0)
    buf[n++] = (UChar )((code >> 8) & 0xff);
  buf[n++] = (UChar )(code & 0xff);

#if 0
  /* disabled consistency check between the length table and the output */
  if (enc_len(ONIG_ENCODING_SJIS, buf[0]) != n)
    return REGERR_INVALID_WIDE_CHAR_VALUE;
#endif
  return n;
}
/* Stores the lower-case form of the character at p into `lower` and returns
   its byte length.  Only ASCII bytes are case-converted; any other character
   is copied unchanged (the copy is skipped when lower and p are the same
   buffer). */
static int
sjis_mbc_to_lower(UChar* p, UChar* lower)
{
  int nbytes, k;

  if (ONIGENC_IS_MBC_ASCII(p)) {
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
    return 1;
  }

  nbytes = enc_len(ONIG_ENCODING_SJIS, *p);
  if (lower == p)
    return nbytes;

  /* byte-wise forward copy, standing in for memcpy(lower, p, nbytes) */
  for (k = 0; k < nbytes; k++)
    lower[k] = p[k];
  return nbytes; /* byte length of the converted character */
}
/* Tests whether `code` belongs to the character class(es) in `ctype`.
   - Codes below 128 are classified with the ASCII ctype table.
   - For larger codes only ONIGENC_CTYPE_WORD can match: the result is TRUE
     when the code's first byte is a multi-byte lead byte.
   - Everything else is FALSE.
   The statements that used to follow the inner if/else could never run
   (both arms return) and have been dropped; results are unchanged. */
static int
sjis_code_is_ctype(OnigCodePoint code, unsigned int ctype)
{
  if (code < 128)
    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);

  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
    int first = onigenc_mb2_code_to_mbc_first(code);
    return (enc_len(ONIG_ENCODING_SJIS, first) > 1 ? TRUE : FALSE);
  }

  return FALSE;
}
/* Returns the start of the character that contains position s, never going
   back before `start`.  When s is at or before start it is returned as is. */
static UChar*
sjis_left_adjust_char_head(UChar* start, UChar* s)
{
UChar *p;
int len;
if (s <= start) return s;
p = s;
/* Only a byte that can be a trail byte may sit in the middle of a
   character; in that case walk back over the run of lead-capable bytes
   that precedes s. */
if (SJIS_ISMB_TRAIL(*p)) {
while (p > start) {
if (! SJIS_ISMB_FIRST(*--p)) {
/* stepped onto a byte that is not a lead byte: the run begins just after it */
p++;
break;
}
}
}
len = enc_len(ONIG_ENCODING_SJIS, *p);
/* the character starting at p extends past s, so it is the one holding s */
if (p + len > s) return p;
p += len;
/* advance by the remaining distance rounded down to an even byte count */
return p + ((s - p) & ~1);
}
/* Returns FALSE when the byte at s can be a trail byte (per
   SJIS_CAN_BE_TRAIL_TABLE) and TRUE otherwise.  `end` is not consulted. */
static int
sjis_is_allowed_reverse_match(UChar* s, UChar* end)
{
  UChar head = *s;

  if (SJIS_ISMB_TRAIL(head))
    return FALSE;
  return TRUE;
}
/* Encoding descriptor for Shift_JIS.  The first member is the byte-length
   table indexed by first byte: 2 for 0x81-0x9f and 0xe0-0xfc, 1 for every
   other value.  It is followed by the scalar settings and the encoding
   callbacks defined in this file or supplied by the generic onigenc_*
   helpers. */
OnigEncodingType OnigEncodingSJIS = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
},
"Shift_JIS", /* name */
2, /* max byte length */
FALSE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
FALSE, /* is continuous sb mb codepoint */
sjis_mbc_to_code,
onigenc_mb2_code_to_mbclen,
sjis_code_to_mbc,
sjis_mbc_to_lower,
onigenc_mbn_mbc_is_case_ambig,
sjis_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
sjis_left_adjust_char_head,
sjis_is_allowed_reverse_match,
onigenc_nothing_get_all_fold_match_code,
onigenc_nothing_get_fold_match_info
};

566
utf8.c Normal file
View File

@ -0,0 +1,566 @@
/**********************************************************************
utf8.c - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "regenc.h"
/* True when byte c is not of the form 10xxxxxx, i.e. it is not a UTF-8
   continuation byte and can therefore start a character. */
#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
/* True when any of the class bits in `ctype` is set for `code` in the
   ISO-8859-1 ctype table (code must be below 256).  Both arguments are
   parenthesized so that a composite mask such as (A | B) is ANDed as a
   whole rather than being split by operator precedence. */
#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
  ((EncUnicode_ISO_8859_1_CtypeTable[(code)] & (ctype)) != 0)
/* Character-class bit masks for code points 0x00-0xff, indexed by code
   point and queried through ENC_IS_ISO_8859_1_CTYPE().  The table is never
   written, so it is declared const like the other lookup tables. */
static const unsigned short EncUnicode_ISO_8859_1_CtypeTable[256] = {
  0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
  0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  0x1142, 0x10d0, 0x10d0, 0x10d0, 0x1050, 0x10d0, 0x10d0, 0x10d0,
  0x10d0, 0x10d0, 0x10d0, 0x1050, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
  0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
  0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x1050, 0x1050, 0x1050, 0x10d0,
  0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
  0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
  0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
  0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x1050, 0x18d0,
  0x1050, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
  0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
  0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
  0x1871, 0x1871, 0x1871, 0x10d0, 0x1050, 0x10d0, 0x1050, 0x1004,
  0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  0x1142, 0x10d0, 0x1050, 0x1050, 0x1050, 0x1050, 0x1050, 0x1050,
  0x1050, 0x1050, 0x1871, 0x10d0, 0x1050, 0x10d0, 0x1050, 0x1050,
  0x1050, 0x1050, 0x1850, 0x1850, 0x1050, 0x1871, 0x1050, 0x10d0,
  0x1050, 0x1850, 0x1871, 0x10d0, 0x1850, 0x1850, 0x1850, 0x10d0,
  0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
  0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
  0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1050,
  0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1871,
  0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
  0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
  0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1050,
  0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871
};
/* Decodes the UTF-8 sequence starting at p into a code point.
   The sequence length is taken from the encoding's length table; a
   single-byte lead is returned as is.  Continuation bytes at or beyond
   `end` are not read: a truncated sequence yields the bits gathered so far
   instead of reading past the buffer (the same guard sjis_mbc_to_code
   applies). */
static OnigCodePoint
utf8_mbc_to_code(UChar* p, UChar* end)
{
  int c, len;
  OnigCodePoint n;

  c = *p++;
  len = enc_len(ONIG_ENCODING_UTF8, c);
  if (len <= 1)
    return (OnigCodePoint )c;

  len--;                              /* number of continuation bytes */
  n = c & ((1 << (6 - len)) - 1);     /* payload bits of the lead byte */
  while (len--) {
    if (p >= end) break;              /* truncated input */
    c = *p++;
    n = (n << 6) | (c & ((1 << 6) - 1));
  }
  return n;
}
/* Returns the number of bytes used for `code` (1 to 6), or
   ONIGERR_TOO_BIG_WIDE_CHAR_VALUE when bit 31 is set.
   NOTE(review): codes 0xfe and 0xff report length 1 here, while
   utf8_code_to_mbc() writes two bytes for them -- confirm which of the two
   is intended before relying on this value to size a buffer. */
static int
utf8_code_to_mbclen(OnigCodePoint code)
{
  if ((code & 0xffffff80) == 0)
    return 1;

  if ((code & 0xfffff800) == 0)
    return (code == 0xfe || code == 0xff) ? 1 : 2;

  if ((code & 0xffff0000) == 0) return 3;
  if ((code & 0xffe00000) == 0) return 4;
  if ((code & 0xfc000000) == 0) return 5;
  if ((code & 0x80000000) == 0) return 6;

  return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
}
/* Compiled out (#if 0): would return the first byte of the UTF-8 encoding
   of `code`, or ONIGERR_TOO_BIG_WIDE_CHAR_VALUE when bit 31 is set. */
#if 0
static int
utf8_code_to_mbc_first(OnigCodePoint code)
{
if ((code & 0xffffff80) == 0)
return code;
else {
if ((code & 0xfffff800) == 0)
return ((code>>6)& 0x1f) | 0xc0;
else if ((code & 0xffff0000) == 0)
return ((code>>12) & 0x0f) | 0xe0;
else if ((code & 0xffe00000) == 0)
return ((code>>18) & 0x07) | 0xf0;
else if ((code & 0xfc000000) == 0)
return ((code>>24) & 0x03) | 0xf8;
else if ((code & 0x80000000) == 0)
return ((code>>30) & 0x01) | 0xfc;
else {
return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
}
}
}
#endif
/* Encodes `code` as UTF-8 into buf and returns the number of bytes written
   (1 to 6), or ONIGERR_TOO_BIG_WIDE_CHAR_VALUE when bit 31 is set.
   The lead byte is chosen from the magnitude of the code; the continuation
   bytes are then emitted from the highest 6-bit group down to the lowest. */
static int
utf8_code_to_mbc(OnigCodePoint code, UChar *buf)
{
#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80)
#define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80)

  UChar *out = buf;
  int shift;   /* bit offset of the first middle continuation byte */

  if ((code & 0xffffff80) == 0) {
    *buf = (UChar )code;
    return 1;
  }

  if ((code & 0xfffff800) == 0) {
    *out++ = (UChar )(((code>>6)& 0x1f) | 0xc0);
    shift = 0;
  }
  else if ((code & 0xffff0000) == 0) {
    *out++ = (UChar )(((code>>12) & 0x0f) | 0xe0);
    shift = 6;
  }
  else if ((code & 0xffe00000) == 0) {
    *out++ = (UChar )(((code>>18) & 0x07) | 0xf0);
    shift = 12;
  }
  else if ((code & 0xfc000000) == 0) {
    *out++ = (UChar )(((code>>24) & 0x03) | 0xf8);
    shift = 18;
  }
  else if ((code & 0x80000000) == 0) {
    *out++ = (UChar )(((code>>30) & 0x01) | 0xfc);
    shift = 24;
  }
  else {
    return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
  }

  /* middle continuation bytes, most significant group first */
  for (; shift > 0; shift -= 6)
    *out++ = UTF8_TRAILS(code, shift);

  /* the final continuation byte carries the low six bits */
  *out++ = UTF8_TRAIL0(code);
  return (int )(out - buf);
}
/* Stores the lower-case form of the character at p into `lower` and returns
   its byte length.  Only ASCII is case-converted here; multi-byte characters
   are copied unchanged (the copy is skipped when lower and p are the same
   buffer). */
static int
utf8_mbc_to_lower(UChar* p, UChar* lower)
{
  int nbytes, k;

  /* !!! U+0080 - U+00ff is treated by fold match. !!! */
  if (ONIGENC_IS_MBC_ASCII(p)) {
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
    return 1; /* byte length of the converted character */
  }

  nbytes = enc_len(ONIG_ENCODING_UTF8, *p);
  if (lower == p)
    return nbytes;

  /* byte-wise forward copy, standing in for memcpy(lower, p, nbytes) */
  for (k = 0; k < nbytes; k++)
    lower[k] = p[k];
  return nbytes; /* byte length of the converted character */
}
/* Reports whether the character at p is case-ambiguous.  Only ASCII
   characters are examined; every multi-byte character yields FALSE because
   U+0080 - U+00ff ( 0x80[0xc2,0x80] - 0xff[0xc3,0xbf] ) is handled by fold
   matching instead. */
static int
utf8_mbc_is_case_ambig(UChar* p)
{
  if (! ONIGENC_IS_MBC_ASCII(p))
    return FALSE;

  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
/* Tests whether `code` belongs to the class(es) in `ctype`.  Codes below
   256 are looked up in the ISO-8859-1 ctype table; every larger code is
   treated as a word character and as nothing else. */
static int
utf8_code_is_ctype(OnigCodePoint code, unsigned int ctype)
{
  if (code >= 256)
    return ((ctype & ONIGENC_CTYPE_WORD) != 0) ? TRUE : FALSE;

  return ENC_IS_ISO_8859_1_CTYPE(code, ctype);
}
/* Looks up the code point ranges of one character class.
   On success returns 0 and stores, through the output parameters, the
   number of entries and the address of two static range tables: one for
   codes up to 0x7f (*nsb, *sbr) and one for codes from 0x80 upward
   (*nmb, *mbr).  Returns ONIGERR_TYPE_BUG for an unknown ctype.
   Note that for the classes without a second table (DIGIT, XDIGIT, ASCII)
   *nmb is set to 0 and *mbr is left untouched. */
static int
utf8_get_ctype_code_range(int ctype, int* nsb, int* nmb,
OnigCodePointRange* sbr[], OnigCodePointRange* mbr[])
{
/* store both tables and their entry counts */
#define CR_SET(sbl,mbl) do { \
*nsb = sizeof(sbl) / sizeof(OnigCodePointRange); \
*nmb = sizeof(mbl) / sizeof(OnigCodePointRange); \
*sbr = sbl; \
*mbr = mbl; \
} while (0)
/* store only the first table; *nmb becomes 0 and *mbr is not written */
#define CR_SB_SET(sbl) do { \
*nsb = sizeof(sbl) / sizeof(OnigCodePointRange); \
*nmb = 0; \
*sbr = sbl; \
} while (0)
static OnigCodePointRange SBAlpha[] = {
{ 0x41, 0x5a },
{ 0x61, 0x7a }
};
static OnigCodePointRange MBAlpha[] = {
{ 0xaa, 0xaa },
{ 0xb5, 0xb5 },
{ 0xba, 0xba },
{ 0xc0, 0xd6 },
{ 0xd8, 0xf6 },
{ 0xf8, 0x220 }
};
static OnigCodePointRange SBBlank[] = {
{ 0x09, 0x09 },
{ 0x20, 0x20 }
};
static OnigCodePointRange MBBlank[] = {
{ 0xa0, 0xa0 }
};
static OnigCodePointRange SBCntrl[] = {
{ 0x00, 0x1f },
{ 0x7f, 0x7f }
};
static OnigCodePointRange MBCntrl[] = {
{ 0x80, 0x9f }
};
static OnigCodePointRange SBDigit[] = {
{ 0x30, 0x39 }
};
static OnigCodePointRange SBGraph[] = {
{ 0x21, 0x7e }
};
static OnigCodePointRange MBGraph[] = {
{ 0xa1, 0x220 }
};
static OnigCodePointRange SBLower[] = {
{ 0x61, 0x7a }
};
static OnigCodePointRange MBLower[] = {
{ 0xaa, 0xaa },
{ 0xb5, 0xb5 },
{ 0xba, 0xba },
{ 0xdf, 0xf6 },
{ 0xf8, 0xff }
};
static OnigCodePointRange SBPrint[] = {
{ 0x20, 0x7e }
};
static OnigCodePointRange MBPrint[] = {
{ 0xa0, 0x220 }
};
static OnigCodePointRange SBPunct[] = {
{ 0x21, 0x23 },
{ 0x25, 0x2a },
{ 0x2c, 0x2f },
{ 0x3a, 0x3b },
{ 0x3f, 0x40 },
{ 0x5b, 0x5d },
{ 0x5f, 0x5f },
{ 0x7b, 0x7b },
{ 0x7d, 0x7d }
};
static OnigCodePointRange MBPunct[] = {
{ 0xa1, 0xa1 },
{ 0xab, 0xab },
{ 0xad, 0xad },
{ 0xb7, 0xb7 },
{ 0xbb, 0xbb },
{ 0xbf, 0xbf }
};
static OnigCodePointRange SBSpace[] = {
{ 0x09, 0x0d },
{ 0x20, 0x20 }
};
static OnigCodePointRange MBSpace[] = {
{ 0xa0, 0xa0 }
};
static OnigCodePointRange SBUpper[] = {
{ 0x41, 0x5a }
};
static OnigCodePointRange MBUpper[] = {
{ 0xc0, 0xd6 },
{ 0xd8, 0xde }
};
static OnigCodePointRange SBXDigit[] = {
{ 0x30, 0x39 },
{ 0x41, 0x46 },
{ 0x61, 0x66 }
};
static OnigCodePointRange SBWord[] = {
{ 0x30, 0x39 },
{ 0x41, 0x5a },
{ 0x5f, 0x5f },
{ 0x61, 0x7a }
};
static OnigCodePointRange MBWord[] = {
{ 0xaa, 0xaa },
{ 0xb2, 0xb3 },
{ 0xb5, 0xb5 },
{ 0xb9, 0xba },
{ 0xbc, 0xbe },
{ 0xc0, 0xd6 },
{ 0xd8, 0xf6 },
#if 0
{ 0xf8, 0x220 }
#else
{ 0xf8, 0x7fffffff } /* all multibyte code as word */
#endif
};
static OnigCodePointRange SBAscii[] = {
{ 0x00, 0x7f }
};
static OnigCodePointRange SBAlnum[] = {
{ 0x30, 0x39 },
{ 0x41, 0x5a },
{ 0x61, 0x7a }
};
static OnigCodePointRange MBAlnum[] = {
{ 0xaa, 0xaa },
{ 0xb5, 0xb5 },
{ 0xba, 0xba },
{ 0xc0, 0xd6 },
{ 0xd8, 0xf6 },
{ 0xf8, 0x220 }
};
/* select the table pair for the requested class */
switch (ctype) {
case ONIGENC_CTYPE_ALPHA:
CR_SET(SBAlpha, MBAlpha);
break;
case ONIGENC_CTYPE_BLANK:
CR_SET(SBBlank, MBBlank);
break;
case ONIGENC_CTYPE_CNTRL:
CR_SET(SBCntrl, MBCntrl);
break;
case ONIGENC_CTYPE_DIGIT:
CR_SB_SET(SBDigit);
break;
case ONIGENC_CTYPE_GRAPH:
CR_SET(SBGraph, MBGraph);
break;
case ONIGENC_CTYPE_LOWER:
CR_SET(SBLower, MBLower);
break;
case ONIGENC_CTYPE_PRINT:
CR_SET(SBPrint, MBPrint);
break;
case ONIGENC_CTYPE_PUNCT:
CR_SET(SBPunct, MBPunct);
break;
case ONIGENC_CTYPE_SPACE:
CR_SET(SBSpace, MBSpace);
break;
case ONIGENC_CTYPE_UPPER:
CR_SET(SBUpper, MBUpper);
break;
case ONIGENC_CTYPE_XDIGIT:
CR_SB_SET(SBXDigit);
break;
case ONIGENC_CTYPE_WORD:
CR_SET(SBWord, MBWord);
break;
case ONIGENC_CTYPE_ASCII:
CR_SB_SET(SBAscii);
break;
case ONIGENC_CTYPE_ALNUM:
CR_SET(SBAlnum, MBAlnum);
break;
default:
return ONIGERR_TYPE_BUG;
break;
}
return 0;
}
/* Hands out the static list of code points that take part in fold matching
   (0xc0-0xfe without 0xd7 and 0xf7).  Stores the list address in *codes and
   returns the number of entries. */
static int
utf8_get_all_fold_match_code(OnigCodePoint** codes)
{
  static OnigCodePoint fold_codes[] = {
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6,
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe
  };
  int count = sizeof(fold_codes) / sizeof(fold_codes[0]);

  *codes = fold_codes;
  return count;
}
/* Looks up fold-match information for the text starting at p.
   At least two bytes must be available before `end`.  On a hit, *info is
   pointed at the matching entry of the static table and 2 is returned;
   otherwise -1 is returned and *info is left untouched.
   Hits are:
     - "SS" or "ss"                     -> the ess-tsett entry (U+00DF)
     - 0xc3 0x80..0x9f (U+00C0..U+00DF) -> entry for that code, except U+00D7
     - 0xc3 0xa0..0xbe (U+00E0..U+00FE) -> entry of the matching upper-case
                                           code, except U+00F7
   A second byte of 0xbf (U+00FF) has no entry, and values above 0xbf are not
   continuation bytes; both are rejected.  They used to index xc[] at or past
   its last element: U+00FF was reported as ess-tsett, and larger bytes read
   beyond the table. */
static int
utf8_get_fold_match_info(UChar* p, UChar* end, OnigEncFoldMatchInfo** info)
{
  static OnigEncFoldMatchInfo xc[] = {
    { 2, { 2, 2 }, { "\303\200", "\303\240" } }, /* CodePoint 0xc0 */
    { 2, { 2, 2 }, { "\303\201", "\303\241" } },
    { 2, { 2, 2 }, { "\303\202", "\303\242" } },
    { 2, { 2, 2 }, { "\303\203", "\303\243" } },
    { 2, { 2, 2 }, { "\303\204", "\303\244" } },
    { 2, { 2, 2 }, { "\303\205", "\303\245" } },
    { 2, { 2, 2 }, { "\303\206", "\303\246" } },
    { 2, { 2, 2 }, { "\303\207", "\303\247" } },
    { 2, { 2, 2 }, { "\303\210", "\303\250" } },
    { 2, { 2, 2 }, { "\303\211", "\303\251" } },
    { 2, { 2, 2 }, { "\303\212", "\303\252" } },
    { 2, { 2, 2 }, { "\303\213", "\303\253" } },
    { 2, { 2, 2 }, { "\303\214", "\303\254" } },
    { 2, { 2, 2 }, { "\303\215", "\303\255" } },
    { 2, { 2, 2 }, { "\303\216", "\303\256" } },
    { 2, { 2, 2 }, { "\303\217", "\303\257" } },
    { 2, { 2, 2 }, { "\303\220", "\303\260" } }, /* CodePoint 0xd0 */
    { 2, { 2, 2 }, { "\303\221", "\303\261" } },
    { 2, { 2, 2 }, { "\303\222", "\303\262" } },
    { 2, { 2, 2 }, { "\303\223", "\303\263" } },
    { 2, { 2, 2 }, { "\303\224", "\303\264" } },
    { 2, { 2, 2 }, { "\303\225", "\303\265" } },
    { 2, { 2, 2 }, { "\303\226", "\303\266" } },
    { 0, { 0 }, { "" } },                        /* 0xd7: placeholder, never returned */
    { 2, { 2, 2 }, { "\303\230", "\303\270" } },
    { 2, { 2, 2 }, { "\303\231", "\303\271" } },
    { 2, { 2, 2 }, { "\303\232", "\303\272" } },
    { 2, { 2, 2 }, { "\303\233", "\303\273" } },
    { 2, { 2, 2 }, { "\303\234", "\303\274" } },
    { 2, { 2, 2 }, { "\303\235", "\303\275" } },
    { 2, { 2, 2 }, { "\303\236", "\303\276" } },
    { 3, { 2, 2, 2 }, { "\303\237", "ss", "SS" }} /* ess-tsett(U+00DF) */
  };
  int c;

  if (p + 1 >= end) return -1;

  if (*p < 0x80) {
    if ((*p == 'S' && *(p+1) == 'S') ||
        (*p == 's' && *(p+1) == 's')) {
      *info = &(xc[0xdf - 0xc0]);
      return 2;
    }
    return -1;
  }

  if (*p != 195) return -1; /* 195 == '\303' */

  c = *(p+1);
  if (c < 128 || c > 190) return -1; /* no entry: not 0x80..0xbe */

  if (c <= 159) { /* upper */
    if (c == 151) return -1; /* 0xd7 */
    *info = &(xc[c - 128]);
  }
  else { /* lower */
    if (c == 183) return -1; /* 0xf7 */
    *info = &(xc[c - 160]);
  }
  return 2;
}
/* Returns the start of the character that contains position s: moves back
   from s over continuation bytes until a lead byte is found, never going
   before `start`.  When s is at or before start it is returned as is. */
static UChar*
utf8_left_adjust_char_head(UChar* start, UChar* s)
{
  UChar *head;

  if (s <= start) return s;

  for (head = s; head > start; head--) {
    if (utf8_islead(*head))
      break;
  }
  return head;
}
/* Always returns TRUE; neither argument is consulted. */
static int
utf8_is_allowed_reverse_match(UChar* s, UChar* end)
{
  (void )s;
  (void )end;
  return TRUE;
}
/* Encoding descriptor for UTF-8.  The first member is the byte-length table
   indexed by first byte: 1 for 0x00-0xbf, 2 for 0xc0-0xdf, 3 for 0xe0-0xef,
   4 for 0xf0-0xf7, 5 for 0xf8-0xfb, 6 for 0xfc-0xfd and 1 for 0xfe-0xff.
   It is followed by the scalar settings and the callbacks defined in this
   file. */
OnigEncodingType OnigEncodingUTF8 = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
},
"UTF-8", /* name */
6, /* max byte length */
TRUE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_FULL, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
utf8_mbc_to_code,
utf8_code_to_mbclen,
utf8_code_to_mbc,
utf8_mbc_to_lower,
utf8_mbc_is_case_ambig,
utf8_code_is_ctype,
utf8_get_ctype_code_range,
utf8_left_adjust_char_head,
utf8_is_allowed_reverse_match,
utf8_get_all_fold_match_code,
utf8_get_fold_match_info
};