mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Initial revision
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5892 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
33a06e4aca
commit
5770336f8b
15 changed files with 17522 additions and 0 deletions
54
ascii.c
Normal file
54
ascii.c
Normal file
|
@ -0,0 +1,54 @@
|
|||
/**********************************************************************
|
||||
|
||||
ascii.c - Oniguruma (regular expression library)
|
||||
|
||||
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
|
||||
|
||||
**********************************************************************/
|
||||
#include "regenc.h"
|
||||
|
||||
static int
|
||||
ascii_code_is_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 128)
|
||||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
 * Encoding descriptor for US-ASCII.
 * The initializers are positional; the trailing comments name the
 * OnigEncodingType member each one fills.
 */
OnigEncodingType OnigEncodingASCII = {
  { /* len_table: character length by first byte -- always 1 */
    /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xc0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xd0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xe0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xf0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
  },
  "US-ASCII",                                   /* name */
  1,                                            /* max_enc_len */
  FALSE,                                        /* is_fold_match */
  ONIGENC_CTYPE_SUPPORT_LEVEL_SB,               /* ctype_support_level */
  TRUE,                                         /* is_continuous_sb_mb */
  onigenc_single_byte_mbc_to_code,              /* mbc_to_code */
  onigenc_single_byte_code_to_mbclen,           /* code_to_mbclen */
  onigenc_single_byte_code_to_mbc,              /* code_to_mbc */
  onigenc_ascii_mbc_to_lower,                   /* mbc_to_lower */
  onigenc_ascii_mbc_is_case_ambig,              /* mbc_is_case_ambig */
  ascii_code_is_ctype,                          /* code_is_ctype */
  onigenc_nothing_get_ctype_code_range,         /* get_ctype_code_range */
  onigenc_single_byte_left_adjust_char_head,    /* left_adjust_char_head */
  onigenc_single_byte_is_allowed_reverse_match, /* is_allowed_reverse_match */
  onigenc_nothing_get_all_fold_match_code,      /* get_all_fold_match_code */
  onigenc_nothing_get_fold_match_info           /* get_fold_match_info */
};
|
191
euc_jp.c
Normal file
191
euc_jp.c
Normal file
|
@ -0,0 +1,191 @@
|
|||
/**********************************************************************
|
||||
|
||||
euc_jp.c - Oniguruma (regular expression library)
|
||||
|
||||
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
|
||||
|
||||
**********************************************************************/
|
||||
#include "regenc.h"
|
||||
|
||||
/* NOTE: despite the name, this is true for every byte value OUTSIDE the
   range 0xa1..0xfe.  The difference is truncated to UChar, so bytes below
   0xa1 wrap around to values above 0x5d (= 0xfe - 0xa1), and 0xff gives 0x5e.
   eucjp_left_adjust_char_head() uses it to stop its backward scan. */
#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
|
||||
|
||||
static OnigCodePoint
|
||||
eucjp_mbc_to_code(UChar* p, UChar* end)
|
||||
{
|
||||
int c, i, len;
|
||||
OnigCodePoint n;
|
||||
|
||||
c = *p++;
|
||||
len = enc_len(ONIG_ENCODING_EUC_JP, c);
|
||||
n = c;
|
||||
if (len == 1) return n;
|
||||
|
||||
for (i = 1; i < len; i++) {
|
||||
if (p >= end) break;
|
||||
c = *p++;
|
||||
n <<= 8; n += c;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
static int
|
||||
eucjp_code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
if ((code & 0xff0000) != 0) return 3;
|
||||
else if ((code & 0xff00) != 0) return 2;
|
||||
else return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
eucjp_code_to_mbc_first(OnigCodePoint code)
|
||||
{
|
||||
int first;
|
||||
|
||||
if ((code & 0xff0000) != 0) {
|
||||
first = (code >> 16) & 0xff;
|
||||
/*
|
||||
if (enc_len(ONIG_ENCODING_EUC_JP, first) != 3)
|
||||
return ONIGERR_INVALID_WIDE_CHAR_VALUE;
|
||||
*/
|
||||
}
|
||||
else if ((code & 0xff00) != 0) {
|
||||
first = (code >> 8) & 0xff;
|
||||
/*
|
||||
if (enc_len(ONIG_ENCODING_EUC_JP, first) != 2)
|
||||
return ONIGERR_INVALID_WIDE_CHAR_VALUE;
|
||||
*/
|
||||
}
|
||||
else {
|
||||
/*
|
||||
if (enc_len(ONIG_ENCODING_EUC_JP, code) != 1)
|
||||
return ONIGERR_INVALID_WIDE_CHAR_VALUE;
|
||||
*/
|
||||
return (int )code;
|
||||
}
|
||||
return first;
|
||||
}
|
||||
|
||||
static int
|
||||
eucjp_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
UChar *p = buf;
|
||||
|
||||
if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff));
|
||||
if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff));
|
||||
*p++ = (UChar )(code & 0xff);
|
||||
|
||||
#if 1
|
||||
if (enc_len(ONIG_ENCODING_EUC_JP, buf[0]) != (p - buf))
|
||||
return ONIGERR_INVALID_WIDE_CHAR_VALUE;
|
||||
#endif
|
||||
return p - buf;
|
||||
}
|
||||
|
||||
static int
|
||||
eucjp_mbc_to_lower(UChar* p, UChar* lower)
|
||||
{
|
||||
int len;
|
||||
|
||||
if (ONIGENC_IS_MBC_ASCII(p)) {
|
||||
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
len = enc_len(ONIG_ENCODING_EUC_JP, *p);
|
||||
if (lower != p) {
|
||||
/* memcpy(lower, p, len); */
|
||||
int i;
|
||||
for (i = 0; i < len; i++) {
|
||||
*lower++ = *p++;
|
||||
}
|
||||
}
|
||||
return len; /* return byte length of converted char to lower */
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
eucjp_code_is_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
|
||||
if (code < 128)
|
||||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
|
||||
else {
|
||||
int first = eucjp_code_to_mbc_first(code);
|
||||
return (enc_len(ONIG_ENCODING_EUC_JP, first) > 1 ? TRUE : FALSE);
|
||||
}
|
||||
|
||||
ctype &= ~ONIGENC_CTYPE_WORD;
|
||||
if (ctype == 0) return FALSE;
|
||||
}
|
||||
|
||||
if (code < 128)
|
||||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
 * Given a position `s` inside the text beginning at `start`, return a
 * pointer at or before `s` that this routine treats as a character head.
 *
 * Assumed in this encoding,
 * mb-trail bytes don't mix with single bytes.
 *
 * Method: walk backwards from `s` to the nearest byte outside 0xa1..0xfe
 * (or to `start`).  If the character beginning there covers `s`, that is
 * the answer.  Otherwise skip that character and advance by the largest
 * even number of bytes that does not pass `s`, i.e. the bytes in between
 * are stepped over in units of two.
 */
static UChar*
eucjp_left_adjust_char_head(UChar* start, UChar* s)
{
  UChar *head;
  int len;

  if (s <= start) return s;

  for (head = s; head > start; head--) {
    if (eucjp_islead(*head)) break;
  }

  len = enc_len(ONIG_ENCODING_EUC_JP, *head);
  if (head + len > s) return head;

  head += len;
  return head + ((s - head) & ~1);
}
|
||||
|
||||
static int
|
||||
eucjp_is_allowed_reverse_match(UChar* s, UChar* end)
|
||||
{
|
||||
UChar c = *s;
|
||||
if (c <= 0x7e || c == 0x8e || c == 0x8f)
|
||||
return TRUE;
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
 * Encoding descriptor for EUC-JP.
 * The initializers are positional; the trailing comments name the
 * OnigEncodingType member each one fills.
 */
OnigEncodingType OnigEncodingEUC_JP = {
  { /* len_table: character length by first byte.
       0x8e -> 2, 0x8f -> 3, 0xa1..0xfe -> 2, everything else -> 1 */
    /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
    /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xa0 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xb0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
  },
  "EUC-JP",                                /* name */
  3,                                       /* max_enc_len */
  FALSE,                                   /* is_fold_match */
  ONIGENC_CTYPE_SUPPORT_LEVEL_SB,          /* ctype_support_level */
  FALSE,                                   /* is_continuous_sb_mb */
  eucjp_mbc_to_code,                       /* mbc_to_code */
  eucjp_code_to_mbclen,                    /* code_to_mbclen */
  eucjp_code_to_mbc,                       /* code_to_mbc */
  eucjp_mbc_to_lower,                      /* mbc_to_lower */
  onigenc_mbn_mbc_is_case_ambig,           /* mbc_is_case_ambig */
  eucjp_code_is_ctype,                     /* code_is_ctype */
  onigenc_nothing_get_ctype_code_range,    /* get_ctype_code_range */
  eucjp_left_adjust_char_head,             /* left_adjust_char_head */
  eucjp_is_allowed_reverse_match,          /* is_allowed_reverse_match */
  onigenc_nothing_get_all_fold_match_code, /* get_all_fold_match_code */
  onigenc_nothing_get_fold_match_info      /* get_fold_match_info */
};
|
77
oniggnu.h
Normal file
77
oniggnu.h
Normal file
|
@ -0,0 +1,77 @@
|
|||
/**********************************************************************
|
||||
|
||||
oniggnu.h - Oniguruma (regular expression library)
|
||||
|
||||
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
|
||||
|
||||
**********************************************************************/
|
||||
#ifndef ONIGGNU_H
#define ONIGGNU_H

#include "oniguruma.h"

/* multibyte character type identifiers */
#define MBCTYPE_ASCII         0
#define MBCTYPE_EUC           1
#define MBCTYPE_SJIS          2
#define MBCTYPE_UTF8          3

/* GNU regex options */
#ifndef RE_NREGS
#define RE_NREGS              ONIG_NREGION
#endif
#define RE_OPTION_IGNORECASE  ONIG_OPTION_IGNORECASE
#define RE_OPTION_EXTENDED    ONIG_OPTION_EXTEND
#define RE_OPTION_MULTILINE   ONIG_OPTION_MULTILINE
#define RE_OPTION_SINGLELINE  ONIG_OPTION_SINGLELINE
#define RE_OPTION_LONGEST     ONIG_OPTION_FIND_LONGEST
#define RE_OPTION_POSIXLINE   (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE)

#ifdef RUBY_PLATFORM
/* Inside Ruby the GNU-style re_* names are remapped to ruby_* names ... */
#define re_mbcinit              ruby_re_mbcinit
#define re_compile_pattern      ruby_re_compile_pattern
#define re_recompile_pattern    ruby_re_recompile_pattern
#define re_free_pattern         ruby_re_free_pattern
#define re_adjust_startpos      ruby_re_adjust_startpos
#define re_search               ruby_re_search
#define re_match                ruby_re_match
#define re_set_casetable        ruby_re_set_casetable
#define re_copy_registers       ruby_re_copy_registers
#define re_free_registers       ruby_re_free_registers
#define register_info_type      ruby_register_info_type
#define re_error_code_to_str    ruby_error_code_to_str

/* ... and these two ruby_* names are mapped on to the onig_* functions. */
#define ruby_error_code_to_str  onig_error_code_to_str
#define ruby_re_copy_registers  onig_region_copy
#else
/* Outside Ruby these two names map directly to the onig_* functions. */
#define re_error_code_to_str    onig_error_code_to_str
#define re_copy_registers       onig_region_copy
#endif

/* re_mbcinit takes an OnigEncoding under ONIG_RUBY_M17N, an int otherwise. */
#ifdef ONIG_RUBY_M17N
ONIG_EXTERN void re_mbcinit P_((OnigEncoding));
#else
ONIG_EXTERN void re_mbcinit P_((int));
#endif

/* GNU regex compatible entry points */
ONIG_EXTERN int  re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
ONIG_EXTERN int  re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
ONIG_EXTERN void re_free_pattern P_((struct re_pattern_buffer*));
ONIG_EXTERN int  re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int));
ONIG_EXTERN int  re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*));
ONIG_EXTERN int  re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*));
ONIG_EXTERN void re_set_casetable P_((const char*));
ONIG_EXTERN void re_free_registers P_((struct re_registers*));
ONIG_EXTERN int  re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */

#endif /* ONIGGNU_H */
|
715
oniguruma.h
Normal file
715
oniguruma.h
Normal file
|
@ -0,0 +1,715 @@
|
|||
/**********************************************************************
|
||||
|
||||
oniguruma.h - Oniguruma (regular expression library)
|
||||
|
||||
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
|
||||
|
||||
**********************************************************************/
|
||||
#ifndef ONIGURUMA_H
|
||||
#define ONIGURUMA_H
|
||||
|
||||
#define ONIGURUMA
|
||||
#define ONIGURUMA_VERSION_MAJOR 2
|
||||
#define ONIGURUMA_VERSION_MINOR 2
|
||||
#define ONIGURUMA_VERSION_TEENY 4
|
||||
|
||||
#ifndef P_
|
||||
#if defined(__STDC__) || defined(_WIN32)
|
||||
# define P_(args) args
|
||||
#else
|
||||
# define P_(args) ()
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef PV_
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
# define PV_(args) args
|
||||
#else
|
||||
# define PV_(args) ()
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef ONIG_EXTERN
|
||||
#if defined(_WIN32) && !defined(__CYGWIN__)
|
||||
#if defined(EXPORT) || defined(RUBY_EXPORT)
|
||||
#define ONIG_EXTERN extern __declspec(dllexport)
|
||||
#else
|
||||
#define ONIG_EXTERN extern __declspec(dllimport)
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef ONIG_EXTERN
|
||||
#define ONIG_EXTERN extern
|
||||
#endif
|
||||
|
||||
/* PART: character encoding */
|
||||
|
||||
/* Basic scalar types used throughout the encoding API. */
typedef unsigned char  UChar;          /* one byte of encoded text */
typedef unsigned long  OnigCodePoint;  /* a character's code point value */
typedef unsigned int   OnigDistance;

/* All-bits-set OnigDistance value.  The replacement list is wrapped in
   parentheses so the macro is a single primary expression wherever it is
   expanded. */
#define ONIG_INFINITE_DISTANCE  (~((OnigDistance )0))

/* A pair of code points delimiting a range. */
typedef struct {
  OnigCodePoint from;
  OnigCodePoint to;
} OnigCodePointRange;

/* Capacity of the per-target arrays in OnigEncFoldMatchInfo. */
#define ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE  16

/* Fold-match information: `target_num` entries, each described by a byte
   length and a string pointer held in the parallel arrays below.
   NOTE(review): that target_num counts the used array slots is inferred
   from the field names -- confirm against the encoding implementations. */
typedef struct {
  int    target_num;
  int    target_byte_len[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
  UChar* target_str[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
} OnigEncFoldMatchInfo;
|
||||
|
||||
|
||||
#if defined(RUBY_PLATFORM) && defined(M17N_H)
|
||||
|
||||
#define ONIG_RUBY_M17N
|
||||
typedef m17n_encoding* OnigEncoding;
|
||||
|
||||
#else
|
||||
|
||||
typedef struct {
|
||||
const char len_table[256];
|
||||
const char* name;
|
||||
int max_enc_len;
|
||||
int is_fold_match;
|
||||
int ctype_support_level; /* sb-only/full */
|
||||
int is_continuous_sb_mb; /* code point is continuous from sb to mb */
|
||||
OnigCodePoint (*mbc_to_code)(UChar* p, UChar* end);
|
||||
int (*code_to_mbclen)(OnigCodePoint code);
|
||||
int (*code_to_mbc)(OnigCodePoint code, UChar *buf);
|
||||
int (*mbc_to_lower)(UChar* p, UChar* lower);
|
||||
int (*mbc_is_case_ambig)(UChar* p);
|
||||
int (*code_is_ctype)(OnigCodePoint code, unsigned int ctype);
|
||||
int (*get_ctype_code_range)(int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]);
|
||||
UChar* (*left_adjust_char_head)(UChar* start, UChar* s);
|
||||
int (*is_allowed_reverse_match)(UChar* p, UChar* e);
|
||||
int (*get_all_fold_match_code)(OnigCodePoint** codes);
|
||||
int (*get_fold_match_info)(UChar* p, UChar* end, OnigEncFoldMatchInfo** info);
|
||||
} OnigEncodingType;
|
||||
|
||||
typedef OnigEncodingType* OnigEncoding;
|
||||
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingASCII;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_1;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_2;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_3;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_4;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_5;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_6;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_7;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_8;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_9;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_10;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_11;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_13;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingUTF8;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingSJIS;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
|
||||
|
||||
#define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
|
||||
#define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1)
|
||||
#define ONIG_ENCODING_ISO_8859_2 (&OnigEncodingISO_8859_2)
|
||||
#define ONIG_ENCODING_ISO_8859_3 (&OnigEncodingISO_8859_3)
|
||||
#define ONIG_ENCODING_ISO_8859_4 (&OnigEncodingISO_8859_4)
|
||||
#define ONIG_ENCODING_ISO_8859_5 (&OnigEncodingISO_8859_5)
|
||||
#define ONIG_ENCODING_ISO_8859_6 (&OnigEncodingISO_8859_6)
|
||||
#define ONIG_ENCODING_ISO_8859_7 (&OnigEncodingISO_8859_7)
|
||||
#define ONIG_ENCODING_ISO_8859_8 (&OnigEncodingISO_8859_8)
|
||||
#define ONIG_ENCODING_ISO_8859_9 (&OnigEncodingISO_8859_9)
|
||||
#define ONIG_ENCODING_ISO_8859_10 (&OnigEncodingISO_8859_10)
|
||||
#define ONIG_ENCODING_ISO_8859_11 (&OnigEncodingISO_8859_11)
|
||||
#define ONIG_ENCODING_ISO_8859_13 (&OnigEncodingISO_8859_13)
|
||||
#define ONIG_ENCODING_ISO_8859_14 (&OnigEncodingISO_8859_14)
|
||||
#define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15)
|
||||
#define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16)
|
||||
#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8)
|
||||
#define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP)
|
||||
#define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW)
|
||||
#define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR)
|
||||
#define ONIG_ENCODING_EUC_CN (&OnigEncodingEUC_CN)
|
||||
#define ONIG_ENCODING_SJIS (&OnigEncodingSJIS)
|
||||
#define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8)
|
||||
#define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R)
|
||||
#define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5)
|
||||
|
||||
#endif /* else RUBY && M17N */
|
||||
|
||||
#define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
|
||||
|
||||
|
||||
/* work size */
|
||||
#define ONIGENC_CODE_TO_MBC_MAXLEN 7
|
||||
#define ONIGENC_MBC_TO_LOWER_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN
|
||||
|
||||
/* character types */
|
||||
#define ONIGENC_CTYPE_ALPHA (1<< 0)
|
||||
#define ONIGENC_CTYPE_BLANK (1<< 1)
|
||||
#define ONIGENC_CTYPE_CNTRL (1<< 2)
|
||||
#define ONIGENC_CTYPE_DIGIT (1<< 3)
|
||||
#define ONIGENC_CTYPE_GRAPH (1<< 4)
|
||||
#define ONIGENC_CTYPE_LOWER (1<< 5)
|
||||
#define ONIGENC_CTYPE_PRINT (1<< 6)
|
||||
#define ONIGENC_CTYPE_PUNCT (1<< 7)
|
||||
#define ONIGENC_CTYPE_SPACE (1<< 8)
|
||||
#define ONIGENC_CTYPE_UPPER (1<< 9)
|
||||
#define ONIGENC_CTYPE_XDIGIT (1<<10)
|
||||
#define ONIGENC_CTYPE_WORD (1<<11)
|
||||
#define ONIGENC_CTYPE_ASCII (1<<12)
|
||||
#define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT)
|
||||
|
||||
/* ctype support level */
|
||||
#define ONIGENC_CTYPE_SUPPORT_LEVEL_SB 0
|
||||
#define ONIGENC_CTYPE_SUPPORT_LEVEL_FULL 1
|
||||
|
||||
|
||||
#define enc_len(enc,byte) ONIGENC_MBC_LEN_BY_HEAD(enc,byte)
|
||||
|
||||
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
|
||||
#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
|
||||
#define ONIGENC_IS_MBC_HEAD(enc,byte) (ONIGENC_MBC_LEN_BY_HEAD(enc,byte) != 1)
|
||||
#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
|
||||
#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
|
||||
#define ONIGENC_IS_CODE_SB_WORD(enc,code) \
|
||||
(ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))
|
||||
#define ONIGENC_IS_MBC_WORD(enc,s,end) \
|
||||
ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
|
||||
|
||||
|
||||
#ifdef ONIG_RUBY_M17N
|
||||
|
||||
#include <ctype.h> /* for isblank(), isgraph() */
|
||||
|
||||
#define ONIGENC_MBC_TO_LOWER(enc,p,buf) onigenc_mbc_to_lower(enc,p,buf)
|
||||
#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) onigenc_mbc_is_case_ambig(enc,p)
|
||||
|
||||
#define ONIGENC_IS_FOLD_MATCH(enc) FALSE
|
||||
#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) FALSE
|
||||
#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ONIGENC_CTYPE_SUPPORT_LEVEL_SB
|
||||
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
|
||||
onigenc_is_allowed_reverse_match(enc, s, end)
|
||||
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
|
||||
onigenc_get_left_adjust_char_head(enc, start, s)
|
||||
#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) 0
|
||||
#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) ONIG_NO_SUPPORT_CONFIG
|
||||
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
|
||||
ONIG_NO_SUPPORT_CONFIG
|
||||
/* Character byte length by first byte (m17n build).  Both arguments are
   parenthesised so the (int ) cast applies to the whole `b` expression
   rather than only to its first operand. */
#define ONIGENC_MBC_LEN_BY_HEAD(enc,b) m17n_mbclen((enc),(int )(b))
|
||||
#define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc)
|
||||
#define ONIGENC_MBC_MAXLEN_DIST(enc) \
|
||||
(ONIGENC_MBC_MAXLEN(enc) > 0 ? ONIGENC_MBC_MAXLEN(enc) \
|
||||
: ONIG_INFINITE_DISTANCE)
|
||||
#define ONIGENC_MBC_TO_CODE(enc,p,e) m17n_codepoint((enc),(p),(e))
|
||||
#define ONIGENC_CODE_TO_MBCLEN(enc,code) m17n_codelen((enc),(code))
|
||||
#define ONIGENC_CODE_TO_MBC(enc,code,buf) onigenc_code_to_mbc(enc, code, buf)
|
||||
|
||||
#if 0
|
||||
#define ONIGENC_STEP_BACK(enc,start,s,n) /* !! not supported !! */
|
||||
#endif
|
||||
|
||||
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) \
|
||||
onigenc_is_code_ctype(enc,code,ctype)
|
||||
|
||||
/* Blank / graph tests for the m17n build.  When <ctype.h> provides
   isblank / isgraph as macros they are used directly; otherwise an
   equivalent expression is substituted.  `enc` is not used.
   `code` is parenthesised before the (int ) cast so the cast covers the
   whole argument expression, not just its first operand. */
#ifdef isblank
# define ONIGENC_IS_CODE_BLANK(enc,code)    isblank((int )(code))
#else
# define ONIGENC_IS_CODE_BLANK(enc,code)    ((code) == ' ' || (code) == '\t')
#endif
#ifdef isgraph
# define ONIGENC_IS_CODE_GRAPH(enc,code)    isgraph((int )(code))
#else
# define ONIGENC_IS_CODE_GRAPH(enc,code) \
  (isprint((int )(code)) && !isspace((int )(code)))
#endif
|
||||
|
||||
#define ONIGENC_IS_CODE_PRINT(enc,code) m17n_isprint(enc,code)
|
||||
#define ONIGENC_IS_CODE_ALNUM(enc,code) m17n_isalnum(enc,code)
|
||||
#define ONIGENC_IS_CODE_ALPHA(enc,code) m17n_isalpha(enc,code)
|
||||
#define ONIGENC_IS_CODE_LOWER(enc,code) m17n_islower(enc,code)
|
||||
#define ONIGENC_IS_CODE_UPPER(enc,code) m17n_isupper(enc,code)
|
||||
#define ONIGENC_IS_CODE_CNTRL(enc,code) m17n_iscntrl(enc,code)
|
||||
#define ONIGENC_IS_CODE_PUNCT(enc,code) m17n_ispunct(enc,code)
|
||||
#define ONIGENC_IS_CODE_SPACE(enc,code) m17n_isspace(enc,code)
|
||||
#define ONIGENC_IS_CODE_DIGIT(enc,code) m17n_isdigit(enc,code)
|
||||
#define ONIGENC_IS_CODE_XDIGIT(enc,code) m17n_isxdigit(enc,code)
|
||||
#define ONIGENC_IS_CODE_WORD(enc,code) m17n_iswchar(enc,code)
|
||||
|
||||
ONIG_EXTERN
|
||||
int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype));
|
||||
ONIG_EXTERN
|
||||
int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
|
||||
ONIG_EXTERN
|
||||
int onigenc_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* buf));
|
||||
ONIG_EXTERN
|
||||
int onigenc_mbc_is_case_ambig P_((OnigEncoding enc, UChar* p));
|
||||
ONIG_EXTERN
|
||||
int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end));
|
||||
|
||||
#else /* ONIG_RUBY_M17N */
|
||||
|
||||
#define ONIGENC_NAME(enc) ((enc)->name)
|
||||
|
||||
#define ONIGENC_MBC_TO_LOWER(enc,p,buf) (enc)->mbc_to_lower(p,buf)
|
||||
#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) (enc)->mbc_is_case_ambig(p)
|
||||
|
||||
#define ONIGENC_IS_FOLD_MATCH(enc) ((enc)->is_fold_match)
|
||||
#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) ((enc)->is_continuous_sb_mb)
|
||||
#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ((enc)->ctype_support_level)
|
||||
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
|
||||
(enc)->is_allowed_reverse_match(s,end)
|
||||
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
|
||||
(enc)->left_adjust_char_head(start, s)
|
||||
#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) \
|
||||
(enc)->get_all_fold_match_code(codes)
|
||||
#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) \
|
||||
(enc)->get_fold_match_info(p,end,info)
|
||||
#define ONIGENC_STEP_BACK(enc,start,s,n) \
|
||||
onigenc_step_back((enc),(start),(s),(n))
|
||||
|
||||
#define ONIGENC_MBC_LEN_BY_HEAD(enc,byte) ((enc)->len_table[(int )(byte)])
|
||||
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
|
||||
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
|
||||
#define ONIGENC_MBC_TO_CODE(enc,p,e) (enc)->mbc_to_code((p),(e))
|
||||
#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code)
|
||||
#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf)
|
||||
|
||||
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->code_is_ctype(code,ctype)
|
||||
|
||||
#define ONIGENC_IS_CODE_GRAPH(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
|
||||
#define ONIGENC_IS_CODE_PRINT(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
|
||||
#define ONIGENC_IS_CODE_ALNUM(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
|
||||
#define ONIGENC_IS_CODE_ALPHA(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
|
||||
#define ONIGENC_IS_CODE_LOWER(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
|
||||
#define ONIGENC_IS_CODE_UPPER(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
|
||||
#define ONIGENC_IS_CODE_CNTRL(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
|
||||
#define ONIGENC_IS_CODE_PUNCT(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
|
||||
#define ONIGENC_IS_CODE_SPACE(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
|
||||
#define ONIGENC_IS_CODE_BLANK(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
|
||||
#define ONIGENC_IS_CODE_DIGIT(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
|
||||
#define ONIGENC_IS_CODE_XDIGIT(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
|
||||
#define ONIGENC_IS_CODE_WORD(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
|
||||
|
||||
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
|
||||
(enc)->get_ctype_code_range(ctype,nsb,nmb,sbr,mbr)
|
||||
|
||||
ONIG_EXTERN
|
||||
UChar* onigenc_step_back P_((OnigEncoding enc, UChar* start, UChar* s, int n));
|
||||
|
||||
#endif /* is not ONIG_RUBY_M17N */
|
||||
|
||||
|
||||
/* encoding API */
|
||||
ONIG_EXTERN
|
||||
int onigenc_init P_(());
|
||||
ONIG_EXTERN
|
||||
int onigenc_set_default_encoding P_((OnigEncoding enc));
|
||||
ONIG_EXTERN
|
||||
OnigEncoding onigenc_get_default_encoding P_(());
|
||||
ONIG_EXTERN
|
||||
void onigenc_set_default_caseconv_table P_((UChar* table));
|
||||
ONIG_EXTERN
|
||||
UChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, UChar* start, UChar* s, UChar** prev));
|
||||
ONIG_EXTERN
|
||||
UChar* onigenc_get_prev_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
|
||||
ONIG_EXTERN
|
||||
UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
|
||||
ONIG_EXTERN
|
||||
UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
|
||||
|
||||
|
||||
|
||||
/* PART: regular expression */
|
||||
|
||||
/* config parameters */
|
||||
#define ONIG_NREGION 10
|
||||
#define ONIG_MAX_BACKREF_NUM 1000
|
||||
#define ONIG_MAX_REPEAT_NUM 100000
|
||||
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 1000
|
||||
/* constants */
|
||||
#define ONIG_MAX_ERROR_MESSAGE_LEN 90
|
||||
|
||||
#if defined(RUBY_PLATFORM) && !defined(ONIG_RUBY_M17N)
|
||||
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
|
||||
#undef ismbchar
|
||||
#define ismbchar(c) (mbclen((c)) != 1)
|
||||
#define mbclen(c) (OnigEncDefaultCharEncoding->len_table[(unsigned char )(c)])
|
||||
#endif
|
||||
|
||||
/* Bit set built from the ONIG_OPTION_* flags below. */
typedef unsigned int OnigOptionType;

#define ONIG_OPTION_DEFAULT             ONIG_OPTION_NONE

/* options -- each flag is the previous one shifted left by one bit */
#define ONIG_OPTION_NONE                0
#define ONIG_OPTION_IGNORECASE          1L                                    /* 0x001 */
#define ONIG_OPTION_EXTEND              (ONIG_OPTION_IGNORECASE         << 1) /* 0x002 */
#define ONIG_OPTION_MULTILINE           (ONIG_OPTION_EXTEND             << 1) /* 0x004 */
#define ONIG_OPTION_SINGLELINE          (ONIG_OPTION_MULTILINE          << 1) /* 0x008 */
#define ONIG_OPTION_FIND_LONGEST        (ONIG_OPTION_SINGLELINE         << 1) /* 0x010 */
#define ONIG_OPTION_FIND_NOT_EMPTY      (ONIG_OPTION_FIND_LONGEST       << 1) /* 0x020 */
#define ONIG_OPTION_NEGATE_SINGLELINE   (ONIG_OPTION_FIND_NOT_EMPTY     << 1) /* 0x040 */
#define ONIG_OPTION_DONT_CAPTURE_GROUP  (ONIG_OPTION_NEGATE_SINGLELINE  << 1) /* 0x080 */
#define ONIG_OPTION_CAPTURE_GROUP       (ONIG_OPTION_DONT_CAPTURE_GROUP << 1) /* 0x100 */
/* options (search time) */
#define ONIG_OPTION_NOTBOL              (ONIG_OPTION_CAPTURE_GROUP      << 1) /* 0x200 */
#define ONIG_OPTION_NOTEOL              (ONIG_OPTION_NOTBOL             << 1) /* 0x400 */
#define ONIG_OPTION_POSIX_REGION        (ONIG_OPTION_NOTEOL             << 1) /* 0x800 */

/* set, clear and test flags; ON and OFF modify `options` in place */
#define ONIG_OPTION_ON(options,regopt)      ((options) |= (regopt))
#define ONIG_OPTION_OFF(options,regopt)     ((options) &= ~(regopt))
#define ONIG_IS_OPTION_ON(options,option)   ((options) & (option))
|
||||
|
||||
/* syntax */
|
||||
typedef struct {
|
||||
unsigned int op;
|
||||
unsigned int op2;
|
||||
unsigned int behavior;
|
||||
OnigOptionType options; /* default option */
|
||||
} OnigSyntaxType;
|
||||
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
|
||||
|
||||
/* predefined syntaxes (see regparse.c) */
|
||||
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
|
||||
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
|
||||
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
|
||||
#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
|
||||
#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
|
||||
#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
|
||||
#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
|
||||
#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
|
||||
|
||||
/* default syntax */
|
||||
ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
|
||||
#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
|
||||
|
||||
/* syntax (operators) */
|
||||
#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1<<0)
|
||||
#define ONIG_SYN_OP_DOT_ANYCHAR (1<<1) /* . */
|
||||
#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1<<2) /* * */
|
||||
#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1<<3)
|
||||
#define ONIG_SYN_OP_PLUS_ONE_INF (1<<4) /* + */
|
||||
#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1<<5)
|
||||
#define ONIG_SYN_OP_QMARK_ZERO_ONE (1<<6) /* ? */
|
||||
#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1<<7)
|
||||
#define ONIG_SYN_OP_BRACE_INTERVAL (1<<8) /* {lower,upper} */
|
||||
#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1<<9) /* \{lower,upper\} */
|
||||
#define ONIG_SYN_OP_VBAR_ALT (1<<10) /* | */
|
||||
#define ONIG_SYN_OP_ESC_VBAR_ALT (1<<11) /* \| */
|
||||
#define ONIG_SYN_OP_LPAREN_SUBEXP (1<<12) /* (...) */
|
||||
#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1<<13) /* \(...\) */
|
||||
#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1<<14) /* \A, \Z, \z */
|
||||
#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1<<15) /* \G */
|
||||
#define ONIG_SYN_OP_DECIMAL_BACKREF (1<<16) /* \num */
|
||||
#define ONIG_SYN_OP_BRACKET_CC (1<<17) /* [...] */
|
||||
#define ONIG_SYN_OP_ESC_W_WORD (1<<18) /* \w, \W */
|
||||
#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1<<19) /* \<. \> */
|
||||
#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1<<20) /* \b, \B */
|
||||
#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1<<21) /* \s, \S */
|
||||
#define ONIG_SYN_OP_ESC_D_DIGIT (1<<22) /* \d, \D */
|
||||
#define ONIG_SYN_OP_LINE_ANCHOR (1<<23) /* ^, $ */
|
||||
#define ONIG_SYN_OP_POSIX_BRACKET (1<<24) /* [:xxxx:] */
|
||||
#define ONIG_SYN_OP_QMARK_NON_GREEDY (1<<25) /* ??,*?,+?,{n,m}? */
|
||||
#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1<<26) /* \n,\r,\t,\a ... */
|
||||
#define ONIG_SYN_OP_ESC_C_CONTROL (1<<27) /* \cx */
|
||||
#define ONIG_SYN_OP_ESC_OCTAL3 (1<<28) /* \OOO */
|
||||
#define ONIG_SYN_OP_ESC_X_HEX2 (1<<29) /* \xHH */
|
||||
#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1<<30) /* \x{7HHHHHHH} */
|
||||
|
||||
/* Second word of syntax operator flags (bits 0-16). */
#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE        (1<<0)   /* \Q...\E */
#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT         (1<<1)   /* (?...) */
#define ONIG_SYN_OP2_OPTION_PERL                (1<<2)   /* (?imsx),(?-imsx) */
#define ONIG_SYN_OP2_OPTION_RUBY                (1<<3)   /* (?imx), (?-imx) */
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT     (1<<4)   /* ?+,*+,++ */
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL   (1<<5)   /* {n,m}+ */
#define ONIG_SYN_OP2_CCLASS_SET_OP              (1<<6)   /* [...&&..[..]..] */
#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP       (1<<7)   /* (?<name>...) */
#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF        (1<<8)   /* \k<name> */
#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL          (1<<9)   /* \g<name>, \g<n> */
#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY     (1<<10)  /* (?@..),(?@<x>..) */
#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL  (1<<11)  /* \C-x */
#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META     (1<<12)  /* \M-x */
#define ONIG_SYN_OP2_ESC_V_VTAB                 (1<<13)  /* \v as VTAB */
#define ONIG_SYN_OP2_ESC_U_HEX4                 (1<<14)  /* \uHHHH */
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR         (1<<15)  /* \`, \' */
#define ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY        (1<<16)  /* \p{...}, \P{...} */
|
||||
|
||||
/* syntax (behavior) flags */
/* Bit 31 uses an unsigned literal: (1<<31) would shift into the sign bit of
   a 32-bit int, which is undefined behavior.  The value is meant to be
   stored in an unsigned int behavior word. */
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS           (1U<<31) /* not implemented */
#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS        (1<<0)   /* ?, *, +, {n,m} */
#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS      (1<<1)   /* error or ignore */
#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP    (1<<2)   /* ...)... */
#define ONIG_SYN_ALLOW_INVALID_INTERVAL          (1<<3)   /* {??? */
#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV       (1<<4)   /* {,n} => {0,n} */
#define ONIG_SYN_STRICT_CHECK_BACKREF            (1<<5)   /* /(\1)/,/\1()/ .. */
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND   (1<<6)   /* (?<=a|bc) */
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP        (1<<7)   /* see doc/RE */
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1<<8)   /* (?<x>)(?<x>) */
|
||||
|
||||
/* syntax (behavior) flags that apply inside a char class [...] */
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC   (1<<20)  /* [^...] */
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC       (1<<21)  /* [..\w..] etc.. */
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC      (1<<22)
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC  (1<<23)  /* [0-9-a]=[0-9\-a] */

/* syntax (behavior) flags that only enable warnings */
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED       (1<<24)  /* [,-,] */
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1<<25)  /* (?:a*)+ */
|
||||
|
||||
/* selectors for the `what` argument of onig_set_meta_char() */
#define ONIG_META_CHAR_ESCAPE            0
#define ONIG_META_CHAR_ANYCHAR           1
#define ONIG_META_CHAR_ANYTIME           2
#define ONIG_META_CHAR_ZERO_OR_ONE_TIME  3
#define ONIG_META_CHAR_ONE_OR_MORE_TIME  4
#define ONIG_META_CHAR_ANYCHAR_ANYTIME   5

/* NOTE(review): presumably the character value meaning "no meta char
   assigned" -- confirm against onig_set_meta_char(). */
#define ONIG_INEFFECTIVE_META_CHAR       0
|
||||
|
||||
/* error codes */
/* True for codes in the range [-999, -100].  `ecode` is evaluated twice. */
#define ONIG_IS_PATTERN_ERROR(ecode)   ((ecode) > -1000 && (ecode) <= -100)

/* normal return */
#define ONIG_NORMAL                                            0
#define ONIG_MISMATCH                                         -1
#define ONIG_NO_SUPPORT_CONFIG                                -2

/* internal error */
#define ONIGERR_PARSER_BUG                                   -11
#define ONIGERR_STACK_BUG                                    -12
#define ONIGERR_UNDEFINED_BYTECODE                           -13
#define ONIGERR_UNEXPECTED_BYTECODE                          -14
#define ONIGERR_MATCH_STACK_LIMIT_OVER                       -15
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED               -21
#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22

/* general error */
#define ONIGERR_INVALID_ARGUMENT                             -30

/* syntax error */
#define ONIGERR_END_PATTERN_AT_LEFT_BRACE                   -100
#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET                 -101
#define ONIGERR_EMPTY_CHAR_CLASS                            -102
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS                 -103
#define ONIGERR_END_PATTERN_AT_BACKSLASH                    -104
#define ONIGERR_END_PATTERN_AT_META                         -105
#define ONIGERR_END_PATTERN_AT_CONTROL                      -106
#define ONIGERR_META_CODE_SYNTAX                            -108
#define ONIGERR_CONTROL_CODE_SYNTAX                         -109
#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE            -110
#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE          -111
#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS     -112
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED     -113
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID           -114
#define ONIGERR_NESTED_REPEAT_OPERATOR                      -115
#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS                 -116
#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS      -117
#define ONIGERR_END_PATTERN_IN_GROUP                        -118
#define ONIGERR_UNDEFINED_GROUP_OPTION                      -119
#define ONIGERR_INVALID_POSIX_BRACKET_TYPE                  -121
#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN                 -122
#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN                -123

/* values error (syntax error) */
#define ONIGERR_TOO_BIG_NUMBER                              -200
#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE             -201
#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE    -202
#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS                   -203
#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE         -204
#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES                  -205
#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING                 -206
#define ONIGERR_TOO_BIG_BACKREF_NUMBER                      -207
#define ONIGERR_INVALID_BACKREF                             -208
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED        -209
#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE                    -212
#define ONIGERR_EMPTY_GROUP_NAME                            -214
#define ONIGERR_INVALID_GROUP_NAME                          -215
#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME                  -216
#define ONIGERR_UNDEFINED_NAME_REFERENCE                    -217
#define ONIGERR_UNDEFINED_GROUP_REFERENCE                   -218
#define ONIGERR_MULTIPLEX_DEFINED_NAME                      -219
#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL              -220
#define ONIGERR_NEVER_ENDING_RECURSION                      -221
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY       -222
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME                  -223

/* errors related to thread */
#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT               -1001
|
||||
|
||||
|
||||
/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
#define ONIG_MAX_CAPTURE_HISTORY_GROUP   31

/* Non-zero when group index `i` is within the limit and region `r` has a
   non-null list whose entry `i` is non-null.  Arguments may be evaluated
   more than once. */
#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
  ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && \
   (r)->list != 0 && (r)->list[i] != 0)
|
||||
|
||||
/* match result region type */
struct re_registers {
  int   allocated;   /* NOTE(review): presumably capacity of beg/end -- confirm */
  int   num_regs;    /* NOTE(review): presumably registers in use -- confirm */
  int  *beg;
  int  *end;

  /* extended: capture history, entries list[1] .. list[31] */
  struct re_registers **list;
};
|
||||
|
||||
/* NOTE(review): presumably the beg/end value for a group that did not
   take part in the match -- confirm against the matcher. */
#define ONIG_REGION_NOTPOS   -1

/* Public alias for the match-result register set. */
typedef struct re_registers   OnigRegion;
|
||||
|
||||
typedef struct {
|
||||
UChar* par;
|
||||
UChar* par_end;
|
||||
} OnigErrorInfo;
|
||||
|
||||
/* Lower/upper bound pair; regex_t keeps an array of these in repeat_range. */
typedef struct {
  int lower;
  int upper;
} OnigRepeatRange;
|
||||
|
||||
/* Callback type taking a warning message string. */
typedef void (*OnigWarnFunc) P_((char* s));

/* Warning callback declared for use as ONIG_NULL_WARN. */
extern void onig_null_warn P_((char* s));
#define ONIG_NULL_WARN       onig_null_warn
|
||||
|
||||
/* Number of entries in byte-indexed tables such as regex_t.map. */
#define ONIG_CHAR_TABLE_SIZE   256

/* regex_t state */
#define ONIG_STATE_NORMAL      0
#define ONIG_STATE_SEARCHING   1
#define ONIG_STATE_COMPILING  -1
#define ONIG_STATE_MODIFY     -2

/* Collapse any positive state value to ONIG_STATE_SEARCHING; zero and
   negative values are returned unchanged.  `reg` may be evaluated twice. */
#define ONIG_STATE(reg) \
  ((reg)->state <= 0 ? (reg)->state : ONIG_STATE_SEARCHING)
|
||||
|
||||
typedef struct re_pattern_buffer {
|
||||
/* common members of BBuf(bytes-buffer) */
|
||||
unsigned char* p; /* compiled pattern */
|
||||
unsigned int used; /* used space for p */
|
||||
unsigned int alloc; /* allocated space for p */
|
||||
|
||||
int state; /* normal, searching, compiling */
|
||||
int num_mem; /* used memory(...) num counted from 1 */
|
||||
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
|
||||
int num_null_check; /* OP_NULL_CHECK_START/END id counter */
|
||||
int num_call; /* number of subexp call */
|
||||
unsigned int capture_history; /* (?@...) flag (1-31) */
|
||||
unsigned int bt_mem_start; /* need backtrack flag */
|
||||
unsigned int bt_mem_end; /* need backtrack flag */
|
||||
int stack_pop_level;
|
||||
int repeat_range_alloc;
|
||||
OnigRepeatRange* repeat_range;
|
||||
|
||||
OnigEncoding enc;
|
||||
OnigOptionType options;
|
||||
OnigSyntaxType* syntax;
|
||||
void* name_table;
|
||||
|
||||
/* optimization info (string search, char-map and anchors) */
|
||||
int optimize; /* optimize flag */
|
||||
int threshold_len; /* search str-length for apply optimize */
|
||||
int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
|
||||
OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */
|
||||
OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */
|
||||
int sub_anchor; /* start-anchor for exact or map */
|
||||
unsigned char *exact;
|
||||
unsigned char *exact_end;
|
||||
unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
|
||||
int *int_map; /* BM skip for exact_len > 255 */
|
||||
int *int_map_backward; /* BM skip for backward search */
|
||||
OnigDistance dmin; /* min-distance of exact or map */
|
||||
OnigDistance dmax; /* max-distance of exact or map */
|
||||
|
||||
/* regex_t link chain */
|
||||
struct re_pattern_buffer* chain; /* escape compile-conflict */
|
||||
} regex_t;
|
||||
|
||||
|
||||
/* Oniguruma Native API */
|
||||
ONIG_EXTERN
|
||||
int onig_init P_((void));
|
||||
ONIG_EXTERN
|
||||
int onig_error_code_to_str PV_((UChar* s, int err_code, ...));
|
||||
ONIG_EXTERN
|
||||
void onig_set_warn_func P_((OnigWarnFunc f));
|
||||
ONIG_EXTERN
|
||||
void onig_set_verb_warn_func P_((OnigWarnFunc f));
|
||||
ONIG_EXTERN
|
||||
int onig_new P_((regex_t**, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
void onig_free P_((regex_t*));
|
||||
ONIG_EXTERN
|
||||
int onig_recompile P_((regex_t*, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
int onig_search P_((regex_t*, UChar* str, UChar* end, UChar* start, UChar* range, OnigRegion* region, OnigOptionType option));
|
||||
ONIG_EXTERN
|
||||
int onig_match P_((regex_t*, UChar* str, UChar* end, UChar* at, OnigRegion* region, OnigOptionType option));
|
||||
ONIG_EXTERN
|
||||
OnigRegion* onig_region_new P_((void));
|
||||
ONIG_EXTERN
|
||||
void onig_region_free P_((OnigRegion* region, int free_self));
|
||||
ONIG_EXTERN
|
||||
void onig_region_copy P_((OnigRegion* to, OnigRegion* from));
|
||||
ONIG_EXTERN
|
||||
void onig_region_clear P_((OnigRegion* region));
|
||||
ONIG_EXTERN
|
||||
int onig_region_resize P_((OnigRegion* region, int n));
|
||||
ONIG_EXTERN
|
||||
int onig_name_to_group_numbers P_((regex_t* reg, UChar* name, UChar* name_end,
|
||||
int** nums));
|
||||
ONIG_EXTERN
|
||||
int onig_name_to_backref_number P_((regex_t* reg, UChar* name, UChar* name_end, OnigRegion *region));
|
||||
ONIG_EXTERN
|
||||
int onig_foreach_name P_((regex_t* reg, int (*func)(UChar*,UChar*,int,int*,regex_t*,void*), void* arg));
|
||||
ONIG_EXTERN
|
||||
int onig_number_of_names P_((regex_t* reg));
|
||||
ONIG_EXTERN
|
||||
OnigEncoding onig_get_encoding P_((regex_t* reg));
|
||||
ONIG_EXTERN
|
||||
OnigOptionType onig_get_options P_((regex_t* reg));
|
||||
ONIG_EXTERN
|
||||
OnigSyntaxType* onig_get_syntax P_((regex_t* reg));
|
||||
ONIG_EXTERN
|
||||
int onig_set_default_syntax P_((OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN
|
||||
void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
|
||||
ONIG_EXTERN
|
||||
void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op));
|
||||
ONIG_EXTERN
|
||||
void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2));
|
||||
ONIG_EXTERN
|
||||
void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior));
|
||||
ONIG_EXTERN
|
||||
void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
|
||||
ONIG_EXTERN
|
||||
int onig_set_meta_char P_((unsigned int what, unsigned int c));
|
||||
ONIG_EXTERN
|
||||
int onig_end P_((void));
|
||||
ONIG_EXTERN
|
||||
const char* onig_version P_((void));
|
||||
|
||||
#endif /* ONIGURUMA_H */
|
586
regenc.c
Normal file
586
regenc.c
Normal file
|
@ -0,0 +1,586 @@
|
|||
/**********************************************************************
|
||||
|
||||
regenc.c - Oniguruma (regular expression library)
|
||||
|
||||
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
|
||||
|
||||
**********************************************************************/
|
||||
#include "regenc.h"
|
||||
|
||||
/* Global default encoding, initialized to ONIG_ENCODING_INIT_DEFAULT; read by
   onigenc_get_default_encoding() and replaced by
   onigenc_set_default_encoding(). */
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
|
||||
|
||||
/* Encoding-layer initialization hook.  Nothing needs setting up here, so it
   always returns 0. */
extern int
onigenc_init()
{
  const int status = 0;

  return status;
}
|
||||
|
||||
extern OnigEncoding
|
||||
onigenc_get_default_encoding()
|
||||
{
|
||||
return OnigEncDefaultCharEncoding;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_set_default_encoding(OnigEncoding enc)
|
||||
{
|
||||
OnigEncDefaultCharEncoding = enc;
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_get_right_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
|
||||
{
|
||||
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
|
||||
if (p < s) {
|
||||
p += enc_len(enc, *p);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
|
||||
UChar* start, UChar* s, UChar** prev)
|
||||
{
|
||||
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
|
||||
|
||||
if (p < s) {
|
||||
if (prev) *prev = p;
|
||||
p += enc_len(enc, *p);
|
||||
}
|
||||
else {
|
||||
if (prev) *prev = (UChar* )NULL; /* Sorry */
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_get_prev_char_head(OnigEncoding enc, UChar* start, UChar* s)
|
||||
{
|
||||
if (s <= start)
|
||||
return (UChar* )NULL;
|
||||
|
||||
return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_step_back(OnigEncoding enc, UChar* start, UChar* s, int n)
|
||||
{
|
||||
while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
|
||||
if (s <= start)
|
||||
return (UChar* )NULL;
|
||||
|
||||
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
#ifndef ONIG_RUBY_M17N
|
||||
|
||||
#ifndef NOT_RUBY
|
||||
#define USE_APPLICATION_TO_LOWER_CASE_TABLE
|
||||
#endif
|
||||
|
||||
/* Active to-lower-case table; starts as a null pointer and is installed by
   onigenc_set_default_caseconv_table(). */
UChar* OnigEncAsciiToLowerCaseTable = (UChar* )0;
|
||||
|
||||
#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
|
||||
static UChar BuiltInAsciiToLowerCaseTable[] = {
|
||||
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
|
||||
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
|
||||
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
|
||||
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
|
||||
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
|
||||
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
|
||||
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
|
||||
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
|
||||
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
|
||||
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
|
||||
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
|
||||
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
|
||||
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
|
||||
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
|
||||
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
|
||||
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
|
||||
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
|
||||
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
|
||||
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
|
||||
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
|
||||
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
|
||||
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
|
||||
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
|
||||
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
|
||||
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
|
||||
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
|
||||
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
|
||||
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
|
||||
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
|
||||
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
|
||||
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
|
||||
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
|
||||
};
|
||||
#endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */
|
||||
|
||||
/* Per-byte character-type bit masks for ASCII.
   Only entries 0x00-0x7f carry flags; entries 0x80-0xff are zero and are
   left to the implicit zero-initialization of the 256-element array.
   NOTE(review): bit meanings presumably follow the ONIGENC_CTYPE_* flags --
   confirm against their definitions. */
unsigned short OnigEncAsciiCtypeTable[256] = {
  /* 0x00 */ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0x08 */ 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
  /* 0x10 */ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0x18 */ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0x20 */ 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
  /* 0x28 */ 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
  /* 0x30 */ 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
  /* 0x38 */ 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
  /* 0x40 */ 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
  /* 0x48 */ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
  /* 0x50 */ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
  /* 0x58 */ 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
  /* 0x60 */ 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
  /* 0x68 */ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
  /* 0x70 */ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
  /* 0x78 */ 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004
  /* 0x80 - 0xff: 0x0000 */
};
|
||||
|
||||
extern void
|
||||
onigenc_set_default_caseconv_table(UChar* table)
|
||||
{
|
||||
if (table == (UChar* )0) {
|
||||
#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
|
||||
table = BuiltInAsciiToLowerCaseTable;
|
||||
#else
|
||||
return ;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (table != OnigEncAsciiToLowerCaseTable) {
|
||||
OnigEncAsciiToLowerCaseTable = table;
|
||||
}
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
|
||||
{
|
||||
return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_nothing_get_all_fold_match_code(OnigCodePoint** codes)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_nothing_get_fold_match_info(UChar* p, UChar* end,
|
||||
OnigEncFoldMatchInfo** info)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_nothing_get_ctype_code_range(int ctype, int* nsb, int* nmb,
|
||||
OnigCodePointRange* sbr[], OnigCodePointRange* mbr[])
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* for single byte encodings */
|
||||
extern int
|
||||
onigenc_ascii_mbc_to_lower(UChar* p, UChar* lower)
|
||||
{
|
||||
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_ascii_mbc_is_case_ambig(UChar* p)
|
||||
{
|
||||
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
|
||||
}
|
||||
|
||||
extern OnigCodePoint
|
||||
onigenc_single_byte_mbc_to_code(UChar* p, UChar* end)
|
||||
{
|
||||
return (OnigCodePoint )(*p);
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_single_byte_code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_single_byte_code_to_mbc_first(OnigCodePoint code)
|
||||
{
|
||||
return (code & 0xff);
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
*buf = (UChar )(code & 0xff);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Single-byte encodings: every position is a character head, so `s` is
   returned as-is; `start` is not consulted. */
extern UChar*
onigenc_single_byte_left_adjust_char_head(UChar* start, UChar* s)
{
  (void )start;
  return s;
}
|
||||
|
||||
extern int
|
||||
onigenc_single_byte_is_allowed_reverse_match(UChar* s, UChar* end)
|
||||
{
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
extern OnigCodePoint
|
||||
onigenc_mbn_mbc_to_code(OnigEncoding enc, UChar* p, UChar* end)
|
||||
{
|
||||
int c, i, len;
|
||||
OnigCodePoint n;
|
||||
|
||||
c = *p++;
|
||||
len = enc_len(enc, c);
|
||||
n = c;
|
||||
if (len == 1) return n;
|
||||
|
||||
for (i = 1; i < len; i++) {
|
||||
if (p >= end) break;
|
||||
c = *p++;
|
||||
n <<= 8; n += c;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mbn_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* lower)
|
||||
{
|
||||
int len;
|
||||
|
||||
if (ONIGENC_IS_MBC_ASCII(p)) {
|
||||
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
len = enc_len(enc, *p);
|
||||
if (lower != p) {
|
||||
/* memcpy(lower, p, len); */
|
||||
int i;
|
||||
for (i = 0; i < len; i++) {
|
||||
*lower++ = *p++;
|
||||
}
|
||||
}
|
||||
return len; /* return byte length of converted to lower char */
|
||||
}
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mbn_mbc_is_case_ambig(UChar* p)
|
||||
{
|
||||
if (ONIGENC_IS_MBC_ASCII(p))
|
||||
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mb2_code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
if ((code & 0xff00) != 0) return 2;
|
||||
else return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mb4_code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
if ((code & 0xff000000) != 0) return 4;
|
||||
else if ((code & 0xff0000) != 0) return 3;
|
||||
else if ((code & 0xff00) != 0) return 2;
|
||||
else return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mb2_code_to_mbc_first(OnigCodePoint code)
|
||||
{
|
||||
int first;
|
||||
|
||||
if ((code & 0xff00) != 0) {
|
||||
first = (code >> 8) & 0xff;
|
||||
}
|
||||
else {
|
||||
return (int )code;
|
||||
}
|
||||
return first;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mb4_code_to_mbc_first(OnigCodePoint code)
|
||||
{
|
||||
int first;
|
||||
|
||||
if ((code & 0xff000000) != 0) {
|
||||
first = (code >> 24) & 0xff;
|
||||
}
|
||||
else if ((code & 0xff0000) != 0) {
|
||||
first = (code >> 16) & 0xff;
|
||||
}
|
||||
else if ((code & 0xff00) != 0) {
|
||||
first = (code >> 8) & 0xff;
|
||||
}
|
||||
else {
|
||||
return (int )code;
|
||||
}
|
||||
return first;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
UChar *p = buf;
|
||||
|
||||
if ((code & 0xff00) != 0) {
|
||||
*p++ = (UChar )((code >> 8) & 0xff);
|
||||
}
|
||||
*p++ = (UChar )(code & 0xff);
|
||||
|
||||
#if 1
|
||||
if (enc_len(enc, buf[0]) != (p - buf))
|
||||
return ONIGERR_INVALID_WIDE_CHAR_VALUE;
|
||||
#endif
|
||||
return p - buf;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
UChar *p = buf;
|
||||
|
||||
if ((code & 0xff000000) != 0) {
|
||||
*p++ = (UChar )((code >> 24) & 0xff);
|
||||
}
|
||||
if ((code & 0xff0000) != 0) {
|
||||
*p++ = (UChar )((code >> 16) & 0xff);
|
||||
}
|
||||
if ((code & 0xff00) != 0) {
|
||||
*p++ = (UChar )((code >> 8) & 0xff);
|
||||
}
|
||||
*p++ = (UChar )(code & 0xff);
|
||||
|
||||
#if 1
|
||||
if (enc_len(enc, buf[0]) != (p - buf))
|
||||
return ONIGERR_INVALID_WIDE_CHAR_VALUE;
|
||||
#endif
|
||||
return p - buf;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mb2_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
|
||||
unsigned int ctype)
|
||||
{
|
||||
if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
|
||||
if (code < 128)
|
||||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
|
||||
else {
|
||||
int first = onigenc_mb2_code_to_mbc_first(code);
|
||||
return (enc_len(enc, first) > 1 ? TRUE : FALSE);
|
||||
}
|
||||
|
||||
ctype &= ~ONIGENC_CTYPE_WORD;
|
||||
if (ctype == 0) return FALSE;
|
||||
}
|
||||
|
||||
if (code < 128)
|
||||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mb4_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
|
||||
unsigned int ctype)
|
||||
{
|
||||
if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
|
||||
if (code < 128)
|
||||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
|
||||
else {
|
||||
int first = onigenc_mb4_code_to_mbc_first(code);
|
||||
return (enc_len(enc, first) > 1 ? TRUE : FALSE);
|
||||
}
|
||||
|
||||
ctype &= ~ONIGENC_CTYPE_WORD;
|
||||
if (ctype == 0) return FALSE;
|
||||
}
|
||||
|
||||
if (code < 128)
|
||||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_get_all_fold_match_code_ss_0xdf(OnigCodePoint** codes)
|
||||
{
|
||||
static OnigCodePoint list[] = { 0xdf };
|
||||
*codes = list;
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_get_fold_match_info_ss_0xdf(UChar* p, UChar* end,
|
||||
OnigEncFoldMatchInfo** info)
|
||||
{
|
||||
/* German alphabet ess-tsett(U+00DF) */
|
||||
static OnigEncFoldMatchInfo ss = {
|
||||
3,
|
||||
{ 1, 2, 2 },
|
||||
{ "\337", "ss", "SS" } /* 0337: 0xdf */
|
||||
};
|
||||
|
||||
if (p >= end) return -1;
|
||||
|
||||
if (*p == 0xdf) {
|
||||
*info = &ss;
|
||||
return 1;
|
||||
}
|
||||
else if (p + 1 < end) {
|
||||
if ((*p == 'S' && *(p+1) == 'S') ||
|
||||
(*p == 's' && *(p+1) == 's')) {
|
||||
*info = &ss;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
return -1; /* is not a fold string. */
|
||||
}
|
||||
|
||||
#else /* ONIG_RUBY_M17N */
|
||||
|
||||
extern int
|
||||
onigenc_is_code_ctype(OnigEncoding enc, OnigCodePoint code, int ctype)
|
||||
{
|
||||
switch (ctype) {
|
||||
case ONIGENC_CTYPE_ALPHA:
|
||||
return m17n_isalpha(enc, code);
|
||||
break;
|
||||
case ONIGENC_CTYPE_BLANK:
|
||||
return ONIGENC_IS_CODE_BLANK(enc, (int )(code));
|
||||
break;
|
||||
case ONIGENC_CTYPE_CNTRL:
|
||||
return m17n_iscntrl(enc, code);
|
||||
break;
|
||||
case ONIGENC_CTYPE_DIGIT:
|
||||
return m17n_isdigit(enc, code);
|
||||
break;
|
||||
case ONIGENC_CTYPE_GRAPH:
|
||||
return ONIGENC_IS_CODE_GRAPH(enc, (int )(code));
|
||||
break;
|
||||
case ONIGENC_CTYPE_LOWER:
|
||||
return m17n_islower(enc, code);
|
||||
break;
|
||||
case ONIGENC_CTYPE_PRINT:
|
||||
return m17n_isprint(enc, code);
|
||||
break;
|
||||
case ONIGENC_CTYPE_PUNCT:
|
||||
return m17n_ispunct(enc, code);
|
||||
break;
|
||||
case ONIGENC_CTYPE_SPACE:
|
||||
return m17n_isspace(enc, code);
|
||||
break;
|
||||
case ONIGENC_CTYPE_UPPER:
|
||||
return m17n_isupper(enc, code);
|
||||
break;
|
||||
case ONIGENC_CTYPE_XDIGIT:
|
||||
return m17n_isxdigit(enc, code);
|
||||
break;
|
||||
case ONIGENC_CTYPE_WORD:
|
||||
return m17n_iswchar(enc, code);
|
||||
break;
|
||||
case ONIGENC_CTYPE_ASCII:
|
||||
return (code < 128 ? TRUE : FALSE);
|
||||
break;
|
||||
case ONIGENC_CTYPE_ALNUM:
|
||||
return m17n_isalnum(enc, code);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
int c, len;
|
||||
|
||||
m17n_mbcput(enc, code, buf);
|
||||
c = m17n_firstbyte(enc, code);
|
||||
len = enc_len(enc, c);
|
||||
return len;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* buf)
|
||||
{
|
||||
unsigned int c, low;
|
||||
|
||||
c = m17n_codepoint(enc, p, p + enc_len(enc, *p));
|
||||
low = m17n_tolower(enc, c);
|
||||
m17n_mbcput(enc, low, buf);
|
||||
|
||||
return m17n_codelen(enc, low);
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mbc_is_case_ambig(OnigEncoding enc, UChar* p)
|
||||
{
|
||||
unsigned int c = m17n_codepoint(enc, p, p + enc_len(enc, *p));
|
||||
|
||||
if (m17n_isupper(enc, c) || m17n_islower(enc, c))
|
||||
return TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
|
||||
{
|
||||
UChar *p;
|
||||
int len;
|
||||
|
||||
if (s <= start) return s;
|
||||
p = s;
|
||||
|
||||
while (!m17n_islead(enc, *p) && p > start) p--;
|
||||
while (p + (len = enc_len(enc, *p)) < s) {
|
||||
p += len;
|
||||
}
|
||||
if (p + len == s) return s;
|
||||
return p;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_is_allowed_reverse_match(OnigEncoding enc, UChar* s, UChar* end)
|
||||
{
|
||||
return ONIGENC_IS_SINGLEBYTE(enc);
|
||||
}
|
||||
|
||||
extern void
|
||||
onigenc_set_default_caseconv_table(UChar* table) { }
|
||||
|
||||
#endif /* ONIG_RUBY_M17N */
|
96
regenc.h
Normal file
96
regenc.h
Normal file
|
@ -0,0 +1,96 @@
|
|||
/**********************************************************************
|
||||
|
||||
regenc.h - Oniguruma (regular expression library)
|
||||
|
||||
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
|
||||
|
||||
**********************************************************************/
|
||||
#ifndef REGENC_H
|
||||
#define REGENC_H
|
||||
|
||||
#ifndef RUBY_PLATFORM
|
||||
#include "config.h"
|
||||
#endif
|
||||
#include "oniguruma.h"
|
||||
|
||||
/* Fallback definitions, used only when nothing earlier defined them. */
#if !defined(NULL)
#define NULL   ((void* )0)
#endif

#if !defined(TRUE)
#define TRUE    1
#endif

#if !defined(FALSE)
#define FALSE   0
#endif
|
||||
|
||||
/* error codes */

/* internal error */
#define ONIGERR_MEMORY                     -5
#define ONIGERR_TYPE_BUG                   -6

/* syntax error [-400, -999] */
#define ONIGERR_INVALID_WIDE_CHAR_VALUE  -400
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE  -401
|
||||
|
||||
/* newline character and test */
#define ONIG_NEWLINE            '\n'
#define ONIG_IS_NEWLINE(c)      (ONIG_NEWLINE == (c))

/* null-pointer tests; the argument is compared as void* */
#define ONIG_IS_NULL(p)         (((void*)(p)) == (void*)0)
#define ONIG_IS_NOT_NULL(p)     (((void*)(p)) != (void*)0)

/* Early-return helpers.  Each expands to a bare `if` statement, so use them
   only as a full statement and never directly before an `else`. */
#define ONIG_CHECK_NULL_RETURN(p)          if (ONIG_IS_NULL(p)) return NULL
#define ONIG_CHECK_NULL_RETURN_VAL(p,val)  if (ONIG_IS_NULL(p)) return (val)
|
||||
|
||||
|
||||
#ifdef ONIG_RUBY_M17N
|
||||
|
||||
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_UNDEF
|
||||
|
||||
#else /* ONIG_RUBY_M17N */
|
||||
|
||||
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
|
||||
|
||||
/* for encoding system implementation (internal) */
|
||||
ONIG_EXTERN int onigenc_nothing_get_all_fold_match_code P_((OnigCodePoint** codes));
|
||||
ONIG_EXTERN int onigenc_nothing_get_fold_match_info P_((UChar* p, UChar* end, OnigEncFoldMatchInfo** info));
|
||||
ONIG_EXTERN int onigenc_nothing_get_ctype_code_range P_((int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]));
|
||||
|
||||
/* methods for single byte encoding */
|
||||
ONIG_EXTERN int onigenc_ascii_mbc_to_lower P_((UChar* p, UChar* lower));
|
||||
ONIG_EXTERN int onigenc_ascii_mbc_is_case_ambig P_((UChar* p));
|
||||
ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((UChar* p, UChar* end));
|
||||
ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
|
||||
ONIG_EXTERN int onigenc_single_byte_code_to_mbc_first P_((OnigCodePoint code));
|
||||
ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
|
||||
ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((UChar* start, UChar* s));
|
||||
ONIG_EXTERN int onigenc_single_byte_is_allowed_reverse_match P_((UChar* s, UChar* end));
|
||||
|
||||
/* methods for multi byte encoding */
|
||||
ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, UChar* p, UChar* end));
|
||||
ONIG_EXTERN int onigenc_mbn_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* lower));
|
||||
ONIG_EXTERN int onigenc_mbn_mbc_is_case_ambig P_((UChar* p));
|
||||
ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
|
||||
ONIG_EXTERN int onigenc_mb2_code_to_mbc_first P_((OnigCodePoint code));
|
||||
ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
|
||||
ONIG_EXTERN int onigenc_mb2_code_is_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
|
||||
ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
|
||||
ONIG_EXTERN int onigenc_mb4_code_to_mbc_first P_((OnigCodePoint code));
|
||||
ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
|
||||
ONIG_EXTERN int onigenc_mb4_code_is_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
|
||||
|
||||
ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** codes));
|
||||
ONIG_EXTERN int onigenc_get_fold_match_info_ss_0xdf P_((UChar* p, UChar* end, OnigEncFoldMatchInfo** info));
|
||||
|
||||
#endif /* is not ONIG_RUBY_M17N */
|
||||
|
||||
|
||||
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
|
||||
ONIG_EXTERN UChar* OnigEncAsciiToLowerCaseTable;
|
||||
ONIG_EXTERN unsigned short OnigEncAsciiCtypeTable[];
|
||||
|
||||
/* Looks up `c` in the ASCII lower-case conversion table. */
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c)   OnigEncAsciiToLowerCaseTable[c]

/*
 * Non-zero when any bit of `ctype` is set in the ASCII ctype table entry
 * for `code`.  `ctype` is parenthesized so that a mask expression such as
 * `A | B` is applied as a whole (`&` binds tighter than `|`).
 */
#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
  ((OnigEncAsciiCtypeTable[code] & (ctype)) != 0)

/* Non-zero when `code` is flagged as upper- or lower-case in the table. */
#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
  ONIGENC_IS_ASCII_CODE_CTYPE(code, (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER))
|
||||
|
||||
#endif /* REGENC_H */
|
291
regerror.c
Normal file
291
regerror.c
Normal file
|
@ -0,0 +1,291 @@
|
|||
/**********************************************************************
|
||||
|
||||
regerror.c - Oniguruma (regular expression library)
|
||||
|
||||
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
|
||||
|
||||
**********************************************************************/
|
||||
#include "regint.h"
|
||||
#include <stdio.h> /* for vsnprintf() */
|
||||
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
#include <stdarg.h>
|
||||
#define va_init_list(a,b) va_start(a,b)
|
||||
#else
|
||||
#include <varargs.h>
|
||||
#define va_init_list(a,b) va_start(a)
|
||||
#endif
|
||||
|
||||
extern char*
|
||||
onig_error_code_to_format(int code)
|
||||
{
|
||||
char *p;
|
||||
|
||||
if (code >= 0) return (char* )0;
|
||||
|
||||
switch (code) {
|
||||
case ONIG_MISMATCH:
|
||||
p = "mismatch"; break;
|
||||
case ONIG_NO_SUPPORT_CONFIG:
|
||||
p = "no support in this configuration"; break;
|
||||
case ONIGERR_MEMORY:
|
||||
p = "fail to memory allocation"; break;
|
||||
case ONIGERR_MATCH_STACK_LIMIT_OVER:
|
||||
p = "match-stack limit over"; break;
|
||||
case ONIGERR_TYPE_BUG:
|
||||
p = "undefined type (bug)"; break;
|
||||
case ONIGERR_PARSER_BUG:
|
||||
p = "internal parser error (bug)"; break;
|
||||
case ONIGERR_STACK_BUG:
|
||||
p = "stack error (bug)"; break;
|
||||
case ONIGERR_UNDEFINED_BYTECODE:
|
||||
p = "undefined bytecode (bug)"; break;
|
||||
case ONIGERR_UNEXPECTED_BYTECODE:
|
||||
p = "unexpected bytecode (bug)"; break;
|
||||
case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED:
|
||||
p = "default multibyte-encoding is not setted"; break;
|
||||
case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
|
||||
p = "can't convert to wide-char on specified multibyte-encoding"; break;
|
||||
case ONIGERR_INVALID_ARGUMENT:
|
||||
p = "invalid argument"; break;
|
||||
case ONIGERR_END_PATTERN_AT_LEFT_BRACE:
|
||||
p = "end pattern at left brace"; break;
|
||||
case ONIGERR_END_PATTERN_AT_LEFT_BRACKET:
|
||||
p = "end pattern at left bracket"; break;
|
||||
case ONIGERR_EMPTY_CHAR_CLASS:
|
||||
p = "empty char-class"; break;
|
||||
case ONIGERR_PREMATURE_END_OF_CHAR_CLASS:
|
||||
p = "premature end of char-class"; break;
|
||||
case ONIGERR_END_PATTERN_AT_BACKSLASH:
|
||||
p = "end pattern at backslash"; break;
|
||||
case ONIGERR_END_PATTERN_AT_META:
|
||||
p = "end pattern at meta"; break;
|
||||
case ONIGERR_END_PATTERN_AT_CONTROL:
|
||||
p = "end pattern at control"; break;
|
||||
case ONIGERR_META_CODE_SYNTAX:
|
||||
p = "illegal meta-code syntax"; break;
|
||||
case ONIGERR_CONTROL_CODE_SYNTAX:
|
||||
p = "illegal control-code syntax"; break;
|
||||
case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
|
||||
p = "char-class value at end of range"; break;
|
||||
case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
|
||||
p = "char-class value at start of range"; break;
|
||||
case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS:
|
||||
p = "unmatched range specifier in char-class"; break;
|
||||
case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED:
|
||||
p = "target of repeat operator is not specified"; break;
|
||||
case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID:
|
||||
p = "target of repeat operator is invalid"; break;
|
||||
case ONIGERR_NESTED_REPEAT_OPERATOR:
|
||||
p = "nested repeat operator"; break;
|
||||
case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS:
|
||||
p = "unmatched close parenthesis"; break;
|
||||
case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS:
|
||||
p = "end pattern with unmatched parenthesis"; break;
|
||||
case ONIGERR_END_PATTERN_IN_GROUP:
|
||||
p = "end pattern in group"; break;
|
||||
case ONIGERR_UNDEFINED_GROUP_OPTION:
|
||||
p = "undefined group option"; break;
|
||||
case ONIGERR_INVALID_POSIX_BRACKET_TYPE:
|
||||
p = "invalid POSIX bracket type"; break;
|
||||
case ONIGERR_INVALID_LOOK_BEHIND_PATTERN:
|
||||
p = "invalid pattern in look-behind"; break;
|
||||
case ONIGERR_INVALID_REPEAT_RANGE_PATTERN:
|
||||
p = "invalid repeat range {lower,upper}"; break;
|
||||
case ONIGERR_TOO_BIG_NUMBER:
|
||||
p = "too big number"; break;
|
||||
case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE:
|
||||
p = "too big number for repeat range"; break;
|
||||
case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE:
|
||||
p = "upper is smaller than lower in repeat range"; break;
|
||||
case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS:
|
||||
p = "empty range in char class"; break;
|
||||
case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE:
|
||||
p = "mismatch multibyte code length in char-class range"; break;
|
||||
case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES:
|
||||
p = "too many multibyte code ranges are specified"; break;
|
||||
case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING:
|
||||
p = "too short multibyte code string"; break;
|
||||
case ONIGERR_TOO_BIG_BACKREF_NUMBER:
|
||||
p = "too big backref number"; break;
|
||||
case ONIGERR_INVALID_BACKREF:
|
||||
#ifdef USE_NAMED_GROUP
|
||||
p = "invalid backref number/name"; break;
|
||||
#else
|
||||
p = "invalid backref number"; break;
|
||||
#endif
|
||||
case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED:
|
||||
p = "numbered backref/call is not allowed. (use name)"; break;
|
||||
case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
|
||||
p = "too big wide-char value"; break;
|
||||
case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
|
||||
p = "too long wide-char value"; break;
|
||||
case ONIGERR_INVALID_WIDE_CHAR_VALUE:
|
||||
p = "invalid wide-char value"; break;
|
||||
case ONIGERR_EMPTY_GROUP_NAME:
|
||||
p = "group name is empty"; break;
|
||||
case ONIGERR_INVALID_GROUP_NAME:
|
||||
p = "invalid group name <%n>"; break;
|
||||
case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
|
||||
#ifdef USE_NAMED_GROUP
|
||||
p = "invalid char in group name <%n>"; break;
|
||||
#else
|
||||
p = "invalid char in group number <%n>"; break;
|
||||
#endif
|
||||
case ONIGERR_UNDEFINED_NAME_REFERENCE:
|
||||
p = "undefined name <%n> reference"; break;
|
||||
case ONIGERR_UNDEFINED_GROUP_REFERENCE:
|
||||
p = "undefined group <%n> reference"; break;
|
||||
case ONIGERR_MULTIPLEX_DEFINED_NAME:
|
||||
p = "multiplex defined name <%n>"; break;
|
||||
case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
|
||||
p = "multiplex definition name <%n> call"; break;
|
||||
case ONIGERR_NEVER_ENDING_RECURSION:
|
||||
p = "never ending recursion"; break;
|
||||
case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY:
|
||||
p = "group number is too big for capture history"; break;
|
||||
case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
|
||||
p = "invalid character property name"; break;
|
||||
case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
|
||||
p = "over thread pass limit count"; break;
|
||||
|
||||
default:
|
||||
p = "undefined error code"; break;
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
/* for ONIG_MAX_ERROR_MESSAGE_LEN */
|
||||
#define MAX_ERROR_PAR_LEN 30
|
||||
|
||||
extern int
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
onig_error_code_to_str(UChar* s, int code, ...)
|
||||
#else
|
||||
onig_error_code_to_str(s, code, va_alist)
|
||||
UChar* s;
|
||||
int code;
|
||||
va_dcl
|
||||
#endif
|
||||
{
|
||||
UChar *p, *q;
|
||||
OnigErrorInfo* einfo;
|
||||
int len;
|
||||
va_list vargs;
|
||||
|
||||
va_init_list(vargs, code);
|
||||
|
||||
switch (code) {
|
||||
case ONIGERR_UNDEFINED_NAME_REFERENCE:
|
||||
case ONIGERR_UNDEFINED_GROUP_REFERENCE:
|
||||
case ONIGERR_MULTIPLEX_DEFINED_NAME:
|
||||
case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
|
||||
case ONIGERR_INVALID_GROUP_NAME:
|
||||
case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
|
||||
einfo = va_arg(vargs, OnigErrorInfo*);
|
||||
len = einfo->par_end - einfo->par;
|
||||
q = onig_error_code_to_format(code);
|
||||
p = s;
|
||||
while (*q != '\0') {
|
||||
if (*q == '%') {
|
||||
q++;
|
||||
if (*q == 'n') { /* '%n': name */
|
||||
if (len > MAX_ERROR_PAR_LEN) {
|
||||
xmemcpy(p, einfo->par, MAX_ERROR_PAR_LEN - 3);
|
||||
p += (MAX_ERROR_PAR_LEN - 3);
|
||||
xmemcpy(p, "...", 3);
|
||||
p += 3;
|
||||
}
|
||||
else {
|
||||
xmemcpy(p, einfo->par, len);
|
||||
p += len;
|
||||
}
|
||||
q++;
|
||||
}
|
||||
else
|
||||
goto normal_char;
|
||||
}
|
||||
else {
|
||||
normal_char:
|
||||
*p++ = *q++;
|
||||
}
|
||||
}
|
||||
*p = '\0';
|
||||
len = p - s;
|
||||
break;
|
||||
|
||||
default:
|
||||
q = onig_error_code_to_format(code);
|
||||
len = strlen(q);
|
||||
xmemcpy(s, q, len);
|
||||
s[len] = '\0';
|
||||
break;
|
||||
}
|
||||
|
||||
va_end(vargs);
|
||||
return len;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
onig_snprintf_with_pattern(char buf[], int bufsize, OnigEncoding enc,
|
||||
char* pat, char* pat_end, char *fmt, ...)
|
||||
#else
|
||||
onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
|
||||
char buf[];
|
||||
int bufsize;
|
||||
OnigEncoding enc;
|
||||
char* pat;
|
||||
char* pat_end;
|
||||
const char *fmt;
|
||||
va_dcl
|
||||
#endif
|
||||
{
|
||||
int n, need, len;
|
||||
UChar *p, *s;
|
||||
va_list args;
|
||||
|
||||
va_init_list(args, fmt);
|
||||
n = vsnprintf(buf, bufsize, fmt, args);
|
||||
va_end(args);
|
||||
|
||||
need = (pat_end - pat) * 4 + 4;
|
||||
|
||||
if (n + need < bufsize) {
|
||||
strcat(buf, ": /");
|
||||
s = buf + strlen(buf);
|
||||
|
||||
p = pat;
|
||||
while (p < (UChar* )pat_end) {
|
||||
if (*p == MC_ESC) {
|
||||
*s++ = *p++;
|
||||
len = enc_len(enc, *p);
|
||||
while (len-- > 0) *s++ = *p++;
|
||||
}
|
||||
else if (*p == '/') {
|
||||
*s++ = MC_ESC;
|
||||
*s++ = *p++;
|
||||
}
|
||||
else if (ONIGENC_IS_MBC_HEAD(enc, *p)) {
|
||||
len = enc_len(enc, *p);
|
||||
while (len-- > 0) *s++ = *p++;
|
||||
}
|
||||
else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
|
||||
!ONIGENC_IS_CODE_SPACE(enc, *p)) {
|
||||
char b[5];
|
||||
sprintf(b, "\\%03o", *p & 0377);
|
||||
len = strlen(b);
|
||||
while (len-- > 0) *s++ = *p++;
|
||||
}
|
||||
else {
|
||||
*s++ = *p++;
|
||||
}
|
||||
}
|
||||
|
||||
*s++ = '/';
|
||||
*s = '\0';
|
||||
}
|
||||
}
|
256
reggnu.c
Normal file
256
reggnu.c
Normal file
|
@ -0,0 +1,256 @@
|
|||
/**********************************************************************
|
||||
|
||||
reggnu.c - Oniguruma (regular expression library)
|
||||
|
||||
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
|
||||
|
||||
**********************************************************************/
|
||||
#include "regint.h"
|
||||
|
||||
#ifndef ONIGGNU_H /* name changes from oniggnu.h to regex.h in ruby. */
|
||||
#include "oniggnu.h"
|
||||
#endif
|
||||
|
||||
#if defined(RUBY_PLATFORM) || defined(RUBY)
|
||||
#ifndef ONIG_RUBY_M17N
|
||||
#define USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef NULL
|
||||
#define NULL ((void* )0)
|
||||
#endif
|
||||
|
||||
extern void
|
||||
re_free_registers(OnigRegion* r)
|
||||
{
|
||||
/* 0: don't free self */
|
||||
onig_region_free(r, 0);
|
||||
}
|
||||
|
||||
extern int
|
||||
re_adjust_startpos(regex_t* reg, const char* string, int size,
|
||||
int startpos, int range)
|
||||
{
|
||||
if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) {
|
||||
UChar *p;
|
||||
UChar *s = (UChar* )string + startpos;
|
||||
|
||||
if (range > 0) {
|
||||
p = onigenc_get_right_adjust_char_head(reg->enc, (UChar* )string, s);
|
||||
}
|
||||
else {
|
||||
p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, s);
|
||||
}
|
||||
return p - (UChar* )string;
|
||||
}
|
||||
|
||||
return startpos;
|
||||
}
|
||||
|
||||
extern int
|
||||
re_match(regex_t* reg, const char* str, int size, int pos,
|
||||
struct re_registers* regs)
|
||||
{
|
||||
return onig_match(reg, (UChar* )str, (UChar* )(str + size),
|
||||
(UChar* )(str + pos), regs, ONIG_OPTION_NONE);
|
||||
}
|
||||
|
||||
extern int
|
||||
re_search(regex_t* bufp, const char* string, int size, int startpos, int range,
|
||||
struct re_registers* regs)
|
||||
{
|
||||
return onig_search(bufp, (UChar* )string, (UChar* )(string + size),
|
||||
(UChar* )(string + startpos),
|
||||
(UChar* )(string + startpos + range),
|
||||
regs, ONIG_OPTION_NONE);
|
||||
}
|
||||
|
||||
extern int
|
||||
re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
|
||||
{
|
||||
int r;
|
||||
OnigErrorInfo einfo;
|
||||
|
||||
r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo);
|
||||
if (r != 0) {
|
||||
if (IS_NOT_NULL(ebuf))
|
||||
(void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
extern int
|
||||
re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
|
||||
{
|
||||
int r;
|
||||
OnigErrorInfo einfo;
|
||||
OnigEncoding enc;
|
||||
|
||||
/* I think encoding and options should be arguments of this function.
|
||||
But this is adapted to present re.c. (2002/11/29)
|
||||
*/
|
||||
enc = OnigEncDefaultCharEncoding;
|
||||
|
||||
r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size),
|
||||
reg->options, enc, OnigDefaultSyntax, &einfo);
|
||||
if (r != 0) {
|
||||
if (IS_NOT_NULL(ebuf))
|
||||
(void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
extern void
|
||||
re_free_pattern(regex_t* reg)
|
||||
{
|
||||
onig_free(reg);
|
||||
}
|
||||
|
||||
extern int
|
||||
re_alloc_pattern(regex_t** reg)
|
||||
{
|
||||
return onig_alloc_init(reg, ONIG_OPTION_DEFAULT, OnigEncDefaultCharEncoding,
|
||||
OnigDefaultSyntax);
|
||||
}
|
||||
|
||||
extern void
|
||||
re_set_casetable(const char* table)
|
||||
{
|
||||
onigenc_set_default_caseconv_table((UChar* )table);
|
||||
}
|
||||
|
||||
#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
|
||||
static const unsigned char mbctab_ascii[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
static const unsigned char mbctab_euc[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
|
||||
};
|
||||
|
||||
static const unsigned char mbctab_sjis[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
|
||||
};
|
||||
|
||||
static const unsigned char mbctab_utf8[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0,
|
||||
};
|
||||
|
||||
const unsigned char *re_mbctab = mbctab_ascii;
|
||||
#endif
|
||||
|
||||
extern void
|
||||
#ifdef ONIG_RUBY_M17N
|
||||
re_mbcinit(OnigEncoding enc)
|
||||
#else
|
||||
re_mbcinit(int mb_code)
|
||||
#endif
|
||||
{
|
||||
#ifdef ONIG_RUBY_M17N
|
||||
|
||||
onigenc_set_default_encoding(enc);
|
||||
|
||||
#else
|
||||
|
||||
OnigEncoding enc;
|
||||
|
||||
switch (mb_code) {
|
||||
case MBCTYPE_ASCII:
|
||||
enc = ONIG_ENCODING_ASCII;
|
||||
break;
|
||||
case MBCTYPE_EUC:
|
||||
enc = ONIG_ENCODING_EUC_JP;
|
||||
break;
|
||||
case MBCTYPE_SJIS:
|
||||
enc = ONIG_ENCODING_SJIS;
|
||||
break;
|
||||
case MBCTYPE_UTF8:
|
||||
enc = ONIG_ENCODING_UTF8;
|
||||
break;
|
||||
default:
|
||||
return ;
|
||||
break;
|
||||
}
|
||||
|
||||
onigenc_set_default_encoding(enc);
|
||||
#endif
|
||||
|
||||
#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
|
||||
switch (mb_code) {
|
||||
case MBCTYPE_ASCII:
|
||||
re_mbctab = mbctab_ascii;
|
||||
break;
|
||||
case MBCTYPE_EUC:
|
||||
re_mbctab = mbctab_euc;
|
||||
break;
|
||||
case MBCTYPE_SJIS:
|
||||
re_mbctab = mbctab_sjis;
|
||||
break;
|
||||
case MBCTYPE_UTF8:
|
||||
re_mbctab = mbctab_utf8;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
685
regint.h
Normal file
685
regint.h
Normal file
|
@ -0,0 +1,685 @@
|
|||
/**********************************************************************
|
||||
|
||||
regint.h - Oniguruma (regular expression library)
|
||||
|
||||
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
|
||||
|
||||
**********************************************************************/
|
||||
#ifndef REGINT_H
|
||||
#define REGINT_H
|
||||
|
||||
/* for debug */
|
||||
/* #define ONIG_DEBUG_PARSE_TREE */
|
||||
/* #define ONIG_DEBUG_COMPILE */
|
||||
/* #define ONIG_DEBUG_SEARCH */
|
||||
/* #define ONIG_DEBUG_MATCH */
|
||||
/* #define ONIG_DONT_OPTIMIZE */
|
||||
|
||||
/* for byte-code statistical data. */
|
||||
/* #define ONIG_DEBUG_STATISTICS */
|
||||
|
||||
#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
|
||||
defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_STATISTICS)
|
||||
#ifndef ONIG_DEBUG
|
||||
#define ONIG_DEBUG
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
|
||||
(defined(__ppc__) && defined(__APPLE__)) || \
|
||||
defined(__x86_64) || defined(__x86_64__) || \
|
||||
defined(__mc68020__)
|
||||
#define PLATFORM_UNALIGNED_WORD_ACCESS
|
||||
#endif
|
||||
|
||||
/* config */
|
||||
/* spec. config */
|
||||
#define USE_NAMED_GROUP
|
||||
#define USE_SUBEXP_CALL
|
||||
#define USE_FOLD_MATCH /* ess-tsett etc... */
|
||||
#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
|
||||
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
|
||||
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
|
||||
/* internal config */
|
||||
#define USE_RECYCLE_NODE
|
||||
#define USE_OP_PUSH_OR_JUMP_EXACT
|
||||
#define USE_QUALIFIER_PEEK_NEXT
|
||||
|
||||
#define INIT_MATCH_STACK_SIZE 160
|
||||
#define MATCH_STACK_LIMIT_SIZE 500000
|
||||
|
||||
/* interface to external system */
|
||||
#ifdef NOT_RUBY      /* given from Makefile */
|
||||
#include "config.h"
|
||||
#define USE_VARIABLE_META_CHARS
|
||||
#define USE_VARIABLE_SYNTAX
|
||||
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
|
||||
#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
|
||||
#define THREAD_ATOMIC_START /* depend on thread system */
|
||||
#define THREAD_ATOMIC_END /* depend on thread system */
|
||||
#define THREAD_PASS /* depend on thread system */
|
||||
#define xmalloc malloc
|
||||
#define xrealloc realloc
|
||||
#define xfree free
|
||||
#else
|
||||
#include "ruby.h"
|
||||
#include "version.h"
|
||||
#include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */
|
||||
#define THREAD_ATOMIC_START DEFER_INTS
|
||||
#define THREAD_ATOMIC_END ENABLE_INTS
|
||||
#define THREAD_PASS rb_thread_schedule()
|
||||
#define DEFAULT_WARN_FUNCTION rb_warn
|
||||
#define DEFAULT_VERB_WARN_FUNCTION rb_warning
|
||||
|
||||
#if defined(RUBY_VERSION_MAJOR)
|
||||
#if RUBY_VERSION_MAJOR > 1 || \
|
||||
(RUBY_VERSION_MAJOR == 1 && \
|
||||
defined(RUBY_VERSION_MINOR) && RUBY_VERSION_MINOR >= 8)
|
||||
#define USE_ST_HASH_TABLE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif /* else NOT_RUBY */
|
||||
|
||||
#define THREAD_PASS_LIMIT_COUNT 10
|
||||
#define xmemset memset
|
||||
#define xmemcpy memcpy
|
||||
#define xmemmove memmove
|
||||
#if defined(_WIN32) && !defined(__CYGWIN__)
|
||||
#define xalloca _alloca
|
||||
#ifdef NOT_RUBY
|
||||
#define vsnprintf _vsnprintf
|
||||
#endif
|
||||
#else
|
||||
#define xalloca alloca
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_ALLOCA_H) && !defined(__GNUC__)
|
||||
#include <alloca.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STRING_H
|
||||
# include <string.h>
|
||||
#else
|
||||
# include <strings.h>
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
# include <stdio.h>
|
||||
#endif
|
||||
|
||||
#include "regenc.h"
|
||||
#include "oniguruma.h"
|
||||
|
||||
#ifdef MIN
|
||||
#undef MIN
|
||||
#endif
|
||||
#ifdef MAX
|
||||
#undef MAX
|
||||
#endif
|
||||
#define MIN(a,b) (((a)>(b))?(b):(a))
|
||||
#define MAX(a,b) (((a)<(b))?(b):(a))
|
||||
|
||||
#define IS_NULL(p) (((void*)(p)) == (void*)0)
|
||||
#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
|
||||
#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL
|
||||
#define CHECK_NULL_RETURN_VAL(p,val) if (IS_NULL(p)) return (val)
|
||||
#define NULL_UCHARP ((UChar* )0)
|
||||
|
||||
#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
|
||||
#define WORD_ALIGNMENT_SIZE SIZEOF_INT
|
||||
|
||||
#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
|
||||
(pad_size) = WORD_ALIGNMENT_SIZE \
|
||||
- ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\
|
||||
if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\
|
||||
} while (0)
|
||||
|
||||
#define ALIGNMENT_RIGHT(addr) do {\
|
||||
(addr) += (WORD_ALIGNMENT_SIZE - 1);\
|
||||
(addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\
|
||||
} while (0)
|
||||
|
||||
|
||||
#define B_SHIFT 8
|
||||
#define B_MASK 0xff
|
||||
|
||||
#define SERIALIZE_2BYTE_INT(i,p) do {\
|
||||
*(p) = ((i) >> B_SHIFT) & B_MASK;\
|
||||
*((p)+1) = (i) & B_MASK;\
|
||||
} while (0)
|
||||
|
||||
#define SERIALIZE_4BYTE_INT(i,p) do {\
|
||||
*(p) = ((i) >> B_SHIFT*3) & B_MASK;\
|
||||
*((p)+1) = ((i) >> B_SHIFT*2) & B_MASK;\
|
||||
*((p)+2) = ((i) >> B_SHIFT ) & B_MASK;\
|
||||
*((p)+3) = (i) & B_MASK;\
|
||||
} while (0)
|
||||
|
||||
#define SERIALIZE_8BYTE_INT(i,p) do {\
|
||||
*(p) = ((i) >> B_SHIFT*7) & B_MASK;\
|
||||
*((p)+1) = ((i) >> B_SHIFT*6) & B_MASK;\
|
||||
*((p)+2) = ((i) >> B_SHIFT*5) & B_MASK;\
|
||||
*((p)+3) = ((i) >> B_SHIFT*4) & B_MASK;\
|
||||
*((p)+4) = ((i) >> B_SHIFT*3) & B_MASK;\
|
||||
*((p)+5) = ((i) >> B_SHIFT*2) & B_MASK;\
|
||||
*((p)+6) = ((i) >> B_SHIFT ) & B_MASK;\
|
||||
*((p)+7) = (i) & B_MASK;\
|
||||
} while (0)
|
||||
|
||||
#define GET_2BYTE_INT_INC(type,i,p) do {\
|
||||
(i) = (type )(((unsigned int )(*(p)) << B_SHIFT) | (unsigned int )((p)[1]));\
|
||||
(p) += 2;\
|
||||
} while (0)
|
||||
|
||||
#define GET_4BYTE_INT_INC(type,i,p) do {\
|
||||
(i) = (type )(((unsigned int )((p)[0]) << B_SHIFT*3) | \
|
||||
((unsigned int )((p)[1]) << B_SHIFT*2) | \
|
||||
((unsigned int )((p)[2]) << B_SHIFT ) | \
|
||||
((unsigned int )((p)[3]) )); \
|
||||
(p) += 4;\
|
||||
} while (0)
|
||||
|
||||
#define GET_8BYTE_INT_INC(type,i,p) do {\
|
||||
(i) = (type )(((unsigned long )((p)[0]) << B_SHIFT*7) | \
|
||||
((unsigned long )((p)[1]) << B_SHIFT*6) | \
|
||||
((unsigned long )((p)[2]) << B_SHIFT*5) | \
|
||||
((unsigned long )((p)[3]) << B_SHIFT*4) | \
|
||||
((unsigned long )((p)[4]) << B_SHIFT*3) | \
|
||||
((unsigned long )((p)[5]) << B_SHIFT*2) | \
|
||||
((unsigned long )((p)[6]) << B_SHIFT ) | \
|
||||
((unsigned long )((p)[7]) )); \
|
||||
(p) += 8;\
|
||||
} while (0)
|
||||
|
||||
#if SIZEOF_SHORT == 2
|
||||
#define GET_SHORT_INC(i,p) GET_2BYTE_INT_INC(short,i,p)
|
||||
#define SERIALIZE_SHORT(i,p) SERIALIZE_2BYTE_INT(i,p)
|
||||
#elif SIZEOF_SHORT == 4
|
||||
#define GET_SHORT_INC(i,p) GET_4BYTE_INT_INC(short,i,p)
|
||||
#define SERIALIZE_SHORT(i,p) SERIALIZE_4BYTE_INT(i,p)
|
||||
#elif SIZEOF_SHORT == 8
|
||||
#define GET_SHORT_INC(i,p) GET_8BYTE_INT_INC(short,i,p)
|
||||
#define SERIALIZE_SHORT(i,p) SERIALIZE_8BYTE_INT(i,p)
|
||||
#endif
|
||||
|
||||
#if SIZEOF_INT == 2
|
||||
#define GET_INT_INC(i,p) GET_2BYTE_INT_INC(int,i,p)
|
||||
#define GET_UINT_INC(i,p) GET_2BYTE_INT_INC(unsigned,i,p)
|
||||
#define SERIALIZE_INT(i,p) SERIALIZE_2BYTE_INT(i,p)
|
||||
#define SERIALIZE_UINT(i,p) SERIALIZE_2BYTE_INT(i,p)
|
||||
#elif SIZEOF_INT == 4
|
||||
#define GET_INT_INC(i,p) GET_4BYTE_INT_INC(int,i,p)
|
||||
#define GET_UINT_INC(i,p) GET_4BYTE_INT_INC(unsigned,i,p)
|
||||
#define SERIALIZE_INT(i,p) SERIALIZE_4BYTE_INT(i,p)
|
||||
#define SERIALIZE_UINT(i,p) SERIALIZE_4BYTE_INT(i,p)
|
||||
#elif SIZEOF_INT == 8
|
||||
#define GET_INT_INC(i,p) GET_8BYTE_INT_INC(int,i,p)
|
||||
#define GET_UINT_INC(i,p) GET_8BYTE_INT_INC(unsigned,i,p)
|
||||
#define SERIALIZE_INT(i,p) SERIALIZE_8BYTE_INT(i,p)
|
||||
#define SERIALIZE_UINT(i,p) SERIALIZE_8BYTE_INT(i,p)
|
||||
#endif
|
||||
|
||||
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
|
||||
|
||||
/* stack pop level */
|
||||
#define STACK_POP_LEVEL_FREE 0
|
||||
#define STACK_POP_LEVEL_MEM_START 1
|
||||
#define STACK_POP_LEVEL_ALL 2
|
||||
|
||||
/* optimize flags */
|
||||
#define ONIG_OPTIMIZE_NONE 0
|
||||
#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */
|
||||
#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */
|
||||
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */
|
||||
#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */
|
||||
#define ONIG_OPTIMIZE_MAP 5 /* char map */
|
||||
|
||||
/* bit status */
/*
 * A BitStatusType is a small set of flags indexed by n.  Indexes at or
 * beyond BIT_STATUS_BITS_NUM have no bit of their own: BIT_STATUS_AT and
 * BIT_STATUS_ON_AT fold them onto bit 0, BIT_STATUS_ON_AT_SIMPLE ignores
 * them.
 *
 * The shifted constant is a BitStatusType (unsigned) so that selecting the
 * top bit is well defined, and `n` is parenthesized so that an expression
 * argument such as `i & 7` is shifted as a whole.
 */
typedef unsigned int  BitStatusType;

#define BIT_STATUS_BITS_NUM          (sizeof(BitStatusType) * 8)
#define BIT_STATUS_CLEAR(stats)      (stats) = 0
#define BIT_STATUS_ON_ALL(stats)     (stats) = ~((BitStatusType )0)
#define BIT_STATUS_AT(stats,n) \
  ((n) < BIT_STATUS_BITS_NUM ? ((stats) & ((BitStatusType )1 << (n))) : ((stats) & 1))

#define BIT_STATUS_ON_AT(stats,n) do {\
  if ((n) < BIT_STATUS_BITS_NUM)\
    (stats) |= ((BitStatusType )1 << (n));\
  else\
    (stats) |= 1;\
} while (0)

#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\
  if ((n) < BIT_STATUS_BITS_NUM)\
    (stats) |= ((BitStatusType )1 << (n));\
} while (0)
|
||||
|
||||
|
||||
#define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1)
|
||||
|
||||
#define DIGITVAL(code) ((code) - '0')
|
||||
#define ODIGITVAL(code) DIGITVAL(code)
|
||||
#define XDIGITVAL(enc,code) \
|
||||
(ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \
|
||||
: (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10))
|
||||
|
||||
#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE)
|
||||
#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE)
|
||||
#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE)
|
||||
#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
|
||||
#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
|
||||
#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
|
||||
#define IS_POSIXLINE(option) (IS_SINGLELINE(option) && IS_MULTILINE(option))
|
||||
#define IS_FIND_CONDITION(option) ((option) & \
|
||||
(ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
|
||||
#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
|
||||
#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
|
||||
#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
|
||||
|
||||
/* OP_SET_OPTION is required for these options.
|
||||
#define IS_DYNAMIC_OPTION(option) \
|
||||
(((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0)
|
||||
*/
|
||||
/* ignore-case and multibyte status are included in compiled code. */
|
||||
#define IS_DYNAMIC_OPTION(option) 0
|
||||
|
||||
|
||||
/* bitset */
|
||||
/*
 * A BitSet holds one bit per single-byte value (SINGLE_BYTE_SIZE bits),
 * stored as an array of "rooms" of type Bits.
 */
#define BITS_PER_BYTE      8
#define SINGLE_BYTE_SIZE   (1 << BITS_PER_BYTE)
#define BITS_IN_ROOM       (sizeof(Bits) * BITS_PER_BYTE)
#define BITSET_SIZE        (SINGLE_BYTE_SIZE / BITS_IN_ROOM)

#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
typedef unsigned int   Bits;
#else
typedef unsigned char  Bits;
#endif
typedef Bits           BitSet[BITSET_SIZE];
typedef Bits*          BitSetRef;

#define SIZE_BITSET        sizeof(BitSet)

/* zero every room; the loop index is named so that it cannot shadow a
   variable used in the caller's `bs` expression (e.g. sets[i]). */
#define BITSET_CLEAR(bs) do {\
  int bitset_i_;\
  for (bitset_i_ = 0; bitset_i_ < (int )BITSET_SIZE; bitset_i_++) {\
    (bs)[bitset_i_] = 0;\
  }\
} while (0)

/* room holding bit `pos`, and the mask of that bit inside its room.
   `pos` is parenthesized so that expressions such as (c + 1) work, and
   the shift is done on a Bits value so the top bit of a word-sized room
   is well defined. */
#define BS_ROOM(bs,pos)            (bs)[(pos) / BITS_IN_ROOM]
#define BS_BIT(pos)                ((Bits )1 << ((pos) % BITS_IN_ROOM))

#define BITSET_AT(bs, pos)         (BS_ROOM(bs,pos) & BS_BIT(pos))
#define BITSET_SET_BIT(bs, pos)     BS_ROOM(bs,pos) |= BS_BIT(pos)
#define BITSET_CLEAR_BIT(bs, pos)   BS_ROOM(bs,pos) &= ~(BS_BIT(pos))
#define BITSET_INVERT_BIT(bs, pos)  BS_ROOM(bs,pos) ^= BS_BIT(pos)
|
||||
|
||||
/* bytes buffer */
|
||||
/* Growable byte buffer manipulated through the BBUF_* macros. */
typedef struct _BBuf {
|
||||
UChar* p; /* storage; the BBUF_* macros grow it with xrealloc() */
|
||||
unsigned int used; /* number of bytes written so far (high-water mark kept by BBUF_WRITE) */
|
||||
unsigned int alloc; /* size in bytes requested from xrealloc() for p */
|
||||
} BBuf;
|
||||
|
||||
/*
 * Byte buffer helpers.
 *
 * The growing macros execute `return(ONIGERR_MEMORY)` in the *calling*
 * function when xrealloc() fails, so they may only be used inside functions
 * that return an int error code.  On failure the buffer is left untouched
 * (p and alloc keep their old values), so the caller can still free it.
 *
 * Macro-local variables carry a `bbuf_` prefix and a trailing underscore so
 * that they cannot capture identically named variables in the arguments.
 */
#define BBUF_INIT(buf,size)    onig_bbuf_init((BBuf* )(buf), (size))

/* enlarge the allocation by exactly `inc` bytes */
#define BBUF_SIZE_INC(buf,inc) do{\
  unsigned int bbuf_new_alloc_ = (buf)->alloc + (inc);\
  UChar* bbuf_new_p_ = (UChar* )xrealloc((buf)->p, bbuf_new_alloc_);\
  if (IS_NULL(bbuf_new_p_)) return(ONIGERR_MEMORY);\
  (buf)->p     = bbuf_new_p_;\
  (buf)->alloc = bbuf_new_alloc_;\
} while (0)

/* double the allocation (at least once) until it is >= low.
   An empty buffer (alloc == 0) starts from 1 instead of looping forever,
   and a size that can no longer be doubled is reported as a memory error. */
#define BBUF_EXPAND(buf,low) do{\
  unsigned int bbuf_new_alloc_ = (buf)->alloc;\
  UChar* bbuf_new_p_;\
  do {\
    if (bbuf_new_alloc_ > (~0U) / 2) return(ONIGERR_MEMORY);\
    bbuf_new_alloc_ = (bbuf_new_alloc_ == 0 ? 1 : bbuf_new_alloc_ * 2);\
  } while (bbuf_new_alloc_ < (unsigned int )(low));\
  bbuf_new_p_ = (UChar* )xrealloc((buf)->p, bbuf_new_alloc_);\
  if (IS_NULL(bbuf_new_p_)) return(ONIGERR_MEMORY);\
  (buf)->p     = bbuf_new_p_;\
  (buf)->alloc = bbuf_new_alloc_;\
} while (0)

/* make sure at least `size` bytes are allocated; no-op if already so */
#define BBUF_ENSURE_SIZE(buf,size) do{\
  unsigned int bbuf_new_alloc_ = (buf)->alloc;\
  while (bbuf_new_alloc_ < (unsigned int )(size)) {\
    if (bbuf_new_alloc_ > (~0U) / 2) return(ONIGERR_MEMORY);\
    bbuf_new_alloc_ = (bbuf_new_alloc_ == 0 ? 1 : bbuf_new_alloc_ * 2);\
  }\
  if ((buf)->alloc != bbuf_new_alloc_) {\
    UChar* bbuf_new_p_ = (UChar* )xrealloc((buf)->p, bbuf_new_alloc_);\
    if (IS_NULL(bbuf_new_p_)) return(ONIGERR_MEMORY);\
    (buf)->p     = bbuf_new_p_;\
    (buf)->alloc = bbuf_new_alloc_;\
  }\
} while (0)

/* copy n bytes to offset pos, growing the buffer and `used` as needed */
#define BBUF_WRITE(buf,pos,bytes,n) do{\
  unsigned int bbuf_used_ = (unsigned int )((pos) + (n));\
  if ((buf)->alloc < bbuf_used_) BBUF_EXPAND((buf),bbuf_used_);\
  xmemcpy((buf)->p + (pos), (bytes), (n));\
  if ((buf)->used < bbuf_used_) (buf)->used = bbuf_used_;\
} while (0)

/* store one byte at offset pos, growing the buffer and `used` as needed */
#define BBUF_WRITE1(buf,pos,byte) do{\
  unsigned int bbuf_used_ = (unsigned int )((pos) + 1);\
  if ((buf)->alloc < bbuf_used_) BBUF_EXPAND((buf),bbuf_used_);\
  (buf)->p[(pos)] = (byte);\
  if ((buf)->used < bbuf_used_) (buf)->used = bbuf_used_;\
} while (0)

#define BBUF_ADD(buf,bytes,n)       BBUF_WRITE((buf),(buf)->used,(bytes),(n))
#define BBUF_ADD1(buf,byte)         BBUF_WRITE1((buf),(buf)->used,(byte))
#define BBUF_GET_ADD_ADDRESS(buf)   ((buf)->p + (buf)->used)
#define BBUF_GET_OFFSET_POS(buf)    ((buf)->used)

/* from < to */
#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\
  if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
  if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\
} while (0)

/* from > to */
#define BBUF_MOVE_LEFT(buf,from,to,n) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
} while (0)

/* from > to : move the tail starting at `from` down to `to` and shrink
   `used` by the size of the removed gap */
#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
  (buf)->used -= ((from) - (to));\
} while (0)

/* insert n bytes at pos, shifting any existing tail to the right */
#define BBUF_INSERT(buf,pos,bytes,n) do {\
  if ((pos) >= (buf)->used) {\
    BBUF_WRITE((buf),(pos),(bytes),(n));\
  }\
  else {\
    BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
    xmemcpy((buf)->p + (pos), (bytes), (n));\
  }\
} while (0)

#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)]
|
||||
|
||||
|
||||
#define ANCHOR_BEGIN_BUF (1<<0)
|
||||
#define ANCHOR_BEGIN_LINE (1<<1)
|
||||
#define ANCHOR_BEGIN_POSITION (1<<2)
|
||||
#define ANCHOR_END_BUF (1<<3)
|
||||
#define ANCHOR_SEMI_END_BUF (1<<4)
|
||||
#define ANCHOR_END_LINE (1<<5)
|
||||
|
||||
#define ANCHOR_WORD_BOUND (1<<6)
|
||||
#define ANCHOR_NOT_WORD_BOUND (1<<7)
|
||||
#define ANCHOR_WORD_BEGIN (1<<8)
|
||||
#define ANCHOR_WORD_END (1<<9)
|
||||
#define ANCHOR_PREC_READ (1<<10)
|
||||
#define ANCHOR_PREC_READ_NOT (1<<11)
|
||||
#define ANCHOR_LOOK_BEHIND (1<<12)
|
||||
#define ANCHOR_LOOK_BEHIND_NOT (1<<13)
|
||||
|
||||
#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */
|
||||
#define ANCHOR_ANYCHAR_STAR_PL (1<<15) /* ".*" optimize info (posix-line) */
|
||||
|
||||
/* operation code */
|
||||
enum OpCode {
|
||||
OP_FINISH = 0, /* matching process terminator (no more alternative) */
|
||||
OP_END = 1, /* pattern code terminator (success end) */
|
||||
|
||||
OP_EXACT1 = 2, /* single byte, N = 1 */
|
||||
OP_EXACT2, /* single byte, N = 2 */
|
||||
OP_EXACT3, /* single byte, N = 3 */
|
||||
OP_EXACT4, /* single byte, N = 4 */
|
||||
OP_EXACT5, /* single byte, N = 5 */
|
||||
OP_EXACTN, /* single byte */
|
||||
OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
|
||||
OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
|
||||
OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
|
||||
OP_EXACTMB2N, /* mb-length = 2 */
|
||||
OP_EXACTMB3N, /* mb-length = 3 */
|
||||
OP_EXACTMBN, /* other length */
|
||||
|
||||
OP_EXACT1_IC, /* single byte, N = 1, ignore case */
|
||||
OP_EXACTN_IC, /* single byte, ignore case */
|
||||
|
||||
OP_CCLASS,
|
||||
OP_CCLASS_MB,
|
||||
OP_CCLASS_MIX,
|
||||
OP_CCLASS_NOT,
|
||||
OP_CCLASS_MB_NOT,
|
||||
OP_CCLASS_MIX_NOT,
|
||||
|
||||
OP_ANYCHAR, /* "." */
|
||||
OP_ANYCHAR_ML, /* "." multi-line */
|
||||
OP_ANYCHAR_STAR, /* ".*" */
|
||||
OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
|
||||
OP_ANYCHAR_STAR_PEEK_NEXT,
|
||||
OP_ANYCHAR_ML_STAR_PEEK_NEXT,
|
||||
|
||||
OP_WORD,
|
||||
OP_NOT_WORD,
|
||||
OP_WORD_SB,
|
||||
OP_WORD_MB,
|
||||
OP_WORD_BOUND,
|
||||
OP_NOT_WORD_BOUND,
|
||||
OP_WORD_BEGIN,
|
||||
OP_WORD_END,
|
||||
|
||||
OP_BEGIN_BUF,
|
||||
OP_END_BUF,
|
||||
OP_BEGIN_LINE,
|
||||
OP_END_LINE,
|
||||
OP_SEMI_END_BUF,
|
||||
OP_BEGIN_POSITION,
|
||||
|
||||
OP_BACKREF1,
|
||||
OP_BACKREF2,
|
||||
OP_BACKREF3,
|
||||
OP_BACKREFN,
|
||||
OP_BACKREFN_IC,
|
||||
OP_BACKREF_MULTI,
|
||||
OP_BACKREF_MULTI_IC,
|
||||
|
||||
OP_MEMORY_START,
|
||||
OP_MEMORY_START_PUSH, /* push back-tracker to stack */
|
||||
OP_MEMORY_END_PUSH, /* push back-tracker to stack */
|
||||
OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
|
||||
OP_MEMORY_END,
|
||||
OP_MEMORY_END_REC, /* push marker to stack */
|
||||
|
||||
OP_SET_OPTION_PUSH, /* set option and push recover option */
|
||||
OP_SET_OPTION, /* set option */
|
||||
|
||||
OP_FAIL, /* pop stack and move */
|
||||
OP_JUMP,
|
||||
OP_PUSH,
|
||||
OP_POP,
|
||||
OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
|
||||
OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
|
||||
OP_REPEAT, /* {n,m} */
|
||||
OP_REPEAT_NG, /* {n,m}? (non greedy) */
|
||||
OP_REPEAT_INC,
|
||||
OP_REPEAT_INC_NG, /* non greedy */
|
||||
OP_NULL_CHECK_START, /* null loop checker start */
|
||||
OP_NULL_CHECK_END, /* null loop checker end */
|
||||
OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
|
||||
OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
|
||||
|
||||
OP_PUSH_POS, /* (?=...) start */
|
||||
OP_POP_POS, /* (?=...) end */
|
||||
OP_PUSH_POS_NOT, /* (?!...) start */
|
||||
OP_FAIL_POS, /* (?!...) end */
|
||||
OP_PUSH_STOP_BT, /* (?>...) start */
|
||||
OP_POP_STOP_BT, /* (?>...) end */
|
||||
OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
|
||||
OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
|
||||
OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
|
||||
|
||||
OP_CALL, /* \g<name> */
|
||||
OP_RETURN
|
||||
};
|
||||
|
||||
/* arguments type */
|
||||
#define ARG_SPECIAL -1
|
||||
#define ARG_NON 0
|
||||
#define ARG_RELADDR 1
|
||||
#define ARG_ABSADDR 2
|
||||
#define ARG_LENGTH 3
|
||||
#define ARG_MEMNUM 4
|
||||
#define ARG_OPTION 5
|
||||
|
||||
typedef short int RelAddrType;
|
||||
typedef short int AbsAddrType;
|
||||
typedef short int LengthType;
|
||||
typedef short int MemNumType;
|
||||
typedef int RepeatNumType;
|
||||
|
||||
#define SIZE_OPCODE 1
|
||||
#define SIZE_RELADDR sizeof(RelAddrType)
|
||||
#define SIZE_ABSADDR sizeof(AbsAddrType)
|
||||
#define SIZE_LENGTH sizeof(LengthType)
|
||||
#define SIZE_MEMNUM sizeof(MemNumType)
|
||||
#define SIZE_REPEATNUM sizeof(RepeatNumType)
|
||||
#define SIZE_OPTION sizeof(OnigOptionType)
|
||||
#define SIZE_CODE_POINT sizeof(OnigCodePoint)
|
||||
|
||||
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
|
||||
#define GET_RELADDR_INC(addr,p) do{\
|
||||
addr = *((RelAddrType* )(p));\
|
||||
(p) += SIZE_RELADDR;\
|
||||
} while(0)
|
||||
|
||||
#define GET_ABSADDR_INC(addr,p) do{\
|
||||
addr = *((AbsAddrType* )(p));\
|
||||
(p) += SIZE_ABSADDR;\
|
||||
} while(0)
|
||||
|
||||
#define GET_LENGTH_INC(len,p) do{\
|
||||
len = *((LengthType* )(p));\
|
||||
(p) += SIZE_LENGTH;\
|
||||
} while(0)
|
||||
|
||||
#define GET_MEMNUM_INC(num,p) do{\
|
||||
num = *((MemNumType* )(p));\
|
||||
(p) += SIZE_MEMNUM;\
|
||||
} while(0)
|
||||
|
||||
#define GET_REPEATNUM_INC(num,p) do{\
|
||||
num = *((RepeatNumType* )(p));\
|
||||
(p) += SIZE_REPEATNUM;\
|
||||
} while(0)
|
||||
|
||||
#define GET_OPTION_INC(option,p) do{\
|
||||
option = *((OnigOptionType* )(p));\
|
||||
(p) += SIZE_OPTION;\
|
||||
} while(0)
|
||||
#else
|
||||
|
||||
#define GET_RELADDR_INC(addr,p) GET_SHORT_INC(addr,p)
|
||||
#define GET_ABSADDR_INC(addr,p) GET_SHORT_INC(addr,p)
|
||||
#define GET_LENGTH_INC(len,p) GET_SHORT_INC(len,p)
|
||||
#define GET_MEMNUM_INC(num,p) GET_SHORT_INC(num,p)
|
||||
#define GET_REPEATNUM_INC(num,p) GET_INT_INC(num,p)
|
||||
#define GET_OPTION_INC(option,p) GET_UINT_INC(option,p)
|
||||
|
||||
#define SERIALIZE_RELADDR(addr,p) SERIALIZE_SHORT(addr,p)
|
||||
#define SERIALIZE_ABSADDR(addr,p) SERIALIZE_SHORT(addr,p)
|
||||
#define SERIALIZE_LENGTH(len,p) SERIALIZE_SHORT(len,p)
|
||||
#define SERIALIZE_MEMNUM(num,p) SERIALIZE_SHORT(num,p)
|
||||
#define SERIALIZE_REPEATNUM(num,p) SERIALIZE_INT(num,p)
|
||||
#define SERIALIZE_OPTION(option,p) SERIALIZE_UINT(option,p)
|
||||
|
||||
#define SERIALIZE_BUFSIZE SIZEOF_INT
|
||||
|
||||
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
|
||||
|
||||
/* code point's address must be aligned address. */
|
||||
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
|
||||
#define GET_BYTE_INC(byte,p) do{\
|
||||
byte = *(p);\
|
||||
(p)++;\
|
||||
} while(0)
|
||||
|
||||
|
||||
/* op-code + arg size */
|
||||
#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE
|
||||
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1)
|
||||
#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR)
|
||||
#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR)
|
||||
#define SIZE_OP_POP SIZE_OPCODE
|
||||
#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1)
|
||||
#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1)
|
||||
#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_PUSH_POS SIZE_OPCODE
|
||||
#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR)
|
||||
#define SIZE_OP_POP_POS SIZE_OPCODE
|
||||
#define SIZE_OP_FAIL_POS SIZE_OPCODE
|
||||
#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION)
|
||||
#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION)
|
||||
#define SIZE_OP_FAIL SIZE_OPCODE
|
||||
#define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE
|
||||
#define SIZE_OP_POP_STOP_BT SIZE_OPCODE
|
||||
#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH)
|
||||
#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH)
|
||||
#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE
|
||||
#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR)
|
||||
#define SIZE_OP_RETURN SIZE_OPCODE
|
||||
|
||||
|
||||
typedef struct {
|
||||
UChar esc;
|
||||
UChar anychar;
|
||||
UChar anytime;
|
||||
UChar zero_or_one_time;
|
||||
UChar one_or_more_time;
|
||||
UChar anychar_anytime;
|
||||
} OnigMetaCharTableType;
|
||||
|
||||
extern OnigMetaCharTableType OnigMetaCharTable;
|
||||
|
||||
#define MC_ESC OnigMetaCharTable.esc
|
||||
#define MC_ANYCHAR OnigMetaCharTable.anychar
|
||||
#define MC_ANYTIME OnigMetaCharTable.anytime
|
||||
#define MC_ZERO_OR_ONE_TIME OnigMetaCharTable.zero_or_one_time
|
||||
#define MC_ONE_OR_MORE_TIME OnigMetaCharTable.one_or_more_time
|
||||
#define MC_ANYCHAR_ANYTIME OnigMetaCharTable.anychar_anytime
|
||||
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
|
||||
typedef struct {
|
||||
short int opcode;
|
||||
char* name;
|
||||
short int arg_type;
|
||||
} OnigOpInfoType;
|
||||
|
||||
extern OnigOpInfoType OnigOpInfo[];
|
||||
|
||||
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp));
|
||||
|
||||
#ifdef ONIG_DEBUG_STATISTICS
|
||||
extern void onig_statistics_init P_((void));
|
||||
extern void onig_print_statistics P_((FILE* f));
|
||||
#endif
|
||||
#endif
|
||||
|
||||
extern char* onig_error_code_to_format P_((int code));
|
||||
extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...));
|
||||
extern UChar* onig_strdup P_((UChar* s, UChar* end));
|
||||
extern int onig_bbuf_init P_((BBuf* buf, int size));
|
||||
extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax));
|
||||
extern int onig_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigErrorInfo* einfo));
|
||||
extern void onig_chain_reduce P_((regex_t* reg));
|
||||
extern int onig_is_in_code_range P_((UChar* p, OnigCodePoint code));
|
||||
|
||||
#endif /* REGINT_H */
|
4815
regparse.c
Normal file
4815
regparse.c
Normal file
File diff suppressed because it is too large
Load diff
277
regparse.h
Normal file
277
regparse.h
Normal file
|
@ -0,0 +1,277 @@
|
|||
/**********************************************************************
|
||||
|
||||
regparse.h - Oniguruma (regular expression library)
|
||||
|
||||
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
|
||||
|
||||
**********************************************************************/
|
||||
#ifndef REGPARSE_H
|
||||
#define REGPARSE_H
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
/* node type */
|
||||
#define N_STRING (1<< 0)
|
||||
#define N_CCLASS (1<< 1)
|
||||
#define N_CTYPE (1<< 2)
|
||||
#define N_ANYCHAR (1<< 3)
|
||||
#define N_BACKREF (1<< 4)
|
||||
#define N_QUALIFIER (1<< 5)
|
||||
#define N_EFFECT (1<< 6)
|
||||
#define N_ANCHOR (1<< 7)
|
||||
#define N_LIST (1<< 8)
|
||||
#define N_ALT (1<< 9)
|
||||
#define N_CALL (1<<10)
|
||||
|
||||
#define IS_NODE_TYPE_SIMPLE(type) \
|
||||
(((type) & (N_STRING | N_CCLASS | N_CTYPE | N_ANYCHAR | N_BACKREF)) != 0)
|
||||
|
||||
#define NTYPE(node) ((node)->type)
|
||||
#define NCONS(node) ((node)->u.cons)
|
||||
#define NSTRING(node) ((node)->u.str)
|
||||
#define NCCLASS(node) ((node)->u.cclass)
|
||||
#define NCTYPE(node) ((node)->u.ctype)
|
||||
#define NQUALIFIER(node) ((node)->u.qualifier)
|
||||
#define NANCHOR(node) ((node)->u.anchor)
|
||||
#define NBACKREF(node) ((node)->u.backref)
|
||||
#define NEFFECT(node) ((node)->u.effect)
|
||||
#define NCALL(node) ((node)->u.call)
|
||||
|
||||
#define CTYPE_WORD (1<<0)
|
||||
#define CTYPE_NOT_WORD (1<<1)
|
||||
#define CTYPE_WHITE_SPACE (1<<2)
|
||||
#define CTYPE_NOT_WHITE_SPACE (1<<3)
|
||||
#define CTYPE_DIGIT (1<<4)
|
||||
#define CTYPE_NOT_DIGIT (1<<5)
|
||||
|
||||
|
||||
#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL)
|
||||
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
|
||||
|
||||
#define EFFECT_MEMORY (1<<0)
|
||||
#define EFFECT_OPTION (1<<1)
|
||||
#define EFFECT_STOP_BACKTRACK (1<<2)
|
||||
|
||||
#define REPEAT_INFINITE -1
|
||||
#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
|
||||
|
||||
#define NODE_STR_MARGIN 16
|
||||
#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
|
||||
#define NODE_BACKREFS_SIZE 7
|
||||
|
||||
#define NSTR_RAW (1<<0) /* by backslashed number */
|
||||
#define NSTR_CASE_AMBIG (1<<1)
|
||||
|
||||
#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
|
||||
#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
|
||||
#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
|
||||
#define NSTRING_SET_CASE_AMBIG(node) (node)->u.str.flag |= NSTR_CASE_AMBIG
|
||||
#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
|
||||
#define NSTRING_IS_CASE_AMBIG(node) \
|
||||
(((node)->u.str.flag & NSTR_CASE_AMBIG) != 0)
|
||||
|
||||
/* Pointer to the back reference number array of a BackrefNode: the
   dynamically allocated array when there is one, otherwise the static one.
   This is an expression macro, so it must not carry a trailing semicolon;
   the caller supplies it. */
#define BACKREFS_P(br) \
  (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
|
||||
|
||||
#define CCLASS_SET_NOT(cc) (cc)->not = 1
|
||||
|
||||
#define NQ_TARGET_ISNOT_EMPTY 0
|
||||
#define NQ_TARGET_IS_EMPTY 1
|
||||
#define NQ_TARGET_IS_EMPTY_MEM 2
|
||||
#define NQ_TARGET_IS_EMPTY_REC 3
|
||||
|
||||
|
||||
typedef struct {
|
||||
UChar* s;
|
||||
UChar* end;
|
||||
unsigned int flag;
|
||||
int capa; /* (allocated size - 1) or 0: use buf[] */
|
||||
UChar buf[NODE_STR_BUF_SIZE];
|
||||
} StrNode;
|
||||
|
||||
typedef struct {
|
||||
int not;
|
||||
BitSet bs;
|
||||
BBuf* mbuf; /* multi-byte info or NULL */
|
||||
} CClassNode;
|
||||
|
||||
typedef struct {
|
||||
struct _Node* target;
|
||||
int lower;
|
||||
int upper;
|
||||
int greedy;
|
||||
int by_number; /* {n,m} */
|
||||
int target_empty_info;
|
||||
struct _Node* head_exact;
|
||||
struct _Node* next_head_exact;
|
||||
int is_refered; /* include called node. don't eliminate even if {0} */
|
||||
} QualifierNode;
|
||||
|
||||
/* status bits */
|
||||
#define NST_MIN_FIXED (1<<0)
|
||||
#define NST_MAX_FIXED (1<<1)
|
||||
#define NST_CLEN_FIXED (1<<2)
|
||||
#define NST_MARK1 (1<<3)
|
||||
#define NST_MARK2 (1<<4)
|
||||
#define NST_MEM_BACKREFED (1<<5)
|
||||
#define NST_SIMPLE_REPEAT (1<<6) /* for stop backtrack optimization */
|
||||
|
||||
#define NST_RECURSION (1<<7)
|
||||
#define NST_CALLED (1<<8)
|
||||
#define NST_ADDR_FIXED (1<<9)
|
||||
#define NST_NAMED_GROUP (1<<10)
|
||||
#define NST_NAME_REF (1<<11)
|
||||
|
||||
#define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f)
|
||||
#define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f)
|
||||
|
||||
#define IS_EFFECT_CALLED(en) (((en)->state & NST_CALLED) != 0)
|
||||
#define IS_EFFECT_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
|
||||
#define IS_EFFECT_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
|
||||
#define IS_EFFECT_MARK1(en) (((en)->state & NST_MARK1) != 0)
|
||||
#define IS_EFFECT_MARK2(en) (((en)->state & NST_MARK2) != 0)
|
||||
#define IS_EFFECT_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
|
||||
#define IS_EFFECT_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
|
||||
#define IS_EFFECT_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
|
||||
#define IS_EFFECT_SIMPLE_REPEAT(en) (((en)->state & NST_SIMPLE_REPEAT) != 0)
|
||||
#define IS_EFFECT_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
|
||||
|
||||
#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
|
||||
#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
|
||||
#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
|
||||
#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
|
||||
|
||||
typedef struct {
|
||||
int state;
|
||||
int type;
|
||||
int regnum;
|
||||
OnigOptionType option;
|
||||
struct _Node* target;
|
||||
AbsAddrType call_addr;
|
||||
/* for multiple call reference */
|
||||
OnigDistance min_len; /* min length (byte) */
|
||||
OnigDistance max_len; /* max length (byte) */
|
||||
int char_len; /* character length */
|
||||
int opt_count; /* referenced count in optimize_node_left() */
|
||||
} EffectNode;
|
||||
|
||||
#define CALLNODE_REFNUM_UNDEF -1
|
||||
|
||||
#ifdef USE_SUBEXP_CALL
|
||||
|
||||
typedef struct {
|
||||
int offset;
|
||||
struct _Node* target;
|
||||
} UnsetAddr;
|
||||
|
||||
typedef struct {
|
||||
int num;
|
||||
int alloc;
|
||||
UnsetAddr* us;
|
||||
} UnsetAddrList;
|
||||
|
||||
typedef struct {
|
||||
int state;
|
||||
int ref_num;
|
||||
UChar* name;
|
||||
UChar* name_end;
|
||||
struct _Node* target; /* EffectNode : EFFECT_MEMORY */
|
||||
UnsetAddrList* unset_addr_list;
|
||||
} CallNode;
|
||||
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
int state;
|
||||
int back_num;
|
||||
int back_static[NODE_BACKREFS_SIZE];
|
||||
int* back_dynamic;
|
||||
} BackrefNode;
|
||||
|
||||
typedef struct {
|
||||
int type;
|
||||
struct _Node* target;
|
||||
int char_len;
|
||||
} AnchorNode;
|
||||
|
||||
typedef struct _Node {
|
||||
int type;
|
||||
union {
|
||||
StrNode str;
|
||||
CClassNode cclass;
|
||||
QualifierNode qualifier;
|
||||
EffectNode effect;
|
||||
#ifdef USE_SUBEXP_CALL
|
||||
CallNode call;
|
||||
#endif
|
||||
BackrefNode backref;
|
||||
AnchorNode anchor;
|
||||
struct {
|
||||
struct _Node* left;
|
||||
struct _Node* right;
|
||||
} cons;
|
||||
struct {
|
||||
int type;
|
||||
} ctype;
|
||||
} u;
|
||||
} Node;
|
||||
|
||||
#define NULL_NODE ((Node* )0)
|
||||
|
||||
#define SCANENV_MEMNODES_SIZE 8
|
||||
#define SCANENV_MEM_NODES(senv) \
|
||||
(IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
|
||||
(senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
|
||||
|
||||
typedef struct {
|
||||
OnigOptionType option;
|
||||
OnigEncoding enc;
|
||||
OnigSyntaxType* syntax;
|
||||
BitStatusType capture_history;
|
||||
BitStatusType bt_mem_start;
|
||||
BitStatusType bt_mem_end;
|
||||
BitStatusType backrefed_mem;
|
||||
UChar* pattern;
|
||||
UChar* pattern_end;
|
||||
UChar* error;
|
||||
UChar* error_end;
|
||||
regex_t* reg; /* for reg->names only */
|
||||
int num_call;
|
||||
#ifdef USE_SUBEXP_CALL
|
||||
UnsetAddrList* unset_addr_list;
|
||||
#endif
|
||||
int num_mem;
|
||||
#ifdef USE_NAMED_GROUP
|
||||
int num_named;
|
||||
#endif
|
||||
int mem_alloc;
|
||||
Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
|
||||
Node** mem_nodes_dynamic;
|
||||
} ScanEnv;
|
||||
|
||||
|
||||
#define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0)
|
||||
#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
|
||||
#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
|
||||
|
||||
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
|
||||
extern int onig_strncmp P_((UChar* s1, UChar* s2, int n));
|
||||
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
|
||||
extern int onig_scan_unsigned_number P_((UChar** src, UChar* end, OnigEncoding enc));
|
||||
extern void onig_reduce_nested_qualifier P_((Node* pnode, Node* cnode));
|
||||
extern void onig_node_conv_to_str_node P_((Node* node, int raw));
|
||||
extern int onig_node_str_cat P_((Node* node, UChar* s, UChar* end));
|
||||
extern void onig_node_free P_((Node* node));
|
||||
extern Node* onig_node_new_effect P_((int type));
|
||||
extern Node* onig_node_new_anchor P_((int type));
|
||||
extern int onig_free_node_list();
|
||||
extern int onig_names_free P_((regex_t* reg));
|
||||
extern int onig_parse_make_tree P_((Node** root, UChar* pattern, UChar* end, regex_t* reg, ScanEnv* env));
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
#ifdef USE_NAMED_GROUP
|
||||
extern int onig_print_names(FILE*, regex_t*);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif /* REGPARSE_H */
|
174
sjis.c
Normal file
174
sjis.c
Normal file
|
@ -0,0 +1,174 @@
|
|||
/**********************************************************************
|
||||
|
||||
sjis.c - Oniguruma (regular expression library)
|
||||
|
||||
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
|
||||
|
||||
**********************************************************************/
|
||||
#include "regenc.h"
|
||||
|
||||
static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
|
||||
};
|
||||
|
||||
#define SJIS_ISMB_FIRST(byte) (OnigEncodingSJIS.len_table[byte] > 1)
|
||||
#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)]
|
||||
|
||||
static OnigCodePoint
|
||||
sjis_mbc_to_code(UChar* p, UChar* end)
|
||||
{
|
||||
int c, i, len;
|
||||
OnigCodePoint n;
|
||||
|
||||
c = *p++;
|
||||
len = enc_len(ONIG_ENCODING_SJIS, c);
|
||||
n = c;
|
||||
if (len == 1) return n;
|
||||
|
||||
for (i = 1; i < len; i++) {
|
||||
if (p >= end) break;
|
||||
c = *p++;
|
||||
n <<= 8; n += c;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
static int
|
||||
sjis_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
UChar *p = buf;
|
||||
|
||||
if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff));
|
||||
*p++ = (UChar )(code & 0xff);
|
||||
|
||||
#if 0
|
||||
if (enc_len(ONIG_ENCODING_SJIS, buf[0]) != (p - buf))
|
||||
return REGERR_INVALID_WIDE_CHAR_VALUE;
|
||||
#endif
|
||||
return p - buf;
|
||||
}
|
||||
|
||||
static int
|
||||
sjis_mbc_to_lower(UChar* p, UChar* lower)
|
||||
{
|
||||
int len;
|
||||
|
||||
if (ONIGENC_IS_MBC_ASCII(p)) {
|
||||
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
len = enc_len(ONIG_ENCODING_SJIS, *p);
|
||||
if (lower != p) {
|
||||
/* memcpy(lower, p, len); */
|
||||
int i;
|
||||
for (i = 0; i < len; i++) {
|
||||
*lower++ = *p++;
|
||||
}
|
||||
}
|
||||
return len; /* return byte length of converted char to lower */
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
sjis_code_is_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
|
||||
if (code < 128)
|
||||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
|
||||
else {
|
||||
int first = onigenc_mb2_code_to_mbc_first(code);
|
||||
return (enc_len(ONIG_ENCODING_SJIS, first) > 1 ? TRUE : FALSE);
|
||||
}
|
||||
|
||||
ctype &= ~ONIGENC_CTYPE_WORD;
|
||||
if (ctype == 0) return FALSE;
|
||||
}
|
||||
|
||||
if (code < 128)
|
||||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
 * Given a position s inside the string beginning at `start`, return a
 * position at or before s (presumably the head of the character that
 * contains s).
 *
 * If the byte at s could be a trail byte, step backwards over every byte
 * that could be a lead byte, stopping just after the first byte that cannot
 * (or at `start`).  From there, if the character at that point covers s it
 * is the answer; otherwise skip it and advance by whole two-byte pairs,
 * which rounds s down to an even distance from that point.
 */
static UChar*
sjis_left_adjust_char_head(UChar* start, UChar* s)
{
  UChar *head = s;
  int head_len;

  if (s <= start) return s;

  if (SJIS_ISMB_TRAIL(*head)) {
    while (head > start) {
      --head;
      if (! SJIS_ISMB_FIRST(*head)) {
        ++head;
        break;
      }
    }
  }

  head_len = enc_len(ONIG_ENCODING_SJIS, *head);
  if (head + head_len > s) return head;

  head += head_len;
  return head + ((s - head) & ~1);
}
|
||||
|
||||
static int
|
||||
sjis_is_allowed_reverse_match(UChar* s, UChar* end)
|
||||
{
|
||||
UChar c = *s;
|
||||
return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE);
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingSJIS = {
|
||||
{
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
|
||||
},
|
||||
"Shift_JIS", /* name */
|
||||
2, /* max byte length */
|
||||
FALSE, /* is_fold_match */
|
||||
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
|
||||
FALSE, /* is continuous sb mb codepoint */
|
||||
sjis_mbc_to_code,
|
||||
onigenc_mb2_code_to_mbclen,
|
||||
sjis_code_to_mbc,
|
||||
sjis_mbc_to_lower,
|
||||
onigenc_mbn_mbc_is_case_ambig,
|
||||
sjis_code_is_ctype,
|
||||
onigenc_nothing_get_ctype_code_range,
|
||||
sjis_left_adjust_char_head,
|
||||
sjis_is_allowed_reverse_match,
|
||||
onigenc_nothing_get_all_fold_match_code,
|
||||
onigenc_nothing_get_fold_match_info
|
||||
};
|
566
utf8.c
Normal file
566
utf8.c
Normal file
|
@ -0,0 +1,566 @@
|
|||
/**********************************************************************
|
||||
|
||||
utf8.c - Oniguruma (regular expression library)
|
||||
|
||||
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
|
||||
|
||||
**********************************************************************/
|
||||
#include "regenc.h"
|
||||
|
||||
#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
|
||||
|
||||
/* non-zero if the table entry for `code` has any of the bits in `ctype`.
   Both arguments are parenthesized so that a mask such as (A | B) is
   and-ed as a whole instead of being split by operator precedence. */
#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
  ((EncUnicode_ISO_8859_1_CtypeTable[(code)] & (ctype)) != 0)
|
||||
|
||||
/* Character-class bit mask for each code point 0x00..0xff, indexed by code
   point and tested through ENC_IS_ISO_8859_1_CTYPE().  The table is only
   ever read, so it is declared const.
   NOTE(review): the individual bits presumably correspond to the
   ONIGENC_CTYPE_* flags declared in regenc.h -- confirm there. */
static const unsigned short EncUnicode_ISO_8859_1_CtypeTable[256] = {
  /* 0x00 */ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0x08 */ 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
  /* 0x10 */ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0x18 */ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0x20 */ 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x1050, 0x10d0, 0x10d0, 0x10d0,
  /* 0x28 */ 0x10d0, 0x10d0, 0x10d0, 0x1050, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
  /* 0x30 */ 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
  /* 0x38 */ 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x1050, 0x1050, 0x1050, 0x10d0,
  /* 0x40 */ 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
  /* 0x48 */ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
  /* 0x50 */ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
  /* 0x58 */ 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x1050, 0x18d0,
  /* 0x60 */ 0x1050, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
  /* 0x68 */ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
  /* 0x70 */ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
  /* 0x78 */ 0x1871, 0x1871, 0x1871, 0x10d0, 0x1050, 0x10d0, 0x1050, 0x1004,
  /* 0x80 */ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0x88 */ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0x90 */ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0x98 */ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0xa0 */ 0x1142, 0x10d0, 0x1050, 0x1050, 0x1050, 0x1050, 0x1050, 0x1050,
  /* 0xa8 */ 0x1050, 0x1050, 0x1871, 0x10d0, 0x1050, 0x10d0, 0x1050, 0x1050,
  /* 0xb0 */ 0x1050, 0x1050, 0x1850, 0x1850, 0x1050, 0x1871, 0x1050, 0x10d0,
  /* 0xb8 */ 0x1050, 0x1850, 0x1871, 0x10d0, 0x1850, 0x1850, 0x1850, 0x10d0,
  /* 0xc0 */ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
  /* 0xc8 */ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
  /* 0xd0 */ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1050,
  /* 0xd8 */ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1871,
  /* 0xe0 */ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
  /* 0xe8 */ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
  /* 0xf0 */ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1050,
  /* 0xf8 */ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871
};
|
||||
|
||||
static OnigCodePoint
|
||||
utf8_mbc_to_code(UChar* p, UChar* end)
|
||||
{
|
||||
int c, len;
|
||||
OnigCodePoint n;
|
||||
|
||||
c = *p++;
|
||||
len = enc_len(ONIG_ENCODING_UTF8, c);
|
||||
if (len > 1) {
|
||||
len--;
|
||||
n = c & ((1 << (6 - len)) - 1);
|
||||
while (len--) {
|
||||
c = *p++;
|
||||
n = (n << 6) | (c & ((1 << 6) - 1));
|
||||
}
|
||||
return n;
|
||||
}
|
||||
else
|
||||
return (OnigCodePoint )c;
|
||||
}
|
||||
|
||||
static int
|
||||
utf8_code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
if ((code & 0xffffff80) == 0) return 1;
|
||||
else if ((code & 0xfffff800) == 0) {
|
||||
if (code <= 0xff && code >= 0xfe)
|
||||
return 1;
|
||||
return 2;
|
||||
}
|
||||
else if ((code & 0xffff0000) == 0) return 3;
|
||||
else if ((code & 0xffe00000) == 0) return 4;
|
||||
else if ((code & 0xfc000000) == 0) return 5;
|
||||
else if ((code & 0x80000000) == 0) return 6;
|
||||
else
|
||||
return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
|
||||
}
|
||||
|
||||
#if 0
/*
 * Disabled code: compute only the first byte of the multi-byte form of
 * `code`.  Returns ONIGERR_TOO_BIG_WIDE_CHAR_VALUE when bit 31 is set.
 */
static int
utf8_code_to_mbc_first(OnigCodePoint code)
{
  if ((code & 0xffffff80) == 0)
    return code;

  if ((code & 0xfffff800) == 0)
    return ((code>>6)& 0x1f) | 0xc0;
  if ((code & 0xffff0000) == 0)
    return ((code>>12) & 0x0f) | 0xe0;
  if ((code & 0xffe00000) == 0)
    return ((code>>18) & 0x07) | 0xf0;
  if ((code & 0xfc000000) == 0)
    return ((code>>24) & 0x03) | 0xf8;
  if ((code & 0x80000000) == 0)
    return ((code>>30) & 0x01) | 0xfc;

  return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
}
#endif
|
||||
|
||||
static int
|
||||
utf8_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80)
|
||||
#define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80)
|
||||
|
||||
if ((code & 0xffffff80) == 0) {
|
||||
*buf = (UChar )code;
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
UChar *p = buf;
|
||||
|
||||
if ((code & 0xfffff800) == 0) {
|
||||
*p++ = (UChar )(((code>>6)& 0x1f) | 0xc0);
|
||||
}
|
||||
else if ((code & 0xffff0000) == 0) {
|
||||
*p++ = (UChar )(((code>>12) & 0x0f) | 0xe0);
|
||||
*p++ = UTF8_TRAILS(code, 6);
|
||||
}
|
||||
else if ((code & 0xffe00000) == 0) {
|
||||
*p++ = (UChar )(((code>>18) & 0x07) | 0xf0);
|
||||
*p++ = UTF8_TRAILS(code, 12);
|
||||
*p++ = UTF8_TRAILS(code, 6);
|
||||
}
|
||||
else if ((code & 0xfc000000) == 0) {
|
||||
*p++ = (UChar )(((code>>24) & 0x03) | 0xf8);
|
||||
*p++ = UTF8_TRAILS(code, 18);
|
||||
*p++ = UTF8_TRAILS(code, 12);
|
||||
*p++ = UTF8_TRAILS(code, 6);
|
||||
}
|
||||
else if ((code & 0x80000000) == 0) {
|
||||
*p++ = (UChar )(((code>>30) & 0x01) | 0xfc);
|
||||
*p++ = UTF8_TRAILS(code, 24);
|
||||
*p++ = UTF8_TRAILS(code, 18);
|
||||
*p++ = UTF8_TRAILS(code, 12);
|
||||
*p++ = UTF8_TRAILS(code, 6);
|
||||
}
|
||||
else {
|
||||
return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
|
||||
}
|
||||
|
||||
*p++ = UTF8_TRAIL0(code);
|
||||
return p - buf;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
utf8_mbc_to_lower(UChar* p, UChar* lower)
|
||||
{
|
||||
int len;
|
||||
|
||||
/* !!! U+0080 - U+00ff is treated by fold match. !!! */
|
||||
if (ONIGENC_IS_MBC_ASCII(p)) {
|
||||
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
else {
|
||||
len = enc_len(ONIG_ENCODING_UTF8, *p);
|
||||
if (lower != p) {
|
||||
/* memcpy(lower, p, len); */
|
||||
int i;
|
||||
for (i = 0; i < len; i++) {
|
||||
*lower++ = *p++;
|
||||
}
|
||||
}
|
||||
return len; /* return byte length of converted char to lower */
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
utf8_mbc_is_case_ambig(UChar* p)
|
||||
{
|
||||
/* !!! U+0080 - U+00ff ( 0x80[0xc2,0x80] - 0xff[0xc3,0xbf] )
|
||||
is treated by fold match. !!! */
|
||||
|
||||
if (ONIGENC_IS_MBC_ASCII(p))
|
||||
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static int
|
||||
utf8_code_is_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256) {
|
||||
return ENC_IS_ISO_8859_1_CTYPE(code, ctype);
|
||||
}
|
||||
|
||||
if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static int
|
||||
utf8_get_ctype_code_range(int ctype, int* nsb, int* nmb,
|
||||
OnigCodePointRange* sbr[], OnigCodePointRange* mbr[])
|
||||
{
|
||||
#define CR_SET(sbl,mbl) do { \
|
||||
*nsb = sizeof(sbl) / sizeof(OnigCodePointRange); \
|
||||
*nmb = sizeof(mbl) / sizeof(OnigCodePointRange); \
|
||||
*sbr = sbl; \
|
||||
*mbr = mbl; \
|
||||
} while (0)
|
||||
|
||||
#define CR_SB_SET(sbl) do { \
|
||||
*nsb = sizeof(sbl) / sizeof(OnigCodePointRange); \
|
||||
*nmb = 0; \
|
||||
*sbr = sbl; \
|
||||
} while (0)
|
||||
|
||||
static OnigCodePointRange SBAlpha[] = {
|
||||
{ 0x41, 0x5a },
|
||||
{ 0x61, 0x7a }
|
||||
};
|
||||
|
||||
static OnigCodePointRange MBAlpha[] = {
|
||||
{ 0xaa, 0xaa },
|
||||
{ 0xb5, 0xb5 },
|
||||
{ 0xba, 0xba },
|
||||
{ 0xc0, 0xd6 },
|
||||
{ 0xd8, 0xf6 },
|
||||
{ 0xf8, 0x220 }
|
||||
};
|
||||
|
||||
static OnigCodePointRange SBBlank[] = {
|
||||
{ 0x09, 0x09 },
|
||||
{ 0x20, 0x20 }
|
||||
};
|
||||
|
||||
static OnigCodePointRange MBBlank[] = {
|
||||
{ 0xa0, 0xa0 }
|
||||
};
|
||||
|
||||
static OnigCodePointRange SBCntrl[] = {
|
||||
{ 0x00, 0x1f },
|
||||
{ 0x7f, 0x7f }
|
||||
};
|
||||
|
||||
static OnigCodePointRange MBCntrl[] = {
|
||||
{ 0x80, 0x9f }
|
||||
};
|
||||
|
||||
static OnigCodePointRange SBDigit[] = {
|
||||
{ 0x30, 0x39 }
|
||||
};
|
||||
|
||||
static OnigCodePointRange SBGraph[] = {
|
||||
{ 0x21, 0x7e }
|
||||
};
|
||||
|
||||
static OnigCodePointRange MBGraph[] = {
|
||||
{ 0xa1, 0x220 }
|
||||
};
|
||||
|
||||
static OnigCodePointRange SBLower[] = {
|
||||
{ 0x61, 0x7a }
|
||||
};
|
||||
|
||||
static OnigCodePointRange MBLower[] = {
|
||||
{ 0xaa, 0xaa },
|
||||
{ 0xb5, 0xb5 },
|
||||
{ 0xba, 0xba },
|
||||
{ 0xdf, 0xf6 },
|
||||
{ 0xf8, 0xff }
|
||||
};
|
||||
|
||||
static OnigCodePointRange SBPrint[] = {
|
||||
{ 0x20, 0x7e }
|
||||
};
|
||||
|
||||
static OnigCodePointRange MBPrint[] = {
|
||||
{ 0xa0, 0x220 }
|
||||
};
|
||||
|
||||
static OnigCodePointRange SBPunct[] = {
|
||||
{ 0x21, 0x23 },
|
||||
{ 0x25, 0x2a },
|
||||
{ 0x2c, 0x2f },
|
||||
{ 0x3a, 0x3b },
|
||||
{ 0x3f, 0x40 },
|
||||
{ 0x5b, 0x5d },
|
||||
{ 0x5f, 0x5f },
|
||||
{ 0x7b, 0x7b },
|
||||
{ 0x7d, 0x7d }
|
||||
};
|
||||
|
||||
static OnigCodePointRange MBPunct[] = {
|
||||
{ 0xa1, 0xa1 },
|
||||
{ 0xab, 0xab },
|
||||
{ 0xad, 0xad },
|
||||
{ 0xb7, 0xb7 },
|
||||
{ 0xbb, 0xbb },
|
||||
{ 0xbf, 0xbf }
|
||||
};
|
||||
|
||||
static OnigCodePointRange SBSpace[] = {
|
||||
{ 0x09, 0x0d },
|
||||
{ 0x20, 0x20 }
|
||||
};
|
||||
|
||||
static OnigCodePointRange MBSpace[] = {
|
||||
{ 0xa0, 0xa0 }
|
||||
};
|
||||
|
||||
static OnigCodePointRange SBUpper[] = {
|
||||
{ 0x41, 0x5a }
|
||||
};
|
||||
|
||||
static OnigCodePointRange MBUpper[] = {
|
||||
{ 0xc0, 0xd6 },
|
||||
{ 0xd8, 0xde }
|
||||
};
|
||||
|
||||
static OnigCodePointRange SBXDigit[] = {
|
||||
{ 0x30, 0x39 },
|
||||
{ 0x41, 0x46 },
|
||||
{ 0x61, 0x66 }
|
||||
};
|
||||
|
||||
static OnigCodePointRange SBWord[] = {
|
||||
{ 0x30, 0x39 },
|
||||
{ 0x41, 0x5a },
|
||||
{ 0x5f, 0x5f },
|
||||
{ 0x61, 0x7a }
|
||||
};
|
||||
|
||||
static OnigCodePointRange MBWord[] = {
|
||||
{ 0xaa, 0xaa },
|
||||
{ 0xb2, 0xb3 },
|
||||
{ 0xb5, 0xb5 },
|
||||
{ 0xb9, 0xba },
|
||||
{ 0xbc, 0xbe },
|
||||
{ 0xc0, 0xd6 },
|
||||
{ 0xd8, 0xf6 },
|
||||
#if 0
|
||||
{ 0xf8, 0x220 }
|
||||
#else
|
||||
{ 0xf8, 0x7fffffff } /* all multibyte code as word */
|
||||
#endif
|
||||
};
|
||||
|
||||
static OnigCodePointRange SBAscii[] = {
|
||||
{ 0x00, 0x7f }
|
||||
};
|
||||
|
||||
static OnigCodePointRange SBAlnum[] = {
|
||||
{ 0x30, 0x39 },
|
||||
{ 0x41, 0x5a },
|
||||
{ 0x61, 0x7a }
|
||||
};
|
||||
|
||||
static OnigCodePointRange MBAlnum[] = {
|
||||
{ 0xaa, 0xaa },
|
||||
{ 0xb5, 0xb5 },
|
||||
{ 0xba, 0xba },
|
||||
{ 0xc0, 0xd6 },
|
||||
{ 0xd8, 0xf6 },
|
||||
{ 0xf8, 0x220 }
|
||||
};
|
||||
|
||||
switch (ctype) {
|
||||
case ONIGENC_CTYPE_ALPHA:
|
||||
CR_SET(SBAlpha, MBAlpha);
|
||||
break;
|
||||
case ONIGENC_CTYPE_BLANK:
|
||||
CR_SET(SBBlank, MBBlank);
|
||||
break;
|
||||
case ONIGENC_CTYPE_CNTRL:
|
||||
CR_SET(SBCntrl, MBCntrl);
|
||||
break;
|
||||
case ONIGENC_CTYPE_DIGIT:
|
||||
CR_SB_SET(SBDigit);
|
||||
break;
|
||||
case ONIGENC_CTYPE_GRAPH:
|
||||
CR_SET(SBGraph, MBGraph);
|
||||
break;
|
||||
case ONIGENC_CTYPE_LOWER:
|
||||
CR_SET(SBLower, MBLower);
|
||||
break;
|
||||
case ONIGENC_CTYPE_PRINT:
|
||||
CR_SET(SBPrint, MBPrint);
|
||||
break;
|
||||
case ONIGENC_CTYPE_PUNCT:
|
||||
CR_SET(SBPunct, MBPunct);
|
||||
break;
|
||||
case ONIGENC_CTYPE_SPACE:
|
||||
CR_SET(SBSpace, MBSpace);
|
||||
break;
|
||||
case ONIGENC_CTYPE_UPPER:
|
||||
CR_SET(SBUpper, MBUpper);
|
||||
break;
|
||||
case ONIGENC_CTYPE_XDIGIT:
|
||||
CR_SB_SET(SBXDigit);
|
||||
break;
|
||||
case ONIGENC_CTYPE_WORD:
|
||||
CR_SET(SBWord, MBWord);
|
||||
break;
|
||||
case ONIGENC_CTYPE_ASCII:
|
||||
CR_SB_SET(SBAscii);
|
||||
break;
|
||||
case ONIGENC_CTYPE_ALNUM:
|
||||
CR_SET(SBAlnum, MBAlnum);
|
||||
break;
|
||||
|
||||
default:
|
||||
return ONIGERR_TYPE_BUG;
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
utf8_get_all_fold_match_code(OnigCodePoint** codes)
|
||||
{
|
||||
static OnigCodePoint list[] = {
|
||||
0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
|
||||
0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
|
||||
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
|
||||
0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
|
||||
|
||||
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
|
||||
0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
|
||||
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6,
|
||||
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe,
|
||||
};
|
||||
|
||||
*codes = list;
|
||||
return sizeof(list) / sizeof(OnigCodePoint);
|
||||
}
|
||||
|
||||
static int
|
||||
utf8_get_fold_match_info(UChar* p, UChar* end, OnigEncFoldMatchInfo** info)
|
||||
{
|
||||
|
||||
static OnigEncFoldMatchInfo xc[] = {
|
||||
{ 2, { 2, 2 }, { "\303\200", "\303\240" } }, /* CodePoint 0xc0 */
|
||||
{ 2, { 2, 2 }, { "\303\201", "\303\241" } },
|
||||
{ 2, { 2, 2 }, { "\303\202", "\303\242" } },
|
||||
{ 2, { 2, 2 }, { "\303\203", "\303\243" } },
|
||||
{ 2, { 2, 2 }, { "\303\204", "\303\244" } },
|
||||
{ 2, { 2, 2 }, { "\303\205", "\303\245" } },
|
||||
{ 2, { 2, 2 }, { "\303\206", "\303\246" } },
|
||||
{ 2, { 2, 2 }, { "\303\207", "\303\247" } },
|
||||
{ 2, { 2, 2 }, { "\303\210", "\303\250" } },
|
||||
{ 2, { 2, 2 }, { "\303\211", "\303\251" } },
|
||||
{ 2, { 2, 2 }, { "\303\212", "\303\252" } },
|
||||
{ 2, { 2, 2 }, { "\303\213", "\303\253" } },
|
||||
{ 2, { 2, 2 }, { "\303\214", "\303\254" } },
|
||||
{ 2, { 2, 2 }, { "\303\215", "\303\255" } },
|
||||
{ 2, { 2, 2 }, { "\303\216", "\303\256" } },
|
||||
{ 2, { 2, 2 }, { "\303\217", "\303\257" } },
|
||||
{ 2, { 2, 2 }, { "\303\220", "\303\260" } }, /* CodePoint 0xd0 */
|
||||
{ 2, { 2, 2 }, { "\303\221", "\303\261" } },
|
||||
{ 2, { 2, 2 }, { "\303\222", "\303\262" } },
|
||||
{ 2, { 2, 2 }, { "\303\223", "\303\263" } },
|
||||
{ 2, { 2, 2 }, { "\303\224", "\303\264" } },
|
||||
{ 2, { 2, 2 }, { "\303\225", "\303\265" } },
|
||||
{ 2, { 2, 2 }, { "\303\226", "\303\266" } },
|
||||
{ 0, { 0 }, { "" } },
|
||||
{ 2, { 2, 2 }, { "\303\230", "\303\270" } },
|
||||
{ 2, { 2, 2 }, { "\303\231", "\303\271" } },
|
||||
{ 2, { 2, 2 }, { "\303\232", "\303\272" } },
|
||||
{ 2, { 2, 2 }, { "\303\233", "\303\273" } },
|
||||
{ 2, { 2, 2 }, { "\303\234", "\303\274" } },
|
||||
{ 2, { 2, 2 }, { "\303\235", "\303\275" } },
|
||||
{ 2, { 2, 2 }, { "\303\236", "\303\276" } },
|
||||
{ 3, { 2, 2, 2 }, { "\303\237", "ss", "SS" }} /* ess-tsett(U+00DF) */
|
||||
};
|
||||
|
||||
if (p + 1 >= end) return -1;
|
||||
if (*p < 0x80) {
|
||||
if ((*p == 'S' && *(p+1) == 'S') ||
|
||||
(*p == 's' && *(p+1) == 's')) {
|
||||
*info = &(xc[0xdf - 0xc0]);
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
else if (*p == 195) { /* 195 == '\303' */
|
||||
int c = *(p+1);
|
||||
if (c >= 128) {
|
||||
if (c <= 159) { /* upper */
|
||||
if (c == 151) return -1; /* 0xd7 */
|
||||
*info = &(xc[c - 128]);
|
||||
return 2;
|
||||
}
|
||||
else { /* lower */
|
||||
if (c == 183) return -1; /* 0xf7 */
|
||||
*info = &(xc[c - 160]);
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return -1; /* is not a fold string. */
|
||||
}
|
||||
|
||||
|
||||
/*
 * Starting at s, step backwards until a byte for which utf8_islead() holds
 * is found or `start` is reached, and return that position.  When s is not
 * after start, s is returned unchanged.
 */
static UChar*
utf8_left_adjust_char_head(UChar* start, UChar* s)
{
  UChar *head;

  if (s <= start)
    return s;

  for (head = s; !utf8_islead(*head) && head > start; head--)
    ;

  return head;
}
|
||||
|
||||
static int
|
||||
utf8_is_allowed_reverse_match(UChar* s, UChar* end)
|
||||
{
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/*
 * Encoding descriptor for UTF-8: the per-first-byte length table followed
 * by the encoding properties and the handler functions defined in this
 * file.
 */
OnigEncodingType OnigEncodingUTF8 = {
  {
    /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xe0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
  },
  "UTF-8",                           /* name */
  6,                                 /* max byte length */
  TRUE,                              /* is_fold_match */
  ONIGENC_CTYPE_SUPPORT_LEVEL_FULL,  /* ctype_support_level */
  TRUE,                              /* is continuous sb mb codepoint */
  utf8_mbc_to_code,
  utf8_code_to_mbclen,
  utf8_code_to_mbc,
  utf8_mbc_to_lower,
  utf8_mbc_is_case_ambig,
  utf8_code_is_ctype,
  utf8_get_ctype_code_range,
  utf8_left_adjust_char_head,
  utf8_is_allowed_reverse_match,
  utf8_get_all_fold_match_code,
  utf8_get_fold_match_info
};
|
Loading…
Reference in a new issue