mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* reg*.c: Merge Onigmo 5.15.0 38a870960aa7370051a3544
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@47598 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
d198d64e04
commit
d2a5354255
33 changed files with 390 additions and 236 deletions
|
@ -1,3 +1,7 @@
|
|||
Tue Sep 16 01:06:40 2014 NARUSE, Yui <naruse@ruby-lang.org>
|
||||
|
||||
* reg*.c: Merge Onigmo 5.15.0 38a870960aa7370051a3544
|
||||
|
||||
Mon Sep 15 16:21:10 2014 Eric Wong <e@80x24.org>
|
||||
|
||||
* io.c (struct io_advise_struct): 32 => 24 bytes on 64-bit
|
||||
|
|
|
@ -167,19 +167,19 @@ big5_mbc_enc_len0(const UChar* p, const UChar* e, int tridx, const int tbl[])
|
|||
static int
|
||||
big5_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return big5_mbc_enc_len0(p, e, 0, EncLen_BIG5);
|
||||
return big5_mbc_enc_len0(p, e, 0, EncLen_BIG5);
|
||||
}
|
||||
|
||||
static int
|
||||
big5_hkscs_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return big5_mbc_enc_len0(p, e, 2, EncLen_BIG5_HKSCS);
|
||||
return big5_mbc_enc_len0(p, e, 2, EncLen_BIG5_HKSCS);
|
||||
}
|
||||
|
||||
static int
|
||||
big5_uao_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return big5_mbc_enc_len0(p, e, 2, EncLen_BIG5_UAO);
|
||||
return big5_mbc_enc_len0(p, e, 2, EncLen_BIG5_UAO);
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
|
|
|
@ -293,7 +293,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 0,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
|
@ -504,13 +504,14 @@ static int
|
|||
property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
|
||||
{
|
||||
UChar *s = p, *e = end;
|
||||
const struct enc_property *prop = onig_jis_property((const char *)s, (unsigned int)(e-s));
|
||||
const struct enc_property *prop =
|
||||
onig_jis_property((const char* )s, (unsigned int )(e - s));
|
||||
|
||||
if (!prop) {
|
||||
return onigenc_minimum_property_name_to_ctype(enc, s, e);
|
||||
}
|
||||
|
||||
return (int)prop->ctype;
|
||||
return (int )prop->ctype;
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
|
@ -29,8 +29,6 @@
|
|||
|
||||
#include "regenc.h"
|
||||
|
||||
#define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
|
||||
|
||||
#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
|
||||
((EncISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
|
||||
|
||||
|
|
|
@ -208,7 +208,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
|
@ -219,7 +219,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
|
|
|
@ -197,7 +197,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
|
@ -208,7 +208,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
|
|
|
@ -210,7 +210,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
|
@ -221,7 +221,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
|
|
|
@ -204,7 +204,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
|
@ -215,7 +215,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
|
|
|
@ -206,7 +206,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
|
@ -217,7 +217,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
|
|
|
@ -29,8 +29,6 @@
|
|||
|
||||
#include "regenc.h"
|
||||
|
||||
#define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
|
||||
|
||||
#define ENC_ISO_8859_2_TO_LOWER_CASE(c) EncISO_8859_2_ToLowerCaseTable[c]
|
||||
#define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \
|
||||
((EncISO_8859_2_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
|
||||
|
|
|
@ -204,7 +204,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
|
@ -215,7 +215,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
|
|
|
@ -206,7 +206,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
|
@ -217,7 +217,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
|
|
|
@ -194,7 +194,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 0,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
|
@ -205,7 +205,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 0,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
|
|
|
@ -190,7 +190,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 0,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
|
@ -201,7 +201,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 0,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
|
|
|
@ -197,7 +197,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
|
@ -208,7 +208,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
|
|
|
@ -183,7 +183,7 @@ koi8_r_apply_all_case_fold(OnigCaseFoldType flag,
|
|||
void* arg, OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 0,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
|
@ -193,7 +193,7 @@ koi8_r_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 0,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
|
|
|
@ -187,7 +187,7 @@ koi8_u_apply_all_case_fold(OnigCaseFoldType flag,
|
|||
void* arg, OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 0,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
|
@ -197,7 +197,7 @@ koi8_u_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 0,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
|
|
|
@ -278,7 +278,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 0,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
|
@ -493,13 +493,14 @@ static int
|
|||
property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
|
||||
{
|
||||
UChar *s = p, *e = end;
|
||||
const struct enc_property *prop = onig_jis_property((const char *)s, (unsigned int)(e-s));
|
||||
const struct enc_property *prop =
|
||||
onig_jis_property((const char* )s, (unsigned int )(e - s));
|
||||
|
||||
if (!prop) {
|
||||
return onigenc_minimum_property_name_to_ctype(enc, s, e);
|
||||
}
|
||||
|
||||
return (int)prop->ctype;
|
||||
return (int )prop->ctype;
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
|
@ -141,7 +141,6 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y)
|
|||
|
||||
#include "enc/unicode/name2ctype.h"
|
||||
|
||||
#define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
|
||||
#define CODE_RANGES_NUM numberof(CodeRanges)
|
||||
|
||||
extern int
|
||||
|
|
|
@ -3,9 +3,9 @@
|
|||
static int
|
||||
us_ascii_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc)
|
||||
{
|
||||
if (*p & 0x80)
|
||||
return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
|
||||
return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1);
|
||||
if (*p & 0x80)
|
||||
return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
|
||||
return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(us_ascii, US_ASCII) = {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#include "regenc.h"
|
||||
/* dummy for unsupported, stateful encoding */
|
||||
#define ENC_DUMMY_UNICODE(name) ENC_REPLICATE(name, name "BE")
|
||||
#define ENC_DUMMY_UNICODE(name) ENC_DUMMY(name)
|
||||
ENC_DUMMY_UNICODE("UTF-16");
|
||||
ENC_DUMMY_UNICODE("UTF-32");
|
||||
|
|
|
@ -29,10 +29,6 @@
|
|||
|
||||
#include "regenc.h"
|
||||
|
||||
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
|
||||
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
|
||||
#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
|
||||
|
||||
#if 0
|
||||
static const int EncLen_UTF16[] = {
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
|
|
|
@ -29,10 +29,6 @@
|
|||
|
||||
#include "regenc.h"
|
||||
|
||||
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
|
||||
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
|
||||
#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
|
||||
|
||||
#if 0
|
||||
static const int EncLen_UTF16[] = {
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
|
|
|
@ -367,7 +367,7 @@ code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
|
|||
|
||||
static int
|
||||
mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
|
||||
const UChar* end, UChar* fold, OnigEncoding enc)
|
||||
const UChar* end, UChar* fold, OnigEncoding enc)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
|
@ -395,7 +395,7 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
|
|||
|
||||
static int
|
||||
get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out,
|
||||
const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED)
|
||||
const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
*sb_out = 0x80;
|
||||
return onigenc_unicode_ctype_code_range(ctype, ranges);
|
||||
|
|
|
@ -167,7 +167,7 @@ cp1251_apply_all_case_fold(OnigCaseFoldType flag,
|
|||
OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 0,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
|
@ -176,7 +176,7 @@ cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
numberof(CaseFoldMap), CaseFoldMap, 0,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
|
|
|
@ -39,8 +39,8 @@ extern "C" {
|
|||
|
||||
#define ONIGURUMA
|
||||
#define ONIGURUMA_VERSION_MAJOR 5
|
||||
#define ONIGURUMA_VERSION_MINOR 14
|
||||
#define ONIGURUMA_VERSION_TEENY 1
|
||||
#define ONIGURUMA_VERSION_MINOR 15
|
||||
#define ONIGURUMA_VERSION_TEENY 0
|
||||
|
||||
#ifdef __cplusplus
|
||||
# ifndef HAVE_PROTOTYPES
|
||||
|
|
127
regcomp.c
127
regcomp.c
|
@ -330,9 +330,10 @@ static int compile_tree(Node* node, regex_t* reg);
|
|||
(op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
|
||||
|
||||
static int
|
||||
select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
|
||||
select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
|
||||
{
|
||||
int op;
|
||||
OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
|
||||
|
||||
if (ignore_case) {
|
||||
switch (str_len) {
|
||||
|
@ -434,11 +435,11 @@ compile_tree_n_times(Node* node, int n, regex_t* reg)
|
|||
}
|
||||
|
||||
static int
|
||||
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
|
||||
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
|
||||
regex_t* reg ARG_UNUSED, int ignore_case)
|
||||
{
|
||||
int len;
|
||||
int op = select_str_opcode(mb_len, str_len, ignore_case);
|
||||
int op = select_str_opcode(mb_len, byte_len, ignore_case);
|
||||
|
||||
len = SIZE_OPCODE;
|
||||
|
||||
|
@ -446,15 +447,15 @@ add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
|
|||
if (IS_NEED_STR_LEN_OP_EXACT(op))
|
||||
len += SIZE_LENGTH;
|
||||
|
||||
len += mb_len * (int )str_len;
|
||||
len += (int )byte_len;
|
||||
return len;
|
||||
}
|
||||
|
||||
static int
|
||||
add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
|
||||
add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
|
||||
regex_t* reg, int ignore_case)
|
||||
{
|
||||
int op = select_str_opcode(mb_len, str_len, ignore_case);
|
||||
int op = select_str_opcode(mb_len, byte_len, ignore_case);
|
||||
add_opcode(reg, op);
|
||||
|
||||
if (op == OP_EXACTMBN)
|
||||
|
@ -462,12 +463,12 @@ add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
|
|||
|
||||
if (IS_NEED_STR_LEN_OP_EXACT(op)) {
|
||||
if (op == OP_EXACTN_IC)
|
||||
add_length(reg, mb_len * str_len);
|
||||
add_length(reg, byte_len);
|
||||
else
|
||||
add_length(reg, str_len);
|
||||
add_length(reg, byte_len / mb_len);
|
||||
}
|
||||
|
||||
add_bytes(reg, s, mb_len * str_len);
|
||||
add_bytes(reg, s, byte_len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -475,7 +476,7 @@ add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
|
|||
static int
|
||||
compile_length_string_node(Node* node, regex_t* reg)
|
||||
{
|
||||
int rlen, r, len, prev_len, slen, ambig;
|
||||
int rlen, r, len, prev_len, blen, ambig;
|
||||
OnigEncoding enc = reg->enc;
|
||||
UChar *p, *prev;
|
||||
StrNode* sn;
|
||||
|
@ -489,24 +490,24 @@ compile_length_string_node(Node* node, regex_t* reg)
|
|||
p = prev = sn->s;
|
||||
prev_len = enclen(enc, p, sn->end);
|
||||
p += prev_len;
|
||||
slen = 1;
|
||||
blen = prev_len;
|
||||
rlen = 0;
|
||||
|
||||
for (; p < sn->end; ) {
|
||||
len = enclen(enc, p, sn->end);
|
||||
if (len == prev_len) {
|
||||
slen++;
|
||||
if (len == prev_len || ambig) {
|
||||
blen += len;
|
||||
}
|
||||
else {
|
||||
r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
|
||||
r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
|
||||
rlen += r;
|
||||
prev = p;
|
||||
slen = 1;
|
||||
blen = len;
|
||||
prev_len = len;
|
||||
}
|
||||
p += len;
|
||||
}
|
||||
r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
|
||||
r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
|
||||
rlen += r;
|
||||
return rlen;
|
||||
}
|
||||
|
@ -523,7 +524,7 @@ compile_length_string_raw_node(StrNode* sn, regex_t* reg)
|
|||
static int
|
||||
compile_string_node(Node* node, regex_t* reg)
|
||||
{
|
||||
int r, len, prev_len, slen, ambig;
|
||||
int r, len, prev_len, blen, ambig;
|
||||
OnigEncoding enc = reg->enc;
|
||||
UChar *p, *prev, *end;
|
||||
StrNode* sn;
|
||||
|
@ -538,25 +539,25 @@ compile_string_node(Node* node, regex_t* reg)
|
|||
p = prev = sn->s;
|
||||
prev_len = enclen(enc, p, end);
|
||||
p += prev_len;
|
||||
slen = 1;
|
||||
blen = prev_len;
|
||||
|
||||
for (; p < end; ) {
|
||||
len = enclen(enc, p, end);
|
||||
if (len == prev_len) {
|
||||
slen++;
|
||||
if (len == prev_len || ambig) {
|
||||
blen += len;
|
||||
}
|
||||
else {
|
||||
r = add_compile_string(prev, prev_len, slen, reg, ambig);
|
||||
r = add_compile_string(prev, prev_len, blen, reg, ambig);
|
||||
if (r) return r;
|
||||
|
||||
prev = p;
|
||||
slen = 1;
|
||||
blen = len;
|
||||
prev_len = len;
|
||||
}
|
||||
|
||||
p += len;
|
||||
}
|
||||
return add_compile_string(prev, prev_len, slen, reg, ambig);
|
||||
return add_compile_string(prev, prev_len, blen, reg, ambig);
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -2591,6 +2592,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
|
|||
return 0;
|
||||
}
|
||||
else {
|
||||
if (IS_NOT_NULL(xc->mbuf)) return 0;
|
||||
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
||||
int is_word;
|
||||
if (NCTYPE(y)->ascii_range)
|
||||
|
@ -3311,7 +3313,7 @@ next_setup(Node* node, Node* next_node, int in_root, regex_t* reg)
|
|||
qn->next_head_exact = n;
|
||||
}
|
||||
#endif
|
||||
/* automatic possessivation a*b ==> (?>a*)b */
|
||||
/* automatic possessification a*b ==> (?>a*)b */
|
||||
if (qn->lower <= 1) {
|
||||
int ttype = NTYPE(qn->target);
|
||||
if (IS_NODE_TYPE_SIMPLE(ttype)) {
|
||||
|
@ -3432,27 +3434,40 @@ expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
|
||||
int slen)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < item_num; i++) {
|
||||
if (items[i].byte_len != slen) {
|
||||
return 1;
|
||||
}
|
||||
if (items[i].code_len != 1) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
|
||||
UChar *p, int slen, UChar *end,
|
||||
regex_t* reg, Node **rnode)
|
||||
{
|
||||
int r, i, j, len, varlen, varclen;
|
||||
int r, i, j, len, varlen;
|
||||
Node *anode, *var_anode, *snode, *xnode, *an;
|
||||
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
|
||||
|
||||
*rnode = var_anode = NULL_NODE;
|
||||
|
||||
varlen = 0;
|
||||
varclen = 0;
|
||||
for (i = 0; i < item_num; i++) {
|
||||
if (items[i].byte_len != slen) {
|
||||
varlen = 1;
|
||||
break;
|
||||
}
|
||||
if (items[i].code_len != 1) {
|
||||
varclen |= 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (varlen != 0) {
|
||||
|
@ -3537,8 +3552,6 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
|
|||
}
|
||||
}
|
||||
|
||||
if (varclen && !varlen)
|
||||
return 2;
|
||||
return varlen;
|
||||
|
||||
mem_err2:
|
||||
|
@ -3582,7 +3595,8 @@ expand_case_fold_string(Node* node, regex_t* reg)
|
|||
|
||||
len = enclen(reg->enc, p, end);
|
||||
|
||||
if (n == 0) {
|
||||
varlen = is_case_fold_variable_len(n, items, len);
|
||||
if (n == 0 || varlen == 0) {
|
||||
if (IS_NULL(snode)) {
|
||||
if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
|
||||
top_root = root = onig_node_list_add(NULL_NODE, prev_node);
|
||||
|
@ -3607,11 +3621,14 @@ expand_case_fold_string(Node* node, regex_t* reg)
|
|||
}
|
||||
else {
|
||||
alt_num *= (n + 1);
|
||||
if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) {
|
||||
varlen = 1; /* Assume that expanded strings are variable length. */
|
||||
break;
|
||||
}
|
||||
if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
|
||||
|
||||
if (IS_NOT_NULL(snode)) {
|
||||
r = update_string_node_case_fold(reg, snode);
|
||||
if (r == 0) {
|
||||
NSTRING_SET_AMBIG(snode);
|
||||
}
|
||||
}
|
||||
if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
|
||||
top_root = root = onig_node_list_add(NULL_NODE, prev_node);
|
||||
if (IS_NULL(root)) {
|
||||
|
@ -3622,7 +3639,6 @@ expand_case_fold_string(Node* node, regex_t* reg)
|
|||
|
||||
r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
|
||||
if (r < 0) goto mem_err;
|
||||
if (r > 0) varlen = 1;
|
||||
if (r == 1) {
|
||||
if (IS_NULL(root)) {
|
||||
top_root = prev_node;
|
||||
|
@ -3636,7 +3652,7 @@ expand_case_fold_string(Node* node, regex_t* reg)
|
|||
|
||||
root = NCAR(prev_node);
|
||||
}
|
||||
else { /* r == 0 || r == 2 */
|
||||
else { /* r == 0 */
|
||||
if (IS_NOT_NULL(root)) {
|
||||
if (IS_NULL(onig_node_list_add(root, prev_node))) {
|
||||
onig_node_free(prev_node);
|
||||
|
@ -3650,6 +3666,12 @@ expand_case_fold_string(Node* node, regex_t* reg)
|
|||
|
||||
p += len;
|
||||
}
|
||||
if (IS_NOT_NULL(snode)) {
|
||||
r = update_string_node_case_fold(reg, snode);
|
||||
if (r == 0) {
|
||||
NSTRING_SET_AMBIG(snode);
|
||||
}
|
||||
}
|
||||
|
||||
if (p < end) {
|
||||
Node *srem;
|
||||
|
@ -3679,20 +3701,9 @@ expand_case_fold_string(Node* node, regex_t* reg)
|
|||
|
||||
/* ending */
|
||||
top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
|
||||
if (!varlen) {
|
||||
/* When all expanded strings are same length, case-insensitive
|
||||
BM search will be used. */
|
||||
r = update_string_node_case_fold(reg, node);
|
||||
if (r == 0) {
|
||||
NSTRING_SET_AMBIG(node);
|
||||
}
|
||||
}
|
||||
else {
|
||||
swap_node(node, top_root);
|
||||
r = 0;
|
||||
}
|
||||
swap_node(node, top_root);
|
||||
onig_node_free(top_root);
|
||||
return r;
|
||||
return 0;
|
||||
|
||||
mem_err:
|
||||
r = ONIGERR_MEMORY;
|
||||
|
@ -4367,7 +4378,7 @@ map_position_value(OnigEncoding enc, int i)
|
|||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
|
||||
};
|
||||
|
||||
if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
|
||||
if (i < numberof(ByteValTable)) {
|
||||
if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
|
||||
return 20;
|
||||
else
|
||||
|
@ -4399,7 +4410,7 @@ distance_value(MinMaxLen* mm)
|
|||
if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
|
||||
|
||||
d = mm->max - mm->min;
|
||||
if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
|
||||
if (d < numberof(dist_vals))
|
||||
/* return dist_vals[d] * 16 / (mm->min + 12); */
|
||||
return (int )dist_vals[d];
|
||||
else
|
||||
|
@ -4507,6 +4518,9 @@ concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
|
|||
if (right_len == 0) {
|
||||
to->right_anchor |= left->right_anchor;
|
||||
}
|
||||
else {
|
||||
to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -5080,7 +5094,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
|
|||
case ANCHOR_END_BUF:
|
||||
case ANCHOR_SEMI_END_BUF:
|
||||
case ANCHOR_END_LINE:
|
||||
case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
|
||||
case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
|
||||
case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
|
||||
add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
|
||||
break;
|
||||
|
||||
|
@ -5103,7 +5118,6 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
|
|||
}
|
||||
break;
|
||||
|
||||
case ANCHOR_PREC_READ_NOT:
|
||||
case ANCHOR_LOOK_BEHIND_NOT:
|
||||
break;
|
||||
}
|
||||
|
@ -5369,7 +5383,8 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
|
|||
ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
|
||||
ANCHOR_LOOK_BEHIND);
|
||||
|
||||
reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
|
||||
reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
|
||||
ANCHOR_PREC_READ_NOT);
|
||||
|
||||
if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
|
||||
reg->anchor_dmin = opt.len.min;
|
||||
|
|
64
regenc.c
64
regenc.c
|
@ -414,9 +414,7 @@ onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
|||
OnigCodePoint code;
|
||||
int i, r;
|
||||
|
||||
for (i = 0;
|
||||
i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
|
||||
i++) {
|
||||
for (i = 0; i < numberof(OnigAsciiLowerMap); i++) {
|
||||
code = OnigAsciiLowerMap[i].to;
|
||||
r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
|
||||
if (r != 0) return r;
|
||||
|
@ -431,8 +429,8 @@ onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
|||
|
||||
extern int
|
||||
onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
|
||||
const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[],
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
|
||||
OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
if (0x41 <= *p && *p <= 0x5a) {
|
||||
items[0].byte_len = 1;
|
||||
|
@ -570,9 +568,10 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
|
|||
|
||||
|
||||
extern int
|
||||
onigenc_not_support_get_ctype_code_range(OnigCtype ctype,
|
||||
OnigCodePoint* sb_out, const OnigCodePoint* ranges[],
|
||||
OnigEncoding enc)
|
||||
onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
|
||||
OnigCodePoint* sb_out ARG_UNUSED,
|
||||
const OnigCodePoint* ranges[] ARG_UNUSED,
|
||||
OnigEncoding enc)
|
||||
{
|
||||
return ONIG_NO_SUPPORT_CONFIG;
|
||||
}
|
||||
|
@ -589,7 +588,7 @@ onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc A
|
|||
/* for single byte encodings */
|
||||
extern int
|
||||
onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
|
||||
const UChar*end, UChar* lower, OnigEncoding enc ARG_UNUSED)
|
||||
const UChar* end, UChar* lower, OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
|
||||
|
||||
|
@ -633,28 +632,31 @@ extern int
|
|||
onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
if (code > 0xff)
|
||||
rb_raise(rb_eRangeError, "%u out of char range", code);
|
||||
rb_raise(rb_eRangeError, "%u out of char range", code);
|
||||
*buf = (UChar )(code & 0xff);
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s,
|
||||
const UChar* end,
|
||||
onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
|
||||
const UChar* s,
|
||||
const UChar* end ARG_UNUSED,
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return (UChar* )s;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
|
||||
onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
|
||||
const UChar* end ARG_UNUSED,
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
|
||||
onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
|
||||
const UChar* end ARG_UNUSED,
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return FALSE;
|
||||
|
@ -716,7 +718,7 @@ onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
|
|||
#if 0
|
||||
extern int
|
||||
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
|
||||
const UChar** pp ARG_UNUSED, const UChar* end ARG_UNUSED)
|
||||
const UChar** pp, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
|
@ -791,27 +793,27 @@ extern int
|
|||
onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
|
||||
{
|
||||
static const PosixBracketEntryType PBS[] = {
|
||||
PosixBracketEntryInit("Alnum", ONIGENC_CTYPE_ALNUM),
|
||||
PosixBracketEntryInit("Alpha", ONIGENC_CTYPE_ALPHA),
|
||||
PosixBracketEntryInit("Blank", ONIGENC_CTYPE_BLANK),
|
||||
PosixBracketEntryInit("Cntrl", ONIGENC_CTYPE_CNTRL),
|
||||
PosixBracketEntryInit("Digit", ONIGENC_CTYPE_DIGIT),
|
||||
PosixBracketEntryInit("Graph", ONIGENC_CTYPE_GRAPH),
|
||||
PosixBracketEntryInit("Lower", ONIGENC_CTYPE_LOWER),
|
||||
PosixBracketEntryInit("Print", ONIGENC_CTYPE_PRINT),
|
||||
PosixBracketEntryInit("Punct", ONIGENC_CTYPE_PUNCT),
|
||||
PosixBracketEntryInit("Space", ONIGENC_CTYPE_SPACE),
|
||||
PosixBracketEntryInit("Upper", ONIGENC_CTYPE_UPPER),
|
||||
PosixBracketEntryInit("XDigit", ONIGENC_CTYPE_XDIGIT),
|
||||
PosixBracketEntryInit("ASCII", ONIGENC_CTYPE_ASCII),
|
||||
PosixBracketEntryInit("Word", ONIGENC_CTYPE_WORD),
|
||||
POSIX_BRACKET_ENTRY_INIT("Alnum", ONIGENC_CTYPE_ALNUM),
|
||||
POSIX_BRACKET_ENTRY_INIT("Alpha", ONIGENC_CTYPE_ALPHA),
|
||||
POSIX_BRACKET_ENTRY_INIT("Blank", ONIGENC_CTYPE_BLANK),
|
||||
POSIX_BRACKET_ENTRY_INIT("Cntrl", ONIGENC_CTYPE_CNTRL),
|
||||
POSIX_BRACKET_ENTRY_INIT("Digit", ONIGENC_CTYPE_DIGIT),
|
||||
POSIX_BRACKET_ENTRY_INIT("Graph", ONIGENC_CTYPE_GRAPH),
|
||||
POSIX_BRACKET_ENTRY_INIT("Lower", ONIGENC_CTYPE_LOWER),
|
||||
POSIX_BRACKET_ENTRY_INIT("Print", ONIGENC_CTYPE_PRINT),
|
||||
POSIX_BRACKET_ENTRY_INIT("Punct", ONIGENC_CTYPE_PUNCT),
|
||||
POSIX_BRACKET_ENTRY_INIT("Space", ONIGENC_CTYPE_SPACE),
|
||||
POSIX_BRACKET_ENTRY_INIT("Upper", ONIGENC_CTYPE_UPPER),
|
||||
POSIX_BRACKET_ENTRY_INIT("XDigit", ONIGENC_CTYPE_XDIGIT),
|
||||
POSIX_BRACKET_ENTRY_INIT("ASCII", ONIGENC_CTYPE_ASCII),
|
||||
POSIX_BRACKET_ENTRY_INIT("Word", ONIGENC_CTYPE_WORD),
|
||||
};
|
||||
|
||||
const PosixBracketEntryType *pb, *pbe;
|
||||
const PosixBracketEntryType *pb;
|
||||
int len;
|
||||
|
||||
len = onigenc_strlen(enc, p, end);
|
||||
for (pbe = (pb = PBS) + sizeof(PBS)/sizeof(PBS[0]); pb < pbe; ++pb) {
|
||||
for (pb = PBS; pb < PBS + numberof(PBS); pb++) {
|
||||
if (len == pb->len &&
|
||||
onigenc_with_ascii_strnicmp(enc, p, end, pb->name, pb->len) == 0)
|
||||
return pb->ctype;
|
||||
|
|
14
regenc.h
14
regenc.h
|
@ -29,15 +29,18 @@
|
|||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef REGINT_H
|
||||
#ifndef RUBY_EXTERN
|
||||
#include "ruby/config.h"
|
||||
#include "ruby/defines.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
#undef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "ruby/oniguruma.h"
|
||||
|
||||
RUBY_SYMBOL_EXPORT_BEGIN
|
||||
|
@ -104,7 +107,13 @@ typedef struct {
|
|||
short int len;
|
||||
} PosixBracketEntryType;
|
||||
|
||||
#define PosixBracketEntryInit(name, ctype) {(const UChar *)name, ctype, (short int)(sizeof(name) - 1)}
|
||||
#define POSIX_BRACKET_ENTRY_INIT(name, ctype) \
|
||||
{(const UChar* )(name), (ctype), (short int )(sizeof(name) - 1)}
|
||||
|
||||
#ifndef numberof
|
||||
#define numberof(array) (int )(sizeof(array) / sizeof((array)[0]))
|
||||
#endif
|
||||
|
||||
|
||||
#define USE_CRNL_AS_LINE_TERMINATOR
|
||||
#define USE_UNICODE_PROPERTIES
|
||||
|
@ -159,6 +168,7 @@ ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, O
|
|||
|
||||
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
|
||||
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
|
||||
#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
|
||||
|
||||
#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
|
||||
OnigEncISO_8859_1_ToLowerCaseTable[c]
|
||||
|
|
|
@ -1397,7 +1397,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||
(int )(end - str), (int )(sstart - str));
|
||||
#endif
|
||||
|
||||
STACK_PUSH_ENSURED(STK_ALT, (UChar *)FinishCode); /* bottom stack */
|
||||
STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */
|
||||
best_len = ONIG_MISMATCH;
|
||||
s = (UChar* )sstart;
|
||||
pkeep = (UChar* )sstart;
|
||||
|
@ -1406,7 +1406,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||
if (s) {
|
||||
UChar *q, *bp, buf[50];
|
||||
int len;
|
||||
fprintf(stderr, "%4d> \"", (*p == OP_FINISH) ? -1 : (int )(s - str));
|
||||
fprintf(stderr, "%4"PRIdPTR"> \"", (*p == OP_FINISH) ? (ptrdiff_t )-1 : s - str);
|
||||
bp = buf;
|
||||
if (*p != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */
|
||||
for (i = 0, q = s; i < 7 && q < end; i++) {
|
||||
|
@ -1419,6 +1419,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
|||
*bp = 0;
|
||||
fputs((char* )buf, stderr);
|
||||
for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
|
||||
fprintf(stderr, "%4"PRIdPTR":", (p == FinishCode) ? (ptrdiff_t )-1 : p - reg->p);
|
||||
onig_print_compiled_byte_code(stderr, p, p + strlen((char *)p), NULL, encode);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
@ -4183,7 +4184,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
|
|||
prev = s;
|
||||
s += enclen(reg->enc, s, end);
|
||||
|
||||
if ((reg->anchor & ANCHOR_LOOK_BEHIND) == 0) {
|
||||
if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
|
||||
while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
|
||||
&& s < range) {
|
||||
prev = s;
|
||||
|
|
6
regint.h
6
regint.h
|
@ -193,6 +193,8 @@ extern pthread_mutex_t gOnigMutex;
|
|||
#define USE_UPPER_CASE_TABLE
|
||||
#else
|
||||
|
||||
#define CHECK_INTERRUPT_IN_MATCH_AT
|
||||
|
||||
#define st_init_table onig_st_init_table
|
||||
#define st_init_table_with_size onig_st_init_table_with_size
|
||||
#define st_init_numtable onig_st_init_numtable
|
||||
|
@ -213,8 +215,6 @@ extern pthread_mutex_t gOnigMutex;
|
|||
/* */
|
||||
#define onig_st_is_member st_is_member
|
||||
|
||||
#define CHECK_INTERRUPT_IN_MATCH_AT
|
||||
|
||||
#endif
|
||||
|
||||
#define STATE_CHECK_STRING_THRESHOLD_LEN 7
|
||||
|
@ -913,9 +913,7 @@ typedef struct {
|
|||
|
||||
extern OnigOpInfoType OnigOpInfo[];
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc));
|
||||
#endif
|
||||
|
||||
#ifdef ONIG_DEBUG_STATISTICS
|
||||
extern void onig_statistics_init P_((void));
|
||||
|
|
299
regparse.c
299
regparse.c
|
@ -4153,17 +4153,15 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
|
|||
}
|
||||
|
||||
static int
|
||||
add_ctype_to_cc(CClassNode* cc, int ctype, int not, int char_prop, ScanEnv* env)
|
||||
add_ctype_to_cc(CClassNode* cc, int ctype, int not, int ascii_range, ScanEnv* env)
|
||||
{
|
||||
int maxcode, ascii_range;
|
||||
int maxcode;
|
||||
int c, r;
|
||||
const OnigCodePoint *ranges;
|
||||
OnigCodePoint sb_out;
|
||||
OnigEncoding enc = env->enc;
|
||||
OnigOptionType option = env->option;
|
||||
|
||||
ascii_range = IS_ASCII_RANGE(option) && (char_prop == 0);
|
||||
|
||||
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
|
||||
if (r == 0) {
|
||||
if (ascii_range) {
|
||||
|
@ -4280,31 +4278,32 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, int char_prop, ScanEnv* env)
|
|||
}
|
||||
|
||||
static int
|
||||
parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
|
||||
parse_posix_bracket(CClassNode* cc, CClassNode* asc_cc,
|
||||
UChar** src, UChar* end, ScanEnv* env)
|
||||
{
|
||||
#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
|
||||
#define POSIX_BRACKET_NAME_MIN_LEN 4
|
||||
|
||||
static const PosixBracketEntryType PBS[] = {
|
||||
{ (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
|
||||
{ (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },
|
||||
{ (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },
|
||||
{ (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },
|
||||
{ (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },
|
||||
{ (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },
|
||||
{ (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },
|
||||
{ (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },
|
||||
{ (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },
|
||||
{ (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },
|
||||
{ (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
|
||||
{ (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
|
||||
{ (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
|
||||
{ (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },
|
||||
{ (UChar* )NULL, -1, 0 }
|
||||
POSIX_BRACKET_ENTRY_INIT("alnum", ONIGENC_CTYPE_ALNUM),
|
||||
POSIX_BRACKET_ENTRY_INIT("alpha", ONIGENC_CTYPE_ALPHA),
|
||||
POSIX_BRACKET_ENTRY_INIT("blank", ONIGENC_CTYPE_BLANK),
|
||||
POSIX_BRACKET_ENTRY_INIT("cntrl", ONIGENC_CTYPE_CNTRL),
|
||||
POSIX_BRACKET_ENTRY_INIT("digit", ONIGENC_CTYPE_DIGIT),
|
||||
POSIX_BRACKET_ENTRY_INIT("graph", ONIGENC_CTYPE_GRAPH),
|
||||
POSIX_BRACKET_ENTRY_INIT("lower", ONIGENC_CTYPE_LOWER),
|
||||
POSIX_BRACKET_ENTRY_INIT("print", ONIGENC_CTYPE_PRINT),
|
||||
POSIX_BRACKET_ENTRY_INIT("punct", ONIGENC_CTYPE_PUNCT),
|
||||
POSIX_BRACKET_ENTRY_INIT("space", ONIGENC_CTYPE_SPACE),
|
||||
POSIX_BRACKET_ENTRY_INIT("upper", ONIGENC_CTYPE_UPPER),
|
||||
POSIX_BRACKET_ENTRY_INIT("xdigit", ONIGENC_CTYPE_XDIGIT),
|
||||
POSIX_BRACKET_ENTRY_INIT("ascii", ONIGENC_CTYPE_ASCII),
|
||||
POSIX_BRACKET_ENTRY_INIT("word", ONIGENC_CTYPE_WORD),
|
||||
};
|
||||
|
||||
const PosixBracketEntryType *pb;
|
||||
int not, i, r;
|
||||
int ascii_range;
|
||||
OnigCodePoint c;
|
||||
OnigEncoding enc = env->enc;
|
||||
UChar *p = *src;
|
||||
|
@ -4319,17 +4318,25 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
|
|||
if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
|
||||
goto not_posix_bracket;
|
||||
|
||||
for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
|
||||
ascii_range = IS_ASCII_RANGE(env->option) &&
|
||||
! IS_POSIX_BRACKET_ALL_RANGE(env->option);
|
||||
for (pb = PBS; pb < PBS + numberof(PBS); pb++) {
|
||||
if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
|
||||
p = (UChar* )onigenc_step(enc, p, end, pb->len);
|
||||
if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
|
||||
return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
|
||||
|
||||
r = add_ctype_to_cc(cc, pb->ctype, not,
|
||||
IS_POSIX_BRACKET_ALL_RANGE(env->option),
|
||||
env);
|
||||
r = add_ctype_to_cc(cc, pb->ctype, not, ascii_range, env);
|
||||
if (r != 0) return r;
|
||||
|
||||
if (IS_NOT_NULL(asc_cc)) {
|
||||
if (pb->ctype != ONIGENC_CTYPE_WORD &&
|
||||
pb->ctype != ONIGENC_CTYPE_ASCII &&
|
||||
!ascii_range)
|
||||
r = add_ctype_to_cc(asc_cc, pb->ctype, not, ascii_range, env);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
|
||||
PINC_S; PINC_S;
|
||||
*src = p;
|
||||
return 0;
|
||||
|
@ -4386,6 +4393,8 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
|
|||
return r;
|
||||
}
|
||||
|
||||
static int cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env);
|
||||
|
||||
static int
|
||||
parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
||||
ScanEnv* env)
|
||||
|
@ -4399,11 +4408,15 @@ parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
|||
*np = node_new_cclass();
|
||||
CHECK_NULL_RETURN_MEMERR(*np);
|
||||
cc = NCCLASS(*np);
|
||||
r = add_ctype_to_cc(cc, ctype, 0, 1, env);
|
||||
r = add_ctype_to_cc(cc, ctype, 0, 0, env);
|
||||
if (r != 0) return r;
|
||||
if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
|
||||
|
||||
return 0;
|
||||
if (IS_IGNORECASE(env->option)) {
|
||||
if (ctype != ONIGENC_CTYPE_ASCII)
|
||||
r = cclass_case_fold(np, cc, cc, env);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
|
@ -4421,7 +4434,8 @@ enum CCVALTYPE {
|
|||
};
|
||||
|
||||
static int
|
||||
next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
|
||||
next_state_class(CClassNode* cc, CClassNode* asc_cc,
|
||||
OnigCodePoint* vs, enum CCVALTYPE* type,
|
||||
enum CCSTATE* state, ScanEnv* env)
|
||||
{
|
||||
int r;
|
||||
|
@ -4430,11 +4444,18 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
|
|||
return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
|
||||
|
||||
if (*state == CCS_VALUE && *type != CCV_CLASS) {
|
||||
if (*type == CCV_SB)
|
||||
if (*type == CCV_SB) {
|
||||
BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
|
||||
if (IS_NOT_NULL(asc_cc))
|
||||
BITSET_SET_BIT(asc_cc->bs, (int )(*vs));
|
||||
}
|
||||
else if (*type == CCV_CODE_POINT) {
|
||||
r = add_code_range(&(cc->mbuf), env, *vs, *vs);
|
||||
if (r < 0) return r;
|
||||
if (IS_NOT_NULL(asc_cc)) {
|
||||
r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0);
|
||||
if (r < 0) return r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4444,7 +4465,8 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
|
|||
}
|
||||
|
||||
static int
|
||||
next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
|
||||
next_state_val(CClassNode* cc, CClassNode* asc_cc,
|
||||
OnigCodePoint *vs, OnigCodePoint v,
|
||||
int* vs_israw, int v_israw,
|
||||
enum CCVALTYPE intype, enum CCVALTYPE* type,
|
||||
enum CCSTATE* state, ScanEnv* env)
|
||||
|
@ -4453,11 +4475,18 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
|
|||
|
||||
switch (*state) {
|
||||
case CCS_VALUE:
|
||||
if (*type == CCV_SB)
|
||||
if (*type == CCV_SB) {
|
||||
BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
|
||||
if (IS_NOT_NULL(asc_cc))
|
||||
BITSET_SET_BIT(asc_cc->bs, (int )(*vs));
|
||||
}
|
||||
else if (*type == CCV_CODE_POINT) {
|
||||
r = add_code_range(&(cc->mbuf), env, *vs, *vs);
|
||||
if (r < 0) return r;
|
||||
if (IS_NOT_NULL(asc_cc)) {
|
||||
r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0);
|
||||
if (r < 0) return r;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -4474,10 +4503,16 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
|
|||
return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
|
||||
}
|
||||
bitset_set_range(env, cc->bs, (int )*vs, (int )v);
|
||||
if (IS_NOT_NULL(asc_cc))
|
||||
bitset_set_range(env, asc_cc->bs, (int )*vs, (int )v);
|
||||
}
|
||||
else {
|
||||
r = add_code_range(&(cc->mbuf), env, *vs, v);
|
||||
if (r < 0) return r;
|
||||
if (IS_NOT_NULL(asc_cc)) {
|
||||
r = add_code_range0(&(asc_cc->mbuf), env, *vs, v, 0);
|
||||
if (r < 0) return r;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
@ -4493,6 +4528,11 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
|
|||
bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
|
||||
r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
|
||||
if (r < 0) return r;
|
||||
if (IS_NOT_NULL(asc_cc)) {
|
||||
bitset_set_range(env, asc_cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
|
||||
r = add_code_range0(&(asc_cc->mbuf), env, (OnigCodePoint )*vs, v, 0);
|
||||
if (r < 0) return r;
|
||||
}
|
||||
#if 0
|
||||
}
|
||||
else
|
||||
|
@ -4542,22 +4582,24 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
|
|||
}
|
||||
|
||||
static int
|
||||
parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
||||
parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* end,
|
||||
ScanEnv* env)
|
||||
{
|
||||
int r, neg, len, fetched, and_start;
|
||||
OnigCodePoint v, vs;
|
||||
UChar *p;
|
||||
Node* node;
|
||||
Node* asc_node;
|
||||
CClassNode *cc, *prev_cc;
|
||||
CClassNode work_cc;
|
||||
CClassNode *asc_cc, *asc_prev_cc;
|
||||
CClassNode work_cc, asc_work_cc;
|
||||
|
||||
enum CCSTATE state;
|
||||
enum CCVALTYPE val_type, in_type;
|
||||
int val_israw, in_israw;
|
||||
|
||||
prev_cc = (CClassNode* )NULL;
|
||||
*np = NULL_NODE;
|
||||
prev_cc = asc_prev_cc = (CClassNode* )NULL;
|
||||
*np = *asc_np = NULL_NODE;
|
||||
r = fetch_token_in_cc(tok, src, end, env);
|
||||
if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
|
||||
neg = 1;
|
||||
|
@ -4581,6 +4623,16 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
|||
CHECK_NULL_RETURN_MEMERR(node);
|
||||
cc = NCCLASS(node);
|
||||
|
||||
if (IS_IGNORECASE(env->option)) {
|
||||
*asc_np = asc_node = node_new_cclass();
|
||||
CHECK_NULL_RETURN_MEMERR(asc_node);
|
||||
asc_cc = NCCLASS(asc_node);
|
||||
}
|
||||
else {
|
||||
asc_node = NULL_NODE;
|
||||
asc_cc = NULL;
|
||||
}
|
||||
|
||||
and_start = 0;
|
||||
state = CCS_START;
|
||||
p = *src;
|
||||
|
@ -4671,13 +4723,13 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
|||
}
|
||||
in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
|
||||
val_entry2:
|
||||
r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
|
||||
r = next_state_val(cc, asc_cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
|
||||
&state, env);
|
||||
if (r != 0) goto err;
|
||||
break;
|
||||
|
||||
case TK_POSIX_BRACKET_OPEN:
|
||||
r = parse_posix_bracket(cc, &p, end, env);
|
||||
r = parse_posix_bracket(cc, asc_cc, &p, end, env);
|
||||
if (r < 0) goto err;
|
||||
if (r == 1) { /* is not POSIX bracket */
|
||||
CC_ESC_WARN(env, (UChar* )"[");
|
||||
|
@ -4690,11 +4742,18 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
|||
break;
|
||||
|
||||
case TK_CHAR_TYPE:
|
||||
r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, 0, env);
|
||||
r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not,
|
||||
IS_ASCII_RANGE(env->option), env);
|
||||
if (r != 0) return r;
|
||||
if (IS_NOT_NULL(asc_cc)) {
|
||||
if (tok->u.prop.ctype != ONIGENC_CTYPE_WORD)
|
||||
r = add_ctype_to_cc(asc_cc, tok->u.prop.ctype, tok->u.prop.not,
|
||||
IS_ASCII_RANGE(env->option), env);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
|
||||
next_class:
|
||||
r = next_state_class(cc, &vs, &val_type, &state, env);
|
||||
r = next_state_class(cc, asc_cc, &vs, &val_type, &state, env);
|
||||
if (r != 0) goto err;
|
||||
break;
|
||||
|
||||
|
@ -4704,8 +4763,13 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
|||
|
||||
ctype = fetch_char_property_to_ctype(&p, end, env);
|
||||
if (ctype < 0) return ctype;
|
||||
r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 1, env);
|
||||
r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 0, env);
|
||||
if (r != 0) return r;
|
||||
if (IS_NOT_NULL(asc_cc)) {
|
||||
if (ctype != ONIGENC_CTYPE_ASCII)
|
||||
r = add_ctype_to_cc(asc_cc, ctype, tok->u.prop.not, 0, env);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
goto next_class;
|
||||
}
|
||||
break;
|
||||
|
@ -4766,15 +4830,20 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
|||
|
||||
case TK_CC_CC_OPEN: /* [ */
|
||||
{
|
||||
Node *anode;
|
||||
Node *anode, *aasc_node;
|
||||
CClassNode* acc;
|
||||
|
||||
r = parse_char_class(&anode, tok, &p, end, env);
|
||||
r = parse_char_class(&anode, &aasc_node, tok, &p, end, env);
|
||||
if (r == 0) {
|
||||
acc = NCCLASS(anode);
|
||||
r = or_cclass(cc, acc, env);
|
||||
}
|
||||
if (r == 0 && IS_NOT_NULL(aasc_node)) {
|
||||
acc = NCCLASS(aasc_node);
|
||||
r = or_cclass(asc_cc, acc, env);
|
||||
}
|
||||
onig_node_free(anode);
|
||||
onig_node_free(aasc_node);
|
||||
if (r != 0) goto err;
|
||||
}
|
||||
break;
|
||||
|
@ -4782,7 +4851,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
|||
case TK_CC_AND: /* && */
|
||||
{
|
||||
if (state == CCS_VALUE) {
|
||||
r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
|
||||
r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
|
||||
&val_type, &state, env);
|
||||
if (r != 0) goto err;
|
||||
}
|
||||
|
@ -4794,12 +4863,23 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
|||
r = and_cclass(prev_cc, cc, env);
|
||||
if (r != 0) goto err;
|
||||
bbuf_free(cc->mbuf);
|
||||
if (IS_NOT_NULL(asc_cc)) {
|
||||
r = and_cclass(asc_prev_cc, asc_cc, env);
|
||||
if (r != 0) goto err;
|
||||
bbuf_free(asc_cc->mbuf);
|
||||
}
|
||||
}
|
||||
else {
|
||||
prev_cc = cc;
|
||||
cc = &work_cc;
|
||||
if (IS_NOT_NULL(asc_cc)) {
|
||||
asc_prev_cc = asc_cc;
|
||||
asc_cc = &asc_work_cc;
|
||||
}
|
||||
}
|
||||
initialize_cclass(cc);
|
||||
if (IS_NOT_NULL(asc_cc))
|
||||
initialize_cclass(asc_cc);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -4822,7 +4902,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
|||
}
|
||||
|
||||
if (state == CCS_VALUE) {
|
||||
r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
|
||||
r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
|
||||
&val_type, &state, env);
|
||||
if (r != 0) goto err;
|
||||
}
|
||||
|
@ -4832,12 +4912,24 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
|||
if (r != 0) goto err;
|
||||
bbuf_free(cc->mbuf);
|
||||
cc = prev_cc;
|
||||
if (IS_NOT_NULL(asc_cc)) {
|
||||
r = and_cclass(asc_prev_cc, asc_cc, env);
|
||||
if (r != 0) goto err;
|
||||
bbuf_free(asc_cc->mbuf);
|
||||
asc_cc = asc_prev_cc;
|
||||
}
|
||||
}
|
||||
|
||||
if (neg != 0)
|
||||
if (neg != 0) {
|
||||
NCCLASS_SET_NOT(cc);
|
||||
else
|
||||
if (IS_NOT_NULL(asc_cc))
|
||||
NCCLASS_SET_NOT(asc_cc);
|
||||
}
|
||||
else {
|
||||
NCCLASS_CLEAR_NOT(cc);
|
||||
if (IS_NOT_NULL(asc_cc))
|
||||
NCCLASS_CLEAR_NOT(asc_cc);
|
||||
}
|
||||
if (IS_NCCLASS_NOT(cc) &&
|
||||
IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
|
||||
int is_empty;
|
||||
|
@ -4865,6 +4957,8 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
|
|||
err:
|
||||
if (cc != NCCLASS(*np))
|
||||
bbuf_free(cc->mbuf);
|
||||
if (IS_NOT_NULL(asc_cc) && (asc_cc != NCCLASS(*asc_np)))
|
||||
bbuf_free(asc_cc->mbuf);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -5489,6 +5583,7 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
|
|||
typedef struct {
|
||||
ScanEnv* env;
|
||||
CClassNode* cc;
|
||||
CClassNode* asc_cc;
|
||||
Node* alt_root;
|
||||
Node** ptail;
|
||||
} IApplyCaseFoldArg;
|
||||
|
@ -5500,37 +5595,57 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
|
|||
IApplyCaseFoldArg* iarg;
|
||||
ScanEnv* env;
|
||||
CClassNode* cc;
|
||||
CClassNode* asc_cc;
|
||||
BitSetRef bs;
|
||||
int add_flag;
|
||||
|
||||
iarg = (IApplyCaseFoldArg* )arg;
|
||||
env = iarg->env;
|
||||
cc = iarg->cc;
|
||||
asc_cc = iarg->asc_cc;
|
||||
bs = cc->bs;
|
||||
|
||||
if (IS_NULL(asc_cc)) {
|
||||
add_flag = 0;
|
||||
}
|
||||
else if (ONIGENC_IS_ASCII_CODE(from) == ONIGENC_IS_ASCII_CODE(*to)) {
|
||||
add_flag = 1;
|
||||
}
|
||||
else {
|
||||
add_flag = onig_is_code_in_cc(env->enc, from, asc_cc);
|
||||
if (IS_NCCLASS_NOT(asc_cc))
|
||||
add_flag = !add_flag;
|
||||
}
|
||||
|
||||
if (to_len == 1) {
|
||||
int is_in = onig_is_code_in_cc(env->enc, from, cc);
|
||||
#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
|
||||
if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
|
||||
(is_in == 0 && IS_NCCLASS_NOT(cc))) {
|
||||
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
|
||||
add_code_range0(&(cc->mbuf), env, *to, *to, 0);
|
||||
}
|
||||
else {
|
||||
BITSET_SET_BIT(bs, *to);
|
||||
if (add_flag) {
|
||||
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
|
||||
add_code_range0(&(cc->mbuf), env, *to, *to, 0);
|
||||
}
|
||||
else {
|
||||
BITSET_SET_BIT(bs, *to);
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (is_in != 0) {
|
||||
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
|
||||
if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
|
||||
add_code_range0(&(cc->mbuf), env, *to, *to, 0);
|
||||
}
|
||||
else {
|
||||
if (IS_NCCLASS_NOT(cc)) {
|
||||
BITSET_CLEAR_BIT(bs, *to);
|
||||
if (add_flag) {
|
||||
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
|
||||
if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
|
||||
add_code_range0(&(cc->mbuf), env, *to, *to, 0);
|
||||
}
|
||||
else {
|
||||
if (IS_NCCLASS_NOT(cc)) {
|
||||
BITSET_CLEAR_BIT(bs, *to);
|
||||
}
|
||||
else {
|
||||
BITSET_SET_BIT(bs, *to);
|
||||
}
|
||||
}
|
||||
else
|
||||
BITSET_SET_BIT(bs, *to);
|
||||
}
|
||||
}
|
||||
#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
|
||||
|
@ -5573,6 +5688,35 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env)
|
||||
{
|
||||
int r;
|
||||
IApplyCaseFoldArg iarg;
|
||||
|
||||
iarg.env = env;
|
||||
iarg.cc = cc;
|
||||
iarg.asc_cc = asc_cc;
|
||||
iarg.alt_root = NULL_NODE;
|
||||
iarg.ptail = &(iarg.alt_root);
|
||||
|
||||
r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
|
||||
i_apply_case_fold, &iarg);
|
||||
if (r != 0) {
|
||||
onig_node_free(iarg.alt_root);
|
||||
return r;
|
||||
}
|
||||
if (IS_NOT_NULL(iarg.alt_root)) {
|
||||
Node* work = onig_node_new_alt(*np, iarg.alt_root);
|
||||
if (IS_NULL(work)) {
|
||||
onig_node_free(iarg.alt_root);
|
||||
return ONIGERR_MEMORY;
|
||||
}
|
||||
*np = work;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static int
|
||||
node_linebreak(Node** np, ScanEnv* env)
|
||||
{
|
||||
|
@ -5658,7 +5802,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
|
|||
np1 = node_new_cclass();
|
||||
if (IS_NULL(np1)) goto err;
|
||||
cc1 = NCCLASS(np1);
|
||||
r = add_ctype_to_cc(cc1, ctype, 0, 1, env);
|
||||
r = add_ctype_to_cc(cc1, ctype, 0, 0, env);
|
||||
if (r != 0) goto err;
|
||||
NCCLASS_SET_NOT(cc1);
|
||||
|
||||
|
@ -5666,7 +5810,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
|
|||
np2 = node_new_cclass();
|
||||
if (IS_NULL(np2)) goto err;
|
||||
cc2 = NCCLASS(np2);
|
||||
r = add_ctype_to_cc(cc2, ctype, 0, 1, env);
|
||||
r = add_ctype_to_cc(cc2, ctype, 0, 0, env);
|
||||
if (r != 0) goto err;
|
||||
|
||||
qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
|
||||
|
@ -6013,7 +6157,8 @@ parse_exp(Node** np, OnigToken* tok, int term,
|
|||
*np = node_new_cclass();
|
||||
CHECK_NULL_RETURN_MEMERR(*np);
|
||||
cc = NCCLASS(*np);
|
||||
r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0, 0, env);
|
||||
r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0,
|
||||
IS_ASCII_RANGE(env->option), env);
|
||||
if (r != 0) return r;
|
||||
if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
|
||||
#ifdef USE_SHARED_CCLASS_TABLE
|
||||
|
@ -6036,15 +6181,20 @@ parse_exp(Node** np, OnigToken* tok, int term,
|
|||
|
||||
case TK_CC_OPEN:
|
||||
{
|
||||
Node *asc_node;
|
||||
CClassNode* cc;
|
||||
OnigCodePoint code;
|
||||
|
||||
r = parse_char_class(np, tok, src, end, env);
|
||||
if (r != 0) return r;
|
||||
r = parse_char_class(np, &asc_node, tok, src, end, env);
|
||||
if (r != 0) {
|
||||
onig_node_free(asc_node);
|
||||
return r;
|
||||
}
|
||||
|
||||
cc = NCCLASS(*np);
|
||||
if (is_onechar_cclass(cc, &code)) {
|
||||
onig_node_free(*np);
|
||||
onig_node_free(asc_node);
|
||||
*np = node_new_empty();
|
||||
CHECK_NULL_RETURN_MEMERR(*np);
|
||||
r = node_str_cat_codepoint(*np, env->enc, code);
|
||||
|
@ -6052,28 +6202,13 @@ parse_exp(Node** np, OnigToken* tok, int term,
|
|||
goto string_loop;
|
||||
}
|
||||
if (IS_IGNORECASE(env->option)) {
|
||||
IApplyCaseFoldArg iarg;
|
||||
|
||||
iarg.env = env;
|
||||
iarg.cc = cc;
|
||||
iarg.alt_root = NULL_NODE;
|
||||
iarg.ptail = &(iarg.alt_root);
|
||||
|
||||
r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
|
||||
i_apply_case_fold, &iarg);
|
||||
r = cclass_case_fold(np, cc, NCCLASS(asc_node), env);
|
||||
if (r != 0) {
|
||||
onig_node_free(iarg.alt_root);
|
||||
onig_node_free(asc_node);
|
||||
return r;
|
||||
}
|
||||
if (IS_NOT_NULL(iarg.alt_root)) {
|
||||
Node* work = onig_node_new_alt(*np, iarg.alt_root);
|
||||
if (IS_NULL(work)) {
|
||||
onig_node_free(iarg.alt_root);
|
||||
return ONIGERR_MEMORY;
|
||||
}
|
||||
*np = work;
|
||||
}
|
||||
}
|
||||
onig_node_free(asc_node);
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
|
@ -193,8 +193,8 @@ typedef struct {
|
|||
int type;
|
||||
int regnum;
|
||||
OnigOptionType option;
|
||||
struct _Node* target;
|
||||
AbsAddrType call_addr;
|
||||
struct _Node* target;
|
||||
/* for multiple call reference */
|
||||
OnigDistance min_len; /* min length (byte) */
|
||||
OnigDistance max_len; /* max length (byte) */
|
||||
|
@ -296,10 +296,10 @@ typedef struct {
|
|||
UChar* error;
|
||||
UChar* error_end;
|
||||
regex_t* reg; /* for reg->names only */
|
||||
int num_call;
|
||||
#ifdef USE_SUBEXP_CALL
|
||||
UnsetAddrList* unset_addr_list;
|
||||
#endif
|
||||
int num_call;
|
||||
int num_mem;
|
||||
#ifdef USE_NAMED_GROUP
|
||||
int num_named;
|
||||
|
|
Loading…
Reference in a new issue