mirror of
				https://github.com/ruby/ruby.git
				synced 2022-11-09 12:17:21 -05:00 
			
		
		
		
	Initial revision
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5892 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
		
							parent
							
								
									33a06e4aca
								
							
						
					
					
						commit
						5770336f8b
					
				
					 15 changed files with 17522 additions and 0 deletions
				
			
		
							
								
								
									
										54
									
								
								ascii.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								ascii.c
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,54 @@
 | 
			
		|||
/**********************************************************************
 | 
			
		||||
 | 
			
		||||
  ascii.c -  Oniguruma (regular expression library)
 | 
			
		||||
 | 
			
		||||
  Copyright (C) 2003-2004  K.Kosako (kosako@sofnec.co.jp)
 | 
			
		||||
 | 
			
		||||
**********************************************************************/
 | 
			
		||||
#include "regenc.h"
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
ascii_code_is_ctype(OnigCodePoint code, unsigned int ctype)
 | 
			
		||||
{
 | 
			
		||||
  if (code < 128)
 | 
			
		||||
    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
 | 
			
		||||
  else
 | 
			
		||||
    return FALSE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Encoding descriptor for US-ASCII.  The initializer is positional; the
   comment beside each value names the OnigEncodingType member it fills. */
OnigEncodingType OnigEncodingASCII = {
  { /* len_table: length selected by the first byte -- 1 for all 256 values */
    /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xc0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xd0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xe0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xf0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
  },
  "US-ASCII",                                    /* name */
  1,                                             /* max_enc_len */
  FALSE,                                         /* is_fold_match */
  ONIGENC_CTYPE_SUPPORT_LEVEL_SB,                /* ctype_support_level */
  TRUE,                                          /* is_continuous_sb_mb */
  onigenc_single_byte_mbc_to_code,               /* mbc_to_code */
  onigenc_single_byte_code_to_mbclen,            /* code_to_mbclen */
  onigenc_single_byte_code_to_mbc,               /* code_to_mbc */
  onigenc_ascii_mbc_to_lower,                    /* mbc_to_lower */
  onigenc_ascii_mbc_is_case_ambig,               /* mbc_is_case_ambig */
  ascii_code_is_ctype,                           /* code_is_ctype */
  onigenc_nothing_get_ctype_code_range,          /* get_ctype_code_range */
  onigenc_single_byte_left_adjust_char_head,     /* left_adjust_char_head */
  onigenc_single_byte_is_allowed_reverse_match,  /* is_allowed_reverse_match */
  onigenc_nothing_get_all_fold_match_code,       /* get_all_fold_match_code */
  onigenc_nothing_get_fold_match_info            /* get_fold_match_info */
};
 | 
			
		||||
							
								
								
									
										191
									
								
								euc_jp.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										191
									
								
								euc_jp.c
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,191 @@
 | 
			
		|||
/**********************************************************************
 | 
			
		||||
 | 
			
		||||
  euc_jp.c -  Oniguruma (regular expression library)
 | 
			
		||||
 | 
			
		||||
  Copyright (C) 2003-2004  K.Kosako (kosako@sofnec.co.jp)
 | 
			
		||||
 | 
			
		||||
**********************************************************************/
 | 
			
		||||
#include "regenc.h"
 | 
			
		||||
 | 
			
		||||
/* Non-zero when byte `c` lies outside the range 0xa1..0xfe.
   The difference (c) - 0xa1 is narrowed to UChar, so values below 0xa1 wrap
   around to large numbers and a single unsigned comparison covers both ends
   of the range while evaluating `c` only once.
   NOTE(review): relies on UChar being an 8-bit unsigned type. */
#define eucjp_islead(c)    ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
 | 
			
		||||
 | 
			
		||||
static OnigCodePoint
 | 
			
		||||
eucjp_mbc_to_code(UChar* p, UChar* end)
 | 
			
		||||
{
 | 
			
		||||
  int c, i, len;
 | 
			
		||||
  OnigCodePoint n;
 | 
			
		||||
 | 
			
		||||
  c = *p++;
 | 
			
		||||
  len = enc_len(ONIG_ENCODING_EUC_JP, c);
 | 
			
		||||
  n = c;
 | 
			
		||||
  if (len == 1) return n;
 | 
			
		||||
 | 
			
		||||
  for (i = 1; i < len; i++) {
 | 
			
		||||
    if (p >= end) break;
 | 
			
		||||
    c = *p++;
 | 
			
		||||
    n <<= 8;  n += c;
 | 
			
		||||
  }
 | 
			
		||||
  return n;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
eucjp_code_to_mbclen(OnigCodePoint code)
 | 
			
		||||
{
 | 
			
		||||
  if      ((code & 0xff0000) != 0) return 3;
 | 
			
		||||
  else if ((code &   0xff00) != 0) return 2;
 | 
			
		||||
  else return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
eucjp_code_to_mbc_first(OnigCodePoint code)
 | 
			
		||||
{
 | 
			
		||||
  int first;
 | 
			
		||||
 | 
			
		||||
  if ((code & 0xff0000) != 0) {
 | 
			
		||||
    first = (code >> 16) & 0xff;
 | 
			
		||||
    /*
 | 
			
		||||
    if (enc_len(ONIG_ENCODING_EUC_JP, first) != 3)
 | 
			
		||||
      return ONIGERR_INVALID_WIDE_CHAR_VALUE;
 | 
			
		||||
    */
 | 
			
		||||
  }
 | 
			
		||||
  else if ((code & 0xff00) != 0) {
 | 
			
		||||
    first = (code >> 8) & 0xff;
 | 
			
		||||
    /*
 | 
			
		||||
    if (enc_len(ONIG_ENCODING_EUC_JP, first) != 2)
 | 
			
		||||
      return ONIGERR_INVALID_WIDE_CHAR_VALUE;
 | 
			
		||||
    */
 | 
			
		||||
  }
 | 
			
		||||
  else {
 | 
			
		||||
    /*
 | 
			
		||||
    if (enc_len(ONIG_ENCODING_EUC_JP, code) != 1)
 | 
			
		||||
      return ONIGERR_INVALID_WIDE_CHAR_VALUE;
 | 
			
		||||
    */
 | 
			
		||||
    return (int )code;
 | 
			
		||||
  }
 | 
			
		||||
  return first;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
eucjp_code_to_mbc(OnigCodePoint code, UChar *buf)
 | 
			
		||||
{
 | 
			
		||||
  UChar *p = buf;
 | 
			
		||||
 | 
			
		||||
  if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff));
 | 
			
		||||
  if ((code &   0xff00) != 0) *p++ = (UChar )(((code >>  8) & 0xff));
 | 
			
		||||
  *p++ = (UChar )(code & 0xff);
 | 
			
		||||
 | 
			
		||||
#if 1
 | 
			
		||||
  if (enc_len(ONIG_ENCODING_EUC_JP, buf[0]) != (p - buf))
 | 
			
		||||
    return ONIGERR_INVALID_WIDE_CHAR_VALUE;
 | 
			
		||||
#endif  
 | 
			
		||||
  return p - buf;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
eucjp_mbc_to_lower(UChar* p, UChar* lower)
 | 
			
		||||
{
 | 
			
		||||
  int len;
 | 
			
		||||
 | 
			
		||||
  if (ONIGENC_IS_MBC_ASCII(p)) {
 | 
			
		||||
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
 | 
			
		||||
    return 1;
 | 
			
		||||
  }
 | 
			
		||||
  else {
 | 
			
		||||
    len = enc_len(ONIG_ENCODING_EUC_JP, *p);
 | 
			
		||||
    if (lower != p) {
 | 
			
		||||
      /* memcpy(lower, p, len); */
 | 
			
		||||
      int i;
 | 
			
		||||
      for (i = 0; i < len; i++) {
 | 
			
		||||
	*lower++ = *p++;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    return len; /* return byte length of converted char to lower */
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
eucjp_code_is_ctype(OnigCodePoint code, unsigned int ctype)
 | 
			
		||||
{
 | 
			
		||||
  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
 | 
			
		||||
    if (code < 128)
 | 
			
		||||
      return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
 | 
			
		||||
    else {
 | 
			
		||||
      int first = eucjp_code_to_mbc_first(code);
 | 
			
		||||
      return (enc_len(ONIG_ENCODING_EUC_JP, first) > 1 ? TRUE : FALSE);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ctype &= ~ONIGENC_CTYPE_WORD;
 | 
			
		||||
    if (ctype == 0) return FALSE;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (code < 128)
 | 
			
		||||
    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
 | 
			
		||||
  else
 | 
			
		||||
    return FALSE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Move `s` back to the first byte of the character that contains it,
   never going before `start`.
   Assumes that in this encoding mb-trail bytes don't mix with single
   bytes. */
static UChar*
eucjp_left_adjust_char_head(UChar* start, UChar* s)
{
  UChar *head;
  int len;

  if (s <= start) return s;

  /* back up to the nearest byte for which eucjp_islead() holds */
  for (head = s; !eucjp_islead(*head) && head > start; head--)
    ;

  len = enc_len(ONIG_ENCODING_EUC_JP, *head);
  if (head + len > s) return head;   /* s lies inside this character */

  /* skip that character, then advance by the even part of what remains */
  head += len;
  return head + ((s - head) & ~1);
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
eucjp_is_allowed_reverse_match(UChar* s, UChar* end)
 | 
			
		||||
{
 | 
			
		||||
  UChar c = *s;
 | 
			
		||||
  if (c <= 0x7e || c == 0x8e || c == 0x8f)
 | 
			
		||||
    return TRUE;
 | 
			
		||||
  else
 | 
			
		||||
    return FALSE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Encoding descriptor for EUC-JP.  The initializer is positional; the
   comment beside each value names the OnigEncodingType member it fills. */
OnigEncodingType OnigEncodingEUC_JP = {
  { /* len_table: character length selected by the first byte.
       0x8e -> 2, 0x8f -> 3, 0xa1..0xfe -> 2, every other byte -> 1 */
    /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
    /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xa0 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xb0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
  },
  "EUC-JP",                                 /* name */
  3,                                        /* max_enc_len */
  FALSE,                                    /* is_fold_match */
  ONIGENC_CTYPE_SUPPORT_LEVEL_SB,           /* ctype_support_level */
  FALSE,                                    /* is_continuous_sb_mb */
  eucjp_mbc_to_code,                        /* mbc_to_code */
  eucjp_code_to_mbclen,                     /* code_to_mbclen */
  eucjp_code_to_mbc,                        /* code_to_mbc */
  eucjp_mbc_to_lower,                       /* mbc_to_lower */
  onigenc_mbn_mbc_is_case_ambig,            /* mbc_is_case_ambig */
  eucjp_code_is_ctype,                      /* code_is_ctype */
  onigenc_nothing_get_ctype_code_range,     /* get_ctype_code_range */
  eucjp_left_adjust_char_head,              /* left_adjust_char_head */
  eucjp_is_allowed_reverse_match,           /* is_allowed_reverse_match */
  onigenc_nothing_get_all_fold_match_code,  /* get_all_fold_match_code */
  onigenc_nothing_get_fold_match_info       /* get_fold_match_info */
};
 | 
			
		||||
							
								
								
									
										77
									
								
								oniggnu.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										77
									
								
								oniggnu.h
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,77 @@
 | 
			
		|||
/**********************************************************************
 | 
			
		||||
 | 
			
		||||
  oniggnu.h - Oniguruma (regular expression library)
 | 
			
		||||
 | 
			
		||||
  Copyright (C) 2004  K.Kosako (kosako@sofnec.co.jp)
 | 
			
		||||
 | 
			
		||||
**********************************************************************/
 | 
			
		||||
#ifndef ONIGGNU_H
#define ONIGGNU_H

#include "oniguruma.h"

/* multibyte character type selectors */
#define MBCTYPE_ASCII         0
#define MBCTYPE_EUC           1
#define MBCTYPE_SJIS          2
#define MBCTYPE_UTF8          3

/* GNU regex options, expressed through the ONIG_* constants */
#ifndef RE_NREGS
#define RE_NREGS                ONIG_NREGION
#endif
#define RE_OPTION_IGNORECASE    ONIG_OPTION_IGNORECASE
#define RE_OPTION_EXTENDED      ONIG_OPTION_EXTEND
#define RE_OPTION_MULTILINE     ONIG_OPTION_MULTILINE
#define RE_OPTION_SINGLELINE    ONIG_OPTION_SINGLELINE
#define RE_OPTION_LONGEST       ONIG_OPTION_FIND_LONGEST
#define RE_OPTION_POSIXLINE    (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE)

#ifdef RUBY_PLATFORM
/* With RUBY_PLATFORM defined, the re_* names are redirected to
   ruby_-prefixed names. */
#define re_mbcinit              ruby_re_mbcinit
#define re_compile_pattern      ruby_re_compile_pattern
#define re_recompile_pattern    ruby_re_recompile_pattern
#define re_free_pattern         ruby_re_free_pattern
#define re_adjust_startpos      ruby_re_adjust_startpos
#define re_search               ruby_re_search
#define re_match                ruby_re_match
#define re_set_casetable        ruby_re_set_casetable
#define re_copy_registers       ruby_re_copy_registers
#define re_free_registers       ruby_re_free_registers
#define register_info_type      ruby_register_info_type
#define re_error_code_to_str    ruby_error_code_to_str

/* ...and these two ruby_ names are redirected once more to onig_* names */
#define ruby_error_code_to_str  onig_error_code_to_str
#define ruby_re_copy_registers  onig_region_copy
#else
#define re_error_code_to_str    onig_error_code_to_str
#define re_copy_registers       onig_region_copy
#endif

/* re_mbcinit takes an OnigEncoding under ONIG_RUBY_M17N, an int otherwise */
#ifdef ONIG_RUBY_M17N
ONIG_EXTERN void re_mbcinit P_((OnigEncoding));
#else
ONIG_EXTERN void re_mbcinit P_((int));
#endif

ONIG_EXTERN int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
ONIG_EXTERN int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
ONIG_EXTERN void re_free_pattern P_((struct re_pattern_buffer*));
ONIG_EXTERN int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int));
ONIG_EXTERN int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*));
ONIG_EXTERN int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*));
ONIG_EXTERN void re_set_casetable P_((const char*));
ONIG_EXTERN void re_free_registers P_((struct re_registers*));
ONIG_EXTERN int re_alloc_pattern P_((struct re_pattern_buffer**));  /* added */

#endif /* ONIGGNU_H */
 | 
			
		||||
							
								
								
									
										715
									
								
								oniguruma.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										715
									
								
								oniguruma.h
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,715 @@
 | 
			
		|||
/**********************************************************************
 | 
			
		||||
 | 
			
		||||
  oniguruma.h - Oniguruma (regular expression library)
 | 
			
		||||
 | 
			
		||||
  Copyright (C) 2002-2004  K.Kosako (kosako@sofnec.co.jp)
 | 
			
		||||
 | 
			
		||||
**********************************************************************/
 | 
			
		||||
#ifndef ONIGURUMA_H
 | 
			
		||||
#define ONIGURUMA_H
 | 
			
		||||
 | 
			
		||||
#define ONIGURUMA
 | 
			
		||||
#define ONIGURUMA_VERSION_MAJOR   2
 | 
			
		||||
#define ONIGURUMA_VERSION_MINOR   2
 | 
			
		||||
#define ONIGURUMA_VERSION_TEENY   4
 | 
			
		||||
 | 
			
		||||
/* P_(args): keeps the prototype argument list when __STDC__ or _WIN32 is
   defined, and replaces it with an empty list otherwise. */
#ifndef P_
# if defined(__STDC__) || defined(_WIN32)
#  define P_(args) args
# else
#  define P_(args) ()
# endif
#endif

/* PV_(args): same idea, keyed on HAVE_STDARG_PROTOTYPES. */
#ifndef PV_
# ifdef HAVE_STDARG_PROTOTYPES
#  define PV_(args) args
# else
#  define PV_(args) ()
# endif
#endif

/* ONIG_EXTERN: linkage prefix for public declarations.  On _WIN32 (but not
   __CYGWIN__) it is dllexport when EXPORT or RUBY_EXPORT is defined and
   dllimport otherwise; everywhere else it is plain `extern`.  A definition
   supplied before this point is left untouched. */
#ifndef ONIG_EXTERN
# if defined(_WIN32) && !defined(__CYGWIN__)
#  if defined(EXPORT) || defined(RUBY_EXPORT)
#   define ONIG_EXTERN   extern __declspec(dllexport)
#  else
#   define ONIG_EXTERN   extern __declspec(dllimport)
#  endif
# else
#  define ONIG_EXTERN   extern
# endif
#endif
 | 
			
		||||
 | 
			
		||||
/* PART: character encoding */
 | 
			
		||||
 | 
			
		||||
/* Basic scalar types used by the encoding layer. */
typedef unsigned char  UChar;          /* one byte of encoded text */
typedef unsigned long  OnigCodePoint;  /* a character packed into an integer */
typedef unsigned int   OnigDistance;   /* an unsigned distance/length value */

/* All bits of OnigDistance set.  The whole expansion is parenthesized so the
   macro behaves as a single operand wherever it is used. */
#define ONIG_INFINITE_DISTANCE  (~((OnigDistance )0))
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
  OnigCodePoint from;
 | 
			
		||||
  OnigCodePoint to;
 | 
			
		||||
} OnigCodePointRange;
 | 
			
		||||
 | 
			
		||||
#define ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE  16
 | 
			
		||||
typedef struct {
 | 
			
		||||
  int    target_num;
 | 
			
		||||
  int    target_byte_len[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
 | 
			
		||||
  UChar* target_str[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
 | 
			
		||||
} OnigEncFoldMatchInfo;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if defined(RUBY_PLATFORM) && defined(M17N_H)
 | 
			
		||||
 | 
			
		||||
#define ONIG_RUBY_M17N
 | 
			
		||||
typedef m17n_encoding*        OnigEncoding;
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
  const char  len_table[256];
 | 
			
		||||
  const char* name;
 | 
			
		||||
  int         max_enc_len;
 | 
			
		||||
  int         is_fold_match;
 | 
			
		||||
  int         ctype_support_level; /* sb-only/full */
 | 
			
		||||
  int         is_continuous_sb_mb; /* code point is continuous from sb to mb */
 | 
			
		||||
  OnigCodePoint (*mbc_to_code)(UChar* p, UChar* end);
 | 
			
		||||
  int    (*code_to_mbclen)(OnigCodePoint code);
 | 
			
		||||
  int    (*code_to_mbc)(OnigCodePoint code, UChar *buf);
 | 
			
		||||
  int    (*mbc_to_lower)(UChar* p, UChar* lower);
 | 
			
		||||
  int    (*mbc_is_case_ambig)(UChar* p);
 | 
			
		||||
  int    (*code_is_ctype)(OnigCodePoint code, unsigned int ctype);
 | 
			
		||||
  int    (*get_ctype_code_range)(int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]);
 | 
			
		||||
  UChar* (*left_adjust_char_head)(UChar* start, UChar* s);
 | 
			
		||||
  int    (*is_allowed_reverse_match)(UChar* p, UChar* e);
 | 
			
		||||
  int    (*get_all_fold_match_code)(OnigCodePoint** codes);
 | 
			
		||||
  int    (*get_fold_match_info)(UChar* p, UChar* end, OnigEncFoldMatchInfo** info);
 | 
			
		||||
} OnigEncodingType;
 | 
			
		||||
 | 
			
		||||
typedef OnigEncodingType* OnigEncoding;
 | 
			
		||||
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingASCII;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_1;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_2;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_3;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_4;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_5;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_6;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_7;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_8;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_9;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_10;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_11;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_13;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingUTF8;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingSJIS;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R;
 | 
			
		||||
ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
 | 
			
		||||
 | 
			
		||||
#define ONIG_ENCODING_ASCII        (&OnigEncodingASCII)
 | 
			
		||||
#define ONIG_ENCODING_ISO_8859_1   (&OnigEncodingISO_8859_1)
 | 
			
		||||
#define ONIG_ENCODING_ISO_8859_2   (&OnigEncodingISO_8859_2)
 | 
			
		||||
#define ONIG_ENCODING_ISO_8859_3   (&OnigEncodingISO_8859_3)
 | 
			
		||||
#define ONIG_ENCODING_ISO_8859_4   (&OnigEncodingISO_8859_4)
 | 
			
		||||
#define ONIG_ENCODING_ISO_8859_5   (&OnigEncodingISO_8859_5)
 | 
			
		||||
#define ONIG_ENCODING_ISO_8859_6   (&OnigEncodingISO_8859_6)
 | 
			
		||||
#define ONIG_ENCODING_ISO_8859_7   (&OnigEncodingISO_8859_7)
 | 
			
		||||
#define ONIG_ENCODING_ISO_8859_8   (&OnigEncodingISO_8859_8)
 | 
			
		||||
#define ONIG_ENCODING_ISO_8859_9   (&OnigEncodingISO_8859_9)
 | 
			
		||||
#define ONIG_ENCODING_ISO_8859_10  (&OnigEncodingISO_8859_10)
 | 
			
		||||
#define ONIG_ENCODING_ISO_8859_11  (&OnigEncodingISO_8859_11)
 | 
			
		||||
#define ONIG_ENCODING_ISO_8859_13  (&OnigEncodingISO_8859_13)
 | 
			
		||||
#define ONIG_ENCODING_ISO_8859_14  (&OnigEncodingISO_8859_14)
 | 
			
		||||
#define ONIG_ENCODING_ISO_8859_15  (&OnigEncodingISO_8859_15)
 | 
			
		||||
#define ONIG_ENCODING_ISO_8859_16  (&OnigEncodingISO_8859_16)
 | 
			
		||||
#define ONIG_ENCODING_UTF8         (&OnigEncodingUTF8)
 | 
			
		||||
#define ONIG_ENCODING_EUC_JP       (&OnigEncodingEUC_JP)
 | 
			
		||||
#define ONIG_ENCODING_EUC_TW       (&OnigEncodingEUC_TW)
 | 
			
		||||
#define ONIG_ENCODING_EUC_KR       (&OnigEncodingEUC_KR)
 | 
			
		||||
#define ONIG_ENCODING_EUC_CN       (&OnigEncodingEUC_CN)
 | 
			
		||||
#define ONIG_ENCODING_SJIS         (&OnigEncodingSJIS)
 | 
			
		||||
#define ONIG_ENCODING_KOI8         (&OnigEncodingKOI8)
 | 
			
		||||
#define ONIG_ENCODING_KOI8_R       (&OnigEncodingKOI8_R)
 | 
			
		||||
#define ONIG_ENCODING_BIG5         (&OnigEncodingBIG5)
 | 
			
		||||
 | 
			
		||||
#endif /* else RUBY && M17N */
 | 
			
		||||
 | 
			
		||||
/* null encoding handle */
#define ONIG_ENCODING_UNDEF    ((OnigEncoding )0)

/* work size */
#define ONIGENC_CODE_TO_MBC_MAXLEN     7
#define ONIGENC_MBC_TO_LOWER_MAXLEN    ONIGENC_CODE_TO_MBC_MAXLEN

/* character types: one bit per class, so several can be OR-ed together */
#define ONIGENC_CTYPE_ALPHA    (1 <<  0)
#define ONIGENC_CTYPE_BLANK    (1 <<  1)
#define ONIGENC_CTYPE_CNTRL    (1 <<  2)
#define ONIGENC_CTYPE_DIGIT    (1 <<  3)
#define ONIGENC_CTYPE_GRAPH    (1 <<  4)
#define ONIGENC_CTYPE_LOWER    (1 <<  5)
#define ONIGENC_CTYPE_PRINT    (1 <<  6)
#define ONIGENC_CTYPE_PUNCT    (1 <<  7)
#define ONIGENC_CTYPE_SPACE    (1 <<  8)
#define ONIGENC_CTYPE_UPPER    (1 <<  9)
#define ONIGENC_CTYPE_XDIGIT   (1 << 10)
#define ONIGENC_CTYPE_WORD     (1 << 11)
#define ONIGENC_CTYPE_ASCII    (1 << 12)
#define ONIGENC_CTYPE_ALNUM    (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT)

/* ctype support level */
#define ONIGENC_CTYPE_SUPPORT_LEVEL_SB       0
#define ONIGENC_CTYPE_SUPPORT_LEVEL_FULL     1
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* short alias for the length-by-first-byte lookup */
#define enc_len(enc,byte)          ONIGENC_MBC_LEN_BY_HEAD(enc,byte)

/* predicates on an encoding handle */
#define ONIGENC_IS_UNDEF(enc)          ((enc) == ONIG_ENCODING_UNDEF)
#define ONIGENC_IS_SINGLEBYTE(enc)     (ONIGENC_MBC_MAXLEN(enc) == 1)
#define ONIGENC_IS_MBC_HEAD(enc,byte)  (ONIGENC_MBC_LEN_BY_HEAD(enc,byte) != 1)

/* predicates on a byte pointer / a code point; "ASCII" means below 128 */
#define ONIGENC_IS_MBC_ASCII(p)           (*(p)   < 128)
#define ONIGENC_IS_CODE_ASCII(code)       ((code) < 128)

/* note: `code` is evaluated twice here */
#define ONIGENC_IS_CODE_SB_WORD(enc,code) \
  (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))

/* word test on the character found at s (bounded by end) */
#define ONIGENC_IS_MBC_WORD(enc,s,end) \
   ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef ONIG_RUBY_M17N
 | 
			
		||||
 | 
			
		||||
#include <ctype.h> /* for isblank(), isgraph() */

/* case handling is forwarded to helper functions */
#define ONIGENC_MBC_TO_LOWER(enc,p,buf)       onigenc_mbc_to_lower(enc,p,buf)
#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p)      onigenc_mbc_is_case_ambig(enc,p)

/* fixed properties in this configuration */
#define ONIGENC_IS_FOLD_MATCH(enc)            FALSE
#define ONIGENC_IS_CONTINUOUS_SB_MB(enc)      FALSE
#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc)      ONIGENC_CTYPE_SUPPORT_LEVEL_SB

/* positioning helpers */
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
        onigenc_is_allowed_reverse_match(enc, s, end)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
        onigenc_get_left_adjust_char_head(enc, start, s)

/* fold matching and ctype ranges expand to constants here */
#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes)     0
#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info)    ONIG_NO_SUPPORT_CONFIG
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
        ONIG_NO_SUPPORT_CONFIG
 | 
			
		||||
/* Length and code-point conversion, forwarded to the m17n_* functions.
   The byte argument is parenthesized before the cast so that a compound
   expression is converted as a whole rather than only its first operand. */
#define ONIGENC_MBC_LEN_BY_HEAD(enc,b)        m17n_mbclen(enc,(int )(b))
#define ONIGENC_MBC_MAXLEN(enc)               m17n_mbmaxlen(enc)

/* a non-positive maximum length is reported as ONIG_INFINITE_DISTANCE */
#define ONIGENC_MBC_MAXLEN_DIST(enc) \
    (ONIGENC_MBC_MAXLEN(enc) > 0 ? ONIGENC_MBC_MAXLEN(enc) \
                                 : ONIG_INFINITE_DISTANCE)
#define ONIGENC_MBC_TO_CODE(enc,p,e)       m17n_codepoint((enc),(p),(e))
#define ONIGENC_CODE_TO_MBCLEN(enc,code)   m17n_codelen((enc),(code))
#define ONIGENC_CODE_TO_MBC(enc,code,buf)  onigenc_code_to_mbc(enc, code, buf)

#if 0
#define ONIGENC_STEP_BACK(enc,start,s,n)   /* !! not supported !! */
#endif
 | 
			
		||||
 | 
			
		||||
/* generic class test, forwarded to a helper function */
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) \
        onigenc_is_code_ctype(enc,code,ctype)

/* blank/graph tests: use the <ctype.h> macro when the platform defines one,
   otherwise fall back to an equivalent expression.  `code` is parenthesized
   before the cast so a compound argument is converted as a whole. */
#ifdef isblank
# define ONIGENC_IS_CODE_BLANK(enc,code) isblank((int )(code))
#else
# define ONIGENC_IS_CODE_BLANK(enc,code) ((code) == ' ' || (code) == '\t')
#endif
#ifdef isgraph
# define ONIGENC_IS_CODE_GRAPH(enc,code) isgraph((int )(code))
#else
# define ONIGENC_IS_CODE_GRAPH(enc,code) \
  (isprint((int )(code)) && !isspace((int )(code)))
#endif
 | 
			
		||||
 | 
			
		||||
#define ONIGENC_IS_CODE_PRINT(enc,code)     m17n_isprint(enc,code)
 | 
			
		||||
#define ONIGENC_IS_CODE_ALNUM(enc,code)     m17n_isalnum(enc,code)
 | 
			
		||||
#define ONIGENC_IS_CODE_ALPHA(enc,code)     m17n_isalpha(enc,code)
 | 
			
		||||
#define ONIGENC_IS_CODE_LOWER(enc,code)     m17n_islower(enc,code)
 | 
			
		||||
#define ONIGENC_IS_CODE_UPPER(enc,code)     m17n_isupper(enc,code)
 | 
			
		||||
#define ONIGENC_IS_CODE_CNTRL(enc,code)     m17n_iscntrl(enc,code)
 | 
			
		||||
#define ONIGENC_IS_CODE_PUNCT(enc,code)     m17n_ispunct(enc,code)
 | 
			
		||||
#define ONIGENC_IS_CODE_SPACE(enc,code)     m17n_isspace(enc,code)
 | 
			
		||||
#define ONIGENC_IS_CODE_DIGIT(enc,code)     m17n_isdigit(enc,code)
 | 
			
		||||
#define ONIGENC_IS_CODE_XDIGIT(enc,code)    m17n_isxdigit(enc,code)
 | 
			
		||||
#define ONIGENC_IS_CODE_WORD(enc,code)      m17n_iswchar(enc,code)
 | 
			
		||||
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onigenc_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* buf));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onigenc_mbc_is_case_ambig P_((OnigEncoding enc, UChar* p));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end));
 | 
			
		||||
 | 
			
		||||
#else  /* ONIG_RUBY_M17N */
 | 
			
		||||
 | 
			
		||||
/* In this configuration every operation reads a member of the encoding
   descriptor or calls through one of its function pointers. */

/* scalar properties */
#define ONIGENC_NAME(enc)                      ((enc)->name)
#define ONIGENC_IS_FOLD_MATCH(enc)             ((enc)->is_fold_match)
#define ONIGENC_IS_CONTINUOUS_SB_MB(enc)       ((enc)->is_continuous_sb_mb)
#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc)       ((enc)->ctype_support_level)

/* case handling */
#define ONIGENC_MBC_TO_LOWER(enc,p,buf)        (enc)->mbc_to_lower(p,buf)
#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p)       (enc)->mbc_is_case_ambig(p)

/* positioning */
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
        (enc)->is_allowed_reverse_match(s,end)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
        (enc)->left_adjust_char_head(start, s)
#define ONIGENC_STEP_BACK(enc,start,s,n) \
        onigenc_step_back((enc),(start),(s),(n))

/* fold matching */
#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) \
        (enc)->get_all_fold_match_code(codes)
#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) \
        (enc)->get_fold_match_info(p,end,info)

/* lengths and code-point conversion */
#define ONIGENC_MBC_LEN_BY_HEAD(enc,byte)     ((enc)->len_table[(int )(byte)])
#define ONIGENC_MBC_MAXLEN(enc)               ((enc)->max_enc_len)
#define ONIGENC_MBC_MAXLEN_DIST(enc)           ONIGENC_MBC_MAXLEN(enc)
#define ONIGENC_MBC_TO_CODE(enc,p,e)           (enc)->mbc_to_code((p),(e))
#define ONIGENC_CODE_TO_MBCLEN(enc,code)       (enc)->code_to_mbclen(code)
#define ONIGENC_CODE_TO_MBC(enc,code,buf)      (enc)->code_to_mbc(code,buf)

/* character classes: one generic test plus a wrapper per class */
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype)  (enc)->code_is_ctype(code,ctype)

#define ONIGENC_IS_CODE_GRAPH(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
#define ONIGENC_IS_CODE_PRINT(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
#define ONIGENC_IS_CODE_ALNUM(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
#define ONIGENC_IS_CODE_ALPHA(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
#define ONIGENC_IS_CODE_LOWER(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
#define ONIGENC_IS_CODE_UPPER(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
#define ONIGENC_IS_CODE_CNTRL(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
#define ONIGENC_IS_CODE_PUNCT(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
#define ONIGENC_IS_CODE_SPACE(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
#define ONIGENC_IS_CODE_BLANK(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
#define ONIGENC_IS_CODE_DIGIT(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
#define ONIGENC_IS_CODE_XDIGIT(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
#define ONIGENC_IS_CODE_WORD(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)

#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
        (enc)->get_ctype_code_range(ctype,nsb,nmb,sbr,mbr)
 | 
			
		||||
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
UChar* onigenc_step_back P_((OnigEncoding enc, UChar* start, UChar* s, int n));
 | 
			
		||||
 | 
			
		||||
#endif /* is not ONIG_RUBY_M17N */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* encoding API */
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onigenc_init P_(());
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onigenc_set_default_encoding P_((OnigEncoding enc));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
OnigEncoding onigenc_get_default_encoding P_(());
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
void  onigenc_set_default_caseconv_table P_((UChar* table));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
UChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, UChar* start, UChar* s, UChar** prev));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
UChar* onigenc_get_prev_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* PART: regular expression */
 | 
			
		||||
 | 
			
		||||
/* config parameters (compile-time limits) */
#define ONIG_NREGION                          10
#define ONIG_MAX_BACKREF_NUM                1000
#define ONIG_MAX_REPEAT_NUM               100000
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM      1000

/* constants */
#define ONIG_MAX_ERROR_MESSAGE_LEN            90
 | 
			
		||||
 | 
			
		||||
#if defined(RUBY_PLATFORM) && !defined(ONIG_RUBY_M17N)
/* Ruby build without M17N: ismbchar()/mbclen() are answered from the
   lead-byte length table of the default encoding. */
ONIG_EXTERN OnigEncoding    OnigEncDefaultCharEncoding;

/* byte length recorded for lead byte `c` */
#define mbclen(c)  (OnigEncDefaultCharEncoding->len_table[(unsigned char )(c)])

/* replace any earlier ismbchar: true when that length is not 1 */
#undef ismbchar
#define ismbchar(c) (mbclen((c)) != 1)
#endif
 | 
			
		||||
 | 
			
		||||
/* Bit set of ONIG_OPTION_xxx flags. */
typedef unsigned int        OnigOptionType;

/* options: each flag is the previous one shifted left by one bit */
#define ONIG_OPTION_NONE                 0
#define ONIG_OPTION_IGNORECASE           1L
#define ONIG_OPTION_EXTEND               (ONIG_OPTION_IGNORECASE         << 1)
#define ONIG_OPTION_MULTILINE            (ONIG_OPTION_EXTEND             << 1)
#define ONIG_OPTION_SINGLELINE           (ONIG_OPTION_MULTILINE          << 1)
#define ONIG_OPTION_FIND_LONGEST         (ONIG_OPTION_SINGLELINE         << 1)
#define ONIG_OPTION_FIND_NOT_EMPTY       (ONIG_OPTION_FIND_LONGEST       << 1)
#define ONIG_OPTION_NEGATE_SINGLELINE    (ONIG_OPTION_FIND_NOT_EMPTY     << 1)
#define ONIG_OPTION_DONT_CAPTURE_GROUP   (ONIG_OPTION_NEGATE_SINGLELINE  << 1)
#define ONIG_OPTION_CAPTURE_GROUP        (ONIG_OPTION_DONT_CAPTURE_GROUP << 1)
/* options (search time) */
#define ONIG_OPTION_NOTBOL               (ONIG_OPTION_CAPTURE_GROUP << 1)
#define ONIG_OPTION_NOTEOL               (ONIG_OPTION_NOTBOL << 1)
#define ONIG_OPTION_POSIX_REGION         (ONIG_OPTION_NOTEOL << 1)

/* the default option set is the empty set */
#define ONIG_OPTION_DEFAULT            ONIG_OPTION_NONE

/* set, clear and test flags; ON and OFF modify `options` in place */
#define ONIG_OPTION_ON(options,regopt)      ((options) |= (regopt))
#define ONIG_OPTION_OFF(options,regopt)     ((options) &= ~(regopt))
#define ONIG_IS_OPTION_ON(options,option)   ((options) & (option))
 | 
			
		||||
 | 
			
		||||
/* syntax */
 | 
			
		||||
typedef struct {
 | 
			
		||||
  unsigned int  op;
 | 
			
		||||
  unsigned int  op2;
 | 
			
		||||
  unsigned int  behavior;
 | 
			
		||||
  OnigOptionType options;    /* default option */
 | 
			
		||||
} OnigSyntaxType;
 | 
			
		||||
 | 
			
		||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
 | 
			
		||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
 | 
			
		||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
 | 
			
		||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
 | 
			
		||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
 | 
			
		||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
 | 
			
		||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
 | 
			
		||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
 | 
			
		||||
 | 
			
		||||
/* predefined syntaxes (see regparse.c) */
 | 
			
		||||
#define ONIG_SYNTAX_POSIX_BASIC        (&OnigSyntaxPosixBasic)
 | 
			
		||||
#define ONIG_SYNTAX_POSIX_EXTENDED     (&OnigSyntaxPosixExtended)
 | 
			
		||||
#define ONIG_SYNTAX_EMACS              (&OnigSyntaxEmacs)
 | 
			
		||||
#define ONIG_SYNTAX_GREP               (&OnigSyntaxGrep)
 | 
			
		||||
#define ONIG_SYNTAX_GNU_REGEX          (&OnigSyntaxGnuRegex)
 | 
			
		||||
#define ONIG_SYNTAX_JAVA               (&OnigSyntaxJava)
 | 
			
		||||
#define ONIG_SYNTAX_PERL               (&OnigSyntaxPerl)
 | 
			
		||||
#define ONIG_SYNTAX_RUBY               (&OnigSyntaxRuby)
 | 
			
		||||
 | 
			
		||||
/* default syntax */
 | 
			
		||||
ONIG_EXTERN OnigSyntaxType*   OnigDefaultSyntax;
 | 
			
		||||
#define ONIG_SYNTAX_DEFAULT   OnigDefaultSyntax
 | 
			
		||||
 | 
			
		||||
/* syntax (operators): one bit per operator, bits 0-30 */
#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS    (1<<0)
#define ONIG_SYN_OP_DOT_ANYCHAR                 (1<<1)   /* . */
#define ONIG_SYN_OP_ASTERISK_ZERO_INF           (1<<2)   /* * */
#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF       (1<<3)
#define ONIG_SYN_OP_PLUS_ONE_INF                (1<<4)   /* + */
#define ONIG_SYN_OP_ESC_PLUS_ONE_INF            (1<<5)
#define ONIG_SYN_OP_QMARK_ZERO_ONE              (1<<6)   /* ? */
#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE          (1<<7)
#define ONIG_SYN_OP_BRACE_INTERVAL              (1<<8)   /* {lower,upper} */
#define ONIG_SYN_OP_ESC_BRACE_INTERVAL          (1<<9)   /* \{lower,upper\} */
#define ONIG_SYN_OP_VBAR_ALT                    (1<<10)  /* | */
#define ONIG_SYN_OP_ESC_VBAR_ALT                (1<<11)  /* \| */
#define ONIG_SYN_OP_LPAREN_SUBEXP               (1<<12)  /* (...)   */
#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP           (1<<13)  /* \(...\) */
#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR           (1<<14)  /* \A, \Z, \z */
#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR  (1<<15)  /* \G     */
#define ONIG_SYN_OP_DECIMAL_BACKREF             (1<<16)  /* \num   */
#define ONIG_SYN_OP_BRACKET_CC                  (1<<17)  /* [...]  */
#define ONIG_SYN_OP_ESC_W_WORD                  (1<<18)  /* \w, \W */
#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END     (1<<19)  /* \<. \> */
#define ONIG_SYN_OP_ESC_B_WORD_BOUND            (1<<20)  /* \b, \B */
#define ONIG_SYN_OP_ESC_S_WHITE_SPACE           (1<<21)  /* \s, \S */
#define ONIG_SYN_OP_ESC_D_DIGIT                 (1<<22)  /* \d, \D */
#define ONIG_SYN_OP_LINE_ANCHOR                 (1<<23)  /* ^, $   */
#define ONIG_SYN_OP_POSIX_BRACKET               (1<<24)  /* [:xxxx:] */
#define ONIG_SYN_OP_QMARK_NON_GREEDY            (1<<25)  /* ??,*?,+?,{n,m}? */
#define ONIG_SYN_OP_ESC_CONTROL_CHARS           (1<<26)  /* \n,\r,\t,\a ... */
#define ONIG_SYN_OP_ESC_C_CONTROL               (1<<27)  /* \cx  */
#define ONIG_SYN_OP_ESC_OCTAL3                  (1<<28)  /* \OOO */
#define ONIG_SYN_OP_ESC_X_HEX2                  (1<<29)  /* \xHH */
#define ONIG_SYN_OP_ESC_X_BRACE_HEX8            (1<<30)  /* \x{7HHHHHHH} */

/* second operator set: a separate bit space that starts again at bit 0 */
#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE        (1<<0)   /* \Q...\E */
#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT         (1<<1)   /* (?...) */
#define ONIG_SYN_OP2_OPTION_PERL                (1<<2)   /* (?imsx),(?-imsx) */
#define ONIG_SYN_OP2_OPTION_RUBY                (1<<3)   /* (?imx), (?-imx)  */
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT     (1<<4)   /* ?+,*+,++ */
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL   (1<<5)   /* {n,m}+   */
#define ONIG_SYN_OP2_CCLASS_SET_OP              (1<<6)   /* [...&&..[..]..] */
#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP       (1<<7)   /* (?<name>...) */
#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF        (1<<8)   /* \k<name> */
#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL          (1<<9)   /* \g<name>, \g<n> */
#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY     (1<<10)  /* (?@..),(?@<x>..) */
#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL  (1<<11)  /* \C-x */
#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META     (1<<12)  /* \M-x */
#define ONIG_SYN_OP2_ESC_V_VTAB                 (1<<13)  /* \v as VTAB */
#define ONIG_SYN_OP2_ESC_U_HEX4                 (1<<14)  /* \uHHHH */
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR         (1<<15)  /* \`, \' */
#define ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY        (1<<16)  /* \p{...}, \P{...} */
 | 
			
		||||
 | 
			
		||||
/* syntax (behavior) */
/* Bit 31 is written as an unsigned shift: (1<<31) on a 32-bit int shifts
   into the sign bit, which is undefined behavior and in practice yields a
   negative value. */
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS           (1U<<31) /* not implemented */
#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS        (1<<0)  /* ?, *, +, {n,m} */
#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS      (1<<1)  /* error or ignore */
#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP    (1<<2)  /* ...)... */
#define ONIG_SYN_ALLOW_INVALID_INTERVAL          (1<<3)  /* {??? */
#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV       (1<<4)  /* {,n} => {0,n} */
#define ONIG_SYN_STRICT_CHECK_BACKREF            (1<<5)  /* /(\1)/,/\1()/ ..*/
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND   (1<<6)  /* (?<=a|bc) */
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP        (1<<7)  /* see doc/RE */
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1<<8)  /* (?<x>)(?<x>) */

/* syntax (behavior) in char class [...] */
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC      (1<<20) /* [^...] */
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC          (1<<21) /* [..\w..] etc.. */
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC         (1<<22)
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC     (1<<23) /* [0-9-a]=[0-9\-a] */
/* syntax (behavior) warning */
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED          (1<<24) /* [,-,] */
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT    (1<<25) /* (?:a*)+ */
 | 
			
		||||
 | 
			
		||||
/* meta character specifiers: the `what` argument of onig_set_meta_char() */
#define ONIG_META_CHAR_ESCAPE               0
#define ONIG_META_CHAR_ANYCHAR              1
#define ONIG_META_CHAR_ANYTIME              2
#define ONIG_META_CHAR_ZERO_OR_ONE_TIME     3
#define ONIG_META_CHAR_ONE_OR_MORE_TIME     4
#define ONIG_META_CHAR_ANYCHAR_ANYTIME      5

#define ONIG_INEFFECTIVE_META_CHAR          0
 | 
			
		||||
 | 
			
		||||
/* error codes */
/* true for the pattern-related codes, i.e. the range -100 .. -999 */
#define ONIG_IS_PATTERN_ERROR(ecode)   ((ecode) <= -100 && (ecode) > -1000)

/* normal return */
#define ONIG_NORMAL                                            0
#define ONIG_MISMATCH                                         -1
#define ONIG_NO_SUPPORT_CONFIG                                -2

/* internal error */
#define ONIGERR_PARSER_BUG                                    -11
#define ONIGERR_STACK_BUG                                     -12
#define ONIGERR_UNDEFINED_BYTECODE                            -13
#define ONIGERR_UNEXPECTED_BYTECODE                           -14
#define ONIGERR_MATCH_STACK_LIMIT_OVER                        -15
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED                -21
#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR  -22

/* general error */
#define ONIGERR_INVALID_ARGUMENT                              -30

/* syntax error */
#define ONIGERR_END_PATTERN_AT_LEFT_BRACE                    -100
#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET                  -101
#define ONIGERR_EMPTY_CHAR_CLASS                             -102
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS                  -103
#define ONIGERR_END_PATTERN_AT_BACKSLASH                     -104
#define ONIGERR_END_PATTERN_AT_META                          -105
#define ONIGERR_END_PATTERN_AT_CONTROL                       -106
#define ONIGERR_META_CODE_SYNTAX                             -108
#define ONIGERR_CONTROL_CODE_SYNTAX                          -109
#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE             -110
#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE           -111
#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS      -112
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED      -113
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID            -114
#define ONIGERR_NESTED_REPEAT_OPERATOR                       -115
#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS                  -116
#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS       -117
#define ONIGERR_END_PATTERN_IN_GROUP                         -118
#define ONIGERR_UNDEFINED_GROUP_OPTION                       -119
#define ONIGERR_INVALID_POSIX_BRACKET_TYPE                   -121
#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN                  -122
#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN                 -123

/* values error (syntax error) */
#define ONIGERR_TOO_BIG_NUMBER                               -200
#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE              -201
#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE     -202
#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS                    -203
#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE          -204
#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES                   -205
#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING                  -206
#define ONIGERR_TOO_BIG_BACKREF_NUMBER                       -207
#define ONIGERR_INVALID_BACKREF                              -208
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED         -209
#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE                     -212
#define ONIGERR_EMPTY_GROUP_NAME                             -214
#define ONIGERR_INVALID_GROUP_NAME                           -215
#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME                   -216
#define ONIGERR_UNDEFINED_NAME_REFERENCE                     -217
#define ONIGERR_UNDEFINED_GROUP_REFERENCE                    -218
#define ONIGERR_MULTIPLEX_DEFINED_NAME                       -219
#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL               -220
#define ONIGERR_NEVER_ENDING_RECURSION                       -221
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY        -222
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME                   -223

/* errors related to thread (outside the pattern-error range) */
#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT                 -1001
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
#define ONIG_MAX_CAPTURE_HISTORY_GROUP   31

/* True when index i is within the limit, region r has a history list,
   and entry i of that list is non-null.  The index is checked first, so
   list[i] is never read for i above the limit. */
#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
  ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
 | 
			
		||||
 | 
			
		||||
/* match result region type */
struct re_registers {
  int  allocated;
  int  num_regs;
  int* beg;
  int* end;
  /* extended */
  struct re_registers** list; /* capture history. list[1]-list[31] */
};

/* NOTE(review): presumably stored in beg[]/end[] for a group that did not
   take part in the match -- confirm against the matcher. */
#define ONIG_REGION_NOTPOS            -1

/* Oniguruma's own name for struct re_registers */
typedef struct re_registers   OnigRegion;
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
  UChar* par;
 | 
			
		||||
  UChar* par_end;
 | 
			
		||||
} OnigErrorInfo;
 | 
			
		||||
 | 
			
		||||
/* Lower and upper bound of one repeat; regex_t keeps an array of these in
   its repeat_range member. */
typedef struct {
  int lower;
  int upper;
} OnigRepeatRange;
 | 
			
		||||
 | 
			
		||||
/* Warning callback type accepted by onig_set_warn_func() and
   onig_set_verb_warn_func(); it receives the message text. */
typedef void (*OnigWarnFunc) P_((char* s));

/* Ready-made callback exposed as ONIG_NULL_WARN.
   NOTE(review): presumably discards the message -- its body is not in this
   header. */
extern void onig_null_warn P_((char* s));
#define ONIG_NULL_WARN       onig_null_warn
 | 
			
		||||
 | 
			
		||||
/* number of entries in byte-indexed tables such as regex_t.map */
#define ONIG_CHAR_TABLE_SIZE   256

/* regex_t state */
#define ONIG_STATE_NORMAL              0
#define ONIG_STATE_SEARCHING           1
#define ONIG_STATE_COMPILING          -1
#define ONIG_STATE_MODIFY             -2

/* Collapse every positive value of reg->state to ONIG_STATE_SEARCHING;
   zero and negative values are returned unchanged. */
#define ONIG_STATE(reg) \
  ((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state)
 | 
			
		||||
 | 
			
		||||
typedef struct re_pattern_buffer {
 | 
			
		||||
  /* common members of BBuf(bytes-buffer) */
 | 
			
		||||
  unsigned char* p;         /* compiled pattern */
 | 
			
		||||
  unsigned int used;        /* used space for p */
 | 
			
		||||
  unsigned int alloc;       /* allocated space for p */
 | 
			
		||||
 | 
			
		||||
  int state;                     /* normal, searching, compiling */
 | 
			
		||||
  int num_mem;                   /* used memory(...) num counted from 1 */
 | 
			
		||||
  int num_repeat;                /* OP_REPEAT/OP_REPEAT_NG id-counter */
 | 
			
		||||
  int num_null_check;            /* OP_NULL_CHECK_START/END id counter */
 | 
			
		||||
  int num_call;                  /* number of subexp call */
 | 
			
		||||
  unsigned int capture_history;  /* (?@...) flag (1-31) */
 | 
			
		||||
  unsigned int bt_mem_start;     /* need backtrack flag */
 | 
			
		||||
  unsigned int bt_mem_end;       /* need backtrack flag */
 | 
			
		||||
  int stack_pop_level;
 | 
			
		||||
  int repeat_range_alloc;
 | 
			
		||||
  OnigRepeatRange* repeat_range;
 | 
			
		||||
 | 
			
		||||
  OnigEncoding  enc;
 | 
			
		||||
  OnigOptionType    options;
 | 
			
		||||
  OnigSyntaxType*   syntax;
 | 
			
		||||
  void*             name_table;
 | 
			
		||||
 | 
			
		||||
  /* optimization info (string search, char-map and anchors) */
 | 
			
		||||
  int            optimize;          /* optimize flag */
 | 
			
		||||
  int            threshold_len;     /* search str-length for apply optimize */
 | 
			
		||||
  int            anchor;            /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
 | 
			
		||||
  OnigDistance   anchor_dmin;       /* (SEMI_)END_BUF anchor distance */
 | 
			
		||||
  OnigDistance   anchor_dmax;       /* (SEMI_)END_BUF anchor distance */
 | 
			
		||||
  int            sub_anchor;        /* start-anchor for exact or map */
 | 
			
		||||
  unsigned char *exact;
 | 
			
		||||
  unsigned char *exact_end;
 | 
			
		||||
  unsigned char  map[ONIG_CHAR_TABLE_SIZE];  /* used as BM skip or char-map */
 | 
			
		||||
  int           *int_map;                   /* BM skip for exact_len > 255 */
 | 
			
		||||
  int           *int_map_backward;          /* BM skip for backward search */
 | 
			
		||||
  OnigDistance   dmin;                      /* min-distance of exact or map */
 | 
			
		||||
  OnigDistance   dmax;                      /* max-distance of exact or map */
 | 
			
		||||
 | 
			
		||||
  /* regex_t link chain */
 | 
			
		||||
  struct re_pattern_buffer* chain;  /* escape compile-conflict */
 | 
			
		||||
} regex_t;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* Oniguruma Native API */
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onig_init P_((void));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onig_error_code_to_str PV_((UChar* s, int err_code, ...));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
void onig_set_warn_func P_((OnigWarnFunc f));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
void onig_set_verb_warn_func P_((OnigWarnFunc f));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onig_new P_((regex_t**, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
void onig_free P_((regex_t*));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onig_recompile P_((regex_t*, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onig_search P_((regex_t*, UChar* str, UChar* end, UChar* start, UChar* range, OnigRegion* region, OnigOptionType option));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onig_match P_((regex_t*, UChar* str, UChar* end, UChar* at, OnigRegion* region, OnigOptionType option));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
OnigRegion* onig_region_new P_((void));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
void onig_region_free P_((OnigRegion* region, int free_self));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
void onig_region_copy P_((OnigRegion* to, OnigRegion* from));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
void onig_region_clear P_((OnigRegion* region));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onig_region_resize P_((OnigRegion* region, int n));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onig_name_to_group_numbers P_((regex_t* reg, UChar* name, UChar* name_end,
 | 
			
		||||
			            int** nums));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onig_name_to_backref_number P_((regex_t* reg, UChar* name, UChar* name_end, OnigRegion *region));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onig_foreach_name P_((regex_t* reg, int (*func)(UChar*,UChar*,int,int*,regex_t*,void*), void* arg));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onig_number_of_names P_((regex_t* reg));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
OnigEncoding onig_get_encoding P_((regex_t* reg));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
OnigOptionType onig_get_options P_((regex_t* reg));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
OnigSyntaxType* onig_get_syntax P_((regex_t* reg));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onig_set_default_syntax P_((OnigSyntaxType* syntax));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onig_set_meta_char P_((unsigned int what, unsigned int c));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
int onig_end P_((void));
 | 
			
		||||
ONIG_EXTERN
 | 
			
		||||
const char* onig_version P_((void));
 | 
			
		||||
 | 
			
		||||
#endif /* ONIGURUMA_H */
 | 
			
		||||
							
								
								
									
										586
									
								
								regenc.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										586
									
								
								regenc.c
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,586 @@
 | 
			
		|||
/**********************************************************************
 | 
			
		||||
 | 
			
		||||
  regenc.c -  Oniguruma (regular expression library)
 | 
			
		||||
 | 
			
		||||
  Copyright (C) 2003-2004  K.Kosako (kosako@sofnec.co.jp)
 | 
			
		||||
 | 
			
		||||
**********************************************************************/
 | 
			
		||||
#include "regenc.h"
 | 
			
		||||
 | 
			
		||||
/* Process-wide default encoding; read and replaced through
   onigenc_get_default_encoding() / onigenc_set_default_encoding(). */
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
 | 
			
		||||
 | 
			
		||||
/* Initialize the encoding module.  There is nothing to set up here, so
   this always returns 0.  Declared with (void) so that the definition is
   a real prototype rather than an old-style empty parameter list. */
extern int
onigenc_init(void)
{
  return 0;
}
 | 
			
		||||
 | 
			
		||||
extern OnigEncoding
 | 
			
		||||
onigenc_get_default_encoding()
 | 
			
		||||
{
 | 
			
		||||
  return OnigEncDefaultCharEncoding;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_set_default_encoding(OnigEncoding enc)
 | 
			
		||||
{
 | 
			
		||||
  OnigEncDefaultCharEncoding = enc;
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern UChar*
 | 
			
		||||
onigenc_get_right_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
 | 
			
		||||
{
 | 
			
		||||
  UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
 | 
			
		||||
  if (p < s) {
 | 
			
		||||
    p += enc_len(enc, *p);
 | 
			
		||||
  }
 | 
			
		||||
  return p;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern UChar*
 | 
			
		||||
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
 | 
			
		||||
				   UChar* start, UChar* s, UChar** prev)
 | 
			
		||||
{
 | 
			
		||||
  UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
 | 
			
		||||
 | 
			
		||||
  if (p < s) {
 | 
			
		||||
    if (prev) *prev = p;
 | 
			
		||||
    p += enc_len(enc, *p);
 | 
			
		||||
  }
 | 
			
		||||
  else {
 | 
			
		||||
    if (prev) *prev = (UChar* )NULL; /* Sorry */
 | 
			
		||||
  }
 | 
			
		||||
  return p;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern UChar*
 | 
			
		||||
onigenc_get_prev_char_head(OnigEncoding enc, UChar* start, UChar* s)
 | 
			
		||||
{
 | 
			
		||||
  if (s <= start)
 | 
			
		||||
    return (UChar* )NULL;
 | 
			
		||||
 | 
			
		||||
  return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern UChar*
 | 
			
		||||
onigenc_step_back(OnigEncoding enc, UChar* start, UChar* s, int n)
 | 
			
		||||
{
 | 
			
		||||
  while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
 | 
			
		||||
    if (s <= start)
 | 
			
		||||
      return (UChar* )NULL;
 | 
			
		||||
 | 
			
		||||
    s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
 | 
			
		||||
  }
 | 
			
		||||
  return s;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifndef ONIG_RUBY_M17N
 | 
			
		||||
 | 
			
		||||
#ifndef NOT_RUBY
 | 
			
		||||
#define USE_APPLICATION_TO_LOWER_CASE_TABLE
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* Pointer to the lower-case conversion table in use; starts out null. */
UChar* OnigEncAsciiToLowerCaseTable = (UChar* )0;
 | 
			
		||||
 | 
			
		||||
#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
 | 
			
		||||
static UChar BuiltInAsciiToLowerCaseTable[] = {
 | 
			
		||||
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
 | 
			
		||||
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
 | 
			
		||||
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
 | 
			
		||||
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
 | 
			
		||||
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
 | 
			
		||||
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
 | 
			
		||||
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
 | 
			
		||||
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
 | 
			
		||||
  '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
 | 
			
		||||
  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
 | 
			
		||||
  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
 | 
			
		||||
  '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
 | 
			
		||||
  '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
 | 
			
		||||
  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
 | 
			
		||||
  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
 | 
			
		||||
  '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
 | 
			
		||||
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
 | 
			
		||||
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
 | 
			
		||||
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
 | 
			
		||||
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
 | 
			
		||||
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
 | 
			
		||||
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
 | 
			
		||||
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
 | 
			
		||||
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
 | 
			
		||||
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
 | 
			
		||||
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
 | 
			
		||||
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
 | 
			
		||||
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
 | 
			
		||||
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
 | 
			
		||||
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
 | 
			
		||||
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
 | 
			
		||||
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
 | 
			
		||||
};
 | 
			
		||||
#endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */
 | 
			
		||||
 | 
			
		||||
/*
 * Character-type flag words for single bytes, indexed by byte value.
 * ONIGENC_IS_ASCII_CODE_CTYPE() masks an entry with the requested ctype
 * bits.  Only 0x00 - 0x7f carry flags; entries 0x80 - 0xff are all zero.
 */
unsigned short OnigEncAsciiCtypeTable[256] = {
  /* 0x00 - 0x07 */
  0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0x08 - 0x0f  (0x09 is TAB, 0x0a - 0x0d are LF VT FF CR) */
  0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
  /* 0x10 - 0x17 */
  0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0x18 - 0x1f */
  0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0x20 - 0x27  (0x20 is SPACE) */
  0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
  /* 0x28 - 0x2f */
  0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
  /* 0x30 - 0x37  ('0' - '7') */
  0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
  /* 0x38 - 0x3f  ('8', '9', then punctuation) */
  0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
  /* 0x40 - 0x47  ('@', 'A' - 'G') */
  0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
  /* 0x48 - 0x4f  ('H' - 'O') */
  0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
  /* 0x50 - 0x57  ('P' - 'W') */
  0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
  /* 0x58 - 0x5f  ('X' - 'Z', punctuation, '_') */
  0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
  /* 0x60 - 0x67  ('`', 'a' - 'g') */
  0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
  /* 0x68 - 0x6f  ('h' - 'o') */
  0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
  /* 0x70 - 0x77  ('p' - 'w') */
  0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
  /* 0x78 - 0x7f  ('x' - 'z', punctuation, DEL) */
  0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004
  /* 0x80 - 0xff: the remaining 128 entries are left to implicit
     zero-initialization, i.e. no flag is set for non-ASCII bytes. */
};
 | 
			
		||||
 | 
			
		||||
extern void
 | 
			
		||||
onigenc_set_default_caseconv_table(UChar* table)
 | 
			
		||||
{
 | 
			
		||||
  if (table == (UChar* )0) {
 | 
			
		||||
#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
 | 
			
		||||
    table = BuiltInAsciiToLowerCaseTable;
 | 
			
		||||
#else
 | 
			
		||||
    return ;
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (table != OnigEncAsciiToLowerCaseTable) {
 | 
			
		||||
    OnigEncAsciiToLowerCaseTable = table;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern UChar*
 | 
			
		||||
onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
 | 
			
		||||
{
 | 
			
		||||
  return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_nothing_get_all_fold_match_code(OnigCodePoint** codes)
 | 
			
		||||
{
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_nothing_get_fold_match_info(UChar* p, UChar* end,
 | 
			
		||||
				OnigEncFoldMatchInfo** info)
 | 
			
		||||
{
 | 
			
		||||
  return -1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_nothing_get_ctype_code_range(int ctype, int* nsb, int* nmb,
 | 
			
		||||
			 OnigCodePointRange* sbr[], OnigCodePointRange* mbr[])
 | 
			
		||||
{
 | 
			
		||||
  return -1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* for single byte encodings */
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_ascii_mbc_to_lower(UChar* p, UChar* lower)
 | 
			
		||||
{
 | 
			
		||||
  *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
 | 
			
		||||
  return 1; /* return byte length of converted char to lower */
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_ascii_mbc_is_case_ambig(UChar* p)
 | 
			
		||||
{
 | 
			
		||||
  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern OnigCodePoint
 | 
			
		||||
onigenc_single_byte_mbc_to_code(UChar* p, UChar* end)
 | 
			
		||||
{
 | 
			
		||||
  return (OnigCodePoint )(*p);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_single_byte_code_to_mbclen(OnigCodePoint code)
 | 
			
		||||
{
 | 
			
		||||
  return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_single_byte_code_to_mbc_first(OnigCodePoint code)
 | 
			
		||||
{
 | 
			
		||||
  return (code & 0xff);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
 | 
			
		||||
{
 | 
			
		||||
  *buf = (UChar )(code & 0xff);
 | 
			
		||||
  return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Single-byte encodings: every position is already a character head,
 * so `s` is returned unchanged and `start` is not consulted.
 */
extern UChar*
onigenc_single_byte_left_adjust_char_head(UChar* start, UChar* s)
{
  (void )start;  /* intentionally unused */
  return s;
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_single_byte_is_allowed_reverse_match(UChar* s, UChar* end)
 | 
			
		||||
{
 | 
			
		||||
  return TRUE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern OnigCodePoint
 | 
			
		||||
onigenc_mbn_mbc_to_code(OnigEncoding enc, UChar* p, UChar* end)
 | 
			
		||||
{
 | 
			
		||||
  int c, i, len;
 | 
			
		||||
  OnigCodePoint n;
 | 
			
		||||
 | 
			
		||||
  c = *p++;
 | 
			
		||||
  len = enc_len(enc, c);
 | 
			
		||||
  n = c;
 | 
			
		||||
  if (len == 1) return n;
 | 
			
		||||
 | 
			
		||||
  for (i = 1; i < len; i++) {
 | 
			
		||||
    if (p >= end) break;
 | 
			
		||||
    c = *p++;
 | 
			
		||||
    n <<= 8;  n += c;
 | 
			
		||||
  }
 | 
			
		||||
  return n;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_mbn_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* lower)
 | 
			
		||||
{
 | 
			
		||||
  int len;
 | 
			
		||||
 | 
			
		||||
  if (ONIGENC_IS_MBC_ASCII(p)) {
 | 
			
		||||
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
 | 
			
		||||
    return 1;
 | 
			
		||||
  }
 | 
			
		||||
  else {
 | 
			
		||||
    len = enc_len(enc, *p);
 | 
			
		||||
    if (lower != p) {
 | 
			
		||||
      /* memcpy(lower, p, len); */
 | 
			
		||||
      int i;
 | 
			
		||||
      for (i = 0; i < len; i++) {
 | 
			
		||||
	*lower++ = *p++;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    return len; /* return byte length of converted to lower char */
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_mbn_mbc_is_case_ambig(UChar* p)
 | 
			
		||||
{
 | 
			
		||||
  if (ONIGENC_IS_MBC_ASCII(p))
 | 
			
		||||
    return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
 | 
			
		||||
 | 
			
		||||
  return FALSE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_mb2_code_to_mbclen(OnigCodePoint code)
 | 
			
		||||
{
 | 
			
		||||
  if ((code & 0xff00) != 0) return 2;
 | 
			
		||||
  else return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_mb4_code_to_mbclen(OnigCodePoint code)
 | 
			
		||||
{
 | 
			
		||||
       if ((code & 0xff000000) != 0) return 4;
 | 
			
		||||
  else if ((code & 0xff0000) != 0) return 3;
 | 
			
		||||
  else if ((code & 0xff00) != 0) return 2;
 | 
			
		||||
  else return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_mb2_code_to_mbc_first(OnigCodePoint code)
 | 
			
		||||
{
 | 
			
		||||
  int first;
 | 
			
		||||
 | 
			
		||||
  if ((code & 0xff00) != 0) {
 | 
			
		||||
    first = (code >> 8) & 0xff;
 | 
			
		||||
  }
 | 
			
		||||
  else {
 | 
			
		||||
    return (int )code;
 | 
			
		||||
  }
 | 
			
		||||
  return first;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_mb4_code_to_mbc_first(OnigCodePoint code)
 | 
			
		||||
{
 | 
			
		||||
  int first;
 | 
			
		||||
 | 
			
		||||
  if ((code & 0xff000000) != 0) {
 | 
			
		||||
    first = (code >> 24) & 0xff;
 | 
			
		||||
  }
 | 
			
		||||
  else if ((code & 0xff0000) != 0) {
 | 
			
		||||
    first = (code >> 16) & 0xff;
 | 
			
		||||
  }
 | 
			
		||||
  else if ((code & 0xff00) != 0) {
 | 
			
		||||
    first = (code >>  8) & 0xff;
 | 
			
		||||
  }
 | 
			
		||||
  else {
 | 
			
		||||
    return (int )code;
 | 
			
		||||
  }
 | 
			
		||||
  return first;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
 | 
			
		||||
{
 | 
			
		||||
  UChar *p = buf;
 | 
			
		||||
 | 
			
		||||
  if ((code & 0xff00) != 0) {
 | 
			
		||||
    *p++ = (UChar )((code >>  8) & 0xff);
 | 
			
		||||
  }
 | 
			
		||||
  *p++ = (UChar )(code & 0xff);
 | 
			
		||||
 | 
			
		||||
#if 1
 | 
			
		||||
  if (enc_len(enc, buf[0]) != (p - buf))
 | 
			
		||||
    return ONIGERR_INVALID_WIDE_CHAR_VALUE;
 | 
			
		||||
#endif
 | 
			
		||||
  return p - buf;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
 | 
			
		||||
{
 | 
			
		||||
  UChar *p = buf;
 | 
			
		||||
 | 
			
		||||
  if ((code & 0xff000000) != 0) {
 | 
			
		||||
    *p++ = (UChar )((code >> 24) & 0xff);
 | 
			
		||||
  }
 | 
			
		||||
  if ((code & 0xff0000) != 0) {
 | 
			
		||||
    *p++ = (UChar )((code >> 16) & 0xff);
 | 
			
		||||
  }
 | 
			
		||||
  if ((code & 0xff00) != 0) {
 | 
			
		||||
    *p++ = (UChar )((code >> 8) & 0xff);
 | 
			
		||||
  }
 | 
			
		||||
  *p++ = (UChar )(code & 0xff);
 | 
			
		||||
 | 
			
		||||
#if 1
 | 
			
		||||
  if (enc_len(enc, buf[0]) != (p - buf))
 | 
			
		||||
    return ONIGERR_INVALID_WIDE_CHAR_VALUE;
 | 
			
		||||
#endif
 | 
			
		||||
  return p - buf;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_mb2_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
 | 
			
		||||
			  unsigned int ctype)
 | 
			
		||||
{
 | 
			
		||||
  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
 | 
			
		||||
    if (code < 128)
 | 
			
		||||
      return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
 | 
			
		||||
    else {
 | 
			
		||||
      int first = onigenc_mb2_code_to_mbc_first(code);
 | 
			
		||||
      return (enc_len(enc, first) > 1 ? TRUE : FALSE);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ctype &= ~ONIGENC_CTYPE_WORD;
 | 
			
		||||
    if (ctype == 0) return FALSE;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (code < 128)
 | 
			
		||||
    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
 | 
			
		||||
  else
 | 
			
		||||
    return FALSE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_mb4_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
 | 
			
		||||
			  unsigned int ctype)
 | 
			
		||||
{
 | 
			
		||||
  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
 | 
			
		||||
    if (code < 128)
 | 
			
		||||
      return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
 | 
			
		||||
    else {
 | 
			
		||||
      int first = onigenc_mb4_code_to_mbc_first(code);
 | 
			
		||||
      return (enc_len(enc, first) > 1 ? TRUE : FALSE);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ctype &= ~ONIGENC_CTYPE_WORD;
 | 
			
		||||
    if (ctype == 0) return FALSE;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (code < 128)
 | 
			
		||||
    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
 | 
			
		||||
  else
 | 
			
		||||
    return FALSE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_get_all_fold_match_code_ss_0xdf(OnigCodePoint** codes)
 | 
			
		||||
{
 | 
			
		||||
  static OnigCodePoint list[] = { 0xdf };
 | 
			
		||||
  *codes = list;
 | 
			
		||||
  return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_get_fold_match_info_ss_0xdf(UChar* p, UChar* end,
 | 
			
		||||
				    OnigEncFoldMatchInfo** info)
 | 
			
		||||
{
 | 
			
		||||
  /* German alphabet ess-tsett(U+00DF) */
 | 
			
		||||
  static OnigEncFoldMatchInfo ss = {
 | 
			
		||||
    3,
 | 
			
		||||
    { 1, 2, 2 },
 | 
			
		||||
    { "\337", "ss", "SS" } /* 0337: 0xdf */
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  if (p >= end) return -1;
 | 
			
		||||
 | 
			
		||||
  if (*p == 0xdf) {
 | 
			
		||||
    *info = &ss;
 | 
			
		||||
    return 1;
 | 
			
		||||
  }
 | 
			
		||||
  else if (p + 1 < end) {
 | 
			
		||||
    if ((*p == 'S' && *(p+1) == 'S') ||
 | 
			
		||||
	(*p == 's' && *(p+1) == 's')) {
 | 
			
		||||
      *info = &ss;
 | 
			
		||||
      return 2;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return -1; /* is not a fold string. */
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#else /* ONIG_RUBY_M17N */
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_is_code_ctype(OnigEncoding enc, OnigCodePoint code, int ctype)
 | 
			
		||||
{
 | 
			
		||||
  switch (ctype) {
 | 
			
		||||
  case ONIGENC_CTYPE_ALPHA:
 | 
			
		||||
    return m17n_isalpha(enc, code);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_BLANK:
 | 
			
		||||
    return ONIGENC_IS_CODE_BLANK(enc, (int )(code));
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_CNTRL:
 | 
			
		||||
    return m17n_iscntrl(enc, code);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_DIGIT:
 | 
			
		||||
    return m17n_isdigit(enc, code);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_GRAPH:
 | 
			
		||||
    return ONIGENC_IS_CODE_GRAPH(enc, (int )(code));
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_LOWER:
 | 
			
		||||
    return m17n_islower(enc, code);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_PRINT:
 | 
			
		||||
    return m17n_isprint(enc, code);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_PUNCT:
 | 
			
		||||
    return m17n_ispunct(enc, code);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_SPACE:
 | 
			
		||||
    return m17n_isspace(enc, code);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_UPPER:
 | 
			
		||||
    return m17n_isupper(enc, code);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_XDIGIT:
 | 
			
		||||
    return m17n_isxdigit(enc, code);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_WORD:
 | 
			
		||||
    return m17n_iswchar(enc, code);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_ASCII:
 | 
			
		||||
    return (code < 128 ? TRUE : FALSE);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_ALNUM:
 | 
			
		||||
    return m17n_isalnum(enc, code);
 | 
			
		||||
    break;
 | 
			
		||||
  default:
 | 
			
		||||
    break;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
 | 
			
		||||
{
 | 
			
		||||
  int c, len;
 | 
			
		||||
 | 
			
		||||
  m17n_mbcput(enc, code, buf);
 | 
			
		||||
  c = m17n_firstbyte(enc, code);
 | 
			
		||||
  len = enc_len(enc, c);
 | 
			
		||||
  return len;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* buf)
 | 
			
		||||
{
 | 
			
		||||
  unsigned int c, low;
 | 
			
		||||
 | 
			
		||||
  c   = m17n_codepoint(enc, p, p + enc_len(enc, *p));
 | 
			
		||||
  low = m17n_tolower(enc, c);
 | 
			
		||||
  m17n_mbcput(enc, low, buf);
 | 
			
		||||
 | 
			
		||||
  return m17n_codelen(enc, low);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_mbc_is_case_ambig(OnigEncoding enc, UChar* p)
 | 
			
		||||
{
 | 
			
		||||
  unsigned int c = m17n_codepoint(enc, p, p + enc_len(enc, *p));
 | 
			
		||||
 | 
			
		||||
  if (m17n_isupper(enc, c) || m17n_islower(enc, c))
 | 
			
		||||
    return TRUE;
 | 
			
		||||
  return FALSE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern UChar*
 | 
			
		||||
onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
 | 
			
		||||
{
 | 
			
		||||
  UChar *p;
 | 
			
		||||
  int len;
 | 
			
		||||
 | 
			
		||||
  if (s <= start) return s;
 | 
			
		||||
  p = s;
 | 
			
		||||
 | 
			
		||||
  while (!m17n_islead(enc, *p) && p > start) p--;
 | 
			
		||||
  while (p + (len = enc_len(enc, *p)) < s) {
 | 
			
		||||
    p += len;
 | 
			
		||||
  }
 | 
			
		||||
  if (p + len == s) return s;
 | 
			
		||||
  return p;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
onigenc_is_allowed_reverse_match(OnigEncoding enc, UChar* s, UChar* end)
 | 
			
		||||
{
 | 
			
		||||
  return ONIGENC_IS_SINGLEBYTE(enc);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern void
 | 
			
		||||
onigenc_set_default_caseconv_table(UChar* table) { }
 | 
			
		||||
 | 
			
		||||
#endif /* ONIG_RUBY_M17N */
 | 
			
		||||
							
								
								
									
										96
									
								
								regenc.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										96
									
								
								regenc.h
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,96 @@
 | 
			
		|||
/**********************************************************************
 | 
			
		||||
 | 
			
		||||
  regenc.h -  Oniguruma (regular expression library)
 | 
			
		||||
 | 
			
		||||
  Copyright (C) 2003-2004  K.Kosako (kosako@sofnec.co.jp)
 | 
			
		||||
 | 
			
		||||
**********************************************************************/
 | 
			
		||||
#ifndef REGENC_H
 | 
			
		||||
#define REGENC_H
 | 
			
		||||
 | 
			
		||||
#ifndef RUBY_PLATFORM
 | 
			
		||||
#include "config.h"
 | 
			
		||||
#endif
 | 
			
		||||
#include "oniguruma.h"
 | 
			
		||||
 | 
			
		||||
#ifndef NULL
 | 
			
		||||
#define NULL   ((void* )0)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef TRUE
 | 
			
		||||
#define TRUE    1
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef FALSE
 | 
			
		||||
#define FALSE   0
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* error codes */
 | 
			
		||||
/* internal error */
 | 
			
		||||
#define ONIGERR_MEMORY                                         -5
 | 
			
		||||
#define ONIGERR_TYPE_BUG                                       -6
 | 
			
		||||
/* syntax error [-400, -999] */
 | 
			
		||||
#define ONIGERR_INVALID_WIDE_CHAR_VALUE                      -400
 | 
			
		||||
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE                      -401
 | 
			
		||||
 | 
			
		||||
#define ONIG_NEWLINE     '\n'
 | 
			
		||||
#define ONIG_IS_NEWLINE(c)                ((c) == ONIG_NEWLINE)
 | 
			
		||||
#define ONIG_IS_NULL(p)                    (((void*)(p)) == (void*)0)
 | 
			
		||||
#define ONIG_IS_NOT_NULL(p)                (((void*)(p)) != (void*)0)
 | 
			
		||||
#define ONIG_CHECK_NULL_RETURN(p)          if (ONIG_IS_NULL(p)) return NULL
 | 
			
		||||
#define ONIG_CHECK_NULL_RETURN_VAL(p,val)  if (ONIG_IS_NULL(p)) return (val)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef ONIG_RUBY_M17N
 | 
			
		||||
 | 
			
		||||
#define ONIG_ENCODING_INIT_DEFAULT            ONIG_ENCODING_UNDEF
 | 
			
		||||
 | 
			
		||||
#else  /* ONIG_RUBY_M17N */
 | 
			
		||||
 | 
			
		||||
#define ONIG_ENCODING_INIT_DEFAULT           ONIG_ENCODING_ASCII
 | 
			
		||||
 | 
			
		||||
/* for encoding system implementation (internal) */
 | 
			
		||||
ONIG_EXTERN int onigenc_nothing_get_all_fold_match_code P_((OnigCodePoint** codes));
 | 
			
		||||
ONIG_EXTERN int onigenc_nothing_get_fold_match_info P_((UChar* p, UChar* end, OnigEncFoldMatchInfo** info));
 | 
			
		||||
ONIG_EXTERN int onigenc_nothing_get_ctype_code_range P_((int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]));
 | 
			
		||||
 | 
			
		||||
/* methods for single byte encoding */
 | 
			
		||||
ONIG_EXTERN int onigenc_ascii_mbc_to_lower P_((UChar* p, UChar* lower));
 | 
			
		||||
ONIG_EXTERN int onigenc_ascii_mbc_is_case_ambig P_((UChar* p));
 | 
			
		||||
ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((UChar* p, UChar* end));
 | 
			
		||||
ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
 | 
			
		||||
ONIG_EXTERN int onigenc_single_byte_code_to_mbc_first P_((OnigCodePoint code));
 | 
			
		||||
ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
 | 
			
		||||
ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((UChar* start, UChar* s));
 | 
			
		||||
ONIG_EXTERN int onigenc_single_byte_is_allowed_reverse_match P_((UChar* s, UChar* end));
 | 
			
		||||
 | 
			
		||||
/* methods for multi byte encoding */
 | 
			
		||||
ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, UChar* p, UChar* end));
 | 
			
		||||
ONIG_EXTERN int onigenc_mbn_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* lower));
 | 
			
		||||
ONIG_EXTERN int onigenc_mbn_mbc_is_case_ambig P_((UChar* p));
 | 
			
		||||
ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
 | 
			
		||||
ONIG_EXTERN int onigenc_mb2_code_to_mbc_first P_((OnigCodePoint code));
 | 
			
		||||
ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
 | 
			
		||||
ONIG_EXTERN int onigenc_mb2_code_is_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
 | 
			
		||||
ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
 | 
			
		||||
ONIG_EXTERN int onigenc_mb4_code_to_mbc_first P_((OnigCodePoint code));
 | 
			
		||||
ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
 | 
			
		||||
ONIG_EXTERN int onigenc_mb4_code_is_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
 | 
			
		||||
 | 
			
		||||
ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** codes));
 | 
			
		||||
ONIG_EXTERN int onigenc_get_fold_match_info_ss_0xdf P_((UChar* p, UChar* end, OnigEncFoldMatchInfo** info));
 | 
			
		||||
 | 
			
		||||
#endif /* is not ONIG_RUBY_M17N */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
ONIG_EXTERN OnigEncoding  OnigEncDefaultCharEncoding;
 | 
			
		||||
ONIG_EXTERN UChar* OnigEncAsciiToLowerCaseTable;
 | 
			
		||||
ONIG_EXTERN unsigned short OnigEncAsciiCtypeTable[];
 | 
			
		||||
 | 
			
		||||
/* Look up the lower-case counterpart of byte `c` in the active table. */
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) (OnigEncAsciiToLowerCaseTable[(c)])

/*
 * Non-zero when the ctype entry for `code` has any bit of `ctype` set.
 * Both arguments are parenthesized so that a compound mask such as
 * `A | B` is applied as a whole; without the parentheses it expanded to
 * `(table[code] & A | B) != 0`, which is true whenever B is non-zero.
 */
#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
  ((OnigEncAsciiCtypeTable[(code)] & (ctype)) != 0)

/* Non-zero when `code` is flagged as an upper-case or lower-case letter. */
#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
    ONIGENC_IS_ASCII_CODE_CTYPE((code), (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER))
 | 
			
		||||
 | 
			
		||||
#endif /* REGENC_H */
 | 
			
		||||
							
								
								
									
										291
									
								
								regerror.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										291
									
								
								regerror.c
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,291 @@
 | 
			
		|||
/**********************************************************************
 | 
			
		||||
 | 
			
		||||
  regerror.c -  Oniguruma (regular expression library)
 | 
			
		||||
 | 
			
		||||
  Copyright (C) 2002-2004  K.Kosako (kosako@sofnec.co.jp)
 | 
			
		||||
 | 
			
		||||
**********************************************************************/
 | 
			
		||||
#include "regint.h"
 | 
			
		||||
#include <stdio.h> /* for vsnprintf() */
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_STDARG_PROTOTYPES
 | 
			
		||||
#include <stdarg.h>
 | 
			
		||||
#define va_init_list(a,b) va_start(a,b)
 | 
			
		||||
#else
 | 
			
		||||
#include <varargs.h>
 | 
			
		||||
#define va_init_list(a,b) va_start(a)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
extern char*
 | 
			
		||||
onig_error_code_to_format(int code)
 | 
			
		||||
{
 | 
			
		||||
  char *p;
 | 
			
		||||
 | 
			
		||||
  if (code >= 0) return (char* )0;
 | 
			
		||||
 | 
			
		||||
  switch (code) {
 | 
			
		||||
  case ONIG_MISMATCH:
 | 
			
		||||
    p = "mismatch"; break;
 | 
			
		||||
  case ONIG_NO_SUPPORT_CONFIG:
 | 
			
		||||
    p = "no support in this configuration"; break;
 | 
			
		||||
  case ONIGERR_MEMORY:
 | 
			
		||||
    p = "fail to memory allocation"; break;
 | 
			
		||||
  case ONIGERR_MATCH_STACK_LIMIT_OVER:
 | 
			
		||||
    p = "match-stack limit over"; break;
 | 
			
		||||
  case ONIGERR_TYPE_BUG:
 | 
			
		||||
    p = "undefined type (bug)"; break;
 | 
			
		||||
  case ONIGERR_PARSER_BUG:
 | 
			
		||||
    p = "internal parser error (bug)"; break;
 | 
			
		||||
  case ONIGERR_STACK_BUG:
 | 
			
		||||
    p = "stack error (bug)"; break;
 | 
			
		||||
  case ONIGERR_UNDEFINED_BYTECODE:
 | 
			
		||||
    p = "undefined bytecode (bug)"; break;
 | 
			
		||||
  case ONIGERR_UNEXPECTED_BYTECODE:
 | 
			
		||||
    p = "unexpected bytecode (bug)"; break;
 | 
			
		||||
  case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED:
 | 
			
		||||
    p = "default multibyte-encoding is not setted"; break;
 | 
			
		||||
  case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
 | 
			
		||||
    p = "can't convert to wide-char on specified multibyte-encoding"; break;
 | 
			
		||||
  case ONIGERR_INVALID_ARGUMENT:
 | 
			
		||||
    p = "invalid argument"; break;
 | 
			
		||||
  case ONIGERR_END_PATTERN_AT_LEFT_BRACE:
 | 
			
		||||
    p = "end pattern at left brace"; break;
 | 
			
		||||
  case ONIGERR_END_PATTERN_AT_LEFT_BRACKET:
 | 
			
		||||
    p = "end pattern at left bracket"; break;
 | 
			
		||||
  case ONIGERR_EMPTY_CHAR_CLASS:
 | 
			
		||||
    p = "empty char-class"; break;
 | 
			
		||||
  case ONIGERR_PREMATURE_END_OF_CHAR_CLASS:
 | 
			
		||||
    p = "premature end of char-class"; break;
 | 
			
		||||
  case ONIGERR_END_PATTERN_AT_BACKSLASH:
 | 
			
		||||
    p = "end pattern at backslash"; break;
 | 
			
		||||
  case ONIGERR_END_PATTERN_AT_META:
 | 
			
		||||
    p = "end pattern at meta"; break;
 | 
			
		||||
  case ONIGERR_END_PATTERN_AT_CONTROL:
 | 
			
		||||
    p = "end pattern at control"; break;
 | 
			
		||||
  case ONIGERR_META_CODE_SYNTAX:
 | 
			
		||||
    p = "illegal meta-code syntax"; break;
 | 
			
		||||
  case ONIGERR_CONTROL_CODE_SYNTAX:
 | 
			
		||||
    p = "illegal control-code syntax"; break;
 | 
			
		||||
  case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
 | 
			
		||||
    p = "char-class value at end of range"; break;
 | 
			
		||||
  case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
 | 
			
		||||
    p = "char-class value at start of range"; break;
 | 
			
		||||
  case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS:
 | 
			
		||||
    p = "unmatched range specifier in char-class"; break;
 | 
			
		||||
  case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED:
 | 
			
		||||
    p = "target of repeat operator is not specified"; break;
 | 
			
		||||
  case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID:
 | 
			
		||||
    p = "target of repeat operator is invalid"; break;
 | 
			
		||||
  case ONIGERR_NESTED_REPEAT_OPERATOR:
 | 
			
		||||
    p = "nested repeat operator"; break;
 | 
			
		||||
  case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS:
 | 
			
		||||
    p = "unmatched close parenthesis"; break;
 | 
			
		||||
  case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS:
 | 
			
		||||
    p = "end pattern with unmatched parenthesis"; break;
 | 
			
		||||
  case ONIGERR_END_PATTERN_IN_GROUP:
 | 
			
		||||
    p = "end pattern in group"; break;
 | 
			
		||||
  case ONIGERR_UNDEFINED_GROUP_OPTION:
 | 
			
		||||
    p = "undefined group option"; break;
 | 
			
		||||
  case ONIGERR_INVALID_POSIX_BRACKET_TYPE:
 | 
			
		||||
    p = "invalid POSIX bracket type"; break;
 | 
			
		||||
  case ONIGERR_INVALID_LOOK_BEHIND_PATTERN:
 | 
			
		||||
    p = "invalid pattern in look-behind"; break;
 | 
			
		||||
  case ONIGERR_INVALID_REPEAT_RANGE_PATTERN:
 | 
			
		||||
    p = "invalid repeat range {lower,upper}"; break;
 | 
			
		||||
  case ONIGERR_TOO_BIG_NUMBER:
 | 
			
		||||
    p = "too big number"; break;
 | 
			
		||||
  case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE:
 | 
			
		||||
    p = "too big number for repeat range"; break;
 | 
			
		||||
  case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE:
 | 
			
		||||
    p = "upper is smaller than lower in repeat range"; break;
 | 
			
		||||
  case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS:
 | 
			
		||||
    p = "empty range in char class"; break;
 | 
			
		||||
  case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE:
 | 
			
		||||
    p = "mismatch multibyte code length in char-class range"; break;
 | 
			
		||||
  case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES:
 | 
			
		||||
    p = "too many multibyte code ranges are specified"; break;
 | 
			
		||||
  case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING:
 | 
			
		||||
    p = "too short multibyte code string"; break;
 | 
			
		||||
  case ONIGERR_TOO_BIG_BACKREF_NUMBER:
 | 
			
		||||
    p = "too big backref number"; break;
 | 
			
		||||
  case ONIGERR_INVALID_BACKREF:
 | 
			
		||||
#ifdef USE_NAMED_GROUP
 | 
			
		||||
    p = "invalid backref number/name"; break;
 | 
			
		||||
#else
 | 
			
		||||
    p = "invalid backref number"; break;
 | 
			
		||||
#endif
 | 
			
		||||
  case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED:
 | 
			
		||||
    p = "numbered backref/call is not allowed. (use name)"; break;
 | 
			
		||||
  case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
 | 
			
		||||
    p = "too big wide-char value"; break;
 | 
			
		||||
  case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
 | 
			
		||||
    p = "too long wide-char value"; break;
 | 
			
		||||
  case ONIGERR_INVALID_WIDE_CHAR_VALUE:
 | 
			
		||||
    p = "invalid wide-char value"; break;
 | 
			
		||||
  case ONIGERR_EMPTY_GROUP_NAME:
 | 
			
		||||
    p = "group name is empty"; break;
 | 
			
		||||
  case ONIGERR_INVALID_GROUP_NAME:
 | 
			
		||||
    p = "invalid group name <%n>"; break;
 | 
			
		||||
  case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
 | 
			
		||||
#ifdef USE_NAMED_GROUP
 | 
			
		||||
    p = "invalid char in group name <%n>"; break;
 | 
			
		||||
#else
 | 
			
		||||
    p = "invalid char in group number <%n>"; break;
 | 
			
		||||
#endif
 | 
			
		||||
  case ONIGERR_UNDEFINED_NAME_REFERENCE:
 | 
			
		||||
    p = "undefined name <%n> reference"; break;
 | 
			
		||||
  case ONIGERR_UNDEFINED_GROUP_REFERENCE:
 | 
			
		||||
    p = "undefined group <%n> reference"; break;
 | 
			
		||||
  case ONIGERR_MULTIPLEX_DEFINED_NAME:
 | 
			
		||||
    p = "multiplex defined name <%n>"; break;
 | 
			
		||||
  case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
 | 
			
		||||
    p = "multiplex definition name <%n> call"; break;
 | 
			
		||||
  case ONIGERR_NEVER_ENDING_RECURSION:
 | 
			
		||||
    p = "never ending recursion"; break;
 | 
			
		||||
  case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY:
 | 
			
		||||
    p = "group number is too big for capture history"; break;
 | 
			
		||||
  case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
 | 
			
		||||
    p = "invalid character property name"; break;
 | 
			
		||||
  case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
 | 
			
		||||
    p = "over thread pass limit count"; break;
 | 
			
		||||
 | 
			
		||||
  default:
 | 
			
		||||
    p = "undefined error code"; break;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return p;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* for ONIG_MAX_ERROR_MESSAGE_LEN */
 | 
			
		||||
#define MAX_ERROR_PAR_LEN   30
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
#ifdef HAVE_STDARG_PROTOTYPES
 | 
			
		||||
onig_error_code_to_str(UChar* s, int code, ...)
 | 
			
		||||
#else
 | 
			
		||||
onig_error_code_to_str(s, code, va_alist)
 | 
			
		||||
  UChar* s;
 | 
			
		||||
  int code;
 | 
			
		||||
  va_dcl 
 | 
			
		||||
#endif
 | 
			
		||||
{
 | 
			
		||||
  UChar *p, *q;
 | 
			
		||||
  OnigErrorInfo* einfo;
 | 
			
		||||
  int len;
 | 
			
		||||
  va_list vargs;
 | 
			
		||||
 | 
			
		||||
  va_init_list(vargs, code);
 | 
			
		||||
 | 
			
		||||
  switch (code) {
 | 
			
		||||
  case ONIGERR_UNDEFINED_NAME_REFERENCE:
 | 
			
		||||
  case ONIGERR_UNDEFINED_GROUP_REFERENCE:
 | 
			
		||||
  case ONIGERR_MULTIPLEX_DEFINED_NAME:
 | 
			
		||||
  case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
 | 
			
		||||
  case ONIGERR_INVALID_GROUP_NAME:
 | 
			
		||||
  case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
 | 
			
		||||
    einfo = va_arg(vargs, OnigErrorInfo*);
 | 
			
		||||
    len = einfo->par_end - einfo->par;
 | 
			
		||||
    q = onig_error_code_to_format(code);
 | 
			
		||||
    p = s;
 | 
			
		||||
    while (*q != '\0') {
 | 
			
		||||
      if (*q == '%') {
 | 
			
		||||
	q++;
 | 
			
		||||
	if (*q == 'n') { /* '%n': name */
 | 
			
		||||
	  if (len > MAX_ERROR_PAR_LEN) {
 | 
			
		||||
	    xmemcpy(p, einfo->par, MAX_ERROR_PAR_LEN - 3);
 | 
			
		||||
	    p += (MAX_ERROR_PAR_LEN - 3);
 | 
			
		||||
	    xmemcpy(p, "...", 3);
 | 
			
		||||
	    p += 3;
 | 
			
		||||
	  }
 | 
			
		||||
	  else {
 | 
			
		||||
	    xmemcpy(p, einfo->par, len);
 | 
			
		||||
	    p += len;
 | 
			
		||||
	  }
 | 
			
		||||
	  q++;
 | 
			
		||||
	}
 | 
			
		||||
	else
 | 
			
		||||
	  goto normal_char;
 | 
			
		||||
      }
 | 
			
		||||
      else {
 | 
			
		||||
      normal_char:
 | 
			
		||||
	*p++ = *q++;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    *p = '\0';
 | 
			
		||||
    len = p - s;
 | 
			
		||||
    break;
 | 
			
		||||
 | 
			
		||||
  default:
 | 
			
		||||
    q = onig_error_code_to_format(code);
 | 
			
		||||
    len = strlen(q);
 | 
			
		||||
    xmemcpy(s, q, len);
 | 
			
		||||
    s[len] = '\0';
 | 
			
		||||
    break;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  va_end(vargs);
 | 
			
		||||
  return len;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void
 | 
			
		||||
#ifdef HAVE_STDARG_PROTOTYPES
 | 
			
		||||
onig_snprintf_with_pattern(char buf[], int bufsize, OnigEncoding enc,
 | 
			
		||||
			    char* pat, char* pat_end, char *fmt, ...)
 | 
			
		||||
#else
 | 
			
		||||
onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
 | 
			
		||||
    char buf[];
 | 
			
		||||
    int bufsize;
 | 
			
		||||
    OnigEncoding enc;
 | 
			
		||||
    char* pat;
 | 
			
		||||
    char* pat_end;
 | 
			
		||||
    const char *fmt;
 | 
			
		||||
    va_dcl
 | 
			
		||||
#endif
 | 
			
		||||
{
 | 
			
		||||
  int n, need, len;
 | 
			
		||||
  UChar *p, *s;
 | 
			
		||||
  va_list args;
 | 
			
		||||
 | 
			
		||||
  va_init_list(args, fmt);
 | 
			
		||||
  n = vsnprintf(buf, bufsize, fmt, args);
 | 
			
		||||
  va_end(args);
 | 
			
		||||
 | 
			
		||||
  need = (pat_end - pat) * 4 + 4;
 | 
			
		||||
 | 
			
		||||
  if (n + need < bufsize) {
 | 
			
		||||
    strcat(buf, ": /");
 | 
			
		||||
    s = buf + strlen(buf);
 | 
			
		||||
 | 
			
		||||
    p = pat;
 | 
			
		||||
    while (p < (UChar* )pat_end) {
 | 
			
		||||
      if (*p == MC_ESC) {
 | 
			
		||||
	*s++ = *p++;
 | 
			
		||||
	len = enc_len(enc, *p);
 | 
			
		||||
	while (len-- > 0) *s++ = *p++;
 | 
			
		||||
      }
 | 
			
		||||
      else if (*p == '/') {
 | 
			
		||||
	*s++ = MC_ESC;
 | 
			
		||||
	*s++ = *p++;
 | 
			
		||||
      }
 | 
			
		||||
      else if (ONIGENC_IS_MBC_HEAD(enc, *p)) {
 | 
			
		||||
	len = enc_len(enc, *p);
 | 
			
		||||
	while (len-- > 0) *s++ = *p++;
 | 
			
		||||
      }
 | 
			
		||||
      else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
 | 
			
		||||
	       !ONIGENC_IS_CODE_SPACE(enc, *p)) {
 | 
			
		||||
	char b[5];
 | 
			
		||||
	sprintf(b, "\\%03o", *p & 0377);
 | 
			
		||||
	len = strlen(b);
 | 
			
		||||
	while (len-- > 0) *s++ = *p++;
 | 
			
		||||
      }
 | 
			
		||||
      else {
 | 
			
		||||
	*s++ = *p++;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    *s++ = '/';
 | 
			
		||||
    *s   = '\0';
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										256
									
								
								reggnu.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										256
									
								
								reggnu.c
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,256 @@
 | 
			
		|||
/**********************************************************************
 | 
			
		||||
 | 
			
		||||
  reggnu.c -  Oniguruma (regular expression library)
 | 
			
		||||
 | 
			
		||||
  Copyright (C) 2002-2004  K.Kosako (kosako@sofnec.co.jp)
 | 
			
		||||
 | 
			
		||||
**********************************************************************/
 | 
			
		||||
#include "regint.h"
 | 
			
		||||
 | 
			
		||||
#ifndef ONIGGNU_H     /* name changes from oniggnu.h to regex.h in ruby. */
 | 
			
		||||
#include "oniggnu.h"
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(RUBY_PLATFORM) || defined(RUBY)
 | 
			
		||||
#ifndef ONIG_RUBY_M17N
 | 
			
		||||
#define USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef NULL
 | 
			
		||||
#define NULL   ((void* )0)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
extern void
 | 
			
		||||
re_free_registers(OnigRegion* r)
 | 
			
		||||
{
 | 
			
		||||
  /* 0: don't free self */
 | 
			
		||||
  onig_region_free(r, 0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
re_adjust_startpos(regex_t* reg, const char* string, int size,
 | 
			
		||||
		   int startpos, int range)
 | 
			
		||||
{
 | 
			
		||||
  if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) {
 | 
			
		||||
    UChar *p;
 | 
			
		||||
    UChar *s = (UChar* )string + startpos;
 | 
			
		||||
 | 
			
		||||
    if (range > 0) {
 | 
			
		||||
      p = onigenc_get_right_adjust_char_head(reg->enc, (UChar* )string, s);
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
      p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, s);
 | 
			
		||||
    }
 | 
			
		||||
    return p - (UChar* )string;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return startpos;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
re_match(regex_t* reg, const char* str, int size, int pos,
 | 
			
		||||
	 struct re_registers* regs)
 | 
			
		||||
{
 | 
			
		||||
  return onig_match(reg, (UChar* )str, (UChar* )(str + size),
 | 
			
		||||
		    (UChar* )(str + pos), regs, ONIG_OPTION_NONE);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
re_search(regex_t* bufp, const char* string, int size, int startpos, int range,
 | 
			
		||||
	  struct re_registers* regs)
 | 
			
		||||
{
 | 
			
		||||
  return onig_search(bufp, (UChar* )string, (UChar* )(string + size),
 | 
			
		||||
		     (UChar* )(string + startpos),
 | 
			
		||||
		     (UChar* )(string + startpos + range),
 | 
			
		||||
		     regs, ONIG_OPTION_NONE);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
 | 
			
		||||
{
 | 
			
		||||
  int r;
 | 
			
		||||
  OnigErrorInfo einfo;
 | 
			
		||||
 | 
			
		||||
  r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo);
 | 
			
		||||
  if (r != 0) {
 | 
			
		||||
    if (IS_NOT_NULL(ebuf))
 | 
			
		||||
      (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return r;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
 | 
			
		||||
{
 | 
			
		||||
  int r;
 | 
			
		||||
  OnigErrorInfo einfo;
 | 
			
		||||
  OnigEncoding enc;
 | 
			
		||||
 | 
			
		||||
  /* I think encoding and options should be arguments of this function.
 | 
			
		||||
     But this is adapted to present re.c. (2002/11/29)
 | 
			
		||||
   */
 | 
			
		||||
  enc = OnigEncDefaultCharEncoding;
 | 
			
		||||
 | 
			
		||||
  r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size),
 | 
			
		||||
		     reg->options, enc, OnigDefaultSyntax, &einfo);
 | 
			
		||||
  if (r != 0) {
 | 
			
		||||
    if (IS_NOT_NULL(ebuf))
 | 
			
		||||
      (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
 | 
			
		||||
  }
 | 
			
		||||
  return r;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern void
 | 
			
		||||
re_free_pattern(regex_t* reg)
 | 
			
		||||
{
 | 
			
		||||
  onig_free(reg);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern int
 | 
			
		||||
re_alloc_pattern(regex_t** reg)
 | 
			
		||||
{
 | 
			
		||||
  return onig_alloc_init(reg, ONIG_OPTION_DEFAULT, OnigEncDefaultCharEncoding,
 | 
			
		||||
			 OnigDefaultSyntax);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
extern void
 | 
			
		||||
re_set_casetable(const char* table)
 | 
			
		||||
{
 | 
			
		||||
  onigenc_set_default_caseconv_table((UChar* )table);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
 | 
			
		||||
/*
 * Per-lead-byte tables, one entry for each of the 256 byte values.
 * re_mbctab points at the table selected by re_mbcinit().
 * NOTE(review): the entries look like "number of bytes that follow this
 * lead byte" (0 for single-byte characters) -- confirm against the users
 * of re_mbctab.
 */
static const unsigned char mbctab_ascii[] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};

static const unsigned char mbctab_euc[] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
  /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xa0 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xc0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xd0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xe0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xf0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
};

static const unsigned char mbctab_sjis[] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x80 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xe0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xf0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
};

static const unsigned char mbctab_utf8[] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xc0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xd0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xf0 */ 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0,
};

/* Table currently in effect; starts out as the ASCII table. */
const unsigned char *re_mbctab = mbctab_ascii;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
extern void
 | 
			
		||||
#ifdef ONIG_RUBY_M17N
 | 
			
		||||
re_mbcinit(OnigEncoding enc)
 | 
			
		||||
#else
 | 
			
		||||
re_mbcinit(int mb_code)
 | 
			
		||||
#endif
 | 
			
		||||
{
 | 
			
		||||
#ifdef ONIG_RUBY_M17N
 | 
			
		||||
 | 
			
		||||
  onigenc_set_default_encoding(enc);
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
 | 
			
		||||
  OnigEncoding enc;
 | 
			
		||||
 | 
			
		||||
  switch (mb_code) {
 | 
			
		||||
  case MBCTYPE_ASCII:
 | 
			
		||||
    enc = ONIG_ENCODING_ASCII;
 | 
			
		||||
    break;
 | 
			
		||||
  case MBCTYPE_EUC:
 | 
			
		||||
    enc = ONIG_ENCODING_EUC_JP;
 | 
			
		||||
    break;
 | 
			
		||||
  case MBCTYPE_SJIS:
 | 
			
		||||
    enc = ONIG_ENCODING_SJIS;
 | 
			
		||||
    break;
 | 
			
		||||
  case MBCTYPE_UTF8:
 | 
			
		||||
    enc = ONIG_ENCODING_UTF8;
 | 
			
		||||
    break;
 | 
			
		||||
  default:
 | 
			
		||||
    return ;
 | 
			
		||||
    break;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  onigenc_set_default_encoding(enc);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
 | 
			
		||||
  switch (mb_code) {
 | 
			
		||||
  case MBCTYPE_ASCII:
 | 
			
		||||
    re_mbctab = mbctab_ascii;
 | 
			
		||||
    break;
 | 
			
		||||
  case MBCTYPE_EUC:
 | 
			
		||||
    re_mbctab = mbctab_euc;
 | 
			
		||||
    break;
 | 
			
		||||
  case MBCTYPE_SJIS:
 | 
			
		||||
    re_mbctab = mbctab_sjis;
 | 
			
		||||
    break;
 | 
			
		||||
  case MBCTYPE_UTF8:
 | 
			
		||||
    re_mbctab = mbctab_utf8;
 | 
			
		||||
    break;
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										685
									
								
								regint.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										685
									
								
								regint.h
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,685 @@
 | 
			
		|||
/**********************************************************************
 | 
			
		||||
 | 
			
		||||
  regint.h -  Oniguruma (regular expression library)
 | 
			
		||||
 | 
			
		||||
  Copyright (C) 2002-2004  K.Kosako (kosako@sofnec.co.jp)
 | 
			
		||||
 | 
			
		||||
**********************************************************************/
 | 
			
		||||
#ifndef REGINT_H
 | 
			
		||||
#define REGINT_H
 | 
			
		||||
 | 
			
		||||
/* for debug */
 | 
			
		||||
/* #define ONIG_DEBUG_PARSE_TREE */
 | 
			
		||||
/* #define ONIG_DEBUG_COMPILE */
 | 
			
		||||
/* #define ONIG_DEBUG_SEARCH */
 | 
			
		||||
/* #define ONIG_DEBUG_MATCH */
 | 
			
		||||
/* #define ONIG_DONT_OPTIMIZE */
 | 
			
		||||
 | 
			
		||||
/* for byte-code statistical data. */
 | 
			
		||||
/* #define ONIG_DEBUG_STATISTICS */
 | 
			
		||||
 | 
			
		||||
#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
 | 
			
		||||
    defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_STATISTICS)
 | 
			
		||||
#ifndef ONIG_DEBUG
 | 
			
		||||
#define ONIG_DEBUG
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
 | 
			
		||||
    (defined(__ppc__) && defined(__APPLE__)) || \
 | 
			
		||||
    defined(__x86_64) || defined(__x86_64__) || \
 | 
			
		||||
    defined(__mc68020__)
 | 
			
		||||
#define PLATFORM_UNALIGNED_WORD_ACCESS
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* config */
 | 
			
		||||
/* spec. config */
 | 
			
		||||
#define USE_NAMED_GROUP
 | 
			
		||||
#define USE_SUBEXP_CALL
 | 
			
		||||
#define USE_FOLD_MATCH                                  /* ess-tsett etc... */
 | 
			
		||||
#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
 | 
			
		||||
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE     /* /\n$/ =~ "\n" */
 | 
			
		||||
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
 | 
			
		||||
/* internal config */
 | 
			
		||||
#define USE_RECYCLE_NODE
 | 
			
		||||
#define USE_OP_PUSH_OR_JUMP_EXACT
 | 
			
		||||
#define USE_QUALIFIER_PEEK_NEXT
 | 
			
		||||
 | 
			
		||||
#define INIT_MATCH_STACK_SIZE                     160
 | 
			
		||||
#define MATCH_STACK_LIMIT_SIZE                 500000
 | 
			
		||||
 | 
			
		||||
/* interface to external system */
 | 
			
		||||
#ifdef NOT_RUBY      /* gived from Makefile */
 | 
			
		||||
#include "config.h"
 | 
			
		||||
#define USE_VARIABLE_META_CHARS
 | 
			
		||||
#define USE_VARIABLE_SYNTAX
 | 
			
		||||
#define USE_WORD_BEGIN_END          /* "\<": word-begin, "\>": word-end */
 | 
			
		||||
#define USE_POSIX_REGION_OPTION     /* needed for POSIX API support */
 | 
			
		||||
#define THREAD_ATOMIC_START         /* depend on thread system */
 | 
			
		||||
#define THREAD_ATOMIC_END           /* depend on thread system */
 | 
			
		||||
#define THREAD_PASS                 /* depend on thread system */
 | 
			
		||||
#define xmalloc     malloc
 | 
			
		||||
#define xrealloc    realloc
 | 
			
		||||
#define xfree       free
 | 
			
		||||
#else
 | 
			
		||||
#include "ruby.h"
 | 
			
		||||
#include "version.h"
 | 
			
		||||
#include "rubysig.h"      /* for DEFER_INTS, ENABLE_INTS */
 | 
			
		||||
#define THREAD_ATOMIC_START          DEFER_INTS
 | 
			
		||||
#define THREAD_ATOMIC_END            ENABLE_INTS
 | 
			
		||||
#define THREAD_PASS                  rb_thread_schedule()
 | 
			
		||||
#define DEFAULT_WARN_FUNCTION        rb_warn
 | 
			
		||||
#define DEFAULT_VERB_WARN_FUNCTION   rb_warning
 | 
			
		||||
 | 
			
		||||
#if defined(RUBY_VERSION_MAJOR)
 | 
			
		||||
#if RUBY_VERSION_MAJOR > 1 || \
 | 
			
		||||
(RUBY_VERSION_MAJOR == 1 && \
 | 
			
		||||
 defined(RUBY_VERSION_MINOR) && RUBY_VERSION_MINOR >= 8)
 | 
			
		||||
#define USE_ST_HASH_TABLE
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif /* else NOT_RUBY */
 | 
			
		||||
 | 
			
		||||
#define THREAD_PASS_LIMIT_COUNT    10
 | 
			
		||||
#define xmemset     memset
 | 
			
		||||
#define xmemcpy     memcpy
 | 
			
		||||
#define xmemmove    memmove
 | 
			
		||||
#if defined(_WIN32) && !defined(__CYGWIN__)
 | 
			
		||||
#define xalloca     _alloca
 | 
			
		||||
#ifdef NOT_RUBY
 | 
			
		||||
#define vsnprintf   _vsnprintf
 | 
			
		||||
#endif
 | 
			
		||||
#else
 | 
			
		||||
#define xalloca     alloca
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_STDLIB_H
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(HAVE_ALLOCA_H) && !defined(__GNUC__)
 | 
			
		||||
#include <alloca.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_STRING_H
 | 
			
		||||
# include <string.h>
 | 
			
		||||
#else
 | 
			
		||||
# include <strings.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#include <ctype.h>
 | 
			
		||||
#include <sys/types.h>
 | 
			
		||||
 | 
			
		||||
#ifdef ONIG_DEBUG
 | 
			
		||||
# include <stdio.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#include "regenc.h"
 | 
			
		||||
#include "oniguruma.h"
 | 
			
		||||
 | 
			
		||||
#ifdef MIN
 | 
			
		||||
#undef MIN
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef MAX
 | 
			
		||||
#undef MAX
 | 
			
		||||
#endif
 | 
			
		||||
#define MIN(a,b) (((a)>(b))?(b):(a))
 | 
			
		||||
#define MAX(a,b) (((a)<(b))?(b):(a))
 | 
			
		||||
 | 
			
		||||
#define IS_NULL(p)                    (((void*)(p)) == (void*)0)
 | 
			
		||||
#define IS_NOT_NULL(p)                (((void*)(p)) != (void*)0)
 | 
			
		||||
#define CHECK_NULL_RETURN(p)          if (IS_NULL(p)) return NULL
 | 
			
		||||
#define CHECK_NULL_RETURN_VAL(p,val)  if (IS_NULL(p)) return (val)
 | 
			
		||||
#define NULL_UCHARP                   ((UChar* )0)
 | 
			
		||||
 | 
			
		||||
#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
 | 
			
		||||
#define WORD_ALIGNMENT_SIZE       SIZEOF_INT
 | 
			
		||||
 | 
			
		||||
#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
 | 
			
		||||
  (pad_size) = WORD_ALIGNMENT_SIZE \
 | 
			
		||||
               - ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\
 | 
			
		||||
  if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\
 | 
			
		||||
} while (0)
 | 
			
		||||
 | 
			
		||||
#define ALIGNMENT_RIGHT(addr) do {\
 | 
			
		||||
  (addr) += (WORD_ALIGNMENT_SIZE - 1);\
 | 
			
		||||
  (addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\
 | 
			
		||||
} while (0)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define B_SHIFT  8
 | 
			
		||||
#define B_MASK   0xff
 | 
			
		||||
 | 
			
		||||
#define SERIALIZE_2BYTE_INT(i,p) do {\
 | 
			
		||||
  *(p)     = ((i) >> B_SHIFT) & B_MASK;\
 | 
			
		||||
  *((p)+1) = (i) & B_MASK;\
 | 
			
		||||
} while (0)
 | 
			
		||||
 | 
			
		||||
#define SERIALIZE_4BYTE_INT(i,p) do {\
 | 
			
		||||
  *(p)     = ((i) >> B_SHIFT*3) & B_MASK;\
 | 
			
		||||
  *((p)+1) = ((i) >> B_SHIFT*2) & B_MASK;\
 | 
			
		||||
  *((p)+2) = ((i) >> B_SHIFT  ) & B_MASK;\
 | 
			
		||||
  *((p)+3) = (i) & B_MASK;\
 | 
			
		||||
} while (0)
 | 
			
		||||
 | 
			
		||||
#define SERIALIZE_8BYTE_INT(i,p) do {\
 | 
			
		||||
  *(p)     = ((i) >> B_SHIFT*7) & B_MASK;\
 | 
			
		||||
  *((p)+1) = ((i) >> B_SHIFT*6) & B_MASK;\
 | 
			
		||||
  *((p)+2) = ((i) >> B_SHIFT*5) & B_MASK;\
 | 
			
		||||
  *((p)+3) = ((i) >> B_SHIFT*4) & B_MASK;\
 | 
			
		||||
  *((p)+4) = ((i) >> B_SHIFT*3) & B_MASK;\
 | 
			
		||||
  *((p)+5) = ((i) >> B_SHIFT*2) & B_MASK;\
 | 
			
		||||
  *((p)+6) = ((i) >> B_SHIFT  ) & B_MASK;\
 | 
			
		||||
  *((p)+7) = (i) & B_MASK;\
 | 
			
		||||
} while (0)
 | 
			
		||||
 | 
			
		||||
#define GET_2BYTE_INT_INC(type,i,p) do {\
 | 
			
		||||
  (i) = (type )(((unsigned int )(*(p)) << B_SHIFT) | (unsigned int )((p)[1]));\
 | 
			
		||||
  (p) += 2;\
 | 
			
		||||
} while (0)
 | 
			
		||||
 | 
			
		||||
#define GET_4BYTE_INT_INC(type,i,p) do {\
 | 
			
		||||
  (i) = (type )(((unsigned int )((p)[0]) << B_SHIFT*3) | \
 | 
			
		||||
		((unsigned int )((p)[1]) << B_SHIFT*2) | \
 | 
			
		||||
		((unsigned int )((p)[2]) << B_SHIFT  ) | \
 | 
			
		||||
		((unsigned int )((p)[3])             )); \
 | 
			
		||||
  (p) += 4;\
 | 
			
		||||
} while (0)
 | 
			
		||||
 | 
			
		||||
#define GET_8BYTE_INT_INC(type,i,p) do {\
 | 
			
		||||
  (i) = (type )(((unsigned long )((p)[0]) << B_SHIFT*7) | \
 | 
			
		||||
		((unsigned long )((p)[1]) << B_SHIFT*6) | \
 | 
			
		||||
		((unsigned long )((p)[2]) << B_SHIFT*5) | \
 | 
			
		||||
		((unsigned long )((p)[3]) << B_SHIFT*4) | \
 | 
			
		||||
		((unsigned long )((p)[4]) << B_SHIFT*3) | \
 | 
			
		||||
		((unsigned long )((p)[5]) << B_SHIFT*2) | \
 | 
			
		||||
		((unsigned long )((p)[6]) << B_SHIFT  ) | \
 | 
			
		||||
		((unsigned long )((p)[7])             )); \
 | 
			
		||||
  (p) += 8;\
 | 
			
		||||
} while (0)
 | 
			
		||||
 | 
			
		||||
#if SIZEOF_SHORT == 2
 | 
			
		||||
#define GET_SHORT_INC(i,p)     GET_2BYTE_INT_INC(short,i,p)
 | 
			
		||||
#define SERIALIZE_SHORT(i,p)   SERIALIZE_2BYTE_INT(i,p)
 | 
			
		||||
#elif SIZEOF_SHORT == 4
 | 
			
		||||
#define GET_SHORT_INC(i,p)     GET_4BYTE_INT_INC(short,i,p)
 | 
			
		||||
#define SERIALIZE_SHORT(i,p)   SERIALIZE_4BYTE_INT(i,p)
 | 
			
		||||
#elif SIZEOF_SHORT == 8
 | 
			
		||||
#define GET_SHORT_INC(i,p)     GET_8BYTE_INT_INC(short,i,p)
 | 
			
		||||
#define SERIALIZE_SHORT(i,p)   SERIALIZE_8BYTE_INT(i,p)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if SIZEOF_INT == 2
 | 
			
		||||
#define GET_INT_INC(i,p)       GET_2BYTE_INT_INC(int,i,p)
 | 
			
		||||
#define GET_UINT_INC(i,p)      GET_2BYTE_INT_INC(unsigned,i,p)
 | 
			
		||||
#define SERIALIZE_INT(i,p)     SERIALIZE_2BYTE_INT(i,p)
 | 
			
		||||
#define SERIALIZE_UINT(i,p)    SERIALIZE_2BYTE_INT(i,p)
 | 
			
		||||
#elif SIZEOF_INT == 4
 | 
			
		||||
#define GET_INT_INC(i,p)       GET_4BYTE_INT_INC(int,i,p)
 | 
			
		||||
#define GET_UINT_INC(i,p)      GET_4BYTE_INT_INC(unsigned,i,p)
 | 
			
		||||
#define SERIALIZE_INT(i,p)     SERIALIZE_4BYTE_INT(i,p)
 | 
			
		||||
#define SERIALIZE_UINT(i,p)    SERIALIZE_4BYTE_INT(i,p)
 | 
			
		||||
#elif SIZEOF_INT == 8
 | 
			
		||||
#define GET_INT_INC(i,p)       GET_8BYTE_INT_INC(int,i,p)
 | 
			
		||||
#define GET_UINT_INC(i,p)      GET_8BYTE_INT_INC(unsigned,i,p)
 | 
			
		||||
#define SERIALIZE_INT(i,p)     SERIALIZE_8BYTE_INT(i,p)
 | 
			
		||||
#define SERIALIZE_UINT(i,p)    SERIALIZE_8BYTE_INT(i,p)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
 | 
			
		||||
 | 
			
		||||
/* stack pop level */
 | 
			
		||||
#define STACK_POP_LEVEL_FREE        0
 | 
			
		||||
#define STACK_POP_LEVEL_MEM_START   1
 | 
			
		||||
#define STACK_POP_LEVEL_ALL         2
 | 
			
		||||
 | 
			
		||||
/* optimize flags */
 | 
			
		||||
#define ONIG_OPTIMIZE_NONE              0
 | 
			
		||||
#define ONIG_OPTIMIZE_EXACT             1   /* Slow Search */
 | 
			
		||||
#define ONIG_OPTIMIZE_EXACT_BM          2   /* Boyer Moore Search */
 | 
			
		||||
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV  3   /* BM   (but not simple match) */
 | 
			
		||||
#define ONIG_OPTIMIZE_EXACT_IC          4   /* Slow Search (ignore case) */
 | 
			
		||||
#define ONIG_OPTIMIZE_MAP               5   /* char map */
 | 
			
		||||
 | 
			
		||||
/* bit status */
 | 
			
		||||
/* One status bit per index, held in a single unsigned int.  Indexes that
   do not fit (n >= BIT_STATUS_BITS_NUM) all share bit 0. */
typedef unsigned int  BitStatusType;

#define BIT_STATUS_BITS_NUM          (sizeof(BitStatusType) * 8)
#define BIT_STATUS_CLEAR(stats)      (stats) = 0
#define BIT_STATUS_ON_ALL(stats)     (stats) = ~((BitStatusType )0)

/* Non-zero when bit n is set (bit 0 stands in for out-of-range n).
   The shift is done on an unsigned 1 so that n == 31 is well defined, and
   n is parenthesized so that expression arguments expand correctly. */
#define BIT_STATUS_AT(stats,n) \
  ((n) < BIT_STATUS_BITS_NUM \
   ? ((stats) & ((BitStatusType )1 << (n))) : ((stats) & 1))

/* Set bit n; an out-of-range n sets the shared bit 0 instead. */
#define BIT_STATUS_ON_AT(stats,n) do {\
  if ((n) < BIT_STATUS_BITS_NUM)\
    (stats) |= ((BitStatusType )1 << (n));\
  else\
    (stats) |= 1;\
} while (0)

/* Set bit n; an out-of-range n is ignored. */
#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\
  if ((n) < BIT_STATUS_BITS_NUM)\
    (stats) |= ((BitStatusType )1 << (n));\
} while (0)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define INT_MAX_LIMIT           ((1UL << (SIZEOF_INT * 8 - 1)) - 1)
 | 
			
		||||
 | 
			
		||||
#define DIGITVAL(code)    ((code) - '0')
 | 
			
		||||
#define ODIGITVAL(code)   DIGITVAL(code)
 | 
			
		||||
#define XDIGITVAL(enc,code) \
 | 
			
		||||
  (ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \
 | 
			
		||||
   : (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10))
 | 
			
		||||
 | 
			
		||||
#define IS_SINGLELINE(option)     ((option) & ONIG_OPTION_SINGLELINE)
 | 
			
		||||
#define IS_MULTILINE(option)      ((option) & ONIG_OPTION_MULTILINE)
 | 
			
		||||
#define IS_IGNORECASE(option)     ((option) & ONIG_OPTION_IGNORECASE)
 | 
			
		||||
#define IS_EXTEND(option)         ((option) & ONIG_OPTION_EXTEND)
 | 
			
		||||
#define IS_FIND_LONGEST(option)   ((option) & ONIG_OPTION_FIND_LONGEST)
 | 
			
		||||
#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
 | 
			
		||||
#define IS_POSIXLINE(option)      (IS_SINGLELINE(option) && IS_MULTILINE(option))
 | 
			
		||||
#define IS_FIND_CONDITION(option) ((option) & \
 | 
			
		||||
          (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
 | 
			
		||||
#define IS_NOTBOL(option)         ((option) & ONIG_OPTION_NOTBOL)
 | 
			
		||||
#define IS_NOTEOL(option)         ((option) & ONIG_OPTION_NOTEOL)
 | 
			
		||||
#define IS_POSIX_REGION(option)   ((option) & ONIG_OPTION_POSIX_REGION)
 | 
			
		||||
 | 
			
		||||
/* OP_SET_OPTION is required for these options.
 | 
			
		||||
#define IS_DYNAMIC_OPTION(option) \
 | 
			
		||||
  (((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0)
 | 
			
		||||
*/
 | 
			
		||||
/* ignore-case and multibyte status are included in compiled code. */
 | 
			
		||||
#define IS_DYNAMIC_OPTION(option)  0
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* bitset */
 | 
			
		||||
/* A BitSet has one bit for each of the SINGLE_BYTE_SIZE (256) byte values,
   stored in BITSET_SIZE "rooms" of type Bits. */
#define BITS_PER_BYTE      8
#define SINGLE_BYTE_SIZE   (1 << BITS_PER_BYTE)
#define BITS_IN_ROOM       (sizeof(Bits) * BITS_PER_BYTE)
#define BITSET_SIZE        (SINGLE_BYTE_SIZE / BITS_IN_ROOM)

/* Room type: unsigned int where PLATFORM_UNALIGNED_WORD_ACCESS is defined,
   unsigned char otherwise. */
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
typedef unsigned int   Bits;
#else
typedef unsigned char  Bits;
#endif
typedef Bits           BitSet[BITSET_SIZE];
typedef Bits*          BitSetRef;

#define SIZE_BITSET        sizeof(BitSet)

/* Zero every room of bs. */
#define BITSET_CLEAR(bs) do {\
  int i;\
  for (i = 0; i < BITSET_SIZE; i++) { (bs)[i] = 0; }\
} while (0)

/* Room holding bit `pos`, and the mask of that bit inside its room.
   `pos` is parenthesized so expression arguments (e.g. a | b) expand
   correctly; the mask is built from an unsigned 1 so that the top bit of
   a 32-bit room does not shift into the sign bit. */
#define BS_ROOM(bs,pos)            (bs)[(pos) / BITS_IN_ROOM]
#define BS_BIT(pos)                (1u << ((pos) % BITS_IN_ROOM))

#define BITSET_AT(bs, pos)         (BS_ROOM(bs,pos) & BS_BIT(pos))
#define BITSET_SET_BIT(bs, pos)     BS_ROOM(bs,pos) |= BS_BIT(pos)
#define BITSET_CLEAR_BIT(bs, pos)   BS_ROOM(bs,pos) &= ~(BS_BIT(pos))
#define BITSET_INVERT_BIT(bs, pos)  BS_ROOM(bs,pos) ^= BS_BIT(pos)
 | 
			
		||||
 | 
			
		||||
/* bytes buffer */
 | 
			
		||||
typedef struct _BBuf {
 | 
			
		||||
  UChar* p;
 | 
			
		||||
  unsigned int used;
 | 
			
		||||
  unsigned int alloc;
 | 
			
		||||
} BBuf;
 | 
			
		||||
 | 
			
		||||
/* BBuf manipulation macros.
 *
 * The growing macros execute `return(ONIGERR_MEMORY)` in the *calling*
 * function when memory cannot be obtained, so they may only be used inside
 * functions that return an int error code.  On that path the buffer is left
 * exactly as it was (p, used and alloc unchanged), so the caller can still
 * release it.  Arguments may be evaluated more than once.
 */
#define BBUF_INIT(buf,size)    onig_bbuf_init((BBuf* )(buf), (size))

/* Grow the allocation by `inc` bytes. */
#define BBUF_SIZE_INC(buf,inc) do{\
  unsigned int bbuf_alloc_ = (buf)->alloc + (inc);\
  UChar* bbuf_p_ = (UChar* )xrealloc((buf)->p, bbuf_alloc_);\
  if (IS_NULL(bbuf_p_)) return(ONIGERR_MEMORY);\
  (buf)->p     = bbuf_p_;\
  (buf)->alloc = bbuf_alloc_;\
} while (0)

/* Double the allocation (at least once) until it is >= `low`.
   An empty buffer starts from 1, and a doubling that would wrap around
   unsigned int is reported as ONIGERR_MEMORY instead of looping forever. */
#define BBUF_EXPAND(buf,low) do{\
  unsigned int bbuf_alloc_ = (buf)->alloc;\
  UChar* bbuf_p_;\
  do {\
    unsigned int bbuf_prev_ = bbuf_alloc_;\
    bbuf_alloc_ = (bbuf_prev_ == 0 ? 1 : bbuf_prev_ * 2);\
    if (bbuf_alloc_ <= bbuf_prev_) return(ONIGERR_MEMORY);\
  } while (bbuf_alloc_ < (unsigned int )(low));\
  bbuf_p_ = (UChar* )xrealloc((buf)->p, bbuf_alloc_);\
  if (IS_NULL(bbuf_p_)) return(ONIGERR_MEMORY);\
  (buf)->p     = bbuf_p_;\
  (buf)->alloc = bbuf_alloc_;\
} while (0)

/* Make the allocation at least `size` bytes, doubling as needed.
   Does nothing when the buffer is already large enough. */
#define BBUF_ENSURE_SIZE(buf,size) do{\
  unsigned int bbuf_alloc_ = (buf)->alloc;\
  while (bbuf_alloc_ < (unsigned int )(size)) {\
    unsigned int bbuf_prev_ = bbuf_alloc_;\
    bbuf_alloc_ = (bbuf_prev_ == 0 ? 1 : bbuf_prev_ * 2);\
    if (bbuf_alloc_ <= bbuf_prev_) return(ONIGERR_MEMORY);\
  }\
  if ((buf)->alloc != bbuf_alloc_) {\
    UChar* bbuf_p_ = (UChar* )xrealloc((buf)->p, bbuf_alloc_);\
    if (IS_NULL(bbuf_p_)) return(ONIGERR_MEMORY);\
    (buf)->p     = bbuf_p_;\
    (buf)->alloc = bbuf_alloc_;\
  }\
} while (0)

/* Copy `n` bytes to offset `pos`, growing the buffer when necessary and
   extending `used` when the write ends beyond it. */
#define BBUF_WRITE(buf,pos,bytes,n) do{\
  unsigned int bbuf_end_ = (unsigned int )((pos) + (n));\
  if ((buf)->alloc < bbuf_end_) BBUF_EXPAND((buf),bbuf_end_);\
  xmemcpy((buf)->p + (pos), (bytes), (n));\
  if ((buf)->used < bbuf_end_) (buf)->used = bbuf_end_;\
} while (0)

/* Single-byte form of BBUF_WRITE. */
#define BBUF_WRITE1(buf,pos,byte) do{\
  unsigned int bbuf_end_ = (unsigned int )((pos) + 1);\
  if ((buf)->alloc < bbuf_end_) BBUF_EXPAND((buf),bbuf_end_);\
  (buf)->p[(pos)] = (byte);\
  if ((buf)->used < bbuf_end_) (buf)->used = bbuf_end_;\
} while (0)

/* Append at the current end of data. */
#define BBUF_ADD(buf,bytes,n)       BBUF_WRITE((buf),(buf)->used,(bytes),(n))
#define BBUF_ADD1(buf,byte)         BBUF_WRITE1((buf),(buf)->used,(byte))
#define BBUF_GET_ADD_ADDRESS(buf)   ((buf)->p + (buf)->used)
#define BBUF_GET_OFFSET_POS(buf)    ((buf)->used)

/* from < to */
/* Move `n` bytes from offset `from` up to offset `to`, growing the buffer
   and extending `used` when the moved data ends beyond them. */
#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\
  if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
  if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\
} while (0)

/* from > to */
/* Move `n` bytes from offset `from` down to offset `to`; `used` is unchanged. */
#define BBUF_MOVE_LEFT(buf,from,to,n) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
} while (0)

/* from > to */
/* Move everything from offset `from` to the end of data down to offset `to`
   and shrink `used` by the distance moved. */
#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
  (buf)->used -= ((from) - (to));\
} while (0)

/* Insert `n` bytes at offset `pos`: a plain write when `pos` is at or past
   the end of data, otherwise the tail is shifted right first. */
#define BBUF_INSERT(buf,pos,bytes,n) do {\
  if ((pos) >= (buf)->used) {\
    BBUF_WRITE(buf,pos,bytes,n);\
  }\
  else {\
    BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
    xmemcpy((buf)->p + (pos), (bytes), (n));\
  }\
} while (0)

#define BBUF_GET_BYTE(buf, pos) ((buf)->p[(pos)])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* Anchor type bits.  Each constant is a distinct single bit so that
   several anchors can be combined in one mask. */
#define ANCHOR_BEGIN_BUF        (1<<0)
#define ANCHOR_BEGIN_LINE       (1<<1)
#define ANCHOR_BEGIN_POSITION   (1<<2)
#define ANCHOR_END_BUF          (1<<3)
#define ANCHOR_SEMI_END_BUF     (1<<4)
#define ANCHOR_END_LINE         (1<<5)

#define ANCHOR_WORD_BOUND       (1<<6)
#define ANCHOR_NOT_WORD_BOUND   (1<<7)
#define ANCHOR_WORD_BEGIN       (1<<8)
#define ANCHOR_WORD_END         (1<<9)
#define ANCHOR_PREC_READ        (1<<10)
#define ANCHOR_PREC_READ_NOT    (1<<11)
#define ANCHOR_LOOK_BEHIND      (1<<12)
#define ANCHOR_LOOK_BEHIND_NOT  (1<<13)

#define ANCHOR_ANYCHAR_STAR     (1<<14)   /* ".*" optimize info */
#define ANCHOR_ANYCHAR_STAR_PL  (1<<15)   /* ".*" optimize info (posix-line) */
 | 
			
		||||
 | 
			
		||||
/* operation code */
/* Only OP_FINISH, OP_END and OP_EXACT1 carry explicit values; every other
   enumerator takes the next integer, so the declaration order below fixes
   each opcode's numeric value.  Do not reorder or insert in the middle. */
enum OpCode {
  OP_FINISH = 0,        /* matching process terminator (no more alternative) */
  OP_END    = 1,        /* pattern code terminator (success end) */

  OP_EXACT1 = 2,        /* single byte, N = 1 */
  OP_EXACT2,            /* single byte, N = 2 */
  OP_EXACT3,            /* single byte, N = 3 */
  OP_EXACT4,            /* single byte, N = 4 */
  OP_EXACT5,            /* single byte, N = 5 */
  OP_EXACTN,            /* single byte */
  OP_EXACTMB2N1,        /* mb-length = 2 N = 1 */
  OP_EXACTMB2N2,        /* mb-length = 2 N = 2 */
  OP_EXACTMB2N3,        /* mb-length = 2 N = 3 */
  OP_EXACTMB2N,         /* mb-length = 2 */
  OP_EXACTMB3N,         /* mb-length = 3 */
  OP_EXACTMBN,          /* other length */

  OP_EXACT1_IC,         /* single byte, N = 1, ignore case */
  OP_EXACTN_IC,         /* single byte,        ignore case */

  OP_CCLASS,
  OP_CCLASS_MB,
  OP_CCLASS_MIX,
  OP_CCLASS_NOT,
  OP_CCLASS_MB_NOT,
  OP_CCLASS_MIX_NOT,

  OP_ANYCHAR,                 /* "."  */
  OP_ANYCHAR_ML,              /* "."  multi-line */
  OP_ANYCHAR_STAR,            /* ".*" */
  OP_ANYCHAR_ML_STAR,         /* ".*" multi-line */
  OP_ANYCHAR_STAR_PEEK_NEXT,
  OP_ANYCHAR_ML_STAR_PEEK_NEXT,

  OP_WORD,
  OP_NOT_WORD,
  OP_WORD_SB,
  OP_WORD_MB,
  OP_WORD_BOUND,
  OP_NOT_WORD_BOUND,
  OP_WORD_BEGIN,
  OP_WORD_END,

  OP_BEGIN_BUF,
  OP_END_BUF,
  OP_BEGIN_LINE,
  OP_END_LINE,
  OP_SEMI_END_BUF,
  OP_BEGIN_POSITION,

  OP_BACKREF1,
  OP_BACKREF2,
  OP_BACKREF3,
  OP_BACKREFN,
  OP_BACKREFN_IC,
  OP_BACKREF_MULTI,
  OP_BACKREF_MULTI_IC,

  OP_MEMORY_START,
  OP_MEMORY_START_PUSH,   /* push back-tracker to stack */
  OP_MEMORY_END_PUSH,     /* push back-tracker to stack */
  OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
  OP_MEMORY_END,
  OP_MEMORY_END_REC,      /* push marker to stack */

  OP_SET_OPTION_PUSH,    /* set option and push recover option */
  OP_SET_OPTION,         /* set option */

  OP_FAIL,               /* pop stack and move */
  OP_JUMP,
  OP_PUSH,
  OP_POP,
  OP_PUSH_OR_JUMP_EXACT1,  /* if match exact then push, else jump. */
  OP_PUSH_IF_PEEK_NEXT,    /* if match exact then push, else none. */
  OP_REPEAT,               /* {n,m} */
  OP_REPEAT_NG,            /* {n,m}? (non greedy) */
  OP_REPEAT_INC,
  OP_REPEAT_INC_NG,        /* non greedy */
  OP_NULL_CHECK_START,     /* null loop checker start */
  OP_NULL_CHECK_END,       /* null loop checker end   */
  OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
  OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */

  OP_PUSH_POS,             /* (?=...)  start */
  OP_POP_POS,              /* (?=...)  end   */
  OP_PUSH_POS_NOT,         /* (?!...)  start */
  OP_FAIL_POS,             /* (?!...)  end   */
  OP_PUSH_STOP_BT,         /* (?>...)  start */
  OP_POP_STOP_BT,          /* (?>...)  end   */
  OP_LOOK_BEHIND,          /* (?<=...) start (no needs end opcode) */
  OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
  OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end   */

  OP_CALL,                 /* \g<name> */
  OP_RETURN
};
 | 
			
		||||
 | 
			
		||||
/* arguments type */
/* ARG_SPECIAL is parenthesized so the negative constant stays a single
   operand wherever the macro is expanded. */
#define ARG_SPECIAL     (-1)
#define ARG_NON          0
#define ARG_RELADDR      1
#define ARG_ABSADDR      2
#define ARG_LENGTH       3
#define ARG_MEMNUM       4
#define ARG_OPTION       5

/* Types of the operands used by the GET_*_INC readers. */
typedef short int   RelAddrType;
typedef short int   AbsAddrType;
typedef short int   LengthType;
typedef short int   MemNumType;
typedef int         RepeatNumType;

/* Sizes in bytes of an opcode and of each operand type. */
#define SIZE_OPCODE        1
#define SIZE_RELADDR       sizeof(RelAddrType)
#define SIZE_ABSADDR       sizeof(AbsAddrType)
#define SIZE_LENGTH        sizeof(LengthType)
#define SIZE_MEMNUM        sizeof(MemNumType)
#define SIZE_REPEATNUM     sizeof(RepeatNumType)
#define SIZE_OPTION        sizeof(OnigOptionType)
#define SIZE_CODE_POINT    sizeof(OnigCodePoint)
 | 
			
		||||
 | 
			
		||||
/* Operand readers: load one operand from the position `p` into the first
   argument and advance `p` past it. */
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
/* The operand is read directly through a cast of `p` to the operand type. */
#define GET_RELADDR_INC(addr,p) do{\
  (addr) = *((RelAddrType* )(p));\
  (p) += SIZE_RELADDR;\
} while(0)

#define GET_ABSADDR_INC(addr,p) do{\
  (addr) = *((AbsAddrType* )(p));\
  (p) += SIZE_ABSADDR;\
} while(0)

#define GET_LENGTH_INC(len,p) do{\
  (len) = *((LengthType* )(p));\
  (p) += SIZE_LENGTH;\
} while(0)

#define GET_MEMNUM_INC(num,p) do{\
  (num) = *((MemNumType* )(p));\
  (p) += SIZE_MEMNUM;\
} while(0)

#define GET_REPEATNUM_INC(num,p) do{\
  (num) = *((RepeatNumType* )(p));\
  (p) += SIZE_REPEATNUM;\
} while(0)

#define GET_OPTION_INC(option,p) do{\
  (option) = *((OnigOptionType* )(p));\
  (p) += SIZE_OPTION;\
} while(0)
#else

/* Delegate to the GET_SHORT_INC / GET_INT_INC / GET_UINT_INC helpers,
   which are defined elsewhere. */
#define GET_RELADDR_INC(addr,p)      GET_SHORT_INC(addr,p)
#define GET_ABSADDR_INC(addr,p)      GET_SHORT_INC(addr,p)
#define GET_LENGTH_INC(len,p)        GET_SHORT_INC(len,p)
#define GET_MEMNUM_INC(num,p)        GET_SHORT_INC(num,p)
#define GET_REPEATNUM_INC(num,p)     GET_INT_INC(num,p)
#define GET_OPTION_INC(option,p)     GET_UINT_INC(option,p)

/* NOTE(review): the SERIALIZE_* names exist only in this branch; nothing
   here defines them when PLATFORM_UNALIGNED_WORD_ACCESS is set. */
#define SERIALIZE_RELADDR(addr,p)    SERIALIZE_SHORT(addr,p)
#define SERIALIZE_ABSADDR(addr,p)    SERIALIZE_SHORT(addr,p)
#define SERIALIZE_LENGTH(len,p)      SERIALIZE_SHORT(len,p)
#define SERIALIZE_MEMNUM(num,p)      SERIALIZE_SHORT(num,p)
#define SERIALIZE_REPEATNUM(num,p)   SERIALIZE_INT(num,p)
#define SERIALIZE_OPTION(option,p)   SERIALIZE_UINT(option,p)

#define SERIALIZE_BUFSIZE            SIZEOF_INT

#endif  /* PLATFORM_UNALIGNED_WORD_ACCESS */
 | 
			
		||||
 | 
			
		||||
/* code point's address must be aligned address. */
/* The expansion is fully parenthesized so the macro can be used inside a
   larger expression (e.g. compared with a value) without the comparison
   binding tighter than the assignment. */
#define GET_CODE_POINT(code,p)   ((code) = *((OnigCodePoint* )(p)))
/* Read one byte at `p` into `byte` and advance `p` by one. */
#define GET_BYTE_INC(byte,p) do{\
  (byte) = *(p);\
  (p)++;\
} while(0)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* op-code + arg size */
/* Total size in bytes of each instruction: the opcode itself plus its
   operands (a literal "+ 1" is one extra byte). */
#define SIZE_OP_ANYCHAR_STAR            SIZE_OPCODE
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1)
#define SIZE_OP_JUMP                   (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_PUSH                   (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_POP                     SIZE_OPCODE
#define SIZE_OP_PUSH_OR_JUMP_EXACT1    (SIZE_OPCODE + SIZE_RELADDR + 1)
#define SIZE_OP_PUSH_IF_PEEK_NEXT      (SIZE_OPCODE + SIZE_RELADDR + 1)
#define SIZE_OP_REPEAT_INC             (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_REPEAT_INC_NG          (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_PUSH_POS                SIZE_OPCODE
#define SIZE_OP_PUSH_POS_NOT           (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_POP_POS                 SIZE_OPCODE
#define SIZE_OP_FAIL_POS                SIZE_OPCODE
#define SIZE_OP_SET_OPTION             (SIZE_OPCODE + SIZE_OPTION)
#define SIZE_OP_SET_OPTION_PUSH        (SIZE_OPCODE + SIZE_OPTION)
#define SIZE_OP_FAIL                    SIZE_OPCODE
#define SIZE_OP_MEMORY_START           (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_START_PUSH      (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END_PUSH        (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END_PUSH_REC    (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END             (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END_REC         (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_PUSH_STOP_BT            SIZE_OPCODE
#define SIZE_OP_POP_STOP_BT             SIZE_OPCODE
#define SIZE_OP_NULL_CHECK_START       (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_NULL_CHECK_END         (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_LOOK_BEHIND            (SIZE_OPCODE + SIZE_LENGTH)
#define SIZE_OP_PUSH_LOOK_BEHIND_NOT   (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH)
#define SIZE_OP_FAIL_LOOK_BEHIND_NOT    SIZE_OPCODE
#define SIZE_OP_CALL                   (SIZE_OPCODE + SIZE_ABSADDR)
#define SIZE_OP_RETURN                  SIZE_OPCODE
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
  UChar esc;
 | 
			
		||||
  UChar anychar;
 | 
			
		||||
  UChar anytime;
 | 
			
		||||
  UChar zero_or_one_time;
 | 
			
		||||
  UChar one_or_more_time;
 | 
			
		||||
  UChar anychar_anytime;
 | 
			
		||||
} OnigMetaCharTableType;
 | 
			
		||||
 | 
			
		||||
extern OnigMetaCharTableType OnigMetaCharTable;
 | 
			
		||||
 | 
			
		||||
#define MC_ESC               OnigMetaCharTable.esc
 | 
			
		||||
#define MC_ANYCHAR           OnigMetaCharTable.anychar
 | 
			
		||||
#define MC_ANYTIME           OnigMetaCharTable.anytime
 | 
			
		||||
#define MC_ZERO_OR_ONE_TIME  OnigMetaCharTable.zero_or_one_time
 | 
			
		||||
#define MC_ONE_OR_MORE_TIME  OnigMetaCharTable.one_or_more_time
 | 
			
		||||
#define MC_ANYCHAR_ANYTIME   OnigMetaCharTable.anychar_anytime
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef ONIG_DEBUG
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
  short int opcode;
 | 
			
		||||
  char*     name;
 | 
			
		||||
  short int arg_type;
 | 
			
		||||
} OnigOpInfoType;
 | 
			
		||||
 | 
			
		||||
extern OnigOpInfoType OnigOpInfo[];
 | 
			
		||||
 | 
			
		||||
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp));
 | 
			
		||||
 | 
			
		||||
#ifdef ONIG_DEBUG_STATISTICS
 | 
			
		||||
extern void onig_statistics_init P_((void));
 | 
			
		||||
extern void onig_print_statistics P_((FILE* f));
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
extern char* onig_error_code_to_format P_((int code));
 | 
			
		||||
extern void  onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...));
 | 
			
		||||
extern UChar* onig_strdup P_((UChar* s, UChar* end));
 | 
			
		||||
extern int  onig_bbuf_init P_((BBuf* buf, int size));
 | 
			
		||||
extern int  onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax));
 | 
			
		||||
extern int  onig_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigErrorInfo* einfo));
 | 
			
		||||
extern void onig_chain_reduce P_((regex_t* reg));
 | 
			
		||||
extern int  onig_is_in_code_range P_((UChar* p, OnigCodePoint code));
 | 
			
		||||
 | 
			
		||||
#endif /* REGINT_H */
 | 
			
		||||
							
								
								
									
										4815
									
								
								regparse.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										4815
									
								
								regparse.c
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										277
									
								
								regparse.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										277
									
								
								regparse.h
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,277 @@
 | 
			
		|||
/**********************************************************************
 | 
			
		||||
 | 
			
		||||
  regparse.h -  Oniguruma (regular expression library)
 | 
			
		||||
 | 
			
		||||
  Copyright (C) 2003-2004  K.Kosako (kosako@sofnec.co.jp)
 | 
			
		||||
 | 
			
		||||
**********************************************************************/
 | 
			
		||||
#ifndef REGPARSE_H
 | 
			
		||||
#define REGPARSE_H
 | 
			
		||||
 | 
			
		||||
#include "regint.h"
 | 
			
		||||
 | 
			
		||||
/* node type */
/* One bit per node kind, so that a set of kinds can be tested with a
   single mask (see IS_NODE_TYPE_SIMPLE). */
#define N_STRING       (1<< 0)
#define N_CCLASS       (1<< 1)
#define N_CTYPE        (1<< 2)
#define N_ANYCHAR      (1<< 3)
#define N_BACKREF      (1<< 4)
#define N_QUALIFIER    (1<< 5)
#define N_EFFECT       (1<< 6)
#define N_ANCHOR       (1<< 7)
#define N_LIST         (1<< 8)
#define N_ALT          (1<< 9)
#define N_CALL         (1<<10)

/* 1 when `type` contains any of STRING, CCLASS, CTYPE, ANYCHAR, BACKREF. */
#define IS_NODE_TYPE_SIMPLE(type) \
  (((type) & (N_STRING | N_CCLASS | N_CTYPE | N_ANYCHAR | N_BACKREF)) != 0)

/* Accessors for the type tag and for each member of a node's `u` union. */
#define NTYPE(node)        ((node)->type)
#define NCONS(node)        ((node)->u.cons)
#define NSTRING(node)      ((node)->u.str)
#define NCCLASS(node)      ((node)->u.cclass)
#define NCTYPE(node)       ((node)->u.ctype)
#define NQUALIFIER(node)   ((node)->u.qualifier)
#define NANCHOR(node)      ((node)->u.anchor)
#define NBACKREF(node)     ((node)->u.backref)
#define NEFFECT(node)      ((node)->u.effect)
#define NCALL(node)        ((node)->u.call)
 | 
			
		||||
 | 
			
		||||
/* Character type bits. */
#define CTYPE_WORD              (1<<0)
#define CTYPE_NOT_WORD          (1<<1)
#define CTYPE_WHITE_SPACE       (1<<2)
#define CTYPE_NOT_WHITE_SPACE   (1<<3)
#define CTYPE_DIGIT             (1<<4)
#define CTYPE_NOT_DIGIT         (1<<5)


/* Combined masks over the ANCHOR_* bits (which are defined elsewhere). */
#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL)
#define ANCHOR_END_BUF_MASK      (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)

/* Effect node kinds. */
#define EFFECT_MEMORY           (1<<0)
#define EFFECT_OPTION           (1<<1)
#define EFFECT_STOP_BACKTRACK   (1<<2)

/* Sentinel repeat count; parenthesized so the negative constant stays a
   single operand wherever the macro is expanded. */
#define REPEAT_INFINITE         (-1)
#define IS_REPEAT_INFINITE(n)   ((n) == REPEAT_INFINITE)

#define NODE_STR_MARGIN         16
#define NODE_STR_BUF_SIZE       24  /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE       7
 | 
			
		||||
 | 
			
		||||
/* String node flag bits. */
#define NSTR_RAW                (1<<0) /* by backslashed number */
#define NSTR_CASE_AMBIG         (1<<1)

/* Length in bytes of the string held by a string node (end - s). */
#define NSTRING_LEN(node)            ((node)->u.str.end - (node)->u.str.s)
/* Flag setters/clearers; the expansions are parenthesized so they behave
   as a single expression in any context. */
#define NSTRING_SET_RAW(node)        ((node)->u.str.flag |= NSTR_RAW)
#define NSTRING_CLEAR_RAW(node)      ((node)->u.str.flag &= ~NSTR_RAW)
#define NSTRING_SET_CASE_AMBIG(node) ((node)->u.str.flag |= NSTR_CASE_AMBIG)
/* Flag tests, normalized to 0/1. */
#define NSTRING_IS_RAW(node)         (((node)->u.str.flag & NSTR_RAW) != 0)
#define NSTRING_IS_CASE_AMBIG(node)  \
       (((node)->u.str.flag & NSTR_CASE_AMBIG) != 0)
 | 
			
		||||
 | 
			
		||||
/* Pointer to the active back-reference number array of a BackrefNode:
   back_dynamic when it is non-NULL, otherwise the embedded back_static.
   The expansion is a pure expression (no trailing semicolon), so it can be
   used as an operand as well as on the right-hand side of an assignment. */
#define BACKREFS_P(br) \
  (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
 | 
			
		||||
 | 
			
		||||
/* Mark a character class node as negated (sets its `not` member to 1). */
#define CCLASS_SET_NOT(cc)      ((cc)->not = 1)

/* Values stored in QualifierNode.target_empty_info. */
#define NQ_TARGET_ISNOT_EMPTY     0
#define NQ_TARGET_IS_EMPTY        1
#define NQ_TARGET_IS_EMPTY_MEM    2
#define NQ_TARGET_IS_EMPTY_REC    3
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
  UChar* s;
 | 
			
		||||
  UChar* end;
 | 
			
		||||
  unsigned int flag;
 | 
			
		||||
  int    capa;    /* (allocated size - 1) or 0: use buf[] */
 | 
			
		||||
  UChar  buf[NODE_STR_BUF_SIZE];
 | 
			
		||||
} StrNode;
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
  int    not;
 | 
			
		||||
  BitSet bs;
 | 
			
		||||
  BBuf*  mbuf;     /* multi-byte info or NULL */
 | 
			
		||||
} CClassNode;
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
  struct _Node* target;
 | 
			
		||||
  int lower;
 | 
			
		||||
  int upper;
 | 
			
		||||
  int greedy;
 | 
			
		||||
  int by_number;         /* {n,m} */
 | 
			
		||||
  int target_empty_info;
 | 
			
		||||
  struct _Node* head_exact;
 | 
			
		||||
  struct _Node* next_head_exact;
 | 
			
		||||
  int is_refered;     /* include called node. don't eliminate even if {0} */
 | 
			
		||||
} QualifierNode;
 | 
			
		||||
 | 
			
		||||
/* status bits */
/* Bits kept in the `state` member of effect, call and backref nodes. */
#define NST_MIN_FIXED        (1<<0)
#define NST_MAX_FIXED        (1<<1)
#define NST_CLEN_FIXED       (1<<2)
#define NST_MARK1            (1<<3)
#define NST_MARK2            (1<<4)
#define NST_MEM_BACKREFED    (1<<5)
#define NST_SIMPLE_REPEAT    (1<<6)  /* for stop backtrack optimization */

#define NST_RECURSION        (1<<7)
#define NST_CALLED           (1<<8)
#define NST_ADDR_FIXED       (1<<9)
#define NST_NAMED_GROUP      (1<<10)
#define NST_NAME_REF         (1<<11)

/* Set / clear bits in an effect node's state.  These take a Node pointer;
   the expansions are parenthesized so they act as a single expression. */
#define SET_EFFECT_STATUS(node,f)      ((node)->u.effect.state |=  (f))
#define CLEAR_EFFECT_STATUS(node,f)    ((node)->u.effect.state &= ~(f))

/* State tests, normalized to 0/1.  Unlike the setters above, these take a
   pointer to the payload structure itself (anything with a `state` member),
   not a Node pointer. */
#define IS_EFFECT_CALLED(en)           (((en)->state & NST_CALLED)        != 0)
#define IS_EFFECT_ADDR_FIXED(en)       (((en)->state & NST_ADDR_FIXED)    != 0)
#define IS_EFFECT_RECURSION(en)        (((en)->state & NST_RECURSION)     != 0)
#define IS_EFFECT_MARK1(en)            (((en)->state & NST_MARK1)         != 0)
#define IS_EFFECT_MARK2(en)            (((en)->state & NST_MARK2)         != 0)
#define IS_EFFECT_MIN_FIXED(en)        (((en)->state & NST_MIN_FIXED)     != 0)
#define IS_EFFECT_MAX_FIXED(en)        (((en)->state & NST_MAX_FIXED)     != 0)
#define IS_EFFECT_CLEN_FIXED(en)       (((en)->state & NST_CLEN_FIXED)    != 0)
#define IS_EFFECT_SIMPLE_REPEAT(en)    (((en)->state & NST_SIMPLE_REPEAT) != 0)
#define IS_EFFECT_NAMED_GROUP(en)      (((en)->state & NST_NAMED_GROUP)   != 0)

#define SET_CALL_RECURSION(node)       ((node)->u.call.state |= NST_RECURSION)
#define IS_CALL_RECURSION(cn)          (((cn)->state & NST_RECURSION)  != 0)
#define IS_CALL_NAME_REF(cn)           (((cn)->state & NST_NAME_REF)   != 0)
#define IS_BACKREF_NAME_REF(bn)        (((bn)->state & NST_NAME_REF)   != 0)
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
  int state;
 | 
			
		||||
  int type;
 | 
			
		||||
  int regnum;
 | 
			
		||||
  OnigOptionType option;
 | 
			
		||||
  struct _Node* target;
 | 
			
		||||
  AbsAddrType call_addr;
 | 
			
		||||
  /* for multiple call reference */
 | 
			
		||||
  OnigDistance min_len; /* min length (byte) */
 | 
			
		||||
  OnigDistance max_len; /* max length (byte) */ 
 | 
			
		||||
  int char_len;        /* character length  */
 | 
			
		||||
  int opt_count;       /* referenced count in optimize_node_left() */
 | 
			
		||||
} EffectNode;
 | 
			
		||||
 | 
			
		||||
/* Sentinel reference number; parenthesized so the negative constant stays
   a single operand wherever the macro is expanded. */
#define CALLNODE_REFNUM_UNDEF  (-1)

#ifdef USE_SUBEXP_CALL

/* One recorded offset together with the node it refers to. */
typedef struct {
  int offset;
  struct _Node* target;
} UnsetAddr;

/* Array of UnsetAddr entries with its element count and capacity. */
typedef struct {
  int num;
  int alloc;
  UnsetAddr* us;
} UnsetAddrList;

/* Call node payload. */
typedef struct {
  int     state;         /* NST_* bits */
  int     ref_num;       /* NOTE(review): presumably CALLNODE_REFNUM_UNDEF until resolved -- confirm */
  UChar*  name;
  UChar*  name_end;
  struct _Node* target;  /* EffectNode : EFFECT_MEMORY */
  UnsetAddrList* unset_addr_list;
} CallNode;

#endif
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
  int     state;
 | 
			
		||||
  int     back_num;
 | 
			
		||||
  int     back_static[NODE_BACKREFS_SIZE];
 | 
			
		||||
  int*    back_dynamic;
 | 
			
		||||
} BackrefNode;
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
  int type;
 | 
			
		||||
  struct _Node* target;
 | 
			
		||||
  int char_len;
 | 
			
		||||
} AnchorNode;
 | 
			
		||||
 | 
			
		||||
typedef struct _Node {
 | 
			
		||||
  int type;
 | 
			
		||||
  union {
 | 
			
		||||
    StrNode       str;
 | 
			
		||||
    CClassNode    cclass;
 | 
			
		||||
    QualifierNode qualifier;
 | 
			
		||||
    EffectNode    effect;
 | 
			
		||||
#ifdef USE_SUBEXP_CALL
 | 
			
		||||
    CallNode      call;
 | 
			
		||||
#endif
 | 
			
		||||
    BackrefNode   backref;
 | 
			
		||||
    AnchorNode    anchor;
 | 
			
		||||
    struct {
 | 
			
		||||
      struct _Node* left;
 | 
			
		||||
      struct _Node* right;
 | 
			
		||||
    } cons;
 | 
			
		||||
    struct {
 | 
			
		||||
      int type;
 | 
			
		||||
    } ctype;
 | 
			
		||||
  } u;
 | 
			
		||||
} Node;
 | 
			
		||||
 | 
			
		||||
#define NULL_NODE  ((Node* )0)
 | 
			
		||||
 | 
			
		||||
/* Number of entries in ScanEnv.mem_nodes_static. */
#define SCANENV_MEMNODES_SIZE               8
/* Active memory-node array of a scan environment: mem_nodes_dynamic when
   it is non-NULL, otherwise the embedded mem_nodes_static. */
#define SCANENV_MEM_NODES(senv)   \
 (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
    (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
 | 
			
		||||
 | 
			
		||||
typedef struct {
 | 
			
		||||
  OnigOptionType   option;
 | 
			
		||||
  OnigEncoding enc;
 | 
			
		||||
  OnigSyntaxType*  syntax;
 | 
			
		||||
  BitStatusType   capture_history;
 | 
			
		||||
  BitStatusType   bt_mem_start;
 | 
			
		||||
  BitStatusType   bt_mem_end;
 | 
			
		||||
  BitStatusType   backrefed_mem;
 | 
			
		||||
  UChar*          pattern;
 | 
			
		||||
  UChar*          pattern_end;
 | 
			
		||||
  UChar*          error;
 | 
			
		||||
  UChar*          error_end;
 | 
			
		||||
  regex_t*        reg;       /* for reg->names only */
 | 
			
		||||
  int             num_call;
 | 
			
		||||
#ifdef USE_SUBEXP_CALL
 | 
			
		||||
  UnsetAddrList*  unset_addr_list;
 | 
			
		||||
#endif
 | 
			
		||||
  int             num_mem;
 | 
			
		||||
#ifdef USE_NAMED_GROUP
 | 
			
		||||
  int             num_named;
 | 
			
		||||
#endif
 | 
			
		||||
  int             mem_alloc;
 | 
			
		||||
  Node*           mem_nodes_static[SCANENV_MEMNODES_SIZE];
 | 
			
		||||
  Node**          mem_nodes_dynamic;
 | 
			
		||||
} ScanEnv;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* Syntax feature tests: 1 when any bit of the mask is set in the syntax's
   `op`, `op2` or `behavior` member respectively, else 0. */
#define IS_SYNTAX_OP(syn, opm)    (((syn)->op  & (opm)) != 0)
#define IS_SYNTAX_OP2(syn, opm)   (((syn)->op2 & (opm)) != 0)
#define IS_SYNTAX_BV(syn, bvm)    (((syn)->behavior & (bvm)) != 0)
 | 
			
		||||
 | 
			
		||||
extern int    onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
 | 
			
		||||
extern int    onig_strncmp P_((UChar* s1, UChar* s2, int n));
 | 
			
		||||
extern void   onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
 | 
			
		||||
extern int    onig_scan_unsigned_number P_((UChar** src, UChar* end, OnigEncoding enc));
 | 
			
		||||
extern void   onig_reduce_nested_qualifier P_((Node* pnode, Node* cnode));
 | 
			
		||||
extern void   onig_node_conv_to_str_node P_((Node* node, int raw));
 | 
			
		||||
extern int    onig_node_str_cat P_((Node* node, UChar* s, UChar* end));
 | 
			
		||||
extern void   onig_node_free P_((Node* node));
 | 
			
		||||
extern Node*  onig_node_new_effect P_((int type));
 | 
			
		||||
extern Node*  onig_node_new_anchor P_((int type));
 | 
			
		||||
extern int    onig_free_node_list();
 | 
			
		||||
extern int    onig_names_free P_((regex_t* reg));
 | 
			
		||||
extern int    onig_parse_make_tree P_((Node** root, UChar* pattern, UChar* end, regex_t* reg, ScanEnv* env));
 | 
			
		||||
 | 
			
		||||
#ifdef ONIG_DEBUG
 | 
			
		||||
#ifdef USE_NAMED_GROUP
 | 
			
		||||
extern int onig_print_names(FILE*, regex_t*);
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif /* REGPARSE_H */
 | 
			
		||||
							
								
								
									
										174
									
								
								sjis.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										174
									
								
								sjis.c
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,174 @@
 | 
			
		|||
/**********************************************************************
 | 
			
		||||
 | 
			
		||||
  sjis.c -  Oniguruma (regular expression library)
 | 
			
		||||
 | 
			
		||||
  Copyright (C) 2003-2004  K.Kosako (kosako@sofnec.co.jp)
 | 
			
		||||
 | 
			
		||||
**********************************************************************/
 | 
			
		||||
#include "regenc.h"
 | 
			
		||||
 | 
			
		||||
/* Indexed by byte value.  An entry is 1 when the byte is accepted as the
   second byte of a two-byte character (0x40-0x7e and 0x80-0xfc here) and
   0 for every other byte. */
static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
  /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xc0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xd0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xe0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xf0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
};
 | 
			
		||||
 | 
			
		||||
/* Non-zero when the length table of OnigEncodingSJIS gives `byte' a
   character length greater than one, i.e. it starts a multi-byte char. */
#define SJIS_ISMB_FIRST(byte)  (OnigEncodingSJIS.len_table[(byte)] > 1)
/* Non-zero when `byte' is marked in SJIS_CAN_BE_TRAIL_TABLE. */
#define SJIS_ISMB_TRAIL(byte)  (SJIS_CAN_BE_TRAIL_TABLE[(byte)])
 | 
			
		||||
 | 
			
		||||
static OnigCodePoint
 | 
			
		||||
sjis_mbc_to_code(UChar* p, UChar* end)
 | 
			
		||||
{
 | 
			
		||||
  int c, i, len;
 | 
			
		||||
  OnigCodePoint n;
 | 
			
		||||
 | 
			
		||||
  c = *p++;
 | 
			
		||||
  len = enc_len(ONIG_ENCODING_SJIS, c);
 | 
			
		||||
  n = c;
 | 
			
		||||
  if (len == 1) return n;
 | 
			
		||||
 | 
			
		||||
  for (i = 1; i < len; i++) {
 | 
			
		||||
    if (p >= end) break;
 | 
			
		||||
    c = *p++;
 | 
			
		||||
    n <<= 8;  n += c;
 | 
			
		||||
  }
 | 
			
		||||
  return n;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
sjis_code_to_mbc(OnigCodePoint code, UChar *buf)
 | 
			
		||||
{
 | 
			
		||||
  UChar *p = buf;
 | 
			
		||||
 | 
			
		||||
  if ((code & 0xff00) != 0) *p++ = (UChar )(((code >>  8) & 0xff));
 | 
			
		||||
  *p++ = (UChar )(code & 0xff);
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
  if (enc_len(ONIG_ENCODING_SJIS, buf[0]) != (p - buf))
 | 
			
		||||
    return REGERR_INVALID_WIDE_CHAR_VALUE;
 | 
			
		||||
#endif
 | 
			
		||||
  return p - buf;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
sjis_mbc_to_lower(UChar* p, UChar* lower)
 | 
			
		||||
{
 | 
			
		||||
  int len;
 | 
			
		||||
 | 
			
		||||
  if (ONIGENC_IS_MBC_ASCII(p)) {
 | 
			
		||||
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
 | 
			
		||||
    return 1;
 | 
			
		||||
  }
 | 
			
		||||
  else {
 | 
			
		||||
    len = enc_len(ONIG_ENCODING_SJIS, *p);
 | 
			
		||||
    if (lower != p) {
 | 
			
		||||
      /* memcpy(lower, p, len); */
 | 
			
		||||
      int i;
 | 
			
		||||
      for (i = 0; i < len; i++) {
 | 
			
		||||
	*lower++ = *p++;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    return len; /* return byte length of converted char to lower */
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
sjis_code_is_ctype(OnigCodePoint code, unsigned int ctype)
 | 
			
		||||
{
 | 
			
		||||
  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
 | 
			
		||||
    if (code < 128)
 | 
			
		||||
      return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
 | 
			
		||||
    else {
 | 
			
		||||
      int first = onigenc_mb2_code_to_mbc_first(code);
 | 
			
		||||
      return (enc_len(ONIG_ENCODING_SJIS, first) > 1 ? TRUE : FALSE);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ctype &= ~ONIGENC_CTYPE_WORD;
 | 
			
		||||
    if (ctype == 0) return FALSE;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (code < 128)
 | 
			
		||||
    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
 | 
			
		||||
  else
 | 
			
		||||
    return FALSE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Return the start of the character that contains position s, looking no
   further back than `start'.  Returns s itself when s <= start. */
static UChar*
sjis_left_adjust_char_head(UChar* start, UChar* s)
{
  UChar *head;
  int width;

  if (s <= start) return s;

  head = s;
  if (SJIS_ISMB_TRAIL(*head)) {
    /* s might be a trail byte: back up over the run of bytes just before
       it that could each be a lead byte. */
    while (head > start && SJIS_ISMB_FIRST(head[-1]))
      head--;
  }

  width = enc_len(ONIG_ENCODING_SJIS, *head);
  if (head + width > s) return head;

  /* Skip the first character of the run, then advance by the remaining
     distance to s rounded down to an even number of bytes. */
  head += width;
  return head + ((s - head) & ~1);
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
sjis_is_allowed_reverse_match(UChar* s, UChar* end)
 | 
			
		||||
{
 | 
			
		||||
  UChar c = *s;
 | 
			
		||||
  return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Encoding descriptor for Shift_JIS. */
OnigEncodingType OnigEncodingSJIS = {
  /* Character length by first byte: 2 for 0x81-0x9f and 0xe0-0xfc,
     1 for every other byte. */
  {
    /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0x90 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xc0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xd0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
  },
  "Shift_JIS",                             /* name */
  2,                                       /* max byte length */
  FALSE,                                   /* is_fold_match */
  ONIGENC_CTYPE_SUPPORT_LEVEL_SB,          /* ctype_support_level */
  FALSE,                                   /* is continuous sb mb codepoint */
  sjis_mbc_to_code,                        /* bytes -> code point */
  onigenc_mb2_code_to_mbclen,              /* shared helper (defined elsewhere) */
  sjis_code_to_mbc,                        /* code point -> bytes */
  sjis_mbc_to_lower,                       /* ASCII-only lower-casing */
  onigenc_mbn_mbc_is_case_ambig,           /* shared helper (defined elsewhere) */
  sjis_code_is_ctype,                      /* ctype membership test */
  onigenc_nothing_get_ctype_code_range,    /* shared helper (defined elsewhere) */
  sjis_left_adjust_char_head,              /* find start of enclosing char */
  sjis_is_allowed_reverse_match,           /* FALSE on possible trail bytes */
  onigenc_nothing_get_all_fold_match_code, /* shared helper (defined elsewhere) */
  onigenc_nothing_get_fold_match_info      /* shared helper (defined elsewhere) */
};
 | 
			
		||||
							
								
								
									
										566
									
								
								utf8.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										566
									
								
								utf8.c
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,566 @@
 | 
			
		|||
/**********************************************************************
 | 
			
		||||
 | 
			
		||||
  utf8.c -  Oniguruma (regular expression library)
 | 
			
		||||
 | 
			
		||||
  Copyright (C) 2003-2004  K.Kosako (kosako@sofnec.co.jp)
 | 
			
		||||
 | 
			
		||||
**********************************************************************/
 | 
			
		||||
#include "regenc.h"
 | 
			
		||||
 | 
			
		||||
/* Non-zero unless the top two bits of c are 10, i.e. unless c has the bit
   pattern of a UTF-8 continuation byte. */
#define utf8_islead(c)     ((UChar )((c) & 0xc0) != 0x80)

/* Non-zero when the ctype table entry for `code' (which must be below 256)
   has any of the bits in `ctype' set.  Both arguments are parenthesized so
   that compound expressions such as `a | b' are combined as intended. */
#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
  ((EncUnicode_ISO_8859_1_CtypeTable[(code)] & (ctype)) != 0)
 | 
			
		||||
 | 
			
		||||
/* Character-type bit masks for code points 0x00-0xff, indexed by code
   point and tested through ENC_IS_ISO_8859_1_CTYPE().  The table is only
   ever read in this file, so it is declared const like the other lookup
   tables. */
static const unsigned short EncUnicode_ISO_8859_1_CtypeTable[256] = {
  /* 0x00 */ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0x08 */ 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
  /* 0x10 */ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0x18 */ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0x20 */ 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x1050, 0x10d0, 0x10d0, 0x10d0,
  /* 0x28 */ 0x10d0, 0x10d0, 0x10d0, 0x1050, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
  /* 0x30 */ 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
  /* 0x38 */ 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x1050, 0x1050, 0x1050, 0x10d0,
  /* 0x40 */ 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
  /* 0x48 */ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
  /* 0x50 */ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
  /* 0x58 */ 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x1050, 0x18d0,
  /* 0x60 */ 0x1050, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
  /* 0x68 */ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
  /* 0x70 */ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
  /* 0x78 */ 0x1871, 0x1871, 0x1871, 0x10d0, 0x1050, 0x10d0, 0x1050, 0x1004,
  /* 0x80 */ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0x88 */ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0x90 */ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0x98 */ 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
  /* 0xa0 */ 0x1142, 0x10d0, 0x1050, 0x1050, 0x1050, 0x1050, 0x1050, 0x1050,
  /* 0xa8 */ 0x1050, 0x1050, 0x1871, 0x10d0, 0x1050, 0x10d0, 0x1050, 0x1050,
  /* 0xb0 */ 0x1050, 0x1050, 0x1850, 0x1850, 0x1050, 0x1871, 0x1050, 0x10d0,
  /* 0xb8 */ 0x1050, 0x1850, 0x1871, 0x10d0, 0x1850, 0x1850, 0x1850, 0x10d0,
  /* 0xc0 */ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
  /* 0xc8 */ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
  /* 0xd0 */ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1050,
  /* 0xd8 */ 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1871,
  /* 0xe0 */ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
  /* 0xe8 */ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
  /* 0xf0 */ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1050,
  /* 0xf8 */ 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871
};
 | 
			
		||||
 | 
			
		||||
static OnigCodePoint
 | 
			
		||||
utf8_mbc_to_code(UChar* p, UChar* end)
 | 
			
		||||
{
 | 
			
		||||
  int c, len;
 | 
			
		||||
  OnigCodePoint n;
 | 
			
		||||
 | 
			
		||||
  c = *p++;
 | 
			
		||||
  len = enc_len(ONIG_ENCODING_UTF8, c);
 | 
			
		||||
  if (len > 1) {
 | 
			
		||||
    len--;
 | 
			
		||||
    n = c & ((1 << (6 - len)) - 1);
 | 
			
		||||
    while (len--) {
 | 
			
		||||
      c = *p++;
 | 
			
		||||
      n = (n << 6) | (c & ((1 << 6) - 1));
 | 
			
		||||
    }
 | 
			
		||||
    return n;
 | 
			
		||||
  }
 | 
			
		||||
  else
 | 
			
		||||
    return (OnigCodePoint )c;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
utf8_code_to_mbclen(OnigCodePoint code)
 | 
			
		||||
{
 | 
			
		||||
  if      ((code & 0xffffff80) == 0) return 1;
 | 
			
		||||
  else if ((code & 0xfffff800) == 0) {
 | 
			
		||||
    if (code <= 0xff && code >= 0xfe)
 | 
			
		||||
      return 1;
 | 
			
		||||
    return 2;
 | 
			
		||||
  }
 | 
			
		||||
  else if ((code & 0xffff0000) == 0) return 3;
 | 
			
		||||
  else if ((code & 0xffe00000) == 0) return 4;
 | 
			
		||||
  else if ((code & 0xfc000000) == 0) return 5;
 | 
			
		||||
  else if ((code & 0x80000000) == 0) return 6;
 | 
			
		||||
  else
 | 
			
		||||
    return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#if 0
/* Disabled: compute only the first byte that `code' would be encoded
   with, or ONIGERR_TOO_BIG_WIDE_CHAR_VALUE when bit 31 is set. */
static int
utf8_code_to_mbc_first(OnigCodePoint code)
{
  if ((code & 0xffffff80) == 0) return code;
  if ((code & 0xfffff800) == 0) return ((code>>6)& 0x1f) | 0xc0;
  if ((code & 0xffff0000) == 0) return ((code>>12) & 0x0f) | 0xe0;
  if ((code & 0xffe00000) == 0) return ((code>>18) & 0x07) | 0xf0;
  if ((code & 0xfc000000) == 0) return ((code>>24) & 0x03) | 0xf8;
  if ((code & 0x80000000) == 0) return ((code>>30) & 0x01) | 0xfc;

  return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
}
#endif
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
utf8_code_to_mbc(OnigCodePoint code, UChar *buf)
 | 
			
		||||
{
 | 
			
		||||
#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80)
 | 
			
		||||
#define UTF8_TRAIL0(code)        (UChar )(((code) & 0x3f) | 0x80)
 | 
			
		||||
 | 
			
		||||
  if ((code & 0xffffff80) == 0) {
 | 
			
		||||
    *buf = (UChar )code;
 | 
			
		||||
    return 1;
 | 
			
		||||
  }
 | 
			
		||||
  else {
 | 
			
		||||
    UChar *p = buf;
 | 
			
		||||
 | 
			
		||||
    if ((code & 0xfffff800) == 0) {
 | 
			
		||||
      *p++ = (UChar )(((code>>6)& 0x1f) | 0xc0);
 | 
			
		||||
    }
 | 
			
		||||
    else if ((code & 0xffff0000) == 0) {
 | 
			
		||||
      *p++ = (UChar )(((code>>12) & 0x0f) | 0xe0);
 | 
			
		||||
      *p++ = UTF8_TRAILS(code, 6);
 | 
			
		||||
    }
 | 
			
		||||
    else if ((code & 0xffe00000) == 0) {
 | 
			
		||||
      *p++ = (UChar )(((code>>18) & 0x07) | 0xf0);
 | 
			
		||||
      *p++ = UTF8_TRAILS(code, 12);
 | 
			
		||||
      *p++ = UTF8_TRAILS(code,  6);
 | 
			
		||||
    }
 | 
			
		||||
    else if ((code & 0xfc000000) == 0) {
 | 
			
		||||
      *p++ = (UChar )(((code>>24) & 0x03) | 0xf8);
 | 
			
		||||
      *p++ = UTF8_TRAILS(code, 18);
 | 
			
		||||
      *p++ = UTF8_TRAILS(code, 12);
 | 
			
		||||
      *p++ = UTF8_TRAILS(code,  6);
 | 
			
		||||
    }
 | 
			
		||||
    else if ((code & 0x80000000) == 0) {
 | 
			
		||||
      *p++ = (UChar )(((code>>30) & 0x01) | 0xfc);
 | 
			
		||||
      *p++ = UTF8_TRAILS(code, 24);
 | 
			
		||||
      *p++ = UTF8_TRAILS(code, 18);
 | 
			
		||||
      *p++ = UTF8_TRAILS(code, 12);
 | 
			
		||||
      *p++ = UTF8_TRAILS(code,  6);
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
      return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    *p++ = UTF8_TRAIL0(code);
 | 
			
		||||
    return p - buf;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
utf8_mbc_to_lower(UChar* p, UChar* lower)
 | 
			
		||||
{
 | 
			
		||||
  int len;
 | 
			
		||||
 | 
			
		||||
  /* !!! U+0080 - U+00ff is treated by fold match. !!! */
 | 
			
		||||
  if (ONIGENC_IS_MBC_ASCII(p)) {
 | 
			
		||||
    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
 | 
			
		||||
    return 1; /* return byte length of converted char to lower */
 | 
			
		||||
  }
 | 
			
		||||
  else {
 | 
			
		||||
    len = enc_len(ONIG_ENCODING_UTF8, *p);
 | 
			
		||||
    if (lower != p) {
 | 
			
		||||
      /* memcpy(lower, p, len); */
 | 
			
		||||
      int i;
 | 
			
		||||
      for (i = 0; i < len; i++) {
 | 
			
		||||
	*lower++ = *p++;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    return len; /* return byte length of converted char to lower */
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
utf8_mbc_is_case_ambig(UChar* p)
 | 
			
		||||
{
 | 
			
		||||
  /* !!! U+0080 - U+00ff ( 0x80[0xc2,0x80] - 0xff[0xc3,0xbf] )
 | 
			
		||||
     is treated by fold match. !!! */
 | 
			
		||||
 | 
			
		||||
  if (ONIGENC_IS_MBC_ASCII(p))
 | 
			
		||||
    return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
 | 
			
		||||
 | 
			
		||||
  return FALSE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
utf8_code_is_ctype(OnigCodePoint code, unsigned int ctype)
 | 
			
		||||
{
 | 
			
		||||
  if (code < 256) {
 | 
			
		||||
    return ENC_IS_ISO_8859_1_CTYPE(code, ctype);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
 | 
			
		||||
    return TRUE;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return FALSE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
utf8_get_ctype_code_range(int ctype, int* nsb, int* nmb,
 | 
			
		||||
			  OnigCodePointRange* sbr[], OnigCodePointRange* mbr[])
 | 
			
		||||
{
 | 
			
		||||
#define CR_SET(sbl,mbl) do { \
 | 
			
		||||
  *nsb = sizeof(sbl) / sizeof(OnigCodePointRange); \
 | 
			
		||||
  *nmb = sizeof(mbl) / sizeof(OnigCodePointRange); \
 | 
			
		||||
  *sbr = sbl; \
 | 
			
		||||
  *mbr = mbl; \
 | 
			
		||||
} while (0)
 | 
			
		||||
 | 
			
		||||
#define CR_SB_SET(sbl) do { \
 | 
			
		||||
  *nsb = sizeof(sbl) / sizeof(OnigCodePointRange); \
 | 
			
		||||
  *nmb = 0; \
 | 
			
		||||
  *sbr = sbl; \
 | 
			
		||||
} while (0)
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange SBAlpha[] = {
 | 
			
		||||
    { 0x41, 0x5a },
 | 
			
		||||
    { 0x61, 0x7a }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange MBAlpha[] = {
 | 
			
		||||
    { 0xaa, 0xaa },
 | 
			
		||||
    { 0xb5, 0xb5 },
 | 
			
		||||
    { 0xba, 0xba },
 | 
			
		||||
    { 0xc0, 0xd6 },
 | 
			
		||||
    { 0xd8, 0xf6 },
 | 
			
		||||
    { 0xf8, 0x220 }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange SBBlank[] = {
 | 
			
		||||
    { 0x09, 0x09 },
 | 
			
		||||
    { 0x20, 0x20 }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange MBBlank[] = {
 | 
			
		||||
    { 0xa0, 0xa0 }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange SBCntrl[] = {
 | 
			
		||||
    { 0x00, 0x1f },
 | 
			
		||||
    { 0x7f, 0x7f }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange MBCntrl[] = {
 | 
			
		||||
    { 0x80, 0x9f }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange SBDigit[] = {
 | 
			
		||||
    { 0x30, 0x39 }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange SBGraph[] = {
 | 
			
		||||
    { 0x21, 0x7e }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange MBGraph[] = {
 | 
			
		||||
    { 0xa1, 0x220 }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange SBLower[] = {
 | 
			
		||||
    { 0x61, 0x7a }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange MBLower[] = {
 | 
			
		||||
    { 0xaa, 0xaa },
 | 
			
		||||
    { 0xb5, 0xb5 },
 | 
			
		||||
    { 0xba, 0xba },
 | 
			
		||||
    { 0xdf, 0xf6 },
 | 
			
		||||
    { 0xf8, 0xff }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange SBPrint[] = {
 | 
			
		||||
    { 0x20, 0x7e }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange MBPrint[] = {
 | 
			
		||||
    { 0xa0, 0x220 }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange SBPunct[] = {
 | 
			
		||||
    { 0x21, 0x23 },
 | 
			
		||||
    { 0x25, 0x2a },
 | 
			
		||||
    { 0x2c, 0x2f },
 | 
			
		||||
    { 0x3a, 0x3b },
 | 
			
		||||
    { 0x3f, 0x40 },
 | 
			
		||||
    { 0x5b, 0x5d },
 | 
			
		||||
    { 0x5f, 0x5f },
 | 
			
		||||
    { 0x7b, 0x7b },
 | 
			
		||||
    { 0x7d, 0x7d }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange MBPunct[] = {
 | 
			
		||||
    { 0xa1, 0xa1 },
 | 
			
		||||
    { 0xab, 0xab },
 | 
			
		||||
    { 0xad, 0xad },
 | 
			
		||||
    { 0xb7, 0xb7 },
 | 
			
		||||
    { 0xbb, 0xbb },
 | 
			
		||||
    { 0xbf, 0xbf }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange SBSpace[] = {
 | 
			
		||||
    { 0x09, 0x0d },
 | 
			
		||||
    { 0x20, 0x20 }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange MBSpace[] = {
 | 
			
		||||
    { 0xa0, 0xa0 }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange SBUpper[] = {
 | 
			
		||||
    { 0x41, 0x5a }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange MBUpper[] = {
 | 
			
		||||
    { 0xc0, 0xd6 },
 | 
			
		||||
    { 0xd8, 0xde }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange SBXDigit[] = {
 | 
			
		||||
    { 0x30, 0x39 },
 | 
			
		||||
    { 0x41, 0x46 },
 | 
			
		||||
    { 0x61, 0x66 }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange SBWord[] = {
 | 
			
		||||
    { 0x30, 0x39 },
 | 
			
		||||
    { 0x41, 0x5a },
 | 
			
		||||
    { 0x5f, 0x5f },
 | 
			
		||||
    { 0x61, 0x7a }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange MBWord[] = {
 | 
			
		||||
    { 0xaa, 0xaa },
 | 
			
		||||
    { 0xb2, 0xb3 },
 | 
			
		||||
    { 0xb5, 0xb5 },
 | 
			
		||||
    { 0xb9, 0xba },
 | 
			
		||||
    { 0xbc, 0xbe },
 | 
			
		||||
    { 0xc0, 0xd6 },
 | 
			
		||||
    { 0xd8, 0xf6 },
 | 
			
		||||
#if 0
 | 
			
		||||
    { 0xf8, 0x220 }
 | 
			
		||||
#else
 | 
			
		||||
    { 0xf8, 0x7fffffff } /* all multibyte code as word */
 | 
			
		||||
#endif
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange SBAscii[] = {
 | 
			
		||||
    { 0x00, 0x7f }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange SBAlnum[] = {
 | 
			
		||||
    { 0x30, 0x39 },
 | 
			
		||||
    { 0x41, 0x5a },
 | 
			
		||||
    { 0x61, 0x7a }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static OnigCodePointRange MBAlnum[] = {
 | 
			
		||||
    { 0xaa, 0xaa },
 | 
			
		||||
    { 0xb5, 0xb5 },
 | 
			
		||||
    { 0xba, 0xba },
 | 
			
		||||
    { 0xc0, 0xd6 },
 | 
			
		||||
    { 0xd8, 0xf6 },
 | 
			
		||||
    { 0xf8, 0x220 }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  switch (ctype) {
 | 
			
		||||
  case ONIGENC_CTYPE_ALPHA:
 | 
			
		||||
    CR_SET(SBAlpha, MBAlpha);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_BLANK:
 | 
			
		||||
    CR_SET(SBBlank, MBBlank);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_CNTRL:
 | 
			
		||||
    CR_SET(SBCntrl, MBCntrl);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_DIGIT:
 | 
			
		||||
    CR_SB_SET(SBDigit);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_GRAPH:
 | 
			
		||||
    CR_SET(SBGraph, MBGraph);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_LOWER:
 | 
			
		||||
    CR_SET(SBLower, MBLower);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_PRINT:
 | 
			
		||||
    CR_SET(SBPrint, MBPrint);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_PUNCT:
 | 
			
		||||
    CR_SET(SBPunct, MBPunct);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_SPACE:
 | 
			
		||||
    CR_SET(SBSpace, MBSpace);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_UPPER:
 | 
			
		||||
    CR_SET(SBUpper, MBUpper);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_XDIGIT:
 | 
			
		||||
    CR_SB_SET(SBXDigit);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_WORD:
 | 
			
		||||
    CR_SET(SBWord, MBWord);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_ASCII:
 | 
			
		||||
    CR_SB_SET(SBAscii);
 | 
			
		||||
    break;
 | 
			
		||||
  case ONIGENC_CTYPE_ALNUM:
 | 
			
		||||
    CR_SET(SBAlnum, MBAlnum);
 | 
			
		||||
    break;
 | 
			
		||||
 | 
			
		||||
  default:
 | 
			
		||||
    return ONIGERR_TYPE_BUG;
 | 
			
		||||
    break;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
utf8_get_all_fold_match_code(OnigCodePoint** codes)
 | 
			
		||||
{
 | 
			
		||||
  static OnigCodePoint list[] = {
 | 
			
		||||
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
 | 
			
		||||
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
 | 
			
		||||
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
 | 
			
		||||
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
 | 
			
		||||
 | 
			
		||||
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
 | 
			
		||||
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
 | 
			
		||||
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6,
 | 
			
		||||
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe,
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  *codes = list;
 | 
			
		||||
  return sizeof(list) / sizeof(OnigCodePoint);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
utf8_get_fold_match_info(UChar* p, UChar* end, OnigEncFoldMatchInfo** info)
 | 
			
		||||
{
 | 
			
		||||
  
 | 
			
		||||
  static OnigEncFoldMatchInfo xc[] = {
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\200", "\303\240" } }, /* CodePoint 0xc0 */
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\201", "\303\241" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\202", "\303\242" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\203", "\303\243" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\204", "\303\244" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\205", "\303\245" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\206", "\303\246" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\207", "\303\247" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\210", "\303\250" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\211", "\303\251" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\212", "\303\252" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\213", "\303\253" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\214", "\303\254" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\215", "\303\255" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\216", "\303\256" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\217", "\303\257" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\220", "\303\260" } }, /* CodePoint 0xd0 */
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\221", "\303\261" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\222", "\303\262" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\223", "\303\263" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\224", "\303\264" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\225", "\303\265" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\226", "\303\266" } },
 | 
			
		||||
    { 0, { 0 }, { "" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\230", "\303\270" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\231", "\303\271" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\232", "\303\272" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\233", "\303\273" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\234", "\303\274" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\235", "\303\275" } },
 | 
			
		||||
    { 2, { 2, 2 }, { "\303\236", "\303\276" } },
 | 
			
		||||
    { 3, { 2, 2, 2 }, { "\303\237", "ss", "SS" }} /* ess-tsett(U+00DF) */
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  if (p + 1 >= end)  return -1;
 | 
			
		||||
  if (*p < 0x80) {
 | 
			
		||||
    if ((*p == 'S' && *(p+1) == 'S') ||
 | 
			
		||||
	(*p == 's' && *(p+1) == 's')) {
 | 
			
		||||
      *info = &(xc[0xdf - 0xc0]);
 | 
			
		||||
      return 2;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  else if (*p == 195) { /* 195 == '\303' */
 | 
			
		||||
    int c = *(p+1);
 | 
			
		||||
    if (c >= 128) {
 | 
			
		||||
      if (c <= 159) { /* upper */
 | 
			
		||||
	if (c == 151) return -1; /* 0xd7 */
 | 
			
		||||
	*info = &(xc[c - 128]);
 | 
			
		||||
	return 2;
 | 
			
		||||
      }
 | 
			
		||||
      else { /* lower */
 | 
			
		||||
	if (c == 183) return -1; /* 0xf7 */
 | 
			
		||||
	*info = &(xc[c - 160]);
 | 
			
		||||
	return 2;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return -1; /* is not a fold string. */
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/* Step back from s to the nearest byte for which utf8_islead() is true,
   never going below `start'.  Returns s itself when s <= start. */
static UChar*
utf8_left_adjust_char_head(UChar* start, UChar* s)
{
  UChar *head;

  if (s <= start) return s;

  for (head = s; head > start; head--) {
    if (utf8_islead(*head))
      break;
  }
  return head;
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
utf8_is_allowed_reverse_match(UChar* s, UChar* end)
 | 
			
		||||
{
 | 
			
		||||
  return TRUE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Encoding descriptor for UTF-8. */
OnigEncodingType OnigEncodingUTF8 = {
  /* Character length by first byte: 2 for 0xc0-0xdf, 3 for 0xe0-0xef,
     4 for 0xf0-0xf7, 5 for 0xf8-0xfb, 6 for 0xfc-0xfd, and 1 for every
     other byte (including 0x80-0xbf, 0xfe and 0xff). */
  {
    /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xe0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
  },
  "UTF-8",                           /* name */
  6,                                 /* max byte length */
  TRUE,                              /* is_fold_match */
  ONIGENC_CTYPE_SUPPORT_LEVEL_FULL,  /* ctype_support_level */
  TRUE,                              /* is continuous sb mb codepoint */
  utf8_mbc_to_code,                  /* bytes -> code point */
  utf8_code_to_mbclen,               /* code point -> byte length */
  utf8_code_to_mbc,                  /* code point -> bytes */
  utf8_mbc_to_lower,                 /* ASCII-only lower-casing */
  utf8_mbc_is_case_ambig,            /* ASCII-only case ambiguity test */
  utf8_code_is_ctype,                /* ctype membership test */
  utf8_get_ctype_code_range,         /* code ranges of a ctype */
  utf8_left_adjust_char_head,        /* find start of enclosing char */
  utf8_is_allowed_reverse_match,     /* always TRUE */
  utf8_get_all_fold_match_code,      /* list of fold-match code points */
  utf8_get_fold_match_info           /* fold-match lookup for a string */
};
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue