mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
This commit was generated by cvs2svn to compensate for changes in r372,
which included commits to RCS files with non-trunk default branches. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@373 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
9c5b1986a3
commit
210367ec88
140 changed files with 25635 additions and 14037 deletions
205
regex.h
205
regex.h
|
@ -22,6 +22,8 @@
|
|||
#ifndef __REGEXP_LIBRARY
|
||||
#define __REGEXP_LIBRARY
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
/* Define number of parens for which we record the beginnings and ends.
|
||||
This affects how much space the `struct re_registers' type takes up. */
|
||||
#ifndef RE_NREGS
|
||||
|
@ -38,174 +40,52 @@
|
|||
#endif
|
||||
|
||||
|
||||
/* This defines the various regexp syntaxes. */
|
||||
extern long re_syntax_options;
|
||||
|
||||
|
||||
/* The following bits are used in the re_syntax_options variable to choose among
|
||||
alternative regexp syntaxes. */
|
||||
|
||||
/* If this bit is set, plain parentheses serve as grouping, and backslash
|
||||
parentheses are needed for literal searching.
|
||||
If not set, backslash-parentheses are grouping, and plain parentheses
|
||||
are for literal searching. */
|
||||
#define RE_NO_BK_PARENS 1L
|
||||
|
||||
/* If this bit is set, plain | serves as the `or'-operator, and \| is a
|
||||
literal.
|
||||
If not set, \| serves as the `or'-operator, and | is a literal. */
|
||||
#define RE_NO_BK_VBAR (1L << 1)
|
||||
|
||||
/* If this bit is set, | binds tighter than ^ or $.
|
||||
If not set, the contrary. */
|
||||
#define RE_TIGHT_VBAR (1L << 3)
|
||||
|
||||
/* If this bit is set, then treat newline as an OR operator.
|
||||
If not set, treat it as a normal character. */
|
||||
#define RE_NEWLINE_OR (1L << 4)
|
||||
|
||||
/* If this bit is set, then special characters may act as normal
|
||||
characters in some contexts. Specifically, this applies to:
|
||||
^ -- only special at the beginning, or after ( or |;
|
||||
$ -- only special at the end, or before ) or |;
|
||||
*, +, ? -- only special when not after the beginning, (, or |.
|
||||
If this bit is not set, special characters (such as *, ^, and $)
|
||||
always have their special meaning regardless of the surrounding
|
||||
context. */
|
||||
#define RE_CONTEXT_INDEP_OPS (1L << 5)
|
||||
|
||||
/* If this bit is not set, then \ before anything inside [ and ] is taken as
|
||||
a real \.
|
||||
If set, then such a \ escapes the following character. This is a
|
||||
special case for awk. */
|
||||
#define RE_AWK_CLASS_HACK (1L << 6)
|
||||
|
||||
/* If this bit is set, then \{ and \} or { and } serve as interval operators.
|
||||
If not set, then \{ and \} and { and } are treated as literals. */
|
||||
#define RE_INTERVALS (1L << 7)
|
||||
|
||||
/* If this bit is not set, then \{ and \} serve as interval operators and
|
||||
{ and } are literals.
|
||||
If set, then { and } serve as interval operators and \{ and \} are
|
||||
literals. */
|
||||
#define RE_NO_BK_CURLY_BRACES (1L << 8)
|
||||
#define RE_NO_BK_BRACES RE_NO_BK_CURLY_BRACES
|
||||
|
||||
/* If this bit is set, then character classes are supported; they are:
|
||||
[:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
|
||||
[:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
|
||||
If not set, then character classes are not supported. */
|
||||
#define RE_CHAR_CLASSES (1L << 9)
|
||||
|
||||
/* If this bit is set, then the dot re doesn't match a null byte.
|
||||
If not set, it does. */
|
||||
#define RE_DOT_NOT_NULL (1L << 10)
|
||||
|
||||
/* If this bit is set, then [^...] doesn't match a newline.
|
||||
If not set, it does. */
|
||||
#define RE_HAT_NOT_NEWLINE (1L << 11)
|
||||
|
||||
/* If this bit is set, back references are not recognized.
|
||||
If not set, they are. */
|
||||
#define RE_NO_BK_REFS (1L << 12)
|
||||
|
||||
/* If this bit is set, back references must refer to a preceding
|
||||
subexpression. If not set, a back reference to a nonexistent
|
||||
subexpression is treated as literal characters. */
|
||||
#define RE_NO_EMPTY_BK_REF (1L << 13)
|
||||
|
||||
/* If this bit is set, bracket expressions can't be empty.
|
||||
If it is not set, they can be empty. */
|
||||
#define RE_NO_EMPTY_BRACKETS (1L << 14)
|
||||
|
||||
/* If this bit is set, then *, +, ? and { cannot be first in an re or
|
||||
immediately after a |, or a (. Furthermore, a | cannot be first or
|
||||
last in an re, or immediately follow another | or a (. Also, a ^
|
||||
cannot appear in a nonleading position and a $ cannot appear in a
|
||||
nontrailing position (outside of bracket expressions, that is). */
|
||||
#define RE_CONTEXTUAL_INVALID_OPS (1L << 15)
|
||||
|
||||
/* If this bit is set, then +, ? and | aren't recognized as operators.
|
||||
If it's not, they are. */
|
||||
#define RE_LIMITED_OPS (1L << 16)
|
||||
|
||||
/* If this bit is set, then an ending range point has to collate higher
|
||||
or equal to the starting range point.
|
||||
If it's not set, then when the ending range point collates lower
|
||||
than the starting range point, the range is just considered empty. */
|
||||
#define RE_NO_EMPTY_RANGES (1L << 17)
|
||||
|
||||
/* If this bit is set, then a hyphen (-) can't be an ending range point.
|
||||
If it isn't, then it can. */
|
||||
#define RE_NO_HYPHEN_RANGE_END (1L << 18)
|
||||
|
||||
/* If this bit is not set, then \ inside a bracket expression is literal.
|
||||
If set, then such a \ quotes the following character. */
|
||||
#define RE_BACKSLASH_ESCAPE_IN_LISTS (1L << 19)
|
||||
|
||||
/* Define combinations of bits for the standard possibilities. */
|
||||
#define RE_SYNTAX_POSIX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
|
||||
| RE_CONTEXT_INDEP_OPS)
|
||||
#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_AWK_CLASS_HACK)
|
||||
#define RE_SYNTAX_EGREP (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
|
||||
| RE_CONTEXT_INDEP_OPS | RE_NEWLINE_OR)
|
||||
#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
|
||||
#define RE_SYNTAX_EMACS 0
|
||||
#define RE_SYNTAX_POSIX_BASIC (RE_INTERVALS | RE_BK_PLUS_QM \
|
||||
| RE_CHAR_CLASSES | RE_DOT_NOT_NULL \
|
||||
| RE_HAT_NOT_NEWLINE | RE_NO_EMPTY_BK_REF \
|
||||
| RE_NO_EMPTY_BRACKETS | RE_LIMITED_OPS \
|
||||
| RE_NO_EMPTY_RANGES | RE_NO_HYPHEN_RANGE_END)
|
||||
|
||||
#define RE_SYNTAX_POSIX_EXTENDED (RE_INTERVALS | RE_NO_BK_CURLY_BRACES \
|
||||
| RE_NO_BK_VBAR | RE_NO_BK_PARENS \
|
||||
| RE_HAT_NOT_NEWLINE | RE_CHAR_CLASSES \
|
||||
| RE_NO_EMPTY_BRACKETS | RE_CONTEXTUAL_INVALID_OPS \
|
||||
| RE_NO_BK_REFS | RE_NO_EMPTY_RANGES \
|
||||
| RE_NO_HYPHEN_RANGE_END)
|
||||
#define RE_OPTION_EXTENDED (1L<<0)
|
||||
#define RE_OPTION_IGNORECASE (1L<<1)
|
||||
#define RE_MAY_IGNORECASE (1L<<2)
|
||||
#define RE_OPTIMIZE_ANCHOR (1L<<4)
|
||||
#define RE_OPTIMIZE_EXACTN (1L<<5)
|
||||
#define RE_OPTIMIZE_NO_BM (1L<<6)
|
||||
|
||||
/* For multi-byte char support */
|
||||
#define RE_MBCTYPE_EUC (1L << 20)
|
||||
#define RE_MBCTYPE_SJIS (1L << 21)
|
||||
#define RE_MBCTYPE_MASK (RE_MBCTYPE_EUC | RE_MBCTYPE_SJIS)
|
||||
#define MBCTYPE_ASCII 0
|
||||
#define MBCTYPE_EUC 1
|
||||
#define MBCTYPE_SJIS 2
|
||||
#define MBCTYPE_UTF8 3
|
||||
|
||||
#ifdef EUC
|
||||
#define DEFAULT_MBCTYPE RE_MBCTYPE_EUC
|
||||
#ifdef __STDC__
|
||||
extern const unsigned char *re_mbctab;
|
||||
void re_mbcinit (int);
|
||||
#else
|
||||
#ifdef SJIS
|
||||
#define DEFAULT_MBCTYPE RE_MBCTYPE_SJIS
|
||||
#else
|
||||
#define DEFAULT_MBCTYPE 0
|
||||
#endif
|
||||
extern unsigned char *re_mbctab;
|
||||
void re_mbcinit ();
|
||||
#endif
|
||||
|
||||
#undef ismbchar
|
||||
#define ismbchar(c) \
|
||||
(re_syntax_options & RE_MBCTYPE_EUC \
|
||||
? ((unsigned char) (c) >= 0x80) \
|
||||
: (re_syntax_options & RE_MBCTYPE_SJIS \
|
||||
? (( 0x80 <= (unsigned char) (c) \
|
||||
&& (unsigned char) (c) <= 0x9f) \
|
||||
|| (0xe0 <= (unsigned char) (c))) \
|
||||
: 0))
|
||||
#define ismbchar(c) re_mbctab[(unsigned char)(c)]
|
||||
#define mbclen(c) (re_mbctab[(unsigned char)(c)]+1)
|
||||
|
||||
/* This data structure is used to represent a compiled pattern. */
|
||||
|
||||
struct re_pattern_buffer
|
||||
{
|
||||
char *buffer; /* Space holding the compiled pattern commands. */
|
||||
long allocated; /* Size of space that `buffer' points to. */
|
||||
long used; /* Length of portion of buffer actually occupied */
|
||||
size_t allocated; /* Size of space that `buffer' points to. */
|
||||
size_t used; /* Length of portion of buffer actually occupied */
|
||||
char *fastmap; /* Pointer to fastmap, if any, or zero if none. */
|
||||
/* re_search uses the fastmap, if there is one,
|
||||
to skip over totally implausible characters. */
|
||||
char *translate; /* Translate table to apply to all characters before
|
||||
comparing, or zero for no translation.
|
||||
The translation is applied to a pattern when it is
|
||||
compiled and to data when it is matched. */
|
||||
char *must; /* Pointer to exact pattern which strings should have
|
||||
to be matched. */
|
||||
|
||||
int *must_skip; /* Pointer to exact pattern skip table for bm_search */
|
||||
char *stclass; /* Pointer to character class list at top */
|
||||
long options; /* Flags for options such as extended_pattern. */
|
||||
long re_nsub; /* Number of subexpressions found by the compiler. */
|
||||
char fastmap_accurate;
|
||||
/* Set to zero when a new pattern is stored,
|
||||
|
@ -219,11 +99,7 @@ struct re_pattern_buffer
|
|||
listed in the fastmap. */
|
||||
};
|
||||
|
||||
|
||||
/* search.c (search_buffer) needs this one value. It is defined both in
|
||||
regex.c and here. */
|
||||
#define RE_EXACTN_VALUE 1
|
||||
|
||||
typedef struct re_pattern_buffer regex_t;
|
||||
|
||||
/* Structure to store register contents data in.
|
||||
|
||||
|
@ -237,12 +113,23 @@ struct re_pattern_buffer
|
|||
|
||||
struct re_registers
|
||||
{
|
||||
unsigned allocated;
|
||||
unsigned num_regs;
|
||||
size_t allocated;
|
||||
size_t num_regs;
|
||||
int *beg;
|
||||
int *end;
|
||||
};
|
||||
|
||||
/* Type for byte offsets within the string. POSIX mandates this. */
|
||||
typedef size_t regoff_t;
|
||||
|
||||
/* POSIX specification for registers. Aside from the different names than
|
||||
`re_registers', POSIX uses an array of structures, instead of a
|
||||
structure of arrays. */
|
||||
typedef struct
|
||||
{
|
||||
regoff_t rm_so; /* Byte offset from string's start to substring's start. */
|
||||
regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
|
||||
} regmatch_t;
|
||||
|
||||
|
||||
#ifdef NeXT
|
||||
|
@ -252,14 +139,16 @@ struct re_registers
|
|||
#ifdef __STDC__
|
||||
|
||||
extern char *re_compile_pattern (char *, size_t, struct re_pattern_buffer *);
|
||||
void re_free_pattern (struct re_pattern_buffer *);
|
||||
/* Is this really advertised? */
|
||||
extern void re_compile_fastmap (struct re_pattern_buffer *);
|
||||
extern int re_search (struct re_pattern_buffer *, char*, int, int, int,
|
||||
extern int re_search (struct re_pattern_buffer *, char*, size_t, size_t, size_t,
|
||||
struct re_registers *);
|
||||
extern int re_match (struct re_pattern_buffer *, char *, int, int,
|
||||
extern int re_match (struct re_pattern_buffer *, char *, size_t, size_t,
|
||||
struct re_registers *);
|
||||
extern long re_set_syntax (long syntax);
|
||||
extern void re_set_casetable (char *table);
|
||||
extern void re_copy_registers (struct re_registers*, struct re_registers*);
|
||||
extern void re_free_registers (struct re_registers*);
|
||||
|
||||
#ifndef RUBY
|
||||
/* 4.2 bsd compatibility. */
|
||||
|
@ -270,19 +159,15 @@ extern int re_exec (char *);
|
|||
#else /* !__STDC__ */
|
||||
|
||||
extern char *re_compile_pattern ();
|
||||
void re_free_regexp ();
|
||||
/* Is this really advertised? */
|
||||
extern void re_compile_fastmap ();
|
||||
extern int re_search ();
|
||||
extern int re_match ();
|
||||
extern long re_set_syntax();
|
||||
extern void re_set_casetable ();
|
||||
extern void re_copy_registers ();
|
||||
extern void re_free_registers ();
|
||||
|
||||
#endif /* __STDC__ */
|
||||
|
||||
|
||||
#ifdef SYNTAX_TABLE
|
||||
extern char *re_syntax_table;
|
||||
#endif
|
||||
|
||||
#endif /* !__REGEXP_LIBRARY */
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue