1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* oniggnu.h: imported from Oniguruma library.

* oniguruma.h: ditto.
* regcomp.c: ditto.
* regenc.c: ditto.
* regenc.h: ditto.
* regerror.c: ditto.
* regex.c: ditto.
* regexec.c: ditto.
* reggnu.c: ditto.
* regint.h: ditto.
* regparse.c: ditto.
* regparse.h: ditto.
* ascii.c: ditto.
* euc_jp.c: ditto.
* sjis.c: ditto.
* utf8.c: ditto.

* MANIFEST: added Oniguruma files listed above.

* LEGAL: added Oniguruma license.

* regex.h: now includes oniggnu.h.

* re.c: applied Oniguruma patch.


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5896 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
ksaito 2004-03-05 15:40:36 +00:00
parent 5770336f8b
commit f353d89d82
6 changed files with 107 additions and 4887 deletions

View file

@ -1,3 +1,30 @@
Fri Mar 6 00:39:21 2004 Kazuo Saito <ksaito@uranus.dti.ne.jp>
* oniggnu.h: imported from Oniguruma library.
* oniguruma.h: ditto.
* regcomp.c: ditto.
* regenc.c: ditto.
* regenc.h: ditto.
* regerror.c: ditto.
* regex.c: ditto.
* regexec.c: ditto.
* reggnu.c: ditto.
* regint.h: ditto.
* regparse.c: ditto.
* regparse.h: ditto.
* ascii.c: ditto.
* euc_jp.c: ditto.
* sjis.c: ditto.
* utf8.c: ditto.
* MANIFEST: added Oniguruma files listed above.
* LEGAL: added Oniguruma license.
* regex.h: now includes oniggnu.h.
* re.c: applied Oniguruma patch.
Fri Mar 5 23:13:08 2004 Minero Aoki <aamine@loveruby.net>
* lib/net/http.rb: support WebDAV methods, PROPPATCH, LOCK,

45
LEGAL
View file

@ -5,34 +5,29 @@ All the files in this distribution are covered under either the Ruby's
license (see the file COPYING) or public-domain except some files
mentioned below.
regex.[ch]:
oniggnu.h:
oniguruma.h:
regcomp.c:
regenc.[ch]:
regerror.c:
regex.c:
regexec.c:
reggnu.c:
regint.h:
regparse.[ch]:
ascii.c:
euc_jp.c:
sjis.c:
utf8.c:
These files are under LGPL. Treat them as LGPL says. (See the file
LGPL for details)
Oniguruma ---- (C) K.Kosako <kosako@sofnec.co.jp>
Extended regular expression matching and search library.
Copyright (C) 1993, 94, 95, 96, 97, 98 Free Software Foundation, Inc.
http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/
http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
http://www.geocities.jp/kosako1/oniguruma/
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file LGPL. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto)
Last change: May 21, 1993 by t^2
removed gapped buffer support, multiple syntax support by matz <matz@nts.co.jp>
Perl5 extension added by matz <matz@caelum.co.jp>
UTF-8 extension added Jan 16 1999 by Yoshida Masato <yoshidam@tau.bekkoame.ne.jp>
When this software is partly used or it is distributed with Ruby,
this of Ruby follows the license of Ruby.
configure:

View file

@ -13,6 +13,7 @@ README.EXT.ja
README.ja
ToDo
array.c
ascii.c
bignum.c
class.c
compar.c
@ -28,6 +29,7 @@ dmyext.c
enum.c
env.h
error.c
euc_jp.c
eval.c
file.c
gc.c
@ -48,6 +50,8 @@ mkconfig.rb
node.h
numeric.c
object.c
oniggnu.h
oniguruma.h
pack.c
parse.c
parse.y
@ -57,8 +61,17 @@ random.c
range.c
re.c
re.h
regcomp.c
regenc.c
regenc.h
regerror.c
regex.c
regex.h
regexec.c
reggnu.c
regint.h
regparse.c
regparse.h
ruby.1
ruby.c
ruby.h
@ -67,12 +80,14 @@ rubysig.h
rubytest.rb
runruby.rb
signal.c
sjis.c
sprintf.c
st.c
st.h
string.c
struct.c
time.c
utf8.c
util.c
util.h
variable.c

34
re.c
View file

@ -483,11 +483,13 @@ rb_reg_to_s(re)
goto again;
}
if (*ptr == ':' && ptr[len-1] == ')') {
int r;
Regexp *rp;
kcode_set_option(re);
rp = ALLOC(Regexp);
MEMZERO((char *)rp, Regexp, 1);
err = re_compile_pattern(++ptr, len -= 2, rp) != 0;
r = re_alloc_pattern(&rp);
if (r == 0) {
err = (re_compile_pattern(++ptr, len -= 2, rp, NULL) != 0);
}
kcode_reset_option();
re_free_pattern(rp);
}
@ -621,7 +623,8 @@ make_regexp(s, len, flags)
int flags;
{
Regexp *rp;
char *err;
char err[ONIG_MAX_ERROR_MESSAGE_LEN];
int r;
/* Handle escaped characters first. */
@ -630,17 +633,18 @@ make_regexp(s, len, flags)
from that.
*/
rp = ALLOC(Regexp);
MEMZERO((char *)rp, Regexp, 1);
rp->buffer = ALLOC_N(char, 16);
rp->allocated = 16;
rp->fastmap = ALLOC_N(char, 256);
r = re_alloc_pattern(&rp);
if (r) {
re_error_code_to_str((UChar* )err, r);
rb_reg_raise(s, len, err, 0);
}
if (flags) {
rp->options = flags;
}
err = re_compile_pattern(s, len, rp);
r = re_compile_pattern(s, len, rp, err);
if (err != NULL) {
if (r != 0) {
rb_reg_raise(s, len, err, 0);
}
return rp;
@ -842,14 +846,14 @@ rb_reg_prepare_re(re)
}
if (need_recompile) {
char *err;
char err[ONIG_MAX_ERROR_MESSAGE_LEN];
int r;
if (FL_TEST(re, KCODE_FIXED))
kcode_set_option(re);
rb_reg_check(re);
RREGEXP(re)->ptr->fastmap_accurate = 0;
err = re_compile_pattern(RREGEXP(re)->str, RREGEXP(re)->len, RREGEXP(re)->ptr);
if (err != NULL) {
r = re_recompile_pattern(RREGEXP(re)->str, RREGEXP(re)->len, RREGEXP(re)->ptr, err);
if (r != 0) {
rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, re);
}
}

4655
regex.c

File diff suppressed because it is too large Load diff

218
regex.h
View file

@ -1,221 +1,17 @@
/* Definitions for data structures and routines for the regular
expression library, version 0.12.
Copyright (C) 1985,89,90,91,92,93,95,96,97,98 Free Software Foundation, Inc.
/**********************************************************************
This file is part of the GNU C Library. Its master source is NOT part of
the C library, however. The master source lives in /gd/gnu/lib.
regex.h -
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
$Author$
$Date$
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
Copyright (C) 1993-2004 Yukihiro Matsumoto
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file LGPL. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto)
Last change: May 21, 1993 by t^2 */
/* modified for Ruby by matz@netlab.co.jp */
**********************************************************************/
#ifndef REGEX_H
#define REGEX_H
/* symbol mangling for ruby */
#ifdef RUBY
# define re_adjust_startpos ruby_re_adjust_startpos
# define re_compile_fastmap ruby_re_compile_fastmap
# define re_compile_pattern ruby_re_compile_pattern
# define re_copy_registers ruby_re_copy_registers
# define re_free_pattern ruby_re_free_pattern
# define re_free_registers ruby_re_free_registers
# define re_match ruby_re_match
# define re_mbcinit ruby_re_mbcinit
# define re_search ruby_re_search
# define re_set_casetable ruby_re_set_casetable
# define register_info_type ruby_register_info_type
#endif
#include <stddef.h>
/* Define number of parens for which we record the beginnings and ends.
This affects how much space the `struct re_registers' type takes up. */
#ifndef RE_NREGS
#define RE_NREGS 10
#endif
#define BYTEWIDTH 8
#define RE_REG_MAX ((1<<BYTEWIDTH)-1)
/* Maximum number of duplicates an interval can allow. */
#ifndef RE_DUP_MAX
#define RE_DUP_MAX ((1 << 15) - 1)
#endif
/* If this bit is set, then character classes are supported; they are:
[:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
[:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
If not set, then character classes are not supported. */
#define RE_CHAR_CLASSES (1L << 9)
/* match will be done case insensitively */
#define RE_OPTION_IGNORECASE (1L)
/* perl-style extended pattern available */
#define RE_OPTION_EXTENDED (RE_OPTION_IGNORECASE<<1)
/* newline will be included for . */
#define RE_OPTION_MULTILINE (RE_OPTION_EXTENDED<<1)
/* ^ and $ ignore newline */
#define RE_OPTION_SINGLELINE (RE_OPTION_MULTILINE<<1)
/* search for longest match, in accord with POSIX regexp */
#define RE_OPTION_LONGEST (RE_OPTION_SINGLELINE<<1)
#define RE_MAY_IGNORECASE (RE_OPTION_LONGEST<<1)
#define RE_OPTIMIZE_ANCHOR (RE_MAY_IGNORECASE<<1)
#define RE_OPTIMIZE_EXACTN (RE_OPTIMIZE_ANCHOR<<1)
#define RE_OPTIMIZE_NO_BM (RE_OPTIMIZE_EXACTN<<1)
#define RE_OPTIMIZE_BMATCH (RE_OPTIMIZE_NO_BM<<1)
/* For multi-byte char support */
#define MBCTYPE_ASCII 0
#define MBCTYPE_EUC 1
#define MBCTYPE_SJIS 2
#define MBCTYPE_UTF8 3
extern
#if defined _WIN32 && !defined __GNUC__ && !defined RUBY_EXPORT
__declspec(dllimport)
# endif
const unsigned char *re_mbctab;
#if defined(__STDC__)
void re_mbcinit (int);
#else
void re_mbcinit ();
#endif
#undef ismbchar
#define ismbchar(c) re_mbctab[(unsigned char)(c)]
#define mbclen(c) (re_mbctab[(unsigned char)(c)]+1)
/*
 * Union used in re_match().
 *
 * The same storage can be read either as a byte pointer (`word') or as a
 * pair of one-bit flags (`bits').
 */
typedef union
{
    unsigned char *word;
    struct {
        /* NOTE(review): the meaning of both flags is defined by the matcher,
           which is not visible from this header -- confirm in regex.c. */
        unsigned is_active : 1;
        unsigned matched_something : 1;
    } bits;
} register_info_type;
/*
 * Representation of a compiled pattern.
 *
 * Groups the compiled command buffer, the optional search accelerators
 * (the fastmap and the `must' string with its skip table), the option
 * flags, and the working arrays used by re_match().
 */
struct re_pattern_buffer
{
    /* --- compiled pattern storage --- */
    char *buffer;               /* space holding the compiled pattern commands */
    int allocated;              /* size of the space that `buffer' points to */
    int used;                   /* length of the portion of `buffer' actually occupied */

    /* --- search accelerators --- */
    char *fastmap;              /* pointer to the fastmap, or zero if none;
                                   re_search uses it to skip over totally
                                   implausible characters */
    char *must;                 /* pointer to an exact pattern which strings
                                   must have in order to be matched */
    int *must_skip;             /* exact-pattern skip table for bm_search */

    /* --- compile results --- */
    long options;               /* flags for options such as extended_pattern */
    long re_nsub;               /* number of subexpressions found by the compiler */
    char fastmap_accurate;      /* zero when a new pattern is stored,
                                   one once the fastmap is updated from it */
    char can_be_null;           /* set to one by compiling the fastmap if this
                                   pattern might match the null string; it does
                                   not necessarily match it in that case, but
                                   if this is zero it cannot.  A value of 2
                                   means it can match the null string, but only
                                   at end of range or before a character
                                   listed in the fastmap */

    /* --- stack & working area for re_match() --- */
    unsigned char **regstart;
    unsigned char **regend;
    unsigned char **old_regstart;
    unsigned char **old_regend;
    register_info_type *reg_info;
    unsigned char **best_regstart;
    unsigned char **best_regend;
};

/* Short alias for the compiled-pattern structure. */
typedef struct re_pattern_buffer regex_t;
/*
 * Storage for register contents (sub-match positions).
 *
 * Pass the address of one of these to re_match(), re_search(), etc. when
 * the match positions are wanted back.
 *
 * beg[0] and end[0] describe the entire pattern.  For i >= 1, beg[i] is the
 * index in the string where the ith subexpression's match starts and end[i]
 * is one after its ending index.  (Earlier wording of this comment called
 * the first array `start' and bounded i by RE_NREGS - 1.)
 */
struct re_registers
{
    int allocated;      /* NOTE(review): presumably the capacity of beg/end -- confirm in regex.c */
    int num_regs;       /* NOTE(review): presumably the number of registers in use -- confirm in regex.c */
    int *beg;           /* starting indices */
    int *end;           /* one-after-the-end indices */
};
/*
 * Byte offset within the string; the type name is the one POSIX mandates.
 *
 * NOTE(review): POSIX describes regoff_t as a signed type, whereas size_t
 * is unsigned -- confirm before storing a negative "no match" sentinel.
 */
typedef size_t regoff_t;

/*
 * POSIX-style register record.
 *
 * Besides using different names than `re_registers', POSIX represents the
 * registers as an array of structures rather than a structure of arrays.
 */
typedef struct
{
    regoff_t rm_so;     /* byte offset from the string's start to the substring's start */
    regoff_t rm_eo;     /* byte offset from the string's start to the substring's end */
} regmatch_t;
/*
 * Entry points of the regular expression library.
 *
 * Compilers that define __STDC__ get full prototypes; older compilers get
 * declarations without parameter lists.  Both branches must declare the
 * same set of names.
 */
#ifdef __STDC__
extern char *re_compile_pattern (const char *, int, struct re_pattern_buffer *);
void re_free_pattern (struct re_pattern_buffer *);
/* Is this really advertised? */
extern int re_adjust_startpos (struct re_pattern_buffer *, const char*, int, int, int);
extern void re_compile_fastmap (struct re_pattern_buffer *);
extern int re_search (struct re_pattern_buffer *, const char*, int, int, int,
struct re_registers *);
extern int re_match (struct re_pattern_buffer *, const char *, int, int,
struct re_registers *);
extern void re_set_casetable (const char *table);
extern void re_copy_registers (struct re_registers*, struct re_registers*);
extern void re_free_registers (struct re_registers*);
#ifndef RUBY
/* 4.2 bsd compatibility. */
extern char *re_comp (const char *);
extern int re_exec (const char *);
#endif
#else /* !__STDC__ */
extern char *re_compile_pattern ();
/* Declared under the same name as in the prototype branch, so that callers
   of re_free_pattern() also see a declaration on pre-ANSI compilers. */
void re_free_pattern ();
/* NOTE(review): re_free_regexp has no counterpart in the prototype branch
   and looks like a stale name; it is kept only so that existing code which
   relies on this declaration keeps compiling -- confirm and remove. */
void re_free_regexp ();
/* Is this really advertised? */
extern int re_adjust_startpos ();
extern void re_compile_fastmap ();
extern int re_search ();
extern int re_match ();
extern void re_set_casetable ();
extern void re_copy_registers ();
extern void re_free_registers ();
#endif /* __STDC__ */
#include "oniggnu.h"
#endif /* !REGEX_H */