ruby--ruby/include/ruby/re.h

#ifndef RUBY_RE_H                                    /*-*-C++-*-vi:se ft=cpp:*/
#define RUBY_RE_H 1
/**
 * @file
 * @author     $Author$
 * @date       Thu Sep 30 14:18:32 JST 1993
 * @copyright  Copyright (C) 1993-2007 Yukihiro Matsumoto
 * @copyright  This  file  is   a  part  of  the   programming  language  Ruby.
 *             Permission  is hereby  granted,  to  either redistribute  and/or
 *             modify this file, provided that  the conditions mentioned in the
 *             file COPYING are met.  Consult the file for details.
 */
#include "ruby/internal/config.h"

#ifdef HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif

#include <stdio.h>

#include "ruby/regex.h"
#include "ruby/internal/core/rmatch.h"
#include "ruby/internal/dllexport.h"

struct re_registers;            /* Defined in onigmo.h */

RBIMPL_SYMBOL_EXPORT_BEGIN()

/**
 * Creates a  new instance of  ::rb_cRegexp.  It can  be seen as  a specialised
 * version of rb_reg_new_str() that does not take options.
 *
 * @param[in]  str  Regular expression source, given as a String.
 * @return     Allocated new instance of ::rb_cRegexp.
 */
VALUE rb_reg_regcomp(VALUE str);

/**
 * Runs the passed regular expression over the passed string.  This function
 * also takes the position and the direction of the search, which makes it
 * possible to start searching from the middle of the string.
 *
 * @param[in]  re               Regular expression to execute.
 * @param[in]  str              Target string to search.
 * @param[in]  pos              Offset in `str` to start searching, in bytes.
 * @param[in]  dir              Search direction; 0 means left-to-right, 1 for
 *                              the opposite.
 * @exception  rb_eArgError     `re` is broken.
 * @exception  rb_eRegexpError  `re` is malformed.
 * @retval     -1               Match failed.
 * @retval     otherwise        Offset of the first byte where the match
 *                              happened.
 * @post       `Regexp.last_match` is updated.
 * @post       `$&`, `$~`, etc., are updated.
 *
 * @internal
 *
 * Distinction  between raising  ::rb_eArgError  and  ::rb_eRegexpError is  not
 * obvious, at least to @shyouhei.
 *
 * NOTE(review): the description previously contrasted this function with
 * `rb_reg_search()`, i.e. with itself.  Presumably a sibling matching function
 * that takes neither `pos` nor `dir` was meant -- confirm and name it here.
 */
long rb_reg_search(VALUE re, VALUE str, long pos, int dir);

/**
 * Performs a substitution.  This is basically the implementation of
 * `String#sub`.  `String#gsub` also calls this function, repeatedly.
 *
 * @param[in]  repl  Replacement string, e.g. `"\\1\\2"`.
 * @param[in]  src   Source string, to be replaced.
 * @param[in]  regs  Match data generated by applying `rexp` to `src`.
 * @param[in]  rexp  Regular expression.
 * @return     A substituted string.
 *
 * @internal
 *
 * This function does not check for encoding compatibility.  `String#sub!`
 * etc. employ their own checker.
 *
 * `regs` should have been `const struct re_registers *` because it is
 * read-only.  Kept as-is for compatibility.
 */
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp);

/**
 * Tell us if this is a wrong idea, but it seems this function has no usage at
 * all.  It just remains here for theoretical backwards compatibility.
 *
 * @param[in]  re               Regular expression to execute.
 * @param[in]  str              Target string to search.
 * @param[in]  pos              Offset in `str` to start searching, in bytes.
 * @param[in]  dir              Search direction; 0 means left-to-right, 1 for
 *                              the opposite.
 * @return     Adjusted offset nearest to `pos` inside of `str` that is a
 *             character boundary.
 */
long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int dir);

/**
 * Escapes every character of the given string that would otherwise have
 * special meaning in a regular expression.
 *
 * @param[in]  str  Target string to escape.
 * @return     A copy of `str` whose contents are escaped.
 */
VALUE rb_reg_quote(VALUE str);

/**
 * Exercises various checks and preprocessing so that the given regular
 * expression can be applied to the given string.  The preprocessing here
 * includes (but is not limited to), for instance, encoding conversion.
 *
 * @param[in]  re                  Target regular expression.
 * @param[in]  str                 What `re` is about to run on.
 * @exception  rb_eArgError        `re` does not fit for `str`.
 * @exception  rb_eEncCompatError  `re` and `str` are incompatible.
 * @exception  rb_eRegexpError     `re` is malformed.
 * @return     A preprocessed pattern buffer ready to be applied to `str`.
 * @note       The return value is managed by our GC.  Don't free it.
 *
 * @internal
 *
 * The  return  type,  `regex_t  *`, is  defined  in  `<ruby/onigmo.h>`,  _and_
 * _conflicts_ with POSIX's  `<regex.h>`.  We can no longer  save the situation
 * at this point.  Just don't mix the two.
 */
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);

/**
 * Duplicates match data.  This is roughly the same as `onig_region_copy()`,
 * except that it tries to GC when there is not enough memory.
 *
 * @param[out]  dst             Target registers to fill.
 * @param[in]   src             Source registers to duplicate.
 * @exception   rb_eNoMemError  Not enough memory.
 * @retval      0               Successful.
 * @retval      ONIGERR_MEMORY  Not enough memory, even after GC (unlikely).
 * @post        `dst` has identical contents to `src`.
 *
 * @internal
 *
 * It seems this function is here for `ext/strscan` and nothing else.
 */
int rb_reg_region_copy(struct re_registers *dst, const struct re_registers *src);

RBIMPL_SYMBOL_EXPORT_END()

#endif /* RUBY_RE_H */
add #include guard hack According to MSVC manual (1), cl.exe can skip including a header file when that: - contains #pragma once, or - starts with #ifndef, or - starts with #if ! defined. GCC has a similar trick (2), but it acts more stricter (e. g. there must be _no tokens_ outside of #ifndef...#endif). Sun C lacked #pragma once for a looong time. Oracle Developer Studio 12.5 finally implemented it, but we cannot assume such recent version. This changeset modifies header files so that each of them include strictly one #ifndef...#endif. I believe this is the most portable way to trigger compiler optimizations. [Bug #16770] 1: https://docs.microsoft.com/en-us/cpp/preprocessor/once 2: https://gcc.gnu.org/onlinedocs/cppinternals/Guard-Macros.html 2020-04-10 01:11:40 -04:00			`#ifndef RUBY_RE_H /--C++--vi:se ft=cpp:/`
* include/ruby: moved public headers. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12501 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2007-06-09 23:06:15 -04:00			`#define RUBY_RE_H 1`
add #include guard hack According to MSVC manual (1), cl.exe can skip including a header file when that: - contains #pragma once, or - starts with #ifndef, or - starts with #if ! defined. GCC has a similar trick (2), but it acts more stricter (e. g. there must be _no tokens_ outside of #ifndef...#endif). Sun C lacked #pragma once for a looong time. Oracle Developer Studio 12.5 finally implemented it, but we cannot assume such recent version. This changeset modifies header files so that each of them include strictly one #ifndef...#endif. I believe this is the most portable way to trigger compiler optimizations. [Bug #16770] 1: https://docs.microsoft.com/en-us/cpp/preprocessor/once 2: https://gcc.gnu.org/onlinedocs/cppinternals/Guard-Macros.html 2020-04-10 01:11:40 -04:00			`/**`
			`* @file`
			`* @author $Author$`
			`* @date Thu Sep 30 14:18:32 JST 1993`
			`* @copyright Copyright (C) 1993-2007 Yukihiro Matsumoto`
			`* @copyright This file is a part of the programming language Ruby.`
			`* Permission is hereby granted, to either redistribute and/or`
			`* modify this file, provided that the conditions mentioned in the`
			`* file COPYING are met. Consult the file for details.`
			`*/`
sed -i 's\|ruby/impl\|ruby/internal\|' To fix build failures. 2020-05-08 05:31:09 -04:00			`#include "ruby/internal/config.h"`
include/ruby/re.h: add doxygen Must not be a bad idea to improve documents. [ci skip] 2021-06-23 02:43:05 -04:00
			`#ifdef HAVE_SYS_TYPES_H`
			`# include <sys/types.h>`
			`#endif`

Initial revision git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@2 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 1998-01-16 07:13:05 -05:00			`#include <stdio.h>`

* include/ruby: moved public headers. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12501 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2007-06-09 23:06:15 -04:00			`#include "ruby/regex.h"`
sed -i 's\|ruby/impl\|ruby/internal\|' To fix build failures. 2020-05-08 05:31:09 -04:00			`#include "ruby/internal/core/rmatch.h"`
			`#include "ruby/internal/dllexport.h"`
Initial revision git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@2 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 1998-01-16 07:13:05 -05:00
include/ruby/re.h: add doxygen Must not be a bad idea to improve documents. [ci skip] 2021-06-23 02:43:05 -04:00			`struct re_registers; /* Defined in onigmo.h */`

sed -i s/RUBY3/RBIMPL/g Devs do not love "3". The only exception is RUBY3_KEYWORDS in parse.y, which seems unrelated to our interests. 2020-05-04 02:52:56 -04:00			`RBIMPL_SYMBOL_EXPORT_BEGIN()`
Initial revision git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@2 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 1998-01-16 07:13:05 -05:00
include/ruby/re.h: add doxygen Must not be a bad idea to improve documents. [ci skip] 2021-06-23 02:43:05 -04:00			`/**`
			`* Creates a new instance of ::rb_cRegexp. It can be seen as a specialised`
			`* version of rb_reg_new_str() where it does not take options.`
			`*`
			`* @param[in] str Source code in String.`
			`* @return Allocated new instance of ::rb_cRegexp.`
			`*/`
			`VALUE rb_reg_regcomp(VALUE str);`

			`/**`
			`* Runs the passed regular expression over the passed string. Unlike`
			`* rb_reg_search() this function also takes position and direction of the`
			`* search, which make it possible for this function to run from in middle of`
			`* the string.`
			`*`
			`* @param[in] re Regular expression to execute.`
			`* @param[in] str Target string to search.`
			* @param[in] pos Offset in `str` to start searching, in bytes.
			* @param[in] dir `pos`' direction; 0 means left-to-right, 1 for
			`* the opposite.`
			* @exception rb_eArgError `re` is broken.
			* @exception rb_eRegexpError `re` is malformed.
			`* @retval -1 Match failed.`
			`* @retval otherwise Offset of first such byte where match happened.`
			* @post `Regexp.last_match` is updated.
			* @post `$&`, `$~`, etc., are updated.
			`*`
			`* @internal`
			`*`
			`* Distinction between raising ::rb_eArgError and ::rb_eRegexpError is not`
			`* obvious, at least to @shyouhei.`
			`*/`
			`long rb_reg_search(VALUE re, VALUE str, long pos, int dir);`

			`/**`
			* Substitution. This is basically the implementation of `String#sub`. Also
			* `String#gsub` repeatedly calls this function.
			`*`
			* @param[in] repl Replacement string, e.g. `"\\1\\2"`
			`* @param[in] src Source string, to be replaced.`
			* @param[in] regs Matched data generated by applying `rexp` to `src`.
			`* @param[in] rexp Regular expression.`
			`* @return A substituted string.`
			`*`
			`* @internal`
			`*`
			* This function does not check for encoding compatibility. `String#sub!`
			`* etc. employ their own checker.`
			`*`
			* `regs` should have been `const struct re_registers *` because it is read
			`* only. Kept as-is for compatibility.`
			`*/`
			`VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp);`

			`/**`
			`* Tell us if this is a wrong idea, but it seems this function has no usage at`
			`* all. Just remains here for theoretical backwards compatibility.`
			`*`
			`* @param[in] re Regular expression to execute.`
			`* @param[in] str Target string to search.`
			* @param[in] pos Offset in `str` to start searching, in bytes.
			* @param[in] dir `pos`' direction; 0 means left-to-right, 1 for
			`* the opposite.`
			* @return Adjusted nearest offset to `pos` inside of `str`, where is a
			`* character boundary.`
			`*`
			`*/`
			`long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int dir);`

			`/**`
			`* Escapes any characters that would have special meaning in a regular`
			`* expression.`
			`*`
			`* @param[in] str Target string to escape.`
			* @return A copy of `str` whose contents are escaped.
			`*/`
			`VALUE rb_reg_quote(VALUE str);`

			`/**`
			`* Exercises various checks and preprocesses so that the given regular`
			`* expression can be applied to the given string. The preprocess here includes`
			`* (but not limited to) for instance encoding conversion.`
			`*`
			`* @param[in] re Target regular expression.`
			* @param[in] str What `re` is about to run on.
			* @exception rb_eArgError `re` does not fit for `str`.
			* @exception rb_eEncCompatError `re` and `str` are incompatible.
			* @exception rb_eRegexpError `re` is malformed.
			* @return A preprocessed pattern buffer ready to be applied to `str`.
			`* @note The return value is managed by our GC. Don't free.`
			`*`
			`* @internal`
			`*`
			* The return type, `regex_t *`, is defined in `<ruby/onigmo.h>`, _and_
			* _conflicts_ with POSIX's `<regex.h>`. We can no longer save the situation
			`* at this point. Just don't mix the two.`
			`*/`
* configure.in (XCFLAGS): use -fvisibility=hidden if possible. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@28709 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-07-21 17:38:25 -04:00			`regex_t *rb_reg_prepare_re(VALUE re, VALUE str);`
include/ruby/re.h: add doxygen Must not be a bad idea to improve documents. [ci skip] 2021-06-23 02:43:05 -04:00
			`/**`
			* Duplicates a match data. This is roughly the same as `onig_region_copy()`,
			`* except it tries to GC when there is not enough memory.`
			`*`
			`* @param[out] dst Target registers to fill.`
			`* @param[in] src Source registers to duplicate.`
			`* @exception rb_eNoMemError Not enough memory.`
			`* @retval 0 Successful`
			`* @retval ONIGERR_MEMORY Not enough memory, even after GC (unlikely).`
			* @post `dst` has identical contents to `src`.
			`*`
			`* @internal`
			`*`
			* It seems this function is here for `ext/strscan` and nothing else.
			`*/`
			`int rb_reg_region_copy(struct re_registers *dst, const struct re_registers *src);`
* configure.in (XCFLAGS): use -fvisibility=hidden if possible. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@28709 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-07-21 17:38:25 -04:00
sed -i s/RUBY3/RBIMPL/g Devs do not love "3". The only exception is RUBY3_KEYWORDS in parse.y, which seems unrelated to our interests. 2020-05-04 02:52:56 -04:00			`RBIMPL_SYMBOL_EXPORT_END()`
* include/ruby: moved public headers. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12501 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2007-06-09 23:06:15 -04:00
			`#endif /* RUBY_RE_H */`