diff --git a/include/ruby/internal/intern/re.h b/include/ruby/internal/intern/re.h index ababac51b9..31f5593275 100644 --- a/include/ruby/internal/intern/re.h +++ b/include/ruby/internal/intern/re.h @@ -20,30 +20,229 @@ * extension libraries. They could be written in C++98. * @brief Public APIs related to ::rb_cRegexp. */ +#include "ruby/internal/attr/nonnull.h" #include "ruby/internal/dllexport.h" #include "ruby/internal/value.h" RBIMPL_SYMBOL_EXPORT_BEGIN() /* re.c */ + +/** + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + * + * @internal + * + * This was a function that switched between memcmp and rb_memcicmp depending + * on then-called `ruby_ignorecase`, or the `$=` global variable. That feature + * was abandoned in sometime around version 1.9.0. + */ #define rb_memcmp memcmp -int rb_memcicmp(const void*,const void*,long); -void rb_match_busy(VALUE); -VALUE rb_reg_nth_defined(int, VALUE); -VALUE rb_reg_nth_match(int, VALUE); + +/** + * Identical to st_locale_insensitive_strcasecmp(), except it is timing safe + * and returns something different. + * + * @param[in] s1 Comparison LHS. + * @param[in] s2 Comparison RHS. + * @param[in] n Comparison shall stop after first `n` bytes are scanned. + * @retval <0 `s1` is "less" than `s2`. + * @retval 0 Both sides converted into lowercase would be identical. + * @retval >0 `s1` is "greater" than `s2`. + * @note The "case" here means that of the POSIX Locale. + * + * @internal + * + * Can accept NULLs as long as n is also 0, and returns 0. + */ +int rb_memcicmp(const void *s1,const void *s2, long n); + +/** + * Asserts that the given MatchData is "occupied". MatchData shares its + * backend storages with its Regexp object. But programs can destructively + * tamper its contents. Calling this function beforehand shall prevent such + * modifications to spill over into other objects. + * + * @param[out] md Target instance of ::rb_cMatch. + * @post The object is "busy". + * + * @internal + * + * There is rb_match_unbusy internally, but extension libraries are left unable + * to do so. + */ +void rb_match_busy(VALUE md); + +/** + * Identical to rb_reg_nth_match(), except it just returns Boolean. This could + * skip allocating a returning string, resulting in reduced memory footprints + * if applicable. + * + * @param[in] n Match index. + * @param[in] md An instance of ::rb_cMatch. + * @exception rb_eTypeError `md` is not initialised. + * @retval RUBY_Qnil There is no `n`-th capture. + * @retval RUBY_Qfalse There is a `n`-th capture and is empty. + * @retval RUBY_Qtrue There is a `n`-th capture that has something. + * + * @internal + * + * @shyouhei wonders: why there are both rb_reg_match_defined() and + * rb_match_nth_defined, which are largely the same things, but do not share + * their implementations at all? + */ +VALUE rb_reg_nth_defined(int n, VALUE md); + +/** + * Queries the nth captured substring. + * + * @param[in] n Match index. + * @param[in] md An instance of ::rb_cMatch. + * @exception rb_eTypeError `md` is not initialised. + * @retval RUBY_Qnil There is no `n`-th capture. + * @retval otherwise An allocated instance of ::rb_cString containing + * the contents captured. + */ +VALUE rb_reg_nth_match(int n, VALUE md); + +/** + * Queries the index of the given named capture. Captures could be named. But + * that doesn't mean named ones are not indexed. A regular expression can mix + * named and non-named captures, and they are all indexed. This function + * converts from a name to its index. + * + * @param[in] match An instance of ::rb_cMatch. + * @param[in] backref Capture name, in String, Symbol, or Numeric. + * @exception rb_eIndexError No such named capture. + * @return The index of the given name. + */ int rb_reg_backref_number(VALUE match, VALUE backref); -VALUE rb_reg_last_match(VALUE); -VALUE rb_reg_match_pre(VALUE); -VALUE rb_reg_match_post(VALUE); -VALUE rb_reg_match_last(VALUE); + +/** + * This just returns the argument, stringified. What a poor name. + * + * @param[in] md An instance of ::rb_cMatch. + * @return Its 0th capture (i.e. entire matched string). + */ +VALUE rb_reg_last_match(VALUE md); + +/** + * The portion of the original string before the given match. + * + * @param[in] md An instance of ::rb_cMatch. + * @return Its "prematch". This is perl's ``$```. + */ +VALUE rb_reg_match_pre(VALUE md); + +/** + * The portion of the original string after the given match. + * + * @param[in] md An instance of ::rb_cMatch. + * @return Its "postmatch". This is perl's `$'`. + */ +VALUE rb_reg_match_post(VALUE md); + +/** + * The portion of the original string that captured at the very last. + * + * @param[in] md An instance of ::rb_cMatch. + * @return Its "lastmatch". This is perl's `$+`. + */ +VALUE rb_reg_match_last(VALUE md); + +/** + * @private + * + * @deprecated This macro once was a thing in the old days, but makes no sense + * any longer today. Exists here for backwards compatibility + * only. You can safely forget about it. + */ #define HAVE_RB_REG_NEW_STR 1 -VALUE rb_reg_new_str(VALUE, int); -VALUE rb_reg_new(const char *, long, int); + +/** + * Identical to rb_reg_new(), except it takes the expression in Ruby's string + * instead of C's. + * + * @param[in] src Source code in String. + * @param[in] opts Options e.g. ONIG_OPTION_MULTILINE. + * @exception rb_eRegexpError `src` and `opts` do not interface. + * @return Allocated new instance of ::rb_cRegexp. + */ +VALUE rb_reg_new_str(VALUE src, int opts); + +RBIMPL_ATTR_NONNULL(()) +/** + * Creates a new Regular expression. + * + * @param[in] src Source code. + * @param[in] len `strlen(src)`. + * @param[in] opts Options e.g. ONIG_OPTION_MULTILINE. + * @return Allocated new instance of ::rb_cRegexp. + */ +VALUE rb_reg_new(const char *src, long len, int opts); + +/** + * Allocates an instance of ::rb_cRegexp. + * + * @private + * + * Nobody should call this function. Regular expressions that are not + * initialised must not exist in the wild. + */ VALUE rb_reg_alloc(void); + +/** + * Initialises an instance of ::rb_cRegexp. + * + * @private + * + * This just raises for ordinal regexp objects. Extension libraries must not + * use. + */ VALUE rb_reg_init_str(VALUE re, VALUE s, int options); -VALUE rb_reg_match(VALUE, VALUE); -VALUE rb_reg_match2(VALUE); -int rb_reg_options(VALUE); + +/** + * This is the match operator. + * + * @param[in] re An instance of ::rb_cRegexp. + * @param[in] str An instance of ::rb_cString. + * @exception rb_eTypeError `str` is not a string. + * @exception rb_eRegexpError Error inside of Onigmo (unlikely). + * @retval RUBY_Qnil Match failed. + * @retval otherwise Matched position (character index inside of + * `str`). + * @post `Regexp.last_match` is updated. + * @post `$&`, `$~`, etc., are updated. + * @note If you do this in ruby, named captures are assigned to local + * variable of the local scope. But that doesn't happen here. The + * assignment is done by the interpreter. + */ +VALUE rb_reg_match(VALUE re, VALUE str); + +/** + * Identical to rb_reg_match(), except it matches against rb_lastline_get() + * (or, the `$_`). + * + * @param[in] re An instance of ::rb_cRegexp. + * @exception rb_eRegexpError Error inside of Onigmo (unlikely). + * @retval RUBY_Qnil Match failed or `$_` is absent. + * @retval otherwise Matched position (character index inside of + * `$_`). + * @post `Regexp.last_match` is updated. + * @post `$&`, `$~`, etc., are updated. + */ +VALUE rb_reg_match2(VALUE re); + +/** + * Queries the options of the passed regular expression. + * + * @param[in] re An instance of ::rb_cRegexp. + * @return Its options. + * @note Possible return values are defined in Onigmo.h. + */ +int rb_reg_options(VALUE re); RBIMPL_SYMBOL_EXPORT_END()