From 1c9106da8bfe96dafa844cf543eda08dca1b176d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8D=9C=E9=83=A8=E6=98=8C=E5=B9=B3?= Date: Wed, 23 Jun 2021 15:43:05 +0900 Subject: [PATCH] include/ruby/re.h: add doxygen Must not be a bad idea to improve documents. [ci skip] --- include/ruby/re.h | 129 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 121 insertions(+), 8 deletions(-) diff --git a/include/ruby/re.h b/include/ruby/re.h index ec0f425db0..3892d6e7f2 100644 --- a/include/ruby/re.h +++ b/include/ruby/re.h @@ -11,23 +11,136 @@ * file COPYING are met. Consult the file for details. */ #include "ruby/internal/config.h" -#include <sys/types.h> + +#ifdef HAVE_SYS_TYPES_H +# include <sys/types.h> +#endif + #include <stdio.h> #include "ruby/regex.h" #include "ruby/internal/core/rmatch.h" #include "ruby/internal/dllexport.h" +struct re_registers; /* Defined in onigmo.h */ + RBIMPL_SYMBOL_EXPORT_BEGIN() -VALUE rb_reg_regcomp(VALUE); -long rb_reg_search(VALUE, VALUE, long, int); -VALUE rb_reg_regsub(VALUE, VALUE, struct re_registers *, VALUE); -long rb_reg_adjust_startpos(VALUE, VALUE, long, int); -void rb_match_busy(VALUE); -VALUE rb_reg_quote(VALUE); +/** + * Creates a new instance of ::rb_cRegexp. It can be seen as a specialised + * version of rb_reg_new_str() where it does not take options. + * + * @param[in] str Source code in String. + * @return Allocated new instance of ::rb_cRegexp. + */ +VALUE rb_reg_regcomp(VALUE str); + +/** + * Runs the passed regular expression over the passed string. Unlike + * rb_reg_search() this function also takes position and direction of the + * search, which makes it possible for this function to run from the middle of + * the string. + * + * @param[in] re Regular expression to execute. + * @param[in] str Target string to search. + * @param[in] pos Offset in `str` to start searching, in bytes. + * @param[in] dir `pos`' direction; 0 means left-to-right, 1 for + * the opposite. + * @exception rb_eArgError `re` is broken. 
+ * @exception rb_eRegexpError `re` is malformed. + * @retval -1 Match failed. + * @retval otherwise Offset of the first byte where the match happened. + * @post `Regexp.last_match` is updated. + * @post `$&`, `$~`, etc., are updated. + * + * @internal + * + * The distinction between raising ::rb_eArgError and ::rb_eRegexpError is not + * obvious, at least to @shyouhei. + */ +long rb_reg_search(VALUE re, VALUE str, long pos, int dir); + +/** + * Substitution. This is basically the implementation of `String#sub`. Also + * `String#gsub` repeatedly calls this function. + * + * @param[in] repl Replacement string, e.g. `"\\1\\2"`. + * @param[in] src Source string, to be replaced. + * @param[in] regs Matched data generated by applying `rexp` to `src`. + * @param[in] rexp Regular expression. + * @return A substituted string. + * + * @internal + * + * This function does not check for encoding compatibility. `String#sub!` + * etc. employ their own checker. + * + * `regs` should have been `const struct re_registers *` because it is read + * only. Kept as-is for compatibility. + */ +VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp); + +/** + * Tell us if this is a wrong idea, but it seems this function has no usage at + * all. It just remains here for theoretical backwards compatibility. + * + * @param[in] re Regular expression to execute. + * @param[in] str Target string to search. + * @param[in] pos Offset in `str` to start searching, in bytes. + * @param[in] dir `pos`' direction; 0 means left-to-right, 1 for + * the opposite. + * @return Adjusted nearest offset to `pos` inside of `str`, which is a + * character boundary. + * + */ +long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int dir); + +/** + * Escapes any characters that would have special meaning in a regular + * expression. + * + * @param[in] str Target string to escape. + * @return A copy of `str` whose contents are escaped. 
+ */ +VALUE rb_reg_quote(VALUE str); + +/** + * Exercises various checks and preprocesses so that the given regular + * expression can be applied to the given string. The preprocessing here includes + * (but is not limited to), for instance, encoding conversion. + * + * @param[in] re Target regular expression. + * @param[in] str What `re` is about to run on. + * @exception rb_eArgError `re` does not fit for `str`. + * @exception rb_eEncCompatError `re` and `str` are incompatible. + * @exception rb_eRegexpError `re` is malformed. + * @return A preprocessed pattern buffer ready to be applied to `str`. + * @note The return value is managed by our GC. Don't free it. + * + * @internal + * + * The return type, `regex_t *`, is defined in `<ruby/onigmo.h>`, _and_ + * _conflicts_ with POSIX's `<regex.h>`. We can no longer save the situation + * at this point. Just don't mix the two. + */ regex_t *rb_reg_prepare_re(VALUE re, VALUE str); -int rb_reg_region_copy(struct re_registers *, const struct re_registers *); + +/** + * Duplicates a match data. This is roughly the same as `onig_region_copy()`, + * except it tries to GC when there is not enough memory. + * + * @param[out] dst Target registers to fill. + * @param[in] src Source registers to duplicate. + * @exception rb_eNoMemError Not enough memory. + * @retval 0 Successful. + * @retval ONIGERR_MEMORY Not enough memory, even after GC (unlikely). + * @post `dst` has identical contents to `src`. + * + * @internal + * + * It seems this function is here for `ext/strscan` and nothing else. + */ +int rb_reg_region_copy(struct re_registers *dst, const struct re_registers *src); RBIMPL_SYMBOL_EXPORT_END()