From d1fd7bdf484fbda6092c241989a89afb6882650a Mon Sep 17 00:00:00 2001 From: dave Date: Fri, 26 Dec 2003 15:58:28 +0000 Subject: [PATCH] MG added RDoc comments for re.c git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5309 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- re.c | 419 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 419 insertions(+) diff --git a/re.c b/re.c index 29f5a89d16..577a9ad745 100644 --- a/re.c +++ b/re.c @@ -364,6 +364,16 @@ rb_reg_desc(s, len, re) return str; } + +/* + * call-seq: + * rxp.source => str + * + * Returns the original string of the pattern. + * + * /ab+c/ix.source #=> "ab+c" + */ + static VALUE rb_reg_source(re) VALUE re; @@ -384,6 +394,27 @@ rb_reg_inspect(re) return rb_reg_desc(RREGEXP(re)->str, RREGEXP(re)->len, re); } + +/* + * call-seq: + * rxp.to_s => str + * + * Returns a string containing the regular expression and its options (using the + * (?xxx:yyy) notation). This string can be fed back in to + * Regexp::new to produce a regular expression with the same semantics as + * the original. (However, Regexp#== may not return true when + * comparing the two, as the source of the regular expression itself may + * differ, as the example shows). Regexp#inspect produces a + * generally more readable version of rxp. + * + * r1 = /ab+c/ix #=> /ab+c/ix + * s1 = r1.to_s #=> "(?ix-m:ab+c)" + * r2 = Regexp.new(s1) #=> /(?ix-m:ab+c)/ + * r1 == r2 #=> false + * r1.source #=> "ab+c" + * r2.source #=> "(?ix-m:ab+c)" + */ + static VALUE rb_reg_to_s(re) VALUE re; @@ -490,6 +521,14 @@ rb_reg_raise(s, len, err, re) rb_raise(rb_eRegexpError, "%s: %s", err, RSTRING(desc)->ptr); } + +/* + * call-seq: + * rxp.casefold? => true or false + * + * Returns the value of the case-insensitive flag. 
+ */ + static VALUE rb_reg_casefold_p(re) VALUE re; @@ -499,6 +538,30 @@ rb_reg_casefold_p(re) return Qfalse; } + +/* + * call-seq: + * rxp.options => fixnum + * + * Returns the set of bits corresponding to the options used when creating this + * Regexp (see Regexp::new for details). Note that additional bits + * may be set in the returned options: these are used internally by the regular + * expression code. These extra bits are ignored if the options are passed to + * Regexp::new. + * + * Regexp::IGNORECASE #=> 1 + * Regexp::EXTENDED #=> 2 + * Regexp::MULTILINE #=> 4 + * + * /cat/.options #=> 128 + * /cat/ix.options #=> 131 + * Regexp.new('cat', true).options #=> 129 + * Regexp.new('cat', 0, 's').options #=> 384 + * + * r = /cat/ix + * Regexp.new(r.source, r.options) #=> /cat/ix + */ + static VALUE rb_reg_options_m(re) VALUE re; @@ -507,6 +570,14 @@ rb_reg_options_m(re) return INT2NUM(options); } + +/* + * call-seq: + * rxp.kcode => str + * + * Returns the character set code for the regexp. + */ + static VALUE rb_reg_kcode_m(re) VALUE re; @@ -564,6 +635,20 @@ make_regexp(s, len, flags) return rp; } + +/* + * Document-class: MatchData + * + * MatchData is the type of the special variable $~, + * and is the type of the object returned by Regexp#match and + * Regexp#last_match. It encapsulates all the results of a pattern + * match, results normally accessed through the special variables + * $&, $', $`, $1, + * $2, and so on. MatchData is also known as + * MatchingData. + * + */ + static VALUE rb_cMatch; static VALUE match_alloc _((VALUE)); @@ -599,6 +684,19 @@ match_init_copy(obj, orig) return obj; } + +/* + * call-seq: + * mtch.length => integer + * mtch.size => integer + * + * Returns the number of elements in the match array. 
+ * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.length #=> 5 + * m.size #=> 5 + */ + static VALUE match_size(match) VALUE match; @@ -606,6 +704,19 @@ match_size(match) return INT2FIX(RMATCH(match)->regs->num_regs); } + +/* + * call-seq: + * mtch.offset(n) => array + * + * Returns a two-element array containing the beginning and ending offsets of + * the nth match. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.offset(0) #=> [1, 7] + * m.offset(4) #=> [6, 7] + */ + static VALUE match_offset(match, n) VALUE match, n; @@ -622,6 +733,19 @@ match_offset(match, n) INT2FIX(RMATCH(match)->regs->end[i])); } + +/* + * call-seq: + * mtch.begin(n) => integer + * + * Returns the offset of the start of the nth element of the match + * array in the string. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.begin(0) #=> 1 + * m.begin(2) #=> 2 + */ + static VALUE match_begin(match, n) VALUE match, n; @@ -637,6 +761,19 @@ match_begin(match, n) return INT2FIX(RMATCH(match)->regs->beg[i]); } + +/* + * call-seq: + * mtch.end(n) => integer + * + * Returns the offset of the character immediately following the end of the + * nth element of the match array in the string. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.end(0) #=> 7 + * m.end(2) #=> 3 + */ + static VALUE match_end(match, n) VALUE match, n; @@ -846,6 +983,18 @@ rb_reg_last_match(match) return rb_reg_nth_match(0, match); } + +/* + * call-seq: + * mtch.pre_match => str + * + * Returns the portion of the original string before the current match. + * Equivalent to the special variable $`. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.pre_match #=> "T" + */ + VALUE rb_reg_match_pre(match) VALUE match; @@ -859,6 +1008,18 @@ rb_reg_match_pre(match) return str; } + +/* + * call-seq: + * mtch.post_match => str + * + * Returns the portion of the original string after the current match. + * Equivalent to the special variable $'. 
+ * + * m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") + * m.post_match #=> ": The Movie" + */ + VALUE rb_reg_match_post(match) VALUE match; @@ -938,6 +1099,33 @@ match_array(match, start) return ary; } + +/* [MG]:FIXME: I put parens around the /.../.match() in the first line of the + second example to prevent the '*' followed by a '/' from ending the + comment. */ + +/* + * call-seq: + * mtch.to_a => anArray + * + * Returns the array of matches. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.to_a #=> ["HX1138", "H", "X", "113", "8"] + * + * Because to_a is called when expanding + * *variable, there's a useful assignment + * shortcut for extracting matched fields. This is slightly slower than + * accessing the fields directly (as an intermediate array is + * generated). + * + * all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138.")) + * all #=> "HX1138" + * f1 #=> "H" + * f2 #=> "X" + * f3 #=> "113" + */ + static VALUE match_to_a(match) VALUE match; @@ -945,6 +1133,19 @@ match_to_a(match) return match_array(match, 0); } + +/* + * call-seq: + * mtch.captures => array + * + * Returns the array of captures; equivalent to mtch.to_a[1..-1]. + * + * f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures + * f1 #=> "H" + * f2 #=> "X" + * f3 #=> "113" + * f4 #=> "8" + */ static VALUE match_captures(match) VALUE match; @@ -952,6 +1153,26 @@ match_captures(match) return match_array(match, 1); } + +/* + * call-seq: + * mtch[i] => obj + * mtch[start, length] => array + * mtch[range] => array + * + * Match Reference---MatchData acts as an array, and may be + * accessed using the normal array indexing techniques. mtch[0] is + * equivalent to the special variable $&, and returns the entire + * matched string. mtch[1], mtch[2], and so on return the values + * of the matched backreferences (portions of the pattern between parentheses). 
+ * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m[0] #=> "HX1138" + * m[1, 2] #=> ["H", "X"] + * m[1..3] #=> ["H", "X", "113"] + * m[-3, 2] #=> ["X", "113"] + */ + static VALUE match_aref(argc, argv, match) int argc; @@ -977,6 +1198,19 @@ match_entry(match, n) return rb_reg_nth_match(n, match); } + +/* + * call-seq: + * mtch.values_at([index]*) => array + * + * Uses each index to access the matching values, returning an array of + * the corresponding matches. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") + * m.to_a #=> ["HX1138", "H", "X", "113", "8"] + * m.values_at(0, 2, -2) #=> ["HX1138", "X", "113"] + */ + static VALUE match_values_at(argc, argv, match) int argc; @@ -986,6 +1220,19 @@ match_values_at(argc, argv, match) return rb_values_at(match, RMATCH(match)->regs->num_regs, argc, argv, match_entry); } + +/* + * call-seq: + * mtch.select([index]*) => array + * + * Uses each index to access the matching values, returning an + * array of the corresponding matches. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") + * m.to_a #=> ["HX1138", "H", "X", "113", "8"] + * m.select(0, 2, -2) #=> ["HX1138", "X", "113"] + */ + static VALUE match_select(argc, argv, match) int argc; @@ -1013,6 +1260,17 @@ match_select(argc, argv, match) } } + +/* + * call-seq: + * mtch.to_s => str + * + * Returns the entire matched string. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.to_s #=> "HX1138" + */ + static VALUE match_to_s(match) VALUE match; @@ -1025,6 +1283,17 @@ match_to_s(match) return str; } + +/* + * call-seq: + * mtch.string => str + * + * Returns a frozen copy of the string passed in to match. + * + * m = /(.)(.)(\d+)(\d)/.match("THX1138.") + * m.string #=> "THX1138." 
+ */ + static VALUE match_string(match) VALUE match; @@ -1166,6 +1435,21 @@ rb_reg_hash(re) return INT2FIX(hashval); } + +/* + * call-seq: + * rxp == other_rxp => true or false + * rxp.eql?(other_rxp) => true or false + * + * Equality---Two regexps are equal if their patterns are identical, they have + * the same character set code, and their casefold? values are the + * same. + * + * /abc/ == /abc/x #=> false + * /abc/ == /abc/i #=> false + * /abc/u == /abc/n #=> false + */ + static VALUE rb_reg_equal(re1, re2) VALUE re1, re2; @@ -1182,6 +1466,18 @@ rb_reg_equal(re1, re2) return Qfalse; } + +/* + * call-seq: + * rxp.match(str) => matchdata or nil + * + * Returns a MatchData object describing the match, or + * nil if there was no match. This is equivalent to retrieving the + * value of the special variable $~ following a normal match. + * + * /(.)(.)(.)/.match("abc")[2] #=> "b" + */ + VALUE rb_reg_match(re, str) VALUE re, str; @@ -1200,6 +1496,25 @@ rb_reg_match(re, str) return LONG2FIX(start); } + +/* + * call-seq: + * rxp === str => true or false + * + * Case Equality---Synonym for Regexp#=~ used in case statements. + * + * a = "HELLO" + * case a + * when /^[a-z]*$/; print "Lower case\n" + * when /^[A-Z]*$/; print "Upper case\n" + * else; print "Mixed case\n" + * end + * + * produces: + * + * Upper case + */ + VALUE rb_reg_eqq(re, str) VALUE re, str; @@ -1221,6 +1536,18 @@ rb_reg_eqq(re, str) return Qtrue; } + +/* + * call-seq: + * ~ rxp => integer or nil + * + * Match---Matches rxp against the contents of $_. + * Equivalent to rxp =~ $_. + * + * $_ = "input data" + * ~ /at/ #=> 7 + */ + VALUE rb_reg_match2(re) VALUE re; @@ -1240,6 +1567,18 @@ rb_reg_match2(re) return LONG2FIX(start); } + +/* + * call-seq: + * rxp.match(str) => matchdata or nil + * + * Returns a MatchData object describing the match, or + * nil if there was no match. This is equivalent to retrieving the + * value of the special variable $~ following a normal match. 
+ * + * /(.)(.)(.)/.match("abc")[2] #=> "b" + */ + static VALUE rb_reg_match_m(re, str) VALUE re, str; @@ -1252,6 +1591,30 @@ rb_reg_match_m(re, str) return result; } +/* + * call-seq: + * Regexp.new(string [, options [, lang]]) => regexp + * Regexp.new(regexp) => regexp + * Regexp.compile(string [, options [, lang]]) => regexp + * Regexp.compile(regexp) => regexp + * + * Constructs a new regular expression from pattern, which can be either + * a String or a Regexp (in which case that regexp's + * options are propagated, and new options may not be specified (a change as of + * Ruby 1.8)). If options is a Fixnum, it should be one or + * more of the constants Regexp::EXTENDED, + * Regexp::IGNORECASE, and Regexp::MULTILINE, + * or-ed together. Otherwise, if options is not + * nil, the regexp will be case insensitive. The lang + * parameter enables multibyte support for the regexp: `n', `N' = none, `e', + * `E' = EUC, `s', `S' = SJIS, `u', `U' = UTF-8. + * + * r1 = Regexp.new('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/ + * r2 = Regexp.new('cat', true) #=> /cat/i + * r3 = Regexp.new('dog', Regexp::EXTENDED) #=> /dog/x + * r4 = Regexp.new(r2) #=> /cat/i + */ + static VALUE rb_reg_initialize_m(argc, argv, self) int argc; @@ -1411,6 +1774,19 @@ rb_reg_quote(str) return tmp; } + +/* + * call-seq: + * Regexp.escape(str) => new_str + * Regexp.quote(str) => new_str + * + * Escapes any characters that would have special meaning in a regular + * expression. For any string, + * Regexp.new(Regexp.escape(str))=~str will be true. + * + * Regexp.escape('\\*?{}.') #=> \\\\\*\?\{\}\. + */ + static VALUE rb_reg_s_quote(argc, argv) int argc; @@ -1480,6 +1856,21 @@ rb_reg_options(re) return options; } + +/* + * call-seq: + * Regexp.union([pattern]*) => new_regexp + * + * Return a Regexp object that is the union of the given + * patterns, i.e., will match any of its parts. The patterns + * can be Regexp objects, in which case their options will be preserved, or + * Strings. 
If no arguments are given, returns /(?!)/. + * + * Regexp.union #=> /(?!)/ + * Regexp.union("penzance") #=> /penzance/ + * Regexp.union("skiing", "sledding") #=> /skiing|sledding/ + * Regexp.union(/dogs/, /cats/i) #=> /(?-mix:dogs)|(?i-mx:cats)/ + */ static VALUE rb_reg_s_union(argc, argv) int argc; @@ -1756,6 +2147,23 @@ match_setter(val) rb_backref_set(val); } +/* + * call-seq: + * Regexp.last_match => matchdata + * Regexp.last_match(fixnum) => str + * + * The first form returns the MatchData object generated by the + * last successful pattern match. Equivalent to reading the global variable + * $~. The second form returns the nth field in this + * MatchData object. + * + * /c(.)t/ =~ 'cat' #=> 0 + * Regexp.last_match #=> #<MatchData:0x401b3d30> + * Regexp.last_match(0) #=> "cat" + * Regexp.last_match(1) #=> "a" + * Regexp.last_match(2) #=> nil + */ + static VALUE rb_reg_s_last_match(argc, argv) int argc; @@ -1769,6 +2177,17 @@ rb_reg_s_last_match(argc, argv) return match_getter(); } + +/* + * Document-class: Regexp + * + * A Regexp holds a regular expression, used to match a pattern + * against strings. Regexps are created using the /.../ and + * %r{...} literals, and by the Regexp::new + * constructor. + * + */ + void Init_Regexp() {