1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

MG added RDoc comments for re.c

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5309 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
dave 2003-12-26 15:58:28 +00:00
parent b9a79bc020
commit d1fd7bdf48

419
re.c
View file

@ -364,6 +364,16 @@ rb_reg_desc(s, len, re)
return str;
}
/*
* call-seq:
* rxp.source => str
*
* Returns the original string of the pattern.
*
* /ab+c/ix.source #=> "ab+c"
*/
static VALUE
rb_reg_source(re)
VALUE re;
@ -384,6 +394,27 @@ rb_reg_inspect(re)
return rb_reg_desc(RREGEXP(re)->str, RREGEXP(re)->len, re);
}
/*
* call-seq:
* rxp.to_s => str
*
* Returns a string containing the regular expression and its options (using the
* <code>(?xxx:yyy)</code> notation). This string can be fed back in to
* <code>Regexp::new</code> to create a regular expression with the same semantics as
* the original. (However, <code>Regexp#==</code> may not return true when
* comparing the two, as the source of the regular expression itself may
* differ, as the example shows). <code>Regexp#inspect</code> produces a
* generally more readable version of <i>rxp</i>.
*
* r1 = /ab+c/ix #=> /ab+c/ix
* s1 = r1.to_s #=> "(?ix-m:ab+c)"
* r2 = Regexp.new(s1) #=> /(?ix-m:ab+c)/
* r1 == r2 #=> false
* r1.source #=> "ab+c"
* r2.source #=> "(?ix-m:ab+c)"
*/
static VALUE
rb_reg_to_s(re)
VALUE re;
@ -490,6 +521,14 @@ rb_reg_raise(s, len, err, re)
rb_raise(rb_eRegexpError, "%s: %s", err, RSTRING(desc)->ptr);
}
/*
* call-seq:
* rxp.casefold? => true or false
*
* Returns the value of the case-insensitive flag.
*/
static VALUE
rb_reg_casefold_p(re)
VALUE re;
@ -499,6 +538,30 @@ rb_reg_casefold_p(re)
return Qfalse;
}
/*
* call-seq:
* rxp.options => fixnum
*
* Returns the set of bits corresponding to the options used when creating this
* Regexp (see <code>Regexp::new</code> for details). Note that additional bits
* may be set in the returned options: these are used internally by the regular
* expression code. These extra bits are ignored if the options are passed to
* <code>Regexp::new</code>.
*
* Regexp::IGNORECASE #=> 1
* Regexp::EXTENDED #=> 2
* Regexp::MULTILINE #=> 4
*
* /cat/.options #=> 128
* /cat/ix.options #=> 131
* Regexp.new('cat', true).options #=> 129
* Regexp.new('cat', 0, 's').options #=> 384
*
* r = /cat/ix
* Regexp.new(r.source, r.options) #=> /cat/ix
*/
static VALUE
rb_reg_options_m(re)
VALUE re;
@ -507,6 +570,14 @@ rb_reg_options_m(re)
return INT2NUM(options);
}
/*
* call-seq:
* rxp.kcode => str
*
* Returns the character set code for the regexp.
*/
static VALUE
rb_reg_kcode_m(re)
VALUE re;
@ -564,6 +635,20 @@ make_regexp(s, len, flags)
return rp;
}
/*
* Document-class: MatchData
*
* <code>MatchData</code> is the type of the special variable <code>$~</code>,
* and is the type of the object returned by <code>Regexp#match</code> and
* <code>Regexp::last_match</code>. It encapsulates all the results of a pattern
* match, results normally accessed through the special variables
* <code>$&</code>, <code>$'</code>, <code>$`</code>, <code>$1</code>,
* <code>$2</code>, and so on. <code>MatchData</code> is also known as
* <code>MatchingData</code>.
*
*/
static VALUE rb_cMatch;
static VALUE match_alloc _((VALUE));
@ -599,6 +684,19 @@ match_init_copy(obj, orig)
return obj;
}
/*
* call-seq:
* mtch.length => integer
* mtch.size => integer
*
* Returns the number of elements in the match array.
*
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
* m.length #=> 5
* m.size #=> 5
*/
static VALUE
match_size(match)
VALUE match;
@ -606,6 +704,19 @@ match_size(match)
return INT2FIX(RMATCH(match)->regs->num_regs);
}
/*
* call-seq:
* mtch.offset(n) => array
*
* Returns a two-element array containing the beginning and ending offsets of
* the <em>n</em>th match.
*
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
* m.offset(0) #=> [1, 7]
* m.offset(4) #=> [6, 7]
*/
static VALUE
match_offset(match, n)
VALUE match, n;
@ -622,6 +733,19 @@ match_offset(match, n)
INT2FIX(RMATCH(match)->regs->end[i]));
}
/*
* call-seq:
* mtch.begin(n) => integer
*
* Returns the offset of the start of the <em>n</em>th element of the match
* array in the string.
*
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
* m.begin(0) #=> 1
* m.begin(2) #=> 2
*/
static VALUE
match_begin(match, n)
VALUE match, n;
@ -637,6 +761,19 @@ match_begin(match, n)
return INT2FIX(RMATCH(match)->regs->beg[i]);
}
/*
* call-seq:
* mtch.end(n) => integer
*
* Returns the offset of the character immediately following the end of the
* <em>n</em>th element of the match array in the string.
*
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
* m.end(0) #=> 7
* m.end(2) #=> 3
*/
static VALUE
match_end(match, n)
VALUE match, n;
@ -846,6 +983,18 @@ rb_reg_last_match(match)
return rb_reg_nth_match(0, match);
}
/*
* call-seq:
* mtch.pre_match => str
*
* Returns the portion of the original string before the current match.
* Equivalent to the special variable <code>$`</code>.
*
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
* m.pre_match #=> "T"
*/
VALUE
rb_reg_match_pre(match)
VALUE match;
@ -859,6 +1008,18 @@ rb_reg_match_pre(match)
return str;
}
/*
* call-seq:
* mtch.post_match => str
*
* Returns the portion of the original string after the current match.
* Equivalent to the special variable <code>$'</code>.
*
* m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
* m.post_match #=> ": The Movie"
*/
VALUE
rb_reg_match_post(match)
VALUE match;
@ -938,6 +1099,33 @@ match_array(match, start)
return ary;
}
/* [MG]:FIXME: I put parens around the /.../.match() in the first line of the
second example to prevent the '*' followed by a '/' from ending the
comment. */
/*
* call-seq:
* mtch.to_a => anArray
*
* Returns the array of matches.
*
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
* m.to_a #=> ["HX1138", "H", "X", "113", "8"]
*
* Because <code>to_a</code> is called when expanding
* <code>*</code><em>variable</em>, there's a useful assignment
* shortcut for extracting matched fields. This is slightly slower than
* accessing the fields directly (as an intermediate array is
* generated).
*
* all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
* all #=> "HX1138"
* f1 #=> "H"
* f2 #=> "X"
* f3 #=> "113"
*/
static VALUE
match_to_a(match)
VALUE match;
@ -945,6 +1133,19 @@ match_to_a(match)
return match_array(match, 0);
}
/*
* call-seq:
* mtch.captures => array
*
* Returns the array of captures; equivalent to <code>mtch.to_a[1..-1]</code>.
*
* f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
* f1 #=> "H"
* f2 #=> "X"
* f3 #=> "113"
* f4 #=> "8"
*/
static VALUE
match_captures(match)
VALUE match;
@ -952,6 +1153,26 @@ match_captures(match)
return match_array(match, 1);
}
/*
* call-seq:
* mtch[i] => obj
* mtch[start, length] => array
* mtch[range] => array
*
* Match Reference---<code>MatchData</code> acts as an array, and may be
* accessed using the normal array indexing techniques. <i>mtch</i>[0] is
* equivalent to the special variable <code>$&</code>, and returns the entire
* matched string. <i>mtch</i>[1], <i>mtch</i>[2], and so on return the values
* of the matched backreferences (portions of the pattern between parentheses).
*
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
* m[0] #=> "HX1138"
* m[1, 2] #=> ["H", "X"]
* m[1..3] #=> ["H", "X", "113"]
* m[-3, 2] #=> ["X", "113"]
*/
static VALUE
match_aref(argc, argv, match)
int argc;
@ -977,6 +1198,19 @@ match_entry(match, n)
return rb_reg_nth_match(n, match);
}
/*
* call-seq:
* mtch.values_at([index]*) => array
*
* Uses each <i>index</i> to access the matching values, returning an array of
* the corresponding matches.
*
* m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
* m.to_a #=> ["HX1138", "H", "X", "113", "8"]
* m.values_at(0, 2, -2) #=> ["HX1138", "X", "113"]
*/
static VALUE
match_values_at(argc, argv, match)
int argc;
@ -986,6 +1220,19 @@ match_values_at(argc, argv, match)
return rb_values_at(match, RMATCH(match)->regs->num_regs, argc, argv, match_entry);
}
/*
* call-seq:
* mtch.select([index]*) => array
*
* Uses each <i>index</i> to access the matching values, returning an
* array of the corresponding matches.
*
* m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
* m.to_a #=> ["HX1138", "H", "X", "113", "8"]
* m.select(0, 2, -2) #=> ["HX1138", "X", "113"]
*/
static VALUE
match_select(argc, argv, match)
int argc;
@ -1013,6 +1260,17 @@ match_select(argc, argv, match)
}
}
/*
* call-seq:
* mtch.to_s => str
*
* Returns the entire matched string.
*
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
* m.to_s #=> "HX1138"
*/
static VALUE
match_to_s(match)
VALUE match;
@ -1025,6 +1283,17 @@ match_to_s(match)
return str;
}
/*
* call-seq:
* mtch.string => str
*
* Returns a frozen copy of the string passed in to <code>match</code>.
*
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
* m.string #=> "THX1138."
*/
static VALUE
match_string(match)
VALUE match;
@ -1166,6 +1435,21 @@ rb_reg_hash(re)
return INT2FIX(hashval);
}
/*
* call-seq:
* rxp == other_rxp => true or false
* rxp.eql?(other_rxp) => true or false
*
* Equality---Two regexps are equal if their patterns are identical, they have
* the same character set code, and their <code>casefold?</code> values are the
* same.
*
* /abc/ == /abc/x #=> false
* /abc/ == /abc/i #=> false
* /abc/u == /abc/n #=> false
*/
static VALUE
rb_reg_equal(re1, re2)
VALUE re1, re2;
@ -1182,6 +1466,18 @@ rb_reg_equal(re1, re2)
return Qfalse;
}
/*
* call-seq:
* rxp =~ str => integer or nil
*
* Match---Matches <i>rxp</i> against <i>str</i>, returning the offset of the
* start of the match, or <code>nil</code> if there was no match. Use
* <code>Regexp#match</code> to obtain a <code>MatchData</code> object instead.
*
* /at/ =~ "input data" #=> 7
*/
VALUE
rb_reg_match(re, str)
VALUE re, str;
@ -1200,6 +1496,25 @@ rb_reg_match(re, str)
return LONG2FIX(start);
}
/*
* call-seq:
* rxp === str => true or false
*
* Case Equality---Synonym for <code>Regexp#=~</code> used in case statements.
*
* a = "HELLO"
* case a
* when /^[a-z]*$/; print "Lower case\n"
* when /^[A-Z]*$/; print "Upper case\n"
* else; print "Mixed case\n"
* end
*
* <em>produces:</em>
*
* Upper case
*/
VALUE
rb_reg_eqq(re, str)
VALUE re, str;
@ -1221,6 +1536,18 @@ rb_reg_eqq(re, str)
return Qtrue;
}
/*
* call-seq:
* ~ rxp => integer or nil
*
* Match---Matches <i>rxp</i> against the contents of <code>$_</code>.
* Equivalent to <code><i>rxp</i> =~ $_</code>.
*
* $_ = "input data"
* ~ /at/ #=> 7
*/
VALUE
rb_reg_match2(re)
VALUE re;
@ -1240,6 +1567,18 @@ rb_reg_match2(re)
return LONG2FIX(start);
}
/*
* call-seq:
* rxp.match(str) => matchdata or nil
*
* Returns a <code>MatchData</code> object describing the match, or
* <code>nil</code> if there was no match. This is equivalent to retrieving the
* value of the special variable <code>$~</code> following a normal match.
*
* /(.)(.)(.)/.match("abc")[2] #=> "b"
*/
static VALUE
rb_reg_match_m(re, str)
VALUE re, str;
@ -1252,6 +1591,30 @@ rb_reg_match_m(re, str)
return result;
}
/*
* call-seq:
* Regexp.new(string [, options [, lang]]) => regexp
* Regexp.new(regexp) => regexp
* Regexp.compile(string [, options [, lang]]) => regexp
* Regexp.compile(regexp) => regexp
*
* Constructs a new regular expression from <i>pattern</i>, which can be either
* a <code>String</code> or a <code>Regexp</code> (in which case that regexp's
* options are propagated, and new options may not be specified (a change as of
* Ruby 1.8)). If <i>options</i> is a <code>Fixnum</code>, it should be one or
* more of the constants <code>Regexp::EXTENDED</code>,
* <code>Regexp::IGNORECASE</code>, and <code>Regexp::MULTILINE</code>,
* <em>or</em>-ed together. Otherwise, if <i>options</i> is not
* <code>nil</code>, the regexp will be case insensitive. The <i>lang</i>
* parameter enables multibyte support for the regexp: `n', `N' = none, `e',
* `E' = EUC, `s', `S' = SJIS, `u', `U' = UTF-8.
*
* r1 = Regexp.new('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/
* r2 = Regexp.new('cat', true) #=> /cat/i
* r3 = Regexp.new('dog', Regexp::EXTENDED) #=> /dog/x
* r4 = Regexp.new(r2) #=> /cat/i
*/
static VALUE
rb_reg_initialize_m(argc, argv, self)
int argc;
@ -1411,6 +1774,19 @@ rb_reg_quote(str)
return tmp;
}
/*
* call-seq:
* Regexp.escape(str) => new_str
* Regexp.quote(str) => new_str
*
* Escapes any characters that would have special meaning in a regular
* expression. For any string,
* <code>Regexp.new(Regexp.escape(<i>str</i>))=~<i>str</i></code> will be true.
*
* Regexp.escape('\\*?{}.') #=> \\\\\*\?\{\}\.
*/
static VALUE
rb_reg_s_quote(argc, argv)
int argc;
@ -1480,6 +1856,21 @@ rb_reg_options(re)
return options;
}
/*
* call-seq:
* Regexp.union([pattern]*) => new_regexp
*
* Return a <code>Regexp</code> object that is the union of the given
* <em>pattern</em>s, i.e., will match any of its parts. The <em>pattern</em>s
* can be Regexp objects, in which case their options will be preserved, or
* Strings. If no arguments are given, returns <code>/(?!)/</code>.
*
* Regexp.union #=> /(?!)/
* Regexp.union("penzance") #=> /penzance/
* Regexp.union("skiing", "sledding") #=> /skiing|sledding/
* Regexp.union(/dogs/, /cats/i) #=> /(?-mix:dogs)|(?i-mx:cats)/
*/
static VALUE
rb_reg_s_union(argc, argv)
int argc;
@ -1756,6 +2147,23 @@ match_setter(val)
rb_backref_set(val);
}
/*
* call-seq:
* Regexp.last_match => matchdata
* Regexp.last_match(fixnum) => str
*
* The first form returns the <code>MatchData</code> object generated by the
* last successful pattern match. Equivalent to reading the global variable
* <code>$~</code>. The second form returns the nth field in this
* <code>MatchData</code> object.
*
* /c(.)t/ =~ 'cat' #=> 0
* Regexp.last_match #=> #<MatchData:0x401b3d30>
* Regexp.last_match(0) #=> "cat"
* Regexp.last_match(1) #=> "a"
* Regexp.last_match(2) #=> nil
*/
static VALUE
rb_reg_s_last_match(argc, argv)
int argc;
@ -1769,6 +2177,17 @@ rb_reg_s_last_match(argc, argv)
return match_getter();
}
/*
* Document-class: Regexp
*
* A <code>Regexp</code> holds a regular expression, used to match a pattern
* against strings. Regexps are created using the <code>/.../</code> and
* <code>%r{...}</code> literals, and by the <code>Regexp::new</code>
* constructor.
*
*/
void
Init_Regexp()
{