1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* re.c (rb_reg_s_last_match): accept named capture's name.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14161 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2007-12-09 13:35:38 +00:00
parent 2d101f0a87
commit e56e8c758d
3 changed files with 92 additions and 43 deletions

View file

@ -1,3 +1,7 @@
Sun Dec 9 22:31:36 2007 Tanaka Akira <akr@fsij.org>
* re.c (rb_reg_s_last_match): accept named capture's name.
Sun Dec 9 15:57:53 2007 Tanaka Akira <akr@fsij.org> Sun Dec 9 15:57:53 2007 Tanaka Akira <akr@fsij.org>
* re.c (match_backref_number): new function for converting a backref * re.c (match_backref_number): new function for converting a backref

85
re.c
View file

@ -296,6 +296,11 @@ rb_reg_desc(const char *s, long len, VALUE re)
* Returns the original string of the pattern. * Returns the original string of the pattern.
* *
* /ab+c/ix.source #=> "ab+c" * /ab+c/ix.source #=> "ab+c"
*
* Note that escape sequences are retained as is.
*
* /\x20\+/.source #=> "\\x20\\+"
*
*/ */
static VALUE static VALUE
@ -317,7 +322,8 @@ rb_reg_source(VALUE re)
* <code>#inspect</code> actually produces the more natural version of * <code>#inspect</code> actually produces the more natural version of
* the string than <code>#to_s</code>. * the string than <code>#to_s</code>.
* *
* /ab+c/ix.inspect #=> /ab+c/ix * /ab+c/ix.inspect #=> "/ab+c/ix"
*
*/ */
static VALUE static VALUE
@ -472,6 +478,10 @@ rb_reg_raise_str(VALUE str, int options, const char *err)
* rxp.casefold? => true or false * rxp.casefold? => true or false
* *
* Returns the value of the case-insensitive flag. * Returns the value of the case-insensitive flag.
*
* /a/.casefold? #=> false
* /a/i.casefold? #=> true
* /(?i:a)/.casefold? #=> false
*/ */
static VALUE static VALUE
@ -497,10 +507,10 @@ rb_reg_casefold_p(VALUE re)
* Regexp::EXTENDED #=> 2 * Regexp::EXTENDED #=> 2
* Regexp::MULTILINE #=> 4 * Regexp::MULTILINE #=> 4
* *
* /cat/.options #=> 128 * /cat/.options #=> 0
* /cat/ix.options #=> 131 * /cat/ix.options #=> 3
* Regexp.new('cat', true).options #=> 129 * Regexp.new('cat', true).options #=> 1
* Regexp.new('cat', 0, 's').options #=> 384 * /\xa1\xa2/e.options #=> 16
* *
* r = /cat/ix * r = /cat/ix
* Regexp.new(r.source, r.options) #=> /cat/ix * Regexp.new(r.source, r.options) #=> /cat/ix
@ -551,11 +561,10 @@ make_regexp(const char *s, long len, rb_encoding *enc, int flags, onig_errmsg_bu
* *
* <code>MatchData</code> is the type of the special variable <code>$~</code>, * <code>MatchData</code> is the type of the special variable <code>$~</code>,
* and is the type of the object returned by <code>Regexp#match</code> and * and is the type of the object returned by <code>Regexp#match</code> and
* <code>Regexp#last_match</code>. It encapsulates all the results of a pattern * <code>Regexp.last_match</code>. It encapsulates all the results of a pattern
* match, results normally accessed through the special variables * match, results normally accessed through the special variables
* <code>$&</code>, <code>$'</code>, <code>$`</code>, <code>$1</code>, * <code>$&</code>, <code>$'</code>, <code>$`</code>, <code>$1</code>,
* <code>$2</code>, and so on. <code>Matchdata</code> is also known as * <code>$2</code>, and so on.
* <code>MatchingData</code>.
* *
*/ */
@ -662,6 +671,7 @@ match_backref_number(VALUE match, VALUE backref)
* *
* Returns a two-element array containing the beginning and ending offsets of * Returns a two-element array containing the beginning and ending offsets of
* the <em>n</em>th match. * the <em>n</em>th match.
* <em>n</em> can be a string or symbol to reference a named capture.
* *
* m = /(.)(.)(\d+)(\d)/.match("THX1138.") * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
* m.offset(0) #=> [1, 7] * m.offset(0) #=> [1, 7]
@ -695,6 +705,7 @@ match_offset(VALUE match, VALUE n)
* *
* Returns the offset of the start of the <em>n</em>th element of the match * Returns the offset of the start of the <em>n</em>th element of the match
* array in the string. * array in the string.
* <em>n</em> can be a string or symbol to reference a named capture.
* *
* m = /(.)(.)(\d+)(\d)/.match("THX1138.") * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
* m.begin(0) #=> 1 * m.begin(0) #=> 1
@ -726,6 +737,7 @@ match_begin(VALUE match, VALUE n)
* *
* Returns the offset of the character immediately following the end of the * Returns the offset of the character immediately following the end of the
* <em>n</em>th element of the match array in the string. * <em>n</em>th element of the match array in the string.
* <em>n</em> can be a string or symbol to reference a named capture.
* *
* m = /(.)(.)(\d+)(\d)/.match("THX1138.") * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
* m.end(0) #=> 7 * m.end(0) #=> 7
@ -762,25 +774,29 @@ rb_match_busy(VALUE match)
* call-seq: * call-seq:
* rxp.fixed_encoding? => true or false * rxp.fixed_encoding? => true or false
* *
* Returns true if rxp is only applicable to * Returns false if rxp is applicable to
* a string encoded as rxp.encoding. * a string with any ASCII compatible encoding.
* Returns true otherwise.
* *
* r = /a/ * r = /a/
* r.fixed_encoding? #=> false * r.fixed_encoding? #=> false
* r =~ "\u{6666} a" #=> 2 * r =~ "\u{6666} a" #=> 2
* r =~ "\xa1\xa2 a".force_encoding("euc-jp") #=> 2 * r =~ "\xa1\xa2 a".force_encoding("euc-jp") #=> 2
* r =~ "abc".force_encoding("euc-jp") #=> 0
* *
* r = /a/u * r = /a/u
* r.fixed_encoding? #=> true * r.fixed_encoding? #=> true
* r.encoding #=> <Encoding:UTF-8> * r.encoding #=> <Encoding:UTF-8>
* r =~ "\u{6666} a" #=> 2 * r =~ "\u{6666} a" #=> 2
* r =~ "\xa1\xa2".force_encoding("euc-jp") # ArgumentError * r =~ "\xa1\xa2".force_encoding("euc-jp") # ArgumentError
* r =~ "abc".force_encoding("euc-jp") #=> 0
* *
* r = /\u{6666}/ * r = /\u{6666}/
* r.fixed_encoding? #=> true * r.fixed_encoding? #=> true
* r.encoding #=> <Encoding:UTF-8> * r.encoding #=> <Encoding:UTF-8>
* r =~ "\u{6666} a" #=> 0 * r =~ "\u{6666} a" #=> 0
* r =~ "\xa1\xa2".force_encoding("euc-jp") # ArgumentError * r =~ "\xa1\xa2".force_encoding("euc-jp") # ArgumentError
* r =~ "abc".force_encoding("euc-jp") #=> nil
*/ */
static VALUE static VALUE
@ -1181,12 +1197,14 @@ name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name
* of the matched backreferences (portions of the pattern between parentheses). * of the matched backreferences (portions of the pattern between parentheses).
* *
* m = /(.)(.)(\d+)(\d)/.match("THX1138.") * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
* m #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
* m[0] #=> "HX1138" * m[0] #=> "HX1138"
* m[1, 2] #=> ["H", "X"] * m[1, 2] #=> ["H", "X"]
* m[1..3] #=> ["H", "X", "113"] * m[1..3] #=> ["H", "X", "113"]
* m[-3, 2] #=> ["X", "113"] * m[-3, 2] #=> ["X", "113"]
* *
* m = /(?<foo>a+)b/.match("ccaaab") * m = /(?<foo>a+)b/.match("ccaaab")
* m #=> #<MatchData "aaab" foo:"aaa">
* m["foo"] #=> "aaa" * m["foo"] #=> "aaa"
* m[:foo] #=> "aaa" * m[:foo] #=> "aaa"
*/ */
@ -1328,6 +1346,9 @@ match_inspect_name_iter(const OnigUChar *name, const OnigUChar *name_end,
* puts /(.)(.)(.)/.match("foo").inspect * puts /(.)(.)(.)/.match("foo").inspect
* #=> #<MatchData "foo" 1:"f" 2:"o" 3:"o"> * #=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">
* *
* puts /(.)(.)?(.)/.match("fo").inspect
* #=> #<MatchData "fo" 1:"f" 2:nil 3:"o">
*
* puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect * puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect
* #=> #<MatchData "hog" foo:"h" bar:"o" baz:"g"> * #=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">
* *
@ -1951,6 +1972,7 @@ rb_reg_hash(VALUE re)
* *
* /abc/ == /abc/x #=> false * /abc/ == /abc/x #=> false
* /abc/ == /abc/i #=> false * /abc/ == /abc/i #=> false
* /abc/ == /abc/n #=> false
* /abc/u == /abc/n #=> false * /abc/u == /abc/n #=> false
*/ */
@ -1960,6 +1982,7 @@ rb_reg_equal(VALUE re1, VALUE re2)
if (re1 == re2) return Qtrue; if (re1 == re2) return Qtrue;
if (TYPE(re2) != T_REGEXP) return Qfalse; if (TYPE(re2) != T_REGEXP) return Qfalse;
rb_reg_check(re1); rb_reg_check(re2); rb_reg_check(re1); rb_reg_check(re2);
if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)) return Qfalse;
if (RREGEXP(re1)->ptr->options != RREGEXP(re2)->ptr->options) return Qfalse; if (RREGEXP(re1)->ptr->options != RREGEXP(re2)->ptr->options) return Qfalse;
if (RREGEXP(re1)->len != RREGEXP(re2)->len) return Qfalse; if (RREGEXP(re1)->len != RREGEXP(re2)->len) return Qfalse;
if (ENCODING_GET(re1) != ENCODING_GET(re2)) return Qfalse; if (ENCODING_GET(re1) != ENCODING_GET(re2)) return Qfalse;
@ -2016,6 +2039,7 @@ rb_reg_match_pos(VALUE re, VALUE str, long pos)
* Match---Matches <i>rxp</i> against <i>str</i>. * Match---Matches <i>rxp</i> against <i>str</i>.
* *
* /at/ =~ "input data" #=> 7 * /at/ =~ "input data" #=> 7
* /ax/ =~ "input data" #=> nil
*/ */
VALUE VALUE
@ -2165,14 +2189,12 @@ rb_reg_match_m(int argc, VALUE *argv, VALUE re)
* more of the constants <code>Regexp::EXTENDED</code>, * more of the constants <code>Regexp::EXTENDED</code>,
* <code>Regexp::IGNORECASE</code>, and <code>Regexp::MULTILINE</code>, * <code>Regexp::IGNORECASE</code>, and <code>Regexp::MULTILINE</code>,
* <em>or</em>-ed together. Otherwise, if <i>options</i> is not * <em>or</em>-ed together. Otherwise, if <i>options</i> is not
* <code>nil</code>, the regexp will be case insensitive. The <i>lang</i> * <code>nil</code>, the regexp will be case insensitive.
* parameter enables multibyte support for the regexp: `n', `N' = none, `e',
* `E' = EUC, `s', `S' = SJIS, `u', `U' = UTF-8.
* *
* r1 = Regexp.new('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/ * r1 = Regexp.new('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/
* r2 = Regexp.new('cat', true) #=> /cat/i * r2 = Regexp.new('cat', true) #=> /cat/i
* r3 = Regexp.new('dog', Regexp::EXTENDED) #=> /dog/x * r3 = Regexp.new('dog', Regexp::EXTENDED) #=> /dog/x
* r4 = Regexp.new(r2) #=>n /cat/i * r4 = Regexp.new(r2) #=> /cat/i
*/ */
static VALUE static VALUE
@ -2331,15 +2353,16 @@ rb_reg_quote(VALUE str)
/* /*
* call-seq: * call-seq:
* Regexp.escape(str) => a_str * Regexp.escape(str) => string
* Regexp.quote(str) => a_str * Regexp.quote(str) => string
* *
* Escapes any characters that would have special meaning in a regular * Escapes any characters that would have special meaning in a regular
* expression. Returns a new escaped string, or self if no characters are * expression. Returns a new escaped string, or self if no characters are
* escaped. For any string, * escaped. For any string,
* <code>Regexp.escape(<i>str</i>)=~<i>str</i></code> will be true. * <code>Regexp.new(Regexp.escape(<i>str</i>))=~<i>str</i></code> will be true.
*
* Regexp.escape('\*?{}.') #=> \\\*\?\{\}\.
* *
* Regexp.escape('\\*?{}.') #=> \\\\\*\?\{\}\.
*/ */
static VALUE static VALUE
@ -2373,8 +2396,14 @@ rb_check_regexp_type(VALUE re)
* Returns converted regexp or nil if <i>obj</i> cannot be converted * Returns converted regexp or nil if <i>obj</i> cannot be converted
* for any reason. * for any reason.
* *
* Regexp.try_convert(/re/) # => /re/ * Regexp.try_convert(/re/) #=> /re/
* Regexp.try_convert("re") # => nil * Regexp.try_convert("re") #=> nil
*
* o = Object.new
* Regexp.try_convert(o) #=> nil
* def o.to_regexp() /foo/ end
* Regexp.try_convert(o) #=> /foo/
*
*/ */
static VALUE static VALUE
rb_reg_s_try_convert(VALUE dummy, VALUE re) rb_reg_s_try_convert(VALUE dummy, VALUE re)
@ -2696,18 +2725,24 @@ match_setter(VALUE val)
/* /*
* call-seq: * call-seq:
* Regexp.last_match => matchdata * Regexp.last_match => matchdata
* Regexp.last_match(fixnum) => str * Regexp.last_match(n) => str
* *
* The first form returns the <code>MatchData</code> object generated by the * The first form returns the <code>MatchData</code> object generated by the
* last successful pattern match. Equivalent to reading the global variable * last successful pattern match. Equivalent to reading the global variable
* <code>$~</code>. The second form returns the nth field in this * <code>$~</code>. The second form returns the <i>n</i>th field in this
* <code>MatchData</code> object. * <code>MatchData</code> object.
* <em>n</em> can be a string or symbol to reference a named capture.
* *
* /c(.)t/ =~ 'cat' #=> 0 * /c(.)t/ =~ 'cat' #=> 0
* Regexp.last_match #=> #<MatchData "cat" 1:"a"> * Regexp.last_match #=> #<MatchData "cat" 1:"a">
* Regexp.last_match(0) #=> "cat" * Regexp.last_match(0) #=> "cat"
* Regexp.last_match(1) #=> "a" * Regexp.last_match(1) #=> "a"
* Regexp.last_match(2) #=> nil * Regexp.last_match(2) #=> nil
*
* /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "var = val"
* Regexp.last_match #=> #<MatchData "var = val" lhs:"var" rhs:"val">
* Regexp.last_match(:lhs) #=> "var"
* Regexp.last_match(:rhs) #=> "val"
*/ */
static VALUE static VALUE
@ -2716,7 +2751,11 @@ rb_reg_s_last_match(int argc, VALUE *argv)
VALUE nth; VALUE nth;
if (rb_scan_args(argc, argv, "01", &nth) == 1) { if (rb_scan_args(argc, argv, "01", &nth) == 1) {
return rb_reg_nth_match(NUM2INT(nth), rb_backref_get()); VALUE match = rb_backref_get();
int n;
if (NIL_P(match)) return Qnil;
n = match_backref_number(match, nth);
return rb_reg_nth_match(n, match);
} }
return match_getter(); return match_getter();
} }

View file

@ -69,6 +69,12 @@ class TestRegexp < Test::Unit::TestCase
assert_equal('#<MatchData "&amp; y" foo:"amp" foo:"y">', assert_equal('#<MatchData "&amp; y" foo:"amp" foo:"y">',
/&(?<foo>.*?); (?<foo>y)/.match("aaa &amp; yyy").inspect) /&(?<foo>.*?); (?<foo>y)/.match("aaa &amp; yyy").inspect)
# MatchData#keys /(?<id>[A-Za-z_]+)/ =~ "!abc"
assert_equal("abc", Regexp.last_match(:id))
/a/ =~ "b"
assert_equal(nil, Regexp.last_match)
assert_equal(nil, Regexp.last_match(1))
assert_equal(nil, Regexp.last_match(:foo))
end end
end end