mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* lib/uri/common.rb: new method URI.regexp. [ruby-dev:22121]
* test/uri/test_common.rb: add test for URI.regexp. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5136 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
62848326c8
commit
3356550581
2 changed files with 44 additions and 20 deletions
|
@ -1,3 +1,9 @@
|
||||||
|
Mon Dec 8 13:02:11 2003 Minero Aoki <aamine@loveruby.net>
|
||||||
|
|
||||||
|
* lib/uri/common.rb: new method URI.regexp. [ruby-dev:22121]
|
||||||
|
|
||||||
|
* test/uri/test_common.rb: add test for URI.regexp.
|
||||||
|
|
||||||
Mon Dec 8 12:44:14 2003 Yukihiro Matsumoto <matz@ruby-lang.org>
|
Mon Dec 8 12:44:14 2003 Yukihiro Matsumoto <matz@ruby-lang.org>
|
||||||
|
|
||||||
* pack.c: define swap16 and swap32 only if they are not
|
* pack.c: define swap16 and swap32 only if they are not
|
||||||
|
|
|
@ -396,28 +396,46 @@ module URI
|
||||||
--- URI::extract(str[, schemes])
|
--- URI::extract(str[, schemes])
|
||||||
|
|
||||||
=end
|
=end
|
||||||
def self.extract(str, schemes = [])
|
def self.extract(str, schemes = nil, &block)
|
||||||
urls = []
|
if block_given?
|
||||||
regexp = ABS_URI_REF
|
str.scan(regexp(schemes)) { yield $& }
|
||||||
unless schemes.empty?
|
nil
|
||||||
regexp = Regexp.new('(?=' + schemes.collect{|s|
|
else
|
||||||
Regexp.quote(s + ':')
|
result = []
|
||||||
}.join('|') + ')' + PATTERN::X_ABS_URI,
|
str.scan(regexp(schemes)) { result.push $& }
|
||||||
Regexp::EXTENDED, 'N')
|
result
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
str.scan(regexp) {
|
=begin
|
||||||
if block_given?
|
|
||||||
yield($&)
|
|
||||||
else
|
|
||||||
urls << $&
|
|
||||||
end
|
|
||||||
}
|
|
||||||
|
|
||||||
if block_given?
|
--- URI::regexp([match_schemes])
|
||||||
return nil
|
|
||||||
|
Returns a Regexp object which matches URI-like strings.
|
||||||
|
If MATCH_SCHEMES is given, the resulting regexp matches URIs
|
||||||
|
whose scheme is one of the MATCH_SCHEMES.
|
||||||
|
|
||||||
|
The Regexp object returned by this method includes an arbitrary
|
||||||
|
number of capture groups (parentheses). Never rely on their
|
||||||
|
number.
|
||||||
|
|
||||||
|
# extract first URI from html_string
|
||||||
|
html_string.slice(URI.regexp)
|
||||||
|
|
||||||
|
# remove ftp URIs
|
||||||
|
html_string.sub(URI.regexp(['ftp']), '')
|
||||||
|
|
||||||
|
# You should not rely on the number of parentheses
|
||||||
|
html_string.scan(URI.regexp) do |*matches|
|
||||||
|
p $&
|
||||||
|
end
|
||||||
|
|
||||||
|
=end
|
||||||
|
def self.regexp(schemes = nil)
|
||||||
|
unless schemes
|
||||||
|
ABS_URI_REF
|
||||||
else
|
else
|
||||||
return urls
|
/(?=#{Regexp.union(*schemes)}:)#{PATTERN::X_ABS_URI}/xn
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue