diff --git a/doc/string.rdoc b/doc/string.rdoc index c9fa0607f3..6c3ced298a 100644 --- a/doc/string.rdoc +++ b/doc/string.rdoc @@ -47,4 +47,96 @@ class String def initialize(str = '', encoding: nil, capacity: nil) Primitive.rb_str_init(str, encoding, capacity) end + + # call-seq: + # split(separator = $;, limit = nil) -> array + # split(separator = $;, limit = nil) {|substring| ... } -> self + # + # Returns an array of substrings of +self+ + # that are the result of splitting +self+ + # at each occurrence of the given +separator+. + # + # When argument +separator+ is $;: + # + # - If $; is +nil+ (its default value), + # the split occurs just as if +separator+ were given as a space character + # (see below). + # + # - If $; is a string, + # the split occurs just as if +separator+ were given as that string + # (see below). + # + # When argument +separator+ is ' ' and argument +limit+ is +nil+, + # the split occurs at each sequence of whitespace: + # + # 'abc def ghi'.split(' ') # => ["abc", "def", "ghi"] + # "abc \n\tdef\t\n ghi".split(' ') # => ["abc", "def", "ghi"] + # 'abc def ghi'.split(' ') # => ["abc", "def", "ghi"] + # ''.split(' ') # => [] + # + # When argument +separator+ is a string different from ' ' + # and argument +limit+ is +nil+, + # the split occurs at each occurrence of the separator; + # trailing empty substrings are not returned: + # + # 'abracadabra'.split('ab') # => ["", "racad", "ra"] + # 'aaabcdaaa'.split('a') # => ["", "", "", "bcd"] + # ''.split('a') # => [] + # '3.14159'.split('1') # => ["3.", "4", "59"] + # '!@#$%^$&*($)_+'.split('$') # => ["!@#", "%^", "&*(", ")_+"] + # 'тест'.split('т') # => ["", "ес"] + # 'こんにちは'.split('に') # => ["こん", "ちは"] + # + # When argument +separator+ is a Regexp and argument +limit+ is +nil+, + # the split occurs at each occurrence of a match; + # trailing empty substrings are not returned: + # + # 'abracadabra'.split(/ab/) # => ["", "racad", "ra"] + # 'aaabcdaaa'.split(/a/) # => ["", "", "", "bcd"] + # 
'aaabcdaaa'.split(//) # => ["a", "a", "a", "b", "c", "d", "a", "a", "a"] + # '1 + 1 == 2'.split(/\W+/) # => ["1", "1", "2"] + # + # If the \Regexp contains groups, their matches are also included + # in the returned array: + # + # '1:2:3'.split(/(:)()()/, 2) # => ["1", ":", "", "", "2:3"] + # + # As seen above, if argument +limit+ is +nil+, + # trailing empty substrings are not returned; + # the same is true if +limit+ is zero: + # + # 'aaabcdaaa'.split('a') # => ["", "", "", "bcd"] + # 'aaabcdaaa'.split('a', 0) # => ["", "", "", "bcd"] + # + # If +limit+ is a positive integer +n+, no more than <tt>n - 1</tt> + # splits occur, so that at most +n+ substrings are returned, + # and trailing empty substrings are included: + # + # 'aaabcdaaa'.split('a', 1) # => ["aaabcdaaa"] + # 'aaabcdaaa'.split('a', 2) # => ["", "aabcdaaa"] + # 'aaabcdaaa'.split('a', 5) # => ["", "", "", "bcd", "aa"] + # 'aaabcdaaa'.split('a', 7) # => ["", "", "", "bcd", "", "", ""] + # 'aaabcdaaa'.split('a', 8) # => ["", "", "", "bcd", "", "", ""] + # + # Note that if +separator+ is a \Regexp containing groups, + # their matches are in the returned array, but do not count toward the limit. 
+ # + # If +limit+ is negative, there is no limit on the number of splits + # (the same as if +limit+ were +nil+), + # but, unlike that case, trailing empty substrings are included: + # + # 'aaabcdaaa'.split('a', -1) # => ["", "", "", "bcd", "", "", ""] + # + # If a block is given, it is called with each substring: + # + # 'abc def ghi'.split(' ') {|substring| p substring } + # + # Output: + # + # "abc" + # "def" + # "ghi" + # + def split; end + end diff --git a/string.c b/string.c index a97b6adc67..7f6780cb0d 100644 --- a/string.c +++ b/string.c @@ -8583,58 +8583,7 @@ literal_split_pattern(VALUE spat, split_type_t default_type) } /* - * call-seq: - * str.split(pattern=nil, [limit]) -> an_array - * str.split(pattern=nil, [limit]) {|sub| block } -> str - * - * Divides str into substrings based on a delimiter, returning an array - * of these substrings. - * - * If pattern is a String, then its contents are used as - * the delimiter when splitting str. If pattern is a single - * space, str is split on whitespace, with leading and trailing - * whitespace and runs of contiguous whitespace characters ignored. - * - * If pattern is a Regexp, str is divided where the - * pattern matches. Whenever the pattern matches a zero-length string, - * str is split into individual characters. If pattern contains - * groups, the respective matches will be returned in the array as well. - * - * If pattern is nil, the value of $; is used. - * If $; is nil (which is the default), str is - * split on whitespace as if ' ' were specified. - * - * If the limit parameter is omitted, trailing null fields are - * suppressed. If limit is a positive number, at most that number - * of split substrings will be returned (captured groups will be returned - * as well, but are not counted towards the limit). - * If limit is 1, the entire - * string is returned as the only entry in an array. If negative, there is no - * limit to the number of fields returned, and trailing null fields are not - * suppressed. 
- * - * When the input +str+ is empty an empty Array is returned as the string is - * considered to have no fields to split. - * - * " now's the time ".split #=> ["now's", "the", "time"] - * " now's the time ".split(' ') #=> ["now's", "the", "time"] - * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"] - * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"] - * "hello".split(//) #=> ["h", "e", "l", "l", "o"] - * "hello".split(//, 3) #=> ["h", "e", "llo"] - * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"] - * - * "mellow yellow".split("ello") #=> ["m", "w y", "w"] - * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"] - * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"] - * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""] - * - * "1:2:3".split(/(:)()()/, 2) #=> ["1", ":", "", "", "2:3"] - * - * "".split(',', -1) #=> [] - * - * If a block is given, invoke the block with each split substring. - * + * String#split is documented at doc/string.rdoc. */ static VALUE