diff --git a/doc/documentation_guide.rdoc b/doc/documentation_guide.rdoc index 6016225d04..f6f7eb3710 100644 --- a/doc/documentation_guide.rdoc +++ b/doc/documentation_guide.rdoc @@ -60,34 +60,25 @@ involving new files doc/*.rdoc: # Documentation for module Bar goes here. module Bar; end -- For an instance method Baz#bat (defined in file baz.c), - create file doc/baz.rdoc, declare class +Baz+ - and instance method +bat+, and place the method documentation above - the method declaration: +- For a method, things are different. + Documenting a method as above disables the "click to toggle source" feature + in the rendered documentation. - # :markup: ruby - class Baz - # Documentation for method bat goes here. - # (Don't forget the call-seq.) - def bat; end - end + Therefore it's best to use file inclusion: -- For a singleton method Bam.bah (defined in file bam.c), - create file doc/bam.rdoc, declare class +Bam+ - and singleton method +bah+, and place the method documentation above - the method declaration: + - Retain the call-seq in the C code. + - Use file inclusion (+:include:+) to include text from an .rdoc file. - # :markup: ruby - class Bam - # Documentation for method bah goes here. - # (Don't forget the call-seq.) - def self.bah; end - end + Example: - See these examples: - - - https://raw.githubusercontent.com/ruby/ruby/master/doc/string.rdoc - - https://raw.githubusercontent.com/ruby/ruby/master/doc/transcode.rdoc + /* + * call-seq: + * each_byte {|byte| ... 
} -> self + * each_byte -> enumerator + * + * \:include: doc/string/each_byte.rdoc + * + */ === \RDoc diff --git a/doc/string.rdoc b/doc/string.rdoc deleted file mode 100644 index a4423c9d9b..0000000000 --- a/doc/string.rdoc +++ /dev/null @@ -1,245 +0,0 @@ -# :markup: ruby - -class String - # call-seq: - # String.new(string = '') -> new_string - # String.new(string = '', encoding: encoding) -> new_string - # String.new(string = '', capacity: size) -> new_string - # - # Returns a new \String that is a copy of +string+. - # - # With no arguments, returns the empty string with the Encoding ASCII-8BIT: - # s = String.new - # s # => "" - # s.encoding # => # - # - # With the single \String argument +string+, returns a copy of +string+ - # with the same encoding as +string+: - # s = String.new('Que veut dire ça?') - # s # => "Que veut dire ça?" - # s.encoding # => # - # - # Literal strings like "" or here-documents always use - # Encoding@Script+encoding, unlike String.new. - # - # With keyword +encoding+, returns a copy of +str+ - # with the specified encoding: - # s = String.new(encoding: 'ASCII') - # s.encoding # => # - # s = String.new('foo', encoding: 'ASCII') - # s.encoding # => # - # - # Note that these are equivalent: - # s0 = String.new('foo', encoding: 'ASCII') - # s1 = 'foo'.force_encoding('ASCII') - # s0.encoding == s1.encoding # => true - # - # With keyword +capacity+, returns a copy of +str+; - # the given +capacity+ may set the size of the internal buffer, - # which may affect performance: - # String.new(capacity: 1) # => "" - # String.new(capacity: 4096) # => "" - # - # The +string+, +encoding+, and +capacity+ arguments may all be used together: - # - # String.new('hello', encoding: 'UTF-8', capacity: 25) - # - def initialize(str = '', encoding: nil, capacity: nil) - Primitive.rb_str_init(str, encoding, capacity) - end - - # call-seq: - # bytes -> array_of_bytes - # - # Returns an array of the bytes in +self+: - # - # 'hello'.bytes # => [104, 101, 108, 
108, 111] - # 'тест'.bytes # => [209, 130, 208, 181, 209, 129, 209, 130] - # 'こんにちは'.bytes - # # => [227, 129, 147, 227, 130, 147, 227, 129, 171, 227, 129, 161, 227, 129, 175] - # - def bytes; end - - # call-seq: - # each_byte {|byte| ... } -> self - # each_byte -> enumerator - # - # Calls the given block with each successive byte from +self+; - # returns +self+: - # - # 'hello'.each_byte {|byte| print byte, ' ' } - # print "\n" - # 'тест'.each_byte {|byte| print byte, ' ' } - # print "\n" - # 'こんにちは'.each_byte {|byte| print byte, ' ' } - # print "\n" - # - # Output: - # - # 104 101 108 108 111 - # 209 130 208 181 209 129 209 130 - # 227 129 147 227 130 147 227 129 171 227 129 161 227 129 175 - # - # Returns an enumerator if no block is given. - def each_byte; end - - - # call-seq: - # each_line(line_sep = $/, chomp: false) {|substring| ... } -> self - # each_line(line_sep = $/, chomp: false) -> enumerator - # - # With a block given, forms the substrings ("lines") - # that are the result of splitting +self+ - # at each occurrence of the given line separator +line_sep+; - # passes each line to the block; - # returns +self+: - # - # s = <<~EOT - # This is the first line. - # This is line two. - # - # This is line four. - # This is line five. - # EOT - # - # s.each_line {|line| p line } - # - # Output: - # - # "This is the first line.\n" - # "This is line two.\n" - # "\n" - # "This is line four.\n" - # "This is line five.\n" - # - # With a different +line_sep+: - # - # s.each_line(' is ') {|line| p line } - # - # Output: - # - # "This is " - # "the first line.\nThis is " - # "line two.\n\nThis is " - # "line four.\nThis is " - # "line five.\n" - # - # With +chomp+ as +true+, removes the trailing +line_sep+ from each line: - # - # s.each_line(chomp: true) {|line| p line } - # - # Output: - # - # "This is the first line." - # "This is line two." - # "" - # "This is line four." - # "This is line five." 
- # - # With an empty string as +line_sep+, - # forms and passes "paragraphs" by splitting at each occurrence - # of two or more newlines: - # - # s.each_line('') {|line| p line } - # - # Output: - # - # "This is the first line.\nThis is line two.\n\n" - # "This is line four.\nThis is line five.\n" - # - # With no block given, returns an enumerator. - # - def each_line; end - - # call-seq: - # split(field_sep = $;, limit = nil) -> array - # split(field_sep = $;, limit = nil) {|substring| ... } -> self - # - # Returns an array of substrings of +self+ - # that are the result of splitting +self+ - # at each occurrence of the given field separator +field_sep+. - # - # When +field_sep+ is $;: - # - # - If $; is +nil+ (its default value), - # the split occurs just as if +field_sep+ were given as a space character - # (see below). - # - # - If $; is a string, - # the split ocurs just as if +field_sep+ were given as that string - # (see below). - # - # When +field_sep+ is ' ' and +limit+ is +nil+, - # the split occurs at each sequence of whitespace: - # - # 'abc def ghi'.split(' ') # => ["abc", "def", "ghi"] - # "abc \n\tdef\t\n ghi".split(' ') # => ["abc", "def", "ghi"] - # 'abc def ghi'.split(' ') # => ["abc", "def", "ghi"] - # ''.split(' ') # => [] - # - # When +field_sep+ is a string different from ' ' - # and +limit+ is +nil+, - # the split occurs at each occurrence of +field_sep+; - # trailing empty substrings are not returned: - # - # 'abracadabra'.split('ab') # => ["", "racad", "ra"] - # 'aaabcdaaa'.split('a') # => ["", "", "", "bcd"] - # ''.split('a') # => [] - # '3.14159'.split('1') # => ["3.", "4", "59"] - # '!@#$%^$&*($)_+'.split('$') # => ["!@#", "%^", "&*(", ")_+"] - # 'тест'.split('т') # => ["", "ес"] - # 'こんにちは'.split('に') # => ["こん", "ちは"] - # - # When +field_sep+ is a Regexp and +limit+ is +nil+, - # the split occurs at each occurrence of a match; - # trailing empty substrings are not returned: - # - # 'abracadabra'.split(/ab/) # => ["", "racad", "ra"] - 
# 'aaabcdaaa'.split(/a/) # => ["", "", "", "bcd"] - # 'aaabcdaaa'.split(//) # => ["a", "a", "a", "b", "c", "d", "a", "a", "a"] - # '1 + 1 == 2'.split(/\W+/) # => ["1", "1", "2"] - # - # If the \Regexp contains groups, their matches are also included - # in the returned array: - # - # '1:2:3'.split(/(:)()()/, 2) # => ["1", ":", "", "", "2:3"] - # - # As seen above, if +limit+ is +nil+, - # trailing empty substrings are not returned; - # the same is true if +limit+ is zero: - # - # 'aaabcdaaa'.split('a') # => ["", "", "", "bcd"] - # 'aaabcdaaa'.split('a', 0) # => ["", "", "", "bcd"] - # - # If +limit+ is positive integer +n+, no more than n - 1- - # splits occur, so that at most +n+ substrings are returned, - # and trailing empty substrings are included: - # - # 'aaabcdaaa'.split('a', 1) # => ["aaabcdaaa"] - # 'aaabcdaaa'.split('a', 2) # => ["", "aabcdaaa"] - # 'aaabcdaaa'.split('a', 5) # => ["", "", "", "bcd", "aa"] - # 'aaabcdaaa'.split('a', 7) # => ["", "", "", "bcd", "", "", ""] - # 'aaabcdaaa'.split('a', 8) # => ["", "", "", "bcd", "", "", ""] - # - # Note that if +field_sep+ is a \Regexp containing groups, - # their matches are in the returned array, but do not count toward the limit. 
- # - # If +limit+ is negative, it behaves the same as if +limit+ was +nil+, - # meaning that there is no limit, - # and trailing empty substrings are included: - # - # 'aaabcdaaa'.split('a', -1) # => ["", "", "", "bcd", "", "", ""] - # - # If a block is given, it is called with each substring: - # - # 'abc def ghi'.split(' ') {|substring| p substring } - # - # Output: - # - # "abc" - # "def" - # "ghi" - # - def split; end - -end diff --git a/doc/string/bytes.rdoc b/doc/string/bytes.rdoc new file mode 100644 index 0000000000..a9e89f1cd1 --- /dev/null +++ b/doc/string/bytes.rdoc @@ -0,0 +1,6 @@ +Returns an array of the bytes in +self+: + + 'hello'.bytes # => [104, 101, 108, 108, 111] + 'тест'.bytes # => [209, 130, 208, 181, 209, 129, 209, 130] + 'こんにちは'.bytes + # => [227, 129, 147, 227, 130, 147, 227, 129, 171, 227, 129, 161, 227, 129, 175] diff --git a/doc/string/each_byte.rdoc b/doc/string/each_byte.rdoc new file mode 100644 index 0000000000..643118fea3 --- /dev/null +++ b/doc/string/each_byte.rdoc @@ -0,0 +1,17 @@ +Calls the given block with each successive byte from +self+; +returns +self+: + + 'hello'.each_byte {|byte| print byte, ' ' } + print "\n" + 'тест'.each_byte {|byte| print byte, ' ' } + print "\n" + 'こんにちは'.each_byte {|byte| print byte, ' ' } + print "\n" + +Output: + + 104 101 108 108 111 + 209 130 208 181 209 129 209 130 + 227 129 147 227 130 147 227 129 171 227 129 161 227 129 175 + +Returns an enumerator if no block is given. diff --git a/doc/string/each_line.rdoc b/doc/string/each_line.rdoc new file mode 100644 index 0000000000..e254c22d40 --- /dev/null +++ b/doc/string/each_line.rdoc @@ -0,0 +1,60 @@ +With a block given, forms the substrings ("lines") +that are the result of splitting +self+ +at each occurrence of the given line separator +line_sep+; +passes each line to the block; +returns +self+: + + s = <<~EOT + This is the first line. + This is line two. + + This is line four. + This is line five. 
+ EOT + + s.each_line {|line| p line } + +Output: + + "This is the first line.\n" + "This is line two.\n" + "\n" + "This is line four.\n" + "This is line five.\n" + +With a different +line_sep+: + + s.each_line(' is ') {|line| p line } + +Output: + + "This is " + "the first line.\nThis is " + "line two.\n\nThis is " + "line four.\nThis is " + "line five.\n" + +With +chomp+ as +true+, removes the trailing +line_sep+ from each line: + + s.each_line(chomp: true) {|line| p line } + +Output: + + "This is the first line." + "This is line two." + "" + "This is line four." + "This is line five." + +With an empty string as +line_sep+, +forms and passes "paragraphs" by splitting at each occurrence +of two or more newlines: + + s.each_line('') {|line| p line } + +Output: + + "This is the first line.\nThis is line two.\n\n" + "This is line four.\nThis is line five.\n" + +With no block given, returns an enumerator. diff --git a/doc/string/new.rdoc b/doc/string/new.rdoc new file mode 100644 index 0000000000..3eee2b82e0 --- /dev/null +++ b/doc/string/new.rdoc @@ -0,0 +1,37 @@ +Returns a new \String that is a copy of +string+. + +With no arguments, returns the empty string with the Encoding ASCII-8BIT: + s = String.new + s # => "" + s.encoding # => #<Encoding:ASCII-8BIT> + +With the single \String argument +string+, returns a copy of +string+ +with the same encoding as +string+: + s = String.new('Que veut dire ça?') + s # => "Que veut dire ça?" + s.encoding # => #<Encoding:UTF-8> + +Literal strings like "" or here-documents always use +Encoding@Script+encoding, unlike String.new. 
+ +With keyword +encoding+, returns a copy of +string+ +with the specified encoding: + s = String.new(encoding: 'ASCII') + s.encoding # => #<Encoding:US-ASCII> + s = String.new('foo', encoding: 'ASCII') + s.encoding # => #<Encoding:US-ASCII> + +Note that these are equivalent: + s0 = String.new('foo', encoding: 'ASCII') + s1 = 'foo'.force_encoding('ASCII') + s0.encoding == s1.encoding # => true + +With keyword +capacity+, returns a copy of +string+; +the given +capacity+ may set the size of the internal buffer, +which may affect performance: + String.new(capacity: 1) # => "" + String.new(capacity: 4096) # => "" + +The +string+, +encoding+, and +capacity+ arguments may all be used together: + + String.new('hello', encoding: 'UTF-8', capacity: 25) diff --git a/doc/string/split.rdoc b/doc/string/split.rdoc new file mode 100644 index 0000000000..d93b76d9b4 --- /dev/null +++ b/doc/string/split.rdoc @@ -0,0 +1,84 @@ +Returns an array of substrings of +self+ +that are the result of splitting +self+ +at each occurrence of the given field separator +field_sep+. + +When +field_sep+ is $;: + +- If $; is +nil+ (its default value), + the split occurs just as if +field_sep+ were given as a space character + (see below). + +- If $; is a string, + the split occurs just as if +field_sep+ were given as that string + (see below). 
+ +When +field_sep+ is ' ' and +limit+ is +nil+, +the split occurs at each sequence of whitespace: + + 'abc def ghi'.split(' ') # => ["abc", "def", "ghi"] + "abc \n\tdef\t\n ghi".split(' ') # => ["abc", "def", "ghi"] + 'abc def ghi'.split(' ') # => ["abc", "def", "ghi"] + ''.split(' ') # => [] + +When +field_sep+ is a string different from ' ' +and +limit+ is +nil+, +the split occurs at each occurrence of +field_sep+; +trailing empty substrings are not returned: + + 'abracadabra'.split('ab') # => ["", "racad", "ra"] + 'aaabcdaaa'.split('a') # => ["", "", "", "bcd"] + ''.split('a') # => [] + '3.14159'.split('1') # => ["3.", "4", "59"] + '!@#$%^$&*($)_+'.split('$') # => ["!@#", "%^", "&*(", ")_+"] + 'тест'.split('т') # => ["", "ес"] + 'こんにちは'.split('に') # => ["こん", "ちは"] + +When +field_sep+ is a Regexp and +limit+ is +nil+, +the split occurs at each occurrence of a match; +trailing empty substrings are not returned: + + 'abracadabra'.split(/ab/) # => ["", "racad", "ra"] + 'aaabcdaaa'.split(/a/) # => ["", "", "", "bcd"] + 'aaabcdaaa'.split(//) # => ["a", "a", "a", "b", "c", "d", "a", "a", "a"] + '1 + 1 == 2'.split(/\W+/) # => ["1", "1", "2"] + +If the \Regexp contains groups, their matches are also included +in the returned array: + + '1:2:3'.split(/(:)()()/, 2) # => ["1", ":", "", "", "2:3"] + +As seen above, if +limit+ is +nil+, +trailing empty substrings are not returned; +the same is true if +limit+ is zero: + + 'aaabcdaaa'.split('a') # => ["", "", "", "bcd"] + 'aaabcdaaa'.split('a', 0) # => ["", "", "", "bcd"] + +If +limit+ is positive integer +n+, no more than <tt>n - 1</tt> +splits occur, so that at most +n+ substrings are returned, +and trailing empty substrings are included: + + 'aaabcdaaa'.split('a', 1) # => ["aaabcdaaa"] + 'aaabcdaaa'.split('a', 2) # => ["", "aabcdaaa"] + 'aaabcdaaa'.split('a', 5) # => ["", "", "", "bcd", "aa"] + 'aaabcdaaa'.split('a', 7) # => ["", "", "", "bcd", "", "", ""] + 'aaabcdaaa'.split('a', 8) # => ["", "", "", "bcd", "", "", ""] + +Note that if +field_sep+ is a 
\Regexp containing groups, +their matches are in the returned array, but do not count toward the limit. + +If +limit+ is negative, there is no limit on the number of splits +(as when +limit+ is +nil+), +but trailing empty substrings are included: + + 'aaabcdaaa'.split('a', -1) # => ["", "", "", "bcd", "", "", ""] + +If a block is given, it is called with each substring: + + 'abc def ghi'.split(' ') {|substring| p substring } + +Output: + + "abc" + "def" + "ghi" diff --git a/string.c b/string.c index 4776f6e40d..f46cba1a6a 100644 --- a/string.c +++ b/string.c @@ -1810,7 +1810,17 @@ rb_ec_str_resurrect(struct rb_execution_context_struct *ec, VALUE str) return ec_str_duplicate(ec, rb_cString, str); } -/* :nodoc: documented in doc/string.rdoc */ +/* + * + * call-seq: + * String.new(string = '') -> new_string + * String.new(string = '', encoding: encoding) -> new_string + * String.new(string = '', capacity: size) -> new_string + * + * :include: doc/string/new.rdoc + * + */ + static VALUE rb_str_init(int argc, VALUE *argv, VALUE str) { @@ -8652,7 +8662,14 @@ literal_split_pattern(VALUE spat, split_type_t default_type) return default_type; } -// String#split is documented at doc/string.rdoc. +/* + * call-seq: + * split(field_sep = $;, limit = nil) -> array + * split(field_sep = $;, limit = nil) {|substring| ... } -> self + * + * :include: doc/string/split.rdoc + * + */ static VALUE rb_str_split_m(int argc, VALUE *argv, VALUE str) @@ -9063,7 +9080,14 @@ rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, VALUE ary) return orig; } -// String#each_line is documented at doc/string.rdoc. +/* + * call-seq: + * each_line(line_sep = $/, chomp: false) {|substring| ... 
} -> self + * each_line(line_sep = $/, chomp: false) -> enumerator + * + * :include: doc/string/each_line.rdoc + * + */ static VALUE rb_str_each_line(int argc, VALUE *argv, VALUE str) @@ -9108,7 +9132,14 @@ rb_str_enumerate_bytes(VALUE str, VALUE ary) return str; } -// String#each_byte is documented in doc/string.rdoc. +/* + * call-seq: + * each_byte {|byte| ... } -> self + * each_byte -> enumerator + * + * :include: doc/string/each_byte.rdoc + * + */ static VALUE rb_str_each_byte(VALUE str) @@ -9117,7 +9148,13 @@ rb_str_each_byte(VALUE str) return rb_str_enumerate_bytes(str, 0); } -// String#bytes is documented in doc/string.rdoc. +/* + * call-seq: + * bytes -> array_of_bytes + * + * :include: doc/string/bytes.rdoc + * + */ static VALUE rb_str_bytes(VALUE str)